package metrics import ( "github.com/prometheus/client_golang/prometheus" ) // Registry holds all vecna Prometheus metrics on a dedicated (non-global) registry. type Registry struct { reg *prometheus.Registry RequestsTotal *prometheus.CounterVec RequestDuration *prometheus.HistogramVec ForwardDuration *prometheus.HistogramVec TranslateDuration *prometheus.HistogramVec EndpointPriority *prometheus.GaugeVec EndpointInflight *prometheus.GaugeVec EndpointErrorsTotal *prometheus.CounterVec TokensTotal *prometheus.CounterVec } // New creates and registers all metrics on a fresh Prometheus registry. func New() *Registry { reg := prometheus.NewRegistry() r := &Registry{ reg: reg, RequestsTotal: prometheus.NewCounterVec(prometheus.CounterOpts{ Name: "vecna_requests_total", Help: "Total number of requests served by vecna.", }, []string{"endpoint", "status"}), RequestDuration: prometheus.NewHistogramVec(prometheus.HistogramOpts{ Name: "vecna_request_duration_seconds", Help: "Total request wall-clock time.", Buckets: prometheus.DefBuckets, }, []string{"endpoint"}), ForwardDuration: prometheus.NewHistogramVec(prometheus.HistogramOpts{ Name: "vecna_forward_duration_seconds", Help: "Time spent waiting on the backing embedding model.", Buckets: prometheus.DefBuckets, }, []string{"target", "url"}), TranslateDuration: prometheus.NewHistogramVec(prometheus.HistogramOpts{ Name: "vecna_translate_duration_seconds", Help: "Time spent in the dimension adapter.", Buckets: []float64{0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05}, }, []string{"adapter_type"}), EndpointPriority: prometheus.NewGaugeVec(prometheus.GaugeOpts{ Name: "vecna_endpoint_priority", Help: "Current dynamic routing priority for a forwarding endpoint.", }, []string{"target", "url"}), EndpointInflight: prometheus.NewGaugeVec(prometheus.GaugeOpts{ Name: "vecna_endpoint_inflight", Help: "Number of active in-flight requests per forwarding endpoint.", }, []string{"target", "url"}), EndpointErrorsTotal: prometheus.NewCounterVec(prometheus.CounterOpts{ Name: "vecna_endpoint_errors_total", Help: "Total forwarding errors per endpoint, labelled by error type.", }, []string{"target", "url", "error"}), TokensTotal: prometheus.NewCounterVec(prometheus.CounterOpts{ Name: "vecna_tokens_total", Help: "Tokens consumed by the backing embedding model, by target, model, and token type.", }, []string{"target", "model", "token_type"}), } reg.MustRegister( r.RequestsTotal, r.RequestDuration, r.ForwardDuration, r.TranslateDuration, r.EndpointPriority, r.EndpointInflight, r.EndpointErrorsTotal, r.TokensTotal, ) return r } // Prometheus returns the underlying registry for use with promhttp.HandlerFor. func (r *Registry) Prometheus() *prometheus.Registry { return r.reg } // Convenience setters used by the router. func (r *Registry) SetEndpointPriority(target, url string, v float64) { r.EndpointPriority.WithLabelValues(target, url).Set(v) } func (r *Registry) SetEndpointInflight(target, url string, v float64) { r.EndpointInflight.WithLabelValues(target, url).Set(v) } func (r *Registry) IncEndpointErrors(target, url, errType string) { r.EndpointErrorsTotal.WithLabelValues(target, url, errType).Inc() } func (r *Registry) AddTokens(target, model string, promptTokens, totalTokens int) { if promptTokens > 0 { r.TokensTotal.WithLabelValues(target, model, "prompt").Add(float64(promptTokens)) } if totalTokens > 0 { r.TokensTotal.WithLabelValues(target, model, "total").Add(float64(totalTokens)) } }