Files
vecna/pkg/metrics/metrics.go
Hein 4009a54e39 feat: 🎉 Vectors na Vectors, the beginning
Translate 1536 <-> 768 , 3072 <-> 2048
2026-04-11 18:05:05 +02:00

113 lines
3.6 KiB
Go

package metrics
import (
"github.com/prometheus/client_golang/prometheus"
)
// Registry bundles every vecna Prometheus collector together with the
// dedicated (non-global) registry those collectors are registered on.
type Registry struct {
	// reg is the private registry the collectors below are registered on;
	// it is exposed through the Prometheus method.
	reg *prometheus.Registry

	// RequestsTotal counts requests served by vecna.
	RequestsTotal *prometheus.CounterVec
	// RequestDuration observes total request wall-clock time, in seconds.
	RequestDuration *prometheus.HistogramVec
	// ForwardDuration observes time spent waiting on the backing embedding
	// model, in seconds.
	ForwardDuration *prometheus.HistogramVec
	// TranslateDuration observes time spent in the dimension adapter, in
	// seconds.
	TranslateDuration *prometheus.HistogramVec

	// EndpointPriority tracks the current dynamic routing priority of a
	// forwarding endpoint.
	EndpointPriority *prometheus.GaugeVec
	// EndpointInflight tracks the number of active in-flight requests per
	// forwarding endpoint.
	EndpointInflight *prometheus.GaugeVec
	// EndpointErrorsTotal counts forwarding errors per endpoint, labelled by
	// error type.
	EndpointErrorsTotal *prometheus.CounterVec

	// TokensTotal counts tokens consumed by the backing embedding model.
	TokensTotal *prometheus.CounterVec
}
// New builds every vecna collector, registers all of them on a freshly
// created Prometheus registry, and returns the populated Registry.
//
// Registration goes through MustRegister, which by the Must-prefix convention
// panics instead of returning an error.
func New() *Registry {
	// Request-level collectors.
	requestsTotal := prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Name: "vecna_requests_total",
			Help: "Total number of requests served by vecna.",
		},
		[]string{"endpoint", "status"},
	)
	requestDuration := prometheus.NewHistogramVec(
		prometheus.HistogramOpts{
			Name:    "vecna_request_duration_seconds",
			Help:    "Total request wall-clock time.",
			Buckets: prometheus.DefBuckets,
		},
		[]string{"endpoint"},
	)
	forwardDuration := prometheus.NewHistogramVec(
		prometheus.HistogramOpts{
			Name:    "vecna_forward_duration_seconds",
			Help:    "Time spent waiting on the backing embedding model.",
			Buckets: prometheus.DefBuckets,
		},
		[]string{"target", "url"},
	)
	// The adapter histogram uses custom sub-second buckets (0.1 ms to 50 ms)
	// rather than the library defaults.
	translateDuration := prometheus.NewHistogramVec(
		prometheus.HistogramOpts{
			Name:    "vecna_translate_duration_seconds",
			Help:    "Time spent in the dimension adapter.",
			Buckets: []float64{0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05},
		},
		[]string{"adapter_type"},
	)

	// Per-endpoint collectors.
	endpointPriority := prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Name: "vecna_endpoint_priority",
			Help: "Current dynamic routing priority for a forwarding endpoint.",
		},
		[]string{"target", "url"},
	)
	endpointInflight := prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Name: "vecna_endpoint_inflight",
			Help: "Number of active in-flight requests per forwarding endpoint.",
		},
		[]string{"target", "url"},
	)
	endpointErrorsTotal := prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Name: "vecna_endpoint_errors_total",
			Help: "Total forwarding errors per endpoint, labelled by error type.",
		},
		[]string{"target", "url", "error"},
	)

	// Token accounting.
	tokensTotal := prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Name: "vecna_tokens_total",
			Help: "Tokens consumed by the backing embedding model, by target, model, and token type.",
		},
		[]string{"target", "model", "token_type"},
	)

	registry := prometheus.NewRegistry()
	registry.MustRegister(
		requestsTotal,
		requestDuration,
		forwardDuration,
		translateDuration,
		endpointPriority,
		endpointInflight,
		endpointErrorsTotal,
		tokensTotal,
	)

	return &Registry{
		reg:                 registry,
		RequestsTotal:       requestsTotal,
		RequestDuration:     requestDuration,
		ForwardDuration:     forwardDuration,
		TranslateDuration:   translateDuration,
		EndpointPriority:    endpointPriority,
		EndpointInflight:    endpointInflight,
		EndpointErrorsTotal: endpointErrorsTotal,
		TokensTotal:         tokensTotal,
	}
}
// Prometheus exposes the dedicated registry held by r, for example so that it
// can be passed to promhttp.HandlerFor.
func (r *Registry) Prometheus() *prometheus.Registry {
	return r.reg
}
// Convenience setters used by the router.

// SetEndpointPriority sets the priority gauge for the (target, url) label
// pair to v.
func (r *Registry) SetEndpointPriority(target, url string, v float64) {
	priority := r.EndpointPriority.WithLabelValues(target, url)
	priority.Set(v)
}
// SetEndpointInflight sets the in-flight gauge for the (target, url) label
// pair to v.
func (r *Registry) SetEndpointInflight(target, url string, v float64) {
	inflight := r.EndpointInflight.WithLabelValues(target, url)
	inflight.Set(v)
}
// IncEndpointErrors adds one to the error counter identified by target, url,
// and errType (recorded under the "error" label).
func (r *Registry) IncEndpointErrors(target, url, errType string) {
	errCounter := r.EndpointErrorsTotal.WithLabelValues(target, url, errType)
	errCounter.Inc()
}
// AddTokens adds promptTokens and totalTokens to the token counter for the
// given target and model, under the token_type label values "prompt" and
// "total" respectively. Counts that are zero or negative are skipped.
func (r *Registry) AddTokens(target, model string, promptTokens, totalTokens int) {
	usage := []struct {
		tokenType string
		count     int
	}{
		{tokenType: "prompt", count: promptTokens},
		{tokenType: "total", count: totalTokens},
	}

	for _, u := range usage {
		if u.count <= 0 {
			continue
		}
		r.TokensTotal.WithLabelValues(target, model, u.tokenType).Add(float64(u.count))
	}
}