mirror of
https://github.com/Warky-Devs/vecna.git
synced 2026-05-05 01:26:58 +00:00
113 lines
3.6 KiB
Go
113 lines
3.6 KiB
Go
package metrics
|
|
|
|
import (
|
|
"github.com/prometheus/client_golang/prometheus"
|
|
)
|
|
|
|
// Registry holds all vecna Prometheus metrics on a dedicated (non-global) registry.
|
|
type Registry struct {
|
|
reg *prometheus.Registry
|
|
|
|
RequestsTotal *prometheus.CounterVec
|
|
RequestDuration *prometheus.HistogramVec
|
|
ForwardDuration *prometheus.HistogramVec
|
|
TranslateDuration *prometheus.HistogramVec
|
|
EndpointPriority *prometheus.GaugeVec
|
|
EndpointInflight *prometheus.GaugeVec
|
|
EndpointErrorsTotal *prometheus.CounterVec
|
|
TokensTotal *prometheus.CounterVec
|
|
}
|
|
|
|
// New creates and registers all metrics on a fresh Prometheus registry.
|
|
func New() *Registry {
|
|
reg := prometheus.NewRegistry()
|
|
|
|
r := &Registry{
|
|
reg: reg,
|
|
|
|
RequestsTotal: prometheus.NewCounterVec(prometheus.CounterOpts{
|
|
Name: "vecna_requests_total",
|
|
Help: "Total number of requests served by vecna.",
|
|
}, []string{"endpoint", "status"}),
|
|
|
|
RequestDuration: prometheus.NewHistogramVec(prometheus.HistogramOpts{
|
|
Name: "vecna_request_duration_seconds",
|
|
Help: "Total request wall-clock time.",
|
|
Buckets: prometheus.DefBuckets,
|
|
}, []string{"endpoint"}),
|
|
|
|
ForwardDuration: prometheus.NewHistogramVec(prometheus.HistogramOpts{
|
|
Name: "vecna_forward_duration_seconds",
|
|
Help: "Time spent waiting on the backing embedding model.",
|
|
Buckets: prometheus.DefBuckets,
|
|
}, []string{"target", "url"}),
|
|
|
|
TranslateDuration: prometheus.NewHistogramVec(prometheus.HistogramOpts{
|
|
Name: "vecna_translate_duration_seconds",
|
|
Help: "Time spent in the dimension adapter.",
|
|
Buckets: []float64{0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05},
|
|
}, []string{"adapter_type"}),
|
|
|
|
EndpointPriority: prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
|
Name: "vecna_endpoint_priority",
|
|
Help: "Current dynamic routing priority for a forwarding endpoint.",
|
|
}, []string{"target", "url"}),
|
|
|
|
EndpointInflight: prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
|
Name: "vecna_endpoint_inflight",
|
|
Help: "Number of active in-flight requests per forwarding endpoint.",
|
|
}, []string{"target", "url"}),
|
|
|
|
EndpointErrorsTotal: prometheus.NewCounterVec(prometheus.CounterOpts{
|
|
Name: "vecna_endpoint_errors_total",
|
|
Help: "Total forwarding errors per endpoint, labelled by error type.",
|
|
}, []string{"target", "url", "error"}),
|
|
|
|
TokensTotal: prometheus.NewCounterVec(prometheus.CounterOpts{
|
|
Name: "vecna_tokens_total",
|
|
Help: "Tokens consumed by the backing embedding model, by target, model, and token type.",
|
|
}, []string{"target", "model", "token_type"}),
|
|
}
|
|
|
|
reg.MustRegister(
|
|
r.RequestsTotal,
|
|
r.RequestDuration,
|
|
r.ForwardDuration,
|
|
r.TranslateDuration,
|
|
r.EndpointPriority,
|
|
r.EndpointInflight,
|
|
r.EndpointErrorsTotal,
|
|
r.TokensTotal,
|
|
)
|
|
|
|
return r
|
|
}
|
|
|
|
// Prometheus returns the underlying registry for use with promhttp.HandlerFor.
|
|
func (r *Registry) Prometheus() *prometheus.Registry {
|
|
return r.reg
|
|
}
|
|
|
|
// Convenience setters used by the router.
|
|
|
|
func (r *Registry) SetEndpointPriority(target, url string, v float64) {
|
|
r.EndpointPriority.WithLabelValues(target, url).Set(v)
|
|
}
|
|
|
|
func (r *Registry) SetEndpointInflight(target, url string, v float64) {
|
|
r.EndpointInflight.WithLabelValues(target, url).Set(v)
|
|
}
|
|
|
|
func (r *Registry) IncEndpointErrors(target, url, errType string) {
|
|
r.EndpointErrorsTotal.WithLabelValues(target, url, errType).Inc()
|
|
}
|
|
|
|
func (r *Registry) AddTokens(target, model string, promptTokens, totalTokens int) {
|
|
if promptTokens > 0 {
|
|
r.TokensTotal.WithLabelValues(target, model, "prompt").Add(float64(promptTokens))
|
|
}
|
|
if totalTokens > 0 {
|
|
r.TokensTotal.WithLabelValues(target, model, "total").Add(float64(totalTokens))
|
|
}
|
|
}
|