mirror of
https://github.com/Warky-Devs/vecna.git
synced 2026-05-05 01:26:58 +00:00
feat: 🎉 Vectors na Vectors, the beginning
Translate 1536 <-> 768 , 3072 <-> 2048
This commit is contained in:
112
pkg/metrics/metrics.go
Normal file
112
pkg/metrics/metrics.go
Normal file
@@ -0,0 +1,112 @@
|
||||
package metrics
|
||||
|
||||
import (
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
)
|
||||
|
||||
// Registry holds all vecna Prometheus metrics on a dedicated (non-global) registry.
|
||||
type Registry struct {
|
||||
reg *prometheus.Registry
|
||||
|
||||
RequestsTotal *prometheus.CounterVec
|
||||
RequestDuration *prometheus.HistogramVec
|
||||
ForwardDuration *prometheus.HistogramVec
|
||||
TranslateDuration *prometheus.HistogramVec
|
||||
EndpointPriority *prometheus.GaugeVec
|
||||
EndpointInflight *prometheus.GaugeVec
|
||||
EndpointErrorsTotal *prometheus.CounterVec
|
||||
TokensTotal *prometheus.CounterVec
|
||||
}
|
||||
|
||||
// New creates and registers all metrics on a fresh Prometheus registry.
|
||||
func New() *Registry {
|
||||
reg := prometheus.NewRegistry()
|
||||
|
||||
r := &Registry{
|
||||
reg: reg,
|
||||
|
||||
RequestsTotal: prometheus.NewCounterVec(prometheus.CounterOpts{
|
||||
Name: "vecna_requests_total",
|
||||
Help: "Total number of requests served by vecna.",
|
||||
}, []string{"endpoint", "status"}),
|
||||
|
||||
RequestDuration: prometheus.NewHistogramVec(prometheus.HistogramOpts{
|
||||
Name: "vecna_request_duration_seconds",
|
||||
Help: "Total request wall-clock time.",
|
||||
Buckets: prometheus.DefBuckets,
|
||||
}, []string{"endpoint"}),
|
||||
|
||||
ForwardDuration: prometheus.NewHistogramVec(prometheus.HistogramOpts{
|
||||
Name: "vecna_forward_duration_seconds",
|
||||
Help: "Time spent waiting on the backing embedding model.",
|
||||
Buckets: prometheus.DefBuckets,
|
||||
}, []string{"target", "url"}),
|
||||
|
||||
TranslateDuration: prometheus.NewHistogramVec(prometheus.HistogramOpts{
|
||||
Name: "vecna_translate_duration_seconds",
|
||||
Help: "Time spent in the dimension adapter.",
|
||||
Buckets: []float64{0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05},
|
||||
}, []string{"adapter_type"}),
|
||||
|
||||
EndpointPriority: prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
||||
Name: "vecna_endpoint_priority",
|
||||
Help: "Current dynamic routing priority for a forwarding endpoint.",
|
||||
}, []string{"target", "url"}),
|
||||
|
||||
EndpointInflight: prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
||||
Name: "vecna_endpoint_inflight",
|
||||
Help: "Number of active in-flight requests per forwarding endpoint.",
|
||||
}, []string{"target", "url"}),
|
||||
|
||||
EndpointErrorsTotal: prometheus.NewCounterVec(prometheus.CounterOpts{
|
||||
Name: "vecna_endpoint_errors_total",
|
||||
Help: "Total forwarding errors per endpoint, labelled by error type.",
|
||||
}, []string{"target", "url", "error"}),
|
||||
|
||||
TokensTotal: prometheus.NewCounterVec(prometheus.CounterOpts{
|
||||
Name: "vecna_tokens_total",
|
||||
Help: "Tokens consumed by the backing embedding model, by target, model, and token type.",
|
||||
}, []string{"target", "model", "token_type"}),
|
||||
}
|
||||
|
||||
reg.MustRegister(
|
||||
r.RequestsTotal,
|
||||
r.RequestDuration,
|
||||
r.ForwardDuration,
|
||||
r.TranslateDuration,
|
||||
r.EndpointPriority,
|
||||
r.EndpointInflight,
|
||||
r.EndpointErrorsTotal,
|
||||
r.TokensTotal,
|
||||
)
|
||||
|
||||
return r
|
||||
}
|
||||
|
||||
// Prometheus returns the underlying registry for use with promhttp.HandlerFor.
|
||||
func (r *Registry) Prometheus() *prometheus.Registry {
|
||||
return r.reg
|
||||
}
|
||||
|
||||
// Convenience setters used by the router.
|
||||
|
||||
func (r *Registry) SetEndpointPriority(target, url string, v float64) {
|
||||
r.EndpointPriority.WithLabelValues(target, url).Set(v)
|
||||
}
|
||||
|
||||
func (r *Registry) SetEndpointInflight(target, url string, v float64) {
|
||||
r.EndpointInflight.WithLabelValues(target, url).Set(v)
|
||||
}
|
||||
|
||||
func (r *Registry) IncEndpointErrors(target, url, errType string) {
|
||||
r.EndpointErrorsTotal.WithLabelValues(target, url, errType).Inc()
|
||||
}
|
||||
|
||||
func (r *Registry) AddTokens(target, model string, promptTokens, totalTokens int) {
|
||||
if promptTokens > 0 {
|
||||
r.TokensTotal.WithLabelValues(target, model, "prompt").Add(float64(promptTokens))
|
||||
}
|
||||
if totalTokens > 0 {
|
||||
r.TokensTotal.WithLabelValues(target, model, "total").Add(float64(totalTokens))
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user