feat: 🎉 Vectors na Vectors, the beginning

Translate 1536 <-> 768, 3072 <-> 2048
This commit is contained in:
2026-04-11 18:05:05 +02:00
parent d98ea7c222
commit 4009a54e39
58 changed files with 5324 additions and 2 deletions

112
pkg/metrics/metrics.go Normal file
View File

@@ -0,0 +1,112 @@
package metrics
import (
"github.com/prometheus/client_golang/prometheus"
)
// Registry bundles every vecna Prometheus collector together with the
// dedicated (non-global) registry those collectors are registered on.
// Instances are created with New.
type Registry struct {
	// reg is the private Prometheus registry that owns the collectors below.
	// It is handed out by the Prometheus method.
	reg *prometheus.Registry

	// RequestsTotal counts requests served by vecna
	// (labels: endpoint, status).
	RequestsTotal *prometheus.CounterVec

	// RequestDuration observes total request wall-clock time in seconds
	// (labels: endpoint).
	RequestDuration *prometheus.HistogramVec

	// ForwardDuration observes the time spent waiting on the backing
	// embedding model, in seconds (labels: target, url).
	ForwardDuration *prometheus.HistogramVec

	// TranslateDuration observes the time spent in the dimension adapter, in
	// seconds (labels: adapter_type).
	TranslateDuration *prometheus.HistogramVec

	// EndpointPriority holds the current dynamic routing priority of a
	// forwarding endpoint (labels: target, url).
	EndpointPriority *prometheus.GaugeVec

	// EndpointInflight holds the number of active in-flight requests per
	// forwarding endpoint (labels: target, url).
	EndpointInflight *prometheus.GaugeVec

	// EndpointErrorsTotal counts forwarding errors per endpoint
	// (labels: target, url, error).
	EndpointErrorsTotal *prometheus.CounterVec

	// TokensTotal counts tokens consumed by the backing embedding model
	// (labels: target, model, token_type).
	TokensTotal *prometheus.CounterVec
}
// New constructs every vecna collector, registers all of them on a freshly
// created Prometheus registry (not the global default one) and returns the
// resulting Registry.
//
// Registration goes through MustRegister, so a collector that cannot be
// registered makes New panic instead of returning an error.
func New() *Registry {
	// Per-request metrics, keyed by the HTTP endpoint.
	requestsTotal := prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Name: "vecna_requests_total",
			Help: "Total number of requests served by vecna.",
		},
		[]string{"endpoint", "status"},
	)
	requestDuration := prometheus.NewHistogramVec(
		prometheus.HistogramOpts{
			Name:    "vecna_request_duration_seconds",
			Help:    "Total request wall-clock time.",
			Buckets: prometheus.DefBuckets,
		},
		[]string{"endpoint"},
	)

	// Timing of the two processing stages: forwarding and translation.
	forwardDuration := prometheus.NewHistogramVec(
		prometheus.HistogramOpts{
			Name:    "vecna_forward_duration_seconds",
			Help:    "Time spent waiting on the backing embedding model.",
			Buckets: prometheus.DefBuckets,
		},
		[]string{"target", "url"},
	)
	translateDuration := prometheus.NewHistogramVec(
		prometheus.HistogramOpts{
			Name: "vecna_translate_duration_seconds",
			Help: "Time spent in the dimension adapter.",
			// Custom buckets spanning 100µs to 50ms instead of the default set.
			Buckets: []float64{0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05},
		},
		[]string{"adapter_type"},
	)

	// Per-endpoint routing state and error counts.
	endpointPriority := prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Name: "vecna_endpoint_priority",
			Help: "Current dynamic routing priority for a forwarding endpoint.",
		},
		[]string{"target", "url"},
	)
	endpointInflight := prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Name: "vecna_endpoint_inflight",
			Help: "Number of active in-flight requests per forwarding endpoint.",
		},
		[]string{"target", "url"},
	)
	endpointErrorsTotal := prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Name: "vecna_endpoint_errors_total",
			Help: "Total forwarding errors per endpoint, labelled by error type.",
		},
		[]string{"target", "url", "error"},
	)

	// Token usage reported by the backing model.
	tokensTotal := prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Name: "vecna_tokens_total",
			Help: "Tokens consumed by the backing embedding model, by target, model, and token type.",
		},
		[]string{"target", "model", "token_type"},
	)

	reg := prometheus.NewRegistry()
	reg.MustRegister(
		requestsTotal,
		requestDuration,
		forwardDuration,
		translateDuration,
		endpointPriority,
		endpointInflight,
		endpointErrorsTotal,
		tokensTotal,
	)

	return &Registry{
		reg:                 reg,
		RequestsTotal:       requestsTotal,
		RequestDuration:     requestDuration,
		ForwardDuration:     forwardDuration,
		TranslateDuration:   translateDuration,
		EndpointPriority:    endpointPriority,
		EndpointInflight:    endpointInflight,
		EndpointErrorsTotal: endpointErrorsTotal,
		TokensTotal:         tokensTotal,
	}
}
// Prometheus hands back the dedicated registry backing r, for example to pass
// to promhttp.HandlerFor when exposing the scrape endpoint.
func (r *Registry) Prometheus() *prometheus.Registry { return r.reg }
// Convenience setters used by the router.

// SetEndpointPriority stores v as the current value of the
// vecna_endpoint_priority gauge for the endpoint identified by target and url.
func (r *Registry) SetEndpointPriority(target, url string, v float64) {
	gauge := r.EndpointPriority.WithLabelValues(target, url)
	gauge.Set(v)
}
// SetEndpointInflight stores v as the current value of the
// vecna_endpoint_inflight gauge for the endpoint identified by target and url.
// The value is set absolutely; it is not added to the previous one.
func (r *Registry) SetEndpointInflight(target, url string, v float64) {
	gauge := r.EndpointInflight.WithLabelValues(target, url)
	gauge.Set(v)
}
// IncEndpointErrors adds one to the vecna_endpoint_errors_total counter for
// the endpoint identified by target and url, using errType as the value of the
// "error" label.
func (r *Registry) IncEndpointErrors(target, url, errType string) {
	counter := r.EndpointErrorsTotal.WithLabelValues(target, url, errType)
	counter.Inc()
}
// AddTokens adds token usage for the given target and model to the
// vecna_tokens_total counter. promptTokens is recorded under
// token_type="prompt" and totalTokens under token_type="total".
//
// Counts that are zero or negative are skipped, so no series is created for
// them and the counter is never asked to decrease.
func (r *Registry) AddTokens(target, model string, promptTokens, totalTokens int) {
	usage := []struct {
		tokenType string
		n         int
	}{
		{"prompt", promptTokens},
		{"total", totalTokens},
	}
	for _, u := range usage {
		if u.n <= 0 {
			continue
		}
		r.TokensTotal.WithLabelValues(target, model, u.tokenType).Add(float64(u.n))
	}
}

62
pkg/metrics/middleware.go Normal file
View File

@@ -0,0 +1,62 @@
package metrics
import (
"fmt"
"net/http"
"github.com/uptrace/bunrouter"
)
// Middleware returns a bunrouter middleware that records per-request
// Prometheus metrics once the wrapped handler has returned.
//
// getTrace is called after the handler finishes and supplies the timing and
// usage values for the request. In vecna these are expected to come from the
// RequestTrace stored in the request context (set by server/trace.go). The
// forward and adapter fields are optional: leave ForwardTarget / AdapterType
// empty when they do not apply.
//
// Series recorded per request:
//   - RequestsTotal and RequestDuration, always;
//   - ForwardDuration, only when snap.ForwardTarget is non-empty;
//   - TranslateDuration, only when snap.AdapterType is non-empty;
//   - TokensTotal, when either token count is positive.
//
// The "endpoint" label is the matched route pattern (req.Route()), not the raw
// URL path, so a parameterised route yields one series instead of one per
// distinct URL. When no route pattern is available the raw path is used.
//
// The error returned by the wrapped handler is passed through unchanged.
func (r *Registry) Middleware(getTrace func(req bunrouter.Request) TraceSnapshot) bunrouter.MiddlewareFunc {
	return func(next bunrouter.HandlerFunc) bunrouter.HandlerFunc {
		return func(w http.ResponseWriter, req bunrouter.Request) error {
			// Start at 200: that is what net/http sends when the handler never
			// calls WriteHeader explicitly.
			rw := &statusWriter{ResponseWriter: w, status: http.StatusOK}
			err := next(rw, req)

			snap := getTrace(req)

			// Prefer the route pattern to keep label cardinality bounded.
			// NOTE(review): requests without a matched route still fall back
			// to the raw path; confirm whether those should share one label.
			endpoint := req.Route()
			if endpoint == "" {
				endpoint = req.URL.Path
			}
			status := fmt.Sprintf("%d", rw.status)

			r.RequestsTotal.WithLabelValues(endpoint, status).Inc()
			r.RequestDuration.WithLabelValues(endpoint).Observe(snap.TotalSeconds)

			if snap.ForwardTarget != "" {
				r.ForwardDuration.WithLabelValues(snap.ForwardTarget, snap.ForwardURL).Observe(snap.ForwardSeconds)
			}
			if snap.AdapterType != "" {
				r.TranslateDuration.WithLabelValues(snap.AdapterType).Observe(snap.TranslateSeconds)
			}
			if snap.PromptTokens > 0 || snap.TotalTokens > 0 {
				r.AddTokens(snap.ForwardTarget, snap.ForwardModel, snap.PromptTokens, snap.TotalTokens)
			}

			return err
		}
	}
}
// TraceSnapshot is the plain-value view of a request trace that the metrics
// middleware consumes: durations in seconds, the labels identifying where the
// request was forwarded, and token usage.
type TraceSnapshot struct {
	// TotalSeconds is observed on the request-duration histogram.
	TotalSeconds float64
	// ForwardSeconds is observed on the forward-duration histogram; it is only
	// used when ForwardTarget is non-empty.
	ForwardSeconds float64
	// TranslateSeconds is observed on the translate-duration histogram; it is
	// only used when AdapterType is non-empty.
	TranslateSeconds float64

	// ForwardTarget is the "target" label value; empty means the forward
	// duration is not recorded.
	ForwardTarget string
	// ForwardURL is the "url" label value of the forward-duration histogram.
	ForwardURL string
	// ForwardModel is the "model" label value used for token accounting.
	ForwardModel string
	// AdapterType is the "adapter_type" label value; empty means the
	// translate duration is not recorded.
	AdapterType string

	// PromptTokens is added to the token counter as token_type "prompt".
	PromptTokens int
	// TotalTokens is added to the token counter as token_type "total".
	TotalTokens int
}
// statusWriter wraps an http.ResponseWriter and remembers the status code
// that was actually committed to the client, so it can be used as a metric
// label after the handler returns.
//
// The creator should initialise status to http.StatusOK: that is what
// net/http sends when a handler returns without calling WriteHeader or Write.
type statusWriter struct {
	http.ResponseWriter
	// status is the final status code of the response.
	status int
	// wroteHeader is set once the final status has been committed, either
	// explicitly through WriteHeader or implicitly through the first Write.
	wroteHeader bool
}

// WriteHeader forwards code to the wrapped writer and records it as the
// response status.
//
// Only the first final status is recorded, matching net/http, which ignores
// later WriteHeader calls. Informational 1xx codes other than 101 Switching
// Protocols are interim responses that may precede the final status, so they
// are forwarded without being recorded.
func (sw *statusWriter) WriteHeader(code int) {
	interim := code >= 100 && code <= 199 && code != http.StatusSwitchingProtocols
	if !interim && !sw.wroteHeader {
		sw.status = code
		sw.wroteHeader = true
	}
	sw.ResponseWriter.WriteHeader(code)
}

// Write forwards p to the wrapped writer. A body write before any WriteHeader
// call commits an implicit 200 OK, so that status is locked in here and a
// later WriteHeader cannot overwrite the recorded value.
func (sw *statusWriter) Write(p []byte) (int, error) {
	if !sw.wroteHeader {
		sw.status = http.StatusOK
		sw.wroteHeader = true
	}
	return sw.ResponseWriter.Write(p)
}

// Unwrap returns the wrapped writer so that http.ResponseController can reach
// optional capabilities of the underlying writer (flushing, hijacking,
// deadlines) that this wrapper does not expose itself.
func (sw *statusWriter) Unwrap() http.ResponseWriter {
	return sw.ResponseWriter
}