feat(server): add support for extra maps in adapter configuration

* Introduced ExtraMapConfig to allow multiple adapter configurations.
* Updated server and handler to utilize extra maps for routing.
* Added dashboard handler for metrics visualization.
This commit is contained in:
2026-04-11 21:43:14 +02:00
parent c12e16c9f7
commit c7a3fed6e1
10 changed files with 461 additions and 37 deletions

View File

@@ -136,6 +136,11 @@ Override with `--config path/to/file.yaml` or env vars prefixed `VECNA_`.
"truncate_mode": "from_end", "truncate_mode": "from_end",
"pad_mode": "at_end" "pad_mode": "at_end"
}, },
"extra_maps": {
"512": { "target_dim": 512 },
"256": { "target_dim": 256, "type": "random", "seed": 42 },
"fast": { "target_dim": 768, "forward_target": "small-model" }
},
"metrics": { "metrics": {
"enabled": true, "enabled": true,
"path": "/metrics", "path": "/metrics",
@@ -184,6 +189,43 @@ There is no partial migration path — a mixed index produces degraded or incorr
--- ---
## Extra maps
`extra_maps` lets you expose multiple adapter configurations on a single vecna instance. Each entry is a named `AdapterConfig` whose unset fields fall back to the global `adapter` values.
```json
"adapter": { "type": "truncate", "source_dim": 1024, "target_dim": 1536 },
"extra_maps": {
"512": { "target_dim": 512 },
"256": { "target_dim": 256, "type": "random", "seed": 42 },
"openai-alt": { "target_dim": 1536, "forward_target": "openai" }
}
```
| Route | Forwarder | Adapter |
|-------|-----------|---------|
| `POST /v1/embeddings` | global default | global `adapter` |
| `POST /map/512/v1/embeddings` | global default | `extra_maps["512"]` — target 512, rest from global |
| `POST /map/256/v1/embeddings` | global default | `extra_maps["256"]` — random projection to 256 |
| `POST /map/openai-alt/v1/embeddings` | `openai` target | `extra_maps["openai-alt"]` adapter |
All fields are overridable per map entry:
| Field | Description |
|-------|-------------|
| `forward_target` | Named target from `forward.targets`; empty = global default |
| `type` | `truncate` / `random` / `projection` |
| `source_dim` | Source dimension; falls back to global `adapter.source_dim` |
| `target_dim` | Target dimension |
| `truncate_mode` | `from_end` / `from_start` |
| `pad_mode` | `at_end` / `at_start` |
| `seed` | Seed for random projection |
| `matrix_file` | Path to projection matrix JSON |
> The same re-embedding warning applies per map — changing any setting for an `extra_maps` entry requires re-embedding all vectors indexed through that endpoint.
---
## Truncation and padding modes ## Truncation and padding modes
### `truncate_mode` — which part of the vector is kept when downscaling ### `truncate_mode` — which part of the vector is kept when downscaling
@@ -242,6 +284,18 @@ POST /v1/models/{model}:embedContent
POST /v1/models/{model}:batchEmbedContents POST /v1/models/{model}:batchEmbedContents
``` ```
### Extra-map routes
Serve the same backing model with a different adapter per endpoint. The `{mapping}` segment matches a key in `extra_maps`.
```
POST /map/{mapping}/v1/embeddings
POST /map/{mapping}/v1/models/{model}:embedContent
POST /map/{mapping}/v1/models/{model}:batchEmbedContents
```
All extra-map routes require the same authentication as the standard API routes.
### OpenAPI spec and docs ### OpenAPI spec and docs
``` ```
@@ -263,7 +317,7 @@ GET /docs
## Prometheus metrics ## Prometheus metrics
Enable in config: `metrics.enabled: true`. Scrape at `GET /metrics`. Enable in config: `metrics.enabled: true`. Scrape at `GET /metrics`. Human-readable dashboard at `GET /dashboard`.
| Metric | Type | Description | | Metric | Type | Description |
|--------|------|-------------| |--------|------|-------------|
@@ -276,6 +330,12 @@ Enable in config: `metrics.enabled: true`. Scrape at `GET /metrics`.
| `vecna_endpoint_errors_total` | counter | Forwarding failures by error type | | `vecna_endpoint_errors_total` | counter | Forwarding failures by error type |
| `vecna_tokens_total` | counter | Tokens consumed, by target, model, and type (`prompt`/`total`) | | `vecna_tokens_total` | counter | Tokens consumed, by target, model, and type (`prompt`/`total`) |
### Dashboard
`GET /dashboard` renders a live HTML view of all metrics. Counters show request counts with status-code badges, histograms show p50/p95/p99 latencies, gauges show current endpoint priority and inflight counts.
Auth follows the same rules as `/metrics`: server `api_keys` apply, and `metrics.api_key` adds a second layer if set.
--- ---
## Development ## Development
@@ -315,7 +375,7 @@ Starts vecna and an Ollama instance. The `vecna_config` named volume persists th
### Onboard (interactive setup) ### Onboard (interactive setup)
```sh ```sh
docker compose run --rm -it vecna onboard --config /config/vecna.json docker compose run --rm -it vecna onboard
``` ```
Ollama is reachable by hostname on the Docker network — the scanner will find it automatically. After onboarding, restart the proxy: Ollama is reachable by hostname on the Docker network — the scanner will find it automatically. After onboarding, restart the proxy:
@@ -327,17 +387,17 @@ docker compose restart vecna
### Query ### Query
```sh ```sh
docker compose run --rm vecna query --compact "hello world" --config /config/vecna.json docker compose run --rm vecna query --compact "hello world"
``` ```
### Test endpoints ### Test endpoints
```sh ```sh
# report latency and dims # report latency and dims
docker compose run --rm vecna test --config /config/vecna.json docker compose run --rm vecna test
# test and remove failing endpoints # test and remove failing endpoints
docker compose run --rm vecna test --config /config/vecna.json --remove-broken docker compose run --rm vecna test --remove-broken
``` ```
### Edit config manually ### Edit config manually

View File

@@ -10,6 +10,7 @@ import (
"github.com/Warky-Devs/vecna.git/pkg/adapter" "github.com/Warky-Devs/vecna.git/pkg/adapter"
"github.com/Warky-Devs/vecna.git/pkg/config" "github.com/Warky-Devs/vecna.git/pkg/config"
"github.com/Warky-Devs/vecna.git/pkg/server"
) )
var ( var (
@@ -94,7 +95,37 @@ func openWriter(path string) (io.Writer, error) {
// buildAdapter constructs the Adapter from the loaded config. // buildAdapter constructs the Adapter from the loaded config.
func buildAdapter(cfg *config.Config) (adapter.Adapter, error) { func buildAdapter(cfg *config.Config) (adapter.Adapter, error) {
ac := cfg.Adapter return buildAdapterFromConfig(cfg.Adapter)
}
// buildExtraMapAdapters builds a server.ExtraMap for each entry in cfg.ExtraMaps.
// Unset adapter fields fall back to the global Adapter values.
func buildExtraMapAdapters(cfg *config.Config) (map[string]server.ExtraMap, error) {
result := make(map[string]server.ExtraMap, len(cfg.ExtraMaps))
for name, mc := range cfg.ExtraMaps {
ac := config.AdapterConfig{
Type: coalesce(mc.Type, cfg.Adapter.Type),
SourceDim: coalescei(mc.SourceDim, cfg.Adapter.SourceDim),
TargetDim: mc.TargetDim,
TruncateMode: coalesce(mc.TruncateMode, cfg.Adapter.TruncateMode),
PadMode: coalesce(mc.PadMode, cfg.Adapter.PadMode),
Seed: coalescei64(mc.Seed, cfg.Adapter.Seed),
MatrixFile: coalesce(mc.MatrixFile, cfg.Adapter.MatrixFile),
}
adp, err := buildAdapterFromConfig(ac)
if err != nil {
return nil, fmt.Errorf("extra_map %q: %w", name, err)
}
result[name] = server.ExtraMap{
Adapter: adp,
ForwardTarget: mc.ForwardTarget,
}
}
return result, nil
}
// buildAdapterFromConfig constructs an Adapter from an AdapterConfig.
func buildAdapterFromConfig(ac config.AdapterConfig) (adapter.Adapter, error) {
if ac.SourceDim == 0 && ac.TargetDim == 0 { if ac.SourceDim == 0 && ac.TargetDim == 0 {
return adapter.NewPassthrough(), nil return adapter.NewPassthrough(), nil
} }
@@ -124,6 +155,27 @@ func buildAdapter(cfg *config.Config) (adapter.Adapter, error) {
} }
} }
func coalesce(a, b string) string {
if a != "" {
return a
}
return b
}
func coalescei(a, b int) int {
if a != 0 {
return a
}
return b
}
func coalescei64(a, b int64) int64 {
if a != 0 {
return a
}
return b
}
func parseTruncateModes(truncMode, padMode string) (adapter.TruncateMode, adapter.PadMode, error) { func parseTruncateModes(truncMode, padMode string) (adapter.TruncateMode, adapter.PadMode, error) {
var tm adapter.TruncateMode var tm adapter.TruncateMode
switch truncMode { switch truncMode {

View File

@@ -48,6 +48,11 @@ func runServe(cmd *cobra.Command, _ []string) error {
return fmt.Errorf("build adapter: %w", err) return fmt.Errorf("build adapter: %w", err)
} }
extraMaps, err := buildExtraMapAdapters(cfg)
if err != nil {
return fmt.Errorf("build extra_maps: %w", err)
}
clients, err := buildClients(cfg) clients, err := buildClients(cfg)
if err != nil { if err != nil {
return fmt.Errorf("build clients: %w", err) return fmt.Errorf("build clients: %w", err)
@@ -58,7 +63,7 @@ func runServe(cmd *cobra.Command, _ []string) error {
reg = metrics.New() reg = metrics.New()
} }
router, err := server.New(cfg, clients, adp, reg, logger) router, err := server.New(cfg, clients, adp, extraMaps, reg, logger)
if err != nil { if err != nil {
return fmt.Errorf("build router: %w", err) return fmt.Errorf("build router: %w", err)
} }

2
go.mod
View File

@@ -5,6 +5,7 @@ go 1.26.1
require ( require (
github.com/go-viper/mapstructure/v2 v2.4.0 github.com/go-viper/mapstructure/v2 v2.4.0
github.com/prometheus/client_golang v1.23.2 github.com/prometheus/client_golang v1.23.2
github.com/prometheus/client_model v0.6.2
github.com/spf13/cobra v1.10.2 github.com/spf13/cobra v1.10.2
github.com/spf13/viper v1.21.0 github.com/spf13/viper v1.21.0
github.com/stretchr/testify v1.11.1 github.com/stretchr/testify v1.11.1
@@ -21,7 +22,6 @@ require (
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/pelletier/go-toml/v2 v2.2.4 // indirect github.com/pelletier/go-toml/v2 v2.2.4 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/prometheus/client_model v0.6.2 // indirect
github.com/prometheus/common v0.66.1 // indirect github.com/prometheus/common v0.66.1 // indirect
github.com/prometheus/procfs v0.16.1 // indirect github.com/prometheus/procfs v0.16.1 // indirect
github.com/sagikazarmark/locafero v0.11.0 // indirect github.com/sagikazarmark/locafero v0.11.0 // indirect

View File

@@ -16,6 +16,7 @@ type Config struct {
Metrics MetricsConfig `mapstructure:"metrics" json:"metrics" yaml:"metrics" xml:"metrics"` Metrics MetricsConfig `mapstructure:"metrics" json:"metrics" yaml:"metrics" xml:"metrics"`
Forward ForwardConfig `mapstructure:"forward" json:"forward" yaml:"forward" xml:"forward"` Forward ForwardConfig `mapstructure:"forward" json:"forward" yaml:"forward" xml:"forward"`
Adapter AdapterConfig `mapstructure:"adapter" json:"adapter" yaml:"adapter" xml:"adapter"` Adapter AdapterConfig `mapstructure:"adapter" json:"adapter" yaml:"adapter" xml:"adapter"`
ExtraMaps map[string]ExtraMapConfig `mapstructure:"extra_maps" json:"extra_maps" yaml:"extra_maps" xml:"extra_maps"`
} }
// ServerConfig controls the HTTP listener and inbound auth. // ServerConfig controls the HTTP listener and inbound auth.
@@ -57,6 +58,20 @@ type EndpointConfig struct {
APIKey string `mapstructure:"api_key" json:"api_key" yaml:"api_key" xml:"api_key"` APIKey string `mapstructure:"api_key" json:"api_key" yaml:"api_key" xml:"api_key"`
} }
// ExtraMapConfig is a per-mapping override: all adapter fields fall back to the
// global adapter when unset; ForwardTarget selects a named target from
// forward.targets (empty = global forward.default).
type ExtraMapConfig struct {
ForwardTarget string `mapstructure:"forward_target" json:"forward_target,omitempty" yaml:"forward_target,omitempty" xml:"forward_target,omitempty"`
Type string `mapstructure:"type" json:"type,omitempty" yaml:"type,omitempty" xml:"type,omitempty"`
SourceDim int `mapstructure:"source_dim" json:"source_dim,omitempty" yaml:"source_dim,omitempty" xml:"source_dim,omitempty"`
TargetDim int `mapstructure:"target_dim" json:"target_dim,omitempty" yaml:"target_dim,omitempty" xml:"target_dim,omitempty"`
TruncateMode string `mapstructure:"truncate_mode" json:"truncate_mode,omitempty" yaml:"truncate_mode,omitempty" xml:"truncate_mode,omitempty"`
PadMode string `mapstructure:"pad_mode" json:"pad_mode,omitempty" yaml:"pad_mode,omitempty" xml:"pad_mode,omitempty"`
Seed int64 `mapstructure:"seed" json:"seed,omitempty" yaml:"seed,omitempty" xml:"seed,omitempty"`
MatrixFile string `mapstructure:"matrix_file" json:"matrix_file,omitempty" yaml:"matrix_file,omitempty" xml:"matrix_file,omitempty"`
}
// AdapterConfig selects and tunes the dimension adapter. // AdapterConfig selects and tunes the dimension adapter.
type AdapterConfig struct { type AdapterConfig struct {
Type string `mapstructure:"type" json:"type" yaml:"type" xml:"type"` Type string `mapstructure:"type" json:"type" yaml:"type" xml:"type"`

214
pkg/server/dashboard.go Normal file
View File

@@ -0,0 +1,214 @@
package server
import (
"fmt"
"net/http"
"sort"
"strings"
dto "github.com/prometheus/client_model/go"
"github.com/uptrace/bunrouter"
"github.com/Warky-Devs/vecna.git/pkg/metrics"
)
// dashboardHandler returns an HTML metrics dashboard by gathering from the registry.
func dashboardHandler(reg *metrics.Registry) bunrouter.HandlerFunc {
return func(w http.ResponseWriter, req bunrouter.Request) error {
families, err := reg.Prometheus().Gather()
if err != nil {
http.Error(w, "failed to gather metrics", http.StatusInternalServerError)
return nil
}
// Sort families by name for deterministic output.
sort.Slice(families, func(i, j int) bool {
return families[i].GetName() < families[j].GetName()
})
var b strings.Builder
b.WriteString(`<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width,initial-scale=1">
<title>Vecna Metrics Dashboard</title>
<style>
*{box-sizing:border-box;margin:0;padding:0}
body{font-family:system-ui,sans-serif;background:#0f1117;color:#e2e8f0;padding:1.5rem}
h1{font-size:1.4rem;font-weight:700;margin-bottom:1.25rem;color:#7dd3fc}
h2{font-size:.85rem;font-weight:600;color:#94a3b8;text-transform:uppercase;letter-spacing:.06em;margin:1.5rem 0 .5rem}
table{width:100%;border-collapse:collapse;font-size:.82rem;margin-bottom:.5rem}
th{text-align:left;padding:.35rem .6rem;background:#1e2330;color:#64748b;font-weight:600;border-bottom:1px solid #2d3748}
td{padding:.3rem .6rem;border-bottom:1px solid #1e2330;color:#cbd5e1}
tr:last-child td{border-bottom:none}
tr:hover td{background:#1a2035}
.val{text-align:right;font-variant-numeric:tabular-nums;color:#7dd3fc}
.badge{display:inline-block;padding:.1rem .45rem;border-radius:.25rem;font-size:.75rem;font-weight:600}
.b-ok{background:#14532d;color:#86efac}
.b-err{background:#7f1d1d;color:#fca5a5}
.section{background:#141824;border:1px solid #1e2330;border-radius:.5rem;padding:1rem;margin-bottom:1rem}
.help{font-size:.75rem;color:#475569;margin-bottom:.5rem}
</style>
</head>
<body>
<h1>Vecna Metrics Dashboard</h1>
`)
for _, fam := range families {
name := fam.GetName()
help := fam.GetHelp()
mtype := fam.GetType()
b.WriteString(`<div class="section">`)
fmt.Fprintf(&b, "<h2>%s</h2>", htmlEsc(name))
if help != "" {
fmt.Fprintf(&b, `<div class="help">%s</div>`, htmlEsc(help))
}
switch mtype {
case dto.MetricType_COUNTER:
renderCounter(&b, fam.GetMetric())
case dto.MetricType_GAUGE:
renderGauge(&b, fam.GetMetric())
case dto.MetricType_HISTOGRAM:
renderHistogram(&b, fam.GetMetric())
default:
renderGeneric(&b, fam.GetMetric())
}
b.WriteString(`</div>`)
}
b.WriteString("</body></html>")
w.Header().Set("Content-Type", "text/html; charset=utf-8")
_, err = fmt.Fprint(w, b.String())
return err
}
}
func renderCounter(b *strings.Builder, ms []*dto.Metric) {
b.WriteString("<table><thead><tr>")
if len(ms) > 0 && len(ms[0].GetLabel()) > 0 {
for _, lp := range ms[0].GetLabel() {
fmt.Fprintf(b, "<th>%s</th>", htmlEsc(lp.GetName()))
}
}
b.WriteString("<th style='text-align:right'>count</th></tr></thead><tbody>")
for _, m := range ms {
b.WriteString("<tr>")
for _, lp := range m.GetLabel() {
val := lp.GetValue()
cls := ""
if lp.GetName() == "status" {
if strings.HasPrefix(val, "2") {
cls = ` class="badge b-ok"`
} else if strings.HasPrefix(val, "4") || strings.HasPrefix(val, "5") {
cls = ` class="badge b-err"`
}
}
if cls != "" {
fmt.Fprintf(b, "<td><span%s>%s</span></td>", cls, htmlEsc(val))
} else {
fmt.Fprintf(b, "<td>%s</td>", htmlEsc(val))
}
}
fmt.Fprintf(b, `<td class="val">%.0f</td>`, m.GetCounter().GetValue())
b.WriteString("</tr>")
}
b.WriteString("</tbody></table>")
}
func renderGauge(b *strings.Builder, ms []*dto.Metric) {
b.WriteString("<table><thead><tr>")
if len(ms) > 0 && len(ms[0].GetLabel()) > 0 {
for _, lp := range ms[0].GetLabel() {
fmt.Fprintf(b, "<th>%s</th>", htmlEsc(lp.GetName()))
}
}
b.WriteString("<th style='text-align:right'>value</th></tr></thead><tbody>")
for _, m := range ms {
b.WriteString("<tr>")
for _, lp := range m.GetLabel() {
fmt.Fprintf(b, "<td>%s</td>", htmlEsc(lp.GetValue()))
}
fmt.Fprintf(b, `<td class="val">%.4g</td>`, m.GetGauge().GetValue())
b.WriteString("</tr>")
}
b.WriteString("</tbody></table>")
}
func renderHistogram(b *strings.Builder, ms []*dto.Metric) {
b.WriteString("<table><thead><tr>")
if len(ms) > 0 && len(ms[0].GetLabel()) > 0 {
for _, lp := range ms[0].GetLabel() {
fmt.Fprintf(b, "<th>%s</th>", htmlEsc(lp.GetName()))
}
}
b.WriteString("<th style='text-align:right'>count</th><th style='text-align:right'>p50 (s)</th><th style='text-align:right'>p95 (s)</th><th style='text-align:right'>p99 (s)</th></tr></thead><tbody>")
for _, m := range ms {
b.WriteString("<tr>")
for _, lp := range m.GetLabel() {
fmt.Fprintf(b, "<td>%s</td>", htmlEsc(lp.GetValue()))
}
h := m.GetHistogram()
count := h.GetSampleCount()
p50 := histogramQuantile(h, 0.50)
p95 := histogramQuantile(h, 0.95)
p99 := histogramQuantile(h, 0.99)
fmt.Fprintf(b, `<td class="val">%d</td>`, count)
fmt.Fprintf(b, `<td class="val">%s</td>`, fmtSeconds(p50))
fmt.Fprintf(b, `<td class="val">%s</td>`, fmtSeconds(p95))
fmt.Fprintf(b, `<td class="val">%s</td>`, fmtSeconds(p99))
b.WriteString("</tr>")
}
b.WriteString("</tbody></table>")
}
func renderGeneric(b *strings.Builder, ms []*dto.Metric) {
fmt.Fprintf(b, `<div class="help">%d series</div>`, len(ms))
}
// histogramQuantile computes a linear-interpolated quantile from a cumulative histogram.
func histogramQuantile(h *dto.Histogram, q float64) float64 {
buckets := h.GetBucket()
total := float64(h.GetSampleCount())
if total == 0 || len(buckets) == 0 {
return 0
}
target := q * total
var prevCount float64
var prevBound float64
for _, b := range buckets {
count := float64(b.GetCumulativeCount())
bound := b.GetUpperBound()
if count >= target {
if count == prevCount {
return prevBound
}
// linear interpolation within bucket
return prevBound + (bound-prevBound)*(target-prevCount)/(count-prevCount)
}
prevCount = count
prevBound = bound
}
return prevBound
}
func fmtSeconds(s float64) string {
if s == 0 {
return "—"
}
if s < 0.001 {
return fmt.Sprintf("%.3fms", s*1000)
}
return fmt.Sprintf("%.3fs", s)
}
func htmlEsc(s string) string {
s = strings.ReplaceAll(s, "&", "&amp;")
s = strings.ReplaceAll(s, "<", "&lt;")
s = strings.ReplaceAll(s, ">", "&gt;")
return s
}

View File

@@ -9,6 +9,7 @@ import (
"github.com/uptrace/bunrouter" "github.com/uptrace/bunrouter"
"github.com/Warky-Devs/vecna.git/pkg/adapter"
"github.com/Warky-Devs/vecna.git/pkg/embedclient" "github.com/Warky-Devs/vecna.git/pkg/embedclient"
) )
@@ -16,6 +17,18 @@ import (
// correct Google handler. The colon is a literal method separator in the // correct Google handler. The colon is a literal method separator in the
// Google embedding API, not a bunrouter parameter prefix. // Google embedding API, not a bunrouter parameter prefix.
func (h *handler) googleDispatch(w http.ResponseWriter, req bunrouter.Request) error { func (h *handler) googleDispatch(w http.ResponseWriter, req bunrouter.Request) error {
return h.googleDispatchWithAdapter(w, req, h.adapter, "")
}
func (h *handler) googleDispatchMapped(w http.ResponseWriter, req bunrouter.Request) error {
em, err := h.resolveExtraMap(req.Param("mapping"))
if err != nil {
return writeJSON(w, http.StatusNotFound, map[string]string{"error": err.Error()})
}
return h.googleDispatchWithAdapter(w, req, em.Adapter, em.ForwardTarget)
}
func (h *handler) googleDispatchWithAdapter(w http.ResponseWriter, req bunrouter.Request, adp adapter.Adapter, targetOverride string) error {
modelaction := req.Param("modelaction") // e.g. "text-embedding-foo:embedContent" modelaction := req.Param("modelaction") // e.g. "text-embedding-foo:embedContent"
idx := strings.LastIndex(modelaction, ":") idx := strings.LastIndex(modelaction, ":")
if idx < 0 { if idx < 0 {
@@ -29,9 +42,9 @@ func (h *handler) googleDispatch(w http.ResponseWriter, req bunrouter.Request) e
switch action { switch action {
case "embedContent": case "embedContent":
return h.googleEmbedContent(w, req) return h.googleEmbedContentWithAdapter(w, req, adp, targetOverride)
case "batchEmbedContents": case "batchEmbedContents":
return h.googleBatchEmbedContents(w, req) return h.googleBatchEmbedContentsWithAdapter(w, req, adp, targetOverride)
default: default:
return writeJSON(w, http.StatusNotFound, map[string]string{"error": "unknown Google API method: " + action}) return writeJSON(w, http.StatusNotFound, map[string]string{"error": "unknown Google API method: " + action})
} }
@@ -60,7 +73,7 @@ type googleEmbeddingValues struct {
Values []float32 `json:"values"` Values []float32 `json:"values"`
} }
func (h *handler) googleEmbedContent(w http.ResponseWriter, req bunrouter.Request) error { func (h *handler) googleEmbedContentWithAdapter(w http.ResponseWriter, req bunrouter.Request, adp adapter.Adapter, targetOverride string) error {
model, _ := req.Context().Value(modelKey).(string) model, _ := req.Context().Value(modelKey).(string)
var body googleEmbedContentRequest var body googleEmbedContentRequest
@@ -73,7 +86,7 @@ func (h *handler) googleEmbedContent(w http.ResponseWriter, req bunrouter.Reques
texts[i] = p.Text texts[i] = p.Text
} }
client, targetName, targetURL := h.resolveClient(model) client, targetName, targetURL := h.resolveClientOverride(targetOverride, model)
trace := TraceFromContext(req.Context()) trace := TraceFromContext(req.Context())
trace.ForwardTarget = targetName trace.ForwardTarget = targetName
trace.ForwardURL = targetURL trace.ForwardURL = targetURL
@@ -91,7 +104,7 @@ func (h *handler) googleEmbedContent(w http.ResponseWriter, req bunrouter.Reques
t1 := time.Now() t1 := time.Now()
var adapted []float32 var adapted []float32
if len(embedResp.Embeddings) > 0 { if len(embedResp.Embeddings) > 0 {
adapted, err = h.adapter.Adapt(embedResp.Embeddings[0]) adapted, err = adp.Adapt(embedResp.Embeddings[0])
if err != nil { if err != nil {
return writeJSON(w, http.StatusInternalServerError, map[string]string{"error": err.Error()}) return writeJSON(w, http.StatusInternalServerError, map[string]string{"error": err.Error()})
} }
@@ -115,7 +128,7 @@ type googleBatchResponse struct {
Embeddings []googleEmbeddingValues `json:"embeddings"` Embeddings []googleEmbeddingValues `json:"embeddings"`
} }
func (h *handler) googleBatchEmbedContents(w http.ResponseWriter, req bunrouter.Request) error { func (h *handler) googleBatchEmbedContentsWithAdapter(w http.ResponseWriter, req bunrouter.Request, adp adapter.Adapter, targetOverride string) error {
model, _ := req.Context().Value(modelKey).(string) model, _ := req.Context().Value(modelKey).(string)
var body googleBatchRequest var body googleBatchRequest
@@ -130,7 +143,7 @@ func (h *handler) googleBatchEmbedContents(w http.ResponseWriter, req bunrouter.
} }
} }
client, targetName, targetURL := h.resolveClient(model) client, targetName, targetURL := h.resolveClientOverride(targetOverride, model)
trace := TraceFromContext(req.Context()) trace := TraceFromContext(req.Context())
trace.ForwardTarget = targetName trace.ForwardTarget = targetName
trace.ForwardURL = targetURL trace.ForwardURL = targetURL
@@ -148,7 +161,7 @@ func (h *handler) googleBatchEmbedContents(w http.ResponseWriter, req bunrouter.
t1 := time.Now() t1 := time.Now()
result := make([]googleEmbeddingValues, len(embedResp.Embeddings)) result := make([]googleEmbeddingValues, len(embedResp.Embeddings))
for i, vec := range embedResp.Embeddings { for i, vec := range embedResp.Embeddings {
adapted, adaptErr := h.adapter.Adapt(vec) adapted, adaptErr := adp.Adapt(vec)
if adaptErr != nil { if adaptErr != nil {
return writeJSON(w, http.StatusInternalServerError, map[string]string{"error": adaptErr.Error()}) return writeJSON(w, http.StatusInternalServerError, map[string]string{"error": adaptErr.Error()})
} }

View File

@@ -14,14 +14,30 @@ import (
"github.com/Warky-Devs/vecna.git/pkg/embedclient" "github.com/Warky-Devs/vecna.git/pkg/embedclient"
) )
// ExtraMap pairs a dimension adapter with an optional forward-target override.
type ExtraMap struct {
Adapter adapter.Adapter
ForwardTarget string // named target in forward.targets; empty = model-based resolution
}
// handler holds shared dependencies for all HTTP handlers. // handler holds shared dependencies for all HTTP handlers.
type handler struct { type handler struct {
cfg *config.Config cfg *config.Config
clients map[string]embedclient.Client clients map[string]embedclient.Client
adapter adapter.Adapter adapter adapter.Adapter
extraMaps map[string]ExtraMap
logger *zap.Logger logger *zap.Logger
} }
// resolveExtraMap returns the ExtraMap for the named extra_map entry.
func (h *handler) resolveExtraMap(name string) (ExtraMap, error) {
em, ok := h.extraMaps[name]
if !ok {
return ExtraMap{}, fmt.Errorf("extra_map %q not configured", name)
}
return em, nil
}
// resolveClient selects the embed client for the given model name. // resolveClient selects the embed client for the given model name.
// Returns the client, target name, and first endpoint URL for tracing. // Returns the client, target name, and first endpoint URL for tracing.
func (h *handler) resolveClient(model string) (embedclient.Client, string, string) { func (h *handler) resolveClient(model string) (embedclient.Client, string, string) {
@@ -32,12 +48,24 @@ func (h *handler) resolveClient(model string) (embedclient.Client, string, strin
name := h.cfg.Forward.Default name := h.cfg.Forward.Default
c, ok := h.clients[name] c, ok := h.clients[name]
if !ok { if !ok {
// No configured client — return a nil-safe error client
return &errClient{err: fmt.Errorf("no client configured for model %q and no default", model)}, name, "" return &errClient{err: fmt.Errorf("no client configured for model %q and no default", model)}, name, ""
} }
return c, name, firstEndpointURL(h.cfg, name) return c, name, firstEndpointURL(h.cfg, name)
} }
// resolveClientOverride selects the client for targetOverride when set,
// otherwise falls back to model-based resolution.
func (h *handler) resolveClientOverride(targetOverride, model string) (embedclient.Client, string, string) {
if targetOverride == "" {
return h.resolveClient(model)
}
c, ok := h.clients[targetOverride]
if !ok {
return &errClient{err: fmt.Errorf("extra_map forward_target %q not configured", targetOverride)}, targetOverride, ""
}
return c, targetOverride, firstEndpointURL(h.cfg, targetOverride)
}
func firstEndpointURL(cfg *config.Config, targetName string) string { func firstEndpointURL(cfg *config.Config, targetName string) string {
t, ok := cfg.Forward.Targets[targetName] t, ok := cfg.Forward.Targets[targetName]
if !ok || len(t.Endpoints) == 0 { if !ok || len(t.Endpoints) == 0 {

View File

@@ -8,6 +8,7 @@ import (
"github.com/uptrace/bunrouter" "github.com/uptrace/bunrouter"
"github.com/Warky-Devs/vecna.git/pkg/adapter"
"github.com/Warky-Devs/vecna.git/pkg/embedclient" "github.com/Warky-Devs/vecna.git/pkg/embedclient"
) )
@@ -35,6 +36,18 @@ type openAIUsage struct {
} }
func (h *handler) openAIEmbeddings(w http.ResponseWriter, req bunrouter.Request) error { func (h *handler) openAIEmbeddings(w http.ResponseWriter, req bunrouter.Request) error {
return h.openAIEmbeddingsWithAdapter(w, req, h.adapter, "")
}
func (h *handler) openAIEmbeddingsMapped(w http.ResponseWriter, req bunrouter.Request) error {
em, err := h.resolveExtraMap(req.Param("mapping"))
if err != nil {
return writeJSON(w, http.StatusNotFound, map[string]string{"error": err.Error()})
}
return h.openAIEmbeddingsWithAdapter(w, req, em.Adapter, em.ForwardTarget)
}
func (h *handler) openAIEmbeddingsWithAdapter(w http.ResponseWriter, req bunrouter.Request, adp adapter.Adapter, targetOverride string) error {
var body openAIEmbedRequest var body openAIEmbedRequest
if err := json.NewDecoder(req.Body).Decode(&body); err != nil { if err := json.NewDecoder(req.Body).Decode(&body); err != nil {
return writeJSON(w, http.StatusBadRequest, map[string]string{"error": "invalid request body"}) return writeJSON(w, http.StatusBadRequest, map[string]string{"error": "invalid request body"})
@@ -45,7 +58,7 @@ func (h *handler) openAIEmbeddings(w http.ResponseWriter, req bunrouter.Request)
return writeJSON(w, http.StatusBadRequest, map[string]string{"error": err.Error()}) return writeJSON(w, http.StatusBadRequest, map[string]string{"error": err.Error()})
} }
client, targetName, targetURL := h.resolveClient(body.Model) client, targetName, targetURL := h.resolveClientOverride(targetOverride, body.Model)
trace := TraceFromContext(req.Context()) trace := TraceFromContext(req.Context())
trace.ForwardTarget = targetName trace.ForwardTarget = targetName
trace.ForwardURL = targetURL trace.ForwardURL = targetURL
@@ -63,7 +76,7 @@ func (h *handler) openAIEmbeddings(w http.ResponseWriter, req bunrouter.Request)
t1 := time.Now() t1 := time.Now()
data := make([]openAIEmbedDatum, len(embedResp.Embeddings)) data := make([]openAIEmbedDatum, len(embedResp.Embeddings))
for i, vec := range embedResp.Embeddings { for i, vec := range embedResp.Embeddings {
adapted, adaptErr := h.adapter.Adapt(vec) adapted, adaptErr := adp.Adapt(vec)
if adaptErr != nil { if adaptErr != nil {
return writeJSON(w, http.StatusInternalServerError, map[string]string{"error": adaptErr.Error()}) return writeJSON(w, http.StatusInternalServerError, map[string]string{"error": adaptErr.Error()})
} }

View File

@@ -23,6 +23,7 @@ func New(
cfg *config.Config, cfg *config.Config,
clients map[string]embedclient.Client, clients map[string]embedclient.Client,
adp adapter.Adapter, adp adapter.Adapter,
extraMaps map[string]ExtraMap,
reg *metrics.Registry, reg *metrics.Registry,
logger *zap.Logger, logger *zap.Logger,
) (router *bunrouter.Router, err error) { ) (router *bunrouter.Router, err error) {
@@ -34,38 +35,49 @@ func New(
}() }()
router = bunrouter.New( router = bunrouter.New(
bunrouter.WithMiddleware(authMiddleware(cfg.Server.APIKeys)),
bunrouter.WithMiddleware(traceMiddleware()), bunrouter.WithMiddleware(traceMiddleware()),
bunrouter.WithMiddleware(metricsMiddleware(reg, adp)), bunrouter.WithMiddleware(metricsMiddleware(reg, adp)),
bunrouter.WithMiddleware(loggingMiddleware(logger)), bunrouter.WithMiddleware(loggingMiddleware(logger)),
) )
h := &handler{cfg: cfg, clients: clients, adapter: adp, logger: logger} h := &handler{cfg: cfg, clients: clients, adapter: adp, extraMaps: extraMaps, logger: logger}
router.POST("/v1/embeddings", h.openAIEmbeddings) // Public routes — no authentication required.
router.GET("/", spec.DocsHandler())
router.GET("/docs", spec.DocsHandler())
router.GET("/openapi.yaml", spec.SpecHandler())
// All API routes require authentication.
authed := router.NewGroup("", bunrouter.WithMiddleware(authMiddleware(cfg.Server.APIKeys)))
authed.POST("/v1/embeddings", h.openAIEmbeddings)
// Google API uses a literal colon as a method separator (e.g. /v1/models/foo:embedContent). // Google API uses a literal colon as a method separator (e.g. /v1/models/foo:embedContent).
// bunrouter can't distinguish two routes with the same :param prefix, so a single wildcard // bunrouter can't distinguish two routes with the same :param prefix, so a single wildcard
// captures the full "model:action" segment and dispatches internally. // captures the full "model:action" segment and dispatches internally.
router.POST("/v1/models/*modelaction", h.googleDispatch) authed.POST("/v1/models/*modelaction", h.googleDispatch)
// OpenAPI spec + docs // Extra-map routes: /map/:mapping/v1/... uses the adapter configured under extra_maps[mapping].
router.GET("/openapi.yaml", spec.SpecHandler()) authed.POST("/map/:mapping/v1/embeddings", h.openAIEmbeddingsMapped)
router.GET("/docs", spec.DocsHandler()) authed.POST("/map/:mapping/v1/models/*modelaction", h.googleDispatchMapped)
// Metrics — only when enabled // Metrics — only when enabled; registered in the authed group so server
// auth (if configured) applies. Metrics may additionally enforce its own key.
if cfg.Metrics.Enabled { if cfg.Metrics.Enabled {
metricsHandler := promhttp.HandlerFor(reg.Prometheus(), promhttp.HandlerOpts{}) metricsHandler := promhttp.HandlerFor(reg.Prometheus(), promhttp.HandlerOpts{})
path := cfg.Metrics.Path path := cfg.Metrics.Path
if path == "" { if path == "" {
path = "/metrics" path = "/metrics"
} }
dash := dashboardHandler(reg)
if cfg.Metrics.APIKey != "" { if cfg.Metrics.APIKey != "" {
router.GET(path, metricsAuthHandler(cfg.Metrics.APIKey, metricsHandler)) authed.GET(path, metricsAuthHandler(cfg.Metrics.APIKey, metricsHandler))
authed.GET("/dashboard", metricsKeyMiddleware(cfg.Metrics.APIKey, dash))
} else { } else {
router.GET(path, func(w http.ResponseWriter, req bunrouter.Request) error { authed.GET(path, func(w http.ResponseWriter, req bunrouter.Request) error {
metricsHandler.ServeHTTP(w, req.Request) metricsHandler.ServeHTTP(w, req.Request)
return nil return nil
}) })
authed.GET("/dashboard", dash)
} }
} }
@@ -148,6 +160,18 @@ func loggingMiddleware(logger *zap.Logger) bunrouter.MiddlewareFunc {
} }
} }
// metricsKeyMiddleware guards a bunrouter.HandlerFunc with a dedicated Bearer token.
func metricsKeyMiddleware(apiKey string, h bunrouter.HandlerFunc) bunrouter.HandlerFunc {
return func(w http.ResponseWriter, req bunrouter.Request) error {
token := strings.TrimPrefix(req.Header.Get("Authorization"), "Bearer ")
if token != apiKey {
http.Error(w, "unauthorized", http.StatusUnauthorized)
return nil
}
return h(w, req)
}
}
// metricsAuthHandler wraps a standard http.Handler with Bearer token auth. // metricsAuthHandler wraps a standard http.Handler with Bearer token auth.
func metricsAuthHandler(apiKey string, h http.Handler) bunrouter.HandlerFunc { func metricsAuthHandler(apiKey string, h http.Handler) bunrouter.HandlerFunc {
return func(w http.ResponseWriter, req bunrouter.Request) error { return func(w http.ResponseWriter, req bunrouter.Request) error {