feat: 🎉 Vectors na Vectors, the beginning

Translate 1536 <-> 768 , 3072 <-> 2048
This commit is contained in:
2026-04-11 18:05:05 +02:00
parent d98ea7c222
commit 4009a54e39
58 changed files with 5324 additions and 2 deletions

135
pkg/server/google.go Normal file
View File

@@ -0,0 +1,135 @@
package server
import (
	"encoding/json"
	"net/http"
	"strings"
	"time"

	"github.com/uptrace/bunrouter"

	"github.com/Warky-Devs/vecna.git/pkg/embedclient"
)
// --- single embedContent ---
// googleEmbedContentRequest is the JSON body of a Google-style embedContent
// call. It is also the element type of a batch request's "requests" array.
type googleEmbedContentRequest struct {
// Content carries the text parts to embed.
Content googleContent `json:"content"`
// TaskType is decoded from "taskType"; the handlers in this file do not read it.
TaskType string `json:"taskType,omitempty"`
}
// googleContent is the "content" object of a Google-style embed request:
// an ordered list of parts.
type googleContent struct {
Parts []googlePart `json:"parts"`
}
// googlePart is one element of googleContent.Parts; only its "text" field is decoded.
type googlePart struct {
Text string `json:"text"`
}
// googleEmbedContentResponse is the JSON body returned by the single
// embedContent handler: {"embedding":{"values":[...]}}.
type googleEmbedContentResponse struct {
Embedding googleEmbeddingValues `json:"embedding"`
}
// googleEmbeddingValues wraps one embedding vector under the "values" key.
// It is used by both the single and the batch response.
type googleEmbeddingValues struct {
Values []float32 `json:"values"`
}
// googleEmbedContent handles the Google-style single-embedding route.
//
// The endpoint returns exactly one embedding, so the text of all content parts
// is joined into a single input before forwarding. The newline separator is
// this proxy's choice. Forwarding each part as its own text would embed (and
// bill) parts whose vectors are then discarded.
//
// The model is taken from the "model" route parameter. The forwarded vector is
// run through the dimension adapter and returned as
// {"embedding":{"values":[...]}}.
//
// Responses:
//   - 400 when the body is not valid JSON or content.parts is empty
//   - 502 when the forward call fails or returns no embedding
//   - 500 when the adapter rejects the vector
//   - 200 plus X-Vecna-* timing headers on success
//
// The returned error is the one reported by writeJSON, if any.
func (h *handler) googleEmbedContent(w http.ResponseWriter, req bunrouter.Request) error {
	model := req.Param("model")

	var body googleEmbedContentRequest
	if err := json.NewDecoder(req.Body).Decode(&body); err != nil {
		return writeJSON(w, http.StatusBadRequest, map[string]string{"error": "invalid request body"})
	}
	if len(body.Content.Parts) == 0 {
		return writeJSON(w, http.StatusBadRequest, map[string]string{"error": "content.parts must not be empty"})
	}

	// One content => one input text => one embedding.
	parts := make([]string, len(body.Content.Parts))
	for i, p := range body.Content.Parts {
		parts[i] = p.Text
	}
	texts := []string{strings.Join(parts, "\n")}

	client, targetName, targetURL := h.resolveClient(model)

	trace := TraceFromContext(req.Context())
	trace.ForwardTarget = targetName
	trace.ForwardURL = targetURL

	t0 := time.Now()
	embedResp, err := client.Embed(req.Context(), embedclient.Request{Texts: texts, Model: model})
	trace.ForwardDuration = time.Since(t0)
	if err != nil {
		return writeJSON(w, http.StatusBadGateway, map[string]string{"error": err.Error()})
	}

	trace.ForwardModel = embedResp.Model
	trace.PromptTokens = embedResp.Usage.PromptTokens
	trace.TotalTokens = embedResp.Usage.TotalTokens

	// A successful call without any vector is an upstream fault, not an
	// empty-but-valid result; do not answer 200 with null values.
	if len(embedResp.Embeddings) == 0 {
		return writeJSON(w, http.StatusBadGateway, map[string]string{"error": "upstream returned no embedding"})
	}

	t1 := time.Now()
	adapted, err := h.adapter.Adapt(embedResp.Embeddings[0])
	trace.TranslateDuration = time.Since(t1)
	if err != nil {
		return writeJSON(w, http.StatusInternalServerError, map[string]string{"error": err.Error()})
	}

	writeTraceHeaders(w, trace)
	return writeJSON(w, http.StatusOK, googleEmbedContentResponse{
		Embedding: googleEmbeddingValues{Values: adapted},
	})
}
// --- batch batchEmbedContents ---
// googleBatchRequest is the JSON body of a Google-style batchEmbedContents
// call: a list of single embed requests.
type googleBatchRequest struct {
Requests []googleEmbedContentRequest `json:"requests"`
}
// googleBatchResponse is the JSON body returned by the batch handler:
// {"embeddings":[{"values":[...]}, ...]}.
type googleBatchResponse struct {
Embeddings []googleEmbeddingValues `json:"embeddings"`
}
// googleBatchEmbedContents handles the Google-style batch embedding route.
//
// Each entry of "requests" yields exactly one input text (its content parts
// joined with a newline, a separator chosen by this proxy), so that
// embeddings[i] in the response always corresponds to requests[i]. Flattening
// the parts of all requests into one list would break that alignment as soon
// as a request has more or fewer than one part.
//
// The model is taken from the "model" route parameter. Every returned vector
// is run through the dimension adapter.
//
// Responses:
//   - 400 when the body is not valid JSON, "requests" is empty, or a request
//     has no content parts
//   - 502 when the forward call fails or returns a different number of
//     embeddings than requested
//   - 500 when the adapter rejects a vector
//   - 200 plus X-Vecna-* timing headers on success
//
// The returned error is the one reported by writeJSON, if any.
func (h *handler) googleBatchEmbedContents(w http.ResponseWriter, req bunrouter.Request) error {
	model := req.Param("model")

	var body googleBatchRequest
	if err := json.NewDecoder(req.Body).Decode(&body); err != nil {
		return writeJSON(w, http.StatusBadRequest, map[string]string{"error": "invalid request body"})
	}
	if len(body.Requests) == 0 {
		return writeJSON(w, http.StatusBadRequest, map[string]string{"error": "requests must not be empty"})
	}

	// Build one text per request, preserving request order.
	texts := make([]string, len(body.Requests))
	for i, r := range body.Requests {
		if len(r.Content.Parts) == 0 {
			return writeJSON(w, http.StatusBadRequest, map[string]string{"error": "every request must have at least one content part"})
		}
		parts := make([]string, len(r.Content.Parts))
		for j, p := range r.Content.Parts {
			parts[j] = p.Text
		}
		texts[i] = strings.Join(parts, "\n")
	}

	client, targetName, targetURL := h.resolveClient(model)

	trace := TraceFromContext(req.Context())
	trace.ForwardTarget = targetName
	trace.ForwardURL = targetURL

	t0 := time.Now()
	embedResp, err := client.Embed(req.Context(), embedclient.Request{Texts: texts, Model: model})
	trace.ForwardDuration = time.Since(t0)
	if err != nil {
		return writeJSON(w, http.StatusBadGateway, map[string]string{"error": err.Error()})
	}

	trace.ForwardModel = embedResp.Model
	trace.PromptTokens = embedResp.Usage.PromptTokens
	trace.TotalTokens = embedResp.Usage.TotalTokens

	// The caller matches embeddings to requests by index; a count mismatch
	// cannot be answered correctly.
	if len(embedResp.Embeddings) != len(texts) {
		return writeJSON(w, http.StatusBadGateway, map[string]string{"error": "upstream returned an unexpected number of embeddings"})
	}

	t1 := time.Now()
	result := make([]googleEmbeddingValues, len(embedResp.Embeddings))
	for i, vec := range embedResp.Embeddings {
		adapted, adaptErr := h.adapter.Adapt(vec)
		if adaptErr != nil {
			return writeJSON(w, http.StatusInternalServerError, map[string]string{"error": adaptErr.Error()})
		}
		result[i] = googleEmbeddingValues{Values: adapted}
	}
	trace.TranslateDuration = time.Since(t1)

	writeTraceHeaders(w, trace)
	return writeJSON(w, http.StatusOK, googleBatchResponse{Embeddings: result})
}

74
pkg/server/handler.go Normal file
View File

@@ -0,0 +1,74 @@
package server
import (
"context"
"encoding/json"
"fmt"
"net/http"
"time"
"go.uber.org/zap"
"github.com/Warky-Devs/vecna.git/pkg/adapter"
"github.com/Warky-Devs/vecna.git/pkg/config"
"github.com/Warky-Devs/vecna.git/pkg/embedclient"
)
// handler holds shared dependencies for all HTTP handlers.
type handler struct {
// cfg supplies the default forward target name and the endpoint URLs reported in traces.
cfg *config.Config
// clients maps a name to an embed client; it is looked up first by the
// requested model and then by the default forward target.
clients map[string]embedclient.Client
// adapter converts each embedding vector returned by a client before it is sent to the caller.
adapter adapter.Adapter
// logger is stored at construction; the handler code visible in this package does not read it.
logger *zap.Logger
}
// resolveClient picks the embed client to use for model.
//
// A client registered under the model name itself takes precedence; otherwise
// the client registered under the configured default forward target is used.
// When neither exists, an errClient is returned so that callers can invoke
// Embed unconditionally and simply receive an error.
//
// It returns the client, the name it was resolved under, and that target's
// first endpoint URL (empty when unknown). The name and URL are for tracing.
func (h *handler) resolveClient(model string) (embedclient.Client, string, string) {
	target := model
	client, found := h.clients[target]
	if !found {
		// No model-specific client: fall back to the default target.
		target = h.cfg.Forward.Default
		client, found = h.clients[target]
	}
	if !found {
		missing := fmt.Errorf("no client configured for model %q and no default", model)
		return &errClient{err: missing}, target, ""
	}
	return client, target, firstEndpointURL(h.cfg, target)
}
// firstEndpointURL returns the URL of the first endpoint configured for the
// forward target named targetName, or "" when the target is unknown or has no
// endpoints.
func firstEndpointURL(cfg *config.Config, targetName string) string {
	if target, known := cfg.Forward.Targets[targetName]; known && len(target.Endpoints) > 0 {
		return target.Endpoints[0].URL
	}
	return ""
}
// writeJSON encodes v as JSON and writes it with the given status code.
//
// The value is marshalled before anything is sent. If encoding fails (for
// example a NaN or Inf float, which encoding/json rejects), no header or status
// has been committed yet and the wrapped error is returned, so the caller can
// still produce an error response. On success the Content-Type is set to
// application/json and the body is the JSON document followed by a newline.
//
// It returns a non-nil error when v cannot be encoded or the write fails.
func writeJSON(w http.ResponseWriter, status int, v interface{}) error {
	payload, err := json.Marshal(v)
	if err != nil {
		return fmt.Errorf("writeJSON: %w", err)
	}

	w.Header().Set("Content-Type", "application/json")
	w.WriteHeader(status)
	// Trailing newline keeps the bytes identical to json.Encoder output.
	if _, err := w.Write(append(payload, '\n')); err != nil {
		return fmt.Errorf("writeJSON: %w", err)
	}
	return nil
}
// writeTraceHeaders sets the X-Vecna-Forward-Ms, X-Vecna-Translate-Ms and
// X-Vecna-Total-Ms response headers from t, each as a whole number of
// milliseconds. The total is measured from t.Start to the moment of the call.
// It must be called before the response header is written.
func writeTraceHeaders(w http.ResponseWriter, t *RequestTrace) {
	elapsed := time.Since(t.Start)

	millis := func(d time.Duration) string {
		return fmt.Sprintf("%d", d.Milliseconds())
	}

	w.Header().Set("X-Vecna-Forward-Ms", millis(t.ForwardDuration))
	w.Header().Set("X-Vecna-Translate-Ms", millis(t.TranslateDuration))
	w.Header().Set("X-Vecna-Total-Ms", millis(elapsed))
}
// errClient is a Client that always returns a fixed error (used as safe fallback).
// resolveClient hands it out when no real client can be found.
type errClient struct {
// err is the error returned by every Embed call.
err error
}
// Embed ignores its arguments and returns an empty Response together with the
// stored error.
func (e *errClient) Embed(_ context.Context, _ embedclient.Request) (embedclient.Response, error) {
return embedclient.Response{}, e.err
}

102
pkg/server/openai.go Normal file
View File

@@ -0,0 +1,102 @@
package server
import (
"encoding/json"
"fmt"
"net/http"
"time"
"github.com/uptrace/bunrouter"
"github.com/Warky-Devs/vecna.git/pkg/embedclient"
)
// openAIEmbedRequest is the JSON body accepted by the OpenAI-compatible
// embeddings handler.
type openAIEmbedRequest struct {
// Input is left untyped because the field may be a single string or an
// array of strings; toStringSlice normalizes it.
Input interface{} `json:"input"` // string or []string
// Model selects the embed client and is forwarded with the request.
Model string `json:"model"`
}
// openAIEmbedResponse is the JSON body returned by the OpenAI-compatible
// embeddings handler. The handler sets Object to "list".
type openAIEmbedResponse struct {
Object string `json:"object"`
// Data holds one entry per embedding returned by the embed client.
Data []openAIEmbedDatum `json:"data"`
// Model is the model name reported by the embed client.
Model string `json:"model"`
Usage openAIUsage `json:"usage"`
}
// openAIEmbedDatum is one element of openAIEmbedResponse.Data. The handler
// sets Object to "embedding" and Index to the embedding's position in the
// embed client's result.
type openAIEmbedDatum struct {
Object string `json:"object"`
// Embedding is the vector after it has passed through the adapter.
Embedding []float32 `json:"embedding"`
Index int `json:"index"`
}
// openAIUsage is the "usage" object of the OpenAI-compatible response; both
// counters are copied from the embed client's response.
type openAIUsage struct {
PromptTokens int `json:"prompt_tokens"`
TotalTokens int `json:"total_tokens"`
}
// openAIEmbeddings handles the OpenAI-compatible embeddings route.
//
// The "input" field (string or array of strings) is normalized to a list of
// texts and forwarded to the client resolved from the "model" field. Each
// returned vector is passed through the dimension adapter and emitted as one
// "embedding" datum whose index is its position in the upstream result.
//
// Responses:
//   - 400 when the body is not valid JSON or "input" has an unsupported shape
//   - 502 when the forward call fails
//   - 500 when the adapter rejects a vector
//   - 200 plus X-Vecna-* timing headers on success
//
// The returned error is the one reported by writeJSON, if any.
func (h *handler) openAIEmbeddings(w http.ResponseWriter, req bunrouter.Request) error {
	ctx := req.Context()

	var payload openAIEmbedRequest
	if decodeErr := json.NewDecoder(req.Body).Decode(&payload); decodeErr != nil {
		return writeJSON(w, http.StatusBadRequest, map[string]string{"error": "invalid request body"})
	}

	inputs, err := toStringSlice(payload.Input)
	if err != nil {
		return writeJSON(w, http.StatusBadRequest, map[string]string{"error": err.Error()})
	}

	client, targetName, targetURL := h.resolveClient(payload.Model)

	trace := TraceFromContext(ctx)
	trace.ForwardTarget = targetName
	trace.ForwardURL = targetURL

	// Forward phase: timed even when the call fails.
	forwardStart := time.Now()
	upstream, err := client.Embed(ctx, embedclient.Request{Texts: inputs, Model: payload.Model})
	trace.ForwardDuration = time.Since(forwardStart)
	if err != nil {
		return writeJSON(w, http.StatusBadGateway, map[string]string{"error": err.Error()})
	}

	usage := upstream.Usage
	trace.ForwardModel = upstream.Model
	trace.PromptTokens = usage.PromptTokens
	trace.TotalTokens = usage.TotalTokens

	// Translate phase: adapt every vector, preserving upstream order.
	adaptStart := time.Now()
	data := make([]openAIEmbedDatum, 0, len(upstream.Embeddings))
	for idx := range upstream.Embeddings {
		vec, adaptErr := h.adapter.Adapt(upstream.Embeddings[idx])
		if adaptErr != nil {
			return writeJSON(w, http.StatusInternalServerError, map[string]string{"error": adaptErr.Error()})
		}
		data = append(data, openAIEmbedDatum{Object: "embedding", Embedding: vec, Index: idx})
	}
	trace.TranslateDuration = time.Since(adaptStart)

	writeTraceHeaders(w, trace)

	resp := openAIEmbedResponse{
		Object: "list",
		Data:   data,
		Model:  upstream.Model,
		Usage:  openAIUsage{PromptTokens: usage.PromptTokens, TotalTokens: usage.TotalTokens},
	}
	return writeJSON(w, http.StatusOK, resp)
}
// toStringSlice normalizes a decoded JSON "input" value into a list of texts.
//
// v is expected to be what encoding/json produces for an interface{} target:
// a string, or a []interface{} whose elements are all strings. A single string
// becomes a one-element slice.
//
// It returns an error when v is any other type (including nil for a missing
// field), when an array element is not a string, or when the array is empty.
// An empty array would otherwise pass validation and be forwarded upstream as
// a request with no texts.
func toStringSlice(v interface{}) ([]string, error) {
	switch val := v.(type) {
	case string:
		return []string{val}, nil
	case []interface{}:
		if len(val) == 0 {
			return nil, fmt.Errorf("input array must not be empty")
		}
		out := make([]string, len(val))
		for i, item := range val {
			s, ok := item.(string)
			if !ok {
				return nil, fmt.Errorf("input array element %d is not a string", i)
			}
			out[i] = s
		}
		return out, nil
	default:
		return nil, fmt.Errorf("input must be a string or array of strings")
	}
}

163
pkg/server/server.go Normal file
View File

@@ -0,0 +1,163 @@
package server
import (
"fmt"
"net/http"
"strings"
"time"
"github.com/prometheus/client_golang/prometheus/promhttp"
"github.com/uptrace/bunrouter"
"go.uber.org/zap"
"github.com/Warky-Devs/vecna.git/pkg/adapter"
"github.com/Warky-Devs/vecna.git/pkg/config"
"github.com/Warky-Devs/vecna.git/pkg/embedclient"
"github.com/Warky-Devs/vecna.git/pkg/metrics"
"github.com/Warky-Devs/vecna.git/pkg/server/spec"
)
// New builds the HTTP router for the embedding proxy.
//
// It installs the auth, trace, metrics and logging middleware at router level
// (in that order), then registers:
//   - POST /v1/embeddings (OpenAI-compatible)
//   - POST /v1/models/:model:embedContent and :batchEmbedContents (Google-compatible)
//   - GET /openapi.yaml and GET /docs
//   - GET on the metrics path (cfg.Metrics.Path, default "/metrics") when
//     cfg.Metrics.Enabled is set, wrapped in metricsAuthHandler if
//     cfg.Metrics.APIKey is non-empty
//
// NOTE(review): the Google routes put a literal suffix after a named parameter
// inside one path segment, and the handlers read req.Param("model"). Confirm
// that bunrouter parses ":model:embedContent" as parameter "model" plus suffix.
//
// NOTE(review): assuming router-level middleware applies to every route,
// /metrics, /docs and /openapi.yaml are also behind authMiddleware. With both
// server API keys and a metrics API key configured, a metrics request would
// need one token accepted by both checks. Verify this is intended.
func New(
	cfg *config.Config,
	clients map[string]embedclient.Client,
	adp adapter.Adapter,
	reg *metrics.Registry,
	logger *zap.Logger,
) *bunrouter.Router {
	h := &handler{cfg: cfg, clients: clients, adapter: adp, logger: logger}

	router := bunrouter.New(
		bunrouter.WithMiddleware(authMiddleware(cfg.Server.APIKeys)),
		bunrouter.WithMiddleware(traceMiddleware()),
		bunrouter.WithMiddleware(metricsMiddleware(reg, adp)),
		bunrouter.WithMiddleware(loggingMiddleware(logger)),
	)

	// Embedding endpoints.
	router.POST("/v1/embeddings", h.openAIEmbeddings)
	router.POST("/v1/models/:model:embedContent", h.googleEmbedContent)
	router.POST("/v1/models/:model:batchEmbedContents", h.googleBatchEmbedContents)

	// OpenAPI spec + docs.
	router.GET("/openapi.yaml", spec.SpecHandler())
	router.GET("/docs", spec.DocsHandler())

	// Metrics are opt-in.
	if !cfg.Metrics.Enabled {
		return router
	}

	prom := promhttp.HandlerFor(reg.Prometheus(), promhttp.HandlerOpts{})
	metricsPath := cfg.Metrics.Path
	if metricsPath == "" {
		metricsPath = "/metrics"
	}

	if key := cfg.Metrics.APIKey; key != "" {
		router.GET(metricsPath, metricsAuthHandler(key, prom))
		return router
	}

	router.GET(metricsPath, func(w http.ResponseWriter, req bunrouter.Request) error {
		prom.ServeHTTP(w, req.Request)
		return nil
	})
	return router
}
// authMiddleware rejects requests without a valid Bearer token when api_keys is configured.
//
// With no keys configured the middleware is a pass-through. Otherwise the
// Authorization header, with an optional "Bearer " prefix removed, must equal
// one of the configured keys; any other request gets 401 "unauthorized" and
// the next handler is not called.
//
// Empty strings in apiKeys are ignored. An empty key (for example from an
// unset environment variable) would otherwise match requests that carry no
// Authorization header at all. If every configured key is empty, all requests
// are rejected rather than silently disabling authentication.
func authMiddleware(apiKeys []string) bunrouter.MiddlewareFunc {
	if len(apiKeys) == 0 {
		return func(next bunrouter.HandlerFunc) bunrouter.HandlerFunc { return next }
	}

	keySet := make(map[string]struct{}, len(apiKeys))
	for _, k := range apiKeys {
		if k == "" {
			// Never let "no credentials" count as a valid key.
			continue
		}
		keySet[k] = struct{}{}
	}

	return func(next bunrouter.HandlerFunc) bunrouter.HandlerFunc {
		return func(w http.ResponseWriter, req bunrouter.Request) error {
			token := strings.TrimPrefix(req.Header.Get("Authorization"), "Bearer ")
			if _, ok := keySet[token]; !ok {
				http.Error(w, "unauthorized", http.StatusUnauthorized)
				return nil
			}
			return next(w, req)
		}
	}
}
// traceMiddleware attaches a fresh *RequestTrace (created by WithTrace) to the
// context of every request before passing it on to the next handler.
func traceMiddleware() bunrouter.MiddlewareFunc {
	return func(next bunrouter.HandlerFunc) bunrouter.HandlerFunc {
		return func(w http.ResponseWriter, req bunrouter.Request) error {
			traced := req.WithContext(WithTrace(req.Context()))
			return next(w, traced)
		}
	}
}
// metricsMiddleware builds the metrics middleware from reg.
//
// With a nil registry it is a pass-through. Otherwise it hands reg.Middleware
// a callback that converts the request's RequestTrace into a
// metrics.TraceSnapshot: durations in seconds, forward target/URL/model, token
// counts, and the adapter's Go type name (computed once here via %T).
// Presumably reg.Middleware invokes the callback after the handler returns;
// verify against the metrics package.
func metricsMiddleware(reg *metrics.Registry, adp adapter.Adapter) bunrouter.MiddlewareFunc {
	if reg == nil {
		return func(next bunrouter.HandlerFunc) bunrouter.HandlerFunc { return next }
	}

	adapterType := fmt.Sprintf("%T", adp)

	snapshot := func(req bunrouter.Request) metrics.TraceSnapshot {
		trace := TraceFromContext(req.Context())

		var snap metrics.TraceSnapshot
		snap.TotalSeconds = time.Since(trace.Start).Seconds()
		snap.ForwardSeconds = trace.ForwardDuration.Seconds()
		snap.TranslateSeconds = trace.TranslateDuration.Seconds()
		snap.ForwardTarget = trace.ForwardTarget
		snap.ForwardURL = trace.ForwardURL
		snap.ForwardModel = trace.ForwardModel
		snap.AdapterType = adapterType
		snap.PromptTokens = trace.PromptTokens
		snap.TotalTokens = trace.TotalTokens
		return snap
	}

	return reg.Middleware(snapshot)
}
// loggingMiddleware emits one "request" info log line per request via zap,
// after the next handler has returned. The line carries method, path, the
// status captured by a statusWriter (200 unless the handler wrote another
// code), and total/forward/translate times in milliseconds taken from the
// request's RequestTrace. The handler's error is returned unchanged and is
// not logged here.
func loggingMiddleware(logger *zap.Logger) bunrouter.MiddlewareFunc {
	return func(next bunrouter.HandlerFunc) bunrouter.HandlerFunc {
		return func(w http.ResponseWriter, req bunrouter.Request) error {
			recorder := &statusWriter{ResponseWriter: w, status: http.StatusOK}
			handlerErr := next(recorder, req)

			trace := TraceFromContext(req.Context())
			fields := []zap.Field{
				zap.String("method", req.Method),
				zap.String("path", req.URL.Path),
				zap.Int("status", recorder.status),
				zap.Int64("total_ms", time.Since(trace.Start).Milliseconds()),
				zap.Int64("forward_ms", trace.ForwardDuration.Milliseconds()),
				zap.Int64("translate_ms", trace.TranslateDuration.Milliseconds()),
			}
			logger.Info("request", fields...)

			return handlerErr
		}
	}
}
// metricsAuthHandler adapts a standard http.Handler to a bunrouter handler
// guarded by a single API key. The Authorization header, with an optional
// "Bearer " prefix removed, must equal apiKey; otherwise the response is
// 401 "unauthorized" and h is not invoked. The returned handler always
// reports a nil error.
func metricsAuthHandler(apiKey string, h http.Handler) bunrouter.HandlerFunc {
	return func(w http.ResponseWriter, req bunrouter.Request) error {
		presented := strings.TrimPrefix(req.Header.Get("Authorization"), "Bearer ")
		if presented == apiKey {
			h.ServeHTTP(w, req.Request)
			return nil
		}
		http.Error(w, "unauthorized", http.StatusUnauthorized)
		return nil
	}
}
// statusWriter captures the HTTP status code written by a handler.
//
// Only the first final status is recorded, because that is the one that
// reaches the client. Later WriteHeader calls are still forwarded to the
// wrapped writer but no longer change the captured value. A body write before
// any WriteHeader counts as an implicit 200.
type statusWriter struct {
	http.ResponseWriter
	status int
	// wroteHeader is set once a final (non-informational) status has been
	// sent, explicitly or implicitly through Write.
	wroteHeader bool
}

// WriteHeader records code as the response status unless a final status was
// already sent, then forwards the call. Informational 1xx codes other than
// 101 may precede the final status and are therefore not recorded.
func (sw *statusWriter) WriteHeader(code int) {
	informational := code >= 100 && code <= 199 && code != http.StatusSwitchingProtocols
	if !sw.wroteHeader && !informational {
		sw.status = code
		sw.wroteHeader = true
	}
	sw.ResponseWriter.WriteHeader(code)
}

// Write forwards p to the wrapped writer. The first write without a preceding
// WriteHeader fixes the status: the value the writer was created with, or 200
// if that was left at zero.
func (sw *statusWriter) Write(p []byte) (int, error) {
	if !sw.wroteHeader {
		if sw.status == 0 {
			sw.status = http.StatusOK
		}
		sw.wroteHeader = true
	}
	return sw.ResponseWriter.Write(p)
}

View File

@@ -0,0 +1,36 @@
package spec
import (
_ "embed"
"net/http"
"github.com/uptrace/bunrouter"
)
// openapiYAML holds the bytes of openapi.yaml, embedded into the binary at
// build time; SpecHandler serves it verbatim.
//
//go:embed openapi.yaml
var openapiYAML []byte
// SpecHandler returns a handler that writes the embedded OpenAPI document
// with Content-Type application/yaml. The handler's error is the write error,
// if any.
func SpecHandler() bunrouter.HandlerFunc {
	return func(w http.ResponseWriter, _ bunrouter.Request) error {
		w.Header().Set("Content-Type", "application/yaml")
		if _, err := w.Write(openapiYAML); err != nil {
			return err
		}
		return nil
	}
}
// DocsHandler returns a handler that serves a small HTML page as
// text/html; charset=utf-8. The page loads the Scalar API reference script
// from the jsDelivr CDN and points it at /openapi.yaml. The handler's error is
// the write error, if any.
func DocsHandler() bunrouter.HandlerFunc {
	const docsPage = `<!doctype html>
<html>
<head><title>vecna API</title><meta charset="utf-8"/></head>
<body>
<script id="api-reference" data-url="/openapi.yaml"></script>
<script src="https://cdn.jsdelivr.net/npm/@scalar/api-reference"></script>
</body>
</html>`

	return func(w http.ResponseWriter, _ bunrouter.Request) error {
		w.Header().Set("Content-Type", "text/html; charset=utf-8")
		if _, err := w.Write([]byte(docsPage)); err != nil {
			return err
		}
		return nil
	}
}

View File

@@ -0,0 +1,252 @@
openapi: "3.1.0"
info:
title: vecna Embedding Adapter
description: Proxies text to a backing embedding model and adapts the result vectors between dimensions.
version: "1.0.0"
servers:
- url: http://localhost:8080
security:
- BearerAuth: []
components:
securitySchemes:
BearerAuth:
type: http
scheme: bearer
schemas:
Error:
type: object
properties:
error:
type: string
OpenAIEmbedRequest:
type: object
required: [input, model]
properties:
input:
oneOf:
- type: string
- type: array
items:
type: string
model:
type: string
OpenAIEmbedResponse:
type: object
properties:
object:
type: string
example: list
model:
type: string
data:
type: array
items:
type: object
properties:
object:
type: string
example: embedding
index:
type: integer
embedding:
type: array
items:
type: number
format: float
usage:
type: object
properties:
prompt_tokens:
type: integer
total_tokens:
type: integer
GoogleEmbedContentRequest:
type: object
required: [content]
properties:
content:
type: object
properties:
parts:
type: array
items:
type: object
properties:
text:
type: string
taskType:
type: string
GoogleEmbedContentResponse:
type: object
properties:
embedding:
type: object
properties:
values:
type: array
items:
type: number
format: float
GoogleBatchRequest:
type: object
required: [requests]
properties:
requests:
type: array
items:
$ref: '#/components/schemas/GoogleEmbedContentRequest'
GoogleBatchResponse:
type: object
properties:
embeddings:
type: array
items:
type: object
properties:
values:
type: array
items:
type: number
format: float
headers:
X-Vecna-Forward-Ms:
description: Time spent forwarding the request to the backing model (milliseconds).
schema:
type: integer
X-Vecna-Translate-Ms:
description: Time spent in the dimension adapter (milliseconds).
schema:
type: integer
X-Vecna-Total-Ms:
description: Total request wall-clock time (milliseconds).
schema:
type: integer
paths:
/v1/embeddings:
post:
summary: OpenAI-compatible embeddings
operationId: openaiEmbeddings
requestBody:
required: true
content:
application/json:
schema:
$ref: '#/components/schemas/OpenAIEmbedRequest'
responses:
"200":
description: Adapted embeddings
headers:
X-Vecna-Forward-Ms:
$ref: '#/components/headers/X-Vecna-Forward-Ms'
X-Vecna-Translate-Ms:
$ref: '#/components/headers/X-Vecna-Translate-Ms'
X-Vecna-Total-Ms:
$ref: '#/components/headers/X-Vecna-Total-Ms'
content:
application/json:
schema:
$ref: '#/components/schemas/OpenAIEmbedResponse'
"400":
description: Bad request
content:
application/json:
schema:
$ref: '#/components/schemas/Error'
"401":
description: Unauthorized
"502":
description: Backing model error
/v1/models/{model}:embedContent:
post:
summary: Google-compatible single embedContent
operationId: googleEmbedContent
parameters:
- name: model
in: path
required: true
schema:
type: string
requestBody:
required: true
content:
application/json:
schema:
$ref: '#/components/schemas/GoogleEmbedContentRequest'
responses:
"200":
description: Adapted embedding
headers:
X-Vecna-Forward-Ms:
$ref: '#/components/headers/X-Vecna-Forward-Ms'
X-Vecna-Translate-Ms:
$ref: '#/components/headers/X-Vecna-Translate-Ms'
X-Vecna-Total-Ms:
$ref: '#/components/headers/X-Vecna-Total-Ms'
content:
application/json:
schema:
$ref: '#/components/schemas/GoogleEmbedContentResponse'
"400":
description: Bad request
content:
application/json:
schema:
$ref: '#/components/schemas/Error'
"401":
description: Unauthorized
"502":
description: Backing model error
/v1/models/{model}:batchEmbedContents:
post:
summary: Google-compatible batch batchEmbedContents
operationId: googleBatchEmbedContents
parameters:
- name: model
in: path
required: true
schema:
type: string
requestBody:
required: true
content:
application/json:
schema:
$ref: '#/components/schemas/GoogleBatchRequest'
responses:
"200":
description: Adapted embeddings
headers:
X-Vecna-Forward-Ms:
$ref: '#/components/headers/X-Vecna-Forward-Ms'
X-Vecna-Translate-Ms:
$ref: '#/components/headers/X-Vecna-Translate-Ms'
X-Vecna-Total-Ms:
$ref: '#/components/headers/X-Vecna-Total-Ms'
content:
application/json:
schema:
$ref: '#/components/schemas/GoogleBatchResponse'
"400":
description: Bad request
content:
application/json:
schema:
$ref: '#/components/schemas/Error'
"401":
description: Unauthorized
"502":
description: Backing model error

37
pkg/server/trace.go Normal file
View File

@@ -0,0 +1,37 @@
package server
import (
"context"
"time"
)
// contextKey is a private type for context keys defined in this package, so
// they cannot collide with keys from other packages.
type contextKey int
// traceKey is the context key under which the *RequestTrace is stored.
const traceKey contextKey = iota
// RequestTrace holds per-request timing data populated by handlers and middleware.
// It is shared by pointer through the request context, so handlers fill it in
// and outer middleware read it afterwards.
type RequestTrace struct {
// Start is set when the trace is created; total durations are measured from it.
Start time.Time
// ForwardDuration is the time spent in the embed client's Embed call.
ForwardDuration time.Duration
// TranslateDuration is the time spent adapting the returned vectors.
TranslateDuration time.Duration
// ForwardTarget and ForwardURL are the target name and first endpoint URL
// reported by resolveClient.
ForwardTarget string
ForwardURL string
// ForwardModel is the model name reported in the embed client's response.
ForwardModel string
// AdapterType is not assigned by any code visible in this package excerpt.
AdapterType string
// PromptTokens and TotalTokens are copied from the embed client's usage data.
PromptTokens int
TotalTokens int
}
// WithTrace injects a new *RequestTrace into ctx.
func WithTrace(ctx context.Context) context.Context {
return context.WithValue(ctx, traceKey, &RequestTrace{Start: time.Now()})
}
// TraceFromContext returns the *RequestTrace stored in ctx by WithTrace.
// It never returns nil: when ctx carries no trace (or a nil one), a new trace
// with Start set to the current time is returned. That fallback is not
// attached to ctx, so changes made to it are not visible to other callers.
func TraceFromContext(ctx context.Context) *RequestTrace {
	trace, ok := ctx.Value(traceKey).(*RequestTrace)
	if !ok || trace == nil {
		return &RequestTrace{Start: time.Now()}
	}
	return trace
}