mirror of
https://github.com/Warky-Devs/vecna.git
synced 2026-05-05 01:26:58 +00:00
feat: 🎉 Vectors na Vectors, the beginning
Translate 1536 <-> 768 , 3072 <-> 2048
This commit is contained in:
135
pkg/server/google.go
Normal file
135
pkg/server/google.go
Normal file
@@ -0,0 +1,135 @@
|
||||
package server
|
||||
|
||||
import (
	"encoding/json"
	"net/http"
	"strings"
	"time"

	"github.com/uptrace/bunrouter"

	"github.com/Warky-Devs/vecna.git/pkg/embedclient"
)
|
||||
|
||||
// --- single embedContent ---
|
||||
|
||||
// Wire types for the request body of the Google-style embedContent endpoint.
type (
	// googleEmbedContentRequest is the JSON body of a single embedContent call.
	// TaskType is decoded from "taskType" and omitted from output when empty.
	googleEmbedContentRequest struct {
		Content  googleContent `json:"content"`
		TaskType string        `json:"taskType,omitempty"`
	}

	// googleContent wraps the ordered list of parts that make up one content.
	googleContent struct {
		Parts []googlePart `json:"parts"`
	}

	// googlePart is one text fragment of a content.
	googlePart struct {
		Text string `json:"text"`
	}
)
|
||||
|
||||
// Wire types for the response body of the Google-style embedContent endpoint.
type (
	// googleEmbedContentResponse is the JSON body returned for a single
	// embedContent call.
	googleEmbedContentResponse struct {
		Embedding googleEmbeddingValues `json:"embedding"`
	}

	// googleEmbeddingValues carries one embedding vector under "values".
	googleEmbeddingValues struct {
		Values []float32 `json:"values"`
	}
)
|
||||
|
||||
func (h *handler) googleEmbedContent(w http.ResponseWriter, req bunrouter.Request) error {
|
||||
model := req.Param("model")
|
||||
|
||||
var body googleEmbedContentRequest
|
||||
if err := json.NewDecoder(req.Body).Decode(&body); err != nil {
|
||||
return writeJSON(w, http.StatusBadRequest, map[string]string{"error": "invalid request body"})
|
||||
}
|
||||
|
||||
texts := make([]string, len(body.Content.Parts))
|
||||
for i, p := range body.Content.Parts {
|
||||
texts[i] = p.Text
|
||||
}
|
||||
|
||||
client, targetName, targetURL := h.resolveClient(model)
|
||||
trace := TraceFromContext(req.Context())
|
||||
trace.ForwardTarget = targetName
|
||||
trace.ForwardURL = targetURL
|
||||
|
||||
t0 := time.Now()
|
||||
embedResp, err := client.Embed(req.Context(), embedclient.Request{Texts: texts, Model: model})
|
||||
trace.ForwardDuration = time.Since(t0)
|
||||
if err != nil {
|
||||
return writeJSON(w, http.StatusBadGateway, map[string]string{"error": err.Error()})
|
||||
}
|
||||
trace.ForwardModel = embedResp.Model
|
||||
trace.PromptTokens = embedResp.Usage.PromptTokens
|
||||
trace.TotalTokens = embedResp.Usage.TotalTokens
|
||||
|
||||
t1 := time.Now()
|
||||
var adapted []float32
|
||||
if len(embedResp.Embeddings) > 0 {
|
||||
adapted, err = h.adapter.Adapt(embedResp.Embeddings[0])
|
||||
if err != nil {
|
||||
return writeJSON(w, http.StatusInternalServerError, map[string]string{"error": err.Error()})
|
||||
}
|
||||
}
|
||||
trace.TranslateDuration = time.Since(t1)
|
||||
|
||||
writeTraceHeaders(w, trace)
|
||||
|
||||
return writeJSON(w, http.StatusOK, googleEmbedContentResponse{
|
||||
Embedding: googleEmbeddingValues{Values: adapted},
|
||||
})
|
||||
}
|
||||
|
||||
// --- batch batchEmbedContents ---
|
||||
|
||||
type googleBatchRequest struct {
|
||||
Requests []googleEmbedContentRequest `json:"requests"`
|
||||
}
|
||||
|
||||
type googleBatchResponse struct {
|
||||
Embeddings []googleEmbeddingValues `json:"embeddings"`
|
||||
}
|
||||
|
||||
func (h *handler) googleBatchEmbedContents(w http.ResponseWriter, req bunrouter.Request) error {
|
||||
model := req.Param("model")
|
||||
|
||||
var body googleBatchRequest
|
||||
if err := json.NewDecoder(req.Body).Decode(&body); err != nil {
|
||||
return writeJSON(w, http.StatusBadRequest, map[string]string{"error": "invalid request body"})
|
||||
}
|
||||
|
||||
var texts []string
|
||||
for _, r := range body.Requests {
|
||||
for _, p := range r.Content.Parts {
|
||||
texts = append(texts, p.Text)
|
||||
}
|
||||
}
|
||||
|
||||
client, targetName, targetURL := h.resolveClient(model)
|
||||
trace := TraceFromContext(req.Context())
|
||||
trace.ForwardTarget = targetName
|
||||
trace.ForwardURL = targetURL
|
||||
|
||||
t0 := time.Now()
|
||||
embedResp, err := client.Embed(req.Context(), embedclient.Request{Texts: texts, Model: model})
|
||||
trace.ForwardDuration = time.Since(t0)
|
||||
if err != nil {
|
||||
return writeJSON(w, http.StatusBadGateway, map[string]string{"error": err.Error()})
|
||||
}
|
||||
trace.ForwardModel = embedResp.Model
|
||||
trace.PromptTokens = embedResp.Usage.PromptTokens
|
||||
trace.TotalTokens = embedResp.Usage.TotalTokens
|
||||
|
||||
t1 := time.Now()
|
||||
result := make([]googleEmbeddingValues, len(embedResp.Embeddings))
|
||||
for i, vec := range embedResp.Embeddings {
|
||||
adapted, adaptErr := h.adapter.Adapt(vec)
|
||||
if adaptErr != nil {
|
||||
return writeJSON(w, http.StatusInternalServerError, map[string]string{"error": adaptErr.Error()})
|
||||
}
|
||||
result[i] = googleEmbeddingValues{Values: adapted}
|
||||
}
|
||||
trace.TranslateDuration = time.Since(t1)
|
||||
|
||||
writeTraceHeaders(w, trace)
|
||||
|
||||
return writeJSON(w, http.StatusOK, googleBatchResponse{Embeddings: result})
|
||||
}
|
||||
74
pkg/server/handler.go
Normal file
74
pkg/server/handler.go
Normal file
@@ -0,0 +1,74 @@
|
||||
package server
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"time"
|
||||
|
||||
"go.uber.org/zap"
|
||||
|
||||
"github.com/Warky-Devs/vecna.git/pkg/adapter"
|
||||
"github.com/Warky-Devs/vecna.git/pkg/config"
|
||||
"github.com/Warky-Devs/vecna.git/pkg/embedclient"
|
||||
)
|
||||
|
||||
// handler holds shared dependencies for all HTTP handlers.
|
||||
type handler struct {
|
||||
cfg *config.Config
|
||||
clients map[string]embedclient.Client
|
||||
adapter adapter.Adapter
|
||||
logger *zap.Logger
|
||||
}
|
||||
|
||||
// resolveClient selects the embed client for the given model name.
|
||||
// Returns the client, target name, and first endpoint URL for tracing.
|
||||
func (h *handler) resolveClient(model string) (embedclient.Client, string, string) {
|
||||
if c, ok := h.clients[model]; ok {
|
||||
url := firstEndpointURL(h.cfg, model)
|
||||
return c, model, url
|
||||
}
|
||||
name := h.cfg.Forward.Default
|
||||
c, ok := h.clients[name]
|
||||
if !ok {
|
||||
// No configured client — return a nil-safe error client
|
||||
return &errClient{err: fmt.Errorf("no client configured for model %q and no default", model)}, name, ""
|
||||
}
|
||||
return c, name, firstEndpointURL(h.cfg, name)
|
||||
}
|
||||
|
||||
func firstEndpointURL(cfg *config.Config, targetName string) string {
|
||||
t, ok := cfg.Forward.Targets[targetName]
|
||||
if !ok || len(t.Endpoints) == 0 {
|
||||
return ""
|
||||
}
|
||||
return t.Endpoints[0].URL
|
||||
}
|
||||
|
||||
// writeJSON encodes v as JSON and writes it with the given status code.
|
||||
// writeJSON serialises v as JSON and writes it to w with the given status code
// and a "Content-Type: application/json" header.
//
// The value is marshalled BEFORE any header is written. If v cannot be encoded
// (for example a float slice containing NaN), the client receives a 500 with a
// small JSON error body instead of the requested status followed by a
// truncated or empty payload, and the encoding error is returned.
//
// The body is terminated with a newline, matching json.Encoder output.
// Returned errors are wrapped with the "writeJSON: " prefix.
func writeJSON(w http.ResponseWriter, status int, v interface{}) error {
	payload, err := json.Marshal(v)
	if err != nil {
		w.Header().Set("Content-Type", "application/json")
		w.WriteHeader(http.StatusInternalServerError)
		// Best effort: the encoding failure is what gets reported to the caller.
		_, _ = w.Write([]byte("{\"error\":\"failed to encode response\"}\n"))
		return fmt.Errorf("writeJSON: %w", err)
	}

	w.Header().Set("Content-Type", "application/json")
	w.WriteHeader(status)
	if _, err := w.Write(append(payload, '\n')); err != nil {
		return fmt.Errorf("writeJSON: %w", err)
	}
	return nil
}
|
||||
|
||||
// writeTraceHeaders writes X-Vecna-* timing headers from the RequestTrace.
|
||||
func writeTraceHeaders(w http.ResponseWriter, t *RequestTrace) {
|
||||
total := time.Since(t.Start)
|
||||
w.Header().Set("X-Vecna-Forward-Ms", fmt.Sprintf("%d", t.ForwardDuration.Milliseconds()))
|
||||
w.Header().Set("X-Vecna-Translate-Ms", fmt.Sprintf("%d", t.TranslateDuration.Milliseconds()))
|
||||
w.Header().Set("X-Vecna-Total-Ms", fmt.Sprintf("%d", total.Milliseconds()))
|
||||
}
|
||||
|
||||
// errClient is a Client that always returns a fixed error (used as safe fallback).
|
||||
type errClient struct {
|
||||
err error
|
||||
}
|
||||
|
||||
func (e *errClient) Embed(_ context.Context, _ embedclient.Request) (embedclient.Response, error) {
|
||||
return embedclient.Response{}, e.err
|
||||
}
|
||||
102
pkg/server/openai.go
Normal file
102
pkg/server/openai.go
Normal file
@@ -0,0 +1,102 @@
|
||||
package server
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"time"
|
||||
|
||||
"github.com/uptrace/bunrouter"
|
||||
|
||||
"github.com/Warky-Devs/vecna.git/pkg/embedclient"
|
||||
)
|
||||
|
||||
// openAIEmbedRequest is the JSON body accepted by the OpenAI-style embeddings
// endpoint.
type openAIEmbedRequest struct {
	// Input is left untyped because the wire value is either a single string
	// or an array of strings; toStringSlice normalises it.
	Input interface{} `json:"input"`
	// Model is the requested model name, used to select the forward target.
	Model string `json:"model"`
}
|
||||
|
||||
// Wire types for the response body of the OpenAI-style embeddings endpoint.
type (
	// openAIEmbedResponse is the top-level response object.
	openAIEmbedResponse struct {
		Object string             `json:"object"`
		Data   []openAIEmbedDatum `json:"data"`
		Model  string             `json:"model"`
		Usage  openAIUsage        `json:"usage"`
	}

	// openAIEmbedDatum is one embedding together with its position in the
	// result list.
	openAIEmbedDatum struct {
		Object    string    `json:"object"`
		Embedding []float32 `json:"embedding"`
		Index     int       `json:"index"`
	}

	// openAIUsage carries the token counts reported for the request.
	openAIUsage struct {
		PromptTokens int `json:"prompt_tokens"`
		TotalTokens  int `json:"total_tokens"`
	}
)
|
||||
|
||||
func (h *handler) openAIEmbeddings(w http.ResponseWriter, req bunrouter.Request) error {
|
||||
var body openAIEmbedRequest
|
||||
if err := json.NewDecoder(req.Body).Decode(&body); err != nil {
|
||||
return writeJSON(w, http.StatusBadRequest, map[string]string{"error": "invalid request body"})
|
||||
}
|
||||
|
||||
texts, err := toStringSlice(body.Input)
|
||||
if err != nil {
|
||||
return writeJSON(w, http.StatusBadRequest, map[string]string{"error": err.Error()})
|
||||
}
|
||||
|
||||
client, targetName, targetURL := h.resolveClient(body.Model)
|
||||
trace := TraceFromContext(req.Context())
|
||||
trace.ForwardTarget = targetName
|
||||
trace.ForwardURL = targetURL
|
||||
|
||||
t0 := time.Now()
|
||||
embedResp, err := client.Embed(req.Context(), embedclient.Request{Texts: texts, Model: body.Model})
|
||||
trace.ForwardDuration = time.Since(t0)
|
||||
if err != nil {
|
||||
return writeJSON(w, http.StatusBadGateway, map[string]string{"error": err.Error()})
|
||||
}
|
||||
trace.ForwardModel = embedResp.Model
|
||||
trace.PromptTokens = embedResp.Usage.PromptTokens
|
||||
trace.TotalTokens = embedResp.Usage.TotalTokens
|
||||
|
||||
t1 := time.Now()
|
||||
data := make([]openAIEmbedDatum, len(embedResp.Embeddings))
|
||||
for i, vec := range embedResp.Embeddings {
|
||||
adapted, adaptErr := h.adapter.Adapt(vec)
|
||||
if adaptErr != nil {
|
||||
return writeJSON(w, http.StatusInternalServerError, map[string]string{"error": adaptErr.Error()})
|
||||
}
|
||||
data[i] = openAIEmbedDatum{Object: "embedding", Embedding: adapted, Index: i}
|
||||
}
|
||||
trace.TranslateDuration = time.Since(t1)
|
||||
|
||||
writeTraceHeaders(w, trace)
|
||||
|
||||
return writeJSON(w, http.StatusOK, openAIEmbedResponse{
|
||||
Object: "list",
|
||||
Data: data,
|
||||
Model: embedResp.Model,
|
||||
Usage: openAIUsage{PromptTokens: embedResp.Usage.PromptTokens, TotalTokens: embedResp.Usage.TotalTokens},
|
||||
})
|
||||
}
|
||||
|
||||
// toStringSlice accepts a JSON string or array of strings.
|
||||
// toStringSlice normalises a decoded JSON value into a list of strings.
//
// A JSON string becomes a one-element slice. A JSON array is accepted only if
// every element is a string; the first offending element is reported by index.
// Any other value (including nil) is rejected.
func toStringSlice(v interface{}) ([]string, error) {
	if single, isString := v.(string); isString {
		return []string{single}, nil
	}

	items, isArray := v.([]interface{})
	if !isArray {
		return nil, fmt.Errorf("input must be a string or array of strings")
	}

	texts := make([]string, 0, len(items))
	for idx := range items {
		text, isString := items[idx].(string)
		if !isString {
			return nil, fmt.Errorf("input array element %d is not a string", idx)
		}
		texts = append(texts, text)
	}
	return texts, nil
}
|
||||
163
pkg/server/server.go
Normal file
163
pkg/server/server.go
Normal file
@@ -0,0 +1,163 @@
|
||||
package server
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net/http"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/prometheus/client_golang/prometheus/promhttp"
|
||||
"github.com/uptrace/bunrouter"
|
||||
"go.uber.org/zap"
|
||||
|
||||
"github.com/Warky-Devs/vecna.git/pkg/adapter"
|
||||
"github.com/Warky-Devs/vecna.git/pkg/config"
|
||||
"github.com/Warky-Devs/vecna.git/pkg/embedclient"
|
||||
"github.com/Warky-Devs/vecna.git/pkg/metrics"
|
||||
"github.com/Warky-Devs/vecna.git/pkg/server/spec"
|
||||
)
|
||||
|
||||
// New builds and returns a configured bunrouter.Router.
|
||||
func New(
|
||||
cfg *config.Config,
|
||||
clients map[string]embedclient.Client,
|
||||
adp adapter.Adapter,
|
||||
reg *metrics.Registry,
|
||||
logger *zap.Logger,
|
||||
) *bunrouter.Router {
|
||||
router := bunrouter.New(
|
||||
bunrouter.WithMiddleware(authMiddleware(cfg.Server.APIKeys)),
|
||||
bunrouter.WithMiddleware(traceMiddleware()),
|
||||
bunrouter.WithMiddleware(metricsMiddleware(reg, adp)),
|
||||
bunrouter.WithMiddleware(loggingMiddleware(logger)),
|
||||
)
|
||||
|
||||
h := &handler{cfg: cfg, clients: clients, adapter: adp, logger: logger}
|
||||
|
||||
router.POST("/v1/embeddings", h.openAIEmbeddings)
|
||||
router.POST("/v1/models/:model:embedContent", h.googleEmbedContent)
|
||||
router.POST("/v1/models/:model:batchEmbedContents", h.googleBatchEmbedContents)
|
||||
|
||||
// OpenAPI spec + docs
|
||||
router.GET("/openapi.yaml", spec.SpecHandler())
|
||||
router.GET("/docs", spec.DocsHandler())
|
||||
|
||||
// Metrics — only when enabled
|
||||
if cfg.Metrics.Enabled {
|
||||
metricsHandler := promhttp.HandlerFor(reg.Prometheus(), promhttp.HandlerOpts{})
|
||||
path := cfg.Metrics.Path
|
||||
if path == "" {
|
||||
path = "/metrics"
|
||||
}
|
||||
if cfg.Metrics.APIKey != "" {
|
||||
router.GET(path, metricsAuthHandler(cfg.Metrics.APIKey, metricsHandler))
|
||||
} else {
|
||||
router.GET(path, func(w http.ResponseWriter, req bunrouter.Request) error {
|
||||
metricsHandler.ServeHTTP(w, req.Request)
|
||||
return nil
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
return router
|
||||
}
|
||||
|
||||
// authMiddleware rejects requests without a valid Bearer token when api_keys is configured.
|
||||
func authMiddleware(apiKeys []string) bunrouter.MiddlewareFunc {
|
||||
if len(apiKeys) == 0 {
|
||||
return func(next bunrouter.HandlerFunc) bunrouter.HandlerFunc { return next }
|
||||
}
|
||||
keySet := make(map[string]struct{}, len(apiKeys))
|
||||
for _, k := range apiKeys {
|
||||
keySet[k] = struct{}{}
|
||||
}
|
||||
return func(next bunrouter.HandlerFunc) bunrouter.HandlerFunc {
|
||||
return func(w http.ResponseWriter, req bunrouter.Request) error {
|
||||
token := strings.TrimPrefix(req.Header.Get("Authorization"), "Bearer ")
|
||||
if _, ok := keySet[token]; !ok {
|
||||
http.Error(w, "unauthorized", http.StatusUnauthorized)
|
||||
return nil
|
||||
}
|
||||
return next(w, req)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// traceMiddleware injects a *RequestTrace into every request context.
|
||||
func traceMiddleware() bunrouter.MiddlewareFunc {
|
||||
return func(next bunrouter.HandlerFunc) bunrouter.HandlerFunc {
|
||||
return func(w http.ResponseWriter, req bunrouter.Request) error {
|
||||
ctx := WithTrace(req.Context())
|
||||
return next(w, req.WithContext(ctx))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// metricsMiddleware records Prometheus observations after the handler returns.
|
||||
func metricsMiddleware(reg *metrics.Registry, adp adapter.Adapter) bunrouter.MiddlewareFunc {
|
||||
if reg == nil {
|
||||
return func(next bunrouter.HandlerFunc) bunrouter.HandlerFunc { return next }
|
||||
}
|
||||
adpType := fmt.Sprintf("%T", adp)
|
||||
return reg.Middleware(func(req bunrouter.Request) metrics.TraceSnapshot {
|
||||
t := TraceFromContext(req.Context())
|
||||
total := time.Since(t.Start)
|
||||
return metrics.TraceSnapshot{
|
||||
TotalSeconds: total.Seconds(),
|
||||
ForwardSeconds: t.ForwardDuration.Seconds(),
|
||||
TranslateSeconds: t.TranslateDuration.Seconds(),
|
||||
ForwardTarget: t.ForwardTarget,
|
||||
ForwardURL: t.ForwardURL,
|
||||
ForwardModel: t.ForwardModel,
|
||||
AdapterType: adpType,
|
||||
PromptTokens: t.PromptTokens,
|
||||
TotalTokens: t.TotalTokens,
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// loggingMiddleware logs method, path, status, and timing via zap.
|
||||
func loggingMiddleware(logger *zap.Logger) bunrouter.MiddlewareFunc {
|
||||
return func(next bunrouter.HandlerFunc) bunrouter.HandlerFunc {
|
||||
return func(w http.ResponseWriter, req bunrouter.Request) error {
|
||||
sw := &statusWriter{ResponseWriter: w, status: http.StatusOK}
|
||||
err := next(sw, req)
|
||||
t := TraceFromContext(req.Context())
|
||||
total := time.Since(t.Start)
|
||||
|
||||
logger.Info("request",
|
||||
zap.String("method", req.Method),
|
||||
zap.String("path", req.URL.Path),
|
||||
zap.Int("status", sw.status),
|
||||
zap.Int64("total_ms", total.Milliseconds()),
|
||||
zap.Int64("forward_ms", t.ForwardDuration.Milliseconds()),
|
||||
zap.Int64("translate_ms", t.TranslateDuration.Milliseconds()),
|
||||
)
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// metricsAuthHandler wraps a standard http.Handler with Bearer token auth.
|
||||
func metricsAuthHandler(apiKey string, h http.Handler) bunrouter.HandlerFunc {
|
||||
return func(w http.ResponseWriter, req bunrouter.Request) error {
|
||||
token := strings.TrimPrefix(req.Header.Get("Authorization"), "Bearer ")
|
||||
if token != apiKey {
|
||||
http.Error(w, "unauthorized", http.StatusUnauthorized)
|
||||
return nil
|
||||
}
|
||||
h.ServeHTTP(w, req.Request)
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// statusWriter captures the HTTP status code written by a handler.
|
||||
// statusWriter wraps an http.ResponseWriter and remembers the status code
// passed to WriteHeader so it can be inspected after the handler returns.
// The creator chooses the initial value of status.
type statusWriter struct {
	http.ResponseWriter
	status int // last code passed to WriteHeader
}

// WriteHeader records code and then forwards it to the wrapped writer.
func (sw *statusWriter) WriteHeader(code int) {
	sw.status = code
	inner := sw.ResponseWriter
	inner.WriteHeader(code)
}
|
||||
36
pkg/server/spec/handler.go
Normal file
36
pkg/server/spec/handler.go
Normal file
@@ -0,0 +1,36 @@
|
||||
package spec
|
||||
|
||||
import (
|
||||
_ "embed"
|
||||
"net/http"
|
||||
|
||||
"github.com/uptrace/bunrouter"
|
||||
)
|
||||
|
||||
//go:embed openapi.yaml
|
||||
var openapiYAML []byte
|
||||
|
||||
// SpecHandler serves the raw OpenAPI YAML spec.
|
||||
func SpecHandler() bunrouter.HandlerFunc {
|
||||
return func(w http.ResponseWriter, req bunrouter.Request) error {
|
||||
w.Header().Set("Content-Type", "application/yaml")
|
||||
_, err := w.Write(openapiYAML)
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// DocsHandler serves the Scalar API reference UI.
|
||||
func DocsHandler() bunrouter.HandlerFunc {
|
||||
return func(w http.ResponseWriter, req bunrouter.Request) error {
|
||||
w.Header().Set("Content-Type", "text/html; charset=utf-8")
|
||||
_, err := w.Write([]byte(`<!doctype html>
|
||||
<html>
|
||||
<head><title>vecna API</title><meta charset="utf-8"/></head>
|
||||
<body>
|
||||
<script id="api-reference" data-url="/openapi.yaml"></script>
|
||||
<script src="https://cdn.jsdelivr.net/npm/@scalar/api-reference"></script>
|
||||
</body>
|
||||
</html>`))
|
||||
return err
|
||||
}
|
||||
}
|
||||
252
pkg/server/spec/openapi.yaml
Normal file
252
pkg/server/spec/openapi.yaml
Normal file
@@ -0,0 +1,252 @@
|
||||
openapi: "3.1.0"
|
||||
info:
|
||||
title: vecna Embedding Adapter
|
||||
description: Proxies text to a backing embedding model and adapts the result vectors between dimensions.
|
||||
version: "1.0.0"
|
||||
|
||||
servers:
|
||||
- url: http://localhost:8080
|
||||
|
||||
security:
|
||||
- BearerAuth: []
|
||||
|
||||
components:
|
||||
securitySchemes:
|
||||
BearerAuth:
|
||||
type: http
|
||||
scheme: bearer
|
||||
|
||||
schemas:
|
||||
Error:
|
||||
type: object
|
||||
properties:
|
||||
error:
|
||||
type: string
|
||||
|
||||
OpenAIEmbedRequest:
|
||||
type: object
|
||||
required: [input, model]
|
||||
properties:
|
||||
input:
|
||||
oneOf:
|
||||
- type: string
|
||||
- type: array
|
||||
items:
|
||||
type: string
|
||||
model:
|
||||
type: string
|
||||
|
||||
OpenAIEmbedResponse:
|
||||
type: object
|
||||
properties:
|
||||
object:
|
||||
type: string
|
||||
example: list
|
||||
model:
|
||||
type: string
|
||||
data:
|
||||
type: array
|
||||
items:
|
||||
type: object
|
||||
properties:
|
||||
object:
|
||||
type: string
|
||||
example: embedding
|
||||
index:
|
||||
type: integer
|
||||
embedding:
|
||||
type: array
|
||||
items:
|
||||
type: number
|
||||
format: float
|
||||
usage:
|
||||
type: object
|
||||
properties:
|
||||
prompt_tokens:
|
||||
type: integer
|
||||
total_tokens:
|
||||
type: integer
|
||||
|
||||
GoogleEmbedContentRequest:
|
||||
type: object
|
||||
required: [content]
|
||||
properties:
|
||||
content:
|
||||
type: object
|
||||
properties:
|
||||
parts:
|
||||
type: array
|
||||
items:
|
||||
type: object
|
||||
properties:
|
||||
text:
|
||||
type: string
|
||||
taskType:
|
||||
type: string
|
||||
|
||||
GoogleEmbedContentResponse:
|
||||
type: object
|
||||
properties:
|
||||
embedding:
|
||||
type: object
|
||||
properties:
|
||||
values:
|
||||
type: array
|
||||
items:
|
||||
type: number
|
||||
format: float
|
||||
|
||||
GoogleBatchRequest:
|
||||
type: object
|
||||
required: [requests]
|
||||
properties:
|
||||
requests:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/GoogleEmbedContentRequest'
|
||||
|
||||
GoogleBatchResponse:
|
||||
type: object
|
||||
properties:
|
||||
embeddings:
|
||||
type: array
|
||||
items:
|
||||
type: object
|
||||
properties:
|
||||
values:
|
||||
type: array
|
||||
items:
|
||||
type: number
|
||||
format: float
|
||||
|
||||
headers:
|
||||
X-Vecna-Forward-Ms:
|
||||
description: Time spent forwarding the request to the backing model (milliseconds).
|
||||
schema:
|
||||
type: integer
|
||||
X-Vecna-Translate-Ms:
|
||||
description: Time spent in the dimension adapter (milliseconds).
|
||||
schema:
|
||||
type: integer
|
||||
X-Vecna-Total-Ms:
|
||||
description: Total request wall-clock time (milliseconds).
|
||||
schema:
|
||||
type: integer
|
||||
|
||||
paths:
|
||||
/v1/embeddings:
|
||||
post:
|
||||
summary: OpenAI-compatible embeddings
|
||||
operationId: openaiEmbeddings
|
||||
requestBody:
|
||||
required: true
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/OpenAIEmbedRequest'
|
||||
responses:
|
||||
"200":
|
||||
description: Adapted embeddings
|
||||
headers:
|
||||
X-Vecna-Forward-Ms:
|
||||
$ref: '#/components/headers/X-Vecna-Forward-Ms'
|
||||
X-Vecna-Translate-Ms:
|
||||
$ref: '#/components/headers/X-Vecna-Translate-Ms'
|
||||
X-Vecna-Total-Ms:
|
||||
$ref: '#/components/headers/X-Vecna-Total-Ms'
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/OpenAIEmbedResponse'
|
||||
"400":
|
||||
description: Bad request
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/Error'
|
||||
"401":
|
||||
description: Unauthorized
|
||||
"502":
|
||||
description: Backing model error
|
||||
|
||||
/v1/models/{model}:embedContent:
|
||||
post:
|
||||
summary: Google-compatible single embedContent
|
||||
operationId: googleEmbedContent
|
||||
parameters:
|
||||
- name: model
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
requestBody:
|
||||
required: true
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/GoogleEmbedContentRequest'
|
||||
responses:
|
||||
"200":
|
||||
description: Adapted embedding
|
||||
headers:
|
||||
X-Vecna-Forward-Ms:
|
||||
$ref: '#/components/headers/X-Vecna-Forward-Ms'
|
||||
X-Vecna-Translate-Ms:
|
||||
$ref: '#/components/headers/X-Vecna-Translate-Ms'
|
||||
X-Vecna-Total-Ms:
|
||||
$ref: '#/components/headers/X-Vecna-Total-Ms'
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/GoogleEmbedContentResponse'
|
||||
"400":
|
||||
description: Bad request
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/Error'
|
||||
"401":
|
||||
description: Unauthorized
|
||||
"502":
|
||||
description: Backing model error
|
||||
|
||||
/v1/models/{model}:batchEmbedContents:
|
||||
post:
|
||||
summary: Google-compatible batch batchEmbedContents
|
||||
operationId: googleBatchEmbedContents
|
||||
parameters:
|
||||
- name: model
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
requestBody:
|
||||
required: true
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/GoogleBatchRequest'
|
||||
responses:
|
||||
"200":
|
||||
description: Adapted embeddings
|
||||
headers:
|
||||
X-Vecna-Forward-Ms:
|
||||
$ref: '#/components/headers/X-Vecna-Forward-Ms'
|
||||
X-Vecna-Translate-Ms:
|
||||
$ref: '#/components/headers/X-Vecna-Translate-Ms'
|
||||
X-Vecna-Total-Ms:
|
||||
$ref: '#/components/headers/X-Vecna-Total-Ms'
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/GoogleBatchResponse'
|
||||
"400":
|
||||
description: Bad request
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/Error'
|
||||
"401":
|
||||
description: Unauthorized
|
||||
"502":
|
||||
description: Backing model error
|
||||
37
pkg/server/trace.go
Normal file
37
pkg/server/trace.go
Normal file
@@ -0,0 +1,37 @@
|
||||
package server
|
||||
|
||||
import (
|
||||
"context"
|
||||
"time"
|
||||
)
|
||||
|
||||
// contextKey is an unexported type for this package's context keys, so they
// cannot collide with keys defined by other packages.
type contextKey int

// traceKey is the context key under which the *RequestTrace is stored.
const traceKey contextKey = 0
|
||||
|
||||
// RequestTrace holds per-request timing data populated by handlers and middleware.
|
||||
// RequestTrace collects per-request timing and routing details. One instance
// is shared through the request context and filled in by handlers and
// middleware as the request progresses.
type RequestTrace struct {
	// Start is the moment the trace was created.
	Start time.Time
	// ForwardDuration is the time spent in the backing client's Embed call.
	ForwardDuration time.Duration
	// TranslateDuration is the time spent adapting the returned vectors.
	TranslateDuration time.Duration
	// ForwardTarget and ForwardURL identify the resolved forward target and
	// its first endpoint URL.
	ForwardTarget string
	ForwardURL    string
	// ForwardModel is the model name reported by the backing client.
	ForwardModel string
	// AdapterType names the adapter in use.
	// NOTE(review): no visible handler writes this field — confirm its producer.
	AdapterType string
	// PromptTokens and TotalTokens mirror the backend's usage figures.
	PromptTokens int
	TotalTokens  int
}
|
||||
|
||||
// WithTrace injects a new *RequestTrace into ctx.
|
||||
func WithTrace(ctx context.Context) context.Context {
|
||||
return context.WithValue(ctx, traceKey, &RequestTrace{Start: time.Now()})
|
||||
}
|
||||
|
||||
// TraceFromContext retrieves the *RequestTrace from ctx.
|
||||
// Returns a zero-value trace (non-nil) if none was set.
|
||||
func TraceFromContext(ctx context.Context) *RequestTrace {
|
||||
if t, ok := ctx.Value(traceKey).(*RequestTrace); ok && t != nil {
|
||||
return t
|
||||
}
|
||||
return &RequestTrace{Start: time.Now()}
|
||||
}
|
||||
Reference in New Issue
Block a user