mirror of
https://github.com/Warky-Devs/vecna.git
synced 2026-05-05 01:26:58 +00:00
feat: 🎉 Vectors na Vectors, the beginning
Translate 1536 <-> 768 , 3072 <-> 2048
This commit is contained in:
160
cmd/vecna/convert.go
Normal file
160
cmd/vecna/convert.go
Normal file
@@ -0,0 +1,160 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
|
||||
"github.com/spf13/cobra"
|
||||
|
||||
"github.com/Warky-Devs/vecna.git/pkg/adapter"
|
||||
"github.com/Warky-Devs/vecna.git/pkg/config"
|
||||
)
|
||||
|
||||
// Flag-backed settings for the convert subcommand.
var (
	// convertInput holds the value of --input/-i.
	convertInput string
	// convertOutput holds the value of --output/-o.
	convertOutput string
)
|
||||
|
||||
var convertCmd = &cobra.Command{
|
||||
Use: "convert",
|
||||
Short: "Convert vectors from one dimension to another",
|
||||
Long: "Reads a JSON array of float32 vectors, applies the configured adapter, and writes the result.",
|
||||
RunE: runConvert,
|
||||
}
|
||||
|
||||
func init() {
|
||||
convertCmd.Flags().StringVarP(&convertInput, "input", "i", "-", "input file path (- for stdin)")
|
||||
convertCmd.Flags().StringVarP(&convertOutput, "output", "o", "-", "output file path (- for stdout)")
|
||||
}
|
||||
|
||||
func runConvert(cmd *cobra.Command, _ []string) error {
|
||||
cfg, err := config.Load(cfgFile)
|
||||
if err != nil {
|
||||
return fmt.Errorf("load config: %w", err)
|
||||
}
|
||||
|
||||
adp, err := buildAdapter(cfg)
|
||||
if err != nil {
|
||||
return fmt.Errorf("build adapter: %w", err)
|
||||
}
|
||||
|
||||
in, err := openReader(convertInput)
|
||||
if err != nil {
|
||||
return fmt.Errorf("open input: %w", err)
|
||||
}
|
||||
if f, ok := in.(*os.File); ok && f != os.Stdin {
|
||||
defer func() { _ = f.Close() }()
|
||||
}
|
||||
|
||||
out, err := openWriter(convertOutput)
|
||||
if err != nil {
|
||||
return fmt.Errorf("open output: %w", err)
|
||||
}
|
||||
if f, ok := out.(*os.File); ok && f != os.Stdout {
|
||||
defer func() { _ = f.Close() }()
|
||||
}
|
||||
|
||||
var vecs [][]float32
|
||||
if err := json.NewDecoder(in).Decode(&vecs); err != nil {
|
||||
return fmt.Errorf("decode input: %w", err)
|
||||
}
|
||||
|
||||
result := make([][]float32, len(vecs))
|
||||
for i, v := range vecs {
|
||||
adapted, adaptErr := adp.Adapt(v)
|
||||
if adaptErr != nil {
|
||||
return fmt.Errorf("adapt vector %d: %w", i, adaptErr)
|
||||
}
|
||||
result[i] = adapted
|
||||
}
|
||||
|
||||
enc := json.NewEncoder(out)
|
||||
enc.SetIndent("", " ")
|
||||
if err := enc.Encode(result); err != nil {
|
||||
return fmt.Errorf("encode output: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// openReader returns the reader for path: os.Stdin when path is "-",
// otherwise the file opened with os.Open.
//
// On failure the returned reader is a true nil interface. Returning the
// result of os.Open directly would wrap a nil *os.File in a non-nil io.Reader.
func openReader(path string) (io.Reader, error) {
	if path == "-" {
		return os.Stdin, nil
	}
	f, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	return f, nil
}
|
||||
|
||||
// openWriter returns the writer for path: os.Stdout when path is "-",
// otherwise the file created (or truncated) with os.Create.
//
// On failure the returned writer is a true nil interface. Returning the
// result of os.Create directly would wrap a nil *os.File in a non-nil
// io.Writer.
func openWriter(path string) (io.Writer, error) {
	if path == "-" {
		return os.Stdout, nil
	}
	f, err := os.Create(path)
	if err != nil {
		return nil, err
	}
	return f, nil
}
|
||||
|
||||
// buildAdapter constructs the Adapter from the loaded config.
|
||||
func buildAdapter(cfg *config.Config) (adapter.Adapter, error) {
|
||||
ac := cfg.Adapter
|
||||
switch ac.Type {
|
||||
case "truncate":
|
||||
tm, pm, err := parseTruncateModes(ac.TruncateMode, ac.PadMode)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return adapter.NewTruncate(ac.SourceDim, ac.TargetDim, tm, pm)
|
||||
|
||||
case "random":
|
||||
return adapter.NewRandom(ac.SourceDim, ac.TargetDim, ac.Seed)
|
||||
|
||||
case "projection":
|
||||
if ac.MatrixFile == "" {
|
||||
return nil, fmt.Errorf("adapter type 'projection' requires matrix_file")
|
||||
}
|
||||
matrix, err := loadMatrix(ac.MatrixFile)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("load projection matrix: %w", err)
|
||||
}
|
||||
return adapter.NewProjection(ac.SourceDim, ac.TargetDim, matrix)
|
||||
|
||||
default:
|
||||
return nil, fmt.Errorf("unknown adapter type %q; valid: truncate, random, projection", ac.Type)
|
||||
}
|
||||
}
|
||||
|
||||
func parseTruncateModes(truncMode, padMode string) (adapter.TruncateMode, adapter.PadMode, error) {
|
||||
var tm adapter.TruncateMode
|
||||
switch truncMode {
|
||||
case "from_end", "":
|
||||
tm = adapter.TruncateFromEnd
|
||||
case "from_start":
|
||||
tm = adapter.TruncateFromStart
|
||||
default:
|
||||
return 0, 0, fmt.Errorf("unknown truncate_mode %q; valid: from_end, from_start", truncMode)
|
||||
}
|
||||
|
||||
var pm adapter.PadMode
|
||||
switch padMode {
|
||||
case "at_end", "":
|
||||
pm = adapter.PadAtEnd
|
||||
case "at_start":
|
||||
pm = adapter.PadAtStart
|
||||
default:
|
||||
return 0, 0, fmt.Errorf("unknown pad_mode %q; valid: at_end, at_start", padMode)
|
||||
}
|
||||
|
||||
return tm, pm, nil
|
||||
}
|
||||
|
||||
// loadMatrix opens the file at path and decodes its first JSON value as an
// array of float32 rows.
//
// The os.Open error is returned unchanged; a decoding failure is wrapped with
// the prefix "decode matrix JSON".
func loadMatrix(path string) ([][]float32, error) {
	file, openErr := os.Open(path)
	if openErr != nil {
		return nil, openErr
	}
	// Read-only handle: a failed Close cannot lose data, so it is ignored.
	defer func() { _ = file.Close() }()

	var rows [][]float32
	decodeErr := json.NewDecoder(file).Decode(&rows)
	if decodeErr != nil {
		return nil, fmt.Errorf("decode matrix JSON: %w", decodeErr)
	}
	return rows, nil
}
|
||||
105
cmd/vecna/editconfig.go
Normal file
105
cmd/vecna/editconfig.go
Normal file
@@ -0,0 +1,105 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
|
||||
"github.com/spf13/cobra"
|
||||
|
||||
"github.com/Warky-Devs/vecna.git/pkg/config"
|
||||
)
|
||||
|
||||
var editConfigCmd = &cobra.Command{
|
||||
Use: "editconfig",
|
||||
Short: "Open the vecna config file in your editor",
|
||||
RunE: runEditConfig,
|
||||
}
|
||||
|
||||
func init() {
|
||||
rootCmd.AddCommand(editConfigCmd)
|
||||
}
|
||||
|
||||
func runEditConfig(cmd *cobra.Command, _ []string) error {
|
||||
path := config.ResolveFile(cfgFile)
|
||||
|
||||
if _, err := os.Stat(path); os.IsNotExist(err) {
|
||||
if err := createDefaultConfig(path); err != nil {
|
||||
return fmt.Errorf("create default config: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
fmt.Println(path)
|
||||
|
||||
editor := resolveEditor()
|
||||
c := exec.CommandContext(context.Background(), editor, path)
|
||||
c.Stdin = os.Stdin
|
||||
c.Stdout = os.Stdout
|
||||
c.Stderr = os.Stderr
|
||||
if err := c.Run(); err != nil {
|
||||
return fmt.Errorf("editor %q exited with error: %w", editor, err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// resolveEditor picks the editor to launch.
//
// A non-empty $EDITOR is returned verbatim. Otherwise the first of "nvim" and
// "nano" found by exec.LookPath is returned as the resolved path. When
// neither is found, the bare name "nano" is returned.
func resolveEditor() string {
	if fromEnv := os.Getenv("EDITOR"); fromEnv != "" {
		return fromEnv
	}
	for _, candidate := range [...]string{"nvim", "nano"} {
		resolved, lookErr := exec.LookPath(candidate)
		if lookErr != nil {
			continue
		}
		return resolved
	}
	return "nano"
}
|
||||
|
||||
// createDefaultConfig writes a minimal JSON config skeleton to path.
|
||||
func createDefaultConfig(path string) error {
|
||||
skeleton := config.Config{
|
||||
Server: config.ServerConfig{
|
||||
Port: 8080,
|
||||
Host: "0.0.0.0",
|
||||
APIKeys: []string{},
|
||||
},
|
||||
Metrics: config.MetricsConfig{
|
||||
Enabled: false,
|
||||
Path: "/metrics",
|
||||
},
|
||||
Forward: config.ForwardConfig{
|
||||
Default: "default",
|
||||
Targets: map[string]config.ForwardTarget{
|
||||
"default": {
|
||||
APIType: "openai",
|
||||
Model: "text-embedding-3-small",
|
||||
Endpoints: []config.EndpointConfig{
|
||||
{URL: "https://api.openai.com", Priority: 10},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
Adapter: config.AdapterConfig{
|
||||
Type: "truncate",
|
||||
SourceDim: 1536,
|
||||
TargetDim: 768,
|
||||
TruncateMode: "from_end",
|
||||
PadMode: "at_end",
|
||||
},
|
||||
}
|
||||
|
||||
f, err := os.Create(path)
|
||||
if err != nil {
|
||||
return fmt.Errorf("create %s: %w", path, err)
|
||||
}
|
||||
defer func() { _ = f.Close() }()
|
||||
|
||||
enc := json.NewEncoder(f)
|
||||
enc.SetIndent("", " ")
|
||||
if err := enc.Encode(skeleton); err != nil {
|
||||
return fmt.Errorf("write default config: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
43
cmd/vecna/main.go
Normal file
43
cmd/vecna/main.go
Normal file
@@ -0,0 +1,43 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"os"
|
||||
|
||||
"github.com/spf13/cobra"
|
||||
"go.uber.org/zap"
|
||||
)
|
||||
|
||||
var (
|
||||
cfgFile string
|
||||
logLevel string
|
||||
logger *zap.Logger
|
||||
version = "dev"
|
||||
)
|
||||
|
||||
var rootCmd = &cobra.Command{
|
||||
Use: "vecna",
|
||||
Short: "Embedding dimension adapter — translate vectors between model spaces",
|
||||
PersistentPreRunE: func(cmd *cobra.Command, args []string) error {
|
||||
var err error
|
||||
if logLevel == "debug" {
|
||||
logger, err = zap.NewDevelopment()
|
||||
} else {
|
||||
logger, err = zap.NewProduction()
|
||||
}
|
||||
return err
|
||||
},
|
||||
}
|
||||
|
||||
func init() {
|
||||
rootCmd.PersistentFlags().StringVar(&cfgFile, "config", "", "config file (default: ./vecna.yaml)")
|
||||
rootCmd.PersistentFlags().StringVar(&logLevel, "log-level", "info", "log level: info | debug")
|
||||
rootCmd.AddCommand(convertCmd)
|
||||
rootCmd.AddCommand(serveCmd)
|
||||
rootCmd.AddCommand(versionCmd)
|
||||
}
|
||||
|
||||
func main() {
|
||||
if err := rootCmd.Execute(); err != nil {
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
91
cmd/vecna/models.go
Normal file
91
cmd/vecna/models.go
Normal file
@@ -0,0 +1,91 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/spf13/cobra"
|
||||
|
||||
"github.com/Warky-Devs/vecna.git/pkg/config"
|
||||
"github.com/Warky-Devs/vecna.git/pkg/discovery"
|
||||
)
|
||||
|
||||
var modelsCmd = &cobra.Command{
|
||||
Use: "models",
|
||||
Short: "List models available on each configured forwarder",
|
||||
RunE: runModels,
|
||||
}
|
||||
|
||||
func init() {
|
||||
rootCmd.AddCommand(modelsCmd)
|
||||
}
|
||||
|
||||
func runModels(_ *cobra.Command, _ []string) error {
|
||||
cfg, err := config.Load(cfgFile)
|
||||
if err != nil {
|
||||
return fmt.Errorf("load config: %w", err)
|
||||
}
|
||||
|
||||
if len(cfg.Forward.Targets) == 0 {
|
||||
fmt.Println("No forwarder targets configured.")
|
||||
return nil
|
||||
}
|
||||
|
||||
ctx := context.Background()
|
||||
|
||||
for targetName, target := range cfg.Forward.Targets {
|
||||
fmt.Printf("[ %s ]\n", targetName)
|
||||
|
||||
for _, ep := range target.Endpoints {
|
||||
kind := discovery.Kind{
|
||||
Name: targetName,
|
||||
APIType: target.APIType,
|
||||
Port: 0, // not used by Models()
|
||||
}
|
||||
|
||||
models, err := discovery.Models(ctx, ep.URL, kind)
|
||||
if err != nil {
|
||||
fmt.Printf(" %s error: %s\n\n", ep.URL, err)
|
||||
continue
|
||||
}
|
||||
|
||||
if len(models) == 0 {
|
||||
fmt.Printf(" %s (no models listed)\n\n", ep.URL)
|
||||
continue
|
||||
}
|
||||
|
||||
fmt.Printf(" %s\n", ep.URL)
|
||||
for _, m := range models {
|
||||
marker := " "
|
||||
if m == target.Model {
|
||||
marker = "* "
|
||||
}
|
||||
fmt.Printf(" %s%s\n", marker, m)
|
||||
}
|
||||
|
||||
if target.Model != "" && !contains(models, target.Model) {
|
||||
fmt.Printf(" ! configured model %q not found in list\n", target.Model)
|
||||
}
|
||||
fmt.Println()
|
||||
}
|
||||
|
||||
if len(target.Endpoints) == 0 {
|
||||
fmt.Printf(" (no endpoints configured)\n\n")
|
||||
}
|
||||
|
||||
fmt.Printf(" API type : %s\n", target.APIType)
|
||||
fmt.Printf(" Model : %s\n\n", strings.TrimSpace(target.Model))
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// contains reports whether s is an element of ss.
func contains(ss []string, s string) bool {
	for i := 0; i < len(ss); i++ {
		if ss[i] == s {
			return true
		}
	}
	return false
}
|
||||
480
cmd/vecna/onboard.go
Normal file
480
cmd/vecna/onboard.go
Normal file
@@ -0,0 +1,480 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"context"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"os"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/spf13/cobra"
|
||||
|
||||
"github.com/Warky-Devs/vecna.git/pkg/config"
|
||||
"github.com/Warky-Devs/vecna.git/pkg/discovery"
|
||||
"github.com/Warky-Devs/vecna.git/pkg/embedclient"
|
||||
)
|
||||
|
||||
var onboardCmd = &cobra.Command{
|
||||
Use: "onboard",
|
||||
Short: "Interactive setup wizard: discover servers, configure, test, and write config",
|
||||
RunE: runOnboard,
|
||||
}
|
||||
|
||||
func init() {
|
||||
rootCmd.AddCommand(onboardCmd)
|
||||
}
|
||||
|
||||
func runOnboard(_ *cobra.Command, _ []string) error {
|
||||
in := bufio.NewReader(os.Stdin)
|
||||
|
||||
fmt.Println("=== vecna onboard ===")
|
||||
fmt.Println()
|
||||
|
||||
// ── Step 1: Discover ──────────────────────────────────────────────────────
|
||||
|
||||
step(1, 5, "Discover embedding servers")
|
||||
|
||||
fmt.Println("Scanning (Ollama, LM Studio, vLLM, LocalAI, Jan, Kobold, Tabby)...")
|
||||
servers := discovery.Scan(context.Background())
|
||||
|
||||
var targets []pendingTarget
|
||||
|
||||
if len(servers) == 0 {
|
||||
fmt.Println("No servers found automatically.")
|
||||
} else {
|
||||
fmt.Printf("Found %d server(s):\n\n", len(servers))
|
||||
for i, s := range servers {
|
||||
fmt.Printf(" [%d] %-12s %s\n Models: %s\n\n",
|
||||
i+1, s.Kind.Name, s.BaseURL, joinModels(s.Models))
|
||||
}
|
||||
|
||||
// Let user pick one or more from the list; 0 = manual
|
||||
for {
|
||||
choice, err := promptInt(in,
|
||||
fmt.Sprintf("Select server [1-%d] or 0 to enter URL manually: ", len(servers)), 0, len(servers))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
var pt pendingTarget
|
||||
if choice == 0 {
|
||||
pt, err = collectManualTarget(in)
|
||||
} else {
|
||||
pt, err = collectDiscoveredTarget(in, servers[choice-1])
|
||||
}
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
targets = append(targets, pt)
|
||||
|
||||
again, err := promptBool(in, "Add another forwarder?", false)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if !again {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if len(targets) == 0 {
|
||||
pt, err := collectManualTarget(in)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
targets = append(targets, pt)
|
||||
}
|
||||
|
||||
// ── Step 2: Detect dimensions ─────────────────────────────────────────────
|
||||
|
||||
step(2, 5, "Detect model dimensions")
|
||||
|
||||
for i := range targets {
|
||||
fmt.Printf("Probing %s / %s ... ", targets[i].endpoint, targets[i].model)
|
||||
dim, err := detectDim(targets[i])
|
||||
if err != nil {
|
||||
fmt.Printf("failed (%s) — you will need to enter the dimension manually\n", err)
|
||||
targets[i].detectedDim = 0
|
||||
} else {
|
||||
fmt.Printf("%d dims\n", dim)
|
||||
targets[i].detectedDim = dim
|
||||
}
|
||||
}
|
||||
fmt.Println()
|
||||
|
||||
// ── Step 3: Configure adapter ─────────────────────────────────────────────
|
||||
|
||||
step(3, 5, "Configure dimension adapter")
|
||||
|
||||
// Use the first target's detected dim as the source dimension default
|
||||
firstDim := 0
|
||||
for _, t := range targets {
|
||||
if t.detectedDim > 0 {
|
||||
firstDim = t.detectedDim
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
srcDimStr := ""
|
||||
if firstDim > 0 {
|
||||
srcDimStr = fmt.Sprintf("%d", firstDim)
|
||||
}
|
||||
|
||||
sourceDimRaw, err := promptString(in,
|
||||
fmt.Sprintf("Source dimension (native model output dim)%s: ", defaultHint(srcDimStr)), srcDimStr)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
sourceDim := mustParseInt(sourceDimRaw, firstDim)
|
||||
|
||||
targetDimRaw, err := promptString(in, "Target dimension (output dim vecna will serve) [1536]: ", "1536")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
targetDim := mustParseInt(targetDimRaw, 1536)
|
||||
|
||||
adapterType, err := promptString(in, "Adapter type (truncate/random/projection) [truncate]: ", "truncate")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
truncateMode := "from_end"
|
||||
padMode := "at_end"
|
||||
if adapterType == "truncate" {
|
||||
truncateMode, err = promptString(in, "Truncate mode (from_end/from_start) [from_end]: ", "from_end")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
padMode, err = promptString(in, "Pad mode (at_end/at_start) [at_end]: ", "at_end")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
fmt.Println()
|
||||
|
||||
// ── Step 4: Configure vecna server ────────────────────────────────────────
|
||||
|
||||
step(4, 5, "Configure vecna server")
|
||||
|
||||
portRaw, err := promptString(in, "Bind port [8080]: ", "8080")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
port := mustParseInt(portRaw, 8080)
|
||||
|
||||
apiKeysRaw, err := promptString(in,
|
||||
"Inbound API keys for vecna (comma-separated, leave empty to disable auth): ", "")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
var apiKeys []string
|
||||
for _, k := range strings.Split(apiKeysRaw, ",") {
|
||||
if k := strings.TrimSpace(k); k != "" {
|
||||
apiKeys = append(apiKeys, k)
|
||||
}
|
||||
}
|
||||
|
||||
enableMetrics, err := promptBool(in, "Enable Prometheus /metrics endpoint?", false)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
metricsAPIKey := ""
|
||||
if enableMetrics {
|
||||
metricsAPIKey, err = promptString(in, "Metrics API key (leave empty for open access): ", "")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
fmt.Println()
|
||||
|
||||
// ── Step 5: Test & write ──────────────────────────────────────────────────
|
||||
|
||||
step(5, 5, "Test connections and write config")
|
||||
|
||||
allPassed := true
|
||||
for _, t := range targets {
|
||||
fmt.Printf("Testing %-45s ", t.endpoint+"...")
|
||||
_, elapsed, dims, testErr := runSingleTest(t)
|
||||
if testErr != nil {
|
||||
fmt.Printf("FAIL %s\n", truncate(testErr.Error(), 55))
|
||||
allPassed = false
|
||||
} else {
|
||||
fmt.Printf("OK %dms dims=%d\n", elapsed.Milliseconds(), dims)
|
||||
}
|
||||
}
|
||||
fmt.Println()
|
||||
|
||||
if !allPassed {
|
||||
proceed, err := promptBool(in, "Some tests failed. Write config anyway?", false)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if !proceed {
|
||||
fmt.Println("Aborted. No config written.")
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// Build the config struct
|
||||
defaultTarget := ""
|
||||
forwardTargets := make(map[string]config.ForwardTarget, len(targets))
|
||||
for i, t := range targets {
|
||||
forwardTargets[t.name] = config.ForwardTarget{
|
||||
APIType: t.apiType,
|
||||
Model: t.model,
|
||||
APIKey: t.apiKey,
|
||||
Endpoints: []config.EndpointConfig{
|
||||
{URL: t.endpoint, Priority: 10},
|
||||
},
|
||||
TimeoutSecs: 30,
|
||||
CooldownSecs: 60,
|
||||
PriorityDecay: 2,
|
||||
PriorityRecovery: 5,
|
||||
}
|
||||
if i == 0 {
|
||||
defaultTarget = t.name
|
||||
}
|
||||
}
|
||||
|
||||
cfg := config.Config{
|
||||
Server: config.ServerConfig{
|
||||
Port: port,
|
||||
Host: "0.0.0.0",
|
||||
APIKeys: apiKeys,
|
||||
},
|
||||
Metrics: config.MetricsConfig{
|
||||
Enabled: enableMetrics,
|
||||
Path: "/metrics",
|
||||
APIKey: metricsAPIKey,
|
||||
},
|
||||
Forward: config.ForwardConfig{
|
||||
Default: defaultTarget,
|
||||
Targets: forwardTargets,
|
||||
},
|
||||
Adapter: config.AdapterConfig{
|
||||
Type: adapterType,
|
||||
SourceDim: sourceDim,
|
||||
TargetDim: targetDim,
|
||||
TruncateMode: truncateMode,
|
||||
PadMode: padMode,
|
||||
},
|
||||
}
|
||||
|
||||
defaultCfgPath := config.ResolveFile(cfgFile)
|
||||
fmt.Printf("Config will be written to: %s\n", defaultCfgPath)
|
||||
cfgPath, err := promptString(in, "Config path (press Enter to accept): ", defaultCfgPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := writeFullConfig(cfgPath, cfg); err != nil {
|
||||
return fmt.Errorf("write config: %w", err)
|
||||
}
|
||||
|
||||
fmt.Printf("Config written to %s\n", cfgPath)
|
||||
fmt.Println()
|
||||
fmt.Println("Run 'vecna serve' to start the proxy server.")
|
||||
return nil
|
||||
}
|
||||
|
||||
// ── helpers ──────────────────────────────────────────────────────────────────
|
||||
|
||||
// pendingTarget holds the answers gathered for one forwarding target during
// onboarding, before the final config is assembled.
type pendingTarget struct {
	name        string // key of the target in the config's forward targets
	endpoint    string // server URL
	model       string // model name sent with embed requests
	apiType     string // "google" selects the Google client; anything else uses the OpenAI client
	apiKey      string // API key passed to the embed client; may be empty
	detectedDim int    // dimension found by probing; 0 when probing failed
}
|
||||
|
||||
// step prints a wizard step header of the form "[n/total] title" followed by
// a rule of 40 dashes.
func step(n, total int, title string) {
	rule := strings.Repeat("-", 40)
	fmt.Printf("[%d/%d] %s\n%s\n", n, total, title, rule)
}
|
||||
|
||||
// defaultHint renders s as a prompt suffix of the form " [s]". An empty s
// produces an empty string.
func defaultHint(s string) string {
	if s != "" {
		return " [" + s + "]"
	}
	return ""
}
|
||||
|
||||
// joinModels formats a model list for display.
//
// An empty list becomes "(none)". Up to five names are joined with ", ".
// Longer lists show the first five followed by " (+N more)".
func joinModels(models []string) string {
	const maxShown = 5

	switch count := len(models); {
	case count == 0:
		return "(none)"
	case count > maxShown:
		shown := strings.Join(models[:maxShown], ", ")
		return fmt.Sprintf("%s (+%d more)", shown, count-maxShown)
	default:
		return strings.Join(models, ", ")
	}
}
|
||||
|
||||
func collectDiscoveredTarget(in *bufio.Reader, srv discovery.Found) (pendingTarget, error) {
|
||||
defaultName := strings.ToLower(strings.ReplaceAll(srv.Kind.Name, " ", "_"))
|
||||
|
||||
model, err := pickModel(in, srv.Models)
|
||||
if err != nil {
|
||||
return pendingTarget{}, err
|
||||
}
|
||||
|
||||
name, err := promptString(in, fmt.Sprintf("Target name in config [%s]: ", defaultName), defaultName)
|
||||
if err != nil {
|
||||
return pendingTarget{}, err
|
||||
}
|
||||
|
||||
apiKey, err := promptAPIKey(in, srv.Kind.NeedsKey)
|
||||
if err != nil {
|
||||
return pendingTarget{}, err
|
||||
}
|
||||
|
||||
return pendingTarget{
|
||||
name: name,
|
||||
endpoint: srv.BaseURL,
|
||||
model: model,
|
||||
apiType: srv.Kind.APIType,
|
||||
apiKey: apiKey,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func collectManualTarget(in *bufio.Reader) (pendingTarget, error) {
|
||||
fmt.Println("Enter server details manually:")
|
||||
|
||||
endpoint, err := promptString(in, "Server URL (e.g. http://localhost:11434): ", "")
|
||||
if err != nil || endpoint == "" {
|
||||
return pendingTarget{}, fmt.Errorf("server URL is required")
|
||||
}
|
||||
|
||||
apiTypeStr, err := promptString(in, "API type (openai/google) [openai]: ", "openai")
|
||||
if err != nil {
|
||||
return pendingTarget{}, err
|
||||
}
|
||||
|
||||
model, err := promptString(in, "Model name: ", "")
|
||||
if err != nil || model == "" {
|
||||
return pendingTarget{}, fmt.Errorf("model name is required")
|
||||
}
|
||||
|
||||
name, err := promptString(in, "Target name in config [custom]: ", "custom")
|
||||
if err != nil {
|
||||
return pendingTarget{}, err
|
||||
}
|
||||
|
||||
apiKey, err := promptAPIKey(in, false)
|
||||
if err != nil {
|
||||
return pendingTarget{}, err
|
||||
}
|
||||
|
||||
return pendingTarget{
|
||||
name: name,
|
||||
endpoint: endpoint,
|
||||
model: model,
|
||||
apiType: apiTypeStr,
|
||||
apiKey: apiKey,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func pickModel(in *bufio.Reader, models []string) (string, error) {
|
||||
switch {
|
||||
case len(models) == 0:
|
||||
return promptString(in, "Model name: ", "")
|
||||
case len(models) == 1:
|
||||
fmt.Printf("Using model: %s\n", models[0])
|
||||
return models[0], nil
|
||||
default:
|
||||
fmt.Println("Available models:")
|
||||
for i, m := range models {
|
||||
fmt.Printf(" [%d] %s\n", i+1, m)
|
||||
}
|
||||
idx, err := promptInt(in, fmt.Sprintf("Select model [1-%d]: ", len(models)), 1, len(models))
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return models[idx-1], nil
|
||||
}
|
||||
}
|
||||
|
||||
func promptAPIKey(in *bufio.Reader, required bool) (string, error) {
|
||||
prompt := "API key (leave empty if none): "
|
||||
if required {
|
||||
prompt = "API key: "
|
||||
}
|
||||
return promptString(in, prompt, "")
|
||||
}
|
||||
|
||||
// detectDim sends a single test embedding and returns the vector length.
|
||||
func detectDim(t pendingTarget) (int, error) {
|
||||
httpClient := &http.Client{Timeout: 10 * time.Second}
|
||||
var client embedclient.Client
|
||||
if t.apiType == "google" {
|
||||
client = embedclient.NewGoogle(t.endpoint, t.apiKey, t.model, httpClient)
|
||||
} else {
|
||||
client = embedclient.NewOpenAI(t.endpoint, t.apiKey, httpClient)
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||
defer cancel()
|
||||
|
||||
resp, err := client.Embed(ctx, embedclient.Request{
|
||||
Texts: []string{"dimension probe"},
|
||||
Model: t.model,
|
||||
})
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
if len(resp.Embeddings) == 0 || len(resp.Embeddings[0]) == 0 {
|
||||
return 0, fmt.Errorf("empty embedding in response")
|
||||
}
|
||||
return len(resp.Embeddings[0]), nil
|
||||
}
|
||||
|
||||
// runSingleTest runs one test embed and returns success, elapsed time, dims, and any error.
|
||||
func runSingleTest(t pendingTarget) (bool, time.Duration, int, error) {
|
||||
httpClient := &http.Client{Timeout: 30 * time.Second}
|
||||
var client embedclient.Client
|
||||
if t.apiType == "google" {
|
||||
client = embedclient.NewGoogle(t.endpoint, t.apiKey, t.model, httpClient)
|
||||
} else {
|
||||
client = embedclient.NewOpenAI(t.endpoint, t.apiKey, httpClient)
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
|
||||
defer cancel()
|
||||
|
||||
start := time.Now()
|
||||
resp, err := client.Embed(ctx, embedclient.Request{
|
||||
Texts: []string{testPhrase},
|
||||
Model: t.model,
|
||||
})
|
||||
elapsed := time.Since(start)
|
||||
if err != nil {
|
||||
return false, elapsed, 0, err
|
||||
}
|
||||
|
||||
dims, _ := embeddingStats(resp.Embeddings)
|
||||
return true, elapsed, dims, nil
|
||||
}
|
||||
|
||||
// mustParseInt scans a decimal integer from the start of s with fmt.Sscanf.
// It returns fallback when scanning fails or the value is not strictly
// positive. Despite its name it never panics.
func mustParseInt(s string, fallback int) int {
	var parsed int
	if _, scanErr := fmt.Sscanf(s, "%d", &parsed); scanErr != nil {
		return fallback
	}
	if parsed <= 0 {
		return fallback
	}
	return parsed
}
|
||||
|
||||
func writeFullConfig(path string, cfg config.Config) error {
|
||||
// If file already exists, preserve any targets not touched by onboard
|
||||
// by using SaveTarget for each new target; otherwise write the whole file.
|
||||
if _, err := os.Stat(path); os.IsNotExist(err) {
|
||||
if err := createDefaultConfig(path); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
// Overwrite with the complete onboard config
|
||||
return config.WriteConfig(path, cfg)
|
||||
}
|
||||
59
cmd/vecna/prompt.go
Normal file
59
cmd/vecna/prompt.go
Normal file
@@ -0,0 +1,59 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// promptInt prints prompt and reads an integer in [min, max] from in,
// re-prompting on bad input.
//
// bufio.Reader.ReadString returns the bytes read so far together with the
// error when the input ends without a trailing newline, so a final
// unterminated line (for example from piped input) is still parsed. A read
// error is returned only when that last line is empty or not a valid answer.
func promptInt(in *bufio.Reader, prompt string, min, max int) (int, error) {
	for {
		fmt.Print(prompt)
		line, readErr := in.ReadString('\n')
		answer := strings.TrimSpace(line)
		if readErr != nil && answer == "" {
			return 0, fmt.Errorf("read input: %w", readErr)
		}

		n, convErr := strconv.Atoi(answer)
		if convErr == nil && n >= min && n <= max {
			return n, nil
		}
		if readErr != nil {
			// The input is exhausted, so re-prompting cannot succeed.
			return 0, fmt.Errorf("read input: %w", readErr)
		}
		fmt.Printf(" Enter a number between %d and %d\n", min, max)
	}
}
|
||||
|
||||
// promptString prints prompt and reads one line from in. It returns the
// trimmed line, or defaultVal when the user presses Enter with no input.
//
// bufio.Reader.ReadString returns the bytes read so far together with the
// error when the input ends without a trailing newline, so a final
// unterminated line (for example from piped input) is accepted. A read error
// is returned only when no non-blank text was read.
func promptString(in *bufio.Reader, prompt, defaultVal string) (string, error) {
	fmt.Print(prompt)
	line, readErr := in.ReadString('\n')
	if answer := strings.TrimSpace(line); answer != "" {
		return answer, nil
	}
	if readErr != nil {
		return "", fmt.Errorf("read input: %w", readErr)
	}
	return defaultVal, nil
}
|
||||
|
||||
// promptBool prints prompt with a y/N (or Y/n) hint and reads a confirmation
// from in.
//
// "y"/"yes" give true and "n"/"no" give false, case-insensitively. Any other
// answer, including an empty line, gives defaultVal.
//
// bufio.Reader.ReadString returns the bytes read so far together with the
// error when the input ends without a trailing newline, so a final
// unterminated answer (for example from piped input) is accepted. A read
// error is returned only when no non-blank text was read.
func promptBool(in *bufio.Reader, prompt string, defaultVal bool) (bool, error) {
	hint := "y/N"
	if defaultVal {
		hint = "Y/n"
	}
	fmt.Printf("%s [%s]: ", prompt, hint)

	line, readErr := in.ReadString('\n')
	answer := strings.ToLower(strings.TrimSpace(line))
	if readErr != nil && answer == "" {
		return false, fmt.Errorf("read input: %w", readErr)
	}

	switch answer {
	case "y", "yes":
		return true, nil
	case "n", "no":
		return false, nil
	default:
		return defaultVal, nil
	}
}
|
||||
154
cmd/vecna/query.go
Normal file
154
cmd/vecna/query.go
Normal file
@@ -0,0 +1,154 @@
|
||||
package main
|
||||
|
||||
import (
	"context"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"os"
	"strings"
	"time"

	"github.com/spf13/cobra"

	"github.com/Warky-Devs/vecna.git/pkg/config"
	"github.com/Warky-Devs/vecna.git/pkg/embedclient"
)
|
||||
|
||||
// Flag-backed settings for the query subcommand.
var (
	// queryTarget holds the value of --target.
	queryTarget string
	// queryRaw holds the value of --raw.
	queryRaw bool
	// queryCompact holds the value of --compact.
	queryCompact bool
)
|
||||
|
||||
var queryCmd = &cobra.Command{
|
||||
Use: "query <text>",
|
||||
Short: "Embed text and print the resulting vector",
|
||||
Long: `Sends text to the configured forwarding target, applies the dimension adapter,
|
||||
and prints the resulting vector as a JSON array.
|
||||
|
||||
Text can be supplied as a positional argument or via stdin (use - as the argument).`,
|
||||
Args: cobra.MaximumNArgs(1),
|
||||
RunE: runQuery,
|
||||
}
|
||||
|
||||
func init() {
|
||||
queryCmd.Flags().StringVar(&queryTarget, "target", "",
|
||||
"forward target to use (default: forward.default from config)")
|
||||
queryCmd.Flags().BoolVar(&queryRaw, "raw", false,
|
||||
"skip the adapter — output the raw vector from the backing model")
|
||||
queryCmd.Flags().BoolVar(&queryCompact, "compact", false,
|
||||
"print vector on a single line instead of pretty-printed")
|
||||
rootCmd.AddCommand(queryCmd)
|
||||
}
|
||||
|
||||
func runQuery(_ *cobra.Command, args []string) error {
|
||||
cfg, err := config.Load(cfgFile)
|
||||
if err != nil {
|
||||
return fmt.Errorf("load config: %w", err)
|
||||
}
|
||||
|
||||
// Resolve text: positional arg, "-" reads stdin, no arg reads stdin.
|
||||
text, err := queryText(args)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Resolve target.
|
||||
targetName := queryTarget
|
||||
if targetName == "" {
|
||||
targetName = cfg.Forward.Default
|
||||
}
|
||||
target, ok := cfg.Forward.Targets[targetName]
|
||||
if !ok {
|
||||
return fmt.Errorf("target %q not found in config", targetName)
|
||||
}
|
||||
if len(target.Endpoints) == 0 {
|
||||
return fmt.Errorf("target %q has no endpoints", targetName)
|
||||
}
|
||||
|
||||
// Build client (use first endpoint directly — no router needed for a one-shot query).
|
||||
ep := target.Endpoints[0]
|
||||
apiKey := ep.APIKey
|
||||
if apiKey == "" {
|
||||
apiKey = target.APIKey
|
||||
}
|
||||
timeout := time.Duration(target.TimeoutSecs) * time.Second
|
||||
if timeout == 0 {
|
||||
timeout = 30 * time.Second
|
||||
}
|
||||
httpClient := &http.Client{Timeout: timeout}
|
||||
|
||||
var client embedclient.Client
|
||||
switch target.APIType {
|
||||
case "google":
|
||||
client = embedclient.NewGoogle(ep.URL, apiKey, target.Model, httpClient)
|
||||
default:
|
||||
client = embedclient.NewOpenAI(ep.URL, apiKey, httpClient)
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), timeout)
|
||||
defer cancel()
|
||||
|
||||
resp, err := client.Embed(ctx, embedclient.Request{
|
||||
Texts: []string{text},
|
||||
Model: target.Model,
|
||||
})
|
||||
if err != nil {
|
||||
return fmt.Errorf("embed: %w", err)
|
||||
}
|
||||
if len(resp.Embeddings) == 0 || len(resp.Embeddings[0]) == 0 {
|
||||
return fmt.Errorf("empty embedding in response")
|
||||
}
|
||||
|
||||
vec := resp.Embeddings[0]
|
||||
|
||||
if !queryRaw {
|
||||
adp, err := buildAdapter(cfg)
|
||||
if err != nil {
|
||||
return fmt.Errorf("build adapter: %w", err)
|
||||
}
|
||||
vec, err = adp.Adapt(vec)
|
||||
if err != nil {
|
||||
return fmt.Errorf("adapt: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
fmt.Fprintf(os.Stderr, "target=%s model=%s dims=%d tokens=%d\n",
|
||||
targetName, resp.Model, len(vec), resp.Usage.TotalTokens)
|
||||
|
||||
return printVector(vec)
|
||||
}
|
||||
|
||||
// queryText resolves the text to embed from the positional arguments.
//
// When a first argument is present and is not "-", it is returned verbatim.
// Otherwise the text is read from /dev/stdin and trailing newline characters
// are stripped before it is returned.
func queryText(args []string) (string, error) {
	if len(args) > 0 && args[0] != "-" {
		return args[0], nil
	}

	data, readErr := os.ReadFile("/dev/stdin")
	if readErr != nil {
		return "", fmt.Errorf("read stdin: %w", readErr)
	}

	text := strings.TrimRight(string(data), "\n")
	return text, nil
}
|
||||
|
||||
func printVector(vec []float32) error {
|
||||
// Convert to []any so json.Marshal produces clean floats without float32 quirks.
|
||||
out := make([]float64, len(vec))
|
||||
for i, v := range vec {
|
||||
out[i] = float64(v)
|
||||
}
|
||||
|
||||
var b []byte
|
||||
var err error
|
||||
if queryCompact {
|
||||
b, err = json.Marshal(out)
|
||||
} else {
|
||||
b, err = json.MarshalIndent(out, "", " ")
|
||||
}
|
||||
if err != nil {
|
||||
return fmt.Errorf("marshal vector: %w", err)
|
||||
}
|
||||
|
||||
fmt.Println(string(b))
|
||||
return nil
|
||||
}
|
||||
121
cmd/vecna/search.go
Normal file
121
cmd/vecna/search.go
Normal file
@@ -0,0 +1,121 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"github.com/spf13/cobra"
|
||||
|
||||
"github.com/Warky-Devs/vecna.git/pkg/config"
|
||||
"github.com/Warky-Devs/vecna.git/pkg/discovery"
|
||||
)
|
||||
|
||||
var searchCmd = &cobra.Command{
|
||||
Use: "search",
|
||||
Short: "Scan the network for LLM servers and add one to the config",
|
||||
RunE: runSearch,
|
||||
}
|
||||
|
||||
func init() {
|
||||
rootCmd.AddCommand(searchCmd)
|
||||
}
|
||||
|
||||
func runSearch(_ *cobra.Command, _ []string) error {
|
||||
in := bufio.NewReader(os.Stdin)
|
||||
|
||||
fmt.Println("Scanning for LLM servers (Ollama, LM Studio, vLLM, LocalAI, Jan, Kobold, Tabby)...")
|
||||
servers := discovery.Scan(context.Background())
|
||||
|
||||
if len(servers) == 0 {
|
||||
fmt.Println("No servers found.")
|
||||
return nil
|
||||
}
|
||||
|
||||
fmt.Printf("\nFound %d server(s):\n\n", len(servers))
|
||||
for i, s := range servers {
|
||||
modelList := strings.Join(s.Models, ", ")
|
||||
if modelList == "" {
|
||||
modelList = "(no models listed)"
|
||||
}
|
||||
fmt.Printf(" [%d] %s %s\n Models: %s\n\n", i+1, s.Kind.Name, s.BaseURL, modelList)
|
||||
}
|
||||
|
||||
// Select server
|
||||
chosen, err := promptInt(in, fmt.Sprintf("Select server [1-%d]: ", len(servers)), 1, len(servers))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
srv := servers[chosen-1]
|
||||
|
||||
// Select model
|
||||
var model string
|
||||
switch {
|
||||
case len(srv.Models) == 1:
|
||||
model = srv.Models[0]
|
||||
fmt.Printf("Using model: %s\n", model)
|
||||
case len(srv.Models) > 1:
|
||||
fmt.Println("\nAvailable models:")
|
||||
for i, m := range srv.Models {
|
||||
fmt.Printf(" [%d] %s\n", i+1, m)
|
||||
}
|
||||
idx, err := promptInt(in, fmt.Sprintf("Select model [1-%d]: ", len(srv.Models)), 1, len(srv.Models))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
model = srv.Models[idx-1]
|
||||
default:
|
||||
model, err = promptString(in, "Model name: ", "")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// Target name in config
|
||||
defaultName := strings.ToLower(strings.ReplaceAll(srv.Kind.Name, " ", "_"))
|
||||
targetName, err := promptString(in, fmt.Sprintf("Target name in config [%s]: ", defaultName), defaultName)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// API key
|
||||
keyPrompt := "API key (leave empty if none): "
|
||||
if srv.Kind.NeedsKey {
|
||||
keyPrompt = "API key: "
|
||||
}
|
||||
apiKey, err := promptString(in, keyPrompt, "")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
target := config.ForwardTarget{
|
||||
APIType: srv.Kind.APIType,
|
||||
Model: model,
|
||||
APIKey: apiKey,
|
||||
Endpoints: []config.EndpointConfig{
|
||||
{URL: srv.BaseURL, Priority: 10},
|
||||
},
|
||||
TimeoutSecs: 30,
|
||||
CooldownSecs: 60,
|
||||
PriorityDecay: 2,
|
||||
PriorityRecovery: 5,
|
||||
}
|
||||
|
||||
cfgPath := config.ResolveFile(cfgFile)
|
||||
|
||||
// Create default config if it doesn't exist yet
|
||||
if _, err := os.Stat(cfgPath); os.IsNotExist(err) {
|
||||
if err := createDefaultConfig(cfgPath); err != nil {
|
||||
return fmt.Errorf("create default config: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
if err := config.SaveTarget(cfgPath, targetName, target); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
fmt.Printf("\nAdded target %q to %s\n", targetName, cfgPath)
|
||||
return nil
|
||||
}
|
||||
150
cmd/vecna/serve.go
Normal file
150
cmd/vecna/serve.go
Normal file
@@ -0,0 +1,150 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"os"
|
||||
"os/signal"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"github.com/spf13/cobra"
|
||||
"go.uber.org/zap"
|
||||
|
||||
"github.com/Warky-Devs/vecna.git/pkg/config"
|
||||
"github.com/Warky-Devs/vecna.git/pkg/embedclient"
|
||||
"github.com/Warky-Devs/vecna.git/pkg/metrics"
|
||||
"github.com/Warky-Devs/vecna.git/pkg/server"
|
||||
)
|
||||
|
||||
var serveCmd = &cobra.Command{
|
||||
Use: "serve",
|
||||
Short: "Start the vecna embedding proxy server",
|
||||
RunE: runServe,
|
||||
}
|
||||
|
||||
func init() {
|
||||
serveCmd.Flags().String("host", "", "bind host (overrides config)")
|
||||
serveCmd.Flags().Int("port", 0, "bind port (overrides config)")
|
||||
}
|
||||
|
||||
func runServe(cmd *cobra.Command, _ []string) error {
|
||||
cfg, err := config.Load(cfgFile)
|
||||
if err != nil {
|
||||
return fmt.Errorf("load config: %w", err)
|
||||
}
|
||||
|
||||
// Flag overrides
|
||||
if h, _ := cmd.Flags().GetString("host"); h != "" {
|
||||
cfg.Server.Host = h
|
||||
}
|
||||
if p, _ := cmd.Flags().GetInt("port"); p != 0 {
|
||||
cfg.Server.Port = p
|
||||
}
|
||||
|
||||
adp, err := buildAdapter(cfg)
|
||||
if err != nil {
|
||||
return fmt.Errorf("build adapter: %w", err)
|
||||
}
|
||||
|
||||
clients, err := buildClients(cfg)
|
||||
if err != nil {
|
||||
return fmt.Errorf("build clients: %w", err)
|
||||
}
|
||||
|
||||
var reg *metrics.Registry
|
||||
if cfg.Metrics.Enabled {
|
||||
reg = metrics.New()
|
||||
}
|
||||
|
||||
router := server.New(cfg, clients, adp, reg, logger)
|
||||
|
||||
addr := fmt.Sprintf("%s:%d", cfg.Server.Host, cfg.Server.Port)
|
||||
srv := &http.Server{
|
||||
Addr: addr,
|
||||
Handler: router,
|
||||
}
|
||||
|
||||
// Graceful shutdown
|
||||
quit := make(chan os.Signal, 1)
|
||||
signal.Notify(quit, syscall.SIGINT, syscall.SIGTERM)
|
||||
|
||||
go func() {
|
||||
logger.Info("vecna listening", zap.String("addr", addr))
|
||||
if err := srv.ListenAndServe(); err != nil && err != http.ErrServerClosed {
|
||||
logger.Error("server error", zap.Error(err))
|
||||
quit <- syscall.SIGTERM
|
||||
}
|
||||
}()
|
||||
|
||||
<-quit
|
||||
logger.Info("shutting down")
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||
defer cancel()
|
||||
if err := srv.Shutdown(ctx); err != nil {
|
||||
return fmt.Errorf("graceful shutdown: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// buildClients constructs one embedclient.Client per named forward target.
|
||||
func buildClients(cfg *config.Config) (map[string]embedclient.Client, error) {
|
||||
clients := make(map[string]embedclient.Client, len(cfg.Forward.Targets))
|
||||
|
||||
for name, target := range cfg.Forward.Targets {
|
||||
if len(target.Endpoints) == 0 {
|
||||
return nil, fmt.Errorf("target %q has no endpoints", name)
|
||||
}
|
||||
|
||||
timeout := time.Duration(target.TimeoutSecs) * time.Second
|
||||
httpClient := &http.Client{Timeout: timeout}
|
||||
|
||||
slots := make([]embedclient.RouterSlot, len(target.Endpoints))
|
||||
for i, ep := range target.Endpoints {
|
||||
apiKey := ep.APIKey
|
||||
if apiKey == "" {
|
||||
apiKey = target.APIKey
|
||||
}
|
||||
|
||||
var c embedclient.Client
|
||||
switch target.APIType {
|
||||
case "google":
|
||||
c = embedclient.NewGoogle(ep.URL, apiKey, target.Model, httpClient)
|
||||
default: // "openai" or unset
|
||||
c = embedclient.NewOpenAI(ep.URL, apiKey, httpClient)
|
||||
}
|
||||
|
||||
slots[i] = embedclient.RouterSlot{
|
||||
Client: c,
|
||||
URL: ep.URL,
|
||||
Priority: ep.Priority,
|
||||
}
|
||||
}
|
||||
|
||||
routerCfg := embedclient.RouterConfig{
|
||||
TargetName: name,
|
||||
TimeoutSecs: target.TimeoutSecs,
|
||||
CooldownSecs: target.CooldownSecs,
|
||||
PriorityDecay: target.PriorityDecay,
|
||||
PriorityRecovery: target.PriorityRecovery,
|
||||
}
|
||||
|
||||
// metrics registry may be nil (disabled)
|
||||
var reg *metrics.Registry
|
||||
if cfg.Metrics.Enabled {
|
||||
// Registry is created in runServe before buildClients; pass nil here,
|
||||
// caller wires it in after creation. See note below.
|
||||
_ = reg
|
||||
}
|
||||
|
||||
router, err := embedclient.NewTargetRouter(slots, routerCfg, nil)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("build router for target %q: %w", name, err)
|
||||
}
|
||||
clients[name] = router
|
||||
}
|
||||
|
||||
return clients, nil
|
||||
}
|
||||
150
cmd/vecna/test.go
Normal file
150
cmd/vecna/test.go
Normal file
@@ -0,0 +1,150 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"math"
|
||||
"net/http"
|
||||
"time"
|
||||
|
||||
"github.com/spf13/cobra"
|
||||
|
||||
"github.com/Warky-Devs/vecna.git/pkg/config"
|
||||
"github.com/Warky-Devs/vecna.git/pkg/embedclient"
|
||||
)
|
||||
|
||||
// removeBroken is bound to the --remove-broken flag of the test command.
var removeBroken bool
|
||||
|
||||
var testCmd = &cobra.Command{
|
||||
Use: "test",
|
||||
Short: "Send a test embedding request to each configured forwarder",
|
||||
RunE: runTest,
|
||||
}
|
||||
|
||||
func init() {
|
||||
testCmd.Flags().BoolVar(&removeBroken, "remove-broken", false,
|
||||
"Remove endpoints (and targets with no endpoints left) that fail the test from the config file")
|
||||
rootCmd.AddCommand(testCmd)
|
||||
}
|
||||
|
||||
// testPhrase is the text runTest sends to every endpoint as the embedding input.
const testPhrase = "The quick brown fox jumps over the lazy dog"
|
||||
|
||||
func runTest(_ *cobra.Command, _ []string) error {
|
||||
cfg, err := config.Load(cfgFile)
|
||||
if err != nil {
|
||||
return fmt.Errorf("load config: %w", err)
|
||||
}
|
||||
|
||||
if len(cfg.Forward.Targets) == 0 {
|
||||
fmt.Println("No forwarder targets configured.")
|
||||
return nil
|
||||
}
|
||||
|
||||
fmt.Printf("Test phrase: %q\n\n", testPhrase)
|
||||
|
||||
passed, failed := 0, 0
|
||||
// brokenEndpoints maps target name → list of failing endpoint URLs
|
||||
brokenEndpoints := make(map[string][]string)
|
||||
|
||||
for targetName, target := range cfg.Forward.Targets {
|
||||
fmt.Printf("[ %s ] model: %s type: %s\n", targetName, target.Model, target.APIType)
|
||||
|
||||
timeout := time.Duration(target.TimeoutSecs) * time.Second
|
||||
if timeout == 0 {
|
||||
timeout = 30 * time.Second
|
||||
}
|
||||
httpClient := &http.Client{Timeout: timeout}
|
||||
|
||||
for _, ep := range target.Endpoints {
|
||||
apiKey := ep.APIKey
|
||||
if apiKey == "" {
|
||||
apiKey = target.APIKey
|
||||
}
|
||||
|
||||
var client embedclient.Client
|
||||
switch target.APIType {
|
||||
case "google":
|
||||
client = embedclient.NewGoogle(ep.URL, apiKey, target.Model, httpClient)
|
||||
default:
|
||||
client = embedclient.NewOpenAI(ep.URL, apiKey, httpClient)
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), timeout)
|
||||
start := time.Now()
|
||||
resp, embedErr := client.Embed(ctx, embedclient.Request{
|
||||
Texts: []string{testPhrase},
|
||||
Model: target.Model,
|
||||
})
|
||||
elapsed := time.Since(start)
|
||||
cancel()
|
||||
|
||||
if embedErr != nil {
|
||||
fmt.Printf(" %-45s FAIL %s\n", ep.URL, truncate(embedErr.Error(), 60))
|
||||
brokenEndpoints[targetName] = append(brokenEndpoints[targetName], ep.URL)
|
||||
failed++
|
||||
continue
|
||||
}
|
||||
|
||||
dims, norm := embeddingStats(resp.Embeddings)
|
||||
fmt.Printf(" %-45s OK %dms dims=%d norm=%.4f\n",
|
||||
ep.URL, elapsed.Milliseconds(), dims, norm)
|
||||
passed++
|
||||
}
|
||||
|
||||
if len(target.Endpoints) == 0 {
|
||||
fmt.Println(" (no endpoints configured)")
|
||||
}
|
||||
fmt.Println()
|
||||
}
|
||||
|
||||
fmt.Printf("Results: %d passed, %d failed\n", passed, failed)
|
||||
|
||||
if removeBroken && len(brokenEndpoints) > 0 {
|
||||
if err := applyRemoveBroken(brokenEndpoints); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if failed > 0 {
|
||||
return fmt.Errorf("%d forwarder(s) failed", failed)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func applyRemoveBroken(broken map[string][]string) error {
|
||||
cfgPath := config.ResolveFile(cfgFile)
|
||||
removed, err := config.RemoveBrokenEndpoints(cfgPath, broken)
|
||||
if err != nil {
|
||||
return fmt.Errorf("remove broken: %w", err)
|
||||
}
|
||||
if len(removed) == 0 {
|
||||
return nil
|
||||
}
|
||||
fmt.Println("\nRemoved from config:")
|
||||
for _, r := range removed {
|
||||
fmt.Printf(" - %s\n", r)
|
||||
}
|
||||
fmt.Printf("Config updated: %s\n", cfgPath)
|
||||
return nil
|
||||
}
|
||||
|
||||
// embeddingStats reports the length and the L2 norm of the first embedding.
// Both results are zero when there is no first embedding or it is empty.
// The squares are summed in float64 and the root is narrowed to float32.
func embeddingStats(embeddings [][]float32) (dims int, norm float32) {
	if len(embeddings) == 0 {
		return 0, 0
	}

	first := embeddings[0]
	if len(first) == 0 {
		return 0, 0
	}

	var sumSquares float64
	for i := range first {
		x := float64(first[i])
		sumSquares += x * x
	}
	return len(first), float32(math.Sqrt(sumSquares))
}
|
||||
|
||||
// truncate shortens s to at most n bytes, ending the result with "..." when
// anything was cut. Strings that already fit are returned unchanged.
//
// The cut always lands on a rune boundary, so a multi-byte UTF-8 character is
// never split. When n is 3 or less there is no room for text before the
// ellipsis, so the result is as many dots as fit (none for n <= 0).
func truncate(s string, n int) string {
	if len(s) <= n {
		return s
	}
	if n <= 3 {
		// Slicing s[:n-3] here would use a negative or zero bound.
		if n < 0 {
			n = 0
		}
		return "..."[:n]
	}

	// Ranging over a string yields the byte offset of each rune start; keep
	// the last one that still leaves room for the three-byte ellipsis.
	cut := 0
	for i := range s {
		if i > n-3 {
			break
		}
		cut = i
	}
	return s[:cut] + "..."
}
|
||||
15
cmd/vecna/version.go
Normal file
15
cmd/vecna/version.go
Normal file
@@ -0,0 +1,15 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/spf13/cobra"
|
||||
)
|
||||
|
||||
var versionCmd = &cobra.Command{
|
||||
Use: "version",
|
||||
Short: "Print the version",
|
||||
Run: func(cmd *cobra.Command, args []string) {
|
||||
fmt.Println(version)
|
||||
},
|
||||
}
|
||||
Reference in New Issue
Block a user