mirror of
https://github.com/Warky-Devs/vecna.git
synced 2026-05-05 01:26:58 +00:00
feat: 🎉 Vectors na Vectors, the beginning
Translate 1536 <-> 768 , 3072 <-> 2048
This commit is contained in:
206
pkg/discovery/discovery.go
Normal file
206
pkg/discovery/discovery.go
Normal file
@@ -0,0 +1,206 @@
|
||||
package discovery
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net"
|
||||
"net/http"
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
// Kind describes a known server type.
type Kind struct {
	Name     string // human-readable product name, e.g. "Ollama"
	APIType  string // wire-protocol family; every known entry uses "openai"
	Port     int    // default TCP port this server listens on
	NeedsKey bool   // true when the server requires an API key (e.g. vLLM)
}
|
||||
|
||||
// Found is a server discovered on the network.
type Found struct {
	Kind    Kind     // which known server type responded
	BaseURL string   // "http://host:port" the server answered on
	Models  []string // model identifiers reported by the server (may be empty)
}
|
||||
|
||||
// knownServers lists server types by their default port and display name.
// NOTE(review): Ollama appears in this list like everything else; it is
// special-cased by name inside probe() (it uses a non-OpenAI probe
// endpoint, /api/tags), so "listed separately" in the old comment was stale.
var knownServers = []Kind{
	{Name: "Ollama", APIType: "openai", Port: 11434},
	{Name: "LM Studio", APIType: "openai", Port: 1234},
	{Name: "vLLM", APIType: "openai", Port: 8000, NeedsKey: true},
	{Name: "LocalAI", APIType: "openai", Port: 8080},
	{Name: "Jan", APIType: "openai", Port: 1337},
	{Name: "Kobold", APIType: "openai", Port: 5001},
	{Name: "Tabby", APIType: "openai", Port: 9090},
}
|
||||
|
||||
// Scan concurrently probes localhost and LAN gateway addresses for known LLM servers.
|
||||
// Results are returned in the order they are found (non-deterministic).
|
||||
func Scan(ctx context.Context) []Found {
|
||||
hosts := localHosts()
|
||||
|
||||
var (
|
||||
mu sync.Mutex
|
||||
results []Found
|
||||
wg sync.WaitGroup
|
||||
)
|
||||
|
||||
probeCtx, cancel := context.WithTimeout(ctx, 4*time.Second)
|
||||
defer cancel()
|
||||
|
||||
httpClient := &http.Client{Timeout: 600 * time.Millisecond}
|
||||
|
||||
for _, host := range hosts {
|
||||
for _, kind := range knownServers {
|
||||
host := host
|
||||
wg.Add(1)
|
||||
go func(kind Kind) {
|
||||
defer wg.Done()
|
||||
baseURL := fmt.Sprintf("http://%s:%d", host, kind.Port)
|
||||
models, err := probe(probeCtx, httpClient, baseURL, kind)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
mu.Lock()
|
||||
results = append(results, Found{Kind: kind, BaseURL: baseURL, Models: models})
|
||||
mu.Unlock()
|
||||
}(kind)
|
||||
}
|
||||
}
|
||||
|
||||
wg.Wait()
|
||||
return results
|
||||
}
|
||||
|
||||
// Models fetches the model list from a single base URL and kind (for the models command).
|
||||
func Models(ctx context.Context, baseURL string, kind Kind) ([]string, error) {
|
||||
httpClient := &http.Client{Timeout: 5 * time.Second}
|
||||
return probe(ctx, httpClient, baseURL, kind)
|
||||
}
|
||||
|
||||
// localHosts returns localhost plus the ".1" likely-gateway address of every
// local IPv4 subnet, deduplicated, with "127.0.0.1" always first.
//
// Best effort: any interface or address error simply skips that entry, so
// the caller always gets at least the loopback address.
func localHosts() []string {
	seen := map[string]bool{"127.0.0.1": true}
	hosts := []string{"127.0.0.1"}

	ifaces, err := net.Interfaces()
	if err != nil {
		// No interface info — we can still probe loopback.
		return hosts
	}

	for _, iface := range ifaces {
		if iface.Flags&net.FlagUp == 0 {
			continue // skip interfaces that are down
		}
		addrs, err := iface.Addrs()
		if err != nil {
			continue
		}
		for _, addr := range addrs {
			ipnet, ok := addr.(*net.IPNet)
			if !ok {
				continue
			}
			ip := ipnet.IP.To4()
			if ip == nil || ip.IsLoopback() {
				// IPv6 and loopback addresses carry no LAN gateway guess.
				continue
			}
			// Derive the likely gateway: network address + 1.
			// net.IP.Mask returns nil on an ip/mask length mismatch, so
			// guard before indexing — the old code could panic here.
			gw := ip.Mask(ipnet.Mask)
			if len(gw) != 4 {
				continue
			}
			gw[3] = 1
			if h := gw.String(); !seen[h] {
				seen[h] = true
				hosts = append(hosts, h)
			}
		}
	}
	return hosts
}
|
||||
|
||||
// probe attempts to identify the server at baseURL and returns its model list.
|
||||
func probe(ctx context.Context, client *http.Client, baseURL string, kind Kind) ([]string, error) {
|
||||
// Ollama has its own endpoint; everything else is OpenAI-compatible
|
||||
if kind.Name == "Ollama" {
|
||||
models, err := probeOllama(ctx, client, baseURL)
|
||||
if err != nil {
|
||||
// Ollama also exposes /v1/models since v0.1.27 — fall back
|
||||
return probeOpenAI(ctx, client, baseURL)
|
||||
}
|
||||
return models, nil
|
||||
}
|
||||
return probeOpenAI(ctx, client, baseURL)
|
||||
}
|
||||
|
||||
// --- Ollama ---

// ollamaTagsResponse mirrors the subset of Ollama's /api/tags reply that
// we consume: just the name of each installed model.
type ollamaTagsResponse struct {
	Models []struct {
		Name string `json:"name"`
	} `json:"models"`
}
|
||||
|
||||
func probeOllama(ctx context.Context, client *http.Client, baseURL string) ([]string, error) {
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, baseURL+"/api/tags", nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer func() { _ = resp.Body.Close() }()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return nil, fmt.Errorf("status %d", resp.StatusCode)
|
||||
}
|
||||
|
||||
var body ollamaTagsResponse
|
||||
if err := json.NewDecoder(resp.Body).Decode(&body); err != nil {
|
||||
return nil, fmt.Errorf("decode ollama response: %w", err)
|
||||
}
|
||||
|
||||
models := make([]string, len(body.Models))
|
||||
for i, m := range body.Models {
|
||||
models[i] = m.Name
|
||||
}
|
||||
return models, nil
|
||||
}
|
||||
|
||||
// --- OpenAI-compatible ---

// openAIModelsResponse mirrors the subset of the OpenAI-style GET /v1/models
// reply that we consume: just the id of each entry in "data".
type openAIModelsResponse struct {
	Data []struct {
		ID string `json:"id"`
	} `json:"data"`
}
|
||||
|
||||
func probeOpenAI(ctx context.Context, client *http.Client, baseURL string) ([]string, error) {
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, baseURL+"/v1/models", nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer func() { _ = resp.Body.Close() }()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return nil, fmt.Errorf("status %d", resp.StatusCode)
|
||||
}
|
||||
|
||||
var body openAIModelsResponse
|
||||
if err := json.NewDecoder(resp.Body).Decode(&body); err != nil {
|
||||
return nil, fmt.Errorf("decode openai response: %w", err)
|
||||
}
|
||||
|
||||
models := make([]string, len(body.Data))
|
||||
for i, m := range body.Data {
|
||||
models[i] = m.ID
|
||||
}
|
||||
return models, nil
|
||||
}
|
||||
Reference in New Issue
Block a user