//go:build integration package integration import ( "context" "math" "net/http" "os" "strconv" "testing" "time" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/Warky-Devs/vecna.git/pkg/adapter" "github.com/Warky-Devs/vecna.git/pkg/embedclient" ) // Environment variables that configure the integration tests: // // VECNA_TEST_URL base URL of the embedding server (required) // VECNA_TEST_MODEL model name to request (required) // VECNA_TEST_API_TYPE "openai" (default) or "google" // VECNA_TEST_API_KEY bearer token, empty if not needed // // Example (Ollama): // // VECNA_TEST_URL=http://localhost:11434 VECNA_TEST_MODEL=nomic-embed-text \ // go test -tags integration ./tests/integration/ const testText = "The quick brown fox jumps over the lazy dog" // cfg holds resolved test parameters. type cfg struct { url string model string apiType string apiKey string } func loadCfg(t *testing.T) cfg { t.Helper() url := os.Getenv("VECNA_TEST_URL") if url == "" { t.Skip("VECNA_TEST_URL not set — skipping integration tests") } model := os.Getenv("VECNA_TEST_MODEL") if model == "" { t.Skip("VECNA_TEST_MODEL not set — skipping integration tests") } apiType := os.Getenv("VECNA_TEST_API_TYPE") if apiType == "" { apiType = "openai" } return cfg{ url: url, model: model, apiType: apiType, apiKey: os.Getenv("VECNA_TEST_API_KEY"), } } func newClient(c cfg) embedclient.Client { httpClient := &http.Client{Timeout: 30 * time.Second} if c.apiType == "google" { return embedclient.NewGoogle(c.url, c.apiKey, c.model, httpClient) } return embedclient.NewOpenAI(c.url, c.apiKey, httpClient) } // embed fetches a single embedding vector for testText. func embed(t *testing.T, client embedclient.Client, model string) []float32 { t.Helper() resp, err := client.Embed(context.Background(), embedclient.Request{ Texts: []string{testText}, Model: model, }) require.NoError(t, err, "embedding request failed") require.Len(t, resp.Embeddings, 1, "expected exactly one embedding in response") require.NotEmpty(t, resp.Embeddings[0], "embedding vector is empty") return resp.Embeddings[0] } func l2Norm(v []float32) float64 { var sum float64 for _, x := range v { sum += float64(x) * float64(x) } return math.Sqrt(sum) } // assertUnitNorm checks the vector is approximately L2-normalised. func assertUnitNorm(t *testing.T, v []float32) { t.Helper() norm := l2Norm(v) assert.InDelta(t, 1.0, norm, 0.01, "expected unit L2 norm after adaptation") } // ---- Tests ---------------------------------------------------------------- // TestNativeDimension verifies the server returns a non-empty vector. // This is the baseline; the native dimension is logged so it can be used // as VECNA_TEST_SOURCE_DIM for the dimension tests below. func TestNativeDimension(t *testing.T) { c := loadCfg(t) client := newClient(c) vec := embed(t, client, c.model) t.Logf("native dimension: %d", len(vec)) t.Logf("native L2 norm: %.6f", l2Norm(vec)) assert.Greater(t, len(vec), 0) } // TestDownscaleTruncate tests truncation to half the native dimension. func TestDownscaleTruncate(t *testing.T) { c := loadCfg(t) client := newClient(c) vec := embed(t, client, c.model) srcDim := len(vec) tgtDim := srcDim / 2 if tgtDim == 0 { t.Skipf("source dim %d too small to halve", srcDim) } adp, err := adapter.NewTruncate(srcDim, tgtDim, adapter.TruncateFromEnd, adapter.PadAtEnd) require.NoError(t, err) out, err := adp.Adapt(vec) require.NoError(t, err) assert.Len(t, out, tgtDim, "output dimension mismatch") assertUnitNorm(t, out) t.Logf("downscale truncate: %d → %d norm=%.6f", srcDim, tgtDim, l2Norm(out)) } // TestDownscaleTruncateFromStart tests keeping the last N dims. func TestDownscaleTruncateFromStart(t *testing.T) { c := loadCfg(t) client := newClient(c) vec := embed(t, client, c.model) srcDim := len(vec) tgtDim := srcDim / 2 if tgtDim == 0 { t.Skipf("source dim %d too small to halve", srcDim) } adp, err := adapter.NewTruncate(srcDim, tgtDim, adapter.TruncateFromStart, adapter.PadAtEnd) require.NoError(t, err) out, err := adp.Adapt(vec) require.NoError(t, err) assert.Len(t, out, tgtDim) assertUnitNorm(t, out) t.Logf("downscale truncate-from-start: %d → %d norm=%.6f", srcDim, tgtDim, l2Norm(out)) } // TestDownscaleRandom tests random projection to a lower dimension. func TestDownscaleRandom(t *testing.T) { c := loadCfg(t) client := newClient(c) vec := embed(t, client, c.model) srcDim := len(vec) tgtDim := srcDim / 2 if tgtDim == 0 { t.Skipf("source dim %d too small to halve", srcDim) } adp, err := adapter.NewRandom(srcDim, tgtDim, 42) require.NoError(t, err) out, err := adp.Adapt(vec) require.NoError(t, err) assert.Len(t, out, tgtDim) assertUnitNorm(t, out) t.Logf("downscale random: %d → %d norm=%.6f", srcDim, tgtDim, l2Norm(out)) } // TestDownscaleToFixed tests truncation to a fixed well-known target (e.g. 768 → 256). // Skips if the native dimension is not larger than the target. func TestDownscaleToFixed(t *testing.T) { c := loadCfg(t) client := newClient(c) tgtDim := intEnv("VECNA_TEST_TARGET_DIM", 256) vec := embed(t, client, c.model) srcDim := len(vec) if srcDim <= tgtDim { t.Skipf("native dim %d is not larger than target dim %d", srcDim, tgtDim) } adp, err := adapter.NewTruncate(srcDim, tgtDim, adapter.TruncateFromEnd, adapter.PadAtEnd) require.NoError(t, err) out, err := adp.Adapt(vec) require.NoError(t, err) assert.Len(t, out, tgtDim) assertUnitNorm(t, out) t.Logf("downscale to fixed: %d → %d norm=%.6f", srcDim, tgtDim, l2Norm(out)) } // TestUpscalePadEnd tests zero-padding to double the native dimension. func TestUpscalePadEnd(t *testing.T) { c := loadCfg(t) client := newClient(c) vec := embed(t, client, c.model) srcDim := len(vec) tgtDim := srcDim * 2 adp, err := adapter.NewTruncate(srcDim, tgtDim, adapter.TruncateFromEnd, adapter.PadAtEnd) require.NoError(t, err) out, err := adp.Adapt(vec) require.NoError(t, err) assert.Len(t, out, tgtDim) assertUnitNorm(t, out) // The second half of the raw output (before normalisation) should have been zero-padded. // After normalisation all values shrink but the last half should all be equal (zero → 0). t.Logf("upscale pad-end: %d → %d norm=%.6f", srcDim, tgtDim, l2Norm(out)) } // TestUpscalePadStart tests zero-padding prepended to the vector. func TestUpscalePadStart(t *testing.T) { c := loadCfg(t) client := newClient(c) vec := embed(t, client, c.model) srcDim := len(vec) tgtDim := srcDim * 2 adp, err := adapter.NewTruncate(srcDim, tgtDim, adapter.TruncateFromEnd, adapter.PadAtStart) require.NoError(t, err) out, err := adp.Adapt(vec) require.NoError(t, err) assert.Len(t, out, tgtDim) assertUnitNorm(t, out) t.Logf("upscale pad-start: %d → %d norm=%.6f", srcDim, tgtDim, l2Norm(out)) } // TestUpscaleRandom tests random projection to a higher dimension. func TestUpscaleRandom(t *testing.T) { c := loadCfg(t) client := newClient(c) vec := embed(t, client, c.model) srcDim := len(vec) tgtDim := srcDim * 2 adp, err := adapter.NewRandom(srcDim, tgtDim, 42) require.NoError(t, err) out, err := adp.Adapt(vec) require.NoError(t, err) assert.Len(t, out, tgtDim) assertUnitNorm(t, out) t.Logf("upscale random: %d → %d norm=%.6f", srcDim, tgtDim, l2Norm(out)) } // TestUpscaleToFixed tests upscaling to a fixed well-known target (e.g. 768 → 1536). // Skips if the native dimension is already larger than or equal to the target. func TestUpscaleToFixed(t *testing.T) { c := loadCfg(t) client := newClient(c) tgtDim := intEnv("VECNA_TEST_TARGET_DIM", 1536) vec := embed(t, client, c.model) srcDim := len(vec) if srcDim >= tgtDim { t.Skipf("native dim %d is not smaller than target dim %d", srcDim, tgtDim) } adp, err := adapter.NewTruncate(srcDim, tgtDim, adapter.TruncateFromEnd, adapter.PadAtEnd) require.NoError(t, err) out, err := adp.Adapt(vec) require.NoError(t, err) assert.Len(t, out, tgtDim) assertUnitNorm(t, out) t.Logf("upscale to fixed: %d → %d norm=%.6f", srcDim, tgtDim, l2Norm(out)) } // TestRoundtripConsistency embeds the same text twice and checks the vectors are identical. func TestRoundtripConsistency(t *testing.T) { c := loadCfg(t) client := newClient(c) v1 := embed(t, client, c.model) v2 := embed(t, client, c.model) require.Equal(t, len(v1), len(v2), "dimension mismatch between two identical requests") var maxDiff float32 for i := range v1 { d := v1[i] - v2[i] if d < 0 { d = -d } if d > maxDiff { maxDiff = d } } t.Logf("max element-wise diff between two identical embeds: %e", maxDiff) assert.Less(t, maxDiff, float32(1e-5), "embeddings for identical input should be deterministic") } // intEnv reads an integer from an env var, returning defaultVal if unset or invalid. func intEnv(key string, defaultVal int) int { if s := os.Getenv(key); s != "" { if n, err := strconv.Atoi(s); err == nil { return n } } return defaultVal }