mirror of
https://github.com/Warky-Devs/vecna.git
synced 2026-05-05 01:26:58 +00:00
331 lines
8.9 KiB
Go
331 lines
8.9 KiB
Go
//go:build integration
|
|
|
|
package integration
|
|
|
|
import (
|
|
"context"
|
|
"math"
|
|
"net/http"
|
|
"os"
|
|
"strconv"
|
|
"testing"
|
|
"time"
|
|
|
|
"github.com/stretchr/testify/assert"
|
|
"github.com/stretchr/testify/require"
|
|
|
|
"github.com/Warky-Devs/vecna.git/pkg/adapter"
|
|
"github.com/Warky-Devs/vecna.git/pkg/embedclient"
|
|
)
|
|
|
|
// Environment variables that configure the integration tests:
//
//	VECNA_TEST_URL         base URL of the embedding server (required)
//	VECNA_TEST_MODEL       model name to request (required)
//	VECNA_TEST_API_TYPE    "openai" (default) or "google"
//	VECNA_TEST_API_KEY     bearer token, empty if not needed
//	VECNA_TEST_TARGET_DIM  target dimension for TestDownscaleToFixed (default 256)
//	                       and TestUpscaleToFixed (default 1536)
//
// Example (Ollama):
//
//	VECNA_TEST_URL=http://localhost:11434 VECNA_TEST_MODEL=nomic-embed-text \
//	  go test -tags integration ./tests/integration/
|
|
|
|
// testText is the fixed sentence that embed sends to the embedding server.
const testText = "The quick brown fox jumps over the lazy dog"
|
|
|
|
// cfg carries the settings an integration test needs to reach the embedding
// server. loadCfg fills it from the VECNA_TEST_* environment variables.
type cfg struct {
	url     string // VECNA_TEST_URL: base URL of the embedding server
	model   string // VECNA_TEST_MODEL: model name to request
	apiType string // VECNA_TEST_API_TYPE: "google" selects the Google client, anything else OpenAI
	apiKey  string // VECNA_TEST_API_KEY: bearer token, may be empty
}
|
|
|
|
func loadCfg(t *testing.T) cfg {
|
|
t.Helper()
|
|
url := os.Getenv("VECNA_TEST_URL")
|
|
if url == "" {
|
|
t.Skip("VECNA_TEST_URL not set — skipping integration tests")
|
|
}
|
|
model := os.Getenv("VECNA_TEST_MODEL")
|
|
if model == "" {
|
|
t.Skip("VECNA_TEST_MODEL not set — skipping integration tests")
|
|
}
|
|
apiType := os.Getenv("VECNA_TEST_API_TYPE")
|
|
if apiType == "" {
|
|
apiType = "openai"
|
|
}
|
|
return cfg{
|
|
url: url,
|
|
model: model,
|
|
apiType: apiType,
|
|
apiKey: os.Getenv("VECNA_TEST_API_KEY"),
|
|
}
|
|
}
|
|
|
|
func newClient(c cfg) embedclient.Client {
|
|
httpClient := &http.Client{Timeout: 30 * time.Second}
|
|
if c.apiType == "google" {
|
|
return embedclient.NewGoogle(c.url, c.apiKey, c.model, httpClient)
|
|
}
|
|
return embedclient.NewOpenAI(c.url, c.apiKey, httpClient)
|
|
}
|
|
|
|
// embed fetches a single embedding vector for testText.
|
|
func embed(t *testing.T, client embedclient.Client, model string) []float32 {
|
|
t.Helper()
|
|
resp, err := client.Embed(context.Background(), embedclient.Request{
|
|
Texts: []string{testText},
|
|
Model: model,
|
|
})
|
|
require.NoError(t, err, "embedding request failed")
|
|
require.Len(t, resp.Embeddings, 1, "expected exactly one embedding in response")
|
|
require.NotEmpty(t, resp.Embeddings[0], "embedding vector is empty")
|
|
return resp.Embeddings[0]
|
|
}
|
|
|
|
// l2Norm returns the Euclidean (L2) norm of v. Squares are accumulated in
// float64; a nil or empty slice yields 0.
func l2Norm(v []float32) float64 {
	sumSquares := 0.0
	for i := range v {
		component := float64(v[i])
		sumSquares += component * component
	}
	return math.Sqrt(sumSquares)
}
|
|
|
|
// assertUnitNorm checks the vector is approximately L2-normalised.
|
|
func assertUnitNorm(t *testing.T, v []float32) {
|
|
t.Helper()
|
|
norm := l2Norm(v)
|
|
assert.InDelta(t, 1.0, norm, 0.01, "expected unit L2 norm after adaptation")
|
|
}
|
|
|
|
// ---- Tests ----------------------------------------------------------------
|
|
|
|
// TestNativeDimension verifies the server returns a non-empty vector.
|
|
// This is the baseline; the native dimension is logged so it can be used
|
|
// as VECNA_TEST_SOURCE_DIM for the dimension tests below.
|
|
func TestNativeDimension(t *testing.T) {
|
|
c := loadCfg(t)
|
|
client := newClient(c)
|
|
|
|
vec := embed(t, client, c.model)
|
|
t.Logf("native dimension: %d", len(vec))
|
|
t.Logf("native L2 norm: %.6f", l2Norm(vec))
|
|
|
|
assert.Greater(t, len(vec), 0)
|
|
}
|
|
|
|
// TestDownscaleTruncate tests truncation to half the native dimension.
|
|
func TestDownscaleTruncate(t *testing.T) {
|
|
c := loadCfg(t)
|
|
client := newClient(c)
|
|
|
|
vec := embed(t, client, c.model)
|
|
srcDim := len(vec)
|
|
tgtDim := srcDim / 2
|
|
if tgtDim == 0 {
|
|
t.Skipf("source dim %d too small to halve", srcDim)
|
|
}
|
|
|
|
adp, err := adapter.NewTruncate(srcDim, tgtDim, adapter.TruncateFromEnd, adapter.PadAtEnd)
|
|
require.NoError(t, err)
|
|
|
|
out, err := adp.Adapt(vec)
|
|
require.NoError(t, err)
|
|
|
|
assert.Len(t, out, tgtDim, "output dimension mismatch")
|
|
assertUnitNorm(t, out)
|
|
t.Logf("downscale truncate: %d → %d norm=%.6f", srcDim, tgtDim, l2Norm(out))
|
|
}
|
|
|
|
// TestDownscaleTruncateFromStart tests keeping the last N dims.
|
|
func TestDownscaleTruncateFromStart(t *testing.T) {
|
|
c := loadCfg(t)
|
|
client := newClient(c)
|
|
|
|
vec := embed(t, client, c.model)
|
|
srcDim := len(vec)
|
|
tgtDim := srcDim / 2
|
|
if tgtDim == 0 {
|
|
t.Skipf("source dim %d too small to halve", srcDim)
|
|
}
|
|
|
|
adp, err := adapter.NewTruncate(srcDim, tgtDim, adapter.TruncateFromStart, adapter.PadAtEnd)
|
|
require.NoError(t, err)
|
|
|
|
out, err := adp.Adapt(vec)
|
|
require.NoError(t, err)
|
|
|
|
assert.Len(t, out, tgtDim)
|
|
assertUnitNorm(t, out)
|
|
t.Logf("downscale truncate-from-start: %d → %d norm=%.6f", srcDim, tgtDim, l2Norm(out))
|
|
}
|
|
|
|
// TestDownscaleRandom tests random projection to a lower dimension.
|
|
func TestDownscaleRandom(t *testing.T) {
|
|
c := loadCfg(t)
|
|
client := newClient(c)
|
|
|
|
vec := embed(t, client, c.model)
|
|
srcDim := len(vec)
|
|
tgtDim := srcDim / 2
|
|
if tgtDim == 0 {
|
|
t.Skipf("source dim %d too small to halve", srcDim)
|
|
}
|
|
|
|
adp, err := adapter.NewRandom(srcDim, tgtDim, 42)
|
|
require.NoError(t, err)
|
|
|
|
out, err := adp.Adapt(vec)
|
|
require.NoError(t, err)
|
|
|
|
assert.Len(t, out, tgtDim)
|
|
assertUnitNorm(t, out)
|
|
t.Logf("downscale random: %d → %d norm=%.6f", srcDim, tgtDim, l2Norm(out))
|
|
}
|
|
|
|
// TestDownscaleToFixed tests truncation to a fixed well-known target (e.g. 768 → 256).
|
|
// Skips if the native dimension is not larger than the target.
|
|
func TestDownscaleToFixed(t *testing.T) {
|
|
c := loadCfg(t)
|
|
client := newClient(c)
|
|
|
|
tgtDim := intEnv("VECNA_TEST_TARGET_DIM", 256)
|
|
|
|
vec := embed(t, client, c.model)
|
|
srcDim := len(vec)
|
|
if srcDim <= tgtDim {
|
|
t.Skipf("native dim %d is not larger than target dim %d", srcDim, tgtDim)
|
|
}
|
|
|
|
adp, err := adapter.NewTruncate(srcDim, tgtDim, adapter.TruncateFromEnd, adapter.PadAtEnd)
|
|
require.NoError(t, err)
|
|
|
|
out, err := adp.Adapt(vec)
|
|
require.NoError(t, err)
|
|
|
|
assert.Len(t, out, tgtDim)
|
|
assertUnitNorm(t, out)
|
|
t.Logf("downscale to fixed: %d → %d norm=%.6f", srcDim, tgtDim, l2Norm(out))
|
|
}
|
|
|
|
// TestUpscalePadEnd tests zero-padding to double the native dimension.
|
|
func TestUpscalePadEnd(t *testing.T) {
|
|
c := loadCfg(t)
|
|
client := newClient(c)
|
|
|
|
vec := embed(t, client, c.model)
|
|
srcDim := len(vec)
|
|
tgtDim := srcDim * 2
|
|
|
|
adp, err := adapter.NewTruncate(srcDim, tgtDim, adapter.TruncateFromEnd, adapter.PadAtEnd)
|
|
require.NoError(t, err)
|
|
|
|
out, err := adp.Adapt(vec)
|
|
require.NoError(t, err)
|
|
|
|
assert.Len(t, out, tgtDim)
|
|
assertUnitNorm(t, out)
|
|
// The second half of the raw output (before normalisation) should have been zero-padded.
|
|
// After normalisation all values shrink but the last half should all be equal (zero → 0).
|
|
t.Logf("upscale pad-end: %d → %d norm=%.6f", srcDim, tgtDim, l2Norm(out))
|
|
}
|
|
|
|
// TestUpscalePadStart tests zero-padding prepended to the vector.
|
|
func TestUpscalePadStart(t *testing.T) {
|
|
c := loadCfg(t)
|
|
client := newClient(c)
|
|
|
|
vec := embed(t, client, c.model)
|
|
srcDim := len(vec)
|
|
tgtDim := srcDim * 2
|
|
|
|
adp, err := adapter.NewTruncate(srcDim, tgtDim, adapter.TruncateFromEnd, adapter.PadAtStart)
|
|
require.NoError(t, err)
|
|
|
|
out, err := adp.Adapt(vec)
|
|
require.NoError(t, err)
|
|
|
|
assert.Len(t, out, tgtDim)
|
|
assertUnitNorm(t, out)
|
|
t.Logf("upscale pad-start: %d → %d norm=%.6f", srcDim, tgtDim, l2Norm(out))
|
|
}
|
|
|
|
// TestUpscaleRandom tests random projection to a higher dimension.
|
|
func TestUpscaleRandom(t *testing.T) {
|
|
c := loadCfg(t)
|
|
client := newClient(c)
|
|
|
|
vec := embed(t, client, c.model)
|
|
srcDim := len(vec)
|
|
tgtDim := srcDim * 2
|
|
|
|
adp, err := adapter.NewRandom(srcDim, tgtDim, 42)
|
|
require.NoError(t, err)
|
|
|
|
out, err := adp.Adapt(vec)
|
|
require.NoError(t, err)
|
|
|
|
assert.Len(t, out, tgtDim)
|
|
assertUnitNorm(t, out)
|
|
t.Logf("upscale random: %d → %d norm=%.6f", srcDim, tgtDim, l2Norm(out))
|
|
}
|
|
|
|
// TestUpscaleToFixed tests upscaling to a fixed well-known target (e.g. 768 → 1536).
|
|
// Skips if the native dimension is already larger than or equal to the target.
|
|
func TestUpscaleToFixed(t *testing.T) {
|
|
c := loadCfg(t)
|
|
client := newClient(c)
|
|
|
|
tgtDim := intEnv("VECNA_TEST_TARGET_DIM", 1536)
|
|
|
|
vec := embed(t, client, c.model)
|
|
srcDim := len(vec)
|
|
if srcDim >= tgtDim {
|
|
t.Skipf("native dim %d is not smaller than target dim %d", srcDim, tgtDim)
|
|
}
|
|
|
|
adp, err := adapter.NewTruncate(srcDim, tgtDim, adapter.TruncateFromEnd, adapter.PadAtEnd)
|
|
require.NoError(t, err)
|
|
|
|
out, err := adp.Adapt(vec)
|
|
require.NoError(t, err)
|
|
|
|
assert.Len(t, out, tgtDim)
|
|
assertUnitNorm(t, out)
|
|
t.Logf("upscale to fixed: %d → %d norm=%.6f", srcDim, tgtDim, l2Norm(out))
|
|
}
|
|
|
|
// TestRoundtripConsistency embeds the same text twice and checks the vectors are identical.
|
|
func TestRoundtripConsistency(t *testing.T) {
|
|
c := loadCfg(t)
|
|
client := newClient(c)
|
|
|
|
v1 := embed(t, client, c.model)
|
|
v2 := embed(t, client, c.model)
|
|
|
|
require.Equal(t, len(v1), len(v2), "dimension mismatch between two identical requests")
|
|
|
|
var maxDiff float32
|
|
for i := range v1 {
|
|
d := v1[i] - v2[i]
|
|
if d < 0 {
|
|
d = -d
|
|
}
|
|
if d > maxDiff {
|
|
maxDiff = d
|
|
}
|
|
}
|
|
t.Logf("max element-wise diff between two identical embeds: %e", maxDiff)
|
|
assert.Less(t, maxDiff, float32(1e-5), "embeddings for identical input should be deterministic")
|
|
}
|
|
|
|
// intEnv returns the integer value of the environment variable key.
// defaultVal is returned when the variable is unset, empty, or not accepted
// by strconv.Atoi (for example because of surrounding whitespace).
func intEnv(key string, defaultVal int) int {
	raw := os.Getenv(key)
	if raw == "" {
		return defaultVal
	}

	parsed, err := strconv.Atoi(raw)
	if err != nil {
		return defaultVal
	}
	return parsed
}
|