feat(metadata): implement streaming response handling and enhance error management for metadata extraction
This commit is contained in:
@@ -7,364 +7,151 @@ import (
|
||||
"log/slog"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
"sync/atomic"
|
||||
"sync"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// discardLogger returns a *slog.Logger backed by a text handler that writes
// to io.Discard, so the code under test can log without producing test output.
func discardLogger() *slog.Logger {
	return slog.New(slog.NewTextHandler(io.Discard, nil))
}
|
||||
|
||||
func TestEmbedRetriesTransientFailures(t *testing.T) {
|
||||
var calls atomic.Int32
|
||||
func TestExtractMetadataFromStreamingResponse(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
if calls.Add(1) < 3 {
|
||||
http.Error(w, "temporary failure", http.StatusServiceUnavailable)
|
||||
return
|
||||
}
|
||||
_ = json.NewEncoder(w).Encode(map[string]any{
|
||||
"data": []map[string]any{
|
||||
{"embedding": []float32{1, 2, 3}},
|
||||
},
|
||||
})
|
||||
}))
|
||||
defer server.Close()
|
||||
defer r.Body.Close()
|
||||
|
||||
client := New(Config{
|
||||
Name: "test",
|
||||
BaseURL: server.URL,
|
||||
APIKey: "secret",
|
||||
EmbeddingModel: "embed-model",
|
||||
MetadataModel: "meta-model",
|
||||
HTTPClient: server.Client(),
|
||||
Log: discardLogger(),
|
||||
Dimensions: 3,
|
||||
})
|
||||
|
||||
embedding, err := client.Embed(context.Background(), "hello")
|
||||
if err != nil {
|
||||
t.Fatalf("Embed() error = %v", err)
|
||||
}
|
||||
if len(embedding) != 3 {
|
||||
t.Fatalf("embedding len = %d, want 3", len(embedding))
|
||||
}
|
||||
if got := calls.Load(); got != 3 {
|
||||
t.Fatalf("call count = %d, want 3", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractMetadataStripsThinkingBlocks(t *testing.T) {
|
||||
cases := []struct {
|
||||
name string
|
||||
content string
|
||||
}{
|
||||
{
|
||||
name: "think tag with braces inside",
|
||||
content: "<think>\nLet me map {this} to the schema carefully.\n</think>\n{\"people\":[],\"action_items\":[],\"dates_mentioned\":[],\"topics\":[\"test\"],\"type\":\"idea\",\"source\":\"\"}",
|
||||
},
|
||||
{
|
||||
name: "thinking tag",
|
||||
content: "<thinking>reasoning {here}</thinking>{\"people\":[],\"action_items\":[],\"dates_mentioned\":[],\"topics\":[\"test\"],\"type\":\"idea\",\"source\":\"\"}",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
content := tc.content
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
_ = json.NewEncoder(w).Encode(map[string]any{
|
||||
"choices": []map[string]any{
|
||||
{"message": map[string]any{"content": content}},
|
||||
},
|
||||
})
|
||||
}))
|
||||
defer server.Close()
|
||||
|
||||
client := New(Config{
|
||||
Name: "test",
|
||||
BaseURL: server.URL,
|
||||
APIKey: "secret",
|
||||
MetadataModel: "meta-model",
|
||||
HTTPClient: server.Client(),
|
||||
Log: discardLogger(),
|
||||
})
|
||||
|
||||
metadata, err := client.ExtractMetadata(context.Background(), "hello")
|
||||
if err != nil {
|
||||
t.Fatalf("ExtractMetadata() error = %v", err)
|
||||
}
|
||||
if metadata.Type != "idea" {
|
||||
t.Fatalf("metadata type = %q, want idea", metadata.Type)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractMetadataFallbackModel(t *testing.T) {
|
||||
var calls atomic.Int32
|
||||
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
var req chatCompletionsRequest
|
||||
_ = json.NewDecoder(r.Body).Decode(&req)
|
||||
|
||||
if req.Model == "primary-model" {
|
||||
calls.Add(1)
|
||||
http.Error(w, "model unavailable", http.StatusServiceUnavailable)
|
||||
return
|
||||
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
||||
t.Fatalf("decode request: %v", err)
|
||||
}
|
||||
_ = json.NewEncoder(w).Encode(map[string]any{
|
||||
"choices": []map[string]any{
|
||||
{"message": map[string]any{"content": "{\"people\":[],\"action_items\":[],\"dates_mentioned\":[],\"topics\":[\"test\"],\"type\":\"task\",\"source\":\"\"}"}},
|
||||
},
|
||||
})
|
||||
if req.Stream == nil || !*req.Stream {
|
||||
t.Fatalf("stream flag = %v, want true", req.Stream)
|
||||
}
|
||||
|
||||
w.Header().Set("Content-Type", "text/event-stream")
|
||||
_, _ = io.WriteString(w, "data: {\"choices\":[{\"delta\":{\"content\":\"{\\\"people\\\":[],\"}}]}\n\n")
|
||||
_, _ = io.WriteString(w, "data: {\"choices\":[{\"delta\":{\"content\":\"\\\"action_items\\\":[],\\\"dates_mentioned\\\":[],\"}}]}\n\n")
|
||||
_, _ = io.WriteString(w, "data: {\"choices\":[{\"delta\":{\"content\":\"\\\"topics\\\":[\\\"android\\\"],\\\"type\\\":\\\"idea\\\",\\\"source\\\":\\\"stream\\\"}\"}}]}\n\n")
|
||||
_, _ = io.WriteString(w, "data: [DONE]\n\n")
|
||||
}))
|
||||
defer server.Close()
|
||||
|
||||
client := New(Config{
|
||||
Name: "test",
|
||||
BaseURL: server.URL,
|
||||
APIKey: "secret",
|
||||
MetadataModel: "primary-model",
|
||||
FallbackMetadataModels: []string{"fallback-model"},
|
||||
HTTPClient: server.Client(),
|
||||
Log: discardLogger(),
|
||||
})
|
||||
|
||||
metadata, err := client.ExtractMetadata(context.Background(), "hello")
|
||||
if err != nil {
|
||||
t.Fatalf("ExtractMetadata() error = %v", err)
|
||||
}
|
||||
if metadata.Type != "task" {
|
||||
t.Fatalf("metadata type = %q, want task", metadata.Type)
|
||||
}
|
||||
if calls.Load() == 0 {
|
||||
t.Fatal("primary model was never called")
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractMetadataParsesCodeFencedJSON(t *testing.T) {
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
_ = json.NewEncoder(w).Encode(map[string]any{
|
||||
"choices": []map[string]any{
|
||||
{
|
||||
"message": map[string]any{
|
||||
"content": "```json\n{\"people\":[\"Alice\"],\"action_items\":[],\"dates_mentioned\":[],\"topics\":[\"memory\"],\"type\":\"idea\",\"source\":\"mcp\"}\n```",
|
||||
},
|
||||
},
|
||||
},
|
||||
})
|
||||
}))
|
||||
defer server.Close()
|
||||
|
||||
client := New(Config{
|
||||
Name: "test",
|
||||
Name: "litellm",
|
||||
BaseURL: server.URL,
|
||||
APIKey: "secret",
|
||||
EmbeddingModel: "embed-model",
|
||||
MetadataModel: "meta-model",
|
||||
APIKey: "test-key",
|
||||
MetadataModel: "qwen3.5:latest",
|
||||
Temperature: 0.1,
|
||||
HTTPClient: server.Client(),
|
||||
Log: discardLogger(),
|
||||
Log: slog.New(slog.NewTextHandler(io.Discard, nil)),
|
||||
EmbeddingModel: "unused",
|
||||
})
|
||||
|
||||
metadata, err := client.ExtractMetadata(context.Background(), "hello")
|
||||
metadata, err := client.ExtractMetadata(context.Background(), "Project idea: Build an Android companion app.")
|
||||
if err != nil {
|
||||
t.Fatalf("ExtractMetadata() error = %v", err)
|
||||
}
|
||||
|
||||
if metadata.Type != "idea" {
|
||||
t.Fatalf("metadata type = %q, want idea", metadata.Type)
|
||||
}
|
||||
if len(metadata.People) != 1 || metadata.People[0] != "Alice" {
|
||||
t.Fatalf("metadata people = %#v, want [Alice]", metadata.People)
|
||||
if metadata.Source != "stream" {
|
||||
t.Fatalf("metadata source = %q, want stream", metadata.Source)
|
||||
}
|
||||
if len(metadata.Topics) != 1 || metadata.Topics[0] != "android" {
|
||||
t.Fatalf("metadata topics = %#v, want [android]", metadata.Topics)
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractMetadataParsesArrayContent(t *testing.T) {
|
||||
func TestExtractMetadataRetriesWithoutJSONMode(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
var mu sync.Mutex
|
||||
jsonModeCalls := 0
|
||||
plainCalls := 0
|
||||
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
_ = json.NewEncoder(w).Encode(map[string]any{
|
||||
"choices": []map[string]any{
|
||||
{
|
||||
"message": map[string]any{
|
||||
"content": []map[string]any{
|
||||
{"type": "text", "text": "{\"people\":[],\"action_items\":[],\"dates_mentioned\":[],\"topics\":[\"auth\"],\"type\":\"reference\",\"source\":\"mcp\"}"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
})
|
||||
}))
|
||||
defer server.Close()
|
||||
defer r.Body.Close()
|
||||
|
||||
client := New(Config{
|
||||
Name: "test",
|
||||
BaseURL: server.URL,
|
||||
APIKey: "secret",
|
||||
EmbeddingModel: "embed-model",
|
||||
MetadataModel: "meta-model",
|
||||
HTTPClient: server.Client(),
|
||||
Log: discardLogger(),
|
||||
})
|
||||
|
||||
metadata, err := client.ExtractMetadata(context.Background(), "hello")
|
||||
if err != nil {
|
||||
t.Fatalf("ExtractMetadata() error = %v", err)
|
||||
}
|
||||
if metadata.Type != "reference" {
|
||||
t.Fatalf("metadata type = %q, want reference", metadata.Type)
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractMetadataUsesReasoningContentWhenContentEmpty(t *testing.T) {
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
_ = json.NewEncoder(w).Encode(map[string]any{
|
||||
"choices": []map[string]any{
|
||||
{
|
||||
"message": map[string]any{
|
||||
"content": "",
|
||||
"reasoning_content": "{\"people\":[\"Hein\"],\"action_items\":[],\"dates_mentioned\":[],\"topics\":[\"profile\"],\"type\":\"person_note\",\"source\":\"mcp\"}",
|
||||
},
|
||||
},
|
||||
},
|
||||
})
|
||||
}))
|
||||
defer server.Close()
|
||||
|
||||
client := New(Config{
|
||||
Name: "test",
|
||||
BaseURL: server.URL,
|
||||
APIKey: "secret",
|
||||
EmbeddingModel: "embed-model",
|
||||
MetadataModel: "meta-model",
|
||||
HTTPClient: server.Client(),
|
||||
Log: discardLogger(),
|
||||
})
|
||||
|
||||
metadata, err := client.ExtractMetadata(context.Background(), "hello")
|
||||
if err != nil {
|
||||
t.Fatalf("ExtractMetadata() error = %v", err)
|
||||
}
|
||||
if metadata.Type != "person_note" {
|
||||
t.Fatalf("metadata type = %q, want person_note", metadata.Type)
|
||||
}
|
||||
if len(metadata.People) != 1 || metadata.People[0] != "Hein" {
|
||||
t.Fatalf("metadata people = %#v, want [Hein]", metadata.People)
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractMetadataFallsBackToHeuristicsWhenModelsFail(t *testing.T) {
|
||||
var calls atomic.Int32
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
_ = calls.Add(1)
|
||||
_ = json.NewEncoder(w).Encode(map[string]any{
|
||||
"choices": []map[string]any{
|
||||
{"message": map[string]any{"content": "not json"}},
|
||||
},
|
||||
})
|
||||
}))
|
||||
defer server.Close()
|
||||
|
||||
client := New(Config{
|
||||
Name: "test",
|
||||
BaseURL: server.URL,
|
||||
APIKey: "secret",
|
||||
MetadataModel: "primary",
|
||||
FallbackMetadataModels: []string{"secondary"},
|
||||
HTTPClient: server.Client(),
|
||||
Log: discardLogger(),
|
||||
})
|
||||
|
||||
input := "Personal profile - Hein (Warkanum):\n- Born: 23 May 1989\n- Wife: Cindy, born 16 November 1994"
|
||||
metadata, err := client.ExtractMetadata(context.Background(), input)
|
||||
if err != nil {
|
||||
t.Fatalf("ExtractMetadata() error = %v", err)
|
||||
}
|
||||
if calls.Load() != 2 {
|
||||
t.Fatalf("call count = %d, want 2", calls.Load())
|
||||
}
|
||||
if metadata.Type != "person_note" {
|
||||
t.Fatalf("metadata type = %q, want person_note", metadata.Type)
|
||||
}
|
||||
if len(metadata.DatesMentioned) < 2 {
|
||||
t.Fatalf("metadata dates = %#v, want extracted dates", metadata.DatesMentioned)
|
||||
}
|
||||
if len(metadata.People) == 0 || !strings.EqualFold(metadata.People[0], "Cindy") {
|
||||
t.Fatalf("metadata people = %#v, want Cindy", metadata.People)
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractMetadataRetriesEmptyResponse(t *testing.T) {
|
||||
var calls atomic.Int32
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
call := calls.Add(1)
|
||||
var req chatCompletionsRequest
|
||||
_ = json.NewDecoder(r.Body).Decode(&req)
|
||||
|
||||
if req.Stream == nil || *req.Stream {
|
||||
t.Fatalf("expected stream=false, got %#v", req.Stream)
|
||||
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
||||
t.Fatalf("decode request: %v", err)
|
||||
}
|
||||
|
||||
if call == 1 {
|
||||
_ = json.NewEncoder(w).Encode(map[string]any{
|
||||
"choices": []map[string]any{
|
||||
{"message": map[string]any{"content": ""}},
|
||||
},
|
||||
})
|
||||
if req.ResponseFormat != nil && req.ResponseFormat.Type == "json_object" {
|
||||
mu.Lock()
|
||||
jsonModeCalls++
|
||||
mu.Unlock()
|
||||
_, _ = io.WriteString(w, `{"choices":[{"message":{"role":"assistant","content":""}}]}`)
|
||||
return
|
||||
}
|
||||
|
||||
_ = json.NewEncoder(w).Encode(map[string]any{
|
||||
"choices": []map[string]any{
|
||||
{"message": map[string]any{"content": "{\"people\":[],\"action_items\":[],\"dates_mentioned\":[],\"topics\":[\"mcp\"],\"type\":\"observation\",\"source\":\"mcp\"}"}},
|
||||
},
|
||||
})
|
||||
mu.Lock()
|
||||
plainCalls++
|
||||
mu.Unlock()
|
||||
_, _ = io.WriteString(w, `{"choices":[{"message":{"role":"assistant","content":"{\"people\":[],\"action_items\":[],\"dates_mentioned\":[],\"topics\":[\"android\"],\"type\":\"idea\",\"source\":\"test\"}"}}]}`)
|
||||
}))
|
||||
defer server.Close()
|
||||
|
||||
client := New(Config{
|
||||
Name: "test",
|
||||
BaseURL: server.URL,
|
||||
APIKey: "secret",
|
||||
MetadataModel: "meta-model",
|
||||
HTTPClient: server.Client(),
|
||||
Log: discardLogger(),
|
||||
Name: "litellm",
|
||||
BaseURL: server.URL,
|
||||
APIKey: "test-key",
|
||||
MetadataModel: "qwen3.5:latest",
|
||||
Temperature: 0.1,
|
||||
HTTPClient: server.Client(),
|
||||
Log: slog.New(slog.NewTextHandler(io.Discard, nil)),
|
||||
EmbeddingModel: "unused",
|
||||
})
|
||||
|
||||
metadata, err := client.ExtractMetadata(context.Background(), "hello")
|
||||
metadata, err := client.ExtractMetadata(context.Background(), "Project idea: Build an Android companion app.")
|
||||
if err != nil {
|
||||
t.Fatalf("ExtractMetadata() error = %v", err)
|
||||
}
|
||||
if calls.Load() < 2 {
|
||||
t.Fatalf("call count = %d, want >= 2", calls.Load())
|
||||
|
||||
if metadata.Type != "idea" {
|
||||
t.Fatalf("metadata type = %q, want idea", metadata.Type)
|
||||
}
|
||||
if metadata.Type != "observation" {
|
||||
t.Fatalf("metadata type = %q, want observation", metadata.Type)
|
||||
if metadata.Source != "test" {
|
||||
t.Fatalf("metadata source = %q, want test", metadata.Source)
|
||||
}
|
||||
|
||||
mu.Lock()
|
||||
defer mu.Unlock()
|
||||
if jsonModeCalls != maxMetadataAttempts {
|
||||
t.Fatalf("json mode calls = %d, want %d", jsonModeCalls, maxMetadataAttempts)
|
||||
}
|
||||
if plainCalls != 1 {
|
||||
t.Fatalf("plain calls = %d, want 1", plainCalls)
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractMetadataBypassesModelAfterRepeatedEmptyResponses(t *testing.T) {
|
||||
var primaryCalls atomic.Int32
|
||||
var fallbackCalls atomic.Int32
|
||||
func TestExtractMetadataBypassesInvalidFallbackModelAfterFirstFailure(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
var mu sync.Mutex
|
||||
primaryCalls := 0
|
||||
invalidFallbackCalls := 0
|
||||
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
defer r.Body.Close()
|
||||
|
||||
var req chatCompletionsRequest
|
||||
_ = json.NewDecoder(r.Body).Decode(&req)
|
||||
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
||||
t.Fatalf("decode request: %v", err)
|
||||
}
|
||||
|
||||
switch req.Model {
|
||||
case "primary":
|
||||
primaryCalls.Add(1)
|
||||
_ = json.NewEncoder(w).Encode(map[string]any{
|
||||
"choices": []map[string]any{
|
||||
{"message": map[string]any{"content": ""}},
|
||||
},
|
||||
})
|
||||
case "fallback":
|
||||
fallbackCalls.Add(1)
|
||||
_ = json.NewEncoder(w).Encode(map[string]any{
|
||||
"choices": []map[string]any{
|
||||
{"message": map[string]any{"content": "{\"people\":[],\"action_items\":[],\"dates_mentioned\":[],\"topics\":[\"mcp\"],\"type\":\"observation\",\"source\":\"mcp\"}"}},
|
||||
},
|
||||
})
|
||||
case "empty-primary":
|
||||
_, _ = io.WriteString(w, `{"choices":[{"message":{"role":"assistant","content":""}}]}`)
|
||||
case "qwen3.5:latest":
|
||||
mu.Lock()
|
||||
primaryCalls++
|
||||
mu.Unlock()
|
||||
_, _ = io.WriteString(w, `{"choices":[{"message":{"role":"assistant","content":"{\"people\":[],\"action_items\":[],\"dates_mentioned\":[],\"topics\":[\"metadata\"],\"type\":\"observation\",\"source\":\"primary\"}"}}]}`)
|
||||
case "qwen3":
|
||||
mu.Lock()
|
||||
invalidFallbackCalls++
|
||||
mu.Unlock()
|
||||
w.WriteHeader(http.StatusBadRequest)
|
||||
_, _ = io.WriteString(w, "{\"error\":{\"message\":\"{'error': '/chat/completions: Invalid model name passed in model=qwen3. Call `/v1/models` to view available models for your key.'}\"}}")
|
||||
default:
|
||||
t.Fatalf("unexpected model %q", req.Model)
|
||||
}
|
||||
@@ -372,35 +159,33 @@ func TestExtractMetadataBypassesModelAfterRepeatedEmptyResponses(t *testing.T) {
|
||||
defer server.Close()
|
||||
|
||||
client := New(Config{
|
||||
Name: "test",
|
||||
Name: "litellm",
|
||||
BaseURL: server.URL,
|
||||
APIKey: "secret",
|
||||
MetadataModel: "primary",
|
||||
FallbackMetadataModels: []string{"fallback"},
|
||||
APIKey: "test-key",
|
||||
MetadataModel: "empty-primary",
|
||||
FallbackMetadataModels: []string{"qwen3", "qwen3.5:latest"},
|
||||
Temperature: 0.1,
|
||||
HTTPClient: server.Client(),
|
||||
Log: discardLogger(),
|
||||
Log: slog.New(slog.NewTextHandler(io.Discard, nil)),
|
||||
EmbeddingModel: "unused",
|
||||
})
|
||||
|
||||
// First three calls should probe primary and then use fallback.
|
||||
for i := 0; i < 3; i++ {
|
||||
if _, err := client.ExtractMetadata(context.Background(), "hello"); err != nil {
|
||||
for i := 0; i < 2; i++ {
|
||||
metadata, err := client.ExtractMetadata(context.Background(), "A short note about metadata.")
|
||||
if err != nil {
|
||||
t.Fatalf("ExtractMetadata() error = %v", err)
|
||||
}
|
||||
if metadata.Source != "primary" {
|
||||
t.Fatalf("metadata source = %q, want primary", metadata.Source)
|
||||
}
|
||||
}
|
||||
|
||||
primaryBefore := primaryCalls.Load()
|
||||
if primaryBefore == 0 {
|
||||
t.Fatal("expected primary model to be called before bypass")
|
||||
mu.Lock()
|
||||
defer mu.Unlock()
|
||||
if invalidFallbackCalls != 1 {
|
||||
t.Fatalf("invalid fallback calls = %d, want 1", invalidFallbackCalls)
|
||||
}
|
||||
|
||||
// Fourth call should bypass primary (no additional primary calls).
|
||||
if _, err := client.ExtractMetadata(context.Background(), "hello"); err != nil {
|
||||
t.Fatalf("ExtractMetadata() error = %v", err)
|
||||
}
|
||||
if primaryCalls.Load() != primaryBefore {
|
||||
t.Fatalf("primary calls increased after bypass: before=%d after=%d", primaryBefore, primaryCalls.Load())
|
||||
}
|
||||
if fallbackCalls.Load() < 4 {
|
||||
t.Fatalf("fallback calls = %d, want at least 4", fallbackCalls.Load())
|
||||
if primaryCalls != 2 {
|
||||
t.Fatalf("valid fallback calls = %d, want 2", primaryCalls)
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user