feat(metadata): add stripThinkingBlocks function and related tests

This commit is contained in:
2026-03-27 00:05:41 +02:00
parent a5c7b90f49
commit 6af02a2ba1
2 changed files with 114 additions and 0 deletions

View File

@@ -189,6 +189,7 @@ func (c *Client) extractMetadataWithModel(ctx context.Context, input, model stri
} }
metadataText := strings.TrimSpace(resp.Choices[0].Message.Content) metadataText := strings.TrimSpace(resp.Choices[0].Message.Content)
metadataText = stripThinkingBlocks(metadataText)
metadataText = stripCodeFence(metadataText) metadataText = stripCodeFence(metadataText)
metadataText = extractJSONObject(metadataText) metadataText = extractJSONObject(metadataText)
if metadataText == "" { if metadataText == "" {
@@ -320,6 +321,30 @@ func extractJSONObject(s string) string {
return s[start : end+1] return s[start : end+1]
} }
// stripThinkingBlocks removes <think>...</think> and <thinking>...</thinking>
// blocks produced by reasoning models (DeepSeek R1, QwQ, etc.) so that the
// remaining text can be parsed as JSON without interference from thinking
// content that may itself contain braces.
//
// For each of the two tag names, in order:
//   - every opening tag that has a later closing tag is removed together with
//     everything between them;
//   - an opening tag with no closing tag after it is treated as reasoning that
//     runs to the end of the text, so everything from that tag on is dropped;
//   - a closing tag that is still present afterwards (no opening tag was
//     emitted, or blocks were nested) is treated as the end of the reasoning,
//     so everything up to and including the last such tag is dropped.
//
// The result has leading and trailing whitespace trimmed. Tag matching is
// exact and case-sensitive.
func stripThinkingBlocks(s string) string {
	for _, tag := range []string{"think", "thinking"} {
		openTag := "<" + tag + ">"
		closeTag := "</" + tag + ">"

		for {
			start := strings.Index(s, openTag)
			if start == -1 {
				break
			}
			// Offset of the closing tag relative to start.
			end := strings.Index(s[start:], closeTag)
			if end == -1 {
				// Unterminated block: the reasoning never ended, so nothing
				// after the opening tag is usable.
				s = s[:start]
				break
			}
			s = s[:start] + s[start+end+len(closeTag):]
		}

		// Any closing tag left at this point has no opening tag in front of
		// it. Everything before it is reasoning text, so keep only what
		// follows the last one.
		if orphan := strings.LastIndex(s, closeTag); orphan != -1 {
			s = s[orphan+len(closeTag):]
		}
	}
	return strings.TrimSpace(s)
}
func stripCodeFence(value string) string { func stripCodeFence(value string) string {
value = strings.TrimSpace(value) value = strings.TrimSpace(value)
if !strings.HasPrefix(value, "```") { if !strings.HasPrefix(value, "```") {

View File

@@ -54,6 +54,95 @@ func TestEmbedRetriesTransientFailures(t *testing.T) {
} }
} }
func TestExtractMetadataStripsThinkingBlocks(t *testing.T) {
cases := []struct {
name string
content string
}{
{
name: "think tag with braces inside",
content: "<think>\nLet me map {this} to the schema carefully.\n</think>\n{\"people\":[],\"action_items\":[],\"dates_mentioned\":[],\"topics\":[\"test\"],\"type\":\"idea\",\"source\":\"\"}",
},
{
name: "thinking tag",
content: "<thinking>reasoning {here}</thinking>{\"people\":[],\"action_items\":[],\"dates_mentioned\":[],\"topics\":[\"test\"],\"type\":\"idea\",\"source\":\"\"}",
},
}
for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
content := tc.content
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
_ = json.NewEncoder(w).Encode(map[string]any{
"choices": []map[string]any{
{"message": map[string]any{"content": content}},
},
})
}))
defer server.Close()
client := New(Config{
Name: "test",
BaseURL: server.URL,
APIKey: "secret",
MetadataModel: "meta-model",
HTTPClient: server.Client(),
Log: discardLogger(),
})
metadata, err := client.ExtractMetadata(context.Background(), "hello")
if err != nil {
t.Fatalf("ExtractMetadata() error = %v", err)
}
if metadata.Type != "idea" {
t.Fatalf("metadata type = %q, want idea", metadata.Type)
}
})
}
}
// TestExtractMetadataFallbackModel runs ExtractMetadata against a stub server
// that answers 503 for requests naming "primary-model" and a valid metadata
// JSON object (type "task") for any other model. It expects the call to
// succeed with type "task" and the primary model to have been requested at
// least once.
func TestExtractMetadataFallbackModel(t *testing.T) {
	// Number of requests the stub saw for the primary model.
	var primaryHits atomic.Int32

	handler := func(w http.ResponseWriter, r *http.Request) {
		var body chatCompletionsRequest
		_ = json.NewDecoder(r.Body).Decode(&body)

		if body.Model != "primary-model" {
			reply := map[string]any{
				"choices": []map[string]any{
					{"message": map[string]any{"content": "{\"people\":[],\"action_items\":[],\"dates_mentioned\":[],\"topics\":[\"test\"],\"type\":\"task\",\"source\":\"\"}"}},
				},
			}
			_ = json.NewEncoder(w).Encode(reply)
			return
		}

		// The primary model is always reported as unavailable.
		primaryHits.Add(1)
		http.Error(w, "model unavailable", http.StatusServiceUnavailable)
	}
	srv := httptest.NewServer(http.HandlerFunc(handler))
	defer srv.Close()

	cfg := Config{
		Name:                  "test",
		BaseURL:               srv.URL,
		APIKey:                "secret",
		MetadataModel:         "primary-model",
		FallbackMetadataModel: "fallback-model",
		HTTPClient:            srv.Client(),
		Log:                   discardLogger(),
	}

	got, err := New(cfg).ExtractMetadata(context.Background(), "hello")
	if err != nil {
		t.Fatalf("ExtractMetadata() error = %v", err)
	}
	if got.Type != "task" {
		t.Fatalf("metadata type = %q, want task", got.Type)
	}
	if primaryHits.Load() == 0 {
		t.Fatal("primary model was never called")
	}
}
func TestExtractMetadataParsesCodeFencedJSON(t *testing.T) { func TestExtractMetadataParsesCodeFencedJSON(t *testing.T) {
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
_ = json.NewEncoder(w).Encode(map[string]any{ _ = json.NewEncoder(w).Encode(map[string]any{