feat(metadata): add stripThinkingBlocks function and related tests
This commit is contained in:
@@ -189,6 +189,7 @@ func (c *Client) extractMetadataWithModel(ctx context.Context, input, model stri
|
|||||||
}
|
}
|
||||||
|
|
||||||
metadataText := strings.TrimSpace(resp.Choices[0].Message.Content)
|
metadataText := strings.TrimSpace(resp.Choices[0].Message.Content)
|
||||||
|
metadataText = stripThinkingBlocks(metadataText)
|
||||||
metadataText = stripCodeFence(metadataText)
|
metadataText = stripCodeFence(metadataText)
|
||||||
metadataText = extractJSONObject(metadataText)
|
metadataText = extractJSONObject(metadataText)
|
||||||
if metadataText == "" {
|
if metadataText == "" {
|
||||||
@@ -320,6 +321,30 @@ func extractJSONObject(s string) string {
|
|||||||
return s[start : end+1]
|
return s[start : end+1]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// stripThinkingBlocks removes <think>...</think> and <thinking>...</thinking>
// blocks produced by reasoning models (DeepSeek R1, QwQ, etc.) so that the
// remaining text can be parsed as JSON without interference from thinking
// content that may itself contain braces.
//
// For each tag, in order:
//   - If a closing tag appears before any opening tag, everything up to and
//     including that closing tag is dropped. Some serving setups put the
//     opening tag in the prompt template, so the response carries only the
//     reasoning text followed by the closing tag.
//   - Every complete open/close pair is removed together with its content.
//   - An opening tag with no matching closing tag discards the rest of the
//     string, since there is no way to tell where the reasoning ends.
//
// The result is returned with surrounding whitespace trimmed.
func stripThinkingBlocks(s string) string {
	for _, tag := range []string{"think", "thinking"} {
		openTag := "<" + tag + ">"
		closeTag := "</" + tag + ">"

		// Orphan closing tag at the front: treat the text before it as
		// reasoning whose opening tag was never emitted.
		if end := strings.Index(s, closeTag); end != -1 {
			if start := strings.Index(s, openTag); start == -1 || end < start {
				s = s[end+len(closeTag):]
			}
		}

		for {
			start := strings.Index(s, openTag)
			if start == -1 {
				break
			}
			// Search from the opening tag so an earlier stray closing tag
			// cannot be paired with it.
			end := strings.Index(s[start:], closeTag)
			if end == -1 {
				// Unterminated block: drop everything from the opening tag on.
				s = s[:start]
				break
			}
			s = s[:start] + s[start+end+len(closeTag):]
		}
	}
	return strings.TrimSpace(s)
}
|
||||||
|
|
||||||
func stripCodeFence(value string) string {
|
func stripCodeFence(value string) string {
|
||||||
value = strings.TrimSpace(value)
|
value = strings.TrimSpace(value)
|
||||||
if !strings.HasPrefix(value, "```") {
|
if !strings.HasPrefix(value, "```") {
|
||||||
|
|||||||
@@ -54,6 +54,95 @@ func TestEmbedRetriesTransientFailures(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestExtractMetadataStripsThinkingBlocks(t *testing.T) {
|
||||||
|
cases := []struct {
|
||||||
|
name string
|
||||||
|
content string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "think tag with braces inside",
|
||||||
|
content: "<think>\nLet me map {this} to the schema carefully.\n</think>\n{\"people\":[],\"action_items\":[],\"dates_mentioned\":[],\"topics\":[\"test\"],\"type\":\"idea\",\"source\":\"\"}",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "thinking tag",
|
||||||
|
content: "<thinking>reasoning {here}</thinking>{\"people\":[],\"action_items\":[],\"dates_mentioned\":[],\"topics\":[\"test\"],\"type\":\"idea\",\"source\":\"\"}",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tc := range cases {
|
||||||
|
t.Run(tc.name, func(t *testing.T) {
|
||||||
|
content := tc.content
|
||||||
|
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
_ = json.NewEncoder(w).Encode(map[string]any{
|
||||||
|
"choices": []map[string]any{
|
||||||
|
{"message": map[string]any{"content": content}},
|
||||||
|
},
|
||||||
|
})
|
||||||
|
}))
|
||||||
|
defer server.Close()
|
||||||
|
|
||||||
|
client := New(Config{
|
||||||
|
Name: "test",
|
||||||
|
BaseURL: server.URL,
|
||||||
|
APIKey: "secret",
|
||||||
|
MetadataModel: "meta-model",
|
||||||
|
HTTPClient: server.Client(),
|
||||||
|
Log: discardLogger(),
|
||||||
|
})
|
||||||
|
|
||||||
|
metadata, err := client.ExtractMetadata(context.Background(), "hello")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("ExtractMetadata() error = %v", err)
|
||||||
|
}
|
||||||
|
if metadata.Type != "idea" {
|
||||||
|
t.Fatalf("metadata type = %q, want idea", metadata.Type)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestExtractMetadataFallbackModel(t *testing.T) {
|
||||||
|
var calls atomic.Int32
|
||||||
|
|
||||||
|
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
var req chatCompletionsRequest
|
||||||
|
_ = json.NewDecoder(r.Body).Decode(&req)
|
||||||
|
|
||||||
|
if req.Model == "primary-model" {
|
||||||
|
calls.Add(1)
|
||||||
|
http.Error(w, "model unavailable", http.StatusServiceUnavailable)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
_ = json.NewEncoder(w).Encode(map[string]any{
|
||||||
|
"choices": []map[string]any{
|
||||||
|
{"message": map[string]any{"content": "{\"people\":[],\"action_items\":[],\"dates_mentioned\":[],\"topics\":[\"test\"],\"type\":\"task\",\"source\":\"\"}"}},
|
||||||
|
},
|
||||||
|
})
|
||||||
|
}))
|
||||||
|
defer server.Close()
|
||||||
|
|
||||||
|
client := New(Config{
|
||||||
|
Name: "test",
|
||||||
|
BaseURL: server.URL,
|
||||||
|
APIKey: "secret",
|
||||||
|
MetadataModel: "primary-model",
|
||||||
|
FallbackMetadataModel: "fallback-model",
|
||||||
|
HTTPClient: server.Client(),
|
||||||
|
Log: discardLogger(),
|
||||||
|
})
|
||||||
|
|
||||||
|
metadata, err := client.ExtractMetadata(context.Background(), "hello")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("ExtractMetadata() error = %v", err)
|
||||||
|
}
|
||||||
|
if metadata.Type != "task" {
|
||||||
|
t.Fatalf("metadata type = %q, want task", metadata.Type)
|
||||||
|
}
|
||||||
|
if calls.Load() == 0 {
|
||||||
|
t.Fatal("primary model was never called")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestExtractMetadataParsesCodeFencedJSON(t *testing.T) {
|
func TestExtractMetadataParsesCodeFencedJSON(t *testing.T) {
|
||||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
_ = json.NewEncoder(w).Encode(map[string]any{
|
_ = json.NewEncoder(w).Encode(map[string]any{
|
||||||
|
|||||||
Reference in New Issue
Block a user