Files
amcs/internal/tools/reparse_metadata.go
Hein 14e218d784
Some checks failed
CI / build-and-test (push) Failing after 32m22s
test(config): add migration tests for litellm provider
* Implement tests for migrating configuration from v1 to v2 for the litellm provider.
* Validate the structure and values of the migrated configuration.
* Ensure migration rejects newer versions of the configuration.
fix(validate): enhance AI provider validation logic
* Consolidate provider validation into a dedicated method.
* Ensure at least one provider is specified and validate its type.
* Check for required fields based on provider type.
fix(mcpserver): update tool set to use new enrichment tool
* Replace RetryMetadataTool with RetryEnrichmentTool in the ToolSet.
fix(tools): refactor tools to use embedding and metadata runners
* Update tools to utilize EmbeddingRunner and MetadataRunner instead of Provider.
* Adjust method calls to align with the new runner interfaces.
2026-04-21 21:14:28 +02:00

167 lines
5.2 KiB
Go

package tools
import (
"context"
"log/slog"
"reflect"
"sync"
"time"
"github.com/google/uuid"
"github.com/modelcontextprotocol/go-sdk/mcp"
"golang.org/x/sync/semaphore"
"git.warky.dev/wdevs/amcs/internal/ai"
"git.warky.dev/wdevs/amcs/internal/config"
"git.warky.dev/wdevs/amcs/internal/metadata"
"git.warky.dev/wdevs/amcs/internal/session"
"git.warky.dev/wdevs/amcs/internal/store"
thoughttypes "git.warky.dev/wdevs/amcs/internal/types"
)
// metadataReparseConcurrency bounds how many metadata-extraction goroutines
// Handle runs at once via its weighted semaphore.
const metadataReparseConcurrency = 4
// ReparseMetadataTool is an MCP tool that re-runs AI metadata extraction over
// stored thoughts and persists the normalized results.
type ReparseMetadataTool struct {
	store    *store.DB                // thought storage; used to list and update thoughts
	metadata *ai.MetadataRunner       // runs the AI metadata extraction
	capture  config.CaptureConfig     // capture settings passed to metadata normalization/marking
	sessions *session.ActiveProjects  // active-project tracking used to resolve the target project
	logger   *slog.Logger             // structured logger for warnings and the completion summary
}
// ReparseMetadataInput is the caller-supplied argument set for the
// reparse-metadata tool. All fields are optional; zero values select the
// documented defaults.
type ReparseMetadataInput struct {
	Project         string `json:"project,omitempty" jsonschema:"optional project name or id to scope the reparse"`
	Limit           int    `json:"limit,omitempty" jsonschema:"maximum number of thoughts to process in one call; defaults to 100"`
	IncludeArchived bool   `json:"include_archived,omitempty" jsonschema:"whether to include archived thoughts; defaults to false"`
	OlderThanDays   int    `json:"older_than_days,omitempty" jsonschema:"only reparse thoughts older than N days; 0 means no restriction"`
	DryRun          bool   `json:"dry_run,omitempty" jsonschema:"report counts without updating metadata"`
}
// ReparseMetadataFailure records a single thought whose metadata update
// failed, pairing the thought ID with the error text.
type ReparseMetadataFailure struct {
	ID    string `json:"id"`
	Error string `json:"error"`
}
// ReparseMetadataOutput summarizes one reparse run. Counters are populated by
// Handle: Scanned is everything returned by the store query; each scanned
// thought then lands in exactly one of Reparsed (fresh extraction succeeded)
// or Normalized (extraction failed, existing metadata normalized instead),
// and separately in Updated, Skipped (no change needed), or Failed.
type ReparseMetadataOutput struct {
	Scanned    int                      `json:"scanned"`
	Reparsed   int                      `json:"reparsed"`
	Normalized int                      `json:"normalized"`
	Updated    int                      `json:"updated"`
	Skipped    int                      `json:"skipped"`
	Failed     int                      `json:"failed"`
	DryRun     bool                     `json:"dry_run"`
	Failures   []ReparseMetadataFailure `json:"failures,omitempty"`
}
// NewReparseMetadataTool assembles a ReparseMetadataTool from its
// dependencies: the thought store, the AI metadata runner, capture
// configuration, the active-project session tracker, and a logger.
func NewReparseMetadataTool(db *store.DB, metadataRunner *ai.MetadataRunner, capture config.CaptureConfig, sessions *session.ActiveProjects, logger *slog.Logger) *ReparseMetadataTool {
	tool := &ReparseMetadataTool{
		store:    db,
		metadata: metadataRunner,
		capture:  capture,
		sessions: sessions,
		logger:   logger,
	}
	return tool
}
// Handle runs one reparse batch: it lists up to in.Limit thoughts matching the
// input filters, re-extracts metadata for each with bounded concurrency, and
// updates thoughts whose normalized metadata actually changed. In dry-run mode
// (or when nothing matches) it returns only the scan count without touching
// the store.
func (t *ReparseMetadataTool) Handle(ctx context.Context, req *mcp.CallToolRequest, in ReparseMetadataInput) (*mcp.CallToolResult, ReparseMetadataOutput, error) {
	// Default to a batch of 100 when the caller omits or zeroes the limit.
	limit := in.Limit
	if limit <= 0 {
		limit = 100
	}
	// Resolve the optional project scope; an empty in.Project leaves the
	// reparse unscoped (projectID stays nil).
	project, err := resolveProject(ctx, t.store, t.sessions, req, in.Project, false)
	if err != nil {
		return nil, ReparseMetadataOutput{}, err
	}
	var projectID *uuid.UUID
	if project != nil {
		projectID = &project.ID
	}
	thoughts, err := t.store.ListThoughtsForMetadataReparse(ctx, limit, projectID, in.IncludeArchived, in.OlderThanDays)
	if err != nil {
		return nil, ReparseMetadataOutput{}, err
	}
	out := ReparseMetadataOutput{
		Scanned: len(thoughts),
		DryRun:  in.DryRun,
	}
	// Dry runs report the scan count only; nothing is extracted or written.
	if in.DryRun || len(thoughts) == 0 {
		return nil, out, nil
	}
	start := time.Now()
	// Bound concurrent extractions; mu guards every write to out from the
	// worker goroutines below.
	sem := semaphore.NewWeighted(metadataReparseConcurrency)
	var mu sync.Mutex
	var wg sync.WaitGroup
	for _, thought := range thoughts {
		// Stop launching new work once the context is cancelled; thoughts
		// not yet dispatched are simply left uncounted.
		if ctx.Err() != nil {
			break
		}
		if err := sem.Acquire(ctx, 1); err != nil {
			break
		}
		wg.Add(1)
		// thought is passed by value so each goroutine gets its own copy.
		go func(thought thoughttypes.Thought) {
			defer wg.Done()
			defer sem.Release(1)
			normalizedCurrent := metadata.Normalize(thought.Metadata, t.capture)
			attemptedAt := time.Now().UTC()
			extracted, extractErr := t.metadata.ExtractMetadata(ctx, thought.Content)
			normalizedTarget := normalizedCurrent
			if extractErr != nil {
				// Extraction failed: keep the normalized existing metadata but
				// mark the failed attempt, and count this thought as Normalized.
				normalizedTarget = metadata.MarkMetadataFailed(normalizedCurrent, t.capture, attemptedAt, extractErr)
				mu.Lock()
				out.Normalized++
				mu.Unlock()
				t.logger.Warn("metadata reparse extract failed, using normalized existing metadata", slog.String("thought_id", thought.ID.String()), slog.String("error", extractErr.Error()))
			} else {
				// Extraction succeeded: sanitize and mark complete, but carry
				// over existing attachments, which extraction does not produce.
				normalizedTarget = metadata.MarkMetadataComplete(metadata.SanitizeExtracted(extracted), t.capture, attemptedAt)
				normalizedTarget.Attachments = thought.Metadata.Attachments
				mu.Lock()
				out.Reparsed++
				mu.Unlock()
			}
			// Skip the store write when the target metadata is identical to
			// what is already persisted.
			if metadataEqual(thought.Metadata, normalizedTarget) {
				mu.Lock()
				out.Skipped++
				mu.Unlock()
				return
			}
			if _, updateErr := t.store.UpdateThought(ctx, thought.ID, thought.Content, nil, "", normalizedTarget, thought.ProjectID); updateErr != nil {
				mu.Lock()
				out.Failures = append(out.Failures, ReparseMetadataFailure{ID: thought.ID.String(), Error: updateErr.Error()})
				mu.Unlock()
				t.logger.Warn("metadata reparse update failed", slog.String("thought_id", thought.ID.String()), slog.String("error", updateErr.Error()))
				return
			}
			mu.Lock()
			out.Updated++
			mu.Unlock()
		}(thought)
	}
	wg.Wait()
	// Failed counts only store-update failures; extract failures are folded
	// into Normalized above and still attempt an update.
	out.Failed = len(out.Failures)
	t.logger.Info("metadata reparse completed",
		slog.Int("scanned", out.Scanned),
		slog.Int("reparsed", out.Reparsed),
		slog.Int("normalized", out.Normalized),
		slog.Int("updated", out.Updated),
		slog.Int("skipped", out.Skipped),
		slog.Int("failed", out.Failed),
		slog.Duration("duration", time.Since(start)),
	)
	return nil, out, nil
}
// metadataEqual reports whether two thought metadata values are structurally
// identical, comparing every nested field via deep equality.
func metadataEqual(a, b thoughttypes.ThoughtMetadata) bool {
	same := reflect.DeepEqual(a, b)
	return same
}