- Added file upload handler to process both multipart and raw file uploads.
- Implemented parsing logic for upload requests, including handling file metadata.
- Introduced SaveFileDecodedInput structure for handling decoded file uploads.
- Created unit tests for file upload parsing and validation.

feat: add metadata retry configuration and functionality
- Introduced MetadataRetryConfig to the application configuration.
- Implemented MetadataRetryer to handle retrying metadata extraction for thoughts.
- Added new tool for retrying failed metadata extractions.
- Updated thought metadata structure to include status and timestamps for metadata processing.

fix: enhance metadata normalization and error handling
- Updated metadata normalization functions to track status and errors.
- Improved handling of metadata extraction failures during thought updates and captures.
- Ensured that metadata status is correctly set during various operations.

refactor: streamline file saving logic in FilesTool
- Refactored Save method in FilesTool to utilize new SaveDecoded method.
- Simplified project and thought ID resolution logic during file saving.
167 lines
5.1 KiB
Go
167 lines
5.1 KiB
Go
package tools
|
|
|
|
import (
|
|
"context"
|
|
"log/slog"
|
|
"reflect"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/google/uuid"
|
|
"github.com/modelcontextprotocol/go-sdk/mcp"
|
|
"golang.org/x/sync/semaphore"
|
|
|
|
"git.warky.dev/wdevs/amcs/internal/ai"
|
|
"git.warky.dev/wdevs/amcs/internal/config"
|
|
"git.warky.dev/wdevs/amcs/internal/metadata"
|
|
"git.warky.dev/wdevs/amcs/internal/session"
|
|
"git.warky.dev/wdevs/amcs/internal/store"
|
|
thoughttypes "git.warky.dev/wdevs/amcs/internal/types"
|
|
)
|
|
|
|
// metadataReparseConcurrency bounds the number of goroutines that
// ReparseMetadataTool.Handle runs concurrently for metadata extraction.
const metadataReparseConcurrency = 4
|
|
|
|
// ReparseMetadataTool re-runs AI metadata extraction over stored thoughts
// and persists any metadata that changed. It is wired as an MCP tool handler.
type ReparseMetadataTool struct {
	store    *store.DB               // thought and project persistence
	provider ai.Provider             // backend used to extract metadata from thought content
	capture  config.CaptureConfig    // normalization settings passed to the metadata package
	sessions *session.ActiveProjects // used to resolve the caller's project scope
	logger   *slog.Logger
}
|
|
|
|
// ReparseMetadataInput is the request payload for the reparse-metadata tool.
// The jsonschema tags carry the user-facing field descriptions.
type ReparseMetadataInput struct {
	// Project optionally scopes the reparse to a single project (name or id).
	Project string `json:"project,omitempty" jsonschema:"optional project name or id to scope the reparse"`
	// Limit caps how many thoughts are processed per call; <=0 defaults to 100.
	Limit int `json:"limit,omitempty" jsonschema:"maximum number of thoughts to process in one call; defaults to 100"`
	// IncludeArchived, when true, also considers archived thoughts.
	IncludeArchived bool `json:"include_archived,omitempty" jsonschema:"whether to include archived thoughts; defaults to false"`
	// OlderThanDays restricts reparsing to thoughts older than N days; 0 disables the restriction.
	OlderThanDays int `json:"older_than_days,omitempty" jsonschema:"only reparse thoughts older than N days; 0 means no restriction"`
	// DryRun reports candidate counts without performing any updates.
	DryRun bool `json:"dry_run,omitempty" jsonschema:"report counts without updating metadata"`
}
|
|
|
|
// ReparseMetadataFailure records a single thought whose updated metadata
// could not be persisted.
type ReparseMetadataFailure struct {
	ID    string `json:"id"`    // thought UUID rendered as a string
	Error string `json:"error"` // error message returned by the store update
}
|
|
|
|
// ReparseMetadataOutput summarizes one reparse run. Each processed thought
// increments exactly one of Reparsed (fresh extraction succeeded) or
// Normalized (extraction failed; existing metadata was normalized instead),
// and then one of Updated, Skipped, or Failed depending on persistence.
type ReparseMetadataOutput struct {
	Scanned    int  `json:"scanned"`    // thoughts fetched as reparse candidates
	Reparsed   int  `json:"reparsed"`   // thoughts with a successful fresh extraction
	Normalized int  `json:"normalized"` // thoughts that fell back to normalized existing metadata
	Updated    int  `json:"updated"`    // thoughts whose metadata was written back
	Skipped    int  `json:"skipped"`    // thoughts whose target metadata matched what was stored
	Failed     int  `json:"failed"`     // persistence failures; equals len(Failures)
	DryRun     bool `json:"dry_run"`    // echoes the request's dry_run flag
	// Failures details each failed update, keyed by thought id.
	Failures []ReparseMetadataFailure `json:"failures,omitempty"`
}
|
|
|
|
func NewReparseMetadataTool(db *store.DB, provider ai.Provider, capture config.CaptureConfig, sessions *session.ActiveProjects, logger *slog.Logger) *ReparseMetadataTool {
|
|
return &ReparseMetadataTool{store: db, provider: provider, capture: capture, sessions: sessions, logger: logger}
|
|
}
|
|
|
|
func (t *ReparseMetadataTool) Handle(ctx context.Context, req *mcp.CallToolRequest, in ReparseMetadataInput) (*mcp.CallToolResult, ReparseMetadataOutput, error) {
|
|
limit := in.Limit
|
|
if limit <= 0 {
|
|
limit = 100
|
|
}
|
|
|
|
project, err := resolveProject(ctx, t.store, t.sessions, req, in.Project, false)
|
|
if err != nil {
|
|
return nil, ReparseMetadataOutput{}, err
|
|
}
|
|
|
|
var projectID *uuid.UUID
|
|
if project != nil {
|
|
projectID = &project.ID
|
|
}
|
|
|
|
thoughts, err := t.store.ListThoughtsForMetadataReparse(ctx, limit, projectID, in.IncludeArchived, in.OlderThanDays)
|
|
if err != nil {
|
|
return nil, ReparseMetadataOutput{}, err
|
|
}
|
|
|
|
out := ReparseMetadataOutput{
|
|
Scanned: len(thoughts),
|
|
DryRun: in.DryRun,
|
|
}
|
|
|
|
if in.DryRun || len(thoughts) == 0 {
|
|
return nil, out, nil
|
|
}
|
|
|
|
start := time.Now()
|
|
sem := semaphore.NewWeighted(metadataReparseConcurrency)
|
|
var mu sync.Mutex
|
|
var wg sync.WaitGroup
|
|
|
|
for _, thought := range thoughts {
|
|
if ctx.Err() != nil {
|
|
break
|
|
}
|
|
if err := sem.Acquire(ctx, 1); err != nil {
|
|
break
|
|
}
|
|
wg.Add(1)
|
|
go func(thought thoughttypes.Thought) {
|
|
defer wg.Done()
|
|
defer sem.Release(1)
|
|
|
|
normalizedCurrent := metadata.Normalize(thought.Metadata, t.capture)
|
|
|
|
attemptedAt := time.Now().UTC()
|
|
extracted, extractErr := t.provider.ExtractMetadata(ctx, thought.Content)
|
|
normalizedTarget := normalizedCurrent
|
|
if extractErr != nil {
|
|
normalizedTarget = metadata.MarkMetadataFailed(normalizedCurrent, t.capture, attemptedAt, extractErr)
|
|
mu.Lock()
|
|
out.Normalized++
|
|
mu.Unlock()
|
|
t.logger.Warn("metadata reparse extract failed, using normalized existing metadata", slog.String("thought_id", thought.ID.String()), slog.String("error", extractErr.Error()))
|
|
} else {
|
|
normalizedTarget = metadata.MarkMetadataComplete(extracted, t.capture, attemptedAt)
|
|
normalizedTarget.Attachments = thought.Metadata.Attachments
|
|
mu.Lock()
|
|
out.Reparsed++
|
|
mu.Unlock()
|
|
}
|
|
|
|
if metadataEqual(thought.Metadata, normalizedTarget) {
|
|
mu.Lock()
|
|
out.Skipped++
|
|
mu.Unlock()
|
|
return
|
|
}
|
|
|
|
if _, updateErr := t.store.UpdateThought(ctx, thought.ID, thought.Content, nil, "", normalizedTarget, thought.ProjectID); updateErr != nil {
|
|
mu.Lock()
|
|
out.Failures = append(out.Failures, ReparseMetadataFailure{ID: thought.ID.String(), Error: updateErr.Error()})
|
|
mu.Unlock()
|
|
t.logger.Warn("metadata reparse update failed", slog.String("thought_id", thought.ID.String()), slog.String("error", updateErr.Error()))
|
|
return
|
|
}
|
|
|
|
mu.Lock()
|
|
out.Updated++
|
|
mu.Unlock()
|
|
}(thought)
|
|
}
|
|
|
|
wg.Wait()
|
|
|
|
out.Failed = len(out.Failures)
|
|
|
|
t.logger.Info("metadata reparse completed",
|
|
slog.Int("scanned", out.Scanned),
|
|
slog.Int("reparsed", out.Reparsed),
|
|
slog.Int("normalized", out.Normalized),
|
|
slog.Int("updated", out.Updated),
|
|
slog.Int("skipped", out.Skipped),
|
|
slog.Int("failed", out.Failed),
|
|
slog.Duration("duration", time.Since(start)),
|
|
)
|
|
|
|
return nil, out, nil
|
|
}
|
|
|
|
// metadataEqual reports whether two thought metadata values are deeply equal.
// Note: reflect.DeepEqual distinguishes nil from empty slices/maps, so a
// normalization that materializes empty collections reads as a change here.
func metadataEqual(a, b thoughttypes.ThoughtMetadata) bool {
	return reflect.DeepEqual(a, b)
}
|