// amcs/internal/tools/reparse_metadata.go

package tools

import (
	"context"
	"log/slog"
	"reflect"
	"sync"
	"time"

	"github.com/google/uuid"
	"github.com/modelcontextprotocol/go-sdk/mcp"
	"golang.org/x/sync/semaphore"

	"git.warky.dev/wdevs/amcs/internal/ai"
	"git.warky.dev/wdevs/amcs/internal/config"
	"git.warky.dev/wdevs/amcs/internal/metadata"
	"git.warky.dev/wdevs/amcs/internal/session"
	"git.warky.dev/wdevs/amcs/internal/store"
	thoughttypes "git.warky.dev/wdevs/amcs/internal/types"
)

// metadataReparseConcurrency is the weight of the semaphore that bounds how
// many thoughts a single Handle call reparses at the same time.
const metadataReparseConcurrency = 4

// ReparseMetadataTool is the tool handler that re-extracts metadata for
// already stored thoughts.
//
// It lists candidate thoughts from store and writes updated metadata back to
// it, asks provider to extract metadata from each thought's content, passes
// capture to the metadata normalization helpers, uses sessions when resolving
// the target project, and reports progress and failures through logger.
type ReparseMetadataTool struct {
	store    *store.DB
	provider ai.Provider
	capture  config.CaptureConfig
	sessions *session.ActiveProjects
	logger   *slog.Logger
}

// ReparseMetadataInput holds the arguments accepted by the reparse tool.
//
// Project optionally scopes the run to one project (name or id). Limit caps
// the number of thoughts handled per call; Handle substitutes 100 when it is
// zero or negative. IncludeArchived and OlderThanDays are forwarded to the
// store query that selects candidates. DryRun makes Handle return after
// counting the candidates, without extracting or updating anything.
type ReparseMetadataInput struct {
	Project         string `json:"project,omitempty" jsonschema:"optional project name or id to scope the reparse"`
	Limit           int    `json:"limit,omitempty" jsonschema:"maximum number of thoughts to process in one call; defaults to 100"`
	IncludeArchived bool   `json:"include_archived,omitempty" jsonschema:"whether to include archived thoughts; defaults to false"`
	OlderThanDays   int    `json:"older_than_days,omitempty" jsonschema:"only reparse thoughts older than N days; 0 means no restriction"`
	DryRun          bool   `json:"dry_run,omitempty" jsonschema:"report counts without updating metadata"`
}

// ReparseMetadataFailure describes one thought whose metadata update could
// not be stored: ID is the thought's id in string form and Error is the
// message of the error returned by the store.
type ReparseMetadataFailure struct {
	ID    string `json:"id"`
	Error string `json:"error"`
}

// ReparseMetadataOutput summarizes one reparse run.
//
// Scanned is the number of candidate thoughts returned by the store.
// Reparsed counts thoughts whose extraction succeeded; Normalized counts
// thoughts whose extraction failed and whose normalized existing metadata was
// marked as failed instead. Updated counts thoughts whose new metadata was
// written, Skipped counts thoughts whose resulting metadata was identical to
// what was already stored, and Failed is the length of Failures, which lists
// the thoughts whose store update returned an error. DryRun echoes the
// request flag.
type ReparseMetadataOutput struct {
	Scanned    int                      `json:"scanned"`
	Reparsed   int                      `json:"reparsed"`
	Normalized int                      `json:"normalized"`
	Updated    int                      `json:"updated"`
	Skipped    int                      `json:"skipped"`
	Failed     int                      `json:"failed"`
	DryRun     bool                     `json:"dry_run"`
	Failures   []ReparseMetadataFailure `json:"failures,omitempty"`
}

// NewReparseMetadataTool wires the given store, AI provider, capture
// configuration, active-project sessions and logger into a ready-to-use
// ReparseMetadataTool.
func NewReparseMetadataTool(db *store.DB, provider ai.Provider, capture config.CaptureConfig, sessions *session.ActiveProjects, logger *slog.Logger) *ReparseMetadataTool {
	tool := new(ReparseMetadataTool)
	tool.store = db
	tool.provider = provider
	tool.capture = capture
	tool.sessions = sessions
	tool.logger = logger
	return tool
}

// Handle re-extracts metadata for a batch of stored thoughts and reports what
// happened to them.
//
// The batch is selected by the store using the resolved project (if any), the
// limit (100 when in.Limit is zero or negative), in.IncludeArchived and
// in.OlderThanDays. When in.DryRun is set, or no thoughts match, only the
// Scanned count is reported and nothing is extracted or written.
//
// Otherwise thoughts are processed concurrently, at most
// metadataReparseConcurrency at a time. Per-thought extraction and update
// errors never fail the call: they are logged and reflected in the returned
// counters and Failures list. If ctx ends before every thought has been
// started, the remaining thoughts are left untouched, a warning is logged and
// the partial counters are returned.
//
// The returned *mcp.CallToolResult is always nil. The error is non-nil only
// when resolving the project or listing the thoughts fails.
func (t *ReparseMetadataTool) Handle(ctx context.Context, req *mcp.CallToolRequest, in ReparseMetadataInput) (*mcp.CallToolResult, ReparseMetadataOutput, error) {
	limit := in.Limit
	if limit <= 0 {
		limit = 100
	}

	project, err := resolveProject(ctx, t.store, t.sessions, req, in.Project, false)
	if err != nil {
		return nil, ReparseMetadataOutput{}, err
	}

	var projectID *uuid.UUID
	if project != nil {
		projectID = &project.ID
	}

	thoughts, err := t.store.ListThoughtsForMetadataReparse(ctx, limit, projectID, in.IncludeArchived, in.OlderThanDays)
	if err != nil {
		return nil, ReparseMetadataOutput{}, err
	}

	out := ReparseMetadataOutput{
		Scanned: len(thoughts),
		DryRun:  in.DryRun,
	}

	if in.DryRun || len(thoughts) == 0 {
		return nil, out, nil
	}

	start := time.Now()
	sem := semaphore.NewWeighted(metadataReparseConcurrency)
	var (
		mu      sync.Mutex
		wg      sync.WaitGroup
		stopErr error // why the loop stopped early, if it did
	)

	for _, thought := range thoughts {
		// Stop handing out work once the context is done; thoughts already
		// in flight are still awaited below.
		if stopErr = ctx.Err(); stopErr != nil {
			break
		}
		if stopErr = sem.Acquire(ctx, 1); stopErr != nil {
			break
		}
		wg.Add(1)
		go func(thought thoughttypes.Thought) {
			defer wg.Done()
			defer sem.Release(1)

			delta := t.reparseThought(ctx, thought)

			// out is shared by all workers; fold the result in under one lock.
			mu.Lock()
			defer mu.Unlock()
			mergeReparseCounts(&out, delta)
		}(thought)
	}

	wg.Wait()

	out.Failed = len(out.Failures)

	if stopErr != nil {
		t.logger.Warn("metadata reparse interrupted before all thoughts were processed", slog.String("error", stopErr.Error()))
	}

	t.logger.Info("metadata reparse completed",
		slog.Int("scanned", out.Scanned),
		slog.Int("reparsed", out.Reparsed),
		slog.Int("normalized", out.Normalized),
		slog.Int("updated", out.Updated),
		slog.Int("skipped", out.Skipped),
		slog.Int("failed", out.Failed),
		slog.Duration("duration", time.Since(start)),
	)

	return nil, out, nil
}

// reparseThought processes a single thought and returns the counter changes it
// caused (each counter is 0 or 1, Failures has at most one entry). It touches
// no shared state, so it is safe to call from concurrent workers.
func (t *ReparseMetadataTool) reparseThought(ctx context.Context, thought thoughttypes.Thought) ReparseMetadataOutput {
	var delta ReparseMetadataOutput

	normalizedCurrent := metadata.Normalize(thought.Metadata, t.capture)
	attemptedAt := time.Now().UTC()

	var target thoughttypes.ThoughtMetadata
	extracted, extractErr := t.provider.ExtractMetadata(ctx, thought.Content)
	if extractErr != nil {
		// Extraction failed: keep the normalized existing metadata and mark
		// the failed attempt on it.
		target = metadata.MarkMetadataFailed(normalizedCurrent, t.capture, attemptedAt, extractErr)
		delta.Normalized = 1
		t.logger.Warn("metadata reparse extract failed, using normalized existing metadata", slog.String("thought_id", thought.ID.String()), slog.String("error", extractErr.Error()))
	} else {
		target = metadata.MarkMetadataComplete(extracted, t.capture, attemptedAt)
		// Attachments are carried over from the stored metadata rather than
		// taken from the extraction result.
		target.Attachments = thought.Metadata.Attachments
		delta.Reparsed = 1
	}

	// Nothing to write when the result matches what is already stored.
	if metadataEqual(thought.Metadata, target) {
		delta.Skipped = 1
		return delta
	}

	if _, updateErr := t.store.UpdateThought(ctx, thought.ID, thought.Content, nil, "", target, thought.ProjectID); updateErr != nil {
		delta.Failures = append(delta.Failures, ReparseMetadataFailure{ID: thought.ID.String(), Error: updateErr.Error()})
		t.logger.Warn("metadata reparse update failed", slog.String("thought_id", thought.ID.String()), slog.String("error", updateErr.Error()))
		return delta
	}

	delta.Updated = 1
	return delta
}

// mergeReparseCounts adds the per-thought counters and failures in delta to
// dst. The caller must hold whatever lock protects dst.
func mergeReparseCounts(dst *ReparseMetadataOutput, delta ReparseMetadataOutput) {
	dst.Reparsed += delta.Reparsed
	dst.Normalized += delta.Normalized
	dst.Skipped += delta.Skipped
	dst.Updated += delta.Updated
	dst.Failures = append(dst.Failures, delta.Failures...)
}

// metadataEqual reports whether two metadata values are deeply equal, as
// defined by reflect.DeepEqual.
func metadataEqual(a, b thoughttypes.ThoughtMetadata) bool {
	identical := reflect.DeepEqual(a, b)
	return identical
}