feat: implement file upload handler and related functionality

- Added file upload handler to process both multipart and raw file uploads.
- Implemented parsing logic for upload requests, including handling file metadata.
- Introduced SaveFileDecodedInput structure for handling decoded file uploads.
- Created unit tests for file upload parsing and validation.

feat: add metadata retry configuration and functionality

- Introduced MetadataRetryConfig to the application configuration.
- Implemented MetadataRetryer to handle retrying metadata extraction for thoughts.
- Added new tool for retrying failed metadata extractions.
- Updated thought metadata structure to include status and timestamps for metadata processing.

fix: enhance metadata normalization and error handling

- Updated metadata normalization functions to track status and errors.
- Improved handling of metadata extraction failures during thought updates and captures.
- Ensured that metadata status is correctly set during various operations.

refactor: streamline file saving logic in FilesTool

- Refactored Save method in FilesTool to utilize new SaveDecoded method.
- Simplified project and thought ID resolution logic during file saving.
This commit is contained in:
2026-03-30 22:57:21 +02:00
parent 7f2b2b9fee
commit 72b4f7ce3d
21 changed files with 890 additions and 126 deletions

View File

@@ -23,6 +23,7 @@ type CaptureTool struct {
capture config.CaptureConfig
sessions *session.ActiveProjects
metadataTimeout time.Duration
retryer *MetadataRetryer
log *slog.Logger
}
@@ -35,8 +36,8 @@ type CaptureOutput struct {
Thought thoughttypes.Thought `json:"thought"`
}
func NewCaptureTool(db *store.DB, provider ai.Provider, capture config.CaptureConfig, metadataTimeout time.Duration, sessions *session.ActiveProjects, log *slog.Logger) *CaptureTool {
return &CaptureTool{store: db, provider: provider, capture: capture, sessions: sessions, metadataTimeout: metadataTimeout, log: log}
func NewCaptureTool(db *store.DB, provider ai.Provider, capture config.CaptureConfig, metadataTimeout time.Duration, sessions *session.ActiveProjects, retryer *MetadataRetryer, log *slog.Logger) *CaptureTool {
return &CaptureTool{store: db, provider: provider, capture: capture, sessions: sessions, metadataTimeout: metadataTimeout, retryer: retryer, log: log}
}
func (t *CaptureTool) Handle(ctx context.Context, req *mcp.CallToolRequest, in CaptureInput) (*mcp.CallToolResult, CaptureOutput, error) {
@@ -52,6 +53,7 @@ func (t *CaptureTool) Handle(ctx context.Context, req *mcp.CallToolRequest, in C
var embedding []float32
rawMetadata := metadata.Fallback(t.capture)
metadataNeedsRetry := false
group, groupCtx := errgroup.WithContext(ctx)
group.Go(func() error {
@@ -64,6 +66,7 @@ func (t *CaptureTool) Handle(ctx context.Context, req *mcp.CallToolRequest, in C
})
group.Go(func() error {
metaCtx := groupCtx
attemptedAt := time.Now().UTC()
if t.metadataTimeout > 0 {
var cancel context.CancelFunc
metaCtx, cancel = context.WithTimeout(groupCtx, t.metadataTimeout)
@@ -72,9 +75,11 @@ func (t *CaptureTool) Handle(ctx context.Context, req *mcp.CallToolRequest, in C
extracted, err := t.provider.ExtractMetadata(metaCtx, content)
if err != nil {
t.log.Warn("metadata extraction failed, using fallback", slog.String("provider", t.provider.Name()), slog.String("error", err.Error()))
rawMetadata = metadata.MarkMetadataPending(rawMetadata, t.capture, attemptedAt, err)
metadataNeedsRetry = true
return nil
}
rawMetadata = extracted
rawMetadata = metadata.MarkMetadataComplete(extracted, t.capture, attemptedAt)
return nil
})
@@ -98,6 +103,9 @@ func (t *CaptureTool) Handle(ctx context.Context, req *mcp.CallToolRequest, in C
if project != nil {
_ = t.store.TouchProject(ctx, project.ID)
}
if metadataNeedsRetry && t.retryer != nil {
t.retryer.QueueThought(created.ID)
}
return nil, CaptureOutput{Thought: created}, nil
}

View File

@@ -30,6 +30,15 @@ type SaveFileInput struct {
Project string `json:"project,omitempty" jsonschema:"optional project name or id when saving outside a linked thought"`
}
// SaveFileDecodedInput carries an already-decoded file payload for
// FilesTool.SaveDecoded, bypassing the base64 parsing performed by Save.
type SaveFileDecodedInput struct {
// File name; required (whitespace-trimmed before validation).
Name string
// Raw decoded file bytes; must be non-empty.
Content []byte
// Optional explicit media type; sniffed from Content when empty.
MediaType string
// Optional file kind; derived from the media type when empty.
Kind string
// Optional UUID string linking the file to an existing thought.
ThoughtID string
// Optional project name or id used when resolving the owning project.
Project string
}
type SaveFileOutput struct {
File thoughttypes.StoredFile `json:"file"`
}
@@ -59,11 +68,6 @@ func NewFilesTool(db *store.DB, sessions *session.ActiveProjects) *FilesTool {
}
func (t *FilesTool) Save(ctx context.Context, req *mcp.CallToolRequest, in SaveFileInput) (*mcp.CallToolResult, SaveFileOutput, error) {
name := strings.TrimSpace(in.Name)
if name == "" {
return nil, SaveFileOutput{}, errInvalidInput("name is required")
}
contentBase64, mediaTypeFromDataURL := splitDataURL(strings.TrimSpace(in.ContentBase64))
if contentBase64 == "" {
return nil, SaveFileOutput{}, errInvalidInput("content_base64 is required")
@@ -73,66 +77,18 @@ func (t *FilesTool) Save(ctx context.Context, req *mcp.CallToolRequest, in SaveF
if err != nil {
return nil, SaveFileOutput{}, errInvalidInput("content_base64 must be valid base64")
}
if len(content) == 0 {
return nil, SaveFileOutput{}, errInvalidInput("decoded file content must not be empty")
}
project, err := resolveProject(ctx, t.store, t.sessions, req, in.Project, false)
if err != nil {
return nil, SaveFileOutput{}, err
}
var thoughtID *uuid.UUID
var projectID = projectIDPtr(project)
if rawThoughtID := strings.TrimSpace(in.ThoughtID); rawThoughtID != "" {
parsedThoughtID, err := parseUUID(rawThoughtID)
if err != nil {
return nil, SaveFileOutput{}, err
}
thought, err := t.store.GetThought(ctx, parsedThoughtID)
if err != nil {
return nil, SaveFileOutput{}, err
}
thoughtID = &parsedThoughtID
projectID = thought.ProjectID
if project != nil && thought.ProjectID != nil && *thought.ProjectID != project.ID {
return nil, SaveFileOutput{}, errInvalidInput("project does not match the linked thought's project")
}
}
mediaType := normalizeMediaType(strings.TrimSpace(in.MediaType), mediaTypeFromDataURL, content)
kind := normalizeFileKind(strings.TrimSpace(in.Kind), mediaType)
sum := sha256.Sum256(content)
file := thoughttypes.StoredFile{
Name: name,
MediaType: mediaType,
Kind: kind,
Encoding: "base64",
SizeBytes: int64(len(content)),
SHA256: hex.EncodeToString(sum[:]),
out, err := t.SaveDecoded(ctx, req, SaveFileDecodedInput{
Name: in.Name,
Content: content,
ProjectID: projectID,
}
if thoughtID != nil {
file.ThoughtID = thoughtID
}
created, err := t.store.InsertStoredFile(ctx, file)
MediaType: firstNonEmpty(strings.TrimSpace(in.MediaType), mediaTypeFromDataURL),
Kind: in.Kind,
ThoughtID: in.ThoughtID,
Project: in.Project,
})
if err != nil {
return nil, SaveFileOutput{}, err
}
if created.ThoughtID != nil {
if err := t.store.AddThoughtAttachment(ctx, *created.ThoughtID, thoughtAttachmentFromFile(created)); err != nil {
return nil, SaveFileOutput{}, err
}
}
if created.ProjectID != nil {
_ = t.store.TouchProject(ctx, *created.ProjectID)
}
return nil, SaveFileOutput{File: created}, nil
return nil, out, nil
}
func (t *FilesTool) Load(ctx context.Context, _ *mcp.CallToolRequest, in LoadFileInput) (*mcp.CallToolResult, LoadFileOutput, error) {
@@ -193,6 +149,73 @@ func (t *FilesTool) List(ctx context.Context, req *mcp.CallToolRequest, in ListF
return nil, ListFilesOutput{Files: files}, nil
}
// SaveDecoded validates an already-decoded file payload, persists it as a
// StoredFile, and (when linked to a thought) records a matching attachment.
// It returns errInvalidInput for empty names/content and for a project that
// conflicts with the linked thought's project.
func (t *FilesTool) SaveDecoded(ctx context.Context, req *mcp.CallToolRequest, in SaveFileDecodedInput) (SaveFileOutput, error) {
name := strings.TrimSpace(in.Name)
if name == "" {
return SaveFileOutput{}, errInvalidInput("name is required")
}
if len(in.Content) == 0 {
return SaveFileOutput{}, errInvalidInput("decoded file content must not be empty")
}
// Resolve the target project from explicit input or the active session.
project, err := resolveProject(ctx, t.store, t.sessions, req, in.Project, false)
if err != nil {
return SaveFileOutput{}, err
}
var thoughtID *uuid.UUID
var projectID = projectIDPtr(project)
if rawThoughtID := strings.TrimSpace(in.ThoughtID); rawThoughtID != "" {
parsedThoughtID, err := parseUUID(rawThoughtID)
if err != nil {
return SaveFileOutput{}, err
}
thought, err := t.store.GetThought(ctx, parsedThoughtID)
if err != nil {
return SaveFileOutput{}, err
}
thoughtID = &parsedThoughtID
// The linked thought's project wins over the resolved one, but an
// explicitly supplied project must not contradict it.
projectID = thought.ProjectID
if project != nil && thought.ProjectID != nil && *thought.ProjectID != project.ID {
return SaveFileOutput{}, errInvalidInput("project does not match the linked thought's project")
}
}
// No data-URL hint is available here (content is already decoded), so the
// second argument is empty and sniffing falls back to the raw bytes.
mediaType := normalizeMediaType(strings.TrimSpace(in.MediaType), "", in.Content)
kind := normalizeFileKind(strings.TrimSpace(in.Kind), mediaType)
// Content-addressable checksum stored alongside the file record.
sum := sha256.Sum256(in.Content)
file := thoughttypes.StoredFile{
Name: name,
MediaType: mediaType,
Kind: kind,
// NOTE(review): Encoding is recorded as "base64" even though Content holds
// decoded bytes — presumably describes the wire/storage encoding; confirm.
Encoding: "base64",
SizeBytes: int64(len(in.Content)),
SHA256: hex.EncodeToString(sum[:]),
Content: in.Content,
ProjectID: projectID,
}
if thoughtID != nil {
file.ThoughtID = thoughtID
}
created, err := t.store.InsertStoredFile(ctx, file)
if err != nil {
return SaveFileOutput{}, err
}
// Link the stored file to its thought via an attachment record.
if created.ThoughtID != nil {
if err := t.store.AddThoughtAttachment(ctx, *created.ThoughtID, thoughtAttachmentFromFile(created)); err != nil {
return SaveFileOutput{}, err
}
}
// Best-effort bump of the project's activity timestamp; failure is ignored.
if created.ProjectID != nil {
_ = t.store.TouchProject(ctx, *created.ProjectID)
}
return SaveFileOutput{File: created}, nil
}
func thoughtAttachmentFromFile(file thoughttypes.StoredFile) thoughttypes.ThoughtAttachment {
return thoughttypes.ThoughtAttachment{
FileID: file.ID,
@@ -238,6 +261,15 @@ func normalizeMediaType(explicit string, fromDataURL string, content []byte) str
}
}
// firstNonEmpty returns the first value that is non-empty after trimming
// leading and trailing whitespace, or "" when every value is blank.
func firstNonEmpty(values ...string) string {
	for _, value := range values {
		// Trim once and reuse the result (the original trimmed each value twice).
		if trimmed := strings.TrimSpace(value); trimmed != "" {
			return trimmed
		}
	}
	return ""
}
func normalizeFileKind(explicit string, mediaType string) string {
if explicit != "" {
return explicit

View File

@@ -0,0 +1,206 @@
package tools
import (
"context"
"log/slog"
"sync"
"time"
"github.com/google/uuid"
"github.com/modelcontextprotocol/go-sdk/mcp"
"golang.org/x/sync/semaphore"
"git.warky.dev/wdevs/amcs/internal/ai"
"git.warky.dev/wdevs/amcs/internal/config"
"git.warky.dev/wdevs/amcs/internal/metadata"
"git.warky.dev/wdevs/amcs/internal/session"
"git.warky.dev/wdevs/amcs/internal/store"
thoughttypes "git.warky.dev/wdevs/amcs/internal/types"
)
const metadataRetryConcurrency = 4
// MetadataRetryer re-runs AI metadata extraction for thoughts whose previous
// extraction failed, either in the background (QueueThought) or in batch via
// the retry tool (Handle).
type MetadataRetryer struct {
// backgroundCtx bounds background retries started by QueueThought;
// NewMetadataRetryer substitutes context.Background() when nil is supplied.
backgroundCtx context.Context
store *store.DB
provider ai.Provider
capture config.CaptureConfig
sessions *session.ActiveProjects
// metadataTimeout caps a single extraction attempt; 0 disables the timeout.
metadataTimeout time.Duration
logger *slog.Logger
}
// RetryMetadataTool exposes MetadataRetryer.Handle as an MCP tool.
type RetryMetadataTool struct {
retryer *MetadataRetryer
}
// RetryMetadataInput is the MCP input for the retry-metadata tool; per-field
// semantics are carried in the jsonschema tags below.
type RetryMetadataInput struct {
Project string `json:"project,omitempty" jsonschema:"optional project name or id to scope the retry"`
Limit int `json:"limit,omitempty" jsonschema:"maximum number of thoughts to process in one call; defaults to 100"`
IncludeArchived bool `json:"include_archived,omitempty" jsonschema:"whether to include archived thoughts; defaults to false"`
OlderThanDays int `json:"older_than_days,omitempty" jsonschema:"only retry thoughts whose last metadata attempt was at least N days ago; 0 means no restriction"`
DryRun bool `json:"dry_run,omitempty" jsonschema:"report counts without retrying metadata extraction"`
}
// RetryMetadataFailure reports a single thought whose retry attempt failed.
type RetryMetadataFailure struct {
ID string `json:"id"`
Error string `json:"error"`
}
// RetryMetadataOutput summarizes one retry pass over pending thoughts.
type RetryMetadataOutput struct {
// Scanned is the number of candidate thoughts returned by the store query.
Scanned int `json:"scanned"`
// Retried counts thoughts for which an extraction attempt was started.
Retried int `json:"retried"`
// Updated counts thoughts whose metadata was successfully replaced.
Updated int `json:"updated"`
// Skipped counts thoughts that needed no update (already complete).
Skipped int `json:"skipped"`
// Failed mirrors len(Failures).
Failed int `json:"failed"`
DryRun bool `json:"dry_run"`
Failures []RetryMetadataFailure `json:"failures,omitempty"`
}
// NewMetadataRetryer constructs a MetadataRetryer. A nil backgroundCtx is
// replaced with context.Background() so background retries always have a
// valid parent context.
func NewMetadataRetryer(backgroundCtx context.Context, db *store.DB, provider ai.Provider, capture config.CaptureConfig, metadataTimeout time.Duration, sessions *session.ActiveProjects, logger *slog.Logger) *MetadataRetryer {
	ctx := backgroundCtx
	if ctx == nil {
		ctx = context.Background()
	}
	retryer := &MetadataRetryer{
		backgroundCtx:   ctx,
		store:           db,
		provider:        provider,
		capture:         capture,
		sessions:        sessions,
		metadataTimeout: metadataTimeout,
		logger:          logger,
	}
	return retryer
}
// NewRetryMetadataTool wraps a MetadataRetryer in its MCP tool adapter.
func NewRetryMetadataTool(retryer *MetadataRetryer) *RetryMetadataTool {
	tool := &RetryMetadataTool{retryer: retryer}
	return tool
}
// Handle delegates the MCP call to the underlying retryer unchanged.
func (t *RetryMetadataTool) Handle(ctx context.Context, req *mcp.CallToolRequest, in RetryMetadataInput) (*mcp.CallToolResult, RetryMetadataOutput, error) {
	result, out, err := t.retryer.Handle(ctx, req, in)
	return result, out, err
}
// QueueThought schedules a single background metadata retry for the given
// thought. The attempt runs on its own goroutine parented to backgroundCtx
// (optionally capped by metadataTimeout); errors are logged, never returned.
// NOTE(review): goroutines here are unbounded and untracked — a burst of
// captures with failing extraction spawns one goroutine each; confirm the
// expected rate makes that acceptable.
func (r *MetadataRetryer) QueueThought(id uuid.UUID) {
go func() {
attemptCtx := r.backgroundCtx
if r.metadataTimeout > 0 {
var cancel context.CancelFunc
attemptCtx, cancel = context.WithTimeout(r.backgroundCtx, r.metadataTimeout)
defer cancel()
}
// retryOne persists either refreshed or failure-marked metadata itself.
if _, err := r.retryOne(attemptCtx, id); err != nil {
r.logger.Warn("background metadata retry failed", slog.String("thought_id", id.String()), slog.String("error", err.Error()))
}
}()
}
// Handle runs a batch retry pass: it lists thoughts pending metadata retry
// (scoped by project/limit/age filters), then re-extracts metadata for each
// with bounded concurrency. With DryRun set it only reports how many
// candidates were found.
func (r *MetadataRetryer) Handle(ctx context.Context, req *mcp.CallToolRequest, in RetryMetadataInput) (*mcp.CallToolResult, RetryMetadataOutput, error) {
// Non-positive limits fall back to the documented default of 100.
limit := in.Limit
if limit <= 0 {
limit = 100
}
project, err := resolveProject(ctx, r.store, r.sessions, req, in.Project, false)
if err != nil {
return nil, RetryMetadataOutput{}, err
}
var projectID *uuid.UUID
if project != nil {
projectID = &project.ID
}
thoughts, err := r.store.ListThoughtsPendingMetadataRetry(ctx, limit, projectID, in.IncludeArchived, in.OlderThanDays)
if err != nil {
return nil, RetryMetadataOutput{}, err
}
out := RetryMetadataOutput{
Scanned: len(thoughts),
DryRun: in.DryRun,
}
if in.DryRun || len(thoughts) == 0 {
return nil, out, nil
}
// At most metadataRetryConcurrency extractions run at once; mu guards the
// shared counters in out while goroutines are in flight.
sem := semaphore.NewWeighted(metadataRetryConcurrency)
var mu sync.Mutex
var wg sync.WaitGroup
for _, thought := range thoughts {
// Stop dispatching new work once the caller's context is cancelled.
if ctx.Err() != nil {
break
}
// Acquire fails only on context cancellation, so breaking is safe.
if err := sem.Acquire(ctx, 1); err != nil {
break
}
wg.Add(1)
// thought is passed as an argument to avoid loop-variable capture issues
// on pre-1.22 toolchains.
go func(thought thoughttypes.Thought) {
defer wg.Done()
defer sem.Release(1)
// Retried counts attempts started, including ones that later fail.
mu.Lock()
out.Retried++
mu.Unlock()
updated, err := r.retryOne(ctx, thought.ID)
if err != nil {
mu.Lock()
out.Failures = append(out.Failures, RetryMetadataFailure{ID: thought.ID.String(), Error: err.Error()})
mu.Unlock()
return
}
if updated {
mu.Lock()
out.Updated++
mu.Unlock()
return
}
// No error and no update: the thought was already complete.
mu.Lock()
out.Skipped++
mu.Unlock()
}(thought)
}
wg.Wait()
// Safe to read without mu: all workers have finished.
out.Failed = len(out.Failures)
return nil, out, nil
}
// retryOne re-extracts metadata for a single thought. It returns (true, nil)
// when the metadata was refreshed, (false, nil) when the thought was already
// complete, and (false, err) on extraction or persistence failure. Failed
// extraction attempts are recorded on the thought before the error is
// returned.
func (r *MetadataRetryer) retryOne(ctx context.Context, id uuid.UUID) (bool, error) {
thought, err := r.store.GetThought(ctx, id)
if err != nil {
return false, err
}
// Nothing to do if a previous attempt already succeeded.
if thought.Metadata.MetadataStatus == metadata.MetadataStatusComplete {
return false, nil
}
// The timeout applies only to the extraction call; store updates below use
// the parent ctx so a timed-out attempt can still be persisted as failed.
attemptCtx := ctx
if r.metadataTimeout > 0 {
var cancel context.CancelFunc
attemptCtx, cancel = context.WithTimeout(ctx, r.metadataTimeout)
defer cancel()
}
attemptedAt := time.Now().UTC()
extracted, extractErr := r.provider.ExtractMetadata(attemptCtx, thought.Content)
if extractErr != nil {
// Record the failed attempt (status + timestamp) on the thought, then
// surface the original extraction error to the caller.
failedMetadata := metadata.MarkMetadataFailed(thought.Metadata, r.capture, attemptedAt, extractErr)
if _, updateErr := r.store.UpdateThoughtMetadata(ctx, thought.ID, failedMetadata); updateErr != nil {
return false, updateErr
}
return false, extractErr
}
completedMetadata := metadata.MarkMetadataComplete(extracted, r.capture, attemptedAt)
// Extraction knows nothing about attachments; carry the existing ones over
// so they are not wiped by the replacement metadata.
completedMetadata.Attachments = thought.Metadata.Attachments
if _, updateErr := r.store.UpdateThoughtMetadata(ctx, thought.ID, completedMetadata); updateErr != nil {
return false, updateErr
}
return true, nil
}

View File

@@ -106,15 +106,18 @@ func (t *ReparseMetadataTool) Handle(ctx context.Context, req *mcp.CallToolReque
normalizedCurrent := metadata.Normalize(thought.Metadata, t.capture)
attemptedAt := time.Now().UTC()
extracted, extractErr := t.provider.ExtractMetadata(ctx, thought.Content)
normalizedTarget := normalizedCurrent
if extractErr != nil {
normalizedTarget = metadata.MarkMetadataFailed(normalizedCurrent, t.capture, attemptedAt, extractErr)
mu.Lock()
out.Normalized++
mu.Unlock()
t.logger.Warn("metadata reparse extract failed, using normalized existing metadata", slog.String("thought_id", thought.ID.String()), slog.String("error", extractErr.Error()))
} else {
normalizedTarget = metadata.Normalize(extracted, t.capture)
normalizedTarget = metadata.MarkMetadataComplete(extracted, t.capture, attemptedAt)
normalizedTarget.Attachments = thought.Metadata.Attachments
mu.Lock()
out.Reparsed++
mu.Unlock()

View File

@@ -4,6 +4,7 @@ import (
"context"
"log/slog"
"strings"
"time"
"github.com/modelcontextprotocol/go-sdk/mcp"
@@ -64,8 +65,9 @@ func (t *UpdateTool) Handle(ctx context.Context, _ *mcp.CallToolRequest, in Upda
extracted, extractErr := t.provider.ExtractMetadata(ctx, content)
if extractErr != nil {
t.log.Warn("metadata extraction failed during update, keeping current metadata", slog.String("error", extractErr.Error()))
mergedMetadata = metadata.MarkMetadataFailed(mergedMetadata, t.capture, time.Now().UTC(), extractErr)
} else {
mergedMetadata = metadata.Normalize(extracted, t.capture)
mergedMetadata = metadata.MarkMetadataComplete(extracted, t.capture, time.Now().UTC())
mergedMetadata.Attachments = current.Metadata.Attachments
}
}