From 7f2b2b9feef5fbf48889c443ff7f433fcbec6ac6 Mon Sep 17 00:00:00 2001 From: "Hein (Warky)" Date: Mon, 30 Mar 2026 22:24:18 +0200 Subject: [PATCH] feat(files): implement file storage functionality with save, load, and list operations --- README.md | 36 ++++ internal/app/app.go | 27 +-- internal/mcpserver/server.go | 42 ++-- internal/metadata/normalize.go | 34 +++ internal/metadata/normalize_test.go | 23 +++ internal/store/db.go | 8 + internal/store/files.go | 191 +++++++++++++++++ internal/tools/files.go | 276 +++++++++++++++++++++++++ internal/tools/update.go | 1 + internal/types/thought.go | 44 +++- llm/memory.md | 7 +- migrations/016_create_stored_files.sql | 20 ++ 12 files changed, 676 insertions(+), 33 deletions(-) create mode 100644 internal/store/files.go create mode 100644 internal/tools/files.go create mode 100644 migrations/016_create_stored_files.sql diff --git a/README.md b/README.md index 6e46a85..5068e7d 100644 --- a/README.md +++ b/README.md @@ -41,6 +41,9 @@ A Go MCP server for capturing and retrieving thoughts, memory, and project conte | `recall_context` | Semantic + recency context block for injection | | `link_thoughts` | Create a typed relationship between thoughts | | `related_thoughts` | Explicit links + semantic neighbours | +| `save_file` | Store a base64-encoded image, document, audio file, or other binary and optionally link it to a thought | +| `load_file` | Retrieve a stored file by ID as base64 plus metadata | +| `list_files` | Browse stored files by thought, project, or kind | | `backfill_embeddings` | Generate missing embeddings for stored thoughts | | `reparse_thought_metadata` | Re-extract and normalize metadata for stored thoughts | @@ -112,6 +115,39 @@ Run `reparse_thought_metadata` to fix stale or inconsistent metadata by re-extra - If extraction fails for a thought, existing metadata is normalized and written only if it changes - Metadata reparse runs in parallel (4 workers); one failure does not abort the run +## File Storage 
+ +Use `save_file` to persist binary files as base64. Files can optionally be linked to a memory by passing `thought_id`, which also adds an attachment reference to that thought's metadata. + +```json +{ + "name": "meeting-notes.pdf", + "media_type": "application/pdf", + "kind": "document", + "thought_id": "optional-thought-uuid", + "content_base64": "" +} +``` + +Load a stored file again with: + +```json +{ + "id": "stored-file-uuid" +} +``` + +List files for a thought or project with: + +```json +{ + "thought_id": "optional-thought-uuid", + "project": "optional-project-name", + "kind": "optional-image-document-audio-file", + "limit": 20 +} +``` + **Automatic backfill** (optional, config-gated): ```yaml diff --git a/internal/app/app.go b/internal/app/app.go index e84c369..c99802a 100644 --- a/internal/app/app.go +++ b/internal/app/app.go @@ -129,19 +129,20 @@ func routes(logger *slog.Logger, cfg *config.Config, db *store.DB, provider ai.P mux := http.NewServeMux() toolSet := mcpserver.ToolSet{ - Capture: tools.NewCaptureTool(db, provider, cfg.Capture, cfg.AI.Metadata.Timeout, activeProjects, logger), - Search: tools.NewSearchTool(db, provider, cfg.Search, activeProjects), - List: tools.NewListTool(db, cfg.Search, activeProjects), - Stats: tools.NewStatsTool(db), - Get: tools.NewGetTool(db), - Update: tools.NewUpdateTool(db, provider, cfg.Capture, logger), - Delete: tools.NewDeleteTool(db), - Archive: tools.NewArchiveTool(db), - Projects: tools.NewProjectsTool(db, activeProjects), - Context: tools.NewContextTool(db, provider, cfg.Search, activeProjects), - Recall: tools.NewRecallTool(db, provider, cfg.Search, activeProjects), - Summarize: tools.NewSummarizeTool(db, provider, cfg.Search, activeProjects), - Links: tools.NewLinksTool(db, provider, cfg.Search), + Capture: tools.NewCaptureTool(db, provider, cfg.Capture, cfg.AI.Metadata.Timeout, activeProjects, logger), + Search: tools.NewSearchTool(db, provider, cfg.Search, activeProjects), + List: tools.NewListTool(db, 
cfg.Search, activeProjects), + Stats: tools.NewStatsTool(db), + Get: tools.NewGetTool(db), + Update: tools.NewUpdateTool(db, provider, cfg.Capture, logger), + Delete: tools.NewDeleteTool(db), + Archive: tools.NewArchiveTool(db), + Projects: tools.NewProjectsTool(db, activeProjects), + Context: tools.NewContextTool(db, provider, cfg.Search, activeProjects), + Recall: tools.NewRecallTool(db, provider, cfg.Search, activeProjects), + Summarize: tools.NewSummarizeTool(db, provider, cfg.Search, activeProjects), + Links: tools.NewLinksTool(db, provider, cfg.Search), + Files: tools.NewFilesTool(db, activeProjects), Backfill: tools.NewBackfillTool(db, provider, activeProjects, logger), Reparse: tools.NewReparseMetadataTool(db, provider, cfg.Capture, activeProjects, logger), Household: tools.NewHouseholdTool(db), diff --git a/internal/mcpserver/server.go b/internal/mcpserver/server.go index e30f137..44904a2 100644 --- a/internal/mcpserver/server.go +++ b/internal/mcpserver/server.go @@ -11,19 +11,20 @@ import ( ) type ToolSet struct { - Capture *tools.CaptureTool - Search *tools.SearchTool - List *tools.ListTool - Stats *tools.StatsTool - Get *tools.GetTool - Update *tools.UpdateTool - Delete *tools.DeleteTool - Archive *tools.ArchiveTool - Projects *tools.ProjectsTool - Context *tools.ContextTool - Recall *tools.RecallTool - Summarize *tools.SummarizeTool - Links *tools.LinksTool + Capture *tools.CaptureTool + Search *tools.SearchTool + List *tools.ListTool + Stats *tools.StatsTool + Get *tools.GetTool + Update *tools.UpdateTool + Delete *tools.DeleteTool + Archive *tools.ArchiveTool + Projects *tools.ProjectsTool + Context *tools.ContextTool + Recall *tools.RecallTool + Summarize *tools.SummarizeTool + Links *tools.LinksTool + Files *tools.FilesTool Backfill *tools.BackfillTool Reparse *tools.ReparseMetadataTool Household *tools.HouseholdTool @@ -124,6 +125,21 @@ func New(cfg config.MCPConfig, toolSet ToolSet) http.Handler { Description: "Retrieve explicit links and 
semantic neighbors for a thought.", }, toolSet.Links.Related) + addTool(server, &mcp.Tool{ + Name: "save_file", + Description: "Store a base64-encoded file such as an image, document, or audio clip, optionally linking it to a thought.", + }, toolSet.Files.Save) + + addTool(server, &mcp.Tool{ + Name: "load_file", + Description: "Load a previously stored file by id and return its metadata and base64 content.", + }, toolSet.Files.Load) + + addTool(server, &mcp.Tool{ + Name: "list_files", + Description: "List stored files, optionally filtered by thought, project, or kind.", + }, toolSet.Files.List) + addTool(server, &mcp.Tool{ Name: "backfill_embeddings", Description: "Generate missing embeddings for stored thoughts using the active embedding model.", diff --git a/internal/metadata/normalize.go b/internal/metadata/normalize.go index 68a9df7..29529e9 100644 --- a/internal/metadata/normalize.go +++ b/internal/metadata/normalize.go @@ -35,6 +35,7 @@ func Fallback(capture config.CaptureConfig) thoughttypes.ThoughtMetadata { Topics: []string{topicFallback}, Type: normalizeType(capture.MetadataDefaults.Type), Source: normalizeSource(capture.Source), + Attachments: []thoughttypes.ThoughtAttachment{}, } } @@ -46,6 +47,7 @@ func Normalize(in thoughttypes.ThoughtMetadata, capture config.CaptureConfig) th Topics: normalizeList(in.Topics, maxTopics), Type: normalizeType(in.Type), Source: normalizeSource(in.Source), + Attachments: normalizeAttachments(in.Attachments), } if len(out.Topics) == 0 { @@ -127,10 +129,42 @@ func Merge(base, patch thoughttypes.ThoughtMetadata, capture config.CaptureConfi if strings.TrimSpace(patch.Source) != "" { merged.Source = patch.Source } + if len(patch.Attachments) > 0 { + merged.Attachments = append(append([]thoughttypes.ThoughtAttachment{}, merged.Attachments...), patch.Attachments...) 
+ } return Normalize(merged, capture) } +func normalizeAttachments(values []thoughttypes.ThoughtAttachment) []thoughttypes.ThoughtAttachment { + seen := make(map[string]struct{}, len(values)) + result := make([]thoughttypes.ThoughtAttachment, 0, len(values)) + + for _, value := range values { + if value.FileID.String() == "" || value.FileID.String() == "00000000-0000-0000-0000-000000000000" { + continue + } + + key := value.FileID.String() + if _, ok := seen[key]; ok { + continue + } + + value.Name = strings.TrimSpace(value.Name) + value.MediaType = strings.TrimSpace(value.MediaType) + value.Kind = strings.TrimSpace(value.Kind) + if value.SizeBytes < 0 { + value.SizeBytes = 0 + } + value.SHA256 = strings.TrimSpace(value.SHA256) + + seen[key] = struct{}{} + result = append(result, value) + } + + return result +} + func SortedTopCounts(in map[string]int, limit int) []thoughttypes.KeyCount { out := make([]thoughttypes.KeyCount, 0, len(in)) for key, count := range in { diff --git a/internal/metadata/normalize_test.go b/internal/metadata/normalize_test.go index 55397b7..f09b3a0 100644 --- a/internal/metadata/normalize_test.go +++ b/internal/metadata/normalize_test.go @@ -4,6 +4,8 @@ import ( "strings" "testing" + "github.com/google/uuid" + "git.warky.dev/wdevs/amcs/internal/config" thoughttypes "git.warky.dev/wdevs/amcs/internal/types" ) @@ -79,3 +81,24 @@ func TestMergeAddsPatchAndNormalizes(t *testing.T) { t.Fatalf("Topics len = %d, want 2", len(got.Topics)) } } + +func TestNormalizeDedupesAttachmentsByFileID(t *testing.T) { + id := uuid.New() + + got := Normalize(thoughttypes.ThoughtMetadata{ + Attachments: []thoughttypes.ThoughtAttachment{ + {FileID: id, Name: " one.png ", MediaType: " image/png ", Kind: " image ", SizeBytes: 12, SHA256: " abc "}, + {FileID: id, Name: "two.png", MediaType: "image/png", Kind: "image", SizeBytes: 99, SHA256: "def"}, + }, + }, testCaptureConfig()) + + if len(got.Attachments) != 1 { + t.Fatalf("Attachments len = %d, want 1", 
len(got.Attachments)) + } + if got.Attachments[0].Name != "one.png" { + t.Fatalf("Attachment name = %q, want one.png", got.Attachments[0].Name) + } + if got.Attachments[0].Kind != "image" { + t.Fatalf("Attachment kind = %q, want image", got.Attachments[0].Kind) + } +} diff --git a/internal/store/db.go b/internal/store/db.go index c7b61cd..a01cc02 100644 --- a/internal/store/db.go +++ b/internal/store/db.go @@ -92,5 +92,13 @@ func (db *DB) VerifyRequirements(ctx context.Context) error { return fmt.Errorf("embeddings table is missing — run migrations") } + var hasStoredFiles bool + if err := db.pool.QueryRow(ctx, `select exists(select 1 from pg_tables where schemaname = 'public' and tablename = 'stored_files')`).Scan(&hasStoredFiles); err != nil { + return fmt.Errorf("verify stored_files table: %w", err) + } + if !hasStoredFiles { + return fmt.Errorf("stored_files table is missing — run migrations") + } + return nil } diff --git a/internal/store/files.go b/internal/store/files.go new file mode 100644 index 0000000..fe07106 --- /dev/null +++ b/internal/store/files.go @@ -0,0 +1,191 @@ +package store + +import ( + "context" + "encoding/json" + "fmt" + "strings" + + "github.com/google/uuid" + "github.com/jackc/pgx/v5" + + thoughttypes "git.warky.dev/wdevs/amcs/internal/types" +) + +func (db *DB) InsertStoredFile(ctx context.Context, file thoughttypes.StoredFile) (thoughttypes.StoredFile, error) { + row := db.pool.QueryRow(ctx, ` + insert into stored_files (thought_id, project_id, name, media_type, kind, encoding, size_bytes, sha256, content) + values ($1, $2, $3, $4, $5, $6, $7, $8, $9) + returning guid, thought_id, project_id, name, media_type, kind, encoding, size_bytes, sha256, created_at, updated_at + `, file.ThoughtID, file.ProjectID, file.Name, file.MediaType, file.Kind, file.Encoding, file.SizeBytes, file.SHA256, file.Content) + + var created thoughttypes.StoredFile + if err := row.Scan( + &created.ID, + &created.ThoughtID, + &created.ProjectID, + &created.Name, 
+ &created.MediaType, + &created.Kind, + &created.Encoding, + &created.SizeBytes, + &created.SHA256, + &created.CreatedAt, + &created.UpdatedAt, + ); err != nil { + return thoughttypes.StoredFile{}, fmt.Errorf("insert stored file: %w", err) + } + + return created, nil +} + +func (db *DB) GetStoredFile(ctx context.Context, id uuid.UUID) (thoughttypes.StoredFile, error) { + row := db.pool.QueryRow(ctx, ` + select guid, thought_id, project_id, name, media_type, kind, encoding, size_bytes, sha256, content, created_at, updated_at + from stored_files + where guid = $1 + `, id) + + var file thoughttypes.StoredFile + if err := row.Scan( + &file.ID, + &file.ThoughtID, + &file.ProjectID, + &file.Name, + &file.MediaType, + &file.Kind, + &file.Encoding, + &file.SizeBytes, + &file.SHA256, + &file.Content, + &file.CreatedAt, + &file.UpdatedAt, + ); err != nil { + if err == pgx.ErrNoRows { + return thoughttypes.StoredFile{}, err + } + return thoughttypes.StoredFile{}, fmt.Errorf("get stored file: %w", err) + } + + return file, nil +} + +func (db *DB) ListStoredFiles(ctx context.Context, filter thoughttypes.StoredFileFilter) ([]thoughttypes.StoredFile, error) { + args := make([]any, 0, 4) + conditions := make([]string, 0, 3) + + if filter.ThoughtID != nil { + args = append(args, *filter.ThoughtID) + conditions = append(conditions, fmt.Sprintf("thought_id = $%d", len(args))) + } + if filter.ProjectID != nil { + args = append(args, *filter.ProjectID) + conditions = append(conditions, fmt.Sprintf("project_id = $%d", len(args))) + } + if kind := strings.TrimSpace(filter.Kind); kind != "" { + args = append(args, kind) + conditions = append(conditions, fmt.Sprintf("kind = $%d", len(args))) + } + + query := ` + select guid, thought_id, project_id, name, media_type, kind, encoding, size_bytes, sha256, created_at, updated_at + from stored_files + ` + if len(conditions) > 0 { + query += " where " + strings.Join(conditions, " and ") + } + + args = append(args, filter.Limit) + query += 
// AddThoughtAttachment records attachment in the metadata JSON of the thought
// identified by thoughtID.
//
// The thought row is read with "for update" inside a transaction, the
// attachment list is edited in memory, and the whole metadata document is
// written back. An existing attachment with the same FileID is replaced in
// place; otherwise the attachment is appended.
//
// pgx.ErrNoRows is returned unwrapped when the thought does not exist (either
// on the initial select or if the update touches no rows). All other errors
// are wrapped with a short description of the failing step.
func (db *DB) AddThoughtAttachment(ctx context.Context, thoughtID uuid.UUID, attachment thoughttypes.ThoughtAttachment) error {
	tx, err := db.pool.Begin(ctx)
	if err != nil {
		return fmt.Errorf("begin transaction: %w", err)
	}
	// Runs on every return path, including after Commit below; the returned
	// error is deliberately ignored.
	// NOTE(review): this relies on Rollback being harmless after a successful
	// Commit — confirm against the pgx Tx documentation.
	defer tx.Rollback(ctx)

	// "for update" locks the row for the rest of the transaction so the
	// read-modify-write below is not interleaved with another writer.
	var metadataBytes []byte
	if err := tx.QueryRow(ctx, `select metadata from thoughts where guid = $1 for update`, thoughtID).Scan(&metadataBytes); err != nil {
		if err == pgx.ErrNoRows {
			return err
		}
		return fmt.Errorf("load thought metadata: %w", err)
	}

	// An empty metadata column is treated as a zero-value ThoughtMetadata.
	// NOTE(review): the document is round-tripped through the Go struct, so
	// any JSON keys that ThoughtMetadata does not declare would presumably be
	// dropped on write, and nil slices would be encoded as null — verify that
	// this is acceptable for existing rows.
	var metadata thoughttypes.ThoughtMetadata
	if len(metadataBytes) > 0 {
		if err := json.Unmarshal(metadataBytes, &metadata); err != nil {
			return fmt.Errorf("decode thought metadata: %w", err)
		}
	}

	// Upsert by FileID: overwrite the first matching entry, else append.
	replaced := false
	for i := range metadata.Attachments {
		if metadata.Attachments[i].FileID == attachment.FileID {
			metadata.Attachments[i] = attachment
			replaced = true
			break
		}
	}
	if !replaced {
		metadata.Attachments = append(metadata.Attachments, attachment)
	}

	updatedMetadata, err := json.Marshal(metadata)
	if err != nil {
		return fmt.Errorf("encode thought metadata: %w", err)
	}

	tag, err := tx.Exec(ctx, `
		update thoughts
		set metadata = $2::jsonb,
		    updated_at = now()
		where guid = $1
	`, thoughtID, updatedMetadata)
	if err != nil {
		return fmt.Errorf("update thought attachments: %w", err)
	}
	// Zero affected rows is reported the same way as a missing thought.
	if tag.RowsAffected() == 0 {
		return pgx.ErrNoRows
	}

	if err := tx.Commit(ctx); err != nil {
		return fmt.Errorf("commit attachment update: %w", err)
	}

	return nil
}
// Save handles the save_file tool: it decodes the base64 payload, derives the
// media type, kind, size and SHA-256 digest, stores the file, and — when a
// thought id was supplied — adds an attachment reference to that thought's
// metadata.
//
// Input handling:
//   - name and content_base64 are required; content may be raw base64 or a
//     "data:<type>;base64,<payload>" URL.
//   - media_type falls back to the data URL's type and then to content
//     sniffing; kind falls back to a value derived from the media type.
//   - when thought_id is given the thought must exist, and an explicitly
//     resolved project must not contradict the thought's project.
//
// The returned SaveFileOutput carries the stored row's metadata only; the
// insert does not read the content column back.
func (t *FilesTool) Save(ctx context.Context, req *mcp.CallToolRequest, in SaveFileInput) (*mcp.CallToolResult, SaveFileOutput, error) {
	name := strings.TrimSpace(in.Name)
	if name == "" {
		return nil, SaveFileOutput{}, errInvalidInput("name is required")
	}

	// Accept both bare base64 and data URLs; the latter may also tell us the
	// media type.
	contentBase64, mediaTypeFromDataURL := splitDataURL(strings.TrimSpace(in.ContentBase64))
	if contentBase64 == "" {
		return nil, SaveFileOutput{}, errInvalidInput("content_base64 is required")
	}

	// The decoder's own error is replaced by a fixed, user-facing message.
	content, err := decodeBase64(contentBase64)
	if err != nil {
		return nil, SaveFileOutput{}, errInvalidInput("content_base64 must be valid base64")
	}
	if len(content) == 0 {
		return nil, SaveFileOutput{}, errInvalidInput("decoded file content must not be empty")
	}

	// NOTE(review): the trailing false presumably means "project is optional"
	// (project is nil-checked below) — confirm against resolveProject.
	project, err := resolveProject(ctx, t.store, t.sessions, req, in.Project, false)
	if err != nil {
		return nil, SaveFileOutput{}, err
	}

	var thoughtID *uuid.UUID
	var projectID = projectIDPtr(project)
	if rawThoughtID := strings.TrimSpace(in.ThoughtID); rawThoughtID != "" {
		parsedThoughtID, err := parseUUID(rawThoughtID)
		if err != nil {
			return nil, SaveFileOutput{}, err
		}
		// Looking the thought up both validates the id and yields its project.
		thought, err := t.store.GetThought(ctx, parsedThoughtID)
		if err != nil {
			return nil, SaveFileOutput{}, err
		}
		thoughtID = &parsedThoughtID
		// A linked thought always dictates the file's project, even when the
		// thought has none (projectID then becomes nil).
		// NOTE(review): List keeps the resolved project as a filter when the
		// thought has no project, so a file saved here with a nil project may
		// not show up in a thought+project listing — verify intended scoping.
		projectID = thought.ProjectID
		if project != nil && thought.ProjectID != nil && *thought.ProjectID != project.ID {
			return nil, SaveFileOutput{}, errInvalidInput("project does not match the linked thought's project")
		}
	}

	mediaType := normalizeMediaType(strings.TrimSpace(in.MediaType), mediaTypeFromDataURL, content)
	kind := normalizeFileKind(strings.TrimSpace(in.Kind), mediaType)
	// Size and digest describe the decoded bytes, not the base64 text.
	sum := sha256.Sum256(content)

	file := thoughttypes.StoredFile{
		Name:      name,
		MediaType: mediaType,
		Kind:      kind,
		Encoding:  "base64",
		SizeBytes: int64(len(content)),
		SHA256:    hex.EncodeToString(sum[:]),
		Content:   content,
		ProjectID: projectID,
	}
	if thoughtID != nil {
		file.ThoughtID = thoughtID
	}

	created, err := t.store.InsertStoredFile(ctx, file)
	if err != nil {
		return nil, SaveFileOutput{}, err
	}

	// NOTE(review): the insert above and the attachment update below are two
	// separate store calls; if the second fails the file row already exists
	// while the caller receives an error.
	if created.ThoughtID != nil {
		if err := t.store.AddThoughtAttachment(ctx, *created.ThoughtID, thoughtAttachmentFromFile(created)); err != nil {
			return nil, SaveFileOutput{}, err
		}
	}
	if created.ProjectID != nil {
		// Best effort: a failure to touch the project does not fail the save.
		_ = t.store.TouchProject(ctx, *created.ProjectID)
	}

	return nil, SaveFileOutput{File: created}, nil
}
// splitDataURL separates a "data:<media type>;base64,<payload>" value into its
// base64 payload and declared media type.
//
// A value that does not start with "data:" or that lacks the ";base64," marker
// is returned unchanged with an empty media type, so plain base64 input passes
// straight through. The media type is returned as written, including any
// parameters that preceded the marker.
func splitDataURL(value string) (contentBase64 string, mediaType string) {
	const marker = ";base64,"
	if !strings.HasPrefix(value, "data:") {
		return value, ""
	}

	prefix, payload, ok := strings.Cut(value, marker)
	if !ok {
		return value, ""
	}

	mediaType = strings.TrimPrefix(prefix, "data:")
	return payload, strings.TrimSpace(mediaType)
}

// decodeBase64 decodes value, accepting the standard and the URL-safe base64
// alphabets, each with or without "=" padding.
//
// Standard padded base64 is tried first so input that was accepted before
// decodes exactly as before. When no variant matches, the error from the
// standard decoder is returned together with a nil slice.
func decodeBase64(value string) ([]byte, error) {
	decoded, stdErr := base64.StdEncoding.DecodeString(value)
	if stdErr == nil {
		return decoded, nil
	}

	// Clients commonly strip padding or use the "-" / "_" alphabet; those
	// characters are never valid in the standard alphabet, so the fallbacks
	// cannot reinterpret input that the standard decoder would accept.
	fallbacks := []*base64.Encoding{
		base64.RawStdEncoding,
		base64.URLEncoding,
		base64.RawURLEncoding,
	}
	for _, enc := range fallbacks {
		if decoded, err := enc.DecodeString(value); err == nil {
			return decoded, nil
		}
	}

	return nil, stdErr
}

// normalizeMediaType picks the media type for a file: the explicit value if
// set, otherwise the one taken from a data URL, otherwise the result of
// sniffing content with http.DetectContentType.
func normalizeMediaType(explicit string, fromDataURL string, content []byte) string {
	switch {
	case explicit != "":
		return explicit
	case fromDataURL != "":
		return fromDataURL
	default:
		return http.DetectContentType(content)
	}
}

// normalizeFileKind returns the logical kind of a file.
//
// A non-empty explicit kind is returned untouched. Otherwise the kind is
// derived from mediaType: "image", "audio", "video", "document" (PDF, any
// text/* type, or a type whose name contains "document"), or "file" as the
// catch-all.
//
// Matching looks only at the type/subtype part and ignores letter case, so
// "IMAGE/PNG" and "application/pdf; charset=binary" are classified the same as
// their canonical spellings.
func normalizeFileKind(explicit string, mediaType string) string {
	if explicit != "" {
		return explicit
	}

	// Drop parameters ("; charset=...") and fold case before matching.
	essence, _, _ := strings.Cut(mediaType, ";")
	essence = strings.ToLower(strings.TrimSpace(essence))

	switch {
	case strings.HasPrefix(essence, "image/"):
		return "image"
	case strings.HasPrefix(essence, "audio/"):
		return "audio"
	case strings.HasPrefix(essence, "video/"):
		return "video"
	case essence == "application/pdf" || strings.HasPrefix(essence, "text/") || strings.Contains(essence, "document"):
		return "document"
	default:
		return "file"
	}
}

// normalizeFileLimit clamps a caller-supplied page size: zero or negative
// values select the default of 20, and anything above 100 is capped at 100.
func normalizeFileLimit(limit int) int {
	const (
		defaultLimit = 20
		maxLimit     = 100
	)

	switch {
	case limit <= 0:
		return defaultLimit
	case limit > maxLimit:
		return maxLimit
	default:
		return limit
	}
}
// StoredFile is one row of the stored_files table: a binary payload plus the
// descriptive metadata kept alongside it.
type StoredFile struct {
	// ID is the file's public identifier (the table's guid column).
	ID uuid.UUID `json:"id"`
	// ThoughtID links the file to a thought; nil when the file is unlinked.
	ThoughtID *uuid.UUID `json:"thought_id,omitempty"`
	// ProjectID scopes the file to a project; nil when it has none.
	ProjectID *uuid.UUID `json:"project_id,omitempty"`
	// Name is the file name as supplied by the caller.
	Name string `json:"name"`
	// MediaType is the MIME type, e.g. "image/png".
	MediaType string `json:"media_type"`
	// Kind is the logical category such as "image", "document", "audio" or
	// "file".
	Kind string `json:"kind"`
	// Encoding names the transfer encoding used by the tools ("base64").
	Encoding string `json:"encoding"`
	// SizeBytes is the length of Content in bytes.
	SizeBytes int64 `json:"size_bytes"`
	// SHA256 is the hex-encoded SHA-256 digest of Content.
	SHA256 string `json:"sha256"`
	// Content holds the raw file bytes. It is excluded from JSON output and is
	// not populated by listing or insert results.
	Content []byte `json:"-"`
	// CreatedAt and UpdatedAt mirror the row's timestamp columns.
	CreatedAt time.Time `json:"created_at"`
	UpdatedAt time.Time `json:"updated_at"`
}

// StoredFileFilter narrows a stored-file listing. Nil pointers and an empty
// Kind mean "do not filter on this field"; Limit is applied as the SQL limit.
type StoredFileFilter struct {
	// Limit is the maximum number of rows to return.
	Limit int
	// ThoughtID restricts results to files linked to this thought.
	ThoughtID *uuid.UUID
	// ProjectID restricts results to files in this project.
	ProjectID *uuid.UUID
	// Kind restricts results to an exact kind value.
	Kind string
}
## Global Notebook Rules @@ -36,8 +40,9 @@ Use AMCS as memory with two scopes: - Save only durable, useful information. - Do not save secrets, raw logs, or transient noise. - Prefer concise summaries. +- Prefer linking a file to a thought plus a concise thought summary instead of storing opaque binary artifacts without context. - When saving, choose the narrowest correct scope: project if project-specific, global if not. ## Short Operational Form -Use AMCS memory in project scope when the current work matches a known project. If no clear project matches, global notebook memory is allowed for non-project-specific information. Never store project-specific memory globally when a matching project exists, and never store memory in the wrong project. If project matching is ambiguous, ask the user. +Use AMCS memory in project scope when the current work matches a known project. If no clear project matches, global notebook memory is allowed for non-project-specific information. Store durable notes with `capture_thought`, store supporting binary artifacts with `save_file`, browse them with `list_files`, and load them with `load_file`. Never store project-specific memory globally when a matching project exists, and never store memory in the wrong project. If project matching is ambiguous, ask the user. 
-- stored_files keeps binary attachments (images, documents, audio, ...) that
-- can optionally be linked to a thought and/or a project.
create table if not exists stored_files (
    -- Internal surrogate key; application code addresses rows by guid.
    id bigserial primary key,
    -- Public identifier returned to clients.
    -- NOTE(review): gen_random_uuid() presumably requires PostgreSQL 13+ or
    -- the pgcrypto extension; confirm for the target database.
    guid uuid not null default gen_random_uuid(),
    -- Optional links; deleting the thought or project keeps the file and
    -- clears the reference.
    thought_id uuid references thoughts(guid) on delete set null,
    project_id uuid references projects(guid) on delete set null,
    name text not null,
    media_type text not null,
    kind text not null default 'file',
    -- Transfer encoding used by the tool layer.
    -- NOTE(review): content below is bytea, so this appears to describe the
    -- API payload rather than the stored bytes; verify intent.
    encoding text not null default 'base64',
    size_bytes bigint not null,
    -- Hex-encoded SHA-256 digest of content.
    sha256 text not null,
    content bytea not null,
    created_at timestamptz not null default now(),
    updated_at timestamptz not null default now(),
    constraint stored_files_guid_unique unique (guid)
);

-- Lookup paths used by listing: by linked thought and by project.
create index if not exists stored_files_thought_id_idx on stored_files (thought_id);
create index if not exists stored_files_project_id_idx on stored_files (project_id);
-- Supports finding files by content digest.
create index if not exists stored_files_sha256_idx on stored_files (sha256);