From 8f734c05569c12560de043951c4a10f1a7577337 Mon Sep 17 00:00:00 2001 From: "Hein (Warky)" Date: Tue, 31 Mar 2026 00:04:36 +0200 Subject: [PATCH] feat(files): enhance file handling with support for HTTP uploads and direct binary access --- README.md | 44 ++++++++++++++--- internal/app/app.go | 3 +- internal/app/files.go | 31 +++++++++++- internal/mcpserver/server.go | 6 +++ internal/tools/files.go | 94 ++++++++++++++++++++++++++++++++---- llm/memory.md | 7 ++- 6 files changed, 164 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index 7af9004..8a4698f 100644 --- a/README.md +++ b/README.md @@ -41,8 +41,8 @@ A Go MCP server for capturing and retrieving thoughts, memory, and project conte | `recall_context` | Semantic + recency context block for injection | | `link_thoughts` | Create a typed relationship between thoughts | | `related_thoughts` | Explicit links + semantic neighbours | -| `save_file` | Store a base64-encoded image, document, audio file, or other binary and optionally link it to a thought | -| `load_file` | Retrieve a stored file by ID as base64 plus metadata | +| `save_file` | Store a file (base64 or by resource URI) and optionally link it to a thought | +| `load_file` | Retrieve a stored file by ID; returns metadata, base64 content, and an embedded MCP binary resource | | `list_files` | Browse stored files by thought, project, or kind | | `backfill_embeddings` | Generate missing embeddings for stored thoughts | | `reparse_thought_metadata` | Re-extract and normalize metadata for stored thoughts | @@ -181,7 +181,11 @@ Run `reparse_thought_metadata` to fix stale or inconsistent metadata by re-extra ## File Storage -Use `save_file` to persist binary files as base64. Files can optionally be linked to a memory by passing `thought_id`, which also adds an attachment reference to that thought's metadata. AI clients should prefer `save_file` when the goal is to retain the artifact itself, rather than reading or summarizing the file first. 
Stored files and attachment metadata are not forwarded to the metadata extraction client. +Files can optionally be linked to a thought by passing `thought_id`, which also adds an attachment reference to that thought's metadata. AI clients should prefer `save_file` when the goal is to retain the artifact itself, rather than reading or summarizing the file first. Stored files and attachment metadata are not forwarded to the metadata extraction client. + +### MCP tools + +**Save via base64** (small files or when HTTP is not available): ```json { @@ -193,15 +197,27 @@ Use `save_file` to persist binary files as base64. Files can optionally be linke } ``` -Load a stored file again with: +**Save via resource URI** (preferred for binary; avoids base64 overhead): + +Upload the file binary via HTTP first (see below), then pass the returned URI to `save_file`: ```json { - "id": "stored-file-uuid" + "name": "meeting-notes.pdf", + "thought_id": "optional-thought-uuid", + "content_uri": "amcs://files/stored-file-uuid" } ``` -List files for a thought or project with: +`content_base64` and `content_uri` are mutually exclusive. + +**Load a file** — returns metadata, base64 content, and an embedded MCP binary resource (`amcs://files/{id}`): + +```json +{ "id": "stored-file-uuid" } +``` + +**List files** for a thought or project: ```json { @@ -212,9 +228,13 @@ List files for a thought or project with: } ``` -AMCS also supports direct authenticated HTTP uploads to `/files` for clients that want to stream file bodies instead of base64-encoding them into an MCP tool call. +### MCP resources -The Go server caps `/files` uploads at 100 MB per request. Large uploads are still also subject to available memory, Postgres limits, and any reverse proxy or load balancer limits in front of AMCS. +Stored files are also exposed as MCP resources at `amcs://files/{id}`. MCP clients can read raw binary content directly via `resources/read` without going through `load_file`. 
+ +### HTTP upload and download + +Direct HTTP access avoids base64 encoding entirely. The Go server caps `/files` uploads at 100 MB per request. Large uploads are also subject to available memory, Postgres limits, and any reverse proxy or load balancer in front of AMCS. Multipart upload: @@ -235,6 +255,14 @@ curl -X POST "http://localhost:8080/files?project=amcs&name=meeting-notes.pdf" \ --data-binary @./meeting-notes.pdf ``` +Binary download: + +```bash +curl http://localhost:8080/files/stored-file-uuid \ + -H "x-brain-key: your-access-key" \ + -o meeting-notes.pdf +``` + **Automatic backfill** (optional, config-gated): ```yaml diff --git a/internal/app/app.go b/internal/app/app.go index b811f05..16fd483 100644 --- a/internal/app/app.go +++ b/internal/app/app.go @@ -178,7 +178,8 @@ func routes(logger *slog.Logger, cfg *config.Config, db *store.DB, provider ai.P mcpHandler := mcpserver.New(cfg.MCP, toolSet) mux.Handle(cfg.MCP.Path, authMiddleware(mcpHandler)) - mux.Handle("/files", authMiddleware(fileUploadHandler(filesTool))) + mux.Handle("/files", authMiddleware(fileHandler(filesTool))) + mux.Handle("/files/{id}", authMiddleware(fileHandler(filesTool))) if oauthRegistry != nil && tokenStore != nil { mux.HandleFunc("/.well-known/oauth-authorization-server", oauthMetadataHandler()) mux.HandleFunc("/oauth-authorization-server", oauthMetadataHandler()) diff --git a/internal/app/files.go b/internal/app/files.go index e1b6cd8..90da972 100644 --- a/internal/app/files.go +++ b/internal/app/files.go @@ -16,8 +16,14 @@ const ( multipartFormMemory = 32 << 20 ) -func fileUploadHandler(files *tools.FilesTool) http.Handler { +func fileHandler(files *tools.FilesTool) http.Handler { return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + id := r.PathValue("id") + if id != "" { + fileDownloadHandler(files, id, w, r) + return + } + if r.Method != http.MethodPost { w.Header().Set("Allow", http.MethodPost) http.Error(w, "method not allowed", http.StatusMethodNotAllowed) @@ -44,6 +50,29 @@ func 
fileUploadHandler(files *tools.FilesTool) http.Handler { }) } 
+// fileDownloadHandler writes the stored file identified by id to w.
+//
+// Only GET and HEAD are accepted; any other method gets 405 with an Allow
+// header. Any error from files.GetRaw is reported as 404 with the error text
+// as the body. On success the response carries the file's media type, an
+// attachment Content-Disposition, and the X-File-Kind / X-File-SHA256
+// headers; the body is written for GET only.
+func fileDownloadHandler(files *tools.FilesTool, id string, w http.ResponseWriter, r *http.Request) {
+	if r.Method != http.MethodGet && r.Method != http.MethodHead {
+		w.Header().Set("Allow", "GET, HEAD")
+		http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
+		return
+	}
+
+	file, err := files.GetRaw(r.Context(), id)
+	if err != nil {
+		http.Error(w, err.Error(), http.StatusNotFound)
+		return
+	}
+
+	// Let mime quote or encode the filename parameter instead of
+	// concatenating the stored name into the header: a name containing
+	// spaces, quotes, ';' or non-ASCII characters would otherwise produce a
+	// malformed or ambiguous Content-Disposition value.
+	disposition := mime.FormatMediaType("attachment", map[string]string{"filename": file.Name})
+	if disposition == "" {
+		// FormatMediaType returns "" when it cannot build a valid value;
+		// still mark the response as a download.
+		disposition = "attachment"
+	}
+
+	header := w.Header()
+	header.Set("Content-Type", file.MediaType)
+	header.Set("Content-Disposition", disposition)
+	header.Set("X-File-Kind", file.Kind)
+	header.Set("X-File-SHA256", file.SHA256)
+	w.WriteHeader(http.StatusOK)
+	if r.Method != http.MethodHead {
+		// The status line is already sent, so a failed write cannot be
+		// reported to the client; the error is deliberately dropped.
+		_, _ = w.Write(file.Content)
+	}
+}
+
 func parseUploadRequest(r *http.Request) (tools.SaveFileDecodedInput, error) { contentType := r.Header.Get("Content-Type") mediaType, _, _ := mime.ParseMediaType(contentType) diff --git a/internal/mcpserver/server.go b/internal/mcpserver/server.go index 579bde7..5fc8726 100644 --- a/internal/mcpserver/server.go +++ b/internal/mcpserver/server.go @@ -127,6 +127,12 @@ func New(cfg config.MCPConfig, toolSet ToolSet) http.Handler { Description: "Retrieve explicit links and semantic neighbors for a thought.", }, toolSet.Links.Related) + server.AddResourceTemplate(&mcp.ResourceTemplate{ + Name: "stored_file", + URITemplate: "amcs://files/{id}", + Description: "A stored file. Read a file's raw binary content by its id. 
Use load_file for metadata.", + }, toolSet.Files.ReadResource) + addTool(server, &mcp.Tool{ Name: "save_file", Description: "Store a base64-encoded file such as an image, document, or audio clip, optionally linking it to a thought.", diff --git a/internal/tools/files.go b/internal/tools/files.go index 35045c5..37b9655 100644 --- a/internal/tools/files.go +++ b/internal/tools/files.go @@ -23,7 +23,8 @@ type FilesTool struct { type SaveFileInput struct { Name string `json:"name" jsonschema:"file name including extension, for example photo.png or note.pdf"` - ContentBase64 string `json:"content_base64" jsonschema:"file contents encoded as base64"` + ContentBase64 string `json:"content_base64,omitempty" jsonschema:"file contents encoded as base64; provide this or content_uri, not both"` + ContentURI string `json:"content_uri,omitempty" jsonschema:"resource URI of an already-uploaded file, e.g. amcs://files/{id}; use this instead of content_base64 to avoid re-encoding binary content"` MediaType string `json:"media_type,omitempty" jsonschema:"optional MIME type such as image/png, application/pdf, or audio/mpeg"` Kind string `json:"kind,omitempty" jsonschema:"optional logical type such as image, document, audio, or file"` ThoughtID string `json:"thought_id,omitempty" jsonschema:"optional thought id to link this file to"` @@ -68,19 +69,48 @@ func NewFilesTool(db *store.DB, sessions *session.ActiveProjects) *FilesTool { } func (t *FilesTool) Save(ctx context.Context, req *mcp.CallToolRequest, in SaveFileInput) (*mcp.CallToolResult, SaveFileOutput, error) { - contentBase64, mediaTypeFromDataURL := splitDataURL(strings.TrimSpace(in.ContentBase64)) - if contentBase64 == "" { - return nil, SaveFileOutput{}, errInvalidInput("content_base64 is required") + uri := strings.TrimSpace(in.ContentURI) + b64 := strings.TrimSpace(in.ContentBase64) + + if uri != "" && b64 != "" { + return nil, SaveFileOutput{}, errInvalidInput("provide content_uri or content_base64, not both") } - 
content, err := decodeBase64(contentBase64) - if err != nil { - return nil, SaveFileOutput{}, errInvalidInput("content_base64 must be valid base64") + var content []byte + var mediaTypeFromSource string + + if uri != "" { + if !strings.HasPrefix(uri, fileURIPrefix) { + return nil, SaveFileOutput{}, errInvalidInput("content_uri must be an amcs://files/{id} URI") + } + rawID := strings.TrimPrefix(uri, fileURIPrefix) + id, err := parseUUID(rawID) + if err != nil { + return nil, SaveFileOutput{}, errInvalidInput("content_uri contains an invalid file id") + } + file, err := t.store.GetStoredFile(ctx, id) + if err != nil { + return nil, SaveFileOutput{}, errInvalidInput("content_uri references a file that does not exist") + } + content = file.Content + mediaTypeFromSource = file.MediaType + } else { + contentBase64, mediaTypeFromDataURL := splitDataURL(b64) + if contentBase64 == "" { + return nil, SaveFileOutput{}, errInvalidInput("content_base64 or content_uri is required") + } + var err error + content, err = decodeBase64(contentBase64) + if err != nil { + return nil, SaveFileOutput{}, errInvalidInput("content_base64 must be valid base64") + } + mediaTypeFromSource = mediaTypeFromDataURL } + out, err := t.SaveDecoded(ctx, req, SaveFileDecodedInput{ Name: in.Name, Content: content, - MediaType: firstNonEmpty(strings.TrimSpace(in.MediaType), mediaTypeFromDataURL), + MediaType: firstNonEmpty(strings.TrimSpace(in.MediaType), mediaTypeFromSource), Kind: in.Kind, ThoughtID: in.ThoughtID, Project: in.Project, @@ -91,6 +121,16 @@ func (t *FilesTool) Save(ctx context.Context, req *mcp.CallToolRequest, in SaveF return nil, out, nil } 
+// fileURIPrefix is the URI prefix that precedes a stored file's id.
+const fileURIPrefix = "amcs://files/"
+
+// GetRaw looks up a stored file by its textual id. Whitespace around rawID is
+// ignored. When parseUUID rejects the id, that error is returned together
+// with a zero StoredFile; otherwise the store's result is returned unchanged.
+func (t *FilesTool) GetRaw(ctx context.Context, rawID string) (thoughttypes.StoredFile, error) {
+	trimmed := strings.TrimSpace(rawID)
+	fileID, parseErr := parseUUID(trimmed)
+	if parseErr != nil {
+		var none thoughttypes.StoredFile
+		return none, parseErr
+	}
+	return t.store.GetStoredFile(ctx, fileID)
+}
+
 func (t *FilesTool) Load(ctx context.Context, _ 
*mcp.CallToolRequest, in LoadFileInput) (*mcp.CallToolResult, LoadFileOutput, error) { id, err := parseUUID(in.ID) if err != nil { @@ -102,12 +142,48 @@ func (t *FilesTool) Load(ctx context.Context, _ *mcp.CallToolRequest, in LoadFil return nil, LoadFileOutput{}, err } - return nil, LoadFileOutput{ + uri := fileURIPrefix + file.ID.String() + result := &mcp.CallToolResult{ + Content: []mcp.Content{ + &mcp.EmbeddedResource{ + Resource: &mcp.ResourceContents{ + URI: uri, + MIMEType: file.MediaType, + Blob: file.Content, + }, + }, + }, + } + + return result, LoadFileOutput{ File: file, ContentBase64: base64.StdEncoding.EncodeToString(file.Content), }, nil } 
+// ReadResource returns a stored file's bytes as a single blob resource for a
+// URI of the form amcs://files/{id}.
+//
+// A URI that lacks the amcs://files/ prefix, carries an id that parseUUID
+// rejects, or names a file the store does not return is reported with
+// mcp.ResourceNotFoundError for the requested URI.
+func (t *FilesTool) ReadResource(ctx context.Context, req *mcp.ReadResourceRequest) (*mcp.ReadResourceResult, error) {
+	uri := req.Params.URI
+
+	// Require the prefix explicitly, matching the content_uri check in Save:
+	// strings.TrimPrefix alone would pass a URI without the prefix through
+	// unchanged and resolve it as if it were a file id.
+	rawID, ok := strings.CutPrefix(uri, fileURIPrefix)
+	if !ok {
+		return nil, mcp.ResourceNotFoundError(uri)
+	}
+
+	id, err := parseUUID(strings.TrimSpace(rawID))
+	if err != nil {
+		return nil, mcp.ResourceNotFoundError(uri)
+	}
+
+	file, err := t.store.GetStoredFile(ctx, id)
+	if err != nil {
+		return nil, mcp.ResourceNotFoundError(uri)
+	}
+
+	return &mcp.ReadResourceResult{
+		Contents: []*mcp.ResourceContents{
+			{
+				URI:      uri,
+				MIMEType: file.MediaType,
+				Blob:     file.Content,
+			},
+		},
+	}, nil
+}
+
 func (t *FilesTool) List(ctx context.Context, req *mcp.CallToolRequest, in ListFilesInput) (*mcp.CallToolResult, ListFilesOutput, error) { project, err := resolveProject(ctx, t.store, t.sessions, req, in.Project, false) if err != nil { diff --git a/llm/memory.md b/llm/memory.md index 158f07a..558ccb7 100644 --- a/llm/memory.md +++ b/llm/memory.md @@ -34,9 +34,11 @@ At the start of every project session, after setting the active project: - Save durable project facts with `capture_thought` after completing meaningful work. - Use `save_file` for project assets the memory should retain, such as screenshots, PDFs, audio notes, and other documents. 
- If the goal is to retain the artifact itself, use `save_file` directly instead of first reading, transcribing, or summarizing the file contents. +- When calling `save_file`, prefer `content_uri` over `content_base64` for binary files that were uploaded via HTTP — pass the `amcs://files/{id}` URI returned by the upload instead of re-encoding the bytes as base64. - Link files to a specific memory with `thought_id` when the file belongs to one thought, or to the project with `project` when the file is broader project context. - Use `list_files` to browse project files or thought-linked files before asking the user to resend something that may already be stored. -- Use `load_file` when you need the actual stored file contents back. +- Use `load_file` when you need the actual stored file contents back. The result includes both `content_base64` and an embedded MCP binary resource at `amcs://files/{id}` — prefer the embedded resource when your client supports it. +- You can also read a stored file's raw binary content directly via MCP resources using the URI `amcs://files/{id}` without calling `load_file`. - Stored files and attachment metadata must not be sent to the metadata extraction client. - Do not attach memory to the wrong project. @@ -53,6 +55,7 @@ At the start of every project session, after setting the active project: - Prefer concise summaries. - Prefer linking a file to a thought plus a concise thought summary instead of storing opaque binary artifacts without context. - Do not read a file just to make it storable; store the file directly and read it only when the file contents are needed for reasoning. +- Do not base64-encode a file to pass it to `save_file` if an `amcs://files/{id}` URI is already available from a prior HTTP upload. - When saving, choose the narrowest correct scope: project if project-specific, global if not. 
## Skills and Guardrails @@ -65,4 +68,4 @@ At the start of every project session, after setting the active project: ## Short Operational Form -Use AMCS memory in project scope when the current work matches a known project. If no clear project matches, global notebook memory is allowed for non-project-specific information. At the start of every project session call `list_project_skills` and `list_project_guardrails` and apply what is returned; only create new skills or guardrails if none exist. Store durable notes with `capture_thought`, store supporting binary artifacts with `save_file`, prefer saving a file directly when the artifact itself is what matters, browse stored files with `list_files`, and load them with `load_file` only when their contents are needed. Never store project-specific memory globally when a matching project exists, and never store memory in the wrong project. If project matching is ambiguous, ask the user. +Use AMCS memory in project scope when the current work matches a known project. If no clear project matches, global notebook memory is allowed for non-project-specific information. At the start of every project session call `list_project_skills` and `list_project_guardrails` and apply what is returned; only create new skills or guardrails if none exist. Store durable notes with `capture_thought`, store supporting binary artifacts with `save_file`, prefer saving a file directly when the artifact itself is what matters, browse stored files with `list_files`, and load them with `load_file` only when their contents are needed. When saving a file uploaded via HTTP, pass `content_uri: "amcs://files/{id}"` instead of re-encoding it as base64. Stored files can also be read as raw binary via MCP resources at `amcs://files/{id}`. Never store project-specific memory globally when a matching project exists, and never store memory in the wrong project. If project matching is ambiguous, ask the user.