feat(files): enhance file handling with support for HTTP uploads and direct binary access

2026-03-31 00:04:36 +02:00
parent 3c1ca83dc9
commit 8f734c0556
6 changed files with 164 additions and 21 deletions
@@ -41,8 +41,8 @@ A Go MCP server for capturing and retrieving thoughts, memory, and project conte
 | `recall_context` | Semantic + recency context block for injection |
 | `link_thoughts` | Create a typed relationship between thoughts |
 | `related_thoughts` | Explicit links + semantic neighbours |
-| `save_file` | Store a base64-encoded image, document, audio file, or other binary and optionally link it to a thought |
-| `load_file` | Retrieve a stored file by ID as base64 plus metadata |
+| `save_file` | Store a file (base64 or by resource URI) and optionally link it to a thought |
+| `load_file` | Retrieve a stored file by ID; returns metadata, base64 content, and an embedded MCP binary resource |
 | `list_files` | Browse stored files by thought, project, or kind |
 | `backfill_embeddings` | Generate missing embeddings for stored thoughts |
 | `reparse_thought_metadata` | Re-extract and normalize metadata for stored thoughts |
@@ -181,7 +181,11 @@ Run `reparse_thought_metadata` to fix stale or inconsistent metadata by re-extra

 ## File Storage

-Use `save_file` to persist binary files as base64. Files can optionally be linked to a memory by passing `thought_id`, which also adds an attachment reference to that thought's metadata. AI clients should prefer `save_file` when the goal is to retain the artifact itself, rather than reading or summarizing the file first. Stored files and attachment metadata are not forwarded to the metadata extraction client.
+Files can optionally be linked to a thought by passing `thought_id`, which also adds an attachment reference to that thought's metadata. AI clients should prefer `save_file` when the goal is to retain the artifact itself, rather than reading or summarizing the file first. Stored files and attachment metadata are not forwarded to the metadata extraction client.
+
+### MCP tools
+
+**Save via base64** (small files or when HTTP is not available):

 ```json
 {
@@ -193,15 +197,27 @@ Use `save_file` to persist binary files as base64. Files can optionally be linke
 }
 ```

-Load a stored file again with:
+**Save via resource URI** (preferred for binary; avoids base64 overhead):
+
+Upload the file's binary content via HTTP first (see "HTTP upload and download" below), then pass the returned `amcs://files/{id}` URI to `save_file`:

 ```json
 {
-  "id": "stored-file-uuid"
+  "name": "meeting-notes.pdf",
+  "thought_id": "optional-thought-uuid",
+  "content_uri": "amcs://files/<id-from-upload>"
 }
 ```

-List files for a thought or project with:
+`content_base64` and `content_uri` are mutually exclusive.
+
+**Load a file** — returns metadata, base64 content, and an embedded MCP binary resource (`amcs://files/{id}`):
+
+```json
+{ "id": "stored-file-uuid" }
+```
+
+**List files** for a thought or project:

 ```json
 {
@@ -212,9 +228,13 @@ List files for a thought or project with:
 }
 ```

-AMCS also supports direct authenticated HTTP uploads to `/files` for clients that want to stream file bodies instead of base64-encoding them into an MCP tool call.
+### MCP resources

-The Go server caps `/files` uploads at 100 MB per request. Large uploads are still also subject to available memory, Postgres limits, and any reverse proxy or load balancer limits in front of AMCS.
+Stored files are also exposed as MCP resources at `amcs://files/{id}`. MCP clients can read raw binary content directly via `resources/read` without going through `load_file`.
+
+### HTTP upload and download
+
+Direct HTTP access avoids base64 encoding entirely. The Go server caps `/files` uploads at 100 MB per request. Large uploads are also subject to available memory, Postgres limits, and any limits imposed by a reverse proxy or load balancer in front of AMCS.

 Multipart upload:

@@ -235,6 +255,14 @@ curl -X POST "http://localhost:8080/files?project=amcs&name=meeting-notes.pdf" \
  --data-binary @./meeting-notes.pdf
 ```

+Binary download:
+
+```bash
+curl "http://localhost:8080/files/<id>" \
+  -H "x-brain-key: <key>" \
+  -o meeting-notes.pdf
+```
+
 **Automatic backfill** (optional, config-gated):

 ```yaml
@@ -178,7 +178,8 @@ func routes(logger *slog.Logger, cfg *config.Config, db *store.DB, provider ai.P

 	mcpHandler := mcpserver.New(cfg.MCP, toolSet)
 	mux.Handle(cfg.MCP.Path, authMiddleware(mcpHandler))
-	mux.Handle("/files", authMiddleware(fileUploadHandler(filesTool)))
+	mux.Handle("/files", authMiddleware(fileHandler(filesTool)))
+	mux.Handle("/files/{id}", authMiddleware(fileHandler(filesTool)))
 	if oauthRegistry != nil && tokenStore != nil {
 		mux.HandleFunc("/.well-known/oauth-authorization-server", oauthMetadataHandler())
 		mux.HandleFunc("/oauth-authorization-server", oauthMetadataHandler())
@@ -16,8 +16,14 @@ const (
 	multipartFormMemory = 32 << 20
 )

-func fileUploadHandler(files *tools.FilesTool) http.Handler {
+func fileHandler(files *tools.FilesTool) http.Handler {
 	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		id := r.PathValue("id")
+		if id != "" {
+			fileDownloadHandler(files, id, w, r)
+			return
+		}
+
 		if r.Method != http.MethodPost {
 			w.Header().Set("Allow", http.MethodPost)
 			http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
@@ -44,6 +50,29 @@ func fileUploadHandler(files *tools.FilesTool) http.Handler {
 	})
 }

+// fileDownloadHandler writes the stored file identified by id to w as a
+// binary attachment.
+//
+// Only GET and HEAD are accepted; any other method gets 405 with an Allow
+// header. If files.GetRaw returns an error, the handler responds 404 with the
+// error text. On success it responds 200 with Content-Type,
+// Content-Disposition, X-File-Kind and X-File-SHA256 headers; the body is
+// omitted for HEAD requests.
+func fileDownloadHandler(files *tools.FilesTool, id string, w http.ResponseWriter, r *http.Request) {
+	if r.Method != http.MethodGet && r.Method != http.MethodHead {
+		w.Header().Set("Allow", "GET, HEAD")
+		http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
+		return
+	}
+
+	file, err := files.GetRaw(r.Context(), id)
+	if err != nil {
+		http.Error(w, err.Error(), http.StatusNotFound)
+		return
+	}
+
+	// An empty Content-Type header is useless to clients; fall back to the
+	// generic binary type when no media type was stored.
+	contentType := file.MediaType
+	if contentType == "" {
+		contentType = "application/octet-stream"
+	}
+
+	// Build the header with mime.FormatMediaType instead of concatenating the
+	// raw name: names containing spaces, quotes, semicolons or non-ASCII
+	// characters must be quoted or percent-encoded to form a valid header.
+	disposition := mime.FormatMediaType("attachment", map[string]string{"filename": file.Name})
+	if disposition == "" {
+		// FormatMediaType returns "" when it cannot produce a valid value;
+		// still mark the response as a download.
+		disposition = "attachment"
+	}
+
+	w.Header().Set("Content-Type", contentType)
+	w.Header().Set("Content-Disposition", disposition)
+	w.Header().Set("X-File-Kind", file.Kind)
+	w.Header().Set("X-File-SHA256", file.SHA256)
+	w.WriteHeader(http.StatusOK)
+	if r.Method != http.MethodHead {
+		// The status line is already sent, so a write failure (typically a
+		// client disconnect) cannot be reported to the caller.
+		_, _ = w.Write(file.Content)
+	}
+}
+
 func parseUploadRequest(r *http.Request) (tools.SaveFileDecodedInput, error) {
 	contentType := r.Header.Get("Content-Type")
 	mediaType, _, _ := mime.ParseMediaType(contentType)
@@ -127,6 +127,12 @@ func New(cfg config.MCPConfig, toolSet ToolSet) http.Handler {
 		Description: "Retrieve explicit links and semantic neighbors for a thought.",
 	}, toolSet.Links.Related)

+	server.AddResourceTemplate(&mcp.ResourceTemplate{
+		Name:        "stored_file",
+		URITemplate: "amcs://files/{id}",
+		Description: "A stored file. Read a file's raw binary content by its id. Use load_file for metadata.",
+	}, toolSet.Files.ReadResource)
+
 	addTool(server, &mcp.Tool{
 		Name:        "save_file",
 		Description: "Store a base64-encoded file such as an image, document, or audio clip, optionally linking it to a thought.",
@@ -23,7 +23,8 @@ type FilesTool struct {

 type SaveFileInput struct {
 	Name          string `json:"name" jsonschema:"file name including extension, for example photo.png or note.pdf"`
-	ContentBase64 string `json:"content_base64" jsonschema:"file contents encoded as base64"`
+	ContentBase64 string `json:"content_base64,omitempty" jsonschema:"file contents encoded as base64; provide this or content_uri, not both"`
+	ContentURI    string `json:"content_uri,omitempty" jsonschema:"resource URI of an already-uploaded file, e.g. amcs://files/{id}; use this instead of content_base64 to avoid re-encoding binary content"`
 	MediaType     string `json:"media_type,omitempty" jsonschema:"optional MIME type such as image/png, application/pdf, or audio/mpeg"`
 	Kind          string `json:"kind,omitempty" jsonschema:"optional logical type such as image, document, audio, or file"`
 	ThoughtID     string `json:"thought_id,omitempty" jsonschema:"optional thought id to link this file to"`
@@ -68,19 +69,48 @@ func NewFilesTool(db *store.DB, sessions *session.ActiveProjects) *FilesTool {
 }

 func (t *FilesTool) Save(ctx context.Context, req *mcp.CallToolRequest, in SaveFileInput) (*mcp.CallToolResult, SaveFileOutput, error) {
-	contentBase64, mediaTypeFromDataURL := splitDataURL(strings.TrimSpace(in.ContentBase64))
-	if contentBase64 == "" {
-		return nil, SaveFileOutput{}, errInvalidInput("content_base64 is required")
+	uri := strings.TrimSpace(in.ContentURI)
+	b64 := strings.TrimSpace(in.ContentBase64)
+
+	if uri != "" && b64 != "" {
+		return nil, SaveFileOutput{}, errInvalidInput("provide content_uri or content_base64, not both")
 	}

-	content, err := decodeBase64(contentBase64)
+	var content []byte
+	var mediaTypeFromSource string
+
+	if uri != "" {
+		if !strings.HasPrefix(uri, fileURIPrefix) {
+			return nil, SaveFileOutput{}, errInvalidInput("content_uri must be an amcs://files/{id} URI")
+		}
+		rawID := strings.TrimPrefix(uri, fileURIPrefix)
+		id, err := parseUUID(rawID)
+		if err != nil {
+			return nil, SaveFileOutput{}, errInvalidInput("content_uri contains an invalid file id")
+		}
+		file, err := t.store.GetStoredFile(ctx, id)
+		if err != nil {
+			return nil, SaveFileOutput{}, errInvalidInput("content_uri references a file that does not exist")
+		}
+		content = file.Content
+		mediaTypeFromSource = file.MediaType
+	} else {
+		contentBase64, mediaTypeFromDataURL := splitDataURL(b64)
+		if contentBase64 == "" {
+			return nil, SaveFileOutput{}, errInvalidInput("content_base64 or content_uri is required")
+		}
+		var err error
+		content, err = decodeBase64(contentBase64)
 		if err != nil {
 			return nil, SaveFileOutput{}, errInvalidInput("content_base64 must be valid base64")
 		}
+		mediaTypeFromSource = mediaTypeFromDataURL
+	}
+
 	out, err := t.SaveDecoded(ctx, req, SaveFileDecodedInput{
 		Name:      in.Name,
 		Content:   content,
-		MediaType: firstNonEmpty(strings.TrimSpace(in.MediaType), mediaTypeFromDataURL),
+		MediaType: firstNonEmpty(strings.TrimSpace(in.MediaType), mediaTypeFromSource),
 		Kind:      in.Kind,
 		ThoughtID: in.ThoughtID,
 		Project:   in.Project,
@@ -91,6 +121,16 @@ func (t *FilesTool) Save(ctx context.Context, req *mcp.CallToolRequest, in SaveF
 	return nil, out, nil
 }

+// fileURIPrefix is the URI prefix under which stored files are addressed;
+// the file id follows it directly.
+const fileURIPrefix = "amcs://files/"
+
+// GetRaw fetches a stored file by its textual id. Surrounding whitespace is
+// trimmed before the id is parsed with parseUUID; a parse failure is returned
+// as-is together with a zero StoredFile, otherwise the result of the store
+// lookup is returned unchanged.
+func (t *FilesTool) GetRaw(ctx context.Context, rawID string) (thoughttypes.StoredFile, error) {
+	trimmed := strings.TrimSpace(rawID)
+
+	fileID, parseErr := parseUUID(trimmed)
+	if parseErr != nil {
+		var none thoughttypes.StoredFile
+		return none, parseErr
+	}
+
+	return t.store.GetStoredFile(ctx, fileID)
+}
+
 func (t *FilesTool) Load(ctx context.Context, _ *mcp.CallToolRequest, in LoadFileInput) (*mcp.CallToolResult, LoadFileOutput, error) {
 	id, err := parseUUID(in.ID)
 	if err != nil {
@@ -102,12 +142,48 @@ func (t *FilesTool) Load(ctx context.Context, _ *mcp.CallToolRequest, in LoadFil
 		return nil, LoadFileOutput{}, err
 	}

-	return nil, LoadFileOutput{
+	uri := fileURIPrefix + file.ID.String()
+	result := &mcp.CallToolResult{
+		Content: []mcp.Content{
+			&mcp.EmbeddedResource{
+				Resource: &mcp.ResourceContents{
+					URI:      uri,
+					MIMEType: file.MediaType,
+					Blob:     file.Content,
+				},
+			},
+		},
+	}
+
+	return result, LoadFileOutput{
 		File:          file,
 		ContentBase64: base64.StdEncoding.EncodeToString(file.Content),
 	}, nil
 }

+// ReadResource answers an MCP resource read for a stored file. The file id is
+// whatever follows fileURIPrefix in the requested URI. Any failure to parse
+// the id or to fetch the file is reported as a resource-not-found error for
+// that URI; on success the file's bytes are returned as a single blob whose
+// MIME type is the stored media type.
+func (t *FilesTool) ReadResource(ctx context.Context, req *mcp.ReadResourceRequest) (*mcp.ReadResourceResult, error) {
+	requested := req.Params.URI
+
+	// GetRaw trims whitespace, parses the id and fetches the file; both
+	// failure modes collapse into the same not-found response here.
+	stored, err := t.GetRaw(ctx, strings.TrimPrefix(requested, fileURIPrefix))
+	if err != nil {
+		return nil, mcp.ResourceNotFoundError(requested)
+	}
+
+	blob := &mcp.ResourceContents{
+		URI:      requested,
+		MIMEType: stored.MediaType,
+		Blob:     stored.Content,
+	}
+	return &mcp.ReadResourceResult{Contents: []*mcp.ResourceContents{blob}}, nil
+}
+
 func (t *FilesTool) List(ctx context.Context, req *mcp.CallToolRequest, in ListFilesInput) (*mcp.CallToolResult, ListFilesOutput, error) {
 	project, err := resolveProject(ctx, t.store, t.sessions, req, in.Project, false)
 	if err != nil {
@@ -34,9 +34,11 @@ At the start of every project session, after setting the active project:
 - Save durable project facts with `capture_thought` after completing meaningful work.
 - Use `save_file` for project assets the memory should retain, such as screenshots, PDFs, audio notes, and other documents.
 - If the goal is to retain the artifact itself, use `save_file` directly instead of first reading, transcribing, or summarizing the file contents.
+- When calling `save_file`, prefer `content_uri` over `content_base64` for binary files that were uploaded via HTTP — pass the `amcs://files/{id}` URI returned by the upload instead of re-encoding the bytes as base64.
 - Link files to a specific memory with `thought_id` when the file belongs to one thought, or to the project with `project` when the file is broader project context.
 - Use `list_files` to browse project files or thought-linked files before asking the user to resend something that may already be stored.
-- Use `load_file` when you need the actual stored file contents back.
+- Use `load_file` when you need the actual stored file contents back. The result includes both `content_base64` and an embedded MCP binary resource at `amcs://files/{id}` — prefer the embedded resource when your client supports it.
+- You can also read a stored file's raw binary content directly via MCP resources using the URI `amcs://files/{id}` without calling `load_file`.
 - Stored files and attachment metadata must not be sent to the metadata extraction client.
 - Do not attach memory to the wrong project.

@@ -53,6 +55,7 @@ At the start of every project session, after setting the active project:
 - Prefer concise summaries.
 - Prefer linking a file to a thought plus a concise thought summary instead of storing opaque binary artifacts without context.
 - Do not read a file just to make it storable; store the file directly and read it only when the file contents are needed for reasoning.
+- Do not base64-encode a file to pass it to `save_file` if an `amcs://files/{id}` URI is already available from a prior HTTP upload.
 - When saving, choose the narrowest correct scope: project if project-specific, global if not.

 ## Skills and Guardrails
@@ -65,4 +68,4 @@ At the start of every project session, after setting the active project:

 ## Short Operational Form

-Use AMCS memory in project scope when the current work matches a known project. If no clear project matches, global notebook memory is allowed for non-project-specific information. At the start of every project session call `list_project_skills` and `list_project_guardrails` and apply what is returned; only create new skills or guardrails if none exist. Store durable notes with `capture_thought`, store supporting binary artifacts with `save_file`, prefer saving a file directly when the artifact itself is what matters, browse stored files with `list_files`, and load them with `load_file` only when their contents are needed. Never store project-specific memory globally when a matching project exists, and never store memory in the wrong project. If project matching is ambiguous, ask the user.
+Use AMCS memory in project scope when the current work matches a known project. If no clear project matches, global notebook memory is allowed for non-project-specific information. At the start of every project session call `list_project_skills` and `list_project_guardrails` and apply what is returned; only create new skills or guardrails if none exist. Store durable notes with `capture_thought`, store supporting binary artifacts with `save_file`, prefer saving a file directly when the artifact itself is what matters, browse stored files with `list_files`, and load them with `load_file` only when their contents are needed. When saving a file uploaded via HTTP, pass `content_uri: "amcs://files/{id}"` instead of re-encoding it as base64. Stored files can also be read as raw binary via MCP resources at `amcs://files/{id}`. Never store project-specific memory globally when a matching project exists, and never store memory in the wrong project. If project matching is ambiguous, ask the user.