From acd780ac9c885ccafb477e3214ee007fd8f4cf7f Mon Sep 17 00:00:00 2001 From: "Hein (Warky)" Date: Tue, 31 Mar 2026 00:30:56 +0200 Subject: [PATCH] feat(files): introduce upload_file tool for staging files and enhance save_file documentation --- README.md | 49 ++++++++++++++------ internal/ai/compat/client.go | 14 +++++- internal/mcpserver/schema.go | 14 +++++- internal/mcpserver/server.go | 7 ++- internal/tools/files.go | 89 ++++++++++++++++++++++++++++++++++-- internal/tools/files_test.go | 1 + llm/memory.md | 11 +++-- 7 files changed, 157 insertions(+), 28 deletions(-) diff --git a/README.md b/README.md index 8a4698f..cbe9976 100644 --- a/README.md +++ b/README.md @@ -41,7 +41,8 @@ A Go MCP server for capturing and retrieving thoughts, memory, and project conte | `recall_context` | Semantic + recency context block for injection | | `link_thoughts` | Create a typed relationship between thoughts | | `related_thoughts` | Explicit links + semantic neighbours | -| `save_file` | Store a file (base64 or by resource URI) and optionally link it to a thought | +| `upload_file` | Stage a file from a server-side path or base64 and get an `amcs://files/{id}` resource URI | +| `save_file` | Store a file (base64 or resource URI) and optionally link it to a thought | | `load_file` | Retrieve a stored file by ID; returns metadata, base64 content, and an embedded MCP binary resource | | `list_files` | Browse stored files by thought, project, or kind | | `backfill_embeddings` | Generate missing embeddings for stored thoughts | @@ -185,7 +186,37 @@ Files can optionally be linked to a thought by passing `thought_id`, which also ### MCP tools -**Save via base64** (small files or when HTTP is not available): +**Stage a file and get a URI** (`upload_file`) — preferred for large or binary files: + +```json +{ + "name": "diagram.png", + "content_path": "/absolute/path/to/diagram.png" +} +``` + +Or with base64 for small files (≤10 MB): + +```json +{ + "name": "diagram.png", + 
"content_base64": "<base64-payload>" +} +``` + +Returns `{"file": {...}, "uri": "amcs://files/<id>"}`. Pass `thought_id`/`project` to link immediately, or omit them and use the URI in a later `save_file` call. + +**Link a staged file to a thought** (`save_file` with `content_uri`): + +```json +{ + "name": "meeting-notes.pdf", + "thought_id": "optional-thought-uuid", + "content_uri": "amcs://files/<id>" +} +``` + +**Save small files inline** (`save_file` with `content_base64`, ≤10 MB): ```json { @@ -197,19 +228,7 @@ Files can optionally be linked to a thought by passing `thought_id`, which also } ``` -**Save via resource URI** (preferred for binary; avoids base64 overhead): - -Upload the file binary via HTTP first (see below), then pass the returned URI to `save_file`: - -```json -{ - "name": "meeting-notes.pdf", - "thought_id": "optional-thought-uuid", - "content_uri": "amcs://files/<id>" -} -``` - -`content_base64` and `content_uri` are mutually exclusive. +`content_base64` and `content_uri` are mutually exclusive in both tools.
**Load a file** — returns metadata, base64 content, and an embedded MCP binary resource (`amcs://files/{id}`): diff --git a/internal/ai/compat/client.go b/internal/ai/compat/client.go index 059bff1..7ec6fb7 100644 --- a/internal/ai/compat/client.go +++ b/internal/ai/compat/client.go @@ -224,7 +224,7 @@ func (c *Client) ExtractMetadata(ctx context.Context, input string) (thoughttype attrs := []any{ slog.String("provider", c.name), slog.String("model", model), - slog.Duration("duration", time.Since(start)), + slog.String("duration", formatLogDuration(time.Since(start))), } if err != nil { attrs = append(attrs, slog.String("error", err.Error())) @@ -298,6 +298,18 @@ func (c *Client) ExtractMetadata(ctx context.Context, input string) (thoughttype return heuristic, nil } +func formatLogDuration(d time.Duration) string { + if d < 0 { + d = -d + } + + totalMilliseconds := d.Milliseconds() + minutes := totalMilliseconds / 60000 + seconds := (totalMilliseconds / 1000) % 60 + milliseconds := totalMilliseconds % 1000 + return fmt.Sprintf("%02d:%02d:%03d", minutes, seconds, milliseconds) +} + func (c *Client) extractMetadataWithModel(ctx context.Context, input, model string) (thoughttypes.ThoughtMetadata, error) { if c.shouldBypassModel(model) { return thoughttypes.ThoughtMetadata{}, fmt.Errorf("%s metadata: model %q temporarily bypassed after repeated empty responses", c.name, model) diff --git a/internal/mcpserver/schema.go b/internal/mcpserver/schema.go index c402b6f..e946315 100644 --- a/internal/mcpserver/schema.go +++ b/internal/mcpserver/schema.go @@ -47,7 +47,7 @@ func logToolCall[In any, Out any](logger *slog.Logger, toolName string, handler result, out, err := handler(ctx, req, in) completionAttrs := append([]any{}, attrs...) 
- completionAttrs = append(completionAttrs, slog.Duration("duration", time.Since(start))) + completionAttrs = append(completionAttrs, slog.String("duration", formatLogDuration(time.Since(start)))) if err != nil { completionAttrs = append(completionAttrs, slog.String("error", err.Error())) logger.Error("mcp tool completed", completionAttrs...) @@ -70,6 +70,18 @@ func truncateArgs(args any) string { return string(b[:maxLoggedArgBytes]) + fmt.Sprintf("… (%d bytes total)", len(b)) } +func formatLogDuration(d time.Duration) string { + if d < 0 { + d = -d + } + + totalMilliseconds := d.Milliseconds() + minutes := totalMilliseconds / 60000 + seconds := (totalMilliseconds / 1000) % 60 + milliseconds := totalMilliseconds % 1000 + return fmt.Sprintf("%02d:%02d:%03d", minutes, seconds, milliseconds) +} + func setToolSchemas[In any, Out any](tool *mcp.Tool) error { if tool.InputSchema == nil { inputSchema, err := jsonschema.For[In](toolSchemaOptions) diff --git a/internal/mcpserver/server.go b/internal/mcpserver/server.go index ed7d3f7..fda582b 100644 --- a/internal/mcpserver/server.go +++ b/internal/mcpserver/server.go @@ -134,9 +134,14 @@ func New(cfg config.MCPConfig, logger *slog.Logger, toolSet ToolSet) http.Handle Description: "A stored file. Read a file's raw binary content by its id. Use load_file for metadata.", }, toolSet.Files.ReadResource) + addTool(server, logger, &mcp.Tool{ + Name: "upload_file", + Description: "Stage a file and get an amcs://files/{id} resource URI. Provide content_path (absolute server-side path, no size limit) or content_base64 (≤10 MB). Optionally link immediately with thought_id/project, or omit them and pass the returned URI to save_file later.", + }, toolSet.Files.Upload) + addTool(server, logger, &mcp.Tool{ Name: "save_file", - Description: "Store a file and optionally link it to a thought. Supply either content_base64 (≤10 MB) or content_uri (amcs://files/{id} from a prior POST /files upload). 
For files larger than 10 MB, upload via POST /files first and pass the returned URI as content_uri.", + Description: "Store a file and optionally link it to a thought. Supply either content_base64 (≤10 MB) or content_uri (amcs://files/{id} from a prior upload_file or POST /files call). For files larger than 10 MB, use upload_file with content_path first.", }, toolSet.Files.Save) addTool(server, logger, &mcp.Tool{ diff --git a/internal/tools/files.go b/internal/tools/files.go index 181cda4..471c5f2 100644 --- a/internal/tools/files.go +++ b/internal/tools/files.go @@ -6,6 +6,8 @@ import ( "encoding/base64" "encoding/hex" "net/http" + "os" + "path/filepath" "strings" "github.com/google/uuid" @@ -65,6 +67,21 @@ type ListFilesInput struct { Kind string `json:"kind,omitempty" jsonschema:"optional kind filter such as image, document, audio, or file"` } +type UploadFileInput struct { + Name string `json:"name" jsonschema:"file name including extension, for example photo.png or note.pdf"` + ContentPath string `json:"content_path,omitempty" jsonschema:"absolute path to a file on the server; preferred for large files — no base64 overhead"` + ContentBase64 string `json:"content_base64,omitempty" jsonschema:"file contents encoded as base64 (≤10 MB); use content_path for larger files"` + MediaType string `json:"media_type,omitempty" jsonschema:"optional MIME type such as image/png, application/pdf, or audio/mpeg"` + Kind string `json:"kind,omitempty" jsonschema:"optional logical type such as image, document, audio, or file"` + ThoughtID string `json:"thought_id,omitempty" jsonschema:"optional thought id to link this file to immediately"` + Project string `json:"project,omitempty" jsonschema:"optional project name or id"` +} + +type UploadFileOutput struct { + File thoughttypes.StoredFile `json:"file"` + URI string `json:"uri" jsonschema:"amcs resource URI for this file, e.g. 
amcs://files/{id}; pass as content_uri in save_file to link without re-uploading"` +} + type ListFilesOutput struct { Files []thoughttypes.StoredFile `json:"files"` } @@ -73,6 +90,60 @@ func NewFilesTool(db *store.DB, sessions *session.ActiveProjects) *FilesTool { return &FilesTool{store: db, sessions: sessions} } +func (t *FilesTool) Upload(ctx context.Context, req *mcp.CallToolRequest, in UploadFileInput) (*mcp.CallToolResult, UploadFileOutput, error) { + path := strings.TrimSpace(in.ContentPath) + b64 := strings.TrimSpace(in.ContentBase64) + + if path != "" && b64 != "" { + return nil, UploadFileOutput{}, errInvalidInput("provide content_path or content_base64, not both") + } + + var content []byte + var mediaTypeFromSource string + + if path != "" { + if !filepath.IsAbs(path) { + return nil, UploadFileOutput{}, errInvalidInput("content_path must be an absolute path") + } + var err error + content, err = os.ReadFile(path) + if err != nil { + return nil, UploadFileOutput{}, errInvalidInput("cannot read content_path: " + err.Error()) + } + } else { + if b64 == "" { + return nil, UploadFileOutput{}, errInvalidInput("content_path or content_base64 is required") + } + if len(b64) > maxBase64ToolBytes { + return nil, UploadFileOutput{}, errInvalidInput( + "content_base64 exceeds the 10 MB MCP tool limit; use content_path instead", + ) + } + raw, dataURLMediaType := splitDataURL(b64) + var err error + content, err = decodeBase64(raw) + if err != nil { + return nil, UploadFileOutput{}, errInvalidInput("content_base64 must be valid base64") + } + mediaTypeFromSource = dataURLMediaType + } + + out, err := t.SaveDecoded(ctx, req, SaveFileDecodedInput{ + Name: in.Name, + Content: content, + MediaType: firstNonEmpty(strings.TrimSpace(in.MediaType), mediaTypeFromSource), + Kind: in.Kind, + ThoughtID: in.ThoughtID, + Project: in.Project, + }) + if err != nil { + return nil, UploadFileOutput{}, err + } + + uri := fileURIPrefix + out.File.ID.String() + return nil, 
UploadFileOutput{File: out.File, URI: uri}, nil +} + func (t *FilesTool) Save(ctx context.Context, req *mcp.CallToolRequest, in SaveFileInput) (*mcp.CallToolResult, SaveFileOutput, error) { uri := strings.TrimSpace(in.ContentURI) b64 := strings.TrimSpace(in.ContentBase64) @@ -338,6 +409,12 @@ func decodeBase64(value string) ([]byte, error) { } }, value) + var candidates []string + candidates = append(candidates, cleaned) + if trimmed := strings.TrimRight(cleaned, "="); trimmed != cleaned && trimmed != "" { + candidates = append(candidates, trimmed) + } + encodings := []*base64.Encoding{ base64.StdEncoding, base64.RawStdEncoding, @@ -346,12 +423,14 @@ func decodeBase64(value string) ([]byte, error) { } var lastErr error - for _, encoding := range encodings { - decoded, err := encoding.DecodeString(cleaned) - if err == nil { - return decoded, nil + for _, candidate := range candidates { + for _, encoding := range encodings { + decoded, err := encoding.DecodeString(candidate) + if err == nil { + return decoded, nil + } + lastErr = err } - lastErr = err } return nil, lastErr diff --git a/internal/tools/files_test.go b/internal/tools/files_test.go index 66aa06c..556bb75 100644 --- a/internal/tools/files_test.go +++ b/internal/tools/files_test.go @@ -10,6 +10,7 @@ func TestDecodeBase64AcceptsWhitespaceAndMultipleVariants(t *testing.T) { }{ {name: "standard with whitespace", input: "aG V s\nbG8=", want: "hello"}, {name: "raw standard", input: "aGVsbG8", want: "hello"}, + {name: "standard with extra padding", input: "aGVsbG8==", want: "hello"}, {name: "standard url-safe payload", input: "--8=", want: string([]byte{0xfb, 0xef})}, {name: "raw url-safe payload", input: "--8", want: string([]byte{0xfb, 0xef})}, } diff --git a/llm/memory.md b/llm/memory.md index 558ccb7..47ffb78 100644 --- a/llm/memory.md +++ b/llm/memory.md @@ -32,9 +32,10 @@ At the start of every project session, after setting the active project: - Use project memory for code decisions, architecture, TODOs, 
debugging findings, and context specific to the current repo or workstream. - Before substantial work, always retrieve context with `get_project_context` or `recall_context` so prior decisions inform your approach. - Save durable project facts with `capture_thought` after completing meaningful work. -- Use `save_file` for project assets the memory should retain, such as screenshots, PDFs, audio notes, and other documents. -- If the goal is to retain the artifact itself, use `save_file` directly instead of first reading, transcribing, or summarizing the file contents. -- When calling `save_file`, prefer `content_uri` over `content_base64` for binary files that were uploaded via HTTP — pass the `amcs://files/{id}` URI returned by the upload instead of re-encoding the bytes as base64. +- Use `save_file` or `upload_file` for project assets the memory should retain, such as screenshots, PDFs, audio notes, and other documents. +- If the goal is to retain the artifact itself, store the file directly instead of first reading, transcribing, or summarizing its contents. +- For binary files or files larger than 10 MB, call `upload_file` with `content_path` (absolute server-side path) first to get an `amcs://files/{id}` URI, then pass that URI to `save_file` as `content_uri` to link it to a thought. This avoids base64 encoding entirely. +- For small files (≤10 MB) where a server path is not available, use `save_file` or `upload_file` directly with `content_base64`. - Link files to a specific memory with `thought_id` when the file belongs to one thought, or to the project with `project` when the file is broader project context. - Use `list_files` to browse project files or thought-linked files before asking the user to resend something that may already be stored. - Use `load_file` when you need the actual stored file contents back. 
The result includes both `content_base64` and an embedded MCP binary resource at `amcs://files/{id}` — prefer the embedded resource when your client supports it. @@ -55,7 +56,7 @@ At the start of every project session, after setting the active project: - Prefer concise summaries. - Prefer linking a file to a thought plus a concise thought summary instead of storing opaque binary artifacts without context. - Do not read a file just to make it storable; store the file directly and read it only when the file contents are needed for reasoning. -- Do not base64-encode a file to pass it to `save_file` if an `amcs://files/{id}` URI is already available from a prior HTTP upload. +- Do not base64-encode a file to pass it to `save_file` if an `amcs://files/{id}` URI is already available from a prior `upload_file` or HTTP upload. - When saving, choose the narrowest correct scope: project if project-specific, global if not. ## Skills and Guardrails @@ -68,4 +69,4 @@ At the start of every project session, after setting the active project: ## Short Operational Form -Use AMCS memory in project scope when the current work matches a known project. If no clear project matches, global notebook memory is allowed for non-project-specific information. At the start of every project session call `list_project_skills` and `list_project_guardrails` and apply what is returned; only create new skills or guardrails if none exist. Store durable notes with `capture_thought`, store supporting binary artifacts with `save_file`, prefer saving a file directly when the artifact itself is what matters, browse stored files with `list_files`, and load them with `load_file` only when their contents are needed. When saving a file uploaded via HTTP, pass `content_uri: "amcs://files/{id}"` instead of re-encoding it as base64. Stored files can also be read as raw binary via MCP resources at `amcs://files/{id}`. 
Never store project-specific memory globally when a matching project exists, and never store memory in the wrong project. If project matching is ambiguous, ask the user. +Use AMCS memory in project scope when the current work matches a known project. If no clear project matches, global notebook memory is allowed for non-project-specific information. At the start of every project session call `list_project_skills` and `list_project_guardrails` and apply what is returned; only create new skills or guardrails if none exist. Store durable notes with `capture_thought`. For binary files or files larger than 10 MB, call `upload_file` with `content_path` to stage the file and get an `amcs://files/{id}` URI, then pass that URI to `save_file` as `content_uri` to link it to a thought. For small files, use `save_file` or `upload_file` with `content_base64` directly. Browse stored files with `list_files`, and load them with `load_file` only when their contents are needed. Stored files can also be read as raw binary via MCP resources at `amcs://files/{id}`. Never store project-specific memory globally when a matching project exists, and never store memory in the wrong project. If project matching is ambiguous, ask the user.