feat(llm): add LLM integration instructions and handler

* Serve LLM instructions at `/llm`
* Include markdown content for memory instructions
* Update README with LLM integration details
* Add tests for LLM instructions handler
* Modify database migrations to use GUIDs for thoughts and projects
This commit is contained in:
Hein
2026-03-25 18:02:42 +02:00
parent cebef3a07c
commit 8d0a91a961
16 changed files with 600 additions and 41 deletions

View File

@@ -59,6 +59,8 @@ Run the SQL migrations against a local database with:
`DATABASE_URL=postgres://... make migrate`
LLM integration instructions are served at `/llm`.
## Containers
The repo now includes a `Dockerfile` and Compose files for running the app with Postgres + pgvector.

View File

@@ -114,6 +114,7 @@ func routes(logger *slog.Logger, cfg *config.Config, db *store.DB, provider ai.P
mcpHandler := mcpserver.New(cfg.MCP, toolSet)
mux.Handle(cfg.MCP.Path, auth.Middleware(cfg.Auth, keyring, logger)(mcpHandler))
mux.HandleFunc("/favicon.ico", serveFavicon)
mux.HandleFunc("/llm", serveLLMInstructions)
mux.HandleFunc("/healthz", func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)

22
internal/app/llm.go Normal file
View File

@@ -0,0 +1,22 @@
package app
import (
"net/http"
amcsllm "git.warky.dev/wdevs/amcs/llm"
)
// serveLLMInstructions serves the embedded LLM integration instructions
// as markdown at exactly "/llm". The endpoint is read-only: only GET and
// HEAD are accepted; other methods receive 405 with an Allow header.
func serveLLMInstructions(w http.ResponseWriter, r *http.Request) {
	// The mux registers the exact pattern "/llm", but guard the path
	// defensively in case the handler is ever remounted elsewhere.
	if r.URL.Path != "/llm" {
		http.NotFound(w, r)
		return
	}
	// Reject mutating methods: this endpoint only serves documentation.
	if r.Method != http.MethodGet && r.Method != http.MethodHead {
		w.Header().Set("Allow", "GET, HEAD")
		http.Error(w, http.StatusText(http.StatusMethodNotAllowed), http.StatusMethodNotAllowed)
		return
	}
	w.Header().Set("Content-Type", "text/markdown; charset=utf-8")
	w.Header().Set("Cache-Control", "no-cache")
	w.WriteHeader(http.StatusOK)
	// HEAD requests get status and headers only; skip the body.
	if r.Method == http.MethodHead {
		return
	}
	// Write errors are deliberately ignored: the client has gone away
	// and there is no useful recovery once headers are sent.
	_, _ = w.Write(amcsllm.MemoryInstructions)
}

29
internal/app/llm_test.go Normal file
View File

@@ -0,0 +1,29 @@
package app
import (
"net/http"
"net/http/httptest"
"testing"
amcsllm "git.warky.dev/wdevs/amcs/llm"
)
// TestServeLLMInstructions exercises the /llm handler: the GET happy
// path, HEAD (headers without a body), and an unknown path (404).
func TestServeLLMInstructions(t *testing.T) {
	t.Run("get returns instructions", func(t *testing.T) {
		req := httptest.NewRequest(http.MethodGet, "/llm", nil)
		rec := httptest.NewRecorder()

		serveLLMInstructions(rec, req)

		res := rec.Result()
		defer res.Body.Close()
		if res.StatusCode != http.StatusOK {
			t.Fatalf("status = %d, want %d", res.StatusCode, http.StatusOK)
		}
		if got := res.Header.Get("Content-Type"); got != "text/markdown; charset=utf-8" {
			t.Fatalf("content-type = %q, want %q", got, "text/markdown; charset=utf-8")
		}
		if body := rec.Body.String(); body != string(amcsllm.MemoryInstructions) {
			t.Fatalf("body = %q, want embedded instructions", body)
		}
	})

	t.Run("head returns headers only", func(t *testing.T) {
		req := httptest.NewRequest(http.MethodHead, "/llm", nil)
		rec := httptest.NewRecorder()

		serveLLMInstructions(rec, req)

		res := rec.Result()
		defer res.Body.Close()
		if res.StatusCode != http.StatusOK {
			t.Fatalf("status = %d, want %d", res.StatusCode, http.StatusOK)
		}
		if rec.Body.Len() != 0 {
			t.Fatalf("body length = %d, want empty body for HEAD", rec.Body.Len())
		}
	})

	t.Run("unknown path is 404", func(t *testing.T) {
		req := httptest.NewRequest(http.MethodGet, "/llm/extra", nil)
		rec := httptest.NewRecorder()

		serveLLMInstructions(rec, req)

		if rec.Code != http.StatusNotFound {
			t.Fatalf("status = %d, want %d", rec.Code, http.StatusNotFound)
		}
	})
}

View File

@@ -13,7 +13,9 @@ import (
func (db *DB) InsertLink(ctx context.Context, link thoughttypes.ThoughtLink) error {
_, err := db.pool.Exec(ctx, `
insert into thought_links (from_id, to_id, relation)
values ($1, $2, $3)
select f.id, t.id, $3
from thoughts f, thoughts t
where f.guid = $1 and t.guid = $2
`, link.FromID, link.ToID, link.Relation)
if err != nil {
return fmt.Errorf("insert link: %w", err)
@@ -23,15 +25,15 @@ func (db *DB) InsertLink(ctx context.Context, link thoughttypes.ThoughtLink) err
func (db *DB) LinkedThoughts(ctx context.Context, thoughtID uuid.UUID) ([]thoughttypes.LinkedThought, error) {
rows, err := db.pool.Query(ctx, `
select t.id, t.content, t.metadata, t.project_id, t.archived_at, t.created_at, t.updated_at, l.relation, 'outgoing' as direction, l.created_at
select t.guid, t.content, t.metadata, t.project_id, t.archived_at, t.created_at, t.updated_at, l.relation, 'outgoing' as direction, l.created_at
from thought_links l
join thoughts t on t.id = l.to_id
where l.from_id = $1
where l.from_id = (select id from thoughts where guid = $1)
union all
select t.id, t.content, t.metadata, t.project_id, t.archived_at, t.created_at, t.updated_at, l.relation, 'incoming' as direction, l.created_at
select t.guid, t.content, t.metadata, t.project_id, t.archived_at, t.created_at, t.updated_at, l.relation, 'incoming' as direction, l.created_at
from thought_links l
join thoughts t on t.id = l.from_id
where l.to_id = $1
where l.to_id = (select id from thoughts where guid = $1)
order by created_at desc
`, thoughtID)
if err != nil {

View File

@@ -15,7 +15,7 @@ func (db *DB) CreateProject(ctx context.Context, name, description string) (thou
row := db.pool.QueryRow(ctx, `
insert into projects (name, description)
values ($1, $2)
returning id, name, description, created_at, last_active_at
returning guid, name, description, created_at, last_active_at
`, name, description)
var project thoughttypes.Project
@@ -29,13 +29,13 @@ func (db *DB) GetProject(ctx context.Context, nameOrID string) (thoughttypes.Pro
var row pgx.Row
if parsedID, err := uuid.Parse(strings.TrimSpace(nameOrID)); err == nil {
row = db.pool.QueryRow(ctx, `
select id, name, description, created_at, last_active_at
select guid, name, description, created_at, last_active_at
from projects
where id = $1
where guid = $1
`, parsedID)
} else {
row = db.pool.QueryRow(ctx, `
select id, name, description, created_at, last_active_at
select guid, name, description, created_at, last_active_at
from projects
where name = $1
`, strings.TrimSpace(nameOrID))
@@ -53,10 +53,10 @@ func (db *DB) GetProject(ctx context.Context, nameOrID string) (thoughttypes.Pro
func (db *DB) ListProjects(ctx context.Context) ([]thoughttypes.ProjectSummary, error) {
rows, err := db.pool.Query(ctx, `
select p.id, p.name, p.description, p.created_at, p.last_active_at, count(t.id) as thought_count
select p.guid, p.name, p.description, p.created_at, p.last_active_at, count(t.id) as thought_count
from projects p
left join thoughts t on t.project_id = p.id and t.archived_at is null
group by p.id
left join thoughts t on t.project_id = p.guid and t.archived_at is null
group by p.guid, p.name, p.description, p.created_at, p.last_active_at
order by p.last_active_at desc, p.created_at desc
`)
if err != nil {
@@ -79,7 +79,7 @@ func (db *DB) ListProjects(ctx context.Context) ([]thoughttypes.ProjectSummary,
}
func (db *DB) TouchProject(ctx context.Context, id uuid.UUID) error {
tag, err := db.pool.Exec(ctx, `update projects set last_active_at = now() where id = $1`, id)
tag, err := db.pool.Exec(ctx, `update projects set last_active_at = now() where guid = $1`, id)
if err != nil {
return fmt.Errorf("touch project: %w", err)
}

View File

@@ -30,7 +30,7 @@ func (db *DB) InsertThought(ctx context.Context, thought thoughttypes.Thought, e
row := tx.QueryRow(ctx, `
insert into thoughts (content, metadata, project_id)
values ($1, $2::jsonb, $3)
returning id, created_at, updated_at
returning guid, created_at, updated_at
`, thought.Content, metadata, thought.ProjectID)
created := thought
@@ -123,7 +123,7 @@ func (db *DB) ListThoughts(ctx context.Context, filter thoughttypes.ListFilter)
}
query := `
select id, content, metadata, project_id, archived_at, created_at, updated_at
select guid, content, metadata, project_id, archived_at, created_at, updated_at
from thoughts
`
if len(conditions) > 0 {
@@ -209,9 +209,9 @@ func (db *DB) Stats(ctx context.Context) (thoughttypes.ThoughtStats, error) {
func (db *DB) GetThought(ctx context.Context, id uuid.UUID) (thoughttypes.Thought, error) {
row := db.pool.QueryRow(ctx, `
select id, content, metadata, project_id, archived_at, created_at, updated_at
select guid, content, metadata, project_id, archived_at, created_at, updated_at
from thoughts
where id = $1
where guid = $1
`, id)
var thought thoughttypes.Thought
@@ -248,7 +248,7 @@ func (db *DB) UpdateThought(ctx context.Context, id uuid.UUID, content string, e
metadata = $3::jsonb,
project_id = $4,
updated_at = now()
where id = $1
where guid = $1
`, id, content, metadataBytes, projectID)
if err != nil {
return thoughttypes.Thought{}, fmt.Errorf("update thought: %w", err)
@@ -278,7 +278,7 @@ func (db *DB) UpdateThought(ctx context.Context, id uuid.UUID, content string, e
}
func (db *DB) DeleteThought(ctx context.Context, id uuid.UUID) error {
tag, err := db.pool.Exec(ctx, `delete from thoughts where id = $1`, id)
tag, err := db.pool.Exec(ctx, `delete from thoughts where guid = $1`, id)
if err != nil {
return fmt.Errorf("delete thought: %w", err)
}
@@ -289,7 +289,7 @@ func (db *DB) DeleteThought(ctx context.Context, id uuid.UUID) error {
}
func (db *DB) ArchiveThought(ctx context.Context, id uuid.UUID) error {
tag, err := db.pool.Exec(ctx, `update thoughts set archived_at = now(), updated_at = now() where id = $1`, id)
tag, err := db.pool.Exec(ctx, `update thoughts set archived_at = now(), updated_at = now() where guid = $1`, id)
if err != nil {
return fmt.Errorf("archive thought: %w", err)
}
@@ -322,14 +322,14 @@ func (db *DB) SearchSimilarThoughts(ctx context.Context, embedding []float32, em
}
if excludeID != nil {
args = append(args, *excludeID)
conditions = append(conditions, fmt.Sprintf("t.id <> $%d", len(args)))
conditions = append(conditions, fmt.Sprintf("t.guid <> $%d", len(args)))
}
args = append(args, limit)
query := `
select t.id, t.content, t.metadata, 1 - (e.embedding <=> $1) as similarity, t.created_at
select t.guid, t.content, t.metadata, 1 - (e.embedding <=> $1) as similarity, t.created_at
from thoughts t
join embeddings e on e.thought_id = t.id
join embeddings e on e.thought_id = t.guid
where ` + strings.Join(conditions, " and ") + fmt.Sprintf(`
order by e.embedding <=> $1
limit $%d`, len(args))

8
llm/instructions.go Normal file
View File

@@ -0,0 +1,8 @@
package llm
import _ "embed"
var (
//go:embed memory.md
MemoryInstructions []byte
)

41
llm/memory.md Normal file
View File

@@ -0,0 +1,41 @@
# AMCS Memory Instructions
You have access to an MCP memory server named AMCS.
Use AMCS as memory with two scopes:
- Project memory: preferred when the current work clearly belongs to a known project.
- Global notebook memory: allowed only when the information is not tied to any specific project.
## Scope Selection Rules
1. Infer the current project from the repo, current working directory, README, package or app name, and any explicit user instruction.
2. Call `get_active_project`.
3. If the active project clearly matches the current work, use it.
4. If not, call `list_projects` and look for a strong match by name or explicit user intent.
5. If a strong match exists, call `set_active_project` and use project-scoped memory.
6. If no strong project match exists, you may use global notebook memory with no project.
7. If multiple projects plausibly match, ask the user before reading or writing project memory.
## Project Memory Rules
- Use project memory for code decisions, architecture, TODOs, debugging findings, and context specific to the current repo or workstream.
- Before substantial work, retrieve context with `get_project_context` or `recall_context`.
- Save durable project facts with `capture_thought`.
- Do not attach memory to the wrong project.
## Global Notebook Rules
- Use global memory only for information that is genuinely cross-project or not project-bound.
- Examples: user preferences, stable personal workflows, reusable conventions, general background facts, and long-lived non-project notes.
- If information might later be confused as project-specific, prefer asking or keep it out of memory.
## Memory Hygiene
- Save only durable, useful information.
- Do not save secrets, raw logs, or transient noise.
- Prefer concise summaries.
- When saving, choose the narrowest correct scope: project if project-specific, global if not.
## Short Operational Form
Use AMCS memory in project scope when the current work matches a known project. If no clear project matches, global notebook memory is allowed for non-project-specific information. Never store project-specific memory globally when a matching project exists, and never store memory in the wrong project. If project matching is ambiguous, ask the user.

450
llm/todo.md Normal file
View File

@@ -0,0 +1,450 @@
# AMCS TODO
## Auto Embedding Backfill Tool
## Objective
Add an MCP tool that automatically backfills missing embeddings for existing thoughts so semantic search keeps working after:
* embedding model changes
* earlier capture or update failures
* import or migration of raw thoughts without vectors
The tool should be safe to run repeatedly, should not duplicate work, and should make it easy to restore semantic coverage without rewriting existing thoughts.
---
## Desired outcome
After this work:
* raw thought text remains the source of truth
* embeddings are treated as derived data per model
* search continues to query only embeddings from the active embedding model
* when no embeddings exist for the active model and scope, search falls back to Postgres text search
* operators or MCP clients can trigger a backfill for the current model
* AMCS can optionally auto-run a limited backfill pass on startup or on a schedule later
---
## Why this is needed
Current search behavior is model-specific:
* query text is embedded with the configured provider model
* results are filtered by `embeddings.model`
* thoughts with no embedding for that model are invisible to semantic search
This means a model switch leaves old thoughts searchable only by listing and metadata filters until new embeddings are generated.
To avoid that dead zone, AMCS should also support a lexical fallback path backed by native Postgres text-search indexing.
---
## Tool proposal
### New MCP tool
`backfill_embeddings`
Purpose:
* find thoughts missing an embedding for the active model
* generate embeddings in batches
* write embeddings with upsert semantics
* report counts for scanned, embedded, skipped, and failed thoughts
### Input
```json
{
"project": "optional project name or id",
"limit": 100,
"batch_size": 20,
"include_archived": false,
"older_than_days": 0,
"dry_run": false
}
```
Notes:
* `project` scopes the backfill to a project when desired
* `limit` caps total thoughts processed in one tool call
* `batch_size` controls provider load
* `include_archived` defaults to `false`
* `older_than_days` is optional and mainly useful to avoid racing with fresh writes
* `dry_run` returns counts and sample IDs without calling the embedding provider
### Output
```json
{
"model": "openai/text-embedding-3-small",
"scanned": 100,
"embedded": 87,
"skipped": 13,
"failed": 0,
"dry_run": false,
"failures": []
}
```
Optional:
* include a short `next_cursor` later if we add cursor-based paging
---
## Backfill behavior
### Core rules
* Backfill only when a thought is missing an embedding row for the active model.
* Do not recompute embeddings that already exist for that model unless an explicit future `force` flag is added.
* Keep embeddings per model side by side in the existing `embeddings` table.
* Use `insert ... on conflict (thought_id, model) do update` so retries stay idempotent.
### Selection query
Add a store query that returns thoughts where no embedding exists for the requested model.
Shape:
* from `thoughts t`
* left join `embeddings e on e.thought_id = t.guid and e.model = $model`
* filter `e.id is null`
* optional filters for project, archived state, age
* order by `t.created_at asc`
* limit by requested batch
Ordering oldest first is useful because it steadily restores long-tail recall instead of repeatedly revisiting recent writes.
### Processing loop
For each selected thought:
1. read `content`
2. call `provider.Embed(content)`
3. upsert embedding row for `thought_id + model`
4. continue on per-item failure and collect errors
Use bounded concurrency instead of fully serial processing so large backfills complete in reasonable time without overwhelming the provider.
Recommended first pass:
* one tool invocation handles batches internally
* concurrency defaults to a small fixed number like `4`
* `batch_size` and concurrency are kept server-side defaults at first, even if only `limit` is exposed in MCP input
---
## Search fallback behavior
### Goal
If semantic retrieval cannot run because no embeddings exist for the active model in the selected scope, AMCS should fall back to Postgres text search instead of returning empty semantic results by default.
### Fallback rules
* If embeddings exist for the active model, keep using vector search as the primary path.
* If no embeddings exist for the active model in scope, run Postgres text search against raw thought content.
* Fallback should apply to:
* `search_thoughts`
* `recall_context`
* `get_project_context` when `query` is provided
* `summarize_thoughts` when `query` is provided
* semantic neighbors in `related_thoughts`
* Fallback should not mutate data. It is retrieval-only.
* Backfill remains the long-term fix; text search is the immediate safety net.
### Postgres search approach
Add a native full-text index on thought content and query it with a matching text-search configuration.
Recommended first pass:
* add a migration creating a GIN index on `to_tsvector('simple', content)`
* use `websearch_to_tsquery('simple', $query)` for user-entered text
* rank results with `ts_rank_cd(...)`
* continue excluding archived thoughts by default
* continue honoring project scope
Using the `simple` configuration is a safer default for mixed prose, identifiers, and code-like text than a language-specific stemmer.
### Store additions for fallback
Add store methods such as:
* `HasEmbeddingsForModel(ctx, model string, projectID *uuid.UUID) (bool, error)`
* `SearchThoughtsText(ctx, query string, limit int, projectID *uuid.UUID, excludeID *uuid.UUID) ([]SearchResult, error)`
These should be used by a shared retrieval helper in `internal/tools` so semantic callers degrade consistently.
### Notes on ranking
Text-search scores will not be directly comparable to vector similarity scores.
That is acceptable in v1 because:
* each request will use one retrieval mode at a time
* fallback is only used when semantic search is unavailable
* response payloads can continue to return `similarity` as a generic relevance score
---
## Auto behavior
Automatic backfill is a stated requirement, so the design defines two layers:
### Layer 1: explicit MCP tool
Ship `backfill_embeddings` first.
This is the lowest-risk path because:
* it is observable
* it is rate-limited by the caller
* it avoids surprise provider cost on startup
### Layer 2: optional automatic runner
Add a config-gated background runner after the tool exists and is proven stable.
Config sketch:
```yaml
backfill:
enabled: false
run_on_startup: false
interval: "15m"
batch_size: 20
max_per_run: 100
include_archived: false
```
Behavior:
* on startup, if enabled and `run_on_startup=true`, run a small bounded backfill pass
* if `interval` is set, periodically backfill missing embeddings for the active configured model
* log counts and failures, but never block server startup on backfill failure
This keeps the first implementation simple while still giving us a clean path to true automation.
---
## Store changes
Add store methods focused on missing-model coverage.
### New methods
* `ListThoughtsMissingEmbedding(ctx, model string, limit int, projectID *uuid.UUID, includeArchived bool, olderThanDays int) ([]Thought, error)`
* `UpsertEmbedding(ctx, thoughtID uuid.UUID, model string, embedding []float32) error`
### Optional later methods
* `CountThoughtsMissingEmbedding(ctx, model string, projectID *uuid.UUID, includeArchived bool) (int, error)`
* `ListThoughtIDsMissingEmbeddingPage(...)` for cursor-based paging on large datasets
### Why separate `UpsertEmbedding`
`InsertThought` and `UpdateThought` already contain embedding upsert logic, but a dedicated helper will:
* reduce duplication
* let backfill avoid full thought updates
* make future re-embedding jobs cleaner
---
## Tooling changes
### New file
`internal/tools/backfill.go`
Responsibilities:
* parse input
* resolve project if provided
* select missing thoughts
* run bounded embedding generation
* record per-item failures without aborting the whole batch
* return summary counts
### MCP registration
Add the tool to:
* `internal/mcpserver/server.go`
* `internal/mcpserver/schema.go` and tests if needed
* `internal/app/app.go` wiring
Suggested tool description:
* `Generate missing embeddings for stored thoughts using the active embedding model.`
---
## Config changes
No config is required for the first manual tool beyond the existing embedding provider settings.
For the later automatic runner, add:
* `backfill.enabled`
* `backfill.run_on_startup`
* `backfill.interval`
* `backfill.batch_size`
* `backfill.max_per_run`
* `backfill.include_archived`
Validation rules:
* `batch_size > 0`
* `max_per_run >= batch_size`
* `interval` must parse when provided
---
## Failure handling
The backfill tool should be best-effort, not all-or-nothing.
Rules:
* one thought failure does not abort the full run
* provider errors are captured and counted
* database upsert failures are captured and counted
* final tool response includes truncated failure details
* full details go to logs
Failure payloads should avoid returning raw thought content to the caller if that would create noisy or sensitive responses. Prefer thought IDs plus short error strings.
---
## Observability
Add structured logs for:
* selected model
* project scope
* scan count
* success count
* failure count
* duration
Later, metrics can include:
* `amcs_backfill_runs_total`
* `amcs_backfill_embeddings_total`
* `amcs_backfill_failures_total`
* `amcs_thoughts_missing_embeddings`
---
## Concurrency and rate limiting
Keep the first version conservative.
Plan:
* use a worker pool with a small fixed concurrency
* keep batch sizes small by default
* stop fetching new work once `limit` is reached
* respect `ctx` cancellation so long backfills can be interrupted cleanly
Do not add provider-specific rate-limit logic in v1 unless real failures show it is needed.
---
## Security and safety
* Reuse existing MCP auth.
* Do not expose a broad `force=true` option in v1.
* Default to non-archived thoughts only.
* Do not mutate raw thought text or metadata during backfill.
* Treat embeddings as derived data that may be regenerated safely.
---
## Testing plan
### Store tests
Add tests for:
* listing thoughts missing embeddings for a model
* project-scoped missing-embedding queries
* archived thought filtering
* idempotent upsert behavior
### Tool tests
Add tests for:
* dry-run mode
* successful batch embedding
* partial provider failures
* empty result set
* project resolution
* context cancellation
### Integration tests
Add a flow covering:
1. create thoughts without embeddings for a target model
2. run `backfill_embeddings`
3. confirm rows exist in `embeddings`
4. confirm `search_thoughts` can now retrieve them when using that model
### Fallback search tests
Add coverage for:
* no embeddings for model -> `search_thoughts` uses Postgres text search
* project-scoped queries only search matching project thoughts
* archived thoughts stay excluded by default
* `related_thoughts` falls back to text search neighbors when semantic vectors are unavailable
* once embeddings exist, semantic search remains the primary path
---
## Rollout order
1. Add store helpers for missing-embedding selection and embedding upsert.
2. Add Postgres full-text index migration and text-search store helpers.
3. Add shared semantic-or-text fallback retrieval logic for query-based tools.
4. Add `backfill_embeddings` MCP tool and wire it into the server.
5. Add unit and integration tests.
6. Document usage in `README.md`.
7. Add optional background auto-runner behind config.
8. Consider a future `force` or `reindex_model` path only after v1 is stable.
---
## Open questions
* Should the tool expose `batch_size` to clients, or should batching stay internal?
* Should the first version support only the active model, or allow a `model` override for admins?
* Should archived thoughts be backfilled by default during startup jobs but not MCP calls?
* Do we want a separate CLI/admin command for large one-time reindex jobs outside MCP?
Recommended answers for v1:
* keep batching mostly internal
* use only the active configured model
* exclude archived thoughts by default everywhere
* postpone a dedicated CLI until volume justifies it
---
## Nice follow-ups
* add a `missing_embeddings` stat to `thought_stats`
* expose a read-only tool for counting missing embeddings by project
* add a re-embed path for migrating from one model to another in controlled waves
* add metadata extraction backfill as a separate job if imported content often lacks metadata
* expose the retrieval mode in responses for easier debugging of semantic vs text fallback

View File

@@ -1,10 +1,12 @@
create table if not exists thoughts (
id uuid default gen_random_uuid() primary key,
content text not null,
embedding vector(1536),
metadata jsonb default '{}'::jsonb,
id bigserial primary key,
guid uuid not null default gen_random_uuid(),
content text not null,
embedding vector(1536),
metadata jsonb default '{}'::jsonb,
created_at timestamptz default now(),
updated_at timestamptz default now()
updated_at timestamptz default now(),
constraint thoughts_guid_unique unique (guid)
);
create index if not exists thoughts_embedding_hnsw_idx

View File

@@ -1,12 +1,14 @@
create table if not exists projects (
id uuid default gen_random_uuid() primary key,
name text not null unique,
description text,
created_at timestamptz default now(),
last_active_at timestamptz default now()
id bigserial primary key,
guid uuid not null default gen_random_uuid(),
name text not null unique,
description text,
created_at timestamptz default now(),
last_active_at timestamptz default now(),
constraint projects_guid_unique unique (guid)
);
alter table thoughts add column if not exists project_id uuid references projects(id);
alter table thoughts add column if not exists project_id uuid references projects(guid);
alter table thoughts add column if not exists archived_at timestamptz;
create index if not exists thoughts_project_id_idx on thoughts (project_id);

View File

@@ -1,7 +1,7 @@
create table if not exists thought_links (
from_id uuid references thoughts(id) on delete cascade,
to_id uuid references thoughts(id) on delete cascade,
relation text not null,
from_id bigint not null references thoughts(id) on delete cascade,
to_id bigint not null references thoughts(id) on delete cascade,
relation text not null,
created_at timestamptz default now(),
primary key (from_id, to_id, relation)
);

View File

@@ -16,7 +16,7 @@ as $$
begin
return query
select
t.id,
t.guid,
t.content,
t.metadata,
1 - (t.embedding <=> query_embedding) as similarity,

View File

@@ -1,7 +1,7 @@
create table if not exists embeddings (
id bigserial primary key,
guid uuid not null default gen_random_uuid(),
thought_id uuid not null references thoughts(id) on delete cascade,
thought_id uuid not null references thoughts(guid) on delete cascade,
model text not null,
dim int not null,
embedding vector not null,

View File

@@ -17,13 +17,13 @@ as $$
begin
return query
select
t.id,
t.guid,
t.content,
t.metadata,
1 - (e.embedding <=> query_embedding) as similarity,
t.created_at
from thoughts t
join embeddings e on e.thought_id = t.id
join embeddings e on e.thought_id = t.guid
where 1 - (e.embedding <=> query_embedding) > match_threshold
and t.archived_at is null
and (embedding_model = '' or e.model = embedding_model)