5 месяцев назад · 785bbcf319
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -22,6 +22,10 @@ qmd embed                         # Generate vector embeddings (uses node-llama-
 
				 qmd search <query>                # BM25 full-text search
			
 
				 qmd vsearch <query>               # Vector similarity search
			
 
				 qmd query <query>                 # Hybrid search with reranking (best quality)
			
 
				+qmd mcp                           # Start MCP server (stdio transport)
			
 
				+qmd mcp --http [--port N]         # Start MCP server (HTTP, default port 8181)
			
 
				+qmd mcp --http --daemon           # Start as background daemon
			
 
				+qmd mcp stop                      # Stop background MCP daemon
			
 
				 ```
			
 
				 
			
 
				 ## Collection Management
			
@@ -136,4 +140,4 @@ bun link               # Install globally as 'qmd'
 
				 ## Do NOT compile
			
 
				 
			
 
				 - Never run `bun build --compile` - it overwrites the shell wrapper and breaks sqlite-vec
			
 
				-- The `qmd` file is a shell script that runs `bun src/qmd.ts` - do not replace it
			
 
				+- The `qmd` file is a shell script that runs `bun src/qmd.ts` - do not replace it
			
--- a/README.md
+++ b/README.md
@@ -65,8 +65,8 @@ Although the tool works perfectly fine when you just tell your agent to use it o
 
				 
			
 
				 **Tools exposed:**
			
 
				 - `qmd_search` - Fast BM25 keyword search (supports collection filter)
			
 
				-- `qmd_vsearch` - Semantic vector search (supports collection filter)
			
 
				-- `qmd_query` - Hybrid search with reranking (supports collection filter)
			
 
				+- `qmd_vector_search` - Semantic vector search (supports collection filter)
			
 
				+- `qmd_deep_search` - Deep search with query expansion and reranking (supports collection filter)
			
 
				 - `qmd_get` - Retrieve document by path or docid (with fuzzy matching suggestions)
			
 
				 - `qmd_multi_get` - Retrieve multiple documents by glob pattern, list, or docids
			
 
				 - `qmd_status` - Index health and collection info
			
@@ -104,6 +104,29 @@ Or configure MCP manually in `~/.claude/settings.json`:
 
				 }
			
 
				 ```
			
 
				 
			
 
				+#### HTTP Transport
			
 
				+
			
 
				+By default, QMD's MCP server uses stdio (launched as a subprocess by each client). For a shared, long-lived server that avoids repeated model loading, use the HTTP transport:
			
 
				+
			
 
				+```sh
			
 
				+# Foreground (Ctrl-C to stop)
			
 
				+qmd mcp --http                    # localhost:8181
			
 
				+qmd mcp --http --port 8080        # custom port
			
 
				+
			
 
				+# Background daemon
			
 
				+qmd mcp --http --daemon           # start, writes PID to ~/.cache/qmd/mcp.pid
			
 
				+qmd mcp stop                      # stop via PID file
			
 
				+qmd status                        # shows "MCP: running (PID ...)" when active
			
 
				+```
			
 
				+
			
 
				+The HTTP server exposes two endpoints:
			
 
				+- `POST /mcp` — MCP Streamable HTTP (JSON responses, stateless)
			
 
				+- `GET /health` — liveness check with uptime
			
 
				+
			
 
				+LLM models stay loaded in VRAM across requests. Embedding/reranking contexts are disposed after 5 min idle and transparently recreated on the next request (~1s penalty, models remain loaded).
			
 
				+
			
 
				+Point any MCP client at `http://localhost:8181/mcp` to connect.
			
 
				+
			
 
				 ## Architecture
			
 
				 
			
 
				 ```
			
--- a/skills/qmd/SKILL.md
+++ b/skills/qmd/SKILL.md
@@ -142,8 +142,8 @@ This plugin configures the qmd MCP server automatically. When available, prefer
 
				 | MCP Tool | Equivalent CLI | Purpose |
			
 
				 |----------|---------------|---------|
			
 
				 | `qmd_search` | `qmd search` | Fast BM25 keyword search |
			
 
				-| `qmd_vsearch` | `qmd vsearch` | Semantic vector search |
			
 
				-| `qmd_query` | `qmd query` | Hybrid search with reranking |
			
 
				+| `qmd_vector_search` | `qmd vsearch` | Semantic vector search |
			
 
				+| `qmd_deep_search` | `qmd query` | Deep search with expansion and reranking |
			
 
				 | `qmd_get` | `qmd get` | Retrieve document by path or docid |
			
 
				 | `qmd_multi_get` | `qmd multi-get` | Retrieve multiple documents |
			
 
				 | `qmd_status` | `qmd status` | Index health and collection info |
			
--- a/skills/qmd/references/mcp-setup.md
+++ b/skills/qmd/references/mcp-setup.md
@@ -47,7 +47,7 @@ Fast BM25 keyword search.
 
				 - `limit` (optional): Number of results (default: 5)
			
 
				 - `minScore` (optional): Minimum relevance score
			
 
				 
			
 
				-### qmd_vsearch
			
 
				+### qmd_vector_search
			
 
				 Semantic vector search for conceptual similarity.
			
 
				 
			
 
				 **Parameters:**
			
@@ -56,7 +56,7 @@ Semantic vector search for conceptual similarity.
 
				 - `limit` (optional): Number of results (default: 5)
			
 
				 - `minScore` (optional): Minimum relevance score
			
 
				 
			
 
				-### qmd_query
			
 
				+### qmd_deep_search
			
 
				 Hybrid search combining BM25, vector search, and LLM re-ranking.
			
 
				 
			
 
				 **Parameters:**
			
@@ -98,7 +98,7 @@ Get index health and collection information.
 
				 - Ensure embeddings are generated: `qmd embed`
			
 
				 
			
 
				 ### Slow searches
			
 
				-- For faster results, use `qmd_search` instead of `qmd_query`
			
 
				+- For faster results, use `qmd_search` instead of `qmd_deep_search`
			
 
				 - The first search may be slow while models load (~3GB)
			
 
				 - Subsequent searches are much faster
			
 
				 
			
--- a/src/cli.test.ts
+++ b/src/cli.test.ts
@@ -961,3 +961,239 @@ describe("status and collection list hide filesystem paths", () => {
 
				     expect(stdout).not.toMatch(/Path:\s+\//);
			
 
				   });
			
 
				 });
			
 
				+
			
 
				+// =============================================================================
			
 
				+// MCP HTTP Daemon Lifecycle
			
 
				+// =============================================================================
			
 
				+
			
 
				+describe("mcp http daemon", () => {
			
 
				+  let daemonTestDir: string;
			
 
				+  let daemonCacheDir: string; // XDG_CACHE_HOME value (the qmd/ subdir is created automatically)
			
 
				+  let daemonDbPath: string;
			
 
				+  let daemonConfigDir: string;
			
 
				+
			
 
				+  // Track spawned PIDs for cleanup
			
 
				+  const spawnedPids: number[] = [];
			
 
				+
			
 
				+  /**
			
 
				+   * Path of the daemon PID file used by this suite:
			
 
				+   * `<daemonCacheDir>/qmd/mcp.pid`.
			
 
				+   *
			
 
				+   * Computed on every call rather than cached, because `daemonCacheDir`
			
 
				+   * is only assigned inside `beforeAll`.
			
 
				+   */
			
 
				+  function pidPath(): string {
			
 
				+    return join(daemonCacheDir, "qmd", "mcp.pid");
			
 
				+  }
			
 
				+
			
 
				+  /**
			
 
				+   * Run the qmd CLI through `runQmd` against this suite's isolated state.
			
 
				+   *
			
 
				+   * Forwards the suite's database path and config directory, and sets
			
 
				+   * `XDG_CACHE_HOME` to `daemonCacheDir` so the PID file that `pidPath()`
			
 
				+   * inspects is looked up under the temporary test directory.
			
 
				+   *
			
 
				+   * @param args CLI arguments, e.g. `["mcp", "stop"]`.
			
 
				+   * @returns The stdout, stderr and exit code reported by `runQmd`.
			
 
				+   */
			
 
				+  async function runDaemonQmd(
			
 
				+    args: string[],
			
 
				+  ): Promise<{ stdout: string; stderr: string; exitCode: number }> {
			
 
				+    return runQmd(args, {
			
 
				+      dbPath: daemonDbPath,
			
 
				+      configDir: daemonConfigDir,
			
 
				+      env: { XDG_CACHE_HOME: daemonCacheDir },
			
 
				+    });
			
 
				+  }
			
 
				+
			
 
				+  /**
			
 
				+   * Spawn `qmd mcp --http --port <port>` as a foreground child process
			
 
				+   * without waiting for it, and return the process handle.
			
 
				+   *
			
 
				+   * The child inherits the current environment with `INDEX_PATH` and
			
 
				+   * `QMD_CONFIG_DIR` pointed at the suite's temp files. Its PID is recorded
			
 
				+   * in `spawnedPids` so `afterAll` can terminate it if a test fails early.
			
 
				+   * stdout/stderr are piped but not consumed here.
			
 
				+   *
			
 
				+   * NOTE(review): unlike `runDaemonQmd`, `XDG_CACHE_HOME` is not overridden
			
 
				+   * here, so the child uses whatever cache location the environment
			
 
				+   * provides — confirm this is intentional.
			
 
				+   *
			
 
				+   * @param port TCP port passed to `--port`.
			
 
				+   */
			
 
				+  function spawnHttpServer(port: number): ReturnType<typeof Bun.spawn> {
			
 
				+    const proc = Bun.spawn(["bun", qmdScript, "mcp", "--http", "--port", String(port)], {
			
 
				+      cwd: fixturesDir,
			
 
				+      env: {
			
 
				+        ...process.env,
			
 
				+        INDEX_PATH: daemonDbPath,
			
 
				+        QMD_CONFIG_DIR: daemonConfigDir,
			
 
				+      },
			
 
				+      stdout: "pipe",
			
 
				+      stderr: "pipe",
			
 
				+    });
			
 
				+    spawnedPids.push(proc.pid);
			
 
				+    return proc;
			
 
				+  }
			
 
				+
			
 
				+  /**
			
 
				+   * Poll `GET http://localhost:<port>/health` until it answers with a
			
 
				+   * successful (`res.ok`) status or the deadline passes.
			
 
				+   *
			
 
				+   * Fetch failures (e.g. connection refused while the server is still
			
 
				+   * starting) are swallowed and retried after a 200 ms pause.
			
 
				+   *
			
 
				+   * NOTE(review): the individual fetch has no timeout of its own, so a
			
 
				+   * request that hangs could outlast `timeoutMs`.
			
 
				+   *
			
 
				+   * @param port Port the server is expected to listen on.
			
 
				+   * @param timeoutMs Total time budget in milliseconds (default 5000).
			
 
				+   * @returns `true` once the health check succeeds, `false` on timeout.
			
 
				+   */
			
 
				+  async function waitForServer(port: number, timeoutMs = 5000): Promise<boolean> {
			
 
				+    const deadline = Date.now() + timeoutMs;
			
 
				+    while (Date.now() < deadline) {
			
 
				+      try {
			
 
				+        const res = await fetch(`http://localhost:${port}/health`);
			
 
				+        if (res.ok) return true;
			
 
				+      } catch { /* not ready yet */ }
			
 
				+      await Bun.sleep(200);
			
 
				+    }
			
 
				+    return false;
			
 
				+  }
			
 
				+
			
 
				+  /**
			
 
				+   * Pick a pseudo-random port in the range 10000–59999 (inclusive).
			
 
				+   *
			
 
				+   * The port is not checked for availability; the high range only makes
			
 
				+   * a clash with another listener unlikely, not impossible.
			
 
				+   */
			
 
				+  function randomPort(): number {
			
 
				+    return 10000 + Math.floor(Math.random() * 50000);
			
 
				+  }
			
 
				+
			
 
				+  beforeAll(async () => {
			
 
				+    daemonTestDir = await mkdtemp(join(tmpdir(), "qmd-daemon-test-"));
			
 
				+    daemonCacheDir = join(daemonTestDir, "cache");
			
 
				+    daemonDbPath = join(daemonTestDir, "test.sqlite");
			
 
				+    daemonConfigDir = join(daemonTestDir, "config");
			
 
				+
			
 
				+    await mkdir(join(daemonCacheDir, "qmd"), { recursive: true });
			
 
				+    await mkdir(daemonConfigDir, { recursive: true });
			
 
				+    await writeFile(join(daemonConfigDir, "index.yml"), "collections: {}\n");
			
 
				+  });
			
 
				+
			
 
				+  afterAll(async () => {
			
 
				+    // Kill any leftover spawned processes
			
 
				+    for (const pid of spawnedPids) {
			
 
				+      try { process.kill(pid, "SIGTERM"); } catch { /* already dead */ }
			
 
				+    }
			
 
				+    // Also clean up via PID file if present
			
 
				+    try {
			
 
				+      const { readFileSync, existsSync, unlinkSync } = require("fs");
			
 
				+      const pf = pidPath();
			
 
				+      if (existsSync(pf)) {
			
 
				+        const pid = parseInt(readFileSync(pf, "utf-8").trim());
			
 
				+        try { process.kill(pid, "SIGTERM"); } catch {}
			
 
				+        unlinkSync(pf);
			
 
				+      }
			
 
				+    } catch {}
			
 
				+
			
 
				+    await rm(daemonTestDir, { recursive: true, force: true });
			
 
				+  });
			
 
				+
			
 
				+  // -------------------------------------------------------------------------
			
 
				+  // Foreground HTTP
			
 
				+  // -------------------------------------------------------------------------
			
 
				+
			
 
				+  test("foreground HTTP server starts and responds to health check", async () => {
			
 
				+    const port = randomPort();
			
 
				+    const proc = spawnHttpServer(port);
			
 
				+
			
 
				+    try {
			
 
				+      const ready = await waitForServer(port);
			
 
				+      expect(ready).toBe(true);
			
 
				+
			
 
				+      const res = await fetch(`http://localhost:${port}/health`);
			
 
				+      expect(res.status).toBe(200);
			
 
				+      const body = await res.json();
			
 
				+      expect(body.status).toBe("ok");
			
 
				+    } finally {
			
 
				+      proc.kill("SIGTERM");
			
 
				+      await proc.exited;
			
 
				+    }
			
 
				+  });
			
 
				+
			
 
				+  // -------------------------------------------------------------------------
			
 
				+  // Daemon lifecycle
			
 
				+  // -------------------------------------------------------------------------
			
 
				+
			
 
				+  test("--daemon writes PID file and starts server", async () => {
			
 
				+    const port = randomPort();
			
 
				+    const { stdout, exitCode } = await runDaemonQmd([
			
 
				+      "mcp", "--http", "--daemon", "--port", String(port),
			
 
				+    ]);
			
 
				+    expect(exitCode).toBe(0);
			
 
				+    expect(stdout).toContain(`http://localhost:${port}/mcp`);
			
 
				+
			
 
				+    // PID file should exist
			
 
				+    const { existsSync, readFileSync } = require("fs");
			
 
				+    expect(existsSync(pidPath())).toBe(true);
			
 
				+
			
 
				+    const pid = parseInt(readFileSync(pidPath(), "utf-8").trim());
			
 
				+    spawnedPids.push(pid);
			
 
				+
			
 
				+    // Server should be reachable
			
 
				+    const ready = await waitForServer(port);
			
 
				+    expect(ready).toBe(true);
			
 
				+
			
 
				+    // Clean up
			
 
				+    process.kill(pid, "SIGTERM");
			
 
				+    await Bun.sleep(500);
			
 
				+    try { require("fs").unlinkSync(pidPath()); } catch {}
			
 
				+  });
			
 
				+
			
 
				+  // Starts a daemon, confirms it is actually serving, then verifies that
			
 
				+  // `qmd mcp stop` terminates the process and removes the PID file.
			
 
				+  test("stop kills daemon and removes PID file", async () => {
			
 
				+    const port = randomPort();
			
 
				+    // Start daemon
			
 
				+    const { exitCode: startCode } = await runDaemonQmd([
			
 
				+      "mcp", "--http", "--daemon", "--port", String(port),
			
 
				+    ]);
			
 
				+    expect(startCode).toBe(0);
			
 
				+
			
 
				+    const { readFileSync } = require("fs");
			
 
				+    // Explicit radix: the PID file holds a decimal number.
			
 
				+    const pid = parseInt(readFileSync(pidPath(), "utf-8").trim(), 10);
			
 
				+    spawnedPids.push(pid);
			
 
				+
			
 
				+    // Fail here, with a clear cause, if the daemon never became reachable;
			
 
				+    // otherwise the stop assertions below would run against a half-started server.
			
 
				+    const ready = await waitForServer(port);
			
 
				+    expect(ready).toBe(true);
			
 
				+
			
 
				+    // Stop it
			
 
				+    const { stdout: stopOut, exitCode: stopCode } = await runDaemonQmd(["mcp", "stop"]);
			
 
				+    expect(stopCode).toBe(0);
			
 
				+    expect(stopOut).toContain("Stopped");
			
 
				+
			
 
				+    // PID file should be gone
			
 
				+    expect(require("fs").existsSync(pidPath())).toBe(false);
			
 
				+
			
 
				+    // Process should be dead (signal 0 only probes for existence)
			
 
				+    await Bun.sleep(500);
			
 
				+    expect(() => process.kill(pid, 0)).toThrow();
			
 
				+  });
			
 
				+
			
 
				+  test("stop handles dead PID gracefully (cleans stale file)", async () => {
			
 
				+    // Write a PID file pointing to a dead process
			
 
				+    const { writeFileSync } = require("fs");
			
 
				+    writeFileSync(pidPath(), "999999999");
			
 
				+
			
 
				+    const { stdout, exitCode } = await runDaemonQmd(["mcp", "stop"]);
			
 
				+    expect(exitCode).toBe(0);
			
 
				+    expect(stdout).toContain("stale");
			
 
				+
			
 
				+    // PID file should be cleaned up
			
 
				+    expect(require("fs").existsSync(pidPath())).toBe(false);
			
 
				+  });
			
 
				+
			
 
				+  // A second `--daemon` start must be refused while the first daemon's
			
 
				+  // PID file points at a live process.
			
 
				+  test("--daemon rejects if already running", async () => {
			
 
				+    const port = randomPort();
			
 
				+    // Start first daemon
			
 
				+    const { exitCode: firstCode } = await runDaemonQmd([
			
 
				+      "mcp", "--http", "--daemon", "--port", String(port),
			
 
				+    ]);
			
 
				+    expect(firstCode).toBe(0);
			
 
				+
			
 
				+    const { readFileSync } = require("fs");
			
 
				+    // Explicit radix: the PID file holds a decimal number.
			
 
				+    const pid = parseInt(readFileSync(pidPath(), "utf-8").trim(), 10);
			
 
				+    spawnedPids.push(pid);
			
 
				+
			
 
				+    // Make sure the first daemon is really up before testing the rejection,
			
 
				+    // so a failed start is reported as such rather than as a wrong exit code.
			
 
				+    const ready = await waitForServer(port);
			
 
				+    expect(ready).toBe(true);
			
 
				+
			
 
				+    // Try to start second daemon — should fail
			
 
				+    const { stderr, exitCode } = await runDaemonQmd([
			
 
				+      "mcp", "--http", "--daemon", "--port", String(port + 1),
			
 
				+    ]);
			
 
				+    expect(exitCode).toBe(1);
			
 
				+    expect(stderr).toContain("Already running");
			
 
				+
			
 
				+    // Clean up first daemon
			
 
				+    process.kill(pid, "SIGTERM");
			
 
				+    await Bun.sleep(500);
			
 
				+    try { require("fs").unlinkSync(pidPath()); } catch {}
			
 
				+  });
			
 
				+
			
 
				+  test("--daemon cleans stale PID file and starts fresh", async () => {
			
 
				+    // Write a stale PID file
			
 
				+    const { writeFileSync, readFileSync } = require("fs");
			
 
				+    writeFileSync(pidPath(), "999999999");
			
 
				+
			
 
				+    const port = randomPort();
			
 
				+    const { exitCode, stdout } = await runDaemonQmd([
			
 
				+      "mcp", "--http", "--daemon", "--port", String(port),
			
 
				+    ]);
			
 
				+    expect(exitCode).toBe(0);
			
 
				+    expect(stdout).toContain(`http://localhost:${port}/mcp`);
			
 
				+
			
 
				+    const pid = parseInt(readFileSync(pidPath(), "utf-8").trim());
			
 
				+    spawnedPids.push(pid);
			
 
				+    expect(pid).not.toBe(999999999);
			
 
				+
			
 
				+    // Clean up
			
 
				+    const ready = await waitForServer(port);
			
 
				+    expect(ready).toBe(true);
			
 
				+    process.kill(pid, "SIGTERM");
			
 
				+    await Bun.sleep(500);
			
 
				+    try { require("fs").unlinkSync(pidPath()); } catch {}
			
 
				+  });
			
 
				+});
			
--- a/src/mcp.test.ts
+++ b/src/mcp.test.ts
@@ -291,10 +291,10 @@ describe("MCP Server", () => {
 
				   });
			
 
				 
			
 
				   // ===========================================================================
			
 
				-  // Tool: qmd_vsearch (Vector)
			
 
				+  // Tool: qmd_vector_search (Vector)
			
 
				   // ===========================================================================
			
 
				 
			
 
				-  describe("qmd_vsearch tool", () => {
			
 
				+  describe("qmd_vector_search tool", () => {
			
 
				     test("returns results for semantic query", async () => {
			
 
				       const results = await searchVec(testDb, "project documentation", DEFAULT_EMBED_MODEL, 10);
			
 
				       expect(results.length).toBeGreaterThan(0);
			
@@ -317,15 +317,18 @@ describe("MCP Server", () => {
 
				   });
			
 
				 
			
 
				   // ===========================================================================
			
 
				-  // Tool: qmd_query (Hybrid)
			
 
				+  // Tool: qmd_deep_search (Deep search)
			
 
				   // ===========================================================================
			
 
				 
			
 
				-  describe("qmd_query tool", () => {
			
 
				-    test("expands query with variations", async () => {
			
 
				-      const queries = await expandQuery("api documentation", DEFAULT_QUERY_MODEL, testDb);
			
 
				-      // Always returns at least the original query, may have more if generation succeeds
			
 
				-      expect(queries.length).toBeGreaterThanOrEqual(1);
			
 
				-      expect(queries[0]).toBe("api documentation");
			
 
				+  describe("qmd_deep_search tool", () => {
			
 
				+    test("expands query with typed variations", async () => {
			
 
				+      const expanded = await expandQuery("api documentation", DEFAULT_QUERY_MODEL, testDb);
			
 
				+      // Returns ExpandedQuery[] — typed expansions, original excluded
			
 
				+      expect(expanded.length).toBeGreaterThanOrEqual(1);
			
 
				+      for (const q of expanded) {
			
 
				+        expect(['lex', 'vec', 'hyde']).toContain(q.type);
			
 
				+        expect(q.text.length).toBeGreaterThan(0);
			
 
				+      }
			
 
				     }, 30000); // 30s timeout for model loading
			
 
				 
			
 
				     test("performs RRF fusion on multiple result lists", () => {
			
@@ -356,22 +359,33 @@ describe("MCP Server", () => {
 
				     });
			
 
				 
			
 
				     test("full hybrid search pipeline", async () => {
			
 
				-      // Simulate full qmd_query flow
			
 
				+      // Simulate full qmd_deep_search flow with type-routed queries
			
 
				       const query = "meeting notes";
			
 
				-      const queries = await expandQuery(query, DEFAULT_QUERY_MODEL, testDb);
			
 
				+      const expanded = await expandQuery(query, DEFAULT_QUERY_MODEL, testDb);
			
 
				 
			
 
				       const rankedLists: RankedResult[][] = [];
			
 
				-      for (const q of queries) {
			
 
				-        const ftsResults = searchFTS(testDb, q, 20);
			
 
				-        if (ftsResults.length > 0) {
			
 
				-          rankedLists.push(ftsResults.map(r => ({
			
 
				-            file: r.filepath,
			
 
				-            displayPath: r.displayPath,
			
 
				-            title: r.title,
			
 
				-            body: r.body || "",
			
 
				-            score: r.score,
			
 
				-          })));
			
 
				+
			
 
				+      // Original query → FTS (probe)
			
 
				+      const probeFts = searchFTS(testDb, query, 20);
			
 
				+      if (probeFts.length > 0) {
			
 
				+        rankedLists.push(probeFts.map(r => ({
			
 
				+          file: r.filepath, displayPath: r.displayPath,
			
 
				+          title: r.title, body: r.body || "", score: r.score,
			
 
				+        })));
			
 
				+      }
			
 
				+
			
 
				+      // Expanded queries → route by type: lex→FTS, vec/hyde skipped (no vectors in test)
			
 
				+      for (const q of expanded) {
			
 
				+        if (q.type === 'lex') {
			
 
				+          const ftsResults = searchFTS(testDb, q.text, 20);
			
 
				+          if (ftsResults.length > 0) {
			
 
				+            rankedLists.push(ftsResults.map(r => ({
			
 
				+              file: r.filepath, displayPath: r.displayPath,
			
 
				+              title: r.title, body: r.body || "", score: r.score,
			
 
				+            })));
			
 
				+          }
			
 
				         }
			
 
				+        // vec/hyde would go to searchVec — not available in this unit test
			
 
				       }
			
 
				 
			
 
				       expect(rankedLists.length).toBeGreaterThan(0);
			
@@ -720,47 +734,6 @@ describe("MCP Server", () => {
 
				     });
			
 
				   });
			
 
				 
			
 
				-  // ===========================================================================
			
 
				-  // Prompt: query
			
 
				-  // ===========================================================================
			
 
				-
			
 
				-  describe("query prompt", () => {
			
 
				-    test("returns usage guide", () => {
			
 
				-      // The prompt content is static, just verify the structure
			
 
				-      const promptContent = `# QMD - Quick Markdown Search
			
 
				-
			
 
				-QMD is your on-device search engine for markdown knowledge bases.`;
			
 
				-
			
 
				-      expect(promptContent).toContain("QMD");
			
 
				-      expect(promptContent).toContain("search");
			
 
				-    });
			
 
				-
			
 
				-    test("describes all available tools", () => {
			
 
				-      const toolNames = [
			
 
				-        "qmd_search",
			
 
				-        "qmd_vsearch",
			
 
				-        "qmd_query",
			
 
				-        "qmd_get",
			
 
				-        "qmd_multi_get",
			
 
				-        "qmd_status",
			
 
				-      ];
			
 
				-
			
 
				-      // Verify these are documented in the prompt
			
 
				-      const promptGuide = `
			
 
				-### 1. qmd_search (Fast keyword search)
			
 
				-### 2. qmd_vsearch (Semantic search)
			
 
				-### 3. qmd_query (Hybrid search - highest quality)
			
 
				-### 4. qmd_get (Retrieve document)
			
 
				-### 5. qmd_multi_get (Retrieve multiple documents)
			
 
				-### 6. qmd_status (Index info)
			
 
				-      `;
			
 
				-
			
 
				-      for (const tool of toolNames) {
			
 
				-        expect(promptGuide).toContain(tool);
			
 
				-      }
			
 
				-    });
			
 
				-  });
			
 
				-
			
 
				   // ===========================================================================
			
 
				   // Edge Cases
			
 
				   // ===========================================================================
			
@@ -887,3 +860,175 @@ QMD is your on-device search engine for markdown knowledge bases.`;
 
				     });
			
 
				   });
			
 
				 });
			
 
				+
			
 
				+// =============================================================================
			
 
				+// HTTP Transport Tests
			
 
				+// =============================================================================
			
 
				+
			
 
				+import { startMcpHttpServer, type HttpServerHandle } from "./mcp";
			
 
				+import { enableProductionMode } from "./store";
			
 
				+
			
 
				+describe("MCP HTTP Transport", () => {
			
 
				+  let handle: HttpServerHandle;
			
 
				+  let baseUrl: string;
			
 
				+  let httpTestDbPath: string;
			
 
				+  let httpTestConfigDir: string;
			
 
				+  // Stash original env to restore after tests
			
 
				+  const origIndexPath = process.env.INDEX_PATH;
			
 
				+  const origConfigDir = process.env.QMD_CONFIG_DIR;
			
 
				+
			
 
				+  // Suite setup: seed an isolated SQLite database, write a minimal YAML
			
 
				+  // config, point the environment at both, then start the HTTP server.
			
 
				+  beforeAll(async () => {
			
 
				+    // Create isolated test database with seeded data.
			
 
				+    // Use the platform temp directory (as the config dir below does) instead
			
 
				+    // of a hard-coded /tmp, and add a random suffix to avoid name clashes.
			
 
				+    httpTestDbPath = join(
			
 
				+      tmpdir(),
			
 
				+      `qmd-mcp-http-test-${Date.now()}-${Math.random().toString(36).slice(2)}.sqlite`,
			
 
				+    );
			
 
				+    const db = new Database(httpTestDbPath);
			
 
				+    initTestDatabase(db);
			
 
				+    seedTestData(db);
			
 
				+    db.close();
			
 
				+
			
 
				+    // Create isolated YAML config
			
 
				+    const configPrefix = join(tmpdir(), `qmd-mcp-http-config-${Date.now()}-${Math.random().toString(36).slice(2)}`);
			
 
				+    httpTestConfigDir = await mkdtemp(configPrefix);
			
 
				+    const testConfig: CollectionConfig = {
			
 
				+      collections: {
			
 
				+        docs: {
			
 
				+          path: "/test/docs",
			
 
				+          pattern: "**/*.md",
			
 
				+        }
			
 
				+      }
			
 
				+    };
			
 
				+    await writeFile(join(httpTestConfigDir, "index.yml"), YAML.stringify(testConfig));
			
 
				+
			
 
				+    // Point createStore() at our test DB
			
 
				+    process.env.INDEX_PATH = httpTestDbPath;
			
 
				+    process.env.QMD_CONFIG_DIR = httpTestConfigDir;
			
 
				+
			
 
				+    handle = await startMcpHttpServer(0, { quiet: true }); // OS-assigned ephemeral port
			
 
				+    baseUrl = `http://localhost:${handle.port}`;
			
 
				+  });
			
 
				+
			
 
				+  afterAll(async () => {
			
 
				+    await handle.stop();
			
 
				+
			
 
				+    // Restore env
			
 
				+    if (origIndexPath !== undefined) process.env.INDEX_PATH = origIndexPath;
			
 
				+    else delete process.env.INDEX_PATH;
			
 
				+    if (origConfigDir !== undefined) process.env.QMD_CONFIG_DIR = origConfigDir;
			
 
				+    else delete process.env.QMD_CONFIG_DIR;
			
 
				+
			
 
				+    // Clean up test files
			
 
				+    try { require("fs").unlinkSync(httpTestDbPath); } catch {}
			
 
				+    try {
			
 
				+      const files = await readdir(httpTestConfigDir);
			
 
				+      for (const f of files) await unlink(join(httpTestConfigDir, f));
			
 
				+      await rmdir(httpTestConfigDir);
			
 
				+    } catch {}
			
 
				+  });
			
 
				+
			
 
				+  // ---------------------------------------------------------------------------
			
 
				+  // Health & routing
			
 
				+  // ---------------------------------------------------------------------------
			
 
				+
			
 
				+  test("GET /health returns 200 with status and uptime", async () => {
			
 
				+    const res = await fetch(`${baseUrl}/health`);
			
 
				+    expect(res.status).toBe(200);
			
 
				+    expect(res.headers.get("content-type")).toContain("application/json");
			
 
				+    const body = await res.json();
			
 
				+    expect(body.status).toBe("ok");
			
 
				+    expect(typeof body.uptime).toBe("number");
			
 
				+  });
			
 
				+
			
 
				+  test("GET /other returns 404", async () => {
			
 
				+    const res = await fetch(`${baseUrl}/other`);
			
 
				+    expect(res.status).toBe(404);
			
 
				+  });
			
 
				+
			
 
				+  // ---------------------------------------------------------------------------
			
 
				+  // MCP protocol over HTTP
			
 
				+  // ---------------------------------------------------------------------------
			
 
				+
			
 
				+  /**
			
 
				+   * POST a JSON-RPC message to `${baseUrl}/mcp` and return the parsed reply.
			
 
				+   *
			
 
				+   * MCP Streamable HTTP requires an Accept header listing both JSON and
			
 
				+   * SSE, so both media types are always sent. The response body is always
			
 
				+   * parsed with `res.json()`, so this helper rejects if the server answers
			
 
				+   * with something that is not JSON.
			
 
				+   *
			
 
				+   * @param body JSON-RPC request object; serialized with `JSON.stringify`.
			
 
				+   * @returns HTTP status, parsed JSON body, and the Content-Type header
			
 
				+   *          (`null` when the header is absent).
			
 
				+   */
			
 
				+  async function mcpRequest(body: object): Promise<{ status: number; json: any; contentType: string | null }> {
			
 
				+    const res = await fetch(`${baseUrl}/mcp`, {
			
 
				+      method: "POST",
			
 
				+      headers: {
			
 
				+        "Content-Type": "application/json",
			
 
				+        "Accept": "application/json, text/event-stream",
			
 
				+      },
			
 
				+      body: JSON.stringify(body),
			
 
				+    });
			
 
				+    const json = await res.json();
			
 
				+    return { status: res.status, json, contentType: res.headers.get("content-type") };
			
 
				+  }
			
 
				+
			
 
				+  test("POST /mcp initialize returns 200 JSON (not SSE)", async () => {
			
 
				+    const { status, json, contentType } = await mcpRequest({
			
 
				+      jsonrpc: "2.0",
			
 
				+      id: 1,
			
 
				+      method: "initialize",
			
 
				+      params: {
			
 
				+        protocolVersion: "2025-03-26",
			
 
				+        capabilities: {},
			
 
				+        clientInfo: { name: "test-client", version: "1.0.0" },
			
 
				+      },
			
 
				+    });
			
 
				+    expect(status).toBe(200);
			
 
				+    expect(contentType).toContain("application/json");
			
 
				+    expect(json.jsonrpc).toBe("2.0");
			
 
				+    expect(json.id).toBe(1);
			
 
				+    expect(json.result.serverInfo.name).toBe("qmd");
			
 
				+  });
			
 
				+
			
 
				+  test("POST /mcp tools/list returns registered tools", async () => {
			
 
				+    // Initialize first (required by MCP protocol)
			
 
				+    await mcpRequest({
			
 
				+      jsonrpc: "2.0", id: 1, method: "initialize",
			
 
				+      params: { protocolVersion: "2025-03-26", capabilities: {}, clientInfo: { name: "test", version: "1.0" } },
			
 
				+    });
			
 
				+
			
 
				+    const { status, json, contentType } = await mcpRequest({
			
 
				+      jsonrpc: "2.0", id: 2, method: "tools/list", params: {},
			
 
				+    });
			
 
				+    expect(status).toBe(200);
			
 
				+    expect(contentType).toContain("application/json");
			
 
				+
			
 
				+    const toolNames = json.result.tools.map((t: any) => t.name);
			
 
				+    expect(toolNames).toContain("search");
			
 
				+    expect(toolNames).toContain("get");
			
 
				+    expect(toolNames).toContain("status");
			
 
				+  });
			
 
				+
			
 
				+  test("POST /mcp tools/call search returns results", async () => {
			
 
				+    // Initialize
			
 
				+    await mcpRequest({
			
 
				+      jsonrpc: "2.0", id: 1, method: "initialize",
			
 
				+      params: { protocolVersion: "2025-03-26", capabilities: {}, clientInfo: { name: "test", version: "1.0" } },
			
 
				+    });
			
 
				+
			
 
				+    const { status, json } = await mcpRequest({
			
 
				+      jsonrpc: "2.0", id: 3, method: "tools/call",
			
 
				+      params: { name: "search", arguments: { query: "readme" } },
			
 
				+    });
			
 
				+    expect(status).toBe(200);
			
 
				+    expect(json.result).toBeDefined();
			
 
				+    // Should have content array with text results
			
 
				+    expect(json.result.content.length).toBeGreaterThan(0);
			
 
				+    expect(json.result.content[0].type).toBe("text");
			
 
				+  });
			
 
				+
			
 
				+  test("POST /mcp tools/call get returns document", async () => {
			
 
				+    // Initialize
			
 
				+    await mcpRequest({
			
 
				+      jsonrpc: "2.0", id: 1, method: "initialize",
			
 
				+      params: { protocolVersion: "2025-03-26", capabilities: {}, clientInfo: { name: "test", version: "1.0" } },
			
 
				+    });
			
 
				+
			
 
				+    const { status, json } = await mcpRequest({
			
 
				+      jsonrpc: "2.0", id: 4, method: "tools/call",
			
 
				+      params: { name: "get", arguments: { path: "readme.md" } },
			
 
				+    });
			
 
				+    expect(status).toBe(200);
			
 
				+    expect(json.result).toBeDefined();
			
 
				+    expect(json.result.content.length).toBeGreaterThan(0);
			
 
				+  });
			
 
				+});
			
--- a/src/mcp.ts
+++ b/src/mcp.ts
@@ -10,17 +10,20 @@
 
				 
			
 
				 import { McpServer, ResourceTemplate } from "@modelcontextprotocol/sdk/server/mcp.js";
			
 
				 import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
			
 
				+import { WebStandardStreamableHTTPServerTransport }
			
 
				+  from "@modelcontextprotocol/sdk/server/webStandardStreamableHttp.js";
			
 
				 import { z } from "zod";
			
 
				 import {
			
 
				   createStore,
			
 
				-  reciprocalRankFusion,
			
 
				   extractSnippet,
			
 
				-  DEFAULT_EMBED_MODEL,
			
 
				-  DEFAULT_QUERY_MODEL,
			
 
				-  DEFAULT_RERANK_MODEL,
			
 
				+  addLineNumbers,
			
 
				+  hybridQuery,
			
 
				+  vectorSearchQuery,
			
 
				   DEFAULT_MULTI_GET_MAX_BYTES,
			
 
				 } from "./store.js";
			
 
				-import type { RankedResult } from "./store.js";
			
 
				+import type { Store } from "./store.js";
			
 
				+import { getCollection, getGlobalContext } from "./collections.js";
			
 
				+import { disposeDefaultLlamaCpp } from "./llm.js";
			
 
				 
			
 
				 // =============================================================================
			
 
				 // Types for structured content
			
@@ -75,27 +78,78 @@ function formatSearchSummary(results: SearchResultItem[], query: string): string
 
				   return lines.join('\n');
			
 
				 }
			
 
				 
			
 
				-/**
			
 
				- * Add line numbers to text content.
			
 
				- * Each line becomes: "{lineNum}: {content}"
			
 
				- */
			
 
				-function addLineNumbers(text: string, startLine: number = 1): string {
			
 
				-  const lines = text.split('\n');
			
 
				-  return lines.map((line, i) => `${startLine + i}: ${line}`).join('\n');
			
 
				-}
			
 
				-
			
 
				 // =============================================================================
			
 
				 // MCP Server
			
 
				 // =============================================================================
			
 
				 
			
 
				-export async function startMcpServer(): Promise<void> {
			
 
				-  // Open database once at startup - keep it open for the lifetime of the server
			
 
				-  const store = createStore();
			
 
				+/**
			
 
				+ * Build dynamic server instructions from actual index state.
			
 
				+ * Injected into the LLM's system prompt via MCP initialize response —
			
 
				+ * gives the LLM immediate context about what's searchable without a tool call.
			
 
				+ *
			
 
				+ * The text is assembled once from `store.getStatus()`, the global context and
			
 
				+ * each collection's root context, followed by fixed tool-selection guidance.
			
 
				+ *
			
 
				+ * @param store - Open store whose `getStatus()` supplies document counts,
			
 
				+ *   the collection list and embedding state.
			
 
				+ * @returns The instruction text, one entry per line joined with "\n".
			
 
				+ */
			
 
				+function buildInstructions(store: Store): string {
			
 
				+  const status = store.getStatus();
			
 
				+  const lines: string[] = [];
			
 
				+
			
 
				+  // --- What is this? ---
			
 
				+  const globalCtx = getGlobalContext();
			
 
				+  lines.push(`QMD is your local search engine over ${status.totalDocuments} markdown documents.`);
			
 
				+  if (globalCtx) lines.push(`Context: ${globalCtx}`);
			
 
				+
			
 
				+  // --- What's searchable? ---
			
 
				+  if (status.collections.length > 0) {
			
 
				+    lines.push("");
			
 
				+    lines.push("Collections (scope with `collection` parameter):");
			
 
				+    for (const col of status.collections) {
			
 
				+      const collConfig = getCollection(col.name);
			
 
				+      // Root-level context may be stored under either the "" or the "/" key.
			
 
				+      const rootCtx = collConfig?.context?.[""] || collConfig?.context?.["/"];
			
 
				+      const desc = rootCtx ? ` — ${rootCtx}` : "";
			
 
				+      lines.push(`  - "${col.name}" (${col.documents} docs)${desc}`);
			
 
				+    }
			
 
				+  }
			
 
				 
			
 
				-  const server = new McpServer({
			
 
				-    name: "qmd",
			
 
				-    version: "1.0.0",
			
 
				-  });
			
 
				+  // --- Capability gaps ---
			
 
				+  // At most one note is emitted: a missing vector index takes precedence over
			
 
				+  // a partially embedded one.
			
 
				+  if (!status.hasVectorIndex) {
			
 
				+    lines.push("");
			
 
				+    lines.push("Note: No vector embeddings. Only `search` (BM25) is available.");
			
 
				+  } else if (status.needsEmbedding > 0) {
			
 
				+    lines.push("");
			
 
				+    lines.push(`Note: ${status.needsEmbedding} documents need embedding. Run \`qmd embed\` to update.`);
			
 
				+  }
			
 
				+
			
 
				+  // --- When to use which tool (escalation ladder) ---
			
 
				+  // Tool schemas describe parameters; instructions describe strategy.
			
 
				+  // The timings below are fixed hint text, not measurements.
			
 
				+  // NOTE(review): all three tools are listed even when the "no vector
			
 
				+  // embeddings" note above was emitted — confirm that is intended.
			
 
				+  lines.push("");
			
 
				+  lines.push("Search:");
			
 
				+  lines.push("  - `search` (~30ms) — keyword and exact phrase matching.");
			
 
				+  lines.push("  - `vector_search` (~2s) — meaning-based, finds adjacent concepts even when vocabulary differs.");
			
 
				+  lines.push("  - `deep_search` (~10s) — auto-expands the query into variations, searches each by keyword and meaning, reranks for top hits.");
			
 
				+
			
 
				+  // --- Retrieval workflow ---
			
 
				+  lines.push("");
			
 
				+  lines.push("Retrieval:");
			
 
				+  lines.push("  - `get` — single document by path or docid (#abc123). Supports line offset (`file.md:100`).");
			
 
				+  lines.push("  - `multi_get` — batch retrieve by glob (`journals/2025-05*.md`) or comma-separated list.");
			
 
				+
			
 
				+  // --- Non-obvious things that prevent mistakes ---
			
 
				+  lines.push("");
			
 
				+  lines.push("Tips:");
			
 
				+  lines.push("  - File paths in results are relative to their collection.");
			
 
				+  lines.push("  - Use `minScore: 0.5` to filter low-confidence results.");
			
 
				+  lines.push("  - Results include a `context` field describing the content type.");
			
 
				+
			
 
				+  return lines.join("\n");
			
 
				+}
			
 
				+
			
 
				+/**
			
 
				+ * Create an MCP server with all QMD tools, resources, and prompts registered.
			
 
				+ * Shared by both stdio and HTTP transports.
			
 
				+ */
			
 
				+function createMcpServer(store: Store): McpServer {
			
 
				+  const server = new McpServer(
			
 
				+    { name: "qmd", version: "1.0.0" },
			
 
				+    { instructions: buildInstructions(store) },
			
 
				+  );
			
 
				 
			
 
				   // ---------------------------------------------------------------------------
			
 
				   // Resource: qmd://{path} - read-only access to documents by path
			
@@ -166,100 +220,15 @@ export async function startMcpServer(): Promise<void> {
 
				   );
			
 
				 
			
 
				   // ---------------------------------------------------------------------------
			
 
				-  // Prompt: query guide
			
 
				-  // ---------------------------------------------------------------------------
			
 
				-
			
 
				-  server.registerPrompt(
			
 
				-    "query",
			
 
				-    {
			
 
				-      title: "QMD Query Guide",
			
 
				-      description: "How to effectively search your knowledge base with QMD",
			
 
				-    },
			
 
				-    () => ({
			
 
				-      messages: [
			
 
				-        {
			
 
				-          role: "user",
			
 
				-          content: {
			
 
				-            type: "text",
			
 
				-            text: `# QMD - Quick Markdown Search
			
 
				-
			
 
				-QMD is your on-device search engine for markdown knowledge bases. Use it to find information across your notes, documents, and meeting transcripts.
			
 
				-
			
 
				-## Available Tools
			
 
				-
			
 
				-### 1. search (Fast keyword search)
			
 
				-Best for: Finding documents with specific keywords or phrases.
			
 
				-- Uses BM25 full-text search
			
 
				-- Fast, no LLM required
			
 
				-- Good for exact matches
			
 
				-- Use \`collection\` parameter to filter to a specific collection
			
 
				-
			
 
				-### 2. vsearch (Semantic search)
			
 
				-Best for: Finding conceptually related content even without exact keyword matches.
			
 
				-- Uses vector embeddings
			
 
				-- Understands meaning and context
			
 
				-- Good for "how do I..." or conceptual queries
			
 
				-- Use \`collection\` parameter to filter to a specific collection
			
 
				-
			
 
				-### 3. query (Hybrid search - highest quality)
			
 
				-Best for: Important searches where you want the best results.
			
 
				-- Combines keyword + semantic search
			
 
				-- Expands your query with variations
			
 
				-- Re-ranks results with LLM
			
 
				-- Slower but most accurate
			
 
				-- Use \`collection\` parameter to filter to a specific collection
			
 
				-
			
 
				-### 4. get (Retrieve document)
			
 
				-Best for: Getting the full content of a single document you found.
			
 
				-- Use the file path from search results
			
 
				-- Supports line ranges: \`file.md:100\` or fromLine/maxLines parameters
			
 
				-- Suggests similar files if not found
			
 
				-
			
 
				-### 5. multi_get (Retrieve multiple documents)
			
 
				-Best for: Getting content from multiple files at once.
			
 
				-- Use glob patterns: \`journals/2025-05*.md\`
			
 
				-- Or comma-separated: \`file1.md, file2.md\`
			
 
				-- Skips files over maxBytes (default 10KB) - use get for large files
			
 
				-
			
 
				-### 6. status (Index info)
			
 
				-Shows collection info, document counts, and embedding status.
			
 
				-
			
 
				-## Resources
			
 
				-
			
 
				-You can also access documents directly via the \`qmd://\` URI scheme:
			
 
				-- List all documents: \`resources/list\`
			
 
				-- Read a document: \`resources/read\` with uri \`qmd://path/to/file.md\`
			
 
				-
			
 
				-## Search Strategy
			
 
				-
			
 
				-1. **Start with search** for quick keyword lookups
			
 
				-2. **Use vsearch** when keywords aren't working or for conceptual queries
			
 
				-3. **Use query** for important searches or when you need high confidence
			
 
				-4. **Use get** to retrieve a single full document
			
 
				-5. **Use multi_get** to batch retrieve multiple related files
			
 
				-
			
 
				-## Tips
			
 
				-
			
 
				-- Use \`minScore: 0.5\` to filter low-relevance results
			
 
				-- Use \`collection: "notes"\` to search only in a specific collection
			
 
				-- Check the "Context" field - it describes what kind of content the file contains
			
 
				-- File paths are relative to their collection (e.g., \`pages/meeting.md\`)
			
 
				-- For glob patterns, match on display_path (e.g., \`journals/2025-*.md\`)`,
			
 
				-          },
			
 
				-        },
			
 
				-      ],
			
 
				-    })
			
 
				-  );
			
 
				-
			
 
				-  // ---------------------------------------------------------------------------
			
 
				-  // Tool: qmd_search (BM25 full-text)
			
 
				+  // Tool: qmd_search (keyword)
			
 
				   // ---------------------------------------------------------------------------
			
 
				 
			
 
				   server.registerTool(
			
 
				     "search",
			
 
				     {
			
 
				-      title: "Search (BM25)",
			
 
				-      description: "Fast keyword-based full-text search using BM25. Best for finding documents with specific words or phrases.",
			
 
				+      title: "Keyword Search",
			
 
				+      description: "Search by keyword. Finds documents containing exact words and phrases in the query.",
			
 
				+      annotations: { readOnlyHint: true, openWorldHint: false },
			
 
				       inputSchema: {
			
 
				         query: z.string().describe("Search query - keywords or phrases to find"),
			
 
				         limit: z.number().optional().default(10).describe("Maximum number of results (default: 10)"),
			
@@ -293,14 +262,15 @@ You can also access documents directly via the \`qmd://\` URI scheme:
 
				   );
			
 
				 
			
 
				   // ---------------------------------------------------------------------------
			
 
				-  // Tool: qmd_vsearch (Vector semantic search)
			
 
				+  // Tool: qmd_vector_search (Vector semantic search)
			
 
				   // ---------------------------------------------------------------------------
			
 
				 
			
 
				   server.registerTool(
			
 
				-    "vsearch",
			
 
				+    "vector_search",
			
 
				     {
			
 
				-      title: "Vector Search (Semantic)",
			
 
				-      description: "Semantic similarity search using vector embeddings. Finds conceptually related content even without exact keyword matches. Requires embeddings (run 'qmd embed' first).",
			
 
				+      title: "Vector Search",
			
 
				+      description: "Search by meaning. Finds relevant documents even when they use different words than the query — handles synonyms, paraphrases, and related concepts.",
			
 
				+      annotations: { readOnlyHint: true, openWorldHint: false },
			
 
				       inputSchema: {
			
 
				         query: z.string().describe("Natural language query - describe what you're looking for"),
			
 
				         limit: z.number().optional().default(10).describe("Maximum number of results (default: 10)"),
			
@@ -309,45 +279,30 @@ You can also access documents directly via the \`qmd://\` URI scheme:
 
				       },
			
 
				     },
			
 
				     async ({ query, limit, minScore, collection }) => {
			
 
				-      const tableExists = store.db.prepare(`SELECT name FROM sqlite_master WHERE type='table' AND name='vectors_vec'`).get();
			
 
				-      if (!tableExists) {
			
 
				-        return {
			
 
				-          content: [{ type: "text", text: "Vector index not found. Run 'qmd embed' first to create embeddings." }],
			
 
				-          isError: true,
			
 
				-        };
			
 
				-      }
			
 
				+      const results = await vectorSearchQuery(store, query, { collection, limit, minScore });
			
 
				 
			
 
				-      // Expand query
			
 
				-      const queries = await store.expandQuery(query, DEFAULT_QUERY_MODEL);
			
 
				-
			
 
				-      // Collect results (filter by collection after search)
			
 
				-      const allResults = new Map<string, { file: string; displayPath: string; title: string; body: string; score: number; docid: string }>();
			
 
				-      for (const q of queries) {
			
 
				-        const vecResults = await store.searchVec(q, DEFAULT_EMBED_MODEL, limit || 10)
			
 
				-          .then(results => results.filter(r => !collection || r.collectionName === collection));
			
 
				-        for (const r of vecResults) {
			
 
				-          const existing = allResults.get(r.filepath);
			
 
				-          if (!existing || r.score > existing.score) {
			
 
				-            allResults.set(r.filepath, { file: r.filepath, displayPath: r.displayPath, title: r.title, body: r.body || "", score: r.score, docid: r.docid });
			
 
				-          }
			
 
				+      if (results.length === 0) {
			
 
				+        // Distinguish "no embeddings" from "no matches" — check if vector table exists
			
 
				+        const tableExists = store.db.prepare(`SELECT name FROM sqlite_master WHERE type='table' AND name='vectors_vec'`).get();
			
 
				+        if (!tableExists) {
			
 
				+          return {
			
 
				+            content: [{ type: "text", text: "Vector index not found. Run 'qmd embed' first to create embeddings." }],
			
 
				+            isError: true,
			
 
				+          };
			
 
				         }
			
 
				       }
			
 
				 
			
 
				-      const filtered: SearchResultItem[] = Array.from(allResults.values())
			
 
				-        .sort((a, b) => b.score - a.score)
			
 
				-        .slice(0, limit || 10)
			
 
				-        .filter(r => r.score >= (minScore || 0.3))
			
 
				-        .map(r => {
			
 
				-          const { line, snippet } = extractSnippet(r.body || "", query, 300);
			
 
				-          return {
			
 
				-            docid: `#${r.docid}`,
			
 
				-            file: r.displayPath,
			
 
				-            title: r.title,
			
 
				-            score: Math.round(r.score * 100) / 100,
			
 
				-            context: store.getContextForFile(r.file),
			
 
				-            snippet: addLineNumbers(snippet, line),  // Default to line numbers
			
 
				-          };
			
 
				-        });
			
 
				+      const filtered: SearchResultItem[] = results.map(r => {
			
 
				+        const { line, snippet } = extractSnippet(r.body, query, 300);
			
 
				+        return {
			
 
				+          docid: `#${r.docid}`,
			
 
				+          file: r.displayPath,
			
 
				+          title: r.title,
			
 
				+          score: Math.round(r.score * 100) / 100,
			
 
				+          context: r.context,
			
 
				+          snippet: addLineNumbers(snippet, line),
			
 
				+        };
			
 
				+      });
			
 
				 
			
 
				       return {
			
 
				         content: [{ type: "text", text: formatSearchSummary(filtered, query) }],
			
@@ -357,14 +312,15 @@ You can also access documents directly via the \`qmd://\` URI scheme:
 
				   );
			
 
				 
			
 
				   // ---------------------------------------------------------------------------
			
 
				-  // Tool: qmd_query (Hybrid with reranking)
			
 
				+  // Tool: qmd_deep_search (Deep search with expansion + reranking)
			
 
				   // ---------------------------------------------------------------------------
			
 
				 
			
 
				   server.registerTool(
			
 
				-    "query",
			
 
				+    "deep_search",
			
 
				     {
			
 
				-      title: "Hybrid Query (Best Quality)",
			
 
				-      description: "Highest quality search combining BM25 + vector + query expansion + LLM reranking. Slower but most accurate. Use for important searches.",
			
 
				+      title: "Deep Search",
			
 
				+      description: "Deep search. Auto-expands the query into variations, searches each by keyword and meaning, and reranks for top hits across all results.",
			
 
				+      annotations: { readOnlyHint: true, openWorldHint: false },
			
 
				       inputSchema: {
			
 
				         query: z.string().describe("Natural language query - describe what you're looking for"),
			
 
				         limit: z.number().optional().default(10).describe("Maximum number of results (default: 10)"),
			
@@ -373,66 +329,19 @@ You can also access documents directly via the \`qmd://\` URI scheme:
 
				       },
			
 
				     },
			
 
				     async ({ query, limit, minScore, collection }) => {
			
 
				-      // Expand query
			
 
				-      const queries = await store.expandQuery(query, DEFAULT_QUERY_MODEL);
			
 
				-
			
 
				-      // Collect ranked lists (filter by collection after search)
			
 
				-      const rankedLists: RankedResult[][] = [];
			
 
				-      const docidMap = new Map<string, string>(); // filepath -> docid
			
 
				-      const hasVectors = !!store.db.prepare(`SELECT name FROM sqlite_master WHERE type='table' AND name='vectors_vec'`).get();
			
 
				-
			
 
				-      for (const q of queries) {
			
 
				-        const ftsResults = store.searchFTS(q, 20)
			
 
				-          .filter(r => !collection || r.collectionName === collection);
			
 
				-        if (ftsResults.length > 0) {
			
 
				-          for (const r of ftsResults) docidMap.set(r.filepath, r.docid);
			
 
				-          rankedLists.push(ftsResults.map(r => ({ file: r.filepath, displayPath: r.displayPath, title: r.title, body: r.body || "", score: r.score })));
			
 
				-        }
			
 
				-        if (hasVectors) {
			
 
				-          const vecResults = await store.searchVec(q, DEFAULT_EMBED_MODEL, 20)
			
 
				-            .then(results => results.filter(r => !collection || r.collectionName === collection));
			
 
				-          if (vecResults.length > 0) {
			
 
				-            for (const r of vecResults) docidMap.set(r.filepath, r.docid);
			
 
				-            rankedLists.push(vecResults.map(r => ({ file: r.filepath, displayPath: r.displayPath, title: r.title, body: r.body || "", score: r.score })));
			
 
				-          }
			
 
				-        }
			
 
				-      }
			
 
				+      const results = await hybridQuery(store, query, { collection, limit, minScore });
			
 
				 
			
 
				-      // RRF fusion
			
 
				-      const weights = rankedLists.map((_, i) => i < 2 ? 2.0 : 1.0);
			
 
				-      const fused = reciprocalRankFusion(rankedLists, weights);
			
 
				-      const candidates = fused.slice(0, 30);
			
 
				-
			
 
				-      // Rerank
			
 
				-      const reranked = await store.rerank(
			
 
				-        query,
			
 
				-        candidates.map(c => ({ file: c.file, text: c.body })),
			
 
				-        DEFAULT_RERANK_MODEL
			
 
				-      );
			
 
				-
			
 
				-      // Blend scores
			
 
				-      const candidateMap = new Map(candidates.map(c => [c.file, { displayPath: c.displayPath, title: c.title, body: c.body }]));
			
 
				-      const rrfRankMap = new Map(candidates.map((c, i) => [c.file, i + 1]));
			
 
				-
			
 
				-      const filtered: SearchResultItem[] = reranked.map(r => {
			
 
				-        const rrfRank = rrfRankMap.get(r.file) || candidates.length;
			
 
				-        let rrfWeight: number;
			
 
				-        if (rrfRank <= 3) rrfWeight = 0.75;
			
 
				-        else if (rrfRank <= 10) rrfWeight = 0.60;
			
 
				-        else rrfWeight = 0.40;
			
 
				-        const rrfScore = 1 / rrfRank;
			
 
				-        const blendedScore = rrfWeight * rrfScore + (1 - rrfWeight) * r.score;
			
 
				-        const candidate = candidateMap.get(r.file);
			
 
				-        const { line, snippet } = extractSnippet(candidate?.body || "", query, 300);
			
 
				+      const filtered: SearchResultItem[] = results.map(r => {
			
 
				+        const { line, snippet } = extractSnippet(r.bestChunk, query, 300);
			
 
				         return {
			
 
				-          docid: `#${docidMap.get(r.file) || ""}`,
			
 
				-          file: candidate?.displayPath || "",
			
 
				-          title: candidate?.title || "",
			
 
				-          score: Math.round(blendedScore * 100) / 100,
			
 
				-          context: store.getContextForFile(r.file),
			
 
				-          snippet: addLineNumbers(snippet, line),  // Default to line numbers
			
 
				+          docid: `#${r.docid}`,
			
 
				+          file: r.displayPath,
			
 
				+          title: r.title,
			
 
				+          score: Math.round(r.score * 100) / 100,
			
 
				+          context: r.context,
			
 
				+          snippet: addLineNumbers(snippet, line),
			
 
				         };
			
 
				-      }).filter(r => r.score >= (minScore || 0)).slice(0, limit || 10);
			
 
				+      });
			
 
				 
			
 
				       return {
			
 
				         content: [{ type: "text", text: formatSearchSummary(filtered, query) }],
			
@@ -450,6 +359,7 @@ You can also access documents directly via the \`qmd://\` URI scheme:
 
				     {
			
 
				       title: "Get Document",
			
 
				       description: "Retrieve the full content of a document by its file path or docid. Use paths or docids (#abc123) from search results. Suggests similar files if not found.",
			
 
				+      annotations: { readOnlyHint: true, openWorldHint: false },
			
 
				       inputSchema: {
			
 
				         file: z.string().describe("File path or docid from search results (e.g., 'pages/meeting.md', '#abc123', or 'pages/meeting.md:100' to start at line 100)"),
			
 
				         fromLine: z.number().optional().describe("Start from this line number (1-indexed)"),
			
@@ -514,6 +424,7 @@ You can also access documents directly via the \`qmd://\` URI scheme:
 
				     {
			
 
				       title: "Multi-Get Documents",
			
 
				       description: "Retrieve multiple documents by glob pattern (e.g., 'journals/2025-05*.md') or comma-separated list. Skips files larger than maxBytes.",
			
 
				+      annotations: { readOnlyHint: true, openWorldHint: false },
			
 
				       inputSchema: {
			
 
				         pattern: z.string().describe("Glob pattern or comma-separated list of file paths"),
			
 
				         maxLines: z.number().optional().describe("Maximum lines per file"),
			
@@ -586,6 +497,7 @@ You can also access documents directly via the \`qmd://\` URI scheme:
 
				     {
			
 
				       title: "Index Status",
			
 
				       description: "Show the status of the QMD index: collections, document counts, and health information.",
			
 
				+      annotations: { readOnlyHint: true, openWorldHint: false },
			
 
				       inputSchema: {},
			
 
				     },
			
 
				     async () => {
			
@@ -610,14 +522,130 @@ You can also access documents directly via the \`qmd://\` URI scheme:
 
				     }
			
 
				   );
			
 
				 
			
 
				-  // ---------------------------------------------------------------------------
			
 
				-  // Connect via stdio
			
 
				-  // ---------------------------------------------------------------------------
			
 
				+  return server;
			
 
				+}
			
 
				+
			
 
				+// =============================================================================
			
 
				+// Transport: stdio (default)
			
 
				+// =============================================================================
			
 
				 
			
 
				+/**
			
 
				+ * Start the MCP server on the stdio transport.
			
 
				+ *
			
 
				+ * Creates a store, builds the shared MCP server around it and connects that
			
 
				+ * server to a `StdioServerTransport`. The store is not closed by this function.
			
 
				+ *
			
 
				+ * @returns A promise that resolves once `server.connect` has completed.
			
 
				+ */
			
 
				+export async function startMcpServer(): Promise<void> {
			
 
				+  const store = createStore();
			
 
				+  const server = createMcpServer(store);
			
 
				   const transport = new StdioServerTransport();
			
 
				   await server.connect(transport);
			
 
				+}
			
 
				+
			
 
				+// =============================================================================
			
 
				+// Transport: Streamable HTTP
			
 
				+// =============================================================================
			
 
				+
			
 
				+/** Handle returned by `startMcpHttpServer` for inspecting and shutting down the server. */
			
 
				+export type HttpServerHandle = {
			
 
				+  /** The underlying server object returned by `Bun.serve`. */
			
 
				+  httpServer: ReturnType<typeof Bun.serve>;
			
 
				+  /** Port reported by the running server (read from `httpServer.port`). */
			
 
				+  port: number;
			
 
				+  /** Shuts the server down; calls after the first one return immediately. */
			
 
				+  stop: () => Promise<void>;
			
 
				+};
			
 
				+
			
 
				+/**
			
 
				+ * Start MCP server over Streamable HTTP (JSON responses, no SSE).
			
 
				+ * Binds to localhost only. Returns a handle for shutdown and port discovery.
			
 
				+ *
			
 
				+ * Routes:
			
 
				+ *   GET  /health — liveness probe returning `{ status, uptime }` (uptime in seconds)
			
 
				+ *   POST /mcp    — JSON body is parsed here and handed to the transport
			
 
				+ *   *    /mcp    — any other method is passed to the transport untouched
			
 
				+ * Every other request is answered with 404.
			
 
				+ *
			
 
				+ * Installs SIGTERM/SIGINT handlers that call `stop()` and then exit the
			
 
				+ * process; `stop()` removes those handlers again.
			
 
				+ *
			
 
				+ * @param port - Port passed to `Bun.serve`; the port actually reported by the
			
 
				+ *   server is returned in the handle.
			
 
				+ * @param options - `quiet: true` suppresses the startup and per-request log lines.
			
 
				+ * @returns Handle with the Bun server, its port and an idempotent `stop()`.
			
 
				+ */
			
 
				+export async function startMcpHttpServer(port: number, options?: { quiet?: boolean }): Promise<HttpServerHandle> {
			
 
				+  const store = createStore();
			
 
				+  const mcpServer = createMcpServer(store);
			
 
				+  const transport = new WebStandardStreamableHTTPServerTransport({
			
 
				+    enableJsonResponse: true,
			
 
				+  });
			
 
				+  await mcpServer.connect(transport);
			
 
				+
			
 
				+  const startTime = Date.now();
			
 
				+  const quiet = options?.quiet ?? false;
			
 
				+
			
 
				+  /** Format timestamp for request logging */
			
 
				+  function ts(): string {
			
 
				+    return new Date().toISOString().slice(11, 23); // HH:mm:ss.SSS
			
 
				+  }
			
 
				+
			
 
				+  /** Extract a human-readable label from a JSON-RPC body */
			
 
				+  function describeRequest(body: any): string {
			
 
				+    const method = body?.method ?? "unknown";
			
 
				+    if (method === "tools/call") {
			
 
				+      const tool = body.params?.name ?? "?";
			
 
				+      const args = body.params?.arguments;
			
 
				+      // Show query string if present, truncated
			
 
				+      if (args?.query) {
			
 
				+        const q = String(args.query).slice(0, 80);
			
 
				+        return `tools/call ${tool} "${q}"`;
			
 
				+      }
			
 
				+      // The `get` tool names its document argument `file`; `path` is still
			
 
				+      // recognised so existing callers keep their log label.
			
 
				+      const target = args?.file || args?.path;
			
 
				+      if (target) return `tools/call ${tool} ${target}`;
			
 
				+      if (args?.pattern) return `tools/call ${tool} ${args.pattern}`;
			
 
				+      return `tools/call ${tool}`;
			
 
				+    }
			
 
				+    return method;
			
 
				+  }
			
 
				+
			
 
				+  /** Write a log line to stderr unless `quiet` was requested */
			
 
				+  function log(msg: string): void {
			
 
				+    if (!quiet) console.error(msg);
			
 
				+  }
			
 
				+
			
 
				+  const httpServer = Bun.serve({
			
 
				+    port,
			
 
				+    hostname: "localhost",
			
 
				+    async fetch(req) {
			
 
				+      const reqStart = Date.now();
			
 
				+      const pathname = new URL(req.url).pathname;
			
 
				+
			
 
				+      if (pathname === "/health" && req.method === "GET") {
			
 
				+        const res = Response.json({
			
 
				+          status: "ok",
			
 
				+          uptime: Math.floor((Date.now() - startTime) / 1000),
			
 
				+        });
			
 
				+        log(`${ts()} GET /health (${Date.now() - reqStart}ms)`);
			
 
				+        return res;
			
 
				+      }
			
 
				+
			
 
				+      if (pathname === "/mcp" && req.method === "POST") {
			
 
				+        let body: unknown;
			
 
				+        try {
			
 
				+          body = await req.json();
			
 
				+        } catch {
			
 
				+          // Malformed JSON: answer with a JSON-RPC parse error instead of
			
 
				+          // letting the exception escape the fetch handler.
			
 
				+          log(`${ts()} POST /mcp parse error (${Date.now() - reqStart}ms)`);
			
 
				+          return Response.json(
			
 
				+            { jsonrpc: "2.0", error: { code: -32700, message: "Parse error" }, id: null },
			
 
				+            { status: 400 },
			
 
				+          );
			
 
				+        }
			
 
				+        const label = describeRequest(body);
			
 
				+        // The request body stream is consumed above, so the parsed value is passed along.
			
 
				+        const res = await transport.handleRequest(req, { parsedBody: body });
			
 
				+        log(`${ts()} POST /mcp ${label} (${Date.now() - reqStart}ms)`);
			
 
				+        return res;
			
 
				+      }
			
 
				+
			
 
				+      // Pass other methods (GET, DELETE) to transport for protocol handling
			
 
				+      if (pathname === "/mcp") {
			
 
				+        return transport.handleRequest(req);
			
 
				+      }
			
 
				+
			
 
				+      return new Response("Not Found", { status: 404 });
			
 
				+    },
			
 
				+  });
			
 
				+
			
 
				+  const actualPort = httpServer.port;
			
 
				+
			
 
				+  /** Build a signal handler that shuts down and then exits, even if `stop()` rejects */
			
 
				+  const shutdownOn = (signal: "SIGTERM" | "SIGINT") => async () => {
			
 
				+    console.error(`Shutting down (${signal})...`);
			
 
				+    try {
			
 
				+      await stop();
			
 
				+    } finally {
			
 
				+      process.exit(0);
			
 
				+    }
			
 
				+  };
			
 
				+  const onSigterm = shutdownOn("SIGTERM");
			
 
				+  const onSigint = shutdownOn("SIGINT");
			
 
				+
			
 
				+  let stopping = false;
			
 
				+  const stop = async () => {
			
 
				+    if (stopping) return;
			
 
				+    stopping = true;
			
 
				+    // Drop the signal handlers so repeated start/stop cycles do not pile up listeners.
			
 
				+    process.off("SIGTERM", onSigterm);
			
 
				+    process.off("SIGINT", onSigint);
			
 
				+    try {
			
 
				+      await transport.close();
			
 
				+    } finally {
			
 
				+      // Release the remaining resources even when closing the transport fails.
			
 
				+      httpServer.stop();
			
 
				+      store.close();
			
 
				+      await disposeDefaultLlamaCpp();
			
 
				+    }
			
 
				+  };
			
 
				+
			
 
				+  process.on("SIGTERM", onSigterm);
			
 
				+  process.on("SIGINT", onSigint);
			
 
				 
			
 
				-  // Note: Database stays open - it will be closed when the process exits
			
 
				+  log(`QMD MCP server listening on http://localhost:${actualPort}/mcp`);
			
 
				+  return { httpServer, port: actualPort, stop };
			
 
				 }
			
 
				 
			
 
				 // Run if this is the main module
			
--- a/src/qmd.ts
+++ b/src/qmd.ts
@@ -2,8 +2,7 @@
 
				 import { Database } from "bun:sqlite";
			
 
				 import { Glob, $ } from "bun";
			
 
				 import { parseArgs } from "util";
			
 
				-import { readFileSync, statSync } from "fs";
			
 
				-import * as sqliteVec from "sqlite-vec";
			
 
				+import { readFileSync, statSync, existsSync, unlinkSync, writeFileSync, openSync, closeSync, mkdirSync } from "fs";
			
 
				 import {
			
 
				   getPwd,
			
 
				   getRealPath,
			
@@ -11,7 +10,6 @@ import {
 
				   resolve,
			
 
				   enableProductionMode,
			
 
				   searchFTS,
			
 
				-  searchVec,
			
 
				   extractSnippet,
			
 
				   getContextForFile,
			
 
				   getContextForPath,
			
@@ -30,8 +28,6 @@ import {
 
				   hashContent,
			
 
				   extractTitle,
			
 
				   formatDocForEmbedding,
			
 
				-  formatQueryForEmbedding,
			
 
				-  chunkDocument,
			
 
				   chunkDocumentByTokens,
			
 
				   clearCache,
			
 
				   getCacheKey,
			
@@ -58,16 +54,18 @@ import {
 
				   getCollectionsWithoutContext,
			
 
				   getTopLevelPathsWithoutContext,
			
 
				   handelize,
			
 
				+  hybridQuery,
			
 
				+  vectorSearchQuery,
			
 
				+  addLineNumbers,
			
 
				+  type ExpandedQuery,
			
 
				   DEFAULT_EMBED_MODEL,
			
 
				-  DEFAULT_QUERY_MODEL,
			
 
				   DEFAULT_RERANK_MODEL,
			
 
				   DEFAULT_GLOB,
			
 
				   DEFAULT_MULTI_GET_MAX_BYTES,
			
 
				   createStore,
			
 
				   getDefaultDbPath,
			
 
				 } from "./store.js";
			
 
				-import { getDefaultLlamaCpp, disposeDefaultLlamaCpp, withLLMSession, pullModels, DEFAULT_EMBED_MODEL_URI, DEFAULT_GENERATE_MODEL_URI, DEFAULT_RERANK_MODEL_URI, DEFAULT_MODEL_CACHE_DIR, type ILLMSession, type RerankDocument, type Queryable, type QueryType } from "./llm.js";
			
 
				-import type { SearchResult, RankedResult } from "./store.js";
			
 
				+import { disposeDefaultLlamaCpp, withLLMSession, pullModels, DEFAULT_EMBED_MODEL_URI, DEFAULT_GENERATE_MODEL_URI, DEFAULT_RERANK_MODEL_URI, DEFAULT_MODEL_CACHE_DIR } from "./llm.js";
			
 
				 import {
			
 
				   formatSearchResults,
			
 
				   formatDocuments,
			
@@ -232,28 +230,6 @@ function computeDisplayPath(
 
				   return filepath;
			
 
				 }
			
 
				 
			
 
				-// Rerank documents using node-llama-cpp cross-encoder model
			
 
				-async function rerank(query: string, documents: { file: string; text: string }[], _model: string = DEFAULT_RERANK_MODEL, _db?: Database, session?: ILLMSession): Promise<{ file: string; score: number }[]> {
			
 
				-  if (documents.length === 0) return [];
			
 
				-
			
 
				-  const total = documents.length;
			
 
				-  process.stderr.write(`Reranking ${total} documents...\n`);
			
 
				-  progress.indeterminate();
			
 
				-
			
 
				-  const rerankDocs: RerankDocument[] = documents.map((doc) => ({
			
 
				-    file: doc.file,
			
 
				-    text: doc.text.slice(0, 4000), // Truncate to context limit
			
 
				-  }));
			
 
				-
			
 
				-  const result = session
			
 
				-    ? await session.rerank(query, rerankDocs)
			
 
				-    : await getDefaultLlamaCpp().rerank(query, rerankDocs);
			
 
				-
			
 
				-  progress.clear();
			
 
				-  process.stderr.write("\n");
			
 
				-
			
 
				-  return result.results.map((r) => ({ file: r.file, score: r.score }));
			
 
				-}
			
 
				 
			
 
				 function formatTimeAgo(date: Date): string {
			
 
				   const seconds = Math.floor((Date.now() - date.getTime()) / 1000);
			
@@ -300,7 +276,24 @@ function showStatus(): void {
 
				 
			
 
				   console.log(`${c.bold}QMD Status${c.reset}\n`);
			
 
				   console.log(`Index: ${dbPath}`);
			
 
				-  console.log(`Size:  ${formatBytes(indexSize)}\n`);
			
 
				+  console.log(`Size:  ${formatBytes(indexSize)}`);
			
 
				+
			
 
				+  // MCP daemon status (check PID file liveness)
			
 
				+  const mcpCacheDir = Bun.env.XDG_CACHE_HOME
			
 
				+    ? resolve(Bun.env.XDG_CACHE_HOME, "qmd")
			
 
				+    : resolve(homedir(), ".cache", "qmd");
			
 
				+  const mcpPidPath = resolve(mcpCacheDir, "mcp.pid");
			
 
				+  if (existsSync(mcpPidPath)) {
			
 
				+    const mcpPid = parseInt(readFileSync(mcpPidPath, "utf-8").trim());
			
 
				+    try {
			
 
				+      process.kill(mcpPid, 0);
			
 
				+      console.log(`MCP:   ${c.green}running${c.reset} (PID ${mcpPid})`);
			
 
				+    } catch {
			
 
				+      unlinkSync(mcpPidPath);
			
 
				+      // Stale PID file cleaned up silently
			
 
				+    }
			
 
				+  }
			
 
				+  console.log("");
			
 
				 
			
 
				   console.log(`${c.bold}Documents${c.reset}`);
			
 
				   console.log(`  Total:    ${totalDocs.count} files indexed`);
			
@@ -1701,55 +1694,6 @@ function normalizeBM25(score: number): number {
 
				   return 1 / (1 + Math.exp(-(absScore - 5) / 3));
			
 
				 }
			
 
				 
			
 
				-function normalizeScores(results: SearchResult[]): SearchResult[] {
			
 
				-  if (results.length === 0) return results;
			
 
				-  const maxScore = Math.max(...results.map(r => r.score));
			
 
				-  const minScore = Math.min(...results.map(r => r.score));
			
 
				-  const range = maxScore - minScore || 1;
			
 
				-  return results.map(r => ({ ...r, score: (r.score - minScore) / range }));
			
 
				-}
			
 
				-
			
 
				-// Reciprocal Rank Fusion: combines multiple ranked lists
			
 
				-// RRF score = sum(1 / (k + rank)) across all lists where doc appears
			
 
				-// k=60 is standard, provides good balance between top and lower ranks
			
 
				-
			
 
				-function reciprocalRankFusion(
			
 
				-  resultLists: RankedResult[][],
			
 
				-  weights: number[] = [],  // Weight per result list (default 1.0)
			
 
				-  k: number = 60
			
 
				-): RankedResult[] {
			
 
				-  const scores = new Map<string, { score: number; displayPath: string; title: string; body: string; bestRank: number }>();
			
 
				-
			
 
				-  for (let listIdx = 0; listIdx < resultLists.length; listIdx++) {
			
 
				-    const results = resultLists[listIdx];
			
 
				-    if (!results) continue;
			
 
				-    const weight = weights[listIdx] ?? 1.0;
			
 
				-    for (let rank = 0; rank < results.length; rank++) {
			
 
				-      const doc = results[rank];
			
 
				-      if (!doc) continue; // Ensure doc is not undefined
			
 
				-      const rrfScore = weight / (k + rank + 1);
			
 
				-      const existing = scores.get(doc.file);
			
 
				-      if (existing) {
			
 
				-        existing.score += rrfScore;
			
 
				-        existing.bestRank = Math.min(existing.bestRank, rank);
			
 
				-      } else {
			
 
				-        scores.set(doc.file, { score: rrfScore, displayPath: doc.displayPath, title: doc.title, body: doc.body, bestRank: rank });
			
 
				-      }
			
 
				-    }
			
 
				-  }
			
 
				-
			
 
				-  // Add bonus for best rank: documents that ranked #1-3 in any list get a boost
			
 
				-  // This prevents dilution of exact matches by expansion queries
			
 
				-  return Array.from(scores.entries())
			
 
				-    .map(([file, { score, displayPath, title, body, bestRank }]) => {
			
 
				-      let bonus = 0;
			
 
				-      if (bestRank === 0) bonus = 0.05;  // Ranked #1 somewhere
			
 
				-      else if (bestRank <= 2) bonus = 0.02;  // Ranked top-3 somewhere
			
 
				-      return { file, displayPath, title, body, score: score + bonus };
			
 
				-    })
			
 
				-    .sort((a, b) => b.score - a.score);
			
 
				-}
			
 
				-
			
 
				 type OutputOptions = {
			
 
				   format: OutputFormat;
			
 
				   full: boolean;
			
@@ -1791,12 +1735,6 @@ function shortPath(dirpath: string): string {
 
				   return dirpath;
			
 
				 }
			
 
				 
			
 
				-// Add line numbers to text content
			
 
				-function addLineNumbers(text: string, startLine: number = 1): string {
			
 
				-  const lines = text.split('\n');
			
 
				-  return lines.map((line, i) => `${startLine + i}: ${line}`).join('\n');
			
 
				-}
			
 
				-
			
 
				 function outputResults(results: { file: string; displayPath: string; title: string; body: string; score: number; context?: string | null; chunkPos?: number; hash?: string; docid?: string }[], query: string, opts: OutputOptions): void {
			
 
				   const filtered = results.filter(r => r.score >= opts.minScore).slice(0, opts.limit);
			
 
				 
			
@@ -1957,11 +1895,24 @@ function search(query: string, opts: OutputOptions): void {
 
				   outputResults(resultsWithContext, query, opts);
			
 
				 }
			
 
				 
			
 
				-async function vectorSearch(query: string, opts: OutputOptions, model: string = DEFAULT_EMBED_MODEL): Promise<void> {
			
 
				-  const db = getDb();
			
 
				+// Print the original query and its expansions to stderr as a small tree (CLI progress feedback).
			
 
				+// Each expansion is flattened onto one line and shortened to at most 72 characters.
			
 
				+function logExpansionTree(originalQuery: string, expanded: ExpandedQuery[]): void {
			
 
				+  const branches: string[] = [
			
 
				+    `${c.dim}├─ ${originalQuery}${c.reset}`,
			
 
				+    ...expanded.map((entry) => {
			
 
				+      const flat = entry.text.replace(/\n/g, ' ');
			
 
				+      const shown = flat.length > 72 ? flat.substring(0, 69) + '...' : flat;
			
 
				+      return `${c.dim}├─ ${entry.type}: ${shown}${c.reset}`;
			
 
				+    }),
			
 
				+  ];
			
 
				+  // The root entry is always present, so a last branch always exists; give it the closing glyph.
			
 
				+  const lastIdx = branches.length - 1;
			
 
				+  branches[lastIdx] = branches[lastIdx]!.replace('├─', '└─');
			
 
				+  branches.forEach((branch) => process.stderr.write(branch + '\n'));
			
 
				+}
			
 
				+
			
 
				+async function vectorSearch(query: string, opts: OutputOptions, _model: string = DEFAULT_EMBED_MODEL): Promise<void> {
			
 
				+  const store = getStore();
			
 
				 
			
 
				-  // Validate collection filter if specified
			
 
				-  let collectionName: string | undefined;
			
 
				   if (opts.collection) {
			
 
				     const coll = getCollectionFromYaml(opts.collection);
			
 
				     if (!coll) {
			
@@ -1969,59 +1920,22 @@ async function vectorSearch(query: string, opts: OutputOptions, model: string =
 
				       closeDb();
			
 
				       process.exit(1);
			
 
				     }
			
 
				-    collectionName = opts.collection;
			
 
				-  }
			
 
				-
			
 
				-  const tableExists = db.prepare(`SELECT name FROM sqlite_master WHERE type='table' AND name='vectors_vec'`).get();
			
 
				-  if (!tableExists) {
			
 
				-    console.error("Vector index not found. Run 'qmd embed' first to create embeddings.");
			
 
				-    closeDb();
			
 
				-    return;
			
 
				   }
			
 
				 
			
 
				-  // Check index health and warn about issues
			
 
				-  checkIndexHealth(db);
			
 
				+  checkIndexHealth(store.db);
			
 
				 
			
 
				-  // Wrap LLM operations in a session for lifecycle management
			
 
				-  await withLLMSession(async (session) => {
			
 
				-    // Expand query using structured output (no lexical for vector-only search)
			
 
				-    const queryables = await expandQueryStructured(query, false, opts.context, session);
			
 
				-
			
 
				-    // Build list of queries for vector search: original, vec, and hyde
			
 
				-    const vectorQueries: string[] = [query];
			
 
				-    for (const q of queryables) {
			
 
				-      if (q.type === 'vec' || q.type === 'hyde') {
			
 
				-        if (q.text && q.text !== query) {
			
 
				-          vectorQueries.push(q.text);
			
 
				-        }
			
 
				-      }
			
 
				-    }
			
 
				-
			
 
				-    process.stderr.write(`${c.dim}Searching ${vectorQueries.length} vector queries...${c.reset}\n`);
			
 
				-
			
 
				-    // Collect results from all query variations
			
 
				-    const perQueryLimit = opts.all ? 500 : 20;
			
 
				-    const allResults = new Map<string, { file: string; displayPath: string; title: string; body: string; score: number; hash: string }>();
			
 
				-
			
 
				-    // IMPORTANT: Run vector searches sequentially, not with Promise.all.
			
 
				-    // node-llama-cpp's embedding context hangs when multiple concurrent embed() calls
			
 
				-    // are made. This is a known limitation of the LlamaEmbeddingContext.
			
 
				-    // See: https://github.com/tobi/qmd/pull/23
			
 
				-    for (const q of vectorQueries) {
			
 
				-      const vecResults = await searchVec(db, q, model, perQueryLimit, collectionName as any, session);
			
 
				-      for (const r of vecResults) {
			
 
				-        const existing = allResults.get(r.filepath);
			
 
				-        if (!existing || r.score > existing.score) {
			
 
				-          allResults.set(r.filepath, { file: r.filepath, displayPath: r.displayPath, title: r.title, body: r.body || "", score: r.score, hash: r.hash });
			
 
				-        }
			
 
				-      }
			
 
				-    }
			
 
				-
			
 
				-    // Sort by max score and limit to requested count
			
 
				-    const results = Array.from(allResults.values())
			
 
				-      .sort((a, b) => b.score - a.score)
			
 
				-      .slice(0, opts.limit)
			
 
				-      .map(r => ({ ...r, context: getContextForFile(db, r.file) }));
			
 
				+  await withLLMSession(async () => {
			
 
				+    const results = await vectorSearchQuery(store, query, {
			
 
				+      collection: opts.collection,
			
 
				+      limit: opts.all ? 500 : (opts.limit || 10),
			
 
				+      minScore: opts.minScore || 0.3,
			
 
				+      hooks: {
			
 
				+        onExpand: (original, expanded) => {
			
 
				+          logExpansionTree(original, expanded);
			
 
				+          process.stderr.write(`${c.dim}Searching ${expanded.length + 1} vector queries...${c.reset}\n`);
			
 
				+        },
			
 
				+      },
			
 
				+    });
			
 
				 
			
 
				     closeDb();
			
 
				 
			
@@ -2029,62 +1943,22 @@ async function vectorSearch(query: string, opts: OutputOptions, model: string =
 
				       console.log("No results found.");
			
 
				       return;
			
 
				     }
			
 
				-    outputResults(results, query, { ...opts, limit: results.length }); // Already limited
			
 
				-  }, { maxDuration: 10 * 60 * 1000, name: 'vectorSearch' });
			
 
				-}
			
 
				-
			
 
				-// Expand query using structured output with GBNF grammar
			
 
				-async function expandQueryStructured(query: string, includeLexical: boolean = true, context?: string, session?: ILLMSession): Promise<Queryable[]> {
			
 
				-  process.stderr.write(`${c.dim}Expanding query...${c.reset}\n`);
			
 
				-
			
 
				-  const queryables = session
			
 
				-    ? await session.expandQuery(query, { includeLexical, context })
			
 
				-    : await getDefaultLlamaCpp().expandQuery(query, { includeLexical, context });
			
 
				-
			
 
				-  // Log the expansion as a tree
			
 
				-  const lines: string[] = [];
			
 
				-  const bothLabel = includeLexical ? ' · (lexical+vector)' : ' · (vector)';
			
 
				-  lines.push(`${c.dim}├─ ${query}${bothLabel}${c.reset}`);
			
 
				-
			
 
				-  for (let i = 0; i < queryables.length; i++) {
			
 
				-    const q = queryables[i];
			
 
				-    if (!q || q.text === query) continue;
			
 
				-
			
 
				-    let textPreview = q.text.replace(/\n/g, ' ');
			
 
				-    if (textPreview.length > 80) {
			
 
				-      textPreview = textPreview.substring(0, 77) + '...';
			
 
				-    }
			
 
				-
			
 
				-    const label = q.type === 'lex' ? 'lexical' : (q.type === 'hyde' ? 'hyde' : 'vector');
			
 
				-    lines.push(`${c.dim}├─ ${textPreview} · (${label})${c.reset}`);
			
 
				-  }
			
 
				-
			
 
				-  // Fix last item to use └─ instead of ├─
			
 
				-  if (lines.length > 0) {
			
 
				-    lines[lines.length - 1] = lines[lines.length - 1]!.replace('├─', '└─');
			
 
				-  }
			
 
				 
			
 
				-  for (const line of lines) {
			
 
				-    process.stderr.write(line + '\n');
			
 
				-  }
			
 
				-
			
 
				-  return queryables;
			
 
				-}
			
 
				-
			
 
				-async function expandQuery(query: string, _model: string = DEFAULT_QUERY_MODEL, _db?: Database, session?: ILLMSession): Promise<string[]> {
			
 
				-  const queryables = await expandQueryStructured(query, true, undefined, session);
			
 
				-  const queries = new Set<string>([query]);
			
 
				-  for (const q of queryables) {
			
 
				-    queries.add(q.text);
			
 
				-  }
			
 
				-  return Array.from(queries);
			
 
				+    outputResults(results.map(r => ({
			
 
				+      file: r.file,
			
 
				+      displayPath: r.displayPath,
			
 
				+      title: r.title,
			
 
				+      body: r.body,
			
 
				+      score: r.score,
			
 
				+      context: r.context,
			
 
				+      docid: r.docid,
			
 
				+    })), query, { ...opts, limit: results.length });
			
 
				+  }, { maxDuration: 10 * 60 * 1000, name: 'vectorSearch' });
			
 
				 }
			
 
				 
			
 
				-async function querySearch(query: string, opts: OutputOptions, embedModel: string = DEFAULT_EMBED_MODEL, rerankModel: string = DEFAULT_RERANK_MODEL): Promise<void> {
			
 
				-  const db = getDb();
			
 
				+async function querySearch(query: string, opts: OutputOptions, _embedModel: string = DEFAULT_EMBED_MODEL, _rerankModel: string = DEFAULT_RERANK_MODEL): Promise<void> {
			
 
				+  const store = getStore();
			
 
				 
			
 
				-  // Validate collection filter if specified
			
 
				-  let collectionName: string | undefined;
			
 
				   if (opts.collection) {
			
 
				     const coll = getCollectionFromYaml(opts.collection);
			
 
				     if (!coll) {
			
@@ -2092,198 +1966,51 @@ async function querySearch(query: string, opts: OutputOptions, embedModel: strin
 
				       closeDb();
			
 
				       process.exit(1);
			
 
				     }
			
 
				-    collectionName = opts.collection;
			
 
				   }
			
 
				 
			
 
				-  // Check index health and warn about issues
			
 
				-  checkIndexHealth(db);
			
 
				-
			
 
				-  // Run initial BM25 search (will be reused for retrieval)
			
 
				-  const initialFts = searchFTS(db, query, 20, collectionName as any);
			
 
				-  let hasVectors = !!db.prepare(`SELECT name FROM sqlite_master WHERE type='table' AND name='vectors_vec'`).get();
			
 
				-
			
 
				-  // Check if initial results have strong signals (skip expansion if so)
			
 
				-  // Strong signal = top result is strong AND clearly separated from runner-up.
			
 
				-  // This avoids skipping expansion when BM25 has lots of mediocre matches.
			
 
				-  const topScore = initialFts[0]?.score ?? 0;
			
 
				-  const secondScore = initialFts[1]?.score ?? 0;
			
 
				-  const hasStrongSignal = initialFts.length > 0 && topScore >= 0.85 && (topScore - secondScore) >= 0.15;
			
 
				-
			
 
				-  // Wrap LLM operations in a session for lifecycle management
			
 
				-  await withLLMSession(async (session) => {
			
 
				-    let ftsQueries: string[] = [query];
			
 
				-    let vectorQueries: string[] = [query];
			
 
				-
			
 
				-    if (hasStrongSignal) {
			
 
				-      // Strong BM25 signal - skip expensive LLM expansion
			
 
				-      process.stderr.write(`${c.dim}Strong BM25 signal (${topScore.toFixed(2)}) - skipping expansion${c.reset}\n`);
			
 
				-      // Still log the "expansion tree" in the same style as vsearch for consistency.
			
 
				-      {
			
 
				-        const lines: string[] = [];
			
 
				-        lines.push(`${c.dim}├─ ${query} · (lexical+vector)${c.reset}`);
			
 
				-        lines[lines.length - 1] = lines[lines.length - 1]!.replace('├─', '└─');
			
 
				-        for (const line of lines) process.stderr.write(line + '\n');
			
 
				-      }
			
 
				-    } else {
			
 
				-      // Weak signal - expand query for better recall
			
 
				-      const queryables = await expandQueryStructured(query, true, opts.context, session);
			
 
				-
			
 
				-      for (const q of queryables) {
			
 
				-        if (q.type === 'lex') {
			
 
				-          if (q.text && q.text !== query) ftsQueries.push(q.text);
			
 
				-        } else if (q.type === 'vec' || q.type === 'hyde') {
			
 
				-          if (q.text && q.text !== query) vectorQueries.push(q.text);
			
 
				-        }
			
 
				-      }
			
 
				-    }
			
 
				-
			
 
				-    process.stderr.write(`${c.dim}Searching ${ftsQueries.length} lexical + ${vectorQueries.length} vector queries...${c.reset}\n`);
			
 
				-
			
 
				-    // Collect ranked result lists for RRF fusion
			
 
				-    const rankedLists: RankedResult[][] = [];
			
 
				-
			
 
				-    // Map to store hash by filepath for final results
			
 
				-    const hashMap = new Map<string, string>();
			
 
				-
			
 
				-    // Run all searches concurrently (FTS + Vector)
			
 
				-    const searchPromises: Promise<void>[] = [];
			
 
				-
			
 
				-    // FTS searches
			
 
				-    for (const q of ftsQueries) {
			
 
				-      if (!q) continue;
			
 
				-      searchPromises.push((async () => {
			
 
				-        const ftsResults = searchFTS(db, q, 20, (collectionName || "") as any);
			
 
				-        if (ftsResults.length > 0) {
			
 
				-          for (const r of ftsResults) {
			
 
				-            // Mutex for hashMap is not strictly needed as it's just adding values
			
 
				-            hashMap.set(r.filepath, r.hash);
			
 
				-          }
			
 
				-          rankedLists.push(ftsResults.map(r => ({ file: r.filepath, displayPath: r.displayPath, title: r.title, body: r.body || "", score: r.score })));
			
 
				-        }
			
 
				-      })());
			
 
				-    }
			
 
				-
			
 
				-    // Vector searches (session ensures contexts stay alive)
			
 
				-    if (hasVectors) {
			
 
				-      for (const q of vectorQueries) {
			
 
				-        if (!q) continue;
			
 
				-        searchPromises.push((async () => {
			
 
				-          const vecResults = await searchVec(db, q, embedModel, 20, (collectionName || "") as any, session);
			
 
				-          if (vecResults.length > 0) {
			
 
				-            for (const r of vecResults) hashMap.set(r.filepath, r.hash);
			
 
				-            rankedLists.push(vecResults.map(r => ({ file: r.filepath, displayPath: r.displayPath, title: r.title, body: r.body || "", score: r.score })));
			
 
				-          }
			
 
				-        })());
			
 
				-      }
			
 
				-    }
			
 
				-
			
 
				-    await Promise.all(searchPromises);
			
 
				+  checkIndexHealth(store.db);
			
 
				+
			
 
				+  await withLLMSession(async () => {
			
 
				+    const results = await hybridQuery(store, query, {
			
 
				+      collection: opts.collection,
			
 
				+      limit: opts.all ? 500 : (opts.limit || 10),
			
 
				+      minScore: opts.minScore || 0,
			
 
				+      hooks: {
			
 
				+        onStrongSignal: (score) => {
			
 
				+          process.stderr.write(`${c.dim}Strong BM25 signal (${score.toFixed(2)}) — skipping expansion${c.reset}\n`);
			
 
				+        },
			
 
				+        onExpand: (original, expanded) => {
			
 
				+          logExpansionTree(original, expanded);
			
 
				+          process.stderr.write(`${c.dim}Searching ${expanded.length + 1} queries...${c.reset}\n`);
			
 
				+        },
			
 
				+        onRerankStart: (chunkCount) => {
			
 
				+          process.stderr.write(`${c.dim}Reranking ${chunkCount} chunks...${c.reset}\n`);
			
 
				+          progress.indeterminate();
			
 
				+        },
			
 
				+        onRerankDone: () => {
			
 
				+          progress.clear();
			
 
				+        },
			
 
				+      },
			
 
				+    });
			
 
				 
			
 
				-    // Apply Reciprocal Rank Fusion to combine all ranked lists
			
 
				-    // Give 2x weight to original query results (first 2 lists: FTS + vector)
			
 
				-    const weights = rankedLists.map((_, i) => i < 2 ? 2.0 : 1.0);
			
 
				-    const fused = reciprocalRankFusion(rankedLists, weights);
			
 
				-    // Hard cap reranking for latency/cost. We rerank per-document (best chunk only).
			
 
				-    const RERANK_DOC_LIMIT = 40;
			
 
				-    const candidates = fused.slice(0, RERANK_DOC_LIMIT);
			
 
				+    closeDb();
			
 
				 
			
 
				-    if (candidates.length === 0) {
			
 
				+    if (results.length === 0) {
			
 
				       console.log("No results found.");
			
 
				-      closeDb();
			
 
				       return;
			
 
				     }
			
 
				 
			
 
				-    // Rerank multiple chunks per document, then aggregate scores
			
 
				-    // This improves ranking for long documents where keyword-matched chunk isn't always best
			
 
				-    // We only rerank ONE chunk per document (best chunk by a simple keyword heuristic),
			
 
				-    // so we never rerank more than RERANK_DOC_LIMIT items.
			
 
				-    const chunksToRerank: { file: string; text: string; chunkIdx: number }[] = [];
			
 
				-    const docChunkMap = new Map<string, { chunks: { text: string; pos: number }[]; bestIdx: number }>();
			
 
				-
			
 
				-    const queryTerms = query.toLowerCase().split(/\s+/).filter(t => t.length > 2);
			
 
				-    for (const cand of candidates) {
			
 
				-      const chunks = chunkDocument(cand.body);
			
 
				-      if (chunks.length === 0) continue;
			
 
				-
			
 
				-      // Choose best chunk by keyword matches; fall back to first chunk.
			
 
				-      let bestIdx = 0;
			
 
				-      let bestScore = -1;
			
 
				-      for (let i = 0; i < chunks.length; i++) {
			
 
				-        const chunkLower = chunks[i]!.text.toLowerCase();
			
 
				-        const score = queryTerms.reduce((acc, term) => acc + (chunkLower.includes(term) ? 1 : 0), 0);
			
 
				-        if (score > bestScore) {
			
 
				-          bestScore = score;
			
 
				-          bestIdx = i;
			
 
				-        }
			
 
				-      }
			
 
				-
			
 
				-      chunksToRerank.push({ file: cand.file, text: chunks[bestIdx]!.text, chunkIdx: bestIdx });
			
 
				-      docChunkMap.set(cand.file, { chunks, bestIdx });
			
 
				-    }
			
 
				-
			
 
				-    // Rerank selected chunks (with caching). One chunk per doc -> one rerank item per doc.
			
 
				-    const reranked = await rerank(
			
 
				-      query,
			
 
				-      chunksToRerank.map(ch => ({ file: ch.file, text: ch.text })),
			
 
				-      rerankModel,
			
 
				-      db,
			
 
				-      session
			
 
				-    );
			
 
				-
			
 
				-    const aggregatedScores = new Map<string, { score: number; bestChunkIdx: number }>();
			
 
				-    for (const r of reranked) {
			
 
				-      const chunkInfo = docChunkMap.get(r.file);
			
 
				-      aggregatedScores.set(r.file, { score: r.score, bestChunkIdx: chunkInfo?.bestIdx ?? 0 });
			
 
				-    }
			
 
				-
			
 
				-    // Blend RRF position score with aggregated reranker score using position-aware weights
			
 
				-    // Top retrieval results get more protection from reranker disagreement
			
 
				-    const candidateMap = new Map(candidates.map(cand => [cand.file, { displayPath: cand.displayPath, title: cand.title, body: cand.body }]));
			
 
				-    const rrfRankMap = new Map(candidates.map((cand, i) => [cand.file, i + 1])); // 1-indexed rank
			
 
				-
			
 
				-    const finalResults = Array.from(aggregatedScores.entries()).map(([file, { score: rerankScore, bestChunkIdx }]) => {
			
 
				-      const rrfRank = rrfRankMap.get(file) || 30;
			
 
				-      // Position-aware blending: top retrieval results preserved more
			
 
				-      // Rank 1-3: 75% RRF, 25% reranker (trust retrieval for exact matches)
			
 
				-      // Rank 4-10: 60% RRF, 40% reranker
			
 
				-      // Rank 11+: 40% RRF, 60% reranker (trust reranker for lower-ranked)
			
 
				-      let rrfWeight: number;
			
 
				-      if (rrfRank <= 3) {
			
 
				-        rrfWeight = 0.75;
			
 
				-      } else if (rrfRank <= 10) {
			
 
				-        rrfWeight = 0.60;
			
 
				-      } else {
			
 
				-        rrfWeight = 0.40;
			
 
				-      }
			
 
				-      const rrfScore = 1 / rrfRank;  // Position-based: 1, 0.5, 0.33...
			
 
				-      const blendedScore = rrfWeight * rrfScore + (1 - rrfWeight) * rerankScore;
			
 
				-      const candidate = candidateMap.get(file);
			
 
				-      // Use the best-scoring chunk's text for the body (better for snippets)
			
 
				-      const chunkInfo = docChunkMap.get(file);
			
 
				-      const chunkBody = chunkInfo ? (chunkInfo.chunks[bestChunkIdx]?.text || chunkInfo.chunks[0]!.text) : candidate?.body || "";
			
 
				-      const chunkPos = chunkInfo ? (chunkInfo.chunks[bestChunkIdx]?.pos || 0) : 0;
			
 
				-      return {
			
 
				-        file,
			
 
				-        displayPath: candidate?.displayPath || "",
			
 
				-        title: candidate?.title || "",
			
 
				-        body: chunkBody,
			
 
				-        chunkPos,
			
 
				-        score: blendedScore,
			
 
				-        context: getContextForFile(db, file),
			
 
				-        hash: hashMap.get(file) || "",
			
 
				-      };
			
 
				-    }).sort((a, b) => b.score - a.score);
			
 
				-
			
 
				-    // Deduplicate by file (safety net - shouldn't happen but prevents duplicate output)
			
 
				-    const seenFiles = new Set<string>();
			
 
				-    const dedupedResults = finalResults.filter(r => {
			
 
				-      if (seenFiles.has(r.file)) return false;
			
 
				-      seenFiles.add(r.file);
			
 
				-      return true;
			
 
				-    });
			
 
				-
			
 
				-    closeDb();
			
 
				-    outputResults(dedupedResults, query, opts);
			
 
				+    // Map to CLI output format — use bestChunk for snippet display
			
 
				+    outputResults(results.map(r => ({
			
 
				+      file: r.file,
			
 
				+      displayPath: r.displayPath,
			
 
				+      title: r.title,
			
 
				+      body: r.bestChunk,
			
 
				+      chunkPos: r.bestChunkPos,
			
 
				+      score: r.score,
			
 
				+      context: r.context,
			
 
				+      docid: r.docid,
			
 
				+    })), query, { ...opts, limit: results.length });
			
 
				   }, { maxDuration: 10 * 60 * 1000, name: 'querySearch' });
			
 
				 }
			
 
				 
			
@@ -2327,6 +2054,10 @@ function parseCLI() {
 
				       from: { type: "string" },  // start line
			
 
				       "max-bytes": { type: "string" },  // max bytes for multi-get
			
 
				       "line-numbers": { type: "boolean" },  // add line numbers to output
			
 
				+      // MCP HTTP transport options
			
 
				+      http: { type: "boolean" },
			
 
				+      daemon: { type: "boolean" },
			
 
				+      port: { type: "string" },
			
 
				     },
			
 
				     allowPositionals: true,
			
 
				     strict: false, // Allow unknown options to pass through
			
@@ -2390,7 +2121,10 @@ function showHelp(): void {
 
				   console.log("  qmd search <query>            - Full-text search (BM25)");
			
 
				   console.log("  qmd vsearch <query>           - Vector similarity search");
			
 
				   console.log("  qmd query <query>             - Combined search with query expansion + reranking");
			
 
				-  console.log("  qmd mcp                       - Start MCP server (for AI agent integration)");
			
 
				+  console.log("  qmd mcp                       - Start MCP server (stdio transport)");
			
 
				+  console.log("  qmd mcp --http [--port N]     - Start MCP server (HTTP transport, default port 8181)");
			
 
				+  console.log("  qmd mcp --http --daemon       - Start MCP server as background daemon");
			
 
				+  console.log("  qmd mcp stop                  - Stop background MCP daemon");
			
 
				   console.log("");
			
 
				   console.log("Global options:");
			
 
				   console.log("  --index <name>             - Use custom index name (default: index)");
			
@@ -2651,8 +2385,85 @@ if (import.meta.main) {
 
				       break;
			
 
				 
			
 
				     case "mcp": {
			
 
				-      const { startMcpServer } = await import("./mcp.js");
			
 
				-      await startMcpServer();
			
 
				+      const sub = cli.args[0]; // "stop", or undefined when starting a server
			
 
				+
			
 
				+      // Cache dir for PID/log files — same dir as the index
			
 
				+      const cacheDir = Bun.env.XDG_CACHE_HOME
			
 
				+        ? resolve(Bun.env.XDG_CACHE_HOME, "qmd")
			
 
				+        : resolve(homedir(), ".cache", "qmd");
			
 
				+      const pidPath = resolve(cacheDir, "mcp.pid");
			
 
				+
			
 
				+      // Read the daemon PID recorded in the PID file. Yields NaN for empty or
			
 
				+      // corrupt content and for non-positive values: process.kill() treats 0 and
			
 
				+      // negative PIDs as process-group targets, so they must never be signalled.
			
 
				+      const readPid = (): number => {
			
 
				+        const parsed = parseInt(readFileSync(pidPath, "utf-8").trim(), 10);
			
 
				+        return Number.isInteger(parsed) && parsed > 0 ? parsed : NaN;
			
 
				+      };
			
 
				+
			
 
				+      // Liveness probe via signal 0. EPERM means the process exists but is owned
			
 
				+      // by another user, so it still counts as running.
			
 
				+      const isRunning = (pid: number): boolean => {
			
 
				+        if (Number.isNaN(pid)) return false;
			
 
				+        try {
			
 
				+          process.kill(pid, 0);
			
 
				+          return true;
			
 
				+        } catch (e: any) {
			
 
				+          return e?.code === "EPERM";
			
 
				+        }
			
 
				+      };
			
 
				+
			
 
				+      // Subcommands take priority over flags
			
 
				+      if (sub === "stop") {
			
 
				+        if (!existsSync(pidPath)) {
			
 
				+          console.log("Not running (no PID file).");
			
 
				+          process.exit(0);
			
 
				+        }
			
 
				+        const pid = readPid();
			
 
				+        let stopped = false;
			
 
				+        if (isRunning(pid)) {
			
 
				+          try {
			
 
				+            process.kill(pid, "SIGTERM");
			
 
				+            stopped = true;
			
 
				+          } catch (e: any) {
			
 
				+            // ESRCH: the process exited between the probe and the signal, so the
			
 
				+            // PID file is simply stale. Any other failure (e.g. EPERM) means the
			
 
				+            // server is still running: report it and keep the PID file.
			
 
				+            if (e?.code !== "ESRCH") {
			
 
				+              console.error(`Failed to stop QMD MCP server (PID ${pid}): ${e?.message ?? e}`);
			
 
				+              process.exit(1);
			
 
				+            }
			
 
				+          }
			
 
				+        }
			
 
				+        unlinkSync(pidPath);
			
 
				+        console.log(stopped
			
 
				+          ? `Stopped QMD MCP server (PID ${pid}).`
			
 
				+          : "Cleaned up stale PID file (server was not running).");
			
 
				+        process.exit(0);
			
 
				+      }
			
 
				+
			
 
				+      if (cli.values.http) {
			
 
				+        const port = Number(cli.values.port) || 8181;
			
 
				+
			
 
				+        if (cli.values.daemon) {
			
 
				+          // Guard: refuse to start a second daemon while one is still alive
			
 
				+          if (existsSync(pidPath)) {
			
 
				+            const existingPid = readPid();
			
 
				+            if (isRunning(existingPid)) {
			
 
				+              console.error(`Already running (PID ${existingPid}). Run 'qmd mcp stop' first.`);
			
 
				+              process.exit(1);
			
 
				+            }
			
 
				+            // Stale PID file — continue; it is overwritten below
			
 
				+          }
			
 
				+
			
 
				+          mkdirSync(cacheDir, { recursive: true });
			
 
				+          const logPath = resolve(cacheDir, "mcp.log");
			
 
				+          const logFd = openSync(logPath, "w"); // truncate — fresh log per daemon run
			
 
				+          const child = Bun.spawn([process.execPath, import.meta.path, "mcp", "--http", "--port", String(port)], {
			
 
				+            stdout: logFd,
			
 
				+            stderr: logFd,
			
 
				+            stdin: "ignore",
			
 
				+          });
			
 
				+          child.unref();
			
 
				+          closeSync(logFd); // parent's copy; child inherited the fd
			
 
				+
			
 
				+          writeFileSync(pidPath, String(child.pid));
			
 
				+          console.log(`Started on http://localhost:${port}/mcp (PID ${child.pid})`);
			
 
				+          console.log(`Logs: ${logPath}`);
			
 
				+          process.exit(0);
			
 
				+        }
			
 
				+
			
 
				+        // Foreground HTTP mode — remove top-level cursor handlers so the
			
 
				+        // async cleanup handlers in startMcpHttpServer actually run.
			
 
				+        process.removeAllListeners("SIGTERM");
			
 
				+        process.removeAllListeners("SIGINT");
			
 
				+        const { startMcpHttpServer } = await import("./mcp.js");
			
 
				+        try {
			
 
				+          await startMcpHttpServer(port);
			
 
				+        } catch (e: any) {
			
 
				+          if (e?.code === "EADDRINUSE") {
			
 
				+            console.error(`Port ${port} already in use. Try a different port with --port.`);
			
 
				+            process.exit(1);
			
 
				+          }
			
 
				+          throw e;
			
 
				+        }
			
 
				+      } else {
			
 
				+        // Default: stdio transport
			
 
				+        const { startMcpServer } = await import("./mcp.js");
			
 
				+        await startMcpServer();
			
 
				+      }
			
 
				       break;
			
 
				     }
			
 
				 
			
--- a/src/store.test.ts
+++ b/src/store.test.ts
@@ -35,6 +35,8 @@ import {
 
				   parseVirtualPath,
			
 
				   normalizeDocid,
			
 
				   isDocid,
			
 
				+  STRONG_SIGNAL_MIN_SCORE,
			
 
				+  STRONG_SIGNAL_MIN_GAP,
			
 
				   type Store,
			
 
				   type DocumentResult,
			
 
				   type SearchResult,
			
@@ -908,6 +910,96 @@ describe("FTS Search", () => {
 
				     await cleanupTestDb(store);
			
 
				   });
			
 
				 
			
 
				+  // BM25 IDF requires corpus depth — helper adds non-matching docs so term frequency
			
 
				+  // differentiation produces meaningful scores (2-doc corpus has near-zero IDF).
			
 
				+  async function addNoiseDocuments(db: Database, collectionName: string, count = 8) {
			
 
				+    for (let i = 0; i < count; i++) {
			
 
				+      await insertTestDocument(db, collectionName, {
			
 
				+        name: `noise${i}`,
			
 
				+        title: `Unrelated Topic ${i}`,
			
 
				+        body: `This document discusses completely different subjects like gardening and cooking ${i}`,
			
 
				+        displayPath: `test/noise${i}.md`,
			
 
				+      });
			
 
				+    }
			
 
				+  }
			
 
				+
			
 
				+  test("searchFTS scores: stronger BM25 match → higher normalized score", async () => {
			
 
				+    const store = await createTestStore();
			
 
				+    const collectionName = await createTestCollection();
			
 
				+    await addNoiseDocuments(store.db, collectionName);
			
 
				+
			
 
				+    // "alpha" appears in title (10x weight) + body → strong BM25
			
 
				+    await insertTestDocument(store.db, collectionName, {
			
 
				+      name: "strong",
			
 
				+      title: "Alpha Guide",
			
 
				+      body: "This is the definitive alpha reference with alpha details and more alpha info",
			
 
				+      displayPath: "test/strong.md",
			
 
				+    });
			
 
				+
			
 
				+    // "alpha" appears once in body only → weaker BM25
			
 
				+    await insertTestDocument(store.db, collectionName, {
			
 
				+      name: "weak",
			
 
				+      title: "General Notes",
			
 
				+      body: "Some notes that mention alpha in passing among other topics and keywords",
			
 
				+      displayPath: "test/weak.md",
			
 
				+    });
			
 
				+
			
 
				+    const results = store.searchFTS("alpha", 10);
			
 
				+    expect(results.length).toBe(2);
			
 
				+
			
 
				+    // Verify score direction: stronger match (title + body) should score HIGHER
			
 
				+    const strongResult = results.find(r => r.displayPath.includes("strong"))!;
			
 
				+    const weakResult = results.find(r => r.displayPath.includes("weak"))!;
			
 
				+    expect(strongResult.score).toBeGreaterThan(weakResult.score);
			
 
				+
			
 
				+    // Verify scores are in valid (0, 1) range
			
 
				+    for (const r of results) {
			
 
				+      expect(r.score).toBeGreaterThan(0);
			
 
				+      expect(r.score).toBeLessThan(1);
			
 
				+    }
			
 
				+
			
 
				+    await cleanupTestDb(store);
			
 
				+  });
			
 
				+
			
 
				+  test("searchFTS scores: minScore filter keeps strong matches, drops weak", async () => {
			
 
				+    const store = await createTestStore();
			
 
				+    const collectionName = await createTestCollection();
			
 
				+    await addNoiseDocuments(store.db, collectionName);
			
 
				+
			
 
				+    // Strong match: keyword in title (10x weight) + repeated in body
			
 
				+    await insertTestDocument(store.db, collectionName, {
			
 
				+      name: "strong",
			
 
				+      title: "Kubernetes Deployment",
			
 
				+      body: "Kubernetes deployment strategies for kubernetes clusters using kubernetes operators",
			
 
				+      displayPath: "test/strong.md",
			
 
				+    });
			
 
				+
			
 
				+    // Weak match: keyword appears once in body only
			
 
				+    await insertTestDocument(store.db, collectionName, {
			
 
				+      name: "weak",
			
 
				+      title: "Random Notes",
			
 
				+      body: "Various topics including a brief kubernetes mention among many other unrelated things",
			
 
				+      displayPath: "test/weak.md",
			
 
				+    });
			
 
				+
			
 
				+    const allResults = store.searchFTS("kubernetes", 10);
			
 
				+    expect(allResults.length).toBe(2);
			
 
				+
			
 
				+    // With a minScore threshold, strong match should survive, weak should be filterable
			
 
				+    const strongScore = allResults.find(r => r.displayPath.includes("strong"))!.score;
			
 
				+    const weakScore = allResults.find(r => r.displayPath.includes("weak"))!.score;
			
 
				+
			
 
				+    // Find a threshold between them
			
 
				+    const threshold = (strongScore + weakScore) / 2;
			
 
				+    const filtered = allResults.filter(r => r.score >= threshold);
			
 
				+
			
 
				+    // Strong match survives the filter, weak does not
			
 
				+    expect(filtered.length).toBe(1);
			
 
				+    expect(filtered[0]!.displayPath).toContain("strong");
			
 
				+
			
 
				+    await cleanupTestDb(store);
			
 
				+  });
			
 
				+
			
 
				   test("searchFTS ignores inactive documents", async () => {
			
 
				     const store = await createTestStore();
			
 
				     const collectionName = await createTestCollection();
			
@@ -933,6 +1025,53 @@ describe("FTS Search", () => {
 
				 
			
 
				     await cleanupTestDb(store);
			
 
				   });
			
 
				+
			
 
				+  test("searchFTS scores: strong signal detection works with correct normalization", async () => {
			
 
				+    const store = await createTestStore();
			
 
				+    const collectionName = await createTestCollection();
			
 
				+
			
 
				+    // BM25 IDF needs meaningful corpus depth for strong signal to fire.
			
 
				+    // 50 noise docs give IDF ≈ log(50/2) ≈ 3.2 — enough for scores above 0.85.
			
 
				+    await addNoiseDocuments(store.db, collectionName, 50);
			
 
				+
			
 
				+    // Dominant: keyword in filepath (10x BM25 weight column) + title + body
			
 
				+    await insertTestDocument(store.db, collectionName, {
			
 
				+      name: "dominant",
			
 
				+      title: "Zephyr Configuration Guide",
			
 
				+      body: "Complete zephyr configuration guide. Zephyr setup instructions for zephyr deployment.",
			
 
				+      displayPath: "zephyr/zephyr-guide.md",
			
 
				+    });
			
 
				+
			
 
				+    // Weak: keyword once in body only, longer doc dilutes TF
			
 
				+    await insertTestDocument(store.db, collectionName, {
			
 
				+      name: "weak",
			
 
				+      title: "General Notes",
			
 
				+      body: "Various topics covering many areas of technology and design. " +
			
 
				+        "One of them might relate to zephyr but mostly about other things entirely. " +
			
 
				+        "Additional content about databases, networking, security, performance, " +
			
 
				+        "monitoring, deployment, testing, and documentation practices.",
			
 
				+      displayPath: "notes/misc.md",
			
 
				+    });
			
 
				+
			
 
				+    const results = store.searchFTS("zephyr", 10);
			
 
				+    expect(results.length).toBe(2);
			
 
				+
			
 
				+    const topScore = results[0]!.score;
			
 
				+    const secondScore = results[1]!.score;
			
 
				+
			
 
				+    // With correct normalization: strong match should be well above threshold
			
 
				+    expect(topScore).toBeGreaterThanOrEqual(STRONG_SIGNAL_MIN_SCORE);
			
 
				+
			
 
				+    // Gap should exceed threshold when there's a dominant match
			
 
				+    const gap = topScore - secondScore;
			
 
				+    expect(gap).toBeGreaterThanOrEqual(STRONG_SIGNAL_MIN_GAP);
			
 
				+
			
 
				+    // Full strong signal check should pass (this was dead code before the fix)
			
 
				+    const hasStrongSignal = topScore >= STRONG_SIGNAL_MIN_SCORE && gap >= STRONG_SIGNAL_MIN_GAP;
			
 
				+    expect(hasStrongSignal).toBe(true);
			
 
				+
			
 
				+    await cleanupTestDb(store);
			
 
				+  });
			
 
				 });
			
 
				 
			
 
				 // =============================================================================
			
@@ -1893,27 +2032,32 @@ describe("LlamaCpp Integration", () => {
 
				     await cleanupTestDb(store);
			
 
				   });
			
 
				 
			
 
				-  test("expandQuery returns original plus expanded queries", async () => {
			
 
				+  test("expandQuery returns typed expansions (no original query)", async () => {
			
 
				     const store = await createTestStore();
			
 
				 
			
 
				-    const queries = await store.expandQuery("test query");
			
 
				-    expect(queries).toContain("test query");
			
 
				-    expect(queries[0]).toBe("test query");
			
 
				-    // LlamaCpp returns original + variations
			
 
				-    expect(queries.length).toBeGreaterThanOrEqual(1);
			
 
				+    const expanded = await store.expandQuery("test query");
			
 
				+    // Returns ExpandedQuery[] — typed results from LLM, excluding original
			
 
				+    expect(expanded.length).toBeGreaterThanOrEqual(1);
			
 
				+    for (const q of expanded) {
			
 
				+      expect(['lex', 'vec', 'hyde']).toContain(q.type);
			
 
				+      expect(q.text.length).toBeGreaterThan(0);
			
 
				+      expect(q.text).not.toBe("test query"); // original excluded
			
 
				+    }
			
 
				 
			
 
				     await cleanupTestDb(store);
			
 
				   }, 30000);
			
 
				 
			
 
				-  test("expandQuery caches results", async () => {
			
 
				+  test("expandQuery caches results as JSON with types", async () => {
			
 
				     const store = await createTestStore();
			
 
				 
			
 
				-    // First call
			
 
				+    // First call — hits LLM
			
 
				     const queries1 = await store.expandQuery("cached query test");
			
 
				-    // Second call - should hit cache
			
 
				+    // Second call — hits cache
			
 
				     const queries2 = await store.expandQuery("cached query test");
			
 
				 
			
 
				-    expect(queries1[0]).toBe(queries2[0]);
			
 
				+    // Cache should preserve full typed structure
			
 
				+    expect(queries1).toEqual(queries2);
			
 
				+    expect(queries2[0]?.type).toBeDefined();
			
 
				 
			
 
				     await cleanupTestDb(store);
			
 
				   }, 30000);
			
--- a/src/store.ts
+++ b/src/store.ts
@@ -56,6 +56,27 @@ export const CHUNK_OVERLAP_TOKENS = Math.floor(CHUNK_SIZE_TOKENS * 0.15);  // 12
 
				 export const CHUNK_SIZE_CHARS = CHUNK_SIZE_TOKENS * 4;  // 3200 chars
			
 
				 export const CHUNK_OVERLAP_CHARS = CHUNK_OVERLAP_TOKENS * 4;  // 480 chars
			
 
				 
			
 
				+// Hybrid query: strong BM25 signal detection thresholds
			
 
				+// Skip expensive LLM expansion when top result is strong AND clearly separated from runner-up
			
 
				+export const STRONG_SIGNAL_MIN_SCORE = 0.85;
			
 
				+export const STRONG_SIGNAL_MIN_GAP = 0.15;
			
 
				+// Max candidates to pass to reranker — balances quality vs latency.
			
 
				+// 40 keeps rank 31-40 visible to the reranker (matters for recall on broad queries).
			
 
				+export const RERANK_CANDIDATE_LIMIT = 40;
			
 
				+
			
 
				+/**
			
 
				+ * A typed query expansion result. Decoupled from llm.ts internal Queryable —
			
 
				+ * same shape, but store.ts owns its own public API type.
			
 
				+ *
			
 
				+ * - lex: keyword variant → routes to FTS only
			
 
				+ * - vec: semantic variant → routes to vector only
			
 
				+ * - hyde: hypothetical document → routes to vector only
			
 
				+ */
			
 
				+export type ExpandedQuery = {
			
 
				+  type: 'lex' | 'vec' | 'hyde';
			
 
				+  text: string;
			
 
				+};
			
 
				+
			
 
				 // =============================================================================
			
 
				 // Path utilities
			
 
				 // =============================================================================
			
@@ -623,7 +644,7 @@ export type Store = {
 
				   searchVec: (query: string, model: string, limit?: number, collectionName?: string) => Promise<SearchResult[]>;
			
 
				 
			
 
				   // Query expansion & reranking
			
 
				-  expandQuery: (query: string, model?: string) => Promise<string[]>;
			
 
				+  expandQuery: (query: string, model?: string) => Promise<ExpandedQuery[]>;
			
 
				   rerank: (query: string, documents: { file: string; text: string }[], model?: string) => Promise<{ file: string; score: number }[]>;
			
 
				 
			
 
				   // Document retrieval
			
@@ -1877,10 +1898,11 @@ export function searchFTS(db: Database, query: string, limit: number = 20, colle
 
				   const rows = db.prepare(sql).all(...params) as { filepath: string; display_path: string; title: string; body: string; hash: string; bm25_score: number }[];
			
 
				   return rows.map(row => {
			
 
				     const collectionName = row.filepath.split('//')[1]?.split('/')[0] || "";
			
 
				-    // Convert bm25 (negative, lower is better) into a stable (0..1] score where higher is better.
			
 
				-    // BM25 scores in SQLite FTS5 are negative (e.g., -10 is strong, -2 is weak).
			
 
				-    // Avoid per-query normalization so "strong signal" heuristics can work.
			
 
				-    const score = 1 / (1 + Math.abs(row.bm25_score));
			
 
				+    // Convert bm25 (negative, lower is better) into a stable [0..1) score where higher is better.
			
 
				+    // FTS5 BM25 scores are negative (e.g., -10 is strong, -2 is weak).
			
 
				+    // |x| / (1 + |x|) maps: strong(-10)→0.91, medium(-2)→0.67, weak(-0.5)→0.33, none(0)→0.
			
 
				+    // Monotonic and query-independent — no per-query normalization needed.
			
 
				+    const score = Math.abs(row.bm25_score) / (1 + Math.abs(row.bm25_score));
			
 
				     return {
			
 
				       filepath: row.filepath,
			
 
				       displayPath: row.display_path,
			
@@ -2050,27 +2072,33 @@ export function insertEmbedding(
 
				 // Query expansion
			
 
				 // =============================================================================
			
 
				 
			
 
				-export async function expandQuery(query: string, model: string = DEFAULT_QUERY_MODEL, db: Database): Promise<string[]> {
			
 
				-  // Check cache first
			
 
				+export async function expandQuery(query: string, model: string = DEFAULT_QUERY_MODEL, db: Database): Promise<ExpandedQuery[]> {
			
 
				+  // Check cache first — stored as JSON preserving types
			
 
				   const cacheKey = getCacheKey("expandQuery", { query, model });
			
 
				   const cached = getCachedResult(db, cacheKey);
			
 
				   if (cached) {
			
 
				-    const lines = cached.split('\n').map(l => l.trim()).filter(l => l.length > 0);
			
 
				-    return [query, ...lines.slice(0, 2)];
			
 
				+    try {
			
 
				+      return JSON.parse(cached) as ExpandedQuery[];
			
 
				+    } catch {
			
 
				+      // Old cache format (pre-typed, newline-separated text) — re-expand
			
 
				+    }
			
 
				   }
			
 
				 
			
 
				   const llm = getDefaultLlamaCpp();
			
 
				   // Note: LlamaCpp uses hardcoded model, model parameter is ignored
			
 
				   const results = await llm.expandQuery(query);
			
 
				-  const queryTexts = results.map(r => r.text);
			
 
				 
			
 
				-  // Cache the expanded queries (excluding original)
			
 
				-  const expandedOnly = queryTexts.filter(t => t !== query);
			
 
				-  if (expandedOnly.length > 0) {
			
 
				-    setCachedResult(db, cacheKey, expandedOnly.join('\n'));
			
 
				+  // Map Queryable[] → ExpandedQuery[] (same shape, decoupled from llm.ts internals).
			
 
				+  // Filter out entries that duplicate the original query text.
			
 
				+  const expanded: ExpandedQuery[] = results
			
 
				+    .filter(r => r.text !== query)
			
 
				+    .map(r => ({ type: r.type, text: r.text }));
			
 
				+
			
 
				+  if (expanded.length > 0) {
			
 
				+    setCachedResult(db, cacheKey, JSON.stringify(expanded));
			
 
				   }
			
 
				 
			
 
				-  return Array.from(new Set([query, ...queryTexts]));
			
 
				+  return expanded;
			
 
				 }
			
 
				 
			
 
				 // =============================================================================
			
@@ -2082,8 +2110,10 @@ export async function rerank(query: string, documents: { file: string; text: str
 
				   const uncachedDocs: RerankDocument[] = [];
			
 
				 
			
 
				   // Check cache for each document
			
 
				+  // Cache key includes chunk text — different queries can select different chunks
			
 
				+  // from the same file, and the reranker score depends on which chunk was sent.
			
 
				   for (const doc of documents) {
			
 
				-    const cacheKey = getCacheKey("rerank", { query, file: doc.file, model });
			
 
				+    const cacheKey = getCacheKey("rerank", { query, file: doc.file, model, chunk: doc.text });
			
 
				     const cached = getCachedResult(db, cacheKey);
			
 
				     if (cached !== null) {
			
 
				       cachedResults.set(doc.file, parseFloat(cached));
			
@@ -2097,9 +2127,10 @@ export async function rerank(query: string, documents: { file: string; text: str
 
				     const llm = getDefaultLlamaCpp();
			
 
				     const rerankResult = await llm.rerank(query, uncachedDocs, { model });
			
 
				 
			
 
				-    // Cache results
			
 
				+    // Cache results — use original doc.text for cache key (result.file lacks chunk text)
			
 
				+    const textByFile = new Map(documents.map(d => [d.file, d.text]));
			
 
				     for (const result of rerankResult.results) {
			
 
				-      const cacheKey = getCacheKey("rerank", { query, file: result.file, model });
			
 
				+      const cacheKey = getCacheKey("rerank", { query, file: result.file, model, chunk: textByFile.get(result.file) || "" });
			
 
				       setCachedResult(db, cacheKey, result.score.toString());
			
 
				       cachedResults.set(result.file, result.score);
			
 
				     }
			
@@ -2569,3 +2600,314 @@ export function extractSnippet(body: string, query: string, maxLen = 500, chunkP
 
				     snippetLines: snippetLineCount,
			
 
				   };
			
 
				 }
			
 
				+
			
 
				+// =============================================================================
			
 
				+// Shared helpers (used by both CLI and MCP)
			
 
				+// =============================================================================
			
 
				+
			
 
				+/**
			
 
				+ * Add line numbers to text content.
			
 
				+ * Each line becomes: "{lineNum}: {content}"
			
 
				+ */
			
 
				+export function addLineNumbers(text: string, startLine: number = 1): string {
			
 
				+  const lines = text.split('\n');
			
 
				+  return lines.map((line, i) => `${startLine + i}: ${line}`).join('\n');
			
 
				+}
			
 
				+
			
 
				+// =============================================================================
			
 
				+// Shared search orchestration
			
 
				+//
			
 
				+// hybridQuery() and vectorSearchQuery() are standalone functions (not Store
			
 
				+// methods) because they are orchestration over primitives — same rationale as
			
 
				+// reciprocalRankFusion(). They take a Store as first argument so both CLI
			
 
				+// and MCP can share the identical pipeline.
			
 
				+// =============================================================================
			
 
				+
			
 
				+/**
			
 
				+ * Optional progress hooks for search orchestration.
			
 
				+ * CLI wires these to stderr for user feedback; MCP leaves them unset.
			
 
				+ */
			
 
				+export interface SearchHooks {
			
 
				+  /** BM25 probe found strong signal — expansion will be skipped */
			
 
				+  onStrongSignal?: (topScore: number) => void;
			
 
				+  /** Query expansion complete. Empty array = strong signal skip (no expansion). */
			
 
				+  onExpand?: (original: string, expanded: ExpandedQuery[]) => void;
			
 
				+  /** Reranking is about to start */
			
 
				+  onRerankStart?: (chunkCount: number) => void;
			
 
				+  /** Reranking finished */
			
 
				+  onRerankDone?: () => void;
			
 
				+}
			
 
				+
			
 
				+export interface HybridQueryOptions {
			
 
				+  collection?: string;
			
 
				+  limit?: number;           // default 10
			
 
				+  minScore?: number;        // default 0
			
 
				+  candidateLimit?: number;  // default RERANK_CANDIDATE_LIMIT
			
 
				+  hooks?: SearchHooks;
			
 
				+}
			
 
				+
			
 
				+export interface HybridQueryResult {
			
 
				+  file: string;             // internal filepath (qmd://collection/path)
			
 
				+  displayPath: string;
			
 
				+  title: string;
			
 
				+  body: string;             // full document body (for snippet extraction)
			
 
				+  bestChunk: string;        // best chunk text
			
 
				+  bestChunkPos: number;     // char offset of best chunk in body
			
 
				+  score: number;            // blended score (full precision)
			
 
				+  context: string | null;   // user-set context
			
 
				+  docid: string;            // content hash prefix (6 chars)
			
 
				+}
			
 
				+
			
 
				+/**
			
 
				+ * Hybrid search: BM25 + vector + query expansion + RRF + chunked reranking.
			
 
				+ *
			
 
				+ * Pipeline:
			
 
				+ * 1. BM25 probe → skip expansion if strong signal
			
 
				+ * 2. expandQuery() → typed query variants (lex/vec/hyde)
			
 
				+ * 3. Type-routed search: original→vector, lex→FTS, vec/hyde→vector
			
 
				+ * 4. RRF fusion → slice to candidateLimit
			
 
				+ * 5. chunkDocument() + keyword-best-chunk selection
			
 
				+ * 6. rerank on chunks (NOT full bodies — O(tokens) trap)
			
 
				+ * 7. Position-aware score blending (RRF rank × reranker score)
			
 
				+ * 8. Dedup by file, filter by minScore, slice to limit
			
 
				+ */
			
 
				+export async function hybridQuery(
			
 
				+  store: Store,
			
 
				+  query: string,
			
 
				+  options?: HybridQueryOptions
			
 
				+): Promise<HybridQueryResult[]> {
			
 
				+  const limit = options?.limit ?? 10;
			
 
				+  const minScore = options?.minScore ?? 0;
			
 
				+  const candidateLimit = options?.candidateLimit ?? RERANK_CANDIDATE_LIMIT;
			
 
				+  const collection = options?.collection;
			
 
				+  const hooks = options?.hooks;
			
 
				+
			
 
				+  const rankedLists: RankedResult[][] = [];
			
 
				+  const docidMap = new Map<string, string>(); // filepath -> docid
			
 
				+  const hasVectors = !!store.db.prepare(
			
 
				+    `SELECT name FROM sqlite_master WHERE type='table' AND name='vectors_vec'`
			
 
				+  ).get();
			
 
				+
			
 
				+  // Step 1: BM25 probe — strong signal skips expensive LLM expansion
			
 
				+  const initialFts = store.searchFTS(query, 20)
			
 
				+    .filter(r => !collection || r.collectionName === collection);
			
 
				+  const topScore = initialFts[0]?.score ?? 0;
			
 
				+  const secondScore = initialFts[1]?.score ?? 0;
			
 
				+  const hasStrongSignal = initialFts.length > 0
			
 
				+    && topScore >= STRONG_SIGNAL_MIN_SCORE
			
 
				+    && (topScore - secondScore) >= STRONG_SIGNAL_MIN_GAP;
			
 
				+
			
 
				+  if (hasStrongSignal) hooks?.onStrongSignal?.(topScore);
			
 
				+
			
 
				+  // Step 2: Expand query (or skip if strong signal)
			
 
				+  const expanded = hasStrongSignal
			
 
				+    ? []
			
 
				+    : await store.expandQuery(query);
			
 
				+
			
 
				+  hooks?.onExpand?.(query, expanded);
			
 
				+
			
 
				+  // Seed with initial FTS results (avoid re-running original query FTS)
			
 
				+  if (initialFts.length > 0) {
			
 
				+    for (const r of initialFts) docidMap.set(r.filepath, r.docid);
			
 
				+    rankedLists.push(initialFts.map(r => ({
			
 
				+      file: r.filepath, displayPath: r.displayPath,
			
 
				+      title: r.title, body: r.body || "", score: r.score,
			
 
				+    })));
			
 
				+  }
			
 
				+
			
 
				+  // Step 3: Route searches by query type
			
 
				+  // Original query → vector search (FTS already covered by probe in step 1).
			
 
				+  // Vector searches run sequentially — node-llama-cpp's embed context
			
 
				+  // hangs on concurrent embed() calls (known limitation).
			
 
				+  if (hasVectors) {
			
 
				+    const vecResults = await store.searchVec(query, DEFAULT_EMBED_MODEL, 20, collection);
			
 
				+    if (vecResults.length > 0) {
			
 
				+      for (const r of vecResults) docidMap.set(r.filepath, r.docid);
			
 
				+      rankedLists.push(vecResults.map(r => ({
			
 
				+        file: r.filepath, displayPath: r.displayPath,
			
 
				+        title: r.title, body: r.body || "", score: r.score,
			
 
				+      })));
			
 
				+    }
			
 
				+  }
			
 
				+
			
 
				+  // Expanded queries → route by type: lex→FTS only, vec/hyde→vector only.
			
 
				+  // This restores the CLI's query-type-aware routing that was lost in the initial refactor.
			
 
				+  for (const q of expanded) {
			
 
				+    if (q.type === 'lex') {
			
 
				+      const ftsResults = store.searchFTS(q.text, 20)
			
 
				+        .filter(r => !collection || r.collectionName === collection);
			
 
				+      if (ftsResults.length > 0) {
			
 
				+        for (const r of ftsResults) docidMap.set(r.filepath, r.docid);
			
 
				+        rankedLists.push(ftsResults.map(r => ({
			
 
				+          file: r.filepath, displayPath: r.displayPath,
			
 
				+          title: r.title, body: r.body || "", score: r.score,
			
 
				+        })));
			
 
				+      }
			
 
				+    } else {
			
 
				+      // vec or hyde → vector search only
			
 
				+      if (hasVectors) {
			
 
				+        const vecResults = await store.searchVec(q.text, DEFAULT_EMBED_MODEL, 20, collection);
			
 
				+        if (vecResults.length > 0) {
			
 
				+          for (const r of vecResults) docidMap.set(r.filepath, r.docid);
			
 
				+          rankedLists.push(vecResults.map(r => ({
			
 
				+            file: r.filepath, displayPath: r.displayPath,
			
 
				+            title: r.title, body: r.body || "", score: r.score,
			
 
				+          })));
			
 
				+        }
			
 
				+      }
			
 
				+    }
			
 
				+  }
			
 
				+
			
 
				+  // Step 4: RRF fusion — first 2 lists (original FTS + first vec) get 2x weight
			
 
				+  const weights = rankedLists.map((_, i) => i < 2 ? 2.0 : 1.0);
			
 
				+  const fused = reciprocalRankFusion(rankedLists, weights);
			
 
				+  const candidates = fused.slice(0, candidateLimit);
			
 
				+
			
 
				+  if (candidates.length === 0) return [];
			
 
				+
			
 
				+  // Step 5: Chunk documents, pick best chunk per doc for reranking.
			
 
				+  // Reranking full bodies is O(tokens) — the critical perf lesson that motivated this refactor.
			
 
				+  const queryTerms = query.toLowerCase().split(/\s+/).filter(t => t.length > 2);
			
 
				+  const chunksToRerank: { file: string; text: string }[] = [];
			
 
				+  const docChunkMap = new Map<string, { chunks: { text: string; pos: number }[]; bestIdx: number }>();
			
 
				+
			
 
				+  for (const cand of candidates) {
			
 
				+    const chunks = chunkDocument(cand.body);
			
 
				+    if (chunks.length === 0) continue;
			
 
				+
			
 
				+    // Pick chunk with most keyword overlap (fallback: first chunk)
			
 
				+    let bestIdx = 0;
			
 
				+    let bestScore = -1;
			
 
				+    for (let i = 0; i < chunks.length; i++) {
			
 
				+      const chunkLower = chunks[i]!.text.toLowerCase();
			
 
				+      const score = queryTerms.reduce((acc, term) => acc + (chunkLower.includes(term) ? 1 : 0), 0);
			
 
				+      if (score > bestScore) { bestScore = score; bestIdx = i; }
			
 
				+    }
			
 
				+
			
 
				+    chunksToRerank.push({ file: cand.file, text: chunks[bestIdx]!.text });
			
 
				+    docChunkMap.set(cand.file, { chunks, bestIdx });
			
 
				+  }
			
 
				+
			
 
				+  // Step 6: Rerank chunks (NOT full bodies)
			
 
				+  hooks?.onRerankStart?.(chunksToRerank.length);
			
 
				+  const reranked = await store.rerank(query, chunksToRerank);
			
 
				+  hooks?.onRerankDone?.();
			
 
				+
			
 
				+  // Step 7: Blend RRF position score with reranker score
			
 
				+  // Position-aware weights: top retrieval results get more protection from reranker disagreement
			
 
				+  const candidateMap = new Map(candidates.map(c => [c.file, {
			
 
				+    displayPath: c.displayPath, title: c.title, body: c.body,
			
 
				+  }]));
			
 
				+  const rrfRankMap = new Map(candidates.map((c, i) => [c.file, i + 1]));
			
 
				+
			
 
				+  const blended = reranked.map(r => {
			
 
				+    const rrfRank = rrfRankMap.get(r.file) || candidateLimit;
			
 
				+    let rrfWeight: number;
			
 
				+    if (rrfRank <= 3) rrfWeight = 0.75;
			
 
				+    else if (rrfRank <= 10) rrfWeight = 0.60;
			
 
				+    else rrfWeight = 0.40;
			
 
				+    const rrfScore = 1 / rrfRank;
			
 
				+    const blendedScore = rrfWeight * rrfScore + (1 - rrfWeight) * r.score;
			
 
				+
			
 
				+    const candidate = candidateMap.get(r.file);
			
 
				+    const chunkInfo = docChunkMap.get(r.file);
			
 
				+    const bestIdx = chunkInfo?.bestIdx ?? 0;
			
 
				+    const bestChunk = chunkInfo?.chunks[bestIdx]?.text || candidate?.body || "";
			
 
				+    const bestChunkPos = chunkInfo?.chunks[bestIdx]?.pos || 0;
			
 
				+
			
 
				+    return {
			
 
				+      file: r.file,
			
 
				+      displayPath: candidate?.displayPath || "",
			
 
				+      title: candidate?.title || "",
			
 
				+      body: candidate?.body || "",
			
 
				+      bestChunk,
			
 
				+      bestChunkPos,
			
 
				+      score: blendedScore,
			
 
				+      context: store.getContextForFile(r.file),
			
 
				+      docid: docidMap.get(r.file) || "",
			
 
				+    };
			
 
				+  }).sort((a, b) => b.score - a.score);
			
 
				+
			
 
				+  // Step 8: Dedup by file (safety net — prevents duplicate output)
			
 
				+  const seenFiles = new Set<string>();
			
 
				+  return blended
			
 
				+    .filter(r => {
			
 
				+      if (seenFiles.has(r.file)) return false;
			
 
				+      seenFiles.add(r.file);
			
 
				+      return true;
			
 
				+    })
			
 
				+    .filter(r => r.score >= minScore)
			
 
				+    .slice(0, limit);
			
 
				+}
			
 
				+
			
 
				+export interface VectorSearchOptions {
			
 
				+  collection?: string;
			
 
				+  limit?: number;           // default 10
			
 
				+  minScore?: number;        // default 0.3
			
 
				+  hooks?: Pick<SearchHooks, 'onExpand'>;
			
 
				+}
			
 
				+
			
 
				+export interface VectorSearchResult {
			
 
				+  file: string;
			
 
				+  displayPath: string;
			
 
				+  title: string;
			
 
				+  body: string;
			
 
				+  score: number;
			
 
				+  context: string | null;
			
 
				+  docid: string;
			
 
				+}
			
 
				+
			
 
				+/**
			
 
				+ * Vector-only semantic search with query expansion.
			
 
				+ *
			
 
				+ * Pipeline:
			
 
				+ * 1. expandQuery() → typed variants, filter to vec/hyde only (lex irrelevant here)
			
 
				+ * 2. searchVec() for original + vec/hyde variants (sequential — node-llama-cpp embed limitation)
			
 
				+ * 3. Dedup by filepath (keep max score)
			
 
				+ * 4. Sort by score descending, filter by minScore, slice to limit
			
 
				+ */
			
 
				+export async function vectorSearchQuery(
			
 
				+  store: Store,
			
 
				+  query: string,
			
 
				+  options?: VectorSearchOptions
			
 
				+): Promise<VectorSearchResult[]> {
			
 
				+  const limit = options?.limit ?? 10;
			
 
				+  const minScore = options?.minScore ?? 0.3;
			
 
				+  const collection = options?.collection;
			
 
				+
			
 
				+  const hasVectors = !!store.db.prepare(
			
 
				+    `SELECT name FROM sqlite_master WHERE type='table' AND name='vectors_vec'`
			
 
				+  ).get();
			
 
				+  if (!hasVectors) return [];
			
 
				+
			
 
				+  // Expand query — filter to vec/hyde only (lex queries target FTS, not vector)
			
 
				+  const allExpanded = await store.expandQuery(query);
			
 
				+  const vecExpanded = allExpanded.filter(q => q.type !== 'lex');
			
 
				+  options?.hooks?.onExpand?.(query, vecExpanded);
			
 
				+
			
 
				+  // Run original + vec/hyde expanded through vector, sequentially — concurrent embed() hangs
			
 
				+  const queryTexts = [query, ...vecExpanded.map(q => q.text)];
			
 
				+  const allResults = new Map<string, VectorSearchResult>();
			
 
				+  for (const q of queryTexts) {
			
 
				+    const vecResults = await store.searchVec(q, DEFAULT_EMBED_MODEL, limit, collection);
			
 
				+    for (const r of vecResults) {
			
 
				+      const existing = allResults.get(r.filepath);
			
 
				+      if (!existing || r.score > existing.score) {
			
 
				+        allResults.set(r.filepath, {
			
 
				+          file: r.filepath,
			
 
				+          displayPath: r.displayPath,
			
 
				+          title: r.title,
			
 
				+          body: r.body || "",
			
 
				+          score: r.score,
			
 
				+          context: store.getContextForFile(r.filepath),
			
 
				+          docid: r.docid,
			
 
				+        });
			
 
				+      }
			
 
				+    }
			
 
				+  }
			
 
				+
			
 
				+  return Array.from(allResults.values())
			
 
				+    .sort((a, b) => b.score - a.score)
			
 
				+    .filter(r => r.score >= minScore)
			
 
				+    .slice(0, limit);
			
 
				+}