4 maanden geleden · 0201710c2b
--- a/skills/qmd/SKILL.md
+++ b/skills/qmd/SKILL.md
@@ -5,7 +5,7 @@ license: MIT
 
				 compatibility: Requires qmd CLI or MCP server. Install via `bun install -g https://github.com/tobi/qmd`.
			
 
				 metadata:
			
 
				   author: tobi
			
 
				-  version: "1.1.1"
			
 
				+  version: "1.2.0"
			
 
				 allowed-tools: Bash(qmd:*), mcp__qmd__*
			
 
				 ---
			
 
				 
			
@@ -15,137 +15,150 @@ QMD is a local, on-device search engine for markdown content. It indexes your no
 
				 
			
 
				 ## QMD Status
			
 
				 
			
 
				-!`qmd status 2>/dev/null || echo "Not installed. Run: bun install -g https://github.com/tobi/qmd"`
			
 
				+!`qmd status 2>/dev/null || echo "Not installed. See installation instructions below."`
			
 
				 
			
 
				-## When to Use This Skill
			
 
				+## Installation
			
 
				 
			
 
				-- User asks to search their notes, documents, or knowledge base
			
 
				-- User needs to find information in their markdown files
			
 
				-- User wants to retrieve specific documents or search across collections
			
 
				-- User asks "what did I write about X" or "find my notes on Y"
			
 
				-- User needs semantic search (conceptual similarity) not just keyword matching
			
 
				-- User mentions meeting notes, transcripts, or documentation lookup
			
 
				-
			
 
				-## Search Commands
			
 
				-
			
 
				-Choose the right search mode for the task:
			
 
				-
			
 
				-| Command | Use When | Speed |
			
 
				-|---------|----------|-------|
			
 
				-| `qmd search` | Exact keyword matches needed | Fast |
			
 
				-| `qmd vsearch` | Keywords aren't working, need conceptual matches | Medium |
			
 
				-| `qmd query` | Best results needed, speed not critical | Slower |
			
 
				+### Install QMD
			
 
				 
			
 
				 ```bash
			
 
				-# Fast keyword search (BM25)
			
 
				-qmd search "your query"
			
 
				+# Install globally with bun
			
 
				+bun install -g https://github.com/tobi/qmd
			
 
				 
			
 
				-# Semantic vector search (finds conceptually similar content)
			
 
				-qmd vsearch "your query"
			
 
				+# Or with npm
			
 
				+npm install -g https://github.com/tobi/qmd
			
 
				+```
			
 
				 
			
 
				-# Hybrid search with re-ranking (best quality)
			
 
				-qmd query "your query"
			
 
				+### Configure MCP Server
			
 
				+
			
 
				+**Claude Code** — add to `~/.claude/settings.json`:
			
 
				+```json
			
 
				+{
			
 
				+  "mcpServers": {
			
 
				+    "qmd": { "command": "qmd", "args": ["mcp"] }
			
 
				+  }
			
 
				+}
			
 
				 ```
			
 
				 
			
 
				-## Common Options
			
 
				+**Claude Desktop** — add to `~/Library/Application Support/Claude/claude_desktop_config.json` (macOS) or `%APPDATA%\Claude\claude_desktop_config.json` (Windows):
			
 
				+```json
			
 
				+{
			
 
				+  "mcpServers": {
			
 
				+    "qmd": { "command": "qmd", "args": ["mcp"] }
			
 
				+  }
			
 
				+}
			
 
				+```
			
 
				 
			
 
				-```bash
			
 
				--n <num>                 # Number of results (default: 5)
			
 
				--c, --collection <name>  # Restrict to specific collection
			
 
				---all                    # Return all matches
			
 
				---min-score <num>        # Minimum score threshold (0.0-1.0)
			
 
				---full                   # Show full document content
			
 
				---json                   # JSON output for processing
			
 
				---files                  # List files with scores
			
 
				---line-numbers           # Add line numbers to output
			
 
				+**OpenClaw** — add to `~/.openclaw/openclaw.json` under `mcp.servers`:
			
 
				+```json
			
 
				+{
			
 
				+  "mcp": {
			
 
				+    "servers": {
			
 
				+      "qmd": { "command": "qmd", "args": ["mcp"] }
			
 
				+    }
			
 
				+  }
			
 
				+}
			
 
				 ```
			
 
				 
			
 
				-## Document Retrieval
			
 
				+### Index Your Content
			
 
				 
			
 
				 ```bash
			
 
				-# Get document by path
			
 
				-qmd get "collection/path/to/doc.md"
			
 
				+# Add a collection (indexes all markdown files)
			
 
				+qmd collection add ~/Documents/notes --name notes
			
 
				 
			
 
				-# Get document by docid (shown in search results as #abc123)
			
 
				-qmd get "#abc123"
			
 
				+# Generate embeddings for semantic search
			
 
				+qmd embed
			
 
				 
			
 
				-# Get with line numbers for code review
			
 
				-qmd get "docs/api.md" --line-numbers
			
 
				+# Check status
			
 
				+qmd status
			
 
				+```
			
 
				 
			
 
				-# Get multiple documents by glob pattern
			
 
				-qmd multi-get "docs/*.md"
			
 
				+## Search Strategy — Use `structured_search`
			
 
				 
			
 
				-# Get multiple documents by list
			
 
				-qmd multi-get "doc1.md, doc2.md, #abc123"
			
 
				-```
			
 
				+**You are a capable LLM.** Use `structured_search` instead of `deep_search` — you generate better query expansions than the local model.
			
 
				 
			
 
				-## Index Management
			
 
				+### How structured_search Works
			
 
				 
			
 
				-```bash
			
 
				-# Check index status and available collections
			
 
				-qmd status
			
 
				+You provide one or more sub-searches (2-4 gives the best results), each with a type:
			
 
				 
			
 
				-# List all collections
			
 
				-qmd collection list
			
 
				+| Type | Purpose | Example |
			
 
				+|------|---------|---------|
			
 
				+| `lex` | BM25 keywords — exact terms, names, identifiers | `"CAP theorem consistency"` |
			
 
				+| `vec` | Semantic — natural language questions | `"what is the tradeoff between consistency and availability"` |
			
 
				+| `hyde` | Hypothetical document — what the answer looks like | `"The CAP theorem states that distributed systems can only guarantee two of three properties..."` |
			
 
				 
			
 
				-# List files in a collection
			
 
				-qmd ls <collection-name>
			
 
				+### Example: Finding CAP Theorem Docs
			
 
				 
			
 
				-# Update index (re-scan files for changes)
			
 
				-qmd update
			
 
				+```json
			
 
				+{
			
 
				+  "searches": [
			
 
				+    { "type": "lex", "query": "CAP theorem consistency availability partition" },
			
 
				+    { "type": "vec", "query": "distributed systems tradeoff between data consistency and availability" },
			
 
				+    { "type": "hyde", "query": "The CAP theorem proves that a distributed system cannot simultaneously provide consistency, availability, and partition tolerance. You must choose two." }
			
 
				+  ],
			
 
				+  "limit": 10
			
 
				+}
			
 
				 ```
			
 
				 
			
 
				-## Score Interpretation
			
 
				-
			
 
				-| Score | Meaning | Action |
			
 
				-|-------|---------|--------|
			
 
				-| 0.8 - 1.0 | Highly relevant | Show to user |
			
 
				-| 0.5 - 0.8 | Moderately relevant | Include if few results |
			
 
				-| 0.2 - 0.5 | Somewhat relevant | Only if user wants more |
			
 
				-| 0.0 - 0.2 | Low relevance | Usually skip |
			
 
				+### Guidelines for Query Expansion
			
 
				 
			
 
				-## Recommended Workflow
			
 
				+1. **lex queries**: 2-5 keyword terms. Include synonyms and related terms.
			
 
				+2. **vec queries**: Full natural language questions. Be specific.
			
 
				+3. **hyde queries**: 50-100 words. Write what the answer *looks like*, not the question.
			
 
				+4. **Order matters**: First search gets 2x weight in fusion.
			
 
				 
			
 
				-1. **Check what's available**: `qmd status`
			
 
				-2. **Start with keyword search**: `qmd search "topic" -n 10`
			
 
				-3. **Try semantic if needed**: `qmd vsearch "describe the concept"`
			
 
				-4. **Use hybrid for best results**: `qmd query "question" --min-score 0.4`
			
 
				-5. **Retrieve full documents**: `qmd get "#docid" --full`
			
 
				+### When to Use Each Search Type
			
 
				 
			
 
				-## Example: Finding Meeting Notes
			
 
				+| Situation | Approach |
			
 
				+|-----------|----------|
			
 
				+| Know exact terms (names, code, acronyms) | Start with `lex` |
			
 
				+| Conceptual search, don't know vocabulary | Lead with `vec` |
			
 
				+| Complex topic, want best recall | Use all three types |
			
 
				+| Quick lookup | Single `lex` query is fine |
			
 
				 
			
 
				-```bash
			
 
				-# Search for meetings about a topic
			
 
				-qmd search "quarterly review" -c meetings -n 5
			
 
				+## MCP Tools Reference
			
 
				 
			
 
				-# Get semantic matches
			
 
				-qmd vsearch "performance discussion" -c meetings
			
 
				+| Tool | Speed | Use Case |
			
 
				+|------|-------|----------|
			
 
				+| `structured_search` | ~5s | **Recommended** — you provide query expansions |
			
 
				+| `search` | ~30ms | Fast keyword lookup (BM25) |
			
 
				+| `vector_search` | ~2s | Semantic similarity |
			
 
				+| `deep_search` | ~10s | Auto-expands query (uses small local model) |
			
 
				+| `get` | instant | Retrieve doc by path or `#docid` |
			
 
				+| `multi_get` | instant | Retrieve multiple docs |
			
 
				+| `status` | instant | Index health |
			
 
				 
			
 
				-# Retrieve the full meeting notes
			
 
				-qmd get "#abc123" --full
			
 
				-```
			
 
				+## CLI Fallback
			
 
				 
			
 
				-## Example: Research Across All Notes
			
 
				+If MCP isn't configured, use the CLI:
			
 
				 
			
 
				 ```bash
			
 
				-# Hybrid search for best results
			
 
				-qmd query "authentication implementation" --min-score 0.3 --json
			
 
				+# Keyword search
			
 
				+qmd search "your query" -n 10
			
 
				 
			
 
				-# Get all relevant files for deeper analysis
			
 
				-qmd query "auth flow" --all --files --min-score 0.4
			
 
				+# Semantic search  
			
 
				+qmd vsearch "your query"
			
 
				+
			
 
				+# Hybrid with re-ranking (auto-expands)
			
 
				+qmd query "your query"
			
 
				+
			
 
				+# Retrieve document
			
 
				+qmd get "#abc123" --full
			
 
				 ```
			
 
				 
			
 
				-## MCP Server Integration
			
 
				+## Score Interpretation
			
 
				 
			
 
				-This plugin configures the qmd MCP server automatically. When available, prefer MCP tools over Bash for tighter integration:
			
 
				+| Score | Meaning |
			
 
				+|-------|---------|
			
 
				+| 0.8+ | Highly relevant — show to user |
			
 
				+| 0.5-0.8 | Moderately relevant — include if few results |
			
 
				+| 0.2-0.5 | Weak match — only if user wants more |
			
 
				+| <0.2 | Skip |
			
 
				 
			
 
				-| MCP Tool | Equivalent CLI | Purpose |
			
 
				-|----------|---------------|---------|
			
 
				-| `qmd_search` | `qmd search` | Fast BM25 keyword search |
			
 
				-| `qmd_vector_search` | `qmd vsearch` | Semantic vector search |
			
 
				-| `qmd_deep_search` | `qmd query` | Deep search with expansion and reranking |
			
 
				-| `qmd_get` | `qmd get` | Retrieve document by path or docid |
			
 
				-| `qmd_multi_get` | `qmd multi-get` | Retrieve multiple documents |
			
 
				-| `qmd_status` | `qmd status` | Index health and collection info |
			
 
				+## Workflow Example
			
 
				 
			
 
				-For manual MCP setup without the plugin, see [references/mcp-setup.md](references/mcp-setup.md).
			
 
				+1. **Check collections**: run `qmd status` (CLI) or call the `status` MCP tool
			
 
				+2. **Search with structured_search**: Generate lex + vec + hyde queries
			
 
				+3. **Review results**: Check scores and snippets
			
 
				+4. **Retrieve full docs**: Use `get` with `#docid` from results
			
 
				+5. **Iterate**: Refine queries based on what you find
			
--- a/skills/qmd/references/mcp-setup.md
+++ b/skills/qmd/references/mcp-setup.md
@@ -1,10 +1,24 @@
 
				 # QMD MCP Server Setup
			
 
				 
			
 
				-Manual MCP configuration for use without the qmd plugin.
			
 
				+## Quick Start
			
 
				 
			
 
				-> **Note**: If using the qmd plugin, MCP configuration is included automatically. This is only needed for manual setup.
			
 
				+1. **Install QMD**
			
 
				+   ```bash
			
 
				+   bun install -g https://github.com/tobi/qmd
			
 
				+   # or: npm install -g https://github.com/tobi/qmd
			
 
				+   ```
			
 
				 
			
 
				-## Claude Code
			
 
				+2. **Configure your client** (see below)
			
 
				+
			
 
				+3. **Index your content**
			
 
				+   ```bash
			
 
				+   qmd collection add ~/path/to/markdown --name myknowledge
			
 
				+   qmd embed  # Generate embeddings for semantic search
			
 
				+   ```
			
 
				+
			
 
				+## Client Configuration
			
 
				+
			
 
				+### Claude Code
			
 
				 
			
 
				 Add to `~/.claude/settings.json`:
			
 
				 
			
@@ -19,9 +33,9 @@ Add to `~/.claude/settings.json`:
 
				 }
			
 
				 ```
			
 
				 
			
 
				-## Claude Desktop
			
 
				+### Claude Desktop
			
 
				 
			
 
				-Add to `~/Library/Application Support/Claude/claude_desktop_config.json`:
			
 
				+Add to `~/Library/Application Support/Claude/claude_desktop_config.json` (macOS) or `%APPDATA%\Claude\claude_desktop_config.json` (Windows):
			
 
				 
			
 
				 ```json
			
 
				 {
			
@@ -34,79 +48,132 @@ Add to `~/Library/Application Support/Claude/claude_desktop_config.json`:
 
				 }
			
 
				 ```
			
 
				 
			
 
				-## Available MCP Tools
			
 
				+### OpenClaw
			
 
				+
			
 
				+Add to `~/.openclaw/openclaw.json`:
			
 
				+
			
 
				+```json
			
 
				+{
			
 
				+  "mcp": {
			
 
				+    "servers": {
			
 
				+      "qmd": {
			
 
				+        "command": "qmd",
			
 
				+        "args": ["mcp"]
			
 
				+      }
			
 
				+    }
			
 
				+  }
			
 
				+}
			
 
				+```
			
 
				+
			
 
				+### HTTP Mode (for remote/multi-client)
			
 
				+
			
 
				+```bash
			
 
				+# Start HTTP server (default port 8181)
			
 
				+qmd mcp --http
			
 
				+
			
 
				+# Or as a background daemon
			
 
				+qmd mcp --http --daemon
			
 
				+
			
 
				+# Stop daemon
			
 
				+qmd mcp stop
			
 
				+```
			
 
				+
			
 
				+## MCP Tools
			
 
				+
			
 
				+### structured_search ⭐ Recommended
			
 
				+
			
 
				+Execute pre-expanded search queries. **Use this** — you're a capable LLM that generates better query expansions than the local model.
			
 
				+
			
 
				+```json
			
 
				+{
			
 
				+  "searches": [
			
 
				+    { "type": "lex", "query": "keyword phrases here" },
			
 
				+    { "type": "vec", "query": "natural language question" },
			
 
				+    { "type": "hyde", "query": "A hypothetical answer passage..." }
			
 
				+  ],
			
 
				+  "limit": 10,
			
 
				+  "collection": "optional-filter",
			
 
				+  "minScore": 0.0
			
 
				+}
			
 
				+```
			
 
				+
			
 
				+**Search types:**
			
 
				+- `lex` — BM25 keyword search. Short phrases, 2-5 terms.
			
 
				+- `vec` — Semantic vector search. Natural language questions.
			
 
				+- `hyde` — Hypothetical document. Write what the answer looks like (50-100 words).
			
 
				+
			
 
				+### search
			
 
				+
			
 
				+Fast BM25 keyword search (~30ms).
			
 
				+
			
 
				+| Parameter | Type | Description |
			
 
				+|-----------|------|-------------|
			
 
				+| `query` | string | Search query |
			
 
				+| `collection` | string? | Filter by collection |
			
 
				+| `limit` | number? | Max results (default: 5) |
			
 
				+| `minScore` | number? | Min relevance 0-1 |
			
 
				 
			
 
				-Once configured, these tools become available:
			
 
				+### vector_search
			
 
				 
			
 
				-### qmd_search
			
 
				-Fast BM25 keyword search.
			
 
				+Semantic similarity search (~2s).
			
 
				 
			
 
				-**Parameters:**
			
 
				-- `query` (required): Search query string
			
 
				-- `collection` (optional): Restrict to specific collection
			
 
				-- `limit` (optional): Number of results (default: 5)
			
 
				-- `minScore` (optional): Minimum relevance score
			
 
				+| Parameter | Type | Description |
			
 
				+|-----------|------|-------------|
			
 
				+| `query` | string | Natural language query |
			
 
				+| `collection` | string? | Filter by collection |
			
 
				+| `limit` | number? | Max results (default: 5) |
			
 
				+| `minScore` | number? | Min relevance 0-1 |
			
 
				 
			
 
				-### qmd_vector_search
			
 
				-Semantic vector search for conceptual similarity.
			
 
				+### deep_search
			
 
				 
			
 
				-**Parameters:**
			
 
				-- `query` (required): Search query string
			
 
				-- `collection` (optional): Restrict to specific collection
			
 
				-- `limit` (optional): Number of results (default: 5)
			
 
				-- `minScore` (optional): Minimum relevance score
			
 
				+Hybrid search with automatic query expansion (~10s). Uses a small local model to expand your query. **Prefer `structured_search`** — you generate better expansions.
			
 
				 
			
 
				-### qmd_deep_search
			
 
				-Hybrid search combining BM25, vector search, and LLM re-ranking.
			
 
				+| Parameter | Type | Description |
			
 
				+|-----------|------|-------------|
			
 
				+| `query` | string | Search query |
			
 
				+| `collection` | string? | Filter by collection |
			
 
				+| `limit` | number? | Max results (default: 5) |
			
 
				+| `minScore` | number? | Min relevance 0-1 |
			
 
				 
			
 
				-**Parameters:**
			
 
				-- `query` (required): Search query string
			
 
				-- `collection` (optional): Restrict to specific collection
			
 
				-- `limit` (optional): Number of results (default: 5)
			
 
				-- `minScore` (optional): Minimum relevance score
			
 
				+### get
			
 
				 
			
 
				-### qmd_get
			
 
				 Retrieve a document by path or docid.
			
 
				 
			
 
				-**Parameters:**
			
 
				-- `path` (required): Document path or docid (e.g., `#abc123`)
			
 
				-- `full` (optional): Return full content (default: true)
			
 
				-- `lineNumbers` (optional): Include line numbers
			
 
				+| Parameter | Type | Description |
			
 
				+|-----------|------|-------------|
			
 
				+| `path` | string | File path or `#docid` |
			
 
				+| `full` | boolean? | Return full content |
			
 
				+| `lineNumbers` | boolean? | Add line numbers |
			
 
				 
			
 
				-### qmd_multi_get
			
 
				-Retrieve multiple documents.
			
 
				+### multi_get
			
 
				 
			
 
				-**Parameters:**
			
 
				-- `pattern` (required): Glob pattern or comma-separated list
			
 
				-- `maxBytes` (optional): Skip files larger than this (default: 10KB)
			
 
				+Retrieve multiple documents by glob or list.
			
 
				 
			
 
				-### qmd_status
			
 
				-Get index health and collection information.
			
 
				+| Parameter | Type | Description |
			
 
				+|-----------|------|-------------|
			
 
				+| `pattern` | string | Glob pattern or comma-separated paths/docids |
			
 
				+| `maxBytes` | number? | Skip files larger than this (default: 10KB) |
			
 
				 
			
 
				-**Parameters:** None
			
 
				+### status
			
 
				+
			
 
				+Get index health and collection info. No parameters.
			
 
				 
			
 
				 ## Troubleshooting
			
 
				 
			
 
				-### MCP server not starting
			
 
				-- Ensure qmd is in your PATH: `which qmd`
			
 
				-- Try running `qmd mcp` manually to see errors
			
 
				-- Check that Bun is installed: `bun --version`
			
 
				-
			
 
				-### No results returned
			
 
				-- Verify collections exist: `qmd collection list`
			
 
				-- Check index status: `qmd status`
			
 
				-- Ensure embeddings are generated: `qmd embed`
			
 
				-
			
 
				-### Slow searches
			
 
				-- For faster results, use `qmd_search` instead of `qmd_deep_search`
			
 
				-- The first search may be slow while models load (~3GB)
			
 
				-- Subsequent searches are much faster
			
 
				-
			
 
				-## Choosing Between CLI and MCP
			
 
				-
			
 
				-| Scenario | Recommendation |
			
 
				-|----------|---------------|
			
 
				-| MCP configured | Use `qmd_*` tools directly |
			
 
				-| No MCP | Use Bash with `qmd` commands |
			
 
				-| Complex pipelines | Bash may be more flexible |
			
 
				-| Simple lookups | MCP tools are cleaner |
			
 
				+**MCP server not starting**
			
 
				+- Check qmd is in PATH: `which qmd`
			
 
				+- Run manually to see errors: `qmd mcp`
			
 
				+- If you installed via bun, verify bun is installed: `bun --version`
			
 
				+
			
 
				+**No results / empty index**
			
 
				+- Check collections: `qmd collection list`
			
 
				+- Verify status: `qmd status`
			
 
				+- Generate embeddings: `qmd embed`
			
 
				+
			
 
				+**Slow first search**
			
 
				+- Normal — models load on first use (~3GB)
			
 
				+- Subsequent searches are fast
			
 
				+
			
 
				+**structured_search not found**
			
 
				+- Update QMD: `bun install -g https://github.com/tobi/qmd`
			
 
				+- Requires QMD v1.0.7 or later
			
--- a/src/mcp.ts
+++ b/src/mcp.ts
@@ -21,9 +21,10 @@ import {
 
				   addLineNumbers,
			
 
				   hybridQuery,
			
 
				   vectorSearchQuery,
			
 
				+  structuredSearch,
			
 
				   DEFAULT_MULTI_GET_MAX_BYTES,
			
 
				 } from "./store.js";
			
 
				-import type { Store } from "./store.js";
			
 
				+import type { Store, StructuredSubSearch } from "./store.js";
			
 
				 import { getCollection, getGlobalContext } from "./collections.js";
			
 
				 import { disposeDefaultLlamaCpp } from "./llm.js";
			
 
				 
			
@@ -123,9 +124,15 @@ function buildInstructions(store: Store): string {
 
				   // Tool schemas describe parameters; instructions describe strategy.
			
 
				   lines.push("");
			
 
				   lines.push("Search:");
			
 
				-  lines.push("  - `search` (~30ms) — keyword and exact phrase matching.");
			
 
				-  lines.push("  - `vector_search` (~2s) — meaning-based, finds adjacent concepts even when vocabulary differs.");
			
 
				-  lines.push("  - `deep_search` (~10s) — auto-expands the query into variations, searches each by keyword and meaning, reranks for top hits.");
			
 
				+  lines.push("  - `search` (~30ms) — BM25 keyword matching. Fast, exact terms.");
			
 
				+  lines.push("  - `vector_search` (~2s) — semantic search. Finds synonyms and related concepts.");
			
 
				+  lines.push("  - `deep_search` (~10s) — auto-expands query + reranks. Use when you don't know the exact terms.");
			
 
				+  lines.push("  - `structured_search` (~5s) — YOU provide the query variations. Best for complex/nuanced queries.");
			
 
				+  lines.push("");
			
 
				+  lines.push("For structured_search, pass 2-4 sub-searches:");
			
 
				+  lines.push("  - type:'lex' for keyword phrases (BM25)");
			
 
				+  lines.push("  - type:'vec' for semantic questions");
			
 
				+  lines.push("  - type:'hyde' for hypothetical answer snippets");
			
 
				 
			
 
				   // --- Retrieval workflow ---
			
 
				   lines.push("");
			
@@ -350,6 +357,85 @@ function createMcpServer(store: Store): McpServer {
 
				     }
			
 
				   );
			
 
				 
			
 
				+  // ---------------------------------------------------------------------------
			
 
				+  // Tool: qmd_structured_search (Pre-expanded queries from LLM)
			
 
				+  // ---------------------------------------------------------------------------
			
 
				+
			
 
				+  const subSearchSchema = z.object({
			
 
				+    type: z.enum(['lex', 'vec', 'hyde']).describe(
			
 
				+      "Search type: 'lex' = BM25 keyword search (exact terms, fast), " +
			
 
				+      "'vec' = semantic vector search (meaning-based, finds synonyms/paraphrases), " +
			
 
				+      "'hyde' = hypothetical document (imagine what the answer looks like)"
			
 
				+    ),
			
 
				+    query: z.string().describe("The search query text"),
			
 
				+  });
			
 
				+
			
 
				+  server.registerTool(
			
 
				+    "structured_search",
			
 
				+    {
			
 
				+      title: "Structured Search",
			
 
				+      description: `Execute pre-expanded search queries. Skips internal query expansion — you provide the search variations directly.
			
 
				+
			
 
				+**When to use:** You're an LLM that can generate better query expansions than a small local model. Pass 2-4 sub-searches for best results.
			
 
				+
			
 
				+**Search types:**
			
 
				+- \`lex\`: BM25 keyword search. Use short keyword phrases (2-5 terms). Good for exact terms, names, code identifiers.
			
 
				+- \`vec\`: Semantic vector search. Use natural language questions or descriptions. Finds documents with similar meaning even when vocabulary differs.
			
 
				+- \`hyde\`: Hypothetical document. Write a short passage (~50-100 words) that looks like what you're searching for. Powerful for finding conceptually similar content.
			
 
				+
			
 
				+**Example:** To find CAP theorem docs, pass:
			
 
				+- { type: "lex", query: "CAP theorem consistency availability" }
			
 
				+- { type: "vec", query: "what is the tradeoff between data consistency and system availability in distributed systems" }
			
 
				+- { type: "hyde", query: "The CAP theorem states that a distributed system can only guarantee two of three properties: Consistency, Availability, and Partition tolerance." }`,
			
 
				+      annotations: { readOnlyHint: true, openWorldHint: false },
			
 
				+      inputSchema: {
			
 
				+        searches: z.array(subSearchSchema).min(1).max(10).describe(
			
 
				+          "Array of sub-searches to execute. Order matters — first search gets higher weight in fusion."
			
 
				+        ),
			
 
				+        limit: z.number().optional().default(10).describe("Maximum number of results (default: 10)"),
			
 
				+        minScore: z.number().optional().default(0).describe("Minimum relevance score 0-1 (default: 0)"),
			
 
				+        collection: z.string().optional().describe("Filter to a specific collection by name"),
			
 
				+        intent: z.string().optional().describe("(Future) Domain intent hint, e.g., 'distributed systems', 'startup finances'"),
			
 
				+      },
			
 
				+    },
			
 
				+    async ({ searches, limit, minScore, collection, intent }) => {
			
 
				+      // Map to internal format
			
 
				+      const subSearches: StructuredSubSearch[] = searches.map(s => ({
			
 
				+        type: s.type,
			
 
				+        query: s.query,
			
 
				+      }));
			
 
				+
			
 
				+      const results = await structuredSearch(store, subSearches, {
			
 
				+        collection,
			
 
				+        limit,
			
 
				+        minScore,
			
 
				+        intent,
			
 
				+      });
			
 
				+
			
 
				+      // Use first lex or vec query for snippet extraction
			
 
				+      const primaryQuery = searches.find(s => s.type === 'lex')?.query
			
 
				+        || searches.find(s => s.type === 'vec')?.query
			
 
				+        || searches[0]?.query || "";
			
 
				+
			
 
				+      const filtered: SearchResultItem[] = results.map(r => {
			
 
				+        const { line, snippet } = extractSnippet(r.bestChunk, primaryQuery, 300);
			
 
				+        return {
			
 
				+          docid: `#${r.docid}`,
			
 
				+          file: r.displayPath,
			
 
				+          title: r.title,
			
 
				+          score: Math.round(r.score * 100) / 100,
			
 
				+          context: r.context,
			
 
				+          snippet: addLineNumbers(snippet, line),
			
 
				+        };
			
 
				+      });
			
 
				+
			
 
				+      return {
			
 
				+        content: [{ type: "text", text: formatSearchSummary(filtered, primaryQuery) }],
			
 
				+        structuredContent: { results: filtered },
			
 
				+      };
			
 
				+    }
			
 
				+  );
			
 
				+
			
 
				   // ---------------------------------------------------------------------------
			
 
				   // Tool: qmd_get (Retrieve document)
			
 
				   // ---------------------------------------------------------------------------
			
@@ -609,6 +695,54 @@ export async function startMcpHttpServer(port: number, options?: { quiet?: boole
 
				         return;
			
 
				       }
			
 
				 
			
 
				+      // REST endpoint: POST /search — structured search without MCP protocol
			
 
				+      if (pathname === "/search" && nodeReq.method === "POST") {
			
 
				+        const rawBody = await collectBody(nodeReq);
			
 
				+        const params = JSON.parse(rawBody);
			
 
				+        
			
 
				+        // Validate required fields
			
 
				+        if (!params.searches || !Array.isArray(params.searches)) {
			
 
				+          nodeRes.writeHead(400, { "Content-Type": "application/json" });
			
 
				+          nodeRes.end(JSON.stringify({ error: "Missing required field: searches (array)" }));
			
 
				+          return;
			
 
				+        }
			
 
				+
			
 
				+        // Map to internal format
			
 
				+        const subSearches: StructuredSubSearch[] = params.searches.map((s: any) => ({
			
 
				+          type: s.type as 'lex' | 'vec' | 'hyde',
			
 
				+          query: String(s.query || ""),
			
 
				+        }));
			
 
				+
			
 
				+        const results = await structuredSearch(store, subSearches, {
			
 
				+          collection: params.collection,
			
 
				+          limit: params.limit ?? 10,
			
 
				+          minScore: params.minScore ?? 0,
			
 
				+          intent: params.intent,
			
 
				+        });
			
 
				+
			
 
				+        // Use first lex or vec query for snippet extraction
			
 
				+        const primaryQuery = params.searches.find((s: any) => s.type === 'lex')?.query
			
 
				+          || params.searches.find((s: any) => s.type === 'vec')?.query
			
 
				+          || params.searches[0]?.query || "";
			
 
				+
			
 
				+        const formatted = results.map(r => {
			
 
				+          const { line, snippet } = extractSnippet(r.bestChunk, primaryQuery, 300);
			
 
				+          return {
			
 
				+            docid: `#${r.docid}`,
			
 
				+            file: r.displayPath,
			
 
				+            title: r.title,
			
 
				+            score: Math.round(r.score * 100) / 100,
			
 
				+            context: r.context,
			
 
				+            snippet: addLineNumbers(snippet, line),
			
 
				+          };
			
 
				+        });
			
 
				+
			
 
				+        nodeRes.writeHead(200, { "Content-Type": "application/json" });
			
 
				+        nodeRes.end(JSON.stringify({ results: formatted }));
			
 
				+        log(`${ts()} POST /search ${params.searches.length} queries (${Date.now() - reqStart}ms)`);
			
 
				+        return;
			
 
				+      }
			
 
				+
			
 
				       if (pathname === "/mcp" && nodeReq.method === "POST") {
			
 
				         const rawBody = await collectBody(nodeReq);
			
 
				         const body = JSON.parse(rawBody);
			
--- a/src/store.ts
+++ b/src/store.ts
@@ -3055,3 +3055,194 @@ export async function vectorSearchQuery(
 
				     .filter(r => r.score >= minScore)
			
 
				     .slice(0, limit);
			
 
				 }
			
 
				+
			
 
				+// =============================================================================
			
 
				+// Structured search — pre-expanded queries from LLM
			
 
				+// =============================================================================
			
 
				+
			
 
				+/**
			
 
				+ * A single sub-search in a structured search request.
			
 
				+ * Matches the format used in QMD training data.
			
 
				+ *
			
 
				+ * structuredSearch() sends 'lex' entries to store.searchFTS() and embeds
			
 
				+ * 'vec' and 'hyde' entries in one batch before running store.searchVec().
			
 
				+ */
			
 
				+export interface StructuredSubSearch {
			
 
				+  /**
			
 
				+   * Search type: 'lex' for BM25 keywords, 'vec' for semantic, 'hyde' for hypothetical document.
			
 
				+   * structuredSearch() routes 'vec' and 'hyde' the same way (embed, then vector search).
			
 
				+   */
			
 
				+  type: 'lex' | 'vec' | 'hyde';
			
 
				+  /**
			
 
				+   * The search query text. Passed as-is to store.searchFTS() for 'lex';
			
 
				+   * passed through formatQueryForEmbedding() before embedding for 'vec' and 'hyde'.
			
 
				+   */
			
 
				+  query: string;
			
 
				+}
			
 
				+
			
 
				+export interface StructuredSearchOptions { // Optional settings for structuredSearch(); every field may be omitted.
			
 
				+  collection?: string;      // forwarded to store.searchFTS() and store.searchVec() as their collection argument
			
 
				+  limit?: number;           // default 10; maximum number of results returned
			
 
				+  minScore?: number;        // default 0; results whose blended score is lower are dropped
			
 
				+  candidateLimit?: number;  // default RERANK_CANDIDATE_LIMIT; how many fused results are kept for reranking
			
 
				+  /**
			
 
				+   * Future: domain intent hint for routing/boosting.
			
 
				+   * structuredSearch() accepts this field but does not read it yet.
			
 
				+   */
			
 
				+  intent?: string;
			
 
				+  hooks?: SearchHooks;      // progress callbacks; structuredSearch() invokes onExpand, onRerankStart and onRerankDone when present
			
 
				+}
			
 
				+
			
 
				+/**
			
 
				+ * Structured search: execute pre-expanded queries without LLM query expansion.
			
 
				+ *
			
 
				+ * Designed for LLM callers (MCP/HTTP) that generate their own query expansions.
			
 
				+ * Skips the internal expandQuery() step and goes directly to the pipeline below.
			
 
				+ *
			
 
				+ * Pipeline:
			
 
				+ * 1. Route searches: lex→FTS, vec/hyde→vector (batch embed)
			
 
				+ * 2. RRF fusion across all result lists, restored to the caller's order
			
 
				+ * 3. Chunk documents + keyword-best-chunk selection
			
 
				+ * 4. Rerank on chunks
			
 
				+ * 5. Position-aware score blending
			
 
				+ * 6. Dedup, filter, slice
			
 
				+ *
			
 
				+ * This is the recommended endpoint for capable LLMs — they can generate
			
 
				+ * better query variations than our small local model, especially for
			
 
				+ * domain-specific or nuanced queries.
			
 
				+ *
			
 
				+ * The result list of the earliest sub-search (in `searches` order) that
			
 
				+ * produced any hits gets double weight in the fusion, so callers should put
			
 
				+ * their most important query first regardless of its type.
			
 
				+ *
			
 
				+ * @param store - Store used for FTS, vector search, reranking and context lookup.
			
 
				+ * @param searches - Pre-expanded sub-searches, ordered by importance.
			
 
				+ * @param options - Optional collection filter, limits, score floor and progress hooks.
			
 
				+ *   `options.intent` is accepted but not read here.
			
 
				+ * @returns Up to `limit` results whose blended score is >= `minScore`, best first.
			
 
				+ *   Empty when `searches` is empty or when no sub-search produced a hit.
			
 
				+ *   vec/hyde sub-searches are skipped when the `vectors_vec` table does not exist.
			
 
				+ */
			
 
				+export async function structuredSearch(
			
 
				+  store: Store,
			
 
				+  searches: StructuredSubSearch[],
			
 
				+  options?: StructuredSearchOptions
			
 
				+): Promise<HybridQueryResult[]> {
			
 
				+  const limit = options?.limit ?? 10;
			
 
				+  const minScore = options?.minScore ?? 0;
			
 
				+  const candidateLimit = options?.candidateLimit ?? RERANK_CANDIDATE_LIMIT;
			
 
				+  const collection = options?.collection;
			
 
				+  const hooks = options?.hooks;
			
 
				+
			
 
				+  if (searches.length === 0) return [];
			
 
				+
			
 
				+  // Every non-empty result list is stored together with the index of the
			
 
				+  // sub-search that produced it. lex searches run before vec/hyde searches,
			
 
				+  // so lists are collected out of caller order and re-sorted before fusion.
			
 
				+  const collectedLists: { index: number; results: RankedResult[] }[] = [];
			
 
				+  const docidMap = new Map<string, string>(); // filepath -> docid
			
 
				+  const hasVectors = !!store.db.prepare(
			
 
				+    `SELECT name FROM sqlite_master WHERE type='table' AND name='vectors_vec'`
			
 
				+  ).get();
			
 
				+
			
 
				+  // Step 1: Run FTS for all lex searches (sync, instant)
			
 
				+  for (let i = 0; i < searches.length; i++) {
			
 
				+    const search = searches[i]!;
			
 
				+    if (search.type !== 'lex') continue;
			
 
				+
			
 
				+    const ftsResults = store.searchFTS(search.query, 20, collection);
			
 
				+    if (ftsResults.length === 0) continue;
			
 
				+
			
 
				+    for (const r of ftsResults) docidMap.set(r.filepath, r.docid);
			
 
				+    collectedLists.push({
			
 
				+      index: i,
			
 
				+      results: ftsResults.map(r => ({
			
 
				+        file: r.filepath, displayPath: r.displayPath,
			
 
				+        title: r.title, body: r.body || "", score: r.score,
			
 
				+      })),
			
 
				+    });
			
 
				+  }
			
 
				+
			
 
				+  // Step 2: Batch embed and run vector searches for vec/hyde
			
 
				+  if (hasVectors) {
			
 
				+    // Keep each sub-search paired with its position in the caller's array.
			
 
				+    const vecSearches = searches
			
 
				+      .map((search, index) => ({ search, index }))
			
 
				+      .filter(({ search }) => search.type === 'vec' || search.type === 'hyde');
			
 
				+    if (vecSearches.length > 0) {
			
 
				+      const llm = getDefaultLlamaCpp();
			
 
				+      const textsToEmbed = vecSearches.map(({ search }) => formatQueryForEmbedding(search.query));
			
 
				+      const embeddings = await llm.embedBatch(textsToEmbed);
			
 
				+
			
 
				+      for (let i = 0; i < vecSearches.length; i++) {
			
 
				+        const embedding = embeddings[i]?.embedding;
			
 
				+        if (!embedding) continue; // no embedding for this query: skip it
			
 
				+
			
 
				+        const { search, index } = vecSearches[i]!;
			
 
				+        const vecResults = await store.searchVec(
			
 
				+          search.query, DEFAULT_EMBED_MODEL, 20, collection,
			
 
				+          undefined, embedding
			
 
				+        );
			
 
				+        if (vecResults.length === 0) continue;
			
 
				+
			
 
				+        for (const r of vecResults) docidMap.set(r.filepath, r.docid);
			
 
				+        collectedLists.push({
			
 
				+          index,
			
 
				+          results: vecResults.map(r => ({
			
 
				+            file: r.filepath, displayPath: r.displayPath,
			
 
				+            title: r.title, body: r.body || "", score: r.score,
			
 
				+          })),
			
 
				+        });
			
 
				+      }
			
 
				+    }
			
 
				+  }
			
 
				+
			
 
				+  if (collectedLists.length === 0) return [];
			
 
				+
			
 
				+  // Step 3: RRF fusion — first list gets 2x weight (assume caller ordered by importance).
			
 
				+  // Restore the caller's order first; otherwise a lex list would always outrank
			
 
				+  // an earlier vec/hyde sub-search for the 2x slot.
			
 
				+  collectedLists.sort((a, b) => a.index - b.index);
			
 
				+  const rankedLists = collectedLists.map(entry => entry.results);
			
 
				+  const weights = rankedLists.map((_, i) => i === 0 ? 2.0 : 1.0);
			
 
				+  const fused = reciprocalRankFusion(rankedLists, weights);
			
 
				+  const candidates = fused.slice(0, candidateLimit);
			
 
				+
			
 
				+  if (candidates.length === 0) return [];
			
 
				+
			
 
				+  hooks?.onExpand?.("", []); // Signal no expansion (pre-expanded)
			
 
				+
			
 
				+  // Step 4: Chunk documents, pick best chunk per doc for reranking
			
 
				+  // Use first lex query as the "query" for keyword matching, or first vec if no lex
			
 
				+  const primaryQuery = searches.find(s => s.type === 'lex')?.query
			
 
				+    || searches.find(s => s.type === 'vec')?.query
			
 
				+    || searches[0]?.query || "";
			
 
				+  // Terms of one or two characters are ignored for the overlap count.
			
 
				+  const queryTerms = primaryQuery.toLowerCase().split(/\s+/).filter(t => t.length > 2);
			
 
				+  const chunksToRerank: { file: string; text: string }[] = [];
			
 
				+  const docChunkMap = new Map<string, { chunks: { text: string; pos: number }[]; bestIdx: number }>();
			
 
				+
			
 
				+  for (const cand of candidates) {
			
 
				+    const chunks = chunkDocument(cand.body);
			
 
				+    if (chunks.length === 0) continue; // nothing to rerank for this document
			
 
				+
			
 
				+    // Pick chunk with most keyword overlap (the first chunk wins ties)
			
 
				+    let bestIdx = 0;
			
 
				+    let bestScore = -1;
			
 
				+    for (let i = 0; i < chunks.length; i++) {
			
 
				+      const chunkLower = chunks[i]!.text.toLowerCase();
			
 
				+      const score = queryTerms.reduce((acc, term) => acc + (chunkLower.includes(term) ? 1 : 0), 0);
			
 
				+      if (score > bestScore) { bestScore = score; bestIdx = i; }
			
 
				+    }
			
 
				+
			
 
				+    chunksToRerank.push({ file: cand.file, text: chunks[bestIdx]!.text });
			
 
				+    docChunkMap.set(cand.file, { chunks, bestIdx });
			
 
				+  }
			
 
				+
			
 
				+  // Step 5: Rerank chunks
			
 
				+  hooks?.onRerankStart?.(chunksToRerank.length);
			
 
				+  const reranked = await store.rerank(primaryQuery, chunksToRerank);
			
 
				+  hooks?.onRerankDone?.();
			
 
				+
			
 
				+  // Step 6: Blend RRF position score with reranker score
			
 
				+  const candidateMap = new Map(candidates.map(c => [c.file, {
			
 
				+    displayPath: c.displayPath, title: c.title, body: c.body,
			
 
				+  }]));
			
 
				+  const rrfRankMap = new Map(candidates.map((c, i) => [c.file, i + 1])); // 1-based fused rank
			
 
				+
			
 
				+  const blended = reranked.map(r => {
			
 
				+    // A file missing from the candidate ranks falls back to the last possible rank.
			
 
				+    const rrfRank = rrfRankMap.get(r.file) || candidateLimit;
			
 
				+    // The higher a document ranked in the fusion, the more its fused position counts.
			
 
				+    let rrfWeight: number;
			
 
				+    if (rrfRank <= 3) rrfWeight = 0.75;
			
 
				+    else if (rrfRank <= 10) rrfWeight = 0.60;
			
 
				+    else rrfWeight = 0.40;
			
 
				+    const rrfScore = 1 / rrfRank;
			
 
				+    const blendedScore = rrfWeight * rrfScore + (1 - rrfWeight) * r.score;
			
 
				+
			
 
				+    const candidate = candidateMap.get(r.file);
			
 
				+    const chunkInfo = docChunkMap.get(r.file);
			
 
				+    const bestIdx = chunkInfo?.bestIdx ?? 0;
			
 
				+    const bestChunk = chunkInfo?.chunks[bestIdx]?.text || candidate?.body || "";
			
 
				+    const bestChunkPos = chunkInfo?.chunks[bestIdx]?.pos || 0;
			
 
				+
			
 
				+    return {
			
 
				+      file: r.file,
			
 
				+      displayPath: candidate?.displayPath || "",
			
 
				+      title: candidate?.title || "",
			
 
				+      body: candidate?.body || "",
			
 
				+      bestChunk,
			
 
				+      bestChunkPos,
			
 
				+      score: blendedScore,
			
 
				+      context: store.getContextForFile(r.file),
			
 
				+      docid: docidMap.get(r.file) || "",
			
 
				+    };
			
 
				+  }).sort((a, b) => b.score - a.score);
			
 
				+
			
 
				+  // Step 7: Dedup by file (keeps the highest-scoring entry), then apply the score floor and limit
			
 
				+  const seenFiles = new Set<string>();
			
 
				+  return blended
			
 
				+    .filter(r => {
			
 
				+      if (seenFiles.has(r.file)) return false;
			
 
				+      seenFiles.add(r.file);
			
 
				+      return true;
			
 
				+    })
			
 
				+    .filter(r => r.score >= minScore)
			
 
				+    .slice(0, limit);
			
 
				+}