4 months ago · d1ec31eab8
--- a/skills/qmd/SKILL.md
+++ b/skills/qmd/SKILL.md
@@ -11,54 +11,87 @@ allowed-tools: Bash(qmd:*), mcp__qmd__*
 
				 
			
 
				 # QMD - Quick Markdown Search
			
 
				 
			
 
				-Local search engine for markdown content. Indexes notes, docs, and knowledge bases.
			
 
				+Local search engine for markdown content.
			
 
				 
			
 
				 ## Status
			
 
				 
			
 
				 !`qmd status 2>/dev/null || echo "Not installed: npm install -g @tobilu/qmd"`
			
 
				 
			
 
				-## MCP Search — `structured_search`
			
 
				-
			
 
				-Pass 1-4 sub-queries with type `lex`, `vec`, or `hyde`:
			
 
				+## MCP: `structured_search`
			
 
				 
			
 
				 ```json
			
 
				 {
			
 
				   "searches": [
			
 
				     { "type": "lex", "query": "CAP theorem consistency" },
			
 
				     { "type": "vec", "query": "tradeoff between consistency and availability" }
			
 
				-  ]
			
 
				+  ],
			
 
				+  "collections": ["notes", "docs"],
			
 
				+  "limit": 10
			
 
				 }
			
 
				 ```
			
 
				 
			
 
				-| Type | Method | What to Write |
			
 
				-|------|--------|---------------|
			
 
				-| `lex` | BM25 keywords | Short phrases — exact terms, names, code |
			
 
				-| `vec` | Vector search | Natural language question |
			
 
				-| `hyde` | Vector search | Hypothetical answer (50-100 words) |
			
 
				+### Search Types
			
 
				+
			
 
				+| Type | Method | Input |
			
 
				+|------|--------|-------|
			
 
				+| `lex` | BM25 | Keywords — exact terms, names, code |
			
 
				+| `vec` | Vector | Question — natural language |
			
 
				+| `hyde` | Vector | Answer — hypothetical result (50-100 words) |
			
 
				+
			
 
				+### Writing Good Queries
			
 
				+
			
 
				+**lex (keyword)**
			
 
				+- 2-5 terms, no filler words
			
 
				+- Include synonyms: `"auth authentication login"`
			
 
				+- Use exact names: `"PostgreSQL connection pool"`
			
 
				+- Code identifiers work: `"handleError async"`
			
 
				+
			
 
				+**vec (semantic)**
			
 
				+- Full natural language question
			
 
				+- Be specific: `"how does the rate limiter handle burst traffic"` not `"rate limiting"`
			
 
				+- Include context: `"in the payment service, how are refunds processed"`
			
 
				+
			
 
				+**hyde (hypothetical document)**
			
 
				+- Write 50-100 words of what the *answer* looks like
			
 
				+- Use the vocabulary you expect in the result
			
 
				+- Example: `"The rate limiter uses a sliding window algorithm with a 60-second window. When a client exceeds 100 requests per minute, subsequent requests return 429 Too Many Requests until the window resets."`
			
 
				+
			
 
				+### Combining Types
			
 
				+
			
 
				+| Goal | Approach |
			
 
				+|------|----------|
			
 
				+| Know exact terms | `lex` only |
			
 
				+| Don't know vocabulary | `vec` only |
			
 
				+| Best recall | `lex` + `vec` |
			
 
				+| Complex topic | `lex` + `vec` + `hyde` |
			
 
				 
			
 
				-**Tips:**
			
 
				-- Quick lookup → single `lex` query
			
 
				-- Don't know exact terms → use `vec`
			
 
				-- Best results → combine `lex` + `vec` (+ `hyde` for complex topics)
			
 
				-- First query gets 2x weight
			
 
				+First query gets 2x weight in fusion — put your best guess first.
			
 
				 
			
 
				-## MCP Tools
			
 
				+### Collection Filtering
			
 
				+
			
 
				+```jsonc
			
 
				+{ "collection": "docs" }           // Single collection
			
 
				+{ "collections": ["docs", "notes"] }  // Multiple (OR)
			
 
				+```
			
 
				+
			
 
				+Omit both to search all collections. If both are given, `collections` takes precedence and `collection` is ignored.
			
 
				+
			
 
				+## Other MCP Tools
			
 
				 
			
 
				 | Tool | Use |
			
 
				 |------|-----|
			
 
				-| `structured_search` | Search with lex/vec/hyde queries |
			
 
				 | `get` | Retrieve doc by path or `#docid` |
			
 
				-| `multi_get` | Retrieve multiple docs by glob/list |
			
 
				-| `status` | Index health and collections |
			
 
				+| `multi_get` | Retrieve multiple by glob/list |
			
 
				+| `status` | Collections and health |
			
 
				 
			
 
				 ## CLI
			
 
				 
			
 
				 ```bash
			
 
				-qmd search "keywords"           # BM25 keyword search
			
 
				-qmd vsearch "question"          # Vector similarity
			
 
				-qmd query "question"            # Auto-expand + rerank
			
 
				-qmd query $'lex: X\nvec: Y'     # Structured (same as MCP)
			
 
				-qmd get "#abc123"               # Retrieve by docid
			
 
				+qmd query "question"              # Auto-expand + rerank
			
 
				+qmd query $'lex: X\nvec: Y'       # Structured
			
 
				+qmd search "keywords"             # BM25 only
			
 
				+qmd vsearch "question"            # Vector only
			
 
				+qmd get "#abc123"                 # By docid
			
 
				 ```
			
 
				 
			
 
				 ## Setup
			
@@ -66,10 +99,5 @@ qmd get "#abc123"               # Retrieve by docid
 
				 ```bash
			
 
				 npm install -g @tobilu/qmd
			
 
				 qmd collection add ~/notes --name notes
			
 
				-qmd embed                       # Generate embeddings
			
 
				-```
			
 
				-
			
 
				-MCP config for Claude Code (`~/.claude/settings.json`):
			
 
				-```json
			
 
				-{ "mcpServers": { "qmd": { "command": "qmd", "args": ["mcp"] } } }
			
 
				+qmd embed
			
 
				 ```
			
--- a/src/mcp.ts
+++ b/src/mcp.ts
@@ -261,11 +261,11 @@ function createMcpServer(store: Store): McpServer {
 
				         ),
			
 
				         limit: z.number().optional().default(10).describe("Maximum number of results (default: 10)"),
			
 
				         minScore: z.number().optional().default(0).describe("Minimum relevance score 0-1 (default: 0)"),
			
 
				-        collection: z.string().optional().describe("Filter to a specific collection by name"),
			
 
				-        intent: z.string().optional().describe("(Future) Domain intent hint, e.g., 'distributed systems', 'startup finances'"),
			
 
				+        collection: z.string().optional().describe("Filter to a single collection by name"),
			
 
				+        collections: z.array(z.string()).optional().describe("Filter to multiple collections (OR match)"),
			
 
				       },
			
 
				     },
			
 
				-    async ({ searches, limit, minScore, collection, intent }) => {
			
 
				+    async ({ searches, limit, minScore, collection, collections }) => {
			
 
				       // Map to internal format
			
 
				       const subSearches: StructuredSubSearch[] = searches.map(s => ({
			
 
				         type: s.type,
			
@@ -274,9 +274,9 @@ function createMcpServer(store: Store): McpServer {
 
				 
			
 
				       const results = await structuredSearch(store, subSearches, {
			
 
				         collection,
			
 
				+        collections,
			
 
				         limit,
			
 
				         minScore,
			
 
				-        intent,
			
 
				       });
			
 
				 
			
 
				       // Use first lex or vec query for snippet extraction
			
@@ -582,9 +582,9 @@ export async function startMcpHttpServer(port: number, options?: { quiet?: boole
 
				 
			
 
				         const results = await structuredSearch(store, subSearches, {
			
 
				           collection: params.collection,
			
 
				+          collections: params.collections,
			
 
				           limit: params.limit ?? 10,
			
 
				           minScore: params.minScore ?? 0,
			
 
				-          intent: params.intent,
			
 
				         });
			
 
				 
			
 
				         // Use first lex or vec query for snippet extraction
			
--- a/src/store.ts
+++ b/src/store.ts
@@ -3072,7 +3072,8 @@ export interface StructuredSubSearch {
 
				 }
			
 
				 
			
 
				 export interface StructuredSearchOptions {
			
 
				-  collection?: string;
			
 
				+  collection?: string;      // Single collection filter
			
 
				+  collections?: string[];   // Multiple collections filter (OR)
			
 
				   limit?: number;           // default 10
			
 
				   minScore?: number;        // default 0
			
 
				   candidateLimit?: number;  // default RERANK_CANDIDATE_LIMIT
			
@@ -3107,9 +3108,12 @@ export async function structuredSearch(
 
				   const limit = options?.limit ?? 10;
			
 
				   const minScore = options?.minScore ?? 0;
			
 
				   const candidateLimit = options?.candidateLimit ?? RERANK_CANDIDATE_LIMIT;
			
 
				-  const collection = options?.collection;
			
 
				   const hooks = options?.hooks;
			
 
				 
			
 
				+  // Normalize collection filter to array (undefined = all collections)
			
 
				+  const collections: string[] | undefined = options?.collections
			
 
				+    ?? (options?.collection ? [options.collection] : undefined);
			
 
				+
			
 
				   if (searches.length === 0) return [];
			
 
				 
			
 
				   const rankedLists: RankedResult[][] = [];
			
@@ -3118,16 +3122,21 @@ export async function structuredSearch(
 
				     `SELECT name FROM sqlite_master WHERE type='table' AND name='vectors_vec'`
			
 
				   ).get();
			
 
				 
			
 
				+  // Helper to run search across collections (or all if undefined)
			
 
				+  const collectionList = collections ?? [undefined]; // undefined = all collections
			
 
				+
			
 
				   // Step 1: Run FTS for all lex searches (sync, instant)
			
 
				   for (const search of searches) {
			
 
				     if (search.type === 'lex') {
			
 
				-      const ftsResults = store.searchFTS(search.query, 20, collection);
			
 
				-      if (ftsResults.length > 0) {
			
 
				-        for (const r of ftsResults) docidMap.set(r.filepath, r.docid);
			
 
				-        rankedLists.push(ftsResults.map(r => ({
			
 
				-          file: r.filepath, displayPath: r.displayPath,
			
 
				-          title: r.title, body: r.body || "", score: r.score,
			
 
				-        })));
			
 
				+      for (const coll of collectionList) {
			
 
				+        const ftsResults = store.searchFTS(search.query, 20, coll);
			
 
				+        if (ftsResults.length > 0) {
			
 
				+          for (const r of ftsResults) docidMap.set(r.filepath, r.docid);
			
 
				+          rankedLists.push(ftsResults.map(r => ({
			
 
				+            file: r.filepath, displayPath: r.displayPath,
			
 
				+            title: r.title, body: r.body || "", score: r.score,
			
 
				+          })));
			
 
				+        }
			
 
				       }
			
 
				     }
			
 
				   }
			
@@ -3144,16 +3153,18 @@ export async function structuredSearch(
 
				         const embedding = embeddings[i]?.embedding;
			
 
				         if (!embedding) continue;
			
 
				 
			
 
				-        const vecResults = await store.searchVec(
			
 
				-          vecSearches[i]!.query, DEFAULT_EMBED_MODEL, 20, collection,
			
 
				-          undefined, embedding
			
 
				-        );
			
 
				-        if (vecResults.length > 0) {
			
 
				-          for (const r of vecResults) docidMap.set(r.filepath, r.docid);
			
 
				-          rankedLists.push(vecResults.map(r => ({
			
 
				-            file: r.filepath, displayPath: r.displayPath,
			
 
				-            title: r.title, body: r.body || "", score: r.score,
			
 
				-          })));
			
 
				+        for (const coll of collectionList) {
			
 
				+          const vecResults = await store.searchVec(
			
 
				+            vecSearches[i]!.query, DEFAULT_EMBED_MODEL, 20, coll,
			
 
				+            undefined, embedding
			
 
				+          );
			
 
				+          if (vecResults.length > 0) {
			
 
				+            for (const r of vecResults) docidMap.set(r.filepath, r.docid);
			
 
				+            rankedLists.push(vecResults.map(r => ({
			
 
				+              file: r.filepath, displayPath: r.displayPath,
			
 
				+              title: r.title, body: r.body || "", score: r.score,
			
 
				+            })));
			
 
				+          }
			
 
				         }
			
 
				       }
			
 
				     }