4 mesiacov pred · 4649069e62
--- a/.gitignore
+++ b/.gitignore
@@ -13,6 +13,7 @@ texts/
 
				 !CHANGELOG.md
			
 
				 !skills/**/*.md
			
 
				 !finetune/*.md
			
 
				+!docs/*.md
			
 
				 finetune/outputs/
			
 
				 finetune/data/train/
			
 
				 .claude/
			
--- a/docs/SYNTAX.md
+++ b/docs/SYNTAX.md
@@ -0,0 +1,141 @@
 
				+# QMD Query Syntax
			
 
				+
			
 
				+QMD queries are structured documents with typed sub-queries. Each line specifies a search type and query text.
			
 
				+
			
 
				+## Grammar
			
 
				+
			
 
				+```ebnf
			
 
				+query_document = { line } ;
			
 
				+line           = [ type ":" ] text newline ;
			
 
				+type           = "lex" | "vec" | "hyde" | "expand" ;
			
 
				+text           = quoted_phrase | plain_text ;
			
 
				+quoted_phrase  = '"' { character } '"' ;
			
 
				+plain_text     = { character } ;
			
 
				+newline        = "\n" ;
			
 
				+```
			
 
				+
			
 
				+## Query Types
			
 
				+
			
 
				+| Type | Method | Description |
			
 
				+|------|--------|-------------|
			
 
				+| `lex` | BM25 | Keyword search with exact matching |
			
 
				+| `vec` | Vector | Semantic similarity search |
			
 
				+| `hyde` | Vector | Hypothetical document embedding |
			
 
				+| `expand` | LLM | Auto-expand into lex/vec/hyde via local model |
			
 
				+
			
 
				+## Default Behavior
			
 
				+
			
 
				+A query without any type prefix is treated as `expand:` — it gets passed to the query expansion model which generates lex, vec, and hyde variations automatically.
			
 
				+
			
 
				+```
			
 
				+# These are equivalent:
			
 
				+how does authentication work
			
 
				+expand: how does authentication work
			
 
				+```
			
 
				+
			
 
				+## Lex Query Syntax
			
 
				+
			
 
				+Lex queries support special syntax for precise keyword matching:
			
 
				+
			
 
				+```ebnf
			
 
				+lex_query   = { lex_term } ;
			
 
				+lex_term    = negation | phrase | word ;
			
 
				+negation    = "-" ( phrase | word ) ;
			
 
				+phrase      = '"' { character } '"' ;
			
 
				+word        = ( letter | digit | "'" ) { letter | digit | "'" } ;
			
 
				+```
			
 
				+
			
 
				+| Syntax | Meaning | Example |
			
 
				+|--------|---------|---------|
			
 
				+| `word` | Prefix match | `perf` matches "performance" |
			
 
				+| `"phrase"` | Exact phrase | `"rate limiter"` |
			
 
				+| `-word` | Exclude term | `-sports` |
			
 
				+| `-"phrase"` | Exclude phrase | `-"test data"` |
			
 
				+
			
 
				+### Examples
			
 
				+
			
 
				+```
			
 
				+lex: CAP theorem consistency
			
 
				+lex: "machine learning" -"deep learning"
			
 
				+lex: auth -oauth -saml
			
 
				+```
			
 
				+
			
 
				+## Vec Query Syntax
			
 
				+
			
 
				+Vec queries are natural language questions. No special syntax — just write what you're looking for.
			
 
				+
			
 
				+```
			
 
				+vec: how does the rate limiter handle burst traffic
			
 
				+vec: what is the tradeoff between consistency and availability
			
 
				+```
			
 
				+
			
 
				+## Hyde Query Syntax
			
 
				+
			
 
				+Hyde queries are hypothetical answer passages (50-100 words). Write what you expect the answer to look like.
			
 
				+
			
 
				+```
			
 
				+hyde: The rate limiter uses a sliding window algorithm with a 60-second window. When a client exceeds 100 requests per minute, subsequent requests return 429 Too Many Requests.
			
 
				+```
			
 
				+
			
 
				+## Multi-Line Queries
			
 
				+
			
 
				+Combine multiple query types for best results. The first query gets 2x weight in fusion, so put your strongest query first.
			
 
				+
			
 
				+```
			
 
				+lex: rate limiter algorithm
			
 
				+vec: how does rate limiting work in the API
			
 
				+hyde: The API implements rate limiting using a token bucket algorithm...
			
 
				+```
			
 
				+
			
 
				+## Expand Queries
			
 
				+
			
 
				+Use `expand:` to leverage the local query expansion model. Limited to one per query document.
			
 
				+
			
 
				+```
			
 
				+expand: error handling best practices
			
 
				+```
			
 
				+
			
 
				+This generates lex, vec, and hyde variations automatically. Useful when you don't know the exact terms.
			
 
				+
			
 
				+## Constraints
			
 
				+
			
 
				+- Maximum one `expand:` query per document
			
 
				+- `lex` syntax (`-term`, `"phrase"`) only works in lex queries
			
 
				+- Empty lines are ignored
			
 
				+- Leading/trailing whitespace is trimmed
			
 
				+
			
 
				+## MCP/HTTP API
			
 
				+
			
 
				+The `query` tool accepts a query document:
			
 
				+
			
 
				+```json
			
 
				+{
			
 
				+  "q": "lex: CAP theorem\nvec: consistency vs availability",
			
 
				+  "collections": ["docs"],
			
 
				+  "limit": 10
			
 
				+}
			
 
				+```
			
 
				+
			
 
				+Or structured format:
			
 
				+
			
 
				+```json
			
 
				+{
			
 
				+  "searches": [
			
 
				+    { "type": "lex", "query": "CAP theorem" },
			
 
				+    { "type": "vec", "query": "consistency vs availability" }
			
 
				+  ]
			
 
				+}
			
 
				+```
			
 
				+
			
 
				+## CLI
			
 
				+
			
 
				+```bash
			
 
				+# Single line (implicit expand)
			
 
				+qmd query "how does auth work"
			
 
				+
			
 
				+# Multi-line with types
			
 
				+qmd query $'lex: auth token\nvec: how does authentication work'
			
 
				+
			
 
				+# All three types (lex + vec + hyde)
			
 
				+qmd query $'lex: keywords\nvec: question\nhyde: hypothetical answer...'
			
 
				+```
			
--- a/skills/qmd/SKILL.md
+++ b/skills/qmd/SKILL.md
@@ -5,7 +5,7 @@ license: MIT
 
				 compatibility: Requires qmd CLI or MCP server. Install via `npm install -g @tobilu/qmd`.
			
 
				 metadata:
			
 
				   author: tobi
			
 
				-  version: "1.3.0"
			
 
				+  version: "2.0.0"
			
 
				 allowed-tools: Bash(qmd:*), mcp__qmd__*
			
 
				 ---
			
 
				 
			
@@ -17,7 +17,7 @@ Local search engine for markdown content.
 
				 
			
 
				 !`qmd status 2>/dev/null || echo "Not installed: npm install -g @tobilu/qmd"`
			
 
				 
			
 
				-## MCP: `structured_search`
			
 
				+## MCP: `query`
			
 
				 
			
 
				 ```json
			
 
				 {
			
@@ -25,18 +25,19 @@ Local search engine for markdown content.
 
				     { "type": "lex", "query": "CAP theorem consistency" },
			
 
				     { "type": "vec", "query": "tradeoff between consistency and availability" }
			
 
				   ],
			
 
				-  "collections": ["notes", "docs"],
			
 
				+  "collections": ["docs"],
			
 
				   "limit": 10
			
 
				 }
			
 
				 ```
			
 
				 
			
 
				-### Search Types
			
 
				+### Query Types
			
 
				 
			
 
				 | Type | Method | Input |
			
 
				 |------|--------|-------|
			
 
				 | `lex` | BM25 | Keywords — exact terms, names, code |
			
 
				 | `vec` | Vector | Question — natural language |
			
 
				 | `hyde` | Vector | Answer — hypothetical result (50-100 words) |
			
 
				+| `expand` | LLM | Auto-expand via local model (max 1 per query) |
			
 
				 
			
 
				 ### Writing Good Queries
			
 
				 
			
@@ -48,20 +49,24 @@ Local search engine for markdown content.
 
				 
			
 
				 **vec (semantic)**
			
 
				 - Full natural language question
			
 
				-- Be specific: `"how does the rate limiter handle burst traffic"` not `"rate limiting"`
			
 
				+- Be specific: `"how does the rate limiter handle burst traffic"`
			
 
				 - Include context: `"in the payment service, how are refunds processed"`
			
 
				 
			
 
				 **hyde (hypothetical document)**
			
 
				 - Write 50-100 words of what the *answer* looks like
			
 
				 - Use the vocabulary you expect in the result
			
 
				-- Example: `"The rate limiter uses a sliding window algorithm with a 60-second window. When a client exceeds 100 requests per minute, subsequent requests return 429 Too Many Requests until the window resets."`
			
 
				+
			
 
				+**expand (auto-expand)**
			
 
				+- Let the local LLM generate lex/vec/hyde variations
			
 
				+- Good when you don't know exact terms
			
 
				+- Max one `expand:` per query
			
 
				 
			
 
				 ### Combining Types
			
 
				 
			
 
				 | Goal | Approach |
			
 
				 |------|----------|
			
 
				 | Know exact terms | `lex` only |
			
 
				-| Don't know vocabulary | `vec` only |
			
 
				+| Don't know vocabulary | `vec` or `expand` |
			
 
				 | Best recall | `lex` + `vec` |
			
 
				 | Complex topic | `lex` + `vec` + `hyde` |
			
 
				 
			
@@ -99,11 +104,19 @@ Omit to search all collections.
 
				 ```bash
			
 
				 qmd query "question"              # Auto-expand + rerank
			
 
				 qmd query $'lex: X\nvec: Y'       # Structured
			
 
				-qmd search "keywords"             # BM25 only
			
 
				-qmd vsearch "question"            # Vector only
			
 
				+qmd query $'expand: question'     # Explicit expand
			
 
				+qmd search "keywords"             # BM25 only (no LLM)
			
 
				 qmd get "#abc123"                 # By docid
			
 
				 ```
			
 
				 
			
 
				+## HTTP API
			
 
				+
			
 
				+```bash
			
 
				+curl -X POST http://localhost:8181/query \
			
 
				+  -H "Content-Type: application/json" \
			
 
				+  -d '{"searches": [{"type": "lex", "query": "test"}]}'
			
 
				+```
			
 
				+
			
 
				 ## Setup
			
 
				 
			
 
				 ```bash
			
--- a/src/mcp.ts
+++ b/src/mcp.ts
@@ -120,7 +120,7 @@ function buildInstructions(store: Store): string {
 
				 
			
 
				   // --- Search tool ---
			
 
				   lines.push("");
			
 
				-  lines.push("Search: Use `structured_search` with 1-4 sub-queries:");
			
 
				+  lines.push("Search: Use `query` with sub-queries (lex/vec/hyde/expand):");
			
 
				   lines.push("  - type:'lex' — BM25 keyword search (exact terms, fast)");
			
 
				   lines.push("  - type:'vec' — semantic vector search (meaning-based)");
			
 
				   lines.push("  - type:'hyde' — hypothetical document (write what the answer looks like)");
			
@@ -225,43 +225,42 @@ function createMcpServer(store: Store): McpServer {
 
				   );
			
 
				 
			
 
				   // ---------------------------------------------------------------------------
			
 
				-  // Tool: structured_search (Primary search tool)
			
 
				+  // Tool: query (Primary search tool)
			
 
				   // ---------------------------------------------------------------------------
			
 
				 
			
 
				   const subSearchSchema = z.object({
			
 
				-    type: z.enum(['lex', 'vec', 'hyde']).describe(
			
 
				-      "Search type: 'lex' = BM25 keyword search (exact terms, fast), " +
			
 
				-      "'vec' = semantic vector search (meaning-based, finds synonyms/paraphrases), " +
			
 
				-      "'hyde' = hypothetical document (imagine what the answer looks like)"
			
 
				+    type: z.enum(['lex', 'vec', 'hyde', 'expand']).describe(
			
 
				+      "Query type: 'lex' = BM25 keywords, 'vec' = semantic question, " +
			
 
				+      "'hyde' = hypothetical answer, 'expand' = auto-expand via LLM (max 1)"
			
 
				     ),
			
 
				-    query: z.string().describe("The search query text"),
			
 
				+    query: z.string().describe("The query text"),
			
 
				   });
			
 
				 
			
 
				   server.registerTool(
			
 
				-    "structured_search",
			
 
				+    "query",
			
 
				     {
			
 
				-      title: "Structured Search",
			
 
				-      description: `Execute pre-expanded search queries. Skips internal query expansion — you provide the search variations directly.
			
 
				-
			
 
				-**When to use:** You're an LLM that can generate better query expansions than a small local model. Pass 2-4 sub-searches for best results.
			
 
				-
			
 
				-**Search types:**
			
 
				-- \`lex\`: BM25 keyword search. Use short keyword phrases (2-5 terms). Good for exact terms, names, code identifiers.
			
 
				-- \`vec\`: Semantic vector search. Use natural language questions or descriptions. Finds documents with similar meaning even when vocabulary differs.
			
 
				-- \`hyde\`: Hypothetical document. Write a short passage (~50-100 words) that looks like what you're searching for. Powerful for finding conceptually similar content.
			
 
				-
			
 
				-**Example:** To find CAP theorem docs, pass:
			
 
				-- { type: "lex", query: "CAP theorem consistency availability" }
			
 
				-- { type: "vec", query: "what is the tradeoff between data consistency and system availability in distributed systems" }
			
 
				-- { type: "hyde", query: "The CAP theorem states that a distributed system can only guarantee two of three properties: Consistency, Availability, and Partition tolerance." }`,
			
 
				+      title: "Query",
			
 
				+      description: `Search the knowledge base with typed sub-queries.
			
 
				+
			
 
				+**Query types:**
			
 
				+- \`lex\`: BM25 keyword search. Supports "exact phrase" and -negation.
			
 
				+- \`vec\`: Semantic vector search. Natural language questions.
			
 
				+- \`hyde\`: Hypothetical document. Write what the answer looks like (50-100 words).
			
 
				+- \`expand\`: Auto-expand via local LLM. Max one per query.
			
 
				+
			
 
				+**Examples:**
			
 
				+- Quick lookup: [{ type: "lex", query: "CAP theorem" }]
			
 
				+- Semantic: [{ type: "vec", query: "consistency vs availability tradeoff" }]
			
 
				+- Best results: [{ type: "lex", query: "CAP" }, { type: "vec", query: "distributed systems consistency" }]
			
 
				+- Auto-expand: [{ type: "expand", query: "how does rate limiting work" }]`,
			
 
				       annotations: { readOnlyHint: true, openWorldHint: false },
			
 
				       inputSchema: {
			
 
				         searches: z.array(subSearchSchema).min(1).max(10).describe(
			
 
				-          "Array of sub-searches to execute. Order matters — first search gets higher weight in fusion."
			
 
				+          "Sub-queries to execute. First gets 2x weight. Max one expand: per query."
			
 
				         ),
			
 
				-        limit: z.number().optional().default(10).describe("Maximum number of results (default: 10)"),
			
 
				-        minScore: z.number().optional().default(0).describe("Minimum relevance score 0-1 (default: 0)"),
			
 
				-        collections: z.array(z.string()).optional().describe("Filter to specific collections (OR match)"),
			
 
				+        limit: z.number().optional().default(10).describe("Max results (default: 10)"),
			
 
				+        minScore: z.number().optional().default(0).describe("Min relevance 0-1 (default: 0)"),
			
 
				+        collections: z.array(z.string()).optional().describe("Filter to collections (OR match)"),
			
 
				       },
			
 
				     },
			
 
				     async ({ searches, limit, minScore, collections }) => {
			
@@ -561,7 +560,8 @@ export async function startMcpHttpServer(port: number, options?: { quiet?: boole
 
				       }
			
 
				 
			
 
				       // REST endpoint: POST /search — structured search without MCP protocol
			
 
				-      if (pathname === "/search" && nodeReq.method === "POST") {
			
 
				+      // REST endpoint: POST /query — structured search without MCP protocol
			
 
				+      if (pathname === "/query" && nodeReq.method === "POST") {
			
 
				         const rawBody = await collectBody(nodeReq);
			
 
				         const params = JSON.parse(rawBody);
			
 
				         
			
@@ -603,7 +603,7 @@ export async function startMcpHttpServer(port: number, options?: { quiet?: boole
 
				 
			
 
				         nodeRes.writeHead(200, { "Content-Type": "application/json" });
			
 
				         nodeRes.end(JSON.stringify({ results: formatted }));
			
 
				-        log(`${ts()} POST /search ${params.searches.length} queries (${Date.now() - reqStart}ms)`);
			
 
				+        log(`${ts()} POST /query ${params.searches.length} queries (${Date.now() - reqStart}ms)`);
			
 
				         return;
			
 
				       }
			
 
				 
			
--- a/src/qmd.ts
+++ b/src/qmd.ts
@@ -1960,14 +1960,14 @@ function parseStructuredQuery(query: string): StructuredSubSearch[] | null {
 
				   const lines = query.split('\n').map(l => l.trim()).filter(l => l.length > 0);
			
 
				   if (lines.length === 0) return null;
			
 
				 
			
 
				-  const prefixRe = /^(lex|vec|hyde):\s*/i;
			
 
				+  const prefixRe = /^(lex|vec|hyde|expand):\s*/i;
			
 
				   const searches: StructuredSubSearch[] = [];
			
 
				   const plainLines: string[] = [];
			
 
				 
			
 
				   for (const line of lines) {
			
 
				     const match = line.match(prefixRe);
			
 
				     if (match) {
			
 
				-      const type = match[1]!.toLowerCase() as 'lex' | 'vec' | 'hyde';
			
 
				+      const type = match[1]!.toLowerCase() as 'lex' | 'vec' | 'hyde' | 'expand';
			
 
				       const text = line.slice(match[0].length).trim();
			
 
				       if (text.length > 0) {
			
 
				         searches.push({ type, query: text });
			
--- a/src/store.ts
+++ b/src/store.ts
@@ -3151,8 +3151,8 @@ export async function vectorSearchQuery(
 
				  * Matches the format used in QMD training data.
			
 
				  */
			
 
				 export interface StructuredSubSearch {
			
 
				-  /** Search type: 'lex' for BM25 keywords, 'vec' for semantic, 'hyde' for hypothetical document */
			
 
				-  type: 'lex' | 'vec' | 'hyde';
			
 
				+  /** Search type: 'lex' for BM25, 'vec' for semantic, 'hyde' for hypothetical, 'expand' for LLM expansion */
			
 
				+  type: 'lex' | 'vec' | 'hyde' | 'expand';
			
 
				   /** The search query text */
			
 
				   query: string;
			
 
				 }
			
@@ -3199,7 +3199,11 @@ export async function structuredSearch(
 
				 
			
 
				   if (searches.length === 0) return [];
			
 
				 
			
 
				-  // Validate semantic queries don't use lex-only syntax
			
 
				+  // Validate: max one expand query, semantic queries don't use lex syntax
			
 
				+  const expandSearches = searches.filter(s => s.type === 'expand');
			
 
				+  if (expandSearches.length > 1) {
			
 
				+    throw new Error('Maximum one expand: query per document');
			
 
				+  }
			
 
				   for (const search of searches) {
			
 
				     if (search.type === 'vec' || search.type === 'hyde') {
			
 
				       const error = validateSemanticQuery(search.query);
			
@@ -3209,6 +3213,22 @@ export async function structuredSearch(
 
				     }
			
 
				   }
			
 
				 
			
 
				+  // Process expand: queries by calling the query expansion model
			
 
				+  let processedSearches = searches.filter(s => s.type !== 'expand');
			
 
				+  if (expandSearches.length > 0) {
			
 
				+    const expandQuery = expandSearches[0]!.query;
			
 
				+    const expanded = await store.expandQuery(expandQuery);
			
 
				+    // Add expanded queries (lex, vec, hyde from the model)
			
 
				+    for (const exp of expanded) {
			
 
				+      processedSearches.push({ type: exp.type as 'lex' | 'vec' | 'hyde', query: exp.text });
			
 
				+    }
			
 
				+    // Also add original as lex for strong signal matching
			
 
				+    processedSearches.unshift({ type: 'lex', query: expandQuery });
			
 
				+  }
			
 
				+
			
 
				+  // Use processed searches from here on
			
 
				+  searches = processedSearches;
			
 
				+
			
 
				   const rankedLists: RankedResult[][] = [];
			
 
				   const docidMap = new Map<string, string>(); // filepath -> docid
			
 
				   const hasVectors = !!store.db.prepare(
			
--- a/test/mcp.test.ts
+++ b/test/mcp.test.ts
@@ -1008,12 +1008,12 @@ describe("MCP HTTP Transport", () => {
 
				     expect(contentType).toContain("application/json");
			
 
				 
			
 
				     const toolNames = json.result.tools.map((t: any) => t.name);
			
 
				-    expect(toolNames).toContain("structured_search");
			
 
				+    expect(toolNames).toContain("query");
			
 
				     expect(toolNames).toContain("get");
			
 
				     expect(toolNames).toContain("status");
			
 
				   });
			
 
				 
			
 
				-  test("POST /mcp tools/call structured_search returns results", async () => {
			
 
				+  test("POST /mcp tools/call query returns results", async () => {
			
 
				     // Initialize
			
 
				     await mcpRequest({
			
 
				       jsonrpc: "2.0", id: 1, method: "initialize",
			
@@ -1022,7 +1022,7 @@ describe("MCP HTTP Transport", () => {
 
				 
			
 
				     const { status, json } = await mcpRequest({
			
 
				       jsonrpc: "2.0", id: 3, method: "tools/call",
			
 
				-      params: { name: "structured_search", arguments: { searches: [{ type: "lex", query: "readme" }] } },
			
 
				+      params: { name: "query", arguments: { searches: [{ type: "lex", query: "readme" }] } },
			
 
				     });
			
 
				     expect(status).toBe(200);
			
 
				     expect(json.result).toBeDefined();