ソースを参照

feat: expose candidateLimit as MCP tool parameter and CLI flag

Reranking 40 chunks (the default candidateLimit) takes ~2 min on CPU.
The option already exists in hybridQuery()/structuredSearch() but was
never surfaced to users. This adds:

- `candidateLimit` param to the MCP `query` tool inputSchema
- `candidateLimit` field to the REST /query endpoint
- `--candidate-limit` / `-C` CLI flag for `qmd query`

Default stays 40 (no behavior change). Users on CPU-only machines can
lower it for a speed/recall tradeoff. Complements #231.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Andreas Spannagel 3 ヶ月 前
コミット
87bd968d7b
2 ファイル変更、13 行追加、1 行削除
  1. 6 1
      src/mcp.ts
  2. 7 0
      src/qmd.ts

+ 6 - 1
src/mcp.ts

@@ -307,10 +307,13 @@ Intent-aware lex (C++ performance, not sports):
         ),
         limit: z.number().optional().default(10).describe("Max results (default: 10)"),
         minScore: z.number().optional().default(0).describe("Min relevance 0-1 (default: 0)"),
+        candidateLimit: z.number().optional().describe(
+          "Maximum candidates to rerank (default: 40, lower = faster but may miss results)"
+        ),
         collections: z.array(z.string()).optional().describe("Filter to collections (OR match)"),
       },
     },
-    async ({ searches, limit, minScore, collections }) => {
+    async ({ searches, limit, minScore, candidateLimit, collections }) => {
       // Map to internal format
       const subSearches: StructuredSubSearch[] = searches.map(s => ({
         type: s.type,
@@ -324,6 +327,7 @@ Intent-aware lex (C++ performance, not sports):
         collections: effectiveCollections.length > 0 ? effectiveCollections : undefined,
         limit,
         minScore,
+        candidateLimit,
       });
 
       // Use first lex or vec query for snippet extraction
@@ -635,6 +639,7 @@ export async function startMcpHttpServer(port: number, options?: { quiet?: boole
           collections: effectiveCollections.length > 0 ? effectiveCollections : undefined,
           limit: params.limit ?? 10,
           minScore: params.minScore ?? 0,
+          candidateLimit: params.candidateLimit,
         });
 
         // Use first lex or vec query for snippet extraction

+ 7 - 0
src/qmd.ts

@@ -1751,6 +1751,7 @@ type OutputOptions = {
   collection?: string | string[];  // Filter by collection name(s)
   lineNumbers?: boolean; // Add line numbers to output
   context?: string;      // Optional context for query expansion
+  candidateLimit?: number;  // Max candidates to rerank (default: 40)
 };
 
 // Highlight query terms in text (skip short words < 3 chars)
@@ -2141,6 +2142,7 @@ async function querySearch(query: string, opts: OutputOptions, _embedModel: stri
         collections: singleCollection ? [singleCollection] : undefined,
         limit: opts.all ? 500 : (opts.limit || 10),
         minScore: opts.minScore || 0,
+        candidateLimit: opts.candidateLimit,
         hooks: {
           onEmbedStart: (count) => {
             process.stderr.write(`${c.dim}Embedding ${count} ${count === 1 ? 'query' : 'queries'}...${c.reset}`);
@@ -2164,6 +2166,7 @@ async function querySearch(query: string, opts: OutputOptions, _embedModel: stri
         collection: singleCollection,
         limit: opts.all ? 500 : (opts.limit || 10),
         minScore: opts.minScore || 0,
+        candidateLimit: opts.candidateLimit,
         hooks: {
           onStrongSignal: (score) => {
             process.stderr.write(`${c.dim}Strong BM25 signal (${score.toFixed(2)}) — skipping expansion${c.reset}\n`);
@@ -2271,6 +2274,8 @@ function parseCLI() {
       from: { type: "string" },  // start line
       "max-bytes": { type: "string" },  // max bytes for multi-get
       "line-numbers": { type: "boolean" },  // add line numbers to output
+      // Query options
+      "candidate-limit": { type: "string", short: "C" },
       // MCP HTTP transport options
       http: { type: "boolean" },
       daemon: { type: "boolean" },
@@ -2308,6 +2313,7 @@ function parseCLI() {
     all: isAll,
     collection: values.collection as string[] | undefined,
     lineNumbers: !!values["line-numbers"],
+    candidateLimit: values["candidate-limit"] ? parseInt(String(values["candidate-limit"]), 10) : undefined,
   };
 
   return {
@@ -2409,6 +2415,7 @@ function showHelp(): void {
   console.log("  --all                      - Return all matches (pair with --min-score)");
   console.log("  --min-score <num>          - Minimum similarity score");
   console.log("  --full                     - Output full document instead of snippet");
+  console.log("  -C, --candidate-limit <n>  - Max candidates to rerank (default 40, lower = faster)");
   console.log("  --line-numbers             - Include line numbers in output");
   console.log("  --files | --json | --csv | --md | --xml  - Output format");
   console.log("  -c, --collection <name>    - Filter by one or more collections");