Selaa lähdekoodia

feat(cli): add --no-rerank flag to skip reranking in qmd query

Exposes the existing skipRerank option as a --no-rerank CLI flag for
qmd query. On CPU-only machines, reranking takes 120s+ for 20 chunks;
this flag lets users get RRF-fused results without that reranking delay.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Matt Van Horn 2 kuukautta sitten
vanhempi
commit
11b3f17fba
1 muutettua tiedostoa jossa 6 lisäystä ja 0 poistoa
  1. 6 0
      src/cli/qmd.ts

+ 6 - 0
src/cli/qmd.ts

@@ -1724,6 +1724,7 @@ type OutputOptions = {
   context?: string;      // Optional context for query expansion
   candidateLimit?: number;  // Max candidates to rerank (default: 40)
   intent?: string;       // Domain intent for disambiguation
+  skipRerank?: boolean;  // Skip LLM reranking, use RRF scores only
 };
 
 // Highlight query terms in text (skip short words < 3 chars)
@@ -2206,6 +2207,7 @@ async function querySearch(query: string, opts: OutputOptions, _embedModel: stri
         limit: opts.all ? 500 : (opts.limit || 10),
         minScore: opts.minScore || 0,
         candidateLimit: opts.candidateLimit,
+        skipRerank: opts.skipRerank,
         explain: !!opts.explain,
         intent,
         hooks: {
@@ -2232,6 +2234,7 @@ async function querySearch(query: string, opts: OutputOptions, _embedModel: stri
         limit: opts.all ? 500 : (opts.limit || 10),
         minScore: opts.minScore || 0,
         candidateLimit: opts.candidateLimit,
+        skipRerank: opts.skipRerank,
         explain: !!opts.explain,
         intent,
         hooks: {
@@ -2344,6 +2347,7 @@ function parseCLI() {
       "line-numbers": { type: "boolean" },  // add line numbers to output
       // Query options
       "candidate-limit": { type: "string", short: "C" },
+      "no-rerank": { type: "boolean", default: false },
       intent: { type: "string" },
       // MCP HTTP transport options
       http: { type: "boolean" },
@@ -2383,6 +2387,7 @@ function parseCLI() {
     collection: values.collection as string[] | undefined,
     lineNumbers: !!values["line-numbers"],
     candidateLimit: values["candidate-limit"] ? parseInt(String(values["candidate-limit"]), 10) : undefined,
+    skipRerank: !!values["no-rerank"],
     explain: !!values.explain,
     intent: values.intent as string | undefined,
   };
@@ -2599,6 +2604,7 @@ function showHelp(): void {
   console.log("  --min-score <num>          - Minimum similarity score");
   console.log("  --full                     - Output full document instead of snippet");
   console.log("  -C, --candidate-limit <n>  - Max candidates to rerank (default 40, lower = faster)");
+  console.log("  --no-rerank                - Skip LLM reranking (use RRF scores only, much faster on CPU)");
   console.log("  --line-numbers             - Include line numbers in output");
   console.log("  --explain                  - Include retrieval score traces (query --json/CLI)");
   console.log("  --files | --json | --csv | --md | --xml  - Output format");