Переглянути джерело

fix: support multiple -c collection filters in search commands

Closes #191 (thanks @openclaw)
Tobi Lutke 3 місяців тому
батько
коміт
640ac13cd0
2 змінених файлів з 207 додано та 33 видалено
  1. 62 33
      src/qmd.ts
  2. 145 0
      test/multi-collection-filter.test.ts

+ 62 - 33
src/qmd.ts

@@ -1748,7 +1748,7 @@ type OutputOptions = {
   limit: number;
   minScore: number;
   all?: boolean;
-  collection?: string;  // Filter by collection name (pwd suffix match)
+  collection?: string | string[];  // Filter by collection name(s)
   lineNumbers?: boolean; // Add line numbers to output
   context?: string;      // Optional context for query expansion
 };
@@ -1902,24 +1902,47 @@ function outputResults(results: { file: string; displayPath: string; title: stri
   }
 }
 
-function search(query: string, opts: OutputOptions): void {
-  const db = getDb();
-
-  // Validate collection filter if specified
-  let collectionName: string | undefined;
-  if (opts.collection) {
-    const coll = getCollectionFromYaml(opts.collection);
+// Resolve -c collection filter: supports single string, array, or undefined.
+// Returns validated collection names (exits on unknown collection).
+function resolveCollectionFilter(raw: string | string[] | undefined): string[] {
+  if (!raw) return [];
+  const names = Array.isArray(raw) ? raw : [raw];
+  const validated: string[] = [];
+  for (const name of names) {
+    const coll = getCollectionFromYaml(name);
     if (!coll) {
-      console.error(`Collection not found: ${opts.collection}`);
+      console.error(`Collection not found: ${name}`);
       closeDb();
       process.exit(1);
     }
-    collectionName = opts.collection;
+    validated.push(name);
   }
+  return validated;
+}
+
+// Post-filter results to only include files from specified collections.
+function filterByCollections<T extends { filepath?: string; file?: string }>(results: T[], collectionNames: string[]): T[] {
+  if (collectionNames.length <= 1) return results;
+  const prefixes = collectionNames.map(n => `qmd://${n}/`);
+  return results.filter(r => {
+    const path = r.filepath || r.file || '';
+    return prefixes.some(p => path.startsWith(p));
+  });
+}
+
+function search(query: string, opts: OutputOptions): void {
+  const db = getDb();
+
+  // Validate collection filter (supports multiple -c flags)
+  const collectionNames = resolveCollectionFilter(opts.collection);
+  const singleCollection = collectionNames.length === 1 ? collectionNames[0] : undefined;
 
   // Use large limit for --all, otherwise fetch more than needed and let outputResults filter
   const fetchLimit = opts.all ? 100000 : Math.max(50, opts.limit * 2);
-  const results = searchFTS(db, query, fetchLimit, collectionName);
+  const results = filterByCollections(
+    searchFTS(db, query, fetchLimit, singleCollection),
+    collectionNames
+  );
 
   // Add context to results
   const resultsWithContext = results.map(r => ({
@@ -1960,20 +1983,15 @@ function logExpansionTree(originalQuery: string, expanded: ExpandedQuery[]): voi
 async function vectorSearch(query: string, opts: OutputOptions, _model: string = DEFAULT_EMBED_MODEL): Promise<void> {
   const store = getStore();
 
-  if (opts.collection) {
-    const coll = getCollectionFromYaml(opts.collection);
-    if (!coll) {
-      console.error(`Collection not found: ${opts.collection}`);
-      closeDb();
-      process.exit(1);
-    }
-  }
+  // Validate collection filter (supports multiple -c flags)
+  const collectionNames = resolveCollectionFilter(opts.collection);
+  const singleCollection = collectionNames.length === 1 ? collectionNames[0] : undefined;
 
   checkIndexHealth(store.db);
 
   await withLLMSession(async () => {
-    const results = await vectorSearchQuery(store, query, {
-      collection: opts.collection,
+    let results = await vectorSearchQuery(store, query, {
+      collection: singleCollection,
       limit: opts.all ? 500 : (opts.limit || 10),
       minScore: opts.minScore || 0.3,
       hooks: {
@@ -1984,6 +2002,14 @@ async function vectorSearch(query: string, opts: OutputOptions, _model: string =
       },
     });
 
+    // Post-filter for multi-collection
+    if (collectionNames.length > 1) {
+      results = results.filter(r => {
+        const prefixes = collectionNames.map(n => `qmd://${n}/`);
+        return prefixes.some(p => r.file.startsWith(p));
+      });
+    }
+
     closeDb();
 
     if (results.length === 0) {
@@ -2006,20 +2032,15 @@ async function vectorSearch(query: string, opts: OutputOptions, _model: string =
 async function querySearch(query: string, opts: OutputOptions, _embedModel: string = DEFAULT_EMBED_MODEL, _rerankModel: string = DEFAULT_RERANK_MODEL): Promise<void> {
   const store = getStore();
 
-  if (opts.collection) {
-    const coll = getCollectionFromYaml(opts.collection);
-    if (!coll) {
-      console.error(`Collection not found: ${opts.collection}`);
-      closeDb();
-      process.exit(1);
-    }
-  }
+  // Validate collection filter (supports multiple -c flags)
+  const collectionNames = resolveCollectionFilter(opts.collection);
+  const singleCollection = collectionNames.length === 1 ? collectionNames[0] : undefined;
 
   checkIndexHealth(store.db);
 
   await withLLMSession(async () => {
-    const results = await hybridQuery(store, query, {
-      collection: opts.collection,
+    let results = await hybridQuery(store, query, {
+      collection: singleCollection,
       limit: opts.all ? 500 : (opts.limit || 10),
       minScore: opts.minScore || 0,
       hooks: {
@@ -2040,6 +2061,14 @@ async function querySearch(query: string, opts: OutputOptions, _embedModel: stri
       },
     });
 
+    // Post-filter for multi-collection
+    if (collectionNames.length > 1) {
+      results = results.filter(r => {
+        const prefixes = collectionNames.map(n => `qmd://${n}/`);
+        return prefixes.some(p => r.file.startsWith(p));
+      });
+    }
+
     closeDb();
 
     if (results.length === 0) {
@@ -2088,7 +2117,7 @@ function parseCLI() {
       xml: { type: "boolean" },
       files: { type: "boolean" },
       json: { type: "boolean" },
-      collection: { type: "string", short: "c" },  // Filter by collection
+      collection: { type: "string", short: "c", multiple: true },  // Filter by collection(s)
       // Collection options
       name: { type: "string" },  // collection name
       mask: { type: "string" },  // glob pattern
@@ -2137,7 +2166,7 @@ function parseCLI() {
     limit: isAll ? 100000 : (values.n ? parseInt(String(values.n), 10) || defaultLimit : defaultLimit),
     minScore: values["min-score"] ? parseFloat(String(values["min-score"])) || 0 : 0,
     all: isAll,
-    collection: values.collection as string | undefined,
+    collection: values.collection as string[] | undefined,
     lineNumbers: !!values["line-numbers"],
   };
 

+ 145 - 0
test/multi-collection-filter.test.ts

@@ -0,0 +1,145 @@
+/**
+ * Unit tests for multi-collection filter logic (PR #191).
+ *
+ * Tests the filterByCollections post-filter and the resolveCollectionFilter
+ * behavior for single-collection vs multi-collection search.
+ */
+
+import { describe, test, expect } from "vitest";
+
+// Reproduce the filterByCollections logic from qmd.ts for testing
+// (the function is private in qmd.ts)
+function filterByCollections<T extends { filepath?: string; file?: string }>(
+  results: T[],
+  collectionNames: string[],
+): T[] {
+  if (collectionNames.length <= 1) return results;
+  const prefixes = collectionNames.map((n) => `qmd://${n}/`);
+  return results.filter((r) => {
+    const path = r.filepath || r.file || "";
+    return prefixes.some((p) => path.startsWith(p));
+  });
+}
+
+describe("filterByCollections", () => {
+  const results = [
+    { filepath: "qmd://docs/readme.md", file: "qmd://docs/readme.md" },
+    { filepath: "qmd://notes/todo.md", file: "qmd://notes/todo.md" },
+    { filepath: "qmd://journals/2024/jan.md", file: "qmd://journals/2024/jan.md" },
+    { filepath: "qmd://docs/api.md", file: "qmd://docs/api.md" },
+  ];
+
+  test("returns all results when no collections specified", () => {
+    expect(filterByCollections(results, [])).toEqual(results);
+  });
+
+  test("returns all results for single collection (no-op, handled by SQL filter)", () => {
+    expect(filterByCollections(results, ["docs"])).toEqual(results);
+  });
+
+  test("filters to matching collections when multiple specified", () => {
+    const filtered = filterByCollections(results, ["docs", "journals"]);
+    expect(filtered).toHaveLength(3);
+    expect(filtered.map((r) => r.filepath)).toEqual([
+      "qmd://docs/readme.md",
+      "qmd://journals/2024/jan.md",
+      "qmd://docs/api.md",
+    ]);
+  });
+
+  test("filters correctly with two collections", () => {
+    const filtered = filterByCollections(results, ["notes", "journals"]);
+    expect(filtered).toHaveLength(2);
+    expect(filtered.map((r) => r.filepath)).toEqual([
+      "qmd://notes/todo.md",
+      "qmd://journals/2024/jan.md",
+    ]);
+  });
+
+  test("returns empty when no results match collections", () => {
+    const filtered = filterByCollections(results, ["archive", "trash"]);
+    expect(filtered).toHaveLength(0);
+  });
+
+  test("uses file field when filepath is missing", () => {
+    const fileOnlyResults = [
+      { file: "qmd://docs/readme.md" },
+      { file: "qmd://notes/todo.md" },
+    ];
+    const filtered = filterByCollections(fileOnlyResults, ["docs", "notes"]);
+    expect(filtered).toHaveLength(2);
+  });
+
+  test("uses filepath over file when both present", () => {
+    const mixedResults = [
+      { filepath: "qmd://docs/readme.md", file: "qmd://notes/todo.md" },
+    ];
+    const filtered = filterByCollections(mixedResults, ["docs", "notes"]);
+    expect(filtered).toHaveLength(1);
+    // Should match via filepath (docs), not file (notes)
+    expect(filtered[0].filepath).toBe("qmd://docs/readme.md");
+  });
+});
+
+describe("resolveCollectionFilter input normalization", () => {
+  // Test the array normalization logic without the DB dependency
+  function normalizeCollectionInput(raw: string | string[] | undefined): string[] {
+    if (!raw) return [];
+    return Array.isArray(raw) ? raw : [raw];
+  }
+
+  test("undefined returns empty array", () => {
+    expect(normalizeCollectionInput(undefined)).toEqual([]);
+  });
+
+  test("single string returns single-element array", () => {
+    expect(normalizeCollectionInput("docs")).toEqual(["docs"]);
+  });
+
+  test("array passes through", () => {
+    expect(normalizeCollectionInput(["docs", "notes"])).toEqual(["docs", "notes"]);
+  });
+
+  test("empty string returns single-element array", () => {
+    expect(normalizeCollectionInput("")).toEqual([]);
+  });
+});
+
+describe("collection option type from parseArgs", () => {
+  // Verify that parseArgs with `multiple: true` produces string[]
+  test("parseArgs multiple:true produces array for repeated flags", () => {
+    const { parseArgs } = require("node:util");
+    const { values } = parseArgs({
+      args: ["-c", "docs", "-c", "notes"],
+      options: {
+        collection: { type: "string", short: "c", multiple: true },
+      },
+      strict: true,
+    });
+    expect(values.collection).toEqual(["docs", "notes"]);
+  });
+
+  test("parseArgs multiple:true produces array for single flag", () => {
+    const { parseArgs } = require("node:util");
+    const { values } = parseArgs({
+      args: ["-c", "docs"],
+      options: {
+        collection: { type: "string", short: "c", multiple: true },
+      },
+      strict: true,
+    });
+    expect(values.collection).toEqual(["docs"]);
+  });
+
+  test("parseArgs multiple:true produces undefined when flag absent", () => {
+    const { parseArgs } = require("node:util");
+    const { values } = parseArgs({
+      args: [],
+      options: {
+        collection: { type: "string", short: "c", multiple: true },
+      },
+      strict: true,
+    });
+    expect(values.collection).toBeUndefined();
+  });
+});