il y a 1 mois · 4fe18a21bc
--- a/dist/cli/qmd.js
+++ b/dist/cli/qmd.js
@@ -412,18 +412,34 @@ async function showStatus() {
 
				     }
			
 
				     closeDb();
			
 
				 }
			
 
				-async function updateCollections() {
			
 
				+async function updateCollections(collectionFilter) {
			
 
				     const db = getDb();
			
 
				     const storeInstance = getStore();
			
 
				     // Collections are defined in YAML; no duplicate cleanup needed.
			
 
				     // Clear Ollama cache on update
			
 
				     clearCache(db);
			
 
				-    const collections = listCollections(db);
			
 
				-    if (collections.length === 0) {
			
 
				+    const allCollections = listCollections(db);
			
 
				+    if (allCollections.length === 0) {
			
 
				         console.log(`${c.dim}No collections found. Run 'qmd collection add .' to index markdown files.${c.reset}`);
			
 
				         closeDb();
			
 
				         return;
			
 
				     }
			
 
				+    // i-ofojj7dy — when a positional collection name is supplied, filter to just
			
 
				+    // that collection. Validate against the known list and exit non-zero on miss
			
 
				+    // (no silent full-fleet fallback). Omitted filter (undefined) = full-fleet (legacy).
			
 
				+    let collections = allCollections;
			
 
				+    if (collectionFilter !== undefined) {
			
 
				+        const match = allCollections.find(col => col.name === collectionFilter);
			
 
				+        if (!match) {
			
 
				+            const known = allCollections.map(c => c.name).sort().join(", ");
			
 
				+            console.error(`${c.red}Collection not found: "${collectionFilter}"${c.reset}`);
			
 
				+            console.error(`${c.dim}Available collections: ${known || "(none)"}${c.reset}`);
			
 
				+            console.error(`${c.dim}Run 'qmd update --all' (or 'qmd update' with no args) to process every collection.${c.reset}`);
			
 
				+            closeDb();
			
 
				+            process.exit(1);
			
 
				+        }
			
 
				+        collections = [match];
			
 
				+    }
			
 
				     console.log(`${c.bold}Updating ${collections.length} collection(s)...${c.reset}\n`);
			
 
				     for (let i = 0; i < collections.length; i++) {
			
 
				         const col = collections[i];
			
@@ -1517,13 +1533,43 @@ function optionalString(v) {
 
				 async function vectorIndex(model = DEFAULT_EMBED_MODEL_URI, force = false, batchOptions) {
			
 
				     const storeInstance = getStore();
			
 
				     const db = storeInstance.db;
			
 
				+    // i-ofojj7dy — validate the collection filter against the known list before
			
 
				+    // doing any work. Mirrors `qmd update <name>` ergonomics.
			
 
				+    if (batchOptions?.collection !== undefined) {
			
 
				+        const allCollections = listCollections(db);
			
 
				+        const match = allCollections.find(col => col.name === batchOptions.collection);
			
 
				+        if (!match) {
			
 
				+            const known = allCollections.map(c => c.name).sort().join(", ");
			
 
				+            console.error(`${c.red}Collection not found: "${batchOptions.collection}"${c.reset}`);
			
 
				+            console.error(`${c.dim}Available collections: ${known || "(none)"}${c.reset}`);
			
 
				+            console.error(`${c.dim}Run 'qmd embed --all' (or 'qmd embed' with no args) to embed every collection.${c.reset}`);
			
 
				+            closeDb();
			
 
				+            process.exit(1);
			
 
				+        }
			
 
				+        // i-ofojj7dy — `--force` is fleet-wide (nukes all content_vectors).
			
 
				+        // Combining it with a single-collection filter would silently break
			
 
				+        // every OTHER collection's embeddings. Per-collection force-clear is a
			
 
				+        // distinct feature (out of scope here). Refuse and steer the user.
			
 
				+        if (force) {
			
 
				+            console.error(`${c.red}--force cannot be combined with a positional collection name.${c.reset}`);
			
 
				+            console.error(`${c.dim}--force clears ALL vectors fleet-wide before re-embedding; restricting it to one collection would corrupt the others.${c.reset}`);
			
 
				+            console.error(`${c.dim}Use 'qmd embed --all -f' to force-re-embed every collection, OR drop -f and run 'qmd embed ${batchOptions.collection}' to embed only this collection's pending hashes.${c.reset}`);
			
 
				+            closeDb();
			
 
				+            process.exit(1);
			
 
				+        }
			
 
				+    }
			
 
				     if (force) {
			
 
				         console.log(`${c.yellow}Force re-indexing: clearing all vectors...${c.reset}`);
			
 
				     }
			
 
				     // Check if there's work to do before starting
			
 
				-    const hashesToEmbed = getHashesNeedingEmbedding(db);
			
 
				+    const hashesToEmbed = getHashesNeedingEmbedding(db, batchOptions?.collection);
			
 
				     if (hashesToEmbed === 0 && !force) {
			
 
				-        console.log(`${c.green}✓ All content hashes already have embeddings.${c.reset}`);
			
 
				+        if (batchOptions?.collection) {
			
 
				+            console.log(`${c.green}✓ All content hashes in collection "${batchOptions.collection}" already have embeddings.${c.reset}`);
			
 
				+        }
			
 
				+        else {
			
 
				+            console.log(`${c.green}✓ All content hashes already have embeddings.${c.reset}`);
			
 
				+        }
			
 
				         closeDb();
			
 
				         return;
			
 
				     }
			
@@ -2431,8 +2477,13 @@ function showHelp() {
 
				     console.log("");
			
 
				     console.log("Maintenance:");
			
 
				     console.log("  qmd status                    - View index + collection health");
			
 
				-    console.log("  qmd update [--pull]           - Re-index collections (optionally git pull first)");
			
 
				-    console.log("  qmd embed [-f]                - Generate/refresh vector embeddings");
			
 
				+    console.log("  qmd update [<collection>|--all] [--pull]");
			
 
				+    console.log("                                - Re-index collections (positional name limits to one;");
			
 
				+    console.log("                                  no arg or --all = every collection; --pull = git pull first)");
			
 
				+    console.log("  qmd embed [<collection>|--all] [-f]");
			
 
				+    console.log("                                - Generate/refresh vector embeddings");
			
 
				+    console.log("                                  (positional name limits to one collection; no arg or --all = all;");
			
 
				+    console.log("                                  -f clears + re-embeds ALL vectors fleet-wide, incompatible with <collection>)");
			
 
				     console.log("    --max-docs-per-batch <n>    - Cap docs loaded into memory per embedding batch");
			
 
				     console.log("    --max-batch-mb <n>          - Cap UTF-8 MB loaded into memory per embedding batch");
			
 
				     console.log("    --provider {local,openai}   - Embedding backend (default: local llama.cpp)");
			
@@ -2790,14 +2841,38 @@ if (isMain) {
 
				         case "status":
			
 
				             await showStatus();
			
 
				             break;
			
 
				-        case "update":
			
 
				-            await updateCollections();
			
 
				+        case "update": {
			
 
				+            // i-ofojj7dy — `qmd update <collection>` filters to a single collection;
			
 
				+            // `qmd update --all` or `qmd update` (no arg) preserves full-fleet behavior.
			
 
				+            // `--all` together with a positional name errors out to avoid silent
			
 
				+            // disagreement between the two intents.
			
 
				+            const updateCollectionArg = cli.args[0];
			
 
				+            const updateAllFlag = !!cli.values.all;
			
 
				+            if (updateAllFlag && updateCollectionArg !== undefined) {
			
 
				+                console.error(`${c.red}Conflicting arguments: --all cannot be combined with a positional collection name.${c.reset}`);
			
 
				+                console.error(`${c.dim}Use 'qmd update --all' for every collection OR 'qmd update <name>' for one.${c.reset}`);
			
 
				+                process.exit(1);
			
 
				+            }
			
 
				+            const updateFilter = updateAllFlag ? undefined : updateCollectionArg;
			
 
				+            await updateCollections(updateFilter);
			
 
				             break;
			
 
				+        }
			
 
				         case "embed":
			
 
				             try {
			
 
				                 const maxDocsPerBatch = parseEmbedBatchOption("maxDocsPerBatch", cli.values["max-docs-per-batch"]);
			
 
				                 const maxBatchMb = parseEmbedBatchOption("maxBatchBytes", cli.values["max-batch-mb"]);
			
 
				                 const embedChunkStrategy = parseChunkStrategy(cli.values["chunk-strategy"]);
			
 
				+                // i-ofojj7dy — `qmd embed <collection>` filters pending-embedding
			
 
				+                // candidates to documents in that collection. `--all` together with a
			
 
				+                // positional name is an explicit error.
			
 
				+                const embedCollectionArg = cli.args[0];
			
 
				+                const embedAllFlag = !!cli.values.all;
			
 
				+                if (embedAllFlag && embedCollectionArg !== undefined) {
			
 
				+                    console.error(`${c.red}Conflicting arguments: --all cannot be combined with a positional collection name.${c.reset}`);
			
 
				+                    console.error(`${c.dim}Use 'qmd embed --all' for every collection OR 'qmd embed <name>' for one.${c.reset}`);
			
 
				+                    process.exit(1);
			
 
				+                }
			
 
				+                const embedCollectionFilter = embedAllFlag ? undefined : embedCollectionArg;
			
 
				                 // Build embedding provider from CLI flags + env + config file.
			
 
				                 // Backward compat: with no flags / env vars, the factory returns
			
 
				                 // a LocalLlamaCppProvider that delegates to the default LlamaCpp
			
@@ -2811,6 +2886,7 @@ if (isMain) {
 
				                     chunkStrategy: embedChunkStrategy,
			
 
				                     embedProvider,
			
 
				                     providerKind: embedProvider.kind,
			
 
				+                    collection: embedCollectionFilter,
			
 
				                 });
			
 
				             }
			
 
				             catch (error) {
			
--- a/dist/store.d.ts
+++ b/dist/store.d.ts
@@ -352,6 +352,14 @@ export type EmbedOptions = {
 
				      * the store's `LlamaCpp` (or the global singleton).
			
 
				      */
			
 
				     embedProvider?: EmbeddingProvider;
			
 
				+    /**
			
 
				+     * Optional collection name filter (i-ofojj7dy). When set, only content
			
 
				+     * hashes that have at least one document in this collection are embedded.
			
 
				+     * `getPendingEmbeddingDocs` filters at the SQL level. Callers are expected
			
 
				+     * to validate the name against `listCollections(db)` first; passing an
			
 
				+     * unknown name yields zero pending docs (no work, no error). NOTE: `force` is NOT scoped by this filter; `generateEmbeddings` still calls `clearAllEmbeddings`.
			
 
				+     */
			
 
				+    collection?: string;
			
 
				 };
			
 
				 /**
			
 
				  * Generate vector embeddings for documents that need them.
			
@@ -470,7 +478,7 @@ export type IndexStatus = {
 
				     hasVectorIndex: boolean;
			
 
				     collections: CollectionInfo[];
			
 
				 };
			
 
				-export declare function getHashesNeedingEmbedding(db: Database): number;
			
 
				+export declare function getHashesNeedingEmbedding(db: Database, collection?: string): number;
			
 
				 export type IndexHealthInfo = {
			
 
				     needsEmbedding: number;
			
 
				     totalDocs: number;
			
--- a/dist/store.js
+++ b/dist/store.js
@@ -1029,12 +1029,29 @@ function resolveEmbedOptions(options) {
 
				         maxBatchBytes: validatePositiveIntegerOption("maxBatchBytes", options?.maxBatchBytes, DEFAULT_EMBED_MAX_BATCH_BYTES),
			
 
				     };
			
 
				 }
			
 
				-function getPendingEmbeddingDocs(db) {
			
 
				+function getPendingEmbeddingDocs(db, collection) {
			
 
				     // `MIN(d.collection)` deterministically picks one collection per hash when
			
 
				     // the same content is indexed in multiple collections (SQLite tie-breaks
			
 
				     // alphabetically). The identical bytes produce identical chunks regardless
			
 
				     // of which collection wins; the chunkStrategy lookup still resolves via
			
 
				     // that collection's YAML config. See Phase 2 design notes (i-bud0h8vu).
			
 
				+    //
			
 
				+    // i-ofojj7dy — when a collection name is supplied, filter rows BEFORE the
			
 
				+    // GROUP BY so we only emit hashes whose documents include that collection.
			
 
				+    // Other collections sharing the same content hash still benefit from any
			
 
				+    // embeddings generated by this filtered run (content_vectors is keyed
			
 
				+    // by hash, not by collection).
			
 
				+    if (collection !== undefined) {
			
 
				+        return db.prepare(`
			
 
				+      SELECT d.hash, MIN(d.path) as path, MIN(d.collection) as collection, length(CAST(c.doc AS BLOB)) as bytes
			
 
				+      FROM documents d
			
 
				+      JOIN content c ON d.hash = c.hash
			
 
				+      LEFT JOIN content_vectors v ON d.hash = v.hash AND v.seq = 0
			
 
				+      WHERE d.active = 1 AND v.hash IS NULL AND d.collection = ?
			
 
				+      GROUP BY d.hash
			
 
				+      ORDER BY MIN(d.path)
			
 
				+    `).all(collection);
			
 
				+    }
			
 
				     return db.prepare(`
			
 
				     SELECT d.hash, MIN(d.path) as path, MIN(d.collection) as collection, length(CAST(c.doc AS BLOB)) as bytes
			
 
				     FROM documents d
			
@@ -1146,7 +1163,8 @@ export async function generateEmbeddings(store, options) {
 
				     if (options?.force) {
			
 
				         clearAllEmbeddings(db);
			
 
				     }
			
 
				-    const docsToEmbed = getPendingEmbeddingDocs(db);
			
 
				+    // i-ofojj7dy — optional collection filter restricts the pending-doc set.
			
 
				+    const docsToEmbed = getPendingEmbeddingDocs(db, options?.collection);
			
 
				     if (docsToEmbed.length === 0) {
			
 
				         return { docsProcessed: 0, chunksEmbedded: 0, errors: 0, durationMs: 0 };
			
 
				     }
			
@@ -1544,7 +1562,18 @@ export function handelize(path) {
 
				 // =============================================================================
			
 
				 // Index health
			
 
				 // =============================================================================
			
 
				-export function getHashesNeedingEmbedding(db) {
			
 
				+export function getHashesNeedingEmbedding(db, collection) {
			
 
				+    // i-ofojj7dy — optional collection filter. Restricts the count to hashes
			
 
				+    // whose documents are in the named collection.
			
 
				+    if (collection !== undefined) {
			
 
				+        const result = db.prepare(`
			
 
				+      SELECT COUNT(DISTINCT d.hash) as count
			
 
				+      FROM documents d
			
 
				+      LEFT JOIN content_vectors v ON d.hash = v.hash AND v.seq = 0
			
 
				+      WHERE d.active = 1 AND v.hash IS NULL AND d.collection = ?
			
 
				+    `).get(collection);
			
 
				+        return result.count;
			
 
				+    }
			
 
				     const result = db.prepare(`
			
 
				     SELECT COUNT(DISTINCT d.hash) as count
			
 
				     FROM documents d
			
--- a/src/cli/qmd.ts
+++ b/src/cli/qmd.ts
@@ -537,7 +537,7 @@ async function showStatus(): Promise<void> {
 
				   closeDb();
			
 
				 }
			
 
				 
			
 
				-async function updateCollections(): Promise<void> {
			
 
				+async function updateCollections(collectionFilter?: string): Promise<void> {
			
 
				   const db = getDb();
			
 
				   const storeInstance = getStore();
			
 
				   // Collections are defined in YAML; no duplicate cleanup needed.
			
@@ -545,14 +545,31 @@ async function updateCollections(): Promise<void> {
 
				   // Clear Ollama cache on update
			
 
				   clearCache(db);
			
 
				 
			
 
				-  const collections = listCollections(db);
			
 
				+  const allCollections = listCollections(db);
			
 
				 
			
 
				-  if (collections.length === 0) {
			
 
				+  if (allCollections.length === 0) {
			
 
				     console.log(`${c.dim}No collections found. Run 'qmd collection add .' to index markdown files.${c.reset}`);
			
 
				     closeDb();
			
 
				     return;
			
 
				   }
			
 
				 
			
 
				+  // i-ofojj7dy — when a positional collection name is supplied, filter to just
			
 
				+  // that collection. Validate against the known list and exit non-zero on miss
			
 
				+  // (no silent full-fleet fallback). Omitted filter (undefined) = full-fleet (legacy).
			
 
				+  let collections = allCollections;
			
 
				+  if (collectionFilter !== undefined) {
			
 
				+    const match = allCollections.find(col => col.name === collectionFilter);
			
 
				+    if (!match) {
			
 
				+      const known = allCollections.map(c => c.name).sort().join(", ");
			
 
				+      console.error(`${c.red}Collection not found: "${collectionFilter}"${c.reset}`);
			
 
				+      console.error(`${c.dim}Available collections: ${known || "(none)"}${c.reset}`);
			
 
				+      console.error(`${c.dim}Run 'qmd update --all' (or 'qmd update' with no args) to process every collection.${c.reset}`);
			
 
				+      closeDb();
			
 
				+      process.exit(1);
			
 
				+    }
			
 
				+    collections = [match];
			
 
				+  }
			
 
				+
			
 
				   console.log(`${c.bold}Updating ${collections.length} collection(s)...${c.reset}\n`);
			
 
				 
			
 
				   for (let i = 0; i < collections.length; i++) {
			
@@ -1783,19 +1800,50 @@ async function vectorIndex(
 
				     chunkStrategy?: ChunkStrategy;
			
 
				     embedProvider?: EmbeddingProvider;
			
 
				     providerKind?: ProviderKind;
			
 
				+    collection?: string;
			
 
				   },
			
 
				 ): Promise<void> {
			
 
				   const storeInstance = getStore();
			
 
				   const db = storeInstance.db;
			
 
				 
			
 
				+  // i-ofojj7dy — validate the collection filter against the known list before
			
 
				+  // doing any work. Mirrors `qmd update <name>` ergonomics.
			
 
				+  if (batchOptions?.collection !== undefined) {
			
 
				+    const allCollections = listCollections(db);
			
 
				+    const match = allCollections.find(col => col.name === batchOptions.collection);
			
 
				+    if (!match) {
			
 
				+      const known = allCollections.map(c => c.name).sort().join(", ");
			
 
				+      console.error(`${c.red}Collection not found: "${batchOptions.collection}"${c.reset}`);
			
 
				+      console.error(`${c.dim}Available collections: ${known || "(none)"}${c.reset}`);
			
 
				+      console.error(`${c.dim}Run 'qmd embed --all' (or 'qmd embed' with no args) to embed every collection.${c.reset}`);
			
 
				+      closeDb();
			
 
				+      process.exit(1);
			
 
				+    }
			
 
				+    // i-ofojj7dy — `--force` is fleet-wide (nukes all content_vectors).
			
 
				+    // Combining it with a single-collection filter would silently break
			
 
				+    // every OTHER collection's embeddings. Per-collection force-clear is a
			
 
				+    // distinct feature (out of scope here). Refuse and steer the user.
			
 
				+    if (force) {
			
 
				+      console.error(`${c.red}--force cannot be combined with a positional collection name.${c.reset}`);
			
 
				+      console.error(`${c.dim}--force clears ALL vectors fleet-wide before re-embedding; restricting it to one collection would corrupt the others.${c.reset}`);
			
 
				+      console.error(`${c.dim}Use 'qmd embed --all -f' to force-re-embed every collection, OR drop -f and run 'qmd embed ${batchOptions.collection}' to embed only this collection's pending hashes.${c.reset}`);
			
 
				+      closeDb();
			
 
				+      process.exit(1);
			
 
				+    }
			
 
				+  }
			
 
				+
			
 
				   if (force) {
			
 
				     console.log(`${c.yellow}Force re-indexing: clearing all vectors...${c.reset}`);
			
 
				   }
			
 
				 
			
 
				   // Check if there's work to do before starting
			
 
				-  const hashesToEmbed = getHashesNeedingEmbedding(db);
			
 
				+  const hashesToEmbed = getHashesNeedingEmbedding(db, batchOptions?.collection);
			
 
				   if (hashesToEmbed === 0 && !force) {
			
 
				-    console.log(`${c.green}✓ All content hashes already have embeddings.${c.reset}`);
			
 
				+    if (batchOptions?.collection) {
			
 
				+      console.log(`${c.green}✓ All content hashes in collection "${batchOptions.collection}" already have embeddings.${c.reset}`);
			
 
				+    } else {
			
 
				+      console.log(`${c.green}✓ All content hashes already have embeddings.${c.reset}`);
			
 
				+    }
			
 
				     closeDb();
			
 
				     return;
			
 
				   }
			
@@ -2844,8 +2892,13 @@ function showHelp(): void {
 
				   console.log("");
			
 
				   console.log("Maintenance:");
			
 
				   console.log("  qmd status                    - View index + collection health");
			
 
				-  console.log("  qmd update [--pull]           - Re-index collections (optionally git pull first)");
			
 
				-  console.log("  qmd embed [-f]                - Generate/refresh vector embeddings");
			
 
				+  console.log("  qmd update [<collection>|--all] [--pull]");
			
 
				+  console.log("                                - Re-index collections (positional name limits to one;");
			
 
				+  console.log("                                  no arg or --all = every collection; --pull = git pull first)");
			
 
				+  console.log("  qmd embed [<collection>|--all] [-f]");
			
 
				+  console.log("                                - Generate/refresh vector embeddings");
			
 
				+  console.log("                                  (positional name limits to one collection; no arg or --all = all;");
			
 
				+  console.log("                                  -f clears + re-embeds ALL vectors fleet-wide, incompatible with <collection>)");
			
 
				   console.log("    --max-docs-per-batch <n>    - Cap docs loaded into memory per embedding batch");
			
 
				   console.log("    --max-batch-mb <n>          - Cap UTF-8 MB loaded into memory per embedding batch");
			
 
				   console.log("    --provider {local,openai}   - Embedding backend (default: local llama.cpp)");
			
@@ -3232,9 +3285,22 @@ if (isMain) {
 
				       await showStatus();
			
 
				       break;
			
 
				 
			
 
				-    case "update":
			
 
				-      await updateCollections();
			
 
				+    case "update": {
			
 
				+      // i-ofojj7dy — `qmd update <collection>` filters to a single collection;
			
 
				+      // `qmd update --all` or `qmd update` (no arg) preserves full-fleet behavior.
			
 
				+      // `--all` together with a positional name errors out to avoid silent
			
 
				+      // disagreement between the two intents.
			
 
				+      const updateCollectionArg = cli.args[0];
			
 
				+      const updateAllFlag = !!cli.values.all;
			
 
				+      if (updateAllFlag && updateCollectionArg !== undefined) {
			
 
				+        console.error(`${c.red}Conflicting arguments: --all cannot be combined with a positional collection name.${c.reset}`);
			
 
				+        console.error(`${c.dim}Use 'qmd update --all' for every collection OR 'qmd update <name>' for one.${c.reset}`);
			
 
				+        process.exit(1);
			
 
				+      }
			
 
				+      const updateFilter = updateAllFlag ? undefined : updateCollectionArg;
			
 
				+      await updateCollections(updateFilter);
			
 
				       break;
			
 
				+    }
			
 
				 
			
 
				     case "embed":
			
 
				       try {
			
@@ -3242,6 +3308,18 @@ if (isMain) {
 
				         const maxBatchMb = parseEmbedBatchOption("maxBatchBytes", cli.values["max-batch-mb"]);
			
 
				         const embedChunkStrategy = parseChunkStrategy(cli.values["chunk-strategy"]);
			
 
				 
			
 
				+        // i-ofojj7dy — `qmd embed <collection>` filters pending-embedding
			
 
				+        // candidates to documents in that collection. `--all` together with a
			
 
				+        // positional name is an explicit error.
			
 
				+        const embedCollectionArg = cli.args[0];
			
 
				+        const embedAllFlag = !!cli.values.all;
			
 
				+        if (embedAllFlag && embedCollectionArg !== undefined) {
			
 
				+          console.error(`${c.red}Conflicting arguments: --all cannot be combined with a positional collection name.${c.reset}`);
			
 
				+          console.error(`${c.dim}Use 'qmd embed --all' for every collection OR 'qmd embed <name>' for one.${c.reset}`);
			
 
				+          process.exit(1);
			
 
				+        }
			
 
				+        const embedCollectionFilter = embedAllFlag ? undefined : embedCollectionArg;
			
 
				+
			
 
				         // Build embedding provider from CLI flags + env + config file.
			
 
				         // Backward compat: with no flags / env vars, the factory returns
			
 
				         // a LocalLlamaCppProvider that delegates to the default LlamaCpp
			
@@ -3256,6 +3334,7 @@ if (isMain) {
 
				           chunkStrategy: embedChunkStrategy,
			
 
				           embedProvider,
			
 
				           providerKind: embedProvider.kind,
			
 
				+          collection: embedCollectionFilter,
			
 
				         });
			
 
				       } catch (error) {
			
 
				         if (error instanceof ModelMismatchError) {
			
--- a/src/store.ts
+++ b/src/store.ts
@@ -1380,6 +1380,14 @@ export type EmbedOptions = {
 
				    * the store's `LlamaCpp` (or the global singleton).
			
 
				    */
			
 
				   embedProvider?: EmbeddingProvider;
			
 
				+  /**
			
 
				+   * Optional collection name filter (i-ofojj7dy). When set, only content
			
 
				+   * hashes that have at least one document in this collection are embedded.
			
 
				+   * `getPendingEmbeddingDocs` filters at the SQL level. Callers are expected
			
 
				+   * to validate the name against `listCollections(db)` first; passing an
			
 
				+   * unknown name yields zero pending docs (no work, no error). NOTE: `force` is NOT scoped by this filter; `generateEmbeddings` still calls `clearAllEmbeddings`.
			
 
				+   */
			
 
				+  collection?: string;
			
 
				 };
			
 
				 
			
 
				 type PendingEmbeddingDoc = {
			
@@ -1418,12 +1426,29 @@ function resolveEmbedOptions(options?: EmbedOptions): Required<Pick<EmbedOptions
 
				   };
			
 
				 }
			
 
				 
			
 
				-function getPendingEmbeddingDocs(db: Database): PendingEmbeddingDoc[] {
			
 
				+function getPendingEmbeddingDocs(db: Database, collection?: string): PendingEmbeddingDoc[] {
			
 
				   // `MIN(d.collection)` deterministically picks one collection per hash when
			
 
				   // the same content is indexed in multiple collections (SQLite tie-breaks
			
 
				   // alphabetically). The identical bytes produce identical chunks regardless
			
 
				   // of which collection wins; the chunkStrategy lookup still resolves via
			
 
				   // that collection's YAML config. See Phase 2 design notes (i-bud0h8vu).
			
 
				+  //
			
 
				+  // i-ofojj7dy — when a collection name is supplied, filter rows BEFORE the
			
 
				+  // GROUP BY so we only emit hashes whose documents include that collection.
			
 
				+  // Other collections sharing the same content hash still benefit from any
			
 
				+  // embeddings generated by this filtered run (content_vectors is keyed
			
 
				+  // by hash, not by collection).
			
 
				+  if (collection !== undefined) {
			
 
				+    return db.prepare(`
			
 
				+      SELECT d.hash, MIN(d.path) as path, MIN(d.collection) as collection, length(CAST(c.doc AS BLOB)) as bytes
			
 
				+      FROM documents d
			
 
				+      JOIN content c ON d.hash = c.hash
			
 
				+      LEFT JOIN content_vectors v ON d.hash = v.hash AND v.seq = 0
			
 
				+      WHERE d.active = 1 AND v.hash IS NULL AND d.collection = ?
			
 
				+      GROUP BY d.hash
			
 
				+      ORDER BY MIN(d.path)
			
 
				+    `).all(collection) as PendingEmbeddingDoc[];
			
 
				+  }
			
 
				   return db.prepare(`
			
 
				     SELECT d.hash, MIN(d.path) as path, MIN(d.collection) as collection, length(CAST(c.doc AS BLOB)) as bytes
			
 
				     FROM documents d
			
@@ -1559,7 +1584,8 @@ export async function generateEmbeddings(
 
				     clearAllEmbeddings(db);
			
 
				   }
			
 
				 
			
 
				-  const docsToEmbed = getPendingEmbeddingDocs(db);
			
 
				+  // i-ofojj7dy — optional collection filter restricts the pending-doc set.
			
 
				+  const docsToEmbed = getPendingEmbeddingDocs(db, options?.collection);
			
 
				 
			
 
				   if (docsToEmbed.length === 0) {
			
 
				     return { docsProcessed: 0, chunksEmbedded: 0, errors: 0, durationMs: 0 };
			
@@ -2128,7 +2154,18 @@ export type IndexStatus = {
 
				 // Index health
			
 
				 // =============================================================================
			
 
				 
			
 
				-export function getHashesNeedingEmbedding(db: Database): number {
			
 
				+export function getHashesNeedingEmbedding(db: Database, collection?: string): number {
			
 
				+  // i-ofojj7dy — optional collection filter. Restricts the count to hashes
			
 
				+  // whose documents are in the named collection.
			
 
				+  if (collection !== undefined) {
			
 
				+    const result = db.prepare(`
			
 
				+      SELECT COUNT(DISTINCT d.hash) as count
			
 
				+      FROM documents d
			
 
				+      LEFT JOIN content_vectors v ON d.hash = v.hash AND v.seq = 0
			
 
				+      WHERE d.active = 1 AND v.hash IS NULL AND d.collection = ?
			
 
				+    `).get(collection) as { count: number };
			
 
				+    return result.count;
			
 
				+  }
			
 
				   const result = db.prepare(`
			
 
				     SELECT COUNT(DISTINCT d.hash) as count
			
 
				     FROM documents d
			
--- a/test/cli.test.ts
+++ b/test/cli.test.ts
@@ -254,6 +254,45 @@ describe("CLI Embed", () => {
 
				     expect(exitCode).toBe(1);
			
 
				     expect(stderr).toContain("maxBatchBytes");
			
 
				   });
			
 
				+
			
 
				+  // i-ofojj7dy — collection-filter ergonomics for embed
			
 
				+  test("embed <unknown-collection> exits non-zero with available list", async () => {
			
 
				+    const { dbPath, configDir } = await createIsolatedTestEnv("embed-unknown");
			
 
				+    const aDir = join(testDir, `embed-unknown-a-${Date.now()}`);
			
 
				+    await mkdir(aDir, { recursive: true });
			
 
				+    await writeFile(join(aDir, "e.md"), "# E");
			
 
				+    expect((await runQmd(["collection", "add", aDir, "--name", "embed-real"], { dbPath, configDir })).exitCode).toBe(0);
			
 
				+
			
 
				+    const { stderr, exitCode } = await runQmd(["embed", "embed-ghost"], { dbPath, configDir });
			
 
				+    expect(exitCode).toBe(1);
			
 
				+    expect(stderr).toContain("Collection not found");
			
 
				+    expect(stderr).toContain("embed-ghost");
			
 
				+    expect(stderr).toContain("embed-real");
			
 
				+  });
			
 
				+
			
 
				+  test("embed <collection> --force rejects the conflict", async () => {
			
 
				+    const { dbPath, configDir } = await createIsolatedTestEnv("embed-force-conflict");
			
 
				+    const aDir = join(testDir, `embed-force-conflict-a-${Date.now()}`);
			
 
				+    await mkdir(aDir, { recursive: true });
			
 
				+    await writeFile(join(aDir, "f.md"), "# F");
			
 
				+    expect((await runQmd(["collection", "add", aDir, "--name", "ef-a"], { dbPath, configDir })).exitCode).toBe(0);
			
 
				+
			
 
				+    const { stderr, exitCode } = await runQmd(["embed", "ef-a", "--force"], { dbPath, configDir });
			
 
				+    expect(exitCode).toBe(1);
			
 
				+    expect(stderr).toContain("--force cannot be combined with a positional collection name");
			
 
				+  });
			
 
				+
			
 
				+  test("embed --all + positional name is a conflict error", async () => {
			
 
				+    const { dbPath, configDir } = await createIsolatedTestEnv("embed-all-conflict");
			
 
				+    const aDir = join(testDir, `embed-all-conflict-a-${Date.now()}`);
			
 
				+    await mkdir(aDir, { recursive: true });
			
 
				+    await writeFile(join(aDir, "g.md"), "# G");
			
 
				+    expect((await runQmd(["collection", "add", aDir, "--name", "eac-a"], { dbPath, configDir })).exitCode).toBe(0);
			
 
				+
			
 
				+    const { stderr, exitCode } = await runQmd(["embed", "eac-a", "--all"], { dbPath, configDir });
			
 
				+    expect(exitCode).toBe(1);
			
 
				+    expect(stderr).toContain("Conflicting arguments");
			
 
				+  });
			
 
				 });
			
 
				 
			
 
				 describe("CLI Skill Commands", () => {
			
@@ -552,6 +591,70 @@ describe("CLI Update Command", () => {
 
				     expect(stdout).toContain("Updating");
			
 
				   });
			
 
				 
			
 
				+  // i-ofojj7dy — collection-filter ergonomics
			
 
				+  // With a positional name, `update` should report exactly one collection and
				+  // its output should mention only the requested one.
				+  test("update <collection> filters to a single collection", async () => {
				+    const { dbPath, configDir } = await createIsolatedTestEnv("update-filter");
				+
				+    // Both collections live in the same config, each backed by its own fixture dir.
				+    const dirA = join(testDir, `update-filter-a-${Date.now()}`);
				+    const dirB = join(testDir, `update-filter-b-${Date.now()}`);
				+    await mkdir(dirA, { recursive: true });
				+    await mkdir(dirB, { recursive: true });
				+    await writeFile(join(dirA, "x.md"), "# X");
				+    await writeFile(join(dirB, "y.md"), "# Y");
				+
				+    const addedA = await runQmd(["collection", "add", dirA, "--name", "filter-a"], { dbPath, configDir });
				+    expect(addedA.exitCode).toBe(0);
				+    const addedB = await runQmd(["collection", "add", dirB, "--name", "filter-b"], { dbPath, configDir });
				+    expect(addedB.exitCode).toBe(0);
				+
				+    const updateResult = await runQmd(["update", "filter-a"], { dbPath, configDir });
				+    expect(updateResult.exitCode).toBe(0);
				+    expect(updateResult.stdout).toContain("Updating 1 collection(s)");
				+    expect(updateResult.stdout).toContain("filter-a");
				+    expect(updateResult.stdout).not.toContain("filter-b");
				+  });
			
 
				+
			
 
				+  // An unknown positional name must fail (exit 1) and stderr must echo the bad
				+  // name alongside the names that do exist.
				+  test("update <unknown-collection> exits non-zero with available list", async () => {
				+    const { dbPath, configDir } = await createIsolatedTestEnv("update-unknown");
				+
				+    // Fixture: a single registered collection called "real-name".
				+    const fixtureDir = join(testDir, `update-unknown-a-${Date.now()}`);
				+    await mkdir(fixtureDir, { recursive: true });
				+    await writeFile(join(fixtureDir, "z.md"), "# Z");
				+    const addResult = await runQmd(["collection", "add", fixtureDir, "--name", "real-name"], { dbPath, configDir });
				+    expect(addResult.exitCode).toBe(0);
				+
				+    const updateResult = await runQmd(["update", "ghost-collection"], { dbPath, configDir });
				+    expect(updateResult.exitCode).toBe(1);
				+    for (const fragment of ["Collection not found", "ghost-collection", "real-name"]) {
				+      expect(updateResult.stderr).toContain(fragment);
				+    }
				+  });
			
 
				+
			
 
				+  // `update --all` should process every registered collection, exactly like a
				+  // bare `update`: both names appear and the count is 2.
				+  test("update --all behaves like update with no args (full-fleet)", async () => {
				+    const { dbPath, configDir } = await createIsolatedTestEnv("update-all");
				+
				+    // Two fixture directories, one markdown file each.
				+    const dirA = join(testDir, `update-all-a-${Date.now()}`);
				+    const dirB = join(testDir, `update-all-b-${Date.now()}`);
				+    await mkdir(dirA, { recursive: true });
				+    await mkdir(dirB, { recursive: true });
				+    await writeFile(join(dirA, "u.md"), "# U");
				+    await writeFile(join(dirB, "v.md"), "# V");
				+
				+    const addedA = await runQmd(["collection", "add", dirA, "--name", "all-a"], { dbPath, configDir });
				+    expect(addedA.exitCode).toBe(0);
				+    const addedB = await runQmd(["collection", "add", dirB, "--name", "all-b"], { dbPath, configDir });
				+    expect(addedB.exitCode).toBe(0);
				+
				+    const updateResult = await runQmd(["update", "--all"], { dbPath, configDir });
				+    expect(updateResult.exitCode).toBe(0);
				+    expect(updateResult.stdout).toContain("Updating 2 collection(s)");
				+    expect(updateResult.stdout).toContain("all-a");
				+    expect(updateResult.stdout).toContain("all-b");
				+  });
			
 
				+
			
 
				+  // Passing both a collection name and --all is contradictory; the CLI is
				+  // expected to exit 1 and report "Conflicting arguments" on stderr.
				+  test("update --all + positional name is a conflict error", async () => {
				+    const { dbPath, configDir } = await createIsolatedTestEnv("update-conflict");
				+
				+    // Fixture: a single registered collection called "conflict-a".
				+    const fixtureDir = join(testDir, `update-conflict-a-${Date.now()}`);
				+    await mkdir(fixtureDir, { recursive: true });
				+    await writeFile(join(fixtureDir, "c.md"), "# C");
				+    const addResult = await runQmd(["collection", "add", fixtureDir, "--name", "conflict-a"], { dbPath, configDir });
				+    expect(addResult.exitCode).toBe(0);
				+
				+    const updateResult = await runQmd(["update", "conflict-a", "--all"], { dbPath, configDir });
				+    expect(updateResult.exitCode).toBe(1);
				+    expect(updateResult.stderr).toContain("Conflicting arguments");
				+  });
			
 
				+
			
 
				   test("deactivates stale docs when collection has zero matching files", async () => {
			
 
				     const { dbPath, configDir } = await createIsolatedTestEnv("update-empty");
			
 
				     const collectionDir = join(testDir, `update-empty-${Date.now()}`);
			
--- a/test/embed-collection-filter.test.ts
+++ b/test/embed-collection-filter.test.ts
@@ -0,0 +1,203 @@
 
				+/**
			
 
				+ * embed-collection-filter.test.ts — Tests for the collection-filter plumbing
			
 
				+ * shipped under i-ofojj7dy:
			
 
				+ *
			
 
				+ *   - getPendingEmbeddingDocs(db, collection) filters at the SQL layer
			
 
				+ *   - getHashesNeedingEmbedding(db, collection) filters at the SQL layer
			
 
				+ *   - generateEmbeddings({ collection }) only embeds matching docs
			
 
				+ *
			
 
				+ * Uses a throwaway SQLite index file (created in a fresh temp dir per test) + stub EmbeddingProvider — no node-llama-cpp.
			
 
				+ */
			
 
				+
			
 
				+import { describe, test, expect, beforeEach, afterEach } from "vitest";
			
 
				+import { mkdtempSync, rmSync } from "node:fs";
			
 
				+import { tmpdir } from "node:os";
			
 
				+import { join } from "node:path";
			
 
				+
			
 
				+import {
			
 
				+  createStore,
			
 
				+  generateEmbeddings,
			
 
				+  getHashesNeedingEmbedding,
			
 
				+  type Store,
			
 
				+} from "../src/store.js";
			
 
				+import type {
			
 
				+  EmbeddingProvider,
			
 
				+  ProviderEmbedding,
			
 
				+  ProviderHealth,
			
 
				+} from "../src/embedding/provider.js";
			
 
				+
			
 
				+// ─────────────────────────── Stub provider ───────────────────────────────────
			
 
				+
			
 
				+/**
				+ * Deterministic EmbeddingProvider stand-in for the tests in this file.
				+ *
				+ * No model is involved: each vector is computed from the input text's length
				+ * alone. The two public counters record how often the provider was invoked so
				+ * a test can assert on them (e.g. that nothing was embedded at all).
				+ */
				+class StubProvider implements EmbeddingProvider {
				+  readonly kind = "openai" as const;
				+  readonly modelId: string;
				+  readonly dim: number;
				+  // Number of embedBatch() invocations so far.
				+  embedBatchCalls = 0;
				+  // Total texts received through embed() and embedBatch() combined.
				+  totalTextsEmbedded = 0;
				+
				+  /**
				+   * @param modelId identifier echoed back in every result
				+   * @param dim length of the generated vectors (defaults to 4)
				+   */
				+  constructor(modelId: string, dim = 4) {
				+    this.modelId = modelId;
				+    this.dim = dim;
				+  }
				+
				+  getModelId(): string {
				+    return this.modelId;
				+  }
				+
				+  getDimensions(): number | undefined {
				+    return this.dim;
				+  }
				+
				+  /** Always reports healthy, echoing the configured model and dimensions. */
				+  async healthcheck(): Promise<ProviderHealth> {
				+    const { modelId: model, dim: dimensions } = this;
				+    return { ok: true, model, dimensions };
				+  }
				+
				+  /** Embeds one text and counts it. */
				+  async embed(text: string): Promise<ProviderEmbedding | null> {
				+    this.totalTextsEmbedded += 1;
				+    const embedding = this.fakeEmbed(text);
				+    return { embedding, model: this.modelId };
				+  }
				+
				+  /** Embeds every text in order; counts one batch call plus texts.length texts. */
				+  async embedBatch(texts: string[]): Promise<(ProviderEmbedding | null)[]> {
				+    this.embedBatchCalls += 1;
				+    this.totalTextsEmbedded += texts.length;
				+    const model = this.modelId;
				+    return texts.map((text) => {
				+      const embedding = this.fakeEmbed(text);
				+      return { embedding, model };
				+    });
				+  }
				+
				+  async dispose(): Promise<void> {}
				+
				+  // Vector component i is (text.length + i) * 0.01, so equal-length texts
				+  // produce identical vectors.
				+  private fakeEmbed(text: string): number[] {
				+    const base = text.length;
				+    return Array.from({ length: this.dim }, (_unused, offset) => (base + offset) * 0.01);
				+  }
				+}
			
 
				+
			
 
				+// ─────────────────────────── Test setup ──────────────────────────────────────
			
 
				+
			
 
				+// Per-test scratch directory and the store opened inside it.
				+let workDir: string | undefined;
				+let store: Store;
				+// Value of INDEX_PATH before the current test ran, so teardown can restore it
				+// instead of unconditionally wiping a caller-supplied setting.
				+let savedIndexPath: string | undefined;
				+
				+// Build a fresh store for every test, seeded with three documents: one
				+// content hash per collection (alpha / beta / gamma).
				+beforeEach(() => {
				+  savedIndexPath = process.env.INDEX_PATH;
				+  workDir = mkdtempSync(join(tmpdir(), "qmd-embed-filter-test-"));
				+  process.env.INDEX_PATH = join(workDir, "index.sqlite");
				+  store = createStore(process.env.INDEX_PATH);
				+
				+  const now = "2026-05-13T00:00:00Z";
				+
				+  // Three distinct content hashes, three distinct collections — one doc each.
				+  // The body has to be non-empty so chunkDocumentByTokens emits ≥1 chunk/doc.
				+  const bodies: Record<string, string> = {
				+    hashA: "Alpha collection body content here that is long enough to chunk.",
				+    hashB: "Beta collection body text there with different vocabulary to chunk.",
				+    hashC: "Gamma collection body words yonder packing unique tokens to chunk.",
				+  };
				+  for (const [hash, body] of Object.entries(bodies)) {
				+    store.db
				+      .prepare(`INSERT INTO content (hash, doc, created_at) VALUES (?, ?, ?)`)
				+      .run(hash, body, now);
				+  }
				+  // doc-per-collection mapping; the path doubles as the title.
				+  const insertDoc = (hash: string, collection: string, path: string) => {
				+    store.db
				+      .prepare(
				+        `INSERT INTO documents (hash, collection, path, title, created_at, modified_at, active) VALUES (?, ?, ?, ?, ?, ?, ?)`,
				+      )
				+      .run(hash, collection, path, path, now, now, 1);
				+  };
				+  insertDoc("hashA", "alpha", "a.md");
				+  insertDoc("hashB", "beta", "b.md");
				+  insertDoc("hashC", "gamma", "c.md");
				+});
				+
				+// Tear down in reverse: close the store, put INDEX_PATH back, remove the dir.
				+afterEach(() => {
				+  try {
				+    store.close();
				+  } catch { /* best-effort: the store may never have opened or is already closed */ }
				+  // Assigning undefined to process.env would store the string "undefined",
				+  // so an originally-unset variable has to be deleted rather than assigned.
				+  if (savedIndexPath === undefined) {
				+    delete process.env.INDEX_PATH;
				+  } else {
				+    process.env.INDEX_PATH = savedIndexPath;
				+  }
				+  // workDir stays unset when mkdtempSync itself failed; skip cleanup then so
				+  // the original setup error is not masked by a TypeError from rmSync.
				+  if (workDir !== undefined) {
				+    rmSync(workDir, { recursive: true, force: true });
				+    workDir = undefined;
				+  }
				+});
			
 
				+
			
 
				+// ─────────────────────────── getHashesNeedingEmbedding ───────────────────────
			
 
				+
			
 
				+// Exercises getHashesNeedingEmbedding(db, collection?) against the three-doc
				+// fixture seeded in beforeEach (one pending hash per collection).
				+describe("getHashesNeedingEmbedding with collection filter (i-ofojj7dy)", () => {
				+  test("returns total count when no collection passed", () => {
				+    const total = getHashesNeedingEmbedding(store.db);
				+    expect(total).toBe(3);
				+  });
				+
				+  test("returns 1 when filtering to a single-doc collection", () => {
				+    for (const name of ["alpha", "beta", "gamma"]) {
				+      expect(getHashesNeedingEmbedding(store.db, name)).toBe(1);
				+    }
				+  });
				+
				+  test("returns 0 when filter does not match any collection", () => {
				+    const count = getHashesNeedingEmbedding(store.db, "nonexistent");
				+    expect(count).toBe(0);
				+  });
				+
				+  test("shared content hash counted per containing collection", () => {
				+    // Register hashA a second time, this time under collection "beta".
				+    const stamp = "2026-05-13T00:00:00Z";
				+    const insertShared = store.db.prepare(
				+      `INSERT INTO documents (hash, collection, path, title, created_at, modified_at, active) VALUES (?, ?, ?, ?, ?, ?, ?)`,
				+    );
				+    insertShared.run("hashA", "beta", "shared.md", "shared", stamp, stamp, 1);
				+
				+    // Unfiltered: still three distinct pending hashes overall.
				+    expect(getHashesNeedingEmbedding(store.db)).toBe(3);
				+    // beta now holds two distinct hashes (hashA + hashB).
				+    expect(getHashesNeedingEmbedding(store.db, "beta")).toBe(2);
				+    // alpha is unchanged: only hashA.
				+    expect(getHashesNeedingEmbedding(store.db, "alpha")).toBe(1);
				+  });
				+
				+  test("inactive docs are excluded from the filtered count", () => {
				+    const deactivateBeta = store.db.prepare(`UPDATE documents SET active = 0 WHERE collection = 'beta'`);
				+    deactivateBeta.run();
				+
				+    expect(getHashesNeedingEmbedding(store.db, "beta")).toBe(0);
				+    // The other collections keep their pending hash.
				+    expect(getHashesNeedingEmbedding(store.db, "alpha")).toBe(1);
				+  });
				+});
			
 
				+
			
 
				+// ─────────────────────────── generateEmbeddings filter ───────────────────────
			
 
				+
			
 
				+// Exercises generateEmbeddings(store, { collection }) with the stub provider
				+// against the three-doc fixture seeded in beforeEach.
				+describe("generateEmbeddings with collection filter (i-ofojj7dy)", () => {
				+  // Every test gets its own provider so the call counters start at zero.
				+  const makeProvider = () => new StubProvider("embeddinggemma", 4);
				+
				+  test("processes only documents in the named collection", async () => {
				+    const embedProvider = makeProvider();
				+    const outcome = await generateEmbeddings(store, {
				+      embedProvider,
				+      collection: "alpha",
				+      maxDocsPerBatch: 64,
				+    });
				+    expect(outcome.docsProcessed).toBe(1);
				+    expect(outcome.chunksEmbedded).toBeGreaterThan(0);
				+    expect(outcome.errors).toBe(0);
				+  });
				+
				+  test("processes all documents when collection is omitted (legacy path)", async () => {
				+    const embedProvider = makeProvider();
				+    const outcome = await generateEmbeddings(store, {
				+      embedProvider,
				+      maxDocsPerBatch: 64,
				+    });
				+    expect(outcome.docsProcessed).toBe(3);
				+    expect(outcome.errors).toBe(0);
				+  });
				+
				+  test("returns zero-result for unknown collection without throwing", async () => {
				+    const embedProvider = makeProvider();
				+    const outcome = await generateEmbeddings(store, {
				+      embedProvider,
				+      collection: "ghost",
				+    });
				+    // Nothing matches "ghost": all counters are zero and the provider was
				+    // never handed any text.
				+    expect(outcome.docsProcessed).toBe(0);
				+    expect(outcome.chunksEmbedded).toBe(0);
				+    expect(outcome.errors).toBe(0);
				+    expect(embedProvider.totalTextsEmbedded).toBe(0);
				+  });
				+
				+  test("does not embed docs from sibling collections", async () => {
				+    // Embed beta only, then check what is still pending per collection.
				+    const embedProvider = makeProvider();
				+    await generateEmbeddings(store, {
				+      embedProvider,
				+      collection: "beta",
				+    });
				+    // alpha and gamma remain pending; beta is done.
				+    const pendingAfter: Array<[string, number]> = [
				+      ["alpha", 1],
				+      ["gamma", 1],
				+      ["beta", 0],
				+    ];
				+    for (const [name, expected] of pendingAfter) {
				+      expect(getHashesNeedingEmbedding(store.db, name)).toBe(expected);
				+    }
				+  });
				+});