6 months ago · 01d74727f7
--- a/src/qmd.ts
+++ b/src/qmd.ts
@@ -1972,7 +1972,10 @@ async function vectorSearch(query: string, opts: OutputOptions, model: string =
 
				   const perQueryLimit = opts.all ? 500 : 20;
			
 
				   const allResults = new Map<string, { file: string; displayPath: string; title: string; body: string; score: number; hash: string }>();
			
 
				 
			
 
				-  // Run vector searches sequentially (node-llama-cpp embedding context doesn't handle concurrent calls)
			
 
				+  // IMPORTANT: Run vector searches sequentially, not with Promise.all.
			
 
				+  // node-llama-cpp's embedding context hangs when multiple concurrent embed() calls
			
 
				+  // are made. This is a known limitation of the LlamaEmbeddingContext.
			
 
				+  // See: https://github.com/tobi/qmd/pull/23
			
 
				   for (const q of vectorQueries) {
			
 
				     const vecResults = await searchVec(db, q, model, perQueryLimit, collectionName as any);
			
 
				     for (const r of vecResults) {
			
--- a/src/store.test.ts
+++ b/src/store.test.ts
@@ -1855,6 +1855,42 @@ describe("LlamaCpp Integration", () => {
 
				     await cleanupTestDb(store);
			
 
				   });
			
 
				 
			
 
				+  // Regression test for https://github.com/tobi/qmd/pull/23
			
 
				+  // sqlite-vec virtual tables hang when combined with JOINs in the same query.
			
 
				+  // The fix uses a two-step approach: vector query first, then separate JOINs.
			
 
				+  test("searchVec uses two-step query to avoid sqlite-vec JOIN hang", async () => {
			
 
				+    const store = await createTestStore();
			
 
				+
			
 
				+    // Add test document with embedding
			
 
				+    await store.addDocument("collection", "test.md", "Test content for vector search");
			
 
				+    await store.updateIndex();
			
 
				+
			
 
				+    // Generate embedding for the test doc
			
 
				+    const llm = (await import("./llm.js")).getDefaultLlamaCpp();
			
 
				+    const embedding = await llm.embed("Test content for vector search");
			
 
				+    if (embedding) {
			
 
				+      // Manually insert vector to test the query path
			
 
				+      const hash = hashContent("Test content for vector search");
			
 
				+      store.db.prepare(`
			
 
				+        INSERT OR REPLACE INTO content_vectors (hash, seq, pos) VALUES (?, 0, 0)
			
 
				+      `).run(hash);
			
 
				+      store.db.prepare(`
			
 
				+        INSERT OR REPLACE INTO vectors_vec (hash_seq, embedding) VALUES (?, ?)
			
 
				+      `).run(`${hash}_0`, new Float32Array(embedding.embedding));
			
 
				+    }
			
 
				+
			
 
				+    // This should complete quickly (not hang) due to the two-step fix
			
 
				+    const startTime = Date.now();
			
 
				+    const results = await store.searchVec("test content", "embeddinggemma", 5);
			
 
				+    const elapsed = Date.now() - startTime;
			
 
				+
			
 
				+    // If the query took more than 10 seconds, something is wrong
			
 
				+    // (the hang bug would cause it to never return)
			
 
				+    expect(elapsed).toBeLessThan(10000);
			
 
				+
			
 
				+    await cleanupTestDb(store);
			
 
				+  }, 30000);
			
 
				+
			
 
				   test("expandQuery returns original plus expanded queries", async () => {
			
 
				     const store = await createTestStore();
			
 
				 
			
--- a/src/store.ts
+++ b/src/store.ts
@@ -1679,7 +1679,12 @@ export async function searchVec(db: Database, query: string, model: string, limi
 
				   const embedding = await getEmbedding(query, model, true);
			
 
				   if (!embedding) return [];
			
 
				 
			
 
				-  // Step 1: Get vector matches (sqlite-vec doesn't work with JOINs)
			
 
				+  // IMPORTANT: We use a two-step query approach here because sqlite-vec virtual tables
			
 
				+  // hang indefinitely when combined with JOINs in the same query. Do NOT try to
			
 
				+  // "optimize" this by combining into a single query with JOINs - it will break.
			
 
				+  // See: https://github.com/tobi/qmd/pull/23
			
 
				+
			
 
				+  // Step 1: Get vector matches from sqlite-vec (no JOINs allowed)
			
 
				   const vecResults = db.prepare(`
			
 
				     SELECT hash_seq, distance
			
 
				     FROM vectors_vec