Explorar el Código

Fix vsearch/query hang caused by sqlite-vec JOIN incompatibility

sqlite-vec virtual tables don't work correctly when JOINed with other
tables in the same query; the query hangs indefinitely.

Changes:
- searchVec: Rewrite to use a two-step approach
  1. Query the vectors_vec table alone (no JOINs)
  2. Look up document info separately using the resulting hash_seqs
- vsearch: Change from Promise.all to a sequential for...of loop
  (the node-llama-cpp embedding context doesn't handle concurrent calls)

This fixes the vsearch and hybrid query commands, which were hanging at
"Searching N vector queries..."

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
Brendan McCord hace 4 meses
padre
commit
216793380a
Se han modificado 2 ficheros con 39 adiciones y 26 borrados
  1. 3 3
      src/qmd.ts
  2. 36 23
      src/store.ts

+ 3 - 3
src/qmd.ts

@@ -1972,8 +1972,8 @@ async function vectorSearch(query: string, opts: OutputOptions, model: string =
   const perQueryLimit = opts.all ? 500 : 20;
   const allResults = new Map<string, { file: string; displayPath: string; title: string; body: string; score: number; hash: string }>();
 
-  // Use Promise.all for concurrent vector searches
-  await Promise.all(vectorQueries.map(async (q) => {
+  // Run vector searches sequentially (node-llama-cpp embedding context doesn't handle concurrent calls)
+  for (const q of vectorQueries) {
     const vecResults = await searchVec(db, q, model, perQueryLimit, collectionName as any);
     for (const r of vecResults) {
       const existing = allResults.get(r.filepath);
@@ -1981,7 +1981,7 @@ async function vectorSearch(query: string, opts: OutputOptions, model: string =
         allResults.set(r.filepath, { file: r.filepath, displayPath: r.displayPath, title: r.title, body: r.body || "", score: r.score, hash: r.hash });
       }
     }
-  }));
+  }
 
   // Sort by max score and limit to requested count
   const results = Array.from(allResults.values())

+ 36 - 23
src/store.ts

@@ -1679,48 +1679,61 @@ export async function searchVec(db: Database, query: string, model: string, limi
   const embedding = await getEmbedding(query, model, true);
   if (!embedding) return [];
 
-  // sqlite-vec requires "k = ?" for KNN queries
-  let sql = `
+  // Step 1: Get vector matches (sqlite-vec doesn't work with JOINs)
+  const vecResults = db.prepare(`
+    SELECT hash_seq, distance
+    FROM vectors_vec
+    WHERE embedding MATCH ? AND k = ?
+  `).all(new Float32Array(embedding), limit * 3) as { hash_seq: string; distance: number }[];
+
+  if (vecResults.length === 0) return [];
+
+  // Step 2: Get chunk info and document data
+  const hashSeqs = vecResults.map(r => r.hash_seq);
+  const distanceMap = new Map(vecResults.map(r => [r.hash_seq, r.distance]));
+
+  // Build query for document lookup
+  const placeholders = hashSeqs.map(() => '?').join(',');
+  let docSql = `
     SELECT
-      v.hash_seq,
-      v.distance,
+      cv.hash || '_' || cv.seq as hash_seq,
+      cv.hash,
+      cv.pos,
       'qmd://' || d.collection || '/' || d.path as filepath,
       d.collection || '/' || d.path as display_path,
       d.title,
-      content.doc as body,
-      cv.hash,
-      cv.pos
-    FROM vectors_vec v
-    JOIN content_vectors cv ON cv.hash || '_' || cv.seq = v.hash_seq
+      content.doc as body
+    FROM content_vectors cv
     JOIN documents d ON d.hash = cv.hash AND d.active = 1
     JOIN content ON content.hash = d.hash
-    WHERE v.embedding MATCH ? AND k = ?
+    WHERE cv.hash || '_' || cv.seq IN (${placeholders})
   `;
-
-  const params: (Float32Array | number | string)[] = [new Float32Array(embedding), limit * 3];
+  const params: string[] = [...hashSeqs];
 
   if (collectionId) {
-    // Filter by collection name
-    sql += ` AND d.collection = ?`;
+    docSql += ` AND d.collection = ?`;
     params.push(String(collectionId));
   }
 
-  sql += ` ORDER BY v.distance`;
-
-  const rows = db.prepare(sql).all(...params) as { hash_seq: string; distance: number; filepath: string; display_path: string; title: string; body: string; hash: string; pos: number }[];
+  const docRows = db.prepare(docSql).all(...params) as {
+    hash_seq: string; hash: string; pos: number; filepath: string;
+    display_path: string; title: string; body: string;
+  }[];
 
-  const seen = new Map<string, { row: typeof rows[0]; bestDist: number }>();
-  for (const row of rows) {
+  // Combine with distances and dedupe by filepath
+  const seen = new Map<string, { row: typeof docRows[0]; bestDist: number }>();
+  for (const row of docRows) {
+    const distance = distanceMap.get(row.hash_seq) ?? 1;
     const existing = seen.get(row.filepath);
-    if (!existing || row.distance < existing.bestDist) {
-      seen.set(row.filepath, { row, bestDist: row.distance });
+    if (!existing || distance < existing.bestDist) {
+      seen.set(row.filepath, { row, bestDist: distance });
     }
   }
 
   return Array.from(seen.values())
     .sort((a, b) => a.bestDist - b.bestDist)
     .slice(0, limit)
-    .map(({ row }) => {
+    .map(({ row, bestDist }) => {
       const collectionName = row.filepath.split('//')[1]?.split('/')[0] || "";
       return {
         filepath: row.filepath,
@@ -1733,7 +1746,7 @@ export async function searchVec(db: Database, query: string, model: string, limi
         bodyLength: row.body.length,
         body: row.body,
         context: getContextForFile(db, row.filepath),
-        score: 1 - row.distance,  // Cosine similarity = 1 - cosine distance
+        score: 1 - bestDist,  // Cosine similarity = 1 - cosine distance
         source: "vec" as const,
         chunkPos: row.pos,
       };