Переглянути джерело

fix(embed): handle vec0 OR REPLACE limitation in insertEmbedding

sqlite-vec's vec0 virtual tables silently ignore the OR REPLACE conflict
clause, so INSERT OR REPLACE behaves like a plain INSERT. When a crash
interrupts embedding midway, chunks that were inserted into vectors_vec
but not into content_vectors are re-selected by getHashesForEmbedding,
and re-embedding them fails with a UNIQUE constraint error.

Two changes:
1. Insert content_vectors first so getHashesForEmbedding won't re-select
   the hash if a crash occurs between the two inserts.
2. Use DELETE + INSERT for vectors_vec instead of INSERT OR REPLACE.

Fixes #445

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Antonio 2 місяців тому
батько
коміт
902e14650e
1 змінених файлів з 13 додано та 2 видалено
  1. 13 2
      src/store.ts

+ 13 - 2
src/store.ts

@@ -2943,6 +2943,12 @@ export function clearAllEmbeddings(db: Database): void {
 /**
  * Insert a single embedding into both content_vectors and vectors_vec tables.
  * The hash_seq key is formatted as "hash_seq" for the vectors_vec table.
+ *
+ * content_vectors is inserted first so that getHashesForEmbedding (which checks
+ * only content_vectors) won't re-select the hash on a crash between the two inserts.
+ *
+ * vectors_vec uses DELETE + INSERT instead of INSERT OR REPLACE because sqlite-vec's
+ * vec0 virtual tables silently ignore the OR REPLACE conflict clause.
  */
 export function insertEmbedding(
   db: Database,
@@ -2954,11 +2960,16 @@ export function insertEmbedding(
   embeddedAt: string
 ): void {
   const hashSeq = `${hash}_${seq}`;
-  const insertVecStmt = db.prepare(`INSERT OR REPLACE INTO vectors_vec (hash_seq, embedding) VALUES (?, ?)`);
+
+  // Insert content_vectors first — crash-safe ordering (see getHashesForEmbedding)
   const insertContentVectorStmt = db.prepare(`INSERT OR REPLACE INTO content_vectors (hash, seq, pos, model, embedded_at) VALUES (?, ?, ?, ?, ?)`);
+  insertContentVectorStmt.run(hash, seq, pos, model, embeddedAt);
 
+  // vec0 virtual tables don't support OR REPLACE — use DELETE + INSERT
+  const deleteVecStmt = db.prepare(`DELETE FROM vectors_vec WHERE hash_seq = ?`);
+  const insertVecStmt = db.prepare(`INSERT INTO vectors_vec (hash_seq, embedding) VALUES (?, ?)`);
+  deleteVecStmt.run(hashSeq);
   insertVecStmt.run(hashSeq, embedding);
-  insertContentVectorStmt.run(hash, seq, pos, model, embeddedAt);
 }
 
 // =============================================================================