Ver Fonte

fix(store): thread embed model URI to format functions for correct prompt detection

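When the embed model is configured via YAML (not env var), the
formatDocForEmbedding and formatQueryForEmbedding call sites in store.ts did
not receive the configured embed model URI: the document formatter was passed
the `model` argument and the query formatter was passed no model at all, so
prompt detection could fall back to the default model and produce the wrong
prompt format. This adds a public embedModelName getter on LlamaCpp and threads
it through all five call sites (three in generateEmbeddings, one each in
hybridQuery and structuredSearch).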

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
JohnRichardEnders há 1 mês atrás
pai
commit
8644fa99d1
2 ficheiros alterados com 10 adições e 5 exclusões
  1. 4 0
      src/llm.ts
  2. 6 5
      src/store.ts

+ 4 - 0
src/llm.ts

@@ -445,6 +445,10 @@ export class LlamaCpp implements LLM {
     this.disposeModelsOnInactivity = config.disposeModelsOnInactivity ?? false;
   }
 
+  get embedModelName(): string {
+    return this.embedModelUri;
+  }
+
   /**
    * Reset the inactivity timer. Called after each model operation.
    * When timer fires, models are unloaded to free memory (if no active sessions).

+ 6 - 5
src/store.ts

@@ -1414,6 +1414,7 @@ export async function generateEmbeddings(
 
   // Use store's LlamaCpp or global singleton, wrapped in a session
   const llm = getLlm(store);
+  const embedModelUri = llm.embedModelName;
 
   // Create a session manager for this llm instance
   const result = await withLLMSessionForLlm(llm, async (session) => {
@@ -1471,7 +1472,7 @@ export async function generateEmbeddings(
 
       if (!vectorTableInitialized) {
         const firstChunk = batchChunks[0]!;
-        const firstText = formatDocForEmbedding(firstChunk.text, firstChunk.title, model);
+        const firstText = formatDocForEmbedding(firstChunk.text, firstChunk.title, embedModelUri);
         const firstResult = await session.embed(firstText, { model });
         if (!firstResult) {
           throw new Error("Failed to get embedding dimensions from first chunk");
@@ -1503,7 +1504,7 @@ export async function generateEmbeddings(
 
         const batchEnd = Math.min(batchStart + BATCH_SIZE, batchChunks.length);
         const chunkBatch = batchChunks.slice(batchStart, batchEnd);
-        const texts = chunkBatch.map(chunk => formatDocForEmbedding(chunk.text, chunk.title, model));
+        const texts = chunkBatch.map(chunk => formatDocForEmbedding(chunk.text, chunk.title, embedModelUri));
 
         try {
           const embeddings = await session.embedBatch(texts, { model });
@@ -1527,7 +1528,7 @@ export async function generateEmbeddings(
           } else {
             for (const chunk of chunkBatch) {
               try {
-                const text = formatDocForEmbedding(chunk.text, chunk.title, model);
+                const text = formatDocForEmbedding(chunk.text, chunk.title, embedModelUri);
                 const result = await session.embed(text, { model });
                 if (result) {
                   insertEmbedding(db, chunk.hash, chunk.seq, chunk.pos, new Float32Array(result.embedding), model, now);
@@ -3985,7 +3986,7 @@ export async function hybridQuery(
 
     // Batch embed all vector queries in a single call
     const llm = getLlm(store);
-    const textsToEmbed = vecQueries.map(q => formatQueryForEmbedding(q.text));
+    const textsToEmbed = vecQueries.map(q => formatQueryForEmbedding(q.text, llm.embedModelName));
     hooks?.onEmbedStart?.(textsToEmbed.length);
     const embedStart = Date.now();
     const embeddings = await llm.embedBatch(textsToEmbed);
@@ -4368,7 +4369,7 @@ export async function structuredSearch(
     );
     if (vecSearches.length > 0) {
       const llm = getLlm(store);
-      const textsToEmbed = vecSearches.map(s => formatQueryForEmbedding(s.query));
+      const textsToEmbed = vecSearches.map(s => formatQueryForEmbedding(s.query, llm.embedModelName));
       hooks?.onEmbedStart?.(textsToEmbed.length);
       const embedStart = Date.now();
       const embeddings = await llm.embedBatch(textsToEmbed);