2 mesi fa · 6ebfc54cc7
--- a/src/store.ts
+++ b/src/store.ts
@@ -24,6 +24,7 @@ import {
 
				   formatQueryForEmbedding,
			
 
				   formatDocForEmbedding,
			
 
				   withLLMSessionForLlm,
			
 
				+  type LLMSessionOptions,
			
 
				   type RerankDocument,
			
 
				   type ILLMSession,
			
 
				 } from "./llm.js";
			
@@ -1409,6 +1410,54 @@ function getEmbeddingDocsForBatch(db: Database, batch: PendingEmbeddingDoc[]): E
 
				   }));
			
 
				 }
			
 
				 
			
 
				+/**
			
 
				+ * Run `body` with a session-shaped argument that supplies an AbortSignal and
			
 
				+ * an `isValid` flag. When `provider` is supplied, the session is a lightweight
			
 
				+ * AbortController-backed stub — `getLlm(store)` is never called and
			
 
				+ * `withLLMSessionForLlm` is bypassed entirely, so node-llama-cpp is not
			
 
				+ * warmed up on remote-only deployments (i-08ovbvtb, follow-up to i-qkarfffa).
			
 
				+ *
			
 
				+ * When `provider` is undefined, behavior is unchanged: the call is delegated
			
 
				+ * to `withLLMSessionForLlm(getLlm(store), body, options)` so that the body
			
 
				+ * can use `session.embed`/`session.embedBatch` for the local path.
			
 
				+ *
			
 
				+ * The stub's LLM-only methods (embed/embedBatch/expandQuery/rerank) reject
			
 
				+ * if called — they MUST NOT be reached when `provider` is set. In that mode
			
 
				+ * `options` is ignored and the stub's signal is aborted once `body` settles.
			
 
				+ */
			
 
				+async function withEmbedSession<T>(
			
 
				+  store: Store,
			
 
				+  provider: EmbeddingProvider | undefined,
			
 
				+  body: (session: ILLMSession) => Promise<T>,
			
 
				+  options?: LLMSessionOptions, // only forwarded on the local (no-provider) path
			
 
				+): Promise<T> {
			
 
				+  if (provider) {
			
 
				+    const ac = new AbortController(); // backs the stub's `signal` and `isValid`
			
 
				+    const fakeSession: ILLMSession = {
			
 
				+      get signal() { return ac.signal; },
			
 
				+      get isValid() { return !ac.signal.aborted; }, // valid until the controller is aborted
			
 
				+      embed: async () => {
			
 
				+        throw new Error("withEmbedSession: provider supplied — session.embed must not be called");
			
 
				+      },
			
 
				+      embedBatch: async () => {
			
 
				+        throw new Error("withEmbedSession: provider supplied — session.embedBatch must not be called");
			
 
				+      },
			
 
				+      expandQuery: async () => {
			
 
				+        throw new Error("withEmbedSession: provider supplied — session.expandQuery must not be called");
			
 
				+      },
			
 
				+      rerank: async () => {
			
 
				+        throw new Error("withEmbedSession: provider supplied — session.rerank must not be called");
			
 
				+      },
			
 
				+    };
			
 
				+    try {
			
 
				+      return await body(fakeSession);
			
 
				+    } finally {
			
 
				+      ac.abort(); // runs on success and failure: afterwards `isValid` reads false
			
 
				+    }
			
 
				+  }
			
 
				+  return withLLMSessionForLlm(getLlm(store), body, options); // local path: the only place getLlm(store) is touched here
			
 
				+}
			
 
				+
			
 
				 /**
			
 
				  * Generate vector embeddings for documents that need them.
			
 
				  * Pure function — no console output, no db lifecycle management.
			
@@ -1463,10 +1512,6 @@ export async function generateEmbeddings(
 
				     // callers that never touch ~/.config/qmd working.
			
 
				   }
			
 
				 
			
 
				-  // Use store's LlamaCpp or global singleton, wrapped in a session
			
 
				-  const llm = getLlm(store);
			
 
				-  const embedModelUri = options?.embedProvider?.getModelId() ?? llm.embedModelName;
			
 
				-
			
 
				   // Provider routing — when an EmbeddingProvider is supplied, embed calls go
			
 
				   // through it (HTTP, GPU worker, etc.). Otherwise, use the LLM session path.
			
 
				   // The outer session is still created for its abort signal (chunking uses
			
@@ -1474,8 +1519,18 @@ export async function generateEmbeddings(
 
				   const provider = options?.embedProvider;
			
 
				   const providerModel = provider?.getModelId() ?? model;
			
 
				 
			
 
				-  // Create a session manager for this llm instance
			
 
				-  const result = await withLLMSessionForLlm(llm, async (session) => {
			
 
				+  // Resolve `embedModelUri` (used for formatting prefixes etc.) per branch —
			
 
				+  // when `provider` is set, take it from the provider; otherwise fall back
			
 
				+  // to the local LlamaCpp's embed model name. Accessing `getLlm(store)` is
			
 
				+  // deferred to the non-provider branch so remote-only deployments do not
			
 
				+  // construct a `LlamaCpp` instance just to read its embedModelName.
			
 
				+  const embedModelUri = provider
			
 
				+    ? provider.getModelId()
			
 
				+    : getLlm(store).embedModelName;
			
 
				+
			
 
				+  // Run the embedding loop inside a session-scoped wrapper. When `provider`
			
 
				+  // is set, this short-circuits the local LLM warm-up entirely (i-08ovbvtb).
			
 
				+  const result = await withEmbedSession(store, provider, async (session) => {
			
 
				     let chunksEmbedded = 0;
			
 
				     let errors = 0;
			
 
				     let bytesProcessed = 0;
			
--- a/test/embedding-store-integration.test.ts
+++ b/test/embedding-store-integration.test.ts
@@ -146,10 +146,36 @@ describe("generateEmbeddings with EmbeddingProvider", () => {
 
				     expect(result.errors).toBe(0);
			
 
				     expect(provider.embedBatchCalls + provider.embedCalls).toBeGreaterThan(0);
			
 
				     expect(provider.totalTextsEmbedded).toBeGreaterThan(0);
			
 
				-  }, 30000); // Cold-cache llama-cpp init can take >5s on first session call.
			
 
				-  // Provider short-circuits embed calls (line 1494-1499 of store.ts) but the
			
 
				-  // outer `withLLMSessionForLlm` wrapper still warms the LLM. DoD #9 (skip
			
 
				-  // LLM init when provider supplied) is a follow-up refactor.
			
 
				+  });
			
 
				+  // Default 5s timeout restored after i-08ovbvtb removed the
			
 
				+  // `withLLMSessionForLlm` wrapper from the provider path. The previous
			
 
				+  // 30s bump (commit 058ec1d) was a workaround for the cold-cache LLM
			
 
				+  // warm-up that the refactor now skips entirely.
			
 
				+
			
 
				+  test("provider mode does not access store.llm (DoD #2, #5 — i-08ovbvtb)", async () => {
			
 
				+    // When `embedProvider` is supplied, the refactor must NOT consult the
			
 
				+    // local LlamaCpp at all — neither `embedModelName` nor any other field.
			
 
				+    // We assert this by setting `store.llm` to a Proxy that throws on any
			
 
				+    // property access. If `getLlm(store).embedModelName` (or any sibling
			
 
				+    // call site) regressed back into the provider path, the test would
			
 
				+    // fail with a clear error message.
			
 
				+    const throwingLlm = new Proxy({}, {
			
 
				+      get(_target, prop) {
			
 
				+        throw new Error(
			
 
				+          `store.llm.${String(prop)} accessed when embedProvider was supplied — DoD violation`,
			
 
				+        );
			
 
				+      },
			
 
				+    }) as never; // `never` is assignable to any type, so the Proxy fits whatever `store.llm` declares
			
 
				+    store.llm = throwingLlm; // NOTE(review): never restored in this test — presumably `store` is rebuilt per test; confirm
			
 
				+
			
 
				+    const provider = new StubProvider("embeddinggemma", 4);
			
 
				+    const result = await generateEmbeddings(store, { embedProvider: provider });
			
 
				+
			
 
				+    expect(result.docsProcessed).toBe(2);
			
 
				+    expect(result.chunksEmbedded).toBeGreaterThan(0);
			
 
				+    expect(result.errors).toBe(0);
			
 
				+    expect(provider.totalTextsEmbedded).toBeGreaterThan(0);
			
 
				+  });
			
 
				 
			
 
				   test("model-id guard throws ModelMismatchError on mismatch", async () => {
			
 
				     // Pre-populate content_vectors with a different model id