|
|
@@ -24,6 +24,7 @@ import {
|
|
|
formatQueryForEmbedding,
|
|
|
formatDocForEmbedding,
|
|
|
withLLMSessionForLlm,
|
|
|
+ type LLMSessionOptions,
|
|
|
type RerankDocument,
|
|
|
type ILLMSession,
|
|
|
} from "./llm.js";
|
|
|
@@ -1409,6 +1410,54 @@ function getEmbeddingDocsForBatch(db: Database, batch: PendingEmbeddingDoc[]): E
|
|
|
}));
|
|
|
}
|
|
|
|
|
|
/**
 * Run `body` with a session-shaped argument that exposes an abort signal and
 * an `isValid` flag.
 *
 * Provider path (`provider` is set): the session is a lightweight stub backed
 * by a local `AbortController`. Neither `getLlm(store)` nor
 * `withLLMSessionForLlm` is called in this branch; per the original change
 * note this is what keeps node-llama-cpp from being warmed up on remote-only
 * deployments (i-08ovbvtb, follow-up to i-qkarfffa). The stub's
 * `embed`/`embedBatch`/`expandQuery`/`rerank` methods always reject, because
 * embedding is expected to be routed through `provider` instead.
 *
 * Local path (`provider` is undefined): delegates to
 * `withLLMSessionForLlm(getLlm(store), body, options)` unchanged.
 *
 * @param store    Store handed to `getLlm` on the local path; unused on the
 *                 provider path.
 * @param provider Remote embedding provider; its presence selects the stub
 *                 session.
 * @param body     Work to run with the session. Its result (or rejection) is
 *                 propagated to the caller.
 * @param options  Forwarded as-is on the local path. On the provider path only
 *                 `options.signal` is honoured: when it aborts, the stub
 *                 session's signal aborts too and `isValid` turns false.
 *                 NOTE(review): assumes `LLMSessionOptions` declares an
 *                 optional `signal: AbortSignal` — confirm against ./llm.js.
 *                 Any other option fields are not applied to the stub.
 * @returns Whatever `body` resolves to.
 */
async function withEmbedSession<T>(
  store: Store,
  provider: EmbeddingProvider | undefined,
  body: (session: ILLMSession) => Promise<T>,
  options?: LLMSessionOptions,
): Promise<T> {
  if (!provider) {
    return withLLMSessionForLlm(getLlm(store), body, options);
  }

  const controller = new AbortController();

  // Previously `options` was dropped on this branch, so a caller-supplied
  // abort signal could never cancel a provider-backed run. Mirror it onto the
  // stub's controller. `body` is still invoked when the signal is already
  // aborted; it simply observes `isValid === false` from the start.
  const callerSignal = options?.signal;
  const forwardAbort = (): void => controller.abort();
  if (callerSignal?.aborted) {
    controller.abort();
  } else {
    callerSignal?.addEventListener("abort", forwardAbort, { once: true });
  }

  // One factory for the four LLM-only methods; each rejects with the same
  // message shape so a mis-routed call fails loudly instead of silently
  // loading a local model.
  const forbid = (method: string) => async (): Promise<never> => {
    throw new Error(`withEmbedSession: provider supplied — session.${method} must not be called`);
  };

  const stubSession: ILLMSession = {
    get signal() {
      return controller.signal;
    },
    get isValid() {
      return !controller.signal.aborted;
    },
    embed: forbid("embed"),
    embedBatch: forbid("embedBatch"),
    expandQuery: forbid("expandQuery"),
    rerank: forbid("rerank"),
  };

  try {
    return await body(stubSession);
  } finally {
    // Detach from the caller's signal so a long-lived signal does not retain
    // this closure, then invalidate the stub for anything still holding it.
    callerSignal?.removeEventListener("abort", forwardAbort);
    controller.abort();
  }
}
|
|
|
+
|
|
|
/**
|
|
|
* Generate vector embeddings for documents that need them.
|
|
|
* Pure function — no console output, no db lifecycle management.
|
|
|
@@ -1463,10 +1512,6 @@ export async function generateEmbeddings(
|
|
|
// callers that never touch ~/.config/qmd working.
|
|
|
}
|
|
|
|
|
|
- // Use store's LlamaCpp or global singleton, wrapped in a session
|
|
|
- const llm = getLlm(store);
|
|
|
- const embedModelUri = options?.embedProvider?.getModelId() ?? llm.embedModelName;
|
|
|
-
|
|
|
// Provider routing — when an EmbeddingProvider is supplied, embed calls go
|
|
|
// through it (HTTP, GPU worker, etc.). Otherwise, use the LLM session path.
|
|
|
// The outer session is still created for its abort signal (chunking uses
|
|
|
@@ -1474,8 +1519,18 @@ export async function generateEmbeddings(
|
|
|
const provider = options?.embedProvider;
|
|
|
const providerModel = provider?.getModelId() ?? model;
|
|
|
|
|
|
- // Create a session manager for this llm instance
|
|
|
- const result = await withLLMSessionForLlm(llm, async (session) => {
|
|
|
+ // Resolve `embedModelUri` (used for formatting prefixes etc.) lazily —
|
|
|
+ // when `provider` is set, take it from the provider; otherwise fall back
|
|
|
+ // to the local LlamaCpp's embed model name. Accessing `getLlm(store)` is
|
|
|
+ // deferred to the non-provider branch so remote-only deployments do not
|
|
|
+ // construct a `LlamaCpp` instance just to read its embedModelName.
|
|
|
+ const embedModelUri = provider
|
|
|
+ ? provider.getModelId()
|
|
|
+ : getLlm(store).embedModelName;
|
|
|
+
|
|
|
+ // Run the embedding loop inside a session-scoped wrapper. When `provider`
|
|
|
+ // is set, this short-circuits the local LLM warm-up entirely (i-08ovbvtb).
|
|
|
+ const result = await withEmbedSession(store, provider, async (session) => {
|
|
|
let chunksEmbedded = 0;
|
|
|
let errors = 0;
|
|
|
let bytesProcessed = 0;
|