/**
 * local.ts - Local llama.cpp adapter implementing EmbeddingProvider.
 *
 * Wraps an existing `LlamaCpp` instance so the legacy GGUF path looks like
 * any other EmbeddingProvider to upstream callers. Used as the default and
 * as the fallback target when `OpenAIEmbeddingsProvider` trips its breaker.
 */

import {
  type LlamaCpp,
  getDefaultLlamaCpp,
} from "../llm.js";
import type {
  EmbeddingProvider,
  ProviderEmbedOptions,
  ProviderEmbedding,
  ProviderHealth,
  ProviderKind,
} from "./provider.js";

export type LocalLlamaCppProviderConfig = {
  /** Pre-built LlamaCpp instance (optional — falls back to global singleton). */
  llm?: LlamaCpp;
  /**
   * Stable model id reported via `getModelId()`. Defaults to "embeddinggemma"
   * to match the value in `content_vectors.model` for existing qmd installs.
   */
  modelId?: string;
};

/**
 * EmbeddingProvider backed by a local `LlamaCpp` instance.
 *
 * `embed` / `embedBatch` never throw for backend failures: they return `null`
 * (or `null` entries) and record a description retrievable via
 * `getLastError()`.
 */
export class LocalLlamaCppProvider implements EmbeddingProvider {
  readonly kind: ProviderKind = "local";

  private readonly llm: LlamaCpp;
  private readonly modelId: string;
  /** Vector length observed on the first non-empty embedding; unknown until then. */
  private dimensions: number | undefined = undefined;
  private lastError: string | undefined = undefined;

  /**
   * @param config Optional overrides; with no `llm` the shared default
   *   instance from `getDefaultLlamaCpp()` is used.
   */
  constructor(config: LocalLlamaCppProviderConfig = {}) {
    this.llm = config.llm ?? getDefaultLlamaCpp();
    this.modelId = config.modelId ?? "embeddinggemma";
  }

  /** Extracts a printable message from an arbitrary thrown value. */
  private static describeThrown(err: unknown): string {
    return err instanceof Error ? err.message : String(err);
  }

  /** Formats a backend failure in the `provider=local error="..."` shape used by `lastError`. */
  private static formatError(message: string): string {
    return `provider=local error="${message}"`;
  }

  /** Builds the `lastError` text for a call rejected because the caller's signal was already aborted. */
  private static formatAbort(signal: AbortSignal): string {
    return `aborted by caller${signal.reason ? `: ${String(signal.reason)}` : ""}`;
  }

  /** @returns The configured model id (not a per-call `options.model` override). */
  getModelId(): string {
    return this.modelId;
  }

  /** @returns The embedding length seen so far, or `undefined` if none has been observed. */
  getDimensions(): number | undefined {
    return this.dimensions;
  }

  /**
   * Most recent thrown error from `llm.embed` / `llm.embedBatch`. Returns
   * `undefined` after a successful call or before the first call. See
   * `EmbeddingProvider.getLastError`.
   */
  getLastError(): string | undefined {
    return this.lastError;
  }

  /**
   * Probes the backend with a single embed call.
   *
   * For the local provider, "healthy" means the embed model loads. On success
   * the observed vector length is stored as `dimensions`. Failures are
   * reported in the returned `detail`; this method does not touch `lastError`.
   *
   * @param _signal Accepted for interface compatibility; currently unused.
   * @returns A health record; never rejects for backend errors.
   */
  async healthcheck(_signal?: AbortSignal): Promise<ProviderHealth> {
    try {
      const result = await this.llm.embed("healthcheck", { model: this.modelId });
      if (!result) {
        return {
          ok: false,
          model: this.modelId,
          detail: "embed probe returned null",
        };
      }
      this.dimensions = result.embedding.length;
      return {
        ok: true,
        model: this.modelId,
        dimensions: this.dimensions,
        detail: `local llama.cpp ready, ${this.dimensions}-d`,
      };
    } catch (err) {
      return {
        ok: false,
        model: this.modelId,
        detail: LocalLlamaCppProvider.describeThrown(err),
      };
    }
  }

  /**
   * Embeds a single text.
   *
   * @param text Input to embed.
   * @param options `signal` is only checked before the call starts; `model`
   *   overrides the model passed to the backend for this call.
   * @returns The embedding, or `null` when the call was aborted up front, the
   *   backend threw, or the backend returned nothing. The reason is then
   *   available from `getLastError()`.
   */
  async embed(
    text: string,
    options: ProviderEmbedOptions = {},
  ): Promise<ProviderEmbedding | null> {
    if (options.signal?.aborted) {
      this.lastError = LocalLlamaCppProvider.formatAbort(options.signal);
      return null;
    }

    let result;
    try {
      result = await this.llm.embed(text, { model: options.model ?? this.modelId });
    } catch (err) {
      this.lastError = LocalLlamaCppProvider.formatError(
        LocalLlamaCppProvider.describeThrown(err),
      );
      return null;
    }

    if (!result) {
      this.lastError = LocalLlamaCppProvider.formatError(
        "llm.embed returned null/undefined",
      );
      return null;
    }

    // Only cache a real vector length: an empty embedding would otherwise pin
    // `dimensions` to 0 for the lifetime of the provider (embedBatch applies
    // the same guard).
    if (this.dimensions === undefined && result.embedding.length > 0) {
      this.dimensions = result.embedding.length;
    }
    this.lastError = undefined;
    return {
      embedding: result.embedding,
      model: this.modelId,
    };
  }

  /**
   * Embeds several texts in one backend call.
   *
   * @param texts Inputs to embed; an empty array short-circuits to `[]`
   *   without touching `lastError`.
   * @param options Same semantics as for `embed`.
   * @returns An array with exactly one entry per input, in input order.
   *   Entries are `null` where the backend produced no result; if the call was
   *   aborted up front or the backend threw, every entry is `null`.
   */
  async embedBatch(
    texts: string[],
    options: ProviderEmbedOptions = {},
  ): Promise<(ProviderEmbedding | null)[]> {
    if (texts.length === 0) return [];

    if (options.signal?.aborted) {
      this.lastError = LocalLlamaCppProvider.formatAbort(options.signal);
      return texts.map(() => null);
    }

    let raw;
    try {
      raw = await this.llm.embedBatch(texts, {
        model: options.model ?? this.modelId,
      });
    } catch (err) {
      this.lastError = LocalLlamaCppProvider.formatError(
        LocalLlamaCppProvider.describeThrown(err),
      );
      return texts.map(() => null);
    }

    // Build the output by input index so it stays aligned with `texts` even
    // if the backend hands back fewer (or more) entries than were requested.
    const out = texts.map((_, i): ProviderEmbedding | null => {
      const r = raw[i];
      if (!r) return null;
      if (this.dimensions === undefined && r.embedding.length > 0) {
        this.dimensions = r.embedding.length;
      }
      return {
        embedding: r.embedding,
        model: this.modelId,
      };
    });

    const nullCount = out.filter((r) => r === null).length;
    this.lastError =
      nullCount === 0
        ? undefined
        : LocalLlamaCppProvider.formatError(
            `llm.embedBatch returned null entries (${nullCount}/${out.length})`,
          );
    return out;
  }

  /** Intentionally a no-op; see the comment in the body. */
  async dispose(): Promise<void> {
    // We do NOT dispose the underlying LlamaCpp here because the singleton
    // is shared with rerank/generate/expansion paths. Disposal is handled
    // by the existing `disposeDefaultLlamaCpp()` global hook.
  }
}