| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123 |
- /**
- * local.ts - Local llama.cpp adapter implementing EmbeddingProvider.
- *
- * Wraps an existing `LlamaCpp` instance so the legacy GGUF path looks like
- * any other EmbeddingProvider to upstream callers. Used as the default and
- * as the fallback target when `OpenAIEmbeddingsProvider` trips its breaker.
- */
- import {
- type LlamaCpp,
- getDefaultLlamaCpp,
- } from "../llm.js";
- import type {
- EmbeddingProvider,
- ProviderEmbedOptions,
- ProviderEmbedding,
- ProviderHealth,
- ProviderKind,
- } from "./provider.js";
/**
 * Construction options for `LocalLlamaCppProvider`. Every field is optional;
 * an empty object (or no argument at all) is a valid configuration.
 */
export type LocalLlamaCppProviderConfig = {
  /**
   * Identifier returned by `getModelId()` and stamped on every embedding the
   * provider produces. When omitted the provider uses "embeddinggemma", the
   * value already stored in `content_vectors.model` by existing qmd installs.
   */
  modelId?: string;
  /**
   * LlamaCpp instance to delegate to. When omitted the provider obtains one
   * from `getDefaultLlamaCpp()` (the shared global instance).
   */
  llm?: LlamaCpp;
};
/**
 * EmbeddingProvider backed by a local `LlamaCpp` instance.
 *
 * All embedding work is delegated to the wrapped `LlamaCpp`; this class only
 * adapts results to the provider shape and remembers the embedding width the
 * first time a non-empty vector is observed.
 */
export class LocalLlamaCppProvider implements EmbeddingProvider {
  readonly kind: ProviderKind = "local";

  private readonly llm: LlamaCpp;
  private readonly modelId: string;
  /** Width of the embedding vectors; `undefined` until a non-empty one is seen. */
  private dimensions: number | undefined = undefined;

  /**
   * @param config Optional overrides. Without `llm` the instance returned by
   *   `getDefaultLlamaCpp()` is used; without `modelId` it is "embeddinggemma".
   */
  constructor(config: LocalLlamaCppProviderConfig = {}) {
    this.llm = config.llm ?? getDefaultLlamaCpp();
    this.modelId = config.modelId ?? "embeddinggemma";
  }

  /** @returns The model id this provider was configured with. */
  getModelId(): string {
    return this.modelId;
  }

  /**
   * @returns The embedding width learned from a previous healthcheck/embed
   *   call, or `undefined` if no non-empty embedding has been seen yet.
   */
  getDimensions(): number | undefined {
    return this.dimensions;
  }

  /**
   * Probes the backend by embedding the literal text "healthcheck".
   *
   * Never throws: a thrown error, a null result, or an empty vector are all
   * reported as `ok: false` with an explanatory `detail`. On success the
   * cached dimension is refreshed from the probe result.
   *
   * @param _signal Accepted for interface compatibility; currently unused.
   */
  async healthcheck(_signal?: AbortSignal): Promise<ProviderHealth> {
    // For the local provider, "healthy" means the embed model loads.
    // We probe with a single embed call.
    try {
      const probe = await this.llm.embed("healthcheck", { model: this.modelId });
      if (!probe) {
        return {
          ok: false,
          model: this.modelId,
          detail: "embed probe returned null",
        };
      }

      const width = probe.embedding.length;
      if (width === 0) {
        // A zero-length vector is unusable; do not report it as healthy and
        // do not let it overwrite a previously learned dimension.
        return {
          ok: false,
          model: this.modelId,
          detail: "embed probe returned an empty embedding",
        };
      }

      this.dimensions = width;
      return {
        ok: true,
        model: this.modelId,
        dimensions: width,
        detail: `local llama.cpp ready, ${width}-d`,
      };
    } catch (err) {
      return {
        ok: false,
        model: this.modelId,
        detail: err instanceof Error ? err.message : String(err),
      };
    }
  }

  /**
   * Embeds a single text.
   *
   * @param text Text to embed.
   * @param options `signal`: if already aborted, no work is done and `null`
   *   is returned. `model`: forwarded to the backend instead of the
   *   configured model id when present.
   * @returns The embedding labelled with the configured model id, or `null`
   *   when aborted or when the backend returns nothing.
   */
  async embed(
    text: string,
    options: ProviderEmbedOptions = {},
  ): Promise<ProviderEmbedding | null> {
    if (options.signal?.aborted) return null;

    const result = await this.llm.embed(text, {
      model: options.model ?? this.modelId,
    });
    if (!result) return null;

    this.rememberDimensions(result.embedding);
    return {
      embedding: result.embedding,
      model: this.modelId,
    };
  }

  /**
   * Embeds several texts with one backend call.
   *
   * @param texts Texts to embed; an empty array short-circuits to `[]`.
   * @param options Same meaning as for `embed`. If the signal is already
   *   aborted, an array of `null` with one entry per text is returned.
   * @returns One entry per backend result, `null` where the backend produced
   *   nothing for that slot.
   */
  async embedBatch(
    texts: string[],
    options: ProviderEmbedOptions = {},
  ): Promise<(ProviderEmbedding | null)[]> {
    if (texts.length === 0) return [];
    if (options.signal?.aborted) return texts.map(() => null);

    const raw = await this.llm.embedBatch(texts, {
      model: options.model ?? this.modelId,
    });
    return raw.map((item) => {
      if (!item) return null;
      this.rememberDimensions(item.embedding);
      return {
        embedding: item.embedding,
        model: this.modelId,
      };
    });
  }

  /** Intentionally a no-op; see the comment in the body. */
  async dispose(): Promise<void> {
    // We do NOT dispose the underlying LlamaCpp here because the singleton
    // is shared with rerank/generate/expansion paths. Disposal is handled
    // by the existing `disposeDefaultLlamaCpp()` global hook.
  }

  /**
   * Caches the embedding width the first time a non-empty vector is seen.
   * Empty vectors are ignored so they cannot pin the dimension to 0.
   */
  private rememberDimensions(embedding: { readonly length: number }): void {
    if (this.dimensions === undefined && embedding.length > 0) {
      this.dimensions = embedding.length;
    }
  }
}
|