// suby / qmd
							/**
 * local.ts - Local llama.cpp adapter implementing EmbeddingProvider.
 *
 * Wraps an existing `LlamaCpp` instance so the legacy GGUF path looks like
 * any other EmbeddingProvider to upstream callers. Used as the default and
 * as the fallback target when `OpenAIEmbeddingsProvider` trips its breaker.
 */

import {
  type LlamaCpp,
  getDefaultLlamaCpp,
} from "../llm.js";
import type {
  EmbeddingProvider,
  ProviderEmbedOptions,
  ProviderEmbedding,
  ProviderHealth,
  ProviderKind,
} from "./provider.js";

/**
 * Constructor options for `LocalLlamaCppProvider`. Every field is optional;
 * the provider can be built with no arguments at all.
 */
export type LocalLlamaCppProviderConfig = {
  /**
   * Pre-built LlamaCpp instance to wrap. When omitted, the provider uses the
   * instance returned by `getDefaultLlamaCpp()`.
   */
  llm?: LlamaCpp;
  /**
   * Stable model id reported via `getModelId()`. Defaults to "embeddinggemma"
   * to match the value in `content_vectors.model` for existing qmd installs.
   */
  modelId?: string;
};

/**
 * EmbeddingProvider adapter around a local `LlamaCpp` instance.
 *
 * Every embedding request is forwarded to the wrapped instance's `embed` /
 * `embedBatch`. The provider additionally remembers the vector length of the
 * first non-empty embedding it sees so `getDimensions()` can report it.
 */
export class LocalLlamaCppProvider implements EmbeddingProvider {
  readonly kind: ProviderKind = "local";

  private readonly llm: LlamaCpp;
  private readonly modelId: string;
  /**
   * Observed embedding length. Stays `undefined` until a non-empty embedding
   * has been produced by `healthcheck()`, `embed()` or `embedBatch()`.
   */
  private dimensions: number | undefined = undefined;

  /**
   * @param config Optional overrides. Without `llm` the instance returned by
   *   `getDefaultLlamaCpp()` is used; without `modelId` the id is
   *   "embeddinggemma".
   */
  constructor(config: LocalLlamaCppProviderConfig = {}) {
    this.llm = config.llm ?? getDefaultLlamaCpp();
    this.modelId = config.modelId ?? "embeddinggemma";
  }

  /** @returns The model id this provider was configured with. */
  getModelId(): string {
    return this.modelId;
  }

  /**
   * @returns The embedding length observed so far, or `undefined` when no
   *   non-empty embedding has been produced yet.
   */
  getDimensions(): number | undefined {
    return this.dimensions;
  }

  /**
   * Probes the backend with a single embed call.
   *
   * Never rejects: a thrown error, a null result, or an empty vector is
   * reported as `ok: false` with an explanatory `detail`. On success the
   * probed vector length replaces the cached dimensions.
   *
   * @param _signal Accepted for interface compatibility; not consulted.
   * @returns Health report for the configured model.
   */
  async healthcheck(_signal?: AbortSignal): Promise<ProviderHealth> {
    // For the local provider, "healthy" means the embed model loads.
    // We probe with a single embed call.
    try {
      const result = await this.llm.embed("healthcheck", { model: this.modelId });
      if (!result) {
        return {
          ok: false,
          model: this.modelId,
          detail: "embed probe returned null",
        };
      }
      const probedDimensions = result.embedding.length;
      if (probedDimensions === 0) {
        // A zero-length vector is not a usable embedding; reporting it as
        // healthy would also cache 0 as the dimension count.
        return {
          ok: false,
          model: this.modelId,
          detail: "embed probe returned an empty embedding",
        };
      }
      this.dimensions = probedDimensions;
      return {
        ok: true,
        model: this.modelId,
        dimensions: this.dimensions,
        detail: `local llama.cpp ready, ${this.dimensions}-d`,
      };
    } catch (err) {
      return {
        ok: false,
        model: this.modelId,
        detail: err instanceof Error ? err.message : String(err),
      };
    }
  }

  /**
   * Embeds a single text.
   *
   * @param text Text to embed.
   * @param options `signal`: when already aborted, no work is done and `null`
   *   is returned. `model`: forwarded to the backend in place of the
   *   configured model id.
   * @returns The embedding, or `null` when aborted or when the backend
   *   returned nothing. Errors thrown by the backend propagate.
   */
  async embed(
    text: string,
    options: ProviderEmbedOptions = {},
  ): Promise<ProviderEmbedding | null> {
    if (options.signal?.aborted) return null;
    const result = await this.llm.embed(text, { model: options.model ?? this.modelId });
    if (!result) return null;
    this.rememberDimensions(result.embedding);
    // NOTE(review): `model` is always the configured id, even when
    // `options.model` selected a different backend model — confirm intended.
    return {
      embedding: result.embedding,
      model: this.modelId,
    };
  }

  /**
   * Embeds several texts with one backend call.
   *
   * @param texts Texts to embed; an empty array short-circuits to `[]`.
   * @param options Same meaning as for `embed()`. An already-aborted signal
   *   yields one `null` per input without calling the backend.
   * @returns One entry per backend result, in backend order; falsy backend
   *   entries are mapped to `null`.
   */
  async embedBatch(
    texts: string[],
    options: ProviderEmbedOptions = {},
  ): Promise<(ProviderEmbedding | null)[]> {
    if (texts.length === 0) return [];
    if (options.signal?.aborted) return texts.map(() => null);

    const raw = await this.llm.embedBatch(texts, {
      model: options.model ?? this.modelId,
    });

    return raw.map((r) => {
      if (!r) return null;
      this.rememberDimensions(r.embedding);
      return {
        embedding: r.embedding,
        model: this.modelId,
      };
    });
  }

  /** Releases nothing; see the comment in the body for why. */
  async dispose(): Promise<void> {
    // We do NOT dispose the underlying LlamaCpp here because the singleton
    // is shared with rerank/generate/expansion paths. Disposal is handled
    // by the existing `disposeDefaultLlamaCpp()` global hook.
  }

  /**
   * Caches the vector length the first time a non-empty embedding is seen.
   * Empty vectors are ignored so they cannot pin the cached value at 0.
   */
  private rememberDimensions(embedding: { readonly length: number }): void {
    if (this.dimensions === undefined && embedding.length > 0) {
      this.dimensions = embedding.length;
    }
  }
}