local.ts 3.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123
  1. /**
  2. * local.ts - Local llama.cpp adapter implementing EmbeddingProvider.
  3. *
  4. * Wraps an existing `LlamaCpp` instance so the legacy GGUF path looks like
  5. * any other EmbeddingProvider to upstream callers. Used as the default and
  6. * as the fallback target when `OpenAIEmbeddingsProvider` trips its breaker.
  7. */
  8. import {
  9. type LlamaCpp,
  10. getDefaultLlamaCpp,
  11. } from "../llm.js";
  12. import type {
  13. EmbeddingProvider,
  14. ProviderEmbedOptions,
  15. ProviderEmbedding,
  16. ProviderHealth,
  17. ProviderKind,
  18. } from "./provider.js";
  19. export type LocalLlamaCppProviderConfig = {
  20. /** Pre-built LlamaCpp instance (optional — falls back to global singleton). */
  21. llm?: LlamaCpp;
  22. /**
  23. * Stable model id reported via `getModelId()`. Defaults to "embeddinggemma"
  24. * to match the value in `content_vectors.model` for existing qmd installs.
  25. */
  26. modelId?: string;
  27. };
  28. export class LocalLlamaCppProvider implements EmbeddingProvider {
  29. readonly kind: ProviderKind = "local";
  30. private readonly llm: LlamaCpp;
  31. private readonly modelId: string;
  32. private dimensions: number | undefined = undefined;
  33. constructor(config: LocalLlamaCppProviderConfig = {}) {
  34. this.llm = config.llm ?? getDefaultLlamaCpp();
  35. this.modelId = config.modelId ?? "embeddinggemma";
  36. }
  37. getModelId(): string {
  38. return this.modelId;
  39. }
  40. getDimensions(): number | undefined {
  41. return this.dimensions;
  42. }
  43. async healthcheck(_signal?: AbortSignal): Promise<ProviderHealth> {
  44. // For the local provider, "healthy" means the embed model loads.
  45. // We probe with a single embed call.
  46. try {
  47. const result = await this.llm.embed("healthcheck", { model: this.modelId });
  48. if (!result) {
  49. return {
  50. ok: false,
  51. model: this.modelId,
  52. detail: "embed probe returned null",
  53. };
  54. }
  55. this.dimensions = result.embedding.length;
  56. return {
  57. ok: true,
  58. model: this.modelId,
  59. dimensions: this.dimensions,
  60. detail: `local llama.cpp ready, ${this.dimensions}-d`,
  61. };
  62. } catch (err) {
  63. return {
  64. ok: false,
  65. model: this.modelId,
  66. detail: err instanceof Error ? err.message : String(err),
  67. };
  68. }
  69. }
  70. async embed(
  71. text: string,
  72. options: ProviderEmbedOptions = {},
  73. ): Promise<ProviderEmbedding | null> {
  74. if (options.signal?.aborted) return null;
  75. const result = await this.llm.embed(text, { model: options.model ?? this.modelId });
  76. if (!result) return null;
  77. if (this.dimensions === undefined) {
  78. this.dimensions = result.embedding.length;
  79. }
  80. return {
  81. embedding: result.embedding,
  82. model: this.modelId,
  83. };
  84. }
  85. async embedBatch(
  86. texts: string[],
  87. options: ProviderEmbedOptions = {},
  88. ): Promise<(ProviderEmbedding | null)[]> {
  89. if (texts.length === 0) return [];
  90. if (options.signal?.aborted) return texts.map(() => null);
  91. const raw = await this.llm.embedBatch(texts, {
  92. model: options.model ?? this.modelId,
  93. });
  94. return raw.map((r) => {
  95. if (!r) return null;
  96. if (this.dimensions === undefined && r.embedding.length > 0) {
  97. this.dimensions = r.embedding.length;
  98. }
  99. return {
  100. embedding: r.embedding,
  101. model: this.modelId,
  102. };
  103. });
  104. }
  105. async dispose(): Promise<void> {
  106. // We do NOT dispose the underlying LlamaCpp here because the singleton
  107. // is shared with rerank/generate/expansion paths. Disposal is handled
  108. // by the existing `disposeDefaultLlamaCpp()` global hook.
  109. }
  110. }