  /**
   * local.ts - Local llama.cpp adapter implementing EmbeddingProvider.
   *
   * Wraps an existing `LlamaCpp` instance so the legacy GGUF path looks like
   * any other EmbeddingProvider to upstream callers. Used as the default
   * provider and as the fallback target when `OpenAIEmbeddingsProvider` trips
   * its breaker.
   */
  import {
    type LlamaCpp,
    getDefaultLlamaCpp,
  } from "../llm.js";
  import type {
    EmbeddingProvider,
    ProviderEmbedOptions,
    ProviderEmbedding,
    ProviderHealth,
    ProviderKind,
  } from "./provider.js";
  /**
   * Construction options for `LocalLlamaCppProvider`. Every field is optional,
   * so `{}` (or no argument at all) is a valid configuration.
   */
  export type LocalLlamaCppProviderConfig = {
    /** Pre-built LlamaCpp instance (optional — falls back to global singleton). */
    llm?: LlamaCpp;
    /**
     * Stable model id reported via `getModelId()`. Defaults to "embeddinggemma"
     * to match the value in `content_vectors.model` for existing qmd installs.
     */
    modelId?: string;
  };
  /**
   * EmbeddingProvider implementation backed by a `LlamaCpp` instance.
   *
   * `embed` and `embedBatch` never reject because of a backend failure: a
   * throwing or empty backend response is reported as `null` (per entry for
   * batches) and a description of the failure is kept for `getLastError()`.
   */
  export class LocalLlamaCppProvider implements EmbeddingProvider {
    readonly kind: ProviderKind = "local";

    private readonly llm: LlamaCpp;
    private readonly modelId: string;
    /** Vector length observed from the backend; `undefined` until one is seen. */
    private dimensions: number | undefined = undefined;
    /** Description of the most recent embed/embedBatch failure, if any. */
    private lastError: string | undefined = undefined;

    /**
     * @param config Optional overrides. Without `llm` the shared instance from
     *   `getDefaultLlamaCpp()` is used; without `modelId` the id is
     *   "embeddinggemma".
     */
    constructor(config: LocalLlamaCppProviderConfig = {}) {
      this.llm = config.llm ?? getDefaultLlamaCpp();
      this.modelId = config.modelId ?? "embeddinggemma";
    }

    /** Extracts a printable message from anything that can be thrown. */
    private static describeThrown(err: unknown): string {
      return err instanceof Error ? err.message : String(err);
    }

    /** Builds the `lastError` text for a failure reported by the backend. */
    private static backendError(detail: string): string {
      return `provider=local error="${detail}"`;
    }

    /** Builds the `lastError` text for a call rejected because it was already aborted. */
    private static abortedMessage(reason: unknown): string {
      return `aborted by caller${reason ? `: ${String(reason)}` : ""}`;
    }

    /** @returns The model id this provider reports on every embedding. */
    getModelId(): string {
      return this.modelId;
    }

    /**
     * @returns The embedding length recorded by `healthcheck`, `embed` or
     *   `embedBatch`, or `undefined` if none has been recorded yet.
     */
    getDimensions(): number | undefined {
      return this.dimensions;
    }

    /**
     * Most recent failure from `embed` / `embedBatch`. Returns `undefined`
     * after a successful call or before the first call. See
     * `EmbeddingProvider.getLastError`.
     */
    getLastError(): string | undefined {
      return this.lastError;
    }

    /**
     * Probes the backend with a single embed call.
     *
     * For the local provider, "healthy" means the embed model loads. The probe
     * result's vector length is stored as the provider's dimensions. This
     * method does not modify `lastError`, and the signal argument is unused.
     *
     * @returns `ok: true` with the observed dimensions, or `ok: false` with the
     *   failure text in `detail` when the probe returns nothing or throws.
     */
    async healthcheck(_signal?: AbortSignal): Promise<ProviderHealth> {
      try {
        const result = await this.llm.embed("healthcheck", { model: this.modelId });
        if (!result) {
          return {
            ok: false,
            model: this.modelId,
            detail: "embed probe returned null",
          };
        }
        this.dimensions = result.embedding.length;
        return {
          ok: true,
          model: this.modelId,
          dimensions: this.dimensions,
          detail: `local llama.cpp ready, ${this.dimensions}-d`,
        };
      } catch (err) {
        return {
          ok: false,
          model: this.modelId,
          detail: LocalLlamaCppProvider.describeThrown(err),
        };
      }
    }

    /**
     * Embeds a single text.
     *
     * @param text Text to embed.
     * @param options `signal` is only checked before the backend call starts;
     *   `model` overrides the model passed to the backend (the returned
     *   `model` field is always this provider's own model id).
     * @returns The embedding, or `null` when the call was already aborted, the
     *   backend threw, or the backend returned nothing. The reason is then
     *   available from `getLastError()`.
     */
    async embed(
      text: string,
      options: ProviderEmbedOptions = {},
    ): Promise<ProviderEmbedding | null> {
      if (options.signal?.aborted) {
        this.lastError = LocalLlamaCppProvider.abortedMessage(options.signal.reason);
        return null;
      }

      let result;
      try {
        result = await this.llm.embed(text, { model: options.model ?? this.modelId });
      } catch (err) {
        this.lastError = LocalLlamaCppProvider.backendError(
          LocalLlamaCppProvider.describeThrown(err),
        );
        return null;
      }

      if (!result) {
        this.lastError = LocalLlamaCppProvider.backendError("llm.embed returned null/undefined");
        return null;
      }

      // Same guard as embedBatch: an empty vector must not be cached as the
      // provider's dimensionality.
      if (this.dimensions === undefined && result.embedding.length > 0) {
        this.dimensions = result.embedding.length;
      }
      this.lastError = undefined;
      return {
        embedding: result.embedding,
        model: this.modelId,
      };
    }

    /**
     * Embeds several texts with one backend call.
     *
     * @param texts Texts to embed. An empty array returns `[]` without calling
     *   the backend and without touching `lastError`.
     * @param options Same meaning as for `embed`.
     * @returns Exactly one entry per input text, in input order. An entry is
     *   `null` when the backend produced nothing for that position; every
     *   entry is `null` when the call was already aborted, the backend threw,
     *   or the backend did not return an array.
     */
    async embedBatch(
      texts: string[],
      options: ProviderEmbedOptions = {},
    ): Promise<(ProviderEmbedding | null)[]> {
      if (texts.length === 0) return [];

      if (options.signal?.aborted) {
        this.lastError = LocalLlamaCppProvider.abortedMessage(options.signal.reason);
        return texts.map(() => null);
      }

      let raw;
      try {
        raw = await this.llm.embedBatch(texts, {
          model: options.model ?? this.modelId,
        });
      } catch (err) {
        this.lastError = LocalLlamaCppProvider.backendError(
          LocalLlamaCppProvider.describeThrown(err),
        );
        return texts.map(() => null);
      }

      // A nullish/non-array result would otherwise throw on `.map` below and
      // escape this method, unlike every other backend failure.
      if (!Array.isArray(raw)) {
        this.lastError = LocalLlamaCppProvider.backendError(
          "llm.embedBatch returned a non-array result",
        );
        return texts.map(() => null);
      }

      // Iterate over `texts`, not `raw`, so the result always lines up with the
      // inputs even if the backend returned too few or too many rows.
      const rows = raw;
      const out = texts.map((_text, index): ProviderEmbedding | null => {
        const row = rows[index];
        if (!row) return null;
        if (this.dimensions === undefined && row.embedding.length > 0) {
          this.dimensions = row.embedding.length;
        }
        return {
          embedding: row.embedding,
          model: this.modelId,
        };
      });

      const missing = out.filter((entry) => entry === null).length;
      this.lastError =
        missing === 0
          ? undefined
          : LocalLlamaCppProvider.backendError(
              `llm.embedBatch returned null entries (${missing}/${out.length})`,
            );
      return out;
    }

    /** Intentionally a no-op; see the comment in the body. */
    async dispose(): Promise<void> {
      // We do NOT dispose the underlying LlamaCpp here because the singleton
      // is shared with rerank/generate/expansion paths. Disposal is handled
      // by the existing `disposeDefaultLlamaCpp()` global hook.
    }
  }