local.js 3.0 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091
  1. /**
  2. * local.ts - Local llama.cpp adapter implementing EmbeddingProvider.
  3. *
  4. * Wraps an existing `LlamaCpp` instance so the legacy GGUF path looks like
  5. * any other EmbeddingProvider to upstream callers. Used as the default and
  6. * as the fallback target when `OpenAIEmbeddingsProvider` trips its breaker.
  7. */
  8. import { getDefaultLlamaCpp, } from "../llm.js";
  /**
   * Embedding provider backed by a local llama.cpp instance.
   *
   * Every embedding request is delegated to the wrapped `llm` object (its
   * `embed` / `embedBatch` methods). The provider additionally remembers the
   * vector length of the first non-empty embedding it sees so that
   * `getDimensions()` can answer without another model call.
   */
  export class LocalLlamaCppProvider {
    /** Discriminator identifying this provider flavour. */
    kind = "local";
    /** Wrapped llama.cpp instance that performs the actual embedding work. */
    llm;
    /** Model identifier sent with requests and stamped on results. */
    modelId;
    /** Cached embedding vector length; `undefined` until one is observed. */
    dimensions = undefined;

    /**
     * @param {object} [config]
     * @param {object} [config.llm] - Instance to wrap; when nullish,
     *   `getDefaultLlamaCpp()` supplies it.
     * @param {string} [config.modelId] - Model identifier; defaults to
     *   "embeddinggemma".
     */
    constructor(config = {}) {
      this.llm = config.llm ?? getDefaultLlamaCpp();
      this.modelId = config.modelId ?? "embeddinggemma";
    }

    /** @returns {string} The configured model identifier. */
    getModelId() {
      return this.modelId;
    }

    /**
     * @returns {number|undefined} The cached embedding length, or `undefined`
     *   if no non-empty embedding has been observed yet.
     */
    getDimensions() {
      return this.dimensions;
    }

    /**
     * Probes the model with a single embed call.
     *
     * Errors thrown by the probe are caught and reported through `detail`
     * instead of propagating. A probe that yields an empty vector is reported
     * as unhealthy and does not overwrite the cached dimension.
     *
     * @param {AbortSignal} [_signal] - Accepted for interface parity; not
     *   consulted by this implementation.
     * @returns {Promise<{ok: boolean, model: string, dimensions?: number, detail: string}>}
     */
    async healthcheck(_signal) {
      // For the local provider, "healthy" means the embed model loads.
      // We probe with a single embed call.
      try {
        const result = await this.llm.embed("healthcheck", { model: this.modelId });
        if (!result) {
          return {
            ok: false,
            model: this.modelId,
            detail: "embed probe returned null",
          };
        }
        const probedDimensions = result.embedding.length;
        if (probedDimensions === 0) {
          // A zero-length vector is unusable; do not advertise it as "0-d ready".
          return {
            ok: false,
            model: this.modelId,
            detail: "embed probe returned an empty embedding",
          };
        }
        this.dimensions = probedDimensions;
        return {
          ok: true,
          model: this.modelId,
          dimensions: this.dimensions,
          detail: `local llama.cpp ready, ${this.dimensions}-d`,
        };
      } catch (err) {
        return {
          ok: false,
          model: this.modelId,
          detail: err instanceof Error ? err.message : String(err),
        };
      }
    }

    /**
     * Embeds a single text.
     *
     * @param {string} text - Text to embed.
     * @param {object} [options]
     * @param {AbortSignal} [options.signal] - If already aborted, no call is made.
     * @param {string} [options.model] - Overrides the model id sent to the backend.
     * @returns {Promise<{embedding: number[], model: string}|null>} `null` when
     *   the signal was already aborted or the backend returned a falsy result.
     *   NOTE(review): `model` is always this provider's `modelId`, even when
     *   `options.model` overrides the request - confirm this is intended.
     */
    async embed(text, options = {}) {
      if (options.signal?.aborted) {
        return null;
      }
      const result = await this.llm.embed(text, { model: options.model ?? this.modelId });
      if (!result) {
        return null;
      }
      // Same guard as embedBatch: an empty vector must not pin the cache to 0,
      // because the cache is only ever written while it is still undefined.
      if (this.dimensions === undefined && result.embedding.length > 0) {
        this.dimensions = result.embedding.length;
      }
      return {
        embedding: result.embedding,
        model: this.modelId,
      };
    }

    /**
     * Embeds several texts with one backend call.
     *
     * @param {string[]} texts - Texts to embed.
     * @param {object} [options]
     * @param {AbortSignal} [options.signal] - If already aborted, no call is made.
     * @param {string} [options.model] - Overrides the model id sent to the backend.
     * @returns {Promise<Array<{embedding: number[], model: string}|null>>} One
     *   entry per backend result, with `null` for falsy results. Returns `[]`
     *   for empty input and an all-`null` array when the signal was already
     *   aborted.
     */
    async embedBatch(texts, options = {}) {
      if (texts.length === 0) {
        return [];
      }
      if (options.signal?.aborted) {
        return texts.map(() => null);
      }
      const raw = await this.llm.embedBatch(texts, {
        model: options.model ?? this.modelId,
      });
      const results = [];
      for (const item of raw) {
        if (!item) {
          results.push(null);
          continue;
        }
        if (this.dimensions === undefined && item.embedding.length > 0) {
          this.dimensions = item.embedding.length;
        }
        results.push({
          embedding: item.embedding,
          model: this.modelId,
        });
      }
      return results;
    }

    /**
     * Intentionally a no-op.
     *
     * @returns {Promise<void>}
     */
    async dispose() {
      // We do NOT dispose the underlying LlamaCpp here because the singleton
      // is shared with rerank/generate/expansion paths. Disposal is handled
      // by the existing `disposeDefaultLlamaCpp()` global hook.
    }
  }