// autofallback.js (8.2 KB)

  /**
   * autofallback.ts - AutoFallbackEmbeddingProvider.
   *
   * Composes a primary `EmbeddingProvider` (typically `OpenAIEmbeddingsProvider`)
   * and a fallback (typically `LocalLlamaCppProvider`). When the primary trips
   * its circuit breaker — or when persistent failures reach a threshold — calls
   * are routed to the fallback. After a recovery cooldown, the primary is
   * probed again; success closes the breaker and routing returns.
   *
   * Acceptance criterion 4 from i-qkarfffa: "Endpoint down → fallback local + WARN".
   *
   * Behavior summary:
   *   - Primary call succeeds → return; record success.
   *   - Primary throws CircuitOpenError → fall back, log WARN once per transition.
   *   - Primary throws any other error → fall back for THIS call only;
   *     count toward the failure-streak threshold.
   *   - When the failure streak reaches the threshold (default 3) → set our own
   *     "open until" timestamp; until expiry, route directly to fallback
   *     (skip primary entirely).
   *   - On expiry, retry primary opportunistically.
   *   - getModelId always reports the primary's id; getDimensions prefers the
   *     primary and uses the fallback only when the primary reports none;
   *     dispose tears down both providers.
   */
  24. import { CircuitOpenError } from "./openai.js";
  /**
   * Default for `failureStreakThreshold`: the number of consecutive
   * non-circuit primary failures at which AutoFallbackEmbeddingProvider opens
   * its own cooldown. Used when the config does not supply a value.
   */
  const DEFAULT_FAILURE_STREAK = 3;
  /**
   * Default for `cooldownMs`: how long (in milliseconds) the primary is skipped
   * once a cooldown opens. 5 minutes. Used when the config does not supply a value.
   */
  const DEFAULT_COOLDOWN_MS = 5 * 60_000;
  /**
   * Default WARN sink: writes the message, followed by a newline, to stderr.
   * AutoFallbackEmbeddingProvider uses it when the config supplies no `warn`.
   *
   * @param {string} msg - Text to emit.
   */
  function defaultWarn(msg) {
    process.stderr.write(`${msg}\n`);
  }
  /**
   * Embedding provider that wraps a primary and a fallback provider.
   *
   * Calls go to the primary unless a cooldown is active. A cooldown opens when
   * the primary throws `CircuitOpenError`, or when the count of consecutive
   * other failures reaches `failureStreakThreshold`. While the cooldown is
   * active the primary is skipped entirely. Any primary failure is retried on
   * the fallback for that same call.
   *
   * WARN pairing: the "falling back" WARN and the switch of
   * `lastTransitionState` to "fallback" both happen in `openCooldown`, so the
   * "recovered" WARN is only ever emitted after a "falling back" WARN (or by
   * `reset()` while in that state). A one-off failure below the threshold is
   * served by the fallback silently and does not produce a recovery WARN later.
   */
  export class AutoFallbackEmbeddingProvider {
    /** Copied from `config.primary.kind` so `provider.kind` introspection keeps working. */
    kind;
    /** Provider tried first whenever no cooldown is active. */
    primary;
    /** Provider used when the primary fails or is being skipped. */
    fallback;
    /** Consecutive non-circuit failures at which a cooldown opens. */
    failureStreakThreshold;
    /** Cooldown length in milliseconds. */
    cooldownMs;
    /** WARN sink, called with a single message string. */
    warn;
    /** Clock function returning a millisecond timestamp (injectable for tests). */
    now;
    /** Consecutive non-circuit primary failures since the last primary success. */
    failureStreak = 0;
    /** Timestamp until which the primary is skipped, or `null` when no cooldown was opened. */
    fallbackUntil = null;
    /** Last routing state that was announced: "primary" or "fallback". */
    lastTransitionState = "primary";

    /**
     * @param {object} config
     * @param {object} config.primary - Provider to prefer. Required.
     * @param {object} config.fallback - Provider to fall back to. Required; must not be the same object as `primary`.
     * @param {number} [config.failureStreakThreshold] - Defaults to `DEFAULT_FAILURE_STREAK`.
     * @param {number} [config.cooldownMs] - Defaults to `DEFAULT_COOLDOWN_MS`.
     * @param {(msg: string) => void} [config.warn] - Defaults to `defaultWarn`.
     * @param {() => number} [config.now] - Defaults to `Date.now`.
     * @throws {Error} When `primary` or `fallback` is missing, or both are the same object.
     */
    constructor(config) {
      if (!config.primary) {
        throw new Error("AutoFallbackEmbeddingProvider: primary is required");
      }
      if (!config.fallback) {
        throw new Error("AutoFallbackEmbeddingProvider: fallback is required");
      }
      if (config.primary === config.fallback) {
        throw new Error("AutoFallbackEmbeddingProvider: primary and fallback must differ");
      }
      this.primary = config.primary;
      this.fallback = config.fallback;
      // Inherit the primary's kind for callers introspecting `provider.kind`.
      this.kind = config.primary.kind;
      this.failureStreakThreshold = config.failureStreakThreshold ?? DEFAULT_FAILURE_STREAK;
      this.cooldownMs = config.cooldownMs ?? DEFAULT_COOLDOWN_MS;
      this.warn = config.warn ?? defaultWarn;
      this.now = config.now ?? Date.now;
    }

    /**
     * Model id as reported by the primary, regardless of routing state.
     * (The original author's note says the local provider is expected to
     * report a compatible id; that cannot be verified from this file.)
     */
    getModelId() {
      return this.primary.getModelId();
    }

    /** Dimensions from the primary, or from the fallback when the primary returns null/undefined. */
    getDimensions() {
      return this.primary.getDimensions() ?? this.fallback.getDimensions();
    }

    /**
     * Combined last-error of both legs, using each leg's optional `getLastError()`:
     *   - both report a truthy error → `primary: ... | fallback: ...`
     *   - otherwise → the primary's value if it is not null/undefined, else the fallback's
     *     (which may itself be undefined).
     */
    getLastError() {
      const primaryErr = this.primary.getLastError?.();
      const fallbackErr = this.fallback.getLastError?.();
      if (primaryErr && fallbackErr) {
        return `primary: ${primaryErr} | fallback: ${fallbackErr}`;
      }
      return primaryErr ?? fallbackErr;
    }

    /** Current routing state: "fallback" while a cooldown is active, otherwise "primary". */
    getRoutingState() {
      return this.isCoolingDown() ? "fallback" : "primary";
    }

    /**
     * Clear the failure streak and any cooldown. Emits the "recovered" WARN if
     * the last announced state was "fallback".
     */
    reset() {
      this.failureStreak = 0;
      this.fallbackUntil = null;
      this.transition("primary");
    }

    /**
     * Health of the pair. Returns the primary's result when it is ok; otherwise
     * checks the fallback so callers can tell whether any backend works.
     *
     * @param {*} signal - Passed through unchanged to both legs' `healthcheck`.
     * @returns {Promise<object>} The primary's health object, or a merged object
     *   whose `ok` is the fallback's and whose `detail` describes both legs.
     */
    async healthcheck(signal) {
      const primaryHealth = await this.primary.healthcheck(signal);
      if (primaryHealth.ok) {
        return primaryHealth;
      }
      const fallbackHealth = await this.fallback.healthcheck(signal);
      return {
        ok: fallbackHealth.ok,
        model: this.primary.getModelId(),
        dimensions: primaryHealth.dimensions ?? fallbackHealth.dimensions,
        detail: `primary: ${primaryHealth.detail ?? "fail"} | fallback: ${fallbackHealth.detail ?? (fallbackHealth.ok ? "ok" : "fail")}`,
      };
    }

    /** Embed one text through the routed provider; rejects if both legs fail. */
    async embed(text, options = {}) {
      return this.run((p, opts) => p.embed(text, opts), options);
    }

    /**
     * Embed a batch through the routed provider. An empty input returns `[]`
     * without touching either leg. If both legs fail, resolves to an array of
     * `null` with one entry per input text instead of rejecting.
     */
    async embedBatch(texts, options = {}) {
      if (texts.length === 0) {
        return [];
      }
      return this.run((p, opts) => p.embedBatch(texts, opts), options, () => texts.map(() => null));
    }

    /**
     * Dispose both legs, best-effort. Each call is wrapped in an async closure
     * so that a synchronous throw from one leg cannot prevent the other leg
     * from being disposed; all outcomes are ignored.
     */
    async dispose() {
      await Promise.allSettled([this.primary, this.fallback].map(async (leg) => leg.dispose()));
    }

    // ────────────────────── Internals ──────────────────────

    /** True while a cooldown has been opened and has not yet expired. */
    isCoolingDown() {
      return this.fallbackUntil !== null && this.now() < this.fallbackUntil;
    }

    /**
     * Generic dispatcher. `op` is invoked with whichever provider is selected.
     *
     * @param {(provider: object, options: object) => Promise<*>} op
     * @param {object} options - Forwarded to `op` untouched.
     * @param {() => *} [onTotalFail] - When given, its return value is used
     *   instead of rejecting if the fallback call fails.
     */
    async run(op, options, onTotalFail) {
      if (this.isCoolingDown()) {
        // Skip primary entirely.
        this.transition("fallback");
        return this.runFallback(op, options, onTotalFail);
      }
      try {
        const result = await op(this.primary, options);
        // Success — clear streak and cooldown, announce recovery if needed.
        this.failureStreak = 0;
        this.fallbackUntil = null;
        this.transition("primary");
        return result;
      } catch (err) {
        if (err instanceof CircuitOpenError) {
          // Primary circuit is open — open our own cooldown so subsequent
          // calls skip the primary.
          this.openCooldown(`primary CircuitOpenError`);
        } else {
          this.failureStreak++;
          if (this.failureStreak >= this.failureStreakThreshold) {
            this.openCooldown(`primary failure streak ${this.failureStreak} ≥ ${this.failureStreakThreshold}`);
          }
        }
        // Serve THIS call from the fallback regardless. The announced routing
        // state is deliberately not touched here: only `openCooldown` flips it,
        // so a one-off failure does not trigger a "recovered" WARN later.
        return this.runFallback(op, options, onTotalFail);
      }
    }

    /**
     * Run `op` against the fallback. On failure, return `onTotalFail()` when
     * provided; otherwise rethrow the fallback's error.
     */
    async runFallback(op, options, onTotalFail) {
      try {
        return await op(this.fallback, options);
      } catch (fbErr) {
        if (onTotalFail) {
          return onTotalFail();
        }
        throw fbErr;
      }
    }

    /**
     * Open a cooldown of `cooldownMs` starting now, mark the routing state as
     * "fallback" and emit the fallback WARN. Does nothing if a cooldown is
     * already active.
     *
     * @param {string} reason - Included verbatim in the WARN message.
     */
    openCooldown(reason) {
      const current = this.now();
      if (this.fallbackUntil !== null && current < this.fallbackUntil) {
        return;
      }
      this.fallbackUntil = current + this.cooldownMs;
      this.transition("fallback");
      this.warn(`[AutoFallbackEmbeddingProvider] WARN — falling back to "${this.fallback.kind}" provider for ${Math.round(this.cooldownMs / 1000)}s (reason: ${reason})`);
    }

    /**
     * Record a routing-state change. Emits the "recovered" WARN when moving to
     * "primary"; the move to "fallback" is silent here because `openCooldown`
     * emits that WARN with a richer message. No-op when the state is unchanged.
     *
     * @param {"primary" | "fallback"} to
     */
    transition(to) {
      if (this.lastTransitionState === to) {
        return;
      }
      this.lastTransitionState = to;
      if (to === "primary") {
        this.warn(`[AutoFallbackEmbeddingProvider] WARN — primary "${this.primary.kind}" recovered, routing restored`);
      }
    }
  }