// autofallback.d.ts (3.7 KB)

  /**
   * autofallback.ts - AutoFallbackEmbeddingProvider.
   *
   * Composes a primary `EmbeddingProvider` (typically `OpenAIEmbeddingsProvider`)
   * and a fallback (typically `LocalLlamaCppProvider`). When the primary trips
   * its circuit breaker — or when persistent failures cross a threshold — calls
   * are routed to the fallback. After a recovery cooldown, the primary is
   * probed again; success closes the breaker and routing returns.
   *
   * Acceptance criterion 4 from i-qkarfffa: "Endpoint down → fallback local + WARN".
   *
   * Behavior summary:
   *   - Primary call succeeds → return; record success.
   *   - Primary throws CircuitOpenError → fall back, log WARN once per transition.
   *   - Primary throws any other error → fall back for THIS call only;
   *     count toward the failure-streak threshold.
   *   - When failure streak crosses threshold (default 3) → set our own
   *     "open until" timestamp; until expiry, route directly to fallback
   *     (skip primary entirely).
   *   - On expiry, retry primary opportunistically.
   *   - getModelId / getDimensions / dispose are delegated to whichever
   *     provider is currently active (or to the primary if both are usable).
   */
  import type { EmbeddingProvider, ProviderEmbedOptions, ProviderEmbedding, ProviderHealth, ProviderKind } from "./provider.js";
  /**
   * Construction options for `AutoFallbackEmbeddingProvider`.
   *
   * Only `primary` and `fallback` are required; every other field has a
   * documented default applied by the provider.
   */
  export type AutoFallbackProviderConfig = {
    /** Provider tried first (typically the remote `OpenAIEmbeddingsProvider`). */
    primary: EmbeddingProvider;
    /** Provider used when the primary is unavailable (typically `LocalLlamaCppProvider`). */
    fallback: EmbeddingProvider;
    /**
     * Number of consecutive non-CircuitOpenError failures before we suppress
     * primary calls and route directly to fallback. Default: 3.
     */
    failureStreakThreshold?: number;
    /**
     * Time in ms to keep routing through fallback after the breaker opens.
     * Default: 5 minutes (matches `OpenAIEmbeddingsProvider`'s circuit duration).
     */
    cooldownMs?: number;
    /**
     * Optional WARN sink. Defaults to writing to `process.stderr` once per
     * routing transition (closed→open and open→closed).
     */
    warn?: (msg: string) => void;
    /**
     * Custom clock for tests.
     * NOTE(review): presumably returns milliseconds so it is comparable with
     * `cooldownMs` — confirm against the implementation.
     */
    now?: () => number;
  };
  /**
   * Routing state reported by `AutoFallbackEmbeddingProvider.getRoutingState()`:
   * `"primary"` or `"fallback"`, naming the provider calls are routed to.
   */
  export type FallbackState = "primary" | "fallback";
  /**
   * `EmbeddingProvider` that wraps a primary and a fallback provider and
   * routes each call to one of them.
   *
   * This is an ambient declaration only; the routing logic lives in the
   * implementation file. Comments on private members below are therefore
   * descriptive of intent as far as this declaration file states it.
   */
  export declare class AutoFallbackEmbeddingProvider implements EmbeddingProvider {
    /** Provider kind tag required by `EmbeddingProvider`; the value is set by the implementation. */
    readonly kind: ProviderKind;
    /** The provider tried first, as passed in `config.primary`. */
    readonly primary: EmbeddingProvider;
    /** The provider used when the primary is unavailable, as passed in `config.fallback`. */
    readonly fallback: EmbeddingProvider;
    /** Resolved `config.failureStreakThreshold` (documented default: 3). */
    private readonly failureStreakThreshold;
    /** Resolved `config.cooldownMs` (documented default: 5 minutes). */
    private readonly cooldownMs;
    /** Resolved WARN sink from `config.warn`. */
    private readonly warn;
    /** Resolved clock from `config.now`. */
    private readonly now;
    /** NOTE(review): presumably the current count of consecutive primary failures — confirm. */
    private failureStreak;
    /** NOTE(review): presumably the "open until" timestamp during which the primary is skipped — confirm. */
    private fallbackUntil;
    /** NOTE(review): presumably the last routing state a WARN was emitted for — confirm. */
    private lastTransitionState;
    /**
     * @param config Wrapped providers plus optional threshold, cooldown,
     *   WARN sink and clock overrides.
     */
    constructor(config: AutoFallbackProviderConfig);
    /**
     * Stable model id reported by the primary. The model-id guard runs against
     * the primary's id because that's what callers actually want when the
     * remote endpoint is online; on fallback-only operation, the local
     * provider should report a compatible id (in the default config, both
     * report "embeddinggemma" so this is moot).
     *
     * NOTE(review): the file header describes `getModelId` as delegated to the
     * currently active provider, which disagrees with the paragraph above —
     * confirm against the implementation.
     */
    getModelId(): string;
    /**
     * Embedding dimensionality, or `undefined` when it is not known.
     * NOTE(review): which wrapped provider answers is not visible here — confirm.
     */
    getDimensions(): number | undefined;
    /** Current routing state (mostly for tests + observability) */
    getRoutingState(): FallbackState;
    /** Reset failure-streak + cooldown (mostly for tests / admin) */
    reset(): void;
    /**
     * Health probe required by `EmbeddingProvider`.
     * @param signal Optional abort signal for cancelling the probe.
     */
    healthcheck(signal?: AbortSignal): Promise<ProviderHealth>;
    /**
     * Embed a single text through whichever provider is selected.
     * @param text Input text.
     * @param options Per-call provider options.
     * @returns The embedding, or `null` (the conditions for `null` are defined
     *   by the implementation and the wrapped providers).
     */
    embed(text: string, options?: ProviderEmbedOptions): Promise<ProviderEmbedding | null>;
    /**
     * Embed several texts through whichever provider is selected.
     * @param texts Input texts.
     * @param options Per-call provider options.
     * @returns An array of embeddings, where an entry may be `null`.
     */
    embedBatch(texts: string[], options?: ProviderEmbedOptions): Promise<(ProviderEmbedding | null)[]>;
    /**
     * Release provider resources.
     * NOTE(review): whether one or both wrapped providers are disposed is not
     * visible in this declaration — confirm.
     */
    dispose(): Promise<void>;
    /**
     * Generic dispatcher: try primary if not in cooldown, fall back on
     * `CircuitOpenError`, count other errors against the failure streak.
     * `op` is invoked with whichever provider is selected.
     */
    private run;
    /** NOTE(review): presumably starts the cooldown window that routes calls to the fallback — confirm. */
    private openCooldown;
    /** NOTE(review): presumably records a routing-state change and emits the once-per-transition WARN — confirm. */
    private transition;
  }