| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384 |
- /**
- * autofallback.ts - AutoFallbackEmbeddingProvider.
- *
- * Composes a primary `EmbeddingProvider` (typically `OpenAIEmbeddingsProvider`)
- * and a fallback (typically `LocalLlamaCppProvider`). When the primary trips
- * its circuit breaker — or when persistent failures cross a threshold — calls
- * are routed to the fallback. After a recovery cooldown, the primary is
- * probed again; success closes the breaker and routing returns.
- *
- * Acceptance criterion 4 from i-qkarfffa: "Endpoint down → fallback local + WARN".
- *
- * Behavior summary:
- * - Primary call succeeds → return; record success.
- * - Primary throws CircuitOpenError → fall back, log WARN once per transition.
- * - Primary throws any other error → fall back for THIS call only;
- * count toward the failure-streak threshold.
- * - When failure streak crosses threshold (default 3) → set our own
- * "open until" timestamp; until expiry, route directly to fallback
- * (skip primary entirely).
- * - On expiry, retry primary opportunistically.
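- * - getModelId / getDimensions / dispose are delegated to whichever
- * provider is currently active (or to the primary if both are usable).
- * NOTE(review): the doc comment on `getModelId` in the class declaration
- * says it reports the primary's id — confirm which statement is accurate.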
- */
- import type { EmbeddingProvider, ProviderEmbedOptions, ProviderEmbedding, ProviderHealth, ProviderKind } from "./provider.js";
- export type AutoFallbackProviderConfig = { // Options accepted by the `AutoFallbackEmbeddingProvider` constructor.
- primary: EmbeddingProvider; // Preferred provider (per the file header, typically `OpenAIEmbeddingsProvider`).
- fallback: EmbeddingProvider; // Provider that calls are routed to when the primary is unavailable (typically `LocalLlamaCppProvider`).
- /**
- * Number of consecutive non-CircuitOpenError failures before we suppress
- * primary calls and route directly to fallback. Default: 3.
- */
- failureStreakThreshold?: number;
- /**
- * Time in ms to keep routing through fallback after the breaker opens.
- * Default: 5 minutes (matches `OpenAIEmbeddingsProvider`'s circuit duration).
- */
- cooldownMs?: number;
- /**
- * Optional WARN sink. Defaults to writing to `process.stderr` once per
- * routing transition (closed→open and open→closed).
- */
- warn?: (msg: string) => void;
- /**
- * Custom clock for tests. Returns a number; presumably epoch milliseconds
- * so it is comparable with `cooldownMs` — confirm against the implementation.
- */
- now?: () => number;
- };
- export type FallbackState = "primary" | "fallback"; // Routing target reported by `getRoutingState()`: which of the two providers is currently selected.
- export declare class AutoFallbackEmbeddingProvider implements EmbeddingProvider { // Declaration only (no bodies here): an `EmbeddingProvider` that wraps a primary and a fallback provider.
- readonly kind: ProviderKind; // Provider kind tag; the concrete value is assigned by the implementation, which is not visible in this declaration.
- readonly primary: EmbeddingProvider; // Preferred provider; presumably taken from `config.primary` — confirm in the constructor body.
- readonly fallback: EmbeddingProvider; // Backup provider; presumably taken from `config.fallback` — confirm in the constructor body.
- private readonly failureStreakThreshold; // Presumably the resolved `AutoFallbackProviderConfig.failureStreakThreshold` (documented default: 3).
- private readonly cooldownMs; // Presumably the resolved `AutoFallbackProviderConfig.cooldownMs` (documented default: 5 minutes).
- private readonly warn; // Presumably the resolved WARN sink from `AutoFallbackProviderConfig.warn`.
- private readonly now; // Presumably the resolved clock from `AutoFallbackProviderConfig.now`.
- private failureStreak; // Presumably the running count of consecutive primary failures — confirm in the implementation.
- private fallbackUntil; // Presumably the "open until" timestamp mentioned in the file header — confirm in the implementation.
- private lastTransitionState; // Presumably the last routing state that was reported, so WARNs fire once per transition — confirm.
- constructor(config: AutoFallbackProviderConfig); // `config` supplies both providers plus the optional threshold, cooldown, WARN sink and clock.
- /**
- * Stable model id reported by the primary. The model-id guard runs against
- * the primary's id because that's what callers actually want when the
- * remote endpoint is online; on fallback-only operation, the local
- * provider should report a compatible id (in the default config, both
- * report "embeddinggemma" so this is moot).
- */
- getModelId(): string;
- getDimensions(): number | undefined; // May return `undefined`; which provider supplies the value is decided by the implementation (not visible here).
- /** Current routing state (mostly for tests + observability) */
- getRoutingState(): FallbackState;
- /** Reset failure-streak + cooldown (mostly for tests / admin) */
- reset(): void;
- healthcheck(signal?: AbortSignal): Promise<ProviderHealth>; // Resolves to a `ProviderHealth`; `signal` is optional (presumably for cancellation — confirm).
- embed(text: string, options?: ProviderEmbedOptions): Promise<ProviderEmbedding | null>; // Embeds a single text; the result may be `null`.
- embedBatch(texts: string[], options?: ProviderEmbedOptions): Promise<(ProviderEmbedding | null)[]>; // Embeds several texts; each element of the resolved array may be `null`.
- dispose(): Promise<void>; // Asynchronous teardown; which underlying providers are disposed is not visible in this declaration.
- /**
- * Generic dispatcher: try primary if not in cooldown, fall back on
- * `CircuitOpenError`, count other errors against the failure streak.
- * `op` is invoked with whichever provider is selected.
- */
- private run;
- private openCooldown; // Presumably starts the fallback-only cooldown window — confirm in the implementation.
- private transition; // Presumably records a routing-state change and emits the WARN — confirm in the implementation.
- }
|