/**
 * autofallback.ts - AutoFallbackEmbeddingProvider.
 *
 * Composes a primary `EmbeddingProvider` (typically `OpenAIEmbeddingsProvider`)
 * and a fallback (typically `LocalLlamaCppProvider`). When the primary trips
 * its circuit breaker — or when persistent failures cross a threshold — calls
 * are routed to the fallback. After a recovery cooldown, the primary is
 * probed again; success closes the breaker and routing returns.
 *
 * Acceptance criterion 4 from i-qkarfffa: "Endpoint down → fallback local + WARN".
 *
 * Behavior summary:
 *  - Primary call succeeds → return; record success.
 *  - Primary throws CircuitOpenError → fall back, log WARN once per transition.
 *  - Primary throws any other error → fall back for THIS call only;
 *    count toward the failure-streak threshold.
 *  - When failure streak crosses threshold (default 3) → set our own
 *    "open until" timestamp; until expiry, route directly to fallback
 *    (skip primary entirely).
 *  - On expiry, retry primary opportunistically.
 *  - getModelId / getDimensions / dispose are delegated to whichever
 *    provider is currently active (or to the primary if both are usable).
 */
import type {
  EmbeddingProvider,
  ProviderEmbedOptions,
  ProviderEmbedding,
  ProviderHealth,
  ProviderKind,
} from "./provider.js";

/** Construction options for `AutoFallbackEmbeddingProvider`. */
export type AutoFallbackProviderConfig = {
  /** Provider tried first while routing is in the "primary" state. */
  primary: EmbeddingProvider;
  /** Provider used when the primary is unavailable or in cooldown. */
  fallback: EmbeddingProvider;
  /**
   * Number of consecutive non-CircuitOpenError failures before we suppress
   * primary calls and route directly to fallback. Default: 3.
   */
  failureStreakThreshold?: number;
  /**
   * Time in ms to keep routing through fallback after the breaker opens.
   * Default: 5 minutes (matches `OpenAIEmbeddingsProvider`'s circuit duration).
   */
  cooldownMs?: number;
  /**
   * Optional WARN sink. Defaults to writing to `process.stderr` once per
   * routing transition (closed→open and open→closed).
   */
  warn?: (msg: string) => void;
  /** Custom clock for tests. */
  now?: () => number;
};

/** Which leg calls are currently routed to. */
export type FallbackState = "primary" | "fallback";

export declare class AutoFallbackEmbeddingProvider implements EmbeddingProvider {
  readonly kind: ProviderKind;
  readonly primary: EmbeddingProvider;
  readonly fallback: EmbeddingProvider;
  private readonly failureStreakThreshold;
  private readonly cooldownMs;
  private readonly warn;
  private readonly now;
  private failureStreak;
  private fallbackUntil;
  private lastTransitionState;

  constructor(config: AutoFallbackProviderConfig);

  /**
   * Stable model id reported by the primary. The model-id guard runs against
   * the primary's id because that's what callers actually want when the
   * remote endpoint is online; on fallback-only operation, the local
   * provider should report a compatible id (in the default config, both
   * report "embeddinggemma" so this is moot).
   */
  getModelId(): string;

  /** Embedding dimensionality, or `undefined` when it is not known. */
  getDimensions(): number | undefined;

  /**
   * Combined last-error from primary + fallback. Either, neither, or both legs
   * may have a tracked error after `embed()`/`embedBatch()` runs:
   *  - Both clean → undefined
   *  - Primary failed, fallback rescued → returns primary error (most useful)
   *  - Both failed → returns "primary: <primary error> | fallback: <fallback error>"
   *  - Only primary skipped (cooldown), fallback also failed → returns fallback error
   */
  getLastError(): string | undefined;

  /** Current routing state (mostly for tests + observability). */
  getRoutingState(): FallbackState;

  /** Reset failure-streak + cooldown (mostly for tests / admin). */
  reset(): void;

  /**
   * Health probe.
   *
   * @param signal Optional abort signal for cancelling the probe.
   * @returns A `ProviderHealth` report.
   */
  healthcheck(signal?: AbortSignal): Promise<ProviderHealth>;

  /**
   * Embed a single text, routed per the behavior summary in the file header.
   *
   * NOTE(review): the resolved type is assumed to match the element type of
   * `embedBatch` (`ProviderEmbedding | null`) — confirm against the
   * `EmbeddingProvider` interface in `./provider.js`.
   */
  embed(text: string, options?: ProviderEmbedOptions): Promise<ProviderEmbedding | null>;

  /**
   * Embed several texts in one call. Entries may be `null`; presumably one
   * result per input, in input order — confirm against the implementation.
   */
  embedBatch(texts: string[], options?: ProviderEmbedOptions): Promise<(ProviderEmbedding | null)[]>;

  /** Dispose hook; resolves with no value. */
  dispose(): Promise<void>;

  /**
   * Generic dispatcher: try primary if not in cooldown, fall back on
   * `CircuitOpenError`, count other errors against the failure streak.
   * `op` is invoked with whichever provider is selected.
   */
  private run;
  private openCooldown;
  private transition;
}