embedding-autofallback.test.ts 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396
  1. /**
  2. * embedding-autofallback.test.ts - Tests for AutoFallbackEmbeddingProvider.
  3. */
  4. import { describe, test, expect } from "vitest";
  5. import {
  6. AutoFallbackEmbeddingProvider,
  7. type AutoFallbackProviderConfig,
  8. } from "../src/embedding/autofallback.js";
  9. import { CircuitOpenError } from "../src/embedding/openai.js";
  10. import type {
  11. EmbeddingProvider,
  12. ProviderEmbedOptions,
  13. ProviderEmbedding,
  14. ProviderHealth,
  15. ProviderKind,
  16. } from "../src/embedding/provider.js";
  17. // ─────────────────────────── Test fakes ──────────────────────────────────────
  /**
   * Scriptable in-memory EmbeddingProvider used as a primary or fallback leg in
   * these tests. It counts every call, can be told to throw (per queued entry or
   * on every call), and produces deterministic vectors derived from text length.
   */
  class FakeProvider implements EmbeddingProvider {
    readonly kind: ProviderKind;
    readonly modelId: string;
    readonly dim: number;

    /** Number of embed() invocations, counted before any scripted throw. */
    embedCalls = 0;
    /** Number of embedBatch() invocations, counted before any scripted throw. */
    embedBatchCalls = 0;
    /** Number of healthcheck() invocations. */
    healthcheckCalls = 0;
    /** Set to true once dispose() has run. */
    disposed = false;

    /** Queue consumed one entry per call; an Error entry is thrown, null means "succeed". */
    nextThrows: Array<Error | null> = [];
    /** When non-null, every embed/embedBatch call throws this and the queue is left untouched. */
    alwaysThrows: Error | null = null;
    /** Canned healthcheck result; when null a healthy default is reported. */
    healthResponse: ProviderHealth | null = null;
    /** Value handed back verbatim by getLastError(). */
    lastErr: string | undefined = undefined;

    constructor(kind: ProviderKind, modelId: string, dim = 4) {
      this.kind = kind;
      this.modelId = modelId;
      this.dim = dim;
    }

    getModelId(): string {
      return this.modelId;
    }

    getDimensions(): number | undefined {
      return this.dim;
    }

    getLastError(): string | undefined {
      return this.lastErr;
    }

    /** Records the call and returns the canned response, or `{ ok: true, ... }` if none is set. */
    async healthcheck(): Promise<ProviderHealth> {
      this.healthcheckCalls += 1;
      return this.healthResponse ?? { ok: true, model: this.modelId, dimensions: this.dim };
    }

    /** Records the call, applies any scripted failure, then embeds a single text. */
    async embed(text: string, _options?: ProviderEmbedOptions): Promise<ProviderEmbedding | null> {
      this.embedCalls += 1;
      this.maybeThrow();
      const embedding = this.fakeEmbed(text);
      return { embedding, model: this.modelId };
    }

    /** Records the call, applies any scripted failure, then embeds each text in order. */
    async embedBatch(texts: string[], _options?: ProviderEmbedOptions): Promise<(ProviderEmbedding | null)[]> {
      this.embedBatchCalls += 1;
      this.maybeThrow();
      const toResult = (item: string): ProviderEmbedding => ({
        embedding: this.fakeEmbed(item),
        model: this.modelId,
      });
      return texts.map((item) => toResult(item));
    }

    async dispose(): Promise<void> {
      this.disposed = true;
    }

    /** Throws `alwaysThrows` if set; otherwise pops one queue entry and throws it when it is an Error. */
    private maybeThrow(): void {
      // `??` short-circuits, so the queue is only consumed when always-throw mode is off.
      const scripted = this.alwaysThrows ?? this.nextThrows.shift();
      if (scripted) throw scripted;
    }

    /** Builds a `dim`-length vector whose i-th entry is `(text.length + i) * 0.01`. */
    private fakeEmbed(text: string): number[] {
      const base = text.length;
      return Array.from({ length: this.dim }, (_unused, index) => (base + index) * 0.01);
    }
  }
  /**
   * Builds an AutoFallbackEmbeddingProvider wired to two FakeProviders, a
   * captured warn log, and a manually advanced clock.
   *
   * `opts` is spread FIRST so that the harness-owned fields below always win:
   * - `primary` / `fallback` / `warn` / `now` are the instances returned to the
   *   test, so the returned handles always observe and control the provider.
   * - `failureStreakThreshold` defaults to 3 and `cooldownMs` to 60_000 when the
   *   caller omits them or passes `undefined` (a trailing spread would let an
   *   explicit `undefined` overwrite these defaults).
   *
   * @param opts Optional config overrides; harness-owned fields are ignored.
   * @returns The provider under test plus its fakes, the collected warn
   *   messages, and `setNow` to move the injected clock (which starts at
   *   1_000_000).
   */
  function buildAutoFallback(opts: Partial<AutoFallbackProviderConfig> = {}): {
    af: AutoFallbackEmbeddingProvider;
    primary: FakeProvider;
    fallback: FakeProvider;
    warns: string[];
    setNow: (n: number) => void;
  } {
    const primary = new FakeProvider("openai", "embeddinggemma");
    const fallback = new FakeProvider("local", "embeddinggemma");
    const warns: string[] = [];
    let now = 1_000_000;
    const af = new AutoFallbackEmbeddingProvider({
      ...opts,
      primary,
      fallback,
      failureStreakThreshold: opts.failureStreakThreshold ?? 3,
      cooldownMs: opts.cooldownMs ?? 60_000,
      warn: (m) => warns.push(m),
      now: () => now,
    });
    return {
      af,
      primary,
      fallback,
      warns,
      setNow: (n) => {
        now = n;
      },
    };
  }
  97. // ─────────────────────────── Construction ────────────────────────────────────
  // Constructor guards (missing or identical legs) and the kind exposed by the wrapper.
  describe("AutoFallbackEmbeddingProvider — construction", () => {
    test("requires primary", () => {
      const construct = () =>
        new AutoFallbackEmbeddingProvider({
          // @ts-expect-error testing runtime guard
          primary: undefined,
          fallback: new FakeProvider("local", "x"),
        });
      expect(construct).toThrow(/primary is required/);
    });

    test("requires fallback", () => {
      const construct = () =>
        new AutoFallbackEmbeddingProvider({
          primary: new FakeProvider("openai", "x"),
          // @ts-expect-error testing runtime guard
          fallback: undefined,
        });
      expect(construct).toThrow(/fallback is required/);
    });

    test("rejects identical primary and fallback", () => {
      const shared = new FakeProvider("openai", "x");
      const construct = () =>
        new AutoFallbackEmbeddingProvider({ primary: shared, fallback: shared });
      expect(construct).toThrow(/must differ/);
    });

    test("inherits primary's kind", () => {
      const harness = buildAutoFallback();
      // The harness primary is created with kind "openai".
      expect(harness.af.kind).toBe("openai");
    });
  });
  134. // ─────────────────────────── Happy path ──────────────────────────────────────
  // With a healthy primary the fallback leg must stay untouched.
  describe("AutoFallbackEmbeddingProvider — happy path", () => {
    test("primary succeeds → fallback never called", async () => {
      const harness = buildAutoFallback();
      const result = await harness.af.embed("hello");

      expect(result).not.toBeNull();
      expect(harness.primary.embedCalls).toBe(1);
      expect(harness.fallback.embedCalls).toBe(0);
      expect(harness.af.getRoutingState()).toBe("primary");
    });

    test("primary embedBatch succeeds → fallback untouched", async () => {
      const harness = buildAutoFallback();
      const vectors = await harness.af.embedBatch(["a", "b"]);

      expect(vectors).toHaveLength(2);
      expect(harness.primary.embedBatchCalls).toBe(1);
      expect(harness.fallback.embedBatchCalls).toBe(0);
    });

    test("getModelId / getDimensions delegate to primary", () => {
      const harness = buildAutoFallback();
      const expectedModel = harness.primary.getModelId();
      const expectedDim = harness.primary.getDimensions();

      expect(harness.af.getModelId()).toBe(expectedModel);
      expect(harness.af.getDimensions()).toBe(expectedDim);
    });
  });
  157. // ─────────────────────────── Circuit-open fallback ───────────────────────────
  // A CircuitOpenError from the primary routes to the fallback and starts a cooldown.
  describe("AutoFallbackEmbeddingProvider — CircuitOpenError handling", () => {
    test("primary throws CircuitOpenError → fallback served + cooldown opens", async () => {
      const harness = buildAutoFallback();
      harness.primary.nextThrows.push(new CircuitOpenError());

      const result = await harness.af.embed("hello");

      expect(result).not.toBeNull();
      // Both fakes use dim 4, so this only checks a full vector was returned;
      // the call counters below are what show the fallback served it.
      expect(result!.embedding).toHaveLength(4);
      expect(harness.primary.embedCalls).toBe(1);
      expect(harness.fallback.embedCalls).toBe(1);
      expect(harness.af.getRoutingState()).toBe("fallback");
      const mentionsCircuit = harness.warns.some((msg) => msg.includes("CircuitOpenError"));
      expect(mentionsCircuit).toBe(true);
    });

    test("during cooldown subsequent calls skip primary entirely", async () => {
      const harness = buildAutoFallback();
      harness.primary.nextThrows.push(new CircuitOpenError());

      await harness.af.embed("first");
      expect(harness.primary.embedCalls).toBe(1);
      expect(harness.fallback.embedCalls).toBe(1);

      // The clock has not moved, so this call is still inside the cooldown.
      await harness.af.embed("second");
      expect(harness.primary.embedCalls).toBe(1); // primary was not touched again
      expect(harness.fallback.embedCalls).toBe(2);
    });

    test("after cooldown expires, primary is retried", async () => {
      const harness = buildAutoFallback({ cooldownMs: 5000 });
      harness.primary.nextThrows.push(new CircuitOpenError());

      await harness.af.embed("a");
      expect(harness.af.getRoutingState()).toBe("fallback");

      // The harness clock starts at 1_000_000; step just past the 5000 ms cooldown.
      harness.setNow(1_000_000 + 5_001);
      expect(harness.af.getRoutingState()).toBe("primary");

      // The next call goes to the primary again.
      await harness.af.embed("b");
      expect(harness.primary.embedCalls).toBe(2);
      expect(harness.fallback.embedCalls).toBe(1);
    });

    test("WARN fired only once per transition (not per call during cooldown)", async () => {
      const harness = buildAutoFallback();
      harness.primary.nextThrows.push(new CircuitOpenError());

      for (const text of ["a", "b", "c"]) {
        await harness.af.embed(text);
      }

      const transitionWarns = harness.warns.filter((msg) => msg.includes("falling back"));
      expect(transitionWarns).toHaveLength(1);
    });
  });
  203. // ─────────────────────────── Failure-streak threshold ────────────────────────
  // Non-CircuitOpen errors only open the cooldown once the consecutive-failure threshold is hit.
  describe("AutoFallbackEmbeddingProvider — failure streak", () => {
    test("non-CircuitOpen errors below threshold → no cooldown", async () => {
      const harness = buildAutoFallback({ failureStreakThreshold: 3 });
      harness.primary.nextThrows.push(new Error("transient"));

      const result = await harness.af.embed("a");

      expect(result).not.toBeNull(); // the fallback answered this call
      expect(harness.af.getRoutingState()).toBe("primary");
      expect(harness.primary.embedCalls).toBe(1);
      expect(harness.fallback.embedCalls).toBe(1);
    });

    test("threshold consecutive failures → cooldown opens", async () => {
      const harness = buildAutoFallback({ failureStreakThreshold: 3 });
      const failures = [0, 1, 2].map((i) => new Error(`err ${i}`));
      harness.primary.nextThrows.push(...failures);

      for (const text of ["a", "b", "c"]) {
        await harness.af.embed(text);
      }

      expect(harness.af.getRoutingState()).toBe("fallback");
      expect(harness.primary.embedCalls).toBe(3);
      expect(harness.fallback.embedCalls).toBe(3);
    });

    test("a single primary success resets the streak", async () => {
      const harness = buildAutoFallback({ failureStreakThreshold: 3 });
      const { af, primary } = harness;

      // Two failures, one short of the threshold.
      primary.nextThrows.push(new Error("e1"), new Error("e2"));
      await af.embed("a");
      await af.embed("b");

      // The queue is empty here, so the primary succeeds.
      await af.embed("c");

      // Two more failures: four in total, but never three in a row.
      primary.nextThrows.push(new Error("e3"), new Error("e4"));
      await af.embed("d");
      await af.embed("e");

      expect(af.getRoutingState()).toBe("primary");
    });
  });
  242. // ─────────────────────────── Recovery transition ─────────────────────────────
  // Returning to the primary: via cooldown expiry plus a successful call, or via reset().
  describe("AutoFallbackEmbeddingProvider — recovery transitions", () => {
    test("recovery WARN fires when primary call succeeds after fallback", async () => {
      const harness = buildAutoFallback({ cooldownMs: 5000 });
      harness.primary.nextThrows.push(new CircuitOpenError());

      await harness.af.embed("a");
      // The harness clock starts at 1_000_000; step just past the 5000 ms cooldown.
      harness.setNow(1_000_000 + 5_001);
      await harness.af.embed("b"); // throw queue is empty, so the primary succeeds

      const recovered = harness.warns.filter((msg) => msg.includes("recovered"));
      expect(recovered).toHaveLength(1);
    });

    test("reset() clears state + transitions back to primary", async () => {
      const harness = buildAutoFallback({ cooldownMs: 60_000 });
      harness.primary.nextThrows.push(new CircuitOpenError());

      await harness.af.embed("a");
      expect(harness.af.getRoutingState()).toBe("fallback");

      harness.af.reset();
      expect(harness.af.getRoutingState()).toBe("primary");

      await harness.af.embed("b");
      expect(harness.primary.embedCalls).toBe(2);
    });
  });
  264. // ─────────────────────────── Both fail ───────────────────────────────────────
  // Both legs throwing: embedBatch yields nulls, embed rejects with the fallback's error.
  describe("AutoFallbackEmbeddingProvider — both providers fail", () => {
    test("primary throws + fallback throws → embedBatch returns nulls", async () => {
      const harness = buildAutoFallback();
      harness.primary.alwaysThrows = new Error("primary down");
      harness.fallback.alwaysThrows = new Error("local broken");

      const results = await harness.af.embedBatch(["a", "b"]);

      expect(results).toEqual([null, null]);
    });

    test("primary throws + fallback throws → embed propagates fallback error", async () => {
      const harness = buildAutoFallback();
      harness.primary.alwaysThrows = new Error("primary down");
      harness.fallback.alwaysThrows = new Error("local broken");

      const pending = harness.af.embed("a");

      await expect(pending).rejects.toThrow(/local broken/);
    });
  });
  280. // ─────────────────────────── Healthcheck ─────────────────────────────────────
  // healthcheck(): the fallback is only consulted when the primary reports not-ok.
  describe("AutoFallbackEmbeddingProvider — healthcheck", () => {
    test("primary healthy → returns primary health", async () => {
      const harness = buildAutoFallback();

      const health = await harness.af.healthcheck();

      expect(health.ok).toBe(true);
      expect(harness.primary.healthcheckCalls).toBe(1);
      expect(harness.fallback.healthcheckCalls).toBe(0);
    });

    test("primary unhealthy → fallback checked + reported", async () => {
      const harness = buildAutoFallback();
      harness.primary.healthResponse = { ok: false, model: "primary-model", detail: "down" };
      harness.fallback.healthResponse = { ok: true, model: "local-model", detail: "fine" };

      const health = await harness.af.healthcheck();

      expect(health.ok).toBe(true);
      expect(harness.primary.healthcheckCalls).toBe(1);
      expect(harness.fallback.healthcheckCalls).toBe(1);
      // The combined detail must name both legs.
      for (const leg of ["primary", "fallback"]) {
        expect(health.detail).toContain(leg);
      }
    });

    test("both unhealthy → ok=false", async () => {
      const harness = buildAutoFallback();
      harness.primary.healthResponse = { ok: false, model: "p", detail: "down" };
      harness.fallback.healthResponse = { ok: false, model: "f", detail: "down" };

      const health = await harness.af.healthcheck();

      expect(health.ok).toBe(false);
    });
  });
  308. // ─────────────────────────── getLastError (i-vm1lxwry) ──────────────────────
  // getLastError(): reports whichever leg has an error, or both combined.
  describe("AutoFallbackEmbeddingProvider — getLastError (i-vm1lxwry)", () => {
    test("returns undefined when both legs are clean", () => {
      const harness = buildAutoFallback();
      harness.primary.lastErr = undefined;
      harness.fallback.lastErr = undefined;

      expect(harness.af.getLastError()).toBeUndefined();
    });

    test("returns primary error when only primary has one", () => {
      const harness = buildAutoFallback();
      const primaryMessage = `endpoint=https://ai.mm.mk/v1/embeddings status=503 body="busy"`;
      harness.primary.lastErr = primaryMessage;
      harness.fallback.lastErr = undefined;

      expect(harness.af.getLastError()).toBe(primaryMessage);
    });

    test("returns fallback error when only fallback has one", () => {
      const harness = buildAutoFallback();
      const fallbackMessage = `provider=local error="model file not found"`;
      harness.primary.lastErr = undefined;
      harness.fallback.lastErr = fallbackMessage;

      expect(harness.af.getLastError()).toBe(fallbackMessage);
    });

    test("combines primary + fallback when both failed", () => {
      const harness = buildAutoFallback();
      harness.primary.lastErr = `endpoint=https://ai.mm.mk/v1/embeddings status=503`;
      harness.fallback.lastErr = `provider=local error="OOM"`;

      const combined = harness.af.getLastError();

      // Both leg labels and a fragment of each underlying message must be present.
      for (const fragment of ["primary:", "fallback:", "status=503", "OOM"]) {
        expect(combined).toContain(fragment);
      }
    });
  });
  339. // ─────────────────────────── dispose ─────────────────────────────────────────
  // dispose() on the wrapper must dispose both underlying providers.
  describe("AutoFallbackEmbeddingProvider — dispose", () => {
    test("dispose cascades to both providers", async () => {
      const harness = buildAutoFallback();

      await harness.af.dispose();

      expect(harness.primary.disposed).toBe(true);
      expect(harness.fallback.disposed).toBe(true);
    });
  });
  347. });