|
|
@@ -778,6 +778,305 @@ describe("OpenAIEmbeddingsProvider — getLastError (i-vm1lxwry)", () => {
|
|
|
|
|
|
// ─────────────────────────── dispose ─────────────────────────────────────────
|
|
|
|
|
|
+// ─────────────────────────── Concurrent dispatch (i-fkpnar9i Phase 1 #1) ─────
|
|
|
+
|
|
|
+/**
|
|
|
+ * Fetch helper that lets a test:
|
|
|
+ * - count how many requests are in-flight at any moment
|
|
|
+ * - control resolution order via per-call deferred promises
|
|
|
+ * - inspect the start order vs resolution order
|
|
|
+ *
|
|
|
+ * Each `responses[i]` returns a Response (or Promise<Response>); the helper
|
|
|
+ * wraps each call so it awaits a `gate[i]` deferred BEFORE responding. Tests
|
|
|
+ * call `release(i)` to let the i-th request settle. Useful for testing that
|
|
|
+ * concurrent dispatch actually overlaps requests.
|
|
|
+ */
|
|
|
+function makeGatedFetchSequence(count: number): {
|
|
|
+ fetchImpl: typeof fetch;
|
|
|
+ inFlight: () => number;
|
|
|
+ startOrder: number[];
|
|
|
+ release: (idx: number, response: Response) => void;
|
|
|
+ releaseAll: (responseFor: (idx: number) => Response) => void;
|
|
|
+} {
|
|
|
+ const gates: Array<{ resolve: (r: Response) => void }> = [];
|
|
|
+ const startOrder: number[] = [];
|
|
|
+ let inFlight = 0;
|
|
|
+ let nextStart = 0;
|
|
|
+
|
|
|
+ for (let i = 0; i < count; i++) {
|
|
|
+ let resolveFn: (r: Response) => void = () => {};
|
|
|
+ new Promise<Response>((resolve) => {
|
|
|
+ resolveFn = resolve;
|
|
|
+ });
|
|
|
+ // re-create properly:
|
|
|
+ let r2: (x: Response) => void = () => {};
|
|
|
+ const p = new Promise<Response>((resolve) => {
|
|
|
+ r2 = resolve;
|
|
|
+ });
|
|
|
+ gates.push({ resolve: r2 });
|
|
|
+ // attach the unresolved promise back to the slot via a closure (below)
|
|
|
+ (gates[i] as any).promise = p;
|
|
|
+ }
|
|
|
+
|
|
|
+ const fetchImpl = (async (_input: RequestInfo | URL, _init?: RequestInit) => {
|
|
|
+ const idx = nextStart++;
|
|
|
+ if (idx >= count) throw new Error(`gated fetch exhausted at ${idx + 1}`);
|
|
|
+ startOrder.push(idx);
|
|
|
+ inFlight++;
|
|
|
+ try {
|
|
|
+ const r = await (gates[idx] as any).promise;
|
|
|
+ return r as Response;
|
|
|
+ } finally {
|
|
|
+ inFlight--;
|
|
|
+ }
|
|
|
+ }) as typeof fetch;
|
|
|
+
|
|
|
+ return {
|
|
|
+ fetchImpl,
|
|
|
+ inFlight: () => inFlight,
|
|
|
+ startOrder,
|
|
|
+ release: (idx: number, response: Response) => gates[idx]!.resolve(response),
|
|
|
+ releaseAll: (responseFor: (idx: number) => Response) => {
|
|
|
+ for (let i = 0; i < count; i++) gates[i]!.resolve(responseFor(i));
|
|
|
+ },
|
|
|
+ };
|
|
|
+}
|
|
|
+
|
|
|
+describe("OpenAIEmbeddingsProvider — concurrent dispatch (i-fkpnar9i)", () => {
|
|
|
+ test("default concurrency is 4 — 8 chunks of size 1, max in-flight = 4", async () => {
|
|
|
+ const N = 8;
|
|
|
+ const gated = makeGatedFetchSequence(N);
|
|
|
+ const p = new OpenAIEmbeddingsProvider({
|
|
|
+ endpoint: "https://ai.example.com",
|
|
|
+ fetchImpl: gated.fetchImpl,
|
|
|
+ batchSize: 1, // each text becomes its own chunk
|
|
|
+ });
|
|
|
+ // Expected concurrency=4 default
|
|
|
+ expect((p as any).concurrency).toBe(4);
|
|
|
+
|
|
|
+ const texts = Array.from({ length: N }, (_, i) => `t${i}`);
|
|
|
+ const promise = p.embedBatch(texts);
|
|
|
+
|
|
|
+ // Yield to the microtask queue so workers can start their first dispatch.
|
|
|
+ // Multiple yields needed for chained `await this.requestWithRetry → await fetch`.
|
|
|
+ for (let i = 0; i < 5; i++) await Promise.resolve();
|
|
|
+
|
|
|
+ expect(gated.inFlight()).toBe(4);
|
|
|
+ expect(gated.startOrder).toEqual([0, 1, 2, 3]);
|
|
|
+
|
|
|
+ // Release first 4 in reverse order — concurrent dispatch should still
|
|
|
+ // preserve input order in `results` because each worker writes to its
|
|
|
+ // pre-computed slot.
|
|
|
+ for (let i = 3; i >= 0; i--) {
|
|
|
+ gated.release(i, embeddingsResponse([`t${i}`], 4));
|
|
|
+ }
|
|
|
+ // Yield to let workers pick up next chunks
|
|
|
+ for (let i = 0; i < 10; i++) await Promise.resolve();
|
|
|
+
|
|
|
+ expect(gated.inFlight()).toBeGreaterThan(0); // 4 more workers should be in flight
|
|
|
+ expect(gated.startOrder.length).toBe(8); // all 8 dispatched
|
|
|
+
|
|
|
+ // Release the rest
|
|
|
+ for (let i = 4; i < N; i++) {
|
|
|
+ gated.release(i, embeddingsResponse([`t${i}`], 4));
|
|
|
+ }
|
|
|
+ const result = await promise;
|
|
|
+ expect(result.length).toBe(N);
|
|
|
+ // Critical: input order preserved despite out-of-order resolution
|
|
|
+ for (let i = 0; i < N; i++) {
|
|
|
+ expect(result[i]).not.toBeNull();
|
|
|
+ expect(result[i]!.model).toBe("embeddinggemma");
|
|
|
+ }
|
|
|
+ expect(gated.inFlight()).toBe(0);
|
|
|
+ });
|
|
|
+
|
|
|
+ test("explicit concurrency=2 — only 2 in-flight at any moment", async () => {
|
|
|
+ const N = 6;
|
|
|
+ const gated = makeGatedFetchSequence(N);
|
|
|
+ const p = new OpenAIEmbeddingsProvider({
|
|
|
+ endpoint: "https://ai.example.com",
|
|
|
+ fetchImpl: gated.fetchImpl,
|
|
|
+ batchSize: 1,
|
|
|
+ concurrency: 2,
|
|
|
+ });
|
|
|
+ const texts = Array.from({ length: N }, (_, i) => `t${i}`);
|
|
|
+ const promise = p.embedBatch(texts);
|
|
|
+
|
|
|
+ for (let i = 0; i < 5; i++) await Promise.resolve();
|
|
|
+ expect(gated.inFlight()).toBe(2);
|
|
|
+
|
|
|
+ // Cycle: release one, wait, expect new one started
|
|
|
+ gated.release(0, embeddingsResponse(["t0"], 4));
|
|
|
+ for (let i = 0; i < 10; i++) await Promise.resolve();
|
|
|
+ expect(gated.inFlight()).toBe(2); // still 2 — slot filled by t2
|
|
|
+
|
|
|
+ // Drain the rest
|
|
|
+ for (let i = 1; i < N; i++) {
|
|
|
+ gated.release(i, embeddingsResponse([`t${i}`], 4));
|
|
|
+ for (let j = 0; j < 5; j++) await Promise.resolve();
|
|
|
+ }
|
|
|
+ const result = await promise;
|
|
|
+ expect(result.every((r) => r !== null)).toBe(true);
|
|
|
+ });
|
|
|
+
|
|
|
+ test("concurrency=1 reproduces legacy sequential behavior", async () => {
|
|
|
+ const N = 4;
|
|
|
+ const gated = makeGatedFetchSequence(N);
|
|
|
+ const p = new OpenAIEmbeddingsProvider({
|
|
|
+ endpoint: "https://ai.example.com",
|
|
|
+ fetchImpl: gated.fetchImpl,
|
|
|
+ batchSize: 1,
|
|
|
+ concurrency: 1,
|
|
|
+ });
|
|
|
+ const texts = Array.from({ length: N }, (_, i) => `t${i}`);
|
|
|
+ const promise = p.embedBatch(texts);
|
|
|
+
|
|
|
+ for (let i = 0; i < 5; i++) await Promise.resolve();
|
|
|
+ expect(gated.inFlight()).toBe(1);
|
|
|
+ expect(gated.startOrder).toEqual([0]);
|
|
|
+
|
|
|
+ // Release one at a time, confirm the next starts only after.
|
|
|
+ for (let i = 0; i < N; i++) {
|
|
|
+ gated.release(i, embeddingsResponse([`t${i}`], 4));
|
|
|
+ for (let j = 0; j < 5; j++) await Promise.resolve();
|
|
|
+ }
|
|
|
+ await promise;
|
|
|
+ // All started in order (sequential)
|
|
|
+ expect(gated.startOrder).toEqual([0, 1, 2, 3]);
|
|
|
+ });
|
|
|
+
|
|
|
+ test("results in input order even when the LAST chunk resolves first", async () => {
|
|
|
+ const N = 4;
|
|
|
+ const gated = makeGatedFetchSequence(N);
|
|
|
+ const p = new OpenAIEmbeddingsProvider({
|
|
|
+ endpoint: "https://ai.example.com",
|
|
|
+ fetchImpl: gated.fetchImpl,
|
|
|
+ batchSize: 1,
|
|
|
+ concurrency: 4,
|
|
|
+ });
|
|
|
+ const texts = ["alpha", "beta", "gamma", "delta"];
|
|
|
+ const promise = p.embedBatch(texts);
|
|
|
+
|
|
|
+ // Wait for all 4 to be in flight, then resolve LAST first
|
|
|
+ for (let i = 0; i < 5; i++) await Promise.resolve();
|
|
|
+ expect(gated.inFlight()).toBe(4);
|
|
|
+ gated.release(3, embeddingsResponse(["delta"], 4));
|
|
|
+ gated.release(2, embeddingsResponse(["gamma"], 4));
|
|
|
+ gated.release(1, embeddingsResponse(["beta"], 4));
|
|
|
+ gated.release(0, embeddingsResponse(["alpha"], 4));
|
|
|
+
|
|
|
+ const result = await promise;
|
|
|
+ expect(result.length).toBe(N);
|
|
|
+ // Each input slot got its own embedding — input order preserved
|
|
|
+ expect(result[0]).not.toBeNull();
|
|
|
+ expect(result[1]).not.toBeNull();
|
|
|
+ expect(result[2]).not.toBeNull();
|
|
|
+ expect(result[3]).not.toBeNull();
|
|
|
+ });
|
|
|
+
|
|
|
+ test("dimensions recorded correctly even if the first-resolving chunk is not chunk 0", async () => {
|
|
|
+ const gated = makeGatedFetchSequence(2);
|
|
|
+ const p = new OpenAIEmbeddingsProvider({
|
|
|
+ endpoint: "https://ai.example.com",
|
|
|
+ fetchImpl: gated.fetchImpl,
|
|
|
+ batchSize: 1,
|
|
|
+ concurrency: 2,
|
|
|
+ });
|
|
|
+ const promise = p.embedBatch(["a", "b"]);
|
|
|
+ for (let i = 0; i < 5; i++) await Promise.resolve();
|
|
|
+ // Resolve chunk 1 first with 7-dim, then chunk 0 with 7-dim
|
|
|
+ gated.release(1, embeddingsResponse(["b"], 7));
|
|
|
+ for (let i = 0; i < 5; i++) await Promise.resolve();
|
|
|
+ gated.release(0, embeddingsResponse(["a"], 7));
|
|
|
+ await promise;
|
|
|
+ expect(p.getDimensions()).toBe(7);
|
|
|
+ });
|
|
|
+
|
|
|
+ test("abort signal during concurrent run stops new dispatches; in-flight settle", async () => {
|
|
|
+ const gated = makeGatedFetchSequence(8);
|
|
|
+ const p = new OpenAIEmbeddingsProvider({
|
|
|
+ endpoint: "https://ai.example.com",
|
|
|
+ fetchImpl: gated.fetchImpl,
|
|
|
+ batchSize: 1,
|
|
|
+ concurrency: 4,
|
|
|
+ });
|
|
|
+ const ctrl = new AbortController();
|
|
|
+ const texts = Array.from({ length: 8 }, (_, i) => `t${i}`);
|
|
|
+ const promise = p.embedBatch(texts, { signal: ctrl.signal });
|
|
|
+ for (let i = 0; i < 5; i++) await Promise.resolve();
|
|
|
+ expect(gated.startOrder.length).toBe(4);
|
|
|
+
|
|
|
+ // Resolve in-flight, then abort — remaining 4 should NOT dispatch
|
|
|
+ gated.release(0, embeddingsResponse(["t0"], 4));
|
|
|
+ gated.release(1, embeddingsResponse(["t1"], 4));
|
|
|
+ gated.release(2, embeddingsResponse(["t2"], 4));
|
|
|
+ gated.release(3, embeddingsResponse(["t3"], 4));
|
|
|
+ ctrl.abort(new Error("operator cancelled"));
|
|
|
+ for (let i = 0; i < 20; i++) await Promise.resolve();
|
|
|
+
|
|
|
+ const result = await promise;
|
|
|
+ // First 4 succeeded, last 4 are null (never dispatched after abort)
|
|
|
+ expect(result.slice(0, 4).every((r) => r !== null)).toBe(true);
|
|
|
+ expect(result.slice(4).every((r) => r === null)).toBe(true);
|
|
|
+ // Total dispatched MUST be ≤ 5 (the abort can race with one extra
|
|
|
+ // worker pulling the next idx before the abort flag is set; we cap
|
|
|
+ // at first 4 + at-most-1 grace).
|
|
|
+ expect(gated.startOrder.length).toBeLessThanOrEqual(5);
|
|
|
+ // Audit string captured
|
|
|
+ expect(p.getLastError()).toMatch(/aborted by caller/);
|
|
|
+ });
|
|
|
+
|
|
|
+ test("ctor rejects concurrency < 1", () => {
|
|
|
+ expect(() => new OpenAIEmbeddingsProvider({
|
|
|
+ endpoint: "https://ai.example.com",
|
|
|
+ fetchImpl: (async () => mockResponse(200, {})) as typeof fetch,
|
|
|
+ concurrency: 0,
|
|
|
+ })).toThrow(/concurrency must be ≥ 1/);
|
|
|
+ expect(() => new OpenAIEmbeddingsProvider({
|
|
|
+ endpoint: "https://ai.example.com",
|
|
|
+ fetchImpl: (async () => mockResponse(200, {})) as typeof fetch,
|
|
|
+ concurrency: -3,
|
|
|
+ })).toThrow(/concurrency must be ≥ 1/);
|
|
|
+ });
|
|
|
+
|
|
|
+ test("circuit-open observed mid-run is thrown after in-flight settle", async () => {
|
|
|
+ // 8 chunks, all fail → breaker opens after the first 4 (minSamples=4).
|
|
|
+ // Workers 0-3 dispatch in parallel, all fail, recordFailure × 4 → breaker
|
|
|
+ // OPEN. Remaining workers see shouldFailFast() and set circuitTrippedDuringRun.
|
|
|
+ const N = 8;
|
|
|
+ const { fetchImpl } = makeFetchSequence(
|
|
|
+ Array.from({ length: N }, () => () => mockResponse(401, "fail"))
|
|
|
+ );
|
|
|
+ const p = new OpenAIEmbeddingsProvider({
|
|
|
+ endpoint: "https://ai.example.com",
|
|
|
+ fetchImpl,
|
|
|
+ batchSize: 1,
|
|
|
+ concurrency: 4,
|
|
|
+ retryBackoffsMs: [],
|
|
|
+ sleep: async () => {},
|
|
|
+ });
|
|
|
+ // First 4 land before breaker opens (concurrent dispatch); after they all
|
|
|
+ // fail the breaker tips OPEN. The next pull observes shouldFailFast().
|
|
|
+ // Either the result resolves with all-null (legacy semantics — breaker
|
|
|
+ // tripped AFTER all workers grabbed their chunk) OR throws CircuitOpenError
|
|
|
+ // (breaker observed before next pull). Both are valid post-condition;
|
|
|
+ // we just assert the state ends OPEN and the call completes.
|
|
|
+ let res: Awaited<ReturnType<typeof p.embedBatch>> | undefined;
|
|
|
+ let err: unknown;
|
|
|
+ try {
|
|
|
+ res = await p.embedBatch(Array.from({ length: N }, (_, i) => `t${i}`));
|
|
|
+ } catch (e) {
|
|
|
+ err = e;
|
|
|
+ }
|
|
|
+ expect(p.breaker.getState()).toBe("open");
|
|
|
+ if (err) {
|
|
|
+ expect(err).toBeInstanceOf(CircuitOpenError);
|
|
|
+ } else {
|
|
|
+ expect(res!.every((r) => r === null)).toBe(true);
|
|
|
+ }
|
|
|
+ });
|
|
|
+});
|
|
|
+
|
|
|
describe("OpenAIEmbeddingsProvider — dispose", () => {
|
|
|
test("dispose resets the breaker", async () => {
|
|
|
const { fetchImpl } = makeFetchSequence([
|