embedding-factory.test.ts 8.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263
/**
 * embedding-factory.test.ts - Tests for the createEmbeddingProvider factory.
 *
 * Verifies the provider-kind resolution precedence (highest priority first):
 *   1. explicit `kind` argument
 *   2. QMD_EMBED_PROVIDER env
 *   3. QMD_EMBED_ENDPOINT env (forces openai)
 *   4. config file `embedProvider.kind` / `embedProvider.endpoint`
 *   5. fallback: local
 */
  11. import { describe, test, expect, beforeEach, afterEach } from "vitest";
  12. import { mkdtempSync, rmSync, writeFileSync, mkdirSync } from "node:fs";
  13. import { tmpdir } from "node:os";
  14. import { join } from "node:path";
  15. import {
  16. resolveProviderKind,
  17. createEmbeddingProvider,
  18. loadConfigFile,
  19. } from "../src/embedding/factory.js";
  20. import { OpenAIEmbeddingsProvider } from "../src/embedding/openai.js";
  21. import { LocalLlamaCppProvider } from "../src/embedding/local.js";
  22. let workDir: string;
  23. let configPath: string;
  24. beforeEach(() => {
  25. workDir = mkdtempSync(join(tmpdir(), "qmd-factory-test-"));
  26. mkdirSync(join(workDir, "qmd"), { recursive: true });
  27. configPath = join(workDir, "qmd", "config.json");
  28. });
  29. afterEach(() => {
  30. rmSync(workDir, { recursive: true, force: true });
  31. });
  32. // ─────────────────────────── Helpers ─────────────────────────────────────────
  33. function writeConfig(obj: Record<string, unknown>) {
  34. writeFileSync(configPath, JSON.stringify(obj));
  35. }
  36. const EMPTY_ENV: Record<string, string | undefined> = {};
  37. // ─────────────────────────── resolveProviderKind ─────────────────────────────
  38. describe("resolveProviderKind", () => {
  39. test("explicit kind argument wins", () => {
  40. expect(
  41. resolveProviderKind({
  42. kind: "local",
  43. env: { QMD_EMBED_ENDPOINT: "https://x" },
  44. configPath,
  45. }),
  46. ).toBe("local");
  47. expect(
  48. resolveProviderKind({
  49. kind: "openai",
  50. env: EMPTY_ENV,
  51. configPath,
  52. }),
  53. ).toBe("openai");
  54. });
  55. test("QMD_EMBED_PROVIDER env wins over QMD_EMBED_ENDPOINT", () => {
  56. expect(
  57. resolveProviderKind({
  58. env: { QMD_EMBED_PROVIDER: "local", QMD_EMBED_ENDPOINT: "https://x" },
  59. configPath,
  60. }),
  61. ).toBe("local");
  62. });
  63. test("QMD_EMBED_ENDPOINT presence → openai", () => {
  64. expect(
  65. resolveProviderKind({
  66. env: { QMD_EMBED_ENDPOINT: "https://ai.example.com" },
  67. configPath,
  68. }),
  69. ).toBe("openai");
  70. });
  71. test("QMD_EMBED_ENDPOINT empty string ignored", () => {
  72. expect(
  73. resolveProviderKind({
  74. env: { QMD_EMBED_ENDPOINT: "" },
  75. configPath,
  76. }),
  77. ).toBe("local");
  78. });
  79. test("config file embedProvider.kind respected", () => {
  80. writeConfig({ embedProvider: { kind: "openai", endpoint: "https://ai.example.com" } });
  81. expect(resolveProviderKind({ env: EMPTY_ENV, configPath })).toBe("openai");
  82. });
  83. test("config file embedProvider.endpoint alone → openai", () => {
  84. writeConfig({ embedProvider: { endpoint: "https://ai.example.com" } });
  85. expect(resolveProviderKind({ env: EMPTY_ENV, configPath })).toBe("openai");
  86. });
  87. test("no signal anywhere → local fallback", () => {
  88. expect(resolveProviderKind({ env: EMPTY_ENV, configPath })).toBe("local");
  89. });
  90. test("invalid env QMD_EMBED_PROVIDER is ignored", () => {
  91. expect(
  92. resolveProviderKind({
  93. env: { QMD_EMBED_PROVIDER: "garbage" },
  94. configPath,
  95. }),
  96. ).toBe("local");
  97. });
  98. test("uppercase env QMD_EMBED_PROVIDER normalized", () => {
  99. expect(
  100. resolveProviderKind({
  101. env: { QMD_EMBED_PROVIDER: "OPENAI", QMD_EMBED_ENDPOINT: "https://x" },
  102. configPath,
  103. }),
  104. ).toBe("openai");
  105. });
  106. });
  107. // ─────────────────────────── createEmbeddingProvider ─────────────────────────
  108. describe("createEmbeddingProvider", () => {
  109. test("openai kind w/ endpoint env → OpenAIEmbeddingsProvider", () => {
  110. const p = createEmbeddingProvider({
  111. env: { QMD_EMBED_ENDPOINT: "https://ai.example.com" },
  112. configPath,
  113. });
  114. expect(p).toBeInstanceOf(OpenAIEmbeddingsProvider);
  115. expect(p.kind).toBe("openai");
  116. expect(p.getModelId()).toBe("embeddinggemma");
  117. });
  118. test("openai kind w/ explicit options merges over env", () => {
  119. const p = createEmbeddingProvider({
  120. env: { QMD_EMBED_ENDPOINT: "https://env.example.com", QMD_EMBED_API_KEY: "env-key" },
  121. configPath,
  122. openai: { endpoint: "https://override.example.com" },
  123. });
  124. // Cast to access internal properties for verification
  125. const inner = p as OpenAIEmbeddingsProvider & { endpoint: string; apiKey: string };
  126. expect(inner["endpoint"]).toBe("https://override.example.com");
  127. // apiKey should still come from env since we didn't override it
  128. expect(inner["apiKey"]).toBe("env-key");
  129. });
  130. test("openai kind reads modelId from env", () => {
  131. const p = createEmbeddingProvider({
  132. env: {
  133. QMD_EMBED_ENDPOINT: "https://ai.example.com",
  134. QMD_EMBED_MODEL_ID: "custom-model",
  135. },
  136. configPath,
  137. });
  138. expect(p.getModelId()).toBe("custom-model");
  139. });
  140. test("openai kind reads upstream model from env", () => {
  141. const p = createEmbeddingProvider({
  142. env: {
  143. QMD_EMBED_ENDPOINT: "https://ai.example.com",
  144. QMD_EMBED_UPSTREAM_MODEL: "embeddinggemma:300m",
  145. },
  146. configPath,
  147. }) as OpenAIEmbeddingsProvider & { upstreamModel: string };
  148. expect(p["upstreamModel"]).toBe("embeddinggemma:300m");
  149. });
  150. test("openai kind reads batch size and timeout from env", () => {
  151. const p = createEmbeddingProvider({
  152. env: {
  153. QMD_EMBED_ENDPOINT: "https://ai.example.com",
  154. QMD_EMBED_BATCH_SIZE: "32",
  155. QMD_EMBED_TIMEOUT_MS: "5000",
  156. },
  157. configPath,
  158. }) as OpenAIEmbeddingsProvider & { batchSize: number; timeoutMs: number };
  159. expect(p["batchSize"]).toBe(32);
  160. expect(p["timeoutMs"]).toBe(5000);
  161. });
  162. test("openai kind merges config file values", () => {
  163. writeConfig({
  164. embedProvider: {
  165. kind: "openai",
  166. endpoint: "https://config.example.com",
  167. apiKey: "config-key",
  168. modelId: "config-model",
  169. batchSize: 16,
  170. },
  171. });
  172. const p = createEmbeddingProvider({
  173. env: EMPTY_ENV,
  174. configPath,
  175. }) as OpenAIEmbeddingsProvider & {
  176. endpoint: string;
  177. apiKey: string;
  178. batchSize: number;
  179. };
  180. expect(p["endpoint"]).toBe("https://config.example.com");
  181. expect(p["apiKey"]).toBe("config-key");
  182. expect(p.getModelId()).toBe("config-model");
  183. expect(p["batchSize"]).toBe(16);
  184. });
  185. test("env wins over config file", () => {
  186. writeConfig({
  187. embedProvider: {
  188. endpoint: "https://config.example.com",
  189. },
  190. });
  191. const p = createEmbeddingProvider({
  192. env: { QMD_EMBED_ENDPOINT: "https://env.example.com" },
  193. configPath,
  194. }) as OpenAIEmbeddingsProvider & { endpoint: string };
  195. expect(p["endpoint"]).toBe("https://env.example.com");
  196. });
  197. test("openai kind without endpoint throws", () => {
  198. expect(() =>
  199. createEmbeddingProvider({ kind: "openai", env: EMPTY_ENV, configPath }),
  200. ).toThrow(/endpoint/);
  201. });
  202. test("local kind explicitly requested → LocalLlamaCppProvider", () => {
  203. const p = createEmbeddingProvider({
  204. kind: "local",
  205. env: EMPTY_ENV,
  206. configPath,
  207. });
  208. expect(p).toBeInstanceOf(LocalLlamaCppProvider);
  209. expect(p.kind).toBe("local");
  210. });
  211. test("default fallback → LocalLlamaCppProvider", () => {
  212. const p = createEmbeddingProvider({ env: EMPTY_ENV, configPath });
  213. expect(p).toBeInstanceOf(LocalLlamaCppProvider);
  214. });
  215. });
  216. // ─────────────────────────── loadConfigFile ──────────────────────────────────
  217. describe("loadConfigFile", () => {
  218. test("missing file → empty object", () => {
  219. expect(loadConfigFile(join(workDir, "missing.json"))).toEqual({});
  220. });
  221. test("invalid JSON → empty object (no throw)", () => {
  222. writeFileSync(configPath, "not json");
  223. expect(loadConfigFile(configPath)).toEqual({});
  224. });
  225. test("valid JSON parsed", () => {
  226. writeConfig({ embedProvider: { kind: "openai" } });
  227. expect(loadConfigFile(configPath)).toEqual({
  228. embedProvider: { kind: "openai" },
  229. });
  230. });
  231. });