index.js 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249
  1. /**
  2. * QMD SDK - Library mode for programmatic access to QMD search and indexing.
  3. *
  4. * Usage:
  5. * import { createStore } from '@tobilu/qmd'
  6. *
  7. * const store = await createStore({
  8. * dbPath: './my-index.sqlite',
  9. * config: {
  10. * collections: {
  11. * docs: { path: '/path/to/docs', pattern: '**\/*.md' }
  12. * }
  13. * }
  14. * })
  15. *
  16. * const results = await store.search({ query: "how does auth work?" })
  17. * await store.close()
  18. */
  19. import { createStore as createStoreInternal, hybridQuery, structuredSearch, extractSnippet, addLineNumbers, DEFAULT_EMBED_MODEL, DEFAULT_MULTI_GET_MAX_BYTES, reindexCollection, generateEmbeddings, listCollections as storeListCollections, syncConfigToDb, getStoreCollections, getStoreCollection, getStoreGlobalContext, getStoreContexts, upsertStoreCollection, deleteStoreCollection, renameStoreCollection, updateStoreContext, removeStoreContext, setStoreGlobalContext, vacuumDatabase, cleanupOrphanedContent, cleanupOrphanedVectors, deleteLLMCache, deleteInactiveDocuments, clearAllEmbeddings, } from "./store.js";
  20. import { LlamaCpp, } from "./llm.js";
  21. import { setConfigSource, loadConfig, addCollection as collectionsAddCollection, removeCollection as collectionsRemoveCollection, renameCollection as collectionsRenameCollection, addContext as collectionsAddContext, removeContext as collectionsRemoveContext, setGlobalContext as collectionsSetGlobalContext, } from "./collections.js";
  22. // Re-export utility functions and types used by frontends
  23. export { extractSnippet, addLineNumbers, DEFAULT_MULTI_GET_MAX_BYTES };
  24. // Re-export getDefaultDbPath for CLI/MCP that need the default database location
  25. export { getDefaultDbPath } from "./store.js";
  26. // Re-export Maintenance class for CLI housekeeping operations
  27. export { Maintenance } from "./maintenance.js";
  28. // Re-export embedding provider abstraction for SDK consumers (i-qkarfffa).
  29. // `createEmbeddingProvider` honors QMD_EMBED_ENDPOINT / config-file / kind
  30. // arg precedence; default fallback is the legacy LocalLlamaCppProvider so
  31. // SDK code that doesn't pass `embedProvider` keeps the prior behavior.
  32. export { createEmbeddingProvider, resolveProviderKind, LocalLlamaCppProvider, OpenAIEmbeddingsProvider, CircuitBreaker, CircuitOpenError, HttpError, ModelMismatchError, assertModelCompatible, DEFAULT_BATCH_SIZE as DEFAULT_PROVIDER_BATCH_SIZE, DEFAULT_TIMEOUT_MS as DEFAULT_PROVIDER_TIMEOUT_MS, RETRY_BACKOFFS_MS as PROVIDER_RETRY_BACKOFFS_MS, } from "./embedding/index.js";
  33. export { getDistinctEmbeddingModels } from "./store.js";
  /**
   * Create a QMD store for programmatic access to search and indexing.
   *
   * Configuration modes (mutually exclusive):
   *   - `configPath`: the path is registered via `setConfigSource`, the config
   *     is read with `loadConfig()` and synced into the database.
   *   - `config`: the inline object is registered via `setConfigSource` and
   *     synced into the database.
   *   - neither: DB-only mode; the collections already stored in the database
   *     are used and no config source is registered.
   *
   * If initialization fails after the database has been opened, the database
   * is closed and the config source is reset before the error is rethrown.
   *
   * @example
   * ```typescript
   * // With a YAML config file
   * const store = await createStore({
   *   dbPath: './index.sqlite',
   *   configPath: './qmd.yml',
   * })
   *
   * // With inline config (no files needed besides the DB)
   * const store = await createStore({
   *   dbPath: './index.sqlite',
   *   config: {
   *     collections: {
   *       docs: { path: '/path/to/docs', pattern: '**\/*.md' }
   *     }
   *   }
   * })
   *
   * const results = await store.search({ query: "authentication flow" })
   * await store.close()
   * ```
   *
   * @param {object} options
   * @param {string} options.dbPath - Database path handed to the internal store (required).
   * @param {string} [options.configPath] - Path of a config file to load and sync.
   * @param {object} [options.config] - Inline config object to sync.
   * @param {object} [options.embedProvider] - Store-level default embedding
   *   provider for `search` / `searchVector`; a per-call `embedProvider` wins.
   * @returns {Promise<object>} The store facade (search, collection/context
   *   management, indexing, health and `close`).
   * @throws {Error} When `dbPath` is missing, or when both `configPath` and
   *   `config` are supplied.
   */
  export async function createStore(options) {
    // `?.` makes a missing options object fail with the same descriptive error
    // as a missing dbPath instead of a TypeError.
    if (!options?.dbPath) {
      throw new Error("dbPath is required");
    }
    if (options.configPath && options.config) {
      throw new Error("Provide either configPath or config, not both");
    }

    // Track whether we have a YAML config path for write-through.
    const hasYamlConfig = Boolean(options.configPath);
    // Captured once so later mutation of the caller's options object cannot
    // change whether mutations are written through to the config source.
    const hasConfigSource = hasYamlConfig || Boolean(options.config);

    // Create the internal store (opens DB, creates tables).
    const internal = createStoreInternal(options.dbPath);
    const db = internal.db;

    let config;
    let llm;
    try {
      if (hasYamlConfig) {
        // YAML mode: inject config source for write-through, sync to DB.
        setConfigSource({ configPath: options.configPath });
        config = loadConfig();
        syncConfigToDb(db, config);
      } else if (options.config) {
        // Inline config mode: inject config source for mutations, sync to DB.
        setConfigSource({ config: options.config });
        config = options.config;
        syncConfigToDb(db, config);
      }
      // else: DB-only mode — no external config, use existing store_collections.

      // Per-store LlamaCpp instance — lazy-loads models on first use,
      // auto-unloads after 5 min inactivity to free VRAM.
      llm = new LlamaCpp({
        embedModel: config?.models?.embed,
        generateModel: config?.models?.generate,
        rerankModel: config?.models?.rerank,
        inactivityTimeoutMs: 5 * 60 * 1000,
        disposeModelsOnInactivity: true,
      });
    } catch (err) {
      // Initialization failed after the DB was opened: release it and undo the
      // config-source registration so the failed attempt leaves nothing behind.
      try {
        internal.close();
      } catch {
        // Deliberately ignored: the initialization error is the one to report.
      }
      if (hasConfigSource) {
        setConfigSource(undefined);
      }
      throw err;
    }
    internal.llm = llm;

    const store = {
      internal,
      dbPath: internal.dbPath,

      // Search
      search: async (opts) => {
        if (!opts.query && !opts.queries) {
          throw new Error("search() requires either 'query' or 'queries'");
        }
        // Normalize collection/collections into one list.
        const collections = [
          ...(opts.collection ? [opts.collection] : []),
          ...(opts.collections ?? []),
        ];
        const skipRerank = opts.rerank === false;
        // Per-call provider wins over store-level default.
        const provider = opts.embedProvider ?? options.embedProvider;
        if (opts.queries) {
          // Pre-expanded queries — use structuredSearch.
          return structuredSearch(internal, opts.queries, {
            collections: collections.length > 0 ? collections : undefined,
            limit: opts.limit,
            minScore: opts.minScore,
            explain: opts.explain,
            intent: opts.intent,
            skipRerank,
            chunkStrategy: opts.chunkStrategy,
            ...(provider ? { embedProvider: provider } : {}),
          });
        }
        // Simple query string — use hybridQuery (expand + search + rerank).
        // NOTE(review): only the first normalized collection is forwarded here;
        // any further entries are not passed on — confirm this is intended.
        return hybridQuery(internal, opts.query, {
          collection: collections[0],
          limit: opts.limit,
          minScore: opts.minScore,
          explain: opts.explain,
          intent: opts.intent,
          skipRerank,
          chunkStrategy: opts.chunkStrategy,
          ...(provider ? { embedProvider: provider } : {}),
        });
      },
      searchLex: async (q, opts) => internal.searchFTS(q, opts?.limit, opts?.collection),
      searchVector: async (q, opts) => internal.searchVec(
        q,
        DEFAULT_EMBED_MODEL,
        opts?.limit,
        opts?.collection,
        undefined,
        undefined,
        opts?.embedProvider ?? options.embedProvider,
      ),
      expandQuery: async (q, opts) => internal.expandQuery(q, undefined, opts?.intent),
      get: async (pathOrDocid, opts) => internal.findDocument(pathOrDocid, opts),
      getDocumentBody: async (pathOrDocid, opts) => {
        // Resolve the document without its body first; a result carrying an
        // `error` key is reported to the caller as null.
        const result = internal.findDocument(pathOrDocid, { includeBody: false });
        if ("error" in result) {
          return null;
        }
        return internal.getDocumentBody(result, opts?.fromLine, opts?.maxLines);
      },
      multiGet: async (pattern, opts) => internal.findDocuments(pattern, opts),

      // Collection Management — write to SQLite + write-through to YAML/inline if configured
      addCollection: async (name, opts) => {
        upsertStoreCollection(db, name, { path: opts.path, pattern: opts.pattern, ignore: opts.ignore });
        if (hasConfigSource) {
          // NOTE(review): `ignore` is stored in the DB above but is not passed
          // to the config write-through — confirm against collections.js.
          collectionsAddCollection(name, opts.path, opts.pattern);
        }
      },
      removeCollection: async (name) => {
        const result = deleteStoreCollection(db, name);
        if (hasConfigSource) {
          collectionsRemoveCollection(name);
        }
        return result;
      },
      renameCollection: async (oldName, newName) => {
        const result = renameStoreCollection(db, oldName, newName);
        if (hasConfigSource) {
          collectionsRenameCollection(oldName, newName);
        }
        return result;
      },
      listCollections: async () => storeListCollections(db),
      getDefaultCollectionNames: async () => {
        const collections = storeListCollections(db);
        return collections.filter((c) => c.includeByDefault).map((c) => c.name);
      },

      // Context Management — write to SQLite + write-through to YAML/inline if configured
      addContext: async (collectionName, pathPrefix, contextText) => {
        const result = updateStoreContext(db, collectionName, pathPrefix, contextText);
        if (hasConfigSource) {
          collectionsAddContext(collectionName, pathPrefix, contextText);
        }
        return result;
      },
      removeContext: async (collectionName, pathPrefix) => {
        const result = removeStoreContext(db, collectionName, pathPrefix);
        if (hasConfigSource) {
          collectionsRemoveContext(collectionName, pathPrefix);
        }
        return result;
      },
      setGlobalContext: async (context) => {
        setStoreGlobalContext(db, context);
        if (hasConfigSource) {
          collectionsSetGlobalContext(context);
        }
      },
      getGlobalContext: async () => getStoreGlobalContext(db),
      listContexts: async () => getStoreContexts(db),

      // Indexing — reads collections from SQLite
      update: async (updateOpts) => {
        const collections = getStoreCollections(db);
        // Restrict to the requested collection names when a list is given.
        const filtered = updateOpts?.collections
          ? collections.filter((c) => updateOpts.collections.includes(c.name))
          : collections;
        internal.clearCache();
        let totalIndexed = 0;
        let totalUpdated = 0;
        let totalUnchanged = 0;
        let totalRemoved = 0;
        // Collections are reindexed one after another, not in parallel.
        for (const col of filtered) {
          const result = await reindexCollection(internal, col.path, col.pattern || "**/*.md", col.name, {
            ignorePatterns: col.ignore,
            // Tag every progress event with the collection it belongs to.
            onProgress: updateOpts?.onProgress
              ? (info) => updateOpts.onProgress({ collection: col.name, ...info })
              : undefined,
          });
          totalIndexed += result.indexed;
          totalUpdated += result.updated;
          totalUnchanged += result.unchanged;
          totalRemoved += result.removed;
        }
        return {
          collections: filtered.length,
          indexed: totalIndexed,
          updated: totalUpdated,
          unchanged: totalUnchanged,
          removed: totalRemoved,
          needsEmbedding: internal.getHashesNeedingEmbedding(),
        };
      },
      // NOTE(review): unlike search/searchVector, no embedProvider is forwarded
      // here — confirm whether generateEmbeddings should receive one.
      embed: async (embedOpts) => generateEmbeddings(internal, {
        force: embedOpts?.force,
        model: embedOpts?.model,
        maxDocsPerBatch: embedOpts?.maxDocsPerBatch,
        maxBatchBytes: embedOpts?.maxBatchBytes,
        chunkStrategy: embedOpts?.chunkStrategy,
        onProgress: embedOpts?.onProgress,
      }),

      // Index Health
      getStatus: async () => internal.getStatus(),
      getIndexHealth: async () => internal.getIndexHealth(),

      // Lifecycle
      close: async () => {
        // Nested finally blocks guarantee the DB is closed and the config
        // source is reset even when an earlier teardown step throws.
        try {
          await llm.dispose();
        } finally {
          try {
            internal.close();
          } finally {
            if (hasConfigSource) {
              setConfigSource(undefined); // Reset config source
            }
          }
        }
      },
    };
    return store;
  }