index.js 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239
  1. /**
  2. * QMD SDK - Library mode for programmatic access to QMD search and indexing.
  3. *
  4. * Usage:
  5. * import { createStore } from '@tobilu/qmd'
  6. *
  7. * const store = await createStore({
  8. * dbPath: './my-index.sqlite',
  9. * config: {
  10. * collections: {
  11. * docs: { path: '/path/to/docs', pattern: '**\/*.md' }
  12. * }
  13. * }
  14. * })
  15. *
  16. * const results = await store.search({ query: "how does auth work?" })
  17. * await store.close()
  18. */
  19. import { createStore as createStoreInternal, hybridQuery, structuredSearch, extractSnippet, addLineNumbers, DEFAULT_EMBED_MODEL, DEFAULT_MULTI_GET_MAX_BYTES, reindexCollection, generateEmbeddings, listCollections as storeListCollections, syncConfigToDb, getStoreCollections, getStoreCollection, getStoreGlobalContext, getStoreContexts, upsertStoreCollection, deleteStoreCollection, renameStoreCollection, updateStoreContext, removeStoreContext, setStoreGlobalContext, vacuumDatabase, cleanupOrphanedContent, cleanupOrphanedVectors, deleteLLMCache, deleteInactiveDocuments, clearAllEmbeddings, } from "./store.js";
  20. import { LlamaCpp, } from "./llm.js";
  21. import { setConfigSource, loadConfig, addCollection as collectionsAddCollection, removeCollection as collectionsRemoveCollection, renameCollection as collectionsRenameCollection, addContext as collectionsAddContext, removeContext as collectionsRemoveContext, setGlobalContext as collectionsSetGlobalContext, } from "./collections.js";
  22. // Re-export utility functions and types used by frontends
  23. export { extractSnippet, addLineNumbers, DEFAULT_MULTI_GET_MAX_BYTES };
  24. // Re-export getDefaultDbPath for CLI/MCP that need the default database location
  25. export { getDefaultDbPath } from "./store.js";
  26. // Re-export Maintenance class for CLI housekeeping operations
  27. export { Maintenance } from "./maintenance.js";
  /**
   * Create a QMD store for programmatic access to search and indexing.
   *
   * Opens the index database at `options.dbPath`, optionally syncs collection
   * config into it, and attaches a per-store LlamaCpp instance to the store.
   *
   * Config modes (mutually exclusive):
   * - `configPath`: config is loaded via `loadConfig()` and synced to the DB;
   *   collection/context mutations are also written through to the config source.
   * - `config`: the inline object is synced to the DB and used as the config
   *   source for mutations.
   * - neither: DB-only mode; the collections already stored in the DB are used.
   *
   * If initialization fails after the database has been opened, the database is
   * closed and any injected config source is reset before the error is rethrown.
   *
   * @example
   * ```typescript
   * // With a YAML config file
   * const store = await createStore({
   *   dbPath: './index.sqlite',
   *   configPath: './qmd.yml',
   * })
   *
   * // With inline config (no files needed besides the DB)
   * const store = await createStore({
   *   dbPath: './index.sqlite',
   *   config: {
   *     collections: {
   *       docs: { path: '/path/to/docs', pattern: '**\/*.md' }
   *     }
   *   }
   * })
   *
   * const results = await store.search({ query: "authentication flow" })
   * await store.close()
   * ```
   *
   * @param {object} options
   * @param {string} options.dbPath - Path of the index database (required).
   * @param {string} [options.configPath] - Path of a YAML config file.
   * @param {object} [options.config] - Inline config object.
   * @returns {Promise<object>} The store facade (search, collection/context
   *   management, indexing, health and lifecycle methods).
   * @throws {Error} If `dbPath` is missing, or if both `configPath` and `config`
   *   are provided.
   */
  export async function createStore(options) {
    // `?.` so that a missing options object yields the documented error
    // instead of an incidental TypeError.
    if (!options?.dbPath) {
      throw new Error("dbPath is required");
    }
    if (options.configPath && options.config) {
      throw new Error("Provide either configPath or config, not both");
    }
    // True when mutations must be mirrored to an external config source
    // (YAML file or inline object) in addition to SQLite.
    const writesThrough = Boolean(options.configPath || options.config);
    // Create the internal store (opens DB, creates tables)
    const internal = createStoreInternal(options.dbPath);
    const db = internal.db;
    let config;
    let llm;
    let configSourceInjected = false;
    try {
      // Sync config into SQLite store_collections
      if (options.configPath) {
        // YAML mode: inject config source for write-through, sync to DB
        setConfigSource({ configPath: options.configPath });
        configSourceInjected = true;
        config = loadConfig();
        syncConfigToDb(db, config);
      } else if (options.config) {
        // Inline config mode: inject config source for mutations, sync to DB
        setConfigSource({ config: options.config });
        configSourceInjected = true;
        config = options.config;
        syncConfigToDb(db, config);
      }
      // else: DB-only mode — no external config, use existing store_collections

      // Create a per-store LlamaCpp instance — lazy-loads models on first use,
      // auto-unloads after 5 min inactivity to free VRAM.
      llm = new LlamaCpp({
        embedModel: config?.models?.embed,
        generateModel: config?.models?.generate,
        rerankModel: config?.models?.rerank,
        inactivityTimeoutMs: 5 * 60 * 1000,
        disposeModelsOnInactivity: true,
      });
    } catch (err) {
      // The DB is already open at this point: release it (and undo the config
      // source injection) so a failed createStore() does not leak resources.
      try {
        internal.close();
      } catch {
        // Ignored on purpose: `err` is the failure worth reporting.
      } finally {
        if (configSourceInjected) {
          setConfigSource(undefined);
        }
      }
      throw err;
    }
    internal.llm = llm;

    const store = {
      internal,
      dbPath: internal.dbPath,

      // Search
      search: async (opts) => {
        if (!opts?.query && !opts?.queries) {
          throw new Error("search() requires either 'query' or 'queries'");
        }
        // Normalize collection/collections into a single list
        const collections = [
          ...(opts.collection ? [opts.collection] : []),
          ...(opts.collections ?? []),
        ];
        const skipRerank = opts.rerank === false;
        if (opts.queries) {
          // Pre-expanded queries — use structuredSearch
          return structuredSearch(internal, opts.queries, {
            collections: collections.length > 0 ? collections : undefined,
            limit: opts.limit,
            minScore: opts.minScore,
            explain: opts.explain,
            intent: opts.intent,
            skipRerank,
            chunkStrategy: opts.chunkStrategy,
          });
        }
        // Simple query string — use hybridQuery (expand + search + rerank).
        // NOTE(review): only the first collection is forwarded here, so extra
        // entries in `collections` are dropped on this path — confirm whether
        // hybridQuery can accept a list.
        return hybridQuery(internal, opts.query, {
          collection: collections[0],
          limit: opts.limit,
          minScore: opts.minScore,
          explain: opts.explain,
          intent: opts.intent,
          skipRerank,
          chunkStrategy: opts.chunkStrategy,
        });
      },
      searchLex: async (q, opts) => internal.searchFTS(q, opts?.limit, opts?.collection),
      // NOTE(review): always passes DEFAULT_EMBED_MODEL, even when the config
      // names a different embed model — confirm this is intended.
      searchVector: async (q, opts) => internal.searchVec(q, DEFAULT_EMBED_MODEL, opts?.limit, opts?.collection),
      expandQuery: async (q, opts) => internal.expandQuery(q, undefined, opts?.intent),
      get: async (pathOrDocid, opts) => internal.findDocument(pathOrDocid, opts),
      getDocumentBody: async (pathOrDocid, opts) => {
        // Resolve the document first (without its body); lookup errors map to null.
        const result = internal.findDocument(pathOrDocid, { includeBody: false });
        if ("error" in result) {
          return null;
        }
        return internal.getDocumentBody(result, opts?.fromLine, opts?.maxLines);
      },
      multiGet: async (pattern, opts) => internal.findDocuments(pattern, opts),

      // Collection Management — write to SQLite + write-through to YAML/inline if configured
      addCollection: async (name, opts) => {
        upsertStoreCollection(db, name, { path: opts.path, pattern: opts.pattern, ignore: opts.ignore });
        if (writesThrough) {
          // NOTE(review): `opts.ignore` is stored in SQLite but not passed to the
          // config write-through — verify against collections.js.
          collectionsAddCollection(name, opts.path, opts.pattern);
        }
      },
      removeCollection: async (name) => {
        const result = deleteStoreCollection(db, name);
        if (writesThrough) {
          collectionsRemoveCollection(name);
        }
        return result;
      },
      renameCollection: async (oldName, newName) => {
        const result = renameStoreCollection(db, oldName, newName);
        if (writesThrough) {
          collectionsRenameCollection(oldName, newName);
        }
        return result;
      },
      listCollections: async () => storeListCollections(db),
      getDefaultCollectionNames: async () => {
        const collections = storeListCollections(db);
        return collections.filter((c) => c.includeByDefault).map((c) => c.name);
      },

      // Context Management — write to SQLite + write-through to YAML/inline if configured
      addContext: async (collectionName, pathPrefix, contextText) => {
        const result = updateStoreContext(db, collectionName, pathPrefix, contextText);
        if (writesThrough) {
          collectionsAddContext(collectionName, pathPrefix, contextText);
        }
        return result;
      },
      removeContext: async (collectionName, pathPrefix) => {
        const result = removeStoreContext(db, collectionName, pathPrefix);
        if (writesThrough) {
          collectionsRemoveContext(collectionName, pathPrefix);
        }
        return result;
      },
      setGlobalContext: async (context) => {
        setStoreGlobalContext(db, context);
        if (writesThrough) {
          collectionsSetGlobalContext(context);
        }
      },
      getGlobalContext: async () => getStoreGlobalContext(db),
      listContexts: async () => getStoreContexts(db),

      // Indexing — reads collections from SQLite
      update: async (updateOpts) => {
        const collections = getStoreCollections(db);
        // Restrict to the requested collection names, if any were given.
        const filtered = updateOpts?.collections
          ? collections.filter((c) => updateOpts.collections.includes(c.name))
          : collections;
        internal.clearCache();
        let totalIndexed = 0;
        let totalUpdated = 0;
        let totalUnchanged = 0;
        let totalRemoved = 0;
        // Collections are reindexed one at a time, in list order.
        for (const col of filtered) {
          const result = await reindexCollection(internal, col.path, col.pattern || "**/*.md", col.name, {
            ignorePatterns: col.ignore,
            // Tag each progress event with the collection it belongs to.
            onProgress: updateOpts?.onProgress
              ? (info) => updateOpts.onProgress({ collection: col.name, ...info })
              : undefined,
          });
          totalIndexed += result.indexed;
          totalUpdated += result.updated;
          totalUnchanged += result.unchanged;
          totalRemoved += result.removed;
        }
        return {
          collections: filtered.length,
          indexed: totalIndexed,
          updated: totalUpdated,
          unchanged: totalUnchanged,
          removed: totalRemoved,
          needsEmbedding: internal.getHashesNeedingEmbedding(),
        };
      },
      embed: async (embedOpts) => {
        return generateEmbeddings(internal, {
          force: embedOpts?.force,
          model: embedOpts?.model,
          maxDocsPerBatch: embedOpts?.maxDocsPerBatch,
          maxBatchBytes: embedOpts?.maxBatchBytes,
          chunkStrategy: embedOpts?.chunkStrategy,
          onProgress: embedOpts?.onProgress,
        });
      },

      // Index Health
      getStatus: async () => internal.getStatus(),
      getIndexHealth: async () => internal.getIndexHealth(),

      // Lifecycle
      close: async () => {
        // Each step runs even if the previous one throws, so a failing model
        // dispose cannot leave the DB open or the config source pointing here.
        try {
          await llm.dispose();
        } finally {
          try {
            internal.close();
          } finally {
            if (writesThrough) {
              setConfigSource(undefined); // Reset config source
            }
          }
        }
      },
    };
    return store;
  }