|
@@ -193,7 +193,7 @@ export type RerankDocument = {
|
|
|
// HuggingFace model URIs for node-llama-cpp
|
|
// HuggingFace model URIs for node-llama-cpp
|
|
|
// Format: hf:<user>/<repo>/<file>
|
|
// Format: hf:<user>/<repo>/<file>
|
|
|
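// Override via QMD_EMBED_MODEL, QMD_GENERATE_MODEL, or QMD_RERANK_MODEL env vars; an explicit config value takes precedence (e.g. QMD_EMBED_MODEL=hf:Qwen/Qwen3-Embedding-0.6B-GGUF/Qwen3-Embedding-0.6B-Q8_0.gguf)
|
|
// Override via QMD_EMBED_MODEL, QMD_GENERATE_MODEL, or QMD_RERANK_MODEL env vars; an explicit config value takes precedence (e.g. QMD_EMBED_MODEL=hf:Qwen/Qwen3-Embedding-0.6B-GGUF/Qwen3-Embedding-0.6B-Q8_0.gguf)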
|
|
|
-const DEFAULT_EMBED_MODEL = process.env.QMD_EMBED_MODEL ?? "hf:ggml-org/embeddinggemma-300M-GGUF/embeddinggemma-300M-Q8_0.gguf";
|
|
|
|
|
|
|
+const DEFAULT_EMBED_MODEL = "hf:ggml-org/embeddinggemma-300M-GGUF/embeddinggemma-300M-Q8_0.gguf";
|
|
|
const DEFAULT_RERANK_MODEL = "hf:ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF/qwen3-reranker-0.6b-q8_0.gguf";
|
|
const DEFAULT_RERANK_MODEL = "hf:ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF/qwen3-reranker-0.6b-q8_0.gguf";
|
|
|
// const DEFAULT_GENERATE_MODEL = "hf:ggml-org/Qwen3-0.6B-GGUF/Qwen3-0.6B-Q8_0.gguf";
|
|
// const DEFAULT_GENERATE_MODEL = "hf:ggml-org/Qwen3-0.6B-GGUF/Qwen3-0.6B-Q8_0.gguf";
|
|
|
const DEFAULT_GENERATE_MODEL = "hf:tobil/qmd-query-expansion-1.7B-gguf/qmd-query-expansion-1.7B-q4_k_m.gguf";
|
|
const DEFAULT_GENERATE_MODEL = "hf:tobil/qmd-query-expansion-1.7B-gguf/qmd-query-expansion-1.7B-q4_k_m.gguf";
|
|
@@ -436,9 +436,9 @@ export class LlamaCpp implements LLM {
|
|
|
|
|
|
|
|
|
|
|
|
|
constructor(config: LlamaCppConfig = {}) {
|
|
constructor(config: LlamaCppConfig = {}) {
|
|
|
- this.embedModelUri = config.embedModel || DEFAULT_EMBED_MODEL;
|
|
|
|
|
- this.generateModelUri = config.generateModel || DEFAULT_GENERATE_MODEL;
|
|
|
|
|
- this.rerankModelUri = config.rerankModel || DEFAULT_RERANK_MODEL;
|
|
|
|
|
|
|
+ this.embedModelUri = config.embedModel || process.env.QMD_EMBED_MODEL || DEFAULT_EMBED_MODEL;
|
|
|
|
|
+ this.generateModelUri = config.generateModel || process.env.QMD_GENERATE_MODEL || DEFAULT_GENERATE_MODEL;
|
|
|
|
|
+ this.rerankModelUri = config.rerankModel || process.env.QMD_RERANK_MODEL || DEFAULT_RERANK_MODEL;
|
|
|
this.modelCacheDir = config.modelCacheDir || MODEL_CACHE_DIR;
|
|
this.modelCacheDir = config.modelCacheDir || MODEL_CACHE_DIR;
|
|
|
this.expandContextSize = resolveExpandContextSize(config.expandContextSize);
|
|
this.expandContextSize = resolveExpandContextSize(config.expandContextSize);
|
|
|
this.inactivityTimeoutMs = config.inactivityTimeoutMs ?? DEFAULT_INACTIVITY_TIMEOUT_MS;
|
|
this.inactivityTimeoutMs = config.inactivityTimeoutMs ?? DEFAULT_INACTIVITY_TIMEOUT_MS;
|
|
@@ -1559,8 +1559,7 @@ let defaultLlamaCpp: LlamaCpp | null = null;
|
|
|
*/
|
|
*/
|
|
|
export function getDefaultLlamaCpp(): LlamaCpp {
|
|
export function getDefaultLlamaCpp(): LlamaCpp {
|
|
|
if (!defaultLlamaCpp) {
|
|
if (!defaultLlamaCpp) {
|
|
|
- const embedModel = process.env.QMD_EMBED_MODEL;
|
|
|
|
|
- defaultLlamaCpp = new LlamaCpp(embedModel ? { embedModel } : {});
|
|
|
|
|
|
|
+ defaultLlamaCpp = new LlamaCpp();
|
|
|
}
|
|
}
|
|
|
return defaultLlamaCpp;
|
|
return defaultLlamaCpp;
|
|
|
}
|
|
}
|