il y a 4 mois · b71649b12d
--- a/README.md
+++ b/README.md
@@ -252,12 +252,34 @@ QMD uses three local GGUF models (auto-downloaded on first use):
 
				 
			
 
				 | Model | Purpose | Size |
			
 
				 |-------|---------|------|
			
 
				-| `embeddinggemma-300M-Q8_0` | Vector embeddings | ~300MB |
			
 
				+| `embeddinggemma-300M-Q8_0` | Vector embeddings (default) | ~300MB |
			
 
				 | `qwen3-reranker-0.6b-q8_0` | Re-ranking | ~640MB |
			
 
				 | `qmd-query-expansion-1.7B-q4_k_m` | Query expansion (fine-tuned) | ~1.1GB |
			
 
				 
			
 
				 Models are downloaded from HuggingFace and cached in `~/.cache/qmd/models/`.
			
 
				 
			
 
				+### Custom Embedding Model
			
 
				+
			
 
				+Override the default embedding model by setting the `QMD_EMBED_MODEL` environment variable to a model URI (`hf:<user>/<repo>/<file>`).
			
 
				+This is useful for multilingual corpora (e.g. Chinese, Japanese, Korean) where
			
 
				+`embeddinggemma-300M` has limited coverage.
			
 
				+
			
 
				+```sh
			
 
				+# Use Qwen3-Embedding-0.6B for better multilingual (CJK) support
			
 
				+export QMD_EMBED_MODEL="hf:Qwen/Qwen3-Embedding-0.6B-GGUF/qwen3-embedding-0.6b-q8_0.gguf"
			
 
				+
			
 
				+# After changing the model, re-embed all collections:
			
 
				+qmd embed -f
			
 
				+```
			
 
				+
			
 
				+Supported model families:
			
 
				+- **embeddinggemma** (default) — English-optimized, small footprint
			
 
				+- **Qwen3-Embedding** — Multilingual (119 languages including CJK), MTEB top-ranked
			
 
				+
			
 
				+> **Note:** When switching embedding models, you must re-embed all collections with `qmd embed -f`
			
 
				+> since vectors are not cross-compatible between models. The prompt format is
			
 
				+> automatically adjusted for each model family.
			
 
				+
			
 
				 ## Installation
			
 
				 
			
 
				 ```sh
			
--- a/src/llm.ts
+++ b/src/llm.ts
@@ -23,19 +23,38 @@ import { existsSync, mkdirSync, statSync, unlinkSync, readdirSync, readFileSync,
 
				 // Embedding Formatting Functions
			
 
				 // =============================================================================
			
 
				 
			
 
				+/**
			
 
				+ * Detect whether a model URI names a Qwen embedding model (contains "qwen" and "embed" in either order, case-insensitive).
			
 
				+ * Qwen3-Embedding uses a different prompting style than nomic/embeddinggemma.
			
 
				+ */
			
 
				+export function isQwen3EmbeddingModel(modelUri: string): boolean {
			
 
				+  return /qwen.*embed/i.test(modelUri) || /embed.*qwen/i.test(modelUri);
			
 
				+}
			
 
				+
			
 
				 /**
			
 
				  * Format a query for embedding.
			
 
				- * Uses nomic-style task prefix format for embeddinggemma.
			
 
				+ * Uses nomic-style task prefix format for embeddinggemma (default).
			
 
				+ * Uses Qwen3-Embedding instruct format when a Qwen embedding model is active.
			
 
				  */
			
 
				-export function formatQueryForEmbedding(query: string): string {
			
 
				+export function formatQueryForEmbedding(query: string, modelUri?: string): string {
			
 
				+  const uri = modelUri ?? process.env.QMD_EMBED_MODEL ?? DEFAULT_EMBED_MODEL;
			
 
				+  if (isQwen3EmbeddingModel(uri)) {
			
 
				+    return `Instruct: Retrieve relevant documents for the given query\nQuery: ${query}`;
			
 
				+  }
			
 
				   return `task: search result | query: ${query}`;
			
 
				 }
			
 
				 
			
 
				 /**
			
 
				  * Format a document for embedding.
			
 
				- * Uses nomic-style format with title and text fields.
			
 
				+ * Uses nomic-style format with title and text fields (default).
			
 
				+ * Qwen3-Embedding encodes documents as raw text without task prefixes (the title, if given, goes on its own first line).
			
 
				  */
			
 
				-export function formatDocForEmbedding(text: string, title?: string): string {
			
 
				+export function formatDocForEmbedding(text: string, title?: string, modelUri?: string): string {
			
 
				+  const uri = modelUri ?? process.env.QMD_EMBED_MODEL ?? DEFAULT_EMBED_MODEL;
			
 
				+  if (isQwen3EmbeddingModel(uri)) {
			
 
				+    // Qwen3-Embedding: documents are raw text, no task prefix
			
 
				+    return title ? `${title}\n${text}` : text;
			
 
				+  }
			
 
				   return `title: ${title || "none"} | text: ${text}`;
			
 
				 }
			
 
				 
			
@@ -174,7 +193,8 @@ export type RerankDocument = {
 
				 
			
 
				 // HuggingFace model URIs for node-llama-cpp
			
 
				 // Format: hf:<user>/<repo>/<file>
			
 
				-const DEFAULT_EMBED_MODEL = "hf:ggml-org/embeddinggemma-300M-GGUF/embeddinggemma-300M-Q8_0.gguf";
			
 
				+// Override via QMD_EMBED_MODEL env var (e.g. hf:Qwen/Qwen3-Embedding-0.6B-GGUF/qwen3-embedding-0.6b-q8_0.gguf)
			
 
				+const DEFAULT_EMBED_MODEL = process.env.QMD_EMBED_MODEL ?? "hf:ggml-org/embeddinggemma-300M-GGUF/embeddinggemma-300M-Q8_0.gguf";
			
 
				 const DEFAULT_RERANK_MODEL = "hf:ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF/qwen3-reranker-0.6b-q8_0.gguf";
			
 
				 // const DEFAULT_GENERATE_MODEL = "hf:ggml-org/Qwen3-0.6B-GGUF/Qwen3-0.6B-Q8_0.gguf";
			
 
				 const DEFAULT_GENERATE_MODEL = "hf:tobil/qmd-query-expansion-1.7B-gguf/qmd-query-expansion-1.7B-q4_k_m.gguf";
			
@@ -1396,7 +1416,8 @@ let defaultLlamaCpp: LlamaCpp | null = null;
 
				  */
			
 
				 export function getDefaultLlamaCpp(): LlamaCpp {
			
 
				   if (!defaultLlamaCpp) {
			
 
				-    defaultLlamaCpp = new LlamaCpp();
			
 
				+    const embedModel = process.env.QMD_EMBED_MODEL;
			
 
				+    defaultLlamaCpp = new LlamaCpp(embedModel ? { embedModel } : {});
			
 
				   }
			
 
				   return defaultLlamaCpp;
			
 
				 }
			
--- a/src/store.ts
+++ b/src/store.ts
@@ -2242,7 +2242,7 @@ export async function searchVec(db: Database, query: string, model: string, limi
 
				 
			
 
				 async function getEmbedding(text: string, model: string, isQuery: boolean, session?: ILLMSession): Promise<number[] | null> {
			
 
				   // Format text using the appropriate prompt template
			
 
				-  const formattedText = isQuery ? formatQueryForEmbedding(text) : formatDocForEmbedding(text);
			
 
				+  const formattedText = isQuery ? formatQueryForEmbedding(text, model) : formatDocForEmbedding(text, undefined, model);
			
 
				   const result = session
			
 
				     ? await session.embed(formattedText, { model, isQuery })
			
 
				     : await getDefaultLlamaCpp().embed(formattedText, { model, isQuery });