hai 5 meses · 739038e1a7
--- a/finetune/CLAUDE.md
+++ b/finetune/CLAUDE.md
@@ -18,14 +18,20 @@ vec: another semantic variation
 
															 - `lex:` lines for BM25 keyword search (1-3 lines, short keywords)
														
 
															 - `vec:` lines for vector similarity search (1-3 lines, natural language)
														
 
															-## Model Repository
														
 
															-
														
 
															-**Single destination**: `tobil/qmd-query-expansion-1.7B`
														
 
															-
														
 
															-- No versioned directories (`-v1`, `-v2`, `-v4`, etc.)
														
 
															-- No separate `-sft` or `-grpo` repos for final models
														
 
															-- Update the main repo only when eval scores improve
														
 
															-- GGUF variants go to `tobil/qmd-query-expansion-1.7B-gguf`
														
 
															+## HuggingFace Repositories
														
 
															+
														
 
															+| Repository | Purpose |
														
 
															+|------------|---------|
														
 
															+| `tobil/qmd-query-expansion-1.7B` | Final merged model (SFT + GRPO) |
														
 
															+| `tobil/qmd-query-expansion-1.7B-gguf` | GGUF quantized versions for deployment |
														
 
															+| `tobil/qmd-query-expansion-1.7B-sft` | SFT adapter checkpoint (intermediate) |
														
 
															+| `tobil/qmd-query-expansion-1.7B-grpo` | GRPO adapter checkpoint (intermediate) |
														
 
															+| `tobil/qmd-query-expansion-train` | Prepared training dataset |
														
 
															+
														
 
															+**Rules:**
														
 
															+- No versioned repos (`-v1`, `-v2`, `-v4`, etc.) - update in place
														
 
															+- Only push when eval scores improve over current deployed model
														
 
															+- Always include eval results in model card when pushing
														
 
															 ## Training Data
														
--- a/finetune/jobs/grpo.py
+++ b/finetune/jobs/grpo.py
@@ -46,7 +46,7 @@ from eval_common import QMDRewardFunction, run_eval
 
															 BASE_MODEL = "Qwen/Qwen3-1.7B"
														
 
															 SFT_MODEL = "tobil/qmd-query-expansion-1.7B-sft"
														
 
															 OUTPUT_MODEL = "tobil/qmd-query-expansion-1.7B-grpo"
														
 
															-DATASET = "tobil/qmd-query-expansion-train-v2"
														
 
															+DATASET = "tobil/qmd-query-expansion-train"
														
 
															 def main():
														
--- a/finetune/jobs/sft.py
+++ b/finetune/jobs/sft.py
@@ -25,7 +25,7 @@ from huggingface_hub import login
 
															 # --- Config (inlined from configs/sft.yaml) ---
														
 
															 BASE_MODEL = "Qwen/Qwen3-1.7B"
														
 
															 OUTPUT_MODEL = "tobil/qmd-query-expansion-1.7B-sft"
														
 
															-DATASET = "tobil/qmd-query-expansion-train-v2"
														
 
															+DATASET = "tobil/qmd-query-expansion-train"
														
 
															 hf_token = os.environ.get("HF_TOKEN")
														
 
															 if hf_token: