5 months ago · 739038e1a7
--- a/finetune/CLAUDE.md
+++ b/finetune/CLAUDE.md
@@ -18,14 +18,20 @@ vec: another semantic variation
 
				 - `lex:` lines for BM25 keyword search (1-3 lines, short keywords)
			
 
				 - `vec:` lines for vector similarity search (1-3 lines, natural language)
			
 
				 
			
 
				-## Model Repository
			
 
				-
			
 
				-**Single destination**: `tobil/qmd-query-expansion-1.7B`
			
 
				-
			
 
				-- No versioned directories (`-v1`, `-v2`, `-v4`, etc.)
			
 
				-- No separate `-sft` or `-grpo` repos for final models
			
 
				-- Update the main repo only when eval scores improve
			
 
				-- GGUF variants go to `tobil/qmd-query-expansion-1.7B-gguf`
			
 
				+## HuggingFace Repositories
			
 
				+
			
 
				+| Repository | Purpose |
			
 
				+|------------|---------|
			
 
				+| `tobil/qmd-query-expansion-1.7B` | Final merged model (SFT + GRPO) |
			
 
				+| `tobil/qmd-query-expansion-1.7B-gguf` | GGUF quantized versions for deployment |
			
 
				+| `tobil/qmd-query-expansion-1.7B-sft` | SFT adapter checkpoint (intermediate) |
			
 
				+| `tobil/qmd-query-expansion-1.7B-grpo` | GRPO adapter checkpoint (intermediate) |
			
 
				+| `tobil/qmd-query-expansion-train` | Prepared training dataset |
			
 
				+
			
 
				+**Rules:**
			
 
				+- No versioned repos (`-v1`, `-v2`, `-v4`, etc.) - update in place
			
 
				+- Only push when eval scores improve over the currently deployed model
			
 
				+- Always include eval results in the model card when pushing
			
 
				 
			
 
				 ## Training Data
			
 
				 
			
--- a/finetune/jobs/grpo.py
+++ b/finetune/jobs/grpo.py
@@ -46,7 +46,7 @@ from eval_common import QMDRewardFunction, run_eval
 
				 BASE_MODEL = "Qwen/Qwen3-1.7B"
			
 
				 SFT_MODEL = "tobil/qmd-query-expansion-1.7B-sft"
			
 
				 OUTPUT_MODEL = "tobil/qmd-query-expansion-1.7B-grpo"
			
 
				-DATASET = "tobil/qmd-query-expansion-train-v2"
			
 
				+DATASET = "tobil/qmd-query-expansion-train"
			
 
				 
			
 
				 
			
 
				 def main():
			
--- a/finetune/jobs/sft.py
+++ b/finetune/jobs/sft.py
@@ -25,7 +25,7 @@ from huggingface_hub import login
 
				 # --- Config (inlined from configs/sft.yaml) ---
			
 
				 BASE_MODEL = "Qwen/Qwen3-1.7B"
			
 
				 OUTPUT_MODEL = "tobil/qmd-query-expansion-1.7B-sft"
			
 
				-DATASET = "tobil/qmd-query-expansion-train-v2"
			
 
				+DATASET = "tobil/qmd-query-expansion-train"
			
 
				 
			
 
				 hf_token = os.environ.get("HF_TOKEN")
			
 
				 if hf_token: