před 4 měsíci · d6f3688d91
--- a/finetune/CLAUDE.md
+++ b/finetune/CLAUDE.md
@@ -83,11 +83,8 @@ hf jobs uv run --flavor a10g-large --secrets HF_TOKEN --timeout 2h jobs/sft.py
 
				 ### Stage 2: (Experimental) GRPO
			
 
				 
			
 
				 ```bash
			
 
				-# Local (optional; experimental)
			
 
				-uv run train.py grpo --config experiments/grpo/grpo.yaml
			
 
				-
			
 
				 # Experimental script
			
 
				-HF_TOKEN=${HF_TOKEN} uv run experiments/grpo/grpo.py
			
 
				+cd finetune && HF_TOKEN=${HF_TOKEN} uv run python experiments/grpo/grpo.py
			
 
				 ```
			
 
				 
			
 
				 ### HuggingFace Jobs
			
--- a/finetune/README.md
+++ b/finetune/README.md
@@ -47,7 +47,8 @@ uv run eval.py tobil/qmd-query-expansion-1.7B
 
				 uv run convert_gguf.py --size 1.7B
			
 
				 
			
 
				 # NOTE: GRPO is currently experimental and moved to finetune/experiments/grpo
			
 
				-# if you want to run it manually, use uv run python experiments/grpo/grpo.py
			
 
				+# if you want to run it manually, use:
			
 
				+#   cd finetune && uv run python experiments/grpo/grpo.py
			
 
				 ```
			
 
				 
			
 
				 ### Local training (if you have a GPU)
			
@@ -56,7 +57,7 @@ uv run convert_gguf.py --size 1.7B
 
				 uv run train.py sft  --config configs/sft.yaml
			
 
				 
			
 
				 # Experimental GRPO
			
 
				-uv run train.py grpo --config experiments/grpo/grpo.yaml
			
 
				+cd finetune && uv run python experiments/grpo/grpo.py
			
 
				 ```
			
 
				 
			
 
				 ### Monitoring HF Jobs
			
@@ -138,7 +139,7 @@ It is not part of the default production path for this repository.
 
				 
			
 
				 ```bash
			
 
				 # Optional experimental GRPO run
			
 
				-uv run train.py grpo --config experiments/grpo/grpo.yaml
			
 
				+cd finetune && uv run python experiments/grpo/grpo.py
			
 
				 ```
			
 
				 
			
 
				 ## Evaluation
			
--- a/finetune/train.py
+++ b/finetune/train.py
@@ -417,8 +417,6 @@ def cmd_grpo(args):
 
				     )
			
 
				     print("To run experimental GRPO, use:")
			
 
				     print("  cd finetune && uv run python experiments/grpo/grpo.py")
			
 
				-    print("Or, if you have local config wiring ready:")
			
 
				-    print("  uv run train.py grpo --config experiments/grpo/grpo.yaml")
			
 
				     return
			
 
				 
			
 
				     import torch
			
@@ -664,22 +662,9 @@ Examples:
 
				         "--dry-run", action="store_true", help="Print config and exit"
			
 
				     )
			
 
				 
			
 
				-    grpo_parser = sub.add_parser(
			
 
				-        "grpo",
			
 
				-        help="Experimental: GRPO reinforcement learning (moved to experiments/grpo/)",
			
 
				-    )
			
 
				-    grpo_parser.add_argument("--config", required=True, help="Path to GRPO config YAML")
			
 
				-    grpo_parser.add_argument(
			
 
				-        "--dry-run", action="store_true", help="Print config, test reward, and exit"
			
 
				-    )
			
 
				-
			
 
				     args = parser.parse_args()
			
 
				 
			
 
				-    if args.stage == "sft":
			
 
				-        cmd_sft(args)
			
 
				-    elif args.stage == "grpo":
			
 
				-        cmd_grpo(args)
			
 
				-
			
 
				+    cmd_sft(args)
			
 
				 
			
 
				 if __name__ == "__main__":
			
 
				     main()