4 tháng trước · d6f3688d91
--- a/finetune/CLAUDE.md
+++ b/finetune/CLAUDE.md
@@ -83,11 +83,8 @@ hf jobs uv run --flavor a10g-large --secrets HF_TOKEN --timeout 2h jobs/sft.py
 
															 ### Stage 2: (Experimental) GRPO
														
 
															 ```bash
														
 
															-# Local (optional; experimental)
														
 
															-uv run train.py grpo --config experiments/grpo/grpo.yaml
														
 
															-
														
 
															 # Experimental script
														
 
															-HF_TOKEN=${HF_TOKEN} uv run experiments/grpo/grpo.py
														
 
															+cd finetune && HF_TOKEN=${HF_TOKEN} uv run python experiments/grpo/grpo.py
														
 
															 ```
														
 
															 ### HuggingFace Jobs
														
--- a/finetune/README.md
+++ b/finetune/README.md
@@ -47,7 +47,8 @@ uv run eval.py tobil/qmd-query-expansion-1.7B
 
															 uv run convert_gguf.py --size 1.7B
														
 
															 # NOTE: GRPO is currently experimental and moved to finetune/experiments/grpo
														
 
															-# if you want to run it manually, use uv run python experiments/grpo/grpo.py
														
 
															+# if you want to run it manually, use:
														
 
															+#   cd finetune && uv run python experiments/grpo/grpo.py
														
 
															 ```
														
 
															 ### Local training (if you have a GPU)
														
@@ -56,7 +57,7 @@ uv run convert_gguf.py --size 1.7B
 
															 uv run train.py sft  --config configs/sft.yaml
														
 
															 # Experimental GRPO
														
 
															-uv run train.py grpo --config experiments/grpo/grpo.yaml
														
 
															+cd finetune && uv run python experiments/grpo/grpo.py
														
 
															 ```
														
 
															 ### Monitoring HF Jobs
														
@@ -138,7 +139,7 @@ It is not part of the default production path for this repository.
 
															 ```bash
														
 
															 # Optional experimental GRPO run
														
 
															-uv run train.py grpo --config experiments/grpo/grpo.yaml
														
 
															+cd finetune && uv run python experiments/grpo/grpo.py
														
 
															 ```
														
 
															 ## Evaluation
														
--- a/finetune/train.py
+++ b/finetune/train.py
@@ -417,8 +417,6 @@ def cmd_grpo(args):
 
															     )
														
 
															     print("To run experimental GRPO, use:")
														
 
															     print("  cd finetune && uv run python experiments/grpo/grpo.py")
														
 
															-    print("Or, if you have local config wiring ready:")
														
 
															-    print("  uv run train.py grpo --config experiments/grpo/grpo.yaml")
														
 
															     return
														
 
															     import torch
														
@@ -664,22 +662,9 @@ Examples:
 
															         "--dry-run", action="store_true", help="Print config and exit"
														
 
															     )
														
 
															-    grpo_parser = sub.add_parser(
														
 
															-        "grpo",
														
 
															-        help="Experimental: GRPO reinforcement learning (moved to experiments/grpo/)",
														
 
															-    )
														
 
															-    grpo_parser.add_argument("--config", required=True, help="Path to GRPO config YAML")
														
 
															-    grpo_parser.add_argument(
														
 
															-        "--dry-run", action="store_true", help="Print config, test reward, and exit"
														
 
															-    )
														
 
															-
														
 
															     args = parser.parse_args()
														
 
															-    if args.stage == "sft":
														
 
															-        cmd_sft(args)
														
 
															-    elif args.stage == "grpo":
														
 
															-        cmd_grpo(args)
														
 
															-
														
 
															+    cmd_sft(args)
														
 
															 if __name__ == "__main__":
														
 
															     main()