---
# sft_v4.yaml
# SFT Training Config - v4 with /only: support
#
# Usage:
#   accelerate launch --config_file configs/accelerate_multi_gpu.yaml \
#     train.py sft --config configs/sft_v4.yaml
#
# NOTE(review): the nesting below was reconstructed from a flattened listing
# (viewer line numbers stripped, indentation restored). Keys and values are
# unchanged; confirm the section layout against what train.py expects.

# Base model identifier and the output name for the fine-tuned result.
model:
  base: "Qwen/Qwen3-1.7B"
  output: "qmd-sft-v4"

# Training data location and the field/split to read from it.
dataset:
  name: "data/train_v4"
  text_field: "text"
  split: "train"
  # Presumably the fraction of `split` held out for evaluation - confirm
  # against the loader in train.py.
  eval_split: 0.1

# Optimizer / schedule hyperparameters.
training:
  epochs: 3
  # batch_size 2 x gradient_accumulation_steps 4 = 8 samples per optimizer
  # step (per process, assuming batch_size is per-device - TODO confirm).
  batch_size: 2
  gradient_accumulation_steps: 4
  # Written in plain decimal on purpose: exponent forms without a dot
  # (e.g. 2e-4) are read as strings by YAML 1.1 loaders such as PyYAML.
  learning_rate: 0.0002
  max_length: 512
  warmup_ratio: 0.03
  lr_scheduler: "cosine"

# LoRA adapter settings.
lora:
  rank: 16
  alpha: 32
  dropout: 0.0
  # Names of the modules that receive adapters.
  target_modules:
    - "q_proj"
    - "k_proj"
    - "v_proj"
    - "o_proj"
    - "gate_proj"
    - "up_proj"
    - "down_proj"

# Experiment-tracking identifiers.
tracking:
  project: "qmd-query-expansion"
  run_name: "sft-1.7B-v4-only-modes"