  # SFT Training Config for QMD Query Expansion
  # Target: Qwen3-1.7B with LoRA
  #
  # Usage: uv run train.py sft --config configs/sft.yaml
  # Base checkpoint to fine-tune and where training output is written.
  model:
    base: "Qwen/Qwen3-1.7B"
    output: "outputs/sft"  # Local training output (push to HF manually after eval)
  # Training data source.
  dataset:
    # Local: run `uv run dataset/prepare_data.py` first, then use "data/train/"
    # HuggingFace: use "tobil/qmd-query-expansion-train" (already prepared)
    name: "data/train/"
    text_field: "text"
    split: "train"
    # NOTE(review): presumably the fraction of the data held out for
    # evaluation; confirm against train.py.
    eval_split: 0.1
  # Optimization, schedule, and checkpointing settings.
  training:
    epochs: 5
    batch_size: 4
    gradient_accumulation_steps: 4
    # Written with an explicit mantissa dot: YAML 1.1 loaders such as PyYAML
    # resolve a bare `2e-4` as the string "2e-4" rather than a float.
    learning_rate: 2.0e-4
    max_length: 512
    warmup_ratio: 0.03
    lr_scheduler: "cosine"
    # Save checkpoints every 30 minutes
    save_interval_minutes: 30
    # Fallback step-based save cadence if needed (not used for wall-clock mode)
    save_steps: 200
    save_total_limit: 3
  # LoRA adapter settings.
  lora:
    rank: 16
    alpha: 32
    dropout: 0.0
    target_modules:
      - "q_proj"
      - "k_proj"
      - "v_proj"
      - "o_proj"
      - "gate_proj"
      - "up_proj"
      - "down_proj"
  # Project and run names used for experiment tracking.
  # NOTE(review): the tracking backend is not specified in this file; confirm
  # in train.py.
  tracking:
    project: "qmd-query-expansion"
    run_name: "sft-1.7B"