---
# sft.yaml
#
# SFT Training Config for QMD Query Expansion
# Target: Qwen3-1.7B with LoRA
#
# Usage: uv run train.py sft --config configs/sft.yaml
#
# NOTE(review): nesting below was reconstructed from a flattened copy of this
# file; confirm the section layout (in particular that `lora` and `tracking`
# are top-level keys) against what train.py reads.

# Base model to fine-tune and where the run writes its output.
model:
  base: "Qwen/Qwen3-1.7B"
  output: "outputs/sft"  # Local training output (push to HF manually after eval)

# Training data source.
dataset:
  # Local: run `uv run dataset/prepare_data.py` first, then use "data/train/"
  # HuggingFace: use "tobil/qmd-query-expansion-train" (already prepared)
  name: "data/train/"
  text_field: "text"
  split: "train"
  # NOTE(review): presumably the fraction of `split` held out for evaluation;
  # verify against train.py.
  eval_split: 0.1

# Optimisation hyperparameters.
training:
  epochs: 5
  batch_size: 4
  gradient_accumulation_steps: 4
  # Keep the explicit decimal point: YAML 1.1 loaders (e.g. PyYAML) resolve a
  # bare `2e-4` as the string "2e-4" rather than a float.
  learning_rate: 2.0e-4
  max_length: 512
  warmup_ratio: 0.03
  lr_scheduler: "cosine"

# LoRA adapter settings.
lora:
  rank: 16
  alpha: 32  # 2 x rank
  dropout: 0.0
  # Module names that receive adapters.
  target_modules:
    - "q_proj"
    - "k_proj"
    - "v_proj"
    - "o_proj"
    - "gate_proj"
    - "up_proj"
    - "down_proj"

# Experiment-tracking identifiers.
tracking:
  project: "qmd-query-expansion"
  run_name: "sft-1.7B"