---
# SFT Training Config for QMD Query Expansion with LiquidAI LFM2
# Target: LFM2-1.2B with LoRA (hybrid architecture: convolutions + attention)
#
# LFM2 is optimized for on-device inference with fast decode/prefill.
# Recommended for: agentic tasks, data extraction, RAG, creative writing.
#
# Usage: uv run train.py sft --config configs/sft_lfm2.yaml
#
# Requirements:
# - transformers >= 4.55.0 (LFM2 architecture support)
# - May need: pip install -U transformers

# Base checkpoint to fine-tune and where the training output is written.
model:
  base: "LiquidAI/LFM2-1.2B"
  output: "outputs/sft-lfm2"  # Local training output (push to HF manually after eval)

# Training data source.
dataset:
  # Local: run `uv run dataset/prepare_data.py` first, then use "data/train/"
  # HuggingFace: use "tobil/qmd-query-expansion-train" (already prepared)
  name: "data/train/"
  text_field: "text"
  split: "train"
  # NOTE(review): presumably the fraction of `split` held out for evaluation;
  # confirm against train.py.
  eval_split: 0.1

# Optimizer and schedule hyperparameters.
training:
  epochs: 5
  batch_size: 4
  gradient_accumulation_steps: 4
  # Written with an explicit decimal point: YAML 1.1 loaders (e.g. PyYAML)
  # resolve a bare `2e-4` as a string, not a float.
  learning_rate: 2.0e-4
  max_length: 512
  warmup_ratio: 0.03
  lr_scheduler: "cosine"

# LoRA adapter settings.
lora:
  rank: 16
  alpha: 32
  dropout: 0.0
  # LFM2 uses a different architecture than standard transformers:
  # - Attention layers: q_proj, k_proj, v_proj, out_proj
  # - Input projection: in_proj
  # - FFN/MLP gates: w1, w2, w3 (SwiGLU activation)
  target_modules:
    - "q_proj"
    - "k_proj"
    - "v_proj"
    - "out_proj"
    - "in_proj"
    - "w1"
    - "w2"
    - "w3"

# Experiment tracking identifiers.
tracking:
  project: "qmd-query-expansion"
  run_name: "sft-lfm2-1.2B"

# LFM2-specific generation settings (recommended by LiquidAI)
generation:
  temperature: 0.3
  min_p: 0.15
  repetition_penalty: 1.05