# SFT Training Config for QMD Query Expansion with LiquidAI LFM2
# Target: LFM2-1.2B with LoRA (hybrid architecture: convolutions + attention)
#
# LFM2 is optimized for on-device inference with fast decode/prefill.
# Recommended for: agentic tasks, data extraction, RAG, creative writing.
#
# Usage: uv run train.py sft --config configs/sft_lfm2.yaml
#
# Requirements:
#   - transformers >= 4.55.0 (LFM2 architecture support)
#   - May need: pip install -U transformers

# Base checkpoint to fine-tune and where the training run writes its output.
model:
  base: "LiquidAI/LFM2-1.2B"
  output: "outputs/sft-lfm2"  # Local training output (push to HF manually after eval)

# Training data source.
dataset:
  # Local: run `uv run dataset/prepare_data.py` first, then use "data/train/"
  # HuggingFace: use "tobil/qmd-query-expansion-train" (already prepared)
  name: "data/train/"
  text_field: "text"
  split: "train"
  # NOTE(review): presumably the fraction of `split` held out for evaluation;
  # confirm against train.py.
  eval_split: 0.1

# Optimizer / schedule hyperparameters.
training:
  epochs: 5
  # NOTE(review): if batch_size is per device, the effective batch is
  # batch_size x gradient_accumulation_steps = 16; confirm against train.py.
  batch_size: 4
  gradient_accumulation_steps: 4
  # Written with a decimal point on purpose: YAML 1.1 loaders (e.g. PyYAML)
  # resolve a bare `2e-4` as the string "2e-4", not a float.
  learning_rate: 2.0e-4
  max_length: 512
  warmup_ratio: 0.03
  lr_scheduler: "cosine"

# LoRA adapter settings.
lora:
  rank: 16
  alpha: 32
  dropout: 0.0
  # LFM2 uses a different architecture than standard transformers:
  #   - Attention layers: q_proj, k_proj, v_proj, out_proj
  #   - Input projection: in_proj
  #   - FFN/MLP gates: w1, w2, w3 (SwiGLU activation)
  target_modules:
    - "q_proj"
    - "k_proj"
    - "v_proj"
    - "out_proj"
    - "in_proj"
    - "w1"
    - "w2"
    - "w3"

# Experiment-tracking labels for this run.
tracking:
  project: "qmd-query-expansion"
  run_name: "sft-lfm2-1.2B"

# LFM2-specific generation settings (recommended by LiquidAI)
generation:
  temperature: 0.3
  min_p: 0.15
  repetition_penalty: 1.05