# SFT Training Config for QMD Query Expansion with LiquidAI LFM2
# Target: LFM2-1.2B with LoRA (hybrid architecture: convolutions + attention)
#
# LFM2 is optimized for on-device inference with fast decode/prefill.
# Recommended for: agentic tasks, data extraction, RAG, creative writing.
#
# Usage: uv run train.py sft --config configs/sft_lfm2.yaml
#
# Requirements:
#   - transformers >= 4.55.0 (LFM2 architecture support)
#   - May need: pip install -U transformers

model:
  base: "LiquidAI/LFM2-1.2B"
  # Local training output (push to HF manually after eval)
  output: "outputs/sft-lfm2"

dataset:
  # Local: run `uv run dataset/prepare_data.py` first, then use "data/train/"
  # HuggingFace: use "tobil/qmd-query-expansion-train" (already prepared)
  name: "data/train/"
  text_field: "text"
  split: "train"
  eval_split: 0.1

training:
  epochs: 5
  batch_size: 4
  gradient_accumulation_steps: 4
  # Written with an explicit decimal point: YAML 1.1 loaders (e.g. PyYAML)
  # resolve a bare `2e-4` as a string rather than a float.
  learning_rate: 2.0e-4
  max_length: 512
  warmup_ratio: 0.03
  lr_scheduler: "cosine"

lora:
  rank: 16
  alpha: 32
  dropout: 0.0
  # LFM2 uses a different architecture than standard transformers:
  #   - Attention layers: q_proj, k_proj, v_proj, out_proj
  #   - Input projection: in_proj
  #   - FFN/MLP gates: w1, w2, w3 (SwiGLU activation)
  target_modules:
    - "q_proj"
    - "k_proj"
    - "v_proj"
    - "out_proj"
    - "in_proj"
    - "w1"
    - "w2"
    - "w3"

tracking:
  project: "qmd-query-expansion"
  run_name: "sft-lfm2-1.2B"

# LFM2-specific generation settings (recommended by LiquidAI)
generation:
  temperature: 0.3
  min_p: 0.15
  repetition_penalty: 1.05