# Run every recipe line with bash.
#   -u           treat expansion of an unset variable as an error
#   -o pipefail  a pipeline fails if ANY stage fails, so `cmd |& tee log`
#                reports cmd's failure instead of tee's success
set shell := ["bash", "-uo", "pipefail", "-c"]

# Run the schema, score and analyze recipes, in that order.
validate: schema score analyze

# Run dataset/score_data.py.
score:
    uv run dataset/score_data.py

# Run dataset/validate_schema.py.
schema:
    uv run dataset/validate_schema.py

# Run dataset/analyze_data.py on each data/*.jsonl file, stopping at the first failure.
analyze:
    for f in data/*.jsonl; do \
      uv run dataset/analyze_data.py --input "$f" --show-examples 0 || exit 1; \
    done

# Run dataset/prepare_data.py with QMD_BASE_MODEL=Qwen/Qwen3-1.7B and --seed 42.
prepare:
    QMD_BASE_MODEL=Qwen/Qwen3-1.7B uv run dataset/prepare_data.py --seed 42

# Run convert_onnx.py for the given size (default "1.7B").
convert-onnx size="1.7B":
    uv run convert_onnx.py --size {{size}}

# Run convert_gguf.py for the given size (default "1.7B").
convert-gguf size="1.7B":
    uv run convert_gguf.py --size {{size}}

# Run `prepare`, then launch train.py sft via torchrun, copying stdout+stderr to /tmp/qmd-sft-train.log.
# HF_TOKEN must be set in the environment: with `bash -u` the ${HF_TOKEN}
# expansion aborts the recipe when it is unset.
train-local: prepare
    HF_TOKEN=${HF_TOKEN} uv run torchrun --standalone --nproc_per_node auto \
      train.py sft --config configs/sft_local.yaml |& tee /tmp/qmd-sft-train.log

# Experimental GRPO training is in finetune/experiments/grpo and not part of
# the default pipeline. The recipe below is kept for reference; when enabled
# it writes its log to /tmp/qmd-grpo-train.log.
#
# grpo-local:
#   HF_TOKEN=${HF_TOKEN} uv run train.py grpo --config experiments/grpo/grpo.yaml |& tee /tmp/qmd-grpo-train.log