# Recipes for the dataset and training workflow; run one with `just <recipe>`.
# Run every recipe line through bash with strict flags:
#   -e           abort a line as soon as one of its commands fails (this also
#                covers commands inside `for` loops)
#   -u           treat expansion of an unset variable as an error
#   -o pipefail  a pipeline fails when any stage fails, not only the last one,
#                so `cmd |& tee log` still reports a failing `cmd`
set shell := ["bash", "-euo", "pipefail", "-c"]
# Full data check: runs the `schema`, `score` and `analyze` recipes, in that
# order. Declared as dependencies so the commands live in one place instead of
# being copied here.
validate: schema score analyze
# Run dataset/score_data.py through uv.
score:
    uv run dataset/score_data.py
# Run dataset/validate_schema.py through uv.
schema:
    uv run dataset/validate_schema.py
# Run dataset/analyze_data.py once per data/*.jsonl file, with example output
# disabled (--show-examples 0).
#
# The loop stops at the first file whose analysis fails and propagates that
# exit code; without the explicit `exit`, only the status of the last
# iteration would decide whether the recipe succeeds.
analyze:
    for f in data/*.jsonl; do \
        uv run dataset/analyze_data.py --input "$f" --show-examples 0 || exit $?; \
    done
# Run dataset/prepare_data.py with a fixed seed (42); QMD_BASE_MODEL is set in
# the command's environment for this invocation only.
prepare:
    QMD_BASE_MODEL=Qwen/Qwen3-1.7B uv run dataset/prepare_data.py --seed 42
# Run convert_onnx.py for the given model size (defaults to "1.7B").
convert-onnx size="1.7B":
    uv run convert_onnx.py --size {{size}}
# Run convert_gguf.py for the given model size (defaults to "1.7B").
convert-gguf size="1.7B":
    uv run convert_gguf.py --size {{size}}
# Local SFT run: `prepare` runs first (as a recipe dependency rather than a
# nested `just` call), then train.py is launched under torchrun with
# configs/sft_local.yaml. Combined stdout/stderr is mirrored to
# /tmp/qmd-sft-train.log.
#
# `set -o pipefail` makes the recipe fail when torchrun fails; otherwise the
# exit status of the pipeline would be that of `tee`, which hides a failed
# training run. HF_TOKEN is read from the calling environment.
train-local: prepare
    set -o pipefail; \
    HF_TOKEN=${HF_TOKEN} uv run torchrun --standalone --nproc_per_node auto \
        train.py sft --config configs/sft_local.yaml |& tee /tmp/qmd-sft-train.log
# Experimental GRPO training lives in finetune/experiments/grpo and is not part
# of the default pipeline.
#
# grpo-local:
#     HF_TOKEN=${HF_TOKEN} uv run train.py grpo --config experiments/grpo/grpo.yaml |& tee /tmp/qmd-grpo-train.log
|