Justfile 1.1 KB

12345678910111213141516171819202122232425262728293031323334353637383940
  1. set shell := ["bash", "-uc"]
  2. validate:
  3. uv run dataset/validate_schema.py
  4. uv run dataset/score_data.py
  5. for f in data/*.jsonl; do \
  6. uv run dataset/analyze_data.py --input "$f" --show-examples 0; \
  7. done
  8. score:
  9. uv run dataset/score_data.py
  10. schema:
  11. uv run dataset/validate_schema.py
  12. analyze:
  13. for f in data/*.jsonl; do \
  14. uv run dataset/analyze_data.py --input "$f" --show-examples 0; \
  15. done
  16. prepare:
  17. QMD_BASE_MODEL=Qwen/Qwen3-1.7B uv run dataset/prepare_data.py --seed 42
  18. convert-onnx size="1.7B":
  19. uv run convert_onnx.py --size {{size}}
  20. convert-gguf size="1.7B":
  21. uv run convert_gguf.py --size {{size}}
  22. train-local:
  23. just prepare
  24. HF_TOKEN=${HF_TOKEN} uv run torchrun --standalone --nproc_per_node auto \
  25. train.py sft --config configs/sft_local.yaml |& tee /tmp/qmd-sft-train.log
# Experimental GRPO training lives in finetune/experiments/grpo and is not part
# of the default pipeline. The recipe is left here, commented out, for reference:
#
# grpo-local:
#     HF_TOKEN=${HF_TOKEN} uv run train.py grpo --config experiments/grpo/grpo.yaml |& tee /tmp/qmd-grpo-train.log