| 12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970 |
- #!/usr/bin/env python3
- """Write model.json prompt config for generating high-quality examples."""
- from __future__ import annotations
- import argparse
- import json
- from pathlib import Path
- from example import SearchType, SEARCH_TYPE_TO_PREFIX
def build_prompt() -> str:
    """Return the instruction prompt used to generate query-expansion examples.

    The prompt describes a single-JSON-object output format and lists the
    rules for the ``lex``/``vec``/``hyde`` entries. The allowed pair labels
    quoted in the first rule are read from ``SEARCH_TYPE_TO_PREFIX``; every
    other part of the text is fixed. Each prompt line ends with a newline,
    including the last one.
    """
    # Look up the labels in the same order they are listed in the rule text.
    prefixes = (
        SEARCH_TYPE_TO_PREFIX[SearchType.LexSearch],
        SEARCH_TYPE_TO_PREFIX[SearchType.VecSearch],
        SEARCH_TYPE_TO_PREFIX[SearchType.HydeSearch],
    )
    quoted_prefixes = ", ".join(f'"{prefix}"' for prefix in prefixes)

    # NOTE(review): the schema example below hard-codes "lex"/"vec"/"hyde"
    # while the first rule uses the mapped prefixes; presumably these agree,
    # confirm against the `example` module.
    prompt_lines = [
        "You are a query expansion expert. Given a user query, output a single JSON object "
        "that matches the training JSONL schema:",
        '{"query": "...", "output": [["lex", "..."], ["vec", "..."], ["hyde", "..."]]}',
        "Rules:",
        f"- output is a list of pairs, where the first element is one of: {quoted_prefixes}.",
        "- Include 2-3 lex lines, 2-3 vec lines, and 0-1 hyde line.",
        "- lex lines are short keyword phrases; never equal or near-echo the query.",
        "- vec lines are natural language search phrases.",
        "- hyde is a concise hypothetical passage (50-200 chars), single line.",
        "- Preserve key terms and named entities in lex lines.",
        "- No extra text outside the JSON object.",
    ]
    return "".join(f"{line}\n" for line in prompt_lines)
def write_model_json(path: Path) -> None:
    """Write the example-generator model config to *path* as JSON.

    The config holds the generator name, model identifier, schema version,
    the prompt produced by ``build_prompt()``, a sketch of the output schema,
    and free-form notes. It is serialized with a two-space indent, terminated
    by a single newline, and written as UTF-8.

    Args:
        path: Destination file. This function does not create the parent
            directory.
    """
    output_schema = {
        "query": "string",
        "output": [["lex|vec|hyde", "string"]],
    }
    notes = [
        "LexSearch/VecSearch/HydeSearch are represented as lex/vec/hyde in output.",
        "Do not echo the query in lex lines.",
    ]
    config = {
        "name": "qmd-gepa-example-generator",
        "model": "grok-4-1-fast-reasoning",
        "schema_version": 1,
        "prompt": build_prompt(),
        "output_schema": output_schema,
        "notes": notes,
    }

    serialized = json.dumps(config, indent=2)
    path.write_text(f"{serialized}\n", encoding="utf-8")
def main() -> int:
    """Parse the command line, write model.json, and report where it went.

    Accepts a single ``--output`` option (default ``gepa/model.json``),
    creates the destination's parent directory if it is missing, writes the
    config there, and prints the path.

    Returns:
        ``0`` once the file has been written.
    """
    cli = argparse.ArgumentParser(description="Write model.json for GEPA generation")
    cli.add_argument(
        "--output",
        type=str,
        default="gepa/model.json",
        help="Path to write model.json",
    )
    destination = Path(cli.parse_args().output)

    # The writer does not create directories, so make sure the parent exists.
    destination.parent.mkdir(parents=True, exist_ok=True)
    write_model_json(destination)

    print(f"Wrote {destination}")
    return 0
# Script entry point: run main() and hand its return value to the interpreter
# as the process exit status.
if __name__ == "__main__":
    exit_status = main()
    raise SystemExit(exit_status)
|