optimizer.py 2.3 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970
  1. #!/usr/bin/env python3
  2. """Write model.json prompt config for generating high-quality examples."""
  3. from __future__ import annotations
  4. import argparse
  5. import json
  6. from pathlib import Path
  7. from example import SearchType, SEARCH_TYPE_TO_PREFIX
  8. def build_prompt() -> str:
  9. lex = SEARCH_TYPE_TO_PREFIX[SearchType.LexSearch]
  10. vec = SEARCH_TYPE_TO_PREFIX[SearchType.VecSearch]
  11. hyde = SEARCH_TYPE_TO_PREFIX[SearchType.HydeSearch]
  12. return (
  13. "You are a query expansion expert. Given a user query, output a single JSON object "
  14. "that matches the training JSONL schema:\n"
  15. '{"query": "...", "output": [["lex", "..."], ["vec", "..."], ["hyde", "..."]]}\n'
  16. "Rules:\n"
  17. f"- output is a list of pairs, where the first element is one of: "
  18. f"\"{lex}\", \"{vec}\", \"{hyde}\".\n"
  19. "- Include 2-3 lex lines, 2-3 vec lines, and 0-1 hyde line.\n"
  20. "- lex lines are short keyword phrases; never equal or near-echo the query.\n"
  21. "- vec lines are natural language search phrases.\n"
  22. "- hyde is a concise hypothetical passage (50-200 chars), single line.\n"
  23. "- Preserve key terms and named entities in lex lines.\n"
  24. "- No extra text outside the JSON object.\n"
  25. )
  26. def write_model_json(path: Path) -> None:
  27. payload = {
  28. "name": "qmd-gepa-example-generator",
  29. "model": "grok-4-1-fast-reasoning",
  30. "schema_version": 1,
  31. "prompt": build_prompt(),
  32. "output_schema": {
  33. "query": "string",
  34. "output": [["lex|vec|hyde", "string"]],
  35. },
  36. "notes": [
  37. "LexSearch/VecSearch/HydeSearch are represented as lex/vec/hyde in output.",
  38. "Do not echo the query in lex lines.",
  39. ],
  40. }
  41. path.write_text(json.dumps(payload, indent=2) + "\n", encoding="utf-8")
  42. def main() -> int:
  43. parser = argparse.ArgumentParser(description="Write model.json for GEPA generation")
  44. parser.add_argument(
  45. "--output",
  46. type=str,
  47. default="gepa/model.json",
  48. help="Path to write model.json",
  49. )
  50. args = parser.parse_args()
  51. output_path = Path(args.output)
  52. output_path.parent.mkdir(parents=True, exist_ok=True)
  53. write_model_json(output_path)
  54. print(f"Wrote {output_path}")
  55. return 0
  56. if __name__ == "__main__":
  57. raise SystemExit(main())