reward.py 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610
  1. # /// script
  2. # requires-python = ">=3.10"
  3. # dependencies = []
  4. # ///
  5. """
  6. QMD Query Expansion Reward Function
  7. Single source of truth for scoring query expansions. Used by:
  8. - GRPO training (as the RL reward signal)
  9. - Evaluation scripts (for scoring model outputs)
  10. Scores expansions on five dimensions:
  11. Format (30) - Has lex/vec lines, no invalid lines
  12. Diversity (30) - Multiple types, diverse content, no echoes
  13. HyDE (20) - Optional bonus for hypothetical document passage
  14. Quality (20) - Lex shorter than vec, natural language, key terms
  15. Entity (20) - Named entity preservation in lex/vec lines
  16. Returns 0.0-1.0 for RL rewards, or a detailed breakdown dict for evaluation.
  17. """
  18. import re
  19. from collections import Counter
  20. # =============================================================================
  21. # Constants
  22. # =============================================================================
  23. # "only:" mode patterns - when query ends with these, expect only that type
  24. # Format: "query /only:lex" (slash prefix, no space after colon)
  25. ONLY_MODE_PATTERN = re.compile(r'\s+/only:(lex|vec|hyde)\s*$', re.IGNORECASE)
  26. STOPWORDS = frozenset({
  27. 'the', 'a', 'an', 'is', 'are', 'to', 'for', 'of', 'in',
  28. 'and', 'or', 'it', 'this', 'that', 'be', 'with', 'as', 'on', 'by',
  29. })
  30. KEY_TERM_STOPWORDS = frozenset({
  31. 'what', 'is', 'how', 'to', 'the', 'a', 'an', 'in', 'on', 'for', 'of',
  32. 'and', 'or', 'with', 'my', 'your', 'do', 'does', 'can', 'i', 'me', 'we',
  33. 'who', 'where', 'when', 'why', 'which', 'find', 'get', 'show', 'tell',
  34. })
  35. GENERIC_LEX_PHRASES = frozenset({
  36. 'find information about', 'search for', 'look up', 'get information',
  37. 'learn about', 'information on', 'details about', 'find out about',
  38. 'what is', 'how to', 'guide to', 'help with',
  39. })
  40. # Chat template tokens that indicate a broken output
  41. CHAT_TEMPLATE_TOKENS = frozenset({
  42. '<|im_start|>', '<|im_end|>', '<|endoftext|>',
  43. '\nassistant\n', '\nuser\n',
  44. })
  45. # =============================================================================
  46. # Parsing
  47. # =============================================================================
  48. def parse_expansion(text: str) -> dict:
  49. """Parse a multi-line expansion into {lex, vec, hyde, invalid} lists."""
  50. result = {"lex": [], "vec": [], "hyde": [], "invalid": []}
  51. for line in text.strip().split("\n"):
  52. line = line.strip()
  53. if not line:
  54. continue
  55. if line.startswith("lex:"):
  56. result["lex"].append(line[4:].strip())
  57. elif line.startswith("vec:"):
  58. result["vec"].append(line[4:].strip())
  59. elif line.startswith("hyde:"):
  60. result["hyde"].append(line[5:].strip())
  61. else:
  62. result["invalid"].append(line)
  63. return result
  64. def detect_only_mode(query: str) -> tuple[str | None, str]:
  65. """Detect if query ends with 'only: lex/vec/hyde'.
  66. Returns (only_type, base_query) where only_type is None for normal queries.
  67. """
  68. match = ONLY_MODE_PATTERN.search(query)
  69. if match:
  70. only_type = match.group(1).lower()
  71. base_query = query[:match.start()].strip()
  72. return only_type, base_query
  73. return None, query
  74. def clean_model_output(text: str) -> tuple[str, bool]:
  75. """Strip chat template artifacts from model output.
  76. Returns (cleaned_text, used_thinking) where used_thinking is True
  77. if the model emitted <think>...</think> blocks.
  78. """
  79. text = text.replace('<|im_end|>', '').strip()
  80. used_thinking = '<think>' in text and '</think>' in text
  81. if used_thinking:
  82. text = re.sub(r'<think>.*?</think>', '', text, flags=re.DOTALL).strip()
  83. return text, used_thinking
  84. # =============================================================================
  85. # Helpers
  86. # =============================================================================
  87. def extract_named_entities(query: str) -> set:
  88. """Extract named entities using heuristics.
  89. Detects: ALL-CAPS acronyms (TDS, API), capitalized proper nouns (React),
  90. technical terms with special chars (node.js, C++), CamelCase (JavaScript),
  91. and compound names (TDS motorsports -> both words).
  92. """
  93. entities = set()
  94. words = query.split()
  95. prev_was_entity = False
  96. for i, word in enumerate(words):
  97. clean = word.strip('.,!?:;()[]"\'')
  98. if not clean:
  99. prev_was_entity = False
  100. continue
  101. is_entity = False
  102. if clean.isupper() and len(clean) >= 2:
  103. entities.add(clean.lower())
  104. is_entity = True
  105. elif i > 0 and clean[0].isupper() and clean.lower() not in KEY_TERM_STOPWORDS:
  106. entities.add(clean.lower())
  107. is_entity = True
  108. elif any(c in clean for c in '.+-#@') and len(clean) >= 2:
  109. entities.add(clean.lower())
  110. is_entity = True
  111. elif len(clean) > 1 and any(c.isupper() for c in clean[1:]) and clean[0].isupper():
  112. entities.add(clean.lower())
  113. is_entity = True
  114. elif prev_was_entity and clean.lower() not in KEY_TERM_STOPWORDS:
  115. entities.add(clean.lower())
  116. is_entity = True
  117. prev_was_entity = is_entity
  118. return entities
  119. def get_key_terms(query: str) -> set:
  120. """Get non-stopword terms from a query."""
  121. return set(query.lower().split()) - KEY_TERM_STOPWORDS
  122. def lex_preserves_key_terms(lex_line: str, query: str) -> bool:
  123. """Does the lex line contain at least one key term from the query?"""
  124. key_terms = get_key_terms(query)
  125. if not key_terms:
  126. return True
  127. return bool(key_terms & set(lex_line.lower().split()))
  128. def lex_preserves_entities(line: str, entities: set) -> bool:
  129. """Does the line contain at least one named entity?"""
  130. if not entities:
  131. return True
  132. lower = line.lower()
  133. return any(e in lower for e in entities)
  134. def lex_is_generic(lex_line: str) -> bool:
  135. """Is this lex line a useless generic filler phrase?"""
  136. lower = lex_line.lower().strip()
  137. for phrase in GENERIC_LEX_PHRASES:
  138. if phrase in lower or lower.startswith(phrase.split()[0]):
  139. remaining = lower
  140. for word in phrase.split():
  141. remaining = remaining.replace(word, '', 1).strip()
  142. if len(remaining) < 3:
  143. return True
  144. return False
  145. def word_set_distance(a: str, b: str) -> int:
  146. """Symmetric difference of word sets (how many words are unique to one)."""
  147. return len(set(a.lower().split()) ^ set(b.lower().split()))
  148. def is_diverse(a: str, b: str, min_distance: int = 2) -> bool:
  149. """Are two strings sufficiently different?"""
  150. a, b = a.lower().strip(), b.lower().strip()
  151. if a == b or a in b or b in a:
  152. return False
  153. return word_set_distance(a, b) >= min_distance
  154. def echoes_query(expansion: str, query: str) -> bool:
  155. """Is this expansion just echoing the original query?"""
  156. exp, q = expansion.lower().strip(), query.lower().strip()
  157. return exp == q or (q in exp and len(exp) < len(q) + 10)
  158. def word_repetition_penalty(text: str) -> int:
  159. """Penalty for words repeated 3+ times (excluding stopwords)."""
  160. counts = Counter(re.findall(r'\b\w+\b', text.lower()))
  161. return sum((c - 2) * 2 for w, c in counts.items()
  162. if c >= 3 and w not in STOPWORDS and len(w) > 2)
  163. # =============================================================================
  164. # Scoring
  165. # =============================================================================
  166. def _score_only_mode(query: str, base_query: str, text: str, used_thinking: bool, only_type: str) -> dict:
  167. """Score an 'only:' mode expansion. Expects ONLY the requested type."""
  168. parsed = parse_expansion(text)
  169. deductions = []
  170. # Expected type must be present
  171. expected_items = parsed.get(only_type, [])
  172. if not expected_items:
  173. return {
  174. "format": 0, "diversity": 0, "hyde": 0, "quality": 0, "entity": 0,
  175. "think_bonus": 0, "total": 0, "max_possible": 100,
  176. "percentage": 0.0, "rating": "Failed",
  177. "deductions": [f"missing expected {only_type}: output"],
  178. "parsed": parsed,
  179. "entities_detected": [],
  180. "only_mode": only_type,
  181. }
  182. # Penalize presence of OTHER types
  183. other_types = {"lex", "vec", "hyde"} - {only_type}
  184. unwanted_count = sum(len(parsed.get(t, [])) for t in other_types)
  185. if unwanted_count > 0:
  186. deductions.append(f"contains unwanted types (expected only {only_type})")
  187. # --- Format (0-30) ---
  188. format_score = 30 if unwanted_count == 0 else max(0, 30 - unwanted_count * 10)
  189. # --- Diversity (0-30) ---
  190. diversity_score = 0
  191. if len(expected_items) >= 2:
  192. diversity_score += 15
  193. # Check for diversity among items
  194. div_score = 15
  195. for i, a in enumerate(expected_items):
  196. for b in expected_items[i+1:]:
  197. if not is_diverse(a, b, 2):
  198. div_score -= 5
  199. deductions.append(f"{only_type} duplicate: {a[:20]}...")
  200. diversity_score += max(0, div_score)
  201. elif len(expected_items) == 1:
  202. diversity_score = 15 # One item is fine for single-type output
  203. # Check for echoes
  204. for exp in expected_items:
  205. if echoes_query(exp, base_query):
  206. diversity_score -= 5
  207. deductions.append(f"echoes query: {exp[:20]}...")
  208. diversity_score = max(0, diversity_score)
  209. # --- Type-specific quality (0-20) ---
  210. quality_score = 10 # base
  211. entities = extract_named_entities(base_query)
  212. if only_type == "lex":
  213. # Lex should be short keyword phrases with key terms
  214. with_terms = sum(1 for l in expected_items if lex_preserves_key_terms(l, base_query))
  215. if with_terms == len(expected_items):
  216. quality_score += 5
  217. # Check for generic phrases
  218. generic = sum(1 for l in expected_items if lex_is_generic(l))
  219. if generic == 0:
  220. quality_score += 5
  221. else:
  222. deductions.append(f"{generic} generic lex phrases")
  223. elif only_type == "vec":
  224. # Vec should be natural language sentences
  225. natural = sum(1 for v in expected_items if " " in v and len(v) > 15)
  226. if natural == len(expected_items):
  227. quality_score += 10
  228. else:
  229. quality_score += 5
  230. deductions.append("vec not all natural language")
  231. elif only_type == "hyde":
  232. # Hyde should be a document snippet (50-200 chars)
  233. hyde_text = expected_items[0]
  234. hyde_len = len(hyde_text)
  235. if 50 <= hyde_len <= 200:
  236. quality_score += 10
  237. elif 30 <= hyde_len <= 300:
  238. quality_score += 5
  239. deductions.append(f"hyde length {hyde_len} (ideal: 50-200)")
  240. else:
  241. deductions.append(f"hyde length {hyde_len} out of range")
  242. # --- Entity preservation (0-20) ---
  243. entity_score = 10 # base
  244. if entities:
  245. with_entities = sum(1 for item in expected_items if lex_preserves_entities(item, entities))
  246. if with_entities == len(expected_items):
  247. entity_score += 10
  248. elif with_entities > 0:
  249. entity_score += 5
  250. else:
  251. entity_score = 0
  252. deductions.append(f"missing entities: {entities}")
  253. # --- Think bonus (0-20) ---
  254. think_bonus = 0 if used_thinking else 20
  255. # --- Total ---
  256. total = format_score + diversity_score + quality_score + entity_score + think_bonus
  257. max_possible = 120
  258. percentage = max(0.0, min(100.0, total / max_possible * 100))
  259. if percentage >= 80:
  260. rating = "Excellent"
  261. elif percentage >= 60:
  262. rating = "Good"
  263. elif percentage >= 40:
  264. rating = "Acceptable"
  265. elif percentage >= 20:
  266. rating = "Poor"
  267. else:
  268. rating = "Failed"
  269. return {
  270. "format": format_score,
  271. "diversity": diversity_score,
  272. "hyde": 0, # not used in only mode (quality covers it)
  273. "quality": quality_score,
  274. "entity": entity_score,
  275. "think_bonus": think_bonus,
  276. "total": total,
  277. "max_possible": max_possible,
  278. "percentage": round(percentage, 1),
  279. "rating": rating,
  280. "deductions": deductions,
  281. "parsed": parsed,
  282. "entities_detected": list(entities) if entities else [],
  283. "only_mode": only_type,
  284. }
  285. def score_expansion_detailed(query: str, expansion: str) -> dict:
  286. """Score an expansion with full breakdown. Returns dict with all dimensions."""
  287. text, used_thinking = clean_model_output(expansion.strip())
  288. deductions = []
  289. # Detect "only:" mode
  290. only_type, base_query = detect_only_mode(query)
  291. def _fail(reason):
  292. return {
  293. "format": 0, "diversity": 0, "hyde": 0, "quality": 0, "entity": 0,
  294. "think_bonus": 0, "total": 0, "max_possible": 100,
  295. "percentage": 0.0, "rating": "Failed",
  296. "deductions": [reason],
  297. "parsed": parse_expansion(expansion),
  298. "entities_detected": [],
  299. "only_mode": only_type,
  300. }
  301. # Hard fail: remaining chat template tokens
  302. if any(tok in text for tok in CHAT_TEMPLATE_TOKENS):
  303. return _fail("CHAT TEMPLATE LEAKAGE")
  304. # Hard fail: every non-empty line must have a valid prefix
  305. for line in text.split("\n"):
  306. line = line.strip()
  307. if line and not line.startswith(("lex:", "vec:", "hyde:")):
  308. return _fail(f"INVALID LINE: {line[:50]}")
  309. # --- Handle "only:" mode separately ---
  310. if only_type:
  311. return _score_only_mode(query, base_query, text, used_thinking, only_type)
  312. parsed = parse_expansion(text)
  313. # --- Format (0-30) ---
  314. format_score = 10 # no invalid lines (guaranteed by hard fail)
  315. if parsed["lex"]:
  316. format_score += 10
  317. else:
  318. deductions.append("missing lex:")
  319. if parsed["vec"]:
  320. format_score += 10
  321. else:
  322. deductions.append("missing vec:")
  323. # --- Diversity (0-30) ---
  324. diversity_score = 0
  325. types_present = sum(1 for t in ("lex", "vec") if parsed[t])
  326. if types_present >= 2:
  327. diversity_score += 10
  328. else:
  329. deductions.append("only one type")
  330. if len(parsed["lex"]) + len(parsed["vec"]) >= 2:
  331. diversity_score += 5
  332. lex_div = 5
  333. for i, a in enumerate(parsed["lex"]):
  334. for b in parsed["lex"][i+1:]:
  335. if not is_diverse(a, b, 2):
  336. lex_div -= 2
  337. deductions.append(f"lex duplicate: {a[:20]}...")
  338. diversity_score += max(0, lex_div)
  339. vec_div = 5
  340. for i, a in enumerate(parsed["vec"]):
  341. for b in parsed["vec"][i+1:]:
  342. if not is_diverse(a, b, 3):
  343. vec_div -= 2
  344. deductions.append(f"vec duplicate: {a[:20]}...")
  345. diversity_score += max(0, vec_div)
  346. echo = 5
  347. lex_echo_count = 0
  348. for exp in parsed["lex"]:
  349. if echoes_query(exp, query):
  350. lex_echo_count += 1
  351. deductions.append(f"lex echoes query: {exp[:20]}...")
  352. # Harsh penalty for lex echoes - they're useless
  353. if lex_echo_count > 0:
  354. echo -= lex_echo_count * 10 # -10 per echo
  355. for exp in parsed["vec"]:
  356. if echoes_query(exp, query):
  357. echo -= 3 # vec echoes less severe (natural language overlap ok)
  358. deductions.append(f"vec echoes query: {exp[:20]}...")
  359. diversity_score += max(-10, echo) # can go negative
  360. # --- HyDE (0-20, optional bonus) ---
  361. hyde_score = 0
  362. if parsed["hyde"]:
  363. hyde_text = parsed["hyde"][0]
  364. hyde_score += 5
  365. hyde_len = len(hyde_text)
  366. if 50 <= hyde_len <= 200:
  367. hyde_score += 5
  368. elif hyde_len < 50:
  369. hyde_score += 2
  370. deductions.append(f"hyde too short ({hyde_len})")
  371. else:
  372. deductions.append(f"hyde too long ({hyde_len})")
  373. if "\n" not in hyde_text:
  374. hyde_score += 5
  375. hyde_score += max(0, 5 - word_repetition_penalty(hyde_text))
  376. # --- Quality (0-20) ---
  377. quality_score = 5 # base relevance
  378. if parsed["lex"] and parsed["vec"]:
  379. avg_lex = sum(len(l) for l in parsed["lex"]) / len(parsed["lex"])
  380. avg_vec = sum(len(v) for v in parsed["vec"]) / len(parsed["vec"])
  381. if avg_lex <= avg_vec:
  382. quality_score += 5
  383. else:
  384. deductions.append("lex longer than vec")
  385. if parsed["vec"]:
  386. natural = sum(1 for v in parsed["vec"] if " " in v and len(v) > 15)
  387. quality_score += 5 if natural == len(parsed["vec"]) else 2
  388. if parsed["lex"]:
  389. with_terms = sum(1 for l in parsed["lex"] if lex_preserves_key_terms(l, query))
  390. if with_terms == len(parsed["lex"]):
  391. quality_score += 5
  392. elif with_terms > 0:
  393. quality_score += 2
  394. else:
  395. deductions.append("lex missing key terms")
  396. # --- Entity Preservation (-45 to +20) ---
  397. entity_score = 0
  398. entities = extract_named_entities(query)
  399. if entities and parsed["lex"]:
  400. with_entities = sum(1 for l in parsed["lex"] if lex_preserves_entities(l, entities))
  401. if with_entities == len(parsed["lex"]):
  402. entity_score += 15
  403. elif with_entities > 0:
  404. entity_score += 5
  405. else:
  406. entity_score -= 30
  407. deductions.append(f"lex missing entities: {entities}")
  408. generic_count = sum(1 for l in parsed["lex"] if lex_is_generic(l))
  409. if generic_count:
  410. entity_score -= generic_count * 15
  411. deductions.append(f"{generic_count} generic lex phrases")
  412. if parsed["vec"]:
  413. vec_with = sum(1 for v in parsed["vec"] if lex_preserves_entities(v, entities))
  414. if vec_with > 0:
  415. entity_score += 5
  416. elif not entities:
  417. entity_score = 10
  418. # --- Think bonus (0-20): reward NOT using thinking mode ---
  419. think_bonus = 0 if used_thinking else 20
  420. # --- Total ---
  421. total = format_score + diversity_score + hyde_score + quality_score + entity_score + think_bonus
  422. max_possible = 140 if parsed["hyde"] else 120
  423. percentage = max(0.0, min(100.0, total / max_possible * 100))
  424. # Hard cap: lex echoes are unacceptable - cap at 50%
  425. if lex_echo_count > 0:
  426. percentage = min(percentage, 50.0)
  427. deductions.insert(0, f"CAPPED: {lex_echo_count} lex echo(es)")
  428. if percentage >= 80:
  429. rating = "Excellent"
  430. elif percentage >= 60:
  431. rating = "Good"
  432. elif percentage >= 40:
  433. rating = "Acceptable"
  434. elif percentage >= 20:
  435. rating = "Poor"
  436. else:
  437. rating = "Failed"
  438. return {
  439. "format": format_score,
  440. "diversity": diversity_score,
  441. "hyde": hyde_score,
  442. "quality": quality_score,
  443. "entity": max(0, entity_score),
  444. "think_bonus": think_bonus,
  445. "total": max(0, total),
  446. "max_possible": max_possible,
  447. "percentage": round(percentage, 1),
  448. "rating": rating,
  449. "deductions": deductions,
  450. "parsed": parsed,
  451. "entities_detected": list(entities) if entities else [],
  452. "only_mode": None,
  453. }
  454. def score_expansion(query: str, expansion: str) -> float:
  455. """Score expansion as a float in [0.0, 1.0] for use as RL reward."""
  456. result = score_expansion_detailed(query, expansion)
  457. return max(0.0, min(1.0, result["total"] / result["max_possible"]))
  458. def extract_query_from_prompt(prompt: str) -> str:
  459. """Extract the query string from a chat-formatted prompt."""
  460. if "Expand this search query:" in prompt:
  461. query = prompt.split("Expand this search query:")[-1].strip()
  462. if "<|im_end|>" in query:
  463. query = query.split("<|im_end|>")[0].strip()
  464. return query
  465. return prompt.strip()
  466. # =============================================================================
  467. # TRL-compatible reward class
  468. # =============================================================================
  469. class QMDRewardFunction:
  470. """Reward function compatible with TRL's GRPOTrainer."""
  471. __name__ = "qmd_scoring_reward"
  472. def __call__(self, completions: list[str], prompts: list[str] = None, **kwargs) -> list[float]:
  473. rewards = []
  474. for i, completion in enumerate(completions):
  475. query = ""
  476. if prompts and i < len(prompts):
  477. query = extract_query_from_prompt(prompts[i])
  478. rewards.append(score_expansion(query, completion))
  479. return rewards
  480. # =============================================================================
  481. # CLI: run standalone to test the reward function
  482. # =============================================================================
  483. if __name__ == "__main__":
  484. print("QMD Reward Function Self-Test")
  485. print("=" * 60)
  486. tests = [
  487. ("auth", "lex: auth setup\nlex: authentication config\nvec: how to configure authentication\nhyde: Configure auth by setting AUTH_SECRET."),
  488. ("auth", "auth is important for security"),
  489. ("who is TDS motorsports", "lex: TDS motorsports history\nlex: TDS motorsports founders\nvec: information about TDS motorsports company"),
  490. ("who is TDS motorsports", "lex: find information about\nlex: company details\nvec: who is this company"),
  491. ("how to use React hooks", "lex: React hooks tutorial\nlex: useEffect useState\nvec: how to use React hooks in functional components"),
  492. ("auth", "<think>Let me think...</think>\nlex: auth"),
  493. ("auth", "lex: auth\nThis is some explanation\nvec: more"),
  494. # "/only:" mode tests (slash prefix)
  495. ("auth /only:lex", "lex: auth setup\nlex: authentication config\nlex: login credentials"),
  496. ("auth /only:lex", "lex: auth setup\nvec: how to configure authentication"), # should fail - has vec
  497. ("React hooks /only:vec", "vec: how to use React hooks in functional components\nvec: useState and useEffect patterns in React"),
  498. ("PostgreSQL indexing /only:hyde", "hyde: PostgreSQL uses B-tree indexes by default. Create indexes with CREATE INDEX idx_name ON table(column). EXPLAIN ANALYZE shows whether queries use indexes efficiently."),
  499. ]
  500. for query, expansion in tests:
  501. score = score_expansion(query, expansion)
  502. detail = score_expansion_detailed(query, expansion)
  503. only_mode = detail.get("only_mode")
  504. mode_str = f" [only:{only_mode}]" if only_mode else ""
  505. print(f"\n Query: '{query}'{mode_str}")
  506. print(f" Score: {score:.2f} ({detail['rating']})")
  507. if detail["deductions"]:
  508. print(f" Issues: {', '.join(detail['deductions'][:3])}")