Ver Fonte

refactor: remove OR operator from lex queries

Simplify to just: terms, "phrases", and -negation
Tobi Lütke há 3 meses
pai
commit
de3a83a553
3 ficheiros alterados com 14 adições e 60 exclusões
  1. + 1 - 3
      skills/qmd/SKILL.md
  2. + 13 - 54
      src/store.ts
  3. + 0 - 3
      test/structured-search.test.ts

+ 1 - 3
skills/qmd/SKILL.md

@@ -42,7 +42,6 @@ Local search engine for markdown content.
 
 **lex (keyword)**
 - 2-5 terms, no filler words
-- Include synonyms: `auth OR authentication`
 - Exact phrase: `"connection pool"` (quoted)
 - Exclude terms: `performance -sports` (minus prefix)
 - Code identifiers work: `handleError async`
@@ -75,9 +74,8 @@ First query gets 2x weight in fusion — put your best guess first.
 | `term` | Prefix match | `perf` matches "performance" |
 | `"phrase"` | Exact phrase | `"rate limiter"` |
 | `-term` | Exclude | `performance -sports` |
-| `OR` | Either term | `auth OR authentication` |
 
-Note: `-term` and `OR` only work in lex queries, not vec/hyde.
+Note: `-term` only works in lex queries, not vec/hyde.
 
 ### Collection Filtering
 

+ 13 - 54
src/store.ts

@@ -1993,7 +1993,6 @@ function sanitizeFTS5Term(term: string): string {
  * Supports:
  * - Quoted phrases: "exact phrase" → "exact phrase" (exact match)
  * - Negation: -term or -"phrase" → uses FTS5 NOT operator
- * - OR: term1 OR term2 (case-insensitive)
  * - Plain terms: term → "term"* (prefix match)
  *
  * FTS5 NOT is a binary operator: `term1 NOT term2` means "match term1 but not term2".
@@ -2002,13 +2001,10 @@ function sanitizeFTS5Term(term: string): string {
  * Examples:
  *   performance -sports     → "performance"* NOT "sports"*
  *   "machine learning"      → "machine learning"
- *   auth OR authentication  → ("auth"* OR "authentication"*)
  */
 function buildFTS5Query(query: string): string | null {
   const positive: string[] = [];
   const negative: string[] = [];
-  const orGroups: string[][] = [[]]; // Track OR groupings
-  let currentOrGroup = 0;
 
   let i = 0;
   const s = query.trim();
@@ -2037,7 +2033,6 @@ function buildFTS5Query(query: string): string | null {
             negative.push(ftsPhrase);
           } else {
             positive.push(ftsPhrase);
-            orGroups[currentOrGroup]!.push(ftsPhrase);
           }
         }
       }
@@ -2047,24 +2042,13 @@ function buildFTS5Query(query: string): string | null {
       while (i < s.length && !/[\s"]/.test(s[i]!)) i++;
       const term = s.slice(start, i);
 
-      // Check for OR operator
-      if (term.toUpperCase() === 'OR') {
-        // Start new OR group
-        currentOrGroup++;
-        orGroups.push([]);
-      } else if (term.toUpperCase() === 'AND' || term.toUpperCase() === 'NOT') {
-        // AND is implicit, NOT should use - prefix
-        continue;
-      } else {
-        const sanitized = sanitizeFTS5Term(term);
-        if (sanitized) {
-          const ftsTerm = `"${sanitized}"*`;  // Prefix match
-          if (negated) {
-            negative.push(ftsTerm);
-          } else {
-            positive.push(ftsTerm);
-            orGroups[currentOrGroup]!.push(ftsTerm);
-          }
+      const sanitized = sanitizeFTS5Term(term);
+      if (sanitized) {
+        const ftsTerm = `"${sanitized}"*`;  // Prefix match
+        if (negated) {
+          negative.push(ftsTerm);
+        } else {
+          positive.push(ftsTerm);
         }
       }
     }
@@ -2073,30 +2057,14 @@ function buildFTS5Query(query: string): string | null {
   if (positive.length === 0 && negative.length === 0) return null;
 
   // If only negative terms, we can't search (FTS5 NOT is binary)
-  if (positive.length === 0) {
-    // Fall back to searching without negation
-    return null;
-  }
+  if (positive.length === 0) return null;
 
-  // Build the positive part with OR groups
-  let result: string;
-  if (orGroups.length > 1 && orGroups.some(g => g.length > 0)) {
-    // Has OR groups - build (a OR b) AND c structure
-    const orParts = orGroups.filter(g => g.length > 0).map(g =>
-      g.length === 1 ? g[0]! : `(${g.join(' OR ')})`
-    );
-    result = orParts.join(' AND ');
-  } else {
-    // Simple AND of all positive terms
-    result = positive.join(' AND ');
-  }
+  // Join positive terms with AND
+  let result = positive.join(' AND ');
 
-  // Add NOT clause for negative terms (FTS5: positive NOT negative)
-  if (negative.length > 0) {
-    // FTS5 NOT only works with single term on right side, chain them
-    for (const neg of negative) {
-      result = `${result} NOT ${neg}`;
-    }
+  // Add NOT clause for negative terms
+  for (const neg of negative) {
+    result = `${result} NOT ${neg}`;
   }
 
   return result;
@@ -2111,15 +2079,6 @@ export function validateSemanticQuery(query: string): string | null {
   if (/-\w/.test(query) || /-"/.test(query)) {
     return 'Negation (-term) is not supported in vec/hyde queries. Use lex for exclusions.';
   }
-  // Check for quoted exact phrases (semantic search doesn't do exact matching)
-  if (/"[^"]+"\s*$/.test(query.trim()) || /^"[^"]+"/.test(query.trim())) {
-    // Single quoted phrase is the whole query - that's fine for hyde
-    // But warn if it looks like they expect exact matching
-  }
-  // Check for OR operator (semantic search doesn't support boolean logic)
-  if (/\bOR\b/i.test(query)) {
-    return 'OR operator is not supported in vec/hyde queries. Use multiple lex queries or rephrase.';
-  }
   return null;
 }
 

+ 0 - 3
test/structured-search.test.ts

@@ -340,9 +340,6 @@ describe("lex query syntax", () => {
       expect(validateSemanticQuery('-"exact phrase"')).toContain("Negation");
     });
 
-    test("rejects OR operator", () => {
-      expect(validateSemanticQuery("auth OR authentication")).toContain("OR");
-    });
 
     test("accepts hyde-style hypothetical answers", () => {
       expect(validateSemanticQuery(