|
|
@@ -1993,7 +1993,6 @@ function sanitizeFTS5Term(term: string): string {
|
|
|
* Supports:
|
|
|
* - Quoted phrases: "exact phrase" → "exact phrase" (exact match)
|
|
|
* - Negation: -term or -"phrase" → uses FTS5 NOT operator
|
|
|
- * - OR: term1 OR term2 (case-insensitive)
|
|
|
* - Plain terms: term → "term"* (prefix match)
|
|
|
*
|
|
|
* FTS5 NOT is a binary operator: `term1 NOT term2` means "match term1 but not term2".
|
|
|
@@ -2002,13 +2001,10 @@ function sanitizeFTS5Term(term: string): string {
|
|
|
* Examples:
|
|
|
* performance -sports → "performance"* NOT "sports"*
|
|
|
* "machine learning" → "machine learning"
|
|
|
- * auth OR authentication → ("auth"* OR "authentication"*)
|
|
|
*/
|
|
|
function buildFTS5Query(query: string): string | null {
|
|
|
const positive: string[] = [];
|
|
|
const negative: string[] = [];
|
|
|
- const orGroups: string[][] = [[]]; // Track OR groupings
|
|
|
- let currentOrGroup = 0;
|
|
|
|
|
|
let i = 0;
|
|
|
const s = query.trim();
|
|
|
@@ -2037,7 +2033,6 @@ function buildFTS5Query(query: string): string | null {
|
|
|
negative.push(ftsPhrase);
|
|
|
} else {
|
|
|
positive.push(ftsPhrase);
|
|
|
- orGroups[currentOrGroup]!.push(ftsPhrase);
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
@@ -2047,24 +2042,13 @@ function buildFTS5Query(query: string): string | null {
|
|
|
while (i < s.length && !/[\s"]/.test(s[i]!)) i++;
|
|
|
const term = s.slice(start, i);
|
|
|
|
|
|
- // Check for OR operator
|
|
|
- if (term.toUpperCase() === 'OR') {
|
|
|
- // Start new OR group
|
|
|
- currentOrGroup++;
|
|
|
- orGroups.push([]);
|
|
|
- } else if (term.toUpperCase() === 'AND' || term.toUpperCase() === 'NOT') {
|
|
|
- // AND is implicit, NOT should use - prefix
|
|
|
- continue;
|
|
|
- } else {
|
|
|
- const sanitized = sanitizeFTS5Term(term);
|
|
|
- if (sanitized) {
|
|
|
- const ftsTerm = `"${sanitized}"*`; // Prefix match
|
|
|
- if (negated) {
|
|
|
- negative.push(ftsTerm);
|
|
|
- } else {
|
|
|
- positive.push(ftsTerm);
|
|
|
- orGroups[currentOrGroup]!.push(ftsTerm);
|
|
|
- }
|
|
|
+ const sanitized = sanitizeFTS5Term(term);
|
|
|
+ if (sanitized) {
|
|
|
+ const ftsTerm = `"${sanitized}"*`; // Prefix match
|
|
|
+ if (negated) {
|
|
|
+ negative.push(ftsTerm);
|
|
|
+ } else {
|
|
|
+ positive.push(ftsTerm);
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
@@ -2073,30 +2057,14 @@ function buildFTS5Query(query: string): string | null {
|
|
|
if (positive.length === 0 && negative.length === 0) return null;
|
|
|
|
|
|
// If only negative terms, we can't search (FTS5 NOT is binary)
|
|
|
- if (positive.length === 0) {
|
|
|
- // Fall back to searching without negation
|
|
|
- return null;
|
|
|
- }
|
|
|
+ if (positive.length === 0) return null;
|
|
|
|
|
|
- // Build the positive part with OR groups
|
|
|
- let result: string;
|
|
|
- if (orGroups.length > 1 && orGroups.some(g => g.length > 0)) {
|
|
|
- // Has OR groups - build (a OR b) AND c structure
|
|
|
- const orParts = orGroups.filter(g => g.length > 0).map(g =>
|
|
|
- g.length === 1 ? g[0]! : `(${g.join(' OR ')})`
|
|
|
- );
|
|
|
- result = orParts.join(' AND ');
|
|
|
- } else {
|
|
|
- // Simple AND of all positive terms
|
|
|
- result = positive.join(' AND ');
|
|
|
- }
|
|
|
+ // Join positive terms with AND
|
|
|
+ let result = positive.join(' AND ');
|
|
|
|
|
|
- // Add NOT clause for negative terms (FTS5: positive NOT negative)
|
|
|
- if (negative.length > 0) {
|
|
|
- // FTS5 NOT only works with single term on right side, chain them
|
|
|
- for (const neg of negative) {
|
|
|
- result = `${result} NOT ${neg}`;
|
|
|
- }
|
|
|
+ // Add NOT clause for negative terms
|
|
|
+ for (const neg of negative) {
|
|
|
+ result = `${result} NOT ${neg}`;
|
|
|
}
|
|
|
|
|
|
return result;
|
|
|
@@ -2111,15 +2079,6 @@ export function validateSemanticQuery(query: string): string | null {
|
|
|
if (/-\w/.test(query) || /-"/.test(query)) {
|
|
|
return 'Negation (-term) is not supported in vec/hyde queries. Use lex for exclusions.';
|
|
|
}
|
|
|
- // Check for quoted exact phrases (semantic search doesn't do exact matching)
|
|
|
- if (/"[^"]+"\s*$/.test(query.trim()) || /^"[^"]+"/.test(query.trim())) {
|
|
|
- // Single quoted phrase is the whole query - that's fine for hyde
|
|
|
- // But warn if it looks like they expect exact matching
|
|
|
- }
|
|
|
- // Check for OR operator (semantic search doesn't support boolean logic)
|
|
|
- if (/\bOR\b/i.test(query)) {
|
|
|
- return 'OR operator is not supported in vec/hyde queries. Use multiple lex queries or rephrase.';
|
|
|
- }
|
|
|
return null;
|
|
|
}
|
|
|
|