diff --git a/services/search/api.py b/services/search/api.py index 44afe0fe4..54636c562 100644 --- a/services/search/api.py +++ b/services/search/api.py @@ -242,9 +242,21 @@ def build_search_query(query: str): and_operands = re.split(r"[\s,&]+", or_operand) expression = "" for and_operand in and_operands: + if not and_operand.strip(): + # Skip empty or whitespace-only operands + continue if re.fullmatch(r"'+", and_operand): # Skip any operands that are just repeating single-quotes continue + # Skip operands that start with a single quote (likely user error or + # malicious input). This prevents tsquery syntax errors like "'r:*" while + # allowing valid embedded quotes like "b'c". + if ( + and_operand.startswith("'") + and len(and_operand) > 1 + and and_operand[1] != "'" + ): + continue if expression: expression += f" & {and_operand}:*" else: diff --git a/services/search/tests/test_api.py b/services/search/tests/test_api.py index ff3c7efa2..abbe965a8 100644 --- a/services/search/tests/test_api.py +++ b/services/search/tests/test_api.py @@ -262,6 +262,20 @@ def test_search_with_vertical_bar_in_query(api_client, units): ("a, &&& , & b || || |||| |c,,,, d", "a:* & b:* | c:* & d:*"), # Expression with repeating single-quotes ("','','''',a,b'c,d''e,f'''g,','','''", "a:* & b'c:* & d''e:* & f'''g:*"), + # Empty operands + (" ", ""), + (" | ", ""), + (" & ", ""), + (",", ""), + (" , ", ""), + ("a | | b", "a:* | b:*"), + ("a & & b", "a:* & b:*"), + ("a, ,b", "a:* & b:*"), + (" | a", "a:*"), + ("a | ", "a:*"), + # Unbalanced leading single quote + ("'r", ""), + ("'museo palloiluhalli", "palloiluhalli:*"), ], ) def test_build_search_query(query, expected):