@@ -29,9 +29,16 @@ class InvalidMetadataFilterError(ValueError):
2929class MetadataFilterBuilder :
3030 """Translate JSON-style metadata filters into SQL, covering arrays, regex, and substring operators."""
3131
32- _COLUMN_FIELDS = {
33- "filename" : "filename" ,
34- }
32+ def __init__ (
33+ self ,
34+ * ,
35+ metadata_column : str = "doc_metadata" ,
36+ metadata_types_column : Optional [str ] = "metadata_types" ,
37+ column_fields : Optional [Dict [str , str ]] = None ,
38+ ) -> None :
39+ self .metadata_column = metadata_column
40+ self .metadata_types_column = metadata_types_column
41+ self ._column_fields = column_fields or {"filename" : "filename" }
3542
3643 def build (self , filters : Optional [Dict [str , Any ]]) -> str :
3744 """Construct a SQL WHERE clause from a metadata filter dictionary."""
@@ -113,7 +120,7 @@ def _combine_clauses(self, clauses: List[str], operator: str, context: str) -> s
113120
114121 def _build_field_metadata_clause (self , field : str , value : Any ) -> str :
115122 """Build SQL clause for a single metadata field."""
116- if field in self ._COLUMN_FIELDS :
123+ if field in self ._column_fields :
117124 return self ._build_column_field_clause (field , value )
118125
119126 if isinstance (value , dict ) and not any (key .startswith ("$" ) for key in value ):
@@ -192,7 +199,7 @@ def _build_single_value_clause(self, field: str, value: Any) -> str:
192199
193200 def _build_column_field_clause (self , field : str , value : Any ) -> str :
194201 """Build SQL clause for a reserved column field (e.g., filename)."""
195- column = self ._COLUMN_FIELDS [field ]
202+ column = self ._column_fields [field ]
196203 builder = TextColumnFilterBuilder (column )
197204
198205 if isinstance (value , dict ):
@@ -220,7 +227,7 @@ def _build_exists_clause(self, field: str, operand: Any) -> str:
220227 raise InvalidMetadataFilterError (f"$exists operator for field '{ field } ' expects a boolean value." )
221228
222229 field_key = self ._escape_single_quotes (field )
223- clause = f"(doc_metadata ? '{ field_key } ')"
230+ clause = f"({ self . metadata_column } ? '{ field_key } ')"
224231 return clause if expected else f"(NOT { clause } )"
225232
226233 def _build_comparison_clause (self , field : str , operator : str , operand : Any ) -> str :
@@ -270,9 +277,9 @@ def _build_numeric_comparison_clause(self, field: str, sql_operator: str, operan
270277
271278 field_key = self ._escape_single_quotes (field )
272279 type_expr = self ._metadata_type_expr (field_key )
273- # Use CASE to ensure casting only happens when type is correct
274280 value_expr = (
275- f"(CASE WHEN { type_expr } = 'number' THEN (doc_metadata ->> '{ field_key } ')::double precision ELSE NULL END)"
281+ f"(CASE WHEN { type_expr } = 'number' THEN ({ self .metadata_column } ->> '{ field_key } ')::double precision "
282+ "ELSE NULL END)"
276283 )
277284 return f"({ value_expr } { sql_operator } { literal } )"
278285
@@ -285,8 +292,10 @@ def _build_decimal_comparison_clause(self, field: str, sql_operator: str, operan
285292
286293 field_key = self ._escape_single_quotes (field )
287294 type_expr = self ._metadata_type_expr (field_key )
288- # Use CASE to ensure casting only happens when type is correct
289- value_expr = f"(CASE WHEN { type_expr } = 'decimal' THEN (doc_metadata ->> '{ field_key } ')::numeric ELSE NULL END)"
295+ value_expr = (
296+ f"(CASE WHEN { type_expr } = 'decimal' THEN ({ self .metadata_column } ->> '{ field_key } ')::numeric "
297+ "ELSE NULL END)"
298+ )
290299 return f"({ value_expr } { sql_operator } { literal } ::numeric)"
291300
292301 def _build_datetime_comparison_clause (self , field : str , sql_operator : str , operand : Any ) -> str :
@@ -298,9 +307,9 @@ def _build_datetime_comparison_clause(self, field: str, sql_operator: str, opera
298307
299308 field_key = self ._escape_single_quotes (field )
300309 type_expr = self ._metadata_type_expr (field_key )
301- # Use CASE to ensure casting only happens when type is correct
302310 value_expr = (
303- f"(CASE WHEN { type_expr } = 'datetime' THEN (doc_metadata ->> '{ field_key } ')::timestamptz ELSE NULL END)"
311+ f"(CASE WHEN { type_expr } = 'datetime' THEN ({ self .metadata_column } ->> '{ field_key } ')::timestamptz "
312+ "ELSE NULL END)"
304313 )
305314 return f"({ value_expr } { sql_operator } { literal } )"
306315
@@ -313,16 +322,17 @@ def _build_date_comparison_clause(self, field: str, sql_operator: str, operand:
313322
314323 field_key = self ._escape_single_quotes (field )
315324 type_expr = self ._metadata_type_expr (field_key )
316- # Use CASE to ensure casting only happens when type is correct
317- value_expr = f"(CASE WHEN { type_expr } = 'date' THEN (doc_metadata ->> '{ field_key } ')::date ELSE NULL END)"
325+ value_expr = (
326+ f"(CASE WHEN { type_expr } = 'date' THEN ({ self .metadata_column } ->> '{ field_key } ')::date " "ELSE NULL END)"
327+ )
318328 return f"({ value_expr } { sql_operator } { literal } )"
319329
320330 def _build_string_comparison_clause (self , field : str , sql_operator : str , operand : str ) -> str :
321331 """Build comparison clause for 'string' typed metadata (only for $eq/$ne)."""
322332 field_key = self ._escape_single_quotes (field )
323333 escaped_value = self ._escape_single_quotes (operand )
324334 type_expr = self ._metadata_type_expr (field_key )
325- value_expr = f"(doc_metadata ->> '{ field_key } ')"
335+ value_expr = f"({ self . metadata_column } ->> '{ field_key } ')"
326336 # For strings without explicit type, assume string type (COALESCE handles missing metadata_types)
327337 return f"((COALESCE({ type_expr } , 'string') = 'string') AND { value_expr } { sql_operator } '{ escaped_value } ')"
328338
@@ -345,8 +355,23 @@ def _build_type_clause(self, field: str, operand: Any) -> str:
345355 raise InvalidMetadataFilterError (str (exc )) from exc
346356
347357 field_key = self ._escape_single_quotes (field )
348- type_expr = f"COALESCE(metadata_types ->> '{ field_key } ', 'string')"
349- clauses = [f"({ type_expr } = '{ type_name } ')" for type_name in canonical_types ]
358+ if not self .metadata_types_column :
359+ jsonb_type_expr = f"jsonb_typeof({ self .metadata_column } -> '{ field_key } ')"
360+ type_map = {
361+ "string" : "string" ,
362+ "number" : "number" ,
363+ "decimal" : "number" ,
364+ "boolean" : "boolean" ,
365+ "object" : "object" ,
366+ "array" : "array" ,
367+ "null" : "null" ,
368+ "datetime" : "string" ,
369+ "date" : "string" ,
370+ }
371+ clauses = [f"({ jsonb_type_expr } = '{ type_map .get (type_name , type_name )} ')" for type_name in canonical_types ]
372+ else :
373+ type_expr = f"COALESCE({ self .metadata_types_column } ->> '{ field_key } ', 'string')"
374+ clauses = [f"({ type_expr } = '{ type_name } ')" for type_name in canonical_types ]
350375 if len (clauses ) == 1 :
351376 return clauses [0 ]
352377 return "(" + " OR " .join (clauses ) + ")"
@@ -367,7 +392,7 @@ def _jsonb_contains_clause(self, field: str, value: Any) -> str:
367392 ) from exc
368393
369394 escaped_payload = json_payload .replace ("'" , "''" )
370- base_clause = f"(doc_metadata @> '{ escaped_payload } '::jsonb)"
395+ base_clause = f"({ self . metadata_column } @> '{ escaped_payload } '::jsonb)"
371396
372397 array_clause = self ._build_array_membership_clause (field , value )
373398 if array_clause :
@@ -391,8 +416,8 @@ def _build_array_membership_clause(self, field: str, value: Any) -> str:
391416 field_key = self ._escape_single_quotes (field )
392417
393418 return (
394- f"((jsonb_typeof(doc_metadata -> '{ field_key } ') = 'array') "
395- f"AND ((doc_metadata -> '{ field_key } ') @> '{ escaped_array_payload } '::jsonb))"
419+ f"((jsonb_typeof({ self . metadata_column } -> '{ field_key } ') = 'array') "
420+ f"AND (({ self . metadata_column } -> '{ field_key } ') @> '{ escaped_array_payload } '::jsonb))"
396421 )
397422
398423 def _build_regex_clause (self , field : str , operand : Any ) -> str :
@@ -403,7 +428,7 @@ def _build_regex_clause(self, field: str, operand: Any) -> str:
403428 escaped_pattern = pattern .replace ("\\ " , "\\ \\ " ).replace ("'" , "''" )
404429 field_key = self ._escape_single_quotes (field )
405430
406- base_clause = f"((doc_metadata ->> '{ field_key } ') { regex_operator } '{ escaped_pattern } ')"
431+ base_clause = f"(({ self . metadata_column } ->> '{ field_key } ') { regex_operator } '{ escaped_pattern } ')"
407432 array_clause = self ._build_array_regex_clause (field , regex_operator , escaped_pattern )
408433 if array_clause :
409434 return f"({ base_clause } OR { array_clause } )"
@@ -440,8 +465,8 @@ def _build_array_regex_clause(self, field: str, regex_operator: str, escaped_pat
440465 field_key = self ._escape_single_quotes (field )
441466 array_value_expr = "trim('\" ' FROM arr.value::text)"
442467 return (
443- f"((jsonb_typeof(doc_metadata -> '{ field_key } ') = 'array') AND EXISTS ("
444- f"SELECT 1 FROM jsonb_array_elements(doc_metadata -> '{ field_key } ') AS arr(value) "
468+ f"((jsonb_typeof({ self . metadata_column } -> '{ field_key } ') = 'array') AND EXISTS ("
469+ f"SELECT 1 FROM jsonb_array_elements({ self . metadata_column } -> '{ field_key } ') AS arr(value) "
445470 f"WHERE jsonb_typeof(arr.value) = 'string' AND { array_value_expr } { regex_operator } '{ escaped_pattern } '))"
446471 )
447472
@@ -453,7 +478,7 @@ def _build_contains_clause(self, field: str, operand: Any) -> str:
453478 escaped_pattern = self ._escape_like_pattern (value )
454479 field_key = self ._escape_single_quotes (field )
455480
456- base_clause = f"((doc_metadata ->> '{ field_key } ') { like_operator } '%{ escaped_pattern } %')"
481+ base_clause = f"(({ self . metadata_column } ->> '{ field_key } ') { like_operator } '%{ escaped_pattern } %')"
457482 array_clause = self ._build_array_like_clause (field , like_operator , escaped_pattern )
458483 if array_clause :
459484 return f"({ base_clause } OR { array_clause } )"
@@ -492,15 +517,17 @@ def _build_array_like_clause(self, field: str, like_operator: str, escaped_patte
492517 field_key = self ._escape_single_quotes (field )
493518 array_value_expr = "trim('\" ' FROM arr.value::text)"
494519 return (
495- f"((jsonb_typeof(doc_metadata -> '{ field_key } ') = 'array') AND EXISTS ("
496- f"SELECT 1 FROM jsonb_array_elements(doc_metadata -> '{ field_key } ') AS arr(value) "
520+ f"((jsonb_typeof({ self . metadata_column } -> '{ field_key } ') = 'array') AND EXISTS ("
521+ f"SELECT 1 FROM jsonb_array_elements({ self . metadata_column } -> '{ field_key } ') AS arr(value) "
497522 f"WHERE jsonb_typeof(arr.value) = 'string' AND "
498523 f"{ array_value_expr } { like_operator } '%{ escaped_pattern } %'))"
499524 )
500525
501526 def _metadata_type_expr (self , field_key : str ) -> str :
502527 """Return SQL expression fetching the stored metadata type for a field."""
503- return f"(metadata_types ->> '{ field_key } ')"
528+ if not self .metadata_types_column :
529+ return "NULL"
530+ return f"({ self .metadata_types_column } ->> '{ field_key } ')"
504531
505532 def _map_comparison_operator (self , operator : str ) -> str :
506533 """Map comparison operators to SQL symbols."""
0 commit comments