@@ -30,9 +30,7 @@ def get_sql_type(pandas_dtype: str) -> str:
3030 return "TEXT"
3131
3232
33- def create_table_for_resource (
34- resource : Resource , df : pd .DataFrame
35- ) -> Optional [ResourceDataTable ]:
33+ def create_table_for_resource (resource : Resource , df : pd .DataFrame ) -> Optional [ResourceDataTable ]:
3634 """Create a database table for the resource data and index it."""
3735 try :
3836 # Create ResourceDataTable entry first to get the table name
@@ -65,9 +63,7 @@ def create_table_for_resource(
6563 df .to_csv (csv_data , index = False , header = False )
6664 csv_data .seek (0 )
6765
68- copy_sql = (
69- f'COPY "{ temp_table } " ({ "," .join (quoted_columns )} ) FROM STDIN WITH CSV'
70- )
66+ copy_sql = f'COPY "{ temp_table } " ({ "," .join (quoted_columns )} ) FROM STDIN WITH CSV'
7167 cursor .copy_expert (copy_sql , csv_data )
7268
7369 # Insert from temp to main table with validation
@@ -102,14 +98,10 @@ def index_resource_data(resource: Resource) -> Optional[ResourceDataTable]:
10298 try :
10399 file_details = resource .resourcefiledetails
104100 if not file_details :
105- logger .info (
106- f"Resource { resource_id } has no file details, skipping indexing"
107- )
101+ logger .info (f"Resource { resource_id } has no file details, skipping indexing" )
108102 return None
109103 except Exception as e :
110- logger .error (
111- f"Failed to access file details for resource { resource_id } : { str (e )} "
112- )
104+ logger .error (f"Failed to access file details for resource { resource_id } : { str (e )} " )
113105 return None
114106
115107 # Check file format
@@ -131,9 +123,7 @@ def index_resource_data(resource: Resource) -> Optional[ResourceDataTable]:
131123 )
132124 return None
133125 except Exception as e :
134- logger .error (
135- f"Failed to determine format for resource { resource_id } : { str (e )} "
136- )
126+ logger .error (f"Failed to determine format for resource { resource_id } : { str (e )} " )
137127 return None
138128
139129 # Load tabular data with timeout protection
@@ -144,9 +134,7 @@ def index_resource_data(resource: Resource) -> Optional[ResourceDataTable]:
144134 @contextmanager
145135 def timeout (seconds : int ) -> Generator [None , None , None ]:
146136 def handler (signum : int , frame : Any ) -> None :
147- raise TimeoutError (
148- f"Loading data timed out after { seconds } seconds"
149- )
137+ raise TimeoutError (f"Loading data timed out after { seconds } seconds" )
150138
151139 # Set the timeout handler
152140 original_handler = signal .getsignal (signal .SIGALRM )
@@ -163,9 +151,7 @@ def handler(signum: int, frame: Any) -> None:
163151 with timeout (60 ): # 60 second timeout for loading data
164152 df = load_tabular_data (file_details .file .path , format )
165153 except TimeoutError as te :
166- logger .error (
167- f"Timeout while loading data for resource { resource_id } : { str (te )} "
168- )
154+ logger .error (f"Timeout while loading data for resource { resource_id } : { str (te )} " )
169155 return None
170156 except Exception :
171157 # Fallback without timeout if signal.SIGALRM is not available (e.g., on Windows)
@@ -204,9 +190,7 @@ def handler(signum: int, frame: Any) -> None:
204190 # Rename all but the first occurrence
205191 for i , idx in enumerate (indices [1 :], 1 ):
206192 df .columns .values [idx ] = f"{ col } _{ i } "
207- logger .warning (
208- f"Renamed duplicate columns in resource { resource_id } "
209- )
193+ logger .warning (f"Renamed duplicate columns in resource { resource_id } " )
210194 except Exception as e :
211195 logger .error (
212196 f"Failed to sanitize column names for resource { resource_id } : { str (e )} "
@@ -229,9 +213,7 @@ def handler(signum: int, frame: Any) -> None:
229213 existing_table = ResourceDataTable .objects .get (resource = resource )
230214 try :
231215 with connections [DATA_DB ].cursor () as cursor :
232- cursor .execute (
233- f'DROP TABLE IF EXISTS "{ existing_table .table_name } "'
234- )
216+ cursor .execute (f'DROP TABLE IF EXISTS "{ existing_table .table_name } "' )
235217 except Exception as drop_error :
236218 logger .error (
237219 f"Failed to drop existing table for resource { resource_id } : { str (drop_error )} "
@@ -292,15 +274,11 @@ def handler(signum: int, frame: Any) -> None:
292274 # For description, preserve existing if available, otherwise auto-generate
293275 description = f"Description of column { col } "
294276 if col in existing_schemas :
295- existing_description = existing_schemas [col ][
296- "description"
297- ]
277+ existing_description = existing_schemas [col ]["description" ]
298278 # Check for None and non-auto-generated descriptions
299279 if existing_description is not None :
300280 description = existing_description
301- logger .debug (
302- f"Preserved custom description for column { col } "
303- )
281+ logger .debug (f"Preserved custom description for column { col } " )
304282
305283 # Create the schema entry
306284 ResourceSchema .objects .create (
@@ -393,9 +371,7 @@ def get_row_count(resource: Resource) -> int:
393371 import traceback
394372
395373 error_tb = traceback .format_exc ()
396- logger .error (
397- f"Error getting row count for resource { resource .id } :\n { str (e )} \n { error_tb } "
398- )
374+ logger .error (f"Error getting row count for resource { resource .id } :\n { str (e )} \n { error_tb } " )
399375 return 0
400376
401377
@@ -429,9 +405,7 @@ def get_preview_data(resource: Resource) -> Optional[PreviewData]:
429405 try :
430406 if is_all_entries :
431407 # For safety, always limit the number of rows returned even for 'all entries'
432- cursor .execute (
433- f'SELECT * FROM "{ data_table .table_name } " LIMIT 1000'
434- )
408+ cursor .execute (f'SELECT * FROM "{ data_table .table_name } " LIMIT 1000' )
435409 else :
436410 # Ensure we have valid integer values for the calculation
437411 start = int (start_entry ) if start_entry is not None else 0
@@ -443,8 +417,8 @@ def get_preview_data(resource: Resource) -> Optional[PreviewData]:
443417
444418 columns = [desc [0 ] for desc in cursor .description ]
445419 data = cursor .fetchall ()
446- # Convert tuples to lists
447- rows = [list ( row ) for row in data ]
420+ # Convert tuples to lists and sanitize None values to empty strings
421+ rows = [[ cell if cell is not None else "" for cell in row ] for row in data ]
448422 return PreviewData (columns = columns , rows = rows )
449423 except Exception as query_error :
450424 logger .error (
0 commit comments