1+ import io
12from datetime import datetime
23from pathlib import Path
34from typing import Any
@@ -100,9 +101,25 @@ def _convert_value(value: str) -> float:
100101 return float (value )
101102
102103
def _safe_open(path: str) -> io.TextIOWrapper:
    """Open a text file for reading with the first encoding that decodes it.

    The complete file content is validated against each candidate encoding,
    not just a leading sample, so an undecodable byte far into the file is
    detected here rather than surfacing later from ``read()``/``readlines()``.

    Args:
        path: Location of the file to open.

    Returns:
        A text-mode file object opened for reading. The caller is
        responsible for closing it (typically via ``with``).

    Raises:
        UnicodeDecodeError: If no candidate encoding decodes the file.
        OSError: If the file cannot be opened or read.
    """
    # Order matters:
    # - "utf-8-sig" accepts UTF-8 with or without a BOM and strips the BOM;
    #   plain "utf-8" would keep it as "\ufeff" at the start of the text.
    # - "shift_jis" is multi-byte and strict, so it is tried before the
    #   permissive single-byte code pages (it was the encoding this module
    #   used before the fallback list existed).
    # - "latin-1" maps every possible byte and therefore never fails; it has
    #   to be last, because anything listed after it would be unreachable.
    encodings = ("utf-8-sig", "shift_jis", "cp1252", "latin-1")

    # Read the raw bytes once and test the candidates in memory instead of
    # re-opening the file for every attempt.
    with open(path, mode="rb") as binary:
        raw = binary.read()

    for enc in encodings:
        try:
            raw.decode(enc)
        except UnicodeDecodeError:
            continue
        return open(path, mode="r", encoding=enc)

    # Defensive guard: only reachable if the candidate list no longer ends
    # with an encoding that accepts every byte.
    raise UnicodeDecodeError("none", b"", 0, 0, "No valid encoding found")
116+
117+
103118def _read_peaks_from_csv (path : str ) -> list [Peak ]:
104119 peaks : list [Peak ] = []
105- with open (path , mode = "r" , encoding = "shift_jis" ) as file :
120+
121+ # use robust open instead of fixed Shift-JIS
122+ with _safe_open (path ) as file :
106123 lines = file .readlines ()
107124
108125 # Find the start of peak data
@@ -120,7 +137,6 @@ def _read_peaks_from_csv(path: str) -> list[Peak]:
120137 if line .startswith ('"Warning' ) or line .startswith ('"Missing' ):
121138 break
122139
123- # Parse line using quote-aware splitting
124140 parts = _parse_line_with_decimal_comma (line )
125141
126142 # Skip separator lines or empty peaks
@@ -134,7 +150,6 @@ def _read_peaks_from_csv(path: str) -> list[Peak]:
134150 if parts [1 ] == "" :
135151 continue
136152
137- # Convert values using helper method
138153 time = _convert_value (parts [1 ])
139154 area = _convert_value (parts [2 ])
140155 height = _convert_value (parts [3 ])
@@ -171,10 +186,13 @@ def _extract_value(parts: list[str], key: str) -> str | None:
171186
172187def _read_metadata (path : str ) -> dict [str , Any ]:
173188 metadata : dict [str , Any ] = {}
174- with open (path , mode = "r" , encoding = "shift_jis" ) as file :
189+
190+ # use the robust opener with fallback encodings
191+ with _safe_open (path ) as file :
175192 lines = file .readlines ()
176193
177194 for line in lines :
195+ # skip empty / separator lines
178196 if not line .strip () or line .startswith ("===" ):
179197 continue
180198
@@ -183,13 +201,16 @@ def _read_metadata(path: str) -> dict[str, Any]:
183201 # Extract metadata using helper function
184202 if value := _extract_value (parts , "Software Version:" ):
185203 metadata ["software_version" ] = value
204+
186205 elif value := _extract_value (parts , "Sample Name:" ):
187206 metadata ["sample_name" ] = value
207+
188208 elif value := _extract_value (parts , "Sample Amount:" ):
189209 try :
190210 metadata ["sample_amount" ] = float (value .replace ("," , "." ))
191211 except ValueError :
192212 pass
213+
193214 elif value := _extract_value (parts , "Data Acquisition Time:" ):
194215 try :
195216 metadata ["acquisition_time" ] = str (
@@ -198,7 +219,7 @@ def _read_metadata(path: str) -> dict[str, Any]:
198219 except ValueError :
199220 pass
200221
201- # Stop reading when we hit the peak data
222+ # Stop once we reach the start of the peak table
202223 if '"Peak"' in line and '"Time"' in line :
203224 break
204225
0 commit comments