@@ -149,19 +149,20 @@ def _parse_email_data(self, raw_email: bytes, email_id: str | None = None) -> di
def _strip_html(html: str) -> str:
    """Convert an HTML fragment to plain text (simple, regex-based).

    Processing order:
      1. ``<script>`` / ``<style>`` elements are dropped with their contents.
      2. Opening ``br``/``p``/``div``/``tr``/``li`` tags become newlines.
      3. Every remaining tag is removed.
      4. HTML entities are decoded in a single pass.
      5. Blank-line runs and runs of spaces are collapsed.

    Args:
        html: The HTML markup to convert.

    Returns:
        The text content with leading and trailing whitespace stripped.
    """
    import re
    # Imported by name because the ``html`` parameter shadows the module.
    from html import unescape

    # Remove script and style elements, including everything inside them.
    text = re.sub(
        r"<(script|style)[^>]*>.*?</\1>",
        "",
        html,
        flags=re.DOTALL | re.IGNORECASE,
    )
    # Convert common block elements to newlines.
    text = re.sub(r"<(br|p|div|tr|li)[^>]*/?>", "\n", text, flags=re.IGNORECASE)
    # Remove all remaining HTML tags.
    text = re.sub(r"<[^>]+>", "", text)
    # Decode entities only after tags are gone, so an escaped "&lt;b&gt;" is
    # kept as literal text. A single unescape() pass avoids the double decoding
    # that chained str.replace() calls produce ("&amp;lt;" must become "&lt;",
    # not "<").
    text = unescape(text)
    # unescape() maps &nbsp; to U+00A0; emit a plain space instead.
    text = text.replace("\xa0", " ")
    # Collapse multiple newlines and runs of spaces.
    text = re.sub(r"\n\s*\n", "\n\n", text)
    text = re.sub(r" +", " ", text)
    return text.strip()
166167
167168 if email_message .is_multipart ():
0 commit comments