Skip to content

Commit c442e33

Browse files
peteWT (Peter Tittmann), claude, and pre-commit-ci[bot]
authored
feat: Add HTML body fallback when text/plain is not available (#114)
* feat: Add HTML body fallback when text/plain is not available Many email clients (Outlook, Gmail, etc.) send HTML-only emails without a text/plain alternative. This causes get_emails_content to return empty body fields. This change: - Collects text/html content as a fallback when parsing multipart emails - Strips HTML tags to convert to readable text if no text/plain is found - Handles single-part HTML emails the same way Fixes #113 Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * style: Use ternary operator per ruff SIM108 --------- Co-authored-by: Peter Tittmann <ptittmann@gmail.com> Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent a3c8424 commit c442e33

File tree

1 file changed

+40
-4
lines changed

1 file changed

+40
-4
lines changed

mcp_email_server/emails/classic.py

Lines changed: 40 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -150,8 +150,28 @@ def _parse_email_data(self, raw_email: bytes, email_id: str | None = None) -> di
150150

151151
# Get body content
152152
body = ""
153+
html_body = "" # Fallback if no text/plain
153154
attachments = []
154155

156+
def _strip_html(html: str) -> str:
    """Convert an HTML fragment to readable plain text (best effort).

    This is a lightweight, regex-based fallback used when an email has no
    text/plain part; it is not a full HTML parser.

    Steps, in order:
      1. Remove ``<script>``/``<style>`` elements together with their contents.
      2. Remove HTML comments, including Outlook conditional comments, so the
         markup hidden inside them does not leak into the text.
      3. Turn opening ``br``/``p``/``div``/``tr``/``li`` tags into newlines.
      4. Drop every remaining tag.
      5. Decode character references exactly once.
      6. Collapse blank-line runs and repeated spaces, then trim.

    Args:
        html: Raw HTML markup.

    Returns:
        The extracted text with leading/trailing whitespace removed.
    """
    import re

    # The parameter is named ``html``, so import the function directly
    # instead of the module to avoid the name clash.
    from html import unescape

    # Remove script and style elements (tolerate whitespace in the end tag)
    text = re.sub(r"<(script|style)[^>]*>.*?</\1\s*>", "", html, flags=re.DOTALL | re.IGNORECASE)
    # Remove comments as a unit; otherwise the generic tag regex below would
    # only strip up to the first ">" and leave the rest of the comment behind.
    text = re.sub(r"<!--.*?-->", "", text, flags=re.DOTALL)
    # Convert common block elements to newlines
    text = re.sub(r"<(br|p|div|tr|li)[^>]*/?>", "\n", text, flags=re.IGNORECASE)
    # Remove all remaining HTML tags
    text = re.sub(r"<[^>]+>", "", text)
    # Decode all character references in a single pass. Chained str.replace
    # calls would double-unescape input such as "&amp;lt;" into "<".
    # &nbsp; decodes to U+00A0, which is normalised to a regular space.
    text = unescape(text).replace("\xa0", " ")
    # Collapse multiple newlines and whitespace
    text = re.sub(r"\n\s*\n", "\n\n", text)
    text = re.sub(r" +", " ", text)
    return text.strip()
174+
155175
if email_message.is_multipart():
156176
for part in email_message.walk():
157177
content_type = part.get_content_type()
@@ -162,7 +182,7 @@ def _parse_email_data(self, raw_email: bytes, email_id: str | None = None) -> di
162182
filename = part.get_filename()
163183
if filename:
164184
attachments.append(filename)
165-
# Handle text parts
185+
# Handle text parts - prefer text/plain
166186
elif content_type == "text/plain":
167187
body_part = part.get_payload(decode=True)
168188
if body_part:
@@ -171,15 +191,31 @@ def _parse_email_data(self, raw_email: bytes, email_id: str | None = None) -> di
171191
body += body_part.decode(charset)
172192
except UnicodeDecodeError:
173193
body += body_part.decode("utf-8", errors="replace")
194+
# Collect HTML as fallback
195+
elif content_type == "text/html" and not body:
196+
html_part = part.get_payload(decode=True)
197+
if html_part:
198+
charset = part.get_content_charset("utf-8")
199+
try:
200+
html_body += html_part.decode(charset)
201+
except UnicodeDecodeError:
202+
html_body += html_part.decode("utf-8", errors="replace")
203+
204+
# Fall back to HTML if no plain text found
205+
if not body and html_body:
206+
body = _strip_html(html_body)
174207
else:
175-
# Handle plain text emails
208+
# Handle single-part emails
209+
content_type = email_message.get_content_type()
176210
payload = email_message.get_payload(decode=True)
177211
if payload:
178212
charset = email_message.get_content_charset("utf-8")
179213
try:
180-
body = payload.decode(charset)
214+
text = payload.decode(charset)
181215
except UnicodeDecodeError:
182-
body = payload.decode("utf-8", errors="replace")
216+
text = payload.decode("utf-8", errors="replace")
217+
218+
body = _strip_html(text) if content_type == "text/html" else text
183219
# TODO: Allow retrieving full email body
184220
if body and len(body) > MAX_BODY_LENGTH:
185221
body = body[:MAX_BODY_LENGTH] + "...[TRUNCATED]"

0 commit comments

Comments
 (0)