Skip to content

Commit 8f1a508

Browse files
Peter Tittmann and claude committed
feat: Add HTML body fallback when text/plain is not available
Many email clients (Outlook, Gmail, etc.) send HTML-only emails without a text/plain alternative. This causes get_emails_content to return empty body fields.

This change:
- Collects text/html content as a fallback when parsing multipart emails
- Strips HTML tags to convert to readable text if no text/plain is found
- Handles single-part HTML emails the same way

Fixes #113

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent de08972 commit 8f1a508

File tree

1 file changed

+42
-4
lines changed

1 file changed

+42
-4
lines changed

mcp_email_server/emails/classic.py

Lines changed: 42 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -143,8 +143,27 @@ def _parse_email_data(self, raw_email: bytes, email_id: str | None = None) -> di
143143

144144
# Get body content
145145
body = ""
146+
html_body = "" # Fallback if no text/plain
146147
attachments = []
147148

149+
def _strip_html(html: str) -> str:
    """Convert an HTML fragment to readable plain text.

    This is a lightweight, regex-based conversion intended for producing a
    text fallback from HTML-only email bodies; it is not a full HTML parser.

    Steps, in order:
      1. Drop ``<script>`` / ``<style>`` elements together with their content.
      2. Turn opening ``br``/``p``/``div``/``tr``/``li`` tags into newlines.
      3. Remove every remaining tag.
      4. Decode HTML entities (named and numeric) in a single pass.
      5. Collapse blank-line runs and repeated spaces, then strip the ends.

    Args:
        html: The HTML markup to convert.

    Returns:
        The extracted text with surrounding whitespace removed.
    """
    import re
    # The parameter is named ``html``, so import the function directly rather
    # than the module to avoid the name clash.
    from html import unescape

    # Remove script and style elements, including everything inside them.
    text = re.sub(r'<(script|style)[^>]*>.*?</\1>', '', html, flags=re.DOTALL | re.IGNORECASE)
    # Convert common block elements to newlines so paragraphs stay separated.
    text = re.sub(r'<(br|p|div|tr|li)[^>]*/?>', '\n', text, flags=re.IGNORECASE)
    # Remove all remaining HTML tags.
    text = re.sub(r'<[^>]+>', '', text)
    # Decode entities in ONE pass. Chained str.replace calls that handle
    # '&amp;' before '&lt;' decode escaped text twice ('&amp;lt;' -> '<') and
    # only cover a handful of entities; unescape handles all of them once.
    text = unescape(text)
    # '&nbsp;' decodes to U+00A0; present it as a regular space.
    text = text.replace('\xa0', ' ')
    # Collapse multiple newlines and whitespace.
    text = re.sub(r'\n\s*\n', '\n\n', text)
    text = re.sub(r' +', ' ', text)
    return text.strip()
166+
148167
if email_message.is_multipart():
149168
for part in email_message.walk():
150169
content_type = part.get_content_type()
@@ -155,7 +174,7 @@ def _parse_email_data(self, raw_email: bytes, email_id: str | None = None) -> di
155174
filename = part.get_filename()
156175
if filename:
157176
attachments.append(filename)
158-
# Handle text parts
177+
# Handle text parts - prefer text/plain
159178
elif content_type == "text/plain":
160179
body_part = part.get_payload(decode=True)
161180
if body_part:
@@ -164,15 +183,34 @@ def _parse_email_data(self, raw_email: bytes, email_id: str | None = None) -> di
164183
body += body_part.decode(charset)
165184
except UnicodeDecodeError:
166185
body += body_part.decode("utf-8", errors="replace")
186+
# Collect HTML as fallback
187+
elif content_type == "text/html" and not body:
188+
html_part = part.get_payload(decode=True)
189+
if html_part:
190+
charset = part.get_content_charset("utf-8")
191+
try:
192+
html_body += html_part.decode(charset)
193+
except UnicodeDecodeError:
194+
html_body += html_part.decode("utf-8", errors="replace")
195+
196+
# Fall back to HTML if no plain text found
197+
if not body and html_body:
198+
body = _strip_html(html_body)
167199
else:
168-
# Handle plain text emails
200+
# Handle single-part emails
201+
content_type = email_message.get_content_type()
169202
payload = email_message.get_payload(decode=True)
170203
if payload:
171204
charset = email_message.get_content_charset("utf-8")
172205
try:
173-
body = payload.decode(charset)
206+
text = payload.decode(charset)
174207
except UnicodeDecodeError:
175-
body = payload.decode("utf-8", errors="replace")
208+
text = payload.decode("utf-8", errors="replace")
209+
210+
if content_type == "text/html":
211+
body = _strip_html(text)
212+
else:
213+
body = text
176214
# TODO: Allow retrieving full email body
177215
if body and len(body) > 20000:
178216
body = body[:20000] + "...[TRUNCATED]"

0 commit comments

Comments
 (0)