From 7cefed07b3fa737e1f2edc679dcf30940cf16edb Mon Sep 17 00:00:00 2001 From: Jamie Kirkpatrick Date: Sat, 17 Jan 2026 11:49:08 +0000 Subject: [PATCH 1/3] feat: add content_format option to get_emails_content Add a content_format parameter to extract clean text from HTML emails: - "raw" (default): text/plain preferred, falls back to HTML - "html": return HTML content as-is - "text": strip HTML tags to return clean plain text - "markdown": convert HTML to markdown format Uses justhtml library for HTML parsing and conversion. Co-Authored-By: Claude Opus 4.5 --- README.md | 24 +++++ mcp_email_server/app.py | 15 +++- mcp_email_server/emails/__init__.py | 16 +++- mcp_email_server/emails/classic.py | 94 ++++++++++++++++++-- pyproject.toml | 1 + tests/test_email_client.py | 132 ++++++++++++++++++++++++++++ tests/test_mcp_tools.py | 108 ++++++++++++++++++++++- uv.lock | 11 +++ 8 files changed, 388 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index 2700d46..9dea394 100644 --- a/README.md +++ b/README.md @@ -217,6 +217,30 @@ await send_email( The `in_reply_to` parameter sets the `In-Reply-To` header, and `references` sets the `References` header. Both are used by email clients to thread conversations properly. +### Extracting Clean Text from HTML Emails + +When fetching email content, you can control how the body is formatted using the `content_format` parameter: + +```python +# Get raw content (default) - returns text/plain if available, otherwise HTML +emails = await get_emails_content(account_name="work", email_ids=["123"]) + +# Get HTML content as-is +emails = await get_emails_content(account_name="work", email_ids=["123"], content_format="html") + +# Strip HTML tags and get clean plain text +emails = await get_emails_content(account_name="work", email_ids=["123"], content_format="text") + +# Convert HTML to markdown format +emails = await get_emails_content(account_name="work", email_ids=["123"], content_format="markdown") +``` + +Available formats: +- `raw` (default): Returns text/plain content if available, falls back to HTML +- `html`: Returns HTML content as-is +- `text`: Strips all HTML tags and returns clean plain text +- `markdown`: Converts HTML to markdown, preserving links and basic formatting + ## Development This project is managed using [uv](https://github.com/ai-zerolab/uv). diff --git a/mcp_email_server/app.py b/mcp_email_server/app.py index 17b6cee..7d0f03a 100644 --- a/mcp_email_server/app.py +++ b/mcp_email_server/app.py @@ -97,9 +97,22 @@ async def get_emails_content( ), ], mailbox: Annotated[str, Field(default="INBOX", description="The mailbox to retrieve emails from.")] = "INBOX", + content_format: Annotated[ + str, + Field( + default="raw", + description=( + "How to format the email body content: " + "'raw' (default) returns text/plain if available or HTML otherwise; " + "'html' returns HTML content; " + "'text' strips HTML tags to return clean plain text; " + "'markdown' converts HTML to markdown format." + ), + ), + ] = "raw", ) -> EmailContentBatchResponse: handler = dispatch_handler(account_name) - return await handler.get_emails_content(email_ids, mailbox) + return await handler.get_emails_content(email_ids, mailbox, content_format) @mcp.tool( diff --git a/mcp_email_server/emails/__init__.py b/mcp_email_server/emails/__init__.py index cbd0e5e..488da63 100644 --- a/mcp_email_server/emails/__init__.py +++ b/mcp_email_server/emails/__init__.py @@ -29,9 +29,23 @@ async def get_emails_metadata( """ @abc.abstractmethod - async def get_emails_content(self, email_ids: list[str], mailbox: str = "INBOX") -> "EmailContentBatchResponse": + async def get_emails_content( + self, + email_ids: list[str], + mailbox: str = "INBOX", + content_format: str = "raw", + ) -> "EmailContentBatchResponse": """ Get full content (including body) of multiple emails by their email IDs (IMAP UIDs) + + Args: + email_ids: List of email UIDs to retrieve. + mailbox: The mailbox to search in (default: "INBOX"). + content_format: How to format the body content: + - "raw": Return original content (text/plain preferred, falls back to HTML) + - "html": Return HTML content as-is + - "text": Strip HTML tags and return clean plain text + - "markdown": Convert HTML to markdown format """ @abc.abstractmethod diff --git a/mcp_email_server/emails/classic.py b/mcp_email_server/emails/classic.py index 9c0552e..0561d15 100644 --- a/mcp_email_server/emails/classic.py +++ b/mcp_email_server/emails/classic.py @@ -13,6 +13,7 @@ import aioimaplib import aiosmtplib +import justhtml from mcp_email_server.config import EmailServer, EmailSettings from mcp_email_server.emails import EmailHandler @@ -72,6 +73,42 @@ async def _send_imap_id(imap: aioimaplib.IMAP4 | aioimaplib.IMAP4_SSL) -> None: logger.warning(f"IMAP ID command failed: {e!s}") +def _format_body_content(body: str, html_body: str, content_format: str) -> str: + """Format email body content based on the requested format. + + Args: + body: Plain text body content. + html_body: HTML body content. + content_format: One of "raw", "html", "text", "markdown". + + Returns: + Formatted body content. + """ + if content_format == "raw": + # Return plain text if available, else HTML + return body if body else html_body + + if content_format == "html": + # Return HTML content, fall back to plain text if no HTML + return html_body if html_body else body + + if content_format == "text": + # Convert HTML to clean text, or return plain text + if html_body: + return justhtml.JustHTML(html_body).to_text() + return body + + if content_format == "markdown": + # Convert HTML to markdown + if html_body: + return justhtml.JustHTML(html_body).to_markdown() + return body + + # Unknown format, return raw + logger.warning(f"Unknown content_format: {content_format}, returning raw content") + return body if body else html_body + + class EmailClient: def __init__(self, email_server: EmailServer, sender: str | None = None): self.email_server = email_server @@ -118,8 +155,9 @@ def _parse_email_data(self, raw_email: bytes, email_id: str | None = None) -> di except Exception: date = datetime.now(timezone.utc) - # Get body content + # Get body content - extract both plain text and HTML body = "" + html_body = "" attachments = [] if email_message.is_multipart(): @@ -141,18 +179,36 @@ def _parse_email_data(self, raw_email: bytes, email_id: str | None = None) -> di body += body_part.decode(charset) except UnicodeDecodeError: body += body_part.decode("utf-8", errors="replace") + elif content_type == "text/html": + html_part = part.get_payload(decode=True) + if html_part: + charset = part.get_content_charset("utf-8") + try: + html_body += html_part.decode(charset) + except UnicodeDecodeError: + html_body += html_part.decode("utf-8", errors="replace") else: - # Handle plain text emails + # Handle single-part emails payload = email_message.get_payload(decode=True) if payload: charset = email_message.get_content_charset("utf-8") + content_type = email_message.get_content_type() try: - body = payload.decode(charset) + decoded = payload.decode(charset) except UnicodeDecodeError: - body = payload.decode("utf-8", errors="replace") - # TODO: Allow retrieving full email body + decoded = payload.decode("utf-8", errors="replace") + + if content_type == "text/html": + html_body = decoded + else: + body = decoded + + # Truncate if too long if body and len(body) > 20000: body = body[:20000] + "...[TRUNCATED]" + if html_body and len(html_body) > 20000: + html_body = html_body[:20000] + "...[TRUNCATED]" + return { "email_id": email_id or "", "message_id": message_id, @@ -160,6 +216,7 @@ def _parse_email_data(self, raw_email: bytes, email_id: str | None = None) -> di "from": sender, "to": to_addresses, "body": body, + "html_body": html_body, "date": date, "attachments": attachments, } @@ -837,8 +894,23 @@ async def get_emails_metadata( total=total, ) - async def get_emails_content(self, email_ids: list[str], mailbox: str = "INBOX") -> EmailContentBatchResponse: - """Batch retrieve email body content""" + async def get_emails_content( + self, + email_ids: list[str], + mailbox: str = "INBOX", + content_format: str = "raw", + ) -> EmailContentBatchResponse: + """Batch retrieve email body content. + + Args: + email_ids: List of email UIDs to retrieve. + mailbox: The mailbox to search in (default: "INBOX"). + content_format: How to format the body content: + - "raw": Return original content (text/plain preferred, falls back to HTML) + - "html": Return HTML content as-is + - "text": Strip HTML tags and return clean plain text + - "markdown": Convert HTML to markdown format + """ emails = [] failed_ids = [] @@ -846,6 +918,12 @@ async def get_emails_content(self, email_ids: list[str], mailbox: str = "INBOX") try: email_data = await self.incoming_client.get_email_body_by_id(email_id, mailbox) if email_data: + # Apply content format conversion + formatted_body = _format_body_content( + email_data.get("body", ""), + email_data.get("html_body", ""), + content_format, + ) emails.append( EmailBodyResponse( email_id=email_data["email_id"], @@ -854,7 +932,7 @@ async def get_emails_content(self, email_ids: list[str], mailbox: str = "INBOX") sender=email_data["from"], recipients=email_data["to"], date=email_data["date"], - body=email_data["body"], + body=formatted_body, attachments=email_data["attachments"], ) ) diff --git a/pyproject.toml b/pyproject.toml index 0b12205..3d94376 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,6 +22,7 @@ dependencies = [ "aiosmtplib>=4.0.0", "gradio>=6.0.1", "jinja2>=3.1.5", + "justhtml>=0.35.0", "loguru>=0.7.3", "mcp[cli]>=1.3.0", "pydantic>=2.11.0", diff --git a/tests/test_email_client.py b/tests/test_email_client.py index 88b132a..39321a3 100644 --- a/tests/test_email_client.py +++ b/tests/test_email_client.py @@ -354,3 +354,135 @@ async def test_send_email_without_reply_headers(self, email_client): msg = call_args[0][0] assert "In-Reply-To" not in msg assert "References" not in msg + + +class TestFormatBodyContent: + """Tests for the _format_body_content helper function.""" + + def test_format_raw_returns_plain_text_if_available(self): + """Test raw format returns plain text when available.""" + from mcp_email_server.emails.classic import _format_body_content + + result = _format_body_content("plain text", "html", "raw") + assert result == "plain text" + + def test_format_raw_falls_back_to_html(self): + """Test raw format falls back to HTML when no plain text.""" + from mcp_email_server.emails.classic import _format_body_content + + result = _format_body_content("", "html", "raw") + assert result == "html" + + def test_format_html_returns_html_if_available(self): + """Test html format returns HTML when available.""" + from mcp_email_server.emails.classic import _format_body_content + + result = _format_body_content("plain text", "html content", "html") + assert result == "html content" + + def test_format_html_falls_back_to_plain_text(self): + """Test html format falls back to plain text when no HTML.""" + from mcp_email_server.emails.classic import _format_body_content + + result = _format_body_content("plain text only", "", "html") + assert result == "plain text only" + + def test_format_text_strips_html_tags(self): + """Test text format strips HTML tags.""" + from mcp_email_server.emails.classic import _format_body_content + + result = _format_body_content("", "
Hello World
", "text") + assert "Hello" in result + assert "World" in result + assert "
" not in result + assert "" not in result + + def test_format_text_returns_plain_text_directly(self): + """Test text format returns plain text directly if no HTML.""" + from mcp_email_server.emails.classic import _format_body_content + + result = _format_body_content("plain text", "", "text") + assert result == "plain text" + + def test_format_markdown_converts_html(self): + """Test markdown format converts HTML to markdown.""" + from mcp_email_server.emails.classic import _format_body_content + + result = _format_body_content("", "

Paragraph

link", "markdown") + # Should contain text without HTML tags + assert "Paragraph" in result + assert "link" in result + assert "

" not in result + assert "" not in result + + def test_format_markdown_returns_plain_text_if_no_html(self): + """Test markdown format returns plain text directly if no HTML.""" + from mcp_email_server.emails.classic import _format_body_content + + result = _format_body_content("plain text", "", "markdown") + assert result == "plain text" + + def test_format_unknown_returns_raw(self): + """Test unknown format returns raw content.""" + from mcp_email_server.emails.classic import _format_body_content + + result = _format_body_content("plain", "html", "unknown") + assert result == "plain" + + +class TestParseEmailDataHtml: + """Tests for _parse_email_data extracting HTML content.""" + + def test_parse_multipart_extracts_html_body(self, email_client): + """Test that multipart emails have HTML body extracted.""" + raw_email = b"""From: sender@example.com +To: recipient@example.com +Subject: Test Email +MIME-Version: 1.0 +Content-Type: multipart/alternative; boundary="boundary123" + +--boundary123 +Content-Type: text/plain; charset="utf-8" + +This is plain text + +--boundary123 +Content-Type: text/html; charset="utf-8" + +

This is HTML

+ +--boundary123-- +""" + result = email_client._parse_email_data(raw_email, "test123") + + assert result["body"] == "This is plain text\n" + assert "" in result["html_body"] + assert "

This is HTML

" in result["html_body"] + + def test_parse_html_only_email(self, email_client): + """Test parsing email with only HTML body.""" + raw_email = b"""From: sender@example.com +To: recipient@example.com +Subject: HTML Only +Content-Type: text/html; charset="utf-8" + +

Hello!

+""" + result = email_client._parse_email_data(raw_email, "test123") + + assert result["body"] == "" + assert "

Hello!

" in result["html_body"] + + def test_parse_plain_only_email(self, email_client): + """Test parsing email with only plain text body.""" + raw_email = b"""From: sender@example.com +To: recipient@example.com +Subject: Plain Only +Content-Type: text/plain; charset="utf-8" + +Just plain text here +""" + result = email_client._parse_email_data(raw_email, "test123") + + assert "Just plain text here" in result["body"] + assert result["html_body"] == "" diff --git a/tests/test_mcp_tools.py b/tests/test_mcp_tools.py index b270068..6ec1bdc 100644 --- a/tests/test_mcp_tools.py +++ b/tests/test_mcp_tools.py @@ -259,7 +259,7 @@ async def test_get_emails_content_single(self): assert result.emails[0].subject == "Test Subject" # Verify dispatch_handler and get_emails_content were called correctly - mock_handler.get_emails_content.assert_called_once_with(["12345"], "INBOX") + mock_handler.get_emails_content.assert_called_once_with(["12345"], "INBOX", "raw") @pytest.mark.asyncio async def test_get_emails_content_batch(self): @@ -315,7 +315,7 @@ async def test_get_emails_content_batch(self): assert result.emails[1].email_id == "12346" # Verify dispatch_handler and get_emails_content were called correctly - mock_handler.get_emails_content.assert_called_once_with(["12345", "12346", "12347"], "INBOX") + mock_handler.get_emails_content.assert_called_once_with(["12345", "12346", "12347"], "INBOX", "raw") @pytest.mark.asyncio async def test_get_emails_content_with_mailbox(self): @@ -349,7 +349,109 @@ async def test_get_emails_content_with_mailbox(self): ) assert result == batch_response - mock_handler.get_emails_content.assert_called_once_with(["12345"], "Sent") + mock_handler.get_emails_content.assert_called_once_with(["12345"], "Sent", "raw") + + @pytest.mark.asyncio + async def test_get_emails_content_with_content_format_text(self): + """Test get_emails_content MCP tool with content_format='text'.""" + now = datetime.now(timezone.utc) + email_body = EmailBodyResponse( + email_id="12345", + subject="HTML Email", + sender="sender@example.com", + recipients=["recipient@example.com"], + date=now, + body="Clean text extracted from HTML", + attachments=[], + ) + + batch_response = EmailContentBatchResponse( + emails=[email_body], + requested_count=1, + retrieved_count=1, + failed_ids=[], + ) + + mock_handler = AsyncMock() + mock_handler.get_emails_content.return_value = batch_response + + with patch("mcp_email_server.app.dispatch_handler", return_value=mock_handler): + result = await get_emails_content( + account_name="test_account", + email_ids=["12345"], + content_format="text", + ) + + assert result == batch_response + mock_handler.get_emails_content.assert_called_once_with(["12345"], "INBOX", "text") + + @pytest.mark.asyncio + async def test_get_emails_content_with_content_format_html(self): + """Test get_emails_content MCP tool with content_format='html'.""" + now = datetime.now(timezone.utc) + email_body = EmailBodyResponse( + email_id="12345", + subject="HTML Email", + sender="sender@example.com", + recipients=["recipient@example.com"], + date=now, + body="HTML content", + attachments=[], + ) + + batch_response = EmailContentBatchResponse( + emails=[email_body], + requested_count=1, + retrieved_count=1, + failed_ids=[], + ) + + mock_handler = AsyncMock() + mock_handler.get_emails_content.return_value = batch_response + + with patch("mcp_email_server.app.dispatch_handler", return_value=mock_handler): + result = await get_emails_content( + account_name="test_account", + email_ids=["12345"], + content_format="html", + ) + + assert result == batch_response + mock_handler.get_emails_content.assert_called_once_with(["12345"], "INBOX", "html") + + @pytest.mark.asyncio + async def test_get_emails_content_with_content_format_markdown(self): + """Test get_emails_content MCP tool with content_format='markdown'.""" + now = datetime.now(timezone.utc) + email_body = EmailBodyResponse( + email_id="12345", + subject="HTML Email", + sender="sender@example.com", + recipients=["recipient@example.com"], + date=now, + body="# Header\n\nParagraph with [link](http://example.com)", + attachments=[], + ) + + batch_response = EmailContentBatchResponse( + emails=[email_body], + requested_count=1, + retrieved_count=1, + failed_ids=[], + ) + + mock_handler = AsyncMock() + mock_handler.get_emails_content.return_value = batch_response + + with patch("mcp_email_server.app.dispatch_handler", return_value=mock_handler): + result = await get_emails_content( + account_name="test_account", + email_ids=["12345"], + content_format="markdown", + ) + + assert result == batch_response + mock_handler.get_emails_content.assert_called_once_with(["12345"], "INBOX", "markdown") @pytest.mark.asyncio async def test_send_email(self): diff --git a/uv.lock b/uv.lock index 525775d..714566f 100644 --- a/uv.lock +++ b/uv.lock @@ -766,6 +766,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/41/45/1a4ed80516f02155c51f51e8cedb3c1902296743db0bbc66608a0db2814f/jsonschema_specifications-2025.9.1-py3-none-any.whl", hash = "sha256:98802fee3a11ee76ecaca44429fda8a41bff98b00a0f2838151b113f210cc6fe", size = 18437, upload-time = "2025-09-08T01:34:57.871Z" }, ] +[[package]] +name = "justhtml" +version = "0.35.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/8f/8c/f075596d1e14c62b49af21752e933f521cfca64d1efb471fe1af166649c0/justhtml-0.35.0.tar.gz", hash = "sha256:14658260ebb40e73e73e8442c12f112a1b750695b332e7882ea3d668174126a7", size = 296744, upload-time = "2026-01-11T13:34:21.363Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b5/6d/c97778ab7c97045fa0a4547c0c1f0c6a9e0117704c01a02d1f8c6f1e9d2b/justhtml-0.35.0-py3-none-any.whl", hash = "sha256:219e96f0810f188dbdc9e93ea7e52530897ab77721b49c28d9d42e9829f7e7c9", size = 107110, upload-time = "2026-01-11T13:34:19.997Z" }, +] + [[package]] name = "loguru" version = "0.7.3" @@ -922,6 +931,7 @@ dependencies = [ { name = "aiosmtplib" }, { name = "gradio" }, { name = "jinja2" }, + { name = "justhtml" }, { name = "loguru" }, { name = "mcp", extra = ["cli"] }, { name = "pydantic" }, @@ -950,6 +960,7 @@ requires-dist = [ { name = "aiosmtplib", specifier = ">=4.0.0" }, { name = "gradio", specifier = ">=6.0.1" }, { name = "jinja2", specifier = ">=3.1.5" }, + { name = "justhtml", specifier = ">=0.35.0" }, { name = "loguru", specifier = ">=0.7.3" }, { name = "mcp", extras = ["cli"], specifier = ">=1.3.0" }, { name = "pydantic", specifier = ">=2.11.0" }, From 343e2da79a001bcad64609d9b0e886cf55f654eb Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 17 Jan 2026 12:07:24 +0000 Subject: [PATCH 2/3] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 9dea394..f246308 100644 --- a/README.md +++ b/README.md @@ -236,6 +236,7 @@ emails = await get_emails_content(account_name="work", email_ids=["123"], conten ``` Available formats: + - `raw` (default): Returns text/plain content if available, falls back to HTML - `html`: Returns HTML content as-is - `text`: Strips all HTML tags and returns clean plain text From fb1bd6a5ad7a6c592cfa64d86ee282d67ff980a8 Mon Sep 17 00:00:00 2001 From: Jamie Kirkpatrick Date: Sat, 17 Jan 2026 12:24:12 +0000 Subject: [PATCH 3/3] feat: add default_content_format setting - Add default_content_format to Settings class (defaults to "raw") - Support MCP_EMAIL_SERVER_DEFAULT_CONTENT_FORMAT env var - Validate content format values (raw, html, text, markdown) - Update get_emails_content to use setting when format not specified - Add tests for new configuration option - Update README with configuration documentation Co-Authored-By: Claude Opus 4.5 --- README.md | 12 +++++ mcp_email_server/app.py | 15 +++--- mcp_email_server/config.py | 16 ++++++ tests/test_env_config_coverage.py | 78 +++++++++++++++++++++++++++ tests/test_mcp_tools.py | 89 +++++++++++++++++++++++++++++++ 5 files changed, 204 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index f246308..087793f 100644 --- a/README.md +++ b/README.md @@ -242,6 +242,18 @@ Available formats: - `text`: Strips all HTML tags and returns clean plain text - `markdown`: Converts HTML to markdown, preserving links and basic formatting +To set a default format for all email content retrieval, configure `default_content_format` in your config file: + +```toml +default_content_format = "markdown" +``` + +Or via environment variable: + +```bash +export MCP_EMAIL_SERVER_DEFAULT_CONTENT_FORMAT=markdown +``` + ## Development This project is managed using [uv](https://github.com/ai-zerolab/uv). diff --git a/mcp_email_server/app.py b/mcp_email_server/app.py index 7d0f03a..f66f425 100644 --- a/mcp_email_server/app.py +++ b/mcp_email_server/app.py @@ -98,21 +98,24 @@ async def get_emails_content( ], mailbox: Annotated[str, Field(default="INBOX", description="The mailbox to retrieve emails from.")] = "INBOX", content_format: Annotated[ - str, + str | None, Field( - default="raw", + default=None, description=( "How to format the email body content: " - "'raw' (default) returns text/plain if available or HTML otherwise; " + "'raw' returns text/plain if available or HTML otherwise; " "'html' returns HTML content; " "'text' strips HTML tags to return clean plain text; " - "'markdown' converts HTML to markdown format." + "'markdown' converts HTML to markdown format. " + "Defaults to the server's default_content_format setting (usually 'raw')." ), ), - ] = "raw", + ] = None, ) -> EmailContentBatchResponse: + settings = get_settings() + effective_format = content_format if content_format is not None else settings.default_content_format handler = dispatch_handler(account_name) - return await handler.get_emails_content(email_ids, mailbox, content_format) + return await handler.get_emails_content(email_ids, mailbox, effective_format) @mcp.tool( diff --git a/mcp_email_server/config.py b/mcp_email_server/config.py index 4463fde..816dfca 100644 --- a/mcp_email_server/config.py +++ b/mcp_email_server/config.py @@ -218,11 +218,15 @@ def _parse_bool_env(value: str | None, default: bool = False) -> bool: return value.lower() in ("true", "1", "yes", "on") +VALID_CONTENT_FORMATS = {"raw", "html", "text", "markdown"} + + class Settings(BaseSettings): emails: list[EmailSettings] = [] providers: list[ProviderSettings] = [] db_location: str = CONFIG_PATH.with_name("db.sqlite3").as_posix() enable_attachment_download: bool = False + default_content_format: str = "raw" model_config = SettingsConfigDict(toml_file=CONFIG_PATH, validate_assignment=True, revalidate_instances="always") @@ -236,6 +240,18 @@ def __init__(self, **data: Any) -> None: self.enable_attachment_download = _parse_bool_env(env_enable_attachment, False) logger.info(f"Set enable_attachment_download={self.enable_attachment_download} from environment variable") + # Check for default_content_format from environment variable + env_content_format = os.getenv("MCP_EMAIL_SERVER_DEFAULT_CONTENT_FORMAT") + if env_content_format is not None: + if env_content_format in VALID_CONTENT_FORMATS: + self.default_content_format = env_content_format + logger.info(f"Set default_content_format={self.default_content_format} from environment variable") + else: + logger.warning( + f"Invalid MCP_EMAIL_SERVER_DEFAULT_CONTENT_FORMAT '{env_content_format}', " + f"must be one of {VALID_CONTENT_FORMATS}. Using default 'raw'." + ) + # Check for email configuration from environment variables env_email = EmailSettings.from_env() if env_email: diff --git a/tests/test_env_config_coverage.py b/tests/test_env_config_coverage.py index f9fe4a6..21a2033 100644 --- a/tests/test_env_config_coverage.py +++ b/tests/test_env_config_coverage.py @@ -358,3 +358,81 @@ def test_enable_attachment_download_env_overrides_toml(monkeypatch, tmp_path): settings = Settings() assert settings.enable_attachment_download is True + + +def test_default_content_format_defaults_to_raw(monkeypatch, tmp_path): + """Test default_content_format defaults to 'raw'.""" + config_file = tmp_path / "empty.toml" + config_file.write_text("") + monkeypatch.setenv("MCP_EMAIL_SERVER_CONFIG_PATH", str(config_file)) + + for key in list(os.environ.keys()): + if key.startswith("MCP_EMAIL_SERVER_") and "CONFIG_PATH" not in key: + monkeypatch.delenv(key, raising=False) + + settings = Settings() + assert settings.default_content_format == "raw" + + +def test_default_content_format_from_env_markdown(monkeypatch, tmp_path): + """Test default_content_format can be set to 'markdown' via env var.""" + config_file = tmp_path / "empty.toml" + config_file.write_text("") + monkeypatch.setenv("MCP_EMAIL_SERVER_CONFIG_PATH", str(config_file)) + + for key in list(os.environ.keys()): + if key.startswith("MCP_EMAIL_SERVER_") and "CONFIG_PATH" not in key: + monkeypatch.delenv(key, raising=False) + + monkeypatch.setenv("MCP_EMAIL_SERVER_DEFAULT_CONTENT_FORMAT", "markdown") + + settings = Settings() + assert settings.default_content_format == "markdown" + + +def test_default_content_format_from_env_text(monkeypatch, tmp_path): + """Test default_content_format can be set to 'text' via env var.""" + config_file = tmp_path / "empty.toml" + config_file.write_text("") + monkeypatch.setenv("MCP_EMAIL_SERVER_CONFIG_PATH", str(config_file)) + + for key in list(os.environ.keys()): + if key.startswith("MCP_EMAIL_SERVER_") and "CONFIG_PATH" not in key: + monkeypatch.delenv(key, raising=False) + + monkeypatch.setenv("MCP_EMAIL_SERVER_DEFAULT_CONTENT_FORMAT", "text") + + settings = Settings() + assert settings.default_content_format == "text" + + +def test_default_content_format_invalid_value_keeps_raw(monkeypatch, tmp_path): + """Test invalid default_content_format value logs warning and keeps 'raw'.""" + config_file = tmp_path / "empty.toml" + config_file.write_text("") + monkeypatch.setenv("MCP_EMAIL_SERVER_CONFIG_PATH", str(config_file)) + + for key in list(os.environ.keys()): + if key.startswith("MCP_EMAIL_SERVER_") and "CONFIG_PATH" not in key: + monkeypatch.delenv(key, raising=False) + + monkeypatch.setenv("MCP_EMAIL_SERVER_DEFAULT_CONTENT_FORMAT", "invalid_format") + + settings = Settings() + assert settings.default_content_format == "raw" + + +def test_default_content_format_html(monkeypatch, tmp_path): + """Test default_content_format can be set to 'html' via env var.""" + config_file = tmp_path / "empty.toml" + config_file.write_text("") + monkeypatch.setenv("MCP_EMAIL_SERVER_CONFIG_PATH", str(config_file)) + + for key in list(os.environ.keys()): + if key.startswith("MCP_EMAIL_SERVER_") and "CONFIG_PATH" not in key: + monkeypatch.delenv(key, raising=False) + + monkeypatch.setenv("MCP_EMAIL_SERVER_DEFAULT_CONTENT_FORMAT", "html") + + settings = Settings() + assert settings.default_content_format == "html" diff --git a/tests/test_mcp_tools.py b/tests/test_mcp_tools.py index 6ec1bdc..1e233d7 100644 --- a/tests/test_mcp_tools.py +++ b/tests/test_mcp_tools.py @@ -453,6 +453,95 @@ async def test_get_emails_content_with_content_format_markdown(self): assert result == batch_response mock_handler.get_emails_content.assert_called_once_with(["12345"], "INBOX", "markdown") + @pytest.mark.asyncio + async def test_get_emails_content_uses_default_content_format_setting(self): + """Test get_emails_content uses default_content_format from settings when content_format not specified.""" + from mcp_email_server.config import Settings + + now = datetime.now(timezone.utc) + email_body = EmailBodyResponse( + email_id="12345", + subject="Test Email", + sender="sender@example.com", + recipients=["recipient@example.com"], + date=now, + body="Markdown converted content", + attachments=[], + ) + + batch_response = EmailContentBatchResponse( + emails=[email_body], + requested_count=1, + retrieved_count=1, + failed_ids=[], + ) + + mock_handler = AsyncMock() + mock_handler.get_emails_content.return_value = batch_response + + # Create a mock settings object with default_content_format set to "markdown" + mock_settings = MagicMock(spec=Settings) + mock_settings.default_content_format = "markdown" + + with ( + patch("mcp_email_server.app.dispatch_handler", return_value=mock_handler), + patch("mcp_email_server.app.get_settings", return_value=mock_settings), + ): + # Call without specifying content_format + result = await get_emails_content( + account_name="test_account", + email_ids=["12345"], + ) + + assert result == batch_response + # Should use "markdown" from settings, not "raw" + mock_handler.get_emails_content.assert_called_once_with(["12345"], "INBOX", "markdown") + + @pytest.mark.asyncio + async def test_get_emails_content_explicit_format_overrides_setting(self): + """Test explicit content_format overrides default_content_format setting.""" + from mcp_email_server.config import Settings + + now = datetime.now(timezone.utc) + email_body = EmailBodyResponse( + email_id="12345", + subject="Test Email", + sender="sender@example.com", + recipients=["recipient@example.com"], + date=now, + body="HTML content", + attachments=[], + ) + + batch_response = EmailContentBatchResponse( + emails=[email_body], + requested_count=1, + retrieved_count=1, + failed_ids=[], + ) + + mock_handler = AsyncMock() + mock_handler.get_emails_content.return_value = batch_response + + # Settings has "markdown" as default + mock_settings = MagicMock(spec=Settings) + mock_settings.default_content_format = "markdown" + + with ( + patch("mcp_email_server.app.dispatch_handler", return_value=mock_handler), + patch("mcp_email_server.app.get_settings", return_value=mock_settings), + ): + # Explicitly request "html" format + result = await get_emails_content( + account_name="test_account", + email_ids=["12345"], + content_format="html", + ) + + assert result == batch_response + # Should use explicit "html", not "markdown" from settings + mock_handler.get_emails_content.assert_called_once_with(["12345"], "INBOX", "html") + @pytest.mark.asyncio async def test_send_email(self): """Test send_email MCP tool."""