Skip to content

Commit 5729251

Browse files
authored
fix: Sanitize security advisory HTML at ingestion to prevent stored XSS (#17024)
Advisory HTML from upstream YAML/markdown was stored unsanitized and rendered with |safe. Add sanitize_advisory_html() using the existing justhtml-backed sanitize_html utility with an allowlist of tags and attributes needed by the CVE partial template and markdown output. Called in update_db_from_file() immediately after parsing, before the HTML reaches the database. Jira: WT-646
1 parent b77213f commit 5729251

File tree

2 files changed

+158
-0
lines changed

2 files changed

+158
-0
lines changed

bedrock/security/management/commands/update_security_advisories.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313

1414
from dateutil.parser import parse as parsedate
1515

16+
from bedrock.base.sanitization import sanitize_html
1617
from bedrock.security.models import HallOfFamer, MitreCVE, Product, SecurityAdvisory
1718
from bedrock.security.utils import (
1819
FILENAME_RE,
@@ -36,6 +37,55 @@
3637
HOF_FILES = ["client.yml", "web.yml"]
3738
HOF_DIRECTORY = "bug-bounty-hof"
3839

40+
# Allowlists for advisory HTML sanitization.
41+
# Tags come from markdown output and the security/partials/cve.html template.
42+
ADVISORY_ALLOWED_TAGS = frozenset(
43+
{
44+
# Markdown output
45+
"a",
46+
"abbr",
47+
"b",
48+
"blockquote",
49+
"br",
50+
"code",
51+
"del",
52+
"em",
53+
"h1",
54+
"h2",
55+
"h3",
56+
"h4",
57+
"h5",
58+
"h6",
59+
"hr",
60+
"i",
61+
"img",
62+
"li",
63+
"ol",
64+
"p",
65+
"pre",
66+
"small",
67+
"strong",
68+
"ul",
69+
# CVE partial template structure
70+
"section",
71+
"dl",
72+
"dt",
73+
"dd",
74+
"span",
75+
}
76+
)
77+
ADVISORY_ALLOWED_ATTRS = {
78+
"*": ["class", "id"],
79+
"a": ["href"],
80+
"img": ["src", "srcset", "alt"],
81+
"span": ["class"],
82+
}
83+
84+
85+
def sanitize_advisory_html(html):
86+
"""Sanitize advisory HTML using an allowlist of tags and attributes."""
87+
return sanitize_html(html, ADVISORY_ALLOWED_TAGS, ADVISORY_ALLOWED_ATTRS)
88+
3989

4090
def fix_product_name(name):
4191
if "seamonkey" in name.lower():
@@ -178,6 +228,7 @@ def update_db_from_file(filename):
178228
raise RuntimeError(f"Unknown file type {filename}")
179229

180230
data, html = parser(filename)
231+
html = sanitize_advisory_html(html)
181232
if "advisories" in data:
182233
add_or_update_cve(data)
183234
return add_or_update_advisory(data, html)

bedrock/security/tests/test_commands.py

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,113 @@
1212
from bedrock.security.models import Product
1313

1414

15+
class TestSanitizeAdvisoryHtml:
16+
"""Verify advisory HTML sanitization strips dangerous content."""
17+
18+
def test_strips_script_tags(self):
19+
html = '<p>Safe</p><script>alert("xss")</script>'
20+
result = update_security_advisories.sanitize_advisory_html(html)
21+
assert "<script" not in result
22+
assert "Safe" in result
23+
24+
def test_strips_event_handlers(self):
25+
html = '<img src=x onerror=alert("XSS")>'
26+
result = update_security_advisories.sanitize_advisory_html(html)
27+
assert "onerror" not in result
28+
29+
def test_strips_javascript_urls(self):
30+
html = "<a href=\"javascript:alert('xss')\">click</a>"
31+
result = update_security_advisories.sanitize_advisory_html(html)
32+
assert "javascript:" not in result.lower()
33+
34+
def test_preserves_safe_advisory_html(self):
35+
"""Tags used by the CVE partial template and markdown must survive."""
36+
html = (
37+
'<section class="cve">'
38+
'<h4 id="CVE-2024-0001"><a href="#CVE-2024-0001">'
39+
'<span class="anchor">#</span>CVE-2024-0001: Title</a></h4>'
40+
'<dl class="summary"><dt>Reporter</dt><dd>Alice</dd>'
41+
'<dt>Impact</dt><dd><span class="level critical">critical</span></dd></dl>'
42+
"<h5>Description</h5><p>A <strong>bad</strong> thing.</p>"
43+
"<h5>References</h5><ul><li>"
44+
'<a href="https://bugzilla.mozilla.org/show_bug.cgi?id=12345">Bug 12345</a>'
45+
"</li></ul></section>"
46+
)
47+
result = update_security_advisories.sanitize_advisory_html(html)
48+
# All structural tags preserved
49+
for tag in ["<section", "<h4", "<h5", "<dl", "<dt", "<dd", "<span", "<a ", "<ul", "<li", "<p>", "<strong>"]:
50+
assert tag in result
51+
assert 'href="https://bugzilla.mozilla.org' in result
52+
assert 'id="CVE-2024-0001"' in result
53+
assert 'class="cve"' in result
54+
55+
def test_preserves_markdown_output_tags(self):
56+
"""Standard markdown output tags must survive."""
57+
html = (
58+
"<h1>Title</h1><h2>Sub</h2><h3>Sub2</h3>"
59+
"<p><em>emphasis</em> and <code>code</code></p>"
60+
"<pre><code>block</code></pre>"
61+
"<blockquote><p>quote</p></blockquote>"
62+
"<ol><li>one</li></ol><ul><li>two</li></ul>"
63+
"<hr><br>"
64+
)
65+
result = update_security_advisories.sanitize_advisory_html(html)
66+
for tag in ["<h1>", "<h2>", "<h3>", "<em>", "<code>", "<pre>", "<blockquote>", "<ol>", "<hr", "<br"]:
67+
assert tag in result
68+
69+
def test_strips_svg_tags(self):
70+
html = '<p>ok</p><svg onload="alert(1)"></svg>'
71+
result = update_security_advisories.sanitize_advisory_html(html)
72+
assert "<svg" not in result
73+
74+
def test_strips_iframe_tags(self):
75+
html = '<p>ok</p><iframe src="https://evil.com"></iframe>'
76+
result = update_security_advisories.sanitize_advisory_html(html)
77+
assert "<iframe" not in result
78+
79+
def test_strips_disallowed_attrs_on_allowed_tags(self):
80+
html = '<span class="ok" onclick="bad()">text</span>'
81+
result = update_security_advisories.sanitize_advisory_html(html)
82+
assert 'class="ok"' in result
83+
assert "onclick" not in result
84+
85+
def test_preserves_additional_allowlisted_tags(self):
86+
html = (
87+
"<p><del>removed</del> <b>bold</b> <i>italic</i> "
88+
"<small>fine print</small> <abbr>abbr</abbr></p>"
89+
'<img src="https://example.com/pic.png" alt="pic">'
90+
)
91+
result = update_security_advisories.sanitize_advisory_html(html)
92+
for tag in ["<del>", "<b>", "<i>", "<small>", "<abbr>", "<img "]:
93+
assert tag in result
94+
assert 'src="https://example.com/pic.png"' in result
95+
assert 'alt="pic"' in result
96+
97+
def test_strips_javascript_img_src(self):
98+
html = '<img src="javascript:alert(1)" alt="x">'
99+
result = update_security_advisories.sanitize_advisory_html(html)
100+
assert "javascript:" not in result.lower()
101+
102+
@patch.object(update_security_advisories, "add_or_update_advisory")
103+
@patch.object(update_security_advisories, "parse_md_file")
104+
def test_update_db_from_file_sanitizes_html(self, mock_parser, mock_add):
105+
"""Integration: update_db_from_file must sanitize before storing."""
106+
mock_parser.return_value = (
107+
{
108+
"mfsa_id": "2024-01",
109+
"title": "T",
110+
"impact": "high",
111+
"fixed_in": ["Firefox 99"],
112+
"announced": "January 1, 2024",
113+
},
114+
'<p>ok</p><script>alert("xss")</script>',
115+
)
116+
update_security_advisories.update_db_from_file("/fake/path/mfsa2024-01.md")
117+
_data, html = mock_add.call_args[0]
118+
assert "<script" not in html
119+
assert "<p>ok</p>" in html
120+
121+
15122
def test_fix_product_name():
16123
"""Should fix SeaMonkey and strip '.0' from names."""
17124
assert update_security_advisories.fix_product_name("Seamonkey 2.2") == "SeaMonkey 2.2"

0 commit comments

Comments
 (0)