From 88941cec0e20f3c085b05271dcc9b0b9ea58b61f Mon Sep 17 00:00:00 2001 From: Peter Dedene Date: Tue, 28 Nov 2023 09:38:24 +0100 Subject: [PATCH] Add support for boolean and empty attributes --- lib/loofah/html5/safelist.rb | 148 ++++++++++++++++++++++ lib/loofah/html5/scrub.rb | 12 +- test/assets/testdata_sanitizer_tests1.dat | 18 +++ test/html5/test_sanitizer.rb | 8 ++ 4 files changed, 182 insertions(+), 4 deletions(-) diff --git a/lib/loofah/html5/safelist.rb b/lib/loofah/html5/safelist.rb index 18a5450..3eb1623 100644 --- a/lib/loofah/html5/safelist.rb +++ b/lib/loofah/html5/safelist.rb @@ -229,6 +229,154 @@ module SafeList "use", ]) + ACCEPTABLE_EMPTY_ATTRIBUTES = { + "*" => Set.new([ + "title", + ]), + "area" => Set.new([ + "alt", + ]), + "audio" => Set.new([ + "src", + ]), + "base" => Set.new([ + "href", + ]), + "img" => Set.new([ + "alt", + ]), + "input" => Set.new([ + "value", + "placeholder", + ]), + "li" => Set.new([ + "value", + ]), + "link" => Set.new([ + "href", + ]), + "meter" => Set.new([ + "value", + ]), + "option" => Set.new([ + "value", + ]), + "progress" => Set.new([ + "value", + ]), + "source" => Set.new([ + "src", + ]), + "textarea" => Set.new([ + "placeholder", + ]), + "track" => Set.new([ + "default", + ]), + } + + ACCEPTABLE_BOOLEAN_ATTRIBUTES = { + "*" => Set.new([ + "hidden", + "contenteditable", + "draggable", + "spellcheck", + "translate", + ]), + "a" => Set.new([ + "download", + ]), + "area" => Set.new([ + "download", + ]), + "audio" => Set.new([ + "autoplay", + "controls", + "loop", + "muted", + ]), + "button" => Set.new([ + "autofocus", + "disabled", + "formnovalidate", + ]), + "details" => Set.new([ + "open", + ]), + "fieldset" => Set.new([ + "disabled", + ]), + "form" => Set.new([ + "novalidate", + ]), + "iframe" => Set.new([ + "allowfullscreen", + "seamless", + ]), + "img" => Set.new([ + "alt", + "ismap", + ]), + "input" => Set.new([ + "autofocus", + "checked", + "disabled", + "multiple", + "readonly", + "required", + "formnovalidate", + ]), + "ol" => Set.new([ + "reversed", + ]), + "optgroup" => Set.new([ + "disabled", + ]), + "option" => Set.new([ + "disabled", + "selected", + ]), + "select" => Set.new([ + "autofocus", + "disabled", + "multiple", + "required", + ]), + "style" => Set.new([ + "scoped", + ]), + "table" => Set.new([ + "sortable", + ]), + "td" => Set.new([ + "nowrap", + ]), + "th" => Set.new([ + "nowrap", + ]), + "textarea" => Set.new([ + "autofocus", + "disabled", + "readonly", + "required", + ]), + "track" => Set.new([ + "default", + ]), + "video" => Set.new([ + "autoplay", + "controls", + "loop", + "muted", + "playsinline", + ]), + } + + ACCEPTABLE_BOOLEAN_OR_EMPTY_ATTRIBUTES = + ACCEPTABLE_BOOLEAN_ATTRIBUTES.merge(ACCEPTABLE_EMPTY_ATTRIBUTES) do |_, a, b| + a + b + end + ACCEPTABLE_ATTRIBUTES = Set.new([ "abbr", "accept", diff --git a/lib/loofah/html5/scrub.rb b/lib/loofah/html5/scrub.rb index bd69a58..9b1c861 100644 --- a/lib/loofah/html5/scrub.rb +++ b/lib/loofah/html5/scrub.rb @@ -32,7 +32,9 @@ def scrub_attributes(node) next end - unless SafeList::ALLOWED_ATTRIBUTES.include?(attr_name) + unless SafeList::ALLOWED_ATTRIBUTES.include?(attr_name) || + SafeList::ACCEPTABLE_BOOLEAN_OR_EMPTY_ATTRIBUTES[node.name]&.include?(attr_node.name) || + SafeList::ACCEPTABLE_BOOLEAN_OR_EMPTY_ATTRIBUTES["*"].include?(attr_node.name) attr_node.remove next end @@ -56,9 +58,11 @@ def scrub_attributes(node) scrub_css_attribute(node) node.attribute_nodes.each do |attr_node| - if attr_node.value !~ /[^[:space:]]/ && attr_node.name !~ DATA_ATTRIBUTE_NAME - node.remove_attribute(attr_node.name) - end + next if attr_node.value =~ /[^[:space:]]/ || attr_node.name =~ DATA_ATTRIBUTE_NAME || + SafeList::ACCEPTABLE_BOOLEAN_OR_EMPTY_ATTRIBUTES[node.name]&.include?(attr_node.name) || + SafeList::ACCEPTABLE_BOOLEAN_OR_EMPTY_ATTRIBUTES["*"].include?(attr_node.name) + + node.remove_attribute(attr_node.name) end force_correct_attribute_escaping!(node) diff --git a/test/assets/testdata_sanitizer_tests1.dat b/test/assets/testdata_sanitizer_tests1.dat index a734ab5..c5b1b4f 100644 --- a/test/assets/testdata_sanitizer_tests1.dat +++ b/test/assets/testdata_sanitizer_tests1.dat @@ -526,5 +526,23 @@ "name": "attributes_with_embedded_quotes_II", "input": "", "libxml": "" + }, + + { + "name": "empty_attributes_1", + "input": "", + "libxml": "" + }, + + { + "name": "empty_attributes_2", + "input": "", + "libxml": "" + }, + + { + "name": "empty_attributes_3", + "input": "", + "libxml": "" } ] diff --git a/test/html5/test_sanitizer.rb b/test/html5/test_sanitizer.rb index 8f533f6..a58b08e 100755 --- a/test/html5/test_sanitizer.rb +++ b/test/html5/test_sanitizer.rb @@ -200,6 +200,14 @@ def test_should_allow_contenteditable check_sanitization(input, output) end + def test_boolean_attributes + input = "" + expected_html5 = "" + output_html5 = sanitize_html5(input).tr('"', "'") + + assert_equal(expected_html5, output_html5) + end + ## ## libxml2 downcases attributes, so this is moot. ##