diff --git a/html5lib/serializer/htmlserializer.py b/html5lib/serializer/htmlserializer.py
index afe2e0e2..23f6befe 100644
--- a/html5lib/serializer/htmlserializer.py
+++ b/html5lib/serializer/htmlserializer.py
@@ -184,6 +184,12 @@ def serialize(self, treewalker, encoding=None):
if encoding and self.inject_meta_charset:
from ..filters.inject_meta_charset import Filter
treewalker = Filter(treewalker, encoding)
+ # The alphabetical attributes filter runs here on the assumption that
+ # none of the later filters add attributes or change their order; it
+ # must run before the sanitizer so escaped elements come out correctly
+ if self.alphabetical_attributes:
+ from ..filters.alphabeticalattributes import Filter
+ treewalker = Filter(treewalker)
# WhitespaceFilter should be used before OptionalTagFilter
# for maximum efficiently of this latter filter
if self.strip_whitespace:
@@ -195,11 +201,6 @@ def serialize(self, treewalker, encoding=None):
if self.omit_optional_tags:
from ..filters.optionaltags import Filter
treewalker = Filter(treewalker)
- # Alphabetical attributes must be last, as other filters
- # could add attributes and alter the order
- if self.alphabetical_attributes:
- from ..filters.alphabeticalattributes import Filter
- treewalker = Filter(treewalker)
for token in treewalker:
type = token["type"]
diff --git a/html5lib/tests/conftest.py b/html5lib/tests/conftest.py
index 811aebbf..dceb94cc 100644
--- a/html5lib/tests/conftest.py
+++ b/html5lib/tests/conftest.py
@@ -2,11 +2,13 @@
from .tree_construction import TreeConstructionFile
from .tokenizer import TokenizerFile
+from .sanitizer import SanitizerFile
_dir = os.path.abspath(os.path.dirname(__file__))
_testdata = os.path.join(_dir, "testdata")
_tree_construction = os.path.join(_testdata, "tree-construction")
_tokenizer = os.path.join(_testdata, "tokenizer")
+_sanitizer_testdata = os.path.join(_dir, "sanitizer-testdata")
def pytest_collectstart():
@@ -24,3 +26,6 @@ def pytest_collect_file(path, parent):
elif dir == _tokenizer:
if path.ext == ".test":
return TokenizerFile(path, parent)
+ elif dir == _sanitizer_testdata:
+ if path.ext == ".dat":
+ return SanitizerFile(path, parent)
diff --git a/html5lib/tests/sanitizer-testdata/tests1.dat b/html5lib/tests/sanitizer-testdata/tests1.dat
new file mode 100644
index 00000000..74e88336
--- /dev/null
+++ b/html5lib/tests/sanitizer-testdata/tests1.dat
@@ -0,0 +1,433 @@
+[
+ {
+ "name": "IE_Comments",
+ "input": "",
+ "output": ""
+ },
+
+ {
+ "name": "IE_Comments_2",
+ "input": "",
+ "output": "<script>alert('XSS');</script>"
+ },
+
+ {
+ "name": "allow_colons_in_path_component",
+ "input": "foo",
+ "output": "foo"
+ },
+
+ {
+ "name": "background_attribute",
+ "input": "