Skip to content

Commit

Permalink
Reintroduce the old sanitizer testsuite from html5lib-tests
Browse files Browse the repository at this point in the history
This is imported into this repo because its expectations are very much
implementation-dependent; they have been amended to match our
actual behaviour.
  • Loading branch information
gsnedders committed May 18, 2016
1 parent a2917e9 commit 75cf697
Show file tree
Hide file tree
Showing 4 changed files with 494 additions and 5 deletions.
11 changes: 6 additions & 5 deletions html5lib/serializer/htmlserializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,12 @@ def serialize(self, treewalker, encoding=None):
if encoding and self.inject_meta_charset:
from ..filters.inject_meta_charset import Filter
treewalker = Filter(treewalker, encoding)
# The alphabetical attributes filter is applied here on the assumption
# that none of the later filters add attributes or change their order; it
# needs to run before the sanitizer so escaped elements come out correctly
if self.alphabetical_attributes:
from ..filters.alphabeticalattributes import Filter
treewalker = Filter(treewalker)
# WhitespaceFilter should be used before OptionalTagFilter
# for maximum efficiency of the latter filter
if self.strip_whitespace:
Expand All @@ -195,11 +201,6 @@ def serialize(self, treewalker, encoding=None):
if self.omit_optional_tags:
from ..filters.optionaltags import Filter
treewalker = Filter(treewalker)
# Alphabetical attributes must be last, as other filters
# could add attributes and alter the order
if self.alphabetical_attributes:
from ..filters.alphabeticalattributes import Filter
treewalker = Filter(treewalker)

for token in treewalker:
type = token["type"]
Expand Down
5 changes: 5 additions & 0 deletions html5lib/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,13 @@

from .tree_construction import TreeConstructionFile
from .tokenizer import TokenizerFile
from .sanitizer import SanitizerFile

_dir = os.path.abspath(os.path.dirname(__file__))
_testdata = os.path.join(_dir, "testdata")
_tree_construction = os.path.join(_testdata, "tree-construction")
_tokenizer = os.path.join(_testdata, "tokenizer")
_sanitizer_testdata = os.path.join(_dir, "sanitizer-testdata")


def pytest_collectstart():
Expand All @@ -24,3 +26,6 @@ def pytest_collect_file(path, parent):
elif dir == _tokenizer:
if path.ext == ".test":
return TokenizerFile(path, parent)
elif dir == _sanitizer_testdata:
if path.ext == ".dat":
return SanitizerFile(path, parent)
Loading

0 comments on commit 75cf697

Please sign in to comment.