Skip to content

Commit

Permalink
fixup! squash! Fix #72: rewrite the sanitizer to be a treewalker filt…
Browse files Browse the repository at this point in the history
…er only.
  • Loading branch information
gsnedders committed May 8, 2016
1 parent 42fde37 commit d4abff1
Showing 1 changed file with 22 additions and 12 deletions.
34 changes: 22 additions & 12 deletions html5lib/tests/test_sanitizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,28 @@ def test_should_allow_relative_uris():
assert expected == sanitized


def test_invalid_data_uri():
    """Check sanitizing an <audio> element whose src is ``data:foobar``.

    The expected output has no ``src`` attribute, and the empty
    ``controls=""`` attribute is serialized in its minimized form.
    """
    markup = '<audio controls="" src="data:foobar"></audio>'
    result = sanitize_html(markup)
    assert '<audio controls></audio>' == result


def test_invalid_ipv6_url():
    """Check sanitizing an <a> element whose href is ``h://]``.

    The expected output is a bare, explicitly closed ``<a></a>`` with the
    ``href`` attribute absent.
    """
    markup = '<a href="h://]">'
    result = sanitize_html(markup)
    assert "<a></a>" == result


def test_data_uri_disallowed_type():
    """Check sanitizing an <audio> element whose src is a text/html data URI.

    The expected output has no ``src`` attribute; only the minimized
    ``controls`` attribute remains on the element.
    """
    markup = '<audio controls="" src="data:text/html,<html>"></audio>'
    result = sanitize_html(markup)
    assert "<audio controls></audio>" == result


def test_sanitizer():
for ns, tag_name in sanitizer.allowed_elements:
if ns != constants.namespaces["html"]:
continue
if tag_name in ['caption', 'col', 'colgroup', 'optgroup', 'option', 'table', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr']:
continue # TODO
if tag_name != tag_name.lower():
Expand All @@ -63,6 +83,8 @@ def test_sanitizer():
"<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name))

for ns, attribute_name in sanitizer.allowed_attributes:
if ns != None:
continue
if attribute_name != attribute_name.lower():
continue # TODO
if attribute_name == 'style':
Expand All @@ -82,18 +104,6 @@ def test_sanitizer():
"<img src=\"%s:%s\">foo</a>" % (protocol, rest_of_uri),
"""<img src="%s:%s">foo</a>""" % (protocol, rest_of_uri))

yield (runSanitizerTest, "test_invalid_data_uri",
"<audio controls=\"\"></audio>",
"<audio controls=\"\" src=\"data:foobar\"></audio>")

yield (runSanitizerTest, "test_invalid_ipv6_url",
"<a>",
"<a href=\"h://]\">")

yield (runSanitizerTest, "test_data_uri_disallowed_type",
"<audio controls=\"\"></audio>",
"<audio controls=\"\" src=\"data:text/html,<html>\"></audio>")

for protocol in sanitizer.allowed_protocols:
rest_of_uri = '//sub.domain.tld/path/object.ext'
if protocol == 'data':
Expand Down

0 comments on commit d4abff1

Please sign in to comment.