Skip to content

Commit

Permalink
Fix test data to pass sets instead of lists
Browse files (browse the repository at this point in the history)
This fixes the test data to pass sets instead of lists for "tags",
"skip_tags", "recognized_tags", and "protocols".
  • Loading branch information
willkg committed Jan 22, 2023
1 parent 29231a1 commit 3085abc
Show file tree
Hide file tree
Showing 4 changed files with 63 additions and 70 deletions.
3 changes: 2 additions & 1 deletion bleach/linkifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,8 @@ def __init__(
:arg set skip_tags: set of tags that you don't want to linkify the
contents of; for example, you could set this to ``{'pre'}`` to skip
linkifying contents of ``pre`` tags
linkifying contents of ``pre`` tags; ``None`` means you don't
want linkify to skip any tags
:arg bool parse_email: whether or not to linkify email addresses
Expand Down
94 changes: 45 additions & 49 deletions tests/test_clean.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def test_clean_idempotent(data):


def test_clean_idempotent_img():
tags = ["img"]
tags = {"img"}
dirty = '<imr src="http://example.com?foo=bar&bar=foo&amp;biz=bash">'
assert clean(clean(dirty, tags=tags), tags=tags) == clean(dirty, tags=tags)

Expand Down Expand Up @@ -254,21 +254,21 @@ def test_character_entities_handling(text, expected):
# a tag is disallowed, so it's stripped
(
'<p><a href="http://example.com/">link text</a></p>',
{"tags": ["p"]},
{"tags": {"p"}},
"<p>link text</p>",
),
# Test nested disallowed tag
(
"<p><span>multiply <span>nested <span>text</span></span></span></p>",
{"tags": ["p"]},
{"tags": {"p"}},
"<p>multiply nested text</p>",
),
# (#271)
("<ul><li><script></li></ul>", {"tags": ["ul", "li"]}, "<ul><li></li></ul>"),
("<ul><li><script></li></ul>", {"tags": {"ul", "li"}}, "<ul><li></li></ul>"),
# Test disallowed tag that's deep in the tree
(
'<p><a href="http://example.com/"><img src="http://example.com/"></a></p>',
{"tags": ["a", "p"]},
{"tags": {"a", "p"}},
'<p><a href="http://example.com/"></a></p>',
),
# Test isindex -- the parser expands this to a prompt (#279)
Expand Down Expand Up @@ -342,9 +342,7 @@ def test_href_with_wrong_tag():


def test_disallowed_attr():
IMG = [
"img",
]
IMG = {"img"}
IMG_ATTR = ["src"]

assert clean('<a onclick="evil" href="test">test</a>') == '<a href="test">test</a>'
Expand Down Expand Up @@ -373,9 +371,7 @@ def test_unquoted_event_handler_attr_value():


def test_invalid_filter_attr():
IMG = [
"img",
]
IMG = {"img"}
IMG_ATTR = {
"img": lambda tag, name, val: name == "src" and val == "http://example.com/"
}
Expand All @@ -400,7 +396,7 @@ def test_invalid_filter_attr():

def test_poster_attribute():
"""Poster attributes should not allow javascript."""
tags = ["video"]
tags = {"video"}
attrs = {"video": ["poster"]}

test = '<video poster="javascript:alert(1)"></video>'
Expand All @@ -413,7 +409,7 @@ def test_poster_attribute():
def test_attributes_callable():
"""Verify attributes can take a callable"""
ATTRS = lambda tag, name, val: name == "title"
TAGS = ["a"]
TAGS = {"a"}

text = '<a href="/foo" title="blah">example</a>'
assert clean(text, tags=TAGS, attributes=ATTRS) == '<a title="blah">example</a>'
Expand All @@ -425,7 +421,7 @@ def test_attributes_wildcard():
"*": ["id"],
"img": ["src"],
}
TAGS = ["img", "em"]
TAGS = {"img", "em"}

text = (
'both <em id="foo" style="color: black">can</em> have <img id="bar" src="foo"/>'
Expand All @@ -439,7 +435,7 @@ def test_attributes_wildcard():
def test_attributes_wildcard_callable():
"""Verify attributes[*] callable works"""
ATTRS = {"*": lambda tag, name, val: name == "title"}
TAGS = ["a"]
TAGS = {"a"}

assert (
clean('<a href="/foo" title="blah">example</a>', tags=TAGS, attributes=ATTRS)
Expand All @@ -456,7 +452,7 @@ def img_test(tag, name, val):
ATTRS = {
"img": img_test,
}
TAGS = ["img"]
TAGS = {"img"}

text = 'foo <img src="http://example.com" alt="blah"> baz'
assert clean(text, tags=TAGS, attributes=ATTRS) == "foo <img> baz"
Expand All @@ -470,7 +466,7 @@ def img_test(tag, name, val):
def test_attributes_tag_list():
"""Verify attributes[tag] list works"""
ATTRS = {"a": ["title"]}
TAGS = ["a"]
TAGS = {"a"}

assert (
clean('<a href="/foo" title="blah">example</a>', tags=TAGS, attributes=ATTRS)
Expand All @@ -481,7 +477,7 @@ def test_attributes_tag_list():
def test_attributes_list():
"""Verify attributes list works"""
ATTRS = ["title"]
TAGS = ["a"]
TAGS = {"a"}

text = '<a href="/foo" title="blah">example</a>'
assert clean(text, tags=TAGS, attributes=ATTRS) == '<a title="blah">example</a>'
Expand Down Expand Up @@ -518,83 +514,83 @@ def test_attributes_list():
# Specified protocols are allowed
(
'<a href="myprotocol://more_text">allowed href</a>',
{"protocols": ["myprotocol"]},
{"protocols": {"myprotocol"}},
'<a href="myprotocol://more_text">allowed href</a>',
),
# Unspecified protocols are not allowed
(
'<a href="http://example.com">invalid href</a>',
{"protocols": ["myprotocol"]},
{"protocols": {"myprotocol"}},
"<a>invalid href</a>",
),
# Anchors are ok
(
'<a href="#section-1">foo</a>',
{"protocols": []},
{"protocols": set()},
'<a href="#section-1">foo</a>',
),
# Anchor that looks like a domain is ok
(
'<a href="#example.com">foo</a>',
{"protocols": []},
{"protocols": set()},
'<a href="#example.com">foo</a>',
),
# Allow implicit http/https if allowed
(
'<a href="/path">valid</a>',
{"protocols": ["http"]},
{"protocols": {"http"}},
'<a href="/path">valid</a>',
),
(
'<a href="/path">valid</a>',
{"protocols": ["https"]},
{"protocols": {"https"}},
'<a href="/path">valid</a>',
),
(
'<a href="example.com">valid</a>',
{"protocols": ["http"]},
{"protocols": {"http"}},
'<a href="example.com">valid</a>',
),
(
'<a href="example.com:8000">valid</a>',
{"protocols": ["http"]},
{"protocols": {"http"}},
'<a href="example.com:8000">valid</a>',
),
(
'<a href="localhost">valid</a>',
{"protocols": ["http"]},
{"protocols": {"http"}},
'<a href="localhost">valid</a>',
),
(
'<a href="localhost:8000">valid</a>',
{"protocols": ["http"]},
{"protocols": {"http"}},
'<a href="localhost:8000">valid</a>',
),
(
'<a href="192.168.100.100">valid</a>',
{"protocols": ["http"]},
{"protocols": {"http"}},
'<a href="192.168.100.100">valid</a>',
),
(
'<a href="192.168.100.100:8000">valid</a>',
{"protocols": ["http"]},
{"protocols": {"http"}},
'<a href="192.168.100.100:8000">valid</a>',
),
pytest.param(
*(
'<a href="192.168.100.100:8000/foo#bar">valid</a>',
{"protocols": ["http"]},
{"protocols": {"http"}},
'<a href="192.168.100.100:8000/foo#bar">valid</a>',
),
marks=pytest.mark.xfail,
),
# Disallow implicit http/https if disallowed
('<a href="example.com">foo</a>', {"protocols": []}, "<a>foo</a>"),
('<a href="example.com:8000">foo</a>', {"protocols": []}, "<a>foo</a>"),
('<a href="localhost">foo</a>', {"protocols": []}, "<a>foo</a>"),
('<a href="localhost:8000">foo</a>', {"protocols": []}, "<a>foo</a>"),
('<a href="192.168.100.100">foo</a>', {"protocols": []}, "<a>foo</a>"),
('<a href="192.168.100.100:8000">foo</a>', {"protocols": []}, "<a>foo</a>"),
('<a href="example.com">foo</a>', {"protocols": set()}, "<a>foo</a>"),
('<a href="example.com:8000">foo</a>', {"protocols": set()}, "<a>foo</a>"),
('<a href="localhost">foo</a>', {"protocols": set()}, "<a>foo</a>"),
('<a href="localhost:8000">foo</a>', {"protocols": set()}, "<a>foo</a>"),
('<a href="192.168.100.100">foo</a>', {"protocols": set()}, "<a>foo</a>"),
('<a href="192.168.100.100:8000">foo</a>', {"protocols": set()}, "<a>foo</a>"),
# Disallowed protocols with sneaky character entities
('<a href="javas&#x09;cript:alert(1)">alert</a>', {}, "<a>alert</a>"),
('<a href="&#14;javascript:alert(1)">alert</a>', {}, "<a>alert</a>"),
Expand All @@ -613,7 +609,7 @@ def test_uri_value_allowed_protocols(data, kwargs, expected):
def test_svg_attr_val_allows_ref():
"""Unescape values in svg attrs that allow url references"""
# Local IRI, so keep it
TAGS = ["svg", "rect"]
TAGS = {"svg", "rect"}
ATTRS = {
"rect": ["fill"],
}
Expand All @@ -625,7 +621,7 @@ def test_svg_attr_val_allows_ref():
)

# Non-local IRI, so drop it
TAGS = ["svg", "rect"]
TAGS = {"svg", "rect"}
ATTRS = {
"rect": ["fill"],
}
Expand All @@ -649,7 +645,7 @@ def test_svg_attr_val_allows_ref():
)
def test_svg_allow_local_href(text, expected):
"""Keep local hrefs for svg elements"""
TAGS = ["svg", "pattern"]
TAGS = {"svg", "pattern"}
ATTRS = {
"pattern": ["id", "href"],
}
Expand All @@ -671,7 +667,7 @@ def test_svg_allow_local_href(text, expected):
)
def test_svg_allow_local_href_nonlocal(text, expected):
"""Drop non-local hrefs for svg elements"""
TAGS = ["svg", "pattern"]
TAGS = {"svg", "pattern"}
ATTRS = {
"pattern": ["id", "href"],
}
Expand Down Expand Up @@ -741,7 +737,7 @@ def test_nonexistent_namespace():
],
)
def test_self_closing_tags_self_close(tag):
assert clean(f"<{tag}>", tags=[tag]) == f"<{tag}>"
assert clean(f"<{tag}>", tags={tag}) == f"<{tag}>"


# tags that get content passed through (i.e. parsed with parseRCDataRawtext)
Expand Down Expand Up @@ -770,7 +766,7 @@ def test_self_closing_tags_self_close(tag):
)
def test_noscript_rawtag_(raw_tag, data, expected):
# refs: bug 1615315 / GHSA-q65m-pv3f-wr5r
assert clean(data, tags=["noscript", raw_tag]) == expected
assert clean(data, tags={"noscript", raw_tag}) == expected


@pytest.mark.parametrize(
Expand Down Expand Up @@ -803,7 +799,7 @@ def test_namespace_rc_data_element_strip_false(
#
# browsers will pull the img out of the namespace and rc data tag resulting in XSS
assert (
clean(data, tags=[namespace_tag, rc_data_element_tag], strip=False) == expected
clean(data, tags={namespace_tag, rc_data_element_tag}, strip=False) == expected
)


Expand Down Expand Up @@ -1087,7 +1083,7 @@ def test_html_comments_escaped(namespace_tag, end_tag, eject_tag, data, expected
#
# the ejected elements can trigger XSS
assert (
clean(data, tags=[namespace_tag, end_tag, eject_tag], strip_comments=False)
clean(data, tags={namespace_tag, end_tag, eject_tag}, strip_comments=False)
== expected
)

Expand Down Expand Up @@ -1125,7 +1121,7 @@ def test_strip_respects_block_level_elements(text, expected):
Insert a newline between block level elements
https://github.com/mozilla/bleach/issues/369
"""
assert clean(text, tags=[], strip=True) == expected
assert clean(text, tags=set(), strip=True) == expected


def get_ids_and_tests():
Expand Down Expand Up @@ -1171,7 +1167,7 @@ def test_regressions(test_case):

def test_preserves_attributes_order():
html = """<a target="_blank" href="https://example.com">Link</a>"""
cleaned_html = clean(html, tags=["a"], attributes={"a": ["href", "target"]})
cleaned_html = clean(html, tags={"a"}, attributes={"a": ["href", "target"]})

assert cleaned_html == html

Expand All @@ -1192,7 +1188,7 @@ def test_css_sanitizer_warning(attr):

class TestCleaner:
def test_basics(self):
TAGS = ["span", "br"]
TAGS = {"span", "br"}
ATTRS = {"span": ["style"]}

cleaner = Cleaner(tags=TAGS, attributes=ATTRS)
Expand All @@ -1214,7 +1210,7 @@ def __iter__(self):
yield token

ATTRS = {"img": ["rel", "src"]}
TAGS = ["img"]
TAGS = {"img"}

cleaner = Cleaner(tags=TAGS, attributes=ATTRS, filters=[MooFilter])

Expand Down
4 changes: 2 additions & 2 deletions tests/test_css.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from bleach.css_sanitizer import CSSSanitizer # noqa


clean = partial(clean, tags=["p"], attributes=["style"])
clean = partial(clean, tags={"p"}, attributes=["style"])


@pytest.mark.parametrize(
Expand Down Expand Up @@ -250,7 +250,7 @@ def test_css_parsing_with_entities(data, styles, expected):
css_sanitizer = CSSSanitizer(allowed_css_properties=styles)
assert (
clean(
data, tags=["p"], attributes={"p": ["style"]}, css_sanitizer=css_sanitizer
data, tags={"p"}, attributes={"p": ["style"]}, css_sanitizer=css_sanitizer
)
== expected
)
Loading

0 comments on commit 3085abc

Please sign in to comment.