diff --git a/bleach/sanitizer.py b/bleach/sanitizer.py index 54f624fa..0f6babda 100644 --- a/bleach/sanitizer.py +++ b/bleach/sanitizer.py @@ -593,8 +593,14 @@ def sanitize_css(self, style): # the whole thing. parts = style.split(';') gauntlet = re.compile( - r"""^([-/:,#%.'"\s!\w]|\w-\w|'[\s\w]+'\s*|"[\s\w]+"|\([\d,%\.\s]+\))*$""", - flags=re.U + r"""^( # consider a style attribute value as composed of: +[/:,#%!.\s\w] # a single allowed punctuation, whitespace, or word character +|\w-\w # 3 characters in the form \w-\w +|'[\s\w]+'\s* # a single quoted string of [\s\w]+ with optional trailing whitespace +|"[\s\w]+" # a double quoted string of [\s\w]+ +|\([\d,%\.\s]+\) # a parenthesized string of one or more digits, commas, periods, percent signs, or whitespace e.g. from 'color: hsl(30,100%,50%)' +)*$""", + flags=re.U | re.VERBOSE ) for part in parts: diff --git a/tests/test_css.py b/tests/test_css.py index 92fe9553..6a3d8eb9 100644 --- a/tests/test_css.py +++ b/tests/test_css.py @@ -1,6 +1,7 @@ from __future__ import unicode_literals from functools import partial +from timeit import timeit import pytest @@ -37,10 +38,12 @@ '
bar
' ), # Handle leading - in attributes - ( + # regressed with the fix for bug 1623633 + pytest.param( 'bar
', ['cursor'], - 'bar
' + 'bar
', + marks=pytest.mark.xfail, ), # Handle () in attributes ( @@ -54,16 +57,20 @@ 'bar
', ), # Handle ' in attributes - ( + # regressed with the fix for bug 1623633 + pytest.param( 'bar
', ['text-overflow'], - 'bar
' + 'bar
', + marks=pytest.mark.xfail, ), # Handle " in attributes - ( + # regressed with the fix for bug 1623633 + pytest.param( 'bar
', ['text-overflow'], - 'bar
' + 'bar
', + marks=pytest.mark.xfail, ), ( 'bar
', @@ -223,3 +230,17 @@ def test_style_hang(): def test_css_parsing_with_entities(data, styles, expected): """The sanitizer should be ok with character entities""" assert clean(data, tags=['p'], attributes={'p': ['style']}, styles=styles) == expected + + +@pytest.mark.parametrize('overlap_test_char', ["\"", "'", "-"]) +def test_css_parsing_gauntlet_regex_backtracking(overlap_test_char): + """The sanitizer gauntlet regex should not catastrophically backtrack""" + # refs: https://bugzilla.mozilla.org/show_bug.cgi?id=1623633 + + def time_clean(test_char, size): + style_attr_value = (test_char + 'a' + test_char) * size + '^' + stmt = """clean('''''', attributes={'a': ['style']})""" % style_attr_value + return timeit(stmt=stmt, setup='from bleach import clean', number=1) + + # should complete in less than one second + assert time_clean(overlap_test_char, 22) < 1.0