diff --git a/tests/css-parsing-tests b/tests/css-parsing-tests index c5749e5..43e65b2 160000 --- a/tests/css-parsing-tests +++ b/tests/css-parsing-tests @@ -1 +1 @@ -Subproject commit c5749e51dda3868b3e8062e65a36584c2fec8059 +Subproject commit 43e65b244133f17eb8a4d4404d5774672b94824f diff --git a/tests/test_tinycss2.py b/tests/test_tinycss2.py index d67e8b9..d07aa69 100644 --- a/tests/test_tinycss2.py +++ b/tests/test_tinycss2.py @@ -5,7 +5,7 @@ import pytest from tinycss2 import ( - parse_component_value_list, parse_declaration_list, + parse_blocks_contents, parse_component_value_list, parse_declaration_list, parse_one_component_value, parse_one_declaration, parse_one_rule, parse_rule_list, parse_stylesheet, parse_stylesheet_bytes, serialize) from tinycss2.ast import ( @@ -112,6 +112,11 @@ def test_declaration_list(input): return parse_declaration_list(input, **SKIP) +@json_test() +def test_blocks_contents(input): + return parse_blocks_contents(input, **SKIP) + + @json_test() def test_one_declaration(input): return parse_one_declaration(input, skip_comments=True) diff --git a/tinycss2/__init__.py b/tinycss2/__init__.py index ae50481..ce51131 100644 --- a/tinycss2/__init__.py +++ b/tinycss2/__init__.py @@ -10,8 +10,8 @@ from .bytes import parse_stylesheet_bytes # noqa from .parser import ( # noqa - parse_declaration_list, parse_one_component_value, parse_one_declaration, - parse_one_rule, parse_rule_list, parse_stylesheet) + parse_blocks_contents, parse_declaration_list, parse_one_component_value, + parse_one_declaration, parse_one_rule, parse_rule_list, parse_stylesheet) from .serializer import serialize, serialize_identifier # noqa from .tokenizer import parse_component_value_list # noqa diff --git a/tinycss2/parser.py b/tinycss2/parser.py index 9932027..11dd519 100644 --- a/tinycss2/parser.py +++ b/tinycss2/parser.py @@ -1,3 +1,5 @@ +from itertools import chain + from .ast import AtRule, Declaration, ParseError, QualifiedRule from .tokenizer import parse_component_value_list @@ -12,7 +14,6 @@ def _to_token_iterator(input, skip_comments=False): :returns: An iterator yielding :term:`component values`. """ - # Accept ASCII-only byte strings on Python 2, with implicit conversion. if isinstance(input, str): input = parse_component_value_list(input, skip_comments) return iter(input) @@ -83,7 +84,15 @@ def parse_one_declaration(input, skip_comments=False): return _parse_declaration(first_token, tokens) -def _parse_declaration(first_token, tokens): +def _consume_remnants(input, nested): + for token in input: + if token == ';': + return + elif nested and token == '}': + return + + +def _parse_declaration(first_token, tokens, nested=True): """Parse a declaration. Consume :obj:`tokens` until the end of the declaration or the first error. @@ -92,6 +101,8 @@ def _parse_declaration(first_token, tokens): :param first_token: The first component value of the rule. :type tokens: :term:`iterator` :param tokens: An iterator yielding :term:`component values`. + :type nested: :obj:`bool` + :param nested: Whether the declaration is nested or top-level. :returns: A :class:`~tinycss2.ast.Declaration` or :class:`~tinycss2.ast.ParseError`. @@ -99,21 +110,27 @@ def _parse_declaration(first_token, tokens): """ name = first_token if name.type != 'ident': - return ParseError(name.source_line, name.source_column, 'invalid', - 'Expected for declaration name, got %s.' - % name.type) + _consume_remnants(tokens, nested) + return ParseError( + name.source_line, name.source_column, 'invalid', + f'Expected for declaration name, got {name.type}.') colon = _next_significant(tokens) if colon is None: - return ParseError(name.source_line, name.source_column, 'invalid', - "Expected ':' after declaration name, got EOF") + _consume_remnants(tokens, nested) + return ParseError( + name.source_line, name.source_column, 'invalid', + "Expected ':' after declaration name, got EOF") elif colon != ':': - return ParseError(colon.source_line, colon.source_column, 'invalid', - "Expected ':' after declaration name, got %s." - % colon.type) + _consume_remnants(tokens, nested) + return ParseError( + colon.source_line, colon.source_column, 'invalid', + "Expected ':' after declaration name, got {colon.type}.") value = [] state = 'value' + contains_non_whitespace = False + contains_simple_block = False for i, token in enumerate(tokens): if state == 'value' and token == '!': state = 'bang' @@ -123,17 +140,59 @@ def _parse_declaration(first_token, tokens): state = 'important' elif token.type not in ('whitespace', 'comment'): state = 'value' + if token.type == '{} block': + if contains_non_whitespace: + contains_simple_block = True + else: + contains_non_whitespace = True + else: + contains_non_whitespace = True value.append(token) if state == 'important': del value[bang_position:] + # TODO: Handle custom property names + + if contains_simple_block and contains_non_whitespace: + return ParseError( + colon.source_line, colon.source_column, 'invalid', + 'Declaration contains {} block') + + # TODO: Handle unicode-range + return Declaration(name.source_line, name.source_column, name.value, name.lower_value, value, state == 'important') +def _consume_blocks_content(first_token, tokens): + """Consume declaration or nested rule.""" + declaration_tokens = [] + semicolon_token = [] + if first_token != ';' and first_token.type != '{} block': + for token in tokens: + if token == ';': + semicolon_token.append(token) + break + declaration_tokens.append(token) + if token.type == '{} block': + break + declaration = _parse_declaration( + first_token, iter(declaration_tokens), nested=True) + if declaration.type == 'declaration': + return declaration + else: + tokens = chain(declaration_tokens, semicolon_token, tokens) + return _consume_qualified_rule( + first_token, tokens, stop_token=';', nested=True) + + def _consume_declaration_in_list(first_token, tokens): - """Like :func:`_parse_declaration`, but stop at the first ``;``.""" + """Like :func:`_parse_declaration`, but stop at the first ``;``. + + Deprecated, use :func:`_consume_blocks_content` instead. + + """ other_declaration_tokens = [] for token in tokens: if token == ';': @@ -142,16 +201,70 @@ def _consume_declaration_in_list(first_token, tokens): return _parse_declaration(first_token, iter(other_declaration_tokens)) +def parse_blocks_contents(input, skip_comments=False, skip_whitespace=False): + """Parse a block’s contents. + + This is used e.g. for the :attr:`~tinycss2.ast.QualifiedRule.content` + of a style rule or ``@page`` rule, or for the ``style`` attribute of an + HTML element. + + In contexts that don’t expect any at-rule and/or qualified rule, + all :class:`~tinycss2.ast.AtRule` and/or + :class:`~tinycss2.ast.QualifiedRule` objects should simply be rejected as + invalid. + + :type input: :obj:`str` or :term:`iterable` + :param input: A string or an iterable of :term:`component values`. + :type skip_comments: :obj:`bool` + :param skip_comments: + Ignore CSS comments at the top-level of the list. + If the input is a string, ignore all comments. + :type skip_whitespace: :obj:`bool` + :param skip_whitespace: + Ignore whitespace at the top-level of the list. + Whitespace is still preserved + in the :attr:`~tinycss2.ast.Declaration.value` of declarations + and the :attr:`~tinycss2.ast.AtRule.prelude` + and :attr:`~tinycss2.ast.AtRule.content` of at-rules. + :returns: + A list of + :class:`~tinycss2.ast.Declaration`, + :class:`~tinycss2.ast.AtRule`, + :class:`~tinycss2.ast.QualifiedRule`, + :class:`~tinycss2.ast.Comment` (if ``skip_comments`` is false), + :class:`~tinycss2.ast.WhitespaceToken` + (if ``skip_whitespace`` is false), + and :class:`~tinycss2.ast.ParseError` objects + + """ + tokens = _to_token_iterator(input, skip_comments) + result = [] + for token in tokens: + if token.type == 'whitespace': + if not skip_whitespace: + result.append(token) + elif token.type == 'comment': + if not skip_comments: + result.append(token) + elif token.type == 'at-keyword': + result.append(_consume_at_rule(token, tokens)) + elif token != ';': + result.append(_consume_blocks_content(token, tokens)) + return result + + def parse_declaration_list(input, skip_comments=False, skip_whitespace=False): """Parse a :diagram:`declaration list` (which may also contain at-rules). + Deprecated and removed from CSS Syntax Level 3. Use + :func:`parse_blocks_contents` instead. + This is used e.g. for the :attr:`~tinycss2.ast.QualifiedRule.content` - of a style rule or ``@page`` rule, - or for the ``style`` attribute of an HTML element. + of a style rule or ``@page`` rule, or for the ``style`` attribute of an + HTML element. - In contexts that don’t expect any at-rule, - all :class:`~tinycss2.ast.AtRule` objects - should simply be rejected as invalid. + In contexts that don’t expect any at-rule, all + :class:`~tinycss2.ast.AtRule` objects should simply be rejected as invalid. :type input: :obj:`str` or :term:`iterable` :param input: A string or an iterable of :term:`component values`. @@ -229,6 +342,9 @@ def parse_one_rule(input, skip_comments=False): def parse_rule_list(input, skip_comments=False, skip_whitespace=False): """Parse a non-top-level :diagram:`rule list`. + Deprecated and removed from CSS Syntax. Use :func:`parse_blocks_content` + instead. + This is used for parsing the :attr:`~tinycss2.ast.AtRule.content` of nested rules like ``@media``. This differs from :func:`parse_stylesheet` in that @@ -332,22 +448,7 @@ def _consume_rule(first_token, tokens): """ if first_token.type == 'at-keyword': return _consume_at_rule(first_token, tokens) - if first_token.type == '{} block': - prelude = [] - block = first_token - else: - prelude = [first_token] - for token in tokens: - if token.type == '{} block': - block = token - break - prelude.append(token) - else: - return ParseError( - prelude[-1].source_line, prelude[-1].source_column, 'invalid', - 'EOF reached before {} block for a qualified rule.') - return QualifiedRule(first_token.source_line, first_token.source_column, - prelude, block.content) + return _consume_qualified_rule(first_token, tokens) def _consume_at_rule(at_keyword, tokens): @@ -359,6 +460,8 @@ def _consume_at_rule(at_keyword, tokens): :param at_keyword: The at-rule keyword token starting this rule. :type tokens: :term:`iterator` :param tokens: An iterator yielding :term:`component values`. + :type nested: :obj:`bool` + :param nested: Whether the at-rule is nested or top-level. :returns: A :class:`~tinycss2.ast.QualifiedRule`, or :class:`~tinycss2.ast.ParseError`. @@ -368,6 +471,8 @@ def _consume_at_rule(at_keyword, tokens): content = None for token in tokens: if token.type == '{} block': + # TODO: handle nested at-rules + # https://drafts.csswg.org/css-syntax-3/#consume-at-rule content = token.content break elif token == ';': @@ -375,3 +480,48 @@ def _consume_at_rule(at_keyword, tokens): prelude.append(token) return AtRule(at_keyword.source_line, at_keyword.source_column, at_keyword.value, at_keyword.lower_value, prelude, content) + + +def _rule_error(token, name): + """Create rule parse error raised because of given token.""" + return ParseError( + token.source_line, token.source_column, 'invalid', + f'{name} reached before {{}} block for a qualified rule.') + + +def _consume_qualified_rule(first_token, tokens, nested=False, + stop_token=None): + """Consume a qualified rule. + + Consume just enough of :obj:`tokens` for this rule. + + :type first_token: :term:`component value` + :param first_token: The first component value of the rule. + :type tokens: :term:`iterator` + :param tokens: An iterator yielding :term:`component values`. + :type nested: :obj:`bool` + :param nested: Whether the rule is nested or top-level. + :type stop_token: :class:`~tinycss2.ast.Node` + :param stop_token: A token that ends rule parsing when met. + + """ + if first_token == stop_token: + return _rule_error(first_token, 'Stop token') + if first_token.type == '{} block': + prelude = [] + block = first_token + else: + prelude = [first_token] + for token in tokens: + if token == stop_token: + return _rule_error(token, 'Stop token') + if token.type == '{} block': + block = token + # TODO: handle special case for CSS variables (using "nested") + # https://drafts.csswg.org/css-syntax-3/#consume-qualified-rule + break + prelude.append(token) + else: + return _rule_error(prelude[-1], 'EOF') + return QualifiedRule(first_token.source_line, first_token.source_column, + prelude, block.content)