Kozea · liZe · Feb 27, 2024 · Jan 5, 2024 · Jan 19, 2024 · Jan 19, 2024
diff --git a/tests/css-parsing-tests b/tests/css-parsing-tests
diff --git a/tests/test_tinycss2.py b/tests/test_tinycss2.py
@@ -5,7 +5,7 @@
 
 import pytest
 from tinycss2 import (
-    parse_component_value_list, parse_declaration_list,
+    parse_blocks_contents, parse_component_value_list, parse_declaration_list,
     parse_one_component_value, parse_one_declaration, parse_one_rule,
     parse_rule_list, parse_stylesheet, parse_stylesheet_bytes, serialize)
 from tinycss2.ast import (
@@ -112,6 +112,11 @@ def test_declaration_list(input):
     return parse_declaration_list(input, **SKIP)
 
 
+@json_test()
+def test_blocks_contents(input):
+    return parse_blocks_contents(input, **SKIP)
+
+
 @json_test()
 def test_one_declaration(input):
     return parse_one_declaration(input, skip_comments=True)

diff --git a/tinycss2/__init__.py b/tinycss2/__init__.py
@@ -10,8 +10,8 @@
 
 from .bytes import parse_stylesheet_bytes  # noqa
 from .parser import (  # noqa
-    parse_declaration_list, parse_one_component_value, parse_one_declaration,
-    parse_one_rule, parse_rule_list, parse_stylesheet)
+    parse_blocks_contents, parse_declaration_list, parse_one_component_value,
+    parse_one_declaration, parse_one_rule, parse_rule_list, parse_stylesheet)
 from .serializer import serialize, serialize_identifier  # noqa
 from .tokenizer import parse_component_value_list  # noqa
 

diff --git a/tinycss2/parser.py b/tinycss2/parser.py
@@ -1,3 +1,5 @@
+from itertools import chain
+
 from .ast import AtRule, Declaration, ParseError, QualifiedRule
 from .tokenizer import parse_component_value_list
 
@@ -12,7 +14,6 @@ def _to_token_iterator(input, skip_comments=False):
     :returns: An iterator yielding :term:`component values`.
 
     """
-    # Accept ASCII-only byte strings on Python 2, with implicit conversion.
     if isinstance(input, str):
         input = parse_component_value_list(input, skip_comments)
     return iter(input)
@@ -83,7 +84,15 @@ def parse_one_declaration(input, skip_comments=False):
     return _parse_declaration(first_token, tokens)
 
 
-def _parse_declaration(first_token, tokens):
+def _consume_remnants(input, nested):
+    for token in input:
+        if token == ';':
+            return
+        elif nested and token == '}':
+            return
+
+
+def _parse_declaration(first_token, tokens, nested=True):
     """Parse a declaration.
 
     Consume :obj:`tokens` until the end of the declaration or the first error.
@@ -92,28 +101,36 @@ def _parse_declaration(first_token, tokens):
     :param first_token: The first component value of the rule.
     :type tokens: :term:`iterator`
     :param tokens: An iterator yielding :term:`component values`.
+    :type nested: :obj:`bool`
+    :param nested: Whether the declaration is nested or top-level.
     :returns:
         A :class:`~tinycss2.ast.Declaration`
         or :class:`~tinycss2.ast.ParseError`.
 
     """
     name = first_token
     if name.type != 'ident':
-        return ParseError(name.source_line, name.source_column, 'invalid',
-                          'Expected <ident> for declaration name, got %s.'
-                          % name.type)
+        _consume_remnants(tokens, nested)
+        return ParseError(
+            name.source_line, name.source_column, 'invalid',
+            f'Expected <ident> for declaration name, got {name.type}.')
 
     colon = _next_significant(tokens)
     if colon is None:
-        return ParseError(name.source_line, name.source_column, 'invalid',
-                          "Expected ':' after declaration name, got EOF")
+        _consume_remnants(tokens, nested)
+        return ParseError(
+            name.source_line, name.source_column, 'invalid',
+            "Expected ':' after declaration name, got EOF")
     elif colon != ':':
-        return ParseError(colon.source_line, colon.source_column, 'invalid',
-                          "Expected ':' after declaration name, got %s."
-                          % colon.type)
+        _consume_remnants(tokens, nested)
+        return ParseError(
+            colon.source_line, colon.source_column, 'invalid',
+            "Expected ':' after declaration name, got {colon.type}.")
 
     value = []
     state = 'value'
+    contains_non_whitespace = False
+    contains_simple_block = False
     for i, token in enumerate(tokens):
         if state == 'value' and token == '!':
             state = 'bang'
@@ -123,17 +140,59 @@ def _parse_declaration(first_token, tokens):
             state = 'important'
         elif token.type not in ('whitespace', 'comment'):
             state = 'value'
+            if token.type == '{} block':
+                if contains_non_whitespace:
+                    contains_simple_block = True
+                else:
+                    contains_non_whitespace = True
+            else:
+                contains_non_whitespace = True
         value.append(token)
 
     if state == 'important':
         del value[bang_position:]
 
+    # TODO: Handle custom property names
+
+    if contains_simple_block and contains_non_whitespace:
+        return ParseError(
+            colon.source_line, colon.source_column, 'invalid',
+            'Declaration contains {} block')
+
+    # TODO: Handle unicode-range
+
     return Declaration(name.source_line, name.source_column, name.value,
                        name.lower_value, value, state == 'important')
 
 
+def _consume_blocks_content(first_token, tokens):
+    """Consume declaration or nested rule."""
+    declaration_tokens = []
+    semicolon_token = []
+    if first_token != ';' and first_token.type != '{} block':
+        for token in tokens:
+            if token == ';':
+                semicolon_token.append(token)
+                break
+            declaration_tokens.append(token)
+            if token.type == '{} block':
+                break
+    declaration = _parse_declaration(
+        first_token, iter(declaration_tokens), nested=True)
+    if declaration.type == 'declaration':
+        return declaration
+    else:
+        tokens = chain(declaration_tokens, semicolon_token, tokens)
+        return _consume_qualified_rule(
+            first_token, tokens, stop_token=';', nested=True)
+
+
 def _consume_declaration_in_list(first_token, tokens):
-    """Like :func:`_parse_declaration`, but stop at the first ``;``."""
+    """Like :func:`_parse_declaration`, but stop at the first ``;``.
+
+    Deprecated, use :func:`_consume_blocks_content` instead.
+
+    """
     other_declaration_tokens = []
     for token in tokens:
         if token == ';':
@@ -142,16 +201,70 @@ def _consume_declaration_in_list(first_token, tokens):
     return _parse_declaration(first_token, iter(other_declaration_tokens))
 
 
+def parse_blocks_contents(input, skip_comments=False, skip_whitespace=False):
+    """Parse a block’s contents.
+
+    This is used e.g. for the :attr:`~tinycss2.ast.QualifiedRule.content`
+    of a style rule or ``@page`` rule, or for the ``style`` attribute of an
+    HTML element.
+
+    In contexts that don’t expect any at-rule and/or qualified rule,
+    all :class:`~tinycss2.ast.AtRule` and/or
+    :class:`~tinycss2.ast.QualifiedRule` objects should simply be rejected as
+    invalid.
+
+    :type input: :obj:`str` or :term:`iterable`
+    :param input: A string or an iterable of :term:`component values`.
+    :type skip_comments: :obj:`bool`
+    :param skip_comments:
+        Ignore CSS comments at the top-level of the list.
+        If the input is a string, ignore all comments.
+    :type skip_whitespace: :obj:`bool`
+    :param skip_whitespace:
+        Ignore whitespace at the top-level of the list.
+        Whitespace is still preserved
+        in the :attr:`~tinycss2.ast.Declaration.value` of declarations
+        and the :attr:`~tinycss2.ast.AtRule.prelude`
+        and :attr:`~tinycss2.ast.AtRule.content` of at-rules.
+    :returns:
+        A list of
+        :class:`~tinycss2.ast.Declaration`,
+        :class:`~tinycss2.ast.AtRule`,
+        :class:`~tinycss2.ast.QualifiedRule`,
+        :class:`~tinycss2.ast.Comment` (if ``skip_comments`` is false),
+        :class:`~tinycss2.ast.WhitespaceToken`
+        (if ``skip_whitespace`` is false),
+        and :class:`~tinycss2.ast.ParseError` objects
+
+    """
+    tokens = _to_token_iterator(input, skip_comments)
+    result = []
+    for token in tokens:
+        if token.type == 'whitespace':
+            if not skip_whitespace:
+                result.append(token)
+        elif token.type == 'comment':
+            if not skip_comments:
+                result.append(token)
+        elif token.type == 'at-keyword':
+            result.append(_consume_at_rule(token, tokens))
+        elif token != ';':
+            result.append(_consume_blocks_content(token, tokens))
+    return result
+
+
 def parse_declaration_list(input, skip_comments=False, skip_whitespace=False):
     """Parse a :diagram:`declaration list` (which may also contain at-rules).
 
+    Deprecated and removed from CSS Syntax Level 3. Use
+    :func:`parse_blocks_contents` instead.
+
     This is used e.g. for the :attr:`~tinycss2.ast.QualifiedRule.content`
-    of a style rule or ``@page`` rule,
-    or for the ``style`` attribute of an HTML element.
+    of a style rule or ``@page`` rule, or for the ``style`` attribute of an
+    HTML element.
 
-    In contexts that don’t expect any at-rule,
-    all :class:`~tinycss2.ast.AtRule` objects
-    should simply be rejected as invalid.
+    In contexts that don’t expect any at-rule, all
+    :class:`~tinycss2.ast.AtRule` objects should simply be rejected as invalid.
 
     :type input: :obj:`str` or :term:`iterable`
     :param input: A string or an iterable of :term:`component values`.
@@ -229,6 +342,9 @@ def parse_one_rule(input, skip_comments=False):
 def parse_rule_list(input, skip_comments=False, skip_whitespace=False):
     """Parse a non-top-level :diagram:`rule list`.
 
+    Deprecated and removed from CSS Syntax. Use :func:`parse_blocks_content`
+    instead.
+
     This is used for parsing the :attr:`~tinycss2.ast.AtRule.content`
     of nested rules like ``@media``.
     This differs from :func:`parse_stylesheet` in that
@@ -332,22 +448,7 @@ def _consume_rule(first_token, tokens):
     """
     if first_token.type == 'at-keyword':
         return _consume_at_rule(first_token, tokens)
-    if first_token.type == '{} block':
-        prelude = []
-        block = first_token
-    else:
-        prelude = [first_token]
-        for token in tokens:
-            if token.type == '{} block':
-                block = token
-                break
-            prelude.append(token)
-        else:
-            return ParseError(
-                prelude[-1].source_line, prelude[-1].source_column, 'invalid',
-                'EOF reached before {} block for a qualified rule.')
-    return QualifiedRule(first_token.source_line, first_token.source_column,
-                         prelude, block.content)
+    return _consume_qualified_rule(first_token, tokens)
 
 
 def _consume_at_rule(at_keyword, tokens):
@@ -359,6 +460,8 @@ def _consume_at_rule(at_keyword, tokens):
     :param at_keyword: The at-rule keyword token starting this rule.
     :type tokens: :term:`iterator`
     :param tokens: An iterator yielding :term:`component values`.
+    :type nested: :obj:`bool`
+    :param nested: Whether the at-rule is nested or top-level.
     :returns:
         A :class:`~tinycss2.ast.QualifiedRule`,
         or :class:`~tinycss2.ast.ParseError`.
@@ -368,10 +471,57 @@ def _consume_at_rule(at_keyword, tokens):
     content = None
     for token in tokens:
         if token.type == '{} block':
+            # TODO: handle nested at-rules
+            # https://drafts.csswg.org/css-syntax-3/#consume-at-rule
             content = token.content
             break
         elif token == ';':
             break
         prelude.append(token)
     return AtRule(at_keyword.source_line, at_keyword.source_column,
                   at_keyword.value, at_keyword.lower_value, prelude, content)
+
+
+def _rule_error(token, name):
+    """Create rule parse error raised because of given token."""
+    return ParseError(
+        token.source_line, token.source_column, 'invalid',
+        f'{name} reached before {{}} block for a qualified rule.')
+
+
+def _consume_qualified_rule(first_token, tokens, nested=False,
+                            stop_token=None):
+    """Consume a qualified rule.
+
+    Consume just enough of :obj:`tokens` for this rule.
+
+    :type first_token: :term:`component value`
+    :param first_token: The first component value of the rule.
+    :type tokens: :term:`iterator`
+    :param tokens: An iterator yielding :term:`component values`.
+    :type nested: :obj:`bool`
+    :param nested: Whether the rule is nested or top-level.
+    :type stop_token: :class:`~tinycss2.ast.Node`
+    :param stop_token: A token that ends rule parsing when met.
+
+    """
+    if first_token == stop_token:
+        return _rule_error(first_token, 'Stop token')
+    if first_token.type == '{} block':
+        prelude = []
+        block = first_token
+    else:
+        prelude = [first_token]
+        for token in tokens:
+            if token == stop_token:
+                return _rule_error(token, 'Stop token')
+            if token.type == '{} block':
+                block = token
+                # TODO: handle special case for CSS variables (using "nested")
+                # https://drafts.csswg.org/css-syntax-3/#consume-qualified-rule
+                break
+            prelude.append(token)
+        else:
+            return _rule_error(prelude[-1], 'EOF')
+    return QualifiedRule(first_token.source_line, first_token.source_column,
+                         prelude, block.content)