From 4beb0b051e28942988a57db0583319779c3c8d0c Mon Sep 17 00:00:00 2001 From: Lucie Anglade Date: Fri, 5 Jan 2024 19:41:27 +0100 Subject: [PATCH 01/13] Handle nested qualified rules --- tinycss2/parser.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/tinycss2/parser.py b/tinycss2/parser.py index 9932027..e1ebdb1 100644 --- a/tinycss2/parser.py +++ b/tinycss2/parser.py @@ -1,6 +1,8 @@ from .ast import AtRule, Declaration, ParseError, QualifiedRule from .tokenizer import parse_component_value_list +from itertools import chain + def _to_token_iterator(input, skip_comments=False): """Iterate component values out of string or component values iterable. @@ -139,7 +141,11 @@ def _consume_declaration_in_list(first_token, tokens): if token == ';': break other_declaration_tokens.append(token) - return _parse_declaration(first_token, iter(other_declaration_tokens)) + declaration = _parse_declaration(first_token, iter(other_declaration_tokens)) + if isinstance(declaration, Declaration): + return declaration + else: + return _consume_rule(first_token, chain(other_declaration_tokens, tokens)) def parse_declaration_list(input, skip_comments=False, skip_whitespace=False): @@ -302,6 +308,10 @@ def parse_stylesheet(input, skip_comments=False, skip_whitespace=False): """ tokens = _to_token_iterator(input, skip_comments) + return _consume_stylesheet_content(tokens, skip_comments, skip_whitespace) + + +def _consume_stylesheet_content(tokens, skip_comments, skip_whitespace): result = [] for token in tokens: if token.type == 'whitespace': From fbda0a17557030889f6ae31665b8cb456a1d9f4b Mon Sep 17 00:00:00 2001 From: Lucie Anglade Date: Fri, 19 Jan 2024 18:02:55 +0100 Subject: [PATCH 02/13] =?UTF-8?q?Add=20documentation=20and=20don=E2=80=99t?= =?UTF-8?q?=20break=20current=20behaviour?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tinycss2/parser.py | 53 +++++++++++++++++++++++++++++++++++++++------- 1 file changed, 45 insertions(+), 8 deletions(-) diff --git a/tinycss2/parser.py b/tinycss2/parser.py index e1ebdb1..60ef765 100644 --- a/tinycss2/parser.py +++ b/tinycss2/parser.py @@ -134,7 +134,7 @@ def _parse_declaration(first_token, tokens): name.lower_value, value, state == 'important') -def _consume_declaration_in_list(first_token, tokens): +def _consume_declaration_in_list(first_token, tokens, allow_nested): """Like :func:`_parse_declaration`, but stop at the first ``;``.""" other_declaration_tokens = [] for token in tokens: @@ -142,13 +142,53 @@ def _consume_declaration_in_list(first_token, tokens): break other_declaration_tokens.append(token) declaration = _parse_declaration(first_token, iter(other_declaration_tokens)) - if isinstance(declaration, Declaration): + if not allow_nested or isinstance(declaration, Declaration): return declaration else: return _consume_rule(first_token, chain(other_declaration_tokens, tokens)) -def parse_declaration_list(input, skip_comments=False, skip_whitespace=False): +def parse_blocks_contents(input, skip_comments=False, skip_whitespace=False): + """Parse a block’s contents. + + This is used e.g. for the :attr:`~tinycss2.ast.QualifiedRule.content` + of a style rule or ``@page`` rule, + or for the ``style`` attribute of an HTML element. + + In contexts that don’t expect any at-rule or nested style rule, + all :class:`~tinycss2.ast.AtRule` and + :class:`~tinycss2.ast.QualifiedRule` objects + should simply be rejected as invalid. + + :type input: :obj:`str` or :term:`iterable` + :param input: A string or an iterable of :term:`component values`. + :type skip_comments: :obj:`bool` + :param skip_comments: + Ignore CSS comments at the top-level of the list. + If the input is a string, ignore all comments. + :type skip_whitespace: :obj:`bool` + :param skip_whitespace: + Ignore whitespace at the top-level of the list. + Whitespace is still preserved + in the :attr:`~tinycss2.ast.Declaration.value` of declarations + and the :attr:`~tinycss2.ast.AtRule.prelude` + and :attr:`~tinycss2.ast.AtRule.content` of at-rules. + :returns: + A list of + :class:`~tinycss2.ast.Declaration`, + :class:`~tinycss2.ast.AtRule`, + :class:`~tinycss2.ast.QualifiedRule`, + :class:`~tinycss2.ast.Comment` (if ``skip_comments`` is false), + :class:`~tinycss2.ast.WhitespaceToken` + (if ``skip_whitespace`` is false), + and :class:`~tinycss2.ast.ParseError` objects + + """ + return parse_declaration_list(input, skip_comments, skip_whitespace, True) + + +def parse_declaration_list(input, skip_comments=False, skip_whitespace=False, + _allow_nested=False): """Parse a :diagram:`declaration list` (which may also contain at-rules). This is used e.g. for the :attr:`~tinycss2.ast.QualifiedRule.content` @@ -194,7 +234,8 @@ def parse_declaration_list(input, skip_comments=False, skip_whitespace=False): elif token.type == 'at-keyword': result.append(_consume_at_rule(token, tokens)) elif token != ';': - result.append(_consume_declaration_in_list(token, tokens)) + result.append( + _consume_declaration_in_list(token, tokens, _allow_nested)) return result @@ -308,10 +349,6 @@ def parse_stylesheet(input, skip_comments=False, skip_whitespace=False): """ tokens = _to_token_iterator(input, skip_comments) - return _consume_stylesheet_content(tokens, skip_comments, skip_whitespace) - - -def _consume_stylesheet_content(tokens, skip_comments, skip_whitespace): result = [] for token in tokens: if token.type == 'whitespace': From d1025b83ec5c2da1b98645946f0041310d519bb4 Mon Sep 17 00:00:00 2001 From: Lucie Anglade Date: Fri, 19 Jan 2024 19:03:11 +0100 Subject: [PATCH 03/13] Import parse_blocks_contents() in top level module --- tinycss2/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tinycss2/__init__.py b/tinycss2/__init__.py index ae50481..ce51131 100644 --- a/tinycss2/__init__.py +++ b/tinycss2/__init__.py @@ -10,8 +10,8 @@ from .bytes import parse_stylesheet_bytes # noqa from .parser import ( # noqa - parse_declaration_list, parse_one_component_value, parse_one_declaration, - parse_one_rule, parse_rule_list, parse_stylesheet) + parse_blocks_contents, parse_declaration_list, parse_one_component_value, + parse_one_declaration, parse_one_rule, parse_rule_list, parse_stylesheet) from .serializer import serialize, serialize_identifier # noqa from .tokenizer import parse_component_value_list # noqa From b8be8d1808243736ea383f1764dd1eed10489c8c Mon Sep 17 00:00:00 2001 From: Lucie Anglade Date: Fri, 19 Jan 2024 19:05:52 +0100 Subject: [PATCH 04/13] Fix long lines --- tinycss2/parser.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tinycss2/parser.py b/tinycss2/parser.py index 60ef765..f371a9a 100644 --- a/tinycss2/parser.py +++ b/tinycss2/parser.py @@ -141,11 +141,13 @@ def _consume_declaration_in_list(first_token, tokens, allow_nested): if token == ';': break other_declaration_tokens.append(token) - declaration = _parse_declaration(first_token, iter(other_declaration_tokens)) + declaration = _parse_declaration( + first_token, iter(other_declaration_tokens)) if not allow_nested or isinstance(declaration, Declaration): return declaration else: - return _consume_rule(first_token, chain(other_declaration_tokens, tokens)) + return _consume_rule( + first_token, chain(other_declaration_tokens, tokens)) def parse_blocks_contents(input, skip_comments=False, skip_whitespace=False): From 58bb63e05eac9fcd0447ff843d2fb535712235ed Mon Sep 17 00:00:00 2001 From: Lucie Anglade Date: Fri, 19 Jan 2024 19:06:59 +0100 Subject: [PATCH 05/13] Fix import order --- tinycss2/parser.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tinycss2/parser.py b/tinycss2/parser.py index f371a9a..25355c9 100644 --- a/tinycss2/parser.py +++ b/tinycss2/parser.py @@ -1,8 +1,8 @@ +from itertools import chain + from .ast import AtRule, Declaration, ParseError, QualifiedRule from .tokenizer import parse_component_value_list -from itertools import chain - def _to_token_iterator(input, skip_comments=False): """Iterate component values out of string or component values iterable. From 0fa74c91101a439bdf51d28d1e10c18f8b36fad6 Mon Sep 17 00:00:00 2001 From: Guillaume Ayoub Date: Sat, 20 Jan 2024 19:22:54 +0100 Subject: [PATCH 06/13] Add nested and stop_token parameters for related functions --- tinycss2/parser.py | 49 +++++++++++++++++++++++++++++++++------------- 1 file changed, 35 insertions(+), 14 deletions(-) diff --git a/tinycss2/parser.py b/tinycss2/parser.py index 25355c9..ccbbc63 100644 --- a/tinycss2/parser.py +++ b/tinycss2/parser.py @@ -136,18 +136,19 @@ def _parse_declaration(first_token, tokens): def _consume_declaration_in_list(first_token, tokens, allow_nested): """Like :func:`_parse_declaration`, but stop at the first ``;``.""" - other_declaration_tokens = [] + declaration_tokens = [] + semicolon_token = [] for token in tokens: if token == ';': + semicolon_token.append(token) break - other_declaration_tokens.append(token) - declaration = _parse_declaration( - first_token, iter(other_declaration_tokens)) - if not allow_nested or isinstance(declaration, Declaration): + declaration_tokens.append(token) + declaration = _parse_declaration(first_token, iter(declaration_tokens)) + if not allow_nested or declaration.type == Declaration: return declaration else: - return _consume_rule( - first_token, chain(other_declaration_tokens, tokens)) + tokens = chain(declaration_tokens, semicolon_token, tokens) + return _consume_rule(first_token, tokens, stop_token=';', nested=True) def parse_blocks_contents(input, skip_comments=False, skip_whitespace=False): @@ -234,7 +235,7 @@ def parse_declaration_list(input, skip_comments=False, skip_whitespace=False, if not skip_comments: result.append(token) elif token.type == 'at-keyword': - result.append(_consume_at_rule(token, tokens)) + result.append(_consume_at_rule(token, tokens, nested=True)) elif token != ';': result.append( _consume_declaration_in_list(token, tokens, _allow_nested)) @@ -315,7 +316,8 @@ def parse_rule_list(input, skip_comments=False, skip_whitespace=False): if not skip_comments: result.append(token) else: - result.append(_consume_rule(token, tokens)) + result.append(_consume_rule( + token, tokens, stop_token=';', nested=True)) return result @@ -364,7 +366,14 @@ def parse_stylesheet(input, skip_comments=False, skip_whitespace=False): return result -def _consume_rule(first_token, tokens): +def _rule_error(token, name): + """Create rule parse error raised because of given token.""" + return ParseError( + token.source_line, token.source_column, 'invalid', + f'{name} reached before {{}} block for a qualified rule.') + + +def _consume_rule(first_token, tokens, nested=False, stop_token=None): """Parse a qualified rule or at-rule. Consume just enough of :obj:`tokens` for this rule. @@ -373,12 +382,18 @@ def _consume_rule(first_token, tokens): :param first_token: The first component value of the rule. :type tokens: :term:`iterator` :param tokens: An iterator yielding :term:`component values`. + :type nested: :obj:`bool` + :param nested: Whether the rule is nested or top-level. + :type stop_token: :class:`~tinycss2.ast.Node` + :param stop_token: A token that ends rule parsing when met. :returns: A :class:`~tinycss2.ast.QualifiedRule`, :class:`~tinycss2.ast.AtRule`, or :class:`~tinycss2.ast.ParseError`. """ + if first_token == stop_token: + return _rule_error(first_token, 'Stop token') if first_token.type == 'at-keyword': return _consume_at_rule(first_token, tokens) if first_token.type == '{} block': @@ -387,19 +402,21 @@ def _consume_rule(first_token, tokens): else: prelude = [first_token] for token in tokens: + if token == stop_token: + return _rule_error(token, 'Stop token') if token.type == '{} block': block = token + # TODO: handle special case for CSS variables (using "nested") + # https://drafts.csswg.org/css-syntax-3/#consume-qualified-rule break prelude.append(token) else: - return ParseError( - prelude[-1].source_line, prelude[-1].source_column, 'invalid', - 'EOF reached before {} block for a qualified rule.') + return _rule_error(prelude[-1], 'EOF') return QualifiedRule(first_token.source_line, first_token.source_column, prelude, block.content) -def _consume_at_rule(at_keyword, tokens): +def _consume_at_rule(at_keyword, tokens, nested=False): """Parse an at-rule. Consume just enough of :obj:`tokens` for this rule. @@ -408,6 +425,8 @@ def _consume_at_rule(at_keyword, tokens): :param at_keyword: The at-rule keyword token starting this rule. :type tokens: :term:`iterator` :param tokens: An iterator yielding :term:`component values`. + :type nested: :obj:`bool` + :param nested: Whether the at-rule is nested or top-level. :returns: A :class:`~tinycss2.ast.QualifiedRule`, or :class:`~tinycss2.ast.ParseError`. @@ -417,6 +436,8 @@ def _consume_at_rule(at_keyword, tokens): content = None for token in tokens: if token.type == '{} block': + # TODO: handle nested at-rules + # https://drafts.csswg.org/css-syntax-3/#consume-at-rule content = token.content break elif token == ';': From 8b296a80eec028aff32fb485cc9c63e70f8f6102 Mon Sep 17 00:00:00 2001 From: Guillaume Ayoub Date: Sat, 20 Jan 2024 19:26:16 +0100 Subject: [PATCH 07/13] Fix stupid equality test --- tinycss2/parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tinycss2/parser.py b/tinycss2/parser.py index ccbbc63..78f4c27 100644 --- a/tinycss2/parser.py +++ b/tinycss2/parser.py @@ -144,7 +144,7 @@ def _consume_declaration_in_list(first_token, tokens, allow_nested): break declaration_tokens.append(token) declaration = _parse_declaration(first_token, iter(declaration_tokens)) - if not allow_nested or declaration.type == Declaration: + if not allow_nested or declaration.type == 'declaration': return declaration else: tokens = chain(declaration_tokens, semicolon_token, tokens) From b2e9861ce4cde3d6b84c7c9866ea5881f66d2661 Mon Sep 17 00:00:00 2001 From: Guillaume Ayoub Date: Sun, 21 Jan 2024 10:31:17 +0100 Subject: [PATCH 08/13] Stop pre-parsing declarations after {} blocks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We don’t want to consume tokens after curly brackets as they may be a declaration we want to keep in the tokens iterator. --- tinycss2/parser.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tinycss2/parser.py b/tinycss2/parser.py index 78f4c27..adfa7d0 100644 --- a/tinycss2/parser.py +++ b/tinycss2/parser.py @@ -143,6 +143,8 @@ def _consume_declaration_in_list(first_token, tokens, allow_nested): semicolon_token.append(token) break declaration_tokens.append(token) + if token.type == '{} block': + break declaration = _parse_declaration(first_token, iter(declaration_tokens)) if not allow_nested or declaration.type == 'declaration': return declaration From 305cac756ae2d30da8ecd090fa30273901059ca4 Mon Sep 17 00:00:00 2001 From: Guillaume Ayoub Date: Sun, 21 Jan 2024 10:43:08 +0100 Subject: [PATCH 09/13] Stop pre-parsing declarations on {} blocks only when nested allowed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When nesting is not allowed, we should consume tokens until we meet a semicolon. When it’s allowed, we have to stop at {} blocks to keep possibly valid declarations after them. --- tinycss2/parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tinycss2/parser.py b/tinycss2/parser.py index adfa7d0..ab0a9d4 100644 --- a/tinycss2/parser.py +++ b/tinycss2/parser.py @@ -143,7 +143,7 @@ def _consume_declaration_in_list(first_token, tokens, allow_nested): semicolon_token.append(token) break declaration_tokens.append(token) - if token.type == '{} block': + if allow_nested and token.type == '{} block': break declaration = _parse_declaration(first_token, iter(declaration_tokens)) if not allow_nested or declaration.type == 'declaration': From 21dd31a97c240ec2898635b83263014d33a545e1 Mon Sep 17 00:00:00 2001 From: Guillaume Ayoub Date: Sat, 10 Feb 2024 11:48:33 +0100 Subject: [PATCH 10/13] Split deprecated and new functions --- tinycss2/parser.py | 97 ++++++++++++++++++++++++++++++++++++---------- 1 file changed, 77 insertions(+), 20 deletions(-) diff --git a/tinycss2/parser.py b/tinycss2/parser.py index ab0a9d4..b56e3ca 100644 --- a/tinycss2/parser.py +++ b/tinycss2/parser.py @@ -134,7 +134,7 @@ def _parse_declaration(first_token, tokens): name.lower_value, value, state == 'important') -def _consume_declaration_in_list(first_token, tokens, allow_nested): +def _consume_declaration_in_list(first_token, tokens): """Like :func:`_parse_declaration`, but stop at the first ``;``.""" declaration_tokens = [] semicolon_token = [] @@ -143,27 +143,41 @@ def _consume_declaration_in_list(first_token, tokens, allow_nested): semicolon_token.append(token) break declaration_tokens.append(token) - if allow_nested and token.type == '{} block': + if token.type == '{} block': break declaration = _parse_declaration(first_token, iter(declaration_tokens)) - if not allow_nested or declaration.type == 'declaration': + if declaration.type == 'declaration': return declaration else: tokens = chain(declaration_tokens, semicolon_token, tokens) return _consume_rule(first_token, tokens, stop_token=';', nested=True) +def _consume_declaration_in_list_deprecated(first_token, tokens): + """Like :func:`_parse_declaration`, but stop at the first ``;``. + + Deprecated, use :func:`_consume_declaration_in_list` instead. + + """ + other_declaration_tokens = [] + for token in tokens: + if token == ';': + break + other_declaration_tokens.append(token) + return _parse_declaration(first_token, iter(other_declaration_tokens)) + + def parse_blocks_contents(input, skip_comments=False, skip_whitespace=False): """Parse a block’s contents. This is used e.g. for the :attr:`~tinycss2.ast.QualifiedRule.content` - of a style rule or ``@page`` rule, - or for the ``style`` attribute of an HTML element. + of a style rule or ``@page`` rule, or for the ``style`` attribute of an + HTML element. - In contexts that don’t expect any at-rule or nested style rule, - all :class:`~tinycss2.ast.AtRule` and - :class:`~tinycss2.ast.QualifiedRule` objects - should simply be rejected as invalid. + In contexts that don’t expect any at-rule and/or qualified rule, + all :class:`~tinycss2.ast.AtRule` and/or + :class:`~tinycss2.ast.QualifiedRule` objects should simply be rejected as + invalid. :type input: :obj:`str` or :term:`iterable` :param input: A string or an iterable of :term:`component values`. @@ -189,20 +203,34 @@ def parse_blocks_contents(input, skip_comments=False, skip_whitespace=False): and :class:`~tinycss2.ast.ParseError` objects """ - return parse_declaration_list(input, skip_comments, skip_whitespace, True) + tokens = _to_token_iterator(input, skip_comments) + result = [] + for token in tokens: + if token.type == 'whitespace': + if not skip_whitespace: + result.append(token) + elif token.type == 'comment': + if not skip_comments: + result.append(token) + elif token.type == 'at-keyword': + result.append(_consume_at_rule(token, tokens)) + elif token != ';': + result.append(_consume_declaration_in_list(token, tokens)) + return result -def parse_declaration_list(input, skip_comments=False, skip_whitespace=False, - _allow_nested=False): +def parse_declaration_list(input, skip_comments=False, skip_whitespace=False): """Parse a :diagram:`declaration list` (which may also contain at-rules). + Deprecated and removed from CSS Syntax Level 3. Use + :func:`parse_blocks_contents` instead. + This is used e.g. for the :attr:`~tinycss2.ast.QualifiedRule.content` - of a style rule or ``@page`` rule, - or for the ``style`` attribute of an HTML element. + of a style rule or ``@page`` rule, or for the ``style`` attribute of an + HTML element. - In contexts that don’t expect any at-rule, - all :class:`~tinycss2.ast.AtRule` objects - should simply be rejected as invalid. + In contexts that don’t expect any at-rule, all + :class:`~tinycss2.ast.AtRule` objects should simply be rejected as invalid. :type input: :obj:`str` or :term:`iterable` :param input: A string or an iterable of :term:`component values`. @@ -237,10 +265,10 @@ def parse_declaration_list(input, skip_comments=False, skip_whitespace=False, if not skip_comments: result.append(token) elif token.type == 'at-keyword': - result.append(_consume_at_rule(token, tokens, nested=True)) + result.append(_consume_at_rule_deprecated(token, tokens)) elif token != ';': result.append( - _consume_declaration_in_list(token, tokens, _allow_nested)) + _consume_declaration_in_list_deprecated(token, tokens)) return result @@ -418,7 +446,7 @@ def _consume_rule(first_token, tokens, nested=False, stop_token=None): prelude, block.content) -def _consume_at_rule(at_keyword, tokens, nested=False): +def _consume_at_rule(at_keyword, tokens): """Parse an at-rule. Consume just enough of :obj:`tokens` for this rule. @@ -447,3 +475,32 @@ def _consume_at_rule(at_keyword, tokens, nested=False): prelude.append(token) return AtRule(at_keyword.source_line, at_keyword.source_column, at_keyword.value, at_keyword.lower_value, prelude, content) + + +def _consume_at_rule_deprecated(at_keyword, tokens): + """Parse an at-rule. + + Deprecated, use :func:`_consume_at_rule` instead. + + Consume just enough of :obj:`tokens` for this rule. + + :type at_keyword: :class:`AtKeywordToken` + :param at_keyword: The at-rule keyword token starting this rule. + :type tokens: :term:`iterator` + :param tokens: An iterator yielding :term:`component values`. + :returns: + A :class:`~tinycss2.ast.QualifiedRule`, + or :class:`~tinycss2.ast.ParseError`. + + """ + prelude = [] + content = None + for token in tokens: + if token.type == '{} block': + content = token.content + break + elif token == ';': + break + prelude.append(token) + return AtRule(at_keyword.source_line, at_keyword.source_column, + at_keyword.value, at_keyword.lower_value, prelude, content) From f6958702692ee98d8acc1146747bbc292b5f909e Mon Sep 17 00:00:00 2001 From: Guillaume Ayoub Date: Sat, 10 Feb 2024 13:12:31 +0100 Subject: [PATCH 11/13] Take care of declaration first token --- tinycss2/parser.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/tinycss2/parser.py b/tinycss2/parser.py index b56e3ca..a97aa55 100644 --- a/tinycss2/parser.py +++ b/tinycss2/parser.py @@ -138,13 +138,14 @@ def _consume_declaration_in_list(first_token, tokens): """Like :func:`_parse_declaration`, but stop at the first ``;``.""" declaration_tokens = [] semicolon_token = [] - for token in tokens: - if token == ';': - semicolon_token.append(token) - break - declaration_tokens.append(token) - if token.type == '{} block': - break + if first_token != ';' and first_token.type != '{} block': + for token in tokens: + if token == ';': + semicolon_token.append(token) + break + declaration_tokens.append(token) + if token.type == '{} block': + break declaration = _parse_declaration(first_token, iter(declaration_tokens)) if declaration.type == 'declaration': return declaration From 3ae55131230aa059374603b6fee0d9513a973961 Mon Sep 17 00:00:00 2001 From: Guillaume Ayoub Date: Sun, 11 Feb 2024 00:44:39 +0100 Subject: [PATCH 12/13] Implement new CSS syntax draft --- tests/test_tinycss2.py | 7 +- tinycss2/parser.py | 166 +++++++++++++++++++++++------------------ 2 files changed, 99 insertions(+), 74 deletions(-) diff --git a/tests/test_tinycss2.py b/tests/test_tinycss2.py index d67e8b9..d07aa69 100644 --- a/tests/test_tinycss2.py +++ b/tests/test_tinycss2.py @@ -5,7 +5,7 @@ import pytest from tinycss2 import ( - parse_component_value_list, parse_declaration_list, + parse_blocks_contents, parse_component_value_list, parse_declaration_list, parse_one_component_value, parse_one_declaration, parse_one_rule, parse_rule_list, parse_stylesheet, parse_stylesheet_bytes, serialize) from tinycss2.ast import ( @@ -112,6 +112,11 @@ def test_declaration_list(input): return parse_declaration_list(input, **SKIP) +@json_test() +def test_blocks_contents(input): + return parse_blocks_contents(input, **SKIP) + + @json_test() def test_one_declaration(input): return parse_one_declaration(input, skip_comments=True) diff --git a/tinycss2/parser.py b/tinycss2/parser.py index a97aa55..11dd519 100644 --- a/tinycss2/parser.py +++ b/tinycss2/parser.py @@ -14,7 +14,6 @@ def _to_token_iterator(input, skip_comments=False): :returns: An iterator yielding :term:`component values`. """ - # Accept ASCII-only byte strings on Python 2, with implicit conversion. if isinstance(input, str): input = parse_component_value_list(input, skip_comments) return iter(input) @@ -85,7 +84,15 @@ def parse_one_declaration(input, skip_comments=False): return _parse_declaration(first_token, tokens) -def _parse_declaration(first_token, tokens): +def _consume_remnants(input, nested): + for token in input: + if token == ';': + return + elif nested and token == '}': + return + + +def _parse_declaration(first_token, tokens, nested=True): """Parse a declaration. Consume :obj:`tokens` until the end of the declaration or the first error. @@ -94,6 +101,8 @@ def _parse_declaration(first_token, tokens): :param first_token: The first component value of the rule. :type tokens: :term:`iterator` :param tokens: An iterator yielding :term:`component values`. + :type nested: :obj:`bool` + :param nested: Whether the declaration is nested or top-level. :returns: A :class:`~tinycss2.ast.Declaration` or :class:`~tinycss2.ast.ParseError`. @@ -101,21 +110,27 @@ def _parse_declaration(first_token, tokens): """ name = first_token if name.type != 'ident': - return ParseError(name.source_line, name.source_column, 'invalid', - 'Expected for declaration name, got %s.' - % name.type) + _consume_remnants(tokens, nested) + return ParseError( + name.source_line, name.source_column, 'invalid', + f'Expected for declaration name, got {name.type}.') colon = _next_significant(tokens) if colon is None: - return ParseError(name.source_line, name.source_column, 'invalid', - "Expected ':' after declaration name, got EOF") + _consume_remnants(tokens, nested) + return ParseError( + name.source_line, name.source_column, 'invalid', + "Expected ':' after declaration name, got EOF") elif colon != ':': - return ParseError(colon.source_line, colon.source_column, 'invalid', - "Expected ':' after declaration name, got %s." - % colon.type) + _consume_remnants(tokens, nested) + return ParseError( + colon.source_line, colon.source_column, 'invalid', + "Expected ':' after declaration name, got {colon.type}.") value = [] state = 'value' + contains_non_whitespace = False + contains_simple_block = False for i, token in enumerate(tokens): if state == 'value' and token == '!': state = 'bang' @@ -125,17 +140,33 @@ def _parse_declaration(first_token, tokens): state = 'important' elif token.type not in ('whitespace', 'comment'): state = 'value' + if token.type == '{} block': + if contains_non_whitespace: + contains_simple_block = True + else: + contains_non_whitespace = True + else: + contains_non_whitespace = True value.append(token) if state == 'important': del value[bang_position:] + # TODO: Handle custom property names + + if contains_simple_block and contains_non_whitespace: + return ParseError( + colon.source_line, colon.source_column, 'invalid', + 'Declaration contains {} block') + + # TODO: Handle unicode-range + return Declaration(name.source_line, name.source_column, name.value, name.lower_value, value, state == 'important') -def _consume_declaration_in_list(first_token, tokens): - """Like :func:`_parse_declaration`, but stop at the first ``;``.""" +def _consume_blocks_content(first_token, tokens): + """Consume declaration or nested rule.""" declaration_tokens = [] semicolon_token = [] if first_token != ';' and first_token.type != '{} block': @@ -146,18 +177,20 @@ def _consume_declaration_in_list(first_token, tokens): declaration_tokens.append(token) if token.type == '{} block': break - declaration = _parse_declaration(first_token, iter(declaration_tokens)) + declaration = _parse_declaration( + first_token, iter(declaration_tokens), nested=True) if declaration.type == 'declaration': return declaration else: tokens = chain(declaration_tokens, semicolon_token, tokens) - return _consume_rule(first_token, tokens, stop_token=';', nested=True) + return _consume_qualified_rule( + first_token, tokens, stop_token=';', nested=True) -def _consume_declaration_in_list_deprecated(first_token, tokens): +def _consume_declaration_in_list(first_token, tokens): """Like :func:`_parse_declaration`, but stop at the first ``;``. - Deprecated, use :func:`_consume_declaration_in_list` instead. + Deprecated, use :func:`_consume_blocks_content` instead. """ other_declaration_tokens = [] @@ -216,7 +249,7 @@ def parse_blocks_contents(input, skip_comments=False, skip_whitespace=False): elif token.type == 'at-keyword': result.append(_consume_at_rule(token, tokens)) elif token != ';': - result.append(_consume_declaration_in_list(token, tokens)) + result.append(_consume_blocks_content(token, tokens)) return result @@ -266,10 +299,9 @@ def parse_declaration_list(input, skip_comments=False, skip_whitespace=False): if not skip_comments: result.append(token) elif token.type == 'at-keyword': - result.append(_consume_at_rule_deprecated(token, tokens)) + result.append(_consume_at_rule(token, tokens)) elif token != ';': - result.append( - _consume_declaration_in_list_deprecated(token, tokens)) + result.append(_consume_declaration_in_list(token, tokens)) return result @@ -310,6 +342,9 @@ def parse_one_rule(input, skip_comments=False): def parse_rule_list(input, skip_comments=False, skip_whitespace=False): """Parse a non-top-level :diagram:`rule list`. + Deprecated and removed from CSS Syntax. Use :func:`parse_blocks_content` + instead. + This is used for parsing the :attr:`~tinycss2.ast.AtRule.content` of nested rules like ``@media``. This differs from :func:`parse_stylesheet` in that @@ -347,8 +382,7 @@ def parse_rule_list(input, skip_comments=False, skip_whitespace=False): if not skip_comments: result.append(token) else: - result.append(_consume_rule( - token, tokens, stop_token=';', nested=True)) + result.append(_consume_rule(token, tokens)) return result @@ -397,14 +431,7 @@ def parse_stylesheet(input, skip_comments=False, skip_whitespace=False): return result -def _rule_error(token, name): - """Create rule parse error raised because of given token.""" - return ParseError( - token.source_line, token.source_column, 'invalid', - f'{name} reached before {{}} block for a qualified rule.') - - -def _consume_rule(first_token, tokens, nested=False, stop_token=None): +def _consume_rule(first_token, tokens): """Parse a qualified rule or at-rule. Consume just enough of :obj:`tokens` for this rule. @@ -413,38 +440,15 @@ def _consume_rule(first_token, tokens, nested=False, stop_token=None): :param first_token: The first component value of the rule. :type tokens: :term:`iterator` :param tokens: An iterator yielding :term:`component values`. - :type nested: :obj:`bool` - :param nested: Whether the rule is nested or top-level. - :type stop_token: :class:`~tinycss2.ast.Node` - :param stop_token: A token that ends rule parsing when met. :returns: A :class:`~tinycss2.ast.QualifiedRule`, :class:`~tinycss2.ast.AtRule`, or :class:`~tinycss2.ast.ParseError`. """ - if first_token == stop_token: - return _rule_error(first_token, 'Stop token') if first_token.type == 'at-keyword': return _consume_at_rule(first_token, tokens) - if first_token.type == '{} block': - prelude = [] - block = first_token - else: - prelude = [first_token] - for token in tokens: - if token == stop_token: - return _rule_error(token, 'Stop token') - if token.type == '{} block': - block = token - # TODO: handle special case for CSS variables (using "nested") - # https://drafts.csswg.org/css-syntax-3/#consume-qualified-rule - break - prelude.append(token) - else: - return _rule_error(prelude[-1], 'EOF') - return QualifiedRule(first_token.source_line, first_token.source_column, - prelude, block.content) + return _consume_qualified_rule(first_token, tokens) def _consume_at_rule(at_keyword, tokens): @@ -478,30 +482,46 @@ def _consume_at_rule(at_keyword, tokens): at_keyword.value, at_keyword.lower_value, prelude, content) -def _consume_at_rule_deprecated(at_keyword, tokens): - """Parse an at-rule. +def _rule_error(token, name): + """Create rule parse error raised because of given token.""" + return ParseError( + token.source_line, token.source_column, 'invalid', + f'{name} reached before {{}} block for a qualified rule.') + - Deprecated, use :func:`_consume_at_rule` instead. +def _consume_qualified_rule(first_token, tokens, nested=False, + stop_token=None): + """Consume a qualified rule. Consume just enough of :obj:`tokens` for this rule. - :type at_keyword: :class:`AtKeywordToken` - :param at_keyword: The at-rule keyword token starting this rule. + :type first_token: :term:`component value` + :param first_token: The first component value of the rule. :type tokens: :term:`iterator` :param tokens: An iterator yielding :term:`component values`. - :returns: - A :class:`~tinycss2.ast.QualifiedRule`, - or :class:`~tinycss2.ast.ParseError`. + :type nested: :obj:`bool` + :param nested: Whether the rule is nested or top-level. + :type stop_token: :class:`~tinycss2.ast.Node` + :param stop_token: A token that ends rule parsing when met. """ - prelude = [] - content = None - for token in tokens: - if token.type == '{} block': - content = token.content - break - elif token == ';': - break - prelude.append(token) - return AtRule(at_keyword.source_line, at_keyword.source_column, - at_keyword.value, at_keyword.lower_value, prelude, content) + if first_token == stop_token: + return _rule_error(first_token, 'Stop token') + if first_token.type == '{} block': + prelude = [] + block = first_token + else: + prelude = [first_token] + for token in tokens: + if token == stop_token: + return _rule_error(token, 'Stop token') + if token.type == '{} block': + block = token + # TODO: handle special case for CSS variables (using "nested") + # https://drafts.csswg.org/css-syntax-3/#consume-qualified-rule + break + prelude.append(token) + else: + return _rule_error(prelude[-1], 'EOF') + return QualifiedRule(first_token.source_line, first_token.source_column, + prelude, block.content) From 62b23f712c9e2bf9e45e31df00503a2bc11a0016 Mon Sep 17 00:00:00 2001 From: Guillaume Ayoub Date: Sat, 17 Feb 2024 12:38:39 +0100 Subject: [PATCH 13/13] Update parsing tests suite version --- tests/css-parsing-tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/css-parsing-tests b/tests/css-parsing-tests index c5749e5..43e65b2 160000 --- a/tests/css-parsing-tests +++ b/tests/css-parsing-tests @@ -1 +1 @@ -Subproject commit c5749e51dda3868b3e8062e65a36584c2fec8059 +Subproject commit 43e65b244133f17eb8a4d4404d5774672b94824f