Skip to content

Commit

Permalink
Add f-string parsing for Python 3.12 (PEP 701)
Browse files Browse the repository at this point in the history
Since Python 3.12, f-strings are tokenized and parsed like the rest
of Python's grammar, using the new tokens FSTRING_START, FSTRING_MIDDLE
and FSTRING_END.

Make the babel message extractor concatenate these three if they're
adjacent to each other. If they're not, that means there are dynamic
substitutions, so the f-string is ignored.
  • Loading branch information
encukou committed Sep 21, 2023
1 parent 9ef53c6 commit be26d03
Showing 1 changed file with 34 additions and 0 deletions.
34 changes: 34 additions & 0 deletions babel/messages/extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
from textwrap import dedent
from tokenize import COMMENT, NAME, OP, STRING, generate_tokens
from typing import TYPE_CHECKING, Any
import tokenize

from babel.util import parse_encoding, parse_future_flags, pathmatch

Expand Down Expand Up @@ -89,6 +90,11 @@ def tell(self) -> int: ...

DEFAULT_MAPPING: list[tuple[str, str]] = [('**.py', 'python')]

# Token types introduced by Python 3.12 (PEP 701) for tokenized f-strings.
# Each name is bound to None when the running interpreter's `tokenize`
# module does not define it (i.e. on older Python versions).
FSTRING_START, FSTRING_MIDDLE, FSTRING_END = (
    getattr(tokenize, _token_name, None)
    for _token_name in ("FSTRING_START", "FSTRING_MIDDLE", "FSTRING_END")
)


def _strip_comment_tags(comments: MutableSequence[str], tags: Iterable[str]):
"""Helper function for `extract` that strips comment tags from strings
Expand Down Expand Up @@ -497,6 +503,11 @@ def extract_python(
next_line = lambda: fileobj.readline().decode(encoding)

tokens = generate_tokens(next_line)

# Current prefix of a Python 3.12 (PEP 701) f-string, or None if we're not
# currently parsing one.
current_fstring_start = None

for tok, value, (lineno, _), _, _ in tokens:
if call_stack == -1 and tok == NAME and value in ('def', 'class'):
in_def = True
Expand Down Expand Up @@ -558,6 +569,20 @@ def extract_python(
val = _parse_python_string(value, encoding, future_flags)
if val is not None:
buf.append(val)

# Python 3.12+, see https://peps.python.org/pep-0701/#new-tokens
elif tok == FSTRING_START:
current_fstring_start = value

Check warning on line 575 in babel/messages/extract.py

View check run for this annotation

Codecov / codecov/patch

babel/messages/extract.py#L575

Added line #L575 was not covered by tests
elif tok == FSTRING_MIDDLE:
if current_fstring_start is not None:
current_fstring_start += value

Check warning on line 578 in babel/messages/extract.py

View check run for this annotation

Codecov / codecov/patch

babel/messages/extract.py#L577-L578

Added lines #L577 - L578 were not covered by tests
elif tok == FSTRING_END:
if current_fstring_start is not None:
fstring = current_fstring_start + value
val = _parse_python_string(fstring, encoding, future_flags)
if val is not None:
buf.append(val)

Check warning on line 584 in babel/messages/extract.py

View check run for this annotation

Codecov / codecov/patch

babel/messages/extract.py#L580-L584

Added lines #L580 - L584 were not covered by tests

elif tok == OP and value == ',':
if buf:
messages.append(''.join(buf))
Expand All @@ -578,6 +603,15 @@ def extract_python(
elif tok == NAME and value in keywords:
funcname = value

if (current_fstring_start is not None
and tok not in {FSTRING_START, FSTRING_MIDDLE}
):
# In Python 3.12, tokens other than FSTRING_* mean the
# f-string is dynamic, so we don't want to extract it.
# And if it's FSTRING_END, we've already handled it above.
# Let's forget that we're in an f-string.
current_fstring_start = None

Check warning on line 613 in babel/messages/extract.py

View check run for this annotation

Codecov / codecov/patch

babel/messages/extract.py#L613

Added line #L613 was not covered by tests


def _parse_python_string(value: str, encoding: str, future_flags: int) -> str | None:
# Unwrap quotes in a safe manner, maintaining the string's encoding
Expand Down

0 comments on commit be26d03

Please sign in to comment.