Skip to content

Commit

Permalink
gh-110259: Fix f-strings with multiline expressions and format specs (#…
Browse files Browse the repository at this point in the history
…110271)

Signed-off-by: Pablo Galindo <[email protected]>
  • Loading branch information
pablogsal authored Oct 5, 2023
1 parent af29282 commit cc389ef
Show file tree
Hide file tree
Showing 5 changed files with 128 additions and 10 deletions.
11 changes: 8 additions & 3 deletions Lib/ast.py
Original file line number Diff line number Diff line change
Expand Up @@ -1270,13 +1270,15 @@ def visit_JoinedStr(self, node):
quote_type = quote_types[0]
self.write(f"{quote_type}{value}{quote_type}")

def _write_fstring_inner(self, node):
def _write_fstring_inner(self, node, scape_newlines=False):
if isinstance(node, JoinedStr):
# for both the f-string itself, and format_spec
for value in node.values:
self._write_fstring_inner(value)
self._write_fstring_inner(value, scape_newlines=scape_newlines)
elif isinstance(node, Constant) and isinstance(node.value, str):
value = node.value.replace("{", "{{").replace("}", "}}")
if scape_newlines:
value = value.replace("\n", "\\n")
self.write(value)
elif isinstance(node, FormattedValue):
self.visit_FormattedValue(node)
Expand All @@ -1299,7 +1301,10 @@ def unparse_inner(inner):
self.write(f"!{chr(node.conversion)}")
if node.format_spec:
self.write(":")
self._write_fstring_inner(node.format_spec)
self._write_fstring_inner(
node.format_spec,
scape_newlines=True
)

def visit_Name(self, node):
self.write(node.id)
Expand Down
97 changes: 97 additions & 0 deletions Lib/test/test_tokenize.py
Original file line number Diff line number Diff line change
Expand Up @@ -566,6 +566,55 @@ def test_string(self):
OP '=' (3, 0) (3, 1)
OP '}' (3, 1) (3, 2)
FSTRING_END "'''" (3, 2) (3, 5)
""")
self.check_tokenize("""\
f'''__{
x:a
}__'''""", """\
FSTRING_START "f'''" (1, 0) (1, 4)
FSTRING_MIDDLE '__' (1, 4) (1, 6)
OP '{' (1, 6) (1, 7)
NL '\\n' (1, 7) (1, 8)
NAME 'x' (2, 4) (2, 5)
OP ':' (2, 5) (2, 6)
FSTRING_MIDDLE 'a\\n' (2, 6) (3, 0)
OP '}' (3, 0) (3, 1)
FSTRING_MIDDLE '__' (3, 1) (3, 3)
FSTRING_END "'''" (3, 3) (3, 6)
""")
self.check_tokenize("""\
f'''__{
x:a
b
c
d
}__'''""", """\
FSTRING_START "f'''" (1, 0) (1, 4)
FSTRING_MIDDLE '__' (1, 4) (1, 6)
OP '{' (1, 6) (1, 7)
NL '\\n' (1, 7) (1, 8)
NAME 'x' (2, 4) (2, 5)
OP ':' (2, 5) (2, 6)
FSTRING_MIDDLE 'a\\n b\\n c\\n d\\n' (2, 6) (6, 0)
OP '}' (6, 0) (6, 1)
FSTRING_MIDDLE '__' (6, 1) (6, 3)
FSTRING_END "'''" (6, 3) (6, 6)
""")
self.check_tokenize("""\
f'__{
x:d
}__'""", """\
FSTRING_START "f'" (1, 0) (1, 2)
FSTRING_MIDDLE '__' (1, 2) (1, 4)
OP '{' (1, 4) (1, 5)
NL '\\n' (1, 5) (1, 6)
NAME 'x' (2, 4) (2, 5)
OP ':' (2, 5) (2, 6)
FSTRING_MIDDLE 'd' (2, 6) (2, 7)
NL '\\n' (2, 7) (2, 8)
OP '}' (3, 0) (3, 1)
FSTRING_MIDDLE '__' (3, 1) (3, 3)
FSTRING_END "'" (3, 3) (3, 4)
""")

def test_function(self):
Expand Down Expand Up @@ -2277,6 +2326,54 @@ def test_string(self):
FSTRING_START \'f"\' (1, 0) (1, 2)
FSTRING_MIDDLE 'hola\\\\\\\\\\\\r\\\\ndfgf' (1, 2) (1, 16)
FSTRING_END \'"\' (1, 16) (1, 17)
""")

self.check_tokenize("""\
f'''__{
x:a
}__'''""", """\
FSTRING_START "f'''" (1, 0) (1, 4)
FSTRING_MIDDLE '__' (1, 4) (1, 6)
LBRACE '{' (1, 6) (1, 7)
NAME 'x' (2, 4) (2, 5)
COLON ':' (2, 5) (2, 6)
FSTRING_MIDDLE 'a\\n' (2, 6) (3, 0)
RBRACE '}' (3, 0) (3, 1)
FSTRING_MIDDLE '__' (3, 1) (3, 3)
FSTRING_END "'''" (3, 3) (3, 6)
""")

self.check_tokenize("""\
f'''__{
x:a
b
c
d
}__'''""", """\
FSTRING_START "f'''" (1, 0) (1, 4)
FSTRING_MIDDLE '__' (1, 4) (1, 6)
LBRACE '{' (1, 6) (1, 7)
NAME 'x' (2, 4) (2, 5)
COLON ':' (2, 5) (2, 6)
FSTRING_MIDDLE 'a\\n b\\n c\\n d\\n' (2, 6) (6, 0)
RBRACE '}' (6, 0) (6, 1)
FSTRING_MIDDLE '__' (6, 1) (6, 3)
FSTRING_END "'''" (6, 3) (6, 6)
""")

self.check_tokenize("""\
f'__{
x:d
}__'""", """\
FSTRING_START "f'" (1, 0) (1, 2)
FSTRING_MIDDLE '__' (1, 2) (1, 4)
LBRACE '{' (1, 4) (1, 5)
NAME 'x' (2, 4) (2, 5)
COLON ':' (2, 5) (2, 6)
FSTRING_MIDDLE 'd' (2, 6) (2, 7)
RBRACE '}' (3, 0) (3, 1)
FSTRING_MIDDLE '__' (3, 1) (3, 3)
FSTRING_END "'" (3, 3) (3, 4)
""")

def test_function(self):
Expand Down
3 changes: 2 additions & 1 deletion Lib/test/test_unparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -730,7 +730,8 @@ class DirectoryTestCase(ASTTestCase):
test_directories = (lib_dir, lib_dir / "test")
run_always_files = {"test_grammar.py", "test_syntax.py", "test_compile.py",
"test_ast.py", "test_asdl_parser.py", "test_fstring.py",
"test_patma.py", "test_type_alias.py", "test_type_params.py"}
"test_patma.py", "test_type_alias.py", "test_type_params.py",
"test_tokenize.py"}

_files_to_test = None

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Correctly identify the format spec in f-strings (with single or triple
quotes) that have multiple lines in the expression part and include a
format spec. Patch by Pablo Galindo.
24 changes: 18 additions & 6 deletions Parser/tokenizer.c
Original file line number Diff line number Diff line change
Expand Up @@ -2690,11 +2690,28 @@ tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct
if (tok->done == E_ERROR) {
return MAKE_TOKEN(ERRORTOKEN);
}
if (c == EOF || (current_tok->f_string_quote_size == 1 && c == '\n')) {
int in_format_spec = (
current_tok->last_expr_end != -1
&&
INSIDE_FSTRING_EXPR(current_tok)
);

if (c == EOF || (current_tok->f_string_quote_size == 1 && c == '\n')) {
if (tok->decoding_erred) {
return MAKE_TOKEN(ERRORTOKEN);
}

// If we are in a format spec and we found a newline,
// it means that the format spec ends here and we should
// return to the regular mode.
if (in_format_spec && c == '\n') {
tok_backup(tok, c);
TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
p_start = tok->start;
p_end = tok->cur;
return MAKE_TOKEN(FSTRING_MIDDLE);
}

assert(tok->multi_line_start != NULL);
// shift the tok_state's location into
// the start of string, and report the error
Expand Down Expand Up @@ -2726,11 +2743,6 @@ tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct
end_quote_size = 0;
}

int in_format_spec = (
current_tok->last_expr_end != -1
&&
INSIDE_FSTRING_EXPR(current_tok)
);
if (c == '{') {
int peek = tok_nextc(tok);
if (peek != '{' || in_format_spec) {
Expand Down

0 comments on commit cc389ef

Please sign in to comment.