Use new tokenizer column offset to fix fstring ast locations (#195)
lysnikolaou authored Oct 8, 2022
1 parent 5e546d2 commit 09e7732
Showing 4 changed files with 47 additions and 29 deletions.
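Note (illustration, not part of the commit): the change below makes the tokenizer track a per-token column offset and the first line of each constant f-string part, so the AST nodes inside an f-string carry the position where they actually appear instead of the start of the enclosing literal. A minimal sketch of the observable effect, assuming a build of this branch or CPython 3.12+, where this tokenizer-based f-string work eventually shipped as part of PEP 701; on older interpreters every part reports the location of the string itself:

import ast

# Multi-line f-string, loosely modelled on the cases in Lib/test/test_fstring.py below.
source = (
    "a = 10\n"
    "s = f'''\n"
    "  {a\n"
    "     *\n"
    "       a}\n"
    "tail\n"
    "'''\n"
)

tree = ast.parse(source)
joined = tree.body[1].value            # the JoinedStr node for the f-string
for part in joined.values:             # Constant and FormattedValue parts
    print(type(part).__name__,
          (part.lineno, part.col_offset),
          (part.end_lineno, part.end_col_offset))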
44 changes: 29 additions & 15 deletions Lib/test/test_fstring.py
@@ -329,13 +329,13 @@ def test_ast_line_numbers_multiline_fstring(self):
self.assertEqual(t.body[1].lineno, 3)
self.assertEqual(t.body[1].value.lineno, 3)
self.assertEqual(t.body[1].value.values[0].lineno, 3)
- self.assertEqual(t.body[1].value.values[1].lineno, 3)
- self.assertEqual(t.body[1].value.values[2].lineno, 3)
+ self.assertEqual(t.body[1].value.values[1].lineno, 4)
+ self.assertEqual(t.body[1].value.values[2].lineno, 6)
self.assertEqual(t.body[1].col_offset, 0)
self.assertEqual(t.body[1].value.col_offset, 0)
- self.assertEqual(t.body[1].value.values[0].col_offset, 0)
- self.assertEqual(t.body[1].value.values[1].col_offset, 0)
- self.assertEqual(t.body[1].value.values[2].col_offset, 0)
+ self.assertEqual(t.body[1].value.values[0].col_offset, 4)
+ self.assertEqual(t.body[1].value.values[1].col_offset, 2)
+ self.assertEqual(t.body[1].value.values[2].col_offset, 11)
# NOTE: the following lineno information and col_offset is correct for
# expressions within FormattedValues.
binop = t.body[1].value.values[1].value
@@ -366,13 +366,13 @@ def test_ast_line_numbers_multiline_fstring(self):
self.assertEqual(t.body[0].lineno, 2)
self.assertEqual(t.body[0].value.lineno, 2)
self.assertEqual(t.body[0].value.values[0].lineno, 2)
- self.assertEqual(t.body[0].value.values[1].lineno, 2)
- self.assertEqual(t.body[0].value.values[2].lineno, 2)
+ self.assertEqual(t.body[0].value.values[1].lineno, 3)
+ self.assertEqual(t.body[0].value.values[2].lineno, 3)
self.assertEqual(t.body[0].col_offset, 0)
self.assertEqual(t.body[0].value.col_offset, 4)
- self.assertEqual(t.body[0].value.values[0].col_offset, 4)
- self.assertEqual(t.body[0].value.values[1].col_offset, 4)
- self.assertEqual(t.body[0].value.values[2].col_offset, 4)
+ self.assertEqual(t.body[0].value.values[0].col_offset, 8)
+ self.assertEqual(t.body[0].value.values[1].col_offset, 10)
+ self.assertEqual(t.body[0].value.values[2].col_offset, 17)
# Check {blech}
self.assertEqual(t.body[0].value.values[1].value.lineno, 3)
self.assertEqual(t.body[0].value.values[1].value.end_lineno, 3)
@@ -387,6 +387,20 @@ def test_ast_line_numbers_with_parentheses(self):
t = ast.parse(expr)
self.assertEqual(type(t), ast.Module)
self.assertEqual(len(t.body), 1)
+ # check the joinedstr location
+ joinedstr = t.body[0].value
+ self.assertEqual(type(joinedstr), ast.JoinedStr)
+ self.assertEqual(joinedstr.lineno, 3)
+ self.assertEqual(joinedstr.end_lineno, 3)
+ self.assertEqual(joinedstr.col_offset, 4)
+ self.assertEqual(joinedstr.end_col_offset, 17)
+ # check the formatted value location
+ fv = t.body[0].value.values[1]
+ self.assertEqual(type(fv), ast.FormattedValue)
+ self.assertEqual(fv.lineno, 3)
+ self.assertEqual(fv.end_lineno, 3)
+ self.assertEqual(fv.col_offset, 7)
+ self.assertEqual(fv.end_col_offset, 16)
# check the test(t) location
call = t.body[0].value.values[1].value
self.assertEqual(type(call), ast.Call)
@@ -415,9 +429,9 @@ def test_ast_line_numbers_with_parentheses(self):
# check the first wat
self.assertEqual(type(wat1), ast.Constant)
self.assertEqual(wat1.lineno, 4)
- self.assertEqual(wat1.end_lineno, 6)
- self.assertEqual(wat1.col_offset, 12)
- self.assertEqual(wat1.end_col_offset, 18)
+ self.assertEqual(wat1.end_lineno, 5)
+ self.assertEqual(wat1.col_offset, 14)
+ self.assertEqual(wat1.end_col_offset, 26)
# check the call
call = middle.value
self.assertEqual(type(call), ast.Call)
@@ -427,9 +441,9 @@ def test_ast_line_numbers_with_parentheses(self):
self.assertEqual(call.end_col_offset, 31)
# check the second wat
self.assertEqual(type(wat2), ast.Constant)
- self.assertEqual(wat2.lineno, 4)
+ self.assertEqual(wat2.lineno, 5)
self.assertEqual(wat2.end_lineno, 6)
- self.assertEqual(wat2.col_offset, 12)
+ self.assertEqual(wat2.col_offset, 32)
self.assertEqual(wat2.end_col_offset, 18)

def test_docstring(self):
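Illustration (not part of the commit): with per-part locations like the ones asserted above, the reported spans map back to the actual source text, for example via ast.get_source_segment. A small sketch, assuming an interpreter with this fix (CPython 3.12+ behaves the same way); the source string here is only an example:

import ast

source = (
    "x = (\n"
    "    f' {test(t)}'\n"
    ")\n"
)
tree = ast.parse(source)
fv = tree.body[0].value.values[1]       # the FormattedValue for {test(t)}

# Accurate lineno/col_offset let get_source_segment recover the exact text.
print(ast.get_source_segment(source, fv))        # expected: {test(t)}
print(ast.get_source_segment(source, fv.value))  # expected: test(t)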
8 changes: 4 additions & 4 deletions Parser/action_helpers.c
@@ -1665,13 +1665,13 @@ _PyPegen_concatenate_strings2(Parser *p, asdl_expr_seq *strings,
_PyUnicodeWriter_Init(&writer);
expr_ty last_elem = elem;
for (j = i; j < n_flattened_elements; j++) {
- elem = asdl_seq_GET(flattened, j);
- if (elem->kind == Constant_kind) {
- if (_PyUnicodeWriter_WriteStr(&writer, elem->v.Constant.value)) {
+ expr_ty current_elem = asdl_seq_GET(flattened, j);
+ if (current_elem->kind == Constant_kind) {
+ if (_PyUnicodeWriter_WriteStr(&writer, current_elem->v.Constant.value)) {
_PyUnicodeWriter_Dealloc(&writer);
return NULL;
}
- last_elem = elem;
+ last_elem = current_elem;
} else {
break;
}
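My reading of the hunk above: the inner loop now uses its own current_elem, so the enclosing elem is no longer clobbered while consecutive Constant parts are folded into the writer, and last_elem keeps pointing at the last constant that was actually merged. A sketch of the user-visible behaviour, assuming a build with this change or CPython 3.12+:

import ast

# Implicit concatenation mixing plain strings and f-strings: the parser folds
# the adjacent constant pieces into single Constant nodes ('ab' and 'cd' here).
source = "s = 'a' f'b{x}c' 'd'\n"
joined = ast.parse(source).body[0].value

for part in joined.values:
    if isinstance(part, ast.Constant):
        print("Constant", repr(part.value), (part.col_offset, part.end_col_offset))
    else:
        print(type(part).__name__, (part.col_offset, part.end_col_offset))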
18 changes: 10 additions & 8 deletions Parser/tokenizer.c
@@ -1549,7 +1549,7 @@ token_setup(struct tok_state *tok, struct token *token, int type, const char *st
{
assert((start == NULL && end == NULL) || (start != NULL && end != NULL));
token->level = tok->level;
- token->lineno = type == STRING ? tok->first_lineno : tok->lineno;
+ token->lineno = type == STRING ? tok->first_lineno : (type == FSTRING_MIDDLE || type == FSTRING_END ? tok->fstring_first_constant_lineno : tok->lineno);
token->end_lineno = tok->lineno;
token->col_offset = token->end_col_offset = -1;
token->start = start;
@@ -1726,7 +1726,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
if (c == '#') {

if (tok->tok_mode_stack_index > 0) {
- return syntaxerror(tok, "f-string expression part cannot include '#'");
+ return MAKE_TOKEN(syntaxerror(tok, "f-string expression part cannot include '#'"));
}

const char *prefix, *p, *type_start;
@@ -2191,7 +2191,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
current_tok->bracket_stack = 0;
current_tok->bracket_mark[0] = 0;
current_tok->bracket_mark_index = -1;
- return FSTRING_START;
+ return MAKE_TOKEN(FSTRING_START);
}

letter_quote:
@@ -2248,7 +2248,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
tokenizer_mode *current_tok = &(tok->tok_mode_stack[tok->tok_mode_stack_index]);
if (current_tok->f_string_quote == quote &&
current_tok->f_string_quote_size == quote_size) {
- return syntaxerror(tok, "f-string: expecting '}'", start);
+ return MAKE_TOKEN(syntaxerror(tok, "f-string: expecting '}'", start));
}
}

@@ -2304,14 +2304,14 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
int cursor = current_tok->bracket_stack - (c != '{');

if (cursor == 0 && !update_fstring_expr(tok, c)) {
- return 0;
+ return MAKE_TOKEN(ENDMARKER);
}

if (c == ':' && cursor == mark) {
current_tok->kind = TOK_FSTRING_MODE;
p_start = tok->start;
p_end = tok->cur;
- return _PyToken_OneChar(c);
+ return MAKE_TOKEN(_PyToken_OneChar(c));
}
}

@@ -2407,9 +2407,11 @@ tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct
const char *p_start = NULL;
const char *p_end = NULL;
tok->start = tok->cur;
+ tok->fstring_first_constant_lineno = tok->lineno;
+ tok->starting_col_offset = tok->col_offset;

// If we start with a bracket, we defer to the normal mode as there is nothing for us to tokenize
- // befor it.
+ // before it.
char start_char = tok_nextc(tok);
char peek = tok_nextc(tok);
tok_backup(tok, peek);
@@ -2517,7 +2519,7 @@ tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct
}

p_start = tok->start;
- p_end = tok->cur-current_tok->f_string_quote_size;
+ p_end = tok->cur - current_tok->f_string_quote_size;
tok->tok_mode_stack_index--;
return MAKE_TOKEN(FSTRING_END);
}
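As I read the token_setup and tok_get_fstring_mode hunks, an FSTRING_MIDDLE or FSTRING_END token now starts at fstring_first_constant_lineno, the line where its constant text begins, rather than the line the tokenizer had reached when the token ended. A rough illustration with the tokenize module, assuming CPython 3.12+ where these token types are exposed; names and exact output on this development branch may differ:

import io
import tokenize

# Multi-line f-string: the constant after the replacement field starts on a
# later line than the f-string itself.
source = (
    "s = f'''head\n"
    "{x}\n"
    "tail\n"
    "'''\n"
)

fstring_types = (tokenize.FSTRING_START, tokenize.FSTRING_MIDDLE, tokenize.FSTRING_END)
for tok in tokenize.generate_tokens(io.StringIO(source).readline):
    if tok.type in fstring_types:
        # tok.start is (lineno, col): middle/end tokens should start on the
        # line where their constant text actually begins.
        print(tokenize.tok_name[tok.type], repr(tok.string), tok.start, tok.end)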
6 changes: 4 additions & 2 deletions Parser/tokenizer.h
@@ -50,8 +50,8 @@ typedef struct _tokenizer_mode {
char f_string_quote;
int f_string_quote_size;
int f_string_raw;
- const char* f_string_start;
- const char* f_string_multi_line_start;
+ const char* f_string_start;
+ const char* f_string_multi_line_start;

int last_expr_size;
int last_expr_end;
@@ -82,6 +82,8 @@ struct tok_state {
int lineno; /* Current line number */
int first_lineno; /* First line of a single line or multi line string
expression (cf. issue 16806) */
+ int fstring_first_constant_lineno; /* First line number of a single line or multiline
+ constant part of an f-string*/
int starting_col_offset; /* The column offset at the beginning of a token */
int col_offset; /* Current col offset */
int level; /* () [] {} Parentheses nesting level */
