From 9c658971f48b4214943a3dcaa86145bc333a1629 Mon Sep 17 00:00:00 2001 From: Dhruv Manilawala Date: Mon, 4 Sep 2023 09:35:06 +0530 Subject: [PATCH] Emit empty `FStringMiddle` token for special case --- crates/ruff_python_parser/src/lexer.rs | 30 +++++++++++++++---- .../ruff_python_parser/src/lexer/fstring.rs | 15 ++++++---- ..._parser__lexer__tests__fstring_escape.snap | 4 +++ ...ser__lexer__tests__fstring_escape_raw.snap | 4 +++ ...exer__tests__fstring_with_format_spec.snap | 12 ++++++++ 5 files changed, 54 insertions(+), 11 deletions(-) diff --git a/crates/ruff_python_parser/src/lexer.rs b/crates/ruff_python_parser/src/lexer.rs index 6293b5c6eab2c6..d75bc1c9237eed 100644 --- a/crates/ruff_python_parser/src/lexer.rs +++ b/crates/ruff_python_parser/src/lexer.rs @@ -565,6 +565,7 @@ impl<'source> Lexer<'source> { let mut last_offset = self.offset(); let mut in_named_unicode = false; + let mut try_end_format_spec = false; loop { match self.cursor.first() { @@ -637,6 +638,7 @@ impl<'source> Lexer<'source> { self.cursor.bump(); // Skip the second `}` last_offset = self.offset(); } else { + try_end_format_spec = true; break; } } @@ -647,20 +649,36 @@ impl<'source> Lexer<'source> { } let range = self.token_range(); - if range.is_empty() { + + // Avoid emitting the empty `FStringMiddle` token for anything other than + // the closing curly braces (`}`). + if range.is_empty() && !try_end_format_spec { return Ok(None); } - let value = if normalized.is_empty() { + let value = if range.is_empty() { + // Emit an empty `FStringMiddle` token for a special case to disallow + // lambda expressions without parentheses. For example, in `f"{lambda x:{x}}"` + // the lexer wouldn't have emitted a `FStringMiddle` token. 
+ String::new() } else if normalized.is_empty() { self.source[range].to_string() } else { normalized.push_str(&self.source[TextRange::new(last_offset, self.offset())]); normalized }; - Ok(Some(Tok::FStringMiddle { - value, - is_raw: fstring.is_raw_string(), - })) + let is_raw = fstring.is_raw_string(); + if try_end_format_spec { + // We need to decrement the format spec depth to avoid going into an infinite + // loop where the lexer keeps on emitting an empty `FStringMiddle` token. + // This is because the lexer still thinks that we're in an f-string expression + // but as we've encountered a `}` token, we need to decrement the depth so + // that the lexer can go forward with the `Rbrace` token. + // + // SAFETY: Safe because the function is only called when `self.fstrings` is not empty. + self.fstrings.current_mut().unwrap().try_end_format_spec(); + } + Ok(Some(Tok::FStringMiddle { value, is_raw })) } /// Lex a string literal. diff --git a/crates/ruff_python_parser/src/lexer/fstring.rs b/crates/ruff_python_parser/src/lexer/fstring.rs index 7dcf5f9cfc974f..40515151b6a73c 100644 --- a/crates/ruff_python_parser/src/lexer/fstring.rs +++ b/crates/ruff_python_parser/src/lexer/fstring.rs @@ -97,11 +97,9 @@ impl FStringContext { } /// Decrements the number of parentheses for the current f-string. If the - /// lexer is in a format spec, also decrements the number of format specs. + /// lexer is in a format spec, also decrements the format spec depth. pub(crate) fn decrement_closing_parentheses(&mut self) { - if self.is_in_format_spec() { - self.format_spec_depth = self.format_spec_depth.saturating_sub(1); - } + self.try_end_format_spec(); self.open_parentheses_count = self.open_parentheses_count.saturating_sub(1); } @@ -118,7 +116,7 @@ impl FStringContext { /// Returns `true` if the context is in a valid position to start format spec /// i.e., at the same level of nesting as the opening parentheses token. - /// Increments the number of format specs if it is. 
+ /// Increments the format spec depth if it is. /// /// This assumes that the current character for the lexer is a colon (`:`). pub(crate) fn try_start_format_spec(&mut self) -> bool { @@ -133,6 +131,13 @@ impl FStringContext { false } } + + /// Decrements the format spec depth if the lexer is in a format spec. + pub(crate) fn try_end_format_spec(&mut self) { + if self.is_in_format_spec() { + self.format_spec_depth = self.format_spec_depth.saturating_sub(1); + } + } } /// The f-strings stack is used to keep track of all the f-strings that the diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_escape.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_escape.snap index c2bb475599111a..3fda7ff2487e62 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_escape.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_escape.snap @@ -22,6 +22,10 @@ expression: lex_source(source) name: "x", }, Rbrace, + FStringMiddle { + value: "", + is_raw: false, + }, Rbrace, FStringMiddle { value: " \\\"\\\"\\\n end", diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_escape_raw.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_escape_raw.snap index 989196d56ccf1d..f6fb19ff3334a0 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_escape_raw.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_escape_raw.snap @@ -22,6 +22,10 @@ expression: lex_source(source) name: "x", }, Rbrace, + FStringMiddle { + value: "", + is_raw: true, + }, Rbrace, FStringMiddle { value: " \\\"\\\"\\\n end", diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_with_format_spec.snap 
b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_with_format_spec.snap index b1125413f3f5f2..e8677585d7cf74 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_with_format_spec.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_with_format_spec.snap @@ -9,6 +9,10 @@ expression: lex_source(source) name: "foo", }, Colon, + FStringMiddle { + value: "", + is_raw: false, + }, Rbrace, FStringMiddle { value: " ", @@ -77,7 +81,15 @@ expression: lex_source(source) value: 1, }, Rbrace, + FStringMiddle { + value: "", + is_raw: false, + }, Rbrace, + FStringMiddle { + value: "", + is_raw: false, + }, Rbrace, FStringEnd, Newline,