From 098ee5d493ca83238754a8cb4629fa1b91144b84 Mon Sep 17 00:00:00 2001 From: Dhruv Manilawala Date: Tue, 5 Sep 2023 07:06:33 +0530 Subject: [PATCH] Avoid tracking parentheses nesting multiple times --- crates/ruff_python_parser/src/lexer.rs | 42 ++++++----------- .../ruff_python_parser/src/lexer/fstring.rs | 47 +++++++------------ 2 files changed, 33 insertions(+), 56 deletions(-) diff --git a/crates/ruff_python_parser/src/lexer.rs b/crates/ruff_python_parser/src/lexer.rs index a7d0eb3d78fd88..834f4b4f360228 100644 --- a/crates/ruff_python_parser/src/lexer.rs +++ b/crates/ruff_python_parser/src/lexer.rs @@ -537,7 +537,7 @@ impl<'source> Lexer<'source> { flags |= FStringContextFlags::TRIPLE; }; - self.fstrings.push(FStringContext::new(flags)); + self.fstrings.push(FStringContext::new(flags, self.nesting)); Tok::FStringStart } @@ -565,7 +565,7 @@ impl<'source> Lexer<'source> { let mut last_offset = self.offset(); let mut in_named_unicode = false; - let mut try_end_format_spec = false; + let mut end_format_spec = false; loop { match self.cursor.first() { @@ -631,14 +631,18 @@ impl<'source> Lexer<'source> { if in_named_unicode { in_named_unicode = false; self.cursor.bump(); - } else if self.cursor.second() == '}' && !fstring.is_in_format_spec() { + } else if self.cursor.second() == '}' + && !fstring.is_in_format_spec(self.nesting) + { self.cursor.bump(); normalized .push_str(&self.source[TextRange::new(last_offset, self.offset())]); self.cursor.bump(); // Skip the second `}` last_offset = self.offset(); } else { - try_end_format_spec = true; + // The lexer can only be in a format spec if we encounter a `}` token + // while scanning for `FStringMiddle` tokens. + end_format_spec = true; break; } } @@ -652,7 +656,7 @@ impl<'source> Lexer<'source> { // Avoid emitting the empty `FStringMiddle` token for anything other than // the closing curly braces (`}`). - if range.is_empty() && !try_end_format_spec { + if range.is_empty() && !end_format_spec { return Ok(None); } @@ -668,7 +672,7 @@ impl<'source> Lexer<'source> { normalized }; let is_raw = fstring.is_raw_string(); - if try_end_format_spec { + if end_format_spec { // We need to decrement the format spec depth to avoid going into infinite // loop where the lexer keeps on emitting an empty `FStringMiddle` token. // This is because the lexer still thinks that we're in a f-string expression @@ -676,7 +680,7 @@ impl<'source> Lexer<'source> { // that the lexer can go forward with the `Rbrace` token. // // SAFETY: Safe because the function is only called when `self.fstrings` is not empty. - self.fstrings.current_mut().unwrap().try_end_format_spec(); + self.fstrings.current_mut().unwrap().end_format_spec(); } Ok(Some(Tok::FStringMiddle { value, is_raw })) } @@ -773,7 +777,7 @@ impl<'source> Lexer<'source> { // This function is used by the iterator implementation. pub fn next_token(&mut self) -> LexResult { if let Some(fstring) = self.fstrings.current() { - if !fstring.is_in_expression() { + if !fstring.is_in_expression(self.nesting) { self.cursor.start_token(); if let Some(tok) = self.lex_fstring_middle_or_end()? { if matches!(tok, Tok::FStringEnd) { @@ -1059,49 +1063,33 @@ impl<'source> Lexer<'source> { } '~' => Tok::Tilde, '(' => { - if let Some(fstring) = self.fstrings.current_mut() { - fstring.increment_opening_parentheses(); - } self.nesting += 1; Tok::Lpar } ')' => { - if let Some(fstring) = self.fstrings.current_mut() { - fstring.decrement_closing_parentheses(); - } self.nesting = self.nesting.saturating_sub(1); Tok::Rpar } '[' => { - if let Some(fstring) = self.fstrings.current_mut() { - fstring.increment_opening_parentheses(); - } self.nesting += 1; Tok::Lsqb } ']' => { - if let Some(fstring) = self.fstrings.current_mut() { - fstring.decrement_closing_parentheses(); - } self.nesting = self.nesting.saturating_sub(1); Tok::Rsqb } '{' => { - if let Some(fstring) = self.fstrings.current_mut() { - fstring.increment_opening_parentheses(); - } self.nesting += 1; Tok::Lbrace } '}' => { - if let Some(fstring) = self.fstrings.current_mut() { - if !fstring.has_open_parentheses() { + if let Some(fstring) = self.fstrings.current() { + if !fstring.has_open_parentheses(self.nesting) { return Err(LexicalError { error: LexicalErrorType::FStringError(FStringErrorType::SingleRbrace), location: self.token_start(), }); } - fstring.decrement_closing_parentheses(); } self.nesting = self.nesting.saturating_sub(1); Tok::Rbrace @@ -1110,7 +1098,7 @@ impl<'source> Lexer<'source> { if self .fstrings .current_mut() - .is_some_and(FStringContext::try_start_format_spec) + .is_some_and(|fstring| fstring.try_start_format_spec(self.nesting)) { Tok::Colon } else if self.cursor.eat_char('=') { diff --git a/crates/ruff_python_parser/src/lexer/fstring.rs b/crates/ruff_python_parser/src/lexer/fstring.rs index 40515151b6a73c..c7d964e3297e69 100644 --- a/crates/ruff_python_parser/src/lexer/fstring.rs +++ b/crates/ruff_python_parser/src/lexer/fstring.rs @@ -25,9 +25,8 @@ bitflags! { pub(crate) struct FStringContext { flags: FStringContextFlags, - /// The number of open parentheses for the current f-string. This includes all - /// three types of parentheses: round (`(`), square (`[`), and curly (`{`). - open_parentheses_count: u32, + /// The level of nesting for the lexer when it entered the current f-string. + nesting: u32, /// The current depth of format spec for the current f-string. This is because /// there can be multiple format specs nested for the same f-string. @@ -36,11 +35,11 @@ pub(crate) struct FStringContext { } impl FStringContext { - pub(crate) fn new(flags: FStringContextFlags) -> Self { + pub(crate) fn new(flags: FStringContextFlags, nesting: u32) -> Self { Self { flags, - open_parentheses_count: 0, format_spec_depth: 0, + nesting, } } @@ -86,32 +85,24 @@ impl FStringContext { self.flags.contains(FStringContextFlags::TRIPLE) } - /// Returns `true` if the current f-string has open parentheses. - pub(crate) fn has_open_parentheses(&mut self) -> bool { - self.open_parentheses_count > 0 - } - - /// Increments the number of parentheses for the current f-string. - pub(crate) fn increment_opening_parentheses(&mut self) { - self.open_parentheses_count += 1; + fn open_parentheses_count(&self, current_nesting: u32) -> u32 { + current_nesting.saturating_sub(self.nesting) } - /// Decrements the number of parentheses for the current f-string. If the - /// lexer is in a format spec, also decrements the format spec depth. - pub(crate) fn decrement_closing_parentheses(&mut self) { - self.try_end_format_spec(); - self.open_parentheses_count = self.open_parentheses_count.saturating_sub(1); + /// Returns `true` if the current f-string has open parentheses. + pub(crate) fn has_open_parentheses(&self, current_nesting: u32) -> bool { + self.open_parentheses_count(current_nesting) > 0 } /// Returns `true` if the lexer is in a f-string expression i.e., between /// two curly braces. - pub(crate) fn is_in_expression(&self) -> bool { - self.open_parentheses_count > self.format_spec_depth + pub(crate) fn is_in_expression(&self, current_nesting: u32) -> bool { + self.open_parentheses_count(current_nesting) > self.format_spec_depth } /// Returns `true` if the lexer is in a f-string format spec i.e., after a colon. - pub(crate) fn is_in_format_spec(&self) -> bool { - self.format_spec_depth > 0 && !self.is_in_expression() + pub(crate) fn is_in_format_spec(&self, current_nesting: u32) -> bool { + self.format_spec_depth > 0 && !self.is_in_expression(current_nesting) } /// Returns `true` if the context is in a valid position to start format spec @@ -119,9 +110,9 @@ impl FStringContext { /// Increments the format spec depth if it is. /// /// This assumes that the current character for the lexer is a colon (`:`). - pub(crate) fn try_start_format_spec(&mut self) -> bool { + pub(crate) fn try_start_format_spec(&mut self, current_nesting: u32) -> bool { if self - .open_parentheses_count + .open_parentheses_count(current_nesting) .saturating_sub(self.format_spec_depth) == 1 { @@ -132,11 +123,9 @@ impl FStringContext { } } - /// Decrements the format spec depth if the lexer is in a format spec. - pub(crate) fn try_end_format_spec(&mut self) { - if self.is_in_format_spec() { - self.format_spec_depth = self.format_spec_depth.saturating_sub(1); - } + /// Decrements the format spec depth unconditionally. + pub(crate) fn end_format_spec(&mut self) { + self.format_spec_depth = self.format_spec_depth.saturating_sub(1); } }