From c137200e5d40d23770e7255ce54aadca1bbba715 Mon Sep 17 00:00:00 2001 From: Dhruv Manilawala Date: Thu, 30 May 2024 11:29:13 +0530 Subject: [PATCH] Fix various bugs found via running the test suite --- .../src/checkers/ast/analyze/expression.rs | 2 +- .../rules/implicit.rs | 2 +- .../rules/pycodestyle/rules/blank_lines.rs | 91 ++++++++++--------- .../rules/invalid_literal_comparisons.rs | 14 ++- .../ruff_linter/src/rules/pyupgrade/fixes.rs | 10 +- .../rules/printf_string_formatting.rs | 14 +-- crates/ruff_python_formatter/src/context.rs | 6 +- crates/ruff_python_formatter/src/verbatim.rs | 8 +- .../ruff_python_index/src/multiline_ranges.rs | 2 +- crates/ruff_python_parser/src/lexer.rs | 35 +++---- crates/ruff_python_parser/src/token_source.rs | 6 +- 11 files changed, 100 insertions(+), 90 deletions(-) diff --git a/crates/ruff_linter/src/checkers/ast/analyze/expression.rs b/crates/ruff_linter/src/checkers/ast/analyze/expression.rs index 2dd5648ca73b4d..9c12ac03339c97 100644 --- a/crates/ruff_linter/src/checkers/ast/analyze/expression.rs +++ b/crates/ruff_linter/src/checkers/ast/analyze/expression.rs @@ -1160,7 +1160,7 @@ pub(crate) fn expression(expr: &Expr, checker: &mut Checker) { } } if checker.enabled(Rule::PrintfStringFormatting) { - pyupgrade::rules::printf_string_formatting(checker, format_string, right); + pyupgrade::rules::printf_string_formatting(checker, bin_op, format_string); } if checker.enabled(Rule::BadStringFormatCharacter) { pylint::rules::bad_string_format_character::percent( diff --git a/crates/ruff_linter/src/rules/flake8_implicit_str_concat/rules/implicit.rs b/crates/ruff_linter/src/rules/flake8_implicit_str_concat/rules/implicit.rs index 75fa1d3eaa6402..5cbd3f46e76b88 100644 --- a/crates/ruff_linter/src/rules/flake8_implicit_str_concat/rules/implicit.rs +++ b/crates/ruff_linter/src/rules/flake8_implicit_str_concat/rules/implicit.rs @@ -110,7 +110,7 @@ pub(crate) fn implicit( let (a_range, b_range) = match (a_token.kind(), b_token.kind()) { (TokenKind::String, TokenKind::String) => (a_token.range(), b_token.range()), (TokenKind::String, TokenKind::FStringStart) => { - match indexer.fstring_ranges().innermost(a_token.start()) { + match indexer.fstring_ranges().innermost(b_token.start()) { Some(b_range) => (a_token.range(), b_range), None => continue, } diff --git a/crates/ruff_linter/src/rules/pycodestyle/rules/blank_lines.rs b/crates/ruff_linter/src/rules/pycodestyle/rules/blank_lines.rs index 002209df814c0f..172ff40e5b6c9a 100644 --- a/crates/ruff_linter/src/rules/pycodestyle/rules/blank_lines.rs +++ b/crates/ruff_linter/src/rules/pycodestyle/rules/blank_lines.rs @@ -2,7 +2,6 @@ use itertools::Itertools; use ruff_notebook::CellOffsets; use ruff_python_parser::Token; use ruff_python_parser::Tokens; -use ruff_text_size::Ranged; use std::cmp::Ordering; use std::iter::Peekable; use std::num::NonZeroU32; @@ -436,55 +435,59 @@ impl<'a> Iterator for LinePreprocessor<'a> { continue; } - let (logical_line_kind, first_token_range) = if let Some(first_token_range) = - first_logical_line_token - { - first_token_range - } - // At the start of the line... - else { - // Check if we are at the beginning of a cell in a notebook. - if let Some(ref mut cell_offsets) = self.cell_offsets { - if cell_offsets - .peek() - .is_some_and(|offset| offset == &&self.line_start) - { - self.is_beginning_of_cell = true; - cell_offsets.next(); - blank_lines = BlankLines::Zero; - self.max_preceding_blank_lines = BlankLines::Zero; - } + let (logical_line_kind, first_token_range) = + if let Some(first_token_range) = first_logical_line_token { + first_token_range } + // At the start of the line... + else { + // Check if we are at the beginning of a cell in a notebook. + if let Some(ref mut cell_offsets) = self.cell_offsets { + if cell_offsets + .peek() + .is_some_and(|offset| offset == &&self.line_start) + { + self.is_beginning_of_cell = true; + cell_offsets.next(); + blank_lines = BlankLines::Zero; + self.max_preceding_blank_lines = BlankLines::Zero; + } + } - // An empty line - if kind == TokenKind::NonLogicalNewline { - blank_lines.add(range); - - self.line_start = range.end(); + // An empty line + if kind == TokenKind::NonLogicalNewline { + blank_lines.add(range); - continue; - } + self.line_start = range.end(); - is_docstring = kind == TokenKind::String; - - let logical_line_kind = match kind { - TokenKind::Class => LogicalLineKind::Class, - TokenKind::Comment => LogicalLineKind::Comment, - TokenKind::At => LogicalLineKind::Decorator, - TokenKind::Def => LogicalLineKind::Function, - // Lookahead to distinguish `async def` from `async with`. - TokenKind::Async if matches!(self.tokens.peek(), Some((TokenKind::Def, _))) => { - LogicalLineKind::Function + continue; } - TokenKind::Import => LogicalLineKind::Import, - TokenKind::From => LogicalLineKind::FromImport, - _ => LogicalLineKind::Other, - }; - first_logical_line_token = Some((logical_line_kind, range)); + is_docstring = kind == TokenKind::String; + + let logical_line_kind = match kind { + TokenKind::Class => LogicalLineKind::Class, + TokenKind::Comment => LogicalLineKind::Comment, + TokenKind::At => LogicalLineKind::Decorator, + TokenKind::Def => LogicalLineKind::Function, + // Lookahead to distinguish `async def` from `async with`. + TokenKind::Async + if self + .tokens + .peek() + .is_some_and(|token| token.kind() == TokenKind::Def) => + { + LogicalLineKind::Function + } + TokenKind::Import => LogicalLineKind::Import, + TokenKind::From => LogicalLineKind::FromImport, + _ => LogicalLineKind::Other, + }; + + first_logical_line_token = Some((logical_line_kind, range)); - (logical_line_kind, range) - }; + (logical_line_kind, range) + }; if !kind.is_trivia() { line_is_comment_only = false; @@ -543,7 +546,7 @@ impl<'a> Iterator for LinePreprocessor<'a> { } if !kind.is_trivia() { - last_token = token; + last_token = kind; } } diff --git a/crates/ruff_linter/src/rules/pyflakes/rules/invalid_literal_comparisons.rs b/crates/ruff_linter/src/rules/pyflakes/rules/invalid_literal_comparisons.rs index 9191b923772e11..6df04ed5afce04 100644 --- a/crates/ruff_linter/src/rules/pyflakes/rules/invalid_literal_comparisons.rs +++ b/crates/ruff_linter/src/rules/pyflakes/rules/invalid_literal_comparisons.rs @@ -5,7 +5,7 @@ use ruff_diagnostics::{AlwaysFixableViolation, Diagnostic, Edit, Fix}; use ruff_macros::{derive_message_formats, violation}; use ruff_python_ast::helpers; use ruff_python_parser::{TokenKind, Tokens}; -use ruff_text_size::{Ranged, TextRange, TextSize}; +use ruff_text_size::{Ranged, TextRange}; use crate::checkers::ast::Checker; @@ -110,7 +110,7 @@ pub(crate) fn invalid_literal_comparison( } { diagnostic.set_fix(Fix::safe_edit(Edit::range_replacement( content, - located_op.range + expr.start(), + located_op.range, ))); } } else { @@ -175,11 +175,9 @@ fn locate_cmp_ops(expr: &Expr, tokens: &Tokens) -> Vec { match token.kind() { TokenKind::Not => { - if let Some((_, next_range)) = - tok_iter.next_if(|token| token.kind() == TokenKind::In) - { + if let Some(next_token) = tok_iter.next_if(|token| token.kind() == TokenKind::In) { ops.push(LocatedCmpOp::new( - TextRange::new(token.start(), next_range.end()), + TextRange::new(token.start(), next_token.end()), CmpOp::NotIn, )); } @@ -188,11 +186,11 @@ fn locate_cmp_ops(expr: &Expr, tokens: &Tokens) -> Vec { ops.push(LocatedCmpOp::new(token.range(), CmpOp::In)); } TokenKind::Is => { - let op = if let Some((_, next_range)) = + let op = if let Some(next_token) = tok_iter.next_if(|token| token.kind() == TokenKind::Not) { LocatedCmpOp::new( - TextRange::new(token.start(), next_range.end()), + TextRange::new(token.start(), next_token.end()), CmpOp::IsNot, ) } else { diff --git a/crates/ruff_linter/src/rules/pyupgrade/fixes.rs b/crates/ruff_linter/src/rules/pyupgrade/fixes.rs index 54ee126ee52c14..7fd746138e8a59 100644 --- a/crates/ruff_linter/src/rules/pyupgrade/fixes.rs +++ b/crates/ruff_linter/src/rules/pyupgrade/fixes.rs @@ -1,7 +1,7 @@ -use ruff_python_ast::{Alias, StmtImportFrom}; -use ruff_python_parser::{lexer, Mode, Tok, TokenKind, Tokens}; +use ruff_python_ast::StmtImportFrom; +use ruff_python_parser::{TokenKind, Tokens}; use ruff_source_file::Locator; -use ruff_text_size::{Ranged, TextRange, TextSize}; +use ruff_text_size::{Ranged, TextRange}; /// Remove any imports matching `members` from an import-from statement. pub(crate) fn remove_import_members( @@ -25,7 +25,7 @@ pub(crate) fn remove_import_members( // Reconstruct the source code by skipping any names that are in `members`. let mut output = String::with_capacity(import_from_stmt.range().len().to_usize()); - let mut last_pos = TextSize::default(); + let mut last_pos = import_from_stmt.start(); let mut is_first = true; for (index, member) in import_from_stmt.names.iter().enumerate() { @@ -58,7 +58,7 @@ pub(crate) fn remove_import_members( } // Add the remaining content. - let slice = locator.after(last_pos); + let slice = locator.slice(TextRange::new(last_pos, import_from_stmt.end())); output.push_str(slice); output } diff --git a/crates/ruff_linter/src/rules/pyupgrade/rules/printf_string_formatting.rs b/crates/ruff_linter/src/rules/pyupgrade/rules/printf_string_formatting.rs index 5aecaa39094445..4fbbfe1e39fb5e 100644 --- a/crates/ruff_linter/src/rules/pyupgrade/rules/printf_string_formatting.rs +++ b/crates/ruff_linter/src/rules/pyupgrade/rules/printf_string_formatting.rs @@ -8,7 +8,7 @@ use ruff_python_codegen::Stylist; use ruff_python_literal::cformat::{ CConversionFlags, CFormatPart, CFormatPrecision, CFormatQuantity, CFormatString, }; -use ruff_python_parser::{lexer, AsMode, Tok, TokenKind}; +use ruff_python_parser::TokenKind; use ruff_python_stdlib::identifiers::is_identifier; use ruff_source_file::Locator; use ruff_text_size::{Ranged, TextRange}; @@ -346,9 +346,11 @@ fn convertible(format_string: &CFormatString, params: &Expr) -> bool { /// UP031 pub(crate) fn printf_string_formatting( checker: &mut Checker, + bin_op: &ast::ExprBinOp, string_expr: &ast::ExprStringLiteral, - right: &Expr, ) { + let right = &*bin_op.right; + let mut num_positional_arguments = 0; let mut num_keyword_arguments = 0; let mut format_strings: Vec<(TextRange, String)> = @@ -434,14 +436,14 @@ pub(crate) fn printf_string_formatting( // Reconstruct the string. let mut contents = String::new(); let mut prev_end = None; - for (range, format_string) in format_strings.into_iter() { + for (range, format_string) in format_strings { // Add the content before the string segment. match prev_end { None => { contents.push_str( checker .locator() - .slice(TextRange::new(string_expr.start(), range.start())), + .slice(TextRange::new(bin_op.start(), range.start())), ); } Some(prev_end) => { @@ -478,10 +480,10 @@ pub(crate) fn printf_string_formatting( // Add the `.format` call. contents.push_str(&format!(".format{params_string}")); - let mut diagnostic = Diagnostic::new(PrintfStringFormatting, string_expr.range()); + let mut diagnostic = Diagnostic::new(PrintfStringFormatting, bin_op.range()); diagnostic.set_fix(Fix::unsafe_edit(Edit::range_replacement( contents, - string_expr.range(), + bin_op.range(), ))); checker.diagnostics.push(diagnostic); } diff --git a/crates/ruff_python_formatter/src/context.rs b/crates/ruff_python_formatter/src/context.rs index 61fc302ccbde9a..32169ccf7dc924 100644 --- a/crates/ruff_python_formatter/src/context.rs +++ b/crates/ruff_python_formatter/src/context.rs @@ -13,7 +13,7 @@ pub struct PyFormatContext<'a> { options: PyFormatOptions, contents: &'a str, comments: Comments<'a>, - tokens: &Tokens, + tokens: &'a Tokens, node_level: NodeLevel, indent_level: IndentLevel, /// Set to a non-None value when the formatter is running on a code @@ -34,7 +34,7 @@ impl<'a> PyFormatContext<'a> { options: PyFormatOptions, contents: &'a str, comments: Comments<'a>, - tokens: &Tokens, + tokens: &'a Tokens, ) -> Self { Self { options, @@ -77,7 +77,7 @@ impl<'a> PyFormatContext<'a> { &self.comments } - pub(crate) fn tokens(&self) -> &Tokens { + pub(crate) fn tokens(&self) -> &'a Tokens { self.tokens } diff --git a/crates/ruff_python_formatter/src/verbatim.rs b/crates/ruff_python_formatter/src/verbatim.rs index a6bb434fdfca45..dcff8c497bb34e 100644 --- a/crates/ruff_python_formatter/src/verbatim.rs +++ b/crates/ruff_python_formatter/src/verbatim.rs @@ -792,8 +792,8 @@ struct LogicalLinesIter<'a> { content_end: TextSize, } -impl LogicalLinesIter<'_> { - fn new(tokens: Iter<'_, parser::Token>, verbatim_range: TextRange) -> Self { +impl<'a> LogicalLinesIter<'a> { + fn new(tokens: Iter<'a, parser::Token>, verbatim_range: TextRange) -> Self { Self { tokens, last_line_end: verbatim_range.start(), @@ -802,7 +802,7 @@ impl LogicalLinesIter<'_> { } } -impl Iterator for LogicalLinesIter { +impl<'a> Iterator for LogicalLinesIter<'a> { type Item = FormatResult; fn next(&mut self) -> Option { @@ -856,7 +856,7 @@ impl Iterator for LogicalLinesIter { } } -impl FusedIterator for LogicalLinesIter where I: Iterator {} +impl<'a> FusedIterator for LogicalLinesIter<'a> {} /// A logical line or a comment (or form feed only) line struct LogicalLine { diff --git a/crates/ruff_python_index/src/multiline_ranges.rs b/crates/ruff_python_index/src/multiline_ranges.rs index 75fc5d90b79968..585ff6f1ae8e95 100644 --- a/crates/ruff_python_index/src/multiline_ranges.rs +++ b/crates/ruff_python_index/src/multiline_ranges.rs @@ -46,7 +46,7 @@ pub(crate) struct MultilineRangesBuilder { impl MultilineRangesBuilder { pub(crate) fn visit_token(&mut self, token: &Token) { - if matches!(token.kind(), TokenKind::String | TokenKind::FStringStart) { + if matches!(token.kind(), TokenKind::String | TokenKind::FStringMiddle) { if token.is_triple_quoted_string() { self.ranges.push(token.range()); } diff --git a/crates/ruff_python_parser/src/lexer.rs b/crates/ruff_python_parser/src/lexer.rs index 64c9f942a3584f..fdef12fa987c21 100644 --- a/crates/ruff_python_parser/src/lexer.rs +++ b/crates/ruff_python_parser/src/lexer.rs @@ -180,7 +180,10 @@ impl<'src> Lexer<'src> { fn lex_identifier(&mut self, first: char) -> TokenKind { // Detect potential string like rb'' b'' f'' u'' r'' let quote = match (first, self.cursor.first()) { - (_, quote @ ('\'' | '"')) => self.try_single_char_prefix(first).then_some(quote), + (_, quote @ ('\'' | '"')) => self.try_single_char_prefix(first).then(|| { + self.cursor.bump(); + quote + }), (_, second) if is_quote(self.cursor.second()) => { self.try_double_char_prefix([first, second]).then(|| { self.cursor.bump(); @@ -573,7 +576,7 @@ impl<'src> Lexer<'src> { } if self.cursor.eat_char2(quote, quote) { - self.current_flags = TokenFlags::TRIPLE_QUOTED_STRING; + self.current_flags |= TokenFlags::TRIPLE_QUOTED_STRING; } self.fstrings @@ -734,7 +737,7 @@ impl<'src> Lexer<'src> { // If the next two characters are also the quote character, then we have a triple-quoted // string; consume those two characters and ensure that we require a triple-quote to close if self.cursor.eat_char2(quote, quote) { - self.current_flags = TokenFlags::TRIPLE_QUOTED_STRING; + self.current_flags |= TokenFlags::TRIPLE_QUOTED_STRING; } let value_start = self.offset(); @@ -1412,7 +1415,7 @@ bitflags! { impl StringFlags for TokenFlags { fn quote_style(self) -> Quote { - if self.contains(TokenFlags::DOUBLE_QUOTES) { + if self.intersects(TokenFlags::DOUBLE_QUOTES) { Quote::Double } else { Quote::Single @@ -1420,31 +1423,31 @@ impl StringFlags for TokenFlags { } fn is_triple_quoted(self) -> bool { - self.contains(TokenFlags::TRIPLE_QUOTED_STRING) + self.intersects(TokenFlags::TRIPLE_QUOTED_STRING) } fn prefix(self) -> AnyStringPrefix { - if self.contains(TokenFlags::F_STRING) { - if self.contains(TokenFlags::RAW_STRING_LOWERCASE) { + if self.intersects(TokenFlags::F_STRING) { + if self.intersects(TokenFlags::RAW_STRING_LOWERCASE) { AnyStringPrefix::Format(FStringPrefix::Raw { uppercase_r: false }) - } else if self.contains(TokenFlags::RAW_STRING_UPPERCASE) { + } else if self.intersects(TokenFlags::RAW_STRING_UPPERCASE) { AnyStringPrefix::Format(FStringPrefix::Raw { uppercase_r: true }) } else { AnyStringPrefix::Format(FStringPrefix::Regular) } - } else if self.contains(TokenFlags::BYTE_STRING) { - if self.contains(TokenFlags::RAW_STRING_LOWERCASE) { + } else if self.intersects(TokenFlags::BYTE_STRING) { + if self.intersects(TokenFlags::RAW_STRING_LOWERCASE) { AnyStringPrefix::Bytes(ByteStringPrefix::Raw { uppercase_r: false }) - } else if self.contains(TokenFlags::RAW_STRING_UPPERCASE) { + } else if self.intersects(TokenFlags::RAW_STRING_UPPERCASE) { AnyStringPrefix::Bytes(ByteStringPrefix::Raw { uppercase_r: true }) } else { AnyStringPrefix::Bytes(ByteStringPrefix::Regular) } - } else if self.contains(TokenFlags::RAW_STRING_LOWERCASE) { + } else if self.intersects(TokenFlags::RAW_STRING_LOWERCASE) { AnyStringPrefix::Regular(StringLiteralPrefix::Raw { uppercase: false }) - } else if self.contains(TokenFlags::RAW_STRING_UPPERCASE) { + } else if self.intersects(TokenFlags::RAW_STRING_UPPERCASE) { AnyStringPrefix::Regular(StringLiteralPrefix::Raw { uppercase: true }) - } else if self.contains(TokenFlags::UNICODE_STRING) { + } else if self.intersects(TokenFlags::UNICODE_STRING) { AnyStringPrefix::Regular(StringLiteralPrefix::Unicode) } else { AnyStringPrefix::Regular(StringLiteralPrefix::Empty) @@ -1455,12 +1458,12 @@ impl StringFlags for TokenFlags { impl TokenFlags { /// Returns `true` if the token is an f-string. const fn is_f_string(self) -> bool { - self.contains(TokenFlags::F_STRING) + self.intersects(TokenFlags::F_STRING) } /// Returns `true` if the token is a raw string. const fn is_raw_string(self) -> bool { - self.contains(TokenFlags::RAW_STRING) + self.intersects(TokenFlags::RAW_STRING) } pub(crate) fn as_any_string_flags(self) -> AnyStringFlags { diff --git a/crates/ruff_python_parser/src/token_source.rs b/crates/ruff_python_parser/src/token_source.rs index 3cb52c878c803b..26bba3124a1b28 100644 --- a/crates/ruff_python_parser/src/token_source.rs +++ b/crates/ruff_python_parser/src/token_source.rs @@ -139,13 +139,17 @@ impl<'src> TokenSource<'src> { /// Consumes the token source, returning the collected tokens and any errors encountered during /// lexing. The token collection includes both the trivia and non-trivia tokens. - pub(crate) fn finish(self) -> (Vec, CommentRanges, Vec) { + pub(crate) fn finish(mut self) -> (Vec, CommentRanges, Vec) { assert_eq!( self.current_kind(), TokenKind::EndOfFile, "TokenSource was not fully consumed" ); + if let Some(last) = self.tokens.pop() { + assert_eq!(last.kind(), TokenKind::EndOfFile); + } + let comment_ranges = CommentRanges::new(self.comments); (self.tokens, comment_ranges, self.lexer.finish()) }