Skip to content

Commit

Permalink
Avoid match statement misidentification in token rules (#3129)
Browse files Browse the repository at this point in the history
  • Loading branch information
charliermarsh authored Feb 22, 2023
1 parent df3932f commit 1efa2e0
Show file tree
Hide file tree
Showing 25 changed files with 128 additions and 66 deletions.
8 changes: 4 additions & 4 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ libcst = { git = "https://github.com/charliermarsh/LibCST", rev = "f2f0b7a487a87
once_cell = { version = "1.16.0" }
regex = { version = "1.6.0" }
rustc-hash = { version = "1.1.0" }
rustpython-common = { git = "https://github.com/RustPython/RustPython.git", rev = "ddf497623ae56d21aa4166ff1c0725a7db67e955" }
rustpython-parser = { features = ["lalrpop"], git = "https://github.com/RustPython/RustPython.git", rev = "ddf497623ae56d21aa4166ff1c0725a7db67e955" }
rustpython-common = { git = "https://github.com/RustPython/RustPython.git", rev = "6d5bbd913c7c46518f4ed8b1b378ffb0df72f505" }
rustpython-parser = { features = ["lalrpop"], git = "https://github.com/RustPython/RustPython.git", rev = "6d5bbd913c7c46518f4ed8b1b378ffb0df72f505" }
schemars = { version = "0.8.11" }
serde = { version = "1.0.147", features = ["derive"] }
serde_json = { version = "1.0.87" }
Expand Down
3 changes: 3 additions & 0 deletions crates/ruff/resources/test/fixtures/pycodestyle/E70.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,3 +57,6 @@ class C: ...; ...
#: E701:2:12
match *0, 1, *2:
case 0,: y = 0
#:
class Foo:
match: Optional[Match] = None
24 changes: 14 additions & 10 deletions crates/ruff/src/ast/helpers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ use rustpython_parser::ast::{
};
use rustpython_parser::lexer;
use rustpython_parser::lexer::Tok;
use rustpython_parser::mode::Mode;
use rustpython_parser::token::StringKind;
use smallvec::{smallvec, SmallVec};

Expand Down Expand Up @@ -655,7 +656,7 @@ pub fn has_comments<T>(located: &Located<T>, locator: &Locator) -> bool {

/// Returns `true` if a [`Range`] includes at least one comment.
pub fn has_comments_in(range: Range, locator: &Locator) -> bool {
for tok in lexer::make_tokenizer(locator.slice(&range)) {
for tok in lexer::make_tokenizer_located(locator.slice(&range), Mode::Module, range.location) {
match tok {
Ok((_, tok, _)) => {
if matches!(tok, Tok::Comment(..)) {
Expand Down Expand Up @@ -870,7 +871,8 @@ pub fn match_parens(start: Location, locator: &Locator) -> Option<Range> {
let mut fix_start = None;
let mut fix_end = None;
let mut count: usize = 0;
for (start, tok, end) in lexer::make_tokenizer_located(contents, start).flatten() {
for (start, tok, end) in lexer::make_tokenizer_located(contents, Mode::Module, start).flatten()
{
if matches!(tok, Tok::Lpar) {
if count == 0 {
fix_start = Some(start);
Expand Down Expand Up @@ -902,7 +904,9 @@ pub fn identifier_range(stmt: &Stmt, locator: &Locator) -> Range {
| StmtKind::AsyncFunctionDef { .. }
) {
let contents = locator.slice(&Range::from_located(stmt));
for (start, tok, end) in lexer::make_tokenizer_located(contents, stmt.location).flatten() {
for (start, tok, end) in
lexer::make_tokenizer_located(contents, Mode::Module, stmt.location).flatten()
{
if matches!(tok, Tok::Name { .. }) {
return Range::new(start, end);
}
Expand Down Expand Up @@ -933,7 +937,7 @@ pub fn find_names<'a, T, U>(
locator: &'a Locator,
) -> impl Iterator<Item = Range> + 'a {
let contents = locator.slice(&Range::from_located(located));
lexer::make_tokenizer_located(contents, located.location)
lexer::make_tokenizer_located(contents, Mode::Module, located.location)
.flatten()
.filter(|(_, tok, _)| matches!(tok, Tok::Name { .. }))
.map(|(start, _, end)| Range {
Expand All @@ -951,7 +955,7 @@ pub fn excepthandler_name_range(handler: &Excepthandler, locator: &Locator) -> O
(Some(_), Some(type_)) => {
let type_end_location = type_.end_location.unwrap();
let contents = locator.slice(&Range::new(type_end_location, body[0].location));
let range = lexer::make_tokenizer_located(contents, type_end_location)
let range = lexer::make_tokenizer_located(contents, Mode::Module, type_end_location)
.flatten()
.tuple_windows()
.find(|(tok, next_tok)| {
Expand All @@ -978,7 +982,7 @@ pub fn except_range(handler: &Excepthandler, locator: &Locator) -> Range {
location: handler.location,
end_location: end,
});
let range = lexer::make_tokenizer_located(contents, handler.location)
let range = lexer::make_tokenizer_located(contents, Mode::Module, handler.location)
.flatten()
.find(|(_, kind, _)| matches!(kind, Tok::Except { .. }))
.map(|(location, _, end_location)| Range {
Expand All @@ -992,7 +996,7 @@ pub fn except_range(handler: &Excepthandler, locator: &Locator) -> Range {
/// Find f-strings that don't contain any formatted values in a `JoinedStr`.
pub fn find_useless_f_strings(expr: &Expr, locator: &Locator) -> Vec<(Range, Range)> {
let contents = locator.slice(&Range::from_located(expr));
lexer::make_tokenizer_located(contents, expr.location)
lexer::make_tokenizer_located(contents, Mode::Module, expr.location)
.flatten()
.filter_map(|(location, tok, end_location)| match tok {
Tok::String {
Expand Down Expand Up @@ -1046,7 +1050,7 @@ pub fn else_range(stmt: &Stmt, locator: &Locator) -> Option<Range> {
.expect("Expected orelse to be non-empty")
.location,
});
let range = lexer::make_tokenizer_located(contents, body_end)
let range = lexer::make_tokenizer_located(contents, Mode::Module, body_end)
.flatten()
.find(|(_, kind, _)| matches!(kind, Tok::Else))
.map(|(location, _, end_location)| Range {
Expand All @@ -1062,7 +1066,7 @@ pub fn else_range(stmt: &Stmt, locator: &Locator) -> Option<Range> {
/// Return the `Range` of the first `Tok::Colon` token in a `Range`.
pub fn first_colon_range(range: Range, locator: &Locator) -> Option<Range> {
let contents = locator.slice(&range);
let range = lexer::make_tokenizer_located(contents, range.location)
let range = lexer::make_tokenizer_located(contents, Mode::Module, range.location)
.flatten()
.find(|(_, kind, _)| matches!(kind, Tok::Colon))
.map(|(location, _, end_location)| Range {
Expand Down Expand Up @@ -1092,7 +1096,7 @@ pub fn elif_else_range(stmt: &Stmt, locator: &Locator) -> Option<Range> {
_ => return None,
};
let contents = locator.slice(&Range::new(start, end));
let range = lexer::make_tokenizer_located(contents, start)
let range = lexer::make_tokenizer_located(contents, Mode::Module, start)
.flatten()
.find(|(_, kind, _)| matches!(kind, Tok::Elif | Tok::Else))
.map(|(location, _, end_location)| Range {
Expand Down
5 changes: 4 additions & 1 deletion crates/ruff/src/ast/operations.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ use rustc_hash::FxHashMap;
use rustpython_parser::ast::{Cmpop, Constant, Expr, ExprKind, Located, Stmt, StmtKind};
use rustpython_parser::lexer;
use rustpython_parser::lexer::Tok;
use rustpython_parser::mode::Mode;

use crate::ast::helpers::any_over_expr;
use crate::ast::types::{BindingKind, Scope};
Expand Down Expand Up @@ -283,7 +284,9 @@ pub type LocatedCmpop<U = ()> = Located<Cmpop, U>;
/// `CPython` doesn't either. This method iterates over the token stream and
/// re-identifies [`Cmpop`] nodes, annotating them with valid ranges.
pub fn locate_cmpops(contents: &str) -> Vec<LocatedCmpop> {
let mut tok_iter = lexer::make_tokenizer(contents).flatten().peekable();
let mut tok_iter = lexer::make_tokenizer(contents, Mode::Module)
.flatten()
.peekable();
let mut ops: Vec<LocatedCmpop> = vec![];
let mut count: usize = 0;
loop {
Expand Down
13 changes: 10 additions & 3 deletions crates/ruff/src/autofix/helpers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ use libcst_native::{
use rustpython_parser::ast::{ExcepthandlerKind, Expr, Keyword, Location, Stmt, StmtKind};
use rustpython_parser::lexer;
use rustpython_parser::lexer::Tok;
use rustpython_parser::mode::Mode;

use crate::ast::helpers;
use crate::ast::helpers::to_absolute;
Expand Down Expand Up @@ -371,7 +372,9 @@ pub fn remove_argument(
if n_arguments == 1 {
// Case 1: there is only one argument.
let mut count: usize = 0;
for (start, tok, end) in lexer::make_tokenizer_located(contents, stmt_at).flatten() {
for (start, tok, end) in
lexer::make_tokenizer_located(contents, Mode::Module, stmt_at).flatten()
{
if matches!(tok, Tok::Lpar) {
if count == 0 {
fix_start = Some(if remove_parentheses {
Expand Down Expand Up @@ -403,7 +406,9 @@ pub fn remove_argument(
{
// Case 2: argument or keyword is _not_ the last node.
let mut seen_comma = false;
for (start, tok, end) in lexer::make_tokenizer_located(contents, stmt_at).flatten() {
for (start, tok, end) in
lexer::make_tokenizer_located(contents, Mode::Module, stmt_at).flatten()
{
if seen_comma {
if matches!(tok, Tok::NonLogicalNewline) {
// Also delete any non-logical newlines after the comma.
Expand All @@ -426,7 +431,9 @@ pub fn remove_argument(
} else {
// Case 3: argument or keyword is the last node, so we have to find the last
// comma in the stmt.
for (start, tok, _) in lexer::make_tokenizer_located(contents, stmt_at).flatten() {
for (start, tok, _) in
lexer::make_tokenizer_located(contents, Mode::Module, stmt_at).flatten()
{
if start == expr_at {
fix_end = Some(expr_end);
break;
Expand Down
11 changes: 6 additions & 5 deletions crates/ruff/src/checkers/logical_lines.rs
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,7 @@ pub fn check_logical_lines(
mod tests {
use rustpython_parser::lexer;
use rustpython_parser::lexer::LexResult;
use rustpython_parser::mode::Mode;

use crate::checkers::logical_lines::iter_logical_lines;
use crate::source_code::Locator;
Expand All @@ -164,7 +165,7 @@ mod tests {
x = 1
y = 2
z = x + 1"#;
let lxr: Vec<LexResult> = lexer::make_tokenizer(contents).collect();
let lxr: Vec<LexResult> = lexer::make_tokenizer(contents, Mode::Module).collect();
let locator = Locator::new(contents);
let actual: Vec<String> = iter_logical_lines(&lxr, &locator)
.into_iter()
Expand All @@ -185,7 +186,7 @@ x = [
]
y = 2
z = x + 1"#;
let lxr: Vec<LexResult> = lexer::make_tokenizer(contents).collect();
let lxr: Vec<LexResult> = lexer::make_tokenizer(contents, Mode::Module).collect();
let locator = Locator::new(contents);
let actual: Vec<String> = iter_logical_lines(&lxr, &locator)
.into_iter()
Expand All @@ -199,7 +200,7 @@ z = x + 1"#;
assert_eq!(actual, expected);

let contents = "x = 'abc'";
let lxr: Vec<LexResult> = lexer::make_tokenizer(contents).collect();
let lxr: Vec<LexResult> = lexer::make_tokenizer(contents, Mode::Module).collect();
let locator = Locator::new(contents);
let actual: Vec<String> = iter_logical_lines(&lxr, &locator)
.into_iter()
Expand All @@ -212,7 +213,7 @@ z = x + 1"#;
def f():
x = 1
f()"#;
let lxr: Vec<LexResult> = lexer::make_tokenizer(contents).collect();
let lxr: Vec<LexResult> = lexer::make_tokenizer(contents, Mode::Module).collect();
let locator = Locator::new(contents);
let actual: Vec<String> = iter_logical_lines(&lxr, &locator)
.into_iter()
Expand All @@ -227,7 +228,7 @@ def f():
# Comment goes here.
x = 1
f()"#;
let lxr: Vec<LexResult> = lexer::make_tokenizer(contents).collect();
let lxr: Vec<LexResult> = lexer::make_tokenizer(contents, Mode::Module).collect();
let locator = Locator::new(contents);
let actual: Vec<String> = iter_logical_lines(&lxr, &locator)
.into_iter()
Expand Down
Loading

0 comments on commit 1efa2e0

Please sign in to comment.