From 0bd61b14db2459eee179341ee69b03c11f4a0deb Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Mon, 12 Feb 2024 22:08:42 -0500 Subject: [PATCH] Add type inference --- .../rules/refurb/rules/readlines_in_for.rs | 51 ++++--- ...es__refurb__tests__FURB129_FURB129.py.snap | 75 ++-------- .../src/analyze/typing.rs | 132 +++++++++++++++++- 3 files changed, 170 insertions(+), 88 deletions(-) diff --git a/crates/ruff_linter/src/rules/refurb/rules/readlines_in_for.rs b/crates/ruff_linter/src/rules/refurb/rules/readlines_in_for.rs index 9911e9a921078..e548b499844df 100644 --- a/crates/ruff_linter/src/rules/refurb/rules/readlines_in_for.rs +++ b/crates/ruff_linter/src/rules/refurb/rules/readlines_in_for.rs @@ -1,42 +1,43 @@ use ruff_diagnostics::{AlwaysFixableViolation, Diagnostic, Edit, Fix}; use ruff_macros::{derive_message_formats, violation}; use ruff_python_ast::{Comprehension, Expr, StmtFor}; +use ruff_python_semantic::analyze::typing; +use ruff_python_semantic::analyze::typing::is_io_base_expr; use ruff_text_size::Ranged; use crate::checkers::ast::Checker; /// ## What it does -/// Checks for uses of `readlines()` when iterating a file object line-by-line. +/// Checks for uses of `readlines()` when iterating over a file line-by-line. /// /// ## Why is this bad? -/// Instead of iterating through `list[str]` which is returned from `readlines()`, use the iteration -/// through a file object which is a more convenient and performant way. +/// Rather than iterating over all lines in a file by calling `readlines()`, +/// it's more convenient and performant to iterate over the file object +/// directly. /// /// ## Example /// ```python -/// with open("file.txt") as f: -/// for line in f.readlines(): +/// with open("file.txt") as fp: +/// for line in fp.readlines(): /// ... /// ``` /// /// Use instead: /// ```python -/// with open("file.txt") as f: -/// for line in f: +/// with open("file.txt") as fp: +/// for line in fp: /// ... /// ``` /// /// ## References /// - [Python documentation: `io.IOBase.readlines`](https://docs.python.org/3/library/io.html#io.IOBase.readlines) -/// - [Python documentation: methods of file objects](https://docs.python.org/3/tutorial/inputoutput.html#methods-of-file-objects) -/// #[violation] pub(crate) struct ReadlinesInFor; impl AlwaysFixableViolation for ReadlinesInFor { #[derive_message_formats] fn message(&self) -> String { - format!("Use of `readlines()` in loop") + format!("Instead of calling `readlines()`, iterate over file object directly") } fn fix_title(&self) -> String { @@ -63,11 +64,29 @@ fn readlines_in_iter(checker: &mut Checker, iter_expr: &Expr) { return; }; - if expr_attr.attr.as_str() == "readlines" && expr_call.arguments.is_empty() { - let mut diagnostic = Diagnostic::new(ReadlinesInFor, expr_call.range()); - diagnostic.set_fix(Fix::unsafe_edit(Edit::range_deletion( - expr_call.range().add_start(expr_attr.value.range().len()), - ))); - checker.diagnostics.push(diagnostic); + if expr_attr.attr.as_str() != "readlines" || !expr_call.arguments.is_empty() { + return; } + + // Determine whether `fp` in `fp.readlines()` was bound to a file object. + if let Expr::Name(name) = expr_attr.value.as_ref() { + if !checker + .semantic() + .resolve_name(name) + .map(|id| checker.semantic().binding(id)) + .is_some_and(|binding| typing::is_io_base(binding, checker.semantic())) + { + return; + } + } else { + if !is_io_base_expr(expr_attr.value.as_ref(), checker.semantic()) { + return; + } + } + + let mut diagnostic = Diagnostic::new(ReadlinesInFor, expr_call.range()); + diagnostic.set_fix(Fix::unsafe_edit(Edit::range_deletion( + expr_call.range().add_start(expr_attr.value.range().len()), + ))); + checker.diagnostics.push(diagnostic); } diff --git a/crates/ruff_linter/src/rules/refurb/snapshots/ruff_linter__rules__refurb__tests__FURB129_FURB129.py.snap b/crates/ruff_linter/src/rules/refurb/snapshots/ruff_linter__rules__refurb__tests__FURB129_FURB129.py.snap index 22f0a60abcf73..2d3c00155b559 100644 --- a/crates/ruff_linter/src/rules/refurb/snapshots/ruff_linter__rules__refurb__tests__FURB129_FURB129.py.snap +++ b/crates/ruff_linter/src/rules/refurb/snapshots/ruff_linter__rules__refurb__tests__FURB129_FURB129.py.snap @@ -1,7 +1,7 @@ --- source: crates/ruff_linter/src/rules/refurb/mod.rs --- -FURB129.py:7:18: FURB129 [*] Use of `readlines()` in loop +FURB129.py:7:18: FURB129 [*] Instead of calling `readlines()`, iterate over file object directly | 5 | # Errors 6 | with open("FURB129.py") as f: @@ -22,7 +22,7 @@ FURB129.py:7:18: FURB129 [*] Use of `readlines()` in loop 9 9 | a = [line.lower() for line in f.readlines()] 10 10 | b = {line.upper() for line in f.readlines()} -FURB129.py:9:35: FURB129 [*] Use of `readlines()` in loop +FURB129.py:9:35: FURB129 [*] Instead of calling `readlines()`, iterate over file object directly | 7 | for _line in f.readlines(): 8 | pass @@ -43,7 +43,7 @@ FURB129.py:9:35: FURB129 [*] Use of `readlines()` in loop 11 11 | c = {line.lower(): line.upper() for line in f.readlines()} 12 12 | -FURB129.py:10:35: FURB129 [*] Use of `readlines()` in loop +FURB129.py:10:35: FURB129 [*] Instead of calling `readlines()`, iterate over file object directly | 8 | pass 9 | a = [line.lower() for line in f.readlines()] @@ -63,7 +63,7 @@ FURB129.py:10:35: FURB129 [*] Use of `readlines()` in loop 12 12 | 13 13 | with Path("FURB129.py").open() as f: -FURB129.py:11:49: FURB129 [*] Use of `readlines()` in loop +FURB129.py:11:49: FURB129 [*] Instead of calling `readlines()`, iterate over file object directly | 9 | a = [line.lower() for line in f.readlines()] 10 | b = {line.upper() for line in f.readlines()} @@ -84,7 +84,7 @@ FURB129.py:11:49: FURB129 [*] Use of `readlines()` in loop 13 13 | with Path("FURB129.py").open() as f: 14 14 | for _line in f.readlines(): -FURB129.py:14:18: FURB129 [*] Use of `readlines()` in loop +FURB129.py:14:18: FURB129 [*] Instead of calling `readlines()`, iterate over file object directly | 13 | with Path("FURB129.py").open() as f: 14 | for _line in f.readlines(): @@ -103,7 +103,7 @@ FURB129.py:14:18: FURB129 [*] Use of `readlines()` in loop 16 16 | 17 17 | for _line in open("FURB129.py").readlines(): -FURB129.py:17:14: FURB129 [*] Use of `readlines()` in loop +FURB129.py:17:14: FURB129 [*] Instead of calling `readlines()`, iterate over file object directly | 15 | pass 16 | @@ -123,7 +123,7 @@ FURB129.py:17:14: FURB129 [*] Use of `readlines()` in loop 19 19 | 20 20 | for _line in Path("FURB129.py").open().readlines(): -FURB129.py:20:14: FURB129 [*] Use of `readlines()` in loop +FURB129.py:20:14: FURB129 [*] Instead of calling `readlines()`, iterate over file object directly | 18 | pass 19 | @@ -143,7 +143,7 @@ FURB129.py:20:14: FURB129 [*] Use of `readlines()` in loop 22 22 | 23 23 | -FURB129.py:26:18: FURB129 [*] Use of `readlines()` in loop +FURB129.py:26:18: FURB129 [*] Instead of calling `readlines()`, iterate over file object directly | 24 | def good1(): 25 | f = Path("FURB129.py").open() @@ -164,7 +164,7 @@ FURB129.py:26:18: FURB129 [*] Use of `readlines()` in loop 28 28 | f.close() 29 29 | -FURB129.py:32:18: FURB129 [*] Use of `readlines()` in loop +FURB129.py:32:18: FURB129 [*] Instead of calling `readlines()`, iterate over file object directly | 31 | def good2(f: io.BytesIO): 32 | for _line in f.readlines(): @@ -183,61 +183,4 @@ FURB129.py:32:18: FURB129 [*] Use of `readlines()` in loop 34 34 | 35 35 | -FURB129.py:38:18: FURB129 [*] Use of `readlines()` in loop - | -36 | # False positives -37 | def bad(f): -38 | for _line in f.readlines(): - | ^^^^^^^^^^^^^ FURB129 -39 | pass - | - = help: Remove `readlines()` - -ℹ Unsafe fix -35 35 | -36 36 | # False positives -37 37 | def bad(f): -38 |- for _line in f.readlines(): - 38 |+ for _line in f: -39 39 | pass -40 40 | -41 41 | - -FURB129.py:43:18: FURB129 [*] Use of `readlines()` in loop - | -42 | def worse(f: codecs.StreamReader): -43 | for _line in f.readlines(): - | ^^^^^^^^^^^^^ FURB129 -44 | pass - | - = help: Remove `readlines()` - -ℹ Unsafe fix -40 40 | -41 41 | -42 42 | def worse(f: codecs.StreamReader): -43 |- for _line in f.readlines(): - 43 |+ for _line in f: -44 44 | pass -45 45 | -46 46 | - -FURB129.py:55:14: FURB129 [*] Use of `readlines()` in loop - | -55 | for _line in foo().readlines(): - | ^^^^^^^^^^^^^^^^^ FURB129 -56 | pass - | - = help: Remove `readlines()` - -ℹ Unsafe fix -52 52 | return A() -53 53 | -54 54 | -55 |-for _line in foo().readlines(): - 55 |+for _line in foo(): -56 56 | pass -57 57 | -58 58 | # OK - diff --git a/crates/ruff_python_semantic/src/analyze/typing.rs b/crates/ruff_python_semantic/src/analyze/typing.rs index 5c60b21f44e9b..48d3b597f176e 100644 --- a/crates/ruff_python_semantic/src/analyze/typing.rs +++ b/crates/ruff_python_semantic/src/analyze/typing.rs @@ -406,7 +406,7 @@ where } /// Abstraction for a type checker, conservatively checks for the intended type(s). -trait TypeChecker { +pub trait TypeChecker { /// Check annotation expression to match the intended type(s). fn match_annotation(annotation: &Expr, semantic: &SemanticModel) -> bool; /// Check initializer expression to match the intended type(s). @@ -441,6 +441,24 @@ fn check_type(binding: &Binding, semantic: &SemanticModel) -> bo _ => false, }, + BindingKind::WithItemVar => match binding.statement(semantic) { + // ```python + // with open("file.txt") as x: + // ... + // ``` + Some(Stmt::With(ast::StmtWith { items, .. })) => { + let Some(item) = items.iter().find(|item| { + item.optional_vars + .as_ref() + .is_some_and(|vars| vars.range().contains_range(binding.range)) + }) else { + return false; + }; + T::match_initializer(&item.context_expr, semantic) + } + _ => false, + }, + BindingKind::Argument => match binding.statement(semantic) { // ```python // def foo(x: annotation): @@ -565,35 +583,125 @@ impl BuiltinTypeChecker for TupleChecker { const EXPR_TYPE: PythonType = PythonType::Tuple; } -/// Test whether the given binding (and the given name) can be considered a list. +pub struct IoBaseChecker; + +impl TypeChecker for IoBaseChecker { + fn match_annotation(annotation: &Expr, semantic: &SemanticModel) -> bool { + semantic + .resolve_call_path(annotation) + .is_some_and(|call_path| { + if semantic.match_typing_call_path(&call_path, "IO") { + return true; + } + if semantic.match_typing_call_path(&call_path, "BinaryIO") { + return true; + } + if semantic.match_typing_call_path(&call_path, "TextIO") { + return true; + } + matches!( + call_path.as_slice(), + [ + "io", + "IOBase" + | "RawIOBase" + | "BufferedIOBase" + | "TextIOBase" + | "BytesIO" + | "StringIO" + | "BufferedReader" + | "BufferedWriter" + | "BufferedRandom" + | "BufferedRWPair" + | "TextIOWrapper" + ] | ["os", "Path" | "PathLike"] + | [ + "pathlib", + "Path" | "PurePath" | "PurePosixPath" | "PureWindowsPath" + ] + ) + }) + } + + fn match_initializer(initializer: &Expr, semantic: &SemanticModel) -> bool { + let Expr::Call(ast::ExprCall { func, .. }) = initializer else { + return false; + }; + + // Ex) `pathlib.Path("file.txt")` + if let Expr::Attribute(ast::ExprAttribute { value, attr, .. }) = func.as_ref() { + if attr.as_str() == "open" { + if let Expr::Call(ast::ExprCall { func, .. }) = value.as_ref() { + return semantic.resolve_call_path(func).is_some_and(|call_path| { + matches!( + call_path.as_slice(), + [ + "pathlib", + "Path" | "PurePath" | "PurePosixPath" | "PureWindowsPath" + ] + ) + }); + } + } + } + + // Ex) `open("file.txt")` + semantic + .resolve_call_path(func.as_ref()) + .is_some_and(|call_path| { + matches!( + call_path.as_slice(), + ["io", "open" | "open_code"] | ["os" | "", "open"] + ) + }) + } +} + +/// Test whether the given binding can be considered a list. +/// /// For this, we check what value might be associated with it through it's initialization and /// what annotation it has (we consider `list` and `typing.List`). pub fn is_list(binding: &Binding, semantic: &SemanticModel) -> bool { check_type::(binding, semantic) } -/// Test whether the given binding (and the given name) can be considered a dictionary. +/// Test whether the given binding can be considered a dictionary. +/// /// For this, we check what value might be associated with it through it's initialization and /// what annotation it has (we consider `dict` and `typing.Dict`). pub fn is_dict(binding: &Binding, semantic: &SemanticModel) -> bool { check_type::(binding, semantic) } -/// Test whether the given binding (and the given name) can be considered a set. +/// Test whether the given binding can be considered a set. +/// /// For this, we check what value might be associated with it through it's initialization and /// what annotation it has (we consider `set` and `typing.Set`). pub fn is_set(binding: &Binding, semantic: &SemanticModel) -> bool { check_type::(binding, semantic) } -/// Test whether the given binding (and the given name) can be considered a -/// tuple. For this, we check what value might be associated with it through +/// Test whether the given binding can be considered a tuple. +/// +/// For this, we check what value might be associated with it through /// it's initialization and what annotation it has (we consider `tuple` and /// `typing.Tuple`). pub fn is_tuple(binding: &Binding, semantic: &SemanticModel) -> bool { check_type::(binding, semantic) } +/// Test whether the given binding can be considered a file-like object (i.e., a type that +/// implements `io.IOBase`). +pub fn is_io_base(binding: &Binding, semantic: &SemanticModel) -> bool { + check_type::(binding, semantic) +} + +/// Test whether the given expression can be considered a file-like object (i.e., a type that +/// implements `io.IOBase`). +pub fn is_io_base_expr(expr: &Expr, semantic: &SemanticModel) -> bool { + IoBaseChecker::match_initializer(expr, semantic) +} + /// Find the [`ParameterWithDefault`] corresponding to the given [`Binding`]. #[inline] fn find_parameter<'a>( @@ -699,6 +807,18 @@ pub fn find_binding_value<'a>(binding: &Binding, semantic: &'a SemanticModel) -> _ => {} } } + // Ex) `with open("file.txt") as f:` + BindingKind::WithItemVar => { + let parent_id = binding.source?; + let parent = semantic.statement(parent_id); + if let Stmt::With(ast::StmtWith { items, .. }) = parent { + return items.iter().find_map(|item| { + let target = item.optional_vars.as_ref()?; + let value = &item.context_expr; + match_value(binding, target, value) + }); + } + } _ => {} } None