Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ruff] Detect unnecessary dict comprehensions for iterables (RUF025) #9613

Merged
merged 13 commits into from
Jan 24, 2024
66 changes: 66 additions & 0 deletions crates/ruff_linter/resources/test/fixtures/ruff/RUF025.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# Violation cases: RUF025

def foo():
numbers = [1,2,3]
{n: None for n in numbers} # RUF025

def foo():
for key, value in {n: 1 for n in [1,2,3]}.items(): # RUF025
pass

def foo():
{n: 1.1 for n in [1,2,3]} # RUF025

def foo():
{n: complex(3, 5) for n in [1,2,3]} # RUF025

def foo():
def f(data):
return data
f({c: "a" for c in "12345"}) # RUF025

def foo():
{n: True for n in [1,2,2]} # RUF025

def foo():
{n: b'hello' for n in (1,2,2)} # RUF025

def foo():
{n: ... for n in [1,2,3]} # RUF025

def foo():
values = ["a", "b", "c"]
[{n: values for n in [1,2,3]}] # RUF025

def foo():
def f():
return 1

a = f()
{n: a for n in [1,2,3]} # RUF025

def foo():
{n: False for n in {1: "a", 2: "b"}} # RUF025


# Non-violation cases: RUF025

def foo():
{n: 1 for n in [1,2,3,4,5] if n < 3} # OK

def foo():
{n: 1 for c in [1,2,3,4,5] for n in [1,2,3] if c < 3} # OK

def foo():
def f():
pass
{n: f() for n in [1,2,3]} # OK

def foo():
{n: n for n in [1,2,3,4,5]} # OK

def foo():
def f():
return {n: 1 for c in [1,2,3,4,5] for n in [1,2,3]} # OK

f()
8 changes: 8 additions & 0 deletions crates/ruff_linter/src/checkers/ast/analyze/expression.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1422,11 +1422,19 @@ pub(crate) fn expression(expr: &Expr, checker: &mut Checker) {
if checker.enabled(Rule::UnnecessaryDictIndexLookup) {
pylint::rules::unnecessary_dict_index_lookup_comprehension(checker, expr);
}

if checker.enabled(Rule::UnnecessaryComprehension) {
flake8_comprehensions::rules::unnecessary_dict_comprehension(
checker, expr, key, value, generators,
);
}

if checker.enabled(Rule::UnnecessaryDictComprehensionForIterable) {
ruff::rules::unnecessary_dict_comprehension_for_iterable(
checker, expr, key, value, generators,
);
}

if checker.enabled(Rule::FunctionUsesLoopVariable) {
flake8_bugbear::rules::function_uses_loop_variable(checker, &Node::Expr(expr));
}
Expand Down
1 change: 1 addition & 0 deletions crates/ruff_linter/src/codes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -928,6 +928,7 @@ pub fn code_to_rule(linter: Linter, code: &str) -> Option<(RuleGroup, Rule)> {
(Ruff, "022") => (RuleGroup::Preview, rules::ruff::rules::UnsortedDunderAll),
(Ruff, "023") => (RuleGroup::Preview, rules::ruff::rules::UnsortedDunderSlots),
(Ruff, "024") => (RuleGroup::Preview, rules::ruff::rules::MutableFromkeysValue),
(Ruff, "025") => (RuleGroup::Preview, rules::ruff::rules::UnnecessaryDictComprehensionForIterable),
(Ruff, "100") => (RuleGroup::Stable, rules::ruff::rules::UnusedNOQA),
(Ruff, "200") => (RuleGroup::Stable, rules::ruff::rules::InvalidPyprojectToml),

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,3 @@ EXE001_1.py:1:1: EXE001 Shebang is present but file is not executable
3 | if __name__ == '__main__':
|


Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,3 @@ EXE002_1.py:1:1: EXE002 The file is executable but no shebang is present
| EXE002
2 | print('I should be executable.')
|


1 change: 1 addition & 0 deletions crates/ruff_linter/src/rules/ruff/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ mod tests {
#[test_case(Rule::UnsortedDunderAll, Path::new("RUF022.py"))]
#[test_case(Rule::UnsortedDunderSlots, Path::new("RUF023.py"))]
#[test_case(Rule::MutableFromkeysValue, Path::new("RUF024.py"))]
#[test_case(Rule::UnnecessaryDictComprehensionForIterable, Path::new("RUF025.py"))]
fn rules(rule_code: Rule, path: &Path) -> Result<()> {
let snapshot = format!("{}_{}", rule_code.noqa_code(), path.to_string_lossy());
let diagnostics = test_path(
Expand Down
2 changes: 2 additions & 0 deletions crates/ruff_linter/src/rules/ruff/rules/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ pub(crate) use quadratic_list_summation::*;
pub(crate) use sort_dunder_all::*;
pub(crate) use sort_dunder_slots::*;
pub(crate) use static_key_dict_comprehension::*;
pub(crate) use unnecessary_dict_comprehension_for_iterable::*;
pub(crate) use unnecessary_iterable_allocation_for_first_element::*;
pub(crate) use unnecessary_key_check::*;
pub(crate) use unused_noqa::*;
Expand All @@ -42,6 +43,7 @@ mod sequence_sorting;
mod sort_dunder_all;
mod sort_dunder_slots;
mod static_key_dict_comprehension;
mod unnecessary_dict_comprehension_for_iterable;
mod unnecessary_iterable_allocation_for_first_element;
mod unnecessary_key_check;
mod unused_noqa;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,192 @@
use anyhow::{bail, Result};
use libcst_native::{Arg, Call, Expression, Name, ParenthesizableWhitespace};
use ruff_diagnostics::{AlwaysFixableViolation, Diagnostic, Edit, Fix};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::{self as ast, Comprehension, Expr};
use ruff_python_codegen::Stylist;
use ruff_source_file::Locator;
use ruff_text_size::Ranged;

use crate::checkers::ast::Checker;
use crate::cst::matchers::match_expression;
use crate::fix::codemods::CodegenStylist;
use crate::fix::edits::pad;

/// ## What it does
/// Checks for unnecessary `dict` comprehension when creating a new dictionary from iterable.
///
/// ## Why is this bad?
/// It's unnecessary to use a `dict` comprehension to build a dictionary from an iterable when the value is `static`.
/// Use `dict.fromkeys(iterable)` instead of `{value: None for value in iterable}`.
///
/// `dict.fromkeys(iterable)` is more readable and faster than `{value: None for value in iterable}`.
///
///
/// ## Examples
/// ```python
/// {a: None for a in iterable}
/// {a: 1 for a in iterable}
/// ```
///
/// Use instead:
/// ```python
/// dict.fromkeys(iterable)
/// dict.fromkeys(iterable, 1)
/// ```
#[violation]
pub struct UnnecessaryDictComprehensionForIterable {
is_value_none_literal: bool,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: we could use an enum instead of a boolean. A boolean value is hard to read until it is linked with some context like a variable name. An enum can prove to be readable in such cases.

enum Value {
	// A `None` literal
	NoneLiteral,
	// Any value other than `None`
	Other
}

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yea, I thought about using an Enum but it kind of felt slightly over board for such a simple thing.

}

impl AlwaysFixableViolation for UnnecessaryDictComprehensionForIterable {
#[derive_message_formats]
#[allow(clippy::match_bool)]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe this must be a leftover and can be removed now?

fn message(&self) -> String {
if self.is_value_none_literal {
format!(
"Unnecessary dict comprehension for iterable (rewrite using `dict.fromkeys(iterable)`)"
)
} else {
format!(
"Unnecessary dict comprehension for iterable (rewrite using `dict.fromkeys(iterable, value)`)"
)
}
}

#[allow(clippy::match_bool)]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe this must be a leftover and can be removed now?

fn fix_title(&self) -> String {
if self.is_value_none_literal {
format!("Rewrite using `dict.fromkeys(iterable, value)`)",)
} else {
format!("Rewrite using `dict.fromkeys(iterable)`)")
}
}
}

/// RUF025
pub(crate) fn unnecessary_dict_comprehension_for_iterable(
checker: &mut Checker,
expr: &Expr,
key: &Expr,
value: &Expr,
generators: &[Comprehension],
) {
let [generator] = generators else {
return;
};

// Don't suggest `dict.fromkeys` for async generator expressions, because `dict.fromkeys` is not async.
// Don't suggest `dict.fromkeys` for nested generator expressions, because `dict.fromkeys` might be error-prone option at least for fixing.
// Don't suggest `dict.fromkeys` for generator expressions with `if` clauses, because `dict.fromkeys` might not be valid option.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: can we combine these comments and use bullet points instead?

// Don't suggest `dict.fromkeys` for
// - ..., because ...

if !generator.ifs.is_empty() && generator.is_async && generators.len() > 1 {
return;
}
let Expr::Name(_) = &generator.target else {
return;
};
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: we can use the is_ method on the node: generator.target.is_name_expr


// Don't suggest `dict.fromkeys` if key and value are binded to the same name.
if let (Expr::Name(key_name), Expr::Name(value_name)) = (key, value) {
if key_name.id == value_name.id {
return;
}
}

if !has_valid_expression_type(value) {
return;
}

let mut diagnostic = Diagnostic::new(
UnnecessaryDictComprehensionForIterable {
is_value_none_literal: value.is_none_literal_expr(),
},
expr.range(),
);
diagnostic.try_set_fix(|| {
fix_unnecessary_dict_comprehension(expr, checker.locator(), checker.stylist())
.map(Fix::safe_edit)
});
checker.diagnostics.push(diagnostic);
}

// only accept `None`, `Ellipsis`, `True`, `False`, `Number`, `String`, `Bytes`, `Name` as value
fn has_valid_expression_type(node: &ast::Expr) -> bool {
matches!(
node,
ast::Expr::StringLiteral(_)
| ast::Expr::BytesLiteral(_)
| ast::Expr::NumberLiteral(_)
| ast::Expr::BooleanLiteral(_)
| ast::Expr::NoneLiteral(_)
| ast::Expr::EllipsisLiteral(_)
| ast::Expr::Name(_)
)
}

/// Generate a [`Fix`] to replace `dict` comprehension with `dict.fromkeys`.
/// (RUF025) Convert `{n: None for n in [1,2,3]}` to `dict.fromkeys([1,2,3])` or
/// `{n: 1 for n in [1,2,3]}` to `dict.fromkeys([1,2,3], 1)`.
fn fix_unnecessary_dict_comprehension(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a specific reason you choose to use libcst nodes instead of the AST nodes to construct the fixed node? I might be wrong but I think it might be easier to construct the node using the AST nodes with the Generator to generate the equivalent source code. I'm fine with either but just trying to understand this.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, specific reason. I was not quite sure what would be the correct way to construct the fixed node. Seems that using AST node with Generator would be more reasonable.

expr: &Expr,
locator: &Locator,
stylist: &Stylist,
) -> Result<Edit> {
let module_text = locator.slice(expr);
let mut tree = match_expression(module_text)?;

match &tree {
Expression::DictComp(inner) => {
let args = match &*inner.value {
Expression::Name(value) if value.value == "None" => {
vec![Arg {
value: inner.for_in.iter.clone(),
keyword: None,
equal: None,
comma: None,
star: "",
whitespace_after_star: ParenthesizableWhitespace::default(),
whitespace_after_arg: ParenthesizableWhitespace::default(),
}]
}
_ => vec![
Arg {
value: inner.for_in.iter.clone(),
keyword: None,
equal: None,
comma: None,
star: "",
whitespace_after_star: ParenthesizableWhitespace::default(),
whitespace_after_arg: ParenthesizableWhitespace::default(),
},
Arg {
value: *inner.value.clone(),
keyword: None,
equal: None,
comma: None,
star: "",
whitespace_after_star: ParenthesizableWhitespace::default(),
whitespace_after_arg: ParenthesizableWhitespace::default(),
},
],
};
tree = Expression::Call(Box::new(Call {
func: Box::new(Expression::Name(Box::new(Name {
value: "dict.fromkeys",
lpar: vec![],
rpar: vec![],
}))),
args,
lpar: vec![],
rpar: vec![],
whitespace_after_func: ParenthesizableWhitespace::default(),
whitespace_before_args: ParenthesizableWhitespace::default(),
}));
}
_ => bail!("Expected a dict comprehension"),
}

Ok(Edit::range_replacement(
pad(tree.codegen_stylist(stylist), expr.range(), locator),
expr.range(),
))
}
Loading
Loading