Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement UnnecessaryListAllocationForFirstElement #5549

Merged
merged 19 commits into from
Jul 10, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 44 additions & 0 deletions crates/ruff/resources/test/fixtures/ruff/RUF015.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
x = range(10)

# RUF015
list(x)[0]
evanrittenhouse marked this conversation as resolved.
Show resolved Hide resolved
list(x)[:1]
list(x)[:1:1]
list(x)[:1:2]
tuple(x)[0]
tuple(x)[:1]
tuple(x)[:1:1]
tuple(x)[:1:2]
list(i for i in x)[0]
list(i for i in x)[:1]
list(i for i in x)[:1:1]
list(i for i in x)[:1:2]
[i for i in x][0]
evanrittenhouse marked this conversation as resolved.
Show resolved Hide resolved
[i for i in x][:1]
[i for i in x][:1:1]
[i for i in x][:1:2]

# OK (not indexing (solely) the first element)
list(x)
list(x)[1]
list(x)[-1]
list(x)[1:]
list(x)[:3:2]
list(x)[::2]
list(x)[::]
[i for i in x]
[i for i in x][1]
[i for i in x][-1]
[i for i in x][1:]
[i for i in x][:3:2]
[i for i in x][::2]
[i for i in x][::]

# OK (doesn't mirror the underlying list)
[i + 1 for i in x][0]
[i for i in x if i > 5][0]
[(i, i + 1) for i in x][0]

# OK (multiple generators)
y = range(10)
[i + j for i in x for j in y][0]
5 changes: 4 additions & 1 deletion crates/ruff/src/checkers/ast/mod.rs
Original file line number Diff line number Diff line change
@@ -2139,7 +2139,7 @@ where

// Pre-visit.
match expr {
Expr::Subscript(ast::ExprSubscript { value, slice, .. }) => {
subscript @ Expr::Subscript(ast::ExprSubscript { value, slice, .. }) => {
// Ex) Optional[...], Union[...]
if self.any_enabled(&[
Rule::FutureRewritableTypeAnnotation,
@@ -2222,6 +2222,9 @@ where
if self.enabled(Rule::UncapitalizedEnvironmentVariables) {
flake8_simplify::rules::use_capital_environment_variables(self, expr);
}
if self.enabled(Rule::UnnecessaryIterableAllocationForFirstElement) {
ruff::rules::unnecessary_iterable_allocation_for_first_element(self, subscript);
}

pandas_vet::rules::subscript(self, value, expr);
}
1 change: 1 addition & 0 deletions crates/ruff/src/codes.rs
Original file line number Diff line number Diff line change
@@ -778,6 +778,7 @@ pub fn code_to_rule(linter: Linter, code: &str) -> Option<(RuleGroup, Rule)> {
(Ruff, "013") => (RuleGroup::Unspecified, rules::ruff::rules::ImplicitOptional),
#[cfg(feature = "unreachable-code")]
(Ruff, "014") => (RuleGroup::Nursery, rules::ruff::rules::UnreachableCode),
(Ruff, "015") => (RuleGroup::Unspecified, rules::ruff::rules::UnnecessaryIterableAllocationForFirstElement),
(Ruff, "100") => (RuleGroup::Unspecified, rules::ruff::rules::UnusedNOQA),
(Ruff, "200") => (RuleGroup::Unspecified, rules::ruff::rules::InvalidPyprojectToml),

4 changes: 4 additions & 0 deletions crates/ruff/src/rules/ruff/mod.rs
Original file line number Diff line number Diff line change
@@ -30,6 +30,10 @@ mod tests {
#[test_case(Rule::MutableDataclassDefault, Path::new("RUF008.py"))]
#[test_case(Rule::PairwiseOverZipped, Path::new("RUF007.py"))]
#[test_case(Rule::StaticKeyDictComprehension, Path::new("RUF011.py"))]
#[test_case(
Rule::UnnecessaryIterableAllocationForFirstElement,
Path::new("RUF015.py")
)]
#[cfg_attr(
feature = "unreachable-code",
test_case(Rule::UnreachableCode, Path::new("RUF014.py"))
2 changes: 2 additions & 0 deletions crates/ruff/src/rules/ruff/rules/mod.rs
Original file line number Diff line number Diff line change
@@ -9,6 +9,7 @@ pub(crate) use mutable_class_default::*;
pub(crate) use mutable_dataclass_default::*;
pub(crate) use pairwise_over_zipped::*;
pub(crate) use static_key_dict_comprehension::*;
pub(crate) use unnecessary_iterable_allocation_for_first_element::*;
#[cfg(feature = "unreachable-code")]
pub(crate) use unreachable::*;
pub(crate) use unused_noqa::*;
@@ -26,6 +27,7 @@ mod mutable_class_default;
mod mutable_dataclass_default;
mod pairwise_over_zipped;
mod static_key_dict_comprehension;
mod unnecessary_iterable_allocation_for_first_element;
#[cfg(feature = "unreachable-code")]
pub(crate) mod unreachable;
mod unused_noqa;
Original file line number Diff line number Diff line change
@@ -0,0 +1,236 @@
use num_bigint::BigInt;
use num_traits::{One, Zero};
use rustpython_parser::ast::{self, Comprehension, Constant, Expr};

use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Edit, Fix};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_semantic::SemanticModel;

use crate::checkers::ast::Checker;
use crate::registry::AsRule;

/// ## What it does
/// Checks for uses of `list(...)[0]` that can be replaced with
/// `next(iter(...))`.
///
/// ## Why is this bad?
/// Calling `list(...)` will create a new list of the entire collection, which
/// can be very expensive for large collections. If you only need the first
/// element of the collection, you can use `next(iter(...))` to lazily fetch
/// the first element without creating a new list.
///
/// Note that migrating from `list(...)[0]` to `next(iter(...))` can change
/// the behavior of your program in two ways:
///
/// 1. First, `list(...)` will eagerly evaluate the entire collection, while
/// `next(iter(...))` will only evaluate the first element. As such, any
/// side effects that occur during iteration will be delayed.
/// 2. Second, `list(...)[0]` will raise `IndexError` if the collection is
/// empty, while `next(iter(...))` will raise `StopIteration`.
///
/// ## Example
/// ```python
/// head = list(range(1000000000000))[0]
/// ```
///
/// Use instead:
/// ```python
/// head = next(iter(range(1000000000000)))
/// ```
///
/// ## References
/// - [Iterators and Iterables in Python: Run Efficient Iterations](https://realpython.com/python-iterators-iterables/#when-to-use-an-iterator-in-python)
#[violation]
pub(crate) struct UnnecessaryIterableAllocationForFirstElement {
iterable: String,
subscript_kind: HeadSubscriptKind,
}

impl AlwaysAutofixableViolation for UnnecessaryIterableAllocationForFirstElement {
#[derive_message_formats]
fn message(&self) -> String {
let UnnecessaryIterableAllocationForFirstElement {
iterable,
subscript_kind,
} = self;
match subscript_kind {
HeadSubscriptKind::Index => {
format!("Prefer `next(iter({iterable}))` over `list({iterable})[0]`")
}
HeadSubscriptKind::Slice => {
format!("Prefer `[next(iter({iterable}))]` over `list({iterable})[:1]`")
}
}
}

fn autofix_title(&self) -> String {
let UnnecessaryIterableAllocationForFirstElement {
iterable,
subscript_kind,
} = self;
match subscript_kind {
HeadSubscriptKind::Index => format!("Replace with `next(iter({iterable}))`"),
HeadSubscriptKind::Slice => format!("Replace with `[next(iter({iterable}))]"),
}
}
}

/// RUF015
pub(crate) fn unnecessary_iterable_allocation_for_first_element(
checker: &mut Checker,
subscript: &Expr,
) {
let Expr::Subscript(ast::ExprSubscript {
value,
slice,
range,
..
}) = subscript
else {
return;
};

let Some(subscript_kind) = classify_subscript(slice) else {
return;
};

let Some(iterable) = iterable_name(value, checker.semantic()) else {
return;
};

let mut diagnostic = Diagnostic::new(
UnnecessaryIterableAllocationForFirstElement {
iterable: iterable.to_string(),
subscript_kind,
},
*range,
);

if checker.patch(diagnostic.kind.rule()) {
let replacement = match subscript_kind {
HeadSubscriptKind::Index => format!("next(iter({iterable}))"),
HeadSubscriptKind::Slice => format!("[next(iter({iterable}))]"),
};
diagnostic.set_fix(Fix::suggested(Edit::range_replacement(replacement, *range)));
}

checker.diagnostics.push(diagnostic);
}

/// A subscript slice that represents the first element of a list.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum HeadSubscriptKind {
/// The subscript is an index (e.g., `[0]`).
Index,
/// The subscript is a slice (e.g., `[:1]`).
Slice,
}

/// Check that the slice [`Expr`] is functionally equivalent to slicing into the first element. The
/// first `bool` checks that the element is in fact first, the second checks if it's a slice or an
/// index.
fn classify_subscript(expr: &Expr) -> Option<HeadSubscriptKind> {
match expr {
Expr::Constant(ast::ExprConstant {
value: Constant::Int(value),
..
}) if value.is_zero() => Some(HeadSubscriptKind::Index),
Expr::Slice(ast::ExprSlice {
step, lower, upper, ..
}) => {
// Avoid, e.g., `list(...)[:2]`
let upper = upper.as_ref()?;
let upper = as_int(upper)?;
if !upper.is_one() {
return None;
}

// Avoid, e.g., `list(...)[2:]`.
if let Some(lower) = lower.as_ref() {
let lower = as_int(lower)?;
if !lower.is_zero() {
return None;
}
}

// Avoid, e.g., `list(...)[::-1]`
if let Some(step) = step.as_ref() {
let step = as_int(step)?;
if step < upper {
return None;
}
}

Some(HeadSubscriptKind::Slice)
}
_ => None,
}
}

/// Fetch the name of the iterable from an expression if the expression returns an unmodified list
/// which can be sliced into.
fn iterable_name<'a>(expr: &'a Expr, model: &SemanticModel) -> Option<&'a str> {
match expr {
Expr::Call(ast::ExprCall { func, args, .. }) => {
let ast::ExprName { id, .. } = func.as_name_expr()?;

if !matches!(id.as_str(), "tuple" | "list") {
return None;
}

if !model.is_builtin(id.as_str()) {
return None;
}

match args.first() {
Some(Expr::Name(ast::ExprName { id: arg_name, .. })) => Some(arg_name.as_str()),
Some(Expr::GeneratorExp(ast::ExprGeneratorExp {
elt, generators, ..
})) => generator_iterable(elt, generators),
_ => None,
}
}
Expr::ListComp(ast::ExprListComp {
elt, generators, ..
}) => generator_iterable(elt, generators),
_ => None,
}
}

/// Given a comprehension, returns the name of the iterable over which it iterates, if it's
/// a simple comprehension (e.g., `x` for `[i for i in x]`).
fn generator_iterable<'a>(elt: &'a Expr, generators: &'a Vec<Comprehension>) -> Option<&'a str> {
// If the `elt` field is anything other than a [`Expr::Name`], we can't be sure that it
// doesn't modify the elements of the underlying iterator (e.g., `[i + 1 for i in x][0]`).
if !elt.is_name_expr() {
return None;
}

// If there's more than 1 generator, we can't safely say that it fits the diagnostic conditions
// (e.g., `[(i, j) for i in x for j in y][0]`).
let [generator] = generators.as_slice() else {
return None;
};

// Ignore if there's an `if` statement in the comprehension, since it filters the list.
if !generator.ifs.is_empty() {
return None;
}

let ast::ExprName { id, .. } = generator.iter.as_name_expr()?;
Some(id.as_str())
}

/// If an expression is a constant integer, returns the value of that integer; otherwise,
/// returns `None`.
fn as_int(expr: &Expr) -> Option<&BigInt> {
if let Expr::Constant(ast::ExprConstant {
value: Constant::Int(value),
..
}) = expr
{
Some(value)
} else {
None
}
}
Loading