Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[flake8-simplify] Implementation for split-of-static-string (SIM905) #14008

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
# setup
sep = ","
no_sep = None

# positives
"""
itemA
itemB
itemC
""".split()

"a,b,c,d".split(",")
"a,b,c,d".split(None)
"a,b,c,d".split(",", 1)
"a,b,c,d".split(None, 1)
"a,b,c,d".split(sep=",")
"a,b,c,d".split(sep=None)
"a,b,c,d".split(sep=",", maxsplit=1)
"a,b,c,d".split(sep=None, maxsplit=1)
"a,b,c,d".split(maxsplit=1, sep=",")
"a,b,c,d".split(maxsplit=1, sep=None)
"a,b,c,d".split(",", maxsplit=1)
"a,b,c,d".split(None, maxsplit=1)
"a,b,c,d".split(maxsplit=1)
"a,b,c,d".split(maxsplit=1.0)
"a,b,c,d".split(maxsplit=1)
"a,b,c,d".split(maxsplit=0)
"VERB AUX PRON ADP DET".split(" ")
' 1 2 3 '.split()
'1<>2<>3<4'.split('<>')

" a*a a*a a ".split("*", -1) # [' a', 'a a', 'a a ']
"".split() # []
"""
""".split() # []
" ".split() # []
"/abc/".split() # ['/abc/']
("a,b,c"
# comment
.split()
) # ['a,b,c']
("a,b,c"
# comment1
.split(",")
) # ['a', 'b', 'c']
("a,"
# comment
"b,"
"c"
.split(",")
) # ['a', 'b', 'c']

"hello "\
"world".split()
# ['hello', 'world']

# prefixes and isc
u"a b".split() # ['a', 'b']
r"a \n b".split() # ['a', '\\n', 'b']
("a " "b").split() # ['a', 'b']
"a " "b".split() # ['a', 'b']
u"a " "b".split() # ['a', 'b']
"a " u"b".split() # ['a', 'b']
u"a " r"\n".split() # ['a', '\\n']
r"\n " u"\n".split() # ['\\n']
r"\n " "\n".split() # ['\\n']
"a " r"\n".split() # ['a', '\\n']

"a,b,c".split(',', maxsplit=0) # ['a,b,c']
"a,b,c".split(',', maxsplit=-1) # ['a', 'b', 'c']
"a,b,c".split(',', maxsplit=-2) # ['a', 'b', 'c']
"a,b,c".split(',', maxsplit=-0) # ['a,b,c']

# negatives

# invalid values should not cause panic
"a,b,c,d".split(maxsplit="hello")
"a,b,c,d".split(maxsplit=-"hello")

# variable names not implemented
"a,b,c,d".split(sep)
"a,b,c,d".split(no_sep)
for n in range(3):
"a,b,c,d".split(",", maxsplit=n)

# f-strings not yet implemented
world = "world"
_ = f"{world}_hello_world".split("_")

hello = "hello_world"
_ = f"{hello}_world".split("_")

# split on bytes not yet implemented, much less frequent
b"TesT.WwW.ExamplE.CoM".split(b".")

# str.splitlines not yet implemented
"hello\nworld".splitlines()
"hello\nworld".splitlines(keepends=True)
"hello\nworld".splitlines(keepends=False)
12 changes: 12 additions & 0 deletions crates/ruff_linter/src/checkers/ast/analyze/expression.rs
Original file line number Diff line number Diff line change
Expand Up @@ -384,6 +384,8 @@ pub(crate) fn expression(expr: &Expr, checker: &mut Checker) {
Rule::StaticJoinToFString,
// refurb
Rule::HashlibDigestHex,
// flake8-simplify
Rule::SplitStaticString,
]) {
if let Expr::Attribute(ast::ExprAttribute { value, attr, .. }) = func.as_ref() {
let attr = attr.as_str();
Expand All @@ -401,6 +403,16 @@ pub(crate) fn expression(expr: &Expr, checker: &mut Checker) {
string_value.to_str(),
);
}
} else if matches!(attr, "split" | "rsplit") {
// "...".split(...) call
if checker.enabled(Rule::SplitStaticString) {
flake8_simplify::rules::split_static_string(
checker,
attr,
call,
string_value.to_str(),
);
}
} else if attr == "format" {
// "...".format(...) call
let location = expr.range();
Expand Down
1 change: 1 addition & 0 deletions crates/ruff_linter/src/codes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -480,6 +480,7 @@ pub fn code_to_rule(linter: Linter, code: &str) -> Option<(RuleGroup, Rule)> {
(Flake8Simplify, "223") => (RuleGroup::Stable, rules::flake8_simplify::rules::ExprAndFalse),
(Flake8Simplify, "300") => (RuleGroup::Stable, rules::flake8_simplify::rules::YodaConditions),
(Flake8Simplify, "401") => (RuleGroup::Stable, rules::flake8_simplify::rules::IfElseBlockInsteadOfDictGet),
(Flake8Simplify, "905") => (RuleGroup::Preview, rules::flake8_simplify::rules::SplitStaticString),
(Flake8Simplify, "910") => (RuleGroup::Stable, rules::flake8_simplify::rules::DictGetWithNoneDefault),
(Flake8Simplify, "911") => (RuleGroup::Stable, rules::flake8_simplify::rules::ZipDictKeysAndValues),

Expand Down
5 changes: 3 additions & 2 deletions crates/ruff_linter/src/rules/flake8_simplify/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,9 @@ mod tests {
#[test_case(Rule::ReimplementedBuiltin, Path::new("SIM111.py"))]
#[test_case(Rule::UncapitalizedEnvironmentVariables, Path::new("SIM112.py"))]
#[test_case(Rule::EnumerateForLoop, Path::new("SIM113.py"))]
#[test_case(Rule::IfWithSameArms, Path::new("SIM114.py"))]
#[test_case(Rule::OpenFileWithContextHandler, Path::new("SIM115.py"))]
#[test_case(Rule::IfElseBlockInsteadOfDictLookup, Path::new("SIM116.py"))]
#[test_case(Rule::MultipleWithStatements, Path::new("SIM117.py"))]
#[test_case(Rule::InDictKeys, Path::new("SIM118.py"))]
#[test_case(Rule::NegateEqualOp, Path::new("SIM201.py"))]
Expand All @@ -44,9 +46,8 @@ mod tests {
#[test_case(Rule::ExprAndFalse, Path::new("SIM223.py"))]
#[test_case(Rule::YodaConditions, Path::new("SIM300.py"))]
#[test_case(Rule::IfElseBlockInsteadOfDictGet, Path::new("SIM401.py"))]
#[test_case(Rule::SplitStaticString, Path::new("SIM905.py"))]
#[test_case(Rule::DictGetWithNoneDefault, Path::new("SIM910.py"))]
#[test_case(Rule::IfElseBlockInsteadOfDictLookup, Path::new("SIM116.py"))]
#[test_case(Rule::IfWithSameArms, Path::new("SIM114.py"))]
fn rules(rule_code: Rule, path: &Path) -> Result<()> {
let snapshot = format!("{}_{}", rule_code.noqa_code(), path.to_string_lossy());
let diagnostics = test_path(
Expand Down
2 changes: 2 additions & 0 deletions crates/ruff_linter/src/rules/flake8_simplify/rules/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ pub(crate) use needless_bool::*;
pub(crate) use open_file_with_context_handler::*;
pub(crate) use reimplemented_builtin::*;
pub(crate) use return_in_try_except_finally::*;
pub(crate) use split_static_string::*;
pub(crate) use suppressible_exception::*;
pub(crate) use yoda_conditions::*;
pub(crate) use zip_dict_keys_and_values::*;
Expand All @@ -35,6 +36,7 @@ mod needless_bool;
mod open_file_with_context_handler;
mod reimplemented_builtin;
mod return_in_try_except_finally;
mod split_static_string;
mod suppressible_exception;
mod yoda_conditions;
mod zip_dict_keys_and_values;
Original file line number Diff line number Diff line change
@@ -0,0 +1,219 @@
use std::cmp::Ordering;

use ruff_diagnostics::{Applicability, Diagnostic, Edit, Fix, FixAvailability, Violation};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::{
Expr, ExprCall, ExprContext, ExprList, ExprStringLiteral, ExprUnaryOp, StringLiteral,
StringLiteralFlags, StringLiteralValue, UnaryOp,
};
use ruff_text_size::{Ranged, TextRange};

use crate::checkers::ast::Checker;

/// ## What it does
/// Checks for static `str.split` calls that can be replaced with list literals.
///
/// ## Why is this bad?
/// List literals are more readable and do not require the overhead of calling `str.split`.
///
/// ## Example
/// ```python
/// "a,b,c,d".split(",")
/// ```
///
/// Use instead:
/// ```python
/// ["a", "b", "c", "d"]
/// ```
///
/// ## Fix safety
/// This rule's fix is marked as unsafe for implicit string concatenations with comments interleaved
/// between segments, as comments may be removed.
///
/// For example, the fix would be marked as unsafe in the following case:
/// ```python
/// (
/// "a" # comment
/// "," # comment
/// "b" # comment
/// ).split(",")
/// ```
///
/// ## References
/// - [Python documentation: `str.split`](https://docs.python.org/3/library/stdtypes.html#str.split)
/// ```
#[violation]
pub struct SplitStaticString;

impl Violation for SplitStaticString {
const FIX_AVAILABILITY: FixAvailability = FixAvailability::Sometimes;

#[derive_message_formats]
fn message(&self) -> String {
format!("Consider using a list literal instead of `str.split`")
}

fn fix_title(&self) -> Option<String> {
Some("Replace with list literal".to_string())
}
}

/// SIM905
pub(crate) fn split_static_string(
checker: &mut Checker,
attr: &str,
call: &ExprCall,
str_value: &str,
) {
let ExprCall { arguments, .. } = call;

let maxsplit_arg = arguments.find_argument("maxsplit", 1);
let Some(maxsplit_value) = get_maxsplit_value(maxsplit_arg) else {
return;
};

// `split` vs `rsplit`.
let direction = if attr == "split" {
Direction::Left
} else {
Direction::Right
};

let sep_arg = arguments.find_argument("sep", 0);
let split_replacement = if let Some(sep) = sep_arg {
match sep {
Expr::NoneLiteral(_) => split_default(str_value, maxsplit_value),
Expr::StringLiteral(sep_value) => {
let sep_value_str = sep_value.value.to_str();
Some(split_sep(
str_value,
sep_value_str,
maxsplit_value,
direction,
))
}
// Ignore names until type inference is available.
_ => {
return;
}
}
} else {
split_default(str_value, maxsplit_value)
};

let mut diagnostic = Diagnostic::new(SplitStaticString, call.range());
if let Some(ref replacement_expr) = split_replacement {
diagnostic.set_fix(Fix::applicable_edit(
Edit::range_replacement(checker.generator().expr(replacement_expr), call.range()),
// The fix does not preserve comments within implicit string concatenations.
if checker.comment_ranges().intersects(call.range()) {
Applicability::Unsafe
} else {
Applicability::Safe
},
));
}
checker.diagnostics.push(diagnostic);
}

fn construct_replacement(elts: &[&str]) -> Expr {
Expr::List(ExprList {
elts: elts
.iter()
.map(|elt| {
Expr::StringLiteral(ExprStringLiteral {
value: StringLiteralValue::single(StringLiteral {
value: (*elt).to_string().into_boxed_str(),
range: TextRange::default(),
flags: StringLiteralFlags::default(),
}),
range: TextRange::default(),
})
})
.collect(),
ctx: ExprContext::Load,
range: TextRange::default(),
})
}

fn split_default(str_value: &str, max_split: i32) -> Option<Expr> {
// From the Python documentation:
// > If sep is not specified or is None, a different splitting algorithm is applied: runs of
// > consecutive whitespace are regarded as a single separator, and the result will contain
// > no empty strings at the start or end if the string has leading or trailing whitespace.
// > Consequently, splitting an empty string or a string consisting of just whitespace with
// > a None separator returns [].
// https://docs.python.org/3/library/stdtypes.html#str.split
match max_split.cmp(&0) {
Ordering::Greater => {
// Autofix for `maxsplit` without separator not yet implemented, as
// `split_whitespace().remainder()` is not stable:
// https://doc.rust-lang.org/std/str/struct.SplitWhitespace.html#method.remainder
None
}
Ordering::Equal => {
let list_items: Vec<&str> = vec![str_value];
Some(construct_replacement(&list_items))
}
Ordering::Less => {
let list_items: Vec<&str> = str_value.split_whitespace().collect();
Some(construct_replacement(&list_items))
}
}
}

fn split_sep(str_value: &str, sep_value: &str, max_split: i32, direction: Direction) -> Expr {
let list_items: Vec<&str> = if let Ok(split_n) = usize::try_from(max_split) {
match direction {
Direction::Left => str_value.splitn(split_n + 1, sep_value).collect(),
Direction::Right => str_value.rsplitn(split_n + 1, sep_value).collect(),
}
} else {
match direction {
Direction::Left => str_value.split(sep_value).collect(),
Direction::Right => str_value.rsplit(sep_value).collect(),
}
};

construct_replacement(&list_items)
}

/// Returns the value of the `maxsplit` argument as an `i32`, if it is a numeric value.
fn get_maxsplit_value(arg: Option<&Expr>) -> Option<i32> {
if let Some(maxsplit) = arg {
match maxsplit {
// Negative number.
Expr::UnaryOp(ExprUnaryOp {
op: UnaryOp::USub,
operand,
..
}) => {
match &**operand {
Expr::NumberLiteral(maxsplit_val) => maxsplit_val
.value
.as_int()
.and_then(ruff_python_ast::Int::as_i32)
.map(|f| -f),
// Ignore when `maxsplit` is not a numeric value.
_ => None,
}
}
// Positive number
Expr::NumberLiteral(maxsplit_val) => maxsplit_val
.value
.as_int()
.and_then(ruff_python_ast::Int::as_i32),
// Ignore when `maxsplit` is not a numeric value.
_ => None,
}
} else {
// Default value is -1 (no splits).
Some(-1)
}
}

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Direction {
Left,
Right,
}
Loading
Loading