Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ruff] Extend unnecessary-regular-expression to non-literal strings (RUF055) #14679

Merged
merged 22 commits into from
Dec 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -74,3 +74,21 @@ def dashrepl(matchobj):
"",
s, # string
)

# A diagnostic should not be emitted for `sub` replacements with backreferences or
# most other ASCII escapes
re.sub(r"a", r"\g<0>\g<0>\g<0>", "a")
re.sub(r"a", r"\1", "a")
re.sub(r"a", r"\s", "a")

# Escapes like \n are "processed":
# `re.sub(r"a", r"\n", some_string)` is fixed to `some_string.replace("a", "\n")`
# *not* `some_string.replace("a", "\\n")`.
# We currently emit diagnostics for some of these without fixing them.
re.sub(r"a", "\n", "a")
re.sub(r"a", r"\n", "a")
re.sub(r"a", "\a", "a")
re.sub(r"a", r"\a", "a")

re.sub(r"a", "\?", "a")
re.sub(r"a", r"\?", "a")
17 changes: 17 additions & 0 deletions crates/ruff_linter/resources/test/fixtures/ruff/RUF055_1.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
"""Test that RUF055 can follow a single str assignment for both the pattern and
the replacement argument to re.sub
"""

import re

pat1 = "needle"

re.sub(pat1, "", haystack)

# aliases are not followed, so this one should not trigger the rule
if pat4 := pat1:
re.sub(pat4, "", haystack)

# also works for the `repl` argument in sub
repl = "new"
re.sub(r"abc", repl, haystack)
3 changes: 2 additions & 1 deletion crates/ruff_linter/src/rules/ruff/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -409,7 +409,8 @@ mod tests {
#[test_case(Rule::MapIntVersionParsing, Path::new("RUF048_1.py"))]
#[test_case(Rule::UnrawRePattern, Path::new("RUF039.py"))]
#[test_case(Rule::UnrawRePattern, Path::new("RUF039_concat.py"))]
#[test_case(Rule::UnnecessaryRegularExpression, Path::new("RUF055.py"))]
#[test_case(Rule::UnnecessaryRegularExpression, Path::new("RUF055_0.py"))]
#[test_case(Rule::UnnecessaryRegularExpression, Path::new("RUF055_1.py"))]
fn preview_rules(rule_code: Rule, path: &Path) -> Result<()> {
let snapshot = format!(
"preview__{}_{}",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
use ruff_diagnostics::{AlwaysFixableViolation, Applicability, Diagnostic, Edit, Fix};
use itertools::Itertools;
use ruff_diagnostics::{Applicability, Diagnostic, Edit, Fix, FixAvailability, Violation};
use ruff_macros::{derive_message_formats, ViolationMetadata};
use ruff_python_ast::{
Arguments, CmpOp, Expr, ExprAttribute, ExprCall, ExprCompare, ExprContext, Identifier,
Arguments, CmpOp, Expr, ExprAttribute, ExprCall, ExprCompare, ExprContext, ExprStringLiteral,
Identifier,
};
use ruff_python_semantic::analyze::typing::find_binding_value;
use ruff_python_semantic::{Modules, SemanticModel};
use ruff_text_size::TextRange;

Expand Down Expand Up @@ -53,17 +56,19 @@ use crate::checkers::ast::Checker;
/// - [Python Regular Expression HOWTO: Common Problems - Use String Methods](https://docs.python.org/3/howto/regex.html#use-string-methods)
#[derive(ViolationMetadata)]
pub(crate) struct UnnecessaryRegularExpression {
replacement: String,
replacement: Option<String>,
}

impl AlwaysFixableViolation for UnnecessaryRegularExpression {
impl Violation for UnnecessaryRegularExpression {
const FIX_AVAILABILITY: FixAvailability = FixAvailability::Sometimes;

#[derive_message_formats]
fn message(&self) -> String {
"Plain string pattern passed to `re` function".to_string()
}

fn fix_title(&self) -> String {
format!("Replace with `{}`", self.replacement)
fn fix_title(&self) -> Option<String> {
Some(format!("Replace with `{}`", self.replacement.as_ref()?))
}
}

Expand All @@ -90,8 +95,8 @@ pub(crate) fn unnecessary_regular_expression(checker: &mut Checker, call: &ExprC
return;
};

// For now, restrict this rule to string literals
let Some(string_lit) = re_func.pattern.as_string_literal_expr() else {
// For now, restrict this rule to string literals and variables that can be resolved to literals
let Some(string_lit) = resolve_string_literal(re_func.pattern, semantic) else {
return;
};

Expand All @@ -110,33 +115,36 @@ pub(crate) fn unnecessary_regular_expression(checker: &mut Checker, call: &ExprC
// we can proceed with the str method replacement
let new_expr = re_func.replacement();

let repl = checker.generator().expr(&new_expr);
let diagnostic = Diagnostic::new(
let repl = new_expr.map(|expr| checker.generator().expr(&expr));
let mut diagnostic = Diagnostic::new(
UnnecessaryRegularExpression {
replacement: repl.clone(),
},
call.range,
);

let fix = Fix::applicable_edit(
Edit::range_replacement(repl, call.range),
if checker
.comment_ranges()
.has_comments(call, checker.source())
{
Applicability::Unsafe
} else {
Applicability::Safe
},
);
if let Some(repl) = repl {
diagnostic.set_fix(Fix::applicable_edit(
Edit::range_replacement(repl, call.range),
if checker
.comment_ranges()
.has_comments(call, checker.source())
{
Applicability::Unsafe
} else {
Applicability::Safe
},
));
}

checker.diagnostics.push(diagnostic.with_fix(fix));
checker.diagnostics.push(diagnostic);
}

/// The `re` functions supported by this rule.
#[derive(Debug)]
enum ReFuncKind<'a> {
Sub { repl: &'a Expr },
// Only `Some` if it's a fixable `re.sub()` call
Sub { repl: Option<&'a Expr> },
Match,
Search,
Fullmatch,
Expand All @@ -152,7 +160,7 @@ struct ReFunc<'a> {

impl<'a> ReFunc<'a> {
fn from_call_expr(
semantic: &SemanticModel,
semantic: &'a SemanticModel,
call: &'a ExprCall,
func_name: &str,
) -> Option<Self> {
Expand All @@ -173,11 +181,32 @@ impl<'a> ReFunc<'a> {
// version
("sub", 3) => {
let repl = call.arguments.find_argument("repl", 1)?;
if !repl.is_string_literal_expr() {
return None;
let lit = resolve_string_literal(repl, semantic)?;
let mut fixable = true;
for (c, next) in lit.value.chars().tuple_windows() {
// `\0` (or any other ASCII digit) and `\g` have special meaning in `repl` strings.
// Meanwhile, nearly all other escapes of ASCII letters in a `repl` string causes
// `re.PatternError` to be raised at runtime.
//
// If we see that the escaped character is an alphanumeric ASCII character,
// we should only emit a diagnostic suggesting to replace the `re.sub()` call with
// `str.replace`if we can detect that the escaped character is one that is both
// valid in a `repl` string *and* does not have any special meaning in a REPL string.
//
// It's out of scope for this rule to change invalid `re.sub()` calls into something
// that would not raise an exception at runtime. They should be left as-is.
if c == '\\' && next.is_ascii_alphanumeric() {
if "abfnrtv".contains(next) {
fixable = false;
} else {
return None;
}
}
}
Some(ReFunc {
kind: ReFuncKind::Sub { repl },
kind: ReFuncKind::Sub {
repl: fixable.then_some(repl),
},
pattern: call.arguments.find_argument("pattern", 0)?,
string: call.arguments.find_argument("string", 2)?,
})
Expand All @@ -201,20 +230,20 @@ impl<'a> ReFunc<'a> {
}
}

fn replacement(&self) -> Expr {
fn replacement(&self) -> Option<Expr> {
match self.kind {
// string.replace(pattern, repl)
ReFuncKind::Sub { repl } => {
self.method_expr("replace", vec![self.pattern.clone(), repl.clone()])
}
ReFuncKind::Sub { repl } => repl
.cloned()
.map(|repl| self.method_expr("replace", vec![self.pattern.clone(), repl])),
// string.startswith(pattern)
ReFuncKind::Match => self.method_expr("startswith", vec![self.pattern.clone()]),
ReFuncKind::Match => Some(self.method_expr("startswith", vec![self.pattern.clone()])),
// pattern in string
ReFuncKind::Search => self.compare_expr(CmpOp::In),
ReFuncKind::Search => Some(self.compare_expr(CmpOp::In)),
// string == pattern
ReFuncKind::Fullmatch => self.compare_expr(CmpOp::Eq),
ReFuncKind::Fullmatch => Some(self.compare_expr(CmpOp::Eq)),
// string.split(pattern)
ReFuncKind::Split => self.method_expr("split", vec![self.pattern.clone()]),
ReFuncKind::Split => Some(self.method_expr("split", vec![self.pattern.clone()])),
}
}

Expand Down Expand Up @@ -248,3 +277,23 @@ impl<'a> ReFunc<'a> {
})
}
}

/// Try to resolve `name` to an [`ExprStringLiteral`] in `semantic`.
fn resolve_string_literal<'a>(
name: &'a Expr,
semantic: &'a SemanticModel,
) -> Option<&'a ExprStringLiteral> {
if name.is_string_literal_expr() {
return name.as_string_literal_expr();
}

if let Some(name_expr) = name.as_name_expr() {
let binding = semantic.binding(semantic.only_binding(name_expr)?);
let value = find_binding_value(binding, semantic)?;
if value.is_string_literal_expr() {
return value.as_string_literal_expr();
}
}

None
}

This file was deleted.

Loading
Loading