diff --git a/crates/ruff_linter/resources/test/fixtures/pylint/unspecified_encoding.py b/crates/ruff_linter/resources/test/fixtures/pylint/unspecified_encoding.py new file mode 100644 index 00000000000000..f14e8ce2ebbe78 --- /dev/null +++ b/crates/ruff_linter/resources/test/fixtures/pylint/unspecified_encoding.py @@ -0,0 +1,44 @@ +import io +import sys +import tempfile +import io as hugo +import codecs + +# Errors. +open("test.txt") +io.TextIOWrapper(io.FileIO("test.txt")) +hugo.TextIOWrapper(hugo.FileIO("test.txt")) +tempfile.NamedTemporaryFile("w") +tempfile.TemporaryFile("w") +codecs.open("test.txt") +tempfile.SpooledTemporaryFile(0, "w") + +# Non-errors. +open("test.txt", encoding="utf-8") +open("test.bin", "wb") +open("test.bin", mode="wb") +open("test.txt", "r", -1, "utf-8") +open("test.txt", mode=sys.argv[1]) + +def func(*args, **kwargs): + open(*args) + open("text.txt", **kwargs) + +io.TextIOWrapper(io.FileIO("test.txt"), encoding="utf-8") +io.TextIOWrapper(io.FileIO("test.txt"), "utf-8") +tempfile.TemporaryFile("w", encoding="utf-8") +tempfile.TemporaryFile("w", -1, "utf-8") +tempfile.TemporaryFile("wb") +tempfile.TemporaryFile() +tempfile.NamedTemporaryFile("w", encoding="utf-8") +tempfile.NamedTemporaryFile("w", -1, "utf-8") +tempfile.NamedTemporaryFile("wb") +tempfile.NamedTemporaryFile() +codecs.open("test.txt", encoding="utf-8") +codecs.open("test.bin", "wb") +codecs.open("test.bin", mode="wb") +codecs.open("test.txt", "r", -1, "utf-8") +tempfile.SpooledTemporaryFile(0, "w", encoding="utf-8") +tempfile.SpooledTemporaryFile(0, "w", -1, "utf-8") +tempfile.SpooledTemporaryFile(0, "wb") +tempfile.SpooledTemporaryFile(0, ) \ No newline at end of file diff --git a/crates/ruff_linter/src/checkers/ast/analyze/expression.rs b/crates/ruff_linter/src/checkers/ast/analyze/expression.rs index 707d8e762bf987..39e3f08664f906 100644 --- a/crates/ruff_linter/src/checkers/ast/analyze/expression.rs +++ b/crates/ruff_linter/src/checkers/ast/analyze/expression.rs @@ -786,6 +786,9 @@ pub(crate) fn expression(expr: &Expr, checker: &mut Checker) { if checker.enabled(Rule::SubprocessRunWithoutCheck) { pylint::rules::subprocess_run_without_check(checker, call); } + if checker.enabled(Rule::UnspecifiedEncoding) { + pylint::rules::unspecified_encoding(checker, call); + } if checker.any_enabled(&[ Rule::PytestRaisesWithoutException, Rule::PytestRaisesTooBroad, diff --git a/crates/ruff_linter/src/codes.rs b/crates/ruff_linter/src/codes.rs index f0b9a808040516..f301ebb45d03bc 100644 --- a/crates/ruff_linter/src/codes.rs +++ b/crates/ruff_linter/src/codes.rs @@ -265,6 +265,7 @@ pub fn code_to_rule(linter: Linter, code: &str) -> Option<(RuleGroup, Rule)> { (Pylint, "W1508") => (RuleGroup::Unspecified, rules::pylint::rules::InvalidEnvvarDefault), (Pylint, "W1509") => (RuleGroup::Unspecified, rules::pylint::rules::SubprocessPopenPreexecFn), (Pylint, "W1510") => (RuleGroup::Unspecified, rules::pylint::rules::SubprocessRunWithoutCheck), + (Pylint, "W1514") => (RuleGroup::Preview, rules::pylint::rules::UnspecifiedEncoding), #[allow(deprecated)] (Pylint, "W1641") => (RuleGroup::Nursery, rules::pylint::rules::EqWithoutHash), (Pylint, "R0904") => (RuleGroup::Preview, rules::pylint::rules::TooManyPublicMethods), diff --git a/crates/ruff_linter/src/rules/pylint/mod.rs b/crates/ruff_linter/src/rules/pylint/mod.rs index aee24e4e620374..24e80f07f7b387 100644 --- a/crates/ruff_linter/src/rules/pylint/mod.rs +++ b/crates/ruff_linter/src/rules/pylint/mod.rs @@ -132,6 +132,7 @@ mod tests { Rule::SubprocessRunWithoutCheck, Path::new("subprocess_run_without_check.py") )] + #[test_case(Rule::UnspecifiedEncoding, Path::new("unspecified_encoding.py"))] #[test_case(Rule::BadDunderMethodName, Path::new("bad_dunder_method_name.py"))] #[test_case(Rule::NoSelfUse, Path::new("no_self_use.py"))] fn rules(rule_code: Rule, path: &Path) -> Result<()> { diff --git a/crates/ruff_linter/src/rules/pylint/rules/mod.rs b/crates/ruff_linter/src/rules/pylint/rules/mod.rs index 7643b55b52140b..b0a4832ea78906 100644 --- a/crates/ruff_linter/src/rules/pylint/rules/mod.rs +++ b/crates/ruff_linter/src/rules/pylint/rules/mod.rs @@ -51,6 +51,7 @@ pub(crate) use type_name_incorrect_variance::*; pub(crate) use type_param_name_mismatch::*; pub(crate) use unexpected_special_method_signature::*; pub(crate) use unnecessary_direct_lambda_call::*; +pub(crate) use unspecified_encoding::*; pub(crate) use useless_else_on_loop::*; pub(crate) use useless_import_alias::*; pub(crate) use useless_return::*; @@ -110,6 +111,7 @@ mod type_name_incorrect_variance; mod type_param_name_mismatch; mod unexpected_special_method_signature; mod unnecessary_direct_lambda_call; +mod unspecified_encoding; mod useless_else_on_loop; mod useless_import_alias; mod useless_return; diff --git a/crates/ruff_linter/src/rules/pylint/rules/unspecified_encoding.rs b/crates/ruff_linter/src/rules/pylint/rules/unspecified_encoding.rs new file mode 100644 index 00000000000000..bdf979648125ef --- /dev/null +++ b/crates/ruff_linter/src/rules/pylint/rules/unspecified_encoding.rs @@ -0,0 +1,118 @@ +use ruff_diagnostics::{Diagnostic, Violation}; +use ruff_macros::{derive_message_formats, violation}; +use ruff_python_ast as ast; +use ruff_text_size::Ranged; + +use crate::checkers::ast::Checker; + +/// ## What it does +/// Checks for uses of `open` or similar calls without an explicit `encoding` argument. +/// +/// ## Why is this bad? +/// Using `open` in text mode without an explicit encoding specified can lead to +/// unportable code that leads to different behaviour on different systems. +/// +/// Instead, consider using the `encoding` parameter to explicitly enforce a specific encoding. +/// +/// ## Example +/// ```python +/// open("file.txt") +/// ``` +/// +/// Use instead: +/// ```python +/// open("file.txt", encoding="utf-8") +/// ``` +/// +/// ## References +/// - [Python documentation: `open`](https://docs.python.org/3/library/functions.html#open) +#[violation] +pub struct UnspecifiedEncoding { + function_name: String, +} + +impl Violation for UnspecifiedEncoding { + #[derive_message_formats] + fn message(&self) -> String { + format!( + "`{}` {}without explicit `encoding` argument", + self.function_name, + if self.function_name == "open" { + "in text mode " + } else { + "" + } + ) + } +} + +fn is_binary_mode(expr: &ast::Expr) -> Option { + Some(expr.as_constant_expr()?.value.as_str()?.value.contains("b")) +} + +fn is_violation(call: &ast::ExprCall, path: &[&str]) -> bool { + // this checks if we have something like *args which might contain the encoding argument + if call.arguments.args.iter().any(|a| a.is_starred_expr()) { + return false; + } + // this checks if we have something like **kwargs which might contain the encoding argument + if call.arguments.keywords.iter().any(|a| a.arg.is_none()) { + return false; + } + match path { + ["" | "codecs", "open"] => { + if let Some(mode_arg) = call.arguments.find_argument("mode", 1) { + if is_binary_mode(mode_arg).unwrap_or(true) { + // binary mode or unknown mode is no violation + return false; + } + } + // else mode not specified, defaults to text mode + call.arguments.find_argument("encoding", 3).is_none() + } + ["io", "TextIOWrapper"] => call.arguments.find_argument("encoding", 1).is_none(), + ["tempfile", "TemporaryFile" | "NamedTemporaryFile" | "SpooledTemporaryFile"] => { + let mode_pos = if path[1] == "SpooledTemporaryFile" { + 1 + } else { + 0 + }; + + if let Some(mode_arg) = call.arguments.find_argument("mode", mode_pos) { + if is_binary_mode(mode_arg).unwrap_or(true) { + // binary mode or unknown mode is no violation + return false; + } + } else { + // defaults to binary mode + return false; + } + call.arguments + .find_argument("encoding", mode_pos + 2) + .is_none() + } + _ => false, + } +} + +/// PLW1514 +pub(crate) fn unspecified_encoding(checker: &mut Checker, call: &ast::ExprCall) { + let Some(path) = checker.semantic().resolve_call_path(&call.func) else { + return; + }; + if is_violation(call, path.as_slice()) { + let path_slice = if path[0] == "" { + &path[1..] + } else { + &path[0..] + }; + let result = Diagnostic::new( + UnspecifiedEncoding { + function_name: path_slice.join("."), + }, + call.func.range(), + ); + drop(path); + checker.diagnostics.push(result); + } +} diff --git a/crates/ruff_linter/src/rules/pylint/snapshots/ruff_linter__rules__pylint__tests__PLW1514_unspecified_encoding.py.snap b/crates/ruff_linter/src/rules/pylint/snapshots/ruff_linter__rules__pylint__tests__PLW1514_unspecified_encoding.py.snap new file mode 100644 index 00000000000000..0fdd685396a441 --- /dev/null +++ b/crates/ruff_linter/src/rules/pylint/snapshots/ruff_linter__rules__pylint__tests__PLW1514_unspecified_encoding.py.snap @@ -0,0 +1,72 @@ +--- +source: crates/ruff_linter/src/rules/pylint/mod.rs +--- +unspecified_encoding.py:8:1: PLW1514 `open` in text mode without explicit `encoding` argument + | + 7 | # Errors. + 8 | open("test.txt") + | ^^^^ PLW1514 + 9 | io.TextIOWrapper(io.FileIO("test.txt")) +10 | hugo.TextIOWrapper(hugo.FileIO("test.txt")) + | + +unspecified_encoding.py:9:1: PLW1514 `io.TextIOWrapper` without explicit `encoding` argument + | + 7 | # Errors. + 8 | open("test.txt") + 9 | io.TextIOWrapper(io.FileIO("test.txt")) + | ^^^^^^^^^^^^^^^^ PLW1514 +10 | hugo.TextIOWrapper(hugo.FileIO("test.txt")) +11 | tempfile.NamedTemporaryFile("w") + | + +unspecified_encoding.py:10:1: PLW1514 `io.TextIOWrapper` without explicit `encoding` argument + | + 8 | open("test.txt") + 9 | io.TextIOWrapper(io.FileIO("test.txt")) +10 | hugo.TextIOWrapper(hugo.FileIO("test.txt")) + | ^^^^^^^^^^^^^^^^^^ PLW1514 +11 | tempfile.NamedTemporaryFile("w") +12 | tempfile.TemporaryFile("w") + | + +unspecified_encoding.py:11:1: PLW1514 `tempfile.NamedTemporaryFile` without explicit `encoding` argument + | + 9 | io.TextIOWrapper(io.FileIO("test.txt")) +10 | hugo.TextIOWrapper(hugo.FileIO("test.txt")) +11 | tempfile.NamedTemporaryFile("w") + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^ PLW1514 +12 | tempfile.TemporaryFile("w") +13 | codecs.open("test.txt") + | + +unspecified_encoding.py:12:1: PLW1514 `tempfile.TemporaryFile` without explicit `encoding` argument + | +10 | hugo.TextIOWrapper(hugo.FileIO("test.txt")) +11 | tempfile.NamedTemporaryFile("w") +12 | tempfile.TemporaryFile("w") + | ^^^^^^^^^^^^^^^^^^^^^^ PLW1514 +13 | codecs.open("test.txt") +14 | tempfile.SpooledTemporaryFile(0, "w") + | + +unspecified_encoding.py:13:1: PLW1514 `codecs.open` without explicit `encoding` argument + | +11 | tempfile.NamedTemporaryFile("w") +12 | tempfile.TemporaryFile("w") +13 | codecs.open("test.txt") + | ^^^^^^^^^^^ PLW1514 +14 | tempfile.SpooledTemporaryFile(0, "w") + | + +unspecified_encoding.py:14:1: PLW1514 `tempfile.SpooledTemporaryFile` without explicit `encoding` argument + | +12 | tempfile.TemporaryFile("w") +13 | codecs.open("test.txt") +14 | tempfile.SpooledTemporaryFile(0, "w") + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ PLW1514 +15 | +16 | # Non-errors. + | + + diff --git a/ruff.schema.json b/ruff.schema.json index df646ec363f9fd..a2df8882526e1c 100644 --- a/ruff.schema.json +++ b/ruff.schema.json @@ -3003,6 +3003,7 @@ "PLW1509", "PLW151", "PLW1510", + "PLW1514", "PLW16", "PLW164", "PLW1641",