Skip to content

Commit

Permalink
Implement the pylint rule PLW1514
Browse files Browse the repository at this point in the history
  • Loading branch information
Sven Hager committed Oct 13, 2023
1 parent 4454fbf commit 4ba7b3e
Show file tree
Hide file tree
Showing 8 changed files with 242 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import io
import sys
import tempfile
import io as hugo
import codecs

# Errors.
open("test.txt")
io.TextIOWrapper(io.FileIO("test.txt"))
hugo.TextIOWrapper(hugo.FileIO("test.txt"))
tempfile.NamedTemporaryFile("w")
tempfile.TemporaryFile("w")
codecs.open("test.txt")
tempfile.SpooledTemporaryFile(0, "w")

# Non-errors.
open("test.txt", encoding="utf-8")
open("test.bin", "wb")
open("test.bin", mode="wb")
open("test.txt", "r", -1, "utf-8")
open("test.txt", mode=sys.argv[1])

def func(*args, **kwargs):
open(*args)
open("text.txt", **kwargs)

io.TextIOWrapper(io.FileIO("test.txt"), encoding="utf-8")
io.TextIOWrapper(io.FileIO("test.txt"), "utf-8")
tempfile.TemporaryFile("w", encoding="utf-8")
tempfile.TemporaryFile("w", -1, "utf-8")
tempfile.TemporaryFile("wb")
tempfile.TemporaryFile()
tempfile.NamedTemporaryFile("w", encoding="utf-8")
tempfile.NamedTemporaryFile("w", -1, "utf-8")
tempfile.NamedTemporaryFile("wb")
tempfile.NamedTemporaryFile()
codecs.open("test.txt", encoding="utf-8")
codecs.open("test.bin", "wb")
codecs.open("test.bin", mode="wb")
codecs.open("test.txt", "r", -1, "utf-8")
tempfile.SpooledTemporaryFile(0, "w", encoding="utf-8")
tempfile.SpooledTemporaryFile(0, "w", -1, "utf-8")
tempfile.SpooledTemporaryFile(0, "wb")
tempfile.SpooledTemporaryFile(0, )
3 changes: 3 additions & 0 deletions crates/ruff_linter/src/checkers/ast/analyze/expression.rs
Original file line number Diff line number Diff line change
Expand Up @@ -786,6 +786,9 @@ pub(crate) fn expression(expr: &Expr, checker: &mut Checker) {
if checker.enabled(Rule::SubprocessRunWithoutCheck) {
pylint::rules::subprocess_run_without_check(checker, call);
}
if checker.enabled(Rule::UnspecifiedEncoding) {
pylint::rules::unspecified_encoding(checker, call);
}
if checker.any_enabled(&[
Rule::PytestRaisesWithoutException,
Rule::PytestRaisesTooBroad,
Expand Down
1 change: 1 addition & 0 deletions crates/ruff_linter/src/codes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,7 @@ pub fn code_to_rule(linter: Linter, code: &str) -> Option<(RuleGroup, Rule)> {
(Pylint, "W1508") => (RuleGroup::Unspecified, rules::pylint::rules::InvalidEnvvarDefault),
(Pylint, "W1509") => (RuleGroup::Unspecified, rules::pylint::rules::SubprocessPopenPreexecFn),
(Pylint, "W1510") => (RuleGroup::Unspecified, rules::pylint::rules::SubprocessRunWithoutCheck),
(Pylint, "W1514") => (RuleGroup::Preview, rules::pylint::rules::UnspecifiedEncoding),
#[allow(deprecated)]
(Pylint, "W1641") => (RuleGroup::Nursery, rules::pylint::rules::EqWithoutHash),
(Pylint, "R0904") => (RuleGroup::Preview, rules::pylint::rules::TooManyPublicMethods),
Expand Down
1 change: 1 addition & 0 deletions crates/ruff_linter/src/rules/pylint/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ mod tests {
Rule::SubprocessRunWithoutCheck,
Path::new("subprocess_run_without_check.py")
)]
#[test_case(Rule::UnspecifiedEncoding, Path::new("unspecified_encoding.py"))]
#[test_case(Rule::BadDunderMethodName, Path::new("bad_dunder_method_name.py"))]
#[test_case(Rule::NoSelfUse, Path::new("no_self_use.py"))]
fn rules(rule_code: Rule, path: &Path) -> Result<()> {
Expand Down
2 changes: 2 additions & 0 deletions crates/ruff_linter/src/rules/pylint/rules/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ pub(crate) use type_name_incorrect_variance::*;
pub(crate) use type_param_name_mismatch::*;
pub(crate) use unexpected_special_method_signature::*;
pub(crate) use unnecessary_direct_lambda_call::*;
pub(crate) use unspecified_encoding::*;
pub(crate) use useless_else_on_loop::*;
pub(crate) use useless_import_alias::*;
pub(crate) use useless_return::*;
Expand Down Expand Up @@ -110,6 +111,7 @@ mod type_name_incorrect_variance;
mod type_param_name_mismatch;
mod unexpected_special_method_signature;
mod unnecessary_direct_lambda_call;
mod unspecified_encoding;
mod useless_else_on_loop;
mod useless_import_alias;
mod useless_return;
Expand Down
118 changes: 118 additions & 0 deletions crates/ruff_linter/src/rules/pylint/rules/unspecified_encoding.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
use ruff_diagnostics::{Diagnostic, Violation};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast as ast;
use ruff_text_size::Ranged;

use crate::checkers::ast::Checker;

/// ## What it does
/// Checks for uses of `open` or similar calls without an explicit `encoding` argument.
///
/// ## Why is this bad?
/// Using `open` in text mode without an explicit encoding specified can lead to
/// unportable code that leads to different behaviour on different systems.
///
/// Instead, consider using the `encoding` parameter to explicitly enforce a specific encoding.
///
/// ## Example
/// ```python
/// open("file.txt")
/// ```
///
/// Use instead:
/// ```python
/// open("file.txt", encoding="utf-8")
/// ```
///
/// ## References
/// - [Python documentation: `open`](https://docs.python.org/3/library/functions.html#open)
#[violation]
pub struct UnspecifiedEncoding {
function_name: String,
}

impl Violation for UnspecifiedEncoding {
#[derive_message_formats]
fn message(&self) -> String {
format!(
"`{}` {}without explicit `encoding` argument",
self.function_name,
if self.function_name == "open" {
"in text mode "
} else {
""
}
)
}
}

fn is_binary_mode(expr: &ast::Expr) -> Option<bool> {
Some(expr.as_constant_expr()?.value.as_str()?.value.contains("b"))
}

fn is_violation(call: &ast::ExprCall, path: &[&str]) -> bool {
// this checks if we have something like *args which might contain the encoding argument
if call.arguments.args.iter().any(|a| a.is_starred_expr()) {
return false;
}
// this checks if we have something like **kwargs which might contain the encoding argument
if call.arguments.keywords.iter().any(|a| a.arg.is_none()) {
return false;
}
match path {
["" | "codecs", "open"] => {
if let Some(mode_arg) = call.arguments.find_argument("mode", 1) {
if is_binary_mode(mode_arg).unwrap_or(true) {
// binary mode or unknown mode is no violation
return false;
}
}
// else mode not specified, defaults to text mode
call.arguments.find_argument("encoding", 3).is_none()
}
["io", "TextIOWrapper"] => call.arguments.find_argument("encoding", 1).is_none(),
["tempfile", "TemporaryFile" | "NamedTemporaryFile" | "SpooledTemporaryFile"] => {
let mode_pos = if path[1] == "SpooledTemporaryFile" {
1
} else {
0
};

if let Some(mode_arg) = call.arguments.find_argument("mode", mode_pos) {
if is_binary_mode(mode_arg).unwrap_or(true) {
// binary mode or unknown mode is no violation
return false;
}
} else {
// defaults to binary mode
return false;
}
call.arguments
.find_argument("encoding", mode_pos + 2)
.is_none()
}
_ => false,
}
}

/// PLW1514
pub(crate) fn unspecified_encoding(checker: &mut Checker, call: &ast::ExprCall) {
let Some(path) = checker.semantic().resolve_call_path(&call.func) else {
return;
};
if is_violation(call, path.as_slice()) {
let path_slice = if path[0] == "" {
&path[1..]
} else {
&path[0..]
};
let result = Diagnostic::new(
UnspecifiedEncoding {
function_name: path_slice.join("."),
},
call.func.range(),
);
drop(path);
checker.diagnostics.push(result);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
---
source: crates/ruff_linter/src/rules/pylint/mod.rs
---
unspecified_encoding.py:8:1: PLW1514 `open` in text mode without explicit `encoding` argument
|
7 | # Errors.
8 | open("test.txt")
| ^^^^ PLW1514
9 | io.TextIOWrapper(io.FileIO("test.txt"))
10 | hugo.TextIOWrapper(hugo.FileIO("test.txt"))
|

unspecified_encoding.py:9:1: PLW1514 `io.TextIOWrapper` without explicit `encoding` argument
|
7 | # Errors.
8 | open("test.txt")
9 | io.TextIOWrapper(io.FileIO("test.txt"))
| ^^^^^^^^^^^^^^^^ PLW1514
10 | hugo.TextIOWrapper(hugo.FileIO("test.txt"))
11 | tempfile.NamedTemporaryFile("w")
|

unspecified_encoding.py:10:1: PLW1514 `io.TextIOWrapper` without explicit `encoding` argument
|
8 | open("test.txt")
9 | io.TextIOWrapper(io.FileIO("test.txt"))
10 | hugo.TextIOWrapper(hugo.FileIO("test.txt"))
| ^^^^^^^^^^^^^^^^^^ PLW1514
11 | tempfile.NamedTemporaryFile("w")
12 | tempfile.TemporaryFile("w")
|

unspecified_encoding.py:11:1: PLW1514 `tempfile.NamedTemporaryFile` without explicit `encoding` argument
|
9 | io.TextIOWrapper(io.FileIO("test.txt"))
10 | hugo.TextIOWrapper(hugo.FileIO("test.txt"))
11 | tempfile.NamedTemporaryFile("w")
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^ PLW1514
12 | tempfile.TemporaryFile("w")
13 | codecs.open("test.txt")
|

unspecified_encoding.py:12:1: PLW1514 `tempfile.TemporaryFile` without explicit `encoding` argument
|
10 | hugo.TextIOWrapper(hugo.FileIO("test.txt"))
11 | tempfile.NamedTemporaryFile("w")
12 | tempfile.TemporaryFile("w")
| ^^^^^^^^^^^^^^^^^^^^^^ PLW1514
13 | codecs.open("test.txt")
14 | tempfile.SpooledTemporaryFile(0, "w")
|

unspecified_encoding.py:13:1: PLW1514 `codecs.open` without explicit `encoding` argument
|
11 | tempfile.NamedTemporaryFile("w")
12 | tempfile.TemporaryFile("w")
13 | codecs.open("test.txt")
| ^^^^^^^^^^^ PLW1514
14 | tempfile.SpooledTemporaryFile(0, "w")
|

unspecified_encoding.py:14:1: PLW1514 `tempfile.SpooledTemporaryFile` without explicit `encoding` argument
|
12 | tempfile.TemporaryFile("w")
13 | codecs.open("test.txt")
14 | tempfile.SpooledTemporaryFile(0, "w")
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ PLW1514
15 |
16 | # Non-errors.
|


1 change: 1 addition & 0 deletions ruff.schema.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 4ba7b3e

Please sign in to comment.