Skip to content

Commit

Permalink
Implement the pylint rule PLW1514 (unspecified-encoding) (#7939)
Browse files Browse the repository at this point in the history
## Summary

Implemented the pylint rule W1514 ( unspecified-encoding).
See also:
https://pylint.readthedocs.io/en/latest/user_guide/messages/warning/unspecified-encoding.html


## Test Plan

Tested it with the submitted test case.
Additionally, we tested the new ruff rule (PLW1514) on our proprietary
Python code base.
  • Loading branch information
shager authored Oct 17, 2023
1 parent bf0e578 commit 73049df
Show file tree
Hide file tree
Showing 8 changed files with 281 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import io
import sys
import tempfile
import io as hugo
import codecs

# Errors.
open("test.txt")
io.TextIOWrapper(io.FileIO("test.txt"))
hugo.TextIOWrapper(hugo.FileIO("test.txt"))
tempfile.NamedTemporaryFile("w")
tempfile.TemporaryFile("w")
codecs.open("test.txt")
tempfile.SpooledTemporaryFile(0, "w")

# Non-errors.
open("test.txt", encoding="utf-8")
open("test.bin", "wb")
open("test.bin", mode="wb")
open("test.txt", "r", -1, "utf-8")
open("test.txt", mode=sys.argv[1])

def func(*args, **kwargs):
open(*args)
open("text.txt", **kwargs)

io.TextIOWrapper(io.FileIO("test.txt"), encoding="utf-8")
io.TextIOWrapper(io.FileIO("test.txt"), "utf-8")
tempfile.TemporaryFile("w", encoding="utf-8")
tempfile.TemporaryFile("w", -1, "utf-8")
tempfile.TemporaryFile("wb")
tempfile.TemporaryFile()
tempfile.NamedTemporaryFile("w", encoding="utf-8")
tempfile.NamedTemporaryFile("w", -1, "utf-8")
tempfile.NamedTemporaryFile("wb")
tempfile.NamedTemporaryFile()
codecs.open("test.txt", encoding="utf-8")
codecs.open("test.bin", "wb")
codecs.open("test.bin", mode="wb")
codecs.open("test.txt", "r", -1, "utf-8")
tempfile.SpooledTemporaryFile(0, "w", encoding="utf-8")
tempfile.SpooledTemporaryFile(0, "w", -1, "utf-8")
tempfile.SpooledTemporaryFile(0, "wb")
tempfile.SpooledTemporaryFile(0, )
3 changes: 3 additions & 0 deletions crates/ruff_linter/src/checkers/ast/analyze/expression.rs
Original file line number Diff line number Diff line change
Expand Up @@ -786,6 +786,9 @@ pub(crate) fn expression(expr: &Expr, checker: &mut Checker) {
if checker.enabled(Rule::SubprocessRunWithoutCheck) {
pylint::rules::subprocess_run_without_check(checker, call);
}
if checker.enabled(Rule::UnspecifiedEncoding) {
pylint::rules::unspecified_encoding(checker, call);
}
if checker.any_enabled(&[
Rule::PytestRaisesWithoutException,
Rule::PytestRaisesTooBroad,
Expand Down
1 change: 1 addition & 0 deletions crates/ruff_linter/src/codes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,7 @@ pub fn code_to_rule(linter: Linter, code: &str) -> Option<(RuleGroup, Rule)> {
(Pylint, "W1508") => (RuleGroup::Stable, rules::pylint::rules::InvalidEnvvarDefault),
(Pylint, "W1509") => (RuleGroup::Stable, rules::pylint::rules::SubprocessPopenPreexecFn),
(Pylint, "W1510") => (RuleGroup::Stable, rules::pylint::rules::SubprocessRunWithoutCheck),
(Pylint, "W1514") => (RuleGroup::Preview, rules::pylint::rules::UnspecifiedEncoding),
#[allow(deprecated)]
(Pylint, "W1641") => (RuleGroup::Nursery, rules::pylint::rules::EqWithoutHash),
(Pylint, "R0904") => (RuleGroup::Preview, rules::pylint::rules::TooManyPublicMethods),
Expand Down
1 change: 1 addition & 0 deletions crates/ruff_linter/src/rules/pylint/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,7 @@ mod tests {
Rule::SubprocessRunWithoutCheck,
Path::new("subprocess_run_without_check.py")
)]
#[test_case(Rule::UnspecifiedEncoding, Path::new("unspecified_encoding.py"))]
#[test_case(Rule::BadDunderMethodName, Path::new("bad_dunder_method_name.py"))]
#[test_case(Rule::NoSelfUse, Path::new("no_self_use.py"))]
#[test_case(Rule::MisplacedBareRaise, Path::new("misplaced_bare_raise.py"))]
Expand Down
2 changes: 2 additions & 0 deletions crates/ruff_linter/src/rules/pylint/rules/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ pub(crate) use type_name_incorrect_variance::*;
pub(crate) use type_param_name_mismatch::*;
pub(crate) use unexpected_special_method_signature::*;
pub(crate) use unnecessary_direct_lambda_call::*;
pub(crate) use unspecified_encoding::*;
pub(crate) use useless_else_on_loop::*;
pub(crate) use useless_import_alias::*;
pub(crate) use useless_return::*;
Expand Down Expand Up @@ -114,6 +115,7 @@ mod type_name_incorrect_variance;
mod type_param_name_mismatch;
mod unexpected_special_method_signature;
mod unnecessary_direct_lambda_call;
mod unspecified_encoding;
mod useless_else_on_loop;
mod useless_import_alias;
mod useless_return;
Expand Down
157 changes: 157 additions & 0 deletions crates/ruff_linter/src/rules/pylint/rules/unspecified_encoding.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
use ruff_diagnostics::{Diagnostic, Violation};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast as ast;
use ruff_python_ast::call_path::{format_call_path, CallPath};
use ruff_text_size::Ranged;

use crate::checkers::ast::Checker;

/// ## What it does
/// Checks for uses of `open` and related calls without an explicit `encoding`
/// argument.
///
/// ## Why is this bad?
/// Using `open` in text mode without an explicit encoding can lead to
/// non-portable code, with differing behavior across platforms.
///
/// Instead, consider using the `encoding` parameter to enforce a specific
/// encoding.
///
/// ## Example
/// ```python
/// open("file.txt")
/// ```
///
/// Use instead:
/// ```python
/// open("file.txt", encoding="utf-8")
/// ```
///
/// ## References
/// - [Python documentation: `open`](https://docs.python.org/3/library/functions.html#open)
#[violation]
pub struct UnspecifiedEncoding {
function_name: String,
mode: Mode,
}

impl Violation for UnspecifiedEncoding {
#[derive_message_formats]
fn message(&self) -> String {
let UnspecifiedEncoding {
function_name,
mode,
} = self;

match mode {
Mode::Supported => {
format!("`{function_name}` in text mode without explicit `encoding` argument")
}
Mode::Unsupported => {
format!("`{function_name}` without explicit `encoding` argument")
}
}
}
}

/// PLW1514
pub(crate) fn unspecified_encoding(checker: &mut Checker, call: &ast::ExprCall) {
let Some((function_name, mode)) = checker
.semantic()
.resolve_call_path(&call.func)
.filter(|call_path| is_violation(call, call_path))
.map(|call_path| {
(
format_call_path(call_path.as_slice()),
Mode::from(&call_path),
)
})
else {
return;
};

checker.diagnostics.push(Diagnostic::new(
UnspecifiedEncoding {
function_name,
mode,
},
call.func.range(),
));
}

/// Returns `true` if the given expression is a string literal containing a `b` character.
fn is_binary_mode(expr: &ast::Expr) -> Option<bool> {
Some(expr.as_constant_expr()?.value.as_str()?.value.contains('b'))
}

/// Returns `true` if the given call lacks an explicit `encoding`.
fn is_violation(call: &ast::ExprCall, call_path: &CallPath) -> bool {
// If we have something like `*args`, which might contain the encoding argument, abort.
if call
.arguments
.args
.iter()
.any(ruff_python_ast::Expr::is_starred_expr)
{
return false;
}
// If we have something like `**kwargs`, which might contain the encoding argument, abort.
if call
.arguments
.keywords
.iter()
.any(|keyword| keyword.arg.is_none())
{
return false;
}
match call_path.as_slice() {
["" | "codecs" | "_io", "open"] => {
if let Some(mode_arg) = call.arguments.find_argument("mode", 1) {
if is_binary_mode(mode_arg).unwrap_or(true) {
// binary mode or unknown mode is no violation
return false;
}
}
// else mode not specified, defaults to text mode
call.arguments.find_argument("encoding", 3).is_none()
}
["tempfile", "TemporaryFile" | "NamedTemporaryFile" | "SpooledTemporaryFile"] => {
let mode_pos = usize::from(call_path[1] == "SpooledTemporaryFile");
if let Some(mode_arg) = call.arguments.find_argument("mode", mode_pos) {
if is_binary_mode(mode_arg).unwrap_or(true) {
// binary mode or unknown mode is no violation
return false;
}
} else {
// defaults to binary mode
return false;
}
call.arguments
.find_argument("encoding", mode_pos + 2)
.is_none()
}
["io" | "_io", "TextIOWrapper"] => call.arguments.find_argument("encoding", 1).is_none(),
_ => false,
}
}

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Mode {
/// The call supports a `mode` argument.
Supported,
/// The call does not support a `mode` argument.
Unsupported,
}

impl From<&CallPath<'_>> for Mode {
fn from(value: &CallPath<'_>) -> Self {
match value.as_slice() {
["" | "codecs" | "_io", "open"] => Mode::Supported,
["tempfile", "TemporaryFile" | "NamedTemporaryFile" | "SpooledTemporaryFile"] => {
Mode::Supported
}
["io" | "_io", "TextIOWrapper"] => Mode::Unsupported,
_ => Mode::Unsupported,
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
---
source: crates/ruff_linter/src/rules/pylint/mod.rs
---
unspecified_encoding.py:8:1: PLW1514 `open` in text mode without explicit `encoding` argument
|
7 | # Errors.
8 | open("test.txt")
| ^^^^ PLW1514
9 | io.TextIOWrapper(io.FileIO("test.txt"))
10 | hugo.TextIOWrapper(hugo.FileIO("test.txt"))
|

unspecified_encoding.py:9:1: PLW1514 `io.TextIOWrapper` without explicit `encoding` argument
|
7 | # Errors.
8 | open("test.txt")
9 | io.TextIOWrapper(io.FileIO("test.txt"))
| ^^^^^^^^^^^^^^^^ PLW1514
10 | hugo.TextIOWrapper(hugo.FileIO("test.txt"))
11 | tempfile.NamedTemporaryFile("w")
|

unspecified_encoding.py:10:1: PLW1514 `io.TextIOWrapper` without explicit `encoding` argument
|
8 | open("test.txt")
9 | io.TextIOWrapper(io.FileIO("test.txt"))
10 | hugo.TextIOWrapper(hugo.FileIO("test.txt"))
| ^^^^^^^^^^^^^^^^^^ PLW1514
11 | tempfile.NamedTemporaryFile("w")
12 | tempfile.TemporaryFile("w")
|

unspecified_encoding.py:11:1: PLW1514 `tempfile.NamedTemporaryFile` in text mode without explicit `encoding` argument
|
9 | io.TextIOWrapper(io.FileIO("test.txt"))
10 | hugo.TextIOWrapper(hugo.FileIO("test.txt"))
11 | tempfile.NamedTemporaryFile("w")
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^ PLW1514
12 | tempfile.TemporaryFile("w")
13 | codecs.open("test.txt")
|

unspecified_encoding.py:12:1: PLW1514 `tempfile.TemporaryFile` in text mode without explicit `encoding` argument
|
10 | hugo.TextIOWrapper(hugo.FileIO("test.txt"))
11 | tempfile.NamedTemporaryFile("w")
12 | tempfile.TemporaryFile("w")
| ^^^^^^^^^^^^^^^^^^^^^^ PLW1514
13 | codecs.open("test.txt")
14 | tempfile.SpooledTemporaryFile(0, "w")
|

unspecified_encoding.py:13:1: PLW1514 `codecs.open` in text mode without explicit `encoding` argument
|
11 | tempfile.NamedTemporaryFile("w")
12 | tempfile.TemporaryFile("w")
13 | codecs.open("test.txt")
| ^^^^^^^^^^^ PLW1514
14 | tempfile.SpooledTemporaryFile(0, "w")
|

unspecified_encoding.py:14:1: PLW1514 `tempfile.SpooledTemporaryFile` in text mode without explicit `encoding` argument
|
12 | tempfile.TemporaryFile("w")
13 | codecs.open("test.txt")
14 | tempfile.SpooledTemporaryFile(0, "w")
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ PLW1514
15 |
16 | # Non-errors.
|


1 change: 1 addition & 0 deletions ruff.schema.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 73049df

Please sign in to comment.