Skip to content

Commit

Permalink
Minor tweaks to implementation
Browse files Browse the repository at this point in the history
  • Loading branch information
charliermarsh committed Oct 17, 2023
1 parent f020d15 commit 6c958f8
Show file tree
Hide file tree
Showing 3 changed files with 83 additions and 44 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -41,4 +41,4 @@ def func(*args, **kwargs):
tempfile.SpooledTemporaryFile(0, "w", encoding="utf-8")
tempfile.SpooledTemporaryFile(0, "w", -1, "utf-8")
tempfile.SpooledTemporaryFile(0, "wb")
tempfile.SpooledTemporaryFile(0, )
tempfile.SpooledTemporaryFile(0, )
117 changes: 78 additions & 39 deletions crates/ruff_linter/src/rules/pylint/rules/unspecified_encoding.rs
Original file line number Diff line number Diff line change
@@ -1,18 +1,21 @@
use ruff_diagnostics::{Diagnostic, Violation};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast as ast;
use ruff_python_ast::call_path::{format_call_path, CallPath};
use ruff_text_size::Ranged;

use crate::checkers::ast::Checker;

/// ## What it does
/// Checks for uses of `open` or similar calls without an explicit `encoding` argument.
/// Checks for uses of `open` and related calls without an explicit `encoding`
/// argument.
///
/// ## Why is this bad?
/// Using `open` in text mode without an explicit encoding specified can lead to
/// unportable code that leads to different behaviour on different systems.
/// Using `open` in text mode without an explicit encoding can lead to
/// non-portable code, with differing behavior across platforms.
///
/// Instead, consider using the `encoding` parameter to explicitly enforce a specific encoding.
/// Instead, consider using the `encoding` parameter to enforce a specific
/// encoding.
///
/// ## Example
/// ```python
Expand All @@ -29,29 +32,61 @@ use crate::checkers::ast::Checker;
#[violation]
pub struct UnspecifiedEncoding {
function_name: String,
mode: Mode,
}

impl Violation for UnspecifiedEncoding {
#[derive_message_formats]
fn message(&self) -> String {
format!(
"`{}` {}without explicit `encoding` argument",
self.function_name,
if self.function_name == "open" {
"in text mode "
} else {
""
let UnspecifiedEncoding {
function_name,
mode,
} = self;

match mode {
Mode::Supported => {
format!("`{function_name}` in text mode without explicit `encoding` argument")
}
Mode::Unsupported => {
format!("`{function_name}` without explicit `encoding` argument")
}
)
}
}
}

/// PLW1514
pub(crate) fn unspecified_encoding(checker: &mut Checker, call: &ast::ExprCall) {
let Some((function_name, mode)) = checker
.semantic()
.resolve_call_path(&call.func)
.filter(|call_path| is_violation(call, call_path))
.map(|call_path| {
(
format_call_path(call_path.as_slice()),
Mode::from(&call_path),
)
})
else {
return;
};

checker.diagnostics.push(Diagnostic::new(
UnspecifiedEncoding {
function_name,
mode,
},
call.func.range(),
));
}

/// Returns `true` if the given expression is a string literal containing a `b` character.
fn is_binary_mode(expr: &ast::Expr) -> Option<bool> {
Some(expr.as_constant_expr()?.value.as_str()?.value.contains('b'))
}

fn is_violation(call: &ast::ExprCall, path: &[&str]) -> bool {
// this checks if we have something like *args which might contain the encoding argument
/// Returns `true` if the given call lacks an explicit `encoding`.
fn is_violation(call: &ast::ExprCall, call_path: &CallPath) -> bool {
// If we have something like `*args`, which might contain the encoding argument, abort.
if call
.arguments
.args
Expand All @@ -60,12 +95,17 @@ fn is_violation(call: &ast::ExprCall, path: &[&str]) -> bool {
{
return false;
}
// this checks if we have something like **kwargs which might contain the encoding argument
if call.arguments.keywords.iter().any(|a| a.arg.is_none()) {
// If we have something like `**kwargs`, which might contain the encoding argument, abort.
if call
.arguments
.keywords
.iter()
.any(|keyword| keyword.arg.is_none())
{
return false;
}
match path {
["" | "codecs", "open"] => {
match call_path.as_slice() {
["" | "codecs" | "_io", "open"] => {
if let Some(mode_arg) = call.arguments.find_argument("mode", 1) {
if is_binary_mode(mode_arg).unwrap_or(true) {
// binary mode or unknown mode is no violation
Expand All @@ -75,9 +115,8 @@ fn is_violation(call: &ast::ExprCall, path: &[&str]) -> bool {
// else mode not specified, defaults to text mode
call.arguments.find_argument("encoding", 3).is_none()
}
["io", "TextIOWrapper"] => call.arguments.find_argument("encoding", 1).is_none(),
["tempfile", "TemporaryFile" | "NamedTemporaryFile" | "SpooledTemporaryFile"] => {
let mode_pos = usize::from(path[1] == "SpooledTemporaryFile");
let mode_pos = usize::from(call_path[1] == "SpooledTemporaryFile");
if let Some(mode_arg) = call.arguments.find_argument("mode", mode_pos) {
if is_binary_mode(mode_arg).unwrap_or(true) {
// binary mode or unknown mode is no violation
Expand All @@ -91,28 +130,28 @@ fn is_violation(call: &ast::ExprCall, path: &[&str]) -> bool {
.find_argument("encoding", mode_pos + 2)
.is_none()
}
["io" | "_io", "TextIOWrapper"] => call.arguments.find_argument("encoding", 1).is_none(),
_ => false,
}
}

/// PLW1514
pub(crate) fn unspecified_encoding(checker: &mut Checker, call: &ast::ExprCall) {
let Some(path) = checker.semantic().resolve_call_path(&call.func) else {
return;
};
if is_violation(call, path.as_slice()) {
let path_slice = if path[0].is_empty() {
&path[1..]
} else {
&path[0..]
};
let result = Diagnostic::new(
UnspecifiedEncoding {
function_name: path_slice.join("."),
},
call.func.range(),
);
drop(path);
checker.diagnostics.push(result);
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Mode {
/// The call supports a `mode` argument.
Supported,
/// The call does not support a `mode` argument.
Unsupported,
}

impl From<&CallPath<'_>> for Mode {
fn from(value: &CallPath<'_>) -> Self {
match value.as_slice() {
["" | "codecs" | "_io", "open"] => Mode::Supported,
["tempfile", "TemporaryFile" | "NamedTemporaryFile" | "SpooledTemporaryFile"] => {
Mode::Supported
}
["io" | "_io", "TextIOWrapper"] => Mode::Unsupported,
_ => Mode::Unsupported,
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ unspecified_encoding.py:10:1: PLW1514 `io.TextIOWrapper` without explicit `encod
12 | tempfile.TemporaryFile("w")
|

unspecified_encoding.py:11:1: PLW1514 `tempfile.NamedTemporaryFile` without explicit `encoding` argument
unspecified_encoding.py:11:1: PLW1514 `tempfile.NamedTemporaryFile` in text mode without explicit `encoding` argument
|
9 | io.TextIOWrapper(io.FileIO("test.txt"))
10 | hugo.TextIOWrapper(hugo.FileIO("test.txt"))
Expand All @@ -40,7 +40,7 @@ unspecified_encoding.py:11:1: PLW1514 `tempfile.NamedTemporaryFile` without expl
13 | codecs.open("test.txt")
|

unspecified_encoding.py:12:1: PLW1514 `tempfile.TemporaryFile` without explicit `encoding` argument
unspecified_encoding.py:12:1: PLW1514 `tempfile.TemporaryFile` in text mode without explicit `encoding` argument
|
10 | hugo.TextIOWrapper(hugo.FileIO("test.txt"))
11 | tempfile.NamedTemporaryFile("w")
Expand All @@ -50,7 +50,7 @@ unspecified_encoding.py:12:1: PLW1514 `tempfile.TemporaryFile` without explicit
14 | tempfile.SpooledTemporaryFile(0, "w")
|

unspecified_encoding.py:13:1: PLW1514 `codecs.open` without explicit `encoding` argument
unspecified_encoding.py:13:1: PLW1514 `codecs.open` in text mode without explicit `encoding` argument
|
11 | tempfile.NamedTemporaryFile("w")
12 | tempfile.TemporaryFile("w")
Expand All @@ -59,7 +59,7 @@ unspecified_encoding.py:13:1: PLW1514 `codecs.open` without explicit `encoding`
14 | tempfile.SpooledTemporaryFile(0, "w")
|

unspecified_encoding.py:14:1: PLW1514 `tempfile.SpooledTemporaryFile` without explicit `encoding` argument
unspecified_encoding.py:14:1: PLW1514 `tempfile.SpooledTemporaryFile` in text mode without explicit `encoding` argument
|
12 | tempfile.TemporaryFile("w")
13 | codecs.open("test.txt")
Expand Down

0 comments on commit 6c958f8

Please sign in to comment.