Skip to content

Commit

Permalink
[ruff] Adds an allowlist for unsafe-markup-use (RUF035) (#15076)
Browse files Browse the repository at this point in the history
Closes: #14523

## Summary

Adds a whitelist of calls allowed to be used within a
`markupsafe.Markup` call.

## Test Plan

`cargo nextest run`
  • Loading branch information
Daverball authored Dec 20, 2024
1 parent 913bce3 commit 089a98e
Show file tree
Hide file tree
Showing 8 changed files with 120 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -375,6 +375,7 @@ linter.pylint.max_locals = 15
linter.pyupgrade.keep_runtime_typing = false
linter.ruff.parenthesize_tuple_in_subscript = false
linter.ruff.extend_markup_names = []
linter.ruff.allowed_markup_calls = []

# Formatter Settings
formatter.exclude = []
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from bleach import clean
from markupsafe import Markup

content = "<script>alert('Hello, world!')</script>"
Markup(clean(content))

# indirect assignments are currently not supported
cleaned = clean(content)
Markup(cleaned) # RUF035
26 changes: 26 additions & 0 deletions crates/ruff_linter/src/rules/ruff/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ mod tests {
ruff: super::settings::Settings {
parenthesize_tuple_in_subscript: true,
extend_markup_names: vec![],
allowed_markup_calls: vec![],
},
..LinterSettings::for_rule(Rule::IncorrectlyParenthesizedTupleInSubscript)
},
Expand All @@ -107,6 +108,7 @@ mod tests {
ruff: super::settings::Settings {
parenthesize_tuple_in_subscript: false,
extend_markup_names: vec![],
allowed_markup_calls: vec![],
},
target_version: PythonVersion::Py310,
..LinterSettings::for_rule(Rule::IncorrectlyParenthesizedTupleInSubscript)
Expand Down Expand Up @@ -447,6 +449,30 @@ mod tests {
ruff: super::settings::Settings {
parenthesize_tuple_in_subscript: true,
extend_markup_names: vec!["webhelpers.html.literal".to_string()],
allowed_markup_calls: vec![],
},
preview: PreviewMode::Enabled,
..LinterSettings::for_rule(rule_code)
},
)?;
assert_messages!(snapshot, diagnostics);
Ok(())
}

#[test_case(Rule::UnsafeMarkupUse, Path::new("RUF035_whitelisted_markup_calls.py"))]
fn whitelisted_markup_calls(rule_code: Rule, path: &Path) -> Result<()> {
let snapshot = format!(
"whitelisted_markup_calls__{}_{}",
rule_code.noqa_code(),
path.to_string_lossy()
);
let diagnostics = test_path(
Path::new("ruff").join(path).as_path(),
&LinterSettings {
ruff: super::settings::Settings {
parenthesize_tuple_in_subscript: true,
extend_markup_names: vec![],
allowed_markup_calls: vec!["bleach.clean".to_string()],
},
preview: PreviewMode::Enabled,
..LinterSettings::for_rule(rule_code)
Expand Down
32 changes: 26 additions & 6 deletions crates/ruff_linter/src/rules/ruff/rules/unsafe_markup_use.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
use ruff_python_ast::ExprCall;
use ruff_python_ast::{Expr, ExprCall};

use ruff_diagnostics::{Diagnostic, Violation};
use ruff_macros::{derive_message_formats, ViolationMetadata};
use ruff_python_ast::name::QualifiedName;
use ruff_python_semantic::Modules;
use ruff_python_semantic::{Modules, SemanticModel};
use ruff_text_size::Ranged;

use crate::{checkers::ast::Checker, settings::LinterSettings};
Expand All @@ -22,7 +22,9 @@ use crate::{checkers::ast::Checker, settings::LinterSettings};
/// treated like [`markupsafe.Markup`].
///
/// This rule was originally inspired by [flake8-markupsafe] but doesn't carve
/// out any exceptions for i18n related calls.
/// out any exceptions for i18n related calls by default.
///
/// You can use [`lint.ruff.allowed-markup-calls`] to specify exceptions.
///
/// ## Example
/// Given:
Expand Down Expand Up @@ -64,6 +66,7 @@ use crate::{checkers::ast::Checker, settings::LinterSettings};
/// ```
/// ## Options
/// - `lint.ruff.extend-markup-names`
/// - `lint.ruff.allowed-markup-calls`
///
/// ## References
/// - [MarkupSafe](https://pypi.org/project/MarkupSafe/)
Expand Down Expand Up @@ -95,7 +98,7 @@ pub(crate) fn unsafe_markup_call(checker: &mut Checker, call: &ExprCall) {
return;
}

if !is_unsafe_call(call) {
if !is_unsafe_call(call, checker.semantic(), checker.settings) {
return;
}

Expand Down Expand Up @@ -127,12 +130,29 @@ fn is_markup_call(qualified_name: &QualifiedName, settings: &LinterSettings) ->
.any(|target| *qualified_name == target)
}

fn is_unsafe_call(call: &ExprCall) -> bool {
fn is_unsafe_call(call: &ExprCall, semantic: &SemanticModel, settings: &LinterSettings) -> bool {
// technically this could be circumvented by using a keyword argument
// but without type-inference we can't really know which keyword argument
// corresponds to the first positional argument and either way it is
// unlikely that someone will actually use a keyword argument here
// TODO: Eventually we may want to allow dynamic values, as long as they
// have a __html__ attribute, since that is part of the API
matches!(&*call.arguments.args, [first] if !first.is_string_literal_expr() && !first.is_bytes_literal_expr())
matches!(&*call.arguments.args, [first] if !first.is_string_literal_expr() && !first.is_bytes_literal_expr() && !is_whitelisted_call(first, semantic, settings))
}

fn is_whitelisted_call(expr: &Expr, semantic: &SemanticModel, settings: &LinterSettings) -> bool {
let Expr::Call(ExprCall { func, .. }) = expr else {
return false;
};

let Some(qualified_name) = semantic.resolve_qualified_name(func) else {
return false;
};

settings
.ruff
.allowed_markup_calls
.iter()
.map(|target| QualifiedName::from_dotted_name(target))
.any(|target| qualified_name == target)
}
2 changes: 2 additions & 0 deletions crates/ruff_linter/src/rules/ruff/settings.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ use std::fmt;
pub struct Settings {
pub parenthesize_tuple_in_subscript: bool,
pub extend_markup_names: Vec<String>,
pub allowed_markup_calls: Vec<String>,
}

impl fmt::Display for Settings {
Expand All @@ -18,6 +19,7 @@ impl fmt::Display for Settings {
fields = [
self.parenthesize_tuple_in_subscript,
self.extend_markup_names | array,
self.allowed_markup_calls | array,
]
}
Ok(())
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
---
source: crates/ruff_linter/src/rules/ruff/mod.rs
---
RUF035_whitelisted_markup_calls.py:9:1: RUF035 Unsafe use of `markupsafe.Markup` detected
|
7 | # indirect assignments are currently not supported
8 | cleaned = clean(content)
9 | Markup(cleaned) # RUF035
| ^^^^^^^^^^^^^^^ RUF035
|
38 changes: 35 additions & 3 deletions crates/ruff_workspace/src/options.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3079,18 +3079,49 @@ pub struct RuffOptions {
)]
pub parenthesize_tuple_in_subscript: Option<bool>,

/// A list of additional callable names that behave like [`markupsafe.Markup`].
/// A list of additional callable names that behave like
/// [`markupsafe.Markup`](https://markupsafe.palletsprojects.com/en/stable/escaping/#markupsafe.Markup).
///
/// Expects to receive a list of fully-qualified names (e.g., `webhelpers.html.literal`, rather than
/// `literal`).
///
/// [markupsafe.Markup]: https://markupsafe.palletsprojects.com/en/stable/escaping/#markupsafe.Markup
#[option(
default = "[]",
value_type = "list[str]",
example = "extend-markup-names = [\"webhelpers.html.literal\", \"my_package.Markup\"]"
)]
pub extend_markup_names: Option<Vec<String>>,

/// A list of callable names, whose result may be safely passed into
/// [`markupsafe.Markup`](https://markupsafe.palletsprojects.com/en/stable/escaping/#markupsafe.Markup).
///
/// Expects to receive a list of fully-qualified names (e.g., `bleach.clean`, rather than `clean`).
///
/// This setting helps you avoid false positives in code like:
///
/// ```python
/// from bleach import clean
/// from markupsafe import Markup
///
/// cleaned_markup = Markup(clean(some_user_input))
/// ```
///
/// Where the use of [`bleach.clean`](https://bleach.readthedocs.io/en/latest/clean.html)
/// usually ensures that there's no XSS vulnerability.
///
/// Although it is not recommended, you may also use this setting to whitelist other
/// kinds of calls, e.g. calls to i18n translation functions, where how safe that is
/// will depend on the implementation and how well the translations are audited.
///
/// Another common use-case is to wrap the output of functions that generate markup
/// like [`xml.etree.ElementTree.tostring`](https://docs.python.org/3/library/xml.etree.elementtree.html#xml.etree.ElementTree.tostring)
/// or template rendering engines where sanitization of potential user input is either
/// already baked in or has to happen before rendering.
#[option(
default = "[]",
value_type = "list[str]",
example = "allowed-markup-calls = [\"bleach.clean\", \"my_package.sanitize\"]"
)]
pub allowed_markup_calls: Option<Vec<String>>,
}

impl RuffOptions {
Expand All @@ -3100,6 +3131,7 @@ impl RuffOptions {
.parenthesize_tuple_in_subscript
.unwrap_or_default(),
extend_markup_names: self.extend_markup_names.unwrap_or_default(),
allowed_markup_calls: self.allowed_markup_calls.unwrap_or_default(),
}
}
}
Expand Down
12 changes: 11 additions & 1 deletion ruff.schema.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 089a98e

Please sign in to comment.