Skip to content

Commit

Permalink
Removing trailing whitespace inside multiline strings is unsafe (#9744)
Browse files Browse the repository at this point in the history
Fix #8037.
  • Loading branch information
sanxiyn authored Jan 31, 2024
1 parent 7992583 commit 6e225cb
Show file tree
Hide file tree
Showing 7 changed files with 89 additions and 2 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
'''trailing whitespace
inside a multiline string'''
1 change: 1 addition & 0 deletions crates/ruff_linter/src/rules/pycodestyle/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ mod tests {
#[test_case(Rule::SyntaxError, Path::new("E999.py"))]
#[test_case(Rule::TabIndentation, Path::new("W19.py"))]
#[test_case(Rule::TrailingWhitespace, Path::new("W29.py"))]
#[test_case(Rule::TrailingWhitespace, Path::new("W291.py"))]
#[test_case(Rule::TrueFalseComparison, Path::new("E712.py"))]
#[test_case(Rule::TypeComparison, Path::new("E721.py"))]
#[test_case(Rule::UselessSemicolon, Path::new("E70.py"))]
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use ruff_diagnostics::{AlwaysFixableViolation, Diagnostic, Edit, Fix};
use ruff_diagnostics::{AlwaysFixableViolation, Applicability, Diagnostic, Edit, Fix};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_index::Indexer;
use ruff_source_file::{Line, Locator};
Expand Down Expand Up @@ -102,7 +102,15 @@ pub(crate) fn trailing_whitespace(
}
} else if settings.rules.enabled(Rule::TrailingWhitespace) {
let mut diagnostic = Diagnostic::new(TrailingWhitespace, range);
diagnostic.set_fix(Fix::safe_edit(Edit::range_deletion(range)));
diagnostic.set_fix(Fix::applicable_edit(
Edit::range_deletion(range),
// Removing trailing whitespace is not safe inside multiline strings.
if indexer.multiline_ranges().intersects(range) {
Applicability::Unsafe
} else {
Applicability::Safe
},
));
return Some(diagnostic);
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
---
source: crates/ruff_linter/src/rules/pycodestyle/mod.rs
---
W291.py:1:23: W291 [*] Trailing whitespace
|
1 | '''trailing whitespace
| ^ W291
2 | inside a multiline string'''
|
= help: Remove trailing whitespace

Unsafe fix
1 |-'''trailing whitespace
1 |+'''trailing whitespace
2 2 | inside a multiline string'''
12 changes: 12 additions & 0 deletions crates/ruff_python_index/src/indexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ use ruff_source_file::Locator;
use ruff_text_size::{Ranged, TextRange, TextSize};

use crate::fstring_ranges::{FStringRanges, FStringRangesBuilder};
use crate::multiline_ranges::{MultilineRanges, MultilineRangesBuilder};
use crate::CommentRangesBuilder;

pub struct Indexer {
Expand All @@ -21,6 +22,9 @@ pub struct Indexer {

/// The range of all f-string in the source document.
fstring_ranges: FStringRanges,

/// The range of all multiline strings in the source document.
multiline_ranges: MultilineRanges,
}

impl Indexer {
Expand All @@ -29,6 +33,7 @@ impl Indexer {

let mut comment_ranges_builder = CommentRangesBuilder::default();
let mut fstring_ranges_builder = FStringRangesBuilder::default();
let mut multiline_ranges_builder = MultilineRangesBuilder::default();
let mut continuation_lines = Vec::new();
// Token, end
let mut prev_end = TextSize::default();
Expand Down Expand Up @@ -61,6 +66,7 @@ impl Indexer {

comment_ranges_builder.visit_token(tok, *range);
fstring_ranges_builder.visit_token(tok, *range);
multiline_ranges_builder.visit_token(tok, *range);

match tok {
Tok::Newline | Tok::NonLogicalNewline => {
Expand All @@ -82,6 +88,7 @@ impl Indexer {
comment_ranges: comment_ranges_builder.finish(),
continuation_lines,
fstring_ranges: fstring_ranges_builder.finish(),
multiline_ranges: multiline_ranges_builder.finish(),
}
}

Expand All @@ -95,6 +102,11 @@ impl Indexer {
&self.fstring_ranges
}

/// Returns the byte offset ranges of multiline strings.
pub const fn multiline_ranges(&self) -> &MultilineRanges {
&self.multiline_ranges
}

/// Returns the line start positions of continuations (backslash).
pub fn continuation_line_starts(&self) -> &[TextSize] {
&self.continuation_lines
Expand Down
1 change: 1 addition & 0 deletions crates/ruff_python_index/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
mod comment_ranges;
mod fstring_ranges;
mod indexer;
mod multiline_ranges;

pub use comment_ranges::{tokens_and_ranges, CommentRangesBuilder};
pub use indexer::Indexer;
46 changes: 46 additions & 0 deletions crates/ruff_python_index/src/multiline_ranges.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
use ruff_python_parser::Tok;
use ruff_text_size::TextRange;

/// Stores the range of all multiline strings in a file sorted by
/// [`TextRange::start`].
pub struct MultilineRanges {
ranges: Vec<TextRange>,
}

impl MultilineRanges {
/// Returns `true` if the given range is inside a multiline string.
pub fn intersects(&self, target: TextRange) -> bool {
self.ranges
.binary_search_by(|range| {
if range.contains_range(target) {
std::cmp::Ordering::Equal
} else if range.end() < target.start() {
std::cmp::Ordering::Less
} else {
std::cmp::Ordering::Greater
}
})
.is_ok()
}
}

#[derive(Default)]
pub(crate) struct MultilineRangesBuilder {
ranges: Vec<TextRange>,
}

impl MultilineRangesBuilder {
pub(crate) fn visit_token(&mut self, token: &Tok, range: TextRange) {
if let Tok::String { triple_quoted, .. } = token {
if *triple_quoted {
self.ranges.push(range);
}
}
}

pub(crate) fn finish(self) -> MultilineRanges {
MultilineRanges {
ranges: self.ranges,
}
}
}

0 comments on commit 6e225cb

Please sign in to comment.