Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor noqa directive parsing away from regex-based implementation #5554

Merged
merged 7 commits into from
Jul 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions crates/ruff/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ ruff_rustpython = { path = "../ruff_rustpython" }
ruff_text_size = { workspace = true }
ruff_textwrap = { path = "../ruff_textwrap" }

aho-corasick = { version = "1.0.2" }
annotate-snippets = { version = "0.9.1", features = ["color"] }
anyhow = { workspace = true }
bitflags = { workspace = true }
Expand Down
176 changes: 146 additions & 30 deletions crates/ruff/src/noqa.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@ use std::fs;
use std::ops::Add;
use std::path::Path;

use aho_corasick::AhoCorasick;
use anyhow::Result;
use itertools::Itertools;
use log::warn;
use once_cell::sync::Lazy;
use regex::Regex;
use ruff_text_size::{TextLen, TextRange, TextSize};
use rustpython_parser::ast::Ranged;

Expand All @@ -20,8 +20,10 @@ use crate::codes::NoqaCode;
use crate::registry::{AsRule, Rule, RuleSet};
use crate::rule_redirects::get_redirect_target;

static NOQA_LINE_REGEX: Lazy<Regex> = Lazy::new(|| {
Regex::new(r"(?P<noqa>(?i:# noqa)(?::\s?(?P<codes>[A-Z]+[0-9]+(?:[,\s]+[A-Z]+[0-9]+)*))?)")
static NOQA_MATCHER: Lazy<AhoCorasick> = Lazy::new(|| {
AhoCorasick::builder()
.ascii_case_insensitive(true)
.build(["noqa"])
.unwrap()
});

Expand All @@ -38,32 +40,104 @@ pub(crate) enum Directive<'a> {
impl<'a> Directive<'a> {
/// Extract the noqa `Directive` from a line of Python source code.
pub(crate) fn try_extract(text: &'a str, offset: TextSize) -> Option<Self> {
let caps = NOQA_LINE_REGEX.captures(text)?;
match (caps.name("noqa"), caps.name("codes")) {
(Some(noqa), Some(codes)) => {
let codes = codes
.as_str()
.split(|c: char| c.is_whitespace() || c == ',')
.map(str::trim)
.filter(|code| !code.is_empty())
.collect_vec();
if codes.is_empty() {
warn!("Expected rule codes on `noqa` directive: \"{text}\"");
}
let range = TextRange::at(
TextSize::try_from(noqa.start()).unwrap().add(offset),
noqa.as_str().text_len(),
);
Some(Self::Codes(Codes { range, codes }))
}
(Some(noqa), None) => {
let range = TextRange::at(
TextSize::try_from(noqa.start()).unwrap().add(offset),
noqa.as_str().text_len(),
);
Some(Self::All(All { range }))
for mat in NOQA_MATCHER.find_iter(text) {
let noqa_literal_start = mat.start();

// Determine the start of the comment.
let mut comment_start = noqa_literal_start;

// Trim any whitespace between the `#` character and the `noqa` literal.
comment_start = text[..comment_start].trim_end().len();

// The next character has to be the `#` character.
if text[..comment_start]
.chars()
.last()
.map_or(false, |c| c != '#')
{
continue;
}
_ => None,
comment_start -= '#'.len_utf8();

// If the next character is `:`, then it's a list of codes. Otherwise, it's a directive
// to ignore all rules.
let noqa_literal_end = mat.end();
return Some(
if text[noqa_literal_end..]
.chars()
.next()
.map_or(false, |c| c == ':')
{
// E.g., `# noqa: F401, F841`.
let mut codes_start = noqa_literal_end;

// Skip the `:` character.
codes_start += ':'.len_utf8();

// Skip any whitespace between the `:` and the codes.
codes_start += text[codes_start..]
.find(|c: char| !c.is_whitespace())
.unwrap_or(0);

// Extract the comma-separated list of codes.
let mut codes = vec![];
let mut codes_end = codes_start;
let mut leading_space = 0;
while let Some(code) = Directive::lex_code(&text[codes_end + leading_space..]) {
codes.push(code);
codes_end += leading_space;
codes_end += code.len();

// Codes can be comma- or whitespace-delimited. Compute the length of the
// delimiter, but only add it in the next iteration, once we find the next
// code.
if let Some(space_between) =
text[codes_end..].find(|c: char| !(c.is_whitespace() || c == ','))
{
leading_space = space_between;
} else {
break;
}
}

let range = TextRange::new(
TextSize::try_from(comment_start).unwrap(),
TextSize::try_from(codes_end).unwrap(),
);

Self::Codes(Codes {
range: range.add(offset),
codes,
})
} else {
// E.g., `# noqa`.
let range = TextRange::new(
TextSize::try_from(comment_start).unwrap(),
TextSize::try_from(noqa_literal_end).unwrap(),
);
Self::All(All {
range: range.add(offset),
})
},
);
}

None
}

/// Lex an individual rule code (e.g., `F401`).
fn lex_code(text: &str) -> Option<&str> {
// Extract, e.g., the `F` in `F401`.
let prefix = text.chars().take_while(char::is_ascii_uppercase).count();
charliermarsh marked this conversation as resolved.
Show resolved Hide resolved
// Extract, e.g., the `401` in `F401`.
let suffix = text[prefix..]
.chars()
.take_while(char::is_ascii_digit)
.count();
if prefix > 0 && suffix > 0 {
Some(&text[..prefix + suffix])
} else {
None
}
}
}
Expand Down Expand Up @@ -488,7 +562,7 @@ impl NoqaMapping {
}

/// Returns the re-mapped position or `position` if no mapping exists.
pub fn resolve(&self, offset: TextSize) -> TextSize {
pub(crate) fn resolve(&self, offset: TextSize) -> TextSize {
let index = self.ranges.binary_search_by(|range| {
if range.end() < offset {
std::cmp::Ordering::Less
Expand All @@ -506,7 +580,7 @@ impl NoqaMapping {
}
}

pub fn push_mapping(&mut self, range: TextRange) {
pub(crate) fn push_mapping(&mut self, range: TextRange) {
if let Some(last_range) = self.ranges.last_mut() {
// Strictly sorted insertion
if last_range.end() <= range.start() {
Expand Down Expand Up @@ -634,6 +708,48 @@ mod tests {
assert_debug_snapshot!(Directive::try_extract(source, TextSize::default()));
}

#[test]
fn noqa_all_leading_comment() {
let source = "# Some comment describing the noqa # noqa";
assert_debug_snapshot!(Directive::try_extract(source, TextSize::default()));
}

#[test]
fn noqa_code_leading_comment() {
let source = "# Some comment describing the noqa # noqa: F401";
assert_debug_snapshot!(Directive::try_extract(source, TextSize::default()));
}

#[test]
fn noqa_codes_leading_comment() {
let source = "# Some comment describing the noqa # noqa: F401, F841";
assert_debug_snapshot!(Directive::try_extract(source, TextSize::default()));
}

#[test]
fn noqa_all_trailing_comment() {
let source = "# noqa # Some comment describing the noqa";
assert_debug_snapshot!(Directive::try_extract(source, TextSize::default()));
}

#[test]
fn noqa_code_trailing_comment() {
let source = "# noqa: F401 # Some comment describing the noqa";
assert_debug_snapshot!(Directive::try_extract(source, TextSize::default()));
}

#[test]
fn noqa_codes_trailing_comment() {
let source = "# noqa: F401, F841 # Some comment describing the noqa";
assert_debug_snapshot!(Directive::try_extract(source, TextSize::default()));
}

#[test]
fn noqa_invalid_codes() {
let source = "# noqa: F401, unused-import, some other code";
assert_debug_snapshot!(Directive::try_extract(source, TextSize::default()));
}

#[test]
fn modification() {
let contents = "x = 1";
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
---
source: crates/ruff/src/noqa.rs
expression: "Directive::try_extract(source, TextSize::default())"
---
Some(
All(
All {
range: 35..41,
},
),
)
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
---
source: crates/ruff/src/noqa.rs
expression: "Directive::extract(range, &locator)"
expression: "Directive::try_extract(source, TextSize::default())"
---
None
Some(
All(
All {
range: 0..7,
},
),
)
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
---
source: crates/ruff/src/noqa.rs
expression: "Directive::extract(range, &locator)"
expression: "Directive::try_extract(source, TextSize::default())"
---
None
Some(
All(
All {
range: 0..5,
},
),
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
---
source: crates/ruff/src/noqa.rs
expression: "Directive::try_extract(source, TextSize::default())"
---
Some(
All(
All {
range: 0..6,
},
),
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
---
source: crates/ruff/src/noqa.rs
expression: "Directive::try_extract(source, TextSize::default())"
---
Some(
Codes(
Codes {
range: 35..47,
codes: [
"F401",
],
},
),
)
Original file line number Diff line number Diff line change
@@ -1,5 +1,14 @@
---
source: crates/ruff/src/noqa.rs
expression: "Directive::extract(range, &locator)"
expression: "Directive::try_extract(source, TextSize::default())"
---
None
Some(
Codes(
Codes {
range: 0..13,
codes: [
"F401",
],
},
),
)
Original file line number Diff line number Diff line change
@@ -1,5 +1,14 @@
---
source: crates/ruff/src/noqa.rs
expression: "Directive::extract(range, &locator)"
expression: "Directive::try_extract(source, TextSize::default())"
---
None
Some(
Codes(
Codes {
range: 0..10,
codes: [
"F401",
],
},
),
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
---
source: crates/ruff/src/noqa.rs
expression: "Directive::try_extract(source, TextSize::default())"
---
Some(
Codes(
Codes {
range: 0..12,
codes: [
"F401",
],
},
),
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
---
source: crates/ruff/src/noqa.rs
expression: "Directive::try_extract(source, TextSize::default())"
---
Some(
Codes(
Codes {
range: 35..53,
codes: [
"F401",
"F841",
],
},
),
)
Original file line number Diff line number Diff line change
@@ -1,5 +1,15 @@
---
source: crates/ruff/src/noqa.rs
expression: "Directive::extract(range, &locator)"
expression: "Directive::try_extract(source, TextSize::default())"
---
None
Some(
Codes(
Codes {
range: 0..20,
codes: [
"F401",
"F841",
],
},
),
)
Loading