diff --git a/Cargo.lock b/Cargo.lock index f7333fa504d9a7..db8cdef9665e9c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1818,7 +1818,6 @@ dependencies = [ "chrono", "clap", "colored", - "criterion", "dirs 5.0.1", "fern", "glob", @@ -1873,7 +1872,6 @@ dependencies = [ "thiserror", "toml", "typed-arena", - "unicase", "unicode-width", "unicode_names2", ] @@ -2888,15 +2886,6 @@ dependencies = [ "unic-common", ] -[[package]] -name = "unicase" -version = "2.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50f37be617794602aabbeee0be4f259dc1778fabe05e2d67ee8f79326d5cb4f6" -dependencies = [ - "version_check", -] - [[package]] name = "unicode-bidi" version = "0.3.13" diff --git a/crates/ruff/Cargo.toml b/crates/ruff/Cargo.toml index 205ab65cd0106c..7503cdddd55acb 100644 --- a/crates/ruff/Cargo.toml +++ b/crates/ruff/Cargo.toml @@ -27,7 +27,7 @@ ruff_rustpython = { path = "../ruff_rustpython" } ruff_text_size = { workspace = true } ruff_textwrap = { path = "../ruff_textwrap" } -aho-corasick = {version = "1.0.2"} +aho-corasick = { version = "1.0.2" } annotate-snippets = { version = "0.9.1", features = ["color"] } anyhow = { workspace = true } bitflags = { workspace = true } @@ -79,8 +79,6 @@ toml = { workspace = true } typed-arena = { version = "2.0.2" } unicode-width = { version = "0.1.10" } unicode_names2 = { version = "0.6.0", git = "https://github.com/youknowone/unicode_names2.git", rev = "4ce16aa85cbcdd9cc830410f1a72ef9a235f2fde" } -unicase = "2.6.0" -criterion = "0.5.1" [dev-dependencies] insta = { workspace = true } @@ -94,7 +92,3 @@ default = [] schemars = ["dep:schemars"] # Enables the UnreachableCode rule unreachable-code = [] - -[[bench]] -name = "benchmark" -harness = false diff --git a/crates/ruff/benches/benchmark.rs b/crates/ruff/benches/benchmark.rs deleted file mode 100644 index 0f26b2ffa84374..00000000000000 --- a/crates/ruff/benches/benchmark.rs +++ /dev/null @@ -1,58 +0,0 @@ -use criterion::{black_box, criterion_group, 
criterion_main, BenchmarkId, Criterion}; -use ruff_text_size::TextSize; - -use ruff::noqa::{Directive, ParsedFileExemption}; - -pub fn directive_benchmark(c: &mut Criterion) { - let mut group = c.benchmark_group("Directive"); - // for i in [ - // "# noqa: F401", - // "# noqa: F401, F841", - // "# noqa", - // "# type: ignore # noqa: E501", - // "# type: ignore # nosec", - // "# some very long comment that # is interspersed with characters but # no directive", - // ] - // .iter() - // { - // group.bench_with_input(BenchmarkId::new("Regex", i), i, |b, _i| { - // b.iter(|| Directive::try_extract(black_box(i), TextSize::default())) - // }); - // group.bench_with_input(BenchmarkId::new("Find", i), i, |b, _i| { - // b.iter(|| Directive::try_parse(black_box(i), TextSize::default())) - // }); - // group.bench_with_input(BenchmarkId::new("AhoCorasick", i), i, |b, _i| { - // b.iter(|| Directive::try_parse_aho_corasick(black_box(i), TextSize::default())) - // }); - // group.bench_with_input(BenchmarkId::new("Memchr", i), i, |b, _i| { - // b.iter(|| Directive::try_parse_memchr(black_box(i), TextSize::default())) - // }); - // } - - for i in [ - "# ruff: noqa", - "# flake8: NOQA", - "# noqa: F401, F841", - "# noqa", - "# type: ignore # noqa: E501", - "# type: ignore # nosec", - "# some very long comment that # is interspersed with characters but # no directive", - ] - .iter() - { - group.bench_with_input(BenchmarkId::new("Regex", i), i, |b, _i| { - b.iter(|| ParsedFileExemption::extract(black_box(i))) - }); - group.bench_with_input(BenchmarkId::new("Parser", i), i, |b, _i| { - b.iter(|| ParsedFileExemption::parse(black_box(i))) - }); - group.bench_with_input(BenchmarkId::new("Matches", i), i, |b, _i| { - b.iter(|| ParsedFileExemption::matches(black_box(i))) - }); - } - - group.finish(); -} - -criterion_group!(benches, directive_benchmark); -criterion_main!(benches); diff --git a/crates/ruff/src/lib.rs b/crates/ruff/src/lib.rs index 155414c1bcd918..2a69194fad4e98 100644 --- 
a/crates/ruff/src/lib.rs +++ b/crates/ruff/src/lib.rs @@ -27,7 +27,7 @@ pub mod line_width; pub mod linter; pub mod logging; pub mod message; -pub mod noqa; +mod noqa; pub mod packaging; pub mod pyproject_toml; pub mod registry; diff --git a/crates/ruff/src/noqa.rs b/crates/ruff/src/noqa.rs index 1771be904b2dfc..176dd6d3452014 100644 --- a/crates/ruff/src/noqa.rs +++ b/crates/ruff/src/noqa.rs @@ -11,7 +11,6 @@ use log::warn; use once_cell::sync::Lazy; use ruff_text_size::{TextLen, TextRange, TextSize}; use rustpython_parser::ast::Ranged; -use unicase::UniCase; use ruff_diagnostics::Diagnostic; use ruff_python_ast::source_code::Locator; @@ -31,7 +30,7 @@ static NOQA_MATCHER: Lazy = Lazy::new(|| { /// A directive to ignore a set of rules for a given line of Python source code (e.g., /// `# noqa: F401, F841`). #[derive(Debug)] -pub enum Directive<'a> { +pub(crate) enum Directive<'a> { /// The `noqa` directive ignores all rules (e.g., `# noqa`). All(All), /// The `noqa` directive ignores specific rules (e.g., `# noqa: F401, F841`). @@ -40,75 +39,87 @@ pub enum Directive<'a> { impl<'a> Directive<'a> { /// Extract the noqa `Directive` from a line of Python source code. - pub fn try_extract(text: &'a str, offset: TextSize) -> Option { + pub(crate) fn try_extract(text: &'a str, offset: TextSize) -> Option { for mat in NOQA_MATCHER.find_iter(text) { - let mut comment_start = mat.start(); + let noqa_literal_start = mat.start(); + + // Determine the start of the comment. + let mut comment_start = noqa_literal_start; // Trim any whitespace between the `#` character and the `noqa` literal. - comment_start -= text[..comment_start].len() - text[..comment_start].trim_end().len(); + comment_start = text[..comment_start].trim_end().len(); // The next character has to be the `#` character. 
- if !text[..comment_start].ends_with('#') { + if text[..comment_start] + .chars() + .last() + .map_or(true, |c| c != '#') // `None`: nothing precedes `noqa`, so there is no `#` + { continue; } + comment_start -= '#'.len_utf8(); - // The start of the `#` character. - comment_start -= 1; - - // If the next character is `:`, then it's a list of codes. Otherwise, it's an `all` - // directive. + // If the next character is `:`, then it's a list of codes. Otherwise, it's a directive + // to ignore all rules. let noqa_literal_end = mat.end(); - return Some(if text[noqa_literal_end..].starts_with(':') { - // E.g., `# noqa: F401, F841`. - let mut codes_start = noqa_literal_end; - - // Skip the `:` character. - codes_start += 1; - - // Skip any whitespace between the `:` and the codes. - codes_start += text[codes_start..] - .find(|c: char| !c.is_whitespace()) - .unwrap_or(0); - - // Extract the comma-separated list of codes. - let mut codes = vec![]; - let mut codes_end = codes_start; - let mut leading_space = 0; - while let Some(code) = Directive::lex_code(&text[codes_end + leading_space..]) { - codes.push(code); - codes_end += leading_space; - codes_end += code.len(); - - // Codes can be comma- or whitespace-delimited. Compute the length of the - // delimiter, but only add it in the next iteration, once we find the next - // code. - leading_space = text[codes_end..] - .find(|c: char| !(c.is_whitespace() || c == ',')) + return Some( + if text[noqa_literal_end..] + .chars() + .next() + .map_or(false, |c| c == ':') + { + // E.g., `# noqa: F401, F841`. + let mut codes_start = noqa_literal_end; + + // Skip the `:` character. + codes_start += ':'.len_utf8(); + + // Skip any whitespace between the `:` and the codes. + codes_start += text[codes_start..] + .find(|c: char| !c.is_whitespace()) .unwrap_or(0); - if leading_space == 0 { - break; + + // Extract the comma-separated list of codes.
+ let mut codes = vec![]; + let mut codes_end = codes_start; + let mut leading_space = 0; + while let Some(code) = Directive::lex_code(&text[codes_end + leading_space..]) { + codes.push(code); + codes_end += leading_space; + codes_end += code.len(); + + // Codes can be comma- or whitespace-delimited. Compute the length of the + // delimiter, but only add it in the next iteration, once we find the next + // code. + if let Some(space_between) = + text[codes_end..].find(|c: char| !(c.is_whitespace() || c == ',')) + { + leading_space = space_between; + } else { + break; + } } - } - let range = TextRange::new( - TextSize::try_from(comment_start).unwrap(), - TextSize::try_from(codes_end).unwrap(), - ); + let range = TextRange::new( + TextSize::try_from(comment_start).unwrap(), + TextSize::try_from(codes_end).unwrap(), + ); - Self::Codes(Codes { - range: range.add(offset), - codes, - }) - } else { - // E.g., `# noqa`. - let range = TextRange::new( - TextSize::try_from(comment_start).unwrap(), - TextSize::try_from(noqa_literal_end).unwrap(), - ); - Self::All(All { - range: range.add(offset), - }) - }); + Self::Codes(Codes { + range: range.add(offset), + codes, + }) + } else { + // E.g., `# noqa`. + let range = TextRange::new( + TextSize::try_from(comment_start).unwrap(), + TextSize::try_from(noqa_literal_end).unwrap(), + ); + Self::All(All { + range: range.add(offset), + }) + }, + ); } None @@ -116,7 +127,9 @@ impl<'a> Directive<'a> { /// Lex an individual rule code (e.g., `F401`). fn lex_code(text: &str) -> Option<&str> { + // Extract, e.g., the `F` in `F401`. let prefix = text.chars().take_while(char::is_ascii_uppercase).count(); + // Extract, e.g., the `401` in `F401`. let suffix = text[prefix..] 
.chars() .take_while(char::is_ascii_digit) @@ -130,7 +143,7 @@ impl<'a> Directive<'a> { } #[derive(Debug)] -pub struct All { +pub(crate) struct All { range: TextRange, } @@ -142,7 +155,7 @@ impl Ranged for All { } #[derive(Debug)] -pub struct Codes<'a> { +pub(crate) struct Codes<'a> { range: TextRange, codes: Vec<&'a str>, } @@ -233,7 +246,7 @@ impl FileExemption { /// [`FileExemption`], but only for a single line, as opposed to an aggregated set of exemptions /// across a source file. #[derive(Debug)] -pub enum ParsedFileExemption<'a> { +enum ParsedFileExemption<'a> { /// The file-level exemption ignores all rules (e.g., `# ruff: noqa`). All, /// The file-level exemption ignores specific rules (e.g., `# ruff: noqa: F401, F841`). @@ -242,77 +255,7 @@ pub enum ParsedFileExemption<'a> { impl<'a> ParsedFileExemption<'a> { /// Return a [`ParsedFileExemption`] for a given comment line. - pub fn parse(line: &'a str) -> Option { - let line = line.trim_whitespace_start(); - - if line.len() >= "# flake8: noqa".len() { - if UniCase::new(&line[.."# flake8: noqa".len()]) == UniCase::new("# flake8: noqa") { - return Some(Self::All); - } - } - - if line.len() >= "# ruff: noqa".len() { - if UniCase::new(&line[.."# ruff: noqa".len()]) == UniCase::new("# ruff: noqa") { - let remainder = &line["# ruff: noqa".len()..]; - if remainder.is_empty() { - return Some(Self::All); - } else if let Some(codes) = remainder.strip_prefix(':') { - let codes = codes - .split(|c: char| c.is_whitespace() || c == ',') - .map(str::trim) - .filter(|code| !code.is_empty()) - .collect_vec(); - if codes.is_empty() { - warn!("Expected rule codes on `noqa` directive: \"{line}\""); - } - return Some(Self::Codes(codes)); - } - } - } - - None - } - - /// Return a [`ParsedFileExemption`] for a given comment line. 
- pub fn matches(line: &'a str) -> Option { - let line = line.trim_whitespace_start(); - - if line.len() >= "# flake8: noqa".len() { - if matches!( - &line[.."# flake8: noqa".len()], - "# flake8: noqa" | "# flake8: NOQA" | "# flake8: NoQA" - ) { - return Some(Self::All); - } - } - - if line.len() >= "# ruff: noqa".len() { - if matches!( - &line[.."# ruff: noqa".len()], - "# ruff: noqa" | "# ruff: NOQA" | "# ruff: NoQA" - ) { - let remainder = &line["# ruff: noqa".len()..]; - if remainder.is_empty() { - return Some(Self::All); - } else if let Some(codes) = remainder.strip_prefix(':') { - let codes = codes - .split(|c: char| c.is_whitespace() || c == ',') - .map(str::trim) - .filter(|code| !code.is_empty()) - .collect_vec(); - if codes.is_empty() { - warn!("Expected rule codes on `noqa` directive: \"{line}\""); - } - return Some(Self::Codes(codes)); - } - } - } - - None - } - - /// Return a [`ParsedFileExemption`] for a given comment line. - pub fn extract(line: &'a str) -> Option { + fn try_extract(line: &'a str) -> Option { let line = line.trim_whitespace_start(); if line.starts_with("# flake8: noqa") @@ -619,7 +562,7 @@ impl NoqaMapping { } /// Returns the re-mapped position or `position` if no mapping exists. 
- pub fn resolve(&self, offset: TextSize) -> TextSize { + pub(crate) fn resolve(&self, offset: TextSize) -> TextSize { let index = self.ranges.binary_search_by(|range| { if range.end() < offset { std::cmp::Ordering::Less @@ -637,7 +580,7 @@ impl NoqaMapping { } } - pub fn push_mapping(&mut self, range: TextRange) { + pub(crate) fn push_mapping(&mut self, range: TextRange) { if let Some(last_range) = self.ranges.last_mut() { // Strictly sorted insertion if last_range.end() <= range.start() { diff --git a/scripts/check_ecosystem.py b/scripts/check_ecosystem.py index 8c4023ce9b75e0..a9904b305ac569 100755 --- a/scripts/check_ecosystem.py +++ b/scripts/check_ecosystem.py @@ -20,7 +20,7 @@ from contextlib import asynccontextmanager, nullcontext from pathlib import Path from signal import SIGINT, SIGTERM -from typing import TYPE_CHECKING, NamedTuple, Self +from typing import TYPE_CHECKING, NamedTuple, Self, TypeVar if TYPE_CHECKING: from collections.abc import AsyncIterator, Iterator, Sequence @@ -272,6 +272,9 @@ def read_projects_jsonl(projects_jsonl: Path) -> dict[tuple[str, str], Repositor return repositories +T = TypeVar("T") + + async def main( *, ruff1: Path, @@ -291,7 +294,7 @@ async def main( # Otherwise doing 3k repositories can take >8GB RAM semaphore = asyncio.Semaphore(50) - async def limited_parallelism(coroutine): # noqa: ANN + async def limited_parallelism(coroutine: T) -> T: async with semaphore: return await coroutine