diff --git a/crates/ruff_dev/src/check_formatter_stability.rs b/crates/ruff_dev/src/check_formatter_stability.rs index 26b1d68902e4d6..39f90224b20208 100644 --- a/crates/ruff_dev/src/check_formatter_stability.rs +++ b/crates/ruff_dev/src/check_formatter_stability.rs @@ -42,6 +42,9 @@ pub(crate) struct Args { /// Print only the first error and exit, `-x` is same as pytest #[arg(long, short = 'x')] pub(crate) exit_first_error: bool, + /// Checks each project inside a directory + #[arg(long)] + pub(crate) multi_project: bool, } /// Generate ourself a `try_parse_from` impl for `CheckArgs`. This is a strange way to use clap but @@ -54,6 +57,35 @@ struct WrapperArgs { } pub(crate) fn main(args: &Args) -> anyhow::Result { + let all_success = if args.multi_project { + let mut all_success = true; + for base_dir in &args.files { + for dir in base_dir.read_dir()? { + let dir = dir?; + println!("Starting {}", dir.path().display()); + let success = check_repo(&Args { + files: vec![dir.path().clone()], + ..*args + }); + println!("Finished {}: {:?}", dir.path().display(), success); + if !matches!(success, Ok(true)) { + all_success = false; + } + } + } + all_success + } else { + check_repo(args)? + }; + if all_success { + Ok(ExitCode::SUCCESS) + } else { + Ok(ExitCode::FAILURE) + } +} + +/// Returns whether the check was successful +pub(crate) fn check_repo(args: &Args) -> anyhow::Result { let start = Instant::now(); // Find files to check (or in this case, format twice). Adapted from ruff_cli @@ -77,13 +109,20 @@ pub(crate) fn main(args: &Args) -> anyhow::Result { let (paths, _resolver) = python_files_in_path(&cli.files, &pyproject_config, &overrides)?; assert!(!paths.is_empty(), "no python files in {:?}", cli.files); + let mut formatted_counter = 0; let errors = paths .into_iter() .map(|dir_entry| { // Doesn't make sense to recover here in this test script - let file = dir_entry - .expect("Iterating the files in the repository failed") - .into_path(); + dir_entry.expect("Iterating the files in the repository failed") + }) + .filter(|dir_entry| { + // For some reason it does not filter in the beginning + dir_entry.file_name() != "pyproject.toml" + }) + .map(|dir_entry| { + let file = dir_entry.path().to_path_buf(); + formatted_counter += 1; // Handle panics (mostly in `debug_assert!`) let result = match catch_unwind(|| check_file(&file)) { Ok(result) => result, @@ -166,20 +205,20 @@ Formatted twice: } if args.exit_first_error { - return Ok(ExitCode::FAILURE); + return Ok(false); } } let duration = start.elapsed(); println!( "Formatting {} files twice took {:.2}s", - cli.files.len(), + formatted_counter, duration.as_secs_f32() ); if any_errors { - Ok(ExitCode::FAILURE) + Ok(false) } else { - Ok(ExitCode::SUCCESS) + Ok(true) } } diff --git a/scripts/check_ecosystem.py b/scripts/check_ecosystem.py index d60ad3fc6dd061..c8c19162999c08 100755 --- a/scripts/check_ecosystem.py +++ b/scripts/check_ecosystem.py @@ -44,11 +44,11 @@ class Repository(NamedTuple): async def clone(self: Self, checkout_dir: Path) -> AsyncIterator[Path]: """Shallow clone this repository to a temporary directory.""" if checkout_dir.exists(): - logger.debug(f"Reusing {self.org}/{self.repo}") + logger.debug(f"Reusing {self.org}:{self.repo}") yield Path(checkout_dir) return - logger.debug(f"Cloning {self.org}/{self.repo}") + logger.debug(f"Cloning {self.org}:{self.repo}") git_command = [ "git", "clone", @@ -177,18 +177,17 @@ async def compare( """Check a specific repository against two versions of ruff.""" removed, added = set(), set() - # Allows to keep the checkouts locations + # By the default, the git clone are transient, but if the user provides a + # directory for permanent storage we keep it there if checkouts: - checkout_parent = checkouts.joinpath(repo.org) - # Don't create the repodir itself, we need that for checking for existing - # clones - checkout_parent.mkdir(exist_ok=True, parents=True) - location_context = nullcontext(checkout_parent) + location_context = nullcontext(checkouts) else: location_context = tempfile.TemporaryDirectory() with location_context as checkout_parent: - checkout_dir = Path(checkout_parent).joinpath(repo.repo) + assert ":" not in repo.org + assert ":" not in repo.repo + checkout_dir = Path(checkout_parent).joinpath(f"{repo.org}:{repo.repo}") async with repo.clone(checkout_dir) as path: try: async with asyncio.TaskGroup() as tg: @@ -284,8 +283,19 @@ async def main( logger.debug(f"Checking {len(repositories)} projects") + # https://stackoverflow.com/a/61478547/3549270 + # Otherwise doing 3k repositories can take >8GB RAM + semaphore = asyncio.Semaphore(50) + + async def limited_parallelism(coroutine): # noqa: ANN + async with semaphore: + return await coroutine + results = await asyncio.gather( - *[compare(ruff1, ruff2, repo, checkouts) for repo in repositories.values()], + *[ + limited_parallelism(compare(ruff1, ruff2, repo, checkouts)) + for repo in repositories.values() + ], return_exceptions=True, ) @@ -433,6 +443,8 @@ async def main( logging.basicConfig(level=logging.INFO) loop = asyncio.get_event_loop() + if args.checkouts: + args.checkouts.mkdir(exist_ok=True, parents=True) main_task = asyncio.ensure_future( main( ruff1=args.ruff1,