Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Automatically ignore files specified in .gitignore #1234

Merged
merged 2 commits into from
Dec 14, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ fern = { version = "0.6.1" }
filetime = { version = "0.2.17" }
glob = { version = "0.3.0" }
globset = { version = "0.4.9" }
ignore = { version = "0.4.18" }
itertools = { version = "0.10.5" }
libcst = { git = "https://github.com/charliermarsh/LibCST", rev = "f2f0b7a487a8725d161fe8b3ed73a6758b21e177" }
log = { version = "0.4.17" }
Expand Down
1 change: 1 addition & 0 deletions resources/test/project/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
examples/generated
29 changes: 18 additions & 11 deletions resources/test/project/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,33 +9,37 @@ Running from the repo root should pick up and enforce the appropriate settings f

```
∴ cargo run resources/test/project/
Found 5 error(s).
Found 7 error(s).
resources/test/project/examples/.dotfiles/script.py:1:8: F401 `os` imported but unused
resources/test/project/examples/.dotfiles/script.py:5:5: F841 Local variable `x` is assigned to but never used
resources/test/project/examples/docs/docs/file.py:1:1: I001 Import block is un-sorted or un-formatted
resources/test/project/examples/docs/docs/file.py:8:5: F841 Local variable `x` is assigned to but never used
resources/test/project/src/file.py:1:8: F401 `os` imported but unused
resources/test/project/src/file.py:5:5: F841 Local variable `x` is assigned to but never used
resources/test/project/src/import_file.py:1:1: I001 Import block is un-sorted or un-formatted
3 potentially fixable with the --fix option.
4 potentially fixable with the --fix option.
```

Running from the project directory itself should exhibit the same behavior:

```
∴ cd resources/test/project/ && cargo run .
Found 5 error(s).
∴ (cd resources/test/project/ && cargo run .)
Found 7 error(s).
examples/.dotfiles/script.py:1:8: F401 `os` imported but unused
examples/.dotfiles/script.py:5:5: F841 Local variable `x` is assigned to but never used
examples/docs/docs/file.py:1:1: I001 Import block is un-sorted or un-formatted
examples/docs/docs/file.py:8:5: F841 Local variable `x` is assigned to but never used
src/file.py:1:8: F401 `os` imported but unused
src/file.py:5:5: F841 Local variable `x` is assigned to but never used
src/import_file.py:1:1: I001 Import block is un-sorted or un-formatted
3 potentially fixable with the --fix option.
4 potentially fixable with the --fix option.
```

Running from the sub-package directory should exhibit the same behavior, but omit the top-level
files:

```
∴ cd resources/test/project/examples/docs && cargo run .
(cd resources/test/project/examples/docs && cargo run .)
Found 2 error(s).
docs/file.py:1:1: I001 Import block is un-sorted or un-formatted
docs/file.py:8:5: F841 Local variable `x` is assigned to but never used
Expand All @@ -46,8 +50,10 @@ docs/file.py:8:5: F841 Local variable `x` is assigned to but never used
file paths from the current working directory:

```
∴ cargo run -- --config=resources/test/project/pyproject.toml resources/test/project/
Found 9 error(s).
∴ (cargo run -- --config=resources/test/project/pyproject.toml resources/test/project/)
Found 11 error(s).
resources/test/project/examples/.dotfiles/script.py:1:8: F401 `os` imported but unused
resources/test/project/examples/.dotfiles/script.py:5:5: F841 Local variable `x` is assigned to but never used
resources/test/project/examples/docs/docs/concepts/file.py:1:8: F401 `os` imported but unused
resources/test/project/examples/docs/docs/concepts/file.py:5:5: F841 Local variable `x` is assigned to but never used
resources/test/project/examples/docs/docs/file.py:1:8: F401 `os` imported but unused
Expand All @@ -57,15 +63,16 @@ resources/test/project/examples/docs/docs/file.py:8:5: F841 Local variable `x` i
resources/test/project/src/file.py:1:8: F401 `os` imported but unused
resources/test/project/src/file.py:5:5: F841 Local variable `x` is assigned to but never used
resources/test/project/src/import_file.py:1:1: I001 Import block is un-sorted or un-formatted
6 potentially fixable with the --fix option.
7 potentially fixable with the --fix option.
```

Running from a parent directory should this "ignore" the `exclude` (hence, `concepts/file.py` gets
included in the output):

```
∴ cd resources/test/project/examples && cargo run -- --config=docs/pyproject.toml .
Found 3 error(s).
∴ (cd resources/test/project/examples && cargo run -- --config=docs/pyproject.toml .)
Found 4 error(s).
.dotfiles/script.py:5:5: F841 Local variable `x` is assigned to but never used
docs/docs/concepts/file.py:5:5: F841 Local variable `x` is assigned to but never used
docs/docs/file.py:1:1: I001 Import block is un-sorted or un-formatted
docs/docs/file.py:8:5: F841 Local variable `x` is assigned to but never used
Expand Down
5 changes: 5 additions & 0 deletions resources/test/project/examples/.dotfiles/script.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
import os


def f():
x = 1
17 changes: 11 additions & 6 deletions src/commands.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ use std::path::{Path, PathBuf};
use std::time::Instant;

use anyhow::{bail, Result};
use ignore::Error;
use itertools::Itertools;
use log::{debug, error};
#[cfg(not(target_family = "wasm"))]
Expand Down Expand Up @@ -30,7 +31,7 @@ pub fn run(
) -> Result<Diagnostics> {
// Collect all the files to check.
let start = Instant::now();
let (paths, resolver) = resolver::resolve_python_files(files, strategy, overrides)?;
let (paths, resolver) = resolver::python_files_in_path(files, strategy, overrides)?;
let duration = start.elapsed();
debug!("Identified files to lint in: {:?}", duration);

Expand All @@ -45,7 +46,11 @@ pub fn run(
.map_err(|e| (Some(path.to_owned()), e.to_string()))
}
Err(e) => Err((
e.path().map(Path::to_owned),
if let Error::WithPath { path, .. } = e {
Some(path.clone())
} else {
None
},
e.io_error()
.map_or_else(|| e.to_string(), io::Error::to_string),
)),
Expand Down Expand Up @@ -111,7 +116,7 @@ pub fn run_stdin(
pub fn add_noqa(files: &[PathBuf], strategy: &Strategy, overrides: &Overrides) -> Result<usize> {
// Collect all the files to check.
let start = Instant::now();
let (paths, resolver) = resolver::resolve_python_files(files, strategy, overrides)?;
let (paths, resolver) = resolver::python_files_in_path(files, strategy, overrides)?;
let duration = start.elapsed();
debug!("Identified files to lint in: {:?}", duration);

Expand Down Expand Up @@ -141,7 +146,7 @@ pub fn add_noqa(files: &[PathBuf], strategy: &Strategy, overrides: &Overrides) -
pub fn autoformat(files: &[PathBuf], strategy: &Strategy, overrides: &Overrides) -> Result<usize> {
// Collect all the files to format.
let start = Instant::now();
let (paths, resolver) = resolver::resolve_python_files(files, strategy, overrides)?;
let (paths, resolver) = resolver::python_files_in_path(files, strategy, overrides)?;
let duration = start.elapsed();
debug!("Identified files to lint in: {:?}", duration);

Expand Down Expand Up @@ -170,7 +175,7 @@ pub fn autoformat(files: &[PathBuf], strategy: &Strategy, overrides: &Overrides)
/// Print the user-facing configuration settings.
pub fn show_settings(files: &[PathBuf], strategy: &Strategy, overrides: &Overrides) -> Result<()> {
// Collect all files in the hierarchy.
let (paths, resolver) = resolver::resolve_python_files(files, strategy, overrides)?;
let (paths, resolver) = resolver::python_files_in_path(files, strategy, overrides)?;

// Print the list of files.
let Some(entry) = paths
Expand All @@ -190,7 +195,7 @@ pub fn show_settings(files: &[PathBuf], strategy: &Strategy, overrides: &Overrid
/// Show the list of files to be checked based on current settings.
pub fn show_files(files: &[PathBuf], strategy: &Strategy, overrides: &Overrides) -> Result<()> {
// Collect all files in the hierarchy.
let (paths, _resolver) = resolver::resolve_python_files(files, strategy, overrides)?;
let (paths, _resolver) = resolver::python_files_in_path(files, strategy, overrides)?;

// Print the list of files.
for entry in paths
Expand Down
152 changes: 84 additions & 68 deletions src/resolver.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,13 @@

use std::collections::BTreeMap;
use std::path::{Path, PathBuf};
use std::sync::RwLock;

use anyhow::{bail, Result};
use anyhow::{anyhow, bail, Result};
use ignore::{DirEntry, WalkBuilder, WalkState};
use log::debug;
use path_absolutize::path_dedot;
use rustc_hash::FxHashSet;
use walkdir::{DirEntry, WalkDir};

use crate::cli::Overrides;
use crate::fs;
Expand Down Expand Up @@ -169,35 +170,20 @@ fn is_python_file(path: &Path) -> bool {
.map_or(false, |ext| ext == "py" || ext == "pyi")
}

/// Find all Python (`.py` and `.pyi` files) in a set of `PathBuf`s.
pub fn resolve_python_files(
/// Find all Python (`.py` and `.pyi` files) in a set of paths.
pub fn python_files_in_path(
paths: &[PathBuf],
strategy: &Strategy,
overrides: &Overrides,
) -> Result<(Vec<Result<DirEntry, walkdir::Error>>, Resolver)> {
let mut files = Vec::new();
let mut resolver = Resolver::default();
for path in paths {
let (files_in_path, file_resolver) = python_files_in_path(path, strategy, overrides)?;
files.extend(files_in_path);
resolver.merge(file_resolver);
}
Ok((files, resolver))
}

/// Find all Python (`.py` and `.pyi` files) in a given `Path`.
fn python_files_in_path(
path: &Path,
strategy: &Strategy,
overrides: &Overrides,
) -> Result<(Vec<Result<DirEntry, walkdir::Error>>, Resolver)> {
let path = fs::normalize_path(path);
) -> Result<(Vec<Result<DirEntry, ignore::Error>>, Resolver)> {
// Normalize every path (e.g., convert from relative to absolute).
let paths: Vec<PathBuf> = paths.iter().map(|path| fs::normalize_path(path)).collect();

// Search for `pyproject.toml` files in all parent directories.
let mut resolver = Resolver::default();
for path in path.ancestors() {
if path.is_dir() {
let pyproject = path.join("pyproject.toml");
for path in &paths {
for ancestor in path.ancestors() {
let pyproject = ancestor.join("pyproject.toml");
if pyproject.is_file() {
let (root, settings) =
resolve_scoped_settings(&pyproject, &Relativity::Parent, Some(overrides))?;
Expand All @@ -206,57 +192,87 @@ fn python_files_in_path(
}
}

// Collect all Python files.
let files: Vec<Result<DirEntry, walkdir::Error>> = WalkDir::new(path)
.into_iter()
.filter_entry(|entry| {
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I want to use WalkBuilder, which should be pretty much a drop-in replacement for WalkDir (and let's me use the same filter_entry), but it's requiring that the lifetime of any variable in the closure if 'static? So I can't iteratively build up the shared resolver like I am now.

Copy link
Member Author

@charliermarsh charliermarsh Dec 14, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The current code could be slow if you had a very large directory that wasn't marked as .gitignore but was excluded by Ruff. E.g., if you had a monorepo with a bunch of directories that contained JS files, we'd spend time traversing all of those.

// Search for the `pyproject.toml` file in this directory, before we visit any
// of its contents.
if entry.file_type().is_dir() {
let pyproject = entry.path().join("pyproject.toml");
if pyproject.is_file() {
// TODO(charlie): Return a `Result` here.
let (root, settings) =
resolve_scoped_settings(&pyproject, &Relativity::Parent, Some(overrides))
.unwrap();
resolver.add(root, settings);
// Create the `WalkBuilder`.
let mut builder = WalkBuilder::new(
paths
.get(0)
.ok_or_else(|| anyhow!("Expected at least one path to search for Python files"))?,
);
for path in &paths[1..] {
builder.add(path);
}
let walker = builder.hidden(false).build_parallel();

// Run the `WalkParallel` to collect all Python files.
let error: std::sync::Mutex<Result<()>> = std::sync::Mutex::new(Ok(()));
let resolver: RwLock<Resolver> = RwLock::new(resolver);
let files: std::sync::Mutex<Vec<Result<DirEntry, ignore::Error>>> =
std::sync::Mutex::new(vec![]);
walker.run(|| {
Box::new(|result| {
if let Ok(entry) = &result {
// Search for the `pyproject.toml` file in this directory, before we visit any
// of its contents.
if entry
.file_type()
.map_or(false, |file_type| file_type.is_dir())
{
let pyproject = entry.path().join("pyproject.toml");
if pyproject.is_file() {
match resolve_scoped_settings(
&pyproject,
&Relativity::Parent,
Some(overrides),
) {
Ok((root, settings)) => resolver.write().unwrap().add(root, settings),
Err(err) => {
*error.lock().unwrap() = Err(err);
return WalkState::Quit;
}
}
}
}
}

let path = entry.path();
let settings = resolver.resolve(path, strategy);
match fs::extract_path_names(path) {
Ok((file_path, file_basename)) => {
if !settings.exclude.is_empty()
&& is_excluded(file_path, file_basename, &settings.exclude)
{
debug!("Ignored path via `exclude`: {:?}", path);
false
} else if !settings.extend_exclude.is_empty()
&& is_excluded(file_path, file_basename, &settings.extend_exclude)
{
debug!("Ignored path via `extend-exclude`: {:?}", path);
false
} else {
true
let path = entry.path();
let resolver = resolver.read().unwrap();
let settings = resolver.resolve(path, strategy);
match fs::extract_path_names(path) {
Ok((file_path, file_basename)) => {
if !settings.exclude.is_empty()
&& is_excluded(file_path, file_basename, &settings.exclude)
{
debug!("Ignored path via `exclude`: {:?}", path);
return WalkState::Skip;
} else if !settings.extend_exclude.is_empty()
&& is_excluded(file_path, file_basename, &settings.extend_exclude)
{
debug!("Ignored path via `extend-exclude`: {:?}", path);
return WalkState::Skip;
}
}
Err(e) => {
debug!("Ignored path due to error in parsing: {:?}: {}", path, e);
return WalkState::Skip;
}
}
Err(e) => {
debug!("Ignored path due to error in parsing: {:?}: {}", path, e);
true
}
}
})
.filter(|entry| {
entry.as_ref().map_or(true, |entry| {

if result.as_ref().map_or(true, |entry| {
(entry.depth() == 0 || is_python_file(entry.path()))
&& !entry.file_type().is_dir()
&& !(entry.file_type().is_symlink() && entry.path().is_dir())
})
&& !entry
.file_type()
.map_or(false, |file_type| file_type.is_dir())
}) {
files.lock().unwrap().push(result);
}

WalkState::Continue
})
.collect::<Vec<_>>();
});

error.into_inner().unwrap()?;

Ok((files, resolver))
Ok((files.into_inner().unwrap(), resolver.into_inner().unwrap()))
}

#[cfg(test)]
Expand Down