-
Notifications
You must be signed in to change notification settings - Fork 2.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix ignoring input files for symlink reasons #4222
Changes from 3 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -44,12 +44,12 @@ | |
STDIN_PLACEHOLDER, | ||
) | ||
from black.files import ( | ||
best_effort_relative_path, | ||
find_project_root, | ||
find_pyproject_toml, | ||
find_user_pyproject_toml, | ||
gen_python_files, | ||
get_gitignore, | ||
get_root_relative_path, | ||
parse_pyproject_toml, | ||
path_is_excluded, | ||
resolves_outside_root_or_cannot_stat, | ||
|
@@ -734,6 +734,7 @@ def get_sources( | |
"""Compute the set of files to be formatted.""" | ||
sources: Set[Path] = set() | ||
|
||
assert root.is_absolute(), f"INTERNAL ERROR: `root` must be absolute but is {root}" | ||
using_default_exclude = exclude is None | ||
exclude = re_compile_maybe_verbose(DEFAULT_EXCLUDES) if exclude is None else exclude | ||
gitignore: Optional[Dict[Path, PathSpec]] = None | ||
|
@@ -749,11 +750,12 @@ def get_sources( | |
|
||
# Compare the logic here to the logic in `gen_python_files`. | ||
if is_stdin or path.is_file(): | ||
root_relative_path = get_root_relative_path(path, root, report) | ||
|
||
if root_relative_path is None: | ||
if resolves_outside_root_or_cannot_stat(path, root, report): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. As you may have guessed from the name, the difference between this and There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This helped me located where the filtering happens and I just added |
||
if verbose: | ||
out(f'Skipping invalid source: "{path}"', fg="red") | ||
continue | ||
|
||
root_relative_path = best_effort_relative_path(path, root).as_posix() | ||
root_relative_path = "/" + root_relative_path | ||
|
||
# Hard-exclude any files that matches the `--force-exclude` regex. | ||
|
@@ -763,11 +765,6 @@ def get_sources( | |
) | ||
continue | ||
|
||
if resolves_outside_root_or_cannot_stat(path, root, report): | ||
if verbose: | ||
out(f'Skipping invalid source: "{path}"', fg="red") | ||
continue | ||
|
||
if is_stdin: | ||
path = Path(f"{STDIN_PLACEHOLDER}{str(path)}") | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -48,6 +48,11 @@ def _load_toml(path: Union[Path, str]) -> Dict[str, Any]: | |
return tomllib.load(f) | ||
|
||
|
||
@lru_cache | ||
def _cached_resolve(path: Path) -> Path: | ||
return path.resolve() | ||
|
||
|
||
@lru_cache | ||
def find_project_root( | ||
srcs: Sequence[str], stdin_filename: Optional[str] = None | ||
|
@@ -67,9 +72,9 @@ def find_project_root( | |
if stdin_filename is not None: | ||
srcs = tuple(stdin_filename if s == "-" else s for s in srcs) | ||
if not srcs: | ||
srcs = [str(Path.cwd().resolve())] | ||
srcs = [str(_cached_resolve(Path.cwd()))] | ||
|
||
path_srcs = [Path(Path.cwd(), src).resolve() for src in srcs] | ||
path_srcs = [_cached_resolve(Path(Path.cwd(), src)) for src in srcs] | ||
|
||
# A list of lists of parents for each 'src'. 'src' is included as a | ||
# "parent" of itself if it is a directory | ||
|
@@ -236,7 +241,7 @@ def find_user_pyproject_toml() -> Path: | |
else: | ||
config_root = os.environ.get("XDG_CONFIG_HOME", "~/.config") | ||
user_config_path = Path(config_root).expanduser() / "black" | ||
return user_config_path.resolve() | ||
return _cached_resolve(user_config_path) | ||
|
||
|
||
@lru_cache | ||
|
@@ -266,27 +271,31 @@ def resolves_outside_root_or_cannot_stat( | |
try: | ||
if sys.version_info < (3, 8, 6): | ||
path = path.absolute() # https://bugs.python.org/issue33660 | ||
resolved_path = path.resolve() | ||
return get_root_relative_path(resolved_path, root, report) is None | ||
resolved_path = _cached_resolve(path) | ||
except OSError as e: | ||
if report: | ||
report.path_ignored(path, f"cannot be read because {e}") | ||
return True | ||
|
||
|
||
def get_root_relative_path( | ||
path: Path, | ||
root: Path, | ||
report: Optional[Report] = None, | ||
) -> Optional[str]: | ||
"""Returns the file path relative to the 'root' directory""" | ||
try: | ||
root_relative_path = path.absolute().relative_to(root).as_posix() | ||
resolved_path.relative_to(root).as_posix() | ||
hauntsaninja marked this conversation as resolved.
Show resolved
Hide resolved
|
||
except ValueError: | ||
if report: | ||
report.path_ignored(path, f"is a symbolic link that points outside {root}") | ||
return None | ||
return root_relative_path | ||
return True | ||
return False | ||
|
||
|
||
def best_effort_relative_path(path: Path, root: Path) -> Path: | ||
# Precondition: resolves_outside_root_or_cannot_stat(path, root) is False | ||
try: | ||
return path.absolute().relative_to(root) | ||
except ValueError: | ||
pass | ||
root_parent = next((p for p in path.parents if _cached_resolve(p) == root), None) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I added |
||
if root_parent is not None: | ||
return path.relative_to(root_parent) | ||
# something adversarial, fallback to path guaranteed by precondition | ||
return _cached_resolve(path).relative_to(root) | ||
|
||
|
||
def _path_is_ignored( | ||
|
@@ -339,7 +348,8 @@ def gen_python_files( | |
|
||
assert root.is_absolute(), f"INTERNAL ERROR: `root` must be absolute but is {root}" | ||
for child in paths: | ||
root_relative_path = child.absolute().relative_to(root).as_posix() | ||
assert child.is_absolute() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not new, this should be guaranteed by |
||
root_relative_path = child.relative_to(root).as_posix() | ||
|
||
# First ignore files matching .gitignore, if passed | ||
if gitignore_dict and _path_is_ignored( | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2567,32 +2567,32 @@ def test_symlinks(self) -> None: | |
gitignore = PathSpec.from_lines("gitwildmatch", []) | ||
|
||
regular = MagicMock() | ||
regular.absolute.return_value = root / "regular.py" | ||
regular.relative_to.return_value = Path("regular.py") | ||
regular.resolve.return_value = root / "regular.py" | ||
regular.is_dir.return_value = False | ||
regular.is_file.return_value = True | ||
|
||
outside_root_symlink = MagicMock() | ||
outside_root_symlink.absolute.return_value = root / "symlink.py" | ||
outside_root_symlink.relative_to.return_value = Path("symlink.py") | ||
outside_root_symlink.resolve.return_value = Path("/nowhere") | ||
outside_root_symlink.is_dir.return_value = False | ||
outside_root_symlink.is_file.return_value = True | ||
|
||
ignored_symlink = MagicMock() | ||
ignored_symlink.absolute.return_value = root / ".mypy_cache" / "symlink.py" | ||
ignored_symlink.relative_to.return_value = Path(".mypy_cache") / "symlink.py" | ||
ignored_symlink.is_dir.return_value = False | ||
ignored_symlink.is_file.return_value = True | ||
|
||
# A symlink that has an excluded name, but points to an included name | ||
symlink_excluded_name = MagicMock() | ||
symlink_excluded_name.absolute.return_value = root / "excluded_name" | ||
symlink_excluded_name.relative_to.return_value = Path("excluded_name") | ||
symlink_excluded_name.resolve.return_value = root / "included_name.py" | ||
symlink_excluded_name.is_dir.return_value = False | ||
symlink_excluded_name.is_file.return_value = True | ||
|
||
# A symlink that has an included name, but points to an excluded name | ||
symlink_included_name = MagicMock() | ||
symlink_included_name.absolute.return_value = root / "included_name.py" | ||
symlink_included_name.relative_to.return_value = Path("included_name.py") | ||
symlink_included_name.resolve.return_value = root / "excluded_name" | ||
symlink_included_name.is_dir.return_value = False | ||
symlink_included_name.is_file.return_value = True | ||
|
@@ -2626,39 +2626,100 @@ def test_symlinks(self) -> None: | |
outside_root_symlink.resolve.assert_called_once() | ||
ignored_symlink.resolve.assert_not_called() | ||
|
||
def test_get_sources_symlink_and_force_exclude(self) -> None: | ||
with TemporaryDirectory() as tempdir: | ||
tmp = Path(tempdir).resolve() | ||
actual = tmp / "actual" | ||
actual.mkdir() | ||
symlink = tmp / "symlink" | ||
symlink.symlink_to(actual) | ||
|
||
actual_proj = actual / "project" | ||
actual_proj.mkdir() | ||
(actual_proj / "module.py").write_text("print('hello')", encoding="utf-8") | ||
|
||
symlink_proj = symlink / "project" | ||
|
||
with change_directory(symlink_proj): | ||
assert_collected_sources( | ||
src=["module.py"], | ||
root=symlink_proj.resolve(), | ||
expected=["module.py"], | ||
) | ||
|
||
absolute_module = symlink_proj / "module.py" | ||
assert_collected_sources( | ||
src=[absolute_module], | ||
root=symlink_proj.resolve(), | ||
expected=[absolute_module], | ||
) | ||
|
||
# a few tricky tests for force_exclude | ||
flat_symlink = symlink_proj / "symlink_module.py" | ||
flat_symlink.symlink_to(actual_proj / "module.py") | ||
assert_collected_sources( | ||
src=[flat_symlink], | ||
root=symlink_proj.resolve(), | ||
force_exclude=r"/symlink_module.py", | ||
expected=[], | ||
) | ||
|
||
target = actual_proj / "target" | ||
target.mkdir() | ||
(target / "another.py").write_text("print('hello')", encoding="utf-8") | ||
(symlink_proj / "nested").symlink_to(target) | ||
|
||
assert_collected_sources( | ||
src=[symlink_proj / "nested" / "another.py"], | ||
root=symlink_proj.resolve(), | ||
force_exclude=r"nested", | ||
expected=[], | ||
) | ||
assert_collected_sources( | ||
src=[symlink_proj / "nested" / "another.py"], | ||
root=symlink_proj.resolve(), | ||
force_exclude=r"target", | ||
expected=[symlink_proj / "nested" / "another.py"], | ||
) | ||
|
||
def test_get_sources_with_stdin_symlink_outside_root( | ||
self, | ||
) -> None: | ||
path = THIS_DIR / "data" / "include_exclude_tests" | ||
stdin_filename = str(path / "b/exclude/a.py") | ||
outside_root_symlink = Path("/target_directory/a.py") | ||
root = Path("target_dir/").resolve().absolute() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. target_dir/ or target_directory? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I see, it would have been slightly clearer if you used two names that didn't look like one was a variant of the other, e.g. |
||
with patch("pathlib.Path.resolve", return_value=outside_root_symlink): | ||
assert_collected_sources( | ||
root=Path("target_directory/"), | ||
root=root, | ||
src=["-"], | ||
expected=[], | ||
stdin_filename=stdin_filename, | ||
) | ||
|
||
@patch("black.find_project_root", lambda *args: (THIS_DIR.resolve(), None)) | ||
def test_get_sources_with_stdin(self) -> None: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not really related, but while I was here I removed some monkeypatching since it's not needed. We probably do a little too much mocking, so the new tests touch the file system There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Agree we should prefer to avoid mocking. |
||
src = ["-"] | ||
expected = ["-"] | ||
assert_collected_sources(src, expected, include="", exclude=r"/exclude/|a\.py") | ||
assert_collected_sources( | ||
src, | ||
root=THIS_DIR.resolve(), | ||
expected=expected, | ||
include="", | ||
exclude=r"/exclude/|a\.py", | ||
) | ||
|
||
@patch("black.find_project_root", lambda *args: (THIS_DIR.resolve(), None)) | ||
def test_get_sources_with_stdin_filename(self) -> None: | ||
src = ["-"] | ||
stdin_filename = str(THIS_DIR / "data/collections.py") | ||
expected = [f"__BLACK_STDIN_FILENAME__{stdin_filename}"] | ||
assert_collected_sources( | ||
src, | ||
expected, | ||
root=THIS_DIR.resolve(), | ||
expected=expected, | ||
exclude=r"/exclude/a\.py", | ||
stdin_filename=stdin_filename, | ||
) | ||
|
||
@patch("black.find_project_root", lambda *args: (THIS_DIR.resolve(), None)) | ||
def test_get_sources_with_stdin_filename_and_exclude(self) -> None: | ||
# Exclude shouldn't exclude stdin_filename since it is mimicking the | ||
# file being passed directly. This is the same as | ||
|
@@ -2669,12 +2730,12 @@ def test_get_sources_with_stdin_filename_and_exclude(self) -> None: | |
expected = [f"__BLACK_STDIN_FILENAME__{stdin_filename}"] | ||
assert_collected_sources( | ||
src, | ||
expected, | ||
root=THIS_DIR.resolve(), | ||
expected=expected, | ||
exclude=r"/exclude/|a\.py", | ||
stdin_filename=stdin_filename, | ||
) | ||
|
||
@patch("black.find_project_root", lambda *args: (THIS_DIR.resolve(), None)) | ||
def test_get_sources_with_stdin_filename_and_extend_exclude(self) -> None: | ||
# Extend exclude shouldn't exclude stdin_filename since it is mimicking the | ||
# file being passed directly. This is the same as | ||
|
@@ -2685,40 +2746,45 @@ def test_get_sources_with_stdin_filename_and_extend_exclude(self) -> None: | |
expected = [f"__BLACK_STDIN_FILENAME__{stdin_filename}"] | ||
assert_collected_sources( | ||
src, | ||
expected, | ||
root=THIS_DIR.resolve(), | ||
expected=expected, | ||
extend_exclude=r"/exclude/|a\.py", | ||
stdin_filename=stdin_filename, | ||
) | ||
|
||
@patch("black.find_project_root", lambda *args: (THIS_DIR.resolve(), None)) | ||
def test_get_sources_with_stdin_filename_and_force_exclude(self) -> None: | ||
# Force exclude should exclude the file when passing it through | ||
# stdin_filename | ||
path = THIS_DIR / "data" / "include_exclude_tests" | ||
stdin_filename = str(path / "b/exclude/a.py") | ||
assert_collected_sources( | ||
src=["-"], | ||
root=THIS_DIR.resolve(), | ||
expected=[], | ||
force_exclude=r"/exclude/|a\.py", | ||
stdin_filename=stdin_filename, | ||
) | ||
|
||
@patch("black.find_project_root", lambda *args: (THIS_DIR.resolve(), None)) | ||
def test_get_sources_with_stdin_filename_and_force_exclude_and_symlink( | ||
self, | ||
) -> None: | ||
# Force exclude should exclude a symlink based on the symlink, not its target | ||
path = THIS_DIR / "data" / "include_exclude_tests" | ||
stdin_filename = str(path / "symlink.py") | ||
expected = [f"__BLACK_STDIN_FILENAME__{stdin_filename}"] | ||
target = path / "b/exclude/a.py" | ||
with patch("pathlib.Path.resolve", return_value=target): | ||
assert_collected_sources( | ||
src=["-"], | ||
expected=expected, | ||
force_exclude=r"exclude/a\.py", | ||
stdin_filename=stdin_filename, | ||
) | ||
with TemporaryDirectory() as tempdir: | ||
tmp = Path(tempdir).resolve() | ||
(tmp / "exclude").mkdir() | ||
(tmp / "exclude" / "a.py").write_text("print('hello')", encoding="utf-8") | ||
(tmp / "symlink.py").symlink_to(tmp / "exclude" / "a.py") | ||
|
||
stdin_filename = str(tmp / "symlink.py") | ||
expected = [f"__BLACK_STDIN_FILENAME__{stdin_filename}"] | ||
with change_directory(tmp): | ||
assert_collected_sources( | ||
src=["-"], | ||
root=tmp, | ||
expected=expected, | ||
force_exclude=r"exclude/a\.py", | ||
stdin_filename=stdin_filename, | ||
) | ||
|
||
|
||
class TestDeFactoAPI: | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We have a similar assert in
gen_python_files
(which this usually calls)