Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix ignoring input files for symlink reasons #4222

Merged
merged 5 commits into from
Feb 12, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 6 additions & 9 deletions src/black/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,12 +44,12 @@
STDIN_PLACEHOLDER,
)
from black.files import (
best_effort_relative_path,
find_project_root,
find_pyproject_toml,
find_user_pyproject_toml,
gen_python_files,
get_gitignore,
get_root_relative_path,
parse_pyproject_toml,
path_is_excluded,
resolves_outside_root_or_cannot_stat,
Expand Down Expand Up @@ -734,6 +734,7 @@ def get_sources(
"""Compute the set of files to be formatted."""
sources: Set[Path] = set()

assert root.is_absolute(), f"INTERNAL ERROR: `root` must be absolute but is {root}"
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We have a similar assert in gen_python_files (which this usually calls)

using_default_exclude = exclude is None
exclude = re_compile_maybe_verbose(DEFAULT_EXCLUDES) if exclude is None else exclude
gitignore: Optional[Dict[Path, PathSpec]] = None
Expand All @@ -749,11 +750,12 @@ def get_sources(

# Compare the logic here to the logic in `gen_python_files`.
if is_stdin or path.is_file():
root_relative_path = get_root_relative_path(path, root, report)

if root_relative_path is None:
if resolves_outside_root_or_cannot_stat(path, root, report):
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As you may have guessed from the name, the difference between this and get_root_relative_path (from #4161) is the .resolve()

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This helped me located where the filtering happens and I just added sources.add(path) before the old "continue"in line 755/756 and my problem was solved.
Tho this, if you need, I can still test it against my case where my home folder ~ was set to a symlink like /homes/user -> /external/homes/user. Please don't hesitate to tell me if you need any help! Thanks for the great work again!

if verbose:
out(f'Skipping invalid source: "{path}"', fg="red")
continue

root_relative_path = best_effort_relative_path(path, root).as_posix()
root_relative_path = "/" + root_relative_path

# Hard-exclude any files that matches the `--force-exclude` regex.
Expand All @@ -763,11 +765,6 @@ def get_sources(
)
continue

if resolves_outside_root_or_cannot_stat(path, root, report):
if verbose:
out(f'Skipping invalid source: "{path}"', fg="red")
continue

if is_stdin:
path = Path(f"{STDIN_PLACEHOLDER}{str(path)}")

Expand Down
44 changes: 27 additions & 17 deletions src/black/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,11 @@ def _load_toml(path: Union[Path, str]) -> Dict[str, Any]:
return tomllib.load(f)


@lru_cache
def _cached_resolve(path: Path) -> Path:
return path.resolve()


@lru_cache
def find_project_root(
srcs: Sequence[str], stdin_filename: Optional[str] = None
Expand All @@ -67,9 +72,9 @@ def find_project_root(
if stdin_filename is not None:
srcs = tuple(stdin_filename if s == "-" else s for s in srcs)
if not srcs:
srcs = [str(Path.cwd().resolve())]
srcs = [str(_cached_resolve(Path.cwd()))]

path_srcs = [Path(Path.cwd(), src).resolve() for src in srcs]
path_srcs = [_cached_resolve(Path(Path.cwd(), src)) for src in srcs]

# A list of lists of parents for each 'src'. 'src' is included as a
# "parent" of itself if it is a directory
Expand Down Expand Up @@ -236,7 +241,7 @@ def find_user_pyproject_toml() -> Path:
else:
config_root = os.environ.get("XDG_CONFIG_HOME", "~/.config")
user_config_path = Path(config_root).expanduser() / "black"
return user_config_path.resolve()
return _cached_resolve(user_config_path)


@lru_cache
Expand Down Expand Up @@ -266,27 +271,31 @@ def resolves_outside_root_or_cannot_stat(
try:
if sys.version_info < (3, 8, 6):
path = path.absolute() # https://bugs.python.org/issue33660
resolved_path = path.resolve()
return get_root_relative_path(resolved_path, root, report) is None
resolved_path = _cached_resolve(path)
except OSError as e:
if report:
report.path_ignored(path, f"cannot be read because {e}")
return True


def get_root_relative_path(
path: Path,
root: Path,
report: Optional[Report] = None,
) -> Optional[str]:
"""Returns the file path relative to the 'root' directory"""
try:
root_relative_path = path.absolute().relative_to(root).as_posix()
resolved_path.relative_to(root).as_posix()
hauntsaninja marked this conversation as resolved.
Show resolved Hide resolved
except ValueError:
if report:
report.path_ignored(path, f"is a symbolic link that points outside {root}")
return None
return root_relative_path
return True
return False


def best_effort_relative_path(path: Path, root: Path) -> Path:
# Precondition: resolves_outside_root_or_cannot_stat(path, root) is False
try:
return path.absolute().relative_to(root)
except ValueError:
pass
root_parent = next((p for p in path.parents if _cached_resolve(p) == root), None)
Copy link
Collaborator Author

@hauntsaninja hauntsaninja Feb 11, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added _cached_resolve mainly in case this loop x many input files is expensive

if root_parent is not None:
return path.relative_to(root_parent)
# something adversarial, fallback to path guaranteed by precondition
return _cached_resolve(path).relative_to(root)


def _path_is_ignored(
Expand Down Expand Up @@ -339,7 +348,8 @@ def gen_python_files(

assert root.is_absolute(), f"INTERNAL ERROR: `root` must be absolute but is {root}"
for child in paths:
root_relative_path = child.absolute().relative_to(root).as_posix()
assert child.is_absolute()
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not new, this should be guaranteed by path = root / (path.resolve().relative_to(root)) in get_sources

root_relative_path = child.relative_to(root).as_posix()

# First ignore files matching .gitignore, if passed
if gitignore_dict and _path_is_ignored(
Expand Down
120 changes: 93 additions & 27 deletions tests/test_black.py
Original file line number Diff line number Diff line change
Expand Up @@ -2567,32 +2567,32 @@ def test_symlinks(self) -> None:
gitignore = PathSpec.from_lines("gitwildmatch", [])

regular = MagicMock()
regular.absolute.return_value = root / "regular.py"
regular.relative_to.return_value = Path("regular.py")
regular.resolve.return_value = root / "regular.py"
regular.is_dir.return_value = False
regular.is_file.return_value = True

outside_root_symlink = MagicMock()
outside_root_symlink.absolute.return_value = root / "symlink.py"
outside_root_symlink.relative_to.return_value = Path("symlink.py")
outside_root_symlink.resolve.return_value = Path("/nowhere")
outside_root_symlink.is_dir.return_value = False
outside_root_symlink.is_file.return_value = True

ignored_symlink = MagicMock()
ignored_symlink.absolute.return_value = root / ".mypy_cache" / "symlink.py"
ignored_symlink.relative_to.return_value = Path(".mypy_cache") / "symlink.py"
ignored_symlink.is_dir.return_value = False
ignored_symlink.is_file.return_value = True

# A symlink that has an excluded name, but points to an included name
symlink_excluded_name = MagicMock()
symlink_excluded_name.absolute.return_value = root / "excluded_name"
symlink_excluded_name.relative_to.return_value = Path("excluded_name")
symlink_excluded_name.resolve.return_value = root / "included_name.py"
symlink_excluded_name.is_dir.return_value = False
symlink_excluded_name.is_file.return_value = True

# A symlink that has an included name, but points to an excluded name
symlink_included_name = MagicMock()
symlink_included_name.absolute.return_value = root / "included_name.py"
symlink_included_name.relative_to.return_value = Path("included_name.py")
symlink_included_name.resolve.return_value = root / "excluded_name"
symlink_included_name.is_dir.return_value = False
symlink_included_name.is_file.return_value = True
Expand Down Expand Up @@ -2626,39 +2626,100 @@ def test_symlinks(self) -> None:
outside_root_symlink.resolve.assert_called_once()
ignored_symlink.resolve.assert_not_called()

def test_get_sources_symlink_and_force_exclude(self) -> None:
with TemporaryDirectory() as tempdir:
tmp = Path(tempdir).resolve()
actual = tmp / "actual"
actual.mkdir()
symlink = tmp / "symlink"
symlink.symlink_to(actual)

actual_proj = actual / "project"
actual_proj.mkdir()
(actual_proj / "module.py").write_text("print('hello')", encoding="utf-8")

symlink_proj = symlink / "project"

with change_directory(symlink_proj):
assert_collected_sources(
src=["module.py"],
root=symlink_proj.resolve(),
expected=["module.py"],
)

absolute_module = symlink_proj / "module.py"
assert_collected_sources(
src=[absolute_module],
root=symlink_proj.resolve(),
expected=[absolute_module],
)

# a few tricky tests for force_exclude
flat_symlink = symlink_proj / "symlink_module.py"
flat_symlink.symlink_to(actual_proj / "module.py")
assert_collected_sources(
src=[flat_symlink],
root=symlink_proj.resolve(),
force_exclude=r"/symlink_module.py",
expected=[],
)

target = actual_proj / "target"
target.mkdir()
(target / "another.py").write_text("print('hello')", encoding="utf-8")
(symlink_proj / "nested").symlink_to(target)

assert_collected_sources(
src=[symlink_proj / "nested" / "another.py"],
root=symlink_proj.resolve(),
force_exclude=r"nested",
expected=[],
)
assert_collected_sources(
src=[symlink_proj / "nested" / "another.py"],
root=symlink_proj.resolve(),
force_exclude=r"target",
expected=[symlink_proj / "nested" / "another.py"],
)

def test_get_sources_with_stdin_symlink_outside_root(
self,
) -> None:
path = THIS_DIR / "data" / "include_exclude_tests"
stdin_filename = str(path / "b/exclude/a.py")
outside_root_symlink = Path("/target_directory/a.py")
root = Path("target_dir/").resolve().absolute()
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

target_dir/ or target_directory?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

target_dir is right. Rename doesn't affect semantics, I did it to make the test clearer, the relevant difference is /target_directory is absolute and so is outside root

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see, it would have been slightly clearer if you used two names that didn't look like one was a variant of the other, e.g. target_dir1 and target_dir2. What you have now is fine too though.

with patch("pathlib.Path.resolve", return_value=outside_root_symlink):
assert_collected_sources(
root=Path("target_directory/"),
root=root,
src=["-"],
expected=[],
stdin_filename=stdin_filename,
)

@patch("black.find_project_root", lambda *args: (THIS_DIR.resolve(), None))
def test_get_sources_with_stdin(self) -> None:
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not really related, but while I was here I removed some monkeypatching since it's not needed. We probably do a little too much mocking, so the new tests touch the file system

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agree we should prefer to avoid mocking.

src = ["-"]
expected = ["-"]
assert_collected_sources(src, expected, include="", exclude=r"/exclude/|a\.py")
assert_collected_sources(
src,
root=THIS_DIR.resolve(),
expected=expected,
include="",
exclude=r"/exclude/|a\.py",
)

@patch("black.find_project_root", lambda *args: (THIS_DIR.resolve(), None))
def test_get_sources_with_stdin_filename(self) -> None:
src = ["-"]
stdin_filename = str(THIS_DIR / "data/collections.py")
expected = [f"__BLACK_STDIN_FILENAME__{stdin_filename}"]
assert_collected_sources(
src,
expected,
root=THIS_DIR.resolve(),
expected=expected,
exclude=r"/exclude/a\.py",
stdin_filename=stdin_filename,
)

@patch("black.find_project_root", lambda *args: (THIS_DIR.resolve(), None))
def test_get_sources_with_stdin_filename_and_exclude(self) -> None:
# Exclude shouldn't exclude stdin_filename since it is mimicking the
# file being passed directly. This is the same as
Expand All @@ -2669,12 +2730,12 @@ def test_get_sources_with_stdin_filename_and_exclude(self) -> None:
expected = [f"__BLACK_STDIN_FILENAME__{stdin_filename}"]
assert_collected_sources(
src,
expected,
root=THIS_DIR.resolve(),
expected=expected,
exclude=r"/exclude/|a\.py",
stdin_filename=stdin_filename,
)

@patch("black.find_project_root", lambda *args: (THIS_DIR.resolve(), None))
def test_get_sources_with_stdin_filename_and_extend_exclude(self) -> None:
# Extend exclude shouldn't exclude stdin_filename since it is mimicking the
# file being passed directly. This is the same as
Expand All @@ -2685,40 +2746,45 @@ def test_get_sources_with_stdin_filename_and_extend_exclude(self) -> None:
expected = [f"__BLACK_STDIN_FILENAME__{stdin_filename}"]
assert_collected_sources(
src,
expected,
root=THIS_DIR.resolve(),
expected=expected,
extend_exclude=r"/exclude/|a\.py",
stdin_filename=stdin_filename,
)

@patch("black.find_project_root", lambda *args: (THIS_DIR.resolve(), None))
def test_get_sources_with_stdin_filename_and_force_exclude(self) -> None:
# Force exclude should exclude the file when passing it through
# stdin_filename
path = THIS_DIR / "data" / "include_exclude_tests"
stdin_filename = str(path / "b/exclude/a.py")
assert_collected_sources(
src=["-"],
root=THIS_DIR.resolve(),
expected=[],
force_exclude=r"/exclude/|a\.py",
stdin_filename=stdin_filename,
)

@patch("black.find_project_root", lambda *args: (THIS_DIR.resolve(), None))
def test_get_sources_with_stdin_filename_and_force_exclude_and_symlink(
self,
) -> None:
# Force exclude should exclude a symlink based on the symlink, not its target
path = THIS_DIR / "data" / "include_exclude_tests"
stdin_filename = str(path / "symlink.py")
expected = [f"__BLACK_STDIN_FILENAME__{stdin_filename}"]
target = path / "b/exclude/a.py"
with patch("pathlib.Path.resolve", return_value=target):
assert_collected_sources(
src=["-"],
expected=expected,
force_exclude=r"exclude/a\.py",
stdin_filename=stdin_filename,
)
with TemporaryDirectory() as tempdir:
tmp = Path(tempdir).resolve()
(tmp / "exclude").mkdir()
(tmp / "exclude" / "a.py").write_text("print('hello')", encoding="utf-8")
(tmp / "symlink.py").symlink_to(tmp / "exclude" / "a.py")

stdin_filename = str(tmp / "symlink.py")
expected = [f"__BLACK_STDIN_FILENAME__{stdin_filename}"]
with change_directory(tmp):
assert_collected_sources(
src=["-"],
root=tmp,
expected=expected,
force_exclude=r"exclude/a\.py",
stdin_filename=stdin_filename,
)


class TestDeFactoAPI:
Expand Down
Loading