Skip to content

Commit

Permalink
Reimplement file exclusion logic
Browse files Browse the repository at this point in the history
  • Loading branch information
ssbarnea committed May 31, 2023
1 parent c4e37e4 commit 605da34
Show file tree
Hide file tree
Showing 8 changed files with 80 additions and 142 deletions.
1 change: 1 addition & 0 deletions .config/dictionary.txt
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,7 @@ parseable
pathex
pathlib
pathspec
pathspecs
pbrun
pfexec
pickleable
Expand Down
1 change: 1 addition & 0 deletions .config/requirements.in
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ black>=22.8.0 # MIT
filelock>=3.3.0 # The Unlicense
jsonschema>=4.10.0 # MIT, version needed for improved errors
packaging>=21.3 # Apache-2.0,BSD-2-Clause
pathspec>=0.9.0 # Mozilla Public License 2.0 (MPL 2.0)
pyyaml>=5.4.1 # MIT (centos 9 has 5.3.1)
rich>=12.0.0 # MIT
ruamel.yaml>=0.17.0,<0.18,!=0.17.29,!=0.17.30 # MIT, next version is planned to have breaking changes
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/tox.yml
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ jobs:
WSLENV: FORCE_COLOR:PYTEST_REQPASS:TOXENV:GITHUB_STEP_SUMMARY
# Number of expected test passes, safety measure for accidental skip of
# tests. Update value if you add/remove tests.
PYTEST_REQPASS: 806
PYTEST_REQPASS: 803
steps:
- name: Activate WSL1
if: "contains(matrix.shell, 'wsl')"
Expand Down
2 changes: 1 addition & 1 deletion src/ansiblelint/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -365,7 +365,7 @@ def path_inject() -> None:
# We do know that finding ansible in PATH does not guarantee that it is
# functioning or that is in fact the same version that was installed as
# our dependency, but addressing this would be done by ansible-compat.
for cmd in ("ansible", "git"):
for cmd in ("ansible",):
if not shutil.which(cmd):
msg = f"Failed to find runtime dependency '{cmd}' in PATH"
raise RuntimeError(msg)
Expand Down
153 changes: 67 additions & 86 deletions src/ansiblelint/file_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,21 +4,20 @@
import copy
import logging
import os
import subprocess
import sys
from collections import OrderedDict, defaultdict
from collections import defaultdict
from contextlib import contextmanager
from pathlib import Path
from tempfile import NamedTemporaryFile
from typing import TYPE_CHECKING, Any, cast

import pathspec
import wcmatch.pathlib
import wcmatch.wcmatch
from yaml.error import YAMLError

from ansiblelint.config import BASE_KINDS, Options, options
from ansiblelint.constants import CONFIG_FILENAMES, GIT_CMD, FileType, States
from ansiblelint.logger import warn_or_fail
from ansiblelint.constants import CONFIG_FILENAMES, FileType, States

if TYPE_CHECKING:
from collections.abc import Iterator, Sequence
Expand Down Expand Up @@ -419,93 +418,22 @@ def data(self) -> Any:


# pylint: disable=redefined-outer-name
def discover_lintables(options: Options) -> dict[str, Any]:
def discover_lintables(options: Options) -> list[str]:
"""Find all files that we know how to lint.
Return format is normalized, relative for stuff below cwd, ~/ for content
under current user and absolute for everything else.
"""
# git is preferred as it also considers .gitignore
# As --recurse-submodules is incompatible with --others we need to run
# twice to get combined results.
commands = {
"tracked": {
"cmd": [
*GIT_CMD,
"ls-files",
"--cached",
"--exclude-standard",
"--recurse-submodules",
"-z",
],
"remove": False,
},
"others": {
"cmd": [
*GIT_CMD,
"ls-files",
"--cached",
"--others",
"--exclude-standard",
"-z",
],
"remove": False,
},
"absent": {
"cmd": [
*GIT_CMD,
"ls-files",
"--deleted",
"-z",
],
"remove": True,
},
}

out: set[str] = set()
try:
for k, value in commands.items():
if not isinstance(value["cmd"], list):
msg = f"Expected list but got {type(value['cmd'])}"
raise TypeError(msg)
result = subprocess.check_output(
value["cmd"], # noqa: S603
stderr=subprocess.STDOUT,
text=True,
).split("\x00")[:-1]
_logger.info(
"Discovered files to lint using git (%s): %s",
k,
" ".join(value["cmd"]),
)
out = out.union(result) if not value["remove"] else out - set(result)

except subprocess.CalledProcessError as exc:
if not (exc.returncode == 128 and "fatal: not a git repository" in exc.output):
err = exc.output.rstrip("\n")
warn_or_fail(f"Failed to discover lintable files using git: {err}")
except FileNotFoundError as exc:
if options.verbosity:
warn_or_fail(f"Failed to locate command: {exc}")

# Applying exclude patterns
if not out:
out = set(".")

exclude_pattern = "|".join(str(x) for x in options.exclude_paths)
_logger.info("Looking up for files, excluding %s ...", exclude_pattern)
# remove './' prefix from output of WcMatch
out = {
strip_dotslash_prefix(fname)
for fname in wcmatch.wcmatch.WcMatch(
".",
exclude_pattern=exclude_pattern,
flags=wcmatch.wcmatch.RECURSIVE,
limit=256,
).match()
}

return OrderedDict.fromkeys(sorted(out))
if not options.lintables:
options.lintables = ["."]

return [
str(filename)
for filename in get_all_files(
*[Path(s) for s in options.lintables],
exclude_paths=options.exclude_paths,
)
]


def strip_dotslash_prefix(fname: str) -> str:
Expand Down Expand Up @@ -598,3 +526,56 @@ def _guess_parent(lintable: Lintable) -> Lintable | None:
except IndexError:
pass
return None


def get_all_files(
*paths: Path,
exclude_paths: list[str] | None = None,
) -> list[Path]:
"""Recursively retrieve all files from given folders."""
all_files: list[Path] = []
exclude_paths = [] if exclude_paths is None else exclude_paths

def is_excluded(path_to_check: Path) -> bool:
"""Check if a file is exclude by current specs."""
return any(spec.match_file(str(path_to_check)) for spec in pathspecs)

for path in paths:
pathspecs = [
pathspec.GitIgnoreSpec.from_lines(
[
".git",
".tox",
".mypy_cache",
"__pycache__",
".DS_Store",
".coverage",
".pytest_cache",
".ruff_cache",
*exclude_paths,
],
),
]
gitignore = path / ".gitignore"
if gitignore.exists():
with gitignore.open(encoding="UTF-8") as f:
_logger.info("Loading ignores from %s", gitignore)
pathspecs.append(
pathspec.GitIgnoreSpec.from_lines(f.read().splitlines()),
)

# Iterate over all items in the directory
if path.is_file():
all_files.append(path)
else:
for item in sorted(path.iterdir()):
if is_excluded(item):
_logger.info("Excluded: %s", item)
continue
if item.is_file():
all_files.append(item)
# If it's a directory, recursively call the function
elif item.is_dir():
all_files.extend(get_all_files(item, exclude_paths=exclude_paths))

return all_files
57 changes: 7 additions & 50 deletions test/test_file_utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Tests for file utility functions."""
from __future__ import annotations

import copy
import logging
import os
import time
Expand All @@ -10,7 +11,6 @@
import pytest

from ansiblelint import cli, file_utils
from ansiblelint.__main__ import initialize_logger
from ansiblelint.file_utils import (
Lintable,
cwd,
Expand All @@ -27,7 +27,6 @@
from _pytest.logging import LogCaptureFixture
from _pytest.monkeypatch import MonkeyPatch

from ansiblelint.config import Options
from ansiblelint.constants import FileType
from ansiblelint.rules import RulesCollection

Expand Down Expand Up @@ -73,39 +72,7 @@ def test_expand_paths_vars(
assert expand_paths_vars([test_path]) == [expected] # type: ignore[list-item]


@pytest.mark.parametrize(
("reset_env_var", "message_prefix"),
(
# simulate absence of git command
("PATH", "Failed to locate command: "),
# simulate a missing git repo
("GIT_DIR", "Looking up for files"),
),
ids=("no-git-cli", "outside-git-repo"),
)
def test_discover_lintables_git_verbose(
reset_env_var: str,
message_prefix: str,
monkeypatch: MonkeyPatch,
caplog: LogCaptureFixture,
) -> None:
"""Ensure that autodiscovery lookup failures are logged."""
options = cli.get_config(["-v"])
initialize_logger(options.verbosity)
monkeypatch.setenv(reset_env_var, "")
file_utils.discover_lintables(options)

assert any(m[2].startswith("Looking up for files") for m in caplog.record_tuples)
assert any(m.startswith(message_prefix) for m in caplog.messages)


@pytest.mark.parametrize(
"is_in_git",
(True, False),
ids=("in Git", "outside Git"),
)
def test_discover_lintables_silent(
is_in_git: bool,
monkeypatch: MonkeyPatch,
capsys: CaptureFixture[str],
caplog: LogCaptureFixture,
Expand All @@ -119,16 +86,16 @@ def test_discover_lintables_silent(
caplog.set_level(logging.FATAL)
options = cli.get_config([])
test_dir = Path(__file__).resolve().parent
lint_path = test_dir / ".." / "examples" / "roles" / "test-role"
if not is_in_git:
monkeypatch.setenv("GIT_DIR", "")
lint_path = (test_dir / ".." / "examples" / "roles" / "test-role").resolve()

yaml_count = len(list(lint_path.glob("**/*.yml"))) + len(
list(lint_path.glob("**/*.yaml")),
)

monkeypatch.chdir(str(lint_path))
files = file_utils.discover_lintables(options)
my_options = copy.deepcopy(options)
my_options.lintables = [str(lint_path)]
files = file_utils.discover_lintables(my_options)
stderr = capsys.readouterr().err
assert (
not stderr
Expand All @@ -144,7 +111,7 @@ def test_discover_lintables_umlaut(monkeypatch: MonkeyPatch) -> None:
"""Verify that filenames containing German umlauts are not garbled by the discover_lintables."""
options = cli.get_config([])
test_dir = Path(__file__).resolve().parent
lint_path = test_dir / ".." / "examples" / "playbooks"
lint_path = (test_dir / ".." / "examples" / "playbooks").resolve()

monkeypatch.chdir(str(lint_path))
files = file_utils.discover_lintables(options)
Expand Down Expand Up @@ -293,23 +260,13 @@ def test_discover_lintables_umlaut(monkeypatch: MonkeyPatch) -> None:
), # content should determine it as a play
),
)
def test_kinds(monkeypatch: MonkeyPatch, path: str, kind: FileType) -> None:
def test_kinds(path: str, kind: FileType) -> None:
"""Verify auto-detection logic based on DEFAULT_KINDS."""
options = cli.get_config([])

# pylint: disable=unused-argument
def mockreturn(options: Options) -> dict[str, Any]: # noqa: ARG001
return {normpath(path): kind}

# assert Lintable is able to determine file type
lintable_detected = Lintable(path)
lintable_expected = Lintable(path, kind=kind)
assert lintable_detected == lintable_expected

monkeypatch.setattr(file_utils, "discover_lintables", mockreturn)
result = file_utils.discover_lintables(options)
assert lintable_detected.kind == result[lintable_expected.name]


def test_find_project_root_1(tmp_path: Path) -> None:
"""Verify find_project_root()."""
Expand Down
4 changes: 2 additions & 2 deletions test/test_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,8 +99,8 @@ def test_runner_exclude_globs(
)

matches = runner.run()
# we expect to find one 2 matches from the very few .yaml file we have there (most of them have .yml extension)
assert len(matches) == 2
# we expect to find one match from the very few .yaml file we have there (most of them have .yml extension)
assert len(matches) == 1


@pytest.mark.parametrize(
Expand Down
2 changes: 0 additions & 2 deletions test/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -300,8 +300,6 @@ def test_cli_auto_detect(capfd: CaptureFixture[str]) -> None:

out, err = capfd.readouterr()

# Confirmation that it runs in auto-detect mode
assert "Discovered files to lint using git" in err
# An expected rule match from our examples
assert (
"examples/playbooks/empty_playbook.yml:1:1: "
Expand Down

0 comments on commit 605da34

Please sign in to comment.