diff --git a/dvc/cli.py b/dvc/cli.py index 1392332040..2b020d4263 100644 --- a/dvc/cli.py +++ b/dvc/cli.py @@ -6,6 +6,7 @@ from .command import ( add, cache, + check_ignore, checkout, commit, completion, @@ -79,6 +80,7 @@ git_hook, plots, experiments, + check_ignore, ] diff --git a/dvc/command/check_ignore.py b/dvc/command/check_ignore.py new file mode 100644 index 0000000000..ef3f1250bb --- /dev/null +++ b/dvc/command/check_ignore.py @@ -0,0 +1,70 @@ +import argparse +import logging + +from dvc.command import completion +from dvc.command.base import CmdBase, append_doc_link +from dvc.exceptions import DvcException + +logger = logging.getLogger(__name__) + + +class CmdCheckIgnore(CmdBase): + def __init__(self, args): + super().__init__(args) + self.ignore_filter = self.repo.tree.dvcignore + + def _show_results(self, result): + if result.match or self.args.non_matching: + if self.args.details: + logger.info("{}\t{}".format(result.patterns[-1], result.file)) + else: + logger.info(result.file) + + def run(self): + if self.args.non_matching and not self.args.details: + raise DvcException("--non-matching is only valid with --details") + + if self.args.quiet and self.args.details: + raise DvcException("cannot both --details and --quiet") + + ret = 1 + for target in self.args.targets: + result = self.ignore_filter.check_ignore(target) + self._show_results(result) + if result.match: + ret = 0 + return ret + + +def add_parser(subparsers, parent_parser): + ADD_HELP = "Debug DVC ignore/exclude files" + + parser = subparsers.add_parser( + "check-ignore", + parents=[parent_parser], + description=append_doc_link(ADD_HELP, "check-ignore"), + help=ADD_HELP, + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + parser.add_argument( + "-d", + "--details", + action="store_true", + default=False, + help="Show the exclude pattern together with each target path.", + ) + parser.add_argument( + "-n", + "--non-matching", + action="store_true", + default=False, + help="Show the target paths which don’t match any pattern. " + "Only usable when `--details` is also employed", + ) + parser.add_argument( + "targets", + nargs="+", + help="Exact or wildcard paths of files or directories to check " + "ignore patterns.", + ).complete = completion.FILE + parser.set_defaults(func=CmdCheckIgnore) diff --git a/dvc/ignore.py b/dvc/ignore.py index a87e750c47..0e80a02103 100644 --- a/dvc/ignore.py +++ b/dvc/ignore.py @@ -1,6 +1,7 @@ import logging import os import re +from collections import namedtuple from itertools import groupby, takewhile from pathspec.patterns import GitWildMatchPattern @@ -8,7 +9,7 @@ from pygtrie import StringTrie from dvc.path_info import PathInfo -from dvc.pathspec_math import merge_patterns +from dvc.pathspec_math import PatternInfo, merge_patterns from dvc.system import System from dvc.utils import relpath @@ -24,18 +25,26 @@ def __call__(self, root, dirs, files): class DvcIgnorePatterns(DvcIgnore): def __init__(self, pattern_list, dirname): + if pattern_list: + if isinstance(pattern_list[0], str): + pattern_list = [ + PatternInfo(pattern, "") for pattern in pattern_list + ] self.pattern_list = pattern_list self.dirname = dirname self.prefix = self.dirname + os.sep - regex_pattern_list = map( - GitWildMatchPattern.pattern_to_regex, pattern_list - ) + self.regex_pattern_list = [ + GitWildMatchPattern.pattern_to_regex(pattern_info.patterns) + for pattern_info in pattern_list + ] self.ignore_spec = [ (ignore, re.compile("|".join(item[0] for item in group))) - for ignore, group in groupby(regex_pattern_list, lambda x: x[1]) + for ignore, group in groupby( + self.regex_pattern_list, lambda x: x[1] + ) if ignore is not None ] @@ -43,9 +52,19 @@ def __init__(self, pattern_list, dirname): def from_files(cls, ignore_file_path, tree): assert os.path.isabs(ignore_file_path) dirname = os.path.normpath(os.path.dirname(ignore_file_path)) + ignore_file_rel_path = os.path.relpath( + ignore_file_path, tree.tree_root + ) with tree.open(ignore_file_path, encoding="utf-8") as fobj: path_spec_lines = [ - line for line in map(str.strip, fobj.readlines()) if line + PatternInfo( + line, + "{}:{}:{}".format(ignore_file_rel_path, line_no + 1, line), + ) + for line_no, line in enumerate( + map(str.strip, fobj.readlines()) + ) + if line ] return cls(path_spec_lines, dirname) @@ -56,7 +75,7 @@ def __call__(self, root, dirs, files): return dirs, files - def matches(self, dirname, basename, is_dir=False): + def _get_normalize_path(self, dirname, basename): # NOTE: `relpath` is too slow, so we have to assume that both # `dirname` and `self.dirname` are relative or absolute together. if dirname == self.dirname: @@ -70,6 +89,12 @@ def matches(self, dirname, basename, is_dir=False): if not System.is_unix(): path = normalize_file(path) + return path + + def matches(self, dirname, basename, is_dir=False): + path = self._get_normalize_path(dirname, basename) + if not path: + return False return self.ignore(path, is_dir) def ignore(self, path, is_dir): @@ -85,20 +110,48 @@ def ignore(self, path, is_dir): result = ignore return result + def match_details(self, dirname, basename, is_dir=False): + path = self._get_normalize_path(dirname, basename) + if not path: + return False + return self._ignore_details(path, is_dir) + + def _ignore_details(self, path, is_dir): + result = [] + for ignore, pattern in zip(self.regex_pattern_list, self.pattern_list): + regex = re.compile(ignore[0]) + # skip system pattern + if not pattern.file_info: + continue + if is_dir: + path_dir = f"{path}/" + if regex.match(path) or regex.match(path_dir): + result.append(pattern.file_info) + else: + if regex.match(path): + result.append(pattern.file_info) + return result + def __hash__(self): - return hash(self.dirname + ":" + "\n".join(self.pattern_list)) + return hash(self.dirname + ":" + str(self.pattern_list)) def __eq__(self, other): if not isinstance(other, DvcIgnorePatterns): return NotImplemented return (self.dirname == other.dirname) & ( - self.pattern_list == other.pattern_list + [pattern.patterns for pattern in self.pattern_list] + == [pattern.patterns for pattern in other.pattern_list] ) def __bool__(self): return bool(self.pattern_list) +CheckIgnoreResult = namedtuple( + "CheckIgnoreResult", ["file", "match", "patterns"] +) + + class DvcIgnoreFilterNoop: def __init__(self, tree, root_dir): pass @@ -112,6 +165,9 @@ def is_ignored_dir(self, _): def is_ignored_file(self, _): return False + def check_ignore(self, _): + return [] + class DvcIgnoreFilter: @staticmethod @@ -166,20 +222,19 @@ def _update(self, dirname): def _update_sub_repo(self, root, dirs): for d in dirs: if self._is_dvc_repo(root, d): + new_pattern = DvcIgnorePatterns(["/{}/".format(d)], root) old_pattern = self.ignores_trie_tree.longest_prefix(root).value if old_pattern: self.ignores_trie_tree[root] = DvcIgnorePatterns( *merge_patterns( old_pattern.pattern_list, old_pattern.dirname, - ["/{}/".format(d)], - root, + new_pattern.pattern_list, + new_pattern.dirname, ) ) else: - self.ignores_trie_tree[root] = DvcIgnorePatterns( - ["/{}/".format(d)], root - ) + self.ignores_trie_tree[root] = new_pattern def __call__(self, root, dirs, files): ignore_pattern = self._get_trie_pattern(root) @@ -245,3 +300,17 @@ def _outside_repo(self, path): ): return True return False + + def check_ignore(self, target): + full_target = os.path.abspath(target) + if not self._outside_repo(full_target): + dirname, basename = os.path.split(os.path.normpath(full_target)) + pattern = self._get_trie_pattern(dirname) + if pattern: + matches = pattern.match_details( + dirname, basename, os.path.isdir(full_target) + ) + + if matches: + return CheckIgnoreResult(target, True, matches) + return CheckIgnoreResult(target, False, ["::"]) diff --git a/dvc/pathspec_math.py b/dvc/pathspec_math.py index 5af503cd2d..6191f7717e 100644 --- a/dvc/pathspec_math.py +++ b/dvc/pathspec_math.py @@ -3,11 +3,14 @@ # of two path specification patterns with different base # All the operations follow the documents of `gitignore` import os +from collections import namedtuple from pathspec.util import normalize_file from dvc.utils import relpath +PatternInfo = namedtuple("PatternInfo", ["patterns", "file_info"]) + def _not_ignore(rule): return (True, rule[1:]) if rule.startswith("!") else (False, rule) @@ -59,7 +62,10 @@ def _change_dirname(dirname, pattern_list, new_dirname): if rel.startswith(".."): raise ValueError("change dirname can only change to parent path") - return [change_rule(rule, rel) for rule in pattern_list] + return [ + PatternInfo(change_rule(rule.patterns, rel), rule.file_info) + for rule in pattern_list + ] def merge_patterns(pattern_a, prefix_a, pattern_b, prefix_b): diff --git a/tests/func/test_check_ignore.py b/tests/func/test_check_ignore.py new file mode 100644 index 0000000000..b53f72f531 --- /dev/null +++ b/tests/func/test_check_ignore.py @@ -0,0 +1,109 @@ +import os + +import pytest + +from dvc.ignore import DvcIgnore +from dvc.main import main + + +@pytest.mark.parametrize( + "file,ret,output", [("ignored", 0, "ignored\n"), ("not_ignored", 1, "")] +) +def test_check_ignore(tmp_dir, dvc, file, ret, output, caplog): + tmp_dir.gen(DvcIgnore.DVCIGNORE_FILE, "ignored") + + assert main(["check-ignore", file]) == ret + assert output in caplog.text + + +@pytest.mark.parametrize( + "file,ret,output", + [ + ("file", 0, "{}:1:f*\tfile\n".format(DvcIgnore.DVCIGNORE_FILE)), + ("foo", 0, "{}:2:!foo\tfoo\n".format(DvcIgnore.DVCIGNORE_FILE)), + ( + os.path.join("dir", "foobar"), + 0, + "{}:1:foobar\t{}\n".format( + os.path.join("dir", DvcIgnore.DVCIGNORE_FILE), + os.path.join("dir", "foobar"), + ), + ), + ], +) +def test_check_ignore_details(tmp_dir, dvc, file, ret, output, caplog): + tmp_dir.gen(DvcIgnore.DVCIGNORE_FILE, "f*\n!foo") + tmp_dir.gen({"dir": {DvcIgnore.DVCIGNORE_FILE: "foobar"}}) + + assert main(["check-ignore", "-d", file]) == ret + assert output in caplog.text + + +@pytest.mark.parametrize( + "non_matching,output", [(["-n"], "::\tfile\n"), ([], "")] +) +def test_check_ignore_non_matching(tmp_dir, dvc, non_matching, output, caplog): + tmp_dir.gen(DvcIgnore.DVCIGNORE_FILE, "other") + + assert main(["check-ignore", "-d"] + non_matching + ["file"]) == 1 + assert output in caplog.text + + +def test_check_ignore_non_matching_without_details(tmp_dir, dvc): + tmp_dir.gen(DvcIgnore.DVCIGNORE_FILE, "other") + + assert main(["check-ignore", "-n", "file"]) == 255 + + +def test_check_ignore_details_with_quiet(tmp_dir, dvc): + tmp_dir.gen(DvcIgnore.DVCIGNORE_FILE, "other") + + assert main(["check-ignore", "-d", "-q", "file"]) == 255 + + +@pytest.mark.parametrize("path,ret", [({"dir": {}}, 0), ({"dir": "files"}, 1)]) +def test_check_ignore_dir(tmp_dir, dvc, path, ret): + tmp_dir.gen(DvcIgnore.DVCIGNORE_FILE, "dir/") + tmp_dir.gen(path) + + assert main(["check-ignore", "-q", "dir"]) == ret + + +def test_check_ignore_default_dir(tmp_dir, dvc): + assert main(["check-ignore", "-q", ".dvc"]) == 1 + + +def test_check_ignore_out_side_repo(tmp_dir, dvc): + tmp_dir.gen(DvcIgnore.DVCIGNORE_FILE, "file") + assert main(["check-ignore", "-q", "../file"]) == 1 + + +def test_check_ignore_sub_repo(tmp_dir, dvc): + tmp_dir.gen( + {DvcIgnore.DVCIGNORE_FILE: "other", "dir": {".dvc": {}, "foo": "bar"}} + ) + + assert main(["check-ignore", "-q", os.path.join("dir", "foo")]) == 1 + + +def test_check_sub_dir_ignore_file(tmp_dir, dvc, caplog): + tmp_dir.gen( + { + DvcIgnore.DVCIGNORE_FILE: "other", + "dir": {DvcIgnore.DVCIGNORE_FILE: "bar\nfoo", "foo": "bar"}, + } + ) + + assert main(["check-ignore", "-d", os.path.join("dir", "foo")]) == 0 + assert ( + "{}:2:foo\t{}".format( + os.path.join("dir", DvcIgnore.DVCIGNORE_FILE), + os.path.join("dir", "foo"), + ) + in caplog.text + ) + + sub_dir = tmp_dir / "dir" + with sub_dir.chdir(): + assert main(["check-ignore", "-d", "foo"]) == 0 + assert ".dvcignore:2:foo\tfoo" in caplog.text diff --git a/tests/func/test_ignore.py b/tests/func/test_ignore.py index 16afb978af..67e8f71a5a 100644 --- a/tests/func/test_ignore.py +++ b/tests/func/test_ignore.py @@ -6,13 +6,17 @@ from dvc.exceptions import DvcIgnoreInCollectedDirError from dvc.ignore import DvcIgnore, DvcIgnorePatterns from dvc.path_info import PathInfo -from dvc.pathspec_math import merge_patterns +from dvc.pathspec_math import PatternInfo, merge_patterns from dvc.repo import Repo from dvc.utils import relpath from dvc.utils.fs import get_mtime_and_size from tests.dir_helpers import TmpDir +def _to_pattern_info_list(str_list): + return [PatternInfo(a, "") for a in str_list] + + def test_ignore(tmp_dir, dvc, monkeypatch): tmp_dir.gen({"dir": {"ignored": "text", "other": "text2"}}) tmp_dir.gen(DvcIgnore.DVCIGNORE_FILE, "dir/ignored") @@ -106,9 +110,9 @@ def test_ignore_collecting_dvcignores(tmp_dir, dvc, dname): assert ( DvcIgnorePatterns( *merge_patterns( - [".hg/", ".git/", ".dvc/"], + _to_pattern_info_list([".hg/", ".git/", ".dvc/"]), os.fspath(tmp_dir), - [os.path.basename(dname)], + _to_pattern_info_list([os.path.basename(dname)]), top_ignore_path, ) ) @@ -309,17 +313,24 @@ def test_pattern_trie_tree(tmp_dir, dvc): os.fspath(tmp_dir / "top" / "first" / "middle" / "second" / "bottom") ) - base_pattern = [".hg/", ".git/", ".dvc/"], os.fspath(tmp_dir) + base_pattern = ( + _to_pattern_info_list([".hg/", ".git/", ".dvc/"]), + os.fspath(tmp_dir), + ) first_pattern = merge_patterns( - *base_pattern, ["a", "b", "c"], os.fspath(tmp_dir / "top" / "first") + *base_pattern, + _to_pattern_info_list(["a", "b", "c"]), + os.fspath(tmp_dir / "top" / "first") ) second_pattern = merge_patterns( *first_pattern, - ["d", "e", "f"], + _to_pattern_info_list(["d", "e", "f"]), os.fspath(tmp_dir / "top" / "first" / "middle" / "second") ) other_pattern = merge_patterns( - *base_pattern, ["1", "2", "3"], os.fspath(tmp_dir / "other") + *base_pattern, + _to_pattern_info_list(["1", "2", "3"]), + os.fspath(tmp_dir / "other") ) assert DvcIgnorePatterns(*base_pattern) == ignore_pattern_top diff --git a/tests/unit/test_pathspec_math.py b/tests/unit/test_pathspec_math.py index 1cc6837153..d843a71a3d 100644 --- a/tests/unit/test_pathspec_math.py +++ b/tests/unit/test_pathspec_math.py @@ -1,6 +1,6 @@ import pytest -from dvc.pathspec_math import _change_dirname +from dvc.pathspec_math import PatternInfo, _change_dirname @pytest.mark.parametrize( @@ -69,4 +69,6 @@ ], ) def test_dvcignore_pattern_change_dir(tmp_dir, patterns, dirname, changed): - assert _change_dirname(dirname, [patterns], "/") == [changed] + assert _change_dirname(dirname, [PatternInfo(patterns, "")], "/") == [ + PatternInfo(changed, "") + ]