-
Notifications
You must be signed in to change notification settings - Fork 1.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. Weβll occasionally send you account related emails.
Already on GitHub? Sign in to your account
dvc check-ignore
command
#4282
dvc check-ignore
command
#4282
Changes from all commits
3a377ce
32154f7
86a4ff7
1f13e84
c3e2fbd
3eea263
76481bd
27eec49
10d23e6
0ac9c85
ab7237d
3cf35f0
4844735
2749112
4ee24f0
54fbe31
930534b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
import argparse | ||
import logging | ||
|
||
from dvc.command import completion | ||
from dvc.command.base import CmdBase, append_doc_link | ||
from dvc.exceptions import DvcException | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
class CmdCheckIgnore(CmdBase): | ||
def __init__(self, args): | ||
super().__init__(args) | ||
self.ignore_filter = self.repo.tree.dvcignore | ||
|
||
def _show_results(self, result): | ||
if result.match or self.args.non_matching: | ||
if self.args.details: | ||
logger.info("{}\t{}".format(result.patterns[-1], result.file)) | ||
else: | ||
logger.info(result.file) | ||
|
||
def run(self): | ||
if self.args.non_matching and not self.args.details: | ||
raise DvcException("--non-matching is only valid with --details") | ||
|
||
if self.args.quiet and self.args.details: | ||
raise DvcException("cannot both --details and --quiet") | ||
|
||
ret = 1 | ||
for target in self.args.targets: | ||
result = self.ignore_filter.check_ignore(target) | ||
self._show_results(result) | ||
if result.match: | ||
ret = 0 | ||
return ret | ||
|
||
|
||
def add_parser(subparsers, parent_parser): | ||
ADD_HELP = "Debug DVC ignore/exclude files" | ||
|
||
parser = subparsers.add_parser( | ||
"check-ignore", | ||
parents=[parent_parser], | ||
description=append_doc_link(ADD_HELP, "check-ignore"), | ||
help=ADD_HELP, | ||
formatter_class=argparse.RawDescriptionHelpFormatter, | ||
) | ||
parser.add_argument( | ||
"-d", | ||
"--details", | ||
action="store_true", | ||
default=False, | ||
help="Show the exclude pattern together with each target path.", | ||
) | ||
parser.add_argument( | ||
"-n", | ||
"--non-matching", | ||
action="store_true", | ||
default=False, | ||
help="Show the target paths which donβt match any pattern. " | ||
"Only usable when `--details` is also employed", | ||
) | ||
parser.add_argument( | ||
"targets", | ||
nargs="+", | ||
help="Exact or wildcard paths of files or directories to check " | ||
"ignore patterns.", | ||
Comment on lines
+65
to
+68
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That sentence is a bit confusing. How about "File or directory paths, or ignore patterns to check" There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @jorgeorpinel
in our current path. Only my personal opinion, not good at English, maybe There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Good to know, thanks! Confusing probably... but makes sense. So those depend on the OS? I.e. POSIX shells have accept different wildcards than, say, Windows PowerShell. And yeah no worries, it's getting too deep. I'll take care of rewriting it as needed in iterative/dvc.org/pull/1629 and submit a matching PR to the core repo. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Went for "File or directory paths to check (wildcards supported)" in iterative/dvc.org/pull/1629. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just "File or directory paths to check" now... See #1673 |
||
).complete = completion.FILE | ||
parser.set_defaults(func=CmdCheckIgnore) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,14 +1,15 @@ | ||
import logging | ||
import os | ||
import re | ||
from collections import namedtuple | ||
from itertools import groupby, takewhile | ||
|
||
from pathspec.patterns import GitWildMatchPattern | ||
from pathspec.util import normalize_file | ||
from pygtrie import StringTrie | ||
|
||
from dvc.path_info import PathInfo | ||
from dvc.pathspec_math import merge_patterns | ||
from dvc.pathspec_math import PatternInfo, merge_patterns | ||
from dvc.system import System | ||
from dvc.utils import relpath | ||
|
||
|
@@ -24,28 +25,46 @@ def __call__(self, root, dirs, files): | |
|
||
class DvcIgnorePatterns(DvcIgnore): | ||
def __init__(self, pattern_list, dirname): | ||
if pattern_list: | ||
if isinstance(pattern_list[0], str): | ||
pattern_list = [ | ||
PatternInfo(pattern, "") for pattern in pattern_list | ||
] | ||
|
||
self.pattern_list = pattern_list | ||
self.dirname = dirname | ||
self.prefix = self.dirname + os.sep | ||
|
||
regex_pattern_list = map( | ||
GitWildMatchPattern.pattern_to_regex, pattern_list | ||
) | ||
self.regex_pattern_list = [ | ||
GitWildMatchPattern.pattern_to_regex(pattern_info.patterns) | ||
for pattern_info in pattern_list | ||
] | ||
|
||
self.ignore_spec = [ | ||
(ignore, re.compile("|".join(item[0] for item in group))) | ||
for ignore, group in groupby(regex_pattern_list, lambda x: x[1]) | ||
for ignore, group in groupby( | ||
self.regex_pattern_list, lambda x: x[1] | ||
) | ||
if ignore is not None | ||
] | ||
|
||
@classmethod | ||
def from_files(cls, ignore_file_path, tree): | ||
assert os.path.isabs(ignore_file_path) | ||
dirname = os.path.normpath(os.path.dirname(ignore_file_path)) | ||
ignore_file_rel_path = os.path.relpath( | ||
ignore_file_path, tree.tree_root | ||
) | ||
with tree.open(ignore_file_path, encoding="utf-8") as fobj: | ||
path_spec_lines = [ | ||
line for line in map(str.strip, fobj.readlines()) if line | ||
PatternInfo( | ||
line, | ||
"{}:{}:{}".format(ignore_file_rel_path, line_no + 1, line), | ||
) | ||
for line_no, line in enumerate( | ||
map(str.strip, fobj.readlines()) | ||
) | ||
if line | ||
] | ||
|
||
return cls(path_spec_lines, dirname) | ||
|
@@ -56,7 +75,7 @@ def __call__(self, root, dirs, files): | |
|
||
return dirs, files | ||
|
||
def matches(self, dirname, basename, is_dir=False): | ||
def _get_normalize_path(self, dirname, basename): | ||
karajan1001 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
# NOTE: `relpath` is too slow, so we have to assume that both | ||
# `dirname` and `self.dirname` are relative or absolute together. | ||
if dirname == self.dirname: | ||
|
@@ -70,6 +89,12 @@ def matches(self, dirname, basename, is_dir=False): | |
|
||
if not System.is_unix(): | ||
path = normalize_file(path) | ||
return path | ||
|
||
def matches(self, dirname, basename, is_dir=False): | ||
path = self._get_normalize_path(dirname, basename) | ||
if not path: | ||
return False | ||
return self.ignore(path, is_dir) | ||
|
||
def ignore(self, path, is_dir): | ||
|
@@ -85,20 +110,48 @@ def ignore(self, path, is_dir): | |
result = ignore | ||
return result | ||
|
||
def match_details(self, dirname, basename, is_dir=False): | ||
path = self._get_normalize_path(dirname, basename) | ||
if not path: | ||
return False | ||
return self._ignore_details(path, is_dir) | ||
|
||
def _ignore_details(self, path, is_dir): | ||
result = [] | ||
for ignore, pattern in zip(self.regex_pattern_list, self.pattern_list): | ||
regex = re.compile(ignore[0]) | ||
# skip system pattern | ||
if not pattern.file_info: | ||
continue | ||
Comment on lines
+123
to
+125
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Won't that result in no matches for There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If that's not too much trouble, sure. But if its, feel free to keep it that way. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @karajan1001 you mean the proper git return code? |
||
if is_dir: | ||
path_dir = f"{path}/" | ||
if regex.match(path) or regex.match(path_dir): | ||
result.append(pattern.file_info) | ||
else: | ||
if regex.match(path): | ||
result.append(pattern.file_info) | ||
return result | ||
|
||
def __hash__(self): | ||
return hash(self.dirname + ":" + "\n".join(self.pattern_list)) | ||
return hash(self.dirname + ":" + str(self.pattern_list)) | ||
|
||
def __eq__(self, other): | ||
if not isinstance(other, DvcIgnorePatterns): | ||
return NotImplemented | ||
return (self.dirname == other.dirname) & ( | ||
self.pattern_list == other.pattern_list | ||
[pattern.patterns for pattern in self.pattern_list] | ||
== [pattern.patterns for pattern in other.pattern_list] | ||
) | ||
|
||
def __bool__(self): | ||
return bool(self.pattern_list) | ||
|
||
|
||
CheckIgnoreResult = namedtuple( | ||
"CheckIgnoreResult", ["file", "match", "patterns"] | ||
) | ||
|
||
|
||
class DvcIgnoreFilterNoop: | ||
def __init__(self, tree, root_dir): | ||
pass | ||
|
@@ -112,6 +165,9 @@ def is_ignored_dir(self, _): | |
def is_ignored_file(self, _): | ||
return False | ||
|
||
def check_ignore(self, _): | ||
return [] | ||
|
||
|
||
class DvcIgnoreFilter: | ||
@staticmethod | ||
|
@@ -166,20 +222,19 @@ def _update(self, dirname): | |
def _update_sub_repo(self, root, dirs): | ||
for d in dirs: | ||
if self._is_dvc_repo(root, d): | ||
new_pattern = DvcIgnorePatterns(["/{}/".format(d)], root) | ||
old_pattern = self.ignores_trie_tree.longest_prefix(root).value | ||
if old_pattern: | ||
self.ignores_trie_tree[root] = DvcIgnorePatterns( | ||
*merge_patterns( | ||
old_pattern.pattern_list, | ||
old_pattern.dirname, | ||
["/{}/".format(d)], | ||
root, | ||
new_pattern.pattern_list, | ||
new_pattern.dirname, | ||
) | ||
) | ||
else: | ||
self.ignores_trie_tree[root] = DvcIgnorePatterns( | ||
["/{}/".format(d)], root | ||
) | ||
self.ignores_trie_tree[root] = new_pattern | ||
|
||
def __call__(self, root, dirs, files): | ||
ignore_pattern = self._get_trie_pattern(root) | ||
|
@@ -245,3 +300,17 @@ def _outside_repo(self, path): | |
): | ||
return True | ||
return False | ||
|
||
def check_ignore(self, target): | ||
full_target = os.path.abspath(target) | ||
if not self._outside_repo(full_target): | ||
dirname, basename = os.path.split(os.path.normpath(full_target)) | ||
pattern = self._get_trie_pattern(dirname) | ||
if pattern: | ||
matches = pattern.match_details( | ||
dirname, basename, os.path.isdir(full_target) | ||
) | ||
|
||
if matches: | ||
return CheckIgnoreResult(target, True, matches) | ||
return CheckIgnoreResult(target, False, ["::"]) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
How about something like "Check whether files or directories are excluded due to
.dvcignore
." ?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@johntharian
No problem.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Wrong handle π