Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add more tests according to gitignore #4166

Merged
merged 9 commits into from
Jul 7, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 13 additions & 7 deletions dvc/ignore.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,11 +44,11 @@ def __init__(self, ignore_file_path, tree):

def __call__(self, root, dirs, files):
files = [f for f in files if not self.matches(root, f)]
dirs = [d for d in dirs if not self.matches(root, d)]
dirs = [d for d in dirs if not self.matches(root, d, True)]

return dirs, files

def matches(self, dirname, basename):
def matches(self, dirname, basename, is_dir=False):
# NOTE: `relpath` is too slow, so we have to assume that both
# `dirname` and `self.dirname` are relative or absolute together.
prefix = self.dirname + os.sep
Expand All @@ -63,13 +63,19 @@ def matches(self, dirname, basename):

if not System.is_unix():
path = normalize_file(path)
return self.ignore(path)
return self.ignore(path, is_dir)

def ignore(self, path):
def ignore(self, path, is_dir):
result = False
for ignore, pattern in self.ignore_spec:
if pattern.match(path):
result = ignore
if is_dir:
path_dir = f"{path}/"
for ignore, pattern in self.ignore_spec:
if pattern.match(path) or pattern.match(path_dir):
result = ignore
else:
for ignore, pattern in self.ignore_spec:
if pattern.match(path):
result = ignore
return result

def __hash__(self):
Expand Down
63 changes: 63 additions & 0 deletions tests/func/test_ignore.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,3 +173,66 @@ def test_ignore_blank_line(tmp_dir, dvc):
tmp_dir.gen(DvcIgnore.DVCIGNORE_FILE, "foo\n\ndir/ignored")

assert _files_set("dir", dvc.tree) == {"dir/other"}


# It is not possible to re-include a file if a parent directory of
# that file is excluded.
# Git doesn’t list excluded directories for performance reasons,
# so any patterns on contained files have no effect,
# no matter where they are defined.
@pytest.mark.parametrize(
"data_struct, pattern_list, result_set",
[
(
{"dir": {"subdir": {"not_ignore": "121"}}},
["subdir/*", "!not_ignore"],
{"dir/subdir/not_ignore"},
),
(
{"dir": {"subdir": {"should_ignore": "121"}}},
["subdir", "!should_ignore"],
set(),
),
(
{"dir": {"subdir": {"should_ignore": "121"}}},
["subdir/", "!should_ignore"],
set(),
),
],
)
def test_ignore_file_in_parent_path(
tmp_dir, dvc, data_struct, pattern_list, result_set
):
tmp_dir.gen(data_struct)
tmp_dir.gen(DvcIgnore.DVCIGNORE_FILE, "\n".join(pattern_list))
assert _files_set("dir", dvc.tree) == result_set


# If there is a separator at the end of the pattern then the pattern
# will only match directories,
# otherwise the pattern can match both files and directories.
# For example, a pattern doc/frotz/ matches doc/frotz directory,
# but not a/doc/frotz directory;
def test_ignore_sub_directory(tmp_dir, dvc):
tmp_dir.gen(
{
"dir": {
"doc": {"fortz": {"b": "b"}},
"a": {"doc": {"fortz": {"a": "a"}}},
}
}
)
tmp_dir.gen({"dir": {DvcIgnore.DVCIGNORE_FILE: "doc/fortz"}})
assert _files_set("dir", dvc.tree) == {
"dir/a/doc/fortz/a",
"dir/{}".format(DvcIgnore.DVCIGNORE_FILE),
}


# however frotz/ matches frotz and a/frotz that is a directory
def test_ignore_directory(tmp_dir, dvc):
tmp_dir.gen({"dir": {"fortz": {}, "a": {"fortz": {}}}})
tmp_dir.gen({"dir": {DvcIgnore.DVCIGNORE_FILE: "fortz"}})
assert _files_set("dir", dvc.tree) == {
"dir/{}".format(DvcIgnore.DVCIGNORE_FILE),
}
160 changes: 119 additions & 41 deletions tests/unit/test_ignore.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,34 +14,51 @@ def mock_dvcignore(dvcignore_path, patterns):
return ignore_patterns


def test_ignore_from_file_should_filter_dirs_and_files():
dvcignore_path = os.path.join(
os.path.sep, "full", "path", "to", "ignore", "file", ".dvcignore"
)

patterns = ["dir_to_ignore", "file_to_ignore"]

root = os.path.dirname(dvcignore_path)
dirs = ["dir1", "dir2", "dir_to_ignore"]
files = ["file1", "file2", "file_to_ignore"]

ignore = mock_dvcignore(dvcignore_path, patterns)
new_dirs, new_files = ignore(root, dirs, files)

assert {"dir1", "dir2"} == set(new_dirs)
assert {"file1", "file2"} == set(new_files)


@pytest.mark.parametrize(
"file_to_ignore_relpath, patterns, expected_match",
[
# all rules from https://git-scm.com/docs/gitignore
("to_ignore", ["to_ignore"], True),
("dont_ignore.txt", ["dont_ignore"], False),
# A blank line matches no files, so it can serve as a separator for
# readability.
("to_ignore", ["", "to_ignore"], True),
# A line starting with # serves as a comment.
# Put a backslash ("\") in front of the first hash for patterns
# that begin with a hash.
("#to_ignore", ["\\#to_ignore"], True),
("#to_ignore", ["#to_ignore"], False),
# Trailing spaces are ignored unless they are quoted with
# backslash ("\").
(" to_ignore", [" to_ignore"], False),
(" to_ignore", ["\\ to_ignore"], True),
# An optional prefix "!" which negates the pattern; any matching file
# excluded by a previous pattern will become included again.
("to_ignore.txt", ["to_ignore*"], True),
(
os.path.join("rel", "p", "p2", "to_ignore"),
["rel/**/to_ignore"],
True,
),
("to_ignore.txt", ["to_ignore*", "!to_ignore.txt"], False),
("to_ignore.txt", ["!to_ignore.txt", "to_ignore*"], True),
# It is not possible to re-include a file if a parent directory of
# that file is excluded.
# Git doesn’t list excluded directories for performance reasons,
# so any patterns on contained files have no effect,
# no matter where they are defined.
# see (`tests/func/test_ignore.py::test_ignore_parent_path`)
# Put a backslash ("\") in front of the first "!"
# for patterns that begin with a literal "!",
# for example, "\!important!.txt".
("!to_ignore.txt", ["\\!to_ignore.txt"], True),
# The slash / is used as the directory separator.
# Separators may occur at the beginning, middle or end of the
# .gitignore search pattern.
# If there is a separator at the beginning or middle (or both)
# of the pattern, then the pattern is relative to the directory
# level of the particular .gitignore file itself.
# Otherwise the pattern may also match at any level below
# the .gitignore level.
("file", ["/file"], True),
(os.path.join("data", "file"), ["/file"], False),
(os.path.join("data", "file"), ["data/file"], True),
(os.path.join("other", "data", "file"), ["data/file"], False),
(
os.path.join(
os.path.sep,
Expand All @@ -55,20 +72,96 @@ def test_ignore_from_file_should_filter_dirs_and_files():
["to_ignore"],
True,
),
# If there is a separator at the end of the pattern then the pattern
# will only match directories,
# otherwise the pattern can match both files and directories.
# For example, a pattern doc/frotz/ matches doc/frotz directory,
# but not a/doc/frotz directory;
# see (`tests/func/test_ignore.py::test_ignore_sub_directory`)
# however frotz/ matches frotz and a/frotz that is a directory
# (all paths are relative from the .gitignore file).
# see (`tests/func/test_ignore.py::test_ignore_directory`)
# An asterisk "*" matches anything except a slash.
("to_ignore.txt", ["/*.txt"], True),
(os.path.join("path", "to_ignore.txt"), ["/*.txt"], False),
(os.path.join("data", "file.txt"), ["data/*"], True),
# wait for Git
# (os.path.join("data", "sub", "file.txt"), ["data/*"], True),
(
os.path.join("rel", "path", "path2", "to_ignore"),
["rel/*/to_ignore"],
False,
),
(os.path.join("path", "to_ignore.txt"), ["/*.txt"], False),
("file.txt", ["file.*"], True),
# The character "?" matches any one character except "/".
("file.txt", ["fi?e.t?t"], True),
("fi/e.txt", ["fi?e.t?t"], False),
# The range notation, e.g. [a-zA-Z], can be used
# to match one of the characters in a range. See fnmatch(3) and
# the FNM_PATHNAME flag for a more detailed description.
("file.txt", ["[a-zA-Z]ile.txt"], True),
("2ile.txt", ["[a-zA-Z]ile.txt"], False),
# Two consecutive asterisks ("**") in patterns matched against
# full pathname may have special meaning:
# A leading "**" followed by a slash means match in all directories.
# For example, "**/foo" matches file or directory "foo" anywhere, the
# same as pattern "foo".
# "**/foo/bar" matches file or directory "bar" anywhere that is
# directly under directory "foo".
(os.path.join("rel", "p", "p2", "to_ignore"), ["**/to_ignore"], True,),
(
os.path.join("rel", "p", "p2", "to_ignore"),
["**/p2/to_ignore"],
True,
),
(
os.path.join("rel", "path", "path2", "dont_ignore"),
["**/to_ignore"],
False,
),
# A trailing "/**" matches everything inside.
# For example, "abc/**" matches all files inside directory "abc",
# relative to the location of the .gitignore file, with infinite depth.
(os.path.join("rel", "p", "p2", "to_ignore"), ["rel/**"], True,),
(os.path.join("rel", "p", "p2", "to_ignore"), ["p/**"], False,),
(
os.path.join("rel", "path", "path2", "dont_ignore"),
["rel/**"],
True,
),
# A slash followed by two consecutive asterisks then a slash matches
# zero or more directories.
# For example, "a/**/b" matches "a/b", "a/x/b", "a/x/y/b" and so on.
(os.path.join("rel", "p", "to_ignore"), ["rel/**/to_ignore"], True,),
(
os.path.join("rel", "p", "p2", "to_ignore"),
["rel/**/to_ignore"],
True,
),
(
os.path.join("rel", "path", "path2", "dont_ignore"),
["rel/**/to_ignore"],
False,
),
("dont_ignore.txt", ["dont_ignore"], False),
("dont_ignore.txt", ["dont*", "!dont_ignore.txt"], False),
(
os.path.join("rel", "path", "path2", "dont_ignore"),
["path/**/dont_ignore"],
False,
),
# Other consecutive asterisks are considered regular asterisks
# and will match according to the previous rules.
("to_ignore.txt", ["/***.txt"], True),
(os.path.join("path", "to_ignore.txt"), ["/****.txt"], False),
(os.path.join("path", "to_ignore.txt"), ["****.txt"], True),
(os.path.join("data", "file.txt"), ["data/***"], True),
# bug from PathSpec
# (os.path.join("data", "p", "file.txt"), ["data/***"], False),
(os.path.join("data", "p", "file.txt"), ["***/file.txt"], False),
(
os.path.join("rel", "path", "path2", "to_ignore"),
["rel/***/to_ignore"],
False,
),
],
)
def test_match_ignore_from_file(
Expand Down Expand Up @@ -99,18 +192,3 @@ def test_should_ignore_dir(omit_dir):
new_dirs, _ = ignore(root, dirs, files)

assert set(new_dirs) == {"dir1", "dir2"}


def test_ignore_order():
dvcignore_path = os.path.join(os.path.sep, "ignore_order", ".dvcignore")

patterns = ["!ac*", "a*", "!ab*"]

root = os.path.dirname(dvcignore_path)
dirs = ["ignore_order"]
files = ["ac", "ab", "aa"]

ignore = mock_dvcignore(dvcignore_path, patterns)
_, new_files = ignore(root, dirs, files)

assert {"ab"} == set(new_files)