From 1160235a7effdd45a124b64c3f369cef3230a8e0 Mon Sep 17 00:00:00 2001 From: Ruslan Kuprieiev Date: Thu, 16 Jul 2020 08:30:27 +0300 Subject: [PATCH] dvc: get rid of CleanTree CleanTree is a very awkward wrapper that spreads the context across the codebase and results in unexpected behaviour when using it. This PR starts moving dvcignore-related logic into the trees themselves (it makes a lot of sense, kinda like `state`) so they can deal with it however they like. There are at least two temporary ugly parts about this PR: 1) dvcignore is used by individual trees and not packed into tree/base.py yet; 2) `dvcignore_root` argument. This one is caused by the dvcignore trying to collect everything topdown starting from a certain root dir. What it should do instead is, for the path that is being checked, look up the tree through its parents until it finds the repo root (.dvc dir) and then stop. That would handle subrepos as well. At the same time we need to leverage the existing dvcignore trie structure to cache those results. 
--- dvc/config.py | 2 +- dvc/ignore.py | 86 +++++++------------------------------ dvc/repo/__init__.py | 23 +++++----- dvc/repo/add.py | 12 +++--- dvc/repo/brancher.py | 8 +++- dvc/scm/git/__init__.py | 4 +- dvc/scm/git/tree.py | 39 ++++++++++++++--- dvc/state.py | 2 +- dvc/tree/base.py | 13 ++++-- dvc/tree/local.py | 48 ++++++++++++++++----- tests/func/test_ignore.py | 77 ++++++++++++++++++++------------- tests/func/test_tree.py | 9 ++-- tests/unit/utils/test_fs.py | 9 ++-- 13 files changed, 182 insertions(+), 150 deletions(-) diff --git a/dvc/config.py b/dvc/config.py index 3b083597a7..00b450eab0 100644 --- a/dvc/config.py +++ b/dvc/config.py @@ -249,7 +249,7 @@ def __init__( self.dvc_dir = os.path.abspath(os.path.realpath(dvc_dir)) self.wtree = LocalRemoteTree(None, {"url": self.dvc_dir}) - self.tree = tree.tree if tree else self.wtree + self.tree = tree or self.wtree self.load(validate=validate) diff --git a/dvc/ignore.py b/dvc/ignore.py index 4f791e8c9c..f72623295e 100644 --- a/dvc/ignore.py +++ b/dvc/ignore.py @@ -3,14 +3,12 @@ import re from itertools import groupby -from funcy import cached_property from pathspec.patterns import GitWildMatchPattern from pathspec.util import normalize_file from pygtrie import StringTrie from dvc.path_info import PathInfo from dvc.pathspec_math import merge_patterns -from dvc.scm.tree import BaseTree from dvc.system import System from dvc.utils import relpath @@ -162,6 +160,20 @@ def is_dvc_repo(directory): return dirs, files +class DvcIgnoreFilterNoop: + def __init__(self, tree, root_dir): + pass + + def __call__(self, root, dirs, files): + return dirs, files + + def is_ignored_dir(self, _): + return False + + def is_ignored_file(self, _): + return False + + class DvcIgnoreFilter: def __init__(self, tree, root_dir): self.tree = tree @@ -218,7 +230,7 @@ def _parents_exist(self, path): if path.parent == self.root_dir or Repo.DVC_DIR in path.parts: return True - # paths outside of the CleanTree root should be ignored + # paths 
outside of the repo should be ignored path = relpath(path, self.root_dir) if path.startswith("..") or ( os.name == "nt" @@ -239,71 +251,3 @@ def _parents_exist(self, path): if not dirs: return False return True - - -class CleanTree(BaseTree): - def __init__(self, tree, tree_root=None): - self.tree = tree - if tree_root: - self._tree_root = tree_root - else: - self._tree_root = self.tree.tree_root - - @cached_property - def dvcignore(self): - return DvcIgnoreFilter(self.tree, self.tree_root) - - @property - def tree_root(self): - return self._tree_root - - def open(self, path, mode="r", encoding="utf-8"): - if self.isfile(path): - return self.tree.open(path, mode, encoding) - raise FileNotFoundError - - def exists(self, path): - if not self.tree.exists(path): - return False - - if self.tree.isdir(path): - return not self.dvcignore.is_ignored_dir(path) - - return not self.dvcignore.is_ignored_file(path) - - def isdir(self, path): - if not self.tree.isdir(path): - return False - - return not self.dvcignore.is_ignored_dir(path) - - def isfile(self, path): - if not self.tree.isfile(path): - return False - - return not self.dvcignore.is_ignored_file(path) - - def isexec(self, path): - return self.exists(path) and self.tree.isexec(path) - - def walk(self, top, topdown=True, onerror=None): - for root, dirs, files in self.tree.walk( - top, topdown=topdown, onerror=onerror - ): - dirs[:], files[:] = self.dvcignore( - os.path.abspath(root), dirs, files - ) - - yield root, dirs, files - - def stat(self, path): - if self.exists(path): - return self.tree.stat(path) - raise FileNotFoundError - - @property - def hash_jobs(self): - return self.tree.hash_jobs - - def makedirs(self, path, mode=0o777, exist_ok=True): - self.tree.makedirs(path, mode=mode, exist_ok=exist_ok) diff --git a/dvc/repo/__init__.py b/dvc/repo/__init__.py index a1c3bc7554..e8831881a5 100644 --- a/dvc/repo/__init__.py +++ b/dvc/repo/__init__.py @@ -14,10 +14,8 @@ NotDvcRepoError, OutputNotFoundError, ) -from 
dvc.ignore import CleanTree from dvc.path_info import PathInfo from dvc.repo.tree import RepoTree -from dvc.scm.tree import is_working_tree from dvc.utils.fs import path_isin from ..stage.exceptions import StageFileDoesNotExistError, StageNotFound @@ -85,12 +83,19 @@ def __init__(self, root_dir=None, scm=None, rev=None): tree = scm.get_tree(rev) self.root_dir = self.find_root(root_dir, tree) self.scm = scm - self.tree = tree + self.tree = scm.get_tree( + rev, use_dvcignore=True, dvcignore_root=self.root_dir + ) self.state = StateNoop() else: root_dir = self.find_root(root_dir) self.root_dir = os.path.abspath(os.path.realpath(root_dir)) - self.tree = LocalRemoteTree(None, {"url": self.root_dir}) + self.tree = LocalRemoteTree( + self, + {"url": self.root_dir}, + use_dvcignore=True, + dvcignore_root=self.root_dir, + ) self.dvc_dir = os.path.join(self.root_dir, self.DVC_DIR) self.config = Config(self.dvc_dir, tree=self.tree) @@ -134,13 +139,7 @@ def tree(self): @tree.setter def tree(self, tree): - if is_working_tree(tree) or tree.tree_root == self.root_dir: - root = None - else: - root = self.root_dir - self._tree = ( - tree if isinstance(tree, CleanTree) else CleanTree(tree, root) - ) + self._tree = tree # Our graph cache is no longer valid, as it was based on the previous # tree. 
self._reset() @@ -605,4 +604,4 @@ def _reset(self): self.__dict__.pop("graph", None) self.__dict__.pop("stages", None) self.__dict__.pop("pipelines", None) - self.__dict__.pop("dvcignore", None) + self.tree.__dict__.pop("dvcignore", None) diff --git a/dvc/repo/add.py b/dvc/repo/add.py index 18983c6a95..dd1bf9594a 100644 --- a/dvc/repo/add.py +++ b/dvc/repo/add.py @@ -109,17 +109,17 @@ def add( def _find_all_targets(repo, target, recursive): if os.path.isdir(target) and recursive: return [ - fname - for fname in Tqdm( + os.fspath(path) + for path in Tqdm( repo.tree.walk_files(target), desc="Searching " + target, bar_format=Tqdm.BAR_FMT_NOTOTAL, unit="file", ) - if not repo.is_dvc_internal(fname) - if not is_dvc_file(fname) - if not repo.scm.belongs_to_scm(fname) - if not repo.scm.is_tracked(fname) + if not repo.is_dvc_internal(os.fspath(path)) + if not is_dvc_file(os.fspath(path)) + if not repo.scm.belongs_to_scm(os.fspath(path)) + if not repo.scm.is_tracked(os.fspath(path)) ] return [target] diff --git a/dvc/repo/brancher.py b/dvc/repo/brancher.py index c42aa556c6..0397b6e3ca 100644 --- a/dvc/repo/brancher.py +++ b/dvc/repo/brancher.py @@ -29,7 +29,9 @@ def brancher( # noqa: E302 scm = self.scm - self.tree = LocalRemoteTree(self, {"url": self.root_dir}) + self.tree = LocalRemoteTree( + self, {"url": self.root_dir}, use_dvcignore=True + ) yield "workspace" if revs and "workspace" in revs: @@ -47,7 +49,9 @@ def brancher( # noqa: E302 try: if revs: for sha, names in group_by(scm.resolve_rev, revs).items(): - self.tree = scm.get_tree(sha) + self.tree = scm.get_tree( + sha, use_dvcignore=True, dvcignore_root=self.root_dir + ) # ignore revs that don't contain repo root # (i.e. 
revs from before a subdir=True repo was init'ed) if self.tree.exists(self.root_dir): diff --git a/dvc/scm/git/__init__.py b/dvc/scm/git/__init__.py index 8056d4177c..a78ee59a5f 100644 --- a/dvc/scm/git/__init__.py +++ b/dvc/scm/git/__init__.py @@ -375,8 +375,8 @@ def belongs_to_scm(self, path): path_parts = os.path.normpath(path).split(os.path.sep) return basename == self.ignore_file or Git.GIT_DIR in path_parts - def get_tree(self, rev): - return GitTree(self.repo, self.resolve_rev(rev)) + def get_tree(self, rev, **kwargs): + return GitTree(self.repo, self.resolve_rev(rev), **kwargs) def get_rev(self): return self.repo.rev_parse("HEAD").hexsha diff --git a/dvc/scm/git/tree.py b/dvc/scm/git/tree.py index 0793d3d04a..158ae2d91c 100644 --- a/dvc/scm/git/tree.py +++ b/dvc/scm/git/tree.py @@ -3,6 +3,8 @@ import os import stat +from funcy import cached_property + from dvc.exceptions import DvcException from dvc.scm.tree import BaseTree from dvc.utils import relpath @@ -22,7 +24,7 @@ def _item_basename(item): class GitTree(BaseTree): # pylint:disable=abstract-method """Proxies the repo file access methods to Git objects""" - def __init__(self, git, rev): + def __init__(self, git, rev, use_dvcignore=False, dvcignore_root=None): """Create GitTree instance Args: @@ -31,11 +33,25 @@ def __init__(self, git, rev): """ self.git = git self.rev = rev + self.use_dvcignore = use_dvcignore + self.dvcignore_root = dvcignore_root @property def tree_root(self): return self.git.working_dir + @cached_property + def dvcignore(self): + from dvc.ignore import DvcIgnoreFilter, DvcIgnoreFilterNoop + + root = self.dvcignore_root or self.tree_root + if not self.use_dvcignore: + return DvcIgnoreFilterNoop(self, root) + self.use_dvcignore = False + ret = DvcIgnoreFilter(self, root) + self.use_dvcignore = True + return ret + def open(self, path, mode="r", encoding="utf-8"): assert mode in {"r", "rb"} @@ -58,20 +74,29 @@ def open(self, path, mode="r", encoding="utf-8"): return 
io.StringIO(data.decode(encoding)) def exists(self, path): - return self._git_object_by_path(path) is not None + if self._git_object_by_path(path) is None: + return False + + return not self.dvcignore.is_ignored_file( + path + ) and not self.dvcignore.is_ignored_dir(path) def isdir(self, path): obj = self._git_object_by_path(path) if obj is None: return False - return obj.mode == GIT_MODE_DIR + if obj.mode != GIT_MODE_DIR: + return False + return not self.dvcignore.is_ignored_dir(path) def isfile(self, path): obj = self._git_object_by_path(path) if obj is None: return False # according to git-fast-import(1) file mode could be 644 or 755 - return obj.mode & GIT_MODE_FILE == GIT_MODE_FILE + if obj.mode & GIT_MODE_FILE != GIT_MODE_FILE: + return False + return not self.dvcignore.is_ignored_file(path) @staticmethod def _is_tree_and_contains(obj, path): @@ -145,7 +170,11 @@ def walk(self, top, topdown=True, onerror=None): onerror(NotADirectoryError(top)) return - yield from self._walk(tree, topdown=topdown) + for root, dirs, files in self._walk(tree, topdown=topdown): + dirs[:], files[:] = self.dvcignore( + os.path.abspath(root), dirs, files + ) + yield root, dirs, files def isexec(self, path): if not self.exists(path): diff --git a/dvc/state.py b/dvc/state.py index 7c41bb01e0..f46dbe4e4d 100644 --- a/dvc/state.py +++ b/dvc/state.py @@ -94,7 +94,7 @@ def __init__(self, local_cache): repo = local_cache.repo self.repo = repo self.root_dir = repo.root_dir - self.tree = LocalRemoteTree(None, {}) + self.tree = LocalRemoteTree(None, {"url": self.root_dir}) state_config = repo.config.get("state", {}) self.row_limit = state_config.get("row_limit", self.STATE_ROW_LIMIT) diff --git a/dvc/tree/base.py b/dvc/tree/base.py index 8fc0791e15..4eaf522326 100644 --- a/dvc/tree/base.py +++ b/dvc/tree/base.py @@ -8,6 +8,8 @@ from operator import itemgetter from urllib.parse import urlparse +from funcy import cached_property + from dvc.exceptions import ( DvcException, 
DvcIgnoreInCollectedDirError, @@ -86,13 +88,16 @@ def __init__(self, repo, config): shared = config.get("shared") self._file_mode, self._dir_mode = self.SHARED_MODE_MAP[shared] - self.hash_jobs = ( - config.get("hash_jobs") + self.verify = config.get("verify", self.DEFAULT_VERIFY) + self.path_info = None + + @cached_property + def hash_jobs(self): + return ( + self.config.get("hash_jobs") or (self.repo and self.repo.config["core"].get("hash_jobs")) or self.HASH_JOBS ) - self.verify = config.get("verify", self.DEFAULT_VERIFY) - self.path_info = None @classmethod def get_missing_deps(cls): diff --git a/dvc/tree/local.py b/dvc/tree/local.py index 6ddcf913a3..201aecdbe9 100644 --- a/dvc/tree/local.py +++ b/dvc/tree/local.py @@ -3,6 +3,7 @@ import os import stat +from funcy import cached_property from shortuuid import uuid from dvc.exceptions import DvcException @@ -35,10 +36,12 @@ class LocalRemoteTree(BaseRemoteTree): CACHE_MODE = 0o444 SHARED_MODE_MAP = {None: (0o644, 0o755), "group": (0o664, 0o775)} - def __init__(self, repo, config): + def __init__(self, repo, config, use_dvcignore=False, dvcignore_root=None): super().__init__(repo, config) url = config.get("url") self.path_info = self.PATH_CLS(url) if url else None + self.use_dvcignore = use_dvcignore + self.dvcignore_root = dvcignore_root @property def tree_root(self): @@ -50,25 +53,46 @@ def state(self): return self.repo.state if self.repo else StateNoop() + @cached_property + def dvcignore(self): + from dvc.ignore import DvcIgnoreFilter, DvcIgnoreFilterNoop + + root = self.dvcignore_root or self.tree_root + if not self.use_dvcignore: + return DvcIgnoreFilterNoop(self, root) + self.use_dvcignore = False + ret = DvcIgnoreFilter(self, root) + self.use_dvcignore = True + return ret + @staticmethod def open(path_info, mode="r", encoding=None): return open(path_info, mode=mode, encoding=encoding) def exists(self, path_info): assert isinstance(path_info, str) or path_info.scheme == "local" - if not self.repo: - return 
os.path.exists(path_info) - return os.path.lexists(path_info) + if self.repo: + ret = os.path.lexists(path_info) + else: + ret = os.path.exists(path_info) + if not ret: + return False + + return not self.dvcignore.is_ignored_file( + path_info + ) and not self.dvcignore.is_ignored_dir(path_info) def isfile(self, path_info): - if not self.repo: - return os.path.isfile(path_info) - return os.path.isfile(path_info) + if not os.path.isfile(path_info): + return False + + return not self.dvcignore.is_ignored_file(path_info) def isdir(self, path_info): - if not self.repo: - return os.path.isdir(path_info) - return os.path.isdir(path_info) + if not os.path.isdir(path_info): + return False + + return not self.dvcignore.is_ignored_dir(path_info) def iscopy(self, path_info): return not ( @@ -84,6 +108,10 @@ def walk(self, top, topdown=True, onerror=None): for root, dirs, files in os.walk( top, topdown=topdown, onerror=onerror ): + dirs[:], files[:] = self.dvcignore( + os.path.abspath(root), dirs, files + ) + yield os.path.normpath(root), dirs, files def walk_files(self, path_info, **kwargs): diff --git a/tests/func/test_ignore.py b/tests/func/test_ignore.py index 31b66e047f..7fb1e73b8d 100644 --- a/tests/func/test_ignore.py +++ b/tests/func/test_ignore.py @@ -17,30 +17,33 @@ from dvc.utils import relpath from dvc.utils.fs import get_mtime_and_size from tests.dir_helpers import TmpDir -from tests.utils import to_posixpath def test_ignore(tmp_dir, dvc, monkeypatch): tmp_dir.gen({"dir": {"ignored": "text", "other": "text2"}}) tmp_dir.gen(DvcIgnore.DVCIGNORE_FILE, "dir/ignored") - assert _files_set("dir", dvc.tree) == {"dir/other"} + dvc.tree.__dict__.pop("dvcignore", None) - monkeypatch.chdir("dir") - assert _files_set(".", dvc.tree) == {"./other"} + path = PathInfo(tmp_dir) + + assert set(dvc.tree.walk_files(path / "dir")) == {path / "dir" / "other"} def test_ignore_unicode(tmp_dir, dvc): tmp_dir.gen({"dir": {"other": "text", "тест": "проверка"}}) 
tmp_dir.gen(DvcIgnore.DVCIGNORE_FILE, "dir/тест") - - assert _files_set("dir", dvc.tree) == {"dir/other"} + dvc.tree.__dict__.pop("dvcignore", None) + path = PathInfo(tmp_dir) + assert set(dvc.tree.walk_files(path / "dir")) == {path / "dir" / "other"} def test_rename_ignored_file(tmp_dir, dvc): tmp_dir.gen({"dir": {"ignored": "...", "other": "text"}}) tmp_dir.gen(DvcIgnore.DVCIGNORE_FILE, "ignored*") + dvc.tree.__dict__.pop("dvcignore", None) + mtime, size = get_mtime_and_size("dir", dvc.tree) shutil.move("dir/ignored", "dir/ignored_new") @@ -62,6 +65,7 @@ def test_rename_file(tmp_dir, dvc): def test_remove_ignored_file(tmp_dir, dvc): tmp_dir.gen({"dir": {"ignored": "...", "other": "text"}}) tmp_dir.gen(DvcIgnore.DVCIGNORE_FILE, "dir/ignored") + dvc.tree.__dict__.pop("dvcignore", None) mtime, size = get_mtime_and_size("dir", dvc.tree) @@ -94,6 +98,7 @@ def test_ignore_collecting_dvcignores(tmp_dir, dvc, dname): top_ignore_file = (tmp_dir / dname).with_name(DvcIgnore.DVCIGNORE_FILE) top_ignore_file.write_text(os.path.basename(dname)) + dvc.tree.__dict__.pop("dvcignore", None) ignore_file = tmp_dir / dname / DvcIgnore.DVCIGNORE_FILE ignore_file.write_text("foo") @@ -125,17 +130,18 @@ def test_ignore_on_branch(tmp_dir, scm, dvc): with tmp_dir.branch("branch", new=True): tmp_dir.scm_gen(DvcIgnore.DVCIGNORE_FILE, "foo", commit="add ignore") - assert _files_set(".", dvc.tree) == {"./foo", "./bar"} - - dvc.tree = scm.get_tree("branch") - assert _files_set(".", dvc.tree) == { - to_posixpath(os.path.join(dvc.root_dir, DvcIgnore.DVCIGNORE_FILE)), - to_posixpath(os.path.join(dvc.root_dir, "bar")), + dvc.tree.__dict__.pop("dvcignore", None) + path = PathInfo(tmp_dir) + assert set(dvc.tree.walk_files(path)) == { + path / "foo", + path / "bar", } - -def _files_set(root, tree): - return {to_posixpath(f) for f in tree.walk_files(root)} + dvc.tree = scm.get_tree("branch", use_dvcignore=True) + assert set(dvc.tree.walk_files(path)) == { + os.fspath(path / DvcIgnore.DVCIGNORE_FILE), 
+ os.fspath(path / "bar"), + } def test_match_nested(tmp_dir, dvc): @@ -147,7 +153,7 @@ def test_match_nested(tmp_dir, dvc): "dir": {"x.backup": "x backup", "tmp": "content"}, } ) - + dvc.tree.__dict__.pop("dvcignore", None) result = {os.fspath(os.path.normpath(f)) for f in dvc.tree.walk_files(".")} assert result == {".dvcignore", "foo"} @@ -165,6 +171,7 @@ def test_ignore_subrepo(tmp_dir, scm, dvc): tmp_dir.gen({".dvcignore": "foo", "subdir": {"foo": "foo"}}) scm.add([".dvcignore"]) scm.commit("init parent dvcignore") + dvc.tree.__dict__.pop("dvcignore", None) subrepo_dir = tmp_dir / "subdir" assert not dvc.tree.exists(PathInfo(subrepo_dir / "foo")) @@ -181,8 +188,9 @@ def test_ignore_subrepo(tmp_dir, scm, dvc): def test_ignore_blank_line(tmp_dir, dvc): tmp_dir.gen({"dir": {"ignored": "text", "other": "text2"}}) tmp_dir.gen(DvcIgnore.DVCIGNORE_FILE, "foo\n\ndir/ignored") - - assert _files_set("dir", dvc.tree) == {"dir/other"} + dvc.tree.__dict__.pop("dvcignore", None) + path = PathInfo(tmp_dir) + assert set(dvc.tree.walk_files(path / "dir")) == {path / "dir" / "other"} # It is not possible to re-include a file if a parent directory of @@ -196,7 +204,7 @@ def test_ignore_blank_line(tmp_dir, dvc): ( {"dir": {"subdir": {"not_ignore": "121"}}}, ["subdir/*", "!not_ignore"], - {"dir/subdir/not_ignore"}, + {os.path.join("dir", "subdir", "not_ignore")}, ), ( {"dir": {"subdir": {"should_ignore": "121"}}}, @@ -215,7 +223,11 @@ def test_ignore_file_in_parent_path( ): tmp_dir.gen(data_struct) tmp_dir.gen(DvcIgnore.DVCIGNORE_FILE, "\n".join(pattern_list)) - assert _files_set("dir", dvc.tree) == result_set + dvc.tree.__dict__.pop("dvcignore", None) + path = PathInfo(tmp_dir) + assert set(dvc.tree.walk_files(path / "dir")) == { + path / relpath for relpath in result_set + } # If there is a separator at the end of the pattern then the pattern @@ -233,9 +245,12 @@ def test_ignore_sub_directory(tmp_dir, dvc): } ) tmp_dir.gen({"dir": {DvcIgnore.DVCIGNORE_FILE: "doc/fortz"}}) - 
assert _files_set("dir", dvc.tree) == { - "dir/a/doc/fortz/a", - "dir/{}".format(DvcIgnore.DVCIGNORE_FILE), + + dvc.tree.__dict__.pop("dvcignore", None) + path = PathInfo(tmp_dir) + assert set(dvc.tree.walk_files(path / "dir")) == { + path / "dir" / "a" / "doc" / "fortz" / "a", + path / "dir" / DvcIgnore.DVCIGNORE_FILE, } @@ -243,8 +258,10 @@ def test_ignore_sub_directory(tmp_dir, dvc): def test_ignore_directory(tmp_dir, dvc): tmp_dir.gen({"dir": {"fortz": {}, "a": {"fortz": {}}}}) tmp_dir.gen({"dir": {DvcIgnore.DVCIGNORE_FILE: "fortz"}}) - assert _files_set("dir", dvc.tree) == { - "dir/{}".format(DvcIgnore.DVCIGNORE_FILE), + dvc.tree.__dict__.pop("dvcignore", None) + path = PathInfo(tmp_dir) + assert set(dvc.tree.walk_files(path / "dir")) == { + path / "dir" / DvcIgnore.DVCIGNORE_FILE, } @@ -252,10 +269,11 @@ def test_multi_ignore_file(tmp_dir, dvc, monkeypatch): tmp_dir.gen({"dir": {"subdir": {"should_ignore": "1", "not_ignore": "1"}}}) tmp_dir.gen(DvcIgnore.DVCIGNORE_FILE, "dir/subdir/*_ignore") tmp_dir.gen({"dir": {DvcIgnore.DVCIGNORE_FILE: "!subdir/not_ignore"}}) - - assert _files_set("dir", dvc.tree) == { - "dir/subdir/not_ignore", - "dir/{}".format(DvcIgnore.DVCIGNORE_FILE), + dvc.tree.__dict__.pop("dvcignore", None) + path = PathInfo(tmp_dir) + assert set(dvc.tree.walk_files(path / "dir")) == { + path / "dir" / "subdir" / "not_ignore", + path / "dir" / DvcIgnore.DVCIGNORE_FILE, } @@ -276,6 +294,7 @@ def test_pattern_trie_tree(tmp_dir, dvc): "other": {DvcIgnore.DVCIGNORE_FILE: "1\n2\n3"}, } ) + dvc.tree.__dict__.pop("dvcignore", None) ignore_pattern_trie = None for ignore in dvc.tree.dvcignore.ignores: if isinstance(ignore, DvcIgnorePatternsTrie): diff --git a/tests/func/test_tree.py b/tests/func/test_tree.py index f6e6d7021e..9a533305c1 100644 --- a/tests/func/test_tree.py +++ b/tests/func/test_tree.py @@ -1,7 +1,6 @@ import os from os.path import join -from dvc.ignore import CleanTree from dvc.path_info import PathInfo from dvc.repo import Repo from 
dvc.repo.tree import RepoTree @@ -137,7 +136,9 @@ def test_subdir(self): class TestWalkInGit(AssertWalkEqualMixin, TestGit): def test_nobranch(self): - tree = CleanTree(LocalRemoteTree(None, {"url": self._root_dir})) + tree = LocalRemoteTree( + None, {"url": self._root_dir}, use_dvcignore=True + ) self.assertWalkEqual( tree.walk("."), [ @@ -232,13 +233,13 @@ def test_cleantree_subrepo(tmp_dir, dvc, scm, monkeypatch): path = PathInfo(subrepo_dir) - assert isinstance(dvc.tree, CleanTree) + assert dvc.tree.use_dvcignore assert not dvc.tree.exists(path / "foo") assert not dvc.tree.isfile(path / "foo") assert not dvc.tree.exists(path / "dir") assert not dvc.tree.isdir(path / "dir") - assert isinstance(subrepo.tree, CleanTree) + assert subrepo.tree.use_dvcignore assert subrepo.tree.exists(path / "foo") assert subrepo.tree.isfile(path / "foo") assert subrepo.tree.exists(path / "dir") diff --git a/tests/unit/utils/test_fs.py b/tests/unit/utils/test_fs.py index c78e4a2a47..acae898b91 100644 --- a/tests/unit/utils/test_fs.py +++ b/tests/unit/utils/test_fs.py @@ -6,7 +6,6 @@ from mock import patch import dvc -from dvc.ignore import CleanTree from dvc.path_info import PathInfo from dvc.system import System from dvc.tree.local import LocalRemoteTree @@ -29,7 +28,9 @@ class TestMtimeAndSize(TestDir): def test(self): - tree = CleanTree(LocalRemoteTree(None, {"url": self.root_dir})) + tree = LocalRemoteTree( + None, {"url": self.root_dir}, use_dvcignore=True + ) file_time, file_size = get_mtime_and_size(self.DATA, tree) dir_time, dir_size = get_mtime_and_size(self.DATA_DIR, tree) @@ -130,7 +131,9 @@ def test_path_object_and_str_are_valid_types_get_mtime_and_size(tmp_dir): tmp_dir.gen( {"dir": {"dir_file": "dir file content"}, "file": "file_content"} ) - tree = CleanTree(LocalRemoteTree(None, {"url": os.fspath(tmp_dir)})) + tree = LocalRemoteTree( + None, {"url": os.fspath(tmp_dir)}, use_dvcignore=True + ) time, size = get_mtime_and_size("dir", tree) object_time, object_size = 
get_mtime_and_size(PathInfo("dir"), tree)