From 1fd74a471153f685f09028b629f15d366c1894dd Mon Sep 17 00:00:00 2001 From: Ruslan Kuprieiev Date: Fri, 17 Jul 2020 09:06:26 +0300 Subject: [PATCH] dvc: get rid of BaseTree in favor of BaseRemoteTree Related to #4050 --- dvc/repo/tree.py | 30 ++++++++++++++++-------------- dvc/scm/git/tree.py | 23 ++++++++++++++++------- dvc/scm/tree.py | 39 --------------------------------------- 3 files changed, 32 insertions(+), 60 deletions(-) diff --git a/dvc/repo/tree.py b/dvc/repo/tree.py index 6346e03bf4..05f97f3685 100644 --- a/dvc/repo/tree.py +++ b/dvc/repo/tree.py @@ -5,14 +5,14 @@ from dvc.exceptions import OutputNotFoundError from dvc.path_info import PathInfo from dvc.remote.base import RemoteActionNotImplemented -from dvc.scm.tree import BaseTree +from dvc.tree.base import BaseRemoteTree from dvc.utils import file_md5 from dvc.utils.fs import copy_fobj_to_file, makedirs logger = logging.getLogger(__name__) -class DvcTree(BaseTree): # pylint:disable=abstract-method +class DvcTree(BaseRemoteTree): # pylint:disable=abstract-method """DVC repo tree. Args: @@ -27,7 +27,7 @@ class DvcTree(BaseTree): # pylint:disable=abstract-method """ def __init__(self, repo, fetch=False, stream=False): - self.repo = repo + super().__init__(repo, {"url": repo.root_dir}) self.fetch = fetch self.stream = stream @@ -101,14 +101,14 @@ def open( cache_path = out.cache_path return open(cache_path, mode=mode, encoding=encoding) - def exists(self, path): + def exists(self, path): # pylint: disable=arguments-differ try: self._find_outs(path, strict=False, recursive=True) return True except OutputNotFoundError: return False - def isdir(self, path): + def isdir(self, path): # pylint: disable=arguments-differ if not self.exists(path): return False @@ -134,7 +134,7 @@ def isdir(self, path): except FileNotFoundError: return True - def isfile(self, path): + def isfile(self, path): # pylint: disable=arguments-differ if not self.exists(path): return False @@ -237,7 +237,7 @@ def get_file_hash(self, path_info): return out.checksum -class RepoTree(BaseTree): # pylint:disable=abstract-method +class RepoTree(BaseRemoteTree): # pylint:disable=abstract-method """DVC + git-tracked files tree. Args: @@ -247,7 +247,7 @@ class RepoTree(BaseTree): # pylint:disable=abstract-method """ def __init__(self, repo, **kwargs): - self.repo = repo + super().__init__(repo, {"url": repo.root_dir}) if hasattr(repo, "dvc_dir"): self.dvctree = DvcTree(repo, **kwargs) else: @@ -266,7 +266,9 @@ def stream(self): return self.dvctree.stream return False - def open(self, path, mode="r", encoding="utf-8", **kwargs): + def open( + self, path, mode="r", encoding="utf-8", **kwargs + ): # pylint: disable=arguments-differ if "b" in mode: encoding = None @@ -276,12 +278,12 @@ def open(self, path, mode="r", encoding="utf-8", **kwargs): ) return self.repo.tree.open(path, mode=mode, encoding=encoding) - def exists(self, path): + def exists(self, path): # pylint: disable=arguments-differ return self.repo.tree.exists(path) or ( self.dvctree and self.dvctree.exists(path) ) - def isdir(self, path): + def isdir(self, path): # pylint: disable=arguments-differ return self.repo.tree.isdir(path) or ( self.dvctree and self.dvctree.isdir(path) ) @@ -289,7 +291,7 @@ def isdir(self, path): def isdvc(self, path, **kwargs): return self.dvctree is not None and self.dvctree.isdvc(path, **kwargs) - def isfile(self, path): + def isfile(self, path): # pylint: disable=arguments-differ return self.repo.tree.isfile(path) or ( self.dvctree and self.dvctree.isfile(path) ) @@ -393,7 +395,7 @@ def walk( repo_walk = self.repo.tree.walk(top, topdown=topdown) yield from self._walk(dvc_walk, repo_walk, dvcfiles=dvcfiles) - def walk_files(self, top, **kwargs): + def walk_files(self, top, **kwargs): # pylint: disable=arguments-differ for root, _, files in self.walk(top, **kwargs): for fname in files: yield PathInfo(root) / fname @@ -437,5 +439,5 @@ def copytree(self, top, dest): copy_fobj_to_file(fobj, dest_dir / fname) @property - def hash_jobs(self): + def hash_jobs(self): # pylint: disable=invalid-overridden-method return self.repo.tree.hash_jobs diff --git a/dvc/scm/git/tree.py b/dvc/scm/git/tree.py index 158ae2d91c..eae9b9355e 100644 --- a/dvc/scm/git/tree.py +++ b/dvc/scm/git/tree.py @@ -6,7 +6,7 @@ from funcy import cached_property from dvc.exceptions import DvcException -from dvc.scm.tree import BaseTree +from dvc.tree.base import BaseRemoteTree from dvc.utils import relpath # see git-fast-import(1) @@ -21,7 +21,7 @@ def _item_basename(item): return os.path.basename(item.path) -class GitTree(BaseTree): # pylint:disable=abstract-method +class GitTree(BaseRemoteTree): # pylint:disable=abstract-method """Proxies the repo file access methods to Git objects""" def __init__(self, git, rev, use_dvcignore=False, dvcignore_root=None): @@ -31,6 +31,7 @@ def __init__(self, git, rev, use_dvcignore=False, dvcignore_root=None): git (dvc.scm.Git): branch: """ + super().__init__(None, {}) self.git = git self.rev = rev self.use_dvcignore = use_dvcignore @@ -52,7 +53,9 @@ def dvcignore(self): self.use_dvcignore = True return ret - def open(self, path, mode="r", encoding="utf-8"): + def open( + self, path, mode="r", encoding="utf-8" + ): # pylint: disable=arguments-differ assert mode in {"r", "rb"} relative_path = relpath(path, self.git.working_dir) @@ -73,7 +76,7 @@ def open(self, path, mode="r", encoding="utf-8"): return io.BytesIO(data) return io.StringIO(data.decode(encoding)) - def exists(self, path): + def exists(self, path): # pylint: disable=arguments-differ if self._git_object_by_path(path) is None: return False @@ -81,7 +84,7 @@ def exists(self, path): path ) and not self.dvcignore.is_ignored_dir(path) - def isdir(self, path): + def isdir(self, path): # pylint: disable=arguments-differ obj = self._git_object_by_path(path) if obj is None: return False @@ -89,7 +92,7 @@ def isdir(self, path): return False return not self.dvcignore.is_ignored_dir(path) - def isfile(self, path): + def isfile(self, path): # pylint: disable=arguments-differ obj = self._git_object_by_path(path) if obj is None: return False @@ -214,7 +217,13 @@ def to_ctime(git_time): ) @property - def hash_jobs(self): + def hash_jobs(self): # pylint: disable=invalid-overridden-method # NOTE: gitpython is not threadsafe. See # https://github.com/iterative/dvc/issues/4079 return 1 + + def walk_files(self, top): # pylint: disable=arguments-differ + for root, _, files in self.walk(top): + for file in files: + # NOTE: os.path.join is ~5.5 times slower + yield f"{root}{os.sep}{file}" diff --git a/dvc/scm/tree.py b/dvc/scm/tree.py index 0d8e91f618..a7ff4f442b 100644 --- a/dvc/scm/tree.py +++ b/dvc/scm/tree.py @@ -1,42 +1,3 @@ -import os - - -class BaseTree: - """Abstract class to represent access to files""" - - @property - def tree_root(self): - pass - - def open(self, path, mode="r", encoding="utf-8"): - """Open file and return a stream.""" - - def exists(self, path): - """Test whether a path exists.""" - - def isdir(self, path): - """Return true if the pathname refers to an existing directory.""" - - def isfile(self, path): - """Test whether a path is a regular file""" - - def walk(self, top, topdown=True, onerror=None): - """Directory tree generator. - - See `os.walk` for the docs. Differences: - - no support for symlinks - """ - - def walk_files(self, top): - for root, _, files in self.walk(top): - for file in files: - # NOTE: os.path.join is ~5.5 times slower - yield f"{root}{os.sep}{file}" - - def makedirs(self, path, mode=0o777, exist_ok=True): - raise NotImplementedError - - def is_working_tree(tree): from dvc.tree.local import LocalRemoteTree