Skip to content

Commit

Permalink
dvc: get rid of BaseTree in favor of BaseRemoteTree (#4240)
Browse files Browse the repository at this point in the history
Related to #4050
  • Loading branch information
efiop authored Jul 19, 2020
1 parent 488e0d8 commit 681fb82
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 60 deletions.
30 changes: 16 additions & 14 deletions dvc/repo/tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,14 @@
from dvc.exceptions import OutputNotFoundError
from dvc.path_info import PathInfo
from dvc.remote.base import RemoteActionNotImplemented
from dvc.scm.tree import BaseTree
from dvc.tree.base import BaseRemoteTree
from dvc.utils import file_md5
from dvc.utils.fs import copy_fobj_to_file, makedirs

logger = logging.getLogger(__name__)


class DvcTree(BaseTree): # pylint:disable=abstract-method
class DvcTree(BaseRemoteTree): # pylint:disable=abstract-method
"""DVC repo tree.
Args:
Expand All @@ -27,7 +27,7 @@ class DvcTree(BaseTree): # pylint:disable=abstract-method
"""

def __init__(self, repo, fetch=False, stream=False):
self.repo = repo
super().__init__(repo, {"url": repo.root_dir})
self.fetch = fetch
self.stream = stream

Expand Down Expand Up @@ -101,14 +101,14 @@ def open(
cache_path = out.cache_path
return open(cache_path, mode=mode, encoding=encoding)

def exists(self, path):
def exists(self, path): # pylint: disable=arguments-differ
try:
self._find_outs(path, strict=False, recursive=True)
return True
except OutputNotFoundError:
return False

def isdir(self, path):
def isdir(self, path): # pylint: disable=arguments-differ
if not self.exists(path):
return False

Expand All @@ -134,7 +134,7 @@ def isdir(self, path):
except FileNotFoundError:
return True

def isfile(self, path):
def isfile(self, path): # pylint: disable=arguments-differ
if not self.exists(path):
return False

Expand Down Expand Up @@ -237,7 +237,7 @@ def get_file_hash(self, path_info):
return out.checksum


class RepoTree(BaseTree): # pylint:disable=abstract-method
class RepoTree(BaseRemoteTree): # pylint:disable=abstract-method
"""DVC + git-tracked files tree.
Args:
Expand All @@ -247,7 +247,7 @@ class RepoTree(BaseTree): # pylint:disable=abstract-method
"""

def __init__(self, repo, **kwargs):
self.repo = repo
super().__init__(repo, {"url": repo.root_dir})
if hasattr(repo, "dvc_dir"):
self.dvctree = DvcTree(repo, **kwargs)
else:
Expand All @@ -266,7 +266,9 @@ def stream(self):
return self.dvctree.stream
return False

def open(self, path, mode="r", encoding="utf-8", **kwargs):
def open(
self, path, mode="r", encoding="utf-8", **kwargs
): # pylint: disable=arguments-differ
if "b" in mode:
encoding = None

Expand All @@ -276,20 +278,20 @@ def open(self, path, mode="r", encoding="utf-8", **kwargs):
)
return self.repo.tree.open(path, mode=mode, encoding=encoding)

def exists(self, path):
def exists(self, path): # pylint: disable=arguments-differ
return self.repo.tree.exists(path) or (
self.dvctree and self.dvctree.exists(path)
)

def isdir(self, path):
def isdir(self, path): # pylint: disable=arguments-differ
return self.repo.tree.isdir(path) or (
self.dvctree and self.dvctree.isdir(path)
)

def isdvc(self, path, **kwargs):
    """Tell whether ``path`` is considered DVC-tracked by the DVC tree.

    Args:
        path: path to check; forwarded unchanged to ``self.dvctree.isdvc``.
        **kwargs: extra keyword arguments forwarded to
            ``self.dvctree.isdvc``.

    Returns:
        ``False`` when this tree has no ``dvctree`` (it is ``None``),
        otherwise whatever ``self.dvctree.isdvc(path, **kwargs)`` returns.
    """
    dvctree = self.dvctree
    # Without a DVC tree nothing can be DVC-tracked.
    if dvctree is None:
        return False
    return dvctree.isdvc(path, **kwargs)

def isfile(self, path):
def isfile(self, path): # pylint: disable=arguments-differ
return self.repo.tree.isfile(path) or (
self.dvctree and self.dvctree.isfile(path)
)
Expand Down Expand Up @@ -393,7 +395,7 @@ def walk(
repo_walk = self.repo.tree.walk(top, topdown=topdown)
yield from self._walk(dvc_walk, repo_walk, dvcfiles=dvcfiles)

def walk_files(self, top, **kwargs):
def walk_files(self, top, **kwargs): # pylint: disable=arguments-differ
for root, _, files in self.walk(top, **kwargs):
for fname in files:
yield PathInfo(root) / fname
Expand Down Expand Up @@ -437,5 +439,5 @@ def copytree(self, top, dest):
copy_fobj_to_file(fobj, dest_dir / fname)

@property
def hash_jobs(self):
def hash_jobs(self): # pylint: disable=invalid-overridden-method
return self.repo.tree.hash_jobs
23 changes: 16 additions & 7 deletions dvc/scm/git/tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from funcy import cached_property

from dvc.exceptions import DvcException
from dvc.scm.tree import BaseTree
from dvc.tree.base import BaseRemoteTree
from dvc.utils import relpath

# see git-fast-import(1)
Expand All @@ -21,7 +21,7 @@ def _item_basename(item):
return os.path.basename(item.path)


class GitTree(BaseTree): # pylint:disable=abstract-method
class GitTree(BaseRemoteTree): # pylint:disable=abstract-method
"""Proxies the repo file access methods to Git objects"""

def __init__(self, git, rev, use_dvcignore=False, dvcignore_root=None):
Expand All @@ -31,6 +31,7 @@ def __init__(self, git, rev, use_dvcignore=False, dvcignore_root=None):
git (dvc.scm.Git):
branch:
"""
super().__init__(None, {})
self.git = git
self.rev = rev
self.use_dvcignore = use_dvcignore
Expand All @@ -52,7 +53,9 @@ def dvcignore(self):
self.use_dvcignore = True
return ret

def open(self, path, mode="r", encoding="utf-8"):
def open(
self, path, mode="r", encoding="utf-8"
): # pylint: disable=arguments-differ
assert mode in {"r", "rb"}

relative_path = relpath(path, self.git.working_dir)
Expand All @@ -73,23 +76,23 @@ def open(self, path, mode="r", encoding="utf-8"):
return io.BytesIO(data)
return io.StringIO(data.decode(encoding))

def exists(self, path):
def exists(self, path): # pylint: disable=arguments-differ
if self._git_object_by_path(path) is None:
return False

return not self.dvcignore.is_ignored_file(
path
) and not self.dvcignore.is_ignored_dir(path)

def isdir(self, path):
def isdir(self, path): # pylint: disable=arguments-differ
obj = self._git_object_by_path(path)
if obj is None:
return False
if obj.mode != GIT_MODE_DIR:
return False
return not self.dvcignore.is_ignored_dir(path)

def isfile(self, path):
def isfile(self, path): # pylint: disable=arguments-differ
obj = self._git_object_by_path(path)
if obj is None:
return False
Expand Down Expand Up @@ -214,7 +217,13 @@ def to_ctime(git_time):
)

@property
def hash_jobs(self):
def hash_jobs(self): # pylint: disable=invalid-overridden-method
# NOTE: gitpython is not threadsafe. See
# https://github.com/iterative/dvc/issues/4079
return 1

def walk_files(self, top):  # pylint: disable=arguments-differ
    """Yield the path of every file found by ``self.walk(top)``.

    Args:
        top: starting point passed straight to ``self.walk``.

    Yields:
        str: ``root`` and the file name joined with ``os.sep``, for each
        file name in each ``(root, dirs, files)`` triple from ``walk``.
    """
    separator = os.sep
    for root, _, files in self.walk(top):
        for fname in files:
            # NOTE: os.path.join is ~5.5 times slower
            yield f"{root}{separator}{fname}"
39 changes: 0 additions & 39 deletions dvc/scm/tree.py
Original file line number Diff line number Diff line change
@@ -1,42 +1,3 @@
import os


class BaseTree:
    """Abstract class to represent access to files.

    Most methods here are placeholders: they carry only a docstring and
    therefore return ``None`` unless a subclass overrides them. Only
    ``walk_files`` has a working default (built on ``walk``), and
    ``makedirs`` raises ``NotImplementedError``.
    """

    @property
    def tree_root(self):
        """Root of the tree; the base implementation returns ``None``."""
        return None

    def open(self, path, mode="r", encoding="utf-8"):
        """Open file and return a stream."""

    def exists(self, path):
        """Test whether a path exists."""

    def isdir(self, path):
        """Return true if the pathname refers to an existing directory."""

    def isfile(self, path):
        """Test whether a path is a regular file"""

    def walk(self, top, topdown=True, onerror=None):
        """Directory tree generator.

        See `os.walk` for the docs. Differences:
        - no support for symlinks
        """

    def walk_files(self, top):
        """Yield the path of every file found by ``self.walk(top)``.

        Each path is ``root`` and the file name joined with ``os.sep``.
        Relies on a subclass providing a ``walk`` that yields
        ``(root, dirs, files)`` triples; the base ``walk`` returns
        ``None``, which cannot be iterated.
        """
        for root, _, files in self.walk(top):
            for fname in files:
                # NOTE: os.path.join is ~5.5 times slower
                yield f"{root}{os.sep}{fname}"

    def makedirs(self, path, mode=0o777, exist_ok=True):
        """Create directories; not supported by the base class.

        Raises:
            NotImplementedError: always, unless overridden by a subclass.
        """
        raise NotImplementedError


def is_working_tree(tree):
from dvc.tree.local import LocalRemoteTree

Expand Down

0 comments on commit 681fb82

Please sign in to comment.