Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Restyle dvc: get rid of WorkingTree #4217

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions dvc/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,7 @@ class Config(dict):
def __init__(
self, dvc_dir=None, validate=True, tree=None,
): # pylint: disable=super-init-not-called
from dvc.scm.tree import WorkingTree
from dvc.tree.local import LocalRemoteTree

self.dvc_dir = dvc_dir

Expand All @@ -248,7 +248,7 @@ def __init__(
else:
self.dvc_dir = os.path.abspath(os.path.realpath(dvc_dir))

self.wtree = WorkingTree(self.dvc_dir)
self.wtree = LocalRemoteTree(None, {"url": self.dvc_dir})
self.tree = tree.tree if tree else self.wtree

self.load(validate=validate)
Expand Down Expand Up @@ -326,7 +326,7 @@ def _save_config(self, level, conf_dict):

logger.debug(f"Writing '{filename}'.")

tree.makedirs(os.path.dirname(filename), exist_ok=True)
tree.makedirs(os.path.dirname(filename))

config = configobj.ConfigObj(_pack_remotes(conf_dict))
with tree.open(filename, "wb") as fobj:
Expand Down
5 changes: 2 additions & 3 deletions dvc/repo/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,12 +77,11 @@ def __init__(self, root_dir=None, scm=None, rev=None):
from dvc.repo.metrics import Metrics
from dvc.repo.plots import Plots
from dvc.repo.params import Params
from dvc.scm.tree import WorkingTree
from dvc.tree.local import LocalRemoteTree
from dvc.utils.fs import makedirs
from dvc.stage.cache import StageCache

if scm:
# use GitTree instead of WorkingTree as default repo tree instance
tree = scm.get_tree(rev)
self.root_dir = self.find_root(root_dir, tree)
self.scm = scm
Expand All @@ -91,7 +90,7 @@ def __init__(self, root_dir=None, scm=None, rev=None):
else:
root_dir = self.find_root(root_dir)
self.root_dir = os.path.abspath(os.path.realpath(root_dir))
self.tree = WorkingTree(self.root_dir)
self.tree = LocalRemoteTree(None, {"url": self.root_dir})

self.dvc_dir = os.path.join(self.root_dir, self.DVC_DIR)
self.config = Config(self.dvc_dir, tree=self.tree)
Expand Down
4 changes: 2 additions & 2 deletions dvc/repo/brancher.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from funcy import group_by

from dvc.scm.tree import WorkingTree
from dvc.tree.local import LocalRemoteTree


def brancher( # noqa: E302
Expand Down Expand Up @@ -29,7 +29,7 @@ def brancher( # noqa: E302

scm = self.scm

self.tree = WorkingTree(self.root_dir)
self.tree = LocalRemoteTree(self, {"url": self.root_dir})
yield "workspace"

if revs and "workspace" in revs:
Expand Down
62 changes: 4 additions & 58 deletions dvc/scm/tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,63 +41,9 @@ def makedirs(self, path, mode=0o777, exist_ok=True):
raise NotImplementedError


class WorkingTree(BaseTree):
"""Proxies the repo file access methods to working tree files"""

def __init__(self, repo_root=None):
repo_root = repo_root or os.getcwd()
self.repo_root = repo_root

@property
def tree_root(self):
return self.repo_root

def open(self, path, mode="r", encoding="utf-8"):
"""Open file and return a stream."""
if "b" in mode:
encoding = None
return open(path, mode=mode, encoding=encoding)

def exists(self, path):
"""Test whether a path exists."""
return os.path.lexists(path)

def isdir(self, path):
"""Return true if the pathname refers to an existing directory."""
return os.path.isdir(path)

def isfile(self, path):
"""Test whether a path is a regular file"""
return os.path.isfile(path)

def walk(self, top, topdown=True, onerror=None):
"""Directory tree generator.

See `os.walk` for the docs. Differences:
- no support for symlinks
"""
for root, dirs, files in os.walk(
top, topdown=topdown, onerror=onerror
):
yield os.path.normpath(root), dirs, files

def isexec(self, path):
mode = os.stat(path).st_mode
return mode & (stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)

@staticmethod
def stat(path):
return os.stat(path)

@cached_property
def hash_jobs(self):
return max(1, min(4, cpu_count() // 2))

def makedirs(self, path, mode=0o777, exist_ok=True):
os.makedirs(path, mode=mode, exist_ok=exist_ok)


def is_working_tree(tree):
return isinstance(tree, WorkingTree) or isinstance(
getattr(tree, "tree", None), WorkingTree
from dvc.tree.local import LocalRemoteTree

return isinstance(tree, LocalRemoteTree) or isinstance(
getattr(tree, "tree", None), LocalRemoteTree
)
8 changes: 5 additions & 3 deletions dvc/state.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,10 +89,12 @@ class State: # pylint: disable=too-many-instance-attributes
MAX_UINT = 2 ** 64 - 2

def __init__(self, local_cache):
from dvc.tree.local import LocalRemoteTree

repo = local_cache.repo
self.repo = repo
self.root_dir = repo.root_dir
self.tree = local_cache.tree.work_tree
self.tree = LocalRemoteTree(None, {})

state_config = repo.config.get("state", {})
self.row_limit = state_config.get("row_limit", self.STATE_ROW_LIMIT)
Expand Down Expand Up @@ -394,8 +396,8 @@ def get(self, path_info):
assert isinstance(path_info, str) or path_info.scheme == "local"
path = os.fspath(path_info)

# NOTE: use os.path.exists instead of WorkingTree.exists
# WorkingTree.exists uses lexists() and will return True for broken
# NOTE: use os.path.exists instead of LocalRemoteTree.exists
# because it uses lexists() and will return True for broken
# symlinks that we cannot stat() in get_mtime_and_size
if not os.path.exists(path):
return None
Expand Down
49 changes: 31 additions & 18 deletions dvc/tree/local.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
from dvc.exceptions import DvcException
from dvc.path_info import PathInfo
from dvc.scheme import Schemes
from dvc.scm.tree import WorkingTree, is_working_tree
from dvc.system import System
from dvc.utils import file_md5, relpath, tmp_fname
from dvc.utils.fs import (
Expand Down Expand Up @@ -42,21 +41,16 @@ def __init__(self, repo, config):
url = config.get("url")
self.path_info = self.PATH_CLS(url) if url else None

@property
def tree_root(self):
return self.config.get("url")

@property
def state(self):
from dvc.state import StateNoop

return self.repo.state if self.repo else StateNoop()

@cached_property
def work_tree(self):
# When using repo.brancher, repo.tree may change to/from WorkingTree to
# GitTree arbitarily. When repo.tree is GitTree, local cache needs to
# use its own WorkingTree instance.
if self.repo:
return WorkingTree(self.repo.root_dir)
return None

@staticmethod
def open(path_info, mode="r", encoding=None):
return open(path_info, mode=mode, encoding=encoding)
Expand All @@ -65,26 +59,39 @@ def exists(self, path_info):
assert isinstance(path_info, str) or path_info.scheme == "local"
if not self.repo:
return os.path.exists(path_info)
return self.work_tree.exists(path_info)
return os.path.lexists(path_info)

def isfile(self, path_info):
if not self.repo:
return os.path.isfile(path_info)
return self.work_tree.isfile(path_info)
return os.path.isfile(path_info)

def isdir(self, path_info):
if not self.repo:
return os.path.isdir(path_info)
return self.work_tree.isdir(path_info)
return os.path.isdir(path_info)

def iscopy(self, path_info):
return not (
System.is_symlink(path_info) or System.is_hardlink(path_info)
)

def walk(self, top, topdown=True, onerror=None):
"""Directory tree generator.

See `os.walk` for the docs. Differences:
- no support for symlinks
"""
for root, dirs, files in os.walk(
top, topdown=topdown, onerror=onerror
):
yield os.path.normpath(root), dirs, files

def walk_files(self, path_info, **kwargs):
for fname in self.work_tree.walk_files(path_info):
yield PathInfo(fname)
for root, _, files in self.walk(path_info):
for file in files:
# NOTE: os.path.join is ~5.5 times slower
yield PathInfo(f"{root}{os.sep}{file}")

def is_empty(self, path_info):
path = path_info.fspath
Expand All @@ -111,6 +118,14 @@ def remove(self, path_info):
def makedirs(self, path_info):
makedirs(path_info, exist_ok=True, mode=self.dir_mode)

def isexec(self, path):
mode = os.stat(path).st_mode
return mode & (stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)

@staticmethod
def stat(path):
return os.stat(path)

def move(self, from_info, to_info, mode=None):
if from_info.scheme != "local" or to_info.scheme != "local":
raise NotImplementedError
Expand Down Expand Up @@ -215,9 +230,7 @@ def _unprotect_file(self, path):
os.chmod(path, self.file_mode)

def _unprotect_dir(self, path):
assert is_working_tree(self.repo.tree)

for fname in self.repo.tree.walk_files(path):
for fname in self.walk_files(path):
self._unprotect_file(fname)

def unprotect(self, path_info):
Expand Down
2 changes: 1 addition & 1 deletion dvc/utils/fs.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def get_mtime_and_size(path, tree):
raise
continue
size += stats.st_size
files_mtimes[file_path] = stats.st_mtime
files_mtimes[os.fspath(file_path)] = stats.st_mtime

# We track file changes and moves, which cannot be detected with simply
# max(mtime(f) for f in non_ignored_files)
Expand Down
10 changes: 6 additions & 4 deletions tests/func/test_ignore.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,9 @@
DvcIgnorePatternsTrie,
DvcIgnoreRepo,
)
from dvc.path_info import PathInfo
from dvc.repo import Repo
from dvc.scm.tree import WorkingTree
from dvc.tree.local import LocalRemoteTree
from dvc.utils import relpath
from dvc.utils.fs import get_mtime_and_size
from tests.dir_helpers import TmpDir
Expand Down Expand Up @@ -107,7 +108,8 @@ def test_ignore_collecting_dvcignores(tmp_dir, dvc, dname):
assert ignore_pattern_trie is not None
assert (
DvcIgnorePatterns.from_files(
os.fspath(top_ignore_file), WorkingTree(dvc.root_dir)
os.fspath(top_ignore_file),
LocalRemoteTree(None, {"url": dvc.root_dir}),
)
== ignore_pattern_trie[os.fspath(ignore_file)]
)
Expand Down Expand Up @@ -165,15 +167,15 @@ def test_ignore_subrepo(tmp_dir, scm, dvc):
scm.commit("init parent dvcignore")

subrepo_dir = tmp_dir / "subdir"
assert not dvc.tree.exists(subrepo_dir / "foo")
assert not dvc.tree.exists(PathInfo(subrepo_dir / "foo"))

with subrepo_dir.chdir():
subrepo = Repo.init(subdir=True)
scm.add(str(subrepo_dir / "foo"))
scm.commit("subrepo init")

for _ in subrepo.brancher(all_commits=True):
assert subrepo.tree.exists(subrepo_dir / "foo")
assert subrepo.tree.exists(PathInfo(subrepo_dir / "foo"))


def test_ignore_blank_line(tmp_dir, dvc):
Expand Down
32 changes: 18 additions & 14 deletions tests/func/test_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,15 @@
from dvc.repo.tree import RepoTree
from dvc.scm import SCM
from dvc.scm.git import GitTree
from dvc.scm.tree import WorkingTree
from dvc.tree.local import LocalRemoteTree
from dvc.utils.fs import remove
from tests.basic_env import TestDir, TestGit, TestGitSubmodule


class TestWorkingTree(TestDir):
class TestLocalRemoteTree(TestDir):
def setUp(self):
super().setUp()
self.tree = WorkingTree()
self.tree = LocalRemoteTree(None, {})

def test_open(self):
with self.tree.open(self.FOO) as fd:
Expand Down Expand Up @@ -109,7 +109,7 @@ def convert_to_sets(walk_results):

class TestWalkInNoSCM(AssertWalkEqualMixin, TestDir):
def test(self):
tree = WorkingTree(self._root_dir)
tree = LocalRemoteTree(None, {"url": self._root_dir})
self.assertWalkEqual(
tree.walk(self._root_dir),
[
Expand All @@ -128,7 +128,7 @@ def test(self):
)

def test_subdir(self):
tree = WorkingTree(self._root_dir)
tree = LocalRemoteTree(None, {"url": self._root_dir})
self.assertWalkEqual(
tree.walk(join("data_dir", "data_sub_dir")),
[(join("data_dir", "data_sub_dir"), [], ["data_sub"])],
Expand All @@ -137,7 +137,7 @@ def test_subdir(self):

class TestWalkInGit(AssertWalkEqualMixin, TestGit):
def test_nobranch(self):
tree = CleanTree(WorkingTree(self._root_dir))
tree = CleanTree(LocalRemoteTree(None, {"url": self._root_dir}))
self.assertWalkEqual(
tree.walk("."),
[
Expand Down Expand Up @@ -224,20 +224,24 @@ def test_repotree_cache_save(tmp_dir, dvc, scm, erepo_dir, local_cloud):


def test_cleantree_subrepo(tmp_dir, dvc, scm, monkeypatch):
from dvc.path_info import PathInfo

tmp_dir.gen({"subdir": {}})
subrepo_dir = tmp_dir / "subdir"
with subrepo_dir.chdir():
subrepo = Repo.init(subdir=True)
subrepo_dir.gen({"foo": "foo", "dir": {"bar": "bar"}})

path = PathInfo(subrepo_dir)

assert isinstance(dvc.tree, CleanTree)
assert not dvc.tree.exists(subrepo_dir / "foo")
assert not dvc.tree.isfile(subrepo_dir / "foo")
assert not dvc.tree.exists(subrepo_dir / "dir")
assert not dvc.tree.isdir(subrepo_dir / "dir")
assert not dvc.tree.exists(path / "foo")
assert not dvc.tree.isfile(path / "foo")
assert not dvc.tree.exists(path / "dir")
assert not dvc.tree.isdir(path / "dir")

assert isinstance(subrepo.tree, CleanTree)
assert subrepo.tree.exists(subrepo_dir / "foo")
assert subrepo.tree.isfile(subrepo_dir / "foo")
assert subrepo.tree.exists(subrepo_dir / "dir")
assert subrepo.tree.isdir(subrepo_dir / "dir")
assert subrepo.tree.exists(path / "foo")
assert subrepo.tree.isfile(path / "foo")
assert subrepo.tree.exists(path / "dir")
assert subrepo.tree.isdir(path / "dir")
Loading