Skip to content

Commit

Permalink
remote: support trees in get_dir_checksum
Browse files Browse the repository at this point in the history
  • Loading branch information
pmrowla committed May 22, 2020
1 parent 0c951a8 commit 95ea7e2
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 4 deletions.
13 changes: 12 additions & 1 deletion dvc/dependency/repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,13 +50,24 @@ def _make_repo(self, *, locked=True):
return external_repo(d["url"], rev=rev)

def _get_checksum(self, locked=True):
    """Return the checksum of ``self.def_path`` inside the external repo.

    The md5 recorded for a DVC output at ``def_path`` is used when such an
    output exists. Otherwise the path is inspected through a streaming
    ``RepoTree``: a directory is hashed through the local cache's
    ``get_dir_checksum`` (with the tree passed along), a file through the
    tree's ``get_file_checksum``.

    Args:
        locked: forwarded to ``self._make_repo``.
    """
    from dvc.repo.tree import RepoTree

    with self._make_repo(locked=locked) as repo:
        try:
            return repo.find_out_by_relpath(self.def_path).info["md5"]
        except OutputNotFoundError:
            path = PathInfo(os.path.join(repo.root_dir, self.def_path))

            # we want stream but not fetch, so DVC out directories are
            # walked, but dir contents are not fetched
            tree = RepoTree(repo, stream=True)

            if tree.isdir(path):
                # We are polluting our repo cache with some dir listing here
                return self.repo.cache.local.get_dir_checksum(
                    path, tree=tree
                )
            return tree.get_file_checksum(path)

def status(self):
current_checksum = self._get_checksum(locked=True)
Expand Down
9 changes: 6 additions & 3 deletions dvc/remote/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ def _collect_dir(self, path_info, tree=None, save_tree=False, **kwargs):
file_infos.add(fname)

if tree:
checksums = {fi: tree.get_checksum(fi) for fi in file_infos}
checksums = {fi: tree.get_file_checksum(fi) for fi in file_infos}
if save_tree:
for fi, checksum in checksums.items():
self._save_file(fi, checksum, tree=tree, **kwargs)
Expand Down Expand Up @@ -259,11 +259,14 @@ def _collect_dir(self, path_info, tree=None, save_tree=False, **kwargs):
# Sorting the list by path to ensure reproducibility
return sorted(result, key=itemgetter(self.PARAM_RELPATH))

def get_dir_checksum(self, path_info, tree=None):
    """Collect the directory at ``path_info`` and save its dir info.

    Args:
        path_info: directory to collect.
        tree: optional tree to collect the directory from; it is forwarded
            to ``_collect_dir``. When a tree is given, ``path_info`` is not
            passed on to ``_save_dir_info``.

    Returns:
        Whatever ``_save_dir_info`` returns for the collected entries.

    Raises:
        RemoteCacheRequiredError: if ``self.cache`` is falsy.
    """
    if not self.cache:
        raise RemoteCacheRequiredError(path_info)

    # Forward the caller's tree: passing a literal None here would leave the
    # ``tree`` parameter with no effect on how the directory is collected.
    dir_info = self._collect_dir(path_info, tree=tree)
    if tree:
        # don't save state entry for path_info if it is a tree path
        path_info = None
    return self._save_dir_info(dir_info, path_info)

def _save_dir_info(self, dir_info, path_info=None):
Expand Down

0 comments on commit 95ea7e2

Please sign in to comment.