From 636a019289ffcde30c7d94b8516bccb3c65bf47c Mon Sep 17 00:00:00 2001 From: Ruslan Kuprieiev Date: Sat, 5 Sep 2020 16:31:34 +0300 Subject: [PATCH] dvc: compute dir hash without external cache (#4528) Fixes #4144 --- dvc/cache/base.py | 9 ++++--- dvc/tree/base.py | 11 ++------- tests/func/test_import_url.py | 46 +++++++++++++++++++++++++++++------ tests/func/test_remote.py | 6 ++--- 4 files changed, 49 insertions(+), 23 deletions(-) diff --git a/dvc/cache/base.py b/dvc/cache/base.py index 12731994d6..ab8e0dabec 100644 --- a/dvc/cache/base.py +++ b/dvc/cache/base.py @@ -280,13 +280,14 @@ def _get_dir_info_hash(self, dir_info): return hash_info, to_info def save_dir_info(self, dir_info, hash_info=None): - if hash_info and not self.changed_cache_file(hash_info): + if ( + hash_info + and hash_info.name == self.tree.PARAM_CHECKSUM + and not self.changed_cache_file(hash_info) + ): return hash_info hi, tmp_info = self._get_dir_info_hash(dir_info) - if hash_info: - assert hi == hash_info - new_info = self.tree.hash_to_path_info(hi.value) if self.changed_cache_file(hi): self.tree.makedirs(new_info.parent) diff --git a/dvc/tree/base.py b/dvc/tree/base.py index 614c53a246..ef4ad89fd6 100644 --- a/dvc/tree/base.py +++ b/dvc/tree/base.py @@ -7,11 +7,7 @@ from funcy import cached_property -from dvc.exceptions import ( - DvcException, - DvcIgnoreInCollectedDirError, - RemoteCacheRequiredError, -) +from dvc.exceptions import DvcException, DvcIgnoreInCollectedDirError from dvc.hash_info import HashInfo from dvc.ignore import DvcIgnore from dvc.path_info import URLInfo @@ -331,11 +327,8 @@ def _collect_dir(self, path_info, **kwargs): ] def get_dir_hash(self, path_info, **kwargs): - if not self.cache: - raise RemoteCacheRequiredError(path_info) - dir_info = self._collect_dir(path_info, **kwargs) - return self.cache.save_dir_info(dir_info) + return self.repo.cache.local.save_dir_info(dir_info) def upload(self, from_info, to_info, name=None, no_progress_bar=False): if not hasattr(self, "_upload"): diff --git a/tests/func/test_import_url.py b/tests/func/test_import_url.py index 2454d96eed..855d68305b 100644 --- a/tests/func/test_import_url.py +++ b/tests/func/test_import_url.py @@ -3,6 +3,7 @@ import pytest +from dvc.cache import Cache from dvc.dependency.base import DependencyDoesNotExistError from dvc.main import main from dvc.stage import Stage @@ -141,23 +142,43 @@ def test_import_url(tmp_dir, dvc, workspace): @pytest.mark.parametrize( - "workspace", + "workspace, stage_md5, dir_md5", [ - pytest.lazy_fixture("local_cloud"), - pytest.lazy_fixture("s3"), - pytest.lazy_fixture("gs"), - pytest.lazy_fixture("hdfs"), + ( + pytest.lazy_fixture("local_cloud"), + "dc24e1271084ee317ac3c2656fb8812b", + "b6dcab6ccd17ca0a8bf4a215a37d14cc.dir", + ), + ( + pytest.lazy_fixture("s3"), + "2aa17f8daa26996b3f7a4cf8888ac9ac", + "ec602a6ba97b2dd07bd6d2cd89674a60.dir", + ), + (pytest.lazy_fixture("gs"), "fixme", "fixme",), + ( + pytest.lazy_fixture("hdfs"), + "ec0943f83357f702033c98e70b853c8c", + "e6dcd267966dc628d732874f94ef4280.dir", + ), pytest.param( pytest.lazy_fixture("ssh"), + "dc24e1271084ee317ac3c2656fb8812b", + "b6dcab6ccd17ca0a8bf4a215a37d14cc.dir", marks=pytest.mark.skipif( os.name == "nt", reason="disabled on windows" ), ), ], - indirect=True, + indirect=["workspace"], ) -def test_import_url_dir(tmp_dir, dvc, workspace): +def test_import_url_dir(tmp_dir, dvc, workspace, stage_md5, dir_md5): workspace.gen({"dir": {"file": "file", "subdir": {"subfile": "subfile"}}}) + + # remove external cache to make sure that we don't need it to import dirs + with dvc.config.edit() as conf: + del conf["cache"] + dvc.cache = Cache(dvc) + assert not (tmp_dir / "dir").exists() # sanity check dvc.imp_url("remote://workspace/dir") assert set(os.listdir(tmp_dir / "dir")) == {"file", "subdir"} @@ -165,4 +186,15 @@ def test_import_url_dir(tmp_dir, dvc, workspace): assert list(os.listdir(tmp_dir / "dir" / "subdir")) == ["subfile"] assert (tmp_dir / "dir" / "subdir" / "subfile").read_text() == "subfile" + assert (tmp_dir / "dir.dvc").read_text() == ( + f"md5: {stage_md5}\n" + "frozen: true\n" + "deps:\n" + f"- md5: {dir_md5}\n" + " path: remote://workspace/dir\n" + "outs:\n" + "- md5: b6dcab6ccd17ca0a8bf4a215a37d14cc.dir\n" + " path: dir\n" + ) + assert dvc.status() == {} diff --git a/tests/func/test_remote.py b/tests/func/test_remote.py index a968658309..dc37084dc3 100644 --- a/tests/func/test_remote.py +++ b/tests/func/test_remote.py @@ -7,10 +7,10 @@ from mock import patch from dvc.config import Config -from dvc.exceptions import DownloadError, UploadError +from dvc.exceptions import DownloadError, RemoteCacheRequiredError, UploadError from dvc.main import main from dvc.path_info import PathInfo -from dvc.tree.base import BaseTree, RemoteCacheRequiredError +from dvc.tree.base import BaseTree from dvc.tree.local import LocalTree from dvc.utils.fs import remove from tests.basic_env import TestDvc @@ -262,7 +262,7 @@ def test_external_dir_resource_on_no_cache(tmp_dir, dvc, tmp_path_factory): with pytest.raises(RemoteCacheRequiredError): dvc.run( cmd="echo hello world", - deps=[os.fspath(external_dir)], + outs=[os.fspath(external_dir)], single_stage=True, )