diff --git a/dvc/data_cloud.py b/dvc/data_cloud.py index 68c88e5b00..5a3407759c 100644 --- a/dvc/data_cloud.py +++ b/dvc/data_cloud.py @@ -92,7 +92,7 @@ def pull( def _save_pulled_checksums(self, cache): for checksum in cache.scheme_keys("local"): - cache_file = self.repo.cache.local.hash_to_path_info(checksum) + cache_file = self.repo.cache.local.tree.hash_to_path_info(checksum) if self.repo.cache.local.tree.exists(cache_file): # We can safely save here, as existing corrupted files will # be removed upon status, while files corrupted during diff --git a/dvc/dependency/repo.py b/dvc/dependency/repo.py index be9eb2a066..a4994b8923 100644 --- a/dvc/dependency/repo.py +++ b/dvc/dependency/repo.py @@ -64,7 +64,7 @@ def _get_checksum(self, locked=True): # We are polluting our repo cache with some dir listing here if tree.isdir(path): - return self.repo.cache.local.get_hash(path, tree=tree) + return self.repo.cache.local.tree.get_hash(path, tree=tree) return tree.get_file_hash(path) def status(self): diff --git a/dvc/output/base.py b/dvc/output/base.py index ccce194540..f86fc4cd7b 100644 --- a/dvc/output/base.py +++ b/dvc/output/base.py @@ -162,7 +162,7 @@ def supported(cls, url): @property def cache_path(self): - return self.cache.hash_to_path_info(self.checksum).url + return self.cache.tree.hash_to_path_info(self.checksum).url @property def checksum_type(self): diff --git a/dvc/remote/base.py b/dvc/remote/base.py index acd9db2143..246868db87 100644 --- a/dvc/remote/base.py +++ b/dvc/remote/base.py @@ -216,39 +216,6 @@ def __init__(self, tree): self.cache_type_confirmed = False self._dir_info = {} - @property - def path_info(self): - return self.tree.path_info - - @property - def cache(self): - return getattr(self.repo.cache, self.scheme) - - @property - def scheme(self): - return self.tree.scheme - - @property - def state(self): - return self.tree.state - - def open(self, *args, **kwargs): - return self.tree.open(*args, **kwargs) - - def is_dir_hash(self, hash_): - return self.tree.is_dir_hash(hash_) - - def get_hash(self, path_info, **kwargs): - return self.tree.get_hash(path_info, **kwargs) - - # Override to return path as a string instead of PathInfo for clouds - # which support string paths (see local) - def hash_to_path(self, hash_): - return self.hash_to_path_info(hash_) - - def hash_to_path_info(self, hash_): - return self.tree.hash_to_path_info(hash_) - def get_dir_cache(self, hash_): assert hash_ @@ -265,10 +232,10 @@ def get_dir_cache(self, hash_): return dir_info def load_dir_cache(self, hash_): - path_info = self.hash_to_path_info(hash_) + path_info = self.tree.hash_to_path_info(hash_) try: - with self.cache.open(path_info, "r") as fobj: + with self.tree.open(path_info, "r") as fobj: d = json.load(fobj) except (ValueError, FileNotFoundError) as exc: raise DirCacheError(hash_) from exc @@ -325,7 +292,7 @@ def changed(self, path_info, hash_info): logger.debug("cache for '%s'('%s') has changed.", path_info, hash_) return True - actual = self.get_hash(path_info) + actual = self.tree.get_hash(path_info) if hash_ != actual: logger.debug( "hash value '%s' for '%s' has changed (actual '%s').", @@ -389,7 +356,7 @@ def _do_link(self, from_info, to_info, link_method): def _save_file(self, path_info, tree, hash_, save_link=True, **kwargs): assert hash_ - cache_info = self.hash_to_path_info(hash_) + cache_info = self.tree.hash_to_path_info(hash_) if tree == self.tree: if self.changed_cache(hash_): self.tree.move(path_info, cache_info, mode=self.CACHE_MODE) @@ -404,11 +371,11 @@ def _save_file(self, path_info, tree, hash_, save_link=True, **kwargs): self.link(cache_info, path_info) if save_link: - self.state.save_link(path_info) + self.tree.state.save_link(path_info) # we need to update path and cache, since in case of reflink, # or copy cache type moving original file results in updates on # next executed command, which causes md5 recalculation - self.state.save(path_info, hash_) + self.tree.state.save(path_info, hash_) else: if self.changed_cache(hash_): with tree.open(path_info, mode="rb") as fobj: @@ -422,7 +389,7 @@ def _save_file(self, path_info, tree, hash_, save_link=True, **kwargs): if callback: callback(1) - self.state.save(cache_info, hash_) + self.tree.state.save(cache_info, hash_) return {self.tree.PARAM_CHECKSUM: hash_} def _cache_is_copy(self, path_info): @@ -459,18 +426,19 @@ def _save_dir(self, path_info, tree, hash_, save_link=True, **kwargs): ) if save_link: - self.state.save_link(path_info) + self.tree.state.save_link(path_info) if self.tree.exists(path_info): - self.state.save(path_info, hash_) + self.tree.state.save(path_info, hash_) - cache_info = self.hash_to_path_info(hash_) - self.state.save(cache_info, hash_) + cache_info = self.tree.hash_to_path_info(hash_) + self.tree.state.save(cache_info, hash_) return {self.tree.PARAM_CHECKSUM: hash_} def save(self, path_info, tree, hash_info, save_link=True, **kwargs): - if path_info.scheme != self.scheme: + if path_info.scheme != self.tree.scheme: raise RemoteActionNotImplemented( - f"save {path_info.scheme} -> {self.scheme}", self.scheme, + f"save {path_info.scheme} -> {self.tree.scheme}", + self.tree.scheme, ) if not hash_info: @@ -479,13 +447,18 @@ def save(self, path_info, tree, hash_info, save_link=True, **kwargs): return self._save(path_info, tree, hash_, save_link, **kwargs) def _save(self, path_info, tree, hash_, save_link=True, **kwargs): - to_info = self.hash_to_path_info(hash_) + to_info = self.tree.hash_to_path_info(hash_) logger.debug("Saving '%s' to '%s'.", path_info, to_info) if tree.isdir(path_info): return self._save_dir(path_info, tree, hash_, save_link, **kwargs) return self._save_file(path_info, tree, hash_, save_link, **kwargs) + # Override to return path as a string instead of PathInfo for clouds + # which support string paths (see local) + def hash_to_path(self, hash_): + return self.tree.hash_to_path_info(hash_) + def changed_cache_file(self, hash_): """Compare the given hash with the (corresponding) actual one. @@ -506,7 +479,7 @@ def changed_cache_file(self, hash_): ) return False - actual = self.get_hash(cache_info) + actual = self.tree.get_hash(cache_info) logger.debug( "cache '%s' expected '%s' actual '%s'", cache_info, hash_, actual, @@ -545,14 +518,14 @@ def _changed_dir_cache(self, hash_, path_info=None, filter_info=None): return False def changed_cache(self, hash_, path_info=None, filter_info=None): - if self.is_dir_hash(hash_): + if self.tree.is_dir_hash(hash_): return self._changed_dir_cache( hash_, path_info=path_info, filter_info=filter_info ) return self.changed_cache_file(hash_) def already_cached(self, path_info): - current = self.get_hash(path_info) + current = self.tree.get_hash(path_info) if not current: return False @@ -579,15 +552,15 @@ def _checkout_file( ): """The file is changed we need to checkout a new copy""" added, modified = True, False - cache_info = self.hash_to_path_info(hash_) + cache_info = self.tree.hash_to_path_info(hash_) if self.tree.exists(path_info): logger.debug("data '%s' will be replaced.", path_info) self.safe_remove(path_info, force=force) added, modified = False, True self.link(cache_info, path_info) - self.state.save_link(path_info) - self.state.save(path_info, hash_) + self.tree.state.save_link(path_info) + self.tree.state.save(path_info, hash_) if progress_callback: progress_callback(str(path_info)) @@ -616,7 +589,7 @@ def _checkout_dir( for entry in dir_info: relative_path = entry[self.tree.PARAM_RELPATH] entry_hash = entry[self.tree.PARAM_CHECKSUM] - entry_cache_info = self.hash_to_path_info(entry_hash) + entry_cache_info = self.tree.hash_to_path_info(entry_hash) entry_info = path_info / relative_path if filter_info and not entry_info.isin_or_eq(filter_info): @@ -627,7 +600,7 @@ def _checkout_dir( modified = True self.safe_remove(entry_info, force=force) self.link(entry_cache_info, entry_info) - self.state.save(entry_info, entry_hash) + self.tree.state.save(entry_info, entry_hash) if progress_callback: progress_callback(str(entry_info)) @@ -636,8 +609,8 @@ def _checkout_dir( or modified ) - self.state.save_link(path_info) - self.state.save(path_info, hash_) + self.tree.state.save_link(path_info) + self.tree.state.save(path_info, hash_) # relink is not modified, assume it as nochange return added, not added and modified and not relink @@ -663,7 +636,7 @@ def checkout( relink=False, filter_info=None, ): - if path_info.scheme not in ["local", self.scheme]: + if path_info.scheme not in ["local", self.tree.scheme]: raise NotImplementedError hash_ = hash_info.get(self.tree.PARAM_CHECKSUM) @@ -696,7 +669,9 @@ def checkout( if progress_callback: progress_callback( str(path_info), - self.get_files_number(self.path_info, hash_, filter_info), + self.get_files_number( + self.tree.path_info, hash_, filter_info + ), ) if failed: raise CheckoutError([failed]) @@ -717,7 +692,7 @@ def _checkout( relink=False, filter_info=None, ): - if not self.is_dir_hash(hash_): + if not self.tree.is_dir_hash(hash_): return self._checkout_file( path_info, hash_, force, progress_callback, relink ) @@ -732,7 +707,7 @@ def get_files_number(self, path_info, hash_, filter_info): if not hash_: return 0 - if not self.is_dir_hash(hash_): + if not self.tree.is_dir_hash(hash_): return 1 if not filter_info: diff --git a/dvc/remote/local.py b/dvc/remote/local.py index b7ca83930c..4e64d002d4 100644 --- a/dvc/remote/local.py +++ b/dvc/remote/local.py @@ -97,7 +97,7 @@ def hashes_exist( def already_cached(self, path_info): assert path_info.scheme in ["", "local"] - current_md5 = self.get_hash(path_info) + current_md5 = self.tree.get_hash(path_info) if not current_md5: return False @@ -147,7 +147,7 @@ def _status( logger.debug( f"Preparing to collect status from {remote.tree.path_info}" ) - md5s = set(named_cache.scheme_keys(self.scheme)) + md5s = set(named_cache.scheme_keys(self.tree.scheme)) logger.debug("Collecting information from local cache...") local_exists = frozenset( @@ -163,7 +163,7 @@ def _status( else: logger.debug("Collecting information from remote cache...") remote_exists = set() - dir_md5s = set(named_cache.dir_keys(self.scheme)) + dir_md5s = set(named_cache.dir_keys(self.tree.scheme)) if dir_md5s: remote_exists.update( self._indexed_dir_hashes(named_cache, remote, dir_md5s) @@ -188,7 +188,7 @@ def make_names(hash_, names): dir_status = {} file_status = {} dir_contents = {} - for hash_, item in named_cache[self.scheme].items(): + for hash_, item in named_cache[self.tree.scheme].items(): if item.children: dir_status[hash_] = make_names(hash_, item.names) dir_contents[hash_] = set() @@ -233,7 +233,9 @@ def _indexed_dir_hashes(self, named_cache, remote, dir_md5s): # If .dir hash exists on the remote, assume directory contents # still exists on the remote for dir_hash in dir_exists: - file_hashes = list(named_cache.child_keys(self.scheme, dir_hash)) + file_hashes = list( + named_cache.child_keys(self.tree.scheme, dir_hash) + ) if dir_hash not in remote.index: logger.debug( "Indexing new .dir '{}' with '{}' nested files".format( @@ -263,7 +265,7 @@ def _get_plans(self, download, remote, status_info, status): status_info.items(), desc="Analysing status", unit="file" ): if info["status"] == status: - cache.append(self.hash_to_path_info(md5)) + cache.append(self.tree.hash_to_path_info(md5)) path_infos.append(remote.tree.hash_to_path_info(md5)) names.append(info["name"]) hashes.append(md5) diff --git a/dvc/repo/diff.py b/dvc/repo/diff.py index ce8e6e8aee..02e01a9e60 100644 --- a/dvc/repo/diff.py +++ b/dvc/repo/diff.py @@ -37,7 +37,7 @@ def _to_path(output): def _to_checksum(output): if on_working_tree: - return self.cache.local.get_hash(output.path_info) + return self.cache.local.tree.get_hash(output.path_info) return output.checksum def _exists(output): diff --git a/dvc/repo/tree.py b/dvc/repo/tree.py index 5b78e2fef3..6346e03bf4 100644 --- a/dvc/repo/tree.py +++ b/dvc/repo/tree.py @@ -96,7 +96,7 @@ def open( if out.is_dir_checksum: checksum = self._get_granular_checksum(path, out) - cache_path = out.cache.hash_to_path_info(checksum).url + cache_path = out.cache.tree.hash_to_path_info(checksum).url else: cache_path = out.cache_path return open(cache_path, mode=mode, encoding=encoding) diff --git a/dvc/tree/base.py b/dvc/tree/base.py index 00a982aea7..e3f559311b 100644 --- a/dvc/tree/base.py +++ b/dvc/tree/base.py @@ -282,7 +282,7 @@ def get_hash(self, path_info, tree=None, **kwargs): if ( hash_ and self.is_dir_hash(hash_) - and not tree.exists(self.cache.hash_to_path_info(hash_)) + and not tree.exists(self.cache.tree.hash_to_path_info(hash_)) ): hash_ = None @@ -375,7 +375,7 @@ def _collect_dir(self, path_info, tree, **kwargs): def _save_dir_info(self, dir_info, path_info): hash_, tmp_info = self._get_dir_info_hash(dir_info) - new_info = self.cache.hash_to_path_info(hash_) + new_info = self.cache.tree.hash_to_path_info(hash_) if self.cache.changed_cache_file(hash_): self.cache.tree.makedirs(new_info.parent) self.cache.tree.move( diff --git a/tests/func/test_cache.py b/tests/func/test_cache.py index bbf787ae53..14c9dac8cd 100644 --- a/tests/func/test_cache.py +++ b/tests/func/test_cache.py @@ -36,7 +36,7 @@ def test_all(self): self.assertIn(self.cache2_md5, md5_list) def test_get(self): - cache = Cache(self.dvc).local.hash_to_path_info(self.cache1_md5) + cache = Cache(self.dvc).local.tree.hash_to_path_info(self.cache1_md5) self.assertEqual(os.fspath(cache), self.cache1) @@ -47,13 +47,17 @@ def _do_test(self, ret): def test(self): dir_hash = "123.dir" - fname = os.fspath(self.dvc.cache.local.hash_to_path_info(dir_hash)) + fname = os.fspath( + self.dvc.cache.local.tree.hash_to_path_info(dir_hash) + ) self.create(fname, "not,json") with pytest.raises(DirCacheError): self.dvc.cache.local.load_dir_cache(dir_hash) dir_hash = "234.dir" - fname = os.fspath(self.dvc.cache.local.hash_to_path_info(dir_hash)) + fname = os.fspath( + self.dvc.cache.local.tree.hash_to_path_info(dir_hash) + ) self.create(fname, '{"a": "b"}') self._do_test(self.dvc.cache.local.load_dir_cache(dir_hash)) @@ -84,7 +88,7 @@ def test_remote_references(self): self.dvc.__init__() - assert self.dvc.cache.ssh.path_info == ssh_url + "/tmp" + assert self.dvc.cache.ssh.tree.path_info == ssh_url + "/tmp" class TestSharedCacheDir(TestDir): diff --git a/tests/func/test_checkout.py b/tests/func/test_checkout.py index c1482fa1c7..631d9e7521 100644 --- a/tests/func/test_checkout.py +++ b/tests/func/test_checkout.py @@ -101,7 +101,9 @@ def test(self): # to check if dvc will detect that the cache is corrupted. entry = self.dvc.cache.local.load_dir_cache(out.checksum)[0] entry_hash = entry[self.dvc.cache.local.tree.PARAM_CHECKSUM] - cache = os.fspath(self.dvc.cache.local.hash_to_path_info(entry_hash)) + cache = os.fspath( + self.dvc.cache.local.tree.hash_to_path_info(entry_hash) + ) os.chmod(cache, 0o644) with open(cache, "w+") as fobj: diff --git a/tests/func/test_gc.py b/tests/func/test_gc.py index 1723beaadc..435950cefe 100644 --- a/tests/func/test_gc.py +++ b/tests/func/test_gc.py @@ -21,7 +21,7 @@ def setUp(self): self.dvc.add(self.FOO) self.dvc.add(self.DATA_DIR) self.good_cache = [ - self.dvc.cache.local.hash_to_path_info(md5) + self.dvc.cache.local.tree.hash_to_path_info(md5) for md5 in self.dvc.cache.local.tree.all() ] diff --git a/tests/func/test_remote.py b/tests/func/test_remote.py index db0a52d6d6..45d1a27f83 100644 --- a/tests/func/test_remote.py +++ b/tests/func/test_remote.py @@ -158,7 +158,7 @@ def test_dir_hash_should_be_key_order_agnostic(tmp_dir, dvc): {"relpath": "2", "md5": "2"}, ], ): - hash1 = dvc.cache.local.get_hash(path_info) + hash1 = dvc.cache.local.tree.get_hash(path_info) with patch.object( BaseRemoteTree, @@ -168,7 +168,7 @@ def test_dir_hash_should_be_key_order_agnostic(tmp_dir, dvc): {"md5": "2", "relpath": "2"}, ], ): - hash2 = dvc.cache.local.get_hash(path_info) + hash2 = dvc.cache.local.tree.get_hash(path_info) assert hash1 == hash2 diff --git a/tests/func/test_tree.py b/tests/func/test_tree.py index 55e495c38f..2a6b8ba823 100644 --- a/tests/func/test_tree.py +++ b/tests/func/test_tree.py @@ -193,7 +193,7 @@ def test_repotree_walk_fetch(tmp_dir, dvc, scm, local_remote): assert os.path.exists(out.cache_path) for entry in out.dir_cache: hash_ = entry[out.tree.PARAM_CHECKSUM] - assert os.path.exists(dvc.cache.local.hash_to_path_info(hash_)) + assert os.path.exists(dvc.cache.local.tree.hash_to_path_info(hash_)) def test_repotree_cache_save(tmp_dir, dvc, scm, erepo_dir, local_cloud): @@ -217,10 +217,10 @@ def test_repotree_cache_save(tmp_dir, dvc, scm, erepo_dir, local_cloud): with erepo_dir.dvc.state: cache = dvc.cache.local - with cache.state: + with cache.tree.state: cache.save(PathInfo(erepo_dir / "dir"), tree, None) for hash_ in expected: - assert os.path.exists(cache.hash_to_path_info(hash_)) + assert os.path.exists(cache.tree.hash_to_path_info(hash_)) def test_cleantree_subrepo(tmp_dir, dvc, scm, monkeypatch):