diff --git a/dvc/cache/base.py b/dvc/cache/base.py index 64617ef994..a6c5596d77 100644 --- a/dvc/cache/base.py +++ b/dvc/cache/base.py @@ -276,8 +276,6 @@ def save(self, path_info, tree, hash_info, save_link=True, **kwargs): self.tree.scheme, ) - if not hash_info: - hash_info = self.tree.save_info(path_info, tree=tree, **kwargs) hash_ = hash_info[self.tree.PARAM_CHECKSUM] return self._save(path_info, tree, hash_, save_link, **kwargs) diff --git a/dvc/command/check_ignore.py b/dvc/command/check_ignore.py index c611bdbd34..8840dc57ba 100644 --- a/dvc/command/check_ignore.py +++ b/dvc/command/check_ignore.py @@ -70,7 +70,6 @@ def run(self): ret = self._interactive_mode() else: ret = self._normal_mode() - return ret diff --git a/dvc/command/remote.py b/dvc/command/remote.py index db622a378b..1cb4a3a869 100644 --- a/dvc/command/remote.py +++ b/dvc/command/remote.py @@ -3,7 +3,7 @@ from dvc.command.base import append_doc_link, fix_subparsers from dvc.command.config import CmdConfig -from dvc.config import ConfigError +from dvc.config import ConfigError, merge from dvc.utils import format_link logger = logging.getLogger(__name__) @@ -18,7 +18,7 @@ def __init__(self, args): def _check_exists(self, conf): if self.args.name not in conf["remote"]: - raise ConfigError(f"remote '{self.args.name}' doesn't exists.") + raise ConfigError(f"remote '{self.args.name}' doesn't exist.") class CmdRemoteAdd(CmdRemote): @@ -61,7 +61,12 @@ def run(self): class CmdRemoteModify(CmdRemote): def run(self): with self.config.edit(self.args.level) as conf: - self._check_exists(conf) + merged = self.config.load_config_to_level(self.args.level) + merge(merged, conf) + self._check_exists(merged) + + if self.args.name not in conf["remote"]: + conf["remote"][self.args.name] = {} section = conf["remote"][self.args.name] if self.args.unset: section.pop(self.args.option, None) diff --git a/dvc/config.py b/dvc/config.py index 68cda1bece..26181e4d67 100644 --- a/dvc/config.py +++ b/dvc/config.py @@ -400,7 +400,7 @@ def load_config_to_level(self, level=None): if merge_level == level: break if merge_level in self.files: - _merge(merged_conf, self.load_one(merge_level)) + merge(merged_conf, self.load_one(merge_level)) return merged_conf @contextmanager @@ -414,7 +414,7 @@ def edit(self, level="repo"): conf = self._save_paths(conf, self.files[level]) merged_conf = self.load_config_to_level(level) - _merge(merged_conf, conf) + merge(merged_conf, conf) self.validate(merged_conf) self._save_config(level, conf) @@ -453,11 +453,11 @@ def _pack_remotes(conf): return result -def _merge(into, update): +def merge(into, update): """Merges second dict into first recursively""" for key, val in update.items(): if isinstance(into.get(key), dict) and isinstance(val, dict): - _merge(into[key], val) + merge(into[key], val) else: into[key] = val diff --git a/dvc/dependency/repo.py b/dvc/dependency/repo.py index eb3c58b5ce..7dffd2eca5 100644 --- a/dvc/dependency/repo.py +++ b/dvc/dependency/repo.py @@ -67,7 +67,7 @@ def _get_checksum(self, locked=True): return self.repo.cache.local.tree.get_hash(path, tree=tree) return tree.get_file_hash(path) - def status(self): + def workspace_status(self): current_checksum = self._get_checksum(locked=True) updated_checksum = self._get_checksum(locked=False) @@ -76,6 +76,9 @@ def status(self): return {} + def status(self): + return self.workspace_status() + def save(self): pass diff --git a/dvc/external_repo.py b/dvc/external_repo.py index 15332e7de1..c2de3d8e81 100644 --- a/dvc/external_repo.py +++ b/dvc/external_repo.py @@ -90,15 +90,26 @@ def use_cache(self, cache): """Use the specified cache in place of default tmpdir cache for download operations. """ - if hasattr(self, "cache"): - save_cache = self.cache.local - self.cache.local = cache + has_cache = hasattr(self, "cache") + + if has_cache: + save_cache = self.cache.local # pylint: disable=E0203 + self.cache.local = cache # pylint: disable=E0203 + else: + from collections import namedtuple + + mock_cache = namedtuple("MockCache", ["local"])(local=cache) + self.cache = mock_cache # pylint: disable=W0201 + self._local_cache = cache yield - if hasattr(self, "cache"): + if has_cache: self.cache.local = save_cache + else: + del self.cache + self._local_cache = None @cached_property @@ -130,14 +141,17 @@ def download_update(result): for path in paths: if not self.repo_tree.exists(path): raise PathMissingError(path, self.url) - save_info = self.local_cache.save( + hash_info = self.repo_tree.save_info( + path, download_callback=download_update + ) + self.local_cache.save( path, self.repo_tree, - None, + hash_info, save_link=False, download_callback=download_update, ) - save_infos.append(save_info) + save_infos.append(hash_info) return sum(download_results), failed, save_infos diff --git a/dvc/output/base.py b/dvc/output/base.py index 242499fc55..f17e3df9e5 100644 --- a/dvc/output/base.py +++ b/dvc/output/base.py @@ -199,10 +199,7 @@ def changed_cache(self, filter_info=None): return self.cache.changed_cache(self.checksum, filter_info=filter_info) - def status(self): - if self.checksum and self.use_cache and self.changed_cache(): - return {str(self): "not in cache"} - + def workspace_status(self): if not self.exists: return {str(self): "deleted"} @@ -214,6 +211,12 @@ def status(self): return {} + def status(self): + if self.checksum and self.use_cache and self.changed_cache(): + return {str(self): "not in cache"} + + return self.workspace_status() + def changed(self): status = self.status() logger.debug(str(status)) @@ -280,6 +283,7 @@ def save(self): self.info = self.save_info() def commit(self): + assert self.info if self.use_cache: self.cache.save(self.path_info, self.cache.tree, self.info) diff --git a/dvc/repo/tree.py b/dvc/repo/tree.py index 2b7810c29d..8011d0318c 100644 --- a/dvc/repo/tree.py +++ b/dvc/repo/tree.py @@ -245,6 +245,9 @@ class RepoTree(BaseTree): # pylint:disable=abstract-method Any kwargs will be passed to `DvcTree()`. """ + scheme = "local" + PARAM_CHECKSUM = "md5" + def __init__(self, repo, **kwargs): super().__init__(repo, {"url": repo.root_dir}) if hasattr(repo, "dvc_dir"): diff --git a/dvc/stage/__init__.py b/dvc/stage/__init__.py index 6316793bb5..0048c94fb9 100644 --- a/dvc/stage/__init__.py +++ b/dvc/stage/__init__.py @@ -396,7 +396,7 @@ def ignore_outs(self): @staticmethod def _changed_entries(entries): - return [str(entry) for entry in entries if entry.changed_checksum()] + return [str(entry) for entry in entries if entry.workspace_status()] def _changed_stage_entry(self): return f"'md5' of {self} changed." diff --git a/dvc/tree/base.py b/dvc/tree/base.py index 700513b19f..1860bafab6 100644 --- a/dvc/tree/base.py +++ b/dvc/tree/base.py @@ -236,22 +236,17 @@ def is_dir_hash(cls, hash_): return False return hash_.endswith(cls.CHECKSUM_DIR_SUFFIX) - def get_hash(self, path_info, tree=None, **kwargs): + def get_hash(self, path_info, **kwargs): assert path_info and ( isinstance(path_info, str) or path_info.scheme == self.scheme ) - if not tree: - tree = self - - if not tree.exists(path_info): + if not self.exists(path_info): return None - if tree == self: - # pylint: disable=assignment-from-none - hash_ = self.state.get(path_info) - else: - hash_ = None + # pylint: disable=assignment-from-none + hash_ = self.state.get(path_info) + # If we have dir hash in state db, but dir cache file is lost, # then we need to recollect the dir via .get_dir_hash() call below, # see https://github.com/iterative/dvc/issues/2219 for context @@ -267,10 +262,10 @@ def get_hash(self, path_info, tree=None, **kwargs): if hash_: return hash_ - if tree.isdir(path_info): - hash_ = self.get_dir_hash(path_info, tree, **kwargs) + if self.isdir(path_info): + hash_ = self.get_dir_hash(path_info, **kwargs) else: - hash_ = tree.get_file_hash(path_info) + hash_ = self.get_file_hash(path_info) if hash_ and self.exists(path_info): self.state.save(path_info, hash_) @@ -280,12 +275,12 @@ def get_hash(self, path_info, tree=None, **kwargs): def get_file_hash(self, path_info): raise NotImplementedError - def get_dir_hash(self, path_info, tree, **kwargs): + def get_dir_hash(self, path_info, **kwargs): if not self.cache: raise RemoteCacheRequiredError(path_info) - dir_info = self._collect_dir(path_info, tree, **kwargs) - return self._save_dir_info(dir_info, path_info) + dir_info = self._collect_dir(path_info, **kwargs) + return self._save_dir_info(dir_info) def hash_to_path_info(self, hash_): return self.path_info / hash_[0:2] / hash_[2:] @@ -298,29 +293,26 @@ def path_to_hash(self, path): return "".join(parts) - def save_info(self, path_info, tree=None, **kwargs): - return { - self.PARAM_CHECKSUM: self.get_hash(path_info, tree=tree, **kwargs) - } + def save_info(self, path_info, **kwargs): + return {self.PARAM_CHECKSUM: self.get_hash(path_info, **kwargs)} - @staticmethod - def _calculate_hashes(file_infos, tree): + def _calculate_hashes(self, file_infos): file_infos = list(file_infos) with Tqdm( total=len(file_infos), unit="md5", desc="Computing file/dir hashes (only done once)", ) as pbar: - worker = pbar.wrap_fn(tree.get_file_hash) - with ThreadPoolExecutor(max_workers=tree.hash_jobs) as executor: + worker = pbar.wrap_fn(self.get_file_hash) + with ThreadPoolExecutor(max_workers=self.hash_jobs) as executor: tasks = executor.map(worker, file_infos) hashes = dict(zip(file_infos, tasks)) return hashes - def _collect_dir(self, path_info, tree, **kwargs): + def _collect_dir(self, path_info, **kwargs): file_infos = set() - for fname in tree.walk_files(path_info, **kwargs): + for fname in self.walk_files(path_info, **kwargs): if DvcIgnore.DVCIGNORE_FILE == fname.name: raise DvcIgnoreInCollectedDirError(fname.parent) @@ -329,7 +321,7 @@ def _collect_dir(self, path_info, tree, **kwargs): hashes = {fi: self.state.get(fi) for fi in file_infos} not_in_state = {fi for fi, hash_ in hashes.items() if hash_ is None} - new_hashes = self._calculate_hashes(not_in_state, tree) + new_hashes = self._calculate_hashes(not_in_state) hashes.update(new_hashes) result = [ @@ -351,7 +343,7 @@ def _collect_dir(self, path_info, tree, **kwargs): # Sorting the list by path to ensure reproducibility return sorted(result, key=itemgetter(self.PARAM_RELPATH)) - def _save_dir_info(self, dir_info, path_info): + def _save_dir_info(self, dir_info): hash_, tmp_info = self._get_dir_info_hash(dir_info) new_info = self.cache.tree.hash_to_path_info(hash_) if self.cache.changed_cache_file(hash_): @@ -360,8 +352,6 @@ def _save_dir_info(self, dir_info, path_info): tmp_info, new_info, mode=self.cache.CACHE_MODE ) - if self.exists(path_info): - self.state.save(path_info, hash_) self.state.save(new_info, hash_) return hash_ diff --git a/dvc/version.py b/dvc/version.py index c883b6a67e..02ef7104a2 100644 --- a/dvc/version.py +++ b/dvc/version.py @@ -6,7 +6,7 @@ import os import subprocess -_BASE_VERSION = "1.2.3" +_BASE_VERSION = "1.3.1" def _generate_version(base_version): diff --git a/setup.py b/setup.py index da84576f16..b90e9b05af 100644 --- a/setup.py +++ b/setup.py @@ -105,9 +105,7 @@ def run(self): tests_requirements = [ "wheel>=0.31.1", # Test requirements: - # https://github.com/pytest-dev/pytest/issues/7558 - # FIXME: pylint complaining for pytest.mark.* on v6.0 - "pytest>=4.6.0,<6.0", + "pytest>=6.0.1", "pytest-docker>=0.7.2", "pytest-timeout>=1.3.3", "pytest-cov>=2.6.1", diff --git a/tests/func/test_commit.py b/tests/func/test_commit.py index 6c91abff31..958a29c539 100644 --- a/tests/func/test_commit.py +++ b/tests/func/test_commit.py @@ -2,7 +2,9 @@ import pytest +from dvc.dependency.base import DependencyDoesNotExistError from dvc.dvcfile import PIPELINE_FILE +from dvc.output.base import OutputDoesNotExistError from dvc.stage.exceptions import StageCommitError from dvc.utils.yaml import dump_yaml, load_yaml @@ -86,6 +88,24 @@ def test_commit_no_exec(tmp_dir, dvc): assert dvc.status(stage.path) == {} +def test_commit_no_exec_missing_dep(tmp_dir, dvc): + stage = dvc.run( + name="my", cmd="mycmd", deps=["dep"], outs=["out"], no_exec=True + ) + assert dvc.status(stage.path) + + with pytest.raises(DependencyDoesNotExistError): + dvc.commit(stage.path, force=True) + + +def test_commit_no_exec_missing_out(tmp_dir, dvc): + stage = dvc.run(name="my", cmd="mycmd", outs=["out"], no_exec=True) + assert dvc.status(stage.path) + + with pytest.raises(OutputDoesNotExistError): + dvc.commit(stage.path, force=True) + + def test_commit_pipeline_stage(tmp_dir, dvc, run_copy): tmp_dir.gen("foo", "foo") stage = run_copy("foo", "bar", no_commit=True, name="copy-foo-bar") diff --git a/tests/func/test_remote.py b/tests/func/test_remote.py index 35ce310f06..38196ffbdd 100644 --- a/tests/func/test_remote.py +++ b/tests/func/test_remote.py @@ -232,6 +232,24 @@ def test_modify_missing_remote(tmp_dir, dvc): assert main(["remote", "modify", "myremote", "user", "xxx"]) == 251 +def test_remote_modify_local_on_repo_config(tmp_dir, dvc): + assert main(["remote", "add", "myremote", "http://example.com/path"]) == 0 + assert ( + main(["remote", "modify", "myremote", "user", "xxx", "--local"]) == 0 + ) + assert dvc.config.load_one("local")["remote"]["myremote"] == { + "user": "xxx" + } + assert dvc.config.load_one("repo")["remote"]["myremote"] == { + "url": "http://example.com/path" + } + dvc.config.load() + assert dvc.config["remote"]["myremote"] == { + "url": "http://example.com/path", + "user": "xxx", + } + + def test_external_dir_resource_on_no_cache(tmp_dir, dvc, tmp_path_factory): # https://github.com/iterative/dvc/issues/2647, is some situations # (external dir dependency) cache is required to calculate dir md5 diff --git a/tests/func/test_tree.py b/tests/func/test_tree.py index 93b671bd75..3ab93efc9b 100644 --- a/tests/func/test_tree.py +++ b/tests/func/test_tree.py @@ -217,7 +217,10 @@ def test_repotree_cache_save(tmp_dir, dvc, scm, erepo_dir, local_cloud): with erepo_dir.dvc.state: cache = dvc.cache.local with cache.tree.state: - cache.save(PathInfo(erepo_dir / "dir"), tree, None) + path_info = PathInfo(erepo_dir / "dir") + hash_info = cache.tree.save_info(path_info) + cache.save(path_info, tree, hash_info) + for hash_ in expected: assert os.path.exists(cache.tree.hash_to_path_info(hash_)) diff --git a/tests/func/test_version.py b/tests/func/test_version.py index c55955db40..753d53e6ee 100644 --- a/tests/func/test_version.py +++ b/tests/func/test_version.py @@ -16,7 +16,7 @@ def test_info_in_repo(scm_init, tmp_dir, caplog): assert main(["version"]) == 0 - assert re.search(r"DVC version: \d+\.\d+\.\d+\+.*", caplog.text) + assert re.search(r"DVC version: \d+\.\d+\.\d+.*", caplog.text) assert re.search(r"Platform: Python \d\.\d\.\d on .*", caplog.text) assert re.search(r"Supports: .*", caplog.text) assert re.search(r"Cache types: .*", caplog.text) @@ -60,7 +60,7 @@ def test_fs_info_in_repo(tmp_dir, dvc, caplog): def test_info_outside_of_repo(tmp_dir, caplog): assert main(["version"]) == 0 - assert re.search(r"DVC version: \d+\.\d+\.\d+\+.*", caplog.text) + assert re.search(r"DVC version: \d+\.\d+\.\d+.*", caplog.text) assert re.search(r"Platform: Python \d\.\d\.\d on .*", caplog.text) assert re.search(r"Supports: .*", caplog.text) assert not re.search(r"Cache types: .*", caplog.text)