From f575bc7a1dfb92f81d762aa7545597fba9ffca26 Mon Sep 17 00:00:00 2001 From: pawel Date: Mon, 11 Nov 2019 16:13:13 +0100 Subject: [PATCH 1/4] import: intercept and rephrase OutputNotFound message --- dvc/exceptions.py | 9 +++++++++ dvc/repo/imp.py | 9 ++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/dvc/exceptions.py b/dvc/exceptions.py index 2916066146..3c85f3dae0 100644 --- a/dvc/exceptions.py +++ b/dvc/exceptions.py @@ -338,3 +338,12 @@ def __init__(self, url, cause=None): ), cause=cause, ) + + +class NoOutputInExternalRepoError(DvcException): + def __init__(self, repo_url, path): + super(NoOutputInExternalRepoError, self).__init__( + "Output '{}' not found in target repository: '{}'".format( + path, repo_url + ) + ) diff --git a/dvc/repo/imp.py b/dvc/repo/imp.py index 68ead389b6..869039266a 100644 --- a/dvc/repo/imp.py +++ b/dvc/repo/imp.py @@ -1,6 +1,13 @@ +from dvc.exceptions import NoOutputInExternalRepoError +from dvc.exceptions import OutputNotFoundError + + def imp(self, url, path, out=None, rev=None): erepo = {"url": url} if rev is not None: erepo["rev"] = rev - return self.imp_url(path, out=out, erepo=erepo, locked=True) + try: + return self.imp_url(path, out=out, erepo=erepo, locked=True) + except OutputNotFoundError: + raise NoOutputInExternalRepoError(url, path) From e5aaf987768caa8bedff226b6ba6fb6181cc3fad Mon Sep 17 00:00:00 2001 From: pawel Date: Wed, 13 Nov 2019 18:05:58 +0100 Subject: [PATCH 2/4] import/get: make extarnal repo handle OutputNotFound --- dvc/dependency/repo.py | 6 +++--- dvc/exceptions.py | 6 ++++-- dvc/external_repo.py | 6 ++++++ dvc/repo/__init__.py | 14 ++++++++++---- dvc/repo/fetch.py | 2 +- dvc/repo/get.py | 2 +- dvc/repo/imp.py | 9 +-------- 7 files changed, 26 insertions(+), 19 deletions(-) diff --git a/dvc/dependency/repo.py b/dvc/dependency/repo.py index 20439c024e..68cd044a1d 100644 --- a/dvc/dependency/repo.py +++ b/dvc/dependency/repo.py @@ -49,10 +49,10 @@ def _make_repo(self, **overrides): def status(self): with self._make_repo() as repo: - current = repo.find_out_by_relpath(self.def_path).info + current = repo.find_out_by_path(self.def_path).info with self._make_repo(rev_lock=None) as repo: - updated = repo.find_out_by_relpath(self.def_path).info + updated = repo.find_out_by_path(self.def_path).info if current != updated: return {str(self): "update available"} @@ -71,7 +71,7 @@ def fetch(self): ) as repo: self.def_repo[self.PARAM_REV_LOCK] = repo.scm.get_rev() - out = repo.find_out_by_relpath(self.def_path) + out = repo.find_out_by_path(self.def_path) with repo.state: repo.cloud.pull(out.get_used_cache()) diff --git a/dvc/exceptions.py b/dvc/exceptions.py index 3c85f3dae0..369b33ac7f 100644 --- a/dvc/exceptions.py +++ b/dvc/exceptions.py @@ -59,10 +59,12 @@ class OutputNotFoundError(DvcException): output (unicode): path to the file/directory. """ - def __init__(self, output): + def __init__(self, output, repo=None): + self.repo = repo + self.failed_output = output super(OutputNotFoundError, self).__init__( "unable to find DVC-file with output '{path}'".format( - path=relpath(output) + path=relpath(self.failed_output) ) ) diff --git a/dvc/external_repo.py b/dvc/external_repo.py index 46657fc4d7..d528af6a55 100644 --- a/dvc/external_repo.py +++ b/dvc/external_repo.py @@ -9,6 +9,8 @@ from dvc.config import NoRemoteError from dvc.exceptions import RemoteNotSpecifiedInExternalRepoError +from dvc.exceptions import NoOutputInExternalRepoError +from dvc.exceptions import OutputNotFoundError from dvc.utils.fs import remove @@ -25,6 +27,10 @@ def external_repo(url=None, rev=None, rev_lock=None, cache_dir=None): yield repo except NoRemoteError as exc: raise RemoteNotSpecifiedInExternalRepoError(url, cause=exc) + except OutputNotFoundError as exc: + if exc.repo is repo: + raise NoOutputInExternalRepoError(url, exc.failed_output) + raise repo.close() diff --git a/dvc/repo/__init__.py b/dvc/repo/__init__.py index e0003b8087..86d6a343c7 100644 --- a/dvc/repo/__init__.py +++ b/dvc/repo/__init__.py @@ -418,10 +418,13 @@ def stages(self): return get_stages(self.graph) def find_outs_by_path(self, path, outs=None, recursive=False): + abs_path = ( + path if os.path.isabs(path) else os.path.join(self.root_dir, path) + ) + if not outs: outs = [out for stage in self.stages for out in stage.outs] - abs_path = os.path.abspath(path) is_dir = self.tree.isdir(abs_path) def func(out): @@ -435,12 +438,15 @@ def func(out): matched = list(filter(func, outs)) if not matched: - raise OutputNotFoundError(path) + if abs_path.startswith(self.root_dir): + failed_output = relpath(abs_path) + else: + failed_output = path + raise OutputNotFoundError(failed_output, self) return matched - def find_out_by_relpath(self, relpath): - path = os.path.join(self.root_dir, relpath) + def find_out_by_path(self, path): out, = self.find_outs_by_path(path) return out diff --git a/dvc/repo/fetch.py b/dvc/repo/fetch.py index 531fd4d535..0440dddaea 100644 --- a/dvc/repo/fetch.py +++ b/dvc/repo/fetch.py @@ -81,7 +81,7 @@ def _fetch_external(self, repo_url, repo_rev, files): cache = NamedCache() for name in files: try: - out = repo.find_out_by_relpath(name) + out = repo.find_out_by_path(name) except OutputNotFoundError: failed += 1 logger.exception( diff --git a/dvc/repo/get.py b/dvc/repo/get.py index 21d01ed7e7..64ca5eaf07 100644 --- a/dvc/repo/get.py +++ b/dvc/repo/get.py @@ -49,7 +49,7 @@ def get(url, path, out=None, rev=None): # the same cache file might be used a few times in a directory. repo.cache.local.cache_types = ["reflink", "hardlink", "copy"] - o = repo.find_out_by_relpath(path) + o = repo.find_out_by_path(path) with repo.state: repo.cloud.pull(o.get_used_cache()) o.path_info = PathInfo(os.path.abspath(out)) diff --git a/dvc/repo/imp.py b/dvc/repo/imp.py index 869039266a..68ead389b6 100644 --- a/dvc/repo/imp.py +++ b/dvc/repo/imp.py @@ -1,13 +1,6 @@ -from dvc.exceptions import NoOutputInExternalRepoError -from dvc.exceptions import OutputNotFoundError - - def imp(self, url, path, out=None, rev=None): erepo = {"url": url} if rev is not None: erepo["rev"] = rev - try: - return self.imp_url(path, out=out, erepo=erepo, locked=True) - except OutputNotFoundError: - raise NoOutputInExternalRepoError(url, path) + return self.imp_url(path, out=out, erepo=erepo, locked=True) From 5a92c46033fc62c3e90dc1244242894278892951 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Redzy=C5=84ski?= Date: Fri, 15 Nov 2019 17:21:54 +0100 Subject: [PATCH 3/4] import: just inform that output not found --- dvc/dependency/repo.py | 6 +++--- dvc/exceptions.py | 9 +++------ dvc/external_repo.py | 2 +- dvc/repo/__init__.py | 14 ++++---------- dvc/repo/fetch.py | 2 +- dvc/repo/get.py | 2 +- tests/func/test_import.py | 7 ++++++- 7 files changed, 19 insertions(+), 23 deletions(-) diff --git a/dvc/dependency/repo.py b/dvc/dependency/repo.py index 68cd044a1d..20439c024e 100644 --- a/dvc/dependency/repo.py +++ b/dvc/dependency/repo.py @@ -49,10 +49,10 @@ def _make_repo(self, **overrides): def status(self): with self._make_repo() as repo: - current = repo.find_out_by_path(self.def_path).info + current = repo.find_out_by_relpath(self.def_path).info with self._make_repo(rev_lock=None) as repo: - updated = repo.find_out_by_path(self.def_path).info + updated = repo.find_out_by_relpath(self.def_path).info if current != updated: return {str(self): "update available"} @@ -71,7 +71,7 @@ def fetch(self): ) as repo: self.def_repo[self.PARAM_REV_LOCK] = repo.scm.get_rev() - out = repo.find_out_by_path(self.def_path) + out = repo.find_out_by_relpath(self.def_path) with repo.state: repo.cloud.pull(out.get_used_cache()) diff --git a/dvc/exceptions.py b/dvc/exceptions.py index 369b33ac7f..ce30ffa88f 100644 --- a/dvc/exceptions.py +++ b/dvc/exceptions.py @@ -61,10 +61,9 @@ class OutputNotFoundError(DvcException): def __init__(self, output, repo=None): self.repo = repo - self.failed_output = output super(OutputNotFoundError, self).__init__( "unable to find DVC-file with output '{path}'".format( - path=relpath(self.failed_output) + path=relpath(output) ) ) @@ -343,9 +342,7 @@ def __init__(self, url, cause=None): class NoOutputInExternalRepoError(DvcException): - def __init__(self, repo_url, path): + def __init__(self): super(NoOutputInExternalRepoError, self).__init__( - "Output '{}' not found in target repository: '{}'".format( - path, repo_url - ) + "Output not found in target repository." ) diff --git a/dvc/external_repo.py b/dvc/external_repo.py index d528af6a55..e86c02926a 100644 --- a/dvc/external_repo.py +++ b/dvc/external_repo.py @@ -29,7 +29,7 @@ def external_repo(url=None, rev=None, rev_lock=None, cache_dir=None): raise RemoteNotSpecifiedInExternalRepoError(url, cause=exc) except OutputNotFoundError as exc: if exc.repo is repo: - raise NoOutputInExternalRepoError(url, exc.failed_output) + raise NoOutputInExternalRepoError() raise repo.close() diff --git a/dvc/repo/__init__.py b/dvc/repo/__init__.py index 86d6a343c7..9f6284ef48 100644 --- a/dvc/repo/__init__.py +++ b/dvc/repo/__init__.py @@ -418,13 +418,10 @@ def stages(self): return get_stages(self.graph) def find_outs_by_path(self, path, outs=None, recursive=False): - abs_path = ( - path if os.path.isabs(path) else os.path.join(self.root_dir, path) - ) - if not outs: outs = [out for stage in self.stages for out in stage.outs] + abs_path = os.path.abspath(path) is_dir = self.tree.isdir(abs_path) def func(out): @@ -438,15 +435,12 @@ def func(out): matched = list(filter(func, outs)) if not matched: - if abs_path.startswith(self.root_dir): - failed_output = relpath(abs_path) - else: - failed_output = path - raise OutputNotFoundError(failed_output, self) + raise OutputNotFoundError(path, self) return matched - def find_out_by_path(self, path): + def find_out_by_relpath(self, relpath): + path = os.path.join(self.root_dir, relpath) out, = self.find_outs_by_path(path) return out diff --git a/dvc/repo/fetch.py b/dvc/repo/fetch.py index 0440dddaea..531fd4d535 100644 --- a/dvc/repo/fetch.py +++ b/dvc/repo/fetch.py @@ -81,7 +81,7 @@ def _fetch_external(self, repo_url, repo_rev, files): cache = NamedCache() for name in files: try: - out = repo.find_out_by_path(name) + out = repo.find_out_by_relpath(name) except OutputNotFoundError: failed += 1 logger.exception( diff --git a/dvc/repo/get.py b/dvc/repo/get.py index 64ca5eaf07..21d01ed7e7 100644 --- a/dvc/repo/get.py +++ b/dvc/repo/get.py @@ -49,7 +49,7 @@ def get(url, path, out=None, rev=None): # the same cache file might be used a few times in a directory. repo.cache.local.cache_types = ["reflink", "hardlink", "copy"] - o = repo.find_out_by_path(path) + o = repo.find_out_by_relpath(path) with repo.state: repo.cloud.pull(o.get_used_cache()) o.path_info = PathInfo(os.path.abspath(out)) diff --git a/tests/func/test_import.py b/tests/func/test_import.py index 06e8f8c830..067d5be8eb 100644 --- a/tests/func/test_import.py +++ b/tests/func/test_import.py @@ -8,7 +8,7 @@ from mock import patch from dvc.config import Config -from dvc.exceptions import DownloadError +from dvc.exceptions import DownloadError, NoOutputInExternalRepoError from dvc.stage import Stage from dvc.system import System from dvc.utils import makedirs @@ -154,3 +154,8 @@ def test_pull_non_workspace(git, dvc_repo, erepo): os.remove(stage.outs[0].cache_path) dvc_repo.fetch(all_tags=True) assert os.path.exists(stage.outs[0].cache_path) + + +def test_import_non_existing(dvc_repo, erepo): + with pytest.raises(NoOutputInExternalRepoError): + dvc_repo.imp(erepo.root_dir, "invalid_output") From d1094c41b183690e32e3c8b0454eba4f8f1a7567 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Redzy=C5=84ski?= Date: Tue, 19 Nov 2019 14:58:16 +0100 Subject: [PATCH 4/4] external repo: make NoOutputInExternalRepo path-aware --- dvc/exceptions.py | 9 ++++++--- dvc/external_repo.py | 2 +- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/dvc/exceptions.py b/dvc/exceptions.py index ce30ffa88f..92f46a8829 100644 --- a/dvc/exceptions.py +++ b/dvc/exceptions.py @@ -60,10 +60,11 @@ class OutputNotFoundError(DvcException): """ def __init__(self, output, repo=None): + self.output = output self.repo = repo super(OutputNotFoundError, self).__init__( "unable to find DVC-file with output '{path}'".format( - path=relpath(output) + path=relpath(self.output) ) ) @@ -342,7 +343,9 @@ def __init__(self, url, cause=None): class NoOutputInExternalRepoError(DvcException): - def __init__(self): + def __init__(self, path, external_repo_path, external_repo_url): super(NoOutputInExternalRepoError, self).__init__( - "Output not found in target repository." + "Output '{}' not found in target repository '{}'".format( + relpath(path, external_repo_path), external_repo_url + ) ) diff --git a/dvc/external_repo.py b/dvc/external_repo.py index e86c02926a..0555f2d756 100644 --- a/dvc/external_repo.py +++ b/dvc/external_repo.py @@ -29,7 +29,7 @@ def external_repo(url=None, rev=None, rev_lock=None, cache_dir=None): raise RemoteNotSpecifiedInExternalRepoError(url, cause=exc) except OutputNotFoundError as exc: if exc.repo is repo: - raise NoOutputInExternalRepoError() + raise NoOutputInExternalRepoError(exc.output, repo.root_dir, url) raise repo.close()