From bf2b9c5e9ad5568321fd9652ef59b85951bca963 Mon Sep 17 00:00:00 2001 From: Corentin Hembise Date: Sat, 5 Dec 2020 19:31:07 +0100 Subject: [PATCH 1/2] Add context about failing branches during error in used_cache() --- dvc/repo/__init__.py | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/dvc/repo/__init__.py b/dvc/repo/__init__.py index 19b1e4f505..2ab7c3fefe 100644 --- a/dvc/repo/__init__.py +++ b/dvc/repo/__init__.py @@ -8,7 +8,7 @@ from git import InvalidGitRepositoryError from dvc.config import Config -from dvc.exceptions import FileMissingError +from dvc.exceptions import DvcException, FileMissingError from dvc.exceptions import IsADirectoryError as DvcIsADirectoryError from dvc.exceptions import NotDvcRepoError, OutputNotFoundError from dvc.path_info import PathInfo @@ -319,22 +319,26 @@ def used_cache( ): targets = targets or [None] - pairs = cat( - self.stage.collect_granular( - target, recursive=recursive, with_deps=with_deps + try: + pairs = cat( + self.stage.collect_granular( + target, recursive=recursive, with_deps=with_deps + ) + for target in targets ) - for target in targets - ) - suffix = f"({branch})" if branch else "" - for stage, filter_info in pairs: - used_cache = stage.get_used_cache( - remote=remote, - force=force, - jobs=jobs, - filter_info=filter_info, - ) - cache.update(used_cache, suffix=suffix) + suffix = f"({branch})" if branch else "" + for stage, filter_info in pairs: + used_cache = stage.get_used_cache( + remote=remote, + force=force, + jobs=jobs, + filter_info=filter_info, + ) + cache.update(used_cache, suffix=suffix) + except DvcException as e: + logger.error(f"Cache for '{branch}' could not be collected") + raise e if used_run_cache: used_cache = self.stage_cache.get_used_cache( From eb532efd99085ff212aa0e18902d2fd28a1f1ecf Mon Sep 17 00:00:00 2001 From: Corentin Hembise Date: Sat, 5 Dec 2020 19:32:03 +0100 Subject: [PATCH 2/2] Add ability to ignore dvc exceptions during 
used_cache() --- dvc/command/gc.py | 11 +++++++++++ dvc/repo/__init__.py | 5 +++++ dvc/repo/gc.py | 2 ++ 3 files changed, 18 insertions(+) diff --git a/dvc/command/gc.py b/dvc/command/gc.py index 2948da26b7..7c2bf8c1e0 100644 --- a/dvc/command/gc.py +++ b/dvc/command/gc.py @@ -55,6 +55,7 @@ def run(self): jobs=self.args.jobs, repos=self.args.repos, workspace=self.args.workspace, + skip_failing_collect=self.args.skip_failing_collect, ) return 0 @@ -141,4 +142,14 @@ def add_parser(subparsers, parent_parser): "Useful if you share a single cache across repos.", metavar="", ) + gc_parser.add_argument( + "--skip-failing-collect", + action="store_true", + default=False, + help=( + "Skip commits where the cache cannot be " + "collected due to user errors in the dvc " + "state (e.g. badly formatted dvc.yaml)." + ), + ) gc_parser.set_defaults(func=CmdGC) diff --git a/dvc/repo/__init__.py b/dvc/repo/__init__.py index 2ab7c3fefe..d82d62cac4 100644 --- a/dvc/repo/__init__.py +++ b/dvc/repo/__init__.py @@ -293,6 +293,7 @@ def used_cache( jobs=None, recursive=False, used_run_cache=None, + ignore_dvc_exceptions=False, ): """Get the stages related to the given target and collect the `info` of its outputs. @@ -338,6 +339,10 @@ def used_cache( cache.update(used_cache, suffix=suffix) except DvcException as e: logger.error(f"Cache for '{branch}' could not be collected") + if ignore_dvc_exceptions: + logger.debug(e) + continue + raise e if used_run_cache: diff --git a/dvc/repo/gc.py b/dvc/repo/gc.py index 9b415b7a87..cf7c285222 100644 --- a/dvc/repo/gc.py +++ b/dvc/repo/gc.py @@ -30,6 +30,7 @@ def gc( jobs=None, repos=None, workspace=False, + skip_failing_collect=False, ): # require `workspace` to be true to come into effect. @@ -66,6 +67,7 @@ def gc( remote=remote, force=force, jobs=jobs, + ignore_dvc_exceptions=skip_failing_collect, ) )