diff --git a/dvc/command/gc.py b/dvc/command/gc.py index 7cbffc537c..9e7f84680a 100644 --- a/dvc/command/gc.py +++ b/dvc/command/gc.py @@ -12,17 +12,18 @@ class CmdGC(CmdBase): def run(self): - msg = "This will remove all cache except items used in " - - msg += "the working tree" - if self.args.all_commits: - msg += " and all git commits" - elif self.args.all_branches and self.args.all_tags: - msg += " and all git branches and tags" - elif self.args.all_branches: - msg += " and all git branches" - elif self.args.all_tags: - msg += " and all git tags" + msg = ( + "This will remove all cache except items used in the working tree" + "{tags}{history}{branches}{history}" + ).format( + tags="" if self.args.remove_all_tags else "and all git tags", + branches="" + if self.args.remove_all_branches + else "and all git branches", + history="" + if self.args.remove_all_history + else "and their history", + ) if self.args.repos: msg += " of the current and the following repos:" @@ -39,9 +40,9 @@ def run(self): return 1 self.repo.gc( - all_branches=self.args.all_branches, - all_tags=self.args.all_tags, - all_commits=self.args.all_commits, + remove_all_tags=self.args.remove_all_tags, + remove_all_branches=self.args.remove_all_branches, + remove_all_history=self.args.remove_all_history, cloud=self.args.cloud, remote=self.args.remote, force=self.args.force, @@ -65,34 +66,32 @@ def add_parser(subparsers, parent_parser): formatter_class=argparse.RawDescriptionHelpFormatter, ) gc_parser.add_argument( - "-a", - "--all-branches", + "-c", + "--cloud", action="store_true", default=False, - help="Keep data files for the tips of all git branches.", + help="Collect garbage in remote repository.", ) gc_parser.add_argument( - "-T", - "--all-tags", - action="store_true", - default=False, - help="Keep data files for all git tags.", + "-r", "--remote", help="Remote storage to collect garbage in." ) gc_parser.add_argument( - "--all-commits", + "--remove-all-tags", action="store_true", default=False, - help=argparse.SUPPRESS, + help="Remove cache for all git tags.", ) gc_parser.add_argument( - "-c", - "--cloud", + "--remove-all-branches", action="store_true", default=False, - help="Collect garbage in remote repository.", + help="Remove cache for all git branches.", ) gc_parser.add_argument( - "-r", "--remote", help="Remote storage to collect garbage in." + "--remove-all-history", + action="store_true", + default=False, + help="Remove cache for all history of all branches and tags.", ) gc_parser.add_argument( "-f", diff --git a/dvc/repo/brancher.py b/dvc/repo/brancher.py index 955b135135..5a95257e24 100644 --- a/dvc/repo/brancher.py +++ b/dvc/repo/brancher.py @@ -33,7 +33,10 @@ def brancher( # noqa: E302 yield "working tree" if all_commits: - revs = scm.list_all_commits() + revs = scm.list_all_commits( + exclude_all_tags=not all_tags, + exclude_all_branches=not all_branches, + ) else: if all_branches: revs.extend(scm.list_branches()) diff --git a/dvc/repo/gc.py b/dvc/repo/gc.py index e55a34dc3f..fc38bf147a 100644 --- a/dvc/repo/gc.py +++ b/dvc/repo/gc.py @@ -16,12 +16,12 @@ def _do_gc(typ, func, clist): @locked def gc( self, - all_branches=False, + remove_all_tags=False, + remove_all_branches=False, + remove_all_history=False, cloud=False, remote=None, with_deps=False, - all_tags=False, - all_commits=False, force=False, jobs=None, repos=None, @@ -43,10 +43,10 @@ def gc( for repo in all_repos + [self]: used.update( repo.used_cache( - all_branches=all_branches, with_deps=with_deps, - all_tags=all_tags, - all_commits=all_commits, + all_branches=not remove_all_branches, + all_tags=not remove_all_tags, + all_commits=not remove_all_history, remote=remote, force=force, jobs=jobs, diff --git a/dvc/scm/base.py b/dvc/scm/base.py index 412731ca89..34c72aba52 100644 --- a/dvc/scm/base.py +++ b/dvc/scm/base.py @@ -115,7 +115,9 @@ def list_tags(self): # pylint: disable=no-self-use """Returns a list of available tags in the repo.""" return [] - def list_all_commits(self): # pylint: disable=no-self-use + def list_all_commits( + self, exclude_all_tags=False, exclude_all_branches=False + ): # pylint: disable=no-self-use """Returns a list of commits in the repo.""" return [] diff --git a/dvc/scm/git/__init__.py b/dvc/scm/git/__init__.py index 3872c8e72e..8ffc842a2f 100644 --- a/dvc/scm/git/__init__.py +++ b/dvc/scm/git/__init__.py @@ -244,8 +244,18 @@ def list_branches(self): def list_tags(self): return [t.name for t in self.repo.tags] - def list_all_commits(self): - return [c.hexsha for c in self.repo.iter_commits("--all")] + def list_all_commits( + self, exclude_all_tags=False, exclude_all_branches=False + ): + args = [] + if exclude_all_tags: + args.extend(["--exclude", "refs/tags/*"]) + if exclude_all_branches: + args.extend(["--exclude", "refs/heads/*"]) + # NOTE: order matters, `--exclude` needs to be before `--all` in order + # to affect it. + args.append("--all") + return self.repo.git.rev_list(*args).splitlines() def _install_hook(self, name, cmd): command = ( diff --git a/scripts/completion/dvc.bash b/scripts/completion/dvc.bash index 2af965a67d..5c9ea3c4ff 100644 --- a/scripts/completion/dvc.bash +++ b/scripts/completion/dvc.bash @@ -26,7 +26,7 @@ _dvc_diff='-t --target' _dvc_fetch='-j --jobs -r --remote -a --all-branches -T --all-tags -d --with-deps -R --recursive $(compgen -G *.dvc)' _dvc_get_url='' _dvc_get='-o --out --rev --show-url' -_dvc_gc='-a --all-branches -T --all-tags -c --cloud -r --remote -f --force -p --projects -j --jobs' +_dvc_gc='--remove-all-branches --remove-all-tags --remove-all-history -c --cloud -r --remote -f --force -p --projects -j --jobs' _dvc_import_url='-f --file' _dvc_import='-o --out --rev' _dvc_init='--no-scm -f --force' diff --git a/scripts/completion/dvc.zsh b/scripts/completion/dvc.zsh index 32bdef47ff..3b6911c450 100644 --- a/scripts/completion/dvc.zsh +++ b/scripts/completion/dvc.zsh @@ -124,8 +124,8 @@ _dvc_get=( ) _dvc_gc=( - {-a,--all-branches}"[Keep data files for the tips of all git branches.]" - {-T,--all-tags}"[Keep data files for all git tags.]" + "--remove-all-branches[Keep data files for the tips of all git branches.]" + "--remove-all-tags[Keep data files for all git tags.]" {-c,--cloud}"[Collect garbage in remote repository.]" {-r,--remote}"[Remote storage to collect garbage in.]:Remote repository:" {-f,--force}"[Force garbage collection - automatically agree to all prompts.]:Repos:_files" diff --git a/tests/func/test_gc.py b/tests/func/test_gc.py index 41c4f2204a..fd29ea8ef8 100644 --- a/tests/func/test_gc.py +++ b/tests/func/test_gc.py @@ -92,16 +92,26 @@ def test(self): self._check_cache(4) - self.dvc.gc(all_tags=True, all_branches=True) + self.dvc.gc() + self._check_cache(4) + self.dvc.gc(remove_all_history=True) self._check_cache(3) - self.dvc.gc(all_tags=False, all_branches=True) + self.dvc.gc(remove_all_tags=True) + self._check_cache(3) + self.dvc.gc(remove_all_branches=True) self._check_cache(2) - self.dvc.gc(all_tags=True, all_branches=False) + self.dvc.gc(remove_all_branches=True, remove_all_tags=True) + self._check_cache(2) + self.dvc.gc( + remove_all_branches=True, + remove_all_tags=True, + remove_all_history=True, + ) self._check_cache(1) @@ -183,7 +193,7 @@ def test_all_commits(tmp_dir, scm, dvc): tmp_dir.dvc_gen("testfile", "workspace") n = _count_files(dvc.cache.local.cache_dir) - dvc.gc(all_commits=True) + dvc.gc() # Only one uncommitted file should go away assert _count_files(dvc.cache.local.cache_dir) == n - 1 diff --git a/tests/unit/command/test_gc.py b/tests/unit/command/test_gc.py new file mode 100644 index 0000000000..a390c8f29b --- /dev/null +++ b/tests/unit/command/test_gc.py @@ -0,0 +1,39 @@ +from dvc.cli import parse_args +from dvc.command.gc import CmdGC + + +def test_gc(mocker): + cli_args = parse_args( + [ + "gc", + "--remove-all-tags", + "--remove-all-branches", + "--remove-all-history", + "--cloud", + "--remote", + "myremote", + "--force", + "--jobs", + "10", + "--projects", + "/some/path", + "/some/other/path", + ] + ) + assert cli_args.func == CmdGC + + cmd = cli_args.func(cli_args) + m = mocker.patch("dvc.repo.Repo.gc") + + assert cmd.run() == 0 + + m.assert_called_once_with( + remove_all_tags=True, + remove_all_branches=True, + remove_all_history=True, + cloud=True, + remote="myremote", + force=True, + jobs=10, + repos=["/some/path", "/some/other/path"], + ) diff --git a/tests/unit/scm/test_git.py b/tests/unit/scm/test_git.py index 2874da16b5..40f08dab4c 100644 --- a/tests/unit/scm/test_git.py +++ b/tests/unit/scm/test_git.py @@ -15,3 +15,16 @@ def test_belongs_to_scm_true_on_git_internal(self): def test_belongs_to_scm_false(self): path = os.path.join("some", "non-.git", "file") self.assertFalse(self.dvc.scm.belongs_to_scm(path)) + + +def test_list_all_commits_detached_head(tmp_dir, scm): + tmp_dir.scm_gen({"first": "first"}, commit="first") + tmp_dir.scm_gen({"second": "second"}, commit="second") + scm.branch("branch_second") + scm.checkout("branch_third", create_new=True) + tmp_dir.scm_gen({"third": "third"}, commit="third") + scm.checkout("branch_second") + assert len(scm.list_all_commits()) == 3 + # deleting the branch so that `third` commit gets lost + scm.repo.git.branch("branch_third", D=True) + assert len(scm.list_all_commits()) == 2