-
Notifications
You must be signed in to change notification settings - Fork 1.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.
Already on GitHub? Sign in to your account
[WIP] gc: change option to use single remove flags #3207
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,17 +12,18 @@ | |
|
||
class CmdGC(CmdBase): | ||
def run(self): | ||
msg = "This will remove all cache except items used in " | ||
|
||
msg += "the working tree" | ||
if self.args.all_commits: | ||
msg += " and all git commits" | ||
elif self.args.all_branches and self.args.all_tags: | ||
msg += " and all git branches and tags" | ||
elif self.args.all_branches: | ||
msg += " and all git branches" | ||
elif self.args.all_tags: | ||
msg += " and all git tags" | ||
msg = ( | ||
"This will remove all cache except items used in the working tree" | ||
"{tags}{history}{branches}{history}" | ||
).format( | ||
tags="" if self.args.remove_all_tags else "and all git tags", | ||
branches="" | ||
if self.args.remove_all_branches | ||
else "and all git branches", | ||
history="" | ||
if self.args.remove_all_history | ||
else "and their history", | ||
) | ||
|
||
if self.args.repos: | ||
msg += " of the current and the following repos:" | ||
|
@@ -39,9 +40,9 @@ def run(self): | |
return 1 | ||
|
||
self.repo.gc( | ||
all_branches=self.args.all_branches, | ||
all_tags=self.args.all_tags, | ||
all_commits=self.args.all_commits, | ||
remove_all_tags=self.args.remove_all_tags, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Opted for explicit flags to avoid confusion such as previous |
||
remove_all_branches=self.args.remove_all_branches, | ||
remove_all_history=self.args.remove_all_history, | ||
cloud=self.args.cloud, | ||
remote=self.args.remote, | ||
force=self.args.force, | ||
|
@@ -65,34 +66,32 @@ def add_parser(subparsers, parent_parser): | |
formatter_class=argparse.RawDescriptionHelpFormatter, | ||
) | ||
gc_parser.add_argument( | ||
"-a", | ||
"--all-branches", | ||
"-c", | ||
"--cloud", | ||
action="store_true", | ||
default=False, | ||
help="Keep data files for the tips of all git branches.", | ||
help="Collect garbage in remote repository.", | ||
) | ||
gc_parser.add_argument( | ||
"-T", | ||
"--all-tags", | ||
action="store_true", | ||
default=False, | ||
help="Keep data files for all git tags.", | ||
"-r", "--remote", help="Remote storage to collect garbage in." | ||
) | ||
gc_parser.add_argument( | ||
skshetry marked this conversation as resolved.
Show resolved
Hide resolved
|
||
"--all-commits", | ||
"--remove-all-tags", | ||
action="store_true", | ||
default=False, | ||
help=argparse.SUPPRESS, | ||
help="Remove cache for all git tags.", | ||
) | ||
gc_parser.add_argument( | ||
"-c", | ||
"--cloud", | ||
"--remove-all-branches", | ||
action="store_true", | ||
default=False, | ||
help="Collect garbage in remote repository.", | ||
help="Remove cache for all git branches.", | ||
) | ||
gc_parser.add_argument( | ||
"-r", "--remote", help="Remote storage to collect garbage in." | ||
"--remove-all-history", | ||
action="store_true", | ||
default=False, | ||
help="Remove cache for all history of all branches and tags.", | ||
Comment on lines
+91
to
+94
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. As far as I recall, your intention was to remove history by default. Why did you change your mind? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @Suor To keep it as safe as possible for now 🙂 And, well, I implemented a proper approach by excluding refs when not deleting full history. |
||
) | ||
gc_parser.add_argument( | ||
"-f", | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -33,7 +33,10 @@ def brancher( # noqa: E302 | |
yield "working tree" | ||
|
||
if all_commits: | ||
revs = scm.list_all_commits() | ||
revs = scm.list_all_commits( | ||
exclude_all_tags=not all_tags, | ||
exclude_all_branches=not all_branches, | ||
) | ||
Comment on lines
+36
to
+39
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This makes There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This also makes There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, the naming But There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So, it's even more confusing. You need a whole paragraph of text to explain how it works, but still it's usually just a noop. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @Suor Well, that is because of |
||
else: | ||
if all_branches: | ||
revs.extend(scm.list_branches()) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -244,8 +244,18 @@ def list_branches(self): | |
def list_tags(self): | ||
return [t.name for t in self.repo.tags] | ||
|
||
def list_all_commits(self): | ||
return [c.hexsha for c in self.repo.iter_commits("--all")] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This was a misuse of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Also, previous implementation was silently keeping There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So it was previously simply "keep anything, which is referenced from git". Now it's more complicated ) |
||
def list_all_commits( | ||
self, exclude_all_tags=False, exclude_all_branches=False | ||
): | ||
args = [] | ||
if exclude_all_tags: | ||
args.extend(["--exclude", "refs/tags/*"]) | ||
if exclude_all_branches: | ||
args.extend(["--exclude", "refs/heads/*"]) | ||
# NOTE: order matters, `--exclude` needs to be before `--all` in order | ||
# to affect it. | ||
args.append("--all") | ||
return self.repo.git.rev_list(*args).splitlines() | ||
|
||
def _install_hook(self, name, cmd): | ||
command = ( | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -92,16 +92,26 @@ def test(self): | |
|
||
self._check_cache(4) | ||
|
||
self.dvc.gc(all_tags=True, all_branches=True) | ||
self.dvc.gc() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Will look into converting to pytest and possibly adding new tests soon. |
||
self._check_cache(4) | ||
|
||
self.dvc.gc(remove_all_history=True) | ||
self._check_cache(3) | ||
|
||
self.dvc.gc(all_tags=False, all_branches=True) | ||
self.dvc.gc(remove_all_tags=True) | ||
self._check_cache(3) | ||
|
||
self.dvc.gc(remove_all_branches=True) | ||
self._check_cache(2) | ||
|
||
self.dvc.gc(all_tags=True, all_branches=False) | ||
self.dvc.gc(remove_all_branches=True, remove_all_tags=True) | ||
self._check_cache(2) | ||
|
||
self.dvc.gc( | ||
remove_all_branches=True, | ||
remove_all_tags=True, | ||
remove_all_history=True, | ||
) | ||
self._check_cache(1) | ||
|
||
|
||
|
@@ -183,7 +193,7 @@ def test_all_commits(tmp_dir, scm, dvc): | |
tmp_dir.dvc_gen("testfile", "workspace") | ||
|
||
n = _count_files(dvc.cache.local.cache_dir) | ||
dvc.gc(all_commits=True) | ||
dvc.gc() | ||
|
||
# Only one uncommitted file should go away | ||
assert _count_files(dvc.cache.local.cache_dir) == n - 1 | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
from dvc.cli import parse_args | ||
from dvc.command.gc import CmdGC | ||
|
||
|
||
def test_gc(mocker): | ||
cli_args = parse_args( | ||
[ | ||
"gc", | ||
"--remove-all-tags", | ||
"--remove-all-branches", | ||
"--remove-all-history", | ||
"--cloud", | ||
"--remote", | ||
"myremote", | ||
"--force", | ||
"--jobs", | ||
"10", | ||
"--projects", | ||
"/some/path", | ||
"/some/other/path", | ||
] | ||
) | ||
assert cli_args.func == CmdGC | ||
|
||
cmd = cli_args.func(cli_args) | ||
m = mocker.patch("dvc.repo.Repo.gc") | ||
|
||
assert cmd.run() == 0 | ||
|
||
m.assert_called_once_with( | ||
remove_all_tags=True, | ||
remove_all_branches=True, | ||
remove_all_history=True, | ||
cloud=True, | ||
remote="myremote", | ||
force=True, | ||
jobs=10, | ||
repos=["/some/path", "/some/other/path"], | ||
) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This makes the phrase weird - many ands. Maybe just list things instead:
? That will make it more glanceable: the changing items will always be in the same place, and any user already familiar with gc will simply skip the introductory phrase without any loss of meaning.