Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

gc: make it safer by only activating by use of certain flags #3363

Merged
merged 2 commits into from
Feb 21, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions dvc/command/gc.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,16 @@

class CmdGC(CmdBase):
def run(self):
from dvc.repo.gc import _raise_error_if_all_disabled

_raise_error_if_all_disabled(
all_branches=self.args.all_branches,
all_tags=self.args.all_tags,
all_commits=self.args.all_commits,
workspace=self.args.workspace,
cloud=self.args.cloud,
)

msg = "This will remove all cache except items used in "

msg += "the working tree"
Expand Down Expand Up @@ -47,6 +57,7 @@ def run(self):
force=self.args.force,
jobs=self.args.jobs,
repos=self.args.repos,
workspace=self.args.workspace,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A general thought, not about this PR, - we should stop automatically making command line options into boolean kwargs, e.g. using set() would be more appropriate here.

)
return 0

Expand All @@ -64,6 +75,13 @@ def add_parser(subparsers, parent_parser):
help=GC_HELP,
formatter_class=argparse.RawDescriptionHelpFormatter,
)
gc_parser.add_argument(
"-w",
"--workspace",
action="store_true",
default=False,
help="Keep data files used in the current workspace.",
)
gc_parser.add_argument(
"-a",
"--all-branches",
Expand Down
4 changes: 4 additions & 0 deletions dvc/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ class DvcException(Exception):
"""Base class for all dvc exceptions."""


class InvalidArgumentError(ValueError, DvcException):
"""Thrown if arguments are invalid."""
Comment on lines +10 to +11
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice one! Should we use it in otherplaces like dvc/repo/__init__.py and dvc/repo/reproduce.py too?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd opt for it on a different PR. 🙂

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@skshetry Sure, let's not forget about it though 🙂



class OutputDuplicationError(DvcException):
"""Thrown if a file/directory is specified as an output in more than one
stage.
Expand Down
23 changes: 23 additions & 0 deletions dvc/repo/gc.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from . import locked
from dvc.cache import NamedCache
from dvc.exceptions import InvalidArgumentError


logger = logging.getLogger(__name__)
Expand All @@ -13,6 +14,15 @@ def _do_gc(typ, func, clist):
logger.info("No unused '{}' cache to remove.".format(typ))


def _raise_error_if_all_disabled(**kwargs):
if not any(kwargs.values()):
raise InvalidArgumentError(
"Invalid Arguments. Either of ({}) needs to be enabled.".format(
", ".join(kwargs.keys())
)
)


@locked
def gc(
self,
Expand All @@ -25,7 +35,20 @@ def gc(
force=False,
jobs=None,
repos=None,
workspace=False,
):

# require `workspace` to be true to come into effect.
# assume `workspace` to be enabled if any of `all_tags`, `all_commits`,
# or `all_branches` are enabled.
_raise_error_if_all_disabled(
skshetry marked this conversation as resolved.
Show resolved Hide resolved
workspace=workspace,
all_tags=all_tags,
all_commits=all_commits,
all_branches=all_branches,
cloud=cloud,
)

from contextlib import ExitStack
from dvc.repo import Repo

Expand Down
70 changes: 63 additions & 7 deletions tests/func/test_gc.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
import logging
import os

import configobj
import pytest
from git import Repo

from dvc.compat import fspath
from dvc.exceptions import CollectCacheError
from dvc.main import main
from dvc.repo import Repo as DvcRepo
Expand All @@ -28,11 +30,11 @@ def setUp(self):
self.bad_cache.append(path)

def test_api(self):
self.dvc.gc()
self.dvc.gc(workspace=True)
self._test_gc()

def test_cli(self):
ret = main(["gc", "-f"])
ret = main(["gc", "-wf"])
self.assertEqual(ret, 0)
self._test_gc()

Expand Down Expand Up @@ -169,10 +171,10 @@ def test(self):

self._check_cache(3)

self.dvc.gc(repos=[self.additional_path])
self.dvc.gc(repos=[self.additional_path], workspace=True)
self._check_cache(3)

self.dvc.gc()
self.dvc.gc(workspace=True)
self._check_cache(2)


Expand All @@ -196,10 +198,10 @@ def test_gc_no_dir_cache(tmp_dir, dvc):
os.unlink(dir_stage.outs[0].cache_path)

with pytest.raises(CollectCacheError):
dvc.gc()
dvc.gc(workspace=True)

assert _count_files(dvc.cache.local.cache_dir) == 4
dvc.gc(force=True)
dvc.gc(force=True, workspace=True)
assert _count_files(dvc.cache.local.cache_dir) == 2


Expand All @@ -218,5 +220,59 @@ def test_gc_no_unpacked_dir(tmp_dir, dvc):

assert os.path.exists(unpackeddir)

dvc.gc(force=True)
dvc.gc(force=True, workspace=True)
assert not os.path.exists(unpackeddir)


def test_gc_without_workspace_raises_error(tmp_dir, dvc):
dvc.gc(force=True, workspace=True) # works without error

from dvc.exceptions import InvalidArgumentError

with pytest.raises(InvalidArgumentError):
dvc.gc(force=True)

with pytest.raises(InvalidArgumentError):
dvc.gc(force=True, workspace=False)


def test_gc_without_workspace_on_tags_branches_commits(tmp_dir, dvc):
dvc.gc(force=True, all_tags=True)
dvc.gc(force=True, all_commits=True)
dvc.gc(force=False, all_branches=True)

# even if workspace is disabled, and others are enabled, assume as if
# workspace is enabled.
dvc.gc(force=False, all_branches=True, all_commits=False, workspace=False)


def test_gc_without_workspace(tmp_dir, dvc, caplog):
with caplog.at_level(logging.WARNING, logger="dvc"):
assert main(["gc", "-vf"]) == 255

assert "Invalid Arguments" in caplog.text


def test_gc_with_possible_args_positive(tmp_dir, dvc):
for flag in [
"-w",
"-a",
"-T",
"--all-commits",
"-aT",
"-wa",
"-waT",
]:
assert main(["gc", "-vf", flag]) == 0


def test_gc_cloud_positive(tmp_dir, dvc, tmp_path_factory):
with dvc.config.edit() as conf:
storage = fspath(tmp_path_factory.mktemp("test_remote_base"))
conf["remote"]["local_remote"] = {"url": storage}
conf["core"]["remote"] = "local_remote"

dvc.push()

for flag in ["-c", "-ca", "-cT", "-caT", "-cwT"]:
assert main(["gc", "-vf", flag]) == 0
2 changes: 1 addition & 1 deletion tests/func/test_repro.py
Original file line number Diff line number Diff line change
Expand Up @@ -949,7 +949,7 @@ def test(self, mock_prompt):
self.assertEqual(self.dvc.status([cmd_stage.path]), {})

self.assertEqual(self.dvc.status(), {})
self.dvc.gc()
self.dvc.gc(workspace=True)
self.assertEqual(self.dvc.status(), {})

self.dvc.remove(cmd_stage.path, outs_only=True)
Expand Down