Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

dvc: update gc to remove unpacked dir #3054

Merged
merged 1 commit into from
Jan 6, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions dvc/remote/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -697,6 +697,8 @@ def gc(self, named_cache):
if checksum in used:
continue
path_info = self.checksum_to_path_info(checksum)
if self.is_dir_checksum(checksum):
self._remove_unpacked_dir(checksum)
self.remove(path_info)
removed = True
return removed
Expand Down Expand Up @@ -1009,3 +1011,6 @@ def _changed_unpacked_dir(self, checksum):

def _update_unpacked_dir(self, checksum):
    """Do nothing for ``checksum``; this implementation is intentionally empty.

    NOTE(review): presumably a hook that remotes keeping an unpacked copy
    of a directory are expected to override -- confirm against subclasses.
    """

def _remove_unpacked_dir(self, checksum):
    """Do nothing for ``checksum``; this implementation is intentionally empty.

    ``gc`` calls this for directory checksums right before it removes the
    cache entry itself.

    NOTE(review): the local remote appears to override this to delete its
    unpacked directory -- confirm the class hierarchy.
    """
4 changes: 4 additions & 0 deletions dvc/remote/local.py
Original file line number Diff line number Diff line change
Expand Up @@ -464,6 +464,10 @@ def _get_unpacked_dir_path_info(self, checksum):
info = self.checksum_to_path_info(checksum)
return info.with_name(info.name + self.UNPACKED_DIR_SUFFIX)

def _remove_unpacked_dir(self, checksum):
    """Remove the unpacked directory that belongs to ``checksum``.

    The location is obtained from ``_get_unpacked_dir_path_info`` and is
    handed straight to ``self.remove``.
    """
    self.remove(self._get_unpacked_dir_path_info(checksum))

def _path_info_changed(self, path_info):
if self.exists(path_info) and self.state.get(path_info):
return False
Expand Down
16 changes: 16 additions & 0 deletions tests/func/test_gc.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from dvc.exceptions import CollectCacheError
from dvc.main import main
from dvc.repo import Repo as DvcRepo
from dvc.remote.local import RemoteLOCAL
from tests.basic_env import TestDir, TestDvcGit


Expand Down Expand Up @@ -204,3 +205,18 @@ def test_gc_no_dir_cache(tmp_dir, dvc, repo_template):

def _count_files(path):
    """Return the number of files found under ``path``, walking recursively."""
    total = 0
    for _root, _dirs, filenames in os.walk(path):
        total += len(filenames)
    return total


def test_gc_no_unpacked_dir(tmp_dir, dvc, repo_template):
dir_stages = dvc.add("dir")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can use `stage, = tmp_dir.dvc_gen({"dir": {"file": "file_content"}})`.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you choose to go this way, there will be no need for `repo_template` :)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@pared repo_template is fine too though.

dvc.status()

os.remove("dir.dvc")
unpackeddir = (
dir_stages[0].outs[0].cache_path + RemoteLOCAL.UNPACKED_DIR_SUFFIX
)

assert os.path.exists(unpackeddir)

dvc.gc(force=True)
assert not os.path.exists(unpackeddir)