Skip to content

Commit

Permalink
Merge pull request #1841 from pared/1257
Browse files Browse the repository at this point in the history
cache: maker reflink, copy, default links
  • Loading branch information
efiop authored Apr 15, 2019
2 parents f20818c + 074b6ea commit bcd9373
Show file tree
Hide file tree
Showing 5 changed files with 54 additions and 35 deletions.
6 changes: 4 additions & 2 deletions dvc/remote/local.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from __future__ import unicode_literals

from copy import copy

from dvc.utils.compat import str, makedirs

import os
Expand Down Expand Up @@ -49,7 +51,7 @@ class RemoteLOCAL(RemoteBase):
PARAM_RELPATH = "relpath"
MD5_DIR_SUFFIX = ".dir"

CACHE_TYPES = ["reflink", "hardlink", "symlink", "copy"]
DEFAULT_CACHE_TYPES = ["reflink", "copy"]
CACHE_TYPE_MAP = {
"copy": shutil.copyfile,
"symlink": System.symlink,
Expand All @@ -74,7 +76,7 @@ def __init__(self, repo, config):
types = [t.strip() for t in types.split(",")]
self.cache_types = types
else:
self.cache_types = self.CACHE_TYPES
self.cache_types = copy(self.DEFAULT_CACHE_TYPES)

if self.cache_dir is not None and not os.path.exists(self.cache_dir):
os.mkdir(self.cache_dir)
Expand Down
5 changes: 5 additions & 0 deletions tests/test_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,3 +168,8 @@ def test_relative_path(self):
self.assertEqual(len(subdirs), 1)
files = os.listdir(os.path.join(tmpdir, subdirs[0]))
self.assertEqual(len(files), 1)


class TestShouldCacheBeReflinkOrCopyByDefault(TestDvc):
def test(self):
self.assertEqual(self.dvc.cache.local.cache_types, ["reflink", "copy"])
21 changes: 14 additions & 7 deletions tests/test_checkout.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from dvc import progress
from dvc.repo import Repo as DvcRepo
from dvc.system import System
from dvc.utils import walk_files
from dvc.utils import walk_files, load_stage_file
from tests.basic_env import TestDvc
from tests.test_repro import TestRepro
from dvc.stage import Stage, StageFileBadNameError, StageFileDoesNotExistError
Expand Down Expand Up @@ -134,7 +134,9 @@ def outs_info(self, stage):
FileInfo = collections.namedtuple("FileInfo", "path inode")

paths = [
path for output in stage.outs for path in walk_files(output.path)
path
for output in stage["outs"]
for path in walk_files(output["path"])
]

return [
Expand Down Expand Up @@ -210,14 +212,19 @@ def test(self):
ret = main(["config", "cache.type", "copy"])
self.assertEqual(ret, 0)

stages = self.dvc.add(self.DATA_DIR)
self.assertEqual(len(stages), 1)
stage = stages[0]
ret = main(["add", self.DATA_DIR])
self.assertEqual(0, ret)

stage_path = self.DATA_DIR + Stage.STAGE_FILE_SUFFIX
stage = load_stage_file(stage_path)
staged_files = self.outs_info(stage)

os.remove(staged_files[0].path)
# move instead of remove, to lock inode assigned to stage_files[0].path
# if we were to use remove, we might end up with same inode assigned to
# newly checked out file
shutil.move(staged_files[0].path, "random_name")

ret = main(["checkout", "--force", stage.relpath])
ret = main(["checkout", "--force", stage_path])
self.assertEqual(ret, 0)

checkedout_files = self.outs_info(stage)
Expand Down
26 changes: 0 additions & 26 deletions tests/test_repro.py
Original file line number Diff line number Diff line change
Expand Up @@ -854,30 +854,6 @@ def check_already_cached(self, stage):
mock_download.assert_not_called()
mock_checkout.assert_called_once()

def corrupted_cache(self):
os.unlink("bar.dvc")

stage = self.dvc.run(
deps=[self.FOO], outs=[self.BAR], cmd="echo bar > bar"
)

with open(self.BAR, "w") as fd:
fd.write("corrupting the cache")

patch_checkout = patch.object(
stage.outs[0], "checkout", wraps=stage.outs[0].checkout
)

patch_run = patch.object(stage, "_run", wraps=stage._run)

with self.dvc.state:
with patch_checkout as mock_checkout:
with patch_run as mock_run:
stage.run()

mock_run.assert_called_once()
mock_checkout.assert_not_called()

@patch("dvc.prompt.confirm", return_value=True)
def test(self, mock_prompt):
if not self.should_test():
Expand Down Expand Up @@ -977,8 +953,6 @@ def test(self, mock_prompt):
self.dvc.checkout(cmd_stage.path, force=True)
self.assertEqual(self.dvc.status(cmd_stage.path), {})

self.corrupted_cache()


class TestReproExternalS3(TestReproExternalBase):
def should_test(self):
Expand Down
31 changes: 31 additions & 0 deletions tests/test_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

from dvc.main import main
from dvc.output import OutputBase
from dvc.repo import Repo as DvcRepo
from dvc.utils import file_md5, load_stage_file
from dvc.system import System
from dvc.stage import Stage, StagePathNotFoundError, StagePathNotDirectoryError
Expand Down Expand Up @@ -920,3 +921,33 @@ def test(self):
@property
def _outs_command(self):
return "--outs-persist"


class TestShouldNotCheckoutUponCorruptedLocalHardlinkCache(TestDvc):
def setUp(self):
super(
TestShouldNotCheckoutUponCorruptedLocalHardlinkCache, self
).setUp()
ret = main(["config", "cache.type", "hardlink"])
self.assertEqual(ret, 0)
self.dvc = DvcRepo(".")

def test(self):
cmd = "cp {} {}".format(self.FOO, self.BAR)
stage = self.dvc.run(deps=[self.FOO], outs=[self.BAR], cmd=cmd)

with open(self.BAR, "w") as fd:
fd.write("corrupting the output cache")

patch_checkout = mock.patch.object(
stage.outs[0], "checkout", wraps=stage.outs[0].checkout
)
patch_run = mock.patch.object(stage, "_run", wraps=stage._run)

with self.dvc.state:
with patch_checkout as mock_checkout:
with patch_run as mock_run:
stage.run()

mock_run.assert_called_once()
mock_checkout.assert_not_called()

0 comments on commit bcd9373

Please sign in to comment.