From 7a01d5fe47be00ae6aec1d5091237ae2ac0de6e3 Mon Sep 17 00:00:00 2001 From: pared Date: Thu, 28 Feb 2019 16:15:53 +0100 Subject: [PATCH 1/2] add: cleanup .gitignore decorator --- dvc/repo/add.py | 7 +++---- dvc/scm/__init__.py | 19 +++++++++++++++++++ dvc/scm/base.py | 12 ++++++++++++ dvc/scm/git.py | 12 ++++++++++++ tests/test_add.py | 22 +++++++++++++++++++++- tests/test_scm.py | 9 ++++----- tests/utils/__init__.py | 6 ++++++ 7 files changed, 77 insertions(+), 10 deletions(-) diff --git a/dvc/repo/add.py b/dvc/repo/add.py index b85883f6d6..be91a9b45d 100644 --- a/dvc/repo/add.py +++ b/dvc/repo/add.py @@ -1,9 +1,11 @@ import os + +from dvc.scm import scm_context from dvc.stage import Stage from dvc.utils import walk_files from dvc.exceptions import RecursiveAddingWhileUsingFilename - +@scm_context def add(repo, target, recursive=False, no_commit=False, fname=None): if recursive and fname: raise RecursiveAddingWhileUsingFilename() @@ -25,9 +27,6 @@ def add(repo, target, recursive=False, no_commit=False, fname=None): for stage in stages: stage.dump() - - repo.remind_to_git_add() - return stages diff --git a/dvc/scm/__init__.py b/dvc/scm/__init__.py index 3ebd2e67bb..2ae851edbe 100644 --- a/dvc/scm/__init__.py +++ b/dvc/scm/__init__.py @@ -5,6 +5,8 @@ from dvc.scm.base import Base from dvc.scm.git import Git +import dvc + def SCM(root_dir, repo=None): # pylint: disable=invalid-name """Returns SCM instance that corresponds to a repo at the specified @@ -21,3 +23,20 @@ def SCM(root_dir, repo=None): # pylint: disable=invalid-name return Git(root_dir, repo=repo) return Base(root_dir, repo=repo) + + +def scm_context(method): + def run(*args, **kw): + repo = args[0] + assert type(repo) is dvc.repo.Repo + scm = repo.scm + try: + result = method(*args, **kw) + scm.reset_ignores() + repo.remind_to_git_add() + return result + except Exception as e: + scm.cleanup_ignores() + raise e + + return run diff --git a/dvc/scm/base.py b/dvc/scm/base.py index 59d082a1e6..fba7b27110 100644 --- a/dvc/scm/base.py +++ b/dvc/scm/base.py @@ -139,3 +139,15 @@ def list_tags(self): # pylint: disable=no-self-use def install(self): """Adds dvc commands to SCM hooks for the repo.""" + + def cleanup_ignores(self): + """ + This method should clean up ignores (eg. entries in .gitignore), + use, when method editing ignores (eg. add, run, import) fails to + perform its task. + """ + + def reset_ignores(self): + """ + Method to reset in-memory ignore storing mechanism. + """ diff --git a/dvc/scm/git.py b/dvc/scm/git.py index 3e93490cbb..96d30dff80 100644 --- a/dvc/scm/git.py +++ b/dvc/scm/git.py @@ -39,6 +39,8 @@ def __init__(self, root_dir=os.curdir, repo=None): libpath = env.get("LD_LIBRARY_PATH", None) self.git.git.update_environment(LD_LIBRARY_PATH=libpath) + self.ignored_paths = [] + @staticmethod def is_repo(root_dir): return os.path.isdir(Git._get_git_dir(root_dir)) @@ -98,6 +100,8 @@ def ignore(self, path): if self.repo is not None: self.repo.files_to_git_add.append(os.path.relpath(gitignore)) + self.ignored_paths.append(path) + def ignore_remove(self, path): entry, gitignore = self._get_gitignore(path) @@ -173,3 +177,11 @@ def _install_hook(self, name, cmd): def install(self): self._install_hook("post-checkout", "checkout") self._install_hook("pre-commit", "status") + + def cleanup_ignores(self): + for path in self.ignored_paths: + self.ignore_remove(path) + self.reset_ignores() + + def reset_ignores(self): + self.ignored_paths = [] diff --git a/tests/test_add.py b/tests/test_add.py index 447b11d164..cfc9eabcdb 100644 --- a/tests/test_add.py +++ b/tests/test_add.py @@ -21,7 +21,7 @@ from dvc.repo import Repo as DvcRepo from tests.basic_env import TestDvc -from tests.utils import spy +from tests.utils import spy, get_gitignore_content from tests.utils.logger import MockLoggerHandlers, ConsoleFontColorsRemover @@ -457,3 +457,23 @@ def test(self): self.assertEqual(0, ret) self.assertTrue(os.path.exists("bar.dvc")) self.assertFalse(os.path.exists("foo.dvc")) + + +class TestShouldCleanUpAfterFailedAdd(TestDvc): + def test(self): + ret = main(["add", self.FOO]) + self.assertEqual(0, ret) + + foo_stage_file = self.FOO + Stage.STAGE_FILE_SUFFIX + # corrupt stage file + with open(foo_stage_file, "a+") as file: + file.write("this will break yaml file structure") + + ret = main(["add", self.BAR]) + self.assertEqual(1, ret) + + bar_stage_file = self.BAR + Stage.STAGE_FILE_SUFFIX + self.assertFalse(os.path.exists(bar_stage_file)) + + gitignore_content = get_gitignore_content() + self.assertFalse(any(self.BAR in line for line in gitignore_content)) diff --git a/tests/test_scm.py b/tests/test_scm.py index 7c66cd5968..fd2a2a6222 100644 --- a/tests/test_scm.py +++ b/tests/test_scm.py @@ -5,6 +5,7 @@ from dvc.scm import SCM, Base, Git from tests.basic_env import TestDir, TestGit, TestGitSubmodule +from tests.utils import get_gitignore_content class TestSCM(TestDir): @@ -43,11 +44,9 @@ def test_commit_in_submodule(self): class TestIgnore(TestGit): def _count_gitignore(self): - with open(Git.GITIGNORE, "r") as fd: - lines = fd.readlines() - return len( - list(filter(lambda x: x.strip() == "/" + self.FOO, lines)) - ) + lines = get_gitignore_content() + + return len(list(filter(lambda x: x.strip() == "/" + self.FOO, lines))) def test(self): git = Git(self._root_dir) diff --git a/tests/utils/__init__.py b/tests/utils/__init__.py index f452a1c31e..5188d19a24 100644 --- a/tests/utils/__init__.py +++ b/tests/utils/__init__.py @@ -1,3 +1,4 @@ +import yaml import os from dvc.scm import Git @@ -21,6 +22,11 @@ def get_gitignore_content(): return gitignore.readlines() +def load_stage_file(path): + with open(path, "r") as fobj: + return yaml.safe_load(fobj) + + @contextmanager def cd(newdir): prevdir = os.getcwd() From f8711b3faa06cf095f594b461da9476144e75b85 Mon Sep 17 00:00:00 2001 From: pared Date: Thu, 7 Mar 2019 10:30:11 +0100 Subject: [PATCH 2/2] repo/scm: move tracking reminder from repo to scm --- dvc/repo/__init__.py | 13 ------------- dvc/repo/add.py | 1 + dvc/repo/imp.py | 8 ++++---- dvc/repo/move.py | 4 ++-- dvc/repo/reproduce.py | 6 ++---- dvc/repo/run.py | 6 +++--- dvc/scm/__init__.py | 9 ++++----- dvc/scm/base.py | 11 +++++++++++ dvc/scm/git.py | 21 +++++++++++++++++---- dvc/stage.py | 2 +- tests/unit/scm/__init__.py | 0 tests/unit/scm/scm.py | 36 ++++++++++++++++++++++++++++++++++++ 12 files changed, 81 insertions(+), 36 deletions(-) create mode 100644 tests/unit/scm/__init__.py create mode 100644 tests/unit/scm/scm.py diff --git a/dvc/repo/__init__.py b/dvc/repo/__init__.py index 176bda17b7..3831d695cc 100644 --- a/dvc/repo/__init__.py +++ b/dvc/repo/__init__.py @@ -62,8 +62,6 @@ def __init__(self, root_dir=None): self.metrics = Metrics(self) - self.files_to_git_add = [] - self._ignore() self.updater.check() @@ -92,17 +90,6 @@ def find_dvc_dir(root=None): root_dir = Repo.find_root(root) return os.path.join(root_dir, Repo.DVC_DIR) - def remind_to_git_add(self): - if not self.files_to_git_add: - return - - logger.info( - "\n" - "To track the changes with git run:\n" - "\n" - "\tgit add {files}".format(files=" ".join(self.files_to_git_add)) - ) - @staticmethod def init(root_dir=os.curdir, no_scm=False, force=False): from dvc.repo.init import init diff --git a/dvc/repo/add.py b/dvc/repo/add.py index be91a9b45d..27caffc27e 100644 --- a/dvc/repo/add.py +++ b/dvc/repo/add.py @@ -5,6 +5,7 @@ from dvc.utils import walk_files from dvc.exceptions import RecursiveAddingWhileUsingFilename + @scm_context def add(repo, target, recursive=False, no_commit=False, fname=None): if recursive and fname: diff --git a/dvc/repo/imp.py b/dvc/repo/imp.py index 39e1e00f3f..fc4c2acdf0 100644 --- a/dvc/repo/imp.py +++ b/dvc/repo/imp.py @@ -1,3 +1,7 @@ +from dvc.scm import scm_context + + +@scm_context def imp(self, url, out, resume=False): from dvc.stage import Stage @@ -9,13 +13,9 @@ def imp(self, url, out, resume=False): self.check_dag(self.stages() + [stage]) - self.files_to_git_add = [] - with self.state: stage.run(resume=resume) stage.dump() - self.remind_to_git_add() - return stage diff --git a/dvc/repo/move.py b/dvc/repo/move.py index c186fbc1c2..360532ed98 100644 --- a/dvc/repo/move.py +++ b/dvc/repo/move.py @@ -1,6 +1,7 @@ import os from dvc.exceptions import MoveNotDataSourceError +from dvc.scm import scm_context def _expand_target_path(from_path, to_path): @@ -9,6 +10,7 @@ def _expand_target_path(from_path, to_path): return to_path +@scm_context def move(self, from_path, to_path): """ Renames an output file and modifies the stage associated @@ -64,5 +66,3 @@ def move(self, from_path, to_path): out.move(to_out) stage.dump() - - self.remind_to_git_add() diff --git a/dvc/repo/reproduce.py b/dvc/repo/reproduce.py index 0bc6d8656a..762af22751 100644 --- a/dvc/repo/reproduce.py +++ b/dvc/repo/reproduce.py @@ -4,6 +4,7 @@ import dvc.logger as logger from dvc.exceptions import ReproductionError +from dvc.scm import scm_context def _reproduce_stage(stages, node, force, dry, interactive, no_commit): @@ -27,6 +28,7 @@ def _reproduce_stage(stages, node, force, dry, interactive, no_commit): return [stage] +@scm_context def reproduce( self, target=None, @@ -65,8 +67,6 @@ def reproduce( else: targets.append(target) - self.files_to_git_add = [] - ret = [] with self.state: for target in targets: @@ -82,8 +82,6 @@ def reproduce( ) ret.extend(stages) - self.remind_to_git_add() - return ret diff --git a/dvc/repo/run.py b/dvc/repo/run.py index d2bfffc965..889310dfb6 100644 --- a/dvc/repo/run.py +++ b/dvc/repo/run.py @@ -1,6 +1,9 @@ from __future__ import unicode_literals +from dvc.scm import scm_context + +@scm_context def run( self, cmd=None, @@ -53,13 +56,10 @@ def run( self.check_dag(self.stages() + [stage]) - self.files_to_git_add = [] with self.state: if not no_exec: stage.run(no_commit=no_commit) stage.dump() - self.remind_to_git_add() - return stage diff --git a/dvc/scm/__init__.py b/dvc/scm/__init__.py index 2ae851edbe..e3f2f4b635 100644 --- a/dvc/scm/__init__.py +++ b/dvc/scm/__init__.py @@ -28,15 +28,14 @@ def SCM(root_dir, repo=None): # pylint: disable=invalid-name def scm_context(method): def run(*args, **kw): repo = args[0] - assert type(repo) is dvc.repo.Repo - scm = repo.scm + assert isinstance(repo, dvc.repo.Repo) try: result = method(*args, **kw) - scm.reset_ignores() - repo.remind_to_git_add() + repo.scm.reset_ignores() + repo.scm.remind_to_track() return result except Exception as e: - scm.cleanup_ignores() + repo.scm.cleanup_ignores() raise e return run diff --git a/dvc/scm/base.py b/dvc/scm/base.py index fba7b27110..07611853af 100644 --- a/dvc/scm/base.py +++ b/dvc/scm/base.py @@ -151,3 +151,14 @@ def reset_ignores(self): """ Method to reset in-memory ignore storing mechanism. """ + + def remind_to_track(self): + """ + Method to remind user to track newly created files handled by scm + """ + + def track_file(self, path): + """ + Method to add file to mechanism that will remind user + to track new files + """ diff --git a/dvc/scm/git.py b/dvc/scm/git.py index 96d30dff80..b2bb76cca8 100644 --- a/dvc/scm/git.py +++ b/dvc/scm/git.py @@ -40,6 +40,7 @@ def __init__(self, root_dir=os.curdir, repo=None): self.git.git.update_environment(LD_LIBRARY_PATH=libpath) self.ignored_paths = [] + self.files_to_track = [] @staticmethod def is_repo(root_dir): @@ -97,8 +98,7 @@ def ignore(self, path): with open(gitignore, "a") as fobj: fobj.write(content) - if self.repo is not None: - self.repo.files_to_git_add.append(os.path.relpath(gitignore)) + self.track_file(os.path.relpath(gitignore)) self.ignored_paths.append(path) @@ -116,8 +116,7 @@ def ignore_remove(self, path): with open(gitignore, "w") as fobj: fobj.writelines(filtered) - if self.repo is not None: - self.repo.files_to_git_add.append(os.path.relpath(gitignore)) + self.track_file(os.path.relpath(gitignore)) def add(self, paths): # NOTE: GitPython is not currently able to handle index version >= 3. @@ -185,3 +184,17 @@ def cleanup_ignores(self): def reset_ignores(self): self.ignored_paths = [] + + def remind_to_track(self): + if not self.files_to_track: + return + + logger.info( + "\n" + "To track the changes with git run:\n" + "\n" + "\tgit add {files}".format(files=" ".join(self.files_to_track)) + ) + + def track_file(self, path): + self.files_to_track.append(path) diff --git a/dvc/stage.py b/dvc/stage.py index ef2f7c8dc2..4b428e103c 100644 --- a/dvc/stage.py +++ b/dvc/stage.py @@ -552,7 +552,7 @@ def dump(self): with open(fname, "w") as fd: yaml.safe_dump(d, fd, default_flow_style=False) - self.repo.files_to_git_add.append(os.path.relpath(fname)) + self.repo.scm.track_file(os.path.relpath(fname)) def _compute_md5(self): from dvc.output.local import OutputLOCAL diff --git a/tests/unit/scm/__init__.py b/tests/unit/scm/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/unit/scm/scm.py b/tests/unit/scm/scm.py new file mode 100644 index 0000000000..40061bd3ac --- /dev/null +++ b/tests/unit/scm/scm.py @@ -0,0 +1,36 @@ +from dvc.repo import Repo +from dvc.scm import scm_context, Base +from unittest import TestCase +import mock + + +class TestScmContext(TestCase): + def setUp(self): + self.repo_mock = mock.Mock(spec=Repo) + self.scm_mock = mock.Mock(spec=Base) + self.repo_mock.scm = self.scm_mock + + def test_should_successfully_perform_method(self): + method = mock.Mock() + wrapped = scm_context(method) + + wrapped(self.repo_mock) + + self.assertEqual(1, method.call_count) + self.assertEqual(1, self.scm_mock.reset_ignores.call_count) + self.assertEqual(1, self.scm_mock.remind_to_track.call_count) + + self.assertEqual(0, self.scm_mock.cleanup_ignores.call_count) + + def test_should_throw_and_cleanup(self): + method = mock.Mock(side_effect=Exception("some problem")) + wrapped = scm_context(method) + + with self.assertRaises(Exception): + wrapped(self.repo_mock) + + self.assertEqual(1, method.call_count) + self.assertEqual(1, self.scm_mock.cleanup_ignores.call_count) + + self.assertEqual(0, self.scm_mock.reset_ignores.call_count) + self.assertEqual(0, self.scm_mock.remind_to_track.call_count)