From 23f55aee7ddc262412d34203a12a074a3c140831 Mon Sep 17 00:00:00 2001 From: pawel Date: Thu, 30 Jan 2020 16:02:53 +0100 Subject: [PATCH] SCM: allow multiple DVC repos inside single SCM repo --- dvc/command/init.py | 14 ++++++++++- dvc/config.py | 1 + dvc/ignore.py | 17 ++++++++++++- dvc/repo/__init__.py | 7 +++--- dvc/repo/init.py | 29 +++++++++++++++------ dvc/scm/__init__.py | 15 +++++++---- dvc/scm/base.py | 6 ++++- dvc/scm/git/__init__.py | 20 +++++++-------- tests/dir_helpers.py | 4 ++- tests/func/test_ignore.py | 20 +++++++++++---- tests/func/test_init.py | 53 +++++++++++++++++++++++++++++++-------- tests/func/test_scm.py | 35 ++++++++++++++++---------- tests/func/test_stage.py | 17 +++++++++++++ 13 files changed, 178 insertions(+), 60 deletions(-) diff --git a/dvc/command/init.py b/dvc/command/init.py index fe0951d488..03821eec3d 100644 --- a/dvc/command/init.py +++ b/dvc/command/init.py @@ -15,7 +15,10 @@ def run(self): try: self.repo = Repo.init( - ".", no_scm=self.args.no_scm, force=self.args.force + ".", + no_scm=self.args.no_scm, + force=self.args.force, + subdir=self.args.subdir, ) self.config = self.repo.config except InitError: @@ -56,4 +59,13 @@ def add_parser(subparsers, parent_parser): "This operation removes local cache." ), ) + init_parser.add_argument( + "--subdir", + action="store_true", + default=False, + help=( + "Necessary for running this command inside a subdirectory of a " + "parent SCM repository." + ), + ) init_parser.set_defaults(func=CmdInit) diff --git a/dvc/config.py b/dvc/config.py index 6a59fff1c4..1d6118ec45 100644 --- a/dvc/config.py +++ b/dvc/config.py @@ -115,6 +115,7 @@ class RelPath(str): Optional("interactive", default=False): Bool, Optional("analytics", default=True): Bool, Optional("hardlink_lock", default=False): Bool, + Optional("no_scm", default=False): Bool, }, "cache": { "local": str, diff --git a/dvc/ignore.py b/dvc/ignore.py index 95494670a3..6db132d7b2 100644 --- a/dvc/ignore.py +++ b/dvc/ignore.py @@ -71,10 +71,25 @@ def __eq__(self, other): return self.basenames == other.basenames +class DvcIgnoreRepo(DvcIgnore): + def __call__(self, root, dirs, files): + def is_dvc_repo(directory): + from dvc.repo import Repo + + return os.path.isdir(os.path.join(directory, Repo.DVC_DIR)) + + dirs = [d for d in dirs if not is_dvc_repo(d)] + + return dirs, files + + class DvcIgnoreFilter(object): def __init__(self, tree): self.tree = tree - self.ignores = {DvcIgnoreDirs([".git", ".hg", ".dvc"])} + self.ignores = { + DvcIgnoreDirs([".git", ".hg", ".dvc"]), + DvcIgnoreRepo(), + } for root, dirs, files in self.tree.walk(self.tree.tree_root): self._update(root) dirs[:], files[:] = self(root, dirs, files) diff --git a/dvc/repo/__init__.py b/dvc/repo/__init__.py index 6205e24e51..5912d5dc57 100644 --- a/dvc/repo/__init__.py +++ b/dvc/repo/__init__.py @@ -78,7 +78,8 @@ def __init__(self, root_dir=None): self.config = Config(self.dvc_dir) - self.scm = SCM(self.root_dir) + no_scm = self.config["core"].get("no_scm", False) + self.scm = SCM(self.root_dir, no_scm=no_scm) self.tree = WorkingTree(self.root_dir) @@ -146,10 +147,10 @@ def find_dvc_dir(cls, root=None): return os.path.join(root_dir, cls.DVC_DIR) @staticmethod - def init(root_dir=os.curdir, no_scm=False, force=False): + def init(root_dir=os.curdir, no_scm=False, force=False, subdir=False): from dvc.repo.init import init - init(root_dir=root_dir, no_scm=no_scm, force=force) + init(root_dir=root_dir, no_scm=no_scm, force=force, subdir=subdir) return Repo(root_dir) def unprotect(self, target): diff --git a/dvc/repo/init.py b/dvc/repo/init.py index 1359cf2ac9..3710ad344a 100644 --- a/dvc/repo/init.py +++ b/dvc/repo/init.py @@ -7,8 +7,8 @@ from dvc.config import Config from dvc.exceptions import InitError from dvc.repo import Repo -from dvc.scm import NoSCM from dvc.scm import SCM +from dvc.scm.base import SCMError from dvc.utils import boxify from dvc.utils import relpath from dvc.utils.fs import remove @@ -44,7 +44,7 @@ def _welcome_message(): logger.info(msg) -def init(root_dir=os.curdir, no_scm=False, force=False): +def init(root_dir=os.curdir, no_scm=False, force=False, subdir=False): """ Creates an empty repo on the given directory -- basically a `.dvc` directory with subdirectories for configuration and cache. @@ -63,15 +63,23 @@ def init(root_dir=os.curdir, no_scm=False, force=False): Raises: KeyError: Raises an exception. """ + + if no_scm and subdir: + raise InitError( + "Cannot initialize repo with `--no-scm` and `--subdir`" + ) + root_dir = os.path.realpath(root_dir) dvc_dir = os.path.join(root_dir, Repo.DVC_DIR) - scm = SCM(root_dir) - if isinstance(scm, NoSCM) and not no_scm: + + try: + scm = SCM(root_dir, search_parent_directories=subdir, no_scm=no_scm) + except SCMError: raise InitError( - "{repo} is not tracked by any supported scm tool (e.g. git). " - "Use `--no-scm` if you don't want to use any scm.".format( - repo=root_dir - ) + "{repo} is not tracked by any supported SCM tool (e.g. Git). " + "Use `--no-scm` if you don't want to use any SCM or " + "`--subdir` if initializing inside a subdirectory of a parent SCM " + "repository.".format(repo=root_dir) ) if os.path.isdir(dvc_dir): @@ -87,6 +95,11 @@ def init(root_dir=os.curdir, no_scm=False, force=False): os.mkdir(dvc_dir) config = Config.init(dvc_dir) + + if no_scm: + with config.edit() as conf: + conf["core"]["no_scm"] = True + proj = Repo(root_dir) scm.add([config.files["repo"]]) diff --git a/dvc/scm/__init__.py b/dvc/scm/__init__.py index b6b867232b..62f106314d 100644 --- a/dvc/scm/__init__.py +++ b/dvc/scm/__init__.py @@ -10,18 +10,23 @@ class NoSCM(Base): pass -def SCM(root_dir): # pylint: disable=invalid-name +def SCM( + root_dir, search_parent_directories=True, no_scm=False +): # pylint: disable=invalid-name """Returns SCM instance that corresponds to a repo at the specified path. Args: root_dir (str): path to a root directory of the repo. - repo (dvc.repo.Repo): DVC repo instance that root_dir belongs to. + search_parent_directories (bool): whether to look for repo root in + parent directories. + no_scm (bool): return NoSCM if True. Returns: dvc.scm.base.Base: SCM instance. """ - if Git.is_repo(root_dir) or Git.is_submodule(root_dir): - return Git(root_dir) - return NoSCM(root_dir) + if no_scm: + return NoSCM(root_dir) + + return Git(root_dir, search_parent_directories=search_parent_directories) diff --git a/dvc/scm/base.py b/dvc/scm/base.py index 51a52217bd..779b1009fa 100644 --- a/dvc/scm/base.py +++ b/dvc/scm/base.py @@ -28,7 +28,11 @@ class Base(object): """Base class for source control management driver implementations.""" def __init__(self, root_dir=os.curdir): - self.root_dir = os.path.realpath(root_dir) + self._root_dir = os.path.realpath(root_dir) + + @property + def root_dir(self): + return self._root_dir def __repr__(self): return "{class_name}: '{directory}'".format( diff --git a/dvc/scm/git/__init__.py b/dvc/scm/git/__init__.py index a2b9c2569b..efcf5b5aa6 100644 --- a/dvc/scm/git/__init__.py +++ b/dvc/scm/git/__init__.py @@ -22,7 +22,7 @@ class Git(Base): GITIGNORE = ".gitignore" GIT_DIR = ".git" - def __init__(self, root_dir=os.curdir): + def __init__(self, root_dir=os.curdir, search_parent_directories=True): """Git class constructor. Requires `Repo` class from `git` module (from gitpython package). """ @@ -32,10 +32,12 @@ def __init__(self, root_dir=os.curdir): from git.exc import InvalidGitRepositoryError try: - self.repo = git.Repo(self.root_dir) + self.repo = git.Repo( + root_dir, search_parent_directories=search_parent_directories + ) except InvalidGitRepositoryError: msg = "{} is not a git repository" - raise SCMError(msg.format(self.root_dir)) + raise SCMError(msg.format(root_dir)) # NOTE: fixing LD_LIBRARY_PATH for binary built by PyInstaller. # http://pyinstaller.readthedocs.io/en/stable/runtime-information.html @@ -46,6 +48,10 @@ def __init__(self, root_dir=os.curdir): self.ignored_paths = [] self.files_to_track = set() + @property + def root_dir(self): + return self.repo.working_tree_dir + @staticmethod def clone(url, to_path, rev=None): import git @@ -95,14 +101,6 @@ def is_sha(rev): return rev and git.Repo.re_hexsha_shortened.search(rev) - @staticmethod - def is_repo(root_dir): - return os.path.isdir(Git._get_git_dir(root_dir)) - - @staticmethod - def is_submodule(root_dir): - return os.path.isfile(Git._get_git_dir(root_dir)) - @staticmethod def _get_git_dir(root_dir): return os.path.join(root_dir, Git.GIT_DIR) diff --git a/tests/dir_helpers.py b/tests/dir_helpers.py index f4606ba3f4..158bb20531 100644 --- a/tests/dir_helpers.py +++ b/tests/dir_helpers.py @@ -99,7 +99,9 @@ def init(self, *, scm=False, dvc=False): if scm: _git_init(str_path) if dvc: - self.dvc = Repo.init(str_path, no_scm=True) + self.dvc = Repo.init( + str_path, no_scm=not scm and not hasattr(self, "scm") + ) if scm: self.scm = self.dvc.scm if hasattr(self, "dvc") else Git(str_path) if dvc and hasattr(self, "scm"): diff --git a/tests/func/test_ignore.py b/tests/func/test_ignore.py index 7383121dee..a4270750f4 100644 --- a/tests/func/test_ignore.py +++ b/tests/func/test_ignore.py @@ -4,7 +4,12 @@ import pytest from dvc.exceptions import DvcIgnoreInCollectedDirError -from dvc.ignore import DvcIgnore, DvcIgnoreDirs, DvcIgnorePatterns +from dvc.ignore import ( + DvcIgnore, + DvcIgnoreDirs, + DvcIgnorePatterns, + DvcIgnoreRepo, +) from dvc.scm.tree import WorkingTree from dvc.utils import relpath from dvc.compat import fspath_py35, fspath @@ -93,10 +98,15 @@ def test_ignore_collecting_dvcignores(tmp_dir, dvc, dname): ignore_file = tmp_dir / dname / DvcIgnore.DVCIGNORE_FILE ignore_file.write_text("foo") - assert dvc.tree.dvcignore.ignores == { - DvcIgnoreDirs([".git", ".hg", ".dvc"]), - DvcIgnorePatterns(fspath(top_ignore_file), WorkingTree(dvc.root_dir)), - } + assert len(dvc.tree.dvcignore.ignores) == 3 + assert DvcIgnoreDirs([".git", ".hg", ".dvc"]) in dvc.tree.dvcignore.ignores + assert ( + DvcIgnorePatterns(fspath(top_ignore_file), WorkingTree(dvc.root_dir)) + in dvc.tree.dvcignore.ignores + ) + assert any( + i for i in dvc.tree.dvcignore.ignores if isinstance(i, DvcIgnoreRepo) + ) def test_ignore_on_branch(tmp_dir, scm, dvc): diff --git a/tests/func/test_init.py b/tests/func/test_init.py index df9fca8983..39f2e2d949 100644 --- a/tests/func/test_init.py +++ b/tests/func/test_init.py @@ -1,5 +1,8 @@ +import logging import os +from dvc.compat import fspath +from dvc.config import Config from dvc.exceptions import InitError from dvc.main import main from dvc.repo import Repo as DvcRepo @@ -49,21 +52,20 @@ def test_cli(self): self.assertNotEqual(ret, 0) -class TestInitNoSCM(TestDir): - def _test_init(self): - self.assertTrue(os.path.exists(DvcRepo.DVC_DIR)) - self.assertTrue(os.path.isdir(DvcRepo.DVC_DIR)) +def test_init_no_scm_api(tmp_dir): + repo = DvcRepo.init(no_scm=True) - def test_api(self): - DvcRepo.init(no_scm=True) + assert (tmp_dir / DvcRepo.DVC_DIR).is_dir() + assert repo.config["core"]["no_scm"] - self._test_init() - def test_cli(self): - ret = main(["init", "--no-scm"]) - self.assertEqual(ret, 0) +def test_init_no_scm_cli(tmp_dir): + ret = main(["init", "--no-scm"]) + assert ret == 0 - self._test_init() + dvc_path = tmp_dir / DvcRepo.DVC_DIR + assert dvc_path.is_dir() + assert Config(fspath(dvc_path))["core"]["no_scm"] def test_init_quiet_should_not_display_welcome_screen(tmp_dir, scm, caplog): @@ -71,3 +73,32 @@ def test_init_quiet_should_not_display_welcome_screen(tmp_dir, scm, caplog): assert 0 == ret assert "" == caplog.text + + +def test_allow_init_dvc_subdir(tmp_dir, scm, monkeypatch): + tmp_dir.gen({"subdir": {}}) + + with monkeypatch.context() as m: + m.chdir("subdir") + assert main(["init", "--subdir"]) == 0 + + repo = DvcRepo("subdir") + assert repo.root_dir == fspath(tmp_dir / "subdir") + assert repo.scm.root_dir == fspath(tmp_dir) + + +def test_subdir_init_no_option(tmp_dir, scm, monkeypatch, caplog): + tmp_dir.gen({"subdir": {}}) + + caplog.clear() + with monkeypatch.context() as m: + m.chdir("subdir") + with caplog.at_level(logging.ERROR, logger="dvc"): + assert main(["init"]) == 1 + + assert ( + "{} is not tracked by any supported SCM tool (e.g. Git). " + "Use `--no-scm` if you don't want to use any SCM or " + "`--subdir` if initializing inside a subdirectory of a parent SCM " + "repository.".format(fspath(tmp_dir / "subdir")) + ) in caplog.text diff --git a/tests/func/test_scm.py b/tests/func/test_scm.py index 185c8b60cd..a3d9bdf5e3 100644 --- a/tests/func/test_scm.py +++ b/tests/func/test_scm.py @@ -1,31 +1,43 @@ import os +import pytest from git import Repo from dvc.scm import Git from dvc.scm import NoSCM from dvc.scm import SCM +from dvc.scm.base import SCMError from dvc.system import System from dvc.compat import fspath -from tests.basic_env import TestDir from tests.basic_env import TestGit from tests.basic_env import TestGitSubmodule from tests.utils import get_gitignore_content -class TestSCM(TestDir): - def test_none(self): - self.assertIsInstance(SCM(self._root_dir), NoSCM) +def test_init_none(tmp_dir): + assert isinstance(SCM(fspath(tmp_dir), no_scm=True), NoSCM) - def test_git(self): - Repo.init(os.curdir) - self.assertIsInstance(SCM(self._root_dir), Git) +def test_init_git(tmp_dir): + Repo.init(fspath(tmp_dir)) + assert isinstance(SCM(fspath(tmp_dir)), Git) -class TestSCMGit(TestGit): - def test_is_repo(self): - self.assertTrue(Git.is_repo(os.curdir)) +def test_init_no_git(tmp_dir): + with pytest.raises(SCMError): + SCM(fspath(tmp_dir)) + + +def test_init_sub_dir(tmp_dir): + Repo.init(fspath(tmp_dir)) + subdir = tmp_dir / "dir" + subdir.mkdir() + + scm = SCM(fspath(subdir)) + assert scm.root_dir == fspath(tmp_dir) + + +class TestSCMGit(TestGit): def test_commit(self): G = Git(self._root_dir) G.add(["foo"]) @@ -52,9 +64,6 @@ class TestSCMGitSubmodule(TestGitSubmodule): def test_git_submodule(self): self.assertIsInstance(SCM(os.curdir), Git) - def test_is_submodule(self): - self.assertTrue(Git.is_submodule(os.curdir)) - def test_commit_in_submodule(self): G = Git(self._root_dir) G.add(["foo"]) diff --git a/tests/func/test_stage.py b/tests/func/test_stage.py index 949e8b5671..64c934fb67 100644 --- a/tests/func/test_stage.py +++ b/tests/func/test_stage.py @@ -5,6 +5,7 @@ from dvc.main import main from dvc.output.local import OutputLOCAL from dvc.remote.local import RemoteLOCAL +from dvc.repo import Repo from dvc.stage import Stage from dvc.stage import StageFileFormatError from dvc.utils.stage import dump_stage_file @@ -179,3 +180,19 @@ def test_meta_is_preserved(tmp_dir, dvc): new_data = load_stage_file(stage.path) assert new_data["meta"] == data["meta"] + + +def test_parent_repo_collect_stages(tmp_dir, scm, dvc): + tmp_dir.gen({"subdir": {}}) + subrepo_dir = tmp_dir / "subdir" + + with subrepo_dir.chdir(): + subrepo = Repo.init(subdir=True) + subrepo_dir.gen("subrepo_file", "subrepo file content") + subrepo.add("subrepo_file") + + stages = dvc.collect(None) + subrepo_stages = subrepo.collect(None) + + assert stages == [] + assert subrepo_stages != []