diff --git a/dvc/dvcfile.py b/dvc/dvcfile.py index f6e0e2edeb..5092668ab7 100644 --- a/dvc/dvcfile.py +++ b/dvc/dvcfile.py @@ -37,6 +37,11 @@ PIPELINE_LOCK = "dvc.lock" +class FileIsGitIgnored(DvcException): + def __init__(self, path): + super().__init__(f"'{path}' is git-ignored.") + + class LockfileCorruptedError(DvcException): pass @@ -103,18 +108,29 @@ def relpath(self): def exists(self): return self.repo.tree.exists(self.path) + def _is_git_ignored(self): + from dvc.tree.local import LocalTree + + return isinstance( + self.repo.tree, LocalTree + ) and self.repo.scm.is_ignored(self.path) + def _load(self): # it raises the proper exceptions by priority: # 1. when the file doesn't exists # 2. filename is not a DVC-file # 3. path doesn't represent a regular file + # 4. when the file is git ignored if not self.exists(): - raise StageFileDoesNotExistError(self.path) + is_ignored = self.repo.tree.exists(self.path, use_dvcignore=False) + raise StageFileDoesNotExistError(self.path, dvc_ignored=is_ignored) + if self.verify: check_dvc_filename(self.path) if not self.repo.tree.isfile(self.path): raise StageFileIsNotDvcFileError(self.path) - + if self._is_git_ignored(): + raise FileIsGitIgnored(self.path) with self.repo.tree.open(self.path) as fd: stage_text = fd.read() d = parse_yaml(stage_text, self.path) diff --git a/dvc/repo/stage.py b/dvc/repo/stage.py index 0366d7da88..0bb3b45c38 100644 --- a/dvc/repo/stage.py +++ b/dvc/repo/stage.py @@ -3,6 +3,7 @@ import os import typing from contextlib import suppress +from functools import partial from typing import ( Callable, Iterable, @@ -371,13 +372,33 @@ def collect_repo(self, onerror: Callable[[str, Exception], None] = None): the collection. """ from dvc.dvcfile import is_valid_filename + from dvc.tree.local import LocalTree - stages = [] + scm = self.repo.scm + sep = os.sep outs: Set[str] = set() + + is_local_tree = isinstance(self.tree, LocalTree) + + def is_ignored(path): + # apply only for the local tree + return is_local_tree and scm.is_ignored(path) + + def is_dvcfile_and_not_ignored(root, file): + return is_valid_filename(file) and not is_ignored( + f"{root}{sep}{file}" + ) + + def is_out_or_ignored(root, directory): + dir_path = f"{root}{sep}{directory}" + return dir_path in outs or is_ignored(dir_path) + + stages = [] for root, dirs, files in self.tree.walk(self.repo.root_dir): - for file_name in filter(is_valid_filename, files): - file_path = os.path.join(root, file_name) + dvcfile_filter = partial(is_dvcfile_and_not_ignored, root) + for file in filter(dvcfile_filter, files): + file_path = os.path.join(root, file) try: new_stages = self.load_file(file_path) except DvcException as exc: @@ -393,5 +414,5 @@ def collect_repo(self, onerror: Callable[[str, Exception], None] = None): for out in stage.outs if out.scheme == "local" ) - dirs[:] = [d for d in dirs if os.path.join(root, d) not in outs] + dirs[:] = [d for d in dirs if not is_out_or_ignored(root, d)] return stages diff --git a/dvc/stage/exceptions.py b/dvc/stage/exceptions.py index c842a08d32..43744d2365 100644 --- a/dvc/stage/exceptions.py +++ b/dvc/stage/exceptions.py @@ -14,9 +14,13 @@ class StageFileFormatError(DvcException): class StageFileDoesNotExistError(DvcException): - def __init__(self, fname): + DVC_IGNORED = "is dvc-ignored" + DOES_NOT_EXIST = "does not exist" + + def __init__(self, fname, dvc_ignored=False): self.file = fname - super().__init__(f"'{self.file}' does not exist.") + message = self.DVC_IGNORED if dvc_ignored else self.DOES_NOT_EXIST + super().__init__(f"'{self.file}' {message}") class StageFileAlreadyExistsError(DvcException): diff --git a/tests/func/test_stage_load.py b/tests/func/test_stage_load.py index 8f8e20c92b..d33a47b98c 100644 --- a/tests/func/test_stage_load.py +++ b/tests/func/test_stage_load.py @@ -4,7 +4,7 @@ import pytest from funcy import raiser -from dvc.dvcfile import PIPELINE_FILE +from dvc.dvcfile import PIPELINE_FILE, FileIsGitIgnored from dvc.exceptions import NoOutputOrStageError from dvc.path_info import PathInfo from dvc.repo import Repo @@ -451,3 +451,45 @@ def test_collect_repo_callback(tmp_dir, dvc, mocker): file_path, exc = mock.call_args[0] assert file_path == PIPELINE_FILE assert isinstance(exc, StageFileFormatError) + + +def test_gitignored_collect_repo(tmp_dir, dvc, scm): + (stage,) = tmp_dir.dvc_gen({"data": {"foo": "foo", "bar": "bar"}}) + + assert dvc.stage.collect_repo() == [stage] + + scm.ignore(stage.path) + scm._reset() + + assert not dvc.stage.collect_repo() + + +def test_gitignored_file_try_collect_granular_for_data_files( + tmp_dir, dvc, scm +): + (stage,) = tmp_dir.dvc_gen({"data": {"foo": "foo", "bar": "bar"}}) + path = PathInfo("data") / "foo" + + assert dvc.stage.collect_granular(str(path)) == [ + (stage, PathInfo(tmp_dir / path)) + ] + + scm.ignore(stage.path) + dvc._reset() + + with pytest.raises(NoOutputOrStageError): + dvc.stage.collect_granular(str(path)) + + +def test_gitignored_file_try_collect_granular_for_dvc_yaml_files( + tmp_dir, dvc, scm, stages +): + assert dvc.stage.collect_granular("bar") == [ + (stages["copy-foo-bar"], PathInfo(tmp_dir / "bar")) + ] + + scm.ignore(tmp_dir / "dvc.yaml") + scm._reset() + + with pytest.raises(FileIsGitIgnored): + dvc.stage.collect_granular("bar") diff --git a/tests/unit/test_dvcfile.py b/tests/unit/test_dvcfile.py index 43b6bbcbda..f0017c5172 100644 --- a/tests/unit/test_dvcfile.py +++ b/tests/unit/test_dvcfile.py @@ -4,6 +4,7 @@ PIPELINE_FILE, PIPELINE_LOCK, Dvcfile, + FileIsGitIgnored, PipelineFile, SingleStageFile, ) @@ -41,12 +42,23 @@ def test_pipelines_single_stage_file(path): @pytest.mark.parametrize("file", ["stage.dvc", "dvc.yaml"]) -def test_stage_load_on_not_existing_file(tmp_dir, dvc, file): +@pytest.mark.parametrize("is_dvcignored", [True, False]) +def test_stage_load_on_not_existing_file(tmp_dir, dvc, file, is_dvcignored): dvcfile = Dvcfile(dvc, file) + if is_dvcignored: + (tmp_dir / ".dvcignore").write_text(file) + assert not dvcfile.exists() - with pytest.raises(StageFileDoesNotExistError): + with pytest.raises(StageFileDoesNotExistError) as exc_info: assert dvcfile.stages.values() + + assert str(exc_info.value) == f"'{file}' does not exist" + + +@pytest.mark.parametrize("file", ["stage.dvc", "dvc.yaml"]) +def test_stage_load_on_non_file(tmp_dir, dvc, file): (tmp_dir / file).mkdir() + dvcfile = Dvcfile(dvc, file) with pytest.raises(StageFileIsNotDvcFileError): assert dvcfile.stages.values() @@ -83,3 +95,31 @@ def test_dump_stage(tmp_dir, dvc): assert (tmp_dir / PIPELINE_FILE).exists() assert (tmp_dir / PIPELINE_LOCK).exists() assert list(dvcfile.stages.values()) == [stage] + + +@pytest.mark.parametrize("file", ["stage.dvc", "dvc.yaml"]) +def test_stage_load_file_exists_but_dvcignored(tmp_dir, dvc, scm, file): + (tmp_dir / file).write_text("") + (tmp_dir / ".dvcignore").write_text(file) + + dvcfile = Dvcfile(dvc, file) + with pytest.raises(StageFileDoesNotExistError) as exc_info: + assert dvcfile.stages.values() + + assert str(exc_info.value) == f"'{file}' is dvc-ignored" + + +@pytest.mark.parametrize("file", ["foo.dvc", "dvc.yaml"]) +def test_try_loading_dvcfile_that_is_gitignored(tmp_dir, dvc, scm, file): + with open(tmp_dir / ".gitignore", "a+") as fd: + fd.write(file) + + # create a file just to avoid other checks + (tmp_dir / file).write_text("") + scm._reset() + + dvcfile = Dvcfile(dvc, file) + with pytest.raises(FileIsGitIgnored) as exc_info: + dvcfile._load() + + assert str(exc_info.value) == f"'{file}' is git-ignored."