Skip to content

Commit

Permalink
repo: Add ignore_revs.
Browse files Browse the repository at this point in the history
Support to write a list of line-separated revisions to `.dvc/ignore_revs`.
If `.dvc/ignore_revs` exists, parse the revisions and use them in `brancher` to skip.

Allows to skip broken commits in `gc`, `pull` and `push`.

Closes #5037
Closes #5066
Closes #7585
  • Loading branch information
daavoo committed Aug 5, 2022
1 parent a9d01a0 commit 947b624
Show file tree
Hide file tree
Showing 6 changed files with 111 additions and 2 deletions.
14 changes: 14 additions & 0 deletions dvc/repo/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -381,6 +381,7 @@ def _ignore(self):
def brancher(self, *args, **kwargs):
from dvc.repo.brancher import brancher

kwargs["ignore_revs"] = self.ignore_revs
return brancher(self, *args, **kwargs)

def used_objs(
Expand Down Expand Up @@ -448,6 +449,19 @@ def used_objs(
def stages(self): # obsolete, only for backward-compatibility
return self.index.stages

@property
def ignore_revs_file(self):
if self.dvc_dir:
return self.fs.path.join(self.dvc_dir, "ignore_revs")
return ""

@property
def ignore_revs(self):
if self.fs.exists(self.ignore_revs_file):
with self.fs.open(self.ignore_revs_file) as f:
return set(f.read().strip().splitlines())
return set()

def find_outs_by_path(self, path, outs=None, recursive=False, strict=True):
# using `outs_graph` to ensure graph checks are run
outs = outs or self.index.outs_graph
Expand Down
4 changes: 3 additions & 1 deletion dvc/repo/brancher.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Optional
from typing import Optional, Set

from dvc.scm import iter_revs

Expand All @@ -13,6 +13,7 @@ def brancher( # noqa: E302
commit_date: Optional[str] = None,
sha_only=False,
num=1,
ignore_revs: Optional[Set[str]] = None,
):
"""Generator that iterates over specified revisions.
Expand Down Expand Up @@ -71,6 +72,7 @@ def brancher( # noqa: E302
all_experiments=all_experiments,
commit_date=commit_date,
num=num,
ignore_revs=ignore_revs,
)

try:
Expand Down
14 changes: 14 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,3 +250,17 @@ def run(
return stage

return run


@pytest.fixture
def broken_rev(tmp_dir, scm, dvc):
tmp_dir.gen("params.yaml", "foo: 1")
dvc.run(cmd="echo ${foo}", name="foo")

scm.add(["dvc.yaml", "dvc.lock"])
scm.commit("init broken")
_broken_rev = scm.get_rev()

scm.add(["params.yaml"])
scm.commit("fixed")
return _broken_rev
31 changes: 31 additions & 0 deletions tests/func/test_data_cloud.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

import dvc_data
from dvc.cli import main
from dvc.exceptions import DvcException
from dvc.external_repo import clean_repos
from dvc.stage.exceptions import StageNotFound
from dvc.testing.test_remote import ( # noqa, pylint: disable=unused-import
Expand Down Expand Up @@ -461,6 +462,36 @@ def test_push_pull_all(tmp_dir, scm, dvc, local_remote, key, expected):
assert dvc.pull(**{key: True})["fetched"] == expected


def test_push_pull_ignore_revs(tmp_dir, scm, dvc, local_remote, broken_rev):
tmp_dir.dvc_gen({"foo": "foo"}, commit="first")
scm.tag("v1")
dvc.remove("foo.dvc")
tmp_dir.dvc_gen({"bar": "bar"}, commit="second")
scm.tag("v2")
with tmp_dir.branch("branch", new=True):
dvc.remove("bar.dvc")
tmp_dir.dvc_gen({"baz": "baz"}, commit="branch")

with pytest.raises(DvcException):
dvc.push(all_commits=True)

with dvc.fs.open(dvc.ignore_revs_file, "w") as f:
f.write(broken_rev)

assert dvc.push(all_commits=True) == 3

remove(dvc.ignore_revs_file)
clean(["foo", "bar", "baz"], dvc)

with pytest.raises(DvcException):
dvc.pull(all_commits=True)

with dvc.fs.open(dvc.ignore_revs_file, "w") as f:
f.write(broken_rev)

assert dvc.pull(all_commits=True)["fetched"] == 3


def test_push_pull_fetch_pipeline_stages(tmp_dir, dvc, run_copy, local_remote):
tmp_dir.dvc_gen("foo", "foo")
run_copy("foo", "bar", no_commit=True, name="copy-foo-bar")
Expand Down
23 changes: 22 additions & 1 deletion tests/func/test_gc.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from git import Repo

from dvc.cli import main
from dvc.exceptions import CollectCacheError
from dvc.exceptions import CollectCacheError, DvcException
from dvc.fs import LocalFileSystem
from dvc.repo import Repo as DvcRepo
from dvc.utils.fs import remove
Expand Down Expand Up @@ -412,3 +412,24 @@ def test_gc_rev_num(tmp_dir, scm, dvc):
assert not cache.exists()
else:
assert cache.read_text() == str(i)


def test_gc_ignore_revs(tmp_dir, dvc, broken_rev):
"""Covers #5037 and #7585"""
uncommitted = tmp_dir.dvc_gen("testfile", "uncommitted")
uncommitted_hash = uncommitted[0].outs[0].hash_info.value
tmp_dir.dvc_gen("testfile", "committed", commit="add testfile")

cache = tmp_dir / ".dvc" / "cache"
uncommitted_cache = cache / uncommitted_hash[:2] / uncommitted_hash[2:]

with pytest.raises(DvcException, match="Could not find 'foo'"):
dvc.gc(all_commits=True)

assert uncommitted_cache.exists()

with dvc.fs.open(dvc.ignore_revs_file, "w") as f:
f.write(broken_rev)
dvc.gc(all_commits=True)

assert not uncommitted_cache.exists()
27 changes: 27 additions & 0 deletions tests/unit/repo/test_repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,33 @@ def test_used_objs(tmp_dir, dvc, path):
assert used == expected


def test_used_objs_ignore_revs(tmp_dir, scm, dvc):
def _get_used_values(dvc):
used_obj_ids = list(dvc.used_objs(all_commits=True).values())[0]
return {o.value for o in used_obj_ids}

obj_ids = {}
revs = {}
for i in range(3):
o = tmp_dir.dvc_gen("foo", str(i), commit=str(i))
obj_ids[i] = o[0].outs[0].hash_info.value
revs[i] = scm.get_rev()

assert set(obj_ids.values()) == _get_used_values(dvc)

with dvc.fs.open(dvc.ignore_revs_file, "w") as f:
f.write(revs[0])

expected = {obj_ids[1], obj_ids[2]}
assert expected == _get_used_values(dvc)

with dvc.fs.open(dvc.ignore_revs_file, "w") as f:
f.write(f"{revs[0]}\n{revs[1]}")

expected = {obj_ids[2]}
assert expected == _get_used_values(dvc)


def test_locked(mocker):
repo = mocker.MagicMock()
repo._lock_depth = 0
Expand Down

0 comments on commit 947b624

Please sign in to comment.