Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

repro: pull all missing files #9395

Merged
merged 2 commits into from
May 9, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion dvc/repo/reproduce.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ def _track_stage(stage: "Stage") -> None:

@locked
@scm_context
def reproduce( # noqa: C901
def reproduce( # noqa: C901, PLR0912
self: "Repo",
targets=None,
recursive=False,
Expand Down Expand Up @@ -125,6 +125,10 @@ def reproduce( # noqa: C901
)
)

if kwargs.get("pull", False):
logger.debug("Pulling run cache")
self.stage_cache.pull(None)
Comment on lines +128 to +130
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This might take a while, but I guess this is acceptable with --pull. I still hope to finally get to converting run-cache to dvc-objects so that this operation could be efficient.


return _reproduce_stages(self.index.graph, list(stages), **kwargs)


Expand Down Expand Up @@ -190,6 +194,10 @@ def _reproduce_stages( # noqa: C901
)

try:
if kwargs.get("pull") and stage.changed():
logger.debug("Pulling %s", stage.addressing)
stage.repo.pull(stage.addressing, allow_missing=True)

ret = _reproduce_stage(stage, **kwargs)

if len(ret) == 0:
Expand Down
42 changes: 42 additions & 0 deletions tests/func/test_repro_multistage.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from dvc.exceptions import CyclicGraphError, ReproductionError
from dvc.stage import PipelineStage
from dvc.stage.exceptions import StageNotFound
from dvc.utils.fs import remove


def test_non_existing_stage_name(tmp_dir, dvc, run_copy):
Expand Down Expand Up @@ -397,3 +398,44 @@ def test_repro_list_of_commands_raise_and_stops_after_failure(tmp_dir, dvc, mult
dvc.reproduce(targets=["multi"])
assert (tmp_dir / "foo").read_text() == "foo\n"
assert not (tmp_dir / "bar").exists()


def test_repro_pulls_mising_data_source(tmp_dir, dvc, mocker, local_remote):
    """Reproduce with ``pull=True`` after a tracked dependency has vanished.

    ``foo`` is generated, tracked and pushed; a ``copy-foo`` stage depending
    on it is added; then both the workspace file and its cache entry are
    removed. The test passes when ``dvc.reproduce(pull=True)`` returns a
    truthy value.
    """
    (tracked_source,) = tmp_dir.dvc_gen("foo", "foo")

    dvc.push()

    dvc.stage.add(
        name="copy-foo",
        cmd="cp foo bar",
        deps=["foo"],
        outs=["bar"],
    )

    # Drop the workspace copy first, then the matching cache object, so the
    # data is no longer available locally.
    remove("foo")
    cached_object = tracked_source.outs[0].cache_path
    remove(cached_object)

    reproduced = dvc.reproduce(pull=True)
    assert reproduced


def test_repro_pulls_mising_import(tmp_dir, dvc, mocker, erepo_dir, local_remote):
    """Reproduce with ``pull=True`` after an imported dependency has vanished.

    ``foo`` is created in the external repo, imported into this repo and
    pushed; a ``copy-foo`` stage depending on it is added; then both the
    workspace file and its cache entry are removed. The test passes when
    ``dvc.reproduce(pull=True)`` returns a truthy value.
    """
    with erepo_dir.chdir():
        erepo_dir.dvc_gen("foo", "foo", commit="first")

    external_repo_path = os.fspath(erepo_dir)
    import_stage = dvc.imp(external_repo_path, "foo")

    dvc.push()

    dvc.stage.add(
        name="copy-foo",
        cmd="cp foo bar",
        deps=["foo"],
        outs=["bar"],
    )

    # Drop the workspace copy first, then the matching cache object, so the
    # imported data is no longer available locally.
    remove("foo")
    cached_object = import_stage.outs[0].cache_path
    remove(cached_object)

    reproduced = dvc.reproduce(pull=True)
    assert reproduced


def test_repro_pulls_intermediate_out(tmp_dir, dvc, mocker, local_remote):
    """Reproduce with ``pull=True`` after an intermediate output has vanished.

    A two-stage pipeline is declared (``create-foo`` writes ``foo``;
    ``copy-foo`` copies it to ``bar``). Only ``create-foo`` is reproduced and
    pushed, after which ``foo`` and its cache entry are removed. The test
    passes when ``dvc.reproduce(pull=True)`` returns a truthy value.
    """
    tmp_dir.gen("fixed", "fixed")

    pipeline = (
        {
            "name": "create-foo",
            "cmd": "echo foo > foo",
            "deps": ["fixed"],
            "outs": ["foo"],
        },
        {
            "name": "copy-foo",
            "cmd": "cp foo bar",
            "deps": ["foo"],
            "outs": ["bar"],
        },
    )
    for stage_kwargs in pipeline:
        dvc.stage.add(**stage_kwargs)

    (producer_stage,) = dvc.reproduce("create-foo")

    dvc.push()

    # Drop the workspace copy first, then the matching cache object, so the
    # intermediate output is no longer available locally.
    remove("foo")
    cached_object = producer_stage.outs[0].cache_path
    remove(cached_object)

    reproduced = dvc.reproduce(pull=True)
    assert reproduced
7 changes: 5 additions & 2 deletions tests/func/test_run_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ def test_restore_pull(tmp_dir, dvc, run_copy, mocker, local_remote):
tmp_dir.gen("foo", "foo")
stage = run_copy("foo", "bar", name="copy-foo-bar")

dvc.push()
dvc.push(run_cache=True)

mock_restore = mocker.spy(dvc.stage_cache, "restore")
mock_run = mocker.patch("dvc.stage.run.cmd_run")
Expand All @@ -175,11 +175,14 @@ def test_restore_pull(tmp_dir, dvc, run_copy, mocker, local_remote):
(tmp_dir / LOCK_FILE).unlink()
remove(stage.outs[0].cache_path)

# removing local run cache
remove(dvc.stage_cache.cache_dir)

(stage,) = dvc.reproduce("copy-foo-bar", pull=True)

mock_restore.assert_called_once_with(stage, pull=True, dry=False)
mock_run.assert_not_called()
assert mock_checkout.call_count == 2
assert mock_checkout.call_count == 3
assert (tmp_dir / "bar").exists()
assert not (tmp_dir / "foo").unlink()
assert (tmp_dir / LOCK_FILE).exists()