diff --git a/tests/func/test_repro.py b/tests/func/test_repro.py index e7ea6e7356..4d96457438 100644 --- a/tests/func/test_repro.py +++ b/tests/func/test_repro.py @@ -2,10 +2,10 @@ import getpass import os import posixpath -import pathlib import re import shutil import uuid +from pathlib import Path from subprocess import PIPE from subprocess import Popen from urllib.parse import urljoin @@ -17,6 +17,7 @@ from google.cloud import storage as gc from mock import patch +from dvc.compat import fspath from dvc.exceptions import CyclicGraphError from dvc.exceptions import ReproductionError from dvc.exceptions import StagePathAsOutputError @@ -1372,210 +1373,190 @@ def test(self): @pytest.fixture -def repro_dir(dvc_repo, repo_dir): - repo_dir.dname = "dir" - os.mkdir(repo_dir.dname) - repo_dir.emptydname = "emptydir" - os.mkdir(repo_dir.emptydname) - subdname = os.path.join(repo_dir.dname, "subdir") - os.mkdir(subdname) - - repo_dir.source = "source" - repo_dir.source_stage = repo_dir.source + ".dvc" - stage = dvc_repo.run( - fname=repo_dir.source_stage, - outs=[repo_dir.source], - deps=[repo_dir.FOO], - ) - assert stage is not None - assert filecmp.cmp(repo_dir.source, repo_dir.FOO, shallow=False) - - repo_dir.unrelated1 = "unrelated1" - repo_dir.unrelated1_stage = repo_dir.unrelated1 + ".dvc" - stage = dvc_repo.run( - fname=repo_dir.unrelated1_stage, - outs=[repo_dir.unrelated1], - deps=[repo_dir.source], - ) - assert stage is not None - - repo_dir.unrelated2 = "unrelated2" - repo_dir.unrelated2_stage = repo_dir.unrelated2 + ".dvc" - stage = dvc_repo.run( - fname=repo_dir.unrelated2_stage, - outs=[repo_dir.unrelated2], - deps=[repo_dir.DATA], +def repro_dir(tmp_dir, dvc, run_copy): + # Creates repo with following structure: + # data_dir/dir_file origin_data + # | | | + # | | origin_copy.dvc + # unrelated2.dvc | | | + # | | unrelated1.dvc + # dir/subdir/dir_file_copy.dvc | + # | | + # | dir/origin_copy_2.dvc + # | | + # \ / + # \ / + # dir/Dvcfile + tmp_dir.gen( + { + "origin_data": "origin data content", + "data_dir": {"dir_file": "dir file content"}, + "dir": {"subdir": {}}, + } ) - assert stage is not None - repo_dir.first = os.path.join(repo_dir.dname, "first") - repo_dir.first_stage = repo_dir.first + ".dvc" + stages = {} - stage = dvc_repo.run( - fname=repo_dir.first_stage, - deps=[repo_dir.source], - outs=[repo_dir.first], - cmd="python {} {} {}".format( - repo_dir.CODE, repo_dir.source, repo_dir.first - ), + origin_copy = tmp_dir / "origin_copy" + stage = run_copy("origin_data", fspath(origin_copy)) + assert stage is not None + assert origin_copy.read_text() == "origin data content" + stages["origin_copy"] = stage + + origin_copy_2 = tmp_dir / "dir" / "origin_copy_2" + stage = run_copy( + fspath(origin_copy), + fspath(origin_copy_2), + fname=fspath(origin_copy_2) + ".dvc", ) assert stage is not None - assert filecmp.cmp(repo_dir.first, repo_dir.FOO, shallow=False) - - repo_dir.second = os.path.join(subdname, "second") - repo_dir.second_stage = repo_dir.second + ".dvc" - stage = dvc_repo.run( - fname=repo_dir.second_stage, - outs=[repo_dir.second], - deps=[repo_dir.DATA], + assert origin_copy_2.read_text() == "origin data content" + stages["origin_copy_2"] = stage + + dir_file_path = tmp_dir / "data_dir" / "dir_file" + dir_file_copy = tmp_dir / "dir" / "subdir" / "dir_file_copy" + stage = run_copy( + fspath(dir_file_path), + fspath(dir_file_copy), + fname=fspath(dir_file_copy) + ".dvc", ) assert stage is not None - assert filecmp.cmp(repo_dir.second, repo_dir.DATA, shallow=False) + assert dir_file_copy.read_text() == "dir file content" + stages["dir_file_copy"] = stage - repo_dir.third_stage = os.path.join(repo_dir.dname, "Dvcfile") - stage = dvc_repo.run( - fname=repo_dir.third_stage, deps=[repo_dir.first, repo_dir.second] + last_stage = tmp_dir / "dir" / "Dvcfile" + stage = dvc.run( + fname=fspath(last_stage), + deps=[fspath(origin_copy_2), fspath(dir_file_copy)], ) assert stage is not None + stages["last_stage"] = stage + + # Unrelated are to verify that reproducing `dir` will not trigger them too + assert run_copy(fspath(origin_copy), "unrelated1") is not None + assert run_copy(fspath(dir_file_path), "unrelated2") is not None + + yield stages + + +def _rewrite_file(path_elements, new_content): + if isinstance(path_elements, str): + path_elements = [path_elements] + file = Path(os.sep.join(path_elements)) + file.unlink() + file.write_text(new_content) + - yield repo_dir +def _read_out(stage): + return Path(stage.outs[0].fspath).read_text() -def test_recursive_repro_default(dvc_repo, repro_dir): +def test_recursive_repro_default(dvc, repro_dir): """ Test recursive repro on dir after a dep outside this dir has changed. """ - os.unlink(repro_dir.FOO) - shutil.copyfile(repro_dir.BAR, repro_dir.FOO) + _rewrite_file("origin_data", "new origin data content") - stages = dvc_repo.reproduce(repro_dir.dname, recursive=True) - # Check that the dependency ("source") and the dependent stages - # inside the folder have been reproduced ("first", "third") - assert len(stages) == 3 - names = [stage.relpath for stage in stages] - assert repro_dir.source_stage in names - assert repro_dir.first_stage in names - assert repro_dir.third_stage in names - assert filecmp.cmp(repro_dir.source, repro_dir.BAR, shallow=False) - assert filecmp.cmp(repro_dir.first, repro_dir.BAR, shallow=False) + stages = dvc.reproduce("dir", recursive=True) + + # Check that the dependency ("origin_copy") and the dependent stages + # inside the folder have been reproduced ("origin_copy_2", "last_stage") + assert stages == [ + repro_dir["origin_copy"], + repro_dir["origin_copy_2"], + repro_dir["last_stage"], + ] + assert _read_out(repro_dir["origin_copy"]) == "new origin data content" + assert _read_out(repro_dir["origin_copy_2"]) == "new origin data content" -def test_recursive_repro_single(dvc_repo, repro_dir): +def test_recursive_repro_single(dvc, repro_dir): """ Test recursive single-item repro on dir after a dep outside this dir has changed. """ - os.unlink(repro_dir.FOO) - shutil.copyfile(repro_dir.BAR, repro_dir.FOO) + _rewrite_file("origin_data", "new origin content") + _rewrite_file(["data_dir", "dir_file"], "new dir file content") - os.unlink(repro_dir.DATA) - shutil.copyfile(repro_dir.BAR, repro_dir.DATA) - - stages = dvc_repo.reproduce( - repro_dir.dname, recursive=True, single_item=True - ) + stages = dvc.reproduce("dir", recursive=True, single_item=True) # Check that just stages inside given dir # with changed direct deps have been reproduced. - # This means that "first" stage should not be reproduced - # since it depends on "source". - # Also check that "second" stage was reproduced before "third" stage - assert len(stages) == 2 - assert repro_dir.second_stage == stages[0].relpath - assert repro_dir.third_stage == stages[1].relpath - assert filecmp.cmp(repro_dir.second, repro_dir.BAR, shallow=False) + # This means that "origin_copy_2" stage should not be reproduced + # since it depends on "origin_copy". + # Also check that "dir_file_copy" stage was reproduced before "last_stage" + assert stages == [repro_dir["dir_file_copy"], repro_dir["last_stage"]] + assert _read_out(repro_dir["dir_file_copy"]) == "new dir file content" -def test_recursive_repro_single_force(dvc_repo, repro_dir): +def test_recursive_repro_single_force(dvc, repro_dir): """ Test recursive single-item force repro on dir without any dependencies changing. """ - stages = dvc_repo.reproduce( - repro_dir.dname, recursive=True, single_item=True, force=True - ) - assert len(stages) == 3 - names = [stage.relpath for stage in stages] + stages = dvc.reproduce("dir", recursive=True, single_item=True, force=True) # Check that all stages inside given dir have been reproduced - # Also check that "second" stage was reproduced before "third" stage - # and that "first" stage was reproduced before "third" stage - assert repro_dir.first_stage in names - assert repro_dir.second_stage in names - assert repro_dir.third_stage in names - assert names.index(repro_dir.first_stage) < names.index( - repro_dir.third_stage + # Also check that "dir_file_copy" stage was reproduced before "last_stage" + # and that "origin_copy" stage was reproduced before "last_stage" stage + assert len(stages) == 3 + assert set(stages) == { + repro_dir["origin_copy_2"], + repro_dir["dir_file_copy"], + repro_dir["last_stage"], + } + assert stages.index(repro_dir["origin_copy_2"]) < stages.index( + repro_dir["last_stage"] ) - assert names.index(repro_dir.second_stage) < names.index( - repro_dir.third_stage + assert stages.index(repro_dir["dir_file_copy"]) < stages.index( + repro_dir["last_stage"] ) -def test_recursive_repro_empty_dir(dvc_repo, repro_dir): +def test_recursive_repro_empty_dir(tmp_dir, dvc): """ Test recursive repro on an empty directory """ - stages = dvc_repo.reproduce( - repro_dir.emptydname, recursive=True, force=True - ) - assert len(stages) == 0 + (tmp_dir / "emptydir").mkdir() + + stages = dvc.reproduce("emptydir", recursive=True, force=True) + assert stages == [] -def test_recursive_repro_recursive_missing_file(dvc_repo): +def test_recursive_repro_recursive_missing_file(dvc): """ Test recursive repro on a missing file """ with pytest.raises(StageFileDoesNotExistError): - dvc_repo.reproduce("notExistingStage.dvc", recursive=True) + dvc.reproduce("notExistingStage.dvc", recursive=True) with pytest.raises(StageFileDoesNotExistError): - dvc_repo.reproduce("notExistingDir/", recursive=True) + dvc.reproduce("notExistingDir/", recursive=True) -def test_recursive_repro_on_stage_file(dvc_repo, repro_dir): +def test_recursive_repro_on_stage_file(dvc, repro_dir): """ Test recursive repro on a stage file instead of directory """ - stages = dvc_repo.reproduce( - repro_dir.first_stage, recursive=True, force=True + stages = dvc.reproduce( + repro_dir["origin_copy_2"].relpath, recursive=True, force=True ) - assert len(stages) == 2 - names = [stage.relpath for stage in stages] - assert repro_dir.source_stage in names - assert repro_dir.first_stage in names + assert stages == [repro_dir["origin_copy"], repro_dir["origin_copy_2"]] -@pytest.fixture -def foo_copy(repo_dir, dvc_repo): - stages = dvc_repo.add(repo_dir.FOO) - assert len(stages) == 1 - foo_stage = stages[0] - assert foo_stage is not None - - fname = "foo_copy" - stage_fname = fname + ".dvc" - dvc_repo.run( - fname=stage_fname, - outs=[fname], - deps=[repo_dir.FOO, repo_dir.CODE], - cmd="python {} {} {}".format(repo_dir.CODE, repo_dir.FOO, fname), - ) - return {"fname": fname, "stage_fname": stage_fname} - - -def test_dvc_formatting_retained(dvc_repo, foo_copy): - root = pathlib.Path(dvc_repo.root_dir) - stage_file = root / foo_copy["stage_fname"] +def test_dvc_formatting_retained(tmp_dir, dvc, run_copy): + tmp_dir.dvc_gen("foo", "foo content") + stage = run_copy("foo", "foo_copy", fname="foo_copy.dvc") + stage_path = tmp_dir / stage.relpath # Add comments and custom formatting to DVC-file - lines = list(map(_format_dvc_line, stage_file.read_text().splitlines())) + lines = list(map(_format_dvc_line, stage_path.read_text().splitlines())) lines.insert(0, "# Starting comment") stage_text = "".join(l + "\n" for l in lines) - stage_file.write_text(stage_text) + stage_path.write_text(stage_text) # Rewrite data source and repro - (root / "foo").write_text("new_foo") - dvc_repo.reproduce(foo_copy["stage_fname"]) + (tmp_dir / "foo").write_text("new foo") + dvc.reproduce("foo_copy.dvc", force=True) - # All differences should be only about md5 - assert _hide_md5(stage_text) == _hide_md5(stage_file.read_text()) + assert _hide_md5(stage_text) == _hide_md5(stage_path.read_text()) def _format_dvc_line(line): @@ -1630,10 +1611,12 @@ def test(self): assert evaluation[2].relpath == "E.dvc" -def test_ssh_dir_out(dvc_repo): +def test_ssh_dir_out(tmp_dir, dvc): if not _should_test_ssh(): pytest.skip() + tmp_dir.gen({"foo": "foo content"}) + # Set up remote and cache remote_url = get_ssh_url() assert main(["remote", "add", "upstream", remote_url]) == 0 @@ -1643,7 +1626,7 @@ def test_ssh_dir_out(dvc_repo): assert main(["config", "cache.ssh", "sshcache"]) == 0 # Recreating to reread configs - repo = DvcRepo(dvc_repo.root_dir) + repo = DvcRepo(dvc.root_dir) url_info = URLInfo(remote_url) mkdir_cmd = "mkdir dir-out;cd dir-out;echo 1 > 1.txt; echo 2 > 2.txt"