-
Notifications
You must be signed in to change notification settings - Fork 1.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. Weβll occasionally send you account related emails.
Already on GitHub? Sign in to your account
repro: tests: migrate to dir helpers #3091
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,10 +2,10 @@ | |
import getpass | ||
import os | ||
import posixpath | ||
import pathlib | ||
import re | ||
import shutil | ||
import uuid | ||
from pathlib import Path | ||
from subprocess import PIPE | ||
from subprocess import Popen | ||
from urllib.parse import urljoin | ||
|
@@ -17,6 +17,7 @@ | |
from google.cloud import storage as gc | ||
from mock import patch | ||
|
||
from dvc.compat import fspath | ||
from dvc.exceptions import CyclicGraphError | ||
from dvc.exceptions import ReproductionError | ||
from dvc.exceptions import StagePathAsOutputError | ||
|
@@ -1372,210 +1373,199 @@ def test(self): | |
|
||
|
||
@pytest.fixture | ||
def repro_dir(dvc_repo, repo_dir): | ||
repo_dir.dname = "dir" | ||
os.mkdir(repo_dir.dname) | ||
repo_dir.emptydname = "emptydir" | ||
os.mkdir(repo_dir.emptydname) | ||
subdname = os.path.join(repo_dir.dname, "subdir") | ||
os.mkdir(subdname) | ||
|
||
repo_dir.source = "source" | ||
repo_dir.source_stage = repo_dir.source + ".dvc" | ||
stage = dvc_repo.run( | ||
fname=repo_dir.source_stage, | ||
outs=[repo_dir.source], | ||
deps=[repo_dir.FOO], | ||
) | ||
assert stage is not None | ||
assert filecmp.cmp(repo_dir.source, repo_dir.FOO, shallow=False) | ||
|
||
repo_dir.unrelated1 = "unrelated1" | ||
repo_dir.unrelated1_stage = repo_dir.unrelated1 + ".dvc" | ||
stage = dvc_repo.run( | ||
fname=repo_dir.unrelated1_stage, | ||
outs=[repo_dir.unrelated1], | ||
deps=[repo_dir.source], | ||
) | ||
assert stage is not None | ||
|
||
repo_dir.unrelated2 = "unrelated2" | ||
repo_dir.unrelated2_stage = repo_dir.unrelated2 + ".dvc" | ||
stage = dvc_repo.run( | ||
fname=repo_dir.unrelated2_stage, | ||
outs=[repo_dir.unrelated2], | ||
deps=[repo_dir.DATA], | ||
def repro_dir(tmp_dir, dvc, run_copy): | ||
# Creates repo with following structure: | ||
# data_dir/dir_file origin_data | ||
# | | | | ||
# | | origin_copy.dvc | ||
# unrelated2.dvc | | | | ||
# | | unrelated1.dvc | ||
# dir/subdir/dir_file_copy.dvc | | ||
# | | | ||
# | dir/origin_copy_2.dvc | ||
# | | | ||
# \ / | ||
# \ / | ||
# dir/Dvcfile | ||
tmp_dir.gen( | ||
{ | ||
"origin_data": "origin data content", | ||
"data_dir": {"dir_file": "dir file content"}, | ||
"dir": {"subdir": {}}, | ||
} | ||
) | ||
assert stage is not None | ||
|
||
repo_dir.first = os.path.join(repo_dir.dname, "first") | ||
repo_dir.first_stage = repo_dir.first + ".dvc" | ||
|
||
stage = dvc_repo.run( | ||
fname=repo_dir.first_stage, | ||
deps=[repo_dir.source], | ||
outs=[repo_dir.first], | ||
cmd="python {} {} {}".format( | ||
repo_dir.CODE, repo_dir.source, repo_dir.first | ||
), | ||
tmp_dir.origin_copy = "origin_copy" | ||
assert run_copy("origin_data", tmp_dir.origin_copy) is not None | ||
assert (tmp_dir / tmp_dir.origin_copy).read_text() == "origin data content" | ||
|
||
# Unrelated are to verify that reproducing `dir` will not trigger them too | ||
assert run_copy(tmp_dir.origin_copy, "unrelated1") is not None | ||
dir_file_path = tmp_dir / "data_dir" / "dir_file" | ||
assert run_copy(fspath(dir_file_path), "unrelated2") is not None | ||
|
||
tmp_dir.origin_copy_2 = os.path.join("dir", "origin_copy_2") | ||
assert ( | ||
run_copy( | ||
tmp_dir.origin_copy, | ||
tmp_dir.origin_copy_2, | ||
fname=tmp_dir.origin_copy_2 + ".dvc", | ||
) | ||
is not None | ||
) | ||
assert stage is not None | ||
assert filecmp.cmp(repo_dir.first, repo_dir.FOO, shallow=False) | ||
|
||
repo_dir.second = os.path.join(subdname, "second") | ||
repo_dir.second_stage = repo_dir.second + ".dvc" | ||
stage = dvc_repo.run( | ||
fname=repo_dir.second_stage, | ||
outs=[repo_dir.second], | ||
deps=[repo_dir.DATA], | ||
assert ( | ||
tmp_dir / tmp_dir.origin_copy_2 | ||
).read_text() == "origin data content" | ||
|
||
tmp_dir.dir_file_copy = os.path.join("dir", "subdir", "dir_file_copy") | ||
assert ( | ||
run_copy( | ||
fspath(dir_file_path), | ||
tmp_dir.dir_file_copy, | ||
fname=tmp_dir.dir_file_copy + ".dvc", | ||
) | ||
is not None | ||
) | ||
assert stage is not None | ||
assert filecmp.cmp(repo_dir.second, repo_dir.DATA, shallow=False) | ||
assert (tmp_dir / tmp_dir.dir_file_copy).read_text() == "dir file content" | ||
|
||
repo_dir.third_stage = os.path.join(repo_dir.dname, "Dvcfile") | ||
stage = dvc_repo.run( | ||
fname=repo_dir.third_stage, deps=[repo_dir.first, repo_dir.second] | ||
tmp_dir.last_stage = os.path.join("dir", "Dvcfile") | ||
assert ( | ||
dvc.run( | ||
fname=tmp_dir.last_stage, | ||
deps=[tmp_dir.origin_copy_2, tmp_dir.dir_file_copy], | ||
) | ||
is not None | ||
) | ||
assert stage is not None | ||
|
||
yield repo_dir | ||
yield tmp_dir | ||
|
||
|
||
def test_recursive_repro_default(dvc_repo, repro_dir): | ||
def test_recursive_repro_default(dvc, repro_dir): | ||
""" | ||
Test recursive repro on dir after a dep outside this dir has changed. | ||
""" | ||
os.unlink(repro_dir.FOO) | ||
shutil.copyfile(repro_dir.BAR, repro_dir.FOO) | ||
origin = Path("origin_data") | ||
origin.unlink() | ||
origin.write_text("new origin data content") | ||
|
||
stages = dvc.reproduce("dir", recursive=True) | ||
|
||
stages = dvc_repo.reproduce(repro_dir.dname, recursive=True) | ||
# Check that the dependency ("source") and the dependent stages | ||
# inside the folder have been reproduced ("first", "third") | ||
assert len(stages) == 3 | ||
names = [stage.relpath for stage in stages] | ||
assert repro_dir.source_stage in names | ||
assert repro_dir.first_stage in names | ||
assert repro_dir.third_stage in names | ||
assert filecmp.cmp(repro_dir.source, repro_dir.BAR, shallow=False) | ||
assert filecmp.cmp(repro_dir.first, repro_dir.BAR, shallow=False) | ||
names = [s.relpath for s in stages] | ||
assert len(names) == 3 | ||
assert set(names) == { | ||
repro_dir.origin_copy + ".dvc", | ||
repro_dir.origin_copy_2 + ".dvc", | ||
repro_dir.last_stage, | ||
} | ||
assert Path(repro_dir.origin_copy).read_text() == "new origin data content" | ||
assert ( | ||
Path(repro_dir.origin_copy_2).read_text() == "new origin data content" | ||
) | ||
|
||
|
||
def test_recursive_repro_single(dvc_repo, repro_dir): | ||
def test_recursive_repro_single(dvc, repro_dir): | ||
""" | ||
Test recursive single-item repro on dir | ||
after a dep outside this dir has changed. | ||
""" | ||
os.unlink(repro_dir.FOO) | ||
shutil.copyfile(repro_dir.BAR, repro_dir.FOO) | ||
origin_data = Path("origin_data") | ||
origin_data.unlink() | ||
origin_data.write_text("new origin content") | ||
|
||
os.unlink(repro_dir.DATA) | ||
shutil.copyfile(repro_dir.BAR, repro_dir.DATA) | ||
dir_file = repro_dir / "data_dir" / "dir_file" | ||
dir_file.unlink() | ||
dir_file.write_text("new dir file content") | ||
|
||
stages = dvc_repo.reproduce( | ||
repro_dir.dname, recursive=True, single_item=True | ||
) | ||
stages = dvc.reproduce("dir", recursive=True, single_item=True) | ||
# Check that just stages inside given dir | ||
# with changed direct deps have been reproduced. | ||
# This means that "first" stage should not be reproduced | ||
# since it depends on "source". | ||
# Also check that "second" stage was reproduced before "third" stage | ||
# Also check that "dir_file_copy" stage was reproduced before "third" stage | ||
assert len(stages) == 2 | ||
assert repro_dir.second_stage == stages[0].relpath | ||
assert repro_dir.third_stage == stages[1].relpath | ||
assert filecmp.cmp(repro_dir.second, repro_dir.BAR, shallow=False) | ||
assert [s.relpath for s in stages] == [ | ||
repro_dir.dir_file_copy + ".dvc", | ||
repro_dir.last_stage, | ||
] | ||
assert Path(repro_dir.dir_file_copy).read_text() == "new dir file content" | ||
|
||
|
||
def test_recursive_repro_single_force(dvc_repo, repro_dir): | ||
def test_recursive_repro_single_force(dvc, repro_dir): | ||
""" | ||
Test recursive single-item force repro on dir | ||
without any dependencies changing. | ||
""" | ||
stages = dvc_repo.reproduce( | ||
repro_dir.dname, recursive=True, single_item=True, force=True | ||
) | ||
stages = dvc.reproduce("dir", recursive=True, single_item=True, force=True) | ||
assert len(stages) == 3 | ||
names = [stage.relpath for stage in stages] | ||
# Check that all stages inside given dir have been reproduced | ||
# Also check that "second" stage was reproduced before "third" stage | ||
# Also check that "dir_file_copy" stage was reproduced before "third" stage | ||
# and that "first" stage was reproduced before "third" stage | ||
assert repro_dir.first_stage in names | ||
assert repro_dir.second_stage in names | ||
assert repro_dir.third_stage in names | ||
assert names.index(repro_dir.first_stage) < names.index( | ||
repro_dir.third_stage | ||
names = [s.relpath for s in stages] | ||
assert { | ||
repro_dir.origin_copy_2 + ".dvc", | ||
repro_dir.dir_file_copy + ".dvc", | ||
repro_dir.last_stage, | ||
} == set(names) | ||
assert names.index(repro_dir.origin_copy_2 + ".dvc") < names.index( | ||
repro_dir.last_stage | ||
) | ||
assert names.index(repro_dir.second_stage) < names.index( | ||
repro_dir.third_stage | ||
assert names.index(repro_dir.dir_file_copy + ".dvc") < names.index( | ||
repro_dir.last_stage | ||
) | ||
|
||
|
||
def test_recursive_repro_empty_dir(dvc_repo, repro_dir): | ||
def test_recursive_repro_empty_dir(dvc, repro_dir): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I presume that There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @pared I think it was only used for There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @efiop removed |
||
""" | ||
Test recursive repro on an empty directory | ||
""" | ||
stages = dvc_repo.reproduce( | ||
repro_dir.emptydname, recursive=True, force=True | ||
) | ||
(repro_dir / "emptydir").mkdir() | ||
|
||
stages = dvc.reproduce("emptydir", recursive=True, force=True) | ||
assert len(stages) == 0 | ||
|
||
|
||
def test_recursive_repro_recursive_missing_file(dvc_repo): | ||
def test_recursive_repro_recursive_missing_file(dvc): | ||
""" | ||
Test recursive repro on a missing file | ||
""" | ||
with pytest.raises(StageFileDoesNotExistError): | ||
dvc_repo.reproduce("notExistingStage.dvc", recursive=True) | ||
dvc.reproduce("notExistingStage.dvc", recursive=True) | ||
with pytest.raises(StageFileDoesNotExistError): | ||
dvc_repo.reproduce("notExistingDir/", recursive=True) | ||
dvc.reproduce("notExistingDir/", recursive=True) | ||
|
||
|
||
def test_recursive_repro_on_stage_file(dvc_repo, repro_dir): | ||
def test_recursive_repro_on_stage_file(dvc, repro_dir): | ||
""" | ||
Test recursive repro on a stage file instead of directory | ||
""" | ||
stages = dvc_repo.reproduce( | ||
repro_dir.first_stage, recursive=True, force=True | ||
stages = dvc.reproduce( | ||
repro_dir.origin_copy_2 + ".dvc", recursive=True, force=True | ||
) | ||
assert len(stages) == 2 | ||
names = [stage.relpath for stage in stages] | ||
assert repro_dir.source_stage in names | ||
assert repro_dir.first_stage in names | ||
|
||
|
||
@pytest.fixture | ||
def foo_copy(repo_dir, dvc_repo): | ||
stages = dvc_repo.add(repo_dir.FOO) | ||
assert len(stages) == 1 | ||
foo_stage = stages[0] | ||
assert foo_stage is not None | ||
|
||
fname = "foo_copy" | ||
stage_fname = fname + ".dvc" | ||
dvc_repo.run( | ||
fname=stage_fname, | ||
outs=[fname], | ||
deps=[repo_dir.FOO, repo_dir.CODE], | ||
cmd="python {} {} {}".format(repo_dir.CODE, repo_dir.FOO, fname), | ||
) | ||
return {"fname": fname, "stage_fname": stage_fname} | ||
assert [ | ||
repro_dir.origin_copy + ".dvc", | ||
repro_dir.origin_copy_2 + ".dvc", | ||
] == [stage.relpath for stage in stages] | ||
|
||
|
||
def test_dvc_formatting_retained(dvc_repo, foo_copy): | ||
root = pathlib.Path(dvc_repo.root_dir) | ||
stage_file = root / foo_copy["stage_fname"] | ||
def test_dvc_formatting_retained(tmp_dir, dvc, run_copy): | ||
tmp_dir.dvc_gen("foo", "foo content") | ||
stage = run_copy("foo", "foo_copy", fname="foo_copy.dvc") | ||
stage_path = tmp_dir / stage.relpath | ||
|
||
# Add comments and custom formatting to DVC-file | ||
lines = list(map(_format_dvc_line, stage_file.read_text().splitlines())) | ||
lines = list(map(_format_dvc_line, stage_path.read_text().splitlines())) | ||
lines.insert(0, "# Starting comment") | ||
stage_text = "".join(l + "\n" for l in lines) | ||
stage_file.write_text(stage_text) | ||
stage_path.write_text(stage_text) | ||
|
||
# Rewrite data source and repro | ||
(root / "foo").write_text("new_foo") | ||
dvc_repo.reproduce(foo_copy["stage_fname"]) | ||
(tmp_dir / "foo").write_text("new foo") | ||
dvc.reproduce("foo_copy.dvc", force=True) | ||
|
||
# All differences should be only about md5 | ||
assert _hide_md5(stage_text) == _hide_md5(stage_file.read_text()) | ||
assert _hide_md5(stage_text) == _hide_md5(stage_path.read_text()) | ||
|
||
|
||
def _format_dvc_line(line): | ||
|
@@ -1630,7 +1620,7 @@ def test(self): | |
assert evaluation[2].relpath == "E.dvc" | ||
|
||
|
||
def test_ssh_dir_out(dvc_repo): | ||
def test_ssh_dir_out(dvc): | ||
if not _should_test_ssh(): | ||
pytest.skip() | ||
|
||
|
@@ -1643,7 +1633,7 @@ def test_ssh_dir_out(dvc_repo): | |
assert main(["config", "cache.ssh", "sshcache"]) == 0 | ||
|
||
# Recreating to reread configs | ||
repo = DvcRepo(dvc_repo.root_dir) | ||
repo = DvcRepo(dvc.root_dir) | ||
|
||
url_info = URLInfo(remote_url) | ||
mkdir_cmd = "mkdir dir-out;cd dir-out;echo 1 > 1.txt; echo 2 > 2.txt" | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You could've used
tree
CLI util, it would be much easier to understand than graph πThere was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I intended to show dependency between particular stages,
tree
would show placement in the directory. I thought that this one brings more information. Maybe I should include both?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh, right, sorry. Ok, why not
dvc pipeline show --ascii
then? Or is it too big?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
^ Just wondering, I'm okay with this illustration.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@efiop, you are right, it was too big, had to to it my own way, so that we will still be able to pass all checks :)