Skip to content

Commit

Permalink
run: use multistage dvcfiles by default (#3740)
Browse files Browse the repository at this point in the history
Adds a hidden `--single-stage` flag to simplify the test migration.
  • Loading branch information
efiop authored May 4, 2020
1 parent 6f73a67 commit bc7f177
Show file tree
Hide file tree
Showing 13 changed files with 345 additions and 87 deletions.
11 changes: 10 additions & 1 deletion dvc/command/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ def run(self):
outs_persist_no_cache=self.args.outs_persist_no_cache,
always_changed=self.args.always_changed,
name=self.args.name,
single_stage=self.args.single_stage,
)
except DvcException:
logger.exception("")
Expand Down Expand Up @@ -96,7 +97,9 @@ def add_parser(subparsers, parent_parser):
help="Declare dependencies for reproducible cmd.",
metavar="<path>",
)
run_parser.add_argument("-n", "--name", help=argparse.SUPPRESS)
run_parser.add_argument(
"-n", "--name", help="Stage name.",
)
run_parser.add_argument(
"-o",
"--outs",
Expand Down Expand Up @@ -198,6 +201,12 @@ def add_parser(subparsers, parent_parser):
default=False,
help="Always consider this DVC-file as changed.",
)
run_parser.add_argument(
"--single-stage",
action="store_true",
default=False,
help=argparse.SUPPRESS,
)
run_parser.add_argument(
"command", nargs=argparse.REMAINDER, help="Command to execute."
)
Expand Down
14 changes: 12 additions & 2 deletions dvc/repo/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from . import locked
from .scm_context import scm_context
from dvc.exceptions import InvalidArgumentError
from dvc.stage.exceptions import DuplicateStageName, InvalidStageName

from funcy import first, concat
Expand Down Expand Up @@ -36,14 +37,23 @@ def _get_file_path(kwargs):

@locked
@scm_context
def run(self, fname=None, no_exec=False, **kwargs):
def run(self, fname=None, no_exec=False, single_stage=False, **kwargs):
from dvc.stage import PipelineStage, Stage, create_stage
from dvc.dvcfile import Dvcfile, PIPELINE_FILE

stage_cls = PipelineStage
path = PIPELINE_FILE
stage_name = kwargs.get("name")
if not stage_name:

if stage_name and single_stage:
raise InvalidArgumentError(
"`-n|--name` is incompatible with `--single-stage`"
)

if not stage_name and not single_stage:
raise InvalidArgumentError("`-n|--name` is required")

if single_stage:
kwargs.pop("name", None)
stage_cls = Stage
path = fname or _get_file_path(kwargs)
Expand Down
6 changes: 4 additions & 2 deletions tests/func/test_add.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,7 @@ def test_should_update_state_entry_for_file_after_add(mocker, dvc, tmp_dir):
assert ret == 0
assert file_md5_counter.mock.call_count == 1

ret = main(["run", "-d", "foo", "echo foo"])
ret = main(["run", "--single-stage", "-d", "foo", "echo foo"])
assert ret == 0
assert file_md5_counter.mock.call_count == 1

Expand Down Expand Up @@ -297,7 +297,9 @@ def test_should_update_state_entry_for_directory_after_add(
assert file_md5_counter.mock.call_count == 3

ls = "dir" if os.name == "nt" else "ls"
ret = main(["run", "-d", "data", "{} {}".format(ls, "data")])
ret = main(
["run", "--single-stage", "-d", "data", "{} {}".format(ls, "data")]
)
assert ret == 0
assert file_md5_counter.mock.call_count == 3

Expand Down
1 change: 1 addition & 0 deletions tests/func/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ def test_open_not_cached(dvc):
metric_file, metric_content
)
dvc.run(
single_stage=True,
metrics_no_cache=[metric_file],
cmd=('python -c "{}"'.format(metric_code)),
)
Expand Down
16 changes: 13 additions & 3 deletions tests/func/test_checkout.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,7 @@ def test(self):
self.commit_data_file(fname1)
self.commit_data_file(fname2)
self.dvc.run(
single_stage=True,
cmd="python {} {} {}".format(self.CODE, self.FOO, fname3),
deps=[self.CODE, self.FOO],
outs_no_cache=[fname3],
Expand Down Expand Up @@ -341,7 +342,10 @@ def test(self):

self.dvc.add(self.FOO)
stage = self.dvc.run(
cmd=cmd, deps=[self.FOO, self.CODE], outs_no_cache=["out"]
cmd=cmd,
deps=[self.FOO, self.CODE],
outs_no_cache=["out"],
single_stage=True,
)
self.assertTrue(stage is not None)

Expand Down Expand Up @@ -480,7 +484,9 @@ def test(self):

def test_checkout_no_checksum(tmp_dir, dvc):
tmp_dir.gen("file", "file content")
stage = dvc.run(outs=["file"], no_exec=True, cmd="somecmd")
stage = dvc.run(
outs=["file"], no_exec=True, cmd="somecmd", single_stage=True
)

with pytest.raises(CheckoutError):
dvc.checkout([stage.path], force=True)
Expand Down Expand Up @@ -706,7 +712,11 @@ def test_checkout_with_relink_existing(tmp_dir, dvc, link):
def test_checkout_with_deps(tmp_dir, dvc):
tmp_dir.dvc_gen({"foo": "foo"})
dvc.run(
fname="copy_file.dvc", cmd="echo foo > bar", outs=["bar"], deps=["foo"]
fname="copy_file.dvc",
cmd="echo foo > bar",
outs=["bar"],
deps=["foo"],
single_stage=True,
)

(tmp_dir / "bar").unlink()
Expand Down
2 changes: 1 addition & 1 deletion tests/func/test_commit.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def test_commit_force(tmp_dir, dvc):
assert dvc.status([stage.path]) == {}


@pytest.mark.parametrize("run_kw", [{}, {"name": "copy"}])
@pytest.mark.parametrize("run_kw", [{"single_stage": True}, {"name": "copy"}])
def test_commit_with_deps(tmp_dir, dvc, run_copy, run_kw):
tmp_dir.gen("foo", "foo")
(foo_stage,) = dvc.add("foo", no_commit=True)
Expand Down
10 changes: 6 additions & 4 deletions tests/func/test_data_cloud.py
Original file line number Diff line number Diff line change
Expand Up @@ -520,7 +520,9 @@ def _test(self):
url = Local.get_url()
self.main(["remote", "add", "-d", TEST_REMOTE, url])

stage = self.dvc.run(outs=["bar"], cmd="echo bar > bar")
stage = self.dvc.run(
outs=["bar"], cmd="echo bar > bar", single_stage=True
)
self.main(["push"])

stage_file_path = stage.relpath
Expand Down Expand Up @@ -630,7 +632,7 @@ def test_checksum_recalculation(mocker, dvc, tmp_dir):
assert ret == 0
ret = main(["push"])
assert ret == 0
ret = main(["run", "-d", "foo", "echo foo"])
ret = main(["run", "--single-stage", "-d", "foo", "echo foo"])
assert ret == 0
assert test_get_file_checksum.mock.call_count == 1

Expand Down Expand Up @@ -783,7 +785,7 @@ def recurse_list_dir(d):

def test_dvc_pull_pipeline_stages(tmp_dir, dvc, local_remote, run_copy):
(stage0,) = tmp_dir.dvc_gen("foo", "foo")
stage1 = run_copy("foo", "bar")
stage1 = run_copy("foo", "bar", single_stage=True)
stage2 = run_copy("bar", "foobar", name="copy-bar-foobar")
outs = ["foo", "bar", "foobar"]

Expand Down Expand Up @@ -813,7 +815,7 @@ def test_dvc_pull_pipeline_stages(tmp_dir, dvc, local_remote, run_copy):

def test_pipeline_file_target_ops(tmp_dir, dvc, local_remote, run_copy):
tmp_dir.dvc_gen("foo", "foo")
run_copy("foo", "bar")
run_copy("foo", "bar", single_stage=True)

tmp_dir.dvc_gen("lorem", "lorem")
run_copy("lorem", "lorem2", name="copy-lorem-lorem2")
Expand Down
26 changes: 21 additions & 5 deletions tests/func/test_dvcfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,11 @@ def test_run_load_one_for_multistage_non_existing_stage_name(tmp_dir, dvc):
def test_run_load_one_on_single_stage(tmp_dir, dvc):
tmp_dir.gen("foo", "foo")
stage = dvc.run(
cmd="cp foo foo2", deps=["foo"], metrics=["foo2"], always_changed=True,
cmd="cp foo foo2",
deps=["foo"],
metrics=["foo2"],
always_changed=True,
single_stage=True,
)
assert Dvcfile(dvc, stage.path).stages.get("random-name")
assert Dvcfile(dvc, stage.path).stage
Expand Down Expand Up @@ -97,7 +101,11 @@ def test_load_all_multistage(tmp_dir, dvc):
def test_load_all_singlestage(tmp_dir, dvc):
tmp_dir.gen("foo", "foo")
stage1 = dvc.run(
cmd="cp foo foo2", deps=["foo"], metrics=["foo2"], always_changed=True,
cmd="cp foo foo2",
deps=["foo"],
metrics=["foo2"],
always_changed=True,
single_stage=True,
)
stages = Dvcfile(dvc, "foo2.dvc").stages.values()
assert len(stages) == 1
Expand All @@ -107,7 +115,11 @@ def test_load_all_singlestage(tmp_dir, dvc):
def test_load_singlestage(tmp_dir, dvc):
tmp_dir.gen("foo", "foo")
stage1 = dvc.run(
cmd="cp foo foo2", deps=["foo"], metrics=["foo2"], always_changed=True,
cmd="cp foo foo2",
deps=["foo"],
metrics=["foo2"],
always_changed=True,
single_stage=True,
)
assert Dvcfile(dvc, "foo2.dvc").stage == stage1

Expand Down Expand Up @@ -144,7 +156,11 @@ def test_stage_collection(tmp_dir, dvc):
always_changed=True,
)
stage3 = dvc.run(
cmd="cp bar bar2", deps=["bar"], metrics=["bar2"], always_changed=True,
cmd="cp bar bar2",
deps=["bar"],
metrics=["bar2"],
always_changed=True,
single_stage=True,
)
assert {s for s in dvc.stages} == {stage1, stage3, stage2}

Expand Down Expand Up @@ -174,7 +190,7 @@ def test_stage_filter(tmp_dir, dvc, run_copy):

def test_stage_filter_in_singlestage_file(tmp_dir, dvc, run_copy):
tmp_dir.gen("foo", "foo")
stage = run_copy("foo", "bar")
stage = run_copy("foo", "bar", single_stage=True)
dvcfile = Dvcfile(dvc, stage.path)
assert set(dvcfile.stages.filter(None).values()) == {stage}
assert dvcfile.stages.filter(None).get(None) == stage
Expand Down
39 changes: 29 additions & 10 deletions tests/func/test_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,10 +103,23 @@ def test_disconnected_stage(tmp_dir, dvc):
tmp_dir.dvc_gen({"base": "base"})

dvc.add("base")
dvc.run(deps=["base"], outs=["derived1"], cmd="echo derived1 > derived1")
dvc.run(deps=["base"], outs=["derived2"], cmd="echo derived2 > derived2")
dvc.run(
deps=["base"],
outs=["derived1"],
cmd="echo derived1 > derived1",
single_stage=True,
)
dvc.run(
deps=["base"],
outs=["derived2"],
cmd="echo derived2 > derived2",
single_stage=True,
)
final_stage = dvc.run(
deps=["derived1"], outs=["final"], cmd="echo final > final"
deps=["derived1"],
outs=["final"],
cmd="echo final > final",
single_stage=True,
)

command = CmdPipelineShow([])
Expand Down Expand Up @@ -134,7 +147,7 @@ def test_print_locked_stages(tmp_dir, dvc, caplog):

def test_dot_outs(tmp_dir, dvc, run_copy):
tmp_dir.gen("foo", "foo content")
run_copy("foo", "file")
run_copy("foo", "file", single_stage=True)
assert main(["pipeline", "show", "--dot", "file.dvc", "--outs"]) == 0


Expand Down Expand Up @@ -208,8 +221,10 @@ def test_no_stages(self):

def one_pipeline(self):
self.dvc.add("foo")
self.dvc.run(deps=["foo"], outs=["bar"], cmd="")
self.dvc.run(deps=["bar"], outs=["baz"], cmd="echo baz > baz")
self.dvc.run(deps=["foo"], outs=["bar"], cmd="", single_stage=True)
self.dvc.run(
deps=["bar"], outs=["baz"], cmd="echo baz > baz", single_stage=True
)
pipelines = self.dvc.pipelines

self.assertEqual(len(pipelines), 1)
Expand All @@ -218,8 +233,10 @@ def one_pipeline(self):

def two_pipelines(self):
self.dvc.add("foo")
self.dvc.run(deps=["foo"], outs=["bar"], cmd="")
self.dvc.run(deps=["bar"], outs=["baz"], cmd="echo baz > baz")
self.dvc.run(deps=["foo"], outs=["bar"], cmd="", single_stage=True)
self.dvc.run(
deps=["bar"], outs=["baz"], cmd="echo baz > baz", single_stage=True
)

self.dvc.add("code.py")

Expand Down Expand Up @@ -248,11 +265,13 @@ def test_split_pipeline(tmp_dir, scm, dvc):
deps=["git_dep1", "data"],
outs=["data_train", "data_valid"],
cmd="echo train >> data_train && echo valid >> data_valid",
single_stage=True,
)
stage = dvc.run(
deps=["git_dep2", "data_train", "data_valid"],
outs=["result"],
cmd="echo result >> result",
single_stage=True,
)

command = CmdPipelineShow([])
Expand All @@ -271,7 +290,7 @@ def test_split_pipeline(tmp_dir, scm, dvc):
def test_pipeline_list_show_multistage(tmp_dir, dvc, run_copy, caplog):
tmp_dir.gen("foo", "foo")
run_copy("foo", "bar", name="copy-foo-bar")
run_copy("bar", "foobar")
run_copy("bar", "foobar", single_stage=True)
command = CmdPipelineShow([])

caplog.clear()
Expand Down Expand Up @@ -299,7 +318,7 @@ def test_pipeline_list_show_multistage(tmp_dir, dvc, run_copy, caplog):
def test_pipeline_ascii_multistage(tmp_dir, dvc, run_copy):
tmp_dir.gen("foo", "foo")
run_copy("foo", "bar", name="copy-foo-bar")
run_copy("bar", "foobar")
run_copy("bar", "foobar", single_stage=True)
command = CmdPipelineShow([])
nodes, edges, is_tree = command._build_graph("foobar.dvc")
assert set(nodes) == {"dvc.yaml:copy-foo-bar", "foobar.dvc"}
Expand Down
Loading

0 comments on commit bc7f177

Please sign in to comment.