Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

run: use multistage dvcfiles by default #3740

Merged
merged 1 commit into from
May 4, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion dvc/command/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ def run(self):
outs_persist_no_cache=self.args.outs_persist_no_cache,
always_changed=self.args.always_changed,
name=self.args.name,
single_stage=self.args.single_stage,
)
except DvcException:
logger.exception("")
Expand Down Expand Up @@ -96,7 +97,9 @@ def add_parser(subparsers, parent_parser):
help="Declare dependencies for reproducible cmd.",
metavar="<path>",
)
run_parser.add_argument("-n", "--name", help=argparse.SUPPRESS)
run_parser.add_argument(
"-n", "--name", help="Stage name.",
)
run_parser.add_argument(
"-o",
"--outs",
Expand Down Expand Up @@ -198,6 +201,12 @@ def add_parser(subparsers, parent_parser):
default=False,
help="Always consider this DVC-file as changed.",
)
run_parser.add_argument(
"--single-stage",
action="store_true",
default=False,
help=argparse.SUPPRESS,
)
run_parser.add_argument(
"command", nargs=argparse.REMAINDER, help="Command to execute."
)
Expand Down
14 changes: 12 additions & 2 deletions dvc/repo/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from . import locked
from .scm_context import scm_context
from dvc.exceptions import InvalidArgumentError
from dvc.stage.exceptions import DuplicateStageName, InvalidStageName

from funcy import first, concat
Expand Down Expand Up @@ -36,14 +37,23 @@ def _get_file_path(kwargs):

@locked
@scm_context
def run(self, fname=None, no_exec=False, **kwargs):
def run(self, fname=None, no_exec=False, single_stage=False, **kwargs):
from dvc.stage import PipelineStage, Stage, create_stage
from dvc.dvcfile import Dvcfile, PIPELINE_FILE

stage_cls = PipelineStage
path = PIPELINE_FILE
stage_name = kwargs.get("name")
if not stage_name:

if stage_name and single_stage:
raise InvalidArgumentError(
"`-n|--name` is incompatible with `--single-stage`"
)

if not stage_name and not single_stage:
raise InvalidArgumentError("`-n|--name` is required")

if single_stage:
kwargs.pop("name", None)
stage_cls = Stage
path = fname or _get_file_path(kwargs)
Expand Down
6 changes: 4 additions & 2 deletions tests/func/test_add.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,7 @@ def test_should_update_state_entry_for_file_after_add(mocker, dvc, tmp_dir):
assert ret == 0
assert file_md5_counter.mock.call_count == 1

ret = main(["run", "-d", "foo", "echo foo"])
ret = main(["run", "--single-stage", "-d", "foo", "echo foo"])
assert ret == 0
assert file_md5_counter.mock.call_count == 1

Expand Down Expand Up @@ -297,7 +297,9 @@ def test_should_update_state_entry_for_directory_after_add(
assert file_md5_counter.mock.call_count == 3

ls = "dir" if os.name == "nt" else "ls"
ret = main(["run", "-d", "data", "{} {}".format(ls, "data")])
ret = main(
["run", "--single-stage", "-d", "data", "{} {}".format(ls, "data")]
)
assert ret == 0
assert file_md5_counter.mock.call_count == 3

Expand Down
1 change: 1 addition & 0 deletions tests/func/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ def test_open_not_cached(dvc):
metric_file, metric_content
)
dvc.run(
single_stage=True,
metrics_no_cache=[metric_file],
cmd=('python -c "{}"'.format(metric_code)),
)
Expand Down
16 changes: 13 additions & 3 deletions tests/func/test_checkout.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,7 @@ def test(self):
self.commit_data_file(fname1)
self.commit_data_file(fname2)
self.dvc.run(
single_stage=True,
cmd="python {} {} {}".format(self.CODE, self.FOO, fname3),
deps=[self.CODE, self.FOO],
outs_no_cache=[fname3],
Expand Down Expand Up @@ -341,7 +342,10 @@ def test(self):

self.dvc.add(self.FOO)
stage = self.dvc.run(
cmd=cmd, deps=[self.FOO, self.CODE], outs_no_cache=["out"]
cmd=cmd,
deps=[self.FOO, self.CODE],
outs_no_cache=["out"],
single_stage=True,
)
self.assertTrue(stage is not None)

Expand Down Expand Up @@ -480,7 +484,9 @@ def test(self):

def test_checkout_no_checksum(tmp_dir, dvc):
tmp_dir.gen("file", "file content")
stage = dvc.run(outs=["file"], no_exec=True, cmd="somecmd")
stage = dvc.run(
outs=["file"], no_exec=True, cmd="somecmd", single_stage=True
)

with pytest.raises(CheckoutError):
dvc.checkout([stage.path], force=True)
Expand Down Expand Up @@ -706,7 +712,11 @@ def test_checkout_with_relink_existing(tmp_dir, dvc, link):
def test_checkout_with_deps(tmp_dir, dvc):
tmp_dir.dvc_gen({"foo": "foo"})
dvc.run(
fname="copy_file.dvc", cmd="echo foo > bar", outs=["bar"], deps=["foo"]
fname="copy_file.dvc",
cmd="echo foo > bar",
outs=["bar"],
deps=["foo"],
single_stage=True,
)

(tmp_dir / "bar").unlink()
Expand Down
2 changes: 1 addition & 1 deletion tests/func/test_commit.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def test_commit_force(tmp_dir, dvc):
assert dvc.status([stage.path]) == {}


@pytest.mark.parametrize("run_kw", [{}, {"name": "copy"}])
@pytest.mark.parametrize("run_kw", [{"single_stage": True}, {"name": "copy"}])
def test_commit_with_deps(tmp_dir, dvc, run_copy, run_kw):
tmp_dir.gen("foo", "foo")
(foo_stage,) = dvc.add("foo", no_commit=True)
Expand Down
10 changes: 6 additions & 4 deletions tests/func/test_data_cloud.py
Original file line number Diff line number Diff line change
Expand Up @@ -520,7 +520,9 @@ def _test(self):
url = Local.get_url()
self.main(["remote", "add", "-d", TEST_REMOTE, url])

stage = self.dvc.run(outs=["bar"], cmd="echo bar > bar")
stage = self.dvc.run(
outs=["bar"], cmd="echo bar > bar", single_stage=True
)
self.main(["push"])

stage_file_path = stage.relpath
Expand Down Expand Up @@ -630,7 +632,7 @@ def test_checksum_recalculation(mocker, dvc, tmp_dir):
assert ret == 0
ret = main(["push"])
assert ret == 0
ret = main(["run", "-d", "foo", "echo foo"])
ret = main(["run", "--single-stage", "-d", "foo", "echo foo"])
assert ret == 0
assert test_get_file_checksum.mock.call_count == 1

Expand Down Expand Up @@ -783,7 +785,7 @@ def recurse_list_dir(d):

def test_dvc_pull_pipeline_stages(tmp_dir, dvc, local_remote, run_copy):
(stage0,) = tmp_dir.dvc_gen("foo", "foo")
stage1 = run_copy("foo", "bar")
stage1 = run_copy("foo", "bar", single_stage=True)
stage2 = run_copy("bar", "foobar", name="copy-bar-foobar")
outs = ["foo", "bar", "foobar"]

Expand Down Expand Up @@ -813,7 +815,7 @@ def test_dvc_pull_pipeline_stages(tmp_dir, dvc, local_remote, run_copy):

def test_pipeline_file_target_ops(tmp_dir, dvc, local_remote, run_copy):
tmp_dir.dvc_gen("foo", "foo")
run_copy("foo", "bar")
run_copy("foo", "bar", single_stage=True)

tmp_dir.dvc_gen("lorem", "lorem")
run_copy("lorem", "lorem2", name="copy-lorem-lorem2")
Expand Down
26 changes: 21 additions & 5 deletions tests/func/test_dvcfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,11 @@ def test_run_load_one_for_multistage_non_existing_stage_name(tmp_dir, dvc):
def test_run_load_one_on_single_stage(tmp_dir, dvc):
tmp_dir.gen("foo", "foo")
stage = dvc.run(
cmd="cp foo foo2", deps=["foo"], metrics=["foo2"], always_changed=True,
cmd="cp foo foo2",
deps=["foo"],
metrics=["foo2"],
always_changed=True,
single_stage=True,
)
assert Dvcfile(dvc, stage.path).stages.get("random-name")
assert Dvcfile(dvc, stage.path).stage
Expand Down Expand Up @@ -97,7 +101,11 @@ def test_load_all_multistage(tmp_dir, dvc):
def test_load_all_singlestage(tmp_dir, dvc):
tmp_dir.gen("foo", "foo")
stage1 = dvc.run(
cmd="cp foo foo2", deps=["foo"], metrics=["foo2"], always_changed=True,
cmd="cp foo foo2",
deps=["foo"],
metrics=["foo2"],
always_changed=True,
single_stage=True,
)
stages = Dvcfile(dvc, "foo2.dvc").stages.values()
assert len(stages) == 1
Expand All @@ -107,7 +115,11 @@ def test_load_all_singlestage(tmp_dir, dvc):
def test_load_singlestage(tmp_dir, dvc):
tmp_dir.gen("foo", "foo")
stage1 = dvc.run(
cmd="cp foo foo2", deps=["foo"], metrics=["foo2"], always_changed=True,
cmd="cp foo foo2",
deps=["foo"],
metrics=["foo2"],
always_changed=True,
single_stage=True,
)
assert Dvcfile(dvc, "foo2.dvc").stage == stage1

Expand Down Expand Up @@ -144,7 +156,11 @@ def test_stage_collection(tmp_dir, dvc):
always_changed=True,
)
stage3 = dvc.run(
cmd="cp bar bar2", deps=["bar"], metrics=["bar2"], always_changed=True,
cmd="cp bar bar2",
deps=["bar"],
metrics=["bar2"],
always_changed=True,
single_stage=True,
)
assert {s for s in dvc.stages} == {stage1, stage3, stage2}

Expand Down Expand Up @@ -174,7 +190,7 @@ def test_stage_filter(tmp_dir, dvc, run_copy):

def test_stage_filter_in_singlestage_file(tmp_dir, dvc, run_copy):
tmp_dir.gen("foo", "foo")
stage = run_copy("foo", "bar")
stage = run_copy("foo", "bar", single_stage=True)
dvcfile = Dvcfile(dvc, stage.path)
assert set(dvcfile.stages.filter(None).values()) == {stage}
assert dvcfile.stages.filter(None).get(None) == stage
Expand Down
39 changes: 29 additions & 10 deletions tests/func/test_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,10 +103,23 @@ def test_disconnected_stage(tmp_dir, dvc):
tmp_dir.dvc_gen({"base": "base"})

dvc.add("base")
dvc.run(deps=["base"], outs=["derived1"], cmd="echo derived1 > derived1")
dvc.run(deps=["base"], outs=["derived2"], cmd="echo derived2 > derived2")
dvc.run(
deps=["base"],
outs=["derived1"],
cmd="echo derived1 > derived1",
single_stage=True,
)
dvc.run(
deps=["base"],
outs=["derived2"],
cmd="echo derived2 > derived2",
single_stage=True,
)
final_stage = dvc.run(
deps=["derived1"], outs=["final"], cmd="echo final > final"
deps=["derived1"],
outs=["final"],
cmd="echo final > final",
single_stage=True,
)

command = CmdPipelineShow([])
Expand Down Expand Up @@ -134,7 +147,7 @@ def test_print_locked_stages(tmp_dir, dvc, caplog):

def test_dot_outs(tmp_dir, dvc, run_copy):
tmp_dir.gen("foo", "foo content")
run_copy("foo", "file")
run_copy("foo", "file", single_stage=True)
assert main(["pipeline", "show", "--dot", "file.dvc", "--outs"]) == 0


Expand Down Expand Up @@ -208,8 +221,10 @@ def test_no_stages(self):

def one_pipeline(self):
self.dvc.add("foo")
self.dvc.run(deps=["foo"], outs=["bar"], cmd="")
self.dvc.run(deps=["bar"], outs=["baz"], cmd="echo baz > baz")
self.dvc.run(deps=["foo"], outs=["bar"], cmd="", single_stage=True)
self.dvc.run(
deps=["bar"], outs=["baz"], cmd="echo baz > baz", single_stage=True
)
pipelines = self.dvc.pipelines

self.assertEqual(len(pipelines), 1)
Expand All @@ -218,8 +233,10 @@ def one_pipeline(self):

def two_pipelines(self):
self.dvc.add("foo")
self.dvc.run(deps=["foo"], outs=["bar"], cmd="")
self.dvc.run(deps=["bar"], outs=["baz"], cmd="echo baz > baz")
self.dvc.run(deps=["foo"], outs=["bar"], cmd="", single_stage=True)
self.dvc.run(
deps=["bar"], outs=["baz"], cmd="echo baz > baz", single_stage=True
)

self.dvc.add("code.py")

Expand Down Expand Up @@ -248,11 +265,13 @@ def test_split_pipeline(tmp_dir, scm, dvc):
deps=["git_dep1", "data"],
outs=["data_train", "data_valid"],
cmd="echo train >> data_train && echo valid >> data_valid",
single_stage=True,
)
stage = dvc.run(
deps=["git_dep2", "data_train", "data_valid"],
outs=["result"],
cmd="echo result >> result",
single_stage=True,
)

command = CmdPipelineShow([])
Expand All @@ -271,7 +290,7 @@ def test_split_pipeline(tmp_dir, scm, dvc):
def test_pipeline_list_show_multistage(tmp_dir, dvc, run_copy, caplog):
tmp_dir.gen("foo", "foo")
run_copy("foo", "bar", name="copy-foo-bar")
run_copy("bar", "foobar")
run_copy("bar", "foobar", single_stage=True)
command = CmdPipelineShow([])

caplog.clear()
Expand Down Expand Up @@ -299,7 +318,7 @@ def test_pipeline_list_show_multistage(tmp_dir, dvc, run_copy, caplog):
def test_pipeline_ascii_multistage(tmp_dir, dvc, run_copy):
tmp_dir.gen("foo", "foo")
run_copy("foo", "bar", name="copy-foo-bar")
run_copy("bar", "foobar")
run_copy("bar", "foobar", single_stage=True)
command = CmdPipelineShow([])
nodes, edges, is_tree = command._build_graph("foobar.dvc")
assert set(nodes) == {"dvc.yaml:copy-foo-bar", "foobar.dvc"}
Expand Down
Loading