From 628f87845c97d39f42f578870cbeb7fb2a2ddc94 Mon Sep 17 00:00:00 2001 From: Saugat Pachhai Date: Mon, 18 Jan 2021 13:50:36 +0545 Subject: [PATCH] introduce stage:add command (#5263) * Implement stage:add * fix tests * Add short flag -C for checkpoints --- dvc/cli.py | 2 + dvc/command/run.py | 144 +--------------------- dvc/command/stage.py | 203 +++++++++++++++++++++++++++++++ dvc/repo/run.py | 21 +--- dvc/stage/utils.py | 23 +++- tests/func/test_live.py | 8 +- tests/unit/command/test_run.py | 12 +- tests/unit/command/test_stage.py | 90 ++++++++++++++ 8 files changed, 333 insertions(+), 170 deletions(-) create mode 100644 dvc/command/stage.py create mode 100644 tests/unit/command/test_stage.py diff --git a/dvc/cli.py b/dvc/cli.py index 2a346da962..4306cd7846 100644 --- a/dvc/cli.py +++ b/dvc/cli.py @@ -39,6 +39,7 @@ repro, root, run, + stage, unprotect, update, version, @@ -82,6 +83,7 @@ update, git_hook, plots, + stage, experiments, check_ignore, live, diff --git a/dvc/command/run.py b/dvc/command/run.py index f1ee4d4798..5db1553342 100644 --- a/dvc/command/run.py +++ b/dvc/command/run.py @@ -1,7 +1,6 @@ import argparse import logging -from dvc.command import completion from dvc.command.base import CmdBase, append_doc_link from dvc.exceptions import DvcException @@ -43,8 +42,8 @@ def run(self): plots=self.args.plots, plots_no_cache=self.args.plots_no_cache, live=self.args.live, - live_summary=not self.args.live_no_summary, - live_report=not self.args.live_no_report, + live_no_summary=self.args.live_no_summary, + live_no_report=self.args.live_no_report, deps=self.args.deps, params=self.args.params, fname=self.args.file, @@ -91,6 +90,8 @@ def _quote_argument(self, argument): def add_parser(subparsers, parent_parser): + from dvc.command.stage import _add_common_args + RUN_HELP = "Generate a stage file from a command and execute the command." run_parser = subparsers.add_parser( "run", @@ -99,109 +100,19 @@ def add_parser(subparsers, parent_parser): help=RUN_HELP, formatter_class=argparse.RawDescriptionHelpFormatter, ) - run_parser.add_argument( - "-d", - "--deps", - action="append", - default=[], - help="Declare dependencies for reproducible cmd.", - metavar="", - ).complete = completion.FILE run_parser.add_argument( "-n", "--name", help="Stage name.", ) - run_parser.add_argument( - "-o", - "--outs", - action="append", - default=[], - help="Declare output file or directory.", - metavar="", - ).complete = completion.FILE - run_parser.add_argument( - "-O", - "--outs-no-cache", - action="append", - default=[], - help="Declare output file or directory " - "(do not put into DVC cache).", - metavar="", - ).complete = completion.FILE - run_parser.add_argument( - "-p", - "--params", - action="append", - default=[], - help="Declare parameter to use as additional dependency.", - metavar="[:]", - ).complete = completion.FILE - run_parser.add_argument( - "-m", - "--metrics", - action="append", - default=[], - help="Declare output metrics file.", - metavar="", - ) - run_parser.add_argument( - "-M", - "--metrics-no-cache", - action="append", - default=[], - help="Declare output metrics file (do not put into DVC cache).", - metavar="", - ) - run_parser.add_argument( - "--plots", - action="append", - default=[], - help="Declare output plot file.", - metavar="", - ) - run_parser.add_argument( - "--plots-no-cache", - action="append", - default=[], - help="Declare output plot file (do not put into DVC cache).", - metavar="", - ) - run_parser.add_argument( - "--live", help="Declare output as dvclive.", metavar="", - ) - run_parser.add_argument( - "--live-no-summary", - action="store_true", - default=False, - help="Signal dvclive logger to not dump latest metrics file.", - ) - run_parser.add_argument( - "--live-no-report", - action="store_true", - default=False, - help="Signal dvclive logger to not produce training report.", - ) + _add_common_args(run_parser) run_parser.add_argument( "--file", metavar="", help=argparse.SUPPRESS, ) - run_parser.add_argument( - "-w", - "--wdir", - help="Directory within your repo to run your command in.", - metavar="", - ) run_parser.add_argument( "--no-exec", action="store_true", default=False, help="Only create stage file without actually running it.", ) - run_parser.add_argument( - "-f", - "--force", - action="store_true", - default=False, - help="Overwrite existing stage", - ) run_parser.add_argument( "--no-run-cache", action="store_true", @@ -217,57 +128,12 @@ def add_parser(subparsers, parent_parser): default=False, help="Don't put files/directories into cache.", ) - run_parser.add_argument( - "--outs-persist", - action="append", - default=[], - help="Declare output file or directory that will not be " - "removed upon repro.", - metavar="", - ) - run_parser.add_argument( - "--outs-persist-no-cache", - action="append", - default=[], - help="Declare output file or directory that will not be " - "removed upon repro (do not put into DVC cache).", - metavar="", - ) - run_parser.add_argument( - "-c", - "--checkpoints", - action="append", - default=[], - help=argparse.SUPPRESS, - metavar="", - ).complete = completion.FILE - run_parser.add_argument( - "--always-changed", - action="store_true", - default=False, - help="Always consider this DVC-file as changed.", - ) run_parser.add_argument( "--single-stage", action="store_true", default=False, help=argparse.SUPPRESS, ) - run_parser.add_argument( - "--external", - action="store_true", - default=False, - help="Allow outputs that are outside of the DVC repository.", - ) - run_parser.add_argument( - "--desc", - type=str, - metavar="", - help=( - "User description of the stage (optional). " - "This doesn't affect any DVC operations." - ), - ) run_parser.add_argument( "command", nargs=argparse.REMAINDER, help="Command to execute." ) diff --git a/dvc/command/stage.py b/dvc/command/stage.py new file mode 100644 index 0000000000..5a69f9a499 --- /dev/null +++ b/dvc/command/stage.py @@ -0,0 +1,203 @@ +import argparse +import logging + +from dvc.command import completion +from dvc.command.base import CmdBase, append_doc_link, fix_subparsers + +logger = logging.getLogger(__name__) + + +class CmdStageAdd(CmdBase): + @staticmethod + def create(repo, force, **kwargs): + from dvc.stage.utils import check_graphs, create_stage_from_cli + + stage = create_stage_from_cli(repo, **kwargs) + check_graphs(repo, stage, force=force) + return stage + + def run(self): + stage = self.create(self.repo, self.args.force, **vars(self.args)) + stage.ignore_outs() + stage.dump() + + return 0 + + +def _add_common_args(parser): + parser.add_argument( + "-d", + "--deps", + action="append", + default=[], + help="Declare dependencies for reproducible cmd.", + metavar="", + ).complete = completion.FILE + parser.add_argument( + "-o", + "--outs", + action="append", + default=[], + help="Declare output file or directory.", + metavar="", + ).complete = completion.FILE + parser.add_argument( + "-O", + "--outs-no-cache", + action="append", + default=[], + help="Declare output file or directory " + "(do not put into DVC cache).", + metavar="", + ).complete = completion.FILE + parser.add_argument( + "-p", + "--params", + action="append", + default=[], + help="Declare parameter to use as additional dependency.", + metavar="[:]", + ).complete = completion.FILE + parser.add_argument( + "-m", + "--metrics", + action="append", + default=[], + help="Declare output metrics file.", + metavar="", + ) + parser.add_argument( + "-M", + "--metrics-no-cache", + action="append", + default=[], + help="Declare output metrics file (do not put into DVC cache).", + metavar="", + ) + parser.add_argument( + "--plots", + action="append", + default=[], + help="Declare output plot file.", + metavar="", + ) + parser.add_argument( + "--plots-no-cache", + action="append", + default=[], + help="Declare output plot file (do not put into DVC cache).", + metavar="", + ) + parser.add_argument( + "--live", help="Declare output as dvclive.", metavar="", + ) + parser.add_argument( + "--live-no-summary", + action="store_true", + default=False, + help="Signal dvclive logger to not dump latest metrics file.", + ) + parser.add_argument( + "--live-no-report", + action="store_true", + default=False, + help="Signal dvclive logger to not produce training report.", + ) + parser.add_argument( + "-w", + "--wdir", + help="Directory within your repo to run your command in.", + metavar="", + ) + parser.add_argument( + "-f", + "--force", + action="store_true", + default=False, + help="Overwrite existing stage", + ) + parser.add_argument( + "--outs-persist", + action="append", + default=[], + help="Declare output file or directory that will not be " + "removed upon repro.", + metavar="", + ) + parser.add_argument( + "--outs-persist-no-cache", + action="append", + default=[], + help="Declare output file or directory that will not be " + "removed upon repro (do not put into DVC cache).", + metavar="", + ) + parser.add_argument( + "-C", + "--checkpoints", + action="append", + default=[], + help=argparse.SUPPRESS, + metavar="", + ).complete = completion.FILE + parser.add_argument( + "--always-changed", + action="store_true", + default=False, + help="Always consider this DVC-file as changed.", + ) + parser.add_argument( + "--external", + action="store_true", + default=False, + help="Allow outputs that are outside of the DVC repository.", + ) + parser.add_argument( + "--desc", + type=str, + metavar="", + help=( + "User description of the stage (optional). " + "This doesn't affect any DVC operations." + ), + ) + + +def add_parser(subparsers, parent_parser): + STAGES_HELP = "Commands to list and create stages." + + stage_parser = subparsers.add_parser( + "stage", + parents=[parent_parser], + description=append_doc_link(STAGES_HELP, "stage"), + help=STAGES_HELP, + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + + stage_subparsers = stage_parser.add_subparsers( + dest="cmd", + help="Use `dvc stage CMD --help` to display command-specific help.", + ) + + fix_subparsers(stage_subparsers) + + STAGE_ADD_HELP = "Create stage" + stage_add_parser = stage_subparsers.add_parser( + "add", + parents=[parent_parser], + description=append_doc_link(STAGE_ADD_HELP, "stage/add"), + help=STAGE_ADD_HELP, + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + stage_add_parser.add_argument("name", help="Name of the stage to add") + stage_add_parser.add_argument( + "-c", + "--command", + action="append", + default=[], + dest="cmd", + help="Command to execute.", + required=True, + ) + _add_common_args(stage_add_parser) + stage_add_parser.set_defaults(func=CmdStageAdd) diff --git a/dvc/repo/run.py b/dvc/repo/run.py index 362d78b22c..8124355102 100644 --- a/dvc/repo/run.py +++ b/dvc/repo/run.py @@ -1,7 +1,5 @@ from typing import TYPE_CHECKING -from dvc.exceptions import InvalidArgumentError - from . import locked from .scm_context import scm_context @@ -20,27 +18,10 @@ def run( ): from dvc.stage.utils import check_graphs, create_stage_from_cli - if not kwargs.get("cmd"): - raise InvalidArgumentError("command is not specified") - - stage_name = kwargs.get("name") - if stage_name and single_stage: - raise InvalidArgumentError( - "`-n|--name` is incompatible with `--single-stage`" - ) - - if stage_name and fname: - raise InvalidArgumentError( - "`--file` is currently incompatible with `-n|--name` " - "and requires `--single-stage`" - ) - - if not stage_name and not single_stage: - raise InvalidArgumentError("`-n|--name` is required") - stage = create_stage_from_cli( self, single_stage=single_stage, fname=fname, **kwargs ) + if kwargs.get("run_cache", True) and stage.can_be_skipped: return None diff --git a/dvc/stage/utils.py b/dvc/stage/utils.py index 281d1bc139..d619cd9223 100644 --- a/dvc/stage/utils.py +++ b/dvc/stage/utils.py @@ -6,6 +6,7 @@ from funcy import concat, first, lsplit, rpartial, without +from dvc.exceptions import InvalidArgumentError from dvc.utils.cli_parse import parse_params from dvc.utils.collections import chunk_dict @@ -352,17 +353,37 @@ def create_stage_from_cli( from . import PipelineStage, Stage, create_stage, restore_meta + cmd = kwargs.get("cmd") + if not cmd: + raise InvalidArgumentError("command is not specified") + + stage_name = kwargs.get("name") + if stage_name and single_stage: + raise InvalidArgumentError( + "`-n|--name` is incompatible with `--single-stage`" + ) + if stage_name and fname: + raise InvalidArgumentError( + "`--file` is currently incompatible with `-n|--name` " + "and requires `--single-stage`" + ) + if not stage_name and not single_stage: + raise InvalidArgumentError("`-n|--name` is required") + if single_stage: kwargs.pop("name", None) stage_cls = Stage path = fname or _get_file_path(kwargs) else: - stage_name = kwargs.get("name", None) path = PIPELINE_FILE stage_cls = PipelineStage if not (stage_name and is_valid_name(stage_name)): raise InvalidStageName + kwargs["cmd"] = cmd[0] if isinstance(cmd, list) and len(cmd) == 1 else cmd + kwargs["live_summary"] = not kwargs.pop("live_no_summary", False) + kwargs["live_report"] = not kwargs.pop("live_no_report", False) + params = chunk_dict(parse_params(kwargs.pop("params", []))) stage = create_stage( stage_cls, repo=repo, path=path, params=params, **kwargs diff --git a/tests/func/test_live.py b/tests/func/test_live.py index f0bf11acf7..1c12817f68 100644 --- a/tests/func/test_live.py +++ b/tests/func/test_live.py @@ -31,8 +31,8 @@ def make(summary=True, report=True): deps=["train.py"], name="live_stage", live="logs", - live_summary=summary, - live_report=report, + live_no_summary=not summary, + live_no_report=not report, ) scm.add(["dvc.yaml", "dvc.lock", "train.py", "params.yaml"]) @@ -52,8 +52,8 @@ def test_export_config_tmp(tmp_dir, dvc, mocker, summary, report): deps=["src"], name="run_logger", live="logs", - live_summary=summary, - live_report=report, + live_no_summary=not summary, + live_no_report=not report, ) assert run_spy.call_count == 1 diff --git a/tests/unit/command/test_run.py b/tests/unit/command/test_run.py index dabb3d20ee..e89c60fb17 100644 --- a/tests/unit/command/test_run.py +++ b/tests/unit/command/test_run.py @@ -71,8 +71,8 @@ def test_run(mocker, dvc): checkpoints=["checkpoints"], params=["file:param1,param2", "param3"], live="live", - live_summary=False, - live_report=False, + live_no_summary=True, + live_no_report=True, fname="file", wdir="wdir", no_exec=True, @@ -102,8 +102,8 @@ def test_run_args_from_cli(mocker, dvc): plots=[], plots_no_cache=[], live=None, - live_summary=True, - live_report=True, + live_no_summary=False, + live_no_report=False, outs_persist=[], outs_persist_no_cache=[], checkpoints=[], @@ -137,8 +137,8 @@ def test_run_args_with_spaces(mocker, dvc): plots=[], plots_no_cache=[], live=None, - live_summary=True, - live_report=True, + live_no_summary=False, + live_no_report=False, outs_persist=[], outs_persist_no_cache=[], checkpoints=[], diff --git a/tests/unit/command/test_stage.py b/tests/unit/command/test_stage.py new file mode 100644 index 0000000000..01cf56178e --- /dev/null +++ b/tests/unit/command/test_stage.py @@ -0,0 +1,90 @@ +import pytest + +from dvc.cli import parse_args +from dvc.command.stage import CmdStageAdd + + +@pytest.mark.parametrize( + "extra_args, expected_extra", + [ + (["-c", "cmd1", "-c", "cmd2"], {"cmd": ["cmd1", "cmd2"]}), + (["-c", "cmd1"], {"cmd": ["cmd1"]}), + ], +) +def test_stage_add(mocker, dvc, extra_args, expected_extra): + cli_args = parse_args( + [ + "stage", + "add", + "name", + "--deps", + "deps", + "--outs", + "outs", + "--outs-no-cache", + "outs-no-cache", + "--metrics", + "metrics", + "--metrics-no-cache", + "metrics-no-cache", + "--plots", + "plots", + "--plots-no-cache", + "plots-no-cache", + "--live", + "live", + "--live-no-summary", + "--live-no-report", + "--wdir", + "wdir", + "--force", + "--outs-persist", + "outs-persist", + "--outs-persist-no-cache", + "outs-persist-no-cache", + "--checkpoints", + "checkpoints", + "--always-changed", + "--params", + "file:param1,param2", + "--params", + "param3", + "--external", + "--desc", + "description", + "--force", + *extra_args, + ] + ) + assert cli_args.func == CmdStageAdd + m = mocker.patch.object(CmdStageAdd, "create") + + cmd = cli_args.func(cli_args) + + assert cmd.run() == 0 + assert m.call_args[0] == (cmd.repo, True) + + expected = dict( + name="name", + deps=["deps"], + outs=["outs"], + outs_no_cache=["outs-no-cache"], + params=["file:param1,param2", "param3"], + metrics=["metrics"], + metrics_no_cache=["metrics-no-cache"], + plots=["plots"], + plots_no_cache=["plots-no-cache"], + live="live", + live_no_summary=True, + live_no_report=True, + wdir="wdir", + outs_persist=["outs-persist"], + outs_persist_no_cache=["outs-persist-no-cache"], + checkpoints=["checkpoints"], + always_changed=True, + external=True, + desc="description", + **expected_extra + ) + # expected values should be a subset of what's in the call args list + assert expected.items() <= m.call_args[1].items()