From b020918f5d06813d07deba104911efad5b306e11 Mon Sep 17 00:00:00 2001
From: Saugat Pachhai <suagatchhetri@outlook.com>
Date: Mon, 1 Feb 2021 21:35:04 +0545
Subject: [PATCH] stage:add: use remainder as a command script (#5350)

* stage:add use remainder as a command

* use -c for checkpoints

* use -n for stage name, make it required, fix tests
---
 dvc/command/run.py               | 31 ++-------------
 dvc/command/stage.py             | 39 ++++++++++++++-----
 tests/func/test_cli.py           |  2 +-
 tests/unit/command/test_stage.py | 65 ++++++++++++++++++--------------
 4 files changed, 71 insertions(+), 66 deletions(-)
diff --git a/dvc/command/run.py b/dvc/command/run.py
index 5db1553342..376e7a9778 100644
--- a/dvc/command/run.py
+++ b/dvc/command/run.py
@@ -2,6 +2,7 @@
 import logging
 
 from dvc.command.base import CmdBase, append_doc_link
+from dvc.command.stage import parse_cmd
 from dvc.exceptions import DvcException
 
 logger = logging.getLogger(__name__)
@@ -22,7 +23,7 @@ def run(self):
                 self.args.outs_persist_no_cache,
                 self.args.checkpoints,
                 self.args.params,
-                self.args.command,
+                self.args.cmd,
             ]
         ):  # pragma: no cover
             logger.error(
@@ -34,7 +35,7 @@ def run(self):
 
         try:
             self.repo.run(
-                cmd=self._parsed_cmd(),
+                cmd=parse_cmd(self.args.cmd),
                 outs=self.args.outs,
                 outs_no_cache=self.args.outs_no_cache,
                 metrics=self.args.metrics,
@@ -67,27 +68,6 @@ def run(self):
 
         return 0
 
-    def _parsed_cmd(self):
-        """
-        We need to take into account two cases:
-
-        - ['python code.py foo bar']: Used mainly with dvc as a library
-        - ['echo', 'foo bar']: List of arguments received from the CLI
-
-        The second case would need quoting, as it was passed through:
-                dvc run echo "foo bar"
-        """
-        if len(self.args.command) < 2:
-            return " ".join(self.args.command)
-
-        return " ".join(self._quote_argument(arg) for arg in self.args.command)
-
-    def _quote_argument(self, argument):
-        if " " not in argument or '"' in argument:
-            return argument
-
-        return f'"{argument}"'
-
 
 def add_parser(subparsers, parent_parser):
     from dvc.command.stage import _add_common_args
@@ -103,7 +83,6 @@ def add_parser(subparsers, parent_parser):
     run_parser.add_argument(
         "-n", "--name", help="Stage name.",
     )
-    _add_common_args(run_parser)
     run_parser.add_argument(
         "--file", metavar="<filename>", help=argparse.SUPPRESS,
     )
@@ -134,7 +113,5 @@ def add_parser(subparsers, parent_parser):
         default=False,
         help=argparse.SUPPRESS,
     )
-    run_parser.add_argument(
-        "command", nargs=argparse.REMAINDER, help="Command to execute."
-    )
+    _add_common_args(run_parser)
     run_parser.set_defaults(func=CmdRun)
diff --git a/dvc/command/stage.py b/dvc/command/stage.py
index c52adbae81..f51c446a16 100644
--- a/dvc/command/stage.py
+++ b/dvc/command/stage.py
@@ -105,10 +105,32 @@ def log_error(relpath: str, exc: Exception):
         return 0
 
 
+def parse_cmd(commands: List[str]) -> str:
+    """
+    We need to take into account two cases:
+
+    - ['python code.py foo bar']: Used mainly with dvc as a library
+    - ['echo', 'foo bar']: List of arguments received from the CLI
+
+    The second case would need quoting, as it was passed through:
+            dvc run echo "foo bar"
+    """
+
+    def quote_argument(arg: str):
+        should_quote = " " in arg and '"' not in arg
+        return f'"{arg}"' if should_quote else arg
+
+    if len(commands) < 2:
+        return " ".join(commands)
+    return " ".join(map(quote_argument, commands))
+
+
 class CmdStageAdd(CmdBase):
     def run(self):
         repo = self.repo
         kwargs = vars(self.args)
+        kwargs["cmd"] = parse_cmd(kwargs.pop("cmd"))
+
         stage = repo.stage.create_from_cli(validate=True, **kwargs)
 
         with repo.scm.track_file_changes(config=repo.config):
@@ -226,7 +248,7 @@ def _add_common_args(parser):
         metavar="<filename>",
     )
     parser.add_argument(
-        "-C",
+        "-c",
         "--checkpoints",
         action="append",
         default=[],
@@ -254,6 +276,12 @@ def _add_common_args(parser):
             "This doesn't affect any DVC operations."
         ),
     )
+    parser.add_argument(
+        "cmd",
+        nargs=argparse.REMAINDER,
+        help="Command to execute.",
+        metavar="command",
+    )
 
 
 def add_parser(subparsers, parent_parser):
@@ -282,15 +310,8 @@ def add_parser(subparsers, parent_parser):
         help=STAGE_ADD_HELP,
         formatter_class=argparse.RawDescriptionHelpFormatter,
     )
-    stage_add_parser.add_argument("name", help="Name of the stage to add")
     stage_add_parser.add_argument(
-        "-c",
-        "--command",
-        action="append",
-        default=[],
-        dest="cmd",
-        help="Command to execute.",
-        required=True,
+        "-n", "--name", help="Name of the stage to add", required=True
     )
     _add_common_args(stage_add_parser)
     stage_add_parser.set_defaults(func=CmdStageAdd)
diff --git a/tests/func/test_cli.py b/tests/func/test_cli.py
index 93c45926dd..d10ab1d487 100644
--- a/tests/func/test_cli.py
+++ b/tests/func/test_cli.py
@@ -66,7 +66,7 @@ def test(self):
         self.assertEqual(args.outs, [out1, out2])
         self.assertEqual(args.outs_no_cache, [out_no_cache1, out_no_cache2])
         self.assertEqual(args.file, fname)
-        self.assertEqual(args.command, [cmd, arg1, arg2])
+        self.assertEqual(args.cmd, [cmd, arg1, arg2])
 
 
 class TestPull(TestDvc):
diff --git a/tests/unit/command/test_stage.py b/tests/unit/command/test_stage.py
index 462091132a..9e40c13a6a 100644
--- a/tests/unit/command/test_stage.py
+++ b/tests/unit/command/test_stage.py
@@ -5,17 +5,19 @@
 
 
 @pytest.mark.parametrize(
-    "extra_args, expected_extra",
+    "command, parsed_command",
     [
-        (["-c", "cmd1", "-c", "cmd2"], {"cmd": ["cmd1", "cmd2"]}),
-        (["-c", "cmd1"], {"cmd": ["cmd1"]}),
+        (["echo", "foo", "bar"], "echo foo bar"),
+        (["echo", '"foo bar"'], 'echo "foo bar"'),
+        (["echo", "foo bar"], 'echo "foo bar"'),
     ],
 )
-def test_stage_add(mocker, dvc, extra_args, expected_extra):
+def test_stage_add(mocker, dvc, command, parsed_command):
     cli_args = parse_args(
         [
             "stage",
             "add",
+            "--name",
             "name",
             "--deps",
             "deps",
@@ -53,7 +55,7 @@ def test_stage_add(mocker, dvc, extra_args, expected_extra):
             "--desc",
             "description",
             "--force",
-            *extra_args,
+            *command,
         ]
     )
     assert cli_args.func == CmdStageAdd
@@ -62,27 +64,32 @@ def test_stage_add(mocker, dvc, extra_args, expected_extra):
     m = mocker.patch.object(cmd.repo.stage, "create_from_cli")
 
     assert cmd.run() == 0
-    expected = dict(
-        name="name",
-        deps=["deps"],
-        outs=["outs"],
-        outs_no_cache=["outs-no-cache"],
-        params=["file:param1,param2", "param3"],
-        metrics=["metrics"],
-        metrics_no_cache=["metrics-no-cache"],
-        plots=["plots"],
-        plots_no_cache=["plots-no-cache"],
-        live="live",
-        live_no_summary=True,
-        live_no_report=True,
-        wdir="wdir",
-        outs_persist=["outs-persist"],
-        outs_persist_no_cache=["outs-persist-no-cache"],
-        checkpoints=["checkpoints"],
-        always_changed=True,
-        external=True,
-        desc="description",
-        **expected_extra
-    )
-    # expected values should be a subset of what's in the call args list
-    assert expected.items() <= m.call_args[1].items()
+    expected = {
+        "name": "name",
+        "deps": ["deps"],
+        "outs": ["outs"],
+        "outs_no_cache": ["outs-no-cache"],
+        "params": ["file:param1,param2", "param3"],
+        "metrics": ["metrics"],
+        "metrics_no_cache": ["metrics-no-cache"],
+        "plots": ["plots"],
+        "plots_no_cache": ["plots-no-cache"],
+        "live": "live",
+        "live_no_summary": True,
+        "live_no_report": True,
+        "wdir": "wdir",
+        "outs_persist": ["outs-persist"],
+        "outs_persist_no_cache": ["outs-persist-no-cache"],
+        "checkpoints": ["checkpoints"],
+        "always_changed": True,
+        "external": True,
+        "desc": "description",
+        "cmd": parsed_command,
+    }
+    args, kwargs = m.call_args
+    assert not args
+
+    for key, val in expected.items():
+        # expected values should be a subset of what's in the call args list
+        assert key in kwargs
+        assert kwargs[key] == val