Skip to content

Commit

Permalink
implement removing stage (#3881)
Browse files Browse the repository at this point in the history
This should allow us to be able to remove a stage from a dvcfile,
(if it's a single stage file, it removes it), and make us able to
delete the overwrite the stage on run as well.

This commit:
* changes `run` `--overwrite-dvcfile` flag to `--overwrite`
* removes prompt in `run` for overwrite and fails hard
* changes `remove`: now it by default removes stage entry and unprotect outs
* `remove` has `--outs` to remove outputs instead of unprotecting them
  • Loading branch information
skshetry authored May 31, 2020
1 parent 9ee62ef commit edfa174
Show file tree
Hide file tree
Showing 14 changed files with 290 additions and 188 deletions.
45 changes: 4 additions & 41 deletions dvc/command/remove.py
Original file line number Diff line number Diff line change
@@ -1,74 +1,37 @@
import argparse
import logging

import dvc.prompt as prompt
from dvc.command.base import CmdBase, append_doc_link
from dvc.exceptions import DvcException

logger = logging.getLogger(__name__)


class CmdRemove(CmdBase):
def _is_dvc_only(self, target):
if not self.args.purge:
return True

if self.args.force:
return False

msg = "Are you sure you want to remove '{}' with its outputs?".format(
target
)

if prompt.confirm(msg):
return False

raise DvcException(
"Cannot purge without a confirmation from the user."
" Use `-f` to force."
)

def run(self):
for target in self.args.targets:
try:
dvc_only = self._is_dvc_only(target)
self.repo.remove(target, dvc_only=dvc_only)
self.repo.remove(target, outs=self.args.outs)
except DvcException:
logger.exception(f"failed to remove '{target}'")
return 1
return 0


def add_parser(subparsers, parent_parser):
REMOVE_HELP = "Remove DVC-tracked files or directories."
REMOVE_HELP = "Remove stage entry and unprotect outputs"
remove_parser = subparsers.add_parser(
"remove",
parents=[parent_parser],
description=append_doc_link(REMOVE_HELP, "remove"),
help=REMOVE_HELP,
formatter_class=argparse.RawDescriptionHelpFormatter,
)
remove_parser_group = remove_parser.add_mutually_exclusive_group()
remove_parser_group.add_argument(
"-o",
"--outs",
action="store_true",
default=True,
help="Only remove DVC-file outputs. (Default)",
)
remove_parser_group.add_argument(
"-p",
"--purge",
action="store_true",
default=False,
help="Remove DVC-file and all its outputs.",
)
remove_parser.add_argument(
"-f",
"--force",
"--outs",
action="store_true",
default=False,
help="Force purge.",
help="Remove outputs as well.",
)
remove_parser.add_argument(
"targets", nargs="+", help="DVC-files to remove."
Expand Down
6 changes: 3 additions & 3 deletions dvc/command/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def run(self):
fname=self.args.file,
wdir=self.args.wdir,
no_exec=self.args.no_exec,
overwrite=self.args.overwrite_dvcfile,
overwrite=self.args.overwrite,
run_cache=not self.args.no_run_cache,
no_commit=self.args.no_commit,
outs_persist=self.args.outs_persist,
Expand Down Expand Up @@ -177,10 +177,10 @@ def add_parser(subparsers, parent_parser):
help="Only create stage file without actually running it.",
)
run_parser.add_argument(
"--overwrite-dvcfile",
"--overwrite",
action="store_true",
default=False,
help="Overwrite existing DVC-file without asking for confirmation.",
help="Overwrite existing stage",
)
run_parser.add_argument(
"--no-run-cache",
Expand Down
39 changes: 37 additions & 2 deletions dvc/dvcfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ def dump(self, stage, **kwargs):
"Saving information to '{file}'.".format(file=relpath(self.path))
)
dump_stage_file(self.path, serialize.to_single_stage_file(stage))
self.repo.scm.track_file(relpath(self.path))
self.repo.scm.track_file(self.relpath)

def remove_with_prompt(self, force=False):
if not self.exists():
Expand All @@ -165,6 +165,9 @@ def remove_with_prompt(self, force=False):

self.remove()

def remove_stage(self, stage):
self.remove()


class PipelineFile(FileMixin):
"""Abstraction for pipelines file, .yaml + .lock combined."""
Expand Down Expand Up @@ -212,7 +215,7 @@ def _dump_pipeline_file(self, stage):
"Adding stage '%s' to '%s'", stage.name, self.relpath,
)
dump_stage_file(self.path, data)
self.repo.scm.track_file(relpath(self.path))
self.repo.scm.track_file(self.relpath)

@property
def stage(self):
Expand All @@ -234,6 +237,22 @@ def remove(self, force=False):
super().remove()
self._lockfile.remove()

def remove_stage(self, stage):
self._lockfile.remove_stage(stage)
if not self.exists():
return

with open(self.path, "r") as f:
d = parse_stage_for_update(f.read(), self.path)

self.validate(d, self.path)
if stage.name not in d.get("stages", {}):
return

logger.debug("Removing '%s' from '%s'", stage.name, self.path)
del d["stages"][stage.name]
dump_stage_file(self.path, d)


class Lockfile(FileMixin):
from dvc.schema import COMPILED_LOCKFILE_SCHEMA as SCHEMA
Expand Down Expand Up @@ -271,6 +290,22 @@ def dump(self, stage, **kwargs):
if modified:
self.repo.scm.track_file(self.relpath)

def remove_stage(self, stage):
if not self.exists():
return

with open(self.path) as f:
d = parse_stage_for_update(f.read(), self.path)
self.validate(d, self.path)

if stage.name not in d:
return

logger.debug("Removing '%s' from '%s'", stage.name, self.path)
del d[stage.name]

dump_stage_file(self.path, d)


class Dvcfile:
def __new__(cls, repo, path, **kwargs):
Expand Down
10 changes: 3 additions & 7 deletions dvc/repo/remove.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,11 @@


@locked
def remove(self, target, dvc_only=False):
from ..dvcfile import Dvcfile, is_valid_filename

def remove(self, target, outs=False):
path, name = parse_target(target)
stages = self.get_stages(path, name)
for stage in stages:
stage.remove_outs(force=True)

if path and is_valid_filename(path) and not dvc_only:
Dvcfile(self, path).remove()
for stage in stages:
stage.remove(remove_outs=outs, force=outs)

return stages
14 changes: 10 additions & 4 deletions dvc/repo/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,11 @@
from funcy import concat, first

from dvc.exceptions import InvalidArgumentError
from dvc.stage.exceptions import DuplicateStageName, InvalidStageName
from dvc.stage.exceptions import (
DuplicateStageName,
InvalidStageName,
StageFileAlreadyExistsError,
)

from ..exceptions import OutputDuplicationError
from . import locked
Expand Down Expand Up @@ -74,10 +78,12 @@ def run(self, fname=None, no_exec=False, single_stage=False, **kwargs):

dvcfile = Dvcfile(self, stage.path)
if dvcfile.exists():
if stage_name and stage_name in dvcfile.stages:
if kwargs.get("overwrite", True):
dvcfile.remove_stage(stage)
elif stage_cls != PipelineStage:
raise StageFileAlreadyExistsError(dvcfile.relpath)
elif stage_name and stage_name in dvcfile.stages:
raise DuplicateStageName(stage_name, dvcfile)
if stage_cls != PipelineStage:
dvcfile.remove_with_prompt(force=kwargs.get("overwrite", True))

try:
self.check_modified_graph([stage])
Expand Down
5 changes: 3 additions & 2 deletions dvc/stage/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -282,12 +282,13 @@ def unprotect_outs(self):
out.unprotect()

@rwlocked(write=["outs"])
def remove(self, force=False, remove_outs=True):
def remove(self, force=False, remove_outs=True, purge=True):
if remove_outs:
self.remove_outs(ignore_remove=True, force=force)
else:
self.unprotect_outs()
self.dvcfile.remove()
if purge:
self.dvcfile.remove_stage(self)

@rwlocked(read=["deps"], write=["outs"])
def reproduce(self, interactive=False, **kwargs):
Expand Down
134 changes: 132 additions & 2 deletions tests/func/test_dvcfile.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,14 @@
import textwrap

import pytest

from dvc.dvcfile import PIPELINE_FILE, Dvcfile
from dvc.stage.exceptions import StageFileDoesNotExistError
from dvc.dvcfile import PIPELINE_FILE, PIPELINE_LOCK, Dvcfile
from dvc.stage.exceptions import (
StageFileDoesNotExistError,
StageFileFormatError,
)
from dvc.stage.loader import StageNotFound
from dvc.utils.stage import dump_stage_file


def test_run_load_one_for_multistage(tmp_dir, dvc):
Expand Down Expand Up @@ -160,3 +166,127 @@ def test_stage_collection(tmp_dir, dvc):
single_stage=True,
)
assert {s for s in dvc.stages} == {stage1, stage3, stage2}


def test_remove_stage(tmp_dir, dvc, run_copy):
tmp_dir.gen("foo", "foo")
stage = run_copy("foo", "bar", name="copy-foo-bar")
stage2 = run_copy("bar", "foobar", name="copy-bar-foobar")

dvc_file = Dvcfile(dvc, PIPELINE_FILE)
assert dvc_file.exists()
assert {"copy-bar-foobar", "copy-foo-bar"} == set(
dvc_file._load()[0]["stages"].keys()
)

dvc_file.remove_stage(stage)

assert ["copy-bar-foobar"] == list(dvc_file._load()[0]["stages"].keys())

# sanity check
stage2.reload()

# re-check to see if it fails if there's no stage entry
dvc_file.remove_stage(stage)
dvc_file.remove(force=True)
# should not fail when there's no file at all.
dvc_file.remove_stage(stage)


def test_remove_stage_lockfile(tmp_dir, dvc, run_copy):
tmp_dir.gen("foo", "foo")
stage = run_copy("foo", "bar", name="copy-foo-bar")
stage2 = run_copy("bar", "foobar", name="copy-bar-foobar")

dvc_file = Dvcfile(dvc, PIPELINE_FILE)
lock_file = dvc_file._lockfile
assert dvc_file.exists()
assert lock_file.exists()
assert {"copy-bar-foobar", "copy-foo-bar"} == set(lock_file.load().keys())
lock_file.remove_stage(stage)

assert ["copy-bar-foobar"] == list(lock_file.load().keys())

# sanity check
stage2.reload()

# re-check to see if it fails if there's no stage entry
lock_file.remove_stage(stage)
lock_file.remove()
# should not fail when there's no file at all.
lock_file.remove_stage(stage)


def test_remove_stage_dvcfiles(tmp_dir, dvc, run_copy):
tmp_dir.gen("foo", "foo")
stage = run_copy("foo", "bar", single_stage=True)

dvc_file = Dvcfile(dvc, stage.path)
assert dvc_file.exists()
dvc_file.remove_stage(stage)
assert not dvc_file.exists()

# re-check to see if it fails if there's no stage entry
dvc_file.remove_stage(stage)
dvc_file.remove(force=True)

# should not fail when there's no file at all.
dvc_file.remove_stage(stage)


def test_remove_stage_on_lockfile_format_error(tmp_dir, dvc, run_copy):
tmp_dir.gen("foo", "foo")
stage = run_copy("foo", "bar", name="copy-foo-bar")
dvc_file = Dvcfile(dvc, stage.path)
lock_file = dvc_file._lockfile

data = dvc_file._load()[0]
lock_data = lock_file.load()
lock_data["gibberish"] = True
data["gibberish"] = True
dump_stage_file(lock_file.relpath, lock_data)
with pytest.raises(StageFileFormatError):
dvc_file.remove_stage(stage)

lock_file.remove()
dvc_file.dump(stage)

dump_stage_file(dvc_file.relpath, data)
with pytest.raises(StageFileFormatError):
dvc_file.remove_stage(stage)


def test_remove_stage_preserves_comment(tmp_dir, dvc, run_copy):
tmp_dir.gen(
"dvc.yaml",
textwrap.dedent(
"""\
stages:
generate-foo:
cmd: "echo foo > foo"
# This copies 'foo' text to 'foo' file.
outs:
- foo
copy-foo-bar:
cmd: "python copy.py foo bar"
deps:
- foo
outs:
- bar"""
),
)

dvc.reproduce(PIPELINE_FILE)

dvc_file = Dvcfile(dvc, PIPELINE_FILE)

assert dvc_file.exists()
assert (tmp_dir / PIPELINE_LOCK).exists()
assert (tmp_dir / "foo").exists()
assert (tmp_dir / "bar").exists()

dvc_file.remove_stage(dvc_file.stages["copy-foo-bar"])
assert (
"# This copies 'foo' text to 'foo' file."
in (tmp_dir / PIPELINE_FILE).read_text()
)
Loading

0 comments on commit edfa174

Please sign in to comment.