Skip to content

Commit

Permalink
Split create_stage out of dvcfile to Stage::create() and overwrite
Browse files Browse the repository at this point in the history
  • Loading branch information
skshetry committed Apr 12, 2020
1 parent 059e161 commit ff12d71
Show file tree
Hide file tree
Showing 6 changed files with 48 additions and 52 deletions.
7 changes: 0 additions & 7 deletions dvc/command/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@ def run(self):
no_exec=self.args.no_exec,
overwrite=overwrite,
ignore_build_cache=self.args.ignore_build_cache,
remove_outs=self.args.remove_outs,
no_commit=self.args.no_commit,
outs_persist=self.args.outs_persist,
outs_persist_no_cache=self.args.outs_persist_no_cache,
Expand Down Expand Up @@ -168,12 +167,6 @@ def add_parser(subparsers, parent_parser):
help="Run this stage even if it has been already ran with the same "
"command/dependencies/outputs/etc before.",
)
run_parser.add_argument(
"--remove-outs",
action="store_true",
default=False,
help="Deprecated, this is now the default behavior",
)
run_parser.add_argument(
"--no-commit",
action="store_true",
Expand Down
48 changes: 10 additions & 38 deletions dvc/dvcfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,43 +160,15 @@ def validate(d, fname=None):
except MultipleInvalid as exc:
raise StageFileFormatError(fname, exc)

@classmethod
def create_stage(cls, repo, accompany_outs=False, **kwargs):
from dvc.stage import Stage

stage = Stage.create(repo, accompany_outs=accompany_outs, **kwargs)

ignore_build_cache = kwargs.get("ignore_build_cache", False)
# NOTE: remove outs before we check build cache
if kwargs.get("remove_outs", False):
logger.warning(
"--remove-outs is deprecated."
" It is now the default behavior,"
" so there's no need to use this option anymore."
)
stage.remove_outs(ignore_remove=False)
logger.warning("Build cache is ignored when using --remove-outs.")
ignore_build_cache = True

dvcfile = Dvcfile(stage.repo, stage.path)
if dvcfile.exists():
if any(out.persist for out in stage.outs):
logger.warning(
"Build cache is ignored when persisting outputs."
)
ignore_build_cache = True

if not ignore_build_cache and stage.can_be_skipped:
logger.info("Stage is cached, skipping.")
return None

msg = (
"'{}' already exists. Do you wish to run the command and "
"overwrite it?".format(stage.relpath)
)
if not (kwargs.get("overwrite", True) or prompt.confirm(msg)):
raise StageFileAlreadyExistsError(stage.relpath)
def overwrite_with_prompt(self, force=False):
if not self.exists():
return

os.unlink(dvcfile.path)
msg = (
"'{}' already exists. Do you wish to run the command and "
"overwrite it?".format(self.path)
)
if not (force or prompt.confirm(msg)):
raise StageFileAlreadyExistsError(self.path)

return stage
os.unlink(self.path)
7 changes: 6 additions & 1 deletion dvc/repo/add.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,8 @@ def _find_all_targets(repo, target, recursive):


def _create_stages(repo, targets, fname, pbar=None):
from dvc.stage import Stage

stages = []

for out in Tqdm(
Expand All @@ -123,9 +125,12 @@ def _create_stages(repo, targets, fname, pbar=None):
disable=len(targets) < LARGE_DIR_SIZE,
unit="file",
):
stage = Dvcfile.create_stage(
stage = Stage.create(
repo, accompany_outs=True, outs=[out], fname=fname
)
if stage:
Dvcfile(repo, stage.path).overwrite_with_prompt(force=True)

repo._reset()

if not stage:
Expand Down
8 changes: 6 additions & 2 deletions dvc/repo/imp_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,10 @@
@scm_context
def imp_url(self, url, out=None, fname=None, erepo=None, locked=True):
from dvc.dvcfile import Dvcfile
from dvc.stage import Stage

out = resolve_output(url, out)
stage = Dvcfile.create_stage(
stage = Stage.create(
self,
cmd=None,
deps=[url],
Expand All @@ -22,12 +23,15 @@ def imp_url(self, url, out=None, fname=None, erepo=None, locked=True):
if stage is None:
return None

dvcfile = Dvcfile(self, stage.path)
dvcfile.overwrite_with_prompt(force=True)

self.check_modified_graph([stage])

stage.run()

stage.locked = locked

Dvcfile(self, stage.path).dump(stage)
dvcfile.dump(stage)

return stage
15 changes: 11 additions & 4 deletions dvc/repo/run.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,29 @@
import logging

from . import locked
from .scm_context import scm_context

logger = logging.getLogger(__name__)


@locked
@scm_context
def run(self, no_exec=False, **kwargs):
from dvc.stage import Stage
from dvc.dvcfile import Dvcfile

stage = Dvcfile.create_stage(self, **kwargs)

if stage is None:
stage = Stage.create(self, **kwargs)
if not stage:
return None

dvcfile = Dvcfile(self, stage.path)
dvcfile.overwrite_with_prompt(force=kwargs.get("overwrite", True))

self.check_modified_graph([stage])

if not no_exec:
stage.run(no_commit=kwargs.get("no_commit", False))

Dvcfile(self, stage.path).dump(stage)
dvcfile.dump(stage)

return stage
15 changes: 15 additions & 0 deletions dvc/stage/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -381,6 +381,21 @@ def create(repo, accompany_outs=False, **kwargs):
stage._check_and_set_wdir(wdir, is_wdir=kwargs.get("wdir", False))
stage._check_and_set_path(fname)

dvcfile = Dvcfile(stage.repo, stage.path)
if dvcfile.exists():
has_persist_outs = any(out.persist for out in stage.outs)
ignore_build_cache = (
kwargs.get("ignore_build_cache", False) or has_persist_outs
)
if has_persist_outs:
logger.warning(
"Build cache is ignored when persisting outputs."
)

if not ignore_build_cache and stage.can_be_skipped:
logger.info("Stage is cached, skipping.")
return None

return stage

def _fill_stage_outputs(self, **kwargs):
Expand Down

0 comments on commit ff12d71

Please sign in to comment.