Skip to content

Commit

Permalink
dvc: require --external for external outputs (#3929)
Browse files Browse the repository at this point in the history
Fixes #1545
  • Loading branch information
efiop authored Jun 3, 2020
1 parent 2c0116c commit ac4f351
Show file tree
Hide file tree
Showing 15 changed files with 102 additions and 12 deletions.
7 changes: 7 additions & 0 deletions dvc/command/add.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ def run(self):
recursive=self.args.recursive,
no_commit=self.args.no_commit,
fname=self.args.file,
external=self.args.external,
)

except DvcException:
Expand Down Expand Up @@ -49,6 +50,12 @@ def add_parser(subparsers, parent_parser):
default=False,
help="Don't put files/directories into cache.",
)
parser.add_argument(
"--external",
action="store_true",
default=False,
help="Allow targets that are outside of the DVC repository.",
)
parser.add_argument(
"-f",
"--file",
Expand Down
7 changes: 7 additions & 0 deletions dvc/command/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ def run(self):
always_changed=self.args.always_changed,
name=self.args.name,
single_stage=self.args.single_stage,
external=self.args.external,
)
except DvcException:
logger.exception("")
Expand Down Expand Up @@ -225,6 +226,12 @@ def add_parser(subparsers, parent_parser):
default=False,
help=argparse.SUPPRESS,
)
run_parser.add_argument(
"--external",
action="store_true",
default=False,
help="Allow outputs that are outside of the DVC repository.",
)
run_parser.add_argument(
"command", nargs=argparse.REMAINDER, help="Command to execute."
)
Expand Down
19 changes: 15 additions & 4 deletions dvc/repo/add.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,9 @@

@locked
@scm_context
def add(repo, targets, recursive=False, no_commit=False, fname=None):
def add(
repo, targets, recursive=False, no_commit=False, fname=None, external=False
):
if recursive and fname:
raise RecursiveAddingWhileUsingFilename()

Expand Down Expand Up @@ -52,7 +54,9 @@ def add(repo, targets, recursive=False, no_commit=False, fname=None):
)
)

stages = _create_stages(repo, sub_targets, fname, pbar=pbar)
stages = _create_stages(
repo, sub_targets, fname, pbar=pbar, external=external
)

try:
repo.check_modified_graph(stages)
Expand Down Expand Up @@ -120,7 +124,7 @@ def _find_all_targets(repo, target, recursive):
return [target]


def _create_stages(repo, targets, fname, pbar=None):
def _create_stages(repo, targets, fname, pbar=None, external=False):
from dvc.stage import Stage, create_stage

stages = []
Expand All @@ -132,7 +136,14 @@ def _create_stages(repo, targets, fname, pbar=None):
unit="file",
):
path, wdir, out = resolve_paths(repo, out)
stage = create_stage(Stage, repo, fname or path, wdir=wdir, outs=[out])
stage = create_stage(
Stage,
repo,
fname or path,
wdir=wdir,
outs=[out],
external=external,
)
if stage:
Dvcfile(repo, stage.path).remove_with_prompt(force=True)

Expand Down
5 changes: 4 additions & 1 deletion dvc/stage/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
check_circular_dependency,
check_duplicated_arguments,
check_missing_outputs,
check_no_externals,
check_stage_path,
compute_md5,
fill_stage_dependencies,
Expand Down Expand Up @@ -52,7 +53,7 @@ def loads_from(cls, repo, path, wdir, data):
return cls(**kw)


def create_stage(cls, repo, path, **kwargs):
def create_stage(cls, repo, path, external=False, **kwargs):
from dvc.dvcfile import check_dvc_filename

wdir = os.path.abspath(kwargs.get("wdir", None) or os.curdir)
Expand All @@ -63,6 +64,8 @@ def create_stage(cls, repo, path, **kwargs):

stage = loads_from(cls, repo, path, wdir, kwargs)
fill_stage_outputs(stage, **kwargs)
if not external:
check_no_externals(stage)
fill_stage_dependencies(
stage, **project(kwargs, ["deps", "erepo", "params"])
)
Expand Down
1 change: 1 addition & 0 deletions dvc/stage/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ def _create_stage(self, cache, wdir=None):
params=params,
deps=[dep["path"] for dep in cache.get("deps", [])],
outs=[out["path"] for out in cache["outs"]],
external=True,
)
StageLoader.fill_from_lock(stage, cache)
return stage
Expand Down
4 changes: 4 additions & 0 deletions dvc/stage/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,10 @@ class StageCommitError(DvcException):
pass


class StageExternalOutputsError(DvcException):
pass


class StageUpdateError(DvcException):
def __init__(self, path):
super().__init__(
Expand Down
28 changes: 27 additions & 1 deletion dvc/stage/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,10 @@
from dvc.utils.fs import path_isin

from ..remote import LocalRemote, S3Remote
from ..utils import dict_md5, relpath
from ..utils import dict_md5, format_link, relpath
from .exceptions import (
MissingDataSource,
StageExternalOutputsError,
StagePathNotDirectoryError,
StagePathNotFoundError,
StagePathOutsideError,
Expand Down Expand Up @@ -68,6 +69,31 @@ def fill_stage_dependencies(stage, deps=None, erepo=None, params=None):
stage.deps += dependency.loads_params(stage, params or [])


def check_no_externals(stage):
from urllib.parse import urlparse

# NOTE: preventing users from accidentally using external outputs. See
# https://github.com/iterative/dvc/issues/1545 for more details.

def _is_external(out):
# NOTE: in case of `remote://` notation, the user clearly knows that
# this is an advanced feature and so we shouldn't error-out.
if out.is_in_repo or urlparse(out.def_path).scheme == "remote":
return False
return True

outs = [str(out) for out in stage.outs if _is_external(out)]
if not outs:
return

str_outs = ", ".join(outs)
link = format_link("https://dvc.org/doc/user-guide/managing-external-data")
raise StageExternalOutputsError(
f"Output(s) outside of DVC project: {str_outs}. "
f"See {link} for more info."
)


def check_circular_dependency(stage):
from dvc.exceptions import CircularDependencyError

Expand Down
4 changes: 2 additions & 2 deletions scripts/completion/dvc.bash
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ _dvc_commands='add cache checkout commit config dag destroy diff fetch freeze \
_dvc_options='-h --help -V --version'
_dvc_global_options='-h --help -q --quiet -v --verbose'

_dvc_add='-R --recursive -f --file --no-commit'
_dvc_add='-R --recursive -f --file --no-commit --external'
_dvc_add_COMPGEN=_dvc_compgen_files
_dvc_cache='dir'
_dvc_cache_dir=' --global --system --local -u --unset'
Expand Down Expand Up @@ -65,7 +65,7 @@ _dvc_remove_COMPGEN=_dvc_compgen_DVCFiles
_dvc_repro='-f --force -s --single-item -c --cwd -m --metrics --dry -i --interactive -p --pipeline -P --all-pipelines --no-run-cache --force-downstream --no-commit -R --recursive --downstream'
_dvc_repro_COMPGEN=_dvc_compgen_DVCFiles
_dvc_root=''
_dvc_run='--no-exec -f --file -d --deps -o --outs -O --outs-no-cache --outs-persist --outs-persist-no-cache -m --metrics -M --metrics-no-cache --overwrite-dvcfile --no-run-cache --no-commit -w --wdir'
_dvc_run='--no-exec -f --file -d --deps -o --outs -O --outs-no-cache --outs-persist --outs-persist-no-cache -m --metrics -M --metrics-no-cache --overwrite-dvcfile --no-run-cache --no-commit -w --wdir --external'
_dvc_run_COMPGEN=_dvc_compgen_DVCFiles
_dvc_status='-j --jobs -r --remote -a --all-branches -T --all-tags -d --with-deps -c --cloud'
_dvc_status_COMPGEN=_dvc_compgen_DVCFiles
Expand Down
2 changes: 2 additions & 0 deletions scripts/completion/dvc.zsh
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ _dvc_add=(
{-R,--recursive}"[Recursively add each file under the directory.]"
"--no-commit[Don't put files/directories into cache.]"
{-f,--file}"[Specify name of the DVC-file it generates.]:File:_files"
"--external[Allow targets that are outside of the DVC project.]"
"1:File:_files"
)

Expand Down Expand Up @@ -267,6 +268,7 @@ _dvc_run=(
"--no-commit[Don't put files/directories into cache.]"
"--outs-persist[Declare output file or directory that will not be removed upon repro.]:Output persistent:_files"
"--outs-persist-no-cache[Declare output file or directory that will not be removed upon repro (do not put into DVC cache).]:Output persistent regular:_files"
"--external[Allow outputs that are outside of the DVC project.]"
)

_dvc_status=(
Expand Down
7 changes: 6 additions & 1 deletion tests/func/test_add.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,11 +187,16 @@ def test_add_file_in_dir(tmp_dir, dvc):

class TestAddExternalLocalFile(TestDvc):
def test(self):
from dvc.stage.exceptions import StageExternalOutputsError

dname = TestDvc.mkdtemp()
fname = os.path.join(dname, "foo")
shutil.copyfile(self.FOO, fname)

stages = self.dvc.add(fname)
with self.assertRaises(StageExternalOutputsError):
self.dvc.add(fname)

stages = self.dvc.add(fname, external=True)
self.assertEqual(len(stages), 1)
stage = stages[0]
self.assertNotEqual(stage, None)
Expand Down
4 changes: 2 additions & 2 deletions tests/func/test_gc.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,15 +153,15 @@ def test(self):
cwd = os.getcwd()
os.chdir(self.additional_path)
# ADD FILE ONLY IN SECOND PROJECT
fname = os.path.join(self.additional_path, "only_in_second")
fname = "only_in_second"
with open(fname, "w+") as fobj:
fobj.write("only in additional repo")

stages = self.additional_dvc.add(fname)
self.assertEqual(len(stages), 1)

# ADD FILE IN SECOND PROJECT THAT IS ALSO IN MAIN PROJECT
fname = os.path.join(self.additional_path, "in_both")
fname = "in_both"
with open(fname, "w+") as fobj:
fobj.write("in both repos")

Expand Down
3 changes: 2 additions & 1 deletion tests/func/test_get.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,8 @@ def test_get_full_dvc_path(tmp_dir, erepo_dir, tmp_path_factory):
external_data.write_text("ext_data")

with erepo_dir.chdir():
erepo_dir.dvc_add(os.fspath(external_data), commit="add external data")
erepo_dir.dvc.add(os.fspath(external_data), external=True)
erepo_dir.scm_add("ext_data.dvc", commit="add external data")

Repo.get(
os.fspath(erepo_dir), os.fspath(external_data), "ext_data_imported"
Expand Down
1 change: 1 addition & 0 deletions tests/func/test_repro.py
Original file line number Diff line number Diff line change
Expand Up @@ -963,6 +963,7 @@ def test(self, mock_prompt):
deps=[out_foo_path],
cmd=self.cmd(foo_path, bar_path),
name="external-base",
external=True,
)

self.assertEqual(self.dvc.status([cmd_stage.addressing]), {})
Expand Down
4 changes: 4 additions & 0 deletions tests/unit/command/test_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ def test_run(mocker, dvc):
"file:param1,param2",
"--params",
"param3",
"--external",
"command",
]
)
Expand Down Expand Up @@ -70,6 +71,7 @@ def test_run(mocker, dvc):
cmd="command",
name="nam",
single_stage=False,
external=True,
)


Expand Down Expand Up @@ -99,6 +101,7 @@ def test_run_args_from_cli(mocker, dvc):
cmd="echo foo",
name=None,
single_stage=False,
external=False,
)


Expand Down Expand Up @@ -128,4 +131,5 @@ def test_run_args_with_spaces(mocker, dvc):
cmd='echo "foo bar"',
name=None,
single_stage=False,
external=False,
)
18 changes: 18 additions & 0 deletions tests/unit/stage/test_stage.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,3 +102,21 @@ def test_always_changed(dvc):
with dvc.lock:
assert stage.changed()
assert stage.status()["path"] == ["always changed"]


def test_external_outs(tmp_path_factory, dvc):
from dvc.stage import create_stage
from dvc.stage.exceptions import StageExternalOutputsError

tmp_path = tmp_path_factory.mktemp("external-outs")
foo = tmp_path / "foo"
foo.write_text("foo")

with pytest.raises(StageExternalOutputsError):
create_stage(Stage, dvc, "path.dvc", outs=[os.fspath(foo)])

with dvc.config.edit() as conf:
conf["remote"]["myremote"] = {"url": os.fspath(tmp_path)}

create_stage(Stage, dvc, "path.dvc", outs=["remote://myremote/foo"])
create_stage(Stage, dvc, "path.dvc", outs=[os.fspath(foo)], external=True)

0 comments on commit ac4f351

Please sign in to comment.