Skip to content

Commit

Permalink
Merge pull request #3097 from fabiosantoscode/feature/3089-get-non-dv…
Browse files Browse the repository at this point in the history
…c-repositories

get: handle non-DVC repositories
  • Loading branch information
efiop authored Jan 11, 2020
2 parents 9e9f37e + de303d4 commit b7f9e73
Show file tree
Hide file tree
Showing 5 changed files with 64 additions and 52 deletions.
17 changes: 9 additions & 8 deletions dvc/command/get.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,9 @@ def run(self):


def add_parser(subparsers, parent_parser):
GET_HELP = "Download/copy files or directories from DVC repository."
GET_HELP = (
"Download a file or directory from any DVC project or Git repository."
)
get_parser = subparsers.add_parser(
"get",
parents=[parent_parser],
Expand All @@ -40,18 +42,17 @@ def add_parser(subparsers, parent_parser):
formatter_class=argparse.RawDescriptionHelpFormatter,
)
get_parser.add_argument(
"url", help="URL of Git repository with DVC project to download from."
"url",
help="Location of DVC project or Git repository to download from",
)
get_parser.add_argument(
"path", help="Path to a file or directory within a DVC repository."
"path",
help="Path to a file or directory within the project or repository",
)
get_parser.add_argument(
"-o",
"--out",
nargs="?",
help="Destination path to copy/download files to.",
"-o", "--out", nargs="?", help="Destination path to download files to"
)
get_parser.add_argument(
"--rev", nargs="?", help="DVC repository git revision."
"--rev", nargs="?", help="Git revision (e.g. branch, tag, SHA)"
)
get_parser.set_defaults(func=CmdGet)
13 changes: 8 additions & 5 deletions dvc/command/imp.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ def run(self):

def add_parser(subparsers, parent_parser):
IMPORT_HELP = (
"Download data from DVC repository and take it under DVC control."
"Download a file or directory from any DVC project or Git repository"
"and take it under DVC control."
)

import_parser = subparsers.add_parser(
Expand All @@ -41,15 +42,17 @@ def add_parser(subparsers, parent_parser):
formatter_class=argparse.RawTextHelpFormatter,
)
import_parser.add_argument(
"url", help="URL of Git repository with DVC project to download from."
"url",
help="Location of DVC project or Git repository to download from",
)
import_parser.add_argument(
"path", help="Path to data within DVC repository."
"path",
help="Path to a file or directory within the project or repository",
)
import_parser.add_argument(
"-o", "--out", nargs="?", help="Destination path to put data to."
"-o", "--out", nargs="?", help="Destination path to download files to"
)
import_parser.add_argument(
"--rev", nargs="?", help="DVC repository git revision."
"--rev", nargs="?", help="Git revision (e.g. branch, tag, SHA)"
)
import_parser.set_defaults(func=CmdImport)
5 changes: 0 additions & 5 deletions dvc/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,11 +240,6 @@ def __init__(self, ignore_dirname):
)


class UrlNotDvcRepoError(DvcException):
def __init__(self, url):
super().__init__("URL '{}' is not a dvc repository.".format(url))


class GitHookAlreadyExistsError(DvcException):
def __init__(self, hook_name):
super().__init__(
Expand Down
63 changes: 36 additions & 27 deletions dvc/repo/get.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,9 @@
DvcException,
NotDvcRepoError,
OutputNotFoundError,
UrlNotDvcRepoError,
PathMissingError,
)
from dvc.external_repo import external_repo
from dvc.external_repo import external_repo, cached_clone
from dvc.path_info import PathInfo
from dvc.stage import Stage
from dvc.utils import resolve_output
Expand Down Expand Up @@ -42,37 +41,47 @@ def get(url, path, out=None, rev=None):
# and won't work with reflink/hardlink.
dpath = os.path.dirname(os.path.abspath(out))
tmp_dir = os.path.join(dpath, "." + str(shortuuid.uuid()))
repo_dir = None
try:
with external_repo(cache_dir=tmp_dir, url=url, rev=rev) as repo:
# Try any links possible to avoid data duplication.
#
# Not using symlink, because we need to remove cache after we are
# done, and to make that work we would have to copy data over
# anyway before removing the cache, so we might just copy it
# right away.
#
# Also, we can't use theoretical "move" link type here, because
# the same cache file might be used a few times in a directory.
repo.cache.local.cache_types = ["reflink", "hardlink", "copy"]

try:
output = repo.find_out_by_relpath(path)
except OutputNotFoundError:
output = None

if output and output.use_cache:
_get_cached(repo, output, out)
else:
try:
with external_repo(cache_dir=tmp_dir, url=url, rev=rev) as repo:
# Try any links possible to avoid data duplication.
#
# Not using symlink, because we need to remove cache after we
# are done, and to make that work we would have to copy data
# over anyway before removing the cache, so we might just copy
# it right away.
#
# Also, we can't use theoretical "move" link type here, because
# the same cache file might be used a few times in a directory.
repo.cache.local.cache_types = ["reflink", "hardlink", "copy"]

try:
output = repo.find_out_by_relpath(path)
except OutputNotFoundError:
output = None

if output and output.use_cache:
_get_cached(repo, output, out)
return

# Either an uncached out with absolute path or a user error
if os.path.isabs(path):
raise FileNotFoundError

fs_copy(os.path.join(repo.root_dir, path), out)
repo_dir = repo.root_dir

except NotDvcRepoError:
# Not a DVC repository, continue below and copy from git
pass

if os.path.isabs(path):
raise FileNotFoundError

if not repo_dir:
repo_dir = cached_clone(url, rev=rev)

fs_copy(os.path.join(repo_dir, path), out)
except (OutputNotFoundError, FileNotFoundError):
raise PathMissingError(path, url)
except NotDvcRepoError:
raise UrlNotDvcRepoError(url)
finally:
remove(tmp_dir)

Expand Down
18 changes: 11 additions & 7 deletions tests/func/test_get.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@

from dvc.cache import Cache
from dvc.config import Config
from dvc.exceptions import UrlNotDvcRepoError
from dvc.repo.get import GetDVCFileError, PathMissingError
from dvc.repo import Repo
from dvc.system import System
Expand Down Expand Up @@ -87,9 +86,10 @@ def test_get_repo_rev(tmp_dir, erepo_dir):
def test_get_from_non_dvc_repo(tmp_dir, erepo_dir):
erepo_dir.scm.repo.index.remove([erepo_dir.dvc.dvc_dir], r=True)
erepo_dir.scm.commit("remove dvc")
erepo_dir.scm_gen({"some_file": "contents"}, commit="create file")

with pytest.raises(UrlNotDvcRepoError):
Repo.get(fspath(erepo_dir), "some_file.zip")
Repo.get(fspath(erepo_dir), "some_file", "file_imported")
assert (tmp_dir / "file_imported").read_text() == "contents"


def test_get_a_dvc_file(tmp_dir, erepo_dir):
Expand Down Expand Up @@ -136,6 +136,14 @@ def test_absolute_file_outside_repo(tmp_dir, erepo_dir):
Repo.get(fspath(erepo_dir), "/root/")


def test_absolute_file_outside_git_repo(tmp_dir, erepo_dir):
erepo_dir.scm.repo.index.remove([erepo_dir.dvc.dvc_dir], r=True)
erepo_dir.scm.commit("remove dvc")

with pytest.raises(PathMissingError):
Repo.get(fspath(erepo_dir), "/root/")


def test_unknown_path(tmp_dir, erepo_dir):
with pytest.raises(PathMissingError):
Repo.get(fspath(erepo_dir), "a_non_existing_file")
Expand Down Expand Up @@ -164,10 +172,6 @@ def test_get_from_non_dvc_master(tmp_dir, erepo_dir, caplog):
erepo_dir.dvc.scm.repo.index.remove([".dvc"], r=True)
erepo_dir.dvc.scm.commit("remove .dvc")

# sanity check
with pytest.raises(UrlNotDvcRepoError):
Repo.get(fspath(erepo_dir), "some_file")

caplog.clear()
dst = "file_imported"
with caplog.at_level(logging.INFO, logger="dvc"):
Expand Down

0 comments on commit b7f9e73

Please sign in to comment.