Skip to content

Commit

Permalink
external_repo: fix for local file import fail
Browse files Browse the repository at this point in the history
If the URL points to a local DVC repo, import/get should fetch from the source project's cache.

Fixes iterative#2599
  • Loading branch information
maykulkarni committed Dec 7, 2019
1 parent f9d4ef8 commit 0dd310d
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 1 deletion.
49 changes: 48 additions & 1 deletion dvc/external_repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,10 @@
from contextlib import contextmanager
from distutils.dir_util import copy_tree

from dvc.remote import RemoteConfig
from funcy import retry

from dvc.config import NoRemoteError
from dvc.config import NoRemoteError, ConfigError
from dvc.exceptions import RemoteNotSpecifiedInExternalRepoError
from dvc.exceptions import NoOutputInExternalRepoError
from dvc.exceptions import OutputNotFoundError
Expand Down Expand Up @@ -62,16 +63,29 @@ def _external_repo(url=None, rev=None, cache_dir=None):

# Adjust new clone/copy to fit rev and cache_dir
repo = Repo(new_path)
# Adjust the original repo so its remote points towards its own cache
original_repo = Repo(url)
rconfig = RemoteConfig(original_repo.config)
try:
if rev is not None:
repo.scm.checkout(rev)

if not _is_local(url) and not _remote_config_exists(rconfig):
# check if the URL is local and no default remote
# add default remote pointing to the original repo's cache location
rconfig.add("upstream",
original_repo.cache.local.cache_dir,
default=True)
original_repo.scm.add([original_repo.config.config_file])
original_repo.scm.commit("add remote")

if cache_dir is not None:
cache_config = CacheConfig(repo.config)
cache_config.set_dir(cache_dir, level=Config.LEVEL_LOCAL)
finally:
# Need to close/reopen repo to force config reread
repo.close()
original_repo.close()

REPO_CACHE[key] = new_path
return new_path
Expand Down Expand Up @@ -100,3 +114,36 @@ def _clone_repo(url, path):

git = Git.clone(url, path)
git.close()


def _remote_config_exists(rconfig):
"""
Checks if default remote config is present.
Args:
rconfig: a remote config
Returns:
True if the remote config exists, else False
"""
try:
default = rconfig.get_default()
except ConfigError:
default = None
return True if default else False


def _is_local(url):
"""
Checks if the URL is local or not.
Args:
url: url
Returns:
True, if the URL is local else False
"""
remote_urls = {"azure://", "gs://", "http://", "https://",
"oss://", "s3://", "hdfs://"}
for remote_url in remote_urls:
if url.startswith(remote_url):
return False
return True
9 changes: 9 additions & 0 deletions tests/func/test_external_repo.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import os

from dvc.repo import Repo
from mock import patch

from dvc.external_repo import external_repo
Expand All @@ -26,3 +27,11 @@ def test_external_repo(erepo):
assert path_isin(repo.cache.local.cache_dir, repo.root_dir)

assert mock.call_count == 1


def test_external_repo_import_without_remote(erepo, dvc_repo):
    """``Repo.get`` from ``erepo`` must place ``erepo.CODE`` in the target dir.

    NOTE(review): the test name implies ``erepo`` has no default remote
    configured -- confirm against the fixture definition.
    """
    code_name = erepo.CODE
    target_dir = dvc_repo.root_dir

    Repo.get(erepo.root_dir, code_name, target_dir)

    fetched_path = target_dir + "/" + erepo.CODE
    assert os.path.exists(fetched_path)

0 comments on commit 0dd310d

Please sign in to comment.