Skip to content

Commit

Permalink
Merge pull request #2202 from Suor/api-rev
Browse files Browse the repository at this point in the history
api: add `rev` param to api calls
  • Loading branch information
efiop authored Jul 1, 2019
2 parents 01e2fe3 + 766b7d8 commit cc9411d
Show file tree
Hide file tree
Showing 6 changed files with 63 additions and 24 deletions.
25 changes: 14 additions & 11 deletions dvc/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,27 +7,27 @@
except ImportError:
from contextlib import GeneratorContextManager as GCM

from dvc.utils import remove
from dvc.utils.compat import urlparse
from dvc.repo import Repo
from dvc.external_repo import ExternalRepo


def get_url(path, repo=None, remote=None):
def get_url(path, repo=None, rev=None, remote=None):
"""Returns an url of a resource specified by path in repo"""
with _make_repo(repo) as _repo:
with _make_repo(repo, rev=rev) as _repo:
abspath = os.path.join(_repo.root_dir, path)
out, = _repo.find_outs_by_path(abspath)
remote_obj = _repo.cloud.get_remote(remote)
return str(remote_obj.checksum_to_path_info(out.checksum))


def open(path, repo=None, remote=None, mode="r", encoding=None):
def open(path, repo=None, rev=None, remote=None, mode="r", encoding=None):
"""Opens a specified resource as a file descriptor"""
args = (path,)
kwargs = {
"repo": repo,
"remote": remote,
"rev": rev,
"mode": mode,
"encoding": encoding,
}
Expand All @@ -45,30 +45,33 @@ def __getattr__(self, name):
)


def _open(path, repo=None, remote=None, mode="r", encoding=None):
with _make_repo(repo) as _repo:
def _open(path, repo=None, rev=None, remote=None, mode="r", encoding=None):
with _make_repo(repo, rev=rev) as _repo:
abspath = os.path.join(_repo.root_dir, path)
with _repo.open(
abspath, remote=remote, mode=mode, encoding=encoding
) as fd:
yield fd


def read(path, repo=None, remote=None, mode="r", encoding=None):
def read(path, repo=None, rev=None, remote=None, mode="r", encoding=None):
"""Read a specified resource into string"""
with open(path, repo, remote=remote, mode=mode, encoding=encoding) as fd:
with open(
path, repo=repo, rev=rev, remote=remote, mode=mode, encoding=encoding
) as fd:
return fd.read()


@contextmanager
def _make_repo(repo_url):
def _make_repo(repo_url, rev=None):
if not repo_url or urlparse(repo_url).scheme == "":
assert rev is None, "Custom revision is not supported for local repo"
yield Repo(repo_url)
else:
tmp_dir = tempfile.mkdtemp("dvc-repo")
ext_repo = ExternalRepo(tmp_dir, url=repo_url, rev=rev)
try:
ext_repo = ExternalRepo(tmp_dir, url=repo_url)
ext_repo.install()
yield ext_repo.repo
finally:
remove(tmp_dir)
ext_repo.uninstall()
7 changes: 7 additions & 0 deletions dvc/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,3 +263,10 @@ def __init__(self, etag, cached_etag):
"ETag mismatch detected when copying file to cache! "
"(expected: '{}', actual: '{}')".format(etag, cached_etag)
)


class OutputFileMissingError(DvcException):
    """Error reporting that an output could not be found locally or remotely.

    Args:
        path: path of the missing output; it is interpolated into the
            error message as-is.
    """

    def __init__(self, path):
        # Build the message first so the super() call stays short.
        msg = "Can't find {} neither locally nor on remote".format(path)
        super(OutputFileMissingError, self).__init__(msg)
13 changes: 11 additions & 2 deletions dvc/external_repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import logging
import shortuuid

from funcy import cached_property
from funcy import cached_property, retry
from schema import Optional

from dvc.config import Config
Expand Down Expand Up @@ -138,7 +138,16 @@ def uninstall(self):
)
return

remove(self.path)
# If repo has been initialized then we need to close its git repo
if "repo" in self.__dict__:
self.repo.scm.git.close()

if os.name == "nt":
# git.exe may linger for a while, preventing removal of the temp dir
os_retry = retry(5, errors=OSError, timeout=0.1)
os_retry(remove)(self.path)
else:
remove(self.path)

def update(self):
self.repo.scm.fetch(self.rev)
Expand Down
13 changes: 10 additions & 3 deletions dvc/repo/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,11 @@
NotDvcRepoError,
OutputNotFoundError,
TargetNotDirectoryError,
OutputFileMissingError,
)
from dvc.ignore import DvcIgnoreFileHandler
from dvc.path_info import PathInfo
from dvc.utils.compat import open as _open
from dvc.utils.compat import open as _open, fspath_py35
from dvc.utils import relpath

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -455,9 +456,15 @@ def open(self, path, remote=None, mode="r", encoding=None):
if out.isdir():
raise ValueError("Can't open a dir")

cache_file = self.cache.local.checksum_to_path_info(out.checksum)
cache_file = fspath_py35(cache_file)

with self.state:
cache_info = out.get_used_cache(remote=remote)
self.cloud.pull(cache_info, remote=remote)

cache_file = self.cache.local.checksum_to_path_info(out.checksum)
return _open(cache_file.fspath, mode=mode, encoding=encoding)
# Since pull may just skip with a warning, we need to check it here
if not os.path.exists(cache_file):
raise OutputFileMissingError(relpath(path, self.root_dir))

return _open(cache_file, mode=mode, encoding=encoding)
7 changes: 2 additions & 5 deletions dvc/repo/get.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
from dvc.path_info import PathInfo
from dvc.external_repo import ExternalRepo
from dvc.utils.compat import urlparse
from dvc.utils import remove


@staticmethod
Expand All @@ -19,8 +18,8 @@ def get(url, path, out=None, rev=None):
# and won't work with reflink/hardlink.
dpath = os.path.dirname(os.path.abspath(out))
tmp_dir = os.path.join(dpath, "." + str(shortuuid.uuid()))
erepo = ExternalRepo(tmp_dir, url=url, rev=rev)
try:
erepo = ExternalRepo(tmp_dir, url=url, rev=rev)
erepo.install()
# Try any links possible to avoid data duplication.
#
Expand All @@ -42,7 +41,5 @@ def get(url, path, out=None, rev=None):
o.path_info = PathInfo(os.path.abspath(out))
with o.repo.state:
o.checkout()
erepo.repo.scm.git.close()
finally:
if os.path.exists(tmp_dir):
remove(tmp_dir)
erepo.uninstall()
22 changes: 19 additions & 3 deletions tests/func/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import shutil

from dvc import api
from dvc.exceptions import OutputFileMissingError
from dvc.main import main
from dvc.path_info import URLInfo
from dvc.remote.config import RemoteConfig
Expand Down Expand Up @@ -101,16 +102,31 @@ def test_open(repo_dir, dvc_repo, remote_url):


def test_open_external(repo_dir, dvc_repo, erepo, remote_url):
erepo.dvc.scm.checkout("branch")
_set_remote_url_and_commit(erepo.dvc, remote_url)
erepo.dvc.push()
erepo.dvc.scm.checkout("master")
_set_remote_url_and_commit(erepo.dvc, remote_url)

erepo.dvc.push(all_branches=True)

# Remove cache to force download
shutil.rmtree(erepo.dvc.cache.local.cache_dir)

# Using file url to force clone to tmp repo
repo_url = "file://" + erepo.dvc.root_dir
with api.open(repo_dir.FOO, repo=repo_url) as fd:
assert fd.read() == repo_dir.FOO_CONTENTS
with api.open("version", repo=repo_url) as fd:
assert fd.read() == "master"

assert api.read("version", repo=repo_url, rev="branch") == "branch"


def test_open_missing(erepo):
    """Reading from an external repo whose cache was deleted must fail.

    The external repo's local cache directory is removed before the read,
    and `api.read` is expected to raise `OutputFileMissingError`.
    """
    # Remove cache to make foo missing
    cache_dir = erepo.dvc.cache.local.cache_dir
    shutil.rmtree(cache_dir)

    # A file:// URL is used so the repo is addressed by URL, not local path
    external_url = "file://" + erepo.dvc.root_dir
    with pytest.raises(OutputFileMissingError):
        api.read(erepo.FOO, repo=external_url)


def _set_remote_url_and_commit(repo, remote_url):
Expand Down

0 comments on commit cc9411d

Please sign in to comment.