Skip to content

Commit

Permalink
gdrive: add open
Browse files Browse the repository at this point in the history
  • Loading branch information
casperdcl committed May 30, 2020
1 parent 516d82e commit 4ccbc95
Show file tree
Hide file tree
Showing 4 changed files with 68 additions and 45 deletions.
20 changes: 20 additions & 0 deletions dvc/remote/gdrive.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
import io
import logging
import os
import posixpath
import re
import threading
from collections import defaultdict
from contextlib import contextmanager
from urllib.parse import urlparse

from funcy import cached_property, retry, wrap_prop, wrap_with
Expand All @@ -15,6 +17,7 @@
from dvc.remote.base import BaseRemote
from dvc.scheme import Schemes
from dvc.utils import format_link, tmp_fname
from dvc.utils.stream import IterStream

logger = logging.getLogger(__name__)
FOLDER_MIME_TYPE = "application/vnd.google-apps.folder"
Expand Down Expand Up @@ -393,6 +396,23 @@ def _gdrive_download_file(
) as pbar:
gdrive_file.GetContentFile(to_file, callback=pbar.update_to)

@contextmanager
@_gdrive_retry
def open(self, path_info, mode="r", encoding=None):
assert mode in {"r", "rt", "rb"}

item_id = self._get_item_id(path_info)
param = {"id": item_id}
# it does not create a file on the remote
gdrive_file = self._drive.CreateFile(param)
fd = gdrive_file.GetContentIOBuffer()
stream = IterStream(iter(fd))

if mode != "rb":
stream = io.TextIOWrapper(stream, encoding=encoding)

yield stream

@_gdrive_retry
def _gdrive_delete_file(self, item_id):
from pydrive2.files import ApiRequestError
Expand Down
46 changes: 2 additions & 44 deletions dvc/utils/http.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import io
from contextlib import contextmanager

from dvc.utils.stream import IterStream


@contextmanager
def open_url(url, mode="r", encoding=None):
Expand Down Expand Up @@ -61,47 +63,3 @@ def gen(response):
finally:
# Ensure connection is closed
it.close()


class IterStream(io.RawIOBase):
"""Wraps an iterator yielding bytes as a file object"""

def __init__(self, iterator):
self.iterator = iterator
self.leftover = None

def readable(self):
return True

# Python 3 requires only .readinto() method, it still uses other ones
# under some circumstances and falls back if those are absent. Since
# iterator already constructs byte strings for us, .readinto() is not the
# most optimal, so we provide .read1() too.

def readinto(self, b):
try:
n = len(b) # We're supposed to return at most this much
chunk = self.leftover or next(self.iterator)
output, self.leftover = chunk[:n], chunk[n:]

n_out = len(output)
b[:n_out] = output
return n_out
except StopIteration:
return 0 # indicate EOF

readinto1 = readinto

def read1(self, n=-1):
try:
chunk = self.leftover or next(self.iterator)
except StopIteration:
return b""

# Return an arbitrary number or bytes
if n <= 0:
self.leftover = None
return chunk

output, self.leftover = chunk[:n], chunk[n:]
return output
45 changes: 45 additions & 0 deletions dvc/utils/stream.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import io


class IterStream(io.RawIOBase):
"""Wraps an iterator yielding bytes as a file object"""

def __init__(self, iterator):
self.iterator = iterator
self.leftover = None

def readable(self):
return True

# Python 3 requires only .readinto() method, it still uses other ones
# under some circumstances and falls back if those are absent. Since
# iterator already constructs byte strings for us, .readinto() is not the
# most optimal, so we provide .read1() too.

def readinto(self, b):
try:
n = len(b) # We're supposed to return at most this much
chunk = self.leftover or next(self.iterator)
output, self.leftover = chunk[:n], chunk[n:]

n_out = len(output)
b[:n_out] = output
return n_out
except StopIteration:
return 0 # indicate EOF

readinto1 = readinto

def read1(self, n=-1):
try:
chunk = self.leftover or next(self.iterator)
except StopIteration:
return b""

# Return an arbitrary number or bytes
if n <= 0:
self.leftover = None
return chunk

output, self.leftover = chunk[:n], chunk[n:]
return output
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ def run(self):
# Extra dependencies for remote integrations

gs = ["google-cloud-storage==1.19.0"]
gdrive = ["pydrive2>=1.4.13"]
gdrive = ["pydrive2>=1.4.15"]
s3 = ["boto3>=1.9.201"]
azure = ["azure-storage-blob==2.1.0"]
oss = ["oss2==2.6.1"]
Expand Down

0 comments on commit 4ccbc95

Please sign in to comment.