From 24559a42e22d8938ea51fefacf32ac2963921842 Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Sun, 3 May 2020 19:07:13 +0100 Subject: [PATCH 01/15] file.GetContentFile: stream to disk, add callback - fixes #28 - related #20 - reference: https://developers.google.com/drive/api/v2/manage-downloads --- pydrive2/files.py | 33 +++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/pydrive2/files.py b/pydrive2/files.py index cd23dfae..4c29bfb7 100644 --- a/pydrive2/files.py +++ b/pydrive2/files.py @@ -4,6 +4,7 @@ from googleapiclient import errors from googleapiclient.http import MediaIoBaseUpload +from googleapiclient.http import MediaIoBaseDownload from functools import wraps from .apiattr import ApiAttribute @@ -220,7 +221,10 @@ def GetContentString( self.FetchContent(mimetype, remove_bom) return self.content.getvalue().decode(encoding) - def GetContentFile(self, filename, mimetype=None, remove_bom=False): + @LoadMetadata + def GetContentFile( + self, filename, mimetype=None, remove_bom=False, callback=None + ): """Save content of this file as a local file. :param filename: name of the file to write to. @@ -229,17 +233,26 @@ def GetContentFile(self, filename, mimetype=None, remove_bom=False): :type mimetype: str :param remove_bom: Whether to remove the byte order marking. :type remove_bom: bool + :param callback: passed two arguments: (total trasferred, file size). 
+ :type param: callable :raises: ApiRequestError, FileNotUploadedError, FileNotDownloadableError """ - if ( - self.content is None - or type(self.content) is not io.BytesIO - or self.has_bom == remove_bom - ): - self.FetchContent(mimetype, remove_bom) - f = open(filename, "wb") - f.write(self.content.getvalue()) - f.close() + file_id = self.metadata.get("id") or self.get("id") + request = self.auth.service.files().get_media(fileId=file_id) + with open(filename, mode="w+b") as fd: + downloader = MediaIoBaseDownload(fd, request) + done = False + while done is False: + status, done = downloader.next_chunk() + if callback: + callback(status.resumable_progress, status.total_size) + + if mimetype == "text/plain" and remove_bom: + fd.seek(0) + self._RemovePrefix( + fd, MIME_TYPE_TO_BOM[self["mimeType"]][mimetype] + ) + self.has_bom = not remove_bom @LoadAuth def FetchMetadata(self, fields=None, fetch_all=False): From b5cc59d9e2a27f5aacfab1fa97e1ac63cde53c0b Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Sun, 3 May 2020 19:40:28 +0100 Subject: [PATCH 02/15] use export_media if needed --- pydrive2/files.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pydrive2/files.py b/pydrive2/files.py index 4c29bfb7..dd860d12 100644 --- a/pydrive2/files.py +++ b/pydrive2/files.py @@ -238,7 +238,9 @@ def GetContentFile( :raises: ApiRequestError, FileNotUploadedError, FileNotDownloadableError """ file_id = self.metadata.get("id") or self.get("id") - request = self.auth.service.files().get_media(fileId=file_id) + files = self.auth.service.files() + get = files.get_media if mimetype is None else files.export_media + request = get(fileId=file_id) with open(filename, mode="w+b") as fd: downloader = MediaIoBaseDownload(fd, request) done = False From 5348f6a58d9bc580ab79e2f43d8b68f997edb5f3 Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Sun, 3 May 2020 19:48:51 +0100 Subject: [PATCH 03/15] actually use mimetype --- pydrive2/files.py | 8 ++++++-- 1
file changed, 6 insertions(+), 2 deletions(-) diff --git a/pydrive2/files.py b/pydrive2/files.py index dd860d12..08e8b862 100644 --- a/pydrive2/files.py +++ b/pydrive2/files.py @@ -5,7 +5,7 @@ from googleapiclient import errors from googleapiclient.http import MediaIoBaseUpload from googleapiclient.http import MediaIoBaseDownload -from functools import wraps +from functools import wraps, partial from .apiattr import ApiAttribute from .apiattr import ApiAttributeMixin @@ -239,7 +239,11 @@ def GetContentFile( """ file_id = self.metadata.get("id") or self.get("id") files = self.auth.service.files() - get = files.get_media if mimetype is None else files.export_media + get = ( + files.get_media + if mimetype is None + else partial(files.export_media, mimeType=mimetype) + ) request = get(fileId=file_id) with open(filename, mode="w+b") as fd: downloader = MediaIoBaseDownload(fd, request) From 4ce96181076314f767ad10ede62ba188dc0390c3 Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Sun, 3 May 2020 19:50:08 +0100 Subject: [PATCH 04/15] minor tidy --- pydrive2/files.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pydrive2/files.py b/pydrive2/files.py index 08e8b862..76bf48d4 100644 --- a/pydrive2/files.py +++ b/pydrive2/files.py @@ -237,14 +237,13 @@ def GetContentFile( :type param: callable :raises: ApiRequestError, FileNotUploadedError, FileNotDownloadableError """ - file_id = self.metadata.get("id") or self.get("id") files = self.auth.service.files() get = ( files.get_media if mimetype is None else partial(files.export_media, mimeType=mimetype) ) - request = get(fileId=file_id) + request = get(fileId=self.metadata.get("id") or self.get("id")) with open(filename, mode="w+b") as fd: downloader = MediaIoBaseDownload(fd, request) done = False From e36a5e6873d9c157cd5ec23921f33aea9e3ee9a7 Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Sun, 3 May 2020 20:17:57 +0100 Subject: [PATCH 05/15] try to auto-infer mimetype --- 
pydrive2/files.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pydrive2/files.py b/pydrive2/files.py index 76bf48d4..774aaef7 100644 --- a/pydrive2/files.py +++ b/pydrive2/files.py @@ -238,6 +238,8 @@ def GetContentFile( :raises: ApiRequestError, FileNotUploadedError, FileNotDownloadableError """ files = self.auth.service.files() + if mimetype is None: + mimetype = self.metadata.get("mimeType") or self.get("mimeType") get = ( files.get_media if mimetype is None From e877d961012d6947c968fcdc14caa34e8ef2c1bc Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Sun, 3 May 2020 20:27:28 +0100 Subject: [PATCH 06/15] auto-infer docs files --- pydrive2/files.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/pydrive2/files.py b/pydrive2/files.py index 774aaef7..287e978d 100644 --- a/pydrive2/files.py +++ b/pydrive2/files.py @@ -238,13 +238,12 @@ def GetContentFile( :raises: ApiRequestError, FileNotUploadedError, FileNotDownloadableError """ files = self.auth.service.files() + get = files.get_media if mimetype is None: mimetype = self.metadata.get("mimeType") or self.get("mimeType") - get = ( - files.get_media - if mimetype is None - else partial(files.export_media, mimeType=mimetype) - ) + if mimetype.startswith("application/vnd.google-apps."): + mimetype = "text/plain" # or "application/octet-stream"? 
+ get = partial(files.export_media, mimeType=mimetype) request = get(fileId=self.metadata.get("id") or self.get("id")) with open(filename, mode="w+b") as fd: downloader = MediaIoBaseDownload(fd, request) From 5328e1009e9458e43481512c8556fdcb71873e46 Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Sun, 3 May 2020 20:37:14 +0100 Subject: [PATCH 07/15] auto-infer gsuite from metadata rather than user mimeType --- pydrive2/files.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/pydrive2/files.py b/pydrive2/files.py index 287e978d..2ebda559 100644 --- a/pydrive2/files.py +++ b/pydrive2/files.py @@ -239,11 +239,12 @@ def GetContentFile( """ files = self.auth.service.files() get = files.get_media - if mimetype is None: - mimetype = self.metadata.get("mimeType") or self.get("mimeType") - if mimetype.startswith("application/vnd.google-apps."): - mimetype = "text/plain" # or "application/octet-stream"? - get = partial(files.export_media, mimeType=mimetype) + # patch `get` for docs files + meta_mimeType = self.metadata.get("mimeType") or self.get("mimeType") + if meta_mimeType.startswith("application/vnd.google-apps."): + mimetype = mimetype or "text/plain" + get = partial(files.export_media, mimeType=mimetype) + request = get(fileId=self.metadata.get("id") or self.get("id")) with open(filename, mode="w+b") as fd: downloader = MediaIoBaseDownload(fd, request) From 677219d477c1e96f8ca7a1044b8e38053560b0a3 Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Sun, 3 May 2020 22:07:28 +0100 Subject: [PATCH 08/15] avoid slow API calls --- pydrive2/files.py | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/pydrive2/files.py b/pydrive2/files.py index 2ebda559..de6df59b 100644 --- a/pydrive2/files.py +++ b/pydrive2/files.py @@ -5,7 +5,7 @@ from googleapiclient import errors from googleapiclient.http import MediaIoBaseUpload from googleapiclient.http import MediaIoBaseDownload -from 
functools import wraps, partial +from functools import wraps from .apiattr import ApiAttribute from .apiattr import ApiAttributeMixin @@ -221,7 +221,6 @@ def GetContentString( self.FetchContent(mimetype, remove_bom) return self.content.getvalue().decode(encoding) - @LoadMetadata def GetContentFile( self, filename, mimetype=None, remove_bom=False, callback=None ): @@ -238,15 +237,9 @@ def GetContentFile( :raises: ApiRequestError, FileNotUploadedError, FileNotDownloadableError """ files = self.auth.service.files() - get = files.get_media - # patch `get` for docs files - meta_mimeType = self.metadata.get("mimeType") or self.get("mimeType") - if meta_mimeType.startswith("application/vnd.google-apps."): - mimetype = mimetype or "text/plain" - get = partial(files.export_media, mimeType=mimetype) - - request = get(fileId=self.metadata.get("id") or self.get("id")) - with open(filename, mode="w+b") as fd: + file_id = self.metadata.get("id") or self.get("id") + + def download(fd, request): downloader = MediaIoBaseDownload(fd, request) done = False while done is False: @@ -254,6 +247,22 @@ def GetContentFile( if callback: callback(status.resumable_progress, status.total_size) + with open(filename, mode="w+b") as fd: + # Ideally would use files.export_media instead if + # metadata.get("mimeType").startswith("application/vnd.google-apps.") + # but that would first require a slow call to FetchMetadata() + try: + download(fd, files.get_media(fileId=file_id)) + except errors.HttpError as error: + err_str = str(error).lower() + if "403" not in err_str or "use export" not in err_str: + raise + mimetype = mimetype or "text/plain" + fd.seek(0) + download( + fd, files.export_media(fileId=file_id, mimeType=mimetype) + ) + if mimetype == "text/plain" and remove_bom: fd.seek(0) self._RemovePrefix( From e57aca7e7a3efd27ea4466181910bb48ad99390d Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Mon, 4 May 2020 23:48:13 +0100 Subject: [PATCH 09/15] add ApiRequestError.status, safer 
has_bom --- pydrive2/files.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/pydrive2/files.py b/pydrive2/files.py index de6df59b..97b9d934 100644 --- a/pydrive2/files.py +++ b/pydrive2/files.py @@ -31,6 +31,7 @@ def __init__(self, http_error): assert isinstance(http_error, errors.HttpError) content = json.loads(http_error.content.decode("utf-8")) self.error = content.get("error", {}) if content else {} + self.status = http_error.resp.status # Initialize args for backward compatibility super().__init__(http_error) @@ -254,21 +255,23 @@ def download(fd, request): try: download(fd, files.get_media(fileId=file_id)) except errors.HttpError as error: - err_str = str(error).lower() - if "403" not in err_str or "use export" not in err_str: + err = ApiRequestError(error) + if err.status != 403 or "use export" not in err.error["message"]: raise mimetype = mimetype or "text/plain" fd.seek(0) - download( - fd, files.export_media(fileId=file_id, mimeType=mimetype) - ) + try: + download( + fd, files.export_media(fileId=file_id, mimeType=mimetype) + ) + except errors.HttpError as error: + raise ApiRequestError(error) if mimetype == "text/plain" and remove_bom: fd.seek(0) self._RemovePrefix( fd, MIME_TYPE_TO_BOM[self["mimeType"]][mimetype] ) - self.has_bom = not remove_bom @LoadAuth def FetchMetadata(self, fields=None, fetch_all=False): From 7ca89e41f1d154c0ce437df016f7671b9b4da75c Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Tue, 5 May 2020 14:45:31 +0100 Subject: [PATCH 10/15] parse json for status code, debug reason --- pydrive2/files.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/pydrive2/files.py b/pydrive2/files.py index 97b9d934..14408f96 100644 --- a/pydrive2/files.py +++ b/pydrive2/files.py @@ -31,7 +31,6 @@ def __init__(self, http_error): assert isinstance(http_error, errors.HttpError) content = json.loads(http_error.content.decode("utf-8")) self.error = content.get("error", {}) if 
content else {} - self.status = http_error.resp.status # Initialize args for backward compatibility super().__init__(http_error) @@ -255,14 +254,17 @@ def download(fd, request): try: download(fd, files.get_media(fileId=file_id)) except errors.HttpError as error: - err = ApiRequestError(error) - if err.status != 403 or "use export" not in err.error["message"]: - raise + exc = ApiRequestError(error) + reason = exc.error.get("errors", [{}])[0].get("reason", "") + if exc.error["code"] != 403 or reason != "??": + print(reason) + raise exc mimetype = mimetype or "text/plain" - fd.seek(0) + fd.seek(0) # just in case `download()` modified `fd` try: download( - fd, files.export_media(fileId=file_id, mimeType=mimetype) + fd, + files.export_media(fileId=file_id, mimeType=mimetype), ) except errors.HttpError as error: raise ApiRequestError(error) From b21ca02fef55a62d2a1b29a668c89fca1689b1c2 Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Tue, 5 May 2020 15:01:59 +0100 Subject: [PATCH 11/15] add ApiRequestError.GetField, fix reason --- pydrive2/files.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/pydrive2/files.py b/pydrive2/files.py index 14408f96..01cbcd92 100644 --- a/pydrive2/files.py +++ b/pydrive2/files.py @@ -35,6 +35,10 @@ def __init__(self, http_error): # Initialize args for backward compatibility super().__init__(http_error) + def GetField(self, field): + """Returns the `field` from the first error""" + return self.error.get("errors", [{}])[0].get(field, "") + class FileNotDownloadableError(RuntimeError): """Error trying to download file that is not downloadable.""" @@ -255,9 +259,10 @@ def download(fd, request): download(fd, files.get_media(fileId=file_id)) except errors.HttpError as error: exc = ApiRequestError(error) - reason = exc.error.get("errors", [{}])[0].get("reason", "") - if exc.error["code"] != 403 or reason != "??": - print(reason) + if ( + exc.error["code"] != 403 + or exc.GetField("reason") != 
"fileNotDownloadable" + ): raise exc mimetype = mimetype or "text/plain" fd.seek(0) # just in case `download()` modified `fd` From e3ca49354e957201d85dc0ae383eca3c49f8ea64 Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Tue, 5 May 2020 15:05:29 +0100 Subject: [PATCH 12/15] update tests --- pydrive2/test/test_util.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/pydrive2/test/test_util.py b/pydrive2/test/test_util.py index c8143f0e..bba8876d 100644 --- a/pydrive2/test/test_util.py +++ b/pydrive2/test/test_util.py @@ -48,11 +48,7 @@ def pydrive_retry(call): try: result = call() except ApiRequestError as exception: - retry_codes = ["403", "500", "502", "503", "504"] - if any( - "HttpError {}".format(code) in str(exception) - for code in retry_codes - ): + if exception.error["code"] in [403, 500, 502, 503, 504]: raise PyDriveRetriableError("Google API request failed") raise return result From c9735c3622cb55dddfb41e15fbc4bec62eb5df4f Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Tue, 5 May 2020 15:14:02 +0100 Subject: [PATCH 13/15] avoid implicit FetchMetadata --- pydrive2/files.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/pydrive2/files.py b/pydrive2/files.py index 01cbcd92..327025b2 100644 --- a/pydrive2/files.py +++ b/pydrive2/files.py @@ -276,9 +276,13 @@ def download(fd, request): if mimetype == "text/plain" and remove_bom: fd.seek(0) - self._RemovePrefix( - fd, MIME_TYPE_TO_BOM[self["mimeType"]][mimetype] - ) + boms = [ + bom[mimetype] + for bom in MIME_TYPE_TO_BOM.values() + if mimetype in bom + ] + if boms: + self._RemovePrefix(fd, boms[0].encode("utf8")) @LoadAuth def FetchMetadata(self, fields=None, fetch_all=False): From 0227318d1ad5949169e3ee4150a6145920394576 Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Tue, 5 May 2020 20:27:39 +0100 Subject: [PATCH 14/15] silly encoding bug fix --- pydrive2/files.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/pydrive2/files.py b/pydrive2/files.py index 327025b2..bd0b8fa5 100644 --- a/pydrive2/files.py +++ b/pydrive2/files.py @@ -282,7 +282,7 @@ def download(fd, request): if mimetype in bom ] if boms: - self._RemovePrefix(fd, boms[0].encode("utf8")) + self._RemovePrefix(fd, boms[0]) @LoadAuth def FetchMetadata(self, fields=None, fetch_all=False): From 4c820cc789681f6393706a2280dcaa06c0009165 Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Tue, 5 May 2020 20:31:08 +0100 Subject: [PATCH 15/15] fix remove_bom test See #30 --- pydrive2/test/test_file.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pydrive2/test/test_file.py b/pydrive2/test/test_file.py index 6dad4d53..6e49ea47 100644 --- a/pydrive2/test/test_file.py +++ b/pydrive2/test/test_file.py @@ -487,7 +487,7 @@ def test_GFile_Conversion_Lossless_String(self): downloaded_file_name = "_tmp_downloaded_file_name.txt" pydrive_retry( lambda: file1.GetContentFile( - downloaded_file_name, mimetype="text/plain" + downloaded_file_name, mimetype="text/plain", remove_bom=True ) ) downloaded_string = open(downloaded_file_name).read()