From ecdc608e13970939dd1b16b1f2a6b302af13f780 Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Sun, 1 Dec 2019 02:22:04 +0000 Subject: [PATCH 1/5] clean up gs progress using Tqdm.wrapattr - fixes #2829 --- dvc/remote/gs.py | 50 +++++++++++++++++------------------------------- setup.py | 2 +- 2 files changed, 19 insertions(+), 33 deletions(-) diff --git a/dvc/remote/gs.py b/dvc/remote/gs.py index 6f389ec4f6..b1d5c960fe 100644 --- a/dvc/remote/gs.py +++ b/dvc/remote/gs.py @@ -58,23 +58,15 @@ def _upload_to_bucket( no_progress_bar=True, ): blob = bucket.blob(to_info.path, chunk_size=chunk_size) - with Tqdm( - desc=name or to_info.path, - total=os.path.getsize(from_file), - bytes=True, - disable=no_progress_bar, - ) as pbar: - with io.open(from_file, mode="rb") as fobj: - raw_read = fobj.read - - def read(size=chunk_size): - res = raw_read(size) - if res: - pbar.update(len(res)) - return res - - fobj.read = read - blob.upload_from_file(fobj) + with io.open(from_file, mode="rb") as fobj: + with Tqdm.wrapattr( + fobj, + "read", + desc=name or to_info.path, + total=os.path.getsize(from_file), + disable=no_progress_bar, + ) as wrapped: + blob.upload_from_file(wrapped) class RemoteGS(RemoteBASE): @@ -162,21 +154,15 @@ def _upload(self, from_file, to_info, name=None, no_progress_bar=True): def _download(self, from_info, to_file, name=None, no_progress_bar=True): bucket = self.gs.bucket(from_info.bucket) blob = bucket.get_blob(from_info.path) - with Tqdm( - desc=name or from_info.path, - total=blob.size, - bytes=True, - disable=no_progress_bar, - ) as pbar: - with io.open(to_file, mode="wb") as fobj: - raw_write = fobj.write - - def write(byte_string): - raw_write(byte_string) - pbar.update(len(byte_string)) - - fobj.write = write - blob.download_to_file(fobj) + with io.open(to_file, mode="wb") as fobj: + with Tqdm.wrapattr( + fobj, + "write", + desc=name or from_info.path, + total=blob.size, + disable=no_progress_bar, + ) as wrapped: + blob.download_to_file(wrapped) 
def _generate_download_url(self, path_info, expires=3600): expiration = timedelta(seconds=int(expires)) diff --git a/setup.py b/setup.py index b9b16f014d..b23d269db7 100644 --- a/setup.py +++ b/setup.py @@ -77,7 +77,7 @@ def run(self): "funcy>=1.14", "pathspec>=0.6.0", "shortuuid>=0.5.0", - "tqdm>=4.38.0,<5", + "tqdm>=4.40.0,<5", "packaging>=19.0", "win-unicode-console>=0.5; sys_platform == 'win32'", "pywin32>=225; sys_platform == 'win32'", From 75f8b9cd8b6cff3913ef341d6034a8a9fd8a3f30 Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Sun, 1 Dec 2019 02:31:57 +0000 Subject: [PATCH 2/5] start to clean up gdrive --- dvc/remote/{gdrive/__init__.py => gdrive.py} | 27 ++++++++++++-------- dvc/remote/gdrive/utils.py | 25 ------------------ 2 files changed, 16 insertions(+), 36 deletions(-) rename dvc/remote/{gdrive/__init__.py => gdrive.py} (93%) delete mode 100644 dvc/remote/gdrive/utils.py diff --git a/dvc/remote/gdrive/__init__.py b/dvc/remote/gdrive.py similarity index 93% rename from dvc/remote/gdrive/__init__.py rename to dvc/remote/gdrive.py index fa0e25a28f..1c9c567c35 100644 --- a/dvc/remote/gdrive/__init__.py +++ b/dvc/remote/gdrive.py @@ -8,7 +8,7 @@ from funcy import retry, compose, decorator, wrap_with from funcy.py3 import cat -from dvc.remote.gdrive.utils import TrackFileReadProgress, FOLDER_MIME_TYPE +from dvc.progress import Tqdm from dvc.scheme import Schemes from dvc.path_info import CloudURLInfo from dvc.remote.base import RemoteBASE @@ -17,6 +17,7 @@ from dvc.utils import tmp_fname logger = logging.getLogger(__name__) +FOLDER_MIME_TYPE = "application/vnd.google-apps.folder" class GDriveRetriableError(DvcException): @@ -96,21 +97,25 @@ def gdrive_upload_file( return item def upload_file(self, item, no_progress_bar, from_file, progress_name): - with open(from_file, "rb") as opened_file: - if not no_progress_bar: - opened_file = TrackFileReadProgress(progress_name, opened_file) - # PyDrive doesn't like content property setting for empty files - 
# https://github.com/gsuitedevs/PyDrive/issues/121 - if os.stat(from_file).st_size: - item.content = opened_file - item.Upload() + with open(from_file, "rb") as fobj: + total = os.fstat(fobj.fileno()).st_size + with Tqdm.wrapattr( + fobj, + "read", + desc=progress_name, + total=total, + disable=no_progress_bar, + ) as opened_file: + # PyDrive doesn't like content property setting for empty files + # https://github.com/gsuitedevs/PyDrive/issues/121 + if total: + item.content = opened_file + item.Upload() @gdrive_retry def gdrive_download_file( self, file_id, to_file, progress_name, no_progress_bar ): - from dvc.progress import Tqdm - gdrive_file = self.drive.CreateFile({"id": file_id}) with Tqdm( desc=progress_name, diff --git a/dvc/remote/gdrive/utils.py b/dvc/remote/gdrive/utils.py deleted file mode 100644 index 781af811a5..0000000000 --- a/dvc/remote/gdrive/utils.py +++ /dev/null @@ -1,25 +0,0 @@ -import os - -from dvc.progress import Tqdm - - -FOLDER_MIME_TYPE = "application/vnd.google-apps.folder" - - -class TrackFileReadProgress(object): - def __init__(self, progress_name, fobj): - self.progress_name = progress_name - self.fobj = fobj - file_size = os.fstat(fobj.fileno()).st_size - self.tqdm = Tqdm(desc=self.progress_name, total=file_size) - - def read(self, size): - self.tqdm.update(size) - return self.fobj.read(size) - - def close(self): - self.fobj.close() - self.tqdm.close() - - def __getattr__(self, attr): - return getattr(self.fobj, attr) From f54460f2cb631033f825837774b2fbd283e34273 Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Sun, 1 Dec 2019 02:54:01 +0000 Subject: [PATCH 3/5] merge unneeded function --- dvc/remote/gdrive.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/dvc/remote/gdrive.py b/dvc/remote/gdrive.py index 1c9c567c35..f29b436c1c 100644 --- a/dvc/remote/gdrive.py +++ b/dvc/remote/gdrive.py @@ -93,24 +93,22 @@ def gdrive_upload_file( item = self.drive.CreateFile( {"title": args["title"], "parents": 
[{"id": args["parent_id"]}]} ) - self.upload_file(item, no_progress_bar, from_file, progress_name) - return item - def upload_file(self, item, no_progress_bar, from_file, progress_name): with open(from_file, "rb") as fobj: - total = os.fstat(fobj.fileno()).st_size + total = os.path.getsize(from_file) with Tqdm.wrapattr( fobj, "read", desc=progress_name, total=total, disable=no_progress_bar, - ) as opened_file: + ) as wrapped: # PyDrive doesn't like content property setting for empty files # https://github.com/gsuitedevs/PyDrive/issues/121 if total: - item.content = opened_file + item.content = wrapped item.Upload() + return item @gdrive_retry def gdrive_download_file( From 7318a6d120a3fa889fea1e6ee0a794e094027158 Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Mon, 2 Dec 2019 17:35:29 +0000 Subject: [PATCH 4/5] safer download notification --- dvc/remote/gdrive.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/dvc/remote/gdrive.py b/dvc/remote/gdrive.py index f29b436c1c..e2cd6f57ba 100644 --- a/dvc/remote/gdrive.py +++ b/dvc/remote/gdrive.py @@ -115,10 +115,11 @@ def gdrive_download_file( self, file_id, to_file, progress_name, no_progress_bar ): gdrive_file = self.drive.CreateFile({"id": file_id}) + bar_format = "Downloading {desc}... 
" + Tqdm.format_sizeof( + int(gdrive_file["fileSize"]), "B", 1024 + ) with Tqdm( - desc=progress_name, - total=int(gdrive_file["fileSize"]), - disable=no_progress_bar, + bar_format=bar_format, desc=progress_name, disable=no_progress_bar ): gdrive_file.GetContentFile(to_file) From c622287cd39a042ae4940c27c2d39e4950a713d6 Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Mon, 2 Dec 2019 17:43:11 +0000 Subject: [PATCH 5/5] truncate long desc --- dvc/remote/gdrive.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/dvc/remote/gdrive.py b/dvc/remote/gdrive.py index e2cd6f57ba..b74738d92e 100644 --- a/dvc/remote/gdrive.py +++ b/dvc/remote/gdrive.py @@ -115,8 +115,9 @@ def gdrive_download_file( self, file_id, to_file, progress_name, no_progress_bar ): gdrive_file = self.drive.CreateFile({"id": file_id}) - bar_format = "Downloading {desc}... " + Tqdm.format_sizeof( - int(gdrive_file["fileSize"]), "B", 1024 + bar_format = ( + "Downloading {desc:{ncols_desc}.{ncols_desc}}... " + + Tqdm.format_sizeof(int(gdrive_file["fileSize"]), "B", 1024) ) with Tqdm( bar_format=bar_format, desc=progress_name, disable=no_progress_bar