From 8d07ca7a1d1af0aceb13ff88f94b907c2914e172 Mon Sep 17 00:00:00 2001 From: Peter Rowlands Date: Tue, 20 Jul 2021 20:46:43 +0900 Subject: [PATCH 1/2] fs: ensure gdrivefs.info returns integer size --- dvc/fs/gdrive.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dvc/fs/gdrive.py b/dvc/fs/gdrive.py index 4bbf583f7b..ee277f2c1c 100644 --- a/dvc/fs/gdrive.py +++ b/dvc/fs/gdrive.py @@ -619,7 +619,7 @@ def info(self, path_info): item_id = self._get_item_id(path_info) gdrive_file = self._drive.CreateFile({"id": item_id}) gdrive_file.FetchMetadata(fields="fileSize") - return {"size": gdrive_file.get("fileSize"), "type": "file"} + return {"size": int(gdrive_file.get("fileSize")), "type": "file"} def _upload_fobj(self, fobj, to_info, **kwargs): dirname = to_info.parent From a36750a2c87ab5a0230d09a77073b77ef99aed01 Mon Sep 17 00:00:00 2001 From: Peter Rowlands Date: Tue, 20 Jul 2021 20:47:32 +0900 Subject: [PATCH 2/2] transfer: only verify on pull --- dvc/data_cloud.py | 1 + dvc/objects/transfer.py | 17 +++++++++++++---- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/dvc/data_cloud.py b/dvc/data_cloud.py index 687237b297..76170b0020 100644 --- a/dvc/data_cloud.py +++ b/dvc/data_cloud.py @@ -118,6 +118,7 @@ def pull( jobs=jobs, src_index=get_index(odb), cache_odb=self.repo.odb.local, + verify=odb.verify, ) def status( diff --git a/dvc/objects/transfer.py b/dvc/objects/transfer.py index bc10d893e7..b59f1b568d 100644 --- a/dvc/objects/transfer.py +++ b/dvc/objects/transfer.py @@ -41,7 +41,7 @@ def wrapper(odb, obj, *args, **kwargs): return wrapper -def _transfer(src, dest, dir_objs, file_objs, missing, jobs, **kwargs): +def _transfer(src, dest, dir_objs, file_objs, missing, jobs, verify, **kwargs): from . import save from .stage import is_memfs_staging @@ -54,7 +54,14 @@ def _transfer(src, dest, dir_objs, file_objs, missing, jobs, **kwargs): func = pbar.wrap_fn(func) with ThreadPoolExecutor(max_workers=jobs) as executor: processor = partial( - _create_tasks, executor, jobs, func, src, dest, is_staged + _create_tasks, + executor, + jobs, + func, + src, + dest, + is_staged, + verify, ) processor.save_func = func _do_transfer( @@ -69,7 +76,7 @@ def _transfer(src, dest, dir_objs, file_objs, missing, jobs, **kwargs): return total -def _create_tasks(executor, jobs, func, src, dest, is_staged, objs): +def _create_tasks(executor, jobs, func, src, dest, is_staged, verify, objs): fails = 0 obj_iter = iter(objs) @@ -80,7 +87,7 @@ def create_taskset(amount): dest, _raw_obj(src, obj, is_staged), move=False, - verify=src.verify, + verify=verify, ) for obj in itertools.islice(obj_iter, amount) } @@ -180,6 +187,7 @@ def transfer( dest: "ObjectDB", objs: Iterable["HashFile"], jobs: Optional[int] = None, + verify: bool = False, **kwargs, ) -> int: """Transfer (copy) the specified objects from one ODB to another. @@ -214,5 +222,6 @@ def transfer( files, status.missing, jobs, + verify, **kwargs, )