From 5e2150d509fd7caa1f0bc6375025b4d310b436e5 Mon Sep 17 00:00:00 2001
From: Saugat Pachhai
Date: Thu, 28 Nov 2019 00:07:04 +0545
Subject: [PATCH] remote/gs: set max_results to improve performance when checking exists/isdir

---
 dvc/remote/gs.py | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/dvc/remote/gs.py b/dvc/remote/gs.py
index 66408abc96..aa0958bd38 100644
--- a/dvc/remote/gs.py
+++ b/dvc/remote/gs.py
@@ -137,12 +137,14 @@ def remove(self, path_info):
 
         blob.delete()
 
-    def _list_paths(self, bucket, prefix):
-        for blob in self.gs.bucket(bucket).list_blobs(prefix=prefix):
+    def _list_paths(self, bucket, prefix, max_items=None):
+        for blob in self.gs.bucket(bucket).list_blobs(
+            prefix=prefix, max_results=max_items
+        ):
             yield blob.name
 
     def list_cache_paths(self):
-        return self._list_paths(self.path_info.bucket, self.path_info.path)
+        return self.walk_files(self.path_info.bucket, self.path_info.path)
 
     def walk_files(self, path_info):
         for fname in self._list_paths(path_info.bucket, path_info.path):
@@ -150,12 +152,17 @@ def walk_files(self, path_info):
 
     def isdir(self, path_info):
         dir_path = path_info / ""
-        file = next(self._list_paths(path_info.bucket, dir_path.path), "")
-        return file.startswith(dir_path.path)
+        return bool(
+            list(
+                self._list_paths(path_info.bucket, dir_path.path, max_items=1)
+            )
+        )
 
     def exists(self, path_info):
         dir_path = path_info / ""
-        file = next(self._list_paths(path_info.bucket, path_info.path), "")
+        file = next(
+            self._list_paths(path_info.bucket, path_info.path, max_items=1), ""
+        )
         return path_info.path == file or file.startswith(dir_path.path)
 
     def _upload(self, from_file, to_info, name=None, no_progress_bar=True):