Skip to content

Commit

Permalink
remote/gs: set max_results to improve performance when checking exists/isdir
Browse files Browse the repository at this point in the history
  • Loading branch information
skshetry committed Nov 27, 2019
1 parent e750496 commit 5e2150d
Showing 1 changed file with 13 additions and 6 deletions.
19 changes: 13 additions & 6 deletions dvc/remote/gs.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,25 +137,32 @@ def remove(self, path_info):

blob.delete()

def _list_paths(self, bucket, prefix, max_items=None):
    """Yield the name of every blob in ``bucket`` whose name starts with ``prefix``.

    Args:
        bucket: Name of the bucket to list.
        prefix: Blob-name prefix handed to ``list_blobs``.
        max_items: Forwarded to ``list_blobs`` as ``max_results``. The
            default ``None`` is forwarded unchanged (presumably meaning
            "no limit" -- confirm against the client library docs).

    Yields:
        The ``name`` attribute of each blob returned by the listing.
    """
    listing = self.gs.bucket(bucket).list_blobs(
        prefix=prefix, max_results=max_items
    )
    for blob in listing:
        yield blob.name

def list_cache_paths(self):
    """Return an iterator over the blob names stored under this remote's path.

    The listing uses ``self.path_info.bucket`` as the bucket and
    ``self.path_info.path`` as the name prefix.
    """
    # ``walk_files`` takes a single ``path_info`` argument, so it cannot be
    # called with a (bucket, path) pair; list the raw blob names directly.
    return self._list_paths(self.path_info.bucket, self.path_info.path)

def walk_files(self, path_info):
    """Yield a path object for every blob listed under ``path_info``.

    Each blob name returned by ``_list_paths`` is made relative to
    ``path_info.path`` and then joined back onto ``path_info`` with ``/``.
    """
    blob_names = self._list_paths(path_info.bucket, path_info.path)
    for blob_name in blob_names:
        relative = posixpath.relpath(blob_name, path_info.path)
        yield path_info / relative

def isdir(self, path_info):
    """Return True when at least one blob name starts with ``path_info`` + "/".

    Args:
        path_info: Object exposing ``bucket`` and supporting ``/`` to build
            the directory-style prefix (``path_info / ""``).

    Returns:
        bool: whether the listing for the directory prefix is non-empty.
    """
    dir_path = path_info / ""
    # One result is enough to prove the prefix is populated, so cap the
    # listing instead of asking for every blob under the directory.
    probe = self._list_paths(path_info.bucket, dir_path.path, max_items=1)
    return next(probe, None) is not None

def exists(self, path_info):
    """Return True when ``path_info`` names a blob or a non-empty directory prefix.

    Args:
        path_info: Object exposing ``bucket`` and ``path`` and supporting
            ``/`` to build the directory-style prefix (``path_info / ""``).

    Returns:
        bool: True if a blob is named exactly ``path_info.path`` or if any
        blob name starts with the directory prefix.
    """
    bucket = path_info.bucket
    dir_path = path_info / ""

    # A single bounded probe: an exact match, if present, is the first name
    # in a name-ordered listing because it is a prefix of every other hit.
    # NOTE(review): relies on the listing being ordered by name -- confirm
    # against the storage service documentation.
    first = next(self._list_paths(bucket, path_info.path, max_items=1), "")
    if first == path_info.path or first.startswith(dir_path.path):
        return True
    if not first:
        # Nothing shares the bare prefix, so nothing can live under the
        # directory prefix either.
        return False

    # The first hit was a sibling such as "data-old", which sorts before
    # "data/..."; ask for the directory prefix explicitly before giving up.
    under_dir = self._list_paths(bucket, dir_path.path, max_items=1)
    return next(under_dir, None) is not None

def _upload(self, from_file, to_info, name=None, no_progress_bar=True):
Expand Down

0 comments on commit 5e2150d

Please sign in to comment.