Skip to content

Commit

Permalink
Merge pull request #802 from dhermes/bucket-iter-combine
Browse files Browse the repository at this point in the history
Combining get_all_blobs and Bucket.iterator.
  • Loading branch information
dhermes committed Apr 8, 2015
2 parents 96ee9d3 + 34a025f commit 41b5b72
Show file tree
Hide file tree
Showing 5 changed files with 87 additions and 86 deletions.
4 changes: 2 additions & 2 deletions docs/_components/storage-getting-started.rst
Original file line number Diff line number Diff line change
Expand Up @@ -156,9 +156,9 @@ bucket object::
>>> bucket = storage.get_bucket('my-bucket', connection=connection)

If you want to get all the blobs in the bucket, you can use
:func:`get_all_blobs <gcloud.storage.bucket.Bucket.get_all_blobs>`::
:func:`list_blobs <gcloud.storage.bucket.Bucket.list_blobs>`::

>>> blobs = bucket.get_all_blobs()
>>> blobs = bucket.list_blobs()

However, if you're looking to iterate through the blobs, you can use the
bucket itself as an iterator::
Expand Down
2 changes: 1 addition & 1 deletion docs/_components/storage-quickstart.rst
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ you can create buckets and blobs::
>>> blob = blob.upload_from_string('this is test content!')
>>> print blob.download_as_string()
'this is test content!'
>>> print bucket.get_all_blobs()
>>> print bucket.list_blobs()
[<Blob: my-new-bucket, my-test-file.txt>]
>>> blob.delete()
>>> bucket.delete()
Expand Down
84 changes: 48 additions & 36 deletions gcloud/storage/bucket.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,9 @@
False
If you want to get all the blobs in the bucket, you can use
:func:`get_all_blobs <gcloud.storage.bucket.Bucket.get_all_blobs>`::
:func:`list_blobs <gcloud.storage.bucket.Bucket.list_blobs>`::
>>> blobs = bucket.get_all_blobs()
>>> blobs = bucket.list_blobs()
You can also use the bucket as an iterator::
Expand Down Expand Up @@ -104,7 +104,7 @@ def __repr__(self):
return '<Bucket: %s>' % self.name

def __iter__(self):
return iter(self._iterator_class(bucket=self))
return iter(self.list_blobs())

def __contains__(self, blob_name):
blob = Blob(blob_name, bucket=self)
Expand Down Expand Up @@ -223,56 +223,68 @@ def get_blob(self, blob_name):
except NotFound:
return None

def get_all_blobs(self):
"""List all the blobs in this bucket.
This will **not** retrieve all the data for all the blobs, it
will only retrieve the blob paths.
This is equivalent to::
blobs = [blob for blob in bucket]
def list_blobs(self, max_results=None, page_token=None, prefix=None,
delimiter=None, versions=None,
projection='noAcl', fields=None):
"""Return an iterator used to find blobs in the bucket.
:rtype: list of :class:`gcloud.storage.blob.Blob`
:returns: A list of all the Blob objects in this bucket.
"""
return list(self)
:type max_results: integer or ``NoneType``
:param max_results: maximum number of blobs to return.
def iterator(self, prefix=None, delimiter=None, max_results=None,
versions=None):
"""Return an iterator used to find blobs in the bucket.
:type page_token: string
:param page_token: opaque marker for the next "page" of blobs. If not
passed, will return the first page of blobs.
:type prefix: string or None
:type prefix: string or ``NoneType``
:param prefix: optional prefix used to filter blobs.
:type delimiter: string or None
:type delimiter: string or ``NoneType``
:param delimiter: optional delimiter, used with ``prefix`` to
emulate hierarchy.
:type max_results: integer or None
:param max_results: maximum number of blobs to return.
:type versions: boolean or None
:type versions: boolean or ``NoneType``
:param versions: whether object versions should be returned as
separate blobs.
:rtype: :class:`_BlobIterator`
:type projection: string or ``NoneType``
:param projection: If used, must be 'full' or 'noAcl'. Defaults to
'noAcl'. Specifies the set of properties to return.
:type fields: string or ``NoneType``
:param fields: Selector specifying which fields to include in a
partial response. Must be a comma-separated list of fields. For
example, to get a partial response with just the next page token
and the language of each blob returned:
'items/contentLanguage,nextPageToken'
:rtype: :class:`_BlobIterator`.
:returns: An iterator of blobs.
"""
extra_params = {}

if max_results is not None:
extra_params['maxResults'] = max_results

if prefix is not None:
extra_params['prefix'] = prefix

if delimiter is not None:
extra_params['delimiter'] = delimiter

if max_results is not None:
extra_params['maxResults'] = max_results

if versions is not None:
extra_params['versions'] = versions

return self._iterator_class(self, extra_params=extra_params)
extra_params['projection'] = projection

if fields is not None:
extra_params['fields'] = fields

result = self._iterator_class(self, extra_params=extra_params)
# Page token must be handled specially since the base `Iterator`
# class has it as a reserved property.
if page_token is not None:
result.next_page_token = page_token
return result

def delete(self, force=False):
"""Delete this bucket.
Expand All @@ -297,7 +309,7 @@ def delete(self, force=False):
contains more than 256 objects / blobs.
"""
if force:
blobs = list(self.iterator(
blobs = list(self.list_blobs(
max_results=self._MAX_OBJECTS_FOR_BUCKET_DELETE + 1))
if len(blobs) > self._MAX_OBJECTS_FOR_BUCKET_DELETE:
message = (
Expand Down Expand Up @@ -325,7 +337,7 @@ def delete_blob(self, blob_name):
>>> from gcloud import storage
>>> connection = storage.get_connection()
>>> bucket = storage.get_bucket('my-bucket', connection=connection)
>>> print bucket.get_all_blobs()
>>> print bucket.list_blobs()
[<Blob: my-bucket, my-file.txt>]
>>> bucket.delete_blob('my-file.txt')
>>> try:
Expand Down Expand Up @@ -408,7 +420,7 @@ def upload_file(self, filename, blob_name=None):
>>> connection = storage.get_connection()
>>> bucket = storage.get_bucket('my-bucket', connection=connection)
>>> bucket.upload_file('~/my-file.txt', 'remote-text-file.txt')
>>> print bucket.get_all_blobs()
>>> print bucket.list_blobs()
[<Blob: my-bucket, remote-text-file.txt>]
If you don't provide a blob name, we will try to upload the file
Expand All @@ -418,7 +430,7 @@ def upload_file(self, filename, blob_name=None):
>>> connection = storage.get_connection()
>>> bucket = storage.get_bucket('my-bucket', connection=connection)
>>> bucket.upload_file('~/my-file.txt')
>>> print bucket.get_all_blobs()
>>> print bucket.list_blobs()
[<Blob: my-bucket, my-file.txt>]
:type filename: string
Expand Down Expand Up @@ -450,7 +462,7 @@ def upload_file_object(self, file_obj, blob_name=None):
>>> connection = storage.get_connection()
>>> bucket = storage.get_bucket('my-bucket', connection=connection)
>>> bucket.upload_file(open('~/my-file.txt'), 'remote-text-file.txt')
>>> print bucket.get_all_blobs()
>>> print bucket.list_blobs()
[<Blob: my-bucket, remote-text-file.txt>]
If you don't provide a blob name, we will try to upload the file
Expand All @@ -460,7 +472,7 @@ def upload_file_object(self, file_obj, blob_name=None):
>>> connection = storage.get_connection()
>>> bucket = storage.get_bucket('my-bucket', connection=connection)
>>> bucket.upload_file(open('~/my-file.txt'))
>>> print bucket.get_all_blobs()
>>> print bucket.list_blobs()
[<Blob: my-bucket, my-file.txt>]
:type file_obj: file
Expand Down
68 changes: 28 additions & 40 deletions gcloud/storage/test_bucket.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ def test___iter___empty(self):
kw, = connection._requested
self.assertEqual(kw['method'], 'GET')
self.assertEqual(kw['path'], '/b/%s/o' % NAME)
self.assertEqual(kw['query_params'], {})
self.assertEqual(kw['query_params'], {'projection': 'noAcl'})

def test___iter___non_empty(self):
NAME = 'name'
Expand All @@ -115,7 +115,7 @@ def test___iter___non_empty(self):
kw, = connection._requested
self.assertEqual(kw['method'], 'GET')
self.assertEqual(kw['path'], '/b/%s/o' % NAME)
self.assertEqual(kw['query_params'], {})
self.assertEqual(kw['query_params'], {'projection': 'noAcl'})

def test___contains___miss(self):
NAME = 'name'
Expand Down Expand Up @@ -269,58 +269,46 @@ def test_get_blob_hit(self):
self.assertEqual(kw['method'], 'GET')
self.assertEqual(kw['path'], '/b/%s/o/%s' % (NAME, BLOB_NAME))

def test_get_all_blobs_empty(self):
def test_list_blobs_defaults(self):
NAME = 'name'
connection = _Connection({'items': []})
bucket = self._makeOne(NAME, connection)
blobs = bucket.get_all_blobs()
self.assertEqual(blobs, [])
kw, = connection._requested
self.assertEqual(kw['method'], 'GET')
self.assertEqual(kw['path'], '/b/%s/o' % NAME)
self.assertEqual(kw['query_params'], {})

def test_get_all_blobs_non_empty(self):
NAME = 'name'
BLOB_NAME = 'blob-name'
connection = _Connection({'items': [{'name': BLOB_NAME}]})
bucket = self._makeOne(NAME, connection)
blobs = bucket.get_all_blobs()
blob, = blobs
self.assertTrue(blob.bucket is bucket)
self.assertEqual(blob.name, BLOB_NAME)
kw, = connection._requested
self.assertEqual(kw['method'], 'GET')
self.assertEqual(kw['path'], '/b/%s/o' % NAME)
self.assertEqual(kw['query_params'], {})

def test_iterator_defaults(self):
NAME = 'name'
connection = _Connection({'items': []})
bucket = self._makeOne(NAME, connection)
iterator = bucket.iterator()
iterator = bucket.list_blobs()
blobs = list(iterator)
self.assertEqual(blobs, [])
kw, = connection._requested
self.assertEqual(kw['method'], 'GET')
self.assertEqual(kw['path'], '/b/%s/o' % NAME)
self.assertEqual(kw['query_params'], {})
self.assertEqual(kw['query_params'], {'projection': 'noAcl'})

def test_iterator_explicit(self):
def test_list_blobs_explicit(self):
NAME = 'name'
MAX_RESULTS = 10
PAGE_TOKEN = 'ABCD'
PREFIX = 'subfolder'
DELIMITER = '/'
VERSIONS = True
PROJECTION = 'full'
FIELDS = 'items/contentLanguage,nextPageToken'
EXPECTED = {
'prefix': 'subfolder',
'delimiter': '/',
'maxResults': 10,
'versions': True,
'pageToken': PAGE_TOKEN,
'prefix': PREFIX,
'delimiter': DELIMITER,
'versions': VERSIONS,
'projection': PROJECTION,
'fields': FIELDS,
}
connection = _Connection({'items': []})
bucket = self._makeOne(NAME, connection)
iterator = bucket.iterator(
prefix='subfolder',
delimiter='/',
max_results=10,
versions=True,
iterator = bucket.list_blobs(
max_results=MAX_RESULTS,
page_token=PAGE_TOKEN,
prefix=PREFIX,
delimiter=DELIMITER,
versions=VERSIONS,
projection=PROJECTION,
fields=FIELDS,
)
blobs = list(iterator)
self.assertEqual(blobs, [])
Expand Down Expand Up @@ -1069,7 +1057,7 @@ def get_items_from_response(self, response):
self.assertEqual(kw[0]['query_params'], {'projection': 'full'})
self.assertEqual(kw[1]['method'], 'GET')
self.assertEqual(kw[1]['path'], '/b/%s/o' % NAME)
self.assertEqual(kw[1]['query_params'], {})
self.assertEqual(kw[1]['query_params'], {'projection': 'noAcl'})


class _Connection(object):
Expand Down
15 changes: 8 additions & 7 deletions regression/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,13 +205,13 @@ def tearDownClass(cls):
blob.delete()

def test_list_files(self):
all_blobs = self.bucket.get_all_blobs()
all_blobs = list(self.bucket.list_blobs())
self.assertEqual(len(all_blobs), len(self.FILENAMES))

def test_paginate_files(self):
truncation_size = 1
count = len(self.FILENAMES) - truncation_size
iterator = self.bucket.iterator(max_results=count)
iterator = self.bucket.list_blobs(max_results=count)
response = iterator.get_next_page_response()
blobs = list(iterator.get_items_from_response(response))
self.assertEqual(len(blobs), count)
Expand Down Expand Up @@ -254,7 +254,7 @@ def tearDownClass(cls):
blob.delete()

def test_root_level_w_delimiter(self):
iterator = self.bucket.iterator(delimiter='/')
iterator = self.bucket.list_blobs(delimiter='/')
response = iterator.get_next_page_response()
blobs = list(iterator.get_items_from_response(response))
self.assertEqual([blob.name for blob in blobs], ['file01.txt'])
Expand All @@ -263,7 +263,7 @@ def test_root_level_w_delimiter(self):
self.assertEqual(iterator.prefixes, ('parent/',))

def test_first_level(self):
iterator = self.bucket.iterator(delimiter='/', prefix='parent/')
iterator = self.bucket.list_blobs(delimiter='/', prefix='parent/')
response = iterator.get_next_page_response()
blobs = list(iterator.get_items_from_response(response))
self.assertEqual([blob.name for blob in blobs], ['parent/file11.txt'])
Expand All @@ -272,7 +272,8 @@ def test_first_level(self):
self.assertEqual(iterator.prefixes, ('parent/child/',))

def test_second_level(self):
iterator = self.bucket.iterator(delimiter='/', prefix='parent/child/')
iterator = self.bucket.list_blobs(delimiter='/',
prefix='parent/child/')
response = iterator.get_next_page_response()
blobs = list(iterator.get_items_from_response(response))
self.assertEqual([blob.name for blob in blobs],
Expand All @@ -288,8 +289,8 @@ def test_third_level(self):
# of 1024 characters in the UTF-8 encoded name:
# https://cloud.google.com/storage/docs/bucketnaming#objectnames
# Exercise a layer deeper to illustrate this.
iterator = self.bucket.iterator(delimiter='/',
prefix='parent/child/grand/')
iterator = self.bucket.list_blobs(delimiter='/',
prefix='parent/child/grand/')
response = iterator.get_next_page_response()
blobs = list(iterator.get_items_from_response(response))
self.assertEqual([blob.name for blob in blobs],
Expand Down

0 comments on commit 41b5b72

Please sign in to comment.