diff --git a/hexa/files/api.py b/hexa/files/api.py
index 88295b5ee..13a1ae53e 100644
--- a/hexa/files/api.py
+++ b/hexa/files/api.py
@@ -132,38 +132,61 @@ def _prefix_to_dict(bucket_name, name: str):
     }
 
 
-def list_bucket_objects(bucket_name, prefix=None, page: int = 1, per_page=30):
+def list_bucket_objects(
+    bucket_name, prefix=None, page: int = 1, per_page=30, ignore_hidden_files=True
+):
     client = get_storage_client()
     request = client.list_blobs(
         bucket_name,
         prefix=prefix,
-        page_size=per_page,
+        # We take twice the number of items to be sure to have enough
+        page_size=per_page * 2,
         delimiter="/",
         include_trailing_delimiter=True,
     )
 
+    pages = request.pages
+
+    max_items = (page * per_page) + 1
+    start_offset = (page - 1) * per_page
+    end_offset = page * per_page
+
     objects = []
-    next_page = None
-    page_number = 0
-    for req_page in request.pages:
-        if request.page_number == page:
-            if page == 1:
-                # Add the prefix to the response if the user requests the first page
-                for prefix in request.prefixes:
-                    objects.append(_prefix_to_dict(bucket_name, prefix))
-
-            page_number = request.page_number
-            objects += [_blob_to_dict(obj) for obj in req_page if _is_dir(obj) is False]
-        elif request.page_number > page:
-            next_page = req_page
-            break
-
-    return ObjectsPage(
-        items=objects,
-        page_number=page_number,
-        has_previous_page=page_number > 1,
-        has_next_page=bool(next_page),
-    )
+    try:
+        current_page = next(pages)
+        if page == 1:
+            # Start by adding the prefixes
+            for prefix in request.prefixes:
+                res = _prefix_to_dict(bucket_name, prefix)
+                if not ignore_hidden_files or not res["name"].startswith("."):
+                    objects.append(res)
+        while len(objects) <= max_items:
+            for obj in current_page:
+                if _is_dir(obj):
+                    continue
+
+                res = _blob_to_dict(obj)
+                if not ignore_hidden_files or not res["name"].startswith("."):
+                    objects.append(res)
+
+            current_page = next(pages)
+
+        return ObjectsPage(
+            items=objects[start_offset:end_offset],
+            page_number=page,
+            has_previous_page=page > 1,
+            has_next_page=len(objects) > page * per_page,
+        )
+
+    except StopIteration:
+        # We reached the end of the list of pages. Let's return what we have and set
+        # has_next_page to False.
+        return ObjectsPage(
+            items=objects[start_offset:end_offset],
+            page_number=page,
+            has_previous_page=page > 1,
+            has_next_page=False,
+        )
 
 
 def ensure_is_folder(object_key: str):
diff --git a/hexa/files/graphql/schema.graphql b/hexa/files/graphql/schema.graphql
index 1f88dde17..a0952657c 100644
--- a/hexa/files/graphql/schema.graphql
+++ b/hexa/files/graphql/schema.graphql
@@ -26,7 +26,7 @@ type BucketObjectPage {
 
 type Bucket {
   name: String!
-  objects(prefix: String, page: Int = 1, perPage: Int = 15): BucketObjectPage!
+  objects(prefix: String, page: Int = 1, perPage: Int = 15, ignoreHiddenFiles: Boolean = true): BucketObjectPage!
   object(key: String!): BucketObject
 }
 
diff --git a/hexa/files/schema/types.py b/hexa/files/schema/types.py
index ab9f6d3b7..ed73c7620 100644
--- a/hexa/files/schema/types.py
+++ b/hexa/files/schema/types.py
@@ -41,12 +41,23 @@ def resolve_bucket_name(workspace, info, **kwargs):
 
 @bucket_object.field("objects")
 @convert_kwargs_to_snake_case
-def resolve_bucket_objects(workspace, info, prefix=None, page=1, per_page=15, **kwargs):
+def resolve_bucket_objects(
+    workspace,
+    info,
+    prefix=None,
+    page=1,
+    per_page=15,
+    ignore_hidden_files=True,
+    **kwargs
+):
     if workspace.bucket_name is None:
         raise ImproperlyConfigured("Workspace does not have a bucket")
-
     page = list_bucket_objects(
-        workspace.bucket_name, prefix=prefix, page=page, per_page=per_page
+        workspace.bucket_name,
+        prefix=prefix,
+        page=page,
+        per_page=per_page,
+        ignore_hidden_files=ignore_hidden_files,
     )
     return page
 
diff --git a/hexa/files/tests/mocks/backend.py b/hexa/files/tests/mocks/backend.py
index 133834dec..86ef0bdd5 100644
--- a/hexa/files/tests/mocks/backend.py
+++ b/hexa/files/tests/mocks/backend.py
@@ -9,11 +9,9 @@ def __init__(self, project=None):
             project = "test-project-" + str(uuid.uuid1())
         self.project = project
         self.buckets = {}
-        self.blobs = {}
 
     def reset(self):
         self.buckets = {}
-        self.blobs = {}
 
     def create_bucket(self, bucket_name, *args, **kwargs):
         pass
diff --git a/hexa/files/tests/mocks/blob.py b/hexa/files/tests/mocks/blob.py
index 7eda300df..b4d27abde 100644
--- a/hexa/files/tests/mocks/blob.py
+++ b/hexa/files/tests/mocks/blob.py
@@ -6,24 +6,21 @@ def __init__(
         self,
         name,
         bucket,
-        chunk_size=None,
-        encryption_key=None,
-        kms_key_name=None,
-        generation=None,
+        size=None,
+        content_type=None,
     ):
         self.name = _bytes_to_unicode(name)
-        self.chunk_size = chunk_size  # Check that setter accepts value.
-        self._bucket = bucket
-        # self._acl = ObjectACL(self)
-        if encryption_key is not None and kms_key_name is not None:
-            raise ValueError(
-                "Pass at most one of 'encryption_key' " "and 'kms_key_name'"
-            )
+        self.size = size
+        self._content_type = content_type
+        self.bucket = bucket
 
-        self._encryption_key = encryption_key
+    @property
+    def content_type(self):
+        return self._content_type
 
-        if kms_key_name is not None:
-            self._properties["kmsKeyName"] = kms_key_name
+    @property
+    def updated(self):
+        return None
 
-        if generation is not None:
-            self._properties["generation"] = generation
+    def __repr__(self) -> str:
+        return f"<MockBlob: {self.name}>"
diff --git a/hexa/files/tests/mocks/bucket.py b/hexa/files/tests/mocks/bucket.py
index aa0f327ff..f6c20f32c 100644
--- a/hexa/files/tests/mocks/bucket.py
+++ b/hexa/files/tests/mocks/bucket.py
@@ -1,12 +1,6 @@
 from google.cloud.storage._helpers import _validate_name
 
-
-class MockBlob:
-    def __init__(self):
-        pass
-
-    def upload_from_filename(self, *args, **kwargs):
-        pass
+from .blob import MockBlob
 
 
 class MockBucket:
@@ -34,7 +28,9 @@ def list_blobs(self, *args, **kwargs):
         return self.client.list_blobs(self, *args, **kwargs)
 
     def blob(self, *args, **kwargs):
-        return MockBlob()
+        b = MockBlob(*args, bucket=self, **kwargs)
+        self._blobs.append(b)
+        return b
 
     def patch(self):
         pass
diff --git a/hexa/files/tests/mocks/client.py b/hexa/files/tests/mocks/client.py
index faa01efe0..b6b031f4c 100644
--- a/hexa/files/tests/mocks/client.py
+++ b/hexa/files/tests/mocks/client.py
@@ -1,12 +1,109 @@
-from unittest import mock
-
 from google.api_core import page_iterator
 from google.cloud.exceptions import Conflict, NotFound
-from google.cloud.storage.bucket import _blobs_page_start, _item_to_blob
 
 from .bucket import MockBucket
 
 
+class MockHTTPIterator:
+    def __init__(self, items, page_size, max_results=None):
+        self.items = items
+        self._page_size = page_size
+        self.num_results = 0
+        self.page_number = 0
+        self.max_results = max_results
+        self._started = False
+        self.__active_iterator = None
+
+    def __iter__(self):
+        """Iterator for each item returned.
+
+        Returns:
+            types.GeneratorType[Any]: A generator of items from the API.
+
+        Raises:
+            ValueError: If the iterator has already been started.
+        """
+        if self._started:
+            raise ValueError("Iterator has already started", self)
+        self._started = True
+        return self._items_iter()
+
+    def __next__(self):
+        if self.__active_iterator is None:
+            self.__active_iterator = iter(self)
+        return next(self.__active_iterator)
+
+    def _items_iter(self):
+        """Iterator for each item returned."""
+        for page in self._page_iter(increment=False):
+            for item in page:
+                self.num_results += 1
+                yield item
+
+    @property
+    def prefixes(self):
+        return set([item.name for item in self.items if item.name.endswith("/")])
+
+    @property
+    def pages(self):
+        if self._started:
+            raise ValueError("Iterator has already started", self)
+        self._started = True
+        return self._page_iter(increment=True)
+
+    def _page_iter(self, increment):
+        """Generator of pages of API responses.
+
+        Args:
+            increment (bool): Flag indicating if the total number of results
+                should be incremented on each page. This is useful since a page
+                iterator will want to increment by results per page while an
+                items iterator will want to increment per item.
+
+        Yields:
+            Page: each page of items from the API.
+ """ + page = self._next_page() + while page is not None: + self.page_number += 1 + if increment: + self.num_results += page.num_items + yield page + page = self._next_page() + + def _next_page(self): + """Get the next page in the iterator. + + Returns: + Optional[Page]: The next page in the iterator or :data:`None` if + there are no pages left. + """ + if self._has_next_page(): + page = page_iterator.Page( + self, + self.items[self.num_results : self.num_results + self._page_size], + lambda _, item: item, + ) + return page + else: + return None + + def _has_next_page(self): + """Determines whether or not there are more pages with results. + + Returns: + bool: Whether the iterator has more pages. + """ + if self.page_number == 0: + return True + + if self.max_results is not None: + if self.num_results >= self.max_results: + return False + + return self.num_results < len(self.items) + + class MockClient: def __init__( self, @@ -47,7 +144,7 @@ def _bucket_arg_to_bucket(self, bucket_or_name): if isinstance(bucket_or_name, MockBucket): bucket = bucket_or_name else: - bucket = MockBucket(self, name=bucket_or_name) + bucket = self.backend.buckets.get(bucket_or_name) return bucket @property @@ -84,6 +181,12 @@ def lookup_bucket(self, bucket_name): def create_bucket(self, bucket_or_name, *args, **kwargs): bucket = self._bucket_arg_to_bucket(bucket_or_name) + if bucket is None: + bucket = MockBucket( + client=self, + name=bucket_or_name, + ) + if bucket.name in self.backend.buckets.keys(): raise Conflict( "409 POST https://storage.googleapis.com/storage/v1/b?project={}: You already own this bucket. Please select another name.".format( @@ -98,81 +201,46 @@ def download_blob_to_file(self, blob_or_uri, file_obj, start=None, end=None): raise NotImplementedError def list_blobs( - self, - bucket_or_name, - max_results=None, - page_token=None, - prefix=None, - delimiter=None, - start_offset=None, - end_offset=None, - include_trailing_delimiter=None, - versions=None, - projection="noAcl", - fields=None, - page_size=None, + self, bucket_or_name, max_results=None, prefix=None, page_size=None, **kwargs ): bucket = self._bucket_arg_to_bucket(bucket_or_name) if isinstance(max_results, int): blobs = bucket._blobs[:max_results] else: - blobs = bucket._blobs - + blobs = bucket._blobs[: len(bucket._blobs)] if isinstance(prefix, str): - blobs = [blob for blob in blobs if blob.name.startswith(prefix)] - - page_response = {"items": blobs} - api_request = mock.Mock(return_value=page_response) - - extra_params = {"projection": projection} - - if prefix is not None: - extra_params["prefix"] = prefix - - if delimiter is not None: - extra_params["delimiter"] = delimiter - - if start_offset is not None: - extra_params["startOffset"] = start_offset - - if end_offset is not None: - extra_params["endOffset"] = end_offset - - if include_trailing_delimiter is not None: - extra_params["includeTrailingDelimiter"] = include_trailing_delimiter - - iterator = page_iterator.HTTPIterator( - mock.sentinel.client, - api_request, - "/foo", - item_to_value=_item_to_blob, - page_start=_blobs_page_start, - page_token=page_token, + blobs = [ + blob + for blob in blobs + if blob.name.startswith(prefix) and blob.name != prefix + ] + + # Only take the blobs at the prefix and not the blobs in "subdirectories" + prefix_len = len(prefix or "") + blobs = [ + blob + for blob in blobs + if blob.name[prefix_len:].find("/") < 0 + or blob.name[prefix_len:].find("/") == len(blob.name[prefix_len:]) - 1 + ] + + blobs.sort(key=lambda blob: blob.name) + + 
+        iterator = MockHTTPIterator(
+            items=blobs,
             page_size=page_size,
             max_results=max_results,
-            extra_params=extra_params,
         )
-        iterator.bucket = bucket
-        iterator.prefixes = set()
-
         return iterator
 
     def list_buckets(
         self,
         max_results=None,
-        page_token=None,
         prefix=None,
-        projection="noAcl",
-        fields=None,
-        project=None,
+        page_size=None,
+        **kwargs,
     ):
-        if project is None:
-            project = self.project
-
-        if project is None:
-            raise ValueError("Client project not set: pass an explicit project.")
-
         if isinstance(max_results, int):
             buckets = list(self.backend.buckets.values())[:max_results]
         else:
@@ -181,43 +249,9 @@ def list_buckets(
         if isinstance(prefix, str):
             buckets = [bucket for bucket in buckets if bucket.name.startswith(prefix)]
 
-        path = "/foo"
-        page_response = {"items": buckets}
-        api_request = mock.Mock(return_value=page_response)
-        extra_params = {"key": "val"}
+        # The default page_size is set by the server
+        page_size = page_size if page_size else 10
 
-        return page_iterator.HTTPIterator(
-            mock.sentinel.client,
-            api_request,
-            path=path,
-            item_to_value=page_iterator._item_to_value_identity,
-            max_results=max_results,
-            page_token=mock.sentinel.token,
-            extra_params=extra_params,
+        return MockHTTPIterator(
+            items=buckets, max_results=max_results, page_size=page_size
         )
-
-    def create_hmac_key(
-        self, service_account_email, project_id=None, user_project=None
-    ):
-        raise NotImplementedError
-
-    def list_hmac_keys(
-        self,
-        max_results=None,
-        service_account_email=None,
-        show_deleted_keys=None,
-        project_id=None,
-        user_project=None,
-    ):
-        raise NotImplementedError
-
-    def get_hmac_key_metadata(self, access_id, project_id=None, user_project=None):
-        raise NotImplementedError
-
-
-def _item_to_bucket(iterator, item):
-    raise NotImplementedError
-
-
-def _item_to_hmac_key_metadata(iterator, item):
-    raise NotImplementedError
diff --git a/hexa/files/tests/test_api.py b/hexa/files/tests/test_api.py
index 8c67552b5..52ab8dde0 100644
--- a/hexa/files/tests/test_api.py
+++ b/hexa/files/tests/test_api.py
@@ -29,3 +29,113 @@ def test_create_same_bucket(self):
     def test_list_blobs_empty(self):
         bucket = create_bucket("empty-bucket")
         self.assertEqual(list_bucket_objects(bucket.name).items, [])
+
+    @backend.mock_storage
+    def test_list_blobs(self):
+        bucket = create_bucket("not-empty-bucket")
+        bucket.blob(
+            "test.txt",
+            size=123,
+            content_type="text/plain",
+        )
+        bucket.blob(
+            "readme.md",
+            size=2103,
+            content_type="text/plain",
+        )
+        bucket.blob(
+            "other_file.md",
+            size=2102,
+            content_type="text/plain",
+        )
+        bucket.blob("folder/", size=0)
+        bucket.blob(
+            "folder/readme.md",
+            size=1,
+            content_type="text/plain",
+        )
+        self.assertEqual(
+            [
+                x["key"]
+                for x in list_bucket_objects(bucket.name, page=1, per_page=2).items
+            ],
+            [
+                "folder/",
+                "other_file.md",
+            ],
+        )
+
+    @backend.mock_storage
+    def test_list_hide_hidden_files(self):
+        bucket = create_bucket("bucket")
+        bucket.blob(
+            "test.txt",
+            size=123,
+            content_type="text/plain",
+        )
+        bucket.blob(
+            ".gitconfig",
+            size=2103,
+            content_type="text/plain",
+        )
+        bucket.blob(
+            ".gitignore",
+            size=2102,
+            content_type="text/plain",
+        )
+        bucket.blob(".git/", size=0)
+        bucket.blob(".git/config", size=1, content_type="text/plain")
+
+        self.assertEqual(
+            [
+                x["key"]
+                for x in list_bucket_objects(bucket.name, page=1, per_page=10).items
+            ],
+            [
+                "test.txt",
+            ],
+        )
+
+        self.assertEqual(
+            [
+                x["key"]
+                for x in list_bucket_objects(
+                    bucket.name, page=1, per_page=10, ignore_hidden_files=False
+                ).items
+            ],
+            [".git/", ".gitconfig", ".gitignore", "test.txt"],
".gitconfig", ".gitignore", "test.txt"], + ) + + @backend.mock_storage + def test_list_blobs_with_prefix(self): + bucket = create_bucket("bucket") + bucket.blob( + "test.txt", + size=123, + content_type="text/plain", + ) + bucket.blob( + "dir/", + size=0, + ) + bucket.blob( + "dir/readme.md", + size=2102, + content_type="text/plain", + ) + bucket.blob("dir/b/", size=0) + bucket.blob("dir/b/image.jpg", size=1, content_type="image/jpeg") + bucket.blob("other_dir/", size=0) + + self.assertEqual( + [ + x["key"] + for x in list_bucket_objects( + bucket.name, page=1, per_page=10, prefix="dir/" + ).items + ], + [ + "dir/b/", + "dir/readme.md", + ], + )