From f89b02baec0bc21fccca42cd8d6015d9f4e9ed51 Mon Sep 17 00:00:00 2001 From: Annie Liang <64233642+xinlian12@users.noreply.github.com> Date: Mon, 16 Sep 2024 11:43:40 -0700 Subject: [PATCH 01/12] AddFeedRangeSupportInChangeFeed (#36930) * Add getFeedRanges API * Add feedRange support in query changeFeed --------- Co-authored-by: annie-mac --- sdk/cosmos/azure-cosmos/azure/cosmos/_base.py | 19 +- .../azure/cosmos/_change_feed/__init__.py | 20 + .../azure/cosmos/_change_feed/aio/__init__.py | 20 + .../_change_feed/aio/change_feed_fetcher.py | 205 +++++++++ .../_change_feed/aio/change_feed_iterable.py | 166 +++++++ .../_change_feed/change_feed_fetcher.py | 196 +++++++++ .../_change_feed/change_feed_iterable.py | 159 +++++++ .../_change_feed/change_feed_start_from.py | 199 +++++++++ .../cosmos/_change_feed/change_feed_state.py | 413 ++++++++++++++++++ .../composite_continuation_token.py | 72 +++ .../azure/cosmos/_change_feed/feed_range.py | 109 +++++ ...feed_range_composite_continuation_token.py | 175 ++++++++ .../azure/cosmos/_cosmos_client_connection.py | 44 +- .../_routing/aio/routing_map_provider.py | 14 +- .../cosmos/_routing/routing_map_provider.py | 9 +- .../azure/cosmos/_routing/routing_range.py | 75 ++++ .../azure/cosmos/aio/_container.py | 245 +++++++++-- .../aio/_cosmos_client_connection_async.py | 10 +- .../azure-cosmos/azure/cosmos/container.py | 276 +++++++++--- .../azure-cosmos/azure/cosmos/exceptions.py | 7 +- .../azure/cosmos/partition_key.py | 25 +- .../azure-cosmos/test/test_change_feed.py | 256 +++++++++++ .../test/test_change_feed_async.py | 280 ++++++++++++ .../test/test_change_feed_split.py | 81 ++++ .../test/test_change_feed_split_async.py | 94 ++++ .../test/test_container_properties_cache.py | 2 +- .../test_container_properties_cache_async.py | 2 +- sdk/cosmos/azure-cosmos/test/test_query.py | 290 +----------- .../azure-cosmos/test/test_query_async.py | 328 +------------- .../azure-cosmos/test/test_vector_policy.py | 2 +- 
.../test/test_vector_policy_async.py | 2 +- 31 files changed, 3032 insertions(+), 763 deletions(-) create mode 100644 sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/__init__.py create mode 100644 sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/__init__.py create mode 100644 sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_fetcher.py create mode 100644 sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_iterable.py create mode 100644 sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_fetcher.py create mode 100644 sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_iterable.py create mode 100644 sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_start_from.py create mode 100644 sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_state.py create mode 100644 sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/composite_continuation_token.py create mode 100644 sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range.py create mode 100644 sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range_composite_continuation_token.py create mode 100644 sdk/cosmos/azure-cosmos/test/test_change_feed.py create mode 100644 sdk/cosmos/azure-cosmos/test/test_change_feed_async.py create mode 100644 sdk/cosmos/azure-cosmos/test/test_change_feed_split.py create mode 100644 sdk/cosmos/azure-cosmos/test/test_change_feed_split_async.py diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_base.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_base.py index 1891560cfa10..315b5583a558 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_base.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_base.py @@ -283,23 +283,8 @@ def GetHeaders( # pylint: disable=too-many-statements,too-many-branches if options.get("disableRUPerMinuteUsage"): headers[http_constants.HttpHeaders.DisableRUPerMinuteUsage] = options["disableRUPerMinuteUsage"] - if options.get("changeFeed") is True: - # On REST level, change feed is using 
IfNoneMatch/ETag instead of continuation. - if_none_match_value = None - if options.get("continuation"): - if_none_match_value = options["continuation"] - elif options.get("isStartFromBeginning") and not options["isStartFromBeginning"]: - if_none_match_value = "*" - elif options.get("startTime"): - start_time = options.get("startTime") - headers[http_constants.HttpHeaders.IfModified_since] = start_time - if if_none_match_value: - headers[http_constants.HttpHeaders.IfNoneMatch] = if_none_match_value - - headers[http_constants.HttpHeaders.AIM] = http_constants.HttpHeaders.IncrementalFeedHeaderValue - else: - if options.get("continuation"): - headers[http_constants.HttpHeaders.Continuation] = options["continuation"] + if options.get("continuation"): + headers[http_constants.HttpHeaders.Continuation] = options["continuation"] if options.get("populatePartitionKeyRangeStatistics"): headers[http_constants.HttpHeaders.PopulatePartitionKeyRangeStatistics] = options[ diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/__init__.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/__init__.py new file mode 100644 index 000000000000..f5373937e446 --- /dev/null +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/__init__.py @@ -0,0 +1,20 @@ +# The MIT License (MIT) +# Copyright (c) 2014 Microsoft Corporation + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. 
+ +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/__init__.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/__init__.py new file mode 100644 index 000000000000..f5373937e446 --- /dev/null +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/__init__.py @@ -0,0 +1,20 @@ +# The MIT License (MIT) +# Copyright (c) 2014 Microsoft Corporation + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_fetcher.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_fetcher.py new file mode 100644 index 000000000000..d997360e4c41 --- /dev/null +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_fetcher.py @@ -0,0 +1,205 @@ +# The MIT License (MIT) +# Copyright (c) 2014 Microsoft Corporation + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +"""Internal class for processing change feed implementation in the Azure Cosmos +database service. 
+""" +import base64 +import json +from abc import ABC, abstractmethod +from typing import Dict, Any, List, Callable, Tuple, Awaitable, cast + +from azure.cosmos import http_constants, exceptions +from azure.cosmos._change_feed.change_feed_start_from import ChangeFeedStartFromType +from azure.cosmos._change_feed.change_feed_state import ChangeFeedStateV2, ChangeFeedStateVersion +from azure.cosmos.aio import _retry_utility_async +from azure.cosmos.exceptions import CosmosHttpResponseError + +# pylint: disable=protected-access + +class ChangeFeedFetcher(ABC): + + @abstractmethod + async def fetch_next_block(self) -> List[Dict[str, Any]]: + pass + +class ChangeFeedFetcherV1(ChangeFeedFetcher): + """Internal class for change feed fetch v1 implementation. + This is used when partition key range id is used or when the supplied continuation token is in just simple etag. + Please note v1 does not support split or merge. + + """ + def __init__( + self, + client, + resource_link: str, + feed_options: Dict[str, Any], + fetch_function: Callable[[Dict[str, Any]], Awaitable[Tuple[List[Dict[str, Any]], Dict[str, Any]]]] + ) -> None: + + self._client = client + self._feed_options = feed_options + + self._change_feed_state = self._feed_options.pop("changeFeedState") + if self._change_feed_state.version != ChangeFeedStateVersion.V1: + raise ValueError(f"ChangeFeedFetcherV1 can not handle change feed state version" + f" {type(self._change_feed_state)}") + + self._resource_link = resource_link + self._fetch_function = fetch_function + + async def fetch_next_block(self) -> List[Dict[str, Any]]: + """Returns a block of results. + + :return: List of results. 
+ :rtype: list + """ + async def callback(): + return await self.fetch_change_feed_items() + + return await _retry_utility_async.ExecuteAsync(self._client, self._client._global_endpoint_manager, callback) + + async def fetch_change_feed_items(self) -> List[Dict[str, Any]]: + self._feed_options["changeFeedState"] = self._change_feed_state + + self._change_feed_state.populate_feed_options(self._feed_options) + is_s_time_first_fetch = self._change_feed_state._continuation is None + while True: + (fetched_items, response_headers) = await self._fetch_function(self._feed_options) + continuation_key = http_constants.HttpHeaders.ETag + # In change feed queries, the continuation token is always populated. The hasNext() test is whether + # there is any items in the response or not. + self._change_feed_state.apply_server_response_continuation( + cast(str, response_headers.get(continuation_key)), + bool(fetched_items)) + + if fetched_items: + break + + # When processing from point in time, there will be no initial results being returned, + # so we will retry with the new continuation token again + if (self._change_feed_state._change_feed_start_from.version == ChangeFeedStartFromType.POINT_IN_TIME + and is_s_time_first_fetch): + is_s_time_first_fetch = False + else: + break + return fetched_items + + +class ChangeFeedFetcherV2(object): + """Internal class for change feed fetch v2 implementation. 
+ """ + + def __init__( + self, + client, + resource_link: str, + feed_options: Dict[str, Any], + fetch_function: Callable[[Dict[str, Any]], Awaitable[Tuple[List[Dict[str, Any]], Dict[str, Any]]]] + ) -> None: + + self._client = client + self._feed_options = feed_options + + self._change_feed_state: ChangeFeedStateV2 = self._feed_options.pop("changeFeedState") + if self._change_feed_state.version != ChangeFeedStateVersion.V2: + raise ValueError(f"ChangeFeedFetcherV2 can not handle change feed state version " + f"{type(self._change_feed_state.version)}") + + self._resource_link = resource_link + self._fetch_function = fetch_function + + async def fetch_next_block(self) -> List[Dict[str, Any]]: + """Returns a block of results. + + :return: List of results. + :rtype: list + """ + + async def callback(): + return await self.fetch_change_feed_items() + + try: + return await _retry_utility_async.ExecuteAsync( + self._client, + self._client._global_endpoint_manager, + callback) + except CosmosHttpResponseError as e: + if exceptions._partition_range_is_gone(e) or exceptions._is_partition_split_or_merge(e): + # refresh change feed state + await self._change_feed_state.handle_feed_range_gone_async( + self._client._routing_map_provider, + self._resource_link) + else: + raise e + + return await self.fetch_next_block() + + async def fetch_change_feed_items(self) -> List[Dict[str, Any]]: + self._feed_options["changeFeedState"] = self._change_feed_state + + self._change_feed_state.populate_feed_options(self._feed_options) + + is_s_time_first_fetch = True + while True: + (fetched_items, response_headers) = await self._fetch_function(self._feed_options) + + continuation_key = http_constants.HttpHeaders.ETag + # In change feed queries, the continuation token is always populated. The hasNext() test is whether + # there is any items in the response or not. 
+ + self._change_feed_state.apply_server_response_continuation( + cast(str, response_headers.get(continuation_key)), + bool(fetched_items)) + + if fetched_items: + self._change_feed_state._continuation._move_to_next_token() + response_headers[continuation_key] = self._get_base64_encoded_continuation() + break + + # when there is no items being returned, we will decide to retry based on: + # 1. When processing from point in time, there will be no initial results being returned, + # so we will retry with the new continuation token + # 2. if the feed range of the changeFeedState span multiple physical partitions + # then we will read from the next feed range until we have looped through all physical partitions + if (self._change_feed_state._change_feed_start_from.version == ChangeFeedStartFromType.POINT_IN_TIME + and is_s_time_first_fetch): + response_headers[continuation_key] = self._get_base64_encoded_continuation() + is_s_time_first_fetch = False + should_retry = True + else: + self._change_feed_state._continuation._move_to_next_token() + response_headers[continuation_key] = self._get_base64_encoded_continuation() + should_retry = self._change_feed_state.should_retry_on_not_modified_response() + is_s_time_first_fetch = False + + if not should_retry: + break + + return fetched_items + + def _get_base64_encoded_continuation(self) -> str: + continuation_json = json.dumps(self._change_feed_state.to_dict()) + json_bytes = continuation_json.encode('utf-8') + # Encode the bytes to a Base64 string + base64_bytes = base64.b64encode(json_bytes) + # Convert the Base64 bytes to a string + return base64_bytes.decode('utf-8') diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_iterable.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_iterable.py new file mode 100644 index 000000000000..3f73050dfc7a --- /dev/null +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_iterable.py @@ -0,0 +1,166 @@ +# The MIT License 
(MIT) +# Copyright (c) 2014 Microsoft Corporation + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +"""Iterable change feed results in the Azure Cosmos database service. +""" +from typing import Dict, Any, Optional, Callable, Tuple, List, Awaitable, Union + +from azure.core.async_paging import AsyncPageIterator + +from azure.cosmos._change_feed.aio.change_feed_fetcher import ChangeFeedFetcherV1, ChangeFeedFetcherV2 +from azure.cosmos._change_feed.change_feed_state import ChangeFeedState, ChangeFeedStateVersion + + +# pylint: disable=protected-access + +class ChangeFeedIterable(AsyncPageIterator): + """Represents an iterable object of the change feed results. + + ChangeFeedIterable is a wrapper for change feed execution. 
+ """ + + def __init__( + self, + client, + options: Dict[str, Any], + fetch_function=Optional[Callable[[Dict[str, Any]], Awaitable[Tuple[List[Dict[str, Any]], Dict[str, Any]]]]], + collection_link=Optional[str], + continuation_token=Optional[str], + ) -> None: + """Instantiates a ChangeFeedIterable for non-client side partitioning queries. + + :param CosmosClient client: Instance of document client. + :param dict options: The request options for the request. + :param fetch_function: The fetch function. + :param collection_link: The collection resource link. + :param continuation_token: The continuation token passed in from by_page + """ + + self._client = client + self.retry_options = client.connection_policy.RetryOptions + self._options = options + self._fetch_function = fetch_function + self._collection_link = collection_link + self._change_feed_fetcher: Optional[Union[ChangeFeedFetcherV1, ChangeFeedFetcherV2]] = None + + if self._options.get("changeFeedStateContext") is None: + raise ValueError("Missing changeFeedStateContext in feed options") + + change_feed_state_context = self._options.pop("changeFeedStateContext") + + continuation = continuation_token if continuation_token is not None\ + else change_feed_state_context.pop("continuation", None) + + # analysis and validate continuation token + # there are two types of continuation token we support currently: + # v1 version: the continuation token would just be the _etag, + # which is being returned when customer is using partition_key_range_id, + # which is under deprecation and does not support split/merge + # v2 version: the continuation token will be base64 encoded composition token + # which includes full change feed state + if continuation is not None: + if continuation.isdigit() or continuation.strip('\'"').isdigit(): + change_feed_state_context["continuationPkRangeId"] = continuation + else: + change_feed_state_context["continuationFeedRange"] = continuation + + 
self._validate_change_feed_state_context(change_feed_state_context) + self._options["changeFeedStateContext"] = change_feed_state_context + + super(ChangeFeedIterable, self).__init__( + self._fetch_next, + self._unpack, # type: ignore[arg-type] + continuation_token=continuation_token) + + async def _unpack( + self, + block: List[Dict[str, Any]] + ) -> Tuple[Optional[str], List[Dict[str, Any]]]: + continuation: Optional[str] = None + if self._client.last_response_headers: + continuation = self._client.last_response_headers.get('etag') + + if block: + self._did_a_call_already = False + return continuation, block + + async def _fetch_next(self, *args) -> List[Dict[str, Any]]: # pylint: disable=unused-argument + """Return a block of results with respecting retry policy. + + :param Any args: + :return: List of results. + :rtype: list + """ + if self._change_feed_fetcher is None: + await self._initialize_change_feed_fetcher() + + assert self._change_feed_fetcher is not None + block = await self._change_feed_fetcher.fetch_next_block() + if not block: + raise StopAsyncIteration + return block + + async def _initialize_change_feed_fetcher(self) -> None: + change_feed_state_context = self._options.pop("changeFeedStateContext") + conn_properties = await self._options.pop("containerProperties") + if change_feed_state_context.get("partitionKey"): + change_feed_state_context["partitionKey"] = await change_feed_state_context.pop("partitionKey") + change_feed_state_context["partitionKeyFeedRange"] =\ + await change_feed_state_context.pop("partitionKeyFeedRange") + + change_feed_state =\ + ChangeFeedState.from_json(self._collection_link, conn_properties["_rid"], change_feed_state_context) + self._options["changeFeedState"] = change_feed_state + + if change_feed_state.version == ChangeFeedStateVersion.V1: + self._change_feed_fetcher = ChangeFeedFetcherV1( + self._client, + self._collection_link, + self._options, + self._fetch_function + ) + else: + self._change_feed_fetcher = 
ChangeFeedFetcherV2( + self._client, + self._collection_link, + self._options, + self._fetch_function + ) + + def _validate_change_feed_state_context(self, change_feed_state_context: Dict[str, Any]) -> None: + + if change_feed_state_context.get("continuationPkRangeId") is not None: + # if continuation token is in v1 format, throw exception if feed_range is set + if change_feed_state_context.get("feedRange") is not None: + raise ValueError("feed_range and continuation are incompatible") + elif change_feed_state_context.get("continuationFeedRange") is not None: + # if continuation token is in v2 format, since the token itself contains the full change feed state + # so we will ignore other parameters (including incompatible parameters) if they passed in + pass + else: + # validation when no continuation is passed + exclusive_keys = ["partitionKeyRangeId", "partitionKey", "feedRange"] + count = sum(1 for key in exclusive_keys if + key in change_feed_state_context and change_feed_state_context[key] is not None) + if count > 1: + raise ValueError( + "partition_key_range_id, partition_key, feed_range are exclusive parameters," + " please only set one of them") diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_fetcher.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_fetcher.py new file mode 100644 index 000000000000..c3ff6472af28 --- /dev/null +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_fetcher.py @@ -0,0 +1,196 @@ +# The MIT License (MIT) +# Copyright (c) 2014 Microsoft Corporation + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the 
following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +"""Internal class for processing change feed implementation in the Azure Cosmos +database service. +""" +import base64 +import json +from abc import ABC, abstractmethod +from typing import Dict, Any, List, Callable, Tuple, cast + +from azure.cosmos import _retry_utility, http_constants, exceptions +from azure.cosmos._change_feed.change_feed_start_from import ChangeFeedStartFromType +from azure.cosmos._change_feed.change_feed_state import ChangeFeedStateV1, ChangeFeedStateV2, ChangeFeedStateVersion +from azure.cosmos.exceptions import CosmosHttpResponseError + +# pylint: disable=protected-access + +class ChangeFeedFetcher(ABC): + + @abstractmethod + def fetch_next_block(self): + pass + +class ChangeFeedFetcherV1(ChangeFeedFetcher): + """Internal class for change feed fetch v1 implementation. + This is used when partition key range id is used or when the supplied continuation token is in just simple etag. + Please note v1 does not support split or merge. 
+ + """ + def __init__( + self, + client, + resource_link: str, + feed_options: Dict[str, Any], + fetch_function: Callable[[Dict[str, Any]], Tuple[List[Dict[str, Any]], Dict[str, Any]]] + ) -> None: + + self._client = client + self._feed_options = feed_options + + self._change_feed_state: ChangeFeedStateV1 = self._feed_options.pop("changeFeedState") + if self._change_feed_state.version != ChangeFeedStateVersion.V1: + raise ValueError(f"ChangeFeedFetcherV1 can not handle change feed state version" + f" {type(self._change_feed_state)}") + + self._resource_link = resource_link + self._fetch_function = fetch_function + + def fetch_next_block(self) -> List[Dict[str, Any]]: + """Returns a block of results. + + :return: List of results. + :rtype: list + """ + def callback(): + return self.fetch_change_feed_items() + + return _retry_utility.Execute(self._client, self._client._global_endpoint_manager, callback) + + def fetch_change_feed_items(self) -> List[Dict[str, Any]]: + self._feed_options["changeFeedState"] = self._change_feed_state + + self._change_feed_state.populate_feed_options(self._feed_options) + is_s_time_first_fetch = self._change_feed_state._continuation is None + while True: + (fetched_items, response_headers) = self._fetch_function(self._feed_options) + continuation_key = http_constants.HttpHeaders.ETag + # In change feed queries, the continuation token is always populated. The hasNext() test is whether + # there is any items in the response or not. 
+ self._change_feed_state.apply_server_response_continuation( + cast(str, response_headers.get(continuation_key)), + bool(fetched_items)) + + if fetched_items: + break + + # When processing from point in time, there will be no initial results being returned, + # so we will retry with the new continuation token again + if (self._change_feed_state._change_feed_start_from.version == ChangeFeedStartFromType.POINT_IN_TIME + and is_s_time_first_fetch): + is_s_time_first_fetch = False + else: + break + return fetched_items + + +class ChangeFeedFetcherV2(object): + """Internal class for change feed fetch v2 implementation. + """ + + def __init__( + self, + client, + resource_link: str, + feed_options: Dict[str, Any], + fetch_function: Callable[[Dict[str, Any]], Tuple[List[Dict[str, Any]], Dict[str, Any]]]): + + self._client = client + self._feed_options = feed_options + + self._change_feed_state: ChangeFeedStateV2 = self._feed_options.pop("changeFeedState") + if self._change_feed_state.version != ChangeFeedStateVersion.V2: + raise ValueError(f"ChangeFeedFetcherV2 can not handle change feed state version " + f"{type(self._change_feed_state)}") + + self._resource_link = resource_link + self._fetch_function = fetch_function + + def fetch_next_block(self) -> List[Dict[str, Any]]: + """Returns a block of results. + + :return: List of results. 
+ :rtype: list + """ + + def callback(): + return self.fetch_change_feed_items() + + try: + return _retry_utility.Execute(self._client, self._client._global_endpoint_manager, callback) + except CosmosHttpResponseError as e: + if exceptions._partition_range_is_gone(e) or exceptions._is_partition_split_or_merge(e): + # refresh change feed state + self._change_feed_state.handle_feed_range_gone(self._client._routing_map_provider, self._resource_link) + else: + raise e + + return self.fetch_next_block() + + def fetch_change_feed_items(self) -> List[Dict[str, Any]]: + self._feed_options["changeFeedState"] = self._change_feed_state + + self._change_feed_state.populate_feed_options(self._feed_options) + + is_s_time_first_fetch = self._change_feed_state._continuation.current_token.token is None + while True: + (fetched_items, response_headers) = self._fetch_function(self._feed_options) + + continuation_key = http_constants.HttpHeaders.ETag + # In change feed queries, the continuation token is always populated. + self._change_feed_state.apply_server_response_continuation( + cast(str, response_headers.get(continuation_key)), + bool(fetched_items)) + + if fetched_items: + self._change_feed_state._continuation._move_to_next_token() + response_headers[continuation_key] = self._get_base64_encoded_continuation() + break + + # when there is no items being returned, we will decide to retry based on: + # 1. When processing from point in time, there will be no initial results being returned, + # so we will retry with the new continuation token + # 2. 
if the feed range of the changeFeedState span multiple physical partitions + # then we will read from the next feed range until we have looped through all physical partitions + if (self._change_feed_state._change_feed_start_from.version == ChangeFeedStartFromType.POINT_IN_TIME + and is_s_time_first_fetch): + response_headers[continuation_key] = self._get_base64_encoded_continuation() + is_s_time_first_fetch = False + should_retry = True + else: + self._change_feed_state._continuation._move_to_next_token() + response_headers[continuation_key] = self._get_base64_encoded_continuation() + should_retry = self._change_feed_state.should_retry_on_not_modified_response() + is_s_time_first_fetch = False + + if not should_retry: + break + + return fetched_items + + def _get_base64_encoded_continuation(self) -> str: + continuation_json = json.dumps(self._change_feed_state.to_dict()) + json_bytes = continuation_json.encode('utf-8') + # Encode the bytes to a Base64 string + base64_bytes = base64.b64encode(json_bytes) + # Convert the Base64 bytes to a string + return base64_bytes.decode('utf-8') diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_iterable.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_iterable.py new file mode 100644 index 000000000000..bd37b60926cf --- /dev/null +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_iterable.py @@ -0,0 +1,159 @@ +# The MIT License (MIT) +# Copyright (c) 2014 Microsoft Corporation + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this 
permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +"""Iterable change feed results in the Azure Cosmos database service. +""" +from typing import Dict, Any, Tuple, List, Optional, Callable, cast, Union + +from azure.core.paging import PageIterator + +from azure.cosmos._change_feed.change_feed_fetcher import ChangeFeedFetcherV1, ChangeFeedFetcherV2 +from azure.cosmos._change_feed.change_feed_state import ChangeFeedState, ChangeFeedStateVersion + + +class ChangeFeedIterable(PageIterator): + """Represents an iterable object of the change feed results. + + ChangeFeedIterable is a wrapper for change feed execution. + """ + + def __init__( + self, + client, + options: Dict[str, Any], + fetch_function=Optional[Callable[[Dict[str, Any]], Tuple[List[Dict[str, Any]], Dict[str, Any]]]], + collection_link=Optional[str], + continuation_token=Optional[str], + ) -> None: + """Instantiates a ChangeFeedIterable for non-client side partitioning queries. + + :param CosmosClient client: Instance of document client. + :param dict options: The request options for the request. + :param fetch_function: The fetch function. + :param collection_link: The collection resource link. 
+ :param continuation_token: The continuation token passed in from by_page + """ + + self._client = client + self.retry_options = client.connection_policy.RetryOptions + self._options = options + self._fetch_function = fetch_function + self._collection_link = collection_link + self._change_feed_fetcher: Optional[Union[ChangeFeedFetcherV1, ChangeFeedFetcherV2]] = None + + if self._options.get("changeFeedStateContext") is None: + raise ValueError("Missing changeFeedStateContext in feed options") + + change_feed_state_context = self._options.pop("changeFeedStateContext") + continuation = continuation_token if continuation_token is not None\ + else change_feed_state_context.pop("continuation", None) + + # analysis and validate continuation token + # there are two types of continuation token we support currently: + # v1 version: the continuation token would just be the _etag, + # which is being returned when customer is using partition_key_range_id, + # which is under deprecation and does not support split/merge + # v2 version: the continuation token will be base64 encoded composition token + # which includes full change feed state + if continuation is not None: + if continuation.isdigit() or continuation.strip('\'"').isdigit(): + change_feed_state_context["continuationPkRangeId"] = continuation + else: + change_feed_state_context["continuationFeedRange"] = continuation + + self._validate_change_feed_state_context(change_feed_state_context) + self._options["changeFeedStateContext"] = change_feed_state_context + + super(ChangeFeedIterable, self).__init__( + self._fetch_next, + self._unpack, # type: ignore[arg-type] + continuation_token=continuation_token) + + def _unpack(self, block: List[Dict[str, Any]]) -> Tuple[Optional[str], List[Dict[str, Any]]]: + continuation: Optional[str] = None + if self._client.last_response_headers: + continuation = self._client.last_response_headers.get('etag') + + if block: + self._did_a_call_already = False + return continuation, block + + 
    def _fetch_next(self, *args) -> List[Dict[str, Any]]: # pylint: disable=unused-argument
        """Return a block of results with respecting retry policy.

        :param Any args:
        :return: List of results.
        :rtype: list
        """

        # The fetcher is created lazily on the first page request, so that the
        # (possibly expensive) change feed state construction only happens when
        # the iterable is actually consumed.
        if self._change_feed_fetcher is None:
            self._initialize_change_feed_fetcher()

        assert self._change_feed_fetcher is not None
        block = self._change_feed_fetcher.fetch_next_block()
        # An empty block means the feed is drained for now; StopIteration is the
        # paging protocol's end-of-iteration signal.
        if not block:
            raise StopIteration
        return block

    def _initialize_change_feed_fetcher(self) -> None:
        """Build the change feed state from the pending context and pick the
        matching fetcher implementation (V1 for pkRangeId/plain-etag
        continuations, V2 for feed-range based state)."""
        change_feed_state_context = self._options.pop("changeFeedStateContext")
        change_feed_state = \
            ChangeFeedState.from_json(
                self._collection_link,
                cast(str, self._options.get("containerRID")),
                change_feed_state_context)

        # downstream request code reads the parsed state from the options dict
        self._options["changeFeedState"] = change_feed_state

        if change_feed_state.version == ChangeFeedStateVersion.V1:
            self._change_feed_fetcher = ChangeFeedFetcherV1(
                self._client,
                self._collection_link,
                self._options,
                self._fetch_function
            )
        else:
            self._change_feed_fetcher = ChangeFeedFetcherV2(
                self._client,
                self._collection_link,
                self._options,
                self._fetch_function
            )

    def _validate_change_feed_state_context(self, change_feed_state_context: Dict[str, Any]) -> None:
        """Reject incompatible combinations of change feed options.

        :param change_feed_state_context: the raw option dict collected from the caller.
        :raises ValueError: when a v1 continuation is combined with feed_range,
            or when more than one of partition_key_range_id / partition_key /
            feed_range is supplied without a continuation.
        """

        if change_feed_state_context.get("continuationPkRangeId") is not None:
            # if continuation token is in v1 format, throw exception if feed_range is set
            if change_feed_state_context.get("feedRange") is not None:
                raise ValueError("feed_range and continuation are incompatible")
        elif change_feed_state_context.get("continuationFeedRange") is not None:
            # if continuation token is in v2 format, since the token itself contains the full change feed state
            # so we will ignore other parameters (including incompatible parameters) if they passed in
            pass
        else:
            # validation when no continuation is passed
            exclusive_keys = ["partitionKeyRangeId", "partitionKey", "feedRange"]
            count = sum(1 for key in exclusive_keys if
                        key in change_feed_state_context and change_feed_state_context[key] is not None)
            if count > 1:
                raise ValueError(
                    "partition_key_range_id, partition_key, feed_range are exclusive parameters,"
                    " please only set one of them")
"""Internal class for change feed start from implementation in the Azure Cosmos database service.
"""

from abc import ABC, abstractmethod
from datetime import datetime, timezone
from enum import Enum
from typing import Optional, Union, Literal, Any, Dict

# NOTE(review): `http_constants` (azure.cosmos) and `Range`
# (azure.cosmos._routing.routing_range) are project imports from the top of
# this file; they are only referenced inside method bodies below.

class ChangeFeedStartFromType(Enum):
    """Discriminator stored under ``Type`` in the serialized start position."""
    BEGINNING = "Beginning"
    NOW = "Now"
    LEASE = "Lease"
    POINT_IN_TIME = "PointInTime"

class ChangeFeedStartFromInternal(ABC):
    """Abstract class for change feed start from implementation in the Azure Cosmos database service.

    Concrete subclasses know how to serialize themselves (:meth:`to_dict`) and
    how to stamp the matching request headers (:meth:`populate_request_headers`).
    """

    type_property_name = "Type"

    def __init__(self, start_from_type: ChangeFeedStartFromType) -> None:
        # NOTE(review): attribute is historically named `version` although it
        # holds the start-from *type*; kept as-is for backward compatibility.
        self.version = start_from_type

    @abstractmethod
    def to_dict(self) -> Dict[str, Any]:
        """Serialize this start position into a JSON-compatible dict."""

    @staticmethod
    def from_start_time(
            start_time: Optional[Union[datetime, Literal["Now", "Beginning"]]]) -> 'ChangeFeedStartFromInternal':
        """Build a start position from the user-facing ``start_time`` option.

        ``None`` defaults to "Now"; a ``datetime`` selects a point in time; the
        literals "Now"/"Beginning" (case-insensitive) select those modes.

        :raises ValueError: for any other string value.
        """
        if start_time is None:
            return ChangeFeedStartFromNow()
        if isinstance(start_time, datetime):
            return ChangeFeedStartFromPointInTime(start_time)
        if start_time.lower() == ChangeFeedStartFromType.NOW.value.lower():
            return ChangeFeedStartFromNow()
        if start_time.lower() == ChangeFeedStartFromType.BEGINNING.value.lower():
            return ChangeFeedStartFromBeginning()

        raise ValueError(f"Invalid start_time '{start_time}'")

    @staticmethod
    def from_json(data: Dict[str, Any]) -> 'ChangeFeedStartFromInternal':
        """Deserialize a start position previously produced by :meth:`to_dict`.

        :raises ValueError: when the ``Type`` discriminator is missing or unknown.
        """
        change_feed_start_from_type = data.get(ChangeFeedStartFromInternal.type_property_name)
        if change_feed_start_from_type is None:
            raise ValueError(f"Invalid start from json [Missing {ChangeFeedStartFromInternal.type_property_name}]")

        if change_feed_start_from_type == ChangeFeedStartFromType.BEGINNING.value:
            return ChangeFeedStartFromBeginning.from_json(data)
        if change_feed_start_from_type == ChangeFeedStartFromType.LEASE.value:
            return ChangeFeedStartFromETagAndFeedRange.from_json(data)
        if change_feed_start_from_type == ChangeFeedStartFromType.NOW.value:
            return ChangeFeedStartFromNow.from_json(data)
        if change_feed_start_from_type == ChangeFeedStartFromType.POINT_IN_TIME.value:
            return ChangeFeedStartFromPointInTime.from_json(data)

        raise ValueError(f"Can not process changeFeedStartFrom for type {change_feed_start_from_type}")

    @abstractmethod
    def populate_request_headers(self, request_headers) -> None:
        """Stamp the headers that encode this start position onto a request."""


class ChangeFeedStartFromBeginning(ChangeFeedStartFromInternal):
    """Class for change feed start from beginning implementation in the Azure Cosmos database service.
    """

    def __init__(self) -> None:
        super().__init__(ChangeFeedStartFromType.BEGINNING)

    def to_dict(self) -> Dict[str, Any]:
        return {
            self.type_property_name: ChangeFeedStartFromType.BEGINNING.value
        }

    def populate_request_headers(self, request_headers) -> None:
        pass  # there is no headers need to be set for start from beginning

    @classmethod
    def from_json(cls, data: Dict[str, Any]) -> 'ChangeFeedStartFromBeginning':
        return ChangeFeedStartFromBeginning()


class ChangeFeedStartFromETagAndFeedRange(ChangeFeedStartFromInternal):
    """Class for change feed start from etag and feed range implementation in the Azure Cosmos database service.
    """

    _etag_property_name = "Etag"
    _feed_range_property_name = "FeedRange"

    def __init__(self, etag, feed_range) -> None:
        if feed_range is None:
            raise ValueError("feed_range is missing")

        self._etag = etag
        self._feed_range = feed_range
        super().__init__(ChangeFeedStartFromType.LEASE)

    def to_dict(self) -> Dict[str, Any]:
        return {
            self.type_property_name: ChangeFeedStartFromType.LEASE.value,
            self._etag_property_name: self._etag,
            self._feed_range_property_name: self._feed_range.to_dict()
        }

    @classmethod
    def from_json(cls, data: Dict[str, Any]) -> 'ChangeFeedStartFromETagAndFeedRange':
        etag = data.get(cls._etag_property_name)
        if etag is None:
            raise ValueError(f"Invalid change feed start from [Missing {cls._etag_property_name}]")

        feed_range_data = data.get(cls._feed_range_property_name)
        if feed_range_data is None:
            raise ValueError(f"Invalid change feed start from [Missing {cls._feed_range_property_name}]")
        feed_range = Range.ParseFromDict(feed_range_data)
        return cls(etag, feed_range)

    def populate_request_headers(self, request_headers) -> None:
        # change feed uses etag as the continuationToken
        if self._etag:
            request_headers[http_constants.HttpHeaders.IfNoneMatch] = self._etag


class ChangeFeedStartFromNow(ChangeFeedStartFromInternal):
    """Class for change feed start from now implementation in the Azure Cosmos database service.
    """

    def __init__(self) -> None:
        super().__init__(ChangeFeedStartFromType.NOW)

    def to_dict(self) -> Dict[str, Any]:
        return {
            self.type_property_name: ChangeFeedStartFromType.NOW.value
        }

    def populate_request_headers(self, request_headers) -> None:
        # "*" asks the service to return only changes after the current point
        request_headers[http_constants.HttpHeaders.IfNoneMatch] = "*"

    @classmethod
    def from_json(cls, data: Dict[str, Any]) -> 'ChangeFeedStartFromNow':
        return ChangeFeedStartFromNow()


class ChangeFeedStartFromPointInTime(ChangeFeedStartFromInternal):
    """Class for change feed start from point in time implementation in the Azure Cosmos database service.
    """

    _point_in_time_ms_property_name = "PointInTimeMs"

    def __init__(self, start_time: datetime):
        if start_time is None:
            raise ValueError("start_time is missing")

        self._start_time = start_time
        super().__init__(ChangeFeedStartFromType.POINT_IN_TIME)

    def to_dict(self) -> Dict[str, Any]:
        # serialized as epoch *milliseconds*; from_json must mirror this scale
        return {
            self.type_property_name: ChangeFeedStartFromType.POINT_IN_TIME.value,
            self._point_in_time_ms_property_name:
                int(self._start_time.astimezone(timezone.utc).timestamp() * 1000)
        }

    def populate_request_headers(self, request_headers) -> None:
        request_headers[http_constants.HttpHeaders.IfModified_since] =\
            self._start_time.astimezone(timezone.utc).strftime('%a, %d %b %Y %H:%M:%S GMT')

    @classmethod
    def from_json(cls, data: Dict[str, Any]) -> 'ChangeFeedStartFromPointInTime':
        point_in_time_ms = data.get(cls._point_in_time_ms_property_name)
        if point_in_time_ms is None:
            raise ValueError(f"Invalid change feed start from {cls._point_in_time_ms_property_name} ")

        # BUGFIX: the stored value is epoch *milliseconds* (see to_dict), but
        # datetime.fromtimestamp expects *seconds* — the previous code passed
        # the raw ms value (1000x too large, typically raising an out-of-range
        # error) and interpreted it via the process-local timezone. Scale to
        # seconds and construct the datetime directly in UTC so to_dict/from_json
        # round-trip exactly.
        point_in_time = datetime.fromtimestamp(point_in_time_ms / 1000, tz=timezone.utc)
        return ChangeFeedStartFromPointInTime(point_in_time)
# The MIT License (MIT)
# Copyright (c) 2014 Microsoft Corporation

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

"""Internal class for change feed state implementation in the Azure Cosmos
database service.
"""

import base64
import collections
import json
from abc import ABC, abstractmethod
from enum import Enum
from typing import Optional, Union, List, Any, Dict, Deque

from azure.cosmos import http_constants
from azure.cosmos._change_feed.change_feed_start_from import ChangeFeedStartFromInternal, \
    ChangeFeedStartFromETagAndFeedRange
from azure.cosmos._change_feed.composite_continuation_token import CompositeContinuationToken
from azure.cosmos._change_feed.feed_range import FeedRange, FeedRangeEpk, FeedRangePartitionKey
from azure.cosmos._change_feed.feed_range_composite_continuation_token import FeedRangeCompositeContinuation
from azure.cosmos._routing.aio.routing_map_provider import SmartRoutingMapProvider as AsyncSmartRoutingMapProvider
from azure.cosmos._routing.routing_map_provider import SmartRoutingMapProvider
from azure.cosmos._routing.routing_range import Range
from azure.cosmos.exceptions import CosmosHttpResponseError
from azure.cosmos.http_constants import StatusCodes, SubStatusCodes
from azure.cosmos.partition_key import _Empty, _Undefined

class ChangeFeedStateVersion(Enum):
    # v1: pkRangeId / bare-etag continuation (legacy, no split/merge support)
    # v2: base64-encoded composite continuation carrying the full state
    V1 = "v1"
    V2 = "v2"

class ChangeFeedState(ABC):
    """Abstract change feed state: knows how to decorate a request (headers and
    feed options) and how to absorb the server's continuation response."""

    # key under which the state version is stored in the serialized continuation
    version_property_name = "v"

    def __init__(self, version: ChangeFeedStateVersion) -> None:
        self.version = version

    @abstractmethod
    def populate_feed_options(self, feed_options: Dict[str, Any]) -> None:
        """Copy state-derived options (e.g. pkRangeId / partitionKey) into the feed options."""
        pass

    @abstractmethod
    def populate_request_headers(
            self,
            routing_provider: SmartRoutingMapProvider,
            request_headers: Dict[str, Any]) -> None:
        """Stamp change feed headers for a sync request."""
        pass

    @abstractmethod
    async def populate_request_headers_async(
            self,
            async_routing_provider: AsyncSmartRoutingMapProvider,
            request_headers: Dict[str, Any]) -> None:
        """Stamp change feed headers for an async request."""
        pass

    @abstractmethod
    def apply_server_response_continuation(self, continuation: str, has_modified_response: bool) -> None:
        """Record the etag returned by the server as the new continuation."""
        pass

    @staticmethod
    def from_json(
            container_link: str,
            container_rid: str,
            change_feed_state_context: Dict[str, Any]):
        """Pick and build the right state version from the option context.

        pkRangeId or a bare-etag continuation selects V1; a base64 composite
        continuation (or no continuation at all) selects V2.
        """
        if (change_feed_state_context.get("partitionKeyRangeId")
                or change_feed_state_context.get("continuationPkRangeId")):
            return ChangeFeedStateV1.from_json(container_link, container_rid, change_feed_state_context)

        if change_feed_state_context.get("continuationFeedRange"):
            # get changeFeedState from continuation
            continuation_json_str = base64.b64decode(change_feed_state_context["continuationFeedRange"]).decode(
                'utf-8')
            continuation_json = json.loads(continuation_json_str)
            version = continuation_json.get(ChangeFeedState.version_property_name)
            if version is None:
                raise ValueError("Invalid base64 encoded continuation string [Missing version]")

            if version == ChangeFeedStateVersion.V2.value:
                return ChangeFeedStateV2.from_continuation(container_link, container_rid, continuation_json)

            raise ValueError("Invalid base64 encoded continuation string [Invalid version]")

        # when there is no continuation token, by default construct ChangeFeedStateV2
        return ChangeFeedStateV2.from_initial_state(container_link, container_rid, change_feed_state_context)
class ChangeFeedStateV1(ChangeFeedState):
    """Change feed state v1 implementation.
    This is used when partition key range id is used or the continuation is just simple _etag
    """

    def __init__(
            self,
            container_link: str,
            container_rid: str,
            change_feed_start_from: ChangeFeedStartFromInternal,
            partition_key_range_id: Optional[str] = None,
            partition_key: Optional[Union[str, int, float, bool, List[Union[str, int, float, bool]], _Empty, _Undefined]] = None, # pylint: disable=line-too-long
            continuation: Optional[str] = None) -> None:

        self._container_link = container_link
        self._container_rid = container_rid
        self._change_feed_start_from = change_feed_start_from
        self._partition_key_range_id = partition_key_range_id
        self._partition_key = partition_key
        self._continuation = continuation
        super(ChangeFeedStateV1, self).__init__(ChangeFeedStateVersion.V1)

    @property
    def container_rid(self):
        # resource id of the monitored container
        return self._container_rid

    @classmethod
    def from_json(
            cls,
            container_link: str,
            container_rid: str,
            change_feed_state_context: Dict[str, Any]) -> 'ChangeFeedStateV1':
        """Build a v1 state from the raw option context collected by the iterable."""
        return cls(
            container_link,
            container_rid,
            ChangeFeedStartFromInternal.from_start_time(change_feed_state_context.get("startTime")),
            change_feed_state_context.get("partitionKeyRangeId"),
            change_feed_state_context.get("partitionKey"),
            change_feed_state_context.get("continuationPkRangeId")
        )

    def populate_request_headers(
            self,
            routing_provider: SmartRoutingMapProvider,
            request_headers: Dict[str, Any]) -> None:
        """Stamp the incremental-feed header, the start position and (last) the
        etag continuation — the continuation overrides the start position."""
        request_headers[http_constants.HttpHeaders.AIM] = http_constants.HttpHeaders.IncrementalFeedHeaderValue

        self._change_feed_start_from.populate_request_headers(request_headers)
        if self._continuation:
            request_headers[http_constants.HttpHeaders.IfNoneMatch] = self._continuation

    async def populate_request_headers_async(
            self,
            async_routing_provider: AsyncSmartRoutingMapProvider,
            request_headers: Dict[str, Any]) -> None: # pylint: disable=unused-argument
        # identical to the sync path; v1 never consults the routing provider
        request_headers[http_constants.HttpHeaders.AIM] = http_constants.HttpHeaders.IncrementalFeedHeaderValue

        self._change_feed_start_from.populate_request_headers(request_headers)
        if self._continuation:
            request_headers[http_constants.HttpHeaders.IfNoneMatch] = self._continuation

    def populate_feed_options(self, feed_options: Dict[str, Any]) -> None:
        if self._partition_key_range_id is not None:
            feed_options["partitionKeyRangeId"] = self._partition_key_range_id
        if self._partition_key is not None:
            feed_options["partitionKey"] = self._partition_key

    def apply_server_response_continuation(self, continuation: str, has_modified_response) -> None:
        # v1 continuation is just the latest server etag
        self._continuation = continuation

class ChangeFeedStateV2(ChangeFeedState):
    """Change feed state v2: feed-range based, with a composite continuation
    that survives partition splits and merges."""

    container_rid_property_name = "containerRid"
    change_feed_mode_property_name = "mode"
    change_feed_start_from_property_name = "startFrom"
    continuation_property_name = "continuation"

    # TODO: adding change feed mode
    def __init__(
            self,
            container_link: str,
            container_rid: str,
            feed_range: FeedRange,
            change_feed_start_from: ChangeFeedStartFromInternal,
            continuation: Optional[FeedRangeCompositeContinuation]
    ) -> None:

        self._container_link = container_link
        self._container_rid = container_rid
        self._feed_range = feed_range
        self._change_feed_start_from = change_feed_start_from
        if continuation is None:
            # fresh state: seed the composite continuation with a single
            # token (no etag yet) covering the whole requested feed range
            composite_continuation_token_queue: Deque = collections.deque()
            composite_continuation_token_queue.append(
                CompositeContinuationToken(
                    self._feed_range.get_normalized_range(),
                    None))
            self._continuation =\
                FeedRangeCompositeContinuation(
                    self._container_rid,
                    self._feed_range,
                    composite_continuation_token_queue)
        else:
            self._continuation = continuation

        super(ChangeFeedStateV2, self).__init__(ChangeFeedStateVersion.V2)

    @property
    def container_rid(self) -> str :
        return self._container_rid

    def to_dict(self) -> Dict[str, Any]:
        """Serialize the full state; base64 of this dict is the public continuation."""
        return {
            self.version_property_name: ChangeFeedStateVersion.V2.value,
            self.container_rid_property_name: self._container_rid,
            self.change_feed_mode_property_name: "Incremental",
            self.change_feed_start_from_property_name: self._change_feed_start_from.to_dict(),
            self.continuation_property_name: self._continuation.to_dict() if self._continuation is not None else None
        }

    def populate_request_headers(
            self,
            routing_provider: SmartRoutingMapProvider,
            request_headers: Dict[str, Any]) -> None:
        """Stamp headers for the current sub-range: start position, etag (if any),
        the resolved pkRangeId and — for a sub-partition range — epk filters."""
        request_headers[http_constants.HttpHeaders.AIM] = http_constants.HttpHeaders.IncrementalFeedHeaderValue

        # When a merge happens, the child partition will contain documents ordered by LSN but the _ts/creation time
        # of the documents may not be sequential.
        # So when reading the changeFeed by LSN, it is possible to encounter documents with lower _ts.
        # In order to guarantee we always get the documents after customer's point start time,
        # we will need to always pass the start time in the header.
        self._change_feed_start_from.populate_request_headers(request_headers)

        if self._continuation.current_token is not None and self._continuation.current_token.token is not None:
            change_feed_start_from_feed_range_and_etag =\
                ChangeFeedStartFromETagAndFeedRange(
                    self._continuation.current_token.token,
                    self._continuation.current_token.feed_range)
            change_feed_start_from_feed_range_and_etag.populate_request_headers(request_headers)

        # based on the feed range to find the overlapping partition key range id
        over_lapping_ranges =\
            routing_provider.get_overlapping_ranges(
                self._container_link,
                [self._continuation.current_token.feed_range])

        if len(over_lapping_ranges) > 1:
            # the tracked range now spans several physical partitions (split);
            # surface a GONE so the fetcher re-resolves the ranges
            raise self.get_feed_range_gone_error(over_lapping_ranges)

        overlapping_feed_range = Range.PartitionKeyRangeToRange(over_lapping_ranges[0])
        if overlapping_feed_range == self._continuation.current_token.feed_range:
            # exactly mapping to one physical partition, only need to set the partitionKeyRangeId
            request_headers[http_constants.HttpHeaders.PartitionKeyRangeID] = over_lapping_ranges[0]["id"]
        else:
            # the current token feed range spans less than single physical partition
            # for this case, need to set both the partition key range id and epk filter headers
            request_headers[http_constants.HttpHeaders.PartitionKeyRangeID] = over_lapping_ranges[0]["id"]
            request_headers[
                http_constants.HttpHeaders.StartEpkString] = self._continuation.current_token.feed_range.min
            request_headers[
                http_constants.HttpHeaders.EndEpkString] = self._continuation.current_token.feed_range.max

    async def populate_request_headers_async(
            self,
            async_routing_provider: AsyncSmartRoutingMapProvider,
            request_headers: Dict[str, Any]) -> None:
        """Async mirror of populate_request_headers (awaits the routing lookup)."""
        request_headers[http_constants.HttpHeaders.AIM] = http_constants.HttpHeaders.IncrementalFeedHeaderValue

        # When a merge happens, the child partition will contain documents ordered by LSN but the _ts/creation time
        # of the documents may not be sequential.
        # So when reading the changeFeed by LSN, it is possible to encounter documents with lower _ts.
        # In order to guarantee we always get the documents after customer's point start time,
        # we will need to always pass the start time in the header.
        self._change_feed_start_from.populate_request_headers(request_headers)

        if self._continuation.current_token is not None and self._continuation.current_token.token is not None:
            change_feed_start_from_feed_range_and_etag = \
                ChangeFeedStartFromETagAndFeedRange(
                    self._continuation.current_token.token,
                    self._continuation.current_token.feed_range)
            change_feed_start_from_feed_range_and_etag.populate_request_headers(request_headers)

        # based on the feed range to find the overlapping partition key range id
        over_lapping_ranges = \
            await async_routing_provider.get_overlapping_ranges(
                self._container_link,
                [self._continuation.current_token.feed_range])

        if len(over_lapping_ranges) > 1:
            raise self.get_feed_range_gone_error(over_lapping_ranges)

        overlapping_feed_range = Range.PartitionKeyRangeToRange(over_lapping_ranges[0])
        if overlapping_feed_range == self._continuation.current_token.feed_range:
            # exactly mapping to one physical partition, only need to set the partitionKeyRangeId
            request_headers[http_constants.HttpHeaders.PartitionKeyRangeID] = over_lapping_ranges[0]["id"]
        else:
            # the current token feed range spans less than single physical partition
            # for this case, need to set both the partition key range id and epk filter headers
            request_headers[http_constants.HttpHeaders.PartitionKeyRangeID] = \
                over_lapping_ranges[0]["id"]
            request_headers[http_constants.HttpHeaders.StartEpkString] = \
                self._continuation.current_token.feed_range.min
            request_headers[http_constants.HttpHeaders.EndEpkString] = \
                self._continuation.current_token.feed_range.max

    def populate_feed_options(self, feed_options: Dict[str, Any]) -> None:
        # v2 carries everything in headers; no feed options needed
        pass

    def handle_feed_range_gone(
            self,
            routing_provider: SmartRoutingMapProvider,
            resource_link: str) -> None:
        """Re-resolve child ranges after a split/merge (sync)."""
        self._continuation.handle_feed_range_gone(routing_provider, resource_link)

    async def handle_feed_range_gone_async(
            self,
            routing_provider: AsyncSmartRoutingMapProvider,
            resource_link: str) -> None:
        """Re-resolve child ranges after a split/merge (async)."""
        await self._continuation.handle_feed_range_gone_async(routing_provider, resource_link)

    def apply_server_response_continuation(self, continuation: str, has_modified_response: bool) -> None:
        self._continuation.apply_server_response_continuation(continuation, has_modified_response)

    def should_retry_on_not_modified_response(self) -> bool:
        # a 304 on one sub-range may still leave other sub-ranges with changes
        return self._continuation.should_retry_on_not_modified_response()

    def apply_not_modified_response(self) -> None:
        self._continuation.apply_not_modified_response()

    def get_feed_range_gone_error(self, over_lapping_ranges: List[Dict[str, Any]]) -> CosmosHttpResponseError:
        """Build the 410/PARTITION_KEY_RANGE_GONE error raised when the tracked
        range now overlaps multiple physical partitions."""
        formatted_message =\
            (f"Status code: {StatusCodes.GONE} "
             f"Sub-status: {SubStatusCodes.PARTITION_KEY_RANGE_GONE}. "
             f"Range {self._continuation.current_token.feed_range}"
             f" spans {len(over_lapping_ranges)} physical partitions:"
             f" {[child_range['id'] for child_range in over_lapping_ranges]}")

        response_error = CosmosHttpResponseError(status_code=StatusCodes.GONE, message=formatted_message)
        response_error.sub_status = SubStatusCodes.PARTITION_KEY_RANGE_GONE
        return response_error

    @classmethod
    def from_continuation(
            cls,
            container_link: str,
            container_rid: str,
            continuation_json: Dict[str, Any]) -> 'ChangeFeedStateV2':
        """Rebuild a v2 state from a decoded continuation dict, validating that
        it belongs to this container."""
        container_rid_from_continuation = continuation_json.get(ChangeFeedStateV2.container_rid_property_name)
        if container_rid_from_continuation is None:
            raise ValueError(f"Invalid continuation: [Missing {ChangeFeedStateV2.container_rid_property_name}]")
        if container_rid_from_continuation != container_rid:
            # the container was likely deleted and recreated under the same name
            raise ValueError("Invalid continuation: [Mismatch collection rid]")

        change_feed_start_from_data = continuation_json.get(ChangeFeedStateV2.change_feed_start_from_property_name)
        if change_feed_start_from_data is None:
            raise ValueError(f"Invalid continuation:"
                             f" [Missing {ChangeFeedStateV2.change_feed_start_from_property_name}]")
        change_feed_start_from = ChangeFeedStartFromInternal.from_json(change_feed_start_from_data)

        continuation_data = continuation_json.get(ChangeFeedStateV2.continuation_property_name)
        if continuation_data is None:
            raise ValueError(f"Invalid continuation: [Missing {ChangeFeedStateV2.continuation_property_name}]")
        continuation = FeedRangeCompositeContinuation.from_json(continuation_data)
        return ChangeFeedStateV2(
            container_link=container_link,
            container_rid=container_rid,
            feed_range=continuation.feed_range,
            change_feed_start_from=change_feed_start_from,
            continuation=continuation)

    @classmethod
    def from_initial_state(
            cls,
            container_link: str,
            collection_rid: str,
            change_feed_state_context: Dict[str, Any]) -> 'ChangeFeedStateV2':
        """Build a fresh v2 state from caller options: an explicit feed range, a
        partition key (+ its resolved range), or the full range by default."""
        feed_range: Optional[FeedRange] = None
        if change_feed_state_context.get("feedRange"):
            feed_range_str = base64.b64decode(change_feed_state_context["feedRange"]).decode('utf-8')
            feed_range_json = json.loads(feed_range_str)
            feed_range = FeedRangeEpk(Range.ParseFromDict(feed_range_json))
        elif change_feed_state_context.get("partitionKey"):
            if change_feed_state_context.get("partitionKeyFeedRange"):
                feed_range =\
                    FeedRangePartitionKey(
                        change_feed_state_context["partitionKey"],
                        change_feed_state_context["partitionKeyFeedRange"])
            else:
                raise ValueError("partitionKey is in the changeFeedStateContext, but missing partitionKeyFeedRange")
        else:
            # default to full range
            feed_range = FeedRangeEpk(
                Range(
                    "",
                    "FF",
                    True,
                    False)
            )

        change_feed_start_from = (
            ChangeFeedStartFromInternal.from_start_time(change_feed_state_context.get("startTime")))
        return cls(
            container_link=container_link,
            container_rid=collection_rid,
            feed_range=feed_range,
            change_feed_start_from=change_feed_start_from,
            continuation=None)
"""Internal class for change feed composite continuation token in the Azure Cosmos
database service.
"""
from typing import Optional, Dict, Any

# NOTE(review): `Range` comes from azure.cosmos._routing.routing_range (imported
# at the top of this file) and is only touched inside from_json.


class CompositeContinuationToken:
    """A (feed range, etag) pair — one entry of a composite change feed continuation.

    ``token`` is the server etag continuation for exactly the sub-range
    described by ``feed_range``; it is ``None`` for a range not yet read.
    NOTE(review): from_json requires a non-None token, so a freshly created
    entry cannot round-trip through to_dict/from_json — presumably serialized
    continuations always carry an etag; confirm with FeedRangeCompositeContinuation.
    """

    token_property_name = "token"
    feed_range_property_name = "range"

    def __init__(self, feed_range: Range, token: Optional[str] = None) -> None:
        if feed_range is None:
            raise ValueError("Missing required parameter feed_range")

        self._feed_range = feed_range
        self._token = token

    @property
    def feed_range(self) -> Range:
        return self._feed_range

    @property
    def token(self) -> Optional[str]:
        return self._token

    def update_token(self, etag) -> None:
        """Replace the stored etag with the latest server continuation."""
        self._token = etag

    def to_dict(self) -> Dict[str, Any]:
        """Serialize as ``{token, range}`` (range via Range.to_dict)."""
        return {
            self.token_property_name: self._token,
            self.feed_range_property_name: self.feed_range.to_dict(),
        }

    @classmethod
    def from_json(cls, data) -> 'CompositeContinuationToken':
        """Rebuild a token from its to_dict form; both fields are required.

        :raises ValueError: when either field is missing.
        """
        parsed_token = data.get(cls.token_property_name)
        if parsed_token is None:
            raise ValueError(f"Invalid composite token [Missing {cls.token_property_name}]")

        raw_range = data.get(cls.feed_range_property_name)
        if raw_range is None:
            raise ValueError(f"Invalid composite token [Missing {cls.feed_range_property_name}]")

        return cls(feed_range=Range.ParseFromDict(raw_range), token=parsed_token)

    def __repr__(self):
        return f"CompositeContinuationToken(token={self.token}, range={self.feed_range})"
"""Internal class for feed range implementation in the Azure Cosmos
database service.
"""
from abc import ABC, abstractmethod
from typing import Union, List, Dict, Any

# NOTE(review): `Range` (azure.cosmos._routing.routing_range) and the
# `_Undefined`/`_Empty` partition-key sentinels (azure.cosmos.partition_key)
# are project imports from the top of this file; they are only referenced
# inside method bodies below.


class FeedRange(ABC):
    """Abstract feed range: either an EPK range or a single partition key."""

    @abstractmethod
    def get_normalized_range(self) -> Range:
        """Return this feed range as a normalized EPK Range."""

    @abstractmethod
    def to_dict(self) -> Dict[str, Any]:
        """Serialize this feed range to a JSON-compatible dict."""

class FeedRangePartitionKey(FeedRange):
    """Feed range scoped to a single (possibly hierarchical) partition key."""

    type_property_name = "PK"

    def __init__(
            self,
            pk_value: Union[str, int, float, bool, List[Union[str, int, float, bool]], _Empty, _Undefined],
            feed_range: Range) -> None: # pylint: disable=line-too-long
        """:param pk_value: the partition key value (scalar, hierarchical list,
            or the _Empty/_Undefined sentinels).
        :param feed_range: the EPK range the partition key maps onto.
        :raises ValueError: when either argument is None.
        """
        if pk_value is None:
            raise ValueError("PartitionKey cannot be None")
        if feed_range is None:
            raise ValueError("Feed range cannot be None")

        self._pk_value = pk_value
        self._feed_range = feed_range

    def get_normalized_range(self) -> Range:
        return self._feed_range.to_normalized_range()

    def to_dict(self) -> Dict[str, Any]:
        # sentinel encodings: _Undefined -> [{}], _Empty -> []
        if isinstance(self._pk_value, _Undefined):
            return { self.type_property_name: [{}] }
        if isinstance(self._pk_value, _Empty):
            return { self.type_property_name: [] }
        if isinstance(self._pk_value, list):
            return { self.type_property_name: list(self._pk_value) }

        return { self.type_property_name: self._pk_value }

    @classmethod
    def from_json(cls, data: Dict[str, Any], feed_range: Range) -> 'FeedRangePartitionKey':
        """Rebuild from the to_dict form, mirroring its sentinel encodings.

        BUGFIX: the guard previously used truthiness (``if data.get("PK"):``),
        so falsy-but-valid partition keys (0, "", False) and the _Empty
        encoding ``[]`` produced by to_dict raised ValueError instead of
        round-tripping. Use a membership test instead.

        :raises ValueError: when the "PK" property is absent.
        """
        if cls.type_property_name not in data:
            raise ValueError(f"Can not parse FeedRangePartitionKey from the json,"
                             f" there is no property {cls.type_property_name}")

        pk_value = data[cls.type_property_name]
        if pk_value == []:
            return cls(_Empty(), feed_range)
        if pk_value == [{}]:
            return cls(_Undefined(), feed_range)
        if isinstance(pk_value, list):
            return cls(list(pk_value), feed_range)
        return cls(pk_value, feed_range)


class FeedRangeEpk(FeedRange):
    """Feed range backed directly by an EPK Range."""

    type_property_name = "Range"

    def __init__(self, feed_range: Range) -> None:
        if feed_range is None:
            raise ValueError("feed_range cannot be None")

        self._range = feed_range

    def get_normalized_range(self) -> Range:
        return self._range.to_normalized_range()

    def to_dict(self) -> Dict[str, Any]:
        return {
            self.type_property_name: self._range.to_dict()
        }

    @classmethod
    def from_json(cls, data: Dict[str, Any]) -> 'FeedRangeEpk':
        """Rebuild from the to_dict form.

        :raises ValueError: when the "Range" property is absent.
        """
        # membership/None test (not truthiness) for consistency with
        # FeedRangePartitionKey.from_json
        feed_range_data = data.get(cls.type_property_name)
        if feed_range_data is None:
            raise ValueError(f"Can not parse FeedRangeEPK from the json, there is no property {cls.type_property_name}")
        return cls(Range.ParseFromDict(feed_range_data))
Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +"""Internal class for change feed continuation token by feed range in the Azure Cosmos +database service. 
+""" +from collections import deque +from typing import Any, Deque, Dict, Optional + +from azure.cosmos._change_feed.composite_continuation_token import CompositeContinuationToken +from azure.cosmos._change_feed.feed_range import FeedRange, FeedRangeEpk, FeedRangePartitionKey +from azure.cosmos._routing.routing_map_provider import SmartRoutingMapProvider +from azure.cosmos._routing.aio.routing_map_provider import SmartRoutingMapProvider as AsyncSmartRoutingMapProvider +from azure.cosmos._routing.routing_range import Range + +class FeedRangeCompositeContinuation: + _version_property_name = "v" + _container_rid_property_name = "rid" + _continuation_property_name = "continuation" + + def __init__( + self, + container_rid: str, + feed_range: FeedRange, + continuation: Deque[CompositeContinuationToken]) -> None: + if container_rid is None: + raise ValueError("container_rid is missing") + + self._container_rid = container_rid + self._feed_range = feed_range + self._continuation = continuation + self._current_token = self._continuation[0] + self._initial_no_result_range: Optional[Range] = None + + @property + def current_token(self) -> CompositeContinuationToken: + return self._current_token + + def to_dict(self) -> Dict[str, Any]: + json_data = { + self._version_property_name: "v2", + self._container_rid_property_name: self._container_rid, + self._continuation_property_name: [childToken.to_dict() for childToken in self._continuation], + } + json_data.update(self._feed_range.to_dict()) + return json_data + + @classmethod + def from_json(cls, data) -> 'FeedRangeCompositeContinuation': + version = data.get(cls._version_property_name) + if version is None: + raise ValueError(f"Invalid feed range composite continuation token [Missing {cls._version_property_name}]") + if version != "v2": + raise ValueError("Invalid feed range composite continuation token [Invalid version]") + + container_rid = data.get(cls._container_rid_property_name) + if container_rid is None: + raise 
ValueError(f"Invalid feed range composite continuation token " + f"[Missing {cls._container_rid_property_name}]") + + continuation_data = data.get(cls._continuation_property_name) + if continuation_data is None: + raise ValueError(f"Invalid feed range composite continuation token " + f"[Missing {cls._continuation_property_name}]") + if not isinstance(continuation_data, list) or len(continuation_data) == 0: + raise ValueError(f"Invalid feed range composite continuation token " + f"[The {cls._continuation_property_name} must be non-empty array]") + continuation = [CompositeContinuationToken.from_json(child_range_continuation_token) + for child_range_continuation_token in continuation_data] + + # parsing feed range + feed_range: Optional[FeedRange] = None + if data.get(FeedRangeEpk.type_property_name): + feed_range = FeedRangeEpk.from_json(data) + elif data.get(FeedRangePartitionKey.type_property_name): + feed_range = FeedRangePartitionKey.from_json(data, continuation[0].feed_range) + else: + raise ValueError("Invalid feed range composite continuation token [Missing feed range scope]") + + return cls(container_rid=container_rid, feed_range=feed_range, continuation=deque(continuation)) + + def handle_feed_range_gone( + self, + routing_provider: SmartRoutingMapProvider, + collection_link: str) -> None: + overlapping_ranges = routing_provider.get_overlapping_ranges(collection_link, [self._current_token.feed_range]) + + if len(overlapping_ranges) == 1: + # merge,reusing the existing the feedRange and continuationToken + pass + else: + # split, remove the parent range and then add new child ranges. 
+ # For each new child range, using the continuation token from the parent + self._continuation.popleft() + for child_range in overlapping_ranges: + self._continuation.append( + CompositeContinuationToken( + Range.PartitionKeyRangeToRange(child_range), + self._current_token.token)) + + self._current_token = self._continuation[0] + + async def handle_feed_range_gone_async( + self, + routing_provider: AsyncSmartRoutingMapProvider, + collection_link: str) -> None: + overlapping_ranges = \ + await routing_provider.get_overlapping_ranges( + collection_link, + [self._current_token.feed_range]) + + if len(overlapping_ranges) == 1: + # merge,reusing the existing the feedRange and continuationToken + pass + else: + # split, remove the parent range and then add new child ranges. + # For each new child range, using the continuation token from the parent + self._continuation.popleft() + for child_range in overlapping_ranges: + self._continuation.append( + CompositeContinuationToken( + Range.PartitionKeyRangeToRange(child_range), + self._current_token.token)) + + self._current_token = self._continuation[0] + + def should_retry_on_not_modified_response(self) -> bool: + # when getting 304(Not Modified) response from one sub feed range, + # we will try to fetch for the next sub feed range + # we will repeat the above logic until we have looped through all sub feed ranges + + # TODO: validate the response headers, can we get the status code + if len(self._continuation) > 1: + return self._current_token.feed_range != self._initial_no_result_range + + return False + + def _move_to_next_token(self) -> None: + first_composition_token = self._continuation.popleft() + # add the composition token to the end of the list + self._continuation.append(first_composition_token) + self._current_token = self._continuation[0] + + def apply_server_response_continuation(self, etag, has_modified_response: bool) -> None: + self._current_token.update_token(etag) + if has_modified_response: + 
self._initial_no_result_range = None + else: + self.apply_not_modified_response() + + def apply_not_modified_response(self) -> None: + if self._initial_no_result_range is None: + self._initial_no_result_range = self._current_token.feed_range + + @property + def feed_range(self) -> FeedRange: + return self._feed_range diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_cosmos_client_connection.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_cosmos_client_connection.py index 1288e7a4e66e..49198910b772 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_cosmos_client_connection.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_cosmos_client_connection.py @@ -27,13 +27,11 @@ import urllib.parse from typing import Callable, Dict, Any, Iterable, List, Mapping, Optional, Sequence, Tuple, Union, cast, Type from typing_extensions import TypedDict - from urllib3.util.retry import Retry + +from azure.core import PipelineClient from azure.core.credentials import TokenCredential from azure.core.paging import ItemPaged -from azure.core import PipelineClient -from azure.core.pipeline.transport import HttpRequest, \ - HttpResponse # pylint: disable=no-legacy-azure-core-http-response-import from azure.core.pipeline.policies import ( HTTPPolicy, ContentDecodePolicy, @@ -44,22 +42,29 @@ DistributedTracingPolicy, ProxyPolicy ) +from azure.core.pipeline.transport import HttpRequest, \ + HttpResponse # pylint: disable=no-legacy-azure-core-http-response-import from . import _base as base -from ._base import _set_properties_cache -from . import documents -from .documents import ConnectionPolicy, DatabaseAccount -from ._constants import _Constants as Constants -from . import http_constants, exceptions +from . import _global_endpoint_manager as global_endpoint_manager from . import _query_iterable as query_iterable from . import _runtime_constants as runtime_constants -from ._request_object import RequestObject -from . import _synchronized_request as synchronized_request -from . 
import _global_endpoint_manager as global_endpoint_manager -from ._routing import routing_map_provider, routing_range -from ._retry_utility import ConnectionRetryPolicy from . import _session +from . import _synchronized_request as synchronized_request from . import _utils +from . import documents +from . import http_constants, exceptions +from ._auth_policy import CosmosBearerTokenCredentialPolicy +from ._base import _set_properties_cache +from ._change_feed.change_feed_iterable import ChangeFeedIterable +from ._change_feed.change_feed_state import ChangeFeedState +from ._constants import _Constants as Constants +from ._cosmos_http_logging_policy import CosmosHttpLoggingPolicy +from ._range_partition_resolver import RangePartitionResolver +from ._request_object import RequestObject +from ._retry_utility import ConnectionRetryPolicy +from ._routing import routing_map_provider, routing_range +from .documents import ConnectionPolicy, DatabaseAccount from .partition_key import ( _Undefined, _Empty, @@ -67,9 +72,6 @@ _return_undefined_or_empty_partition_key, NonePartitionKeyValue ) -from ._auth_policy import CosmosBearerTokenCredentialPolicy -from ._cosmos_http_logging_policy import CosmosHttpLoggingPolicy -from ._range_partition_resolver import RangePartitionResolver PartitionKeyType = Union[str, int, float, bool, Sequence[Union[str, int, float, bool, None]], Type[NonePartitionKeyValue]] # pylint: disable=line-too-long @@ -1191,11 +1193,10 @@ def fetch_fn(options: Mapping[str, Any]) -> Tuple[List[Dict[str, Any]], Dict[str return ItemPaged( self, - None, options, fetch_function=fetch_fn, collection_link=collection_link, - page_iterator_class=query_iterable.QueryIterable + page_iterator_class=ChangeFeedIterable ) def _ReadPartitionKeyRanges( @@ -3023,6 +3024,11 @@ def __GetBodiesFromQueryResult(result: Dict[str, Any]) -> List[Dict[str, Any]]: options, partition_key_range_id ) + + change_feed_state: Optional[ChangeFeedState] = options.get("changeFeedState") + if 
change_feed_state is not None: + change_feed_state.populate_request_headers(self._routing_map_provider, headers) + result, last_response_headers = self.__Get(path, request_params, headers, **kwargs) self.last_response_headers = last_response_headers if response_hook: diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_routing/aio/routing_map_provider.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_routing/aio/routing_map_provider.py index ebf1ee82b005..e70ae355c495 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_routing/aio/routing_map_provider.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_routing/aio/routing_map_provider.py @@ -49,7 +49,7 @@ def __init__(self, client): # keeps the cached collection routing map by collection id self._collection_routing_map_by_item = {} - async def get_overlapping_ranges(self, collection_link, partition_key_ranges): + async def get_overlapping_ranges(self, collection_link, partition_key_ranges, **kwargs): """Given a partition key range and a collection, return the list of overlapping partition key ranges. @@ -64,7 +64,7 @@ async def get_overlapping_ranges(self, collection_link, partition_key_ranges): collection_routing_map = self._collection_routing_map_by_item.get(collection_id) if collection_routing_map is None: - collection_pk_ranges = [pk async for pk in cl._ReadPartitionKeyRanges(collection_link)] + collection_pk_ranges = [pk async for pk in cl._ReadPartitionKeyRanges(collection_link, **kwargs)] # for large collections, a split may complete between the read partition key ranges query page responses, # causing the partitionKeyRanges to have both the children ranges and their parents. Therefore, we need # to discard the parent ranges to have a valid routing map. 
@@ -131,7 +131,7 @@ class SmartRoutingMapProvider(PartitionKeyRangeCache): invocation of CollectionRoutingMap.get_overlapping_ranges() """ - async def get_overlapping_ranges(self, collection_link, partition_key_ranges): + async def get_overlapping_ranges(self, collection_link, partition_key_ranges, **kwargs): """ Given the sorted ranges and a collection, Returns the list of overlapping partition key ranges @@ -165,8 +165,12 @@ async def get_overlapping_ranges(self, collection_link, partition_key_ranges): else: queryRange = currentProvidedRange - overlappingRanges = await PartitionKeyRangeCache.get_overlapping_ranges(self, - collection_link, queryRange) + overlappingRanges =\ + await PartitionKeyRangeCache.get_overlapping_ranges( + self, + collection_link, + [queryRange], + **kwargs) assert overlappingRanges, "code bug: returned overlapping ranges for queryRange {} is empty".format( queryRange ) diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_routing/routing_map_provider.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_routing/routing_map_provider.py index 59c609dec7ea..8dacb5190e07 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_routing/routing_map_provider.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_routing/routing_map_provider.py @@ -50,7 +50,7 @@ def __init__(self, client): # keeps the cached collection routing map by collection id self._collection_routing_map_by_item = {} - def get_overlapping_ranges(self, collection_link, partition_key_ranges): + def get_overlapping_ranges(self, collection_link, partition_key_ranges, **kwargs): """Given a partition key range and a collection, return the list of overlapping partition key ranges. 
@@ -65,7 +65,7 @@ def get_overlapping_ranges(self, collection_link, partition_key_ranges): collection_routing_map = self._collection_routing_map_by_item.get(collection_id) if collection_routing_map is None: - collection_pk_ranges = list(cl._ReadPartitionKeyRanges(collection_link)) + collection_pk_ranges = list(cl._ReadPartitionKeyRanges(collection_link, **kwargs)) # for large collections, a split may complete between the read partition key ranges query page responses, # causing the partitionKeyRanges to have both the children ranges and their parents. Therefore, we need # to discard the parent ranges to have a valid routing map. @@ -132,7 +132,7 @@ class SmartRoutingMapProvider(PartitionKeyRangeCache): invocation of CollectionRoutingMap.get_overlapping_ranges() """ - def get_overlapping_ranges(self, collection_link, partition_key_ranges): + def get_overlapping_ranges(self, collection_link, partition_key_ranges, **kwargs): """ Given the sorted ranges and a collection, Returns the list of overlapping partition key ranges @@ -166,7 +166,8 @@ def get_overlapping_ranges(self, collection_link, partition_key_ranges): else: queryRange = currentProvidedRange - overlappingRanges = PartitionKeyRangeCache.get_overlapping_ranges(self, collection_link, queryRange) + overlappingRanges = ( + PartitionKeyRangeCache.get_overlapping_ranges(self, collection_link, [queryRange], **kwargs)) assert overlappingRanges, "code bug: returned overlapping ranges for queryRange {} is empty".format( queryRange ) diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_routing/routing_range.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_routing/routing_range.py index 0d61fbbbe1d7..f2e7576bf376 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_routing/routing_range.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_routing/routing_range.py @@ -22,8 +22,15 @@ """Internal class for partition key range implementation in the Azure Cosmos database service. 
""" +import base64 +import binascii +import json +from typing import Dict, Any +def partition_key_range_to_range_string(partition_key_range: Dict[str, Any]) -> str: + return Range.PartitionKeyRangeToRange(partition_key_range).to_base64_encoded_string() + class PartitionKeyRange(object): """Partition Key Range Constants""" @@ -81,6 +88,74 @@ def ParseFromDict(cls, range_as_dict): ) return self + def to_dict(self): + return { + self.MinPath: self.min, + self.MaxPath: self.max, + self.IsMinInclusivePath: self.isMinInclusive, + self.IsMaxInclusivePath: self.isMaxInclusive + } + + def to_normalized_range(self): + if self.isMinInclusive and not self.isMaxInclusive: + return self + + normalized_min = self.min + normalized_max = self.max + + if not self.isMinInclusive: + normalized_min = self.add_to_effective_partition_key(self.min, -1) + + if self.isMaxInclusive: + normalized_max = self.add_to_effective_partition_key(self.max, 1) + + return Range(normalized_min, normalized_max, True, False) + + def add_to_effective_partition_key(self, effective_partition_key: str, value: int): + if value not in (-1, 1): + raise ValueError("Invalid value - only 1 or -1 is allowed") + + byte_array = self.hex_binary_to_byte_array(effective_partition_key) + if value == 1: + for i in range(len(byte_array) -1, -1, -1): + if byte_array[i] < 255: + byte_array[i] += 1 + break + byte_array[i] = 0 + else: + for i in range(len(byte_array) - 1, -1, -1): + if byte_array[i] != 0: + byte_array[i] -= 1 + break + byte_array[i] = 255 + + return binascii.hexlify(byte_array).decode() + + def hex_binary_to_byte_array(self, hex_binary_string: str): + if hex_binary_string is None: + raise ValueError("hex_binary_string is missing") + if len(hex_binary_string) % 2 != 0: + raise ValueError("hex_binary_string must not have an odd number of characters") + + return bytearray.fromhex(hex_binary_string) + + @classmethod + def from_base64_encoded_json_string(cls, data: str): + try: + feed_range_json_string = 
base64.b64decode(data, validate=True).decode('utf-8') + feed_range_json = json.loads(feed_range_json_string) + return cls.ParseFromDict(feed_range_json) + except Exception as exc: + raise ValueError(f"Invalid feed_range json string {data}") from exc + + def to_base64_encoded_string(self): + data_json = json.dumps(self.to_dict()) + json_bytes = data_json.encode('utf-8') + # Encode the bytes to a Base64 string + base64_bytes = base64.b64encode(json_bytes) + # Convert the Base64 bytes to a string + return base64_bytes.decode('utf-8') + def isSingleValue(self): return self.isMinInclusive and self.isMaxInclusive and self.min == self.max diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py index 385d7f7af236..d7d66738b4ee 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py @@ -21,8 +21,9 @@ """Create, read, update and delete items in the Azure Cosmos DB SQL API service. 
""" -from datetime import datetime, timezone -from typing import Any, Dict, Mapping, Optional, Sequence, Type, Union, List, Tuple, cast +import warnings +from datetime import datetime +from typing import Any, Dict, Mapping, Optional, Sequence, Type, Union, List, Tuple, cast, overload from typing_extensions import Literal from azure.core import MatchConditions @@ -31,6 +32,7 @@ from azure.core.tracing.decorator_async import distributed_trace_async # type: ignore from ._cosmos_client_connection_async import CosmosClientConnection +from ._scripts import ScriptsProxy from .._base import ( build_options as _build_options, validate_cache_staleness_value, @@ -39,19 +41,20 @@ GenerateGuidId, _set_properties_cache ) +from .._routing.routing_range import Range, partition_key_range_to_range_string from ..offer import ThroughputProperties -from ._scripts import ScriptsProxy from ..partition_key import ( NonePartitionKeyValue, _return_undefined_or_empty_partition_key, _Empty, - _Undefined + _Undefined, PartitionKey ) __all__ = ("ContainerProxy",) # pylint: disable=protected-access, too-many-lines # pylint: disable=missing-client-constructor-parameter-credential,missing-client-constructor-parameter-kwargs +# pylint: disable=too-many-public-methods PartitionKeyType = Union[str, int, float, bool, Sequence[Union[str, int, float, bool, None]], Type[NonePartitionKeyValue]] # pylint: disable=line-too-long @@ -132,6 +135,14 @@ async def _set_partition_key( return _return_undefined_or_empty_partition_key(await self.is_system_key) return cast(Union[str, int, float, bool, List[Union[str, int, float, bool]]], partition_key) + async def _get_epk_range_for_partition_key(self, partition_key_value: PartitionKeyType) -> Range: + + container_properties = await self._get_properties() + partition_key_definition = container_properties["partitionKey"] + partition_key = PartitionKey(path=partition_key_definition["paths"], kind=partition_key_definition["kind"]) + + return 
partition_key._get_epk_range_for_partition_key(partition_key_value) + @distributed_trace_async async def read( self, @@ -480,62 +491,190 @@ def query_items( response_hook(self.client_connection.last_response_headers, items) return items - @distributed_trace + @overload def query_items_change_feed( - self, - *, - partition_key_range_id: Optional[str] = None, - is_start_from_beginning: bool = False, - start_time: Optional[datetime] = None, - continuation: Optional[str] = None, - max_item_count: Optional[int] = None, - partition_key: Optional[PartitionKeyType] = None, - priority: Optional[Literal["High", "Low"]] = None, - **kwargs: Any + self, + *, + max_item_count: Optional[int] = None, + start_time: Optional[Union[datetime, Literal["Now", "Beginning"]]] = None, + partition_key: PartitionKeyType, + priority: Optional[Literal["High", "Low"]] = None, + **kwargs: Any ) -> AsyncItemPaged[Dict[str, Any]]: """Get a sorted list of items that were changed, in the order in which they were modified. - :keyword bool is_start_from_beginning: Get whether change feed should start from - beginning (true) or from current (false). By default, it's start from current (false). - :keyword ~datetime.datetime start_time: Specifies a point of time to start change feed. Provided value will be - converted to UTC. This value will be ignored if `is_start_from_beginning` is set to true. - :keyword str partition_key_range_id: ChangeFeed requests can be executed against specific partition key - ranges. This is used to process the change feed in parallel across multiple consumers. - :keyword str continuation: e_tag value to be used as continuation for reading change feed. :keyword int max_item_count: Max number of items to be returned in the enumeration operation. - :keyword partition_key: partition key at which ChangeFeed requests are targeted. 
- :paramtype partition_key: Union[str, int, float, bool, List[Union[str, int, float, bool]]] - :keyword response_hook: A callable invoked with the response metadata. - :paramtype response_hook: Callable[[Dict[str, str], AsyncItemPaged[Dict[str, Any]]], None] + :keyword start_time: The start time to start processing chang feed items. + Beginning: Processing the change feed items from the beginning of the change feed. + Now: Processing change feed from the current time, so only events for all future changes will be retrieved. + ~datetime.datetime: processing change feed from a point of time. Provided value will be converted to UTC. + By default, it is start from current ("Now") + :type start_time: Union[~datetime.datetime, Literal["Now", "Beginning"]] + :keyword partition_key: The partition key that is used to define the scope + (logical partition or a subset of a container) + :type partition_key: Union[str, int, float, bool, List[Union[str, int, float, bool]]] :keyword Literal["High", "Low"] priority: Priority based execution allows users to set a priority for each request. Once the user has reached their provisioned throughput, low priority requests are throttled before high priority requests start getting throttled. Feature must first be enabled at the account level. + :keyword Callable response_hook: A callable invoked with the response metadata. :returns: An AsyncItemPaged of items (dicts). :rtype: AsyncItemPaged[Dict[str, Any]] """ - response_hook = kwargs.pop('response_hook', None) - if priority is not None: - kwargs['priority'] = priority + ... + + @overload + def query_items_change_feed( + self, + *, + feed_range: str, + max_item_count: Optional[int] = None, + start_time: Optional[Union[datetime, Literal["Now", "Beginning"]]] = None, + priority: Optional[Literal["High", "Low"]] = None, + **kwargs: Any + ) -> AsyncItemPaged[Dict[str, Any]]: + """Get a sorted list of items that were changed, in the order in which they were modified. 
+ + :keyword str feed_range: The feed range that is used to define the scope. + :keyword int max_item_count: Max number of items to be returned in the enumeration operation. + :keyword start_time: The start time to start processing chang feed items. + Beginning: Processing the change feed items from the beginning of the change feed. + Now: Processing change feed from the current time, so only events for all future changes will be retrieved. + ~datetime.datetime: processing change feed from a point of time. Provided value will be converted to UTC. + By default, it is start from current ("Now") + :type start_time: Union[~datetime.datetime, Literal["Now", "Beginning"]] + :keyword Literal["High", "Low"] priority: Priority based execution allows users to set a priority for each + request. Once the user has reached their provisioned throughput, low priority requests are throttled + before high priority requests start getting throttled. Feature must first be enabled at the account level. + :keyword Callable response_hook: A callable invoked with the response metadata. + :returns: An AsyncItemPaged of items (dicts). + :rtype: AsyncItemPaged[Dict[str, Any]] + """ + ... + + @overload + def query_items_change_feed( + self, + *, + continuation: str, + max_item_count: Optional[int] = None, + priority: Optional[Literal["High", "Low"]] = None, + **kwargs: Any + ) -> AsyncItemPaged[Dict[str, Any]]: + """Get a sorted list of items that were changed, in the order in which they were modified. + + :keyword str continuation: The continuation token retrieved from previous response. + :keyword int max_item_count: Max number of items to be returned in the enumeration operation. + :keyword Literal["High", "Low"] priority: Priority based execution allows users to set a priority for each + request. Once the user has reached their provisioned throughput, low priority requests are throttled + before high priority requests start getting throttled. 
Feature must first be enabled at the account level. + :keyword Callable response_hook: A callable invoked with the response metadata. + :returns: An AsyncItemPaged of items (dicts). + :rtype: AsyncItemPaged[Dict[str, Any]] + """ + # pylint: enable=line-too-long + ... + + @overload + def query_items_change_feed( + self, + *, + max_item_count: Optional[int] = None, + start_time: Optional[Union[datetime, Literal["Now", "Beginning"]]] = None, + priority: Optional[Literal["High", "Low"]] = None, + **kwargs: Any + ) -> AsyncItemPaged[Dict[str, Any]]: + """Get a sorted list of items that were changed in the entire container, + in the order in which they were modified. + + :keyword int max_item_count: Max number of items to be returned in the enumeration operation. + :keyword start_time: The start time to start processing chang feed items. + Beginning: Processing the change feed items from the beginning of the change feed. + Now: Processing change feed from the current time, so only events for all future changes will be retrieved. + ~datetime.datetime: processing change feed from a point of time. Provided value will be converted to UTC. + By default, it is start from current ("Now") + :type start_time: Union[~datetime.datetime, Literal["Now", "Beginning"]] + :keyword Literal["High", "Low"] priority: Priority based execution allows users to set a priority for each + request. Once the user has reached their provisioned throughput, low priority requests are throttled + before high priority requests start getting throttled. Feature must first be enabled at the account level. + :keyword Callable response_hook: A callable invoked with the response metadata. + :returns: An AsyncItemPaged of items (dicts). + :rtype: AsyncItemPaged[Dict[str, Any]] + """ + ... 
+ + @distributed_trace + def query_items_change_feed( # pylint: disable=unused-argument + self, + *args: Any, + **kwargs: Any + ) -> AsyncItemPaged[Dict[str, Any]]: + # pylint: disable=too-many-statements + if kwargs.get("priority") is not None: + kwargs['priority'] = kwargs['priority'] feed_options = _build_options(kwargs) - feed_options["isStartFromBeginning"] = is_start_from_beginning - if start_time is not None and is_start_from_beginning is False: - feed_options["startTime"] = start_time.astimezone(timezone.utc).strftime('%a, %d %b %Y %H:%M:%S GMT') - if partition_key_range_id is not None: - feed_options["partitionKeyRangeId"] = partition_key_range_id - if partition_key is not None: - feed_options["partitionKey"] = self._set_partition_key(partition_key) - if max_item_count is not None: - feed_options["maxItemCount"] = max_item_count - if continuation is not None: - feed_options["continuation"] = continuation + + change_feed_state_context = {} + # Back compatibility with deprecation warnings for partition_key_range_id + if kwargs.get("partition_key_range_id") is not None: + warnings.warn( + "partition_key_range_id is deprecated. Please pass in feed_range instead.", + DeprecationWarning + ) + + change_feed_state_context["partitionKeyRangeId"] = kwargs.pop('partition_key_range_id') + + # Back compatibility with deprecation warnings for is_start_from_beginning + if kwargs.get("is_start_from_beginning") is not None: + warnings.warn( + "is_start_from_beginning is deprecated. 
Please pass in start_time instead.", + DeprecationWarning + ) + + if kwargs.get("start_time") is not None: + raise ValueError("is_start_from_beginning and start_time are exclusive, please only set one of them") + + is_start_from_beginning = kwargs.pop('is_start_from_beginning') + if is_start_from_beginning is True: + change_feed_state_context["startTime"] = "Beginning" + + # parse start_time + if kwargs.get("start_time") is not None: + start_time = kwargs.pop('start_time') + if not isinstance(start_time, (datetime, str)): + raise TypeError( + "'start_time' must be either a datetime object, or either the values 'Now' or 'Beginning'.") + change_feed_state_context["startTime"] = start_time + + # parse continuation token + if feed_options.get("continuation") is not None: + change_feed_state_context["continuation"] = feed_options.pop('continuation') + + if kwargs.get("max_item_count") is not None: + feed_options["maxItemCount"] = kwargs.pop('max_item_count') + + if kwargs.get("partition_key") is not None: + change_feed_state_context["partitionKey"] =\ + self._set_partition_key(cast(PartitionKeyType, kwargs.get("partition_key"))) + change_feed_state_context["partitionKeyFeedRange"] = \ + self._get_epk_range_for_partition_key(kwargs.pop('partition_key')) + + if kwargs.get("feed_range") is not None: + change_feed_state_context["feedRange"] = kwargs.pop('feed_range') + + feed_options["containerProperties"] = self._get_properties() + feed_options["changeFeedStateContext"] = change_feed_state_context + + response_hook = kwargs.pop('response_hook', None) if hasattr(response_hook, "clear"): response_hook.clear() + if self.container_link in self.__get_client_container_caches(): feed_options["containerRID"] = self.__get_client_container_caches()[self.container_link]["_rid"] result = self.client_connection.QueryItemsChangeFeed( self.container_link, options=feed_options, response_hook=response_hook, **kwargs ) + if response_hook: 
response_hook(self.client_connection.last_response_headers, result) return result @@ -1098,3 +1237,29 @@ async def execute_item_batch( return await self.client_connection.Batch( collection_link=self.container_link, batch_operations=batch_operations, options=request_options, **kwargs) + + async def read_feed_ranges( + self, + *, + force_refresh: Optional[bool] = False, + **kwargs: Any + ) -> List[str]: + """ Obtains a list of feed ranges that can be used to parallelize feed operations. + + :keyword bool force_refresh: + Flag to indicate whether obtain the list of feed ranges directly from cache or refresh the cache. + :returns: A list representing the feed ranges in base64 encoded string + :rtype: List[str] + + """ + if force_refresh is True: + self.client_connection.refresh_routing_map_provider() + + partition_key_ranges =\ + await self.client_connection._routing_map_provider.get_overlapping_ranges( + self.container_link, + # default to full range + [Range("", "FF", True, False)], + **kwargs) + + return [partition_key_range_to_range_string(partitionKeyRange) for partitionKeyRange in partition_key_ranges] diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_cosmos_client_connection_async.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_cosmos_client_connection_async.py index 72ea03668909..9e73445e2063 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_cosmos_client_connection_async.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_cosmos_client_connection_async.py @@ -50,6 +50,8 @@ from .. import _base as base from .._base import _set_properties_cache from .. 
import documents +from .._change_feed.aio.change_feed_iterable import ChangeFeedIterable +from .._change_feed.change_feed_state import ChangeFeedState from .._routing import routing_range from ..documents import ConnectionPolicy, DatabaseAccount from .._constants import _Constants as Constants @@ -2310,11 +2312,10 @@ async def fetch_fn(options: Mapping[str, Any]) -> Tuple[List[Dict[str, Any]], Di return AsyncItemPaged( self, - None, options, fetch_function=fetch_fn, collection_link=collection_link, - page_iterator_class=query_iterable.QueryIterable + page_iterator_class=ChangeFeedIterable ) def QueryOffers( @@ -2812,6 +2813,11 @@ def __GetBodiesFromQueryResult(result: Dict[str, Any]) -> List[Dict[str, Any]]: documents._OperationType.QueryPlan if is_query_plan else documents._OperationType.ReadFeed ) headers = base.GetHeaders(self, initial_headers, "get", path, id_, typ, options, partition_key_range_id) + + change_feed_state: Optional[ChangeFeedState] = options.get("changeFeedState") + if change_feed_state is not None: + await change_feed_state.populate_request_headers_async(self._routing_map_provider, headers) + result, self.last_response_headers = await self.__Get(path, request_params, headers, **kwargs) if response_hook: response_hook(self.last_response_headers, result) diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/container.py b/sdk/cosmos/azure-cosmos/azure/cosmos/container.py index 9cf697ee7f5a..e6a6ac7b36b9 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/container.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/container.py @@ -21,16 +21,15 @@ """Create, read, update and delete items in the Azure Cosmos DB SQL API service. 
""" -from datetime import datetime, timezone import warnings -from typing import Any, Dict, List, Optional, Sequence, Union, Tuple, Mapping, Type, cast +from datetime import datetime +from typing import Any, Dict, List, Optional, Sequence, Union, Tuple, Mapping, Type, cast, overload from typing_extensions import Literal from azure.core import MatchConditions -from azure.core.tracing.decorator import distributed_trace from azure.core.paging import ItemPaged +from azure.core.tracing.decorator import distributed_trace -from ._cosmos_client_connection import CosmosClientConnection from ._base import ( build_options, validate_cache_staleness_value, @@ -39,8 +38,9 @@ GenerateGuidId, _set_properties_cache ) +from ._cosmos_client_connection import CosmosClientConnection +from ._routing.routing_range import Range, partition_key_range_to_range_string from .offer import Offer, ThroughputProperties -from .scripts import ScriptsProxy from .partition_key import ( NonePartitionKeyValue, PartitionKey, @@ -48,6 +48,7 @@ _Undefined, _return_undefined_or_empty_partition_key ) +from .scripts import ScriptsProxy __all__ = ("ContainerProxy",) @@ -129,6 +130,13 @@ def _set_partition_key( return _return_undefined_or_empty_partition_key(self.is_system_key) return cast(Union[str, int, float, bool, List[Union[str, int, float, bool]]], partition_key) + def _get_epk_range_for_partition_key( self, partition_key_value: PartitionKeyType) -> Range: + container_properties = self._get_properties() + partition_key_definition = container_properties["partitionKey"] + partition_key = PartitionKey(path=partition_key_definition["paths"], kind=partition_key_definition["kind"]) + + return partition_key._get_epk_range_for_partition_key(partition_key_value) + def __get_client_container_caches(self) -> Dict[str, Dict[str, Any]]: return self.client_connection._container_properties_cache @@ -309,60 +317,199 @@ def read_all_items( # pylint:disable=docstring-missing-param 
response_hook(self.client_connection.last_response_headers, items) return items - @distributed_trace + @overload def query_items_change_feed( - self, - partition_key_range_id: Optional[str] = None, - is_start_from_beginning: bool = False, - continuation: Optional[str] = None, - max_item_count: Optional[int] = None, - *, - start_time: Optional[datetime] = None, - partition_key: Optional[PartitionKeyType] = None, - priority: Optional[Literal["High", "Low"]] = None, - **kwargs: Any + self, + *, + max_item_count: Optional[int] = None, + start_time: Optional[Union[datetime, Literal["Now", "Beginning"]]] = None, + partition_key: PartitionKeyType, + priority: Optional[Literal["High", "Low"]] = None, + **kwargs: Any ) -> ItemPaged[Dict[str, Any]]: """Get a sorted list of items that were changed, in the order in which they were modified. - :param str partition_key_range_id: ChangeFeed requests can be executed against specific partition key ranges. - This is used to process the change feed in parallel across multiple consumers. - :param bool is_start_from_beginning: Get whether change feed should start from - beginning (true) or from current (false). By default, it's start from current (false). - :param max_item_count: Max number of items to be returned in the enumeration operation. - :param str continuation: e_tag value to be used as continuation for reading change feed. - :param int max_item_count: Max number of items to be returned in the enumeration operation. - :keyword ~datetime.datetime start_time: Specifies a point of time to start change feed. Provided value will be - converted to UTC. This value will be ignored if `is_start_from_beginning` is set to true. - :keyword partition_key: partition key at which ChangeFeed requests are targeted. - :paramtype partition_key: Union[str, int, float, bool, List[Union[str, int, float, bool]]] + :keyword int max_item_count: Max number of items to be returned in the enumeration operation. 
+ :keyword start_time:The start time to start processing chang feed items. + Beginning: Processing the change feed items from the beginning of the change feed. + Now: Processing change feed from the current time, so only events for all future changes will be retrieved. + ~datetime.datetime: processing change feed from a point of time. Provided value will be converted to UTC. + By default, it is start from current ("Now") + :type start_time: Union[~datetime.datetime, Literal["Now", "Beginning"]] + :keyword partition_key: The partition key that is used to define the scope + (logical partition or a subset of a container) + :type partition_key: Union[str, int, float, bool, List[Union[str, int, float, bool]]] + :keyword Literal["High", "Low"] priority: Priority based execution allows users to set a priority for each + request. Once the user has reached their provisioned throughput, low priority requests are throttled + before high priority requests start getting throttled. Feature must first be enabled at the account level. :keyword Callable response_hook: A callable invoked with the response metadata. + :returns: An Iterable of items (dicts). + :rtype: Iterable[Dict[str, Any]] + """ + ... + + @overload + def query_items_change_feed( + self, + *, + feed_range: str, + max_item_count: Optional[int] = None, + start_time: Optional[Union[datetime, Literal["Now", "Beginning"]]] = None, + priority: Optional[Literal["High", "Low"]] = None, + **kwargs: Any + ) -> ItemPaged[Dict[str, Any]]: + + """Get a sorted list of items that were changed, in the order in which they were modified. + + :keyword str feed_range: The feed range that is used to define the scope. + :keyword int max_item_count: Max number of items to be returned in the enumeration operation. + :keyword start_time: The start time to start processing chang feed items. + Beginning: Processing the change feed items from the beginning of the change feed. 
+ Now: Processing change feed from the current time, so only events for all future changes will be retrieved. + ~datetime.datetime: processing change feed from a point of time. Provided value will be converted to UTC. + By default, it is start from current ("Now") + :type start_time: Union[~datetime.datetime, Literal["Now", "Beginning"]] :keyword Literal["High", "Low"] priority: Priority based execution allows users to set a priority for each request. Once the user has reached their provisioned throughput, low priority requests are throttled before high priority requests start getting throttled. Feature must first be enabled at the account level. + :keyword Callable response_hook: A callable invoked with the response metadata. :returns: An Iterable of items (dicts). - :rtype: Iterable[dict[str, Any]] + :rtype: Iterable[Dict[str, Any]] """ - if priority is not None: - kwargs['priority'] = priority + ... + + @overload + def query_items_change_feed( + self, + *, + continuation: str, + max_item_count: Optional[int] = None, + priority: Optional[Literal["High", "Low"]] = None, + **kwargs: Any + ) -> ItemPaged[Dict[str, Any]]: + """Get a sorted list of items that were changed, in the order in which they were modified. + + :keyword str continuation: The continuation token retrieved from previous response. + :keyword int max_item_count: Max number of items to be returned in the enumeration operation. + :keyword Literal["High", "Low"] priority: Priority based execution allows users to set a priority for each + request. Once the user has reached their provisioned throughput, low priority requests are throttled + before high priority requests start getting throttled. Feature must first be enabled at the account level. + :keyword Callable response_hook: A callable invoked with the response metadata. + :returns: An Iterable of items (dicts). + :rtype: Iterable[Dict[str, Any]] + """ + ... 
+ + @overload + def query_items_change_feed( + self, + *, + max_item_count: Optional[int] = None, + start_time: Optional[Union[datetime, Literal["Now", "Beginning"]]] = None, + priority: Optional[Literal["High", "Low"]] = None, + **kwargs: Any + ) -> ItemPaged[Dict[str, Any]]: + """Get a sorted list of items that were changed in the entire container, + in the order in which they were modified, + + :keyword int max_item_count: Max number of items to be returned in the enumeration operation. + :keyword start_time:The start time to start processing chang feed items. + Beginning: Processing the change feed items from the beginning of the change feed. + Now: Processing change feed from the current time, so only events for all future changes will be retrieved. + ~datetime.datetime: processing change feed from a point of time. Provided value will be converted to UTC. + By default, it is start from current ("Now") + :type start_time: Union[~datetime.datetime, Literal["Now", "Beginning"]] + :keyword Literal["High", "Low"] priority: Priority based execution allows users to set a priority for each + request. Once the user has reached their provisioned throughput, low priority requests are throttled + before high priority requests start getting throttled. Feature must first be enabled at the account level. + :keyword Callable response_hook: A callable invoked with the response metadata. + :returns: An Iterable of items (dicts). + :rtype: Iterable[Dict[str, Any]] + """ + ... 
+ + @distributed_trace + def query_items_change_feed( + self, + *args: Any, + **kwargs: Any + ) -> ItemPaged[Dict[str, Any]]: + # pylint: disable=too-many-statements + if kwargs.get("priority") is not None: + kwargs['priority'] = kwargs['priority'] feed_options = build_options(kwargs) - response_hook = kwargs.pop('response_hook', None) - if partition_key_range_id is not None: - feed_options["partitionKeyRangeId"] = partition_key_range_id - if partition_key is not None: - feed_options["partitionKey"] = self._set_partition_key(partition_key) - if is_start_from_beginning is not None: - feed_options["isStartFromBeginning"] = is_start_from_beginning - if start_time is not None and is_start_from_beginning is False: - feed_options["startTime"] = start_time.astimezone(timezone.utc).strftime('%a, %d %b %Y %H:%M:%S GMT') - if max_item_count is not None: - feed_options["maxItemCount"] = max_item_count - if continuation is not None: - feed_options["continuation"] = continuation + change_feed_state_context = {} + # Back compatibility with deprecation warnings for partition_key_range_id + if (args and args[0] is not None) or kwargs.get("partition_key_range_id") is not None: + warnings.warn( + "partition_key_range_id is deprecated. Please pass in feed_range instead.", + DeprecationWarning + ) + + try: + change_feed_state_context["partitionKeyRangeId"] = kwargs.pop('partition_key_range_id') + except KeyError: + change_feed_state_context['partitionKeyRangeId'] = args[0] + + # Back compatibility with deprecation warnings for is_start_from_beginning + if (len(args) >= 2 and args[1] is not None) or kwargs.get("is_start_from_beginning") is not None: + warnings.warn( + "is_start_from_beginning is deprecated. 
Please pass in start_time instead.", + DeprecationWarning + ) + + if kwargs.get("start_time") is not None: + raise ValueError("is_start_from_beginning and start_time are exclusive, please only set one of them") + + try: + is_start_from_beginning = kwargs.pop('is_start_from_beginning') + except KeyError: + is_start_from_beginning = args[1] + + if is_start_from_beginning is True: + change_feed_state_context["startTime"] = "Beginning" + + # parse start_time + if kwargs.get("start_time") is not None: + + start_time = kwargs.pop('start_time') + if not isinstance(start_time, (datetime, str)): + raise TypeError( + "'start_time' must be either a datetime object, or either the values 'Now' or 'Beginning'.") + change_feed_state_context["startTime"] = start_time + + # parse continuation token + if len(args) >= 3 and args[2] is not None or feed_options.get("continuation") is not None: + try: + continuation = feed_options.pop('continuation') + except KeyError: + continuation = args[2] + change_feed_state_context["continuation"] = continuation + + if len(args) >= 4 and args[3] is not None or kwargs.get("max_item_count") is not None: + try: + feed_options["maxItemCount"] = kwargs.pop('max_item_count') + except KeyError: + feed_options["maxItemCount"] = args[3] + + if kwargs.get("partition_key") is not None: + change_feed_state_context["partitionKey"] =\ + self._set_partition_key(cast(PartitionKeyType, kwargs.get('partition_key'))) + change_feed_state_context["partitionKeyFeedRange"] =\ + self._get_epk_range_for_partition_key(kwargs.pop('partition_key')) + + if kwargs.get("feed_range") is not None: + change_feed_state_context["feedRange"] = kwargs.pop('feed_range') + + container_properties = self._get_properties() + feed_options["changeFeedStateContext"] = change_feed_state_context + feed_options["containerRID"] = container_properties["_rid"] + + response_hook = kwargs.pop('response_hook', None) if hasattr(response_hook, "clear"): response_hook.clear() - if self.container_link in 
self.__get_client_container_caches(): - feed_options["containerRID"] = self.__get_client_container_caches()[self.container_link]["_rid"] + result = self.client_connection.QueryItemsChangeFeed( self.container_link, options=feed_options, response_hook=response_hook, **kwargs ) @@ -461,13 +608,14 @@ def query_items( # pylint:disable=docstring-missing-param if populate_index_metrics is not None: feed_options["populateIndexMetrics"] = populate_index_metrics if partition_key is not None: + partition_key_value = self._set_partition_key(partition_key) if self.__is_prefix_partitionkey(partition_key): kwargs["isPrefixPartitionQuery"] = True properties = self._get_properties() kwargs["partitionKeyDefinition"] = properties["partitionKey"] - kwargs["partitionKeyDefinition"]["partition_key"] = partition_key + kwargs["partitionKeyDefinition"]["partition_key"] = partition_key_value else: - feed_options["partitionKey"] = self._set_partition_key(partition_key) + feed_options["partitionKey"] = partition_key_value if enable_scan_in_query is not None: feed_options["enableScanInQuery"] = enable_scan_in_query if max_integrated_cache_staleness_in_ms: @@ -494,16 +642,11 @@ def query_items( # pylint:disable=docstring-missing-param return items def __is_prefix_partitionkey( - self, partition_key: PartitionKeyType - ) -> bool: + self, partition_key: PartitionKeyType) -> bool: properties = self._get_properties() pk_properties = properties["partitionKey"] partition_key_definition = PartitionKey(path=pk_properties["paths"], kind=pk_properties["kind"]) - if partition_key_definition.kind != "MultiHash": - return False - if isinstance(partition_key, list) and len(partition_key_definition['paths']) == len(partition_key): - return False - return True + return partition_key_definition._is_prefix_partition_key(partition_key) @distributed_trace def replace_item( # pylint:disable=docstring-missing-param @@ -1162,3 +1305,28 @@ def delete_all_items_by_partition_key( 
self.client_connection.DeleteAllItemsByPartitionKey( collection_link=self.container_link, options=request_options, **kwargs) + + def read_feed_ranges( + self, + *, + force_refresh: Optional[bool] = False, + **kwargs: Any) -> List[str]: + + """ Obtains a list of feed ranges that can be used to parallelize feed operations. + + :keyword bool force_refresh: + Flag to indicate whether obtain the list of feed ranges directly from cache or refresh the cache. + :returns: A list representing the feed ranges in base64 encoded string + :rtype: List[str] + + """ + if force_refresh is True: + self.client_connection.refresh_routing_map_provider() + + partition_key_ranges =\ + self.client_connection._routing_map_provider.get_overlapping_ranges( + self.container_link, + [Range("", "FF", True, False)], # default to full range + **kwargs) + + return [partition_key_range_to_range_string(partitionKeyRange) for partitionKeyRange in partition_key_ranges] diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/exceptions.py b/sdk/cosmos/azure-cosmos/azure/cosmos/exceptions.py index 5092fd0de7cf..7170a4d1dc39 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/exceptions.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/exceptions.py @@ -28,7 +28,7 @@ ResourceNotFoundError ) from . 
def _is_partition_split_or_merge(e):
    """Return True when the error signals an in-progress partition split or merge.

    The service reports this condition as HTTP 410 (Gone) with sub-status
    COMPLETING_SPLIT; callers use it to decide whether to refresh the
    partition key range cache and retry.

    :param e: The Cosmos HTTP response error to inspect.
    :returns: True if the error represents a partition split/merge in progress.
    :rtype: bool
    """
    # Fix: the original compared e.status_code against BOTH constants, so the
    # second comparison (410 vs. sub-status 1007) could never be true and the
    # function always returned False. The sub-status must be read from
    # e.sub_status, mirroring _partition_range_is_gone above.
    return e.status_code == StatusCodes.GONE and e.sub_status == SubStatusCodes.COMPLETING_SPLIT
Type[NonePartitionKeyValue]]], pk_value)) + + # else return point range + effective_partition_key_string =\ + self._get_effective_partition_key_string( + cast(List[Union[None, bool, int, float, str, _Undefined, Type[NonePartitionKeyValue]]], [pk_value])) + return _Range(effective_partition_key_string, effective_partition_key_string, True, True) + def _get_effective_partition_key_for_hash_partitioning(self) -> str: # We shouldn't be supporting V1 return "" @@ -265,6 +279,15 @@ def _get_effective_partition_key_for_multi_hash_partitioning_v2( return ''.join(sb).upper() + def _is_prefix_partition_key( + self, + partition_key: Union[str, int, float, bool, Sequence[Union[str, int, float, bool, None]], Type[NonePartitionKeyValue]]) -> bool: # pylint: disable=line-too-long + if self.kind!= "MultiHash": + return False + if isinstance(partition_key, list) and len(self['paths']) == len(partition_key): + return False + return True + def _return_undefined_or_empty_partition_key(is_system_key: bool) -> Union[_Empty, _Undefined]: if is_system_key: diff --git a/sdk/cosmos/azure-cosmos/test/test_change_feed.py b/sdk/cosmos/azure-cosmos/test/test_change_feed.py new file mode 100644 index 000000000000..01e2dc21ddb6 --- /dev/null +++ b/sdk/cosmos/azure-cosmos/test/test_change_feed.py @@ -0,0 +1,256 @@ +# The MIT License (MIT) +# Copyright (c) Microsoft Corporation. All rights reserved. 
+ +import unittest +import uuid +from datetime import datetime, timedelta, timezone +from time import sleep + +import pytest +from _pytest.outcomes import fail + +import azure.cosmos.cosmos_client as cosmos_client +import azure.cosmos.exceptions as exceptions +import test_config +from azure.cosmos.partition_key import PartitionKey + + +@pytest.fixture(scope="class") +def setup(): + config = test_config.TestConfig() + if (config.masterKey == '[YOUR_KEY_HERE]' or + config.host == '[YOUR_ENDPOINT_HERE]'): + raise Exception( + "You must specify your Azure Cosmos account values for " + "'masterKey' and 'host' at the top of this class to run the " + "tests.") + test_client = cosmos_client.CosmosClient(config.host, config.masterKey), + return { + "created_db": test_client[0].get_database_client(config.TEST_DATABASE_ID) + } + +@pytest.mark.cosmosEmulator +@pytest.mark.unittest +@pytest.mark.usefixtures("setup") +class TestChangeFeed: + """Test to ensure escaping of non-ascii characters from partition key""" + + def test_get_feed_ranges(self, setup): + created_collection = setup["created_db"].create_container("get_feed_ranges_" + str(uuid.uuid4()), + PartitionKey(path="/pk")) + result = created_collection.read_feed_ranges() + assert len(result) == 1 + + @pytest.mark.parametrize("change_feed_filter_param", ["partitionKey", "partitionKeyRangeId", "feedRange"]) + def test_query_change_feed_with_different_filter(self, change_feed_filter_param, setup): + created_collection = setup["created_db"].create_container("change_feed_test_" + str(uuid.uuid4()), + PartitionKey(path="/pk")) + + # Read change feed without passing any options + query_iterable = created_collection.query_items_change_feed() + iter_list = list(query_iterable) + assert len(iter_list) == 0 + + if change_feed_filter_param == "partitionKey": + filter_param = {"partition_key": "pk"} + elif change_feed_filter_param == "partitionKeyRangeId": + filter_param = {"partition_key_range_id": "0"} + elif 
change_feed_filter_param == "feedRange": + feed_ranges = created_collection.read_feed_ranges() + assert len(feed_ranges) == 1 + filter_param = {"feed_range": feed_ranges[0]} + else: + filter_param = None + + # Read change feed from current should return an empty list + query_iterable = created_collection.query_items_change_feed(filter_param) + iter_list = list(query_iterable) + assert len(iter_list) == 0 + assert 'etag' in created_collection.client_connection.last_response_headers + assert created_collection.client_connection.last_response_headers['etag'] !='' + + # Read change feed from beginning should return an empty list + query_iterable = created_collection.query_items_change_feed( + is_start_from_beginning=True, + **filter_param + ) + iter_list = list(query_iterable) + assert len(iter_list) == 0 + assert 'etag' in created_collection.client_connection.last_response_headers + continuation1 = created_collection.client_connection.last_response_headers['etag'] + assert continuation1 != '' + + # Create a document. Read change feed should return be able to read that document + document_definition = {'pk': 'pk', 'id': 'doc1'} + created_collection.create_item(body=document_definition) + query_iterable = created_collection.query_items_change_feed( + is_start_from_beginning=True, + **filter_param + ) + iter_list = list(query_iterable) + assert len(iter_list) == 1 + assert iter_list[0]['id'] == 'doc1' + assert 'etag' in created_collection.client_connection.last_response_headers + continuation2 = created_collection.client_connection.last_response_headers['etag'] + assert continuation2 != '' + assert continuation2 != continuation1 + + # Create two new documents. 
Verify that change feed contains the 2 new documents + # with page size 1 and page size 100 + document_definition = {'pk': 'pk', 'id': 'doc2'} + created_collection.create_item(body=document_definition) + document_definition = {'pk': 'pk', 'id': 'doc3'} + created_collection.create_item(body=document_definition) + + for pageSize in [1, 100]: + # verify iterator + query_iterable = created_collection.query_items_change_feed( + continuation=continuation2, + max_item_count=pageSize, + **filter_param + ) + it = query_iterable.__iter__() + expected_ids = 'doc2.doc3.' + actual_ids = '' + for item in it: + actual_ids += item['id'] + '.' + assert actual_ids == expected_ids + + # verify by_page + # the options is not copied, therefore it need to be restored + query_iterable = created_collection.query_items_change_feed( + continuation=continuation2, + max_item_count=pageSize, + **filter_param + ) + count = 0 + expected_count = 2 + all_fetched_res = [] + for page in query_iterable.by_page(): + fetched_res = list(page) + assert len(fetched_res) == min(pageSize, expected_count - count) + count += len(fetched_res) + all_fetched_res.extend(fetched_res) + + actual_ids = '' + for item in all_fetched_res: + actual_ids += item['id'] + '.' 
+ assert actual_ids == expected_ids + + # verify reading change feed from the beginning + query_iterable = created_collection.query_items_change_feed( + is_start_from_beginning=True, + **filter_param + ) + expected_ids = ['doc1', 'doc2', 'doc3'] + it = query_iterable.__iter__() + for i in range(0, len(expected_ids)): + doc = next(it) + assert doc['id'] == expected_ids[i] + assert 'etag' in created_collection.client_connection.last_response_headers + continuation3 = created_collection.client_connection.last_response_headers['etag'] + + # verify reading empty change feed + query_iterable = created_collection.query_items_change_feed( + continuation=continuation3, + is_start_from_beginning=True, + **filter_param + ) + iter_list = list(query_iterable) + assert len(iter_list) == 0 + setup["created_db"].delete_container(created_collection.id) + + def test_query_change_feed_with_start_time(self, setup): + created_collection = setup["created_db"].create_container_if_not_exists("query_change_feed_start_time_test", + PartitionKey(path="/pk")) + batchSize = 50 + + def round_time(): + utc_now = datetime.now(timezone.utc) + return utc_now - timedelta(microseconds=utc_now.microsecond) + def create_random_items(container, batch_size): + for _ in range(batch_size): + # Generate a Random partition key + partition_key = 'pk' + str(uuid.uuid4()) + + # Generate a random item + item = { + 'id': 'item' + str(uuid.uuid4()), + 'partitionKey': partition_key, + 'content': 'This is some random content', + } + + try: + # Create the item in the container + container.upsert_item(item) + except exceptions.CosmosHttpResponseError as e: + fail(e) + + # Create first batch of random items + create_random_items(created_collection, batchSize) + + # wait for 1 second and record the time, then wait another second + sleep(1) + start_time = round_time() + not_utc_time = datetime.now() + sleep(1) + + # now create another batch of items + create_random_items(created_collection, batchSize) + + # now query 
change feed based on start time + change_feed_iter = list(created_collection.query_items_change_feed(start_time=start_time)) + totalCount = len(change_feed_iter) + + # now check if the number of items that were changed match the batch size + assert totalCount == batchSize + + # negative test: pass in a valid time in the future + future_time = start_time + timedelta(hours=1) + change_feed_iter = list(created_collection.query_items_change_feed(start_time=future_time)) + totalCount = len(change_feed_iter) + # A future time should return 0 + assert totalCount == 0 + + # test a date that is not utc, will be converted to utc by sdk + change_feed_iter = list(created_collection.query_items_change_feed(start_time=not_utc_time)) + totalCount = len(change_feed_iter) + # Should equal batch size + assert totalCount == batchSize + + # test an invalid value, Attribute error will be raised for passing non datetime object + invalid_time = "Invalid value" + try: + list(created_collection.query_items_change_feed(start_time=invalid_time)) + fail("Cannot format date on a non datetime object.") + except ValueError as e: #TODO: previously it is throwing AttributeError, now has changed into ValueError, is it breaking change? 
+ assert "Invalid start_time 'Invalid value'" == e.args[0] + + setup["created_db"].delete_container(created_collection.id) + + def test_query_change_feed_with_multi_partition(self, setup): + created_collection = setup["created_db"].create_container("change_feed_test_" + str(uuid.uuid4()), + PartitionKey(path="/pk"), + offer_throughput=11000) + + # create one doc and make sure change feed query can return the document + new_documents = [ + {'pk': 'pk', 'id': 'doc1'}, + {'pk': 'pk2', 'id': 'doc2'}, + {'pk': 'pk3', 'id': 'doc3'}, + {'pk': 'pk4', 'id': 'doc4'}] + expected_ids = ['doc1', 'doc2', 'doc3', 'doc4'] + + for document in new_documents: + created_collection.create_item(body=document) + + query_iterable = created_collection.query_items_change_feed(start_time="Beginning") + it = query_iterable.__iter__() + actual_ids = [] + for item in it: + actual_ids.append(item['id']) + + assert actual_ids == expected_ids + +if __name__ == "__main__": + unittest.main() diff --git a/sdk/cosmos/azure-cosmos/test/test_change_feed_async.py b/sdk/cosmos/azure-cosmos/test/test_change_feed_async.py new file mode 100644 index 000000000000..2ef61ee5c8a3 --- /dev/null +++ b/sdk/cosmos/azure-cosmos/test/test_change_feed_async.py @@ -0,0 +1,280 @@ +# The MIT License (MIT) +# Copyright (c) Microsoft Corporation. All rights reserved. 
+ +import unittest +import uuid +from asyncio import sleep +from datetime import datetime, timedelta, timezone + +import pytest +import pytest_asyncio +from _pytest.outcomes import fail + +import azure.cosmos.exceptions as exceptions +import test_config +from azure.cosmos.aio import CosmosClient +from azure.cosmos.partition_key import PartitionKey + + +@pytest_asyncio.fixture() +async def setup(): + config = test_config.TestConfig() + if config.masterKey == '[YOUR_KEY_HERE]' or config.host == '[YOUR_ENDPOINT_HERE]': + raise Exception( + "You must specify your Azure Cosmos account values for " + "'masterKey' and 'host' at the top of this class to run the " + "tests.") + test_client = CosmosClient(config.host, config.masterKey) + created_db = await test_client.create_database_if_not_exists(config.TEST_DATABASE_ID) + created_db_data = { + "created_db": created_db + } + + yield created_db_data + await test_client.close() + +@pytest.mark.cosmosEmulator +@pytest.mark.asyncio +@pytest.mark.usefixtures("setup") +class TestChangeFeedAsync: + """Test to ensure escaping of non-ascii characters from partition key""" + + async def test_get_feed_ranges(self, setup): + created_collection = await setup["created_db"].create_container("get_feed_ranges_" + str(uuid.uuid4()), + PartitionKey(path="/pk")) + result = await created_collection.read_feed_ranges() + assert len(result) == 1 + + @pytest.mark.parametrize("change_feed_filter_param", ["partitionKey", "partitionKeyRangeId", "feedRange"]) + async def test_query_change_feed_with_different_filter_async(self, change_feed_filter_param, setup): + + created_collection = await setup["created_db"].create_container( + "change_feed_test_" + str(uuid.uuid4()), + PartitionKey(path="/pk")) + + if change_feed_filter_param == "partitionKey": + filter_param = {"partition_key": "pk"} + elif change_feed_filter_param == "partitionKeyRangeId": + filter_param = {"partition_key_range_id": "0"} + elif change_feed_filter_param == "feedRange": + 
feed_ranges = await created_collection.read_feed_ranges() + assert len(feed_ranges) == 1 + filter_param = {"feed_range": feed_ranges[0]} + else: + filter_param = None + + # Read change feed without passing any options + query_iterable = created_collection.query_items_change_feed() + iter_list = [item async for item in query_iterable] + assert len(iter_list) == 0 + + # Read change feed from current should return an empty list + query_iterable = created_collection.query_items_change_feed(filter_param) + iter_list = [item async for item in query_iterable] + assert len(iter_list) == 0 + if 'Etag' in created_collection.client_connection.last_response_headers: + assert created_collection.client_connection.last_response_headers['Etag'] != '' + elif 'etag' in created_collection.client_connection.last_response_headers: + assert created_collection.client_connection.last_response_headers['etag'] != '' + else: + fail("No Etag or etag found in last response headers") + + # Read change feed from beginning should return an empty list + query_iterable = created_collection.query_items_change_feed( + is_start_from_beginning=True, + **filter_param + ) + iter_list = [item async for item in query_iterable] + assert len(iter_list) == 0 + if 'Etag' in created_collection.client_connection.last_response_headers: + continuation1 = created_collection.client_connection.last_response_headers['Etag'] + elif 'etag' in created_collection.client_connection.last_response_headers: + continuation1 = created_collection.client_connection.last_response_headers['etag'] + else: + fail("No Etag or etag found in last response headers") + assert continuation1 != '' + + # Create a document. 
Read change feed should be able to read that document
+        document_definition = {'pk': 'pk', 'id': 'doc1'}
+        await created_collection.create_item(body=document_definition)
+        query_iterable = created_collection.query_items_change_feed(
+            is_start_from_beginning=True,
+            **filter_param
+        )
+        iter_list = [item async for item in query_iterable]
+        assert len(iter_list) == 1
+        assert iter_list[0]['id'] == 'doc1'
+        if 'Etag' in created_collection.client_connection.last_response_headers:
+            continuation2 = created_collection.client_connection.last_response_headers['Etag']
+        elif 'etag' in created_collection.client_connection.last_response_headers:
+            continuation2 = created_collection.client_connection.last_response_headers['etag']
+        else:
+            fail("No Etag or etag found in last response headers")
+        assert continuation2 != ''
+        assert continuation2 != continuation1
+
+        # Create two new documents. Verify that change feed contains the 2 new documents
+        # with page size 1 and page size 100
+        document_definition = {'pk': 'pk', 'id': 'doc2'}
+        await created_collection.create_item(body=document_definition)
+        document_definition = {'pk': 'pk', 'id': 'doc3'}
+        await created_collection.create_item(body=document_definition)
+
+        for pageSize in [2, 100]:
+            # verify iterator
+            query_iterable = created_collection.query_items_change_feed(
+                continuation=continuation2,
+                max_item_count=pageSize,
+                **filter_param)
+            it = query_iterable.__aiter__()
+            expected_ids = 'doc2.doc3.'
+            actual_ids = ''
+            async for item in it:
+                actual_ids += item['id'] + '.'
+ assert actual_ids == expected_ids + + # verify by_page + # the options is not copied, therefore it need to be restored + query_iterable = created_collection.query_items_change_feed( + continuation=continuation2, + max_item_count=pageSize, + **filter_param + ) + count = 0 + expected_count = 2 + all_fetched_res = [] + pages = query_iterable.by_page() + async for items in await pages.__anext__(): + count += 1 + all_fetched_res.append(items) + assert count == expected_count + + actual_ids = '' + for item in all_fetched_res: + actual_ids += item['id'] + '.' + assert actual_ids == expected_ids + + # verify reading change feed from the beginning + query_iterable = created_collection.query_items_change_feed( + is_start_from_beginning=True, + **filter_param + ) + expected_ids = ['doc1', 'doc2', 'doc3'] + it = query_iterable.__aiter__() + for i in range(0, len(expected_ids)): + doc = await it.__anext__() + assert doc['id'] == expected_ids[i] + if 'Etag' in created_collection.client_connection.last_response_headers: + continuation3 = created_collection.client_connection.last_response_headers['Etag'] + elif 'etag' in created_collection.client_connection.last_response_headers: + continuation3 = created_collection.client_connection.last_response_headers['etag'] + else: + fail("No Etag or etag found in last response headers") + + # verify reading empty change feed + query_iterable = created_collection.query_items_change_feed( + continuation=continuation3, + is_start_from_beginning=True, + **filter_param + ) + iter_list = [item async for item in query_iterable] + assert len(iter_list) == 0 + + await setup["created_db"].delete_container(created_collection.id) + + @pytest.mark.asyncio + async def test_query_change_feed_with_start_time(self, setup): + created_collection = await setup["created_db"].create_container_if_not_exists("query_change_feed_start_time_test", + PartitionKey(path="/pk")) + batchSize = 50 + + def round_time(): + utc_now = datetime.now(timezone.utc) + return 
utc_now - timedelta(microseconds=utc_now.microsecond) + + async def create_random_items(container, batch_size): + for _ in range(batch_size): + # Generate a Random partition key + partition_key = 'pk' + str(uuid.uuid4()) + + # Generate a random item + item = { + 'id': 'item' + str(uuid.uuid4()), + 'partitionKey': partition_key, + 'content': 'This is some random content', + } + + try: + # Create the item in the container + await container.upsert_item(item) + except exceptions.CosmosHttpResponseError as e: + pytest.fail(e) + + # Create first batch of random items + await create_random_items(created_collection, batchSize) + + # wait for 1 second and record the time, then wait another second + await sleep(1) + start_time = round_time() + not_utc_time = datetime.now() + await sleep(1) + + # now create another batch of items + await create_random_items(created_collection, batchSize) + + # now query change feed based on start time + change_feed_iter = [i async for i in created_collection.query_items_change_feed(start_time=start_time)] + totalCount = len(change_feed_iter) + + # now check if the number of items that were changed match the batch size + assert totalCount == batchSize + + # negative test: pass in a valid time in the future + future_time = start_time + timedelta(hours=1) + change_feed_iter = [i async for i in created_collection.query_items_change_feed(start_time=future_time)] + totalCount = len(change_feed_iter) + # A future time should return 0 + assert totalCount == 0 + + # test a date that is not utc, will be converted to utc by sdk + change_feed_iter = [i async for i in created_collection.query_items_change_feed(start_time=not_utc_time)] + totalCount = len(change_feed_iter) + # Should equal batch size + assert totalCount == batchSize + + # test an invalid value, Attribute error will be raised for passing non datetime object + invalid_time = "Invalid value" + try: + change_feed_iter = [i async for i in 
created_collection.query_items_change_feed(start_time=invalid_time)] + fail("Cannot format date on a non datetime object.") + except ValueError as e: + assert ("Invalid start_time 'Invalid value'" == e.args[0]) + + await setup["created_db"].delete_container(created_collection.id) + + async def test_query_change_feed_with_multi_partition_async(self, setup): + created_collection = await setup["created_db"].create_container("change_feed_test_" + str(uuid.uuid4()), + PartitionKey(path="/pk"), + offer_throughput=11000) + + # create one doc and make sure change feed query can return the document + new_documents = [ + {'pk': 'pk', 'id': 'doc1'}, + {'pk': 'pk2', 'id': 'doc2'}, + {'pk': 'pk3', 'id': 'doc3'}, + {'pk': 'pk4', 'id': 'doc4'}] + expected_ids = ['doc1', 'doc2', 'doc3', 'doc4'] + + for document in new_documents: + await created_collection.create_item(body=document) + + query_iterable = created_collection.query_items_change_feed(start_time="Beginning") + it = query_iterable.__aiter__() + actual_ids = [] + async for item in it: + actual_ids.append(item['id']) + + assert actual_ids == expected_ids + +if __name__ == '__main__': + unittest.main() diff --git a/sdk/cosmos/azure-cosmos/test/test_change_feed_split.py b/sdk/cosmos/azure-cosmos/test/test_change_feed_split.py new file mode 100644 index 000000000000..8ecb7da9cff3 --- /dev/null +++ b/sdk/cosmos/azure-cosmos/test/test_change_feed_split.py @@ -0,0 +1,81 @@ +# The MIT License (MIT) +# Copyright (c) Microsoft Corporation. All rights reserved. 
+ +import time +import unittest +import uuid + +import azure.cosmos.cosmos_client as cosmos_client +import test_config +from azure.cosmos import DatabaseProxy, PartitionKey + + +class TestPartitionSplitChangeFeed(unittest.TestCase): + database: DatabaseProxy = None + client: cosmos_client.CosmosClient = None + configs = test_config.TestConfig + host = configs.host + masterKey = configs.masterKey + TEST_DATABASE_ID = configs.TEST_DATABASE_ID + + @classmethod + def setUpClass(cls): + cls.client = cosmos_client.CosmosClient(cls.host, cls.masterKey) + cls.database = cls.client.get_database_client(cls.TEST_DATABASE_ID) + + def test_query_change_feed_with_split(self): + created_collection = self.database.create_container("change_feed_split_test_" + str(uuid.uuid4()), + PartitionKey(path="/pk"), + offer_throughput=400) + + # initial change feed query returns empty result + query_iterable = created_collection.query_items_change_feed(start_time="Beginning") + iter_list = list(query_iterable) + assert len(iter_list) == 0 + continuation = created_collection.client_connection.last_response_headers['etag'] + assert continuation != '' + + # create one doc and make sure change feed query can return the document + document_definition = {'pk': 'pk', 'id': 'doc1'} + created_collection.create_item(body=document_definition) + query_iterable = created_collection.query_items_change_feed(continuation=continuation) + iter_list = list(query_iterable) + assert len(iter_list) == 1 + continuation = created_collection.client_connection.last_response_headers['etag'] + + print("Triggering a split in test_query_change_feed_with_split") + created_collection.replace_throughput(11000) + print("changed offer to 11k") + print("--------------------------------") + print("Waiting for split to complete") + start_time = time.time() + + while True: + offer = created_collection.get_throughput() + if offer.properties['content'].get('isOfferReplacePending', False): + if time.time() - start_time > 60 * 25: # 
timeout test at 25 minutes + unittest.skip("Partition split didn't complete in time.") + else: + print("Waiting for split to complete") + time.sleep(60) + else: + break + + print("Split in test_query_change_feed_with_split has completed") + print("creating few more documents") + new_documents = [{'pk': 'pk2', 'id': 'doc2'}, {'pk': 'pk3', 'id': 'doc3'}, {'pk': 'pk4', 'id': 'doc4'}] + expected_ids = ['doc2', 'doc3', 'doc4'] + for document in new_documents: + created_collection.create_item(body=document) + + query_iterable = created_collection.query_items_change_feed(continuation=continuation) + it = query_iterable.__iter__() + actual_ids = [] + for item in it: + actual_ids.append(item['id']) + + assert actual_ids == expected_ids + self.database.delete_container(created_collection.id) + +if __name__ == "__main__": + unittest.main() diff --git a/sdk/cosmos/azure-cosmos/test/test_change_feed_split_async.py b/sdk/cosmos/azure-cosmos/test/test_change_feed_split_async.py new file mode 100644 index 000000000000..60f7b2810884 --- /dev/null +++ b/sdk/cosmos/azure-cosmos/test/test_change_feed_split_async.py @@ -0,0 +1,94 @@ +# The MIT License (MIT) +# Copyright (c) Microsoft Corporation. All rights reserved. 
+ +import time +import unittest +import uuid + +import test_config +from azure.cosmos import PartitionKey +from azure.cosmos.aio import CosmosClient, DatabaseProxy + + +class TestPartitionSplitChangeFeedAsync(unittest.IsolatedAsyncioTestCase): + host = test_config.TestConfig.host + masterKey = test_config.TestConfig.masterKey + connectionPolicy = test_config.TestConfig.connectionPolicy + + client: CosmosClient = None + created_database: DatabaseProxy = None + + TEST_DATABASE_ID = test_config.TestConfig.TEST_DATABASE_ID + + @classmethod + def setUpClass(cls): + if (cls.masterKey == '[YOUR_KEY_HERE]' or + cls.host == '[YOUR_ENDPOINT_HERE]'): + raise Exception( + "You must specify your Azure Cosmos account values for " + "'masterKey' and 'host' at the top of this class to run the " + "tests.") + + async def asyncSetUp(self): + self.client = CosmosClient(self.host, self.masterKey) + self.created_database = self.client.get_database_client(self.TEST_DATABASE_ID) + + async def tearDown(self): + await self.client.close() + + async def test_query_change_feed_with_split_async(self): + created_collection = await self.created_database.create_container("change_feed_test_" + str(uuid.uuid4()), + PartitionKey(path="/pk"), + offer_throughput=400) + + # initial change feed query returns empty result + query_iterable = created_collection.query_items_change_feed(start_time="Beginning") + iter_list = [item async for item in query_iterable] + assert len(iter_list) == 0 + continuation = created_collection.client_connection.last_response_headers['etag'] + assert continuation != '' + + # create one doc and make sure change feed query can return the document + document_definition = {'pk': 'pk', 'id': 'doc1'} + await created_collection.create_item(body=document_definition) + query_iterable = created_collection.query_items_change_feed(continuation=continuation) + iter_list = [item async for item in query_iterable] + assert len(iter_list) == 1 + continuation = 
created_collection.client_connection.last_response_headers['etag']
+
+        print("Triggering a split in test_query_change_feed_with_split")
+        await created_collection.replace_throughput(11000)
+        print("changed offer to 11k")
+        print("--------------------------------")
+        print("Waiting for split to complete")
+        start_time = time.time()
+
+        while True:
+            offer = await created_collection.get_throughput()
+            if offer.properties['content'].get('isOfferReplacePending', False):
+                if time.time() - start_time > 60 * 25:  # timeout test at 25 minutes
+                    unittest.skip("Partition split didn't complete in time.")
+                else:
+                    print("Waiting for split to complete")
+                    time.sleep(60)
+            else:
+                break
+
+        print("Split in test_query_change_feed_with_split has completed")
+        print("creating few more documents")
+        new_documents = [{'pk': 'pk2', 'id': 'doc2'}, {'pk': 'pk3', 'id': 'doc3'}, {'pk': 'pk4', 'id': 'doc4'}]
+        expected_ids = ['doc2', 'doc3', 'doc4']
+        for document in new_documents:
+            await created_collection.create_item(body=document)
+
+        query_iterable = created_collection.query_items_change_feed(continuation=continuation)
+        it = query_iterable.__aiter__()
+        actual_ids = []
+        async for item in it:
+            actual_ids.append(item['id'])
+
+        assert actual_ids == expected_ids
+        await self.created_database.delete_container(created_collection.id)
+
+if __name__ == '__main__':
+    unittest.main()
\ No newline at end of file
diff --git a/sdk/cosmos/azure-cosmos/test/test_container_properties_cache.py b/sdk/cosmos/azure-cosmos/test/test_container_properties_cache.py
index 6ced2c6d0cd9..fbac47dfb215 100644
--- a/sdk/cosmos/azure-cosmos/test/test_container_properties_cache.py
+++ b/sdk/cosmos/azure-cosmos/test/test_container_properties_cache.py
@@ -599,7 +599,7 @@ def test_container_recreate_change_feed(self):
         client.client_connection._CosmosClientConnection__container_properties_cache = copy.deepcopy(old_cache)
 
         # Query change feed for the new items
-        change_feed = 
list(created_container.query_items_change_feed()) + change_feed = list(created_container.query_items_change_feed(start_time='Beginning')) self.assertEqual(len(change_feed), 2) # Verify that the change feed contains the new items diff --git a/sdk/cosmos/azure-cosmos/test/test_container_properties_cache_async.py b/sdk/cosmos/azure-cosmos/test/test_container_properties_cache_async.py index 88fd6e20cc14..8cf3b9f39ba0 100644 --- a/sdk/cosmos/azure-cosmos/test/test_container_properties_cache_async.py +++ b/sdk/cosmos/azure-cosmos/test/test_container_properties_cache_async.py @@ -612,7 +612,7 @@ async def test_container_recreate_change_feed(self): client.client_connection._CosmosClientConnection__container_properties_cache = copy.deepcopy(old_cache) # Query change feed for the new items - change_feed = [item async for item in created_container.query_items_change_feed()] + change_feed = [item async for item in created_container.query_items_change_feed(start_time='Beginning')] assert len(change_feed) == 2 # Verify that the change feed contains the new items diff --git a/sdk/cosmos/azure-cosmos/test/test_query.py b/sdk/cosmos/azure-cosmos/test/test_query.py index 7efcbca23e7c..fedc4450b904 100644 --- a/sdk/cosmos/azure-cosmos/test/test_query.py +++ b/sdk/cosmos/azure-cosmos/test/test_query.py @@ -4,8 +4,7 @@ import os import unittest import uuid -from datetime import datetime, timedelta, timezone -from time import sleep + import pytest import azure.cosmos._retry_utility as retry_utility @@ -55,293 +54,6 @@ def test_first_and_last_slashes_trimmed_for_query_string(self): self.assertEqual(iter_list[0]['id'], doc_id) self.created_db.delete_container(created_collection.id) - def test_query_change_feed_with_pk(self): - created_collection = self.created_db.create_container("change_feed_test_" + str(uuid.uuid4()), - PartitionKey(path="/pk")) - # The test targets partition #3 - partition_key = "pk" - - # Read change feed without passing any options - query_iterable = 
created_collection.query_items_change_feed() - iter_list = list(query_iterable) - self.assertEqual(len(iter_list), 0) - - # Read change feed from current should return an empty list - query_iterable = created_collection.query_items_change_feed(partition_key=partition_key) - iter_list = list(query_iterable) - self.assertEqual(len(iter_list), 0) - self.assertTrue('etag' in created_collection.client_connection.last_response_headers) - self.assertNotEqual(created_collection.client_connection.last_response_headers['etag'], '') - - # Read change feed from beginning should return an empty list - query_iterable = created_collection.query_items_change_feed( - is_start_from_beginning=True, - partition_key=partition_key - ) - iter_list = list(query_iterable) - self.assertEqual(len(iter_list), 0) - self.assertTrue('etag' in created_collection.client_connection.last_response_headers) - continuation1 = created_collection.client_connection.last_response_headers['etag'] - self.assertNotEqual(continuation1, '') - - # Create a document. Read change feed should return be able to read that document - document_definition = {'pk': 'pk', 'id': 'doc1'} - created_collection.create_item(body=document_definition) - query_iterable = created_collection.query_items_change_feed( - is_start_from_beginning=True, - partition_key=partition_key - ) - iter_list = list(query_iterable) - self.assertEqual(len(iter_list), 1) - self.assertEqual(iter_list[0]['id'], 'doc1') - self.assertTrue('etag' in created_collection.client_connection.last_response_headers) - continuation2 = created_collection.client_connection.last_response_headers['etag'] - self.assertNotEqual(continuation2, '') - self.assertNotEqual(continuation2, continuation1) - - # Create two new documents. 
Verify that change feed contains the 2 new documents - # with page size 1 and page size 100 - document_definition = {'pk': 'pk', 'id': 'doc2'} - created_collection.create_item(body=document_definition) - document_definition = {'pk': 'pk', 'id': 'doc3'} - created_collection.create_item(body=document_definition) - - for pageSize in [1, 100]: - # verify iterator - query_iterable = created_collection.query_items_change_feed( - continuation=continuation2, - max_item_count=pageSize, - partition_key=partition_key - ) - it = query_iterable.__iter__() - expected_ids = 'doc2.doc3.' - actual_ids = '' - for item in it: - actual_ids += item['id'] + '.' - self.assertEqual(actual_ids, expected_ids) - - # verify by_page - # the options is not copied, therefore it need to be restored - query_iterable = created_collection.query_items_change_feed( - continuation=continuation2, - max_item_count=pageSize, - partition_key=partition_key - ) - count = 0 - expected_count = 2 - all_fetched_res = [] - for page in query_iterable.by_page(): - fetched_res = list(page) - self.assertEqual(len(fetched_res), min(pageSize, expected_count - count)) - count += len(fetched_res) - all_fetched_res.extend(fetched_res) - - actual_ids = '' - for item in all_fetched_res: - actual_ids += item['id'] + '.' 
- self.assertEqual(actual_ids, expected_ids) - - # verify reading change feed from the beginning - query_iterable = created_collection.query_items_change_feed( - is_start_from_beginning=True, - partition_key=partition_key - ) - expected_ids = ['doc1', 'doc2', 'doc3'] - it = query_iterable.__iter__() - for i in range(0, len(expected_ids)): - doc = next(it) - self.assertEqual(doc['id'], expected_ids[i]) - self.assertTrue('etag' in created_collection.client_connection.last_response_headers) - continuation3 = created_collection.client_connection.last_response_headers['etag'] - - # verify reading empty change feed - query_iterable = created_collection.query_items_change_feed( - continuation=continuation3, - is_start_from_beginning=True, - partition_key=partition_key - ) - iter_list = list(query_iterable) - self.assertEqual(len(iter_list), 0) - self.created_db.delete_container(created_collection.id) - - # TODO: partition key range id 0 is relative to the way collection is created - @pytest.mark.skip - def test_query_change_feed_with_pk_range_id(self): - created_collection = self.created_db.create_container("change_feed_test_" + str(uuid.uuid4()), - PartitionKey(path="/pk")) - # The test targets partition #3 - partition_key_range_id = 0 - partitionParam = {"partition_key_range_id": partition_key_range_id} - - # Read change feed without passing any options - query_iterable = created_collection.query_items_change_feed() - iter_list = list(query_iterable) - self.assertEqual(len(iter_list), 0) - - # Read change feed from current should return an empty list - query_iterable = created_collection.query_items_change_feed(**partitionParam) - iter_list = list(query_iterable) - self.assertEqual(len(iter_list), 0) - self.assertTrue('etag' in created_collection.client_connection.last_response_headers) - self.assertNotEqual(created_collection.client_connection.last_response_headers['etag'], '') - - # Read change feed from beginning should return an empty list - query_iterable = 
created_collection.query_items_change_feed( - is_start_from_beginning=True, - **partitionParam - ) - iter_list = list(query_iterable) - self.assertEqual(len(iter_list), 0) - self.assertTrue('etag' in created_collection.client_connection.last_response_headers) - continuation1 = created_collection.client_connection.last_response_headers['etag'] - self.assertNotEqual(continuation1, '') - - # Create a document. Read change feed should return be able to read that document - document_definition = {'pk': 'pk', 'id': 'doc1'} - created_collection.create_item(body=document_definition) - query_iterable = created_collection.query_items_change_feed( - is_start_from_beginning=True, - **partitionParam - ) - iter_list = list(query_iterable) - self.assertEqual(len(iter_list), 1) - self.assertEqual(iter_list[0]['id'], 'doc1') - self.assertTrue('etag' in created_collection.client_connection.last_response_headers) - continuation2 = created_collection.client_connection.last_response_headers['etag'] - self.assertNotEqual(continuation2, '') - self.assertNotEqual(continuation2, continuation1) - - # Create two new documents. Verify that change feed contains the 2 new documents - # with page size 1 and page size 100 - document_definition = {'pk': 'pk', 'id': 'doc2'} - created_collection.create_item(body=document_definition) - document_definition = {'pk': 'pk', 'id': 'doc3'} - created_collection.create_item(body=document_definition) - - for pageSize in [1, 100]: - # verify iterator - query_iterable = created_collection.query_items_change_feed( - continuation=continuation2, - max_item_count=pageSize, - **partitionParam - ) - it = query_iterable.__iter__() - expected_ids = 'doc2.doc3.' - actual_ids = '' - for item in it: - actual_ids += item['id'] + '.' 
- self.assertEqual(actual_ids, expected_ids) - - # verify by_page - # the options is not copied, therefore it need to be restored - query_iterable = created_collection.query_items_change_feed( - continuation=continuation2, - max_item_count=pageSize, - **partitionParam - ) - count = 0 - expected_count = 2 - all_fetched_res = [] - for page in query_iterable.by_page(): - fetched_res = list(page) - self.assertEqual(len(fetched_res), min(pageSize, expected_count - count)) - count += len(fetched_res) - all_fetched_res.extend(fetched_res) - - actual_ids = '' - for item in all_fetched_res: - actual_ids += item['id'] + '.' - self.assertEqual(actual_ids, expected_ids) - - # verify reading change feed from the beginning - query_iterable = created_collection.query_items_change_feed( - is_start_from_beginning=True, - **partitionParam - ) - expected_ids = ['doc1', 'doc2', 'doc3'] - it = query_iterable.__iter__() - for i in range(0, len(expected_ids)): - doc = next(it) - self.assertEqual(doc['id'], expected_ids[i]) - self.assertTrue('etag' in created_collection.client_connection.last_response_headers) - continuation3 = created_collection.client_connection.last_response_headers['etag'] - - # verify reading empty change feed - query_iterable = created_collection.query_items_change_feed( - continuation=continuation3, - is_start_from_beginning=True, - **partitionParam - ) - iter_list = list(query_iterable) - self.assertEqual(len(iter_list), 0) - self.created_db.delete_container(created_collection.id) - - def test_query_change_feed_with_start_time(self): - created_collection = self.created_db.create_container_if_not_exists("query_change_feed_start_time_test", - PartitionKey(path="/pk")) - batchSize = 50 - - def round_time(): - utc_now = datetime.now(timezone.utc) - return utc_now - timedelta(microseconds=utc_now.microsecond) - def create_random_items(container, batch_size): - for _ in range(batch_size): - # Generate a Random partition key - partition_key = 'pk' + str(uuid.uuid4()) - - 
# Generate a random item - item = { - 'id': 'item' + str(uuid.uuid4()), - 'partitionKey': partition_key, - 'content': 'This is some random content', - } - - try: - # Create the item in the container - container.upsert_item(item) - except exceptions.CosmosHttpResponseError as e: - self.fail(e) - - # Create first batch of random items - create_random_items(created_collection, batchSize) - - # wait for 1 second and record the time, then wait another second - sleep(1) - start_time = round_time() - not_utc_time = datetime.now() - sleep(1) - - # now create another batch of items - create_random_items(created_collection, batchSize) - - # now query change feed based on start time - change_feed_iter = list(created_collection.query_items_change_feed(start_time=start_time)) - totalCount = len(change_feed_iter) - - # now check if the number of items that were changed match the batch size - self.assertEqual(totalCount, batchSize) - - # negative test: pass in a valid time in the future - future_time = start_time + timedelta(hours=1) - change_feed_iter = list(created_collection.query_items_change_feed(start_time=future_time)) - totalCount = len(change_feed_iter) - # A future time should return 0 - self.assertEqual(totalCount, 0) - - # test a date that is not utc, will be converted to utc by sdk - change_feed_iter = list(created_collection.query_items_change_feed(start_time=not_utc_time)) - totalCount = len(change_feed_iter) - # Should equal batch size - self.assertEqual(totalCount, batchSize) - - # test an invalid value, Attribute error will be raised for passing non datetime object - invalid_time = "Invalid value" - try: - change_feed_iter = list(created_collection.query_items_change_feed(start_time=invalid_time)) - self.fail("Cannot format date on a non datetime object.") - except AttributeError as e: - self.assertTrue("'str' object has no attribute 'astimezone'" == e.args[0]) - def test_populate_query_metrics(self): created_collection = 
self.created_db.create_container("query_metrics_test", PartitionKey(path="/pk")) diff --git a/sdk/cosmos/azure-cosmos/test/test_query_async.py b/sdk/cosmos/azure-cosmos/test/test_query_async.py index 51018126462d..718c544193a3 100644 --- a/sdk/cosmos/azure-cosmos/test/test_query_async.py +++ b/sdk/cosmos/azure-cosmos/test/test_query_async.py @@ -4,8 +4,7 @@ import os import unittest import uuid -from asyncio import sleep, gather -from datetime import datetime, timedelta, timezone +from asyncio import gather import pytest @@ -14,10 +13,10 @@ import test_config from azure.cosmos import http_constants from azure.cosmos._execution_context.query_execution_info import _PartitionedQueryExecutionInfo +from azure.cosmos._retry_options import RetryOptions from azure.cosmos.aio import CosmosClient, DatabaseProxy, ContainerProxy from azure.cosmos.documents import _DistinctType from azure.cosmos.partition_key import PartitionKey -from azure.cosmos._retry_options import RetryOptions @pytest.mark.cosmosEmulator @@ -69,329 +68,6 @@ async def test_first_and_last_slashes_trimmed_for_query_string_async(self): await self.created_db.delete_container(created_collection.id) - async def test_query_change_feed_with_pk_async(self): - created_collection = await self.created_db.create_container( - "change_feed_test_" + str(uuid.uuid4()), - PartitionKey(path="/pk")) - # The test targets partition #3 - partition_key = "pk" - - # Read change feed without passing any options - query_iterable = created_collection.query_items_change_feed() - iter_list = [item async for item in query_iterable] - assert len(iter_list) == 0 - - # Read change feed from current should return an empty list - query_iterable = created_collection.query_items_change_feed(partition_key=partition_key) - iter_list = [item async for item in query_iterable] - assert len(iter_list) == 0 - if 'Etag' in created_collection.client_connection.last_response_headers: - assert 
created_collection.client_connection.last_response_headers['Etag'] != '' - elif 'etag' in created_collection.client_connection.last_response_headers: - assert created_collection.client_connection.last_response_headers['etag'] != '' - else: - self.fail("No Etag or etag found in last response headers") - - # Read change feed from beginning should return an empty list - query_iterable = created_collection.query_items_change_feed( - is_start_from_beginning=True, - partition_key=partition_key - ) - iter_list = [item async for item in query_iterable] - assert len(iter_list) == 0 - if 'Etag' in created_collection.client_connection.last_response_headers: - continuation1 = created_collection.client_connection.last_response_headers['Etag'] - elif 'etag' in created_collection.client_connection.last_response_headers: - continuation1 = created_collection.client_connection.last_response_headers['etag'] - else: - self.fail("No Etag or etag found in last response headers") - assert continuation1 != '' - - # Create a document. Read change feed should return be able to read that document - document_definition = {'pk': 'pk', 'id': 'doc1'} - await created_collection.create_item(body=document_definition) - query_iterable = created_collection.query_items_change_feed( - is_start_from_beginning=True, - partition_key=partition_key - ) - iter_list = [item async for item in query_iterable] - assert len(iter_list) == 1 - assert iter_list[0]['id'] == 'doc1' - if 'Etag' in created_collection.client_connection.last_response_headers: - continuation2 = created_collection.client_connection.last_response_headers['Etag'] - elif 'etag' in created_collection.client_connection.last_response_headers: - continuation2 = created_collection.client_connection.last_response_headers['etag'] - else: - self.fail("No Etag or etag found in last response headers") - assert continuation2 != '' - assert continuation2 != continuation1 - - # Create two new documents. 
Verify that change feed contains the 2 new documents - # with page size 1 and page size 100 - document_definition = {'pk': 'pk', 'id': 'doc2'} - await created_collection.create_item(body=document_definition) - document_definition = {'pk': 'pk', 'id': 'doc3'} - await created_collection.create_item(body=document_definition) - - for pageSize in [2, 100]: - # verify iterator - query_iterable = created_collection.query_items_change_feed( - continuation=continuation2, - max_item_count=pageSize, - partition_key=partition_key) - it = query_iterable.__aiter__() - expected_ids = 'doc2.doc3.' - actual_ids = '' - async for item in it: - actual_ids += item['id'] + '.' - assert actual_ids == expected_ids - - # verify by_page - # the options is not copied, therefore it need to be restored - query_iterable = created_collection.query_items_change_feed( - continuation=continuation2, - max_item_count=pageSize, - partition_key=partition_key - ) - count = 0 - expected_count = 2 - all_fetched_res = [] - pages = query_iterable.by_page() - async for items in await pages.__anext__(): - count += 1 - all_fetched_res.append(items) - assert count == expected_count - - actual_ids = '' - for item in all_fetched_res: - actual_ids += item['id'] + '.' 
- assert actual_ids == expected_ids - - # verify reading change feed from the beginning - query_iterable = created_collection.query_items_change_feed( - is_start_from_beginning=True, - partition_key=partition_key - ) - expected_ids = ['doc1', 'doc2', 'doc3'] - it = query_iterable.__aiter__() - for i in range(0, len(expected_ids)): - doc = await it.__anext__() - assert doc['id'] == expected_ids[i] - if 'Etag' in created_collection.client_connection.last_response_headers: - continuation3 = created_collection.client_connection.last_response_headers['Etag'] - elif 'etag' in created_collection.client_connection.last_response_headers: - continuation3 = created_collection.client_connection.last_response_headers['etag'] - else: - self.fail("No Etag or etag found in last response headers") - - # verify reading empty change feed - query_iterable = created_collection.query_items_change_feed( - continuation=continuation3, - is_start_from_beginning=True, - partition_key=partition_key - ) - iter_list = [item async for item in query_iterable] - assert len(iter_list) == 0 - - await self.created_db.delete_container(created_collection.id) - - # TODO: partition key range id 0 is relative to the way collection is created - @pytest.mark.skip - async def test_query_change_feed_with_pk_range_id_async(self): - created_collection = await self.created_db.create_container("cf_test_" + str(uuid.uuid4()), - PartitionKey(path="/pk")) - # The test targets partition #3 - partition_key_range_id = 0 - partition_param = {"partition_key_range_id": partition_key_range_id} - - # Read change feed without passing any options - query_iterable = created_collection.query_items_change_feed() - iter_list = [item async for item in query_iterable] - assert len(iter_list) == 0 - - # Read change feed from current should return an empty list - query_iterable = created_collection.query_items_change_feed(**partition_param) - iter_list = [item async for item in query_iterable] - assert len(iter_list) == 0 - if 'Etag' 
in created_collection.client_connection.last_response_headers: - assert created_collection.client_connection.last_response_headers['Etag'] - elif 'etag' in created_collection.client_connection.last_response_headers: - assert created_collection.client_connection.last_response_headers['etag'] - else: - self.fail("No Etag or etag found in last response headers") - - # Read change feed from beginning should return an empty list - query_iterable = created_collection.query_items_change_feed( - is_start_from_beginning=True, - **partition_param - ) - iter_list = [item async for item in query_iterable] - assert len(iter_list) == 0 - if 'Etag' in created_collection.client_connection.last_response_headers: - continuation1 = created_collection.client_connection.last_response_headers['Etag'] - elif 'etag' in created_collection.client_connection.last_response_headers: - continuation1 = created_collection.client_connection.last_response_headers['etag'] - else: - self.fail("No Etag or etag found in last response headers") - assert continuation1 != '' - - # Create a document. Read change feed should return be able to read that document - document_definition = {'pk': 'pk', 'id': 'doc1'} - await created_collection.create_item(body=document_definition) - query_iterable = created_collection.query_items_change_feed( - is_start_from_beginning=True, - **partition_param - ) - iter_list = [item async for item in query_iterable] - assert len(iter_list) == 1 - assert iter_list[0]['id'] == 'doc1' - if 'Etag' in created_collection.client_connection.last_response_headers: - continuation2 = created_collection.client_connection.last_response_headers['Etag'] - elif 'etag' in created_collection.client_connection.last_response_headers: - continuation2 = created_collection.client_connection.last_response_headers['etag'] - else: - self.fail("No Etag or etag found in last response headers") - assert continuation2 != '' - assert continuation2 != continuation1 - - # Create two new documents. 
Verify that change feed contains the 2 new documents - # with page size 1 and page size 100 - document_definition = {'pk': 'pk', 'id': 'doc2'} - await created_collection.create_item(body=document_definition) - document_definition = {'pk': 'pk', 'id': 'doc3'} - await created_collection.create_item(body=document_definition) - - for pageSize in [2, 100]: - # verify iterator - query_iterable = created_collection.query_items_change_feed( - continuation=continuation2, - max_item_count=pageSize, - **partition_param - ) - it = query_iterable.__aiter__() - expected_ids = 'doc2.doc3.' - actual_ids = '' - async for item in it: - actual_ids += item['id'] + '.' - assert actual_ids == expected_ids - - # verify by_page - # the options is not copied, therefore it need to be restored - query_iterable = created_collection.query_items_change_feed( - continuation=continuation2, - max_item_count=pageSize, - **partition_param - ) - count = 0 - expected_count = 2 - all_fetched_res = [] - pages = query_iterable.by_page() - async for items in await pages.__anext__(): - count += 1 - all_fetched_res.append(items) - assert count == expected_count - - actual_ids = '' - for item in all_fetched_res: - actual_ids += item['id'] + '.' 
- assert actual_ids == expected_ids - - # verify reading change feed from the beginning - query_iterable = created_collection.query_items_change_feed( - is_start_from_beginning=True, - **partition_param - ) - expected_ids = ['doc1', 'doc2', 'doc3'] - it = query_iterable.__aiter__() - for i in range(0, len(expected_ids)): - doc = await it.__anext__() - assert doc['id'] == expected_ids[i] - if 'Etag' in created_collection.client_connection.last_response_headers: - continuation3 = created_collection.client_connection.last_response_headers['Etag'] - elif 'etag' in created_collection.client_connection.last_response_headers: - continuation3 = created_collection.client_connection.last_response_headers['etag'] - else: - self.fail("No Etag or etag found in last response headers") - - # verify reading empty change feed - query_iterable = created_collection.query_items_change_feed( - continuation=continuation3, - is_start_from_beginning=True, - **partition_param - ) - iter_list = [item async for item in query_iterable] - assert len(iter_list) == 0 - - @pytest.mark.asyncio - async def test_query_change_feed_with_start_time(self): - created_collection = await self.created_db.create_container_if_not_exists("query_change_feed_start_time_test", - PartitionKey(path="/pk")) - batchSize = 50 - - def round_time(): - utc_now = datetime.now(timezone.utc) - return utc_now - timedelta(microseconds=utc_now.microsecond) - - async def create_random_items(container, batch_size): - for _ in range(batch_size): - # Generate a Random partition key - partition_key = 'pk' + str(uuid.uuid4()) - - # Generate a random item - item = { - 'id': 'item' + str(uuid.uuid4()), - 'partitionKey': partition_key, - 'content': 'This is some random content', - } - - try: - # Create the item in the container - await container.upsert_item(item) - except exceptions.CosmosHttpResponseError as e: - pytest.fail(e) - - # Create first batch of random items - await create_random_items(created_collection, batchSize) - - # 
wait for 1 second and record the time, then wait another second - await sleep(1) - start_time = round_time() - not_utc_time = datetime.now() - await sleep(1) - - # now create another batch of items - await create_random_items(created_collection, batchSize) - - # now query change feed based on start time - change_feed_iter = [i async for i in created_collection.query_items_change_feed(start_time=start_time)] - totalCount = len(change_feed_iter) - - # now check if the number of items that were changed match the batch size - assert totalCount == batchSize - - # negative test: pass in a valid time in the future - future_time = start_time + timedelta(hours=1) - change_feed_iter = [i async for i in created_collection.query_items_change_feed(start_time=future_time)] - totalCount = len(change_feed_iter) - # A future time should return 0 - assert totalCount == 0 - - # test a date that is not utc, will be converted to utc by sdk - change_feed_iter = [i async for i in created_collection.query_items_change_feed(start_time=not_utc_time)] - totalCount = len(change_feed_iter) - # Should equal batch size - assert totalCount == batchSize - - # test an invalid value, Attribute error will be raised for passing non datetime object - invalid_time = "Invalid value" - try: - change_feed_iter = [i async for i in created_collection.query_items_change_feed(start_time=invalid_time)] - self.fail("Cannot format date on a non datetime object.") - except AttributeError as e: - assert ("'str' object has no attribute 'astimezone'" == e.args[0]) - - await self.created_db.delete_container(created_collection.id) - @pytest.mark.asyncio async def test_populate_query_metrics_async(self): created_collection = await self.created_db.create_container( diff --git a/sdk/cosmos/azure-cosmos/test/test_vector_policy.py b/sdk/cosmos/azure-cosmos/test/test_vector_policy.py index da0aeb8ec6a8..e44f8f21c5fd 100644 --- a/sdk/cosmos/azure-cosmos/test/test_vector_policy.py +++ 
b/sdk/cosmos/azure-cosmos/test/test_vector_policy.py @@ -163,7 +163,7 @@ def test_fail_replace_vector_indexing_policy(self): pytest.fail("Container replace should have failed for indexing policy.") except exceptions.CosmosHttpResponseError as e: assert e.status_code == 400 - assert "Vector Indexing Policy cannot be changed in Collection Replace" in e.http_error_message + assert "vector indexing policy cannot be modified in Collection Replace" in e.http_error_message self.test_db.delete_container(container_id) def test_fail_create_vector_embedding_policy(self): diff --git a/sdk/cosmos/azure-cosmos/test/test_vector_policy_async.py b/sdk/cosmos/azure-cosmos/test/test_vector_policy_async.py index 19dd48268417..71c4997e3179 100644 --- a/sdk/cosmos/azure-cosmos/test/test_vector_policy_async.py +++ b/sdk/cosmos/azure-cosmos/test/test_vector_policy_async.py @@ -173,7 +173,7 @@ async def test_fail_replace_vector_indexing_policy_async(self): pytest.fail("Container replace should have failed for indexing policy.") except exceptions.CosmosHttpResponseError as e: assert e.status_code == 400 - assert "Vector Indexing Policy cannot be changed in Collection Replace" in e.http_error_message + assert "vector indexing policy cannot be modified in Collection Replace" in e.http_error_message await self.test_db.delete_container(container_id) async def test_fail_create_vector_embedding_policy_async(self): From cdb6d527eb335c975f45ce3291c933f7bc10224b Mon Sep 17 00:00:00 2001 From: annie-mac Date: Tue, 17 Sep 2024 10:07:18 -0700 Subject: [PATCH 02/12] expose feedRange as a class type --- .../azure-cosmos/azure/cosmos/__init__.py | 2 + .../cosmos/_change_feed/change_feed_state.py | 16 ++- ...feed_range_composite_continuation_token.py | 16 +-- .../{feed_range.py => feed_range_internal.py} | 15 +-- .../azure-cosmos/azure/cosmos/_feed_range.py | 97 +++++++++++++++++++ .../azure/cosmos/_routing/routing_range.py | 4 - .../azure/cosmos/aio/_container.py | 15 +-- 
.../azure-cosmos/azure/cosmos/container.py | 15 +-- 8 files changed, 140 insertions(+), 40 deletions(-) rename sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/{feed_range.py => feed_range_internal.py} (88%) create mode 100644 sdk/cosmos/azure-cosmos/azure/cosmos/_feed_range.py diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/__init__.py b/sdk/cosmos/azure-cosmos/azure/cosmos/__init__.py index 6565ebed8c89..b1e3d8bf2a30 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/__init__.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/__init__.py @@ -42,6 +42,7 @@ ) from .partition_key import PartitionKey from .permission import Permission +from ._feed_range import FeedRange __all__ = ( "CosmosClient", @@ -64,5 +65,6 @@ "TriggerType", "ConnectionRetryPolicy", "ThroughputProperties", + "FeedRange" ) __version__ = VERSION diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_state.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_state.py index 46dd1afddcfe..cefc5b5b2c3a 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_state.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_state.py @@ -34,7 +34,7 @@ from azure.cosmos._change_feed.change_feed_start_from import ChangeFeedStartFromInternal, \ ChangeFeedStartFromETagAndFeedRange from azure.cosmos._change_feed.composite_continuation_token import CompositeContinuationToken -from azure.cosmos._change_feed.feed_range import FeedRange, FeedRangeEpk, FeedRangePartitionKey +from azure.cosmos._change_feed.feed_range_internal import FeedRangeInternal, FeedRangeInternalEpk, FeedRangeInternalPartitionKey from azure.cosmos._change_feed.feed_range_composite_continuation_token import FeedRangeCompositeContinuation from azure.cosmos._routing.aio.routing_map_provider import SmartRoutingMapProvider as AsyncSmartRoutingMapProvider from azure.cosmos._routing.routing_map_provider import SmartRoutingMapProvider @@ -79,7 +79,7 @@ def 
apply_server_response_continuation(self, continuation: str, has_modified_res def from_json( container_link: str, container_rid: str, - change_feed_state_context: Dict[str, Any]): + change_feed_state_context: Dict[str, Any]) -> 'ChangeFeedState': if (change_feed_state_context.get("partitionKeyRangeId") or change_feed_state_context.get("continuationPkRangeId")): @@ -184,7 +184,7 @@ def __init__( self, container_link: str, container_rid: str, - feed_range: FeedRange, + feed_range: FeedRangeInternal, change_feed_start_from: ChangeFeedStartFromInternal, continuation: Optional[FeedRangeCompositeContinuation] ) -> None: @@ -380,22 +380,20 @@ def from_initial_state( collection_rid: str, change_feed_state_context: Dict[str, Any]) -> 'ChangeFeedStateV2': - feed_range: Optional[FeedRange] = None + feed_range: Optional[FeedRangeInternal] = None if change_feed_state_context.get("feedRange"): - feed_range_str = base64.b64decode(change_feed_state_context["feedRange"]).decode('utf-8') - feed_range_json = json.loads(feed_range_str) - feed_range = FeedRangeEpk(Range.ParseFromDict(feed_range_json)) + feed_range = change_feed_state_context.get("feedRange") elif change_feed_state_context.get("partitionKey"): if change_feed_state_context.get("partitionKeyFeedRange"): feed_range =\ - FeedRangePartitionKey( + FeedRangeInternalPartitionKey( change_feed_state_context["partitionKey"], change_feed_state_context["partitionKeyFeedRange"]) else: raise ValueError("partitionKey is in the changeFeedStateContext, but missing partitionKeyFeedRange") else: # default to full range - feed_range = FeedRangeEpk( + feed_range = FeedRangeInternalEpk( Range( "", "FF", diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range_composite_continuation_token.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range_composite_continuation_token.py index f5967b6bf34b..7d40f51f7994 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range_composite_continuation_token.py +++ 
b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range_composite_continuation_token.py @@ -26,7 +26,7 @@ from typing import Any, Deque, Dict, Optional from azure.cosmos._change_feed.composite_continuation_token import CompositeContinuationToken -from azure.cosmos._change_feed.feed_range import FeedRange, FeedRangeEpk, FeedRangePartitionKey +from azure.cosmos._change_feed.feed_range_internal import FeedRangeInternal, FeedRangeInternalEpk, FeedRangeInternalPartitionKey from azure.cosmos._routing.routing_map_provider import SmartRoutingMapProvider from azure.cosmos._routing.aio.routing_map_provider import SmartRoutingMapProvider as AsyncSmartRoutingMapProvider from azure.cosmos._routing.routing_range import Range @@ -39,7 +39,7 @@ class FeedRangeCompositeContinuation: def __init__( self, container_rid: str, - feed_range: FeedRange, + feed_range: FeedRangeInternal, continuation: Deque[CompositeContinuationToken]) -> None: if container_rid is None: raise ValueError("container_rid is missing") @@ -87,11 +87,11 @@ def from_json(cls, data) -> 'FeedRangeCompositeContinuation': for child_range_continuation_token in continuation_data] # parsing feed range - feed_range: Optional[FeedRange] = None - if data.get(FeedRangeEpk.type_property_name): - feed_range = FeedRangeEpk.from_json(data) - elif data.get(FeedRangePartitionKey.type_property_name): - feed_range = FeedRangePartitionKey.from_json(data, continuation[0].feed_range) + feed_range: Optional[FeedRangeInternal] = None + if data.get(FeedRangeInternalEpk.type_property_name): + feed_range = FeedRangeInternalEpk.from_json(data) + elif data.get(FeedRangeInternalPartitionKey.type_property_name): + feed_range = FeedRangeInternalPartitionKey.from_json(data, continuation[0].feed_range) else: raise ValueError("Invalid feed range composite continuation token [Missing feed range scope]") @@ -171,5 +171,5 @@ def apply_not_modified_response(self) -> None: self._initial_no_result_range = self._current_token.feed_range @property - 
def feed_range(self) -> FeedRange: + def feed_range(self) -> FeedRangeInternal: return self._feed_range diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range_internal.py similarity index 88% rename from sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range.py rename to sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range_internal.py index b4f731f2c2ef..b1940a031c2d 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range_internal.py @@ -29,7 +29,7 @@ from azure.cosmos.partition_key import _Undefined, _Empty -class FeedRange(ABC): +class FeedRangeInternal(ABC): @abstractmethod def get_normalized_range(self) -> Range: @@ -39,7 +39,7 @@ def get_normalized_range(self) -> Range: def to_dict(self) -> Dict[str, Any]: pass -class FeedRangePartitionKey(FeedRange): +class FeedRangeInternalPartitionKey(FeedRangeInternal): type_property_name = "PK" def __init__( @@ -69,7 +69,7 @@ def to_dict(self) -> Dict[str, Any]: return { self.type_property_name: self._pk_value } @classmethod - def from_json(cls, data: Dict[str, Any], feed_range: Range) -> 'FeedRangePartitionKey': + def from_json(cls, data: Dict[str, Any], feed_range: Range) -> 'FeedRangeInternalPartitionKey': if data.get(cls.type_property_name): pk_value = data.get(cls.type_property_name) if not pk_value: @@ -80,11 +80,11 @@ def from_json(cls, data: Dict[str, Any], feed_range: Range) -> 'FeedRangePartiti return cls(list(pk_value), feed_range) return cls(data[cls.type_property_name], feed_range) - raise ValueError(f"Can not parse FeedRangePartitionKey from the json," + raise ValueError(f"Can not parse FeedRangeInternalPartitionKey from the json," f" there is no property {cls.type_property_name}") -class FeedRangeEpk(FeedRange): +class FeedRangeInternalEpk(FeedRangeInternal): type_property_name = "Range" def __init__(self, feed_range: 
Range) -> None: @@ -102,8 +102,9 @@ def to_dict(self) -> Dict[str, Any]: } @classmethod - def from_json(cls, data: Dict[str, Any]) -> 'FeedRangeEpk': + def from_json(cls, data: Dict[str, Any]) -> 'FeedRangeInternalEpk': if data.get(cls.type_property_name): feed_range = Range.ParseFromDict(data.get(cls.type_property_name)) return cls(feed_range) - raise ValueError(f"Can not parse FeedRangeEPK from the json, there is no property {cls.type_property_name}") + raise ValueError(f"Can not parse FeedRangeInternalEPK from the json," + f" there is no property {cls.type_property_name}") diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_feed_range.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_feed_range.py new file mode 100644 index 000000000000..5d80a45c13aa --- /dev/null +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_feed_range.py @@ -0,0 +1,97 @@ + +# The MIT License (MIT) +# Copyright (c) 2014 Microsoft Corporation + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +import base64 +import json +from abc import ABC, abstractmethod +from typing import Any, Dict + +from azure.cosmos._change_feed.feed_range_internal import FeedRangeInternal, FeedRangeInternalEpk +from azure.cosmos._routing.routing_range import Range + + +class FeedRange(ABC): + """Represents a single feed range in an Azure Cosmos DB SQL API container. """ + + def to_string(self) -> str: + """Get a json representation of the feed range. + The returned json string can be used to create a new feed range from it. + :return: A json representation of the feed range. + :rtype: str + """ + return self._to_base64_encoded_string() + + @staticmethod + def from_string(json_str: str) -> 'FeedRange': + """ + Create a feed range from previously obtained string representation. + + :param json_str: A string representation of a feed range. + :return: A feed range. + :rtype: ~azure.cosmos.FeedRange + """ + feed_range_json_str = base64.b64decode(json_str).decode('utf-8') + feed_range_json = json.loads(feed_range_json_str) + if feed_range_json.get(FeedRangeEpk.type_property_name): + return FeedRangeEpk._from_json(feed_range_json) + else: + raise ValueError("Invalid feed range base64 encoded string [Wrong feed range type]") + + @abstractmethod + def _to_dict(self) -> Dict[str, Any]: + pass + + @abstractmethod + def _to_feed_range_internal(self) -> 'FeedRangeInternal': + pass + + def _to_base64_encoded_string(self) -> str: + data_json = json.dumps(self._to_dict()) + json_bytes = data_json.encode('utf-8') + # Encode the bytes to a Base64 string + base64_bytes = base64.b64encode(json_bytes) + # Convert the Base64 bytes to a string + return base64_bytes.decode('utf-8') + +class FeedRangeEpk (FeedRange): + type_property_name = "Range" + + def __init__(self, feed_range: Range) -> None: + if feed_range is None: + raise ValueError("feed_range cannot be None") + + self._feed_range = feed_range + + def _to_dict(self) -> Dict[str, Any]: + return { + self.type_property_name: 
self._feed_range.to_dict() + } + + def _to_feed_range_internal(self) -> 'FeedRangeInternal': + return FeedRangeInternalEpk(self._feed_range) + + @classmethod + def _from_json(cls, data: Dict[str, Any]) -> 'FeedRange': + if data.get(cls.type_property_name): + feed_range = Range.ParseFromDict(data.get(cls.type_property_name)) + return cls(feed_range) + raise ValueError(f"Can not parse FeedRangeEPK from the json, there is no property {cls.type_property_name}") diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_routing/routing_range.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_routing/routing_range.py index f2e7576bf376..a2d789f20644 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_routing/routing_range.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_routing/routing_range.py @@ -25,12 +25,8 @@ import base64 import binascii import json -from typing import Dict, Any -def partition_key_range_to_range_string(partition_key_range: Dict[str, Any]) -> str: - return Range.PartitionKeyRangeToRange(partition_key_range).to_base64_encoded_string() - class PartitionKeyRange(object): """Partition Key Range Constants""" diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py index d7d66738b4ee..17227813c29d 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py @@ -41,7 +41,8 @@ GenerateGuidId, _set_properties_cache ) -from .._routing.routing_range import Range, partition_key_range_to_range_string +from .._feed_range import FeedRange, FeedRangeEpk +from .._routing.routing_range import Range from ..offer import ThroughputProperties from ..partition_key import ( NonePartitionKeyValue, @@ -526,7 +527,7 @@ def query_items_change_feed( def query_items_change_feed( self, *, - feed_range: str, + feed_range: FeedRange, max_item_count: Optional[int] = None, start_time: Optional[Union[datetime, Literal["Now", "Beginning"]]] = None, priority: 
Optional[Literal["High", "Low"]] = None, @@ -534,7 +535,8 @@ def query_items_change_feed( ) -> AsyncItemPaged[Dict[str, Any]]: """Get a sorted list of items that were changed, in the order in which they were modified. - :keyword str feed_range: The feed range that is used to define the scope. + :keyword feed_range: The feed range that is used to define the scope. + :type feed_range: ~azure.cosmos.FeedRange :keyword int max_item_count: Max number of items to be returned in the enumeration operation. :keyword start_time: The start time to start processing chang feed items. Beginning: Processing the change feed items from the beginning of the change feed. @@ -659,7 +661,8 @@ def query_items_change_feed( # pylint: disable=unused-argument self._get_epk_range_for_partition_key(kwargs.pop('partition_key')) if kwargs.get("feed_range") is not None: - change_feed_state_context["feedRange"] = kwargs.pop('feed_range') + feed_range: FeedRange = kwargs.pop('feed_range') + change_feed_state_context["feedRange"] = feed_range._to_feed_range_internal() feed_options["containerProperties"] = self._get_properties() feed_options["changeFeedStateContext"] = change_feed_state_context @@ -1243,7 +1246,7 @@ async def read_feed_ranges( *, force_refresh: Optional[bool] = False, **kwargs: Any - ) -> List[str]: + ) -> List[FeedRange]: """ Obtains a list of feed ranges that can be used to parallelize feed operations. 
:keyword bool force_refresh: @@ -1262,4 +1265,4 @@ async def read_feed_ranges( [Range("", "FF", True, False)], **kwargs) - return [partition_key_range_to_range_string(partitionKeyRange) for partitionKeyRange in partition_key_ranges] + return [FeedRangeEpk(Range.PartitionKeyRangeToRange(partitionKeyRange)) for partitionKeyRange in partition_key_ranges] diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/container.py b/sdk/cosmos/azure-cosmos/azure/cosmos/container.py index e6a6ac7b36b9..d84d286e461c 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/container.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/container.py @@ -39,7 +39,8 @@ _set_properties_cache ) from ._cosmos_client_connection import CosmosClientConnection -from ._routing.routing_range import Range, partition_key_range_to_range_string +from ._feed_range import FeedRange, FeedRangeEpk +from ._routing.routing_range import Range from .offer import Offer, ThroughputProperties from .partition_key import ( NonePartitionKeyValue, @@ -352,7 +353,7 @@ def query_items_change_feed( def query_items_change_feed( self, *, - feed_range: str, + feed_range: FeedRange, max_item_count: Optional[int] = None, start_time: Optional[Union[datetime, Literal["Now", "Beginning"]]] = None, priority: Optional[Literal["High", "Low"]] = None, @@ -361,7 +362,8 @@ def query_items_change_feed( """Get a sorted list of items that were changed, in the order in which they were modified. - :keyword str feed_range: The feed range that is used to define the scope. + :keyword feed_range: The feed range that is used to define the scope. + :type feed_range: ~azure.cosmos.FeedRange :keyword int max_item_count: Max number of items to be returned in the enumeration operation. :keyword start_time: The start time to start processing chang feed items. Beginning: Processing the change feed items from the beginning of the change feed. 
@@ -500,7 +502,8 @@ def query_items_change_feed( self._get_epk_range_for_partition_key(kwargs.pop('partition_key')) if kwargs.get("feed_range") is not None: - change_feed_state_context["feedRange"] = kwargs.pop('feed_range') + feed_range: FeedRange = kwargs.pop('feed_range') + change_feed_state_context["feedRange"] = feed_range._to_feed_range_internal() container_properties = self._get_properties() feed_options["changeFeedStateContext"] = change_feed_state_context @@ -1310,7 +1313,7 @@ def read_feed_ranges( self, *, force_refresh: Optional[bool] = False, - **kwargs: Any) -> List[str]: + **kwargs: Any) -> List[FeedRange]: """ Obtains a list of feed ranges that can be used to parallelize feed operations. @@ -1329,4 +1332,4 @@ def read_feed_ranges( [Range("", "FF", True, False)], # default to full range **kwargs) - return [partition_key_range_to_range_string(partitionKeyRange) for partitionKeyRange in partition_key_ranges] + return [FeedRangeEpk(Range.PartitionKeyRangeToRange(partitionKeyRange)) for partitionKeyRange in partition_key_ranges] From ddd598e91ba949b7f5aeb4e87c20105ae77ad644 Mon Sep 17 00:00:00 2001 From: annie-mac Date: Tue, 17 Sep 2024 10:30:33 -0700 Subject: [PATCH 03/12] clean up change feed logic from query pipeline --- .../azure/cosmos/_cosmos_client_connection.py | 1 - .../aio/base_execution_context.py | 13 +------------ .../_execution_context/base_execution_context.py | 12 +----------- .../cosmos/aio/_cosmos_client_connection_async.py | 1 - 4 files changed, 2 insertions(+), 25 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_cosmos_client_connection.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_cosmos_client_connection.py index 49198910b772..aa0241d7f289 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_cosmos_client_connection.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_cosmos_client_connection.py @@ -1162,7 +1162,6 @@ def _QueryChangeFeed( options = {} else: options = dict(options) - options["changeFeed"] = True resource_key_map = 
{"Documents": "docs"} diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_execution_context/aio/base_execution_context.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_execution_context/aio/base_execution_context.py index 4ccef73388de..0e10cf263d75 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_execution_context/aio/base_execution_context.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_execution_context/aio/base_execution_context.py @@ -44,7 +44,6 @@ def __init__(self, client, options): """ self._client = client self._options = options - self._is_change_feed = "changeFeed" in options and options["changeFeed"] is True self._continuation = self._get_initial_continuation() self._has_started = False self._has_finished = False @@ -117,10 +116,6 @@ async def _fetch_items_helper_no_retries(self, fetch_function): fetched_items = [] new_options = copy.deepcopy(self._options) while self._continuation or not self._has_started: - # Check if this is first fetch for read from specific time change feed. - # For read specific time the first fetch will return empty even if we have more pages. - is_s_time_first_fetch = self._is_change_feed and self._options.get("startTime") and not self._has_started - new_options["continuation"] = self._continuation response_headers = {} @@ -129,13 +124,7 @@ async def _fetch_items_helper_no_retries(self, fetch_function): self._has_started = True continuation_key = http_constants.HttpHeaders.Continuation - # Use Etag as continuation token for change feed queries. - if self._is_change_feed: - continuation_key = http_constants.HttpHeaders.ETag - # In change feed queries, the continuation token is always populated. The hasNext() test is whether - # there is any items in the response or not. 
- # No initial fetch for start time change feed, so we need to pass continuation token for first fetch - if not self._is_change_feed or fetched_items or is_s_time_first_fetch: + if fetched_items: self._continuation = response_headers.get(continuation_key) else: self._continuation = None diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_execution_context/base_execution_context.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_execution_context/base_execution_context.py index b7ef17898656..c3924ffb6807 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_execution_context/base_execution_context.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_execution_context/base_execution_context.py @@ -42,7 +42,6 @@ def __init__(self, client, options): """ self._client = client self._options = options - self._is_change_feed = "changeFeed" in options and options["changeFeed"] is True self._continuation = self._get_initial_continuation() self._has_started = False self._has_finished = False @@ -115,9 +114,6 @@ def _fetch_items_helper_no_retries(self, fetch_function): fetched_items = [] new_options = copy.deepcopy(self._options) while self._continuation or not self._has_started: - # Check if this is first fetch for read from specific time change feed. - # For read specific time the first fetch will return empty even if we have more pages. - is_s_time_first_fetch = self._is_change_feed and self._options.get("startTime") and not self._has_started if not self._has_started: self._has_started = True new_options["continuation"] = self._continuation @@ -126,13 +122,7 @@ def _fetch_items_helper_no_retries(self, fetch_function): (fetched_items, response_headers) = fetch_function(new_options) continuation_key = http_constants.HttpHeaders.Continuation - # Use Etag as continuation token for change feed queries. - if self._is_change_feed: - continuation_key = http_constants.HttpHeaders.ETag - # In change feed queries, the continuation token is always populated. 
The hasNext() test is whether - # there is any items in the response or not. - # For start time however we get no initial results, so we need to pass continuation token - if not self._is_change_feed or fetched_items or is_s_time_first_fetch: + if fetched_items: self._continuation = response_headers.get(continuation_key) else: self._continuation = None diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_cosmos_client_connection_async.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_cosmos_client_connection_async.py index 9e73445e2063..eeb67225660a 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_cosmos_client_connection_async.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_cosmos_client_connection_async.py @@ -2277,7 +2277,6 @@ def _QueryChangeFeed( options = {} else: options = dict(options) - options["changeFeed"] = True resource_key_map = {"Documents": "docs"} From 27584f3b44b640d614564a0925d1919fe778eb78 Mon Sep 17 00:00:00 2001 From: annie-mac Date: Tue, 17 Sep 2024 16:16:29 -0700 Subject: [PATCH 04/12] fix --- .../cosmos/_execution_context/aio/base_execution_context.py | 6 ++---- .../cosmos/_execution_context/base_execution_context.py | 6 ++---- sdk/cosmos/azure-cosmos/azure/cosmos/_feed_range.py | 1 - 3 files changed, 4 insertions(+), 9 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_execution_context/aio/base_execution_context.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_execution_context/aio/base_execution_context.py index 0e10cf263d75..560ca6c05389 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_execution_context/aio/base_execution_context.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_execution_context/aio/base_execution_context.py @@ -124,10 +124,8 @@ async def _fetch_items_helper_no_retries(self, fetch_function): self._has_started = True continuation_key = http_constants.HttpHeaders.Continuation - if fetched_items: - self._continuation = response_headers.get(continuation_key) - else: - self._continuation = None + 
self._continuation = response_headers.get(continuation_key) + if fetched_items: break return fetched_items diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_execution_context/base_execution_context.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_execution_context/base_execution_context.py index c3924ffb6807..23ba3d170994 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_execution_context/base_execution_context.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_execution_context/base_execution_context.py @@ -122,10 +122,8 @@ def _fetch_items_helper_no_retries(self, fetch_function): (fetched_items, response_headers) = fetch_function(new_options) continuation_key = http_constants.HttpHeaders.Continuation - if fetched_items: - self._continuation = response_headers.get(continuation_key) - else: - self._continuation = None + self._continuation = response_headers.get(continuation_key) + if fetched_items: break return fetched_items diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_feed_range.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_feed_range.py index 5d80a45c13aa..671b71fa4723 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_feed_range.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_feed_range.py @@ -1,4 +1,3 @@ - # The MIT License (MIT) # Copyright (c) 2014 Microsoft Corporation From 95572e39beb2feb9a1c710a4e5cf189fac9d3757 Mon Sep 17 00:00:00 2001 From: annie-mac Date: Tue, 17 Sep 2024 16:56:44 -0700 Subject: [PATCH 05/12] fix doc --- sdk/cosmos/azure-cosmos/azure/cosmos/_feed_range.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_feed_range.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_feed_range.py index 671b71fa4723..4aba18fe1423 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_feed_range.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_feed_range.py @@ -29,14 +29,18 @@ class FeedRange(ABC): - """Represents a single feed range in an Azure Cosmos DB SQL API container. 
""" + """Represents a single feed range in an Azure Cosmos DB SQL API container. + + """ def to_string(self) -> str: - """Get a json representation of the feed range. - The returned json string can be used to create a new feed range from it. + """ + Get a json representation of the feed range. + The returned json string can be used to create a new feed range from it. :return: A json representation of the feed range. :rtype: str """ + return self._to_base64_encoded_string() @staticmethod From 8eeff36c146983ad16ed897a6866ddd65f15e2c4 Mon Sep 17 00:00:00 2001 From: annie-mac Date: Tue, 17 Sep 2024 17:27:38 -0700 Subject: [PATCH 06/12] fix doc --- .../azure/cosmos/_change_feed/change_feed_state.py | 7 ++++--- .../feed_range_composite_continuation_token.py | 3 ++- sdk/cosmos/azure-cosmos/azure/cosmos/_feed_range.py | 9 +++++---- sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py | 3 ++- sdk/cosmos/azure-cosmos/azure/cosmos/container.py | 3 ++- 5 files changed, 15 insertions(+), 10 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_state.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_state.py index cefc5b5b2c3a..f5674ba3e2d7 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_state.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_state.py @@ -34,7 +34,8 @@ from azure.cosmos._change_feed.change_feed_start_from import ChangeFeedStartFromInternal, \ ChangeFeedStartFromETagAndFeedRange from azure.cosmos._change_feed.composite_continuation_token import CompositeContinuationToken -from azure.cosmos._change_feed.feed_range_internal import FeedRangeInternal, FeedRangeInternalEpk, FeedRangeInternalPartitionKey +from azure.cosmos._change_feed.feed_range_internal import (FeedRangeInternal, FeedRangeInternalEpk, + FeedRangeInternalPartitionKey) from azure.cosmos._change_feed.feed_range_composite_continuation_token import FeedRangeCompositeContinuation from 
azure.cosmos._routing.aio.routing_map_provider import SmartRoutingMapProvider as AsyncSmartRoutingMapProvider from azure.cosmos._routing.routing_map_provider import SmartRoutingMapProvider @@ -388,7 +389,7 @@ def from_initial_state( feed_range =\ FeedRangeInternalPartitionKey( change_feed_state_context["partitionKey"], - change_feed_state_context["partitionKeyFeedRange"]) + change_feed_state_context["partitionKeyFeedRange"]) # type: FeedRangeInternal else: raise ValueError("partitionKey is in the changeFeedStateContext, but missing partitionKeyFeedRange") else: @@ -399,7 +400,7 @@ def from_initial_state( "FF", True, False) - ) + ) # type: FeedRangeInternal change_feed_start_from = ( ChangeFeedStartFromInternal.from_start_time(change_feed_state_context.get("startTime"))) diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range_composite_continuation_token.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range_composite_continuation_token.py index 7d40f51f7994..8f87ccfa194a 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range_composite_continuation_token.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range_composite_continuation_token.py @@ -26,7 +26,8 @@ from typing import Any, Deque, Dict, Optional from azure.cosmos._change_feed.composite_continuation_token import CompositeContinuationToken -from azure.cosmos._change_feed.feed_range_internal import FeedRangeInternal, FeedRangeInternalEpk, FeedRangeInternalPartitionKey +from azure.cosmos._change_feed.feed_range_internal import (FeedRangeInternal, FeedRangeInternalEpk, + FeedRangeInternalPartitionKey) from azure.cosmos._routing.routing_map_provider import SmartRoutingMapProvider from azure.cosmos._routing.aio.routing_map_provider import SmartRoutingMapProvider as AsyncSmartRoutingMapProvider from azure.cosmos._routing.routing_range import Range diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_feed_range.py 
b/sdk/cosmos/azure-cosmos/azure/cosmos/_feed_range.py index 4aba18fe1423..fafb64c9d660 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_feed_range.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_feed_range.py @@ -27,7 +27,7 @@ from azure.cosmos._change_feed.feed_range_internal import FeedRangeInternal, FeedRangeInternalEpk from azure.cosmos._routing.routing_range import Range - +# pylint: disable=protected-access class FeedRange(ABC): """Represents a single feed range in an Azure Cosmos DB SQL API container. @@ -37,6 +37,7 @@ def to_string(self) -> str: """ Get a json representation of the feed range. The returned json string can be used to create a new feed range from it. + :return: A json representation of the feed range. :rtype: str """ @@ -48,7 +49,7 @@ def from_string(json_str: str) -> 'FeedRange': """ Create a feed range from previously obtained string representation. - :param json_str: A string representation of a feed range. + :param str json_str: A string representation of a feed range. :return: A feed range. 
:rtype: ~azure.cosmos.FeedRange """ @@ -56,8 +57,8 @@ def from_string(json_str: str) -> 'FeedRange': feed_range_json = json.loads(feed_range_json_str) if feed_range_json.get(FeedRangeEpk.type_property_name): return FeedRangeEpk._from_json(feed_range_json) - else: - raise ValueError("Invalid feed range base64 encoded string [Wrong feed range type]") + + raise ValueError("Invalid feed range base64 encoded string [Wrong feed range type]") @abstractmethod def _to_dict(self) -> Dict[str, Any]: diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py index 17227813c29d..f4ccd2547d72 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py @@ -1265,4 +1265,5 @@ async def read_feed_ranges( [Range("", "FF", True, False)], **kwargs) - return [FeedRangeEpk(Range.PartitionKeyRangeToRange(partitionKeyRange)) for partitionKeyRange in partition_key_ranges] + return [FeedRangeEpk(Range.PartitionKeyRangeToRange(partitionKeyRange)) + for partitionKeyRange in partition_key_ranges] diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/container.py b/sdk/cosmos/azure-cosmos/azure/cosmos/container.py index d84d286e461c..af5097eaa4f4 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/container.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/container.py @@ -1332,4 +1332,5 @@ def read_feed_ranges( [Range("", "FF", True, False)], # default to full range **kwargs) - return [FeedRangeEpk(Range.PartitionKeyRangeToRange(partitionKeyRange)) for partitionKeyRange in partition_key_ranges] + return [FeedRangeEpk(Range.PartitionKeyRangeToRange(partitionKeyRange)) + for partitionKeyRange in partition_key_ranges] From 5061de2ba6f7c5e8c33e9adcabc5efab143aa083 Mon Sep 17 00:00:00 2001 From: annie-mac Date: Wed, 18 Sep 2024 08:41:50 -0700 Subject: [PATCH 07/12] fix pylint --- .../azure/cosmos/_change_feed/change_feed_state.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 
deletions(-) diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_state.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_state.py index f5674ba3e2d7..f31330fd2318 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_state.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_state.py @@ -389,7 +389,7 @@ def from_initial_state( feed_range =\ FeedRangeInternalPartitionKey( change_feed_state_context["partitionKey"], - change_feed_state_context["partitionKeyFeedRange"]) # type: FeedRangeInternal + change_feed_state_context["partitionKeyFeedRange"]) else: raise ValueError("partitionKey is in the changeFeedStateContext, but missing partitionKeyFeedRange") else: @@ -400,10 +400,12 @@ def from_initial_state( "FF", True, False) - ) # type: FeedRangeInternal + ) change_feed_start_from = ( ChangeFeedStartFromInternal.from_start_time(change_feed_state_context.get("startTime"))) + + assert feed_range is not None return cls( container_link=container_link, container_rid=collection_rid, From fc99213723472f72e2fc095fb028fa4398a25044 Mon Sep 17 00:00:00 2001 From: annie-mac Date: Wed, 18 Sep 2024 12:04:16 -0700 Subject: [PATCH 08/12] update docstring --- .../azure/cosmos/aio/_container.py | 24 +++++++++++++++++++ .../azure-cosmos/azure/cosmos/container.py | 24 +++++++++++++++++++ 2 files changed, 48 insertions(+) diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py index f4ccd2547d72..94cd1554eda0 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py @@ -610,6 +610,30 @@ def query_items_change_feed( # pylint: disable=unused-argument *args: Any, **kwargs: Any ) -> AsyncItemPaged[Dict[str, Any]]: + + """Get a sorted list of items that were changed, in the order in which they were modified. 
+ + :keyword str continuation: The continuation token retrieved from previous response. + :keyword feed_range: The feed range that is used to define the scope. + :type feed_range: ~azure.cosmos.FeedRange + :keyword partition_key: The partition key that is used to define the scope + (logical partition or a subset of a container) + :type partition_key: Union[str, int, float, bool, List[Union[str, int, float, bool]]] + :keyword int max_item_count: Max number of items to be returned in the enumeration operation. + :keyword start_time: The start time to start processing change feed items. + Beginning: Processing the change feed items from the beginning of the change feed. + Now: Processing change feed from the current time, so only events for all future changes will be retrieved. + ~datetime.datetime: processing change feed from a point of time. Provided value will be converted to UTC. + By default, it is start from current ("Now") + :type start_time: Union[~datetime.datetime, Literal["Now", "Beginning"]] + :keyword Literal["High", "Low"] priority: Priority based execution allows users to set a priority for each + request. Once the user has reached their provisioned throughput, low priority requests are throttled + before high priority requests start getting throttled. Feature must first be enabled at the account level. + :keyword Callable response_hook: A callable invoked with the response metadata. + :returns: An Iterable of items (dicts).
+ :rtype: Iterable[Dict[str, Any]] + """ + # pylint: disable=too-many-statements if kwargs.get("priority") is not None: kwargs['priority'] = kwargs['priority'] diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/container.py b/sdk/cosmos/azure-cosmos/azure/cosmos/container.py index af5097eaa4f4..c08e3122e01d 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/container.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/container.py @@ -436,6 +436,30 @@ def query_items_change_feed( *args: Any, **kwargs: Any ) -> ItemPaged[Dict[str, Any]]: + + """Get a sorted list of items that were changed, in the order in which they were modified. + + :keyword str continuation: The continuation token retrieved from previous response. + :keyword feed_range: The feed range that is used to define the scope. + :type feed_range: ~azure.cosmos.FeedRange + :keyword partition_key: The partition key that is used to define the scope + (logical partition or a subset of a container) + :type partition_key: Union[str, int, float, bool, List[Union[str, int, float, bool]]] + :keyword int max_item_count: Max number of items to be returned in the enumeration operation. + :keyword start_time: The start time to start processing change feed items. + Beginning: Processing the change feed items from the beginning of the change feed. + Now: Processing change feed from the current time, so only events for all future changes will be retrieved. + ~datetime.datetime: processing change feed from a point of time. Provided value will be converted to UTC. + By default, it is start from current ("Now") + :type start_time: Union[~datetime.datetime, Literal["Now", "Beginning"]] + :keyword Literal["High", "Low"] priority: Priority based execution allows users to set a priority for each + request. Once the user has reached their provisioned throughput, low priority requests are throttled + before high priority requests start getting throttled. Feature must first be enabled at the account level.
+ :keyword Callable response_hook: A callable invoked with the response metadata. + :returns: An Iterable of items (dicts). + :rtype: Iterable[Dict[str, Any]] + """ + # pylint: disable=too-many-statements if kwargs.get("priority") is not None: kwargs['priority'] = kwargs['priority'] From 1fc073dbd975be88f17c52a9a530ee3a97060728 Mon Sep 17 00:00:00 2001 From: annie-mac Date: Mon, 23 Sep 2024 12:14:19 -0700 Subject: [PATCH 09/12] resolve comments --- .../cosmos/_change_feed/change_feed_state.py | 15 ++++---- .../azure-cosmos/azure/cosmos/_feed_range.py | 34 +++++++++---------- 2 files changed, 25 insertions(+), 24 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_state.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_state.py index f31330fd2318..a3adecbf34c2 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_state.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_state.py @@ -405,10 +405,11 @@ def from_initial_state( change_feed_start_from = ( ChangeFeedStartFromInternal.from_start_time(change_feed_state_context.get("startTime"))) - assert feed_range is not None - return cls( - container_link=container_link, - container_rid=collection_rid, - feed_range=feed_range, - change_feed_start_from=change_feed_start_from, - continuation=None) + if feed_range is not None: + return cls( + container_link=container_link, + container_rid=collection_rid, + feed_range=feed_range, + change_feed_start_from=change_feed_start_from, + continuation=None) + raise RuntimeError("feed_range is empty") diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_feed_range.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_feed_range.py index fafb64c9d660..6e1ca83d45a9 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_feed_range.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_feed_range.py @@ -32,18 +32,6 @@ class FeedRange(ABC): """Represents a single feed range in an Azure Cosmos DB SQL API container. 
""" - - def to_string(self) -> str: - """ - Get a json representation of the feed range. - The returned json string can be used to create a new feed range from it. - - :return: A json representation of the feed range. - :rtype: str - """ - - return self._to_base64_encoded_string() - @staticmethod def from_string(json_str: str) -> 'FeedRange': """ @@ -76,7 +64,7 @@ def _to_base64_encoded_string(self) -> str: # Convert the Base64 bytes to a string return base64_bytes.decode('utf-8') -class FeedRangeEpk (FeedRange): +class FeedRangeEpk(FeedRange): type_property_name = "Range" def __init__(self, feed_range: Range) -> None: @@ -84,6 +72,18 @@ def __init__(self, feed_range: Range) -> None: raise ValueError("feed_range cannot be None") self._feed_range = feed_range + self._base64_encoded_string = None + + def __str__(self) -> str: + """Get a json representation of the feed range. + The returned json string can be used to create a new feed range from it. + + :return: A json representation of the feed range. 
+ """ + if self._base64_encoded_string is None: + self._base64_encoded_string = self._to_base64_encoded_string() + + return self._base64_encoded_string def _to_dict(self) -> Dict[str, Any]: return { @@ -95,7 +95,7 @@ def _to_feed_range_internal(self) -> 'FeedRangeInternal': @classmethod def _from_json(cls, data: Dict[str, Any]) -> 'FeedRange': - if data.get(cls.type_property_name): - feed_range = Range.ParseFromDict(data.get(cls.type_property_name)) - return cls(feed_range) - raise ValueError(f"Can not parse FeedRangeEPK from the json, there is no property {cls.type_property_name}") + try: + return cls(Range.ParseFromDict(data[cls.type_property_name])) + except KeyError as e: + raise ValueError(f"Can not parse FeedRangeEPK from the json, there is no property {cls.type_property_name}") from e From e698322cf44ab54de63f8debb27426fa881cc2d9 Mon Sep 17 00:00:00 2001 From: annie-mac Date: Wed, 2 Oct 2024 09:31:39 -0700 Subject: [PATCH 10/12] refactor --- .../_change_feed/feed_range_internal.py | 22 ++++++++++ .../azure-cosmos/azure/cosmos/_feed_range.py | 41 +++---------------- .../azure/cosmos/aio/_container.py | 4 +- .../azure-cosmos/azure/cosmos/container.py | 4 +- 4 files changed, 31 insertions(+), 40 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range_internal.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range_internal.py index b1940a031c2d..475eafd25521 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range_internal.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range_internal.py @@ -22,6 +22,8 @@ """Internal class for feed range implementation in the Azure Cosmos database service. 
""" +import base64 +import json from abc import ABC, abstractmethod from typing import Union, List, Dict, Any @@ -39,6 +41,14 @@ def get_normalized_range(self) -> Range: def to_dict(self) -> Dict[str, Any]: pass + def _to_base64_encoded_string(self) -> str: + data_json = json.dumps(self._to_dict()) + json_bytes = data_json.encode('utf-8') + # Encode the bytes to a Base64 string + base64_bytes = base64.b64encode(json_bytes) + # Convert the Base64 bytes to a string + return base64_bytes.decode('utf-8') + class FeedRangeInternalPartitionKey(FeedRangeInternal): type_property_name = "PK" @@ -92,6 +102,7 @@ def __init__(self, feed_range: Range) -> None: raise ValueError("feed_range cannot be None") self._range = feed_range + self._base64_encoded_string = None def get_normalized_range(self) -> Range: return self._range.to_normalized_range() @@ -108,3 +119,14 @@ def from_json(cls, data: Dict[str, Any]) -> 'FeedRangeInternalEpk': return cls(feed_range) raise ValueError(f"Can not parse FeedRangeInternalEPK from the json," f" there is no property {cls.type_property_name}") + + def __str__(self) -> str: + """Get a json representation of the feed range. + The returned json string can be used to create a new feed range from it. + + :return: A json representation of the feed range. 
+ """ + if self._base64_encoded_string is None: + self._base64_encoded_string = self._to_base64_encoded_string() + + return self._base64_encoded_string \ No newline at end of file diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_feed_range.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_feed_range.py index 6e1ca83d45a9..2bda669b6bc0 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_feed_range.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_feed_range.py @@ -21,10 +21,10 @@ import base64 import json -from abc import ABC, abstractmethod +from abc import ABC from typing import Any, Dict -from azure.cosmos._change_feed.feed_range_internal import FeedRangeInternal, FeedRangeInternalEpk +from azure.cosmos._change_feed.feed_range_internal import FeedRangeInternalEpk from azure.cosmos._routing.routing_range import Range # pylint: disable=protected-access @@ -48,22 +48,6 @@ def from_string(json_str: str) -> 'FeedRange': raise ValueError("Invalid feed range base64 encoded string [Wrong feed range type]") - @abstractmethod - def _to_dict(self) -> Dict[str, Any]: - pass - - @abstractmethod - def _to_feed_range_internal(self) -> 'FeedRangeInternal': - pass - - def _to_base64_encoded_string(self) -> str: - data_json = json.dumps(self._to_dict()) - json_bytes = data_json.encode('utf-8') - # Encode the bytes to a Base64 string - base64_bytes = base64.b64encode(json_bytes) - # Convert the Base64 bytes to a string - return base64_bytes.decode('utf-8') - class FeedRangeEpk(FeedRange): type_property_name = "Range" @@ -71,8 +55,7 @@ def __init__(self, feed_range: Range) -> None: if feed_range is None: raise ValueError("feed_range cannot be None") - self._feed_range = feed_range - self._base64_encoded_string = None + self._feed_range_internal = FeedRangeInternalEpk(feed_range) def __str__(self) -> str: """Get a json representation of the feed range. @@ -80,22 +63,8 @@ def __str__(self) -> str: :return: A json representation of the feed range. 
""" - if self._base64_encoded_string is None: - self._base64_encoded_string = self._to_base64_encoded_string() - - return self._base64_encoded_string - - def _to_dict(self) -> Dict[str, Any]: - return { - self.type_property_name: self._feed_range.to_dict() - } - - def _to_feed_range_internal(self) -> 'FeedRangeInternal': - return FeedRangeInternalEpk(self._feed_range) + return self._feed_range_internal.__str__() @classmethod def _from_json(cls, data: Dict[str, Any]) -> 'FeedRange': - try: - return cls(Range.ParseFromDict(data[cls.type_property_name])) - except KeyError as e: - raise ValueError(f"Can not parse FeedRangeEPK from the json, there is no property {cls.type_property_name}") from e + return cls(FeedRangeInternalEpk.from_json(data)._range) diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py index 94cd1554eda0..8f0d02595bcc 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py @@ -685,8 +685,8 @@ def query_items_change_feed( # pylint: disable=unused-argument self._get_epk_range_for_partition_key(kwargs.pop('partition_key')) if kwargs.get("feed_range") is not None: - feed_range: FeedRange = kwargs.pop('feed_range') - change_feed_state_context["feedRange"] = feed_range._to_feed_range_internal() + feed_range: FeedRangeEpk = kwargs.pop('feed_range') + change_feed_state_context["feedRange"] = feed_range._feed_range_internal feed_options["containerProperties"] = self._get_properties() feed_options["changeFeedStateContext"] = change_feed_state_context diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/container.py b/sdk/cosmos/azure-cosmos/azure/cosmos/container.py index c08e3122e01d..fc6de72136af 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/container.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/container.py @@ -526,8 +526,8 @@ def query_items_change_feed( 
self._get_epk_range_for_partition_key(kwargs.pop('partition_key')) if kwargs.get("feed_range") is not None: - feed_range: FeedRange = kwargs.pop('feed_range') - change_feed_state_context["feedRange"] = feed_range._to_feed_range_internal() + feed_range: FeedRangeEpk = kwargs.pop('feed_range') + change_feed_state_context["feedRange"] = feed_range._feed_range_internal container_properties = self._get_properties() feed_options["changeFeedStateContext"] = change_feed_state_context From 41074040f8bac57ad05018299606dc92b1956d6c Mon Sep 17 00:00:00 2001 From: annie-mac Date: Wed, 2 Oct 2024 11:06:11 -0700 Subject: [PATCH 11/12] fix pylint --- .../_change_feed/feed_range_internal.py | 8 +++---- .../azure/cosmos/aio/_container.py | 2 +- .../azure-cosmos/azure/cosmos/container.py | 1 + sdk/cosmos/azure-cosmos/samples/examples.py | 21 ++++++++++++++++- .../azure-cosmos/samples/examples_async.py | 23 +++++++++++++++++++ 5 files changed, 49 insertions(+), 6 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range_internal.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range_internal.py index 475eafd25521..c04fda0952f9 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range_internal.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range_internal.py @@ -25,7 +25,7 @@ import base64 import json from abc import ABC, abstractmethod -from typing import Union, List, Dict, Any +from typing import Union, List, Dict, Any, Optional from azure.cosmos._routing.routing_range import Range from azure.cosmos.partition_key import _Undefined, _Empty @@ -42,7 +42,7 @@ def to_dict(self) -> Dict[str, Any]: pass def _to_base64_encoded_string(self) -> str: - data_json = json.dumps(self._to_dict()) + data_json = json.dumps(self.to_dict()) json_bytes = data_json.encode('utf-8') # Encode the bytes to a Base64 string base64_bytes = base64.b64encode(json_bytes) @@ -102,7 +102,7 @@ def __init__(self, feed_range: Range) -> None: raise 
ValueError("feed_range cannot be None") self._range = feed_range - self._base64_encoded_string = None + self._base64_encoded_string: Optional[str] = None def get_normalized_range(self) -> Range: return self._range.to_normalized_range() @@ -129,4 +129,4 @@ def __str__(self) -> str: if self._base64_encoded_string is None: self._base64_encoded_string = self._to_base64_encoded_string() - return self._base64_encoded_string \ No newline at end of file + return self._base64_encoded_string diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py index 8f0d02595bcc..61dc5b563b49 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py @@ -630,10 +630,10 @@ def query_items_change_feed( # pylint: disable=unused-argument request. Once the user has reached their provisioned throughput, low priority requests are throttled before high priority requests start getting throttled. Feature must first be enabled at the account level. :keyword Callable response_hook: A callable invoked with the response metadata. + :param Any args: args :returns: An Iterable of items (dicts). :rtype: Iterable[Dict[str, Any]] """ - # pylint: disable=too-many-statements if kwargs.get("priority") is not None: kwargs['priority'] = kwargs['priority'] diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/container.py b/sdk/cosmos/azure-cosmos/azure/cosmos/container.py index fc6de72136af..52a3ba3fd4dc 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/container.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/container.py @@ -456,6 +456,7 @@ def query_items_change_feed( request. Once the user has reached their provisioned throughput, low priority requests are throttled before high priority requests start getting throttled. Feature must first be enabled at the account level. :keyword Callable response_hook: A callable invoked with the response metadata. 
+ :param Any args: args :returns: An Iterable of items (dicts). :rtype: Iterable[Dict[str, Any]] """ diff --git a/sdk/cosmos/azure-cosmos/samples/examples.py b/sdk/cosmos/azure-cosmos/samples/examples.py index 8039218b7a09..958d72c064d1 100644 --- a/sdk/cosmos/azure-cosmos/samples/examples.py +++ b/sdk/cosmos/azure-cosmos/samples/examples.py @@ -255,4 +255,23 @@ query='SELECT * FROM products p WHERE p.state = "GA"' ): container.delete_item(item, partition_key=["GA", "Atlanta", 30363]) -# [END delete_items] \ No newline at end of file +# [END delete_items] + +# Get the feed ranges list from container. +# [START read_feed_ranges] +container.read_feed_ranges() +# [END read_feed_ranges] + +# Query a sorted list of items that were changed for one feed range +# [START query_items_change_feed] +feed_ranges = container.read_feed_ranges() +for item in container.query_items_change_feed(feed_range=feed_ranges[0]): + print(json.dumps(item, indent=True)) +# [END query_items_change_feed] + +# Query a sorted list of items that were changed for one feed range +# [START query_items_change_feed_from_beginning] +feed_ranges = container.read_feed_ranges() +for item in container.query_items_change_feed(feed_range=feed_ranges[0], start_time="Beginning"): + print(json.dumps(item, indent=True)) +# [END query_items_change_feed_from_beginning] \ No newline at end of file diff --git a/sdk/cosmos/azure-cosmos/samples/examples_async.py b/sdk/cosmos/azure-cosmos/samples/examples_async.py index 06cec5fb07a8..33805fc71d7d 100644 --- a/sdk/cosmos/azure-cosmos/samples/examples_async.py +++ b/sdk/cosmos/azure-cosmos/samples/examples_async.py @@ -263,6 +263,29 @@ async def examples_async(): await container.delete_item(item, partition_key=["GA", "Atlanta", 30363]) # [END delete_items] + # Get the feed ranges list from container. + # [START read_feed_ranges] + await container.read_feed_ranges() + # [END read_feed_ranges] + + # Query a sorted list of items that were changed for one feed range. 
+ # The asynchronous client returns asynchronous iterators for its query methods; + # as such, we iterate over it by using an async for loop + # [START query_items_change_feed] + feed_ranges = await container.read_feed_ranges() + async for item in container.query_items_change_feed(feed_range=feed_ranges[0]): + print(json.dumps(item, indent=True)) + # [END query_items_change_feed] + + # Query a sorted list of items that were changed for one feed range from beginning. + # The asynchronous client returns asynchronous iterators for its query methods; + # as such, we iterate over it by using an async for loop + # [START query_items_change_feed_from_beginning] + feed_ranges = await container.read_feed_ranges() + async for item in container.query_items_change_feed(feed_range=feed_ranges[0], start_time="Beginning"): + print(json.dumps(item, indent=True)) + # [END query_items_change_feed_from_beginning] + await client.delete_database(database_name) print("Sample done running!") From 2b124a654c882e636607d152b1cfa7f185056475 Mon Sep 17 00:00:00 2001 From: annie-mac Date: Wed, 2 Oct 2024 11:42:57 -0700 Subject: [PATCH 12/12] update changelog --- sdk/cosmos/azure-cosmos/CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sdk/cosmos/azure-cosmos/CHANGELOG.md b/sdk/cosmos/azure-cosmos/CHANGELOG.md index ba6330e458f5..cc76baf6a780 100644 --- a/sdk/cosmos/azure-cosmos/CHANGELOG.md +++ b/sdk/cosmos/azure-cosmos/CHANGELOG.md @@ -4,6 +4,8 @@ #### Features Added * Added Retry Policy for Container Recreate in the Python SDK. See [PR 36043](https://github.com/Azure/azure-sdk-for-python/pull/36043) +* Added get feed ranges API. See [PR 37687](https://github.com/Azure/azure-sdk-for-python/pull/37687) +* Added feed range support in `query_items_change_feed`. See [PR 37687](https://github.com/Azure/azure-sdk-for-python/pull/37687) #### Breaking Changes