From 2950e205df044d9b907a813da946ff940c46e315 Mon Sep 17 00:00:00 2001 From: annie-mac Date: Wed, 14 Aug 2024 15:09:54 -0700 Subject: [PATCH 01/20] merge from main and resolve conflicts --- sdk/cosmos/azure-cosmos/azure/cosmos/_base.py | 19 +- .../azure/cosmos/_change_feed/__init__.py | 20 ++ .../azure/cosmos/_change_feed/aio/__init__.py | 20 ++ .../_change_feed/aio/change_feed_fetcher.py | 182 ++++++++++ .../_change_feed/aio/change_feed_iterable.py | 118 +++++++ .../aio/change_feed_start_from.py | 189 ++++++++++ .../_change_feed/aio/change_feed_state.py | 279 +++++++++++++++ .../aio/composite_continuation_token.py | 70 ++++ ...feed_range_composite_continuation_token.py | 134 +++++++ .../_change_feed/change_feed_fetcher.py | 181 ++++++++++ .../_change_feed/change_feed_iterable.py | 118 +++++++ .../_change_feed/change_feed_start_from.py | 189 ++++++++++ .../cosmos/_change_feed/change_feed_state.py | 279 +++++++++++++++ .../composite_continuation_token.py | 72 ++++ ...feed_range_composite_continuation_token.py | 134 +++++++ .../azure/cosmos/_cosmos_client_connection.py | 46 +-- .../azure/cosmos/_routing/routing_range.py | 73 ++++ .../azure-cosmos/azure/cosmos/_utils.py | 14 + .../azure/cosmos/aio/_container.py | 265 +++++++++++--- .../aio/_cosmos_client_connection_async.py | 10 +- .../azure-cosmos/azure/cosmos/container.py | 243 ++++++++++--- .../azure-cosmos/azure/cosmos/exceptions.py | 19 +- .../azure/cosmos/partition_key.py | 14 + .../azure-cosmos/test/test_change_feed.py | 295 ++++++++++++++++ .../test/test_change_feed_async.py | 322 +++++++++++++++++ sdk/cosmos/azure-cosmos/test/test_query.py | 290 +--------------- .../azure-cosmos/test/test_query_async.py | 328 +----------------- 27 files changed, 3177 insertions(+), 746 deletions(-) create mode 100644 sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/__init__.py create mode 100644 sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/__init__.py create mode 100644 sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_fetcher.py create mode 100644 sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_iterable.py create mode 100644 sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_start_from.py create mode 100644 sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_state.py create mode 100644 sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/composite_continuation_token.py create mode 100644 sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/feed_range_composite_continuation_token.py create mode 100644 sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_fetcher.py create mode 100644 sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_iterable.py create mode 100644 sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_start_from.py create mode 100644 sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_state.py create mode 100644 sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/composite_continuation_token.py create mode 100644 sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range_composite_continuation_token.py create mode 100644 sdk/cosmos/azure-cosmos/test/test_change_feed.py create mode 100644 sdk/cosmos/azure-cosmos/test/test_change_feed_async.py diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_base.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_base.py index bcf5d95e0ef0..ab305f03b020 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_base.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_base.py @@ -284,23 +284,8 @@ def GetHeaders( 
# pylint: disable=too-many-statements,too-many-branches if options.get("disableRUPerMinuteUsage"): headers[http_constants.HttpHeaders.DisableRUPerMinuteUsage] = options["disableRUPerMinuteUsage"] - if options.get("changeFeed") is True: - # On REST level, change feed is using IfNoneMatch/ETag instead of continuation. - if_none_match_value = None - if options.get("continuation"): - if_none_match_value = options["continuation"] - elif options.get("isStartFromBeginning") and not options["isStartFromBeginning"]: - if_none_match_value = "*" - elif options.get("startTime"): - start_time = options.get("startTime") - headers[http_constants.HttpHeaders.IfModified_since] = start_time - if if_none_match_value: - headers[http_constants.HttpHeaders.IfNoneMatch] = if_none_match_value - - headers[http_constants.HttpHeaders.AIM] = http_constants.HttpHeaders.IncrementalFeedHeaderValue - else: - if options.get("continuation"): - headers[http_constants.HttpHeaders.Continuation] = options["continuation"] + if options.get("continuation"): + headers[http_constants.HttpHeaders.Continuation] = options["continuation"] if options.get("populatePartitionKeyRangeStatistics"): headers[http_constants.HttpHeaders.PopulatePartitionKeyRangeStatistics] = options[ diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/__init__.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/__init__.py new file mode 100644 index 000000000000..f5373937e446 --- /dev/null +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/__init__.py @@ -0,0 +1,20 @@ +# The MIT License (MIT) +# Copyright (c) 2014 Microsoft Corporation + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
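Note: the change feed header logic removed from GetHeaders above is not dropped; it moves into the ChangeFeedStartFrom* classes introduced later in this patch, which translate a start position into the IfNoneMatch / IfModified_since request headers. A minimal sketch of that mapping, assuming the patch is applied (illustrative only, not part of the diff):

    from datetime import datetime, timezone
    from azure.cosmos._change_feed.change_feed_start_from import ChangeFeedStartFromInternal

    headers = {}
    # None or "Now" -> If-None-Match: "*"; "Beginning" -> no extra header;
    # a datetime -> If-Modified-Since with the UTC wall-clock time.
    start_from = ChangeFeedStartFromInternal.from_start_time(datetime.now(timezone.utc))
    start_from.populate_request_headers(headers)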
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/__init__.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/__init__.py new file mode 100644 index 000000000000..f5373937e446 --- /dev/null +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/__init__.py @@ -0,0 +1,20 @@ +# The MIT License (MIT) +# Copyright (c) 2014 Microsoft Corporation + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_fetcher.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_fetcher.py new file mode 100644 index 000000000000..83ca3025ee07 --- /dev/null +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_fetcher.py @@ -0,0 +1,182 @@ +# The MIT License (MIT) +# Copyright (c) 2014 Microsoft Corporation + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +"""Internal class for processing change feed implementation in the Azure Cosmos +database service. 
+""" +import base64 +import copy +import json +from abc import ABC, abstractmethod + +from azure.cosmos import http_constants, exceptions +from azure.cosmos._change_feed.aio.change_feed_state import ChangeFeedStateV1, ChangeFeedStateV2 +from azure.cosmos.aio import _retry_utility_async +from azure.cosmos.exceptions import CosmosHttpResponseError + + +class ChangeFeedFetcher(ABC): + + @abstractmethod + async def fetch_next_block(self): + pass + +class ChangeFeedFetcherV1(ChangeFeedFetcher): + """Internal class for change feed fetch v1 implementation. + This is used when partition key range id is used or when the supplied continuation token is in just simple etag. + Please note v1 does not support split or merge. + + """ + def __init__( + self, + client, + resource_link: str, + feed_options: dict[str, any], + fetch_function): + + self._client = client + self._feed_options = feed_options + + self._change_feed_state = self._feed_options.pop("changeFeedState") + if not isinstance(self._change_feed_state, ChangeFeedStateV1): + raise ValueError(f"ChangeFeedFetcherV1 can not handle change feed state version {type(self._change_feed_state)}") + self._change_feed_state.__class__ = ChangeFeedStateV1 + + self._resource_link = resource_link + self._fetch_function = fetch_function + + async def fetch_next_block(self): + """Returns a block of results. + + :return: List of results. + :rtype: list + """ + async def callback(): + return await self.fetch_change_feed_items(self._fetch_function) + + return await _retry_utility_async.ExecuteAsync(self._client, self._client._global_endpoint_manager, callback) + + async def fetch_change_feed_items(self, fetch_function) -> list[dict[str, any]]: + new_options = copy.deepcopy(self._feed_options) + new_options["changeFeedState"] = self._change_feed_state + + self._change_feed_state.populate_feed_options(new_options) + is_s_time_first_fetch = True + while True: + (fetched_items, response_headers) = await fetch_function(new_options) + continuation_key = http_constants.HttpHeaders.ETag + # In change feed queries, the continuation token is always populated. The hasNext() test is whether + # there is any items in the response or not. + # For start time however we get no initial results, so we need to pass continuation token? Is this true? + self._change_feed_state.apply_server_response_continuation( + response_headers.get(continuation_key)) + + if fetched_items: + break + elif is_s_time_first_fetch: + is_s_time_first_fetch = False + else: + break + return fetched_items + + +class ChangeFeedFetcherV2(object): + """Internal class for change feed fetch v2 implementation. + """ + + def __init__( + self, + client, + resource_link: str, + feed_options: dict[str, any], + fetch_function): + + self._client = client + self._feed_options = feed_options + + self._change_feed_state = self._feed_options.pop("changeFeedState") + if not isinstance(self._change_feed_state, ChangeFeedStateV2): + raise ValueError(f"ChangeFeedFetcherV2 can not handle change feed state version {type(self._change_feed_state)}") + self._change_feed_state.__class__ = ChangeFeedStateV2 + + self._resource_link = resource_link + self._fetch_function = fetch_function + + async def fetch_next_block(self): + """Returns a block of results. + + :return: List of results. 
+ :rtype: list + """ + + async def callback(): + return await self.fetch_change_feed_items(self._fetch_function) + + try: + return await _retry_utility_async.ExecuteAsync(self._client, self._client._global_endpoint_manager, callback) + except CosmosHttpResponseError as e: + if exceptions._partition_range_is_gone(e) or exceptions._is_partition_split_or_merge(e): + # refresh change feed state + await self._change_feed_state.handle_feed_range_gone(self._client._routing_map_provider, self._resource_link) + else: + raise e + + return await self.fetch_next_block() + + async def fetch_change_feed_items(self, fetch_function) -> list[dict[str, any]]: + new_options = copy.deepcopy(self._feed_options) + new_options["changeFeedState"] = self._change_feed_state + + self._change_feed_state.populate_feed_options(new_options) + + is_s_time_first_fetch = True + while True: + (fetched_items, response_headers) = await fetch_function(new_options) + + continuation_key = http_constants.HttpHeaders.ETag + # In change feed queries, the continuation token is always populated. The hasNext() test is whether + # there is any items in the response or not. + # For start time however we get no initial results, so we need to pass continuation token? Is this true? + if fetched_items: + self._change_feed_state.apply_server_response_continuation( + response_headers.get(continuation_key)) + response_headers[continuation_key] = self._get_base64_encoded_continuation() + break + else: + self._change_feed_state.apply_not_modified_response() + self._change_feed_state.apply_server_response_continuation( + response_headers.get(continuation_key)) + response_headers[continuation_key] = self._get_base64_encoded_continuation() + should_retry = self._change_feed_state.should_retry_on_not_modified_response() or is_s_time_first_fetch + is_s_time_first_fetch = False + if not should_retry: + break + + return fetched_items + + def _get_base64_encoded_continuation(self) -> str: + continuation_json = json.dumps(self._change_feed_state.to_dict()) + json_bytes = continuation_json.encode('utf-8') + # Encode the bytes to a Base64 string + base64_bytes = base64.b64encode(json_bytes) + # Convert the Base64 bytes to a string + return base64_bytes.decode('utf-8') + diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_iterable.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_iterable.py new file mode 100644 index 000000000000..501f3a7e4150 --- /dev/null +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_iterable.py @@ -0,0 +1,118 @@ +# The MIT License (MIT) +# Copyright (c) 2014 Microsoft Corporation + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +"""Iterable change feed results in the Azure Cosmos database service. +""" +from azure.core.async_paging import AsyncPageIterator + +from azure.cosmos._change_feed.aio.change_feed_fetcher import ChangeFeedFetcherV1, ChangeFeedFetcherV2 +from azure.cosmos._change_feed.aio.change_feed_state import ChangeFeedStateV1, ChangeFeedState +from azure.cosmos._utils import is_base64_encoded + + +class ChangeFeedIterable(AsyncPageIterator): + """Represents an iterable object of the change feed results. + + ChangeFeedIterable is a wrapper for change feed execution. + """ + + def __init__( + self, + client, + options, + fetch_function=None, + collection_link=None, + continuation_token=None, + ): + """Instantiates a ChangeFeedIterable for non-client side partitioning queries. + + ChangeFeedFetcher will be used as the internal query execution + context. + + :param CosmosClient client: Instance of document client. + :param dict options: The request options for the request. + :param method fetch_function: + + """ + self._client = client + self.retry_options = client.connection_policy.RetryOptions + self._options = options + self._fetch_function = fetch_function + self._collection_link = collection_link + + change_feed_state = self._options.get("changeFeedState") + if not change_feed_state: + raise ValueError("Missing changeFeedState in feed options") + + if isinstance(change_feed_state, ChangeFeedStateV1): + if continuation_token: + if is_base64_encoded(continuation_token): + raise ValueError("Incompatible continuation token") + else: + change_feed_state.apply_server_response_continuation(continuation_token) + + self._change_feed_fetcher = ChangeFeedFetcherV1( + self._client, + self._collection_link, + self._options, + fetch_function + ) + else: + if continuation_token: + if not is_base64_encoded(continuation_token): + raise ValueError("Incompatible continuation token") + + effective_change_feed_context = {"continuationFeedRange": continuation_token} + effective_change_feed_state = ChangeFeedState.from_json(change_feed_state.container_rid, effective_change_feed_context) + # replace with the effective change feed state + self._options["continuationFeedRange"] = effective_change_feed_state + + self._change_feed_fetcher = ChangeFeedFetcherV2( + self._client, + self._collection_link, + self._options, + fetch_function + ) + super(ChangeFeedIterable, self).__init__(self._fetch_next, self._unpack, continuation_token=continuation_token) + + async def _unpack(self, block): + continuation = None + if self._client.last_response_headers: + continuation = self._client.last_response_headers.get('etag') + + if block: + self._did_a_call_already = False + return continuation, block + + async def _fetch_next(self, *args): # pylint: disable=unused-argument + """Return a block of results with respecting retry policy. + + This method only exists for backward compatibility reasons. (Because + QueryIterable has exposed fetch_next_block api). + + :param Any args: + :return: List of results. 
+ :rtype: list + """ + block = await self._change_feed_fetcher.fetch_next_block() + if not block: + raise StopAsyncIteration + return block diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_start_from.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_start_from.py new file mode 100644 index 000000000000..99aeeb6eb914 --- /dev/null +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_start_from.py @@ -0,0 +1,189 @@ +# The MIT License (MIT) +# Copyright (c) 2014 Microsoft Corporation + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +"""Internal class for change feed start from implementation in the Azure Cosmos database service. +""" + +from abc import ABC, abstractmethod +from datetime import datetime, timezone +from enum import Enum +from typing import Optional, Union, Literal, Any + +from azure.cosmos import http_constants +from azure.cosmos._routing.routing_range import Range + +class ChangeFeedStartFromType(Enum): + BEGINNING = "Beginning" + NOW = "Now" + LEASE = "Lease" + POINT_IN_TIME = "PointInTime" + +class ChangeFeedStartFromInternal(ABC): + """Abstract class for change feed start from implementation in the Azure Cosmos database service. 
+ """ + + _type_property_name = "Type" + + @abstractmethod + def to_dict(self) -> dict[str, Any]: + pass + + @staticmethod + def from_start_time(start_time: Optional[Union[datetime, Literal["Now", "Beginning"]]]) -> 'ChangeFeedStartFromInternal': + if start_time is None: + return ChangeFeedStartFromNow() + elif isinstance(start_time, datetime): + return ChangeFeedStartFromPointInTime(start_time) + elif start_time.lower() == ChangeFeedStartFromType.NOW.value.lower(): + return ChangeFeedStartFromNow() + elif start_time.lower() == ChangeFeedStartFromType.BEGINNING.value.lower(): + return ChangeFeedStartFromBeginning() + else: + raise ValueError(f"Invalid start_time '{start_time}'") + + @staticmethod + def from_json(data: dict[str, any]) -> 'ChangeFeedStartFromInternal': + change_feed_start_from_type = data.get(ChangeFeedStartFromInternal._type_property_name) + if change_feed_start_from_type is None: + raise ValueError(f"Invalid start from json [Missing {ChangeFeedStartFromInternal._type_property_name}]") + + if change_feed_start_from_type == ChangeFeedStartFromType.BEGINNING.value: + return ChangeFeedStartFromBeginning.from_json(data) + elif change_feed_start_from_type == ChangeFeedStartFromType.LEASE.value: + return ChangeFeedStartFromETagAndFeedRange.from_json(data) + elif change_feed_start_from_type == ChangeFeedStartFromType.NOW.value: + return ChangeFeedStartFromNow.from_json(data) + elif change_feed_start_from_type == ChangeFeedStartFromType.POINT_IN_TIME.value: + return ChangeFeedStartFromPointInTime.from_json(data) + else: + raise ValueError(f"Can not process changeFeedStartFrom for type {change_feed_start_from_type}") + + @abstractmethod + def populate_request_headers(self, request_headers) -> None: + pass + + +class ChangeFeedStartFromBeginning(ChangeFeedStartFromInternal): + """Class for change feed start from beginning implementation in the Azure Cosmos database service. + """ + + def to_dict(self) -> dict[str, Any]: + return { + self._type_property_name: ChangeFeedStartFromType.BEGINNING.value + } + + def populate_request_headers(self, request_headers) -> None: + pass # there is no headers need to be set for start from beginning + + @classmethod + def from_json(cls, data: dict[str, Any]) -> 'ChangeFeedStartFromBeginning': + return ChangeFeedStartFromBeginning() + + +class ChangeFeedStartFromETagAndFeedRange(ChangeFeedStartFromInternal): + """Class for change feed start from etag and feed range implementation in the Azure Cosmos database service. 
+ """ + + _etag_property_name = "Etag" + _feed_range_property_name = "FeedRange" + + def __init__(self, etag, feed_range): + if feed_range is None: + raise ValueError("feed_range is missing") + + self._etag = etag + self._feed_range = feed_range + + def to_dict(self) -> dict[str, Any]: + return { + self._type_property_name: ChangeFeedStartFromType.LEASE.value, + self._etag_property_name: self._etag, + self._feed_range_property_name: self._feed_range.to_dict() + } + + @classmethod + def from_json(cls, data: dict[str, Any]) -> 'ChangeFeedStartFromETagAndFeedRange': + etag = data.get(cls._etag_property_name) + if etag is None: + raise ValueError(f"Invalid change feed start from [Missing {cls._etag_property_name}]") + + feed_range_data = data.get(cls._feed_range_property_name) + if feed_range_data is None: + raise ValueError(f"Invalid change feed start from [Missing {cls._feed_range_property_name}]") + feed_range = Range.ParseFromDict(feed_range_data) + return cls(etag, feed_range) + + def populate_request_headers(self, request_headers) -> None: + # change feed uses etag as the continuationToken + if self._etag: + request_headers[http_constants.HttpHeaders.IfNoneMatch] = self._etag + + +class ChangeFeedStartFromNow(ChangeFeedStartFromInternal): + """Class for change feed start from etag and feed range implementation in the Azure Cosmos database service. + """ + + def to_dict(self) -> dict[str, Any]: + return { + self._type_property_name: ChangeFeedStartFromType.NOW.value + } + + def populate_request_headers(self, request_headers) -> None: + request_headers[http_constants.HttpHeaders.IfNoneMatch] = "*" + + @classmethod + def from_json(cls, data: dict[str, Any]) -> 'ChangeFeedStartFromNow': + return ChangeFeedStartFromNow() + + +class ChangeFeedStartFromPointInTime(ChangeFeedStartFromInternal): + """Class for change feed start from point in time implementation in the Azure Cosmos database service. 
+ """ + + _point_in_time_ms_property_name = "PointInTimeMs" + + def __init__(self, start_time: datetime): + if start_time is None: + raise ValueError("start_time is missing") + + self._start_time = start_time + + def to_dict(self) -> dict[str, Any]: + return { + self._type_property_name: ChangeFeedStartFromType.POINT_IN_TIME.value, + self._point_in_time_ms_property_name: + int(self._start_time.astimezone(timezone.utc).timestamp() * 1000) + } + + def populate_request_headers(self, request_headers) -> None: + request_headers[http_constants.HttpHeaders.IfModified_since] =\ + self._start_time.astimezone(timezone.utc).strftime('%a, %d %b %Y %H:%M:%S GMT') + + @classmethod + def from_json(cls, data: dict[str, Any]) -> 'ChangeFeedStartFromPointInTime': + point_in_time_ms = data.get(cls._point_in_time_ms_property_name) + if point_in_time_ms is None: + raise ValueError(f"Invalid change feed start from {cls._point_in_time_ms_property_name} ") + + point_in_time = datetime.fromtimestamp(point_in_time_ms).astimezone(timezone.utc) + return ChangeFeedStartFromPointInTime(point_in_time) + + diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_state.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_state.py new file mode 100644 index 000000000000..ae2e37568bd4 --- /dev/null +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_state.py @@ -0,0 +1,279 @@ +# The MIT License (MIT) +# Copyright (c) 2014 Microsoft Corporation + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +"""Internal class for change feed state implementation in the Azure Cosmos +database service. 
+""" + +import base64 +import collections +import json +from abc import ABC, abstractmethod +from typing import Optional, Union, List, Any + +from azure.cosmos import http_constants +from azure.cosmos._change_feed.aio.change_feed_start_from import ChangeFeedStartFromETagAndFeedRange, \ + ChangeFeedStartFromInternal +from azure.cosmos._change_feed.aio.composite_continuation_token import CompositeContinuationToken +from azure.cosmos._change_feed.aio.feed_range_composite_continuation_token import FeedRangeCompositeContinuation +from azure.cosmos._routing.aio.routing_map_provider import SmartRoutingMapProvider +from azure.cosmos._routing.routing_range import Range +from azure.cosmos._utils import is_key_exists_and_not_none +from azure.cosmos.exceptions import CosmosFeedRangeGoneError +from azure.cosmos.partition_key import _Empty, _Undefined + + +class ChangeFeedState(ABC): + version_property_name = "v" + + @abstractmethod + def populate_feed_options(self, feed_options: dict[str, any]) -> None: + pass + + @abstractmethod + async def populate_request_headers(self, routing_provider: SmartRoutingMapProvider, request_headers: dict[str, any]) -> None: + pass + + @abstractmethod + def apply_server_response_continuation(self, continuation: str) -> None: + pass + + @staticmethod + def from_json(container_link: str, container_rid: str, data: dict[str, Any]): + if is_key_exists_and_not_none(data, "partitionKeyRangeId") or is_key_exists_and_not_none(data, "continuationPkRangeId"): + return ChangeFeedStateV1.from_json(container_link, container_rid, data) + else: + if is_key_exists_and_not_none(data, "continuationFeedRange"): + # get changeFeedState from continuation + continuation_json_str = base64.b64decode(data["continuationFeedRange"]).decode('utf-8') + continuation_json = json.loads(continuation_json_str) + version = continuation_json.get(ChangeFeedState.version_property_name) + if version is None: + raise ValueError("Invalid base64 encoded continuation string [Missing version]") + elif version == "V2": + return ChangeFeedStateV2.from_continuation(container_link, container_rid, continuation_json) + else: + raise ValueError("Invalid base64 encoded continuation string [Invalid version]") + # when there is no continuation token, by default construct ChangeFeedStateV2 + return ChangeFeedStateV2.from_initial_state(container_link, container_rid, data) + +class ChangeFeedStateV1(ChangeFeedState): + """Change feed state v1 implementation. 
This is used when partition key range id is used or the continuation is just simple _etag + """ + + def __init__( + self, + container_link: str, + container_rid: str, + change_feed_start_from: ChangeFeedStartFromInternal, + partition_key_range_id: Optional[str] = None, + partition_key: Optional[Union[str, int, float, bool, List[Union[str, int, float, bool]], _Empty, _Undefined]] = None, + continuation: Optional[str] = None): + + self._container_link = container_link + self._container_rid = container_rid + self._change_feed_start_from = change_feed_start_from + self._partition_key_range_id = partition_key_range_id + self._partition_key = partition_key + self._continuation = continuation + + @property + def container_rid(self): + return self._container_rid + + @classmethod + def from_json(cls, container_link: str, container_rid: str, data: dict[str, Any]) -> 'ChangeFeedStateV1': + return cls( + container_link, + container_rid, + ChangeFeedStartFromInternal.from_start_time(data.get("startTime")), + data.get("partitionKeyRangeId"), + data.get("partitionKey"), + data.get("continuationPkRangeId") + ) + + async def populate_request_headers(self, routing_provider: SmartRoutingMapProvider, headers: dict[str, Any]) -> None: + headers[http_constants.HttpHeaders.AIM] = http_constants.HttpHeaders.IncrementalFeedHeaderValue + + # When a merge happens, the child partition will contain documents ordered by LSN but the _ts/creation time + # of the documents may not be sequential. So when reading the changeFeed by LSN, it is possible to encounter documents with lower _ts. + # In order to guarantee we always get the documents after customer's point start time, we will need to always pass the start time in the header. + self._change_feed_start_from.populate_request_headers(headers) + if self._continuation: + headers[http_constants.HttpHeaders.IfNoneMatch] = self._continuation + + def populate_feed_options(self, feed_options: dict[str, any]) -> None: + if self._partition_key_range_id is not None: + feed_options["partitionKeyRangeId"] = self._partition_key_range_id + if self._partition_key is not None: + feed_options["partitionKey"] = self._partition_key + + def apply_server_response_continuation(self, continuation: str) -> None: + self._continuation = continuation + +class ChangeFeedStateV2(ChangeFeedState): + container_rid_property_name = "containerRid" + change_feed_mode_property_name = "mode" + change_feed_start_from_property_name = "startFrom" + continuation_property_name = "continuation" + + # TODO: adding change feed mode + def __init__( + self, + container_link: str, + container_rid: str, + feed_range: Range, + change_feed_start_from: ChangeFeedStartFromInternal, + continuation: Optional[FeedRangeCompositeContinuation] = None): + + self._container_link = container_link + self._container_rid = container_rid + self._feed_range = feed_range + self._change_feed_start_from = change_feed_start_from + self._continuation = continuation + if self._continuation is None: + composite_continuation_token_queue = collections.deque() + composite_continuation_token_queue.append(CompositeContinuationToken(self._feed_range, None)) + self._continuation =\ + FeedRangeCompositeContinuation(self._container_rid, self._feed_range, composite_continuation_token_queue) + + @property + def container_rid(self) -> str : + return self._container_rid + + def to_dict(self) -> dict[str, Any]: + return { + self.version_property_name: "V2", + self.container_rid_property_name: self._container_rid, + self.change_feed_mode_property_name: 
"Incremental", + self.change_feed_start_from_property_name: self._change_feed_start_from.to_dict(), + self.continuation_property_name: self._continuation.to_dict() + } + + async def populate_request_headers(self, routing_provider: SmartRoutingMapProvider, headers: dict[str, any]) -> None: + headers[http_constants.HttpHeaders.AIM] = http_constants.HttpHeaders.IncrementalFeedHeaderValue + + # When a merge happens, the child partition will contain documents ordered by LSN but the _ts/creation time + # of the documents may not be sequential. So when reading the changeFeed by LSN, it is possible to encounter documents with lower _ts. + # In order to guarantee we always get the documents after customer's point start time, we will need to always pass the start time in the header. + self._change_feed_start_from.populate_request_headers(headers) + + if self._continuation.current_token is not None and self._continuation.current_token.token is not None: + change_feed_start_from_feed_range_and_etag =\ + ChangeFeedStartFromETagAndFeedRange(self._continuation.current_token.token, self._continuation.current_token.feed_range) + change_feed_start_from_feed_range_and_etag.populate_request_headers(headers) + + # based on the feed range to find the overlapping partition key range id + over_lapping_ranges =\ + await routing_provider.get_overlapping_ranges( + self._container_link, + [self._continuation.current_token.feed_range]) + + if len(over_lapping_ranges) > 1: + raise CosmosFeedRangeGoneError(message= + f"Range {self._continuation.current_token.feed_range}" + f" spans {len(over_lapping_ranges)}" + f" physical partitions: {[child_range['id'] for child_range in over_lapping_ranges]}") + else: + overlapping_feed_range = Range.PartitionKeyRangeToRange(over_lapping_ranges[0]) + if overlapping_feed_range == self._continuation.current_token.feed_range: + # exactly mapping to one physical partition, only need to set the partitionKeyRangeId + headers[http_constants.HttpHeaders.PartitionKeyRangeID] = over_lapping_ranges[0]["id"] + else: + # the current token feed range spans less than single physical partition + # for this case, need to set both the partition key range id and epk filter headers + headers[http_constants.HttpHeaders.PartitionKeyRangeID] = over_lapping_ranges[0]["id"] + headers[http_constants.HttpHeaders.StartEpkString] = self._continuation.current_token.feed_range.min + headers[http_constants.HttpHeaders.EndEpkString] = self._continuation.current_token.feed_range.max + + def populate_feed_options(self, feed_options: dict[str, any]) -> None: + pass + + async def handle_feed_range_gone(self, routing_provider: SmartRoutingMapProvider, resource_link: str) -> None: + await self._continuation.handle_feed_range_gone(routing_provider, resource_link) + + def apply_server_response_continuation(self, continuation: str) -> None: + self._continuation.apply_server_response_continuation(continuation) + + def should_retry_on_not_modified_response(self): + self._continuation.should_retry_on_not_modified_response() + + def apply_not_modified_response(self) -> None: + self._continuation.apply_not_modified_response() + + @classmethod + def from_continuation( + cls, + container_link: str, + container_rid: str, + continuation_json: dict[str, Any]) -> 'ChangeFeedStateV2': + + container_rid_from_continuation = continuation_json.get(ChangeFeedStateV2.container_rid_property_name) + if container_rid_from_continuation is None: + raise ValueError(f"Invalid continuation: [Missing {ChangeFeedStateV2.container_rid_property_name}]") 
+ elif container_rid_from_continuation != container_rid: + raise ValueError("Invalid continuation: [Mismatch collection rid]") + + change_feed_start_from_data = continuation_json.get(ChangeFeedStateV2.change_feed_start_from_property_name) + if change_feed_start_from_data is None: + raise ValueError(f"Invalid continuation: [Missing {ChangeFeedStateV2.change_feed_start_from_property_name}]") + change_feed_start_from = ChangeFeedStartFromInternal.from_json(change_feed_start_from_data) + + continuation_data = continuation_json.get(ChangeFeedStateV2.continuation_property_name) + if continuation_data is None: + raise ValueError(f"Invalid continuation: [Missing {ChangeFeedStateV2.continuation_property_name}]") + continuation = FeedRangeCompositeContinuation.from_json(continuation_data) + return ChangeFeedStateV2( + container_link=container_link, + container_rid=container_rid, + feed_range=continuation.feed_range, + change_feed_start_from=change_feed_start_from, + continuation=continuation) + + @classmethod + def from_initial_state( + cls, + container_link: str, + collection_rid: str, + data: dict[str, Any]) -> 'ChangeFeedStateV2': + + if is_key_exists_and_not_none(data, "feedRange"): + feed_range_str = base64.b64decode(data["feedRange"]).decode('utf-8') + feed_range_json = json.loads(feed_range_str) + feed_range = Range.ParseFromDict(feed_range_json) + elif is_key_exists_and_not_none(data, "partitionKeyFeedRange"): + feed_range = data["partitionKeyFeedRange"] + else: + # default to full range + feed_range = Range( + "", + "FF", + True, + False) + + change_feed_start_from = ChangeFeedStartFromInternal.from_start_time(data.get("startTime")) + return cls( + container_link=container_link, + container_rid=collection_rid, + feed_range=feed_range, + change_feed_start_from=change_feed_start_from, + continuation=None) + diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/composite_continuation_token.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/composite_continuation_token.py new file mode 100644 index 000000000000..6d779fed1037 --- /dev/null +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/composite_continuation_token.py @@ -0,0 +1,70 @@ +# The MIT License (MIT) +# Copyright (c) 2014 Microsoft Corporation + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +"""Internal class for change feed composite continuation token in the Azure Cosmos +database service. 
+""" +from azure.cosmos._routing.routing_range import Range + + +class CompositeContinuationToken(object): + _token_property_name = "token" + _feed_range_property_name = "range" + + def __init__(self, feed_range: Range, token): + if range is None: + raise ValueError("range is missing") + + self._token = token + self._feed_range = feed_range + + def to_dict(self): + return { + self._token_property_name: self._token, + self._feed_range_property_name: self._feed_range.to_dict() + } + + @property + def feed_range(self): + return self._feed_range + + @property + def token(self): + return self._token + + def update_token(self, etag): + self._token = etag + + @classmethod + def from_json(cls, data): + token = data.get(cls._token_property_name) + if token is None: + raise ValueError(f"Invalid composite token [Missing {cls._token_property_name}]") + + feed_range_data = data.get(cls._feed_range_property_name) + if feed_range_data is None: + raise ValueError(f"Invalid composite token [Missing {cls._feed_range_property_name}]") + + feed_range = Range.ParseFromDict(feed_range_data) + return cls(feed_range=feed_range, token=token) + + def __repr__(self): + return f"CompositeContinuationToken(token={self.token}, range={self._feed_range})" diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/feed_range_composite_continuation_token.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/feed_range_composite_continuation_token.py new file mode 100644 index 000000000000..6e1b8f974eea --- /dev/null +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/feed_range_composite_continuation_token.py @@ -0,0 +1,134 @@ +# The MIT License (MIT) +# Copyright (c) 2014 Microsoft Corporation + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +"""Internal class for change feed continuation token by feed range in the Azure Cosmos +database service. 
+""" +import collections +from collections import deque +from typing import Any + +from azure.cosmos._change_feed.aio.composite_continuation_token import CompositeContinuationToken +from azure.cosmos._routing.aio.routing_map_provider import SmartRoutingMapProvider +from azure.cosmos._routing.routing_range import Range + + +class FeedRangeCompositeContinuation(object): + _version_property_name = "V" + _container_rid_property_name = "Rid" + _continuation_property_name = "Continuation" + _feed_range_property_name = "Range" + + def __init__( + self, + container_rid: str, + feed_range: Range, + continuation: collections.deque[CompositeContinuationToken]): + if container_rid is None: + raise ValueError("container_rid is missing") + + self._container_rid = container_rid + self._feed_range = feed_range + self._continuation = continuation + self._current_token = self._continuation[0] + self._initial_no_result_range = None + + @property + def current_token(self): + return self._current_token + + def to_dict(self) -> dict[str, Any]: + return { + self._version_property_name: "v1", #TODO: should this start from v2 + self._container_rid_property_name: self._container_rid, + self._continuation_property_name: [childToken.to_dict() for childToken in self._continuation], + self._feed_range_property_name: self._feed_range.to_dict() + } + + @classmethod + def from_json(cls, data) -> 'FeedRangeCompositeContinuation': + version = data.get(cls._version_property_name) + if version is None: + raise ValueError(f"Invalid feed range composite continuation token [Missing {cls._version_property_name}]") + if version != "v1": + raise ValueError("Invalid feed range composite continuation token [Invalid version]") + + container_rid = data.get(cls._container_rid_property_name) + if container_rid is None: + raise ValueError(f"Invalid feed range composite continuation token [Missing {cls._container_rid_property_name}]") + + feed_range_data = data.get(cls._feed_range_property_name) + if feed_range_data is None: + raise ValueError(f"Invalid feed range composite continuation token [Missing {cls._feed_range_property_name}]") + feed_range = Range.ParseFromDict(feed_range_data) + + continuation_data = data.get(cls._continuation_property_name) + if continuation_data is None: + raise ValueError(f"Invalid feed range composite continuation token [Missing {cls._continuation_property_name}]") + if not isinstance(continuation_data, list) or len(continuation_data) == 0: + raise ValueError(f"Invalid feed range composite continuation token [The {cls._continuation_property_name} must be non-empty array]") + continuation = [CompositeContinuationToken.from_json(child_range_continuation_token) for child_range_continuation_token in continuation_data] + + return cls(container_rid=container_rid, feed_range=feed_range, continuation=deque(continuation)) + + async def handle_feed_range_gone(self, routing_provider: SmartRoutingMapProvider, collection_link: str) -> None: + overlapping_ranges = await routing_provider.get_overlapping_ranges(collection_link, self._current_token.feed_range) + + if len(overlapping_ranges) == 1: + # merge,reusing the existing the feedRange and continuationToken + pass + else: + # split, remove the parent range and then add new child ranges. 
+ # For each new child range, using the continuation token from the parent + self._continuation.popleft() + for child_range in overlapping_ranges: + self._continuation.append(CompositeContinuationToken(Range.PartitionKeyRangeToRange(child_range), self._current_token.token)) + + self._current_token = self._continuation[0] + + def should_retry_on_not_modified_response(self) -> bool: + # when getting 304(Not Modified) response from one sub feed range, we will try to fetch for the next sub feed range + # we will repeat the above logic until we have looped through all sub feed ranges + + # TODO: validate the response headers, can we get the status code + if len(self._continuation) > 1: + return self._current_token.feed_range != self._initial_no_result_range + + else: + return False + + def _move_to_next_token(self) -> None: + first_composition_token = self._continuation.popleft() + # add the composition token to the end of the list + self._continuation.append(first_composition_token) + self._current_token = self._continuation[0] + + def apply_server_response_continuation(self, etag) -> None: + self._current_token.update_token(etag) + self._move_to_next_token() + + def apply_not_modified_response(self) -> None: + if self._initial_no_result_range is None: + self._initial_no_result_range = self._current_token.feed_range + + @property + def feed_range(self) -> Range: + return self._feed_range diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_fetcher.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_fetcher.py new file mode 100644 index 000000000000..fd8ac2787a8f --- /dev/null +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_fetcher.py @@ -0,0 +1,181 @@ +# The MIT License (MIT) +# Copyright (c) 2014 Microsoft Corporation + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +"""Internal class for processing change feed implementation in the Azure Cosmos +database service. +""" +import base64 +import copy +import json +from abc import ABC, abstractmethod + +from azure.cosmos import _retry_utility, http_constants, exceptions +from azure.cosmos._change_feed.change_feed_state import ChangeFeedStateV1, ChangeFeedStateV2 +from azure.cosmos.exceptions import CosmosHttpResponseError + + +class ChangeFeedFetcher(ABC): + + @abstractmethod + def fetch_next_block(self): + pass + +class ChangeFeedFetcherV1(ChangeFeedFetcher): + """Internal class for change feed fetch v1 implementation. 
+ This is used when partition key range id is used or when the supplied continuation token is in just simple etag. + Please note v1 does not support split or merge. + + """ + def __init__( + self, + client, + resource_link: str, + feed_options: dict[str, any], + fetch_function): + + self._client = client + self._feed_options = feed_options + + self._change_feed_state = self._feed_options.pop("changeFeedState") + if not isinstance(self._change_feed_state, ChangeFeedStateV1): + raise ValueError(f"ChangeFeedFetcherV1 can not handle change feed state version {type(self._change_feed_state)}") + self._change_feed_state.__class__ = ChangeFeedStateV1 + + self._resource_link = resource_link + self._fetch_function = fetch_function + + def fetch_next_block(self): + """Returns a block of results. + + :return: List of results. + :rtype: list + """ + def callback(): + return self.fetch_change_feed_items(self._fetch_function) + + return _retry_utility.Execute(self._client, self._client._global_endpoint_manager, callback) + + def fetch_change_feed_items(self, fetch_function) -> list[dict[str, any]]: + new_options = copy.deepcopy(self._feed_options) + new_options["changeFeedState"] = self._change_feed_state + + self._change_feed_state.populate_feed_options(new_options) + is_s_time_first_fetch = True + while True: + (fetched_items, response_headers) = fetch_function(new_options) + continuation_key = http_constants.HttpHeaders.ETag + # In change feed queries, the continuation token is always populated. The hasNext() test is whether + # there is any items in the response or not. + # For start time however we get no initial results, so we need to pass continuation token? Is this true? + self._change_feed_state.apply_server_response_continuation( + response_headers.get(continuation_key)) + + if fetched_items: + break + elif is_s_time_first_fetch: + is_s_time_first_fetch = False + else: + break + + return fetched_items + + +class ChangeFeedFetcherV2(object): + """Internal class for change feed fetch v2 implementation. + """ + + def __init__( + self, + client, + resource_link: str, + feed_options: dict[str, any], + fetch_function): + + self._client = client + self._feed_options = feed_options + + self._change_feed_state = self._feed_options.pop("changeFeedState") + if not isinstance(self._change_feed_state, ChangeFeedStateV2): + raise ValueError(f"ChangeFeedFetcherV2 can not handle change feed state version {type(self._change_feed_state)}") + self._change_feed_state.__class__ = ChangeFeedStateV2 + + self._resource_link = resource_link + self._fetch_function = fetch_function + + def fetch_next_block(self): + """Returns a block of results. + + :return: List of results. 
+ :rtype: list + """ + + def callback(): + return self.fetch_change_feed_items(self._fetch_function) + + try: + return _retry_utility.Execute(self._client, self._client._global_endpoint_manager, callback) + except CosmosHttpResponseError as e: + if exceptions._partition_range_is_gone(e) or exceptions._is_partition_split_or_merge(e): + # refresh change feed state + self._change_feed_state.handle_feed_range_gone(self._client._routing_map_provider, self._resource_link) + else: + raise e + + return self.fetch_next_block() + + def fetch_change_feed_items(self, fetch_function) -> list[dict[str, any]]: + new_options = copy.deepcopy(self._feed_options) + new_options["changeFeedState"] = self._change_feed_state + + self._change_feed_state.populate_feed_options(new_options) + + is_s_time_first_fetch = True + while True: + (fetched_items, response_headers) = fetch_function(new_options) + + continuation_key = http_constants.HttpHeaders.ETag + # In change feed queries, the continuation token is always populated. The hasNext() test is whether + # there is any items in the response or not. + # For start time however we get no initial results, so we need to pass continuation token? Is this true? + if fetched_items: + self._change_feed_state.apply_server_response_continuation( + response_headers.get(continuation_key)) + response_headers[continuation_key] = self._get_base64_encoded_continuation() + break + else: + self._change_feed_state.apply_not_modified_response() + self._change_feed_state.apply_server_response_continuation( + response_headers.get(continuation_key)) + response_headers[continuation_key] = self._get_base64_encoded_continuation() + should_retry = self._change_feed_state.should_retry_on_not_modified_response() or is_s_time_first_fetch + is_s_time_first_fetch = False + if not should_retry: + break + + return fetched_items + + def _get_base64_encoded_continuation(self) -> str: + continuation_json = json.dumps(self._change_feed_state.to_dict()) + json_bytes = continuation_json.encode('utf-8') + # Encode the bytes to a Base64 string + base64_bytes = base64.b64encode(json_bytes) + # Convert the Base64 bytes to a string + return base64_bytes.decode('utf-8') diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_iterable.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_iterable.py new file mode 100644 index 000000000000..676036180d29 --- /dev/null +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_iterable.py @@ -0,0 +1,118 @@ +# The MIT License (MIT) +# Copyright (c) 2014 Microsoft Corporation + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +"""Iterable change feed results in the Azure Cosmos database service. +""" + +from azure.core.paging import PageIterator + +from azure.cosmos._change_feed.change_feed_fetcher import ChangeFeedFetcherV1, ChangeFeedFetcherV2 +from azure.cosmos._change_feed.change_feed_state import ChangeFeedStateV1, ChangeFeedState +from azure.cosmos._utils import is_base64_encoded + + +class ChangeFeedIterable(PageIterator): + """Represents an iterable object of the change feed results. + + ChangeFeedIterable is a wrapper for change feed execution. + """ + + def __init__( + self, + client, + options, + fetch_function=None, + collection_link=None, + continuation_token=None, + ): + """Instantiates a ChangeFeedIterable for non-client side partitioning queries. + + :param CosmosClient client: Instance of document client. + :param dict options: The request options for the request. + :param fetch_function: The fetch function. + :param collection_link: The collection resource link. + :param continuation_token: The continuation token passed in from by_page + """ + + self._client = client + self.retry_options = client.connection_policy.RetryOptions + self._options = options + self._fetch_function = fetch_function + self._collection_link = collection_link + + change_feed_state = self._options.get("changeFeedState") + if not change_feed_state: + raise ValueError("Missing changeFeedState in feed options") + + if isinstance(change_feed_state, ChangeFeedStateV1): + if continuation_token: + if is_base64_encoded(continuation_token): + raise ValueError("Incompatible continuation token") + else: + change_feed_state.apply_server_response_continuation(continuation_token) + + self._change_feed_fetcher = ChangeFeedFetcherV1( + self._client, + self._collection_link, + self._options, + fetch_function + ) + else: + if continuation_token: + if not is_base64_encoded(continuation_token): + raise ValueError("Incompatible continuation token") + + effective_change_feed_context = {"continuationFeedRange": continuation_token} + effective_change_feed_state = ChangeFeedState.from_json(change_feed_state.container_rid, effective_change_feed_context) + # replace with the effective change feed state + self._options["continuationFeedRange"] = effective_change_feed_state + + self._change_feed_fetcher = ChangeFeedFetcherV2( + self._client, + self._collection_link, + self._options, + fetch_function + ) + super(ChangeFeedIterable, self).__init__(self._fetch_next, self._unpack, continuation_token=continuation_token) + + def _unpack(self, block): + continuation = None + if self._client.last_response_headers: + continuation = self._client.last_response_headers.get('etag') + + if block: + self._did_a_call_already = False + return continuation, block + + def _fetch_next(self, *args): # pylint: disable=unused-argument + """Return a block of results with respecting retry policy. + + This method only exists for backward compatibility reasons. (Because + QueryIterable has exposed fetch_next_block api). + + :param Any args: + :return: List of results. 
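The V1/V2 compatibility checks above hinge entirely on the shape of the continuation token: a bare _etag is treated as a legacy (V1) token, while a base64-encoded JSON document carries the full V2 change feed state. A rough, self-contained illustration of that distinction (not the SDK helper itself):

import base64
import json

def looks_like_v2_token(token: str) -> bool:
    # V2 tokens are base64-encoded JSON carrying a version marker ("v": "V2");
    # anything that does not decode cleanly is assumed to be a bare etag (V1).
    try:
        decoded = base64.b64decode(token, validate=True).decode("utf-8")
        return json.loads(decoded).get("v") == "V2"
    except Exception:
        return False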
+ :rtype: list + """ + block = self._change_feed_fetcher.fetch_next_block() + if not block: + raise StopIteration + return block diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_start_from.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_start_from.py new file mode 100644 index 000000000000..76a4d6b56803 --- /dev/null +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_start_from.py @@ -0,0 +1,189 @@ +# The MIT License (MIT) +# Copyright (c) 2014 Microsoft Corporation + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +"""Internal class for change feed start from implementation in the Azure Cosmos database service. +""" + +from abc import ABC, abstractmethod +from datetime import datetime, timezone +from enum import Enum +from typing import Optional, Union, Literal, Any + +from azure.cosmos import http_constants +from azure.cosmos._routing.routing_range import Range + +class ChangeFeedStartFromType(Enum): + BEGINNING = "Beginning" + NOW = "Now" + LEASE = "Lease" + POINT_IN_TIME = "PointInTime" + +class ChangeFeedStartFromInternal(ABC): + """Abstract class for change feed start from implementation in the Azure Cosmos database service. 
+ """ + + type_property_name = "Type" + + @abstractmethod + def to_dict(self) -> dict[str, Any]: + pass + + @staticmethod + def from_start_time(start_time: Optional[Union[datetime, Literal["Now", "Beginning"]]]) -> 'ChangeFeedStartFromInternal': + if start_time is None: + return ChangeFeedStartFromNow() + elif isinstance(start_time, datetime): + return ChangeFeedStartFromPointInTime(start_time) + elif start_time.lower() == ChangeFeedStartFromType.NOW.value.lower(): + return ChangeFeedStartFromNow() + elif start_time.lower() == ChangeFeedStartFromType.BEGINNING.value.lower(): + return ChangeFeedStartFromBeginning() + else: + raise ValueError(f"Invalid start_time '{start_time}'") + + @staticmethod + def from_json(data: dict[str, any]) -> 'ChangeFeedStartFromInternal': + change_feed_start_from_type = data.get(ChangeFeedStartFromInternal.type_property_name) + if change_feed_start_from_type is None: + raise ValueError(f"Invalid start from json [Missing {ChangeFeedStartFromInternal.type_property_name}]") + + if change_feed_start_from_type == ChangeFeedStartFromType.BEGINNING.value: + return ChangeFeedStartFromBeginning.from_json(data) + elif change_feed_start_from_type == ChangeFeedStartFromType.LEASE.value: + return ChangeFeedStartFromETagAndFeedRange.from_json(data) + elif change_feed_start_from_type == ChangeFeedStartFromType.NOW.value: + return ChangeFeedStartFromNow.from_json(data) + elif change_feed_start_from_type == ChangeFeedStartFromType.POINT_IN_TIME.value: + return ChangeFeedStartFromPointInTime.from_json(data) + else: + raise ValueError(f"Can not process changeFeedStartFrom for type {change_feed_start_from_type}") + + @abstractmethod + def populate_request_headers(self, request_headers) -> None: + pass + + +class ChangeFeedStartFromBeginning(ChangeFeedStartFromInternal): + """Class for change feed start from beginning implementation in the Azure Cosmos database service. + """ + + def to_dict(self) -> dict[str, Any]: + return { + self.type_property_name: ChangeFeedStartFromType.BEGINNING.value + } + + def populate_request_headers(self, request_headers) -> None: + pass # there is no headers need to be set for start from beginning + + @classmethod + def from_json(cls, data: dict[str, Any]) -> 'ChangeFeedStartFromBeginning': + return ChangeFeedStartFromBeginning() + + +class ChangeFeedStartFromETagAndFeedRange(ChangeFeedStartFromInternal): + """Class for change feed start from etag and feed range implementation in the Azure Cosmos database service. 
+ """ + + _etag_property_name = "Etag" + _feed_range_property_name = "FeedRange" + + def __init__(self, etag, feed_range): + if feed_range is None: + raise ValueError("feed_range is missing") + + self._etag = etag + self._feed_range = feed_range + + def to_dict(self) -> dict[str, Any]: + return { + self.type_property_name: ChangeFeedStartFromType.LEASE.value, + self._etag_property_name: self._etag, + self._feed_range_property_name: self._feed_range.to_dict() + } + + @classmethod + def from_json(cls, data: dict[str, Any]) -> 'ChangeFeedStartFromETagAndFeedRange': + etag = data.get(cls._etag_property_name) + if etag is None: + raise ValueError(f"Invalid change feed start from [Missing {cls._etag_property_name}]") + + feed_range_data = data.get(cls._feed_range_property_name) + if feed_range_data is None: + raise ValueError(f"Invalid change feed start from [Missing {cls._feed_range_property_name}]") + feed_range = Range.ParseFromDict(feed_range_data) + return cls(etag, feed_range) + + def populate_request_headers(self, request_headers) -> None: + # change feed uses etag as the continuationToken + if self._etag: + request_headers[http_constants.HttpHeaders.IfNoneMatch] = self._etag + + +class ChangeFeedStartFromNow(ChangeFeedStartFromInternal): + """Class for change feed start from etag and feed range implementation in the Azure Cosmos database service. + """ + + def to_dict(self) -> dict[str, Any]: + return { + self.type_property_name: ChangeFeedStartFromType.NOW.value + } + + def populate_request_headers(self, request_headers) -> None: + request_headers[http_constants.HttpHeaders.IfNoneMatch] = "*" + + @classmethod + def from_json(cls, data: dict[str, Any]) -> 'ChangeFeedStartFromNow': + return ChangeFeedStartFromNow() + + +class ChangeFeedStartFromPointInTime(ChangeFeedStartFromInternal): + """Class for change feed start from point in time implementation in the Azure Cosmos database service. 
+ """ + + _point_in_time_ms_property_name = "PointInTimeMs" + + def __init__(self, start_time: datetime): + if start_time is None: + raise ValueError("start_time is missing") + + self._start_time = start_time + + def to_dict(self) -> dict[str, Any]: + return { + self.type_property_name: ChangeFeedStartFromType.POINT_IN_TIME.value, + self._point_in_time_ms_property_name: + int(self._start_time.astimezone(timezone.utc).timestamp() * 1000) + } + + def populate_request_headers(self, request_headers) -> None: + request_headers[http_constants.HttpHeaders.IfModified_since] =\ + self._start_time.astimezone(timezone.utc).strftime('%a, %d %b %Y %H:%M:%S GMT') + + @classmethod + def from_json(cls, data: dict[str, Any]) -> 'ChangeFeedStartFromPointInTime': + point_in_time_ms = data.get(cls._point_in_time_ms_property_name) + if point_in_time_ms is None: + raise ValueError(f"Invalid change feed start from {cls._point_in_time_ms_property_name} ") + + point_in_time = datetime.fromtimestamp(point_in_time_ms).astimezone(timezone.utc) + return ChangeFeedStartFromPointInTime(point_in_time) + + diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_state.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_state.py new file mode 100644 index 000000000000..8c61c306b94e --- /dev/null +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_state.py @@ -0,0 +1,279 @@ +# The MIT License (MIT) +# Copyright (c) 2014 Microsoft Corporation + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +"""Internal class for change feed state implementation in the Azure Cosmos +database service. 
+""" + +import base64 +import collections +import json +from abc import ABC, abstractmethod +from typing import Optional, Union, List, Any + +from azure.cosmos import http_constants +from azure.cosmos._change_feed.change_feed_start_from import ChangeFeedStartFromInternal, \ + ChangeFeedStartFromETagAndFeedRange +from azure.cosmos._change_feed.composite_continuation_token import CompositeContinuationToken +from azure.cosmos._change_feed.feed_range_composite_continuation_token import FeedRangeCompositeContinuation +from azure.cosmos._routing.routing_map_provider import SmartRoutingMapProvider +from azure.cosmos._routing.routing_range import Range +from azure.cosmos._utils import is_key_exists_and_not_none +from azure.cosmos.exceptions import CosmosFeedRangeGoneError +from azure.cosmos.partition_key import _Empty, _Undefined + + +class ChangeFeedState(ABC): + version_property_name = "v" + + @abstractmethod + def populate_feed_options(self, feed_options: dict[str, any]) -> None: + pass + + @abstractmethod + def populate_request_headers(self, routing_provider: SmartRoutingMapProvider, request_headers: dict[str, any]) -> None: + pass + + @abstractmethod + def apply_server_response_continuation(self, continuation: str) -> None: + pass + + @staticmethod + def from_json(container_link: str, container_rid: str, data: dict[str, Any]): + if is_key_exists_and_not_none(data, "partitionKeyRangeId") or is_key_exists_and_not_none(data, "continuationPkRangeId"): + return ChangeFeedStateV1.from_json(container_link, container_rid, data) + else: + if is_key_exists_and_not_none(data, "continuationFeedRange"): + # get changeFeedState from continuation + continuation_json_str = base64.b64decode(data["continuationFeedRange"]).decode('utf-8') + continuation_json = json.loads(continuation_json_str) + version = continuation_json.get(ChangeFeedState.version_property_name) + if version is None: + raise ValueError("Invalid base64 encoded continuation string [Missing version]") + elif version == "V2": + return ChangeFeedStateV2.from_continuation(container_link, container_rid, continuation_json) + else: + raise ValueError("Invalid base64 encoded continuation string [Invalid version]") + # when there is no continuation token, by default construct ChangeFeedStateV2 + return ChangeFeedStateV2.from_initial_state(container_link, container_rid, data) + +class ChangeFeedStateV1(ChangeFeedState): + """Change feed state v1 implementation. 
This is used when partition key range id is used or the continuation is just simple _etag + """ + + def __init__( + self, + container_link: str, + container_rid: str, + change_feed_start_from: ChangeFeedStartFromInternal, + partition_key_range_id: Optional[str] = None, + partition_key: Optional[Union[str, int, float, bool, List[Union[str, int, float, bool]], _Empty, _Undefined]] = None, + continuation: Optional[str] = None): + + self._container_link = container_link + self._container_rid = container_rid + self._change_feed_start_from = change_feed_start_from + self._partition_key_range_id = partition_key_range_id + self._partition_key = partition_key + self._continuation = continuation + + @property + def container_rid(self): + return self._container_rid + + @classmethod + def from_json(cls, container_link: str, container_rid: str, data: dict[str, Any]) -> 'ChangeFeedStateV1': + return cls( + container_link, + container_rid, + ChangeFeedStartFromInternal.from_start_time(data.get("startTime")), + data.get("partitionKeyRangeId"), + data.get("partitionKey"), + data.get("continuationPkRangeId") + ) + + def populate_request_headers(self, routing_provider: SmartRoutingMapProvider, headers: dict[str, Any]) -> None: + headers[http_constants.HttpHeaders.AIM] = http_constants.HttpHeaders.IncrementalFeedHeaderValue + + # When a merge happens, the child partition will contain documents ordered by LSN but the _ts/creation time + # of the documents may not be sequential. So when reading the changeFeed by LSN, it is possible to encounter documents with lower _ts. + # In order to guarantee we always get the documents after customer's point start time, we will need to always pass the start time in the header. + self._change_feed_start_from.populate_request_headers(headers) + if self._continuation: + headers[http_constants.HttpHeaders.IfNoneMatch] = self._continuation + + def populate_feed_options(self, feed_options: dict[str, any]) -> None: + if self._partition_key_range_id is not None: + feed_options["partitionKeyRangeId"] = self._partition_key_range_id + if self._partition_key is not None: + feed_options["partitionKey"] = self._partition_key + + def apply_server_response_continuation(self, continuation: str) -> None: + self._continuation = continuation + +class ChangeFeedStateV2(ChangeFeedState): + container_rid_property_name = "containerRid" + change_feed_mode_property_name = "mode" + change_feed_start_from_property_name = "startFrom" + continuation_property_name = "continuation" + + # TODO: adding change feed mode + def __init__( + self, + container_link: str, + container_rid: str, + feed_range: Range, + change_feed_start_from: ChangeFeedStartFromInternal, + continuation: Optional[FeedRangeCompositeContinuation] = None): + + self._container_link = container_link + self._container_rid = container_rid + self._feed_range = feed_range + self._change_feed_start_from = change_feed_start_from + self._continuation = continuation + if self._continuation is None: + composite_continuation_token_queue = collections.deque() + composite_continuation_token_queue.append(CompositeContinuationToken(self._feed_range, None)) + self._continuation =\ + FeedRangeCompositeContinuation(self._container_rid, self._feed_range, composite_continuation_token_queue) + + @property + def container_rid(self) -> str : + return self._container_rid + + def to_dict(self) -> dict[str, Any]: + return { + self.version_property_name: "V2", + self.container_rid_property_name: self._container_rid, + self.change_feed_mode_property_name: 
"Incremental", + self.change_feed_start_from_property_name: self._change_feed_start_from.to_dict(), + self.continuation_property_name: self._continuation.to_dict() + } + + def populate_request_headers(self, routing_provider: SmartRoutingMapProvider, headers: dict[str, any]) -> None: + headers[http_constants.HttpHeaders.AIM] = http_constants.HttpHeaders.IncrementalFeedHeaderValue + + # When a merge happens, the child partition will contain documents ordered by LSN but the _ts/creation time + # of the documents may not be sequential. So when reading the changeFeed by LSN, it is possible to encounter documents with lower _ts. + # In order to guarantee we always get the documents after customer's point start time, we will need to always pass the start time in the header. + self._change_feed_start_from.populate_request_headers(headers) + + if self._continuation.current_token is not None and self._continuation.current_token.token is not None: + change_feed_start_from_feed_range_and_etag =\ + ChangeFeedStartFromETagAndFeedRange(self._continuation.current_token.token, self._continuation.current_token.feed_range) + change_feed_start_from_feed_range_and_etag.populate_request_headers(headers) + + # based on the feed range to find the overlapping partition key range id + over_lapping_ranges =\ + routing_provider.get_overlapping_ranges( + self._container_link, + [self._continuation.current_token.feed_range]) + + if len(over_lapping_ranges) > 1: + raise CosmosFeedRangeGoneError(message= + f"Range {self._continuation.current_token.feed_range}" + f" spans {len(over_lapping_ranges)}" + f" physical partitions: {[child_range['id'] for child_range in over_lapping_ranges]}") + else: + overlapping_feed_range = Range.PartitionKeyRangeToRange(over_lapping_ranges[0]) + if overlapping_feed_range == self._continuation.current_token.feed_range: + # exactly mapping to one physical partition, only need to set the partitionKeyRangeId + headers[http_constants.HttpHeaders.PartitionKeyRangeID] = over_lapping_ranges[0]["id"] + else: + # the current token feed range spans less than single physical partition + # for this case, need to set both the partition key range id and epk filter headers + headers[http_constants.HttpHeaders.PartitionKeyRangeID] = over_lapping_ranges[0]["id"] + headers[http_constants.HttpHeaders.StartEpkString] = self._continuation.current_token.feed_range.min + headers[http_constants.HttpHeaders.EndEpkString] = self._continuation.current_token.feed_range.max + + def populate_feed_options(self, feed_options: dict[str, any]) -> None: + pass + + def handle_feed_range_gone(self, routing_provider: SmartRoutingMapProvider, resource_link: str) -> None: + self._continuation.handle_feed_range_gone(routing_provider, resource_link) + + def apply_server_response_continuation(self, continuation: str) -> None: + self._continuation.apply_server_response_continuation(continuation) + + def should_retry_on_not_modified_response(self): + self._continuation.should_retry_on_not_modified_response() + + def apply_not_modified_response(self) -> None: + self._continuation.apply_not_modified_response() + + @classmethod + def from_continuation( + cls, + container_link: str, + container_rid: str, + continuation_json: dict[str, Any]) -> 'ChangeFeedStateV2': + + container_rid_from_continuation = continuation_json.get(ChangeFeedStateV2.container_rid_property_name) + if container_rid_from_continuation is None: + raise ValueError(f"Invalid continuation: [Missing {ChangeFeedStateV2.container_rid_property_name}]") + elif 
container_rid_from_continuation != container_rid: + raise ValueError("Invalid continuation: [Mismatch collection rid]") + + change_feed_start_from_data = continuation_json.get(ChangeFeedStateV2.change_feed_start_from_property_name) + if change_feed_start_from_data is None: + raise ValueError(f"Invalid continuation: [Missing {ChangeFeedStateV2.change_feed_start_from_property_name}]") + change_feed_start_from = ChangeFeedStartFromInternal.from_json(change_feed_start_from_data) + + continuation_data = continuation_json.get(ChangeFeedStateV2.continuation_property_name) + if continuation_data is None: + raise ValueError(f"Invalid continuation: [Missing {ChangeFeedStateV2.continuation_property_name}]") + continuation = FeedRangeCompositeContinuation.from_json(continuation_data) + return ChangeFeedStateV2( + container_link=container_link, + container_rid=container_rid, + feed_range=continuation.feed_range, + change_feed_start_from=change_feed_start_from, + continuation=continuation) + + @classmethod + def from_initial_state( + cls, + container_link: str, + collection_rid: str, + data: dict[str, Any]) -> 'ChangeFeedStateV2': + + if is_key_exists_and_not_none(data, "feedRange"): + feed_range_str = base64.b64decode(data["feedRange"]).decode('utf-8') + feed_range_json = json.loads(feed_range_str) + feed_range = Range.ParseFromDict(feed_range_json) + elif is_key_exists_and_not_none(data, "partitionKeyFeedRange"): + feed_range = data["partitionKeyFeedRange"] + else: + # default to full range + feed_range = Range( + "", + "FF", + True, + False) + + change_feed_start_from = ChangeFeedStartFromInternal.from_start_time(data.get("startTime")) + return cls( + container_link=container_link, + container_rid=collection_rid, + feed_range=feed_range, + change_feed_start_from=change_feed_start_from, + continuation=None) + diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/composite_continuation_token.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/composite_continuation_token.py new file mode 100644 index 000000000000..9945405e4b57 --- /dev/null +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/composite_continuation_token.py @@ -0,0 +1,72 @@ +# The MIT License (MIT) +# Copyright (c) 2014 Microsoft Corporation + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +"""Internal class for change feed composite continuation token in the Azure Cosmos +database service. 
+""" +from typing import Optional + +from azure.cosmos._routing.routing_range import Range + + +class CompositeContinuationToken(object): + token_property_name = "token" + feed_range_property_name = "range" + + def __init__(self, feed_range: Range, token: Optional[str] = None): + if feed_range is None: + raise ValueError("Missing required parameter feed_range") + + self._token = token + self._feed_range = feed_range + + def to_dict(self): + return { + self.token_property_name: self._token, + self.feed_range_property_name: self._feed_range.to_dict() + } + + @property + def feed_range(self): + return self._feed_range + + @property + def token(self): + return self._token + + def update_token(self, etag): + self._token = etag + + @classmethod + def from_json(cls, data): + token = data.get(cls.token_property_name) + if token is None: + raise ValueError(f"Invalid composite token [Missing {cls.token_property_name}]") + + feed_range_data = data.get(cls.feed_range_property_name) + if feed_range_data is None: + raise ValueError(f"Invalid composite token [Missing {cls.feed_range_property_name}]") + + feed_range = Range.ParseFromDict(feed_range_data) + return cls(feed_range=feed_range, token=token) + + def __repr__(self): + return f"CompositeContinuationToken(token={self.token}, range={self._feed_range})" diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range_composite_continuation_token.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range_composite_continuation_token.py new file mode 100644 index 000000000000..2461436924aa --- /dev/null +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range_composite_continuation_token.py @@ -0,0 +1,134 @@ +# The MIT License (MIT) +# Copyright (c) 2014 Microsoft Corporation + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +"""Internal class for change feed continuation token by feed range in the Azure Cosmos +database service. 
+""" +import collections +from collections import deque +from typing import Any + +from azure.cosmos._change_feed.composite_continuation_token import CompositeContinuationToken +from azure.cosmos._routing.routing_map_provider import SmartRoutingMapProvider +from azure.cosmos._routing.routing_range import Range + + +class FeedRangeCompositeContinuation(object): + _version_property_name = "V" + _container_rid_property_name = "Rid" + _continuation_property_name = "Continuation" + _feed_range_property_name = "Range" + + def __init__( + self, + container_rid: str, + feed_range: Range, + continuation: collections.deque[CompositeContinuationToken]): + if container_rid is None: + raise ValueError("container_rid is missing") + + self._container_rid = container_rid + self._feed_range = feed_range + self._continuation = continuation + self._current_token = self._continuation[0] + self._initial_no_result_range = None + + @property + def current_token(self): + return self._current_token + + def to_dict(self) -> dict[str, Any]: + return { + self._version_property_name: "v1", #TODO: should this start from v2 + self._container_rid_property_name: self._container_rid, + self._continuation_property_name: [childToken.to_dict() for childToken in self._continuation], + self._feed_range_property_name: self._feed_range.to_dict() + } + + @classmethod + def from_json(cls, data) -> 'FeedRangeCompositeContinuation': + version = data.get(cls._version_property_name) + if version is None: + raise ValueError(f"Invalid feed range composite continuation token [Missing {cls._version_property_name}]") + if version != "v1": + raise ValueError("Invalid feed range composite continuation token [Invalid version]") + + container_rid = data.get(cls._container_rid_property_name) + if container_rid is None: + raise ValueError(f"Invalid feed range composite continuation token [Missing {cls._container_rid_property_name}]") + + feed_range_data = data.get(cls._feed_range_property_name) + if feed_range_data is None: + raise ValueError(f"Invalid feed range composite continuation token [Missing {cls._feed_range_property_name}]") + feed_range = Range.ParseFromDict(feed_range_data) + + continuation_data = data.get(cls._continuation_property_name) + if continuation_data is None: + raise ValueError(f"Invalid feed range composite continuation token [Missing {cls._continuation_property_name}]") + if not isinstance(continuation_data, list) or len(continuation_data) == 0: + raise ValueError(f"Invalid feed range composite continuation token [The {cls._continuation_property_name} must be non-empty array]") + continuation = [CompositeContinuationToken.from_json(child_range_continuation_token) for child_range_continuation_token in continuation_data] + + return cls(container_rid=container_rid, feed_range=feed_range, continuation=deque(continuation)) + + def handle_feed_range_gone(self, routing_provider: SmartRoutingMapProvider, collection_link: str) -> None: + overlapping_ranges = routing_provider.get_overlapping_ranges(collection_link, self._current_token.feed_range) + + if len(overlapping_ranges) == 1: + # merge,reusing the existing the feedRange and continuationToken + pass + else: + # split, remove the parent range and then add new child ranges. 
+ # For each new child range, using the continuation token from the parent + self._continuation.popleft() + for child_range in overlapping_ranges: + self._continuation.append(CompositeContinuationToken(Range.PartitionKeyRangeToRange(child_range), self._current_token.token)) + + self._current_token = self._continuation[0] + + def should_retry_on_not_modified_response(self) -> bool: + # when getting 304(Not Modified) response from one sub feed range, we will try to fetch for the next sub feed range + # we will repeat the above logic until we have looped through all sub feed ranges + + # TODO: validate the response headers, can we get the status code + if len(self._continuation) > 1: + return self._current_token.feed_range != self._initial_no_result_range + + else: + return False + + def _move_to_next_token(self) -> None: + first_composition_token = self._continuation.popleft() + # add the composition token to the end of the list + self._continuation.append(first_composition_token) + self._current_token = self._continuation[0] + + def apply_server_response_continuation(self, etag) -> None: + self._current_token.update_token(etag) + self._move_to_next_token() + + def apply_not_modified_response(self) -> None: + if self._initial_no_result_range is None: + self._initial_no_result_range = self._current_token.feed_range + + @property + def feed_range(self) -> Range: + return self._feed_range diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_cosmos_client_connection.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_cosmos_client_connection.py index 1288e7a4e66e..a81ab438cbf2 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_cosmos_client_connection.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_cosmos_client_connection.py @@ -26,14 +26,10 @@ import os import urllib.parse from typing import Callable, Dict, Any, Iterable, List, Mapping, Optional, Sequence, Tuple, Union, cast, Type -from typing_extensions import TypedDict -from urllib3.util.retry import Retry +from azure.core import PipelineClient from azure.core.credentials import TokenCredential from azure.core.paging import ItemPaged -from azure.core import PipelineClient -from azure.core.pipeline.transport import HttpRequest, \ - HttpResponse # pylint: disable=no-legacy-azure-core-http-response-import from azure.core.pipeline.policies import ( HTTPPolicy, ContentDecodePolicy, @@ -44,22 +40,31 @@ DistributedTracingPolicy, ProxyPolicy ) +from azure.core.pipeline.transport import HttpRequest, \ + HttpResponse # pylint: disable=no-legacy-azure-core-http-response-import +from typing_extensions import TypedDict +from urllib3.util.retry import Retry from . import _base as base -from ._base import _set_properties_cache -from . import documents -from .documents import ConnectionPolicy, DatabaseAccount -from ._constants import _Constants as Constants -from . import http_constants, exceptions +from . import _global_endpoint_manager as global_endpoint_manager from . import _query_iterable as query_iterable from . import _runtime_constants as runtime_constants -from ._request_object import RequestObject -from . import _synchronized_request as synchronized_request -from . import _global_endpoint_manager as global_endpoint_manager -from ._routing import routing_map_provider, routing_range -from ._retry_utility import ConnectionRetryPolicy from . import _session +from . import _synchronized_request as synchronized_request from . import _utils +from . import documents +from . 
import http_constants, exceptions +from ._auth_policy import CosmosBearerTokenCredentialPolicy +from ._base import _set_properties_cache +from ._change_feed.change_feed_iterable import ChangeFeedIterable +from ._change_feed.change_feed_state import ChangeFeedState +from ._constants import _Constants as Constants +from ._cosmos_http_logging_policy import CosmosHttpLoggingPolicy +from ._range_partition_resolver import RangePartitionResolver +from ._request_object import RequestObject +from ._retry_utility import ConnectionRetryPolicy +from ._routing import routing_map_provider, routing_range +from .documents import ConnectionPolicy, DatabaseAccount from .partition_key import ( _Undefined, _Empty, @@ -67,9 +72,6 @@ _return_undefined_or_empty_partition_key, NonePartitionKeyValue ) -from ._auth_policy import CosmosBearerTokenCredentialPolicy -from ._cosmos_http_logging_policy import CosmosHttpLoggingPolicy -from ._range_partition_resolver import RangePartitionResolver PartitionKeyType = Union[str, int, float, bool, Sequence[Union[str, int, float, bool, None]], Type[NonePartitionKeyValue]] # pylint: disable=line-too-long @@ -1191,11 +1193,10 @@ def fetch_fn(options: Mapping[str, Any]) -> Tuple[List[Dict[str, Any]], Dict[str return ItemPaged( self, - None, options, fetch_function=fetch_fn, collection_link=collection_link, - page_iterator_class=query_iterable.QueryIterable + page_iterator_class=ChangeFeedIterable ) def _ReadPartitionKeyRanges( @@ -3023,6 +3024,11 @@ def __GetBodiesFromQueryResult(result: Dict[str, Any]) -> List[Dict[str, Any]]: options, partition_key_range_id ) + + change_feed_state = options.pop("changeFeedState", None) + if change_feed_state and isinstance(change_feed_state, ChangeFeedState): + change_feed_state.populate_request_headers(self._routing_map_provider, headers) + result, last_response_headers = self.__Get(path, request_params, headers, **kwargs) self.last_response_headers = last_response_headers if response_hook: diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_routing/routing_range.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_routing/routing_range.py index 0d61fbbbe1d7..f3269af47271 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_routing/routing_range.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_routing/routing_range.py @@ -22,6 +22,9 @@ """Internal class for partition key range implementation in the Azure Cosmos database service. 
""" +import base64 +import binascii +import json class PartitionKeyRange(object): @@ -81,6 +84,76 @@ def ParseFromDict(cls, range_as_dict): ) return self + def to_dict(self): + return { + self.MinPath: self.min, + self.MaxPath: self.max, + self.IsMinInclusivePath: self.isMinInclusive, + self.IsMaxInclusivePath: self.isMaxInclusive + } + + def to_normalized_range(self): + if self.isMinInclusive and not self.isMaxInclusive: + return self + + normalized_min = self.min + normalized_max = self.max + + if not self.isMinInclusive: + normalized_min = self.add_to_effective_partition_key(self.min, -1) + + if self.isMaxInclusive: + normalized_max = self.add_to_effective_partition_key(self.max, 1) + + return Range(normalized_min, normalized_max, True, False) + + def add_to_effective_partition_key(self, effective_partition_key: str, value: int): + if value != 1 and value != -1: + raise ValueError("Invalid value - only 1 or -1 is allowed") + + byte_array = self.hex_binary_to_byte_array(effective_partition_key) + if value == 1: + for i in range(len(byte_array) -1, -1, -1): + if byte_array[i] < 255: + byte_array[i] += 1 + break + else: + byte_array[i] = 0 + else: + for i in range(len(byte_array) - 1, -1, -1): + if byte_array[i] != 0: + byte_array[i] -= 1 + break + else: + byte_array[i] = 255 + + return binascii.hexlify(byte_array).decode() + + def hex_binary_to_byte_array(self, hex_binary_string: str): + if hex_binary_string is None: + raise ValueError("hex_binary_string is missing") + if len(hex_binary_string) % 2 != 0: + raise ValueError("hex_binary_string must not have an odd number of characters") + + return bytearray.fromhex(hex_binary_string) + + @classmethod + def from_base64_encoded_json_string(cls, data: str): + try: + feed_range_json_string = base64.b64decode(data, validate=True).decode('utf-8') + feed_range_json = json.loads(feed_range_json_string) + return cls.ParseFromDict(feed_range_json) + except Exception: + raise ValueError(f"Invalid feed_range json string {data}") + + def to_base64_encoded_string(self): + data_json = json.dumps(self.to_dict()) + json_bytes = data_json.encode('utf-8') + # Encode the bytes to a Base64 string + base64_bytes = base64.b64encode(json_bytes) + # Convert the Base64 bytes to a string + return base64_bytes.decode('utf-8') + def isSingleValue(self): return self.isMinInclusive and self.isMaxInclusive and self.min == self.max diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_utils.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_utils.py index 1b3d0370e6ef..1c03b8a054c5 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_utils.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_utils.py @@ -69,3 +69,17 @@ def get_index_metrics_info(delimited_string: Optional[str]) -> Dict[str, Any]: return result except (json.JSONDecodeError, ValueError): return {} + + +def is_base64_encoded(data: str) -> bool: + if data is None: + return False + try: + base64.b64decode(data, validate=True).decode('utf-8') + return True + except (json.JSONDecodeError, ValueError): + return False + + +def is_key_exists_and_not_none(data: dict[str, Any], key: str) -> bool: + return key in data and data[key] is not None diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py index 385d7f7af236..a8559839aad7 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py @@ -21,16 +21,18 @@ """Create, read, update and delete items in the Azure Cosmos DB SQL API service. 
""" -from datetime import datetime, timezone -from typing import Any, Dict, Mapping, Optional, Sequence, Type, Union, List, Tuple, cast -from typing_extensions import Literal +import warnings +from datetime import datetime +from typing import Any, Dict, Mapping, Optional, Sequence, Type, Union, List, Tuple, cast, overload from azure.core import MatchConditions from azure.core.async_paging import AsyncItemPaged from azure.core.tracing.decorator import distributed_trace from azure.core.tracing.decorator_async import distributed_trace_async # type: ignore +from typing_extensions import Literal from ._cosmos_client_connection_async import CosmosClientConnection +from ._scripts import ScriptsProxy from .._base import ( build_options as _build_options, validate_cache_staleness_value, @@ -39,13 +41,16 @@ GenerateGuidId, _set_properties_cache ) +from .._change_feed.aio.change_feed_state import ChangeFeedState +from .._routing import routing_range +from .._routing.routing_range import Range +from .._utils import is_key_exists_and_not_none, is_base64_encoded from ..offer import ThroughputProperties -from ._scripts import ScriptsProxy from ..partition_key import ( NonePartitionKeyValue, _return_undefined_or_empty_partition_key, _Empty, - _Undefined + _Undefined, PartitionKey ) __all__ = ("ContainerProxy",) @@ -132,6 +137,26 @@ async def _set_partition_key( return _return_undefined_or_empty_partition_key(await self.is_system_key) return cast(Union[str, int, float, bool, List[Union[str, int, float, bool]]], partition_key) + async def _get_epk_range_for_partition_key(self, partition_key_value: PartitionKeyType) -> Range: + container_properties = await self._get_properties() + partition_key_definition = container_properties.get("partitionKey") + partition_key = PartitionKey(path=partition_key_definition["paths"], kind=partition_key_definition["kind"]) + + is_prefix_partition_key = await self.__is_prefix_partition_key(partition_key_value) + + return partition_key._get_epk_range_for_partition_key(partition_key_value, is_prefix_partition_key) + + async def __is_prefix_partition_key(self, partition_key: PartitionKeyType) -> bool: + + properties = await self._get_properties() + pk_properties = properties.get("partitionKey") + partition_key_definition = PartitionKey(path=pk_properties["paths"], kind=pk_properties["kind"]) + if partition_key_definition.kind != "MultiHash": + return False + if isinstance(partition_key, list) and len(partition_key_definition['paths']) == len(partition_key): + return False + return True + @distributed_trace_async async def read( self, @@ -480,62 +505,196 @@ def query_items( response_hook(self.client_connection.last_response_headers, items) return items - @distributed_trace - def query_items_change_feed( - self, - *, - partition_key_range_id: Optional[str] = None, - is_start_from_beginning: bool = False, - start_time: Optional[datetime] = None, - continuation: Optional[str] = None, - max_item_count: Optional[int] = None, - partition_key: Optional[PartitionKeyType] = None, - priority: Optional[Literal["High", "Low"]] = None, - **kwargs: Any + @overload + async def query_items_change_feed( + self, + *, + max_item_count: Optional[int] = None, + start_time: Optional[Union[datetime, Literal["Now", "Beginning"]]] = None, + partition_key: Optional[PartitionKeyType] = None, + # -> would RU usage be more efficient, bug to backend team? deprecate it or using FeedRange to convert? 
+ priority: Optional[Literal["High", "Low"]] = None, + **kwargs: Any ) -> AsyncItemPaged[Dict[str, Any]]: """Get a sorted list of items that were changed, in the order in which they were modified. - :keyword bool is_start_from_beginning: Get whether change feed should start from - beginning (true) or from current (false). By default, it's start from current (false). - :keyword ~datetime.datetime start_time: Specifies a point of time to start change feed. Provided value will be - converted to UTC. This value will be ignored if `is_start_from_beginning` is set to true. - :keyword str partition_key_range_id: ChangeFeed requests can be executed against specific partition key - ranges. This is used to process the change feed in parallel across multiple consumers. - :keyword str continuation: e_tag value to be used as continuation for reading change feed. - :keyword int max_item_count: Max number of items to be returned in the enumeration operation. - :keyword partition_key: partition key at which ChangeFeed requests are targeted. - :paramtype partition_key: Union[str, int, float, bool, List[Union[str, int, float, bool]]] - :keyword response_hook: A callable invoked with the response metadata. - :paramtype response_hook: Callable[[Dict[str, str], AsyncItemPaged[Dict[str, Any]]], None] - :keyword Literal["High", "Low"] priority: Priority based execution allows users to set a priority for each + :param int max_item_count: Max number of items to be returned in the enumeration operation. + :param Union[datetime, Literal["Now", "Beginning"]] start_time: The start time to start processing chang feed items. + Beginning: Processing the change feed items from the beginning of the change feed. + Now: Processing change feed from the current time, so only events for all future changes will be retrieved. + ~datetime.datetime: processing change feed from a point of time. Provided value will be converted to UTC. + By default, it is start from current (NOW) + :param PartitionKeyType partition_key: The partition key that is used to define the scope (logical partition or a subset of a container) + :param Literal["High", "Low"] priority: Priority based execution allows users to set a priority for each request. Once the user has reached their provisioned throughput, low priority requests are throttled before high priority requests start getting throttled. Feature must first be enabled at the account level. :returns: An AsyncItemPaged of items (dicts). :rtype: AsyncItemPaged[Dict[str, Any]] """ - response_hook = kwargs.pop('response_hook', None) - if priority is not None: - kwargs['priority'] = priority + ... + + @overload + async def query_items_change_feed( + self, + *, + feed_range: Optional[str] = None, + max_item_count: Optional[int] = None, + start_time: Optional[Union[datetime, Literal["Now", "Beginning"]]] = None, + priority: Optional[Literal["High", "Low"]] = None, + **kwargs: Any + ) -> AsyncItemPaged[Dict[str, Any]]: + """Get a sorted list of items that were changed, in the order in which they were modified. + + :param str feed_range: The feed range that is used to define the scope. By default, the scope will be the entire container. + :param int max_item_count: Max number of items to be returned in the enumeration operation. + :param Union[datetime, Literal["Now", "Beginning"]] start_time: The start time to start processing chang feed items. + Beginning: Processing the change feed items from the beginning of the change feed. 
+ Now: Processing change feed from the current time, so only events for all future changes will be retrieved. + ~datetime.datetime: processing change feed from a point of time. Provided value will be converted to UTC. + By default, it is start from current (NOW) + :param Literal["High", "Low"] priority: Priority based execution allows users to set a priority for each + request. Once the user has reached their provisioned throughput, low priority requests are throttled + before high priority requests start getting throttled. Feature must first be enabled at the account level. + :returns: An AsyncItemPaged of items (dicts). + :rtype: AsyncItemPaged[Dict[str, Any]] + """ + ... + + @overload + async def query_items_change_feed( + self, + *, + continuation: Optional[str] = None, + max_item_count: Optional[int] = None, + priority: Optional[Literal["High", "Low"]] = None, + **kwargs: Any + ) -> AsyncItemPaged[Dict[str, Any]]: + """Get a sorted list of items that were changed, in the order in which they were modified. + + :param str continuation: The continuation token retrieved from previous response. + :param int max_item_count: Max number of items to be returned in the enumeration operation. + :param Literal["High", "Low"] priority: Priority based execution allows users to set a priority for each + request. Once the user has reached their provisioned throughput, low priority requests are throttled + before high priority requests start getting throttled. Feature must first be enabled at the account level. + :returns: An AsyncItemPaged of items (dicts). + :rtype: AsyncItemPaged[Dict[str, Any]] + """ + ... + + @distributed_trace + async def query_items_change_feed( + self, + *args: Any, + **kwargs: Any + ) -> AsyncItemPaged[Dict[str, Any]]: + + if is_key_exists_and_not_none(kwargs, "priority"): + kwargs['priority'] = kwargs['priority'] feed_options = _build_options(kwargs) - feed_options["isStartFromBeginning"] = is_start_from_beginning - if start_time is not None and is_start_from_beginning is False: - feed_options["startTime"] = start_time.astimezone(timezone.utc).strftime('%a, %d %b %Y %H:%M:%S GMT') - if partition_key_range_id is not None: - feed_options["partitionKeyRangeId"] = partition_key_range_id - if partition_key is not None: - feed_options["partitionKey"] = self._set_partition_key(partition_key) - if max_item_count is not None: - feed_options["maxItemCount"] = max_item_count - if continuation is not None: - feed_options["continuation"] = continuation + + change_feed_state_context = {} + # Back compatibility with deprecation warnings for partition_key_range_id + if (args and args[0] is not None) or is_key_exists_and_not_none(kwargs, "partition_key_range_id"): + warnings.warn( + "partition_key_range_id is deprecated. Please pass in feed_range instead.", + DeprecationWarning + ) + + try: + change_feed_state_context["partitionKeyRangeId"] = kwargs.pop('partition_key_range_id') + except KeyError: + change_feed_state_context['partitionKeyRangeId'] = args[0] + + # Back compatibility with deprecation warnings for is_start_from_beginning + if (len(args) >= 2 and args[1] is not None) or is_key_exists_and_not_none(kwargs, "is_start_from_beginning"): + warnings.warn( + "is_start_from_beginning is deprecated. 
Please pass in start_time instead.", + DeprecationWarning + ) + + try: + is_start_from_beginning = kwargs.pop('is_start_from_beginning') + except KeyError: + is_start_from_beginning = args[1] + + if is_start_from_beginning: + change_feed_state_context["startTime"] = "Beginning" + + # parse start_time + if is_key_exists_and_not_none(kwargs, "start_time"): + if change_feed_state_context.get("startTime") is not None: + raise ValueError("is_start_from_beginning and start_time are exclusive, please only set one of them") + + start_time = kwargs.pop('start_time') + if not isinstance(start_time, (datetime, str)): + raise TypeError( + "'start_time' must be either a datetime object, or either the values 'now' or 'beginning'.") + change_feed_state_context["startTime"] = start_time + + # parse continuation token + if len(args) >= 3 and args[2] is not None or is_key_exists_and_not_none(feed_options, "continuation"): + try: + continuation = feed_options.pop('continuation') + except KeyError: + continuation = args[2] + + # there are two types of continuation token we support currently: + # v1 version: the continuation token would just be the _etag, + # which is being returned when customer is using partition_key_range_id, + # which is under deprecation and does not support split/merge + # v2 version: the continuation token will be base64 encoded composition token which includes full change feed state + if is_base64_encoded(continuation): + change_feed_state_context["continuationFeedRange"] = continuation + else: + change_feed_state_context["continuationPkRangeId"] = continuation + + if len(args) >= 4 and args[3] is not None or is_key_exists_and_not_none(kwargs, "max_item_count"): + try: + feed_options["maxItemCount"] = kwargs.pop('max_item_count') + except KeyError: + feed_options["maxItemCount"] = args[3] + + if is_key_exists_and_not_none(kwargs, "partition_key"): + partition_key = kwargs.pop("partition_key") + change_feed_state_context["partitionKey"] = await self._set_partition_key(partition_key) + change_feed_state_context["partitionKeyFeedRange"] = await self._get_epk_range_for_partition_key(partition_key) + + if is_key_exists_and_not_none(kwargs, "feed_range"): + change_feed_state_context["feedRange"] = kwargs.pop('feed_range') + + # validate exclusive or in-compatible parameters + if is_key_exists_and_not_none(change_feed_state_context, "continuationPkRangeId"): + # if continuation token is in v1 format, throw exception if feed_range is set + if is_key_exists_and_not_none(change_feed_state_context, "feedRange"): + raise ValueError("feed_range and continuation are incompatible") + elif is_key_exists_and_not_none(change_feed_state_context, "continuationFeedRange"): + # if continuation token is in v2 format, since the token itself contains the full change feed state + # so we will ignore other parameters if they passed in + if is_key_exists_and_not_none(change_feed_state_context, "partitionKeyRangeId"): + raise ValueError("partition_key_range_id and continuation are incompatible") + else: + # validation when no continuation is passed + exclusive_keys = ["partitionKeyRangeId", "partitionKey", "feedRange"] + count = sum(1 for key in exclusive_keys if + key in change_feed_state_context and change_feed_state_context[key] is not None) + if count > 1: + raise ValueError( + "partition_key_range_id, partition_key, feed_range are exclusive parameters, please only set one of them") + + container_properties = await self._get_properties() + container_rid = container_properties.get("_rid") + change_feed_state = 
ChangeFeedState.from_json(self.container_link, container_rid, change_feed_state_context) + feed_options["changeFeedState"] = change_feed_state + feed_options["containerRID"] = self.__get_client_container_caches()[self.container_link]["_rid"] + + response_hook = kwargs.pop('response_hook', None) if hasattr(response_hook, "clear"): response_hook.clear() - if self.container_link in self.__get_client_container_caches(): - feed_options["containerRID"] = self.__get_client_container_caches()[self.container_link]["_rid"] result = self.client_connection.QueryItemsChangeFeed( self.container_link, options=feed_options, response_hook=response_hook, **kwargs ) + if response_hook: response_hook(self.client_connection.last_response_headers, result) return result @@ -1098,3 +1257,17 @@ async def execute_item_batch( return await self.client_connection.Batch( collection_link=self.container_link, batch_operations=batch_operations, options=request_options, **kwargs) + + async def read_feed_ranges( + self, + **kwargs: Any + ) -> List[str]: + partition_key_ranges =\ + await self.client_connection._routing_map_provider.get_overlapping_ranges( + self.container_link, + # default to full range + [Range("", "FF", True, False)]) + + return [routing_range.Range.PartitionKeyRangeToRange(partitionKeyRange).to_base64_encoded_string() for partitionKeyRange in partition_key_ranges] + + diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_cosmos_client_connection_async.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_cosmos_client_connection_async.py index 72ea03668909..7cccac695769 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_cosmos_client_connection_async.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_cosmos_client_connection_async.py @@ -50,6 +50,8 @@ from .. import _base as base from .._base import _set_properties_cache from .. import documents +from .._change_feed.aio.change_feed_iterable import ChangeFeedIterable +from .._change_feed.aio.change_feed_state import ChangeFeedState from .._routing import routing_range from ..documents import ConnectionPolicy, DatabaseAccount from .._constants import _Constants as Constants @@ -2310,11 +2312,10 @@ async def fetch_fn(options: Mapping[str, Any]) -> Tuple[List[Dict[str, Any]], Di return AsyncItemPaged( self, - None, options, fetch_function=fetch_fn, collection_link=collection_link, - page_iterator_class=query_iterable.QueryIterable + page_iterator_class=ChangeFeedIterable ) def QueryOffers( @@ -2812,6 +2813,11 @@ def __GetBodiesFromQueryResult(result: Dict[str, Any]) -> List[Dict[str, Any]]: documents._OperationType.QueryPlan if is_query_plan else documents._OperationType.ReadFeed ) headers = base.GetHeaders(self, initial_headers, "get", path, id_, typ, options, partition_key_range_id) + + change_feed_state = options.pop("changeFeedState", None) + if change_feed_state and isinstance(change_feed_state, ChangeFeedState): + await change_feed_state.populate_request_headers(self._routing_map_provider, headers) + result, self.last_response_headers = await self.__Get(path, request_params, headers, **kwargs) if response_hook: response_hook(self.last_response_headers, result) diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/container.py b/sdk/cosmos/azure-cosmos/azure/cosmos/container.py index 9cf697ee7f5a..32fd818075f8 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/container.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/container.py @@ -21,16 +21,15 @@ """Create, read, update and delete items in the Azure Cosmos DB SQL API service. 
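Taken together, the async surface above is intended to be used roughly like this (a sketch; container is an existing aio ContainerProxy and the item handling is illustrative):

async def process_all_feed_ranges(container):
    # one pass over the change feed per feed range returned by the container
    for feed_range in await container.read_feed_ranges():
        pages = await container.query_items_change_feed(
            feed_range=feed_range, start_time="Beginning")
        async for item in pages:
            print(item["id"])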
""" -from datetime import datetime, timezone import warnings -from typing import Any, Dict, List, Optional, Sequence, Union, Tuple, Mapping, Type, cast -from typing_extensions import Literal +from datetime import datetime +from typing import Any, Dict, List, Optional, Sequence, Union, Tuple, Mapping, Type, cast, overload from azure.core import MatchConditions -from azure.core.tracing.decorator import distributed_trace from azure.core.paging import ItemPaged +from azure.core.tracing.decorator import distributed_trace +from typing_extensions import Literal -from ._cosmos_client_connection import CosmosClientConnection from ._base import ( build_options, validate_cache_staleness_value, @@ -39,8 +38,12 @@ GenerateGuidId, _set_properties_cache ) +from ._change_feed.change_feed_state import ChangeFeedState +from ._cosmos_client_connection import CosmosClientConnection +from ._routing import routing_range +from ._routing.routing_range import Range +from ._utils import is_key_exists_and_not_none, is_base64_encoded from .offer import Offer, ThroughputProperties -from .scripts import ScriptsProxy from .partition_key import ( NonePartitionKeyValue, PartitionKey, @@ -48,6 +51,7 @@ _Undefined, _return_undefined_or_empty_partition_key ) +from .scripts import ScriptsProxy __all__ = ("ContainerProxy",) @@ -132,6 +136,13 @@ def _set_partition_key( def __get_client_container_caches(self) -> Dict[str, Dict[str, Any]]: return self.client_connection._container_properties_cache + def _get_epk_range_for_partition_key(self, partition_key_value: PartitionKeyType) -> Range: + container_properties = self._get_properties() + partition_key_definition = container_properties.get("partitionKey") + partition_key = PartitionKey(path=partition_key_definition["paths"], kind=partition_key_definition["kind"]) + + return partition_key._get_epk_range_for_partition_key(partition_key_value, self.__is_prefix_partitionkey(partition_key_value)) + @distributed_trace def read( # pylint:disable=docstring-missing-param self, @@ -309,56 +320,186 @@ def read_all_items( # pylint:disable=docstring-missing-param response_hook(self.client_connection.last_response_headers, items) return items - @distributed_trace + @overload def query_items_change_feed( - self, - partition_key_range_id: Optional[str] = None, - is_start_from_beginning: bool = False, - continuation: Optional[str] = None, - max_item_count: Optional[int] = None, - *, - start_time: Optional[datetime] = None, - partition_key: Optional[PartitionKeyType] = None, - priority: Optional[Literal["High", "Low"]] = None, - **kwargs: Any + self, + *, + max_item_count: Optional[int] = None, + start_time: Optional[Union[datetime, Literal["Now", "Beginning"]]] = None, + partition_key: Optional[PartitionKeyType] = None, + # -> would RU usage be more efficient, bug to backend team? deprecate it or using FeedRange to convert? + priority: Optional[Literal["High", "Low"]] = None, + **kwargs: Any ) -> ItemPaged[Dict[str, Any]]: """Get a sorted list of items that were changed, in the order in which they were modified. - :param str partition_key_range_id: ChangeFeed requests can be executed against specific partition key ranges. - This is used to process the change feed in parallel across multiple consumers. - :param bool is_start_from_beginning: Get whether change feed should start from - beginning (true) or from current (false). By default, it's start from current (false). - :param max_item_count: Max number of items to be returned in the enumeration operation. 
-        :param str continuation: e_tag value to be used as continuation for reading change feed.
         :param int max_item_count: Max number of items to be returned in the enumeration operation.
-        :keyword ~datetime.datetime start_time: Specifies a point of time to start change feed. Provided value will be
-            converted to UTC. This value will be ignored if `is_start_from_beginning` is set to true.
-        :keyword partition_key: partition key at which ChangeFeed requests are targeted.
-        :paramtype partition_key: Union[str, int, float, bool, List[Union[str, int, float, bool]]]
-        :keyword Callable response_hook: A callable invoked with the response metadata.
-        :keyword Literal["High", "Low"] priority: Priority based execution allows users to set a priority for each
+        :param Union[datetime, Literal["Now", "Beginning"]] start_time: The start time to start processing change feed items.
+            Beginning: Processing the change feed items from the beginning of the change feed.
+            Now: Processing the change feed from the current time, so only events for future changes will be retrieved.
+            ~datetime.datetime: Processing the change feed from a point in time. Provided value will be converted to UTC.
+            By default, processing starts from the current time ("Now").
+        :param PartitionKeyType partition_key: The partition key that is used to define the scope (logical partition or a subset of a container).
+        :param Literal["High", "Low"] priority: Priority based execution allows users to set a priority for each
             request. Once the user has reached their provisioned throughput, low priority requests are throttled
             before high priority requests start getting throttled. Feature must first be enabled at the account level.
         :returns: An Iterable of items (dicts).
-        :rtype: Iterable[dict[str, Any]]
+        :rtype: Iterable[Dict[str, Any]]
         """
-        if priority is not None:
-            kwargs['priority'] = priority
+        ...
+
+    @overload
+    def query_items_change_feed(
+            self,
+            *,
+            feed_range: Optional[str] = None,
+            max_item_count: Optional[int] = None,
+            start_time: Optional[Union[datetime, Literal["Now", "Beginning"]]] = None,
+            priority: Optional[Literal["High", "Low"]] = None,
+            **kwargs: Any
+    ) -> ItemPaged[Dict[str, Any]]:
+        """Get a sorted list of items that were changed, in the order in which they were modified.
+
+        :param str feed_range: The feed range that is used to define the scope. By default, the scope will be the entire container.
+        :param int max_item_count: Max number of items to be returned in the enumeration operation.
+        :param Union[datetime, Literal["Now", "Beginning"]] start_time: The start time to start processing change feed items.
+            Beginning: Processing the change feed items from the beginning of the change feed.
+            Now: Processing the change feed from the current time, so only events for future changes will be retrieved.
+            ~datetime.datetime: Processing the change feed from a point in time. Provided value will be converted to UTC.
+            By default, processing starts from the current time ("Now").
+        :param Literal["High", "Low"] priority: Priority based execution allows users to set a priority for each
+            request. Once the user has reached their provisioned throughput, low priority requests are throttled
+            before high priority requests start getting throttled. Feature must first be enabled at the account level.
+        :returns: An Iterable of items (dicts).
+        :rtype: Iterable[Dict[str, Any]]
+        """
+        ...
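The overloads above and the continuation-token overload that follows describe three ways to scope a change feed read: by start_time, by feed_range, or by a previously obtained continuation token. The following is only an illustrative usage sketch and not part of the patch; `container` is assumed to be an existing ContainerProxy and `saved_continuation` a placeholder for a token stored from an earlier read.

from datetime import datetime, timezone

# Read all changes from the beginning of the change feed.
for item in container.query_items_change_feed(start_time="Beginning"):
    print(item["id"])

# Scope the read to a single feed range, starting from a point in time.
feed_ranges = container.read_feed_ranges()
pager = container.query_items_change_feed(
    feed_range=feed_ranges[0],
    start_time=datetime(2024, 8, 1, tzinfo=timezone.utc),
    max_item_count=100,
)

# Resume from a previously saved continuation token.
pager = container.query_items_change_feed(continuation=saved_continuation)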
+ + @overload + def query_items_change_feed( + self, + *, + continuation: Optional[str] = None, + max_item_count: Optional[int] = None, + priority: Optional[Literal["High", "Low"]] = None, + **kwargs: Any + ) -> ItemPaged[Dict[str, Any]]: + """Get a sorted list of items that were changed, in the order in which they were modified. + + :param str continuation: The continuation token retrieved from previous response. + :param int max_item_count: Max number of items to be returned in the enumeration operation. + :param Literal["High", "Low"] priority: Priority based execution allows users to set a priority for each + request. Once the user has reached their provisioned throughput, low priority requests are throttled + before high priority requests start getting throttled. Feature must first be enabled at the account level. + :returns: An Iterable of items (dicts). + :rtype: Iterable[Dict[str, Any]] + """ + ... + + @distributed_trace + def query_items_change_feed( + self, + *args: Any, + **kwargs: Any + ) -> ItemPaged[Dict[str, Any]]: + + if is_key_exists_and_not_none(kwargs, "priority"): + kwargs['priority'] = kwargs['priority'] feed_options = build_options(kwargs) - response_hook = kwargs.pop('response_hook', None) - if partition_key_range_id is not None: - feed_options["partitionKeyRangeId"] = partition_key_range_id - if partition_key is not None: - feed_options["partitionKey"] = self._set_partition_key(partition_key) - if is_start_from_beginning is not None: - feed_options["isStartFromBeginning"] = is_start_from_beginning - if start_time is not None and is_start_from_beginning is False: - feed_options["startTime"] = start_time.astimezone(timezone.utc).strftime('%a, %d %b %Y %H:%M:%S GMT') - if max_item_count is not None: - feed_options["maxItemCount"] = max_item_count - if continuation is not None: - feed_options["continuation"] = continuation + change_feed_state_context = {} + # Back compatibility with deprecation warnings for partition_key_range_id + if (args and args[0] is not None) or is_key_exists_and_not_none(kwargs, "partition_key_range_id"): + warnings.warn( + "partition_key_range_id is deprecated. Please pass in feed_range instead.", + DeprecationWarning + ) + + try: + change_feed_state_context["partitionKeyRangeId"] = kwargs.pop('partition_key_range_id') + except KeyError: + change_feed_state_context['partitionKeyRangeId'] = args[0] + + # Back compatibility with deprecation warnings for is_start_from_beginning + if (len(args) >= 2 and args[1] is not None) or is_key_exists_and_not_none(kwargs, "is_start_from_beginning"): + warnings.warn( + "is_start_from_beginning is deprecated. 
Please pass in start_time instead.", + DeprecationWarning + ) + + try: + is_start_from_beginning = kwargs.pop('is_start_from_beginning') + except KeyError: + is_start_from_beginning = args[1] + + if is_start_from_beginning: + change_feed_state_context["startTime"] = "Beginning" + + # parse start_time + if is_key_exists_and_not_none(kwargs, "start_time"): + if change_feed_state_context.get("startTime") is not None: + raise ValueError("is_start_from_beginning and start_time are exclusive, please only set one of them") + + start_time = kwargs.pop('start_time') + if not isinstance(start_time, (datetime, str)): + raise TypeError( + "'start_time' must be either a datetime object, or either the values 'now' or 'beginning'.") + change_feed_state_context["startTime"] = start_time + + # parse continuation token + if len(args) >= 3 and args[2] is not None or is_key_exists_and_not_none(feed_options, "continuation"): + try: + continuation = feed_options.pop('continuation') + except KeyError: + continuation = args[2] + + # there are two types of continuation token we support currently: + # v1 version: the continuation token would just be the _etag, + # which is being returned when customer is using partition_key_range_id, + # which is under deprecation and does not support split/merge + # v2 version: the continuation token will be base64 encoded composition token which includes full change feed state + if is_base64_encoded(continuation): + change_feed_state_context["continuationFeedRange"] = continuation + else: + change_feed_state_context["continuationPkRangeId"] = continuation + + if len(args) >= 4 and args[3] is not None or is_key_exists_and_not_none(kwargs, "max_item_count"): + try: + feed_options["maxItemCount"] = kwargs.pop('max_item_count') + except KeyError: + feed_options["maxItemCount"] = args[3] + + if is_key_exists_and_not_none(kwargs, "partition_key"): + partition_key = kwargs.pop("partition_key") + change_feed_state_context["partitionKey"] = self._set_partition_key(partition_key) + change_feed_state_context["partitionKeyFeedRange"] = self._get_epk_range_for_partition_key(partition_key) + + if is_key_exists_and_not_none(kwargs, "feed_range"): + change_feed_state_context["feedRange"] = kwargs.pop('feed_range') + + # validate exclusive or in-compatible parameters + if is_key_exists_and_not_none(change_feed_state_context, "continuationPkRangeId"): + # if continuation token is in v1 format, throw exception if feed_range is set + if is_key_exists_and_not_none(change_feed_state_context, "feedRange"): + raise ValueError("feed_range and continuation are incompatible") + elif is_key_exists_and_not_none(change_feed_state_context, "continuationFeedRange"): + # if continuation token is in v2 format, since the token itself contains the full change feed state + # so we will ignore other parameters if they passed in + if is_key_exists_and_not_none(change_feed_state_context, "partitionKeyRangeId"): + raise ValueError("partition_key_range_id and continuation are incompatible") + else: + # validation when no continuation is passed + exclusive_keys = ["partitionKeyRangeId", "partitionKey", "feedRange"] + count = sum(1 for key in exclusive_keys if key in change_feed_state_context and change_feed_state_context[key] is not None) + if count > 1: + raise ValueError("partition_key_range_id, partition_key, feed_range are exclusive parameters, please only set one of them") + + container_properties = self._get_properties() + container_rid = container_properties.get("_rid") + change_feed_state = 
ChangeFeedState.from_json(self.container_link, container_rid, change_feed_state_context)
+        feed_options["changeFeedState"] = change_feed_state
+
+        response_hook = kwargs.pop('response_hook', None)
         if hasattr(response_hook, "clear"):
             response_hook.clear()
         if self.container_link in self.__get_client_container_caches():
@@ -1162,3 +1303,15 @@ def delete_all_items_by_partition_key(
         self.client_connection.DeleteAllItemsByPartitionKey(
             collection_link=self.container_link,
             options=request_options,
             **kwargs)
+
+    def read_feed_ranges(
+            self,
+            **kwargs: Any
+    ) -> List[str]:
+        partition_key_ranges =\
+            self.client_connection._routing_map_provider.get_overlapping_ranges(
+                self.container_link,
+                # default to full range
+                [Range("", "FF", True, False)])
+
+        return [routing_range.Range.PartitionKeyRangeToRange(partitionKeyRange).to_base64_encoded_string() for partitionKeyRange in partition_key_ranges]
\ No newline at end of file
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/exceptions.py b/sdk/cosmos/azure-cosmos/azure/cosmos/exceptions.py
index 5092fd0de7cf..768890dacfa6 100644
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/exceptions.py
+++ b/sdk/cosmos/azure-cosmos/azure/cosmos/exceptions.py
@@ -28,7 +28,7 @@
     ResourceNotFoundError
 )
 from . import http_constants
-
+from .http_constants import StatusCodes, SubStatusCodes

 class CosmosHttpResponseError(HttpResponseError):
     """An HTTP request to the Azure Cosmos database service has failed."""
@@ -136,6 +136,19 @@ def __init__(self, **kwargs):
         super(CosmosClientTimeoutError, self).__init__(message, **kwargs)


+class CosmosFeedRangeGoneError(CosmosHttpResponseError):
+    """An HTTP error response with status code 410 (Gone)."""
+    def __init__(self, message=None, response=None, **kwargs):
+        """
+        :param str message: Optional error message.
+ """ + self.status_code = StatusCodes.GONE + self.sub_status = SubStatusCodes.PARTITION_KEY_RANGE_GONE + self.http_error_message = message + formatted_message = "Status code: %d Sub-status: %d\n%s" % (self.status_code, self.sub_status, str(message)) + super(CosmosHttpResponseError, self).__init__(message=formatted_message, response=response, **kwargs) + + def _partition_range_is_gone(e): if (e.status_code == http_constants.StatusCodes.GONE and e.sub_status == http_constants.SubStatusCodes.PARTITION_KEY_RANGE_GONE): @@ -151,3 +164,7 @@ def _container_recreate_exception(e) -> bool: is_throughput_not_found = e.sub_status == http_constants.SubStatusCodes.THROUGHPUT_OFFER_NOT_FOUND return (is_bad_request and is_collection_rid_mismatch) or (is_not_found and is_throughput_not_found) + + +def _is_partition_split_or_merge(e): + return e.status_code == StatusCodes.GONE and e.status_code == SubStatusCodes.COMPLETING_SPLIT \ No newline at end of file diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/partition_key.py b/sdk/cosmos/azure-cosmos/azure/cosmos/partition_key.py index 22fcb19dae06..9f0a5cde29a2 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/partition_key.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/partition_key.py @@ -173,6 +173,20 @@ def _get_epk_range_for_prefix_partition_key( max_epk = str(min_epk) + "FF" return _Range(min_epk, max_epk, True, False) + def _get_epk_range_for_partition_key( + self, + pk_value: Sequence[Union[None, bool, int, float, str, _Undefined, Type[NonePartitionKeyValue]]], + is_prefix_pk_value: bool = False + ) -> _Range: + if is_prefix_pk_value: + return self._get_epk_range_for_prefix_partition_key(pk_value) + + # else return point range + effective_partition_key_string = self._get_effective_partition_key_string(pk_value) + partition_key_range = _Range(effective_partition_key_string, effective_partition_key_string, True, True) + + return partition_key_range.to_normalized_range() + def _get_effective_partition_key_for_hash_partitioning(self) -> str: # We shouldn't be supporting V1 return "" diff --git a/sdk/cosmos/azure-cosmos/test/test_change_feed.py b/sdk/cosmos/azure-cosmos/test/test_change_feed.py new file mode 100644 index 000000000000..a1d34262cae7 --- /dev/null +++ b/sdk/cosmos/azure-cosmos/test/test_change_feed.py @@ -0,0 +1,295 @@ +# The MIT License (MIT) +# Copyright (c) Microsoft Corporation. All rights reserved. 
+ +import time +import unittest +import uuid +from datetime import datetime, timedelta, timezone +from time import sleep + +import pytest +from _pytest.outcomes import fail + +import azure.cosmos.cosmos_client as cosmos_client +import azure.cosmos.exceptions as exceptions +import test_config +from azure.cosmos import DatabaseProxy +from azure.cosmos.partition_key import PartitionKey + + +@pytest.fixture(scope="class") +def setup(): + if (TestChangeFeed.masterKey == '[YOUR_KEY_HERE]' or + TestChangeFeed.host == '[YOUR_ENDPOINT_HERE]'): + raise Exception( + "You must specify your Azure Cosmos account values for " + "'masterKey' and 'host' at the top of this class to run the " + "tests.") + test_client = cosmos_client.CosmosClient(test_config.TestConfig.host, test_config.TestConfig.masterKey), + return { + "created_db": test_client[0].get_database_client(TestChangeFeed.TEST_DATABASE_ID) + } + +@pytest.mark.cosmosEmulator +@pytest.mark.unittest +@pytest.mark.usefixtures("setup") +class TestChangeFeed: + """Test to ensure escaping of non-ascii characters from partition key""" + + created_db: DatabaseProxy = None + client: cosmos_client.CosmosClient = None + config = test_config.TestConfig + host = config.host + masterKey = config.masterKey + connectionPolicy = config.connectionPolicy + TEST_DATABASE_ID = config.TEST_DATABASE_ID + + def test_get_feed_ranges(self, setup): + created_collection = setup["created_db"].create_container("get_feed_ranges_" + str(uuid.uuid4()), + PartitionKey(path="/pk")) + result = created_collection.read_feed_ranges() + assert len(result) == 1 + + @pytest.mark.parametrize("change_feed_filter_param", ["partitionKey", "partitionKeyRangeId", "feedRange"]) + def test_query_change_feed_with_different_filter(self, change_feed_filter_param, setup): + created_collection = setup["created_db"].create_container("change_feed_test_" + str(uuid.uuid4()), + PartitionKey(path="/pk")) + + # Read change feed without passing any options + query_iterable = created_collection.query_items_change_feed() + iter_list = list(query_iterable) + assert len(iter_list) == 0 + + if change_feed_filter_param == "partitionKey": + filter_param = {"partition_key": "pk"} + elif change_feed_filter_param == "partitionKeyRangeId": + filter_param = {"partition_key_range_id": "0"} + elif change_feed_filter_param == "feedRange": + feed_ranges = created_collection.read_feed_ranges() + assert len(feed_ranges) == 1 + filter_param = {"feed_range": feed_ranges[0]} + else: + filter_param = None + + # Read change feed from current should return an empty list + query_iterable = created_collection.query_items_change_feed(filter_param) + iter_list = list(query_iterable) + assert len(iter_list) == 0 + assert 'etag' in created_collection.client_connection.last_response_headers + assert created_collection.client_connection.last_response_headers['etag'] !='' + + # Read change feed from beginning should return an empty list + query_iterable = created_collection.query_items_change_feed( + is_start_from_beginning=True, + **filter_param + ) + iter_list = list(query_iterable) + assert len(iter_list) == 0 + assert 'etag' in created_collection.client_connection.last_response_headers + continuation1 = created_collection.client_connection.last_response_headers['etag'] + assert continuation1 != '' + + # Create a document. 
Read change feed should return be able to read that document + document_definition = {'pk': 'pk', 'id': 'doc1'} + created_collection.create_item(body=document_definition) + query_iterable = created_collection.query_items_change_feed( + is_start_from_beginning=True, + **filter_param + ) + iter_list = list(query_iterable) + assert len(iter_list) == 1 + assert iter_list[0]['id'] == 'doc1' + assert 'etag' in created_collection.client_connection.last_response_headers + continuation2 = created_collection.client_connection.last_response_headers['etag'] + assert continuation2 != '' + assert continuation2 != continuation1 + + # Create two new documents. Verify that change feed contains the 2 new documents + # with page size 1 and page size 100 + document_definition = {'pk': 'pk', 'id': 'doc2'} + created_collection.create_item(body=document_definition) + document_definition = {'pk': 'pk', 'id': 'doc3'} + created_collection.create_item(body=document_definition) + + for pageSize in [1, 100]: + # verify iterator + query_iterable = created_collection.query_items_change_feed( + continuation=continuation2, + max_item_count=pageSize, + **filter_param + ) + it = query_iterable.__iter__() + expected_ids = 'doc2.doc3.' + actual_ids = '' + for item in it: + actual_ids += item['id'] + '.' + assert actual_ids == expected_ids + + # verify by_page + # the options is not copied, therefore it need to be restored + query_iterable = created_collection.query_items_change_feed( + continuation=continuation2, + max_item_count=pageSize, + **filter_param + ) + count = 0 + expected_count = 2 + all_fetched_res = [] + for page in query_iterable.by_page(): + fetched_res = list(page) + assert len(fetched_res) == min(pageSize, expected_count - count) + count += len(fetched_res) + all_fetched_res.extend(fetched_res) + + actual_ids = '' + for item in all_fetched_res: + actual_ids += item['id'] + '.' 
+ assert actual_ids == expected_ids + + # verify reading change feed from the beginning + query_iterable = created_collection.query_items_change_feed( + is_start_from_beginning=True, + **filter_param + ) + expected_ids = ['doc1', 'doc2', 'doc3'] + it = query_iterable.__iter__() + for i in range(0, len(expected_ids)): + doc = next(it) + assert doc['id'] == expected_ids[i] + assert 'etag' in created_collection.client_connection.last_response_headers + continuation3 = created_collection.client_connection.last_response_headers['etag'] + + # verify reading empty change feed + query_iterable = created_collection.query_items_change_feed( + continuation=continuation3, + is_start_from_beginning=True, + **filter_param + ) + iter_list = list(query_iterable) + assert len(iter_list) == 0 + setup["created_db"].delete_container(created_collection.id) + + def test_query_change_feed_with_start_time(self, setup): + created_collection = setup["created_db"].create_container_if_not_exists("query_change_feed_start_time_test", + PartitionKey(path="/pk")) + batchSize = 50 + + def round_time(): + utc_now = datetime.now(timezone.utc) + return utc_now - timedelta(microseconds=utc_now.microsecond) + def create_random_items(container, batch_size): + for _ in range(batch_size): + # Generate a Random partition key + partition_key = 'pk' + str(uuid.uuid4()) + + # Generate a random item + item = { + 'id': 'item' + str(uuid.uuid4()), + 'partitionKey': partition_key, + 'content': 'This is some random content', + } + + try: + # Create the item in the container + container.upsert_item(item) + except exceptions.CosmosHttpResponseError as e: + fail(e) + + # Create first batch of random items + create_random_items(created_collection, batchSize) + + # wait for 1 second and record the time, then wait another second + sleep(1) + start_time = round_time() + not_utc_time = datetime.now() + sleep(1) + + # now create another batch of items + create_random_items(created_collection, batchSize) + + # now query change feed based on start time + change_feed_iter = list(created_collection.query_items_change_feed(start_time=start_time)) + totalCount = len(change_feed_iter) + + # now check if the number of items that were changed match the batch size + assert totalCount == batchSize + + # negative test: pass in a valid time in the future + future_time = start_time + timedelta(hours=1) + change_feed_iter = list(created_collection.query_items_change_feed(start_time=future_time)) + totalCount = len(change_feed_iter) + # A future time should return 0 + assert totalCount == 0 + + # test a date that is not utc, will be converted to utc by sdk + change_feed_iter = list(created_collection.query_items_change_feed(start_time=not_utc_time)) + totalCount = len(change_feed_iter) + # Should equal batch size + assert totalCount == batchSize + + # test an invalid value, Attribute error will be raised for passing non datetime object + invalid_time = "Invalid value" + try: + list(created_collection.query_items_change_feed(start_time=invalid_time)) + fail("Cannot format date on a non datetime object.") + except ValueError as e: #TODO: previously it is throwing AttributeError, now has changed into ValueError, is it breaking change? 
+ assert "Invalid start_time 'Invalid value'" == e.args[0] + + setup["created_db"].delete_container(created_collection.id) + + def test_query_change_feed_with_split(self, setup): + created_collection = setup["created_db"].create_container("change_feed_test_" + str(uuid.uuid4()), + PartitionKey(path="/pk"), + offer_throughput=400) + + # initial change feed query returns empty result + query_iterable = created_collection.query_items_change_feed(start_time="Beginning") + iter_list = list(query_iterable) + assert len(iter_list) == 0 + continuation = created_collection.client_connection.last_response_headers['etag'] + assert continuation != '' + + # create one doc and make sure change feed query can return the document + document_definition = {'pk': 'pk', 'id': 'doc1'} + created_collection.create_item(body=document_definition) + query_iterable = created_collection.query_items_change_feed(continuation=continuation) + iter_list = list(query_iterable) + assert len(iter_list) == 1 + continuation = created_collection.client_connection.last_response_headers['etag'] + + print("Triggering a split in test_query_change_feed_with_split") + created_collection.replace_throughput(11000) + print("changed offer to 11k") + print("--------------------------------") + print("Waiting for split to complete") + start_time = time.time() + + while True: + offer = created_collection.get_throughput() + if offer.properties['content'].get('isOfferReplacePending', False): + if time.time() - start_time > 60 * 25: # timeout test at 25 minutes + unittest.skip("Partition split didn't complete in time.") + else: + print("Waiting for split to complete") + time.sleep(60) + else: + break + + print("Split in test_query_change_feed_with_split has completed") + print("creating few more documents") + new_documents = [{'pk': 'pk2', 'id': 'doc2'}, {'pk': 'pk3', 'id': 'doc3'}, {'pk': 'pk4', 'id': 'doc4'}] + expected_ids = ['doc2', 'doc3', 'doc4'] + for document in new_documents: + created_collection.create_item(body=document) + + query_iterable = created_collection.query_items_change_feed(continuation=continuation) + it = query_iterable.__iter__() + actual_ids = [] + for item in it: + actual_ids.append(item['id']) + + assert actual_ids == expected_ids + setup["created_db"].delete_container(created_collection.id) + +if __name__ == "__main__": + unittest.main() diff --git a/sdk/cosmos/azure-cosmos/test/test_change_feed_async.py b/sdk/cosmos/azure-cosmos/test/test_change_feed_async.py new file mode 100644 index 000000000000..c3246768a796 --- /dev/null +++ b/sdk/cosmos/azure-cosmos/test/test_change_feed_async.py @@ -0,0 +1,322 @@ +# The MIT License (MIT) +# Copyright (c) Microsoft Corporation. All rights reserved. 
+ +import time +import unittest +import uuid +from asyncio import sleep +from datetime import datetime, timedelta, timezone + +import pytest +import pytest_asyncio +from _pytest.outcomes import fail + +import azure.cosmos.exceptions as exceptions +import test_config +from azure.cosmos.aio import CosmosClient, DatabaseProxy, ContainerProxy +from azure.cosmos.partition_key import PartitionKey + + +@pytest_asyncio.fixture() +async def setup(): + config = test_config.TestConfig() + if config.masterKey == '[YOUR_KEY_HERE]' or config.host == '[YOUR_ENDPOINT_HERE]': + raise Exception( + "You must specify your Azure Cosmos account values for " + "'masterKey' and 'host' at the top of this class to run the " + "tests.") + test_client = CosmosClient(config.host, config.masterKey) + created_db = await test_client.create_database_if_not_exists(config.TEST_DATABASE_ID) + created_db_data = { + "created_db": created_db + } + + yield created_db_data + await test_client.delete_database(config.TEST_DATABASE_ID) + await test_client.close() + +@pytest.mark.cosmosEmulator +@pytest.mark.asyncio +@pytest.mark.usefixtures("setup") +class TestChangeFeedAsync: + """Test to ensure escaping of non-ascii characters from partition key""" + + created_db: DatabaseProxy = None + created_container: ContainerProxy = None + client: CosmosClient = None + config = test_config.TestConfig + TEST_CONTAINER_ID = config.TEST_MULTI_PARTITION_CONTAINER_ID + TEST_DATABASE_ID = config.TEST_DATABASE_ID + host = config.host + masterKey = config.masterKey + connectionPolicy = config.connectionPolicy + + async def test_get_feed_ranges(self, setup): + created_collection = await setup["created_db"].create_container("get_feed_ranges_" + str(uuid.uuid4()), + PartitionKey(path="/pk")) + result = await created_collection.read_feed_ranges() + assert len(result) == 1 + + @pytest.mark.parametrize("change_feed_filter_param", ["partitionKey", "partitionKeyRangeId", "feedRange"]) + async def test_query_change_feed_with_different_filter_async(self, change_feed_filter_param, setup): + + created_collection = await setup["created_db"].create_container( + "change_feed_test_" + str(uuid.uuid4()), + PartitionKey(path="/pk")) + + if change_feed_filter_param == "partitionKey": + filter_param = {"partition_key": "pk"} + elif change_feed_filter_param == "partitionKeyRangeId": + filter_param = {"partition_key_range_id": "0"} + elif change_feed_filter_param == "feedRange": + feed_ranges = await created_collection.read_feed_ranges() + assert len(feed_ranges) == 1 + filter_param = {"feed_range": feed_ranges[0]} + else: + filter_param = None + + # Read change feed without passing any options + query_iterable = await created_collection.query_items_change_feed() + iter_list = [item async for item in query_iterable] + assert len(iter_list) == 0 + + # Read change feed from current should return an empty list + query_iterable = await created_collection.query_items_change_feed(filter_param) + iter_list = [item async for item in query_iterable] + assert len(iter_list) == 0 + if 'Etag' in created_collection.client_connection.last_response_headers: + assert created_collection.client_connection.last_response_headers['Etag'] != '' + elif 'etag' in created_collection.client_connection.last_response_headers: + assert created_collection.client_connection.last_response_headers['etag'] != '' + else: + fail("No Etag or etag found in last response headers") + + # Read change feed from beginning should return an empty list + query_iterable = await 
created_collection.query_items_change_feed( + is_start_from_beginning=True, + **filter_param + ) + iter_list = [item async for item in query_iterable] + assert len(iter_list) == 0 + if 'Etag' in created_collection.client_connection.last_response_headers: + continuation1 = created_collection.client_connection.last_response_headers['Etag'] + elif 'etag' in created_collection.client_connection.last_response_headers: + continuation1 = created_collection.client_connection.last_response_headers['etag'] + else: + fail("No Etag or etag found in last response headers") + assert continuation1 != '' + + # Create a document. Read change feed should return be able to read that document + document_definition = {'pk': 'pk', 'id': 'doc1'} + await created_collection.create_item(body=document_definition) + query_iterable = await created_collection.query_items_change_feed( + is_start_from_beginning=True, + **filter_param + ) + iter_list = [item async for item in query_iterable] + assert len(iter_list) == 1 + assert iter_list[0]['id'] == 'doc1' + if 'Etag' in created_collection.client_connection.last_response_headers: + continuation2 = created_collection.client_connection.last_response_headers['Etag'] + elif 'etag' in created_collection.client_connection.last_response_headers: + continuation2 = created_collection.client_connection.last_response_headers['etag'] + else: + fail("No Etag or etag found in last response headers") + assert continuation2 != '' + assert continuation2 != continuation1 + + # Create two new documents. Verify that change feed contains the 2 new documents + # with page size 1 and page size 100 + document_definition = {'pk': 'pk', 'id': 'doc2'} + await created_collection.create_item(body=document_definition) + document_definition = {'pk': 'pk', 'id': 'doc3'} + await created_collection.create_item(body=document_definition) + + for pageSize in [2, 100]: + # verify iterator + query_iterable = await created_collection.query_items_change_feed( + continuation=continuation2, + max_item_count=pageSize, + **filter_param) + it = query_iterable.__aiter__() + expected_ids = 'doc2.doc3.' + actual_ids = '' + async for item in it: + actual_ids += item['id'] + '.' + assert actual_ids == expected_ids + + # verify by_page + # the options is not copied, therefore it need to be restored + query_iterable = await created_collection.query_items_change_feed( + continuation=continuation2, + max_item_count=pageSize, + **filter_param + ) + count = 0 + expected_count = 2 + all_fetched_res = [] + pages = query_iterable.by_page() + async for items in await pages.__anext__(): + count += 1 + all_fetched_res.append(items) + assert count == expected_count + + actual_ids = '' + for item in all_fetched_res: + actual_ids += item['id'] + '.' 
+ assert actual_ids == expected_ids + + # verify reading change feed from the beginning + query_iterable = await created_collection.query_items_change_feed( + is_start_from_beginning=True, + **filter_param + ) + expected_ids = ['doc1', 'doc2', 'doc3'] + it = query_iterable.__aiter__() + for i in range(0, len(expected_ids)): + doc = await it.__anext__() + assert doc['id'] == expected_ids[i] + if 'Etag' in created_collection.client_connection.last_response_headers: + continuation3 = created_collection.client_connection.last_response_headers['Etag'] + elif 'etag' in created_collection.client_connection.last_response_headers: + continuation3 = created_collection.client_connection.last_response_headers['etag'] + else: + fail("No Etag or etag found in last response headers") + + # verify reading empty change feed + query_iterable = await created_collection.query_items_change_feed( + continuation=continuation3, + is_start_from_beginning=True, + **filter_param + ) + iter_list = [item async for item in query_iterable] + assert len(iter_list) == 0 + + await setup["created_db"].delete_container(created_collection.id) + + @pytest.mark.asyncio + async def test_query_change_feed_with_start_time(self, setup): + created_collection = await setup["created_db"].create_container_if_not_exists("query_change_feed_start_time_test", + PartitionKey(path="/pk")) + batchSize = 50 + + def round_time(): + utc_now = datetime.now(timezone.utc) + return utc_now - timedelta(microseconds=utc_now.microsecond) + + async def create_random_items(container, batch_size): + for _ in range(batch_size): + # Generate a Random partition key + partition_key = 'pk' + str(uuid.uuid4()) + + # Generate a random item + item = { + 'id': 'item' + str(uuid.uuid4()), + 'partitionKey': partition_key, + 'content': 'This is some random content', + } + + try: + # Create the item in the container + await container.upsert_item(item) + except exceptions.CosmosHttpResponseError as e: + pytest.fail(e) + + # Create first batch of random items + await create_random_items(created_collection, batchSize) + + # wait for 1 second and record the time, then wait another second + await sleep(1) + start_time = round_time() + not_utc_time = datetime.now() + await sleep(1) + + # now create another batch of items + await create_random_items(created_collection, batchSize) + + # now query change feed based on start time + change_feed_iter = [i async for i in await created_collection.query_items_change_feed(start_time=start_time)] + totalCount = len(change_feed_iter) + + # now check if the number of items that were changed match the batch size + assert totalCount == batchSize + + # negative test: pass in a valid time in the future + future_time = start_time + timedelta(hours=1) + change_feed_iter = [i async for i in await created_collection.query_items_change_feed(start_time=future_time)] + totalCount = len(change_feed_iter) + # A future time should return 0 + assert totalCount == 0 + + # test a date that is not utc, will be converted to utc by sdk + change_feed_iter = [i async for i in await created_collection.query_items_change_feed(start_time=not_utc_time)] + totalCount = len(change_feed_iter) + # Should equal batch size + assert totalCount == batchSize + + # test an invalid value, Attribute error will be raised for passing non datetime object + invalid_time = "Invalid value" + try: + change_feed_iter = [i async for i in await created_collection.query_items_change_feed(start_time=invalid_time)] + fail("Cannot format date on a non datetime object.") + except 
ValueError as e: + assert ("Invalid start_time 'Invalid value'" == e.args[0]) + + await setup["created_db"].delete_container(created_collection.id) + + async def test_query_change_feed_with_split_async(self, setup): + created_collection = await setup["created_db"].create_container("change_feed_test_" + str(uuid.uuid4()), + PartitionKey(path="/pk"), + offer_throughput=400) + + # initial change feed query returns empty result + query_iterable = await created_collection.query_items_change_feed(start_time="Beginning") + iter_list = [item async for item in query_iterable] + assert len(iter_list) == 0 + continuation = created_collection.client_connection.last_response_headers['etag'] + assert continuation != '' + + # create one doc and make sure change feed query can return the document + document_definition = {'pk': 'pk', 'id': 'doc1'} + await created_collection.create_item(body=document_definition) + query_iterable = await created_collection.query_items_change_feed(continuation=continuation) + iter_list = [item async for item in query_iterable] + assert len(iter_list) == 1 + continuation = created_collection.client_connection.last_response_headers['etag'] + + print("Triggering a split in test_query_change_feed_with_split") + await created_collection.replace_throughput(11000) + print("changed offer to 11k") + print("--------------------------------") + print("Waiting for split to complete") + start_time = time.time() + + while True: + offer = await created_collection.get_throughput() + if offer.properties['content'].get('isOfferReplacePending', False): + if time.time() - start_time > 60 * 25: # timeout test at 25 minutes + unittest.skip("Partition split didn't complete in time.") + else: + print("Waiting for split to complete") + time.sleep(60) + else: + break + + print("Split in test_query_change_feed_with_split has completed") + print("creating few more documents") + new_documents = [{'pk': 'pk2', 'id': 'doc2'}, {'pk': 'pk3', 'id': 'doc3'}, {'pk': 'pk4', 'id': 'doc4'}] + expected_ids = ['doc2', 'doc3', 'doc4'] + for document in new_documents: + await created_collection.create_item(body=document) + + query_iterable = await created_collection.query_items_change_feed(continuation=continuation) + it = query_iterable.__aiter__() + actual_ids = [] + async for item in it: + actual_ids.append(item['id']) + + assert actual_ids == expected_ids + setup["created_db"].delete_container(created_collection.id) + +if __name__ == '__main__': + unittest.main() diff --git a/sdk/cosmos/azure-cosmos/test/test_query.py b/sdk/cosmos/azure-cosmos/test/test_query.py index 73249e562c14..3cc8f57a6d21 100644 --- a/sdk/cosmos/azure-cosmos/test/test_query.py +++ b/sdk/cosmos/azure-cosmos/test/test_query.py @@ -4,8 +4,7 @@ import os import unittest import uuid -from datetime import datetime, timedelta, timezone -from time import sleep + import pytest import azure.cosmos._retry_utility as retry_utility @@ -61,293 +60,6 @@ def test_first_and_last_slashes_trimmed_for_query_string(self): self.assertEqual(iter_list[0]['id'], doc_id) self.created_db.delete_container(created_collection.id) - def test_query_change_feed_with_pk(self): - created_collection = self.created_db.create_container("change_feed_test_" + str(uuid.uuid4()), - PartitionKey(path="/pk")) - # The test targets partition #3 - partition_key = "pk" - - # Read change feed without passing any options - query_iterable = created_collection.query_items_change_feed() - iter_list = list(query_iterable) - self.assertEqual(len(iter_list), 0) - - # Read change feed from current 
should return an empty list - query_iterable = created_collection.query_items_change_feed(partition_key=partition_key) - iter_list = list(query_iterable) - self.assertEqual(len(iter_list), 0) - self.assertTrue('etag' in created_collection.client_connection.last_response_headers) - self.assertNotEqual(created_collection.client_connection.last_response_headers['etag'], '') - - # Read change feed from beginning should return an empty list - query_iterable = created_collection.query_items_change_feed( - is_start_from_beginning=True, - partition_key=partition_key - ) - iter_list = list(query_iterable) - self.assertEqual(len(iter_list), 0) - self.assertTrue('etag' in created_collection.client_connection.last_response_headers) - continuation1 = created_collection.client_connection.last_response_headers['etag'] - self.assertNotEqual(continuation1, '') - - # Create a document. Read change feed should return be able to read that document - document_definition = {'pk': 'pk', 'id': 'doc1'} - created_collection.create_item(body=document_definition) - query_iterable = created_collection.query_items_change_feed( - is_start_from_beginning=True, - partition_key=partition_key - ) - iter_list = list(query_iterable) - self.assertEqual(len(iter_list), 1) - self.assertEqual(iter_list[0]['id'], 'doc1') - self.assertTrue('etag' in created_collection.client_connection.last_response_headers) - continuation2 = created_collection.client_connection.last_response_headers['etag'] - self.assertNotEqual(continuation2, '') - self.assertNotEqual(continuation2, continuation1) - - # Create two new documents. Verify that change feed contains the 2 new documents - # with page size 1 and page size 100 - document_definition = {'pk': 'pk', 'id': 'doc2'} - created_collection.create_item(body=document_definition) - document_definition = {'pk': 'pk', 'id': 'doc3'} - created_collection.create_item(body=document_definition) - - for pageSize in [1, 100]: - # verify iterator - query_iterable = created_collection.query_items_change_feed( - continuation=continuation2, - max_item_count=pageSize, - partition_key=partition_key - ) - it = query_iterable.__iter__() - expected_ids = 'doc2.doc3.' - actual_ids = '' - for item in it: - actual_ids += item['id'] + '.' - self.assertEqual(actual_ids, expected_ids) - - # verify by_page - # the options is not copied, therefore it need to be restored - query_iterable = created_collection.query_items_change_feed( - continuation=continuation2, - max_item_count=pageSize, - partition_key=partition_key - ) - count = 0 - expected_count = 2 - all_fetched_res = [] - for page in query_iterable.by_page(): - fetched_res = list(page) - self.assertEqual(len(fetched_res), min(pageSize, expected_count - count)) - count += len(fetched_res) - all_fetched_res.extend(fetched_res) - - actual_ids = '' - for item in all_fetched_res: - actual_ids += item['id'] + '.' 
- self.assertEqual(actual_ids, expected_ids) - - # verify reading change feed from the beginning - query_iterable = created_collection.query_items_change_feed( - is_start_from_beginning=True, - partition_key=partition_key - ) - expected_ids = ['doc1', 'doc2', 'doc3'] - it = query_iterable.__iter__() - for i in range(0, len(expected_ids)): - doc = next(it) - self.assertEqual(doc['id'], expected_ids[i]) - self.assertTrue('etag' in created_collection.client_connection.last_response_headers) - continuation3 = created_collection.client_connection.last_response_headers['etag'] - - # verify reading empty change feed - query_iterable = created_collection.query_items_change_feed( - continuation=continuation3, - is_start_from_beginning=True, - partition_key=partition_key - ) - iter_list = list(query_iterable) - self.assertEqual(len(iter_list), 0) - self.created_db.delete_container(created_collection.id) - - # TODO: partition key range id 0 is relative to the way collection is created - @pytest.mark.skip - def test_query_change_feed_with_pk_range_id(self): - created_collection = self.created_db.create_container("change_feed_test_" + str(uuid.uuid4()), - PartitionKey(path="/pk")) - # The test targets partition #3 - partition_key_range_id = 0 - partitionParam = {"partition_key_range_id": partition_key_range_id} - - # Read change feed without passing any options - query_iterable = created_collection.query_items_change_feed() - iter_list = list(query_iterable) - self.assertEqual(len(iter_list), 0) - - # Read change feed from current should return an empty list - query_iterable = created_collection.query_items_change_feed(**partitionParam) - iter_list = list(query_iterable) - self.assertEqual(len(iter_list), 0) - self.assertTrue('etag' in created_collection.client_connection.last_response_headers) - self.assertNotEqual(created_collection.client_connection.last_response_headers['etag'], '') - - # Read change feed from beginning should return an empty list - query_iterable = created_collection.query_items_change_feed( - is_start_from_beginning=True, - **partitionParam - ) - iter_list = list(query_iterable) - self.assertEqual(len(iter_list), 0) - self.assertTrue('etag' in created_collection.client_connection.last_response_headers) - continuation1 = created_collection.client_connection.last_response_headers['etag'] - self.assertNotEqual(continuation1, '') - - # Create a document. Read change feed should return be able to read that document - document_definition = {'pk': 'pk', 'id': 'doc1'} - created_collection.create_item(body=document_definition) - query_iterable = created_collection.query_items_change_feed( - is_start_from_beginning=True, - **partitionParam - ) - iter_list = list(query_iterable) - self.assertEqual(len(iter_list), 1) - self.assertEqual(iter_list[0]['id'], 'doc1') - self.assertTrue('etag' in created_collection.client_connection.last_response_headers) - continuation2 = created_collection.client_connection.last_response_headers['etag'] - self.assertNotEqual(continuation2, '') - self.assertNotEqual(continuation2, continuation1) - - # Create two new documents. 
Verify that change feed contains the 2 new documents - # with page size 1 and page size 100 - document_definition = {'pk': 'pk', 'id': 'doc2'} - created_collection.create_item(body=document_definition) - document_definition = {'pk': 'pk', 'id': 'doc3'} - created_collection.create_item(body=document_definition) - - for pageSize in [1, 100]: - # verify iterator - query_iterable = created_collection.query_items_change_feed( - continuation=continuation2, - max_item_count=pageSize, - **partitionParam - ) - it = query_iterable.__iter__() - expected_ids = 'doc2.doc3.' - actual_ids = '' - for item in it: - actual_ids += item['id'] + '.' - self.assertEqual(actual_ids, expected_ids) - - # verify by_page - # the options is not copied, therefore it need to be restored - query_iterable = created_collection.query_items_change_feed( - continuation=continuation2, - max_item_count=pageSize, - **partitionParam - ) - count = 0 - expected_count = 2 - all_fetched_res = [] - for page in query_iterable.by_page(): - fetched_res = list(page) - self.assertEqual(len(fetched_res), min(pageSize, expected_count - count)) - count += len(fetched_res) - all_fetched_res.extend(fetched_res) - - actual_ids = '' - for item in all_fetched_res: - actual_ids += item['id'] + '.' - self.assertEqual(actual_ids, expected_ids) - - # verify reading change feed from the beginning - query_iterable = created_collection.query_items_change_feed( - is_start_from_beginning=True, - **partitionParam - ) - expected_ids = ['doc1', 'doc2', 'doc3'] - it = query_iterable.__iter__() - for i in range(0, len(expected_ids)): - doc = next(it) - self.assertEqual(doc['id'], expected_ids[i]) - self.assertTrue('etag' in created_collection.client_connection.last_response_headers) - continuation3 = created_collection.client_connection.last_response_headers['etag'] - - # verify reading empty change feed - query_iterable = created_collection.query_items_change_feed( - continuation=continuation3, - is_start_from_beginning=True, - **partitionParam - ) - iter_list = list(query_iterable) - self.assertEqual(len(iter_list), 0) - self.created_db.delete_container(created_collection.id) - - def test_query_change_feed_with_start_time(self): - created_collection = self.created_db.create_container_if_not_exists("query_change_feed_start_time_test", - PartitionKey(path="/pk")) - batchSize = 50 - - def round_time(): - utc_now = datetime.now(timezone.utc) - return utc_now - timedelta(microseconds=utc_now.microsecond) - def create_random_items(container, batch_size): - for _ in range(batch_size): - # Generate a Random partition key - partition_key = 'pk' + str(uuid.uuid4()) - - # Generate a random item - item = { - 'id': 'item' + str(uuid.uuid4()), - 'partitionKey': partition_key, - 'content': 'This is some random content', - } - - try: - # Create the item in the container - container.upsert_item(item) - except exceptions.CosmosHttpResponseError as e: - self.fail(e) - - # Create first batch of random items - create_random_items(created_collection, batchSize) - - # wait for 1 second and record the time, then wait another second - sleep(1) - start_time = round_time() - not_utc_time = datetime.now() - sleep(1) - - # now create another batch of items - create_random_items(created_collection, batchSize) - - # now query change feed based on start time - change_feed_iter = list(created_collection.query_items_change_feed(start_time=start_time)) - totalCount = len(change_feed_iter) - - # now check if the number of items that were changed match the batch size - 
self.assertEqual(totalCount, batchSize) - - # negative test: pass in a valid time in the future - future_time = start_time + timedelta(hours=1) - change_feed_iter = list(created_collection.query_items_change_feed(start_time=future_time)) - totalCount = len(change_feed_iter) - # A future time should return 0 - self.assertEqual(totalCount, 0) - - # test a date that is not utc, will be converted to utc by sdk - change_feed_iter = list(created_collection.query_items_change_feed(start_time=not_utc_time)) - totalCount = len(change_feed_iter) - # Should equal batch size - self.assertEqual(totalCount, batchSize) - - # test an invalid value, Attribute error will be raised for passing non datetime object - invalid_time = "Invalid value" - try: - change_feed_iter = list(created_collection.query_items_change_feed(start_time=invalid_time)) - self.fail("Cannot format date on a non datetime object.") - except AttributeError as e: - self.assertTrue("'str' object has no attribute 'astimezone'" == e.args[0]) - def test_populate_query_metrics(self): created_collection = self.created_db.create_container("query_metrics_test", PartitionKey(path="/pk")) diff --git a/sdk/cosmos/azure-cosmos/test/test_query_async.py b/sdk/cosmos/azure-cosmos/test/test_query_async.py index 51018126462d..718c544193a3 100644 --- a/sdk/cosmos/azure-cosmos/test/test_query_async.py +++ b/sdk/cosmos/azure-cosmos/test/test_query_async.py @@ -4,8 +4,7 @@ import os import unittest import uuid -from asyncio import sleep, gather -from datetime import datetime, timedelta, timezone +from asyncio import gather import pytest @@ -14,10 +13,10 @@ import test_config from azure.cosmos import http_constants from azure.cosmos._execution_context.query_execution_info import _PartitionedQueryExecutionInfo +from azure.cosmos._retry_options import RetryOptions from azure.cosmos.aio import CosmosClient, DatabaseProxy, ContainerProxy from azure.cosmos.documents import _DistinctType from azure.cosmos.partition_key import PartitionKey -from azure.cosmos._retry_options import RetryOptions @pytest.mark.cosmosEmulator @@ -69,329 +68,6 @@ async def test_first_and_last_slashes_trimmed_for_query_string_async(self): await self.created_db.delete_container(created_collection.id) - async def test_query_change_feed_with_pk_async(self): - created_collection = await self.created_db.create_container( - "change_feed_test_" + str(uuid.uuid4()), - PartitionKey(path="/pk")) - # The test targets partition #3 - partition_key = "pk" - - # Read change feed without passing any options - query_iterable = created_collection.query_items_change_feed() - iter_list = [item async for item in query_iterable] - assert len(iter_list) == 0 - - # Read change feed from current should return an empty list - query_iterable = created_collection.query_items_change_feed(partition_key=partition_key) - iter_list = [item async for item in query_iterable] - assert len(iter_list) == 0 - if 'Etag' in created_collection.client_connection.last_response_headers: - assert created_collection.client_connection.last_response_headers['Etag'] != '' - elif 'etag' in created_collection.client_connection.last_response_headers: - assert created_collection.client_connection.last_response_headers['etag'] != '' - else: - self.fail("No Etag or etag found in last response headers") - - # Read change feed from beginning should return an empty list - query_iterable = created_collection.query_items_change_feed( - is_start_from_beginning=True, - partition_key=partition_key - ) - iter_list = [item async for item in 
query_iterable] - assert len(iter_list) == 0 - if 'Etag' in created_collection.client_connection.last_response_headers: - continuation1 = created_collection.client_connection.last_response_headers['Etag'] - elif 'etag' in created_collection.client_connection.last_response_headers: - continuation1 = created_collection.client_connection.last_response_headers['etag'] - else: - self.fail("No Etag or etag found in last response headers") - assert continuation1 != '' - - # Create a document. Read change feed should return be able to read that document - document_definition = {'pk': 'pk', 'id': 'doc1'} - await created_collection.create_item(body=document_definition) - query_iterable = created_collection.query_items_change_feed( - is_start_from_beginning=True, - partition_key=partition_key - ) - iter_list = [item async for item in query_iterable] - assert len(iter_list) == 1 - assert iter_list[0]['id'] == 'doc1' - if 'Etag' in created_collection.client_connection.last_response_headers: - continuation2 = created_collection.client_connection.last_response_headers['Etag'] - elif 'etag' in created_collection.client_connection.last_response_headers: - continuation2 = created_collection.client_connection.last_response_headers['etag'] - else: - self.fail("No Etag or etag found in last response headers") - assert continuation2 != '' - assert continuation2 != continuation1 - - # Create two new documents. Verify that change feed contains the 2 new documents - # with page size 1 and page size 100 - document_definition = {'pk': 'pk', 'id': 'doc2'} - await created_collection.create_item(body=document_definition) - document_definition = {'pk': 'pk', 'id': 'doc3'} - await created_collection.create_item(body=document_definition) - - for pageSize in [2, 100]: - # verify iterator - query_iterable = created_collection.query_items_change_feed( - continuation=continuation2, - max_item_count=pageSize, - partition_key=partition_key) - it = query_iterable.__aiter__() - expected_ids = 'doc2.doc3.' - actual_ids = '' - async for item in it: - actual_ids += item['id'] + '.' - assert actual_ids == expected_ids - - # verify by_page - # the options is not copied, therefore it need to be restored - query_iterable = created_collection.query_items_change_feed( - continuation=continuation2, - max_item_count=pageSize, - partition_key=partition_key - ) - count = 0 - expected_count = 2 - all_fetched_res = [] - pages = query_iterable.by_page() - async for items in await pages.__anext__(): - count += 1 - all_fetched_res.append(items) - assert count == expected_count - - actual_ids = '' - for item in all_fetched_res: - actual_ids += item['id'] + '.' 
- assert actual_ids == expected_ids - - # verify reading change feed from the beginning - query_iterable = created_collection.query_items_change_feed( - is_start_from_beginning=True, - partition_key=partition_key - ) - expected_ids = ['doc1', 'doc2', 'doc3'] - it = query_iterable.__aiter__() - for i in range(0, len(expected_ids)): - doc = await it.__anext__() - assert doc['id'] == expected_ids[i] - if 'Etag' in created_collection.client_connection.last_response_headers: - continuation3 = created_collection.client_connection.last_response_headers['Etag'] - elif 'etag' in created_collection.client_connection.last_response_headers: - continuation3 = created_collection.client_connection.last_response_headers['etag'] - else: - self.fail("No Etag or etag found in last response headers") - - # verify reading empty change feed - query_iterable = created_collection.query_items_change_feed( - continuation=continuation3, - is_start_from_beginning=True, - partition_key=partition_key - ) - iter_list = [item async for item in query_iterable] - assert len(iter_list) == 0 - - await self.created_db.delete_container(created_collection.id) - - # TODO: partition key range id 0 is relative to the way collection is created - @pytest.mark.skip - async def test_query_change_feed_with_pk_range_id_async(self): - created_collection = await self.created_db.create_container("cf_test_" + str(uuid.uuid4()), - PartitionKey(path="/pk")) - # The test targets partition #3 - partition_key_range_id = 0 - partition_param = {"partition_key_range_id": partition_key_range_id} - - # Read change feed without passing any options - query_iterable = created_collection.query_items_change_feed() - iter_list = [item async for item in query_iterable] - assert len(iter_list) == 0 - - # Read change feed from current should return an empty list - query_iterable = created_collection.query_items_change_feed(**partition_param) - iter_list = [item async for item in query_iterable] - assert len(iter_list) == 0 - if 'Etag' in created_collection.client_connection.last_response_headers: - assert created_collection.client_connection.last_response_headers['Etag'] - elif 'etag' in created_collection.client_connection.last_response_headers: - assert created_collection.client_connection.last_response_headers['etag'] - else: - self.fail("No Etag or etag found in last response headers") - - # Read change feed from beginning should return an empty list - query_iterable = created_collection.query_items_change_feed( - is_start_from_beginning=True, - **partition_param - ) - iter_list = [item async for item in query_iterable] - assert len(iter_list) == 0 - if 'Etag' in created_collection.client_connection.last_response_headers: - continuation1 = created_collection.client_connection.last_response_headers['Etag'] - elif 'etag' in created_collection.client_connection.last_response_headers: - continuation1 = created_collection.client_connection.last_response_headers['etag'] - else: - self.fail("No Etag or etag found in last response headers") - assert continuation1 != '' - - # Create a document. 
Read change feed should return be able to read that document - document_definition = {'pk': 'pk', 'id': 'doc1'} - await created_collection.create_item(body=document_definition) - query_iterable = created_collection.query_items_change_feed( - is_start_from_beginning=True, - **partition_param - ) - iter_list = [item async for item in query_iterable] - assert len(iter_list) == 1 - assert iter_list[0]['id'] == 'doc1' - if 'Etag' in created_collection.client_connection.last_response_headers: - continuation2 = created_collection.client_connection.last_response_headers['Etag'] - elif 'etag' in created_collection.client_connection.last_response_headers: - continuation2 = created_collection.client_connection.last_response_headers['etag'] - else: - self.fail("No Etag or etag found in last response headers") - assert continuation2 != '' - assert continuation2 != continuation1 - - # Create two new documents. Verify that change feed contains the 2 new documents - # with page size 1 and page size 100 - document_definition = {'pk': 'pk', 'id': 'doc2'} - await created_collection.create_item(body=document_definition) - document_definition = {'pk': 'pk', 'id': 'doc3'} - await created_collection.create_item(body=document_definition) - - for pageSize in [2, 100]: - # verify iterator - query_iterable = created_collection.query_items_change_feed( - continuation=continuation2, - max_item_count=pageSize, - **partition_param - ) - it = query_iterable.__aiter__() - expected_ids = 'doc2.doc3.' - actual_ids = '' - async for item in it: - actual_ids += item['id'] + '.' - assert actual_ids == expected_ids - - # verify by_page - # the options is not copied, therefore it need to be restored - query_iterable = created_collection.query_items_change_feed( - continuation=continuation2, - max_item_count=pageSize, - **partition_param - ) - count = 0 - expected_count = 2 - all_fetched_res = [] - pages = query_iterable.by_page() - async for items in await pages.__anext__(): - count += 1 - all_fetched_res.append(items) - assert count == expected_count - - actual_ids = '' - for item in all_fetched_res: - actual_ids += item['id'] + '.' 
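# Editor's note: the Etag/etag header branching repeated throughout these removed tests
# could be captured by a small helper like the hypothetical sketch below; `container` is
# assumed to be a ContainerProxy (sync or async) whose last request has already completed.
def last_change_feed_continuation(container):
    headers = container.client_connection.last_response_headers
    # The service may surface the continuation etag under either header casing.
    return headers.get("Etag") or headers.get("etag")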
- assert actual_ids == expected_ids - - # verify reading change feed from the beginning - query_iterable = created_collection.query_items_change_feed( - is_start_from_beginning=True, - **partition_param - ) - expected_ids = ['doc1', 'doc2', 'doc3'] - it = query_iterable.__aiter__() - for i in range(0, len(expected_ids)): - doc = await it.__anext__() - assert doc['id'] == expected_ids[i] - if 'Etag' in created_collection.client_connection.last_response_headers: - continuation3 = created_collection.client_connection.last_response_headers['Etag'] - elif 'etag' in created_collection.client_connection.last_response_headers: - continuation3 = created_collection.client_connection.last_response_headers['etag'] - else: - self.fail("No Etag or etag found in last response headers") - - # verify reading empty change feed - query_iterable = created_collection.query_items_change_feed( - continuation=continuation3, - is_start_from_beginning=True, - **partition_param - ) - iter_list = [item async for item in query_iterable] - assert len(iter_list) == 0 - - @pytest.mark.asyncio - async def test_query_change_feed_with_start_time(self): - created_collection = await self.created_db.create_container_if_not_exists("query_change_feed_start_time_test", - PartitionKey(path="/pk")) - batchSize = 50 - - def round_time(): - utc_now = datetime.now(timezone.utc) - return utc_now - timedelta(microseconds=utc_now.microsecond) - - async def create_random_items(container, batch_size): - for _ in range(batch_size): - # Generate a Random partition key - partition_key = 'pk' + str(uuid.uuid4()) - - # Generate a random item - item = { - 'id': 'item' + str(uuid.uuid4()), - 'partitionKey': partition_key, - 'content': 'This is some random content', - } - - try: - # Create the item in the container - await container.upsert_item(item) - except exceptions.CosmosHttpResponseError as e: - pytest.fail(e) - - # Create first batch of random items - await create_random_items(created_collection, batchSize) - - # wait for 1 second and record the time, then wait another second - await sleep(1) - start_time = round_time() - not_utc_time = datetime.now() - await sleep(1) - - # now create another batch of items - await create_random_items(created_collection, batchSize) - - # now query change feed based on start time - change_feed_iter = [i async for i in created_collection.query_items_change_feed(start_time=start_time)] - totalCount = len(change_feed_iter) - - # now check if the number of items that were changed match the batch size - assert totalCount == batchSize - - # negative test: pass in a valid time in the future - future_time = start_time + timedelta(hours=1) - change_feed_iter = [i async for i in created_collection.query_items_change_feed(start_time=future_time)] - totalCount = len(change_feed_iter) - # A future time should return 0 - assert totalCount == 0 - - # test a date that is not utc, will be converted to utc by sdk - change_feed_iter = [i async for i in created_collection.query_items_change_feed(start_time=not_utc_time)] - totalCount = len(change_feed_iter) - # Should equal batch size - assert totalCount == batchSize - - # test an invalid value, Attribute error will be raised for passing non datetime object - invalid_time = "Invalid value" - try: - change_feed_iter = [i async for i in created_collection.query_items_change_feed(start_time=invalid_time)] - self.fail("Cannot format date on a non datetime object.") - except AttributeError as e: - assert ("'str' object has no attribute 'astimezone'" == e.args[0]) - - await 
self.created_db.delete_container(created_collection.id) - @pytest.mark.asyncio async def test_populate_query_metrics_async(self): created_collection = await self.created_db.create_container( From 7a1a1ebb7e06e0260c755cf5f4f305576489144e Mon Sep 17 00:00:00 2001 From: annie-mac Date: Sat, 17 Aug 2024 18:17:56 -0700 Subject: [PATCH 02/20] remove async keyword from changeFeed query in aio package --- .../_change_feed/aio/change_feed_iterable.py | 104 +++++++++++++----- .../_change_feed/aio/change_feed_state.py | 56 +++++++--- ...feed_range_composite_continuation_token.py | 42 ++++--- .../azure/cosmos/_change_feed/feed_range.py | 102 +++++++++++++++++ .../azure/cosmos/aio/_container.py | 71 ++---------- .../azure-cosmos/azure/cosmos/container.py | 1 - .../azure/cosmos/partition_key.py | 18 ++- .../test/test_change_feed_async.py | 30 ++--- 8 files changed, 281 insertions(+), 143 deletions(-) create mode 100644 sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range.py diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_iterable.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_iterable.py index 501f3a7e4150..16a431653e9c 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_iterable.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_iterable.py @@ -21,11 +21,13 @@ """Iterable change feed results in the Azure Cosmos database service. """ + from azure.core.async_paging import AsyncPageIterator +from azure.cosmos import PartitionKey from azure.cosmos._change_feed.aio.change_feed_fetcher import ChangeFeedFetcherV1, ChangeFeedFetcherV2 from azure.cosmos._change_feed.aio.change_feed_state import ChangeFeedStateV1, ChangeFeedState -from azure.cosmos._utils import is_base64_encoded +from azure.cosmos._utils import is_base64_encoded, is_key_exists_and_not_none class ChangeFeedIterable(AsyncPageIterator): @@ -57,40 +59,30 @@ def __init__( self._options = options self._fetch_function = fetch_function self._collection_link = collection_link + self._change_feed_fetcher = None - change_feed_state = self._options.get("changeFeedState") - if not change_feed_state: - raise ValueError("Missing changeFeedState in feed options") + if not is_key_exists_and_not_none(self._options, "changeFeedStateContext"): + raise ValueError("Missing changeFeedStateContext in feed options") - if isinstance(change_feed_state, ChangeFeedStateV1): - if continuation_token: - if is_base64_encoded(continuation_token): - raise ValueError("Incompatible continuation token") - else: - change_feed_state.apply_server_response_continuation(continuation_token) + change_feed_state_context = self._options.pop("changeFeedStateContext") - self._change_feed_fetcher = ChangeFeedFetcherV1( - self._client, - self._collection_link, - self._options, - fetch_function - ) - else: - if continuation_token: - if not is_base64_encoded(continuation_token): - raise ValueError("Incompatible continuation token") + continuation = continuation_token if continuation_token is not None else change_feed_state_context.pop("continuation", None) - effective_change_feed_context = {"continuationFeedRange": continuation_token} - effective_change_feed_state = ChangeFeedState.from_json(change_feed_state.container_rid, effective_change_feed_context) - # replace with the effective change feed state - self._options["continuationFeedRange"] = effective_change_feed_state + # analysis and validate continuation token + # there are two types of continuation token we support currently: + 
# v1 version: the continuation token would just be the _etag, + # which is being returned when customer is using partition_key_range_id, + # which is under deprecation and does not support split/merge + # v2 version: the continuation token will be base64 encoded composition token which includes full change feed state + if continuation is not None: + if is_base64_encoded(continuation): + change_feed_state_context["continuationFeedRange"] = continuation + else: + change_feed_state_context["continuationPkRangeId"] = continuation + + self._validate_change_feed_state_context(change_feed_state_context) + self._options["changeFeedStateContext"] = change_feed_state_context - self._change_feed_fetcher = ChangeFeedFetcherV2( - self._client, - self._collection_link, - self._options, - fetch_function - ) super(ChangeFeedIterable, self).__init__(self._fetch_next, self._unpack, continuation_token=continuation_token) async def _unpack(self, block): @@ -112,7 +104,59 @@ async def _fetch_next(self, *args): # pylint: disable=unused-argument :return: List of results. :rtype: list """ + if self._change_feed_fetcher is None: + await self._initialize_change_feed_fetcher() + block = await self._change_feed_fetcher.fetch_next_block() if not block: raise StopAsyncIteration return block + + async def _initialize_change_feed_fetcher(self): + change_feed_state_context = self._options.pop("changeFeedStateContext") + conn_properties = await change_feed_state_context.pop("containerProperties") + if is_key_exists_and_not_none(change_feed_state_context, "partitionKey"): + change_feed_state_context["partitionKey"] = await change_feed_state_context.pop("partitionKey") + + pk_properties = conn_properties.get("partitionKey") + partition_key_definition = PartitionKey(path=pk_properties["paths"], kind=pk_properties["kind"]) + + change_feed_state =\ + ChangeFeedState.from_json(self._collection_link, conn_properties["_rid"], partition_key_definition, change_feed_state_context) + self._options["changeFeedState"] = change_feed_state + + if isinstance(change_feed_state, ChangeFeedStateV1): + self._change_feed_fetcher = ChangeFeedFetcherV1( + self._client, + self._collection_link, + self._options, + self._fetch_function + ) + else: + self._change_feed_fetcher = ChangeFeedFetcherV2( + self._client, + self._collection_link, + self._options, + self._fetch_function + ) + + def _validate_change_feed_state_context(self, change_feed_state_context: dict[str, any]) -> None: + + if is_key_exists_and_not_none(change_feed_state_context, "continuationPkRangeId"): + # if continuation token is in v1 format, throw exception if feed_range is set + if is_key_exists_and_not_none(change_feed_state_context, "feedRange"): + raise ValueError("feed_range and continuation are incompatible") + elif is_key_exists_and_not_none(change_feed_state_context, "continuationFeedRange"): + # if continuation token is in v2 format, since the token itself contains the full change feed state + # so we will ignore other parameters (including incompatible parameters) if they passed in + pass + else: + # validation when no continuation is passed + exclusive_keys = ["partitionKeyRangeId", "partitionKey", "feedRange"] + count = sum(1 for key in exclusive_keys if + key in change_feed_state_context and change_feed_state_context[key] is not None) + if count > 1: + raise ValueError( + "partition_key_range_id, partition_key, feed_range are exclusive parameters, please only set one of them") + + diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_state.py 
b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_state.py index ae2e37568bd4..eede9bd4fe15 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_state.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_state.py @@ -29,11 +29,12 @@ from abc import ABC, abstractmethod from typing import Optional, Union, List, Any -from azure.cosmos import http_constants +from azure.cosmos import http_constants, PartitionKey from azure.cosmos._change_feed.aio.change_feed_start_from import ChangeFeedStartFromETagAndFeedRange, \ ChangeFeedStartFromInternal from azure.cosmos._change_feed.aio.composite_continuation_token import CompositeContinuationToken from azure.cosmos._change_feed.aio.feed_range_composite_continuation_token import FeedRangeCompositeContinuation +from azure.cosmos._change_feed.feed_range import FeedRangeEpk, FeedRangePartitionKey, FeedRange from azure.cosmos._routing.aio.routing_map_provider import SmartRoutingMapProvider from azure.cosmos._routing.routing_range import Range from azure.cosmos._utils import is_key_exists_and_not_none @@ -49,7 +50,10 @@ def populate_feed_options(self, feed_options: dict[str, any]) -> None: pass @abstractmethod - async def populate_request_headers(self, routing_provider: SmartRoutingMapProvider, request_headers: dict[str, any]) -> None: + async def populate_request_headers( + self, + routing_provider: SmartRoutingMapProvider, + request_headers: dict[str, any]) -> None: pass @abstractmethod @@ -57,7 +61,11 @@ def apply_server_response_continuation(self, continuation: str) -> None: pass @staticmethod - def from_json(container_link: str, container_rid: str, data: dict[str, Any]): + def from_json( + container_link: str, + container_rid: str, + partition_key_definition: PartitionKey, + data: dict[str, Any]): if is_key_exists_and_not_none(data, "partitionKeyRangeId") or is_key_exists_and_not_none(data, "continuationPkRangeId"): return ChangeFeedStateV1.from_json(container_link, container_rid, data) else: @@ -69,11 +77,11 @@ def from_json(container_link: str, container_rid: str, data: dict[str, Any]): if version is None: raise ValueError("Invalid base64 encoded continuation string [Missing version]") elif version == "V2": - return ChangeFeedStateV2.from_continuation(container_link, container_rid, continuation_json) + return ChangeFeedStateV2.from_continuation(container_link, container_rid, partition_key_definition, continuation_json) else: raise ValueError("Invalid base64 encoded continuation string [Invalid version]") # when there is no continuation token, by default construct ChangeFeedStateV2 - return ChangeFeedStateV2.from_initial_state(container_link, container_rid, data) + return ChangeFeedStateV2.from_initial_state(container_link, container_rid, partition_key_definition, data) class ChangeFeedStateV1(ChangeFeedState): """Change feed state v1 implementation. 
This is used when partition key range id is used or the continuation is just simple _etag @@ -110,7 +118,10 @@ def from_json(cls, container_link: str, container_rid: str, data: dict[str, Any] data.get("continuationPkRangeId") ) - async def populate_request_headers(self, routing_provider: SmartRoutingMapProvider, headers: dict[str, Any]) -> None: + async def populate_request_headers( + self, + routing_provider: SmartRoutingMapProvider, + headers: dict[str, Any]) -> None: headers[http_constants.HttpHeaders.AIM] = http_constants.HttpHeaders.IncrementalFeedHeaderValue # When a merge happens, the child partition will contain documents ordered by LSN but the _ts/creation time @@ -140,7 +151,8 @@ def __init__( self, container_link: str, container_rid: str, - feed_range: Range, + partition_key_definition: PartitionKey, + feed_range: FeedRange, change_feed_start_from: ChangeFeedStartFromInternal, continuation: Optional[FeedRangeCompositeContinuation] = None): @@ -151,7 +163,9 @@ def __init__( self._continuation = continuation if self._continuation is None: composite_continuation_token_queue = collections.deque() - composite_continuation_token_queue.append(CompositeContinuationToken(self._feed_range, None)) + composite_continuation_token_queue.append(CompositeContinuationToken( + self._feed_range.get_normalized_range(partition_key_definition), + None)) self._continuation =\ FeedRangeCompositeContinuation(self._container_rid, self._feed_range, composite_continuation_token_queue) @@ -168,7 +182,10 @@ def to_dict(self) -> dict[str, Any]: self.continuation_property_name: self._continuation.to_dict() } - async def populate_request_headers(self, routing_provider: SmartRoutingMapProvider, headers: dict[str, any]) -> None: + async def populate_request_headers( + self, + routing_provider: SmartRoutingMapProvider, + headers: dict[str, any]) -> None: headers[http_constants.HttpHeaders.AIM] = http_constants.HttpHeaders.IncrementalFeedHeaderValue # When a merge happens, the child partition will contain documents ordered by LSN but the _ts/creation time @@ -224,6 +241,7 @@ def from_continuation( cls, container_link: str, container_rid: str, + partition_key_definition: PartitionKey, continuation_json: dict[str, Any]) -> 'ChangeFeedStateV2': container_rid_from_continuation = continuation_json.get(ChangeFeedStateV2.container_rid_property_name) @@ -244,6 +262,7 @@ def from_continuation( return ChangeFeedStateV2( container_link=container_link, container_rid=container_rid, + partition_key_definition=partition_key_definition, feed_range=continuation.feed_range, change_feed_start_from=change_feed_start_from, continuation=continuation) @@ -253,26 +272,29 @@ def from_initial_state( cls, container_link: str, collection_rid: str, + partition_key_definition: PartitionKey, data: dict[str, Any]) -> 'ChangeFeedStateV2': if is_key_exists_and_not_none(data, "feedRange"): feed_range_str = base64.b64decode(data["feedRange"]).decode('utf-8') feed_range_json = json.loads(feed_range_str) - feed_range = Range.ParseFromDict(feed_range_json) - elif is_key_exists_and_not_none(data, "partitionKeyFeedRange"): - feed_range = data["partitionKeyFeedRange"] + feed_range = FeedRangeEpk(Range.ParseFromDict(feed_range_json)) + elif is_key_exists_and_not_none(data, "partitionKey"): + feed_range = FeedRangePartitionKey(data["partitionKey"]) else: # default to full range - feed_range = Range( - "", - "FF", - True, - False) + feed_range = FeedRangeEpk( + Range( + "", + "FF", + True, + False)) change_feed_start_from = 
ChangeFeedStartFromInternal.from_start_time(data.get("startTime")) return cls( container_link=container_link, container_rid=collection_rid, + partition_key_definition=partition_key_definition, feed_range=feed_range, change_feed_start_from=change_feed_start_from, continuation=None) diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/feed_range_composite_continuation_token.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/feed_range_composite_continuation_token.py index 6e1b8f974eea..d7bf97c0a903 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/feed_range_composite_continuation_token.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/feed_range_composite_continuation_token.py @@ -27,20 +27,21 @@ from typing import Any from azure.cosmos._change_feed.aio.composite_continuation_token import CompositeContinuationToken +from azure.cosmos._change_feed.feed_range import FeedRange, FeedRangeEpk, FeedRangePartitionKey from azure.cosmos._routing.aio.routing_map_provider import SmartRoutingMapProvider from azure.cosmos._routing.routing_range import Range +from azure.cosmos._utils import is_key_exists_and_not_none class FeedRangeCompositeContinuation(object): - _version_property_name = "V" - _container_rid_property_name = "Rid" - _continuation_property_name = "Continuation" - _feed_range_property_name = "Range" + _version_property_name = "v" + _container_rid_property_name = "rid" + _continuation_property_name = "continuation" def __init__( self, container_rid: str, - feed_range: Range, + feed_range: FeedRange, continuation: collections.deque[CompositeContinuationToken]): if container_rid is None: raise ValueError("container_rid is missing") @@ -55,31 +56,34 @@ def __init__( def current_token(self): return self._current_token + def get_feed_range(self) -> FeedRange: + if isinstance(self._feed_range, FeedRangeEpk): + return FeedRangeEpk(self.current_token.feed_range) + else: + return self._feed_range + def to_dict(self) -> dict[str, Any]: - return { - self._version_property_name: "v1", #TODO: should this start from v2 + json_data = { + self._version_property_name: "v2", self._container_rid_property_name: self._container_rid, self._continuation_property_name: [childToken.to_dict() for childToken in self._continuation], - self._feed_range_property_name: self._feed_range.to_dict() } + json_data.update(self._feed_range.to_dict()) + return json_data + @classmethod def from_json(cls, data) -> 'FeedRangeCompositeContinuation': version = data.get(cls._version_property_name) if version is None: raise ValueError(f"Invalid feed range composite continuation token [Missing {cls._version_property_name}]") - if version != "v1": + if version != "v2": raise ValueError("Invalid feed range composite continuation token [Invalid version]") container_rid = data.get(cls._container_rid_property_name) if container_rid is None: raise ValueError(f"Invalid feed range composite continuation token [Missing {cls._container_rid_property_name}]") - feed_range_data = data.get(cls._feed_range_property_name) - if feed_range_data is None: - raise ValueError(f"Invalid feed range composite continuation token [Missing {cls._feed_range_property_name}]") - feed_range = Range.ParseFromDict(feed_range_data) - continuation_data = data.get(cls._continuation_property_name) if continuation_data is None: raise ValueError(f"Invalid feed range composite continuation token [Missing {cls._continuation_property_name}]") @@ -87,6 +91,14 @@ def from_json(cls, data) -> 'FeedRangeCompositeContinuation': raise 
ValueError(f"Invalid feed range composite continuation token [The {cls._continuation_property_name} must be non-empty array]") continuation = [CompositeContinuationToken.from_json(child_range_continuation_token) for child_range_continuation_token in continuation_data] + # parsing feed range + if is_key_exists_and_not_none(data, FeedRangeEpk.type_property_name): + feed_range = FeedRangeEpk.from_json({ FeedRangeEpk.type_property_name: data[FeedRangeEpk.type_property_name] }) + elif is_key_exists_and_not_none(data, FeedRangePartitionKey.type_property_name): + feed_range = FeedRangePartitionKey.from_json({ FeedRangePartitionKey.type_property_name: data[FeedRangePartitionKey.type_property_name] }) + else: + raise ValueError("Invalid feed range composite continuation token [Missing feed range scope]") + return cls(container_rid=container_rid, feed_range=feed_range, continuation=deque(continuation)) async def handle_feed_range_gone(self, routing_provider: SmartRoutingMapProvider, collection_link: str) -> None: @@ -130,5 +142,5 @@ def apply_not_modified_response(self) -> None: self._initial_no_result_range = self._current_token.feed_range @property - def feed_range(self) -> Range: + def feed_range(self) -> FeedRange: return self._feed_range diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range.py new file mode 100644 index 000000000000..a4b4b5dfedda --- /dev/null +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range.py @@ -0,0 +1,102 @@ +# The MIT License (MIT) +# Copyright (c) 2014 Microsoft Corporation + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +"""Internal class for feed range implementation in the Azure Cosmos +database service. 
+""" +import json +from abc import ABC, abstractmethod +from typing import Union, List + +from azure.cosmos import PartitionKey +from azure.cosmos._routing.routing_range import Range +from azure.cosmos._utils import is_key_exists_and_not_none +from azure.cosmos.partition_key import _Undefined, _Empty + + +class FeedRange(ABC): + + @abstractmethod + def get_normalized_range(self, partition_key_range_definition: PartitionKey) -> Range: + pass + + @abstractmethod + def to_dict(self) -> dict[str, any]: + pass + +class FeedRangePartitionKey(FeedRange): + type_property_name = "PK" + + def __init__( + self, + pk_value: Union[str, int, float, bool, List[Union[str, int, float, bool]], _Empty, _Undefined]): + if pk_value is None: + raise ValueError("PartitionKey cannot be None") + + self._pk_value = pk_value + + def get_normalized_range(self, partition_key_definition: PartitionKey) -> Range: + return partition_key_definition._get_epk_range_for_partition_key(self._pk_value).to_normalized_range() + + def to_dict(self) -> dict[str, any]: + if isinstance(self._pk_value, _Undefined): + return { self.type_property_name: [{}] } + elif isinstance(self._pk_value, _Empty): + return { self.type_property_name: [] } + else: + return { self.type_property_name: json.dumps(self._pk_value) } + + @classmethod + def from_json(cls, data: dict[str, any]) -> 'FeedRangePartitionKey': + if is_key_exists_and_not_none(data, cls.type_property_name): + pk_value = data.get(cls.type_property_name) + if isinstance(pk_value, list): + if not pk_value: + return cls(_Empty()) + if pk_value == [{}]: + return cls(_Undefined()) + + return cls(json.loads(data.get(cls.type_property_name))) + raise ValueError(f"Can not parse FeedRangePartitionKey from the json, there is no property {cls.type_property_name}") + +class FeedRangeEpk(FeedRange): + type_property_name = "Range" + + def __init__(self, feed_range: Range): + if feed_range is None: + raise ValueError("feed_range cannot be None") + + self._range = feed_range + + def get_normalized_range(self, partition_key_definition: PartitionKey) -> Range: + return self._range.to_normalized_range() + + def to_dict(self) -> dict[str, any]: + return { + self.type_property_name: self._range.to_dict() + } + + @classmethod + def from_json(cls, data: dict[str, any]) -> 'FeedRangeEpk': + if is_key_exists_and_not_none(data, cls.type_property_name): + feed_range = Range.ParseFromDict(data.get(cls.type_property_name)) + return cls(feed_range) + raise ValueError(f"Can not parse FeedRangeEPK from the json, there is no property {cls.type_property_name}") \ No newline at end of file diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py index a8559839aad7..c68ddad7eb0d 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py @@ -41,10 +41,9 @@ GenerateGuidId, _set_properties_cache ) -from .._change_feed.aio.change_feed_state import ChangeFeedState from .._routing import routing_range from .._routing.routing_range import Range -from .._utils import is_key_exists_and_not_none, is_base64_encoded +from .._utils import is_key_exists_and_not_none from ..offer import ThroughputProperties from ..partition_key import ( NonePartitionKeyValue, @@ -137,25 +136,12 @@ async def _set_partition_key( return _return_undefined_or_empty_partition_key(await self.is_system_key) return cast(Union[str, int, float, bool, List[Union[str, int, float, bool]]], partition_key) - async def 
_get_epk_range_for_partition_key(self, partition_key_value: PartitionKeyType) -> Range: - container_properties = await self._get_properties() - partition_key_definition = container_properties.get("partitionKey") - partition_key = PartitionKey(path=partition_key_definition["paths"], kind=partition_key_definition["kind"]) - - is_prefix_partition_key = await self.__is_prefix_partition_key(partition_key_value) - - return partition_key._get_epk_range_for_partition_key(partition_key_value, is_prefix_partition_key) - async def __is_prefix_partition_key(self, partition_key: PartitionKeyType) -> bool: properties = await self._get_properties() pk_properties = properties.get("partitionKey") partition_key_definition = PartitionKey(path=pk_properties["paths"], kind=pk_properties["kind"]) - if partition_key_definition.kind != "MultiHash": - return False - if isinstance(partition_key, list) and len(partition_key_definition['paths']) == len(partition_key): - return False - return True + return partition_key_definition._is_prefix_partition_key(partition_key) @distributed_trace_async async def read( @@ -506,13 +492,12 @@ def query_items( return items @overload - async def query_items_change_feed( + def query_items_change_feed( self, *, max_item_count: Optional[int] = None, start_time: Optional[Union[datetime, Literal["Now", "Beginning"]]] = None, partition_key: Optional[PartitionKeyType] = None, - # -> would RU usage be more efficient, bug to backend team? deprecate it or using FeedRange to convert? priority: Optional[Literal["High", "Low"]] = None, **kwargs: Any ) -> AsyncItemPaged[Dict[str, Any]]: @@ -561,7 +546,7 @@ async def query_items_change_feed( ... @overload - async def query_items_change_feed( + def query_items_change_feed( self, *, continuation: Optional[str] = None, @@ -582,7 +567,7 @@ async def query_items_change_feed( ... 
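# Editor's note: a hypothetical usage sketch for the overloads above; `container` is an
# assumed aio ContainerProxy and the literal values are illustrative only. With this patch
# the method is a plain `def` returning an AsyncItemPaged, so it is called without `await`
# and its results are consumed with `async for`.
async def drain_change_feed(container, continuation=None):
    if continuation is None:
        # First pass: read everything for one logical partition from the beginning.
        feed = container.query_items_change_feed(start_time="Beginning", partition_key="pk")
    else:
        # Later passes: resume from a previously returned continuation token.
        feed = container.query_items_change_feed(continuation=continuation, max_item_count=10)
    async for item in feed:
        print(item["id"])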
@distributed_trace - async def query_items_change_feed( + def query_items_change_feed( self, *args: Any, **kwargs: Any @@ -637,16 +622,7 @@ async def query_items_change_feed( continuation = feed_options.pop('continuation') except KeyError: continuation = args[2] - - # there are two types of continuation token we support currently: - # v1 version: the continuation token would just be the _etag, - # which is being returned when customer is using partition_key_range_id, - # which is under deprecation and does not support split/merge - # v2 version: the continuation token will be base64 encoded composition token which includes full change feed state - if is_base64_encoded(continuation): - change_feed_state_context["continuationFeedRange"] = continuation - else: - change_feed_state_context["continuationPkRangeId"] = continuation + change_feed_state_context["continuation"] = continuation if len(args) >= 4 and args[3] is not None or is_key_exists_and_not_none(kwargs, "max_item_count"): try: @@ -655,42 +631,21 @@ async def query_items_change_feed( feed_options["maxItemCount"] = args[3] if is_key_exists_and_not_none(kwargs, "partition_key"): - partition_key = kwargs.pop("partition_key") - change_feed_state_context["partitionKey"] = await self._set_partition_key(partition_key) - change_feed_state_context["partitionKeyFeedRange"] = await self._get_epk_range_for_partition_key(partition_key) + change_feed_state_context["partitionKey"] = self._set_partition_key(kwargs.pop("partition_key")) if is_key_exists_and_not_none(kwargs, "feed_range"): change_feed_state_context["feedRange"] = kwargs.pop('feed_range') - # validate exclusive or in-compatible parameters - if is_key_exists_and_not_none(change_feed_state_context, "continuationPkRangeId"): - # if continuation token is in v1 format, throw exception if feed_range is set - if is_key_exists_and_not_none(change_feed_state_context, "feedRange"): - raise ValueError("feed_range and continuation are incompatible") - elif is_key_exists_and_not_none(change_feed_state_context, "continuationFeedRange"): - # if continuation token is in v2 format, since the token itself contains the full change feed state - # so we will ignore other parameters if they passed in - if is_key_exists_and_not_none(change_feed_state_context, "partitionKeyRangeId"): - raise ValueError("partition_key_range_id and continuation are incompatible") - else: - # validation when no continuation is passed - exclusive_keys = ["partitionKeyRangeId", "partitionKey", "feedRange"] - count = sum(1 for key in exclusive_keys if - key in change_feed_state_context and change_feed_state_context[key] is not None) - if count > 1: - raise ValueError( - "partition_key_range_id, partition_key, feed_range are exclusive parameters, please only set one of them") - - container_properties = await self._get_properties() - container_rid = container_properties.get("_rid") - change_feed_state = ChangeFeedState.from_json(self.container_link, container_rid, change_feed_state_context) - feed_options["changeFeedState"] = change_feed_state - feed_options["containerRID"] = self.__get_client_container_caches()[self.container_link]["_rid"] + change_feed_state_context["containerProperties"] = self._get_properties() + feed_options["changeFeedStateContext"] = change_feed_state_context response_hook = kwargs.pop('response_hook', None) if hasattr(response_hook, "clear"): response_hook.clear() + if self.container_link in self.__get_client_container_caches(): + feed_options["containerRID"] = 
self.__get_client_container_caches()[self.container_link]["_rid"] + result = self.client_connection.QueryItemsChangeFeed( self.container_link, options=feed_options, response_hook=response_hook, **kwargs ) @@ -1269,5 +1224,3 @@ async def read_feed_ranges( [Range("", "FF", True, False)]) return [routing_range.Range.PartitionKeyRangeToRange(partitionKeyRange).to_base64_encoded_string() for partitionKeyRange in partition_key_ranges] - - diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/container.py b/sdk/cosmos/azure-cosmos/azure/cosmos/container.py index 32fd818075f8..d4f1d5480241 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/container.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/container.py @@ -327,7 +327,6 @@ def query_items_change_feed( max_item_count: Optional[int] = None, start_time: Optional[Union[datetime, Literal["Now", "Beginning"]]] = None, partition_key: Optional[PartitionKeyType] = None, - # -> would RU usage be more efficient, bug to backend team? deprecate it or using FeedRange to convert? priority: Optional[Literal["High", "Low"]] = None, **kwargs: Any ) -> ItemPaged[Dict[str, Any]]: diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/partition_key.py b/sdk/cosmos/azure-cosmos/azure/cosmos/partition_key.py index 9f0a5cde29a2..c89e1d9ac771 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/partition_key.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/partition_key.py @@ -175,17 +175,14 @@ def _get_epk_range_for_prefix_partition_key( def _get_epk_range_for_partition_key( self, - pk_value: Sequence[Union[None, bool, int, float, str, _Undefined, Type[NonePartitionKeyValue]]], - is_prefix_pk_value: bool = False + pk_value: Sequence[Union[None, bool, int, float, str, _Undefined, Type[NonePartitionKeyValue]]] ) -> _Range: - if is_prefix_pk_value: + if self._is_prefix_partition_key(pk_value): return self._get_epk_range_for_prefix_partition_key(pk_value) # else return point range effective_partition_key_string = self._get_effective_partition_key_string(pk_value) - partition_key_range = _Range(effective_partition_key_string, effective_partition_key_string, True, True) - - return partition_key_range.to_normalized_range() + return _Range(effective_partition_key_string, effective_partition_key_string, True, True) def _get_effective_partition_key_for_hash_partitioning(self) -> str: # We shouldn't be supporting V1 @@ -279,6 +276,15 @@ def _get_effective_partition_key_for_multi_hash_partitioning_v2( return ''.join(sb).upper() + def _is_prefix_partition_key( + self, + partition_key: Union[str, int, float, bool, Sequence[Union[str, int, float, bool, None]], Type[NonePartitionKeyValue]]) -> bool: + if self.kind!= "MultiHash": + return False + if isinstance(partition_key, list) and len(self.path) == len(partition_key): + return False + return True + def _return_undefined_or_empty_partition_key(is_system_key: bool) -> Union[_Empty, _Undefined]: if is_system_key: diff --git a/sdk/cosmos/azure-cosmos/test/test_change_feed_async.py b/sdk/cosmos/azure-cosmos/test/test_change_feed_async.py index c3246768a796..b4165af0601c 100644 --- a/sdk/cosmos/azure-cosmos/test/test_change_feed_async.py +++ b/sdk/cosmos/azure-cosmos/test/test_change_feed_async.py @@ -76,12 +76,12 @@ async def test_query_change_feed_with_different_filter_async(self, change_feed_f filter_param = None # Read change feed without passing any options - query_iterable = await created_collection.query_items_change_feed() + query_iterable = created_collection.query_items_change_feed() iter_list = [item async for item in query_iterable] assert 
len(iter_list) == 0 # Read change feed from current should return an empty list - query_iterable = await created_collection.query_items_change_feed(filter_param) + query_iterable = created_collection.query_items_change_feed(filter_param) iter_list = [item async for item in query_iterable] assert len(iter_list) == 0 if 'Etag' in created_collection.client_connection.last_response_headers: @@ -92,7 +92,7 @@ async def test_query_change_feed_with_different_filter_async(self, change_feed_f fail("No Etag or etag found in last response headers") # Read change feed from beginning should return an empty list - query_iterable = await created_collection.query_items_change_feed( + query_iterable = created_collection.query_items_change_feed( is_start_from_beginning=True, **filter_param ) @@ -109,7 +109,7 @@ async def test_query_change_feed_with_different_filter_async(self, change_feed_f # Create a document. Read change feed should return be able to read that document document_definition = {'pk': 'pk', 'id': 'doc1'} await created_collection.create_item(body=document_definition) - query_iterable = await created_collection.query_items_change_feed( + query_iterable = created_collection.query_items_change_feed( is_start_from_beginning=True, **filter_param ) @@ -134,7 +134,7 @@ async def test_query_change_feed_with_different_filter_async(self, change_feed_f for pageSize in [2, 100]: # verify iterator - query_iterable = await created_collection.query_items_change_feed( + query_iterable = created_collection.query_items_change_feed( continuation=continuation2, max_item_count=pageSize, **filter_param) @@ -147,7 +147,7 @@ async def test_query_change_feed_with_different_filter_async(self, change_feed_f # verify by_page # the options is not copied, therefore it need to be restored - query_iterable = await created_collection.query_items_change_feed( + query_iterable = created_collection.query_items_change_feed( continuation=continuation2, max_item_count=pageSize, **filter_param @@ -167,7 +167,7 @@ async def test_query_change_feed_with_different_filter_async(self, change_feed_f assert actual_ids == expected_ids # verify reading change feed from the beginning - query_iterable = await created_collection.query_items_change_feed( + query_iterable = created_collection.query_items_change_feed( is_start_from_beginning=True, **filter_param ) @@ -184,7 +184,7 @@ async def test_query_change_feed_with_different_filter_async(self, change_feed_f fail("No Etag or etag found in last response headers") # verify reading empty change feed - query_iterable = await created_collection.query_items_change_feed( + query_iterable = created_collection.query_items_change_feed( continuation=continuation3, is_start_from_beginning=True, **filter_param @@ -235,7 +235,7 @@ async def create_random_items(container, batch_size): await create_random_items(created_collection, batchSize) # now query change feed based on start time - change_feed_iter = [i async for i in await created_collection.query_items_change_feed(start_time=start_time)] + change_feed_iter = [i async for i in created_collection.query_items_change_feed(start_time=start_time)] totalCount = len(change_feed_iter) # now check if the number of items that were changed match the batch size @@ -243,13 +243,13 @@ async def create_random_items(container, batch_size): # negative test: pass in a valid time in the future future_time = start_time + timedelta(hours=1) - change_feed_iter = [i async for i in await created_collection.query_items_change_feed(start_time=future_time)] + change_feed_iter = 
[i async for i in created_collection.query_items_change_feed(start_time=future_time)] totalCount = len(change_feed_iter) # A future time should return 0 assert totalCount == 0 # test a date that is not utc, will be converted to utc by sdk - change_feed_iter = [i async for i in await created_collection.query_items_change_feed(start_time=not_utc_time)] + change_feed_iter = [i async for i in created_collection.query_items_change_feed(start_time=not_utc_time)] totalCount = len(change_feed_iter) # Should equal batch size assert totalCount == batchSize @@ -257,7 +257,7 @@ async def create_random_items(container, batch_size): # test an invalid value, Attribute error will be raised for passing non datetime object invalid_time = "Invalid value" try: - change_feed_iter = [i async for i in await created_collection.query_items_change_feed(start_time=invalid_time)] + change_feed_iter = [i async for i in created_collection.query_items_change_feed(start_time=invalid_time)] fail("Cannot format date on a non datetime object.") except ValueError as e: assert ("Invalid start_time 'Invalid value'" == e.args[0]) @@ -270,7 +270,7 @@ async def test_query_change_feed_with_split_async(self, setup): offer_throughput=400) # initial change feed query returns empty result - query_iterable = await created_collection.query_items_change_feed(start_time="Beginning") + query_iterable = created_collection.query_items_change_feed(start_time="Beginning") iter_list = [item async for item in query_iterable] assert len(iter_list) == 0 continuation = created_collection.client_connection.last_response_headers['etag'] @@ -279,7 +279,7 @@ async def test_query_change_feed_with_split_async(self, setup): # create one doc and make sure change feed query can return the document document_definition = {'pk': 'pk', 'id': 'doc1'} await created_collection.create_item(body=document_definition) - query_iterable = await created_collection.query_items_change_feed(continuation=continuation) + query_iterable = created_collection.query_items_change_feed(continuation=continuation) iter_list = [item async for item in query_iterable] assert len(iter_list) == 1 continuation = created_collection.client_connection.last_response_headers['etag'] @@ -309,7 +309,7 @@ async def test_query_change_feed_with_split_async(self, setup): for document in new_documents: await created_collection.create_item(body=document) - query_iterable = await created_collection.query_items_change_feed(continuation=continuation) + query_iterable = created_collection.query_items_change_feed(continuation=continuation) it = query_iterable.__aiter__() actual_ids = [] async for item in it: From b6c53fb71465a01512ecf01c22c247cae97e55a9 Mon Sep 17 00:00:00 2001 From: annie-mac Date: Sat, 17 Aug 2024 22:50:20 -0700 Subject: [PATCH 03/20] refactor --- .../_change_feed/aio/change_feed_iterable.py | 13 ++- .../_change_feed/aio/change_feed_state.py | 26 +++-- ...feed_range_composite_continuation_token.py | 13 +-- .../_change_feed/change_feed_fetcher.py | 1 - .../_change_feed/change_feed_iterable.py | 100 ++++++++++++------ .../cosmos/_change_feed/change_feed_state.py | 46 +++++--- .../azure/cosmos/_change_feed/feed_range.py | 24 +++-- ...feed_range_composite_continuation_token.py | 38 ++++--- .../azure/cosmos/aio/_container.py | 2 +- .../azure-cosmos/azure/cosmos/container.py | 55 ++-------- 10 files changed, 170 insertions(+), 148 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_iterable.py 
b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_iterable.py index 16a431653e9c..c792d1357550 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_iterable.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_iterable.py @@ -114,15 +114,16 @@ async def _fetch_next(self, *args): # pylint: disable=unused-argument async def _initialize_change_feed_fetcher(self): change_feed_state_context = self._options.pop("changeFeedStateContext") - conn_properties = await change_feed_state_context.pop("containerProperties") + conn_properties = await self._options.pop("containerProperties") if is_key_exists_and_not_none(change_feed_state_context, "partitionKey"): change_feed_state_context["partitionKey"] = await change_feed_state_context.pop("partitionKey") - - pk_properties = conn_properties.get("partitionKey") - partition_key_definition = PartitionKey(path=pk_properties["paths"], kind=pk_properties["kind"]) + pk_properties = conn_properties.get("partitionKey") + partition_key_definition = PartitionKey(path=pk_properties["paths"], kind=pk_properties["kind"]) + change_feed_state_context["partitionKeyFeedRange"] =\ + partition_key_definition._get_epk_range_for_partition_key(change_feed_state_context["partitionKey"]) change_feed_state =\ - ChangeFeedState.from_json(self._collection_link, conn_properties["_rid"], partition_key_definition, change_feed_state_context) + ChangeFeedState.from_json(self._collection_link, conn_properties["_rid"], change_feed_state_context) self._options["changeFeedState"] = change_feed_state if isinstance(change_feed_state, ChangeFeedStateV1): @@ -158,5 +159,3 @@ def _validate_change_feed_state_context(self, change_feed_state_context: dict[st if count > 1: raise ValueError( "partition_key_range_id, partition_key, feed_range are exclusive parameters, please only set one of them") - - diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_state.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_state.py index eede9bd4fe15..ceb83166bdab 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_state.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_state.py @@ -29,7 +29,7 @@ from abc import ABC, abstractmethod from typing import Optional, Union, List, Any -from azure.cosmos import http_constants, PartitionKey +from azure.cosmos import http_constants from azure.cosmos._change_feed.aio.change_feed_start_from import ChangeFeedStartFromETagAndFeedRange, \ ChangeFeedStartFromInternal from azure.cosmos._change_feed.aio.composite_continuation_token import CompositeContinuationToken @@ -64,7 +64,6 @@ def apply_server_response_continuation(self, continuation: str) -> None: def from_json( container_link: str, container_rid: str, - partition_key_definition: PartitionKey, data: dict[str, Any]): if is_key_exists_and_not_none(data, "partitionKeyRangeId") or is_key_exists_and_not_none(data, "continuationPkRangeId"): return ChangeFeedStateV1.from_json(container_link, container_rid, data) @@ -77,11 +76,11 @@ def from_json( if version is None: raise ValueError("Invalid base64 encoded continuation string [Missing version]") elif version == "V2": - return ChangeFeedStateV2.from_continuation(container_link, container_rid, partition_key_definition, continuation_json) + return ChangeFeedStateV2.from_continuation(container_link, container_rid, continuation_json) else: raise ValueError("Invalid base64 encoded continuation string [Invalid version]") # when 
there is no continuation token, by default construct ChangeFeedStateV2 - return ChangeFeedStateV2.from_initial_state(container_link, container_rid, partition_key_definition, data) + return ChangeFeedStateV2.from_initial_state(container_link, container_rid, data) class ChangeFeedStateV1(ChangeFeedState): """Change feed state v1 implementation. This is used when partition key range id is used or the continuation is just simple _etag @@ -151,7 +150,6 @@ def __init__( self, container_link: str, container_rid: str, - partition_key_definition: PartitionKey, feed_range: FeedRange, change_feed_start_from: ChangeFeedStartFromInternal, continuation: Optional[FeedRangeCompositeContinuation] = None): @@ -163,9 +161,10 @@ def __init__( self._continuation = continuation if self._continuation is None: composite_continuation_token_queue = collections.deque() - composite_continuation_token_queue.append(CompositeContinuationToken( - self._feed_range.get_normalized_range(partition_key_definition), - None)) + composite_continuation_token_queue.append( + CompositeContinuationToken( + self._feed_range.get_normalized_range(), + None)) self._continuation =\ FeedRangeCompositeContinuation(self._container_rid, self._feed_range, composite_continuation_token_queue) @@ -241,7 +240,6 @@ def from_continuation( cls, container_link: str, container_rid: str, - partition_key_definition: PartitionKey, continuation_json: dict[str, Any]) -> 'ChangeFeedStateV2': container_rid_from_continuation = continuation_json.get(ChangeFeedStateV2.container_rid_property_name) @@ -262,8 +260,7 @@ def from_continuation( return ChangeFeedStateV2( container_link=container_link, container_rid=container_rid, - partition_key_definition=partition_key_definition, - feed_range=continuation.feed_range, + feed_range=continuation._feed_range, change_feed_start_from=change_feed_start_from, continuation=continuation) @@ -272,7 +269,6 @@ def from_initial_state( cls, container_link: str, collection_rid: str, - partition_key_definition: PartitionKey, data: dict[str, Any]) -> 'ChangeFeedStateV2': if is_key_exists_and_not_none(data, "feedRange"): @@ -280,7 +276,10 @@ def from_initial_state( feed_range_json = json.loads(feed_range_str) feed_range = FeedRangeEpk(Range.ParseFromDict(feed_range_json)) elif is_key_exists_and_not_none(data, "partitionKey"): - feed_range = FeedRangePartitionKey(data["partitionKey"]) + if is_key_exists_and_not_none(data, "partitionKeyFeedRange"): + feed_range = FeedRangePartitionKey(data["partitionKey"], data["partitionKeyFeedRange"]) + else: + raise ValueError("partitionKey is in the changeFeedStateContext, but missing partitionKeyFeedRange") else: # default to full range feed_range = FeedRangeEpk( @@ -294,7 +293,6 @@ def from_initial_state( return cls( container_link=container_link, container_rid=collection_rid, - partition_key_definition=partition_key_definition, feed_range=feed_range, change_feed_start_from=change_feed_start_from, continuation=None) diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/feed_range_composite_continuation_token.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/feed_range_composite_continuation_token.py index d7bf97c0a903..32122145009c 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/feed_range_composite_continuation_token.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/feed_range_composite_continuation_token.py @@ -56,12 +56,6 @@ def __init__( def current_token(self): return self._current_token - def get_feed_range(self) -> FeedRange: - if 
isinstance(self._feed_range, FeedRangeEpk): - return FeedRangeEpk(self.current_token.feed_range) - else: - return self._feed_range - def to_dict(self) -> dict[str, Any]: json_data = { self._version_property_name: "v2", @@ -93,16 +87,17 @@ def from_json(cls, data) -> 'FeedRangeCompositeContinuation': # parsing feed range if is_key_exists_and_not_none(data, FeedRangeEpk.type_property_name): - feed_range = FeedRangeEpk.from_json({ FeedRangeEpk.type_property_name: data[FeedRangeEpk.type_property_name] }) + feed_range = FeedRangeEpk.from_json(data) elif is_key_exists_and_not_none(data, FeedRangePartitionKey.type_property_name): - feed_range = FeedRangePartitionKey.from_json({ FeedRangePartitionKey.type_property_name: data[FeedRangePartitionKey.type_property_name] }) + feed_range =\ + FeedRangePartitionKey.from_json(data, continuation[0].feed_range) else: raise ValueError("Invalid feed range composite continuation token [Missing feed range scope]") return cls(container_rid=container_rid, feed_range=feed_range, continuation=deque(continuation)) async def handle_feed_range_gone(self, routing_provider: SmartRoutingMapProvider, collection_link: str) -> None: - overlapping_ranges = await routing_provider.get_overlapping_ranges(collection_link, self._current_token.feed_range) + overlapping_ranges = await routing_provider.get_overlapping_ranges(collection_link, [self._current_token.feed_range]) if len(overlapping_ranges) == 1: # merge,reusing the existing the feedRange and continuationToken diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_fetcher.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_fetcher.py index fd8ac2787a8f..6edaf8e73fd9 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_fetcher.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_fetcher.py @@ -115,7 +115,6 @@ def __init__( self._change_feed_state = self._feed_options.pop("changeFeedState") if not isinstance(self._change_feed_state, ChangeFeedStateV2): raise ValueError(f"ChangeFeedFetcherV2 can not handle change feed state version {type(self._change_feed_state)}") - self._change_feed_state.__class__ = ChangeFeedStateV2 self._resource_link = resource_link self._fetch_function = fetch_function diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_iterable.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_iterable.py index 676036180d29..39e94a30c4c0 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_iterable.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_iterable.py @@ -26,7 +26,7 @@ from azure.cosmos._change_feed.change_feed_fetcher import ChangeFeedFetcherV1, ChangeFeedFetcherV2 from azure.cosmos._change_feed.change_feed_state import ChangeFeedStateV1, ChangeFeedState -from azure.cosmos._utils import is_base64_encoded +from azure.cosmos._utils import is_base64_encoded, is_key_exists_and_not_none class ChangeFeedIterable(PageIterator): @@ -57,40 +57,29 @@ def __init__( self._options = options self._fetch_function = fetch_function self._collection_link = collection_link + self._change_feed_fetcher = None - change_feed_state = self._options.get("changeFeedState") - if not change_feed_state: - raise ValueError("Missing changeFeedState in feed options") + if not is_key_exists_and_not_none(self._options, "changeFeedStateContext"): + raise ValueError("Missing changeFeedStateContext in feed options") - if isinstance(change_feed_state, ChangeFeedStateV1): - if 
continuation_token: - if is_base64_encoded(continuation_token): - raise ValueError("Incompatible continuation token") - else: - change_feed_state.apply_server_response_continuation(continuation_token) + change_feed_state_context = self._options.pop("changeFeedStateContext") + continuation = continuation_token if continuation_token is not None else change_feed_state_context.pop("continuation", None) - self._change_feed_fetcher = ChangeFeedFetcherV1( - self._client, - self._collection_link, - self._options, - fetch_function - ) - else: - if continuation_token: - if not is_base64_encoded(continuation_token): - raise ValueError("Incompatible continuation token") + # analysis and validate continuation token + # there are two types of continuation token we support currently: + # v1 version: the continuation token would just be the _etag, + # which is being returned when customer is using partition_key_range_id, + # which is under deprecation and does not support split/merge + # v2 version: the continuation token will be base64 encoded composition token which includes full change feed state + if continuation is not None: + if is_base64_encoded(continuation): + change_feed_state_context["continuationFeedRange"] = continuation + else: + change_feed_state_context["continuationPkRangeId"] = continuation - effective_change_feed_context = {"continuationFeedRange": continuation_token} - effective_change_feed_state = ChangeFeedState.from_json(change_feed_state.container_rid, effective_change_feed_context) - # replace with the effective change feed state - self._options["continuationFeedRange"] = effective_change_feed_state + self._validate_change_feed_state_context(change_feed_state_context) + self._options["changeFeedStateContext"] = change_feed_state_context - self._change_feed_fetcher = ChangeFeedFetcherV2( - self._client, - self._collection_link, - self._options, - fetch_function - ) super(ChangeFeedIterable, self).__init__(self._fetch_next, self._unpack, continuation_token=continuation_token) def _unpack(self, block): @@ -112,7 +101,58 @@ def _fetch_next(self, *args): # pylint: disable=unused-argument :return: List of results. 
:rtype: list """ + + if self._change_feed_fetcher is None: + self._initialize_change_feed_fetcher() + block = self._change_feed_fetcher.fetch_next_block() if not block: raise StopIteration return block + + def _initialize_change_feed_fetcher(self): + change_feed_state_context = self._options.pop("changeFeedStateContext") + change_feed_state = \ + ChangeFeedState.from_json( + self._collection_link, + self._options.get("containerRID"), + change_feed_state_context) + + self._options["changeFeedState"] = change_feed_state + + if isinstance(change_feed_state, ChangeFeedStateV1): + self._change_feed_fetcher = ChangeFeedFetcherV1( + self._client, + self._collection_link, + self._options, + self._fetch_function + ) + else: + self._change_feed_fetcher = ChangeFeedFetcherV2( + self._client, + self._collection_link, + self._options, + self._fetch_function + ) + + def _validate_change_feed_state_context(self, change_feed_state_context: dict[str, any]) -> None: + + if is_key_exists_and_not_none(change_feed_state_context, "continuationPkRangeId"): + # if continuation token is in v1 format, throw exception if feed_range is set + if is_key_exists_and_not_none(change_feed_state_context, "feedRange"): + raise ValueError("feed_range and continuation are incompatible") + elif is_key_exists_and_not_none(change_feed_state_context, "continuationFeedRange"): + # if continuation token is in v2 format, since the token itself contains the full change feed state + # so we will ignore other parameters (including incompatible parameters) if they passed in + pass + else: + # validation when no continuation is passed + exclusive_keys = ["partitionKeyRangeId", "partitionKey", "feedRange"] + count = sum(1 for key in exclusive_keys if + key in change_feed_state_context and change_feed_state_context[key] is not None) + if count > 1: + raise ValueError( + "partition_key_range_id, partition_key, feed_range are exclusive parameters, please only set one of them") + + + diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_state.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_state.py index 8c61c306b94e..210563ab8411 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_state.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_state.py @@ -33,6 +33,7 @@ from azure.cosmos._change_feed.change_feed_start_from import ChangeFeedStartFromInternal, \ ChangeFeedStartFromETagAndFeedRange from azure.cosmos._change_feed.composite_continuation_token import CompositeContinuationToken +from azure.cosmos._change_feed.feed_range import FeedRange, FeedRangeEpk, FeedRangePartitionKey from azure.cosmos._change_feed.feed_range_composite_continuation_token import FeedRangeCompositeContinuation from azure.cosmos._routing.routing_map_provider import SmartRoutingMapProvider from azure.cosmos._routing.routing_range import Range @@ -57,13 +58,18 @@ def apply_server_response_continuation(self, continuation: str) -> None: pass @staticmethod - def from_json(container_link: str, container_rid: str, data: dict[str, Any]): - if is_key_exists_and_not_none(data, "partitionKeyRangeId") or is_key_exists_and_not_none(data, "continuationPkRangeId"): - return ChangeFeedStateV1.from_json(container_link, container_rid, data) + def from_json( + container_link: str, + container_rid: str, + change_feed_state_context: dict[str, Any]): + + if (is_key_exists_and_not_none(change_feed_state_context, "partitionKeyRangeId") + or is_key_exists_and_not_none(change_feed_state_context, 
"continuationPkRangeId")): + return ChangeFeedStateV1.from_json(container_link, container_rid, change_feed_state_context) else: - if is_key_exists_and_not_none(data, "continuationFeedRange"): + if is_key_exists_and_not_none(change_feed_state_context, "continuationFeedRange"): # get changeFeedState from continuation - continuation_json_str = base64.b64decode(data["continuationFeedRange"]).decode('utf-8') + continuation_json_str = base64.b64decode(change_feed_state_context["continuationFeedRange"]).decode('utf-8') continuation_json = json.loads(continuation_json_str) version = continuation_json.get(ChangeFeedState.version_property_name) if version is None: @@ -73,7 +79,7 @@ def from_json(container_link: str, container_rid: str, data: dict[str, Any]): else: raise ValueError("Invalid base64 encoded continuation string [Invalid version]") # when there is no continuation token, by default construct ChangeFeedStateV2 - return ChangeFeedStateV2.from_initial_state(container_link, container_rid, data) + return ChangeFeedStateV2.from_initial_state(container_link, container_rid, change_feed_state_context) class ChangeFeedStateV1(ChangeFeedState): """Change feed state v1 implementation. This is used when partition key range id is used or the continuation is just simple _etag @@ -140,7 +146,7 @@ def __init__( self, container_link: str, container_rid: str, - feed_range: Range, + feed_range: FeedRange, change_feed_start_from: ChangeFeedStartFromInternal, continuation: Optional[FeedRangeCompositeContinuation] = None): @@ -151,7 +157,10 @@ def __init__( self._continuation = continuation if self._continuation is None: composite_continuation_token_queue = collections.deque() - composite_continuation_token_queue.append(CompositeContinuationToken(self._feed_range, None)) + composite_continuation_token_queue.append( + CompositeContinuationToken( + self._feed_range.get_normalized_range(), + None)) self._continuation =\ FeedRangeCompositeContinuation(self._container_rid, self._feed_range, composite_continuation_token_queue) @@ -253,23 +262,28 @@ def from_initial_state( cls, container_link: str, collection_rid: str, - data: dict[str, Any]) -> 'ChangeFeedStateV2': + change_feed_state_context: dict[str, Any]) -> 'ChangeFeedStateV2': - if is_key_exists_and_not_none(data, "feedRange"): - feed_range_str = base64.b64decode(data["feedRange"]).decode('utf-8') + if is_key_exists_and_not_none(change_feed_state_context, "feedRange"): + feed_range_str = base64.b64decode(change_feed_state_context["feedRange"]).decode('utf-8') feed_range_json = json.loads(feed_range_str) - feed_range = Range.ParseFromDict(feed_range_json) - elif is_key_exists_and_not_none(data, "partitionKeyFeedRange"): - feed_range = data["partitionKeyFeedRange"] + feed_range = FeedRangeEpk(Range.ParseFromDict(feed_range_json)) + elif is_key_exists_and_not_none(change_feed_state_context, "partitionKey"): + if is_key_exists_and_not_none(change_feed_state_context, "partitionKeyFeedRange"): + feed_range = FeedRangePartitionKey(change_feed_state_context["partitionKey"], change_feed_state_context["partitionKeyFeedRange"]) + else: + raise ValueError("partitionKey is in the changeFeedStateContext, but missing partitionKeyFeedRange") else: # default to full range - feed_range = Range( + feed_range = FeedRangeEpk( + Range( "", "FF", True, False) + ) - change_feed_start_from = ChangeFeedStartFromInternal.from_start_time(data.get("startTime")) + change_feed_start_from = ChangeFeedStartFromInternal.from_start_time(change_feed_state_context.get("startTime")) return cls( 
container_link=container_link, container_rid=collection_rid, diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range.py index a4b4b5dfedda..1b6ef79e4176 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range.py @@ -26,7 +26,6 @@ from abc import ABC, abstractmethod from typing import Union, List -from azure.cosmos import PartitionKey from azure.cosmos._routing.routing_range import Range from azure.cosmos._utils import is_key_exists_and_not_none from azure.cosmos.partition_key import _Undefined, _Empty @@ -35,7 +34,7 @@ class FeedRange(ABC): @abstractmethod - def get_normalized_range(self, partition_key_range_definition: PartitionKey) -> Range: + def get_normalized_range(self) -> Range: pass @abstractmethod @@ -47,14 +46,19 @@ class FeedRangePartitionKey(FeedRange): def __init__( self, - pk_value: Union[str, int, float, bool, List[Union[str, int, float, bool]], _Empty, _Undefined]): + pk_value: Union[str, int, float, bool, List[Union[str, int, float, bool]], _Empty, _Undefined], + feed_range: Range): + if pk_value is None: raise ValueError("PartitionKey cannot be None") + if feed_range is None: + raise ValueError("Feed range cannot be None") self._pk_value = pk_value + self._feed_range = feed_range - def get_normalized_range(self, partition_key_definition: PartitionKey) -> Range: - return partition_key_definition._get_epk_range_for_partition_key(self._pk_value).to_normalized_range() + def get_normalized_range(self) -> Range: + return self._feed_range.to_normalized_range() def to_dict(self) -> dict[str, any]: if isinstance(self._pk_value, _Undefined): @@ -65,16 +69,16 @@ def to_dict(self) -> dict[str, any]: return { self.type_property_name: json.dumps(self._pk_value) } @classmethod - def from_json(cls, data: dict[str, any]) -> 'FeedRangePartitionKey': + def from_json(cls, data: dict[str, any], feed_range: Range) -> 'FeedRangePartitionKey': if is_key_exists_and_not_none(data, cls.type_property_name): pk_value = data.get(cls.type_property_name) if isinstance(pk_value, list): if not pk_value: - return cls(_Empty()) + return cls(_Empty(), feed_range) if pk_value == [{}]: - return cls(_Undefined()) + return cls(_Undefined(), feed_range) - return cls(json.loads(data.get(cls.type_property_name))) + return cls(json.loads(data.get(cls.type_property_name)), feed_range) raise ValueError(f"Can not parse FeedRangePartitionKey from the json, there is no property {cls.type_property_name}") class FeedRangeEpk(FeedRange): @@ -86,7 +90,7 @@ def __init__(self, feed_range: Range): self._range = feed_range - def get_normalized_range(self, partition_key_definition: PartitionKey) -> Range: + def get_normalized_range(self) -> Range: return self._range.to_normalized_range() def to_dict(self) -> dict[str, any]: diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range_composite_continuation_token.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range_composite_continuation_token.py index 2461436924aa..26abd66ba132 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range_composite_continuation_token.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range_composite_continuation_token.py @@ -27,20 +27,21 @@ from typing import Any from azure.cosmos._change_feed.composite_continuation_token import CompositeContinuationToken +from azure.cosmos._change_feed.feed_range import FeedRange, 
FeedRangeEpk, FeedRangePartitionKey from azure.cosmos._routing.routing_map_provider import SmartRoutingMapProvider from azure.cosmos._routing.routing_range import Range +from azure.cosmos._utils import is_key_exists_and_not_none class FeedRangeCompositeContinuation(object): - _version_property_name = "V" - _container_rid_property_name = "Rid" - _continuation_property_name = "Continuation" - _feed_range_property_name = "Range" + _version_property_name = "v" + _container_rid_property_name = "rid" + _continuation_property_name = "continuation" def __init__( self, container_rid: str, - feed_range: Range, + feed_range: FeedRange, continuation: collections.deque[CompositeContinuationToken]): if container_rid is None: raise ValueError("container_rid is missing") @@ -56,30 +57,26 @@ def current_token(self): return self._current_token def to_dict(self) -> dict[str, Any]: - return { - self._version_property_name: "v1", #TODO: should this start from v2 + json_data = { + self._version_property_name: "v2", self._container_rid_property_name: self._container_rid, self._continuation_property_name: [childToken.to_dict() for childToken in self._continuation], - self._feed_range_property_name: self._feed_range.to_dict() } + json_data.update(self._feed_range.to_dict()) + return json_data @classmethod def from_json(cls, data) -> 'FeedRangeCompositeContinuation': version = data.get(cls._version_property_name) if version is None: raise ValueError(f"Invalid feed range composite continuation token [Missing {cls._version_property_name}]") - if version != "v1": + if version != "v2": raise ValueError("Invalid feed range composite continuation token [Invalid version]") container_rid = data.get(cls._container_rid_property_name) if container_rid is None: raise ValueError(f"Invalid feed range composite continuation token [Missing {cls._container_rid_property_name}]") - feed_range_data = data.get(cls._feed_range_property_name) - if feed_range_data is None: - raise ValueError(f"Invalid feed range composite continuation token [Missing {cls._feed_range_property_name}]") - feed_range = Range.ParseFromDict(feed_range_data) - continuation_data = data.get(cls._continuation_property_name) if continuation_data is None: raise ValueError(f"Invalid feed range composite continuation token [Missing {cls._continuation_property_name}]") @@ -87,10 +84,18 @@ def from_json(cls, data) -> 'FeedRangeCompositeContinuation': raise ValueError(f"Invalid feed range composite continuation token [The {cls._continuation_property_name} must be non-empty array]") continuation = [CompositeContinuationToken.from_json(child_range_continuation_token) for child_range_continuation_token in continuation_data] + # parsing feed range + if is_key_exists_and_not_none(data, FeedRangeEpk.type_property_name): + feed_range = FeedRangeEpk.from_json(data) + elif is_key_exists_and_not_none(data, FeedRangePartitionKey.type_property_name): + feed_range = FeedRangePartitionKey.from_json(data, continuation[0].feed_range) + else: + raise ValueError("Invalid feed range composite continuation token [Missing feed range scope]") + return cls(container_rid=container_rid, feed_range=feed_range, continuation=deque(continuation)) def handle_feed_range_gone(self, routing_provider: SmartRoutingMapProvider, collection_link: str) -> None: - overlapping_ranges = routing_provider.get_overlapping_ranges(collection_link, self._current_token.feed_range) + overlapping_ranges = routing_provider.get_overlapping_ranges(collection_link, [self._current_token.feed_range]) if len(overlapping_ranges) 
== 1: # merge,reusing the existing the feedRange and continuationToken @@ -130,5 +135,6 @@ def apply_not_modified_response(self) -> None: self._initial_no_result_range = self._current_token.feed_range @property - def feed_range(self) -> Range: + def feed_range(self) -> FeedRange: return self._feed_range + diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py index c68ddad7eb0d..5e8da4f54cf4 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py @@ -636,7 +636,7 @@ def query_items_change_feed( if is_key_exists_and_not_none(kwargs, "feed_range"): change_feed_state_context["feedRange"] = kwargs.pop('feed_range') - change_feed_state_context["containerProperties"] = self._get_properties() + feed_options["containerProperties"] = self._get_properties() feed_options["changeFeedStateContext"] = change_feed_state_context response_hook = kwargs.pop('response_hook', None) diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/container.py b/sdk/cosmos/azure-cosmos/azure/cosmos/container.py index d4f1d5480241..e2fa8ad4071e 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/container.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/container.py @@ -38,11 +38,10 @@ GenerateGuidId, _set_properties_cache ) -from ._change_feed.change_feed_state import ChangeFeedState from ._cosmos_client_connection import CosmosClientConnection from ._routing import routing_range from ._routing.routing_range import Range -from ._utils import is_key_exists_and_not_none, is_base64_encoded +from ._utils import is_key_exists_and_not_none from .offer import Offer, ThroughputProperties from .partition_key import ( NonePartitionKeyValue, @@ -133,15 +132,15 @@ def _set_partition_key( return _return_undefined_or_empty_partition_key(self.is_system_key) return cast(Union[str, int, float, bool, List[Union[str, int, float, bool]]], partition_key) - def __get_client_container_caches(self) -> Dict[str, Dict[str, Any]]: - return self.client_connection._container_properties_cache - def _get_epk_range_for_partition_key(self, partition_key_value: PartitionKeyType) -> Range: container_properties = self._get_properties() partition_key_definition = container_properties.get("partitionKey") partition_key = PartitionKey(path=partition_key_definition["paths"], kind=partition_key_definition["kind"]) - return partition_key._get_epk_range_for_partition_key(partition_key_value, self.__is_prefix_partitionkey(partition_key_value)) + return partition_key._get_epk_range_for_partition_key(partition_key_value) + + def __get_client_container_caches(self) -> Dict[str, Dict[str, Any]]: + return self.client_connection._container_properties_cache @distributed_trace def read( # pylint:disable=docstring-missing-param @@ -451,16 +450,7 @@ def query_items_change_feed( continuation = feed_options.pop('continuation') except KeyError: continuation = args[2] - - # there are two types of continuation token we support currently: - # v1 version: the continuation token would just be the _etag, - # which is being returned when customer is using partition_key_range_id, - # which is under deprecation and does not support split/merge - # v2 version: the continuation token will be base64 encoded composition token which includes full change feed state - if is_base64_encoded(continuation): - change_feed_state_context["continuationFeedRange"] = continuation - else: - change_feed_state_context["continuationPkRangeId"] = continuation + 
change_feed_state_context["continuation"] = continuation if len(args) >= 4 and args[3] is not None or is_key_exists_and_not_none(kwargs, "max_item_count"): try: @@ -469,40 +459,21 @@ def query_items_change_feed( feed_options["maxItemCount"] = args[3] if is_key_exists_and_not_none(kwargs, "partition_key"): - partition_key = kwargs.pop("partition_key") + partition_key = kwargs.pop('partition_key') change_feed_state_context["partitionKey"] = self._set_partition_key(partition_key) change_feed_state_context["partitionKeyFeedRange"] = self._get_epk_range_for_partition_key(partition_key) if is_key_exists_and_not_none(kwargs, "feed_range"): change_feed_state_context["feedRange"] = kwargs.pop('feed_range') - # validate exclusive or in-compatible parameters - if is_key_exists_and_not_none(change_feed_state_context, "continuationPkRangeId"): - # if continuation token is in v1 format, throw exception if feed_range is set - if is_key_exists_and_not_none(change_feed_state_context, "feedRange"): - raise ValueError("feed_range and continuation are incompatible") - elif is_key_exists_and_not_none(change_feed_state_context, "continuationFeedRange"): - # if continuation token is in v2 format, since the token itself contains the full change feed state - # so we will ignore other parameters if they passed in - if is_key_exists_and_not_none(change_feed_state_context, "partitionKeyRangeId"): - raise ValueError("partition_key_range_id and continuation are incompatible") - else: - # validation when no continuation is passed - exclusive_keys = ["partitionKeyRangeId", "partitionKey", "feedRange"] - count = sum(1 for key in exclusive_keys if key in change_feed_state_context and change_feed_state_context[key] is not None) - if count > 1: - raise ValueError("partition_key_range_id, partition_key, feed_range are exclusive parameters, please only set one of them") - container_properties = self._get_properties() - container_rid = container_properties.get("_rid") - change_feed_state = ChangeFeedState.from_json(self.container_link, container_rid, change_feed_state_context) - feed_options["changeFeedState"] = change_feed_state + feed_options["changeFeedStateContext"] = change_feed_state_context + feed_options["containerRID"] = container_properties["_rid"] response_hook = kwargs.pop('response_hook', None) if hasattr(response_hook, "clear"): response_hook.clear() - if self.container_link in self.__get_client_container_caches(): - feed_options["containerRID"] = self.__get_client_container_caches()[self.container_link]["_rid"] + result = self.client_connection.QueryItemsChangeFeed( self.container_link, options=feed_options, response_hook=response_hook, **kwargs ) @@ -639,11 +610,7 @@ def __is_prefix_partitionkey( properties = self._get_properties() pk_properties = properties["partitionKey"] partition_key_definition = PartitionKey(path=pk_properties["paths"], kind=pk_properties["kind"]) - if partition_key_definition.kind != "MultiHash": - return False - if isinstance(partition_key, list) and len(partition_key_definition['paths']) == len(partition_key): - return False - return True + return partition_key_definition._is_prefix_partition_key(partition_key) @distributed_trace def replace_item( # pylint:disable=docstring-missing-param From 5f16b143fd341b8ef82e5ea49f515c846f32f52e Mon Sep 17 00:00:00 2001 From: annie-mac Date: Sun, 18 Aug 2024 11:02:49 -0700 Subject: [PATCH 04/20] refactor --- .../_change_feed/aio/change_feed_fetcher.py | 62 ++-- .../_change_feed/aio/change_feed_iterable.py | 15 +- .../aio/change_feed_start_from.py | 
189 ----------- .../_change_feed/aio/change_feed_state.py | 299 ------------------ .../aio/composite_continuation_token.py | 70 ---- ...feed_range_composite_continuation_token.py | 141 --------- .../_change_feed/change_feed_fetcher.py | 66 ++-- .../_change_feed/change_feed_iterable.py | 14 +- .../_change_feed/change_feed_start_from.py | 46 +-- .../cosmos/_change_feed/change_feed_state.py | 238 ++++++++++---- .../azure/cosmos/_change_feed/feed_range.py | 38 ++- ...feed_range_composite_continuation_token.py | 61 +++- .../azure/cosmos/_cosmos_client_connection.py | 6 +- .../azure/cosmos/_routing/routing_range.py | 12 +- .../azure-cosmos/azure/cosmos/_utils.py | 6 +- .../azure/cosmos/aio/_container.py | 46 ++- .../aio/_cosmos_client_connection_async.py | 6 +- .../azure-cosmos/azure/cosmos/container.py | 54 ++-- .../azure-cosmos/azure/cosmos/exceptions.py | 2 +- .../azure/cosmos/partition_key.py | 2 +- 20 files changed, 436 insertions(+), 937 deletions(-) delete mode 100644 sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_start_from.py delete mode 100644 sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_state.py delete mode 100644 sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/composite_continuation_token.py delete mode 100644 sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/feed_range_composite_continuation_token.py diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_fetcher.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_fetcher.py index 83ca3025ee07..4d85a891ac3f 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_fetcher.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_fetcher.py @@ -26,12 +26,15 @@ import copy import json from abc import ABC, abstractmethod +from typing import Dict, Any, List from azure.cosmos import http_constants, exceptions -from azure.cosmos._change_feed.aio.change_feed_state import ChangeFeedStateV1, ChangeFeedStateV2 +from azure.cosmos._change_feed.change_feed_start_from import ChangeFeedStartFromPointInTime +from azure.cosmos._change_feed.change_feed_state import ChangeFeedStateV1, ChangeFeedStateV2 from azure.cosmos.aio import _retry_utility_async from azure.cosmos.exceptions import CosmosHttpResponseError +# pylint: disable=protected-access class ChangeFeedFetcher(ABC): @@ -49,7 +52,7 @@ def __init__( self, client, resource_link: str, - feed_options: dict[str, any], + feed_options: Dict[str, Any], fetch_function): self._client = client @@ -57,8 +60,8 @@ def __init__( self._change_feed_state = self._feed_options.pop("changeFeedState") if not isinstance(self._change_feed_state, ChangeFeedStateV1): - raise ValueError(f"ChangeFeedFetcherV1 can not handle change feed state version {type(self._change_feed_state)}") - self._change_feed_state.__class__ = ChangeFeedStateV1 + raise ValueError(f"ChangeFeedFetcherV1 can not handle change feed state version" + f" {type(self._change_feed_state)}") self._resource_link = resource_link self._fetch_function = fetch_function @@ -74,24 +77,27 @@ async def callback(): return await _retry_utility_async.ExecuteAsync(self._client, self._client._global_endpoint_manager, callback) - async def fetch_change_feed_items(self, fetch_function) -> list[dict[str, any]]: + async def fetch_change_feed_items(self, fetch_function) -> List[Dict[str, Any]]: new_options = copy.deepcopy(self._feed_options) new_options["changeFeedState"] = self._change_feed_state 
self._change_feed_state.populate_feed_options(new_options) - is_s_time_first_fetch = True + is_s_time_first_fetch = self._change_feed_state._continuation is None while True: (fetched_items, response_headers) = await fetch_function(new_options) continuation_key = http_constants.HttpHeaders.ETag # In change feed queries, the continuation token is always populated. The hasNext() test is whether # there is any items in the response or not. - # For start time however we get no initial results, so we need to pass continuation token? Is this true? self._change_feed_state.apply_server_response_continuation( response_headers.get(continuation_key)) if fetched_items: break - elif is_s_time_first_fetch: + + # When processing from point in time, there will be no initial results being returned, + # so we will retry with the new continuation token again + if (isinstance(self._change_feed_state._change_feed_start_from, ChangeFeedStartFromPointInTime) + and is_s_time_first_fetch): is_s_time_first_fetch = False else: break @@ -106,16 +112,15 @@ def __init__( self, client, resource_link: str, - feed_options: dict[str, any], + feed_options: Dict[str, Any], fetch_function): self._client = client self._feed_options = feed_options - self._change_feed_state = self._feed_options.pop("changeFeedState") + self._change_feed_state: ChangeFeedStateV2 = self._feed_options.pop("changeFeedState") if not isinstance(self._change_feed_state, ChangeFeedStateV2): raise ValueError(f"ChangeFeedFetcherV2 can not handle change feed state version {type(self._change_feed_state)}") - self._change_feed_state.__class__ = ChangeFeedStateV2 self._resource_link = resource_link self._fetch_function = fetch_function @@ -131,17 +136,22 @@ async def callback(): return await self.fetch_change_feed_items(self._fetch_function) try: - return await _retry_utility_async.ExecuteAsync(self._client, self._client._global_endpoint_manager, callback) + return await _retry_utility_async.ExecuteAsync( + self._client, + self._client._global_endpoint_manager, + callback) except CosmosHttpResponseError as e: if exceptions._partition_range_is_gone(e) or exceptions._is_partition_split_or_merge(e): # refresh change feed state - await self._change_feed_state.handle_feed_range_gone(self._client._routing_map_provider, self._resource_link) + await self._change_feed_state.handle_feed_range_gone_async( + self._client._routing_map_provider, + self._resource_link) else: raise e return await self.fetch_next_block() - async def fetch_change_feed_items(self, fetch_function) -> list[dict[str, any]]: + async def fetch_change_feed_items(self, fetch_function) -> List[Dict[str, Any]]: new_options = copy.deepcopy(self._feed_options) new_options["changeFeedState"] = self._change_feed_state @@ -154,19 +164,33 @@ async def fetch_change_feed_items(self, fetch_function) -> list[dict[str, any]]: continuation_key = http_constants.HttpHeaders.ETag # In change feed queries, the continuation token is always populated. The hasNext() test is whether # there is any items in the response or not. - # For start time however we get no initial results, so we need to pass continuation token? Is this true? if fetched_items: self._change_feed_state.apply_server_response_continuation( response_headers.get(continuation_key)) response_headers[continuation_key] = self._get_base64_encoded_continuation() break - else: + + # when there is no items being returned, we will decide to retry based on: + # 1. 
When processing from point in time, there will be no initial results being returned, + # so we will retry with the new continuation token + # 2. if the feed range of the changeFeedState span multiple physical partitions + # then we will read from the next feed range until we have looped through all physical partitions self._change_feed_state.apply_not_modified_response() self._change_feed_state.apply_server_response_continuation( response_headers.get(continuation_key)) - response_headers[continuation_key] = self._get_base64_encoded_continuation() - should_retry = self._change_feed_state.should_retry_on_not_modified_response() or is_s_time_first_fetch - is_s_time_first_fetch = False + + #TODO: can this part logic be simplified + if (isinstance(self._change_feed_state._change_feed_start_from, ChangeFeedStartFromPointInTime) + and is_s_time_first_fetch): + response_headers[continuation_key] = self._get_base64_encoded_continuation() + is_s_time_first_fetch = False + should_retry = True + else: + self._change_feed_state._continuation._move_to_next_token() + response_headers[continuation_key] = self._get_base64_encoded_continuation() + should_retry = self._change_feed_state.should_retry_on_not_modified_response() + is_s_time_first_fetch = False + if not should_retry: break diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_iterable.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_iterable.py index c792d1357550..83c12f59157c 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_iterable.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_iterable.py @@ -21,14 +21,16 @@ """Iterable change feed results in the Azure Cosmos database service. """ +from typing import Dict, Any from azure.core.async_paging import AsyncPageIterator from azure.cosmos import PartitionKey from azure.cosmos._change_feed.aio.change_feed_fetcher import ChangeFeedFetcherV1, ChangeFeedFetcherV2 -from azure.cosmos._change_feed.aio.change_feed_state import ChangeFeedStateV1, ChangeFeedState +from azure.cosmos._change_feed.change_feed_state import ChangeFeedState, ChangeFeedStateV1 from azure.cosmos._utils import is_base64_encoded, is_key_exists_and_not_none +# pylint: disable=protected-access class ChangeFeedIterable(AsyncPageIterator): """Represents an iterable object of the change feed results. 
@@ -66,14 +68,16 @@ def __init__( change_feed_state_context = self._options.pop("changeFeedStateContext") - continuation = continuation_token if continuation_token is not None else change_feed_state_context.pop("continuation", None) + continuation = continuation_token if continuation_token is not None\ + else change_feed_state_context.pop("continuation", None) # analysis and validate continuation token # there are two types of continuation token we support currently: # v1 version: the continuation token would just be the _etag, # which is being returned when customer is using partition_key_range_id, # which is under deprecation and does not support split/merge - # v2 version: the continuation token will be base64 encoded composition token which includes full change feed state + # v2 version: the continuation token will be base64 encoded composition token + # which includes full change feed state if continuation is not None: if is_base64_encoded(continuation): change_feed_state_context["continuationFeedRange"] = continuation @@ -141,7 +145,7 @@ async def _initialize_change_feed_fetcher(self): self._fetch_function ) - def _validate_change_feed_state_context(self, change_feed_state_context: dict[str, any]) -> None: + def _validate_change_feed_state_context(self, change_feed_state_context: Dict[str, Any]) -> None: if is_key_exists_and_not_none(change_feed_state_context, "continuationPkRangeId"): # if continuation token is in v1 format, throw exception if feed_range is set @@ -158,4 +162,5 @@ def _validate_change_feed_state_context(self, change_feed_state_context: dict[st key in change_feed_state_context and change_feed_state_context[key] is not None) if count > 1: raise ValueError( - "partition_key_range_id, partition_key, feed_range are exclusive parameters, please only set one of them") + "partition_key_range_id, partition_key, feed_range are exclusive parameters," + " please only set one of them") diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_start_from.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_start_from.py deleted file mode 100644 index 99aeeb6eb914..000000000000 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_start_from.py +++ /dev/null @@ -1,189 +0,0 @@ -# The MIT License (MIT) -# Copyright (c) 2014 Microsoft Corporation - -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. - -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -"""Internal class for change feed start from implementation in the Azure Cosmos database service. 
-""" - -from abc import ABC, abstractmethod -from datetime import datetime, timezone -from enum import Enum -from typing import Optional, Union, Literal, Any - -from azure.cosmos import http_constants -from azure.cosmos._routing.routing_range import Range - -class ChangeFeedStartFromType(Enum): - BEGINNING = "Beginning" - NOW = "Now" - LEASE = "Lease" - POINT_IN_TIME = "PointInTime" - -class ChangeFeedStartFromInternal(ABC): - """Abstract class for change feed start from implementation in the Azure Cosmos database service. - """ - - _type_property_name = "Type" - - @abstractmethod - def to_dict(self) -> dict[str, Any]: - pass - - @staticmethod - def from_start_time(start_time: Optional[Union[datetime, Literal["Now", "Beginning"]]]) -> 'ChangeFeedStartFromInternal': - if start_time is None: - return ChangeFeedStartFromNow() - elif isinstance(start_time, datetime): - return ChangeFeedStartFromPointInTime(start_time) - elif start_time.lower() == ChangeFeedStartFromType.NOW.value.lower(): - return ChangeFeedStartFromNow() - elif start_time.lower() == ChangeFeedStartFromType.BEGINNING.value.lower(): - return ChangeFeedStartFromBeginning() - else: - raise ValueError(f"Invalid start_time '{start_time}'") - - @staticmethod - def from_json(data: dict[str, any]) -> 'ChangeFeedStartFromInternal': - change_feed_start_from_type = data.get(ChangeFeedStartFromInternal._type_property_name) - if change_feed_start_from_type is None: - raise ValueError(f"Invalid start from json [Missing {ChangeFeedStartFromInternal._type_property_name}]") - - if change_feed_start_from_type == ChangeFeedStartFromType.BEGINNING.value: - return ChangeFeedStartFromBeginning.from_json(data) - elif change_feed_start_from_type == ChangeFeedStartFromType.LEASE.value: - return ChangeFeedStartFromETagAndFeedRange.from_json(data) - elif change_feed_start_from_type == ChangeFeedStartFromType.NOW.value: - return ChangeFeedStartFromNow.from_json(data) - elif change_feed_start_from_type == ChangeFeedStartFromType.POINT_IN_TIME.value: - return ChangeFeedStartFromPointInTime.from_json(data) - else: - raise ValueError(f"Can not process changeFeedStartFrom for type {change_feed_start_from_type}") - - @abstractmethod - def populate_request_headers(self, request_headers) -> None: - pass - - -class ChangeFeedStartFromBeginning(ChangeFeedStartFromInternal): - """Class for change feed start from beginning implementation in the Azure Cosmos database service. - """ - - def to_dict(self) -> dict[str, Any]: - return { - self._type_property_name: ChangeFeedStartFromType.BEGINNING.value - } - - def populate_request_headers(self, request_headers) -> None: - pass # there is no headers need to be set for start from beginning - - @classmethod - def from_json(cls, data: dict[str, Any]) -> 'ChangeFeedStartFromBeginning': - return ChangeFeedStartFromBeginning() - - -class ChangeFeedStartFromETagAndFeedRange(ChangeFeedStartFromInternal): - """Class for change feed start from etag and feed range implementation in the Azure Cosmos database service. 
- """ - - _etag_property_name = "Etag" - _feed_range_property_name = "FeedRange" - - def __init__(self, etag, feed_range): - if feed_range is None: - raise ValueError("feed_range is missing") - - self._etag = etag - self._feed_range = feed_range - - def to_dict(self) -> dict[str, Any]: - return { - self._type_property_name: ChangeFeedStartFromType.LEASE.value, - self._etag_property_name: self._etag, - self._feed_range_property_name: self._feed_range.to_dict() - } - - @classmethod - def from_json(cls, data: dict[str, Any]) -> 'ChangeFeedStartFromETagAndFeedRange': - etag = data.get(cls._etag_property_name) - if etag is None: - raise ValueError(f"Invalid change feed start from [Missing {cls._etag_property_name}]") - - feed_range_data = data.get(cls._feed_range_property_name) - if feed_range_data is None: - raise ValueError(f"Invalid change feed start from [Missing {cls._feed_range_property_name}]") - feed_range = Range.ParseFromDict(feed_range_data) - return cls(etag, feed_range) - - def populate_request_headers(self, request_headers) -> None: - # change feed uses etag as the continuationToken - if self._etag: - request_headers[http_constants.HttpHeaders.IfNoneMatch] = self._etag - - -class ChangeFeedStartFromNow(ChangeFeedStartFromInternal): - """Class for change feed start from etag and feed range implementation in the Azure Cosmos database service. - """ - - def to_dict(self) -> dict[str, Any]: - return { - self._type_property_name: ChangeFeedStartFromType.NOW.value - } - - def populate_request_headers(self, request_headers) -> None: - request_headers[http_constants.HttpHeaders.IfNoneMatch] = "*" - - @classmethod - def from_json(cls, data: dict[str, Any]) -> 'ChangeFeedStartFromNow': - return ChangeFeedStartFromNow() - - -class ChangeFeedStartFromPointInTime(ChangeFeedStartFromInternal): - """Class for change feed start from point in time implementation in the Azure Cosmos database service. 
- """ - - _point_in_time_ms_property_name = "PointInTimeMs" - - def __init__(self, start_time: datetime): - if start_time is None: - raise ValueError("start_time is missing") - - self._start_time = start_time - - def to_dict(self) -> dict[str, Any]: - return { - self._type_property_name: ChangeFeedStartFromType.POINT_IN_TIME.value, - self._point_in_time_ms_property_name: - int(self._start_time.astimezone(timezone.utc).timestamp() * 1000) - } - - def populate_request_headers(self, request_headers) -> None: - request_headers[http_constants.HttpHeaders.IfModified_since] =\ - self._start_time.astimezone(timezone.utc).strftime('%a, %d %b %Y %H:%M:%S GMT') - - @classmethod - def from_json(cls, data: dict[str, Any]) -> 'ChangeFeedStartFromPointInTime': - point_in_time_ms = data.get(cls._point_in_time_ms_property_name) - if point_in_time_ms is None: - raise ValueError(f"Invalid change feed start from {cls._point_in_time_ms_property_name} ") - - point_in_time = datetime.fromtimestamp(point_in_time_ms).astimezone(timezone.utc) - return ChangeFeedStartFromPointInTime(point_in_time) - - diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_state.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_state.py deleted file mode 100644 index ceb83166bdab..000000000000 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_state.py +++ /dev/null @@ -1,299 +0,0 @@ -# The MIT License (MIT) -# Copyright (c) 2014 Microsoft Corporation - -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. - -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -"""Internal class for change feed state implementation in the Azure Cosmos -database service. 
-""" - -import base64 -import collections -import json -from abc import ABC, abstractmethod -from typing import Optional, Union, List, Any - -from azure.cosmos import http_constants -from azure.cosmos._change_feed.aio.change_feed_start_from import ChangeFeedStartFromETagAndFeedRange, \ - ChangeFeedStartFromInternal -from azure.cosmos._change_feed.aio.composite_continuation_token import CompositeContinuationToken -from azure.cosmos._change_feed.aio.feed_range_composite_continuation_token import FeedRangeCompositeContinuation -from azure.cosmos._change_feed.feed_range import FeedRangeEpk, FeedRangePartitionKey, FeedRange -from azure.cosmos._routing.aio.routing_map_provider import SmartRoutingMapProvider -from azure.cosmos._routing.routing_range import Range -from azure.cosmos._utils import is_key_exists_and_not_none -from azure.cosmos.exceptions import CosmosFeedRangeGoneError -from azure.cosmos.partition_key import _Empty, _Undefined - - -class ChangeFeedState(ABC): - version_property_name = "v" - - @abstractmethod - def populate_feed_options(self, feed_options: dict[str, any]) -> None: - pass - - @abstractmethod - async def populate_request_headers( - self, - routing_provider: SmartRoutingMapProvider, - request_headers: dict[str, any]) -> None: - pass - - @abstractmethod - def apply_server_response_continuation(self, continuation: str) -> None: - pass - - @staticmethod - def from_json( - container_link: str, - container_rid: str, - data: dict[str, Any]): - if is_key_exists_and_not_none(data, "partitionKeyRangeId") or is_key_exists_and_not_none(data, "continuationPkRangeId"): - return ChangeFeedStateV1.from_json(container_link, container_rid, data) - else: - if is_key_exists_and_not_none(data, "continuationFeedRange"): - # get changeFeedState from continuation - continuation_json_str = base64.b64decode(data["continuationFeedRange"]).decode('utf-8') - continuation_json = json.loads(continuation_json_str) - version = continuation_json.get(ChangeFeedState.version_property_name) - if version is None: - raise ValueError("Invalid base64 encoded continuation string [Missing version]") - elif version == "V2": - return ChangeFeedStateV2.from_continuation(container_link, container_rid, continuation_json) - else: - raise ValueError("Invalid base64 encoded continuation string [Invalid version]") - # when there is no continuation token, by default construct ChangeFeedStateV2 - return ChangeFeedStateV2.from_initial_state(container_link, container_rid, data) - -class ChangeFeedStateV1(ChangeFeedState): - """Change feed state v1 implementation. 
This is used when partition key range id is used or the continuation is just simple _etag - """ - - def __init__( - self, - container_link: str, - container_rid: str, - change_feed_start_from: ChangeFeedStartFromInternal, - partition_key_range_id: Optional[str] = None, - partition_key: Optional[Union[str, int, float, bool, List[Union[str, int, float, bool]], _Empty, _Undefined]] = None, - continuation: Optional[str] = None): - - self._container_link = container_link - self._container_rid = container_rid - self._change_feed_start_from = change_feed_start_from - self._partition_key_range_id = partition_key_range_id - self._partition_key = partition_key - self._continuation = continuation - - @property - def container_rid(self): - return self._container_rid - - @classmethod - def from_json(cls, container_link: str, container_rid: str, data: dict[str, Any]) -> 'ChangeFeedStateV1': - return cls( - container_link, - container_rid, - ChangeFeedStartFromInternal.from_start_time(data.get("startTime")), - data.get("partitionKeyRangeId"), - data.get("partitionKey"), - data.get("continuationPkRangeId") - ) - - async def populate_request_headers( - self, - routing_provider: SmartRoutingMapProvider, - headers: dict[str, Any]) -> None: - headers[http_constants.HttpHeaders.AIM] = http_constants.HttpHeaders.IncrementalFeedHeaderValue - - # When a merge happens, the child partition will contain documents ordered by LSN but the _ts/creation time - # of the documents may not be sequential. So when reading the changeFeed by LSN, it is possible to encounter documents with lower _ts. - # In order to guarantee we always get the documents after customer's point start time, we will need to always pass the start time in the header. - self._change_feed_start_from.populate_request_headers(headers) - if self._continuation: - headers[http_constants.HttpHeaders.IfNoneMatch] = self._continuation - - def populate_feed_options(self, feed_options: dict[str, any]) -> None: - if self._partition_key_range_id is not None: - feed_options["partitionKeyRangeId"] = self._partition_key_range_id - if self._partition_key is not None: - feed_options["partitionKey"] = self._partition_key - - def apply_server_response_continuation(self, continuation: str) -> None: - self._continuation = continuation - -class ChangeFeedStateV2(ChangeFeedState): - container_rid_property_name = "containerRid" - change_feed_mode_property_name = "mode" - change_feed_start_from_property_name = "startFrom" - continuation_property_name = "continuation" - - # TODO: adding change feed mode - def __init__( - self, - container_link: str, - container_rid: str, - feed_range: FeedRange, - change_feed_start_from: ChangeFeedStartFromInternal, - continuation: Optional[FeedRangeCompositeContinuation] = None): - - self._container_link = container_link - self._container_rid = container_rid - self._feed_range = feed_range - self._change_feed_start_from = change_feed_start_from - self._continuation = continuation - if self._continuation is None: - composite_continuation_token_queue = collections.deque() - composite_continuation_token_queue.append( - CompositeContinuationToken( - self._feed_range.get_normalized_range(), - None)) - self._continuation =\ - FeedRangeCompositeContinuation(self._container_rid, self._feed_range, composite_continuation_token_queue) - - @property - def container_rid(self) -> str : - return self._container_rid - - def to_dict(self) -> dict[str, Any]: - return { - self.version_property_name: "V2", - self.container_rid_property_name: self._container_rid, - 
self.change_feed_mode_property_name: "Incremental", - self.change_feed_start_from_property_name: self._change_feed_start_from.to_dict(), - self.continuation_property_name: self._continuation.to_dict() - } - - async def populate_request_headers( - self, - routing_provider: SmartRoutingMapProvider, - headers: dict[str, any]) -> None: - headers[http_constants.HttpHeaders.AIM] = http_constants.HttpHeaders.IncrementalFeedHeaderValue - - # When a merge happens, the child partition will contain documents ordered by LSN but the _ts/creation time - # of the documents may not be sequential. So when reading the changeFeed by LSN, it is possible to encounter documents with lower _ts. - # In order to guarantee we always get the documents after customer's point start time, we will need to always pass the start time in the header. - self._change_feed_start_from.populate_request_headers(headers) - - if self._continuation.current_token is not None and self._continuation.current_token.token is not None: - change_feed_start_from_feed_range_and_etag =\ - ChangeFeedStartFromETagAndFeedRange(self._continuation.current_token.token, self._continuation.current_token.feed_range) - change_feed_start_from_feed_range_and_etag.populate_request_headers(headers) - - # based on the feed range to find the overlapping partition key range id - over_lapping_ranges =\ - await routing_provider.get_overlapping_ranges( - self._container_link, - [self._continuation.current_token.feed_range]) - - if len(over_lapping_ranges) > 1: - raise CosmosFeedRangeGoneError(message= - f"Range {self._continuation.current_token.feed_range}" - f" spans {len(over_lapping_ranges)}" - f" physical partitions: {[child_range['id'] for child_range in over_lapping_ranges]}") - else: - overlapping_feed_range = Range.PartitionKeyRangeToRange(over_lapping_ranges[0]) - if overlapping_feed_range == self._continuation.current_token.feed_range: - # exactly mapping to one physical partition, only need to set the partitionKeyRangeId - headers[http_constants.HttpHeaders.PartitionKeyRangeID] = over_lapping_ranges[0]["id"] - else: - # the current token feed range spans less than single physical partition - # for this case, need to set both the partition key range id and epk filter headers - headers[http_constants.HttpHeaders.PartitionKeyRangeID] = over_lapping_ranges[0]["id"] - headers[http_constants.HttpHeaders.StartEpkString] = self._continuation.current_token.feed_range.min - headers[http_constants.HttpHeaders.EndEpkString] = self._continuation.current_token.feed_range.max - - def populate_feed_options(self, feed_options: dict[str, any]) -> None: - pass - - async def handle_feed_range_gone(self, routing_provider: SmartRoutingMapProvider, resource_link: str) -> None: - await self._continuation.handle_feed_range_gone(routing_provider, resource_link) - - def apply_server_response_continuation(self, continuation: str) -> None: - self._continuation.apply_server_response_continuation(continuation) - - def should_retry_on_not_modified_response(self): - self._continuation.should_retry_on_not_modified_response() - - def apply_not_modified_response(self) -> None: - self._continuation.apply_not_modified_response() - - @classmethod - def from_continuation( - cls, - container_link: str, - container_rid: str, - continuation_json: dict[str, Any]) -> 'ChangeFeedStateV2': - - container_rid_from_continuation = continuation_json.get(ChangeFeedStateV2.container_rid_property_name) - if container_rid_from_continuation is None: - raise ValueError(f"Invalid continuation: [Missing 
{ChangeFeedStateV2.container_rid_property_name}]") - elif container_rid_from_continuation != container_rid: - raise ValueError("Invalid continuation: [Mismatch collection rid]") - - change_feed_start_from_data = continuation_json.get(ChangeFeedStateV2.change_feed_start_from_property_name) - if change_feed_start_from_data is None: - raise ValueError(f"Invalid continuation: [Missing {ChangeFeedStateV2.change_feed_start_from_property_name}]") - change_feed_start_from = ChangeFeedStartFromInternal.from_json(change_feed_start_from_data) - - continuation_data = continuation_json.get(ChangeFeedStateV2.continuation_property_name) - if continuation_data is None: - raise ValueError(f"Invalid continuation: [Missing {ChangeFeedStateV2.continuation_property_name}]") - continuation = FeedRangeCompositeContinuation.from_json(continuation_data) - return ChangeFeedStateV2( - container_link=container_link, - container_rid=container_rid, - feed_range=continuation._feed_range, - change_feed_start_from=change_feed_start_from, - continuation=continuation) - - @classmethod - def from_initial_state( - cls, - container_link: str, - collection_rid: str, - data: dict[str, Any]) -> 'ChangeFeedStateV2': - - if is_key_exists_and_not_none(data, "feedRange"): - feed_range_str = base64.b64decode(data["feedRange"]).decode('utf-8') - feed_range_json = json.loads(feed_range_str) - feed_range = FeedRangeEpk(Range.ParseFromDict(feed_range_json)) - elif is_key_exists_and_not_none(data, "partitionKey"): - if is_key_exists_and_not_none(data, "partitionKeyFeedRange"): - feed_range = FeedRangePartitionKey(data["partitionKey"], data["partitionKeyFeedRange"]) - else: - raise ValueError("partitionKey is in the changeFeedStateContext, but missing partitionKeyFeedRange") - else: - # default to full range - feed_range = FeedRangeEpk( - Range( - "", - "FF", - True, - False)) - - change_feed_start_from = ChangeFeedStartFromInternal.from_start_time(data.get("startTime")) - return cls( - container_link=container_link, - container_rid=collection_rid, - feed_range=feed_range, - change_feed_start_from=change_feed_start_from, - continuation=None) - diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/composite_continuation_token.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/composite_continuation_token.py deleted file mode 100644 index 6d779fed1037..000000000000 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/composite_continuation_token.py +++ /dev/null @@ -1,70 +0,0 @@ -# The MIT License (MIT) -# Copyright (c) 2014 Microsoft Corporation - -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. - -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -"""Internal class for change feed composite continuation token in the Azure Cosmos -database service. -""" -from azure.cosmos._routing.routing_range import Range - - -class CompositeContinuationToken(object): - _token_property_name = "token" - _feed_range_property_name = "range" - - def __init__(self, feed_range: Range, token): - if range is None: - raise ValueError("range is missing") - - self._token = token - self._feed_range = feed_range - - def to_dict(self): - return { - self._token_property_name: self._token, - self._feed_range_property_name: self._feed_range.to_dict() - } - - @property - def feed_range(self): - return self._feed_range - - @property - def token(self): - return self._token - - def update_token(self, etag): - self._token = etag - - @classmethod - def from_json(cls, data): - token = data.get(cls._token_property_name) - if token is None: - raise ValueError(f"Invalid composite token [Missing {cls._token_property_name}]") - - feed_range_data = data.get(cls._feed_range_property_name) - if feed_range_data is None: - raise ValueError(f"Invalid composite token [Missing {cls._feed_range_property_name}]") - - feed_range = Range.ParseFromDict(feed_range_data) - return cls(feed_range=feed_range, token=token) - - def __repr__(self): - return f"CompositeContinuationToken(token={self.token}, range={self._feed_range})" diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/feed_range_composite_continuation_token.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/feed_range_composite_continuation_token.py deleted file mode 100644 index 32122145009c..000000000000 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/feed_range_composite_continuation_token.py +++ /dev/null @@ -1,141 +0,0 @@ -# The MIT License (MIT) -# Copyright (c) 2014 Microsoft Corporation - -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. - -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -"""Internal class for change feed continuation token by feed range in the Azure Cosmos -database service. 
-""" -import collections -from collections import deque -from typing import Any - -from azure.cosmos._change_feed.aio.composite_continuation_token import CompositeContinuationToken -from azure.cosmos._change_feed.feed_range import FeedRange, FeedRangeEpk, FeedRangePartitionKey -from azure.cosmos._routing.aio.routing_map_provider import SmartRoutingMapProvider -from azure.cosmos._routing.routing_range import Range -from azure.cosmos._utils import is_key_exists_and_not_none - - -class FeedRangeCompositeContinuation(object): - _version_property_name = "v" - _container_rid_property_name = "rid" - _continuation_property_name = "continuation" - - def __init__( - self, - container_rid: str, - feed_range: FeedRange, - continuation: collections.deque[CompositeContinuationToken]): - if container_rid is None: - raise ValueError("container_rid is missing") - - self._container_rid = container_rid - self._feed_range = feed_range - self._continuation = continuation - self._current_token = self._continuation[0] - self._initial_no_result_range = None - - @property - def current_token(self): - return self._current_token - - def to_dict(self) -> dict[str, Any]: - json_data = { - self._version_property_name: "v2", - self._container_rid_property_name: self._container_rid, - self._continuation_property_name: [childToken.to_dict() for childToken in self._continuation], - } - - json_data.update(self._feed_range.to_dict()) - return json_data - - @classmethod - def from_json(cls, data) -> 'FeedRangeCompositeContinuation': - version = data.get(cls._version_property_name) - if version is None: - raise ValueError(f"Invalid feed range composite continuation token [Missing {cls._version_property_name}]") - if version != "v2": - raise ValueError("Invalid feed range composite continuation token [Invalid version]") - - container_rid = data.get(cls._container_rid_property_name) - if container_rid is None: - raise ValueError(f"Invalid feed range composite continuation token [Missing {cls._container_rid_property_name}]") - - continuation_data = data.get(cls._continuation_property_name) - if continuation_data is None: - raise ValueError(f"Invalid feed range composite continuation token [Missing {cls._continuation_property_name}]") - if not isinstance(continuation_data, list) or len(continuation_data) == 0: - raise ValueError(f"Invalid feed range composite continuation token [The {cls._continuation_property_name} must be non-empty array]") - continuation = [CompositeContinuationToken.from_json(child_range_continuation_token) for child_range_continuation_token in continuation_data] - - # parsing feed range - if is_key_exists_and_not_none(data, FeedRangeEpk.type_property_name): - feed_range = FeedRangeEpk.from_json(data) - elif is_key_exists_and_not_none(data, FeedRangePartitionKey.type_property_name): - feed_range =\ - FeedRangePartitionKey.from_json(data, continuation[0].feed_range) - else: - raise ValueError("Invalid feed range composite continuation token [Missing feed range scope]") - - return cls(container_rid=container_rid, feed_range=feed_range, continuation=deque(continuation)) - - async def handle_feed_range_gone(self, routing_provider: SmartRoutingMapProvider, collection_link: str) -> None: - overlapping_ranges = await routing_provider.get_overlapping_ranges(collection_link, [self._current_token.feed_range]) - - if len(overlapping_ranges) == 1: - # merge,reusing the existing the feedRange and continuationToken - pass - else: - # split, remove the parent range and then add new child ranges. 
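To make the split handling in this method concrete, here is a minimal, self-contained sketch of the idea using simplified stand-in types (a plain tuple for the feed range and a small Token dataclass) rather than the SDK's CompositeContinuationToken and Range classes: on a split the parent token is popped from the front of the deque and one token per overlapping child range is appended, each carrying the parent's etag forward.

from collections import deque
from dataclasses import dataclass
from typing import Deque, List, Optional, Tuple


@dataclass
class Token:
    feed_range: Tuple[str, str]   # (min epk, max epk); a hypothetical simplified shape
    etag: Optional[str]


def handle_split(continuation: Deque[Token], child_ranges: List[Tuple[str, str]]) -> Token:
    parent = continuation.popleft()                      # remove the parent range
    for child in child_ranges:
        continuation.append(Token(child, parent.etag))   # each child reuses the parent's etag
    return continuation[0]                               # the new current token


tokens: Deque[Token] = deque([Token(("", "FF"), '"etag-42"')])
current = handle_split(tokens, [("", "7F"), ("7F", "FF")])
print(current)  # Token(feed_range=('', '7F'), etag='"etag-42"')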
- # For each new child range, using the continuation token from the parent - self._continuation.popleft() - for child_range in overlapping_ranges: - self._continuation.append(CompositeContinuationToken(Range.PartitionKeyRangeToRange(child_range), self._current_token.token)) - - self._current_token = self._continuation[0] - - def should_retry_on_not_modified_response(self) -> bool: - # when getting 304(Not Modified) response from one sub feed range, we will try to fetch for the next sub feed range - # we will repeat the above logic until we have looped through all sub feed ranges - - # TODO: validate the response headers, can we get the status code - if len(self._continuation) > 1: - return self._current_token.feed_range != self._initial_no_result_range - - else: - return False - - def _move_to_next_token(self) -> None: - first_composition_token = self._continuation.popleft() - # add the composition token to the end of the list - self._continuation.append(first_composition_token) - self._current_token = self._continuation[0] - - def apply_server_response_continuation(self, etag) -> None: - self._current_token.update_token(etag) - self._move_to_next_token() - - def apply_not_modified_response(self) -> None: - if self._initial_no_result_range is None: - self._initial_no_result_range = self._current_token.feed_range - - @property - def feed_range(self) -> FeedRange: - return self._feed_range diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_fetcher.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_fetcher.py index 6edaf8e73fd9..abad86dfd119 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_fetcher.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_fetcher.py @@ -26,11 +26,14 @@ import copy import json from abc import ABC, abstractmethod +from typing import Dict, Any, List from azure.cosmos import _retry_utility, http_constants, exceptions +from azure.cosmos._change_feed.change_feed_start_from import ChangeFeedStartFromPointInTime from azure.cosmos._change_feed.change_feed_state import ChangeFeedStateV1, ChangeFeedStateV2 from azure.cosmos.exceptions import CosmosHttpResponseError +# pylint: disable=protected-access class ChangeFeedFetcher(ABC): @@ -48,16 +51,16 @@ def __init__( self, client, resource_link: str, - feed_options: dict[str, any], + feed_options: Dict[str, Any], fetch_function): self._client = client self._feed_options = feed_options - self._change_feed_state = self._feed_options.pop("changeFeedState") + self._change_feed_state: ChangeFeedStateV1 = self._feed_options.pop("changeFeedState") if not isinstance(self._change_feed_state, ChangeFeedStateV1): - raise ValueError(f"ChangeFeedFetcherV1 can not handle change feed state version {type(self._change_feed_state)}") - self._change_feed_state.__class__ = ChangeFeedStateV1 + raise ValueError(f"ChangeFeedFetcherV1 can not handle change feed state version" + f" {type(self._change_feed_state)}") self._resource_link = resource_link self._fetch_function = fetch_function @@ -73,28 +76,30 @@ def callback(): return _retry_utility.Execute(self._client, self._client._global_endpoint_manager, callback) - def fetch_change_feed_items(self, fetch_function) -> list[dict[str, any]]: + def fetch_change_feed_items(self, fetch_function) -> List[Dict[str, Any]]: new_options = copy.deepcopy(self._feed_options) new_options["changeFeedState"] = self._change_feed_state self._change_feed_state.populate_feed_options(new_options) - is_s_time_first_fetch = True + 
is_s_time_first_fetch = self._change_feed_state._continuation is None while True: (fetched_items, response_headers) = fetch_function(new_options) continuation_key = http_constants.HttpHeaders.ETag # In change feed queries, the continuation token is always populated. The hasNext() test is whether # there is any items in the response or not. - # For start time however we get no initial results, so we need to pass continuation token? Is this true? self._change_feed_state.apply_server_response_continuation( response_headers.get(continuation_key)) if fetched_items: break - elif is_s_time_first_fetch: + + # When processing from point in time, there will be no initial results being returned, + # so we will retry with the new continuation token again + if (isinstance(self._change_feed_state._change_feed_start_from, ChangeFeedStartFromPointInTime) + and is_s_time_first_fetch): is_s_time_first_fetch = False else: break - return fetched_items @@ -106,15 +111,16 @@ def __init__( self, client, resource_link: str, - feed_options: dict[str, any], + feed_options: Dict[str, Any], fetch_function): self._client = client self._feed_options = feed_options - self._change_feed_state = self._feed_options.pop("changeFeedState") + self._change_feed_state: ChangeFeedStateV2 = self._feed_options.pop("changeFeedState") if not isinstance(self._change_feed_state, ChangeFeedStateV2): - raise ValueError(f"ChangeFeedFetcherV2 can not handle change feed state version {type(self._change_feed_state)}") + raise ValueError(f"ChangeFeedFetcherV2 can not handle change feed state version " + f"{type(self._change_feed_state)}") self._resource_link = resource_link self._fetch_function = fetch_function @@ -140,34 +146,47 @@ def callback(): return self.fetch_next_block() - def fetch_change_feed_items(self, fetch_function) -> list[dict[str, any]]: + def fetch_change_feed_items(self, fetch_function) -> List[Dict[str, Any]]: new_options = copy.deepcopy(self._feed_options) new_options["changeFeedState"] = self._change_feed_state self._change_feed_state.populate_feed_options(new_options) - is_s_time_first_fetch = True + is_s_time_first_fetch = self._change_feed_state._continuation.current_token.token is None while True: (fetched_items, response_headers) = fetch_function(new_options) continuation_key = http_constants.HttpHeaders.ETag - # In change feed queries, the continuation token is always populated. The hasNext() test is whether - # there is any items in the response or not. - # For start time however we get no initial results, so we need to pass continuation token? Is this true? + # In change feed queries, the continuation token is always populated. if fetched_items: self._change_feed_state.apply_server_response_continuation( response_headers.get(continuation_key)) + self._change_feed_state._continuation._move_to_next_token() response_headers[continuation_key] = self._get_base64_encoded_continuation() break + + # when there is no items being returned, we will decide to retry based on: + # 1. When processing from point in time, there will be no initial results being returned, + # so we will retry with the new continuation token + # 2. 
if the feed range of the changeFeedState span multiple physical partitions + # then we will read from the next feed range until we have looped through all physical partitions + self._change_feed_state.apply_not_modified_response() + self._change_feed_state.apply_server_response_continuation( + response_headers.get(continuation_key)) + + if (isinstance(self._change_feed_state._change_feed_start_from, ChangeFeedStartFromPointInTime) + and is_s_time_first_fetch): + response_headers[continuation_key] = self._get_base64_encoded_continuation() + is_s_time_first_fetch = False + should_retry = True else: - self._change_feed_state.apply_not_modified_response() - self._change_feed_state.apply_server_response_continuation( - response_headers.get(continuation_key)) + self._change_feed_state._continuation._move_to_next_token() response_headers[continuation_key] = self._get_base64_encoded_continuation() - should_retry = self._change_feed_state.should_retry_on_not_modified_response() or is_s_time_first_fetch + should_retry = self._change_feed_state.should_retry_on_not_modified_response() is_s_time_first_fetch = False - if not should_retry: - break + + if not should_retry: + break return fetched_items @@ -178,3 +197,4 @@ def _get_base64_encoded_continuation(self) -> str: base64_bytes = base64.b64encode(json_bytes) # Convert the Base64 bytes to a string return base64_bytes.decode('utf-8') + diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_iterable.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_iterable.py index 39e94a30c4c0..55f98374252a 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_iterable.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_iterable.py @@ -21,6 +21,7 @@ """Iterable change feed results in the Azure Cosmos database service. 
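For readers tracing _get_base64_encoded_continuation in the fetcher above, the standalone sketch below shows the shape of the round trip: the change feed state is dumped to JSON and base64-encoded, which is also what lets the iterable tell a v2 token apart from a bare v1 _etag. The dictionary is hand-made for illustration; its field names are placeholders and are not claimed to match the exact property names the SDK serializes.

import base64
import json

# hand-made example state; field names are illustrative placeholders
state = {
    "v": "V2",
    "rid": "container-rid",
    "startFrom": {"Type": "Now"},
    "continuation": [{"token": '"etag-1"', "range": {"min": "", "max": "FF"}}],
}

encoded = base64.b64encode(json.dumps(state).encode("utf-8")).decode("utf-8")

def looks_like_v2_continuation(token: str) -> bool:
    # a v2 token decodes to JSON; a v1 token is just the raw _etag string
    try:
        json.loads(base64.b64decode(token, validate=True).decode("utf-8"))
        return True
    except ValueError:
        return False

print(looks_like_v2_continuation(encoded))   # True
print(looks_like_v2_continuation('"0x5B"'))  # False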
""" +from typing import Dict, Any from azure.core.paging import PageIterator @@ -63,14 +64,16 @@ def __init__( raise ValueError("Missing changeFeedStateContext in feed options") change_feed_state_context = self._options.pop("changeFeedStateContext") - continuation = continuation_token if continuation_token is not None else change_feed_state_context.pop("continuation", None) + continuation = continuation_token if continuation_token is not None\ + else change_feed_state_context.pop("continuation", None) # analysis and validate continuation token # there are two types of continuation token we support currently: # v1 version: the continuation token would just be the _etag, # which is being returned when customer is using partition_key_range_id, # which is under deprecation and does not support split/merge - # v2 version: the continuation token will be base64 encoded composition token which includes full change feed state + # v2 version: the continuation token will be base64 encoded composition token + # which includes full change feed state if continuation is not None: if is_base64_encoded(continuation): change_feed_state_context["continuationFeedRange"] = continuation @@ -135,7 +138,7 @@ def _initialize_change_feed_fetcher(self): self._fetch_function ) - def _validate_change_feed_state_context(self, change_feed_state_context: dict[str, any]) -> None: + def _validate_change_feed_state_context(self, change_feed_state_context: Dict[str, Any]) -> None: if is_key_exists_and_not_none(change_feed_state_context, "continuationPkRangeId"): # if continuation token is in v1 format, throw exception if feed_range is set @@ -152,7 +155,6 @@ def _validate_change_feed_state_context(self, change_feed_state_context: dict[st key in change_feed_state_context and change_feed_state_context[key] is not None) if count > 1: raise ValueError( - "partition_key_range_id, partition_key, feed_range are exclusive parameters, please only set one of them") - - + "partition_key_range_id, partition_key, feed_range are exclusive parameters," + " please only set one of them") diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_start_from.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_start_from.py index 76a4d6b56803..632f87715819 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_start_from.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_start_from.py @@ -25,7 +25,7 @@ from abc import ABC, abstractmethod from datetime import datetime, timezone from enum import Enum -from typing import Optional, Union, Literal, Any +from typing import Optional, Union, Literal, Any, Dict from azure.cosmos import http_constants from azure.cosmos._routing.routing_range import Range @@ -43,38 +43,39 @@ class ChangeFeedStartFromInternal(ABC): type_property_name = "Type" @abstractmethod - def to_dict(self) -> dict[str, Any]: + def to_dict(self) -> Dict[str, Any]: pass @staticmethod - def from_start_time(start_time: Optional[Union[datetime, Literal["Now", "Beginning"]]]) -> 'ChangeFeedStartFromInternal': + def from_start_time( + start_time: Optional[Union[datetime, Literal["Now", "Beginning"]]]) -> 'ChangeFeedStartFromInternal': if start_time is None: return ChangeFeedStartFromNow() - elif isinstance(start_time, datetime): + if isinstance(start_time, datetime): return ChangeFeedStartFromPointInTime(start_time) - elif start_time.lower() == ChangeFeedStartFromType.NOW.value.lower(): + if start_time.lower() == ChangeFeedStartFromType.NOW.value.lower(): return 
ChangeFeedStartFromNow() - elif start_time.lower() == ChangeFeedStartFromType.BEGINNING.value.lower(): + if start_time.lower() == ChangeFeedStartFromType.BEGINNING.value.lower(): return ChangeFeedStartFromBeginning() - else: - raise ValueError(f"Invalid start_time '{start_time}'") + + raise ValueError(f"Invalid start_time '{start_time}'") @staticmethod - def from_json(data: dict[str, any]) -> 'ChangeFeedStartFromInternal': + def from_json(data: Dict[str, Any]) -> 'ChangeFeedStartFromInternal': change_feed_start_from_type = data.get(ChangeFeedStartFromInternal.type_property_name) if change_feed_start_from_type is None: raise ValueError(f"Invalid start from json [Missing {ChangeFeedStartFromInternal.type_property_name}]") if change_feed_start_from_type == ChangeFeedStartFromType.BEGINNING.value: return ChangeFeedStartFromBeginning.from_json(data) - elif change_feed_start_from_type == ChangeFeedStartFromType.LEASE.value: + if change_feed_start_from_type == ChangeFeedStartFromType.LEASE.value: return ChangeFeedStartFromETagAndFeedRange.from_json(data) - elif change_feed_start_from_type == ChangeFeedStartFromType.NOW.value: + if change_feed_start_from_type == ChangeFeedStartFromType.NOW.value: return ChangeFeedStartFromNow.from_json(data) - elif change_feed_start_from_type == ChangeFeedStartFromType.POINT_IN_TIME.value: + if change_feed_start_from_type == ChangeFeedStartFromType.POINT_IN_TIME.value: return ChangeFeedStartFromPointInTime.from_json(data) - else: - raise ValueError(f"Can not process changeFeedStartFrom for type {change_feed_start_from_type}") + + raise ValueError(f"Can not process changeFeedStartFrom for type {change_feed_start_from_type}") @abstractmethod def populate_request_headers(self, request_headers) -> None: @@ -85,7 +86,7 @@ class ChangeFeedStartFromBeginning(ChangeFeedStartFromInternal): """Class for change feed start from beginning implementation in the Azure Cosmos database service. """ - def to_dict(self) -> dict[str, Any]: + def to_dict(self) -> Dict[str, Any]: return { self.type_property_name: ChangeFeedStartFromType.BEGINNING.value } @@ -94,7 +95,7 @@ def populate_request_headers(self, request_headers) -> None: pass # there is no headers need to be set for start from beginning @classmethod - def from_json(cls, data: dict[str, Any]) -> 'ChangeFeedStartFromBeginning': + def from_json(cls, data: Dict[str, Any]) -> 'ChangeFeedStartFromBeginning': return ChangeFeedStartFromBeginning() @@ -112,7 +113,7 @@ def __init__(self, etag, feed_range): self._etag = etag self._feed_range = feed_range - def to_dict(self) -> dict[str, Any]: + def to_dict(self) -> Dict[str, Any]: return { self.type_property_name: ChangeFeedStartFromType.LEASE.value, self._etag_property_name: self._etag, @@ -120,7 +121,7 @@ def to_dict(self) -> dict[str, Any]: } @classmethod - def from_json(cls, data: dict[str, Any]) -> 'ChangeFeedStartFromETagAndFeedRange': + def from_json(cls, data: Dict[str, Any]) -> 'ChangeFeedStartFromETagAndFeedRange': etag = data.get(cls._etag_property_name) if etag is None: raise ValueError(f"Invalid change feed start from [Missing {cls._etag_property_name}]") @@ -141,7 +142,7 @@ class ChangeFeedStartFromNow(ChangeFeedStartFromInternal): """Class for change feed start from etag and feed range implementation in the Azure Cosmos database service. 
""" - def to_dict(self) -> dict[str, Any]: + def to_dict(self) -> Dict[str, Any]: return { self.type_property_name: ChangeFeedStartFromType.NOW.value } @@ -150,7 +151,7 @@ def populate_request_headers(self, request_headers) -> None: request_headers[http_constants.HttpHeaders.IfNoneMatch] = "*" @classmethod - def from_json(cls, data: dict[str, Any]) -> 'ChangeFeedStartFromNow': + def from_json(cls, data: Dict[str, Any]) -> 'ChangeFeedStartFromNow': return ChangeFeedStartFromNow() @@ -166,7 +167,7 @@ def __init__(self, start_time: datetime): self._start_time = start_time - def to_dict(self) -> dict[str, Any]: + def to_dict(self) -> Dict[str, Any]: return { self.type_property_name: ChangeFeedStartFromType.POINT_IN_TIME.value, self._point_in_time_ms_property_name: @@ -178,7 +179,7 @@ def populate_request_headers(self, request_headers) -> None: self._start_time.astimezone(timezone.utc).strftime('%a, %d %b %Y %H:%M:%S GMT') @classmethod - def from_json(cls, data: dict[str, Any]) -> 'ChangeFeedStartFromPointInTime': + def from_json(cls, data: Dict[str, Any]) -> 'ChangeFeedStartFromPointInTime': point_in_time_ms = data.get(cls._point_in_time_ms_property_name) if point_in_time_ms is None: raise ValueError(f"Invalid change feed start from {cls._point_in_time_ms_property_name} ") @@ -187,3 +188,4 @@ def from_json(cls, data: dict[str, Any]) -> 'ChangeFeedStartFromPointInTime': return ChangeFeedStartFromPointInTime(point_in_time) + diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_state.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_state.py index 210563ab8411..2d2f4d0d6ae2 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_state.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_state.py @@ -27,7 +27,7 @@ import collections import json from abc import ABC, abstractmethod -from typing import Optional, Union, List, Any +from typing import Optional, Union, List, Any, Dict, Deque from azure.cosmos import http_constants from azure.cosmos._change_feed.change_feed_start_from import ChangeFeedStartFromInternal, \ @@ -35,6 +35,7 @@ from azure.cosmos._change_feed.composite_continuation_token import CompositeContinuationToken from azure.cosmos._change_feed.feed_range import FeedRange, FeedRangeEpk, FeedRangePartitionKey from azure.cosmos._change_feed.feed_range_composite_continuation_token import FeedRangeCompositeContinuation +from azure.cosmos._routing.aio.routing_map_provider import SmartRoutingMapProvider as AsyncSmartRoutingMapProvider from azure.cosmos._routing.routing_map_provider import SmartRoutingMapProvider from azure.cosmos._routing.routing_range import Range from azure.cosmos._utils import is_key_exists_and_not_none @@ -46,11 +47,21 @@ class ChangeFeedState(ABC): version_property_name = "v" @abstractmethod - def populate_feed_options(self, feed_options: dict[str, any]) -> None: + def populate_feed_options(self, feed_options: Dict[str, Any]) -> None: pass @abstractmethod - def populate_request_headers(self, routing_provider: SmartRoutingMapProvider, request_headers: dict[str, any]) -> None: + def populate_request_headers( + self, + routing_provider: SmartRoutingMapProvider, + request_headers: Dict[str, Any]) -> None: + pass + + @abstractmethod + async def populate_request_headers_async( + self, + async_routing_provider: AsyncSmartRoutingMapProvider, + request_headers: Dict[str, Any]) -> None: pass @abstractmethod @@ -61,28 +72,32 @@ def apply_server_response_continuation(self, continuation: str) -> None: def 
from_json( container_link: str, container_rid: str, - change_feed_state_context: dict[str, Any]): + change_feed_state_context: Dict[str, Any]): if (is_key_exists_and_not_none(change_feed_state_context, "partitionKeyRangeId") or is_key_exists_and_not_none(change_feed_state_context, "continuationPkRangeId")): return ChangeFeedStateV1.from_json(container_link, container_rid, change_feed_state_context) - else: - if is_key_exists_and_not_none(change_feed_state_context, "continuationFeedRange"): - # get changeFeedState from continuation - continuation_json_str = base64.b64decode(change_feed_state_context["continuationFeedRange"]).decode('utf-8') - continuation_json = json.loads(continuation_json_str) - version = continuation_json.get(ChangeFeedState.version_property_name) - if version is None: - raise ValueError("Invalid base64 encoded continuation string [Missing version]") - elif version == "V2": - return ChangeFeedStateV2.from_continuation(container_link, container_rid, continuation_json) - else: - raise ValueError("Invalid base64 encoded continuation string [Invalid version]") - # when there is no continuation token, by default construct ChangeFeedStateV2 - return ChangeFeedStateV2.from_initial_state(container_link, container_rid, change_feed_state_context) + + if is_key_exists_and_not_none(change_feed_state_context, "continuationFeedRange"): + # get changeFeedState from continuation + continuation_json_str = base64.b64decode(change_feed_state_context["continuationFeedRange"]).decode( + 'utf-8') + continuation_json = json.loads(continuation_json_str) + version = continuation_json.get(ChangeFeedState.version_property_name) + if version is None: + raise ValueError("Invalid base64 encoded continuation string [Missing version]") + + if version == "V2": + return ChangeFeedStateV2.from_continuation(container_link, container_rid, continuation_json) + + raise ValueError("Invalid base64 encoded continuation string [Invalid version]") + + # when there is no continuation token, by default construct ChangeFeedStateV2 + return ChangeFeedStateV2.from_initial_state(container_link, container_rid, change_feed_state_context) class ChangeFeedStateV1(ChangeFeedState): - """Change feed state v1 implementation. This is used when partition key range id is used or the continuation is just simple _etag + """Change feed state v1 implementation. 
+ This is used when partition key range id is used or the continuation is just simple _etag """ def __init__( @@ -92,7 +107,7 @@ def __init__( change_feed_start_from: ChangeFeedStartFromInternal, partition_key_range_id: Optional[str] = None, partition_key: Optional[Union[str, int, float, bool, List[Union[str, int, float, bool]], _Empty, _Undefined]] = None, - continuation: Optional[str] = None): + continuation: Optional[str] = None): # pylint: disable=line-too-long self._container_link = container_link self._container_rid = container_rid @@ -106,27 +121,48 @@ def container_rid(self): return self._container_rid @classmethod - def from_json(cls, container_link: str, container_rid: str, data: dict[str, Any]) -> 'ChangeFeedStateV1': + def from_json( + cls, + container_link: str, + container_rid: str, + change_feed_state_context: Dict[str, Any]) -> 'ChangeFeedStateV1': return cls( container_link, container_rid, - ChangeFeedStartFromInternal.from_start_time(data.get("startTime")), - data.get("partitionKeyRangeId"), - data.get("partitionKey"), - data.get("continuationPkRangeId") + ChangeFeedStartFromInternal.from_start_time(change_feed_state_context.get("startTime")), + change_feed_state_context.get("partitionKeyRangeId"), + change_feed_state_context.get("partitionKey"), + change_feed_state_context.get("continuationPkRangeId") ) - def populate_request_headers(self, routing_provider: SmartRoutingMapProvider, headers: dict[str, Any]) -> None: - headers[http_constants.HttpHeaders.AIM] = http_constants.HttpHeaders.IncrementalFeedHeaderValue + def populate_request_headers( + self, + routing_provider: SmartRoutingMapProvider, + request_headers: Dict[str, Any]) -> None: + request_headers[http_constants.HttpHeaders.AIM] = http_constants.HttpHeaders.IncrementalFeedHeaderValue # When a merge happens, the child partition will contain documents ordered by LSN but the _ts/creation time # of the documents may not be sequential. So when reading the changeFeed by LSN, it is possible to encounter documents with lower _ts. # In order to guarantee we always get the documents after customer's point start time, we will need to always pass the start time in the header. - self._change_feed_start_from.populate_request_headers(headers) + self._change_feed_start_from.populate_request_headers(request_headers) if self._continuation: - headers[http_constants.HttpHeaders.IfNoneMatch] = self._continuation + request_headers[http_constants.HttpHeaders.IfNoneMatch] = self._continuation + + async def populate_request_headers_async( + self, + routing_provider: AsyncSmartRoutingMapProvider, + request_headers: Dict[str, Any]) -> None: - def populate_feed_options(self, feed_options: dict[str, any]) -> None: + request_headers[http_constants.HttpHeaders.AIM] = http_constants.HttpHeaders.IncrementalFeedHeaderValue + + # When a merge happens, the child partition will contain documents ordered by LSN but the _ts/creation time + # of the documents may not be sequential. So when reading the changeFeed by LSN, it is possible to encounter documents with lower _ts. + # In order to guarantee we always get the documents after customer's point start time, we will need to always pass the start time in the header. 
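As a rough illustration of what the v1 state ends up putting on the wire, the sketch below builds the equivalent headers by hand. The literal header names are an assumption for readability (the SDK reads them from http_constants.HttpHeaders); the logic mirrors this hunk: "Now" becomes If-None-Match: *, a point in time becomes If-Modified-Since in UTC, and a stored _etag continuation overwrites If-None-Match.

from datetime import datetime, timezone
from typing import Dict, Optional


def build_v1_headers(start_time: Optional[datetime], continuation_etag: Optional[str]) -> Dict[str, str]:
    headers: Dict[str, str] = {"A-IM": "Incremental feed"}
    if start_time is None:
        headers["If-None-Match"] = "*"            # start from "Now"
    else:                                          # start from a point in time
        headers["If-Modified-Since"] = start_time.astimezone(timezone.utc).strftime(
            "%a, %d %b %Y %H:%M:%S GMT")
    if continuation_etag is not None:              # resume from a previous _etag
        headers["If-None-Match"] = continuation_etag
    return headers


print(build_v1_headers(None, None))
print(build_v1_headers(datetime(2024, 8, 14, tzinfo=timezone.utc), '"0x5C"'))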
+ self._change_feed_start_from.populate_request_headers(request_headers) + if self._continuation: + request_headers[http_constants.HttpHeaders.IfNoneMatch] = self._continuation + + def populate_feed_options(self, feed_options: Dict[str, Any]) -> None: if self._partition_key_range_id is not None: feed_options["partitionKeyRangeId"] = self._partition_key_range_id if self._partition_key is not None: @@ -148,7 +184,7 @@ def __init__( container_rid: str, feed_range: FeedRange, change_feed_start_from: ChangeFeedStartFromInternal, - continuation: Optional[FeedRangeCompositeContinuation] = None): + continuation: Optional[FeedRangeCompositeContinuation]): self._container_link = container_link self._container_rid = container_rid @@ -156,39 +192,49 @@ def __init__( self._change_feed_start_from = change_feed_start_from self._continuation = continuation if self._continuation is None: - composite_continuation_token_queue = collections.deque() + composite_continuation_token_queue: Deque = collections.deque() composite_continuation_token_queue.append( CompositeContinuationToken( self._feed_range.get_normalized_range(), None)) self._continuation =\ - FeedRangeCompositeContinuation(self._container_rid, self._feed_range, composite_continuation_token_queue) + FeedRangeCompositeContinuation( + self._container_rid, + self._feed_range, + composite_continuation_token_queue) @property def container_rid(self) -> str : return self._container_rid - def to_dict(self) -> dict[str, Any]: + def to_dict(self) -> Dict[str, Any]: return { self.version_property_name: "V2", self.container_rid_property_name: self._container_rid, self.change_feed_mode_property_name: "Incremental", self.change_feed_start_from_property_name: self._change_feed_start_from.to_dict(), - self.continuation_property_name: self._continuation.to_dict() + self.continuation_property_name: self._continuation.to_dict() if self._continuation is not None else None } - def populate_request_headers(self, routing_provider: SmartRoutingMapProvider, headers: dict[str, any]) -> None: - headers[http_constants.HttpHeaders.AIM] = http_constants.HttpHeaders.IncrementalFeedHeaderValue + def populate_request_headers( + self, + routing_provider: SmartRoutingMapProvider, + request_headers: Dict[str, Any]) -> None: + request_headers[http_constants.HttpHeaders.AIM] = http_constants.HttpHeaders.IncrementalFeedHeaderValue # When a merge happens, the child partition will contain documents ordered by LSN but the _ts/creation time - # of the documents may not be sequential. So when reading the changeFeed by LSN, it is possible to encounter documents with lower _ts. - # In order to guarantee we always get the documents after customer's point start time, we will need to always pass the start time in the header. - self._change_feed_start_from.populate_request_headers(headers) + # of the documents may not be sequential. + # So when reading the changeFeed by LSN, it is possible to encounter documents with lower _ts. + # In order to guarantee we always get the documents after customer's point start time, + # we will need to always pass the start time in the header. 
+ self._change_feed_start_from.populate_request_headers(request_headers) if self._continuation.current_token is not None and self._continuation.current_token.token is not None: change_feed_start_from_feed_range_and_etag =\ - ChangeFeedStartFromETagAndFeedRange(self._continuation.current_token.token, self._continuation.current_token.feed_range) - change_feed_start_from_feed_range_and_etag.populate_request_headers(headers) + ChangeFeedStartFromETagAndFeedRange( + self._continuation.current_token.token, + self._continuation.current_token.feed_range) + change_feed_start_from_feed_range_and_etag.populate_request_headers(request_headers) # based on the feed range to find the overlapping partition key range id over_lapping_ranges =\ @@ -197,28 +243,87 @@ def populate_request_headers(self, routing_provider: SmartRoutingMapProvider, he [self._continuation.current_token.feed_range]) if len(over_lapping_ranges) > 1: - raise CosmosFeedRangeGoneError(message= - f"Range {self._continuation.current_token.feed_range}" - f" spans {len(over_lapping_ranges)}" - f" physical partitions: {[child_range['id'] for child_range in over_lapping_ranges]}") + raise CosmosFeedRangeGoneError( + message= + f"Range {self._continuation.current_token.feed_range}" + f" spans {len(over_lapping_ranges)}" + f" physical partitions: {[child_range['id'] for child_range in over_lapping_ranges]}") + + overlapping_feed_range = Range.PartitionKeyRangeToRange(over_lapping_ranges[0]) + if overlapping_feed_range == self._continuation.current_token.feed_range: + # exactly mapping to one physical partition, only need to set the partitionKeyRangeId + request_headers[http_constants.HttpHeaders.PartitionKeyRangeID] = over_lapping_ranges[0]["id"] else: - overlapping_feed_range = Range.PartitionKeyRangeToRange(over_lapping_ranges[0]) - if overlapping_feed_range == self._continuation.current_token.feed_range: - # exactly mapping to one physical partition, only need to set the partitionKeyRangeId - headers[http_constants.HttpHeaders.PartitionKeyRangeID] = over_lapping_ranges[0]["id"] - else: - # the current token feed range spans less than single physical partition - # for this case, need to set both the partition key range id and epk filter headers - headers[http_constants.HttpHeaders.PartitionKeyRangeID] = over_lapping_ranges[0]["id"] - headers[http_constants.HttpHeaders.StartEpkString] = self._continuation.current_token.feed_range.min - headers[http_constants.HttpHeaders.EndEpkString] = self._continuation.current_token.feed_range.max + # the current token feed range spans less than single physical partition + # for this case, need to set both the partition key range id and epk filter headers + request_headers[http_constants.HttpHeaders.PartitionKeyRangeID] = over_lapping_ranges[0]["id"] + request_headers[ + http_constants.HttpHeaders.StartEpkString] = self._continuation.current_token.feed_range.min + request_headers[ + http_constants.HttpHeaders.EndEpkString] = self._continuation.current_token.feed_range.max + + async def populate_request_headers_async( + self, + async_routing_provider: AsyncSmartRoutingMapProvider, + request_headers: Dict[str, Any]) -> None: + request_headers[http_constants.HttpHeaders.AIM] = http_constants.HttpHeaders.IncrementalFeedHeaderValue - def populate_feed_options(self, feed_options: dict[str, any]) -> None: + # When a merge happens, the child partition will contain documents ordered by LSN but the _ts/creation time + # of the documents may not be sequential. 
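The v2 path that follows resolves the current token's feed range against the physical partition map before sending the request. The sketch below is a simplified rendition of that decision, assuming dict-shaped partition key ranges and spelling the header names out literally (the real code reads them from http_constants and raises CosmosFeedRangeGoneError): exactly one overlapping physical partition needs only the partition key range id header, a sub-range additionally sets the start/end EPK filters, and more than one overlap forces the continuation token to be split before retrying.

from typing import Dict, List, Tuple


class FeedRangeGone(Exception):
    pass


def epk_headers(token_range: Tuple[str, str], overlapping: List[Dict[str, str]]) -> Dict[str, str]:
    if len(overlapping) > 1:
        # the token's range now spans several physical partitions: the caller is
        # expected to split the token first (handle_feed_range_gone) and retry
        raise FeedRangeGone(f"range {token_range} spans {len(overlapping)} partitions")

    pk_range = overlapping[0]
    headers = {"x-ms-documentdb-partitionkeyrangeid": pk_range["id"]}
    if (token_range[0], token_range[1]) != (pk_range["minInclusive"], pk_range["maxExclusive"]):
        # token covers only part of the physical partition: add EPK filter headers
        headers["x-ms-start-epk"] = token_range[0]
        headers["x-ms-end-epk"] = token_range[1]
    return headers


print(epk_headers(("", "FF"), [{"id": "0", "minInclusive": "", "maxExclusive": "FF"}]))
print(epk_headers(("", "3F"), [{"id": "0", "minInclusive": "", "maxExclusive": "FF"}]))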
+ # So when reading the changeFeed by LSN, it is possible to encounter documents with lower _ts. + # In order to guarantee we always get the documents after customer's point start time, + # we will need to always pass the start time in the header. + self._change_feed_start_from.populate_request_headers(request_headers) + + if self._continuation.current_token is not None and self._continuation.current_token.token is not None: + change_feed_start_from_feed_range_and_etag = \ + ChangeFeedStartFromETagAndFeedRange( + self._continuation.current_token.token, + self._continuation.current_token.feed_range) + change_feed_start_from_feed_range_and_etag.populate_request_headers(request_headers) + + # based on the feed range to find the overlapping partition key range id + over_lapping_ranges = \ + await async_routing_provider.get_overlapping_ranges( + self._container_link, + [self._continuation.current_token.feed_range]) + + if len(over_lapping_ranges) > 1: + raise CosmosFeedRangeGoneError( + message= + f"Range {self._continuation.current_token.feed_range}" + f" spans {len(over_lapping_ranges)}" + f" physical partitions: {[child_range['id'] for child_range in over_lapping_ranges]}") + + overlapping_feed_range = Range.PartitionKeyRangeToRange(over_lapping_ranges[0]) + if overlapping_feed_range == self._continuation.current_token.feed_range: + # exactly mapping to one physical partition, only need to set the partitionKeyRangeId + request_headers[http_constants.HttpHeaders.PartitionKeyRangeID] = over_lapping_ranges[0]["id"] + else: + # the current token feed range spans less than single physical partition + # for this case, need to set both the partition key range id and epk filter headers + request_headers[http_constants.HttpHeaders.PartitionKeyRangeID] = \ + over_lapping_ranges[0]["id"] + request_headers[http_constants.HttpHeaders.StartEpkString] = \ + self._continuation.current_token.feed_range.min + request_headers[http_constants.HttpHeaders.EndEpkString] = \ + self._continuation.current_token.feed_range.max + + def populate_feed_options(self, feed_options: Dict[str, Any]) -> None: pass - def handle_feed_range_gone(self, routing_provider: SmartRoutingMapProvider, resource_link: str) -> None: + def handle_feed_range_gone( + self, + routing_provider: SmartRoutingMapProvider, + resource_link: str) -> None: self._continuation.handle_feed_range_gone(routing_provider, resource_link) + async def handle_feed_range_gone_async( + self, + routing_provider: AsyncSmartRoutingMapProvider, + resource_link: str) -> None: + await self._continuation.handle_feed_range_gone_async(routing_provider, resource_link) + def apply_server_response_continuation(self, continuation: str) -> None: self._continuation.apply_server_response_continuation(continuation) @@ -233,17 +338,18 @@ def from_continuation( cls, container_link: str, container_rid: str, - continuation_json: dict[str, Any]) -> 'ChangeFeedStateV2': + continuation_json: Dict[str, Any]) -> 'ChangeFeedStateV2': container_rid_from_continuation = continuation_json.get(ChangeFeedStateV2.container_rid_property_name) if container_rid_from_continuation is None: raise ValueError(f"Invalid continuation: [Missing {ChangeFeedStateV2.container_rid_property_name}]") - elif container_rid_from_continuation != container_rid: + if container_rid_from_continuation != container_rid: raise ValueError("Invalid continuation: [Mismatch collection rid]") change_feed_start_from_data = continuation_json.get(ChangeFeedStateV2.change_feed_start_from_property_name) if change_feed_start_from_data 
is None: - raise ValueError(f"Invalid continuation: [Missing {ChangeFeedStateV2.change_feed_start_from_property_name}]") + raise ValueError(f"Invalid continuation:" + f" [Missing {ChangeFeedStateV2.change_feed_start_from_property_name}]") change_feed_start_from = ChangeFeedStartFromInternal.from_json(change_feed_start_from_data) continuation_data = continuation_json.get(ChangeFeedStateV2.continuation_property_name) @@ -270,7 +376,10 @@ def from_initial_state( feed_range = FeedRangeEpk(Range.ParseFromDict(feed_range_json)) elif is_key_exists_and_not_none(change_feed_state_context, "partitionKey"): if is_key_exists_and_not_none(change_feed_state_context, "partitionKeyFeedRange"): - feed_range = FeedRangePartitionKey(change_feed_state_context["partitionKey"], change_feed_state_context["partitionKeyFeedRange"]) + feed_range =\ + FeedRangePartitionKey( + change_feed_state_context["partitionKey"], + change_feed_state_context["partitionKeyFeedRange"]) else: raise ValueError("partitionKey is in the changeFeedStateContext, but missing partitionKeyFeedRange") else: @@ -283,7 +392,8 @@ def from_initial_state( False) ) - change_feed_start_from = ChangeFeedStartFromInternal.from_start_time(change_feed_state_context.get("startTime")) + change_feed_start_from = ( + ChangeFeedStartFromInternal.from_start_time(change_feed_state_context.get("startTime"))) return cls( container_link=container_link, container_rid=collection_rid, @@ -291,3 +401,5 @@ def from_initial_state( change_feed_start_from=change_feed_start_from, continuation=None) + + diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range.py index 1b6ef79e4176..3b9707371fb8 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range.py @@ -22,9 +22,8 @@ """Internal class for feed range implementation in the Azure Cosmos database service. 
""" -import json from abc import ABC, abstractmethod -from typing import Union, List +from typing import Union, List, Dict, Any from azure.cosmos._routing.routing_range import Range from azure.cosmos._utils import is_key_exists_and_not_none @@ -38,7 +37,7 @@ def get_normalized_range(self) -> Range: pass @abstractmethod - def to_dict(self) -> dict[str, any]: + def to_dict(self) -> Dict[str, Any]: pass class FeedRangePartitionKey(FeedRange): @@ -47,7 +46,7 @@ class FeedRangePartitionKey(FeedRange): def __init__( self, pk_value: Union[str, int, float, bool, List[Union[str, int, float, bool]], _Empty, _Undefined], - feed_range: Range): + feed_range: Range): # pylint: disable=line-too-long if pk_value is None: raise ValueError("PartitionKey cannot be None") @@ -60,26 +59,31 @@ def __init__( def get_normalized_range(self) -> Range: return self._feed_range.to_normalized_range() - def to_dict(self) -> dict[str, any]: + def to_dict(self) -> Dict[str, Any]: if isinstance(self._pk_value, _Undefined): return { self.type_property_name: [{}] } - elif isinstance(self._pk_value, _Empty): + if isinstance(self._pk_value, _Empty): return { self.type_property_name: [] } - else: - return { self.type_property_name: json.dumps(self._pk_value) } + if isinstance(self._pk_value, list): + return { self.type_property_name: [item for item in self._pk_value] } + + return { self.type_property_name: self._pk_value } @classmethod - def from_json(cls, data: dict[str, any], feed_range: Range) -> 'FeedRangePartitionKey': + def from_json(cls, data: Dict[str, Any], feed_range: Range) -> 'FeedRangePartitionKey': if is_key_exists_and_not_none(data, cls.type_property_name): pk_value = data.get(cls.type_property_name) - if isinstance(pk_value, list): - if not pk_value: - return cls(_Empty(), feed_range) - if pk_value == [{}]: + if not pk_value: + return cls(_Empty(), feed_range) + if pk_value is [{}]: return cls(_Undefined(), feed_range) + if isinstance(pk_value, list): + return cls([item for item in pk_value], feed_range) + return cls(data[cls.type_property_name], feed_range) + + raise ValueError(f"Can not parse FeedRangePartitionKey from the json," + f" there is no property {cls.type_property_name}") - return cls(json.loads(data.get(cls.type_property_name)), feed_range) - raise ValueError(f"Can not parse FeedRangePartitionKey from the json, there is no property {cls.type_property_name}") class FeedRangeEpk(FeedRange): type_property_name = "Range" @@ -93,13 +97,13 @@ def __init__(self, feed_range: Range): def get_normalized_range(self) -> Range: return self._range.to_normalized_range() - def to_dict(self) -> dict[str, any]: + def to_dict(self) -> Dict[str, Any]: return { self.type_property_name: self._range.to_dict() } @classmethod - def from_json(cls, data: dict[str, any]) -> 'FeedRangeEpk': + def from_json(cls, data: Dict[str, Any]) -> 'FeedRangeEpk': if is_key_exists_and_not_none(data, cls.type_property_name): feed_range = Range.ParseFromDict(data.get(cls.type_property_name)) return cls(feed_range) diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range_composite_continuation_token.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range_composite_continuation_token.py index 26abd66ba132..fb67b6b4603c 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range_composite_continuation_token.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range_composite_continuation_token.py @@ -22,13 +22,13 @@ """Internal class for change feed continuation token by feed range in the Azure 
Cosmos database service. """ -import collections from collections import deque -from typing import Any +from typing import Any, Deque, Dict from azure.cosmos._change_feed.composite_continuation_token import CompositeContinuationToken from azure.cosmos._change_feed.feed_range import FeedRange, FeedRangeEpk, FeedRangePartitionKey from azure.cosmos._routing.routing_map_provider import SmartRoutingMapProvider +from azure.cosmos._routing.aio.routing_map_provider import SmartRoutingMapProvider as AsyncSmartRoutingMapProvider from azure.cosmos._routing.routing_range import Range from azure.cosmos._utils import is_key_exists_and_not_none @@ -42,7 +42,7 @@ def __init__( self, container_rid: str, feed_range: FeedRange, - continuation: collections.deque[CompositeContinuationToken]): + continuation: Deque[CompositeContinuationToken]): if container_rid is None: raise ValueError("container_rid is missing") @@ -56,7 +56,7 @@ def __init__( def current_token(self): return self._current_token - def to_dict(self) -> dict[str, Any]: + def to_dict(self) -> Dict[str, Any]: json_data = { self._version_property_name: "v2", self._container_rid_property_name: self._container_rid, @@ -75,14 +75,18 @@ def from_json(cls, data) -> 'FeedRangeCompositeContinuation': container_rid = data.get(cls._container_rid_property_name) if container_rid is None: - raise ValueError(f"Invalid feed range composite continuation token [Missing {cls._container_rid_property_name}]") + raise ValueError(f"Invalid feed range composite continuation token " + f"[Missing {cls._container_rid_property_name}]") continuation_data = data.get(cls._continuation_property_name) if continuation_data is None: - raise ValueError(f"Invalid feed range composite continuation token [Missing {cls._continuation_property_name}]") + raise ValueError(f"Invalid feed range composite continuation token " + f"[Missing {cls._continuation_property_name}]") if not isinstance(continuation_data, list) or len(continuation_data) == 0: - raise ValueError(f"Invalid feed range composite continuation token [The {cls._continuation_property_name} must be non-empty array]") - continuation = [CompositeContinuationToken.from_json(child_range_continuation_token) for child_range_continuation_token in continuation_data] + raise ValueError(f"Invalid feed range composite continuation token " + f"[The {cls._continuation_property_name} must be non-empty array]") + continuation = [CompositeContinuationToken.from_json(child_range_continuation_token) + for child_range_continuation_token in continuation_data] # parsing feed range if is_key_exists_and_not_none(data, FeedRangeEpk.type_property_name): @@ -94,7 +98,10 @@ def from_json(cls, data) -> 'FeedRangeCompositeContinuation': return cls(container_rid=container_rid, feed_range=feed_range, continuation=deque(continuation)) - def handle_feed_range_gone(self, routing_provider: SmartRoutingMapProvider, collection_link: str) -> None: + def handle_feed_range_gone( + self, + routing_provider: SmartRoutingMapProvider, + collection_link: str) -> None: overlapping_ranges = routing_provider.get_overlapping_ranges(collection_link, [self._current_token.feed_range]) if len(overlapping_ranges) == 1: @@ -105,20 +112,47 @@ def handle_feed_range_gone(self, routing_provider: SmartRoutingMapProvider, coll # For each new child range, using the continuation token from the parent self._continuation.popleft() for child_range in overlapping_ranges: - self._continuation.append(CompositeContinuationToken(Range.PartitionKeyRangeToRange(child_range), 
self._current_token.token)) + self._continuation.append( + CompositeContinuationToken( + Range.PartitionKeyRangeToRange(child_range), + self._current_token.token)) + + self._current_token = self._continuation[0] + + async def handle_feed_range_gone_async( + self, + routing_provider: AsyncSmartRoutingMapProvider, + collection_link: str) -> None: + overlapping_ranges = \ + await routing_provider.get_overlapping_ranges( + collection_link, + [self._current_token.feed_range]) + + if len(overlapping_ranges) == 1: + # merge,reusing the existing the feedRange and continuationToken + pass + else: + # split, remove the parent range and then add new child ranges. + # For each new child range, using the continuation token from the parent + self._continuation.popleft() + for child_range in overlapping_ranges: + self._continuation.append( + CompositeContinuationToken( + Range.PartitionKeyRangeToRange(child_range), + self._current_token.token)) self._current_token = self._continuation[0] def should_retry_on_not_modified_response(self) -> bool: - # when getting 304(Not Modified) response from one sub feed range, we will try to fetch for the next sub feed range + # when getting 304(Not Modified) response from one sub feed range, + # we will try to fetch for the next sub feed range # we will repeat the above logic until we have looped through all sub feed ranges # TODO: validate the response headers, can we get the status code if len(self._continuation) > 1: return self._current_token.feed_range != self._initial_no_result_range - else: - return False + return False def _move_to_next_token(self) -> None: first_composition_token = self._continuation.popleft() @@ -128,7 +162,6 @@ def _move_to_next_token(self) -> None: def apply_server_response_continuation(self, etag) -> None: self._current_token.update_token(etag) - self._move_to_next_token() def apply_not_modified_response(self) -> None: if self._initial_no_result_range is None: diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_cosmos_client_connection.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_cosmos_client_connection.py index a81ab438cbf2..87e72391cf0a 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_cosmos_client_connection.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_cosmos_client_connection.py @@ -26,6 +26,8 @@ import os import urllib.parse from typing import Callable, Dict, Any, Iterable, List, Mapping, Optional, Sequence, Tuple, Union, cast, Type +from typing_extensions import TypedDict +from urllib3.util.retry import Retry from azure.core import PipelineClient from azure.core.credentials import TokenCredential @@ -42,8 +44,6 @@ ) from azure.core.pipeline.transport import HttpRequest, \ HttpResponse # pylint: disable=no-legacy-azure-core-http-response-import -from typing_extensions import TypedDict -from urllib3.util.retry import Retry from . import _base as base from . 
import _global_endpoint_manager as global_endpoint_manager @@ -3025,7 +3025,7 @@ def __GetBodiesFromQueryResult(result: Dict[str, Any]) -> List[Dict[str, Any]]: partition_key_range_id ) - change_feed_state = options.pop("changeFeedState", None) + change_feed_state = options.get("changeFeedState", None) if change_feed_state and isinstance(change_feed_state, ChangeFeedState): change_feed_state.populate_request_headers(self._routing_map_provider, headers) diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_routing/routing_range.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_routing/routing_range.py index f3269af47271..a2d789f20644 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_routing/routing_range.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_routing/routing_range.py @@ -108,7 +108,7 @@ def to_normalized_range(self): return Range(normalized_min, normalized_max, True, False) def add_to_effective_partition_key(self, effective_partition_key: str, value: int): - if value != 1 and value != -1: + if value not in (-1, 1): raise ValueError("Invalid value - only 1 or -1 is allowed") byte_array = self.hex_binary_to_byte_array(effective_partition_key) @@ -117,15 +117,13 @@ def add_to_effective_partition_key(self, effective_partition_key: str, value: in if byte_array[i] < 255: byte_array[i] += 1 break - else: - byte_array[i] = 0 + byte_array[i] = 0 else: for i in range(len(byte_array) - 1, -1, -1): if byte_array[i] != 0: byte_array[i] -= 1 break - else: - byte_array[i] = 255 + byte_array[i] = 255 return binascii.hexlify(byte_array).decode() @@ -143,8 +141,8 @@ def from_base64_encoded_json_string(cls, data: str): feed_range_json_string = base64.b64decode(data, validate=True).decode('utf-8') feed_range_json = json.loads(feed_range_json_string) return cls.ParseFromDict(feed_range_json) - except Exception: - raise ValueError(f"Invalid feed_range json string {data}") + except Exception as exc: + raise ValueError(f"Invalid feed_range json string {data}") from exc def to_base64_encoded_string(self): data_json = json.dumps(self.to_dict()) diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_utils.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_utils.py index 1c03b8a054c5..cf4b4977ed44 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_utils.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_utils.py @@ -79,7 +79,7 @@ def is_base64_encoded(data: str) -> bool: return True except (json.JSONDecodeError, ValueError): return False - - -def is_key_exists_and_not_none(data: dict[str, Any], key: str) -> bool: + +def is_key_exists_and_not_none(data: Dict[str, Any], key: str) -> bool: return key in data and data[key] is not None + diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py index 5e8da4f54cf4..e70bdf9ab546 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py @@ -24,12 +24,12 @@ import warnings from datetime import datetime from typing import Any, Dict, Mapping, Optional, Sequence, Type, Union, List, Tuple, cast, overload +from typing_extensions import Literal from azure.core import MatchConditions from azure.core.async_paging import AsyncItemPaged from azure.core.tracing.decorator import distributed_trace from azure.core.tracing.decorator_async import distributed_trace_async # type: ignore -from typing_extensions import Literal from ._cosmos_client_connection_async import CosmosClientConnection from ._scripts import ScriptsProxy @@ -49,13 +49,14 @@ NonePartitionKeyValue, 
_return_undefined_or_empty_partition_key, _Empty, - _Undefined, PartitionKey + _Undefined ) __all__ = ("ContainerProxy",) # pylint: disable=protected-access, too-many-lines # pylint: disable=missing-client-constructor-parameter-credential,missing-client-constructor-parameter-kwargs +# pylint: disable=too-many-public-methods PartitionKeyType = Union[str, int, float, bool, Sequence[Union[str, int, float, bool, None]], Type[NonePartitionKeyValue]] # pylint: disable=line-too-long @@ -136,13 +137,6 @@ async def _set_partition_key( return _return_undefined_or_empty_partition_key(await self.is_system_key) return cast(Union[str, int, float, bool, List[Union[str, int, float, bool]]], partition_key) - async def __is_prefix_partition_key(self, partition_key: PartitionKeyType) -> bool: - - properties = await self._get_properties() - pk_properties = properties.get("partitionKey") - partition_key_definition = PartitionKey(path=pk_properties["paths"], kind=pk_properties["kind"]) - return partition_key_definition._is_prefix_partition_key(partition_key) - @distributed_trace_async async def read( self, @@ -500,17 +494,17 @@ def query_items_change_feed( partition_key: Optional[PartitionKeyType] = None, priority: Optional[Literal["High", "Low"]] = None, **kwargs: Any - ) -> AsyncItemPaged[Dict[str, Any]]: + ) -> AsyncItemPaged[Dict[str, Any]]: # pylint: disable=line-too-long """Get a sorted list of items that were changed, in the order in which they were modified. - :param int max_item_count: Max number of items to be returned in the enumeration operation. - :param Union[datetime, Literal["Now", "Beginning"]] start_time: The start time to start processing chang feed items. + :keyword int max_item_count: Max number of items to be returned in the enumeration operation. + :keyword Union[datetime, Literal["Now", "Beginning"]] start_time: The start time to start processing chang feed items. Beginning: Processing the change feed items from the beginning of the change feed. Now: Processing change feed from the current time, so only events for all future changes will be retrieved. ~datetime.datetime: processing change feed from a point of time. Provided value will be converted to UTC. By default, it is start from current (NOW) - :param PartitionKeyType partition_key: The partition key that is used to define the scope (logical partition or a subset of a container) - :param Literal["High", "Low"] priority: Priority based execution allows users to set a priority for each + :keyword PartitionKeyType partition_key: The partition key that is used to define the scope (logical partition or a subset of a container) + :keyword Literal["High", "Low"] priority: Priority based execution allows users to set a priority for each request. Once the user has reached their provisioned throughput, low priority requests are throttled before high priority requests start getting throttled. Feature must first be enabled at the account level. :returns: An AsyncItemPaged of items (dicts). @@ -519,7 +513,7 @@ def query_items_change_feed( ... 
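The keyword-only surface above is easiest to see in use. A minimal sketch against the async client, assuming an emulator endpoint and placeholder account key, database and container names (none of these values come from this patch):

import asyncio
from datetime import datetime, timezone

from azure.cosmos.aio import CosmosClient


async def read_change_feed() -> None:
    # Endpoint, key, database and container names are placeholders for this sketch.
    async with CosmosClient("https://localhost:8081/", credential="<account-key>") as client:
        container = client.get_database_client("db").get_container_client("items")

        # Scope the feed to one logical partition and replay it from the beginning.
        async for item in container.query_items_change_feed(
                partition_key="pk1",
                start_time="Beginning",
                max_item_count=100):
            print("changed:", item["id"])

        # Or only pick up changes made after a specific point in time (converted to UTC).
        since = datetime(2024, 8, 1, tzinfo=timezone.utc)
        async for item in container.query_items_change_feed(start_time=since):
            print("changed since Aug 1:", item["id"])


asyncio.run(read_change_feed())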
@overload - async def query_items_change_feed( + def query_items_change_feed( self, *, feed_range: Optional[str] = None, @@ -527,17 +521,17 @@ async def query_items_change_feed( start_time: Optional[Union[datetime, Literal["Now", "Beginning"]]] = None, priority: Optional[Literal["High", "Low"]] = None, **kwargs: Any - ) -> AsyncItemPaged[Dict[str, Any]]: + ) -> AsyncItemPaged[Dict[str, Any]]: # pylint: disable=line-too-long """Get a sorted list of items that were changed, in the order in which they were modified. - :param str feed_range: The feed range that is used to define the scope. By default, the scope will be the entire container. - :param int max_item_count: Max number of items to be returned in the enumeration operation. - :param Union[datetime, Literal["Now", "Beginning"]] start_time: The start time to start processing chang feed items. + :keyword str feed_range: The feed range that is used to define the scope. By default, the scope will be the entire container. + :keyword int max_item_count: Max number of items to be returned in the enumeration operation. + :keyword Union[datetime, Literal["Now", "Beginning"]] start_time: The start time to start processing chang feed items. Beginning: Processing the change feed items from the beginning of the change feed. Now: Processing change feed from the current time, so only events for all future changes will be retrieved. ~datetime.datetime: processing change feed from a point of time. Provided value will be converted to UTC. By default, it is start from current (NOW) - :param Literal["High", "Low"] priority: Priority based execution allows users to set a priority for each + :keyword Literal["High", "Low"] priority: Priority based execution allows users to set a priority for each request. Once the user has reached their provisioned throughput, low priority requests are throttled before high priority requests start getting throttled. Feature must first be enabled at the account level. :returns: An AsyncItemPaged of items (dicts). @@ -553,12 +547,12 @@ def query_items_change_feed( max_item_count: Optional[int] = None, priority: Optional[Literal["High", "Low"]] = None, **kwargs: Any - ) -> AsyncItemPaged[Dict[str, Any]]: + ) -> AsyncItemPaged[Dict[str, Any]]: # pylint: disable=line-too-long """Get a sorted list of items that were changed, in the order in which they were modified. - :param str continuation: The continuation token retrieved from previous response. - :param int max_item_count: Max number of items to be returned in the enumeration operation. - :param Literal["High", "Low"] priority: Priority based execution allows users to set a priority for each + :keyword str continuation: The continuation token retrieved from previous response. + :keyword int max_item_count: Max number of items to be returned in the enumeration operation. + :keyword Literal["High", "Low"] priority: Priority based execution allows users to set a priority for each request. Once the user has reached their provisioned throughput, low priority requests are throttled before high priority requests start getting throttled. Feature must first be enabled at the account level. :returns: An AsyncItemPaged of items (dicts). 
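The continuation overload pairs with the paging protocol from azure-core: read a page, persist the opaque token, and resume later. A sketch, assuming container is an azure.cosmos.aio ContainerProxy and that the standard AsyncItemPaged.by_page() / continuation_token surface exposes the token:

async def drain_one_page_and_resume(container) -> None:
    # First pass: read a single page of changes and keep the opaque continuation.
    pager = container.query_items_change_feed(start_time="Beginning").by_page()
    first_page = await pager.__anext__()
    async for item in first_page:
        print("seen:", item["id"])
    saved_token = pager.continuation_token  # base64-encoded feed state in the V2 format

    # Later, possibly in another process: resume exactly where the first pass stopped.
    async for item in container.query_items_change_feed(continuation=saved_token):
        print("new since last run:", item["id"])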
@@ -571,7 +565,7 @@ def query_items_change_feed( self, *args: Any, **kwargs: Any - ) -> AsyncItemPaged[Dict[str, Any]]: + ) -> AsyncItemPaged[Dict[str, Any]]: # pylint: disable=too-many-statements if is_key_exists_and_not_none(kwargs, "priority"): kwargs['priority'] = kwargs['priority'] @@ -1216,7 +1210,7 @@ async def execute_item_batch( async def read_feed_ranges( self, **kwargs: Any - ) -> List[str]: + ) -> List[str]: # pylint: disable=unused-argument partition_key_ranges =\ await self.client_connection._routing_map_provider.get_overlapping_ranges( self.container_link, diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_cosmos_client_connection_async.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_cosmos_client_connection_async.py index 7cccac695769..b2a9aaff9ec1 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_cosmos_client_connection_async.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_cosmos_client_connection_async.py @@ -51,7 +51,7 @@ from .._base import _set_properties_cache from .. import documents from .._change_feed.aio.change_feed_iterable import ChangeFeedIterable -from .._change_feed.aio.change_feed_state import ChangeFeedState +from .._change_feed.change_feed_state import ChangeFeedState from .._routing import routing_range from ..documents import ConnectionPolicy, DatabaseAccount from .._constants import _Constants as Constants @@ -2814,9 +2814,9 @@ def __GetBodiesFromQueryResult(result: Dict[str, Any]) -> List[Dict[str, Any]]: ) headers = base.GetHeaders(self, initial_headers, "get", path, id_, typ, options, partition_key_range_id) - change_feed_state = options.pop("changeFeedState", None) + change_feed_state = options.get("changeFeedState", None) if change_feed_state and isinstance(change_feed_state, ChangeFeedState): - await change_feed_state.populate_request_headers(self._routing_map_provider, headers) + await change_feed_state.populate_request_headers_async(self._routing_map_provider, headers) result, self.last_response_headers = await self.__Get(path, request_params, headers, **kwargs) if response_hook: diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/container.py b/sdk/cosmos/azure-cosmos/azure/cosmos/container.py index e2fa8ad4071e..7a6e466bfc1d 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/container.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/container.py @@ -24,11 +24,11 @@ import warnings from datetime import datetime from typing import Any, Dict, List, Optional, Sequence, Union, Tuple, Mapping, Type, cast, overload +from typing_extensions import Literal from azure.core import MatchConditions from azure.core.paging import ItemPaged from azure.core.tracing.decorator import distributed_trace -from typing_extensions import Literal from ._base import ( build_options, @@ -328,17 +328,19 @@ def query_items_change_feed( partition_key: Optional[PartitionKeyType] = None, priority: Optional[Literal["High", "Low"]] = None, **kwargs: Any - ) -> ItemPaged[Dict[str, Any]]: + ) -> ItemPaged[Dict[str, Any]]: # pylint: disable=line-too-long """Get a sorted list of items that were changed, in the order in which they were modified. - :param int max_item_count: Max number of items to be returned in the enumeration operation. - :param Union[datetime, Literal["Now", "Beginning"]] start_time: The start time to start processing chang feed items. - Beginning: Processing the change feed items from the beginning of the change feed. - Now: Processing change feed from the current time, so only events for all future changes will be retrieved. 
- ~datetime.datetime: processing change feed from a point of time. Provided value will be converted to UTC. - By default, it is start from current (NOW) - :param PartitionKeyType partition_key: The partition key that is used to define the scope (logical partition or a subset of a container) - :param Literal["High", "Low"] priority: Priority based execution allows users to set a priority for each + :keyword int max_item_count: Max number of items to be returned in the enumeration operation. + :keyword Union[datetime, Literal["Now", "Beginning"]] start_time: + The start time to start processing chang feed items. + Beginning: Processing the change feed items from the beginning of the change feed. + Now: Processing change feed from the current time, so only events for all future changes will be retrieved. + ~datetime.datetime: processing change feed from a point of time. Provided value will be converted to UTC. + By default, it is start from current (NOW) + :keyword PartitionKeyType partition_key: The partition key that is used to define the scope + (logical partition or a subset of a container) + :keyword Literal["High", "Low"] priority: Priority based execution allows users to set a priority for each request. Once the user has reached their provisioned throughput, low priority requests are throttled before high priority requests start getting throttled. Feature must first be enabled at the account level. :returns: An Iterable of items (dicts). @@ -355,17 +357,19 @@ def query_items_change_feed( start_time: Optional[Union[datetime, Literal["Now", "Beginning"]]] = None, priority: Optional[Literal["High", "Low"]] = None, **kwargs: Any - ) -> ItemPaged[Dict[str, Any]]: + ) -> ItemPaged[Dict[str, Any]]: # pylint: disable=line-too-long """Get a sorted list of items that were changed, in the order in which they were modified. - :param str feed_range: The feed range that is used to define the scope. By default, the scope will be the entire container. - :param int max_item_count: Max number of items to be returned in the enumeration operation. - :param Union[datetime, Literal["Now", "Beginning"]] start_time: The start time to start processing chang feed items. - Beginning: Processing the change feed items from the beginning of the change feed. - Now: Processing change feed from the current time, so only events for all future changes will be retrieved. - ~datetime.datetime: processing change feed from a point of time. Provided value will be converted to UTC. - By default, it is start from current (NOW) - :param Literal["High", "Low"] priority: Priority based execution allows users to set a priority for each + :keyword str feed_range: The feed range that is used to define the scope. + By default, the scope will be the entire container. + :keyword int max_item_count: Max number of items to be returned in the enumeration operation. + :keyword Union[datetime, Literal["Now", "Beginning"]] + start_time: The start time to start processing chang feed items. + Beginning: Processing the change feed items from the beginning of the change feed. + Now: Processing change feed from the current time, so only events for all future changes will be retrieved. + ~datetime.datetime: processing change feed from a point of time. Provided value will be converted to UTC. + By default, it is start from current (NOW) + :keyword Literal["High", "Low"] priority: Priority based execution allows users to set a priority for each request. 
Once the user has reached their provisioned throughput, low priority requests are throttled before high priority requests start getting throttled. Feature must first be enabled at the account level. :returns: An Iterable of items (dicts). @@ -381,12 +385,12 @@ def query_items_change_feed( max_item_count: Optional[int] = None, priority: Optional[Literal["High", "Low"]] = None, **kwargs: Any - ) -> ItemPaged[Dict[str, Any]]: + ) -> ItemPaged[Dict[str, Any]]: # pylint: disable=line-too-long """Get a sorted list of items that were changed, in the order in which they were modified. - :param str continuation: The continuation token retrieved from previous response. - :param int max_item_count: Max number of items to be returned in the enumeration operation. - :param Literal["High", "Low"] priority: Priority based execution allows users to set a priority for each + :keyword str continuation: The continuation token retrieved from previous response. + :keyword int max_item_count: Max number of items to be returned in the enumeration operation. + :keyword Literal["High", "Low"] priority: Priority based execution allows users to set a priority for each request. Once the user has reached their provisioned throughput, low priority requests are throttled before high priority requests start getting throttled. Feature must first be enabled at the account level. :returns: An Iterable of items (dicts). @@ -399,7 +403,7 @@ def query_items_change_feed( self, *args: Any, **kwargs: Any - ) -> ItemPaged[Dict[str, Any]]: + ) -> ItemPaged[Dict[str, Any]]: # pylint: disable=too-many-statements if is_key_exists_and_not_none(kwargs, "priority"): kwargs['priority'] = kwargs['priority'] @@ -1273,7 +1277,7 @@ def delete_all_items_by_partition_key( def read_feed_ranges( self, **kwargs: Any - ) -> List[str]: + ) -> List[str]: # pylint: disable=unused-argument partition_key_ranges =\ self.client_connection._routing_map_provider.get_overlapping_ranges( self.container_link, diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/exceptions.py b/sdk/cosmos/azure-cosmos/azure/cosmos/exceptions.py index 768890dacfa6..6913979cf81d 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/exceptions.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/exceptions.py @@ -167,4 +167,4 @@ def _container_recreate_exception(e) -> bool: def _is_partition_split_or_merge(e): - return e.status_code == StatusCodes.GONE and e.status_code == SubStatusCodes.COMPLETING_SPLIT \ No newline at end of file + return e.status_code == StatusCodes.GONE and e.sub_status == SubStatusCodes.COMPLETING_SPLIT diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/partition_key.py b/sdk/cosmos/azure-cosmos/azure/cosmos/partition_key.py index c89e1d9ac771..21aca775cbd5 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/partition_key.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/partition_key.py @@ -278,7 +278,7 @@ def _get_effective_partition_key_for_multi_hash_partitioning_v2( def _is_prefix_partition_key( self, - partition_key: Union[str, int, float, bool, Sequence[Union[str, int, float, bool, None]], Type[NonePartitionKeyValue]]) -> bool: + partition_key: Union[str, int, float, bool, Sequence[Union[str, int, float, bool, None]], Type[NonePartitionKeyValue]]) -> bool: # pylint: disable=line-too-long if self.kind!= "MultiHash": return False if isinstance(partition_key, list) and len(self.path) == len(partition_key): From 36990ef428145a56660b0dc784f62250ef67145f Mon Sep 17 00:00:00 2001 From: annie-mac Date: Tue, 20 Aug 2024 09:47:59 -0700 Subject: [PATCH 05/20] fix pylint --- 
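Before the pylint-only commits, one note on the prefix partition key path touched above: _is_prefix_partition_key treats a partial MultiHash (hierarchical) key, with fewer values than the key has levels, as a prefix. A hedged illustration, with placeholder endpoint, key and names:

from azure.cosmos import CosmosClient, PartitionKey

client = CosmosClient("https://localhost:8081/", credential="<account-key>")
database = client.get_database_client("db")
container = database.create_container_if_not_exists(
    id="orders",
    partition_key=PartitionKey(path=["/tenantId", "/userId"], kind="MultiHash"))

# Supplying only the first level is a prefix partition key: the SDK translates it
# into an EPK range query that spans every logical partition under that tenant.
for order in container.query_items(
        query="SELECT * FROM c",
        partition_key=["tenant-1"]):
    print(order["id"])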
.../_change_feed/aio/change_feed_fetcher.py | 48 +++++++++---------- .../_change_feed/change_feed_fetcher.py | 1 - .../_change_feed/change_feed_iterable.py | 1 - .../_change_feed/change_feed_start_from.py | 3 -- .../cosmos/_change_feed/change_feed_state.py | 28 ++++++----- .../azure/cosmos/_change_feed/feed_range.py | 8 ++-- ...feed_range_composite_continuation_token.py | 1 - .../azure/cosmos/aio/_container.py | 16 +++++-- .../azure-cosmos/azure/cosmos/container.py | 29 ++++++----- 9 files changed, 72 insertions(+), 63 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_fetcher.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_fetcher.py index 4d85a891ac3f..ee926aa0e92c 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_fetcher.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_fetcher.py @@ -120,7 +120,8 @@ def __init__( self._change_feed_state: ChangeFeedStateV2 = self._feed_options.pop("changeFeedState") if not isinstance(self._change_feed_state, ChangeFeedStateV2): - raise ValueError(f"ChangeFeedFetcherV2 can not handle change feed state version {type(self._change_feed_state)}") + raise ValueError(f"ChangeFeedFetcherV2 can not handle change feed state version " + f"{type(self._change_feed_state)}") self._resource_link = resource_link self._fetch_function = fetch_function @@ -170,29 +171,29 @@ async def fetch_change_feed_items(self, fetch_function) -> List[Dict[str, Any]]: response_headers[continuation_key] = self._get_base64_encoded_continuation() break - # when there is no items being returned, we will decide to retry based on: - # 1. When processing from point in time, there will be no initial results being returned, - # so we will retry with the new continuation token - # 2. if the feed range of the changeFeedState span multiple physical partitions - # then we will read from the next feed range until we have looped through all physical partitions - self._change_feed_state.apply_not_modified_response() - self._change_feed_state.apply_server_response_continuation( - response_headers.get(continuation_key)) + # when there is no items being returned, we will decide to retry based on: + # 1. When processing from point in time, there will be no initial results being returned, + # so we will retry with the new continuation token + # 2. 
if the feed range of the changeFeedState span multiple physical partitions + # then we will read from the next feed range until we have looped through all physical partitions + self._change_feed_state.apply_not_modified_response() + self._change_feed_state.apply_server_response_continuation( + response_headers.get(continuation_key)) + + #TODO: can this part logic be simplified + if (isinstance(self._change_feed_state._change_feed_start_from, ChangeFeedStartFromPointInTime) + and is_s_time_first_fetch): + response_headers[continuation_key] = self._get_base64_encoded_continuation() + is_s_time_first_fetch = False + should_retry = True + else: + self._change_feed_state._continuation._move_to_next_token() + response_headers[continuation_key] = self._get_base64_encoded_continuation() + should_retry = self._change_feed_state.should_retry_on_not_modified_response() + is_s_time_first_fetch = False - #TODO: can this part logic be simplified - if (isinstance(self._change_feed_state._change_feed_start_from, ChangeFeedStartFromPointInTime) - and is_s_time_first_fetch): - response_headers[continuation_key] = self._get_base64_encoded_continuation() - is_s_time_first_fetch = False - should_retry = True - else: - self._change_feed_state._continuation._move_to_next_token() - response_headers[continuation_key] = self._get_base64_encoded_continuation() - should_retry = self._change_feed_state.should_retry_on_not_modified_response() - is_s_time_first_fetch = False - - if not should_retry: - break + if not should_retry: + break return fetched_items @@ -203,4 +204,3 @@ def _get_base64_encoded_continuation(self) -> str: base64_bytes = base64.b64encode(json_bytes) # Convert the Base64 bytes to a string return base64_bytes.decode('utf-8') - diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_fetcher.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_fetcher.py index abad86dfd119..92f0b2446f74 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_fetcher.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_fetcher.py @@ -197,4 +197,3 @@ def _get_base64_encoded_continuation(self) -> str: base64_bytes = base64.b64encode(json_bytes) # Convert the Base64 bytes to a string return base64_bytes.decode('utf-8') - diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_iterable.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_iterable.py index 55f98374252a..4b03e33d0122 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_iterable.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_iterable.py @@ -157,4 +157,3 @@ def _validate_change_feed_state_context(self, change_feed_state_context: Dict[st raise ValueError( "partition_key_range_id, partition_key, feed_range are exclusive parameters," " please only set one of them") - diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_start_from.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_start_from.py index 632f87715819..30d0ce787983 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_start_from.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_start_from.py @@ -186,6 +186,3 @@ def from_json(cls, data: Dict[str, Any]) -> 'ChangeFeedStartFromPointInTime': point_in_time = datetime.fromtimestamp(point_in_time_ms).astimezone(timezone.utc) return ChangeFeedStartFromPointInTime(point_in_time) - - - diff --git 
a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_state.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_state.py index 2d2f4d0d6ae2..05d51daace59 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_state.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_state.py @@ -106,8 +106,8 @@ def __init__( container_rid: str, change_feed_start_from: ChangeFeedStartFromInternal, partition_key_range_id: Optional[str] = None, - partition_key: Optional[Union[str, int, float, bool, List[Union[str, int, float, bool]], _Empty, _Undefined]] = None, - continuation: Optional[str] = None): # pylint: disable=line-too-long + partition_key: Optional[Union[str, int, float, bool, List[Union[str, int, float, bool]], _Empty, _Undefined]] = None, # pylint: disable=line-too-long + continuation: Optional[str] = None): self._container_link = container_link self._container_rid = container_rid @@ -142,22 +142,26 @@ def populate_request_headers( request_headers[http_constants.HttpHeaders.AIM] = http_constants.HttpHeaders.IncrementalFeedHeaderValue # When a merge happens, the child partition will contain documents ordered by LSN but the _ts/creation time - # of the documents may not be sequential. So when reading the changeFeed by LSN, it is possible to encounter documents with lower _ts. - # In order to guarantee we always get the documents after customer's point start time, we will need to always pass the start time in the header. + # of the documents may not be sequential. So when reading the changeFeed by LSN, + # it is possible to encounter documents with lower _ts. + # In order to guarantee we always get the documents after customer's point start time, + # we will need to always pass the start time in the header. self._change_feed_start_from.populate_request_headers(request_headers) if self._continuation: request_headers[http_constants.HttpHeaders.IfNoneMatch] = self._continuation async def populate_request_headers_async( self, - routing_provider: AsyncSmartRoutingMapProvider, - request_headers: Dict[str, Any]) -> None: + async_routing_provider: AsyncSmartRoutingMapProvider, + request_headers: Dict[str, Any]) -> None: # pylint: disable=unused-argument request_headers[http_constants.HttpHeaders.AIM] = http_constants.HttpHeaders.IncrementalFeedHeaderValue # When a merge happens, the child partition will contain documents ordered by LSN but the _ts/creation time - # of the documents may not be sequential. So when reading the changeFeed by LSN, it is possible to encounter documents with lower _ts. - # In order to guarantee we always get the documents after customer's point start time, we will need to always pass the start time in the header. + # of the documents may not be sequential. + # So when reading the changeFeed by LSN, it is possible to encounter documents with lower _ts. + # In order to guarantee we always get the documents after customer's point start time, + # we will need to always pass the start time in the header. 
self._change_feed_start_from.populate_request_headers(request_headers) if self._continuation: request_headers[http_constants.HttpHeaders.IfNoneMatch] = self._continuation @@ -190,7 +194,6 @@ def __init__( self._container_rid = container_rid self._feed_range = feed_range self._change_feed_start_from = change_feed_start_from - self._continuation = continuation if self._continuation is None: composite_continuation_token_queue: Deque = collections.deque() composite_continuation_token_queue.append( @@ -202,6 +205,8 @@ def __init__( self._container_rid, self._feed_range, composite_continuation_token_queue) + else: + self._continuation = continuation @property def container_rid(self) -> str : @@ -368,7 +373,7 @@ def from_initial_state( cls, container_link: str, collection_rid: str, - change_feed_state_context: dict[str, Any]) -> 'ChangeFeedStateV2': + change_feed_state_context: Dict[str, Any]) -> 'ChangeFeedStateV2': if is_key_exists_and_not_none(change_feed_state_context, "feedRange"): feed_range_str = base64.b64decode(change_feed_state_context["feedRange"]).decode('utf-8') @@ -400,6 +405,3 @@ def from_initial_state( feed_range=feed_range, change_feed_start_from=change_feed_start_from, continuation=None) - - - diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range.py index 3b9707371fb8..856ccd6c5b48 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range.py @@ -65,7 +65,7 @@ def to_dict(self) -> Dict[str, Any]: if isinstance(self._pk_value, _Empty): return { self.type_property_name: [] } if isinstance(self._pk_value, list): - return { self.type_property_name: [item for item in self._pk_value] } + return { self.type_property_name: list(self._pk_value) } return { self.type_property_name: self._pk_value } @@ -75,10 +75,10 @@ def from_json(cls, data: Dict[str, Any], feed_range: Range) -> 'FeedRangePartiti pk_value = data.get(cls.type_property_name) if not pk_value: return cls(_Empty(), feed_range) - if pk_value is [{}]: + if pk_value == [{}]: return cls(_Undefined(), feed_range) if isinstance(pk_value, list): - return cls([item for item in pk_value], feed_range) + return cls(list(pk_value), feed_range) return cls(data[cls.type_property_name], feed_range) raise ValueError(f"Can not parse FeedRangePartitionKey from the json," @@ -107,4 +107,4 @@ def from_json(cls, data: Dict[str, Any]) -> 'FeedRangeEpk': if is_key_exists_and_not_none(data, cls.type_property_name): feed_range = Range.ParseFromDict(data.get(cls.type_property_name)) return cls(feed_range) - raise ValueError(f"Can not parse FeedRangeEPK from the json, there is no property {cls.type_property_name}") \ No newline at end of file + raise ValueError(f"Can not parse FeedRangeEPK from the json, there is no property {cls.type_property_name}") diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range_composite_continuation_token.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range_composite_continuation_token.py index fb67b6b4603c..76bc9f02fde2 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range_composite_continuation_token.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range_composite_continuation_token.py @@ -170,4 +170,3 @@ def apply_not_modified_response(self) -> None: @property def feed_range(self) -> FeedRange: return self._feed_range - diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py 
b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py index e70bdf9ab546..dd0e6f0e34eb 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py @@ -494,7 +494,8 @@ def query_items_change_feed( partition_key: Optional[PartitionKeyType] = None, priority: Optional[Literal["High", "Low"]] = None, **kwargs: Any - ) -> AsyncItemPaged[Dict[str, Any]]: # pylint: disable=line-too-long + ) -> AsyncItemPaged[Dict[str, Any]]: + # pylint: disable=line-too-long """Get a sorted list of items that were changed, in the order in which they were modified. :keyword int max_item_count: Max number of items to be returned in the enumeration operation. @@ -510,6 +511,7 @@ def query_items_change_feed( :returns: An AsyncItemPaged of items (dicts). :rtype: AsyncItemPaged[Dict[str, Any]] """ + # pylint: enable=line-too-long ... @overload @@ -521,7 +523,8 @@ def query_items_change_feed( start_time: Optional[Union[datetime, Literal["Now", "Beginning"]]] = None, priority: Optional[Literal["High", "Low"]] = None, **kwargs: Any - ) -> AsyncItemPaged[Dict[str, Any]]: # pylint: disable=line-too-long + ) -> AsyncItemPaged[Dict[str, Any]]: + # pylint: disable=line-too-long """Get a sorted list of items that were changed, in the order in which they were modified. :keyword str feed_range: The feed range that is used to define the scope. By default, the scope will be the entire container. @@ -537,6 +540,7 @@ def query_items_change_feed( :returns: An AsyncItemPaged of items (dicts). :rtype: AsyncItemPaged[Dict[str, Any]] """ + # pylint: enable=line-too-long ... @overload @@ -547,7 +551,8 @@ def query_items_change_feed( max_item_count: Optional[int] = None, priority: Optional[Literal["High", "Low"]] = None, **kwargs: Any - ) -> AsyncItemPaged[Dict[str, Any]]: # pylint: disable=line-too-long + ) -> AsyncItemPaged[Dict[str, Any]]: + # pylint: disable=line-too-long """Get a sorted list of items that were changed, in the order in which they were modified. :keyword str continuation: The continuation token retrieved from previous response. @@ -558,6 +563,7 @@ def query_items_change_feed( :returns: An AsyncItemPaged of items (dicts). :rtype: AsyncItemPaged[Dict[str, Any]] """ + # pylint: enable=line-too-long ... 
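These overloads combine with the new read_feed_ranges API for parallel consumption: each returned range is an opaque base64-encoded string that can be fed back through the feed_range keyword. A rough fan-out sketch, assuming container is an azure.cosmos.aio ContainerProxy (the worker structure and names are illustrative only):

import asyncio


async def process_feed_range(container, feed_range: str) -> None:
    # Each worker drains one sub range of the container independently.
    async for item in container.query_items_change_feed(
            feed_range=feed_range, start_time="Beginning"):
        print(feed_range[:16], "->", item["id"])


async def fan_out(container) -> None:
    # One task per feed range, roughly one per physical partition.
    feed_ranges = await container.read_feed_ranges()
    await asyncio.gather(*(process_feed_range(container, fr) for fr in feed_ranges))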
@distributed_trace @@ -565,8 +571,8 @@ def query_items_change_feed( self, *args: Any, **kwargs: Any - ) -> AsyncItemPaged[Dict[str, Any]]: # pylint: disable=too-many-statements - + ) -> AsyncItemPaged[Dict[str, Any]]: + # pylint: disable=too-many-statements if is_key_exists_and_not_none(kwargs, "priority"): kwargs['priority'] = kwargs['priority'] feed_options = _build_options(kwargs) diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/container.py b/sdk/cosmos/azure-cosmos/azure/cosmos/container.py index 7a6e466bfc1d..94e1f3d7d9fb 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/container.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/container.py @@ -134,7 +134,7 @@ def _set_partition_key( def _get_epk_range_for_partition_key(self, partition_key_value: PartitionKeyType) -> Range: container_properties = self._get_properties() - partition_key_definition = container_properties.get("partitionKey") + partition_key_definition = container_properties["partitionKey"] partition_key = PartitionKey(path=partition_key_definition["paths"], kind=partition_key_definition["kind"]) return partition_key._get_epk_range_for_partition_key(partition_key_value) @@ -328,7 +328,8 @@ def query_items_change_feed( partition_key: Optional[PartitionKeyType] = None, priority: Optional[Literal["High", "Low"]] = None, **kwargs: Any - ) -> ItemPaged[Dict[str, Any]]: # pylint: disable=line-too-long + ) -> ItemPaged[Dict[str, Any]]: + # pylint: disable=line-too-long """Get a sorted list of items that were changed, in the order in which they were modified. :keyword int max_item_count: Max number of items to be returned in the enumeration operation. @@ -346,6 +347,7 @@ def query_items_change_feed( :returns: An Iterable of items (dicts). :rtype: Iterable[Dict[str, Any]] """ + # pylint: enable=line-too-long ... @overload @@ -357,7 +359,9 @@ def query_items_change_feed( start_time: Optional[Union[datetime, Literal["Now", "Beginning"]]] = None, priority: Optional[Literal["High", "Low"]] = None, **kwargs: Any - ) -> ItemPaged[Dict[str, Any]]: # pylint: disable=line-too-long + ) -> ItemPaged[Dict[str, Any]]: + # pylint: disable=line-too-long + """Get a sorted list of items that were changed, in the order in which they were modified. :keyword str feed_range: The feed range that is used to define the scope. @@ -375,6 +379,7 @@ def query_items_change_feed( :returns: An Iterable of items (dicts). :rtype: Iterable[Dict[str, Any]] """ + # pylint: enable=line-too-long ... @overload @@ -385,7 +390,8 @@ def query_items_change_feed( max_item_count: Optional[int] = None, priority: Optional[Literal["High", "Low"]] = None, **kwargs: Any - ) -> ItemPaged[Dict[str, Any]]: # pylint: disable=line-too-long + ) -> ItemPaged[Dict[str, Any]]: + # pylint: disable=line-too-long """Get a sorted list of items that were changed, in the order in which they were modified. :keyword str continuation: The continuation token retrieved from previous response. @@ -396,6 +402,7 @@ def query_items_change_feed( :returns: An Iterable of items (dicts). :rtype: Iterable[Dict[str, Any]] """ + # pylint: enable=line-too-long ... 
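The dispatch method that follows hands a changeFeedStateContext to ChangeFeedIterable, whose fetchers lean on the composite continuation rotating through sub feed ranges whenever a 304 comes back. A standalone toy model of that rotation (plain deques, not the SDK classes) shows why a sweep stops once it loops back to the first range that reported no results:

from collections import deque


class SubRangeRotation:
    """Simplified model of the composite continuation's 304 handling."""

    def __init__(self, feed_ranges):
        self._tokens = deque({"range": r, "etag": None} for r in feed_ranges)
        self._current = self._tokens[0]
        self._initial_no_result_range = None

    def apply_not_modified(self):
        # Remember the first sub range that returned 304 in this sweep.
        if self._initial_no_result_range is None:
            self._initial_no_result_range = self._current["range"]

    def move_to_next(self):
        self._tokens.rotate(-1)  # popleft + append in the real code
        self._current = self._tokens[0]

    def should_retry(self):
        # Keep fetching until we are back at the range that first reported 304.
        if len(self._tokens) > 1:
            return self._current["range"] != self._initial_no_result_range
        return False

A caller would apply_not_modified(), then move_to_next(), and keep fetching while should_retry() returns True, mirroring the fetch loop in ChangeFeedFetcherV2.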
@distributed_trace @@ -403,8 +410,8 @@ def query_items_change_feed( self, *args: Any, **kwargs: Any - ) -> ItemPaged[Dict[str, Any]]: # pylint: disable=too-many-statements - + ) -> ItemPaged[Dict[str, Any]]: + # pylint: disable=too-many-statements if is_key_exists_and_not_none(kwargs, "priority"): kwargs['priority'] = kwargs['priority'] feed_options = build_options(kwargs) @@ -1274,14 +1281,14 @@ def delete_all_items_by_partition_key( self.client_connection.DeleteAllItemsByPartitionKey( collection_link=self.container_link, options=request_options, **kwargs) - def read_feed_ranges( + def read_feed_ranges( # pylint: disable=unused-argument self, **kwargs: Any - ) -> List[str]: # pylint: disable=unused-argument + ) -> List[str]: partition_key_ranges =\ self.client_connection._routing_map_provider.get_overlapping_ranges( self.container_link, - # default to full range - [Range("", "FF", True, False)]) + [Range("", "FF", True, False)]) # default to full range - return [routing_range.Range.PartitionKeyRangeToRange(partitionKeyRange).to_base64_encoded_string() for partitionKeyRange in partition_key_ranges] \ No newline at end of file + return [routing_range.Range.PartitionKeyRangeToRange(partitionKeyRange).to_base64_encoded_string() + for partitionKeyRange in partition_key_ranges] From 7479b0c9663a8759e640a4f5e16d4117cd15878b Mon Sep 17 00:00:00 2001 From: annie-mac Date: Tue, 20 Aug 2024 15:39:59 -0700 Subject: [PATCH 06/20] pylint fix --- .../azure/cosmos/_change_feed/change_feed_state.py | 5 +++-- .../feed_range_composite_continuation_token.py | 3 ++- sdk/cosmos/azure-cosmos/azure/cosmos/_utils.py | 1 - sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py | 7 ++++--- sdk/cosmos/azure-cosmos/azure/cosmos/partition_key.py | 4 ++-- 5 files changed, 11 insertions(+), 9 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_state.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_state.py index 05d51daace59..2bb219eb5497 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_state.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_state.py @@ -194,7 +194,7 @@ def __init__( self._container_rid = container_rid self._feed_range = feed_range self._change_feed_start_from = change_feed_start_from - if self._continuation is None: + if continuation is None: composite_continuation_token_queue: Deque = collections.deque() composite_continuation_token_queue.append( CompositeContinuationToken( @@ -374,7 +374,8 @@ def from_initial_state( container_link: str, collection_rid: str, change_feed_state_context: Dict[str, Any]) -> 'ChangeFeedStateV2': - + + feed_range: Optional[FeedRange] = None if is_key_exists_and_not_none(change_feed_state_context, "feedRange"): feed_range_str = base64.b64decode(change_feed_state_context["feedRange"]).decode('utf-8') feed_range_json = json.loads(feed_range_str) diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range_composite_continuation_token.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range_composite_continuation_token.py index 76bc9f02fde2..2f73af4e5c47 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range_composite_continuation_token.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range_composite_continuation_token.py @@ -23,7 +23,7 @@ database service. 
""" from collections import deque -from typing import Any, Deque, Dict +from typing import Any, Deque, Dict, Optional from azure.cosmos._change_feed.composite_continuation_token import CompositeContinuationToken from azure.cosmos._change_feed.feed_range import FeedRange, FeedRangeEpk, FeedRangePartitionKey @@ -89,6 +89,7 @@ def from_json(cls, data) -> 'FeedRangeCompositeContinuation': for child_range_continuation_token in continuation_data] # parsing feed range + feed_range: Optional[FeedRange] = None if is_key_exists_and_not_none(data, FeedRangeEpk.type_property_name): feed_range = FeedRangeEpk.from_json(data) elif is_key_exists_and_not_none(data, FeedRangePartitionKey.type_property_name): diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_utils.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_utils.py index cf4b4977ed44..7690383d375a 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_utils.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_utils.py @@ -82,4 +82,3 @@ def is_base64_encoded(data: str) -> bool: def is_key_exists_and_not_none(data: Dict[str, Any], key: str) -> bool: return key in data and data[key] is not None - diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py index dd0e6f0e34eb..5cae847872d5 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py @@ -1213,14 +1213,15 @@ async def execute_item_batch( return await self.client_connection.Batch( collection_link=self.container_link, batch_operations=batch_operations, options=request_options, **kwargs) - async def read_feed_ranges( + async def read_feed_ranges( # pylint: disable=unused-argument self, **kwargs: Any - ) -> List[str]: # pylint: disable=unused-argument + ) -> List[str]: partition_key_ranges =\ await self.client_connection._routing_map_provider.get_overlapping_ranges( self.container_link, # default to full range [Range("", "FF", True, False)]) - return [routing_range.Range.PartitionKeyRangeToRange(partitionKeyRange).to_base64_encoded_string() for partitionKeyRange in partition_key_ranges] + return [routing_range.Range.PartitionKeyRangeToRange(partitionKeyRange).to_base64_encoded_string() + for partitionKeyRange in partition_key_ranges] diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/partition_key.py b/sdk/cosmos/azure-cosmos/azure/cosmos/partition_key.py index 21aca775cbd5..6a89da3c4f22 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/partition_key.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/partition_key.py @@ -175,13 +175,13 @@ def _get_epk_range_for_prefix_partition_key( def _get_epk_range_for_partition_key( self, - pk_value: Sequence[Union[None, bool, int, float, str, _Undefined, Type[NonePartitionKeyValue]]] + pk_value: Union[str, int, float, bool, Sequence[Union[str, int, float, bool, None]], Type[NonePartitionKeyValue]] # pylint: disable=line-too-long ) -> _Range: if self._is_prefix_partition_key(pk_value): return self._get_epk_range_for_prefix_partition_key(pk_value) # else return point range - effective_partition_key_string = self._get_effective_partition_key_string(pk_value) + effective_partition_key_string = self._get_effective_partition_key_string([pk_value]) return _Range(effective_partition_key_string, effective_partition_key_string, True, True) def _get_effective_partition_key_for_hash_partitioning(self) -> str: From 2e766204285056245f6f970862d4312116ad8af5 Mon Sep 17 00:00:00 2001 From: annie-mac Date: Tue, 20 Aug 2024 17:53:09 -0700 Subject: [PATCH 07/20] fix 
--- .../cosmos/_change_feed/change_feed_state.py | 6 ++--- .../azure-cosmos/azure/cosmos/container.py | 8 +++---- .../azure-cosmos/azure/cosmos/exceptions.py | 6 ++--- .../azure/cosmos/partition_key.py | 17 +++++++------ .../azure-cosmos/test/test_change_feed.py | 24 +++++++++++++++++++ .../test/test_change_feed_async.py | 24 +++++++++++++++++++ 6 files changed, 68 insertions(+), 17 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_state.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_state.py index 2bb219eb5497..988932ff3af6 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_state.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_state.py @@ -332,8 +332,8 @@ async def handle_feed_range_gone_async( def apply_server_response_continuation(self, continuation: str) -> None: self._continuation.apply_server_response_continuation(continuation) - def should_retry_on_not_modified_response(self): - self._continuation.should_retry_on_not_modified_response() + def should_retry_on_not_modified_response(self) -> bool: + return self._continuation.should_retry_on_not_modified_response() def apply_not_modified_response(self) -> None: self._continuation.apply_not_modified_response() @@ -374,7 +374,7 @@ def from_initial_state( container_link: str, collection_rid: str, change_feed_state_context: Dict[str, Any]) -> 'ChangeFeedStateV2': - + feed_range: Optional[FeedRange] = None if is_key_exists_and_not_none(change_feed_state_context, "feedRange"): feed_range_str = base64.b64decode(change_feed_state_context["feedRange"]).decode('utf-8') diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/container.py b/sdk/cosmos/azure-cosmos/azure/cosmos/container.py index 94e1f3d7d9fb..b04972bdee6c 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/container.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/container.py @@ -470,9 +470,9 @@ def query_items_change_feed( feed_options["maxItemCount"] = args[3] if is_key_exists_and_not_none(kwargs, "partition_key"): - partition_key = kwargs.pop('partition_key') - change_feed_state_context["partitionKey"] = self._set_partition_key(partition_key) - change_feed_state_context["partitionKeyFeedRange"] = self._get_epk_range_for_partition_key(partition_key) + change_feed_state_context["partitionKey"] = self._set_partition_key(kwargs.pop('partition_key')) + change_feed_state_context["partitionKeyFeedRange"] =\ + self._get_epk_range_for_partition_key(change_feed_state_context["partitionKey"]) if is_key_exists_and_not_none(kwargs, "feed_range"): change_feed_state_context["feedRange"] = kwargs.pop('feed_range') @@ -587,7 +587,7 @@ def query_items( # pylint:disable=docstring-missing-param kwargs["isPrefixPartitionQuery"] = True properties = self._get_properties() kwargs["partitionKeyDefinition"] = properties["partitionKey"] - kwargs["partitionKeyDefinition"]["partition_key"] = partition_key + kwargs["partitionKeyDefinition"]["partition_key"] = self._set_partition_key(partition_key) else: feed_options["partitionKey"] = self._set_partition_key(partition_key) if enable_scan_in_query is not None: diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/exceptions.py b/sdk/cosmos/azure-cosmos/azure/cosmos/exceptions.py index 6913979cf81d..262de2ffbbbe 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/exceptions.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/exceptions.py @@ -142,12 +142,12 @@ def __init__(self, message=None, response=None, **kwargs): """ :param int sub_status_code: HTTP response sub code. 
""" - self.status_code = StatusCodes.GONE + self.sub_status = SubStatusCodes.PARTITION_KEY_RANGE_GONE self.http_error_message = message - formatted_message = "Status code: %d Sub-status: %d\n%s" % (self.status_code, self.sub_status, str(message)) + formatted_message = "Status code: %d Sub-status: %d\n%s" % (StatusCodes.GONE, self.sub_status, str(message)) super(CosmosHttpResponseError, self).__init__(message=formatted_message, response=response, **kwargs) - + self.status_code = StatusCodes.GONE def _partition_range_is_gone(e): if (e.status_code == http_constants.StatusCodes.GONE diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/partition_key.py b/sdk/cosmos/azure-cosmos/azure/cosmos/partition_key.py index 6a89da3c4f22..881cce0895e1 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/partition_key.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/partition_key.py @@ -23,7 +23,7 @@ from io import BytesIO import binascii import struct -from typing import IO, Sequence, Type, Union, overload, List +from typing import IO, Sequence, Type, Union, overload, List, cast from typing_extensions import Literal from ._cosmos_integers import _UInt64, _UInt128 @@ -149,7 +149,7 @@ def version(self, value: int) -> None: def _get_epk_range_for_prefix_partition_key( self, - pk_value: Sequence[Union[None, bool, int, float, str, _Undefined, Type[NonePartitionKeyValue]]] + pk_value: Sequence[Union[str, int, float, bool, _Empty, _Undefined]] ) -> _Range: if self.kind != "MultiHash": raise ValueError( @@ -175,13 +175,16 @@ def _get_epk_range_for_prefix_partition_key( def _get_epk_range_for_partition_key( self, - pk_value: Union[str, int, float, bool, Sequence[Union[str, int, float, bool, None]], Type[NonePartitionKeyValue]] # pylint: disable=line-too-long + pk_value: Union[str, int, float, bool, List[Union[str, int, float, bool]], _Empty, _Undefined] # pylint: disable=line-too-long ) -> _Range: if self._is_prefix_partition_key(pk_value): - return self._get_epk_range_for_prefix_partition_key(pk_value) + return self._get_epk_range_for_prefix_partition_key( + cast(List[Union[str, int, float, bool]], pk_value)) # else return point range - effective_partition_key_string = self._get_effective_partition_key_string([pk_value]) + effective_partition_key_string =\ + self._get_effective_partition_key_string( + cast(List[Union[str, int, float, bool, _Empty, _Undefined]], [pk_value])) return _Range(effective_partition_key_string, effective_partition_key_string, True, True) def _get_effective_partition_key_for_hash_partitioning(self) -> str: @@ -190,7 +193,7 @@ def _get_effective_partition_key_for_hash_partitioning(self) -> str: def _get_effective_partition_key_string( self, - pk_value: Sequence[Union[None, bool, int, float, str, _Undefined, Type[NonePartitionKeyValue]]] + pk_value: Sequence[Union[str, int, float, bool, _Empty, _Undefined]] ) -> Union[int, str]: if not pk_value: return _MinimumInclusiveEffectivePartitionKey @@ -278,7 +281,7 @@ def _get_effective_partition_key_for_multi_hash_partitioning_v2( def _is_prefix_partition_key( self, - partition_key: Union[str, int, float, bool, Sequence[Union[str, int, float, bool, None]], Type[NonePartitionKeyValue]]) -> bool: # pylint: disable=line-too-long + partition_key: Union[str, int, float, bool, List[Union[str, int, float, bool]], _Empty, _Undefined]) -> bool: # pylint: disable=line-too-long if self.kind!= "MultiHash": return False if isinstance(partition_key, list) and len(self.path) == len(partition_key): diff --git a/sdk/cosmos/azure-cosmos/test/test_change_feed.py 
b/sdk/cosmos/azure-cosmos/test/test_change_feed.py index a1d34262cae7..4b286d2b82f8 100644 --- a/sdk/cosmos/azure-cosmos/test/test_change_feed.py +++ b/sdk/cosmos/azure-cosmos/test/test_change_feed.py @@ -291,5 +291,29 @@ def test_query_change_feed_with_split(self, setup): assert actual_ids == expected_ids setup["created_db"].delete_container(created_collection.id) + def test_query_change_feed_with_multi_partition(self, setup): + created_collection = setup["created_db"].create_container("change_feed_test_" + str(uuid.uuid4()), + PartitionKey(path="/pk"), + offer_throughput=11000) + + # create one doc and make sure change feed query can return the document + new_documents = [ + {'pk': 'pk', 'id': 'doc1'}, + {'pk': 'pk2', 'id': 'doc2'}, + {'pk': 'pk3', 'id': 'doc3'}, + {'pk': 'pk4', 'id': 'doc4'}] + expected_ids = ['doc1', 'doc2', 'doc3', 'doc4'] + + for document in new_documents: + created_collection.create_item(body=document) + + query_iterable = created_collection.query_items_change_feed(start_time="Beginning") + it = query_iterable.__iter__() + actual_ids = [] + for item in it: + actual_ids.append(item['id']) + + assert actual_ids == expected_ids + if __name__ == "__main__": unittest.main() diff --git a/sdk/cosmos/azure-cosmos/test/test_change_feed_async.py b/sdk/cosmos/azure-cosmos/test/test_change_feed_async.py index b4165af0601c..886c1ffc1bcc 100644 --- a/sdk/cosmos/azure-cosmos/test/test_change_feed_async.py +++ b/sdk/cosmos/azure-cosmos/test/test_change_feed_async.py @@ -318,5 +318,29 @@ async def test_query_change_feed_with_split_async(self, setup): assert actual_ids == expected_ids setup["created_db"].delete_container(created_collection.id) + async def test_query_change_feed_with_multi_partition_async(self, setup): + created_collection = await setup["created_db"].create_container("change_feed_test_" + str(uuid.uuid4()), + PartitionKey(path="/pk"), + offer_throughput=11000) + + # create one doc and make sure change feed query can return the document + new_documents = [ + {'pk': 'pk', 'id': 'doc1'}, + {'pk': 'pk2', 'id': 'doc2'}, + {'pk': 'pk3', 'id': 'doc3'}, + {'pk': 'pk4', 'id': 'doc4'}] + expected_ids = ['doc1', 'doc2', 'doc3', 'doc4'] + + for document in new_documents: + await created_collection.create_item(body=document) + + query_iterable = created_collection.query_items_change_feed(start_time="Beginning") + it = query_iterable.__aiter__() + actual_ids = [] + async for item in it: + actual_ids.append(item['id']) + + assert actual_ids == expected_ids + if __name__ == '__main__': unittest.main() From 8c0aa4604015588c0e39af42c7bddde69a2942db Mon Sep 17 00:00:00 2001 From: annie-mac Date: Wed, 21 Aug 2024 09:41:54 -0700 Subject: [PATCH 08/20] fix mypy --- .../azure-cosmos/azure/cosmos/container.py | 16 ++++++++++------ .../azure-cosmos/azure/cosmos/partition_key.py | 4 ++-- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/container.py b/sdk/cosmos/azure-cosmos/azure/cosmos/container.py index b04972bdee6c..cc42b6f87411 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/container.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/container.py @@ -132,7 +132,9 @@ def _set_partition_key( return _return_undefined_or_empty_partition_key(self.is_system_key) return cast(Union[str, int, float, bool, List[Union[str, int, float, bool]]], partition_key) - def _get_epk_range_for_partition_key(self, partition_key_value: PartitionKeyType) -> Range: + def _get_epk_range_for_partition_key( + self, + partition_key_value: Union[str, int, float, bool, 
List[Union[str, int, float, bool]], _Empty, _Undefined]) -> Range: # pylint: disable=line-too-long container_properties = self._get_properties() partition_key_definition = container_properties["partitionKey"] partition_key = PartitionKey(path=partition_key_definition["paths"], kind=partition_key_definition["kind"]) @@ -583,13 +585,14 @@ def query_items( # pylint:disable=docstring-missing-param if populate_index_metrics is not None: feed_options["populateIndexMetrics"] = populate_index_metrics if partition_key is not None: - if self.__is_prefix_partitionkey(partition_key): + partition_key_value = self._set_partition_key(partition_key) + if self.__is_prefix_partitionkey(partition_key_value): kwargs["isPrefixPartitionQuery"] = True properties = self._get_properties() kwargs["partitionKeyDefinition"] = properties["partitionKey"] - kwargs["partitionKeyDefinition"]["partition_key"] = self._set_partition_key(partition_key) + kwargs["partitionKeyDefinition"]["partition_key"] = partition_key_value else: - feed_options["partitionKey"] = self._set_partition_key(partition_key) + feed_options["partitionKey"] = partition_key_value if enable_scan_in_query is not None: feed_options["enableScanInQuery"] = enable_scan_in_query if max_integrated_cache_staleness_in_ms: @@ -616,8 +619,9 @@ def query_items( # pylint:disable=docstring-missing-param return items def __is_prefix_partitionkey( - self, partition_key: PartitionKeyType - ) -> bool: + self, + partition_key: Union[str, int, float, bool, List[Union[str, int, float, bool]], _Empty, _Undefined]) -> bool: # pylint: disable=line-too-long + properties = self._get_properties() pk_properties = properties["partitionKey"] partition_key_definition = PartitionKey(path=pk_properties["paths"], kind=pk_properties["kind"]) diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/partition_key.py b/sdk/cosmos/azure-cosmos/azure/cosmos/partition_key.py index 881cce0895e1..e4d659a08fac 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/partition_key.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/partition_key.py @@ -238,7 +238,7 @@ def _write_for_hashing_v2( def _get_effective_partition_key_for_hash_partitioning_v2( self, - pk_value: Sequence[Union[None, bool, int, float, str, _Undefined, Type[NonePartitionKeyValue]]] + pk_value: Sequence[Union[str, int, float, bool, _Empty, _Undefined]] ) -> str: with BytesIO() as ms: for component in pk_value: @@ -257,7 +257,7 @@ def _get_effective_partition_key_for_hash_partitioning_v2( def _get_effective_partition_key_for_multi_hash_partitioning_v2( self, - pk_value: Sequence[Union[None, bool, int, float, str, _Undefined, Type[NonePartitionKeyValue]]] + pk_value: Sequence[Union[str, int, float, bool, _Empty, _Undefined]] ) -> str: sb = [] for value in pk_value: From 25c3363fc03c0f30eb2ab3820548eba720d44860 Mon Sep 17 00:00:00 2001 From: annie-mac Date: Tue, 27 Aug 2024 13:53:26 -0700 Subject: [PATCH 09/20] resolve comments --- .../_change_feed/aio/change_feed_iterable.py | 13 +++++++------ .../cosmos/_change_feed/change_feed_iterable.py | 10 +++++----- .../cosmos/_change_feed/change_feed_state.py | 13 ++++++------- .../azure/cosmos/_change_feed/feed_range.py | 5 ++--- .../feed_range_composite_continuation_token.py | 6 ++---- sdk/cosmos/azure-cosmos/azure/cosmos/_utils.py | 3 --- .../azure-cosmos/azure/cosmos/aio/_container.py | 17 ++++++++--------- .../azure-cosmos/azure/cosmos/container.py | 17 ++++++++--------- 8 files changed, 38 insertions(+), 46 deletions(-) diff --git 
a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_iterable.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_iterable.py index 83c12f59157c..cae3bc5c9bf7 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_iterable.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_iterable.py @@ -28,7 +28,8 @@ from azure.cosmos import PartitionKey from azure.cosmos._change_feed.aio.change_feed_fetcher import ChangeFeedFetcherV1, ChangeFeedFetcherV2 from azure.cosmos._change_feed.change_feed_state import ChangeFeedState, ChangeFeedStateV1 -from azure.cosmos._utils import is_base64_encoded, is_key_exists_and_not_none +from azure.cosmos._utils import is_base64_encoded + # pylint: disable=protected-access @@ -63,7 +64,7 @@ def __init__( self._collection_link = collection_link self._change_feed_fetcher = None - if not is_key_exists_and_not_none(self._options, "changeFeedStateContext"): + if self._options.get("changeFeedStateContext") is None: raise ValueError("Missing changeFeedStateContext in feed options") change_feed_state_context = self._options.pop("changeFeedStateContext") @@ -119,7 +120,7 @@ async def _fetch_next(self, *args): # pylint: disable=unused-argument async def _initialize_change_feed_fetcher(self): change_feed_state_context = self._options.pop("changeFeedStateContext") conn_properties = await self._options.pop("containerProperties") - if is_key_exists_and_not_none(change_feed_state_context, "partitionKey"): + if change_feed_state_context.get("partitionKey"): change_feed_state_context["partitionKey"] = await change_feed_state_context.pop("partitionKey") pk_properties = conn_properties.get("partitionKey") partition_key_definition = PartitionKey(path=pk_properties["paths"], kind=pk_properties["kind"]) @@ -147,11 +148,11 @@ async def _initialize_change_feed_fetcher(self): def _validate_change_feed_state_context(self, change_feed_state_context: Dict[str, Any]) -> None: - if is_key_exists_and_not_none(change_feed_state_context, "continuationPkRangeId"): + if change_feed_state_context.get("continuationPkRangeId"): # if continuation token is in v1 format, throw exception if feed_range is set - if is_key_exists_and_not_none(change_feed_state_context, "feedRange"): + if change_feed_state_context.get("feedRange"): raise ValueError("feed_range and continuation are incompatible") - elif is_key_exists_and_not_none(change_feed_state_context, "continuationFeedRange"): + elif change_feed_state_context.get("continuationFeedRange"): # if continuation token is in v2 format, since the token itself contains the full change feed state # so we will ignore other parameters (including incompatible parameters) if they passed in pass diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_iterable.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_iterable.py index 4b03e33d0122..7fc62684d9ff 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_iterable.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_iterable.py @@ -27,7 +27,7 @@ from azure.cosmos._change_feed.change_feed_fetcher import ChangeFeedFetcherV1, ChangeFeedFetcherV2 from azure.cosmos._change_feed.change_feed_state import ChangeFeedStateV1, ChangeFeedState -from azure.cosmos._utils import is_base64_encoded, is_key_exists_and_not_none +from azure.cosmos._utils import is_base64_encoded class ChangeFeedIterable(PageIterator): @@ -60,7 +60,7 @@ def __init__( self._collection_link = collection_link 
self._change_feed_fetcher = None - if not is_key_exists_and_not_none(self._options, "changeFeedStateContext"): + if self._options.get("changeFeedStateContext") is None: raise ValueError("Missing changeFeedStateContext in feed options") change_feed_state_context = self._options.pop("changeFeedStateContext") @@ -140,11 +140,11 @@ def _initialize_change_feed_fetcher(self): def _validate_change_feed_state_context(self, change_feed_state_context: Dict[str, Any]) -> None: - if is_key_exists_and_not_none(change_feed_state_context, "continuationPkRangeId"): + if change_feed_state_context.get("continuationPkRangeId"): # if continuation token is in v1 format, throw exception if feed_range is set - if is_key_exists_and_not_none(change_feed_state_context, "feedRange"): + if change_feed_state_context.get("feedRange"): raise ValueError("feed_range and continuation are incompatible") - elif is_key_exists_and_not_none(change_feed_state_context, "continuationFeedRange"): + elif change_feed_state_context.get("continuationFeedRange"): # if continuation token is in v2 format, since the token itself contains the full change feed state # so we will ignore other parameters (including incompatible parameters) if they passed in pass diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_state.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_state.py index 988932ff3af6..742c4891bfdf 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_state.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_state.py @@ -38,7 +38,6 @@ from azure.cosmos._routing.aio.routing_map_provider import SmartRoutingMapProvider as AsyncSmartRoutingMapProvider from azure.cosmos._routing.routing_map_provider import SmartRoutingMapProvider from azure.cosmos._routing.routing_range import Range -from azure.cosmos._utils import is_key_exists_and_not_none from azure.cosmos.exceptions import CosmosFeedRangeGoneError from azure.cosmos.partition_key import _Empty, _Undefined @@ -74,11 +73,11 @@ def from_json( container_rid: str, change_feed_state_context: Dict[str, Any]): - if (is_key_exists_and_not_none(change_feed_state_context, "partitionKeyRangeId") - or is_key_exists_and_not_none(change_feed_state_context, "continuationPkRangeId")): + if (change_feed_state_context.get("partitionKeyRangeId") + or change_feed_state_context.get("continuationPkRangeId")): return ChangeFeedStateV1.from_json(container_link, container_rid, change_feed_state_context) - if is_key_exists_and_not_none(change_feed_state_context, "continuationFeedRange"): + if change_feed_state_context.get("continuationFeedRange"): # get changeFeedState from continuation continuation_json_str = base64.b64decode(change_feed_state_context["continuationFeedRange"]).decode( 'utf-8') @@ -376,12 +375,12 @@ def from_initial_state( change_feed_state_context: Dict[str, Any]) -> 'ChangeFeedStateV2': feed_range: Optional[FeedRange] = None - if is_key_exists_and_not_none(change_feed_state_context, "feedRange"): + if change_feed_state_context.get("feedRange"): feed_range_str = base64.b64decode(change_feed_state_context["feedRange"]).decode('utf-8') feed_range_json = json.loads(feed_range_str) feed_range = FeedRangeEpk(Range.ParseFromDict(feed_range_json)) - elif is_key_exists_and_not_none(change_feed_state_context, "partitionKey"): - if is_key_exists_and_not_none(change_feed_state_context, "partitionKeyFeedRange"): + elif change_feed_state_context.get("partitionKey"): + if change_feed_state_context.get("partitionKeyFeedRange"): 
feed_range =\ FeedRangePartitionKey( change_feed_state_context["partitionKey"], diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range.py index 856ccd6c5b48..481496159cf3 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range.py @@ -26,7 +26,6 @@ from typing import Union, List, Dict, Any from azure.cosmos._routing.routing_range import Range -from azure.cosmos._utils import is_key_exists_and_not_none from azure.cosmos.partition_key import _Undefined, _Empty @@ -71,7 +70,7 @@ def to_dict(self) -> Dict[str, Any]: @classmethod def from_json(cls, data: Dict[str, Any], feed_range: Range) -> 'FeedRangePartitionKey': - if is_key_exists_and_not_none(data, cls.type_property_name): + if data.get(cls.type_property_name): pk_value = data.get(cls.type_property_name) if not pk_value: return cls(_Empty(), feed_range) @@ -104,7 +103,7 @@ def to_dict(self) -> Dict[str, Any]: @classmethod def from_json(cls, data: Dict[str, Any]) -> 'FeedRangeEpk': - if is_key_exists_and_not_none(data, cls.type_property_name): + if data.get(cls.type_property_name): feed_range = Range.ParseFromDict(data.get(cls.type_property_name)) return cls(feed_range) raise ValueError(f"Can not parse FeedRangeEPK from the json, there is no property {cls.type_property_name}") diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range_composite_continuation_token.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range_composite_continuation_token.py index 2f73af4e5c47..e8bfe60ced3f 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range_composite_continuation_token.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range_composite_continuation_token.py @@ -30,8 +30,6 @@ from azure.cosmos._routing.routing_map_provider import SmartRoutingMapProvider from azure.cosmos._routing.aio.routing_map_provider import SmartRoutingMapProvider as AsyncSmartRoutingMapProvider from azure.cosmos._routing.routing_range import Range -from azure.cosmos._utils import is_key_exists_and_not_none - class FeedRangeCompositeContinuation(object): _version_property_name = "v" @@ -90,9 +88,9 @@ def from_json(cls, data) -> 'FeedRangeCompositeContinuation': # parsing feed range feed_range: Optional[FeedRange] = None - if is_key_exists_and_not_none(data, FeedRangeEpk.type_property_name): + if data.get(FeedRangeEpk.type_property_name): feed_range = FeedRangeEpk.from_json(data) - elif is_key_exists_and_not_none(data, FeedRangePartitionKey.type_property_name): + elif data.get(FeedRangePartitionKey.type_property_name): feed_range = FeedRangePartitionKey.from_json(data, continuation[0].feed_range) else: raise ValueError("Invalid feed range composite continuation token [Missing feed range scope]") diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_utils.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_utils.py index 7690383d375a..6e3a8c67fcfe 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_utils.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_utils.py @@ -79,6 +79,3 @@ def is_base64_encoded(data: str) -> bool: return True except (json.JSONDecodeError, ValueError): return False - -def is_key_exists_and_not_none(data: Dict[str, Any], key: str) -> bool: - return key in data and data[key] is not None diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py index 5cae847872d5..1e86ccb1aa44 100644 
--- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py @@ -43,7 +43,6 @@ ) from .._routing import routing_range from .._routing.routing_range import Range -from .._utils import is_key_exists_and_not_none from ..offer import ThroughputProperties from ..partition_key import ( NonePartitionKeyValue, @@ -573,13 +572,13 @@ def query_items_change_feed( **kwargs: Any ) -> AsyncItemPaged[Dict[str, Any]]: # pylint: disable=too-many-statements - if is_key_exists_and_not_none(kwargs, "priority"): + if kwargs.get("priority"): kwargs['priority'] = kwargs['priority'] feed_options = _build_options(kwargs) change_feed_state_context = {} # Back compatibility with deprecation warnings for partition_key_range_id - if (args and args[0] is not None) or is_key_exists_and_not_none(kwargs, "partition_key_range_id"): + if (args and args[0] is not None) or kwargs.get("partition_key_range_id"): warnings.warn( "partition_key_range_id is deprecated. Please pass in feed_range instead.", DeprecationWarning @@ -591,7 +590,7 @@ def query_items_change_feed( change_feed_state_context['partitionKeyRangeId'] = args[0] # Back compatibility with deprecation warnings for is_start_from_beginning - if (len(args) >= 2 and args[1] is not None) or is_key_exists_and_not_none(kwargs, "is_start_from_beginning"): + if (len(args) >= 2 and args[1] is not None) or kwargs.get("is_start_from_beginning"): warnings.warn( "is_start_from_beginning is deprecated. Please pass in start_time instead.", DeprecationWarning @@ -606,7 +605,7 @@ def query_items_change_feed( change_feed_state_context["startTime"] = "Beginning" # parse start_time - if is_key_exists_and_not_none(kwargs, "start_time"): + if kwargs.get("start_time"): if change_feed_state_context.get("startTime") is not None: raise ValueError("is_start_from_beginning and start_time are exclusive, please only set one of them") @@ -617,23 +616,23 @@ def query_items_change_feed( change_feed_state_context["startTime"] = start_time # parse continuation token - if len(args) >= 3 and args[2] is not None or is_key_exists_and_not_none(feed_options, "continuation"): + if len(args) >= 3 and args[2] is not None or feed_options.get("continuation"): try: continuation = feed_options.pop('continuation') except KeyError: continuation = args[2] change_feed_state_context["continuation"] = continuation - if len(args) >= 4 and args[3] is not None or is_key_exists_and_not_none(kwargs, "max_item_count"): + if len(args) >= 4 and args[3] is not None or kwargs.get("max_item_count"): try: feed_options["maxItemCount"] = kwargs.pop('max_item_count') except KeyError: feed_options["maxItemCount"] = args[3] - if is_key_exists_and_not_none(kwargs, "partition_key"): + if kwargs.get("partition_key"): change_feed_state_context["partitionKey"] = self._set_partition_key(kwargs.pop("partition_key")) - if is_key_exists_and_not_none(kwargs, "feed_range"): + if kwargs.get("feed_range"): change_feed_state_context["feedRange"] = kwargs.pop('feed_range') feed_options["containerProperties"] = self._get_properties() diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/container.py b/sdk/cosmos/azure-cosmos/azure/cosmos/container.py index cc42b6f87411..5e29302bcaae 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/container.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/container.py @@ -41,7 +41,6 @@ from ._cosmos_client_connection import CosmosClientConnection from ._routing import routing_range from ._routing.routing_range import Range -from ._utils import 
is_key_exists_and_not_none from .offer import Offer, ThroughputProperties from .partition_key import ( NonePartitionKeyValue, @@ -414,13 +413,13 @@ def query_items_change_feed( **kwargs: Any ) -> ItemPaged[Dict[str, Any]]: # pylint: disable=too-many-statements - if is_key_exists_and_not_none(kwargs, "priority"): + if kwargs.get("priority"): kwargs['priority'] = kwargs['priority'] feed_options = build_options(kwargs) change_feed_state_context = {} # Back compatibility with deprecation warnings for partition_key_range_id - if (args and args[0] is not None) or is_key_exists_and_not_none(kwargs, "partition_key_range_id"): + if (args and args[0] is not None) or kwargs.get("partition_key_range_id"): warnings.warn( "partition_key_range_id is deprecated. Please pass in feed_range instead.", DeprecationWarning @@ -432,7 +431,7 @@ def query_items_change_feed( change_feed_state_context['partitionKeyRangeId'] = args[0] # Back compatibility with deprecation warnings for is_start_from_beginning - if (len(args) >= 2 and args[1] is not None) or is_key_exists_and_not_none(kwargs, "is_start_from_beginning"): + if (len(args) >= 2 and args[1] is not None) or kwargs.get("is_start_from_beginning"): warnings.warn( "is_start_from_beginning is deprecated. Please pass in start_time instead.", DeprecationWarning @@ -447,7 +446,7 @@ def query_items_change_feed( change_feed_state_context["startTime"] = "Beginning" # parse start_time - if is_key_exists_and_not_none(kwargs, "start_time"): + if kwargs.get("start_time"): if change_feed_state_context.get("startTime") is not None: raise ValueError("is_start_from_beginning and start_time are exclusive, please only set one of them") @@ -458,25 +457,25 @@ def query_items_change_feed( change_feed_state_context["startTime"] = start_time # parse continuation token - if len(args) >= 3 and args[2] is not None or is_key_exists_and_not_none(feed_options, "continuation"): + if len(args) >= 3 and args[2] is not None or feed_options.get("continuation"): try: continuation = feed_options.pop('continuation') except KeyError: continuation = args[2] change_feed_state_context["continuation"] = continuation - if len(args) >= 4 and args[3] is not None or is_key_exists_and_not_none(kwargs, "max_item_count"): + if len(args) >= 4 and args[3] is not None or kwargs.get("max_item_count"): try: feed_options["maxItemCount"] = kwargs.pop('max_item_count') except KeyError: feed_options["maxItemCount"] = args[3] - if is_key_exists_and_not_none(kwargs, "partition_key"): + if kwargs.get("partition_key"): change_feed_state_context["partitionKey"] = self._set_partition_key(kwargs.pop('partition_key')) change_feed_state_context["partitionKeyFeedRange"] =\ self._get_epk_range_for_partition_key(change_feed_state_context["partitionKey"]) - if is_key_exists_and_not_none(kwargs, "feed_range"): + if kwargs.get("feed_range"): change_feed_state_context["feedRange"] = kwargs.pop('feed_range') container_properties = self._get_properties() From cecdfa5ed2862d8ec14b4a14ae0b7c828fa53a37 Mon Sep 17 00:00:00 2001 From: annie-mac Date: Tue, 27 Aug 2024 17:11:08 -0700 Subject: [PATCH 10/20] resolve comments --- .../_change_feed/aio/change_feed_fetcher.py | 52 ++++----- .../_change_feed/aio/change_feed_iterable.py | 44 ++++--- .../_change_feed/change_feed_fetcher.py | 46 ++++---- .../_change_feed/change_feed_iterable.py | 31 +++-- .../_change_feed/change_feed_start_from.py | 13 ++- .../cosmos/_change_feed/change_feed_state.py | 17 ++- .../composite_continuation_token.py | 20 ++-- .../azure/cosmos/_change_feed/feed_range.py | 4 
+- ...feed_range_composite_continuation_token.py | 6 +- .../azure/cosmos/aio/_container.py | 110 ++++++++++-------- .../azure-cosmos/azure/cosmos/container.py | 96 +++++++++------ .../azure-cosmos/azure/cosmos/exceptions.py | 2 +- sdk/cosmos/azure-mgmt-cosmosdb/toc_tree.rst | 8 ++ 13 files changed, 255 insertions(+), 194 deletions(-) create mode 100644 sdk/cosmos/azure-mgmt-cosmosdb/toc_tree.rst diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_fetcher.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_fetcher.py index ee926aa0e92c..90aa2d01adfa 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_fetcher.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_fetcher.py @@ -23,14 +23,13 @@ database service. """ import base64 -import copy import json from abc import ABC, abstractmethod -from typing import Dict, Any, List +from typing import Dict, Any, List, Callable, Tuple, Awaitable from azure.cosmos import http_constants, exceptions -from azure.cosmos._change_feed.change_feed_start_from import ChangeFeedStartFromPointInTime -from azure.cosmos._change_feed.change_feed_state import ChangeFeedStateV1, ChangeFeedStateV2 +from azure.cosmos._change_feed.change_feed_start_from import ChangeFeedStartFromType +from azure.cosmos._change_feed.change_feed_state import ChangeFeedStateV2, ChangeFeedStateVersion from azure.cosmos.aio import _retry_utility_async from azure.cosmos.exceptions import CosmosHttpResponseError @@ -39,7 +38,7 @@ class ChangeFeedFetcher(ABC): @abstractmethod - async def fetch_next_block(self): + async def fetch_next_block(self) -> List[Dict[str, Any]]: pass class ChangeFeedFetcherV1(ChangeFeedFetcher): @@ -53,38 +52,38 @@ def __init__( client, resource_link: str, feed_options: Dict[str, Any], - fetch_function): + fetch_function: Callable[[Dict[str, Any]], Awaitable[Tuple[List[Dict[str, Any]], Dict[str, Any]]]] + ) -> None: self._client = client self._feed_options = feed_options self._change_feed_state = self._feed_options.pop("changeFeedState") - if not isinstance(self._change_feed_state, ChangeFeedStateV1): + if self._change_feed_state.version != ChangeFeedStateVersion.V1: raise ValueError(f"ChangeFeedFetcherV1 can not handle change feed state version" f" {type(self._change_feed_state)}") self._resource_link = resource_link self._fetch_function = fetch_function - async def fetch_next_block(self): + async def fetch_next_block(self) -> List[Dict[str, Any]]: """Returns a block of results. :return: List of results. 
:rtype: list """ async def callback(): - return await self.fetch_change_feed_items(self._fetch_function) + return await self.fetch_change_feed_items() return await _retry_utility_async.ExecuteAsync(self._client, self._client._global_endpoint_manager, callback) - async def fetch_change_feed_items(self, fetch_function) -> List[Dict[str, Any]]: - new_options = copy.deepcopy(self._feed_options) - new_options["changeFeedState"] = self._change_feed_state + async def fetch_change_feed_items(self) -> List[Dict[str, Any]]: + self._feed_options["changeFeedState"] = self._change_feed_state - self._change_feed_state.populate_feed_options(new_options) + self._change_feed_state.populate_feed_options(self._feed_options) is_s_time_first_fetch = self._change_feed_state._continuation is None while True: - (fetched_items, response_headers) = await fetch_function(new_options) + (fetched_items, response_headers) = await self._fetch_function(self._feed_options) continuation_key = http_constants.HttpHeaders.ETag # In change feed queries, the continuation token is always populated. The hasNext() test is whether # there is any items in the response or not. @@ -96,7 +95,7 @@ async def fetch_change_feed_items(self, fetch_function) -> List[Dict[str, Any]]: # When processing from point in time, there will be no initial results being returned, # so we will retry with the new continuation token again - if (isinstance(self._change_feed_state._change_feed_start_from, ChangeFeedStartFromPointInTime) + if (self._change_feed_state._change_feed_start_from.version == ChangeFeedStartFromType.POINT_IN_TIME and is_s_time_first_fetch): is_s_time_first_fetch = False else: @@ -113,20 +112,21 @@ def __init__( client, resource_link: str, feed_options: Dict[str, Any], - fetch_function): + fetch_function: Callable[[Dict[str, Any]], Awaitable[Tuple[List[Dict[str, Any]], Dict[str, Any]]]] + ) -> None: self._client = client self._feed_options = feed_options self._change_feed_state: ChangeFeedStateV2 = self._feed_options.pop("changeFeedState") - if not isinstance(self._change_feed_state, ChangeFeedStateV2): + if self._change_feed_state.version != ChangeFeedStateVersion.V2: raise ValueError(f"ChangeFeedFetcherV2 can not handle change feed state version " - f"{type(self._change_feed_state)}") + f"{type(self._change_feed_state.version)}") self._resource_link = resource_link self._fetch_function = fetch_function - async def fetch_next_block(self): + async def fetch_next_block(self) -> List[Dict[str, Any]]: """Returns a block of results. :return: List of results. 
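Editor's note at this hunk boundary: the V2 fetcher above works against the V2 change feed state, whose continuation token (as ChangeFeedState.from_json and _get_base64_encoded_continuation elsewhere in this patch show) is a base64-encoded JSON blob carrying the full feed state rather than a bare ETag. A hedged sketch, assuming a token produced by this SDK, of decoding one for inspection:

import base64
import json

def inspect_v2_continuation(token: str) -> dict:
    # Mirrors the base64.b64decode(...) / json.loads(...) path used by
    # ChangeFeedState.from_json when a continuationFeedRange is supplied.
    return json.loads(base64.b64decode(token).decode("utf-8"))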
@@ -134,7 +134,7 @@ async def fetch_next_block(self): """ async def callback(): - return await self.fetch_change_feed_items(self._fetch_function) + return await self.fetch_change_feed_items() try: return await _retry_utility_async.ExecuteAsync( @@ -152,15 +152,14 @@ async def callback(): return await self.fetch_next_block() - async def fetch_change_feed_items(self, fetch_function) -> List[Dict[str, Any]]: - new_options = copy.deepcopy(self._feed_options) - new_options["changeFeedState"] = self._change_feed_state + async def fetch_change_feed_items(self) -> List[Dict[str, Any]]: + self._feed_options["changeFeedState"] = self._change_feed_state - self._change_feed_state.populate_feed_options(new_options) + self._change_feed_state.populate_feed_options(self._feed_options) is_s_time_first_fetch = True while True: - (fetched_items, response_headers) = await fetch_function(new_options) + (fetched_items, response_headers) = await self._fetch_function(self._feed_options) continuation_key = http_constants.HttpHeaders.ETag # In change feed queries, the continuation token is always populated. The hasNext() test is whether @@ -180,8 +179,7 @@ async def fetch_change_feed_items(self, fetch_function) -> List[Dict[str, Any]]: self._change_feed_state.apply_server_response_continuation( response_headers.get(continuation_key)) - #TODO: can this part logic be simplified - if (isinstance(self._change_feed_state._change_feed_start_from, ChangeFeedStartFromPointInTime) + if (self._change_feed_state._change_feed_start_from.version == ChangeFeedStartFromType.POINT_IN_TIME and is_s_time_first_fetch): response_headers[continuation_key] = self._get_base64_encoded_continuation() is_s_time_first_fetch = False diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_iterable.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_iterable.py index cae3bc5c9bf7..745ed19279c7 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_iterable.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_iterable.py @@ -21,13 +21,13 @@ """Iterable change feed results in the Azure Cosmos database service. """ -from typing import Dict, Any +from typing import Dict, Any, Optional, Callable, Coroutine, Tuple, List, AsyncIterator from azure.core.async_paging import AsyncPageIterator from azure.cosmos import PartitionKey from azure.cosmos._change_feed.aio.change_feed_fetcher import ChangeFeedFetcherV1, ChangeFeedFetcherV2 -from azure.cosmos._change_feed.change_feed_state import ChangeFeedState, ChangeFeedStateV1 +from azure.cosmos._change_feed.change_feed_state import ChangeFeedState, ChangeFeedStateVersion from azure.cosmos._utils import is_base64_encoded @@ -42,21 +42,20 @@ class ChangeFeedIterable(AsyncPageIterator): def __init__( self, client, - options, - fetch_function=None, - collection_link=None, - continuation_token=None, - ): + options: Dict[str, Any], + fetch_function=Optional[Callable[[Dict[str, Any]], Coroutine[Tuple[List[Dict[str, Any]], Dict[str, Any]]]]], + collection_link=Optional[str], + continuation_token=Optional[str], + ) -> None: """Instantiates a ChangeFeedIterable for non-client side partitioning queries. - ChangeFeedFetcher will be used as the internal query execution - context. - - :param CosmosClient client: Instance of document client. - :param dict options: The request options for the request. - :param method fetch_function: - + :param CosmosClient client: Instance of document client. 
+ :param dict options: The request options for the request. + :param fetch_function: The fetch function. + :param collection_link: The collection resource link. + :param continuation_token: The continuation token passed in from by_page """ + self._client = client self.retry_options = client.connection_policy.RetryOptions self._options = options @@ -90,7 +89,7 @@ def __init__( super(ChangeFeedIterable, self).__init__(self._fetch_next, self._unpack, continuation_token=continuation_token) - async def _unpack(self, block): + async def _unpack(self, block) -> Tuple[str, AsyncIterator[List[Dict[str, Any]]]]: continuation = None if self._client.last_response_headers: continuation = self._client.last_response_headers.get('etag') @@ -99,12 +98,9 @@ async def _unpack(self, block): self._did_a_call_already = False return continuation, block - async def _fetch_next(self, *args): # pylint: disable=unused-argument + async def _fetch_next(self, *args) -> List[Dict[str, Any]]: # pylint: disable=unused-argument """Return a block of results with respecting retry policy. - This method only exists for backward compatibility reasons. (Because - QueryIterable has exposed fetch_next_block api). - :param Any args: :return: List of results. :rtype: list @@ -117,7 +113,7 @@ async def _fetch_next(self, *args): # pylint: disable=unused-argument raise StopAsyncIteration return block - async def _initialize_change_feed_fetcher(self): + async def _initialize_change_feed_fetcher(self) -> None: change_feed_state_context = self._options.pop("changeFeedStateContext") conn_properties = await self._options.pop("containerProperties") if change_feed_state_context.get("partitionKey"): @@ -131,7 +127,7 @@ async def _initialize_change_feed_fetcher(self): ChangeFeedState.from_json(self._collection_link, conn_properties["_rid"], change_feed_state_context) self._options["changeFeedState"] = change_feed_state - if isinstance(change_feed_state, ChangeFeedStateV1): + if change_feed_state.version != ChangeFeedStateVersion.V1: self._change_feed_fetcher = ChangeFeedFetcherV1( self._client, self._collection_link, @@ -148,11 +144,11 @@ async def _initialize_change_feed_fetcher(self): def _validate_change_feed_state_context(self, change_feed_state_context: Dict[str, Any]) -> None: - if change_feed_state_context.get("continuationPkRangeId"): + if change_feed_state_context.get("continuationPkRangeId") is not None: # if continuation token is in v1 format, throw exception if feed_range is set - if change_feed_state_context.get("feedRange"): + if change_feed_state_context.get("feedRange") is not None: raise ValueError("feed_range and continuation are incompatible") - elif change_feed_state_context.get("continuationFeedRange"): + elif change_feed_state_context.get("continuationFeedRange") is not None: # if continuation token is in v2 format, since the token itself contains the full change feed state # so we will ignore other parameters (including incompatible parameters) if they passed in pass diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_fetcher.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_fetcher.py index 92f0b2446f74..35ae9a15a08a 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_fetcher.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_fetcher.py @@ -23,14 +23,13 @@ database service. 
""" import base64 -import copy import json from abc import ABC, abstractmethod -from typing import Dict, Any, List +from typing import Dict, Any, List, Callable, Tuple from azure.cosmos import _retry_utility, http_constants, exceptions -from azure.cosmos._change_feed.change_feed_start_from import ChangeFeedStartFromPointInTime -from azure.cosmos._change_feed.change_feed_state import ChangeFeedStateV1, ChangeFeedStateV2 +from azure.cosmos._change_feed.change_feed_start_from import ChangeFeedStartFromType +from azure.cosmos._change_feed.change_feed_state import ChangeFeedStateV1, ChangeFeedStateV2, ChangeFeedStateVersion from azure.cosmos.exceptions import CosmosHttpResponseError # pylint: disable=protected-access @@ -52,38 +51,38 @@ def __init__( client, resource_link: str, feed_options: Dict[str, Any], - fetch_function): + fetch_function: Callable[[Dict[str, Any]], Tuple[List[Dict[str, Any]], Dict[str, Any]]] + ) -> None: self._client = client self._feed_options = feed_options self._change_feed_state: ChangeFeedStateV1 = self._feed_options.pop("changeFeedState") - if not isinstance(self._change_feed_state, ChangeFeedStateV1): + if self._change_feed_state.version != ChangeFeedStateVersion.V1: raise ValueError(f"ChangeFeedFetcherV1 can not handle change feed state version" f" {type(self._change_feed_state)}") self._resource_link = resource_link self._fetch_function = fetch_function - def fetch_next_block(self): + def fetch_next_block(self) -> List[Dict[str, Any]]: """Returns a block of results. :return: List of results. :rtype: list """ def callback(): - return self.fetch_change_feed_items(self._fetch_function) + return self.fetch_change_feed_items() return _retry_utility.Execute(self._client, self._client._global_endpoint_manager, callback) - def fetch_change_feed_items(self, fetch_function) -> List[Dict[str, Any]]: - new_options = copy.deepcopy(self._feed_options) - new_options["changeFeedState"] = self._change_feed_state + def fetch_change_feed_items(self) -> List[Dict[str, Any]]: + self._feed_options["changeFeedState"] = self._change_feed_state - self._change_feed_state.populate_feed_options(new_options) + self._change_feed_state.populate_feed_options(self._feed_options) is_s_time_first_fetch = self._change_feed_state._continuation is None while True: - (fetched_items, response_headers) = fetch_function(new_options) + (fetched_items, response_headers) = self._fetch_function(self._feed_options) continuation_key = http_constants.HttpHeaders.ETag # In change feed queries, the continuation token is always populated. The hasNext() test is whether # there is any items in the response or not. 
@@ -95,7 +94,7 @@ def fetch_change_feed_items(self, fetch_function) -> List[Dict[str, Any]]: # When processing from point in time, there will be no initial results being returned, # so we will retry with the new continuation token again - if (isinstance(self._change_feed_state._change_feed_start_from, ChangeFeedStartFromPointInTime) + if (self._change_feed_state._change_feed_start_from.version == ChangeFeedStartFromType.POINT_IN_TIME and is_s_time_first_fetch): is_s_time_first_fetch = False else: @@ -112,20 +111,20 @@ def __init__( client, resource_link: str, feed_options: Dict[str, Any], - fetch_function): + fetch_function: Callable[[Dict[str, Any]], Tuple[List[Dict[str, Any]], Dict[str, Any]]]): self._client = client self._feed_options = feed_options self._change_feed_state: ChangeFeedStateV2 = self._feed_options.pop("changeFeedState") - if not isinstance(self._change_feed_state, ChangeFeedStateV2): + if self._change_feed_state.version != ChangeFeedStateVersion.V2: raise ValueError(f"ChangeFeedFetcherV2 can not handle change feed state version " f"{type(self._change_feed_state)}") self._resource_link = resource_link self._fetch_function = fetch_function - def fetch_next_block(self): + def fetch_next_block(self) -> List[Dict[str, Any]]: """Returns a block of results. :return: List of results. @@ -133,7 +132,7 @@ def fetch_next_block(self): """ def callback(): - return self.fetch_change_feed_items(self._fetch_function) + return self.fetch_change_feed_items() try: return _retry_utility.Execute(self._client, self._client._global_endpoint_manager, callback) @@ -146,15 +145,14 @@ def callback(): return self.fetch_next_block() - def fetch_change_feed_items(self, fetch_function) -> List[Dict[str, Any]]: - new_options = copy.deepcopy(self._feed_options) - new_options["changeFeedState"] = self._change_feed_state + def fetch_change_feed_items(self) -> List[Dict[str, Any]]: + self._feed_options["changeFeedState"] = self._change_feed_state - self._change_feed_state.populate_feed_options(new_options) + self._change_feed_state.populate_feed_options(self._feed_options) is_s_time_first_fetch = self._change_feed_state._continuation.current_token.token is None while True: - (fetched_items, response_headers) = fetch_function(new_options) + (fetched_items, response_headers) = self._fetch_function(self._feed_options) continuation_key = http_constants.HttpHeaders.ETag # In change feed queries, the continuation token is always populated. @@ -174,7 +172,7 @@ def fetch_change_feed_items(self, fetch_function) -> List[Dict[str, Any]]: self._change_feed_state.apply_server_response_continuation( response_headers.get(continuation_key)) - if (isinstance(self._change_feed_state._change_feed_start_from, ChangeFeedStartFromPointInTime) + if (self._change_feed_state._change_feed_start_from.version == ChangeFeedStartFromType.POINT_IN_TIME and is_s_time_first_fetch): response_headers[continuation_key] = self._get_base64_encoded_continuation() is_s_time_first_fetch = False diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_iterable.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_iterable.py index 7fc62684d9ff..6eaaa31bfd8f 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_iterable.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_iterable.py @@ -21,12 +21,12 @@ """Iterable change feed results in the Azure Cosmos database service. 
""" -from typing import Dict, Any +from typing import Dict, Any, Tuple, List, Optional, Callable from azure.core.paging import PageIterator from azure.cosmos._change_feed.change_feed_fetcher import ChangeFeedFetcherV1, ChangeFeedFetcherV2 -from azure.cosmos._change_feed.change_feed_state import ChangeFeedStateV1, ChangeFeedState +from azure.cosmos._change_feed.change_feed_state import ChangeFeedState, ChangeFeedStateVersion from azure.cosmos._utils import is_base64_encoded @@ -39,11 +39,11 @@ class ChangeFeedIterable(PageIterator): def __init__( self, client, - options, - fetch_function=None, - collection_link=None, - continuation_token=None, - ): + options: Dict[str, Any], + fetch_function=Optional[Callable[[Dict[str, Any]], Tuple[List[Dict[str, Any]], Dict[str, Any]]]], + collection_link=Optional[str], + continuation_token=Optional[str], + ) -> None: """Instantiates a ChangeFeedIterable for non-client side partitioning queries. :param CosmosClient client: Instance of document client. @@ -85,7 +85,7 @@ def __init__( super(ChangeFeedIterable, self).__init__(self._fetch_next, self._unpack, continuation_token=continuation_token) - def _unpack(self, block): + def _unpack(self, block) -> Tuple[str, List[Dict[str, Any]]]: continuation = None if self._client.last_response_headers: continuation = self._client.last_response_headers.get('etag') @@ -94,12 +94,9 @@ def _unpack(self, block): self._did_a_call_already = False return continuation, block - def _fetch_next(self, *args): # pylint: disable=unused-argument + def _fetch_next(self, *args) -> List[Dict[str, Any]]: # pylint: disable=unused-argument """Return a block of results with respecting retry policy. - This method only exists for backward compatibility reasons. (Because - QueryIterable has exposed fetch_next_block api). - :param Any args: :return: List of results. 
:rtype: list @@ -113,7 +110,7 @@ def _fetch_next(self, *args): # pylint: disable=unused-argument raise StopIteration return block - def _initialize_change_feed_fetcher(self): + def _initialize_change_feed_fetcher(self) -> None: change_feed_state_context = self._options.pop("changeFeedStateContext") change_feed_state = \ ChangeFeedState.from_json( @@ -123,7 +120,7 @@ def _initialize_change_feed_fetcher(self): self._options["changeFeedState"] = change_feed_state - if isinstance(change_feed_state, ChangeFeedStateV1): + if change_feed_state.version == ChangeFeedStateVersion.V1: self._change_feed_fetcher = ChangeFeedFetcherV1( self._client, self._collection_link, @@ -140,11 +137,11 @@ def _initialize_change_feed_fetcher(self): def _validate_change_feed_state_context(self, change_feed_state_context: Dict[str, Any]) -> None: - if change_feed_state_context.get("continuationPkRangeId"): + if change_feed_state_context.get("continuationPkRangeId") is not None: # if continuation token is in v1 format, throw exception if feed_range is set - if change_feed_state_context.get("feedRange"): + if change_feed_state_context.get("feedRange") is not None: raise ValueError("feed_range and continuation are incompatible") - elif change_feed_state_context.get("continuationFeedRange"): + elif change_feed_state_context.get("continuationFeedRange") is not None: # if continuation token is in v2 format, since the token itself contains the full change feed state # so we will ignore other parameters (including incompatible parameters) if they passed in pass diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_start_from.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_start_from.py index 30d0ce787983..dc255eced586 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_start_from.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_start_from.py @@ -42,6 +42,9 @@ class ChangeFeedStartFromInternal(ABC): type_property_name = "Type" + def __init__(self, start_from_type: ChangeFeedStartFromType) -> None: + self.version = start_from_type + @abstractmethod def to_dict(self) -> Dict[str, Any]: pass @@ -86,6 +89,9 @@ class ChangeFeedStartFromBeginning(ChangeFeedStartFromInternal): """Class for change feed start from beginning implementation in the Azure Cosmos database service. """ + def __init__(self) -> None: + super().__init__(ChangeFeedStartFromType.BEGINNING) + def to_dict(self) -> Dict[str, Any]: return { self.type_property_name: ChangeFeedStartFromType.BEGINNING.value @@ -106,12 +112,13 @@ class ChangeFeedStartFromETagAndFeedRange(ChangeFeedStartFromInternal): _etag_property_name = "Etag" _feed_range_property_name = "FeedRange" - def __init__(self, etag, feed_range): + def __init__(self, etag, feed_range) -> None: if feed_range is None: raise ValueError("feed_range is missing") self._etag = etag self._feed_range = feed_range + super().__init__(ChangeFeedStartFromType.LEASE) def to_dict(self) -> Dict[str, Any]: return { @@ -142,6 +149,9 @@ class ChangeFeedStartFromNow(ChangeFeedStartFromInternal): """Class for change feed start from etag and feed range implementation in the Azure Cosmos database service. 
""" + def __init__(self) -> None: + super().__init__(ChangeFeedStartFromType.NOW) + def to_dict(self) -> Dict[str, Any]: return { self.type_property_name: ChangeFeedStartFromType.NOW.value @@ -166,6 +176,7 @@ def __init__(self, start_time: datetime): raise ValueError("start_time is missing") self._start_time = start_time + super().__init__(ChangeFeedStartFromType.POINT_IN_TIME) def to_dict(self) -> Dict[str, Any]: return { diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_state.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_state.py index 742c4891bfdf..95d5624017a8 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_state.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_state.py @@ -27,6 +27,7 @@ import collections import json from abc import ABC, abstractmethod +from enum import Enum from typing import Optional, Union, List, Any, Dict, Deque from azure.cosmos import http_constants @@ -41,10 +42,16 @@ from azure.cosmos.exceptions import CosmosFeedRangeGoneError from azure.cosmos.partition_key import _Empty, _Undefined +class ChangeFeedStateVersion(Enum): + V1 = "v1" + V2 = "v2" class ChangeFeedState(ABC): version_property_name = "v" + def __init__(self, version: ChangeFeedStateVersion) -> None: + self.version = version + @abstractmethod def populate_feed_options(self, feed_options: Dict[str, Any]) -> None: pass @@ -106,7 +113,7 @@ def __init__( change_feed_start_from: ChangeFeedStartFromInternal, partition_key_range_id: Optional[str] = None, partition_key: Optional[Union[str, int, float, bool, List[Union[str, int, float, bool]], _Empty, _Undefined]] = None, # pylint: disable=line-too-long - continuation: Optional[str] = None): + continuation: Optional[str] = None) -> None: self._container_link = container_link self._container_rid = container_rid @@ -114,6 +121,7 @@ def __init__( self._partition_key_range_id = partition_key_range_id self._partition_key = partition_key self._continuation = continuation + super(ChangeFeedStateV1).__init__(ChangeFeedStateVersion.V1) @property def container_rid(self): @@ -187,7 +195,8 @@ def __init__( container_rid: str, feed_range: FeedRange, change_feed_start_from: ChangeFeedStartFromInternal, - continuation: Optional[FeedRangeCompositeContinuation]): + continuation: Optional[FeedRangeCompositeContinuation] + ) -> None: self._container_link = container_link self._container_rid = container_rid @@ -207,13 +216,15 @@ def __init__( else: self._continuation = continuation + super(ChangeFeedStateV2).__init__(ChangeFeedStateVersion.V2) + @property def container_rid(self) -> str : return self._container_rid def to_dict(self) -> Dict[str, Any]: return { - self.version_property_name: "V2", + self.version_property_name: ChangeFeedStateVersion.V2.value, self.container_rid_property_name: self._container_rid, self.change_feed_mode_property_name: "Incremental", self.change_feed_start_from_property_name: self._change_feed_start_from.to_dict(), diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/composite_continuation_token.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/composite_continuation_token.py index 9945405e4b57..90d3d6132822 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/composite_continuation_token.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/composite_continuation_token.py @@ -22,41 +22,41 @@ """Internal class for change feed composite continuation token in the Azure Cosmos database service. 
""" -from typing import Optional +from typing import Optional, Dict, Any from azure.cosmos._routing.routing_range import Range -class CompositeContinuationToken(object): +class CompositeContinuationToken: token_property_name = "token" feed_range_property_name = "range" - def __init__(self, feed_range: Range, token: Optional[str] = None): + def __init__(self, feed_range: Range, token: Optional[str] = None) -> None: if feed_range is None: raise ValueError("Missing required parameter feed_range") self._token = token self._feed_range = feed_range - def to_dict(self): + def to_dict(self) -> Dict[str, Any]: return { self.token_property_name: self._token, - self.feed_range_property_name: self._feed_range.to_dict() + self.feed_range_property_name: self.feed_range.to_dict() } @property - def feed_range(self): + def feed_range(self) -> Range: return self._feed_range @property - def token(self): + def token(self) -> str: return self._token - def update_token(self, etag): + def update_token(self, etag) -> None: self._token = etag @classmethod - def from_json(cls, data): + def from_json(cls, data) -> 'CompositeContinuationToken': token = data.get(cls.token_property_name) if token is None: raise ValueError(f"Invalid composite token [Missing {cls.token_property_name}]") @@ -69,4 +69,4 @@ def from_json(cls, data): return cls(feed_range=feed_range, token=token) def __repr__(self): - return f"CompositeContinuationToken(token={self.token}, range={self._feed_range})" + return f"CompositeContinuationToken(token={self.token}, range={self.feed_range})" diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range.py index 481496159cf3..b4f731f2c2ef 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range.py @@ -45,7 +45,7 @@ class FeedRangePartitionKey(FeedRange): def __init__( self, pk_value: Union[str, int, float, bool, List[Union[str, int, float, bool]], _Empty, _Undefined], - feed_range: Range): # pylint: disable=line-too-long + feed_range: Range) -> None: # pylint: disable=line-too-long if pk_value is None: raise ValueError("PartitionKey cannot be None") @@ -87,7 +87,7 @@ def from_json(cls, data: Dict[str, Any], feed_range: Range) -> 'FeedRangePartiti class FeedRangeEpk(FeedRange): type_property_name = "Range" - def __init__(self, feed_range: Range): + def __init__(self, feed_range: Range) -> None: if feed_range is None: raise ValueError("feed_range cannot be None") diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range_composite_continuation_token.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range_composite_continuation_token.py index e8bfe60ced3f..0aaebb616249 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range_composite_continuation_token.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range_composite_continuation_token.py @@ -31,7 +31,7 @@ from azure.cosmos._routing.aio.routing_map_provider import SmartRoutingMapProvider as AsyncSmartRoutingMapProvider from azure.cosmos._routing.routing_range import Range -class FeedRangeCompositeContinuation(object): +class FeedRangeCompositeContinuation: _version_property_name = "v" _container_rid_property_name = "rid" _continuation_property_name = "continuation" @@ -40,7 +40,7 @@ def __init__( self, container_rid: str, feed_range: FeedRange, - continuation: Deque[CompositeContinuationToken]): + continuation: 
Deque[CompositeContinuationToken]) -> None: if container_rid is None: raise ValueError("container_rid is missing") @@ -51,7 +51,7 @@ def __init__( self._initial_no_result_range = None @property - def current_token(self): + def current_token(self) -> CompositeContinuationToken: return self._current_token def to_dict(self) -> Dict[str, Any]: diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py index 1e86ccb1aa44..bc4f975a19e8 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py @@ -490,68 +490,97 @@ def query_items_change_feed( *, max_item_count: Optional[int] = None, start_time: Optional[Union[datetime, Literal["Now", "Beginning"]]] = None, - partition_key: Optional[PartitionKeyType] = None, priority: Optional[Literal["High", "Low"]] = None, **kwargs: Any ) -> AsyncItemPaged[Dict[str, Any]]: - # pylint: disable=line-too-long + """Get a sorted list of items that were changed in the entire container, + in the order in which they were modified. + + :keyword int max_item_count: Max number of items to be returned in the enumeration operation. + :keyword start_time: The start time to start processing chang feed items. + Beginning: Processing the change feed items from the beginning of the change feed. + Now: Processing change feed from the current time, so only events for all future changes will be retrieved. + ~datetime.datetime: processing change feed from a point of time. Provided value will be converted to UTC. + By default, it is start from current ("Now") + :type start_time: Union[~datetime.datetime, Literal["Now", "Beginning"]] + :keyword Literal["High", "Low"] priority: Priority based execution allows users to set a priority for each + request. Once the user has reached their provisioned throughput, low priority requests are throttled + before high priority requests start getting throttled. Feature must first be enabled at the account level. + :keyword Callable response_hook: A callable invoked with the response metadata. + :returns: An AsyncItemPaged of items (dicts). + :rtype: AsyncItemPaged[Dict[str, Any]] + """ + ... + + @overload + def query_items_change_feed( + self, + *, + max_item_count: Optional[int] = None, + start_time: Optional[Union[datetime, Literal["Now", "Beginning"]]] = None, + partition_key: PartitionKeyType, + priority: Optional[Literal["High", "Low"]] = None, + **kwargs: Any + ) -> AsyncItemPaged[Dict[str, Any]]: """Get a sorted list of items that were changed, in the order in which they were modified. :keyword int max_item_count: Max number of items to be returned in the enumeration operation. - :keyword Union[datetime, Literal["Now", "Beginning"]] start_time: The start time to start processing chang feed items. + :keyword start_time: The start time to start processing chang feed items. Beginning: Processing the change feed items from the beginning of the change feed. Now: Processing change feed from the current time, so only events for all future changes will be retrieved. ~datetime.datetime: processing change feed from a point of time. Provided value will be converted to UTC. 
- By default, it is start from current (NOW) - :keyword PartitionKeyType partition_key: The partition key that is used to define the scope (logical partition or a subset of a container) + By default, it is start from current ("Now") + :type start_time: Union[~datetime.datetime, Literal["Now", "Beginning"]] + :keyword partition_key: The partition key that is used to define the scope + (logical partition or a subset of a container) + :type partition_key: Union[str, int, float, bool, List[Union[str, int, float, bool]]] :keyword Literal["High", "Low"] priority: Priority based execution allows users to set a priority for each request. Once the user has reached their provisioned throughput, low priority requests are throttled before high priority requests start getting throttled. Feature must first be enabled at the account level. + :keyword Callable response_hook: A callable invoked with the response metadata. :returns: An AsyncItemPaged of items (dicts). :rtype: AsyncItemPaged[Dict[str, Any]] """ - # pylint: enable=line-too-long ... @overload def query_items_change_feed( self, *, - feed_range: Optional[str] = None, + feed_range: str, max_item_count: Optional[int] = None, start_time: Optional[Union[datetime, Literal["Now", "Beginning"]]] = None, priority: Optional[Literal["High", "Low"]] = None, **kwargs: Any ) -> AsyncItemPaged[Dict[str, Any]]: - # pylint: disable=line-too-long """Get a sorted list of items that were changed, in the order in which they were modified. - :keyword str feed_range: The feed range that is used to define the scope. By default, the scope will be the entire container. + :keyword str feed_range: The feed range that is used to define the scope. :keyword int max_item_count: Max number of items to be returned in the enumeration operation. - :keyword Union[datetime, Literal["Now", "Beginning"]] start_time: The start time to start processing chang feed items. + :keyword start_time: The start time to start processing chang feed items. Beginning: Processing the change feed items from the beginning of the change feed. Now: Processing change feed from the current time, so only events for all future changes will be retrieved. ~datetime.datetime: processing change feed from a point of time. Provided value will be converted to UTC. - By default, it is start from current (NOW) + By default, it is start from current ("Now") + :type start_time: Union[~datetime.datetime, Literal["Now", "Beginning"]] :keyword Literal["High", "Low"] priority: Priority based execution allows users to set a priority for each request. Once the user has reached their provisioned throughput, low priority requests are throttled before high priority requests start getting throttled. Feature must first be enabled at the account level. + :keyword Callable response_hook: A callable invoked with the response metadata. :returns: An AsyncItemPaged of items (dicts). :rtype: AsyncItemPaged[Dict[str, Any]] """ - # pylint: enable=line-too-long ... @overload def query_items_change_feed( self, *, - continuation: Optional[str] = None, + continuation: str, max_item_count: Optional[int] = None, priority: Optional[Literal["High", "Low"]] = None, **kwargs: Any ) -> AsyncItemPaged[Dict[str, Any]]: - # pylint: disable=line-too-long """Get a sorted list of items that were changed, in the order in which they were modified. :keyword str continuation: The continuation token retrieved from previous response. 
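Editor's note: the overloads documented above are keyword-only, so a short usage sketch may help. This is a hedged example rather than code from the patch; the container object and partition key value are assumptions, and it leans on the standard by_page()/continuation_token paging surface that ChangeFeedIterable plugs into:

from datetime import datetime, timezone

async def read_partition_changes(container):
    # Read changes for one logical partition starting from a point in time.
    pager = container.query_items_change_feed(
        partition_key="some-pk-value",
        start_time=datetime(2024, 8, 1, tzinfo=timezone.utc),
        max_item_count=100,
    ).by_page()
    async for page in pager:
        async for item in page:
            print(item["id"])
        break  # stop after one page but keep our place
    token = pager.continuation_token

    # Resume later from the saved token alone; as noted earlier in the patch, a v2
    # continuation carries the full change feed state, so no other scope is needed.
    async for item in container.query_items_change_feed(continuation=token):
        print(item["id"])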
@@ -559,6 +588,7 @@ def query_items_change_feed( :keyword Literal["High", "Low"] priority: Priority based execution allows users to set a priority for each request. Once the user has reached their provisioned throughput, low priority requests are throttled before high priority requests start getting throttled. Feature must first be enabled at the account level. + :keyword Callable response_hook: A callable invoked with the response metadata. :returns: An AsyncItemPaged of items (dicts). :rtype: AsyncItemPaged[Dict[str, Any]] """ @@ -566,73 +596,59 @@ def query_items_change_feed( ... @distributed_trace - def query_items_change_feed( + def query_items_change_feed( # pylint: disable=unused-argument self, *args: Any, **kwargs: Any ) -> AsyncItemPaged[Dict[str, Any]]: # pylint: disable=too-many-statements - if kwargs.get("priority"): + if kwargs.get("priority") is not None: kwargs['priority'] = kwargs['priority'] feed_options = _build_options(kwargs) change_feed_state_context = {} # Back compatibility with deprecation warnings for partition_key_range_id - if (args and args[0] is not None) or kwargs.get("partition_key_range_id"): + if kwargs.get("partition_key_range_id") is not None: warnings.warn( "partition_key_range_id is deprecated. Please pass in feed_range instead.", DeprecationWarning ) - try: - change_feed_state_context["partitionKeyRangeId"] = kwargs.pop('partition_key_range_id') - except KeyError: - change_feed_state_context['partitionKeyRangeId'] = args[0] + change_feed_state_context["partitionKeyRangeId"] = kwargs.pop('partition_key_range_id') # Back compatibility with deprecation warnings for is_start_from_beginning - if (len(args) >= 2 and args[1] is not None) or kwargs.get("is_start_from_beginning"): + if kwargs.get("is_start_from_beginning") is not None: warnings.warn( "is_start_from_beginning is deprecated. 
Please pass in start_time instead.", DeprecationWarning ) - try: - is_start_from_beginning = kwargs.pop('is_start_from_beginning') - except KeyError: - is_start_from_beginning = args[1] + if kwargs.get("start_time") is not None: + raise ValueError("is_start_from_beginning and start_time are exclusive, please only set one of them") - if is_start_from_beginning: + is_start_from_beginning = kwargs.pop('is_start_from_beginning') + if is_start_from_beginning is True: change_feed_state_context["startTime"] = "Beginning" # parse start_time - if kwargs.get("start_time"): - if change_feed_state_context.get("startTime") is not None: - raise ValueError("is_start_from_beginning and start_time are exclusive, please only set one of them") - + if kwargs.get("start_time") is not None: start_time = kwargs.pop('start_time') if not isinstance(start_time, (datetime, str)): raise TypeError( - "'start_time' must be either a datetime object, or either the values 'now' or 'beginning'.") + "'start_time' must be either a datetime object, or either the values 'Now' or 'Beginning'.") change_feed_state_context["startTime"] = start_time # parse continuation token - if len(args) >= 3 and args[2] is not None or feed_options.get("continuation"): - try: - continuation = feed_options.pop('continuation') - except KeyError: - continuation = args[2] - change_feed_state_context["continuation"] = continuation - - if len(args) >= 4 and args[3] is not None or kwargs.get("max_item_count"): - try: - feed_options["maxItemCount"] = kwargs.pop('max_item_count') - except KeyError: - feed_options["maxItemCount"] = args[3] - - if kwargs.get("partition_key"): + if feed_options.get("continuation") is not None: + change_feed_state_context["continuation"] = feed_options.pop('continuation') + + if kwargs.get("max_item_count") is not None: + feed_options["maxItemCount"] = kwargs.pop('max_item_count') + + if kwargs.get("partition_key") is not None: change_feed_state_context["partitionKey"] = self._set_partition_key(kwargs.pop("partition_key")) - if kwargs.get("feed_range"): + if kwargs.get("feed_range") is not None: change_feed_state_context["feedRange"] = kwargs.pop('feed_range') feed_options["containerProperties"] = self._get_properties() diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/container.py b/sdk/cosmos/azure-cosmos/azure/cosmos/container.py index 5e29302bcaae..cee4bcb4843b 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/container.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/container.py @@ -326,73 +326,98 @@ def query_items_change_feed( *, max_item_count: Optional[int] = None, start_time: Optional[Union[datetime, Literal["Now", "Beginning"]]] = None, - partition_key: Optional[PartitionKeyType] = None, priority: Optional[Literal["High", "Low"]] = None, **kwargs: Any ) -> ItemPaged[Dict[str, Any]]: - # pylint: disable=line-too-long + """Get a sorted list of items that were changed in the entire container, + in the order in which they were modified, + + :keyword int max_item_count: Max number of items to be returned in the enumeration operation. + :keyword start_time:The start time to start processing chang feed items. + Beginning: Processing the change feed items from the beginning of the change feed. + Now: Processing change feed from the current time, so only events for all future changes will be retrieved. + ~datetime.datetime: processing change feed from a point of time. Provided value will be converted to UTC. 
+ By default, it is start from current ("Now") + :type start_time: Union[~datetime.datetime, Literal["Now", "Beginning"]] + :keyword Literal["High", "Low"] priority: Priority based execution allows users to set a priority for each + request. Once the user has reached their provisioned throughput, low priority requests are throttled + before high priority requests start getting throttled. Feature must first be enabled at the account level. + :keyword Callable response_hook: A callable invoked with the response metadata. + :returns: An Iterable of items (dicts). + :rtype: Iterable[Dict[str, Any]] + """ + ... + + @overload + def query_items_change_feed( + self, + *, + max_item_count: Optional[int] = None, + start_time: Optional[Union[datetime, Literal["Now", "Beginning"]]] = None, + partition_key: PartitionKeyType, + priority: Optional[Literal["High", "Low"]] = None, + **kwargs: Any + ) -> ItemPaged[Dict[str, Any]]: """Get a sorted list of items that were changed, in the order in which they were modified. :keyword int max_item_count: Max number of items to be returned in the enumeration operation. - :keyword Union[datetime, Literal["Now", "Beginning"]] start_time: - The start time to start processing chang feed items. - Beginning: Processing the change feed items from the beginning of the change feed. - Now: Processing change feed from the current time, so only events for all future changes will be retrieved. - ~datetime.datetime: processing change feed from a point of time. Provided value will be converted to UTC. - By default, it is start from current (NOW) - :keyword PartitionKeyType partition_key: The partition key that is used to define the scope + :keyword start_time:The start time to start processing chang feed items. + Beginning: Processing the change feed items from the beginning of the change feed. + Now: Processing change feed from the current time, so only events for all future changes will be retrieved. + ~datetime.datetime: processing change feed from a point of time. Provided value will be converted to UTC. + By default, it is start from current ("Now") + :type start_time: Union[~datetime.datetime, Literal["Now", "Beginning"]] + :keyword partition_key: The partition key that is used to define the scope (logical partition or a subset of a container) + :type partition_key: Union[str, int, float, bool, List[Union[str, int, float, bool]]] :keyword Literal["High", "Low"] priority: Priority based execution allows users to set a priority for each request. Once the user has reached their provisioned throughput, low priority requests are throttled before high priority requests start getting throttled. Feature must first be enabled at the account level. + :keyword Callable response_hook: A callable invoked with the response metadata. :returns: An Iterable of items (dicts). :rtype: Iterable[Dict[str, Any]] """ - # pylint: enable=line-too-long ... @overload def query_items_change_feed( self, *, - feed_range: Optional[str] = None, + feed_range: str, max_item_count: Optional[int] = None, start_time: Optional[Union[datetime, Literal["Now", "Beginning"]]] = None, priority: Optional[Literal["High", "Low"]] = None, **kwargs: Any ) -> ItemPaged[Dict[str, Any]]: - # pylint: disable=line-too-long """Get a sorted list of items that were changed, in the order in which they were modified. :keyword str feed_range: The feed range that is used to define the scope. - By default, the scope will be the entire container. 
:keyword int max_item_count: Max number of items to be returned in the enumeration operation. - :keyword Union[datetime, Literal["Now", "Beginning"]] - start_time: The start time to start processing chang feed items. - Beginning: Processing the change feed items from the beginning of the change feed. - Now: Processing change feed from the current time, so only events for all future changes will be retrieved. - ~datetime.datetime: processing change feed from a point of time. Provided value will be converted to UTC. - By default, it is start from current (NOW) + :keyword start_time: The start time to start processing chang feed items. + Beginning: Processing the change feed items from the beginning of the change feed. + Now: Processing change feed from the current time, so only events for all future changes will be retrieved. + ~datetime.datetime: processing change feed from a point of time. Provided value will be converted to UTC. + By default, it is start from current ("Now") + :type start_time: Union[~datetime.datetime, Literal["Now", "Beginning"]] :keyword Literal["High", "Low"] priority: Priority based execution allows users to set a priority for each request. Once the user has reached their provisioned throughput, low priority requests are throttled before high priority requests start getting throttled. Feature must first be enabled at the account level. + :keyword Callable response_hook: A callable invoked with the response metadata. :returns: An Iterable of items (dicts). :rtype: Iterable[Dict[str, Any]] """ - # pylint: enable=line-too-long ... @overload def query_items_change_feed( self, *, - continuation: Optional[str] = None, + continuation: str, max_item_count: Optional[int] = None, priority: Optional[Literal["High", "Low"]] = None, **kwargs: Any ) -> ItemPaged[Dict[str, Any]]: - # pylint: disable=line-too-long """Get a sorted list of items that were changed, in the order in which they were modified. :keyword str continuation: The continuation token retrieved from previous response. @@ -400,10 +425,10 @@ def query_items_change_feed( :keyword Literal["High", "Low"] priority: Priority based execution allows users to set a priority for each request. Once the user has reached their provisioned throughput, low priority requests are throttled before high priority requests start getting throttled. Feature must first be enabled at the account level. + :keyword Callable response_hook: A callable invoked with the response metadata. :returns: An Iterable of items (dicts). :rtype: Iterable[Dict[str, Any]] """ - # pylint: enable=line-too-long ... @distributed_trace @@ -413,13 +438,13 @@ def query_items_change_feed( **kwargs: Any ) -> ItemPaged[Dict[str, Any]]: # pylint: disable=too-many-statements - if kwargs.get("priority"): + if kwargs.get("priority") is not None: kwargs['priority'] = kwargs['priority'] feed_options = build_options(kwargs) change_feed_state_context = {} # Back compatibility with deprecation warnings for partition_key_range_id - if (args and args[0] is not None) or kwargs.get("partition_key_range_id"): + if (args and args[0] is not None) or kwargs.get("partition_key_range_id") is not None: warnings.warn( "partition_key_range_id is deprecated. 
Please pass in feed_range instead.", DeprecationWarning @@ -431,51 +456,52 @@ def query_items_change_feed( change_feed_state_context['partitionKeyRangeId'] = args[0] # Back compatibility with deprecation warnings for is_start_from_beginning - if (len(args) >= 2 and args[1] is not None) or kwargs.get("is_start_from_beginning"): + if (len(args) >= 2 and args[1] is not None) or kwargs.get("is_start_from_beginning") is not None: warnings.warn( "is_start_from_beginning is deprecated. Please pass in start_time instead.", DeprecationWarning ) + if kwargs.get("start_time") is not None: + raise ValueError("is_start_from_beginning and start_time are exclusive, please only set one of them") + try: is_start_from_beginning = kwargs.pop('is_start_from_beginning') except KeyError: is_start_from_beginning = args[1] - if is_start_from_beginning: + if is_start_from_beginning is True: change_feed_state_context["startTime"] = "Beginning" # parse start_time - if kwargs.get("start_time"): - if change_feed_state_context.get("startTime") is not None: - raise ValueError("is_start_from_beginning and start_time are exclusive, please only set one of them") + if kwargs.get("start_time") is not None: start_time = kwargs.pop('start_time') if not isinstance(start_time, (datetime, str)): raise TypeError( - "'start_time' must be either a datetime object, or either the values 'now' or 'beginning'.") + "'start_time' must be either a datetime object, or either the values 'Now' or 'Beginning'.") change_feed_state_context["startTime"] = start_time # parse continuation token - if len(args) >= 3 and args[2] is not None or feed_options.get("continuation"): + if len(args) >= 3 and args[2] is not None or feed_options.get("continuation") is not None: try: continuation = feed_options.pop('continuation') except KeyError: continuation = args[2] change_feed_state_context["continuation"] = continuation - if len(args) >= 4 and args[3] is not None or kwargs.get("max_item_count"): + if len(args) >= 4 and args[3] is not None or kwargs.get("max_item_count") is not None: try: feed_options["maxItemCount"] = kwargs.pop('max_item_count') except KeyError: feed_options["maxItemCount"] = args[3] - if kwargs.get("partition_key"): + if kwargs.get("partition_key") is not None: change_feed_state_context["partitionKey"] = self._set_partition_key(kwargs.pop('partition_key')) change_feed_state_context["partitionKeyFeedRange"] =\ self._get_epk_range_for_partition_key(change_feed_state_context["partitionKey"]) - if kwargs.get("feed_range"): + if kwargs.get("feed_range") is not None: change_feed_state_context["feedRange"] = kwargs.pop('feed_range') container_properties = self._get_properties() diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/exceptions.py b/sdk/cosmos/azure-cosmos/azure/cosmos/exceptions.py index 262de2ffbbbe..ed6e6b114869 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/exceptions.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/exceptions.py @@ -137,7 +137,7 @@ def __init__(self, **kwargs): class CosmosFeedRangeGoneError(CosmosHttpResponseError): - """An HTTP error response with status code 404.""" + """An HTTP error response with status code 410.""" def __init__(self, message=None, response=None, **kwargs): """ :param int sub_status_code: HTTP response sub code. diff --git a/sdk/cosmos/azure-mgmt-cosmosdb/toc_tree.rst b/sdk/cosmos/azure-mgmt-cosmosdb/toc_tree.rst new file mode 100644 index 000000000000..5b7484884dd7 --- /dev/null +++ b/sdk/cosmos/azure-mgmt-cosmosdb/toc_tree.rst @@ -0,0 +1,8 @@ +.. 
toctree:: + :maxdepth: 5 + :glob: + :caption: Developer Documentation + + ref/azure.common + /Users/annie-mac/dev/git/azure-sdk-for-python/sdk/cosmos/azure-mgmt-cosmosdb/.tox/sphinx/tmp/dist/unzipped/docgen/azure.mgmt.cosmosdb.rst + ref/azure.servicemanagement From 65ed1329052930c6508da2cea505f9a2994aebf8 Mon Sep 17 00:00:00 2001 From: annie-mac Date: Tue, 27 Aug 2024 22:21:43 -0700 Subject: [PATCH 11/20] resolve comments --- .../_routing/aio/routing_map_provider.py | 8 +++---- .../cosmos/_routing/routing_map_provider.py | 8 +++---- .../azure/cosmos/aio/_container.py | 18 +++++++++++++-- .../azure-cosmos/azure/cosmos/container.py | 22 +++++++++++++++---- 4 files changed, 42 insertions(+), 14 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_routing/aio/routing_map_provider.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_routing/aio/routing_map_provider.py index ebf1ee82b005..ba0b5ca3a3e6 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_routing/aio/routing_map_provider.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_routing/aio/routing_map_provider.py @@ -49,7 +49,7 @@ def __init__(self, client): # keeps the cached collection routing map by collection id self._collection_routing_map_by_item = {} - async def get_overlapping_ranges(self, collection_link, partition_key_ranges): + async def get_overlapping_ranges(self, collection_link, partition_key_ranges, **kwargs): """Given a partition key range and a collection, return the list of overlapping partition key ranges. @@ -64,7 +64,7 @@ async def get_overlapping_ranges(self, collection_link, partition_key_ranges): collection_routing_map = self._collection_routing_map_by_item.get(collection_id) if collection_routing_map is None: - collection_pk_ranges = [pk async for pk in cl._ReadPartitionKeyRanges(collection_link)] + collection_pk_ranges = [pk async for pk in cl._ReadPartitionKeyRanges(collection_link, **kwargs)] # for large collections, a split may complete between the read partition key ranges query page responses, # causing the partitionKeyRanges to have both the children ranges and their parents. Therefore, we need # to discard the parent ranges to have a valid routing map. 
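For reference, a minimal usage sketch of the keyword-only query_items_change_feed shape documented in the overloads above. The account endpoint, key, database/container names, partition key value and date are placeholders, not values taken from this change:

    from datetime import datetime, timezone
    from azure.cosmos import CosmosClient

    # Placeholders: endpoint, key, database and container names are assumptions.
    client = CosmosClient("https://<account>.documents.azure.com:443/", credential="<key>")
    container = client.get_database_client("<database>").get_container_client("<container>")

    # Scope the change feed to one logical partition and a point in time,
    # using the keyword-only arguments described in the overloads above.
    for item in container.query_items_change_feed(
            partition_key="<pk-value>",
            start_time=datetime(2024, 8, 1, tzinfo=timezone.utc),
            max_item_count=100):
        print(item["id"])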
@@ -131,7 +131,7 @@ class SmartRoutingMapProvider(PartitionKeyRangeCache): invocation of CollectionRoutingMap.get_overlapping_ranges() """ - async def get_overlapping_ranges(self, collection_link, partition_key_ranges): + async def get_overlapping_ranges(self, collection_link, partition_key_ranges, **kwargs): """ Given the sorted ranges and a collection, Returns the list of overlapping partition key ranges @@ -166,7 +166,7 @@ async def get_overlapping_ranges(self, collection_link, partition_key_ranges): queryRange = currentProvidedRange overlappingRanges = await PartitionKeyRangeCache.get_overlapping_ranges(self, - collection_link, queryRange) + collection_link, queryRange, **kwargs) assert overlappingRanges, "code bug: returned overlapping ranges for queryRange {} is empty".format( queryRange ) diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_routing/routing_map_provider.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_routing/routing_map_provider.py index 59c609dec7ea..5a6bb304b5c8 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_routing/routing_map_provider.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_routing/routing_map_provider.py @@ -50,7 +50,7 @@ def __init__(self, client): # keeps the cached collection routing map by collection id self._collection_routing_map_by_item = {} - def get_overlapping_ranges(self, collection_link, partition_key_ranges): + def get_overlapping_ranges(self, collection_link, partition_key_ranges, **kwargs): """Given a partition key range and a collection, return the list of overlapping partition key ranges. @@ -65,7 +65,7 @@ def get_overlapping_ranges(self, collection_link, partition_key_ranges): collection_routing_map = self._collection_routing_map_by_item.get(collection_id) if collection_routing_map is None: - collection_pk_ranges = list(cl._ReadPartitionKeyRanges(collection_link)) + collection_pk_ranges = list(cl._ReadPartitionKeyRanges(collection_link, **kwargs)) # for large collections, a split may complete between the read partition key ranges query page responses, # causing the partitionKeyRanges to have both the children ranges and their parents. Therefore, we need # to discard the parent ranges to have a valid routing map. 
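The read_feed_ranges change below threads **kwargs into get_overlapping_ranges and returns base64-encoded range strings that can be passed back through the feed_range keyword. A hedged sketch of one way those ranges might be consumed; the endpoint, key and resource names are placeholders:

    from azure.cosmos import CosmosClient

    # Placeholders: endpoint, key, database and container names are assumptions.
    container = (CosmosClient("https://<account>.documents.azure.com:443/", credential="<key>")
                 .get_database_client("<database>")
                 .get_container_client("<container>"))

    feed_ranges = container.read_feed_ranges(force_refresh=True)  # List[str], base64 encoded

    # Each feed range covers a disjoint slice of the container and can be drained
    # independently, e.g. one worker per range.
    for feed_range in feed_ranges:
        for item in container.query_items_change_feed(feed_range=feed_range, start_time="Beginning"):
            print(item["id"])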
@@ -132,7 +132,7 @@ class SmartRoutingMapProvider(PartitionKeyRangeCache): invocation of CollectionRoutingMap.get_overlapping_ranges() """ - def get_overlapping_ranges(self, collection_link, partition_key_ranges): + def get_overlapping_ranges(self, collection_link, partition_key_ranges, **kwargs): """ Given the sorted ranges and a collection, Returns the list of overlapping partition key ranges @@ -166,7 +166,7 @@ def get_overlapping_ranges(self, collection_link, partition_key_ranges): else: queryRange = currentProvidedRange - overlappingRanges = PartitionKeyRangeCache.get_overlapping_ranges(self, collection_link, queryRange) + overlappingRanges = PartitionKeyRangeCache.get_overlapping_ranges(self, collection_link, queryRange, **kwargs) assert overlappingRanges, "code bug: returned overlapping ranges for queryRange {} is empty".format( queryRange ) diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py index bc4f975a19e8..f289442cf3b3 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py @@ -1228,15 +1228,29 @@ async def execute_item_batch( return await self.client_connection.Batch( collection_link=self.container_link, batch_operations=batch_operations, options=request_options, **kwargs) - async def read_feed_ranges( # pylint: disable=unused-argument + async def read_feed_ranges( self, + *, + force_refresh: Optional[bool] = False, **kwargs: Any ) -> List[str]: + """ Obtains a list of feed ranges that can be used to parallelize feed operations. + + :param bool force_refresh: + Flag to indicate whether obtain the list of feed ranges directly from cache or refresh the cache. + :returns: A list representing the feed ranges in base64 encoded string + :rtype: List[str] + + """ + if force_refresh is True: + self.client_connection.refresh_routing_map_provider() + partition_key_ranges =\ await self.client_connection._routing_map_provider.get_overlapping_ranges( self.container_link, # default to full range - [Range("", "FF", True, False)]) + [Range("", "FF", True, False)], + **kwargs) return [routing_range.Range.PartitionKeyRangeToRange(partitionKeyRange).to_base64_encoded_string() for partitionKeyRange in partition_key_ranges] diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/container.py b/sdk/cosmos/azure-cosmos/azure/cosmos/container.py index cee4bcb4843b..017c58b8b492 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/container.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/container.py @@ -1310,14 +1310,28 @@ def delete_all_items_by_partition_key( self.client_connection.DeleteAllItemsByPartitionKey( collection_link=self.container_link, options=request_options, **kwargs) - def read_feed_ranges( # pylint: disable=unused-argument + def read_feed_ranges( self, - **kwargs: Any - ) -> List[str]: + *, + force_refresh: Optional[bool] = False, + **kwargs: Any) -> List[str]: + + """ Obtains a list of feed ranges that can be used to parallelize feed operations. + + :param bool force_refresh: + Flag to indicate whether obtain the list of feed ranges directly from cache or refresh the cache. 
+ :returns: A list representing the feed ranges in base64 encoded string + :rtype: List[str] + + """ + if force_refresh is True: + self.client_connection.refresh_routing_map_provider() + partition_key_ranges =\ self.client_connection._routing_map_provider.get_overlapping_ranges( self.container_link, - [Range("", "FF", True, False)]) # default to full range + [Range("", "FF", True, False)], # default to full range + **kwargs) return [routing_range.Range.PartitionKeyRangeToRange(partitionKeyRange).to_base64_encoded_string() for partitionKeyRange in partition_key_ranges] From 4bb30d27fe3e936f70a1cb632545eea6b65632f1 Mon Sep 17 00:00:00 2001 From: annie-mac Date: Tue, 27 Aug 2024 22:43:45 -0700 Subject: [PATCH 12/20] resolve comments --- .../_change_feed/aio/change_feed_iterable.py | 19 ++++---- .../_change_feed/change_feed_iterable.py | 7 ++- .../cosmos/_change_feed/change_feed_state.py | 43 ++++++++----------- .../azure/cosmos/_cosmos_client_connection.py | 5 +-- .../azure/cosmos/_routing/routing_range.py | 4 ++ .../azure-cosmos/azure/cosmos/_utils.py | 10 ----- .../azure/cosmos/aio/_container.py | 22 +++++++--- .../aio/_cosmos_client_connection_async.py | 5 +-- .../azure-cosmos/azure/cosmos/container.py | 16 +++---- .../azure-cosmos/azure/cosmos/exceptions.py | 14 ------ .../azure/cosmos/partition_key.py | 16 +++---- 11 files changed, 68 insertions(+), 93 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_iterable.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_iterable.py index 745ed19279c7..f265805d390c 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_iterable.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_iterable.py @@ -21,14 +21,13 @@ """Iterable change feed results in the Azure Cosmos database service. 
""" -from typing import Dict, Any, Optional, Callable, Coroutine, Tuple, List, AsyncIterator +from collections.abc import Awaitable +from typing import Dict, Any, Optional, Callable, Tuple, List, AsyncIterator from azure.core.async_paging import AsyncPageIterator -from azure.cosmos import PartitionKey from azure.cosmos._change_feed.aio.change_feed_fetcher import ChangeFeedFetcherV1, ChangeFeedFetcherV2 from azure.cosmos._change_feed.change_feed_state import ChangeFeedState, ChangeFeedStateVersion -from azure.cosmos._utils import is_base64_encoded # pylint: disable=protected-access @@ -43,7 +42,7 @@ def __init__( self, client, options: Dict[str, Any], - fetch_function=Optional[Callable[[Dict[str, Any]], Coroutine[Tuple[List[Dict[str, Any]], Dict[str, Any]]]]], + fetch_function=Optional[Callable[[Dict[str, Any]], Awaitable[Tuple[List[Dict[str, Any]], Dict[str, Any]]]]], collection_link=Optional[str], continuation_token=Optional[str], ) -> None: @@ -79,10 +78,10 @@ def __init__( # v2 version: the continuation token will be base64 encoded composition token # which includes full change feed state if continuation is not None: - if is_base64_encoded(continuation): - change_feed_state_context["continuationFeedRange"] = continuation - else: + if continuation.isdigit() or continuation.strip('\'"').isdigit(): change_feed_state_context["continuationPkRangeId"] = continuation + else: + change_feed_state_context["continuationFeedRange"] = continuation self._validate_change_feed_state_context(change_feed_state_context) self._options["changeFeedStateContext"] = change_feed_state_context @@ -118,16 +117,14 @@ async def _initialize_change_feed_fetcher(self) -> None: conn_properties = await self._options.pop("containerProperties") if change_feed_state_context.get("partitionKey"): change_feed_state_context["partitionKey"] = await change_feed_state_context.pop("partitionKey") - pk_properties = conn_properties.get("partitionKey") - partition_key_definition = PartitionKey(path=pk_properties["paths"], kind=pk_properties["kind"]) change_feed_state_context["partitionKeyFeedRange"] =\ - partition_key_definition._get_epk_range_for_partition_key(change_feed_state_context["partitionKey"]) + await change_feed_state_context.pop("partitionKeyFeedRange") change_feed_state =\ ChangeFeedState.from_json(self._collection_link, conn_properties["_rid"], change_feed_state_context) self._options["changeFeedState"] = change_feed_state - if change_feed_state.version != ChangeFeedStateVersion.V1: + if change_feed_state.version == ChangeFeedStateVersion.V1: self._change_feed_fetcher = ChangeFeedFetcherV1( self._client, self._collection_link, diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_iterable.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_iterable.py index 6eaaa31bfd8f..e8f3e414bc4f 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_iterable.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_iterable.py @@ -27,7 +27,6 @@ from azure.cosmos._change_feed.change_feed_fetcher import ChangeFeedFetcherV1, ChangeFeedFetcherV2 from azure.cosmos._change_feed.change_feed_state import ChangeFeedState, ChangeFeedStateVersion -from azure.cosmos._utils import is_base64_encoded class ChangeFeedIterable(PageIterator): @@ -75,10 +74,10 @@ def __init__( # v2 version: the continuation token will be base64 encoded composition token # which includes full change feed state if continuation is not None: - if is_base64_encoded(continuation): - 
change_feed_state_context["continuationFeedRange"] = continuation - else: + if continuation.isdigit() or continuation.strip('\'"').isdigit(): change_feed_state_context["continuationPkRangeId"] = continuation + else: + change_feed_state_context["continuationFeedRange"] = continuation self._validate_change_feed_state_context(change_feed_state_context) self._options["changeFeedStateContext"] = change_feed_state_context diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_state.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_state.py index 95d5624017a8..77e603b3d834 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_state.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_state.py @@ -39,7 +39,8 @@ from azure.cosmos._routing.aio.routing_map_provider import SmartRoutingMapProvider as AsyncSmartRoutingMapProvider from azure.cosmos._routing.routing_map_provider import SmartRoutingMapProvider from azure.cosmos._routing.routing_range import Range -from azure.cosmos.exceptions import CosmosFeedRangeGoneError +from azure.cosmos.exceptions import CosmosHttpResponseError +from azure.cosmos.http_constants import StatusCodes, SubStatusCodes from azure.cosmos.partition_key import _Empty, _Undefined class ChangeFeedStateVersion(Enum): @@ -93,7 +94,7 @@ def from_json( if version is None: raise ValueError("Invalid base64 encoded continuation string [Missing version]") - if version == "V2": + if version == ChangeFeedStateVersion.V2.value: return ChangeFeedStateV2.from_continuation(container_link, container_rid, continuation_json) raise ValueError("Invalid base64 encoded continuation string [Invalid version]") @@ -121,7 +122,7 @@ def __init__( self._partition_key_range_id = partition_key_range_id self._partition_key = partition_key self._continuation = continuation - super(ChangeFeedStateV1).__init__(ChangeFeedStateVersion.V1) + super(ChangeFeedStateV1, self).__init__(ChangeFeedStateVersion.V1) @property def container_rid(self): @@ -148,11 +149,6 @@ def populate_request_headers( request_headers: Dict[str, Any]) -> None: request_headers[http_constants.HttpHeaders.AIM] = http_constants.HttpHeaders.IncrementalFeedHeaderValue - # When a merge happens, the child partition will contain documents ordered by LSN but the _ts/creation time - # of the documents may not be sequential. So when reading the changeFeed by LSN, - # it is possible to encounter documents with lower _ts. - # In order to guarantee we always get the documents after customer's point start time, - # we will need to always pass the start time in the header. self._change_feed_start_from.populate_request_headers(request_headers) if self._continuation: request_headers[http_constants.HttpHeaders.IfNoneMatch] = self._continuation @@ -164,11 +160,6 @@ async def populate_request_headers_async( request_headers[http_constants.HttpHeaders.AIM] = http_constants.HttpHeaders.IncrementalFeedHeaderValue - # When a merge happens, the child partition will contain documents ordered by LSN but the _ts/creation time - # of the documents may not be sequential. - # So when reading the changeFeed by LSN, it is possible to encounter documents with lower _ts. - # In order to guarantee we always get the documents after customer's point start time, - # we will need to always pass the start time in the header. 
self._change_feed_start_from.populate_request_headers(request_headers) if self._continuation: request_headers[http_constants.HttpHeaders.IfNoneMatch] = self._continuation @@ -216,7 +207,7 @@ def __init__( else: self._continuation = continuation - super(ChangeFeedStateV2).__init__(ChangeFeedStateVersion.V2) + super(ChangeFeedStateV2, self).__init__(ChangeFeedStateVersion.V2) @property def container_rid(self) -> str : @@ -258,11 +249,7 @@ def populate_request_headers( [self._continuation.current_token.feed_range]) if len(over_lapping_ranges) > 1: - raise CosmosFeedRangeGoneError( - message= - f"Range {self._continuation.current_token.feed_range}" - f" spans {len(over_lapping_ranges)}" - f" physical partitions: {[child_range['id'] for child_range in over_lapping_ranges]}") + raise self.get_feed_range_gone_error(over_lapping_ranges) overlapping_feed_range = Range.PartitionKeyRangeToRange(over_lapping_ranges[0]) if overlapping_feed_range == self._continuation.current_token.feed_range: @@ -304,11 +291,7 @@ async def populate_request_headers_async( [self._continuation.current_token.feed_range]) if len(over_lapping_ranges) > 1: - raise CosmosFeedRangeGoneError( - message= - f"Range {self._continuation.current_token.feed_range}" - f" spans {len(over_lapping_ranges)}" - f" physical partitions: {[child_range['id'] for child_range in over_lapping_ranges]}") + raise self.get_feed_range_gone_error(over_lapping_ranges) overlapping_feed_range = Range.PartitionKeyRangeToRange(over_lapping_ranges[0]) if overlapping_feed_range == self._continuation.current_token.feed_range: @@ -348,6 +331,18 @@ def should_retry_on_not_modified_response(self) -> bool: def apply_not_modified_response(self) -> None: self._continuation.apply_not_modified_response() + def get_feed_range_gone_error(self, over_lapping_ranges: list[Dict[str, Any]]) -> CosmosHttpResponseError: + formatted_message =\ + (f"Status code: {StatusCodes.GONE} " + f"Sub-status: {SubStatusCodes.PARTITION_KEY_RANGE_GONE}. 
" + f"Range {self._continuation.current_token.feed_range}" + f" spans {len(over_lapping_ranges)} physical partitions:" + f" {[child_range['id'] for child_range in over_lapping_ranges]}") + + response_error = CosmosHttpResponseError(status_code=StatusCodes.GONE, message=formatted_message) + response_error.sub_status = SubStatusCodes.PARTITION_KEY_RANGE_GONE + return response_error + @classmethod def from_continuation( cls, diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_cosmos_client_connection.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_cosmos_client_connection.py index 87e72391cf0a..1f44c491ae46 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_cosmos_client_connection.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_cosmos_client_connection.py @@ -3025,9 +3025,8 @@ def __GetBodiesFromQueryResult(result: Dict[str, Any]) -> List[Dict[str, Any]]: partition_key_range_id ) - change_feed_state = options.get("changeFeedState", None) - if change_feed_state and isinstance(change_feed_state, ChangeFeedState): - change_feed_state.populate_request_headers(self._routing_map_provider, headers) + if options.get("changeFeedState") is not None: + options.pop("changeFeedState").populate_request_headers(self._routing_map_provider, headers) result, last_response_headers = self.__Get(path, request_params, headers, **kwargs) self.last_response_headers = last_response_headers diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_routing/routing_range.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_routing/routing_range.py index a2d789f20644..f2e7576bf376 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_routing/routing_range.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_routing/routing_range.py @@ -25,8 +25,12 @@ import base64 import binascii import json +from typing import Dict, Any +def partition_key_range_to_range_string(partition_key_range: Dict[str, Any]) -> str: + return Range.PartitionKeyRangeToRange(partition_key_range).to_base64_encoded_string() + class PartitionKeyRange(object): """Partition Key Range Constants""" diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_utils.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_utils.py index 6e3a8c67fcfe..1b3d0370e6ef 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_utils.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_utils.py @@ -69,13 +69,3 @@ def get_index_metrics_info(delimited_string: Optional[str]) -> Dict[str, Any]: return result except (json.JSONDecodeError, ValueError): return {} - - -def is_base64_encoded(data: str) -> bool: - if data is None: - return False - try: - base64.b64decode(data, validate=True).decode('utf-8') - return True - except (json.JSONDecodeError, ValueError): - return False diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py index f289442cf3b3..fc6503aa630e 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py @@ -41,14 +41,13 @@ GenerateGuidId, _set_properties_cache ) -from .._routing import routing_range -from .._routing.routing_range import Range +from .._routing.routing_range import Range, partition_key_range_to_range_string from ..offer import ThroughputProperties from ..partition_key import ( NonePartitionKeyValue, _return_undefined_or_empty_partition_key, _Empty, - _Undefined + _Undefined, PartitionKey ) __all__ = ("ContainerProxy",) @@ -136,6 +135,16 @@ async def _set_partition_key( return _return_undefined_or_empty_partition_key(await self.is_system_key) return cast(Union[str, int, 
float, bool, List[Union[str, int, float, bool]]], partition_key) + async def _get_epk_range_for_partition_key( + self, + partition_key_value: Union[str, int, float, bool, Sequence[Union[str, int, float, bool, None]], Type[NonePartitionKeyValue]]) -> Range: # pylint: disable=line-too-long + + container_properties = await self._get_properties() + partition_key_definition = container_properties["partitionKey"] + partition_key = PartitionKey(path=partition_key_definition["paths"], kind=partition_key_definition["kind"]) + + return partition_key._get_epk_range_for_partition_key(partition_key_value) + @distributed_trace_async async def read( self, @@ -646,7 +655,9 @@ def query_items_change_feed( # pylint: disable=unused-argument feed_options["maxItemCount"] = kwargs.pop('max_item_count') if kwargs.get("partition_key") is not None: - change_feed_state_context["partitionKey"] = self._set_partition_key(kwargs.pop("partition_key")) + change_feed_state_context["partitionKey"] = self._set_partition_key(kwargs.get("partition_key")) + change_feed_state_context["partitionKeyFeedRange"] = \ + self._get_epk_range_for_partition_key(kwargs.pop('partition_key')) if kwargs.get("feed_range") is not None: change_feed_state_context["feedRange"] = kwargs.pop('feed_range') @@ -1252,5 +1263,4 @@ async def read_feed_ranges( [Range("", "FF", True, False)], **kwargs) - return [routing_range.Range.PartitionKeyRangeToRange(partitionKeyRange).to_base64_encoded_string() - for partitionKeyRange in partition_key_ranges] + return [partition_key_range_to_range_string(partitionKeyRange) for partitionKeyRange in partition_key_ranges] diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_cosmos_client_connection_async.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_cosmos_client_connection_async.py index b2a9aaff9ec1..47e83f3e31df 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_cosmos_client_connection_async.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_cosmos_client_connection_async.py @@ -2814,9 +2814,8 @@ def __GetBodiesFromQueryResult(result: Dict[str, Any]) -> List[Dict[str, Any]]: ) headers = base.GetHeaders(self, initial_headers, "get", path, id_, typ, options, partition_key_range_id) - change_feed_state = options.get("changeFeedState", None) - if change_feed_state and isinstance(change_feed_state, ChangeFeedState): - await change_feed_state.populate_request_headers_async(self._routing_map_provider, headers) + if options.get("changeFeedState") is not None: + await options.pop("changeFeedState").populate_request_headers_async(self._routing_map_provider, headers) result, self.last_response_headers = await self.__Get(path, request_params, headers, **kwargs) if response_hook: diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/container.py b/sdk/cosmos/azure-cosmos/azure/cosmos/container.py index 017c58b8b492..7ecac5391407 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/container.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/container.py @@ -39,8 +39,7 @@ _set_properties_cache ) from ._cosmos_client_connection import CosmosClientConnection -from ._routing import routing_range -from ._routing.routing_range import Range +from ._routing.routing_range import Range, partition_key_range_to_range_string from .offer import Offer, ThroughputProperties from .partition_key import ( NonePartitionKeyValue, @@ -133,7 +132,7 @@ def _set_partition_key( def _get_epk_range_for_partition_key( self, - partition_key_value: Union[str, int, float, bool, List[Union[str, int, float, bool]], _Empty, _Undefined]) -> Range: # pylint: 
disable=line-too-long + partition_key_value: Union[str, int, float, bool, Sequence[Union[str, int, float, bool, None]], Type[NonePartitionKeyValue]]) -> Range: # pylint: disable=line-too-long container_properties = self._get_properties() partition_key_definition = container_properties["partitionKey"] partition_key = PartitionKey(path=partition_key_definition["paths"], kind=partition_key_definition["kind"]) @@ -497,9 +496,9 @@ def query_items_change_feed( feed_options["maxItemCount"] = args[3] if kwargs.get("partition_key") is not None: - change_feed_state_context["partitionKey"] = self._set_partition_key(kwargs.pop('partition_key')) + change_feed_state_context["partitionKey"] = self._set_partition_key(kwargs.get('partition_key')) change_feed_state_context["partitionKeyFeedRange"] =\ - self._get_epk_range_for_partition_key(change_feed_state_context["partitionKey"]) + self._get_epk_range_for_partition_key(kwargs.pop('partition_key')) if kwargs.get("feed_range") is not None: change_feed_state_context["feedRange"] = kwargs.pop('feed_range') @@ -644,9 +643,7 @@ def query_items( # pylint:disable=docstring-missing-param return items def __is_prefix_partitionkey( - self, - partition_key: Union[str, int, float, bool, List[Union[str, int, float, bool]], _Empty, _Undefined]) -> bool: # pylint: disable=line-too-long - + self, partition_key: PartitionKeyType) -> bool: properties = self._get_properties() pk_properties = properties["partitionKey"] partition_key_definition = PartitionKey(path=pk_properties["paths"], kind=pk_properties["kind"]) @@ -1333,5 +1330,4 @@ def read_feed_ranges( [Range("", "FF", True, False)], # default to full range **kwargs) - return [routing_range.Range.PartitionKeyRangeToRange(partitionKeyRange).to_base64_encoded_string() - for partitionKeyRange in partition_key_ranges] + return [partition_key_range_to_range_string(partitionKeyRange) for partitionKeyRange in partition_key_ranges] diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/exceptions.py b/sdk/cosmos/azure-cosmos/azure/cosmos/exceptions.py index ed6e6b114869..7170a4d1dc39 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/exceptions.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/exceptions.py @@ -135,20 +135,6 @@ def __init__(self, **kwargs): self.history = None super(CosmosClientTimeoutError, self).__init__(message, **kwargs) - -class CosmosFeedRangeGoneError(CosmosHttpResponseError): - """An HTTP error response with status code 410.""" - def __init__(self, message=None, response=None, **kwargs): - """ - :param int sub_status_code: HTTP response sub code. 
- """ - - self.sub_status = SubStatusCodes.PARTITION_KEY_RANGE_GONE - self.http_error_message = message - formatted_message = "Status code: %d Sub-status: %d\n%s" % (StatusCodes.GONE, self.sub_status, str(message)) - super(CosmosHttpResponseError, self).__init__(message=formatted_message, response=response, **kwargs) - self.status_code = StatusCodes.GONE - def _partition_range_is_gone(e): if (e.status_code == http_constants.StatusCodes.GONE and e.sub_status == http_constants.SubStatusCodes.PARTITION_KEY_RANGE_GONE): diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/partition_key.py b/sdk/cosmos/azure-cosmos/azure/cosmos/partition_key.py index e4d659a08fac..5870e7519e8b 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/partition_key.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/partition_key.py @@ -149,7 +149,7 @@ def version(self, value: int) -> None: def _get_epk_range_for_prefix_partition_key( self, - pk_value: Sequence[Union[str, int, float, bool, _Empty, _Undefined]] + pk_value: Sequence[Union[None, bool, int, float, str, _Undefined, Type[NonePartitionKeyValue]]] ) -> _Range: if self.kind != "MultiHash": raise ValueError( @@ -175,16 +175,16 @@ def _get_epk_range_for_prefix_partition_key( def _get_epk_range_for_partition_key( self, - pk_value: Union[str, int, float, bool, List[Union[str, int, float, bool]], _Empty, _Undefined] # pylint: disable=line-too-long + pk_value: Union[str, int, float, bool, Sequence[Union[str, int, float, bool, None]], Type[NonePartitionKeyValue]] # pylint: disable=line-too-long ) -> _Range: if self._is_prefix_partition_key(pk_value): return self._get_epk_range_for_prefix_partition_key( - cast(List[Union[str, int, float, bool]], pk_value)) + cast(Sequence[Union[None, bool, int, float, str, Type[NonePartitionKeyValue]]], pk_value)) # else return point range effective_partition_key_string =\ self._get_effective_partition_key_string( - cast(List[Union[str, int, float, bool, _Empty, _Undefined]], [pk_value])) + cast(List[Union[None, bool, int, float, str, _Undefined, Type[NonePartitionKeyValue]]], [pk_value])) return _Range(effective_partition_key_string, effective_partition_key_string, True, True) def _get_effective_partition_key_for_hash_partitioning(self) -> str: @@ -193,7 +193,7 @@ def _get_effective_partition_key_for_hash_partitioning(self) -> str: def _get_effective_partition_key_string( self, - pk_value: Sequence[Union[str, int, float, bool, _Empty, _Undefined]] + pk_value: Sequence[Union[None, bool, int, float, str, _Undefined, Type[NonePartitionKeyValue]]] ) -> Union[int, str]: if not pk_value: return _MinimumInclusiveEffectivePartitionKey @@ -238,7 +238,7 @@ def _write_for_hashing_v2( def _get_effective_partition_key_for_hash_partitioning_v2( self, - pk_value: Sequence[Union[str, int, float, bool, _Empty, _Undefined]] + pk_value: Sequence[Union[None, bool, int, float, str, _Undefined, Type[NonePartitionKeyValue]]] ) -> str: with BytesIO() as ms: for component in pk_value: @@ -257,7 +257,7 @@ def _get_effective_partition_key_for_hash_partitioning_v2( def _get_effective_partition_key_for_multi_hash_partitioning_v2( self, - pk_value: Sequence[Union[str, int, float, bool, _Empty, _Undefined]] + pk_value: Sequence[Union[None, bool, int, float, str, _Undefined, Type[NonePartitionKeyValue]]] ) -> str: sb = [] for value in pk_value: @@ -281,7 +281,7 @@ def _get_effective_partition_key_for_multi_hash_partitioning_v2( def _is_prefix_partition_key( self, - partition_key: Union[str, int, float, bool, List[Union[str, int, float, bool]], _Empty, _Undefined]) -> bool: # 
pylint: disable=line-too-long + partition_key: Union[str, int, float, bool, Sequence[Union[str, int, float, bool, None]], Type[NonePartitionKeyValue]]) -> bool: # pylint: disable=line-too-long if self.kind!= "MultiHash": return False if isinstance(partition_key, list) and len(self.path) == len(partition_key): From 5addcdcc0275d876a335f07215c347327ca54197 Mon Sep 17 00:00:00 2001 From: annie-mac Date: Thu, 29 Aug 2024 09:42:28 -0700 Subject: [PATCH 13/20] fix pylint --- .../_change_feed/aio/change_feed_fetcher.py | 6 +-- .../_change_feed/aio/change_feed_iterable.py | 14 +++--- .../_change_feed/change_feed_fetcher.py | 8 ++-- .../_change_feed/change_feed_iterable.py | 13 +++--- .../cosmos/_change_feed/change_feed_state.py | 2 +- .../composite_continuation_token.py | 2 +- ...feed_range_composite_continuation_token.py | 2 +- .../azure/cosmos/_cosmos_client_connection.py | 3 +- .../_routing/aio/routing_map_provider.py | 8 +++- .../cosmos/_routing/routing_map_provider.py | 3 +- .../azure/cosmos/aio/_container.py | 5 ++- .../aio/_cosmos_client_connection_async.py | 3 +- .../azure-cosmos/azure/cosmos/container.py | 45 ++++++++++--------- .../azure-cosmos/test/test_change_feed.py | 1 + .../test/test_change_feed_async.py | 1 + 15 files changed, 64 insertions(+), 52 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_fetcher.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_fetcher.py index 90aa2d01adfa..376fd0fac397 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_fetcher.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_fetcher.py @@ -25,7 +25,7 @@ import base64 import json from abc import ABC, abstractmethod -from typing import Dict, Any, List, Callable, Tuple, Awaitable +from typing import Dict, Any, List, Callable, Tuple, Awaitable, cast from azure.cosmos import http_constants, exceptions from azure.cosmos._change_feed.change_feed_start_from import ChangeFeedStartFromType @@ -166,7 +166,7 @@ async def fetch_change_feed_items(self) -> List[Dict[str, Any]]: # there is any items in the response or not. if fetched_items: self._change_feed_state.apply_server_response_continuation( - response_headers.get(continuation_key)) + cast(str, response_headers.get(continuation_key))) response_headers[continuation_key] = self._get_base64_encoded_continuation() break @@ -177,7 +177,7 @@ async def fetch_change_feed_items(self) -> List[Dict[str, Any]]: # then we will read from the next feed range until we have looped through all physical partitions self._change_feed_state.apply_not_modified_response() self._change_feed_state.apply_server_response_continuation( - response_headers.get(continuation_key)) + cast(str, response_headers.get(continuation_key))) if (self._change_feed_state._change_feed_start_from.version == ChangeFeedStartFromType.POINT_IN_TIME and is_s_time_first_fetch): diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_iterable.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_iterable.py index f265805d390c..1da44219daa4 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_iterable.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_iterable.py @@ -21,12 +21,12 @@ """Iterable change feed results in the Azure Cosmos database service. 
""" -from collections.abc import Awaitable -from typing import Dict, Any, Optional, Callable, Tuple, List, AsyncIterator +from typing import Dict, Any, Optional, Callable, Tuple, List, AsyncIterator, Awaitable from azure.core.async_paging import AsyncPageIterator -from azure.cosmos._change_feed.aio.change_feed_fetcher import ChangeFeedFetcherV1, ChangeFeedFetcherV2 +from azure.cosmos._change_feed.aio.change_feed_fetcher import ChangeFeedFetcherV1, ChangeFeedFetcherV2, \ + ChangeFeedFetcher from azure.cosmos._change_feed.change_feed_state import ChangeFeedState, ChangeFeedStateVersion @@ -60,7 +60,7 @@ def __init__( self._options = options self._fetch_function = fetch_function self._collection_link = collection_link - self._change_feed_fetcher = None + self._change_feed_fetcher: Optional[ChangeFeedFetcher] = None if self._options.get("changeFeedStateContext") is None: raise ValueError("Missing changeFeedStateContext in feed options") @@ -88,7 +88,10 @@ def __init__( super(ChangeFeedIterable, self).__init__(self._fetch_next, self._unpack, continuation_token=continuation_token) - async def _unpack(self, block) -> Tuple[str, AsyncIterator[List[Dict[str, Any]]]]: + async def _unpack( + self, + block: AsyncIterator[List[Dict[str, Any]]] + ) -> Tuple[Optional[str], AsyncIterator[List[Dict[str, Any]]]]: continuation = None if self._client.last_response_headers: continuation = self._client.last_response_headers.get('etag') @@ -107,6 +110,7 @@ async def _fetch_next(self, *args) -> List[Dict[str, Any]]: # pylint: disable=u if self._change_feed_fetcher is None: await self._initialize_change_feed_fetcher() + assert self._change_feed_fetcher is not None block = await self._change_feed_fetcher.fetch_next_block() if not block: raise StopAsyncIteration diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_fetcher.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_fetcher.py index 35ae9a15a08a..2417eff46259 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_fetcher.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_fetcher.py @@ -25,7 +25,7 @@ import base64 import json from abc import ABC, abstractmethod -from typing import Dict, Any, List, Callable, Tuple +from typing import Dict, Any, List, Callable, Tuple, cast from azure.cosmos import _retry_utility, http_constants, exceptions from azure.cosmos._change_feed.change_feed_start_from import ChangeFeedStartFromType @@ -87,7 +87,7 @@ def fetch_change_feed_items(self) -> List[Dict[str, Any]]: # In change feed queries, the continuation token is always populated. The hasNext() test is whether # there is any items in the response or not. self._change_feed_state.apply_server_response_continuation( - response_headers.get(continuation_key)) + cast(str, response_headers.get(continuation_key))) if fetched_items: break @@ -158,7 +158,7 @@ def fetch_change_feed_items(self) -> List[Dict[str, Any]]: # In change feed queries, the continuation token is always populated. 
if fetched_items: self._change_feed_state.apply_server_response_continuation( - response_headers.get(continuation_key)) + cast(str, response_headers.get(continuation_key))) self._change_feed_state._continuation._move_to_next_token() response_headers[continuation_key] = self._get_base64_encoded_continuation() break @@ -170,7 +170,7 @@ def fetch_change_feed_items(self) -> List[Dict[str, Any]]: # then we will read from the next feed range until we have looped through all physical partitions self._change_feed_state.apply_not_modified_response() self._change_feed_state.apply_server_response_continuation( - response_headers.get(continuation_key)) + cast(str, response_headers.get(continuation_key))) if (self._change_feed_state._change_feed_start_from.version == ChangeFeedStartFromType.POINT_IN_TIME and is_s_time_first_fetch): diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_iterable.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_iterable.py index e8f3e414bc4f..c1174db5f93d 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_iterable.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_iterable.py @@ -21,11 +21,11 @@ """Iterable change feed results in the Azure Cosmos database service. """ -from typing import Dict, Any, Tuple, List, Optional, Callable +from typing import Dict, Any, Tuple, List, Optional, Callable, cast from azure.core.paging import PageIterator -from azure.cosmos._change_feed.change_feed_fetcher import ChangeFeedFetcherV1, ChangeFeedFetcherV2 +from azure.cosmos._change_feed.change_feed_fetcher import ChangeFeedFetcherV1, ChangeFeedFetcherV2, ChangeFeedFetcher from azure.cosmos._change_feed.change_feed_state import ChangeFeedState, ChangeFeedStateVersion @@ -57,7 +57,7 @@ def __init__( self._options = options self._fetch_function = fetch_function self._collection_link = collection_link - self._change_feed_fetcher = None + self._change_feed_fetcher: Optional[ChangeFeedFetcher] = None if self._options.get("changeFeedStateContext") is None: raise ValueError("Missing changeFeedStateContext in feed options") @@ -84,8 +84,8 @@ def __init__( super(ChangeFeedIterable, self).__init__(self._fetch_next, self._unpack, continuation_token=continuation_token) - def _unpack(self, block) -> Tuple[str, List[Dict[str, Any]]]: - continuation = None + def _unpack(self, block: List[Dict[str, Any]]) -> Tuple[Optional[str], List[Dict[str, Any]]]: + continuation: Optional[str] = None if self._client.last_response_headers: continuation = self._client.last_response_headers.get('etag') @@ -104,6 +104,7 @@ def _fetch_next(self, *args) -> List[Dict[str, Any]]: # pylint: disable=unused- if self._change_feed_fetcher is None: self._initialize_change_feed_fetcher() + assert self._change_feed_fetcher is not None block = self._change_feed_fetcher.fetch_next_block() if not block: raise StopIteration @@ -114,7 +115,7 @@ def _initialize_change_feed_fetcher(self) -> None: change_feed_state = \ ChangeFeedState.from_json( self._collection_link, - self._options.get("containerRID"), + cast(str, self._options.get("containerRID")), change_feed_state_context) self._options["changeFeedState"] = change_feed_state diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_state.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_state.py index 77e603b3d834..d7c52bf89b88 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_state.py +++ 
b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_state.py @@ -331,7 +331,7 @@ def should_retry_on_not_modified_response(self) -> bool: def apply_not_modified_response(self) -> None: self._continuation.apply_not_modified_response() - def get_feed_range_gone_error(self, over_lapping_ranges: list[Dict[str, Any]]) -> CosmosHttpResponseError: + def get_feed_range_gone_error(self, over_lapping_ranges: List[Dict[str, Any]]) -> CosmosHttpResponseError: formatted_message =\ (f"Status code: {StatusCodes.GONE} " f"Sub-status: {SubStatusCodes.PARTITION_KEY_RANGE_GONE}. " diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/composite_continuation_token.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/composite_continuation_token.py index 90d3d6132822..f0d433fd966e 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/composite_continuation_token.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/composite_continuation_token.py @@ -49,7 +49,7 @@ def feed_range(self) -> Range: return self._feed_range @property - def token(self) -> str: + def token(self) -> Optional[str]: return self._token def update_token(self, etag) -> None: diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range_composite_continuation_token.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range_composite_continuation_token.py index 0aaebb616249..fc9b94f27eef 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range_composite_continuation_token.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range_composite_continuation_token.py @@ -48,7 +48,7 @@ def __init__( self._feed_range = feed_range self._continuation = continuation self._current_token = self._continuation[0] - self._initial_no_result_range = None + self._initial_no_result_range: Optional[Range] = None @property def current_token(self) -> CompositeContinuationToken: diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_cosmos_client_connection.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_cosmos_client_connection.py index 1f44c491ae46..010b91c76dd6 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_cosmos_client_connection.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_cosmos_client_connection.py @@ -57,7 +57,6 @@ from ._auth_policy import CosmosBearerTokenCredentialPolicy from ._base import _set_properties_cache from ._change_feed.change_feed_iterable import ChangeFeedIterable -from ._change_feed.change_feed_state import ChangeFeedState from ._constants import _Constants as Constants from ._cosmos_http_logging_policy import CosmosHttpLoggingPolicy from ._range_partition_resolver import RangePartitionResolver @@ -3026,7 +3025,7 @@ def __GetBodiesFromQueryResult(result: Dict[str, Any]) -> List[Dict[str, Any]]: ) if options.get("changeFeedState") is not None: - options.pop("changeFeedState").populate_request_headers(self._routing_map_provider, headers) + options.get("changeFeedState").populate_request_headers(self._routing_map_provider, headers) result, last_response_headers = self.__Get(path, request_params, headers, **kwargs) self.last_response_headers = last_response_headers diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_routing/aio/routing_map_provider.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_routing/aio/routing_map_provider.py index ba0b5ca3a3e6..e70ae355c495 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_routing/aio/routing_map_provider.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_routing/aio/routing_map_provider.py @@ -165,8 +165,12 @@ async def 
get_overlapping_ranges(self, collection_link, partition_key_ranges, ** else: queryRange = currentProvidedRange - overlappingRanges = await PartitionKeyRangeCache.get_overlapping_ranges(self, - collection_link, queryRange, **kwargs) + overlappingRanges =\ + await PartitionKeyRangeCache.get_overlapping_ranges( + self, + collection_link, + [queryRange], + **kwargs) assert overlappingRanges, "code bug: returned overlapping ranges for queryRange {} is empty".format( queryRange ) diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_routing/routing_map_provider.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_routing/routing_map_provider.py index 5a6bb304b5c8..8dacb5190e07 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_routing/routing_map_provider.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_routing/routing_map_provider.py @@ -166,7 +166,8 @@ def get_overlapping_ranges(self, collection_link, partition_key_ranges, **kwargs else: queryRange = currentProvidedRange - overlappingRanges = PartitionKeyRangeCache.get_overlapping_ranges(self, collection_link, queryRange, **kwargs) + overlappingRanges = ( + PartitionKeyRangeCache.get_overlapping_ranges(self, collection_link, [queryRange], **kwargs)) assert overlappingRanges, "code bug: returned overlapping ranges for queryRange {} is empty".format( queryRange ) diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py index fc6503aa630e..4d867a952179 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py @@ -655,7 +655,8 @@ def query_items_change_feed( # pylint: disable=unused-argument feed_options["maxItemCount"] = kwargs.pop('max_item_count') if kwargs.get("partition_key") is not None: - change_feed_state_context["partitionKey"] = self._set_partition_key(kwargs.get("partition_key")) + change_feed_state_context["partitionKey"] =\ + self._set_partition_key(cast(PartitionKeyType, kwargs.get("partition_key"))) change_feed_state_context["partitionKeyFeedRange"] = \ self._get_epk_range_for_partition_key(kwargs.pop('partition_key')) @@ -1247,7 +1248,7 @@ async def read_feed_ranges( ) -> List[str]: """ Obtains a list of feed ranges that can be used to parallelize feed operations. - :param bool force_refresh: + :keyword bool force_refresh: Flag to indicate whether obtain the list of feed ranges directly from cache or refresh the cache. :returns: A list representing the feed ranges in base64 encoded string :rtype: List[str] diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_cosmos_client_connection_async.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_cosmos_client_connection_async.py index 47e83f3e31df..675cbbe9dc69 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_cosmos_client_connection_async.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_cosmos_client_connection_async.py @@ -51,7 +51,6 @@ from .._base import _set_properties_cache from .. 
import documents from .._change_feed.aio.change_feed_iterable import ChangeFeedIterable -from .._change_feed.change_feed_state import ChangeFeedState from .._routing import routing_range from ..documents import ConnectionPolicy, DatabaseAccount from .._constants import _Constants as Constants @@ -2815,7 +2814,7 @@ def __GetBodiesFromQueryResult(result: Dict[str, Any]) -> List[Dict[str, Any]]: headers = base.GetHeaders(self, initial_headers, "get", path, id_, typ, options, partition_key_range_id) if options.get("changeFeedState") is not None: - await options.pop("changeFeedState").populate_request_headers_async(self._routing_map_provider, headers) + await options.get("changeFeedState").populate_request_headers_async(self._routing_map_provider, headers) result, self.last_response_headers = await self.__Get(path, request_params, headers, **kwargs) if response_hook: diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/container.py b/sdk/cosmos/azure-cosmos/azure/cosmos/container.py index 7ecac5391407..b8dcc9a45dc0 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/container.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/container.py @@ -325,11 +325,11 @@ def query_items_change_feed( *, max_item_count: Optional[int] = None, start_time: Optional[Union[datetime, Literal["Now", "Beginning"]]] = None, + partition_key: PartitionKeyType, priority: Optional[Literal["High", "Low"]] = None, **kwargs: Any ) -> ItemPaged[Dict[str, Any]]: - """Get a sorted list of items that were changed in the entire container, - in the order in which they were modified, + """Get a sorted list of items that were changed, in the order in which they were modified. :keyword int max_item_count: Max number of items to be returned in the enumeration operation. :keyword start_time:The start time to start processing chang feed items. @@ -338,6 +338,9 @@ def query_items_change_feed( ~datetime.datetime: processing change feed from a point of time. Provided value will be converted to UTC. By default, it is start from current ("Now") :type start_time: Union[~datetime.datetime, Literal["Now", "Beginning"]] + :keyword partition_key: The partition key that is used to define the scope + (logical partition or a subset of a container) + :type partition_key: Union[str, int, float, bool, List[Union[str, int, float, bool]]] :keyword Literal["High", "Low"] priority: Priority based execution allows users to set a priority for each request. Once the user has reached their provisioned throughput, low priority requests are throttled before high priority requests start getting throttled. Feature must first be enabled at the account level. @@ -351,24 +354,23 @@ def query_items_change_feed( def query_items_change_feed( self, *, + feed_range: str, max_item_count: Optional[int] = None, start_time: Optional[Union[datetime, Literal["Now", "Beginning"]]] = None, - partition_key: PartitionKeyType, priority: Optional[Literal["High", "Low"]] = None, **kwargs: Any ) -> ItemPaged[Dict[str, Any]]: + """Get a sorted list of items that were changed, in the order in which they were modified. + :keyword str feed_range: The feed range that is used to define the scope. :keyword int max_item_count: Max number of items to be returned in the enumeration operation. - :keyword start_time:The start time to start processing chang feed items. + :keyword start_time: The start time to start processing chang feed items. Beginning: Processing the change feed items from the beginning of the change feed. 
Now: Processing change feed from the current time, so only events for all future changes will be retrieved. ~datetime.datetime: processing change feed from a point of time. Provided value will be converted to UTC. By default, it is start from current ("Now") :type start_time: Union[~datetime.datetime, Literal["Now", "Beginning"]] - :keyword partition_key: The partition key that is used to define the scope - (logical partition or a subset of a container) - :type partition_key: Union[str, int, float, bool, List[Union[str, int, float, bool]]] :keyword Literal["High", "Low"] priority: Priority based execution allows users to set a priority for each request. Once the user has reached their provisioned throughput, low priority requests are throttled before high priority requests start getting throttled. Feature must first be enabled at the account level. @@ -382,23 +384,15 @@ def query_items_change_feed( def query_items_change_feed( self, *, - feed_range: str, + continuation: str, max_item_count: Optional[int] = None, - start_time: Optional[Union[datetime, Literal["Now", "Beginning"]]] = None, priority: Optional[Literal["High", "Low"]] = None, **kwargs: Any ) -> ItemPaged[Dict[str, Any]]: - """Get a sorted list of items that were changed, in the order in which they were modified. - :keyword str feed_range: The feed range that is used to define the scope. + :keyword str continuation: The continuation token retrieved from previous response. :keyword int max_item_count: Max number of items to be returned in the enumeration operation. - :keyword start_time: The start time to start processing chang feed items. - Beginning: Processing the change feed items from the beginning of the change feed. - Now: Processing change feed from the current time, so only events for all future changes will be retrieved. - ~datetime.datetime: processing change feed from a point of time. Provided value will be converted to UTC. - By default, it is start from current ("Now") - :type start_time: Union[~datetime.datetime, Literal["Now", "Beginning"]] :keyword Literal["High", "Low"] priority: Priority based execution allows users to set a priority for each request. Once the user has reached their provisioned throughput, low priority requests are throttled before high priority requests start getting throttled. Feature must first be enabled at the account level. @@ -412,15 +406,21 @@ def query_items_change_feed( def query_items_change_feed( self, *, - continuation: str, max_item_count: Optional[int] = None, + start_time: Optional[Union[datetime, Literal["Now", "Beginning"]]] = None, priority: Optional[Literal["High", "Low"]] = None, **kwargs: Any ) -> ItemPaged[Dict[str, Any]]: - """Get a sorted list of items that were changed, in the order in which they were modified. + """Get a sorted list of items that were changed in the entire container, + in the order in which they were modified, - :keyword str continuation: The continuation token retrieved from previous response. :keyword int max_item_count: Max number of items to be returned in the enumeration operation. + :keyword start_time:The start time to start processing chang feed items. + Beginning: Processing the change feed items from the beginning of the change feed. + Now: Processing change feed from the current time, so only events for all future changes will be retrieved. + ~datetime.datetime: processing change feed from a point of time. Provided value will be converted to UTC. 
+ By default, it is start from current ("Now") + :type start_time: Union[~datetime.datetime, Literal["Now", "Beginning"]] :keyword Literal["High", "Low"] priority: Priority based execution allows users to set a priority for each request. Once the user has reached their provisioned throughput, low priority requests are throttled before high priority requests start getting throttled. Feature must first be enabled at the account level. @@ -496,7 +496,8 @@ def query_items_change_feed( feed_options["maxItemCount"] = args[3] if kwargs.get("partition_key") is not None: - change_feed_state_context["partitionKey"] = self._set_partition_key(kwargs.get('partition_key')) + change_feed_state_context["partitionKey"] =\ + self._set_partition_key(cast(PartitionKeyType, kwargs.get('partition_key'))) change_feed_state_context["partitionKeyFeedRange"] =\ self._get_epk_range_for_partition_key(kwargs.pop('partition_key')) @@ -1315,7 +1316,7 @@ def read_feed_ranges( """ Obtains a list of feed ranges that can be used to parallelize feed operations. - :param bool force_refresh: + :keyword bool force_refresh: Flag to indicate whether obtain the list of feed ranges directly from cache or refresh the cache. :returns: A list representing the feed ranges in base64 encoded string :rtype: List[str] diff --git a/sdk/cosmos/azure-cosmos/test/test_change_feed.py b/sdk/cosmos/azure-cosmos/test/test_change_feed.py index 4b286d2b82f8..6b96355bb126 100644 --- a/sdk/cosmos/azure-cosmos/test/test_change_feed.py +++ b/sdk/cosmos/azure-cosmos/test/test_change_feed.py @@ -237,6 +237,7 @@ def create_random_items(container, batch_size): setup["created_db"].delete_container(created_collection.id) + @pytest.mark.skip def test_query_change_feed_with_split(self, setup): created_collection = setup["created_db"].create_container("change_feed_test_" + str(uuid.uuid4()), PartitionKey(path="/pk"), diff --git a/sdk/cosmos/azure-cosmos/test/test_change_feed_async.py b/sdk/cosmos/azure-cosmos/test/test_change_feed_async.py index 886c1ffc1bcc..c1a1e633d62e 100644 --- a/sdk/cosmos/azure-cosmos/test/test_change_feed_async.py +++ b/sdk/cosmos/azure-cosmos/test/test_change_feed_async.py @@ -264,6 +264,7 @@ async def create_random_items(container, batch_size): await setup["created_db"].delete_container(created_collection.id) + @pytest.mark.skip async def test_query_change_feed_with_split_async(self, setup): created_collection = await setup["created_db"].create_container("change_feed_test_" + str(uuid.uuid4()), PartitionKey(path="/pk"), From 59814d7e368420bfdc806fabede945d61af9420d Mon Sep 17 00:00:00 2001 From: annie-mac Date: Thu, 29 Aug 2024 13:30:37 -0700 Subject: [PATCH 14/20] fix mypy --- .../_change_feed/aio/change_feed_iterable.py | 13 +++---- .../_change_feed/change_feed_iterable.py | 4 +- .../azure/cosmos/_cosmos_client_connection.py | 6 ++- .../azure/cosmos/aio/_container.py | 38 +++++++++---------- .../aio/_cosmos_client_connection_async.py | 6 ++- .../azure-cosmos/azure/cosmos/container.py | 2 +- 6 files changed, 36 insertions(+), 33 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_iterable.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_iterable.py index 1da44219daa4..6a02d82c0b93 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_iterable.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_iterable.py @@ -21,12 +21,11 @@ """Iterable change feed results in the Azure Cosmos database service. 
""" -from typing import Dict, Any, Optional, Callable, Tuple, List, AsyncIterator, Awaitable +from typing import Dict, Any, Optional, Callable, Tuple, List, Awaitable, Union from azure.core.async_paging import AsyncPageIterator -from azure.cosmos._change_feed.aio.change_feed_fetcher import ChangeFeedFetcherV1, ChangeFeedFetcherV2, \ - ChangeFeedFetcher +from azure.cosmos._change_feed.aio.change_feed_fetcher import ChangeFeedFetcherV1, ChangeFeedFetcherV2 from azure.cosmos._change_feed.change_feed_state import ChangeFeedState, ChangeFeedStateVersion @@ -60,7 +59,7 @@ def __init__( self._options = options self._fetch_function = fetch_function self._collection_link = collection_link - self._change_feed_fetcher: Optional[ChangeFeedFetcher] = None + self._change_feed_fetcher: Optional[Union[ChangeFeedFetcherV1, ChangeFeedFetcherV2]] = None if self._options.get("changeFeedStateContext") is None: raise ValueError("Missing changeFeedStateContext in feed options") @@ -90,9 +89,9 @@ def __init__( async def _unpack( self, - block: AsyncIterator[List[Dict[str, Any]]] - ) -> Tuple[Optional[str], AsyncIterator[List[Dict[str, Any]]]]: - continuation = None + block: List[Dict[str, Any]] + ) -> Tuple[Optional[str], List[Dict[str, Any]]]: + continuation: Optional[str] = None if self._client.last_response_headers: continuation = self._client.last_response_headers.get('etag') diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_iterable.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_iterable.py index c1174db5f93d..a7590b4442df 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_iterable.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_iterable.py @@ -21,7 +21,7 @@ """Iterable change feed results in the Azure Cosmos database service. 
""" -from typing import Dict, Any, Tuple, List, Optional, Callable, cast +from typing import Dict, Any, Tuple, List, Optional, Callable, cast, Union from azure.core.paging import PageIterator @@ -57,7 +57,7 @@ def __init__( self._options = options self._fetch_function = fetch_function self._collection_link = collection_link - self._change_feed_fetcher: Optional[ChangeFeedFetcher] = None + self._change_feed_fetcher: Optional[Union[ChangeFeedFetcherV1, ChangeFeedFetcherV2]] = None if self._options.get("changeFeedStateContext") is None: raise ValueError("Missing changeFeedStateContext in feed options") diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_cosmos_client_connection.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_cosmos_client_connection.py index 010b91c76dd6..49198910b772 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_cosmos_client_connection.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_cosmos_client_connection.py @@ -57,6 +57,7 @@ from ._auth_policy import CosmosBearerTokenCredentialPolicy from ._base import _set_properties_cache from ._change_feed.change_feed_iterable import ChangeFeedIterable +from ._change_feed.change_feed_state import ChangeFeedState from ._constants import _Constants as Constants from ._cosmos_http_logging_policy import CosmosHttpLoggingPolicy from ._range_partition_resolver import RangePartitionResolver @@ -3024,8 +3025,9 @@ def __GetBodiesFromQueryResult(result: Dict[str, Any]) -> List[Dict[str, Any]]: partition_key_range_id ) - if options.get("changeFeedState") is not None: - options.get("changeFeedState").populate_request_headers(self._routing_map_provider, headers) + change_feed_state: Optional[ChangeFeedState] = options.get("changeFeedState") + if change_feed_state is not None: + change_feed_state.populate_request_headers(self._routing_map_provider, headers) result, last_response_headers = self.__Get(path, request_params, headers, **kwargs) self.last_response_headers = last_response_headers diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py index 4d867a952179..1ba8bf4b2b47 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py @@ -499,11 +499,11 @@ def query_items_change_feed( *, max_item_count: Optional[int] = None, start_time: Optional[Union[datetime, Literal["Now", "Beginning"]]] = None, + partition_key: PartitionKeyType, priority: Optional[Literal["High", "Low"]] = None, **kwargs: Any ) -> AsyncItemPaged[Dict[str, Any]]: - """Get a sorted list of items that were changed in the entire container, - in the order in which they were modified. + """Get a sorted list of items that were changed, in the order in which they were modified. :keyword int max_item_count: Max number of items to be returned in the enumeration operation. :keyword start_time: The start time to start processing chang feed items. @@ -512,6 +512,9 @@ def query_items_change_feed( ~datetime.datetime: processing change feed from a point of time. Provided value will be converted to UTC. By default, it is start from current ("Now") :type start_time: Union[~datetime.datetime, Literal["Now", "Beginning"]] + :keyword partition_key: The partition key that is used to define the scope + (logical partition or a subset of a container) + :type partition_key: Union[str, int, float, bool, List[Union[str, int, float, bool]]] :keyword Literal["High", "Low"] priority: Priority based execution allows users to set a priority for each request. 
Once the user has reached their provisioned throughput, low priority requests are throttled before high priority requests start getting throttled. Feature must first be enabled at the account level. @@ -525,14 +528,15 @@ def query_items_change_feed( def query_items_change_feed( self, *, + feed_range: str, max_item_count: Optional[int] = None, start_time: Optional[Union[datetime, Literal["Now", "Beginning"]]] = None, - partition_key: PartitionKeyType, priority: Optional[Literal["High", "Low"]] = None, **kwargs: Any ) -> AsyncItemPaged[Dict[str, Any]]: """Get a sorted list of items that were changed, in the order in which they were modified. + :keyword str feed_range: The feed range that is used to define the scope. :keyword int max_item_count: Max number of items to be returned in the enumeration operation. :keyword start_time: The start time to start processing chang feed items. Beginning: Processing the change feed items from the beginning of the change feed. @@ -540,9 +544,6 @@ def query_items_change_feed( ~datetime.datetime: processing change feed from a point of time. Provided value will be converted to UTC. By default, it is start from current ("Now") :type start_time: Union[~datetime.datetime, Literal["Now", "Beginning"]] - :keyword partition_key: The partition key that is used to define the scope - (logical partition or a subset of a container) - :type partition_key: Union[str, int, float, bool, List[Union[str, int, float, bool]]] :keyword Literal["High", "Low"] priority: Priority based execution allows users to set a priority for each request. Once the user has reached their provisioned throughput, low priority requests are throttled before high priority requests start getting throttled. Feature must first be enabled at the account level. @@ -556,22 +557,15 @@ def query_items_change_feed( def query_items_change_feed( self, *, - feed_range: str, + continuation: str, max_item_count: Optional[int] = None, - start_time: Optional[Union[datetime, Literal["Now", "Beginning"]]] = None, priority: Optional[Literal["High", "Low"]] = None, **kwargs: Any ) -> AsyncItemPaged[Dict[str, Any]]: """Get a sorted list of items that were changed, in the order in which they were modified. - :keyword str feed_range: The feed range that is used to define the scope. + :keyword str continuation: The continuation token retrieved from previous response. :keyword int max_item_count: Max number of items to be returned in the enumeration operation. - :keyword start_time: The start time to start processing chang feed items. - Beginning: Processing the change feed items from the beginning of the change feed. - Now: Processing change feed from the current time, so only events for all future changes will be retrieved. - ~datetime.datetime: processing change feed from a point of time. Provided value will be converted to UTC. - By default, it is start from current ("Now") - :type start_time: Union[~datetime.datetime, Literal["Now", "Beginning"]] :keyword Literal["High", "Low"] priority: Priority based execution allows users to set a priority for each request. Once the user has reached their provisioned throughput, low priority requests are throttled before high priority requests start getting throttled. Feature must first be enabled at the account level. @@ -579,21 +573,28 @@ def query_items_change_feed( :returns: An AsyncItemPaged of items (dicts). :rtype: AsyncItemPaged[Dict[str, Any]] """ + # pylint: enable=line-too-long ... 
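# ---------------------------------------------------------------------------
# Editor's illustrative sketch (not part of this patch): how the keyword-only
# overloads above might be exercised against an aio ContainerProxy. The
# `container` object and the printed "id" field are assumptions; each call
# picks exactly one scope -- whole container, a continuation token, or a
# feed range.
# ---------------------------------------------------------------------------
async def drain_change_feed(container):
    # Whole container, starting from the beginning of the change feed.
    async for item in container.query_items_change_feed(start_time="Beginning", max_item_count=100):
        print(item["id"])

    # The latest continuation token is surfaced through the 'etag' response header.
    continuation = container.client_connection.last_response_headers.get("etag")

    # Resume later from the saved continuation token.
    async for item in container.query_items_change_feed(continuation=continuation):
        print(item["id"])

    # Parallelize by feed range (read_feed_ranges is added elsewhere in this series).
    for feed_range in await container.read_feed_ranges():
        async for item in container.query_items_change_feed(feed_range=feed_range):
            print(item["id"])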
@overload def query_items_change_feed( self, *, - continuation: str, max_item_count: Optional[int] = None, + start_time: Optional[Union[datetime, Literal["Now", "Beginning"]]] = None, priority: Optional[Literal["High", "Low"]] = None, **kwargs: Any ) -> AsyncItemPaged[Dict[str, Any]]: - """Get a sorted list of items that were changed, in the order in which they were modified. + """Get a sorted list of items that were changed in the entire container, + in the order in which they were modified. - :keyword str continuation: The continuation token retrieved from previous response. :keyword int max_item_count: Max number of items to be returned in the enumeration operation. + :keyword start_time: The start time to start processing chang feed items. + Beginning: Processing the change feed items from the beginning of the change feed. + Now: Processing change feed from the current time, so only events for all future changes will be retrieved. + ~datetime.datetime: processing change feed from a point of time. Provided value will be converted to UTC. + By default, it is start from current ("Now") + :type start_time: Union[~datetime.datetime, Literal["Now", "Beginning"]] :keyword Literal["High", "Low"] priority: Priority based execution allows users to set a priority for each request. Once the user has reached their provisioned throughput, low priority requests are throttled before high priority requests start getting throttled. Feature must first be enabled at the account level. @@ -601,7 +602,6 @@ def query_items_change_feed( :returns: An AsyncItemPaged of items (dicts). :rtype: AsyncItemPaged[Dict[str, Any]] """ - # pylint: enable=line-too-long ... @distributed_trace diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_cosmos_client_connection_async.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_cosmos_client_connection_async.py index 675cbbe9dc69..9e73445e2063 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_cosmos_client_connection_async.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_cosmos_client_connection_async.py @@ -51,6 +51,7 @@ from .._base import _set_properties_cache from .. 
import documents from .._change_feed.aio.change_feed_iterable import ChangeFeedIterable +from .._change_feed.change_feed_state import ChangeFeedState from .._routing import routing_range from ..documents import ConnectionPolicy, DatabaseAccount from .._constants import _Constants as Constants @@ -2813,8 +2814,9 @@ def __GetBodiesFromQueryResult(result: Dict[str, Any]) -> List[Dict[str, Any]]: ) headers = base.GetHeaders(self, initial_headers, "get", path, id_, typ, options, partition_key_range_id) - if options.get("changeFeedState") is not None: - await options.get("changeFeedState").populate_request_headers_async(self._routing_map_provider, headers) + change_feed_state: Optional[ChangeFeedState] = options.get("changeFeedState") + if change_feed_state is not None: + await change_feed_state.populate_request_headers_async(self._routing_map_provider, headers) result, self.last_response_headers = await self.__Get(path, request_params, headers, **kwargs) if response_hook: diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/container.py b/sdk/cosmos/azure-cosmos/azure/cosmos/container.py index b8dcc9a45dc0..3f52b43d9994 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/container.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/container.py @@ -611,7 +611,7 @@ def query_items( # pylint:disable=docstring-missing-param feed_options["populateIndexMetrics"] = populate_index_metrics if partition_key is not None: partition_key_value = self._set_partition_key(partition_key) - if self.__is_prefix_partitionkey(partition_key_value): + if self.__is_prefix_partitionkey(partition_key): kwargs["isPrefixPartitionQuery"] = True properties = self._get_properties() kwargs["partitionKeyDefinition"] = properties["partitionKey"] From 66c3f7bf98f6679e8ced39fda1877e1e1e7b1349 Mon Sep 17 00:00:00 2001 From: annie-mac Date: Wed, 4 Sep 2024 11:30:44 -0700 Subject: [PATCH 15/20] fix tests --- sdk/cosmos/azure-cosmos/test/test_change_feed_async.py | 1 - sdk/cosmos/azure-cosmos/test/test_container_properties_cache.py | 2 +- .../azure-cosmos/test/test_container_properties_cache_async.py | 2 +- 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/test/test_change_feed_async.py b/sdk/cosmos/azure-cosmos/test/test_change_feed_async.py index c1a1e633d62e..b65694d6c138 100644 --- a/sdk/cosmos/azure-cosmos/test/test_change_feed_async.py +++ b/sdk/cosmos/azure-cosmos/test/test_change_feed_async.py @@ -32,7 +32,6 @@ async def setup(): } yield created_db_data - await test_client.delete_database(config.TEST_DATABASE_ID) await test_client.close() @pytest.mark.cosmosEmulator diff --git a/sdk/cosmos/azure-cosmos/test/test_container_properties_cache.py b/sdk/cosmos/azure-cosmos/test/test_container_properties_cache.py index 6ced2c6d0cd9..fbac47dfb215 100644 --- a/sdk/cosmos/azure-cosmos/test/test_container_properties_cache.py +++ b/sdk/cosmos/azure-cosmos/test/test_container_properties_cache.py @@ -599,7 +599,7 @@ def test_container_recreate_change_feed(self): client.client_connection._CosmosClientConnection__container_properties_cache = copy.deepcopy(old_cache) # Query change feed for the new items - change_feed = list(created_container.query_items_change_feed()) + change_feed = list(created_container.query_items_change_feed(start_time='Beginning')) self.assertEqual(len(change_feed), 2) # Verify that the change feed contains the new items diff --git a/sdk/cosmos/azure-cosmos/test/test_container_properties_cache_async.py b/sdk/cosmos/azure-cosmos/test/test_container_properties_cache_async.py index 
88fd6e20cc14..8cf3b9f39ba0 100644 --- a/sdk/cosmos/azure-cosmos/test/test_container_properties_cache_async.py +++ b/sdk/cosmos/azure-cosmos/test/test_container_properties_cache_async.py @@ -612,7 +612,7 @@ async def test_container_recreate_change_feed(self): client.client_connection._CosmosClientConnection__container_properties_cache = copy.deepcopy(old_cache) # Query change feed for the new items - change_feed = [item async for item in created_container.query_items_change_feed()] + change_feed = [item async for item in created_container.query_items_change_feed(start_time='Beginning')] assert len(change_feed) == 2 # Verify that the change feed contains the new items From 3a2e4e159bcf88086e46f8991b9f4d0d2b1185fc Mon Sep 17 00:00:00 2001 From: annie-mac Date: Wed, 4 Sep 2024 22:41:37 -0700 Subject: [PATCH 16/20] add tests --- .../_change_feed/aio/change_feed_fetcher.py | 15 +-- .../_change_feed/aio/change_feed_iterable.py | 2 +- .../_change_feed/change_feed_fetcher.py | 13 ++- .../_change_feed/change_feed_iterable.py | 4 +- .../cosmos/_change_feed/change_feed_state.py | 8 +- ...feed_range_composite_continuation_token.py | 6 +- .../azure-cosmos/test/test_change_feed.py | 55 ----------- .../test/test_change_feed_async.py | 56 ----------- .../test/test_change_feed_split.py | 81 ++++++++++++++++ .../test/test_change_feed_split_async.py | 94 +++++++++++++++++++ 10 files changed, 201 insertions(+), 133 deletions(-) create mode 100644 sdk/cosmos/azure-cosmos/test/test_change_feed_split.py create mode 100644 sdk/cosmos/azure-cosmos/test/test_change_feed_split_async.py diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_fetcher.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_fetcher.py index 376fd0fac397..c1db30d41fa2 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_fetcher.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_fetcher.py @@ -88,7 +88,8 @@ async def fetch_change_feed_items(self) -> List[Dict[str, Any]]: # In change feed queries, the continuation token is always populated. The hasNext() test is whether # there is any items in the response or not. self._change_feed_state.apply_server_response_continuation( - response_headers.get(continuation_key)) + cast(str, response_headers.get(continuation_key)), + (True if fetched_items else False)) if fetched_items: break @@ -164,9 +165,13 @@ async def fetch_change_feed_items(self) -> List[Dict[str, Any]]: continuation_key = http_constants.HttpHeaders.ETag # In change feed queries, the continuation token is always populated. The hasNext() test is whether # there is any items in the response or not. + + self._change_feed_state.apply_server_response_continuation( + cast(str, response_headers.get(continuation_key)), + (True if fetched_items else False)) + if fetched_items: - self._change_feed_state.apply_server_response_continuation( - cast(str, response_headers.get(continuation_key))) + self._change_feed_state._continuation._move_to_next_token() response_headers[continuation_key] = self._get_base64_encoded_continuation() break @@ -175,10 +180,6 @@ async def fetch_change_feed_items(self) -> List[Dict[str, Any]]: # so we will retry with the new continuation token # 2. 
if the feed range of the changeFeedState span multiple physical partitions # then we will read from the next feed range until we have looped through all physical partitions - self._change_feed_state.apply_not_modified_response() - self._change_feed_state.apply_server_response_continuation( - cast(str, response_headers.get(continuation_key))) - if (self._change_feed_state._change_feed_start_from.version == ChangeFeedStartFromType.POINT_IN_TIME and is_s_time_first_fetch): response_headers[continuation_key] = self._get_base64_encoded_continuation() diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_iterable.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_iterable.py index 6a02d82c0b93..8d7b4eaf699b 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_iterable.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_iterable.py @@ -85,7 +85,7 @@ def __init__( self._validate_change_feed_state_context(change_feed_state_context) self._options["changeFeedStateContext"] = change_feed_state_context - super(ChangeFeedIterable, self).__init__(self._fetch_next, self._unpack, continuation_token=continuation_token) + super(ChangeFeedIterable, self).__init__(self._fetch_next, self._unpack, continuation_token=continuation_token) # type: ignore[arg-type] async def _unpack( self, diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_fetcher.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_fetcher.py index 2417eff46259..846861c704cc 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_fetcher.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_fetcher.py @@ -87,7 +87,8 @@ def fetch_change_feed_items(self) -> List[Dict[str, Any]]: # In change feed queries, the continuation token is always populated. The hasNext() test is whether # there is any items in the response or not. self._change_feed_state.apply_server_response_continuation( - cast(str, response_headers.get(continuation_key))) + cast(str, response_headers.get(continuation_key)), + (True if fetched_items else False)) if fetched_items: break @@ -156,9 +157,11 @@ def fetch_change_feed_items(self) -> List[Dict[str, Any]]: continuation_key = http_constants.HttpHeaders.ETag # In change feed queries, the continuation token is always populated. + self._change_feed_state.apply_server_response_continuation( + cast(str, response_headers.get(continuation_key)), + (True if fetched_items else False)) + if fetched_items: - self._change_feed_state.apply_server_response_continuation( - cast(str, response_headers.get(continuation_key))) self._change_feed_state._continuation._move_to_next_token() response_headers[continuation_key] = self._get_base64_encoded_continuation() break @@ -168,10 +171,6 @@ def fetch_change_feed_items(self) -> List[Dict[str, Any]]: # so we will retry with the new continuation token # 2. 
if the feed range of the changeFeedState span multiple physical partitions # then we will read from the next feed range until we have looped through all physical partitions - self._change_feed_state.apply_not_modified_response() - self._change_feed_state.apply_server_response_continuation( - cast(str, response_headers.get(continuation_key))) - if (self._change_feed_state._change_feed_start_from.version == ChangeFeedStartFromType.POINT_IN_TIME and is_s_time_first_fetch): response_headers[continuation_key] = self._get_base64_encoded_continuation() diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_iterable.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_iterable.py index a7590b4442df..00193ec3da72 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_iterable.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_iterable.py @@ -25,7 +25,7 @@ from azure.core.paging import PageIterator -from azure.cosmos._change_feed.change_feed_fetcher import ChangeFeedFetcherV1, ChangeFeedFetcherV2, ChangeFeedFetcher +from azure.cosmos._change_feed.change_feed_fetcher import ChangeFeedFetcherV1, ChangeFeedFetcherV2 from azure.cosmos._change_feed.change_feed_state import ChangeFeedState, ChangeFeedStateVersion @@ -82,7 +82,7 @@ def __init__( self._validate_change_feed_state_context(change_feed_state_context) self._options["changeFeedStateContext"] = change_feed_state_context - super(ChangeFeedIterable, self).__init__(self._fetch_next, self._unpack, continuation_token=continuation_token) + super(ChangeFeedIterable, self).__init__(self._fetch_next, self._unpack, continuation_token=continuation_token) # type: ignore[arg-type] def _unpack(self, block: List[Dict[str, Any]]) -> Tuple[Optional[str], List[Dict[str, Any]]]: continuation: Optional[str] = None diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_state.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_state.py index d7c52bf89b88..46dd1afddcfe 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_state.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_state.py @@ -72,7 +72,7 @@ async def populate_request_headers_async( pass @abstractmethod - def apply_server_response_continuation(self, continuation: str) -> None: + def apply_server_response_continuation(self, continuation: str, has_modified_response: bool) -> None: pass @staticmethod @@ -170,7 +170,7 @@ def populate_feed_options(self, feed_options: Dict[str, Any]) -> None: if self._partition_key is not None: feed_options["partitionKey"] = self._partition_key - def apply_server_response_continuation(self, continuation: str) -> None: + def apply_server_response_continuation(self, continuation: str, has_modified_response) -> None: self._continuation = continuation class ChangeFeedStateV2(ChangeFeedState): @@ -322,8 +322,8 @@ async def handle_feed_range_gone_async( resource_link: str) -> None: await self._continuation.handle_feed_range_gone_async(routing_provider, resource_link) - def apply_server_response_continuation(self, continuation: str) -> None: - self._continuation.apply_server_response_continuation(continuation) + def apply_server_response_continuation(self, continuation: str, has_modified_response: bool) -> None: + self._continuation.apply_server_response_continuation(continuation, has_modified_response) def should_retry_on_not_modified_response(self) -> bool: return self._continuation.should_retry_on_not_modified_response() diff --git 
a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range_composite_continuation_token.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range_composite_continuation_token.py index fc9b94f27eef..f5967b6bf34b 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range_composite_continuation_token.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/feed_range_composite_continuation_token.py @@ -159,8 +159,12 @@ def _move_to_next_token(self) -> None: self._continuation.append(first_composition_token) self._current_token = self._continuation[0] - def apply_server_response_continuation(self, etag) -> None: + def apply_server_response_continuation(self, etag, has_modified_response: bool) -> None: self._current_token.update_token(etag) + if has_modified_response: + self._initial_no_result_range = None + else: + self.apply_not_modified_response() def apply_not_modified_response(self) -> None: if self._initial_no_result_range is None: diff --git a/sdk/cosmos/azure-cosmos/test/test_change_feed.py b/sdk/cosmos/azure-cosmos/test/test_change_feed.py index 6b96355bb126..bd6e1b7c4faa 100644 --- a/sdk/cosmos/azure-cosmos/test/test_change_feed.py +++ b/sdk/cosmos/azure-cosmos/test/test_change_feed.py @@ -237,61 +237,6 @@ def create_random_items(container, batch_size): setup["created_db"].delete_container(created_collection.id) - @pytest.mark.skip - def test_query_change_feed_with_split(self, setup): - created_collection = setup["created_db"].create_container("change_feed_test_" + str(uuid.uuid4()), - PartitionKey(path="/pk"), - offer_throughput=400) - - # initial change feed query returns empty result - query_iterable = created_collection.query_items_change_feed(start_time="Beginning") - iter_list = list(query_iterable) - assert len(iter_list) == 0 - continuation = created_collection.client_connection.last_response_headers['etag'] - assert continuation != '' - - # create one doc and make sure change feed query can return the document - document_definition = {'pk': 'pk', 'id': 'doc1'} - created_collection.create_item(body=document_definition) - query_iterable = created_collection.query_items_change_feed(continuation=continuation) - iter_list = list(query_iterable) - assert len(iter_list) == 1 - continuation = created_collection.client_connection.last_response_headers['etag'] - - print("Triggering a split in test_query_change_feed_with_split") - created_collection.replace_throughput(11000) - print("changed offer to 11k") - print("--------------------------------") - print("Waiting for split to complete") - start_time = time.time() - - while True: - offer = created_collection.get_throughput() - if offer.properties['content'].get('isOfferReplacePending', False): - if time.time() - start_time > 60 * 25: # timeout test at 25 minutes - unittest.skip("Partition split didn't complete in time.") - else: - print("Waiting for split to complete") - time.sleep(60) - else: - break - - print("Split in test_query_change_feed_with_split has completed") - print("creating few more documents") - new_documents = [{'pk': 'pk2', 'id': 'doc2'}, {'pk': 'pk3', 'id': 'doc3'}, {'pk': 'pk4', 'id': 'doc4'}] - expected_ids = ['doc2', 'doc3', 'doc4'] - for document in new_documents: - created_collection.create_item(body=document) - - query_iterable = created_collection.query_items_change_feed(continuation=continuation) - it = query_iterable.__iter__() - actual_ids = [] - for item in it: - actual_ids.append(item['id']) - - assert actual_ids == expected_ids - 
setup["created_db"].delete_container(created_collection.id) - def test_query_change_feed_with_multi_partition(self, setup): created_collection = setup["created_db"].create_container("change_feed_test_" + str(uuid.uuid4()), PartitionKey(path="/pk"), diff --git a/sdk/cosmos/azure-cosmos/test/test_change_feed_async.py b/sdk/cosmos/azure-cosmos/test/test_change_feed_async.py index b65694d6c138..2d574f2f8ee7 100644 --- a/sdk/cosmos/azure-cosmos/test/test_change_feed_async.py +++ b/sdk/cosmos/azure-cosmos/test/test_change_feed_async.py @@ -1,7 +1,6 @@ # The MIT License (MIT) # Copyright (c) Microsoft Corporation. All rights reserved. -import time import unittest import uuid from asyncio import sleep @@ -263,61 +262,6 @@ async def create_random_items(container, batch_size): await setup["created_db"].delete_container(created_collection.id) - @pytest.mark.skip - async def test_query_change_feed_with_split_async(self, setup): - created_collection = await setup["created_db"].create_container("change_feed_test_" + str(uuid.uuid4()), - PartitionKey(path="/pk"), - offer_throughput=400) - - # initial change feed query returns empty result - query_iterable = created_collection.query_items_change_feed(start_time="Beginning") - iter_list = [item async for item in query_iterable] - assert len(iter_list) == 0 - continuation = created_collection.client_connection.last_response_headers['etag'] - assert continuation != '' - - # create one doc and make sure change feed query can return the document - document_definition = {'pk': 'pk', 'id': 'doc1'} - await created_collection.create_item(body=document_definition) - query_iterable = created_collection.query_items_change_feed(continuation=continuation) - iter_list = [item async for item in query_iterable] - assert len(iter_list) == 1 - continuation = created_collection.client_connection.last_response_headers['etag'] - - print("Triggering a split in test_query_change_feed_with_split") - await created_collection.replace_throughput(11000) - print("changed offer to 11k") - print("--------------------------------") - print("Waiting for split to complete") - start_time = time.time() - - while True: - offer = await created_collection.get_throughput() - if offer.properties['content'].get('isOfferReplacePending', False): - if time.time() - start_time > 60 * 25: # timeout test at 25 minutes - unittest.skip("Partition split didn't complete in time.") - else: - print("Waiting for split to complete") - time.sleep(60) - else: - break - - print("Split in test_query_change_feed_with_split has completed") - print("creating few more documents") - new_documents = [{'pk': 'pk2', 'id': 'doc2'}, {'pk': 'pk3', 'id': 'doc3'}, {'pk': 'pk4', 'id': 'doc4'}] - expected_ids = ['doc2', 'doc3', 'doc4'] - for document in new_documents: - await created_collection.create_item(body=document) - - query_iterable = created_collection.query_items_change_feed(continuation=continuation) - it = query_iterable.__aiter__() - actual_ids = [] - async for item in it: - actual_ids.append(item['id']) - - assert actual_ids == expected_ids - setup["created_db"].delete_container(created_collection.id) - async def test_query_change_feed_with_multi_partition_async(self, setup): created_collection = await setup["created_db"].create_container("change_feed_test_" + str(uuid.uuid4()), PartitionKey(path="/pk"), diff --git a/sdk/cosmos/azure-cosmos/test/test_change_feed_split.py b/sdk/cosmos/azure-cosmos/test/test_change_feed_split.py new file mode 100644 index 000000000000..8ecb7da9cff3 --- /dev/null +++ 
b/sdk/cosmos/azure-cosmos/test/test_change_feed_split.py @@ -0,0 +1,81 @@ +# The MIT License (MIT) +# Copyright (c) Microsoft Corporation. All rights reserved. + +import time +import unittest +import uuid + +import azure.cosmos.cosmos_client as cosmos_client +import test_config +from azure.cosmos import DatabaseProxy, PartitionKey + + +class TestPartitionSplitChangeFeed(unittest.TestCase): + database: DatabaseProxy = None + client: cosmos_client.CosmosClient = None + configs = test_config.TestConfig + host = configs.host + masterKey = configs.masterKey + TEST_DATABASE_ID = configs.TEST_DATABASE_ID + + @classmethod + def setUpClass(cls): + cls.client = cosmos_client.CosmosClient(cls.host, cls.masterKey) + cls.database = cls.client.get_database_client(cls.TEST_DATABASE_ID) + + def test_query_change_feed_with_split(self): + created_collection = self.database.create_container("change_feed_split_test_" + str(uuid.uuid4()), + PartitionKey(path="/pk"), + offer_throughput=400) + + # initial change feed query returns empty result + query_iterable = created_collection.query_items_change_feed(start_time="Beginning") + iter_list = list(query_iterable) + assert len(iter_list) == 0 + continuation = created_collection.client_connection.last_response_headers['etag'] + assert continuation != '' + + # create one doc and make sure change feed query can return the document + document_definition = {'pk': 'pk', 'id': 'doc1'} + created_collection.create_item(body=document_definition) + query_iterable = created_collection.query_items_change_feed(continuation=continuation) + iter_list = list(query_iterable) + assert len(iter_list) == 1 + continuation = created_collection.client_connection.last_response_headers['etag'] + + print("Triggering a split in test_query_change_feed_with_split") + created_collection.replace_throughput(11000) + print("changed offer to 11k") + print("--------------------------------") + print("Waiting for split to complete") + start_time = time.time() + + while True: + offer = created_collection.get_throughput() + if offer.properties['content'].get('isOfferReplacePending', False): + if time.time() - start_time > 60 * 25: # timeout test at 25 minutes + unittest.skip("Partition split didn't complete in time.") + else: + print("Waiting for split to complete") + time.sleep(60) + else: + break + + print("Split in test_query_change_feed_with_split has completed") + print("creating few more documents") + new_documents = [{'pk': 'pk2', 'id': 'doc2'}, {'pk': 'pk3', 'id': 'doc3'}, {'pk': 'pk4', 'id': 'doc4'}] + expected_ids = ['doc2', 'doc3', 'doc4'] + for document in new_documents: + created_collection.create_item(body=document) + + query_iterable = created_collection.query_items_change_feed(continuation=continuation) + it = query_iterable.__iter__() + actual_ids = [] + for item in it: + actual_ids.append(item['id']) + + assert actual_ids == expected_ids + self.database.delete_container(created_collection.id) + +if __name__ == "__main__": + unittest.main() diff --git a/sdk/cosmos/azure-cosmos/test/test_change_feed_split_async.py b/sdk/cosmos/azure-cosmos/test/test_change_feed_split_async.py new file mode 100644 index 000000000000..60f7b2810884 --- /dev/null +++ b/sdk/cosmos/azure-cosmos/test/test_change_feed_split_async.py @@ -0,0 +1,94 @@ +# The MIT License (MIT) +# Copyright (c) Microsoft Corporation. All rights reserved. 
+ +import time +import unittest +import uuid + +import test_config +from azure.cosmos import PartitionKey +from azure.cosmos.aio import CosmosClient, DatabaseProxy + + +class TestPartitionSplitChangeFeedAsync(unittest.IsolatedAsyncioTestCase): + host = test_config.TestConfig.host + masterKey = test_config.TestConfig.masterKey + connectionPolicy = test_config.TestConfig.connectionPolicy + + client: CosmosClient = None + created_database: DatabaseProxy = None + + TEST_DATABASE_ID = test_config.TestConfig.TEST_DATABASE_ID + + @classmethod + def setUpClass(cls): + if (cls.masterKey == '[YOUR_KEY_HERE]' or + cls.host == '[YOUR_ENDPOINT_HERE]'): + raise Exception( + "You must specify your Azure Cosmos account values for " + "'masterKey' and 'host' at the top of this class to run the " + "tests.") + + async def asyncSetUp(self): + self.client = CosmosClient(self.host, self.masterKey) + self.created_database = self.client.get_database_client(self.TEST_DATABASE_ID) + + async def tearDown(self): + await self.client.close() + + async def test_query_change_feed_with_split_async(self): + created_collection = await self.created_database.create_container("change_feed_test_" + str(uuid.uuid4()), + PartitionKey(path="/pk"), + offer_throughput=400) + + # initial change feed query returns empty result + query_iterable = created_collection.query_items_change_feed(start_time="Beginning") + iter_list = [item async for item in query_iterable] + assert len(iter_list) == 0 + continuation = created_collection.client_connection.last_response_headers['etag'] + assert continuation != '' + + # create one doc and make sure change feed query can return the document + document_definition = {'pk': 'pk', 'id': 'doc1'} + await created_collection.create_item(body=document_definition) + query_iterable = created_collection.query_items_change_feed(continuation=continuation) + iter_list = [item async for item in query_iterable] + assert len(iter_list) == 1 + continuation = created_collection.client_connection.last_response_headers['etag'] + + print("Triggering a split in test_query_change_feed_with_split") + await created_collection.replace_throughput(11000) + print("changed offer to 11k") + print("--------------------------------") + print("Waiting for split to complete") + start_time = time.time() + + while True: + offer = await created_collection.get_throughput() + if offer.properties['content'].get('isOfferReplacePending', False): + if time.time() - start_time > 60 * 25: # timeout test at 25 minutes + unittest.skip("Partition split didn't complete in time.") + else: + print("Waiting for split to complete") + time.sleep(60) + else: + break + + print("Split in test_query_change_feed_with_split has completed") + print("creating few more documents") + new_documents = [{'pk': 'pk2', 'id': 'doc2'}, {'pk': 'pk3', 'id': 'doc3'}, {'pk': 'pk4', 'id': 'doc4'}] + expected_ids = ['doc2', 'doc3', 'doc4'] + for document in new_documents: + await created_collection.create_item(body=document) + + query_iterable = created_collection.query_items_change_feed(continuation=continuation) + it = query_iterable.__aiter__() + actual_ids = [] + async for item in it: + actual_ids.append(item['id']) + + assert actual_ids == expected_ids + self.created_database.delete_container(created_collection.id) + +if __name__ == '__main__': + unittest.main() \ No newline at end of file From 0883dac536e9940fe281b64e0ffb8bc33c21e304 Mon Sep 17 00:00:00 2001 From: annie-mac Date: Thu, 5 Sep 2024 08:13:06 -0700 Subject: [PATCH 17/20] fix pylint --- 
.../azure/cosmos/_change_feed/aio/change_feed_fetcher.py | 4 ++-- .../azure/cosmos/_change_feed/aio/change_feed_iterable.py | 5 ++++- .../azure/cosmos/_change_feed/change_feed_fetcher.py | 4 ++-- .../azure/cosmos/_change_feed/change_feed_iterable.py | 5 ++++- 4 files changed, 12 insertions(+), 6 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_fetcher.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_fetcher.py index c1db30d41fa2..d997360e4c41 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_fetcher.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_fetcher.py @@ -89,7 +89,7 @@ async def fetch_change_feed_items(self) -> List[Dict[str, Any]]: # there is any items in the response or not. self._change_feed_state.apply_server_response_continuation( cast(str, response_headers.get(continuation_key)), - (True if fetched_items else False)) + bool(fetched_items)) if fetched_items: break @@ -168,7 +168,7 @@ async def fetch_change_feed_items(self) -> List[Dict[str, Any]]: self._change_feed_state.apply_server_response_continuation( cast(str, response_headers.get(continuation_key)), - (True if fetched_items else False)) + bool(fetched_items)) if fetched_items: self._change_feed_state._continuation._move_to_next_token() diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_iterable.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_iterable.py index 8d7b4eaf699b..3f73050dfc7a 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_iterable.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/aio/change_feed_iterable.py @@ -85,7 +85,10 @@ def __init__( self._validate_change_feed_state_context(change_feed_state_context) self._options["changeFeedStateContext"] = change_feed_state_context - super(ChangeFeedIterable, self).__init__(self._fetch_next, self._unpack, continuation_token=continuation_token) # type: ignore[arg-type] + super(ChangeFeedIterable, self).__init__( + self._fetch_next, + self._unpack, # type: ignore[arg-type] + continuation_token=continuation_token) async def _unpack( self, diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_fetcher.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_fetcher.py index 846861c704cc..c3ff6472af28 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_fetcher.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_fetcher.py @@ -88,7 +88,7 @@ def fetch_change_feed_items(self) -> List[Dict[str, Any]]: # there is any items in the response or not. self._change_feed_state.apply_server_response_continuation( cast(str, response_headers.get(continuation_key)), - (True if fetched_items else False)) + bool(fetched_items)) if fetched_items: break @@ -159,7 +159,7 @@ def fetch_change_feed_items(self) -> List[Dict[str, Any]]: # In change feed queries, the continuation token is always populated. 
self._change_feed_state.apply_server_response_continuation( cast(str, response_headers.get(continuation_key)), - (True if fetched_items else False)) + bool(fetched_items)) if fetched_items: self._change_feed_state._continuation._move_to_next_token() diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_iterable.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_iterable.py index 00193ec3da72..bd37b60926cf 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_iterable.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_change_feed/change_feed_iterable.py @@ -82,7 +82,10 @@ def __init__( self._validate_change_feed_state_context(change_feed_state_context) self._options["changeFeedStateContext"] = change_feed_state_context - super(ChangeFeedIterable, self).__init__(self._fetch_next, self._unpack, continuation_token=continuation_token) # type: ignore[arg-type] + super(ChangeFeedIterable, self).__init__( + self._fetch_next, + self._unpack, # type: ignore[arg-type] + continuation_token=continuation_token) def _unpack(self, block: List[Dict[str, Any]]) -> Tuple[Optional[str], List[Dict[str, Any]]]: continuation: Optional[str] = None From 195c47cd3783db43a5b4ef062de6a898887fe41e Mon Sep 17 00:00:00 2001 From: annie-mac Date: Fri, 6 Sep 2024 09:01:30 -0700 Subject: [PATCH 18/20] fix and resolve comments --- sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py | 4 +--- sdk/cosmos/azure-cosmos/azure/cosmos/container.py | 4 +--- sdk/cosmos/azure-cosmos/azure/cosmos/partition_key.py | 2 +- 3 files changed, 3 insertions(+), 7 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py index 1ba8bf4b2b47..d7d66738b4ee 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py @@ -135,9 +135,7 @@ async def _set_partition_key( return _return_undefined_or_empty_partition_key(await self.is_system_key) return cast(Union[str, int, float, bool, List[Union[str, int, float, bool]]], partition_key) - async def _get_epk_range_for_partition_key( - self, - partition_key_value: Union[str, int, float, bool, Sequence[Union[str, int, float, bool, None]], Type[NonePartitionKeyValue]]) -> Range: # pylint: disable=line-too-long + async def _get_epk_range_for_partition_key(self, partition_key_value: PartitionKeyType) -> Range: container_properties = await self._get_properties() partition_key_definition = container_properties["partitionKey"] diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/container.py b/sdk/cosmos/azure-cosmos/azure/cosmos/container.py index 3f52b43d9994..e6a6ac7b36b9 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/container.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/container.py @@ -130,9 +130,7 @@ def _set_partition_key( return _return_undefined_or_empty_partition_key(self.is_system_key) return cast(Union[str, int, float, bool, List[Union[str, int, float, bool]]], partition_key) - def _get_epk_range_for_partition_key( - self, - partition_key_value: Union[str, int, float, bool, Sequence[Union[str, int, float, bool, None]], Type[NonePartitionKeyValue]]) -> Range: # pylint: disable=line-too-long + def _get_epk_range_for_partition_key( self, partition_key_value: PartitionKeyType) -> Range: container_properties = self._get_properties() partition_key_definition = container_properties["partitionKey"] partition_key = PartitionKey(path=partition_key_definition["paths"], kind=partition_key_definition["kind"]) 
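Editor's note (illustrative, not part of this patch): the `_is_prefix_partition_key` fix in the next hunk matters for containers with hierarchical ("MultiHash") partition keys, where a caller may supply only a prefix of the key paths. A minimal sketch of the user-facing behaviour, with the endpoint, key and database id assumed:

from azure.cosmos import CosmosClient, PartitionKey

client = CosmosClient("<endpoint>", "<key>")               # assumed account values
database = client.get_database_client("<database-id>")     # assumed database id
container = database.create_container_if_not_exists(
    id="hierarchical_pk_container",
    partition_key=PartitionKey(path=["/tenantId", "/userId"], kind="MultiHash"),
)

container.create_item({"id": "doc1", "tenantId": "t1", "userId": "u1"})

# A full key supplies one value per path; ["t1"] alone would be a *prefix*
# partition key, which the corrected check (len(self['paths'])) detects.
for item in container.query_items_change_feed(partition_key=["t1", "u1"],
                                               start_time="Beginning"):
    print(item["id"])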
diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/partition_key.py b/sdk/cosmos/azure-cosmos/azure/cosmos/partition_key.py index 5870e7519e8b..7fa093aa15e1 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/partition_key.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/partition_key.py @@ -284,7 +284,7 @@ def _is_prefix_partition_key( partition_key: Union[str, int, float, bool, Sequence[Union[str, int, float, bool, None]], Type[NonePartitionKeyValue]]) -> bool: # pylint: disable=line-too-long if self.kind!= "MultiHash": return False - if isinstance(partition_key, list) and len(self.path) == len(partition_key): + if isinstance(partition_key, list) and len(self['paths']) == len(partition_key): return False return True From 246b1be3cd8e2163b2853ad9595ae4d8f822ca4c Mon Sep 17 00:00:00 2001 From: annie-mac Date: Fri, 6 Sep 2024 14:32:40 -0700 Subject: [PATCH 19/20] fix and resolve comments --- .../azure-cosmos/test/test_change_feed.py | 19 +++++-------------- .../test/test_change_feed_async.py | 12 +----------- .../azure-cosmos/test/test_vector_policy.py | 2 +- .../test/test_vector_policy_async.py | 2 +- 4 files changed, 8 insertions(+), 27 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/test/test_change_feed.py b/sdk/cosmos/azure-cosmos/test/test_change_feed.py index bd6e1b7c4faa..01e2dc21ddb6 100644 --- a/sdk/cosmos/azure-cosmos/test/test_change_feed.py +++ b/sdk/cosmos/azure-cosmos/test/test_change_feed.py @@ -1,7 +1,6 @@ # The MIT License (MIT) # Copyright (c) Microsoft Corporation. All rights reserved. -import time import unittest import uuid from datetime import datetime, timedelta, timezone @@ -13,21 +12,21 @@ import azure.cosmos.cosmos_client as cosmos_client import azure.cosmos.exceptions as exceptions import test_config -from azure.cosmos import DatabaseProxy from azure.cosmos.partition_key import PartitionKey @pytest.fixture(scope="class") def setup(): - if (TestChangeFeed.masterKey == '[YOUR_KEY_HERE]' or - TestChangeFeed.host == '[YOUR_ENDPOINT_HERE]'): + config = test_config.TestConfig() + if (config.masterKey == '[YOUR_KEY_HERE]' or + config.host == '[YOUR_ENDPOINT_HERE]'): raise Exception( "You must specify your Azure Cosmos account values for " "'masterKey' and 'host' at the top of this class to run the " "tests.") - test_client = cosmos_client.CosmosClient(test_config.TestConfig.host, test_config.TestConfig.masterKey), + test_client = cosmos_client.CosmosClient(config.host, config.masterKey), return { - "created_db": test_client[0].get_database_client(TestChangeFeed.TEST_DATABASE_ID) + "created_db": test_client[0].get_database_client(config.TEST_DATABASE_ID) } @pytest.mark.cosmosEmulator @@ -36,14 +35,6 @@ def setup(): class TestChangeFeed: """Test to ensure escaping of non-ascii characters from partition key""" - created_db: DatabaseProxy = None - client: cosmos_client.CosmosClient = None - config = test_config.TestConfig - host = config.host - masterKey = config.masterKey - connectionPolicy = config.connectionPolicy - TEST_DATABASE_ID = config.TEST_DATABASE_ID - def test_get_feed_ranges(self, setup): created_collection = setup["created_db"].create_container("get_feed_ranges_" + str(uuid.uuid4()), PartitionKey(path="/pk")) diff --git a/sdk/cosmos/azure-cosmos/test/test_change_feed_async.py b/sdk/cosmos/azure-cosmos/test/test_change_feed_async.py index 2d574f2f8ee7..2ef61ee5c8a3 100644 --- a/sdk/cosmos/azure-cosmos/test/test_change_feed_async.py +++ b/sdk/cosmos/azure-cosmos/test/test_change_feed_async.py @@ -12,7 +12,7 @@ import azure.cosmos.exceptions as exceptions import 
test_config -from azure.cosmos.aio import CosmosClient, DatabaseProxy, ContainerProxy +from azure.cosmos.aio import CosmosClient from azure.cosmos.partition_key import PartitionKey @@ -39,16 +39,6 @@ async def setup(): class TestChangeFeedAsync: """Test to ensure escaping of non-ascii characters from partition key""" - created_db: DatabaseProxy = None - created_container: ContainerProxy = None - client: CosmosClient = None - config = test_config.TestConfig - TEST_CONTAINER_ID = config.TEST_MULTI_PARTITION_CONTAINER_ID - TEST_DATABASE_ID = config.TEST_DATABASE_ID - host = config.host - masterKey = config.masterKey - connectionPolicy = config.connectionPolicy - async def test_get_feed_ranges(self, setup): created_collection = await setup["created_db"].create_container("get_feed_ranges_" + str(uuid.uuid4()), PartitionKey(path="/pk")) diff --git a/sdk/cosmos/azure-cosmos/test/test_vector_policy.py b/sdk/cosmos/azure-cosmos/test/test_vector_policy.py index da0aeb8ec6a8..e44f8f21c5fd 100644 --- a/sdk/cosmos/azure-cosmos/test/test_vector_policy.py +++ b/sdk/cosmos/azure-cosmos/test/test_vector_policy.py @@ -163,7 +163,7 @@ def test_fail_replace_vector_indexing_policy(self): pytest.fail("Container replace should have failed for indexing policy.") except exceptions.CosmosHttpResponseError as e: assert e.status_code == 400 - assert "Vector Indexing Policy cannot be changed in Collection Replace" in e.http_error_message + assert "vector indexing policy cannot be modified in Collection Replace" in e.http_error_message self.test_db.delete_container(container_id) def test_fail_create_vector_embedding_policy(self): diff --git a/sdk/cosmos/azure-cosmos/test/test_vector_policy_async.py b/sdk/cosmos/azure-cosmos/test/test_vector_policy_async.py index 19dd48268417..71c4997e3179 100644 --- a/sdk/cosmos/azure-cosmos/test/test_vector_policy_async.py +++ b/sdk/cosmos/azure-cosmos/test/test_vector_policy_async.py @@ -173,7 +173,7 @@ async def test_fail_replace_vector_indexing_policy_async(self): pytest.fail("Container replace should have failed for indexing policy.") except exceptions.CosmosHttpResponseError as e: assert e.status_code == 400 - assert "Vector Indexing Policy cannot be changed in Collection Replace" in e.http_error_message + assert "vector indexing policy cannot be modified in Collection Replace" in e.http_error_message await self.test_db.delete_container(container_id) async def test_fail_create_vector_embedding_policy_async(self): From 5cde59b96df8aa3219684b6b341891d9d96f9622 Mon Sep 17 00:00:00 2001 From: annie-mac Date: Fri, 13 Sep 2024 15:02:22 -0700 Subject: [PATCH 20/20] revert unnecessary change --- sdk/cosmos/azure-mgmt-cosmosdb/toc_tree.rst | 8 -------- 1 file changed, 8 deletions(-) delete mode 100644 sdk/cosmos/azure-mgmt-cosmosdb/toc_tree.rst diff --git a/sdk/cosmos/azure-mgmt-cosmosdb/toc_tree.rst b/sdk/cosmos/azure-mgmt-cosmosdb/toc_tree.rst deleted file mode 100644 index 5b7484884dd7..000000000000 --- a/sdk/cosmos/azure-mgmt-cosmosdb/toc_tree.rst +++ /dev/null @@ -1,8 +0,0 @@ -.. toctree:: - :maxdepth: 5 - :glob: - :caption: Developer Documentation - - ref/azure.common - /Users/annie-mac/dev/git/azure-sdk-for-python/sdk/cosmos/azure-mgmt-cosmosdb/.tox/sphinx/tmp/dist/unzipped/docgen/azure.mgmt.cosmosdb.rst - ref/azure.servicemanagement
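Editor's illustrative footnote (not part of this patch): a minimal sketch of how the continuation token exercised by the split tests above could be persisted between runs. The token file path is an assumption; the client setup reuses the test_config values the tests in this series already rely on.

import json
import os

import azure.cosmos.cosmos_client as cosmos_client
import test_config

TOKEN_FILE = "change_feed_token.json"   # assumed location for the persisted token

config = test_config.TestConfig
client = cosmos_client.CosmosClient(config.host, config.masterKey)
container = (client.get_database_client(config.TEST_DATABASE_ID)
             .get_container_client(config.TEST_MULTI_PARTITION_CONTAINER_ID))

# Resume from a previously saved continuation token if one exists,
# otherwise read the change feed from the beginning.
if os.path.exists(TOKEN_FILE):
    with open(TOKEN_FILE) as f:
        continuation = json.load(f)["continuation"]
    iterable = container.query_items_change_feed(continuation=continuation)
else:
    iterable = container.query_items_change_feed(start_time="Beginning")

for item in iterable:
    print(item["id"])

# The newest token is surfaced through the 'etag' response header; with the V2
# (base64-encoded, feed-range aware) token introduced in this series it remains
# usable across partition splits, which is what the split tests verify.
with open(TOKEN_FILE, "w") as f:
    json.dump({"continuation": container.client_connection.last_response_headers["etag"]}, f)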