From 748310b020a8d55f0c2fbd3828948745b27e57ef Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Thu, 16 Mar 2023 08:01:31 -0400 Subject: [PATCH 001/213] chore(deps): Update nox in .kokoro/requirements.in [autoapprove] (#750) Source-Link: https://github.com/googleapis/synthtool/commit/92006bb3cdc84677aa93c7f5235424ec2b157146 Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:2e247c7bf5154df7f98cce087a20ca7605e236340c7d6d1a14447e5c06791bd6 Co-authored-by: Owl Bot --- .github/.OwlBot.lock.yaml | 2 +- .kokoro/requirements.in | 2 +- .kokoro/requirements.txt | 14 +++++--------- 3 files changed, 7 insertions(+), 11 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index 5fc5daa31..b8edda51c 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -13,4 +13,4 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:8555f0e37e6261408f792bfd6635102d2da5ad73f8f09bcb24f25e6afb5fac97 + digest: sha256:2e247c7bf5154df7f98cce087a20ca7605e236340c7d6d1a14447e5c06791bd6 diff --git a/.kokoro/requirements.in b/.kokoro/requirements.in index 882178ce6..ec867d9fd 100644 --- a/.kokoro/requirements.in +++ b/.kokoro/requirements.in @@ -5,6 +5,6 @@ typing-extensions twine wheel setuptools -nox +nox>=2022.11.21 # required to remove dependency on py charset-normalizer<3 click<8.1.0 diff --git a/.kokoro/requirements.txt b/.kokoro/requirements.txt index fa99c1290..66a2172a7 100644 --- a/.kokoro/requirements.txt +++ b/.kokoro/requirements.txt @@ -1,6 +1,6 @@ # -# This file is autogenerated by pip-compile with python 3.10 -# To update, run: +# This file is autogenerated by pip-compile with Python 3.9 +# by the following command: # # pip-compile --allow-unsafe --generate-hashes requirements.in # @@ -335,9 +335,9 @@ more-itertools==9.0.0 \ --hash=sha256:250e83d7e81d0c87ca6bd942e6aeab8cc9daa6096d12c5308f3f92fa5e5c1f41 \ --hash=sha256:5a6257e40878ef0520b1803990e3e22303a41b5714006c32a3fd8304b26ea1ab # via jaraco-classes -nox==2022.8.7 \ - --hash=sha256:1b894940551dc5c389f9271d197ca5d655d40bdc6ccf93ed6880e4042760a34b \ - --hash=sha256:96cca88779e08282a699d672258ec01eb7c792d35bbbf538c723172bce23212c +nox==2022.11.21 \ + --hash=sha256:0e41a990e290e274cb205a976c4c97ee3c5234441a8132c8c3fd9ea3c22149eb \ + --hash=sha256:e21c31de0711d1274ca585a2c5fde36b1aa962005ba8e9322bf5eeed16dcd684 # via -r requirements.in packaging==21.3 \ --hash=sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb \ @@ -380,10 +380,6 @@ protobuf==3.20.3 \ # gcp-docuploader # gcp-releasetool # google-api-core -py==1.11.0 \ - --hash=sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719 \ - --hash=sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378 - # via nox pyasn1==0.4.8 \ --hash=sha256:39c7e2ec30515947ff4e87fb6f456dfc6e84857d34be479c9d4a4ba4bf46aa5d \ --hash=sha256:aef77c9fb94a3ac588e87841208bdec464471d9871bd5050a287cc9a475cd0ba From 45d3e4308c4f494228c2e6e18a36285c557cb0c3 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Thu, 23 Mar 2023 09:44:02 -0400 Subject: [PATCH 002/213] docs: Fix formatting of request arg in docstring (#756) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * docs: Fix formatting of request arg in docstring chore: Update gapic-generator-python to v1.9.1 PiperOrigin-RevId: 518604533 
Source-Link: https://github.com/googleapis/googleapis/commit/8a085aeddfa010af5bcef090827aac5255383d7e Source-Link: https://github.com/googleapis/googleapis-gen/commit/b2ab4b0a0ae2907e812c209198a74e0898afcb04 Copy-Tag: eyJwIjoiLmdpdGh1Yi8uT3dsQm90LnlhbWwiLCJoIjoiYjJhYjRiMGEwYWUyOTA3ZTgxMmMyMDkxOThhNzRlMDg5OGFmY2IwNCJ9 * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot --- .../bigtable_instance_admin/async_client.py | 13 +++++------- .../bigtable_instance_admin/client.py | 13 +++++------- .../transports/rest.py | 18 ---------------- .../bigtable_table_admin/async_client.py | 14 +++++++------ .../services/bigtable_table_admin/client.py | 14 +++++++------ .../bigtable_table_admin/transports/rest.py | 21 ------------------- .../services/bigtable/async_client.py | 15 ++++++------- .../bigtable_v2/services/bigtable/client.py | 15 ++++++------- .../services/bigtable/transports/rest.py | 9 -------- 9 files changed, 42 insertions(+), 90 deletions(-) diff --git a/google/cloud/bigtable_admin_v2/services/bigtable_instance_admin/async_client.py b/google/cloud/bigtable_admin_v2/services/bigtable_instance_admin/async_client.py index ddeaf979a..12811bcea 100644 --- a/google/cloud/bigtable_admin_v2/services/bigtable_instance_admin/async_client.py +++ b/google/cloud/bigtable_admin_v2/services/bigtable_instance_admin/async_client.py @@ -1137,8 +1137,8 @@ async def update_cluster( Args: request (Optional[Union[google.cloud.bigtable_admin_v2.types.Cluster, dict]]): - The request object. A resizable group of nodes in a - particular cloud location, capable of serving all + The request object. A resizable group of nodes in a particular cloud + location, capable of serving all [Tables][google.bigtable.admin.v2.Table] in the parent [Instance][google.bigtable.admin.v2.Instance]. retry (google.api_core.retry.Retry): Designation of what errors, if any, @@ -1880,8 +1880,7 @@ async def get_iam_policy( Args: request (Optional[Union[google.iam.v1.iam_policy_pb2.GetIamPolicyRequest, dict]]): - The request object. Request message for `GetIamPolicy` - method. + The request object. Request message for ``GetIamPolicy`` method. resource (:class:`str`): REQUIRED: The resource for which the policy is being requested. See the @@ -2030,8 +2029,7 @@ async def set_iam_policy( Args: request (Optional[Union[google.iam.v1.iam_policy_pb2.SetIamPolicyRequest, dict]]): - The request object. Request message for `SetIamPolicy` - method. + The request object. Request message for ``SetIamPolicy`` method. resource (:class:`str`): REQUIRED: The resource for which the policy is being specified. See the @@ -2171,8 +2169,7 @@ async def test_iam_permissions( Args: request (Optional[Union[google.iam.v1.iam_policy_pb2.TestIamPermissionsRequest, dict]]): - The request object. Request message for - `TestIamPermissions` method. + The request object. Request message for ``TestIamPermissions`` method. resource (:class:`str`): REQUIRED: The resource for which the policy detail is being requested. 
See diff --git a/google/cloud/bigtable_admin_v2/services/bigtable_instance_admin/client.py b/google/cloud/bigtable_admin_v2/services/bigtable_instance_admin/client.py index fcb767a3d..ecc9bf1e2 100644 --- a/google/cloud/bigtable_admin_v2/services/bigtable_instance_admin/client.py +++ b/google/cloud/bigtable_admin_v2/services/bigtable_instance_admin/client.py @@ -1400,8 +1400,8 @@ def update_cluster( Args: request (Union[google.cloud.bigtable_admin_v2.types.Cluster, dict]): - The request object. A resizable group of nodes in a - particular cloud location, capable of serving all + The request object. A resizable group of nodes in a particular cloud + location, capable of serving all [Tables][google.bigtable.admin.v2.Table] in the parent [Instance][google.bigtable.admin.v2.Instance]. retry (google.api_core.retry.Retry): Designation of what errors, if any, @@ -2104,8 +2104,7 @@ def get_iam_policy( Args: request (Union[google.iam.v1.iam_policy_pb2.GetIamPolicyRequest, dict]): - The request object. Request message for `GetIamPolicy` - method. + The request object. Request message for ``GetIamPolicy`` method. resource (str): REQUIRED: The resource for which the policy is being requested. See the @@ -2241,8 +2240,7 @@ def set_iam_policy( Args: request (Union[google.iam.v1.iam_policy_pb2.SetIamPolicyRequest, dict]): - The request object. Request message for `SetIamPolicy` - method. + The request object. Request message for ``SetIamPolicy`` method. resource (str): REQUIRED: The resource for which the policy is being specified. See the @@ -2379,8 +2377,7 @@ def test_iam_permissions( Args: request (Union[google.iam.v1.iam_policy_pb2.TestIamPermissionsRequest, dict]): - The request object. Request message for - `TestIamPermissions` method. + The request object. Request message for ``TestIamPermissions`` method. resource (str): REQUIRED: The resource for which the policy detail is being requested. See diff --git a/google/cloud/bigtable_admin_v2/services/bigtable_instance_admin/transports/rest.py b/google/cloud/bigtable_admin_v2/services/bigtable_instance_admin/transports/rest.py index 5ae9600a9..e9b94cf78 100644 --- a/google/cloud/bigtable_admin_v2/services/bigtable_instance_admin/transports/rest.py +++ b/google/cloud/bigtable_admin_v2/services/bigtable_instance_admin/transports/rest.py @@ -874,7 +874,6 @@ def __call__( request (~.bigtable_instance_admin.CreateAppProfileRequest): The request object. Request message for BigtableInstanceAdmin.CreateAppProfile. - retry (google.api_core.retry.Retry): Designation of what errors, if any, should be retried. timeout (float): The timeout for this request. @@ -978,7 +977,6 @@ def __call__( request (~.bigtable_instance_admin.CreateClusterRequest): The request object. Request message for BigtableInstanceAdmin.CreateCluster. - retry (google.api_core.retry.Retry): Designation of what errors, if any, should be retried. timeout (float): The timeout for this request. @@ -1076,7 +1074,6 @@ def __call__( request (~.bigtable_instance_admin.CreateInstanceRequest): The request object. Request message for BigtableInstanceAdmin.CreateInstance. - retry (google.api_core.retry.Retry): Designation of what errors, if any, should be retried. timeout (float): The timeout for this request. @@ -1176,7 +1173,6 @@ def __call__( request (~.bigtable_instance_admin.DeleteAppProfileRequest): The request object. Request message for BigtableInstanceAdmin.DeleteAppProfile. - retry (google.api_core.retry.Retry): Designation of what errors, if any, should be retried. 
timeout (float): The timeout for this request. @@ -1254,7 +1250,6 @@ def __call__( request (~.bigtable_instance_admin.DeleteClusterRequest): The request object. Request message for BigtableInstanceAdmin.DeleteCluster. - retry (google.api_core.retry.Retry): Designation of what errors, if any, should be retried. timeout (float): The timeout for this request. @@ -1330,7 +1325,6 @@ def __call__( request (~.bigtable_instance_admin.DeleteInstanceRequest): The request object. Request message for BigtableInstanceAdmin.DeleteInstance. - retry (google.api_core.retry.Retry): Designation of what errors, if any, should be retried. timeout (float): The timeout for this request. @@ -1406,7 +1400,6 @@ def __call__( request (~.bigtable_instance_admin.GetAppProfileRequest): The request object. Request message for BigtableInstanceAdmin.GetAppProfile. - retry (google.api_core.retry.Retry): Designation of what errors, if any, should be retried. timeout (float): The timeout for this request. @@ -1497,7 +1490,6 @@ def __call__( request (~.bigtable_instance_admin.GetClusterRequest): The request object. Request message for BigtableInstanceAdmin.GetCluster. - retry (google.api_core.retry.Retry): Designation of what errors, if any, should be retried. timeout (float): The timeout for this request. @@ -1759,7 +1751,6 @@ def __call__( request (~.bigtable_instance_admin.GetInstanceRequest): The request object. Request message for BigtableInstanceAdmin.GetInstance. - retry (google.api_core.retry.Retry): Designation of what errors, if any, should be retried. timeout (float): The timeout for this request. @@ -1853,7 +1844,6 @@ def __call__( request (~.bigtable_instance_admin.ListAppProfilesRequest): The request object. Request message for BigtableInstanceAdmin.ListAppProfiles. - retry (google.api_core.retry.Retry): Designation of what errors, if any, should be retried. timeout (float): The timeout for this request. @@ -1945,7 +1935,6 @@ def __call__( request (~.bigtable_instance_admin.ListClustersRequest): The request object. Request message for BigtableInstanceAdmin.ListClusters. - retry (google.api_core.retry.Retry): Designation of what errors, if any, should be retried. timeout (float): The timeout for this request. @@ -2035,7 +2024,6 @@ def __call__( request (~.bigtable_instance_admin.ListHotTabletsRequest): The request object. Request message for BigtableInstanceAdmin.ListHotTablets. - retry (google.api_core.retry.Retry): Designation of what errors, if any, should be retried. timeout (float): The timeout for this request. @@ -2127,7 +2115,6 @@ def __call__( request (~.bigtable_instance_admin.ListInstancesRequest): The request object. Request message for BigtableInstanceAdmin.ListInstances. - retry (google.api_core.retry.Retry): Designation of what errors, if any, should be retried. timeout (float): The timeout for this request. @@ -2219,7 +2206,6 @@ def __call__( request (~.bigtable_instance_admin.PartialUpdateClusterRequest): The request object. Request message for BigtableInstanceAdmin.PartialUpdateCluster. - retry (google.api_core.retry.Retry): Designation of what errors, if any, should be retried. timeout (float): The timeout for this request. @@ -2321,7 +2307,6 @@ def __call__( request (~.bigtable_instance_admin.PartialUpdateInstanceRequest): The request object. Request message for BigtableInstanceAdmin.PartialUpdateInstance. - retry (google.api_core.retry.Retry): Designation of what errors, if any, should be retried. timeout (float): The timeout for this request. 
@@ -2692,7 +2677,6 @@ def __call__( request (~.bigtable_instance_admin.UpdateAppProfileRequest): The request object. Request message for BigtableInstanceAdmin.UpdateAppProfile. - retry (google.api_core.retry.Retry): Designation of what errors, if any, should be retried. timeout (float): The timeout for this request. @@ -2784,7 +2768,6 @@ def __call__( location, capable of serving all [Tables][google.bigtable.admin.v2.Table] in the parent [Instance][google.bigtable.admin.v2.Instance]. - retry (google.api_core.retry.Retry): Designation of what errors, if any, should be retried. timeout (float): The timeout for this request. @@ -2885,7 +2868,6 @@ def __call__( served from all [Clusters][google.bigtable.admin.v2.Cluster] in the instance. - retry (google.api_core.retry.Retry): Designation of what errors, if any, should be retried. timeout (float): The timeout for this request. diff --git a/google/cloud/bigtable_admin_v2/services/bigtable_table_admin/async_client.py b/google/cloud/bigtable_admin_v2/services/bigtable_table_admin/async_client.py index bc85e5c5d..91f059f8b 100644 --- a/google/cloud/bigtable_admin_v2/services/bigtable_table_admin/async_client.py +++ b/google/cloud/bigtable_admin_v2/services/bigtable_table_admin/async_client.py @@ -369,6 +369,7 @@ async def create_table_from_snapshot( request (Optional[Union[google.cloud.bigtable_admin_v2.types.CreateTableFromSnapshotRequest, dict]]): The request object. Request message for [google.bigtable.admin.v2.BigtableTableAdmin.CreateTableFromSnapshot][google.bigtable.admin.v2.BigtableTableAdmin.CreateTableFromSnapshot] + Note: This is a private alpha release of Cloud Bigtable snapshots. This feature is not currently available to most Cloud Bigtable customers. This feature might be @@ -1300,6 +1301,7 @@ async def snapshot_table( request (Optional[Union[google.cloud.bigtable_admin_v2.types.SnapshotTableRequest, dict]]): The request object. Request message for [google.bigtable.admin.v2.BigtableTableAdmin.SnapshotTable][google.bigtable.admin.v2.BigtableTableAdmin.SnapshotTable] + Note: This is a private alpha release of Cloud Bigtable snapshots. This feature is not currently available to most Cloud Bigtable customers. This feature might be @@ -1437,6 +1439,7 @@ async def get_snapshot( request (Optional[Union[google.cloud.bigtable_admin_v2.types.GetSnapshotRequest, dict]]): The request object. Request message for [google.bigtable.admin.v2.BigtableTableAdmin.GetSnapshot][google.bigtable.admin.v2.BigtableTableAdmin.GetSnapshot] + Note: This is a private alpha release of Cloud Bigtable snapshots. This feature is not currently available to most Cloud Bigtable customers. This feature might be @@ -1549,6 +1552,7 @@ async def list_snapshots( request (Optional[Union[google.cloud.bigtable_admin_v2.types.ListSnapshotsRequest, dict]]): The request object. Request message for [google.bigtable.admin.v2.BigtableTableAdmin.ListSnapshots][google.bigtable.admin.v2.BigtableTableAdmin.ListSnapshots] + Note: This is a private alpha release of Cloud Bigtable snapshots. This feature is not currently available to most Cloud Bigtable customers. This feature might be @@ -1672,6 +1676,7 @@ async def delete_snapshot( request (Optional[Union[google.cloud.bigtable_admin_v2.types.DeleteSnapshotRequest, dict]]): The request object. Request message for [google.bigtable.admin.v2.BigtableTableAdmin.DeleteSnapshot][google.bigtable.admin.v2.BigtableTableAdmin.DeleteSnapshot] + Note: This is a private alpha release of Cloud Bigtable snapshots. 
This feature is not currently available to most Cloud Bigtable customers. This feature might be @@ -2290,8 +2295,7 @@ async def get_iam_policy( Args: request (Optional[Union[google.iam.v1.iam_policy_pb2.GetIamPolicyRequest, dict]]): - The request object. Request message for `GetIamPolicy` - method. + The request object. Request message for ``GetIamPolicy`` method. resource (:class:`str`): REQUIRED: The resource for which the policy is being requested. See the @@ -2440,8 +2444,7 @@ async def set_iam_policy( Args: request (Optional[Union[google.iam.v1.iam_policy_pb2.SetIamPolicyRequest, dict]]): - The request object. Request message for `SetIamPolicy` - method. + The request object. Request message for ``SetIamPolicy`` method. resource (:class:`str`): REQUIRED: The resource for which the policy is being specified. See the @@ -2581,8 +2584,7 @@ async def test_iam_permissions( Args: request (Optional[Union[google.iam.v1.iam_policy_pb2.TestIamPermissionsRequest, dict]]): - The request object. Request message for - `TestIamPermissions` method. + The request object. Request message for ``TestIamPermissions`` method. resource (:class:`str`): REQUIRED: The resource for which the policy detail is being requested. See diff --git a/google/cloud/bigtable_admin_v2/services/bigtable_table_admin/client.py b/google/cloud/bigtable_admin_v2/services/bigtable_table_admin/client.py index aa7eaa197..efceae90a 100644 --- a/google/cloud/bigtable_admin_v2/services/bigtable_table_admin/client.py +++ b/google/cloud/bigtable_admin_v2/services/bigtable_table_admin/client.py @@ -696,6 +696,7 @@ def create_table_from_snapshot( request (Union[google.cloud.bigtable_admin_v2.types.CreateTableFromSnapshotRequest, dict]): The request object. Request message for [google.bigtable.admin.v2.BigtableTableAdmin.CreateTableFromSnapshot][google.bigtable.admin.v2.BigtableTableAdmin.CreateTableFromSnapshot] + Note: This is a private alpha release of Cloud Bigtable snapshots. This feature is not currently available to most Cloud Bigtable customers. This feature might be @@ -1594,6 +1595,7 @@ def snapshot_table( request (Union[google.cloud.bigtable_admin_v2.types.SnapshotTableRequest, dict]): The request object. Request message for [google.bigtable.admin.v2.BigtableTableAdmin.SnapshotTable][google.bigtable.admin.v2.BigtableTableAdmin.SnapshotTable] + Note: This is a private alpha release of Cloud Bigtable snapshots. This feature is not currently available to most Cloud Bigtable customers. This feature might be @@ -1731,6 +1733,7 @@ def get_snapshot( request (Union[google.cloud.bigtable_admin_v2.types.GetSnapshotRequest, dict]): The request object. Request message for [google.bigtable.admin.v2.BigtableTableAdmin.GetSnapshot][google.bigtable.admin.v2.BigtableTableAdmin.GetSnapshot] + Note: This is a private alpha release of Cloud Bigtable snapshots. This feature is not currently available to most Cloud Bigtable customers. This feature might be @@ -1833,6 +1836,7 @@ def list_snapshots( request (Union[google.cloud.bigtable_admin_v2.types.ListSnapshotsRequest, dict]): The request object. Request message for [google.bigtable.admin.v2.BigtableTableAdmin.ListSnapshots][google.bigtable.admin.v2.BigtableTableAdmin.ListSnapshots] + Note: This is a private alpha release of Cloud Bigtable snapshots. This feature is not currently available to most Cloud Bigtable customers. This feature might be @@ -1946,6 +1950,7 @@ def delete_snapshot( request (Union[google.cloud.bigtable_admin_v2.types.DeleteSnapshotRequest, dict]): The request object. 
Request message for [google.bigtable.admin.v2.BigtableTableAdmin.DeleteSnapshot][google.bigtable.admin.v2.BigtableTableAdmin.DeleteSnapshot] + Note: This is a private alpha release of Cloud Bigtable snapshots. This feature is not currently available to most Cloud Bigtable customers. This feature might be @@ -2545,8 +2550,7 @@ def get_iam_policy( Args: request (Union[google.iam.v1.iam_policy_pb2.GetIamPolicyRequest, dict]): - The request object. Request message for `GetIamPolicy` - method. + The request object. Request message for ``GetIamPolicy`` method. resource (str): REQUIRED: The resource for which the policy is being requested. See the @@ -2682,8 +2686,7 @@ def set_iam_policy( Args: request (Union[google.iam.v1.iam_policy_pb2.SetIamPolicyRequest, dict]): - The request object. Request message for `SetIamPolicy` - method. + The request object. Request message for ``SetIamPolicy`` method. resource (str): REQUIRED: The resource for which the policy is being specified. See the @@ -2820,8 +2823,7 @@ def test_iam_permissions( Args: request (Union[google.iam.v1.iam_policy_pb2.TestIamPermissionsRequest, dict]): - The request object. Request message for - `TestIamPermissions` method. + The request object. Request message for ``TestIamPermissions`` method. resource (str): REQUIRED: The resource for which the policy detail is being requested. See diff --git a/google/cloud/bigtable_admin_v2/services/bigtable_table_admin/transports/rest.py b/google/cloud/bigtable_admin_v2/services/bigtable_table_admin/transports/rest.py index 5c25ac556..4d5b2ed1c 100644 --- a/google/cloud/bigtable_admin_v2/services/bigtable_table_admin/transports/rest.py +++ b/google/cloud/bigtable_admin_v2/services/bigtable_table_admin/transports/rest.py @@ -938,7 +938,6 @@ def __call__( request (~.bigtable_table_admin.CheckConsistencyRequest): The request object. Request message for [google.bigtable.admin.v2.BigtableTableAdmin.CheckConsistency][google.bigtable.admin.v2.BigtableTableAdmin.CheckConsistency] - retry (google.api_core.retry.Retry): Designation of what errors, if any, should be retried. timeout (float): The timeout for this request. @@ -1041,7 +1040,6 @@ def __call__( request (~.bigtable_table_admin.CreateBackupRequest): The request object. The request for [CreateBackup][google.bigtable.admin.v2.BigtableTableAdmin.CreateBackup]. - retry (google.api_core.retry.Retry): Designation of what errors, if any, should be retried. timeout (float): The timeout for this request. @@ -1139,7 +1137,6 @@ def __call__( request (~.bigtable_table_admin.CreateTableRequest): The request object. Request message for [google.bigtable.admin.v2.BigtableTableAdmin.CreateTable][google.bigtable.admin.v2.BigtableTableAdmin.CreateTable] - retry (google.api_core.retry.Retry): Designation of what errors, if any, should be retried. timeout (float): The timeout for this request. @@ -1248,7 +1245,6 @@ def __call__( changed in backward-incompatible ways and is not recommended for production use. It is not subject to any SLA or deprecation policy. - retry (google.api_core.retry.Retry): Designation of what errors, if any, should be retried. timeout (float): The timeout for this request. @@ -1348,7 +1344,6 @@ def __call__( request (~.bigtable_table_admin.DeleteBackupRequest): The request object. The request for [DeleteBackup][google.bigtable.admin.v2.BigtableTableAdmin.DeleteBackup]. - retry (google.api_core.retry.Retry): Designation of what errors, if any, should be retried. timeout (float): The timeout for this request. 
@@ -1431,7 +1426,6 @@ def __call__( changed in backward-incompatible ways and is not recommended for production use. It is not subject to any SLA or deprecation policy. - retry (google.api_core.retry.Retry): Designation of what errors, if any, should be retried. timeout (float): The timeout for this request. @@ -1507,7 +1501,6 @@ def __call__( request (~.bigtable_table_admin.DeleteTableRequest): The request object. Request message for [google.bigtable.admin.v2.BigtableTableAdmin.DeleteTable][google.bigtable.admin.v2.BigtableTableAdmin.DeleteTable] - retry (google.api_core.retry.Retry): Designation of what errors, if any, should be retried. timeout (float): The timeout for this request. @@ -1583,7 +1576,6 @@ def __call__( request (~.bigtable_table_admin.DropRowRangeRequest): The request object. Request message for [google.bigtable.admin.v2.BigtableTableAdmin.DropRowRange][google.bigtable.admin.v2.BigtableTableAdmin.DropRowRange] - retry (google.api_core.retry.Retry): Designation of what errors, if any, should be retried. timeout (float): The timeout for this request. @@ -1669,7 +1661,6 @@ def __call__( request (~.bigtable_table_admin.GenerateConsistencyTokenRequest): The request object. Request message for [google.bigtable.admin.v2.BigtableTableAdmin.GenerateConsistencyToken][google.bigtable.admin.v2.BigtableTableAdmin.GenerateConsistencyToken] - retry (google.api_core.retry.Retry): Designation of what errors, if any, should be retried. timeout (float): The timeout for this request. @@ -1772,7 +1763,6 @@ def __call__( request (~.bigtable_table_admin.GetBackupRequest): The request object. The request for [GetBackup][google.bigtable.admin.v2.BigtableTableAdmin.GetBackup]. - retry (google.api_core.retry.Retry): Designation of what errors, if any, should be retried. timeout (float): The timeout for this request. @@ -2042,7 +2032,6 @@ def __call__( changed in backward-incompatible ways and is not recommended for production use. It is not subject to any SLA or deprecation policy. - retry (google.api_core.retry.Retry): Designation of what errors, if any, should be retried. timeout (float): The timeout for this request. @@ -2142,7 +2131,6 @@ def __call__( request (~.bigtable_table_admin.GetTableRequest): The request object. Request message for [google.bigtable.admin.v2.BigtableTableAdmin.GetTable][google.bigtable.admin.v2.BigtableTableAdmin.GetTable] - retry (google.api_core.retry.Retry): Designation of what errors, if any, should be retried. timeout (float): The timeout for this request. @@ -2234,7 +2222,6 @@ def __call__( request (~.bigtable_table_admin.ListBackupsRequest): The request object. The request for [ListBackups][google.bigtable.admin.v2.BigtableTableAdmin.ListBackups]. - retry (google.api_core.retry.Retry): Designation of what errors, if any, should be retried. timeout (float): The timeout for this request. @@ -2331,7 +2318,6 @@ def __call__( changed in backward-incompatible ways and is not recommended for production use. It is not subject to any SLA or deprecation policy. - retry (google.api_core.retry.Retry): Designation of what errors, if any, should be retried. timeout (float): The timeout for this request. @@ -2428,7 +2414,6 @@ def __call__( request (~.bigtable_table_admin.ListTablesRequest): The request object. Request message for [google.bigtable.admin.v2.BigtableTableAdmin.ListTables][google.bigtable.admin.v2.BigtableTableAdmin.ListTables] - retry (google.api_core.retry.Retry): Designation of what errors, if any, should be retried. timeout (float): The timeout for this request. 
@@ -2518,7 +2503,6 @@ def __call__( request (~.bigtable_table_admin.ModifyColumnFamiliesRequest): The request object. Request message for [google.bigtable.admin.v2.BigtableTableAdmin.ModifyColumnFamilies][google.bigtable.admin.v2.BigtableTableAdmin.ModifyColumnFamilies] - retry (google.api_core.retry.Retry): Designation of what errors, if any, should be retried. timeout (float): The timeout for this request. @@ -2621,7 +2605,6 @@ def __call__( request (~.bigtable_table_admin.RestoreTableRequest): The request object. The request for [RestoreTable][google.bigtable.admin.v2.BigtableTableAdmin.RestoreTable]. - retry (google.api_core.retry.Retry): Designation of what errors, if any, should be retried. timeout (float): The timeout for this request. @@ -2901,7 +2884,6 @@ def __call__( changed in backward-incompatible ways and is not recommended for production use. It is not subject to any SLA or deprecation policy. - retry (google.api_core.retry.Retry): Designation of what errors, if any, should be retried. timeout (float): The timeout for this request. @@ -3101,7 +3083,6 @@ def __call__( request (~.bigtable_table_admin.UndeleteTableRequest): The request object. Request message for [google.bigtable.admin.v2.BigtableTableAdmin.UndeleteTable][google.bigtable.admin.v2.BigtableTableAdmin.UndeleteTable] - retry (google.api_core.retry.Retry): Designation of what errors, if any, should be retried. timeout (float): The timeout for this request. @@ -3201,7 +3182,6 @@ def __call__( request (~.bigtable_table_admin.UpdateBackupRequest): The request object. The request for [UpdateBackup][google.bigtable.admin.v2.BigtableTableAdmin.UpdateBackup]. - retry (google.api_core.retry.Retry): Designation of what errors, if any, should be retried. timeout (float): The timeout for this request. @@ -3300,7 +3280,6 @@ def __call__( request (~.bigtable_table_admin.UpdateTableRequest): The request object. The request for [UpdateTable][google.bigtable.admin.v2.BigtableTableAdmin.UpdateTable]. - retry (google.api_core.retry.Retry): Designation of what errors, if any, should be retried. timeout (float): The timeout for this request. diff --git a/google/cloud/bigtable_v2/services/bigtable/async_client.py b/google/cloud/bigtable_v2/services/bigtable/async_client.py index 3465569b3..1233e1288 100644 --- a/google/cloud/bigtable_v2/services/bigtable/async_client.py +++ b/google/cloud/bigtable_v2/services/bigtable/async_client.py @@ -807,8 +807,8 @@ async def ping_and_warm( Args: request (Optional[Union[google.cloud.bigtable_v2.types.PingAndWarmRequest, dict]]): - The request object. Request message for client - connection keep-alive and warming. + The request object. Request message for client connection + keep-alive and warming. name (:class:`str`): Required. The unique name of the instance to check permissions for as well as respond. Values are of the @@ -1027,8 +1027,9 @@ def generate_initial_change_stream_partitions( Args: request (Optional[Union[google.cloud.bigtable_v2.types.GenerateInitialChangeStreamPartitionsRequest, dict]]): - The request object. NOTE: This API is intended to be - used by Apache Beam BigtableIO. Request message for + The request object. NOTE: This API is intended to be used + by Apache Beam BigtableIO. Request + message for Bigtable.GenerateInitialChangeStreamPartitions. table_name (:class:`str`): Required. 
The unique name of the table from which to get @@ -1126,9 +1127,9 @@ def read_change_stream( Args: request (Optional[Union[google.cloud.bigtable_v2.types.ReadChangeStreamRequest, dict]]): - The request object. NOTE: This API is intended to be - used by Apache Beam BigtableIO. Request message for - Bigtable.ReadChangeStream. + The request object. NOTE: This API is intended to be used + by Apache Beam BigtableIO. Request + message for Bigtable.ReadChangeStream. table_name (:class:`str`): Required. The unique name of the table from which to read a change stream. Values are of the form diff --git a/google/cloud/bigtable_v2/services/bigtable/client.py b/google/cloud/bigtable_v2/services/bigtable/client.py index 37ab65fe2..38618fa31 100644 --- a/google/cloud/bigtable_v2/services/bigtable/client.py +++ b/google/cloud/bigtable_v2/services/bigtable/client.py @@ -1091,8 +1091,8 @@ def ping_and_warm( Args: request (Union[google.cloud.bigtable_v2.types.PingAndWarmRequest, dict]): - The request object. Request message for client - connection keep-alive and warming. + The request object. Request message for client connection + keep-alive and warming. name (str): Required. The unique name of the instance to check permissions for as well as respond. Values are of the @@ -1327,8 +1327,9 @@ def generate_initial_change_stream_partitions( Args: request (Union[google.cloud.bigtable_v2.types.GenerateInitialChangeStreamPartitionsRequest, dict]): - The request object. NOTE: This API is intended to be - used by Apache Beam BigtableIO. Request message for + The request object. NOTE: This API is intended to be used + by Apache Beam BigtableIO. Request + message for Bigtable.GenerateInitialChangeStreamPartitions. table_name (str): Required. The unique name of the table from which to get @@ -1430,9 +1431,9 @@ def read_change_stream( Args: request (Union[google.cloud.bigtable_v2.types.ReadChangeStreamRequest, dict]): - The request object. NOTE: This API is intended to be - used by Apache Beam BigtableIO. Request message for - Bigtable.ReadChangeStream. + The request object. NOTE: This API is intended to be used + by Apache Beam BigtableIO. Request + message for Bigtable.ReadChangeStream. table_name (str): Required. The unique name of the table from which to read a change stream. Values are of the form diff --git a/google/cloud/bigtable_v2/services/bigtable/transports/rest.py b/google/cloud/bigtable_v2/services/bigtable/transports/rest.py index ee9cb046f..4343fbb90 100644 --- a/google/cloud/bigtable_v2/services/bigtable/transports/rest.py +++ b/google/cloud/bigtable_v2/services/bigtable/transports/rest.py @@ -471,7 +471,6 @@ def __call__( request (~.bigtable.CheckAndMutateRowRequest): The request object. Request message for Bigtable.CheckAndMutateRow. - retry (google.api_core.retry.Retry): Designation of what errors, if any, should be retried. timeout (float): The timeout for this request. @@ -575,7 +574,6 @@ def __call__( by Apache Beam BigtableIO. Request message for Bigtable.GenerateInitialChangeStreamPartitions. - retry (google.api_core.retry.Retry): Designation of what errors, if any, should be retried. timeout (float): The timeout for this request. @@ -684,7 +682,6 @@ def __call__( request (~.bigtable.MutateRowRequest): The request object. Request message for Bigtable.MutateRow. - retry (google.api_core.retry.Retry): Designation of what errors, if any, should be retried. timeout (float): The timeout for this request. @@ -783,7 +780,6 @@ def __call__( request (~.bigtable.MutateRowsRequest): The request object. 
Request message for BigtableService.MutateRows. - retry (google.api_core.retry.Retry): Designation of what errors, if any, should be retried. timeout (float): The timeout for this request. @@ -881,7 +877,6 @@ def __call__( request (~.bigtable.PingAndWarmRequest): The request object. Request message for client connection keep-alive and warming. - retry (google.api_core.retry.Retry): Designation of what errors, if any, should be retried. timeout (float): The timeout for this request. @@ -982,7 +977,6 @@ def __call__( The request object. NOTE: This API is intended to be used by Apache Beam BigtableIO. Request message for Bigtable.ReadChangeStream. - retry (google.api_core.retry.Retry): Designation of what errors, if any, should be retried. timeout (float): The timeout for this request. @@ -1083,7 +1077,6 @@ def __call__( request (~.bigtable.ReadModifyWriteRowRequest): The request object. Request message for Bigtable.ReadModifyWriteRow. - retry (google.api_core.retry.Retry): Designation of what errors, if any, should be retried. timeout (float): The timeout for this request. @@ -1184,7 +1177,6 @@ def __call__( request (~.bigtable.ReadRowsRequest): The request object. Request message for Bigtable.ReadRows. - retry (google.api_core.retry.Retry): Designation of what errors, if any, should be retried. timeout (float): The timeout for this request. @@ -1280,7 +1272,6 @@ def __call__( request (~.bigtable.SampleRowKeysRequest): The request object. Request message for Bigtable.SampleRowKeys. - retry (google.api_core.retry.Retry): Designation of what errors, if any, should be retried. timeout (float): The timeout for this request. From 505273b72bf83d8f92d0e0a92d62f22bce96cc3d Mon Sep 17 00:00:00 2001 From: Mariatta Wijaya Date: Thu, 30 Mar 2023 15:07:35 -0700 Subject: [PATCH 003/213] fix: Pass the "retry" when calling read_rows. (#759) --- google/cloud/bigtable/row_data.py | 4 +++- tests/unit/test_row_data.py | 4 +++- tests/unit/test_table.py | 12 +++++++++--- 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/google/cloud/bigtable/row_data.py b/google/cloud/bigtable/row_data.py index a50fab1ee..e11379108 100644 --- a/google/cloud/bigtable/row_data.py +++ b/google/cloud/bigtable/row_data.py @@ -157,7 +157,9 @@ def __init__(self, read_method, request, retry=DEFAULT_RETRY_READ_ROWS): # Otherwise there is a risk of entering an infinite loop that resets # the timeout counter just before it being triggered. The increment # by 1 second here is customary but should not be much less than that. 
- self.response_iterator = read_method(request, timeout=self.retry._deadline + 1) + self.response_iterator = read_method( + request, timeout=self.retry._deadline + 1, retry=self.retry + ) self.rows = {} diff --git a/tests/unit/test_row_data.py b/tests/unit/test_row_data.py index 382a81ef1..fba69ceba 100644 --- a/tests/unit/test_row_data.py +++ b/tests/unit/test_row_data.py @@ -446,7 +446,9 @@ def test_partial_rows_data_constructor_with_retry(): client._data_stub.ReadRows, request, retry ) partial_rows_data.read_method.assert_called_once_with( - request, timeout=DEFAULT_RETRY_READ_ROWS.deadline + 1 + request, + timeout=DEFAULT_RETRY_READ_ROWS.deadline + 1, + retry=DEFAULT_RETRY_READ_ROWS, ) assert partial_rows_data.request is request assert partial_rows_data.rows == {} diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index e66a8f0f6..3d7d2e8ee 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -643,6 +643,7 @@ def _table_read_row_helper(chunks, expected_result, app_profile_id=None): from google.cloud.bigtable import table as MUT from google.cloud.bigtable.row_set import RowSet from google.cloud.bigtable.row_filters import RowSampleFilter + from google.cloud.bigtable.row_data import DEFAULT_RETRY_READ_ROWS credentials = _make_credentials() client = _make_client(project="project-id", credentials=credentials, admin=True) @@ -691,7 +692,9 @@ def mock_create_row_request(table_name, **kwargs): assert result == expected_result assert mock_created == expected_request - data_api.read_rows.assert_called_once_with(request_pb, timeout=61.0) + data_api.read_rows.assert_called_once_with( + request_pb, timeout=61.0, retry=DEFAULT_RETRY_READ_ROWS + ) def test_table_read_row_miss_no__responses(): @@ -906,7 +909,7 @@ def mock_create_row_request(table_name, **kwargs): } assert mock_created == [(table.name, created_kwargs)] - data_api.read_rows.assert_called_once_with(request_pb, timeout=61.0) + data_api.read_rows.assert_called_once_with(request_pb, timeout=61.0, retry=retry) def test_table_read_retry_rows(): @@ -1082,6 +1085,7 @@ def test_table_yield_rows_with_row_set(): from google.cloud.bigtable.row_set import RowSet from google.cloud.bigtable.row_set import RowRange from google.cloud.bigtable.table import _create_row_request + from google.cloud.bigtable.row_data import DEFAULT_RETRY_READ_ROWS credentials = _make_credentials() client = _make_client(project="project-id", credentials=credentials, admin=True) @@ -1149,7 +1153,9 @@ def test_table_yield_rows_with_row_set(): end_key=ROW_KEY_2, ) expected_request.rows.row_keys.append(ROW_KEY_3) - data_api.read_rows.assert_called_once_with(expected_request, timeout=61.0) + data_api.read_rows.assert_called_once_with( + expected_request, timeout=61.0, retry=DEFAULT_RETRY_READ_ROWS + ) def test_table_sample_row_keys(): From efe332c6932292b808e1cc02276d7a41c6711f87 Mon Sep 17 00:00:00 2001 From: Shweta Shetye-Sabharwal Date: Wed, 5 Apr 2023 18:34:04 +0000 Subject: [PATCH 004/213] chore(samples): Fixed a typo in the readme (#760) --- samples/hello/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/hello/README.md b/samples/hello/README.md index 0e1fc92f9..b3779fb43 100644 --- a/samples/hello/README.md +++ b/samples/hello/README.md @@ -17,7 +17,7 @@ Demonstrates how to connect to Cloud Bigtable and run some basic operations. Mor To run this sample: -1. If this is your first time working with GCP products, you will need to set up [the Cloud SDK][cloud_sdk] or utilize [Google Cloud Shell][gcloud_shell]. 
This sample may [require authetication][authentication] and you will need to [enable billing][enable_billing]. +1. If this is your first time working with GCP products, you will need to set up [the Cloud SDK][cloud_sdk] or utilize [Google Cloud Shell][gcloud_shell]. This sample may [require authentication][authentication] and you will need to [enable billing][enable_billing]. 1. Make a fork of this repo and clone the branch locally, then navigate to the sample directory you want to use. From 18b8f1878ff3d25e95d0880c1d59f34f4e962d6b Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 6 Apr 2023 17:14:36 +0100 Subject: [PATCH 005/213] chore(deps): update all dependencies (#746) --- samples/beam/requirements.txt | 2 +- samples/metricscaler/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/beam/requirements.txt b/samples/beam/requirements.txt index bcb270e72..8be9b98e0 100644 --- a/samples/beam/requirements.txt +++ b/samples/beam/requirements.txt @@ -1,3 +1,3 @@ -apache-beam==2.45.0 +apache-beam==2.46.0 google-cloud-bigtable==2.17.0 google-cloud-core==2.3.2 diff --git a/samples/metricscaler/requirements.txt b/samples/metricscaler/requirements.txt index e9647809f..02e08b4c8 100644 --- a/samples/metricscaler/requirements.txt +++ b/samples/metricscaler/requirements.txt @@ -1,2 +1,2 @@ google-cloud-bigtable==2.17.0 -google-cloud-monitoring==2.14.1 +google-cloud-monitoring==2.14.2 From 128b4e1f3eea2dad903d84c8f2933b17a5f0d226 Mon Sep 17 00:00:00 2001 From: Billy Jacobson Date: Fri, 14 Apr 2023 11:08:12 -0400 Subject: [PATCH 006/213] docs: fix delete from column family example (#764) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/python-bigtable/issues/new/choose) before writing your code! 
That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes # 🦕 --- samples/snippets/deletes/deletes_snippets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/deletes/deletes_snippets.py b/samples/snippets/deletes/deletes_snippets.py index 4e89189db..8e78083bf 100644 --- a/samples/snippets/deletes/deletes_snippets.py +++ b/samples/snippets/deletes/deletes_snippets.py @@ -38,7 +38,7 @@ def delete_from_column_family(project_id, instance_id, table_id): table = instance.table(table_id) row = table.row("phone#4c410523#20190501") row.delete_cells( - column_family_id="cell_plan", columns=["data_plan_01gb", "data_plan_05gb"] + column_family_id="cell_plan", columns=row.ALL_COLUMNS ) row.commit() From b5d10d61fd48ded8655e07b8ecb768c539ee7bf4 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Tue, 18 Apr 2023 19:00:48 +0200 Subject: [PATCH 007/213] chore(deps): update all dependencies (#763) --- samples/beam/requirements-test.txt | 2 +- samples/hello/requirements-test.txt | 2 +- samples/hello_happybase/requirements-test.txt | 2 +- samples/instanceadmin/requirements-test.txt | 2 +- samples/metricscaler/requirements-test.txt | 4 ++-- samples/quickstart/requirements-test.txt | 2 +- samples/quickstart_happybase/requirements-test.txt | 2 +- samples/snippets/deletes/requirements-test.txt | 2 +- samples/snippets/filters/requirements-test.txt | 2 +- samples/snippets/reads/requirements-test.txt | 2 +- samples/snippets/writes/requirements-test.txt | 2 +- samples/tableadmin/requirements-test.txt | 2 +- 12 files changed, 13 insertions(+), 13 deletions(-) diff --git a/samples/beam/requirements-test.txt b/samples/beam/requirements-test.txt index c021c5b5b..c4d04a08d 100644 --- a/samples/beam/requirements-test.txt +++ b/samples/beam/requirements-test.txt @@ -1 +1 @@ -pytest==7.2.2 +pytest==7.3.1 diff --git a/samples/hello/requirements-test.txt b/samples/hello/requirements-test.txt index c021c5b5b..c4d04a08d 100644 --- a/samples/hello/requirements-test.txt +++ b/samples/hello/requirements-test.txt @@ -1 +1 @@ -pytest==7.2.2 +pytest==7.3.1 diff --git a/samples/hello_happybase/requirements-test.txt b/samples/hello_happybase/requirements-test.txt index c021c5b5b..c4d04a08d 100644 --- a/samples/hello_happybase/requirements-test.txt +++ b/samples/hello_happybase/requirements-test.txt @@ -1 +1 @@ -pytest==7.2.2 +pytest==7.3.1 diff --git a/samples/instanceadmin/requirements-test.txt b/samples/instanceadmin/requirements-test.txt index c021c5b5b..c4d04a08d 100644 --- a/samples/instanceadmin/requirements-test.txt +++ b/samples/instanceadmin/requirements-test.txt @@ -1 +1 @@ -pytest==7.2.2 +pytest==7.3.1 diff --git a/samples/metricscaler/requirements-test.txt b/samples/metricscaler/requirements-test.txt index 82f315c7f..761227068 100644 --- a/samples/metricscaler/requirements-test.txt +++ b/samples/metricscaler/requirements-test.txt @@ -1,3 +1,3 @@ -pytest==7.2.2 -mock==5.0.1 +pytest==7.3.1 +mock==5.0.2 google-cloud-testutils diff --git a/samples/quickstart/requirements-test.txt b/samples/quickstart/requirements-test.txt index c021c5b5b..c4d04a08d 100644 --- a/samples/quickstart/requirements-test.txt +++ b/samples/quickstart/requirements-test.txt @@ -1 +1 @@ -pytest==7.2.2 +pytest==7.3.1 diff --git a/samples/quickstart_happybase/requirements-test.txt b/samples/quickstart_happybase/requirements-test.txt 
index c021c5b5b..c4d04a08d 100644 --- a/samples/quickstart_happybase/requirements-test.txt +++ b/samples/quickstart_happybase/requirements-test.txt @@ -1 +1 @@ -pytest==7.2.2 +pytest==7.3.1 diff --git a/samples/snippets/deletes/requirements-test.txt b/samples/snippets/deletes/requirements-test.txt index c021c5b5b..c4d04a08d 100644 --- a/samples/snippets/deletes/requirements-test.txt +++ b/samples/snippets/deletes/requirements-test.txt @@ -1 +1 @@ -pytest==7.2.2 +pytest==7.3.1 diff --git a/samples/snippets/filters/requirements-test.txt b/samples/snippets/filters/requirements-test.txt index c021c5b5b..c4d04a08d 100644 --- a/samples/snippets/filters/requirements-test.txt +++ b/samples/snippets/filters/requirements-test.txt @@ -1 +1 @@ -pytest==7.2.2 +pytest==7.3.1 diff --git a/samples/snippets/reads/requirements-test.txt b/samples/snippets/reads/requirements-test.txt index c021c5b5b..c4d04a08d 100644 --- a/samples/snippets/reads/requirements-test.txt +++ b/samples/snippets/reads/requirements-test.txt @@ -1 +1 @@ -pytest==7.2.2 +pytest==7.3.1 diff --git a/samples/snippets/writes/requirements-test.txt b/samples/snippets/writes/requirements-test.txt index 8d6117f16..96aa71dab 100644 --- a/samples/snippets/writes/requirements-test.txt +++ b/samples/snippets/writes/requirements-test.txt @@ -1,2 +1,2 @@ backoff==2.2.1 -pytest==7.2.2 +pytest==7.3.1 diff --git a/samples/tableadmin/requirements-test.txt b/samples/tableadmin/requirements-test.txt index d3ddc990f..ca1f33bd3 100644 --- a/samples/tableadmin/requirements-test.txt +++ b/samples/tableadmin/requirements-test.txt @@ -1,2 +1,2 @@ -pytest==7.2.2 +pytest==7.3.1 google-cloud-testutils==1.3.3 From 1d02154823ddff1e899648b276714eed1df5aeae Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 24 Apr 2023 13:15:59 -0700 Subject: [PATCH 008/213] added initial implementation of mutate_rows --- google/cloud/bigtable/client.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index dfd8b16cd..4caee0217 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -568,7 +568,26 @@ async def mutate_row( - GoogleAPIError: raised on non-idempotent operations that cannot be safely retried. 
""" - raise NotImplementedError + operation_timeout = operation_timeout or self.default_operation_timeout + per_request_timeout = per_request_timeout or self.default_per_request_timeout + + if operation_timeout <= 0: + raise ValueError("operation_timeout must be greater than 0") + if per_request_timeout is not None and per_request_timeout <= 0: + raise ValueError("per_request_timeout must be greater than 0") + if per_request_timeout is not None and per_request_timeout > operation_timeout: + raise ValueError("per_request_timeout must be less than operation_timeout") + + if isinstance(row_key, str): + row_key = row_key.encode("utf-8") + request = {"table_name": self.table_name, "row_key": row_key} + if self.app_profile_id: + request["app_profile_id"] = self.app_profile_id + + if isinstance(mutations, Mutation): + mutations = [mutations] + request["mutations"] = [mutation.to_dict() for mutation in mutations] + await self._gapic_client.mutate_row(request) async def bulk_mutate_rows( self, From ab63cbaa5249d98e33fe408726f9065ebb7e952c Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 24 Apr 2023 13:22:41 -0700 Subject: [PATCH 009/213] implemented mutation models --- google/cloud/bigtable/mutations.py | 83 ++++++++++++++++++++++++------ 1 file changed, 66 insertions(+), 17 deletions(-) diff --git a/google/cloud/bigtable/mutations.py b/google/cloud/bigtable/mutations.py index 4ff59bff9..176259433 100644 --- a/google/cloud/bigtable/mutations.py +++ b/google/cloud/bigtable/mutations.py @@ -13,42 +13,91 @@ # limitations under the License. # from __future__ import annotations - +from typing import TYPE_CHECKING, Any from dataclasses import dataclass -from google.cloud.bigtable.row import family_id, qualifier, row_key +from abc import ABC, abstractmethod + +if TYPE_CHECKING: + from google.cloud.bigtable import RowKeySamples -class Mutation: - pass +class Mutation(ABC): + """Model class for mutations""" + + @abstractmethod + def _to_dict(self) -> dict[str, Any]: + raise NotImplementedError @dataclass class SetCell(Mutation): - family: family_id - column_qualifier: qualifier - new_value: bytes | str | int - timestamp_ms: int | None = None + family:str + qualifier:bytes + new_value:bytes|str|int + timestamp_micros:int|None + + def _to_dict(self) -> dict[str, Any]: + return { + "set_cell": { + "family_name": self.family, + "column_qualifier": self.qualifier, + "timestamp_micros": self.timestamp_micros if self.timestamp_micros is not None else -1, + "value": self.new_value, + } + } @dataclass class DeleteRangeFromColumn(Mutation): - family: family_id - column_qualifier: qualifier - start_timestamp_ms: int - end_timestamp_ms: int + family:str + qualifier:bytes + # None represents 0 + start_timestamp_micros:int|None + # None represents infinity + end_timestamp_micros:int|None + def _to_dict(self) -> dict[str, Any]: + timestamp_range = {} + if self.start_timestamp_micros is not None: + timestamp_range["start_timestamp_micros"] = self.start_timestamp_micros + if self.end_timestamp_micros is not None: + timestamp_range["end_timestamp_micros"] = self.end_timestamp_micros + return { + "delete_from_column": { + "family_name": self.family, + "column_qualifier": self.qualifier, + "time_range": timestamp_range, + } + } @dataclass class DeleteAllFromFamily(Mutation): - family_to_delete: family_id + family_to_delete:str + + def _to_dict(self) -> dict[str, Any]: + return { + "delete_from_family": { + "family_name": self.family_to_delete, + } + } @dataclass class DeleteAllFromRow(Mutation): - pass + + def 
_to_dict(self) -> dict[str, Any]: + return { + "delete_from_row": {}, + } @dataclass -class BulkMutationsEntry: - row: row_key - mutations: list[Mutation] | Mutation +class BulkMutationsEntry(): + row_key:bytes + mutations: list[Mutation]|Mutation + + def _to_dict(self) -> dict[str, Any]: + return { + "row_key": self.row_key, + "mutations": [mutation._to_dict() for mutation in self.mutations] + } From cf9daa590937401b658f275c4d93a375123295ad Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 24 Apr 2023 13:31:13 -0700 Subject: [PATCH 010/213] added retries to mutate_row --- google/cloud/bigtable/client.py | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 4caee0217..fc751ad9d 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -32,7 +32,8 @@ ) from google.cloud.client import ClientWithProject from google.api_core.exceptions import GoogleAPICallError - +from google.api_core import retry_async as retries +from google.api_core import exceptions as core_exceptions import google.auth.credentials import google.auth._default @@ -586,8 +587,20 @@ async def mutate_row( if isinstance(mutations, Mutation): mutations = [mutations] - request["mutations"] = [mutation.to_dict() for mutation in mutations] - await self._gapic_client.mutate_row(request) + request["mutations"] = [mutation._to_dict() for mutation in mutations] + + retry = retries.AsyncRetry( + predicate = retries.if_exception_type( + core_exceptions.DeadlineExceeded, + core_exceptions.ServiceUnavailable, + core_exceptions.Aborted + ), + timeout = operation_timeout, + initial=0.01, + multiplier=2, + maximum=60, + ) + await retry(self._gapic_client.mutate_row)(request, timeout=per_request_timeout) async def bulk_mutate_rows( self, From 1247da47286bd36b2bcafd8686b85158886a81a1 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 24 Apr 2023 13:38:39 -0700 Subject: [PATCH 011/213] return exception group if possible --- google/cloud/bigtable/client.py | 39 +++++++++++++++++++++++++++------ 1 file changed, 32 insertions(+), 7 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index fc751ad9d..1e2e21c90 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -39,6 +39,7 @@ import google.auth._default from google.api_core import client_options as client_options_lib +from google.cloud.bigtable.exceptions import RetryExceptionGroup if TYPE_CHECKING: from google.cloud.bigtable.mutations import Mutation, BulkMutationsEntry @@ -589,18 +590,42 @@ async def mutate_row( mutations = [mutations] request["mutations"] = [mutation._to_dict() for mutation in mutations] + transient_errors = [] + predicate = retries.if_exception_type( + core_exceptions.DeadlineExceeded, + core_exceptions.ServiceUnavailable, + core_exceptions.Aborted, + ) + def on_error_fn(exc): + if predicate(exc): + transient_errors.append(exc) + retry = retries.AsyncRetry( - predicate = retries.if_exception_type( - core_exceptions.DeadlineExceeded, - core_exceptions.ServiceUnavailable, - core_exceptions.Aborted - ), - timeout = operation_timeout, + predicate=predicate, + on_error=on_error_fn, + timeout=operation_timeout, initial=0.01, multiplier=2, maximum=60, ) - await retry(self._gapic_client.mutate_row)(request, timeout=per_request_timeout) + try: + await retry(self._gapic_client.mutate_row)(request, timeout=per_request_timeout) + except core_exceptions.RetryError as e: + # 
raised by AsyncRetry after operation deadline exceeded + # TODO: merge with similar logic in ReadRowsIterator + new_exc = core_exceptions.DeadlineExceeded( + f"operation_timeout of {operation_timeout:0.1f}s exceeded" + ) + source_exc = None + if transient_errors: + source_exc = RetryExceptionGroup( + f"{len(transient_errors)} failed attempts", + transient_errors, + ) + new_exc.__cause__ = source_exc + await self._finish_with_error(new_exc) + raise new_exc from source_exc + async def bulk_mutate_rows( self, From 3b3ed8ce865e51111783140b4a04723d7475a6a6 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 24 Apr 2023 13:47:21 -0700 Subject: [PATCH 012/213] check for idempotence --- google/cloud/bigtable/client.py | 19 +++++++++++++------ google/cloud/bigtable/mutations.py | 22 +++++++++++++++++++--- 2 files changed, 32 insertions(+), 9 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 1e2e21c90..1cf03af97 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -590,12 +590,19 @@ async def mutate_row( mutations = [mutations] request["mutations"] = [mutation._to_dict() for mutation in mutations] + if all(mutation.is_idempotent for mutation in mutations): + # mutations are all idempotent and safe to retry + predicate = retries.if_exception_type( + core_exceptions.DeadlineExceeded, + core_exceptions.ServiceUnavailable, + core_exceptions.Aborted, + ) + else: + # mutations should not be retried + predicate = retries.if_exception_type() + transient_errors = [] - predicate = retries.if_exception_type( - core_exceptions.DeadlineExceeded, - core_exceptions.ServiceUnavailable, - core_exceptions.Aborted, - ) + def on_error_fn(exc): if predicate(exc): transient_errors.append(exc) @@ -610,7 +617,7 @@ def on_error_fn(exc): ) try: await retry(self._gapic_client.mutate_row)(request, timeout=per_request_timeout) - except core_exceptions.RetryError as e: + except core_exceptions.RetryError: # raised by AsyncRetry after operation deadline exceeded # TODO: merge with similar logic in ReadRowsIterator new_exc = core_exceptions.DeadlineExceeded( diff --git a/google/cloud/bigtable/mutations.py b/google/cloud/bigtable/mutations.py index 176259433..f175b3f78 100644 --- a/google/cloud/bigtable/mutations.py +++ b/google/cloud/bigtable/mutations.py @@ -28,6 +28,13 @@ class Mutation(ABC): def _to_dict(self) -> dict[str, Any]: raise NotImplementedError + def is_idempotent(self) -> bool: + """ + Check if the mutation is idempotent + If false, the mutation will not be retried + """ + return True + @dataclass class SetCell(Mutation): @@ -46,15 +53,19 @@ def _to_dict(self) -> dict[str, Any]: } } + def is_idempotent(self) -> bool: + """Check if the mutation is idempotent""" + return self.timestamp_micros is not None and self.timestamp_micros >= 0 + @dataclass class DeleteRangeFromColumn(Mutation): family:str qualifier:bytes # None represents 0 - start_timestamp_micros:int|None + start_timestamp_micros:int | None # None represents infinity - end_timestamp_micros:int|None + end_timestamp_micros:int | None def _to_dict(self) -> dict[str, Any]: timestamp_range = {} @@ -70,6 +81,7 @@ def _to_dict(self) -> dict[str, Any]: } } + @dataclass class DeleteAllFromFamily(Mutation): family_to_delete:str @@ -94,10 +106,14 @@ def _to_dict(self) -> dict[str, Any]: @dataclass class BulkMutationsEntry(): row_key:bytes - mutations: list[Mutation]|Mutation + mutations: list[Mutation] | Mutation def _to_dict(self) -> dict[str, Any]: return { "row_key": self.row_key, 
"mutations": [mutation._to_dict() for mutation in self.mutations] } + + def is_idempotent(self) -> bool: + """Check if the mutation is idempotent""" + return all(mutation.is_idempotent() for mutation in self.mutations) From 5d20037aa95f9806a275f91d07c277990febee3b Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 24 Apr 2023 15:24:17 -0700 Subject: [PATCH 013/213] initial implementation for bulk_mutations --- google/cloud/bigtable/client.py | 91 ++++++++++++++++++++++++++++- google/cloud/bigtable/exceptions.py | 16 ++++- google/cloud/bigtable/mutations.py | 3 + 3 files changed, 108 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 1cf03af97..835f6ecfd 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -670,7 +670,96 @@ async def bulk_mutate_rows( - MutationsExceptionGroup if one or more mutations fails Contains details about any failed entries in .exceptions """ - raise NotImplementedError + operation_timeout = operation_timeout or self.default_operation_timeout + per_request_timeout = per_request_timeout or self.default_per_request_timeout + + if operation_timeout <= 0: + raise ValueError("operation_timeout must be greater than 0") + if per_request_timeout is not None and per_request_timeout <= 0: + raise ValueError("per_request_timeout must be greater than 0") + if per_request_timeout is not None and per_request_timeout > operation_timeout: + raise ValueError("per_request_timeout must be less than operation_timeout") + + request = {"table_name": self.table_name, "row_key": row_key} + if self.app_profile_id: + request["app_profile_id"] = self.app_profile_id + + mutations_dict = {idx:mut for idx, mut in enumerate(mutation_entries)} + error_dict = {idx:None for idx in mutations_dict.keys()} + + request_level_errors = [] + predicate = retries.if_exception_type( + core_exceptions.DeadlineExceeded, + core_exceptions.ServiceUnavailable, + core_exceptions.Aborted, + ) + def on_error_fn(exc): + if predicate(exc): + # add this exception to list for each active mutation + for idx in error_dict.keys(): + if mutations_dict[idx] is not None: + error_dict.setdefault(idx, []).append(exc)i + # remove non-idempotent mutations from mutations_dict, so they are not retried + for idx, mut in mutations_dict.items(): + if mut is not None and not mut.is_idempotent() + mutations_dict[idx] = None + + retry = retries.AsyncRetry( + predicate=predicate, + on_error=on_error_fn, + timeout=operation_timeout, + initial=0.01, + multiplier=2, + maximum=60, + ) + try: + await retry(self._mutations_retryable_attempt)(request, per_request_timeout, mutations_dict, error_dict) + except core_exceptions.RetryError: + # raised by AsyncRetry after operation deadline exceeded + # add DeadlineExceeded to list for each active mutation + deadline_exc = core_exceptions.DeadlineExceeded( + f"operation_timeout of {operation_timeout:0.1f}s exceeded" + ) + for idx in error_dict.keys(): + if mutations_dict[idx] is not None: + error_dict.setdefault(idx, []).append(deadline_exc) + except Exception as exc: + # other exceptions are added to the list of exceptions for unprocessed mutations + for idx in error_dict.keys(): + if mutations_dict[idx] is not None: + error_dict.setdefault(idx, []).append(exc) + finally: + # raise exception detailing incomplete mutations + all_errors = [] + for idx, exc_list in error_dict.items(): + if exc_list: + if len(exc_list) == 1: + cause_exc = exc_list[0] + else: + cause_exc = RetryExceptionGroup(exc_list) + 
all_errors.append(FailedMutationException(idx, mutation_entries[idx], cause_exc)) + if all_errors: + raise MutationsExceptionGroup(all_errors) + + + async def _mutations_retryable_attempt(self, request, per_request_timeout, mutation_dict, error_dict): + new_request = request.copy() + while any(mutation is not None for mutation in mutation_dict.values()): + # continue to retry until timeout, or all mutations are complete (success or failure) + new_request["entries"] = [mutation_dict[i]._to_dict() for i in range(len(mutation_dict)) if mutation_dict[i] is not None] + async for result in self._gapic_client.mutate_rows(new_request, timeout=per_request_timeout): + idx = result.index + if result.status.code == 0: + # mutation succeeded + mutation_dict[idx] = None + error_dict[idx] = None + if result.status.code != 0: + # mutation failed + exception = core_exceptions.from_grpc_status(result.status) + error_dict.setdefault(idx, []).append(exception) + # if not idempotent, remove from retry list + if mutation_dict[idx].is_idempotent(): + mutation_dict[idx] = None async def check_and_mutate_row( self, diff --git a/google/cloud/bigtable/exceptions.py b/google/cloud/bigtable/exceptions.py index 86bfe9247..35a226b02 100644 --- a/google/cloud/bigtable/exceptions.py +++ b/google/cloud/bigtable/exceptions.py @@ -29,7 +29,12 @@ class BigtableExceptionGroup(ExceptionGroup if is_311_plus else Exception): # t """ def __init__(self, message, excs): - raise NotImplementedError() + if is_311_plus: + super().__init__(message, excs) + else: + self.exceptions = excs + revised_message = f"{message} ({len(excs)} sub-exceptions)" + super().__init__(revised_message) class MutationsExceptionGroup(BigtableExceptionGroup): @@ -39,6 +44,15 @@ class MutationsExceptionGroup(BigtableExceptionGroup): pass +class FailedMutationException(Exception): + """ + Represents a failed mutation entry for bulk mutation operations + """ + def __init__(self, failed_idx:int, failed_mutation_obj:"Mutation", cause:Exception): + super.init(f"Failed mutation at index: {failed_idx} with cause: {cause}") + self.failed_idx = failed_idx + self.failed_mutation_obj = failed_mutation_obj + self.__cause__ = cause class RetryExceptionGroup(BigtableExceptionGroup): """Represents one or more exceptions that occur during a retryable operation""" diff --git a/google/cloud/bigtable/mutations.py b/google/cloud/bigtable/mutations.py index f175b3f78..4427257cf 100644 --- a/google/cloud/bigtable/mutations.py +++ b/google/cloud/bigtable/mutations.py @@ -35,6 +35,9 @@ def is_idempotent(self) -> bool: """ return True + def __str__(self) -> str: + return str(self._to_dict()) + @dataclass class SetCell(Mutation): From 3d322a144e80c516e9dba98d5d3b4ddbf604b4e8 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 24 Apr 2023 15:26:10 -0700 Subject: [PATCH 014/213] include successes in bulk mutation error message --- google/cloud/bigtable/client.py | 2 +- google/cloud/bigtable/exceptions.py | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 835f6ecfd..e43458c28 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -739,7 +739,7 @@ def on_error_fn(exc): cause_exc = RetryExceptionGroup(exc_list) all_errors.append(FailedMutationException(idx, mutation_entries[idx], cause_exc)) if all_errors: - raise MutationsExceptionGroup(all_errors) + raise MutationsExceptionGroup(all_errors, len(mutation_entries)) async def _mutations_retryable_attempt(self, 
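Because the ExceptionGroup built-in only exists on Python 3.11+, the base class above picks its parent at class-definition time. A stand-alone sketch of that compatibility pattern (the class and variable names here are illustrative, not the library's own):

import sys

is_311_plus = sys.version_info >= (3, 11)

class CompatExceptionGroup(ExceptionGroup if is_311_plus else Exception):  # type: ignore # noqa: F821
    """Acts like ExceptionGroup on 3.11+, and like a plain Exception elsewhere."""

    def __new__(cls, message, excs):
        # on 3.11+, the message and sub-exceptions must be passed through __new__
        if is_311_plus:
            return super().__new__(cls, message, excs)
        return super().__new__(cls)

    def __init__(self, message, excs):
        if is_311_plus:
            super().__init__(message, excs)
        else:
            # emulate the .exceptions attribute on older interpreters
            self.exceptions = tuple(excs)
            super().__init__(message)

group = CompatExceptionGroup("2 failed attempts", [ValueError("a"), KeyError("b")])
print(len(group.exceptions))  # 2 on every supported Python version

Defining __new__ alongside __init__ matters because on 3.11+ the ExceptionGroup machinery receives its arguments through __new__; a later commit in this series leans on that when subclasses reshape the constructor signature.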
request, per_request_timeout, mutation_dict, error_dict): diff --git a/google/cloud/bigtable/exceptions.py b/google/cloud/bigtable/exceptions.py index 35a226b02..feefbb0b3 100644 --- a/google/cloud/bigtable/exceptions.py +++ b/google/cloud/bigtable/exceptions.py @@ -42,7 +42,9 @@ class MutationsExceptionGroup(BigtableExceptionGroup): Represents one or more exceptions that occur during a bulk mutation operation """ - pass + def __init__(self, excs): + super().__init__(f"len(excs) failed mutations", excs) + class FailedMutationException(Exception): """ @@ -54,7 +56,9 @@ def __init__(self, failed_idx:int, failed_mutation_obj:"Mutation", cause:Excepti self.failed_mutation_obj = failed_mutation_obj self.__cause__ = cause + class RetryExceptionGroup(BigtableExceptionGroup): """Represents one or more exceptions that occur during a retryable operation""" - pass + def __init__(self, excs, total_num): + super().__init__(f"{len(excs)} failed attempts (out of {total_num})", excs) From a31232ba3b76c327f3b68f6bd6b9505a92877ed5 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 24 Apr 2023 15:52:22 -0700 Subject: [PATCH 015/213] fixed style checks --- google/cloud/bigtable/client.py | 95 ++++++++++++++++++----------- google/cloud/bigtable/exceptions.py | 24 +++++--- google/cloud/bigtable/mutations.py | 36 ++++++----- 3 files changed, 93 insertions(+), 62 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index e43458c28..1a89a7111 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -40,6 +40,9 @@ from google.api_core import client_options as client_options_lib from google.cloud.bigtable.exceptions import RetryExceptionGroup +from google.cloud.bigtable.exceptions import FailedMutationError +from google.cloud.bigtable.exceptions import MutationsExceptionGroup + if TYPE_CHECKING: from google.cloud.bigtable.mutations import Mutation, BulkMutationsEntry @@ -570,15 +573,16 @@ async def mutate_row( - GoogleAPIError: raised on non-idempotent operations that cannot be safely retried. 
""" - operation_timeout = operation_timeout or self.default_operation_timeout - per_request_timeout = per_request_timeout or self.default_per_request_timeout + # TODO: bring in default, from read_rows + # operation_timeout = operation_timeout or self.default_operation_timeout + # per_request_timeout = per_request_timeout or self.default_per_request_timeout - if operation_timeout <= 0: - raise ValueError("operation_timeout must be greater than 0") - if per_request_timeout is not None and per_request_timeout <= 0: - raise ValueError("per_request_timeout must be greater than 0") - if per_request_timeout is not None and per_request_timeout > operation_timeout: - raise ValueError("per_request_timeout must be less than operation_timeout") + # if operation_timeout <= 0: + # raise ValueError("operation_timeout must be greater than 0") + # if per_request_timeout is not None and per_request_timeout <= 0: + # raise ValueError("per_request_timeout must be greater than 0") + # if per_request_timeout is not None and per_request_timeout > operation_timeout: + # raise ValueError("per_request_timeout must be less than operation_timeout") if isinstance(row_key, str): row_key = row_key.encode("utf-8") @@ -616,7 +620,9 @@ def on_error_fn(exc): maximum=60, ) try: - await retry(self._gapic_client.mutate_row)(request, timeout=per_request_timeout) + await retry(self.client._gapic_client.mutate_row)( + request, timeout=per_request_timeout + ) except core_exceptions.RetryError: # raised by AsyncRetry after operation deadline exceeded # TODO: merge with similar logic in ReadRowsIterator @@ -626,14 +632,11 @@ def on_error_fn(exc): source_exc = None if transient_errors: source_exc = RetryExceptionGroup( - f"{len(transient_errors)} failed attempts", transient_errors, ) new_exc.__cause__ = source_exc - await self._finish_with_error(new_exc) raise new_exc from source_exc - async def bulk_mutate_rows( self, mutation_entries: list[BulkMutationsEntry], @@ -670,38 +673,43 @@ async def bulk_mutate_rows( - MutationsExceptionGroup if one or more mutations fails Contains details about any failed entries in .exceptions """ - operation_timeout = operation_timeout or self.default_operation_timeout - per_request_timeout = per_request_timeout or self.default_per_request_timeout + # TODO: bring in default, from read_rows + # operation_timeout = operation_timeout or self.default_operation_timeout + # per_request_timeout = per_request_timeout or self.default_per_request_timeout - if operation_timeout <= 0: - raise ValueError("operation_timeout must be greater than 0") - if per_request_timeout is not None and per_request_timeout <= 0: - raise ValueError("per_request_timeout must be greater than 0") - if per_request_timeout is not None and per_request_timeout > operation_timeout: - raise ValueError("per_request_timeout must be less than operation_timeout") + # if operation_timeout <= 0: + # raise ValueError("operation_timeout must be greater than 0") + # if per_request_timeout is not None and per_request_timeout <= 0: + # raise ValueError("per_request_timeout must be greater than 0") + # if per_request_timeout is not None and per_request_timeout > operation_timeout: + # raise ValueError("per_request_timeout must be less than operation_timeout") - request = {"table_name": self.table_name, "row_key": row_key} + request = {"table_name": self.table_name} if self.app_profile_id: request["app_profile_id"] = self.app_profile_id - mutations_dict = {idx:mut for idx, mut in enumerate(mutation_entries)} - error_dict = {idx:None for idx in 
mutations_dict.keys()} + mutations_dict: dict[int, BulkMutationsEntry | None] = { + idx: mut for idx, mut in enumerate(mutation_entries) + } + error_dict: dict[int, list[Exception]] = { + idx: [] for idx in mutations_dict.keys() + } - request_level_errors = [] predicate = retries.if_exception_type( core_exceptions.DeadlineExceeded, core_exceptions.ServiceUnavailable, core_exceptions.Aborted, ) + def on_error_fn(exc): if predicate(exc): # add this exception to list for each active mutation for idx in error_dict.keys(): if mutations_dict[idx] is not None: - error_dict.setdefault(idx, []).append(exc)i + error_dict[idx].append(exc) # remove non-idempotent mutations from mutations_dict, so they are not retried for idx, mut in mutations_dict.items(): - if mut is not None and not mut.is_idempotent() + if mut is not None and not mut.is_idempotent(): mutations_dict[idx] = None retry = retries.AsyncRetry( @@ -713,7 +721,9 @@ def on_error_fn(exc): maximum=60, ) try: - await retry(self._mutations_retryable_attempt)(request, per_request_timeout, mutations_dict, error_dict) + await retry(self._mutations_retryable_attempt)( + request, per_request_timeout, mutations_dict, error_dict + ) except core_exceptions.RetryError: # raised by AsyncRetry after operation deadline exceeded # add DeadlineExceeded to list for each active mutation @@ -722,12 +732,12 @@ def on_error_fn(exc): ) for idx in error_dict.keys(): if mutations_dict[idx] is not None: - error_dict.setdefault(idx, []).append(deadline_exc) + error_dict[idx].append(deadline_exc) except Exception as exc: # other exceptions are added to the list of exceptions for unprocessed mutations for idx in error_dict.keys(): if mutations_dict[idx] is not None: - error_dict.setdefault(idx, []).append(exc) + error_dict[idx].append(exc) finally: # raise exception detailing incomplete mutations all_errors = [] @@ -737,25 +747,38 @@ def on_error_fn(exc): cause_exc = exc_list[0] else: cause_exc = RetryExceptionGroup(exc_list) - all_errors.append(FailedMutationException(idx, mutation_entries[idx], cause_exc)) + all_errors.append( + FailedMutationError(idx, mutation_entries[idx], cause_exc) + ) if all_errors: raise MutationsExceptionGroup(all_errors, len(mutation_entries)) - - async def _mutations_retryable_attempt(self, request, per_request_timeout, mutation_dict, error_dict): + async def _mutations_retryable_attempt( + self, request, per_request_timeout, mutation_dict, error_dict + ): new_request = request.copy() while any(mutation is not None for mutation in mutation_dict.values()): # continue to retry until timeout, or all mutations are complete (success or failure) - new_request["entries"] = [mutation_dict[i]._to_dict() for i in range(len(mutation_dict)) if mutation_dict[i] is not None] - async for result in self._gapic_client.mutate_rows(new_request, timeout=per_request_timeout): + new_request["entries"] = [ + mutation_dict[i]._to_dict() + for i in range(len(mutation_dict)) + if mutation_dict[i] is not None + ] + async for result in await self.client._gapic_client.mutate_rows( + new_request, timeout=per_request_timeout + ): idx = result.index if result.status.code == 0: # mutation succeeded mutation_dict[idx] = None - error_dict[idx] = None + error_dict[idx] = [] if result.status.code != 0: # mutation failed - exception = core_exceptions.from_grpc_status(result.status) + exception = core_exceptions.from_grpc_status( + result.status.code, + result.status.message, + details=result.status.details, + ) error_dict.setdefault(idx, []).append(exception) # if not 
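A simplified, self-contained sketch of the bookkeeping this attempt loop performs: only entries that have not yet succeeded are resent, and every failure is recorded against the entry's original index. Here `send_attempt` is a placeholder for a single MutateRows RPC, and a fixed attempt count stands in for the deadline-driven retry wrapper used above.

async def mutate_with_partial_retries(entries, send_attempt, max_attempts=3):
    # entries: the caller's full list; pending holds the original indices still outstanding
    pending = list(range(len(entries)))
    errors = {i: [] for i in range(len(entries))}
    for _ in range(max_attempts):
        if not pending:
            break
        batch = [entries[i] for i in pending]
        # send_attempt returns (position_in_batch, exception_or_None) pairs
        results = await send_attempt(batch)
        still_pending = []
        for pos, exc in results:
            original_idx = pending[pos]  # map the batch position back to the original index
            if exc is None:
                continue  # success: never resend this entry
            errors[original_idx].append(exc)
            still_pending.append(original_idx)
        pending = still_pending
    return errors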
idempotent, remove from retry list if mutation_dict[idx].is_idempotent(): diff --git a/google/cloud/bigtable/exceptions.py b/google/cloud/bigtable/exceptions.py index feefbb0b3..debf97e10 100644 --- a/google/cloud/bigtable/exceptions.py +++ b/google/cloud/bigtable/exceptions.py @@ -15,9 +15,13 @@ import sys +from typing import TYPE_CHECKING is_311_plus = sys.version_info >= (3, 11) +if TYPE_CHECKING: + from google.cloud.bigtable.mutations import BulkMutationsEntry + class BigtableExceptionGroup(ExceptionGroup if is_311_plus else Exception): # type: ignore # noqa: F821 """ @@ -42,16 +46,22 @@ class MutationsExceptionGroup(BigtableExceptionGroup): Represents one or more exceptions that occur during a bulk mutation operation """ - def __init__(self, excs): - super().__init__(f"len(excs) failed mutations", excs) + def __init__(self, excs, total_num): + super().__init__(f"{len(excs)} failed mutations (out of {total_num})", excs) -class FailedMutationException(Exception): +class FailedMutationError(Exception): """ Represents a failed mutation entry for bulk mutation operations """ - def __init__(self, failed_idx:int, failed_mutation_obj:"Mutation", cause:Exception): - super.init(f"Failed mutation at index: {failed_idx} with cause: {cause}") + + def __init__( + self, + failed_idx: int, + failed_mutation_obj: "BulkMutationsEntry", + cause: Exception, + ): + super().__init__(f"Failed mutation at index: {failed_idx} with cause: {cause}") self.failed_idx = failed_idx self.failed_mutation_obj = failed_mutation_obj self.__cause__ = cause @@ -60,5 +70,5 @@ def __init__(self, failed_idx:int, failed_mutation_obj:"Mutation", cause:Excepti class RetryExceptionGroup(BigtableExceptionGroup): """Represents one or more exceptions that occur during a retryable operation""" - def __init__(self, excs, total_num): - super().__init__(f"{len(excs)} failed attempts (out of {total_num})", excs) + def __init__(self, excs): + super().__init__(f"{len(excs)} failed attempts", excs) diff --git a/google/cloud/bigtable/mutations.py b/google/cloud/bigtable/mutations.py index 4427257cf..b4c5dcce8 100644 --- a/google/cloud/bigtable/mutations.py +++ b/google/cloud/bigtable/mutations.py @@ -13,13 +13,10 @@ # limitations under the License. 
# from __future__ import annotations -from typing import TYPE_CHECKING, Any +from typing import Any from dataclasses import dataclass from abc import ABC, abstractmethod -if TYPE_CHECKING: - from google.cloud.bigtable import RowKeySamples - class Mutation(ABC): """Model class for mutations""" @@ -41,17 +38,19 @@ def __str__(self) -> str: @dataclass class SetCell(Mutation): - family:str - qualifier:bytes - new_value:bytes|str|int - timestamp_micros:int|None + family: str + qualifier: bytes + new_value: bytes | str | int + timestamp_micros: int | None def _to_dict(self) -> dict[str, Any]: return { "set_cell": { "family_name": self.family, "column_qualifier": self.qualifier, - "timestamp_micros": self.timestamp_micros if self.timestamp_micros is not None else -1, + "timestamp_micros": self.timestamp_micros + if self.timestamp_micros is not None + else -1, "value": self.new_value, } } @@ -63,12 +62,12 @@ def is_idempotent(self) -> bool: @dataclass class DeleteRangeFromColumn(Mutation): - family:str - qualifier:bytes + family: str + qualifier: bytes # None represents 0 - start_timestamp_micros:int | None + start_timestamp_micros: int | None # None represents infinity - end_timestamp_micros:int | None + end_timestamp_micros: int | None def _to_dict(self) -> dict[str, Any]: timestamp_range = {} @@ -87,7 +86,7 @@ def _to_dict(self) -> dict[str, Any]: @dataclass class DeleteAllFromFamily(Mutation): - family_to_delete:str + family_to_delete: str def _to_dict(self) -> dict[str, Any]: return { @@ -99,7 +98,6 @@ def _to_dict(self) -> dict[str, Any]: @dataclass class DeleteAllFromRow(Mutation): - def _to_dict(self) -> dict[str, Any]: return { "delete_from_row": {}, @@ -107,14 +105,14 @@ def _to_dict(self) -> dict[str, Any]: @dataclass -class BulkMutationsEntry(): - row_key:bytes - mutations: list[Mutation] | Mutation +class BulkMutationsEntry: + row_key: bytes + mutations: list[Mutation] def _to_dict(self) -> dict[str, Any]: return { "row_key": self.row_key, - "mutations": [mutation._to_dict() for mutation in self.mutations] + "mutations": [mutation._to_dict() for mutation in self.mutations], } def is_idempotent(self) -> bool: From 8da2d657bdabacbb9eb370e0b3fd568506335e0f Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 24 Apr 2023 16:25:35 -0700 Subject: [PATCH 016/213] added basic system tests --- google/cloud/bigtable/client.py | 35 +++++++++++++++--------------- google/cloud/bigtable/mutations.py | 4 ++-- tests/system/test_system.py | 20 +++++++++++++++++ 3 files changed, 40 insertions(+), 19 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 1a89a7111..c9a4975db 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -43,9 +43,9 @@ from google.cloud.bigtable.exceptions import FailedMutationError from google.cloud.bigtable.exceptions import MutationsExceptionGroup +from google.cloud.bigtable.mutations import Mutation, BulkMutationsEntry if TYPE_CHECKING: - from google.cloud.bigtable.mutations import Mutation, BulkMutationsEntry from google.cloud.bigtable.mutations_batcher import MutationsBatcher from google.cloud.bigtable.row import Row from google.cloud.bigtable.read_rows_query import ReadRowsQuery @@ -764,25 +764,26 @@ async def _mutations_retryable_attempt( for i in range(len(mutation_dict)) if mutation_dict[i] is not None ] - async for result in await self.client._gapic_client.mutate_rows( + async for result_list in await self.client._gapic_client.mutate_rows( new_request, timeout=per_request_timeout ): - idx = 
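As a quick reference, this is roughly what the wire-format dict produced by SetCell._to_dict looks like after the reformat (the values are arbitrary, and the comments show the expected output):

from google.cloud.bigtable.mutations import SetCell

explicit = SetCell("stats", b"views", b"\x00\x01", timestamp_micros=1234567890)
server_side = SetCell("stats", b"views", b"\x00\x01", timestamp_micros=None)

print(explicit._to_dict())
# {'set_cell': {'family_name': 'stats', 'column_qualifier': b'views',
#               'timestamp_micros': 1234567890, 'value': b'\x00\x01'}}
print(server_side._to_dict()["set_cell"]["timestamp_micros"])
# -1, the sentinel meaning "let the server assign the timestamp"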
result.index - if result.status.code == 0: - # mutation succeeded - mutation_dict[idx] = None - error_dict[idx] = [] - if result.status.code != 0: - # mutation failed - exception = core_exceptions.from_grpc_status( - result.status.code, - result.status.message, - details=result.status.details, - ) - error_dict.setdefault(idx, []).append(exception) - # if not idempotent, remove from retry list - if mutation_dict[idx].is_idempotent(): + for result in result_list.entries: + idx = result.index + if result.status.code == 0: + # mutation succeeded mutation_dict[idx] = None + error_dict[idx] = [] + if result.status.code != 0: + # mutation failed + exception = core_exceptions.from_grpc_status( + result.status.code, + result.status.message, + details=result.status.details, + ) + error_dict.setdefault(idx, []).append(exception) + # if not idempotent, remove from retry list + if mutation_dict[idx].is_idempotent(): + mutation_dict[idx] = None async def check_and_mutate_row( self, diff --git a/google/cloud/bigtable/mutations.py b/google/cloud/bigtable/mutations.py index b4c5dcce8..d4ac9919a 100644 --- a/google/cloud/bigtable/mutations.py +++ b/google/cloud/bigtable/mutations.py @@ -40,8 +40,8 @@ def __str__(self) -> str: class SetCell(Mutation): family: str qualifier: bytes - new_value: bytes | str | int - timestamp_micros: int | None + new_value: bytes + timestamp_micros: int | None = None def _to_dict(self) -> dict[str, Any]: return { diff --git a/tests/system/test_system.py b/tests/system/test_system.py index 05633ac91..56a325d09 100644 --- a/tests/system/test_system.py +++ b/tests/system/test_system.py @@ -154,3 +154,23 @@ async def test_ping_and_warm_gapic(client, table): """ request = {"name": table.instance_name} await client._gapic_client.ping_and_warm(request) + +@pytest.mark.asyncio +async def test_mutation_set_cell(client, table): + """ + Ensure cells can be set properly + """ + from google.cloud.bigtable.mutations import SetCell + mutation = SetCell(family=TEST_FAMILY, qualifier=b"test-qualifier", new_value=b"test-value") + await table.mutate_row("abc", mutation) + +@pytest.mark.asyncio +async def test_bulk_mutations_set_cell(client, table): + """ + Ensure cells can be set properly + """ + from google.cloud.bigtable.mutations import SetCell, BulkMutationsEntry + mutation = SetCell(family=TEST_FAMILY, qualifier=b"test-qualifier", new_value=b"test-value") + bulk_mutation = BulkMutationsEntry(b"abc", [mutation]) + await table.bulk_mutate_rows([bulk_mutation]) + From 2b89d9cbab532a2876101d48657093778e8f465d Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 24 Apr 2023 17:11:32 -0700 Subject: [PATCH 017/213] added unit tests for mutate_row --- google/cloud/bigtable/client.py | 2 +- google/cloud/bigtable/mutations.py | 4 +- tests/unit/test_client.py | 98 ++++++++++++++++++++++++++++++ 3 files changed, 101 insertions(+), 3 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index c9a4975db..0158f9abe 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -594,7 +594,7 @@ async def mutate_row( mutations = [mutations] request["mutations"] = [mutation._to_dict() for mutation in mutations] - if all(mutation.is_idempotent for mutation in mutations): + if all(mutation.is_idempotent() for mutation in mutations): # mutations are all idempotent and safe to retry predicate = retries.if_exception_type( core_exceptions.DeadlineExceeded, diff --git a/google/cloud/bigtable/mutations.py b/google/cloud/bigtable/mutations.py index 
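The system tests above exercise the new surface end to end. A condensed usage sketch follows; the project, instance, table, and family names are placeholders, and a reachable Bigtable instance or emulator is assumed.

import asyncio
from google.cloud.bigtable.client import BigtableDataClient
from google.cloud.bigtable.mutations import SetCell, BulkMutationsEntry

async def main():
    async with BigtableDataClient(project="my-project") as client:
        async with client.get_table("my-instance", "my-table") as table:
            # single-row path: retried automatically when the mutation is idempotent
            await table.mutate_row(
                b"row-1",
                SetCell("my-family", b"qualifier", b"value", timestamp_micros=1234567890),
            )
            # bulk path: per-entry failures surface as a MutationsExceptionGroup
            entry = BulkMutationsEntry(
                b"row-2",
                [SetCell("my-family", b"qualifier", b"value", timestamp_micros=1234567890)],
            )
            await table.bulk_mutate_rows([entry])

asyncio.run(main())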
d4ac9919a..2b5a5e711 100644 --- a/google/cloud/bigtable/mutations.py +++ b/google/cloud/bigtable/mutations.py @@ -65,9 +65,9 @@ class DeleteRangeFromColumn(Mutation): family: str qualifier: bytes # None represents 0 - start_timestamp_micros: int | None + start_timestamp_micros: int | None=None # None represents infinity - end_timestamp_micros: int | None + end_timestamp_micros: int | None=None def _to_dict(self) -> dict[str, Any]: timestamp_range = {} diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index ca7220800..cd5928255 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -21,6 +21,9 @@ from google.auth.credentials import AnonymousCredentials import pytest +from google.cloud.bigtable import mutations +from google.api_core import exceptions as core_exceptions + # try/except added for compatibility with python < 3.8 try: from unittest import mock @@ -812,3 +815,98 @@ def test_table_ctor_sync(self): with pytest.raises(RuntimeError) as e: Table(client, "instance-id", "table-id") assert e.match("Table must be created within an async event loop context.") + +class TestMutateRow(): + + def _make_client(self, *args, **kwargs): + from google.cloud.bigtable.client import BigtableDataClient + + return BigtableDataClient(*args, **kwargs) + + @pytest.mark.asyncio + @pytest.mark.parametrize("mutation_arg", [ + mutations.SetCell("family", b"qualifier", b"value"), + mutations.SetCell("family", b"qualifier", b"value", timestamp_micros=1234567890), + mutations.DeleteRangeFromColumn("family", b"qualifier"), + mutations.DeleteAllFromFamily("family"), + mutations.DeleteAllFromRow(), + [mutations.SetCell("family", b"qualifier", b"value")], + [mutations.DeleteRangeFromColumn("family", b"qualifier"), mutations.DeleteAllFromRow()], + ]) + async def test_mutate_row(self, mutation_arg): + """Test mutations with no errors""" + expected_per_request_timeout = 19 + async with self._make_client(project="project") as client: + async with client.get_table("instance", "table") as table: + with mock.patch.object(client._gapic_client, "mutate_row") as mock_gapic: + mock_gapic.return_value = None + await table.mutate_row("row_key", mutation_arg, per_request_timeout=expected_per_request_timeout) + assert mock_gapic.call_count == 1 + request = mock_gapic.call_args[0][0] + assert request["table_name"] == f"projects/project/instances/instance/tables/table" + assert request["row_key"] == b"row_key" + formatted_mutations = [mutation._to_dict() for mutation in mutation_arg] if isinstance(mutation_arg, list) else [mutation_arg._to_dict()] + assert request["mutations"] == formatted_mutations + found_per_request_timeout = mock_gapic.call_args[1]["timeout"] + assert found_per_request_timeout == expected_per_request_timeout + + + @pytest.mark.parametrize("retryable_exception", [ + core_exceptions.DeadlineExceeded, + core_exceptions.ServiceUnavailable, + core_exceptions.Aborted, + ]) + @pytest.mark.asyncio + async def test_mutate_row_retryable_errors(self, retryable_exception): + from google.api_core.exceptions import DeadlineExceeded + from google.cloud.bigtable.exceptions import RetryExceptionGroup + async with self._make_client(project="project") as client: + async with client.get_table("instance", "table") as table: + with mock.patch.object(client._gapic_client, "mutate_row") as mock_gapic: + mock_gapic.side_effect = retryable_exception("mock") + with pytest.raises(DeadlineExceeded) as e: + mutation = mutations.DeleteAllFromRow() + assert mutation.is_idempotent() == True + await 
table.mutate_row("row_key", mutation, operation_timeout=0.05) + cause = e.value.__cause__ + assert isinstance(cause, RetryExceptionGroup) + assert isinstance(cause.exceptions[0], retryable_exception) + + + @pytest.mark.parametrize("retryable_exception", [ + core_exceptions.DeadlineExceeded, + core_exceptions.ServiceUnavailable, + core_exceptions.Aborted, + ]) + @pytest.mark.asyncio + async def test_mutate_row_non_idempotent_retryable_errors(self, retryable_exception): + """ + Non-idempotent mutations should not be retried + """ + async with self._make_client(project="project") as client: + async with client.get_table("instance", "table") as table: + with mock.patch.object(client._gapic_client, "mutate_row") as mock_gapic: + mock_gapic.side_effect = retryable_exception("mock") + with pytest.raises(retryable_exception) as e: + mutation = mutations.SetCell("family", b"qualifier", b"value") + assert mutation.is_idempotent() is False + await table.mutate_row("row_key", mutation, operation_timeout=0.2) + + @pytest.mark.parametrize("non_retryable_exception", [ + core_exceptions.OutOfRange, + core_exceptions.NotFound, + core_exceptions.FailedPrecondition, + RuntimeError, + ValueError, + ]) + @pytest.mark.asyncio + async def test_mutate_row_non_retryable_errors(self, non_retryable_exception): + async with self._make_client(project="project") as client: + async with client.get_table("instance", "table") as table: + with mock.patch.object(client._gapic_client, "mutate_row") as mock_gapic: + mock_gapic.side_effect = non_retryable_exception("mock") + with pytest.raises(non_retryable_exception) as e: + mutation = mutations.SetCell("family", b"qualifier", b"value", timestamp_micros=1234567890) + assert mutation.is_idempotent() is True + await table.mutate_row("row_key", mutation, operation_timeout=0.2) + From 47c59850e4f49358e5bd7b71193d8dff0a8e9933 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 24 Apr 2023 17:13:56 -0700 Subject: [PATCH 018/213] ran blacken --- google/cloud/bigtable/mutations.py | 4 +- tests/system/test_system.py | 13 ++- tests/unit/test_client.py | 160 +++++++++++++++++++++-------- 3 files changed, 128 insertions(+), 49 deletions(-) diff --git a/google/cloud/bigtable/mutations.py b/google/cloud/bigtable/mutations.py index 2b5a5e711..966fa56a8 100644 --- a/google/cloud/bigtable/mutations.py +++ b/google/cloud/bigtable/mutations.py @@ -65,9 +65,9 @@ class DeleteRangeFromColumn(Mutation): family: str qualifier: bytes # None represents 0 - start_timestamp_micros: int | None=None + start_timestamp_micros: int | None = None # None represents infinity - end_timestamp_micros: int | None=None + end_timestamp_micros: int | None = None def _to_dict(self) -> dict[str, Any]: timestamp_range = {} diff --git a/tests/system/test_system.py b/tests/system/test_system.py index 56a325d09..b3ec26d57 100644 --- a/tests/system/test_system.py +++ b/tests/system/test_system.py @@ -155,22 +155,29 @@ async def test_ping_and_warm_gapic(client, table): request = {"name": table.instance_name} await client._gapic_client.ping_and_warm(request) + @pytest.mark.asyncio async def test_mutation_set_cell(client, table): """ Ensure cells can be set properly """ from google.cloud.bigtable.mutations import SetCell - mutation = SetCell(family=TEST_FAMILY, qualifier=b"test-qualifier", new_value=b"test-value") + + mutation = SetCell( + family=TEST_FAMILY, qualifier=b"test-qualifier", new_value=b"test-value" + ) await table.mutate_row("abc", mutation) + @pytest.mark.asyncio async def test_bulk_mutations_set_cell(client, 
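A trimmed sketch of the unit-test pattern used here: patch the gapic method underneath the data client and assert how the retry layer reacts. This assumes pytest with pytest-asyncio; the real suite builds its client through a small `_make_client` helper and its own fixtures.

from unittest import mock
import pytest
from google.api_core import exceptions as core_exceptions
from google.cloud.bigtable.client import BigtableDataClient
from google.cloud.bigtable.mutations import DeleteAllFromRow

@pytest.mark.asyncio
async def test_transient_error_becomes_deadline_exceeded():
    async with BigtableDataClient(project="project") as client:
        async with client.get_table("instance", "table") as table:
            with mock.patch.object(client._gapic_client, "mutate_row") as rpc:
                # every attempt fails with a retryable error until the budget runs out
                rpc.side_effect = core_exceptions.ServiceUnavailable("mock")
                with pytest.raises(core_exceptions.DeadlineExceeded):
                    await table.mutate_row(
                        b"row", DeleteAllFromRow(), operation_timeout=0.05
                    )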
table): """ Ensure cells can be set properly """ from google.cloud.bigtable.mutations import SetCell, BulkMutationsEntry - mutation = SetCell(family=TEST_FAMILY, qualifier=b"test-qualifier", new_value=b"test-value") + + mutation = SetCell( + family=TEST_FAMILY, qualifier=b"test-qualifier", new_value=b"test-value" + ) bulk_mutation = BulkMutationsEntry(b"abc", [mutation]) await table.bulk_mutate_rows([bulk_mutation]) - diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index cd5928255..249b71461 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -816,97 +816,169 @@ def test_table_ctor_sync(self): Table(client, "instance-id", "table-id") assert e.match("Table must be created within an async event loop context.") -class TestMutateRow(): +class TestMutateRow: def _make_client(self, *args, **kwargs): from google.cloud.bigtable.client import BigtableDataClient return BigtableDataClient(*args, **kwargs) @pytest.mark.asyncio - @pytest.mark.parametrize("mutation_arg", [ - mutations.SetCell("family", b"qualifier", b"value"), - mutations.SetCell("family", b"qualifier", b"value", timestamp_micros=1234567890), - mutations.DeleteRangeFromColumn("family", b"qualifier"), - mutations.DeleteAllFromFamily("family"), - mutations.DeleteAllFromRow(), - [mutations.SetCell("family", b"qualifier", b"value")], - [mutations.DeleteRangeFromColumn("family", b"qualifier"), mutations.DeleteAllFromRow()], - ]) + @pytest.mark.parametrize( + "mutation_arg", + [ + mutations.SetCell("family", b"qualifier", b"value"), + mutations.SetCell( + "family", b"qualifier", b"value", timestamp_micros=1234567890 + ), + mutations.DeleteRangeFromColumn("family", b"qualifier"), + mutations.DeleteAllFromFamily("family"), + mutations.DeleteAllFromRow(), + [mutations.SetCell("family", b"qualifier", b"value")], + [ + mutations.DeleteRangeFromColumn("family", b"qualifier"), + mutations.DeleteAllFromRow(), + ], + ], + ) async def test_mutate_row(self, mutation_arg): """Test mutations with no errors""" expected_per_request_timeout = 19 async with self._make_client(project="project") as client: async with client.get_table("instance", "table") as table: - with mock.patch.object(client._gapic_client, "mutate_row") as mock_gapic: + with mock.patch.object( + client._gapic_client, "mutate_row" + ) as mock_gapic: mock_gapic.return_value = None - await table.mutate_row("row_key", mutation_arg, per_request_timeout=expected_per_request_timeout) + await table.mutate_row( + "row_key", + mutation_arg, + per_request_timeout=expected_per_request_timeout, + ) assert mock_gapic.call_count == 1 request = mock_gapic.call_args[0][0] - assert request["table_name"] == f"projects/project/instances/instance/tables/table" + assert ( + request["table_name"] + == "projects/project/instances/instance/tables/table" + ) assert request["row_key"] == b"row_key" - formatted_mutations = [mutation._to_dict() for mutation in mutation_arg] if isinstance(mutation_arg, list) else [mutation_arg._to_dict()] + formatted_mutations = ( + [mutation._to_dict() for mutation in mutation_arg] + if isinstance(mutation_arg, list) + else [mutation_arg._to_dict()] + ) assert request["mutations"] == formatted_mutations found_per_request_timeout = mock_gapic.call_args[1]["timeout"] assert found_per_request_timeout == expected_per_request_timeout - - @pytest.mark.parametrize("retryable_exception", [ - core_exceptions.DeadlineExceeded, - core_exceptions.ServiceUnavailable, - core_exceptions.Aborted, - ]) + @pytest.mark.parametrize( + "retryable_exception", + [ + 
core_exceptions.DeadlineExceeded, + core_exceptions.ServiceUnavailable, + core_exceptions.Aborted, + ], + ) @pytest.mark.asyncio async def test_mutate_row_retryable_errors(self, retryable_exception): from google.api_core.exceptions import DeadlineExceeded from google.cloud.bigtable.exceptions import RetryExceptionGroup + async with self._make_client(project="project") as client: async with client.get_table("instance", "table") as table: - with mock.patch.object(client._gapic_client, "mutate_row") as mock_gapic: + with mock.patch.object( + client._gapic_client, "mutate_row" + ) as mock_gapic: mock_gapic.side_effect = retryable_exception("mock") with pytest.raises(DeadlineExceeded) as e: mutation = mutations.DeleteAllFromRow() - assert mutation.is_idempotent() == True - await table.mutate_row("row_key", mutation, operation_timeout=0.05) + assert mutation.is_idempotent() is True + await table.mutate_row( + "row_key", mutation, operation_timeout=0.05 + ) cause = e.value.__cause__ assert isinstance(cause, RetryExceptionGroup) assert isinstance(cause.exceptions[0], retryable_exception) - - @pytest.mark.parametrize("retryable_exception", [ - core_exceptions.DeadlineExceeded, - core_exceptions.ServiceUnavailable, - core_exceptions.Aborted, - ]) + @pytest.mark.parametrize( + "retryable_exception", + [ + core_exceptions.DeadlineExceeded, + core_exceptions.ServiceUnavailable, + core_exceptions.Aborted, + ], + ) @pytest.mark.asyncio - async def test_mutate_row_non_idempotent_retryable_errors(self, retryable_exception): + async def test_mutate_row_non_idempotent_retryable_errors( + self, retryable_exception + ): """ Non-idempotent mutations should not be retried """ async with self._make_client(project="project") as client: async with client.get_table("instance", "table") as table: - with mock.patch.object(client._gapic_client, "mutate_row") as mock_gapic: + with mock.patch.object( + client._gapic_client, "mutate_row" + ) as mock_gapic: mock_gapic.side_effect = retryable_exception("mock") - with pytest.raises(retryable_exception) as e: + with pytest.raises(retryable_exception): mutation = mutations.SetCell("family", b"qualifier", b"value") assert mutation.is_idempotent() is False - await table.mutate_row("row_key", mutation, operation_timeout=0.2) + await table.mutate_row( + "row_key", mutation, operation_timeout=0.2 + ) - @pytest.mark.parametrize("non_retryable_exception", [ - core_exceptions.OutOfRange, - core_exceptions.NotFound, - core_exceptions.FailedPrecondition, - RuntimeError, - ValueError, - ]) + @pytest.mark.parametrize( + "non_retryable_exception", + [ + core_exceptions.OutOfRange, + core_exceptions.NotFound, + core_exceptions.FailedPrecondition, + RuntimeError, + ValueError, + ], + ) @pytest.mark.asyncio async def test_mutate_row_non_retryable_errors(self, non_retryable_exception): async with self._make_client(project="project") as client: async with client.get_table("instance", "table") as table: - with mock.patch.object(client._gapic_client, "mutate_row") as mock_gapic: + with mock.patch.object( + client._gapic_client, "mutate_row" + ) as mock_gapic: mock_gapic.side_effect = non_retryable_exception("mock") - with pytest.raises(non_retryable_exception) as e: - mutation = mutations.SetCell("family", b"qualifier", b"value", timestamp_micros=1234567890) + with pytest.raises(non_retryable_exception): + mutation = mutations.SetCell( + "family", + b"qualifier", + b"value", + timestamp_micros=1234567890, + ) assert mutation.is_idempotent() is True - await table.mutate_row("row_key", mutation, 
operation_timeout=0.2) + await table.mutate_row( + "row_key", mutation, operation_timeout=0.2 + ) + +class TestBulkMutateRows: + def _make_client(self, *args, **kwargs): + from google.cloud.bigtable.client import BigtableDataClient + + return BigtableDataClient(*args, **kwargs) + + @pytest.mark.asyncio + def test_bulk_mutate_rows(self): + """Test mutations with no errors""" + pass + + @pytest.mark.asyncio + def test_bulk_mutate_rows_retryable_errors(self): + pass + + @pytest.mark.asyncio + def test_bulk_mutate_rows_non_retryable_errors(self): + pass + + @pytest.mark.asyncio + def test_bulk_mutate_rows_idempotent(self): + """Idempotent mutations should never be retried""" + pass From 38fdcd763f39021156e0af0b796b5cb2dd74c213 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Tue, 25 Apr 2023 11:40:44 -0700 Subject: [PATCH 019/213] improved exceptions --- google/cloud/bigtable/client.py | 9 +++++---- google/cloud/bigtable/exceptions.py | 24 +++++++++++++++--------- 2 files changed, 20 insertions(+), 13 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 0158f9abe..bcf635ae5 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -40,7 +40,7 @@ from google.api_core import client_options as client_options_lib from google.cloud.bigtable.exceptions import RetryExceptionGroup -from google.cloud.bigtable.exceptions import FailedMutationError +from google.cloud.bigtable.exceptions import FailedMutationEntryError from google.cloud.bigtable.exceptions import MutationsExceptionGroup from google.cloud.bigtable.mutations import Mutation, BulkMutationsEntry @@ -748,7 +748,7 @@ def on_error_fn(exc): else: cause_exc = RetryExceptionGroup(exc_list) all_errors.append( - FailedMutationError(idx, mutation_entries[idx], cause_exc) + FailedMutationEntryError(idx, mutation_entries[idx], cause_exc) ) if all_errors: raise MutationsExceptionGroup(all_errors, len(mutation_entries)) @@ -758,6 +758,7 @@ async def _mutations_retryable_attempt( ): new_request = request.copy() while any(mutation is not None for mutation in mutation_dict.values()): + await asyncio.sleep(0) # continue to retry until timeout, or all mutations are complete (success or failure) new_request["entries"] = [ mutation_dict[i]._to_dict() @@ -780,9 +781,9 @@ async def _mutations_retryable_attempt( result.status.message, details=result.status.details, ) - error_dict.setdefault(idx, []).append(exception) + error_dict[idx].append(exception) # if not idempotent, remove from retry list - if mutation_dict[idx].is_idempotent(): + if not mutation_dict[idx].is_idempotent(): mutation_dict[idx] = None async def check_and_mutate_row( diff --git a/google/cloud/bigtable/exceptions.py b/google/cloud/bigtable/exceptions.py index debf97e10..3f4f48ca8 100644 --- a/google/cloud/bigtable/exceptions.py +++ b/google/cloud/bigtable/exceptions.py @@ -37,8 +37,7 @@ def __init__(self, message, excs): super().__init__(message, excs) else: self.exceptions = excs - revised_message = f"{message} ({len(excs)} sub-exceptions)" - super().__init__(revised_message) + super().__init__(message) class MutationsExceptionGroup(BigtableExceptionGroup): @@ -47,10 +46,10 @@ class MutationsExceptionGroup(BigtableExceptionGroup): """ def __init__(self, excs, total_num): - super().__init__(f"{len(excs)} failed mutations (out of {total_num})", excs) + super().__init__(f"{len(excs)} out of {total_num} mutation entries failed", excs) -class FailedMutationError(Exception): +class FailedMutationEntryError(Exception): """ 
Represents a failed mutation entry for bulk mutation operations """ @@ -58,12 +57,14 @@ class FailedMutationError(Exception): def __init__( self, failed_idx: int, - failed_mutation_obj: "BulkMutationsEntry", + failed_mutation_entry: "BulkMutationsEntry", cause: Exception, ): - super().__init__(f"Failed mutation at index: {failed_idx} with cause: {cause}") - self.failed_idx = failed_idx - self.failed_mutation_obj = failed_mutation_obj + idempotent_msg = "idempotent" if failed_mutation_entry.is_idempotent() else "non-idempotent" + message = f"Failed {idempotent_msg} mutation entry at index {failed_idx} with cause: {cause!r}" + super().__init__(message) + self.index = failed_idx + self.entry = failed_mutation_entry self.__cause__ = cause @@ -71,4 +72,9 @@ class RetryExceptionGroup(BigtableExceptionGroup): """Represents one or more exceptions that occur during a retryable operation""" def __init__(self, excs): - super().__init__(f"{len(excs)} failed attempts", excs) + if len(excs) == 0: + raise ValueError("RetryExceptionGroup must have at least one exception") + elif len(excs) == 1: + super().__init__(f"1 failed attempt: {excs[0]!r}") + else: + super().__init__(f"{len(excs)} failed attempts. Latest: {excs[-1]!r}", excs) From 504d2d8f8fd8d2d40077ebf35cafd3227b87243e Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Tue, 25 Apr 2023 11:41:04 -0700 Subject: [PATCH 020/213] added bulk_mutate_rows unit tests --- tests/unit/test_client.py | 217 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 209 insertions(+), 8 deletions(-) diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 249b71461..f1a03c3b8 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -965,20 +965,221 @@ def _make_client(self, *args, **kwargs): return BigtableDataClient(*args, **kwargs) + async def _mock_response(self, response_list): + from google.cloud.bigtable_v2.types import MutateRowsResponse + from google.rpc import status_pb2 + statuses = [] + for response in response_list: + if isinstance(response, core_exceptions.GoogleAPICallError): + statuses.append(status_pb2.Status(message=str(response), code=response.grpc_status_code.value[0])) + else: + statuses.append(status_pb2.Status(code=0)) + entries = [MutateRowsResponse.Entry(index=i, status=statuses[i]) for i in range(len(response_list))] + async def generator(): + yield MutateRowsResponse(entries=entries) + return generator() + + @pytest.mark.asyncio @pytest.mark.asyncio - def test_bulk_mutate_rows(self): + @pytest.mark.parametrize( + "mutation_arg", + [ + [mutations.SetCell("family", b"qualifier", b"value")], + [mutations.SetCell( + "family", b"qualifier", b"value", timestamp_micros=1234567890 + )], + [mutations.DeleteRangeFromColumn("family", b"qualifier")], + [mutations.DeleteAllFromFamily("family")], + [mutations.DeleteAllFromRow()], + [mutations.SetCell("family", b"qualifier", b"value")], + [ + mutations.DeleteRangeFromColumn("family", b"qualifier"), + mutations.DeleteAllFromRow(), + ], + ], + ) + async def test_bulk_mutate_rows(self, mutation_arg): """Test mutations with no errors""" - pass + expected_per_request_timeout = 19 + async with self._make_client(project="project") as client: + async with client.get_table("instance", "table") as table: + with mock.patch.object( + client._gapic_client, "mutate_rows" + ) as mock_gapic: + mock_gapic.return_value = self._mock_response([None]) + bulk_mutation = mutations.BulkMutationsEntry(b"row_key", mutation_arg) + await table.bulk_mutate_rows( + [bulk_mutation], + 
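From the caller's side, the new exception types compose as sketched below, assuming the class and attribute names introduced in this series; `table` and `entries` are whatever the caller already holds.

from google.cloud.bigtable.exceptions import (
    FailedMutationEntryError,
    MutationsExceptionGroup,
    RetryExceptionGroup,
)

async def apply_entries(table, entries):
    try:
        await table.bulk_mutate_rows(entries)
    except MutationsExceptionGroup as group:
        # one FailedMutationEntryError per entry that could not be applied
        for failed in group.exceptions:
            assert isinstance(failed, FailedMutationEntryError)
            print(f"entry #{failed.index} ({failed.entry.row_key!r}) failed")
            cause = failed.__cause__
            if isinstance(cause, RetryExceptionGroup):
                print(f"  gave up after {len(cause.exceptions)} attempts")
            else:
                print(f"  non-retryable cause: {cause!r}")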
per_request_timeout=expected_per_request_timeout, + ) + assert mock_gapic.call_count == 1 + request = mock_gapic.call_args[0][0] + assert ( + request["table_name"] + == "projects/project/instances/instance/tables/table" + ) + assert request["entries"] == [bulk_mutation._to_dict()] + found_per_request_timeout = mock_gapic.call_args[1]["timeout"] + assert found_per_request_timeout == expected_per_request_timeout + @pytest.mark.asyncio - def test_bulk_mutate_rows_retryable_errors(self): - pass + async def test_bulk_mutate_rows_multiple_entries(self): + """Test mutations with no errors""" + async with self._make_client(project="project") as client: + async with client.get_table("instance", "table") as table: + with mock.patch.object( + client._gapic_client, "mutate_rows" + ) as mock_gapic: + mock_gapic.return_value = self._mock_response([None, None]) + mutation_list = [mutations.DeleteAllFromRow()] + entry_1 = mutations.BulkMutationsEntry(b"row_key_1", mutation_list) + entry_2 = mutations.BulkMutationsEntry(b"row_key_2", mutation_list) + await table.bulk_mutate_rows( + [entry_1, entry_2], + ) + assert mock_gapic.call_count == 1 + request = mock_gapic.call_args[0][0] + assert ( + request["table_name"] + == "projects/project/instances/instance/tables/table" + ) + assert request["entries"][0] == entry_1._to_dict() + assert request["entries"][1] == entry_2._to_dict() @pytest.mark.asyncio - def test_bulk_mutate_rows_non_retryable_errors(self): - pass + @pytest.mark.parametrize( + "exception", + [ + core_exceptions.DeadlineExceeded, + core_exceptions.ServiceUnavailable, + core_exceptions.Aborted, + core_exceptions.OutOfRange, + core_exceptions.NotFound, + core_exceptions.FailedPrecondition, + ], + ) + async def test_bulk_mutate_rows_idempotent_mutation_error_retries(self, exception): + """ + Individual idempotent mutations should be retried if they fail with any error + """ + from google.api_core.exceptions import DeadlineExceeded + from google.cloud.bigtable.exceptions import RetryExceptionGroup, FailedMutationEntryError, MutationsExceptionGroup + async with self._make_client(project="project") as client: + async with client.get_table("instance", "table") as table: + with mock.patch.object( + client._gapic_client, "mutate_rows" + ) as mock_gapic: + mock_gapic.side_effect = lambda *a, **k: self._mock_response([exception("mock")]) + with pytest.raises(MutationsExceptionGroup) as e: + mutation = mutations.DeleteAllFromRow() + entry = mutations.BulkMutationsEntry(b"row_key", [mutation]) + assert mutation.is_idempotent() is True + await table.bulk_mutate_rows([entry], operation_timeout=0.05) + assert len(e.value.exceptions) == 1 + failed_exception = e.value.exceptions[0] + assert "non-idempotent" not in str(failed_exception) + assert isinstance(failed_exception, FailedMutationEntryError) + cause = failed_exception.__cause__ + assert isinstance(cause, RetryExceptionGroup) + assert isinstance(cause.exceptions[0], exception) + # last exception should be due to retry timeout + assert isinstance(cause.exceptions[-1], core_exceptions.DeadlineExceeded) + + @pytest.mark.parametrize( + "retryable_exception", + [ + core_exceptions.DeadlineExceeded, + core_exceptions.ServiceUnavailable, + core_exceptions.Aborted, + ], + ) @pytest.mark.asyncio - def test_bulk_mutate_rows_idempotent(self): + async def test_bulk_mutate_idempotent_retryable_errors(self, retryable_exception): + """ + Individual idempotent mutations should be retried if the request fails with a retryable error + """ + from 
google.cloud.bigtable.exceptions import RetryExceptionGroup, FailedMutationEntryError, MutationsExceptionGroup + async with self._make_client(project="project") as client: + async with client.get_table("instance", "table") as table: + with mock.patch.object( + client._gapic_client, "mutate_rows" + ) as mock_gapic: + mock_gapic.side_effect = retryable_exception("mock") + with pytest.raises(MutationsExceptionGroup) as e: + mutation = mutations.SetCell("family", b"qualifier", b"value", timestamp_micros=123) + entry = mutations.BulkMutationsEntry(b"row_key", [mutation]) + assert mutation.is_idempotent() is True + await table.bulk_mutate_rows([entry], operation_timeout=0.05) + assert len(e.value.exceptions) == 1 + failed_exception = e.value.exceptions[0] + assert isinstance(failed_exception, FailedMutationEntryError) + assert "non-idempotent" not in str(failed_exception) + cause = failed_exception.__cause__ + assert isinstance(cause, RetryExceptionGroup) + assert isinstance(cause.exceptions[0], retryable_exception) + + @pytest.mark.asyncio + @pytest.mark.parametrize( + "retryable_exception", + [ + core_exceptions.DeadlineExceeded, + core_exceptions.ServiceUnavailable, + core_exceptions.Aborted, + ], + ) + async def test_bulk_mutate_rows_idempotent_retryable_errors(self, retryable_exception): """Idempotent mutations should never be retried""" - pass + from google.cloud.bigtable.exceptions import RetryExceptionGroup, FailedMutationEntryError, MutationsExceptionGroup + async with self._make_client(project="project") as client: + async with client.get_table("instance", "table") as table: + with mock.patch.object( + client._gapic_client, "mutate_rows" + ) as mock_gapic: + mock_gapic.side_effect = lambda *a, **k: self._mock_response([retryable_exception("mock")]) + with pytest.raises(MutationsExceptionGroup) as e: + mutation = mutations.SetCell("family", b"qualifier", b"value") + entry = mutations.BulkMutationsEntry(b"row_key", [mutation]) + assert mutation.is_idempotent() is False + await table.bulk_mutate_rows([entry], operation_timeout=0.2) + assert len(e.value.exceptions) == 1 + failed_exception = e.value.exceptions[0] + assert isinstance(failed_exception, FailedMutationEntryError) + assert "non-idempotent" in str(failed_exception) + cause = failed_exception.__cause__ + assert isinstance(cause, retryable_exception) + + @pytest.mark.parametrize( + "non_retryable_exception", + [ + core_exceptions.OutOfRange, + core_exceptions.NotFound, + core_exceptions.FailedPrecondition, + RuntimeError, + ValueError, + ], + ) + @pytest.mark.asyncio + async def test_bulk_mutate_rows_non_retryable_errors(self, non_retryable_exception): + """ + If the request fails with a non-retryable error, mutations should not be retried + """ + from google.cloud.bigtable.exceptions import RetryExceptionGroup, FailedMutationEntryError, MutationsExceptionGroup + async with self._make_client(project="project") as client: + async with client.get_table("instance", "table") as table: + with mock.patch.object( + client._gapic_client, "mutate_rows" + ) as mock_gapic: + mock_gapic.side_effect = non_retryable_exception("mock") + with pytest.raises(MutationsExceptionGroup) as e: + mutation = mutations.SetCell("family", b"qualifier", b"value", timestamp_micros=123) + entry = mutations.BulkMutationsEntry(b"row_key", [mutation]) + assert mutation.is_idempotent() is True + await table.bulk_mutate_rows([entry], operation_timeout=0.2) + assert len(e.value.exceptions) == 1 + failed_exception = e.value.exceptions[0] + assert 
isinstance(failed_exception, FailedMutationEntryError) + assert "non-idempotent" not in str(failed_exception) + cause = failed_exception.__cause__ + assert isinstance(cause, non_retryable_exception) From b16067f8eaf5e53ab8c03bcba90f6c575b61a804 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Tue, 25 Apr 2023 14:03:45 -0700 Subject: [PATCH 021/213] ran blacken --- google/cloud/bigtable/exceptions.py | 8 ++- tests/unit/test_client.py | 78 ++++++++++++++++++++++------- 2 files changed, 67 insertions(+), 19 deletions(-) diff --git a/google/cloud/bigtable/exceptions.py b/google/cloud/bigtable/exceptions.py index 3f4f48ca8..650f55f69 100644 --- a/google/cloud/bigtable/exceptions.py +++ b/google/cloud/bigtable/exceptions.py @@ -46,7 +46,9 @@ class MutationsExceptionGroup(BigtableExceptionGroup): """ def __init__(self, excs, total_num): - super().__init__(f"{len(excs)} out of {total_num} mutation entries failed", excs) + super().__init__( + f"{len(excs)} out of {total_num} mutation entries failed", excs + ) class FailedMutationEntryError(Exception): @@ -60,7 +62,9 @@ def __init__( failed_mutation_entry: "BulkMutationsEntry", cause: Exception, ): - idempotent_msg = "idempotent" if failed_mutation_entry.is_idempotent() else "non-idempotent" + idempotent_msg = ( + "idempotent" if failed_mutation_entry.is_idempotent() else "non-idempotent" + ) message = f"Failed {idempotent_msg} mutation entry at index {failed_idx} with cause: {cause!r}" super().__init__(message) self.index = failed_idx diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index f1a03c3b8..dd94ed947 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -968,15 +968,25 @@ def _make_client(self, *args, **kwargs): async def _mock_response(self, response_list): from google.cloud.bigtable_v2.types import MutateRowsResponse from google.rpc import status_pb2 + statuses = [] for response in response_list: if isinstance(response, core_exceptions.GoogleAPICallError): - statuses.append(status_pb2.Status(message=str(response), code=response.grpc_status_code.value[0])) + statuses.append( + status_pb2.Status( + message=str(response), code=response.grpc_status_code.value[0] + ) + ) else: statuses.append(status_pb2.Status(code=0)) - entries = [MutateRowsResponse.Entry(index=i, status=statuses[i]) for i in range(len(response_list))] + entries = [ + MutateRowsResponse.Entry(index=i, status=statuses[i]) + for i in range(len(response_list)) + ] + async def generator(): yield MutateRowsResponse(entries=entries) + return generator() @pytest.mark.asyncio @@ -985,9 +995,11 @@ async def generator(): "mutation_arg", [ [mutations.SetCell("family", b"qualifier", b"value")], - [mutations.SetCell( - "family", b"qualifier", b"value", timestamp_micros=1234567890 - )], + [ + mutations.SetCell( + "family", b"qualifier", b"value", timestamp_micros=1234567890 + ) + ], [mutations.DeleteRangeFromColumn("family", b"qualifier")], [mutations.DeleteAllFromFamily("family")], [mutations.DeleteAllFromRow()], @@ -1007,7 +1019,9 @@ async def test_bulk_mutate_rows(self, mutation_arg): client._gapic_client, "mutate_rows" ) as mock_gapic: mock_gapic.return_value = self._mock_response([None]) - bulk_mutation = mutations.BulkMutationsEntry(b"row_key", mutation_arg) + bulk_mutation = mutations.BulkMutationsEntry( + b"row_key", mutation_arg + ) await table.bulk_mutate_rows( [bulk_mutation], per_request_timeout=expected_per_request_timeout, @@ -1022,7 +1036,6 @@ async def test_bulk_mutate_rows(self, mutation_arg): found_per_request_timeout = 
mock_gapic.call_args[1]["timeout"] assert found_per_request_timeout == expected_per_request_timeout - @pytest.mark.asyncio async def test_bulk_mutate_rows_multiple_entries(self): """Test mutations with no errors""" @@ -1064,14 +1077,20 @@ async def test_bulk_mutate_rows_idempotent_mutation_error_retries(self, exceptio Individual idempotent mutations should be retried if they fail with any error """ from google.api_core.exceptions import DeadlineExceeded - from google.cloud.bigtable.exceptions import RetryExceptionGroup, FailedMutationEntryError, MutationsExceptionGroup + from google.cloud.bigtable.exceptions import ( + RetryExceptionGroup, + FailedMutationEntryError, + MutationsExceptionGroup, + ) async with self._make_client(project="project") as client: async with client.get_table("instance", "table") as table: with mock.patch.object( client._gapic_client, "mutate_rows" ) as mock_gapic: - mock_gapic.side_effect = lambda *a, **k: self._mock_response([exception("mock")]) + mock_gapic.side_effect = lambda *a, **k: self._mock_response( + [exception("mock")] + ) with pytest.raises(MutationsExceptionGroup) as e: mutation = mutations.DeleteAllFromRow() entry = mutations.BulkMutationsEntry(b"row_key", [mutation]) @@ -1085,7 +1104,9 @@ async def test_bulk_mutate_rows_idempotent_mutation_error_retries(self, exceptio assert isinstance(cause, RetryExceptionGroup) assert isinstance(cause.exceptions[0], exception) # last exception should be due to retry timeout - assert isinstance(cause.exceptions[-1], core_exceptions.DeadlineExceeded) + assert isinstance( + cause.exceptions[-1], core_exceptions.DeadlineExceeded + ) @pytest.mark.parametrize( "retryable_exception", @@ -1100,7 +1121,12 @@ async def test_bulk_mutate_idempotent_retryable_errors(self, retryable_exception """ Individual idempotent mutations should be retried if the request fails with a retryable error """ - from google.cloud.bigtable.exceptions import RetryExceptionGroup, FailedMutationEntryError, MutationsExceptionGroup + from google.cloud.bigtable.exceptions import ( + RetryExceptionGroup, + FailedMutationEntryError, + MutationsExceptionGroup, + ) + async with self._make_client(project="project") as client: async with client.get_table("instance", "table") as table: with mock.patch.object( @@ -1108,7 +1134,9 @@ async def test_bulk_mutate_idempotent_retryable_errors(self, retryable_exception ) as mock_gapic: mock_gapic.side_effect = retryable_exception("mock") with pytest.raises(MutationsExceptionGroup) as e: - mutation = mutations.SetCell("family", b"qualifier", b"value", timestamp_micros=123) + mutation = mutations.SetCell( + "family", b"qualifier", b"value", timestamp_micros=123 + ) entry = mutations.BulkMutationsEntry(b"row_key", [mutation]) assert mutation.is_idempotent() is True await table.bulk_mutate_rows([entry], operation_timeout=0.05) @@ -1129,15 +1157,24 @@ async def test_bulk_mutate_idempotent_retryable_errors(self, retryable_exception core_exceptions.Aborted, ], ) - async def test_bulk_mutate_rows_idempotent_retryable_errors(self, retryable_exception): + async def test_bulk_mutate_rows_idempotent_retryable_errors( + self, retryable_exception + ): """Idempotent mutations should never be retried""" - from google.cloud.bigtable.exceptions import RetryExceptionGroup, FailedMutationEntryError, MutationsExceptionGroup + from google.cloud.bigtable.exceptions import ( + RetryExceptionGroup, + FailedMutationEntryError, + MutationsExceptionGroup, + ) + async with self._make_client(project="project") as client: async with 
client.get_table("instance", "table") as table: with mock.patch.object( client._gapic_client, "mutate_rows" ) as mock_gapic: - mock_gapic.side_effect = lambda *a, **k: self._mock_response([retryable_exception("mock")]) + mock_gapic.side_effect = lambda *a, **k: self._mock_response( + [retryable_exception("mock")] + ) with pytest.raises(MutationsExceptionGroup) as e: mutation = mutations.SetCell("family", b"qualifier", b"value") entry = mutations.BulkMutationsEntry(b"row_key", [mutation]) @@ -1165,7 +1202,12 @@ async def test_bulk_mutate_rows_non_retryable_errors(self, non_retryable_excepti """ If the request fails with a non-retryable error, mutations should not be retried """ - from google.cloud.bigtable.exceptions import RetryExceptionGroup, FailedMutationEntryError, MutationsExceptionGroup + from google.cloud.bigtable.exceptions import ( + RetryExceptionGroup, + FailedMutationEntryError, + MutationsExceptionGroup, + ) + async with self._make_client(project="project") as client: async with client.get_table("instance", "table") as table: with mock.patch.object( @@ -1173,7 +1215,9 @@ async def test_bulk_mutate_rows_non_retryable_errors(self, non_retryable_excepti ) as mock_gapic: mock_gapic.side_effect = non_retryable_exception("mock") with pytest.raises(MutationsExceptionGroup) as e: - mutation = mutations.SetCell("family", b"qualifier", b"value", timestamp_micros=123) + mutation = mutations.SetCell( + "family", b"qualifier", b"value", timestamp_micros=123 + ) entry = mutations.BulkMutationsEntry(b"row_key", [mutation]) assert mutation.is_idempotent() is True await table.bulk_mutate_rows([entry], operation_timeout=0.2) From 3ab1405ffcf5cc734d9b5cb2b9d11f5664bda920 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Tue, 25 Apr 2023 14:47:29 -0700 Subject: [PATCH 022/213] support __new___ for exceptions for python3.11+ --- google/cloud/bigtable/exceptions.py | 31 +++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/google/cloud/bigtable/exceptions.py b/google/cloud/bigtable/exceptions.py index 650f55f69..0fdb83adb 100644 --- a/google/cloud/bigtable/exceptions.py +++ b/google/cloud/bigtable/exceptions.py @@ -39,16 +39,28 @@ def __init__(self, message, excs): self.exceptions = excs super().__init__(message) + def __new__(cls, message, excs): + if is_311_plus: + return super().__new__(cls, message, excs) + else: + return super().__new__(cls) + class MutationsExceptionGroup(BigtableExceptionGroup): """ Represents one or more exceptions that occur during a bulk mutation operation """ + @staticmethod + def _format_message(excs, total_num): + entry_str = "entry" if total_num == 1 else "entries" + return f"{len(excs)} out of {total_num} mutation {entry_str} failed" + def __init__(self, excs, total_num): - super().__init__( - f"{len(excs)} out of {total_num} mutation entries failed", excs - ) + super().__init__(self._format_message(excs, total_num), excs) + + def __new__(cls, excs, total_num): + return super().__new__(cls, cls._format_message(excs, total_num), excs) class FailedMutationEntryError(Exception): @@ -75,10 +87,17 @@ def __init__( class RetryExceptionGroup(BigtableExceptionGroup): """Represents one or more exceptions that occur during a retryable operation""" - def __init__(self, excs): + @staticmethod + def _format_message(excs): if len(excs) == 0: raise ValueError("RetryExceptionGroup must have at least one exception") elif len(excs) == 1: - super().__init__(f"1 failed attempt: {excs[0]!r}") + return f"1 failed attempt: {excs[0]!r}" else: - 
super().__init__(f"{len(excs)} failed attempts. Latest: {excs[-1]!r}", excs) + return f"{len(excs)} failed attempts. Latest: {excs[-1]!r}" + + def __init__(self, excs): + super().__init__(self._format_message(excs), excs) + + def __new__(cls, excs): + return super().__new__(cls, cls._format_message(excs), excs) From 0a6c0c60b6e22d294d3ade4552a4109e0ae4cc39 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Tue, 25 Apr 2023 16:13:32 -0700 Subject: [PATCH 023/213] added exception unit tests --- google/cloud/bigtable/exceptions.py | 16 +-- tests/unit/test_exceptions.py | 188 ++++++++++++++++++++++++++++ 2 files changed, 196 insertions(+), 8 deletions(-) create mode 100644 tests/unit/test_exceptions.py diff --git a/google/cloud/bigtable/exceptions.py b/google/cloud/bigtable/exceptions.py index 0fdb83adb..4a22c1467 100644 --- a/google/cloud/bigtable/exceptions.py +++ b/google/cloud/bigtable/exceptions.py @@ -52,15 +52,15 @@ class MutationsExceptionGroup(BigtableExceptionGroup): """ @staticmethod - def _format_message(excs, total_num): - entry_str = "entry" if total_num == 1 else "entries" - return f"{len(excs)} out of {total_num} mutation {entry_str} failed" + def _format_message(excs, total_entries): + entry_str = "entry" if total_entries == 1 else "entries" + return f"{len(excs)} out of {total_entries} mutation {entry_str} failed" - def __init__(self, excs, total_num): - super().__init__(self._format_message(excs, total_num), excs) + def __init__(self, excs, total_entries): + super().__init__(self._format_message(excs, total_entries), excs) - def __new__(cls, excs, total_num): - return super().__new__(cls, cls._format_message(excs, total_num), excs) + def __new__(cls, excs, total_entries): + return super().__new__(cls, cls._format_message(excs, total_entries), excs) class FailedMutationEntryError(Exception): @@ -90,7 +90,7 @@ class RetryExceptionGroup(BigtableExceptionGroup): @staticmethod def _format_message(excs): if len(excs) == 0: - raise ValueError("RetryExceptionGroup must have at least one exception") + raise ValueError("Empty exception list") elif len(excs) == 1: return f"1 failed attempt: {excs[0]!r}" else: diff --git a/tests/unit/test_exceptions.py b/tests/unit/test_exceptions.py new file mode 100644 index 000000000..e69d76712 --- /dev/null +++ b/tests/unit/test_exceptions.py @@ -0,0 +1,188 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest +import pytest +import sys + +import google.cloud.bigtable.exceptions as bigtable_exceptions + + +# try/except added for compatibility with python < 3.8 +try: + from unittest import mock + from unittest.mock import AsyncMock # type: ignore +except ImportError: # pragma: NO COVER + import mock # type: ignore + from mock import AsyncMock # type: ignore + +class TestBigtableExceptionGroup(): + """ + Subclass for MutationsExceptionGroup and RetryExceptionGroup + """ + + def _get_class(self): + from google.cloud.bigtable.exceptions import BigtableExceptionGroup + return BigtableExceptionGroup + + def _make_one(self, message="test_message", excs=None): + if excs is None: + excs = [RuntimeError('mock')] + + return self._get_class()(message, excs=excs) + + def test_raise(self): + """ + Create exception in raise statement, which calls __new__ and __init__ + """ + test_msg = "test message" + test_excs = [Exception(test_msg)] + with pytest.raises(self._get_class()) as e: + raise self._get_class()(test_msg, test_excs) + assert str(e.value) == test_msg + assert e.value.exceptions == test_excs + + @pytest.mark.skipif(sys.version_info < (3, 11), reason="requires python3.11 or higher") + def test_311_exception_group(self): + """ + Python 3.11+ should handle exepctions as native exception groups + """ + exceptions = [RuntimeError("mock"), ValueError("mock")] + instance = self._make_one(excs=exceptions) + assert isinstance(instance, ExceptionGroup) + # ensure split works as expected + runtime_error, others = instance.split(lambda e: isinstance(e, RuntimeError)) + assert isinstance(runtime_error, ExceptionGroup) + assert runtime_error.exceptions[0] == exceptions[0] + assert isinstance(others, ExceptionGroup) + assert others.exceptions[0] == exceptions[1] + + def test_exception_handling(self): + """ + All versions should inherit from exception + and support tranditional exception handling + """ + instance = self._make_one() + assert isinstance(instance, Exception) + try: + raise instance + except Exception as e: + assert isinstance(e, Exception) + assert e == instance + was_raised = True + assert was_raised + + + +class TestMutationsExceptionGroup(TestBigtableExceptionGroup): + def _get_class(self): + from google.cloud.bigtable.exceptions import MutationsExceptionGroup + return MutationsExceptionGroup + + def _make_one(self, excs=None, num_entries=3): + if excs is None: + excs = [RuntimeError('mock')] + + return self._get_class()(excs, num_entries) + + @pytest.mark.parametrize("exception_list,total_entries,expected_message", [ + ([Exception()], 1, "1 out of 1 mutation entry failed"), + ([Exception()], 2, "1 out of 2 mutation entries failed"), + ([Exception(), RuntimeError()], 2, "2 out of 2 mutation entries failed"), + ([], 0, "0 out of 0 mutation entries failed"), + ]) + def test_raise(self, exception_list, total_entries, expected_message): + """ + Create exception in raise statement, which calls __new__ and __init__ + """ + with pytest.raises(self._get_class()) as e: + raise self._get_class()(exception_list, total_entries) + assert str(e.value) == expected_message + assert e.value.exceptions == exception_list + +class TestRetryExceptionGroup(TestBigtableExceptionGroup): + def _get_class(self): + from google.cloud.bigtable.exceptions import RetryExceptionGroup + return RetryExceptionGroup + + def _make_one(self, excs=None): + if excs is None: + excs = [RuntimeError('mock')] + + return self._get_class()(excs=excs) + + @pytest.mark.parametrize("exception_list,expected_message", [ + 
([Exception()], "1 failed attempt: Exception()"), + ([Exception(), RuntimeError()], "2 failed attempts. Latest: RuntimeError()"), + ([Exception(), ValueError("test")], "2 failed attempts. Latest: ValueError('test')"), + ([bigtable_exceptions.RetryExceptionGroup([Exception(), ValueError("test")])], "1 failed attempt: RetryExceptionGroup(\"2 failed attempts. Latest: ValueError('test')\")"), + ]) + def test_raise(self, exception_list, expected_message): + """ + Create exception in raise statement, which calls __new__ and __init__ + """ + with pytest.raises(self._get_class()) as e: + raise self._get_class()(exception_list) + assert str(e.value) == expected_message + assert e.value.exceptions == exception_list + + def test_raise_empty_list(self): + """ + Empty exception lists are not supported + """ + with pytest.raises(ValueError) as e: + raise self._get_class()([]) + assert str(e.value) == "Empty exception list" + + +class TestFailedMutationEntryError(): + def _get_class(self): + from google.cloud.bigtable.exceptions import FailedMutationEntryError + return FailedMutationEntryError + + def _make_one(self, idx=9, entry=unittest.mock.Mock(), cause=RuntimeError('mock')): + + return self._get_class()(idx, entry, cause) + + def test_raise(self): + """ + Create exception in raise statement, which calls __new__ and __init__ + """ + test_idx = 2 + test_entry = unittest.mock.Mock() + test_exc = ValueError("test") + with pytest.raises(self._get_class()) as e: + raise self._get_class()(test_idx, test_entry, test_exc) + assert str(e.value) == "Failed idempotent mutation entry at index 2 with cause: ValueError('test')" + assert e.value.index == test_idx + assert e.value.entry == test_entry + assert e.value.__cause__ == test_exc + assert isinstance(e.value, Exception) + assert test_entry.is_idempotent.call_count == 1 + + def test_raise_idempotent(self): + """ + Test raise with non idempotent entry + """ + test_idx = 2 + test_entry = unittest.mock.Mock() + test_entry.is_idempotent.return_value = False + test_exc = ValueError("test") + with pytest.raises(self._get_class()) as e: + raise self._get_class()(test_idx, test_entry, test_exc) + assert str(e.value) == "Failed non-idempotent mutation entry at index 2 with cause: ValueError('test')" + assert e.value.index == test_idx + assert e.value.entry == test_entry + assert e.value.__cause__ == test_exc + assert test_entry.is_idempotent.call_count == 1 From ec043cf06a275f19a38e41c8fd226ab04755f7bd Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Tue, 25 Apr 2023 17:51:55 -0700 Subject: [PATCH 024/213] makde exceptions tuple --- google/cloud/bigtable/exceptions.py | 2 +- tests/unit/test_exceptions.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/google/cloud/bigtable/exceptions.py b/google/cloud/bigtable/exceptions.py index 4a22c1467..83b314edf 100644 --- a/google/cloud/bigtable/exceptions.py +++ b/google/cloud/bigtable/exceptions.py @@ -36,7 +36,7 @@ def __init__(self, message, excs): if is_311_plus: super().__init__(message, excs) else: - self.exceptions = excs + self.exceptions = tuple(excs) super().__init__(message) def __new__(cls, message, excs): diff --git a/tests/unit/test_exceptions.py b/tests/unit/test_exceptions.py index e69d76712..9a3f0a05e 100644 --- a/tests/unit/test_exceptions.py +++ b/tests/unit/test_exceptions.py @@ -51,7 +51,7 @@ def test_raise(self): with pytest.raises(self._get_class()) as e: raise self._get_class()(test_msg, test_excs) assert str(e.value) == test_msg - assert e.value.exceptions == test_excs + 
assert list(e.value.exceptions) == test_excs @pytest.mark.skipif(sys.version_info < (3, 11), reason="requires python3.11 or higher") def test_311_exception_group(self): @@ -109,7 +109,7 @@ def test_raise(self, exception_list, total_entries, expected_message): with pytest.raises(self._get_class()) as e: raise self._get_class()(exception_list, total_entries) assert str(e.value) == expected_message - assert e.value.exceptions == exception_list + assert list(e.value.exceptions) == exception_list class TestRetryExceptionGroup(TestBigtableExceptionGroup): def _get_class(self): @@ -135,7 +135,7 @@ def test_raise(self, exception_list, expected_message): with pytest.raises(self._get_class()) as e: raise self._get_class()(exception_list) assert str(e.value) == expected_message - assert e.value.exceptions == exception_list + assert list(e.value.exceptions) == exception_list def test_raise_empty_list(self): """ From 518530ef0f9bda4ed260daf32775e0cb462dfd04 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Tue, 25 Apr 2023 18:22:57 -0700 Subject: [PATCH 025/213] got exceptions to print consistently across versions --- google/cloud/bigtable/exceptions.py | 19 +++++++++++++----- tests/unit/test_exceptions.py | 31 ++++++++++++++--------------- 2 files changed, 29 insertions(+), 21 deletions(-) diff --git a/google/cloud/bigtable/exceptions.py b/google/cloud/bigtable/exceptions.py index 83b314edf..71c81f910 100644 --- a/google/cloud/bigtable/exceptions.py +++ b/google/cloud/bigtable/exceptions.py @@ -36,6 +36,8 @@ def __init__(self, message, excs): if is_311_plus: super().__init__(message, excs) else: + if len(excs) == 0: + raise ValueError("exceptions must be a non-empty sequence") self.exceptions = tuple(excs) super().__init__(message) @@ -45,6 +47,12 @@ def __new__(cls, message, excs): else: return super().__new__(cls) + def __str__(self): + """ + String representation doesn't display sub-exceptions. Subexceptions are + described in message + """ + return self.args[0] class MutationsExceptionGroup(BigtableExceptionGroup): """ @@ -54,7 +62,8 @@ class MutationsExceptionGroup(BigtableExceptionGroup): @staticmethod def _format_message(excs, total_entries): entry_str = "entry" if total_entries == 1 else "entries" - return f"{len(excs)} out of {total_entries} mutation {entry_str} failed" + plural_str = "" if len(excs) == 1 else "s" + return f"{len(excs)} sub-exception{plural_str} (from {total_entries} {entry_str} attempted)" def __init__(self, excs, total_entries): super().__init__(self._format_message(excs, total_entries), excs) @@ -90,11 +99,11 @@ class RetryExceptionGroup(BigtableExceptionGroup): @staticmethod def _format_message(excs): if len(excs) == 0: - raise ValueError("Empty exception list") - elif len(excs) == 1: - return f"1 failed attempt: {excs[0]!r}" + return "No exceptions" + if len(excs) == 1: + return f"1 failed attempt: {type(excs[0]).__name__}" else: - return f"{len(excs)} failed attempts. Latest: {excs[-1]!r}" + return f"{len(excs)} failed attempts. 
Latest: {type(excs[-1]).__name__}" def __init__(self, excs): super().__init__(self._format_message(excs), excs) diff --git a/tests/unit/test_exceptions.py b/tests/unit/test_exceptions.py index 9a3f0a05e..6b101fdbf 100644 --- a/tests/unit/test_exceptions.py +++ b/tests/unit/test_exceptions.py @@ -53,6 +53,14 @@ def test_raise(self): assert str(e.value) == test_msg assert list(e.value.exceptions) == test_excs + def test_raise_empty_list(self): + """ + Empty exception lists are not supported + """ + with pytest.raises(ValueError) as e: + raise self._make_one(excs=[]) + assert "non-empty sequence" in str(e.value) + @pytest.mark.skipif(sys.version_info < (3, 11), reason="requires python3.11 or higher") def test_311_exception_group(self): """ @@ -97,10 +105,9 @@ def _make_one(self, excs=None, num_entries=3): return self._get_class()(excs, num_entries) @pytest.mark.parametrize("exception_list,total_entries,expected_message", [ - ([Exception()], 1, "1 out of 1 mutation entry failed"), - ([Exception()], 2, "1 out of 2 mutation entries failed"), - ([Exception(), RuntimeError()], 2, "2 out of 2 mutation entries failed"), - ([], 0, "0 out of 0 mutation entries failed"), + ([Exception()], 1, "1 sub-exception (from 1 entry attempted)"), + ([Exception()], 2, "1 sub-exception (from 2 entries attempted)"), + ([Exception(), RuntimeError()], 2, "2 sub-exceptions (from 2 entries attempted)"), ]) def test_raise(self, exception_list, total_entries, expected_message): """ @@ -123,10 +130,10 @@ def _make_one(self, excs=None): return self._get_class()(excs=excs) @pytest.mark.parametrize("exception_list,expected_message", [ - ([Exception()], "1 failed attempt: Exception()"), - ([Exception(), RuntimeError()], "2 failed attempts. Latest: RuntimeError()"), - ([Exception(), ValueError("test")], "2 failed attempts. Latest: ValueError('test')"), - ([bigtable_exceptions.RetryExceptionGroup([Exception(), ValueError("test")])], "1 failed attempt: RetryExceptionGroup(\"2 failed attempts. Latest: ValueError('test')\")"), + ([Exception()], "1 failed attempt: Exception"), + ([Exception(), RuntimeError()], "2 failed attempts. Latest: RuntimeError"), + ([Exception(), ValueError("test")], "2 failed attempts. 
Latest: ValueError"), + ([bigtable_exceptions.RetryExceptionGroup([Exception(), ValueError("test")])], "1 failed attempt: RetryExceptionGroup"), ]) def test_raise(self, exception_list, expected_message): """ @@ -137,14 +144,6 @@ def test_raise(self, exception_list, expected_message): assert str(e.value) == expected_message assert list(e.value.exceptions) == exception_list - def test_raise_empty_list(self): - """ - Empty exception lists are not supported - """ - with pytest.raises(ValueError) as e: - raise self._get_class()([]) - assert str(e.value) == "Empty exception list" - class TestFailedMutationEntryError(): def _get_class(self): From 9624729589372c60ba9316c4ea7fcfdfa4f62990 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 27 Apr 2023 10:25:46 -0700 Subject: [PATCH 026/213] added test for 311 rich traceback --- tests/unit/test_exceptions.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/tests/unit/test_exceptions.py b/tests/unit/test_exceptions.py index 6b101fdbf..67b3f0779 100644 --- a/tests/unit/test_exceptions.py +++ b/tests/unit/test_exceptions.py @@ -61,6 +61,34 @@ def test_raise_empty_list(self): raise self._make_one(excs=[]) assert "non-empty sequence" in str(e.value) + @pytest.mark.skipif(sys.version_info < (3, 11), reason="requires python3.11 or higher") + def test_311_traceback(self): + """ + Exception customizations should not break rich exception group traceback in python 3.11 + """ + import traceback + sub_exc1 = RuntimeError("first sub exception") + sub_exc2 = ZeroDivisionError("second sub exception") + exc_group = self._make_one(excs=[sub_exc1, sub_exc2]) + + expected_traceback = ( + f" | google.cloud.bigtable.exceptions.{type(exc_group).__name__}: {str(exc_group)}", + " +-+---------------- 1 ----------------", + " | RuntimeError: first sub exception", + " +---------------- 2 ----------------", + " | ZeroDivisionError: second sub exception", + " +------------------------------------", + ) + exception_caught = False + try: + raise exc_group + except self._get_class() as e: + exception_caught = True + tb = traceback.format_exc() + tb_relevant_lines = tuple(tb.splitlines()[3:]) + assert expected_traceback == tb_relevant_lines + assert exception_caught + @pytest.mark.skipif(sys.version_info < (3, 11), reason="requires python3.11 or higher") def test_311_exception_group(self): """ From 308708120d5b70b20b614ea284bd8ca2f5dee22b Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 27 Apr 2023 13:29:33 -0700 Subject: [PATCH 027/213] moved retryable row mutations to new file --- google/cloud/bigtable/_mutate_rows.py | 65 +++++++++++++++++++++++++++ google/cloud/bigtable/client.py | 38 ++-------------- 2 files changed, 68 insertions(+), 35 deletions(-) create mode 100644 google/cloud/bigtable/_mutate_rows.py diff --git a/google/cloud/bigtable/_mutate_rows.py b/google/cloud/bigtable/_mutate_rows.py new file mode 100644 index 000000000..71e653dca --- /dev/null +++ b/google/cloud/bigtable/_mutate_rows.py @@ -0,0 +1,65 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# +from __future__ import annotations + +from typing import Any, TYPE_CHECKING + +import asyncio + +from google.api_core import exceptions as core_exceptions + +if TYPE_CHECKING: + from google.cloud.bigtable_v2.services.bigtable.async_client import BigtableAsyncClient + from google.cloud.bigtable.mutations import BulkMutationsEntry + + +async def _mutate_rows_retryable_attempt( + gapic_client:"BigtableAsyncClient", + request : dict[str, Any], + per_request_timeout : float | None, + mutation_dict: dict[int, "BulkMutationsEntry"|None], + error_dict: dict[int, list[Exception]], +): + new_request = request.copy() + while any(mutation is not None for mutation in mutation_dict.values()): + await asyncio.sleep(0) + # continue to retry until timeout, or all mutations are complete (success or failure) + request_entries : list[dict[str, Any]] = [] + for index, entry in mutation_dict.items(): + if entry is not None: + request_entries.append(entry._to_dict()) + new_request["entries"] = request_entries + async for result_list in await gapic_client.mutate_rows( + new_request, timeout=per_request_timeout + ): + for result in result_list.entries: + idx = result.index + if result.status.code == 0: + # mutation succeeded + mutation_dict[idx] = None + error_dict[idx] = [] + if result.status.code != 0: + # mutation failed + exception = core_exceptions.from_grpc_status( + result.status.code, + result.status.message, + details=result.status.details, + ) + error_dict[idx].append(exception) + # if not idempotent, remove from retry list + entry = mutation_dict[idx] + if entry is not None and not entry.is_idempotent(): + mutation_dict[idx] = None + diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index bcf635ae5..e525b8a32 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -44,6 +44,7 @@ from google.cloud.bigtable.exceptions import MutationsExceptionGroup from google.cloud.bigtable.mutations import Mutation, BulkMutationsEntry +from google.cloud.bigtable._mutate_rows import _mutate_rows_retryable_attempt if TYPE_CHECKING: from google.cloud.bigtable.mutations_batcher import MutationsBatcher @@ -721,8 +722,8 @@ def on_error_fn(exc): maximum=60, ) try: - await retry(self._mutations_retryable_attempt)( - request, per_request_timeout, mutations_dict, error_dict + await retry(_mutate_rows_retryable_attempt)( + self.client._gapic_client, request, per_request_timeout, mutations_dict, error_dict ) except core_exceptions.RetryError: # raised by AsyncRetry after operation deadline exceeded @@ -753,39 +754,6 @@ def on_error_fn(exc): if all_errors: raise MutationsExceptionGroup(all_errors, len(mutation_entries)) - async def _mutations_retryable_attempt( - self, request, per_request_timeout, mutation_dict, error_dict - ): - new_request = request.copy() - while any(mutation is not None for mutation in mutation_dict.values()): - await asyncio.sleep(0) - # continue to retry until timeout, or all mutations are complete (success or failure) - new_request["entries"] = [ - mutation_dict[i]._to_dict() - for i in range(len(mutation_dict)) - if mutation_dict[i] is not None - ] - async for result_list in await self.client._gapic_client.mutate_rows( - new_request, timeout=per_request_timeout - ): - for result in result_list.entries: - idx = result.index - if result.status.code == 0: - # mutation succeeded - mutation_dict[idx] = None - error_dict[idx] = [] - if 
result.status.code != 0: - # mutation failed - exception = core_exceptions.from_grpc_status( - result.status.code, - result.status.message, - details=result.status.details, - ) - error_dict[idx].append(exception) - # if not idempotent, remove from retry list - if not mutation_dict[idx].is_idempotent(): - mutation_dict[idx] = None - async def check_and_mutate_row( self, row_key: str | bytes, From 9df588f6a798793eb822e125e4ea602f8a84a265 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 27 Apr 2023 13:35:39 -0700 Subject: [PATCH 028/213] use index map --- google/cloud/bigtable/_mutate_rows.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigtable/_mutate_rows.py b/google/cloud/bigtable/_mutate_rows.py index 71e653dca..1be25d065 100644 --- a/google/cloud/bigtable/_mutate_rows.py +++ b/google/cloud/bigtable/_mutate_rows.py @@ -35,17 +35,21 @@ async def _mutate_rows_retryable_attempt( new_request = request.copy() while any(mutation is not None for mutation in mutation_dict.values()): await asyncio.sleep(0) + # keep map between sub-request indices and global entry indices + index_map : dict[int, int] = {} # continue to retry until timeout, or all mutations are complete (success or failure) request_entries : list[dict[str, Any]] = [] for index, entry in mutation_dict.items(): if entry is not None: + index_map[len(request_entries)] = index request_entries.append(entry._to_dict()) new_request["entries"] = request_entries async for result_list in await gapic_client.mutate_rows( new_request, timeout=per_request_timeout ): for result in result_list.entries: - idx = result.index + # convert sub-request index to global index + idx = index_map[result.index] if result.status.code == 0: # mutation succeeded mutation_dict[idx] = None From 7ed8be3fccc64e35db9648e987ff6d5f74a4bdad Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 27 Apr 2023 13:53:59 -0700 Subject: [PATCH 029/213] added docstring --- google/cloud/bigtable/_mutate_rows.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/google/cloud/bigtable/_mutate_rows.py b/google/cloud/bigtable/_mutate_rows.py index 1be25d065..6f1ac7300 100644 --- a/google/cloud/bigtable/_mutate_rows.py +++ b/google/cloud/bigtable/_mutate_rows.py @@ -32,6 +32,25 @@ async def _mutate_rows_retryable_attempt( mutation_dict: dict[int, "BulkMutationsEntry"|None], error_dict: dict[int, list[Exception]], ): + """ + Helper function for managing the mutate_rows lifecycle. + + Function will continuously retry failed mutations, until all mutations succeed or encounter + an unrecoverable error. + + This function is intended to be wrapped in an api_core.retry.AsyncRetry object, which will handle + timeouts and retrying raised exceptions. + + Args: + - gapic_client: the client to use for the mutate_rows call + - request: the request to send to the server, populated with table name and app profile id + - per_request_timeout: the timeout to use for each mutate_rows attempt + - mutation_dict: a dictionary tracking which entries are outstanding + (stored as BulkMutationsEntry), and which have reached a terminal state (stored as None). + At the start of the request, all entries are outstanding. + - error_dict: a dictionary tracking errors associated with each entry index. + Each retry will append a new error. Successful mutations will clear the error list. 
+ """ new_request = request.copy() while any(mutation is not None for mutation in mutation_dict.values()): await asyncio.sleep(0) From 2536cc46b9bc5ed6cbd6819ba621c3f741f3b961 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 27 Apr 2023 14:00:13 -0700 Subject: [PATCH 030/213] added predicate check to failed mutations --- google/cloud/bigtable/_mutate_rows.py | 10 +++++++--- google/cloud/bigtable/client.py | 6 +++--- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/google/cloud/bigtable/_mutate_rows.py b/google/cloud/bigtable/_mutate_rows.py index 6f1ac7300..63a52fa8f 100644 --- a/google/cloud/bigtable/_mutate_rows.py +++ b/google/cloud/bigtable/_mutate_rows.py @@ -31,6 +31,7 @@ async def _mutate_rows_retryable_attempt( per_request_timeout : float | None, mutation_dict: dict[int, "BulkMutationsEntry"|None], error_dict: dict[int, list[Exception]], + predicate: callable[[Exception], bool], ): """ Helper function for managing the mutate_rows lifecycle. @@ -50,6 +51,7 @@ async def _mutate_rows_retryable_attempt( At the start of the request, all entries are outstanding. - error_dict: a dictionary tracking errors associated with each entry index. Each retry will append a new error. Successful mutations will clear the error list. + - predicate: a function that takes an exception and returns True if the exception is retryable. """ new_request = request.copy() while any(mutation is not None for mutation in mutation_dict.values()): @@ -81,8 +83,10 @@ async def _mutate_rows_retryable_attempt( details=result.status.details, ) error_dict[idx].append(exception) - # if not idempotent, remove from retry list + # if mutation is non-idempotent or the error is not retryable, + # mark the mutation as terminal entry = mutation_dict[idx] - if entry is not None and not entry.is_idempotent(): - mutation_dict[idx] = None + if entry is not None: + if not predicate(exception) or and not entry.is_idempotent(): + mutation_dict[idx] = None diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index e525b8a32..3f7a93dd6 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -642,8 +642,8 @@ async def bulk_mutate_rows( self, mutation_entries: list[BulkMutationsEntry], *, - operation_timeout: int | float | None = 60, - per_request_timeout: int | float | None = None, + operation_timeout: float | None = 60, + per_request_timeout: float | None = None, ): """ Applies mutations for multiple rows in a single batched request. 
@@ -723,7 +723,7 @@ def on_error_fn(exc): ) try: await retry(_mutate_rows_retryable_attempt)( - self.client._gapic_client, request, per_request_timeout, mutations_dict, error_dict + self.client._gapic_client, request, per_request_timeout, mutations_dict, error_dict, predicate ) except core_exceptions.RetryError: # raised by AsyncRetry after operation deadline exceeded From 1f6875cd2aca49d23efc4a086151bbe85b03034c Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 27 Apr 2023 15:27:55 -0700 Subject: [PATCH 031/213] added _mutate_rows tests --- tests/unit/test__mutate_rows.py | 148 ++++++++++++++++++++++++++++++++ 1 file changed, 148 insertions(+) create mode 100644 tests/unit/test__mutate_rows.py diff --git a/tests/unit/test__mutate_rows.py b/tests/unit/test__mutate_rows.py new file mode 100644 index 000000000..e0e2f19bd --- /dev/null +++ b/tests/unit/test__mutate_rows.py @@ -0,0 +1,148 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +import pytest +import sys + +import google.cloud.bigtable.exceptions as bigtable_exceptions +from google.cloud.bigtable_v2.types import MutateRowsResponse +from google.rpc import status_pb2 + +# try/except added for compatibility with python < 3.8 +try: + from unittest import mock + from unittest.mock import AsyncMock # type: ignore +except ImportError: # pragma: NO COVER + import mock # type: ignore + from mock import AsyncMock # type: ignore + +class Test_MutateRowsRetryableAttempt(): + + async def _mock_stream(self, mutation_dict, error_dict): + for idx, entry in mutation_dict.items(): + code = error_dict.get(idx, 0) + yield MutateRowsResponse( + entries=[ + MutateRowsResponse.Entry( + index=idx, + status=status_pb2.Status(code=code) + ) + ] + ) + + def _make_mock_client(self, mutation_dict, error_dict=None): + client = mock.Mock() + client.mutate_rows = AsyncMock() + if error_dict is None: + error_dict = {} + client.mutate_rows.side_effect = lambda *args, **kwargs: self._mock_stream(mutation_dict, error_dict) + return client + + @pytest.mark.asyncio + async def test_single_entry_success(self): + """Test mutating a single entry""" + from google.cloud.bigtable._mutate_rows import _mutate_rows_retryable_attempt + mutation = mock.Mock() + mutations = {0: mutation} + client = self._make_mock_client(mutations) + errors = {0: []} + expected_request = {"test": "data"} + expected_timeout = 9 + await _mutate_rows_retryable_attempt(client, expected_request, expected_timeout, mutations, errors, lambda x: False) + assert mutations[0] is None + assert errors[0] == [] + assert client.mutate_rows.call_count == 1 + args, kwargs = client.mutate_rows.call_args + assert kwargs['timeout'] == expected_timeout + assert args[0]["test"] == "data" + assert args[0]["entries"] == [mutation._to_dict()] + + @pytest.mark.asyncio + async def test_empty_request(self): + """Calling with no mutations should result in no calls""" + from google.cloud.bigtable._mutate_rows import _mutate_rows_retryable_attempt + client = 
self._make_mock_client({}) + await _mutate_rows_retryable_attempt(client, {}, None, {}, {}, lambda x: False) + assert client.mutate_rows.call_count == 0 + + @pytest.mark.asyncio + async def test_eventual_success(self): + """Fail at first, but eventually succeed""" + from google.cloud.bigtable._mutate_rows import _mutate_rows_retryable_attempt + mutation = mock.Mock() + mutations = {0: mutation} + client = self._make_mock_client(mutations) + # fail 10 times, then succeed + client.mutate_rows.side_effect = [self._mock_stream(mutations, {0: 400})] * 10 + [self._mock_stream(mutations, {})] + errors = {0: []} + expected_request = {} + expected_timeout = 9 + await _mutate_rows_retryable_attempt(client, expected_request, expected_timeout, mutations, errors, lambda x: True) + assert mutations[0] is None + assert errors[0] == [] + assert client.mutate_rows.call_count == 11 + args, kwargs = client.mutate_rows.call_args + assert kwargs['timeout'] == expected_timeout + assert args[0]["entries"] == [mutation._to_dict()] + + @pytest.mark.asyncio + async def test_partial_success(self): + """Some entries succeed, but one fails. Should report the proper index""" + from google.cloud.bigtable._mutate_rows import _mutate_rows_retryable_attempt + success_mutation = mock.Mock() + success_mutation_2 = mock.Mock() + failure_mutation = mock.Mock() + mutations = {0: success_mutation, 1: failure_mutation, 2: success_mutation_2} + errors = {0: [], 1: [], 2: []} + client = self._make_mock_client(mutations, error_dict={1: 400}) + # raise retryable error 3 times, then raise non-retryable error + client.mutate_rows.side_effect = [self._mock_stream(mutations, {1: 300}), self._mock_stream({0: failure_mutation}, {0: 400}), self._mock_stream({0: failure_mutation}, {0: 400}), self._mock_stream({0: failure_mutation}, {0: 500})] * 2 + expected_request = {} + expected_timeout = 9 + await _mutate_rows_retryable_attempt(client, expected_request, expected_timeout, mutations, errors, lambda x: x.grpc_status_code <= 400) + assert mutations == {0: None, 1: None, 2: None} + assert errors[0] == [] + assert errors[2] == [] + # assert len(errors[1]) == 4 + assert errors[1][0].grpc_status_code == 300 + assert errors[1][1].grpc_status_code == 400 + assert errors[1][2].grpc_status_code == 400 + assert errors[1][3].grpc_status_code == 500 + + + @pytest.mark.asyncio + async def test_retryable_entry_error(self): + """Should continue retrying if the error is retryable""" + from google.cloud.bigtable._mutate_rows import _mutate_rows_retryable_attempt + import asyncio + mutation = mock.Mock() + mutations = {0: mutation} + errors = {0: []} + client = self._make_mock_client(mutations, error_dict={0: 400}) + # raise retryable error 3 times, then raise non-retryable error + expected_request = {} + expected_timeout = 9 + with mock.patch.object(asyncio, 'sleep') as mock_sleep: + fail_after = 100 + mock_sleep.side_effect = [None] * fail_after + [asyncio.CancelledError("Cancelled")] + try: + await _mutate_rows_retryable_attempt(client, expected_request, expected_timeout, mutations, errors, lambda x: True) + except asyncio.CancelledError: + pass + assert mock_sleep.call_count == fail_after + 1 + assert client.mutate_rows.call_count == fail_after + assert len(errors[0]) == fail_after + + From 1ea24e6507026a19c032cb6e88fbb633a333b445 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 27 Apr 2023 16:01:16 -0700 Subject: [PATCH 032/213] improved client tests --- tests/unit/test_client.py | 92 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 85 
insertions(+), 7 deletions(-) diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index dd94ed947..04b80fa62 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -1067,14 +1067,11 @@ async def test_bulk_mutate_rows_multiple_entries(self): core_exceptions.DeadlineExceeded, core_exceptions.ServiceUnavailable, core_exceptions.Aborted, - core_exceptions.OutOfRange, - core_exceptions.NotFound, - core_exceptions.FailedPrecondition, ], ) async def test_bulk_mutate_rows_idempotent_mutation_error_retries(self, exception): """ - Individual idempotent mutations should be retried if they fail with any error + Individual idempotent mutations should be retried if they fail with a retryable error """ from google.api_core.exceptions import DeadlineExceeded from google.cloud.bigtable.exceptions import ( @@ -1108,6 +1105,47 @@ async def test_bulk_mutate_rows_idempotent_mutation_error_retries(self, exceptio cause.exceptions[-1], core_exceptions.DeadlineExceeded ) + + @pytest.mark.asyncio + @pytest.mark.parametrize( + "exception", + [ + core_exceptions.OutOfRange, + core_exceptions.NotFound, + core_exceptions.FailedPrecondition, + ], + ) + async def test_bulk_mutate_rows_idempotent_mutation_error_retries(self, exception): + """ + Individual idempotent mutations should not be retried if they fail with a non-retryable error + """ + from google.api_core.exceptions import DeadlineExceeded + from google.cloud.bigtable.exceptions import ( + RetryExceptionGroup, + FailedMutationEntryError, + MutationsExceptionGroup, + ) + + async with self._make_client(project="project") as client: + async with client.get_table("instance", "table") as table: + with mock.patch.object( + client._gapic_client, "mutate_rows" + ) as mock_gapic: + mock_gapic.side_effect = lambda *a, **k: self._mock_response( + [exception("mock")] + ) + with pytest.raises(MutationsExceptionGroup) as e: + mutation = mutations.DeleteAllFromRow() + entry = mutations.BulkMutationsEntry(b"row_key", [mutation]) + assert mutation.is_idempotent() is True + await table.bulk_mutate_rows([entry], operation_timeout=0.05) + assert len(e.value.exceptions) == 1 + failed_exception = e.value.exceptions[0] + assert "non-idempotent" not in str(failed_exception) + assert isinstance(failed_exception, FailedMutationEntryError) + cause = failed_exception.__cause__ + assert isinstance(cause, exception) + @pytest.mark.parametrize( "retryable_exception", [ @@ -1117,7 +1155,7 @@ async def test_bulk_mutate_rows_idempotent_mutation_error_retries(self, exceptio ], ) @pytest.mark.asyncio - async def test_bulk_mutate_idempotent_retryable_errors(self, retryable_exception): + async def test_bulk_mutate_idempotent_retryable_request_errors(self, retryable_exception): """ Individual idempotent mutations should be retried if the request fails with a retryable error """ @@ -1157,10 +1195,10 @@ async def test_bulk_mutate_idempotent_retryable_errors(self, retryable_exception core_exceptions.Aborted, ], ) - async def test_bulk_mutate_rows_idempotent_retryable_errors( + async def test_bulk_mutate_rows_non_idempotent_retryable_errors( self, retryable_exception ): - """Idempotent mutations should never be retried""" + """Non-Idempotent mutations should never be retried""" from google.cloud.bigtable.exceptions import ( RetryExceptionGroup, FailedMutationEntryError, @@ -1227,3 +1265,43 @@ async def test_bulk_mutate_rows_non_retryable_errors(self, non_retryable_excepti assert "non-idempotent" not in str(failed_exception) cause = failed_exception.__cause__ assert 
isinstance(cause, non_retryable_exception) + + @pytest.mark.asyncio + async def test_bulk_mutate_error_index(self): + """ + Test partial failure, partial success. Errors should be associated with the correct index + """ + from google.api_core.exceptions import DeadlineExceeded, Aborted, FailedPrecondition + from google.cloud.bigtable.exceptions import ( + RetryExceptionGroup, + FailedMutationEntryError, + MutationsExceptionGroup, + ) + + async with self._make_client(project="project") as client: + async with client.get_table("instance", "table") as table: + with mock.patch.object( + client._gapic_client, "mutate_rows" + ) as mock_gapic: + # fail with retryable errors, then a non-retryable one + mock_gapic.side_effect =[ self._mock_response([None, Aborted("mock"), None]), self._mock_response([DeadlineExceeded("mock")]), self._mock_response([FailedPrecondition("final")])] + with pytest.raises(MutationsExceptionGroup) as e: + mutation = mutations.SetCell( + "family", b"qualifier", b"value", timestamp_micros=123 + ) + entries = [mutations.BulkMutationsEntry((f"row_key_{i}").encode(), [mutation]) for i in range(3)] + assert mutation.is_idempotent() is True + await table.bulk_mutate_rows(entries, operation_timeout=1000) + assert len(e.value.exceptions) == 1 + failed = e.value.exceptions[0] + assert isinstance(failed, FailedMutationEntryError) + assert failed.index == 1 + assert failed.entry == entries[1] + cause = failed.__cause__ + assert isinstance(cause, RetryExceptionGroup) + assert len(cause.exceptions) == 3 + assert isinstance(cause.exceptions[0], Aborted) + assert isinstance(cause.exceptions[1], DeadlineExceeded) + assert isinstance(cause.exceptions[2], FailedPrecondition) + + From 25ca2d2c61fa19c9ad72b325b1aa9a5be45ecc8d Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 28 Apr 2023 09:57:50 -0700 Subject: [PATCH 033/213] refactored to loop by raising exception --- google/cloud/bigtable/_mutate_rows.py | 91 +++++++++++++++------------ google/cloud/bigtable/client.py | 4 +- tests/unit/test__mutate_rows.py | 74 +++++++--------------- 3 files changed, 77 insertions(+), 92 deletions(-) diff --git a/google/cloud/bigtable/_mutate_rows.py b/google/cloud/bigtable/_mutate_rows.py index 63a52fa8f..c5da766df 100644 --- a/google/cloud/bigtable/_mutate_rows.py +++ b/google/cloud/bigtable/_mutate_rows.py @@ -16,8 +16,6 @@ from typing import Any, TYPE_CHECKING -import asyncio - from google.api_core import exceptions as core_exceptions if TYPE_CHECKING: @@ -25,19 +23,26 @@ from google.cloud.bigtable.mutations import BulkMutationsEntry +class _MutateRowsIncomplete(RuntimeError): + """ + Exception raised when a mutate_rows call has unfinished work. + """ + pass + + async def _mutate_rows_retryable_attempt( gapic_client:"BigtableAsyncClient", request : dict[str, Any], per_request_timeout : float | None, - mutation_dict: dict[int, "BulkMutationsEntry"|None], + mutation_dict: dict[int, "BulkMutationsEntry" | None], error_dict: dict[int, list[Exception]], predicate: callable[[Exception], bool], ): """ - Helper function for managing the mutate_rows lifecycle. + Helper function for managing a single mutate_rows attempt. - Function will continuously retry failed mutations, until all mutations succeed or encounter - an unrecoverable error. 
+ If one or more retryable mutations remain incomplete at the end of the function, + _MutateRowsIncomplete will be raised to trigger a retry This function is intended to be wrapped in an api_core.retry.AsyncRetry object, which will handle timeouts and retrying raised exceptions. @@ -52,41 +57,45 @@ async def _mutate_rows_retryable_attempt( - error_dict: a dictionary tracking errors associated with each entry index. Each retry will append a new error. Successful mutations will clear the error list. - predicate: a function that takes an exception and returns True if the exception is retryable. + Raises: + - _MutateRowsIncomplete: if one or more retryable mutations remain incomplete at the end of the function + - GoogleAPICallError: if the server returns an error on the grpc call """ new_request = request.copy() - while any(mutation is not None for mutation in mutation_dict.values()): - await asyncio.sleep(0) - # keep map between sub-request indices and global entry indices - index_map : dict[int, int] = {} - # continue to retry until timeout, or all mutations are complete (success or failure) - request_entries : list[dict[str, Any]] = [] - for index, entry in mutation_dict.items(): - if entry is not None: - index_map[len(request_entries)] = index - request_entries.append(entry._to_dict()) - new_request["entries"] = request_entries - async for result_list in await gapic_client.mutate_rows( - new_request, timeout=per_request_timeout - ): - for result in result_list.entries: - # convert sub-request index to global index - idx = index_map[result.index] - if result.status.code == 0: - # mutation succeeded - mutation_dict[idx] = None - error_dict[idx] = [] - if result.status.code != 0: - # mutation failed - exception = core_exceptions.from_grpc_status( - result.status.code, - result.status.message, - details=result.status.details, - ) - error_dict[idx].append(exception) - # if mutation is non-idempotent or the error is not retryable, - # mark the mutation as terminal - entry = mutation_dict[idx] - if entry is not None: - if not predicate(exception) or and not entry.is_idempotent(): - mutation_dict[idx] = None + # keep map between sub-request indices and global entry indices + index_map : dict[int, int] = {} + # continue to retry until timeout, or all mutations are complete (success or failure) + request_entries : list[dict[str, Any]] = [] + for index, entry in mutation_dict.items(): + if entry is not None: + index_map[len(request_entries)] = index + request_entries.append(entry._to_dict()) + new_request["entries"] = request_entries + async for result_list in await gapic_client.mutate_rows( + new_request, timeout=per_request_timeout + ): + for result in result_list.entries: + # convert sub-request index to global index + idx = index_map[result.index] + if result.status.code == 0: + # mutation succeeded + mutation_dict[idx] = None + error_dict[idx] = [] + if result.status.code != 0: + # mutation failed + exception = core_exceptions.from_grpc_status( + result.status.code, + result.status.message, + details=result.status.details, + ) + error_dict[idx].append(exception) + # if mutation is non-idempotent or the error is not retryable, + # mark the mutation as terminal + entry = mutation_dict[idx] + if entry is not None: + if not predicate(exception) or not entry.is_idempotent(): + mutation_dict[idx] = None + if any(mutation is not None for mutation in mutation_dict.values()): + # unfinished work; raise exception to trigger retry + raise _MutateRowsIncomplete() diff --git 
a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 3f7a93dd6..22bb87436 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -45,6 +45,7 @@ from google.cloud.bigtable.mutations import Mutation, BulkMutationsEntry from google.cloud.bigtable._mutate_rows import _mutate_rows_retryable_attempt +from google.cloud.bigtable._mutate_rows import _MutateRowsIncomplete if TYPE_CHECKING: from google.cloud.bigtable.mutations_batcher import MutationsBatcher @@ -700,10 +701,11 @@ async def bulk_mutate_rows( core_exceptions.DeadlineExceeded, core_exceptions.ServiceUnavailable, core_exceptions.Aborted, + _MutateRowsIncomplete, ) def on_error_fn(exc): - if predicate(exc): + if predicate(exc) and not isinstance(exc, _MutateRowsIncomplete): # add this exception to list for each active mutation for idx in error_dict.keys(): if mutations_dict[idx] is not None: diff --git a/tests/unit/test__mutate_rows.py b/tests/unit/test__mutate_rows.py index e0e2f19bd..292f1a084 100644 --- a/tests/unit/test__mutate_rows.py +++ b/tests/unit/test__mutate_rows.py @@ -71,78 +71,52 @@ async def test_single_entry_success(self): @pytest.mark.asyncio async def test_empty_request(self): - """Calling with no mutations should result in no calls""" + """Calling with no mutations should result in a single API call""" from google.cloud.bigtable._mutate_rows import _mutate_rows_retryable_attempt client = self._make_mock_client({}) await _mutate_rows_retryable_attempt(client, {}, None, {}, {}, lambda x: False) - assert client.mutate_rows.call_count == 0 + assert client.mutate_rows.call_count == 1 @pytest.mark.asyncio - async def test_eventual_success(self): - """Fail at first, but eventually succeed""" - from google.cloud.bigtable._mutate_rows import _mutate_rows_retryable_attempt - mutation = mock.Mock() - mutations = {0: mutation} - client = self._make_mock_client(mutations) - # fail 10 times, then succeed - client.mutate_rows.side_effect = [self._mock_stream(mutations, {0: 400})] * 10 + [self._mock_stream(mutations, {})] - errors = {0: []} + async def test_partial_success_retryable(self): + """Some entries succeed, but one fails. Should report the proper index, and raise incomplete exception""" + from google.cloud.bigtable._mutate_rows import _mutate_rows_retryable_attempt, _MutateRowsIncomplete + success_mutation = mock.Mock() + success_mutation_2 = mock.Mock() + failure_mutation = mock.Mock() + mutations = {0: success_mutation, 1: failure_mutation, 2: success_mutation_2} + errors = {0: [], 1: [], 2: []} + client = self._make_mock_client(mutations, error_dict={1: 300}) + # raise retryable error 3 times, then raise non-retryable error expected_request = {} expected_timeout = 9 - await _mutate_rows_retryable_attempt(client, expected_request, expected_timeout, mutations, errors, lambda x: True) - assert mutations[0] is None + with pytest.raises(_MutateRowsIncomplete): + await _mutate_rows_retryable_attempt(client, expected_request, expected_timeout, mutations, errors, lambda x: True) + assert mutations == {0: None, 1: failure_mutation, 2: None} assert errors[0] == [] - assert client.mutate_rows.call_count == 11 - args, kwargs = client.mutate_rows.call_args - assert kwargs['timeout'] == expected_timeout - assert args[0]["entries"] == [mutation._to_dict()] + assert len(errors[1]) == 1 + assert errors[1][0].grpc_status_code == 300 + assert errors[2] == [] @pytest.mark.asyncio - async def test_partial_success(self): - """Some entries succeed, but one fails. 
Should report the proper index""" + async def test_partial_success_non_retryable(self): + """Some entries succeed, but one fails. Exception marked as non-retryable. Do not raise incomplete error""" from google.cloud.bigtable._mutate_rows import _mutate_rows_retryable_attempt success_mutation = mock.Mock() success_mutation_2 = mock.Mock() failure_mutation = mock.Mock() mutations = {0: success_mutation, 1: failure_mutation, 2: success_mutation_2} errors = {0: [], 1: [], 2: []} - client = self._make_mock_client(mutations, error_dict={1: 400}) + client = self._make_mock_client(mutations, error_dict={1: 300}) # raise retryable error 3 times, then raise non-retryable error - client.mutate_rows.side_effect = [self._mock_stream(mutations, {1: 300}), self._mock_stream({0: failure_mutation}, {0: 400}), self._mock_stream({0: failure_mutation}, {0: 400}), self._mock_stream({0: failure_mutation}, {0: 500})] * 2 expected_request = {} expected_timeout = 9 - await _mutate_rows_retryable_attempt(client, expected_request, expected_timeout, mutations, errors, lambda x: x.grpc_status_code <= 400) + await _mutate_rows_retryable_attempt(client, expected_request, expected_timeout, mutations, errors, lambda x: False) assert mutations == {0: None, 1: None, 2: None} assert errors[0] == [] - assert errors[2] == [] - # assert len(errors[1]) == 4 + assert len(errors[1]) == 1 assert errors[1][0].grpc_status_code == 300 - assert errors[1][1].grpc_status_code == 400 - assert errors[1][2].grpc_status_code == 400 - assert errors[1][3].grpc_status_code == 500 - + assert errors[2] == [] - @pytest.mark.asyncio - async def test_retryable_entry_error(self): - """Should continue retrying if the error is retryable""" - from google.cloud.bigtable._mutate_rows import _mutate_rows_retryable_attempt - import asyncio - mutation = mock.Mock() - mutations = {0: mutation} - errors = {0: []} - client = self._make_mock_client(mutations, error_dict={0: 400}) - # raise retryable error 3 times, then raise non-retryable error - expected_request = {} - expected_timeout = 9 - with mock.patch.object(asyncio, 'sleep') as mock_sleep: - fail_after = 100 - mock_sleep.side_effect = [None] * fail_after + [asyncio.CancelledError("Cancelled")] - try: - await _mutate_rows_retryable_attempt(client, expected_request, expected_timeout, mutations, errors, lambda x: True) - except asyncio.CancelledError: - pass - assert mock_sleep.call_count == fail_after + 1 - assert client.mutate_rows.call_count == fail_after - assert len(errors[0]) == fail_after From c0787db056714eba3dd0bd5cf64b7a2184686aa9 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 28 Apr 2023 11:00:41 -0700 Subject: [PATCH 034/213] refactored retry deadline logic into shared wrapper --- google/cloud/bigtable/client.py | 47 +++++++++-------------------- google/cloud/bigtable/exceptions.py | 33 ++++++++++++++++++++ 2 files changed, 48 insertions(+), 32 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 22bb87436..cec7f81b9 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -42,6 +42,7 @@ from google.cloud.bigtable.exceptions import RetryExceptionGroup from google.cloud.bigtable.exceptions import FailedMutationEntryError from google.cloud.bigtable.exceptions import MutationsExceptionGroup +from google.cloud.bigtable.exceptions import _convert_retry_deadline from google.cloud.bigtable.mutations import Mutation, BulkMutationsEntry from google.cloud.bigtable._mutate_rows import _mutate_rows_retryable_attempt @@ 
-621,23 +622,12 @@ def on_error_fn(exc): multiplier=2, maximum=60, ) - try: - await retry(self.client._gapic_client.mutate_row)( - request, timeout=per_request_timeout - ) - except core_exceptions.RetryError: - # raised by AsyncRetry after operation deadline exceeded - # TODO: merge with similar logic in ReadRowsIterator - new_exc = core_exceptions.DeadlineExceeded( - f"operation_timeout of {operation_timeout:0.1f}s exceeded" - ) - source_exc = None - if transient_errors: - source_exc = RetryExceptionGroup( - transient_errors, - ) - new_exc.__cause__ = source_exc - raise new_exc from source_exc + # wrap rpc in retry logic + retry_wrapped = retry(self.client._gapic_client.mutate_row) + # convert RetryErrors from retry wrapper into DeadlineExceeded errors + deadline_wrapped = _convert_retry_deadline(retry_wrapped, operation_timeout, transient_errors) + # trigger rpc + await deadline_wrapped(request, timeout=per_request_timeout) async def bulk_mutate_rows( self, @@ -723,21 +713,17 @@ def on_error_fn(exc): multiplier=2, maximum=60, ) + # wrap attempt in retry logic + retry_wrapped = retry(_mutate_rows_retryable_attempt) + # convert RetryErrors from retry wrapper into DeadlineExceeded errors + deadline_wrapped = _convert_retry_deadline(retry_wrapped, operation_timeout) try: - await retry(_mutate_rows_retryable_attempt)( + # trigger mutate_rows + await deadline_wrapped( self.client._gapic_client, request, per_request_timeout, mutations_dict, error_dict, predicate ) - except core_exceptions.RetryError: - # raised by AsyncRetry after operation deadline exceeded - # add DeadlineExceeded to list for each active mutation - deadline_exc = core_exceptions.DeadlineExceeded( - f"operation_timeout of {operation_timeout:0.1f}s exceeded" - ) - for idx in error_dict.keys(): - if mutations_dict[idx] is not None: - error_dict[idx].append(deadline_exc) except Exception as exc: - # other exceptions are added to the list of exceptions for unprocessed mutations + # exceptions raised by retryable are added to the list of exceptions for all unprocessed mutations for idx in error_dict.keys(): if mutations_dict[idx] is not None: error_dict[idx].append(exc) @@ -746,10 +732,7 @@ def on_error_fn(exc): all_errors = [] for idx, exc_list in error_dict.items(): if exc_list: - if len(exc_list) == 1: - cause_exc = exc_list[0] - else: - cause_exc = RetryExceptionGroup(exc_list) + cause_exc = exc_list[0] if len(exc_list) == 1 else RetryExceptionGroup(exc_list) all_errors.append( FailedMutationEntryError(idx, mutation_entries[idx], cause_exc) ) diff --git a/google/cloud/bigtable/exceptions.py b/google/cloud/bigtable/exceptions.py index 71c81f910..3d7d45568 100644 --- a/google/cloud/bigtable/exceptions.py +++ b/google/cloud/bigtable/exceptions.py @@ -12,17 +12,50 @@ # See the License for the specific language governing permissions and # limitations under the License. # +from __future__ import annotations import sys from typing import TYPE_CHECKING +from google.api_core import exceptions as core_exceptions + is_311_plus = sys.version_info >= (3, 11) if TYPE_CHECKING: from google.cloud.bigtable.mutations import BulkMutationsEntry +def _convert_retry_deadline(func:callable, timeout_value:float, retry_errors:list[Exception]|None=None): + """ + Decorator to convert RetryErrors raised by api_core.retry into + DeadlineExceeded exceptions, indicating that the underlying retries have + exhausted the timeout value. 
+ + Optionally attaches a RetryExceptionGroup to the DeadlineExceeded.__cause__, + detailing the failed exceptions associated with each retry. + + Args: + - func: The function to decorate + - timeout_value: The timeout value to display in the DeadlineExceeded error message + - retry_errors: An optional list of exceptions to attach as a RetryExceptionGroup to the DeadlineExceeded.__cause__ + """ + async def wrapper(*args, **kwargs): + try: + await func(*args, **kwargs) + except core_exceptions.RetryError: + new_exc = core_exceptions.DeadlineExceeded( + f"operation_timeout of {timeout_value:0.1f}s exceeded" + ) + source_exc = None + if retry_errors: + source_exc = RetryExceptionGroup(retry_errors) + new_exc.__cause__ = source_exc + raise new_exc from source_exc + return wrapper + + + class BigtableExceptionGroup(ExceptionGroup if is_311_plus else Exception): # type: ignore # noqa: F821 """ Represents one or more exceptions that occur during a bulk Bigtable operation From 3ed5c3d32da9c103ae80cb0418bf3ff8e15cbe40 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 28 Apr 2023 11:11:58 -0700 Subject: [PATCH 035/213] ran black --- google/cloud/bigtable/_mutate_rows.py | 22 ++++--- google/cloud/bigtable/client.py | 20 ++++-- google/cloud/bigtable/exceptions.py | 12 ++-- tests/unit/test__mutate_rows.py | 50 +++++++++++---- tests/unit/test_client.py | 26 ++++++-- tests/unit/test_exceptions.py | 92 +++++++++++++++++++-------- 6 files changed, 156 insertions(+), 66 deletions(-) diff --git a/google/cloud/bigtable/_mutate_rows.py b/google/cloud/bigtable/_mutate_rows.py index c5da766df..c3a20e7f8 100644 --- a/google/cloud/bigtable/_mutate_rows.py +++ b/google/cloud/bigtable/_mutate_rows.py @@ -14,12 +14,14 @@ # from __future__ import annotations -from typing import Any, TYPE_CHECKING +from typing import Callable, Any, TYPE_CHECKING from google.api_core import exceptions as core_exceptions if TYPE_CHECKING: - from google.cloud.bigtable_v2.services.bigtable.async_client import BigtableAsyncClient + from google.cloud.bigtable_v2.services.bigtable.async_client import ( + BigtableAsyncClient, + ) from google.cloud.bigtable.mutations import BulkMutationsEntry @@ -27,16 +29,17 @@ class _MutateRowsIncomplete(RuntimeError): """ Exception raised when a mutate_rows call has unfinished work. """ + pass async def _mutate_rows_retryable_attempt( - gapic_client:"BigtableAsyncClient", - request : dict[str, Any], - per_request_timeout : float | None, + gapic_client: "BigtableAsyncClient", + request: dict[str, Any], + per_request_timeout: float | None, mutation_dict: dict[int, "BulkMutationsEntry" | None], error_dict: dict[int, list[Exception]], - predicate: callable[[Exception], bool], + predicate: Callable[[Exception], bool], ): """ Helper function for managing a single mutate_rows attempt. @@ -51,7 +54,7 @@ async def _mutate_rows_retryable_attempt( - gapic_client: the client to use for the mutate_rows call - request: the request to send to the server, populated with table name and app profile id - per_request_timeout: the timeout to use for each mutate_rows attempt - - mutation_dict: a dictionary tracking which entries are outstanding + - mutation_dict: a dictionary tracking which entries are outstanding (stored as BulkMutationsEntry), and which have reached a terminal state (stored as None). At the start of the request, all entries are outstanding. - error_dict: a dictionary tracking errors associated with each entry index. 
@@ -63,9 +66,9 @@ async def _mutate_rows_retryable_attempt( """ new_request = request.copy() # keep map between sub-request indices and global entry indices - index_map : dict[int, int] = {} + index_map: dict[int, int] = {} # continue to retry until timeout, or all mutations are complete (success or failure) - request_entries : list[dict[str, Any]] = [] + request_entries: list[dict[str, Any]] = [] for index, entry in mutation_dict.items(): if entry is not None: index_map[len(request_entries)] = index @@ -98,4 +101,3 @@ async def _mutate_rows_retryable_attempt( if any(mutation is not None for mutation in mutation_dict.values()): # unfinished work; raise exception to trigger retry raise _MutateRowsIncomplete() - diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index cec7f81b9..3a2cd6c8c 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -545,8 +545,8 @@ async def mutate_row( row_key: str | bytes, mutations: list[Mutation] | Mutation, *, - operation_timeout: int | float | None = 60, - per_request_timeout: int | float | None = None, + operation_timeout: float | None = 60, + per_request_timeout: float | None = None, ): """ Mutates a row atomically. @@ -625,7 +625,9 @@ def on_error_fn(exc): # wrap rpc in retry logic retry_wrapped = retry(self.client._gapic_client.mutate_row) # convert RetryErrors from retry wrapper into DeadlineExceeded errors - deadline_wrapped = _convert_retry_deadline(retry_wrapped, operation_timeout, transient_errors) + deadline_wrapped = _convert_retry_deadline( + retry_wrapped, operation_timeout, transient_errors + ) # trigger rpc await deadline_wrapped(request, timeout=per_request_timeout) @@ -720,7 +722,12 @@ def on_error_fn(exc): try: # trigger mutate_rows await deadline_wrapped( - self.client._gapic_client, request, per_request_timeout, mutations_dict, error_dict, predicate + self.client._gapic_client, + request, + per_request_timeout, + mutations_dict, + error_dict, + predicate, ) except Exception as exc: # exceptions raised by retryable are added to the list of exceptions for all unprocessed mutations @@ -732,7 +739,10 @@ def on_error_fn(exc): all_errors = [] for idx, exc_list in error_dict.items(): if exc_list: - cause_exc = exc_list[0] if len(exc_list) == 1 else RetryExceptionGroup(exc_list) + if len(exc_list) == 1: + cause_exc = exc_list[0] + else: + cause_exc = RetryExceptionGroup(exc_list) all_errors.append( FailedMutationEntryError(idx, mutation_entries[idx], cause_exc) ) diff --git a/google/cloud/bigtable/exceptions.py b/google/cloud/bigtable/exceptions.py index 3d7d45568..697355a19 100644 --- a/google/cloud/bigtable/exceptions.py +++ b/google/cloud/bigtable/exceptions.py @@ -16,7 +16,7 @@ import sys -from typing import TYPE_CHECKING +from typing import Callable, Any, TYPE_CHECKING from google.api_core import exceptions as core_exceptions @@ -26,7 +26,9 @@ from google.cloud.bigtable.mutations import BulkMutationsEntry -def _convert_retry_deadline(func:callable, timeout_value:float, retry_errors:list[Exception]|None=None): +def _convert_retry_deadline( + func: Callable[..., Any], timeout_value: float, retry_errors: list[Exception] | None = None +): """ Decorator to convert RetryErrors raised by api_core.retry into DeadlineExceeded exceptions, indicating that the underlying retries have @@ -40,9 +42,10 @@ def _convert_retry_deadline(func:callable, timeout_value:float, retry_errors:lis - timeout_value: The timeout value to display in the DeadlineExceeded error message - retry_errors: An optional 
list of exceptions to attach as a RetryExceptionGroup to the DeadlineExceeded.__cause__ """ + async def wrapper(*args, **kwargs): try: - await func(*args, **kwargs) + return await func(*args, **kwargs) except core_exceptions.RetryError: new_exc = core_exceptions.DeadlineExceeded( f"operation_timeout of {timeout_value:0.1f}s exceeded" @@ -52,8 +55,8 @@ async def wrapper(*args, **kwargs): source_exc = RetryExceptionGroup(retry_errors) new_exc.__cause__ = source_exc raise new_exc from source_exc - return wrapper + return wrapper class BigtableExceptionGroup(ExceptionGroup if is_311_plus else Exception): # type: ignore # noqa: F821 @@ -87,6 +90,7 @@ def __str__(self): """ return self.args[0] + class MutationsExceptionGroup(BigtableExceptionGroup): """ Represents one or more exceptions that occur during a bulk mutation operation diff --git a/tests/unit/test__mutate_rows.py b/tests/unit/test__mutate_rows.py index 292f1a084..10dfac374 100644 --- a/tests/unit/test__mutate_rows.py +++ b/tests/unit/test__mutate_rows.py @@ -28,16 +28,15 @@ import mock # type: ignore from mock import AsyncMock # type: ignore -class Test_MutateRowsRetryableAttempt(): +class Test_MutateRowsRetryableAttempt: async def _mock_stream(self, mutation_dict, error_dict): for idx, entry in mutation_dict.items(): code = error_dict.get(idx, 0) yield MutateRowsResponse( entries=[ MutateRowsResponse.Entry( - index=idx, - status=status_pb2.Status(code=code) + index=idx, status=status_pb2.Status(code=code) ) ] ) @@ -47,25 +46,35 @@ def _make_mock_client(self, mutation_dict, error_dict=None): client.mutate_rows = AsyncMock() if error_dict is None: error_dict = {} - client.mutate_rows.side_effect = lambda *args, **kwargs: self._mock_stream(mutation_dict, error_dict) + client.mutate_rows.side_effect = lambda *args, **kwargs: self._mock_stream( + mutation_dict, error_dict + ) return client @pytest.mark.asyncio async def test_single_entry_success(self): """Test mutating a single entry""" from google.cloud.bigtable._mutate_rows import _mutate_rows_retryable_attempt + mutation = mock.Mock() mutations = {0: mutation} client = self._make_mock_client(mutations) errors = {0: []} expected_request = {"test": "data"} expected_timeout = 9 - await _mutate_rows_retryable_attempt(client, expected_request, expected_timeout, mutations, errors, lambda x: False) + await _mutate_rows_retryable_attempt( + client, + expected_request, + expected_timeout, + mutations, + errors, + lambda x: False, + ) assert mutations[0] is None assert errors[0] == [] assert client.mutate_rows.call_count == 1 args, kwargs = client.mutate_rows.call_args - assert kwargs['timeout'] == expected_timeout + assert kwargs["timeout"] == expected_timeout assert args[0]["test"] == "data" assert args[0]["entries"] == [mutation._to_dict()] @@ -73,6 +82,7 @@ async def test_single_entry_success(self): async def test_empty_request(self): """Calling with no mutations should result in a single API call""" from google.cloud.bigtable._mutate_rows import _mutate_rows_retryable_attempt + client = self._make_mock_client({}) await _mutate_rows_retryable_attempt(client, {}, None, {}, {}, lambda x: False) assert client.mutate_rows.call_count == 1 @@ -80,7 +90,11 @@ async def test_empty_request(self): @pytest.mark.asyncio async def test_partial_success_retryable(self): """Some entries succeed, but one fails. 
Should report the proper index, and raise incomplete exception""" - from google.cloud.bigtable._mutate_rows import _mutate_rows_retryable_attempt, _MutateRowsIncomplete + from google.cloud.bigtable._mutate_rows import ( + _mutate_rows_retryable_attempt, + _MutateRowsIncomplete, + ) + success_mutation = mock.Mock() success_mutation_2 = mock.Mock() failure_mutation = mock.Mock() @@ -91,7 +105,14 @@ async def test_partial_success_retryable(self): expected_request = {} expected_timeout = 9 with pytest.raises(_MutateRowsIncomplete): - await _mutate_rows_retryable_attempt(client, expected_request, expected_timeout, mutations, errors, lambda x: True) + await _mutate_rows_retryable_attempt( + client, + expected_request, + expected_timeout, + mutations, + errors, + lambda x: True, + ) assert mutations == {0: None, 1: failure_mutation, 2: None} assert errors[0] == [] assert len(errors[1]) == 1 @@ -102,6 +123,7 @@ async def test_partial_success_retryable(self): async def test_partial_success_non_retryable(self): """Some entries succeed, but one fails. Exception marked as non-retryable. Do not raise incomplete error""" from google.cloud.bigtable._mutate_rows import _mutate_rows_retryable_attempt + success_mutation = mock.Mock() success_mutation_2 = mock.Mock() failure_mutation = mock.Mock() @@ -111,12 +133,16 @@ async def test_partial_success_non_retryable(self): # raise retryable error 3 times, then raise non-retryable error expected_request = {} expected_timeout = 9 - await _mutate_rows_retryable_attempt(client, expected_request, expected_timeout, mutations, errors, lambda x: False) + await _mutate_rows_retryable_attempt( + client, + expected_request, + expected_timeout, + mutations, + errors, + lambda x: False, + ) assert mutations == {0: None, 1: None, 2: None} assert errors[0] == [] assert len(errors[1]) == 1 assert errors[1][0].grpc_status_code == 300 assert errors[2] == [] - - - diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 04b80fa62..4c179148b 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -1105,7 +1105,6 @@ async def test_bulk_mutate_rows_idempotent_mutation_error_retries(self, exceptio cause.exceptions[-1], core_exceptions.DeadlineExceeded ) - @pytest.mark.asyncio @pytest.mark.parametrize( "exception", @@ -1155,7 +1154,9 @@ async def test_bulk_mutate_rows_idempotent_mutation_error_retries(self, exceptio ], ) @pytest.mark.asyncio - async def test_bulk_mutate_idempotent_retryable_request_errors(self, retryable_exception): + async def test_bulk_mutate_idempotent_retryable_request_errors( + self, retryable_exception + ): """ Individual idempotent mutations should be retried if the request fails with a retryable error """ @@ -1271,7 +1272,11 @@ async def test_bulk_mutate_error_index(self): """ Test partial failure, partial success. 
Errors should be associated with the correct index """ - from google.api_core.exceptions import DeadlineExceeded, Aborted, FailedPrecondition + from google.api_core.exceptions import ( + DeadlineExceeded, + Aborted, + FailedPrecondition, + ) from google.cloud.bigtable.exceptions import ( RetryExceptionGroup, FailedMutationEntryError, @@ -1284,12 +1289,21 @@ async def test_bulk_mutate_error_index(self): client._gapic_client, "mutate_rows" ) as mock_gapic: # fail with retryable errors, then a non-retryable one - mock_gapic.side_effect =[ self._mock_response([None, Aborted("mock"), None]), self._mock_response([DeadlineExceeded("mock")]), self._mock_response([FailedPrecondition("final")])] + mock_gapic.side_effect = [ + self._mock_response([None, Aborted("mock"), None]), + self._mock_response([DeadlineExceeded("mock")]), + self._mock_response([FailedPrecondition("final")]), + ] with pytest.raises(MutationsExceptionGroup) as e: mutation = mutations.SetCell( "family", b"qualifier", b"value", timestamp_micros=123 ) - entries = [mutations.BulkMutationsEntry((f"row_key_{i}").encode(), [mutation]) for i in range(3)] + entries = [ + mutations.BulkMutationsEntry( + (f"row_key_{i}").encode(), [mutation] + ) + for i in range(3) + ] assert mutation.is_idempotent() is True await table.bulk_mutate_rows(entries, operation_timeout=1000) assert len(e.value.exceptions) == 1 @@ -1303,5 +1317,3 @@ async def test_bulk_mutate_error_index(self): assert isinstance(cause.exceptions[0], Aborted) assert isinstance(cause.exceptions[1], DeadlineExceeded) assert isinstance(cause.exceptions[2], FailedPrecondition) - - diff --git a/tests/unit/test_exceptions.py b/tests/unit/test_exceptions.py index 67b3f0779..e13abcc70 100644 --- a/tests/unit/test_exceptions.py +++ b/tests/unit/test_exceptions.py @@ -27,18 +27,20 @@ import mock # type: ignore from mock import AsyncMock # type: ignore -class TestBigtableExceptionGroup(): + +class TestBigtableExceptionGroup: """ Subclass for MutationsExceptionGroup and RetryExceptionGroup """ def _get_class(self): from google.cloud.bigtable.exceptions import BigtableExceptionGroup + return BigtableExceptionGroup def _make_one(self, message="test_message", excs=None): if excs is None: - excs = [RuntimeError('mock')] + excs = [RuntimeError("mock")] return self._get_class()(message, excs=excs) @@ -61,23 +63,26 @@ def test_raise_empty_list(self): raise self._make_one(excs=[]) assert "non-empty sequence" in str(e.value) - @pytest.mark.skipif(sys.version_info < (3, 11), reason="requires python3.11 or higher") + @pytest.mark.skipif( + sys.version_info < (3, 11), reason="requires python3.11 or higher" + ) def test_311_traceback(self): """ Exception customizations should not break rich exception group traceback in python 3.11 """ import traceback + sub_exc1 = RuntimeError("first sub exception") sub_exc2 = ZeroDivisionError("second sub exception") exc_group = self._make_one(excs=[sub_exc1, sub_exc2]) expected_traceback = ( - f" | google.cloud.bigtable.exceptions.{type(exc_group).__name__}: {str(exc_group)}", - " +-+---------------- 1 ----------------", - " | RuntimeError: first sub exception", - " +---------------- 2 ----------------", - " | ZeroDivisionError: second sub exception", - " +------------------------------------", + f" | google.cloud.bigtable.exceptions.{type(exc_group).__name__}: {str(exc_group)}", + " +-+---------------- 1 ----------------", + " | RuntimeError: first sub exception", + " +---------------- 2 ----------------", + " | ZeroDivisionError: second sub exception", + " 
+------------------------------------", ) exception_caught = False try: @@ -89,7 +94,9 @@ def test_311_traceback(self): assert expected_traceback == tb_relevant_lines assert exception_caught - @pytest.mark.skipif(sys.version_info < (3, 11), reason="requires python3.11 or higher") + @pytest.mark.skipif( + sys.version_info < (3, 11), reason="requires python3.11 or higher" + ) def test_311_exception_group(self): """ Python 3.11+ should handle exepctions as native exception groups @@ -120,23 +127,30 @@ def test_exception_handling(self): assert was_raised - class TestMutationsExceptionGroup(TestBigtableExceptionGroup): def _get_class(self): from google.cloud.bigtable.exceptions import MutationsExceptionGroup + return MutationsExceptionGroup def _make_one(self, excs=None, num_entries=3): if excs is None: - excs = [RuntimeError('mock')] + excs = [RuntimeError("mock")] return self._get_class()(excs, num_entries) - @pytest.mark.parametrize("exception_list,total_entries,expected_message", [ - ([Exception()], 1, "1 sub-exception (from 1 entry attempted)"), - ([Exception()], 2, "1 sub-exception (from 2 entries attempted)"), - ([Exception(), RuntimeError()], 2, "2 sub-exceptions (from 2 entries attempted)"), - ]) + @pytest.mark.parametrize( + "exception_list,total_entries,expected_message", + [ + ([Exception()], 1, "1 sub-exception (from 1 entry attempted)"), + ([Exception()], 2, "1 sub-exception (from 2 entries attempted)"), + ( + [Exception(), RuntimeError()], + 2, + "2 sub-exceptions (from 2 entries attempted)", + ), + ], + ) def test_raise(self, exception_list, total_entries, expected_message): """ Create exception in raise statement, which calls __new__ and __init__ @@ -146,23 +160,38 @@ def test_raise(self, exception_list, total_entries, expected_message): assert str(e.value) == expected_message assert list(e.value.exceptions) == exception_list + class TestRetryExceptionGroup(TestBigtableExceptionGroup): def _get_class(self): from google.cloud.bigtable.exceptions import RetryExceptionGroup + return RetryExceptionGroup def _make_one(self, excs=None): if excs is None: - excs = [RuntimeError('mock')] + excs = [RuntimeError("mock")] return self._get_class()(excs=excs) - @pytest.mark.parametrize("exception_list,expected_message", [ - ([Exception()], "1 failed attempt: Exception"), - ([Exception(), RuntimeError()], "2 failed attempts. Latest: RuntimeError"), - ([Exception(), ValueError("test")], "2 failed attempts. Latest: ValueError"), - ([bigtable_exceptions.RetryExceptionGroup([Exception(), ValueError("test")])], "1 failed attempt: RetryExceptionGroup"), - ]) + @pytest.mark.parametrize( + "exception_list,expected_message", + [ + ([Exception()], "1 failed attempt: Exception"), + ([Exception(), RuntimeError()], "2 failed attempts. Latest: RuntimeError"), + ( + [Exception(), ValueError("test")], + "2 failed attempts. 
Latest: ValueError", + ), + ( + [ + bigtable_exceptions.RetryExceptionGroup( + [Exception(), ValueError("test")] + ) + ], + "1 failed attempt: RetryExceptionGroup", + ), + ], + ) def test_raise(self, exception_list, expected_message): """ Create exception in raise statement, which calls __new__ and __init__ @@ -173,12 +202,13 @@ def test_raise(self, exception_list, expected_message): assert list(e.value.exceptions) == exception_list -class TestFailedMutationEntryError(): +class TestFailedMutationEntryError: def _get_class(self): from google.cloud.bigtable.exceptions import FailedMutationEntryError + return FailedMutationEntryError - def _make_one(self, idx=9, entry=unittest.mock.Mock(), cause=RuntimeError('mock')): + def _make_one(self, idx=9, entry=unittest.mock.Mock(), cause=RuntimeError("mock")): return self._get_class()(idx, entry, cause) @@ -191,7 +221,10 @@ def test_raise(self): test_exc = ValueError("test") with pytest.raises(self._get_class()) as e: raise self._get_class()(test_idx, test_entry, test_exc) - assert str(e.value) == "Failed idempotent mutation entry at index 2 with cause: ValueError('test')" + assert ( + str(e.value) + == "Failed idempotent mutation entry at index 2 with cause: ValueError('test')" + ) assert e.value.index == test_idx assert e.value.entry == test_entry assert e.value.__cause__ == test_exc @@ -208,7 +241,10 @@ def test_raise_idempotent(self): test_exc = ValueError("test") with pytest.raises(self._get_class()) as e: raise self._get_class()(test_idx, test_entry, test_exc) - assert str(e.value) == "Failed non-idempotent mutation entry at index 2 with cause: ValueError('test')" + assert ( + str(e.value) + == "Failed non-idempotent mutation entry at index 2 with cause: ValueError('test')" + ) assert e.value.index == test_idx assert e.value.entry == test_entry assert e.value.__cause__ == test_exc From a91fbcbe9a3e6b685cee0fa3efb22e71f988cee8 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 28 Apr 2023 11:17:40 -0700 Subject: [PATCH 036/213] pulled in table default timeouts --- google/cloud/bigtable/client.py | 60 ++++++++++++++++++++--------- google/cloud/bigtable/exceptions.py | 4 +- 2 files changed, 45 insertions(+), 19 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 3a2cd6c8c..b312a71a1 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -331,6 +331,10 @@ def __init__( instance_id: str, table_id: str, app_profile_id: str | None = None, + *, + default_operation_timeout: float = 60, + default_per_row_timeout: float | None = 10, + default_per_request_timeout: float | None = None, ): """ Initialize a Table instance @@ -345,9 +349,28 @@ def __init__( instance_id and the client's project to fully specify the table app_profile_id: (Optional) The app profile to associate with requests. 
https://cloud.google.com/bigtable/docs/app-profiles + default_operation_timeout: (Optional) The default timeout, in seconds + default_per_row_timeout: (Optional) The default timeout for individual + rows in all read_rows requests, in seconds + default_per_request_timeout: (Optional) The default timeout for individual + rpc requests, in seconds Raises: - RuntimeError if called outside of an async context (no running event loop) """ + # validate timeouts + if default_operation_timeout <= 0: + raise ValueError("default_operation_timeout must be greater than 0") + if default_per_row_timeout is not None and default_per_row_timeout <= 0: + raise ValueError("default_per_row_timeout must be greater than 0") + if default_per_request_timeout is not None and default_per_request_timeout <= 0: + raise ValueError("default_per_request_timeout must be greater than 0") + if ( + default_per_request_timeout is not None + and default_per_request_timeout > default_operation_timeout + ): + raise ValueError( + "default_per_request_timeout must be less than default_operation_timeout" + ) self.client = client self.instance_id = instance_id self.instance_name = self.client._gapic_client.instance_path( @@ -358,6 +381,9 @@ def __init__( self.client.project, instance_id, table_id ) self.app_profile_id = app_profile_id + self.default_operation_timeout = default_operation_timeout + self.default_per_row_timeout = default_per_row_timeout + self.default_per_request_timeout = default_per_request_timeout # raises RuntimeError if called outside of an async context (no running event loop) try: self._register_instance_task = asyncio.create_task( @@ -576,16 +602,15 @@ async def mutate_row( - GoogleAPIError: raised on non-idempotent operations that cannot be safely retried. """ - # TODO: bring in default, from read_rows - # operation_timeout = operation_timeout or self.default_operation_timeout - # per_request_timeout = per_request_timeout or self.default_per_request_timeout + operation_timeout = operation_timeout or self.default_operation_timeout + per_request_timeout = per_request_timeout or self.default_per_request_timeout - # if operation_timeout <= 0: - # raise ValueError("operation_timeout must be greater than 0") - # if per_request_timeout is not None and per_request_timeout <= 0: - # raise ValueError("per_request_timeout must be greater than 0") - # if per_request_timeout is not None and per_request_timeout > operation_timeout: - # raise ValueError("per_request_timeout must be less than operation_timeout") + if operation_timeout <= 0: + raise ValueError("operation_timeout must be greater than 0") + if per_request_timeout is not None and per_request_timeout <= 0: + raise ValueError("per_request_timeout must be greater than 0") + if per_request_timeout is not None and per_request_timeout > operation_timeout: + raise ValueError("per_request_timeout must be less than operation_timeout") if isinstance(row_key, str): row_key = row_key.encode("utf-8") @@ -667,16 +692,15 @@ async def bulk_mutate_rows( - MutationsExceptionGroup if one or more mutations fails Contains details about any failed entries in .exceptions """ - # TODO: bring in default, from read_rows - # operation_timeout = operation_timeout or self.default_operation_timeout - # per_request_timeout = per_request_timeout or self.default_per_request_timeout + operation_timeout = operation_timeout or self.default_operation_timeout + per_request_timeout = per_request_timeout or self.default_per_request_timeout - # if operation_timeout <= 0: - # raise 
ValueError("operation_timeout must be greater than 0") - # if per_request_timeout is not None and per_request_timeout <= 0: - # raise ValueError("per_request_timeout must be greater than 0") - # if per_request_timeout is not None and per_request_timeout > operation_timeout: - # raise ValueError("per_request_timeout must be less than operation_timeout") + if operation_timeout <= 0: + raise ValueError("operation_timeout must be greater than 0") + if per_request_timeout is not None and per_request_timeout <= 0: + raise ValueError("per_request_timeout must be greater than 0") + if per_request_timeout is not None and per_request_timeout > operation_timeout: + raise ValueError("per_request_timeout must be less than operation_timeout") request = {"table_name": self.table_name} if self.app_profile_id: diff --git a/google/cloud/bigtable/exceptions.py b/google/cloud/bigtable/exceptions.py index 697355a19..32029d801 100644 --- a/google/cloud/bigtable/exceptions.py +++ b/google/cloud/bigtable/exceptions.py @@ -27,7 +27,9 @@ def _convert_retry_deadline( - func: Callable[..., Any], timeout_value: float, retry_errors: list[Exception] | None = None + func: Callable[..., Any], + timeout_value: float, + retry_errors: list[Exception] | None = None, ): """ Decorator to convert RetryErrors raised by api_core.retry into From df8a058630dbc2991d2a3135abcd6d246192efce Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 28 Apr 2023 11:34:24 -0700 Subject: [PATCH 037/213] added tests for shared deadline parsing function --- tests/unit/test_exceptions.py | 48 +++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/tests/unit/test_exceptions.py b/tests/unit/test_exceptions.py index e13abcc70..7566a30ba 100644 --- a/tests/unit/test_exceptions.py +++ b/tests/unit/test_exceptions.py @@ -28,6 +28,54 @@ from mock import AsyncMock # type: ignore +class TestConvertRetryDeadline: + """ + Test _convert_retry_deadline wrapper + """ + + @pytest.mark.asyncio + async def test_no_error(self): + async def test_func(): + return 1 + + wrapped = bigtable_exceptions._convert_retry_deadline(test_func, 0.1) + assert await wrapped() == 1 + + @pytest.mark.asyncio + @pytest.mark.parametrize("timeout", [0.1, 2.0, 30.0]) + async def test_retry_error(self, timeout): + from google.api_core.exceptions import RetryError, DeadlineExceeded + + async def test_func(): + raise RetryError("retry error", None) + + wrapped = bigtable_exceptions._convert_retry_deadline(test_func, timeout) + with pytest.raises(DeadlineExceeded) as e: + await wrapped() + assert e.value.__cause__ is None + assert f"operation_timeout of {timeout}s exceeded" in str(e.value) + + @pytest.mark.asyncio + async def test_with_retry_errors(self): + from google.api_core.exceptions import RetryError, DeadlineExceeded + + timeout = 10.0 + + async def test_func(): + raise RetryError("retry error", None) + + associated_errors = [RuntimeError("error1"), ZeroDivisionError("other")] + wrapped = bigtable_exceptions._convert_retry_deadline( + test_func, timeout, associated_errors + ) + with pytest.raises(DeadlineExceeded) as e: + await wrapped() + cause = e.value.__cause__ + assert isinstance(cause, bigtable_exceptions.RetryExceptionGroup) + assert cause.exceptions == tuple(associated_errors) + assert f"operation_timeout of {timeout}s exceeded" in str(e.value) + + class TestBigtableExceptionGroup: """ Subclass for MutationsExceptionGroup and RetryExceptionGroup From b866b574f3bb2058834cae8ee608c2eed27acca8 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 28 
Apr 2023 13:31:11 -0700 Subject: [PATCH 038/213] added tests for mutation models --- google/cloud/bigtable/mutations.py | 13 +- tests/unit/test_mutations.py | 255 +++++++++++++++++++++++++++++ 2 files changed, 265 insertions(+), 3 deletions(-) create mode 100644 tests/unit/test_mutations.py diff --git a/google/cloud/bigtable/mutations.py b/google/cloud/bigtable/mutations.py index 966fa56a8..324d98141 100644 --- a/google/cloud/bigtable/mutations.py +++ b/google/cloud/bigtable/mutations.py @@ -44,13 +44,14 @@ class SetCell(Mutation): timestamp_micros: int | None = None def _to_dict(self) -> dict[str, Any]: + """Convert the mutation to a dictionary representation""" + # if timestamp not given, use -1 for server-side timestamp + timestamp = self.timestamp_micros if self.timestamp_micros is not None else -1 return { "set_cell": { "family_name": self.family, "column_qualifier": self.qualifier, - "timestamp_micros": self.timestamp_micros - if self.timestamp_micros is not None - else -1, + "timestamp_micros": timestamp, "value": self.new_value, } } @@ -69,6 +70,12 @@ class DeleteRangeFromColumn(Mutation): # None represents infinity end_timestamp_micros: int | None = None + def __post_init__(self): + if self.start_timestamp_micros is not None and self.end_timestamp_micros is not None and self.start_timestamp_micros > self.end_timestamp_micros: + raise ValueError( + "start_timestamp_micros must be <= end_timestamp_micros" + ) + def _to_dict(self) -> dict[str, Any]: timestamp_range = {} if self.start_timestamp_micros is not None: diff --git a/tests/unit/test_mutations.py b/tests/unit/test_mutations.py new file mode 100644 index 000000000..c3e3058bc --- /dev/null +++ b/tests/unit/test_mutations.py @@ -0,0 +1,255 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest +import pytest +import sys + +# try/except added for compatibility with python < 3.8 +try: + from unittest import mock + from unittest.mock import AsyncMock # type: ignore +except ImportError: # pragma: NO COVER + import mock # type: ignore + from mock import AsyncMock # type: ignore + +class TestBaseMutation: + + def _target_class(self): + from google.cloud.bigtable.mutations import Mutation + return Mutation + + def test__to_dict(self): + """Should be unimplemented in the base class""" + with pytest.raises(NotImplementedError): + self._target_class()._to_dict(mock.Mock()) + + def test_is_idempotent(self): + """is_idempotent should assume True""" + assert self._target_class().is_idempotent(mock.Mock()) + + def test___str__(self): + """Str representation of mutations should be to_dict""" + self_mock = mock.Mock() + str_value = self._target_class().__str__(self_mock) + assert self_mock._to_dict.called + assert str_value == str(self_mock._to_dict.return_value) + +class TestSetCell: + + def _target_class(self): + from google.cloud.bigtable.mutations import SetCell + return SetCell + + def _make_one(self, *args, **kwargs): + return self._target_class()(*args, **kwargs) + + def test__to_dict(self): + """Should be unimplemented in the base class""" + expected_family = "test-family" + expected_qualifier = b"test-qualifier" + expected_value = b"test-value" + expected_timestamp = 1234567890 + instance = self._make_one(expected_family, expected_qualifier, expected_value, expected_timestamp) + got_dict = instance._to_dict() + assert list(got_dict.keys()) == ["set_cell"] + got_inner_dict = got_dict["set_cell"] + assert got_inner_dict["family_name"] == expected_family + assert got_inner_dict["column_qualifier"] == expected_qualifier + assert got_inner_dict["timestamp_micros"] == expected_timestamp + assert got_inner_dict["value"] == expected_value + assert len(got_inner_dict.keys()) == 4 + + def test__to_dict_server_timestamp(self): + """Should be unimplemented in the base class""" + expected_family = "test-family" + expected_qualifier = b"test-qualifier" + expected_value = b"test-value" + expected_timestamp = -1 + instance = self._make_one(expected_family, expected_qualifier, expected_value) + got_dict = instance._to_dict() + assert list(got_dict.keys()) == ["set_cell"] + got_inner_dict = got_dict["set_cell"] + assert got_inner_dict["family_name"] == expected_family + assert got_inner_dict["column_qualifier"] == expected_qualifier + assert got_inner_dict["timestamp_micros"] == expected_timestamp + assert got_inner_dict["value"] == expected_value + assert len(got_inner_dict.keys()) == 4 + + @pytest.mark.parametrize("timestamp,expected_value", [ + (1234567890, True), + (1, True), + (0, True), + (-1, False), + (None, False), + ]) + def test_is_idempotent(self, timestamp, expected_value): + """is_idempotent is based on whether an explicit timestamp is set""" + instance = self._make_one("test-family", b"test-qualifier", b'test-value', timestamp) + assert instance.is_idempotent() is expected_value + + def test___str__(self): + """Str representation of mutations should be to_dict""" + instance = self._make_one("test-family", b"test-qualifier", b'test-value', 1234567890) + str_value = instance.__str__() + dict_value = instance._to_dict() + assert str_value == str(dict_value) + +class TestDeleteRangeFromColumn: + + def _target_class(self): + from google.cloud.bigtable.mutations import DeleteRangeFromColumn + return DeleteRangeFromColumn + + def _make_one(self, *args, **kwargs): + return 
self._target_class()(*args, **kwargs) + + def test_ctor(self): + expected_family = "test-family" + expected_qualifier = b"test-qualifier" + expected_start = 1234567890 + expected_end = 1234567891 + instance = self._make_one(expected_family, expected_qualifier, expected_start, expected_end) + assert instance.family == expected_family + assert instance.qualifier == expected_qualifier + assert instance.start_timestamp_micros == expected_start + assert instance.end_timestamp_micros == expected_end + + def test_ctor_no_timestamps(self): + expected_family = "test-family" + expected_qualifier = b"test-qualifier" + instance = self._make_one(expected_family, expected_qualifier) + assert instance.family == expected_family + assert instance.qualifier == expected_qualifier + assert instance.start_timestamp_micros is None + assert instance.end_timestamp_micros is None + + def test_ctor_timestamps_out_of_order(self): + expected_family = "test-family" + expected_qualifier = b"test-qualifier" + expected_start = 10 + expected_end = 1 + with pytest.raises(ValueError) as excinfo: + self._make_one(expected_family, expected_qualifier, expected_start, expected_end) + assert "start_timestamp_micros must be <= end_timestamp_micros" in str(excinfo.value) + + + @pytest.mark.parametrize("start,end", [ + (0, 1), + (None, 1), + (0, None), + ]) + def test__to_dict(self, start, end): + """Should be unimplemented in the base class""" + expected_family = "test-family" + expected_qualifier = b"test-qualifier" + + instance = self._make_one(expected_family, expected_qualifier, start, end) + got_dict = instance._to_dict() + assert list(got_dict.keys()) == ["delete_from_column"] + got_inner_dict = got_dict["delete_from_column"] + assert len(got_inner_dict.keys()) == 3 + assert got_inner_dict["family_name"] == expected_family + assert got_inner_dict["column_qualifier"] == expected_qualifier + time_range_dict = got_inner_dict["time_range"] + expected_len = int(isinstance(start, int)) + int(isinstance(end, int)) + assert len(time_range_dict.keys()) == expected_len + if start is not None: + assert time_range_dict["start_timestamp_micros"] == start + if end is not None: + assert time_range_dict["end_timestamp_micros"] == end + + def test_is_idempotent(self): + """is_idempotent is always true""" + instance = self._make_one("test-family", b"test-qualifier", 1234567890, 1234567891) + assert instance.is_idempotent() is True + + def test___str__(self): + """Str representation of mutations should be to_dict""" + instance = self._make_one("test-family", b"test-qualifier") + str_value = instance.__str__() + dict_value = instance._to_dict() + assert str_value == str(dict_value) + + + +class TestDeleteAllFromFamily: + + def _target_class(self): + from google.cloud.bigtable.mutations import DeleteAllFromFamily + return DeleteAllFromFamily + + def _make_one(self, *args, **kwargs): + return self._target_class()(*args, **kwargs) + + def test_ctor(self): + expected_family = "test-family" + instance = self._make_one(expected_family) + assert instance.family_to_delete == expected_family + + def test__to_dict(self): + """Should be unimplemented in the base class""" + expected_family = "test-family" + instance = self._make_one(expected_family) + got_dict = instance._to_dict() + assert list(got_dict.keys()) == ["delete_from_family"] + got_inner_dict = got_dict["delete_from_family"] + assert len(got_inner_dict.keys()) == 1 + assert got_inner_dict["family_name"] == expected_family + + + def test_is_idempotent(self): + """is_idempotent is always true""" 
+ instance = self._make_one("test-family") + assert instance.is_idempotent() is True + + def test___str__(self): + """Str representation of mutations should be to_dict""" + instance = self._make_one("test-family") + str_value = instance.__str__() + dict_value = instance._to_dict() + assert str_value == str(dict_value) + + + +class TestDeleteFromRow: + + def _target_class(self): + from google.cloud.bigtable.mutations import DeleteAllFromRow + return DeleteAllFromRow + + def _make_one(self, *args, **kwargs): + return self._target_class()(*args, **kwargs) + + def test_ctor(self): + instance = self._make_one() + + def test__to_dict(self): + """Should be unimplemented in the base class""" + instance = self._make_one() + got_dict = instance._to_dict() + assert list(got_dict.keys()) == ["delete_from_row"] + assert len(got_dict["delete_from_row"].keys()) == 0 + + def test_is_idempotent(self): + """is_idempotent is always true""" + instance = self._make_one() + assert instance.is_idempotent() is True + + def test___str__(self): + """Str representation of mutations should be to_dict""" + instance = self._make_one() + assert instance.__str__() == "{'delete_from_row': {}}" + + From 54a4d432d68bcfe979e3747e734bf5360551a45f Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 28 Apr 2023 13:38:08 -0700 Subject: [PATCH 039/213] fixed linter errors --- google/cloud/bigtable/mutations.py | 10 ++-- tests/unit/test__mutate_rows.py | 3 -- tests/unit/test_client.py | 9 +--- tests/unit/test_exceptions.py | 14 +---- tests/unit/test_mutations.py | 85 +++++++++++++++++------------- 5 files changed, 58 insertions(+), 63 deletions(-) diff --git a/google/cloud/bigtable/mutations.py b/google/cloud/bigtable/mutations.py index 324d98141..211148158 100644 --- a/google/cloud/bigtable/mutations.py +++ b/google/cloud/bigtable/mutations.py @@ -71,10 +71,12 @@ class DeleteRangeFromColumn(Mutation): end_timestamp_micros: int | None = None def __post_init__(self): - if self.start_timestamp_micros is not None and self.end_timestamp_micros is not None and self.start_timestamp_micros > self.end_timestamp_micros: - raise ValueError( - "start_timestamp_micros must be <= end_timestamp_micros" - ) + if ( + self.start_timestamp_micros is not None + and self.end_timestamp_micros is not None + and self.start_timestamp_micros > self.end_timestamp_micros + ): + raise ValueError("start_timestamp_micros must be <= end_timestamp_micros") def _to_dict(self) -> dict[str, Any]: timestamp_range = {} diff --git a/tests/unit/test__mutate_rows.py b/tests/unit/test__mutate_rows.py index 10dfac374..4d0c7820f 100644 --- a/tests/unit/test__mutate_rows.py +++ b/tests/unit/test__mutate_rows.py @@ -12,11 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import unittest import pytest -import sys -import google.cloud.bigtable.exceptions as bigtable_exceptions from google.cloud.bigtable_v2.types import MutateRowsResponse from google.rpc import status_pb2 diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 4c179148b..1c7fab9d5 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -1069,11 +1069,10 @@ async def test_bulk_mutate_rows_multiple_entries(self): core_exceptions.Aborted, ], ) - async def test_bulk_mutate_rows_idempotent_mutation_error_retries(self, exception): + async def test_bulk_mutate_rows_idempotent_mutation_error_retryable(self, exception): """ Individual idempotent mutations should be retried if they fail with a retryable error """ - from google.api_core.exceptions import DeadlineExceeded from google.cloud.bigtable.exceptions import ( RetryExceptionGroup, FailedMutationEntryError, @@ -1114,13 +1113,11 @@ async def test_bulk_mutate_rows_idempotent_mutation_error_retries(self, exceptio core_exceptions.FailedPrecondition, ], ) - async def test_bulk_mutate_rows_idempotent_mutation_error_retries(self, exception): + async def test_bulk_mutate_rows_idempotent_mutation_error_non_retryable(self, exception): """ Individual idempotent mutations should not be retried if they fail with a non-retryable error """ - from google.api_core.exceptions import DeadlineExceeded from google.cloud.bigtable.exceptions import ( - RetryExceptionGroup, FailedMutationEntryError, MutationsExceptionGroup, ) @@ -1201,7 +1198,6 @@ async def test_bulk_mutate_rows_non_idempotent_retryable_errors( ): """Non-Idempotent mutations should never be retried""" from google.cloud.bigtable.exceptions import ( - RetryExceptionGroup, FailedMutationEntryError, MutationsExceptionGroup, ) @@ -1242,7 +1238,6 @@ async def test_bulk_mutate_rows_non_retryable_errors(self, non_retryable_excepti If the request fails with a non-retryable error, mutations should not be retried """ from google.cloud.bigtable.exceptions import ( - RetryExceptionGroup, FailedMutationEntryError, MutationsExceptionGroup, ) diff --git a/tests/unit/test_exceptions.py b/tests/unit/test_exceptions.py index 7566a30ba..29cb1d02b 100644 --- a/tests/unit/test_exceptions.py +++ b/tests/unit/test_exceptions.py @@ -19,15 +19,6 @@ import google.cloud.bigtable.exceptions as bigtable_exceptions -# try/except added for compatibility with python < 3.8 -try: - from unittest import mock - from unittest.mock import AsyncMock # type: ignore -except ImportError: # pragma: NO COVER - import mock # type: ignore - from mock import AsyncMock # type: ignore - - class TestConvertRetryDeadline: """ Test _convert_retry_deadline wrapper @@ -135,7 +126,7 @@ def test_311_traceback(self): exception_caught = False try: raise exc_group - except self._get_class() as e: + except self._get_class(): exception_caught = True tb = traceback.format_exc() tb_relevant_lines = tuple(tb.splitlines()[3:]) @@ -151,12 +142,9 @@ def test_311_exception_group(self): """ exceptions = [RuntimeError("mock"), ValueError("mock")] instance = self._make_one(excs=exceptions) - assert isinstance(instance, ExceptionGroup) # ensure split works as expected runtime_error, others = instance.split(lambda e: isinstance(e, RuntimeError)) - assert isinstance(runtime_error, ExceptionGroup) assert runtime_error.exceptions[0] == exceptions[0] - assert isinstance(others, ExceptionGroup) assert others.exceptions[0] == exceptions[1] def test_exception_handling(self): diff --git a/tests/unit/test_mutations.py b/tests/unit/test_mutations.py 
index c3e3058bc..cdb61596a 100644 --- a/tests/unit/test_mutations.py +++ b/tests/unit/test_mutations.py @@ -12,22 +12,19 @@ # See the License for the specific language governing permissions and # limitations under the License. -import unittest import pytest -import sys # try/except added for compatibility with python < 3.8 try: from unittest import mock - from unittest.mock import AsyncMock # type: ignore except ImportError: # pragma: NO COVER import mock # type: ignore - from mock import AsyncMock # type: ignore -class TestBaseMutation: +class TestBaseMutation: def _target_class(self): from google.cloud.bigtable.mutations import Mutation + return Mutation def test__to_dict(self): @@ -46,10 +43,11 @@ def test___str__(self): assert self_mock._to_dict.called assert str_value == str(self_mock._to_dict.return_value) -class TestSetCell: +class TestSetCell: def _target_class(self): from google.cloud.bigtable.mutations import SetCell + return SetCell def _make_one(self, *args, **kwargs): @@ -61,7 +59,9 @@ def test__to_dict(self): expected_qualifier = b"test-qualifier" expected_value = b"test-value" expected_timestamp = 1234567890 - instance = self._make_one(expected_family, expected_qualifier, expected_value, expected_timestamp) + instance = self._make_one( + expected_family, expected_qualifier, expected_value, expected_timestamp + ) got_dict = instance._to_dict() assert list(got_dict.keys()) == ["set_cell"] got_inner_dict = got_dict["set_cell"] @@ -87,29 +87,37 @@ def test__to_dict_server_timestamp(self): assert got_inner_dict["value"] == expected_value assert len(got_inner_dict.keys()) == 4 - @pytest.mark.parametrize("timestamp,expected_value", [ - (1234567890, True), - (1, True), - (0, True), - (-1, False), - (None, False), - ]) + @pytest.mark.parametrize( + "timestamp,expected_value", + [ + (1234567890, True), + (1, True), + (0, True), + (-1, False), + (None, False), + ], + ) def test_is_idempotent(self, timestamp, expected_value): """is_idempotent is based on whether an explicit timestamp is set""" - instance = self._make_one("test-family", b"test-qualifier", b'test-value', timestamp) + instance = self._make_one( + "test-family", b"test-qualifier", b"test-value", timestamp + ) assert instance.is_idempotent() is expected_value def test___str__(self): """Str representation of mutations should be to_dict""" - instance = self._make_one("test-family", b"test-qualifier", b'test-value', 1234567890) + instance = self._make_one( + "test-family", b"test-qualifier", b"test-value", 1234567890 + ) str_value = instance.__str__() dict_value = instance._to_dict() assert str_value == str(dict_value) -class TestDeleteRangeFromColumn: +class TestDeleteRangeFromColumn: def _target_class(self): from google.cloud.bigtable.mutations import DeleteRangeFromColumn + return DeleteRangeFromColumn def _make_one(self, *args, **kwargs): @@ -120,7 +128,9 @@ def test_ctor(self): expected_qualifier = b"test-qualifier" expected_start = 1234567890 expected_end = 1234567891 - instance = self._make_one(expected_family, expected_qualifier, expected_start, expected_end) + instance = self._make_one( + expected_family, expected_qualifier, expected_start, expected_end + ) assert instance.family == expected_family assert instance.qualifier == expected_qualifier assert instance.start_timestamp_micros == expected_start @@ -141,15 +151,21 @@ def test_ctor_timestamps_out_of_order(self): expected_start = 10 expected_end = 1 with pytest.raises(ValueError) as excinfo: - self._make_one(expected_family, expected_qualifier, expected_start, 
expected_end) - assert "start_timestamp_micros must be <= end_timestamp_micros" in str(excinfo.value) - - - @pytest.mark.parametrize("start,end", [ - (0, 1), - (None, 1), - (0, None), - ]) + self._make_one( + expected_family, expected_qualifier, expected_start, expected_end + ) + assert "start_timestamp_micros must be <= end_timestamp_micros" in str( + excinfo.value + ) + + @pytest.mark.parametrize( + "start,end", + [ + (0, 1), + (None, 1), + (0, None), + ], + ) def test__to_dict(self, start, end): """Should be unimplemented in the base class""" expected_family = "test-family" @@ -172,7 +188,9 @@ def test__to_dict(self, start, end): def test_is_idempotent(self): """is_idempotent is always true""" - instance = self._make_one("test-family", b"test-qualifier", 1234567890, 1234567891) + instance = self._make_one( + "test-family", b"test-qualifier", 1234567890, 1234567891 + ) assert instance.is_idempotent() is True def test___str__(self): @@ -183,11 +201,10 @@ def test___str__(self): assert str_value == str(dict_value) - class TestDeleteAllFromFamily: - def _target_class(self): from google.cloud.bigtable.mutations import DeleteAllFromFamily + return DeleteAllFromFamily def _make_one(self, *args, **kwargs): @@ -208,7 +225,6 @@ def test__to_dict(self): assert len(got_inner_dict.keys()) == 1 assert got_inner_dict["family_name"] == expected_family - def test_is_idempotent(self): """is_idempotent is always true""" instance = self._make_one("test-family") @@ -222,18 +238,17 @@ def test___str__(self): assert str_value == str(dict_value) - class TestDeleteFromRow: - def _target_class(self): from google.cloud.bigtable.mutations import DeleteAllFromRow + return DeleteAllFromRow def _make_one(self, *args, **kwargs): return self._target_class()(*args, **kwargs) def test_ctor(self): - instance = self._make_one() + self._make_one() def test__to_dict(self): """Should be unimplemented in the base class""" @@ -251,5 +266,3 @@ def test___str__(self): """Str representation of mutations should be to_dict""" instance = self._make_one() assert instance.__str__() == "{'delete_from_row': {}}" - - From bd51dc4d061977c0e59abd28799192ca0975d4ac Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 28 Apr 2023 13:52:15 -0700 Subject: [PATCH 040/213] added tests for BulkMutationsEntry --- google/cloud/bigtable/mutations.py | 10 ++++-- tests/unit/test_client.py | 8 +++-- tests/unit/test_mutations.py | 56 ++++++++++++++++++++++++++++++ 3 files changed, 69 insertions(+), 5 deletions(-) diff --git a/google/cloud/bigtable/mutations.py b/google/cloud/bigtable/mutations.py index 211148158..f2c20502e 100644 --- a/google/cloud/bigtable/mutations.py +++ b/google/cloud/bigtable/mutations.py @@ -113,10 +113,14 @@ def _to_dict(self) -> dict[str, Any]: } -@dataclass class BulkMutationsEntry: - row_key: bytes - mutations: list[Mutation] + def __init__(self, row_key: bytes | str, mutations: Mutation | list[Mutation]): + if isinstance(row_key, str): + row_key = row_key.encode("utf-8") + if isinstance(mutations, Mutation): + mutations = [mutations] + self.row_key = row_key + self.mutations = mutations def _to_dict(self) -> dict[str, Any]: return { diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 1c7fab9d5..514440f95 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -1069,7 +1069,9 @@ async def test_bulk_mutate_rows_multiple_entries(self): core_exceptions.Aborted, ], ) - async def test_bulk_mutate_rows_idempotent_mutation_error_retryable(self, exception): + async def 
test_bulk_mutate_rows_idempotent_mutation_error_retryable( + self, exception + ): """ Individual idempotent mutations should be retried if they fail with a retryable error """ @@ -1113,7 +1115,9 @@ async def test_bulk_mutate_rows_idempotent_mutation_error_retryable(self, except core_exceptions.FailedPrecondition, ], ) - async def test_bulk_mutate_rows_idempotent_mutation_error_non_retryable(self, exception): + async def test_bulk_mutate_rows_idempotent_mutation_error_non_retryable( + self, exception + ): """ Individual idempotent mutations should not be retried if they fail with a non-retryable error """ diff --git a/tests/unit/test_mutations.py b/tests/unit/test_mutations.py index cdb61596a..bd546ef25 100644 --- a/tests/unit/test_mutations.py +++ b/tests/unit/test_mutations.py @@ -266,3 +266,59 @@ def test___str__(self): """Str representation of mutations should be to_dict""" instance = self._make_one() assert instance.__str__() == "{'delete_from_row': {}}" + + +class TestBulkMutationsEntry: + + def _target_class(self): + from google.cloud.bigtable.mutations import BulkMutationsEntry + return BulkMutationsEntry + + def _make_one(self, row_key, mutations): + return self._target_class()(row_key, mutations) + + def test_ctor(self): + expected_key = b"row_key" + expected_mutations = [mock.Mock()] + instance = self._make_one(expected_key, expected_mutations) + assert instance.row_key == expected_key + assert instance.mutations == expected_mutations + + def test_ctor_str_key(self): + expected_key = "row_key" + expected_mutations = [mock.Mock()] + instance = self._make_one(expected_key, expected_mutations) + assert instance.row_key == b"row_key" + assert instance.mutations == expected_mutations + + def test_ctor_single_mutation(self): + from google.cloud.bigtable.mutations import DeleteAllFromRow + expected_key = b"row_key" + expected_mutations = DeleteAllFromRow() + instance = self._make_one(expected_key, expected_mutations) + assert instance.row_key == expected_key + assert instance.mutations == [expected_mutations] + + def test__to_dict(self): + expected_key = "row_key" + mutation_mock = mock.Mock() + n_mutations = 3 + expected_mutations = [mutation_mock for i in range(n_mutations)] + for mock_mutations in expected_mutations: + mock_mutations._to_dict.return_value = {"test": "data"} + instance = self._make_one(expected_key, expected_mutations) + expected_result = {"row_key": b"row_key", "mutations": [{"test": "data"}] * n_mutations} + assert instance._to_dict() == expected_result + assert mutation_mock._to_dict.call_count == n_mutations + + @pytest.mark.parametrize("mutations,result", [ + ([], True), + ([mock.Mock(is_idempotent=lambda: True)], True), + ([mock.Mock(is_idempotent=lambda: False)], False), + ([mock.Mock(is_idempotent=lambda: True), mock.Mock(is_idempotent=lambda: False)], False), + ([mock.Mock(is_idempotent=lambda: True), mock.Mock(is_idempotent=lambda: True)], True), + ]) + def test_is_idempotent(self, mutations, result): + instance = self._make_one("row_key", mutations) + assert instance.is_idempotent() == result + From 921b05aba99df1c230027d130d3f43cbd5a250b5 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 28 Apr 2023 14:13:40 -0700 Subject: [PATCH 041/213] improved mutations documentation --- google/cloud/bigtable/exceptions.py | 15 ++++++------ tests/unit/test_mutations.py | 38 +++++++++++++++++++++-------- 2 files changed, 36 insertions(+), 17 deletions(-) diff --git a/google/cloud/bigtable/exceptions.py b/google/cloud/bigtable/exceptions.py index 
32029d801..0b5ff4e61 100644 --- a/google/cloud/bigtable/exceptions.py +++ b/google/cloud/bigtable/exceptions.py @@ -99,21 +99,22 @@ class MutationsExceptionGroup(BigtableExceptionGroup): """ @staticmethod - def _format_message(excs, total_entries): + def _format_message(excs: list[FailedMutationEntryError], total_entries: int): entry_str = "entry" if total_entries == 1 else "entries" plural_str = "" if len(excs) == 1 else "s" return f"{len(excs)} sub-exception{plural_str} (from {total_entries} {entry_str} attempted)" - def __init__(self, excs, total_entries): + def __init__(self, excs: list[FailedMutationEntryError], total_entries: int): super().__init__(self._format_message(excs, total_entries), excs) - def __new__(cls, excs, total_entries): + def __new__(cls, excs: list[FailedMutationEntryError], total_entries: int): return super().__new__(cls, cls._format_message(excs, total_entries), excs) class FailedMutationEntryError(Exception): """ - Represents a failed mutation entry for bulk mutation operations + Represents a single failed BulkMutationsEntry in a bulk_mutate_rows request. + A collection of FailedMutationEntryErrors will be raised in a MutationsExceptionGroup """ def __init__( @@ -136,7 +137,7 @@ class RetryExceptionGroup(BigtableExceptionGroup): """Represents one or more exceptions that occur during a retryable operation""" @staticmethod - def _format_message(excs): + def _format_message(excs: list[Exception]): if len(excs) == 0: return "No exceptions" if len(excs) == 1: @@ -144,8 +145,8 @@ def _format_message(excs): else: return f"{len(excs)} failed attempts. Latest: {type(excs[-1]).__name__}" - def __init__(self, excs): + def __init__(self, excs: list[Exception]): super().__init__(self._format_message(excs), excs) - def __new__(cls, excs): + def __new__(cls, excs: list[Exception]): return super().__new__(cls, cls._format_message(excs), excs) diff --git a/tests/unit/test_mutations.py b/tests/unit/test_mutations.py index bd546ef25..67187ca0e 100644 --- a/tests/unit/test_mutations.py +++ b/tests/unit/test_mutations.py @@ -269,9 +269,9 @@ def test___str__(self): class TestBulkMutationsEntry: - def _target_class(self): from google.cloud.bigtable.mutations import BulkMutationsEntry + return BulkMutationsEntry def _make_one(self, row_key, mutations): @@ -293,6 +293,7 @@ def test_ctor_str_key(self): def test_ctor_single_mutation(self): from google.cloud.bigtable.mutations import DeleteAllFromRow + expected_key = b"row_key" expected_mutations = DeleteAllFromRow() instance = self._make_one(expected_key, expected_mutations) @@ -307,18 +308,35 @@ def test__to_dict(self): for mock_mutations in expected_mutations: mock_mutations._to_dict.return_value = {"test": "data"} instance = self._make_one(expected_key, expected_mutations) - expected_result = {"row_key": b"row_key", "mutations": [{"test": "data"}] * n_mutations} + expected_result = { + "row_key": b"row_key", + "mutations": [{"test": "data"}] * n_mutations, + } assert instance._to_dict() == expected_result assert mutation_mock._to_dict.call_count == n_mutations - @pytest.mark.parametrize("mutations,result", [ - ([], True), - ([mock.Mock(is_idempotent=lambda: True)], True), - ([mock.Mock(is_idempotent=lambda: False)], False), - ([mock.Mock(is_idempotent=lambda: True), mock.Mock(is_idempotent=lambda: False)], False), - ([mock.Mock(is_idempotent=lambda: True), mock.Mock(is_idempotent=lambda: True)], True), - ]) + @pytest.mark.parametrize( + "mutations,result", + [ + ([], True), + ([mock.Mock(is_idempotent=lambda: True)], True), + 
([mock.Mock(is_idempotent=lambda: False)], False), + ( + [ + mock.Mock(is_idempotent=lambda: True), + mock.Mock(is_idempotent=lambda: False), + ], + False, + ), + ( + [ + mock.Mock(is_idempotent=lambda: True), + mock.Mock(is_idempotent=lambda: True), + ], + True, + ), + ], + ) def test_is_idempotent(self, mutations, result): instance = self._make_one("row_key", mutations) assert instance.is_idempotent() == result - From 53f7ad29d0406cd95facafb6bdbd7c3e1cee2d41 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 1 May 2023 11:40:09 -0700 Subject: [PATCH 042/213] added basic implementation for batcher --- google/cloud/bigtable/mutations_batcher.py | 42 +++++++++------------- 1 file changed, 17 insertions(+), 25 deletions(-) diff --git a/google/cloud/bigtable/mutations_batcher.py b/google/cloud/bigtable/mutations_batcher.py index 582786ee4..21b850375 100644 --- a/google/cloud/bigtable/mutations_batcher.py +++ b/google/cloud/bigtable/mutations_batcher.py @@ -44,9 +44,6 @@ class MutationsBatcher: batcher.add(row, mut) """ - queue: asyncio.Queue[tuple[row_key, list[Mutation]]] - conditional_queues: dict[RowFilter, tuple[list[Mutation], list[Mutation]]] - MB_SIZE = 1024 * 1024 def __init__( @@ -55,30 +52,22 @@ def __init__( flush_count: int = 100, flush_size_bytes: int = 100 * MB_SIZE, max_mutation_bytes: int = 20 * MB_SIZE, - flush_interval: int = 5, + flush_interval: float = 5, metadata: list[tuple[str, str]] | None = None, ): - raise NotImplementedError + self._queue_map : dict[row_key, list[Mutation]] = {} + self._table = table - async def append(self, row_key: str | bytes, mutation: Mutation | list[Mutation]): + async def append(self, row_key: str | bytes, mutations: Mutation | list[Mutation]): """ Add a new mutation to the internal queue """ - raise NotImplementedError - - async def append_conditional( - self, - predicate_filter: RowFilter, - row_key: str | bytes, - if_true_mutations: Mutation | list[Mutation] | None = None, - if_false_mutations: Mutation | list[Mutation] | None = None, - ): - """ - Apply a different set of mutations based on the outcome of predicate_filter - - Calls check_and_mutate_row internally on flush - """ - raise NotImplementedError + if isinstance(mutations, Mutation): + mutations = [mutations] + if isinstance(row_key, str): + row_key = row_key.encode("utf-8") + existing_mutations = self._queue_map.setdefault(row_key, []) + existing_mutations.extend(mutations) async def flush(self): """ @@ -87,18 +76,21 @@ async def flush(self): Raises: - MutationsExceptionGroup if any mutation in the batch fails """ - raise NotImplementedError + entries : list[BulkMutationsEntry] = [] + for key, mutations in self._queue_map.items(): + entries.append(BulkMutationsEntry(key, mutations)) + await self._table.bulk_mutate_rows(entries) async def __aenter__(self): """For context manager API""" - raise NotImplementedError + return self async def __aexit__(self, exc_type, exc, tb): """For context manager API""" - raise NotImplementedError + await self.close() async def close(self): """ Flush queue and clean up resources """ - raise NotImplementedError + await self.flush() From f65b2e5f9c100dc5692a91460586e8d0ca61a5b8 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 1 May 2023 11:50:47 -0700 Subject: [PATCH 043/213] added flush timer --- google/cloud/bigtable/mutations_batcher.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/google/cloud/bigtable/mutations_batcher.py b/google/cloud/bigtable/mutations_batcher.py index 21b850375..7b0e924a4 
100644 --- a/google/cloud/bigtable/mutations_batcher.py +++ b/google/cloud/bigtable/mutations_batcher.py @@ -53,10 +53,18 @@ def __init__( flush_size_bytes: int = 100 * MB_SIZE, max_mutation_bytes: int = 20 * MB_SIZE, flush_interval: float = 5, - metadata: list[tuple[str, str]] | None = None, ): self._queue_map : dict[row_key, list[Mutation]] = {} - self._table = table + self._table : "Table" = table + self._flush_timer_task : asyncio.Task[None] = asyncio.create_task(self._flush_timer(flush_interval)) + + async def _flush_timer(self, interval:float): + """ + Flush queue on a timer + """ + while True: + await asyncio.sleep(interval) + await self.flush() async def append(self, row_key: str | bytes, mutations: Mutation | list[Mutation]): """ @@ -79,7 +87,8 @@ async def flush(self): entries : list[BulkMutationsEntry] = [] for key, mutations in self._queue_map.items(): entries.append(BulkMutationsEntry(key, mutations)) - await self._table.bulk_mutate_rows(entries) + if entries: + await self._table.bulk_mutate_rows(entries) async def __aenter__(self): """For context manager API""" @@ -94,3 +103,6 @@ async def close(self): Flush queue and clean up resources """ await self.flush() + self._flush_timer_task.cancel() + await self._flush_timer_task + From 7051cd2ddd44cb3b617315a645bccdbe8be59615 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 1 May 2023 12:36:40 -0700 Subject: [PATCH 044/213] added flush on size and count --- google/cloud/bigtable/mutations.py | 7 +++++ google/cloud/bigtable/mutations_batcher.py | 35 ++++++++++++++++++---- 2 files changed, 37 insertions(+), 5 deletions(-) diff --git a/google/cloud/bigtable/mutations.py b/google/cloud/bigtable/mutations.py index f2c20502e..d991211dc 100644 --- a/google/cloud/bigtable/mutations.py +++ b/google/cloud/bigtable/mutations.py @@ -16,6 +16,7 @@ from typing import Any from dataclasses import dataclass from abc import ABC, abstractmethod +from sys import getsizeof class Mutation(ABC): @@ -35,6 +36,12 @@ def is_idempotent(self) -> bool: def __str__(self) -> str: return str(self._to_dict()) + def size(self) -> int: + """ + Get the size of the mutation in bytes + """ + return getsizeof(self._to_dict()) + @dataclass class SetCell(Mutation): diff --git a/google/cloud/bigtable/mutations_batcher.py b/google/cloud/bigtable/mutations_batcher.py index 7b0e924a4..6d8810395 100644 --- a/google/cloud/bigtable/mutations_batcher.py +++ b/google/cloud/bigtable/mutations_batcher.py @@ -18,6 +18,7 @@ from typing import TYPE_CHECKING from google.cloud.bigtable.mutations import Mutation +from google.cloud.bigtable.mutations import BulkMutationsEntry from google.cloud.bigtable.row import row_key from google.cloud.bigtable.row_filters import RowFilter @@ -49,14 +50,20 @@ class MutationsBatcher: def __init__( self, table: "Table", - flush_count: int = 100, - flush_size_bytes: int = 100 * MB_SIZE, + flush_limit_count: int = 100, + flush_limit_bytes: int = 100 * MB_SIZE, max_mutation_bytes: int = 20 * MB_SIZE, flush_interval: float = 5, ): self._queue_map : dict[row_key, list[Mutation]] = {} self._table : "Table" = table + self._max_mutation_bytes = max_mutation_bytes + self._flush_limit_bytes = flush_limit_bytes + self._flush_limit_count = flush_limit_count + self._queued_size = 0 + self._queued_count = 0 self._flush_timer_task : asyncio.Task[None] = asyncio.create_task(self._flush_timer(flush_interval)) + self._flush_tasks : list[asyncio.Task[None]] = [] async def _flush_timer(self, interval:float): """ @@ -74,8 +81,19 @@ async def append(self, row_key: str | 
bytes, mutations: Mutation | list[Mutation mutations = [mutations] if isinstance(row_key, str): row_key = row_key.encode("utf-8") + total_size = 0 + for idx, m in enumerate(mutations): + size = m.size() + if size > self._max_mutation_bytes: + raise ValueError(f"Mutation {idx} exceeds max mutation size: {m.size()} > {self._max_mutation_bytes}") + total_size += size existing_mutations = self._queue_map.setdefault(row_key, []) existing_mutations.extend(mutations) + self._queued_size += total_size + self._queued_count += len(mutations) + if self._queued_size > self._flush_limit_bytes or self._queued_count > self._flush_limit_count: + # start a new flush task + self._flush_tasks.append(asyncio.create_task(self.flush())) async def flush(self): """ @@ -85,7 +103,9 @@ async def flush(self): - MutationsExceptionGroup if any mutation in the batch fails """ entries : list[BulkMutationsEntry] = [] - for key, mutations in self._queue_map.items(): + # reset queue + old_queue, self._queue_map, self._queued_size, self._queued_count = self._queue_map, {}, 0, 0 + for key, mutations in old_queue.items(): entries.append(BulkMutationsEntry(key, mutations)) if entries: await self._table.bulk_mutate_rows(entries) @@ -98,11 +118,16 @@ async def __aexit__(self, exc_type, exc, tb): """For context manager API""" await self.close() - async def close(self): + async def close(self, timeout: float = 5.0): """ Flush queue and clean up resources """ await self.flush() self._flush_timer_task.cancel() - await self._flush_timer_task + for task in self._flush_tasks: + task.cancel() + group = asyncio.gather([*self._flush_tasks, self._flush_timer_task], return_exceptions=True) + self._flush_tasks = [] + await asyncio.wait_for(group, timeout=timeout) + From 9731f4c7bc3880748e3ffe8153847c096ee223e5 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 1 May 2023 12:45:07 -0700 Subject: [PATCH 045/213] improved queue --- google/cloud/bigtable/mutations_batcher.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/google/cloud/bigtable/mutations_batcher.py b/google/cloud/bigtable/mutations_batcher.py index 6d8810395..1d3d0c549 100644 --- a/google/cloud/bigtable/mutations_batcher.py +++ b/google/cloud/bigtable/mutations_batcher.py @@ -55,7 +55,7 @@ def __init__( max_mutation_bytes: int = 20 * MB_SIZE, flush_interval: float = 5, ): - self._queue_map : dict[row_key, list[Mutation]] = {} + self._queue : asyncio.Queue[BulkMutationsEntry] = asyncio.Queue() self._table : "Table" = table self._max_mutation_bytes = max_mutation_bytes self._flush_limit_bytes = flush_limit_bytes @@ -87,8 +87,8 @@ async def append(self, row_key: str | bytes, mutations: Mutation | list[Mutation if size > self._max_mutation_bytes: raise ValueError(f"Mutation {idx} exceeds max mutation size: {m.size()} > {self._max_mutation_bytes}") total_size += size - existing_mutations = self._queue_map.setdefault(row_key, []) - existing_mutations.extend(mutations) + new_batch = BulkMutationsEntry(row_key, mutations) + await self._queue.put(new_batch) self._queued_size += total_size self._queued_count += len(mutations) if self._queued_size > self._flush_limit_bytes or self._queued_count > self._flush_limit_count: @@ -104,9 +104,10 @@ async def flush(self): """ entries : list[BulkMutationsEntry] = [] # reset queue - old_queue, self._queue_map, self._queued_size, self._queued_count = self._queue_map, {}, 0, 0 - for key, mutations in old_queue.items(): - entries.append(BulkMutationsEntry(key, mutations)) + while not self._queue.empty(): + 
entries.append(await self._queue.get()) + self._queued_size = 0 + self._queued_count = 0 if entries: await self._table.bulk_mutate_rows(entries) From 34bf655e21aefa35aef7f2776f907439490bbaef Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 1 May 2023 13:21:28 -0700 Subject: [PATCH 046/213] raise exceptions on close --- google/cloud/bigtable/mutations.py | 6 ++++ google/cloud/bigtable/mutations_batcher.py | 33 +++++++++++----------- 2 files changed, 22 insertions(+), 17 deletions(-) diff --git a/google/cloud/bigtable/mutations.py b/google/cloud/bigtable/mutations.py index d991211dc..70b197e0a 100644 --- a/google/cloud/bigtable/mutations.py +++ b/google/cloud/bigtable/mutations.py @@ -138,3 +138,9 @@ def _to_dict(self) -> dict[str, Any]: def is_idempotent(self) -> bool: """Check if the mutation is idempotent""" return all(mutation.is_idempotent() for mutation in self.mutations) + + def size(self) -> int: + """ + Get the size of the mutation in bytes + """ + return getsizeof(self._to_dict()) diff --git a/google/cloud/bigtable/mutations_batcher.py b/google/cloud/bigtable/mutations_batcher.py index 1d3d0c549..0d4e6dd5b 100644 --- a/google/cloud/bigtable/mutations_batcher.py +++ b/google/cloud/bigtable/mutations_batcher.py @@ -19,8 +19,7 @@ from google.cloud.bigtable.mutations import Mutation from google.cloud.bigtable.mutations import BulkMutationsEntry -from google.cloud.bigtable.row import row_key -from google.cloud.bigtable.row_filters import RowFilter +from google.cloud.bigtable.exceptions import MutationsExceptionGroup if TYPE_CHECKING: from google.cloud.bigtable.client import Table # pragma: no cover @@ -62,6 +61,7 @@ def __init__( self._flush_limit_count = flush_limit_count self._queued_size = 0 self._queued_count = 0 + self.exceptions = [] self._flush_timer_task : asyncio.Task[None] = asyncio.create_task(self._flush_timer(flush_interval)) self._flush_tasks : list[asyncio.Task[None]] = [] @@ -73,23 +73,15 @@ async def _flush_timer(self, interval:float): await asyncio.sleep(interval) await self.flush() - async def append(self, row_key: str | bytes, mutations: Mutation | list[Mutation]): + async def append(self, mutations:BulkMutationsEntry): """ Add a new mutation to the internal queue """ - if isinstance(mutations, Mutation): - mutations = [mutations] - if isinstance(row_key, str): - row_key = row_key.encode("utf-8") - total_size = 0 - for idx, m in enumerate(mutations): - size = m.size() - if size > self._max_mutation_bytes: - raise ValueError(f"Mutation {idx} exceeds max mutation size: {m.size()} > {self._max_mutation_bytes}") - total_size += size - new_batch = BulkMutationsEntry(row_key, mutations) - await self._queue.put(new_batch) - self._queued_size += total_size + size = mutations.size() + if size > self._max_mutation_bytes: + raise ValueError(f"Mutation size exceeds max_mutation_bytes: {size} > {self._max_mutation_bytes}") + await self._queue.put(mutations) + self._queued_size += size self._queued_count += len(mutations) if self._queued_size > self._flush_limit_bytes or self._queued_count > self._flush_limit_count: # start a new flush task @@ -109,7 +101,11 @@ async def flush(self): self._queued_size = 0 self._queued_count = 0 if entries: - await self._table.bulk_mutate_rows(entries) + try: + await self._table.bulk_mutate_rows(entries) + except MutationsExceptionGroup as e: + mutation_exceptions = e.exceptions + self.exceptions.extend(mutation_exceptions) async def __aenter__(self): """For context manager API""" @@ -130,5 +126,8 @@ async def close(self, timeout: float = 
5.0): group = asyncio.gather([*self._flush_tasks, self._flush_timer_task], return_exceptions=True) self._flush_tasks = [] await asyncio.wait_for(group, timeout=timeout) + if self.exceptions: + # TODO: deal with indices + raise MutationsExceptionGroup(self.exceptions) From 582457a37900b74ddb9ad21803066c13be196a59 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 1 May 2023 13:35:24 -0700 Subject: [PATCH 047/213] added args for flush --- google/cloud/bigtable/mutations_batcher.py | 25 +++++++++++++--------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/google/cloud/bigtable/mutations_batcher.py b/google/cloud/bigtable/mutations_batcher.py index 0d4e6dd5b..0daad7a5a 100644 --- a/google/cloud/bigtable/mutations_batcher.py +++ b/google/cloud/bigtable/mutations_batcher.py @@ -71,7 +71,7 @@ async def _flush_timer(self, interval:float): """ while True: await asyncio.sleep(interval) - await self.flush() + await self.flush(timeout=None, raise_on_error=False) async def append(self, mutations:BulkMutationsEntry): """ @@ -85,14 +85,18 @@ async def append(self, mutations:BulkMutationsEntry): self._queued_count += len(mutations) if self._queued_size > self._flush_limit_bytes or self._queued_count > self._flush_limit_count: # start a new flush task - self._flush_tasks.append(asyncio.create_task(self.flush())) + self._flush_tasks.append(asyncio.create_task(self.flush(timeout=None, raise_exceptions=False))) - async def flush(self): + async def flush(self, *, timeout: float | None = 5.0, raise_exceptions=True): """ Send queue over network in as few calls as possible + Args: + - timeout: operation_timeout for underlying rpc, in seconds + - raise_exceptions: if True, raise MutationsExceptionGroup if any mutations fail. If False, + exceptions are saved in self.exceptions and raised on close() Raises: - - MutationsExceptionGroup if any mutation in the batch fails + - MutationsExceptionGroup if raise_exceptions is True and any mutations fail """ entries : list[BulkMutationsEntry] = [] # reset queue @@ -102,10 +106,12 @@ async def flush(self): self._queued_count = 0 if entries: try: - await self._table.bulk_mutate_rows(entries) + await self._table.bulk_mutate_rows(entries, operation_timeout=timeout, per_request_timeout=timeout) except MutationsExceptionGroup as e: - mutation_exceptions = e.exceptions - self.exceptions.extend(mutation_exceptions) + if raise_exceptions: + raise e + else: + self.exceptions.extend(e.exceptions) async def __aenter__(self): """For context manager API""" @@ -119,13 +125,12 @@ async def close(self, timeout: float = 5.0): """ Flush queue and clean up resources """ - await self.flush() + final_flush = self.flush(timeout=timeout, raise_exceptions=False) self._flush_timer_task.cancel() for task in self._flush_tasks: task.cancel() - group = asyncio.gather([*self._flush_tasks, self._flush_timer_task], return_exceptions=True) + await asyncio.gather([final_flush, *self._flush_tasks, self._flush_timer_task]) self._flush_tasks = [] - await asyncio.wait_for(group, timeout=timeout) if self.exceptions: # TODO: deal with indices raise MutationsExceptionGroup(self.exceptions) From eeb24a592e6ce6b07e8f7acc3d281076955b3705 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 1 May 2023 13:46:45 -0700 Subject: [PATCH 048/213] improved closing logic --- google/cloud/bigtable/mutations_batcher.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/google/cloud/bigtable/mutations_batcher.py b/google/cloud/bigtable/mutations_batcher.py index 
0daad7a5a..0e4ec15df 100644 --- a/google/cloud/bigtable/mutations_batcher.py +++ b/google/cloud/bigtable/mutations_batcher.py @@ -54,6 +54,7 @@ def __init__( max_mutation_bytes: int = 20 * MB_SIZE, flush_interval: float = 5, ): + self.closed : bool = False self._queue : asyncio.Queue[BulkMutationsEntry] = asyncio.Queue() self._table : "Table" = table self._max_mutation_bytes = max_mutation_bytes @@ -69,20 +70,25 @@ async def _flush_timer(self, interval:float): """ Flush queue on a timer """ - while True: + while not self.closed: await asyncio.sleep(interval) - await self.flush(timeout=None, raise_on_error=False) + # add new flush task to list + if not self.closed: + new_task = asyncio.create_task(self.flush(timeout=None, raise_exceptions=False)) + self._flush_tasks.append(new_task) async def append(self, mutations:BulkMutationsEntry): """ Add a new mutation to the internal queue """ + if self.closed: + raise RuntimeError("Cannot append to closed MutationsBatcher") size = mutations.size() if size > self._max_mutation_bytes: raise ValueError(f"Mutation size exceeds max_mutation_bytes: {size} > {self._max_mutation_bytes}") await self._queue.put(mutations) self._queued_size += size - self._queued_count += len(mutations) + self._queued_count += len(mutations.mutations) if self._queued_size > self._flush_limit_bytes or self._queued_count > self._flush_limit_count: # start a new flush task self._flush_tasks.append(asyncio.create_task(self.flush(timeout=None, raise_exceptions=False))) @@ -125,11 +131,12 @@ async def close(self, timeout: float = 5.0): """ Flush queue and clean up resources """ + self.closed = True final_flush = self.flush(timeout=timeout, raise_exceptions=False) + finalize_tasks = asyncio.wait_for(asyncio.gather(*self._flush_tasks), timeout=timeout) self._flush_timer_task.cancel() - for task in self._flush_tasks: - task.cancel() - await asyncio.gather([final_flush, *self._flush_tasks, self._flush_timer_task]) + # wait for all to finish + await asyncio.gather([final_flush, self._flush_timer_task, finalize_tasks]) self._flush_tasks = [] if self.exceptions: # TODO: deal with indices From 110cd96bd806b8f4a16e3b8310ed8adebef3117f Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 1 May 2023 14:20:43 -0700 Subject: [PATCH 049/213] use custom queue type to track size and count --- google/cloud/bigtable/mutations_batcher.py | 64 +++++++++++++++++++--- 1 file changed, 55 insertions(+), 9 deletions(-) diff --git a/google/cloud/bigtable/mutations_batcher.py b/google/cloud/bigtable/mutations_batcher.py index 0e4ec15df..2193696d9 100644 --- a/google/cloud/bigtable/mutations_batcher.py +++ b/google/cloud/bigtable/mutations_batcher.py @@ -25,6 +25,59 @@ from google.cloud.bigtable.client import Table # pragma: no cover +class _BatchMutationsQueue(asyncio.Queue[BulkMutationsEntry]): + """ + asyncio.Queue subclass that tracks the size and number of mutations + """ + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self._mutation_count = 0 + self._mutation_bytes_size = 0 + + @property + def mutation_count(self): + return self._mutation_count + + @mutation_count.setter + def mutation_count(self, value): + if value < 0: + raise ValueError("Mutation count cannot be negative") + self._mutation_count = value + + @property + def mutation_bytes_size(self): + return self._mutation_bytes_size + + @mutation_bytes_size.setter + def mutation_bytes_size(self, value): + if value < 0: + raise ValueError("Mutation bytes size cannot be negative") + self._mutation_bytes_size = value + 
+ def put_nowait(self, item:BulkMutationsEntry): + super().put_nowait(item) + self.mutation_count += len(item.mutations) + self.mutation_bytes_size += item.size() + + def get_nowait(self): + item = super().get_nowait() + self.mutation_count -= len(item.mutations) + self.mutation_bytes_size -= item.size() + return item + + async def put(self, item:BulkMutationsEntry): + await super().put(item) + self.mutation_count += len(item.mutations) + self.mutation_bytes_size += item.size() + + async def get(self): + item = await super().get() + self.mutation_count -= len(item.mutations) + self.mutation_bytes_size -= item.size() + return item + + class MutationsBatcher: """ Allows users to send batches using context manager API: @@ -55,13 +108,11 @@ def __init__( flush_interval: float = 5, ): self.closed : bool = False - self._queue : asyncio.Queue[BulkMutationsEntry] = asyncio.Queue() + self._queue : _BatchMutationsQueue = _BatchMutationsQueue() self._table : "Table" = table self._max_mutation_bytes = max_mutation_bytes self._flush_limit_bytes = flush_limit_bytes self._flush_limit_count = flush_limit_count - self._queued_size = 0 - self._queued_count = 0 self.exceptions = [] self._flush_timer_task : asyncio.Task[None] = asyncio.create_task(self._flush_timer(flush_interval)) self._flush_tasks : list[asyncio.Task[None]] = [] @@ -87,9 +138,7 @@ async def append(self, mutations:BulkMutationsEntry): if size > self._max_mutation_bytes: raise ValueError(f"Mutation size exceeds max_mutation_bytes: {size} > {self._max_mutation_bytes}") await self._queue.put(mutations) - self._queued_size += size - self._queued_count += len(mutations.mutations) - if self._queued_size > self._flush_limit_bytes or self._queued_count > self._flush_limit_count: + if self._queue.mutation_bytes_size > self._flush_limit_bytes or self.mutation_count > self._flush_limit_count: # start a new flush task self._flush_tasks.append(asyncio.create_task(self.flush(timeout=None, raise_exceptions=False))) @@ -108,8 +157,6 @@ async def flush(self, *, timeout: float | None = 5.0, raise_exceptions=True): # reset queue while not self._queue.empty(): entries.append(await self._queue.get()) - self._queued_size = 0 - self._queued_count = 0 if entries: try: await self._table.bulk_mutate_rows(entries, operation_timeout=timeout, per_request_timeout=timeout) @@ -142,4 +189,3 @@ async def close(self, timeout: float = 5.0): # TODO: deal with indices raise MutationsExceptionGroup(self.exceptions) - From 375a77dbe6f050c757fa567f9bbd92479b66b4e6 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 1 May 2023 15:19:49 -0700 Subject: [PATCH 050/213] added flow control --- google/cloud/bigtable/mutations_batcher.py | 75 ++++++++++++++++++++-- 1 file changed, 68 insertions(+), 7 deletions(-) diff --git a/google/cloud/bigtable/mutations_batcher.py b/google/cloud/bigtable/mutations_batcher.py index 2193696d9..de70e0760 100644 --- a/google/cloud/bigtable/mutations_batcher.py +++ b/google/cloud/bigtable/mutations_batcher.py @@ -20,11 +20,70 @@ from google.cloud.bigtable.mutations import Mutation from google.cloud.bigtable.mutations import BulkMutationsEntry from google.cloud.bigtable.exceptions import MutationsExceptionGroup +from google.cloud.bigtable.exceptions import FailedMutationEntryError if TYPE_CHECKING: from google.cloud.bigtable.client import Table # pragma: no cover +class _FlowControl: + + def __init__(self, table, max_mutation_count, max_mutation_bytes): + self.table = table + self.max_mutation_count = max_mutation_count + self.max_mutation_bytes = 
max_mutation_bytes + self.available_mutation_count : asyncio.Semaphore = asyncio.Semaphore(max_mutation_count) + self.available_mutation_bytes : asyncio.Semaphore = asyncio.Semaphore(max_mutation_bytes) + + def _mutation_fits(self, mutation: BulkMutationsEntry) -> bool: + return ( + not self.available_mutation_count.locked() + and not self.available_mutation_bytes.locked() + and self.available_mutation_count._value >= len(mutation.mutations) + and self.available_mutation_bytes._value >= mutation.size() + ) + + async def process_mutations(self, mutations:list[BulkMutationsEntry], timeout:float | None): + errors : list[FailedMutationEntryError] = [] + while mutations: + batch : list[BulkMutationsEntry] = [] + batch_bytes = 0 + # grab at least one mutation + next_mutation = mutations.pop() + next_mutation_size = next_mutation.size() + # do extra sanity check to avoid deadlocks + if len(next_mutation.mutations) > self.max_mutation_count: + raise ValueError( + f"Mutation count {len(next_mutation.mutations)} exceeds max mutation count {self.max_mutation_count}" + ) + if next_mutation_size > self.max_mutation_bytes: + raise ValueError( + f"Mutation size {next_mutation_size} exceeds max mutation size {self.max_mutation_bytes}" + ) + self.available_mutation_count.acquire(len(next_mutation.mutations)) + self.available_mutation_bytes.acquire(next_mutation_size) + # fill up batch until we hit lock + while mutations and self._mutation_fits(mutations[0]): + next_mutation = mutations.pop() + next_mutation_size = next_mutation.size() + await self.available_mutation_count.acquire(len(next_mutation.mutations)) + await self.available_mutation_bytes.acquire(next_mutation_size) + batch.append(next_mutation) + batch_bytes += next_mutation_size + # start mutate_rows rpc + try: + await self.table.mutate_rows(batch, operation_timeout=timeout, per_request_timeout=timeout) + except MutationsExceptionGroup as e: + errors.extend(e.exceptions) + finally: + # release locks + self.available_mutation_count.release(sum([len(m.mutations) for m in batch])) + self.available_mutation_bytes.release(batch_bytes) + # raise set of failed mutations on completion + if errors: + raise MutationsExceptionGroup(errors) + + class _BatchMutationsQueue(asyncio.Queue[BulkMutationsEntry]): """ asyncio.Queue subclass that tracks the size and number of mutations @@ -103,14 +162,14 @@ def __init__( self, table: "Table", flush_limit_count: int = 100, - flush_limit_bytes: int = 100 * MB_SIZE, - max_mutation_bytes: int = 20 * MB_SIZE, + flush_limit_bytes: int = 20 * MB_SIZE, + flow_control_max_count: int = 100000, + flow_control_max_bytes: int = 100 * MB_SIZE, flush_interval: float = 5, ): self.closed : bool = False self._queue : _BatchMutationsQueue = _BatchMutationsQueue() - self._table : "Table" = table - self._max_mutation_bytes = max_mutation_bytes + self._flow_control = _FlowControl(table, flow_control_max_count, flow_control_max_bytes) self._flush_limit_bytes = flush_limit_bytes self._flush_limit_count = flush_limit_count self.exceptions = [] @@ -135,8 +194,10 @@ async def append(self, mutations:BulkMutationsEntry): if self.closed: raise RuntimeError("Cannot append to closed MutationsBatcher") size = mutations.size() - if size > self._max_mutation_bytes: - raise ValueError(f"Mutation size exceeds max_mutation_bytes: {size} > {self._max_mutation_bytes}") + if size > self._flow_control.max_mutation_bytes: + raise ValueError(f"Mutation size {size} exceeds flow_control_max_bytes: {self._flow_control.max_mutation_bytes}") + if 
len(mutations.mutations) > self._flow_control.max_mutation_count: + raise ValueError(f"Mutation count {len(mutations.mutations)} exceeds flow_control_max_count: {self._flow_control.max_mutation_count}") await self._queue.put(mutations) if self._queue.mutation_bytes_size > self._flush_limit_bytes or self.mutation_count > self._flush_limit_count: # start a new flush task @@ -159,7 +220,7 @@ async def flush(self, *, timeout: float | None = 5.0, raise_exceptions=True): entries.append(await self._queue.get()) if entries: try: - await self._table.bulk_mutate_rows(entries, operation_timeout=timeout, per_request_timeout=timeout) + await self._flow_control.mutate_rows(entries, timeout=timeout) except MutationsExceptionGroup as e: if raise_exceptions: raise e From 5f9fa3f5bcdb672359aa0f9d25fa741c76a2bb42 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 1 May 2023 15:32:05 -0700 Subject: [PATCH 051/213] added comments and todos --- google/cloud/bigtable/mutations_batcher.py | 35 ++++++++++++++++++++-- 1 file changed, 32 insertions(+), 3 deletions(-) diff --git a/google/cloud/bigtable/mutations_batcher.py b/google/cloud/bigtable/mutations_batcher.py index de70e0760..582c653b5 100644 --- a/google/cloud/bigtable/mutations_batcher.py +++ b/google/cloud/bigtable/mutations_batcher.py @@ -27,8 +27,19 @@ class _FlowControl: + """ + Manages underlying rpcs for MutationsBatcher. Ensures that in-flight requests + stay within the configured limits (max_mutation_count, max_mutation_bytes). + """ def __init__(self, table, max_mutation_count, max_mutation_bytes): + """ + Args: + - table: Table object that performs rpc calls + - max_mutation_count: maximum number of mutations to send in a single rpc. + This corresponds to individual mutations in a single BulkMutationsEntry. + - max_mutation_bytes: maximum number of bytes to send in a single rpc + """ self.table = table self.max_mutation_count = max_mutation_count self.max_mutation_bytes = max_mutation_bytes @@ -36,6 +47,10 @@ def __init__(self, table, max_mutation_count, max_mutation_bytes): self.available_mutation_bytes : asyncio.Semaphore = asyncio.Semaphore(max_mutation_bytes) def _mutation_fits(self, mutation: BulkMutationsEntry) -> bool: + """ + Checks if a mutation fits within the current flow control limits + """ + # TODO: is _value safe to use? return ( not self.available_mutation_count.locked() and not self.available_mutation_bytes.locked() @@ -44,6 +59,10 @@ def _mutation_fits(self, mutation: BulkMutationsEntry) -> bool: ) async def process_mutations(self, mutations:list[BulkMutationsEntry], timeout:float | None): + """ + Ascynronously send the set of mutations to the server. This method will block + when the flow control limits are reached. 
+ """ errors : list[FailedMutationEntryError] = [] while mutations: batch : list[BulkMutationsEntry] = [] @@ -72,6 +91,7 @@ async def process_mutations(self, mutations:list[BulkMutationsEntry], timeout:fl batch_bytes += next_mutation_size # start mutate_rows rpc try: + # TODO: free up space as individual mutations are completed await self.table.mutate_rows(batch, operation_timeout=timeout, per_request_timeout=timeout) except MutationsExceptionGroup as e: errors.extend(e.exceptions) @@ -161,12 +181,21 @@ class MutationsBatcher: def __init__( self, table: "Table", + flush_interval: float = 5, flush_limit_count: int = 100, flush_limit_bytes: int = 20 * MB_SIZE, flow_control_max_count: int = 100000, flow_control_max_bytes: int = 100 * MB_SIZE, - flush_interval: float = 5, ): + """ + Args: + - table: Table to preform rpc calls + - flush_interval: Automatically flush every flush_interval seconds + - flush_limit_count: Flush immediately after flush_limit_count mutations are added + - flush_limit_bytes: Flush immediately after flush_limit_bytes bytes are added + - flow_control_max_count: Maximum number of inflight mutations + - flow_control_max_bytes: Maximum number of inflight bytes + """ self.closed : bool = False self._queue : _BatchMutationsQueue = _BatchMutationsQueue() self._flow_control = _FlowControl(table, flow_control_max_count, flow_control_max_bytes) @@ -178,7 +207,7 @@ def __init__( async def _flush_timer(self, interval:float): """ - Flush queue on a timer + Triggers new flush tasks every `interval` seconds """ while not self.closed: await asyncio.sleep(interval) @@ -189,7 +218,7 @@ async def _flush_timer(self, interval:float): async def append(self, mutations:BulkMutationsEntry): """ - Add a new mutation to the internal queue + Add a new set of mutations to the internal queue """ if self.closed: raise RuntimeError("Cannot append to closed MutationsBatcher") From e63cfc22b925a6a9d049c11cfbe539055609412f Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Tue, 2 May 2023 09:46:50 -0700 Subject: [PATCH 052/213] allow None for batcher limits --- google/cloud/bigtable/mutations_batcher.py | 57 +++++++++++++--------- 1 file changed, 33 insertions(+), 24 deletions(-) diff --git a/google/cloud/bigtable/mutations_batcher.py b/google/cloud/bigtable/mutations_batcher.py index 582c653b5..749eb3295 100644 --- a/google/cloud/bigtable/mutations_batcher.py +++ b/google/cloud/bigtable/mutations_batcher.py @@ -32,30 +32,33 @@ class _FlowControl: stay within the configured limits (max_mutation_count, max_mutation_bytes). """ - def __init__(self, table, max_mutation_count, max_mutation_bytes): + def __init__(self, table, max_mutation_count:float|None, max_mutation_bytes:float|None): """ Args: - table: Table object that performs rpc calls - max_mutation_count: maximum number of mutations to send in a single rpc. This corresponds to individual mutations in a single BulkMutationsEntry. - - max_mutation_bytes: maximum number of bytes to send in a single rpc + If None, no limit is enforced. + - max_mutation_bytes: maximum number of bytes to send in a single rpc. + If None, no limit is enforced. 
""" self.table = table + if max_mutation_count is None: + self.max_mutation_count = float("inf") + if max_mutation_bytes is None: + self.max_mutation_bytes = float("inf") self.max_mutation_count = max_mutation_count self.max_mutation_bytes = max_mutation_bytes self.available_mutation_count : asyncio.Semaphore = asyncio.Semaphore(max_mutation_count) self.available_mutation_bytes : asyncio.Semaphore = asyncio.Semaphore(max_mutation_bytes) - def _mutation_fits(self, mutation: BulkMutationsEntry) -> bool: + def is_locked(self) -> bool: """ - Checks if a mutation fits within the current flow control limits + Check if either flow control semaphore is locked """ - # TODO: is _value safe to use? return ( - not self.available_mutation_count.locked() - and not self.available_mutation_bytes.locked() - and self.available_mutation_count._value >= len(mutation.mutations) - and self.available_mutation_bytes._value >= mutation.size() + self.available_mutation_count.locked() + or self.available_mutation_bytes.locked() ) async def process_mutations(self, mutations:list[BulkMutationsEntry], timeout:float | None): @@ -82,7 +85,7 @@ async def process_mutations(self, mutations:list[BulkMutationsEntry], timeout:fl self.available_mutation_count.acquire(len(next_mutation.mutations)) self.available_mutation_bytes.acquire(next_mutation_size) # fill up batch until we hit lock - while mutations and self._mutation_fits(mutations[0]): + while mutations and not self.is_locked(): next_mutation = mutations.pop() next_mutation_size = next_mutation.size() await self.available_mutation_count.acquire(len(next_mutation.mutations)) @@ -181,34 +184,40 @@ class MutationsBatcher: def __init__( self, table: "Table", - flush_interval: float = 5, - flush_limit_count: int = 100, - flush_limit_bytes: int = 20 * MB_SIZE, - flow_control_max_count: int = 100000, - flow_control_max_bytes: int = 100 * MB_SIZE, + flush_interval: float | None = 5, + flush_limit_count: int | None = 100, + flush_limit_bytes: int | None = 20 * MB_SIZE, + flow_control_max_count: int | None = 100000, + flow_control_max_bytes: int | None = 100 * MB_SIZE, ): """ Args: - table: Table to preform rpc calls - flush_interval: Automatically flush every flush_interval seconds - - flush_limit_count: Flush immediately after flush_limit_count mutations are added - - flush_limit_bytes: Flush immediately after flush_limit_bytes bytes are added - - flow_control_max_count: Maximum number of inflight mutations - - flow_control_max_bytes: Maximum number of inflight bytes + - flush_limit_count: Flush immediately after flush_limit_count mutations are added. + If None, this limit is ignored. + - flush_limit_bytes: Flush immediately after flush_limit_bytes bytes are added. + If None, this limit is ignored. + - flow_control_max_count: Maximum number of inflight mutations. + If None, this limit is ignored. + - flow_control_max_bytes: Maximum number of inflight bytes. + If None, this limit is ignored. 
""" self.closed : bool = False self._queue : _BatchMutationsQueue = _BatchMutationsQueue() self._flow_control = _FlowControl(table, flow_control_max_count, flow_control_max_bytes) - self._flush_limit_bytes = flush_limit_bytes - self._flush_limit_count = flush_limit_count + self._flush_limit_bytes = flush_limit_bytes if flush_limit_bytes is not None else float("inf") + self._flush_limit_count = flush_limit_count if flush_limit_count is not None else float("inf") self.exceptions = [] self._flush_timer_task : asyncio.Task[None] = asyncio.create_task(self._flush_timer(flush_interval)) self._flush_tasks : list[asyncio.Task[None]] = [] - async def _flush_timer(self, interval:float): + async def _flush_timer(self, interval:float | None): """ Triggers new flush tasks every `interval` seconds """ + if interval is None: + return while not self.closed: await asyncio.sleep(interval) # add new flush task to list @@ -228,8 +237,8 @@ async def append(self, mutations:BulkMutationsEntry): if len(mutations.mutations) > self._flow_control.max_mutation_count: raise ValueError(f"Mutation count {len(mutations.mutations)} exceeds flow_control_max_count: {self._flow_control.max_mutation_count}") await self._queue.put(mutations) - if self._queue.mutation_bytes_size > self._flush_limit_bytes or self.mutation_count > self._flush_limit_count: - # start a new flush task + # start a new flush task if limits exceeded + if self._queue.mutation_count > self._flush_limit_count or self._queue.mutation_bytes_size > self._flush_limit_bytes: self._flush_tasks.append(asyncio.create_task(self.flush(timeout=None, raise_exceptions=False))) async def flush(self, *, timeout: float | None = 5.0, raise_exceptions=True): From d5e15aacc85b85626e1f3c2e3b6bd363a338cf94 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Tue, 2 May 2023 10:36:31 -0700 Subject: [PATCH 053/213] strip indices from exceptions --- google/cloud/bigtable/exceptions.py | 5 +++-- google/cloud/bigtable/mutations_batcher.py | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/google/cloud/bigtable/exceptions.py b/google/cloud/bigtable/exceptions.py index 0b5ff4e61..5d5a79374 100644 --- a/google/cloud/bigtable/exceptions.py +++ b/google/cloud/bigtable/exceptions.py @@ -119,14 +119,15 @@ class FailedMutationEntryError(Exception): def __init__( self, - failed_idx: int, + failed_idx: int | None, failed_mutation_entry: "BulkMutationsEntry", cause: Exception, ): idempotent_msg = ( "idempotent" if failed_mutation_entry.is_idempotent() else "non-idempotent" ) - message = f"Failed {idempotent_msg} mutation entry at index {failed_idx} with cause: {cause!r}" + index_msg = f" at index {failed_idx}" if failed_idx is not None else "" + message = f"Failed {idempotent_msg} mutation entry {index_msg} with cause: {cause!r}" super().__init__(message) self.index = failed_idx self.entry = failed_mutation_entry diff --git a/google/cloud/bigtable/mutations_batcher.py b/google/cloud/bigtable/mutations_batcher.py index 749eb3295..b9a03e79f 100644 --- a/google/cloud/bigtable/mutations_batcher.py +++ b/google/cloud/bigtable/mutations_batcher.py @@ -263,7 +263,9 @@ async def flush(self, *, timeout: float | None = 5.0, raise_exceptions=True): if raise_exceptions: raise e else: - self.exceptions.extend(e.exceptions) + for failed_mutation_exc in e.exceptions: + failed_mutation_exc.index = None + self.exceptions.append(failed_mutation_exc) async def __aenter__(self): """For context manager API""" @@ -285,6 +287,5 @@ async def close(self, timeout: float = 5.0): await 
asyncio.gather([final_flush, self._flush_timer_task, finalize_tasks]) self._flush_tasks = [] if self.exceptions: - # TODO: deal with indices raise MutationsExceptionGroup(self.exceptions) From 82ea61f2b852f32054427f21c99adb3d5c74a1c2 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Tue, 2 May 2023 11:00:59 -0700 Subject: [PATCH 054/213] refactored mutate_rows logic into helper function --- google/cloud/bigtable/_mutate_rows.py | 92 +++++++++++++++++++++++++++ google/cloud/bigtable/client.py | 78 +++-------------------- 2 files changed, 100 insertions(+), 70 deletions(-) diff --git a/google/cloud/bigtable/_mutate_rows.py b/google/cloud/bigtable/_mutate_rows.py index c3a20e7f8..f75a72cd8 100644 --- a/google/cloud/bigtable/_mutate_rows.py +++ b/google/cloud/bigtable/_mutate_rows.py @@ -17,6 +17,8 @@ from typing import Callable, Any, TYPE_CHECKING from google.api_core import exceptions as core_exceptions +from google.api_core import retry_async as retries +import google.cloud.bigtable.exceptions as bt_exceptions if TYPE_CHECKING: from google.cloud.bigtable_v2.services.bigtable.async_client import ( @@ -33,6 +35,96 @@ class _MutateRowsIncomplete(RuntimeError): pass +async def _mutate_rows_operation( + gapic_client: "BigtableAsyncClient", + request: dict[str, Any], + mutation_entries: list["BulkMutationsEntry"], + operation_timeout: float, + per_request_timeout: float | None, +): + """ + Helper function for managing a single mutate_rows operation, end-to-end. + + Args: + - gapic_client: the client to use for the mutate_rows call + - request: A request dict containing table name, app profile id, and other details to inclide in the request + - mutation_entries: a list of BulkMutationsEntry objects to send to the server + - operation_timeout: the timeout to use for the entire operation, in seconds. + - per_request_timeout: the timeout to use for each mutate_rows attempt, in seconds. + If not specified, the request will run until operation_timeout is reached. 
+ """ + mutations_dict: dict[int, BulkMutationsEntry | None] = { + idx: mut for idx, mut in enumerate(mutation_entries) + } + error_dict: dict[int, list[Exception]] = {idx: [] for idx in mutations_dict.keys()} + + predicate = retries.if_exception_type( + core_exceptions.DeadlineExceeded, + core_exceptions.ServiceUnavailable, + core_exceptions.Aborted, + _MutateRowsIncomplete, + ) + + def on_error_fn(exc): + if predicate(exc) and not isinstance(exc, _MutateRowsIncomplete): + # add this exception to list for each active mutation + for idx in error_dict.keys(): + if mutations_dict[idx] is not None: + error_dict[idx].append(exc) + # remove non-idempotent mutations from mutations_dict, so they are not retried + for idx, mut in mutations_dict.items(): + if mut is not None and not mut.is_idempotent(): + mutations_dict[idx] = None + + retry = retries.AsyncRetry( + predicate=predicate, + on_error=on_error_fn, + timeout=operation_timeout, + initial=0.01, + multiplier=2, + maximum=60, + ) + # wrap attempt in retry logic + retry_wrapped = retry(_mutate_rows_retryable_attempt) + # convert RetryErrors from retry wrapper into DeadlineExceeded errors + deadline_wrapped = bt_exceptions._convert_retry_deadline( + retry_wrapped, operation_timeout + ) + try: + # trigger mutate_rows + await deadline_wrapped( + gapic_client, + request, + per_request_timeout, + mutations_dict, + error_dict, + predicate, + ) + except Exception as exc: + # exceptions raised by retryable are added to the list of exceptions for all unprocessed mutations + for idx in error_dict.keys(): + if mutations_dict[idx] is not None: + error_dict[idx].append(exc) + finally: + # raise exception detailing incomplete mutations + all_errors = [] + for idx, exc_list in error_dict.items(): + if exc_list: + if len(exc_list) == 1: + cause_exc = exc_list[0] + else: + cause_exc = bt_exceptions.RetryExceptionGroup(exc_list) + all_errors.append( + bt_exceptions.FailedMutationEntryError( + idx, mutation_entries[idx], cause_exc + ) + ) + if all_errors: + raise bt_exceptions.MutationsExceptionGroup( + all_errors, len(mutation_entries) + ) + + async def _mutate_rows_retryable_attempt( gapic_client: "BigtableAsyncClient", request: dict[str, Any], diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index b312a71a1..2f2d14fc8 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -39,14 +39,11 @@ import google.auth._default from google.api_core import client_options as client_options_lib -from google.cloud.bigtable.exceptions import RetryExceptionGroup -from google.cloud.bigtable.exceptions import FailedMutationEntryError -from google.cloud.bigtable.exceptions import MutationsExceptionGroup + from google.cloud.bigtable.exceptions import _convert_retry_deadline from google.cloud.bigtable.mutations import Mutation, BulkMutationsEntry -from google.cloud.bigtable._mutate_rows import _mutate_rows_retryable_attempt -from google.cloud.bigtable._mutate_rows import _MutateRowsIncomplete +from google.cloud.bigtable._mutate_rows import _mutate_rows_operation if TYPE_CHECKING: from google.cloud.bigtable.mutations_batcher import MutationsBatcher @@ -706,72 +703,13 @@ async def bulk_mutate_rows( if self.app_profile_id: request["app_profile_id"] = self.app_profile_id - mutations_dict: dict[int, BulkMutationsEntry | None] = { - idx: mut for idx, mut in enumerate(mutation_entries) - } - error_dict: dict[int, list[Exception]] = { - idx: [] for idx in mutations_dict.keys() - } - - predicate = retries.if_exception_type( - 
core_exceptions.DeadlineExceeded, - core_exceptions.ServiceUnavailable, - core_exceptions.Aborted, - _MutateRowsIncomplete, - ) - - def on_error_fn(exc): - if predicate(exc) and not isinstance(exc, _MutateRowsIncomplete): - # add this exception to list for each active mutation - for idx in error_dict.keys(): - if mutations_dict[idx] is not None: - error_dict[idx].append(exc) - # remove non-idempotent mutations from mutations_dict, so they are not retried - for idx, mut in mutations_dict.items(): - if mut is not None and not mut.is_idempotent(): - mutations_dict[idx] = None - - retry = retries.AsyncRetry( - predicate=predicate, - on_error=on_error_fn, - timeout=operation_timeout, - initial=0.01, - multiplier=2, - maximum=60, + await _mutate_rows_operation( + self.client._gapic_client, + request, + mutation_entries, + operation_timeout, + per_request_timeout, ) - # wrap attempt in retry logic - retry_wrapped = retry(_mutate_rows_retryable_attempt) - # convert RetryErrors from retry wrapper into DeadlineExceeded errors - deadline_wrapped = _convert_retry_deadline(retry_wrapped, operation_timeout) - try: - # trigger mutate_rows - await deadline_wrapped( - self.client._gapic_client, - request, - per_request_timeout, - mutations_dict, - error_dict, - predicate, - ) - except Exception as exc: - # exceptions raised by retryable are added to the list of exceptions for all unprocessed mutations - for idx in error_dict.keys(): - if mutations_dict[idx] is not None: - error_dict[idx].append(exc) - finally: - # raise exception detailing incomplete mutations - all_errors = [] - for idx, exc_list in error_dict.items(): - if exc_list: - if len(exc_list) == 1: - cause_exc = exc_list[0] - else: - cause_exc = RetryExceptionGroup(exc_list) - all_errors.append( - FailedMutationEntryError(idx, mutation_entries[idx], cause_exc) - ) - if all_errors: - raise MutationsExceptionGroup(all_errors, len(mutation_entries)) async def check_and_mutate_row( self, From fa42b8606f27d6a4b03db02e48b6dd63abc666b4 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Tue, 2 May 2023 12:58:51 -0700 Subject: [PATCH 055/213] implemented callbacks for mutate_rows --- google/cloud/bigtable/_mutate_rows.py | 43 ++++++++++++----- google/cloud/bigtable/client.py | 15 +++++- tests/unit/test__mutate_rows.py | 69 ++++++++++++++++++++++++++- tests/unit/test_client.py | 42 ++++++++++++++++ 4 files changed, 155 insertions(+), 14 deletions(-) diff --git a/google/cloud/bigtable/_mutate_rows.py b/google/cloud/bigtable/_mutate_rows.py index f75a72cd8..f785c1056 100644 --- a/google/cloud/bigtable/_mutate_rows.py +++ b/google/cloud/bigtable/_mutate_rows.py @@ -41,6 +41,8 @@ async def _mutate_rows_operation( mutation_entries: list["BulkMutationsEntry"], operation_timeout: float, per_request_timeout: float | None, + on_terminal_state: Callable[["BulkMutationsEntry", Exception | None], None] + | None = None, ): """ Helper function for managing a single mutate_rows operation, end-to-end. @@ -52,6 +54,8 @@ async def _mutate_rows_operation( - operation_timeout: the timeout to use for the entire operation, in seconds. - per_request_timeout: the timeout to use for each mutate_rows attempt, in seconds. If not specified, the request will run until operation_timeout is reached. + - on_terminal_state: If given, this function will be called as soon as a mutation entry + reaches a terminal state (success or failure). 
""" mutations_dict: dict[int, BulkMutationsEntry | None] = { idx: mut for idx, mut in enumerate(mutation_entries) @@ -99,6 +103,7 @@ def on_error_fn(exc): mutations_dict, error_dict, predicate, + on_terminal_state, ) except Exception as exc: # exceptions raised by retryable are added to the list of exceptions for all unprocessed mutations @@ -114,11 +119,13 @@ def on_error_fn(exc): cause_exc = exc_list[0] else: cause_exc = bt_exceptions.RetryExceptionGroup(exc_list) + entry = mutation_entries[idx] all_errors.append( - bt_exceptions.FailedMutationEntryError( - idx, mutation_entries[idx], cause_exc - ) + bt_exceptions.FailedMutationEntryError(idx, entry, cause_exc) ) + # call on_terminal_state for each unreported failed mutation + if on_terminal_state and mutations_dict[idx] is not None: + on_terminal_state(entry, cause_exc) if all_errors: raise bt_exceptions.MutationsExceptionGroup( all_errors, len(mutation_entries) @@ -132,6 +139,8 @@ async def _mutate_rows_retryable_attempt( mutation_dict: dict[int, "BulkMutationsEntry" | None], error_dict: dict[int, list[Exception]], predicate: Callable[[Exception], bool], + on_terminal_state: Callable[["BulkMutationsEntry", Exception | None], None] + | None = None, ): """ Helper function for managing a single mutate_rows attempt. @@ -152,6 +161,8 @@ async def _mutate_rows_retryable_attempt( - error_dict: a dictionary tracking errors associated with each entry index. Each retry will append a new error. Successful mutations will clear the error list. - predicate: a function that takes an exception and returns True if the exception is retryable. + - on_terminal_state: If given, this function will be called as soon as a mutation entry + reaches a terminal state (success or failure). Raises: - _MutateRowsIncomplete: if one or more retryable mutations remain incomplete at the end of the function - GoogleAPICallError: if the server returns an error on the grpc call @@ -172,24 +183,34 @@ async def _mutate_rows_retryable_attempt( for result in result_list.entries: # convert sub-request index to global index idx = index_map[result.index] + entry = mutation_dict[idx] + terminal_state = False + exc = None + if entry is None: + # this entry has already reached a terminal state + continue if result.status.code == 0: # mutation succeeded - mutation_dict[idx] = None error_dict[idx] = [] - if result.status.code != 0: + terminal_state = True + else: # mutation failed - exception = core_exceptions.from_grpc_status( + exc = core_exceptions.from_grpc_status( result.status.code, result.status.message, details=result.status.details, ) - error_dict[idx].append(exception) + error_dict[idx].append(exc) # if mutation is non-idempotent or the error is not retryable, # mark the mutation as terminal - entry = mutation_dict[idx] - if entry is not None: - if not predicate(exception) or not entry.is_idempotent(): - mutation_dict[idx] = None + if not predicate(exc) or not entry.is_idempotent(): + terminal_state = True + # if the mutation is terminal and won't be retried, remove it from the mutation_dict + if terminal_state: + mutation_dict[idx] = None + if on_terminal_state is not None: + on_terminal_state(entry, exc) + # check if attempt succeeded, or needs to be retried if any(mutation is not None for mutation in mutation_dict.values()): # unfinished work; raise exception to trigger retry raise _MutateRowsIncomplete() diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 2f2d14fc8..e95849e65 100644 --- a/google/cloud/bigtable/client.py +++ 
b/google/cloud/bigtable/client.py @@ -15,7 +15,7 @@ from __future__ import annotations -from typing import cast, Any, Optional, AsyncIterable, Set, TYPE_CHECKING +from typing import cast, Any, Optional, AsyncIterable, Set, Callable, TYPE_CHECKING import asyncio import grpc @@ -659,6 +659,7 @@ async def bulk_mutate_rows( *, operation_timeout: float | None = 60, per_request_timeout: float | None = None, + on_success: Callable[[BulkMutationsEntry], None] | None = None, ): """ Applies mutations for multiple rows in a single batched request. @@ -684,7 +685,8 @@ async def bulk_mutate_rows( in seconds. If it takes longer than this time to complete, the request will be cancelled with a DeadlineExceeded exception, and a retry will be attempted if within operation_timeout budget - + - on_success: a callback function that will be called when each mutation + entry is confirmed to be applied successfully. Raises: - MutationsExceptionGroup if one or more mutations fails Contains details about any failed entries in .exceptions @@ -703,12 +705,21 @@ async def bulk_mutate_rows( if self.app_profile_id: request["app_profile_id"] = self.app_profile_id + callback: Callable[[BulkMutationsEntry, Exception | None], None] | None = None + if on_success is not None: + # convert on_terminal_state callback to callback for successful results only + # failed results will be rasied as exceptions + def callback(entry: BulkMutationsEntry, exc: Exception | None): + if exc is None and on_success is not None: + on_success(entry) + await _mutate_rows_operation( self.client._gapic_client, request, mutation_entries, operation_timeout, per_request_timeout, + on_terminal_state=callback, ) async def check_and_mutate_row( diff --git a/tests/unit/test__mutate_rows.py b/tests/unit/test__mutate_rows.py index 4d0c7820f..9748e513e 100644 --- a/tests/unit/test__mutate_rows.py +++ b/tests/unit/test__mutate_rows.py @@ -127,7 +127,6 @@ async def test_partial_success_non_retryable(self): mutations = {0: success_mutation, 1: failure_mutation, 2: success_mutation_2} errors = {0: [], 1: [], 2: []} client = self._make_mock_client(mutations, error_dict={1: 300}) - # raise retryable error 3 times, then raise non-retryable error expected_request = {} expected_timeout = 9 await _mutate_rows_retryable_attempt( @@ -143,3 +142,71 @@ async def test_partial_success_non_retryable(self): assert len(errors[1]) == 1 assert errors[1][0].grpc_status_code == 300 assert errors[2] == [] + + @pytest.mark.asyncio + async def test_on_terminal_state_no_retries(self): + """ + Should call on_terminal_state for each successful or non-retryable mutation + """ + from google.cloud.bigtable._mutate_rows import _mutate_rows_retryable_attempt + + success_mutation = mock.Mock() + success_mutation_2 = mock.Mock() + failure_mutation = mock.Mock() + mutations = {0: success_mutation, 1: failure_mutation, 2: success_mutation_2} + callback = mock.Mock() + errors = {0: [], 1: [], 2: []} + client = self._make_mock_client(mutations, error_dict={1: 300}) + # raise retryable error 3 times, then raise non-retryable error + await _mutate_rows_retryable_attempt( + client, + {}, + 9, + mutations, + errors, + lambda x: False, + callback, + ) + assert callback.call_count == 3 + call_args = callback.call_args_list + assert call_args[0][0][0] == success_mutation + assert call_args[0][0][1] is None + assert call_args[1][0][0] == failure_mutation + assert call_args[1][0][1].grpc_status_code == 300 + assert call_args[2][0][0] == success_mutation_2 + assert call_args[2][0][1] is None + + 
@pytest.mark.asyncio + async def test_on_terminal_state_with_retries(self): + """ + Should not call on_terminal_state for retryable mutations + """ + from google.cloud.bigtable._mutate_rows import ( + _mutate_rows_retryable_attempt, + _MutateRowsIncomplete, + ) + + success_mutation = mock.Mock() + success_mutation_2 = mock.Mock() + failure_mutation = mock.Mock() + mutations = {0: success_mutation, 1: failure_mutation, 2: success_mutation_2} + callback = mock.Mock() + errors = {0: [], 1: [], 2: []} + client = self._make_mock_client(mutations, error_dict={1: 300}) + # raise retryable error 3 times, then raise non-retryable error + with pytest.raises(_MutateRowsIncomplete): + await _mutate_rows_retryable_attempt( + client, + {}, + 9, + mutations, + errors, + lambda x: True, + callback, + ) + assert callback.call_count == 2 + call_args = callback.call_args_list + assert call_args[0][0][0] == success_mutation + assert call_args[0][0][1] is None + assert call_args[1][0][0] == success_mutation_2 + assert call_args[1][0][1] is None diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 514440f95..8850639bf 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -1316,3 +1316,45 @@ async def test_bulk_mutate_error_index(self): assert isinstance(cause.exceptions[0], Aborted) assert isinstance(cause.exceptions[1], DeadlineExceeded) assert isinstance(cause.exceptions[2], FailedPrecondition) + + @pytest.mark.asyncio + async def test_bulk_mutate_rows_on_success(self): + """ + on_success should be called for each successful mutation + """ + from google.api_core.exceptions import ( + Aborted, + FailedPrecondition, + ) + from google.cloud.bigtable.exceptions import ( + MutationsExceptionGroup, + ) + + callback = mock.Mock() + async with self._make_client(project="project") as client: + async with client.get_table("instance", "table") as table: + with mock.patch.object( + client._gapic_client, "mutate_rows" + ) as mock_gapic: + # fail with retryable errors, then a non-retryable one + mock_gapic.side_effect = [ + self._mock_response([None, Aborted("mock"), None]), + self._mock_response([FailedPrecondition("final")]), + ] + with pytest.raises(MutationsExceptionGroup): + mutation = mutations.SetCell( + "family", b"qualifier", b"value", timestamp_micros=123 + ) + entries = [ + mutations.BulkMutationsEntry( + (f"row_key_{i}").encode(), [mutation] + ) + for i in range(3) + ] + assert mutation.is_idempotent() is True + await table.bulk_mutate_rows( + entries, operation_timeout=1000, on_success=callback + ) + assert callback.call_count == 2 + assert callback.call_args_list[0][0][0] == entries[0] + assert callback.call_args_list[1][0][0] == entries[2] From ee237213ace9e82f2dd18e946d37688f0a7220c1 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Tue, 2 May 2023 13:30:19 -0700 Subject: [PATCH 056/213] release locks as mutations are completed, instead of waiting until end --- google/cloud/bigtable/mutations_batcher.py | 35 ++++++++++++++++------ 1 file changed, 26 insertions(+), 9 deletions(-) diff --git a/google/cloud/bigtable/mutations_batcher.py b/google/cloud/bigtable/mutations_batcher.py index b9a03e79f..cc8fa4b6e 100644 --- a/google/cloud/bigtable/mutations_batcher.py +++ b/google/cloud/bigtable/mutations_batcher.py @@ -17,11 +17,12 @@ import asyncio from typing import TYPE_CHECKING -from google.cloud.bigtable.mutations import Mutation from google.cloud.bigtable.mutations import BulkMutationsEntry from google.cloud.bigtable.exceptions import MutationsExceptionGroup from 
google.cloud.bigtable.exceptions import FailedMutationEntryError +from google.cloud.bigtable._mutate_rows import _mutate_rows_operation + if TYPE_CHECKING: from google.cloud.bigtable.client import Table # pragma: no cover @@ -61,6 +62,29 @@ def is_locked(self) -> bool: or self.available_mutation_bytes.locked() ) + def _on_mutation_entry_complete(self, mutation_entry:BulkMutationsEntry, exception:Exception|None): + """ + Each time an in-flight mutation is complete, release the flow control semaphore + """ + self.available_mutation_count.release(len(mutation_entry.mutations)) + self.available_mutation_bytes.release(mutation_entry.size()) + + def _execute_mutate_rows(self, batch:list[BulkMutationsEntry], timeout:float | None): + """ + Helper to execute mutation operation on a batch + + Args: + - batch: list of BulkMutationsEntry objects to send to server + - timeout: timeout in seconds. Used as operation_timeout and per_request_timeout. + If not given, will use table defaults + """ + request = {"table_name": self.table.table_name} + if self.table.app_profile_id: + request["app_profile_id"] = self.table.app_profile_id + operation_timeout = timeout or self.table.default_operation_timeout + request_timeout = timeout or self.table.default_per_request_timeout + await _mutate_rows_operation(self.table.client._gapic_client, request, batch, operation_timeout, request_timeout, self._on_mutation_entry_complete) + async def process_mutations(self, mutations:list[BulkMutationsEntry], timeout:float | None): """ Asynchronously send the set of mutations to the server. This method will block @@ -69,7 +93,6 @@ async def process_mutations(self, mutations:list[BulkMutationsEntry], timeout:fl errors : list[FailedMutationEntryError] = [] while mutations: batch : list[BulkMutationsEntry] = [] - batch_bytes = 0 # grab at least one mutation next_mutation = mutations.pop() next_mutation_size = next_mutation.size() @@ -91,17 +114,11 @@ async def process_mutations(self, mutations:list[BulkMutationsEntry], timeout:fl await self.available_mutation_count.acquire(len(next_mutation.mutations)) await self.available_mutation_bytes.acquire(next_mutation_size) batch.append(next_mutation) - batch_bytes += next_mutation_size # start mutate_rows rpc try: - # TODO: free up space as individual mutations are completed - await self.table.mutate_rows(batch, operation_timeout=timeout, per_request_timeout=timeout) + await self._execute_mutate_rows(batch, timeout) except MutationsExceptionGroup as e: errors.extend(e.exceptions) - finally: - # release locks - self.available_mutation_count.release(sum([len(m.mutations) for m in batch])) - self.available_mutation_bytes.release(batch_bytes) # raise set of failed mutations on completion if errors: raise MutationsExceptionGroup(errors) From b69cc96cc4a1ed99f4ccdea977872b2ad85bc51b Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Tue, 2 May 2023 13:42:47 -0700 Subject: [PATCH 057/213] refactor row processing --- google/cloud/bigtable/mutations_batcher.py | 31 +++++++++------------- 1 file changed, 13 insertions(+), 18 deletions(-) diff --git a/google/cloud/bigtable/mutations_batcher.py b/google/cloud/bigtable/mutations_batcher.py index cc8fa4b6e..8c5e7d4b5 100644 --- a/google/cloud/bigtable/mutations_batcher.py +++ b/google/cloud/bigtable/mutations_batcher.py @@ -93,26 +93,21 @@ async def process_mutations(self, mutations:list[BulkMutationsEntry], timeout:fl errors : list[FailedMutationEntryError] = [] while mutations: batch : list[BulkMutationsEntry] = [] - # grab at least one mutation -
next_mutation = mutations.pop() - next_mutation_size = next_mutation.size() - # do extra sanity check to avoid deadlocks - if len(next_mutation.mutations) > self.max_mutation_count: - raise ValueError( - f"Mutation count {len(next_mutation.mutations)} exceeds max mutation count {self.max_mutation_count}" - ) - if next_mutation_size > self.max_mutation_bytes: - raise ValueError( - f"Mutation size {next_mutation_size} exceeds max mutation size {self.max_mutation_bytes}" - ) - self.available_mutation_count.acquire(len(next_mutation.mutations)) - self.available_mutation_bytes.acquire(next_mutation_size) - # fill up batch until we hit lock - while mutations and not self.is_locked(): + # fill up batch until we hit a lock. Grab at least one entry + while mutations and (not self.is_locked() or not batch): next_mutation = mutations.pop() next_mutation_size = next_mutation.size() - await self.available_mutation_count.acquire(len(next_mutation.mutations)) - await self.available_mutation_bytes.acquire(next_mutation_size) + # do extra sanity check to avoid deadlocks + if len(next_mutation.mutations) > self.max_mutation_count: + raise ValueError( + f"Mutation count {len(next_mutation.mutations)} exceeds max mutation count {self.max_mutation_count}" + ) + if next_mutation_size > self.max_mutation_bytes: + raise ValueError( + f"Mutation size {next_mutation_size} exceeds max mutation size {self.max_mutation_bytes}" + ) + self.available_mutation_count.acquire(len(next_mutation.mutations)) + self.available_mutation_bytes.acquire(next_mutation_size) batch.append(next_mutation) # start mutate_rows rpc try: From 9e1850c66babc4e17ea97d1f31e070adb4ebe693 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Tue, 2 May 2023 13:58:54 -0700 Subject: [PATCH 058/213] FlowControl returns exceptions rather than raises them --- google/cloud/bigtable/mutations_batcher.py | 45 ++++++++++++---------- 1 file changed, 25 insertions(+), 20 deletions(-) diff --git a/google/cloud/bigtable/mutations_batcher.py b/google/cloud/bigtable/mutations_batcher.py index 8c5e7d4b5..c4451e4c8 100644 --- a/google/cloud/bigtable/mutations_batcher.py +++ b/google/cloud/bigtable/mutations_batcher.py @@ -64,12 +64,12 @@ def is_locked(self) -> bool: def _on_mutation_entry_complete(self, mutation_entry:BulkMutationsEntry, exception:Exception|None): """ - Each time an in-flight mutation is complete, release the flow control semaphore + Every time an in-flight mutation is complete, release the flow control semaphore """ self.available_mutation_count.release(len(mutation_entry.mutations)) self.available_mutation_bytes.release(mutation_entry.size()) - def _execute_mutate_rows(self, batch:list[BulkMutationsEntry], timeout:float | None): + def _execute_mutate_rows(self, batch:list[BulkMutationsEntry], timeout:float | None) -> list[FailedMutationEntryError]: """ Helper to execute mutation operation on a batch @@ -77,18 +77,30 @@ def _execute_mutate_rows(self, batch:list[BulkMutationsEntry], timeout:float | N - batch: list of BulkMutationsEntry objects to send to server - timeout: timeout in seconds. Used as operation_timeout and per_request_timeout. If not given, will use table defaults + Returns: + - list of FailedMutationEntryError objects for mutations that failed. 
+ FailedMutationEntryError objects will not contain index information """ request = {"table_name": self.table.table_name} if self.table.app_profile_id: request["app_profile_id"] = self.table.app_profile_id operation_timeout = timeout or self.table.default_operation_timeout request_timeout = timeout or self.table.default_per_request_timeout - await _mutate_rows_operation(self.table.client._gapic_client, request, batch, operation_timeout, request_timeout, self._on_mutation_entry_complete) - - async def process_mutations(self, mutations:list[BulkMutationsEntry], timeout:float | None): + try: + await _mutate_rows_operation(self.table.client._gapic_client, request, batch, operation_timeout, request_timeout, self._on_mutation_entry_complete) + except MutationsExceptionGroup as e: + for subexc in e.exceptions: + subexc.index = None + return e.exceptions + return [] + + async def process_mutations(self, mutations:list[BulkMutationsEntry], timeout:float | None) -> list[FailedMutationEntryError]: """ Ascynronously send the set of mutations to the server. This method will block when the flow control limits are reached. + + Returns: + - list of FailedMutationEntryError objects for mutations that failed """ errors : list[FailedMutationEntryError] = [] while mutations: @@ -110,13 +122,10 @@ async def process_mutations(self, mutations:list[BulkMutationsEntry], timeout:fl self.available_mutation_bytes.acquire(next_mutation_size) batch.append(next_mutation) # start mutate_rows rpc - try: - await self._execute_mutate_rows(batch, timeout) - except MutationsExceptionGroup as e: - errors.extend(e.exceptions) + batch_errors = self._execute_mutate_rows(batch, timeout) + errors.extend(batch_errors) # raise set of failed mutations on completion - if errors: - raise MutationsExceptionGroup(errors) + return errors class _BatchMutationsQueue(asyncio.Queue[BulkMutationsEntry]): @@ -269,15 +278,11 @@ async def flush(self, *, timeout: float | None = 5.0, raise_exceptions=True): while not self._queue.empty(): entries.append(await self._queue.get()) if entries: - try: - await self._flow_control.mutate_rows(entries, timeout=timeout) - except MutationsExceptionGroup as e: - if raise_exceptions: - raise e - else: - for failed_mutation_exc in e.exceptions: - failed_mutation_exc.index = None - self.exceptions.append(failed_mutation_exc) + errors = await self._flow_control.mutate_rows(entries, timeout=timeout) + if raise_exceptions and errors: + raise MutationsExceptionGroup(errors) + else: + self.exceptions.extend(errors) async def __aenter__(self): """For context manager API""" From 9e4d013b1d8fb94d5d658c1a2b8f3c8034ba7e8b Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Tue, 2 May 2023 14:07:32 -0700 Subject: [PATCH 059/213] raise previous exceptions on flush with flag set --- google/cloud/bigtable/mutations_batcher.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/google/cloud/bigtable/mutations_batcher.py b/google/cloud/bigtable/mutations_batcher.py index c4451e4c8..bb639cc8e 100644 --- a/google/cloud/bigtable/mutations_batcher.py +++ b/google/cloud/bigtable/mutations_batcher.py @@ -268,21 +268,23 @@ async def flush(self, *, timeout: float | None = 5.0, raise_exceptions=True): Args: - timeout: operation_timeout for underlying rpc, in seconds - - raise_exceptions: if True, raise MutationsExceptionGroup if any mutations fail. 
If False, - exceptions are saved in self.exceptions and raised on close() + - raise_exceptions: if True, will raise any unreported exceptions from this or previous flushes. + If False, exceptions will be stored in self.exceptions and raised on a future flush + or when the batcher is closed. Raises: - - MutationsExceptionGroup if raise_exceptions is True and any mutations fail + - MutationsExceptionGroup if raise_exceptions is True and any mutations fail """ entries : list[BulkMutationsEntry] = [] # reset queue while not self._queue.empty(): entries.append(await self._queue.get()) if entries: - errors = await self._flow_control.mutate_rows(entries, timeout=timeout) - if raise_exceptions and errors: - raise MutationsExceptionGroup(errors) - else: - self.exceptions.extend(errors) + flush_errors = await self._flow_control.mutate_rows(entries, timeout=timeout) + self.exceptions.extend(flush_errors) + if raise_exceptions and self.exceptions: + # raise any exceptions from this or previous flushes + exc_list, self.exceptions = self.exceptions, [] + raise MutationsExceptionGroup(exc_list) async def __aenter__(self): """For context manager API""" From 2c7ec9c2d7b68b98199976cbfdcd7b9757a85265 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 3 May 2023 09:35:42 -0700 Subject: [PATCH 060/213] disallow empty bulk mutation entries --- google/cloud/bigtable/mutations.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/google/cloud/bigtable/mutations.py b/google/cloud/bigtable/mutations.py index 70b197e0a..35ed24070 100644 --- a/google/cloud/bigtable/mutations.py +++ b/google/cloud/bigtable/mutations.py @@ -126,6 +126,8 @@ def __init__(self, row_key: bytes | str, mutations: Mutation | list[Mutation]): row_key = row_key.encode("utf-8") if isinstance(mutations, Mutation): mutations = [mutations] + if len(mutations) == 0: + raise ValueError("mutations must not be empty") self.row_key = row_key self.mutations = mutations From 5c4f739c7191cb6642bd3ada3d91c852ab335893 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 3 May 2023 09:52:38 -0700 Subject: [PATCH 061/213] simplified staged mutation store --- google/cloud/bigtable/mutations_batcher.py | 71 +++------------------- 1 file changed, 10 insertions(+), 61 deletions(-) diff --git a/google/cloud/bigtable/mutations_batcher.py b/google/cloud/bigtable/mutations_batcher.py index bb639cc8e..d086f64cc 100644 --- a/google/cloud/bigtable/mutations_batcher.py +++ b/google/cloud/bigtable/mutations_batcher.py @@ -128,59 +128,6 @@ async def process_mutations(self, mutations:list[BulkMutationsEntry], timeout:fl return errors -class _BatchMutationsQueue(asyncio.Queue[BulkMutationsEntry]): - """ - asyncio.Queue subclass that tracks the size and number of mutations - """ - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self._mutation_count = 0 - self._mutation_bytes_size = 0 - - @property - def mutation_count(self): - return self._mutation_count - - @mutation_count.setter - def mutation_count(self, value): - if value < 0: - raise ValueError("Mutation count cannot be negative") - self._mutation_count = value - - @property - def mutation_bytes_size(self): - return self._mutation_bytes_size - - @mutation_bytes_size.setter - def mutation_bytes_size(self, value): - if value < 0: - raise ValueError("Mutation bytes size cannot be negative") - self._mutation_bytes_size = value - - def put_nowait(self, item:BulkMutationsEntry): - super().put_nowait(item) - self.mutation_count += len(item.mutations) - self.mutation_bytes_size += item.size() 
- - def get_nowait(self): - item = super().get_nowait() - self.mutation_count -= len(item.mutations) - self.mutation_bytes_size -= item.size() - return item - - async def put(self, item:BulkMutationsEntry): - await super().put(item) - self.mutation_count += len(item.mutations) - self.mutation_bytes_size += item.size() - - async def get(self): - item = await super().get() - self.mutation_count -= len(item.mutations) - self.mutation_bytes_size -= item.size() - return item - - class MutationsBatcher: """ Allows users to send batches using context manager API: @@ -225,7 +172,8 @@ def __init__( If None, this limit is ignored. """ self.closed : bool = False - self._queue : _BatchMutationsQueue = _BatchMutationsQueue() + self._staged_mutations : list[BulkMutationsEntry] = [] + self._staged_count, self._staged_size = 0, 0 self._flow_control = _FlowControl(table, flow_control_max_count, flow_control_max_bytes) self._flush_limit_bytes = flush_limit_bytes if flush_limit_bytes is not None else float("inf") self._flush_limit_count = flush_limit_count if flush_limit_count is not None else float("inf") @@ -246,7 +194,7 @@ async def _flush_timer(self, interval:float | None): new_task = asyncio.create_task(self.flush(timeout=None, raise_exceptions=False)) self._flush_tasks.append(new_task) - async def append(self, mutations:BulkMutationsEntry): + def append(self, mutations:BulkMutationsEntry): """ Add a new set of mutations to the internal queue """ @@ -257,9 +205,11 @@ async def append(self, mutations:BulkMutationsEntry): raise ValueError(f"Mutation size {size} exceeds flow_control_max_bytes: {self._flow_control.max_mutation_bytes}") if len(mutations.mutations) > self._flow_control.max_mutation_count: raise ValueError(f"Mutation count {len(mutations.mutations)} exceeds flow_control_max_count: {self._flow_control.max_mutation_count}") - await self._queue.put(mutations) + self._staged_mutations.append(mutations) # start a new flush task if limits exceeded - if self._queue.mutation_count > self._flush_limit_count or self._queue.mutation_bytes_size > self._flush_limit_bytes: + self._staged_count += len(mutations.mutations) + self._staged_size += size + if self._staged_count >= self._flush_limit_count or self._staged_size >= self._flush_limit_bytes: self._flush_tasks.append(asyncio.create_task(self.flush(timeout=None, raise_exceptions=False))) async def flush(self, *, timeout: float | None = 5.0, raise_exceptions=True): @@ -274,10 +224,10 @@ async def flush(self, *, timeout: float | None = 5.0, raise_exceptions=True): Raises: - MutationsExceptionGroup if raise_exceptions is True and any mutations fail """ - entries : list[BulkMutationsEntry] = [] # reset queue - while not self._queue.empty(): - entries.append(await self._queue.get()) + entries, self._staged_mutations = self._staged_mutations, [] + self._staged_count, self._staged_size = 0, 0 + # perform flush if entries: flush_errors = await self._flow_control.mutate_rows(entries, timeout=timeout) self.exceptions.extend(flush_errors) @@ -304,7 +254,6 @@ async def close(self, timeout: float = 5.0): self._flush_timer_task.cancel() # wait for all to finish await asyncio.gather([final_flush, self._flush_timer_task, finalize_tasks]) - self._flush_tasks = [] if self.exceptions: raise MutationsExceptionGroup(self.exceptions) From fa4947f90e714a2209730c88397ad03b3c205b6b Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 3 May 2023 09:53:47 -0700 Subject: [PATCH 062/213] ran blacken --- google/cloud/bigtable/exceptions.py | 4 +- 
google/cloud/bigtable/mutations_batcher.py | 93 ++++++++++++++++------ 2 files changed, 70 insertions(+), 27 deletions(-) diff --git a/google/cloud/bigtable/exceptions.py b/google/cloud/bigtable/exceptions.py index 5d5a79374..da9594732 100644 --- a/google/cloud/bigtable/exceptions.py +++ b/google/cloud/bigtable/exceptions.py @@ -127,7 +127,9 @@ def __init__( "idempotent" if failed_mutation_entry.is_idempotent() else "non-idempotent" ) index_msg = f" at index {failed_idx}" if failed_idx is not None else "" - message = f"Failed {idempotent_msg} mutation entry {index_msg} with cause: {cause!r}" + message = ( + f"Failed {idempotent_msg} mutation entry {index_msg} with cause: {cause!r}" + ) super().__init__(message) self.index = failed_idx self.entry = failed_mutation_entry diff --git a/google/cloud/bigtable/mutations_batcher.py b/google/cloud/bigtable/mutations_batcher.py index d086f64cc..bf7e81be9 100644 --- a/google/cloud/bigtable/mutations_batcher.py +++ b/google/cloud/bigtable/mutations_batcher.py @@ -33,7 +33,9 @@ class _FlowControl: stay within the configured limits (max_mutation_count, max_mutation_bytes). """ - def __init__(self, table, max_mutation_count:float|None, max_mutation_bytes:float|None): + def __init__( + self, table, max_mutation_count: float | None, max_mutation_bytes: float | None + ): """ Args: - table: Table object that performs rpc calls @@ -50,8 +52,12 @@ def __init__(self, table, max_mutation_count:float|None, max_mutation_bytes:floa self.max_mutation_bytes = float("inf") self.max_mutation_count = max_mutation_count self.max_mutation_bytes = max_mutation_bytes - self.available_mutation_count : asyncio.Semaphore = asyncio.Semaphore(max_mutation_count) - self.available_mutation_bytes : asyncio.Semaphore = asyncio.Semaphore(max_mutation_bytes) + self.available_mutation_count: asyncio.Semaphore = asyncio.Semaphore( + max_mutation_count + ) + self.available_mutation_bytes: asyncio.Semaphore = asyncio.Semaphore( + max_mutation_bytes + ) def is_locked(self) -> bool: """ @@ -62,14 +68,18 @@ def is_locked(self) -> bool: or self.available_mutation_bytes.locked() ) - def _on_mutation_entry_complete(self, mutation_entry:BulkMutationsEntry, exception:Exception|None): + def _on_mutation_entry_complete( + self, mutation_entry: BulkMutationsEntry, exception: Exception | None + ): """ Every time an in-flight mutation is complete, release the flow control semaphore """ self.available_mutation_count.release(len(mutation_entry.mutations)) self.available_mutation_bytes.release(mutation_entry.size()) - def _execute_mutate_rows(self, batch:list[BulkMutationsEntry], timeout:float | None) -> list[FailedMutationEntryError]: + def _execute_mutate_rows( + self, batch: list[BulkMutationsEntry], timeout: float | None + ) -> list[FailedMutationEntryError]: """ Helper to execute mutation operation on a batch @@ -87,14 +97,23 @@ def _execute_mutate_rows(self, batch:list[BulkMutationsEntry], timeout:float | N operation_timeout = timeout or self.table.default_operation_timeout request_timeout = timeout or self.table.default_per_request_timeout try: - await _mutate_rows_operation(self.table.client._gapic_client, request, batch, operation_timeout, request_timeout, self._on_mutation_entry_complete) + await _mutate_rows_operation( + self.table.client._gapic_client, + request, + batch, + operation_timeout, + request_timeout, + self._on_mutation_entry_complete, + ) except MutationsExceptionGroup as e: for subexc in e.exceptions: subexc.index = None return e.exceptions return [] - async def 
process_mutations(self, mutations:list[BulkMutationsEntry], timeout:float | None) -> list[FailedMutationEntryError]: + async def process_mutations( + self, mutations: list[BulkMutationsEntry], timeout: float | None + ) -> list[FailedMutationEntryError]: """ Ascynronously send the set of mutations to the server. This method will block when the flow control limits are reached. @@ -102,9 +121,9 @@ async def process_mutations(self, mutations:list[BulkMutationsEntry], timeout:fl Returns: - list of FailedMutationEntryError objects for mutations that failed """ - errors : list[FailedMutationEntryError] = [] + errors: list[FailedMutationEntryError] = [] while mutations: - batch : list[BulkMutationsEntry] = [] + batch: list[BulkMutationsEntry] = [] # fill up batch until we hit a lock. Grab at least one entry while mutations and (not self.is_locked() or not batch): next_mutation = mutations.pop() @@ -171,17 +190,25 @@ def __init__( - flow_control_max_bytes: Maximum number of inflight bytes. If None, this limit is ignored. """ - self.closed : bool = False - self._staged_mutations : list[BulkMutationsEntry] = [] + self.closed: bool = False + self._staged_mutations: list[BulkMutationsEntry] = [] self._staged_count, self._staged_size = 0, 0 - self._flow_control = _FlowControl(table, flow_control_max_count, flow_control_max_bytes) - self._flush_limit_bytes = flush_limit_bytes if flush_limit_bytes is not None else float("inf") - self._flush_limit_count = flush_limit_count if flush_limit_count is not None else float("inf") + self._flow_control = _FlowControl( + table, flow_control_max_count, flow_control_max_bytes + ) + self._flush_limit_bytes = ( + flush_limit_bytes if flush_limit_bytes is not None else float("inf") + ) + self._flush_limit_count = ( + flush_limit_count if flush_limit_count is not None else float("inf") + ) self.exceptions = [] - self._flush_timer_task : asyncio.Task[None] = asyncio.create_task(self._flush_timer(flush_interval)) - self._flush_tasks : list[asyncio.Task[None]] = [] + self._flush_timer_task: asyncio.Task[None] = asyncio.create_task( + self._flush_timer(flush_interval) + ) + self._flush_tasks: list[asyncio.Task[None]] = [] - async def _flush_timer(self, interval:float | None): + async def _flush_timer(self, interval: float | None): """ Triggers new flush tasks every `interval` seconds """ @@ -191,10 +218,12 @@ async def _flush_timer(self, interval:float | None): await asyncio.sleep(interval) # add new flush task to list if not self.closed: - new_task = asyncio.create_task(self.flush(timeout=None, raise_exceptions=False)) + new_task = asyncio.create_task( + self.flush(timeout=None, raise_exceptions=False) + ) self._flush_tasks.append(new_task) - def append(self, mutations:BulkMutationsEntry): + def append(self, mutations: BulkMutationsEntry): """ Add a new set of mutations to the internal queue """ @@ -202,15 +231,24 @@ def append(self, mutations:BulkMutationsEntry): raise RuntimeError("Cannot append to closed MutationsBatcher") size = mutations.size() if size > self._flow_control.max_mutation_bytes: - raise ValueError(f"Mutation size {size} exceeds flow_control_max_bytes: {self._flow_control.max_mutation_bytes}") + raise ValueError( + f"Mutation size {size} exceeds flow_control_max_bytes: {self._flow_control.max_mutation_bytes}" + ) if len(mutations.mutations) > self._flow_control.max_mutation_count: - raise ValueError(f"Mutation count {len(mutations.mutations)} exceeds flow_control_max_count: {self._flow_control.max_mutation_count}") + raise ValueError( + f"Mutation count 
{len(mutations.mutations)} exceeds flow_control_max_count: {self._flow_control.max_mutation_count}" + ) self._staged_mutations.append(mutations) # start a new flush task if limits exceeded self._staged_count += len(mutations.mutations) self._staged_size += size - if self._staged_count >= self._flush_limit_count or self._staged_size >= self._flush_limit_bytes: - self._flush_tasks.append(asyncio.create_task(self.flush(timeout=None, raise_exceptions=False))) + if ( + self._staged_count >= self._flush_limit_count + or self._staged_size >= self._flush_limit_bytes + ): + self._flush_tasks.append( + asyncio.create_task(self.flush(timeout=None, raise_exceptions=False)) + ) async def flush(self, *, timeout: float | None = 5.0, raise_exceptions=True): """ @@ -229,7 +267,9 @@ async def flush(self, *, timeout: float | None = 5.0, raise_exceptions=True): self._staged_count, self._staged_size = 0, 0 # perform flush if entries: - flush_errors = await self._flow_control.mutate_rows(entries, timeout=timeout) + flush_errors = await self._flow_control.mutate_rows( + entries, timeout=timeout + ) self.exceptions.extend(flush_errors) if raise_exceptions and self.exceptions: # raise any exceptions from this or previous flushes @@ -250,10 +290,11 @@ async def close(self, timeout: float = 5.0): """ self.closed = True final_flush = self.flush(timeout=timeout, raise_exceptions=False) - finalize_tasks = asyncio.wait_for(asyncio.gather(*self._flush_tasks), timeout=timeout) + finalize_tasks = asyncio.wait_for( + asyncio.gather(*self._flush_tasks), timeout=timeout + ) self._flush_timer_task.cancel() # wait for all to finish await asyncio.gather([final_flush, self._flush_timer_task, finalize_tasks]) if self.exceptions: raise MutationsExceptionGroup(self.exceptions) - From 914b24be7df14598891e4a96aeba9c9ef9934a5b Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 3 May 2023 11:11:12 -0700 Subject: [PATCH 063/213] replaced semaphores with condition --- google/cloud/bigtable/_mutate_rows.py | 20 ++++-- google/cloud/bigtable/mutations_batcher.py | 83 ++++++++++++---------- 2 files changed, 61 insertions(+), 42 deletions(-) diff --git a/google/cloud/bigtable/_mutate_rows.py b/google/cloud/bigtable/_mutate_rows.py index f785c1056..520c8d030 100644 --- a/google/cloud/bigtable/_mutate_rows.py +++ b/google/cloud/bigtable/_mutate_rows.py @@ -14,7 +14,9 @@ # from __future__ import annotations -from typing import Callable, Any, TYPE_CHECKING +from typing import Callable, Awaitable, Any, TYPE_CHECKING + +from inspect import iscoroutine from google.api_core import exceptions as core_exceptions from google.api_core import retry_async as retries @@ -41,7 +43,9 @@ async def _mutate_rows_operation( mutation_entries: list["BulkMutationsEntry"], operation_timeout: float, per_request_timeout: float | None, - on_terminal_state: Callable[["BulkMutationsEntry", Exception | None], None] + on_terminal_state: Callable[ + ["BulkMutationsEntry", Exception | None], Awaitable[None] | None + ] | None = None, ): """ @@ -125,7 +129,9 @@ def on_error_fn(exc): ) # call on_terminal_state for each unreported failed mutation if on_terminal_state and mutations_dict[idx] is not None: - on_terminal_state(entry, cause_exc) + output = on_terminal_state(mutations_dict[idx], cause_exc) + if iscoroutine(output): + await output if all_errors: raise bt_exceptions.MutationsExceptionGroup( all_errors, len(mutation_entries) @@ -139,7 +145,9 @@ async def _mutate_rows_retryable_attempt( mutation_dict: dict[int, "BulkMutationsEntry" | None], error_dict: dict[int, 
list[Exception]], predicate: Callable[[Exception], bool], - on_terminal_state: Callable[["BulkMutationsEntry", Exception | None], None] + on_terminal_state: Callable[ + ["BulkMutationsEntry", Exception | None], Awaitable[None] | None + ] | None = None, ): """ @@ -209,7 +217,9 @@ async def _mutate_rows_retryable_attempt( if terminal_state: mutation_dict[idx] = None if on_terminal_state is not None: - on_terminal_state(entry, exc) + result = on_terminal_state(entry, exc) + if iscoroutine(result): + await result # check if attempt succeeded, or needs to be retried if any(mutation is not None for mutation in mutation_dict.values()): # unfinished work; raise exception to trigger retry diff --git a/google/cloud/bigtable/mutations_batcher.py b/google/cloud/bigtable/mutations_batcher.py index bf7e81be9..185a92ff2 100644 --- a/google/cloud/bigtable/mutations_batcher.py +++ b/google/cloud/bigtable/mutations_batcher.py @@ -46,38 +46,36 @@ def __init__( If None, no limit is enforced. """ self.table = table - if max_mutation_count is None: - self.max_mutation_count = float("inf") - if max_mutation_bytes is None: - self.max_mutation_bytes = float("inf") - self.max_mutation_count = max_mutation_count - self.max_mutation_bytes = max_mutation_bytes - self.available_mutation_count: asyncio.Semaphore = asyncio.Semaphore( - max_mutation_count + self.max_mutation_count = ( + max_mutation_count if max_mutation_count is not None else float("inf") ) - self.available_mutation_bytes: asyncio.Semaphore = asyncio.Semaphore( - max_mutation_bytes + self.max_mutation_bytes = ( + max_mutation_bytes if max_mutation_bytes is not None else float("inf") ) + self.capacity_condition = asyncio.Condition() + self.in_flight_mutation_count = 0 + self.in_flight_mutation_bytes = 0 - def is_locked(self) -> bool: - """ - Check if either flow control semaphore is locked - """ + def _has_capacity(self, additional_size: int, additional_count: int) -> bool: + new_size = self.in_flight_mutation_bytes + additional_size + new_count = self.in_flight_mutation_count + additional_count return ( - self.available_mutation_count.locked() - or self.available_mutation_bytes.locked() + new_size <= self.max_mutation_bytes and new_count <= self.max_mutation_count ) - def _on_mutation_entry_complete( + async def _on_mutation_entry_complete( self, mutation_entry: BulkMutationsEntry, exception: Exception | None ): """ Every time an in-flight mutation is complete, release the flow control semaphore """ - self.available_mutation_count.release(len(mutation_entry.mutations)) - self.available_mutation_bytes.release(mutation_entry.size()) + self.in_flight_mutation_count -= len(mutation_entry.mutations) + self.in_flight_mutation_bytes -= mutation_entry.size() + # notify any blocked requests that there is additional capacity + async with self.capacity_condition: + self.capacity_condition.notify_all() - def _execute_mutate_rows( + async def _execute_mutate_rows( self, batch: list[BulkMutationsEntry], timeout: float | None ) -> list[FailedMutationEntryError]: """ @@ -124,24 +122,35 @@ async def process_mutations( errors: list[FailedMutationEntryError] = [] while mutations: batch: list[BulkMutationsEntry] = [] - # fill up batch until we hit a lock. 
Grab at least one entry - while mutations and (not self.is_locked() or not batch): - next_mutation = mutations.pop() - next_mutation_size = next_mutation.size() - # do extra sanity check to avoid deadlocks - if len(next_mutation.mutations) > self.max_mutation_count: - raise ValueError( - f"Mutation count {len(next_mutation.mutations)} exceeds max mutation count {self.max_mutation_count}" - ) - if next_mutation_size > self.max_mutation_bytes: - raise ValueError( - f"Mutation size {next_mutation_size} exceeds max mutation size {self.max_mutation_bytes}" - ) - self.available_mutation_count.acquire(len(next_mutation.mutations)) - self.available_mutation_bytes.acquire(next_mutation_size) - batch.append(next_mutation) + # fill up batch until we hit capacity + async with self.capacity_condition: + while mutations: + next_entry = mutations[0] + next_size = next_entry.size() + next_count = len(next_entry.mutations) + # do extra sanity check to avoid blocking forever + if next_count > self.max_mutation_count: + raise ValueError( + f"Mutation count {next_count} exceeds max mutation count {self.max_mutation_count}" + ) + if next_size > self.max_mutation_bytes: + raise ValueError( + f"Mutation size {next_size} exceeds max mutation size {self.max_mutation_bytes}" + ) + if self._has_capacity(next_size, next_count): + batch.append(mutations.pop(0)) + self.in_flight_mutation_bytes += next_size + self.in_flight_mutation_count += next_count + elif batch: + # break out and submit partial batch + break + else: + # batch is empty. Block until we have capacity + await self.capacity_condition.wait_for( + lambda: self._has_capacity(next_size, next_count) + ) # start mutate_rows rpc - batch_errors = self._execute_mutate_rows(batch, timeout) + batch_errors = await self._execute_mutate_rows(batch, timeout) errors.extend(batch_errors) # raise set of failed mutations on completion return errors From 8bda6514f9a14a538020aa192a79d401058728a0 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 3 May 2023 11:38:53 -0700 Subject: [PATCH 064/213] fixed mypy issues --- google/cloud/bigtable/_mutate_rows.py | 11 ++++++----- google/cloud/bigtable/mutations_batcher.py | 17 +++++++++++------ 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/google/cloud/bigtable/_mutate_rows.py b/google/cloud/bigtable/_mutate_rows.py index 520c8d030..9a1946fa4 100644 --- a/google/cloud/bigtable/_mutate_rows.py +++ b/google/cloud/bigtable/_mutate_rows.py @@ -14,7 +14,7 @@ # from __future__ import annotations -from typing import Callable, Awaitable, Any, TYPE_CHECKING +from typing import Callable, Awaitable, Any, cast, TYPE_CHECKING from inspect import iscoroutine @@ -129,7 +129,8 @@ def on_error_fn(exc): ) # call on_terminal_state for each unreported failed mutation if on_terminal_state and mutations_dict[idx] is not None: - output = on_terminal_state(mutations_dict[idx], cause_exc) + entry = cast(BulkMutationsEntry, mutations_dict[idx]) + output = on_terminal_state(entry, cause_exc) if iscoroutine(output): await output if all_errors: @@ -217,9 +218,9 @@ async def _mutate_rows_retryable_attempt( if terminal_state: mutation_dict[idx] = None if on_terminal_state is not None: - result = on_terminal_state(entry, exc) - if iscoroutine(result): - await result + output = on_terminal_state(entry, exc) + if iscoroutine(output): + await output # check if attempt succeeded, or needs to be retried if any(mutation is not None for mutation in mutation_dict.values()): # unfinished work; raise exception to trigger retry diff --git 
a/google/cloud/bigtable/mutations_batcher.py b/google/cloud/bigtable/mutations_batcher.py index 185a92ff2..07b7ea009 100644 --- a/google/cloud/bigtable/mutations_batcher.py +++ b/google/cloud/bigtable/mutations_batcher.py @@ -106,7 +106,7 @@ async def _execute_mutate_rows( except MutationsExceptionGroup as e: for subexc in e.exceptions: subexc.index = None - return e.exceptions + return list(e.exceptions) return [] async def process_mutations( @@ -211,11 +211,13 @@ def __init__( self._flush_limit_count = ( flush_limit_count if flush_limit_count is not None else float("inf") ) - self.exceptions = [] + self.exceptions : list[FailedMutationEntryError] = [] self._flush_timer_task: asyncio.Task[None] = asyncio.create_task( self._flush_timer(flush_interval) ) self._flush_tasks: list[asyncio.Task[None]] = [] + # MutationExceptionGroup reports number of successful entries along with failures + self._entries_processed_since_last_raise: int = 0 async def _flush_timer(self, interval: float | None): """ @@ -273,17 +275,20 @@ async def flush(self, *, timeout: float | None = 5.0, raise_exceptions=True): """ # reset queue entries, self._staged_mutations = self._staged_mutations, [] + flush_count = self._staged_count self._staged_count, self._staged_size = 0, 0 # perform flush if entries: - flush_errors = await self._flow_control.mutate_rows( + flush_errors = await self._flow_control.process_mutations( entries, timeout=timeout ) self.exceptions.extend(flush_errors) + self._entries_processed_since_last_raise += flush_count if raise_exceptions and self.exceptions: # raise any exceptions from this or previous flushes exc_list, self.exceptions = self.exceptions, [] - raise MutationsExceptionGroup(exc_list) + raise_count, self._entries_processed_since_last_raise = self._entries_processed_since_last_raise, 0 + raise MutationsExceptionGroup(exc_list, raise_count) async def __aenter__(self): """For context manager API""" @@ -304,6 +309,6 @@ async def close(self, timeout: float = 5.0): ) self._flush_timer_task.cancel() # wait for all to finish - await asyncio.gather([final_flush, self._flush_timer_task, finalize_tasks]) + await asyncio.gather(final_flush, self._flush_timer_task, finalize_tasks) if self.exceptions: - raise MutationsExceptionGroup(self.exceptions) + raise MutationsExceptionGroup(self.exceptions, self._entries_processed_since_last_raise) From 742194bead45d04f7b01fa632f7f0b54f0452f43 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 3 May 2023 11:42:15 -0700 Subject: [PATCH 065/213] added raise_exceptions fn --- google/cloud/bigtable/mutations_batcher.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/google/cloud/bigtable/mutations_batcher.py b/google/cloud/bigtable/mutations_batcher.py index 07b7ea009..821c5af02 100644 --- a/google/cloud/bigtable/mutations_batcher.py +++ b/google/cloud/bigtable/mutations_batcher.py @@ -284,11 +284,18 @@ async def flush(self, *, timeout: float | None = 5.0, raise_exceptions=True): ) self.exceptions.extend(flush_errors) self._entries_processed_since_last_raise += flush_count - if raise_exceptions and self.exceptions: - # raise any exceptions from this or previous flushes - exc_list, self.exceptions = self.exceptions, [] - raise_count, self._entries_processed_since_last_raise = self._entries_processed_since_last_raise, 0 - raise MutationsExceptionGroup(exc_list, raise_count) + # raise any exceptions from this or previous flushes + if raise_exceptions: + self._raise_exceptions() + + def _raise_exceptions(self): + """ + 
Raise any unreported exceptions from background flush operations + """ + if self.exceptions: + exc_list, self.exceptions = self.exceptions, [] + raise_count, self._entries_processed_since_last_raise = self._entries_processed_since_last_raise, 0 + raise MutationsExceptionGroup(exc_list, raise_count) async def __aenter__(self): """For context manager API""" @@ -310,5 +317,5 @@ async def close(self, timeout: float = 5.0): self._flush_timer_task.cancel() # wait for all to finish await asyncio.gather(final_flush, self._flush_timer_task, finalize_tasks) - if self.exceptions: - raise MutationsExceptionGroup(self.exceptions, self._entries_processed_since_last_raise) + # raise unreported exceptions + self._raise_exceptions() From 45fe1f513372271890ff8c3579f5bccc9967b837 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 3 May 2023 11:52:45 -0700 Subject: [PATCH 066/213] fixed failing tests --- google/cloud/bigtable/exceptions.py | 4 ++-- google/cloud/bigtable/mutations_batcher.py | 12 ++++++++---- tests/unit/test_exceptions.py | 19 +++++++++++++++++++ tests/unit/test_mutations.py | 6 +++++- 4 files changed, 34 insertions(+), 7 deletions(-) diff --git a/google/cloud/bigtable/exceptions.py b/google/cloud/bigtable/exceptions.py index da9594732..94553f1b8 100644 --- a/google/cloud/bigtable/exceptions.py +++ b/google/cloud/bigtable/exceptions.py @@ -126,9 +126,9 @@ def __init__( idempotent_msg = ( "idempotent" if failed_mutation_entry.is_idempotent() else "non-idempotent" ) - index_msg = f" at index {failed_idx}" if failed_idx is not None else "" + index_msg = f" at index {failed_idx} " if failed_idx is not None else " " message = ( - f"Failed {idempotent_msg} mutation entry {index_msg} with cause: {cause!r}" + f"Failed {idempotent_msg} mutation entry{index_msg}with cause: {cause!r}" ) super().__init__(message) self.index = failed_idx diff --git a/google/cloud/bigtable/mutations_batcher.py b/google/cloud/bigtable/mutations_batcher.py index 821c5af02..43c2d640c 100644 --- a/google/cloud/bigtable/mutations_batcher.py +++ b/google/cloud/bigtable/mutations_batcher.py @@ -26,6 +26,9 @@ if TYPE_CHECKING: from google.cloud.bigtable.client import Table # pragma: no cover +# used to make more readable default values +MB_SIZE = 1024 * 1024 + class _FlowControl: """ @@ -175,8 +178,6 @@ class MutationsBatcher: batcher.add(row, mut) """ - MB_SIZE = 1024 * 1024 - def __init__( self, table: "Table", @@ -211,7 +212,7 @@ def __init__( self._flush_limit_count = ( flush_limit_count if flush_limit_count is not None else float("inf") ) - self.exceptions : list[FailedMutationEntryError] = [] + self.exceptions: list[FailedMutationEntryError] = [] self._flush_timer_task: asyncio.Task[None] = asyncio.create_task( self._flush_timer(flush_interval) ) @@ -294,7 +295,10 @@ def _raise_exceptions(self): """ if self.exceptions: exc_list, self.exceptions = self.exceptions, [] - raise_count, self._entries_processed_since_last_raise = self._entries_processed_since_last_raise, 0 + raise_count, self._entries_processed_since_last_raise = ( + self._entries_processed_since_last_raise, + 0, + ) raise MutationsExceptionGroup(exc_list, raise_count) async def __aenter__(self): diff --git a/tests/unit/test_exceptions.py b/tests/unit/test_exceptions.py index 29cb1d02b..8ba3e2d04 100644 --- a/tests/unit/test_exceptions.py +++ b/tests/unit/test_exceptions.py @@ -285,3 +285,22 @@ def test_raise_idempotent(self): assert e.value.entry == test_entry assert e.value.__cause__ == test_exc assert test_entry.is_idempotent.call_count == 1 + + def 
test_no_index(self): + """ + Instances without an index should display different error string + """ + test_idx = None + test_entry = unittest.mock.Mock() + test_exc = ValueError("test") + with pytest.raises(self._get_class()) as e: + raise self._get_class()(test_idx, test_entry, test_exc) + assert ( + str(e.value) + == "Failed idempotent mutation entry with cause: ValueError('test')" + ) + assert e.value.index == test_idx + assert e.value.entry == test_entry + assert e.value.__cause__ == test_exc + assert isinstance(e.value, Exception) + assert test_entry.is_idempotent.call_count == 1 diff --git a/tests/unit/test_mutations.py b/tests/unit/test_mutations.py index 67187ca0e..03c2ceb69 100644 --- a/tests/unit/test_mutations.py +++ b/tests/unit/test_mutations.py @@ -318,7 +318,6 @@ def test__to_dict(self): @pytest.mark.parametrize( "mutations,result", [ - ([], True), ([mock.Mock(is_idempotent=lambda: True)], True), ([mock.Mock(is_idempotent=lambda: False)], False), ( @@ -340,3 +339,8 @@ def test__to_dict(self): def test_is_idempotent(self, mutations, result): instance = self._make_one("row_key", mutations) assert instance.is_idempotent() == result + + def test_empty_mutations(self): + with pytest.raises(ValueError) as e: + self._make_one("row_key", []) + assert "must not be empty" in str(e.value) From d8f3a4d108569000613de06d78234bc601ba2d92 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 3 May 2023 12:03:51 -0700 Subject: [PATCH 067/213] added skeleton for tests --- tests/unit/test_mutations_batcher.py | 78 ++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) create mode 100644 tests/unit/test_mutations_batcher.py diff --git a/tests/unit/test_mutations_batcher.py b/tests/unit/test_mutations_batcher.py new file mode 100644 index 000000000..ba3dffbdf --- /dev/null +++ b/tests/unit/test_mutations_batcher.py @@ -0,0 +1,78 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import pytest + +# try/except added for compatibility with python < 3.8 +try: + from unittest import mock +except ImportError: # pragma: NO COVER + import mock # type: ignore + + +class Test_FlowControl: + def _make_one(self, table=None, max_mutation_count=10, max_mutation_bytes=100): + from google.cloud.firestore_v1.batch import _FlowControl + + if table is None: + table = mock.Mock() + return _FlowControl(table, max_mutation_count, max_mutation_bytes) + + def test_ctor(self): + pass + + def test_has_capacity(self, existing_size, new_size, existing_count, new_count, expected): + pass + + def test__on_mutation_entry_complete(self): + pass + + def test__execute_mutate_rows(self): + pass + + def test_process_mutations(self): + pass + + +class TestMutationsBatcher: + + def _make_one(self, table=None, **kwargs): + from google.cloud.firestore_v1.batch import MutationsBatcher + + if table is None: + table = mock.Mock() + + return MutationsBatcher(table, **kwargs) + + def test_ctor(self): + pass + + def test_context_manager(self): + pass + + def test__flush_timer(self): + pass + + def test_close(self): + pass + + def test_append(self): + pass + + def test_flush(self): + pass + + def test__raise_exceptions(self): + pass + From 171fea6de57a47f92a2a56050f8bfe7518144df7 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Thu, 4 May 2023 10:42:05 -0700 Subject: [PATCH 068/213] feat: publish RateLimitInfo and FeatureFlag protos (#768) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: publish RateLimitInfo and FeatureFlag protos PiperOrigin-RevId: 527878708 Source-Link: https://github.com/googleapis/googleapis/commit/f129f486fa0f681456b99c5cc899bec889a3185c Source-Link: https://github.com/googleapis/googleapis-gen/commit/e02c87d9d0c9a77f2b17268a86f462b5a1d66bbd Copy-Tag: eyJwIjoiLmdpdGh1Yi8uT3dsQm90LnlhbWwiLCJoIjoiZTAyYzg3ZDlkMGM5YTc3ZjJiMTcyNjhhODZmNDYyYjVhMWQ2NmJiZCJ9 * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * fix: Add feature flag proto to BUILD file PiperOrigin-RevId: 528468347 Source-Link: https://github.com/googleapis/googleapis/commit/38247e83e10ace50ec0022302e540e3b0d4be123 Source-Link: https://github.com/googleapis/googleapis-gen/commit/17e62a1ab5f22d7d537675a659157207e406e63d Copy-Tag: eyJwIjoiLmdpdGh1Yi8uT3dsQm90LnlhbWwiLCJoIjoiMTdlNjJhMWFiNWYyMmQ3ZDUzNzY3NWE2NTkxNTcyMDdlNDA2ZTYzZCJ9 * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot --- google/cloud/bigtable_v2/__init__.py | 4 ++ .../services/bigtable/async_client.py | 6 +- .../bigtable_v2/services/bigtable/client.py | 6 +- google/cloud/bigtable_v2/types/__init__.py | 6 ++ google/cloud/bigtable_v2/types/bigtable.py | 59 ++++++++++++++++++- .../cloud/bigtable_v2/types/feature_flags.py | 54 +++++++++++++++++ 6 files changed, 129 insertions(+), 6 deletions(-) create mode 100644 google/cloud/bigtable_v2/types/feature_flags.py diff --git a/google/cloud/bigtable_v2/__init__.py b/google/cloud/bigtable_v2/__init__.py index 342718dea..ee3bd8c0c 100644 --- a/google/cloud/bigtable_v2/__init__.py +++ b/google/cloud/bigtable_v2/__init__.py @@ -31,6 +31,7 @@ from .types.bigtable import MutateRowsResponse from .types.bigtable import PingAndWarmRequest from .types.bigtable import PingAndWarmResponse +from .types.bigtable import RateLimitInfo from 
.types.bigtable import ReadChangeStreamRequest from .types.bigtable import ReadChangeStreamResponse from .types.bigtable import ReadModifyWriteRowRequest @@ -54,6 +55,7 @@ from .types.data import StreamPartition from .types.data import TimestampRange from .types.data import ValueRange +from .types.feature_flags import FeatureFlags from .types.request_stats import FullReadStatsView from .types.request_stats import ReadIterationStats from .types.request_stats import RequestLatencyStats @@ -69,6 +71,7 @@ "Column", "ColumnRange", "Family", + "FeatureFlags", "FullReadStatsView", "GenerateInitialChangeStreamPartitionsRequest", "GenerateInitialChangeStreamPartitionsResponse", @@ -79,6 +82,7 @@ "Mutation", "PingAndWarmRequest", "PingAndWarmResponse", + "RateLimitInfo", "ReadChangeStreamRequest", "ReadChangeStreamResponse", "ReadIterationStats", diff --git a/google/cloud/bigtable_v2/services/bigtable/async_client.py b/google/cloud/bigtable_v2/services/bigtable/async_client.py index 1233e1288..abd82d4d8 100644 --- a/google/cloud/bigtable_v2/services/bigtable/async_client.py +++ b/google/cloud/bigtable_v2/services/bigtable/async_client.py @@ -242,8 +242,10 @@ def read_rows( on the ``request`` instance; if ``request`` is provided, this should not be set. app_profile_id (:class:`str`): - This value specifies routing for replication. This API - only accepts the empty value of app_profile_id. + This value specifies routing for + replication. If not specified, the + "default" application profile will be + used. This corresponds to the ``app_profile_id`` field on the ``request`` instance; if ``request`` is provided, this diff --git a/google/cloud/bigtable_v2/services/bigtable/client.py b/google/cloud/bigtable_v2/services/bigtable/client.py index 38618fa31..a778aff3c 100644 --- a/google/cloud/bigtable_v2/services/bigtable/client.py +++ b/google/cloud/bigtable_v2/services/bigtable/client.py @@ -491,8 +491,10 @@ def read_rows( on the ``request`` instance; if ``request`` is provided, this should not be set. app_profile_id (str): - This value specifies routing for replication. This API - only accepts the empty value of app_profile_id. + This value specifies routing for + replication. If not specified, the + "default" application profile will be + used. 
This corresponds to the ``app_profile_id`` field on the ``request`` instance; if ``request`` is provided, this diff --git a/google/cloud/bigtable_v2/types/__init__.py b/google/cloud/bigtable_v2/types/__init__.py index bb2533e33..9f15efaf5 100644 --- a/google/cloud/bigtable_v2/types/__init__.py +++ b/google/cloud/bigtable_v2/types/__init__.py @@ -24,6 +24,7 @@ MutateRowsResponse, PingAndWarmRequest, PingAndWarmResponse, + RateLimitInfo, ReadChangeStreamRequest, ReadChangeStreamResponse, ReadModifyWriteRowRequest, @@ -50,6 +51,9 @@ TimestampRange, ValueRange, ) +from .feature_flags import ( + FeatureFlags, +) from .request_stats import ( FullReadStatsView, ReadIterationStats, @@ -71,6 +75,7 @@ "MutateRowsResponse", "PingAndWarmRequest", "PingAndWarmResponse", + "RateLimitInfo", "ReadChangeStreamRequest", "ReadChangeStreamResponse", "ReadModifyWriteRowRequest", @@ -94,6 +99,7 @@ "StreamPartition", "TimestampRange", "ValueRange", + "FeatureFlags", "FullReadStatsView", "ReadIterationStats", "RequestLatencyStats", diff --git a/google/cloud/bigtable_v2/types/bigtable.py b/google/cloud/bigtable_v2/types/bigtable.py index ea97588c2..13f6ac0db 100644 --- a/google/cloud/bigtable_v2/types/bigtable.py +++ b/google/cloud/bigtable_v2/types/bigtable.py @@ -38,6 +38,7 @@ "MutateRowResponse", "MutateRowsRequest", "MutateRowsResponse", + "RateLimitInfo", "CheckAndMutateRowRequest", "CheckAndMutateRowResponse", "PingAndWarmRequest", @@ -61,8 +62,9 @@ class ReadRowsRequest(proto.Message): Values are of the form ``projects//instances//tables/``. app_profile_id (str): - This value specifies routing for replication. This API only - accepts the empty value of app_profile_id. + This value specifies routing for replication. + If not specified, the "default" application + profile will be used. rows (google.cloud.bigtable_v2.types.RowSet): The row keys and/or ranges to read sequentially. If not specified, reads from all @@ -469,10 +471,19 @@ class Entry(proto.Message): class MutateRowsResponse(proto.Message): r"""Response message for BigtableService.MutateRows. + .. _oneof: https://proto-plus-python.readthedocs.io/en/stable/fields.html#oneofs-mutually-exclusive-fields + Attributes: entries (MutableSequence[google.cloud.bigtable_v2.types.MutateRowsResponse.Entry]): One or more results for Entries from the batch request. + rate_limit_info (google.cloud.bigtable_v2.types.RateLimitInfo): + Information about how client should limit the + rate (QPS). Primirily used by supported official + Cloud Bigtable clients. If unset, the rate limit + info is not provided by the server. + + This field is a member of `oneof`_ ``_rate_limit_info``. """ class Entry(proto.Message): @@ -506,6 +517,50 @@ class Entry(proto.Message): number=1, message=Entry, ) + rate_limit_info: "RateLimitInfo" = proto.Field( + proto.MESSAGE, + number=3, + optional=True, + message="RateLimitInfo", + ) + + +class RateLimitInfo(proto.Message): + r"""Information about how client should adjust the load to + Bigtable. + + Attributes: + period (google.protobuf.duration_pb2.Duration): + Time that clients should wait before + adjusting the target rate again. If clients + adjust rate too frequently, the impact of the + previous adjustment may not have been taken into + account and may over-throttle or under-throttle. + If clients adjust rate too slowly, they will not + be responsive to load changes on server side, + and may over-throttle or under-throttle. 
+ factor (float): + If it has been at least one ``period`` since the last load + adjustment, the client should multiply the current load by + this value to get the new target load. For example, if the + current load is 100 and ``factor`` is 0.8, the new target + load should be 80. After adjusting, the client should ignore + ``factor`` until another ``period`` has passed. + + The client can measure its load using any unit that's + comparable over time For example, QPS can be used as long as + each request involves a similar amount of work. + """ + + period: duration_pb2.Duration = proto.Field( + proto.MESSAGE, + number=1, + message=duration_pb2.Duration, + ) + factor: float = proto.Field( + proto.DOUBLE, + number=2, + ) class CheckAndMutateRowRequest(proto.Message): diff --git a/google/cloud/bigtable_v2/types/feature_flags.py b/google/cloud/bigtable_v2/types/feature_flags.py new file mode 100644 index 000000000..1b5f76e24 --- /dev/null +++ b/google/cloud/bigtable_v2/types/feature_flags.py @@ -0,0 +1,54 @@ +# -*- coding: utf-8 -*- +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +from __future__ import annotations + +from typing import MutableMapping, MutableSequence + +import proto # type: ignore + + +__protobuf__ = proto.module( + package="google.bigtable.v2", + manifest={ + "FeatureFlags", + }, +) + + +class FeatureFlags(proto.Message): + r"""Feature flags supported by a client. This is intended to be sent as + part of request metadata to assure the server that certain behaviors + are safe to enable. This proto is meant to be serialized and + websafe-base64 encoded under the ``bigtable-features`` metadata key. + The value will remain constant for the lifetime of a client and due + to HTTP2's HPACK compression, the request overhead will be tiny. + This is an internal implementation detail and should not be used by + endusers directly. + + Attributes: + mutate_rows_rate_limit (bool): + Notify the server that the client enables + batch write flow control by requesting + RateLimitInfo from MutateRowsResponse. 
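+
+    Example (illustrative sketch only; assumes the proto-plus ``serialize``
+    helper and the ``bigtable-features`` metadata key described above):
+
+        import base64
+
+        flags = FeatureFlags(mutate_rows_rate_limit=True)
+        encoded = base64.urlsafe_b64encode(FeatureFlags.serialize(flags))
+        metadata = [("bigtable-features", encoded.decode("ascii"))]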
+ """ + + mutate_rows_rate_limit: bool = proto.Field( + proto.BOOL, + number=3, + ) + + +__all__ = tuple(sorted(__protobuf__.manifest)) From 7faebc0eacb27b5d5f540a58b290d681b5a4df06 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 4 May 2023 16:53:31 -0700 Subject: [PATCH 069/213] improved and tested flow control --- google/cloud/bigtable/_mutate_rows.py | 2 +- google/cloud/bigtable/mutations_batcher.py | 157 +++++++-------- tests/unit/test_mutations_batcher.py | 215 +++++++++++++++++++-- 3 files changed, 275 insertions(+), 99 deletions(-) diff --git a/google/cloud/bigtable/_mutate_rows.py b/google/cloud/bigtable/_mutate_rows.py index 9a1946fa4..105fce34b 100644 --- a/google/cloud/bigtable/_mutate_rows.py +++ b/google/cloud/bigtable/_mutate_rows.py @@ -129,7 +129,7 @@ def on_error_fn(exc): ) # call on_terminal_state for each unreported failed mutation if on_terminal_state and mutations_dict[idx] is not None: - entry = cast(BulkMutationsEntry, mutations_dict[idx]) + entry = cast("BulkMutationsEntry", mutations_dict[idx]) output = on_terminal_state(entry, cause_exc) if iscoroutine(output): await output diff --git a/google/cloud/bigtable/mutations_batcher.py b/google/cloud/bigtable/mutations_batcher.py index 43c2d640c..3537cebb1 100644 --- a/google/cloud/bigtable/mutations_batcher.py +++ b/google/cloud/bigtable/mutations_batcher.py @@ -32,23 +32,22 @@ class _FlowControl: """ - Manages underlying rpcs for MutationsBatcher. Ensures that in-flight requests - stay within the configured limits (max_mutation_count, max_mutation_bytes). + Manages flow control for batched mutations. Mutations are registered against + the FlowControl object before being sent, which will block if size or count + limits have reached capacity. When a mutation is complete, it is unregistered + from the FlowControl object, which will notify any blocked requests that there + is additional capacity. """ - def __init__( - self, table, max_mutation_count: float | None, max_mutation_bytes: float | None - ): + def __init__(self, max_mutation_count: int | None, max_mutation_bytes: int | None): """ Args: - - table: Table object that performs rpc calls - max_mutation_count: maximum number of mutations to send in a single rpc. This corresponds to individual mutations in a single BulkMutationsEntry. If None, no limit is enforced. - max_mutation_bytes: maximum number of bytes to send in a single rpc. If None, no limit is enforced. 
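+
+        Example (illustrative sketch only; ``send_batch`` stands in for the
+        real mutate_rows call, and capacity must be released with
+        ``remove_from_flow`` once each entry reaches a terminal state):
+
+            flow = _FlowControl(max_mutation_count=100_000, max_mutation_bytes=100 * 1024 * 1024)
+            async for batch in flow.add_to_flow(entries):
+                await send_batch(batch)
+                for entry in batch:
+                    await flow.remove_from_flow(entry)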
""" - self.table = table self.max_mutation_count = ( max_mutation_count if max_mutation_count is not None else float("inf") ) @@ -59,16 +58,17 @@ def __init__( self.in_flight_mutation_count = 0 self.in_flight_mutation_bytes = 0 - def _has_capacity(self, additional_size: int, additional_count: int) -> bool: + def _has_capacity(self, additional_count: int, additional_size: int) -> bool: + """ + Checks if there is capacity to send a new mutation with the given size and count + """ new_size = self.in_flight_mutation_bytes + additional_size new_count = self.in_flight_mutation_count + additional_count return ( new_size <= self.max_mutation_bytes and new_count <= self.max_mutation_count ) - async def _on_mutation_entry_complete( - self, mutation_entry: BulkMutationsEntry, exception: Exception | None - ): + async def remove_from_flow(self, mutation_entry: BulkMutationsEntry, *args): """ Every time an in-flight mutation is complete, release the flow control semaphore """ @@ -78,85 +78,52 @@ async def _on_mutation_entry_complete( async with self.capacity_condition: self.capacity_condition.notify_all() - async def _execute_mutate_rows( - self, batch: list[BulkMutationsEntry], timeout: float | None - ) -> list[FailedMutationEntryError]: + async def add_to_flow(self, mutations: list[BulkMutationsEntry]): """ - Helper to execute mutation operation on a batch + Breaks up list of mutations into batches that were registered to fit within + flow control limits. This method will block when the flow control limits are + reached. Args: - - batch: list of BulkMutationsEntry objects to send to server - - timeout: timeout in seconds. Used as operation_timeout and per_request_timeout. - If not given, will use table defaults - Returns: - - list of FailedMutationEntryError objects for mutations that failed. - FailedMutationEntryError objects will not contain index information - """ - request = {"table_name": self.table.table_name} - if self.table.app_profile_id: - request["app_profile_id"] = self.table.app_profile_id - operation_timeout = timeout or self.table.default_operation_timeout - request_timeout = timeout or self.table.default_per_request_timeout - try: - await _mutate_rows_operation( - self.table.client._gapic_client, - request, - batch, - operation_timeout, - request_timeout, - self._on_mutation_entry_complete, - ) - except MutationsExceptionGroup as e: - for subexc in e.exceptions: - subexc.index = None - return list(e.exceptions) - return [] - - async def process_mutations( - self, mutations: list[BulkMutationsEntry], timeout: float | None - ) -> list[FailedMutationEntryError]: - """ - Ascynronously send the set of mutations to the server. This method will block - when the flow control limits are reached. - - Returns: - - list of FailedMutationEntryError objects for mutations that failed + - mutations: list mutations to break up into batches + Yields: + - list of mutations that have reserved space in the flow control. + Each batch contains at least one mutation. 
+ Raises: + - ValueError if any mutation entry is larger than the flow control limits """ - errors: list[FailedMutationEntryError] = [] - while mutations: - batch: list[BulkMutationsEntry] = [] + start_idx = 0 + end_idx = 0 + while end_idx < len(mutations): + start_idx = end_idx # fill up batch until we hit capacity async with self.capacity_condition: - while mutations: - next_entry = mutations[0] + while end_idx < len(mutations): + next_entry = mutations[end_idx] next_size = next_entry.size() next_count = len(next_entry.mutations) # do extra sanity check to avoid blocking forever if next_count > self.max_mutation_count: raise ValueError( - f"Mutation count {next_count} exceeds max mutation count {self.max_mutation_count}" + f"Mutation count {next_count} exceeds maximum: {self.max_mutation_count}" ) if next_size > self.max_mutation_bytes: raise ValueError( - f"Mutation size {next_size} exceeds max mutation size {self.max_mutation_bytes}" + f"Mutation size {next_size} exceeds maximum: {self.max_mutation_bytes}" ) - if self._has_capacity(next_size, next_count): - batch.append(mutations.pop(0)) + if self._has_capacity(next_count, next_size): + end_idx += 1 self.in_flight_mutation_bytes += next_size self.in_flight_mutation_count += next_count - elif batch: - # break out and submit partial batch + elif start_idx != end_idx: + # we have at least one mutation in the batch, so send it break else: # batch is empty. Block until we have capacity await self.capacity_condition.wait_for( - lambda: self._has_capacity(next_size, next_count) + lambda: self._has_capacity(next_count, next_size) ) - # start mutate_rows rpc - batch_errors = await self._execute_mutate_rows(batch, timeout) - errors.extend(batch_errors) - # raise set of failed mutations on completion - return errors + yield mutations[start_idx:end_idx] class MutationsBatcher: @@ -201,10 +168,11 @@ def __init__( If None, this limit is ignored. """ self.closed: bool = False + self._table = table self._staged_mutations: list[BulkMutationsEntry] = [] self._staged_count, self._staged_size = 0, 0 self._flow_control = _FlowControl( - table, flow_control_max_count, flow_control_max_bytes + flow_control_max_count, flow_control_max_bytes ) self._flush_limit_bytes = ( flush_limit_bytes if flush_limit_bytes is not None else float("inf") @@ -276,18 +244,51 @@ async def flush(self, *, timeout: float | None = 5.0, raise_exceptions=True): """ # reset queue entries, self._staged_mutations = self._staged_mutations, [] - flush_count = self._staged_count self._staged_count, self._staged_size = 0, 0 # perform flush - if entries: - flush_errors = await self._flow_control.process_mutations( - entries, timeout=timeout + async for batch in self._flow_control.add_to_flow(entries): + batch_errors = await self._execute_mutate_rows(batch, timeout) + self.exceptions.extend(batch_errors) + self._entries_processed_since_last_raise += sum([ + len(entry.mutations) for entry in batch + ]) + # raise any exceptions from this or previous flushes + if raise_exceptions: + self._raise_exceptions() + + async def _execute_mutate_rows( + self, batch: list[BulkMutationsEntry], timeout: float | None = None + ) -> list[FailedMutationEntryError]: + """ + Helper to execute mutation operation on a batch + + Args: + - batch: list of BulkMutationsEntry objects to send to server + - timeout: timeout in seconds. Used as operation_timeout and per_request_timeout. + If not given, will use table defaults + Returns: + - list of FailedMutationEntryError objects for mutations that failed. 
+ FailedMutationEntryError objects will not contain index information + """ + request = {"table_name": self._table.table_name} + if self._table.app_profile_id: + request["app_profile_id"] = self._table.app_profile_id + operation_timeout = timeout or self._table.default_operation_timeout + request_timeout = timeout or self._table.default_per_request_timeout + try: + await _mutate_rows_operation( + self._table.client._gapic_client, + request, + batch, + operation_timeout, + request_timeout, + self._flow_control.remove_from_flow, ) - self.exceptions.extend(flush_errors) - self._entries_processed_since_last_raise += flush_count - # raise any exceptions from this or previous flushes - if raise_exceptions: - self._raise_exceptions() + except MutationsExceptionGroup as e: + for subexc in e.exceptions: + subexc.index = None + return list(e.exceptions) + return [] def _raise_exceptions(self): """ diff --git a/tests/unit/test_mutations_batcher.py b/tests/unit/test_mutations_batcher.py index ba3dffbdf..a28ca05b4 100644 --- a/tests/unit/test_mutations_batcher.py +++ b/tests/unit/test_mutations_batcher.py @@ -13,6 +13,7 @@ # limitations under the License. import pytest +import asyncio # try/except added for compatibility with python < 3.8 try: @@ -22,31 +23,206 @@ class Test_FlowControl: - def _make_one(self, table=None, max_mutation_count=10, max_mutation_bytes=100): - from google.cloud.firestore_v1.batch import _FlowControl + def _make_one(self, max_mutation_count=10, max_mutation_bytes=100): + from google.cloud.bigtable.mutations_batcher import _FlowControl - if table is None: - table = mock.Mock() - return _FlowControl(table, max_mutation_count, max_mutation_bytes) - - def test_ctor(self): - pass - - def test_has_capacity(self, existing_size, new_size, existing_count, new_count, expected): - pass + return _FlowControl(max_mutation_count, max_mutation_bytes) - def test__on_mutation_entry_complete(self): - pass - - def test__execute_mutate_rows(self): - pass + def _make_mutation(self, count=1, size=1): + mutation = mock.Mock() + mutation.size.return_value = size + mutation.mutations = [mock.Mock()] * count + return mutation - def test_process_mutations(self): - pass + def test_ctor(self): + max_mutation_count = 9 + max_mutation_bytes = 19 + instance = self._make_one(max_mutation_count, max_mutation_bytes) + assert instance.max_mutation_count == max_mutation_count + assert instance.max_mutation_bytes == max_mutation_bytes + assert instance.in_flight_mutation_count == 0 + assert instance.in_flight_mutation_bytes == 0 + assert isinstance(instance.capacity_condition, asyncio.Condition) + + def test_ctor_empty_values(self): + """Test constructor with None count and bytes""" + instance = self._make_one(None, None) + assert instance.max_mutation_count == float("inf") + assert instance.max_mutation_bytes == float("inf") + + @pytest.mark.parametrize( + "max_count,max_size,existing_count,existing_size,new_count,new_size,expected", + [ + (0, 0, 0, 0, 0, 0, True), + (0, 0, 1, 1, 1, 1, False), + (10, 10, 0, 0, 0, 0, True), + (10, 10, 0, 0, 9, 9, True), + (10, 10, 0, 0, 11, 9, False), + (10, 10, 0, 0, 9, 11, False), + (10, 1, 0, 0, 1, 0, True), + (1, 10, 0, 0, 0, 8, True), + (float("inf"), float("inf"), 0, 0, 1e10, 1e10, True), + (8, 8, 0, 0, 1e10, 1e10, False), + (12, 12, 6, 6, 5, 5, True), + (12, 12, 5, 5, 6, 6, True), + (12, 12, 6, 6, 6, 6, True), + (12, 12, 6, 6, 7, 7, False), + ], + ) + def test__has_capacity( + self, + max_count, + max_size, + existing_count, + existing_size, + new_count, + new_size, + 
expected, + ): + """ + _has_capacity should return True if the new mutation will will not exceed the max count or size + """ + instance = self._make_one(max_count, max_size) + instance.in_flight_mutation_count = existing_count + instance.in_flight_mutation_bytes = existing_size + assert instance._has_capacity(new_count, new_size) == expected + + @pytest.mark.asyncio + @pytest.mark.parametrize( + "existing_count,existing_size,added_count,added_size,new_count,new_size", + [ + (0, 0, 0, 0, 0, 0), + (2, 2, 1, 1, 1, 1), + (2, 0, 1, 0, 1, 0), + (0, 2, 0, 1, 0, 1), + (10, 10, 0, 0, 10, 10), + (10, 10, 5, 5, 5, 5), + (0, 0, 1, 1, -1, -1), + ], + ) + async def test_remove_from_flow_value_update( + self, + existing_count, + existing_size, + added_count, + added_size, + new_count, + new_size, + ): + """ + completed mutations should lower the inflight values + """ + instance = self._make_one() + instance.in_flight_mutation_count = existing_count + instance.in_flight_mutation_bytes = existing_size + mutation = self._make_mutation(added_count, added_size) + await instance.remove_from_flow(mutation) + assert instance.in_flight_mutation_count == new_count + assert instance.in_flight_mutation_bytes == new_size + + @pytest.mark.asyncio + async def test__remove_from_flow_unlock(self): + """capacity condition should notify after mutation is complete""" + instance = self._make_one(10, 10) + instance.in_flight_mutation_count = 10 + instance.in_flight_mutation_bytes = 10 + + async def task_routine(): + async with instance.capacity_condition: + await instance.capacity_condition.wait_for( + lambda: instance._has_capacity(1, 1) + ) + + task = asyncio.create_task(task_routine()) + await asyncio.sleep(0.05) + # should be blocked due to capacity + assert task.done() is False + # try changing size + mutation = self._make_mutation(count=0, size=5) + await instance.remove_from_flow(mutation) + await asyncio.sleep(0.05) + assert instance.in_flight_mutation_count == 10 + assert instance.in_flight_mutation_bytes == 5 + assert task.done() is False + # try changing count + instance.in_flight_mutation_bytes = 10 + mutation = self._make_mutation(count=5, size=0) + await instance.remove_from_flow(mutation) + await asyncio.sleep(0.05) + assert instance.in_flight_mutation_count == 5 + assert instance.in_flight_mutation_bytes == 10 + assert task.done() is False + # try changing both + instance.in_flight_mutation_count = 10 + mutation = self._make_mutation(count=5, size=5) + await instance.remove_from_flow(mutation) + await asyncio.sleep(0.05) + assert instance.in_flight_mutation_count == 5 + assert instance.in_flight_mutation_bytes == 5 + # task should be complete + assert task.done() is True + + @pytest.mark.asyncio + @pytest.mark.parametrize( + "mutations,count_cap,size_cap,expected_results", + [ + # high capacity results in no batching + ([(5, 5), (1, 1), (1, 1)], 10, 10, [[(5, 5), (1, 1), (1, 1)]]), + # low capacity splits up into batches + ([(1, 1), (1, 1), (1, 1)], 1, 1, [[(1, 1)], [(1, 1)], [(1, 1)]]), + # test count as limiting factor + ([(1, 1), (1, 1), (1, 1)], 2, 10, [[(1, 1), (1, 1)], [(1, 1)]]), + # test size as limiting factor + ([(1, 1), (1, 1), (1, 1)], 10, 2, [[(1, 1), (1, 1)], [(1, 1)]]), + # test with some bloackages and some flows + ( + [(1, 1), (5, 5), (4, 1), (1, 4), (1, 1)], + 5, + 5, + [[(1, 1)], [(5, 5)], [(4, 1), (1, 4)], [(1, 1)]], + ), + ], + ) + async def test_add_to_flow(self, mutations, count_cap, size_cap, expected_results): + """ + Test batching with various flow control settings + """ + 
mutation_objs = [self._make_mutation(count=m[0], size=m[1]) for m in mutations] + instance = self._make_one(count_cap, size_cap) + i = 0 + async for batch in instance.add_to_flow(mutation_objs): + expected_batch = expected_results[i] + assert len(batch) == len(expected_batch) + for j in range(len(expected_batch)): + # check counts + assert len(batch[j].mutations) == expected_batch[j][0] + # check sizes + assert batch[j].size() == expected_batch[j][1] + # update lock + for entry in batch: + await instance.remove_from_flow(entry) + i += 1 + assert i == len(expected_results) + + @pytest.mark.asyncio + async def test_add_to_flow_invalid_mutation(self): + """ + batching should raise exception for mutations larger than limits to avoid deadlock + """ + instance = self._make_one(2, 3) + large_size_mutation = self._make_mutation(count=1, size=10) + large_count_mutation = self._make_mutation(count=10, size=1) + with pytest.raises(ValueError) as e: + async for _ in instance.add_to_flow([large_size_mutation]): + pass + assert "Mutation size 10 exceeds maximum: 3" in str(e.value) + with pytest.raises(ValueError) as e: + async for _ in instance.add_to_flow([large_count_mutation]): + pass + assert "Mutation count 10 exceeds maximum: 2" in str(e.value) class TestMutationsBatcher: - def _make_one(self, table=None, **kwargs): from google.cloud.firestore_v1.batch import MutationsBatcher @@ -75,4 +251,3 @@ def test_flush(self): def test__raise_exceptions(self): pass - From 10da89ac1af7db9f9399bd9b147cbd42f6cce6d0 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 5 May 2023 09:44:28 -0700 Subject: [PATCH 070/213] refactored flushes to run sequentially --- google/cloud/bigtable/mutations_batcher.py | 74 +++++++++++++--------- 1 file changed, 45 insertions(+), 29 deletions(-) diff --git a/google/cloud/bigtable/mutations_batcher.py b/google/cloud/bigtable/mutations_batcher.py index 3537cebb1..2866d2568 100644 --- a/google/cloud/bigtable/mutations_batcher.py +++ b/google/cloud/bigtable/mutations_batcher.py @@ -184,7 +184,8 @@ def __init__( self._flush_timer_task: asyncio.Task[None] = asyncio.create_task( self._flush_timer(flush_interval) ) - self._flush_tasks: list[asyncio.Task[None]] = [] + # create noop previous flush task to avoid None checks + self._prev_flush: asyncio.Task[None] = asyncio.create_task(asyncio.sleep(0)) # MutationExceptionGroup reports number of successful entries along with failures self._entries_processed_since_last_raise: int = 0 @@ -197,11 +198,8 @@ async def _flush_timer(self, interval: float | None): while not self.closed: await asyncio.sleep(interval) # add new flush task to list - if not self.closed: - new_task = asyncio.create_task( - self.flush(timeout=None, raise_exceptions=False) - ) - self._flush_tasks.append(new_task) + if not self.closed and self._staged_mutations: + self._schedule_flush() def append(self, mutations: BulkMutationsEntry): """ @@ -226,36 +224,57 @@ def append(self, mutations: BulkMutationsEntry): self._staged_count >= self._flush_limit_count or self._staged_size >= self._flush_limit_bytes ): - self._flush_tasks.append( - asyncio.create_task(self.flush(timeout=None, raise_exceptions=False)) - ) + self._schedule_flush() - async def flush(self, *, timeout: float | None = 5.0, raise_exceptions=True): + async def flush(self, *, raise_exceptions=True): """ - Send queue over network in as few calls as possible + Flush all staged mutations to the server Args: - - timeout: operation_timeout for underlying rpc, in seconds - raise_exceptions: if True, will raise any 
unreported exceptions from this or previous flushes. If False, exceptions will be stored in self.exceptions and raised on a future flush or when the batcher is closed. Raises: - MutationsExceptionGroup if raise_exceptions is True and any mutations fail """ - # reset queue - entries, self._staged_mutations = self._staged_mutations, [] - self._staged_count, self._staged_size = 0, 0 - # perform flush - async for batch in self._flow_control.add_to_flow(entries): - batch_errors = await self._execute_mutate_rows(batch, timeout) - self.exceptions.extend(batch_errors) - self._entries_processed_since_last_raise += sum([ - len(entry.mutations) for entry in batch - ]) - # raise any exceptions from this or previous flushes + # add recent staged mutations to flush task, and wait for flush to complete + await self._schedule_flush() + # raise any unreported exceptions from this or previous flushes if raise_exceptions: self._raise_exceptions() + def _schedule_flush(self) -> asyncio.Task[None]: + """Update the flush task to include the latest staged mutations""" + if self._staged_mutations: + entries, self._staged_mutations = self._staged_mutations, [] + self._staged_count, self._staged_size = 0, 0 + self._prev_flush = asyncio.create_task( + self._flush_internal(entries, self._prev_flush) + ) + return self._prev_flush + + + async def _flush_internal( + self, + new_entries: list[BulkMutationsEntry], + prev_flush: asyncio.Task[None], + ): + """ + Flushes a set of mutations to the server, and updates internal state + + Args: + - new_entries: list of mutations to flush + - prev_flush: the previous flush task, which will be awaited before + a new flush is initiated + """ + # wait for previous flush to complete + await prev_flush + # flush new entries + async for batch in self._flow_control.add_to_flow(new_entries): + batch_errors = await self._execute_mutate_rows(batch, None) + self.exceptions.extend(batch_errors) + self._entries_processed_since_last_raise += len(batch.mutations) + async def _execute_mutate_rows( self, batch: list[BulkMutationsEntry], timeout: float | None = None ) -> list[FailedMutationEntryError]: @@ -310,17 +329,14 @@ async def __aexit__(self, exc_type, exc, tb): """For context manager API""" await self.close() - async def close(self, timeout: float = 5.0): + async def close(self): """ Flush queue and clean up resources """ self.closed = True - final_flush = self.flush(timeout=timeout, raise_exceptions=False) - finalize_tasks = asyncio.wait_for( - asyncio.gather(*self._flush_tasks), timeout=timeout - ) self._flush_timer_task.cancel() + final_flush = self._schedule_flush() # wait for all to finish - await asyncio.gather(final_flush, self._flush_timer_task, finalize_tasks) + await asyncio.gather(final_flush, self._flush_timer_task) # raise unreported exceptions self._raise_exceptions() From 0eca65e2ad2afed93a20b90ad78d755e84fdca43 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 5 May 2023 14:08:00 -0700 Subject: [PATCH 071/213] added batcher tests --- google/cloud/bigtable/mutations_batcher.py | 26 +- tests/unit/test_mutations_batcher.py | 518 +++++++++++++++++++-- 2 files changed, 502 insertions(+), 42 deletions(-) diff --git a/google/cloud/bigtable/mutations_batcher.py b/google/cloud/bigtable/mutations_batcher.py index 2866d2568..e6287e420 100644 --- a/google/cloud/bigtable/mutations_batcher.py +++ b/google/cloud/bigtable/mutations_batcher.py @@ -148,6 +148,7 @@ class MutationsBatcher: def __init__( self, table: "Table", + *, flush_interval: float | None = 5, flush_limit_count: int | 
None = 100, flush_limit_bytes: int | None = 20 * MB_SIZE, @@ -170,7 +171,7 @@ def __init__( self.closed: bool = False self._table = table self._staged_mutations: list[BulkMutationsEntry] = [] - self._staged_count, self._staged_size = 0, 0 + self._staged_count, self._staged_bytes = 0, 0 self._flow_control = _FlowControl( flow_control_max_count, flow_control_max_bytes ) @@ -219,10 +220,10 @@ def append(self, mutations: BulkMutationsEntry): self._staged_mutations.append(mutations) # start a new flush task if limits exceeded self._staged_count += len(mutations.mutations) - self._staged_size += size + self._staged_bytes += size if ( self._staged_count >= self._flush_limit_count - or self._staged_size >= self._flush_limit_bytes + or self._staged_bytes >= self._flush_limit_bytes ): self._schedule_flush() @@ -247,7 +248,7 @@ def _schedule_flush(self) -> asyncio.Task[None]: """Update the flush task to include the latest staged mutations""" if self._staged_mutations: entries, self._staged_mutations = self._staged_mutations, [] - self._staged_count, self._staged_size = 0, 0 + self._staged_count, self._staged_bytes = 0, 0 self._prev_flush = asyncio.create_task( self._flush_internal(entries, self._prev_flush) ) @@ -271,12 +272,12 @@ async def _flush_internal( await prev_flush # flush new entries async for batch in self._flow_control.add_to_flow(new_entries): - batch_errors = await self._execute_mutate_rows(batch, None) + batch_errors = await self._execute_mutate_rows(batch) self.exceptions.extend(batch_errors) - self._entries_processed_since_last_raise += len(batch.mutations) + self._entries_processed_since_last_raise += len(batch) async def _execute_mutate_rows( - self, batch: list[BulkMutationsEntry], timeout: float | None = None + self, batch: list[BulkMutationsEntry] ) -> list[FailedMutationEntryError]: """ Helper to execute mutation operation on a batch @@ -292,15 +293,13 @@ async def _execute_mutate_rows( request = {"table_name": self._table.table_name} if self._table.app_profile_id: request["app_profile_id"] = self._table.app_profile_id - operation_timeout = timeout or self._table.default_operation_timeout - request_timeout = timeout or self._table.default_per_request_timeout try: await _mutate_rows_operation( self._table.client._gapic_client, request, batch, - operation_timeout, - request_timeout, + self._table.default_operation_timeout, + self._table.default_per_request_timeout, self._flow_control.remove_from_flow, ) except MutationsExceptionGroup as e: @@ -335,8 +334,7 @@ async def close(self): """ self.closed = True self._flush_timer_task.cancel() - final_flush = self._schedule_flush() - # wait for all to finish - await asyncio.gather(final_flush, self._flush_timer_task) + self._schedule_flush() + await self._prev_flush # raise unreported exceptions self._raise_exceptions() diff --git a/tests/unit/test_mutations_batcher.py b/tests/unit/test_mutations_batcher.py index a28ca05b4..e949ec173 100644 --- a/tests/unit/test_mutations_batcher.py +++ b/tests/unit/test_mutations_batcher.py @@ -14,12 +14,22 @@ import pytest import asyncio +import unittest # try/except added for compatibility with python < 3.8 try: from unittest import mock + from unittest.mock import AsyncMock except ImportError: # pragma: NO COVER import mock # type: ignore + from mock import AsyncMock # type: ignore + + +def _make_mutation(count=1, size=1): + mutation = mock.Mock() + mutation.size.return_value = size + mutation.mutations = [mock.Mock()] * count + return mutation class Test_FlowControl: @@ -28,12 +38,6 @@ def 
_make_one(self, max_mutation_count=10, max_mutation_bytes=100): return _FlowControl(max_mutation_count, max_mutation_bytes) - def _make_mutation(self, count=1, size=1): - mutation = mock.Mock() - mutation.size.return_value = size - mutation.mutations = [mock.Mock()] * count - return mutation - def test_ctor(self): max_mutation_count = 9 max_mutation_bytes = 19 @@ -115,7 +119,7 @@ async def test_remove_from_flow_value_update( instance = self._make_one() instance.in_flight_mutation_count = existing_count instance.in_flight_mutation_bytes = existing_size - mutation = self._make_mutation(added_count, added_size) + mutation = _make_mutation(added_count, added_size) await instance.remove_from_flow(mutation) assert instance.in_flight_mutation_count == new_count assert instance.in_flight_mutation_bytes == new_size @@ -138,7 +142,7 @@ async def task_routine(): # should be blocked due to capacity assert task.done() is False # try changing size - mutation = self._make_mutation(count=0, size=5) + mutation = _make_mutation(count=0, size=5) await instance.remove_from_flow(mutation) await asyncio.sleep(0.05) assert instance.in_flight_mutation_count == 10 @@ -146,7 +150,7 @@ async def task_routine(): assert task.done() is False # try changing count instance.in_flight_mutation_bytes = 10 - mutation = self._make_mutation(count=5, size=0) + mutation = _make_mutation(count=5, size=0) await instance.remove_from_flow(mutation) await asyncio.sleep(0.05) assert instance.in_flight_mutation_count == 5 @@ -154,7 +158,7 @@ async def task_routine(): assert task.done() is False # try changing both instance.in_flight_mutation_count = 10 - mutation = self._make_mutation(count=5, size=5) + mutation = _make_mutation(count=5, size=5) await instance.remove_from_flow(mutation) await asyncio.sleep(0.05) assert instance.in_flight_mutation_count == 5 @@ -187,7 +191,7 @@ async def test_add_to_flow(self, mutations, count_cap, size_cap, expected_result """ Test batching with various flow control settings """ - mutation_objs = [self._make_mutation(count=m[0], size=m[1]) for m in mutations] + mutation_objs = [_make_mutation(count=m[0], size=m[1]) for m in mutations] instance = self._make_one(count_cap, size_cap) i = 0 async for batch in instance.add_to_flow(mutation_objs): @@ -210,8 +214,8 @@ async def test_add_to_flow_invalid_mutation(self): batching should raise exception for mutations larger than limits to avoid deadlock """ instance = self._make_one(2, 3) - large_size_mutation = self._make_mutation(count=1, size=10) - large_count_mutation = self._make_mutation(count=10, size=1) + large_size_mutation = _make_mutation(count=1, size=10) + large_count_mutation = _make_mutation(count=10, size=1) with pytest.raises(ValueError) as e: async for _ in instance.add_to_flow([large_size_mutation]): pass @@ -224,30 +228,488 @@ async def test_add_to_flow_invalid_mutation(self): class TestMutationsBatcher: def _make_one(self, table=None, **kwargs): - from google.cloud.firestore_v1.batch import MutationsBatcher + from google.cloud.bigtable.mutations_batcher import MutationsBatcher if table is None: table = mock.Mock() return MutationsBatcher(table, **kwargs) - def test_ctor(self): - pass + @unittest.mock.patch("google.cloud.bigtable.mutations_batcher.MutationsBatcher._flush_timer") + @pytest.mark.asyncio + async def test_ctor_defaults(self, flush_timer_mock): + table = mock.Mock() + async with self._make_one(table) as instance: + assert instance._table == table + assert instance.closed is False + assert instance._staged_mutations == [] + assert 
instance.exceptions == [] + assert instance._flow_control.max_mutation_count == 100000 + assert instance._flow_control.max_mutation_bytes == 104857600 + assert instance._flow_control.in_flight_mutation_count == 0 + assert instance._flow_control.in_flight_mutation_bytes == 0 + assert instance._entries_processed_since_last_raise == 0 + await asyncio.sleep(0) + assert flush_timer_mock.call_count == 1 + assert flush_timer_mock.call_args[0][0] == 5 + assert isinstance(instance._flush_timer_task, asyncio.Task) + + @unittest.mock.patch("google.cloud.bigtable.mutations_batcher.MutationsBatcher._flush_timer") + @pytest.mark.asyncio + async def test_ctor_explicit(self, flush_timer_mock): + """Test with explicit parameters""" + table = mock.Mock() + flush_interval = 20 + flush_limit_count = 17 + flush_limit_bytes = 19 + flow_control_max_count = 1001 + flow_control_max_bytes = 12 + async with self._make_one(table, flush_interval=flush_interval, flush_limit_count=flush_limit_count, flush_limit_bytes=flush_limit_bytes, flow_control_max_count=flow_control_max_count, flow_control_max_bytes=flow_control_max_bytes) as instance: + assert instance._table == table + assert instance.closed is False + assert instance._staged_mutations == [] + assert instance.exceptions == [] + assert instance._flow_control.max_mutation_count == flow_control_max_count + assert instance._flow_control.max_mutation_bytes == flow_control_max_bytes + assert instance._flow_control.in_flight_mutation_count == 0 + assert instance._flow_control.in_flight_mutation_bytes == 0 + assert instance._entries_processed_since_last_raise == 0 + await asyncio.sleep(0) + assert flush_timer_mock.call_count == 1 + assert flush_timer_mock.call_args[0][0] == flush_interval + assert isinstance(instance._flush_timer_task, asyncio.Task) + + @unittest.mock.patch("google.cloud.bigtable.mutations_batcher.MutationsBatcher._flush_timer") + @pytest.mark.asyncio + async def test_ctor_no_limits(self, flush_timer_mock): + """Test with None for flow control and flush limits""" + table = mock.Mock() + flush_interval = None + flush_limit_count = None + flush_limit_bytes = None + flow_control_max_count = None + flow_control_max_bytes = None + async with self._make_one(table, flush_interval=flush_interval, flush_limit_count=flush_limit_count, flush_limit_bytes=flush_limit_bytes, flow_control_max_count=flow_control_max_count, flow_control_max_bytes=flow_control_max_bytes) as instance: + assert instance._table == table + assert instance.closed is False + assert instance._staged_mutations == [] + assert instance.exceptions == [] + assert instance._flow_control.max_mutation_count == float("inf") + assert instance._flow_control.max_mutation_bytes == float("inf") + assert instance._flow_control.in_flight_mutation_count == 0 + assert instance._flow_control.in_flight_mutation_bytes == 0 + assert instance._entries_processed_since_last_raise == 0 + await asyncio.sleep(0) + assert flush_timer_mock.call_count == 1 + assert flush_timer_mock.call_args[0][0] is None + assert isinstance(instance._flush_timer_task, asyncio.Task) + + @unittest.mock.patch("google.cloud.bigtable.mutations_batcher.MutationsBatcher._schedule_flush") + @pytest.mark.asyncio + async def test__flush_timer_w_None(self, flush_mock): + """Empty timer should return immediately""" + async with self._make_one() as instance: + with mock.patch("asyncio.sleep") as sleep_mock: + await instance._flush_timer(None) + assert sleep_mock.call_count == 0 + assert flush_mock.call_count == 0 + + 
@unittest.mock.patch("google.cloud.bigtable.mutations_batcher.MutationsBatcher._schedule_flush") + @pytest.mark.asyncio + async def test__flush_timer_call_when_closed(self, flush_mock): + """closed batcher's timer should return immediately""" + async with self._make_one() as instance: + await instance.close() + flush_mock.reset_mock() + with mock.patch("asyncio.sleep") as sleep_mock: + await instance._flush_timer(1) + assert sleep_mock.call_count == 0 + assert flush_mock.call_count == 0 + + @unittest.mock.patch("google.cloud.bigtable.mutations_batcher.MutationsBatcher._schedule_flush") + @pytest.mark.asyncio + async def test__flush_timer(self, flush_mock): + """Timer should continue to call _schedule_flush in a loop""" + async with self._make_one() as instance: + instance._staged_mutations = [mock.Mock()] + loop_num = 3 + expected_sleep = 12 + with mock.patch("asyncio.sleep") as sleep_mock: + sleep_mock.side_effect = [None] * loop_num + [asyncio.CancelledError()] + try: + await instance._flush_timer(expected_sleep) + except asyncio.CancelledError: + pass + assert sleep_mock.call_count == loop_num + 1 + sleep_mock.assert_called_with(expected_sleep) + assert flush_mock.call_count == loop_num - def test_context_manager(self): - pass + @unittest.mock.patch("google.cloud.bigtable.mutations_batcher.MutationsBatcher._schedule_flush") + @pytest.mark.asyncio + async def test__flush_timer_no_mutations(self, flush_mock): + """Timer should not flush if no new mutations have been staged""" + async with self._make_one() as instance: + loop_num = 3 + expected_sleep = 12 + with mock.patch("asyncio.sleep") as sleep_mock: + sleep_mock.side_effect = [None] * loop_num + [asyncio.CancelledError()] + try: + await instance._flush_timer(expected_sleep) + except asyncio.CancelledError: + pass + assert sleep_mock.call_count == loop_num + 1 + sleep_mock.assert_called_with(expected_sleep) + assert flush_mock.call_count == 0 + + @unittest.mock.patch("google.cloud.bigtable.mutations_batcher.MutationsBatcher._schedule_flush") + @pytest.mark.asyncio + async def test__flush_timer_close(self, flush_mock): + """Timer should continue terminate after close""" + async with self._make_one() as instance: + expected_sleep = 12 + with mock.patch("asyncio.sleep"): + task = asyncio.create_task(instance._flush_timer(expected_sleep)) + # let task run in background + await asyncio.sleep(0.5) + # close the batcher + await instance.close() + await asyncio.sleep(0.1) + # task should be complete + assert task.done() - def test__flush_timer(self): - pass + @pytest.mark.asyncio + async def test_append_closed(self): + """Should raise exception""" + with pytest.raises(RuntimeError): + instance = self._make_one() + await instance.close() + instance.append([mock.Mock()]) - def test_close(self): - pass + @pytest.mark.asyncio + async def test_append_outside_flow_limits(self): + """entries larger than mutation limits are rejected""" + async with self._make_one(flow_control_max_count=1, flow_control_max_bytes=1) as instance: + oversized_entry = _make_mutation(count=0, size=2) + overcount_entry = _make_mutation(count=2, size=0) + with pytest.raises(ValueError) as e: + instance.append(oversized_entry) + assert "Mutation size 2 exceeds flow_control_max_bytes: 1" in str(e.value) + with pytest.raises(ValueError) as e: + instance.append(overcount_entry) + assert "Mutation count 2 exceeds flow_control_max_count: 1" in str(e.value) - def test_append(self): - pass + 
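+    # Illustrative sketch (not a test): the end-user flow that the appending
+    # and flushing tests below exercise, assuming ``table`` is a real Table
+    # and ``entries`` are BulkMutationsEntry objects.
+    async def _example_usage(self, table, entries):
+        async with self._make_one(table, flush_interval=5) as batcher:
+            for entry in entries:
+                batcher.append(entry)  # may schedule a background flush
+            await batcher.flush()  # wait for all staged mutations to be sent
+        # exiting the context closes the batcher, raising
+        # MutationsExceptionGroup if any mutations ultimately failed
+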
@pytest.mark.parametrize("flush_count,flush_bytes,mutation_count,mutation_bytes,expect_flush", [ + (10,10,1,1,False), + (10,10,9,9,False), + (10,10,10,1,True), + (10,10,1,10,True), + (10,10,10,10,True), + (1,1,10,10,True), + (1,1,0,0,False), + ]) + @pytest.mark.asyncio + async def test_append(self, flush_count, flush_bytes, mutation_count, mutation_bytes, expect_flush): + """test appending different mutations, and checking if it causes a flush""" + async with self._make_one(flush_limit_count=flush_count, flush_limit_bytes=flush_bytes) as instance: + assert instance._staged_count == 0 + assert instance._staged_bytes == 0 + assert instance._staged_mutations == [] + mutation = _make_mutation(count=mutation_count, size=mutation_bytes) + with mock.patch.object(instance, "_schedule_flush") as flush_mock: + instance.append(mutation) + assert flush_mock.call_count == bool(expect_flush) + assert instance._staged_count == mutation_count + assert instance._staged_bytes == mutation_bytes + assert instance._staged_mutations == [mutation] + instance._staged_mutations = [] - def test_flush(self): - pass + @pytest.mark.asyncio + async def test_append_multiple(self): + """Append multiple mutations""" + async with self._make_one(flush_limit_count=8, flush_limit_bytes=8) as instance: + assert instance._staged_count == 0 + assert instance._staged_bytes == 0 + assert instance._staged_mutations == [] + mutation = _make_mutation(count=2, size=3) + with mock.patch.object(instance, "_schedule_flush") as flush_mock: + instance.append(mutation) + assert flush_mock.call_count == 0 + assert instance._staged_count == 2 + assert instance._staged_bytes == 3 + assert len(instance._staged_mutations) == 1 + instance.append(mutation) + assert flush_mock.call_count == 0 + assert instance._staged_count == 4 + assert instance._staged_bytes == 6 + assert len(instance._staged_mutations) == 2 + instance.append(mutation) + assert flush_mock.call_count == 1 + assert instance._staged_count == 6 + assert instance._staged_bytes == 9 + assert len(instance._staged_mutations) == 3 + instance._staged_mutations = [] - def test__raise_exceptions(self): - pass + @pytest.mark.parametrize("raise_exceptions", [True, False]) + @pytest.mark.asyncio + async def test_flush(self, raise_exceptions): + """flush should internally call _schedule_flush""" + mock_obj = AsyncMock() + async with self._make_one() as instance: + with mock.patch.object(instance, "_schedule_flush") as flush_mock: + with mock.patch.object(instance, "_raise_exceptions") as raise_mock: + flush_mock.return_value = mock_obj.__call__() + if not raise_exceptions: + await instance.flush(raise_exceptions=False) + else: + await instance.flush() + assert flush_mock.call_count == 1 + assert mock_obj.await_count == 1 + assert raise_mock.call_count == int(raise_exceptions) + + @pytest.mark.asyncio + async def test_schedule_flush_no_mutations(self): + """schedule flush should return prev_flush if no new mutations""" + async with self._make_one() as instance: + orig_flush = instance._prev_flush + with mock.patch.object(instance, "_flush_internal") as flush_mock: + for i in range(3): + instance._schedule_flush() + assert flush_mock.call_count == 0 + assert instance._prev_flush == orig_flush + + + @pytest.mark.asyncio + async def test_schedule_flush_with_mutations(self): + """if new mutations exist, should update prev_flush to a new flush task""" + async with self._make_one() as instance: + orig_flush = instance._prev_flush + with mock.patch.object(instance, "_flush_internal") as flush_mock: + 
for i in range(1,4): + instance._staged_mutations = [mock.Mock()] + instance._schedule_flush() + assert instance._staged_mutations == [] + assert instance._staged_count == 0 + assert instance._staged_bytes == 0 + assert flush_mock.call_count == i + assert instance._prev_flush != orig_flush + orig_flush = instance._prev_flush + + @pytest.mark.asyncio + async def test__flush_internal(self): + """ + _flush_internal should: + - await previous flush call + - delegate batching to _flow_control + - call _execute_mutate_rows on each batch + - update self.exceptions and self._entries_processed_since_last_raise + """ + num_entries = 10 + async with self._make_one() as instance: + with mock.patch.object(instance, "_execute_mutate_rows") as execute_mock: + with mock.patch.object(instance._flow_control, "add_to_flow") as flow_mock: + # mock flow control to always return a single batch + async def gen(x): + yield x + flow_mock.side_effect = lambda x: gen(x) + prev_flush_mock = AsyncMock() + prev_flush = prev_flush_mock.__call__() + mutations = [_make_mutation(count=1, size=1)] * num_entries + await instance._flush_internal(mutations, prev_flush) + assert prev_flush_mock.await_count == 1 + assert instance._entries_processed_since_last_raise == num_entries + assert execute_mock.call_count == 1 + assert flow_mock.call_count == 1 + assert instance.exceptions == [] + + + @pytest.mark.parametrize("num_starting,num_new_errors,expected_total_errors", [ + (0, 0, 0), + (0, 1, 1), + (0, 2, 2), + (1, 0, 1), + (1, 1, 2), + (10, 2, 12), + ]) + @pytest.mark.asyncio + async def test__flush_internal_with_errors(self, num_starting, num_new_errors, expected_total_errors): + """ + errors returned from _execute_mutate_rows should be added to self.exceptions + """ + from google.cloud.bigtable import exceptions + num_entries = 10 + expected_errors = [exceptions.FailedMutationEntryError(mock.Mock(), mock.Mock(), ValueError())] * num_new_errors + async with self._make_one() as instance: + instance.exceptions = [mock.Mock()] * num_starting + with mock.patch.object(instance, "_execute_mutate_rows") as execute_mock: + execute_mock.return_value = expected_errors + with mock.patch.object(instance._flow_control, "add_to_flow") as flow_mock: + # mock flow control to always return a single batch + async def gen(x): + yield x + flow_mock.side_effect = lambda x: gen(x) + prev_flush_mock = AsyncMock() + prev_flush = prev_flush_mock.__call__() + mutations = [_make_mutation(count=1, size=1)] * num_entries + await instance._flush_internal(mutations, prev_flush) + assert prev_flush_mock.await_count == 1 + assert instance._entries_processed_since_last_raise == num_entries + assert execute_mock.call_count == 1 + assert flow_mock.call_count == 1 + assert len(instance.exceptions) == expected_total_errors + for i in range(num_starting, expected_total_errors): + assert instance.exceptions[i] == expected_errors[i - num_starting] + instance.exceptions = [] + + async def _mock_gapic_return(self, num=5): + from google.cloud.bigtable_v2.types import MutateRowsResponse + from google.rpc import status_pb2 + async def gen(num): + for i in range(num): + entry = MutateRowsResponse.Entry(index=i, status=status_pb2.Status(code=0)) + yield MutateRowsResponse(entries=[entry]) + return gen(num) + + @pytest.mark.asyncio + async def test_manual_flush_end_to_end(self): + """Test full flush process with minimal mocking""" + + num_nutations = 10 + mutations = [_make_mutation(count=2, size=2)] * num_nutations + + async with self._make_one(flow_control_max_count=3, 
flow_control_max_bytes=3) as instance: + instance._table.default_operation_timeout = 10 + instance._table.default_per_request_timeout = 9 + with mock.patch.object(instance._table.client._gapic_client, "mutate_rows") as gapic_mock: + gapic_mock.side_effect = lambda *args, **kwargs: self._mock_gapic_return(num_nutations) + for m in mutations: + instance.append(m) + assert instance._entries_processed_since_last_raise == 0 + await instance.flush() + assert instance._entries_processed_since_last_raise == num_nutations + + @pytest.mark.asyncio + async def test_timer_flush_end_to_end(self): + """Flush should automatically trigger after flush_interval""" + num_nutations = 10 + mutations = [_make_mutation(count=2, size=2)] * num_nutations + + async with self._make_one(flush_interval=0.05) as instance: + instance._table.default_operation_timeout = 10 + instance._table.default_per_request_timeout = 9 + with mock.patch.object(instance._table.client._gapic_client, "mutate_rows") as gapic_mock: + gapic_mock.side_effect = lambda *args, **kwargs: self._mock_gapic_return(num_nutations) + for m in mutations: + instance.append(m) + assert instance._entries_processed_since_last_raise == 0 + # let flush trigger due to timer + await asyncio.sleep(0.1) + assert instance._entries_processed_since_last_raise == num_nutations + + @pytest.mark.asyncio + @unittest.mock.patch("google.cloud.bigtable.mutations_batcher._mutate_rows_operation") + async def test__execute_mutate_rows(self, mutate_rows): + from google.cloud.bigtable import _mutate_rows + table = mock.Mock() + table.table_name = "test-table" + table.app_profile_id = "test-app-profile" + table.default_operation_timeout = 17 + table.default_per_request_timeout = 13 + async with self._make_one(table) as instance: + batch = [mock.Mock()] + result = await instance._execute_mutate_rows(batch) + assert mutate_rows.call_count == 1 + assert mutate_rows.await_count == 1 + args, _ = mutate_rows.call_args + assert args[0] == table.client._gapic_client + assert args[2] == batch + assert args[3] == 17 + assert args[4] == 13 + assert args[5] == instance._flow_control.remove_from_flow + request = args[1] + assert request["table_name"] == "test-table" + assert request["app_profile_id"] == "test-app-profile" + assert len(request.keys()) == 2 + assert result == [] + + @pytest.mark.asyncio + @unittest.mock.patch("google.cloud.bigtable.mutations_batcher._mutate_rows_operation") + async def test__execute_mutate_rows_returns_errors(self, mutate_rows): + """Errors from operation should be retruned as list""" + from google.cloud.bigtable import _mutate_rows + from google.cloud.bigtable.exceptions import MutationsExceptionGroup, FailedMutationEntryError + err1 = FailedMutationEntryError(0, mock.Mock(), RuntimeError("test error")) + err2 = FailedMutationEntryError(1, mock.Mock(), RuntimeError("test error")) + mutate_rows.side_effect = MutationsExceptionGroup([err1, err2], 10) + table = mock.Mock() + async with self._make_one(table) as instance: + batch = [mock.Mock()] + result = await instance._execute_mutate_rows(batch) + assert len(result) == 2 + assert result[0] == err1 + assert result[1] == err2 + # indices should be set to None + assert result[0].index is None + assert result[1].index is None + + @pytest.mark.asyncio + async def test__raise_exceptions(self): + """Raise exceptions and reset error state""" + from google.cloud.bigtable import exceptions + expected_total = 1201 + expected_exceptions = [mock.Mock()] * 3 + async with self._make_one() as instance: + instance.exceptions = 
expected_exceptions + instance._entries_processed_since_last_raise = expected_total + try: + instance._raise_exceptions() + except exceptions.MutationsExceptionGroup as exc: + assert list(exc.exceptions) == expected_exceptions + assert str(expected_total) in str(exc) + assert instance._entries_processed_since_last_raise == 0 + assert instance.exceptions == [] + # try calling again + instance._raise_exceptions() + + @pytest.mark.asyncio + async def test___aenter__(self): + """Should return self""" + async with self._make_one() as instance: + assert await instance.__aenter__() == instance + + @pytest.mark.asyncio + async def test___aexit__(self): + """aexit should call close""" + async with self._make_one() as instance: + with mock.patch.object(instance, "close") as close_mock: + await instance.__aexit__(None, None, None) + assert close_mock.call_count == 1 + + @pytest.mark.asyncio + async def test_close(self): + """Should clean up all resources""" + async with self._make_one() as instance: + with mock.patch.object(instance, "_schedule_flush") as flush_mock: + with mock.patch.object(instance, "_raise_exceptions") as raise_mock: + await instance.close() + assert instance.closed is True + assert instance._flush_timer_task.done() is True + assert instance._prev_flush.done() is True + assert flush_mock.call_count == 1 + assert raise_mock.call_count == 1 + + @pytest.mark.asyncio + async def test_close_w_exceptions(self): + """Raise exceptions on close""" + from google.cloud.bigtable import exceptions + expected_total = 10 + expected_exceptions = [mock.Mock()] + async with self._make_one() as instance: + instance.exceptions = expected_exceptions + instance._entries_processed_since_last_raise = expected_total + try: + await instance.close() + except exceptions.MutationsExceptionGroup as exc: + assert list(exc.exceptions) == expected_exceptions + assert str(expected_total) in str(exc) + assert instance._entries_processed_since_last_raise == 0 + assert instance.exceptions == [] From 2532b6329e220da51493b7321806427435d50464 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 5 May 2023 14:34:12 -0700 Subject: [PATCH 072/213] ran blacken --- google/cloud/bigtable/mutations_batcher.py | 1 - tests/unit/test_mutations_batcher.py | 173 +++++++++++++++------ 2 files changed, 124 insertions(+), 50 deletions(-) diff --git a/google/cloud/bigtable/mutations_batcher.py b/google/cloud/bigtable/mutations_batcher.py index e6287e420..e8c33c09b 100644 --- a/google/cloud/bigtable/mutations_batcher.py +++ b/google/cloud/bigtable/mutations_batcher.py @@ -254,7 +254,6 @@ def _schedule_flush(self) -> asyncio.Task[None]: ) return self._prev_flush - async def _flush_internal( self, new_entries: list[BulkMutationsEntry], diff --git a/tests/unit/test_mutations_batcher.py b/tests/unit/test_mutations_batcher.py index e949ec173..7c685988e 100644 --- a/tests/unit/test_mutations_batcher.py +++ b/tests/unit/test_mutations_batcher.py @@ -235,7 +235,9 @@ def _make_one(self, table=None, **kwargs): return MutationsBatcher(table, **kwargs) - @unittest.mock.patch("google.cloud.bigtable.mutations_batcher.MutationsBatcher._flush_timer") + @unittest.mock.patch( + "google.cloud.bigtable.mutations_batcher.MutationsBatcher._flush_timer" + ) @pytest.mark.asyncio async def test_ctor_defaults(self, flush_timer_mock): table = mock.Mock() @@ -254,7 +256,9 @@ async def test_ctor_defaults(self, flush_timer_mock): assert flush_timer_mock.call_args[0][0] == 5 assert isinstance(instance._flush_timer_task, asyncio.Task) - 
@unittest.mock.patch("google.cloud.bigtable.mutations_batcher.MutationsBatcher._flush_timer") + @unittest.mock.patch( + "google.cloud.bigtable.mutations_batcher.MutationsBatcher._flush_timer" + ) @pytest.mark.asyncio async def test_ctor_explicit(self, flush_timer_mock): """Test with explicit parameters""" @@ -264,7 +268,14 @@ async def test_ctor_explicit(self, flush_timer_mock): flush_limit_bytes = 19 flow_control_max_count = 1001 flow_control_max_bytes = 12 - async with self._make_one(table, flush_interval=flush_interval, flush_limit_count=flush_limit_count, flush_limit_bytes=flush_limit_bytes, flow_control_max_count=flow_control_max_count, flow_control_max_bytes=flow_control_max_bytes) as instance: + async with self._make_one( + table, + flush_interval=flush_interval, + flush_limit_count=flush_limit_count, + flush_limit_bytes=flush_limit_bytes, + flow_control_max_count=flow_control_max_count, + flow_control_max_bytes=flow_control_max_bytes, + ) as instance: assert instance._table == table assert instance.closed is False assert instance._staged_mutations == [] @@ -279,7 +290,9 @@ async def test_ctor_explicit(self, flush_timer_mock): assert flush_timer_mock.call_args[0][0] == flush_interval assert isinstance(instance._flush_timer_task, asyncio.Task) - @unittest.mock.patch("google.cloud.bigtable.mutations_batcher.MutationsBatcher._flush_timer") + @unittest.mock.patch( + "google.cloud.bigtable.mutations_batcher.MutationsBatcher._flush_timer" + ) @pytest.mark.asyncio async def test_ctor_no_limits(self, flush_timer_mock): """Test with None for flow control and flush limits""" @@ -289,7 +302,14 @@ async def test_ctor_no_limits(self, flush_timer_mock): flush_limit_bytes = None flow_control_max_count = None flow_control_max_bytes = None - async with self._make_one(table, flush_interval=flush_interval, flush_limit_count=flush_limit_count, flush_limit_bytes=flush_limit_bytes, flow_control_max_count=flow_control_max_count, flow_control_max_bytes=flow_control_max_bytes) as instance: + async with self._make_one( + table, + flush_interval=flush_interval, + flush_limit_count=flush_limit_count, + flush_limit_bytes=flush_limit_bytes, + flow_control_max_count=flow_control_max_count, + flow_control_max_bytes=flow_control_max_bytes, + ) as instance: assert instance._table == table assert instance.closed is False assert instance._staged_mutations == [] @@ -304,7 +324,9 @@ async def test_ctor_no_limits(self, flush_timer_mock): assert flush_timer_mock.call_args[0][0] is None assert isinstance(instance._flush_timer_task, asyncio.Task) - @unittest.mock.patch("google.cloud.bigtable.mutations_batcher.MutationsBatcher._schedule_flush") + @unittest.mock.patch( + "google.cloud.bigtable.mutations_batcher.MutationsBatcher._schedule_flush" + ) @pytest.mark.asyncio async def test__flush_timer_w_None(self, flush_mock): """Empty timer should return immediately""" @@ -314,7 +336,9 @@ async def test__flush_timer_w_None(self, flush_mock): assert sleep_mock.call_count == 0 assert flush_mock.call_count == 0 - @unittest.mock.patch("google.cloud.bigtable.mutations_batcher.MutationsBatcher._schedule_flush") + @unittest.mock.patch( + "google.cloud.bigtable.mutations_batcher.MutationsBatcher._schedule_flush" + ) @pytest.mark.asyncio async def test__flush_timer_call_when_closed(self, flush_mock): """closed batcher's timer should return immediately""" @@ -326,7 +350,9 @@ async def test__flush_timer_call_when_closed(self, flush_mock): assert sleep_mock.call_count == 0 assert flush_mock.call_count == 0 - 
@unittest.mock.patch("google.cloud.bigtable.mutations_batcher.MutationsBatcher._schedule_flush") + @unittest.mock.patch( + "google.cloud.bigtable.mutations_batcher.MutationsBatcher._schedule_flush" + ) @pytest.mark.asyncio async def test__flush_timer(self, flush_mock): """Timer should continue to call _schedule_flush in a loop""" @@ -344,7 +370,9 @@ async def test__flush_timer(self, flush_mock): sleep_mock.assert_called_with(expected_sleep) assert flush_mock.call_count == loop_num - @unittest.mock.patch("google.cloud.bigtable.mutations_batcher.MutationsBatcher._schedule_flush") + @unittest.mock.patch( + "google.cloud.bigtable.mutations_batcher.MutationsBatcher._schedule_flush" + ) @pytest.mark.asyncio async def test__flush_timer_no_mutations(self, flush_mock): """Timer should not flush if no new mutations have been staged""" @@ -361,7 +389,9 @@ async def test__flush_timer_no_mutations(self, flush_mock): sleep_mock.assert_called_with(expected_sleep) assert flush_mock.call_count == 0 - @unittest.mock.patch("google.cloud.bigtable.mutations_batcher.MutationsBatcher._schedule_flush") + @unittest.mock.patch( + "google.cloud.bigtable.mutations_batcher.MutationsBatcher._schedule_flush" + ) @pytest.mark.asyncio async def test__flush_timer_close(self, flush_mock): """Timer should continue terminate after close""" @@ -388,7 +418,9 @@ async def test_append_closed(self): @pytest.mark.asyncio async def test_append_outside_flow_limits(self): """entries larger than mutation limits are rejected""" - async with self._make_one(flow_control_max_count=1, flow_control_max_bytes=1) as instance: + async with self._make_one( + flow_control_max_count=1, flow_control_max_bytes=1 + ) as instance: oversized_entry = _make_mutation(count=0, size=2) overcount_entry = _make_mutation(count=2, size=0) with pytest.raises(ValueError) as e: @@ -398,19 +430,26 @@ async def test_append_outside_flow_limits(self): instance.append(overcount_entry) assert "Mutation count 2 exceeds flow_control_max_count: 1" in str(e.value) - @pytest.mark.parametrize("flush_count,flush_bytes,mutation_count,mutation_bytes,expect_flush", [ - (10,10,1,1,False), - (10,10,9,9,False), - (10,10,10,1,True), - (10,10,1,10,True), - (10,10,10,10,True), - (1,1,10,10,True), - (1,1,0,0,False), - ]) + @pytest.mark.parametrize( + "flush_count,flush_bytes,mutation_count,mutation_bytes,expect_flush", + [ + (10, 10, 1, 1, False), + (10, 10, 9, 9, False), + (10, 10, 10, 1, True), + (10, 10, 1, 10, True), + (10, 10, 10, 10, True), + (1, 1, 10, 10, True), + (1, 1, 0, 0, False), + ], + ) @pytest.mark.asyncio - async def test_append(self, flush_count, flush_bytes, mutation_count, mutation_bytes, expect_flush): + async def test_append( + self, flush_count, flush_bytes, mutation_count, mutation_bytes, expect_flush + ): """test appending different mutations, and checking if it causes a flush""" - async with self._make_one(flush_limit_count=flush_count, flush_limit_bytes=flush_bytes) as instance: + async with self._make_one( + flush_limit_count=flush_count, flush_limit_bytes=flush_bytes + ) as instance: assert instance._staged_count == 0 assert instance._staged_bytes == 0 assert instance._staged_mutations == [] @@ -477,14 +516,13 @@ async def test_schedule_flush_no_mutations(self): assert flush_mock.call_count == 0 assert instance._prev_flush == orig_flush - @pytest.mark.asyncio async def test_schedule_flush_with_mutations(self): """if new mutations exist, should update prev_flush to a new flush task""" async with self._make_one() as instance: orig_flush = instance._prev_flush 
with mock.patch.object(instance, "_flush_internal") as flush_mock: - for i in range(1,4): + for i in range(1, 4): instance._staged_mutations = [mock.Mock()] instance._schedule_flush() assert instance._staged_mutations == [] @@ -506,10 +544,13 @@ async def test__flush_internal(self): num_entries = 10 async with self._make_one() as instance: with mock.patch.object(instance, "_execute_mutate_rows") as execute_mock: - with mock.patch.object(instance._flow_control, "add_to_flow") as flow_mock: + with mock.patch.object( + instance._flow_control, "add_to_flow" + ) as flow_mock: # mock flow control to always return a single batch async def gen(x): yield x + flow_mock.side_effect = lambda x: gen(x) prev_flush_mock = AsyncMock() prev_flush = prev_flush_mock.__call__() @@ -521,31 +562,41 @@ async def gen(x): assert flow_mock.call_count == 1 assert instance.exceptions == [] - - @pytest.mark.parametrize("num_starting,num_new_errors,expected_total_errors", [ - (0, 0, 0), - (0, 1, 1), - (0, 2, 2), - (1, 0, 1), - (1, 1, 2), - (10, 2, 12), - ]) + @pytest.mark.parametrize( + "num_starting,num_new_errors,expected_total_errors", + [ + (0, 0, 0), + (0, 1, 1), + (0, 2, 2), + (1, 0, 1), + (1, 1, 2), + (10, 2, 12), + ], + ) @pytest.mark.asyncio - async def test__flush_internal_with_errors(self, num_starting, num_new_errors, expected_total_errors): + async def test__flush_internal_with_errors( + self, num_starting, num_new_errors, expected_total_errors + ): """ errors returned from _execute_mutate_rows should be added to self.exceptions """ from google.cloud.bigtable import exceptions + num_entries = 10 - expected_errors = [exceptions.FailedMutationEntryError(mock.Mock(), mock.Mock(), ValueError())] * num_new_errors + expected_errors = [ + exceptions.FailedMutationEntryError(mock.Mock(), mock.Mock(), ValueError()) + ] * num_new_errors async with self._make_one() as instance: instance.exceptions = [mock.Mock()] * num_starting with mock.patch.object(instance, "_execute_mutate_rows") as execute_mock: execute_mock.return_value = expected_errors - with mock.patch.object(instance._flow_control, "add_to_flow") as flow_mock: + with mock.patch.object( + instance._flow_control, "add_to_flow" + ) as flow_mock: # mock flow control to always return a single batch async def gen(x): yield x + flow_mock.side_effect = lambda x: gen(x) prev_flush_mock = AsyncMock() prev_flush = prev_flush_mock.__call__() @@ -557,16 +608,22 @@ async def gen(x): assert flow_mock.call_count == 1 assert len(instance.exceptions) == expected_total_errors for i in range(num_starting, expected_total_errors): - assert instance.exceptions[i] == expected_errors[i - num_starting] + assert ( + instance.exceptions[i] == expected_errors[i - num_starting] + ) instance.exceptions = [] async def _mock_gapic_return(self, num=5): from google.cloud.bigtable_v2.types import MutateRowsResponse from google.rpc import status_pb2 + async def gen(num): for i in range(num): - entry = MutateRowsResponse.Entry(index=i, status=status_pb2.Status(code=0)) + entry = MutateRowsResponse.Entry( + index=i, status=status_pb2.Status(code=0) + ) yield MutateRowsResponse(entries=[entry]) + return gen(num) @pytest.mark.asyncio @@ -576,11 +633,17 @@ async def test_manual_flush_end_to_end(self): num_nutations = 10 mutations = [_make_mutation(count=2, size=2)] * num_nutations - async with self._make_one(flow_control_max_count=3, flow_control_max_bytes=3) as instance: + async with self._make_one( + flow_control_max_count=3, flow_control_max_bytes=3 + ) as instance: 
instance._table.default_operation_timeout = 10 instance._table.default_per_request_timeout = 9 - with mock.patch.object(instance._table.client._gapic_client, "mutate_rows") as gapic_mock: - gapic_mock.side_effect = lambda *args, **kwargs: self._mock_gapic_return(num_nutations) + with mock.patch.object( + instance._table.client._gapic_client, "mutate_rows" + ) as gapic_mock: + gapic_mock.side_effect = ( + lambda *args, **kwargs: self._mock_gapic_return(num_nutations) + ) for m in mutations: instance.append(m) assert instance._entries_processed_since_last_raise == 0 @@ -596,8 +659,12 @@ async def test_timer_flush_end_to_end(self): async with self._make_one(flush_interval=0.05) as instance: instance._table.default_operation_timeout = 10 instance._table.default_per_request_timeout = 9 - with mock.patch.object(instance._table.client._gapic_client, "mutate_rows") as gapic_mock: - gapic_mock.side_effect = lambda *args, **kwargs: self._mock_gapic_return(num_nutations) + with mock.patch.object( + instance._table.client._gapic_client, "mutate_rows" + ) as gapic_mock: + gapic_mock.side_effect = ( + lambda *args, **kwargs: self._mock_gapic_return(num_nutations) + ) for m in mutations: instance.append(m) assert instance._entries_processed_since_last_raise == 0 @@ -606,9 +673,10 @@ async def test_timer_flush_end_to_end(self): assert instance._entries_processed_since_last_raise == num_nutations @pytest.mark.asyncio - @unittest.mock.patch("google.cloud.bigtable.mutations_batcher._mutate_rows_operation") + @unittest.mock.patch( + "google.cloud.bigtable.mutations_batcher._mutate_rows_operation" + ) async def test__execute_mutate_rows(self, mutate_rows): - from google.cloud.bigtable import _mutate_rows table = mock.Mock() table.table_name = "test-table" table.app_profile_id = "test-app-profile" @@ -632,11 +700,16 @@ async def test__execute_mutate_rows(self, mutate_rows): assert result == [] @pytest.mark.asyncio - @unittest.mock.patch("google.cloud.bigtable.mutations_batcher._mutate_rows_operation") + @unittest.mock.patch( + "google.cloud.bigtable.mutations_batcher._mutate_rows_operation" + ) async def test__execute_mutate_rows_returns_errors(self, mutate_rows): """Errors from operation should be retruned as list""" - from google.cloud.bigtable import _mutate_rows - from google.cloud.bigtable.exceptions import MutationsExceptionGroup, FailedMutationEntryError + from google.cloud.bigtable.exceptions import ( + MutationsExceptionGroup, + FailedMutationEntryError, + ) + err1 = FailedMutationEntryError(0, mock.Mock(), RuntimeError("test error")) err2 = FailedMutationEntryError(1, mock.Mock(), RuntimeError("test error")) mutate_rows.side_effect = MutationsExceptionGroup([err1, err2], 10) @@ -655,6 +728,7 @@ async def test__execute_mutate_rows_returns_errors(self, mutate_rows): async def test__raise_exceptions(self): """Raise exceptions and reset error state""" from google.cloud.bigtable import exceptions + expected_total = 1201 expected_exceptions = [mock.Mock()] * 3 async with self._make_one() as instance: @@ -701,6 +775,7 @@ async def test_close(self): async def test_close_w_exceptions(self): """Raise exceptions on close""" from google.cloud.bigtable import exceptions + expected_total = 10 expected_exceptions = [mock.Mock()] async with self._make_one() as instance: From 8313b57db0abb9c5bffdf0030ba6cb8c6e1a665f Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 5 May 2023 14:40:42 -0700 Subject: [PATCH 073/213] added additional tests --- tests/unit/test__mutate_rows.py | 14 ++++++++++++-- 
tests/unit/test_mutations.py | 20 ++++++++++++++++++++ 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/tests/unit/test__mutate_rows.py b/tests/unit/test__mutate_rows.py index 9748e513e..bfb4e3f23 100644 --- a/tests/unit/test__mutate_rows.py +++ b/tests/unit/test__mutate_rows.py @@ -144,7 +144,8 @@ async def test_partial_success_non_retryable(self): assert errors[2] == [] @pytest.mark.asyncio - async def test_on_terminal_state_no_retries(self): + @pytest.mark.parametrize("is_coroutine_fn", [True, False]) + async def test_on_terminal_state_no_retries(self, is_coroutine_fn): """ Should call on_terminal_state for each successful or non-retryable mutation """ @@ -155,6 +156,8 @@ async def test_on_terminal_state_no_retries(self): failure_mutation = mock.Mock() mutations = {0: success_mutation, 1: failure_mutation, 2: success_mutation_2} callback = mock.Mock() + if is_coroutine_fn: + callback.side_effect = AsyncMock() errors = {0: [], 1: [], 2: []} client = self._make_mock_client(mutations, error_dict={1: 300}) # raise retryable error 3 times, then raise non-retryable error @@ -168,6 +171,8 @@ async def test_on_terminal_state_no_retries(self): callback, ) assert callback.call_count == 3 + if is_coroutine_fn: + assert callback.side_effect.await_count == 3 call_args = callback.call_args_list assert call_args[0][0][0] == success_mutation assert call_args[0][0][1] is None @@ -177,7 +182,8 @@ async def test_on_terminal_state_no_retries(self): assert call_args[2][0][1] is None @pytest.mark.asyncio - async def test_on_terminal_state_with_retries(self): + @pytest.mark.parametrize("is_coroutine_fn", [True, False]) + async def test_on_terminal_state_with_retries(self, is_coroutine_fn): """ Should not call on_terminal_state for retryable mutations """ @@ -191,6 +197,8 @@ async def test_on_terminal_state_with_retries(self): failure_mutation = mock.Mock() mutations = {0: success_mutation, 1: failure_mutation, 2: success_mutation_2} callback = mock.Mock() + if is_coroutine_fn: + callback.side_effect = AsyncMock() errors = {0: [], 1: [], 2: []} client = self._make_mock_client(mutations, error_dict={1: 300}) # raise retryable error 3 times, then raise non-retryable error @@ -210,3 +218,5 @@ async def test_on_terminal_state_with_retries(self): assert call_args[0][0][1] is None assert call_args[1][0][0] == success_mutation_2 assert call_args[1][0][1] is None + if is_coroutine_fn: + assert callback.side_effect.await_count == 2 diff --git a/tests/unit/test_mutations.py b/tests/unit/test_mutations.py index 03c2ceb69..645f2c04a 100644 --- a/tests/unit/test_mutations.py +++ b/tests/unit/test_mutations.py @@ -43,6 +43,16 @@ def test___str__(self): assert self_mock._to_dict.called assert str_value == str(self_mock._to_dict.return_value) + @pytest.mark.parametrize("test_dict", [{}, {"key": "value"}]) + def test_size(self, test_dict): + from sys import getsizeof + + """Size should return size of dict representation""" + self_mock = mock.Mock() + self_mock._to_dict.return_value = test_dict + size_value = self._target_class().size(self_mock) + assert size_value == getsizeof(test_dict) + class TestSetCell: def _target_class(self): @@ -344,3 +354,13 @@ def test_empty_mutations(self): with pytest.raises(ValueError) as e: self._make_one("row_key", []) assert "must not be empty" in str(e.value) + + @pytest.mark.parametrize("test_dict", [{}, {"key": "value"}]) + def test_size(self, test_dict): + from sys import getsizeof + + """Size should return size of dict representation""" + self_mock = mock.Mock() + 
self_mock._to_dict.return_value = test_dict + size_value = self._target_class().size(self_mock) + assert size_value == getsizeof(test_dict) From 01a16f339375f3d8d0bd1c904bbfc6232a11bcbb Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 5 May 2023 14:48:01 -0700 Subject: [PATCH 074/213] made exceptions into a tuple --- google/cloud/bigtable/mutations.py | 2 +- tests/unit/test_mutations.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/google/cloud/bigtable/mutations.py b/google/cloud/bigtable/mutations.py index f2c20502e..f6e530af0 100644 --- a/google/cloud/bigtable/mutations.py +++ b/google/cloud/bigtable/mutations.py @@ -120,7 +120,7 @@ def __init__(self, row_key: bytes | str, mutations: Mutation | list[Mutation]): if isinstance(mutations, Mutation): mutations = [mutations] self.row_key = row_key - self.mutations = mutations + self.mutations = tuple(mutations) def _to_dict(self) -> dict[str, Any]: return { diff --git a/tests/unit/test_mutations.py b/tests/unit/test_mutations.py index 67187ca0e..b6f62d32a 100644 --- a/tests/unit/test_mutations.py +++ b/tests/unit/test_mutations.py @@ -282,14 +282,14 @@ def test_ctor(self): expected_mutations = [mock.Mock()] instance = self._make_one(expected_key, expected_mutations) assert instance.row_key == expected_key - assert instance.mutations == expected_mutations + assert list(instance.mutations) == expected_mutations def test_ctor_str_key(self): expected_key = "row_key" - expected_mutations = [mock.Mock()] + expected_mutations = [mock.Mock(), mock.Mock()] instance = self._make_one(expected_key, expected_mutations) assert instance.row_key == b"row_key" - assert instance.mutations == expected_mutations + assert list(instance.mutations) == expected_mutations def test_ctor_single_mutation(self): from google.cloud.bigtable.mutations import DeleteAllFromRow @@ -298,7 +298,7 @@ def test_ctor_single_mutation(self): expected_mutations = DeleteAllFromRow() instance = self._make_one(expected_key, expected_mutations) assert instance.row_key == expected_key - assert instance.mutations == [expected_mutations] + assert instance.mutations == (expected_mutations,) def test__to_dict(self): expected_key = "row_key" From b7d3fcf90c18c503144ccc037d8c500fd3075709 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 5 May 2023 15:26:04 -0700 Subject: [PATCH 075/213] added warning for unflushed mutation entries --- google/cloud/bigtable/mutations_batcher.py | 14 ++++++++ tests/unit/test_mutations_batcher.py | 38 ++++++++++++++++++++++ 2 files changed, 52 insertions(+) diff --git a/google/cloud/bigtable/mutations_batcher.py b/google/cloud/bigtable/mutations_batcher.py index e8c33c09b..b475980e1 100644 --- a/google/cloud/bigtable/mutations_batcher.py +++ b/google/cloud/bigtable/mutations_batcher.py @@ -15,6 +15,8 @@ from __future__ import annotations import asyncio +import atexit +import warnings from typing import TYPE_CHECKING from google.cloud.bigtable.mutations import BulkMutationsEntry @@ -168,6 +170,7 @@ def __init__( - flow_control_max_bytes: Maximum number of inflight bytes. If None, this limit is ignored. """ + atexit.register(self._on_exit) self.closed: bool = False self._table = table self._staged_mutations: list[BulkMutationsEntry] = [] @@ -337,3 +340,14 @@ async def close(self): await self._prev_flush # raise unreported exceptions self._raise_exceptions() + atexit.unregister(self._on_exit) + + def _on_exit(self): + """ + Called when program is exited. 
Raises warning if unflushed mutations remain + """ + if not self.closed and self._staged_mutations: + warnings.warn( + f"MutationsBatcher for table {self._table.table_name} was not closed. " + f"{len(self._staged_mutations)} Unflushed mutations will not be sent to the server." + ) diff --git a/tests/unit/test_mutations_batcher.py b/tests/unit/test_mutations_batcher.py index 7c685988e..02ab73163 100644 --- a/tests/unit/test_mutations_batcher.py +++ b/tests/unit/test_mutations_batcher.py @@ -788,3 +788,41 @@ async def test_close_w_exceptions(self): assert str(expected_total) in str(exc) assert instance._entries_processed_since_last_raise == 0 assert instance.exceptions == [] + + @pytest.mark.asyncio + async def test__on_exit(self, recwarn): + """Should raise warnings if unflushed mutations exist""" + async with self._make_one() as instance: + # calling without mutations is noop + instance._on_exit() + assert len(recwarn) == 0 + # calling with existing mutations should raise warning + num_left = 4 + instance._staged_mutations = [mock.Mock()] * num_left + with pytest.warns(UserWarning) as w: + instance._on_exit() + assert len(w) == 1 + assert "unflushed mutations" in str(w[0].message).lower() + assert str(num_left) in str(w[0].message) + # calling while closed is noop + instance.closed = True + instance._on_exit() + assert len(recwarn) == 0 + # reset staged mutations for cleanup + instance._staged_mutations = [] + + @pytest.mark.asyncio + async def test_atexit_registration(self): + """Should run _on_exit on program termination""" + import atexit + + with mock.patch( + "google.cloud.bigtable.mutations_batcher.MutationsBatcher._on_exit" + ) as on_exit_mock: + async with self._make_one(): + assert on_exit_mock.call_count == 0 + atexit._run_exitfuncs() + assert on_exit_mock.call_count == 1 + # should not call after close + atexit._run_exitfuncs() + assert on_exit_mock.call_count == 1 From 7521a617c121ead96a21ca47959a53b2db2da090 Mon Sep 17 00:00:00 2001 From: Mariatta Wijaya Date: Wed, 10 May 2023 15:42:01 -0700 Subject: [PATCH 076/213] Feat: Threaded MutationsBatcher (#722) - Batch mutations in a thread to allow concurrent batching - Flush the batch every second - Add flow control to control inflight requests Co-authored-by: Mattie Fu --- docs/batcher.rst | 6 + docs/usage.rst | 1 + google/cloud/bigtable/batcher.py | 366 +++++++++++++++++++++++++------ google/cloud/bigtable/table.py | 6 +- tests/unit/test_batcher.py | 218 ++++++++++++------ 5 files changed, 469 insertions(+), 128 deletions(-) create mode 100644 docs/batcher.rst diff --git a/docs/batcher.rst b/docs/batcher.rst new file mode 100644 index 000000000..9ac335be1 --- /dev/null +++ b/docs/batcher.rst @@ -0,0 +1,6 @@ +Mutations Batching +~~~~~~~~~~~~~~~~~~ + +.. automodule:: google.cloud.bigtable.batcher + :members: + :show-inheritance: diff --git a/docs/usage.rst b/docs/usage.rst index 33bf7bb7f..73a32b039 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -17,6 +17,7 @@ Using the API row-data row-filters row-set + batcher In the hierarchy of API concepts diff --git a/google/cloud/bigtable/batcher.py b/google/cloud/bigtable/batcher.py index 3c23f4436..6b06ec060 100644 --- a/google/cloud/bigtable/batcher.py +++ b/google/cloud/bigtable/batcher.py @@ -13,104 +13,251 @@ # limitations under the License. 
"""User friendly container for Google Cloud Bigtable MutationBatcher.""" +import threading +import queue +import concurrent.futures +import atexit -FLUSH_COUNT = 1000 -MAX_MUTATIONS = 100000 -MAX_ROW_BYTES = 5242880 # 5MB +from google.api_core.exceptions import from_grpc_status +from dataclasses import dataclass -class MaxMutationsError(ValueError): - """The number of mutations for bulk request is too big.""" +FLUSH_COUNT = 100 # after this many elements, send out the batch + +MAX_MUTATION_SIZE = 20 * 1024 * 1024 # 20MB # after this many bytes, send out the batch + +MAX_OUTSTANDING_BYTES = 100 * 1024 * 1024 # 100MB # max inflight byte size. + +MAX_OUTSTANDING_ELEMENTS = 100000 # max inflight mutations. + + +class MutationsBatchError(Exception): + """Error in the batch request""" + + def __init__(self, message, exc): + self.exc = exc + self.message = message + super().__init__(self.message) + + +class _MutationsBatchQueue(object): + """Private Threadsafe Queue to hold rows for batching.""" + + def __init__(self, max_mutation_bytes=MAX_MUTATION_SIZE, flush_count=FLUSH_COUNT): + """Specify the queue constraints""" + self._queue = queue.Queue() + self.total_mutation_count = 0 + self.total_size = 0 + self.max_mutation_bytes = max_mutation_bytes + self.flush_count = flush_count + + def get(self): + """Retrieve an item from the queue. Recalculate queue size.""" + row = self._queue.get() + mutation_size = row.get_mutations_size() + self.total_mutation_count -= len(row._get_mutations()) + self.total_size -= mutation_size + return row + + def put(self, item): + """Insert an item to the queue. Recalculate queue size.""" + + mutation_count = len(item._get_mutations()) + + self._queue.put(item) + + self.total_size += item.get_mutations_size() + self.total_mutation_count += mutation_count + + def full(self): + """Check if the queue is full.""" + if ( + self.total_mutation_count >= self.flush_count + or self.total_size >= self.max_mutation_bytes + ): + return True + return False + + def empty(self): + return self._queue.empty() + + +@dataclass +class _BatchInfo: + """Keeping track of size of a batch""" + + mutations_count: int = 0 + rows_count: int = 0 + mutations_size: int = 0 + + +class _FlowControl(object): + def __init__( + self, + max_mutations=MAX_OUTSTANDING_ELEMENTS, + max_mutation_bytes=MAX_OUTSTANDING_BYTES, + ): + """Control the inflight requests. Keep track of the mutations, row bytes and row counts. + As requests to backend are being made, adjust the number of mutations being processed. + + If threshold is reached, block the flow. + Reopen the flow as requests are finished. + """ + self.max_mutations = max_mutations + self.max_mutation_bytes = max_mutation_bytes + self.inflight_mutations = 0 + self.inflight_size = 0 + self.event = threading.Event() + self.event.set() + + def is_blocked(self): + """Returns True if: + + - inflight mutations >= max_mutations, or + - inflight bytes size >= max_mutation_bytes, or + """ + + return ( + self.inflight_mutations >= self.max_mutations + or self.inflight_size >= self.max_mutation_bytes + ) + + def control_flow(self, batch_info): + """ + Calculate the resources used by this batch + """ + + self.inflight_mutations += batch_info.mutations_count + self.inflight_size += batch_info.mutations_size + self.set_flow_control_status() + + def wait(self): + """ + Wait until flow control pushback has been released. + It awakens as soon as `event` is set. + """ + self.event.wait() + + def set_flow_control_status(self): + """Check the inflight mutations and size. 
+ + If values exceed the allowed threshold, block the event. + """ + if self.is_blocked(): + self.event.clear() # sleep + else: + self.event.set() # awaken the threads + + def release(self, batch_info): + """ + Release the resources. + Decrement the row size to allow enqueued mutations to be run. + """ + self.inflight_mutations -= batch_info.mutations_count + self.inflight_size -= batch_info.mutations_size + self.set_flow_control_status() class MutationsBatcher(object): """A MutationsBatcher is used in batch cases where the number of mutations - is large or unknown. It will store DirectRows in memory until one of the - size limits is reached, or an explicit call to flush() is performed. When - a flush event occurs, the DirectRows in memory will be sent to Cloud + is large or unknown. It will store :class:`DirectRow` in memory until one of the + size limits is reached, or an explicit call to :func:`flush()` is performed. When + a flush event occurs, the :class:`DirectRow` in memory will be sent to Cloud Bigtable. Batching mutations is more efficient than sending individual request. This class is not suited for usage in systems where each mutation must be guaranteed to be sent, since calling mutate may only result in an - in-memory change. In a case of a system crash, any DirectRows remaining in + in-memory change. In a case of a system crash, any :class:`DirectRow` remaining in memory will not necessarily be sent to the service, even after the - completion of the mutate() method. + completion of the :func:`mutate()` method. - TODO: Performance would dramatically improve if this class had the - capability of asynchronous, parallel RPCs. + Note on thread safety: The same :class:`MutationBatcher` cannot be shared by multiple end-user threads. :type table: class :param table: class:`~google.cloud.bigtable.table.Table`. :type flush_count: int :param flush_count: (Optional) Max number of rows to flush. If it - reaches the max number of rows it calls finish_batch() to mutate the - current row batch. Default is FLUSH_COUNT (1000 rows). + reaches the max number of rows it calls finish_batch() to mutate the + current row batch. Default is FLUSH_COUNT (1000 rows). :type max_row_bytes: int :param max_row_bytes: (Optional) Max number of row mutations size to - flush. If it reaches the max number of row mutations size it calls - finish_batch() to mutate the current row batch. Default is MAX_ROW_BYTES - (5 MB). + flush. If it reaches the max number of row mutations size it calls + finish_batch() to mutate the current row batch. Default is MAX_ROW_BYTES + (5 MB). + + :type flush_interval: float + :param flush_interval: (Optional) The interval (in seconds) between asynchronous flush. + Default is 1 second. 
""" - def __init__(self, table, flush_count=FLUSH_COUNT, max_row_bytes=MAX_ROW_BYTES): - self.rows = [] - self.total_mutation_count = 0 - self.total_size = 0 + def __init__( + self, + table, + flush_count=FLUSH_COUNT, + max_row_bytes=MAX_MUTATION_SIZE, + flush_interval=1, + ): + self._rows = _MutationsBatchQueue( + max_mutation_bytes=max_row_bytes, flush_count=flush_count + ) self.table = table - self.flush_count = flush_count - self.max_row_bytes = max_row_bytes + self._executor = concurrent.futures.ThreadPoolExecutor() + atexit.register(self.close) + self._timer = threading.Timer(flush_interval, self.flush) + self._timer.start() + self.flow_control = _FlowControl( + max_mutations=MAX_OUTSTANDING_ELEMENTS, + max_mutation_bytes=MAX_OUTSTANDING_BYTES, + ) + self.futures_mapping = {} + self.exceptions = queue.Queue() + + @property + def flush_count(self): + return self._rows.flush_count + + @property + def max_row_bytes(self): + return self._rows.max_mutation_bytes + + def __enter__(self): + """Starting the MutationsBatcher as a context manager""" + return self def mutate(self, row): """Add a row to the batch. If the current batch meets one of the size - limits, the batch is sent synchronously. + limits, the batch is sent asynchronously. For example: - .. literalinclude:: snippets.py + .. literalinclude:: snippets_table.py :start-after: [START bigtable_api_batcher_mutate] :end-before: [END bigtable_api_batcher_mutate] :dedent: 4 :type row: class - :param row: class:`~google.cloud.bigtable.row.DirectRow`. + :param row: :class:`~google.cloud.bigtable.row.DirectRow`. :raises: One of the following: - * :exc:`~.table._BigtableRetryableError` if any - row returned a transient error. - * :exc:`RuntimeError` if the number of responses doesn't - match the number of rows that were retried - * :exc:`.batcher.MaxMutationsError` if any row exceeds max - mutations count. - """ - mutation_count = len(row._get_mutations()) - if mutation_count > MAX_MUTATIONS: - raise MaxMutationsError( - "The row key {} exceeds the number of mutations {}.".format( - row.row_key, mutation_count - ) - ) - - if (self.total_mutation_count + mutation_count) >= MAX_MUTATIONS: - self.flush() - - self.rows.append(row) - self.total_mutation_count += mutation_count - self.total_size += row.get_mutations_size() + * :exc:`~.table._BigtableRetryableError` if any row returned a transient error. + * :exc:`RuntimeError` if the number of responses doesn't match the number of rows that were retried + """ + self._rows.put(row) - if self.total_size >= self.max_row_bytes or len(self.rows) >= self.flush_count: - self.flush() + if self._rows.full(): + self._flush_async() def mutate_rows(self, rows): """Add multiple rows to the batch. If the current batch meets one of the size - limits, the batch is sent synchronously. + limits, the batch is sent asynchronously. For example: - .. literalinclude:: snippets.py + .. literalinclude:: snippets_table.py :start-after: [START bigtable_api_batcher_mutate_rows] :end-before: [END bigtable_api_batcher_mutate_rows] :dedent: 4 @@ -119,28 +266,119 @@ def mutate_rows(self, rows): :param rows: list:[`~google.cloud.bigtable.row.DirectRow`]. :raises: One of the following: - * :exc:`~.table._BigtableRetryableError` if any - row returned a transient error. - * :exc:`RuntimeError` if the number of responses doesn't - match the number of rows that were retried - * :exc:`.batcher.MaxMutationsError` if any row exceeds max - mutations count. + * :exc:`~.table._BigtableRetryableError` if any row returned a transient error. 
+ * :exc:`RuntimeError` if the number of responses doesn't match the number of rows that were retried """ for row in rows: self.mutate(row) def flush(self): - """Sends the current. batch to Cloud Bigtable. + """Sends the current batch to Cloud Bigtable synchronously. For example: - .. literalinclude:: snippets.py + .. literalinclude:: snippets_table.py :start-after: [START bigtable_api_batcher_flush] :end-before: [END bigtable_api_batcher_flush] :dedent: 4 + :raises: + * :exc:`.batcherMutationsBatchError` if there's any error in the mutations. + """ + rows_to_flush = [] + while not self._rows.empty(): + rows_to_flush.append(self._rows.get()) + response = self._flush_rows(rows_to_flush) + return response + + def _flush_async(self): + """Sends the current batch to Cloud Bigtable asynchronously. + + :raises: + * :exc:`.batcherMutationsBatchError` if there's any error in the mutations. + """ + + rows_to_flush = [] + mutations_count = 0 + mutations_size = 0 + rows_count = 0 + batch_info = _BatchInfo() + + while not self._rows.empty(): + row = self._rows.get() + mutations_count += len(row._get_mutations()) + mutations_size += row.get_mutations_size() + rows_count += 1 + rows_to_flush.append(row) + batch_info.mutations_count = mutations_count + batch_info.rows_count = rows_count + batch_info.mutations_size = mutations_size + + if ( + rows_count >= self.flush_count + or mutations_size >= self.max_row_bytes + or mutations_count >= self.flow_control.max_mutations + or mutations_size >= self.flow_control.max_mutation_bytes + or self._rows.empty() # submit when it reached the end of the queue + ): + # wait for resources to become available, before submitting any new batch + self.flow_control.wait() + # once unblocked, submit a batch + # event flag will be set by control_flow to block subsequent thread, but not blocking this one + self.flow_control.control_flow(batch_info) + future = self._executor.submit(self._flush_rows, rows_to_flush) + self.futures_mapping[future] = batch_info + future.add_done_callback(self._batch_completed_callback) + + # reset and start a new batch + rows_to_flush = [] + mutations_size = 0 + rows_count = 0 + mutations_count = 0 + batch_info = _BatchInfo() + + def _batch_completed_callback(self, future): + """Callback for when the mutation has finished. + + Raise exceptions if there's any. + Release the resources locked by the flow control and allow enqueued tasks to be run. + """ + + processed_rows = self.futures_mapping[future] + self.flow_control.release(processed_rows) + del self.futures_mapping[future] + + def _flush_rows(self, rows_to_flush): + """Mutate the specified rows. + + :raises: + * :exc:`.batcherMutationsBatchError` if there's any error in the mutations. + """ + responses = [] + if len(rows_to_flush) > 0: + response = self.table.mutate_rows(rows_to_flush) + + for result in response: + if result.code != 0: + exc = from_grpc_status(result.code, result.message) + self.exceptions.put(exc) + responses.append(result) + + return responses + + def __exit__(self, exc_type, exc_value, exc_traceback): + """Clean up resources. Flush and shutdown the ThreadPoolExecutor.""" + self.close() + + def close(self): + """Clean up resources. Flush and shutdown the ThreadPoolExecutor. + Any errors will be raised. + + :raises: + * :exc:`.batcherMutationsBatchError` if there's any error in the mutations. 
""" - if len(self.rows) != 0: - self.table.mutate_rows(self.rows) - self.total_mutation_count = 0 - self.total_size = 0 - self.rows = [] + self.flush() + self._executor.shutdown(wait=True) + atexit.unregister(self.close) + if self.exceptions.qsize() > 0: + exc = list(self.exceptions.queue) + raise MutationsBatchError("Errors in batch mutations.", exc=exc) diff --git a/google/cloud/bigtable/table.py b/google/cloud/bigtable/table.py index 8605992ba..e3191a729 100644 --- a/google/cloud/bigtable/table.py +++ b/google/cloud/bigtable/table.py @@ -32,7 +32,7 @@ from google.cloud.bigtable.column_family import _gc_rule_from_pb from google.cloud.bigtable.column_family import ColumnFamily from google.cloud.bigtable.batcher import MutationsBatcher -from google.cloud.bigtable.batcher import FLUSH_COUNT, MAX_ROW_BYTES +from google.cloud.bigtable.batcher import FLUSH_COUNT, MAX_MUTATION_SIZE from google.cloud.bigtable.encryption_info import EncryptionInfo from google.cloud.bigtable.policy import Policy from google.cloud.bigtable.row import AppendRow @@ -844,7 +844,9 @@ def drop_by_prefix(self, row_key_prefix, timeout=None): request={"name": self.name, "row_key_prefix": _to_bytes(row_key_prefix)} ) - def mutations_batcher(self, flush_count=FLUSH_COUNT, max_row_bytes=MAX_ROW_BYTES): + def mutations_batcher( + self, flush_count=FLUSH_COUNT, max_row_bytes=MAX_MUTATION_SIZE + ): """Factory to create a mutation batcher associated with this instance. For example: diff --git a/tests/unit/test_batcher.py b/tests/unit/test_batcher.py index 9ae6ed175..a238b2852 100644 --- a/tests/unit/test_batcher.py +++ b/tests/unit/test_batcher.py @@ -14,122 +14,118 @@ import mock +import time + import pytest from google.cloud.bigtable.row import DirectRow +from google.cloud.bigtable.batcher import ( + _FlowControl, + MutationsBatcher, + MutationsBatchError, +) TABLE_ID = "table-id" TABLE_NAME = "/tables/" + TABLE_ID -def _make_mutation_batcher(table, **kw): - from google.cloud.bigtable.batcher import MutationsBatcher - - return MutationsBatcher(table, **kw) - - def test_mutation_batcher_constructor(): table = _Table(TABLE_NAME) - - mutation_batcher = _make_mutation_batcher(table) - assert table is mutation_batcher.table + with MutationsBatcher(table) as mutation_batcher: + assert table is mutation_batcher.table def test_mutation_batcher_mutate_row(): table = _Table(TABLE_NAME) - mutation_batcher = _make_mutation_batcher(table=table) + with MutationsBatcher(table=table) as mutation_batcher: - rows = [ - DirectRow(row_key=b"row_key"), - DirectRow(row_key=b"row_key_2"), - DirectRow(row_key=b"row_key_3"), - DirectRow(row_key=b"row_key_4"), - ] + rows = [ + DirectRow(row_key=b"row_key"), + DirectRow(row_key=b"row_key_2"), + DirectRow(row_key=b"row_key_3"), + DirectRow(row_key=b"row_key_4"), + ] - mutation_batcher.mutate_rows(rows) - mutation_batcher.flush() + mutation_batcher.mutate_rows(rows) assert table.mutation_calls == 1 def test_mutation_batcher_mutate(): table = _Table(TABLE_NAME) - mutation_batcher = _make_mutation_batcher(table=table) + with MutationsBatcher(table=table) as mutation_batcher: - row = DirectRow(row_key=b"row_key") - row.set_cell("cf1", b"c1", 1) - row.set_cell("cf1", b"c2", 2) - row.set_cell("cf1", b"c3", 3) - row.set_cell("cf1", b"c4", 4) - - mutation_batcher.mutate(row) + row = DirectRow(row_key=b"row_key") + row.set_cell("cf1", b"c1", 1) + row.set_cell("cf1", b"c2", 2) + row.set_cell("cf1", b"c3", 3) + row.set_cell("cf1", b"c4", 4) - mutation_batcher.flush() + mutation_batcher.mutate(row) assert 
table.mutation_calls == 1 def test_mutation_batcher_flush_w_no_rows(): table = _Table(TABLE_NAME) - mutation_batcher = _make_mutation_batcher(table=table) - mutation_batcher.flush() + with MutationsBatcher(table=table) as mutation_batcher: + mutation_batcher.flush() assert table.mutation_calls == 0 def test_mutation_batcher_mutate_w_max_flush_count(): table = _Table(TABLE_NAME) - mutation_batcher = _make_mutation_batcher(table=table, flush_count=3) + with MutationsBatcher(table=table, flush_count=3) as mutation_batcher: - row_1 = DirectRow(row_key=b"row_key_1") - row_2 = DirectRow(row_key=b"row_key_2") - row_3 = DirectRow(row_key=b"row_key_3") + row_1 = DirectRow(row_key=b"row_key_1") + row_2 = DirectRow(row_key=b"row_key_2") + row_3 = DirectRow(row_key=b"row_key_3") - mutation_batcher.mutate(row_1) - mutation_batcher.mutate(row_2) - mutation_batcher.mutate(row_3) + mutation_batcher.mutate(row_1) + mutation_batcher.mutate(row_2) + mutation_batcher.mutate(row_3) assert table.mutation_calls == 1 -@mock.patch("google.cloud.bigtable.batcher.MAX_MUTATIONS", new=3) -def test_mutation_batcher_mutate_with_max_mutations_failure(): - from google.cloud.bigtable.batcher import MaxMutationsError - +@mock.patch("google.cloud.bigtable.batcher.MAX_OUTSTANDING_ELEMENTS", new=3) +def test_mutation_batcher_mutate_w_max_mutations(): table = _Table(TABLE_NAME) - mutation_batcher = _make_mutation_batcher(table=table) + with MutationsBatcher(table=table) as mutation_batcher: - row = DirectRow(row_key=b"row_key") - row.set_cell("cf1", b"c1", 1) - row.set_cell("cf1", b"c2", 2) - row.set_cell("cf1", b"c3", 3) - row.set_cell("cf1", b"c4", 4) + row = DirectRow(row_key=b"row_key") + row.set_cell("cf1", b"c1", 1) + row.set_cell("cf1", b"c2", 2) + row.set_cell("cf1", b"c3", 3) - with pytest.raises(MaxMutationsError): mutation_batcher.mutate(row) + assert table.mutation_calls == 1 + -@mock.patch("google.cloud.bigtable.batcher.MAX_MUTATIONS", new=3) -def test_mutation_batcher_mutate_w_max_mutations(): +def test_mutation_batcher_mutate_w_max_row_bytes(): table = _Table(TABLE_NAME) - mutation_batcher = _make_mutation_batcher(table=table) + with MutationsBatcher( + table=table, max_row_bytes=3 * 1024 * 1024 + ) as mutation_batcher: - row = DirectRow(row_key=b"row_key") - row.set_cell("cf1", b"c1", 1) - row.set_cell("cf1", b"c2", 2) - row.set_cell("cf1", b"c3", 3) + number_of_bytes = 1 * 1024 * 1024 + max_value = b"1" * number_of_bytes - mutation_batcher.mutate(row) - mutation_batcher.flush() + row = DirectRow(row_key=b"row_key") + row.set_cell("cf1", b"c1", max_value) + row.set_cell("cf1", b"c2", max_value) + row.set_cell("cf1", b"c3", max_value) + + mutation_batcher.mutate(row) assert table.mutation_calls == 1 -def test_mutation_batcher_mutate_w_max_row_bytes(): +def test_mutations_batcher_flushed_when_closed(): table = _Table(TABLE_NAME) - mutation_batcher = _make_mutation_batcher( - table=table, max_row_bytes=3 * 1024 * 1024 - ) + mutation_batcher = MutationsBatcher(table=table, max_row_bytes=3 * 1024 * 1024) number_of_bytes = 1 * 1024 * 1024 max_value = b"1" * number_of_bytes @@ -137,13 +133,108 @@ def test_mutation_batcher_mutate_w_max_row_bytes(): row = DirectRow(row_key=b"row_key") row.set_cell("cf1", b"c1", max_value) row.set_cell("cf1", b"c2", max_value) - row.set_cell("cf1", b"c3", max_value) mutation_batcher.mutate(row) + assert table.mutation_calls == 0 + + mutation_batcher.close() + + assert table.mutation_calls == 1 + + +def test_mutations_batcher_context_manager_flushed_when_closed(): + table = 
_Table(TABLE_NAME) + with MutationsBatcher( + table=table, max_row_bytes=3 * 1024 * 1024 + ) as mutation_batcher: + + number_of_bytes = 1 * 1024 * 1024 + max_value = b"1" * number_of_bytes + + row = DirectRow(row_key=b"row_key") + row.set_cell("cf1", b"c1", max_value) + row.set_cell("cf1", b"c2", max_value) + + mutation_batcher.mutate(row) assert table.mutation_calls == 1 +@mock.patch("google.cloud.bigtable.batcher.MutationsBatcher.flush") +def test_mutations_batcher_flush_interval(mocked_flush): + table = _Table(TABLE_NAME) + flush_interval = 0.5 + mutation_batcher = MutationsBatcher(table=table, flush_interval=flush_interval) + + assert mutation_batcher._timer.interval == flush_interval + mocked_flush.assert_not_called() + + time.sleep(0.4) + mocked_flush.assert_not_called() + + time.sleep(0.1) + mocked_flush.assert_called_once_with() + + mutation_batcher.close() + + +def test_mutations_batcher_response_with_error_codes(): + from google.rpc.status_pb2 import Status + + mocked_response = [Status(code=1), Status(code=5)] + + with mock.patch("tests.unit.test_batcher._Table") as mocked_table: + table = mocked_table.return_value + mutation_batcher = MutationsBatcher(table=table) + + row1 = DirectRow(row_key=b"row_key") + row2 = DirectRow(row_key=b"row_key") + table.mutate_rows.return_value = mocked_response + + mutation_batcher.mutate_rows([row1, row2]) + with pytest.raises(MutationsBatchError) as exc: + mutation_batcher.close() + assert exc.value.message == "Errors in batch mutations." + assert len(exc.value.exc) == 2 + + assert exc.value.exc[0].message == mocked_response[0].message + assert exc.value.exc[1].message == mocked_response[1].message + + +def test_flow_control_event_is_set_when_not_blocked(): + flow_control = _FlowControl() + + flow_control.set_flow_control_status() + assert flow_control.event.is_set() + + +def test_flow_control_event_is_not_set_when_blocked(): + flow_control = _FlowControl() + + flow_control.inflight_mutations = flow_control.max_mutations + flow_control.inflight_size = flow_control.max_mutation_bytes + + flow_control.set_flow_control_status() + assert not flow_control.event.is_set() + + +@mock.patch("concurrent.futures.ThreadPoolExecutor.submit") +def test_flush_async_batch_count(mocked_executor_submit): + table = _Table(TABLE_NAME) + mutation_batcher = MutationsBatcher(table=table, flush_count=2) + + number_of_bytes = 1 * 1024 * 1024 + max_value = b"1" * number_of_bytes + for index in range(5): + row = DirectRow(row_key=f"row_key_{index}") + row.set_cell("cf1", b"c1", max_value) + mutation_batcher.mutate(row) + mutation_batcher._flush_async() + + # 3 batches submitted. 2 batches of 2 items, and the last one a single item batch. 
+ assert mocked_executor_submit.call_count == 3 + + class _Instance(object): def __init__(self, client=None): self._client = client @@ -156,5 +247,8 @@ def __init__(self, name, client=None): self.mutation_calls = 0 def mutate_rows(self, rows): + from google.rpc.status_pb2 import Status + self.mutation_calls += 1 - return rows + + return [Status(code=0) for _ in rows] From 8f9de8d58018d97f008dca31b357fdbd2edb5ab7 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Wed, 10 May 2023 16:13:17 -0700 Subject: [PATCH 077/213] chore(main): release 2.18.0 (#757) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- .release-please-manifest.json | 2 +- CHANGELOG.md | 19 +++++++++++++++++++ google/cloud/bigtable/gapic_version.py | 2 +- google/cloud/bigtable_admin/gapic_version.py | 2 +- .../cloud/bigtable_admin_v2/gapic_version.py | 2 +- google/cloud/bigtable_v2/gapic_version.py | 2 +- 6 files changed, 24 insertions(+), 5 deletions(-) diff --git a/.release-please-manifest.json b/.release-please-manifest.json index 882f663e6..a627e662e 100644 --- a/.release-please-manifest.json +++ b/.release-please-manifest.json @@ -1,3 +1,3 @@ { - ".": "2.17.0" + ".": "2.18.0" } \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index 78b4d1b29..2d7fe5141 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,25 @@ [1]: https://pypi.org/project/google-cloud-bigtable/#history +## [2.18.0](https://github.com/googleapis/python-bigtable/compare/v2.17.0...v2.18.0) (2023-05-10) + + +### Features + +* Publish RateLimitInfo and FeatureFlag protos ([#768](https://github.com/googleapis/python-bigtable/issues/768)) ([171fea6](https://github.com/googleapis/python-bigtable/commit/171fea6de57a47f92a2a56050f8bfe7518144df7)) +* Threaded MutationsBatcher ([#722](https://github.com/googleapis/python-bigtable/issues/722)) ([7521a61](https://github.com/googleapis/python-bigtable/commit/7521a617c121ead96a21ca47959a53b2db2da090)) + + +### Bug Fixes + +* Pass the "retry" when calling read_rows. ([#759](https://github.com/googleapis/python-bigtable/issues/759)) ([505273b](https://github.com/googleapis/python-bigtable/commit/505273b72bf83d8f92d0e0a92d62f22bce96cc3d)) + + +### Documentation + +* Fix delete from column family example ([#764](https://github.com/googleapis/python-bigtable/issues/764)) ([128b4e1](https://github.com/googleapis/python-bigtable/commit/128b4e1f3eea2dad903d84c8f2933b17a5f0d226)) +* Fix formatting of request arg in docstring ([#756](https://github.com/googleapis/python-bigtable/issues/756)) ([45d3e43](https://github.com/googleapis/python-bigtable/commit/45d3e4308c4f494228c2e6e18a36285c557cb0c3)) + ## [2.17.0](https://github.com/googleapis/python-bigtable/compare/v2.16.0...v2.17.0) (2023-03-01) diff --git a/google/cloud/bigtable/gapic_version.py b/google/cloud/bigtable/gapic_version.py index 8d4f4cfb6..f09943f6b 100644 --- a/google/cloud/bigtable/gapic_version.py +++ b/google/cloud/bigtable/gapic_version.py @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# -__version__ = "2.17.0" # {x-release-please-version} +__version__ = "2.18.0" # {x-release-please-version} diff --git a/google/cloud/bigtable_admin/gapic_version.py b/google/cloud/bigtable_admin/gapic_version.py index 8d4f4cfb6..f09943f6b 100644 --- a/google/cloud/bigtable_admin/gapic_version.py +++ b/google/cloud/bigtable_admin/gapic_version.py @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. # -__version__ = "2.17.0" # {x-release-please-version} +__version__ = "2.18.0" # {x-release-please-version} diff --git a/google/cloud/bigtable_admin_v2/gapic_version.py b/google/cloud/bigtable_admin_v2/gapic_version.py index 8d4f4cfb6..f09943f6b 100644 --- a/google/cloud/bigtable_admin_v2/gapic_version.py +++ b/google/cloud/bigtable_admin_v2/gapic_version.py @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. # -__version__ = "2.17.0" # {x-release-please-version} +__version__ = "2.18.0" # {x-release-please-version} diff --git a/google/cloud/bigtable_v2/gapic_version.py b/google/cloud/bigtable_v2/gapic_version.py index 8d4f4cfb6..f09943f6b 100644 --- a/google/cloud/bigtable_v2/gapic_version.py +++ b/google/cloud/bigtable_v2/gapic_version.py @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. # -__version__ = "2.17.0" # {x-release-please-version} +__version__ = "2.18.0" # {x-release-please-version} From a767cff95d990994f85f5fd05cc10f952087b49d Mon Sep 17 00:00:00 2001 From: Mattie Fu Date: Thu, 11 May 2023 15:30:12 -0400 Subject: [PATCH 078/213] fix: Revert "Feat: Threaded MutationsBatcher" (#773) Reverts googleapis/python-bigtable#722 This PR caused beam bigtableio.py failures https://togithub.com/apache/beam/issues/26673 and is blocking beam release. We're unclear why it caused the failure. So will revert this change, cut another release so we can unblock beam and investigate separately. --- docs/batcher.rst | 6 - docs/usage.rst | 1 - google/cloud/bigtable/batcher.py | 366 ++++++------------------------- google/cloud/bigtable/table.py | 6 +- tests/unit/test_batcher.py | 218 ++++++------------ 5 files changed, 128 insertions(+), 469 deletions(-) delete mode 100644 docs/batcher.rst diff --git a/docs/batcher.rst b/docs/batcher.rst deleted file mode 100644 index 9ac335be1..000000000 --- a/docs/batcher.rst +++ /dev/null @@ -1,6 +0,0 @@ -Mutations Batching -~~~~~~~~~~~~~~~~~~ - -.. automodule:: google.cloud.bigtable.batcher - :members: - :show-inheritance: diff --git a/docs/usage.rst b/docs/usage.rst index 73a32b039..33bf7bb7f 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -17,7 +17,6 @@ Using the API row-data row-filters row-set - batcher In the hierarchy of API concepts diff --git a/google/cloud/bigtable/batcher.py b/google/cloud/bigtable/batcher.py index 6b06ec060..3c23f4436 100644 --- a/google/cloud/bigtable/batcher.py +++ b/google/cloud/bigtable/batcher.py @@ -13,251 +13,104 @@ # limitations under the License. 
"""User friendly container for Google Cloud Bigtable MutationBatcher.""" -import threading -import queue -import concurrent.futures -import atexit -from google.api_core.exceptions import from_grpc_status -from dataclasses import dataclass +FLUSH_COUNT = 1000 +MAX_MUTATIONS = 100000 +MAX_ROW_BYTES = 5242880 # 5MB -FLUSH_COUNT = 100 # after this many elements, send out the batch - -MAX_MUTATION_SIZE = 20 * 1024 * 1024 # 20MB # after this many bytes, send out the batch - -MAX_OUTSTANDING_BYTES = 100 * 1024 * 1024 # 100MB # max inflight byte size. - -MAX_OUTSTANDING_ELEMENTS = 100000 # max inflight mutations. - - -class MutationsBatchError(Exception): - """Error in the batch request""" - - def __init__(self, message, exc): - self.exc = exc - self.message = message - super().__init__(self.message) - - -class _MutationsBatchQueue(object): - """Private Threadsafe Queue to hold rows for batching.""" - - def __init__(self, max_mutation_bytes=MAX_MUTATION_SIZE, flush_count=FLUSH_COUNT): - """Specify the queue constraints""" - self._queue = queue.Queue() - self.total_mutation_count = 0 - self.total_size = 0 - self.max_mutation_bytes = max_mutation_bytes - self.flush_count = flush_count - - def get(self): - """Retrieve an item from the queue. Recalculate queue size.""" - row = self._queue.get() - mutation_size = row.get_mutations_size() - self.total_mutation_count -= len(row._get_mutations()) - self.total_size -= mutation_size - return row - - def put(self, item): - """Insert an item to the queue. Recalculate queue size.""" - - mutation_count = len(item._get_mutations()) - - self._queue.put(item) - - self.total_size += item.get_mutations_size() - self.total_mutation_count += mutation_count - - def full(self): - """Check if the queue is full.""" - if ( - self.total_mutation_count >= self.flush_count - or self.total_size >= self.max_mutation_bytes - ): - return True - return False - - def empty(self): - return self._queue.empty() - - -@dataclass -class _BatchInfo: - """Keeping track of size of a batch""" - - mutations_count: int = 0 - rows_count: int = 0 - mutations_size: int = 0 - - -class _FlowControl(object): - def __init__( - self, - max_mutations=MAX_OUTSTANDING_ELEMENTS, - max_mutation_bytes=MAX_OUTSTANDING_BYTES, - ): - """Control the inflight requests. Keep track of the mutations, row bytes and row counts. - As requests to backend are being made, adjust the number of mutations being processed. - - If threshold is reached, block the flow. - Reopen the flow as requests are finished. - """ - self.max_mutations = max_mutations - self.max_mutation_bytes = max_mutation_bytes - self.inflight_mutations = 0 - self.inflight_size = 0 - self.event = threading.Event() - self.event.set() - - def is_blocked(self): - """Returns True if: - - - inflight mutations >= max_mutations, or - - inflight bytes size >= max_mutation_bytes, or - """ - - return ( - self.inflight_mutations >= self.max_mutations - or self.inflight_size >= self.max_mutation_bytes - ) - - def control_flow(self, batch_info): - """ - Calculate the resources used by this batch - """ - - self.inflight_mutations += batch_info.mutations_count - self.inflight_size += batch_info.mutations_size - self.set_flow_control_status() - - def wait(self): - """ - Wait until flow control pushback has been released. - It awakens as soon as `event` is set. - """ - self.event.wait() - - def set_flow_control_status(self): - """Check the inflight mutations and size. - - If values exceed the allowed threshold, block the event. 
- """ - if self.is_blocked(): - self.event.clear() # sleep - else: - self.event.set() # awaken the threads - - def release(self, batch_info): - """ - Release the resources. - Decrement the row size to allow enqueued mutations to be run. - """ - self.inflight_mutations -= batch_info.mutations_count - self.inflight_size -= batch_info.mutations_size - self.set_flow_control_status() +class MaxMutationsError(ValueError): + """The number of mutations for bulk request is too big.""" class MutationsBatcher(object): """A MutationsBatcher is used in batch cases where the number of mutations - is large or unknown. It will store :class:`DirectRow` in memory until one of the - size limits is reached, or an explicit call to :func:`flush()` is performed. When - a flush event occurs, the :class:`DirectRow` in memory will be sent to Cloud + is large or unknown. It will store DirectRows in memory until one of the + size limits is reached, or an explicit call to flush() is performed. When + a flush event occurs, the DirectRows in memory will be sent to Cloud Bigtable. Batching mutations is more efficient than sending individual request. This class is not suited for usage in systems where each mutation must be guaranteed to be sent, since calling mutate may only result in an - in-memory change. In a case of a system crash, any :class:`DirectRow` remaining in + in-memory change. In a case of a system crash, any DirectRows remaining in memory will not necessarily be sent to the service, even after the - completion of the :func:`mutate()` method. + completion of the mutate() method. - Note on thread safety: The same :class:`MutationBatcher` cannot be shared by multiple end-user threads. + TODO: Performance would dramatically improve if this class had the + capability of asynchronous, parallel RPCs. :type table: class :param table: class:`~google.cloud.bigtable.table.Table`. :type flush_count: int :param flush_count: (Optional) Max number of rows to flush. If it - reaches the max number of rows it calls finish_batch() to mutate the - current row batch. Default is FLUSH_COUNT (1000 rows). + reaches the max number of rows it calls finish_batch() to mutate the + current row batch. Default is FLUSH_COUNT (1000 rows). :type max_row_bytes: int :param max_row_bytes: (Optional) Max number of row mutations size to - flush. If it reaches the max number of row mutations size it calls - finish_batch() to mutate the current row batch. Default is MAX_ROW_BYTES - (5 MB). - - :type flush_interval: float - :param flush_interval: (Optional) The interval (in seconds) between asynchronous flush. - Default is 1 second. + flush. If it reaches the max number of row mutations size it calls + finish_batch() to mutate the current row batch. Default is MAX_ROW_BYTES + (5 MB). 
""" - def __init__( - self, - table, - flush_count=FLUSH_COUNT, - max_row_bytes=MAX_MUTATION_SIZE, - flush_interval=1, - ): - self._rows = _MutationsBatchQueue( - max_mutation_bytes=max_row_bytes, flush_count=flush_count - ) + def __init__(self, table, flush_count=FLUSH_COUNT, max_row_bytes=MAX_ROW_BYTES): + self.rows = [] + self.total_mutation_count = 0 + self.total_size = 0 self.table = table - self._executor = concurrent.futures.ThreadPoolExecutor() - atexit.register(self.close) - self._timer = threading.Timer(flush_interval, self.flush) - self._timer.start() - self.flow_control = _FlowControl( - max_mutations=MAX_OUTSTANDING_ELEMENTS, - max_mutation_bytes=MAX_OUTSTANDING_BYTES, - ) - self.futures_mapping = {} - self.exceptions = queue.Queue() - - @property - def flush_count(self): - return self._rows.flush_count - - @property - def max_row_bytes(self): - return self._rows.max_mutation_bytes - - def __enter__(self): - """Starting the MutationsBatcher as a context manager""" - return self + self.flush_count = flush_count + self.max_row_bytes = max_row_bytes def mutate(self, row): """Add a row to the batch. If the current batch meets one of the size - limits, the batch is sent asynchronously. + limits, the batch is sent synchronously. For example: - .. literalinclude:: snippets_table.py + .. literalinclude:: snippets.py :start-after: [START bigtable_api_batcher_mutate] :end-before: [END bigtable_api_batcher_mutate] :dedent: 4 :type row: class - :param row: :class:`~google.cloud.bigtable.row.DirectRow`. + :param row: class:`~google.cloud.bigtable.row.DirectRow`. :raises: One of the following: - * :exc:`~.table._BigtableRetryableError` if any row returned a transient error. - * :exc:`RuntimeError` if the number of responses doesn't match the number of rows that were retried - """ - self._rows.put(row) + * :exc:`~.table._BigtableRetryableError` if any + row returned a transient error. + * :exc:`RuntimeError` if the number of responses doesn't + match the number of rows that were retried + * :exc:`.batcher.MaxMutationsError` if any row exceeds max + mutations count. + """ + mutation_count = len(row._get_mutations()) + if mutation_count > MAX_MUTATIONS: + raise MaxMutationsError( + "The row key {} exceeds the number of mutations {}.".format( + row.row_key, mutation_count + ) + ) + + if (self.total_mutation_count + mutation_count) >= MAX_MUTATIONS: + self.flush() + + self.rows.append(row) + self.total_mutation_count += mutation_count + self.total_size += row.get_mutations_size() - if self._rows.full(): - self._flush_async() + if self.total_size >= self.max_row_bytes or len(self.rows) >= self.flush_count: + self.flush() def mutate_rows(self, rows): """Add multiple rows to the batch. If the current batch meets one of the size - limits, the batch is sent asynchronously. + limits, the batch is sent synchronously. For example: - .. literalinclude:: snippets_table.py + .. literalinclude:: snippets.py :start-after: [START bigtable_api_batcher_mutate_rows] :end-before: [END bigtable_api_batcher_mutate_rows] :dedent: 4 @@ -266,119 +119,28 @@ def mutate_rows(self, rows): :param rows: list:[`~google.cloud.bigtable.row.DirectRow`]. :raises: One of the following: - * :exc:`~.table._BigtableRetryableError` if any row returned a transient error. - * :exc:`RuntimeError` if the number of responses doesn't match the number of rows that were retried + * :exc:`~.table._BigtableRetryableError` if any + row returned a transient error. 
+ * :exc:`RuntimeError` if the number of responses doesn't + match the number of rows that were retried + * :exc:`.batcher.MaxMutationsError` if any row exceeds max + mutations count. """ for row in rows: self.mutate(row) def flush(self): - """Sends the current batch to Cloud Bigtable synchronously. + """Sends the current. batch to Cloud Bigtable. For example: - .. literalinclude:: snippets_table.py + .. literalinclude:: snippets.py :start-after: [START bigtable_api_batcher_flush] :end-before: [END bigtable_api_batcher_flush] :dedent: 4 - :raises: - * :exc:`.batcherMutationsBatchError` if there's any error in the mutations. - """ - rows_to_flush = [] - while not self._rows.empty(): - rows_to_flush.append(self._rows.get()) - response = self._flush_rows(rows_to_flush) - return response - - def _flush_async(self): - """Sends the current batch to Cloud Bigtable asynchronously. - - :raises: - * :exc:`.batcherMutationsBatchError` if there's any error in the mutations. - """ - - rows_to_flush = [] - mutations_count = 0 - mutations_size = 0 - rows_count = 0 - batch_info = _BatchInfo() - - while not self._rows.empty(): - row = self._rows.get() - mutations_count += len(row._get_mutations()) - mutations_size += row.get_mutations_size() - rows_count += 1 - rows_to_flush.append(row) - batch_info.mutations_count = mutations_count - batch_info.rows_count = rows_count - batch_info.mutations_size = mutations_size - - if ( - rows_count >= self.flush_count - or mutations_size >= self.max_row_bytes - or mutations_count >= self.flow_control.max_mutations - or mutations_size >= self.flow_control.max_mutation_bytes - or self._rows.empty() # submit when it reached the end of the queue - ): - # wait for resources to become available, before submitting any new batch - self.flow_control.wait() - # once unblocked, submit a batch - # event flag will be set by control_flow to block subsequent thread, but not blocking this one - self.flow_control.control_flow(batch_info) - future = self._executor.submit(self._flush_rows, rows_to_flush) - self.futures_mapping[future] = batch_info - future.add_done_callback(self._batch_completed_callback) - - # reset and start a new batch - rows_to_flush = [] - mutations_size = 0 - rows_count = 0 - mutations_count = 0 - batch_info = _BatchInfo() - - def _batch_completed_callback(self, future): - """Callback for when the mutation has finished. - - Raise exceptions if there's any. - Release the resources locked by the flow control and allow enqueued tasks to be run. - """ - - processed_rows = self.futures_mapping[future] - self.flow_control.release(processed_rows) - del self.futures_mapping[future] - - def _flush_rows(self, rows_to_flush): - """Mutate the specified rows. - - :raises: - * :exc:`.batcherMutationsBatchError` if there's any error in the mutations. - """ - responses = [] - if len(rows_to_flush) > 0: - response = self.table.mutate_rows(rows_to_flush) - - for result in response: - if result.code != 0: - exc = from_grpc_status(result.code, result.message) - self.exceptions.put(exc) - responses.append(result) - - return responses - - def __exit__(self, exc_type, exc_value, exc_traceback): - """Clean up resources. Flush and shutdown the ThreadPoolExecutor.""" - self.close() - - def close(self): - """Clean up resources. Flush and shutdown the ThreadPoolExecutor. - Any errors will be raised. - - :raises: - * :exc:`.batcherMutationsBatchError` if there's any error in the mutations. 
""" - self.flush() - self._executor.shutdown(wait=True) - atexit.unregister(self.close) - if self.exceptions.qsize() > 0: - exc = list(self.exceptions.queue) - raise MutationsBatchError("Errors in batch mutations.", exc=exc) + if len(self.rows) != 0: + self.table.mutate_rows(self.rows) + self.total_mutation_count = 0 + self.total_size = 0 + self.rows = [] diff --git a/google/cloud/bigtable/table.py b/google/cloud/bigtable/table.py index e3191a729..8605992ba 100644 --- a/google/cloud/bigtable/table.py +++ b/google/cloud/bigtable/table.py @@ -32,7 +32,7 @@ from google.cloud.bigtable.column_family import _gc_rule_from_pb from google.cloud.bigtable.column_family import ColumnFamily from google.cloud.bigtable.batcher import MutationsBatcher -from google.cloud.bigtable.batcher import FLUSH_COUNT, MAX_MUTATION_SIZE +from google.cloud.bigtable.batcher import FLUSH_COUNT, MAX_ROW_BYTES from google.cloud.bigtable.encryption_info import EncryptionInfo from google.cloud.bigtable.policy import Policy from google.cloud.bigtable.row import AppendRow @@ -844,9 +844,7 @@ def drop_by_prefix(self, row_key_prefix, timeout=None): request={"name": self.name, "row_key_prefix": _to_bytes(row_key_prefix)} ) - def mutations_batcher( - self, flush_count=FLUSH_COUNT, max_row_bytes=MAX_MUTATION_SIZE - ): + def mutations_batcher(self, flush_count=FLUSH_COUNT, max_row_bytes=MAX_ROW_BYTES): """Factory to create a mutation batcher associated with this instance. For example: diff --git a/tests/unit/test_batcher.py b/tests/unit/test_batcher.py index a238b2852..9ae6ed175 100644 --- a/tests/unit/test_batcher.py +++ b/tests/unit/test_batcher.py @@ -14,118 +14,122 @@ import mock -import time - import pytest from google.cloud.bigtable.row import DirectRow -from google.cloud.bigtable.batcher import ( - _FlowControl, - MutationsBatcher, - MutationsBatchError, -) TABLE_ID = "table-id" TABLE_NAME = "/tables/" + TABLE_ID +def _make_mutation_batcher(table, **kw): + from google.cloud.bigtable.batcher import MutationsBatcher + + return MutationsBatcher(table, **kw) + + def test_mutation_batcher_constructor(): table = _Table(TABLE_NAME) - with MutationsBatcher(table) as mutation_batcher: - assert table is mutation_batcher.table + + mutation_batcher = _make_mutation_batcher(table) + assert table is mutation_batcher.table def test_mutation_batcher_mutate_row(): table = _Table(TABLE_NAME) - with MutationsBatcher(table=table) as mutation_batcher: + mutation_batcher = _make_mutation_batcher(table=table) - rows = [ - DirectRow(row_key=b"row_key"), - DirectRow(row_key=b"row_key_2"), - DirectRow(row_key=b"row_key_3"), - DirectRow(row_key=b"row_key_4"), - ] + rows = [ + DirectRow(row_key=b"row_key"), + DirectRow(row_key=b"row_key_2"), + DirectRow(row_key=b"row_key_3"), + DirectRow(row_key=b"row_key_4"), + ] - mutation_batcher.mutate_rows(rows) + mutation_batcher.mutate_rows(rows) + mutation_batcher.flush() assert table.mutation_calls == 1 def test_mutation_batcher_mutate(): table = _Table(TABLE_NAME) - with MutationsBatcher(table=table) as mutation_batcher: + mutation_batcher = _make_mutation_batcher(table=table) - row = DirectRow(row_key=b"row_key") - row.set_cell("cf1", b"c1", 1) - row.set_cell("cf1", b"c2", 2) - row.set_cell("cf1", b"c3", 3) - row.set_cell("cf1", b"c4", 4) + row = DirectRow(row_key=b"row_key") + row.set_cell("cf1", b"c1", 1) + row.set_cell("cf1", b"c2", 2) + row.set_cell("cf1", b"c3", 3) + row.set_cell("cf1", b"c4", 4) - mutation_batcher.mutate(row) + mutation_batcher.mutate(row) + + mutation_batcher.flush() assert 
table.mutation_calls == 1 def test_mutation_batcher_flush_w_no_rows(): table = _Table(TABLE_NAME) - with MutationsBatcher(table=table) as mutation_batcher: - mutation_batcher.flush() + mutation_batcher = _make_mutation_batcher(table=table) + mutation_batcher.flush() assert table.mutation_calls == 0 def test_mutation_batcher_mutate_w_max_flush_count(): table = _Table(TABLE_NAME) - with MutationsBatcher(table=table, flush_count=3) as mutation_batcher: + mutation_batcher = _make_mutation_batcher(table=table, flush_count=3) - row_1 = DirectRow(row_key=b"row_key_1") - row_2 = DirectRow(row_key=b"row_key_2") - row_3 = DirectRow(row_key=b"row_key_3") + row_1 = DirectRow(row_key=b"row_key_1") + row_2 = DirectRow(row_key=b"row_key_2") + row_3 = DirectRow(row_key=b"row_key_3") - mutation_batcher.mutate(row_1) - mutation_batcher.mutate(row_2) - mutation_batcher.mutate(row_3) + mutation_batcher.mutate(row_1) + mutation_batcher.mutate(row_2) + mutation_batcher.mutate(row_3) assert table.mutation_calls == 1 -@mock.patch("google.cloud.bigtable.batcher.MAX_OUTSTANDING_ELEMENTS", new=3) -def test_mutation_batcher_mutate_w_max_mutations(): +@mock.patch("google.cloud.bigtable.batcher.MAX_MUTATIONS", new=3) +def test_mutation_batcher_mutate_with_max_mutations_failure(): + from google.cloud.bigtable.batcher import MaxMutationsError + table = _Table(TABLE_NAME) - with MutationsBatcher(table=table) as mutation_batcher: + mutation_batcher = _make_mutation_batcher(table=table) - row = DirectRow(row_key=b"row_key") - row.set_cell("cf1", b"c1", 1) - row.set_cell("cf1", b"c2", 2) - row.set_cell("cf1", b"c3", 3) + row = DirectRow(row_key=b"row_key") + row.set_cell("cf1", b"c1", 1) + row.set_cell("cf1", b"c2", 2) + row.set_cell("cf1", b"c3", 3) + row.set_cell("cf1", b"c4", 4) + with pytest.raises(MaxMutationsError): mutation_batcher.mutate(row) - assert table.mutation_calls == 1 - -def test_mutation_batcher_mutate_w_max_row_bytes(): +@mock.patch("google.cloud.bigtable.batcher.MAX_MUTATIONS", new=3) +def test_mutation_batcher_mutate_w_max_mutations(): table = _Table(TABLE_NAME) - with MutationsBatcher( - table=table, max_row_bytes=3 * 1024 * 1024 - ) as mutation_batcher: + mutation_batcher = _make_mutation_batcher(table=table) - number_of_bytes = 1 * 1024 * 1024 - max_value = b"1" * number_of_bytes - - row = DirectRow(row_key=b"row_key") - row.set_cell("cf1", b"c1", max_value) - row.set_cell("cf1", b"c2", max_value) - row.set_cell("cf1", b"c3", max_value) + row = DirectRow(row_key=b"row_key") + row.set_cell("cf1", b"c1", 1) + row.set_cell("cf1", b"c2", 2) + row.set_cell("cf1", b"c3", 3) - mutation_batcher.mutate(row) + mutation_batcher.mutate(row) + mutation_batcher.flush() assert table.mutation_calls == 1 -def test_mutations_batcher_flushed_when_closed(): +def test_mutation_batcher_mutate_w_max_row_bytes(): table = _Table(TABLE_NAME) - mutation_batcher = MutationsBatcher(table=table, max_row_bytes=3 * 1024 * 1024) + mutation_batcher = _make_mutation_batcher( + table=table, max_row_bytes=3 * 1024 * 1024 + ) number_of_bytes = 1 * 1024 * 1024 max_value = b"1" * number_of_bytes @@ -133,108 +137,13 @@ def test_mutations_batcher_flushed_when_closed(): row = DirectRow(row_key=b"row_key") row.set_cell("cf1", b"c1", max_value) row.set_cell("cf1", b"c2", max_value) + row.set_cell("cf1", b"c3", max_value) mutation_batcher.mutate(row) - assert table.mutation_calls == 0 - - mutation_batcher.close() - - assert table.mutation_calls == 1 - - -def test_mutations_batcher_context_manager_flushed_when_closed(): - table = 
_Table(TABLE_NAME) - with MutationsBatcher( - table=table, max_row_bytes=3 * 1024 * 1024 - ) as mutation_batcher: - - number_of_bytes = 1 * 1024 * 1024 - max_value = b"1" * number_of_bytes - - row = DirectRow(row_key=b"row_key") - row.set_cell("cf1", b"c1", max_value) - row.set_cell("cf1", b"c2", max_value) - - mutation_batcher.mutate(row) assert table.mutation_calls == 1 -@mock.patch("google.cloud.bigtable.batcher.MutationsBatcher.flush") -def test_mutations_batcher_flush_interval(mocked_flush): - table = _Table(TABLE_NAME) - flush_interval = 0.5 - mutation_batcher = MutationsBatcher(table=table, flush_interval=flush_interval) - - assert mutation_batcher._timer.interval == flush_interval - mocked_flush.assert_not_called() - - time.sleep(0.4) - mocked_flush.assert_not_called() - - time.sleep(0.1) - mocked_flush.assert_called_once_with() - - mutation_batcher.close() - - -def test_mutations_batcher_response_with_error_codes(): - from google.rpc.status_pb2 import Status - - mocked_response = [Status(code=1), Status(code=5)] - - with mock.patch("tests.unit.test_batcher._Table") as mocked_table: - table = mocked_table.return_value - mutation_batcher = MutationsBatcher(table=table) - - row1 = DirectRow(row_key=b"row_key") - row2 = DirectRow(row_key=b"row_key") - table.mutate_rows.return_value = mocked_response - - mutation_batcher.mutate_rows([row1, row2]) - with pytest.raises(MutationsBatchError) as exc: - mutation_batcher.close() - assert exc.value.message == "Errors in batch mutations." - assert len(exc.value.exc) == 2 - - assert exc.value.exc[0].message == mocked_response[0].message - assert exc.value.exc[1].message == mocked_response[1].message - - -def test_flow_control_event_is_set_when_not_blocked(): - flow_control = _FlowControl() - - flow_control.set_flow_control_status() - assert flow_control.event.is_set() - - -def test_flow_control_event_is_not_set_when_blocked(): - flow_control = _FlowControl() - - flow_control.inflight_mutations = flow_control.max_mutations - flow_control.inflight_size = flow_control.max_mutation_bytes - - flow_control.set_flow_control_status() - assert not flow_control.event.is_set() - - -@mock.patch("concurrent.futures.ThreadPoolExecutor.submit") -def test_flush_async_batch_count(mocked_executor_submit): - table = _Table(TABLE_NAME) - mutation_batcher = MutationsBatcher(table=table, flush_count=2) - - number_of_bytes = 1 * 1024 * 1024 - max_value = b"1" * number_of_bytes - for index in range(5): - row = DirectRow(row_key=f"row_key_{index}") - row.set_cell("cf1", b"c1", max_value) - mutation_batcher.mutate(row) - mutation_batcher._flush_async() - - # 3 batches submitted. 2 batches of 2 items, and the last one a single item batch. - assert mocked_executor_submit.call_count == 3 - - class _Instance(object): def __init__(self, client=None): self._client = client @@ -247,8 +156,5 @@ def __init__(self, name, client=None): self.mutation_calls = 0 def mutate_rows(self, rows): - from google.rpc.status_pb2 import Status - self.mutation_calls += 1 - - return [Status(code=0) for _ in rows] + return rows From a4e73251c6cd6ce83d9b812120b5321b0ab70280 Mon Sep 17 00:00:00 2001 From: Mattie Fu Date: Thu, 11 May 2023 15:59:32 -0400 Subject: [PATCH 079/213] Revert "fix: Revert "Feat: Threaded MutationsBatcher" (#773)" (#775) This reverts commit a767cff95d990994f85f5fd05cc10f952087b49d. 
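
For context, a minimal usage sketch of the threaded batcher this commit restores; it is an illustration only, and the project, instance, and table names ("my-project", "my-instance", "my-table") are placeholders rather than part of this change:

    from google.cloud.bigtable import Client
    from google.cloud.bigtable.row import DirectRow

    client = Client(project="my-project", admin=True)
    table = client.instance("my-instance").table("my-table")

    # The batcher flushes asynchronously once flush_count rows or
    # max_row_bytes bytes are queued, on its background timer, and
    # one final time when the context manager exits.
    with table.mutations_batcher(flush_count=100) as batcher:
        row = DirectRow(row_key=b"row-key-1")
        row.set_cell("cf1", b"c1", b"value-1")
        batcher.mutate(row)
    # Per-row failures collected during flushing surface as
    # MutationsBatchError when the batcher is closed.

Compared with the reverted synchronous version, mutate() no longer blocks the caller on RPCs; flushing happens on a ThreadPoolExecutor under flow control, so errors are reported at close() rather than at each mutate() call.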
--- docs/batcher.rst | 6 + docs/usage.rst | 1 + google/cloud/bigtable/batcher.py | 366 +++++++++++++++++++++++++------ google/cloud/bigtable/table.py | 6 +- tests/unit/test_batcher.py | 218 ++++++++++++------ 5 files changed, 469 insertions(+), 128 deletions(-) create mode 100644 docs/batcher.rst diff --git a/docs/batcher.rst b/docs/batcher.rst new file mode 100644 index 000000000..9ac335be1 --- /dev/null +++ b/docs/batcher.rst @@ -0,0 +1,6 @@ +Mutations Batching +~~~~~~~~~~~~~~~~~~ + +.. automodule:: google.cloud.bigtable.batcher + :members: + :show-inheritance: diff --git a/docs/usage.rst b/docs/usage.rst index 33bf7bb7f..73a32b039 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -17,6 +17,7 @@ Using the API row-data row-filters row-set + batcher In the hierarchy of API concepts diff --git a/google/cloud/bigtable/batcher.py b/google/cloud/bigtable/batcher.py index 3c23f4436..6b06ec060 100644 --- a/google/cloud/bigtable/batcher.py +++ b/google/cloud/bigtable/batcher.py @@ -13,104 +13,251 @@ # limitations under the License. """User friendly container for Google Cloud Bigtable MutationBatcher.""" +import threading +import queue +import concurrent.futures +import atexit -FLUSH_COUNT = 1000 -MAX_MUTATIONS = 100000 -MAX_ROW_BYTES = 5242880 # 5MB +from google.api_core.exceptions import from_grpc_status +from dataclasses import dataclass -class MaxMutationsError(ValueError): - """The number of mutations for bulk request is too big.""" +FLUSH_COUNT = 100 # after this many elements, send out the batch + +MAX_MUTATION_SIZE = 20 * 1024 * 1024 # 20MB # after this many bytes, send out the batch + +MAX_OUTSTANDING_BYTES = 100 * 1024 * 1024 # 100MB # max inflight byte size. + +MAX_OUTSTANDING_ELEMENTS = 100000 # max inflight mutations. + + +class MutationsBatchError(Exception): + """Error in the batch request""" + + def __init__(self, message, exc): + self.exc = exc + self.message = message + super().__init__(self.message) + + +class _MutationsBatchQueue(object): + """Private Threadsafe Queue to hold rows for batching.""" + + def __init__(self, max_mutation_bytes=MAX_MUTATION_SIZE, flush_count=FLUSH_COUNT): + """Specify the queue constraints""" + self._queue = queue.Queue() + self.total_mutation_count = 0 + self.total_size = 0 + self.max_mutation_bytes = max_mutation_bytes + self.flush_count = flush_count + + def get(self): + """Retrieve an item from the queue. Recalculate queue size.""" + row = self._queue.get() + mutation_size = row.get_mutations_size() + self.total_mutation_count -= len(row._get_mutations()) + self.total_size -= mutation_size + return row + + def put(self, item): + """Insert an item to the queue. Recalculate queue size.""" + + mutation_count = len(item._get_mutations()) + + self._queue.put(item) + + self.total_size += item.get_mutations_size() + self.total_mutation_count += mutation_count + + def full(self): + """Check if the queue is full.""" + if ( + self.total_mutation_count >= self.flush_count + or self.total_size >= self.max_mutation_bytes + ): + return True + return False + + def empty(self): + return self._queue.empty() + + +@dataclass +class _BatchInfo: + """Keeping track of size of a batch""" + + mutations_count: int = 0 + rows_count: int = 0 + mutations_size: int = 0 + + +class _FlowControl(object): + def __init__( + self, + max_mutations=MAX_OUTSTANDING_ELEMENTS, + max_mutation_bytes=MAX_OUTSTANDING_BYTES, + ): + """Control the inflight requests. Keep track of the mutations, row bytes and row counts. 
+ As requests to backend are being made, adjust the number of mutations being processed. + + If threshold is reached, block the flow. + Reopen the flow as requests are finished. + """ + self.max_mutations = max_mutations + self.max_mutation_bytes = max_mutation_bytes + self.inflight_mutations = 0 + self.inflight_size = 0 + self.event = threading.Event() + self.event.set() + + def is_blocked(self): + """Returns True if: + + - inflight mutations >= max_mutations, or + - inflight bytes size >= max_mutation_bytes, or + """ + + return ( + self.inflight_mutations >= self.max_mutations + or self.inflight_size >= self.max_mutation_bytes + ) + + def control_flow(self, batch_info): + """ + Calculate the resources used by this batch + """ + + self.inflight_mutations += batch_info.mutations_count + self.inflight_size += batch_info.mutations_size + self.set_flow_control_status() + + def wait(self): + """ + Wait until flow control pushback has been released. + It awakens as soon as `event` is set. + """ + self.event.wait() + + def set_flow_control_status(self): + """Check the inflight mutations and size. + + If values exceed the allowed threshold, block the event. + """ + if self.is_blocked(): + self.event.clear() # sleep + else: + self.event.set() # awaken the threads + + def release(self, batch_info): + """ + Release the resources. + Decrement the row size to allow enqueued mutations to be run. + """ + self.inflight_mutations -= batch_info.mutations_count + self.inflight_size -= batch_info.mutations_size + self.set_flow_control_status() class MutationsBatcher(object): """A MutationsBatcher is used in batch cases where the number of mutations - is large or unknown. It will store DirectRows in memory until one of the - size limits is reached, or an explicit call to flush() is performed. When - a flush event occurs, the DirectRows in memory will be sent to Cloud + is large or unknown. It will store :class:`DirectRow` in memory until one of the + size limits is reached, or an explicit call to :func:`flush()` is performed. When + a flush event occurs, the :class:`DirectRow` in memory will be sent to Cloud Bigtable. Batching mutations is more efficient than sending individual request. This class is not suited for usage in systems where each mutation must be guaranteed to be sent, since calling mutate may only result in an - in-memory change. In a case of a system crash, any DirectRows remaining in + in-memory change. In a case of a system crash, any :class:`DirectRow` remaining in memory will not necessarily be sent to the service, even after the - completion of the mutate() method. + completion of the :func:`mutate()` method. - TODO: Performance would dramatically improve if this class had the - capability of asynchronous, parallel RPCs. + Note on thread safety: The same :class:`MutationBatcher` cannot be shared by multiple end-user threads. :type table: class :param table: class:`~google.cloud.bigtable.table.Table`. :type flush_count: int :param flush_count: (Optional) Max number of rows to flush. If it - reaches the max number of rows it calls finish_batch() to mutate the - current row batch. Default is FLUSH_COUNT (1000 rows). + reaches the max number of rows it calls finish_batch() to mutate the + current row batch. Default is FLUSH_COUNT (1000 rows). :type max_row_bytes: int :param max_row_bytes: (Optional) Max number of row mutations size to - flush. If it reaches the max number of row mutations size it calls - finish_batch() to mutate the current row batch. Default is MAX_ROW_BYTES - (5 MB). 
+ flush. If it reaches the max number of row mutations size it calls + finish_batch() to mutate the current row batch. Default is MAX_ROW_BYTES + (5 MB). + + :type flush_interval: float + :param flush_interval: (Optional) The interval (in seconds) between asynchronous flush. + Default is 1 second. """ - def __init__(self, table, flush_count=FLUSH_COUNT, max_row_bytes=MAX_ROW_BYTES): - self.rows = [] - self.total_mutation_count = 0 - self.total_size = 0 + def __init__( + self, + table, + flush_count=FLUSH_COUNT, + max_row_bytes=MAX_MUTATION_SIZE, + flush_interval=1, + ): + self._rows = _MutationsBatchQueue( + max_mutation_bytes=max_row_bytes, flush_count=flush_count + ) self.table = table - self.flush_count = flush_count - self.max_row_bytes = max_row_bytes + self._executor = concurrent.futures.ThreadPoolExecutor() + atexit.register(self.close) + self._timer = threading.Timer(flush_interval, self.flush) + self._timer.start() + self.flow_control = _FlowControl( + max_mutations=MAX_OUTSTANDING_ELEMENTS, + max_mutation_bytes=MAX_OUTSTANDING_BYTES, + ) + self.futures_mapping = {} + self.exceptions = queue.Queue() + + @property + def flush_count(self): + return self._rows.flush_count + + @property + def max_row_bytes(self): + return self._rows.max_mutation_bytes + + def __enter__(self): + """Starting the MutationsBatcher as a context manager""" + return self def mutate(self, row): """Add a row to the batch. If the current batch meets one of the size - limits, the batch is sent synchronously. + limits, the batch is sent asynchronously. For example: - .. literalinclude:: snippets.py + .. literalinclude:: snippets_table.py :start-after: [START bigtable_api_batcher_mutate] :end-before: [END bigtable_api_batcher_mutate] :dedent: 4 :type row: class - :param row: class:`~google.cloud.bigtable.row.DirectRow`. + :param row: :class:`~google.cloud.bigtable.row.DirectRow`. :raises: One of the following: - * :exc:`~.table._BigtableRetryableError` if any - row returned a transient error. - * :exc:`RuntimeError` if the number of responses doesn't - match the number of rows that were retried - * :exc:`.batcher.MaxMutationsError` if any row exceeds max - mutations count. - """ - mutation_count = len(row._get_mutations()) - if mutation_count > MAX_MUTATIONS: - raise MaxMutationsError( - "The row key {} exceeds the number of mutations {}.".format( - row.row_key, mutation_count - ) - ) - - if (self.total_mutation_count + mutation_count) >= MAX_MUTATIONS: - self.flush() - - self.rows.append(row) - self.total_mutation_count += mutation_count - self.total_size += row.get_mutations_size() + * :exc:`~.table._BigtableRetryableError` if any row returned a transient error. + * :exc:`RuntimeError` if the number of responses doesn't match the number of rows that were retried + """ + self._rows.put(row) - if self.total_size >= self.max_row_bytes or len(self.rows) >= self.flush_count: - self.flush() + if self._rows.full(): + self._flush_async() def mutate_rows(self, rows): """Add multiple rows to the batch. If the current batch meets one of the size - limits, the batch is sent synchronously. + limits, the batch is sent asynchronously. For example: - .. literalinclude:: snippets.py + .. literalinclude:: snippets_table.py :start-after: [START bigtable_api_batcher_mutate_rows] :end-before: [END bigtable_api_batcher_mutate_rows] :dedent: 4 @@ -119,28 +266,119 @@ def mutate_rows(self, rows): :param rows: list:[`~google.cloud.bigtable.row.DirectRow`]. 
:raises: One of the following: - * :exc:`~.table._BigtableRetryableError` if any - row returned a transient error. - * :exc:`RuntimeError` if the number of responses doesn't - match the number of rows that were retried - * :exc:`.batcher.MaxMutationsError` if any row exceeds max - mutations count. + * :exc:`~.table._BigtableRetryableError` if any row returned a transient error. + * :exc:`RuntimeError` if the number of responses doesn't match the number of rows that were retried """ for row in rows: self.mutate(row) def flush(self): - """Sends the current. batch to Cloud Bigtable. + """Sends the current batch to Cloud Bigtable synchronously. For example: - .. literalinclude:: snippets.py + .. literalinclude:: snippets_table.py :start-after: [START bigtable_api_batcher_flush] :end-before: [END bigtable_api_batcher_flush] :dedent: 4 + :raises: + * :exc:`.batcherMutationsBatchError` if there's any error in the mutations. + """ + rows_to_flush = [] + while not self._rows.empty(): + rows_to_flush.append(self._rows.get()) + response = self._flush_rows(rows_to_flush) + return response + + def _flush_async(self): + """Sends the current batch to Cloud Bigtable asynchronously. + + :raises: + * :exc:`.batcherMutationsBatchError` if there's any error in the mutations. + """ + + rows_to_flush = [] + mutations_count = 0 + mutations_size = 0 + rows_count = 0 + batch_info = _BatchInfo() + + while not self._rows.empty(): + row = self._rows.get() + mutations_count += len(row._get_mutations()) + mutations_size += row.get_mutations_size() + rows_count += 1 + rows_to_flush.append(row) + batch_info.mutations_count = mutations_count + batch_info.rows_count = rows_count + batch_info.mutations_size = mutations_size + + if ( + rows_count >= self.flush_count + or mutations_size >= self.max_row_bytes + or mutations_count >= self.flow_control.max_mutations + or mutations_size >= self.flow_control.max_mutation_bytes + or self._rows.empty() # submit when it reached the end of the queue + ): + # wait for resources to become available, before submitting any new batch + self.flow_control.wait() + # once unblocked, submit a batch + # event flag will be set by control_flow to block subsequent thread, but not blocking this one + self.flow_control.control_flow(batch_info) + future = self._executor.submit(self._flush_rows, rows_to_flush) + self.futures_mapping[future] = batch_info + future.add_done_callback(self._batch_completed_callback) + + # reset and start a new batch + rows_to_flush = [] + mutations_size = 0 + rows_count = 0 + mutations_count = 0 + batch_info = _BatchInfo() + + def _batch_completed_callback(self, future): + """Callback for when the mutation has finished. + + Raise exceptions if there's any. + Release the resources locked by the flow control and allow enqueued tasks to be run. + """ + + processed_rows = self.futures_mapping[future] + self.flow_control.release(processed_rows) + del self.futures_mapping[future] + + def _flush_rows(self, rows_to_flush): + """Mutate the specified rows. + + :raises: + * :exc:`.batcherMutationsBatchError` if there's any error in the mutations. + """ + responses = [] + if len(rows_to_flush) > 0: + response = self.table.mutate_rows(rows_to_flush) + + for result in response: + if result.code != 0: + exc = from_grpc_status(result.code, result.message) + self.exceptions.put(exc) + responses.append(result) + + return responses + + def __exit__(self, exc_type, exc_value, exc_traceback): + """Clean up resources. 
Flush and shutdown the ThreadPoolExecutor.""" + self.close() + + def close(self): + """Clean up resources. Flush and shutdown the ThreadPoolExecutor. + Any errors will be raised. + + :raises: + * :exc:`.batcherMutationsBatchError` if there's any error in the mutations. """ - if len(self.rows) != 0: - self.table.mutate_rows(self.rows) - self.total_mutation_count = 0 - self.total_size = 0 - self.rows = [] + self.flush() + self._executor.shutdown(wait=True) + atexit.unregister(self.close) + if self.exceptions.qsize() > 0: + exc = list(self.exceptions.queue) + raise MutationsBatchError("Errors in batch mutations.", exc=exc) diff --git a/google/cloud/bigtable/table.py b/google/cloud/bigtable/table.py index 8605992ba..e3191a729 100644 --- a/google/cloud/bigtable/table.py +++ b/google/cloud/bigtable/table.py @@ -32,7 +32,7 @@ from google.cloud.bigtable.column_family import _gc_rule_from_pb from google.cloud.bigtable.column_family import ColumnFamily from google.cloud.bigtable.batcher import MutationsBatcher -from google.cloud.bigtable.batcher import FLUSH_COUNT, MAX_ROW_BYTES +from google.cloud.bigtable.batcher import FLUSH_COUNT, MAX_MUTATION_SIZE from google.cloud.bigtable.encryption_info import EncryptionInfo from google.cloud.bigtable.policy import Policy from google.cloud.bigtable.row import AppendRow @@ -844,7 +844,9 @@ def drop_by_prefix(self, row_key_prefix, timeout=None): request={"name": self.name, "row_key_prefix": _to_bytes(row_key_prefix)} ) - def mutations_batcher(self, flush_count=FLUSH_COUNT, max_row_bytes=MAX_ROW_BYTES): + def mutations_batcher( + self, flush_count=FLUSH_COUNT, max_row_bytes=MAX_MUTATION_SIZE + ): """Factory to create a mutation batcher associated with this instance. For example: diff --git a/tests/unit/test_batcher.py b/tests/unit/test_batcher.py index 9ae6ed175..a238b2852 100644 --- a/tests/unit/test_batcher.py +++ b/tests/unit/test_batcher.py @@ -14,122 +14,118 @@ import mock +import time + import pytest from google.cloud.bigtable.row import DirectRow +from google.cloud.bigtable.batcher import ( + _FlowControl, + MutationsBatcher, + MutationsBatchError, +) TABLE_ID = "table-id" TABLE_NAME = "/tables/" + TABLE_ID -def _make_mutation_batcher(table, **kw): - from google.cloud.bigtable.batcher import MutationsBatcher - - return MutationsBatcher(table, **kw) - - def test_mutation_batcher_constructor(): table = _Table(TABLE_NAME) - - mutation_batcher = _make_mutation_batcher(table) - assert table is mutation_batcher.table + with MutationsBatcher(table) as mutation_batcher: + assert table is mutation_batcher.table def test_mutation_batcher_mutate_row(): table = _Table(TABLE_NAME) - mutation_batcher = _make_mutation_batcher(table=table) + with MutationsBatcher(table=table) as mutation_batcher: - rows = [ - DirectRow(row_key=b"row_key"), - DirectRow(row_key=b"row_key_2"), - DirectRow(row_key=b"row_key_3"), - DirectRow(row_key=b"row_key_4"), - ] + rows = [ + DirectRow(row_key=b"row_key"), + DirectRow(row_key=b"row_key_2"), + DirectRow(row_key=b"row_key_3"), + DirectRow(row_key=b"row_key_4"), + ] - mutation_batcher.mutate_rows(rows) - mutation_batcher.flush() + mutation_batcher.mutate_rows(rows) assert table.mutation_calls == 1 def test_mutation_batcher_mutate(): table = _Table(TABLE_NAME) - mutation_batcher = _make_mutation_batcher(table=table) + with MutationsBatcher(table=table) as mutation_batcher: - row = DirectRow(row_key=b"row_key") - row.set_cell("cf1", b"c1", 1) - row.set_cell("cf1", b"c2", 2) - row.set_cell("cf1", b"c3", 3) - row.set_cell("cf1", b"c4", 4) - - 
mutation_batcher.mutate(row) + row = DirectRow(row_key=b"row_key") + row.set_cell("cf1", b"c1", 1) + row.set_cell("cf1", b"c2", 2) + row.set_cell("cf1", b"c3", 3) + row.set_cell("cf1", b"c4", 4) - mutation_batcher.flush() + mutation_batcher.mutate(row) assert table.mutation_calls == 1 def test_mutation_batcher_flush_w_no_rows(): table = _Table(TABLE_NAME) - mutation_batcher = _make_mutation_batcher(table=table) - mutation_batcher.flush() + with MutationsBatcher(table=table) as mutation_batcher: + mutation_batcher.flush() assert table.mutation_calls == 0 def test_mutation_batcher_mutate_w_max_flush_count(): table = _Table(TABLE_NAME) - mutation_batcher = _make_mutation_batcher(table=table, flush_count=3) + with MutationsBatcher(table=table, flush_count=3) as mutation_batcher: - row_1 = DirectRow(row_key=b"row_key_1") - row_2 = DirectRow(row_key=b"row_key_2") - row_3 = DirectRow(row_key=b"row_key_3") + row_1 = DirectRow(row_key=b"row_key_1") + row_2 = DirectRow(row_key=b"row_key_2") + row_3 = DirectRow(row_key=b"row_key_3") - mutation_batcher.mutate(row_1) - mutation_batcher.mutate(row_2) - mutation_batcher.mutate(row_3) + mutation_batcher.mutate(row_1) + mutation_batcher.mutate(row_2) + mutation_batcher.mutate(row_3) assert table.mutation_calls == 1 -@mock.patch("google.cloud.bigtable.batcher.MAX_MUTATIONS", new=3) -def test_mutation_batcher_mutate_with_max_mutations_failure(): - from google.cloud.bigtable.batcher import MaxMutationsError - +@mock.patch("google.cloud.bigtable.batcher.MAX_OUTSTANDING_ELEMENTS", new=3) +def test_mutation_batcher_mutate_w_max_mutations(): table = _Table(TABLE_NAME) - mutation_batcher = _make_mutation_batcher(table=table) + with MutationsBatcher(table=table) as mutation_batcher: - row = DirectRow(row_key=b"row_key") - row.set_cell("cf1", b"c1", 1) - row.set_cell("cf1", b"c2", 2) - row.set_cell("cf1", b"c3", 3) - row.set_cell("cf1", b"c4", 4) + row = DirectRow(row_key=b"row_key") + row.set_cell("cf1", b"c1", 1) + row.set_cell("cf1", b"c2", 2) + row.set_cell("cf1", b"c3", 3) - with pytest.raises(MaxMutationsError): mutation_batcher.mutate(row) + assert table.mutation_calls == 1 + -@mock.patch("google.cloud.bigtable.batcher.MAX_MUTATIONS", new=3) -def test_mutation_batcher_mutate_w_max_mutations(): +def test_mutation_batcher_mutate_w_max_row_bytes(): table = _Table(TABLE_NAME) - mutation_batcher = _make_mutation_batcher(table=table) + with MutationsBatcher( + table=table, max_row_bytes=3 * 1024 * 1024 + ) as mutation_batcher: - row = DirectRow(row_key=b"row_key") - row.set_cell("cf1", b"c1", 1) - row.set_cell("cf1", b"c2", 2) - row.set_cell("cf1", b"c3", 3) + number_of_bytes = 1 * 1024 * 1024 + max_value = b"1" * number_of_bytes - mutation_batcher.mutate(row) - mutation_batcher.flush() + row = DirectRow(row_key=b"row_key") + row.set_cell("cf1", b"c1", max_value) + row.set_cell("cf1", b"c2", max_value) + row.set_cell("cf1", b"c3", max_value) + + mutation_batcher.mutate(row) assert table.mutation_calls == 1 -def test_mutation_batcher_mutate_w_max_row_bytes(): +def test_mutations_batcher_flushed_when_closed(): table = _Table(TABLE_NAME) - mutation_batcher = _make_mutation_batcher( - table=table, max_row_bytes=3 * 1024 * 1024 - ) + mutation_batcher = MutationsBatcher(table=table, max_row_bytes=3 * 1024 * 1024) number_of_bytes = 1 * 1024 * 1024 max_value = b"1" * number_of_bytes @@ -137,13 +133,108 @@ def test_mutation_batcher_mutate_w_max_row_bytes(): row = DirectRow(row_key=b"row_key") row.set_cell("cf1", b"c1", max_value) row.set_cell("cf1", b"c2", max_value) - 
row.set_cell("cf1", b"c3", max_value) mutation_batcher.mutate(row) + assert table.mutation_calls == 0 + + mutation_batcher.close() + + assert table.mutation_calls == 1 + + +def test_mutations_batcher_context_manager_flushed_when_closed(): + table = _Table(TABLE_NAME) + with MutationsBatcher( + table=table, max_row_bytes=3 * 1024 * 1024 + ) as mutation_batcher: + + number_of_bytes = 1 * 1024 * 1024 + max_value = b"1" * number_of_bytes + + row = DirectRow(row_key=b"row_key") + row.set_cell("cf1", b"c1", max_value) + row.set_cell("cf1", b"c2", max_value) + + mutation_batcher.mutate(row) assert table.mutation_calls == 1 +@mock.patch("google.cloud.bigtable.batcher.MutationsBatcher.flush") +def test_mutations_batcher_flush_interval(mocked_flush): + table = _Table(TABLE_NAME) + flush_interval = 0.5 + mutation_batcher = MutationsBatcher(table=table, flush_interval=flush_interval) + + assert mutation_batcher._timer.interval == flush_interval + mocked_flush.assert_not_called() + + time.sleep(0.4) + mocked_flush.assert_not_called() + + time.sleep(0.1) + mocked_flush.assert_called_once_with() + + mutation_batcher.close() + + +def test_mutations_batcher_response_with_error_codes(): + from google.rpc.status_pb2 import Status + + mocked_response = [Status(code=1), Status(code=5)] + + with mock.patch("tests.unit.test_batcher._Table") as mocked_table: + table = mocked_table.return_value + mutation_batcher = MutationsBatcher(table=table) + + row1 = DirectRow(row_key=b"row_key") + row2 = DirectRow(row_key=b"row_key") + table.mutate_rows.return_value = mocked_response + + mutation_batcher.mutate_rows([row1, row2]) + with pytest.raises(MutationsBatchError) as exc: + mutation_batcher.close() + assert exc.value.message == "Errors in batch mutations." + assert len(exc.value.exc) == 2 + + assert exc.value.exc[0].message == mocked_response[0].message + assert exc.value.exc[1].message == mocked_response[1].message + + +def test_flow_control_event_is_set_when_not_blocked(): + flow_control = _FlowControl() + + flow_control.set_flow_control_status() + assert flow_control.event.is_set() + + +def test_flow_control_event_is_not_set_when_blocked(): + flow_control = _FlowControl() + + flow_control.inflight_mutations = flow_control.max_mutations + flow_control.inflight_size = flow_control.max_mutation_bytes + + flow_control.set_flow_control_status() + assert not flow_control.event.is_set() + + +@mock.patch("concurrent.futures.ThreadPoolExecutor.submit") +def test_flush_async_batch_count(mocked_executor_submit): + table = _Table(TABLE_NAME) + mutation_batcher = MutationsBatcher(table=table, flush_count=2) + + number_of_bytes = 1 * 1024 * 1024 + max_value = b"1" * number_of_bytes + for index in range(5): + row = DirectRow(row_key=f"row_key_{index}") + row.set_cell("cf1", b"c1", max_value) + mutation_batcher.mutate(row) + mutation_batcher._flush_async() + + # 3 batches submitted. 2 batches of 2 items, and the last one a single item batch. 
+ assert mocked_executor_submit.call_count == 3 + + class _Instance(object): def __init__(self, client=None): self._client = client @@ -156,5 +247,8 @@ def __init__(self, name, client=None): self.mutation_calls = 0 def mutate_rows(self, rows): + from google.rpc.status_pb2 import Status + self.mutation_calls += 1 - return rows + + return [Status(code=0) for _ in rows] From 9e35ad2a4b0479f90f8e0353e0ac3a46185298d2 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Tue, 16 May 2023 11:56:46 +0200 Subject: [PATCH 080/213] chore(main): release 2.18.1 (#774) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- .release-please-manifest.json | 2 +- CHANGELOG.md | 7 +++++++ google/cloud/bigtable/gapic_version.py | 2 +- google/cloud/bigtable_admin/gapic_version.py | 2 +- google/cloud/bigtable_admin_v2/gapic_version.py | 2 +- google/cloud/bigtable_v2/gapic_version.py | 2 +- 6 files changed, 12 insertions(+), 5 deletions(-) diff --git a/.release-please-manifest.json b/.release-please-manifest.json index a627e662e..e7a7a136b 100644 --- a/.release-please-manifest.json +++ b/.release-please-manifest.json @@ -1,3 +1,3 @@ { - ".": "2.18.0" + ".": "2.18.1" } \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index 2d7fe5141..d56f02896 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,13 @@ [1]: https://pypi.org/project/google-cloud-bigtable/#history +## [2.18.1](https://github.com/googleapis/python-bigtable/compare/v2.18.0...v2.18.1) (2023-05-11) + + +### Bug Fixes + +* Revert "Feat: Threaded MutationsBatcher" ([#773](https://github.com/googleapis/python-bigtable/issues/773)) ([a767cff](https://github.com/googleapis/python-bigtable/commit/a767cff95d990994f85f5fd05cc10f952087b49d)) + ## [2.18.0](https://github.com/googleapis/python-bigtable/compare/v2.17.0...v2.18.0) (2023-05-10) diff --git a/google/cloud/bigtable/gapic_version.py b/google/cloud/bigtable/gapic_version.py index f09943f6b..e1b4da1de 100644 --- a/google/cloud/bigtable/gapic_version.py +++ b/google/cloud/bigtable/gapic_version.py @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. # -__version__ = "2.18.0" # {x-release-please-version} +__version__ = "2.18.1" # {x-release-please-version} diff --git a/google/cloud/bigtable_admin/gapic_version.py b/google/cloud/bigtable_admin/gapic_version.py index f09943f6b..e1b4da1de 100644 --- a/google/cloud/bigtable_admin/gapic_version.py +++ b/google/cloud/bigtable_admin/gapic_version.py @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. # -__version__ = "2.18.0" # {x-release-please-version} +__version__ = "2.18.1" # {x-release-please-version} diff --git a/google/cloud/bigtable_admin_v2/gapic_version.py b/google/cloud/bigtable_admin_v2/gapic_version.py index f09943f6b..e1b4da1de 100644 --- a/google/cloud/bigtable_admin_v2/gapic_version.py +++ b/google/cloud/bigtable_admin_v2/gapic_version.py @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# -__version__ = "2.18.0" # {x-release-please-version} +__version__ = "2.18.1" # {x-release-please-version} diff --git a/google/cloud/bigtable_v2/gapic_version.py b/google/cloud/bigtable_v2/gapic_version.py index f09943f6b..e1b4da1de 100644 --- a/google/cloud/bigtable_v2/gapic_version.py +++ b/google/cloud/bigtable_v2/gapic_version.py @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. # -__version__ = "2.18.0" # {x-release-please-version} +__version__ = "2.18.1" # {x-release-please-version} From 6140acb2260dbf1245a378b846fdaa47c6e843f4 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 22 May 2023 11:07:35 -0700 Subject: [PATCH 081/213] remove aborted from retryable errors --- google/cloud/bigtable/_mutate_rows.py | 1 - google/cloud/bigtable/client.py | 1 - 2 files changed, 2 deletions(-) diff --git a/google/cloud/bigtable/_mutate_rows.py b/google/cloud/bigtable/_mutate_rows.py index f785c1056..5ea53827d 100644 --- a/google/cloud/bigtable/_mutate_rows.py +++ b/google/cloud/bigtable/_mutate_rows.py @@ -65,7 +65,6 @@ async def _mutate_rows_operation( predicate = retries.if_exception_type( core_exceptions.DeadlineExceeded, core_exceptions.ServiceUnavailable, - core_exceptions.Aborted, _MutateRowsIncomplete, ) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index e95849e65..2b0dcf2aa 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -624,7 +624,6 @@ async def mutate_row( predicate = retries.if_exception_type( core_exceptions.DeadlineExceeded, core_exceptions.ServiceUnavailable, - core_exceptions.Aborted, ) else: # mutations should not be retried From 36ba2b6869c8bacc3ce89e37de19bdb085867c78 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 22 May 2023 11:24:27 -0700 Subject: [PATCH 082/213] improved SetCell mutation --- google/cloud/bigtable/mutations.py | 45 ++++++++++++++++++++++++------ 1 file changed, 36 insertions(+), 9 deletions(-) diff --git a/google/cloud/bigtable/mutations.py b/google/cloud/bigtable/mutations.py index f6e530af0..ae435889d 100644 --- a/google/cloud/bigtable/mutations.py +++ b/google/cloud/bigtable/mutations.py @@ -14,6 +14,7 @@ # from __future__ import annotations from typing import Any +import time from dataclasses import dataclass from abc import ABC, abstractmethod @@ -36,29 +37,55 @@ def __str__(self) -> str: return str(self._to_dict()) -@dataclass class SetCell(Mutation): - family: str - qualifier: bytes - new_value: bytes - timestamp_micros: int | None = None + def __init__( + self, + family: str, + qualifier: bytes | str, + new_value: bytes | str | int, + timestamp_micros: int | None = None, + ): + """ + Mutation to set the value of a cell + + Args: + - family: The name of the column family to which the new cell belongs. + - qualifier: The column qualifier of the new cell. + - new_value: The value of the new cell. str or int input will be converted to bytes + - timestamp_micros: The timestamp of the new cell. If None, the current timestamp will be used + If -1, the server will assign a timestamp. Note that SetCell mutations with server-side + timestamps are non-idempotent operations and will not be retried. 
+ """ + qualifier = qualifier.encode() if isinstance(qualifier, str) else qualifier + if not isinstance(qualifier, bytes): + raise TypeError("qualifier must be bytes or str") + if isinstance(new_value, str): + new_value = new_value.encode() + elif isinstance(new_value, int): + new_value = new_value.to_bytes(8, "big", signed=True) + if not isinstance(new_value, bytes): + raise TypeError("new_value must be bytes, str, or int") + if timestamp_micros is None: + timestamp_micros = time.time_ns() // 1000 + self.family = family + self.qualifier = qualifier + self.new_value = new_value + self.timestamp_micros = timestamp_micros def _to_dict(self) -> dict[str, Any]: """Convert the mutation to a dictionary representation""" - # if timestamp not given, use -1 for server-side timestamp - timestamp = self.timestamp_micros if self.timestamp_micros is not None else -1 return { "set_cell": { "family_name": self.family, "column_qualifier": self.qualifier, - "timestamp_micros": timestamp, + "timestamp_micros": self.timestamp_micros, "value": self.new_value, } } def is_idempotent(self) -> bool: """Check if the mutation is idempotent""" - return self.timestamp_micros is not None and self.timestamp_micros >= 0 + return self.timestamp_micros is not None and self.timestamp_micros != -1 @dataclass From b3c90179708462411633a03433e72a5ce522dd94 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 22 May 2023 11:49:30 -0700 Subject: [PATCH 083/213] fixed mutations tests --- google/cloud/bigtable/mutations.py | 8 +++- tests/unit/test_mutations.py | 62 ++++++++++++++++++++++++++++-- 2 files changed, 65 insertions(+), 5 deletions(-) diff --git a/google/cloud/bigtable/mutations.py b/google/cloud/bigtable/mutations.py index ae435889d..67e967b4e 100644 --- a/google/cloud/bigtable/mutations.py +++ b/google/cloud/bigtable/mutations.py @@ -18,6 +18,9 @@ from dataclasses import dataclass from abc import ABC, abstractmethod +# special value for SetCell mutation timestamps. 
If set, server will assign a timestamp +SERVER_SIDE_TIMESTAMP = -1 + class Mutation(ABC): """Model class for mutations""" @@ -85,7 +88,10 @@ def _to_dict(self) -> dict[str, Any]: def is_idempotent(self) -> bool: """Check if the mutation is idempotent""" - return self.timestamp_micros is not None and self.timestamp_micros != -1 + return ( + self.timestamp_micros is not None + and self.timestamp_micros != SERVER_SIDE_TIMESTAMP + ) @dataclass diff --git a/tests/unit/test_mutations.py b/tests/unit/test_mutations.py index b6f62d32a..b17475a75 100644 --- a/tests/unit/test_mutations.py +++ b/tests/unit/test_mutations.py @@ -53,8 +53,59 @@ def _target_class(self): def _make_one(self, *args, **kwargs): return self._target_class()(*args, **kwargs) + def test_ctor(self): + """Ensure constructor sets expected values""" + expected_family = "test-family" + expected_qualifier = b"test-qualifier" + expected_value = b"test-value" + expected_timestamp = 1234567890 + instance = self._make_one( + expected_family, expected_qualifier, expected_value, expected_timestamp + ) + assert instance.family == expected_family + assert instance.qualifier == expected_qualifier + assert instance.new_value == expected_value + assert instance.timestamp_micros == expected_timestamp + + def test_ctor_str_inputs(self): + """Test with string qualifier and value""" + expected_family = "test-family" + expected_qualifier = b"test-qualifier" + expected_value = b"test-value" + instance = self._make_one(expected_family, "test-qualifier", "test-value") + assert instance.family == expected_family + assert instance.qualifier == expected_qualifier + assert instance.new_value == expected_value + + @pytest.mark.parametrize( + "int_value,expected_bytes", + [ + (-42, b"\xff\xff\xff\xff\xff\xff\xff\xd6"), + (-2, b"\xff\xff\xff\xff\xff\xff\xff\xfe"), + (-1, b"\xff\xff\xff\xff\xff\xff\xff\xff"), + (0, b"\x00\x00\x00\x00\x00\x00\x00\x00"), + (1, b"\x00\x00\x00\x00\x00\x00\x00\x01"), + (2, b"\x00\x00\x00\x00\x00\x00\x00\x02"), + (100, b"\x00\x00\x00\x00\x00\x00\x00d"), + ], + ) + def test_ctor_int_value(self, int_value, expected_bytes): + """Test with int value""" + expected_family = "test-family" + expected_qualifier = b"test-qualifier" + instance = self._make_one(expected_family, expected_qualifier, int_value) + assert instance.family == expected_family + assert instance.qualifier == expected_qualifier + assert instance.new_value == expected_bytes + + def test_ctor_no_timestamp(self): + """If no timestamp is given, should use current time""" + with mock.patch("time.time_ns", return_value=1234000): + instance = self._make_one("test-family", b"test-qualifier", b"test-value") + assert instance.timestamp_micros == 1234 + def test__to_dict(self): - """Should be unimplemented in the base class""" + """ensure dict representation is as expected""" expected_family = "test-family" expected_qualifier = b"test-qualifier" expected_value = b"test-value" @@ -72,12 +123,14 @@ def test__to_dict(self): assert len(got_inner_dict.keys()) == 4 def test__to_dict_server_timestamp(self): - """Should be unimplemented in the base class""" + """test with server side timestamp -1 value""" expected_family = "test-family" expected_qualifier = b"test-qualifier" expected_value = b"test-value" expected_timestamp = -1 - instance = self._make_one(expected_family, expected_qualifier, expected_value) + instance = self._make_one( + expected_family, expected_qualifier, expected_value, expected_timestamp + ) got_dict = instance._to_dict() assert list(got_dict.keys()) == ["set_cell"] 
got_inner_dict = got_dict["set_cell"] @@ -94,7 +147,8 @@ def test__to_dict_server_timestamp(self): (1, True), (0, True), (-1, False), - (None, False), + (-2, True), + (None, True), ], ) def test_is_idempotent(self, timestamp, expected_value): From cac9e2d815400f065f8bd8762f85fe9867f06b0c Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 22 May 2023 12:32:32 -0700 Subject: [PATCH 084/213] SetCell timestamps use millisecond precision --- google/cloud/bigtable/mutations.py | 22 +++++++++++----- tests/unit/test_client.py | 21 ++++++++-------- tests/unit/test_mutations.py | 40 +++++++++++++++++++++++++++--- 3 files changed, 63 insertions(+), 20 deletions(-) diff --git a/google/cloud/bigtable/mutations.py b/google/cloud/bigtable/mutations.py index 67e967b4e..2964b547d 100644 --- a/google/cloud/bigtable/mutations.py +++ b/google/cloud/bigtable/mutations.py @@ -55,7 +55,8 @@ def __init__( - family: The name of the column family to which the new cell belongs. - qualifier: The column qualifier of the new cell. - new_value: The value of the new cell. str or int input will be converted to bytes - - timestamp_micros: The timestamp of the new cell. If None, the current timestamp will be used + - timestamp_micros: The timestamp of the new cell. If None, the current timestamp will be used. + Timestamps will be sent with milisecond-percision. Extra precision will be truncated. If -1, the server will assign a timestamp. Note that SetCell mutations with server-side timestamps are non-idempotent operations and will not be retried. """ @@ -69,11 +70,23 @@ def __init__( if not isinstance(new_value, bytes): raise TypeError("new_value must be bytes, str, or int") if timestamp_micros is None: + # use current timestamp timestamp_micros = time.time_ns() // 1000 + if timestamp_micros < SERVER_SIDE_TIMESTAMP: + raise ValueError( + "timestamp_micros must be positive (or -1 for server-side timestamp)" + ) self.family = family self.qualifier = qualifier self.new_value = new_value - self.timestamp_micros = timestamp_micros + self._timestamp_micros = timestamp_micros + + @property + def timestamp_micros(self): + if self._timestamp_micros > 0: + # round to use milisecond precision + return (self._timestamp_micros // 1000) * 1000 + return self._timestamp_micros def _to_dict(self) -> dict[str, Any]: """Convert the mutation to a dictionary representation""" @@ -88,10 +101,7 @@ def _to_dict(self) -> dict[str, Any]: def is_idempotent(self) -> bool: """Check if the mutation is idempotent""" - return ( - self.timestamp_micros is not None - and self.timestamp_micros != SERVER_SIDE_TIMESTAMP - ) + return self.timestamp_micros != SERVER_SIDE_TIMESTAMP @dataclass diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 8850639bf..2f71dde46 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -876,7 +876,6 @@ async def test_mutate_row(self, mutation_arg): [ core_exceptions.DeadlineExceeded, core_exceptions.ServiceUnavailable, - core_exceptions.Aborted, ], ) @pytest.mark.asyncio @@ -905,7 +904,6 @@ async def test_mutate_row_retryable_errors(self, retryable_exception): [ core_exceptions.DeadlineExceeded, core_exceptions.ServiceUnavailable, - core_exceptions.Aborted, ], ) @pytest.mark.asyncio @@ -922,7 +920,9 @@ async def test_mutate_row_non_idempotent_retryable_errors( ) as mock_gapic: mock_gapic.side_effect = retryable_exception("mock") with pytest.raises(retryable_exception): - mutation = mutations.SetCell("family", b"qualifier", b"value") + mutation = mutations.SetCell( + "family", 
b"qualifier", b"value", -1 + ) assert mutation.is_idempotent() is False await table.mutate_row( "row_key", mutation, operation_timeout=0.2 @@ -936,6 +936,7 @@ async def test_mutate_row_non_idempotent_retryable_errors( core_exceptions.FailedPrecondition, RuntimeError, ValueError, + core_exceptions.Aborted, ], ) @pytest.mark.asyncio @@ -1066,7 +1067,6 @@ async def test_bulk_mutate_rows_multiple_entries(self): [ core_exceptions.DeadlineExceeded, core_exceptions.ServiceUnavailable, - core_exceptions.Aborted, ], ) async def test_bulk_mutate_rows_idempotent_mutation_error_retryable( @@ -1113,6 +1113,7 @@ async def test_bulk_mutate_rows_idempotent_mutation_error_retryable( core_exceptions.OutOfRange, core_exceptions.NotFound, core_exceptions.FailedPrecondition, + core_exceptions.Aborted, ], ) async def test_bulk_mutate_rows_idempotent_mutation_error_non_retryable( @@ -1151,7 +1152,6 @@ async def test_bulk_mutate_rows_idempotent_mutation_error_non_retryable( [ core_exceptions.DeadlineExceeded, core_exceptions.ServiceUnavailable, - core_exceptions.Aborted, ], ) @pytest.mark.asyncio @@ -1194,7 +1194,6 @@ async def test_bulk_mutate_idempotent_retryable_request_errors( [ core_exceptions.DeadlineExceeded, core_exceptions.ServiceUnavailable, - core_exceptions.Aborted, ], ) async def test_bulk_mutate_rows_non_idempotent_retryable_errors( @@ -1215,7 +1214,9 @@ async def test_bulk_mutate_rows_non_idempotent_retryable_errors( [retryable_exception("mock")] ) with pytest.raises(MutationsExceptionGroup) as e: - mutation = mutations.SetCell("family", b"qualifier", b"value") + mutation = mutations.SetCell( + "family", b"qualifier", b"value", -1 + ) entry = mutations.BulkMutationsEntry(b"row_key", [mutation]) assert mutation.is_idempotent() is False await table.bulk_mutate_rows([entry], operation_timeout=0.2) @@ -1273,7 +1274,7 @@ async def test_bulk_mutate_error_index(self): """ from google.api_core.exceptions import ( DeadlineExceeded, - Aborted, + ServiceUnavailable, FailedPrecondition, ) from google.cloud.bigtable.exceptions import ( @@ -1289,7 +1290,7 @@ async def test_bulk_mutate_error_index(self): ) as mock_gapic: # fail with retryable errors, then a non-retryable one mock_gapic.side_effect = [ - self._mock_response([None, Aborted("mock"), None]), + self._mock_response([None, ServiceUnavailable("mock"), None]), self._mock_response([DeadlineExceeded("mock")]), self._mock_response([FailedPrecondition("final")]), ] @@ -1313,7 +1314,7 @@ async def test_bulk_mutate_error_index(self): cause = failed.__cause__ assert isinstance(cause, RetryExceptionGroup) assert len(cause.exceptions) == 3 - assert isinstance(cause.exceptions[0], Aborted) + assert isinstance(cause.exceptions[0], ServiceUnavailable) assert isinstance(cause.exceptions[1], DeadlineExceeded) assert isinstance(cause.exceptions[2], FailedPrecondition) diff --git a/tests/unit/test_mutations.py b/tests/unit/test_mutations.py index b17475a75..b9e136cda 100644 --- a/tests/unit/test_mutations.py +++ b/tests/unit/test_mutations.py @@ -58,7 +58,7 @@ def test_ctor(self): expected_family = "test-family" expected_qualifier = b"test-qualifier" expected_value = b"test-value" - expected_timestamp = 1234567890 + expected_timestamp = 1234567890000 instance = self._make_one( expected_family, expected_qualifier, expected_value, expected_timestamp ) @@ -102,14 +102,47 @@ def test_ctor_no_timestamp(self): """If no timestamp is given, should use current time""" with mock.patch("time.time_ns", return_value=1234000): instance = self._make_one("test-family", 
b"test-qualifier", b"test-value") - assert instance.timestamp_micros == 1234 + assert instance._timestamp_micros == 1234 + assert instance.timestamp_micros == 1000 + + def test_ctor_negative_timestamp(self): + """Only positive or -1 timestamps are valid""" + with pytest.raises(ValueError) as e: + self._make_one("test-family", b"test-qualifier", b"test-value", -2) + assert ( + "timestamp_micros must be positive (or -1 for server-side timestamp)" + in str(e.value) + ) + + @pytest.mark.parametrize( + "input_timestamp,expected_timestamp", + [ + (-1, -1), + (0, 0), + (1, 0), + (123, 0), + (999, 0), + (1000, 1000), + (1234, 1000), + (1999, 1000), + (2000, 2000), + (1234567890, 1234567000), + ], + ) + def test_timestamp_milli_precision(self, input_timestamp, expected_timestamp): + """timestamp_micros should have millisecond precision (3 trailing 0s)""" + instance = self._make_one( + "test-family", b"test-qualifier", b"test-value", input_timestamp + ) + assert instance._timestamp_micros == input_timestamp + assert instance.timestamp_micros == expected_timestamp def test__to_dict(self): """ensure dict representation is as expected""" expected_family = "test-family" expected_qualifier = b"test-qualifier" expected_value = b"test-value" - expected_timestamp = 1234567890 + expected_timestamp = 123456789000 instance = self._make_one( expected_family, expected_qualifier, expected_value, expected_timestamp ) @@ -147,7 +180,6 @@ def test__to_dict_server_timestamp(self): (1, True), (0, True), (-1, False), - (-2, True), (None, True), ], ) From 34b051f53a18d8e2b7907aa026bd32f483402567 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 22 May 2023 12:56:49 -0700 Subject: [PATCH 085/213] renamed BulkMutationsEntry to RowMutationEntry --- google/cloud/bigtable/__init__.py | 4 ++-- google/cloud/bigtable/_mutate_rows.py | 16 ++++++++-------- google/cloud/bigtable/client.py | 12 ++++++------ google/cloud/bigtable/exceptions.py | 6 +++--- google/cloud/bigtable/mutations.py | 2 +- tests/system/test_system.py | 4 ++-- tests/unit/test_client.py | 22 ++++++++++------------ tests/unit/test_mutations.py | 6 +++--- 8 files changed, 35 insertions(+), 37 deletions(-) diff --git a/google/cloud/bigtable/__init__.py b/google/cloud/bigtable/__init__.py index c5581f813..723b8a432 100644 --- a/google/cloud/bigtable/__init__.py +++ b/google/cloud/bigtable/__init__.py @@ -28,7 +28,7 @@ from google.cloud.bigtable.mutations_batcher import MutationsBatcher from google.cloud.bigtable.mutations import Mutation -from google.cloud.bigtable.mutations import BulkMutationsEntry +from google.cloud.bigtable.mutations import RowMutationEntry from google.cloud.bigtable.mutations import SetCell from google.cloud.bigtable.mutations import DeleteRangeFromColumn from google.cloud.bigtable.mutations import DeleteAllFromFamily @@ -47,7 +47,7 @@ "RowRange", "MutationsBatcher", "Mutation", - "BulkMutationsEntry", + "RowMutationEntry", "SetCell", "DeleteRangeFromColumn", "DeleteAllFromFamily", diff --git a/google/cloud/bigtable/_mutate_rows.py b/google/cloud/bigtable/_mutate_rows.py index 5ea53827d..26839fbe9 100644 --- a/google/cloud/bigtable/_mutate_rows.py +++ b/google/cloud/bigtable/_mutate_rows.py @@ -24,7 +24,7 @@ from google.cloud.bigtable_v2.services.bigtable.async_client import ( BigtableAsyncClient, ) - from google.cloud.bigtable.mutations import BulkMutationsEntry + from google.cloud.bigtable.mutations import RowMutationEntry class _MutateRowsIncomplete(RuntimeError): @@ -38,10 +38,10 @@ class _MutateRowsIncomplete(RuntimeError): async def 
_mutate_rows_operation( gapic_client: "BigtableAsyncClient", request: dict[str, Any], - mutation_entries: list["BulkMutationsEntry"], + mutation_entries: list["RowMutationEntry"], operation_timeout: float, per_request_timeout: float | None, - on_terminal_state: Callable[["BulkMutationsEntry", Exception | None], None] + on_terminal_state: Callable[["RowMutationEntry", Exception | None], None] | None = None, ): """ @@ -50,14 +50,14 @@ async def _mutate_rows_operation( Args: - gapic_client: the client to use for the mutate_rows call - request: A request dict containing table name, app profile id, and other details to inclide in the request - - mutation_entries: a list of BulkMutationsEntry objects to send to the server + - mutation_entries: a list of RowMutationEntry objects to send to the server - operation_timeout: the timeout to use for the entire operation, in seconds. - per_request_timeout: the timeout to use for each mutate_rows attempt, in seconds. If not specified, the request will run until operation_timeout is reached. - on_terminal_state: If given, this function will be called as soon as a mutation entry reaches a terminal state (success or failure). """ - mutations_dict: dict[int, BulkMutationsEntry | None] = { + mutations_dict: dict[int, RowMutationEntry | None] = { idx: mut for idx, mut in enumerate(mutation_entries) } error_dict: dict[int, list[Exception]] = {idx: [] for idx in mutations_dict.keys()} @@ -135,10 +135,10 @@ async def _mutate_rows_retryable_attempt( gapic_client: "BigtableAsyncClient", request: dict[str, Any], per_request_timeout: float | None, - mutation_dict: dict[int, "BulkMutationsEntry" | None], + mutation_dict: dict[int, "RowMutationEntry" | None], error_dict: dict[int, list[Exception]], predicate: Callable[[Exception], bool], - on_terminal_state: Callable[["BulkMutationsEntry", Exception | None], None] + on_terminal_state: Callable[["RowMutationEntry", Exception | None], None] | None = None, ): """ @@ -155,7 +155,7 @@ async def _mutate_rows_retryable_attempt( - request: the request to send to the server, populated with table name and app profile id - per_request_timeout: the timeout to use for each mutate_rows attempt - mutation_dict: a dictionary tracking which entries are outstanding - (stored as BulkMutationsEntry), and which have reached a terminal state (stored as None). + (stored as RowMutationEntry), and which have reached a terminal state (stored as None). At the start of the request, all entries are outstanding. - error_dict: a dictionary tracking errors associated with each entry index. Each retry will append a new error. Successful mutations will clear the error list. 
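For readers tracking the rename, a minimal usage sketch of the new entry type (the open async `table` handle, the "my-family" column family, and the row key are illustrative assumptions, not part of this patch; the pattern mirrors the system test updated later in this patch):

    from google.cloud.bigtable.mutations import RowMutationEntry, SetCell

    # Each RowMutationEntry bundles one row key with the mutations to apply to it;
    # the entry is applied atomically, but separate entries may land in any order.
    mutation = SetCell(family="my-family", qualifier=b"greeting", new_value=b"hello")
    entry = RowMutationEntry(b"row-key-1", [mutation])
    await table.bulk_mutate_rows([entry])
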
diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 2b0dcf2aa..51382124e 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -42,7 +42,7 @@ from google.cloud.bigtable.exceptions import _convert_retry_deadline -from google.cloud.bigtable.mutations import Mutation, BulkMutationsEntry +from google.cloud.bigtable.mutations import Mutation, RowMutationEntry from google.cloud.bigtable._mutate_rows import _mutate_rows_operation if TYPE_CHECKING: @@ -654,16 +654,16 @@ def on_error_fn(exc): async def bulk_mutate_rows( self, - mutation_entries: list[BulkMutationsEntry], + mutation_entries: list[RowMutationEntry], *, operation_timeout: float | None = 60, per_request_timeout: float | None = None, - on_success: Callable[[BulkMutationsEntry], None] | None = None, + on_success: Callable[[RowMutationEntry], None] | None = None, ): """ Applies mutations for multiple rows in a single batched request. - Each individual BulkMutationsEntry is applied atomically, but separate entries + Each individual RowMutationEntry is applied atomically, but separate entries may be applied in arbitrary order (even for entries targetting the same row) In total, the row_mutations can contain at most 100000 individual mutations across all entries @@ -704,11 +704,11 @@ async def bulk_mutate_rows( if self.app_profile_id: request["app_profile_id"] = self.app_profile_id - callback: Callable[[BulkMutationsEntry, Exception | None], None] | None = None + callback: Callable[[RowMutationEntry, Exception | None], None] | None = None if on_success is not None: # convert on_terminal_state callback to callback for successful results only # failed results will be rasied as exceptions - def callback(entry: BulkMutationsEntry, exc: Exception | None): + def callback(entry: RowMutationEntry, exc: Exception | None): if exc is None and on_success is not None: on_success(entry) diff --git a/google/cloud/bigtable/exceptions.py b/google/cloud/bigtable/exceptions.py index 0b5ff4e61..2cd2814cf 100644 --- a/google/cloud/bigtable/exceptions.py +++ b/google/cloud/bigtable/exceptions.py @@ -23,7 +23,7 @@ is_311_plus = sys.version_info >= (3, 11) if TYPE_CHECKING: - from google.cloud.bigtable.mutations import BulkMutationsEntry + from google.cloud.bigtable.mutations import RowMutationEntry def _convert_retry_deadline( @@ -113,14 +113,14 @@ def __new__(cls, excs: list[FailedMutationEntryError], total_entries: int): class FailedMutationEntryError(Exception): """ - Represents a single failed BulkMutationsEntry in a bulk_mutate_rows request. + Represents a single failed RowMutationEntry in a bulk_mutate_rows request. 
A collection of FailedMutationEntryErrors will be raised in a MutationsExceptionGroup """ def __init__( self, failed_idx: int, - failed_mutation_entry: "BulkMutationsEntry", + failed_mutation_entry: "RowMutationEntry", cause: Exception, ): idempotent_msg = ( diff --git a/google/cloud/bigtable/mutations.py b/google/cloud/bigtable/mutations.py index 2964b547d..2121b7d5c 100644 --- a/google/cloud/bigtable/mutations.py +++ b/google/cloud/bigtable/mutations.py @@ -156,7 +156,7 @@ def _to_dict(self) -> dict[str, Any]: } -class BulkMutationsEntry: +class RowMutationEntry: def __init__(self, row_key: bytes | str, mutations: Mutation | list[Mutation]): if isinstance(row_key, str): row_key = row_key.encode("utf-8") diff --git a/tests/system/test_system.py b/tests/system/test_system.py index b3ec26d57..1ddcd3474 100644 --- a/tests/system/test_system.py +++ b/tests/system/test_system.py @@ -174,10 +174,10 @@ async def test_bulk_mutations_set_cell(client, table): """ Ensure cells can be set properly """ - from google.cloud.bigtable.mutations import SetCell, BulkMutationsEntry + from google.cloud.bigtable.mutations import SetCell, RowMutationEntry mutation = SetCell( family=TEST_FAMILY, qualifier=b"test-qualifier", new_value=b"test-value" ) - bulk_mutation = BulkMutationsEntry(b"abc", [mutation]) + bulk_mutation = RowMutationEntry(b"abc", [mutation]) await table.bulk_mutate_rows([bulk_mutation]) diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 2f71dde46..580b14d1e 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -1020,9 +1020,7 @@ async def test_bulk_mutate_rows(self, mutation_arg): client._gapic_client, "mutate_rows" ) as mock_gapic: mock_gapic.return_value = self._mock_response([None]) - bulk_mutation = mutations.BulkMutationsEntry( - b"row_key", mutation_arg - ) + bulk_mutation = mutations.RowMutationEntry(b"row_key", mutation_arg) await table.bulk_mutate_rows( [bulk_mutation], per_request_timeout=expected_per_request_timeout, @@ -1047,8 +1045,8 @@ async def test_bulk_mutate_rows_multiple_entries(self): ) as mock_gapic: mock_gapic.return_value = self._mock_response([None, None]) mutation_list = [mutations.DeleteAllFromRow()] - entry_1 = mutations.BulkMutationsEntry(b"row_key_1", mutation_list) - entry_2 = mutations.BulkMutationsEntry(b"row_key_2", mutation_list) + entry_1 = mutations.RowMutationEntry(b"row_key_1", mutation_list) + entry_2 = mutations.RowMutationEntry(b"row_key_2", mutation_list) await table.bulk_mutate_rows( [entry_1, entry_2], ) @@ -1091,7 +1089,7 @@ async def test_bulk_mutate_rows_idempotent_mutation_error_retryable( ) with pytest.raises(MutationsExceptionGroup) as e: mutation = mutations.DeleteAllFromRow() - entry = mutations.BulkMutationsEntry(b"row_key", [mutation]) + entry = mutations.RowMutationEntry(b"row_key", [mutation]) assert mutation.is_idempotent() is True await table.bulk_mutate_rows([entry], operation_timeout=0.05) assert len(e.value.exceptions) == 1 @@ -1137,7 +1135,7 @@ async def test_bulk_mutate_rows_idempotent_mutation_error_non_retryable( ) with pytest.raises(MutationsExceptionGroup) as e: mutation = mutations.DeleteAllFromRow() - entry = mutations.BulkMutationsEntry(b"row_key", [mutation]) + entry = mutations.RowMutationEntry(b"row_key", [mutation]) assert mutation.is_idempotent() is True await table.bulk_mutate_rows([entry], operation_timeout=0.05) assert len(e.value.exceptions) == 1 @@ -1177,7 +1175,7 @@ async def test_bulk_mutate_idempotent_retryable_request_errors( mutation = mutations.SetCell( "family", 
b"qualifier", b"value", timestamp_micros=123 ) - entry = mutations.BulkMutationsEntry(b"row_key", [mutation]) + entry = mutations.RowMutationEntry(b"row_key", [mutation]) assert mutation.is_idempotent() is True await table.bulk_mutate_rows([entry], operation_timeout=0.05) assert len(e.value.exceptions) == 1 @@ -1217,7 +1215,7 @@ async def test_bulk_mutate_rows_non_idempotent_retryable_errors( mutation = mutations.SetCell( "family", b"qualifier", b"value", -1 ) - entry = mutations.BulkMutationsEntry(b"row_key", [mutation]) + entry = mutations.RowMutationEntry(b"row_key", [mutation]) assert mutation.is_idempotent() is False await table.bulk_mutate_rows([entry], operation_timeout=0.2) assert len(e.value.exceptions) == 1 @@ -1257,7 +1255,7 @@ async def test_bulk_mutate_rows_non_retryable_errors(self, non_retryable_excepti mutation = mutations.SetCell( "family", b"qualifier", b"value", timestamp_micros=123 ) - entry = mutations.BulkMutationsEntry(b"row_key", [mutation]) + entry = mutations.RowMutationEntry(b"row_key", [mutation]) assert mutation.is_idempotent() is True await table.bulk_mutate_rows([entry], operation_timeout=0.2) assert len(e.value.exceptions) == 1 @@ -1299,7 +1297,7 @@ async def test_bulk_mutate_error_index(self): "family", b"qualifier", b"value", timestamp_micros=123 ) entries = [ - mutations.BulkMutationsEntry( + mutations.RowMutationEntry( (f"row_key_{i}").encode(), [mutation] ) for i in range(3) @@ -1347,7 +1345,7 @@ async def test_bulk_mutate_rows_on_success(self): "family", b"qualifier", b"value", timestamp_micros=123 ) entries = [ - mutations.BulkMutationsEntry( + mutations.RowMutationEntry( (f"row_key_{i}").encode(), [mutation] ) for i in range(3) diff --git a/tests/unit/test_mutations.py b/tests/unit/test_mutations.py index b9e136cda..dd4b8b20f 100644 --- a/tests/unit/test_mutations.py +++ b/tests/unit/test_mutations.py @@ -354,11 +354,11 @@ def test___str__(self): assert instance.__str__() == "{'delete_from_row': {}}" -class TestBulkMutationsEntry: +class TestRowMutationEntry: def _target_class(self): - from google.cloud.bigtable.mutations import BulkMutationsEntry + from google.cloud.bigtable.mutations import RowMutationEntry - return BulkMutationsEntry + return RowMutationEntry def _make_one(self, row_key, mutations): return self._target_class()(row_key, mutations) From cef70f243541820225f86a520e0b2abd3a7354f7 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Thu, 25 May 2023 10:31:36 -0400 Subject: [PATCH 086/213] feat: add ChangeStreamConfig to CreateTable and UpdateTable (#786) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: add ChangeStreamConfig to CreateTable and UpdateTable PiperOrigin-RevId: 534836567 Source-Link: https://github.com/googleapis/googleapis/commit/eb2d1f1555df526abd00aa475e8fd5d014af6489 Source-Link: https://github.com/googleapis/googleapis-gen/commit/64cebcfc2765bff5afb19c140d4b1600dfdaebad Copy-Tag: eyJwIjoiLmdpdGh1Yi8uT3dsQm90LnlhbWwiLCJoIjoiNjRjZWJjZmMyNzY1YmZmNWFmYjE5YzE0MGQ0YjE2MDBkZmRhZWJhZCJ9 * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot --- google/cloud/bigtable_admin/__init__.py | 2 ++ google/cloud/bigtable_admin_v2/__init__.py | 2 ++ .../bigtable_table_admin/async_client.py | 13 +++++--- .../services/bigtable_table_admin/client.py | 13 +++++--- .../cloud/bigtable_admin_v2/types/__init__.py | 2 ++ 
.../types/bigtable_table_admin.py | 20 +++++++----- google/cloud/bigtable_admin_v2/types/table.py | 31 +++++++++++++++++++ .../test_bigtable_table_admin.py | 2 ++ 8 files changed, 67 insertions(+), 18 deletions(-) diff --git a/google/cloud/bigtable_admin/__init__.py b/google/cloud/bigtable_admin/__init__.py index 6ddc6acb2..0ba93ec63 100644 --- a/google/cloud/bigtable_admin/__init__.py +++ b/google/cloud/bigtable_admin/__init__.py @@ -200,6 +200,7 @@ from google.cloud.bigtable_admin_v2.types.instance import Instance from google.cloud.bigtable_admin_v2.types.table import Backup from google.cloud.bigtable_admin_v2.types.table import BackupInfo +from google.cloud.bigtable_admin_v2.types.table import ChangeStreamConfig from google.cloud.bigtable_admin_v2.types.table import ColumnFamily from google.cloud.bigtable_admin_v2.types.table import EncryptionInfo from google.cloud.bigtable_admin_v2.types.table import GcRule @@ -282,6 +283,7 @@ "Instance", "Backup", "BackupInfo", + "ChangeStreamConfig", "ColumnFamily", "EncryptionInfo", "GcRule", diff --git a/google/cloud/bigtable_admin_v2/__init__.py b/google/cloud/bigtable_admin_v2/__init__.py index 282834fe7..c030ec1bd 100644 --- a/google/cloud/bigtable_admin_v2/__init__.py +++ b/google/cloud/bigtable_admin_v2/__init__.py @@ -92,6 +92,7 @@ from .types.instance import Instance from .types.table import Backup from .types.table import BackupInfo +from .types.table import ChangeStreamConfig from .types.table import ColumnFamily from .types.table import EncryptionInfo from .types.table import GcRule @@ -110,6 +111,7 @@ "BackupInfo", "BigtableInstanceAdminClient", "BigtableTableAdminClient", + "ChangeStreamConfig", "CheckConsistencyRequest", "CheckConsistencyResponse", "Cluster", diff --git a/google/cloud/bigtable_admin_v2/services/bigtable_table_admin/async_client.py b/google/cloud/bigtable_admin_v2/services/bigtable_table_admin/async_client.py index 91f059f8b..1663c16eb 100644 --- a/google/cloud/bigtable_admin_v2/services/bigtable_table_admin/async_client.py +++ b/google/cloud/bigtable_admin_v2/services/bigtable_table_admin/async_client.py @@ -683,16 +683,19 @@ async def update_table( should not be set. update_mask (:class:`google.protobuf.field_mask_pb2.FieldMask`): Required. The list of fields to update. A mask - specifying which fields (e.g. ``deletion_protection``) + specifying which fields (e.g. ``change_stream_config``) in the ``table`` field should be updated. This mask is relative to the ``table`` field, not to the request message. The wildcard (*) path is currently not supported. Currently UpdateTable is only supported for - the following field: + the following fields: - - ``deletion_protection`` If ``column_families`` is set - in ``update_mask``, it will return an UNIMPLEMENTED - error. + - ``change_stream_config`` + - ``change_stream_config.retention_period`` + - ``deletion_protection`` + + If ``column_families`` is set in ``update_mask``, it + will return an UNIMPLEMENTED error. This corresponds to the ``update_mask`` field on the ``request`` instance; if ``request`` is provided, this diff --git a/google/cloud/bigtable_admin_v2/services/bigtable_table_admin/client.py b/google/cloud/bigtable_admin_v2/services/bigtable_table_admin/client.py index efceae90a..e043aa224 100644 --- a/google/cloud/bigtable_admin_v2/services/bigtable_table_admin/client.py +++ b/google/cloud/bigtable_admin_v2/services/bigtable_table_admin/client.py @@ -992,16 +992,19 @@ def update_table( should not be set. 
update_mask (google.protobuf.field_mask_pb2.FieldMask): Required. The list of fields to update. A mask - specifying which fields (e.g. ``deletion_protection``) + specifying which fields (e.g. ``change_stream_config``) in the ``table`` field should be updated. This mask is relative to the ``table`` field, not to the request message. The wildcard (*) path is currently not supported. Currently UpdateTable is only supported for - the following field: + the following fields: - - ``deletion_protection`` If ``column_families`` is set - in ``update_mask``, it will return an UNIMPLEMENTED - error. + - ``change_stream_config`` + - ``change_stream_config.retention_period`` + - ``deletion_protection`` + + If ``column_families`` is set in ``update_mask``, it + will return an UNIMPLEMENTED error. This corresponds to the ``update_mask`` field on the ``request`` instance; if ``request`` is provided, this diff --git a/google/cloud/bigtable_admin_v2/types/__init__.py b/google/cloud/bigtable_admin_v2/types/__init__.py index 5a66ddf09..69153c9fc 100644 --- a/google/cloud/bigtable_admin_v2/types/__init__.py +++ b/google/cloud/bigtable_admin_v2/types/__init__.py @@ -91,6 +91,7 @@ from .table import ( Backup, BackupInfo, + ChangeStreamConfig, ColumnFamily, EncryptionInfo, GcRule, @@ -170,6 +171,7 @@ "Instance", "Backup", "BackupInfo", + "ChangeStreamConfig", "ColumnFamily", "EncryptionInfo", "GcRule", diff --git a/google/cloud/bigtable_admin_v2/types/bigtable_table_admin.py b/google/cloud/bigtable_admin_v2/types/bigtable_table_admin.py index 9b236fea9..4c4b9e9e2 100644 --- a/google/cloud/bigtable_admin_v2/types/bigtable_table_admin.py +++ b/google/cloud/bigtable_admin_v2/types/bigtable_table_admin.py @@ -460,14 +460,18 @@ class UpdateTableRequest(proto.Message): used to identify the table to update. update_mask (google.protobuf.field_mask_pb2.FieldMask): Required. The list of fields to update. A mask specifying - which fields (e.g. ``deletion_protection``) in the ``table`` - field should be updated. This mask is relative to the - ``table`` field, not to the request message. The wildcard - (*) path is currently not supported. Currently UpdateTable - is only supported for the following field: - - - ``deletion_protection`` If ``column_families`` is set in - ``update_mask``, it will return an UNIMPLEMENTED error. + which fields (e.g. ``change_stream_config``) in the + ``table`` field should be updated. This mask is relative to + the ``table`` field, not to the request message. The + wildcard (*) path is currently not supported. Currently + UpdateTable is only supported for the following fields: + + - ``change_stream_config`` + - ``change_stream_config.retention_period`` + - ``deletion_protection`` + + If ``column_families`` is set in ``update_mask``, it will + return an UNIMPLEMENTED error. """ table: gba_table.Table = proto.Field( diff --git a/google/cloud/bigtable_admin_v2/types/table.py b/google/cloud/bigtable_admin_v2/types/table.py index fd936df63..16d136e16 100644 --- a/google/cloud/bigtable_admin_v2/types/table.py +++ b/google/cloud/bigtable_admin_v2/types/table.py @@ -29,6 +29,7 @@ manifest={ "RestoreSourceType", "RestoreInfo", + "ChangeStreamConfig", "Table", "ColumnFamily", "GcRule", @@ -82,6 +83,27 @@ class RestoreInfo(proto.Message): ) +class ChangeStreamConfig(proto.Message): + r"""Change stream configuration. + + Attributes: + retention_period (google.protobuf.duration_pb2.Duration): + How long the change stream should be + retained. 
Change stream data older than the + retention period will not be returned when + reading the change stream from the table. + Values must be at least 1 day and at most 7 + days, and will be truncated to microsecond + granularity. + """ + + retention_period: duration_pb2.Duration = proto.Field( + proto.MESSAGE, + number=1, + message=duration_pb2.Duration, + ) + + class Table(proto.Message): r"""A collection of user data indexed by row, column, and timestamp. Each table is served using the resources of its @@ -114,6 +136,10 @@ class Table(proto.Message): another data source (e.g. a backup), this field will be populated with information about the restore. + change_stream_config (google.cloud.bigtable_admin_v2.types.ChangeStreamConfig): + If specified, enable the change stream on + this table. Otherwise, the change stream is + disabled and the change stream is not retained. deletion_protection (bool): Set to true to make the table protected against data loss. i.e. deleting the following @@ -263,6 +289,11 @@ class ReplicationState(proto.Enum): number=6, message="RestoreInfo", ) + change_stream_config: "ChangeStreamConfig" = proto.Field( + proto.MESSAGE, + number=8, + message="ChangeStreamConfig", + ) deletion_protection: bool = proto.Field( proto.BOOL, number=9, diff --git a/tests/unit/gapic/bigtable_admin_v2/test_bigtable_table_admin.py b/tests/unit/gapic/bigtable_admin_v2/test_bigtable_table_admin.py index 8e4004ab1..8498e4fa5 100644 --- a/tests/unit/gapic/bigtable_admin_v2/test_bigtable_table_admin.py +++ b/tests/unit/gapic/bigtable_admin_v2/test_bigtable_table_admin.py @@ -8202,6 +8202,7 @@ def test_update_table_rest(request_type): "source_table": "source_table_value", }, }, + "change_stream_config": {"retention_period": {"seconds": 751, "nanos": 543}}, "deletion_protection": True, } request = request_type(**request_init) @@ -8399,6 +8400,7 @@ def test_update_table_rest_bad_request( "source_table": "source_table_value", }, }, + "change_stream_config": {"retention_period": {"seconds": 751, "nanos": 543}}, "deletion_protection": True, } request = request_type(**request_init) From ae477b5c0fed6f583d8bf98b05c9414c009e26d9 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Thu, 25 May 2023 12:38:11 -0400 Subject: [PATCH 087/213] build(deps): bump requests from 2.28.1 to 2.31.0 in /synthtool/gcp/templates/python_library/.kokoro (#790) Source-Link: https://github.com/googleapis/synthtool/commit/30bd01b4ab78bf1b2a425816e15b3e7e090993dd Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:9bc5fa3b62b091f60614c08a7fb4fd1d3e1678e326f34dd66ce1eefb5dc3267b Co-authored-by: Owl Bot --- .github/.OwlBot.lock.yaml | 3 ++- .kokoro/requirements.txt | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index b8edda51c..32b3c4865 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -13,4 +13,5 @@ # limitations under the License. 
docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:2e247c7bf5154df7f98cce087a20ca7605e236340c7d6d1a14447e5c06791bd6 + digest: sha256:9bc5fa3b62b091f60614c08a7fb4fd1d3e1678e326f34dd66ce1eefb5dc3267b +# created: 2023-05-25T14:56:16.294623272Z diff --git a/.kokoro/requirements.txt b/.kokoro/requirements.txt index 66a2172a7..3b8d7ee81 100644 --- a/.kokoro/requirements.txt +++ b/.kokoro/requirements.txt @@ -419,9 +419,9 @@ readme-renderer==37.3 \ --hash=sha256:cd653186dfc73055656f090f227f5cb22a046d7f71a841dfa305f55c9a513273 \ --hash=sha256:f67a16caedfa71eef48a31b39708637a6f4664c4394801a7b0d6432d13907343 # via twine -requests==2.28.1 \ - --hash=sha256:7c5599b102feddaa661c826c56ab4fee28bfd17f5abca1ebbe3e7f19d7c97983 \ - --hash=sha256:8fefa2a1a1365bf5520aac41836fbee479da67864514bdb821f31ce07ce65349 +requests==2.31.0 \ + --hash=sha256:58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f \ + --hash=sha256:942c5a758f98d790eaed1a29cb6eefc7ffb0d1cf7af05c3d2791656dbd6ad1e1 # via # gcp-releasetool # google-api-core From ecf539c4c976fd9e5505b8abf0b697b218f09fef Mon Sep 17 00:00:00 2001 From: Sita Lakshmi Sangameswaran Date: Thu, 25 May 2023 22:29:46 +0530 Subject: [PATCH 088/213] docs(samples): add region tags (#788) * docs(samples): add read table snippet * remove snippet as it already exists --- samples/hello/main.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/samples/hello/main.py b/samples/hello/main.py index 7b2b1764a..5e47b4a38 100644 --- a/samples/hello/main.py +++ b/samples/hello/main.py @@ -87,26 +87,30 @@ def main(project_id, instance_id, table_id): # [START bigtable_hw_create_filter] # Create a filter to only retrieve the most recent version of the cell - # for each column accross entire row. + # for each column across entire row. 
row_filter = row_filters.CellsColumnLimitFilter(1) # [END bigtable_hw_create_filter] # [START bigtable_hw_get_with_filter] + # [START bigtable_hw_get_by_key] print("Getting a single greeting by row key.") key = "greeting0".encode() row = table.read_row(key, row_filter) cell = row.cells[column_family_id][column][0] print(cell.value.decode("utf-8")) + # [END bigtable_hw_get_by_key] # [END bigtable_hw_get_with_filter] # [START bigtable_hw_scan_with_filter] + # [START bigtable_hw_scan_all] print("Scanning for all greetings:") partial_rows = table.read_rows(filter_=row_filter) for row in partial_rows: cell = row.cells[column_family_id][column][0] print(cell.value.decode("utf-8")) + # [END bigtable_hw_scan_all] # [END bigtable_hw_scan_with_filter] # [START bigtable_hw_delete_table] From a51201c97149bb85994b02ca7c174063d6775d38 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 25 May 2023 16:23:42 -0700 Subject: [PATCH 089/213] added metadata to mutate rows and bulk mutate rows --- google/cloud/bigtable/_helpers.py | 29 ++++++++++ google/cloud/bigtable/_mutate_rows.py | 8 ++- google/cloud/bigtable/_read_rows.py | 11 ++-- google/cloud/bigtable/client.py | 4 +- tests/unit/test__helpers.py | 31 +++++++++++ tests/unit/test_client.py | 77 +++++++++++++++++++++++++++ 6 files changed, 153 insertions(+), 7 deletions(-) create mode 100644 google/cloud/bigtable/_helpers.py create mode 100644 tests/unit/test__helpers.py diff --git a/google/cloud/bigtable/_helpers.py b/google/cloud/bigtable/_helpers.py new file mode 100644 index 000000000..feb252f40 --- /dev/null +++ b/google/cloud/bigtable/_helpers.py @@ -0,0 +1,29 @@ +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +from __future__ import annotations + + +def _make_metadata( + table_name: str | None, app_profile_id: str | None +) -> list[tuple[str, str]]: + """ + Create properly formatted gRPC metadata for requests. 
+ """ + params = [] + if table_name is not None: + params.append(f"table_name={table_name}") + if app_profile_id is not None: + params.append(f"app_profile_id={app_profile_id}") + params_str = ",".join(params) + return [("x-goog-request-params", params_str)] diff --git a/google/cloud/bigtable/_mutate_rows.py b/google/cloud/bigtable/_mutate_rows.py index 26839fbe9..13cfde72f 100644 --- a/google/cloud/bigtable/_mutate_rows.py +++ b/google/cloud/bigtable/_mutate_rows.py @@ -19,6 +19,7 @@ from google.api_core import exceptions as core_exceptions from google.api_core import retry_async as retries import google.cloud.bigtable.exceptions as bt_exceptions +from google.cloud.bigtable._helpers import _make_metadata if TYPE_CHECKING: from google.cloud.bigtable_v2.services.bigtable.async_client import ( @@ -176,8 +177,13 @@ async def _mutate_rows_retryable_attempt( index_map[len(request_entries)] = index request_entries.append(entry._to_dict()) new_request["entries"] = request_entries + metadata = _make_metadata( + request.get("table_name", None), request.get("app_profile_id", None) + ) async for result_list in await gapic_client.mutate_rows( - new_request, timeout=per_request_timeout + new_request, + timeout=per_request_timeout, + metadata=metadata, ): for result in result_list.entries: # convert sub-request index to global index diff --git a/google/cloud/bigtable/_read_rows.py b/google/cloud/bigtable/_read_rows.py index 1c9e02d5a..a90cb2a6b 100644 --- a/google/cloud/bigtable/_read_rows.py +++ b/google/cloud/bigtable/_read_rows.py @@ -37,6 +37,7 @@ from google.cloud.bigtable.exceptions import _RowSetComplete from google.api_core import retry_async as retries from google.api_core import exceptions as core_exceptions +from google.cloud.bigtable._helpers import _make_metadata """ This module provides a set of classes for merging ReadRowsResponse chunks @@ -183,16 +184,16 @@ async def _read_rows_retryable_attempt( raise RuntimeError("unexpected state: emit count exceeds row limit") else: self._request["rows_limit"] = new_limit - params_str = f'table_name={self._request.get("table_name", "")}' - app_profile_id = self._request.get("app_profile_id", None) - if app_profile_id: - params_str = f"{params_str},app_profile_id={app_profile_id}" time_to_deadline = operation_deadline - time.monotonic() gapic_timeout = max(0, min(time_to_deadline, per_request_timeout)) + metadata = _make_metadata( + self._request.get("table_name", None), + self._request.get("app_profile_id", None), + ) new_gapic_stream: RpcContext = await gapic_fn( self._request, timeout=gapic_timeout, - metadata=[("x-goog-request-params", params_str)], + metadata=metadata, ) try: state_machine = _StateMachine() diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 7a42272ab..862fb70fd 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -47,6 +47,7 @@ from google.cloud.bigtable.mutations import Mutation, RowMutationEntry from google.cloud.bigtable._mutate_rows import _mutate_rows_operation +from google.cloud.bigtable._helpers import _make_metadata if TYPE_CHECKING: from google.cloud.bigtable.mutations_batcher import MutationsBatcher @@ -667,8 +668,9 @@ def on_error_fn(exc): deadline_wrapped = _convert_retry_deadline( retry_wrapped, operation_timeout, transient_errors ) + metadata = _make_metadata(self.table_name, self.app_profile_id) # trigger rpc - await deadline_wrapped(request, timeout=per_request_timeout) + await deadline_wrapped(request, timeout=per_request_timeout, 
metadata=metadata) async def bulk_mutate_rows( self, diff --git a/tests/unit/test__helpers.py b/tests/unit/test__helpers.py new file mode 100644 index 000000000..9843c2fc9 --- /dev/null +++ b/tests/unit/test__helpers.py @@ -0,0 +1,31 @@ +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import pytest +import google.cloud.bigtable._helpers as _helpers + + +class Test_MakeMetadata: + @pytest.mark.parametrize( + "table,profile,expected", + [ + ("table", "profile", "table_name=table,app_profile_id=profile"), + (None, "profile", "app_profile_id=profile"), + ("table", None, "table_name=table"), + (None, None, ""), + ], + ) + def test__make_metadata(self, table, profile, expected): + metadata = _helpers._make_metadata(table, profile) + assert metadata == [("x-goog-request-params", expected)] diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index aaef46dc0..33e8f356d 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -1296,6 +1296,30 @@ async def test_read_rows_default_timeout_override(self): assert kwargs["operation_timeout"] == operation_timeout assert kwargs["per_request_timeout"] == per_request_timeout + @pytest.mark.parametrize("include_app_profile", [True, False]) + @pytest.mark.asyncio + async def test_read_rows_metadata(self, include_app_profile): + """request should attach metadata headers""" + profile = "profile" if include_app_profile else None + async with self._make_client() as client: + async with client.get_table("i", "t", app_profile_id=profile) as table: + with mock.patch.object( + client._gapic_client, "read_rows", AsyncMock() + ) as read_rows: + await table.read_rows(ReadRowsQuery()) + kwargs = read_rows.call_args_list[0].kwargs + metadata = kwargs["metadata"] + goog_metadata = None + for key, value in metadata: + if key == "x-goog-request-params": + goog_metadata = value + assert goog_metadata is not None, "x-goog-request-params not found" + assert "table_name=" + table.table_name in goog_metadata + if include_app_profile: + assert "app_profile_id=profile" in goog_metadata + else: + assert "app_profile_id=" not in goog_metadata + class TestMutateRow: def _make_client(self, *args, **kwargs): @@ -1439,6 +1463,30 @@ async def test_mutate_row_non_retryable_errors(self, non_retryable_exception): "row_key", mutation, operation_timeout=0.2 ) + @pytest.mark.parametrize("include_app_profile", [True, False]) + @pytest.mark.asyncio + async def test_mutate_row_metadata(self, include_app_profile): + """request should attach metadata headers""" + profile = "profile" if include_app_profile else None + async with self._make_client() as client: + async with client.get_table("i", "t", app_profile_id=profile) as table: + with mock.patch.object( + client._gapic_client, "mutate_row", AsyncMock() + ) as read_rows: + await table.mutate_row("rk", {}) + kwargs = read_rows.call_args_list[0].kwargs + metadata = kwargs["metadata"] + goog_metadata = None + for key, value in metadata: + if key == "x-goog-request-params": + goog_metadata = value + assert goog_metadata 
is not None, "x-goog-request-params not found" + assert "table_name=" + table.table_name in goog_metadata + if include_app_profile: + assert "app_profile_id=profile" in goog_metadata + else: + assert "app_profile_id=" not in goog_metadata + class TestBulkMutateRows: def _make_client(self, *args, **kwargs): @@ -1837,3 +1885,32 @@ async def test_bulk_mutate_rows_on_success(self): assert callback.call_count == 2 assert callback.call_args_list[0][0][0] == entries[0] assert callback.call_args_list[1][0][0] == entries[2] + + @pytest.mark.parametrize("include_app_profile", [True, False]) + @pytest.mark.asyncio + async def test_bulk_mutate_row_metadata(self, include_app_profile): + """request should attach metadata headers""" + profile = "profile" if include_app_profile else None + async with self._make_client() as client: + async with client.get_table("i", "t", app_profile_id=profile) as table: + with mock.patch.object( + client._gapic_client, "mutate_rows", AsyncMock() + ) as read_rows: + read_rows.side_effect = core_exceptions.Aborted("mock") + try: + await table.bulk_mutate_rows([mock.Mock()]) + except Exception: + # exception used to end early + pass + kwargs = read_rows.call_args_list[0].kwargs + metadata = kwargs["metadata"] + goog_metadata = None + for key, value in metadata: + if key == "x-goog-request-params": + goog_metadata = value + assert goog_metadata is not None, "x-goog-request-params not found" + assert "table_name=" + table.table_name in goog_metadata + if include_app_profile: + assert "app_profile_id=profile" in goog_metadata + else: + assert "app_profile_id=" not in goog_metadata From a21bebfd025da48d8b48e2e619153fc87f0f1d9b Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 25 May 2023 16:32:09 -0700 Subject: [PATCH 090/213] moved _convert_retry_deadline wrapper from exceptions into _helpers --- google/cloud/bigtable/_helpers.py | 57 +++++++++++++++++++++++++++ google/cloud/bigtable/_mutate_rows.py | 5 +-- google/cloud/bigtable/client.py | 4 +- google/cloud/bigtable/exceptions.py | 51 +----------------------- google/cloud/bigtable/iterators.py | 2 +- tests/unit/test__helpers.py | 49 +++++++++++++++++++++++ tests/unit/test_exceptions.py | 48 ---------------------- 7 files changed, 111 insertions(+), 105 deletions(-) diff --git a/google/cloud/bigtable/_helpers.py b/google/cloud/bigtable/_helpers.py index feb252f40..98b5ff70e 100644 --- a/google/cloud/bigtable/_helpers.py +++ b/google/cloud/bigtable/_helpers.py @@ -13,6 +13,15 @@ # from __future__ import annotations +from inspect import iscoroutinefunction +from typing import Callable, Any + +from google.api_core import exceptions as core_exceptions +from google.cloud.bigtable.exceptions import RetryExceptionGroup +""" +Helper functions used in various places in the library. +""" + def _make_metadata( table_name: str | None, app_profile_id: str | None @@ -27,3 +36,51 @@ def _make_metadata( params.append(f"app_profile_id={app_profile_id}") params_str = ",".join(params) return [("x-goog-request-params", params_str)] + + +def _convert_retry_deadline( + func: Callable[..., Any], + timeout_value: float | None = None, + retry_errors: list[Exception] | None = None, +): + """ + Decorator to convert RetryErrors raised by api_core.retry into + DeadlineExceeded exceptions, indicating that the underlying retries have + exhaused the timeout value. + Optionally attaches a RetryExceptionGroup to the DeadlineExceeded.__cause__, + detailing the failed exceptions associated with each retry. + + Supports both sync and async function wrapping. 
+ + Args: + - func: The function to decorate + - timeout_value: The timeout value to display in the DeadlineExceeded error message + - retry_errors: An optional list of exceptions to attach as a RetryExceptionGroup to the DeadlineExceeded.__cause__ + """ + timeout_str = f" of {timeout_value:.1f}s" if timeout_value is not None else "" + error_str = f"operation_timeout{timeout_str} exceeded" + + def handle_error(): + new_exc = core_exceptions.DeadlineExceeded( + error_str, + ) + source_exc = None + if retry_errors: + source_exc = RetryExceptionGroup(retry_errors) + new_exc.__cause__ = source_exc + raise new_exc from source_exc + + # separate wrappers for async and sync functions + async def wrapper_async(*args, **kwargs): + try: + return await func(*args, **kwargs) + except core_exceptions.RetryError: + handle_error() + + def wrapper(*args, **kwargs): + try: + return func(*args, **kwargs) + except core_exceptions.RetryError: + handle_error() + + return wrapper_async if iscoroutinefunction(func) else wrapper diff --git a/google/cloud/bigtable/_mutate_rows.py b/google/cloud/bigtable/_mutate_rows.py index 13cfde72f..9b055b8e8 100644 --- a/google/cloud/bigtable/_mutate_rows.py +++ b/google/cloud/bigtable/_mutate_rows.py @@ -20,6 +20,7 @@ from google.api_core import retry_async as retries import google.cloud.bigtable.exceptions as bt_exceptions from google.cloud.bigtable._helpers import _make_metadata +from google.cloud.bigtable._helpers import _convert_retry_deadline if TYPE_CHECKING: from google.cloud.bigtable_v2.services.bigtable.async_client import ( @@ -91,9 +92,7 @@ def on_error_fn(exc): # wrap attempt in retry logic retry_wrapped = retry(_mutate_rows_retryable_attempt) # convert RetryErrors from retry wrapper into DeadlineExceeded errors - deadline_wrapped = bt_exceptions._convert_retry_deadline( - retry_wrapped, operation_timeout - ) + deadline_wrapped = _convert_retry_deadline(retry_wrapped, operation_timeout) try: # trigger mutate_rows await deadline_wrapped( diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 862fb70fd..2e5a8dac0 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -42,12 +42,10 @@ from google.cloud.bigtable.row import Row from google.cloud.bigtable.read_rows_query import ReadRowsQuery from google.cloud.bigtable.iterators import ReadRowsIterator - -from google.cloud.bigtable.exceptions import _convert_retry_deadline - from google.cloud.bigtable.mutations import Mutation, RowMutationEntry from google.cloud.bigtable._mutate_rows import _mutate_rows_operation from google.cloud.bigtable._helpers import _make_metadata +from google.cloud.bigtable._helpers import _convert_retry_deadline if TYPE_CHECKING: from google.cloud.bigtable.mutations_batcher import MutationsBatcher diff --git a/google/cloud/bigtable/exceptions.py b/google/cloud/bigtable/exceptions.py index 1e03a9367..fe3bec7e9 100644 --- a/google/cloud/bigtable/exceptions.py +++ b/google/cloud/bigtable/exceptions.py @@ -15,9 +15,8 @@ from __future__ import annotations import sys -from inspect import iscoroutinefunction -from typing import Callable, Any, TYPE_CHECKING +from typing import TYPE_CHECKING from google.api_core import exceptions as core_exceptions @@ -27,54 +26,6 @@ from google.cloud.bigtable.mutations import RowMutationEntry -def _convert_retry_deadline( - func: Callable[..., Any], - timeout_value: float | None = None, - retry_errors: list[Exception] | None = None, -): - """ - Decorator to convert RetryErrors raised by api_core.retry into - 
DeadlineExceeded exceptions, indicating that the underlying retries have - exhaused the timeout value. - Optionally attaches a RetryExceptionGroup to the DeadlineExceeded.__cause__, - detailing the failed exceptions associated with each retry. - - Supports both sync and async function wrapping. - - Args: - - func: The function to decorate - - timeout_value: The timeout value to display in the DeadlineExceeded error message - - retry_errors: An optional list of exceptions to attach as a RetryExceptionGroup to the DeadlineExceeded.__cause__ - """ - timeout_str = f" of {timeout_value:.1f}s" if timeout_value is not None else "" - error_str = f"operation_timeout{timeout_str} exceeded" - - def handle_error(): - new_exc = core_exceptions.DeadlineExceeded( - error_str, - ) - source_exc = None - if retry_errors: - source_exc = RetryExceptionGroup(retry_errors) - new_exc.__cause__ = source_exc - raise new_exc from source_exc - - # separate wrappers for async and sync functions - async def wrapper_async(*args, **kwargs): - try: - return await func(*args, **kwargs) - except core_exceptions.RetryError: - handle_error() - - def wrapper(*args, **kwargs): - try: - return func(*args, **kwargs) - except core_exceptions.RetryError: - handle_error() - - return wrapper_async if iscoroutinefunction(func) else wrapper - - class IdleTimeout(core_exceptions.DeadlineExceeded): """ Exception raised by ReadRowsIterator when the generator diff --git a/google/cloud/bigtable/iterators.py b/google/cloud/bigtable/iterators.py index 169bbc3f3..b20932fb2 100644 --- a/google/cloud/bigtable/iterators.py +++ b/google/cloud/bigtable/iterators.py @@ -22,7 +22,7 @@ from google.cloud.bigtable._read_rows import _ReadRowsOperation from google.cloud.bigtable.exceptions import IdleTimeout -from google.cloud.bigtable.exceptions import _convert_retry_deadline +from google.cloud.bigtable._helpers import _convert_retry_deadline from google.cloud.bigtable.row import Row diff --git a/tests/unit/test__helpers.py b/tests/unit/test__helpers.py index 9843c2fc9..8e7d2cdfe 100644 --- a/tests/unit/test__helpers.py +++ b/tests/unit/test__helpers.py @@ -14,6 +14,7 @@ import pytest import google.cloud.bigtable._helpers as _helpers +import google.cloud.bigtable.exceptions as bigtable_exceptions class Test_MakeMetadata: @@ -29,3 +30,51 @@ class Test_MakeMetadata: def test__make_metadata(self, table, profile, expected): metadata = _helpers._make_metadata(table, profile) assert metadata == [("x-goog-request-params", expected)] + + +class TestConvertRetryDeadline: + """ + Test _convert_retry_deadline wrapper + """ + + @pytest.mark.asyncio + async def test_no_error(self): + async def test_func(): + return 1 + + wrapped = _helpers._convert_retry_deadline(test_func, 0.1) + assert await wrapped() == 1 + + @pytest.mark.asyncio + @pytest.mark.parametrize("timeout", [0.1, 2.0, 30.0]) + async def test_retry_error(self, timeout): + from google.api_core.exceptions import RetryError, DeadlineExceeded + + async def test_func(): + raise RetryError("retry error", None) + + wrapped = _helpers._convert_retry_deadline(test_func, timeout) + with pytest.raises(DeadlineExceeded) as e: + await wrapped() + assert e.value.__cause__ is None + assert f"operation_timeout of {timeout}s exceeded" in str(e.value) + + @pytest.mark.asyncio + async def test_with_retry_errors(self): + from google.api_core.exceptions import RetryError, DeadlineExceeded + + timeout = 10.0 + + async def test_func(): + raise RetryError("retry error", None) + + associated_errors = [RuntimeError("error1"), 
ZeroDivisionError("other")] + wrapped = _helpers._convert_retry_deadline( + test_func, timeout, associated_errors + ) + with pytest.raises(DeadlineExceeded) as e: + await wrapped() + cause = e.value.__cause__ + assert isinstance(cause, bigtable_exceptions.RetryExceptionGroup) + assert cause.exceptions == tuple(associated_errors) + assert f"operation_timeout of {timeout}s exceeded" in str(e.value) diff --git a/tests/unit/test_exceptions.py b/tests/unit/test_exceptions.py index 29cb1d02b..49b90a8a9 100644 --- a/tests/unit/test_exceptions.py +++ b/tests/unit/test_exceptions.py @@ -19,54 +19,6 @@ import google.cloud.bigtable.exceptions as bigtable_exceptions -class TestConvertRetryDeadline: - """ - Test _convert_retry_deadline wrapper - """ - - @pytest.mark.asyncio - async def test_no_error(self): - async def test_func(): - return 1 - - wrapped = bigtable_exceptions._convert_retry_deadline(test_func, 0.1) - assert await wrapped() == 1 - - @pytest.mark.asyncio - @pytest.mark.parametrize("timeout", [0.1, 2.0, 30.0]) - async def test_retry_error(self, timeout): - from google.api_core.exceptions import RetryError, DeadlineExceeded - - async def test_func(): - raise RetryError("retry error", None) - - wrapped = bigtable_exceptions._convert_retry_deadline(test_func, timeout) - with pytest.raises(DeadlineExceeded) as e: - await wrapped() - assert e.value.__cause__ is None - assert f"operation_timeout of {timeout}s exceeded" in str(e.value) - - @pytest.mark.asyncio - async def test_with_retry_errors(self): - from google.api_core.exceptions import RetryError, DeadlineExceeded - - timeout = 10.0 - - async def test_func(): - raise RetryError("retry error", None) - - associated_errors = [RuntimeError("error1"), ZeroDivisionError("other")] - wrapped = bigtable_exceptions._convert_retry_deadline( - test_func, timeout, associated_errors - ) - with pytest.raises(DeadlineExceeded) as e: - await wrapped() - cause = e.value.__cause__ - assert isinstance(cause, bigtable_exceptions.RetryExceptionGroup) - assert cause.exceptions == tuple(associated_errors) - assert f"operation_timeout of {timeout}s exceeded" in str(e.value) - - class TestBigtableExceptionGroup: """ Subclass for MutationsExceptionGroup and RetryExceptionGroup From 4ca89d9ff3fda10f4453812be87dbfffe2a75eea Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 25 May 2023 16:52:27 -0700 Subject: [PATCH 091/213] fixed system tests --- google/cloud/bigtable/_helpers.py | 1 + tests/system/test_system.py | 54 ++++++++++++++++++++++++++++--- 2 files changed, 50 insertions(+), 5 deletions(-) diff --git a/google/cloud/bigtable/_helpers.py b/google/cloud/bigtable/_helpers.py index 98b5ff70e..9bb396f08 100644 --- a/google/cloud/bigtable/_helpers.py +++ b/google/cloud/bigtable/_helpers.py @@ -18,6 +18,7 @@ from google.api_core import exceptions as core_exceptions from google.cloud.bigtable.exceptions import RetryExceptionGroup + """ Helper functions used in various places in the library. 
""" diff --git a/tests/system/test_system.py b/tests/system/test_system.py index 52375a4b4..7d015224c 100644 --- a/tests/system/test_system.py +++ b/tests/system/test_system.py @@ -187,6 +187,19 @@ async def delete_rows(self): await self.table.client._gapic_client.mutate_rows(request) +async def _retrieve_cell_value(table, row_key): + """ + Helper to read an individual row + """ + from google.cloud.bigtable import ReadRowsQuery + + row_list = await table.read_rows(ReadRowsQuery(row_keys=row_key)) + assert len(row_list) == 1 + row = row_list[0] + cell = row.cells[0] + return cell.value + + @pytest_asyncio.fixture(scope="function") async def temp_rows(table): builder = TempRowBuilder(table) @@ -213,26 +226,57 @@ async def test_mutation_set_cell(table, temp_rows): """ from google.cloud.bigtable.mutations import SetCell + row_key = b"mutate" + family = TEST_FAMILY + qualifier = b"test-qualifier" + start_value = b"start" + await temp_rows.add_row( + row_key, family=family, qualifier=qualifier, value=start_value + ) + + # ensure cell is initialized + assert (await _retrieve_cell_value(table, row_key)) == start_value + + expected_value = b"new-value" mutation = SetCell( - family=TEST_FAMILY, qualifier=b"test-qualifier", new_value=b"test-value" + family=TEST_FAMILY, qualifier=b"test-qualifier", new_value=expected_value ) - await table.mutate_row("abc", mutation) + + await table.mutate_row(row_key, mutation) + + # ensure cell is updated + assert (await _retrieve_cell_value(table, row_key)) == expected_value @retry.Retry(predicate=retry.if_exception_type(ClientError), initial=1, maximum=5) @pytest.mark.asyncio -async def test_bulk_mutations_set_cell(client, table): +async def test_bulk_mutations_set_cell(client, table, temp_rows): """ Ensure cells can be set properly """ from google.cloud.bigtable.mutations import SetCell, RowMutationEntry + row_key = b"bulk_mutate" + family = TEST_FAMILY + qualifier = b"test-qualifier" + start_value = b"start" + await temp_rows.add_row( + row_key, family=family, qualifier=qualifier, value=start_value + ) + + # ensure cell is initialized + assert (await _retrieve_cell_value(table, row_key)) == start_value + + expected_value = b"new-value" mutation = SetCell( - family=TEST_FAMILY, qualifier=b"test-qualifier", new_value=b"test-value" + family=TEST_FAMILY, qualifier=b"test-qualifier", new_value=expected_value ) - bulk_mutation = RowMutationEntry(b"abc", [mutation]) + bulk_mutation = RowMutationEntry(row_key, [mutation]) await table.bulk_mutate_rows([bulk_mutation]) + # ensure cell is updated + assert (await _retrieve_cell_value(table, row_key)) == expected_value + @retry.Retry(predicate=retry.if_exception_type(ClientError), initial=1, maximum=5) @pytest.mark.asyncio From b240ee1d941359caf1278d7584f245c1711ac21a Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 26 May 2023 15:04:00 -0700 Subject: [PATCH 092/213] only handle precision adjustment when creating timestamp --- google/cloud/bigtable/mutations.py | 12 +++------- tests/unit/test_mutations.py | 37 +++++++++++------------------- 2 files changed, 17 insertions(+), 32 deletions(-) diff --git a/google/cloud/bigtable/mutations.py b/google/cloud/bigtable/mutations.py index 2121b7d5c..be7069435 100644 --- a/google/cloud/bigtable/mutations.py +++ b/google/cloud/bigtable/mutations.py @@ -70,8 +70,9 @@ def __init__( if not isinstance(new_value, bytes): raise TypeError("new_value must be bytes, str, or int") if timestamp_micros is None: - # use current timestamp + # use current timestamp, with milisecond precision 
timestamp_micros = time.time_ns() // 1000 + timestamp_micros = timestamp_micros - (timestamp_micros % 1000) if timestamp_micros < SERVER_SIDE_TIMESTAMP: raise ValueError( "timestamp_micros must be positive (or -1 for server-side timestamp)" @@ -79,14 +80,7 @@ def __init__( self.family = family self.qualifier = qualifier self.new_value = new_value - self._timestamp_micros = timestamp_micros - - @property - def timestamp_micros(self): - if self._timestamp_micros > 0: - # round to use milisecond precision - return (self._timestamp_micros // 1000) * 1000 - return self._timestamp_micros + self.timestamp_micros = timestamp_micros def _to_dict(self) -> dict[str, Any]: """Convert the mutation to a dictionary representation""" diff --git a/tests/unit/test_mutations.py b/tests/unit/test_mutations.py index dd4b8b20f..4d65f1adb 100644 --- a/tests/unit/test_mutations.py +++ b/tests/unit/test_mutations.py @@ -58,7 +58,7 @@ def test_ctor(self): expected_family = "test-family" expected_qualifier = b"test-qualifier" expected_value = b"test-value" - expected_timestamp = 1234567890000 + expected_timestamp = 1234567890 instance = self._make_one( expected_family, expected_qualifier, expected_value, expected_timestamp ) @@ -98,13 +98,6 @@ def test_ctor_int_value(self, int_value, expected_bytes): assert instance.qualifier == expected_qualifier assert instance.new_value == expected_bytes - def test_ctor_no_timestamp(self): - """If no timestamp is given, should use current time""" - with mock.patch("time.time_ns", return_value=1234000): - instance = self._make_one("test-family", b"test-qualifier", b"test-value") - assert instance._timestamp_micros == 1234 - assert instance.timestamp_micros == 1000 - def test_ctor_negative_timestamp(self): """Only positive or -1 timestamps are valid""" with pytest.raises(ValueError) as e: @@ -115,34 +108,32 @@ def test_ctor_negative_timestamp(self): ) @pytest.mark.parametrize( - "input_timestamp,expected_timestamp", + "timestamp_ns,expected_timestamp_micros", [ - (-1, -1), (0, 0), (1, 0), (123, 0), (999, 0), - (1000, 1000), - (1234, 1000), - (1999, 1000), - (2000, 2000), - (1234567890, 1234567000), + (999_999, 0), + (1_000_000, 1000), + (1_234_567, 1000), + (1_999_999, 1000), + (2_000_000, 2000), + (1_234_567_890_123, 1_234_567_000), ], ) - def test_timestamp_milli_precision(self, input_timestamp, expected_timestamp): - """timestamp_micros should have millisecond precision (3 trailing 0s)""" - instance = self._make_one( - "test-family", b"test-qualifier", b"test-value", input_timestamp - ) - assert instance._timestamp_micros == input_timestamp - assert instance.timestamp_micros == expected_timestamp + def test_ctor_no_timestamp(self, timestamp_ns, expected_timestamp_micros): + """If no timestamp is given, should use current time with millisecond precision""" + with mock.patch("time.time_ns", return_value=timestamp_ns): + instance = self._make_one("test-family", b"test-qualifier", b"test-value") + assert instance.timestamp_micros == expected_timestamp_micros def test__to_dict(self): """ensure dict representation is as expected""" expected_family = "test-family" expected_qualifier = b"test-qualifier" expected_value = b"test-value" - expected_timestamp = 123456789000 + expected_timestamp = 123456789 instance = self._make_one( expected_family, expected_qualifier, expected_value, expected_timestamp ) From cb0e95115d29dab0006210bc59fd2ceba0abe63f Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 26 May 2023 15:14:45 -0700 Subject: [PATCH 093/213] added _from_dict for mutation 
models --- google/cloud/bigtable/mutations.py | 42 ++++++++ tests/unit/test_mutations.py | 150 +++++++++++++++++++++++++++++ 2 files changed, 192 insertions(+) diff --git a/google/cloud/bigtable/mutations.py b/google/cloud/bigtable/mutations.py index be7069435..c72f132c8 100644 --- a/google/cloud/bigtable/mutations.py +++ b/google/cloud/bigtable/mutations.py @@ -39,6 +39,39 @@ def is_idempotent(self) -> bool: def __str__(self) -> str: return str(self._to_dict()) + @classmethod + def _from_dict(cls, input_dict: dict[str, Any]) -> Mutation: + instance: Mutation | None = None + try: + if "set_cell" in input_dict: + details = input_dict["set_cell"] + instance = SetCell( + details["family_name"], + details["column_qualifier"], + details["value"], + details["timestamp_micros"], + ) + elif "delete_from_column" in input_dict: + details = input_dict["delete_from_column"] + time_range = details.get("time_range", {}) + start = time_range.get("start_timestamp_micros", None) + end = time_range.get("end_timestamp_micros", None) + instance = DeleteRangeFromColumn( + details["family_name"], details["column_qualifier"], start, end + ) + elif "delete_from_family" in input_dict: + details = input_dict["delete_from_family"] + instance = DeleteAllFromFamily(details["family_name"]) + elif "delete_from_row" in input_dict: + instance = DeleteAllFromRow() + except KeyError as e: + raise ValueError("Invalid mutation dictionary") from e + if instance is None: + raise ValueError("No valid mutation found") + if not issubclass(instance.__class__, cls): + raise ValueError("Mutation type mismatch") + return instance + class SetCell(Mutation): def __init__( @@ -168,3 +201,12 @@ def _to_dict(self) -> dict[str, Any]: def is_idempotent(self) -> bool: """Check if the mutation is idempotent""" return all(mutation.is_idempotent() for mutation in self.mutations) + + @classmethod + def _from_dict(cls, input_dict: dict[str, Any]) -> RowMutationEntry: + return RowMutationEntry( + row_key=input_dict["row_key"], + mutations=[ + Mutation._from_dict(mutation) for mutation in input_dict["mutations"] + ], + ) diff --git a/tests/unit/test_mutations.py b/tests/unit/test_mutations.py index 4d65f1adb..2a376609e 100644 --- a/tests/unit/test_mutations.py +++ b/tests/unit/test_mutations.py @@ -14,6 +14,8 @@ import pytest +import google.cloud.bigtable.mutations as mutations + # try/except added for compatibility with python < 3.8 try: from unittest import mock @@ -43,6 +45,121 @@ def test___str__(self): assert self_mock._to_dict.called assert str_value == str(self_mock._to_dict.return_value) + @pytest.mark.parametrize( + "expected_class,input_dict", + [ + ( + mutations.SetCell, + { + "set_cell": { + "family_name": "foo", + "column_qualifier": b"bar", + "value": b"test", + "timestamp_micros": 12345, + } + }, + ), + ( + mutations.DeleteRangeFromColumn, + { + "delete_from_column": { + "family_name": "foo", + "column_qualifier": b"bar", + "time_range": {}, + } + }, + ), + ( + mutations.DeleteRangeFromColumn, + { + "delete_from_column": { + "family_name": "foo", + "column_qualifier": b"bar", + "time_range": {"start_timestamp_micros": 123456789}, + } + }, + ), + ( + mutations.DeleteRangeFromColumn, + { + "delete_from_column": { + "family_name": "foo", + "column_qualifier": b"bar", + "time_range": {"end_timestamp_micros": 123456789}, + } + }, + ), + ( + mutations.DeleteRangeFromColumn, + { + "delete_from_column": { + "family_name": "foo", + "column_qualifier": b"bar", + "time_range": { + "start_timestamp_micros": 123, + "end_timestamp_micros": 
123456789, + }, + } + }, + ), + ( + mutations.DeleteAllFromFamily, + {"delete_from_family": {"family_name": "foo"}}, + ), + (mutations.DeleteAllFromRow, {"delete_from_row": {}}), + ], + ) + def test__from_dict(self, expected_class, input_dict): + """Should be able to create instance from dict""" + instance = self._target_class()._from_dict(input_dict) + assert isinstance(instance, expected_class) + found_dict = instance._to_dict() + assert found_dict == input_dict + + @pytest.mark.parametrize( + "input_dict", + [ + {"set_cell": {}}, + { + "set_cell": { + "column_qualifier": b"bar", + "value": b"test", + "timestamp_micros": 12345, + } + }, + { + "set_cell": { + "family_name": "f", + "column_qualifier": b"bar", + "value": b"test", + } + }, + {"delete_from_family": {}}, + {"delete_from_column": {}}, + {"fake-type"}, + {}, + ], + ) + def test__from_dict_missing_fields(self, input_dict): + """If dict is malformed or fields are missing, should raise ValueError""" + with pytest.raises(ValueError): + self._target_class()._from_dict(input_dict) + + def test__from_dict_wrong_subclass(self): + """You shouldn't be able to instantiate one mutation type using the dict of another""" + subclasses = [ + mutations.SetCell("foo", b"bar", b"test"), + mutations.DeleteRangeFromColumn("foo", b"bar"), + mutations.DeleteAllFromFamily("foo"), + mutations.DeleteAllFromRow(), + ] + for instance in subclasses: + others = [other for other in subclasses if other != instance] + for other in others: + with pytest.raises(ValueError) as e: + type(other)._from_dict(instance._to_dict()) + assert "Mutation type mismatch" in str(e.value) + class TestSetCell: def _target_class(self): @@ -417,3 +534,36 @@ def test__to_dict(self): def test_is_idempotent(self, mutations, result): instance = self._make_one("row_key", mutations) assert instance.is_idempotent() == result + + def test__from_dict_mock(self): + """ + test creating instance from entry dict, with mocked mutation._from_dict + """ + expected_key = b"row_key" + expected_mutations = [mock.Mock(), mock.Mock()] + input_dict = { + "row_key": expected_key, + "mutations": [{"test": "data"}, {"another": "data"}], + } + with mock.patch.object(mutations.Mutation, "_from_dict") as inner_from_dict: + inner_from_dict.side_effect = expected_mutations + instance = self._target_class()._from_dict(input_dict) + assert instance.row_key == b"row_key" + assert inner_from_dict.call_count == 2 + assert len(instance.mutations) == 2 + assert instance.mutations[0] == expected_mutations[0] + assert instance.mutations[1] == expected_mutations[1] + + def test__from_dict(self): + """ + test creating end-to-end with a real mutation instance + """ + input_dict = { + "row_key": b"row_key", + "mutations": [{"delete_from_family": {"family_name": "test_family"}}], + } + instance = self._target_class()._from_dict(input_dict) + assert instance.row_key == b"row_key" + assert len(instance.mutations) == 1 + assert isinstance(instance.mutations[0], mutations.DeleteAllFromFamily) + assert instance.mutations[0].family_to_delete == "test_family" From a9cf385a042a8cafb059e937f783294a76182ad2 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 26 May 2023 16:29:11 -0700 Subject: [PATCH 094/213] rpc timeouts adjust when approaching operation_timeout --- google/cloud/bigtable/_helpers.py | 27 ++++++++++- google/cloud/bigtable/_mutate_rows.py | 13 ++++-- google/cloud/bigtable/_read_rows.py | 20 ++++---- tests/unit/test__helpers.py | 67 ++++++++++++++++++++++++++- tests/unit/test__mutate_rows.py | 16 ++++--- 
tests/unit/test__read_rows.py | 43 +++++++++++------ 6 files changed, 150 insertions(+), 36 deletions(-) diff --git a/google/cloud/bigtable/_helpers.py b/google/cloud/bigtable/_helpers.py index 9bb396f08..ff70805e8 100644 --- a/google/cloud/bigtable/_helpers.py +++ b/google/cloud/bigtable/_helpers.py @@ -13,8 +13,9 @@ # from __future__ import annotations -from inspect import iscoroutinefunction from typing import Callable, Any +from inspect import iscoroutinefunction +import time from google.api_core import exceptions as core_exceptions from google.cloud.bigtable.exceptions import RetryExceptionGroup @@ -39,6 +40,30 @@ def _make_metadata( return [("x-goog-request-params", params_str)] +def _attempt_timeout_generator( + per_request_timeout: float | None, operation_timeout: float +): + """ + Generator that yields the timeout value for each attempt of a retry loop. + + Will return per_request_timeout until the operation_timeout is approached, + at which point it will return the remaining time in the operation_timeout. + + Args: + - per_request_timeout: The timeout value to use for each request. If None, + the operation_timeout will be used for each request. + - operation_timeout: The timeout value to use for the entire operation. + Yields: + - The timeout value to use for the next request. + """ + per_request_timeout = ( + per_request_timeout if per_request_timeout is not None else operation_timeout + ) + deadline = operation_timeout + time.monotonic() + while True: + yield max(0, min(per_request_timeout, deadline - time.monotonic())) + + def _convert_retry_deadline( func: Callable[..., Any], timeout_value: float | None = None, diff --git a/google/cloud/bigtable/_mutate_rows.py b/google/cloud/bigtable/_mutate_rows.py index 9b055b8e8..8f2789239 100644 --- a/google/cloud/bigtable/_mutate_rows.py +++ b/google/cloud/bigtable/_mutate_rows.py @@ -14,13 +14,14 @@ # from __future__ import annotations -from typing import Callable, Any, TYPE_CHECKING +from typing import Iterator, Callable, Any, TYPE_CHECKING from google.api_core import exceptions as core_exceptions from google.api_core import retry_async as retries import google.cloud.bigtable.exceptions as bt_exceptions from google.cloud.bigtable._helpers import _make_metadata from google.cloud.bigtable._helpers import _convert_retry_deadline +from google.cloud.bigtable._helpers import _attempt_timeout_generator if TYPE_CHECKING: from google.cloud.bigtable_v2.services.bigtable.async_client import ( @@ -89,6 +90,10 @@ def on_error_fn(exc): multiplier=2, maximum=60, ) + # use generator to lower per-attempt timeout as we approach operation_timeout deadline + attempt_timeout_gen = _attempt_timeout_generator( + per_request_timeout, operation_timeout + ) # wrap attempt in retry logic retry_wrapped = retry(_mutate_rows_retryable_attempt) # convert RetryErrors from retry wrapper into DeadlineExceeded errors @@ -98,7 +103,7 @@ def on_error_fn(exc): await deadline_wrapped( gapic_client, request, - per_request_timeout, + attempt_timeout_gen, mutations_dict, error_dict, predicate, @@ -134,7 +139,7 @@ def on_error_fn(exc): async def _mutate_rows_retryable_attempt( gapic_client: "BigtableAsyncClient", request: dict[str, Any], - per_request_timeout: float | None, + timeout_generator: Iterator[float], mutation_dict: dict[int, "RowMutationEntry" | None], error_dict: dict[int, list[Exception]], predicate: Callable[[Exception], bool], @@ -181,7 +186,7 @@ async def _mutate_rows_retryable_attempt( ) async for result_list in await gapic_client.mutate_rows( new_request, - 
timeout=per_request_timeout, + timeout=next(timeout_generator), metadata=metadata, ): for result in result_list.entries: diff --git a/google/cloud/bigtable/_read_rows.py b/google/cloud/bigtable/_read_rows.py index a90cb2a6b..ee094f1a7 100644 --- a/google/cloud/bigtable/_read_rows.py +++ b/google/cloud/bigtable/_read_rows.py @@ -20,13 +20,13 @@ AsyncIterable, AsyncIterator, AsyncGenerator, + Iterator, Callable, Awaitable, Type, ) import asyncio -import time from functools import partial from grpc.aio import RpcContext @@ -38,6 +38,7 @@ from google.api_core import retry_async as retries from google.api_core import exceptions as core_exceptions from google.cloud.bigtable._helpers import _make_metadata +from google.cloud.bigtable._helpers import _attempt_timeout_generator """ This module provides a set of classes for merging ReadRowsResponse chunks @@ -88,16 +89,16 @@ def __init__( self._emit_count = 0 self._request = request self.operation_timeout = operation_timeout - deadline = operation_timeout + time.monotonic() + # use generator to lower per-attempt timeout as we approach operation_timeout deadline + attempt_timeout_gen = _attempt_timeout_generator( + per_request_timeout, operation_timeout + ) row_limit = request.get("rows_limit", 0) - if per_request_timeout is None: - per_request_timeout = operation_timeout # lock in paramters for retryable wrapper self._partial_retryable = partial( self._read_rows_retryable_attempt, client.read_rows, - per_request_timeout, - deadline, + attempt_timeout_gen, row_limit, ) predicate = retries.if_exception_type( @@ -146,8 +147,7 @@ async def aclose(self): async def _read_rows_retryable_attempt( self, gapic_fn: Callable[..., Awaitable[AsyncIterable[ReadRowsResponse]]], - per_request_timeout: float, - operation_deadline: float, + timeout_generator: Iterator[float], total_row_limit: int, ) -> AsyncGenerator[Row, None]: """ @@ -184,15 +184,13 @@ async def _read_rows_retryable_attempt( raise RuntimeError("unexpected state: emit count exceeds row limit") else: self._request["rows_limit"] = new_limit - time_to_deadline = operation_deadline - time.monotonic() - gapic_timeout = max(0, min(time_to_deadline, per_request_timeout)) metadata = _make_metadata( self._request.get("table_name", None), self._request.get("app_profile_id", None), ) new_gapic_stream: RpcContext = await gapic_fn( self._request, - timeout=gapic_timeout, + timeout=next(timeout_generator), metadata=metadata, ) try: diff --git a/tests/unit/test__helpers.py b/tests/unit/test__helpers.py index 8e7d2cdfe..cebc5095e 100644 --- a/tests/unit/test__helpers.py +++ b/tests/unit/test__helpers.py @@ -16,8 +16,10 @@ import google.cloud.bigtable._helpers as _helpers import google.cloud.bigtable.exceptions as bigtable_exceptions +import mock -class Test_MakeMetadata: + +class TestMakeMetadata: @pytest.mark.parametrize( "table,profile,expected", [ @@ -32,6 +34,69 @@ def test__make_metadata(self, table, profile, expected): assert metadata == [("x-goog-request-params", expected)] +class TestAttemptTimeoutGenerator: + @pytest.mark.parametrize( + "request_t,operation_t,expected_list", + [ + (1, 3.5, [1, 1, 1, 0.5, 0, 0]), + (None, 3.5, [3.5, 2.5, 1.5, 0.5, 0, 0]), + (10, 5, [5, 4, 3, 2, 1, 0, 0]), + (3, 3, [3, 2, 1, 0, 0, 0, 0]), + (0, 3, [0, 0, 0]), + (3, 0, [0, 0, 0]), + (-1, 3, [0, 0, 0]), + (3, -1, [0, 0, 0]), + ], + ) + def test_attempt_timeout_generator(self, request_t, operation_t, expected_list): + """ + test different values for timeouts. 
Clock is incremented by 1 second for each item in expected_list + """ + timestamp_start = 123 + with mock.patch("time.monotonic") as mock_monotonic: + mock_monotonic.return_value = timestamp_start + generator = _helpers._attempt_timeout_generator(request_t, operation_t) + for val in expected_list: + mock_monotonic.return_value += 1 + assert next(generator) == val + + @pytest.mark.parametrize( + "request_t,operation_t,expected", + [ + (1, 3.5, 1), + (None, 3.5, 3.5), + (10, 5, 5), + (5, 10, 5), + (3, 3, 3), + (0, 3, 0), + (3, 0, 0), + (-1, 3, 0), + (3, -1, 0), + ], + ) + def test_attempt_timeout_frozen_time(self, request_t, operation_t, expected): + """test with time.monotonic frozen""" + timestamp_start = 123 + with mock.patch("time.monotonic") as mock_monotonic: + mock_monotonic.return_value = timestamp_start + generator = _helpers._attempt_timeout_generator(request_t, operation_t) + assert next(generator) == expected + # value should not change without time.monotonic changing + assert next(generator) == expected + + def test_attempt_timeout_w_sleeps(self): + """use real sleep values to make sure it matches expectations""" + from time import sleep + operation_timeout = 1 + generator = _helpers._attempt_timeout_generator(None, operation_timeout) + expected_value = operation_timeout + sleep_time = 0.1 + for i in range(3): + found_value = next(generator) + assert abs(found_value - expected_value) < 0.001 + sleep(sleep_time) + expected_value -= sleep_time + class TestConvertRetryDeadline: """ Test _convert_retry_deadline wrapper diff --git a/tests/unit/test__mutate_rows.py b/tests/unit/test__mutate_rows.py index 9748e513e..873931389 100644 --- a/tests/unit/test__mutate_rows.py +++ b/tests/unit/test__mutate_rows.py @@ -52,6 +52,7 @@ def _make_mock_client(self, mutation_dict, error_dict=None): async def test_single_entry_success(self): """Test mutating a single entry""" from google.cloud.bigtable._mutate_rows import _mutate_rows_retryable_attempt + import itertools mutation = mock.Mock() mutations = {0: mutation} @@ -59,10 +60,11 @@ async def test_single_entry_success(self): errors = {0: []} expected_request = {"test": "data"} expected_timeout = 9 + mock_timeout_gen = itertools.repeat(expected_timeout) await _mutate_rows_retryable_attempt( client, expected_request, - expected_timeout, + mock_timeout_gen, mutations, errors, lambda x: False, @@ -81,7 +83,9 @@ async def test_empty_request(self): from google.cloud.bigtable._mutate_rows import _mutate_rows_retryable_attempt client = self._make_mock_client({}) - await _mutate_rows_retryable_attempt(client, {}, None, {}, {}, lambda x: False) + await _mutate_rows_retryable_attempt( + client, {}, iter([0]), {}, {}, lambda x: False + ) assert client.mutate_rows.call_count == 1 @pytest.mark.asyncio @@ -105,7 +109,7 @@ async def test_partial_success_retryable(self): await _mutate_rows_retryable_attempt( client, expected_request, - expected_timeout, + iter([expected_timeout]), mutations, errors, lambda x: True, @@ -132,7 +136,7 @@ async def test_partial_success_non_retryable(self): await _mutate_rows_retryable_attempt( client, expected_request, - expected_timeout, + iter([expected_timeout]), mutations, errors, lambda x: False, @@ -161,7 +165,7 @@ async def test_on_terminal_state_no_retries(self): await _mutate_rows_retryable_attempt( client, {}, - 9, + iter([9]), mutations, errors, lambda x: False, @@ -198,7 +202,7 @@ async def test_on_terminal_state_with_retries(self): await _mutate_rows_retryable_attempt( client, {}, - 9, + iter([9]), mutations, errors, 
lambda x: True, diff --git a/tests/unit/test__read_rows.py b/tests/unit/test__read_rows.py index e57b5d992..c893c56cd 100644 --- a/tests/unit/test__read_rows.py +++ b/tests/unit/test__read_rows.py @@ -41,10 +41,14 @@ def test_ctor_defaults(self): client = mock.Mock() client.read_rows = mock.Mock() client.read_rows.return_value = None - start_time = 123 default_operation_timeout = 600 - with mock.patch("time.monotonic", return_value=start_time): + time_gen_mock = mock.Mock() + with mock.patch( + "google.cloud.bigtable._read_rows._attempt_timeout_generator", time_gen_mock + ): instance = self._make_one(request, client) + assert time_gen_mock.call_count == 1 + time_gen_mock.assert_called_once_with(None, default_operation_timeout) assert instance.transient_errors == [] assert instance._last_emitted_row_key is None assert instance._emit_count == 0 @@ -52,9 +56,8 @@ def test_ctor_defaults(self): retryable_fn = instance._partial_retryable assert retryable_fn.func == instance._read_rows_retryable_attempt assert retryable_fn.args[0] == client.read_rows - assert retryable_fn.args[1] == default_operation_timeout - assert retryable_fn.args[2] == default_operation_timeout + start_time - assert retryable_fn.args[3] == 0 + assert retryable_fn.args[1] == time_gen_mock.return_value + assert retryable_fn.args[2] == 0 assert client.read_rows.call_count == 0 def test_ctor(self): @@ -65,14 +68,20 @@ def test_ctor(self): client.read_rows.return_value = None expected_operation_timeout = 42 expected_request_timeout = 44 - start_time = 123 - with mock.patch("time.monotonic", return_value=start_time): + time_gen_mock = mock.Mock() + with mock.patch( + "google.cloud.bigtable._read_rows._attempt_timeout_generator", time_gen_mock + ): instance = self._make_one( request, client, operation_timeout=expected_operation_timeout, per_request_timeout=expected_request_timeout, ) + assert time_gen_mock.call_count == 1 + time_gen_mock.assert_called_once_with( + expected_request_timeout, expected_operation_timeout + ) assert instance.transient_errors == [] assert instance._last_emitted_row_key is None assert instance._emit_count == 0 @@ -80,9 +89,8 @@ def test_ctor(self): retryable_fn = instance._partial_retryable assert retryable_fn.func == instance._read_rows_retryable_attempt assert retryable_fn.args[0] == client.read_rows - assert retryable_fn.args[1] == expected_request_timeout - assert retryable_fn.args[2] == start_time + expected_operation_timeout - assert retryable_fn.args[3] == row_limit + assert retryable_fn.args[1] == time_gen_mock.return_value + assert retryable_fn.args[2] == row_limit assert client.read_rows.call_count == 0 def test___aiter__(self): @@ -217,14 +225,18 @@ async def test_revise_limit(self, start_limit, emit_num, expected_limit): - if the number emitted exceeds the new limit, an exception should should be raised (tested in test_revise_limit_over_limit) """ + import itertools + request = {"rows_limit": start_limit} instance = self._make_one(request, mock.Mock()) instance._emit_count = emit_num instance._last_emitted_row_key = "a" gapic_mock = mock.Mock() gapic_mock.side_effect = [GeneratorExit("stop_fn")] + mock_timeout_gen = itertools.repeat(5) + attempt = instance._read_rows_retryable_attempt( - gapic_mock, 100, 100, start_limit + gapic_mock, mock_timeout_gen, start_limit ) if start_limit != 0 and expected_limit == 0: # if we emitted the expected number of rows, we should receive a StopAsyncIteration @@ -242,12 +254,15 @@ async def test_revise_limit_over_limit(self, start_limit, emit_num): Should raise 
runtime error if we get in state where emit_num > start_num (unless start_num == 0, which represents unlimited) """ + import itertools + request = {"rows_limit": start_limit} instance = self._make_one(request, mock.Mock()) instance._emit_count = emit_num instance._last_emitted_row_key = "a" + mock_timeout_gen = itertools.repeat(5) attempt = instance._read_rows_retryable_attempt( - mock.Mock(), 100, 100, start_limit + mock.Mock(), mock_timeout_gen, start_limit ) with pytest.raises(RuntimeError) as e: await attempt.__anext__() @@ -273,6 +288,7 @@ async def test_retryable_attempt_hit_limit(self, limit): Stream should end after hitting the limit """ from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse + import itertools instance = self._make_one({}, mock.Mock()) @@ -290,7 +306,8 @@ async def gen(): return gen() - gen = instance._read_rows_retryable_attempt(mock_gapic, 100, 100, limit) + mock_timeout_gen = itertools.repeat(5) + gen = instance._read_rows_retryable_attempt(mock_gapic, mock_timeout_gen, limit) # should yield values up to the limit for i in range(limit): await gen.__anext__() From eddc1c988fd18dc8a14cd64ff0001e63b8ef3cc2 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 26 May 2023 16:53:34 -0700 Subject: [PATCH 095/213] pass table instead of request dict --- google/cloud/bigtable/_mutate_rows.py | 25 ++++++++++++------------- google/cloud/bigtable/client.py | 6 +----- 2 files changed, 13 insertions(+), 18 deletions(-) diff --git a/google/cloud/bigtable/_mutate_rows.py b/google/cloud/bigtable/_mutate_rows.py index 8f2789239..d15138dba 100644 --- a/google/cloud/bigtable/_mutate_rows.py +++ b/google/cloud/bigtable/_mutate_rows.py @@ -40,7 +40,7 @@ class _MutateRowsIncomplete(RuntimeError): async def _mutate_rows_operation( gapic_client: "BigtableAsyncClient", - request: dict[str, Any], + table: "Table", mutation_entries: list["RowMutationEntry"], operation_timeout: float, per_request_timeout: float | None, @@ -102,7 +102,7 @@ def on_error_fn(exc): # trigger mutate_rows await deadline_wrapped( gapic_client, - request, + table, attempt_timeout_gen, mutations_dict, error_dict, @@ -138,7 +138,7 @@ def on_error_fn(exc): async def _mutate_rows_retryable_attempt( gapic_client: "BigtableAsyncClient", - request: dict[str, Any], + table: "Table", timeout_generator: Iterator[float], mutation_dict: dict[int, "RowMutationEntry" | None], error_dict: dict[int, list[Exception]], @@ -171,21 +171,19 @@ async def _mutate_rows_retryable_attempt( - _MutateRowsIncomplete: if one or more retryable mutations remain incomplete at the end of the function - GoogleAPICallError: if the server returns an error on the grpc call """ - new_request = request.copy() - # keep map between sub-request indices and global entry indices + # keep map between sub-request indices and global mutation_dict indices index_map: dict[int, int] = {} - # continue to retry until timeout, or all mutations are complete (success or failure) request_entries: list[dict[str, Any]] = [] - for index, entry in mutation_dict.items(): + for request_idx, dict_key, entry in enumerate(mutation_dict.items()): if entry is not None: - index_map[len(request_entries)] = index + index_map[request_idx] = dict_key request_entries.append(entry._to_dict()) - new_request["entries"] = request_entries - metadata = _make_metadata( - request.get("table_name", None), request.get("app_profile_id", None) - ) + # make gapic request + metadata = _make_metadata(table.table_name, table.app_profile_id) async for result_list in await 
gapic_client.mutate_rows( - new_request, + table_name=table.table_name, + app_profile_id=table.app_profile_id, + entries=request_entries, timeout=next(timeout_generator), metadata=metadata, ): @@ -223,3 +221,4 @@ async def _mutate_rows_retryable_attempt( if any(mutation is not None for mutation in mutation_dict.values()): # unfinished work; raise exception to trigger retry raise _MutateRowsIncomplete() + diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 2e5a8dac0..1c08bbae2 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -718,10 +718,6 @@ async def bulk_mutate_rows( if per_request_timeout is not None and per_request_timeout > operation_timeout: raise ValueError("per_request_timeout must be less than operation_timeout") - request = {"table_name": self.table_name} - if self.app_profile_id: - request["app_profile_id"] = self.app_profile_id - callback: Callable[[RowMutationEntry, Exception | None], None] | None = None if on_success is not None: # convert on_terminal_state callback to callback for successful results only @@ -732,7 +728,7 @@ def callback(entry: RowMutationEntry, exc: Exception | None): await _mutate_rows_operation( self.client._gapic_client, - request, + self, mutation_entries, operation_timeout, per_request_timeout, From f8b26aa0d97b6ccdf351551cea670ff176cd0cf5 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 26 May 2023 17:17:43 -0700 Subject: [PATCH 096/213] refactoring mutate rows --- google/cloud/bigtable/_mutate_rows.py | 42 ++++++++++++--------------- google/cloud/bigtable/client.py | 9 +++--- 2 files changed, 23 insertions(+), 28 deletions(-) diff --git a/google/cloud/bigtable/_mutate_rows.py b/google/cloud/bigtable/_mutate_rows.py index d15138dba..06fc09234 100644 --- a/google/cloud/bigtable/_mutate_rows.py +++ b/google/cloud/bigtable/_mutate_rows.py @@ -44,7 +44,7 @@ async def _mutate_rows_operation( mutation_entries: list["RowMutationEntry"], operation_timeout: float, per_request_timeout: float | None, - on_terminal_state: Callable[["RowMutationEntry", Exception | None], None] + on_terminal_state: Callable[[int, "RowMutationEntry", Exception | None], None] | None = None, ): """ @@ -63,6 +63,7 @@ async def _mutate_rows_operation( mutations_dict: dict[int, RowMutationEntry | None] = { idx: mut for idx, mut in enumerate(mutation_entries) } + updated_callback = lambda idx, entry, exc: mutations_dict.pop(idx); on_terminal_state(idx, entry, exc) error_dict: dict[int, list[Exception]] = {idx: [] for idx in mutations_dict.keys()} predicate = retries.if_exception_type( @@ -75,12 +76,12 @@ def on_error_fn(exc): if predicate(exc) and not isinstance(exc, _MutateRowsIncomplete): # add this exception to list for each active mutation for idx in error_dict.keys(): - if mutations_dict[idx] is not None: + if idx in mutations_dict: error_dict[idx].append(exc) - # remove non-idempotent mutations from mutations_dict, so they are not retried - for idx, mut in mutations_dict.items(): - if mut is not None and not mut.is_idempotent(): - mutations_dict[idx] = None + # remove non-idempotent mutations from mutations_dict, so they are not retriedi + for idx, mut in list(mutations_dict.items()): + if not mut.is_idempotent(): + mutations_dict.pop(idx) retry = retries.AsyncRetry( predicate=predicate, @@ -107,12 +108,12 @@ def on_error_fn(exc): mutations_dict, error_dict, predicate, - on_terminal_state, + updated_callback, ) except Exception as exc: # exceptions raised by retryable are added to the list of exceptions for 
all unprocessed mutations for idx in error_dict.keys(): - if mutations_dict[idx] is not None: + if idx in mutations_dict: error_dict[idx].append(exc) finally: # raise exception detailing incomplete mutations @@ -128,7 +129,7 @@ def on_error_fn(exc): bt_exceptions.FailedMutationEntryError(idx, entry, cause_exc) ) # call on_terminal_state for each unreported failed mutation - if on_terminal_state and mutations_dict[idx] is not None: + if on_terminal_state and idx in mutations_dict: on_terminal_state(entry, cause_exc) if all_errors: raise bt_exceptions.MutationsExceptionGroup( @@ -140,10 +141,10 @@ async def _mutate_rows_retryable_attempt( gapic_client: "BigtableAsyncClient", table: "Table", timeout_generator: Iterator[float], - mutation_dict: dict[int, "RowMutationEntry" | None], + mutation_dict: dict[int, "RowMutationEntry"], error_dict: dict[int, list[Exception]], predicate: Callable[[Exception], bool], - on_terminal_state: Callable[["RowMutationEntry", Exception | None], None] + on_terminal_state: Callable[[int, "RowMutationEntry", Exception | None], None] | None = None, ): """ @@ -174,9 +175,9 @@ async def _mutate_rows_retryable_attempt( # keep map between sub-request indices and global mutation_dict indices index_map: dict[int, int] = {} request_entries: list[dict[str, Any]] = [] - for request_idx, dict_key, entry in enumerate(mutation_dict.items()): + for request_idx, global_idx, entry in enumerate(mutation_dict.items()): if entry is not None: - index_map[request_idx] = dict_key + index_map[request_idx] = global_idx request_entries.append(entry._to_dict()) # make gapic request metadata = _make_metadata(table.table_name, table.app_profile_id) @@ -190,8 +191,7 @@ async def _mutate_rows_retryable_attempt( for result in result_list.entries: # convert sub-request index to global index idx = index_map[result.index] - entry = mutation_dict[idx] - terminal_state = False + entry = mutation_dict.get(idx, None) exc = None if entry is None: # this entry has already reached a terminal state @@ -199,7 +199,7 @@ async def _mutate_rows_retryable_attempt( if result.status.code == 0: # mutation succeeded error_dict[idx] = [] - terminal_state = True + on_terminal_state(idx, entry, None) else: # mutation failed exc = core_exceptions.from_grpc_status( @@ -211,14 +211,8 @@ async def _mutate_rows_retryable_attempt( # if mutation is non-idempotent or the error is not retryable, # mark the mutation as terminal if not predicate(exc) or not entry.is_idempotent(): - terminal_state = True - # if the mutation is terminal and won't be retried, remove it from the mutation_dict - if terminal_state: - mutation_dict[idx] = None - if on_terminal_state is not None: - on_terminal_state(entry, exc) + on_terminal_state(idx, entry, exc) # check if attempt succeeded, or needs to be retried - if any(mutation is not None for mutation in mutation_dict.values()): + if mutation_dict: # unfinished work; raise exception to trigger retry raise _MutateRowsIncomplete() - diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 1c08bbae2..bfb1acf12 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -676,7 +676,7 @@ async def bulk_mutate_rows( *, operation_timeout: float | None = 60, per_request_timeout: float | None = None, - on_success: Callable[[RowMutationEntry], None] | None = None, + on_success: Callable[[int, RowMutationEntry], None] | None = None, ): """ Applies mutations for multiple rows in a single batched request. 
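For illustration only, and not part of the diff above: a minimal sketch of how the new (index, entry) form of on_success introduced in this hunk might be used. The table object is assumed to be an open Table from this async client, and the family, qualifier, and value literals are invented for the example.

    from google.cloud.bigtable.mutations import RowMutationEntry, SetCell

    def record_success(index: int, entry: RowMutationEntry) -> None:
        # called once for each entry that reaches a successful terminal state
        print(f"entry {index} applied for row {entry.row_key!r}")

    async def apply_batch(table):
        # `table` is assumed to come from the async client in this package
        entries = [
            RowMutationEntry(b"row-1", [SetCell(family="fam", qualifier=b"q", new_value=b"v1")]),
            RowMutationEntry(b"row-2", [SetCell(family="fam", qualifier=b"q", new_value=b"v2")]),
        ]
        await table.bulk_mutate_rows(entries, on_success=record_success)
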
@@ -703,7 +703,8 @@ async def bulk_mutate_rows( will be cancelled with a DeadlineExceeded exception, and a retry will be attempted if within operation_timeout budget - on_success: a callback function that will be called when each mutation - entry is confirmed to be applied successfully. + entry is confirmed to be applied successfully. Will be passed the + index and the entry itself. Raises: - MutationsExceptionGroup if one or more mutations fails Contains details about any failed entries in .exceptions @@ -722,9 +723,9 @@ async def bulk_mutate_rows( if on_success is not None: # convert on_terminal_state callback to callback for successful results only # failed results will be rasied as exceptions - def callback(entry: RowMutationEntry, exc: Exception | None): + def callback(idx:int, entry: RowMutationEntry, exc: Exception | None): if exc is None and on_success is not None: - on_success(entry) + on_success(idx, entry) await _mutate_rows_operation( self.client._gapic_client, From 5b80dc5a6f8247a3eda64424d12fccc40df730ac Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 26 May 2023 17:27:24 -0700 Subject: [PATCH 097/213] made on_terminal_state into coroutine --- google/cloud/bigtable/_mutate_rows.py | 22 +++++++++++++++------- google/cloud/bigtable/client.py | 4 +++- tests/unit/test__helpers.py | 2 ++ 3 files changed, 20 insertions(+), 8 deletions(-) diff --git a/google/cloud/bigtable/_mutate_rows.py b/google/cloud/bigtable/_mutate_rows.py index 06fc09234..641662f3e 100644 --- a/google/cloud/bigtable/_mutate_rows.py +++ b/google/cloud/bigtable/_mutate_rows.py @@ -14,7 +14,7 @@ # from __future__ import annotations -from typing import Iterator, Callable, Any, TYPE_CHECKING +from typing import Iterator, Callable, Any, Awaitable, TYPE_CHECKING from google.api_core import exceptions as core_exceptions from google.api_core import retry_async as retries @@ -44,7 +44,9 @@ async def _mutate_rows_operation( mutation_entries: list["RowMutationEntry"], operation_timeout: float, per_request_timeout: float | None, - on_terminal_state: Callable[[int, "RowMutationEntry", Exception | None], None] + on_terminal_state: Callable[ + [int, "RowMutationEntry", Exception | None], Awaitable[None] + ] | None = None, ): """ @@ -63,7 +65,12 @@ async def _mutate_rows_operation( mutations_dict: dict[int, RowMutationEntry | None] = { idx: mut for idx, mut in enumerate(mutation_entries) } - updated_callback = lambda idx, entry, exc: mutations_dict.pop(idx); on_terminal_state(idx, entry, exc) + + async def updated_callback(idx, entry, exc): + mutations_dict[idx] = entry + if on_terminal_state is not None: + await on_terminal_state(idx, entry, exc) + error_dict: dict[int, list[Exception]] = {idx: [] for idx in mutations_dict.keys()} predicate = retries.if_exception_type( @@ -144,8 +151,9 @@ async def _mutate_rows_retryable_attempt( mutation_dict: dict[int, "RowMutationEntry"], error_dict: dict[int, list[Exception]], predicate: Callable[[Exception], bool], - on_terminal_state: Callable[[int, "RowMutationEntry", Exception | None], None] - | None = None, + on_terminal_state: Callable[ + [int, "RowMutationEntry", Exception | None], Awaitable[None] + ], ): """ Helper function for managing a single mutate_rows attempt. 
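A hedged usage sketch of the Raises contract described in the docstring above, assuming only what the docstring states (failed entries are reported through the group's .exceptions attribute); the table and entries variables are placeholders, not taken from the diff.

    from google.cloud.bigtable.exceptions import MutationsExceptionGroup

    async def apply_batch_with_reporting(table, entries):
        try:
            await table.bulk_mutate_rows(entries)
        except MutationsExceptionGroup as group:
            # each sub-exception describes one failed entry
            for failed in group.exceptions:
                print(f"failed entry: {failed}")
            raise
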
@@ -199,7 +207,7 @@ async def _mutate_rows_retryable_attempt( if result.status.code == 0: # mutation succeeded error_dict[idx] = [] - on_terminal_state(idx, entry, None) + await on_terminal_state(idx, entry, None) else: # mutation failed exc = core_exceptions.from_grpc_status( @@ -211,7 +219,7 @@ async def _mutate_rows_retryable_attempt( # if mutation is non-idempotent or the error is not retryable, # mark the mutation as terminal if not predicate(exc) or not entry.is_idempotent(): - on_terminal_state(idx, entry, exc) + await on_terminal_state(idx, entry, exc) # check if attempt succeeded, or needs to be retried if mutation_dict: # unfinished work; raise exception to trigger retry diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index bfb1acf12..6ebd17cfe 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -723,7 +723,9 @@ async def bulk_mutate_rows( if on_success is not None: # convert on_terminal_state callback to callback for successful results only # failed results will be rasied as exceptions - def callback(idx:int, entry: RowMutationEntry, exc: Exception | None): + async def callback( + idx: int, entry: RowMutationEntry, exc: Exception | None + ): if exc is None and on_success is not None: on_success(idx, entry) diff --git a/tests/unit/test__helpers.py b/tests/unit/test__helpers.py index cebc5095e..6e7adc610 100644 --- a/tests/unit/test__helpers.py +++ b/tests/unit/test__helpers.py @@ -87,6 +87,7 @@ def test_attempt_timeout_frozen_time(self, request_t, operation_t, expected): def test_attempt_timeout_w_sleeps(self): """use real sleep values to make sure it matches expectations""" from time import sleep + operation_timeout = 1 generator = _helpers._attempt_timeout_generator(None, operation_timeout) expected_value = operation_timeout @@ -97,6 +98,7 @@ def test_attempt_timeout_w_sleeps(self): sleep(sleep_time) expected_value -= sleep_time + class TestConvertRetryDeadline: """ Test _convert_retry_deadline wrapper From 9e5b80a76543af73cd6d125293e5940cba201fa7 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 26 May 2023 18:37:22 -0700 Subject: [PATCH 098/213] fixed style issues --- google/cloud/bigtable/_mutate_rows.py | 29 +++++++++++++++------------ google/cloud/bigtable/client.py | 23 +++++++++++++++++---- 2 files changed, 35 insertions(+), 17 deletions(-) diff --git a/google/cloud/bigtable/_mutate_rows.py b/google/cloud/bigtable/_mutate_rows.py index 641662f3e..09381c01b 100644 --- a/google/cloud/bigtable/_mutate_rows.py +++ b/google/cloud/bigtable/_mutate_rows.py @@ -14,7 +14,7 @@ # from __future__ import annotations -from typing import Iterator, Callable, Any, Awaitable, TYPE_CHECKING +from typing import Iterator, Callable, Any, Coroutine, TYPE_CHECKING from google.api_core import exceptions as core_exceptions from google.api_core import retry_async as retries @@ -27,6 +27,7 @@ from google.cloud.bigtable_v2.services.bigtable.async_client import ( BigtableAsyncClient, ) + from google.cloud.bigtable.client import Table from google.cloud.bigtable.mutations import RowMutationEntry @@ -45,7 +46,7 @@ async def _mutate_rows_operation( operation_timeout: float, per_request_timeout: float | None, on_terminal_state: Callable[ - [int, "RowMutationEntry", Exception | None], Awaitable[None] + [int, "RowMutationEntry", Exception | None], Coroutine[None, None, None] ] | None = None, ): @@ -62,7 +63,7 @@ async def _mutate_rows_operation( - on_terminal_state: If given, this function will be called as soon as a mutation entry 
reaches a terminal state (success or failure). """ - mutations_dict: dict[int, RowMutationEntry | None] = { + mutations_dict: dict[int, RowMutationEntry] = { idx: mut for idx, mut in enumerate(mutation_entries) } @@ -137,7 +138,7 @@ def on_error_fn(exc): ) # call on_terminal_state for each unreported failed mutation if on_terminal_state and idx in mutations_dict: - on_terminal_state(entry, cause_exc) + await on_terminal_state(idx, entry, cause_exc) if all_errors: raise bt_exceptions.MutationsExceptionGroup( all_errors, len(mutation_entries) @@ -152,7 +153,8 @@ async def _mutate_rows_retryable_attempt( error_dict: dict[int, list[Exception]], predicate: Callable[[Exception], bool], on_terminal_state: Callable[ - [int, "RowMutationEntry", Exception | None], Awaitable[None] + [int, "RowMutationEntry", Exception | None], + Coroutine[None, None, None], ], ): """ @@ -183,23 +185,24 @@ async def _mutate_rows_retryable_attempt( # keep map between sub-request indices and global mutation_dict indices index_map: dict[int, int] = {} request_entries: list[dict[str, Any]] = [] - for request_idx, global_idx, entry in enumerate(mutation_dict.items()): - if entry is not None: - index_map[request_idx] = global_idx - request_entries.append(entry._to_dict()) + for request_idx, (global_idx, m) in enumerate(mutation_dict.items()): + index_map[request_idx] = global_idx + request_entries.append(m._to_dict()) # make gapic request metadata = _make_metadata(table.table_name, table.app_profile_id) async for result_list in await gapic_client.mutate_rows( - table_name=table.table_name, - app_profile_id=table.app_profile_id, - entries=request_entries, + request={ + "table_name": table.table_name, + "app_profile_id": table.app_profile_id, + "entries": request_entries, + }, timeout=next(timeout_generator), metadata=metadata, ): for result in result_list.entries: # convert sub-request index to global index idx = index_map[result.index] - entry = mutation_dict.get(idx, None) + entry = mutation_dict[idx] if idx in mutation_dict else None exc = None if entry is None: # this entry has already reached a terminal state diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 6ebd17cfe..142e314f4 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -15,7 +15,16 @@ from __future__ import annotations -from typing import cast, Any, Optional, Set, Callable, TYPE_CHECKING +from typing import ( + cast, + Any, + Optional, + Set, + Callable, + Awaitable, + Coroutine, + TYPE_CHECKING, +) import asyncio import grpc @@ -23,6 +32,7 @@ import warnings import sys import random +from inspect import iscoroutine from google.cloud.bigtable_v2.services.bigtable.client import BigtableClientMeta from google.cloud.bigtable_v2.services.bigtable.async_client import BigtableAsyncClient @@ -676,7 +686,8 @@ async def bulk_mutate_rows( *, operation_timeout: float | None = 60, per_request_timeout: float | None = None, - on_success: Callable[[int, RowMutationEntry], None] | None = None, + on_success: Callable[[int, RowMutationEntry], None | Awaitable[None]] + | None = None, ): """ Applies mutations for multiple rows in a single batched request. 
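The index bookkeeping fixed in the _mutate_rows.py hunk above (enumerating the items of the active-mutation dict) is easier to see in a small self-contained sketch; the entry values here are plain strings standing in for RowMutationEntry objects.

    # active mutations keep their original (global) indices, while the outgoing
    # request numbers its entries contiguously from zero
    active = {0: "entry-a", 3: "entry-d", 7: "entry-h"}  # 1, 2, 4, 5, 6 already finalized
    index_map = {}
    request_entries = []
    for request_idx, (global_idx, entry) in enumerate(active.items()):
        index_map[request_idx] = global_idx
        request_entries.append(entry)

    assert request_entries == ["entry-a", "entry-d", "entry-h"]
    # a result reported for sub-request index 1 maps back to original entry 3
    assert index_map == {0: 0, 1: 3, 2: 7}
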
@@ -719,7 +730,9 @@ async def bulk_mutate_rows( if per_request_timeout is not None and per_request_timeout > operation_timeout: raise ValueError("per_request_timeout must be less than operation_timeout") - callback: Callable[[RowMutationEntry, Exception | None], None] | None = None + callback: Callable[ + [int, RowMutationEntry, Exception | None], Coroutine[None, None, None] + ] | None = None if on_success is not None: # convert on_terminal_state callback to callback for successful results only # failed results will be rasied as exceptions @@ -727,7 +740,9 @@ async def callback( idx: int, entry: RowMutationEntry, exc: Exception | None ): if exc is None and on_success is not None: - on_success(idx, entry) + output = on_success(idx, entry) + if iscoroutine(output): + await output await _mutate_rows_operation( self.client._gapic_client, From f7539f651b3060e41a01ae9b94b6d0f513ff5df3 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Sat, 27 May 2023 12:59:47 -0700 Subject: [PATCH 099/213] moved callback rewriting into retryable attempt --- google/cloud/bigtable/_mutate_rows.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/google/cloud/bigtable/_mutate_rows.py b/google/cloud/bigtable/_mutate_rows.py index 09381c01b..6a6349408 100644 --- a/google/cloud/bigtable/_mutate_rows.py +++ b/google/cloud/bigtable/_mutate_rows.py @@ -67,11 +67,6 @@ async def _mutate_rows_operation( idx: mut for idx, mut in enumerate(mutation_entries) } - async def updated_callback(idx, entry, exc): - mutations_dict[idx] = entry - if on_terminal_state is not None: - await on_terminal_state(idx, entry, exc) - error_dict: dict[int, list[Exception]] = {idx: [] for idx in mutations_dict.keys()} predicate = retries.if_exception_type( @@ -116,7 +111,7 @@ def on_error_fn(exc): mutations_dict, error_dict, predicate, - updated_callback, + on_terminal_state, ) except Exception as exc: # exceptions raised by retryable are added to the list of exceptions for all unprocessed mutations @@ -155,7 +150,8 @@ async def _mutate_rows_retryable_attempt( on_terminal_state: Callable[ [int, "RowMutationEntry", Exception | None], Coroutine[None, None, None], - ], + ] + | None = None, ): """ Helper function for managing a single mutate_rows attempt. 
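The iscoroutine check added to client.py above lets callers supply either a plain function or an async function as the callback. The dispatch pattern in isolation, as a runnable sketch with illustrative names only:

    import asyncio
    from inspect import iscoroutine

    async def invoke(callback, index, entry):
        # call the user-supplied callback; await only if it returned a coroutine
        result = callback(index, entry)
        if iscoroutine(result):
            await result

    def sync_callback(index, entry):
        print("sync", index, entry)

    async def async_callback(index, entry):
        print("async", index, entry)

    async def main():
        await invoke(sync_callback, 0, "entry-0")
        await invoke(async_callback, 1, "entry-1")

    asyncio.run(main())
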
@@ -182,6 +178,15 @@ async def _mutate_rows_retryable_attempt( - _MutateRowsIncomplete: if one or more retryable mutations remain incomplete at the end of the function - GoogleAPICallError: if the server returns an error on the grpc call """ + # update on_terminal_state to remove completed mutations from mutation_dict + input_callback = on_terminal_state + + async def on_terminal_patched(idx, entry, exc): + mutation_dict.pop(idx) + if input_callback is not None: + await input_callback(idx, entry, exc) + + on_terminal_state = on_terminal_patched # keep map between sub-request indices and global mutation_dict indices index_map: dict[int, int] = {} request_entries: list[dict[str, Any]] = [] From e77a4faf586bc9cbda2709b685296bd7e83ce476 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Sat, 27 May 2023 13:00:01 -0700 Subject: [PATCH 100/213] fixed tests --- tests/unit/test__mutate_rows.py | 69 ++++++++++++++++++++------------- tests/unit/test_client.py | 10 +++-- 2 files changed, 47 insertions(+), 32 deletions(-) diff --git a/tests/unit/test__mutate_rows.py b/tests/unit/test__mutate_rows.py index 873931389..4b8a995ff 100644 --- a/tests/unit/test__mutate_rows.py +++ b/tests/unit/test__mutate_rows.py @@ -28,7 +28,8 @@ class Test_MutateRowsRetryableAttempt: async def _mock_stream(self, mutation_dict, error_dict): - for idx, entry in mutation_dict.items(): + items = list(mutation_dict.items()) + for idx, entry in items: code = error_dict.get(idx, 0) yield MutateRowsResponse( entries=[ @@ -58,24 +59,26 @@ async def test_single_entry_success(self): mutations = {0: mutation} client = self._make_mock_client(mutations) errors = {0: []} - expected_request = {"test": "data"} + expected_table = mock.Mock() expected_timeout = 9 mock_timeout_gen = itertools.repeat(expected_timeout) await _mutate_rows_retryable_attempt( client, - expected_request, + expected_table, mock_timeout_gen, mutations, errors, lambda x: False, ) - assert mutations[0] is None + assert len(mutations) == 0 assert errors[0] == [] assert client.mutate_rows.call_count == 1 - args, kwargs = client.mutate_rows.call_args + _, kwargs = client.mutate_rows.call_args assert kwargs["timeout"] == expected_timeout - assert args[0]["test"] == "data" - assert args[0]["entries"] == [mutation._to_dict()] + request = kwargs["request"] + assert request["table_name"] == expected_table.table_name + assert request["app_profile_id"] == expected_table.app_profile_id + assert request["entries"] == [mutation._to_dict()] @pytest.mark.asyncio async def test_empty_request(self): @@ -83,8 +86,9 @@ async def test_empty_request(self): from google.cloud.bigtable._mutate_rows import _mutate_rows_retryable_attempt client = self._make_mock_client({}) + expected_table = mock.Mock() await _mutate_rows_retryable_attempt( - client, {}, iter([0]), {}, {}, lambda x: False + client, expected_table, iter([0]), {}, {}, lambda x: False ) assert client.mutate_rows.call_count == 1 @@ -103,18 +107,18 @@ async def test_partial_success_retryable(self): errors = {0: [], 1: [], 2: []} client = self._make_mock_client(mutations, error_dict={1: 300}) # raise retryable error 3 times, then raise non-retryable error - expected_request = {} + expected_table = mock.Mock() expected_timeout = 9 with pytest.raises(_MutateRowsIncomplete): await _mutate_rows_retryable_attempt( client, - expected_request, + expected_table, iter([expected_timeout]), mutations, errors, lambda x: True, ) - assert mutations == {0: None, 1: failure_mutation, 2: None} + assert mutations == {1: failure_mutation} assert 
errors[0] == [] assert len(errors[1]) == 1 assert errors[1][0].grpc_status_code == 300 @@ -131,17 +135,17 @@ async def test_partial_success_non_retryable(self): mutations = {0: success_mutation, 1: failure_mutation, 2: success_mutation_2} errors = {0: [], 1: [], 2: []} client = self._make_mock_client(mutations, error_dict={1: 300}) - expected_request = {} expected_timeout = 9 + expected_table = mock.Mock() await _mutate_rows_retryable_attempt( client, - expected_request, + expected_table, iter([expected_timeout]), mutations, errors, lambda x: False, ) - assert mutations == {0: None, 1: None, 2: None} + assert len(mutations) == 0 assert errors[0] == [] assert len(errors[1]) == 1 assert errors[1][0].grpc_status_code == 300 @@ -158,13 +162,14 @@ async def test_on_terminal_state_no_retries(self): success_mutation_2 = mock.Mock() failure_mutation = mock.Mock() mutations = {0: success_mutation, 1: failure_mutation, 2: success_mutation_2} - callback = mock.Mock() + callback = AsyncMock() errors = {0: [], 1: [], 2: []} client = self._make_mock_client(mutations, error_dict={1: 300}) + expected_table = mock.Mock() # raise retryable error 3 times, then raise non-retryable error await _mutate_rows_retryable_attempt( client, - {}, + expected_table, iter([9]), mutations, errors, @@ -172,13 +177,17 @@ async def test_on_terminal_state_no_retries(self): callback, ) assert callback.call_count == 3 + assert callback.await_count == 3 call_args = callback.call_args_list - assert call_args[0][0][0] == success_mutation - assert call_args[0][0][1] is None - assert call_args[1][0][0] == failure_mutation - assert call_args[1][0][1].grpc_status_code == 300 - assert call_args[2][0][0] == success_mutation_2 - assert call_args[2][0][1] is None + assert call_args[0][0][0] == 0 # index + assert call_args[0][0][1] == success_mutation + assert call_args[0][0][2] is None + assert call_args[1][0][0] == 1 # index + assert call_args[1][0][1] == failure_mutation + assert call_args[1][0][2].grpc_status_code == 300 + assert call_args[2][0][0] == 2 # index + assert call_args[2][0][1] == success_mutation_2 + assert call_args[2][0][2] is None @pytest.mark.asyncio async def test_on_terminal_state_with_retries(self): @@ -194,14 +203,15 @@ async def test_on_terminal_state_with_retries(self): success_mutation_2 = mock.Mock() failure_mutation = mock.Mock() mutations = {0: success_mutation, 1: failure_mutation, 2: success_mutation_2} - callback = mock.Mock() + callback = AsyncMock() errors = {0: [], 1: [], 2: []} client = self._make_mock_client(mutations, error_dict={1: 300}) + expected_table = mock.Mock() # raise retryable error 3 times, then raise non-retryable error with pytest.raises(_MutateRowsIncomplete): await _mutate_rows_retryable_attempt( client, - {}, + expected_table, iter([9]), mutations, errors, @@ -209,8 +219,11 @@ async def test_on_terminal_state_with_retries(self): callback, ) assert callback.call_count == 2 + assert callback.await_count == 2 call_args = callback.call_args_list - assert call_args[0][0][0] == success_mutation - assert call_args[0][0][1] is None - assert call_args[1][0][0] == success_mutation_2 - assert call_args[1][0][1] is None + assert call_args[0][0][0] == 0 # index + assert call_args[0][0][1] == success_mutation + assert call_args[0][0][2] is None + assert call_args[1][0][0] == 2 # index + assert call_args[1][0][1] == success_mutation_2 + assert call_args[1][0][2] is None diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 33e8f356d..39a0d2939 100644 --- a/tests/unit/test_client.py 
+++ b/tests/unit/test_client.py @@ -1554,7 +1554,7 @@ async def test_bulk_mutate_rows(self, mutation_arg): per_request_timeout=expected_per_request_timeout, ) assert mock_gapic.call_count == 1 - request = mock_gapic.call_args[0][0] + request = mock_gapic.call_args[1]["request"] assert ( request["table_name"] == "projects/project/instances/instance/tables/table" @@ -1579,7 +1579,7 @@ async def test_bulk_mutate_rows_multiple_entries(self): [entry_1, entry_2], ) assert mock_gapic.call_count == 1 - request = mock_gapic.call_args[0][0] + request = mock_gapic.call_args[1]["request"] assert ( request["table_name"] == "projects/project/instances/instance/tables/table" @@ -1883,8 +1883,10 @@ async def test_bulk_mutate_rows_on_success(self): entries, operation_timeout=1000, on_success=callback ) assert callback.call_count == 2 - assert callback.call_args_list[0][0][0] == entries[0] - assert callback.call_args_list[1][0][0] == entries[2] + assert callback.call_args_list[0][0][0] == 0 # index + assert callback.call_args_list[0][0][1] == entries[0] + assert callback.call_args_list[1][0][0] == 2 # index + assert callback.call_args_list[1][0][1] == entries[2] @pytest.mark.parametrize("include_app_profile", [True, False]) @pytest.mark.asyncio From 4e19ed07f4f64ba3570fd5b11eb93d38575f5266 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Tue, 30 May 2023 08:45:15 -0700 Subject: [PATCH 101/213] pop successful mutations from error dict --- google/cloud/bigtable/_mutate_rows.py | 18 ++++++++---------- tests/unit/test__mutate_rows.py | 10 +++++----- 2 files changed, 13 insertions(+), 15 deletions(-) diff --git a/google/cloud/bigtable/_mutate_rows.py b/google/cloud/bigtable/_mutate_rows.py index 6a6349408..823a9d237 100644 --- a/google/cloud/bigtable/_mutate_rows.py +++ b/google/cloud/bigtable/_mutate_rows.py @@ -114,10 +114,9 @@ def on_error_fn(exc): on_terminal_state, ) except Exception as exc: - # exceptions raised by retryable are added to the list of exceptions for all unprocessed mutations - for idx in error_dict.keys(): - if idx in mutations_dict: - error_dict[idx].append(exc) + # exceptions raised by retryable are added to the list of exceptions for all unfinalized mutations + for error_list in error_dict.values(): + error_list.append(exc) finally: # raise exception detailing incomplete mutations all_errors = [] @@ -132,7 +131,7 @@ def on_error_fn(exc): bt_exceptions.FailedMutationEntryError(idx, entry, cause_exc) ) # call on_terminal_state for each unreported failed mutation - if on_terminal_state and idx in mutations_dict: + if on_terminal_state: await on_terminal_state(idx, entry, cause_exc) if all_errors: raise bt_exceptions.MutationsExceptionGroup( @@ -166,11 +165,10 @@ async def _mutate_rows_retryable_attempt( - gapic_client: the client to use for the mutate_rows call - request: the request to send to the server, populated with table name and app profile id - per_request_timeout: the timeout to use for each mutate_rows attempt - - mutation_dict: a dictionary tracking which entries are outstanding - (stored as RowMutationEntry), and which have reached a terminal state (stored as None). - At the start of the request, all entries are outstanding. + - mutation_dict: a dictionary tracking unfinalized mutations. At the start of the request, + all entries are outstanding. As mutations are finalized, they are removed from the dict. - error_dict: a dictionary tracking errors associated with each entry index. - Each retry will append a new error. Successful mutations will clear the error list. 
+ Each retry will append a new error. Successful mutations will remove their index from the dict. - predicate: a function that takes an exception and returns True if the exception is retryable. - on_terminal_state: If given, this function will be called as soon as a mutation entry reaches a terminal state (success or failure). @@ -214,7 +212,7 @@ async def on_terminal_patched(idx, entry, exc): continue if result.status.code == 0: # mutation succeeded - error_dict[idx] = [] + error_dict.pop(idx) await on_terminal_state(idx, entry, None) else: # mutation failed diff --git a/tests/unit/test__mutate_rows.py b/tests/unit/test__mutate_rows.py index 4b8a995ff..0156aeca5 100644 --- a/tests/unit/test__mutate_rows.py +++ b/tests/unit/test__mutate_rows.py @@ -71,7 +71,7 @@ async def test_single_entry_success(self): lambda x: False, ) assert len(mutations) == 0 - assert errors[0] == [] + assert 0 not in errors assert client.mutate_rows.call_count == 1 _, kwargs = client.mutate_rows.call_args assert kwargs["timeout"] == expected_timeout @@ -119,10 +119,10 @@ async def test_partial_success_retryable(self): lambda x: True, ) assert mutations == {1: failure_mutation} - assert errors[0] == [] + assert 0 not in errors assert len(errors[1]) == 1 assert errors[1][0].grpc_status_code == 300 - assert errors[2] == [] + assert 2 not in errors @pytest.mark.asyncio async def test_partial_success_non_retryable(self): @@ -146,10 +146,10 @@ async def test_partial_success_non_retryable(self): lambda x: False, ) assert len(mutations) == 0 - assert errors[0] == [] + assert 0 not in errors assert len(errors[1]) == 1 assert errors[1][0].grpc_status_code == 300 - assert errors[2] == [] + assert 2 not in errors @pytest.mark.asyncio async def test_on_terminal_state_no_retries(self): From 920e4b7849b0fe93c223c7a555eecac22ad1c89e Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Tue, 30 May 2023 08:49:52 -0700 Subject: [PATCH 102/213] removed unneeded check --- google/cloud/bigtable/_mutate_rows.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/google/cloud/bigtable/_mutate_rows.py b/google/cloud/bigtable/_mutate_rows.py index 823a9d237..b3b4933bc 100644 --- a/google/cloud/bigtable/_mutate_rows.py +++ b/google/cloud/bigtable/_mutate_rows.py @@ -79,9 +79,8 @@ def on_error_fn(exc): if predicate(exc) and not isinstance(exc, _MutateRowsIncomplete): # add this exception to list for each active mutation for idx in error_dict.keys(): - if idx in mutations_dict: - error_dict[idx].append(exc) - # remove non-idempotent mutations from mutations_dict, so they are not retriedi + error_dict[idx].append(exc) + # remove non-idempotent mutations from mutations_dict, so they are not retried for idx, mut in list(mutations_dict.items()): if not mut.is_idempotent(): mutations_dict.pop(idx) From 725f5ff82b92e58f8d38db3e8bbc7d52958fa954 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Tue, 30 May 2023 09:19:14 -0700 Subject: [PATCH 103/213] refactoring --- google/cloud/bigtable/_mutate_rows.py | 50 +++++++++++++++------------ 1 file changed, 27 insertions(+), 23 deletions(-) diff --git a/google/cloud/bigtable/_mutate_rows.py b/google/cloud/bigtable/_mutate_rows.py index b3b4933bc..b6e07b28c 100644 --- a/google/cloud/bigtable/_mutate_rows.py +++ b/google/cloud/bigtable/_mutate_rows.py @@ -120,18 +120,19 @@ def on_error_fn(exc): # raise exception detailing incomplete mutations all_errors = [] for idx, exc_list in error_dict.items(): - if exc_list: - if len(exc_list) == 1: - cause_exc = exc_list[0] - else: - cause_exc = 
bt_exceptions.RetryExceptionGroup(exc_list) - entry = mutation_entries[idx] - all_errors.append( - bt_exceptions.FailedMutationEntryError(idx, entry, cause_exc) - ) - # call on_terminal_state for each unreported failed mutation - if on_terminal_state: - await on_terminal_state(idx, entry, cause_exc) + if len(exc_list) == 0: + continue + elif len(exc_list) == 1: + cause_exc = exc_list[0] + else: + cause_exc = bt_exceptions.RetryExceptionGroup(exc_list) + entry = mutation_entries[idx] + all_errors.append( + bt_exceptions.FailedMutationEntryError(idx, entry, cause_exc) + ) + # call on_terminal_state for each unreported failed mutation + if on_terminal_state: + await on_terminal_state(idx, entry, cause_exc) if all_errors: raise bt_exceptions.MutationsExceptionGroup( all_errors, len(mutation_entries) @@ -142,7 +143,7 @@ async def _mutate_rows_retryable_attempt( gapic_client: "BigtableAsyncClient", table: "Table", timeout_generator: Iterator[float], - mutation_dict: dict[int, "RowMutationEntry"], + active_dict: dict[int, "RowMutationEntry"], error_dict: dict[int, list[Exception]], predicate: Callable[[Exception], bool], on_terminal_state: Callable[ @@ -164,7 +165,7 @@ async def _mutate_rows_retryable_attempt( - gapic_client: the client to use for the mutate_rows call - request: the request to send to the server, populated with table name and app profile id - per_request_timeout: the timeout to use for each mutate_rows attempt - - mutation_dict: a dictionary tracking unfinalized mutations. At the start of the request, + - active_dict: a dictionary tracking unfinalized mutations. At the start of the request, all entries are outstanding. As mutations are finalized, they are removed from the dict. - error_dict: a dictionary tracking errors associated with each entry index. Each retry will append a new error. Successful mutations will remove their index from the dict. 
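As a rough illustration of the bookkeeping described above (the entry objects and the failure below are stand-ins, not the library's real types), the two dicts evolve roughly like this across a single attempt:

    entry_0, entry_1, entry_2 = object(), object(), object()
    active_dict = {0: entry_0, 1: entry_1, 2: entry_2}  # all entries start outstanding
    error_dict = {0: [], 1: [], 2: []}
    # suppose the attempt reports success for entries 0 and 2, and a retryable failure for 1
    for idx in (0, 2):
        active_dict.pop(idx)  # finalized entries are not retried
        error_dict.pop(idx)   # successes drop their error history
    error_dict[1].append(RuntimeError("stand-in for a retryable RPC error"))
    assert list(active_dict) == [1]  # only the failed entry remains for the next attempt
    assert len(error_dict[1]) == 1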
@@ -175,11 +176,14 @@ async def _mutate_rows_retryable_attempt( - _MutateRowsIncomplete: if one or more retryable mutations remain incomplete at the end of the function - GoogleAPICallError: if the server returns an error on the grpc call """ - # update on_terminal_state to remove completed mutations from mutation_dict + # update on_terminal_state to remove finalized mutations from active_dict, + # and successful mutations from error_dict input_callback = on_terminal_state async def on_terminal_patched(idx, entry, exc): - mutation_dict.pop(idx) + active_dict.pop(idx) + if exc is None: + error_dict.pop(idx) if input_callback is not None: await input_callback(idx, entry, exc) @@ -187,7 +191,7 @@ async def on_terminal_patched(idx, entry, exc): # keep map between sub-request indices and global mutation_dict indices index_map: dict[int, int] = {} request_entries: list[dict[str, Any]] = [] - for request_idx, (global_idx, m) in enumerate(mutation_dict.items()): + for request_idx, (global_idx, m) in enumerate(active_dict.items()): index_map[request_idx] = global_idx request_entries.append(m._to_dict()) # make gapic request @@ -204,14 +208,14 @@ async def on_terminal_patched(idx, entry, exc): for result in result_list.entries: # convert sub-request index to global index idx = index_map[result.index] - entry = mutation_dict[idx] if idx in mutation_dict else None + if idx not in active_dict: + raise core_exceptions.ClientError( + f"Received result for already finalized mutation at index {idx}" + ) + entry = active_dict[idx] exc = None - if entry is None: - # this entry has already reached a terminal state - continue if result.status.code == 0: # mutation succeeded - error_dict.pop(idx) await on_terminal_state(idx, entry, None) else: # mutation failed @@ -226,6 +230,6 @@ async def on_terminal_patched(idx, entry, exc): if not predicate(exc) or not entry.is_idempotent(): await on_terminal_state(idx, entry, exc) # check if attempt succeeded, or needs to be retried - if mutation_dict: + if active_dict: # unfinished work; raise exception to trigger retry raise _MutateRowsIncomplete() From 1054bc473f6e6eb630bf679a2ba15564b184b443 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Tue, 30 May 2023 09:35:52 -0700 Subject: [PATCH 104/213] pass list of exceptions in callback --- google/cloud/bigtable/_mutate_rows.py | 16 +++++++++------- google/cloud/bigtable/client.py | 11 ++++++----- tests/unit/test__mutate_rows.py | 4 +++- 3 files changed, 18 insertions(+), 13 deletions(-) diff --git a/google/cloud/bigtable/_mutate_rows.py b/google/cloud/bigtable/_mutate_rows.py index b6e07b28c..ab452210c 100644 --- a/google/cloud/bigtable/_mutate_rows.py +++ b/google/cloud/bigtable/_mutate_rows.py @@ -46,7 +46,7 @@ async def _mutate_rows_operation( operation_timeout: float, per_request_timeout: float | None, on_terminal_state: Callable[ - [int, "RowMutationEntry", Exception | None], Coroutine[None, None, None] + [int, "RowMutationEntry", list[Exception] | None], Coroutine[None, None, None] ] | None = None, ): @@ -132,7 +132,7 @@ def on_error_fn(exc): ) # call on_terminal_state for each unreported failed mutation if on_terminal_state: - await on_terminal_state(idx, entry, cause_exc) + await on_terminal_state(idx, entry, exc_list) if all_errors: raise bt_exceptions.MutationsExceptionGroup( all_errors, len(mutation_entries) @@ -147,7 +147,7 @@ async def _mutate_rows_retryable_attempt( error_dict: dict[int, list[Exception]], predicate: Callable[[Exception], bool], on_terminal_state: Callable[ - [int, "RowMutationEntry", 
Exception | None], + [int, "RowMutationEntry", list[Exception] | None], Coroutine[None, None, None], ] | None = None, @@ -180,12 +180,14 @@ async def _mutate_rows_retryable_attempt( # and successful mutations from error_dict input_callback = on_terminal_state - async def on_terminal_patched(idx, entry, exc): + async def on_terminal_patched( + idx: int, entry: "RowMutationEntry", exc_list: list[Exception] | None + ): active_dict.pop(idx) - if exc is None: + if exc_list is None: error_dict.pop(idx) if input_callback is not None: - await input_callback(idx, entry, exc) + await input_callback(idx, entry, exc_list) on_terminal_state = on_terminal_patched # keep map between sub-request indices and global mutation_dict indices @@ -228,7 +230,7 @@ async def on_terminal_patched(idx, entry, exc): # if mutation is non-idempotent or the error is not retryable, # mark the mutation as terminal if not predicate(exc) or not entry.is_idempotent(): - await on_terminal_state(idx, entry, exc) + await on_terminal_state(idx, entry, error_dict[idx]) # check if attempt succeeded, or needs to be retried if active_dict: # unfinished work; raise exception to trigger retry diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 142e314f4..14e2d6459 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -21,7 +21,6 @@ Optional, Set, Callable, - Awaitable, Coroutine, TYPE_CHECKING, ) @@ -686,7 +685,9 @@ async def bulk_mutate_rows( *, operation_timeout: float | None = 60, per_request_timeout: float | None = None, - on_success: Callable[[int, RowMutationEntry], None | Awaitable[None]] + on_success: Callable[ + [int, RowMutationEntry], None | Coroutine[None, None, None] + ] | None = None, ): """ @@ -731,15 +732,15 @@ async def bulk_mutate_rows( raise ValueError("per_request_timeout must be less than operation_timeout") callback: Callable[ - [int, RowMutationEntry, Exception | None], Coroutine[None, None, None] + [int, RowMutationEntry, list[Exception] | None], Coroutine[None, None, None] ] | None = None if on_success is not None: # convert on_terminal_state callback to callback for successful results only # failed results will be rasied as exceptions async def callback( - idx: int, entry: RowMutationEntry, exc: Exception | None + idx: int, entry: RowMutationEntry, excs: list[Exception] | None ): - if exc is None and on_success is not None: + if excs is None and on_success is not None: output = on_success(idx, entry) if iscoroutine(output): await output diff --git a/tests/unit/test__mutate_rows.py b/tests/unit/test__mutate_rows.py index 0156aeca5..b0be26eb4 100644 --- a/tests/unit/test__mutate_rows.py +++ b/tests/unit/test__mutate_rows.py @@ -184,7 +184,9 @@ async def test_on_terminal_state_no_retries(self): assert call_args[0][0][2] is None assert call_args[1][0][0] == 1 # index assert call_args[1][0][1] == failure_mutation - assert call_args[1][0][2].grpc_status_code == 300 + error_list = call_args[1][0][2] + assert len(error_list) == 1 + assert error_list[0].grpc_status_code == 300 assert call_args[2][0][0] == 2 # index assert call_args[2][0][1] == success_mutation_2 assert call_args[2][0][2] is None From f39a891ba8af23b70d9300c53a862b6b473b6bce Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Tue, 30 May 2023 09:36:04 -0700 Subject: [PATCH 105/213] raise error in unexpected state --- google/cloud/bigtable/_mutate_rows.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigtable/_mutate_rows.py 
b/google/cloud/bigtable/_mutate_rows.py index ab452210c..60d1ba4ee 100644 --- a/google/cloud/bigtable/_mutate_rows.py +++ b/google/cloud/bigtable/_mutate_rows.py @@ -121,7 +121,9 @@ def on_error_fn(exc): all_errors = [] for idx, exc_list in error_dict.items(): if len(exc_list) == 0: - continue + raise core_exceptions.ClientError( + f"Mutation {idx} failed with no associated errors" + ) elif len(exc_list) == 1: cause_exc = exc_list[0] else: From 8038662d84a2fc22e6261f7555507b7d29e99cb7 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 31 May 2023 12:09:28 -0700 Subject: [PATCH 106/213] added timeout to user facing flush --- google/cloud/bigtable/mutations_batcher.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigtable/mutations_batcher.py b/google/cloud/bigtable/mutations_batcher.py index 01a29745f..826614357 100644 --- a/google/cloud/bigtable/mutations_batcher.py +++ b/google/cloud/bigtable/mutations_batcher.py @@ -230,7 +230,8 @@ def append(self, mutations: RowMutationEntry): ): self._schedule_flush() - async def flush(self, *, raise_exceptions=True): + # TODO: add tests for timeout + async def flush(self, *, raise_exceptions=True, timeout=30): """ Flush all staged mutations to the server @@ -240,9 +241,13 @@ async def flush(self, *, raise_exceptions=True): or when the batcher is closed. Raises: - MutationsExceptionGroup if raise_exceptions is True and any mutations fail + - asyncio.TimeoutError if timeout is reached """ # add recent staged mutations to flush task, and wait for flush to complete - await self._schedule_flush() + flush_task = self._schedule_flush() + # wait timeout seconds for flush to complete + # if timeout is exceeded, flush task will still be running in the background + await asyncio.wait_for(asyncio.shield(flush_task), timeout=timeout) # raise any unreported exceptions from this or previous flushes if raise_exceptions: self._raise_exceptions() From c2ec26dd2614c59c59cf0fa84fcb64aaf32d6ce0 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 31 May 2023 12:10:27 -0700 Subject: [PATCH 107/213] added comments --- google/cloud/bigtable/mutations_batcher.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/google/cloud/bigtable/mutations_batcher.py b/google/cloud/bigtable/mutations_batcher.py index 826614357..5d77539a6 100644 --- a/google/cloud/bigtable/mutations_batcher.py +++ b/google/cloud/bigtable/mutations_batcher.py @@ -239,6 +239,8 @@ async def flush(self, *, raise_exceptions=True, timeout=30): - raise_exceptions: if True, will raise any unreported exceptions from this or previous flushes. If False, exceptions will be stored in self.exceptions and raised on a future flush or when the batcher is closed. + - timeout: maximum time to wait for flush to complete. 
If exceeded, flush will + continue in the background and exceptions will be raised on a future flush Raises: - MutationsExceptionGroup if raise_exceptions is True and any mutations fail - asyncio.TimeoutError if timeout is reached From 1d9713524233de0e7223b91775361cbb0441478a Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 31 May 2023 13:07:48 -0700 Subject: [PATCH 108/213] removed callback --- google/cloud/bigtable/_mutate_rows.py | 39 ++----------- google/cloud/bigtable/client.py | 16 ------ tests/unit/test__mutate_rows.py | 79 --------------------------- tests/unit/test_client.py | 44 --------------- 4 files changed, 5 insertions(+), 173 deletions(-) diff --git a/google/cloud/bigtable/_mutate_rows.py b/google/cloud/bigtable/_mutate_rows.py index 60d1ba4ee..78c748d57 100644 --- a/google/cloud/bigtable/_mutate_rows.py +++ b/google/cloud/bigtable/_mutate_rows.py @@ -14,7 +14,7 @@ # from __future__ import annotations -from typing import Iterator, Callable, Any, Coroutine, TYPE_CHECKING +from typing import Iterator, Callable, Any, TYPE_CHECKING from google.api_core import exceptions as core_exceptions from google.api_core import retry_async as retries @@ -45,10 +45,6 @@ async def _mutate_rows_operation( mutation_entries: list["RowMutationEntry"], operation_timeout: float, per_request_timeout: float | None, - on_terminal_state: Callable[ - [int, "RowMutationEntry", list[Exception] | None], Coroutine[None, None, None] - ] - | None = None, ): """ Helper function for managing a single mutate_rows operation, end-to-end. @@ -60,8 +56,6 @@ async def _mutate_rows_operation( - operation_timeout: the timeout to use for the entire operation, in seconds. - per_request_timeout: the timeout to use for each mutate_rows attempt, in seconds. If not specified, the request will run until operation_timeout is reached. - - on_terminal_state: If given, this function will be called as soon as a mutation entry - reaches a terminal state (success or failure). """ mutations_dict: dict[int, RowMutationEntry] = { idx: mut for idx, mut in enumerate(mutation_entries) @@ -110,7 +104,6 @@ def on_error_fn(exc): mutations_dict, error_dict, predicate, - on_terminal_state, ) except Exception as exc: # exceptions raised by retryable are added to the list of exceptions for all unfinalized mutations @@ -132,9 +125,6 @@ def on_error_fn(exc): all_errors.append( bt_exceptions.FailedMutationEntryError(idx, entry, cause_exc) ) - # call on_terminal_state for each unreported failed mutation - if on_terminal_state: - await on_terminal_state(idx, entry, exc_list) if all_errors: raise bt_exceptions.MutationsExceptionGroup( all_errors, len(mutation_entries) @@ -148,11 +138,6 @@ async def _mutate_rows_retryable_attempt( active_dict: dict[int, "RowMutationEntry"], error_dict: dict[int, list[Exception]], predicate: Callable[[Exception], bool], - on_terminal_state: Callable[ - [int, "RowMutationEntry", list[Exception] | None], - Coroutine[None, None, None], - ] - | None = None, ): """ Helper function for managing a single mutate_rows attempt. @@ -172,26 +157,11 @@ async def _mutate_rows_retryable_attempt( - error_dict: a dictionary tracking errors associated with each entry index. Each retry will append a new error. Successful mutations will remove their index from the dict. - predicate: a function that takes an exception and returns True if the exception is retryable. - - on_terminal_state: If given, this function will be called as soon as a mutation entry - reaches a terminal state (success or failure). 
Raises: - _MutateRowsIncomplete: if one or more retryable mutations remain incomplete at the end of the function - GoogleAPICallError: if the server returns an error on the grpc call """ - # update on_terminal_state to remove finalized mutations from active_dict, - # and successful mutations from error_dict - input_callback = on_terminal_state - async def on_terminal_patched( - idx: int, entry: "RowMutationEntry", exc_list: list[Exception] | None - ): - active_dict.pop(idx) - if exc_list is None: - error_dict.pop(idx) - if input_callback is not None: - await input_callback(idx, entry, exc_list) - - on_terminal_state = on_terminal_patched # keep map between sub-request indices and global mutation_dict indices index_map: dict[int, int] = {} request_entries: list[dict[str, Any]] = [] @@ -220,7 +190,8 @@ async def on_terminal_patched( exc = None if result.status.code == 0: # mutation succeeded - await on_terminal_state(idx, entry, None) + active_dict.pop(idx) + error_dict.pop(idx) else: # mutation failed exc = core_exceptions.from_grpc_status( @@ -230,9 +201,9 @@ async def on_terminal_patched( ) error_dict[idx].append(exc) # if mutation is non-idempotent or the error is not retryable, - # mark the mutation as terminal + # remove mutation from active_dict so it is not retried if not predicate(exc) or not entry.is_idempotent(): - await on_terminal_state(idx, entry, error_dict[idx]) + active_dict.pop(idx) # check if attempt succeeded, or needs to be retried if active_dict: # unfinished work; raise exception to trigger retry diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 14e2d6459..7e965fde3 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -31,7 +31,6 @@ import warnings import sys import random -from inspect import iscoroutine from google.cloud.bigtable_v2.services.bigtable.client import BigtableClientMeta from google.cloud.bigtable_v2.services.bigtable.async_client import BigtableAsyncClient @@ -731,27 +730,12 @@ async def bulk_mutate_rows( if per_request_timeout is not None and per_request_timeout > operation_timeout: raise ValueError("per_request_timeout must be less than operation_timeout") - callback: Callable[ - [int, RowMutationEntry, list[Exception] | None], Coroutine[None, None, None] - ] | None = None - if on_success is not None: - # convert on_terminal_state callback to callback for successful results only - # failed results will be rasied as exceptions - async def callback( - idx: int, entry: RowMutationEntry, excs: list[Exception] | None - ): - if excs is None and on_success is not None: - output = on_success(idx, entry) - if iscoroutine(output): - await output - await _mutate_rows_operation( self.client._gapic_client, self, mutation_entries, operation_timeout, per_request_timeout, - on_terminal_state=callback, ) async def check_and_mutate_row( diff --git a/tests/unit/test__mutate_rows.py b/tests/unit/test__mutate_rows.py index b0be26eb4..81b8697bc 100644 --- a/tests/unit/test__mutate_rows.py +++ b/tests/unit/test__mutate_rows.py @@ -150,82 +150,3 @@ async def test_partial_success_non_retryable(self): assert len(errors[1]) == 1 assert errors[1][0].grpc_status_code == 300 assert 2 not in errors - - @pytest.mark.asyncio - async def test_on_terminal_state_no_retries(self): - """ - Should call on_terminal_state for each successful or non-retryable mutation - """ - from google.cloud.bigtable._mutate_rows import _mutate_rows_retryable_attempt - - success_mutation = mock.Mock() - success_mutation_2 = mock.Mock() - 
failure_mutation = mock.Mock() - mutations = {0: success_mutation, 1: failure_mutation, 2: success_mutation_2} - callback = AsyncMock() - errors = {0: [], 1: [], 2: []} - client = self._make_mock_client(mutations, error_dict={1: 300}) - expected_table = mock.Mock() - # raise retryable error 3 times, then raise non-retryable error - await _mutate_rows_retryable_attempt( - client, - expected_table, - iter([9]), - mutations, - errors, - lambda x: False, - callback, - ) - assert callback.call_count == 3 - assert callback.await_count == 3 - call_args = callback.call_args_list - assert call_args[0][0][0] == 0 # index - assert call_args[0][0][1] == success_mutation - assert call_args[0][0][2] is None - assert call_args[1][0][0] == 1 # index - assert call_args[1][0][1] == failure_mutation - error_list = call_args[1][0][2] - assert len(error_list) == 1 - assert error_list[0].grpc_status_code == 300 - assert call_args[2][0][0] == 2 # index - assert call_args[2][0][1] == success_mutation_2 - assert call_args[2][0][2] is None - - @pytest.mark.asyncio - async def test_on_terminal_state_with_retries(self): - """ - Should not call on_terminal_state for retryable mutations - """ - from google.cloud.bigtable._mutate_rows import ( - _mutate_rows_retryable_attempt, - _MutateRowsIncomplete, - ) - - success_mutation = mock.Mock() - success_mutation_2 = mock.Mock() - failure_mutation = mock.Mock() - mutations = {0: success_mutation, 1: failure_mutation, 2: success_mutation_2} - callback = AsyncMock() - errors = {0: [], 1: [], 2: []} - client = self._make_mock_client(mutations, error_dict={1: 300}) - expected_table = mock.Mock() - # raise retryable error 3 times, then raise non-retryable error - with pytest.raises(_MutateRowsIncomplete): - await _mutate_rows_retryable_attempt( - client, - expected_table, - iter([9]), - mutations, - errors, - lambda x: True, - callback, - ) - assert callback.call_count == 2 - assert callback.await_count == 2 - call_args = callback.call_args_list - assert call_args[0][0][0] == 0 # index - assert call_args[0][0][1] == success_mutation - assert call_args[0][0][2] is None - assert call_args[1][0][0] == 2 # index - assert call_args[1][0][1] == success_mutation_2 - assert call_args[1][0][2] is None diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 39a0d2939..3e273f3af 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -1844,50 +1844,6 @@ async def test_bulk_mutate_error_index(self): assert isinstance(cause.exceptions[1], DeadlineExceeded) assert isinstance(cause.exceptions[2], FailedPrecondition) - @pytest.mark.asyncio - async def test_bulk_mutate_rows_on_success(self): - """ - on_success should be called for each successful mutation - """ - from google.api_core.exceptions import ( - Aborted, - FailedPrecondition, - ) - from google.cloud.bigtable.exceptions import ( - MutationsExceptionGroup, - ) - - callback = mock.Mock() - async with self._make_client(project="project") as client: - async with client.get_table("instance", "table") as table: - with mock.patch.object( - client._gapic_client, "mutate_rows" - ) as mock_gapic: - # fail with retryable errors, then a non-retryable one - mock_gapic.side_effect = [ - self._mock_response([None, Aborted("mock"), None]), - self._mock_response([FailedPrecondition("final")]), - ] - with pytest.raises(MutationsExceptionGroup): - mutation = mutations.SetCell( - "family", b"qualifier", b"value", timestamp_micros=123 - ) - entries = [ - mutations.RowMutationEntry( - (f"row_key_{i}").encode(), [mutation] - 
) - for i in range(3) - ] - assert mutation.is_idempotent() is True - await table.bulk_mutate_rows( - entries, operation_timeout=1000, on_success=callback - ) - assert callback.call_count == 2 - assert callback.call_args_list[0][0][0] == 0 # index - assert callback.call_args_list[0][0][1] == entries[0] - assert callback.call_args_list[1][0][0] == 2 # index - assert callback.call_args_list[1][0][1] == entries[2] - @pytest.mark.parametrize("include_app_profile", [True, False]) @pytest.mark.asyncio async def test_bulk_mutate_row_metadata(self, include_app_profile): From 88e2bf5a9d077d727089e65faffa4e4748e9c1b4 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 31 May 2023 14:50:29 -0700 Subject: [PATCH 109/213] refactoring mutation attempt into class --- google/cloud/bigtable/_mutate_rows.py | 186 +++++++++++--------------- 1 file changed, 81 insertions(+), 105 deletions(-) diff --git a/google/cloud/bigtable/_mutate_rows.py b/google/cloud/bigtable/_mutate_rows.py index 78c748d57..593bc4fa0 100644 --- a/google/cloud/bigtable/_mutate_rows.py +++ b/google/cloud/bigtable/_mutate_rows.py @@ -14,7 +14,7 @@ # from __future__ import annotations -from typing import Iterator, Callable, Any, TYPE_CHECKING +from typing import Iterator, Callable, TYPE_CHECKING from google.api_core import exceptions as core_exceptions from google.api_core import retry_async as retries @@ -57,11 +57,6 @@ async def _mutate_rows_operation( - per_request_timeout: the timeout to use for each mutate_rows attempt, in seconds. If not specified, the request will run until operation_timeout is reached. """ - mutations_dict: dict[int, RowMutationEntry] = { - idx: mut for idx, mut in enumerate(mutation_entries) - } - - error_dict: dict[int, list[Exception]] = {idx: [] for idx in mutations_dict.keys()} predicate = retries.if_exception_type( core_exceptions.DeadlineExceeded, @@ -69,19 +64,8 @@ async def _mutate_rows_operation( _MutateRowsIncomplete, ) - def on_error_fn(exc): - if predicate(exc) and not isinstance(exc, _MutateRowsIncomplete): - # add this exception to list for each active mutation - for idx in error_dict.keys(): - error_dict[idx].append(exc) - # remove non-idempotent mutations from mutations_dict, so they are not retried - for idx, mut in list(mutations_dict.items()): - if not mut.is_idempotent(): - mutations_dict.pop(idx) - retry = retries.AsyncRetry( predicate=predicate, - on_error=on_error_fn, timeout=operation_timeout, initial=0.01, multiplier=2, @@ -92,27 +76,27 @@ def on_error_fn(exc): per_request_timeout, operation_timeout ) # wrap attempt in retry logic - retry_wrapped = retry(_mutate_rows_retryable_attempt) + manager = _MutateRowsAttemptManager( + gapic_client, + table, + mutation_entries, + attempt_timeout_gen, + predicate, + ) + retry_wrapped = retry(manager.run) # convert RetryErrors from retry wrapper into DeadlineExceeded errors deadline_wrapped = _convert_retry_deadline(retry_wrapped, operation_timeout) try: # trigger mutate_rows - await deadline_wrapped( - gapic_client, - table, - attempt_timeout_gen, - mutations_dict, - error_dict, - predicate, - ) + await deadline_wrapped() except Exception as exc: # exceptions raised by retryable are added to the list of exceptions for all unfinalized mutations - for error_list in error_dict.values(): - error_list.append(exc) + for idx in manager.remaining_indices: + manager._append_error(idx, exc) finally: # raise exception detailing incomplete mutations all_errors = [] - for idx, exc_list in error_dict.items(): + for idx, exc_list in manager.errors.items(): if 
len(exc_list) == 0: raise core_exceptions.ClientError( f"Mutation {idx} failed with no associated errors" @@ -131,80 +115,72 @@ def on_error_fn(exc): ) -async def _mutate_rows_retryable_attempt( - gapic_client: "BigtableAsyncClient", - table: "Table", - timeout_generator: Iterator[float], - active_dict: dict[int, "RowMutationEntry"], - error_dict: dict[int, list[Exception]], - predicate: Callable[[Exception], bool], -): - """ - Helper function for managing a single mutate_rows attempt. - - If one or more retryable mutations remain incomplete at the end of the function, - _MutateRowsIncomplete will be raised to trigger a retry - - This function is intended to be wrapped in an api_core.retry.AsyncRetry object, which will handle - timeouts and retrying raised exceptions. - - Args: - - gapic_client: the client to use for the mutate_rows call - - request: the request to send to the server, populated with table name and app profile id - - per_request_timeout: the timeout to use for each mutate_rows attempt - - active_dict: a dictionary tracking unfinalized mutations. At the start of the request, - all entries are outstanding. As mutations are finalized, they are removed from the dict. - - error_dict: a dictionary tracking errors associated with each entry index. - Each retry will append a new error. Successful mutations will remove their index from the dict. - - predicate: a function that takes an exception and returns True if the exception is retryable. - Raises: - - _MutateRowsIncomplete: if one or more retryable mutations remain incomplete at the end of the function - - GoogleAPICallError: if the server returns an error on the grpc call - """ - - # keep map between sub-request indices and global mutation_dict indices - index_map: dict[int, int] = {} - request_entries: list[dict[str, Any]] = [] - for request_idx, (global_idx, m) in enumerate(active_dict.items()): - index_map[request_idx] = global_idx - request_entries.append(m._to_dict()) - # make gapic request - metadata = _make_metadata(table.table_name, table.app_profile_id) - async for result_list in await gapic_client.mutate_rows( - request={ - "table_name": table.table_name, - "app_profile_id": table.app_profile_id, - "entries": request_entries, - }, - timeout=next(timeout_generator), - metadata=metadata, +class _MutateRowsAttemptManager: + def __init__( + self, + gapic_client: "BigtableAsyncClient", + table: "Table", + mutations: list["RowMutationEntry"], + timeout_generator: Iterator[float], + is_retryable_predicate: Callable[[Exception], bool], ): - for result in result_list.entries: - # convert sub-request index to global index - idx = index_map[result.index] - if idx not in active_dict: - raise core_exceptions.ClientError( - f"Received result for already finalized mutation at index {idx}" - ) - entry = active_dict[idx] - exc = None - if result.status.code == 0: - # mutation succeeded - active_dict.pop(idx) - error_dict.pop(idx) - else: - # mutation failed - exc = core_exceptions.from_grpc_status( - result.status.code, - result.status.message, - details=result.status.details, - ) - error_dict[idx].append(exc) - # if mutation is non-idempotent or the error is not retryable, - # remove mutation from active_dict so it is not retried - if not predicate(exc) or not entry.is_idempotent(): - active_dict.pop(idx) - # check if attempt succeeded, or needs to be retried - if active_dict: - # unfinished work; raise exception to trigger retry - raise _MutateRowsIncomplete() + self.gapic_client = gapic_client + self.table = table + self.mutations 
= mutations + self.remaining_indices = list(range(len(mutations))) + self.timeout_generator = timeout_generator + self.is_retryable = is_retryable_predicate + self.errors : dict[int, list[Exception]] = {} + self.metadata = _make_metadata(self.table.table_name, self.table.app_profile_id) + + async def run(self): + request_entries = [ + self.mutations[idx]._to_dict() for idx in self.remaining_indices + ] + new_remaining_indices : list[int] = [] + # make gapic request + try: + result_generator = await self.gapic_client.mutate_rows( + request={ + "table_name": self.table.table_name, + "app_profile_id": self.table.app_profile_id, + "entries": request_entries, + }, + timeout=next(self.timeout_generator), + metadata=self.metadata, + ) + async for result_list in result_generator: + for result in result_list.entries: + # convert sub-request index to global index + orig_idx = self.remaining_indices[result.index] + entry_error = core_exceptions.from_grpc_status( + result.status.code, result.status.message, details=result.status.details + ) + if result.status.code == 0: + continue + else: + self._append_error(orig_idx, entry_error, new_remaining_indices) + except Exception as exc: + # add this exception to list for each active mutation + for idx in self.remaining_indices: + self._append_error(idx, exc, new_remaining_indices) + # bubble up exception to be handled by retry wrapper + raise + finally: + self.remaining_indices = new_remaining_indices + # check if attempt succeeded, or needs to be retried + if self.remaining_indices: + # unfinished work; raise exception to trigger retry + raise _MutateRowsIncomplete + + def _append_error( + self, idx: int, exc: Exception, retry_index_list: list[int] | None = None + ): + entry = self.mutations[idx] + self.errors.setdefault(idx, []).append(exc) + if ( + entry.is_idempotent() + and self.is_retryable(exc) + and retry_index_list is not None + ): + retry_index_list.append(idx) From a3c0166a6d219b5d6a9269e6400b81b3aa704103 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 31 May 2023 15:15:42 -0700 Subject: [PATCH 110/213] use partial function --- google/cloud/bigtable/_mutate_rows.py | 32 ++++++++++++++------------- tests/unit/test_client.py | 17 +++++++------- 2 files changed, 25 insertions(+), 24 deletions(-) diff --git a/google/cloud/bigtable/_mutate_rows.py b/google/cloud/bigtable/_mutate_rows.py index 593bc4fa0..bc813e490 100644 --- a/google/cloud/bigtable/_mutate_rows.py +++ b/google/cloud/bigtable/_mutate_rows.py @@ -14,7 +14,8 @@ # from __future__ import annotations -from typing import Iterator, Callable, TYPE_CHECKING +from typing import Iterator, Callable, Awaitable, AsyncIterable, TYPE_CHECKING +import functools from google.api_core import exceptions as core_exceptions from google.api_core import retry_async as retries @@ -29,6 +30,7 @@ ) from google.cloud.bigtable.client import Table from google.cloud.bigtable.mutations import RowMutationEntry + from google.cloud.bigtable_v2.types.bigtable import MutateRowsResponse class _MutateRowsIncomplete(RuntimeError): @@ -75,10 +77,18 @@ async def _mutate_rows_operation( attempt_timeout_gen = _attempt_timeout_generator( per_request_timeout, operation_timeout ) + # create partial function to pass to trigger rpc call + metadata = _make_metadata(table.table_name, table.app_profile_id) + gapic_fn = functools.partial( + gapic_client.mutate_rows, + table_name=table.table_name, + app_profile_id=table.app_profile_id, + metadata=metadata, + ) + # wrap attempt in retry logic manager = _MutateRowsAttemptManager( - 
gapic_client, - table, + gapic_fn, mutation_entries, attempt_timeout_gen, predicate, @@ -118,20 +128,17 @@ async def _mutate_rows_operation( class _MutateRowsAttemptManager: def __init__( self, - gapic_client: "BigtableAsyncClient", - table: "Table", + gapic_fn: Callable[..., Awaitable[AsyncIterable["MutateRowsResponse"]]], mutations: list["RowMutationEntry"], timeout_generator: Iterator[float], is_retryable_predicate: Callable[[Exception], bool], ): - self.gapic_client = gapic_client - self.table = table + self.gapic_fn = gapic_fn self.mutations = mutations self.remaining_indices = list(range(len(mutations))) self.timeout_generator = timeout_generator self.is_retryable = is_retryable_predicate self.errors : dict[int, list[Exception]] = {} - self.metadata = _make_metadata(self.table.table_name, self.table.app_profile_id) async def run(self): request_entries = [ @@ -140,14 +147,9 @@ async def run(self): new_remaining_indices : list[int] = [] # make gapic request try: - result_generator = await self.gapic_client.mutate_rows( - request={ - "table_name": self.table.table_name, - "app_profile_id": self.table.app_profile_id, - "entries": request_entries, - }, + result_generator = await self.gapic_fn( timeout=next(self.timeout_generator), - metadata=self.metadata, + entries=request_entries, ) async for result_list in result_generator: for result in result_list.entries: diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 3e273f3af..be3703a23 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -1554,14 +1554,13 @@ async def test_bulk_mutate_rows(self, mutation_arg): per_request_timeout=expected_per_request_timeout, ) assert mock_gapic.call_count == 1 - request = mock_gapic.call_args[1]["request"] + kwargs = mock_gapic.call_args[1] assert ( - request["table_name"] + kwargs["table_name"] == "projects/project/instances/instance/tables/table" ) - assert request["entries"] == [bulk_mutation._to_dict()] - found_per_request_timeout = mock_gapic.call_args[1]["timeout"] - assert found_per_request_timeout == expected_per_request_timeout + assert kwargs["entries"] == [bulk_mutation._to_dict()] + assert kwargs["timeout"] == expected_per_request_timeout @pytest.mark.asyncio async def test_bulk_mutate_rows_multiple_entries(self): @@ -1579,13 +1578,13 @@ async def test_bulk_mutate_rows_multiple_entries(self): [entry_1, entry_2], ) assert mock_gapic.call_count == 1 - request = mock_gapic.call_args[1]["request"] + kwargs = mock_gapic.call_args[1] assert ( - request["table_name"] + kwargs["table_name"] == "projects/project/instances/instance/tables/table" ) - assert request["entries"][0] == entry_1._to_dict() - assert request["entries"][1] == entry_2._to_dict() + assert kwargs["entries"][0] == entry_1._to_dict() + assert kwargs["entries"][1] == entry_2._to_dict() @pytest.mark.asyncio @pytest.mark.parametrize( From 70c35efd532e5c3053954e6da4935450b2ea32dc Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 31 May 2023 15:47:32 -0700 Subject: [PATCH 111/213] renamed class --- google/cloud/bigtable/_mutate_rows.py | 31 +++--- tests/unit/test__mutate_rows.py | 153 +++++++++++++------------- 2 files changed, 96 insertions(+), 88 deletions(-) diff --git a/google/cloud/bigtable/_mutate_rows.py b/google/cloud/bigtable/_mutate_rows.py index bc813e490..40588e40c 100644 --- a/google/cloud/bigtable/_mutate_rows.py +++ b/google/cloud/bigtable/_mutate_rows.py @@ -87,13 +87,13 @@ async def _mutate_rows_operation( ) # wrap attempt in retry logic - manager = _MutateRowsAttemptManager( + 
attempt_context = _MutateRowsAttemptContext( gapic_fn, mutation_entries, attempt_timeout_gen, predicate, ) - retry_wrapped = retry(manager.run) + retry_wrapped = retry(attempt_context.run_attempt) # convert RetryErrors from retry wrapper into DeadlineExceeded errors deadline_wrapped = _convert_retry_deadline(retry_wrapped, operation_timeout) try: @@ -101,12 +101,12 @@ async def _mutate_rows_operation( await deadline_wrapped() except Exception as exc: # exceptions raised by retryable are added to the list of exceptions for all unfinalized mutations - for idx in manager.remaining_indices: - manager._append_error(idx, exc) + for idx in attempt_context.remaining_indices: + attempt_context.append_error(idx, exc) finally: # raise exception detailing incomplete mutations all_errors = [] - for idx, exc_list in manager.errors.items(): + for idx, exc_list in attempt_context.errors.items(): if len(exc_list) == 0: raise core_exceptions.ClientError( f"Mutation {idx} failed with no associated errors" @@ -125,7 +125,7 @@ async def _mutate_rows_operation( ) -class _MutateRowsAttemptManager: +class _MutateRowsAttemptContext: def __init__( self, gapic_fn: Callable[..., Awaitable[AsyncIterable["MutateRowsResponse"]]], @@ -138,13 +138,16 @@ def __init__( self.remaining_indices = list(range(len(mutations))) self.timeout_generator = timeout_generator self.is_retryable = is_retryable_predicate - self.errors : dict[int, list[Exception]] = {} + self.errors: dict[int, list[Exception]] = {} - async def run(self): + async def run_attempt(self): request_entries = [ self.mutations[idx]._to_dict() for idx in self.remaining_indices ] - new_remaining_indices : list[int] = [] + if not request_entries: + # no more mutations. return early + return + new_remaining_indices: list[int] = [] # make gapic request try: result_generator = await self.gapic_fn( @@ -156,16 +159,18 @@ async def run(self): # convert sub-request index to global index orig_idx = self.remaining_indices[result.index] entry_error = core_exceptions.from_grpc_status( - result.status.code, result.status.message, details=result.status.details + result.status.code, + result.status.message, + details=result.status.details, ) if result.status.code == 0: continue else: - self._append_error(orig_idx, entry_error, new_remaining_indices) + self.append_error(orig_idx, entry_error, new_remaining_indices) except Exception as exc: # add this exception to list for each active mutation for idx in self.remaining_indices: - self._append_error(idx, exc, new_remaining_indices) + self.append_error(idx, exc, new_remaining_indices) # bubble up exception to be handled by retry wrapper raise finally: @@ -175,7 +180,7 @@ async def run(self): # unfinished work; raise exception to trigger retry raise _MutateRowsIncomplete - def _append_error( + def append_error( self, idx: int, exc: Exception, retry_index_list: list[int] | None = None ): entry = self.mutations[idx] diff --git a/tests/unit/test__mutate_rows.py b/tests/unit/test__mutate_rows.py index 81b8697bc..268230dc6 100644 --- a/tests/unit/test__mutate_rows.py +++ b/tests/unit/test__mutate_rows.py @@ -26,10 +26,14 @@ from mock import AsyncMock # type: ignore -class Test_MutateRowsRetryableAttempt: - async def _mock_stream(self, mutation_dict, error_dict): - items = list(mutation_dict.items()) - for idx, entry in items: +class TestMutateRowsAttemptContext: + def _make_one(self, *args, **kwargs): + from google.cloud.bigtable._mutate_rows import _MutateRowsAttemptContext + + return _MutateRowsAttemptContext(*args, **kwargs) + + async def 
_mock_stream(self, mutation_list, error_dict): + for idx, entry in enumerate(mutation_list): code = error_dict.get(idx, 0) yield MutateRowsResponse( entries=[ @@ -39,114 +43,113 @@ async def _mock_stream(self, mutation_dict, error_dict): ] ) - def _make_mock_client(self, mutation_dict, error_dict=None): - client = mock.Mock() - client.mutate_rows = AsyncMock() + def _make_mock_gapic(self, mutation_list, error_dict=None): + mock_fn = AsyncMock() if error_dict is None: error_dict = {} - client.mutate_rows.side_effect = lambda *args, **kwargs: self._mock_stream( - mutation_dict, error_dict + mock_fn.side_effect = lambda *args, **kwargs: self._mock_stream( + mutation_list, error_dict + ) + return mock_fn + + def test_ctor(self): + mock_gapic = mock.Mock() + mutations = list(range(10)) + timeout_gen = mock.Mock() + predicate = mock.Mock() + instance = self._make_one( + mock_gapic, + mutations, + timeout_gen, + predicate, ) - return client + assert instance.gapic_fn == mock_gapic + assert instance.mutations == mutations + assert instance.remaining_indices == list(range(10)) + assert instance.timeout_generator == timeout_gen + assert instance.is_retryable == predicate + assert instance.errors == {} @pytest.mark.asyncio async def test_single_entry_success(self): """Test mutating a single entry""" - from google.cloud.bigtable._mutate_rows import _mutate_rows_retryable_attempt import itertools mutation = mock.Mock() mutations = {0: mutation} - client = self._make_mock_client(mutations) - errors = {0: []} - expected_table = mock.Mock() expected_timeout = 9 mock_timeout_gen = itertools.repeat(expected_timeout) - await _mutate_rows_retryable_attempt( - client, - expected_table, - mock_timeout_gen, + mock_gapic_fn = self._make_mock_gapic(mutations) + instance = self._make_one( + mock_gapic_fn, mutations, - errors, + mock_timeout_gen, lambda x: False, ) - assert len(mutations) == 0 - assert 0 not in errors - assert client.mutate_rows.call_count == 1 - _, kwargs = client.mutate_rows.call_args + await instance.run_attempt() + assert len(instance.remaining_indices) == 0 + assert mock_gapic_fn.call_count == 1 + _, kwargs = mock_gapic_fn.call_args assert kwargs["timeout"] == expected_timeout - request = kwargs["request"] - assert request["table_name"] == expected_table.table_name - assert request["app_profile_id"] == expected_table.app_profile_id - assert request["entries"] == [mutation._to_dict()] + assert kwargs["entries"] == [mutation._to_dict()] @pytest.mark.asyncio async def test_empty_request(self): - """Calling with no mutations should result in a single API call""" - from google.cloud.bigtable._mutate_rows import _mutate_rows_retryable_attempt - - client = self._make_mock_client({}) - expected_table = mock.Mock() - await _mutate_rows_retryable_attempt( - client, expected_table, iter([0]), {}, {}, lambda x: False + """Calling with no mutations should result in no API calls""" + mock_timeout_gen = iter([0] * 10) + mock_gapic_fn = self._make_mock_gapic([]) + instance = self._make_one( + mock_gapic_fn, + [], + mock_timeout_gen, + lambda x: False, ) - assert client.mutate_rows.call_count == 1 + await instance.run_attempt() + assert mock_gapic_fn.call_count == 0 @pytest.mark.asyncio async def test_partial_success_retryable(self): """Some entries succeed, but one fails. 
Should report the proper index, and raise incomplete exception""" - from google.cloud.bigtable._mutate_rows import ( - _mutate_rows_retryable_attempt, - _MutateRowsIncomplete, - ) + from google.cloud.bigtable._mutate_rows import _MutateRowsIncomplete success_mutation = mock.Mock() success_mutation_2 = mock.Mock() failure_mutation = mock.Mock() - mutations = {0: success_mutation, 1: failure_mutation, 2: success_mutation_2} - errors = {0: [], 1: [], 2: []} - client = self._make_mock_client(mutations, error_dict={1: 300}) - # raise retryable error 3 times, then raise non-retryable error - expected_table = mock.Mock() - expected_timeout = 9 + mutations = [success_mutation, failure_mutation, success_mutation_2] + mock_timeout_gen = iter([0] * 10) + mock_gapic_fn = self._make_mock_gapic(mutations, error_dict={1: 300}) + instance = self._make_one( + mock_gapic_fn, + mutations, + mock_timeout_gen, + lambda x: True, + ) with pytest.raises(_MutateRowsIncomplete): - await _mutate_rows_retryable_attempt( - client, - expected_table, - iter([expected_timeout]), - mutations, - errors, - lambda x: True, - ) - assert mutations == {1: failure_mutation} - assert 0 not in errors - assert len(errors[1]) == 1 - assert errors[1][0].grpc_status_code == 300 - assert 2 not in errors + await instance.run_attempt() + assert instance.remaining_indices == [1] + assert 0 not in instance.errors + assert len(instance.errors[1]) == 1 + assert instance.errors[1][0].grpc_status_code == 300 + assert 2 not in instance.errors @pytest.mark.asyncio async def test_partial_success_non_retryable(self): """Some entries succeed, but one fails. Exception marked as non-retryable. Do not raise incomplete error""" - from google.cloud.bigtable._mutate_rows import _mutate_rows_retryable_attempt - success_mutation = mock.Mock() success_mutation_2 = mock.Mock() failure_mutation = mock.Mock() - mutations = {0: success_mutation, 1: failure_mutation, 2: success_mutation_2} - errors = {0: [], 1: [], 2: []} - client = self._make_mock_client(mutations, error_dict={1: 300}) - expected_timeout = 9 - expected_table = mock.Mock() - await _mutate_rows_retryable_attempt( - client, - expected_table, - iter([expected_timeout]), + mutations = [success_mutation, failure_mutation, success_mutation_2] + mock_timeout_gen = iter([0] * 10) + mock_gapic_fn = self._make_mock_gapic(mutations, error_dict={1: 300}) + instance = self._make_one( + mock_gapic_fn, mutations, - errors, + mock_timeout_gen, lambda x: False, ) - assert len(mutations) == 0 - assert 0 not in errors - assert len(errors[1]) == 1 - assert errors[1][0].grpc_status_code == 300 - assert 2 not in errors + await instance.run_attempt() + assert instance.remaining_indices == [] + assert 0 not in instance.errors + assert len(instance.errors[1]) == 1 + assert instance.errors[1][0].grpc_status_code == 300 + assert 2 not in instance.errors From e00f59240c0645a84c6f067bad8392ab50f18131 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 31 May 2023 17:03:17 -0700 Subject: [PATCH 112/213] added comments --- google/cloud/bigtable/_mutate_rows.py | 29 ++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigtable/_mutate_rows.py b/google/cloud/bigtable/_mutate_rows.py index 40588e40c..f459bb94b 100644 --- a/google/cloud/bigtable/_mutate_rows.py +++ b/google/cloud/bigtable/_mutate_rows.py @@ -53,7 +53,7 @@ async def _mutate_rows_operation( Args: - gapic_client: the client to use for the mutate_rows call - - request: A request dict containing table name, app 
profile id, and other details to inclide in the request + - table: the table associated with the request - mutation_entries: a list of RowMutationEntry objects to send to the server - operation_timeout: the timeout to use for the entire operation, in seconds. - per_request_timeout: the timeout to use for each mutate_rows attempt, in seconds. @@ -133,6 +133,16 @@ def __init__( timeout_generator: Iterator[float], is_retryable_predicate: Callable[[Exception], bool], ): + """ + Helper class for managing saved state between mutate_rows attempts. + + Args: + - gapic_fn: the function to call to trigger a mutate_rows rpc + - mutations: the list of mutations to send to the server + - timeout_generator: an iterator that yields values to use for the rpc timeout + - is_retryable_predicate: a function that returns True if an exception is retryable + should be the same predicate used by the retry wrapper + """ self.gapic_fn = gapic_fn self.mutations = mutations self.remaining_indices = list(range(len(mutations))) @@ -141,6 +151,14 @@ def __init__( self.errors: dict[int, list[Exception]] = {} async def run_attempt(self): + """ + Run a single attempt of the mutate_rows rpc. + + Raises: + - _MutateRowsIncomplete: if there are failed mutations eligible for + retry after the attempt is complete + - GoogleAPICallError: if the gapic rpc fails + """ request_entries = [ self.mutations[idx]._to_dict() for idx in self.remaining_indices ] @@ -183,6 +201,15 @@ async def run_attempt(self): def append_error( self, idx: int, exc: Exception, retry_index_list: list[int] | None = None ): + """ + Add an exception to the list of exceptions for a given mutation index, + and optionally add it to a working list of indices to retry. + + Args: + - idx: the index of the mutation that failed + - exc: the exception to add to the list + - retry_index_list: a list to add the index to, if the mutation should be retried. 
+ """ entry = self.mutations[idx] self.errors.setdefault(idx, []).append(exc) if ( From 18af78a66a30249503b5df9de60297b175476203 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 31 May 2023 17:03:26 -0700 Subject: [PATCH 113/213] added tests --- tests/unit/test__mutate_rows.py | 170 ++++++++++++++++++++++++++++++++ 1 file changed, 170 insertions(+) diff --git a/tests/unit/test__mutate_rows.py b/tests/unit/test__mutate_rows.py index 268230dc6..497b5d419 100644 --- a/tests/unit/test__mutate_rows.py +++ b/tests/unit/test__mutate_rows.py @@ -16,6 +16,7 @@ from google.cloud.bigtable_v2.types import MutateRowsResponse from google.rpc import status_pb2 +import google.api_core.exceptions as core_exceptions # try/except added for compatibility with python < 3.8 try: @@ -26,6 +27,175 @@ from mock import AsyncMock # type: ignore +class TestMutateRowsOperation: + @pytest.mark.asyncio + async def test_mutate_rows_operation(self): + """ + Test successful case of mutate_rows_operation + """ + from google.cloud.bigtable._mutate_rows import _mutate_rows_operation + + client = mock.Mock() + table = mock.Mock() + entries = [mock.Mock(), mock.Mock()] + operation_timeout = 0.05 + with mock.patch( + "google.cloud.bigtable._mutate_rows._MutateRowsAttemptContext.run_attempt", + AsyncMock(), + ) as attempt_mock: + attempt_mock.return_value = None + await _mutate_rows_operation( + client, table, entries, operation_timeout, operation_timeout + ) + assert attempt_mock.call_count == 1 + + @pytest.mark.asyncio + async def test_mutate_rows_operation_args(self): + """ + Test the args passed down to _MutateRowsAttemptContext + """ + from google.cloud.bigtable._mutate_rows import _mutate_rows_operation + from google.cloud.bigtable._mutate_rows import _MutateRowsIncomplete + from google.api_core.exceptions import DeadlineExceeded + from google.api_core.exceptions import ServiceUnavailable + + client = mock.Mock() + table = mock.Mock() + entries = [mock.Mock(), mock.Mock()] + operation_timeout = 0.05 + attempt_timeout = 0.01 + with mock.patch( + "google.cloud.bigtable._mutate_rows._MutateRowsAttemptContext.__init__" + ) as attempt_mock: + attempt_mock.side_effect = RuntimeError("abort") + try: + await _mutate_rows_operation( + client, table, entries, operation_timeout, attempt_timeout + ) + except RuntimeError: + pass + args, kwargs = attempt_mock.call_args + found_fn = args[0] + found_entries = args[1] + found_timeout_gen = args[2] + found_predicate = args[3] + # running found_fn should trigger a client call + assert client.mutate_rows.call_count == 0 + found_fn() + assert client.mutate_rows.call_count == 1 + # found_fn should call with table details + inner_kwargs = client.mutate_rows.call_args[1] + assert len(inner_kwargs) == 3 + assert inner_kwargs["table_name"] == table.table_name + assert inner_kwargs["app_profile_id"] == table.app_profile_id + metadata = inner_kwargs["metadata"] + assert len(metadata) == 1 + assert metadata[0][0] == "x-goog-request-params" + assert str(table.table_name) in metadata[0][1] + assert str(table.app_profile_id) in metadata[0][1] + # entries should be passed down + assert found_entries == entries + # timeout_gen should generate per-attempt timeout + assert next(found_timeout_gen) == attempt_timeout + # ensure predicate is set + assert found_predicate is not None + assert found_predicate(DeadlineExceeded("")) is True + assert found_predicate(ServiceUnavailable("")) is True + assert found_predicate(_MutateRowsIncomplete("")) is True + assert found_predicate(RuntimeError("")) is False + 
+ @pytest.mark.parametrize( + "exc_type", [RuntimeError, ZeroDivisionError, core_exceptions.Forbidden] + ) + @pytest.mark.asyncio + async def test_mutate_rows_exception(self, exc_type): + """ + exceptions raised from retryable should be raised in MutationsExceptionGroup + """ + from google.cloud.bigtable._mutate_rows import _mutate_rows_operation + from google.cloud.bigtable.exceptions import MutationsExceptionGroup + from google.cloud.bigtable.exceptions import FailedMutationEntryError + + client = mock.Mock() + table = mock.Mock() + entries = [mock.Mock()] + operation_timeout = 0.05 + expected_cause = exc_type("abort") + with mock.patch( + "google.cloud.bigtable._mutate_rows._MutateRowsAttemptContext.run_attempt", + AsyncMock(), + ) as attempt_mock: + attempt_mock.side_effect = expected_cause + found_exc = None + try: + await _mutate_rows_operation( + client, table, entries, operation_timeout, operation_timeout + ) + except MutationsExceptionGroup as e: + found_exc = e + assert attempt_mock.call_count == 1 + assert len(found_exc.exceptions) == 1 + assert isinstance(found_exc.exceptions[0], FailedMutationEntryError) + assert found_exc.exceptions[0].__cause__ == expected_cause + + @pytest.mark.parametrize( + "exc_type", + [core_exceptions.DeadlineExceeded, core_exceptions.ServiceUnavailable], + ) + @pytest.mark.asyncio + async def test_mutate_rows_exception_retryable_eventually_pass(self, exc_type): + """ + If an exception fails but eventually passes, it should not raise an exception + """ + from google.cloud.bigtable._mutate_rows import _mutate_rows_operation + + client = mock.Mock() + table = mock.Mock() + entries = [mock.Mock()] + operation_timeout = 1 + expected_cause = exc_type("retry") + num_retries = 2 + with mock.patch( + "google.cloud.bigtable._mutate_rows._MutateRowsAttemptContext.run_attempt", + AsyncMock(), + ) as attempt_mock: + attempt_mock.side_effect = [expected_cause] * num_retries + [None] + await _mutate_rows_operation( + client, table, entries, operation_timeout, operation_timeout + ) + assert attempt_mock.call_count == num_retries + 1 + + @pytest.mark.asyncio + async def test_mutate_rows_incomplete_ignored(self): + """ + MutateRowsIncomplete exceptions should not be added to error list + """ + from google.cloud.bigtable._mutate_rows import _mutate_rows_operation + from google.cloud.bigtable._mutate_rows import _MutateRowsIncomplete + from google.cloud.bigtable.exceptions import MutationsExceptionGroup + from google.api_core.exceptions import DeadlineExceeded + + client = mock.Mock() + table = mock.Mock() + entries = [mock.Mock()] + operation_timeout = 0.05 + with mock.patch( + "google.cloud.bigtable._mutate_rows._MutateRowsAttemptContext.run_attempt", + AsyncMock(), + ) as attempt_mock: + attempt_mock.side_effect = _MutateRowsIncomplete("ignored") + found_exc = None + try: + await _mutate_rows_operation( + client, table, entries, operation_timeout, operation_timeout + ) + except MutationsExceptionGroup as e: + found_exc = e + assert attempt_mock.call_count > 0 + assert len(found_exc.exceptions) == 1 + assert isinstance(found_exc.exceptions[0].__cause__, DeadlineExceeded) + + class TestMutateRowsAttemptContext: def _make_one(self, *args, **kwargs): from google.cloud.bigtable._mutate_rows import _MutateRowsAttemptContext From 23e84f5a01912d2035d7efee7633bae2382afc7a Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 31 May 2023 17:32:20 -0700 Subject: [PATCH 114/213] improved helpers --- google/cloud/bigtable/_helpers.py | 13 ++++++------- 
tests/unit/test__helpers.py | 2 -- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/google/cloud/bigtable/_helpers.py b/google/cloud/bigtable/_helpers.py index ff70805e8..dec4c2014 100644 --- a/google/cloud/bigtable/_helpers.py +++ b/google/cloud/bigtable/_helpers.py @@ -26,14 +26,13 @@ def _make_metadata( - table_name: str | None, app_profile_id: str | None + table_name: str, app_profile_id: str | None ) -> list[tuple[str, str]]: """ Create properly formatted gRPC metadata for requests. """ params = [] - if table_name is not None: - params.append(f"table_name={table_name}") + params.append(f"table_name={table_name}") if app_profile_id is not None: params.append(f"app_profile_id={app_profile_id}") params_str = ",".join(params) @@ -50,11 +49,11 @@ def _attempt_timeout_generator( at which point it will return the remaining time in the operation_timeout. Args: - - per_request_timeout: The timeout value to use for each request. If None, - the operation_timeout will be used for each request. - - operation_timeout: The timeout value to use for the entire operation. + - per_request_timeout: The timeout value to use for each request, in seconds. + If None, the operation_timeout will be used for each request. + - operation_timeout: The timeout value to use for the entire operationm in seconds. Yields: - - The timeout value to use for the next request. + - The timeout value to use for the next request, in seonds """ per_request_timeout = ( per_request_timeout if per_request_timeout is not None else operation_timeout diff --git a/tests/unit/test__helpers.py b/tests/unit/test__helpers.py index 6e7adc610..2765afe24 100644 --- a/tests/unit/test__helpers.py +++ b/tests/unit/test__helpers.py @@ -24,9 +24,7 @@ class TestMakeMetadata: "table,profile,expected", [ ("table", "profile", "table_name=table,app_profile_id=profile"), - (None, "profile", "app_profile_id=profile"), ("table", None, "table_name=table"), - (None, None, ""), ], ) def test__make_metadata(self, table, profile, expected): From c4fd78f92d8bd04df8214a2c6f0a9e84da5c15b5 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Sat, 3 Jun 2023 18:32:05 -0400 Subject: [PATCH 115/213] build(deps): bump cryptography from 39.0.1 to 41.0.0 in /synthtool/gcp/templates/python_library/.kokoro (#793) Source-Link: https://github.com/googleapis/synthtool/commit/d0f51a0c2a9a6bcca86911eabea9e484baadf64b Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:240b5bcc2bafd450912d2da2be15e62bc6de2cf839823ae4bf94d4f392b451dc Co-authored-by: Owl Bot --- .github/.OwlBot.lock.yaml | 4 ++-- .kokoro/requirements.txt | 42 +++++++++++++++++++-------------------- 2 files changed, 22 insertions(+), 24 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index 32b3c4865..02a4dedce 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. 
docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:9bc5fa3b62b091f60614c08a7fb4fd1d3e1678e326f34dd66ce1eefb5dc3267b -# created: 2023-05-25T14:56:16.294623272Z + digest: sha256:240b5bcc2bafd450912d2da2be15e62bc6de2cf839823ae4bf94d4f392b451dc +# created: 2023-06-03T21:25:37.968717478Z diff --git a/.kokoro/requirements.txt b/.kokoro/requirements.txt index 3b8d7ee81..c7929db6d 100644 --- a/.kokoro/requirements.txt +++ b/.kokoro/requirements.txt @@ -113,28 +113,26 @@ commonmark==0.9.1 \ --hash=sha256:452f9dc859be7f06631ddcb328b6919c67984aca654e5fefb3914d54691aed60 \ --hash=sha256:da2f38c92590f83de410ba1a3cbceafbc74fee9def35f9251ba9a971d6d66fd9 # via rich -cryptography==39.0.1 \ - --hash=sha256:0f8da300b5c8af9f98111ffd512910bc792b4c77392a9523624680f7956a99d4 \ - --hash=sha256:35f7c7d015d474f4011e859e93e789c87d21f6f4880ebdc29896a60403328f1f \ - --hash=sha256:5aa67414fcdfa22cf052e640cb5ddc461924a045cacf325cd164e65312d99502 \ - --hash=sha256:5d2d8b87a490bfcd407ed9d49093793d0f75198a35e6eb1a923ce1ee86c62b41 \ - --hash=sha256:6687ef6d0a6497e2b58e7c5b852b53f62142cfa7cd1555795758934da363a965 \ - --hash=sha256:6f8ba7f0328b79f08bdacc3e4e66fb4d7aab0c3584e0bd41328dce5262e26b2e \ - --hash=sha256:706843b48f9a3f9b9911979761c91541e3d90db1ca905fd63fee540a217698bc \ - --hash=sha256:807ce09d4434881ca3a7594733669bd834f5b2c6d5c7e36f8c00f691887042ad \ - --hash=sha256:83e17b26de248c33f3acffb922748151d71827d6021d98c70e6c1a25ddd78505 \ - --hash=sha256:96f1157a7c08b5b189b16b47bc9db2332269d6680a196341bf30046330d15388 \ - --hash=sha256:aec5a6c9864be7df2240c382740fcf3b96928c46604eaa7f3091f58b878c0bb6 \ - --hash=sha256:b0afd054cd42f3d213bf82c629efb1ee5f22eba35bf0eec88ea9ea7304f511a2 \ - --hash=sha256:ced4e447ae29ca194449a3f1ce132ded8fcab06971ef5f618605aacaa612beac \ - --hash=sha256:d1f6198ee6d9148405e49887803907fe8962a23e6c6f83ea7d98f1c0de375695 \ - --hash=sha256:e124352fd3db36a9d4a21c1aa27fd5d051e621845cb87fb851c08f4f75ce8be6 \ - --hash=sha256:e422abdec8b5fa8462aa016786680720d78bdce7a30c652b7fadf83a4ba35336 \ - --hash=sha256:ef8b72fa70b348724ff1218267e7f7375b8de4e8194d1636ee60510aae104cd0 \ - --hash=sha256:f0c64d1bd842ca2633e74a1a28033d139368ad959872533b1bab8c80e8240a0c \ - --hash=sha256:f24077a3b5298a5a06a8e0536e3ea9ec60e4c7ac486755e5fb6e6ea9b3500106 \ - --hash=sha256:fdd188c8a6ef8769f148f88f859884507b954cc64db6b52f66ef199bb9ad660a \ - --hash=sha256:fe913f20024eb2cb2f323e42a64bdf2911bb9738a15dba7d3cce48151034e3a8 +cryptography==41.0.0 \ + --hash=sha256:0ddaee209d1cf1f180f1efa338a68c4621154de0afaef92b89486f5f96047c55 \ + --hash=sha256:14754bcdae909d66ff24b7b5f166d69340ccc6cb15731670435efd5719294895 \ + --hash=sha256:344c6de9f8bda3c425b3a41b319522ba3208551b70c2ae00099c205f0d9fd3be \ + --hash=sha256:34d405ea69a8b34566ba3dfb0521379b210ea5d560fafedf9f800a9a94a41928 \ + --hash=sha256:3680248309d340fda9611498a5319b0193a8dbdb73586a1acf8109d06f25b92d \ + --hash=sha256:3c5ef25d060c80d6d9f7f9892e1d41bb1c79b78ce74805b8cb4aa373cb7d5ec8 \ + --hash=sha256:4ab14d567f7bbe7f1cdff1c53d5324ed4d3fc8bd17c481b395db224fb405c237 \ + --hash=sha256:5c1f7293c31ebc72163a9a0df246f890d65f66b4a40d9ec80081969ba8c78cc9 \ + --hash=sha256:6b71f64beeea341c9b4f963b48ee3b62d62d57ba93eb120e1196b31dc1025e78 \ + --hash=sha256:7d92f0248d38faa411d17f4107fc0bce0c42cae0b0ba5415505df72d751bf62d \ + --hash=sha256:8362565b3835ceacf4dc8f3b56471a2289cf51ac80946f9087e66dc283a810e0 \ + --hash=sha256:84a165379cb9d411d58ed739e4af3396e544eac190805a54ba2e0322feb55c46 \ + 
--hash=sha256:88ff107f211ea696455ea8d911389f6d2b276aabf3231bf72c8853d22db755c5 \ + --hash=sha256:9f65e842cb02550fac96536edb1d17f24c0a338fd84eaf582be25926e993dde4 \ + --hash=sha256:a4fc68d1c5b951cfb72dfd54702afdbbf0fb7acdc9b7dc4301bbf2225a27714d \ + --hash=sha256:b7f2f5c525a642cecad24ee8670443ba27ac1fab81bba4cc24c7b6b41f2d0c75 \ + --hash=sha256:b846d59a8d5a9ba87e2c3d757ca019fa576793e8758174d3868aecb88d6fc8eb \ + --hash=sha256:bf8fc66012ca857d62f6a347007e166ed59c0bc150cefa49f28376ebe7d992a2 \ + --hash=sha256:f5d0bf9b252f30a31664b6f64432b4730bb7038339bd18b1fafe129cfc2be9be # via # gcp-releasetool # secretstorage From 56fdf7c796e279a4c6024021bc7e958057f6e992 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 5 Jun 2023 10:54:58 -0700 Subject: [PATCH 116/213] refactored operation into class only --- google/cloud/bigtable/_mutate_rows.py | 184 +++++++++++--------------- google/cloud/bigtable/client.py | 5 +- 2 files changed, 80 insertions(+), 109 deletions(-) diff --git a/google/cloud/bigtable/_mutate_rows.py b/google/cloud/bigtable/_mutate_rows.py index f459bb94b..56976b413 100644 --- a/google/cloud/bigtable/_mutate_rows.py +++ b/google/cloud/bigtable/_mutate_rows.py @@ -14,7 +14,7 @@ # from __future__ import annotations -from typing import Iterator, Callable, Awaitable, AsyncIterable, TYPE_CHECKING +from typing import TYPE_CHECKING import functools from google.api_core import exceptions as core_exceptions @@ -30,7 +30,6 @@ ) from google.cloud.bigtable.client import Table from google.cloud.bigtable.mutations import RowMutationEntry - from google.cloud.bigtable_v2.types.bigtable import MutateRowsResponse class _MutateRowsIncomplete(RuntimeError): @@ -41,116 +40,87 @@ class _MutateRowsIncomplete(RuntimeError): pass -async def _mutate_rows_operation( - gapic_client: "BigtableAsyncClient", - table: "Table", - mutation_entries: list["RowMutationEntry"], - operation_timeout: float, - per_request_timeout: float | None, -): - """ - Helper function for managing a single mutate_rows operation, end-to-end. - - Args: - - gapic_client: the client to use for the mutate_rows call - - table: the table associated with the request - - mutation_entries: a list of RowMutationEntry objects to send to the server - - operation_timeout: the timeout to use for the entire operation, in seconds. - - per_request_timeout: the timeout to use for each mutate_rows attempt, in seconds. - If not specified, the request will run until operation_timeout is reached. 
-    """
-
-    predicate = retries.if_exception_type(
-        core_exceptions.DeadlineExceeded,
-        core_exceptions.ServiceUnavailable,
-        _MutateRowsIncomplete,
-    )
-
-    retry = retries.AsyncRetry(
-        predicate=predicate,
-        timeout=operation_timeout,
-        initial=0.01,
-        multiplier=2,
-        maximum=60,
-    )
-    # use generator to lower per-attempt timeout as we approach operation_timeout deadline
-    attempt_timeout_gen = _attempt_timeout_generator(
-        per_request_timeout, operation_timeout
-    )
-    # create partial function to pass to trigger rpc call
-    metadata = _make_metadata(table.table_name, table.app_profile_id)
-    gapic_fn = functools.partial(
-        gapic_client.mutate_rows,
-        table_name=table.table_name,
-        app_profile_id=table.app_profile_id,
-        metadata=metadata,
-    )
-
-    # wrap attempt in retry logic
-    attempt_context = _MutateRowsAttemptContext(
-        gapic_fn,
-        mutation_entries,
-        attempt_timeout_gen,
-        predicate,
-    )
-    retry_wrapped = retry(attempt_context.run_attempt)
-    # convert RetryErrors from retry wrapper into DeadlineExceeded errors
-    deadline_wrapped = _convert_retry_deadline(retry_wrapped, operation_timeout)
-    try:
-        # trigger mutate_rows
-        await deadline_wrapped()
-    except Exception as exc:
-        # exceptions raised by retryable are added to the list of exceptions for all unfinalized mutations
-        for idx in attempt_context.remaining_indices:
-            attempt_context.append_error(idx, exc)
-    finally:
-        # raise exception detailing incomplete mutations
-        all_errors = []
-        for idx, exc_list in attempt_context.errors.items():
-            if len(exc_list) == 0:
-                raise core_exceptions.ClientError(
-                    f"Mutation {idx} failed with no associated errors"
-                )
-            elif len(exc_list) == 1:
-                cause_exc = exc_list[0]
-            else:
-                cause_exc = bt_exceptions.RetryExceptionGroup(exc_list)
-            entry = mutation_entries[idx]
-            all_errors.append(
-                bt_exceptions.FailedMutationEntryError(idx, entry, cause_exc)
-            )
-        if all_errors:
-            raise bt_exceptions.MutationsExceptionGroup(
-                all_errors, len(mutation_entries)
-            )
-
-
-class _MutateRowsAttemptContext:
+class _MutateRowsOperation:
     def __init__(
         self,
-        gapic_fn: Callable[..., Awaitable[AsyncIterable["MutateRowsResponse"]]],
-        mutations: list["RowMutationEntry"],
-        timeout_generator: Iterator[float],
-        is_retryable_predicate: Callable[[Exception], bool],
+        gapic_client: "BigtableAsyncClient",
+        table: "Table",
+        mutation_entries: list["RowMutationEntry"],
+        operation_timeout: float,
+        per_request_timeout: float | None,
     ):
         """
-        Helper class for managing saved state between mutate_rows attempts.
-
         Args:
-          - gapic_fn: the function to call to trigger a mutate_rows rpc
-          - mutations: the list of mutations to send to the server
-          - timeout_generator: an iterator that yields values to use for the rpc timeout
-          - is_retryable_predicate: a function that returns True if an exception is retryable
-              should be the same predicate used by the retry wrapper
+          - gapic_client: the client to use for the mutate_rows call
+          - table: the table associated with the request
+          - mutation_entries: a list of RowMutationEntry objects to send to the server
+          - operation_timeout: the timeout to use for the entire operation, in seconds.
+          - per_request_timeout: the timeout to use for each mutate_rows attempt, in seconds.
              If not specified, the request will run until operation_timeout is reached. 
""" - self.gapic_fn = gapic_fn - self.mutations = mutations - self.remaining_indices = list(range(len(mutations))) - self.timeout_generator = timeout_generator - self.is_retryable = is_retryable_predicate + # create partial function to pass to trigger rpc call + metadata = _make_metadata(table.table_name, table.app_profile_id) + self.gapic_fn = functools.partial( + gapic_client.mutate_rows, + table_name=table.table_name, + app_profile_id=table.app_profile_id, + metadata=metadata, + ) + # create predicate for determining which errors are retryable + self.is_retryable = retries.if_exception_type( + core_exceptions.DeadlineExceeded, + core_exceptions.ServiceUnavailable, + _MutateRowsIncomplete, + ) + # use generator to lower per-attempt timeout as we approach operation_timeout deadline + self.timeout_generator = _attempt_timeout_generator( + per_request_timeout, operation_timeout + ) + # build retryable operation + retry = retries.AsyncRetry( + predicate=self.is_retryable, + timeout=operation_timeout, + initial=0.01, + multiplier=2, + maximum=60, + ) + retry_wrapped = retry(self._run_attempt) + self._operation = _convert_retry_deadline(retry_wrapped, operation_timeout) + # initialize state + self.mutations = mutation_entries + self.remaining_indices = list(range(len(self.mutations))) self.errors: dict[int, list[Exception]] = {} - async def run_attempt(self): + async def start(self): + try: + # trigger mutate_rows + await self._operation() + except Exception as exc: + # exceptions raised by retryable are added to the list of exceptions for all unfinalized mutations + for idx in self.remaining_indices: + self._append_error(idx, exc) + finally: + # raise exception detailing incomplete mutations + all_errors = [] + for idx, exc_list in self.errors.items(): + if len(exc_list) == 0: + raise core_exceptions.ClientError( + f"Mutation {idx} failed with no associated errors" + ) + elif len(exc_list) == 1: + cause_exc = exc_list[0] + else: + cause_exc = bt_exceptions.RetryExceptionGroup(exc_list) + entry = self.mutations[idx] + all_errors.append( + bt_exceptions.FailedMutationEntryError(idx, entry, cause_exc) + ) + if all_errors: + raise bt_exceptions.MutationsExceptionGroup( + all_errors, len(self.mutations) + ) + + async def _run_attempt(self): """ Run a single attempt of the mutate_rows rpc. 
@@ -184,11 +154,11 @@ async def run_attempt(self): if result.status.code == 0: continue else: - self.append_error(orig_idx, entry_error, new_remaining_indices) + self._append_error(orig_idx, entry_error, new_remaining_indices) except Exception as exc: # add this exception to list for each active mutation for idx in self.remaining_indices: - self.append_error(idx, exc, new_remaining_indices) + self._append_error(idx, exc, new_remaining_indices) # bubble up exception to be handled by retry wrapper raise finally: @@ -198,7 +168,7 @@ async def run_attempt(self): # unfinished work; raise exception to trigger retry raise _MutateRowsIncomplete - def append_error( + def _append_error( self, idx: int, exc: Exception, retry_index_list: list[int] | None = None ): """ diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 7e965fde3..3921d6640 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -51,7 +51,7 @@ from google.cloud.bigtable.read_rows_query import ReadRowsQuery from google.cloud.bigtable.iterators import ReadRowsIterator from google.cloud.bigtable.mutations import Mutation, RowMutationEntry -from google.cloud.bigtable._mutate_rows import _mutate_rows_operation +from google.cloud.bigtable._mutate_rows import _MutateRowsOperation from google.cloud.bigtable._helpers import _make_metadata from google.cloud.bigtable._helpers import _convert_retry_deadline @@ -730,13 +730,14 @@ async def bulk_mutate_rows( if per_request_timeout is not None and per_request_timeout > operation_timeout: raise ValueError("per_request_timeout must be less than operation_timeout") - await _mutate_rows_operation( + operation = _MutateRowsOperation( self.client._gapic_client, self, mutation_entries, operation_timeout, per_request_timeout, ) + await operation.start() async def check_and_mutate_row( self, From aca31f02bfad84dc6df0e1eb8af4b993f064e49e Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 5 Jun 2023 12:06:57 -0700 Subject: [PATCH 117/213] restructured how remaining indices are tracked --- google/cloud/bigtable/_mutate_rows.py | 41 +++++++++++++-------------- 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/google/cloud/bigtable/_mutate_rows.py b/google/cloud/bigtable/_mutate_rows.py index 56976b413..a76a6aab8 100644 --- a/google/cloud/bigtable/_mutate_rows.py +++ b/google/cloud/bigtable/_mutate_rows.py @@ -72,10 +72,6 @@ def __init__( core_exceptions.ServiceUnavailable, _MutateRowsIncomplete, ) - # use generator to lower per-attempt timeout as we approach operation_timeout deadline - self.timeout_generator = _attempt_timeout_generator( - per_request_timeout, operation_timeout - ) # build retryable operation retry = retries.AsyncRetry( predicate=self.is_retryable, @@ -87,6 +83,9 @@ def __init__( retry_wrapped = retry(self._run_attempt) self._operation = _convert_retry_deadline(retry_wrapped, operation_timeout) # initialize state + self.timeout_generator = _attempt_timeout_generator( + per_request_timeout, operation_timeout + ) self.mutations = mutation_entries self.remaining_indices = list(range(len(self.mutations))) self.errors: dict[int, list[Exception]] = {} @@ -97,8 +96,9 @@ async def start(self): await self._operation() except Exception as exc: # exceptions raised by retryable are added to the list of exceptions for all unfinalized mutations - for idx in self.remaining_indices: - self._append_error(idx, exc) + incomplete_indices = self.remaining_indices.copy() + for idx in incomplete_indices: + self._handle_entry_error(idx, 
exc) finally: # raise exception detailing incomplete mutations all_errors = [] @@ -132,10 +132,12 @@ async def _run_attempt(self): request_entries = [ self.mutations[idx]._to_dict() for idx in self.remaining_indices ] + # track mutations in this request that have not been finalized yet + active_request_indices = {req_idx:orig_idx for req_idx, orig_idx in enumerate(self.remaining_indices)} + self.remaining_indices = [] if not request_entries: # no more mutations. return early return - new_remaining_indices: list[int] = [] # make gapic request try: result_generator = await self.gapic_fn( @@ -145,7 +147,7 @@ async def _run_attempt(self): async for result_list in result_generator: for result in result_list.entries: # convert sub-request index to global index - orig_idx = self.remaining_indices[result.index] + orig_idx = active_request_indices.pop(result.index) entry_error = core_exceptions.from_grpc_status( result.status.code, result.status.message, @@ -154,37 +156,34 @@ async def _run_attempt(self): if result.status.code == 0: continue else: - self._append_error(orig_idx, entry_error, new_remaining_indices) + self._handle_entry_error(orig_idx, entry_error) except Exception as exc: - # add this exception to list for each active mutation - for idx in self.remaining_indices: - self._append_error(idx, exc, new_remaining_indices) + # add this exception to list for each mutation that wasn't + # already handled + for idx in active_request_indices.values(): + self._handle_entry_error(idx, exc) # bubble up exception to be handled by retry wrapper raise - finally: - self.remaining_indices = new_remaining_indices # check if attempt succeeded, or needs to be retried if self.remaining_indices: # unfinished work; raise exception to trigger retry raise _MutateRowsIncomplete - def _append_error( - self, idx: int, exc: Exception, retry_index_list: list[int] | None = None - ): + def _handle_entry_error(self, idx: int, exc: Exception): """ Add an exception to the list of exceptions for a given mutation index, - and optionally add it to a working list of indices to retry. + and add the index to the list of remaining indices if the exception is + retryable. Args: - idx: the index of the mutation that failed - exc: the exception to add to the list - - retry_index_list: a list to add the index to, if the mutation should be retried. 
""" entry = self.mutations[idx] self.errors.setdefault(idx, []).append(exc) if ( entry.is_idempotent() and self.is_retryable(exc) - and retry_index_list is not None + and idx not in self.remaining_indices ): - retry_index_list.append(idx) + self.remaining_indices.append(idx) From 5a5d5416928d195ee35203818697cfabf30b3ee0 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 5 Jun 2023 13:08:38 -0700 Subject: [PATCH 118/213] fixed tests --- google/cloud/bigtable/_mutate_rows.py | 4 +- tests/unit/test__mutate_rows.py | 265 +++++++++++--------------- 2 files changed, 117 insertions(+), 152 deletions(-) diff --git a/google/cloud/bigtable/_mutate_rows.py b/google/cloud/bigtable/_mutate_rows.py index a76a6aab8..4a0dcbe38 100644 --- a/google/cloud/bigtable/_mutate_rows.py +++ b/google/cloud/bigtable/_mutate_rows.py @@ -60,7 +60,7 @@ def __init__( """ # create partial function to pass to trigger rpc call metadata = _make_metadata(table.table_name, table.app_profile_id) - self.gapic_fn = functools.partial( + self._gapic_fn = functools.partial( gapic_client.mutate_rows, table_name=table.table_name, app_profile_id=table.app_profile_id, @@ -140,7 +140,7 @@ async def _run_attempt(self): return # make gapic request try: - result_generator = await self.gapic_fn( + result_generator = await self._gapic_fn( timeout=next(self.timeout_generator), entries=request_entries, ) diff --git a/tests/unit/test__mutate_rows.py b/tests/unit/test__mutate_rows.py index 497b5d419..de6a181dd 100644 --- a/tests/unit/test__mutate_rows.py +++ b/tests/unit/test__mutate_rows.py @@ -28,81 +28,100 @@ class TestMutateRowsOperation: - @pytest.mark.asyncio - async def test_mutate_rows_operation(self): + + def _target_class(self): + from google.cloud.bigtable._mutate_rows import _MutateRowsOperation + + return _MutateRowsOperation + + def _make_one(self, *args, **kwargs): + if not args: + kwargs["gapic_client"] = kwargs.pop("gapic_client", mock.Mock()) + kwargs["table"] = kwargs.pop("table", AsyncMock()) + kwargs["mutation_entries"] = kwargs.pop("mutation_entries", []) + kwargs["operation_timeout"] = kwargs.pop("operation_timeout", 5) + kwargs["per_request_timeout"] = kwargs.pop("per_request_timeout", 0.1) + return self._target_class()(*args, **kwargs) + + async def _mock_stream(self, mutation_list, error_dict): + for idx, entry in enumerate(mutation_list): + code = error_dict.get(idx, 0) + yield MutateRowsResponse( + entries=[ + MutateRowsResponse.Entry( + index=idx, status=status_pb2.Status(code=code) + ) + ] + ) + + def _make_mock_gapic(self, mutation_list, error_dict=None): + mock_fn = AsyncMock() + if error_dict is None: + error_dict = {} + mock_fn.side_effect = lambda *args, **kwargs: self._mock_stream( + mutation_list, error_dict + ) + return mock_fn + + def test_ctor(self): """ - Test successful case of mutate_rows_operation + test that constructor sets all the attributes correctly """ - from google.cloud.bigtable._mutate_rows import _mutate_rows_operation + from google.cloud.bigtable._mutate_rows import _MutateRowsIncomplete + from google.api_core.exceptions import DeadlineExceeded + from google.api_core.exceptions import ServiceUnavailable client = mock.Mock() table = mock.Mock() entries = [mock.Mock(), mock.Mock()] operation_timeout = 0.05 - with mock.patch( - "google.cloud.bigtable._mutate_rows._MutateRowsAttemptContext.run_attempt", - AsyncMock(), - ) as attempt_mock: - attempt_mock.return_value = None - await _mutate_rows_operation( - client, table, entries, operation_timeout, operation_timeout - ) - assert 
attempt_mock.call_count == 1 + attempt_timeout = 0.01 + instance = self._make_one( + client, table, entries, operation_timeout, attempt_timeout + ) + # running gapic_fn should trigger a client call + assert client.mutate_rows.call_count == 0 + instance._gapic_fn() + assert client.mutate_rows.call_count == 1 + # gapic_fn should call with table details + inner_kwargs = client.mutate_rows.call_args[1] + assert len(inner_kwargs) == 3 + assert inner_kwargs["table_name"] == table.table_name + assert inner_kwargs["app_profile_id"] == table.app_profile_id + metadata = inner_kwargs["metadata"] + assert len(metadata) == 1 + assert metadata[0][0] == "x-goog-request-params" + assert str(table.table_name) in metadata[0][1] + assert str(table.app_profile_id) in metadata[0][1] + # entries should be passed down + assert instance.mutations == entries + # timeout_gen should generate per-attempt timeout + assert next(instance.timeout_generator) == attempt_timeout + # ensure predicate is set + assert instance.is_retryable is not None + assert instance.is_retryable(DeadlineExceeded("")) is True + assert instance.is_retryable(ServiceUnavailable("")) is True + assert instance.is_retryable(_MutateRowsIncomplete("")) is True + assert instance.is_retryable(RuntimeError("")) is False + assert instance.remaining_indices == list(range(len(entries))) + assert instance.errors == {} @pytest.mark.asyncio - async def test_mutate_rows_operation_args(self): + async def test_mutate_rows_operation(self): """ - Test the args passed down to _MutateRowsAttemptContext + Test successful case of mutate_rows_operation """ - from google.cloud.bigtable._mutate_rows import _mutate_rows_operation - from google.cloud.bigtable._mutate_rows import _MutateRowsIncomplete - from google.api_core.exceptions import DeadlineExceeded - from google.api_core.exceptions import ServiceUnavailable - client = mock.Mock() table = mock.Mock() entries = [mock.Mock(), mock.Mock()] operation_timeout = 0.05 - attempt_timeout = 0.01 - with mock.patch( - "google.cloud.bigtable._mutate_rows._MutateRowsAttemptContext.__init__" - ) as attempt_mock: - attempt_mock.side_effect = RuntimeError("abort") - try: - await _mutate_rows_operation( - client, table, entries, operation_timeout, attempt_timeout - ) - except RuntimeError: - pass - args, kwargs = attempt_mock.call_args - found_fn = args[0] - found_entries = args[1] - found_timeout_gen = args[2] - found_predicate = args[3] - # running found_fn should trigger a client call - assert client.mutate_rows.call_count == 0 - found_fn() - assert client.mutate_rows.call_count == 1 - # found_fn should call with table details - inner_kwargs = client.mutate_rows.call_args[1] - assert len(inner_kwargs) == 3 - assert inner_kwargs["table_name"] == table.table_name - assert inner_kwargs["app_profile_id"] == table.app_profile_id - metadata = inner_kwargs["metadata"] - assert len(metadata) == 1 - assert metadata[0][0] == "x-goog-request-params" - assert str(table.table_name) in metadata[0][1] - assert str(table.app_profile_id) in metadata[0][1] - # entries should be passed down - assert found_entries == entries - # timeout_gen should generate per-attempt timeout - assert next(found_timeout_gen) == attempt_timeout - # ensure predicate is set - assert found_predicate is not None - assert found_predicate(DeadlineExceeded("")) is True - assert found_predicate(ServiceUnavailable("")) is True - assert found_predicate(_MutateRowsIncomplete("")) is True - assert found_predicate(RuntimeError("")) is False + instance = self._make_one( + client, 
table, entries, operation_timeout, operation_timeout + ) + with mock.patch.object(instance, "_operation", AsyncMock()) as attempt_mock: + attempt_mock.return_value = None + await instance.start() + assert attempt_mock.call_count == 1 @pytest.mark.parametrize( "exc_type", [RuntimeError, ZeroDivisionError, core_exceptions.Forbidden] @@ -112,7 +131,6 @@ async def test_mutate_rows_exception(self, exc_type): """ exceptions raised from retryable should be raised in MutationsExceptionGroup """ - from google.cloud.bigtable._mutate_rows import _mutate_rows_operation from google.cloud.bigtable.exceptions import MutationsExceptionGroup from google.cloud.bigtable.exceptions import FailedMutationEntryError @@ -121,16 +139,18 @@ async def test_mutate_rows_exception(self, exc_type): entries = [mock.Mock()] operation_timeout = 0.05 expected_cause = exc_type("abort") - with mock.patch( - "google.cloud.bigtable._mutate_rows._MutateRowsAttemptContext.run_attempt", + with mock.patch.object( + self._target_class(), + "_run_attempt", AsyncMock(), ) as attempt_mock: attempt_mock.side_effect = expected_cause found_exc = None try: - await _mutate_rows_operation( + instance = self._make_one( client, table, entries, operation_timeout, operation_timeout ) + await instance.start() except MutationsExceptionGroup as e: found_exc = e assert attempt_mock.call_count == 1 @@ -147,22 +167,23 @@ async def test_mutate_rows_exception_retryable_eventually_pass(self, exc_type): """ If an exception fails but eventually passes, it should not raise an exception """ - from google.cloud.bigtable._mutate_rows import _mutate_rows_operation - + from google.cloud.bigtable._mutate_rows import _MutateRowsOperation client = mock.Mock() table = mock.Mock() entries = [mock.Mock()] operation_timeout = 1 expected_cause = exc_type("retry") num_retries = 2 - with mock.patch( - "google.cloud.bigtable._mutate_rows._MutateRowsAttemptContext.run_attempt", + with mock.patch.object( + _MutateRowsOperation, + "_run_attempt", AsyncMock(), ) as attempt_mock: attempt_mock.side_effect = [expected_cause] * num_retries + [None] - await _mutate_rows_operation( + instance = self._make_one( client, table, entries, operation_timeout, operation_timeout ) + await instance.start() assert attempt_mock.call_count == num_retries + 1 @pytest.mark.asyncio @@ -170,7 +191,6 @@ async def test_mutate_rows_incomplete_ignored(self): """ MutateRowsIncomplete exceptions should not be added to error list """ - from google.cloud.bigtable._mutate_rows import _mutate_rows_operation from google.cloud.bigtable._mutate_rows import _MutateRowsIncomplete from google.cloud.bigtable.exceptions import MutationsExceptionGroup from google.api_core.exceptions import DeadlineExceeded @@ -179,84 +199,37 @@ async def test_mutate_rows_incomplete_ignored(self): table = mock.Mock() entries = [mock.Mock()] operation_timeout = 0.05 - with mock.patch( - "google.cloud.bigtable._mutate_rows._MutateRowsAttemptContext.run_attempt", + with mock.patch.object( + self._target_class(), + "_run_attempt", AsyncMock(), ) as attempt_mock: attempt_mock.side_effect = _MutateRowsIncomplete("ignored") found_exc = None try: - await _mutate_rows_operation( + instance = self._make_one( client, table, entries, operation_timeout, operation_timeout ) + await instance.start() except MutationsExceptionGroup as e: found_exc = e assert attempt_mock.call_count > 0 assert len(found_exc.exceptions) == 1 assert isinstance(found_exc.exceptions[0].__cause__, DeadlineExceeded) - -class TestMutateRowsAttemptContext: - def _make_one(self, 
*args, **kwargs): - from google.cloud.bigtable._mutate_rows import _MutateRowsAttemptContext - - return _MutateRowsAttemptContext(*args, **kwargs) - - async def _mock_stream(self, mutation_list, error_dict): - for idx, entry in enumerate(mutation_list): - code = error_dict.get(idx, 0) - yield MutateRowsResponse( - entries=[ - MutateRowsResponse.Entry( - index=idx, status=status_pb2.Status(code=code) - ) - ] - ) - - def _make_mock_gapic(self, mutation_list, error_dict=None): - mock_fn = AsyncMock() - if error_dict is None: - error_dict = {} - mock_fn.side_effect = lambda *args, **kwargs: self._mock_stream( - mutation_list, error_dict - ) - return mock_fn - - def test_ctor(self): - mock_gapic = mock.Mock() - mutations = list(range(10)) - timeout_gen = mock.Mock() - predicate = mock.Mock() - instance = self._make_one( - mock_gapic, - mutations, - timeout_gen, - predicate, - ) - assert instance.gapic_fn == mock_gapic - assert instance.mutations == mutations - assert instance.remaining_indices == list(range(10)) - assert instance.timeout_generator == timeout_gen - assert instance.is_retryable == predicate - assert instance.errors == {} - @pytest.mark.asyncio - async def test_single_entry_success(self): + async def test_run_attempt_single_entry_success(self): """Test mutating a single entry""" - import itertools - mutation = mock.Mock() mutations = {0: mutation} - expected_timeout = 9 - mock_timeout_gen = itertools.repeat(expected_timeout) + expected_timeout = 1.3 mock_gapic_fn = self._make_mock_gapic(mutations) instance = self._make_one( - mock_gapic_fn, - mutations, - mock_timeout_gen, - lambda x: False, + mutation_entries=mutations, + per_request_timeout=expected_timeout, ) - await instance.run_attempt() + with mock.patch.object(instance, "_gapic_fn", mock_gapic_fn): + await instance._run_attempt() assert len(instance.remaining_indices) == 0 assert mock_gapic_fn.call_count == 1 _, kwargs = mock_gapic_fn.call_args @@ -264,21 +237,17 @@ async def test_single_entry_success(self): assert kwargs["entries"] == [mutation._to_dict()] @pytest.mark.asyncio - async def test_empty_request(self): + async def test_run_attempt_empty_request(self): """Calling with no mutations should result in no API calls""" - mock_timeout_gen = iter([0] * 10) mock_gapic_fn = self._make_mock_gapic([]) instance = self._make_one( - mock_gapic_fn, - [], - mock_timeout_gen, - lambda x: False, + mutation_entries=[], ) - await instance.run_attempt() + await instance._run_attempt() assert mock_gapic_fn.call_count == 0 @pytest.mark.asyncio - async def test_partial_success_retryable(self): + async def test_run_attempt_partial_success_retryable(self): """Some entries succeed, but one fails. 
Should report the proper index, and raise incomplete exception""" from google.cloud.bigtable._mutate_rows import _MutateRowsIncomplete @@ -286,16 +255,14 @@ async def test_partial_success_retryable(self): success_mutation_2 = mock.Mock() failure_mutation = mock.Mock() mutations = [success_mutation, failure_mutation, success_mutation_2] - mock_timeout_gen = iter([0] * 10) mock_gapic_fn = self._make_mock_gapic(mutations, error_dict={1: 300}) instance = self._make_one( - mock_gapic_fn, - mutations, - mock_timeout_gen, - lambda x: True, + mutation_entries=mutations, ) - with pytest.raises(_MutateRowsIncomplete): - await instance.run_attempt() + instance.is_retryable = lambda x: True + with mock.patch.object(instance, "_gapic_fn", mock_gapic_fn): + with pytest.raises(_MutateRowsIncomplete): + await instance._run_attempt() assert instance.remaining_indices == [1] assert 0 not in instance.errors assert len(instance.errors[1]) == 1 @@ -303,21 +270,19 @@ async def test_partial_success_retryable(self): assert 2 not in instance.errors @pytest.mark.asyncio - async def test_partial_success_non_retryable(self): + async def test_run_attempt_partial_success_non_retryable(self): """Some entries succeed, but one fails. Exception marked as non-retryable. Do not raise incomplete error""" success_mutation = mock.Mock() success_mutation_2 = mock.Mock() failure_mutation = mock.Mock() mutations = [success_mutation, failure_mutation, success_mutation_2] - mock_timeout_gen = iter([0] * 10) mock_gapic_fn = self._make_mock_gapic(mutations, error_dict={1: 300}) instance = self._make_one( - mock_gapic_fn, - mutations, - mock_timeout_gen, - lambda x: False, + mutation_entries=mutations, ) - await instance.run_attempt() + instance.is_retryable = lambda x: False + with mock.patch.object(instance, "_gapic_fn", mock_gapic_fn): + await instance._run_attempt() assert instance.remaining_indices == [] assert 0 not in instance.errors assert len(instance.errors[1]) == 1 From afed7312a9ef75cf78664622cec843fb48e0a19b Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 5 Jun 2023 13:17:11 -0700 Subject: [PATCH 119/213] added docstrings --- google/cloud/bigtable/_mutate_rows.py | 20 +++++++++++++++++++- tests/unit/test__mutate_rows.py | 2 +- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigtable/_mutate_rows.py b/google/cloud/bigtable/_mutate_rows.py index 4a0dcbe38..ef5ce4c14 100644 --- a/google/cloud/bigtable/_mutate_rows.py +++ b/google/cloud/bigtable/_mutate_rows.py @@ -41,6 +41,16 @@ class _MutateRowsIncomplete(RuntimeError): class _MutateRowsOperation: + """ + MutateRowsOperation manages the logic of sending a set of row mutations, + and retrying on failed entries. It manages this using the _run_attempt + function, which attempts to mutate all outstanding entries, and raises + _MutateRowsIncomplete if any retryable errors are encountered. + + Errors are exposed as a MutationsExceptionGroup, which contains a list of + exceptions organized by the related failed mutation entries. 
+ """ + def __init__( self, gapic_client: "BigtableAsyncClient", @@ -91,6 +101,12 @@ def __init__( self.errors: dict[int, list[Exception]] = {} async def start(self): + """ + Start the operation, and run until completion + + Raises: + - MutationsExceptionGroup: if any mutations failed + """ try: # trigger mutate_rows await self._operation() @@ -133,7 +149,9 @@ async def _run_attempt(self): self.mutations[idx]._to_dict() for idx in self.remaining_indices ] # track mutations in this request that have not been finalized yet - active_request_indices = {req_idx:orig_idx for req_idx, orig_idx in enumerate(self.remaining_indices)} + active_request_indices = { + req_idx: orig_idx for req_idx, orig_idx in enumerate(self.remaining_indices) + } self.remaining_indices = [] if not request_entries: # no more mutations. return early diff --git a/tests/unit/test__mutate_rows.py b/tests/unit/test__mutate_rows.py index de6a181dd..4fba16f23 100644 --- a/tests/unit/test__mutate_rows.py +++ b/tests/unit/test__mutate_rows.py @@ -28,7 +28,6 @@ class TestMutateRowsOperation: - def _target_class(self): from google.cloud.bigtable._mutate_rows import _MutateRowsOperation @@ -168,6 +167,7 @@ async def test_mutate_rows_exception_retryable_eventually_pass(self, exc_type): If an exception fails but eventually passes, it should not raise an exception """ from google.cloud.bigtable._mutate_rows import _MutateRowsOperation + client = mock.Mock() table = mock.Mock() entries = [mock.Mock()] From 2396ec895e7cc9b70980faf18ae64ee44639124c Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Tue, 6 Jun 2023 11:37:11 -0700 Subject: [PATCH 120/213] moved index deletion to end of block --- google/cloud/bigtable/_mutate_rows.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/google/cloud/bigtable/_mutate_rows.py b/google/cloud/bigtable/_mutate_rows.py index ef5ce4c14..158c462cb 100644 --- a/google/cloud/bigtable/_mutate_rows.py +++ b/google/cloud/bigtable/_mutate_rows.py @@ -165,19 +165,20 @@ async def _run_attempt(self): async for result_list in result_generator: for result in result_list.entries: # convert sub-request index to global index - orig_idx = active_request_indices.pop(result.index) + orig_idx = active_request_indices[result.index] entry_error = core_exceptions.from_grpc_status( result.status.code, result.status.message, details=result.status.details, ) - if result.status.code == 0: - continue - else: + if result.status.code != 0: + # mutation failed; update error list (and remaining_indices if retryable) self._handle_entry_error(orig_idx, entry_error) + # remove processed entry from active list + del active_request_indices[result.index] except Exception as exc: # add this exception to list for each mutation that wasn't - # already handled + # already handled, and update remaining_indices if mutation is retryable for idx in active_request_indices.values(): self._handle_entry_error(idx, exc) # bubble up exception to be handled by retry wrapper From 3d441a285a0ac6a6069423808883d3334409dd0e Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Tue, 6 Jun 2023 11:49:00 -0700 Subject: [PATCH 121/213] added comment to exception types --- google/cloud/bigtable/_mutate_rows.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/google/cloud/bigtable/_mutate_rows.py b/google/cloud/bigtable/_mutate_rows.py index 158c462cb..a422c99b2 100644 --- a/google/cloud/bigtable/_mutate_rows.py +++ b/google/cloud/bigtable/_mutate_rows.py @@ -78,8 +78,10 @@ def __init__( ) # create predicate for determining which 
errors are retryable self.is_retryable = retries.if_exception_type( + # RPC level errors core_exceptions.DeadlineExceeded, core_exceptions.ServiceUnavailable, + # Entry level errors _MutateRowsIncomplete, ) # build retryable operation From 8b41518fb5549b5e238cfbd69ef2970b49c09b6a Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Tue, 6 Jun 2023 12:33:40 -0700 Subject: [PATCH 122/213] update flow control after batch, instead of after each entry --- google/cloud/bigtable/client.py | 2 -- google/cloud/bigtable/mutations_batcher.py | 15 +++++++++------ tests/unit/test_mutations_batcher.py | 22 +++++++++++----------- 3 files changed, 20 insertions(+), 19 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index e3897adbd..df1177247 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -20,8 +20,6 @@ Any, Optional, Set, - Callable, - Coroutine, TYPE_CHECKING, ) diff --git a/google/cloud/bigtable/mutations_batcher.py b/google/cloud/bigtable/mutations_batcher.py index 5d77539a6..a62bba97a 100644 --- a/google/cloud/bigtable/mutations_batcher.py +++ b/google/cloud/bigtable/mutations_batcher.py @@ -23,7 +23,7 @@ from google.cloud.bigtable.exceptions import MutationsExceptionGroup from google.cloud.bigtable.exceptions import FailedMutationEntryError -from google.cloud.bigtable._mutate_rows import _mutate_rows_operation +from google.cloud.bigtable._mutate_rows import _MutateRowsOperation if TYPE_CHECKING: from google.cloud.bigtable.client import Table # pragma: no cover @@ -70,12 +70,14 @@ def _has_capacity(self, additional_count: int, additional_size: int) -> bool: new_size <= self.max_mutation_bytes and new_count <= self.max_mutation_count ) - async def remove_from_flow(self, mutation_entry: RowMutationEntry, *args) -> None: + async def remove_from_flow(self, mutations: list[RowMutationEntry]) -> None: """ Every time an in-flight mutation is complete, release the flow control semaphore """ - self.in_flight_mutation_count -= len(mutation_entry.mutations) - self.in_flight_mutation_bytes -= mutation_entry.size() + total_count = sum(len(entry.mutations) for entry in mutations) + total_size = sum(entry.size() for entry in mutations) + self.in_flight_mutation_count -= total_count + self.in_flight_mutation_bytes -= total_size # notify any blocked requests that there is additional capacity async with self.capacity_condition: self.capacity_condition.notify_all() @@ -284,6 +286,7 @@ async def _flush_internal( batch_errors = await self._execute_mutate_rows(batch) self.exceptions.extend(batch_errors) self._entries_processed_since_last_raise += len(batch) + await self._flow_control.remove_from_flow(batch) async def _execute_mutate_rows( self, batch: list[RowMutationEntry] @@ -303,14 +306,14 @@ async def _execute_mutate_rows( if self._table.app_profile_id: request["app_profile_id"] = self._table.app_profile_id try: - await _mutate_rows_operation( + operation = _MutateRowsOperation( self._table.client._gapic_client, self._table, batch, self._table.default_operation_timeout, self._table.default_per_request_timeout, - self._flow_control.remove_from_flow, ) + await operation.start() except MutationsExceptionGroup as e: for subexc in e.exceptions: subexc.index = None diff --git a/tests/unit/test_mutations_batcher.py b/tests/unit/test_mutations_batcher.py index d2991c277..338b3289f 100644 --- a/tests/unit/test_mutations_batcher.py +++ b/tests/unit/test_mutations_batcher.py @@ -120,7 +120,7 @@ async def test_remove_from_flow_value_update( 
instance.in_flight_mutation_count = existing_count instance.in_flight_mutation_bytes = existing_size mutation = _make_mutation(added_count, added_size) - await instance.remove_from_flow(mutation) + await instance.remove_from_flow([mutation]) assert instance.in_flight_mutation_count == new_count assert instance.in_flight_mutation_bytes == new_size @@ -143,7 +143,7 @@ async def task_routine(): assert task.done() is False # try changing size mutation = _make_mutation(count=0, size=5) - await instance.remove_from_flow(mutation) + await instance.remove_from_flow([mutation]) await asyncio.sleep(0.05) assert instance.in_flight_mutation_count == 10 assert instance.in_flight_mutation_bytes == 5 @@ -151,7 +151,7 @@ async def task_routine(): # try changing count instance.in_flight_mutation_bytes = 10 mutation = _make_mutation(count=5, size=0) - await instance.remove_from_flow(mutation) + await instance.remove_from_flow([mutation]) await asyncio.sleep(0.05) assert instance.in_flight_mutation_count == 5 assert instance.in_flight_mutation_bytes == 10 @@ -159,7 +159,7 @@ async def task_routine(): # try changing both instance.in_flight_mutation_count = 10 mutation = _make_mutation(count=5, size=5) - await instance.remove_from_flow(mutation) + await instance.remove_from_flow([mutation]) await asyncio.sleep(0.05) assert instance.in_flight_mutation_count == 5 assert instance.in_flight_mutation_bytes == 5 @@ -203,8 +203,7 @@ async def test_add_to_flow(self, mutations, count_cap, size_cap, expected_result # check sizes assert batch[j].size() == expected_batch[j][1] # update lock - for entry in batch: - await instance.remove_from_flow(entry) + await instance.remove_from_flow(batch) i += 1 assert i == len(expected_results) @@ -674,9 +673,11 @@ async def test_timer_flush_end_to_end(self): @pytest.mark.asyncio @unittest.mock.patch( - "google.cloud.bigtable.mutations_batcher._mutate_rows_operation" + "google.cloud.bigtable.mutations_batcher._MutateRowsOperation", ) async def test__execute_mutate_rows(self, mutate_rows): + mutate_rows.return_value = AsyncMock() + start_operation = mutate_rows().start table = mock.Mock() table.table_name = "test-table" table.app_profile_id = "test-app-profile" @@ -685,20 +686,18 @@ async def test__execute_mutate_rows(self, mutate_rows): async with self._make_one(table) as instance: batch = [mock.Mock()] result = await instance._execute_mutate_rows(batch) - assert mutate_rows.call_count == 1 - assert mutate_rows.await_count == 1 + assert start_operation.call_count == 1 args, _ = mutate_rows.call_args assert args[0] == table.client._gapic_client assert args[1] == table assert args[2] == batch assert args[3] == 17 assert args[4] == 13 - assert args[5] == instance._flow_control.remove_from_flow assert result == [] @pytest.mark.asyncio @unittest.mock.patch( - "google.cloud.bigtable.mutations_batcher._mutate_rows_operation" + "google.cloud.bigtable.mutations_batcher._MutateRowsOperation.start" ) async def test__execute_mutate_rows_returns_errors(self, mutate_rows): """Errors from operation should be retruned as list""" @@ -711,6 +710,7 @@ async def test__execute_mutate_rows_returns_errors(self, mutate_rows): err2 = FailedMutationEntryError(1, mock.Mock(), RuntimeError("test error")) mutate_rows.side_effect = MutationsExceptionGroup([err1, err2], 10) table = mock.Mock() + table.default_operation_timeout = 17 async with self._make_one(table) as instance: batch = [mock.Mock()] result = await instance._execute_mutate_rows(batch) From bdd7f455c65463d57d2b32512f80b49c0c451e42 Mon Sep 17 00:00:00 
2001 From: Daniel Sanche Date: Tue, 6 Jun 2023 14:56:37 -0700 Subject: [PATCH 123/213] fixed test --- google/cloud/bigtable/client.py | 2 -- tests/unit/test_mutations.py | 1 - 2 files changed, 3 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index e3897adbd..df1177247 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -20,8 +20,6 @@ Any, Optional, Set, - Callable, - Coroutine, TYPE_CHECKING, ) diff --git a/tests/unit/test_mutations.py b/tests/unit/test_mutations.py index 0066290bc..ebb0d6e60 100644 --- a/tests/unit/test_mutations.py +++ b/tests/unit/test_mutations.py @@ -522,7 +522,6 @@ def test__to_dict(self): @pytest.mark.parametrize( "mutations,result", [ - ([], True), ([mock.Mock(is_idempotent=lambda: True)], True), ([mock.Mock(is_idempotent=lambda: False)], False), ( From 553bc4afa34018c52dff7ef5f07081b270617a97 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Tue, 6 Jun 2023 19:07:46 -0700 Subject: [PATCH 124/213] add kwargs docstring --- google/cloud/bigtable/client.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index df1177247..4d1d9c14d 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -581,10 +581,22 @@ def mutations_batcher(self, **kwargs) -> MutationsBatcher: Can be used to iteratively add mutations that are flushed as a group, to avoid excess network calls + Kwargs: + - flush_interval: Automatically flush every flush_interval seconds. If None, + a table default will be used + - flush_limit_count: Flush immediately after flush_limit_count mutations are added. + If None, this limit is ignored. + - flush_limit_bytes: Flush immediately after flush_limit_bytes bytes are added. + If None, this limit is ignored. + - flow_control_max_count: Maximum number of inflight mutations. + If None, this limit is ignored. + - flow_control_max_bytes: Maximum number of inflight bytes. + If None, this limit is ignored. Returns: - a MutationsBatcher context manager that can batch requests """ - return MutationsBatcher(self, **kwargs) + kwargs["table"] = self + return MutationsBatcher(**kwargs) async def mutate_row( self, From 20156c6eb6024a42b60066331bc77f40a9a18c0e Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 7 Jun 2023 09:56:27 -0700 Subject: [PATCH 125/213] allow mutations over flow limits --- google/cloud/bigtable/mutations_batcher.py | 32 +++++--------- tests/unit/test_mutations_batcher.py | 51 +++++++++++++--------- 2 files changed, 42 insertions(+), 41 deletions(-) diff --git a/google/cloud/bigtable/mutations_batcher.py b/google/cloud/bigtable/mutations_batcher.py index a62bba97a..a396db01b 100644 --- a/google/cloud/bigtable/mutations_batcher.py +++ b/google/cloud/bigtable/mutations_batcher.py @@ -39,6 +39,9 @@ class _FlowControl: limits have reached capacity. When a mutation is complete, it is unregistered from the FlowControl object, which will notify any blocked requests that there is additional capacity. + + Flow limits are not hard limits. If a single mutation exceeds the configured + limits, it will be sent in a single batch when the capacity is available. 
""" def __init__(self, max_mutation_count: int | None, max_mutation_bytes: int | None): @@ -63,11 +66,18 @@ def __init__(self, max_mutation_count: int | None, max_mutation_bytes: int | Non def _has_capacity(self, additional_count: int, additional_size: int) -> bool: """ Checks if there is capacity to send a new mutation with the given size and count + + FlowControl limits are not hard limits. If a single mutation exceeds + the configured limits, it can be sent in a single batch. """ + # adjust limits to allow overly large mutations + acceptable_size = max(self.max_mutation_bytes, additional_size) + acceptable_count = max(self.max_mutation_count, additional_count) + # check if we have capacity for new mutation new_size = self.in_flight_mutation_bytes + additional_size new_count = self.in_flight_mutation_count + additional_count return ( - new_size <= self.max_mutation_bytes and new_count <= self.max_mutation_count + new_size <= acceptable_size and new_count <= acceptable_count ) async def remove_from_flow(self, mutations: list[RowMutationEntry]) -> None: @@ -106,15 +116,6 @@ async def add_to_flow(self, mutations: list[RowMutationEntry]): next_entry = mutations[end_idx] next_size = next_entry.size() next_count = len(next_entry.mutations) - # do extra sanity check to avoid blocking forever - if next_count > self.max_mutation_count: - raise ValueError( - f"Mutation count {next_count} exceeds maximum: {self.max_mutation_count}" - ) - if next_size > self.max_mutation_bytes: - raise ValueError( - f"Mutation size {next_size} exceeds maximum: {self.max_mutation_bytes}" - ) if self._has_capacity(next_count, next_size): end_idx += 1 self.in_flight_mutation_bytes += next_size @@ -213,19 +214,10 @@ def append(self, mutations: RowMutationEntry): """ if self.closed: raise RuntimeError("Cannot append to closed MutationsBatcher") - size = mutations.size() - if size > self._flow_control.max_mutation_bytes: - raise ValueError( - f"Mutation size {size} exceeds flow_control_max_bytes: {self._flow_control.max_mutation_bytes}" - ) - if len(mutations.mutations) > self._flow_control.max_mutation_count: - raise ValueError( - f"Mutation count {len(mutations.mutations)} exceeds flow_control_max_count: {self._flow_control.max_mutation_count}" - ) self._staged_mutations.append(mutations) # start a new flush task if limits exceeded self._staged_count += len(mutations.mutations) - self._staged_bytes += size + self._staged_bytes += mutations.size() if ( self._staged_count >= self._flush_limit_count or self._staged_bytes >= self._flush_limit_bytes diff --git a/tests/unit/test_mutations_batcher.py b/tests/unit/test_mutations_batcher.py index 338b3289f..85a4b8a6f 100644 --- a/tests/unit/test_mutations_batcher.py +++ b/tests/unit/test_mutations_batcher.py @@ -61,12 +61,16 @@ def test_ctor_empty_values(self): (0, 0, 1, 1, 1, 1, False), (10, 10, 0, 0, 0, 0, True), (10, 10, 0, 0, 9, 9, True), - (10, 10, 0, 0, 11, 9, False), - (10, 10, 0, 0, 9, 11, False), + (10, 10, 0, 0, 11, 9, True), + (10, 10, 0, 1, 11, 9, True), + (10, 10, 1, 0, 11, 9, False), + (10, 10, 0, 0, 9, 11, True), + (10, 10, 1, 0, 9, 11, True), + (10, 10, 0, 1, 9, 11, False), (10, 1, 0, 0, 1, 0, True), (1, 10, 0, 0, 0, 8, True), (float("inf"), float("inf"), 0, 0, 1e10, 1e10, True), - (8, 8, 0, 0, 1e10, 1e10, False), + (8, 8, 0, 0, 1e10, 1e10, True), (12, 12, 6, 6, 5, 5, True), (12, 12, 5, 5, 6, 6, True), (12, 12, 6, 6, 6, 6, True), @@ -208,21 +212,18 @@ async def test_add_to_flow(self, mutations, count_cap, size_cap, expected_result assert i == 
len(expected_results) @pytest.mark.asyncio - async def test_add_to_flow_invalid_mutation(self): + async def test_add_to_flow_oversize(self): """ - batching should raise exception for mutations larger than limits to avoid deadlock + mutations over the flow control limits should still be accepted """ instance = self._make_one(2, 3) large_size_mutation = _make_mutation(count=1, size=10) large_count_mutation = _make_mutation(count=10, size=1) - with pytest.raises(ValueError) as e: - async for _ in instance.add_to_flow([large_size_mutation]): - pass - assert "Mutation size 10 exceeds maximum: 3" in str(e.value) - with pytest.raises(ValueError) as e: - async for _ in instance.add_to_flow([large_count_mutation]): - pass - assert "Mutation count 10 exceeds maximum: 2" in str(e.value) + results = [out async for out in instance.add_to_flow([large_size_mutation])] + assert len(results) == 1 + await instance.remove_from_flow(results[0]) + count_results = [out async for out in instance.add_to_flow([large_count_mutation])] + assert len(count_results) == 1 class TestMutationsBatcher: @@ -231,6 +232,8 @@ def _make_one(self, table=None, **kwargs): if table is None: table = mock.Mock() + table.default_operation_timeout = 10 + table.default_per_request_timeout = 10 return MutationsBatcher(table, **kwargs) @@ -416,18 +419,25 @@ async def test_append_closed(self): @pytest.mark.asyncio async def test_append_outside_flow_limits(self): - """entries larger than mutation limits are rejected""" + """entries larger than mutation limits are still processed""" async with self._make_one( flow_control_max_count=1, flow_control_max_bytes=1 ) as instance: oversized_entry = _make_mutation(count=0, size=2) + instance.append(oversized_entry) + assert instance._staged_mutations == [oversized_entry] + assert instance._staged_count == 0 + assert instance._staged_bytes == 2 + instance._staged_mutations = [] + async with self._make_one( + flow_control_max_count=1, flow_control_max_bytes=1 + ) as instance: overcount_entry = _make_mutation(count=2, size=0) - with pytest.raises(ValueError) as e: - instance.append(oversized_entry) - assert "Mutation size 2 exceeds flow_control_max_bytes: 1" in str(e.value) - with pytest.raises(ValueError) as e: - instance.append(overcount_entry) - assert "Mutation count 2 exceeds flow_control_max_count: 1" in str(e.value) + instance.append(overcount_entry) + assert instance._staged_mutations == [overcount_entry] + assert instance._staged_count == 2 + assert instance._staged_bytes == 0 + instance._staged_mutations = [] @pytest.mark.parametrize( "flush_count,flush_bytes,mutation_count,mutation_bytes,expect_flush", @@ -628,7 +638,6 @@ async def gen(num): @pytest.mark.asyncio async def test_manual_flush_end_to_end(self): """Test full flush process with minimal mocking""" - num_nutations = 10 mutations = [_make_mutation(count=2, size=2)] * num_nutations From b9c44678e39bf8d4bce7a6c3bdf9a354d55d4cdf Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 7 Jun 2023 11:16:56 -0700 Subject: [PATCH 126/213] added limits for mutate_rows mutation and entry counts --- google/cloud/bigtable/_mutate_rows.py | 9 +++ google/cloud/bigtable/mutations.py | 7 +++ google/cloud/bigtable/mutations_batcher.py | 39 ++++++++++--- tests/unit/test__mutate_rows.py | 23 ++++++++ tests/unit/test_mutations.py | 13 +++++ tests/unit/test_mutations_batcher.py | 68 +++++++++++++++++----- 6 files changed, 138 insertions(+), 21 deletions(-) diff --git a/google/cloud/bigtable/_mutate_rows.py b/google/cloud/bigtable/_mutate_rows.py index 
a422c99b2..64bcf376c 100644 --- a/google/cloud/bigtable/_mutate_rows.py +++ b/google/cloud/bigtable/_mutate_rows.py @@ -31,6 +31,9 @@ from google.cloud.bigtable.client import Table from google.cloud.bigtable.mutations import RowMutationEntry +# mutate_rows requests are limited to this value +MAX_MUTATE_ROWS_ENTRY_COUNT = 100_000 + class _MutateRowsIncomplete(RuntimeError): """ @@ -68,6 +71,12 @@ def __init__( - per_request_timeout: the timeoutto use for each mutate_rows attempt, in seconds. If not specified, the request will run until operation_timeout is reached. """ + # check that mutations are within limits + if len(mutation_entries) > MAX_MUTATE_ROWS_ENTRY_COUNT: + raise ValueError( + "mutate_rows must contain at most " + f"{MAX_MUTATE_ROWS_ENTRY_COUNT} entries. Received {len(mutation_entries)}" + ) # create partial function to pass to trigger rpc call metadata = _make_metadata(table.table_name, table.app_profile_id) self._gapic_fn = functools.partial( diff --git a/google/cloud/bigtable/mutations.py b/google/cloud/bigtable/mutations.py index 28c14b0a8..38c129750 100644 --- a/google/cloud/bigtable/mutations.py +++ b/google/cloud/bigtable/mutations.py @@ -22,6 +22,9 @@ # special value for SetCell mutation timestamps. If set, server will assign a timestamp SERVER_SIDE_TIMESTAMP = -1 +# mutation entries above this should be rejected +MAX_MUTATIONS_PER_ENTRY = 100_000 + class Mutation(ABC): """Model class for mutations""" @@ -198,6 +201,10 @@ def __init__(self, row_key: bytes | str, mutations: Mutation | list[Mutation]): mutations = [mutations] if len(mutations) == 0: raise ValueError("mutations must not be empty") + elif len(mutations) > MAX_MUTATIONS_PER_ENTRY: + raise ValueError( + f"entries must have <= {MAX_MUTATIONS_PER_ENTRY} mutations" + ) self.row_key = row_key self.mutations = tuple(mutations) diff --git a/google/cloud/bigtable/mutations_batcher.py b/google/cloud/bigtable/mutations_batcher.py index a396db01b..df1564b10 100644 --- a/google/cloud/bigtable/mutations_batcher.py +++ b/google/cloud/bigtable/mutations_batcher.py @@ -24,6 +24,7 @@ from google.cloud.bigtable.exceptions import FailedMutationEntryError from google.cloud.bigtable._mutate_rows import _MutateRowsOperation +from google.cloud.bigtable._mutate_rows import MAX_MUTATE_ROWS_ENTRY_COUNT if TYPE_CHECKING: from google.cloud.bigtable.client import Table # pragma: no cover @@ -36,15 +37,20 @@ class _FlowControl: """ Manages flow control for batched mutations. Mutations are registered against the FlowControl object before being sent, which will block if size or count - limits have reached capacity. When a mutation is complete, it is unregistered - from the FlowControl object, which will notify any blocked requests that there + limits have reached capacity. As mutations completed, they are removed from + the FlowControl object, which will notify any blocked requests that there is additional capacity. Flow limits are not hard limits. If a single mutation exceeds the configured - limits, it will be sent in a single batch when the capacity is available. + limits, it will be allowed as a single batch when the capacity is available. """ - def __init__(self, max_mutation_count: int | None, max_mutation_bytes: int | None): + def __init__( + self, + max_mutation_count: int | None, + max_mutation_bytes: int | None, + max_entry_count: int = MAX_MUTATE_ROWS_ENTRY_COUNT, + ): """ Args: - max_mutation_count: maximum number of mutations to send in a single rpc. 
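For reference, the new limits show up directly when constructing the flow controller. A rough sketch only: the numeric values are illustrative, and the error message comes from the validation added in the following hunk.

    from google.cloud.bigtable.mutations_batcher import _FlowControl

    # within the documented bounds: accepted
    flow = _FlowControl(
        max_mutation_count=1000,        # in-flight mutation cap
        max_mutation_bytes=20_000_000,  # in-flight byte cap
        max_entry_count=50_000,         # per-rpc entry cap; must be 1..100,000
    )

    # outside the bounds: rejected at construction time
    try:
        _FlowControl(1, 1, max_entry_count=200_000)
    except ValueError as exc:
        print(exc)  # max_entry_count must be between 1 and 100000
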
@@ -52,6 +58,8 @@ def __init__(self, max_mutation_count: int | None, max_mutation_bytes: int | Non If None, no limit is enforced. - max_mutation_bytes: maximum number of bytes to send in a single rpc. If None, no limit is enforced. + - max_entry_count: maximum number of entries to send in a single rpc. + Limited to 100,000 by the MutateRows API. """ self.max_mutation_count = ( max_mutation_count if max_mutation_count is not None else float("inf") @@ -59,6 +67,18 @@ def __init__(self, max_mutation_count: int | None, max_mutation_bytes: int | Non self.max_mutation_bytes = ( max_mutation_bytes if max_mutation_bytes is not None else float("inf") ) + self.max_entry_count = max_entry_count + if ( + self.max_entry_count > MAX_MUTATE_ROWS_ENTRY_COUNT + or self.max_entry_count < 1 + ): + raise ValueError( + f"max_entry_count must be between 1 and {MAX_MUTATE_ROWS_ENTRY_COUNT}" + ) + if self.max_mutation_count < 1: + raise ValueError("max_mutation_count must be greater than 0") + if self.max_mutation_bytes < 1: + raise ValueError("max_mutation_bytes must be greater than 0") self.capacity_condition = asyncio.Condition() self.in_flight_mutation_count = 0 self.in_flight_mutation_bytes = 0 @@ -76,9 +96,7 @@ def _has_capacity(self, additional_count: int, additional_size: int) -> bool: # check if we have capacity for new mutation new_size = self.in_flight_mutation_bytes + additional_size new_count = self.in_flight_mutation_count + additional_count - return ( - new_size <= acceptable_size and new_count <= acceptable_count - ) + return new_size <= acceptable_size and new_count <= acceptable_count async def remove_from_flow(self, mutations: list[RowMutationEntry]) -> None: """ @@ -116,7 +134,12 @@ async def add_to_flow(self, mutations: list[RowMutationEntry]): next_entry = mutations[end_idx] next_size = next_entry.size() next_count = len(next_entry.mutations) - if self._has_capacity(next_count, next_size): + num_in_batch = end_idx - start_idx + if ( + self._has_capacity(next_count, next_size) + and num_in_batch < self.max_entry_count + ): + # room for new mutation; add to batch end_idx += 1 self.in_flight_mutation_bytes += next_size self.in_flight_mutation_count += next_count diff --git a/tests/unit/test__mutate_rows.py b/tests/unit/test__mutate_rows.py index 4fba16f23..49bc90d90 100644 --- a/tests/unit/test__mutate_rows.py +++ b/tests/unit/test__mutate_rows.py @@ -105,6 +105,29 @@ def test_ctor(self): assert instance.remaining_indices == list(range(len(entries))) assert instance.errors == {} + def test_ctor_too_many_entries(self): + """ + should raise an error if an operation is created with more than 100,000 entries + """ + from google.cloud.bigtable._mutate_rows import MAX_MUTATE_ROWS_ENTRY_COUNT + + assert MAX_MUTATE_ROWS_ENTRY_COUNT == 100000 + + client = mock.Mock() + table = mock.Mock() + entries = [None] * MAX_MUTATE_ROWS_ENTRY_COUNT + operation_timeout = 0.05 + attempt_timeout = 0.01 + # no errors if at limit + self._make_one(client, table, entries, operation_timeout, attempt_timeout) + # raise error after crossing + with pytest.raises(ValueError) as e: + self._make_one( + client, table, entries + [None], operation_timeout, attempt_timeout + ) + assert "mutate_rows must contain at most 100000 entries" in str(e.value) + assert "Received 100001" in str(e.value) + @pytest.mark.asyncio async def test_mutate_rows_operation(self): """ diff --git a/tests/unit/test_mutations.py b/tests/unit/test_mutations.py index ebb0d6e60..7540bb212 100644 --- a/tests/unit/test_mutations.py +++ 
b/tests/unit/test_mutations.py @@ -488,6 +488,19 @@ def test_ctor(self): assert instance.row_key == expected_key assert list(instance.mutations) == expected_mutations + def test_ctor_over_limit(self): + """Should raise error if mutations exceed MAX_MUTATIONS_PER_ENTRY""" + from google.cloud.bigtable.mutations import MAX_MUTATIONS_PER_ENTRY + + assert MAX_MUTATIONS_PER_ENTRY == 100_000 + # no errors at limit + expected_mutations = [None for _ in range(MAX_MUTATIONS_PER_ENTRY)] + self._make_one(b"row_key", expected_mutations) + # error if over limit + with pytest.raises(ValueError) as e: + self._make_one("key", expected_mutations + [mock.Mock()]) + assert "entries must have <= 100000 mutations" in str(e.value) + def test_ctor_str_key(self): expected_key = "row_key" expected_mutations = [mock.Mock(), mock.Mock()] diff --git a/tests/unit/test_mutations_batcher.py b/tests/unit/test_mutations_batcher.py index 85a4b8a6f..6e53965f3 100644 --- a/tests/unit/test_mutations_batcher.py +++ b/tests/unit/test_mutations_batcher.py @@ -33,32 +33,58 @@ def _make_mutation(count=1, size=1): class Test_FlowControl: - def _make_one(self, max_mutation_count=10, max_mutation_bytes=100): + def _make_one( + self, max_mutation_count=10, max_mutation_bytes=100, max_entry_count=100_000 + ): from google.cloud.bigtable.mutations_batcher import _FlowControl - return _FlowControl(max_mutation_count, max_mutation_bytes) + return _FlowControl(max_mutation_count, max_mutation_bytes, max_entry_count) def test_ctor(self): max_mutation_count = 9 max_mutation_bytes = 19 - instance = self._make_one(max_mutation_count, max_mutation_bytes) + max_entry_count = 29 + instance = self._make_one( + max_mutation_count, max_mutation_bytes, max_entry_count + ) assert instance.max_mutation_count == max_mutation_count assert instance.max_mutation_bytes == max_mutation_bytes + assert instance.max_entry_count == max_entry_count assert instance.in_flight_mutation_count == 0 assert instance.in_flight_mutation_bytes == 0 assert isinstance(instance.capacity_condition, asyncio.Condition) def test_ctor_empty_values(self): """Test constructor with None count and bytes""" + from google.cloud.bigtable._mutate_rows import MAX_MUTATE_ROWS_ENTRY_COUNT + instance = self._make_one(None, None) assert instance.max_mutation_count == float("inf") assert instance.max_mutation_bytes == float("inf") + assert instance.max_entry_count == MAX_MUTATE_ROWS_ENTRY_COUNT + + def test_ctor_invalid_values(self): + """Test that values are positive, and fit within expected limits""" + from google.cloud.bigtable._mutate_rows import MAX_MUTATE_ROWS_ENTRY_COUNT + + with pytest.raises(ValueError) as e: + self._make_one(0, 1) + assert "max_mutation_count must be greater than 0" in str(e.value) + with pytest.raises(ValueError) as e: + self._make_one(1, 0) + assert "max_mutation_bytes must be greater than 0" in str(e.value) + with pytest.raises(ValueError) as e: + self._make_one(1, 1, 0) + assert "max_entry_count must be between 1 and 100000" in str(e.value) + with pytest.raises(ValueError) as e: + self._make_one(1, 1, MAX_MUTATE_ROWS_ENTRY_COUNT + 1) + assert "max_entry_count must be between 1 and" in str(e.value) @pytest.mark.parametrize( "max_count,max_size,existing_count,existing_size,new_count,new_size,expected", [ - (0, 0, 0, 0, 0, 0, True), - (0, 0, 1, 1, 1, 1, False), + (1, 1, 0, 0, 0, 0, True), + (1, 1, 1, 1, 1, 1, False), (10, 10, 0, 0, 0, 0, True), (10, 10, 0, 0, 9, 9, True), (10, 10, 0, 0, 11, 9, True), @@ -75,6 +101,14 @@ def test_ctor_empty_values(self): (12, 12, 5, 5, 
6, 6, True), (12, 12, 6, 6, 6, 6, True), (12, 12, 6, 6, 7, 7, False), + # allow capacity check if new_count or new_size exceeds limits + (12, 12, 0, 0, 13, 13, True), + (12, 12, 12, 0, 0, 13, True), + (12, 12, 0, 12, 13, 0, True), + # but not if there's already values in flight + (12, 12, 1, 1, 13, 13, False), + (12, 12, 1, 1, 0, 13, False), + (12, 12, 1, 1, 13, 0, False), ], ) def test__has_capacity( @@ -172,31 +206,37 @@ async def task_routine(): @pytest.mark.asyncio @pytest.mark.parametrize( - "mutations,count_cap,size_cap,expected_results", + "mutations,count_cap,size_cap,entry_cap,expected_results", [ # high capacity results in no batching - ([(5, 5), (1, 1), (1, 1)], 10, 10, [[(5, 5), (1, 1), (1, 1)]]), + ([(5, 5), (1, 1), (1, 1)], 10, 10, 100, [[(5, 5), (1, 1), (1, 1)]]), # low capacity splits up into batches - ([(1, 1), (1, 1), (1, 1)], 1, 1, [[(1, 1)], [(1, 1)], [(1, 1)]]), + ([(1, 1), (1, 1), (1, 1)], 1, 1, 100, [[(1, 1)], [(1, 1)], [(1, 1)]]), # test count as limiting factor - ([(1, 1), (1, 1), (1, 1)], 2, 10, [[(1, 1), (1, 1)], [(1, 1)]]), + ([(1, 1), (1, 1), (1, 1)], 2, 10, 100, [[(1, 1), (1, 1)], [(1, 1)]]), # test size as limiting factor - ([(1, 1), (1, 1), (1, 1)], 10, 2, [[(1, 1), (1, 1)], [(1, 1)]]), + ([(1, 1), (1, 1), (1, 1)], 10, 2, 100, [[(1, 1), (1, 1)], [(1, 1)]]), # test with some bloackages and some flows ( [(1, 1), (5, 5), (4, 1), (1, 4), (1, 1)], 5, 5, + 100, [[(1, 1)], [(5, 5)], [(4, 1), (1, 4)], [(1, 1)]], ), + # flows with entry count above max request limit should be batched + ([(1, 1)] * 11, 100, 100, 10, [[(1, 1)] * 10, [(1, 1)]]), + ([(1, 1)] * 10, 100, 100, 1, [[(1, 1)] for _ in range(10)]), ], ) - async def test_add_to_flow(self, mutations, count_cap, size_cap, expected_results): + async def test_add_to_flow( + self, mutations, count_cap, size_cap, entry_cap, expected_results + ): """ Test batching with various flow control settings """ mutation_objs = [_make_mutation(count=m[0], size=m[1]) for m in mutations] - instance = self._make_one(count_cap, size_cap) + instance = self._make_one(count_cap, size_cap, entry_cap) i = 0 async for batch in instance.add_to_flow(mutation_objs): expected_batch = expected_results[i] @@ -222,7 +262,9 @@ async def test_add_to_flow_oversize(self): results = [out async for out in instance.add_to_flow([large_size_mutation])] assert len(results) == 1 await instance.remove_from_flow(results[0]) - count_results = [out async for out in instance.add_to_flow([large_count_mutation])] + count_results = [ + out async for out in instance.add_to_flow([large_count_mutation]) + ] assert len(count_results) == 1 From 8b8d8ab4060c69e1198ed4705967590cd2b5dbae Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 7 Jun 2023 14:16:14 -0700 Subject: [PATCH 127/213] refactored mutation system tests --- google/cloud/bigtable/client.py | 2 +- tests/system/test_system.py | 89 ++++++++++++++++++++------------- 2 files changed, 56 insertions(+), 35 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 4d1d9c14d..c51c82e58 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -52,9 +52,9 @@ from google.cloud.bigtable._mutate_rows import _MutateRowsOperation from google.cloud.bigtable._helpers import _make_metadata from google.cloud.bigtable._helpers import _convert_retry_deadline +from google.cloud.bigtable.mutations_batcher import MutationsBatcher if TYPE_CHECKING: - from google.cloud.bigtable.mutations_batcher import MutationsBatcher from google.cloud.bigtable import 
RowKeySamples from google.cloud.bigtable.row_filters import RowFilter from google.cloud.bigtable.read_modify_write_rules import ReadModifyWriteRule diff --git a/tests/system/test_system.py b/tests/system/test_system.py index f0fab7d45..b04a8e83a 100644 --- a/tests/system/test_system.py +++ b/tests/system/test_system.py @@ -16,6 +16,7 @@ import pytest_asyncio import os import asyncio +import uuid from google.api_core import retry from google.api_core.exceptions import ClientError @@ -203,6 +204,24 @@ async def _retrieve_cell_value(table, row_key): cell = row.cells[0] return cell.value +async def _create_row_and_mutation(table, temp_rows, *, start_value=b"start", new_value=b"new_value"): + """ + Helper to create a new row, and a sample set_cell mutation to change its value + """ + from google.cloud.bigtable.mutations import SetCell + row_key = uuid.uuid4().hex.encode() + family = TEST_FAMILY + qualifier = b"test-qualifier" + await temp_rows.add_row( + row_key, family=family, qualifier=qualifier, value=start_value + ) + # ensure cell is initialized + assert (await _retrieve_cell_value(table, row_key)) == start_value + + mutation = SetCell( + family=TEST_FAMILY, qualifier=qualifier, new_value=new_value + ) + return row_key, mutation @pytest_asyncio.fixture(scope="function") async def temp_rows(table): @@ -228,28 +247,14 @@ async def test_mutation_set_cell(table, temp_rows): """ Ensure cells can be set properly """ - from google.cloud.bigtable.mutations import SetCell - - row_key = b"mutate" - family = TEST_FAMILY - qualifier = b"test-qualifier" - start_value = b"start" - await temp_rows.add_row( - row_key, family=family, qualifier=qualifier, value=start_value - ) - - # ensure cell is initialized - assert (await _retrieve_cell_value(table, row_key)) == start_value - - expected_value = b"new-value" - mutation = SetCell( - family=TEST_FAMILY, qualifier=b"test-qualifier", new_value=expected_value - ) + row_key = b"bulk_mutate" + new_value = uuid.uuid4().hex.encode() + row_key, mutation = await _create_row_and_mutation(table, temp_rows, new_value=new_value) await table.mutate_row(row_key, mutation) # ensure cell is updated - assert (await _retrieve_cell_value(table, row_key)) == expected_value + assert (await _retrieve_cell_value(table, row_key)) == new_value @retry.Retry(predicate=retry.if_exception_type(ClientError), initial=1, maximum=5) @@ -258,28 +263,44 @@ async def test_bulk_mutations_set_cell(client, table, temp_rows): """ Ensure cells can be set properly """ - from google.cloud.bigtable.mutations import SetCell, RowMutationEntry + from google.cloud.bigtable.mutations import RowMutationEntry + new_value = uuid.uuid4().hex.encode() + row_key, mutation = await _create_row_and_mutation(table, temp_rows, new_value=new_value) + bulk_mutation = RowMutationEntry(row_key, [mutation]) - row_key = b"bulk_mutate" - family = TEST_FAMILY - qualifier = b"test-qualifier" - start_value = b"start" - await temp_rows.add_row( - row_key, family=family, qualifier=qualifier, value=start_value - ) + await table.bulk_mutate_rows([bulk_mutation]) - # ensure cell is initialized - assert (await _retrieve_cell_value(table, row_key)) == start_value + # ensure cell is updated + assert (await _retrieve_cell_value(table, row_key)) == new_value - expected_value = b"new-value" - mutation = SetCell( - family=TEST_FAMILY, qualifier=b"test-qualifier", new_value=expected_value - ) +@retry.Retry(predicate=retry.if_exception_type(ClientError), initial=1, maximum=5) +@pytest.mark.asyncio +async def 
test_mutations_batcher_set_cell(client, table, temp_rows): + from google.cloud.bigtable.mutations import RowMutationEntry + new_value = uuid.uuid4().hex.encode() + row_key, mutation = await _create_row_and_mutation(table, temp_rows, new_value=new_value) bulk_mutation = RowMutationEntry(row_key, [mutation]) - await table.bulk_mutate_rows([bulk_mutation]) + async with table.mutations_batcher() as batcher: + batcher.append(bulk_mutation) + # ensure cell is updated + assert (await _retrieve_cell_value(table, row_key)) == new_value + assert len(batcher._staged_mutations) == 0 + +@retry.Retry(predicate=retry.if_exception_type(ClientError), initial=1, maximum=5) +@pytest.mark.asyncio +async def test_mutations_batcher_manual_flush(client, table, temp_rows): + from google.cloud.bigtable.mutations import RowMutationEntry + new_value = uuid.uuid4().hex.encode() + row_key, mutation = await _create_row_and_mutation(table, temp_rows, new_value=new_value) + bulk_mutation = RowMutationEntry(row_key, [mutation]) + batcher = table.mutations_batcher() + batcher.append(bulk_mutation) + assert len(batcher._staged_mutations) == 1 + await batcher.flush() + assert len(batcher._staged_mutations) == 0 # ensure cell is updated - assert (await _retrieve_cell_value(table, row_key)) == expected_value + assert (await _retrieve_cell_value(table, row_key)) == new_value @retry.Retry(predicate=retry.if_exception_type(ClientError), initial=1, maximum=5) From a1de714866ec28dfe6de13bb8888647422fb7c75 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 7 Jun 2023 14:51:56 -0700 Subject: [PATCH 128/213] added batcher system tests --- tests/system/test_system.py | 179 ++++++++++++++++++++++++++++++++---- 1 file changed, 163 insertions(+), 16 deletions(-) diff --git a/tests/system/test_system.py b/tests/system/test_system.py index b04a8e83a..4503fb236 100644 --- a/tests/system/test_system.py +++ b/tests/system/test_system.py @@ -204,11 +204,15 @@ async def _retrieve_cell_value(table, row_key): cell = row.cells[0] return cell.value -async def _create_row_and_mutation(table, temp_rows, *, start_value=b"start", new_value=b"new_value"): + +async def _create_row_and_mutation( + table, temp_rows, *, start_value=b"start", new_value=b"new_value" +): """ Helper to create a new row, and a sample set_cell mutation to change its value """ from google.cloud.bigtable.mutations import SetCell + row_key = uuid.uuid4().hex.encode() family = TEST_FAMILY qualifier = b"test-qualifier" @@ -218,11 +222,10 @@ async def _create_row_and_mutation(table, temp_rows, *, start_value=b"start", ne # ensure cell is initialized assert (await _retrieve_cell_value(table, row_key)) == start_value - mutation = SetCell( - family=TEST_FAMILY, qualifier=qualifier, new_value=new_value - ) + mutation = SetCell(family=TEST_FAMILY, qualifier=qualifier, new_value=new_value) return row_key, mutation + @pytest_asyncio.fixture(scope="function") async def temp_rows(table): builder = TempRowBuilder(table) @@ -249,7 +252,9 @@ async def test_mutation_set_cell(table, temp_rows): """ row_key = b"bulk_mutate" new_value = uuid.uuid4().hex.encode() - row_key, mutation = await _create_row_and_mutation(table, temp_rows, new_value=new_value) + row_key, mutation = await _create_row_and_mutation( + table, temp_rows, new_value=new_value + ) await table.mutate_row(row_key, mutation) @@ -264,8 +269,11 @@ async def test_bulk_mutations_set_cell(client, table, temp_rows): Ensure cells can be set properly """ from google.cloud.bigtable.mutations import RowMutationEntry + new_value = 
uuid.uuid4().hex.encode() - row_key, mutation = await _create_row_and_mutation(table, temp_rows, new_value=new_value) + row_key, mutation = await _create_row_and_mutation( + table, temp_rows, new_value=new_value + ) bulk_mutation = RowMutationEntry(row_key, [mutation]) await table.bulk_mutate_rows([bulk_mutation]) @@ -273,12 +281,19 @@ async def test_bulk_mutations_set_cell(client, table, temp_rows): # ensure cell is updated assert (await _retrieve_cell_value(table, row_key)) == new_value + @retry.Retry(predicate=retry.if_exception_type(ClientError), initial=1, maximum=5) @pytest.mark.asyncio -async def test_mutations_batcher_set_cell(client, table, temp_rows): +async def test_mutations_batcher_context_manager(client, table, temp_rows): + """ + test batcher with context manager. Should flush on exit + """ from google.cloud.bigtable.mutations import RowMutationEntry + new_value = uuid.uuid4().hex.encode() - row_key, mutation = await _create_row_and_mutation(table, temp_rows, new_value=new_value) + row_key, mutation = await _create_row_and_mutation( + table, temp_rows, new_value=new_value + ) bulk_mutation = RowMutationEntry(row_key, [mutation]) async with table.mutations_batcher() as batcher: @@ -287,20 +302,152 @@ async def test_mutations_batcher_set_cell(client, table, temp_rows): assert (await _retrieve_cell_value(table, row_key)) == new_value assert len(batcher._staged_mutations) == 0 + @retry.Retry(predicate=retry.if_exception_type(ClientError), initial=1, maximum=5) @pytest.mark.asyncio async def test_mutations_batcher_manual_flush(client, table, temp_rows): + """ + batcher should flush when manually requested + """ from google.cloud.bigtable.mutations import RowMutationEntry + new_value = uuid.uuid4().hex.encode() - row_key, mutation = await _create_row_and_mutation(table, temp_rows, new_value=new_value) + row_key, mutation = await _create_row_and_mutation( + table, temp_rows, new_value=new_value + ) bulk_mutation = RowMutationEntry(row_key, [mutation]) - batcher = table.mutations_batcher() - batcher.append(bulk_mutation) - assert len(batcher._staged_mutations) == 1 - await batcher.flush() - assert len(batcher._staged_mutations) == 0 - # ensure cell is updated - assert (await _retrieve_cell_value(table, row_key)) == new_value + async with table.mutations_batcher() as batcher: + batcher.append(bulk_mutation) + assert len(batcher._staged_mutations) == 1 + await batcher.flush() + assert len(batcher._staged_mutations) == 0 + # ensure cell is updated + assert (await _retrieve_cell_value(table, row_key)) == new_value + + +@retry.Retry(predicate=retry.if_exception_type(ClientError), initial=1, maximum=5) +@pytest.mark.asyncio +async def test_mutations_batcher_timer_flush(client, table, temp_rows): + """ + batch should occur after flush_interval seconds + """ + from google.cloud.bigtable.mutations import RowMutationEntry + + new_value = uuid.uuid4().hex.encode() + row_key, mutation = await _create_row_and_mutation( + table, temp_rows, new_value=new_value + ) + bulk_mutation = RowMutationEntry(row_key, [mutation]) + flush_interval = 0.1 + async with table.mutations_batcher(flush_interval=flush_interval) as batcher: + batcher.append(bulk_mutation) + await asyncio.sleep(0) + assert len(batcher._staged_mutations) == 1 + await asyncio.sleep(flush_interval + 0.1) + assert len(batcher._staged_mutations) == 0 + # ensure cell is updated + assert (await _retrieve_cell_value(table, row_key)) == new_value + + +@retry.Retry(predicate=retry.if_exception_type(ClientError), initial=1, maximum=5) 
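Taken together, the system tests above exercise the batcher's public surface: context-manager flushing, manual flush, and timer-based flush. A condensed usage sketch follows, assuming an already-constructed async table object like the one provided by the test fixtures; the family, qualifier, and value literals are placeholders.

from google.cloud.bigtable.mutations import RowMutationEntry, SetCell

async def write_rows_with_batcher(table, row_keys):
    # flush when 100 mutations are staged, or after 1 second, whichever comes first
    async with table.mutations_batcher(
        flush_limit_count=100,
        flush_interval=1.0,
    ) as batcher:
        for key in row_keys:
            cell = SetCell(family="family", qualifier=b"qualifier", new_value=b"value")
            batcher.append(RowMutationEntry(key, [cell]))
        # an explicit flush can also be requested before the context exits
        await batcher.flush()
    # any remaining staged mutations are flushed automatically on context exit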
+@pytest.mark.asyncio +async def test_mutations_batcher_count_flush(client, table, temp_rows): + """ + batch should flush after flush_limit_count mutations + """ + from google.cloud.bigtable.mutations import RowMutationEntry + + new_value, new_value2 = [uuid.uuid4().hex.encode() for _ in range(2)] + row_key, mutation = await _create_row_and_mutation( + table, temp_rows, new_value=new_value + ) + bulk_mutation = RowMutationEntry(row_key, [mutation]) + row_key2, mutation2 = await _create_row_and_mutation( + table, temp_rows, new_value=new_value2 + ) + bulk_mutation2 = RowMutationEntry(row_key2, [mutation2]) + + async with table.mutations_batcher(flush_limit_count=2) as batcher: + batcher.append(bulk_mutation) + # should be noop; flush not scheduled + await batcher._prev_flush + assert len(batcher._staged_mutations) == 1 + batcher.append(bulk_mutation2) + # task should now be scheduled + await batcher._prev_flush + assert len(batcher._staged_mutations) == 0 + # ensure cells were updated + assert (await _retrieve_cell_value(table, row_key)) == new_value + assert (await _retrieve_cell_value(table, row_key2)) == new_value2 + + +@retry.Retry(predicate=retry.if_exception_type(ClientError), initial=1, maximum=5) +@pytest.mark.asyncio +async def test_mutations_batcher_bytes_flush(client, table, temp_rows): + """ + batch should flush after flush_limit_count mutations + """ + from google.cloud.bigtable.mutations import RowMutationEntry + + new_value, new_value2 = [uuid.uuid4().hex.encode() for _ in range(2)] + row_key, mutation = await _create_row_and_mutation( + table, temp_rows, new_value=new_value + ) + bulk_mutation = RowMutationEntry(row_key, [mutation]) + row_key2, mutation2 = await _create_row_and_mutation( + table, temp_rows, new_value=new_value2 + ) + bulk_mutation2 = RowMutationEntry(row_key2, [mutation2]) + + flush_limit = bulk_mutation.size() + bulk_mutation2.size() - 1 + + async with table.mutations_batcher(flush_limit_bytes=flush_limit) as batcher: + batcher.append(bulk_mutation) + # should be noop; flush not scheduled + await batcher._prev_flush + assert len(batcher._staged_mutations) == 1 + batcher.append(bulk_mutation2) + # task should now be scheduled + await batcher._prev_flush + assert len(batcher._staged_mutations) == 0 + # ensure cells were updated + assert (await _retrieve_cell_value(table, row_key)) == new_value + assert (await _retrieve_cell_value(table, row_key2)) == new_value2 + + +@retry.Retry(predicate=retry.if_exception_type(ClientError), initial=1, maximum=5) +@pytest.mark.asyncio +async def test_mutations_batcher_no_flush(client, table, temp_rows): + """ + test with no flush requirements met + """ + from google.cloud.bigtable.mutations import RowMutationEntry + + new_value = uuid.uuid4().hex.encode() + start_value = b"unchanged" + row_key, mutation = await _create_row_and_mutation( + table, temp_rows, start_value=start_value, new_value=new_value + ) + bulk_mutation = RowMutationEntry(row_key, [mutation]) + row_key2, mutation2 = await _create_row_and_mutation( + table, temp_rows, start_value=start_value, new_value=new_value + ) + bulk_mutation2 = RowMutationEntry(row_key2, [mutation2]) + + size_limit = bulk_mutation.size() + bulk_mutation2.size() + 1 + async with table.mutations_batcher( + flush_limit_bytes=size_limit, flush_limit_count=3, flush_interval=1 + ) as batcher: + batcher.append(bulk_mutation) + assert len(batcher._staged_mutations) == 1 + batcher.append(bulk_mutation2) + # should be noop; flush not scheduled + await batcher._prev_flush + await 
asyncio.sleep(0.01) + assert len(batcher._staged_mutations) == 2 + # ensure cells were updated + assert (await _retrieve_cell_value(table, row_key)) == start_value + assert (await _retrieve_cell_value(table, row_key2)) == start_value @retry.Retry(predicate=retry.if_exception_type(ClientError), initial=1, maximum=5) From 2382a04b6d40c25ca616fe80fccd2d382018f16f Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 7 Jun 2023 15:27:23 -0700 Subject: [PATCH 129/213] allow batcher to append multiple entries --- google/cloud/bigtable/mutations_batcher.py | 22 ++++++++++--- tests/system/test_system.py | 10 ++++-- tests/unit/test_mutations_batcher.py | 37 ++++++++++++++++++++-- 3 files changed, 60 insertions(+), 9 deletions(-) diff --git a/google/cloud/bigtable/mutations_batcher.py b/google/cloud/bigtable/mutations_batcher.py index df1564b10..e9ffd5ace 100644 --- a/google/cloud/bigtable/mutations_batcher.py +++ b/google/cloud/bigtable/mutations_batcher.py @@ -25,6 +25,7 @@ from google.cloud.bigtable._mutate_rows import _MutateRowsOperation from google.cloud.bigtable._mutate_rows import MAX_MUTATE_ROWS_ENTRY_COUNT +from google.cloud.bigtable.mutations import Mutation if TYPE_CHECKING: from google.cloud.bigtable.client import Table # pragma: no cover @@ -231,16 +232,29 @@ async def _flush_timer(self, interval: float | None): if not self.closed and self._staged_mutations: self._schedule_flush() - def append(self, mutations: RowMutationEntry): + def append(self, mutations: RowMutationEntry | list[RowMutationEntry]): """ Add a new set of mutations to the internal queue + + Args: + - mutations: entries to add to flush queue + Raises: + - RuntimeError if batcher is closed + - ValueError if an invalid mutation type is added """ if self.closed: raise RuntimeError("Cannot append to closed MutationsBatcher") - self._staged_mutations.append(mutations) + if not isinstance(mutations, list): + mutations = [mutations] + for m in mutations: + if isinstance(m, Mutation): # type: ignore + raise ValueError( + f"invalid mutation type: {type(m).__name__}. 
Only RowMutationEntry objects are supported by batcher" + ) + self._staged_mutations.extend(mutations) # start a new flush task if limits exceeded - self._staged_count += len(mutations.mutations) - self._staged_bytes += mutations.size() + self._staged_count += sum([len(m.mutations) for m in mutations]) + self._staged_bytes += sum([m.size() for m in mutations]) if ( self._staged_count >= self._flush_limit_count or self._staged_bytes >= self._flush_limit_bytes diff --git a/tests/system/test_system.py b/tests/system/test_system.py index 4503fb236..64257ea08 100644 --- a/tests/system/test_system.py +++ b/tests/system/test_system.py @@ -233,7 +233,7 @@ async def temp_rows(table): await builder.delete_rows() -@retry.Retry(predicate=retry.if_exception_type(ClientError), initial=1, maximum=5) +@retry.Retry(predicate=retry.if_exception_type(ClientError), initial=1, maximum=10) @pytest.mark.asyncio async def test_ping_and_warm_gapic(client, table): """ @@ -290,14 +290,18 @@ async def test_mutations_batcher_context_manager(client, table, temp_rows): """ from google.cloud.bigtable.mutations import RowMutationEntry - new_value = uuid.uuid4().hex.encode() + new_value, new_value2 = [uuid.uuid4().hex.encode() for _ in range(2)] row_key, mutation = await _create_row_and_mutation( table, temp_rows, new_value=new_value ) + row_key2, mutation2 = await _create_row_and_mutation( + table, temp_rows, new_value=new_value2 + ) bulk_mutation = RowMutationEntry(row_key, [mutation]) + bulk_mutation2 = RowMutationEntry(row_key2, [mutation2]) async with table.mutations_batcher() as batcher: - batcher.append(bulk_mutation) + batcher.append([bulk_mutation, bulk_mutation2]) # ensure cell is updated assert (await _retrieve_cell_value(table, row_key)) == new_value assert len(batcher._staged_mutations) == 0 diff --git a/tests/unit/test_mutations_batcher.py b/tests/unit/test_mutations_batcher.py index 6e53965f3..36fdce7cd 100644 --- a/tests/unit/test_mutations_batcher.py +++ b/tests/unit/test_mutations_batcher.py @@ -457,7 +457,24 @@ async def test_append_closed(self): with pytest.raises(RuntimeError): instance = self._make_one() await instance.close() - instance.append([mock.Mock()]) + instance.append(mock.Mock()) + + @pytest.mark.asyncio + async def test_append_wrong_mutation(self): + """ + Mutation objects should raise an exception. + Only support RowMutationEntry + """ + from google.cloud.bigtable.mutations import DeleteAllFromRow + + instance = self._make_one() + expected_error = "invalid mutation type: DeleteAllFromRow. 
Only RowMutationEntry objects are supported by batcher" + with pytest.raises(ValueError) as e: + instance.append(DeleteAllFromRow()) + assert str(e.value) == expected_error + with pytest.raises(ValueError) as e: + instance.append([DeleteAllFromRow(), DeleteAllFromRow()]) + assert str(e.value) == expected_error @pytest.mark.asyncio async def test_append_outside_flow_limits(self): @@ -514,7 +531,7 @@ async def test_append( instance._staged_mutations = [] @pytest.mark.asyncio - async def test_append_multiple(self): + async def test_append_multiple_sequentially(self): """Append multiple mutations""" async with self._make_one(flush_limit_count=8, flush_limit_bytes=8) as instance: assert instance._staged_count == 0 @@ -539,6 +556,22 @@ async def test_append_multiple(self): assert len(instance._staged_mutations) == 3 instance._staged_mutations = [] + @pytest.mark.asyncio + async def test_append_multiple_single_call(self): + """Append multiple mutations in a single append call""" + async with self._make_one(flush_limit_count=8, flush_limit_bytes=8) as instance: + assert instance._staged_count == 0 + assert instance._staged_bytes == 0 + assert instance._staged_mutations == [] + mutation_list = [_make_mutation(count=2, size=3) for _ in range(3)] + with mock.patch.object(instance, "_schedule_flush") as flush_mock: + instance.append(mutation_list) + assert flush_mock.call_count == 1 + assert instance._staged_count == 6 + assert instance._staged_bytes == 9 + assert len(instance._staged_mutations) == 3 + instance._staged_mutations = [] + @pytest.mark.parametrize("raise_exceptions", [True, False]) @pytest.mark.asyncio async def test_flush(self, raise_exceptions): From 1db03777eac7301f08a917d3100fa2f04384385e Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 7 Jun 2023 15:50:21 -0700 Subject: [PATCH 130/213] improved timeout handling --- google/cloud/bigtable/mutations_batcher.py | 24 +++++++++++---------- tests/unit/test_mutations_batcher.py | 25 +++++++++++++++++----- 2 files changed, 33 insertions(+), 16 deletions(-) diff --git a/google/cloud/bigtable/mutations_batcher.py b/google/cloud/bigtable/mutations_batcher.py index e9ffd5ace..821c4d8d3 100644 --- a/google/cloud/bigtable/mutations_batcher.py +++ b/google/cloud/bigtable/mutations_batcher.py @@ -17,7 +17,7 @@ import asyncio import atexit import warnings -from typing import TYPE_CHECKING +from typing import Awaitable, TYPE_CHECKING from google.cloud.bigtable.mutations import RowMutationEntry from google.cloud.bigtable.exceptions import MutationsExceptionGroup @@ -261,26 +261,28 @@ def append(self, mutations: RowMutationEntry | list[RowMutationEntry]): ): self._schedule_flush() - # TODO: add tests for timeout - async def flush(self, *, raise_exceptions=True, timeout=30): + async def flush(self, *, raise_exceptions: bool = True, timeout: float | None = 60): """ - Flush all staged mutations to the server + Flush all staged mutations Args: - raise_exceptions: if True, will raise any unreported exceptions from this or previous flushes. If False, exceptions will be stored in self.exceptions and raised on a future flush or when the batcher is closed. - - timeout: maximum time to wait for flush to complete. If exceeded, flush will - continue in the background and exceptions will be raised on a future flush + - timeout: maximum time to wait for flush to complete, in seconds. 
+ If exceeded, flush will continue in the background and exceptions + will be surfaced on the next flush Raises: - MutationsExceptionGroup if raise_exceptions is True and any mutations fail - - asyncio.TimeoutError if timeout is reached + - asyncio.TimeoutError if timeout is reached before flush task completes. """ # add recent staged mutations to flush task, and wait for flush to complete - flush_task = self._schedule_flush() - # wait timeout seconds for flush to complete - # if timeout is exceeded, flush task will still be running in the background - await asyncio.wait_for(asyncio.shield(flush_task), timeout=timeout) + flush_job : Awaitable[None] = self._schedule_flush() + if timeout is not None: + # wait `timeout seconds for flush to complete + # if timeout is exceeded, flush task will still be running in the background + flush_job = asyncio.wait_for(asyncio.shield(flush_job), timeout=timeout) + await flush_job # raise any unreported exceptions from this or previous flushes if raise_exceptions: self._raise_exceptions() diff --git a/tests/unit/test_mutations_batcher.py b/tests/unit/test_mutations_batcher.py index 36fdce7cd..124b112e1 100644 --- a/tests/unit/test_mutations_batcher.py +++ b/tests/unit/test_mutations_batcher.py @@ -574,21 +574,36 @@ async def test_append_multiple_single_call(self): @pytest.mark.parametrize("raise_exceptions", [True, False]) @pytest.mark.asyncio - async def test_flush(self, raise_exceptions): + async def test_flush_no_timeout(self, raise_exceptions): """flush should internally call _schedule_flush""" mock_obj = AsyncMock() async with self._make_one() as instance: with mock.patch.object(instance, "_schedule_flush") as flush_mock: with mock.patch.object(instance, "_raise_exceptions") as raise_mock: flush_mock.return_value = mock_obj.__call__() - if not raise_exceptions: - await instance.flush(raise_exceptions=False) - else: - await instance.flush() + await instance.flush(raise_exceptions=raise_exceptions, timeout=None) assert flush_mock.call_count == 1 assert mock_obj.await_count == 1 assert raise_mock.call_count == int(raise_exceptions) + @pytest.mark.asyncio + async def test_flush_w_timeout(self): + """ + flush should raise TimeoutError if incomplete by timeline, but flush + task should continue internally + """ + async with self._make_one() as instance: + # create mock internal flush job + instance._prev_flush = asyncio.create_task(asyncio.sleep(0.5)) + with pytest.raises(asyncio.TimeoutError): + await instance.flush(timeout=0.01) + # ensure that underlying flush task is still running + assert not instance._prev_flush.done() + # ensure flush task can complete without error + await instance._prev_flush + assert instance._prev_flush.done() + assert instance._prev_flush.exception() is None + @pytest.mark.asyncio async def test_schedule_flush_no_mutations(self): """schedule flush should return prev_flush if no new mutations""" From 589aa5d04f6b5a2bd310d0bf06aeb7058fb6fcd2 Mon Sep 17 00:00:00 2001 From: Mattie Fu Date: Thu, 8 Jun 2023 16:36:43 -0400 Subject: [PATCH 131/213] fix: add a callback function on flush_rows (#796) * fix: add a callback function on flush_rows * reformat * address comments * update doc * update names * add a test --- google/cloud/bigtable/batcher.py | 13 ++++++++++++- tests/unit/test_batcher.py | 21 +++++++++++++++++++++ 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigtable/batcher.py b/google/cloud/bigtable/batcher.py index 6b06ec060..a6eb806e9 100644 --- a/google/cloud/bigtable/batcher.py +++ 
b/google/cloud/bigtable/batcher.py @@ -192,6 +192,11 @@ class MutationsBatcher(object): :type flush_interval: float :param flush_interval: (Optional) The interval (in seconds) between asynchronous flush. Default is 1 second. + + :type batch_completed_callback: Callable[list:[`~google.rpc.status_pb2.Status`]] = None + :param batch_completed_callback: (Optional) A callable for handling responses + after the current batch is sent. The callable function expect a list of grpc + Status. """ def __init__( @@ -200,6 +205,7 @@ def __init__( flush_count=FLUSH_COUNT, max_row_bytes=MAX_MUTATION_SIZE, flush_interval=1, + batch_completed_callback=None, ): self._rows = _MutationsBatchQueue( max_mutation_bytes=max_row_bytes, flush_count=flush_count @@ -215,6 +221,7 @@ def __init__( ) self.futures_mapping = {} self.exceptions = queue.Queue() + self._user_batch_completed_callback = batch_completed_callback @property def flush_count(self): @@ -337,7 +344,8 @@ def _flush_async(self): batch_info = _BatchInfo() def _batch_completed_callback(self, future): - """Callback for when the mutation has finished. + """Callback for when the mutation has finished to clean up the current batch + and release items from the flow controller. Raise exceptions if there's any. Release the resources locked by the flow control and allow enqueued tasks to be run. @@ -357,6 +365,9 @@ def _flush_rows(self, rows_to_flush): if len(rows_to_flush) > 0: response = self.table.mutate_rows(rows_to_flush) + if self._user_batch_completed_callback: + self._user_batch_completed_callback(response) + for result in response: if result.code != 0: exc = from_grpc_status(result.code, result.message) diff --git a/tests/unit/test_batcher.py b/tests/unit/test_batcher.py index a238b2852..998748141 100644 --- a/tests/unit/test_batcher.py +++ b/tests/unit/test_batcher.py @@ -35,6 +35,27 @@ def test_mutation_batcher_constructor(): assert table is mutation_batcher.table +def test_mutation_batcher_w_user_callback(): + table = _Table(TABLE_NAME) + + def callback_fn(response): + callback_fn.count = len(response) + + with MutationsBatcher( + table, flush_count=1, batch_completed_callback=callback_fn + ) as mutation_batcher: + rows = [ + DirectRow(row_key=b"row_key"), + DirectRow(row_key=b"row_key_2"), + DirectRow(row_key=b"row_key_3"), + DirectRow(row_key=b"row_key_4"), + ] + + mutation_batcher.mutate_rows(rows) + + assert callback_fn.count == 4 + + def test_mutation_batcher_mutate_row(): table = _Table(TABLE_NAME) with MutationsBatcher(table=table) as mutation_batcher: From fbe6acebd265c243508b6a08f0743bd55d144fb7 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Thu, 8 Jun 2023 18:02:10 -0400 Subject: [PATCH 132/213] chore(main): release 2.19.0 (#789) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- .release-please-manifest.json | 2 +- CHANGELOG.md | 17 +++++++++++++++++ google/cloud/bigtable/gapic_version.py | 2 +- google/cloud/bigtable_admin/gapic_version.py | 2 +- google/cloud/bigtable_admin_v2/gapic_version.py | 2 +- google/cloud/bigtable_v2/gapic_version.py | 2 +- 6 files changed, 22 insertions(+), 5 deletions(-) diff --git a/.release-please-manifest.json b/.release-please-manifest.json index e7a7a136b..b7f666a68 100644 --- a/.release-please-manifest.json +++ b/.release-please-manifest.json @@ -1,3 +1,3 @@ { - ".": "2.18.1" + ".": "2.19.0" } \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index d56f02896..dc80386a4 100644 --- 
a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,23 @@ [1]: https://pypi.org/project/google-cloud-bigtable/#history +## [2.19.0](https://github.com/googleapis/python-bigtable/compare/v2.18.1...v2.19.0) (2023-06-08) + + +### Features + +* Add ChangeStreamConfig to CreateTable and UpdateTable ([#786](https://github.com/googleapis/python-bigtable/issues/786)) ([cef70f2](https://github.com/googleapis/python-bigtable/commit/cef70f243541820225f86a520e0b2abd3a7354f7)) + + +### Bug Fixes + +* Add a callback function on flush_rows ([#796](https://github.com/googleapis/python-bigtable/issues/796)) ([589aa5d](https://github.com/googleapis/python-bigtable/commit/589aa5d04f6b5a2bd310d0bf06aeb7058fb6fcd2)) + + +### Documentation + +* **samples:** Add region tags ([#788](https://github.com/googleapis/python-bigtable/issues/788)) ([ecf539c](https://github.com/googleapis/python-bigtable/commit/ecf539c4c976fd9e5505b8abf0b697b218f09fef)) + ## [2.18.1](https://github.com/googleapis/python-bigtable/compare/v2.18.0...v2.18.1) (2023-05-11) diff --git a/google/cloud/bigtable/gapic_version.py b/google/cloud/bigtable/gapic_version.py index e1b4da1de..0f1a446f3 100644 --- a/google/cloud/bigtable/gapic_version.py +++ b/google/cloud/bigtable/gapic_version.py @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. # -__version__ = "2.18.1" # {x-release-please-version} +__version__ = "2.19.0" # {x-release-please-version} diff --git a/google/cloud/bigtable_admin/gapic_version.py b/google/cloud/bigtable_admin/gapic_version.py index e1b4da1de..0f1a446f3 100644 --- a/google/cloud/bigtable_admin/gapic_version.py +++ b/google/cloud/bigtable_admin/gapic_version.py @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. # -__version__ = "2.18.1" # {x-release-please-version} +__version__ = "2.19.0" # {x-release-please-version} diff --git a/google/cloud/bigtable_admin_v2/gapic_version.py b/google/cloud/bigtable_admin_v2/gapic_version.py index e1b4da1de..0f1a446f3 100644 --- a/google/cloud/bigtable_admin_v2/gapic_version.py +++ b/google/cloud/bigtable_admin_v2/gapic_version.py @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. # -__version__ = "2.18.1" # {x-release-please-version} +__version__ = "2.19.0" # {x-release-please-version} diff --git a/google/cloud/bigtable_v2/gapic_version.py b/google/cloud/bigtable_v2/gapic_version.py index e1b4da1de..0f1a446f3 100644 --- a/google/cloud/bigtable_v2/gapic_version.py +++ b/google/cloud/bigtable_v2/gapic_version.py @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
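The "callback function on flush_rows" entry in the changelog above refers to the legacy synchronous batcher in google.cloud.bigtable.batcher, patched a few commits earlier. A usage sketch of that callback, where the table and rows objects are placeholders assumed to be constructed elsewhere:

from google.cloud.bigtable.batcher import MutationsBatcher

def on_batch_completed(statuses):
    # statuses is a list of google.rpc.status_pb2.Status, one per flushed row
    failures = [s for s in statuses if s.code != 0]
    print(f"flushed {len(statuses)} rows, {len(failures)} failed")

with MutationsBatcher(
    table, flush_count=100, batch_completed_callback=on_batch_completed
) as batcher:
    batcher.mutate_rows(rows)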
# -__version__ = "2.18.1" # {x-release-please-version} +__version__ = "2.19.0" # {x-release-please-version} From 6a66c48a4183f95076884b4b6232a1cb0abf5d1e Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 14 Jun 2023 15:58:52 -0700 Subject: [PATCH 133/213] made batch rpcs concurrent --- google/cloud/bigtable/mutations_batcher.py | 30 +++++++-- tests/unit/test_mutations_batcher.py | 78 +++++++++++++++++++++- 2 files changed, 100 insertions(+), 8 deletions(-) diff --git a/google/cloud/bigtable/mutations_batcher.py b/google/cloud/bigtable/mutations_batcher.py index 821c4d8d3..ed26f9b88 100644 --- a/google/cloud/bigtable/mutations_batcher.py +++ b/google/cloud/bigtable/mutations_batcher.py @@ -277,7 +277,7 @@ async def flush(self, *, raise_exceptions: bool = True, timeout: float | None = - asyncio.TimeoutError if timeout is reached before flush task completes. """ # add recent staged mutations to flush task, and wait for flush to complete - flush_job : Awaitable[None] = self._schedule_flush() + flush_job: Awaitable[None] = self._schedule_flush() if timeout is not None: # wait `timeout seconds for flush to complete # if timeout is exceeded, flush task will still be running in the background @@ -310,14 +310,26 @@ async def _flush_internal( - prev_flush: the previous flush task, which will be awaited before a new flush is initiated """ - # wait for previous flush to complete - await prev_flush # flush new entries + in_process_requests: list[asyncio.Task[None]] = [prev_flush] async for batch in self._flow_control.add_to_flow(new_entries): - batch_errors = await self._execute_mutate_rows(batch) - self.exceptions.extend(batch_errors) - self._entries_processed_since_last_raise += len(batch) - await self._flow_control.remove_from_flow(batch) + batch_task = asyncio.create_task( + self._execute_mutate_rows_w_state_update(batch) + ) + in_process_requests.append(batch_task) + # wait for all inflight requests to complete + await asyncio.gather(*in_process_requests) + + async def _execute_mutate_rows_w_state_update( + self, batch: list[RowMutationEntry] + ) -> None: + """ + Calls _execute_mutate_rows, and then updates internal flush state based on results + """ + results = await self._execute_mutate_rows(batch) + self.exceptions.extend(results) + self._entries_processed_since_last_raise += len(batch) + await self._flow_control.remove_from_flow(batch) async def _execute_mutate_rows( self, batch: list[RowMutationEntry] @@ -346,6 +358,7 @@ async def _execute_mutate_rows( ) await operation.start() except MutationsExceptionGroup as e: + # strip index information from exceptions, since it is not useful in a batch context for subexc in e.exceptions: subexc.index = None return list(e.exceptions) @@ -354,6 +367,9 @@ async def _execute_mutate_rows( def _raise_exceptions(self): """ Raise any unreported exceptions from background flush operations + + Raises: + - MutationsExceptionGroup with all unreported exceptions """ if self.exceptions: exc_list, self.exceptions = self.exceptions, [] diff --git a/tests/unit/test_mutations_batcher.py b/tests/unit/test_mutations_batcher.py index 124b112e1..bd4b6e42d 100644 --- a/tests/unit/test_mutations_batcher.py +++ b/tests/unit/test_mutations_batcher.py @@ -581,7 +581,9 @@ async def test_flush_no_timeout(self, raise_exceptions): with mock.patch.object(instance, "_schedule_flush") as flush_mock: with mock.patch.object(instance, "_raise_exceptions") as raise_mock: flush_mock.return_value = mock_obj.__call__() - await instance.flush(raise_exceptions=raise_exceptions, 
timeout=None) + await instance.flush( + raise_exceptions=raise_exceptions, timeout=None + ) assert flush_mock.call_count == 1 assert mock_obj.await_count == 1 assert raise_mock.call_count == int(raise_exceptions) @@ -604,6 +606,80 @@ async def test_flush_w_timeout(self): assert instance._prev_flush.done() assert instance._prev_flush.exception() is None + @pytest.mark.asyncio + async def test_flush_concurrent_requests(self): + """ + requests should happen in parallel if multiple flushes overlap + """ + import time + + num_flushes = 10 + fake_mutations = [_make_mutation() for _ in range(num_flushes)] + async with self._make_one() as instance: + with mock.patch.object( + instance, "_execute_mutate_rows", AsyncMock() + ) as op_mock: + # mock network calls + async def mock_call(*args, **kwargs): + await asyncio.sleep(0.1) + return [] + + op_mock.side_effect = mock_call + start_time = time.monotonic() + # create a few concurrent flushes + for i in range(num_flushes): + instance._staged_mutations = [fake_mutations[i]] + try: + await instance.flush(timeout=0.01) + except asyncio.TimeoutError: + pass + # allow flushes to complete + await instance.flush() + duration = time.monotonic() - start_time + # if flushes were sequential, total duration would be 1s + assert duration < 0.25 + assert op_mock.call_count == num_flushes + + @pytest.mark.asyncio + async def test_flush_flow_control_concurrent_requests(self): + """ + requests should happen in parallel if flow control breaks up single flush into batches + """ + import time + + num_calls = 10 + fake_mutations = [_make_mutation(count=1) for _ in range(num_calls)] + async with self._make_one(flow_control_max_count=1) as instance: + with mock.patch.object( + instance, "_execute_mutate_rows", AsyncMock() + ) as op_mock: + # mock network calls + async def mock_call(*args, **kwargs): + await asyncio.sleep(0.1) + return [] + + op_mock.side_effect = mock_call + start_time = time.monotonic() + # flush one large batch, that will be broken up into smaller batches + instance._staged_mutations = fake_mutations + try: + await instance.flush(timeout=0.01) + except asyncio.TimeoutError: + pass + # make room for new mutations + for i in range(num_calls): + await instance._flow_control.remove_from_flow( + [_make_mutation(count=1)] + ) + await asyncio.sleep(0.01) + # allow flushes to complete + await instance.flush() + duration = time.monotonic() - start_time + # if flushes were sequential, total duration would be 1s + assert instance.exceptions == [] + assert duration < 0.25 + assert op_mock.call_count == num_calls + @pytest.mark.asyncio async def test_schedule_flush_no_mutations(self): """schedule flush should return prev_flush if no new mutations""" From e1718f0a022cafe5d71fa00b460aa9279d40e471 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 14 Jun 2023 16:01:38 -0700 Subject: [PATCH 134/213] allow single mutations in flow control public methods --- google/cloud/bigtable/mutations_batcher.py | 10 ++++++++-- tests/unit/test_mutations_batcher.py | 4 ++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/google/cloud/bigtable/mutations_batcher.py b/google/cloud/bigtable/mutations_batcher.py index ed26f9b88..6fe78a98d 100644 --- a/google/cloud/bigtable/mutations_batcher.py +++ b/google/cloud/bigtable/mutations_batcher.py @@ -99,10 +99,14 @@ def _has_capacity(self, additional_count: int, additional_size: int) -> bool: new_count = self.in_flight_mutation_count + additional_count return new_size <= acceptable_size and new_count <= acceptable_count - 
async def remove_from_flow(self, mutations: list[RowMutationEntry]) -> None: + async def remove_from_flow( + self, mutations: RowMutationEntry | list[RowMutationEntry] + ) -> None: """ Every time an in-flight mutation is complete, release the flow control semaphore """ + if not isinstance(mutations, list): + mutations = [mutations] total_count = sum(len(entry.mutations) for entry in mutations) total_size = sum(entry.size() for entry in mutations) self.in_flight_mutation_count -= total_count @@ -111,7 +115,7 @@ async def remove_from_flow(self, mutations: list[RowMutationEntry]) -> None: async with self.capacity_condition: self.capacity_condition.notify_all() - async def add_to_flow(self, mutations: list[RowMutationEntry]): + async def add_to_flow(self, mutations: RowMutationEntry | list[RowMutationEntry]): """ Breaks up list of mutations into batches that were registered to fit within flow control limits. This method will block when the flow control limits are @@ -125,6 +129,8 @@ async def add_to_flow(self, mutations: list[RowMutationEntry]): Raises: - ValueError if any mutation entry is larger than the flow control limits """ + if not isinstance(mutations, list): + mutations = [mutations] start_idx = 0 end_idx = 0 while end_idx < len(mutations): diff --git a/tests/unit/test_mutations_batcher.py b/tests/unit/test_mutations_batcher.py index bd4b6e42d..0a7cf5202 100644 --- a/tests/unit/test_mutations_batcher.py +++ b/tests/unit/test_mutations_batcher.py @@ -158,7 +158,7 @@ async def test_remove_from_flow_value_update( instance.in_flight_mutation_count = existing_count instance.in_flight_mutation_bytes = existing_size mutation = _make_mutation(added_count, added_size) - await instance.remove_from_flow([mutation]) + await instance.remove_from_flow(mutation) assert instance.in_flight_mutation_count == new_count assert instance.in_flight_mutation_bytes == new_size @@ -263,7 +263,7 @@ async def test_add_to_flow_oversize(self): assert len(results) == 1 await instance.remove_from_flow(results[0]) count_results = [ - out async for out in instance.add_to_flow([large_count_mutation]) + out async for out in instance.add_to_flow(large_count_mutation) ] assert len(count_results) == 1 From eeda2e919399a08c0bb0b29de95c337c90a12083 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 14 Jun 2023 18:07:55 -0700 Subject: [PATCH 135/213] mutation batcher only accepts single entries --- google/cloud/bigtable/mutations_batcher.py | 21 +++++++++------------ tests/system/test_system.py | 3 ++- tests/unit/test_mutations_batcher.py | 19 ------------------- 3 files changed, 11 insertions(+), 32 deletions(-) diff --git a/google/cloud/bigtable/mutations_batcher.py b/google/cloud/bigtable/mutations_batcher.py index 6fe78a98d..f3fb7c07a 100644 --- a/google/cloud/bigtable/mutations_batcher.py +++ b/google/cloud/bigtable/mutations_batcher.py @@ -238,29 +238,26 @@ async def _flush_timer(self, interval: float | None): if not self.closed and self._staged_mutations: self._schedule_flush() - def append(self, mutations: RowMutationEntry | list[RowMutationEntry]): + def append(self, mutation_entry: RowMutationEntry): """ Add a new set of mutations to the internal queue Args: - - mutations: entries to add to flush queue + - mutation_entry: new entry to add to flush queue Raises: - RuntimeError if batcher is closed - ValueError if an invalid mutation type is added """ if self.closed: raise RuntimeError("Cannot append to closed MutationsBatcher") - if not isinstance(mutations, list): - mutations = [mutations] - for m in mutations: 
- if isinstance(m, Mutation): # type: ignore - raise ValueError( - f"invalid mutation type: {type(m).__name__}. Only RowMutationEntry objects are supported by batcher" - ) - self._staged_mutations.extend(mutations) + if isinstance(mutation_entry, Mutation): # type: ignore + raise ValueError( + f"invalid mutation type: {type(mutation_entry).__name__}. Only RowMutationEntry objects are supported by batcher" + ) + self._staged_mutations.append(mutation_entry) # start a new flush task if limits exceeded - self._staged_count += sum([len(m.mutations) for m in mutations]) - self._staged_bytes += sum([m.size() for m in mutations]) + self._staged_count += len(mutation_entry.mutations) + self._staged_bytes += mutation_entry.size() if ( self._staged_count >= self._flush_limit_count or self._staged_bytes >= self._flush_limit_bytes diff --git a/tests/system/test_system.py b/tests/system/test_system.py index 64257ea08..5796d7522 100644 --- a/tests/system/test_system.py +++ b/tests/system/test_system.py @@ -301,7 +301,8 @@ async def test_mutations_batcher_context_manager(client, table, temp_rows): bulk_mutation2 = RowMutationEntry(row_key2, [mutation2]) async with table.mutations_batcher() as batcher: - batcher.append([bulk_mutation, bulk_mutation2]) + batcher.append(bulk_mutation) + batcher.append(bulk_mutation2) # ensure cell is updated assert (await _retrieve_cell_value(table, row_key)) == new_value assert len(batcher._staged_mutations) == 0 diff --git a/tests/unit/test_mutations_batcher.py b/tests/unit/test_mutations_batcher.py index 0a7cf5202..63497b78d 100644 --- a/tests/unit/test_mutations_batcher.py +++ b/tests/unit/test_mutations_batcher.py @@ -472,9 +472,6 @@ async def test_append_wrong_mutation(self): with pytest.raises(ValueError) as e: instance.append(DeleteAllFromRow()) assert str(e.value) == expected_error - with pytest.raises(ValueError) as e: - instance.append([DeleteAllFromRow(), DeleteAllFromRow()]) - assert str(e.value) == expected_error @pytest.mark.asyncio async def test_append_outside_flow_limits(self): @@ -556,22 +553,6 @@ async def test_append_multiple_sequentially(self): assert len(instance._staged_mutations) == 3 instance._staged_mutations = [] - @pytest.mark.asyncio - async def test_append_multiple_single_call(self): - """Append multiple mutations in a single append call""" - async with self._make_one(flush_limit_count=8, flush_limit_bytes=8) as instance: - assert instance._staged_count == 0 - assert instance._staged_bytes == 0 - assert instance._staged_mutations == [] - mutation_list = [_make_mutation(count=2, size=3) for _ in range(3)] - with mock.patch.object(instance, "_schedule_flush") as flush_mock: - instance.append(mutation_list) - assert flush_mock.call_count == 1 - assert instance._staged_count == 6 - assert instance._staged_bytes == 9 - assert len(instance._staged_mutations) == 3 - instance._staged_mutations = [] - @pytest.mark.parametrize("raise_exceptions", [True, False]) @pytest.mark.asyncio async def test_flush_no_timeout(self, raise_exceptions): From eeb483ff12a042694ecd64d66b7bd4e5b8cac1fc Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 14 Jun 2023 19:33:25 -0700 Subject: [PATCH 136/213] ensure that proper exceptions are raised for each flush --- google/cloud/bigtable/exceptions.py | 1 + google/cloud/bigtable/mutations_batcher.py | 33 +++++++------- tests/unit/test_mutations_batcher.py | 52 +++++++++++++++++++++- 3 files changed, 68 insertions(+), 18 deletions(-) diff --git a/google/cloud/bigtable/exceptions.py b/google/cloud/bigtable/exceptions.py 
index 9221be82f..0579e047f 100644 --- a/google/cloud/bigtable/exceptions.py +++ b/google/cloud/bigtable/exceptions.py @@ -93,6 +93,7 @@ def _format_message(excs: list[FailedMutationEntryError], total_entries: int): def __init__(self, excs: list[FailedMutationEntryError], total_entries: int): super().__init__(self._format_message(excs, total_entries), excs) + self.total_entries_attempted = total_entries def __new__(cls, excs: list[FailedMutationEntryError], total_entries: int): return super().__new__(cls, cls._format_message(excs, total_entries), excs) diff --git a/google/cloud/bigtable/mutations_batcher.py b/google/cloud/bigtable/mutations_batcher.py index f3fb7c07a..4a419d732 100644 --- a/google/cloud/bigtable/mutations_batcher.py +++ b/google/cloud/bigtable/mutations_batcher.py @@ -314,25 +314,23 @@ async def _flush_internal( a new flush is initiated """ # flush new entries - in_process_requests: list[asyncio.Task[None]] = [prev_flush] + in_process_requests: list[ + asyncio.Task[None | list[FailedMutationEntryError]] + ] = [prev_flush] async for batch in self._flow_control.add_to_flow(new_entries): - batch_task = asyncio.create_task( - self._execute_mutate_rows_w_state_update(batch) - ) + batch_task = asyncio.create_task(self._execute_mutate_rows(batch)) in_process_requests.append(batch_task) # wait for all inflight requests to complete - await asyncio.gather(*in_process_requests) - - async def _execute_mutate_rows_w_state_update( - self, batch: list[RowMutationEntry] - ) -> None: - """ - Calls _execute_mutate_rows, and then updates internal flush state based on results - """ - results = await self._execute_mutate_rows(batch) - self.exceptions.extend(results) - self._entries_processed_since_last_raise += len(batch) - await self._flow_control.remove_from_flow(batch) + all_results = await asyncio.gather(*in_process_requests) + # allow previous flush tasks to finalize before adding new exceptions to list + await asyncio.sleep(0) + # collect exception data for next raise, after previous flush tasks have completed + self._entries_processed_since_last_raise += len(new_entries) + for exc_list in all_results: + if exc_list is not None and all( + isinstance(e, FailedMutationEntryError) for e in exc_list + ): + self.exceptions.extend(exc_list) async def _execute_mutate_rows( self, batch: list[RowMutationEntry] @@ -365,6 +363,9 @@ async def _execute_mutate_rows( for subexc in e.exceptions: subexc.index = None return list(e.exceptions) + finally: + # mark batch as complete in flow control + await self._flow_control.remove_from_flow(batch) return [] def _raise_exceptions(self): diff --git a/tests/unit/test_mutations_batcher.py b/tests/unit/test_mutations_batcher.py index 63497b78d..d46e56891 100644 --- a/tests/unit/test_mutations_batcher.py +++ b/tests/unit/test_mutations_batcher.py @@ -661,6 +661,54 @@ async def mock_call(*args, **kwargs): assert duration < 0.25 assert op_mock.call_count == num_calls + @pytest.mark.asyncio + async def test_overlapping_flush_requests(self): + """ + Should allow multiple flushes to be scheduled concurrently, with + each flush raising the errors related to the mutations at flush time + """ + from google.cloud.bigtable.exceptions import ( + MutationsExceptionGroup, + FailedMutationEntryError, + ) + exception1 = RuntimeError("test error1") + exception2 = ValueError("test error2") + wrapped_exception_list = [FailedMutationEntryError(2, mock.Mock(), exc) for exc in [exception1, exception2]] + # excpetion1 is flushed first, but finishes second + sleep_times = [0.1, 0.05] + 
+ async with self._make_one() as instance: + with mock.patch.object( + instance, "_execute_mutate_rows", AsyncMock() + ) as op_mock: + # mock network calls + async def mock_call(*args, **kwargs): + time, exception = sleep_times.pop(0), wrapped_exception_list.pop(0) + await asyncio.sleep(time) + return [exception] + op_mock.side_effect = mock_call + # create a few concurrent flushes + instance._staged_mutations = [_make_mutation()] + flush_task1 = asyncio.create_task(instance.flush()) + # let flush task initialize + await asyncio.sleep(0) + instance._staged_mutations = [_make_mutation()] + flush_task2 = asyncio.create_task(instance.flush()) + # raise errors + with pytest.raises(MutationsExceptionGroup) as exc2: + await flush_task2 + assert len(exc2.value.exceptions) == 1 + assert exc2.value.total_entries_attempted == 1 + assert exc2.value.exceptions[0].__cause__ == exception2 + + # flushes should be finalized in order. flush_task1 should already be done + assert flush_task1.done() + with pytest.raises(MutationsExceptionGroup) as exc: + await flush_task1 + assert len(exc.value.exceptions) == 1 + assert exc2.value.total_entries_attempted == 1 + assert exc.value.exceptions[0].__cause__ == exception1 + @pytest.mark.asyncio async def test_schedule_flush_no_mutations(self): """schedule flush should return prev_flush if no new mutations""" @@ -840,7 +888,7 @@ async def test__execute_mutate_rows(self, mutate_rows): table.default_operation_timeout = 17 table.default_per_request_timeout = 13 async with self._make_one(table) as instance: - batch = [mock.Mock()] + batch = [_make_mutation()] result = await instance._execute_mutate_rows(batch) assert start_operation.call_count == 1 args, _ = mutate_rows.call_args @@ -868,7 +916,7 @@ async def test__execute_mutate_rows_returns_errors(self, mutate_rows): table = mock.Mock() table.default_operation_timeout = 17 async with self._make_one(table) as instance: - batch = [mock.Mock()] + batch = [_make_mutation()] result = await instance._execute_mutate_rows(batch) assert len(result) == 2 assert result[0] == err1 From 853b5f20207ade4afcbf6cb80772ebd7189e354a Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 14 Jun 2023 21:58:23 -0700 Subject: [PATCH 137/213] surface unexpected exceptions in MutationsExceptionGroup --- google/cloud/bigtable/_mutate_rows.py | 2 +- google/cloud/bigtable/exceptions.py | 9 ++++++--- google/cloud/bigtable/mutations_batcher.py | 10 +++++++--- tests/unit/test_mutations_batcher.py | 7 ++++++- 4 files changed, 20 insertions(+), 8 deletions(-) diff --git a/google/cloud/bigtable/_mutate_rows.py b/google/cloud/bigtable/_mutate_rows.py index 64bcf376c..212380de3 100644 --- a/google/cloud/bigtable/_mutate_rows.py +++ b/google/cloud/bigtable/_mutate_rows.py @@ -128,7 +128,7 @@ async def start(self): self._handle_entry_error(idx, exc) finally: # raise exception detailing incomplete mutations - all_errors = [] + all_errors: list[Exception] = [] for idx, exc_list in self.errors.items(): if len(exc_list) == 0: raise core_exceptions.ClientError( diff --git a/google/cloud/bigtable/exceptions.py b/google/cloud/bigtable/exceptions.py index 0579e047f..d314d2157 100644 --- a/google/cloud/bigtable/exceptions.py +++ b/google/cloud/bigtable/exceptions.py @@ -83,19 +83,22 @@ def __str__(self): class MutationsExceptionGroup(BigtableExceptionGroup): """ Represents one or more exceptions that occur during a bulk mutation operation + + Exceptions will typically be of type FailedMutationEntryError, but other exceptions may + be included if they are raised 
during the mutation operation """ @staticmethod - def _format_message(excs: list[FailedMutationEntryError], total_entries: int): + def _format_message(excs: list[Exception], total_entries: int): entry_str = "entry" if total_entries == 1 else "entries" plural_str = "" if len(excs) == 1 else "s" return f"{len(excs)} sub-exception{plural_str} (from {total_entries} {entry_str} attempted)" - def __init__(self, excs: list[FailedMutationEntryError], total_entries: int): + def __init__(self, excs: list[Exception], total_entries: int): super().__init__(self._format_message(excs, total_entries), excs) self.total_entries_attempted = total_entries - def __new__(cls, excs: list[FailedMutationEntryError], total_entries: int): + def __new__(cls, excs: list[Exception], total_entries: int): return super().__new__(cls, cls._format_message(excs, total_entries), excs) diff --git a/google/cloud/bigtable/mutations_batcher.py b/google/cloud/bigtable/mutations_batcher.py index 4a419d732..567424e2f 100644 --- a/google/cloud/bigtable/mutations_batcher.py +++ b/google/cloud/bigtable/mutations_batcher.py @@ -217,7 +217,7 @@ def __init__( self._flush_limit_count = ( flush_limit_count if flush_limit_count is not None else float("inf") ) - self.exceptions: list[FailedMutationEntryError] = [] + self.exceptions: list[Exception] = [] self._flush_timer_task: asyncio.Task[None] = asyncio.create_task( self._flush_timer(flush_interval) ) @@ -321,13 +321,17 @@ async def _flush_internal( batch_task = asyncio.create_task(self._execute_mutate_rows(batch)) in_process_requests.append(batch_task) # wait for all inflight requests to complete - all_results = await asyncio.gather(*in_process_requests) + all_results: list[ + list[FailedMutationEntryError] | Exception | None + ] = await asyncio.gather(*in_process_requests, return_exceptions=True) # allow previous flush tasks to finalize before adding new exceptions to list await asyncio.sleep(0) # collect exception data for next raise, after previous flush tasks have completed self._entries_processed_since_last_raise += len(new_entries) for exc_list in all_results: - if exc_list is not None and all( + if isinstance(exc_list, Exception): + self.exceptions.append(exc_list) + elif exc_list is not None and all( isinstance(e, FailedMutationEntryError) for e in exc_list ): self.exceptions.extend(exc_list) diff --git a/tests/unit/test_mutations_batcher.py b/tests/unit/test_mutations_batcher.py index d46e56891..55b32cedc 100644 --- a/tests/unit/test_mutations_batcher.py +++ b/tests/unit/test_mutations_batcher.py @@ -671,9 +671,13 @@ async def test_overlapping_flush_requests(self): MutationsExceptionGroup, FailedMutationEntryError, ) + exception1 = RuntimeError("test error1") exception2 = ValueError("test error2") - wrapped_exception_list = [FailedMutationEntryError(2, mock.Mock(), exc) for exc in [exception1, exception2]] + wrapped_exception_list = [ + FailedMutationEntryError(2, mock.Mock(), exc) + for exc in [exception1, exception2] + ] # excpetion1 is flushed first, but finishes second sleep_times = [0.1, 0.05] @@ -686,6 +690,7 @@ async def mock_call(*args, **kwargs): time, exception = sleep_times.pop(0), wrapped_exception_list.pop(0) await asyncio.sleep(time) return [exception] + op_mock.side_effect = mock_call # create a few concurrent flushes instance._staged_mutations = [_make_mutation()] From 2865acdc6880d8dc5505d77eaab3d122b08e56aa Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 15 Jun 2023 09:00:23 -0700 Subject: [PATCH 138/213] prefixed flow control attributes with underscore 
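
The in-flight counters and capacity condition on _FlowControl are implementation
details, so they are underscore-prefixed here; callers are expected to go through
the batcher's public surface instead. A minimal usage sketch (illustrative only;
assumes a `table` obtained from the async data client and the RowMutationEntry /
SetCell helpers in google.cloud.bigtable.mutations):

    from google.cloud.bigtable.mutations import RowMutationEntry, SetCell

    async def write_batch(table):
        # The batcher tracks in-flight mutation counts and bytes internally via
        # _FlowControl; user code only appends entries and lets the batcher
        # schedule flushes based on its configured limits.
        async with table.mutations_batcher(flush_interval=5) as batcher:
            for i in range(10):
                entry = RowMutationEntry(
                    f"row-{i}".encode(),
                    [SetCell("family", b"qualifier", b"new-value")],
                )
                batcher.append(entry)
        # exiting the context manager closes the batcher, flushing any
        # remaining staged entries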
--- google/cloud/bigtable/mutations_batcher.py | 46 ++++++------- tests/unit/test_mutations_batcher.py | 78 +++++++++++----------- 2 files changed, 62 insertions(+), 62 deletions(-) diff --git a/google/cloud/bigtable/mutations_batcher.py b/google/cloud/bigtable/mutations_batcher.py index 567424e2f..fc182a9ce 100644 --- a/google/cloud/bigtable/mutations_batcher.py +++ b/google/cloud/bigtable/mutations_batcher.py @@ -62,27 +62,27 @@ def __init__( - max_entry_count: maximum number of entries to send in a single rpc. Limited to 100,000 by the MutateRows API. """ - self.max_mutation_count = ( + self._max_mutation_count = ( max_mutation_count if max_mutation_count is not None else float("inf") ) - self.max_mutation_bytes = ( + self._max_mutation_bytes = ( max_mutation_bytes if max_mutation_bytes is not None else float("inf") ) - self.max_entry_count = max_entry_count + self._max_entry_count = max_entry_count if ( - self.max_entry_count > MAX_MUTATE_ROWS_ENTRY_COUNT - or self.max_entry_count < 1 + self._max_entry_count > MAX_MUTATE_ROWS_ENTRY_COUNT + or self._max_entry_count < 1 ): raise ValueError( f"max_entry_count must be between 1 and {MAX_MUTATE_ROWS_ENTRY_COUNT}" ) - if self.max_mutation_count < 1: + if self._max_mutation_count < 1: raise ValueError("max_mutation_count must be greater than 0") - if self.max_mutation_bytes < 1: + if self._max_mutation_bytes < 1: raise ValueError("max_mutation_bytes must be greater than 0") - self.capacity_condition = asyncio.Condition() - self.in_flight_mutation_count = 0 - self.in_flight_mutation_bytes = 0 + self._capacity_condition = asyncio.Condition() + self._in_flight_mutation_count = 0 + self._in_flight_mutation_bytes = 0 def _has_capacity(self, additional_count: int, additional_size: int) -> bool: """ @@ -92,11 +92,11 @@ def _has_capacity(self, additional_count: int, additional_size: int) -> bool: the configured limits, it can be sent in a single batch. 
""" # adjust limits to allow overly large mutations - acceptable_size = max(self.max_mutation_bytes, additional_size) - acceptable_count = max(self.max_mutation_count, additional_count) + acceptable_size = max(self._max_mutation_bytes, additional_size) + acceptable_count = max(self._max_mutation_count, additional_count) # check if we have capacity for new mutation - new_size = self.in_flight_mutation_bytes + additional_size - new_count = self.in_flight_mutation_count + additional_count + new_size = self._in_flight_mutation_bytes + additional_size + new_count = self._in_flight_mutation_count + additional_count return new_size <= acceptable_size and new_count <= acceptable_count async def remove_from_flow( @@ -109,11 +109,11 @@ async def remove_from_flow( mutations = [mutations] total_count = sum(len(entry.mutations) for entry in mutations) total_size = sum(entry.size() for entry in mutations) - self.in_flight_mutation_count -= total_count - self.in_flight_mutation_bytes -= total_size + self._in_flight_mutation_count -= total_count + self._in_flight_mutation_bytes -= total_size # notify any blocked requests that there is additional capacity - async with self.capacity_condition: - self.capacity_condition.notify_all() + async with self._capacity_condition: + self._capacity_condition.notify_all() async def add_to_flow(self, mutations: RowMutationEntry | list[RowMutationEntry]): """ @@ -136,7 +136,7 @@ async def add_to_flow(self, mutations: RowMutationEntry | list[RowMutationEntry] while end_idx < len(mutations): start_idx = end_idx # fill up batch until we hit capacity - async with self.capacity_condition: + async with self._capacity_condition: while end_idx < len(mutations): next_entry = mutations[end_idx] next_size = next_entry.size() @@ -144,18 +144,18 @@ async def add_to_flow(self, mutations: RowMutationEntry | list[RowMutationEntry] num_in_batch = end_idx - start_idx if ( self._has_capacity(next_count, next_size) - and num_in_batch < self.max_entry_count + and num_in_batch < self._max_entry_count ): # room for new mutation; add to batch end_idx += 1 - self.in_flight_mutation_bytes += next_size - self.in_flight_mutation_count += next_count + self._in_flight_mutation_bytes += next_size + self._in_flight_mutation_count += next_count elif start_idx != end_idx: # we have at least one mutation in the batch, so send it break else: # batch is empty. 
Block until we have capacity - await self.capacity_condition.wait_for( + await self._capacity_condition.wait_for( lambda: self._has_capacity(next_count, next_size) ) yield mutations[start_idx:end_idx] diff --git a/tests/unit/test_mutations_batcher.py b/tests/unit/test_mutations_batcher.py index 55b32cedc..157a0ce30 100644 --- a/tests/unit/test_mutations_batcher.py +++ b/tests/unit/test_mutations_batcher.py @@ -47,21 +47,21 @@ def test_ctor(self): instance = self._make_one( max_mutation_count, max_mutation_bytes, max_entry_count ) - assert instance.max_mutation_count == max_mutation_count - assert instance.max_mutation_bytes == max_mutation_bytes - assert instance.max_entry_count == max_entry_count - assert instance.in_flight_mutation_count == 0 - assert instance.in_flight_mutation_bytes == 0 - assert isinstance(instance.capacity_condition, asyncio.Condition) + assert instance._max_mutation_count == max_mutation_count + assert instance._max_mutation_bytes == max_mutation_bytes + assert instance._max_entry_count == max_entry_count + assert instance._in_flight_mutation_count == 0 + assert instance._in_flight_mutation_bytes == 0 + assert isinstance(instance._capacity_condition, asyncio.Condition) def test_ctor_empty_values(self): """Test constructor with None count and bytes""" from google.cloud.bigtable._mutate_rows import MAX_MUTATE_ROWS_ENTRY_COUNT instance = self._make_one(None, None) - assert instance.max_mutation_count == float("inf") - assert instance.max_mutation_bytes == float("inf") - assert instance.max_entry_count == MAX_MUTATE_ROWS_ENTRY_COUNT + assert instance._max_mutation_count == float("inf") + assert instance._max_mutation_bytes == float("inf") + assert instance._max_entry_count == MAX_MUTATE_ROWS_ENTRY_COUNT def test_ctor_invalid_values(self): """Test that values are positive, and fit within expected limits""" @@ -125,8 +125,8 @@ def test__has_capacity( _has_capacity should return True if the new mutation will will not exceed the max count or size """ instance = self._make_one(max_count, max_size) - instance.in_flight_mutation_count = existing_count - instance.in_flight_mutation_bytes = existing_size + instance._in_flight_mutation_count = existing_count + instance._in_flight_mutation_bytes = existing_size assert instance._has_capacity(new_count, new_size) == expected @pytest.mark.asyncio @@ -155,23 +155,23 @@ async def test_remove_from_flow_value_update( completed mutations should lower the inflight values """ instance = self._make_one() - instance.in_flight_mutation_count = existing_count - instance.in_flight_mutation_bytes = existing_size + instance._in_flight_mutation_count = existing_count + instance._in_flight_mutation_bytes = existing_size mutation = _make_mutation(added_count, added_size) await instance.remove_from_flow(mutation) - assert instance.in_flight_mutation_count == new_count - assert instance.in_flight_mutation_bytes == new_size + assert instance._in_flight_mutation_count == new_count + assert instance._in_flight_mutation_bytes == new_size @pytest.mark.asyncio async def test__remove_from_flow_unlock(self): """capacity condition should notify after mutation is complete""" instance = self._make_one(10, 10) - instance.in_flight_mutation_count = 10 - instance.in_flight_mutation_bytes = 10 + instance._in_flight_mutation_count = 10 + instance._in_flight_mutation_bytes = 10 async def task_routine(): - async with instance.capacity_condition: - await instance.capacity_condition.wait_for( + async with instance._capacity_condition: + await 
instance._capacity_condition.wait_for( lambda: instance._has_capacity(1, 1) ) @@ -183,24 +183,24 @@ async def task_routine(): mutation = _make_mutation(count=0, size=5) await instance.remove_from_flow([mutation]) await asyncio.sleep(0.05) - assert instance.in_flight_mutation_count == 10 - assert instance.in_flight_mutation_bytes == 5 + assert instance._in_flight_mutation_count == 10 + assert instance._in_flight_mutation_bytes == 5 assert task.done() is False # try changing count - instance.in_flight_mutation_bytes = 10 + instance._in_flight_mutation_bytes = 10 mutation = _make_mutation(count=5, size=0) await instance.remove_from_flow([mutation]) await asyncio.sleep(0.05) - assert instance.in_flight_mutation_count == 5 - assert instance.in_flight_mutation_bytes == 10 + assert instance._in_flight_mutation_count == 5 + assert instance._in_flight_mutation_bytes == 10 assert task.done() is False # try changing both - instance.in_flight_mutation_count = 10 + instance._in_flight_mutation_count = 10 mutation = _make_mutation(count=5, size=5) await instance.remove_from_flow([mutation]) await asyncio.sleep(0.05) - assert instance.in_flight_mutation_count == 5 - assert instance.in_flight_mutation_bytes == 5 + assert instance._in_flight_mutation_count == 5 + assert instance._in_flight_mutation_bytes == 5 # task should be complete assert task.done() is True @@ -290,10 +290,10 @@ async def test_ctor_defaults(self, flush_timer_mock): assert instance.closed is False assert instance._staged_mutations == [] assert instance.exceptions == [] - assert instance._flow_control.max_mutation_count == 100000 - assert instance._flow_control.max_mutation_bytes == 104857600 - assert instance._flow_control.in_flight_mutation_count == 0 - assert instance._flow_control.in_flight_mutation_bytes == 0 + assert instance._flow_control._max_mutation_count == 100000 + assert instance._flow_control._max_mutation_bytes == 104857600 + assert instance._flow_control._in_flight_mutation_count == 0 + assert instance._flow_control._in_flight_mutation_bytes == 0 assert instance._entries_processed_since_last_raise == 0 await asyncio.sleep(0) assert flush_timer_mock.call_count == 1 @@ -324,10 +324,10 @@ async def test_ctor_explicit(self, flush_timer_mock): assert instance.closed is False assert instance._staged_mutations == [] assert instance.exceptions == [] - assert instance._flow_control.max_mutation_count == flow_control_max_count - assert instance._flow_control.max_mutation_bytes == flow_control_max_bytes - assert instance._flow_control.in_flight_mutation_count == 0 - assert instance._flow_control.in_flight_mutation_bytes == 0 + assert instance._flow_control._max_mutation_count == flow_control_max_count + assert instance._flow_control._max_mutation_bytes == flow_control_max_bytes + assert instance._flow_control._in_flight_mutation_count == 0 + assert instance._flow_control._in_flight_mutation_bytes == 0 assert instance._entries_processed_since_last_raise == 0 await asyncio.sleep(0) assert flush_timer_mock.call_count == 1 @@ -358,10 +358,10 @@ async def test_ctor_no_limits(self, flush_timer_mock): assert instance.closed is False assert instance._staged_mutations == [] assert instance.exceptions == [] - assert instance._flow_control.max_mutation_count == float("inf") - assert instance._flow_control.max_mutation_bytes == float("inf") - assert instance._flow_control.in_flight_mutation_count == 0 - assert instance._flow_control.in_flight_mutation_bytes == 0 + assert instance._flow_control._max_mutation_count == float("inf") + assert 
instance._flow_control._max_mutation_bytes == float("inf") + assert instance._flow_control._in_flight_mutation_count == 0 + assert instance._flow_control._in_flight_mutation_bytes == 0 assert instance._entries_processed_since_last_raise == 0 await asyncio.sleep(0) assert flush_timer_mock.call_count == 1 From e568d924254302471ca33e2def43b88325f4cc4d Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 15 Jun 2023 09:44:10 -0700 Subject: [PATCH 139/213] batcher signature improvements --- google/cloud/bigtable/client.py | 28 +++++++++++++----- google/cloud/bigtable/mutations_batcher.py | 19 ++++++------ tests/unit/test_mutations_batcher.py | 34 +++++++++++++++++++--- 3 files changed, 60 insertions(+), 21 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index c51c82e58..40810f74c 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -53,6 +53,7 @@ from google.cloud.bigtable._helpers import _make_metadata from google.cloud.bigtable._helpers import _convert_retry_deadline from google.cloud.bigtable.mutations_batcher import MutationsBatcher +from google.cloud.bigtable.mutations_batcher import MB_SIZE if TYPE_CHECKING: from google.cloud.bigtable import RowKeySamples @@ -574,29 +575,42 @@ async def sample_keys( """ raise NotImplementedError - def mutations_batcher(self, **kwargs) -> MutationsBatcher: + def mutations_batcher( + self, + *, + flush_interval: float | None = 5, + flush_limit_mutation_count: int | None = 1000, + flush_limit_bytes: int = 20 * MB_SIZE, + flow_control_max_count: int | None = 100_000, + flow_control_max_bytes: int | None = 100 * MB_SIZE, + ) -> MutationsBatcher: """ Returns a new mutations batcher instance. Can be used to iteratively add mutations that are flushed as a group, to avoid excess network calls - Kwargs: + Args: - flush_interval: Automatically flush every flush_interval seconds. If None, a table default will be used - - flush_limit_count: Flush immediately after flush_limit_count mutations are added. - If None, this limit is ignored. + - flush_limit_mutation_count: Flush immediately after flush_limit_mutation_count + mutations are added across all entries. If None, this limit is ignored. - flush_limit_bytes: Flush immediately after flush_limit_bytes bytes are added. If None, this limit is ignored. - flow_control_max_count: Maximum number of inflight mutations. - If None, this limit is ignored. - flow_control_max_bytes: Maximum number of inflight bytes. If None, this limit is ignored. 
Returns: - a MutationsBatcher context manager that can batch requests """ - kwargs["table"] = self - return MutationsBatcher(**kwargs) + return MutationsBatcher( + self, + flush_interval=flush_interval, + flush_limit_mutation_count=flush_limit_mutation_count, + flush_limit_bytes=flush_limit_bytes, + flow_control_max_count=flow_control_max_count, + flow_control_max_bytes=flow_control_max_bytes, + ) async def mutate_row( self, diff --git a/google/cloud/bigtable/mutations_batcher.py b/google/cloud/bigtable/mutations_batcher.py index fc182a9ce..2be8e616a 100644 --- a/google/cloud/bigtable/mutations_batcher.py +++ b/google/cloud/bigtable/mutations_batcher.py @@ -185,19 +185,18 @@ def __init__( table: "Table", *, flush_interval: float | None = 5, - flush_limit_count: int | None = 100, - flush_limit_bytes: int | None = 20 * MB_SIZE, - flow_control_max_count: int | None = 100000, + flush_limit_mutation_count: int | None = 1000, + flush_limit_bytes: int = 20 * MB_SIZE, + flow_control_max_count: int | None = 100_000, flow_control_max_bytes: int | None = 100 * MB_SIZE, ): """ Args: - table: Table to preform rpc calls - flush_interval: Automatically flush every flush_interval seconds - - flush_limit_count: Flush immediately after flush_limit_count mutations are added. - If None, this limit is ignored. + - flush_limit_mutation_count: Flush immediately after flush_limit_mutation_count + mutations are added across all entries. If None, this limit is ignored. - flush_limit_bytes: Flush immediately after flush_limit_bytes bytes are added. - If None, this limit is ignored. - flow_control_max_count: Maximum number of inflight mutations. If None, this limit is ignored. - flow_control_max_bytes: Maximum number of inflight bytes. @@ -211,11 +210,11 @@ def __init__( self._flow_control = _FlowControl( flow_control_max_count, flow_control_max_bytes ) - self._flush_limit_bytes = ( - flush_limit_bytes if flush_limit_bytes is not None else float("inf") - ) + self._flush_limit_bytes = flush_limit_bytes self._flush_limit_count = ( - flush_limit_count if flush_limit_count is not None else float("inf") + flush_limit_mutation_count + if flush_limit_mutation_count is not None + else float("inf") ) self.exceptions: list[Exception] = [] self._flush_timer_task: asyncio.Task[None] = asyncio.create_task( diff --git a/tests/unit/test_mutations_batcher.py b/tests/unit/test_mutations_batcher.py index 157a0ce30..b06b7926e 100644 --- a/tests/unit/test_mutations_batcher.py +++ b/tests/unit/test_mutations_batcher.py @@ -315,7 +315,7 @@ async def test_ctor_explicit(self, flush_timer_mock): async with self._make_one( table, flush_interval=flush_interval, - flush_limit_count=flush_limit_count, + flush_limit_mutation_count=flush_limit_count, flush_limit_bytes=flush_limit_bytes, flow_control_max_count=flow_control_max_count, flow_control_max_bytes=flow_control_max_bytes, @@ -349,7 +349,7 @@ async def test_ctor_no_limits(self, flush_timer_mock): async with self._make_one( table, flush_interval=flush_interval, - flush_limit_count=flush_limit_count, + flush_limit_mutation_count=flush_limit_count, flush_limit_bytes=flush_limit_bytes, flow_control_max_count=flow_control_max_count, flow_control_max_bytes=flow_control_max_bytes, @@ -368,6 +368,32 @@ async def test_ctor_no_limits(self, flush_timer_mock): assert flush_timer_mock.call_args[0][0] is None assert isinstance(instance._flush_timer_task, asyncio.Task) + def test_default_argument_consistency(self): + """ + We supply default arguments in MutationsBatcher.__init__, and in + 
table.mutations_batcher. Make sure any changes to defaults are applied to + both places + """ + from google.cloud.bigtable.client import Table + from google.cloud.bigtable.mutations_batcher import MutationsBatcher + import inspect + + get_batcher_signature = dict(inspect.signature(Table.mutations_batcher).parameters) + get_batcher_signature.pop("self") + batcher_init_signature = dict(inspect.signature(MutationsBatcher).parameters) + batcher_init_signature.pop("table") + # both should have same number of arguments + assert len(get_batcher_signature) == len(batcher_init_signature) + assert len(get_batcher_signature) == 5 + # both should have same argument names + assert set(get_batcher_signature.keys()) == set(batcher_init_signature.keys()) + # both should have same default values + for arg_name in get_batcher_signature.keys(): + assert ( + get_batcher_signature[arg_name].default + == batcher_init_signature[arg_name].default + ) + @unittest.mock.patch( "google.cloud.bigtable.mutations_batcher.MutationsBatcher._schedule_flush" ) @@ -513,7 +539,7 @@ async def test_append( ): """test appending different mutations, and checking if it causes a flush""" async with self._make_one( - flush_limit_count=flush_count, flush_limit_bytes=flush_bytes + flush_limit_mutation_count=flush_count, flush_limit_bytes=flush_bytes ) as instance: assert instance._staged_count == 0 assert instance._staged_bytes == 0 @@ -530,7 +556,7 @@ async def test_append( @pytest.mark.asyncio async def test_append_multiple_sequentially(self): """Append multiple mutations""" - async with self._make_one(flush_limit_count=8, flush_limit_bytes=8) as instance: + async with self._make_one(flush_limit_mutation_count=8, flush_limit_bytes=8) as instance: assert instance._staged_count == 0 assert instance._staged_bytes == 0 assert instance._staged_mutations == [] From bc7b850e3a6af2b03fe6563350c9f7e5d810ed73 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 15 Jun 2023 11:00:27 -0700 Subject: [PATCH 140/213] use request mutation limit instead of entry limit --- google/cloud/bigtable/_mutate_rows.py | 10 ++- google/cloud/bigtable/mutations.py | 11 +-- google/cloud/bigtable/mutations_batcher.py | 20 ++--- tests/system/test_system.py | 8 +- tests/unit/test__mutate_rows.py | 56 ++++++++------ tests/unit/test_client.py | 12 ++- tests/unit/test_mutations.py | 8 +- tests/unit/test_mutations_batcher.py | 88 +++++++++++++--------- 8 files changed, 123 insertions(+), 90 deletions(-) diff --git a/google/cloud/bigtable/_mutate_rows.py b/google/cloud/bigtable/_mutate_rows.py index 212380de3..e34ebaeb6 100644 --- a/google/cloud/bigtable/_mutate_rows.py +++ b/google/cloud/bigtable/_mutate_rows.py @@ -32,7 +32,7 @@ from google.cloud.bigtable.mutations import RowMutationEntry # mutate_rows requests are limited to this value -MAX_MUTATE_ROWS_ENTRY_COUNT = 100_000 +MUTATE_ROWS_REQUEST_MUTATION_LIMIT = 100_000 class _MutateRowsIncomplete(RuntimeError): @@ -72,10 +72,12 @@ def __init__( If not specified, the request will run until operation_timeout is reached. """ # check that mutations are within limits - if len(mutation_entries) > MAX_MUTATE_ROWS_ENTRY_COUNT: + total_mutations = sum(len(entry.mutations) for entry in mutation_entries) + if total_mutations > MUTATE_ROWS_REQUEST_MUTATION_LIMIT: raise ValueError( - "mutate_rows must contain at most " - f"{MAX_MUTATE_ROWS_ENTRY_COUNT} entries. Received {len(mutation_entries)}" + "mutate_rows requests can contain at most " + f"{MUTATE_ROWS_REQUEST_MUTATION_LIMIT} mutations across " + f"all entries. 
Found {total_mutations}." ) # create partial function to pass to trigger rpc call metadata = _make_metadata(table.table_name, table.app_profile_id) diff --git a/google/cloud/bigtable/mutations.py b/google/cloud/bigtable/mutations.py index 38c129750..aaf9ad93c 100644 --- a/google/cloud/bigtable/mutations.py +++ b/google/cloud/bigtable/mutations.py @@ -19,12 +19,13 @@ from abc import ABC, abstractmethod from sys import getsizeof +# mutation entries above this should be rejected +from google.cloud.bigtable._mutate_rows import MUTATE_ROWS_REQUEST_MUTATION_LIMIT + + # special value for SetCell mutation timestamps. If set, server will assign a timestamp SERVER_SIDE_TIMESTAMP = -1 -# mutation entries above this should be rejected -MAX_MUTATIONS_PER_ENTRY = 100_000 - class Mutation(ABC): """Model class for mutations""" @@ -201,9 +202,9 @@ def __init__(self, row_key: bytes | str, mutations: Mutation | list[Mutation]): mutations = [mutations] if len(mutations) == 0: raise ValueError("mutations must not be empty") - elif len(mutations) > MAX_MUTATIONS_PER_ENTRY: + elif len(mutations) > MUTATE_ROWS_REQUEST_MUTATION_LIMIT: raise ValueError( - f"entries must have <= {MAX_MUTATIONS_PER_ENTRY} mutations" + f"entries must have <= {MUTATE_ROWS_REQUEST_MUTATION_LIMIT} mutations" ) self.row_key = row_key self.mutations = tuple(mutations) diff --git a/google/cloud/bigtable/mutations_batcher.py b/google/cloud/bigtable/mutations_batcher.py index 2be8e616a..73aae66f0 100644 --- a/google/cloud/bigtable/mutations_batcher.py +++ b/google/cloud/bigtable/mutations_batcher.py @@ -24,7 +24,7 @@ from google.cloud.bigtable.exceptions import FailedMutationEntryError from google.cloud.bigtable._mutate_rows import _MutateRowsOperation -from google.cloud.bigtable._mutate_rows import MAX_MUTATE_ROWS_ENTRY_COUNT +from google.cloud.bigtable._mutate_rows import MUTATE_ROWS_REQUEST_MUTATION_LIMIT from google.cloud.bigtable.mutations import Mutation if TYPE_CHECKING: @@ -50,7 +50,6 @@ def __init__( self, max_mutation_count: int | None, max_mutation_bytes: int | None, - max_entry_count: int = MAX_MUTATE_ROWS_ENTRY_COUNT, ): """ Args: @@ -59,8 +58,6 @@ def __init__( If None, no limit is enforced. - max_mutation_bytes: maximum number of bytes to send in a single rpc. If None, no limit is enforced. - - max_entry_count: maximum number of entries to send in a single rpc. - Limited to 100,000 by the MutateRows API. 
""" self._max_mutation_count = ( max_mutation_count if max_mutation_count is not None else float("inf") @@ -68,14 +65,6 @@ def __init__( self._max_mutation_bytes = ( max_mutation_bytes if max_mutation_bytes is not None else float("inf") ) - self._max_entry_count = max_entry_count - if ( - self._max_entry_count > MAX_MUTATE_ROWS_ENTRY_COUNT - or self._max_entry_count < 1 - ): - raise ValueError( - f"max_entry_count must be between 1 and {MAX_MUTATE_ROWS_ENTRY_COUNT}" - ) if self._max_mutation_count < 1: raise ValueError("max_mutation_count must be greater than 0") if self._max_mutation_bytes < 1: @@ -135,19 +124,22 @@ async def add_to_flow(self, mutations: RowMutationEntry | list[RowMutationEntry] end_idx = 0 while end_idx < len(mutations): start_idx = end_idx + batch_mutation_count = 0 # fill up batch until we hit capacity async with self._capacity_condition: while end_idx < len(mutations): next_entry = mutations[end_idx] next_size = next_entry.size() next_count = len(next_entry.mutations) - num_in_batch = end_idx - start_idx if ( self._has_capacity(next_count, next_size) - and num_in_batch < self._max_entry_count + # make sure not to exceed per-request mutation count limits + and (batch_mutation_count + next_count) + <= MUTATE_ROWS_REQUEST_MUTATION_LIMIT ): # room for new mutation; add to batch end_idx += 1 + batch_mutation_count += next_count self._in_flight_mutation_bytes += next_size self._in_flight_mutation_count += next_count elif start_idx != end_idx: diff --git a/tests/system/test_system.py b/tests/system/test_system.py index 5796d7522..6097d5955 100644 --- a/tests/system/test_system.py +++ b/tests/system/test_system.py @@ -358,7 +358,7 @@ async def test_mutations_batcher_timer_flush(client, table, temp_rows): @pytest.mark.asyncio async def test_mutations_batcher_count_flush(client, table, temp_rows): """ - batch should flush after flush_limit_count mutations + batch should flush after flush_limit_mutation_count mutations """ from google.cloud.bigtable.mutations import RowMutationEntry @@ -372,7 +372,7 @@ async def test_mutations_batcher_count_flush(client, table, temp_rows): ) bulk_mutation2 = RowMutationEntry(row_key2, [mutation2]) - async with table.mutations_batcher(flush_limit_count=2) as batcher: + async with table.mutations_batcher(flush_limit_mutation_count=2) as batcher: batcher.append(bulk_mutation) # should be noop; flush not scheduled await batcher._prev_flush @@ -390,7 +390,7 @@ async def test_mutations_batcher_count_flush(client, table, temp_rows): @pytest.mark.asyncio async def test_mutations_batcher_bytes_flush(client, table, temp_rows): """ - batch should flush after flush_limit_count mutations + batch should flush after flush_limit_bytes bytes """ from google.cloud.bigtable.mutations import RowMutationEntry @@ -441,7 +441,7 @@ async def test_mutations_batcher_no_flush(client, table, temp_rows): size_limit = bulk_mutation.size() + bulk_mutation2.size() + 1 async with table.mutations_batcher( - flush_limit_bytes=size_limit, flush_limit_count=3, flush_interval=1 + flush_limit_bytes=size_limit, flush_limit_mutation_count=3, flush_interval=1 ) as batcher: batcher.append(bulk_mutation) assert len(batcher._staged_mutations) == 1 diff --git a/tests/unit/test__mutate_rows.py b/tests/unit/test__mutate_rows.py index 49bc90d90..18b2beede 100644 --- a/tests/unit/test__mutate_rows.py +++ b/tests/unit/test__mutate_rows.py @@ -27,6 +27,13 @@ from mock import AsyncMock # type: ignore +def _make_mutation(count=1, size=1): + mutation = mock.Mock() + mutation.size.return_value = size 
+ mutation.mutations = [mock.Mock()] * count + return mutation + + class TestMutateRowsOperation: def _target_class(self): from google.cloud.bigtable._mutate_rows import _MutateRowsOperation @@ -72,7 +79,7 @@ def test_ctor(self): client = mock.Mock() table = mock.Mock() - entries = [mock.Mock(), mock.Mock()] + entries = [_make_mutation(), _make_mutation()] operation_timeout = 0.05 attempt_timeout = 0.01 instance = self._make_one( @@ -109,13 +116,15 @@ def test_ctor_too_many_entries(self): """ should raise an error if an operation is created with more than 100,000 entries """ - from google.cloud.bigtable._mutate_rows import MAX_MUTATE_ROWS_ENTRY_COUNT + from google.cloud.bigtable._mutate_rows import ( + MUTATE_ROWS_REQUEST_MUTATION_LIMIT, + ) - assert MAX_MUTATE_ROWS_ENTRY_COUNT == 100000 + assert MUTATE_ROWS_REQUEST_MUTATION_LIMIT == 100_000 client = mock.Mock() table = mock.Mock() - entries = [None] * MAX_MUTATE_ROWS_ENTRY_COUNT + entries = [_make_mutation()] * MUTATE_ROWS_REQUEST_MUTATION_LIMIT operation_timeout = 0.05 attempt_timeout = 0.01 # no errors if at limit @@ -123,10 +132,16 @@ def test_ctor_too_many_entries(self): # raise error after crossing with pytest.raises(ValueError) as e: self._make_one( - client, table, entries + [None], operation_timeout, attempt_timeout + client, + table, + entries + [_make_mutation()], + operation_timeout, + attempt_timeout, ) - assert "mutate_rows must contain at most 100000 entries" in str(e.value) - assert "Received 100001" in str(e.value) + assert "mutate_rows requests can contain at most 100000 mutations" in str( + e.value + ) + assert "Found 100001" in str(e.value) @pytest.mark.asyncio async def test_mutate_rows_operation(self): @@ -135,7 +150,7 @@ async def test_mutate_rows_operation(self): """ client = mock.Mock() table = mock.Mock() - entries = [mock.Mock(), mock.Mock()] + entries = [_make_mutation(), _make_mutation()] operation_timeout = 0.05 instance = self._make_one( client, table, entries, operation_timeout, operation_timeout @@ -158,7 +173,7 @@ async def test_mutate_rows_exception(self, exc_type): client = mock.Mock() table = mock.Mock() - entries = [mock.Mock()] + entries = [_make_mutation()] operation_timeout = 0.05 expected_cause = exc_type("abort") with mock.patch.object( @@ -193,7 +208,7 @@ async def test_mutate_rows_exception_retryable_eventually_pass(self, exc_type): client = mock.Mock() table = mock.Mock() - entries = [mock.Mock()] + entries = [_make_mutation()] operation_timeout = 1 expected_cause = exc_type("retry") num_retries = 2 @@ -220,7 +235,7 @@ async def test_mutate_rows_incomplete_ignored(self): client = mock.Mock() table = mock.Mock() - entries = [mock.Mock()] + entries = [_make_mutation()] operation_timeout = 0.05 with mock.patch.object( self._target_class(), @@ -243,12 +258,11 @@ async def test_mutate_rows_incomplete_ignored(self): @pytest.mark.asyncio async def test_run_attempt_single_entry_success(self): """Test mutating a single entry""" - mutation = mock.Mock() - mutations = {0: mutation} + mutation = _make_mutation() expected_timeout = 1.3 - mock_gapic_fn = self._make_mock_gapic(mutations) + mock_gapic_fn = self._make_mock_gapic({0: mutation}) instance = self._make_one( - mutation_entries=mutations, + mutation_entries=[mutation], per_request_timeout=expected_timeout, ) with mock.patch.object(instance, "_gapic_fn", mock_gapic_fn): @@ -274,9 +288,9 @@ async def test_run_attempt_partial_success_retryable(self): """Some entries succeed, but one fails. 
Should report the proper index, and raise incomplete exception""" from google.cloud.bigtable._mutate_rows import _MutateRowsIncomplete - success_mutation = mock.Mock() - success_mutation_2 = mock.Mock() - failure_mutation = mock.Mock() + success_mutation = _make_mutation() + success_mutation_2 = _make_mutation() + failure_mutation = _make_mutation() mutations = [success_mutation, failure_mutation, success_mutation_2] mock_gapic_fn = self._make_mock_gapic(mutations, error_dict={1: 300}) instance = self._make_one( @@ -295,9 +309,9 @@ async def test_run_attempt_partial_success_retryable(self): @pytest.mark.asyncio async def test_run_attempt_partial_success_non_retryable(self): """Some entries succeed, but one fails. Exception marked as non-retryable. Do not raise incomplete error""" - success_mutation = mock.Mock() - success_mutation_2 = mock.Mock() - failure_mutation = mock.Mock() + success_mutation = _make_mutation() + success_mutation_2 = _make_mutation() + failure_mutation = _make_mutation() mutations = [success_mutation, failure_mutation, success_mutation_2] mock_gapic_fn = self._make_mock_gapic(mutations, error_dict={1: 300}) instance = self._make_one( diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index be3703a23..95c971161 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -1852,14 +1852,18 @@ async def test_bulk_mutate_row_metadata(self, include_app_profile): async with client.get_table("i", "t", app_profile_id=profile) as table: with mock.patch.object( client._gapic_client, "mutate_rows", AsyncMock() - ) as read_rows: - read_rows.side_effect = core_exceptions.Aborted("mock") + ) as mutate_rows: + mutate_rows.side_effect = core_exceptions.Aborted("mock") + mutation = mock.Mock() + mutation.size.return_value = 1 + entry = mock.Mock() + entry.mutations = [mutation] try: - await table.bulk_mutate_rows([mock.Mock()]) + await table.bulk_mutate_rows([entry]) except Exception: # exception used to end early pass - kwargs = read_rows.call_args_list[0].kwargs + kwargs = mutate_rows.call_args_list[0].kwargs metadata = kwargs["metadata"] goog_metadata = None for key, value in metadata: diff --git a/tests/unit/test_mutations.py b/tests/unit/test_mutations.py index 7540bb212..5700abbc5 100644 --- a/tests/unit/test_mutations.py +++ b/tests/unit/test_mutations.py @@ -490,11 +490,13 @@ def test_ctor(self): def test_ctor_over_limit(self): """Should raise error if mutations exceed MAX_MUTATIONS_PER_ENTRY""" - from google.cloud.bigtable.mutations import MAX_MUTATIONS_PER_ENTRY + from google.cloud.bigtable._mutate_rows import ( + MUTATE_ROWS_REQUEST_MUTATION_LIMIT, + ) - assert MAX_MUTATIONS_PER_ENTRY == 100_000 + assert MUTATE_ROWS_REQUEST_MUTATION_LIMIT == 100_000 # no errors at limit - expected_mutations = [None for _ in range(MAX_MUTATIONS_PER_ENTRY)] + expected_mutations = [None for _ in range(MUTATE_ROWS_REQUEST_MUTATION_LIMIT)] self._make_one(b"row_key", expected_mutations) # error if over limit with pytest.raises(ValueError) as e: diff --git a/tests/unit/test_mutations_batcher.py b/tests/unit/test_mutations_batcher.py index b06b7926e..d33d1df6f 100644 --- a/tests/unit/test_mutations_batcher.py +++ b/tests/unit/test_mutations_batcher.py @@ -33,52 +33,35 @@ def _make_mutation(count=1, size=1): class Test_FlowControl: - def _make_one( - self, max_mutation_count=10, max_mutation_bytes=100, max_entry_count=100_000 - ): + def _make_one(self, max_mutation_count=10, max_mutation_bytes=100): from google.cloud.bigtable.mutations_batcher import _FlowControl - 
return _FlowControl(max_mutation_count, max_mutation_bytes, max_entry_count) + return _FlowControl(max_mutation_count, max_mutation_bytes) def test_ctor(self): max_mutation_count = 9 max_mutation_bytes = 19 - max_entry_count = 29 - instance = self._make_one( - max_mutation_count, max_mutation_bytes, max_entry_count - ) + instance = self._make_one(max_mutation_count, max_mutation_bytes) assert instance._max_mutation_count == max_mutation_count assert instance._max_mutation_bytes == max_mutation_bytes - assert instance._max_entry_count == max_entry_count assert instance._in_flight_mutation_count == 0 assert instance._in_flight_mutation_bytes == 0 assert isinstance(instance._capacity_condition, asyncio.Condition) def test_ctor_empty_values(self): """Test constructor with None count and bytes""" - from google.cloud.bigtable._mutate_rows import MAX_MUTATE_ROWS_ENTRY_COUNT - instance = self._make_one(None, None) assert instance._max_mutation_count == float("inf") assert instance._max_mutation_bytes == float("inf") - assert instance._max_entry_count == MAX_MUTATE_ROWS_ENTRY_COUNT def test_ctor_invalid_values(self): """Test that values are positive, and fit within expected limits""" - from google.cloud.bigtable._mutate_rows import MAX_MUTATE_ROWS_ENTRY_COUNT - with pytest.raises(ValueError) as e: self._make_one(0, 1) assert "max_mutation_count must be greater than 0" in str(e.value) with pytest.raises(ValueError) as e: self._make_one(1, 0) assert "max_mutation_bytes must be greater than 0" in str(e.value) - with pytest.raises(ValueError) as e: - self._make_one(1, 1, 0) - assert "max_entry_count must be between 1 and 100000" in str(e.value) - with pytest.raises(ValueError) as e: - self._make_one(1, 1, MAX_MUTATE_ROWS_ENTRY_COUNT + 1) - assert "max_entry_count must be between 1 and" in str(e.value) @pytest.mark.parametrize( "max_count,max_size,existing_count,existing_size,new_count,new_size,expected", @@ -206,37 +189,31 @@ async def task_routine(): @pytest.mark.asyncio @pytest.mark.parametrize( - "mutations,count_cap,size_cap,entry_cap,expected_results", + "mutations,count_cap,size_cap,expected_results", [ # high capacity results in no batching - ([(5, 5), (1, 1), (1, 1)], 10, 10, 100, [[(5, 5), (1, 1), (1, 1)]]), + ([(5, 5), (1, 1), (1, 1)], 10, 10, [[(5, 5), (1, 1), (1, 1)]]), # low capacity splits up into batches - ([(1, 1), (1, 1), (1, 1)], 1, 1, 100, [[(1, 1)], [(1, 1)], [(1, 1)]]), + ([(1, 1), (1, 1), (1, 1)], 1, 1, [[(1, 1)], [(1, 1)], [(1, 1)]]), # test count as limiting factor - ([(1, 1), (1, 1), (1, 1)], 2, 10, 100, [[(1, 1), (1, 1)], [(1, 1)]]), + ([(1, 1), (1, 1), (1, 1)], 2, 10, [[(1, 1), (1, 1)], [(1, 1)]]), # test size as limiting factor - ([(1, 1), (1, 1), (1, 1)], 10, 2, 100, [[(1, 1), (1, 1)], [(1, 1)]]), + ([(1, 1), (1, 1), (1, 1)], 10, 2, [[(1, 1), (1, 1)], [(1, 1)]]), # test with some bloackages and some flows ( [(1, 1), (5, 5), (4, 1), (1, 4), (1, 1)], 5, 5, - 100, [[(1, 1)], [(5, 5)], [(4, 1), (1, 4)], [(1, 1)]], ), - # flows with entry count above max request limit should be batched - ([(1, 1)] * 11, 100, 100, 10, [[(1, 1)] * 10, [(1, 1)]]), - ([(1, 1)] * 10, 100, 100, 1, [[(1, 1)] for _ in range(10)]), ], ) - async def test_add_to_flow( - self, mutations, count_cap, size_cap, entry_cap, expected_results - ): + async def test_add_to_flow(self, mutations, count_cap, size_cap, expected_results): """ Test batching with various flow control settings """ mutation_objs = [_make_mutation(count=m[0], size=m[1]) for m in mutations] - instance = self._make_one(count_cap, size_cap, 
entry_cap) + instance = self._make_one(count_cap, size_cap) i = 0 async for batch in instance.add_to_flow(mutation_objs): expected_batch = expected_results[i] @@ -251,6 +228,43 @@ async def test_add_to_flow( i += 1 assert i == len(expected_results) + @pytest.mark.asyncio + @pytest.mark.parametrize( + "mutations,max_limit,expected_results", + [ + ([(1, 1)] * 11, 10, [[(1, 1)] * 10, [(1, 1)]]), + ([(1, 1)] * 10, 1, [[(1, 1)] for _ in range(10)]), + ([(1, 1)] * 10, 2, [[(1, 1), (1, 1)] for _ in range(5)]), + ], + ) + async def test_add_to_flow_max_mutation_limits( + self, mutations, max_limit, expected_results + ): + """ + Test flow control running up against the max API limit + Should submit request early, even if the flow control has room for more + """ + with mock.patch( + "google.cloud.bigtable.mutations_batcher.MUTATE_ROWS_REQUEST_MUTATION_LIMIT", + max_limit, + ): + mutation_objs = [_make_mutation(count=m[0], size=m[1]) for m in mutations] + # flow control has no limits except API restrictions + instance = self._make_one(None, None) + i = 0 + async for batch in instance.add_to_flow(mutation_objs): + expected_batch = expected_results[i] + assert len(batch) == len(expected_batch) + for j in range(len(expected_batch)): + # check counts + assert len(batch[j].mutations) == expected_batch[j][0] + # check sizes + assert batch[j].size() == expected_batch[j][1] + # update lock + await instance.remove_from_flow(batch) + i += 1 + assert i == len(expected_results) + @pytest.mark.asyncio async def test_add_to_flow_oversize(self): """ @@ -378,7 +392,9 @@ def test_default_argument_consistency(self): from google.cloud.bigtable.mutations_batcher import MutationsBatcher import inspect - get_batcher_signature = dict(inspect.signature(Table.mutations_batcher).parameters) + get_batcher_signature = dict( + inspect.signature(Table.mutations_batcher).parameters + ) get_batcher_signature.pop("self") batcher_init_signature = dict(inspect.signature(MutationsBatcher).parameters) batcher_init_signature.pop("table") @@ -556,7 +572,9 @@ async def test_append( @pytest.mark.asyncio async def test_append_multiple_sequentially(self): """Append multiple mutations""" - async with self._make_one(flush_limit_mutation_count=8, flush_limit_bytes=8) as instance: + async with self._make_one( + flush_limit_mutation_count=8, flush_limit_bytes=8 + ) as instance: assert instance._staged_count == 0 assert instance._staged_bytes == 0 assert instance._staged_mutations == [] From ff0aed622ffeb851ec8f2c67fafde86e4ceafb83 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 15 Jun 2023 12:13:55 -0700 Subject: [PATCH 141/213] reduce repeated flush tasks when entries appended in a loop --- google/cloud/bigtable/mutations_batcher.py | 40 ++++++++++++-------- tests/unit/test_mutations_batcher.py | 44 ++++++++++++++++++++-- 2 files changed, 65 insertions(+), 19 deletions(-) diff --git a/google/cloud/bigtable/mutations_batcher.py b/google/cloud/bigtable/mutations_batcher.py index 73aae66f0..f71ff7aa9 100644 --- a/google/cloud/bigtable/mutations_batcher.py +++ b/google/cloud/bigtable/mutations_batcher.py @@ -216,6 +216,9 @@ def __init__( self._prev_flush: asyncio.Task[None] = asyncio.create_task(asyncio.sleep(0)) # MutationExceptionGroup reports number of successful entries along with failures self._entries_processed_since_last_raise: int = 0 + # keep track of entries that are set to be sent in next flush, + # so we can add more before if mutations are added before flush task starts + self._next_flush_entries: list[RowMutationEntry] = [] 
async def _flush_timer(self, interval: float | None): """ @@ -245,15 +248,21 @@ def append(self, mutation_entry: RowMutationEntry): raise ValueError( f"invalid mutation type: {type(mutation_entry).__name__}. Only RowMutationEntry objects are supported by batcher" ) - self._staged_mutations.append(mutation_entry) - # start a new flush task if limits exceeded - self._staged_count += len(mutation_entry.mutations) - self._staged_bytes += mutation_entry.size() - if ( - self._staged_count >= self._flush_limit_count - or self._staged_bytes >= self._flush_limit_bytes - ): - self._schedule_flush() + if self._next_flush_entries: + # flush is already scheduled to run on next loop iteration + # add new entries directly to flush list + self._next_flush_entries.append(mutation_entry) + else: + # add to staged list + self._staged_mutations.append(mutation_entry) + # start a new flush task if limits exceeded + self._staged_count += len(mutation_entry.mutations) + self._staged_bytes += mutation_entry.size() + if ( + self._staged_count >= self._flush_limit_count + or self._staged_bytes >= self._flush_limit_bytes + ): + self._schedule_flush() async def flush(self, *, raise_exceptions: bool = True, timeout: float | None = 60): """ @@ -286,24 +295,23 @@ def _schedule_flush(self) -> asyncio.Task[None]: if self._staged_mutations: entries, self._staged_mutations = self._staged_mutations, [] self._staged_count, self._staged_bytes = 0, 0 + # flush is scheduled to run on next loop iteration + # use _next_flush_entries to add new extra entries before flush task starts + self._next_flush_entries.extend(entries) self._prev_flush = asyncio.create_task( - self._flush_internal(entries, self._prev_flush) + self._flush_internal(self._prev_flush) ) return self._prev_flush - async def _flush_internal( - self, - new_entries: list[RowMutationEntry], - prev_flush: asyncio.Task[None], - ): + async def _flush_internal(self, prev_flush: asyncio.Task[None]): """ Flushes a set of mutations to the server, and updates internal state Args: - - new_entries: list of mutations to flush - prev_flush: the previous flush task, which will be awaited before a new flush is initiated """ + new_entries, self._next_flush_entries = self._next_flush_entries, [] # flush new entries in_process_requests: list[ asyncio.Task[None | list[FailedMutationEntryError]] diff --git a/tests/unit/test_mutations_batcher.py b/tests/unit/test_mutations_batcher.py index d33d1df6f..937ba87fd 100644 --- a/tests/unit/test_mutations_batcher.py +++ b/tests/unit/test_mutations_batcher.py @@ -775,9 +775,16 @@ async def test_schedule_flush_with_mutations(self): async with self._make_one() as instance: orig_flush = instance._prev_flush with mock.patch.object(instance, "_flush_internal") as flush_mock: + flush_mock.side_effect = lambda *args, **kwargs: setattr( + instance, "_next_flush_entries", [] + ) for i in range(1, 4): - instance._staged_mutations = [mock.Mock()] + mutation = mock.Mock() + instance._staged_mutations = [mutation] instance._schedule_flush() + assert instance._next_flush_entries == [mutation] + # let flush task run + await asyncio.sleep(0) assert instance._staged_mutations == [] assert instance._staged_count == 0 assert instance._staged_bytes == 0 @@ -785,6 +792,35 @@ async def test_schedule_flush_with_mutations(self): assert instance._prev_flush != orig_flush orig_flush = instance._prev_flush + @pytest.mark.asyncio + async def test_schedule_flush_multiple_calls_single_flush(self): + """ + If the user appends a bunch of entries above the flush limits 
back-to-back, + it should still flush in a single task + """ + from google.cloud.bigtable.mutations_batcher import MutationsBatcher + + with mock.patch.object(MutationsBatcher, "_execute_mutate_rows") as op_mock: + async with self._make_one(flush_limit_mutation_count=1) as instance: + # mock network calls + async def mock_call(*args, **kwargs): + return [] + + op_mock.side_effect = mock_call + # append a bunch of entries back-to-back, without awaiting + num_entries = 10 + for _ in range(num_entries): + instance._staged_mutations.append(_make_mutation()) + instance._schedule_flush() + assert len(instance._staged_mutations) == 0 + # await to let flush run + await asyncio.sleep(0) + # should have batched into a single request + assert op_mock.call_count == 1 + sent_batch = op_mock.call_args[0][0] + assert len(sent_batch) == num_entries + assert instance._staged_mutations == [] + @pytest.mark.asyncio async def test__flush_internal(self): """ @@ -808,7 +844,8 @@ async def gen(x): prev_flush_mock = AsyncMock() prev_flush = prev_flush_mock.__call__() mutations = [_make_mutation(count=1, size=1)] * num_entries - await instance._flush_internal(mutations, prev_flush) + instance._next_flush_entries = mutations + await instance._flush_internal(prev_flush) assert prev_flush_mock.await_count == 1 assert instance._entries_processed_since_last_raise == num_entries assert execute_mock.call_count == 1 @@ -854,7 +891,8 @@ async def gen(x): prev_flush_mock = AsyncMock() prev_flush = prev_flush_mock.__call__() mutations = [_make_mutation(count=1, size=1)] * num_entries - await instance._flush_internal(mutations, prev_flush) + instance._next_flush_entries = mutations + await instance._flush_internal(prev_flush) assert prev_flush_mock.await_count == 1 assert instance._entries_processed_since_last_raise == num_entries assert execute_mock.call_count == 1 From f71eebc839db0bdeffa44ac72d6ac79e64d52f12 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 15 Jun 2023 12:22:22 -0700 Subject: [PATCH 142/213] renamed variables --- google/cloud/bigtable/mutations_batcher.py | 32 +++++------ tests/system/test_system.py | 22 ++++---- tests/unit/test_mutations_batcher.py | 62 +++++++++++----------- 3 files changed, 58 insertions(+), 58 deletions(-) diff --git a/google/cloud/bigtable/mutations_batcher.py b/google/cloud/bigtable/mutations_batcher.py index f71ff7aa9..ef7a8b3ce 100644 --- a/google/cloud/bigtable/mutations_batcher.py +++ b/google/cloud/bigtable/mutations_batcher.py @@ -197,7 +197,7 @@ def __init__( atexit.register(self._on_exit) self.closed: bool = False self._table = table - self._staged_mutations: list[RowMutationEntry] = [] + self._staged_entries: list[RowMutationEntry] = [] self._staged_count, self._staged_bytes = 0, 0 self._flow_control = _FlowControl( flow_control_max_count, flow_control_max_bytes @@ -218,7 +218,7 @@ def __init__( self._entries_processed_since_last_raise: int = 0 # keep track of entries that are set to be sent in next flush, # so we can add more before if mutations are added before flush task starts - self._next_flush_entries: list[RowMutationEntry] = [] + self._scheduled_flush_entries: list[RowMutationEntry] = [] async def _flush_timer(self, interval: float | None): """ @@ -229,7 +229,7 @@ async def _flush_timer(self, interval: float | None): while not self.closed: await asyncio.sleep(interval) # add new flush task to list - if not self.closed and self._staged_mutations: + if not self.closed and self._staged_entries: self._schedule_flush() def append(self, mutation_entry: 
RowMutationEntry): @@ -248,13 +248,13 @@ def append(self, mutation_entry: RowMutationEntry): raise ValueError( f"invalid mutation type: {type(mutation_entry).__name__}. Only RowMutationEntry objects are supported by batcher" ) - if self._next_flush_entries: + if self._scheduled_flush_entries: # flush is already scheduled to run on next loop iteration # add new entries directly to flush list - self._next_flush_entries.append(mutation_entry) + self._scheduled_flush_entries.append(mutation_entry) else: # add to staged list - self._staged_mutations.append(mutation_entry) + self._staged_entries.append(mutation_entry) # start a new flush task if limits exceeded self._staged_count += len(mutation_entry.mutations) self._staged_bytes += mutation_entry.size() @@ -266,7 +266,7 @@ def append(self, mutation_entry: RowMutationEntry): async def flush(self, *, raise_exceptions: bool = True, timeout: float | None = 60): """ - Flush all staged mutations + Flush all staged entries Args: - raise_exceptions: if True, will raise any unreported exceptions from this or previous flushes. @@ -279,7 +279,7 @@ async def flush(self, *, raise_exceptions: bool = True, timeout: float | None = - MutationsExceptionGroup if raise_exceptions is True and any mutations fail - asyncio.TimeoutError if timeout is reached before flush task completes. """ - # add recent staged mutations to flush task, and wait for flush to complete + # add recent staged entries to flush task, and wait for flush to complete flush_job: Awaitable[None] = self._schedule_flush() if timeout is not None: # wait `timeout seconds for flush to complete @@ -291,13 +291,13 @@ async def flush(self, *, raise_exceptions: bool = True, timeout: float | None = self._raise_exceptions() def _schedule_flush(self) -> asyncio.Task[None]: - """Update the flush task to include the latest staged mutations""" - if self._staged_mutations: - entries, self._staged_mutations = self._staged_mutations, [] + """Update the flush task to include the latest staged entries""" + if self._staged_entries: + entries, self._staged_entries = self._staged_entries, [] self._staged_count, self._staged_bytes = 0, 0 # flush is scheduled to run on next loop iteration - # use _next_flush_entries to add new extra entries before flush task starts - self._next_flush_entries.extend(entries) + # use _scheduled_flush_entries to add new extra entries before flush task starts + self._scheduled_flush_entries.extend(entries) self._prev_flush = asyncio.create_task( self._flush_internal(self._prev_flush) ) @@ -311,7 +311,7 @@ async def _flush_internal(self, prev_flush: asyncio.Task[None]): - prev_flush: the previous flush task, which will be awaited before a new flush is initiated """ - new_entries, self._next_flush_entries = self._next_flush_entries, [] + new_entries, self._scheduled_flush_entries = self._scheduled_flush_entries, [] # flush new entries in_process_requests: list[ asyncio.Task[None | list[FailedMutationEntryError]] @@ -410,8 +410,8 @@ def _on_exit(self): """ Called when program is exited. Raises warning if unflushed mutations remain """ - if not self.closed and self._staged_mutations: + if not self.closed and self._staged_entries: warnings.warn( f"MutationsBatcher for table {self._table.table_name} was not closed. " - f"{len(self._staged_mutations)} Unflushed mutations will not be sent to the server." + f"{len(self._staged_entries)} Unflushed mutations will not be sent to the server." 
) diff --git a/tests/system/test_system.py b/tests/system/test_system.py index 6097d5955..2d7744241 100644 --- a/tests/system/test_system.py +++ b/tests/system/test_system.py @@ -305,7 +305,7 @@ async def test_mutations_batcher_context_manager(client, table, temp_rows): batcher.append(bulk_mutation2) # ensure cell is updated assert (await _retrieve_cell_value(table, row_key)) == new_value - assert len(batcher._staged_mutations) == 0 + assert len(batcher._staged_entries) == 0 @retry.Retry(predicate=retry.if_exception_type(ClientError), initial=1, maximum=5) @@ -323,9 +323,9 @@ async def test_mutations_batcher_manual_flush(client, table, temp_rows): bulk_mutation = RowMutationEntry(row_key, [mutation]) async with table.mutations_batcher() as batcher: batcher.append(bulk_mutation) - assert len(batcher._staged_mutations) == 1 + assert len(batcher._staged_entries) == 1 await batcher.flush() - assert len(batcher._staged_mutations) == 0 + assert len(batcher._staged_entries) == 0 # ensure cell is updated assert (await _retrieve_cell_value(table, row_key)) == new_value @@ -347,9 +347,9 @@ async def test_mutations_batcher_timer_flush(client, table, temp_rows): async with table.mutations_batcher(flush_interval=flush_interval) as batcher: batcher.append(bulk_mutation) await asyncio.sleep(0) - assert len(batcher._staged_mutations) == 1 + assert len(batcher._staged_entries) == 1 await asyncio.sleep(flush_interval + 0.1) - assert len(batcher._staged_mutations) == 0 + assert len(batcher._staged_entries) == 0 # ensure cell is updated assert (await _retrieve_cell_value(table, row_key)) == new_value @@ -376,11 +376,11 @@ async def test_mutations_batcher_count_flush(client, table, temp_rows): batcher.append(bulk_mutation) # should be noop; flush not scheduled await batcher._prev_flush - assert len(batcher._staged_mutations) == 1 + assert len(batcher._staged_entries) == 1 batcher.append(bulk_mutation2) # task should now be scheduled await batcher._prev_flush - assert len(batcher._staged_mutations) == 0 + assert len(batcher._staged_entries) == 0 # ensure cells were updated assert (await _retrieve_cell_value(table, row_key)) == new_value assert (await _retrieve_cell_value(table, row_key2)) == new_value2 @@ -410,11 +410,11 @@ async def test_mutations_batcher_bytes_flush(client, table, temp_rows): batcher.append(bulk_mutation) # should be noop; flush not scheduled await batcher._prev_flush - assert len(batcher._staged_mutations) == 1 + assert len(batcher._staged_entries) == 1 batcher.append(bulk_mutation2) # task should now be scheduled await batcher._prev_flush - assert len(batcher._staged_mutations) == 0 + assert len(batcher._staged_entries) == 0 # ensure cells were updated assert (await _retrieve_cell_value(table, row_key)) == new_value assert (await _retrieve_cell_value(table, row_key2)) == new_value2 @@ -444,12 +444,12 @@ async def test_mutations_batcher_no_flush(client, table, temp_rows): flush_limit_bytes=size_limit, flush_limit_mutation_count=3, flush_interval=1 ) as batcher: batcher.append(bulk_mutation) - assert len(batcher._staged_mutations) == 1 + assert len(batcher._staged_entries) == 1 batcher.append(bulk_mutation2) # should be noop; flush not scheduled await batcher._prev_flush await asyncio.sleep(0.01) - assert len(batcher._staged_mutations) == 2 + assert len(batcher._staged_entries) == 2 # ensure cells were updated assert (await _retrieve_cell_value(table, row_key)) == start_value assert (await _retrieve_cell_value(table, row_key2)) == start_value diff --git a/tests/unit/test_mutations_batcher.py 
b/tests/unit/test_mutations_batcher.py index 937ba87fd..447465901 100644 --- a/tests/unit/test_mutations_batcher.py +++ b/tests/unit/test_mutations_batcher.py @@ -302,7 +302,7 @@ async def test_ctor_defaults(self, flush_timer_mock): async with self._make_one(table) as instance: assert instance._table == table assert instance.closed is False - assert instance._staged_mutations == [] + assert instance._staged_entries == [] assert instance.exceptions == [] assert instance._flow_control._max_mutation_count == 100000 assert instance._flow_control._max_mutation_bytes == 104857600 @@ -336,7 +336,7 @@ async def test_ctor_explicit(self, flush_timer_mock): ) as instance: assert instance._table == table assert instance.closed is False - assert instance._staged_mutations == [] + assert instance._staged_entries == [] assert instance.exceptions == [] assert instance._flow_control._max_mutation_count == flow_control_max_count assert instance._flow_control._max_mutation_bytes == flow_control_max_bytes @@ -370,7 +370,7 @@ async def test_ctor_no_limits(self, flush_timer_mock): ) as instance: assert instance._table == table assert instance.closed is False - assert instance._staged_mutations == [] + assert instance._staged_entries == [] assert instance.exceptions == [] assert instance._flow_control._max_mutation_count == float("inf") assert instance._flow_control._max_mutation_bytes == float("inf") @@ -443,7 +443,7 @@ async def test__flush_timer_call_when_closed(self, flush_mock): async def test__flush_timer(self, flush_mock): """Timer should continue to call _schedule_flush in a loop""" async with self._make_one() as instance: - instance._staged_mutations = [mock.Mock()] + instance._staged_entries = [mock.Mock()] loop_num = 3 expected_sleep = 12 with mock.patch("asyncio.sleep") as sleep_mock: @@ -523,19 +523,19 @@ async def test_append_outside_flow_limits(self): ) as instance: oversized_entry = _make_mutation(count=0, size=2) instance.append(oversized_entry) - assert instance._staged_mutations == [oversized_entry] + assert instance._staged_entries == [oversized_entry] assert instance._staged_count == 0 assert instance._staged_bytes == 2 - instance._staged_mutations = [] + instance._staged_entries = [] async with self._make_one( flow_control_max_count=1, flow_control_max_bytes=1 ) as instance: overcount_entry = _make_mutation(count=2, size=0) instance.append(overcount_entry) - assert instance._staged_mutations == [overcount_entry] + assert instance._staged_entries == [overcount_entry] assert instance._staged_count == 2 assert instance._staged_bytes == 0 - instance._staged_mutations = [] + instance._staged_entries = [] @pytest.mark.parametrize( "flush_count,flush_bytes,mutation_count,mutation_bytes,expect_flush", @@ -559,15 +559,15 @@ async def test_append( ) as instance: assert instance._staged_count == 0 assert instance._staged_bytes == 0 - assert instance._staged_mutations == [] + assert instance._staged_entries == [] mutation = _make_mutation(count=mutation_count, size=mutation_bytes) with mock.patch.object(instance, "_schedule_flush") as flush_mock: instance.append(mutation) assert flush_mock.call_count == bool(expect_flush) assert instance._staged_count == mutation_count assert instance._staged_bytes == mutation_bytes - assert instance._staged_mutations == [mutation] - instance._staged_mutations = [] + assert instance._staged_entries == [mutation] + instance._staged_entries = [] @pytest.mark.asyncio async def test_append_multiple_sequentially(self): @@ -577,25 +577,25 @@ async def 
test_append_multiple_sequentially(self): ) as instance: assert instance._staged_count == 0 assert instance._staged_bytes == 0 - assert instance._staged_mutations == [] + assert instance._staged_entries == [] mutation = _make_mutation(count=2, size=3) with mock.patch.object(instance, "_schedule_flush") as flush_mock: instance.append(mutation) assert flush_mock.call_count == 0 assert instance._staged_count == 2 assert instance._staged_bytes == 3 - assert len(instance._staged_mutations) == 1 + assert len(instance._staged_entries) == 1 instance.append(mutation) assert flush_mock.call_count == 0 assert instance._staged_count == 4 assert instance._staged_bytes == 6 - assert len(instance._staged_mutations) == 2 + assert len(instance._staged_entries) == 2 instance.append(mutation) assert flush_mock.call_count == 1 assert instance._staged_count == 6 assert instance._staged_bytes == 9 - assert len(instance._staged_mutations) == 3 - instance._staged_mutations = [] + assert len(instance._staged_entries) == 3 + instance._staged_entries = [] @pytest.mark.parametrize("raise_exceptions", [True, False]) @pytest.mark.asyncio @@ -653,7 +653,7 @@ async def mock_call(*args, **kwargs): start_time = time.monotonic() # create a few concurrent flushes for i in range(num_flushes): - instance._staged_mutations = [fake_mutations[i]] + instance._staged_entries = [fake_mutations[i]] try: await instance.flush(timeout=0.01) except asyncio.TimeoutError: @@ -686,7 +686,7 @@ async def mock_call(*args, **kwargs): op_mock.side_effect = mock_call start_time = time.monotonic() # flush one large batch, that will be broken up into smaller batches - instance._staged_mutations = fake_mutations + instance._staged_entries = fake_mutations try: await instance.flush(timeout=0.01) except asyncio.TimeoutError: @@ -737,11 +737,11 @@ async def mock_call(*args, **kwargs): op_mock.side_effect = mock_call # create a few concurrent flushes - instance._staged_mutations = [_make_mutation()] + instance._staged_entries = [_make_mutation()] flush_task1 = asyncio.create_task(instance.flush()) # let flush task initialize await asyncio.sleep(0) - instance._staged_mutations = [_make_mutation()] + instance._staged_entries = [_make_mutation()] flush_task2 = asyncio.create_task(instance.flush()) # raise errors with pytest.raises(MutationsExceptionGroup) as exc2: @@ -776,16 +776,16 @@ async def test_schedule_flush_with_mutations(self): orig_flush = instance._prev_flush with mock.patch.object(instance, "_flush_internal") as flush_mock: flush_mock.side_effect = lambda *args, **kwargs: setattr( - instance, "_next_flush_entries", [] + instance, "_scheduled_flush_entries", [] ) for i in range(1, 4): mutation = mock.Mock() - instance._staged_mutations = [mutation] + instance._staged_entries = [mutation] instance._schedule_flush() - assert instance._next_flush_entries == [mutation] + assert instance._scheduled_flush_entries == [mutation] # let flush task run await asyncio.sleep(0) - assert instance._staged_mutations == [] + assert instance._staged_entries == [] assert instance._staged_count == 0 assert instance._staged_bytes == 0 assert flush_mock.call_count == i @@ -810,16 +810,16 @@ async def mock_call(*args, **kwargs): # append a bunch of entries back-to-back, without awaiting num_entries = 10 for _ in range(num_entries): - instance._staged_mutations.append(_make_mutation()) + instance._staged_entries.append(_make_mutation()) instance._schedule_flush() - assert len(instance._staged_mutations) == 0 + assert len(instance._staged_entries) == 0 # await to let 
flush run await asyncio.sleep(0) # should have batched into a single request assert op_mock.call_count == 1 sent_batch = op_mock.call_args[0][0] assert len(sent_batch) == num_entries - assert instance._staged_mutations == [] + assert instance._staged_entries == [] @pytest.mark.asyncio async def test__flush_internal(self): @@ -844,7 +844,7 @@ async def gen(x): prev_flush_mock = AsyncMock() prev_flush = prev_flush_mock.__call__() mutations = [_make_mutation(count=1, size=1)] * num_entries - instance._next_flush_entries = mutations + instance._scheduled_flush_entries = mutations await instance._flush_internal(prev_flush) assert prev_flush_mock.await_count == 1 assert instance._entries_processed_since_last_raise == num_entries @@ -891,7 +891,7 @@ async def gen(x): prev_flush_mock = AsyncMock() prev_flush = prev_flush_mock.__call__() mutations = [_make_mutation(count=1, size=1)] * num_entries - instance._next_flush_entries = mutations + instance._scheduled_flush_entries = mutations await instance._flush_internal(prev_flush) assert prev_flush_mock.await_count == 1 assert instance._entries_processed_since_last_raise == num_entries @@ -1086,7 +1086,7 @@ async def test__on_exit(self, recwarn): assert len(recwarn) == 0 # calling with existing mutations should raise warning num_left = 4 - instance._staged_mutations = [mock.Mock()] * num_left + instance._staged_entries = [mock.Mock()] * num_left with pytest.warns(UserWarning) as w: instance._on_exit() assert len(w) == 1 @@ -1097,7 +1097,7 @@ async def test__on_exit(self, recwarn): instance._on_exit() assert len(recwarn) == 0 # reset staged mutations for cleanup - instance._staged_mutations = [] + instance._staged_entries = [] @pytest.mark.asyncio async def test_atexit_registration(self): From c0dc760edc778413e4a3d84c591aefc58166df5e Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 15 Jun 2023 13:46:39 -0700 Subject: [PATCH 143/213] added test; renamed variables --- google/cloud/bigtable/mutations_batcher.py | 14 ++--- tests/unit/test_mutations_batcher.py | 72 ++++++++++++++++++++-- 2 files changed, 74 insertions(+), 12 deletions(-) diff --git a/google/cloud/bigtable/mutations_batcher.py b/google/cloud/bigtable/mutations_batcher.py index ef7a8b3ce..fc22e5773 100644 --- a/google/cloud/bigtable/mutations_batcher.py +++ b/google/cloud/bigtable/mutations_batcher.py @@ -327,13 +327,13 @@ async def _flush_internal(self, prev_flush: asyncio.Task[None]): await asyncio.sleep(0) # collect exception data for next raise, after previous flush tasks have completed self._entries_processed_since_last_raise += len(new_entries) - for exc_list in all_results: - if isinstance(exc_list, Exception): - self.exceptions.append(exc_list) - elif exc_list is not None and all( - isinstance(e, FailedMutationEntryError) for e in exc_list - ): - self.exceptions.extend(exc_list) + for request_result in all_results: + if isinstance(request_result, Exception): + # will receive direct Exception objects if request task fails + self.exceptions.append(request_result) + elif request_result is not None: + # completed requests will return a list of FailedMutationEntryError + self.exceptions.extend(request_result) async def _execute_mutate_rows( self, batch: list[RowMutationEntry] diff --git a/tests/unit/test_mutations_batcher.py b/tests/unit/test_mutations_batcher.py index 447465901..766d39197 100644 --- a/tests/unit/test_mutations_batcher.py +++ b/tests/unit/test_mutations_batcher.py @@ -715,6 +715,7 @@ async def test_overlapping_flush_requests(self): MutationsExceptionGroup, 
FailedMutationEntryError, ) + from google.cloud.bigtable.mutations_batcher import MutationsBatcher exception1 = RuntimeError("test error1") exception2 = ValueError("test error2") @@ -724,11 +725,10 @@ async def test_overlapping_flush_requests(self): ] # exception1 is flushed first, but finishes second sleep_times = [0.1, 0.05] - - async with self._make_one() as instance: - with mock.patch.object( - instance, "_execute_mutate_rows", AsyncMock() - ) as op_mock: + with mock.patch.object( + MutationsBatcher, "_execute_mutate_rows", AsyncMock() + ) as op_mock: + async with self._make_one() as instance: # mock network calls async def mock_call(*args, **kwargs): time, exception = sleep_times.pop(0), wrapped_exception_list.pop(0) @@ -757,6 +757,68 @@ async def mock_call(*args, **kwargs): assert len(exc.value.exceptions) == 1 assert exc2.value.total_entries_attempted == 1 assert exc.value.exceptions[0].__cause__ == exception1 + # should have had two separate flush calls + assert op_mock.call_count == 2 + + @pytest.mark.asyncio + async def test_overlapping_flush_requests_background(self): + """ + Test scheduling multiple background flushes without yielding the event loop in between. + + Should result in the first flush receiving all staged entries, and the second flush being an empty + request. + Entries added after a context switch should not be flushed until the next flush call. + """ + from google.cloud.bigtable.exceptions import ( + MutationsExceptionGroup, + FailedMutationEntryError, + ) + from google.cloud.bigtable.mutations_batcher import MutationsBatcher + + test_error = RuntimeError("test error") + with mock.patch.object( + MutationsBatcher, "_execute_mutate_rows", AsyncMock() + ) as op_mock: + # mock network calls + async def mock_call(*args, **kwargs): + return [FailedMutationEntryError(2, mock.Mock(), test_error)] + + async with self._make_one() as instance: + mutations = [_make_mutation() for _ in range(4)] + op_mock.side_effect = mock_call + # create a few concurrent flushes + instance.append(mutations[0]) + flush_task1 = asyncio.create_task(instance.flush()) + instance.append(mutations[1]) + flush_task2 = asyncio.create_task(instance.flush()) + instance.append(mutations[2]) + # should have mutations staged and ready + assert len(instance._staged_entries) == 3 + assert len(instance._scheduled_flush_entries) == 0 + + # second task should be empty + await flush_task2 + # mutations should have been flushed + assert len(instance._staged_entries) == 0 + assert len(instance._scheduled_flush_entries) == 0 + # mutations added after a context switch should not be in flush batch + await asyncio.sleep(0) + instance.append(mutations[3]) + + # flushes should be finalized in order. 
flush_task1 should already be done + assert flush_task1.done() + # first task should have sent all mutations and raise exception + with pytest.raises(MutationsExceptionGroup) as exc: + await flush_task1 + assert exc.value.total_entries_attempted == 3 + assert len(exc.value.exceptions) == 1 + assert exc.value.exceptions[0].__cause__ == test_error + # should have just one flush call + assert op_mock.call_count == 1 + assert op_mock.call_args[0][0] == mutations[:3] + # final mutation should still be staged for next flush + assert instance._staged_entries == [mutations[3]] + instance._staged_entries = [] @pytest.mark.asyncio async def test_schedule_flush_no_mutations(self): From a6b12dc416e5ab58cb074cacea179b6b6f514814 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 16 Jun 2023 12:32:40 -0700 Subject: [PATCH 144/213] removed outdated comment --- google/cloud/bigtable/mutations_batcher.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/google/cloud/bigtable/mutations_batcher.py b/google/cloud/bigtable/mutations_batcher.py index fc22e5773..61f796464 100644 --- a/google/cloud/bigtable/mutations_batcher.py +++ b/google/cloud/bigtable/mutations_batcher.py @@ -115,8 +115,6 @@ async def add_to_flow(self, mutations: RowMutationEntry | list[RowMutationEntry] Yields: - list of mutations that have reserved space in the flow control. Each batch contains at least one mutation. - Raises: - - ValueError if any mutation entry is larger than the flow control limits """ if not isinstance(mutations, list): mutations = [mutations] From d5128ebea4bbdcb304446e4baf17bb9eac744054 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 16 Jun 2023 12:41:25 -0700 Subject: [PATCH 145/213] added comments --- google/cloud/bigtable/mutations_batcher.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigtable/mutations_batcher.py b/google/cloud/bigtable/mutations_batcher.py index 61f796464..8d2ea1ac8 100644 --- a/google/cloud/bigtable/mutations_batcher.py +++ b/google/cloud/bigtable/mutations_batcher.py @@ -183,7 +183,8 @@ def __init__( """ Args: - table: Table to perform rpc calls - - flush_interval: Automatically flush every flush_interval seconds + - flush_interval: Automatically flush every flush_interval seconds. + If None, no time-based flushing is performed. - flush_limit_mutation_count: Flush immediately after flush_limit_mutation_count mutations are added across all entries. If None, this limit is ignored. - flush_limit_bytes: Flush immediately after flush_limit_bytes bytes are added. 
@@ -223,6 +224,7 @@ async def _flush_timer(self, interval: float | None): Triggers new flush tasks every `interval` seconds """ if interval is None: + # no time-based flushing is performed return while not self.closed: await asyncio.sleep(interval) From 335391cc1da91d3fcbbc050ea6bf3ae033feb0d1 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 16 Jun 2023 13:11:02 -0700 Subject: [PATCH 146/213] renamed variable --- google/cloud/bigtable/mutations_batcher.py | 12 ++++++------ tests/unit/test_mutations_batcher.py | 12 ++++++------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/google/cloud/bigtable/mutations_batcher.py b/google/cloud/bigtable/mutations_batcher.py index 8d2ea1ac8..4dd07ccd8 100644 --- a/google/cloud/bigtable/mutations_batcher.py +++ b/google/cloud/bigtable/mutations_batcher.py @@ -217,7 +217,7 @@ def __init__( self._entries_processed_since_last_raise: int = 0 # keep track of entries that are set to be sent in next flush, # so we can add more before if mutations are added before flush task starts - self._scheduled_flush_entries: list[RowMutationEntry] = [] + self._pending_flush_entries: list[RowMutationEntry] = [] async def _flush_timer(self, interval: float | None): """ @@ -248,10 +248,10 @@ def append(self, mutation_entry: RowMutationEntry): raise ValueError( f"invalid mutation type: {type(mutation_entry).__name__}. Only RowMutationEntry objects are supported by batcher" ) - if self._scheduled_flush_entries: + if self._pending_flush_entries: # flush is already scheduled to run on next loop iteration # add new entries directly to flush list - self._scheduled_flush_entries.append(mutation_entry) + self._pending_flush_entries.append(mutation_entry) else: # add to staged list self._staged_entries.append(mutation_entry) @@ -296,8 +296,8 @@ def _schedule_flush(self) -> asyncio.Task[None]: entries, self._staged_entries = self._staged_entries, [] self._staged_count, self._staged_bytes = 0, 0 # flush is scheduled to run on next loop iteration - # use _scheduled_flush_entries to add new extra entries before flush task starts - self._scheduled_flush_entries.extend(entries) + # use _pending_flush_entries to add new extra entries before flush task starts + self._pending_flush_entries.extend(entries) self._prev_flush = asyncio.create_task( self._flush_internal(self._prev_flush) ) @@ -311,7 +311,7 @@ async def _flush_internal(self, prev_flush: asyncio.Task[None]): - prev_flush: the previous flush task, which will be awaited before a new flush is initiated """ - new_entries, self._scheduled_flush_entries = self._scheduled_flush_entries, [] + new_entries, self._pending_flush_entries = self._pending_flush_entries, [] # flush new entries in_process_requests: list[ asyncio.Task[None | list[FailedMutationEntryError]] diff --git a/tests/unit/test_mutations_batcher.py b/tests/unit/test_mutations_batcher.py index 766d39197..4a986a639 100644 --- a/tests/unit/test_mutations_batcher.py +++ b/tests/unit/test_mutations_batcher.py @@ -794,13 +794,13 @@ async def mock_call(*args, **kwargs): instance.append(mutations[2]) # should have mutations staged and ready assert len(instance._staged_entries) == 3 - assert len(instance._scheduled_flush_entries) == 0 + assert len(instance._pending_flush_entries) == 0 # second task should be empty await flush_task2 # mutations should have been flushed assert len(instance._staged_entries) == 0 - assert len(instance._scheduled_flush_entries) == 0 + assert len(instance._pending_flush_entries) == 0 # mutations added after a context switch should not be 
in flush batch await asyncio.sleep(0) instance.append(mutations[3]) @@ -838,13 +838,13 @@ async def test_schedule_flush_with_mutations(self): orig_flush = instance._prev_flush with mock.patch.object(instance, "_flush_internal") as flush_mock: flush_mock.side_effect = lambda *args, **kwargs: setattr( - instance, "_scheduled_flush_entries", [] + instance, "_pending_flush_entries", [] ) for i in range(1, 4): mutation = mock.Mock() instance._staged_entries = [mutation] instance._schedule_flush() - assert instance._scheduled_flush_entries == [mutation] + assert instance._pending_flush_entries == [mutation] # let flush task run await asyncio.sleep(0) assert instance._staged_entries == [] @@ -906,7 +906,7 @@ async def gen(x): prev_flush_mock = AsyncMock() prev_flush = prev_flush_mock.__call__() mutations = [_make_mutation(count=1, size=1)] * num_entries - instance._scheduled_flush_entries = mutations + instance._pending_flush_entries = mutations await instance._flush_internal(prev_flush) assert prev_flush_mock.await_count == 1 assert instance._entries_processed_since_last_raise == num_entries @@ -953,7 +953,7 @@ async def gen(x): prev_flush_mock = AsyncMock() prev_flush = prev_flush_mock.__call__() mutations = [_make_mutation(count=1, size=1)] * num_entries - instance._scheduled_flush_entries = mutations + instance._pending_flush_entries = mutations await instance._flush_internal(prev_flush) assert prev_flush_mock.await_count == 1 assert instance._entries_processed_since_last_raise == num_entries From 836171bf117a680bdf2571d0c68a1c9c422d9f90 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Tue, 20 Jun 2023 10:29:21 -0700 Subject: [PATCH 147/213] made append async; removed _pending_entries --- google/cloud/bigtable/mutations_batcher.py | 41 ++++----- tests/system/test_system.py | 20 ++--- tests/unit/test_mutations_batcher.py | 100 ++++++++++----------- 3 files changed, 73 insertions(+), 88 deletions(-) diff --git a/google/cloud/bigtable/mutations_batcher.py b/google/cloud/bigtable/mutations_batcher.py index 4dd07ccd8..6d0875238 100644 --- a/google/cloud/bigtable/mutations_batcher.py +++ b/google/cloud/bigtable/mutations_batcher.py @@ -215,9 +215,6 @@ def __init__( self._prev_flush: asyncio.Task[None] = asyncio.create_task(asyncio.sleep(0)) # MutationExceptionGroup reports number of successful entries along with failures self._entries_processed_since_last_raise: int = 0 - # keep track of entries that are set to be sent in next flush, - # so we can add more before if mutations are added before flush task starts - self._pending_flush_entries: list[RowMutationEntry] = [] async def _flush_timer(self, interval: float | None): """ @@ -232,7 +229,7 @@ async def _flush_timer(self, interval: float | None): if not self.closed and self._staged_entries: self._schedule_flush() - def append(self, mutation_entry: RowMutationEntry): + async def append(self, mutation_entry: RowMutationEntry): """ Add a new set of mutations to the internal queue @@ -248,21 +245,17 @@ def append(self, mutation_entry: RowMutationEntry): raise ValueError( f"invalid mutation type: {type(mutation_entry).__name__}. 
Only RowMutationEntry objects are supported by batcher" ) - if self._pending_flush_entries: - # flush is already scheduled to run on next loop iteration - # add new entries directly to flush list - self._pending_flush_entries.append(mutation_entry) - else: - # add to staged list - self._staged_entries.append(mutation_entry) - # start a new flush task if limits exceeded - self._staged_count += len(mutation_entry.mutations) - self._staged_bytes += mutation_entry.size() - if ( - self._staged_count >= self._flush_limit_count - or self._staged_bytes >= self._flush_limit_bytes - ): - self._schedule_flush() + self._staged_entries.append(mutation_entry) + # start a new flush task if limits exceeded + self._staged_count += len(mutation_entry.mutations) + self._staged_bytes += mutation_entry.size() + if ( + self._staged_count >= self._flush_limit_count + or self._staged_bytes >= self._flush_limit_bytes + ): + self._schedule_flush() + # yield to the event loop to allow flush to run + await asyncio.sleep(0) async def flush(self, *, raise_exceptions: bool = True, timeout: float | None = 60): """ @@ -295,15 +288,14 @@ def _schedule_flush(self) -> asyncio.Task[None]: if self._staged_entries: entries, self._staged_entries = self._staged_entries, [] self._staged_count, self._staged_bytes = 0, 0 - # flush is scheduled to run on next loop iteration - # use _pending_flush_entries to add new extra entries before flush task starts - self._pending_flush_entries.extend(entries) self._prev_flush = asyncio.create_task( - self._flush_internal(self._prev_flush) + self._flush_internal(entries, self._prev_flush) ) return self._prev_flush - async def _flush_internal(self, prev_flush: asyncio.Task[None]): + async def _flush_internal( + self, new_entries: list[RowMutationEntry], prev_flush: asyncio.Task[None] + ): """ Flushes a set of mutations to the server, and updates internal state @@ -311,7 +303,6 @@ async def _flush_internal(self, prev_flush: asyncio.Task[None]): - prev_flush: the previous flush task, which will be awaited before a new flush is initiated """ - new_entries, self._pending_flush_entries = self._pending_flush_entries, [] # flush new entries in_process_requests: list[ asyncio.Task[None | list[FailedMutationEntryError]] diff --git a/tests/system/test_system.py b/tests/system/test_system.py index 2d7744241..5294d4ac5 100644 --- a/tests/system/test_system.py +++ b/tests/system/test_system.py @@ -301,8 +301,8 @@ async def test_mutations_batcher_context_manager(client, table, temp_rows): bulk_mutation2 = RowMutationEntry(row_key2, [mutation2]) async with table.mutations_batcher() as batcher: - batcher.append(bulk_mutation) - batcher.append(bulk_mutation2) + await batcher.append(bulk_mutation) + await batcher.append(bulk_mutation2) # ensure cell is updated assert (await _retrieve_cell_value(table, row_key)) == new_value assert len(batcher._staged_entries) == 0 @@ -322,7 +322,7 @@ async def test_mutations_batcher_manual_flush(client, table, temp_rows): ) bulk_mutation = RowMutationEntry(row_key, [mutation]) async with table.mutations_batcher() as batcher: - batcher.append(bulk_mutation) + await batcher.append(bulk_mutation) assert len(batcher._staged_entries) == 1 await batcher.flush() assert len(batcher._staged_entries) == 0 @@ -345,7 +345,7 @@ async def test_mutations_batcher_timer_flush(client, table, temp_rows): bulk_mutation = RowMutationEntry(row_key, [mutation]) flush_interval = 0.1 async with table.mutations_batcher(flush_interval=flush_interval) as batcher: - batcher.append(bulk_mutation) + await 
batcher.append(bulk_mutation) await asyncio.sleep(0) assert len(batcher._staged_entries) == 1 await asyncio.sleep(flush_interval + 0.1) @@ -373,11 +373,11 @@ async def test_mutations_batcher_count_flush(client, table, temp_rows): bulk_mutation2 = RowMutationEntry(row_key2, [mutation2]) async with table.mutations_batcher(flush_limit_mutation_count=2) as batcher: - batcher.append(bulk_mutation) + await batcher.append(bulk_mutation) # should be noop; flush not scheduled await batcher._prev_flush assert len(batcher._staged_entries) == 1 - batcher.append(bulk_mutation2) + await batcher.append(bulk_mutation2) # task should now be scheduled await batcher._prev_flush assert len(batcher._staged_entries) == 0 @@ -407,11 +407,11 @@ async def test_mutations_batcher_bytes_flush(client, table, temp_rows): flush_limit = bulk_mutation.size() + bulk_mutation2.size() - 1 async with table.mutations_batcher(flush_limit_bytes=flush_limit) as batcher: - batcher.append(bulk_mutation) + await batcher.append(bulk_mutation) # should be noop; flush not scheduled await batcher._prev_flush assert len(batcher._staged_entries) == 1 - batcher.append(bulk_mutation2) + await batcher.append(bulk_mutation2) # task should now be scheduled await batcher._prev_flush assert len(batcher._staged_entries) == 0 @@ -443,9 +443,9 @@ async def test_mutations_batcher_no_flush(client, table, temp_rows): async with table.mutations_batcher( flush_limit_bytes=size_limit, flush_limit_mutation_count=3, flush_interval=1 ) as batcher: - batcher.append(bulk_mutation) + await batcher.append(bulk_mutation) assert len(batcher._staged_entries) == 1 - batcher.append(bulk_mutation2) + await batcher.append(bulk_mutation2) # should be noop; flush not scheduled await batcher._prev_flush await asyncio.sleep(0.01) diff --git a/tests/unit/test_mutations_batcher.py b/tests/unit/test_mutations_batcher.py index 4a986a639..04f67746b 100644 --- a/tests/unit/test_mutations_batcher.py +++ b/tests/unit/test_mutations_batcher.py @@ -499,7 +499,7 @@ async def test_append_closed(self): with pytest.raises(RuntimeError): instance = self._make_one() await instance.close() - instance.append(mock.Mock()) + await instance.append(mock.Mock()) @pytest.mark.asyncio async def test_append_wrong_mutation(self): @@ -512,7 +512,7 @@ async def test_append_wrong_mutation(self): instance = self._make_one() expected_error = "invalid mutation type: DeleteAllFromRow. 
Only RowMutationEntry objects are supported by batcher" with pytest.raises(ValueError) as e: - instance.append(DeleteAllFromRow()) + await instance.append(DeleteAllFromRow()) assert str(e.value) == expected_error @pytest.mark.asyncio @@ -522,7 +522,7 @@ async def test_append_outside_flow_limits(self): flow_control_max_count=1, flow_control_max_bytes=1 ) as instance: oversized_entry = _make_mutation(count=0, size=2) - instance.append(oversized_entry) + await instance.append(oversized_entry) assert instance._staged_entries == [oversized_entry] assert instance._staged_count == 0 assert instance._staged_bytes == 2 @@ -531,12 +531,42 @@ async def test_append_outside_flow_limits(self): flow_control_max_count=1, flow_control_max_bytes=1 ) as instance: overcount_entry = _make_mutation(count=2, size=0) - instance.append(overcount_entry) + await instance.append(overcount_entry) assert instance._staged_entries == [overcount_entry] assert instance._staged_count == 2 assert instance._staged_bytes == 0 instance._staged_entries = [] + @pytest.mark.asyncio + async def test_append_flush_runs_after_limit_hit(self): + """ + If the user appends a bunch of entries above the flush limits back-to-back, + it should still flush in a single task + """ + from google.cloud.bigtable.mutations_batcher import MutationsBatcher + + with mock.patch.object(MutationsBatcher, "_execute_mutate_rows") as op_mock: + async with self._make_one(flush_limit_bytes=100) as instance: + # mock network calls + async def mock_call(*args, **kwargs): + return [] + + op_mock.side_effect = mock_call + # append a mutation just under the size limit + await instance.append(_make_mutation(size=99)) + # append a bunch of entries back-to-back in a loop + num_entries = 10 + for _ in range(num_entries): + await instance.append(_make_mutation(size=1)) + # let any flush jobs finish + await instance._prev_flush + # should have only flushed once, with large mutation and first mutation in loop + assert op_mock.call_count == 1 + sent_batch = op_mock.call_args[0][0] + assert len(sent_batch) == 2 + # others should still be pending + assert len(instance._staged_entries) == num_entries - 1 + @pytest.mark.parametrize( "flush_count,flush_bytes,mutation_count,mutation_bytes,expect_flush", [ @@ -562,7 +592,7 @@ async def test_append( assert instance._staged_entries == [] mutation = _make_mutation(count=mutation_count, size=mutation_bytes) with mock.patch.object(instance, "_schedule_flush") as flush_mock: - instance.append(mutation) + await instance.append(mutation) assert flush_mock.call_count == bool(expect_flush) assert instance._staged_count == mutation_count assert instance._staged_bytes == mutation_bytes @@ -580,17 +610,17 @@ async def test_append_multiple_sequentially(self): assert instance._staged_entries == [] mutation = _make_mutation(count=2, size=3) with mock.patch.object(instance, "_schedule_flush") as flush_mock: - instance.append(mutation) + await instance.append(mutation) assert flush_mock.call_count == 0 assert instance._staged_count == 2 assert instance._staged_bytes == 3 assert len(instance._staged_entries) == 1 - instance.append(mutation) + await instance.append(mutation) assert flush_mock.call_count == 0 assert instance._staged_count == 4 assert instance._staged_bytes == 6 assert len(instance._staged_entries) == 2 - instance.append(mutation) + await instance.append(mutation) assert flush_mock.call_count == 1 assert instance._staged_count == 6 assert instance._staged_bytes == 9 @@ -787,23 +817,21 @@ async def mock_call(*args, **kwargs): 
mutations = [_make_mutation() for _ in range(4)] op_mock.side_effect = mock_call # create a few concurrent flushes - instance.append(mutations[0]) + await instance.append(mutations[0]) flush_task1 = asyncio.create_task(instance.flush()) - instance.append(mutations[1]) + await instance.append(mutations[1]) flush_task2 = asyncio.create_task(instance.flush()) - instance.append(mutations[2]) + await instance.append(mutations[2]) # should have mutations staged and ready assert len(instance._staged_entries) == 3 - assert len(instance._pending_flush_entries) == 0 # second task should be empty await flush_task2 # mutations should have been flushed assert len(instance._staged_entries) == 0 - assert len(instance._pending_flush_entries) == 0 # mutations added after a context switch should not be in flush batch await asyncio.sleep(0) - instance.append(mutations[3]) + await instance.append(mutations[3]) # flushes should be finalized in order. flush_task1 should already be done assert flush_task1.done() @@ -837,14 +865,11 @@ async def test_schedule_flush_with_mutations(self): async with self._make_one() as instance: orig_flush = instance._prev_flush with mock.patch.object(instance, "_flush_internal") as flush_mock: - flush_mock.side_effect = lambda *args, **kwargs: setattr( - instance, "_pending_flush_entries", [] - ) for i in range(1, 4): mutation = mock.Mock() instance._staged_entries = [mutation] instance._schedule_flush() - assert instance._pending_flush_entries == [mutation] + assert instance._staged_entries == [] # let flush task run await asyncio.sleep(0) assert instance._staged_entries == [] @@ -854,35 +879,6 @@ async def test_schedule_flush_with_mutations(self): assert instance._prev_flush != orig_flush orig_flush = instance._prev_flush - @pytest.mark.asyncio - async def test_schedule_flush_multiple_calls_single_flush(self): - """ - If the user appends a bunch of entries above the flush limits back-to-back, - it should still flush in a single task - """ - from google.cloud.bigtable.mutations_batcher import MutationsBatcher - - with mock.patch.object(MutationsBatcher, "_execute_mutate_rows") as op_mock: - async with self._make_one(flush_limit_mutation_count=1) as instance: - # mock network calls - async def mock_call(*args, **kwargs): - return [] - - op_mock.side_effect = mock_call - # append a bunch of entries back-to-back, without awaiting - num_entries = 10 - for _ in range(num_entries): - instance._staged_entries.append(_make_mutation()) - instance._schedule_flush() - assert len(instance._staged_entries) == 0 - # await to let flush run - await asyncio.sleep(0) - # should have batched into a single request - assert op_mock.call_count == 1 - sent_batch = op_mock.call_args[0][0] - assert len(sent_batch) == num_entries - assert instance._staged_entries == [] - @pytest.mark.asyncio async def test__flush_internal(self): """ @@ -906,8 +902,7 @@ async def gen(x): prev_flush_mock = AsyncMock() prev_flush = prev_flush_mock.__call__() mutations = [_make_mutation(count=1, size=1)] * num_entries - instance._pending_flush_entries = mutations - await instance._flush_internal(prev_flush) + await instance._flush_internal(mutations, prev_flush) assert prev_flush_mock.await_count == 1 assert instance._entries_processed_since_last_raise == num_entries assert execute_mock.call_count == 1 @@ -953,8 +948,7 @@ async def gen(x): prev_flush_mock = AsyncMock() prev_flush = prev_flush_mock.__call__() mutations = [_make_mutation(count=1, size=1)] * num_entries - instance._pending_flush_entries = mutations - await 
instance._flush_internal(prev_flush) + await instance._flush_internal(mutations, prev_flush) assert prev_flush_mock.await_count == 1 assert instance._entries_processed_since_last_raise == num_entries assert execute_mock.call_count == 1 @@ -997,7 +991,7 @@ async def test_manual_flush_end_to_end(self): lambda *args, **kwargs: self._mock_gapic_return(num_nutations) ) for m in mutations: - instance.append(m) + await instance.append(m) assert instance._entries_processed_since_last_raise == 0 await instance.flush() assert instance._entries_processed_since_last_raise == num_nutations @@ -1018,7 +1012,7 @@ async def test_timer_flush_end_to_end(self): lambda *args, **kwargs: self._mock_gapic_return(num_nutations) ) for m in mutations: - instance.append(m) + await instance.append(m) assert instance._entries_processed_since_last_raise == 0 # let flush trigger due to timer await asyncio.sleep(0.1) From 06f16bc618dedb99978c4f7010051e5b2ae53f4d Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Tue, 20 Jun 2023 10:41:10 -0700 Subject: [PATCH 148/213] use futures instead of tasks for type hints --- google/cloud/bigtable/mutations_batcher.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/google/cloud/bigtable/mutations_batcher.py b/google/cloud/bigtable/mutations_batcher.py index 6d0875238..52f8d2297 100644 --- a/google/cloud/bigtable/mutations_batcher.py +++ b/google/cloud/bigtable/mutations_batcher.py @@ -208,11 +208,12 @@ def __init__( else float("inf") ) self.exceptions: list[Exception] = [] - self._flush_timer_task: asyncio.Task[None] = asyncio.create_task( + self._flush_timer_task: asyncio.Future[None] = asyncio.create_task( self._flush_timer(flush_interval) ) - # create noop previous flush task to avoid None checks - self._prev_flush: asyncio.Task[None] = asyncio.create_task(asyncio.sleep(0)) + # create empty previous flush to avoid None checks + self._prev_flush: asyncio.Future[None] = asyncio.Future() + self._prev_flush.set_result(None) # MutationExceptionGroup reports number of successful entries along with failures self._entries_processed_since_last_raise: int = 0 @@ -283,7 +284,7 @@ async def flush(self, *, raise_exceptions: bool = True, timeout: float | None = if raise_exceptions: self._raise_exceptions() - def _schedule_flush(self) -> asyncio.Task[None]: + def _schedule_flush(self) -> asyncio.Future[None]: """Update the flush task to include the latest staged entries""" if self._staged_entries: entries, self._staged_entries = self._staged_entries, [] @@ -294,7 +295,7 @@ def _schedule_flush(self) -> asyncio.Task[None]: return self._prev_flush async def _flush_internal( - self, new_entries: list[RowMutationEntry], prev_flush: asyncio.Task[None] + self, new_entries: list[RowMutationEntry], prev_flush: asyncio.Future[None] ): """ Flushes a set of mutations to the server, and updates internal state @@ -305,7 +306,7 @@ async def _flush_internal( """ # flush new entries in_process_requests: list[ - asyncio.Task[None | list[FailedMutationEntryError]] + asyncio.Future[list[FailedMutationEntryError]] | asyncio.Future[None] ] = [prev_flush] async for batch in self._flow_control.add_to_flow(new_entries): batch_task = asyncio.create_task(self._execute_mutate_rows(batch)) @@ -393,6 +394,10 @@ async def close(self): self._flush_timer_task.cancel() self._schedule_flush() await self._prev_flush + try: + await self._flush_timer_task + except asyncio.CancelledError: + pass # raise unreported exceptions self._raise_exceptions() atexit.unregister(self._on_exit) From 
09a5d51427d6fa9cb391dd3ba83dbaafad3e1b33 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Tue, 20 Jun 2023 13:26:14 -0700 Subject: [PATCH 149/213] refactored asyncio calls out to make sync implementation easier --- google/cloud/bigtable/mutations_batcher.py | 99 +++++++++++++++------- tests/unit/test_mutations_batcher.py | 52 ++++++------ 2 files changed, 97 insertions(+), 54 deletions(-) diff --git a/google/cloud/bigtable/mutations_batcher.py b/google/cloud/bigtable/mutations_batcher.py index 52f8d2297..ff02bab3c 100644 --- a/google/cloud/bigtable/mutations_batcher.py +++ b/google/cloud/bigtable/mutations_batcher.py @@ -17,7 +17,7 @@ import asyncio import atexit import warnings -from typing import Awaitable, TYPE_CHECKING +from typing import Any, Awaitable, TYPE_CHECKING from google.cloud.bigtable.mutations import RowMutationEntry from google.cloud.bigtable.exceptions import MutationsExceptionGroup @@ -208,27 +208,42 @@ def __init__( else float("inf") ) self.exceptions: list[Exception] = [] - self._flush_timer_task: asyncio.Future[None] = asyncio.create_task( - self._flush_timer(flush_interval) - ) + self._flush_timer = self._start_flush_timer(flush_interval) # create empty previous flush to avoid None checks self._prev_flush: asyncio.Future[None] = asyncio.Future() self._prev_flush.set_result(None) # MutationExceptionGroup reports number of successful entries along with failures self._entries_processed_since_last_raise: int = 0 - async def _flush_timer(self, interval: float | None): + def _start_flush_timer(self, interval: float | None) -> asyncio.Future[None]: """ - Triggers new flush tasks every `interval` seconds + Set up a background task to flush the batcher every interval seconds + + If interval is None, an empty future is returned + + Args: + - flush_interval: Automatically flush every flush_interval seconds. + If None, no time-based flushing is performed. 
+ Returns: + - asyncio.Future that represents the background task """ - if interval is None: - # no time-based flushing is performed - return - while not self.closed: - await asyncio.sleep(interval) - # add new flush task to list - if not self.closed and self._staged_entries: - self._schedule_flush() + if interval is None or self.closed: + empty_future: asyncio.Future[None] = asyncio.Future() + empty_future.set_result(None) + return empty_future + + async def timer_routine(self, interval: float): + """ + Triggers new flush tasks every `interval` seconds + """ + while not self.closed: + await asyncio.sleep(interval) + # add new flush task to list + if not self.closed and self._staged_entries: + self._schedule_flush() + + timer_task = asyncio.create_task(timer_routine(self, interval)) + return timer_task async def append(self, mutation_entry: RowMutationEntry): """ @@ -289,8 +304,8 @@ def _schedule_flush(self) -> asyncio.Future[None]: if self._staged_entries: entries, self._staged_entries = self._staged_entries, [] self._staged_count, self._staged_bytes = 0, 0 - self._prev_flush = asyncio.create_task( - self._flush_internal(entries, self._prev_flush) + self._prev_flush = self._create_bg_task( + self._flush_internal, entries, self._prev_flush ) return self._prev_flush @@ -309,23 +324,15 @@ async def _flush_internal( asyncio.Future[list[FailedMutationEntryError]] | asyncio.Future[None] ] = [prev_flush] async for batch in self._flow_control.add_to_flow(new_entries): - batch_task = asyncio.create_task(self._execute_mutate_rows(batch)) + batch_task = self._create_bg_task(self._execute_mutate_rows, batch) in_process_requests.append(batch_task) # wait for all inflight requests to complete - all_results: list[ - list[FailedMutationEntryError] | Exception | None - ] = await asyncio.gather(*in_process_requests, return_exceptions=True) + found_exceptions = await self._wait_for_batch_results(*in_process_requests) # allow previous flush tasks to finalize before adding new exceptions to list await asyncio.sleep(0) # collect exception data for next raise, after previous flush tasks have completed self._entries_processed_since_last_raise += len(new_entries) - for request_result in all_results: - if isinstance(request_result, Exception): - # will receive direct Exception objects if request task fails - self.exceptions.append(request_result) - elif request_result is not None: - # completed requests will return a list of FailedMutationEntryError - self.exceptions.extend(request_result) + self.exceptions.extend(found_exceptions) async def _execute_mutate_rows( self, batch: list[RowMutationEntry] @@ -391,11 +398,11 @@ async def close(self): Flush queue and clean up resources """ self.closed = True - self._flush_timer_task.cancel() + self._flush_timer.cancel() self._schedule_flush() await self._prev_flush try: - await self._flush_timer_task + await self._flush_timer except asyncio.CancelledError: pass # raise unreported exceptions @@ -411,3 +418,37 @@ def _on_exit(self): f"MutationsBatcher for table {self._table.table_name} was not closed. " f"{len(self._staged_entries)} Unflushed mutations will not be sent to the server." ) + + @staticmethod + def _create_bg_task(func, *args, **kwargs) -> asyncio.Future[Any]: + """ + Create a new background task, and return a future + + This method wraps asyncio to make it easier to maintain subclasses + with different concurrency models. 
+ """ + return asyncio.create_task(func(*args, **kwargs)) + + @staticmethod + async def _wait_for_batch_results( + *tasks: asyncio.Future[list[FailedMutationEntryError]] | asyncio.Future[None], + ) -> list[list[FailedMutationEntryError] | Exception]: + """ + Takes in a list of futures representing _execute_mutate_rows tasks, + waits for them to complete, and returns a list of errors encountered. + + Errors are expected to be FailedMutationEntryError, representing a failed + mutation operation. If a task fails, a direct Exception object will be + added to the output list instead. + """ + all_results = await asyncio.gather(*tasks, return_exceptions=True) + found_errors = [] + for result in all_results: + if isinstance(result, Exception): + # will receive direct Exception objects if request task fails + found_errors.append(result) + elif result: + # completed requests will return a list of FailedMutationEntryError + found_errors.extend(result) + return found_errors + diff --git a/tests/unit/test_mutations_batcher.py b/tests/unit/test_mutations_batcher.py index 04f67746b..24b5afb8d 100644 --- a/tests/unit/test_mutations_batcher.py +++ b/tests/unit/test_mutations_batcher.py @@ -294,10 +294,11 @@ def _make_one(self, table=None, **kwargs): return MutationsBatcher(table, **kwargs) @unittest.mock.patch( - "google.cloud.bigtable.mutations_batcher.MutationsBatcher._flush_timer" + "google.cloud.bigtable.mutations_batcher.MutationsBatcher._start_flush_timer" ) @pytest.mark.asyncio async def test_ctor_defaults(self, flush_timer_mock): + flush_timer_mock.return_value = asyncio.create_task(asyncio.sleep(0)) table = mock.Mock() async with self._make_one(table) as instance: assert instance._table == table @@ -312,14 +313,15 @@ async def test_ctor_defaults(self, flush_timer_mock): await asyncio.sleep(0) assert flush_timer_mock.call_count == 1 assert flush_timer_mock.call_args[0][0] == 5 - assert isinstance(instance._flush_timer_task, asyncio.Task) + assert isinstance(instance._flush_timer, asyncio.Future) @unittest.mock.patch( - "google.cloud.bigtable.mutations_batcher.MutationsBatcher._flush_timer" + "google.cloud.bigtable.mutations_batcher.MutationsBatcher._start_flush_timer", ) @pytest.mark.asyncio async def test_ctor_explicit(self, flush_timer_mock): """Test with explicit parameters""" + flush_timer_mock.return_value = asyncio.create_task(asyncio.sleep(0)) table = mock.Mock() flush_interval = 20 flush_limit_count = 17 @@ -346,14 +348,15 @@ async def test_ctor_explicit(self, flush_timer_mock): await asyncio.sleep(0) assert flush_timer_mock.call_count == 1 assert flush_timer_mock.call_args[0][0] == flush_interval - assert isinstance(instance._flush_timer_task, asyncio.Task) + assert isinstance(instance._flush_timer, asyncio.Future) @unittest.mock.patch( - "google.cloud.bigtable.mutations_batcher.MutationsBatcher._flush_timer" + "google.cloud.bigtable.mutations_batcher.MutationsBatcher._start_flush_timer" ) @pytest.mark.asyncio async def test_ctor_no_limits(self, flush_timer_mock): """Test with None for flow control and flush limits""" + flush_timer_mock.return_value = asyncio.create_task(asyncio.sleep(0)) table = mock.Mock() flush_interval = None flush_limit_count = None @@ -380,7 +383,7 @@ async def test_ctor_no_limits(self, flush_timer_mock): await asyncio.sleep(0) assert flush_timer_mock.call_count == 1 assert flush_timer_mock.call_args[0][0] is None - assert isinstance(instance._flush_timer_task, asyncio.Task) + assert isinstance(instance._flush_timer, asyncio.Future) def 
test_default_argument_consistency(self): """ @@ -414,11 +417,11 @@ def test_default_argument_consistency(self): "google.cloud.bigtable.mutations_batcher.MutationsBatcher._schedule_flush" ) @pytest.mark.asyncio - async def test__flush_timer_w_None(self, flush_mock): + async def test__start_flush_timer_w_None(self, flush_mock): """Empty timer should return immediately""" async with self._make_one() as instance: with mock.patch("asyncio.sleep") as sleep_mock: - await instance._flush_timer(None) + await instance._start_flush_timer(None) assert sleep_mock.call_count == 0 assert flush_mock.call_count == 0 @@ -426,13 +429,13 @@ async def test__flush_timer_w_None(self, flush_mock): "google.cloud.bigtable.mutations_batcher.MutationsBatcher._schedule_flush" ) @pytest.mark.asyncio - async def test__flush_timer_call_when_closed(self, flush_mock): + async def test__start_flush_timer_call_when_closed(self, flush_mock): """closed batcher's timer should return immediately""" async with self._make_one() as instance: await instance.close() flush_mock.reset_mock() with mock.patch("asyncio.sleep") as sleep_mock: - await instance._flush_timer(1) + await instance._start_flush_timer(1) assert sleep_mock.call_count == 0 assert flush_mock.call_count == 0 @@ -442,14 +445,14 @@ async def test__flush_timer_call_when_closed(self, flush_mock): @pytest.mark.asyncio async def test__flush_timer(self, flush_mock): """Timer should continue to call _schedule_flush in a loop""" - async with self._make_one() as instance: + expected_sleep = 12 + async with self._make_one(flush_interval=expected_sleep) as instance: instance._staged_entries = [mock.Mock()] loop_num = 3 - expected_sleep = 12 with mock.patch("asyncio.sleep") as sleep_mock: sleep_mock.side_effect = [None] * loop_num + [asyncio.CancelledError()] try: - await instance._flush_timer(expected_sleep) + await instance._flush_timer except asyncio.CancelledError: pass assert sleep_mock.call_count == loop_num + 1 @@ -462,13 +465,13 @@ async def test__flush_timer(self, flush_mock): @pytest.mark.asyncio async def test__flush_timer_no_mutations(self, flush_mock): """Timer should not flush if no new mutations have been staged""" - async with self._make_one() as instance: + expected_sleep = 12 + async with self._make_one(flush_interval=expected_sleep) as instance: loop_num = 3 - expected_sleep = 12 with mock.patch("asyncio.sleep") as sleep_mock: sleep_mock.side_effect = [None] * loop_num + [asyncio.CancelledError()] try: - await instance._flush_timer(expected_sleep) + await instance._flush_timer except asyncio.CancelledError: pass assert sleep_mock.call_count == loop_num + 1 @@ -482,16 +485,15 @@ async def test__flush_timer_no_mutations(self, flush_mock): async def test__flush_timer_close(self, flush_mock): """Timer should continue terminate after close""" async with self._make_one() as instance: - expected_sleep = 12 with mock.patch("asyncio.sleep"): - task = asyncio.create_task(instance._flush_timer(expected_sleep)) # let task run in background await asyncio.sleep(0.5) + assert instance._flush_timer.done() is False # close the batcher await instance.close() await asyncio.sleep(0.1) # task should be complete - assert task.done() + assert instance._flush_timer.done() is True @pytest.mark.asyncio async def test_append_closed(self): @@ -509,11 +511,11 @@ async def test_append_wrong_mutation(self): """ from google.cloud.bigtable.mutations import DeleteAllFromRow - instance = self._make_one() - expected_error = "invalid mutation type: DeleteAllFromRow. 
Only RowMutationEntry objects are supported by batcher" - with pytest.raises(ValueError) as e: - await instance.append(DeleteAllFromRow()) - assert str(e.value) == expected_error + async with self._make_one() as instance: + expected_error = "invalid mutation type: DeleteAllFromRow. Only RowMutationEntry objects are supported by batcher" + with pytest.raises(ValueError) as e: + await instance.append(DeleteAllFromRow()) + assert str(e.value) == expected_error @pytest.mark.asyncio async def test_append_outside_flow_limits(self): @@ -1110,7 +1112,7 @@ async def test_close(self): with mock.patch.object(instance, "_raise_exceptions") as raise_mock: await instance.close() assert instance.closed is True - assert instance._flush_timer_task.done() is True + assert instance._flush_timer.done() is True assert instance._prev_flush.done() is True assert flush_mock.call_count == 1 assert raise_mock.call_count == 1 From 0b0f30ed421e3d591ec99f2863e65852a9b9745f Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 21 Jun 2023 14:39:42 -0700 Subject: [PATCH 150/213] renamed flow_control_max_count --- google/cloud/bigtable/mutations_batcher.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigtable/mutations_batcher.py b/google/cloud/bigtable/mutations_batcher.py index ff02bab3c..240f452b4 100644 --- a/google/cloud/bigtable/mutations_batcher.py +++ b/google/cloud/bigtable/mutations_batcher.py @@ -177,7 +177,7 @@ def __init__( flush_interval: float | None = 5, flush_limit_mutation_count: int | None = 1000, flush_limit_bytes: int = 20 * MB_SIZE, - flow_control_max_count: int | None = 100_000, + flow_control_max_mutation_count: int | None = 100_000, flow_control_max_bytes: int | None = 100 * MB_SIZE, ): """ @@ -199,7 +199,7 @@ def __init__( self._staged_entries: list[RowMutationEntry] = [] self._staged_count, self._staged_bytes = 0, 0 self._flow_control = _FlowControl( - flow_control_max_count, flow_control_max_bytes + flow_control_max_mutation_count, flow_control_max_bytes ) self._flush_limit_bytes = flush_limit_bytes self._flush_limit_count = ( From 283a949791917811cbc2651740ad2d287f989d38 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 21 Jun 2023 14:45:33 -0700 Subject: [PATCH 151/213] removed None as a flow_control_max_* option --- google/cloud/bigtable/client.py | 10 ++++------ google/cloud/bigtable/mutations_batcher.py | 23 +++++++--------------- 2 files changed, 11 insertions(+), 22 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 40810f74c..46f28589c 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -581,8 +581,8 @@ def mutations_batcher( flush_interval: float | None = 5, flush_limit_mutation_count: int | None = 1000, flush_limit_bytes: int = 20 * MB_SIZE, - flow_control_max_count: int | None = 100_000, - flow_control_max_bytes: int | None = 100 * MB_SIZE, + flow_control_max_mutation_count: int = 100_000, + flow_control_max_bytes: int = 100 * MB_SIZE, ) -> MutationsBatcher: """ Returns a new mutations batcher instance. @@ -596,10 +596,8 @@ def mutations_batcher( - flush_limit_mutation_count: Flush immediately after flush_limit_mutation_count mutations are added across all entries. If None, this limit is ignored. - flush_limit_bytes: Flush immediately after flush_limit_bytes bytes are added. - If None, this limit is ignored. - - flow_control_max_count: Maximum number of inflight mutations. + - flow_control_max_mitation_count: Maximum number of inflight mutations. 
- flow_control_max_bytes: Maximum number of inflight bytes. - If None, this limit is ignored. Returns: - a MutationsBatcher context manager that can batch requests """ @@ -608,7 +606,7 @@ def mutations_batcher( flush_interval=flush_interval, flush_limit_mutation_count=flush_limit_mutation_count, flush_limit_bytes=flush_limit_bytes, - flow_control_max_count=flow_control_max_count, + flow_control_max_mutation_count=flow_control_max_mutation_count, flow_control_max_bytes=flow_control_max_bytes, ) diff --git a/google/cloud/bigtable/mutations_batcher.py b/google/cloud/bigtable/mutations_batcher.py index 240f452b4..7ec45c850 100644 --- a/google/cloud/bigtable/mutations_batcher.py +++ b/google/cloud/bigtable/mutations_batcher.py @@ -48,23 +48,17 @@ class _FlowControl: def __init__( self, - max_mutation_count: int | None, - max_mutation_bytes: int | None, + max_mutation_count: int, + max_mutation_bytes: int, ): """ Args: - max_mutation_count: maximum number of mutations to send in a single rpc. This corresponds to individual mutations in a single RowMutationEntry. - If None, no limit is enforced. - max_mutation_bytes: maximum number of bytes to send in a single rpc. - If None, no limit is enforced. """ - self._max_mutation_count = ( - max_mutation_count if max_mutation_count is not None else float("inf") - ) - self._max_mutation_bytes = ( - max_mutation_bytes if max_mutation_bytes is not None else float("inf") - ) + self._max_mutation_count = max_mutation_count + self._max_mutation_bytes = max_mutation_bytes if self._max_mutation_count < 1: raise ValueError("max_mutation_count must be greater than 0") if self._max_mutation_bytes < 1: @@ -177,8 +171,8 @@ def __init__( flush_interval: float | None = 5, flush_limit_mutation_count: int | None = 1000, flush_limit_bytes: int = 20 * MB_SIZE, - flow_control_max_mutation_count: int | None = 100_000, - flow_control_max_bytes: int | None = 100 * MB_SIZE, + flow_control_max_mutation_count: int = 100_000, + flow_control_max_bytes: int = 100 * MB_SIZE, ): """ Args: @@ -188,10 +182,8 @@ def __init__( - flush_limit_mutation_count: Flush immediately after flush_limit_mutation_count mutations are added across all entries. If None, this limit is ignored. - flush_limit_bytes: Flush immediately after flush_limit_bytes bytes are added. - - flow_control_max_count: Maximum number of inflight mutations. - If None, this limit is ignored. + - flow_control_max_mutation_count: Maximum number of inflight mutations. - flow_control_max_bytes: Maximum number of inflight bytes. - If None, this limit is ignored. 
""" atexit.register(self._on_exit) self.closed: bool = False @@ -451,4 +443,3 @@ async def _wait_for_batch_results( # completed requests will return a list of FailedMutationEntryError found_errors.extend(result) return found_errors - From 834db1a169738aed9c711171bdce3bea6c32a5d0 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 21 Jun 2023 14:54:35 -0700 Subject: [PATCH 152/213] fixed type annotation --- google/cloud/bigtable/mutations_batcher.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/google/cloud/bigtable/mutations_batcher.py b/google/cloud/bigtable/mutations_batcher.py index 7ec45c850..f5ef48a83 100644 --- a/google/cloud/bigtable/mutations_batcher.py +++ b/google/cloud/bigtable/mutations_batcher.py @@ -424,7 +424,7 @@ def _create_bg_task(func, *args, **kwargs) -> asyncio.Future[Any]: @staticmethod async def _wait_for_batch_results( *tasks: asyncio.Future[list[FailedMutationEntryError]] | asyncio.Future[None], - ) -> list[list[FailedMutationEntryError] | Exception]: + ) -> list[Exception]: """ Takes in a list of futures representing _execute_mutate_rows tasks, waits for them to complete, and returns a list of errors encountered. From 524f2d8807c2228d9fcf0627b6add109549c6991 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 21 Jun 2023 14:55:06 -0700 Subject: [PATCH 153/213] fixed tests --- tests/unit/test_mutations_batcher.py | 35 +++++++++++----------------- 1 file changed, 13 insertions(+), 22 deletions(-) diff --git a/tests/unit/test_mutations_batcher.py b/tests/unit/test_mutations_batcher.py index 24b5afb8d..ae2ce1713 100644 --- a/tests/unit/test_mutations_batcher.py +++ b/tests/unit/test_mutations_batcher.py @@ -48,12 +48,6 @@ def test_ctor(self): assert instance._in_flight_mutation_bytes == 0 assert isinstance(instance._capacity_condition, asyncio.Condition) - def test_ctor_empty_values(self): - """Test constructor with None count and bytes""" - instance = self._make_one(None, None) - assert instance._max_mutation_count == float("inf") - assert instance._max_mutation_bytes == float("inf") - def test_ctor_invalid_values(self): """Test that values are positive, and fit within expected limits""" with pytest.raises(ValueError) as e: @@ -250,7 +244,7 @@ async def test_add_to_flow_max_mutation_limits( ): mutation_objs = [_make_mutation(count=m[0], size=m[1]) for m in mutations] # flow control has no limits except API restrictions - instance = self._make_one(None, None) + instance = self._make_one(float("inf"), float("inf")) i = 0 async for batch in instance.add_to_flow(mutation_objs): expected_batch = expected_results[i] @@ -326,21 +320,24 @@ async def test_ctor_explicit(self, flush_timer_mock): flush_interval = 20 flush_limit_count = 17 flush_limit_bytes = 19 - flow_control_max_count = 1001 + flow_control_max_mutation_count = 1001 flow_control_max_bytes = 12 async with self._make_one( table, flush_interval=flush_interval, flush_limit_mutation_count=flush_limit_count, flush_limit_bytes=flush_limit_bytes, - flow_control_max_count=flow_control_max_count, + flow_control_max_mutation_count=flow_control_max_mutation_count, flow_control_max_bytes=flow_control_max_bytes, ) as instance: assert instance._table == table assert instance.closed is False assert instance._staged_entries == [] assert instance.exceptions == [] - assert instance._flow_control._max_mutation_count == flow_control_max_count + assert ( + instance._flow_control._max_mutation_count + == flow_control_max_mutation_count + ) assert instance._flow_control._max_mutation_bytes == 
flow_control_max_bytes assert instance._flow_control._in_flight_mutation_count == 0 assert instance._flow_control._in_flight_mutation_bytes == 0 @@ -354,29 +351,23 @@ async def test_ctor_explicit(self, flush_timer_mock): "google.cloud.bigtable.mutations_batcher.MutationsBatcher._start_flush_timer" ) @pytest.mark.asyncio - async def test_ctor_no_limits(self, flush_timer_mock): - """Test with None for flow control and flush limits""" + async def test_ctor_no_flush_limits(self, flush_timer_mock): + """Test with None for flush limits""" flush_timer_mock.return_value = asyncio.create_task(asyncio.sleep(0)) table = mock.Mock() flush_interval = None flush_limit_count = None flush_limit_bytes = None - flow_control_max_count = None - flow_control_max_bytes = None async with self._make_one( table, flush_interval=flush_interval, flush_limit_mutation_count=flush_limit_count, flush_limit_bytes=flush_limit_bytes, - flow_control_max_count=flow_control_max_count, - flow_control_max_bytes=flow_control_max_bytes, ) as instance: assert instance._table == table assert instance.closed is False assert instance._staged_entries == [] assert instance.exceptions == [] - assert instance._flow_control._max_mutation_count == float("inf") - assert instance._flow_control._max_mutation_bytes == float("inf") assert instance._flow_control._in_flight_mutation_count == 0 assert instance._flow_control._in_flight_mutation_bytes == 0 assert instance._entries_processed_since_last_raise == 0 @@ -521,7 +512,7 @@ async def test_append_wrong_mutation(self): async def test_append_outside_flow_limits(self): """entries larger than mutation limits are still processed""" async with self._make_one( - flow_control_max_count=1, flow_control_max_bytes=1 + flow_control_max_mutation_count=1, flow_control_max_bytes=1 ) as instance: oversized_entry = _make_mutation(count=0, size=2) await instance.append(oversized_entry) @@ -530,7 +521,7 @@ async def test_append_outside_flow_limits(self): assert instance._staged_bytes == 2 instance._staged_entries = [] async with self._make_one( - flow_control_max_count=1, flow_control_max_bytes=1 + flow_control_max_mutation_count=1, flow_control_max_bytes=1 ) as instance: overcount_entry = _make_mutation(count=2, size=0) await instance.append(overcount_entry) @@ -706,7 +697,7 @@ async def test_flush_flow_control_concurrent_requests(self): num_calls = 10 fake_mutations = [_make_mutation(count=1) for _ in range(num_calls)] - async with self._make_one(flow_control_max_count=1) as instance: + async with self._make_one(flow_control_max_mutation_count=1) as instance: with mock.patch.object( instance, "_execute_mutate_rows", AsyncMock() ) as op_mock: @@ -982,7 +973,7 @@ async def test_manual_flush_end_to_end(self): mutations = [_make_mutation(count=2, size=2)] * num_nutations async with self._make_one( - flow_control_max_count=3, flow_control_max_bytes=3 + flow_control_max_mutation_count=3, flow_control_max_bytes=3 ) as instance: instance._table.default_operation_timeout = 10 instance._table.default_per_request_timeout = 9 From df8d2d59923d786c9c9f1b63b2a922f7b58135a8 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 21 Jun 2023 15:09:44 -0700 Subject: [PATCH 154/213] improved docstrings --- google/cloud/bigtable/mutations_batcher.py | 42 +++++++++++++++++----- 1 file changed, 33 insertions(+), 9 deletions(-) diff --git a/google/cloud/bigtable/mutations_batcher.py b/google/cloud/bigtable/mutations_batcher.py index f5ef48a83..4c666144b 100644 --- a/google/cloud/bigtable/mutations_batcher.py +++ 
b/google/cloud/bigtable/mutations_batcher.py @@ -69,10 +69,17 @@ def __init__( def _has_capacity(self, additional_count: int, additional_size: int) -> bool: """ - Checks if there is capacity to send a new mutation with the given size and count + Checks if there is capacity to send a new entry with the given size and count FlowControl limits are not hard limits. If a single mutation exceeds - the configured limits, it can be sent in a single batch. + the configured flow limits, it will be sent in a single batch when + previous batches have completed. + + Args: + - additional_count: number of mutations in the pending entry + - additional_size: size of the pending entry + Returns: + - True if there is capacity to send the pending entry, False otherwise """ # adjust limits to allow overly large mutations acceptable_size = max(self._max_mutation_bytes, additional_size) @@ -86,7 +93,12 @@ async def remove_from_flow( self, mutations: RowMutationEntry | list[RowMutationEntry] ) -> None: """ - Every time an in-flight mutation is complete, release the flow control semaphore + Removes mutations from flow control. This method should be called once + for each mutation that was sent to add_to_flow, after the corresponding + operation is complete. + + Args: + - mutations: mutation or list of mutations to remove from flow control """ if not isinstance(mutations, list): mutations = [mutations] @@ -100,9 +112,10 @@ async def remove_from_flow( async def add_to_flow(self, mutations: RowMutationEntry | list[RowMutationEntry]): """ - Breaks up list of mutations into batches that were registered to fit within - flow control limits. This method will block when the flow control limits are - reached. + Generator function that registers mutations with flow control. As mutations + are accepted into the flow control, they are yielded back to the caller, + to be sent in a batch. If the flow control is at capacity, the generator + will block until there is capacity available. Args: - mutations: list mutations to break up into batches @@ -418,6 +431,13 @@ def _create_bg_task(func, *args, **kwargs) -> asyncio.Future[Any]: This method wraps asyncio to make it easier to maintain subclasses with different concurrency models. + + Args: + - func: function to execute in background task + - *args: positional arguments to pass to func + - **kwargs: keyword arguments to pass to func + Returns: + - Future object representing the background task """ return asyncio.create_task(func(*args, **kwargs)) @@ -429,9 +449,13 @@ async def _wait_for_batch_results( Takes in a list of futures representing _execute_mutate_rows tasks, waits for them to complete, and returns a list of errors encountered. - Errors are expected to be FailedMutationEntryError, representing a failed - mutation operation. If a task fails, a direct Exception object will be - added to the output list instead. + Args: + - *tasks: futures representing _execute_mutate_rows or _flush_internal tasks + Returns: + - list of Exceptions encountered by any of the tasks. Errors are expected + to be FailedMutationEntryError, representing a failed mutation operation. + If a task fails with a different exception, it will be included in the + output list. Successful tasks will not be represented in the output list. 
""" all_results = await asyncio.gather(*tasks, return_exceptions=True) found_errors = [] From eaed9ab82caee34ba3f4b92a7b9cf17c07662d8d Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 21 Jun 2023 15:42:20 -0700 Subject: [PATCH 155/213] added timeouts to batcher init --- google/cloud/bigtable/client.py | 8 +++ google/cloud/bigtable/mutations_batcher.py | 29 +++++++++- tests/unit/test_mutations_batcher.py | 64 ++++++++++++++++++++-- 3 files changed, 93 insertions(+), 8 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 46f28589c..d6cc4588b 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -583,6 +583,8 @@ def mutations_batcher( flush_limit_bytes: int = 20 * MB_SIZE, flow_control_max_mutation_count: int = 100_000, flow_control_max_bytes: int = 100 * MB_SIZE, + batch_operation_timeout: float | None = None, + batch_per_request_timeout: float | None = None, ) -> MutationsBatcher: """ Returns a new mutations batcher instance. @@ -598,6 +600,10 @@ def mutations_batcher( - flush_limit_bytes: Flush immediately after flush_limit_bytes bytes are added. - flow_control_max_mitation_count: Maximum number of inflight mutations. - flow_control_max_bytes: Maximum number of inflight bytes. + - batch_operation_timeout: timeout for each mutate_rows operation, in seconds. If None, + table default_operation_timeout will be used + - batch_per_request_timeout: timeout for each individual request, in seconds. If None, + table default_per_request_timeout will be used Returns: - a MutationsBatcher context manager that can batch requests """ @@ -608,6 +614,8 @@ def mutations_batcher( flush_limit_bytes=flush_limit_bytes, flow_control_max_mutation_count=flow_control_max_mutation_count, flow_control_max_bytes=flow_control_max_bytes, + batch_operation_timeout=batch_operation_timeout, + batch_per_request_timeout=batch_per_request_timeout, ) async def mutate_row( diff --git a/google/cloud/bigtable/mutations_batcher.py b/google/cloud/bigtable/mutations_batcher.py index 4c666144b..e820cdb67 100644 --- a/google/cloud/bigtable/mutations_batcher.py +++ b/google/cloud/bigtable/mutations_batcher.py @@ -186,6 +186,8 @@ def __init__( flush_limit_bytes: int = 20 * MB_SIZE, flow_control_max_mutation_count: int = 100_000, flow_control_max_bytes: int = 100 * MB_SIZE, + batch_operation_timeout: float | None = None, + batch_per_request_timeout: float | None = None, ): """ Args: @@ -197,8 +199,27 @@ def __init__( - flush_limit_bytes: Flush immediately after flush_limit_bytes bytes are added. - flow_control_max_mutation_count: Maximum number of inflight mutations. - flow_control_max_bytes: Maximum number of inflight bytes. + - batch_operation_timeout: timeout for each mutate_rows operation, in seconds. If None, + table default_operation_timeout will be used + - batch_per_request_timeout: timeout for each individual request, in seconds. 
If None, + table default_per_request_timeout will be used """ - atexit.register(self._on_exit) + self._operation_timeout: float = ( + batch_operation_timeout or table.default_operation_timeout + ) + self._per_request_timeout: float = ( + batch_per_request_timeout + or table.default_per_request_timeout + or self._operation_timeout + ) + if self._operation_timeout <= 0: + raise ValueError("batch_operation_timeout must be greater than 0") + if self._per_request_timeout <= 0: + raise ValueError("batch_per_request_timeout must be greater than 0") + if self._per_request_timeout > self._operation_timeout: + raise ValueError( + "batch_per_request_timeout must be less than batch_operation_timeout" + ) self.closed: bool = False self._table = table self._staged_entries: list[RowMutationEntry] = [] @@ -219,6 +240,8 @@ def __init__( self._prev_flush.set_result(None) # MutationExceptionGroup reports number of successful entries along with failures self._entries_processed_since_last_raise: int = 0 + # clean up on program exit + atexit.register(self._on_exit) def _start_flush_timer(self, interval: float | None) -> asyncio.Future[None]: """ @@ -361,8 +384,8 @@ async def _execute_mutate_rows( self._table.client._gapic_client, self._table, batch, - self._table.default_operation_timeout, - self._table.default_per_request_timeout, + operation_timeout=self._operation_timeout, + per_request_timeout=self._per_request_timeout, ) await operation.start() except MutationsExceptionGroup as e: diff --git a/tests/unit/test_mutations_batcher.py b/tests/unit/test_mutations_batcher.py index ae2ce1713..b77113692 100644 --- a/tests/unit/test_mutations_batcher.py +++ b/tests/unit/test_mutations_batcher.py @@ -294,6 +294,8 @@ def _make_one(self, table=None, **kwargs): async def test_ctor_defaults(self, flush_timer_mock): flush_timer_mock.return_value = asyncio.create_task(asyncio.sleep(0)) table = mock.Mock() + table.default_operation_timeout = 10 + table.default_per_request_timeout = 8 async with self._make_one(table) as instance: assert instance._table == table assert instance.closed is False @@ -304,6 +306,8 @@ async def test_ctor_defaults(self, flush_timer_mock): assert instance._flow_control._in_flight_mutation_count == 0 assert instance._flow_control._in_flight_mutation_bytes == 0 assert instance._entries_processed_since_last_raise == 0 + assert instance._operation_timeout == table.default_operation_timeout + assert instance._per_request_timeout == table.default_per_request_timeout await asyncio.sleep(0) assert flush_timer_mock.call_count == 1 assert flush_timer_mock.call_args[0][0] == 5 @@ -322,6 +326,8 @@ async def test_ctor_explicit(self, flush_timer_mock): flush_limit_bytes = 19 flow_control_max_mutation_count = 1001 flow_control_max_bytes = 12 + operation_timeout = 11 + per_request_timeout = 2 async with self._make_one( table, flush_interval=flush_interval, @@ -329,6 +335,8 @@ async def test_ctor_explicit(self, flush_timer_mock): flush_limit_bytes=flush_limit_bytes, flow_control_max_mutation_count=flow_control_max_mutation_count, flow_control_max_bytes=flow_control_max_bytes, + batch_operation_timeout=operation_timeout, + batch_per_request_timeout=per_request_timeout, ) as instance: assert instance._table == table assert instance.closed is False @@ -342,6 +350,8 @@ async def test_ctor_explicit(self, flush_timer_mock): assert instance._flow_control._in_flight_mutation_count == 0 assert instance._flow_control._in_flight_mutation_bytes == 0 assert instance._entries_processed_since_last_raise == 0 + assert 
instance._operation_timeout == operation_timeout + assert instance._per_request_timeout == per_request_timeout await asyncio.sleep(0) assert flush_timer_mock.call_count == 1 assert flush_timer_mock.call_args[0][0] == flush_interval @@ -355,6 +365,8 @@ async def test_ctor_no_flush_limits(self, flush_timer_mock): """Test with None for flush limits""" flush_timer_mock.return_value = asyncio.create_task(asyncio.sleep(0)) table = mock.Mock() + table.default_operation_timeout = 10 + table.default_per_request_timeout = 8 flush_interval = None flush_limit_count = None flush_limit_bytes = None @@ -376,6 +388,22 @@ async def test_ctor_no_flush_limits(self, flush_timer_mock): assert flush_timer_mock.call_args[0][0] is None assert isinstance(instance._flush_timer, asyncio.Future) + @pytest.mark.asyncio + async def test_ctor_invalid_values(self): + """Test that timeout values are positive, and fit within expected limits""" + with pytest.raises(ValueError) as e: + self._make_one(batch_operation_timeout=-1) + assert "batch_operation_timeout must be greater than 0" in str(e.value) + with pytest.raises(ValueError) as e: + self._make_one(batch_per_request_timeout=-1) + assert "batch_per_request_timeout must be greater than 0" in str(e.value) + with pytest.raises(ValueError) as e: + self._make_one(batch_operation_timeout=1, batch_per_request_timeout=2) + assert ( + "batch_per_request_timeout must be less than batch_operation_timeout" + in str(e.value) + ) + def test_default_argument_consistency(self): """ We supply default arguments in MutationsBatcher.__init__, and in @@ -393,8 +421,8 @@ def test_default_argument_consistency(self): batcher_init_signature = dict(inspect.signature(MutationsBatcher).parameters) batcher_init_signature.pop("table") # both should have same number of arguments - assert len(get_batcher_signature) == len(batcher_init_signature) - assert len(get_batcher_signature) == 5 + assert len(get_batcher_signature.keys()) == len(batcher_init_signature.keys()) + assert len(get_batcher_signature) == 7 # update if expected params change # both should have same argument names assert set(get_batcher_signature.keys()) == set(batcher_init_signature.keys()) # both should have same default values @@ -1027,12 +1055,12 @@ async def test__execute_mutate_rows(self, mutate_rows): batch = [_make_mutation()] result = await instance._execute_mutate_rows(batch) assert start_operation.call_count == 1 - args, _ = mutate_rows.call_args + args, kwargs = mutate_rows.call_args assert args[0] == table.client._gapic_client assert args[1] == table assert args[2] == batch - assert args[3] == 17 - assert args[4] == 13 + kwargs["operation_timeout"] == 17 + kwargs["per_request_timeout"] == 13 assert result == [] @pytest.mark.asyncio @@ -1051,6 +1079,7 @@ async def test__execute_mutate_rows_returns_errors(self, mutate_rows): mutate_rows.side_effect = MutationsExceptionGroup([err1, err2], 10) table = mock.Mock() table.default_operation_timeout = 17 + table.default_per_request_timeout = 13 async with self._make_one(table) as instance: batch = [_make_mutation()] result = await instance._execute_mutate_rows(batch) @@ -1163,3 +1192,28 @@ async def test_atexit_registration(self): # should not call after close atexit._run_exitfuncs() assert on_exit_mock.call_count == 1 + + @pytest.mark.asyncio + @unittest.mock.patch( + "google.cloud.bigtable.mutations_batcher._MutateRowsOperation", + ) + async def test_timeout_args_passed(self, mutate_rows): + """ + batch_operation_timeout and batch_per_request_timeout should be used + in api calls 
+ """ + mutate_rows.return_value = AsyncMock() + expected_operation_timeout = 17 + expected_per_request_timeout = 13 + async with self._make_one( + batch_operation_timeout=expected_operation_timeout, + batch_per_request_timeout=expected_per_request_timeout, + ) as instance: + assert instance._operation_timeout == expected_operation_timeout + assert instance._per_request_timeout == expected_per_request_timeout + # make simulated gapic call + await instance._execute_mutate_rows([_make_mutation()]) + assert mutate_rows.call_count == 1 + kwargs = mutate_rows.call_args[1] + assert kwargs["operation_timeout"] == expected_operation_timeout + assert kwargs["per_request_timeout"] == expected_per_request_timeout From e0b5e27359d70ddeaead8635a2172cef3f03d6ba Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 21 Jun 2023 17:28:20 -0700 Subject: [PATCH 156/213] fixed 3.11 test issue --- tests/unit/test_mutations_batcher.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/unit/test_mutations_batcher.py b/tests/unit/test_mutations_batcher.py index b77113692..edde475d5 100644 --- a/tests/unit/test_mutations_batcher.py +++ b/tests/unit/test_mutations_batcher.py @@ -1096,7 +1096,7 @@ async def test__raise_exceptions(self): from google.cloud.bigtable import exceptions expected_total = 1201 - expected_exceptions = [mock.Mock()] * 3 + expected_exceptions = [RuntimeError("mock")] * 3 async with self._make_one() as instance: instance.exceptions = expected_exceptions instance._entries_processed_since_last_raise = expected_total @@ -1143,7 +1143,7 @@ async def test_close_w_exceptions(self): from google.cloud.bigtable import exceptions expected_total = 10 - expected_exceptions = [mock.Mock()] + expected_exceptions = [RuntimeError("mock")] async with self._make_one() as instance: instance.exceptions = expected_exceptions instance._entries_processed_since_last_raise = expected_total From d50d89bc34a6409f1df9112e19691c3f7530568f Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 21 Jun 2023 17:33:36 -0700 Subject: [PATCH 157/213] fixed 3.7 unit test issues --- tests/unit/test_mutations_batcher.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/tests/unit/test_mutations_batcher.py b/tests/unit/test_mutations_batcher.py index edde475d5..6e8584c4d 100644 --- a/tests/unit/test_mutations_batcher.py +++ b/tests/unit/test_mutations_batcher.py @@ -287,7 +287,7 @@ def _make_one(self, table=None, **kwargs): return MutationsBatcher(table, **kwargs) - @unittest.mock.patch( + @mock.patch( "google.cloud.bigtable.mutations_batcher.MutationsBatcher._start_flush_timer" ) @pytest.mark.asyncio @@ -313,7 +313,7 @@ async def test_ctor_defaults(self, flush_timer_mock): assert flush_timer_mock.call_args[0][0] == 5 assert isinstance(instance._flush_timer, asyncio.Future) - @unittest.mock.patch( + @mock.patch( "google.cloud.bigtable.mutations_batcher.MutationsBatcher._start_flush_timer", ) @pytest.mark.asyncio @@ -357,7 +357,7 @@ async def test_ctor_explicit(self, flush_timer_mock): assert flush_timer_mock.call_args[0][0] == flush_interval assert isinstance(instance._flush_timer, asyncio.Future) - @unittest.mock.patch( + @mock.patch( "google.cloud.bigtable.mutations_batcher.MutationsBatcher._start_flush_timer" ) @pytest.mark.asyncio @@ -432,7 +432,7 @@ def test_default_argument_consistency(self): == batcher_init_signature[arg_name].default ) - @unittest.mock.patch( + @mock.patch( "google.cloud.bigtable.mutations_batcher.MutationsBatcher._schedule_flush" ) 
@pytest.mark.asyncio @@ -444,7 +444,7 @@ async def test__start_flush_timer_w_None(self, flush_mock): assert sleep_mock.call_count == 0 assert flush_mock.call_count == 0 - @unittest.mock.patch( + @mock.patch( "google.cloud.bigtable.mutations_batcher.MutationsBatcher._schedule_flush" ) @pytest.mark.asyncio @@ -458,7 +458,7 @@ async def test__start_flush_timer_call_when_closed(self, flush_mock): assert sleep_mock.call_count == 0 assert flush_mock.call_count == 0 - @unittest.mock.patch( + @mock.patch( "google.cloud.bigtable.mutations_batcher.MutationsBatcher._schedule_flush" ) @pytest.mark.asyncio @@ -478,7 +478,7 @@ async def test__flush_timer(self, flush_mock): sleep_mock.assert_called_with(expected_sleep) assert flush_mock.call_count == loop_num - @unittest.mock.patch( + @mock.patch( "google.cloud.bigtable.mutations_batcher.MutationsBatcher._schedule_flush" ) @pytest.mark.asyncio @@ -497,7 +497,7 @@ async def test__flush_timer_no_mutations(self, flush_mock): sleep_mock.assert_called_with(expected_sleep) assert flush_mock.call_count == 0 - @unittest.mock.patch( + @mock.patch( "google.cloud.bigtable.mutations_batcher.MutationsBatcher._schedule_flush" ) @pytest.mark.asyncio @@ -1040,7 +1040,7 @@ async def test_timer_flush_end_to_end(self): assert instance._entries_processed_since_last_raise == num_nutations @pytest.mark.asyncio - @unittest.mock.patch( + @mock.patch( "google.cloud.bigtable.mutations_batcher._MutateRowsOperation", ) async def test__execute_mutate_rows(self, mutate_rows): @@ -1064,7 +1064,7 @@ async def test__execute_mutate_rows(self, mutate_rows): assert result == [] @pytest.mark.asyncio - @unittest.mock.patch( + @mock.patch( "google.cloud.bigtable.mutations_batcher._MutateRowsOperation.start" ) async def test__execute_mutate_rows_returns_errors(self, mutate_rows): @@ -1194,7 +1194,7 @@ async def test_atexit_registration(self): assert on_exit_mock.call_count == 1 @pytest.mark.asyncio - @unittest.mock.patch( + @mock.patch( "google.cloud.bigtable.mutations_batcher._MutateRowsOperation", ) async def test_timeout_args_passed(self, mutate_rows): From 32d39fd7bb0584fc75bce52b7b15e491c83db482 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 21 Jun 2023 18:14:52 -0700 Subject: [PATCH 158/213] fixed lint issue --- tests/unit/test_mutations_batcher.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/unit/test_mutations_batcher.py b/tests/unit/test_mutations_batcher.py index 6e8584c4d..d18194be2 100644 --- a/tests/unit/test_mutations_batcher.py +++ b/tests/unit/test_mutations_batcher.py @@ -1064,9 +1064,7 @@ async def test__execute_mutate_rows(self, mutate_rows): assert result == [] @pytest.mark.asyncio - @mock.patch( - "google.cloud.bigtable.mutations_batcher._MutateRowsOperation.start" - ) + @mock.patch("google.cloud.bigtable.mutations_batcher._MutateRowsOperation.start") async def test__execute_mutate_rows_returns_errors(self, mutate_rows): """Errors from operation should be retruned as list""" from google.cloud.bigtable.exceptions import ( From 1343b346e0b77f303ce9dd44986db7fb7e8b796e Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 22 Jun 2023 08:53:56 -0700 Subject: [PATCH 159/213] fixed lint --- tests/unit/test_mutations_batcher.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/unit/test_mutations_batcher.py b/tests/unit/test_mutations_batcher.py index d18194be2..00d55d7db 100644 --- a/tests/unit/test_mutations_batcher.py +++ b/tests/unit/test_mutations_batcher.py @@ -14,7 +14,6 @@ import pytest import asyncio -import unittest # 
try/except added for compatibility with python < 3.8 try: From 4f3aa3a13c13a97dbab642ad7d5fccef392327b7 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 22 Jun 2023 10:08:32 -0700 Subject: [PATCH 160/213] fixed up unit tests --- tests/unit/test_client.py | 367 ++++++++++++++++++-------------------- 1 file changed, 175 insertions(+), 192 deletions(-) diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 7009069d1..e1663b016 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -280,32 +280,38 @@ async def test_start_background_channel_refresh_tasks_names(self): @pytest.mark.asyncio async def test__ping_and_warm_instances(self): - # test with no instances + """ + test ping and warm with mocked asyncio.gather + """ + client_mock = mock.Mock() with mock.patch.object(asyncio, "gather", AsyncMock()) as gather: - client = self._make_one(project="project-id", pool_size=1) - channel = client.transport._grpc_channel._pool[0] - await client._ping_and_warm_instances(channel) + # simulate gather by returning the same number of items as passed in + gather.side_effect = lambda *args, **kwargs: [None for _ in args] + channel = mock.Mock() + # test with no instances + client_mock._active_instances = [] + result = await self._get_target_class()._ping_and_warm_instances(client_mock, channel) + assert len(result) == 0 gather.assert_called_once() gather.assert_awaited_once() assert not gather.call_args.args assert gather.call_args.kwargs == {"return_exceptions": True} # test with instances - client._active_instances = [ + client_mock._active_instances = [ "instance-1", "instance-2", "instance-3", "instance-4", ] - with mock.patch.object(asyncio, "gather", AsyncMock()) as gather: - await client._ping_and_warm_instances(channel) + gather.reset_mock() + result = await self._get_target_class()._ping_and_warm_instances(client_mock, channel) + assert len(result) == 4 gather.assert_called_once() gather.assert_awaited_once() assert len(gather.call_args.args) == 4 assert gather.call_args.kwargs == {"return_exceptions": True} for idx, call in enumerate(gather.call_args.args): - assert isinstance(call, grpc.aio.UnaryUnaryCall) - call._request["name"] = client._active_instances[idx] - await client.close() + assert call == channel.unary_unary()() @pytest.mark.asyncio @pytest.mark.parametrize( @@ -866,6 +872,17 @@ def _make_client(self, *args, **kwargs): return BigtableDataClient(*args, **kwargs) + def _make_table(self, *args, **kwargs): + from google.cloud.bigtable.client import Table + client_mock = mock.Mock() + client_mock._register_instance.side_effect = lambda *args, **kwargs: asyncio.sleep(0) + client_mock._remove_instance_registration.side_effect = lambda *args, **kwargs: asyncio.sleep(0) + kwargs["instance_id"] = kwargs.get("instance_id", args[0] if args else "instance") + kwargs["table_id"] = kwargs.get("table_id", args[1] if len(args) > 1 else "table") + client_mock._gapic_client.table_path.return_value = kwargs["table_id"] + client_mock._gapic_client.instance_path.return_value = kwargs["instance_id"] + return Table(client_mock, *args, **kwargs) + def _make_stats(self): from google.cloud.bigtable_v2.types import RequestStats from google.cloud.bigtable_v2.types import FullReadStatsView @@ -928,14 +945,13 @@ def cancel(self): @pytest.mark.asyncio async def test_read_rows(self): - client = self._make_client() - table = client.get_table("instance", "table") query = ReadRowsQuery() chunks = [ self._make_chunk(row_key=b"test_1"), self._make_chunk(row_key=b"test_2"), ] - 
with mock.patch.object(table.client._gapic_client, "read_rows") as read_rows: + async with self._make_table() as table: + read_rows = table.client._gapic_client.read_rows read_rows.side_effect = lambda *args, **kwargs: self._make_gapic_stream( chunks ) @@ -943,18 +959,16 @@ async def test_read_rows(self): assert len(results) == 2 assert results[0].row_key == b"test_1" assert results[1].row_key == b"test_2" - await client.close() @pytest.mark.asyncio async def test_read_rows_stream(self): - client = self._make_client() - table = client.get_table("instance", "table") query = ReadRowsQuery() chunks = [ self._make_chunk(row_key=b"test_1"), self._make_chunk(row_key=b"test_2"), ] - with mock.patch.object(table.client._gapic_client, "read_rows") as read_rows: + async with self._make_table() as table: + read_rows = table.client._gapic_client.read_rows read_rows.side_effect = lambda *args, **kwargs: self._make_gapic_stream( chunks ) @@ -963,16 +977,18 @@ async def test_read_rows_stream(self): assert len(results) == 2 assert results[0].row_key == b"test_1" assert results[1].row_key == b"test_2" - await client.close() @pytest.mark.parametrize("include_app_profile", [True, False]) @pytest.mark.asyncio async def test_read_rows_query_matches_request(self, include_app_profile): from google.cloud.bigtable import RowRange - async with self._make_client() as client: - app_profile_id = "app_profile_id" if include_app_profile else None - table = client.get_table("instance", "table", app_profile_id=app_profile_id) + app_profile_id = "app_profile_id" if include_app_profile else None + async with self._make_table(app_profile_id=app_profile_id) as table: + read_rows = table.client._gapic_client.read_rows + read_rows.side_effect = lambda *args, **kwargs: self._make_gapic_stream( + [] + ) row_keys = [b"test_1", "test_2"] row_ranges = RowRange("start", "end") filter_ = {"test": "filter"} @@ -983,52 +999,44 @@ async def test_read_rows_query_matches_request(self, include_app_profile): row_filter=filter_, limit=limit, ) - with mock.patch.object( - table.client._gapic_client, "read_rows" - ) as read_rows: - read_rows.side_effect = lambda *args, **kwargs: self._make_gapic_stream( - [] - ) - results = await table.read_rows(query, operation_timeout=3) - assert len(results) == 0 - call_request = read_rows.call_args_list[0][0][0] - query_dict = query._to_dict() - if include_app_profile: - assert set(call_request.keys()) == set(query_dict.keys()) | { - "table_name", - "app_profile_id", - } - else: - assert set(call_request.keys()) == set(query_dict.keys()) | { - "table_name" - } - assert call_request["rows"] == query_dict["rows"] - assert call_request["filter"] == filter_ - assert call_request["rows_limit"] == limit - assert call_request["table_name"] == table.table_name - if include_app_profile: - assert call_request["app_profile_id"] == app_profile_id + + results = await table.read_rows(query, operation_timeout=3) + assert len(results) == 0 + call_request = read_rows.call_args_list[0][0][0] + query_dict = query._to_dict() + if include_app_profile: + assert set(call_request.keys()) == set(query_dict.keys()) | { + "table_name", + "app_profile_id", + } + else: + assert set(call_request.keys()) == set(query_dict.keys()) | { + "table_name" + } + assert call_request["rows"] == query_dict["rows"] + assert call_request["filter"] == filter_ + assert call_request["rows_limit"] == limit + assert call_request["table_name"] == table.table_name + if include_app_profile: + assert call_request["app_profile_id"] == app_profile_id 
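# A minimal usage sketch of the read path exercised by the tests above, assuming a
# reachable Bigtable instance and default credentials; the project/instance/table ids,
# row keys, and the top-level ReadRowsQuery import are illustrative assumptions rather
# than values taken from this patch series.
import asyncio

from google.cloud.bigtable import ReadRowsQuery, RowRange
from google.cloud.bigtable.client import BigtableDataClient


async def read_example():
    async with BigtableDataClient(project="my-project") as client:
        table = client.get_table("my-instance", "my-table")
        query = ReadRowsQuery(
            row_keys=[b"key-1"],
            row_ranges=RowRange("row-start", "row-end"),
            limit=10,
        )
        # unary form: collect all matching rows into a list
        rows = await table.read_rows(query, operation_timeout=30)
        print(len(rows))
        # streaming form: iterate rows as the server returns them
        async for row in await table.read_rows_stream(query):
            print(row.row_key)


asyncio.run(read_example())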
@pytest.mark.parametrize("operation_timeout", [0.001, 0.023, 0.1]) @pytest.mark.asyncio async def test_read_rows_timeout(self, operation_timeout): - async with self._make_client() as client: - table = client.get_table("instance", "table") + async with self._make_table() as table: + read_rows = table.client._gapic_client.read_rows query = ReadRowsQuery() chunks = [self._make_chunk(row_key=b"test_1")] - with mock.patch.object( - table.client._gapic_client, "read_rows" - ) as read_rows: - read_rows.side_effect = lambda *args, **kwargs: self._make_gapic_stream( - chunks, sleep_time=1 + read_rows.side_effect = lambda *args, **kwargs: self._make_gapic_stream( + chunks, sleep_time=1 + ) + try: + await table.read_rows(query, operation_timeout=operation_timeout) + except core_exceptions.DeadlineExceeded as e: + assert ( + e.message + == f"operation_timeout of {operation_timeout:0.1f}s exceeded" ) - try: - await table.read_rows(query, operation_timeout=operation_timeout) - except core_exceptions.DeadlineExceeded as e: - assert ( - e.message - == f"operation_timeout of {operation_timeout:0.1f}s exceeded" - ) @pytest.mark.parametrize( "per_request_t, operation_t, expected_num", @@ -1056,46 +1064,44 @@ async def test_read_rows_per_request_timeout( # mocking uniform ensures there are no sleeps between retries with mock.patch("random.uniform", side_effect=lambda a, b: 0): - async with self._make_client() as client: - table = client.get_table("instance", "table") + async with self._make_table() as table: + read_rows = table.client._gapic_client.read_rows + read_rows.side_effect = ( + lambda *args, **kwargs: self._make_gapic_stream( + chunks, sleep_time=per_request_t + ) + ) query = ReadRowsQuery() chunks = [core_exceptions.DeadlineExceeded("mock deadline")] - with mock.patch.object( - table.client._gapic_client, "read_rows" - ) as read_rows: - read_rows.side_effect = ( - lambda *args, **kwargs: self._make_gapic_stream( - chunks, sleep_time=per_request_t - ) + + try: + await table.read_rows( + query, + operation_timeout=operation_t, + per_request_timeout=per_request_t, ) - try: - await table.read_rows( - query, - operation_timeout=operation_t, - per_request_timeout=per_request_t, - ) - except core_exceptions.DeadlineExceeded as e: - retry_exc = e.__cause__ - if expected_num == 0: - assert retry_exc is None - else: - assert type(retry_exc) == RetryExceptionGroup - assert f"{expected_num} failed attempts" in str(retry_exc) - assert len(retry_exc.exceptions) == expected_num - for sub_exc in retry_exc.exceptions: - assert sub_exc.message == "mock deadline" - assert read_rows.call_count == expected_num - # check timeouts - for _, call_kwargs in read_rows.call_args_list[:-1]: - assert call_kwargs["timeout"] == per_request_t - # last timeout should be adjusted to account for the time spent - assert ( - abs( - read_rows.call_args_list[-1][1]["timeout"] - - expected_last_timeout - ) - < 0.05 + except core_exceptions.DeadlineExceeded as e: + retry_exc = e.__cause__ + if expected_num == 0: + assert retry_exc is None + else: + assert type(retry_exc) == RetryExceptionGroup + assert f"{expected_num} failed attempts" in str(retry_exc) + assert len(retry_exc.exceptions) == expected_num + for sub_exc in retry_exc.exceptions: + assert sub_exc.message == "mock deadline" + assert read_rows.call_count == expected_num + # check timeouts + for _, call_kwargs in read_rows.call_args_list[:-1]: + assert call_kwargs["timeout"] == per_request_t + # last timeout should be adjusted to account for the time spent + assert ( + abs( + 
read_rows.call_args_list[-1][1]["timeout"] + - expected_last_timeout ) + < 0.05 + ) @pytest.mark.asyncio async def test_read_rows_idle_timeout(self): @@ -1154,23 +1160,20 @@ async def test_read_rows_idle_timeout(self): ) @pytest.mark.asyncio async def test_read_rows_retryable_error(self, exc_type): - async with self._make_client() as client: - table = client.get_table("instance", "table") + async with self._make_table() as table: + read_rows = table.client._gapic_client.read_rows + read_rows.side_effect = lambda *args, **kwargs: self._make_gapic_stream( + [expected_error] + ) query = ReadRowsQuery() expected_error = exc_type("mock error") - with mock.patch.object( - table.client._gapic_client, "read_rows" - ) as read_rows: - read_rows.side_effect = lambda *args, **kwargs: self._make_gapic_stream( - [expected_error] - ) - try: - await table.read_rows(query, operation_timeout=0.1) - except core_exceptions.DeadlineExceeded as e: - retry_exc = e.__cause__ - root_cause = retry_exc.exceptions[0] - assert type(root_cause) == exc_type - assert root_cause == expected_error + try: + await table.read_rows(query, operation_timeout=0.1) + except core_exceptions.DeadlineExceeded as e: + retry_exc = e.__cause__ + root_cause = retry_exc.exceptions[0] + assert type(root_cause) == exc_type + assert root_cause == expected_error @pytest.mark.parametrize( "exc_type", @@ -1188,20 +1191,17 @@ async def test_read_rows_retryable_error(self, exc_type): ) @pytest.mark.asyncio async def test_read_rows_non_retryable_error(self, exc_type): - async with self._make_client() as client: - table = client.get_table("instance", "table") + async with self._make_table() as table: + read_rows = table.client._gapic_client.read_rows + read_rows.side_effect = lambda *args, **kwargs: self._make_gapic_stream( + [expected_error] + ) query = ReadRowsQuery() expected_error = exc_type("mock error") - with mock.patch.object( - table.client._gapic_client, "read_rows" - ) as read_rows: - read_rows.side_effect = lambda *args, **kwargs: self._make_gapic_stream( - [expected_error] - ) - try: - await table.read_rows(query, operation_timeout=0.1) - except exc_type as e: - assert e == expected_error + try: + await table.read_rows(query, operation_timeout=0.1) + except exc_type as e: + assert e == expected_error @pytest.mark.asyncio async def test_read_rows_revise_request(self): @@ -1216,32 +1216,29 @@ async def test_read_rows_revise_request(self): ) as revise_rowset: with mock.patch.object(_ReadRowsOperation, "aclose"): revise_rowset.return_value = "modified" - async with self._make_client() as client: - table = client.get_table("instance", "table") + async with self._make_table() as table: + read_rows = table.client._gapic_client.read_rows + read_rows.side_effect = ( + lambda *args, **kwargs: self._make_gapic_stream(chunks) + ) row_keys = [b"test_1", b"test_2", b"test_3"] query = ReadRowsQuery(row_keys=row_keys) chunks = [ self._make_chunk(row_key=b"test_1"), core_exceptions.Aborted("mock retryable error"), ] - with mock.patch.object( - table.client._gapic_client, "read_rows" - ) as read_rows: - read_rows.side_effect = ( - lambda *args, **kwargs: self._make_gapic_stream(chunks) + try: + await table.read_rows(query) + except InvalidChunk: + revise_rowset.assert_called() + revise_call_kwargs = revise_rowset.call_args_list[0].kwargs + assert ( + revise_call_kwargs["row_set"] + == query._to_dict()["rows"] ) - try: - await table.read_rows(query) - except InvalidChunk: - revise_rowset.assert_called() - revise_call_kwargs = 
revise_rowset.call_args_list[0].kwargs - assert ( - revise_call_kwargs["row_set"] - == query._to_dict()["rows"] - ) - assert revise_call_kwargs["last_seen_row_key"] == b"test_1" - read_rows_request = read_rows.call_args_list[1].args[0] - assert read_rows_request["rows"] == "modified" + assert revise_call_kwargs["last_seen_row_key"] == b"test_1" + read_rows_request = read_rows.call_args_list[1].args[0] + assert read_rows_request["rows"] == "modified" @pytest.mark.asyncio async def test_read_rows_default_timeouts(self): @@ -1254,20 +1251,14 @@ async def test_read_rows_default_timeouts(self): per_request_timeout = 4 with mock.patch.object(_ReadRowsOperation, "__init__") as mock_op: mock_op.side_effect = RuntimeError("mock error") - async with self._make_client() as client: - async with client.get_table( - "instance", - "table", - default_operation_timeout=operation_timeout, - default_per_request_timeout=per_request_timeout, - ) as table: - try: - await table.read_rows(ReadRowsQuery()) - except RuntimeError: - pass - kwargs = mock_op.call_args_list[0].kwargs - assert kwargs["operation_timeout"] == operation_timeout - assert kwargs["per_request_timeout"] == per_request_timeout + async with self._make_table(default_operation_timeout=operation_timeout,default_per_request_timeout=per_request_timeout) as table: + try: + await table.read_rows(ReadRowsQuery()) + except RuntimeError: + pass + kwargs = mock_op.call_args_list[0].kwargs + assert kwargs["operation_timeout"] == operation_timeout + assert kwargs["per_request_timeout"] == per_request_timeout @pytest.mark.asyncio async def test_read_rows_default_timeout_override(self): @@ -1280,24 +1271,18 @@ async def test_read_rows_default_timeout_override(self): per_request_timeout = 4 with mock.patch.object(_ReadRowsOperation, "__init__") as mock_op: mock_op.side_effect = RuntimeError("mock error") - async with self._make_client() as client: - async with client.get_table( - "instance", - "table", - default_operation_timeout=99, - default_per_request_timeout=97, - ) as table: - try: - await table.read_rows( - ReadRowsQuery(), - operation_timeout=operation_timeout, - per_request_timeout=per_request_timeout, - ) - except RuntimeError: - pass - kwargs = mock_op.call_args_list[0].kwargs - assert kwargs["operation_timeout"] == operation_timeout - assert kwargs["per_request_timeout"] == per_request_timeout + async with self._make_table(default_operation_timeout=99, default_per_request_timeout=97) as table: + try: + await table.read_rows( + ReadRowsQuery(), + operation_timeout=operation_timeout, + per_request_timeout=per_request_timeout, + ) + except RuntimeError: + pass + kwargs = mock_op.call_args_list[0].kwargs + assert kwargs["operation_timeout"] == operation_timeout + assert kwargs["per_request_timeout"] == per_request_timeout @pytest.mark.asyncio async def test_read_row(self): @@ -1456,24 +1441,22 @@ async def test_row_exists_w_invalid_input(self, input_row): async def test_read_rows_metadata(self, include_app_profile): """request should attach metadata headers""" profile = "profile" if include_app_profile else None - async with self._make_client() as client: - async with client.get_table("i", "t", app_profile_id=profile) as table: - with mock.patch.object( - client._gapic_client, "read_rows", AsyncMock() - ) as read_rows: - await table.read_rows(ReadRowsQuery()) - kwargs = read_rows.call_args_list[0].kwargs - metadata = kwargs["metadata"] - goog_metadata = None - for key, value in metadata: - if key == "x-goog-request-params": - goog_metadata = value - 
assert goog_metadata is not None, "x-goog-request-params not found" - assert "table_name=" + table.table_name in goog_metadata - if include_app_profile: - assert "app_profile_id=profile" in goog_metadata - else: - assert "app_profile_id=" not in goog_metadata + async with self._make_table(app_profile_id=profile) as table: + read_rows = table.client._gapic_client.read_rows + read_rows.return_value = self._make_gapic_stream([]) + await table.read_rows(ReadRowsQuery()) + kwargs = read_rows.call_args_list[0].kwargs + metadata = kwargs["metadata"] + goog_metadata = None + for key, value in metadata: + if key == "x-goog-request-params": + goog_metadata = value + assert goog_metadata is not None, "x-goog-request-params not found" + assert "table_name=" + table.table_name in goog_metadata + if include_app_profile: + assert "app_profile_id=profile" in goog_metadata + else: + assert "app_profile_id=" not in goog_metadata class TestMutateRow: From 0b99b89c8caf7907f3b440070b9c283e95888251 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 22 Jun 2023 10:08:46 -0700 Subject: [PATCH 161/213] added ping and warm to system test --- tests/system/test_system.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tests/system/test_system.py b/tests/system/test_system.py index 692911b10..b8abebcd6 100644 --- a/tests/system/test_system.py +++ b/tests/system/test_system.py @@ -224,6 +224,21 @@ async def test_ping_and_warm_gapic(client, table): await client._gapic_client.ping_and_warm(request) +@retry.Retry(predicate=retry.if_exception_type(ClientError), initial=1, maximum=5) +@pytest.mark.asyncio +async def test_ping_and_warm(client, table): + """ + Test ping and warm from handwritten client + """ + try: + channel = client.transport._grpc_channel.pool[0] + except Exception: + # for sync client + channel = client.transport._grpc_channel + results = await client._ping_and_warm_instances(channel) + assert len(results) == 1 + assert results[0] is None + @retry.Retry(predicate=retry.if_exception_type(ClientError), initial=1, maximum=5) @pytest.mark.asyncio async def test_mutation_set_cell(table, temp_rows): From 6a35be46ae5357487804bbb86217d4223111d773 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 22 Jun 2023 10:11:50 -0700 Subject: [PATCH 162/213] fixed broken ping and warm --- google/cloud/bigtable/client.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index f75613098..6cbd3fa16 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -38,6 +38,7 @@ from google.cloud.bigtable_v2.services.bigtable.transports.pooled_grpc_asyncio import ( PooledBigtableGrpcAsyncIOTransport, ) +from google.cloud.bigtable_v2.types.bigtable import PingAndWarmRequest from google.cloud.client import ClientWithProject from google.api_core.exceptions import GoogleAPICallError from google.api_core import retry_async as retries @@ -190,10 +191,13 @@ async def _ping_and_warm_instances( - sequence of results or exceptions from the ping requests """ ping_rpc = channel.unary_unary( - "/google.bigtable.v2.Bigtable/PingAndWarmChannel" + "/google.bigtable.v2.Bigtable/PingAndWarm", + request_serializer=PingAndWarmRequest.serialize, ) tasks = [ping_rpc({"name": n}) for n in self._active_instances] - return await asyncio.gather(*tasks, return_exceptions=True) + result_list = await asyncio.gather(*tasks, return_exceptions=True) + # return None in place of empty successful responses + return [r or None 
for r in result_list] async def _manage_channel( self, From aa3590193cf1ceba3575b820ad4a610327efa48d Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 22 Jun 2023 10:30:31 -0700 Subject: [PATCH 163/213] added table and app profile to warming params --- google/cloud/bigtable/client.py | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 6cbd3fa16..06af5f31e 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -32,6 +32,8 @@ import sys import random +from collections import namedtuple + from google.cloud.bigtable_v2.services.bigtable.client import BigtableClientMeta from google.cloud.bigtable_v2.services.bigtable.async_client import BigtableAsyncClient from google.cloud.bigtable_v2.services.bigtable.async_client import DEFAULT_CLIENT_INFO @@ -66,6 +68,11 @@ from google.cloud.bigtable.mutations_batcher import MutationsBatcher from google.cloud.bigtable import RowKeySamples +# used to register instance data with the client for channel warming +_WarmedInstanceKey = namedtuple( + "_WarmedInstanceKey", ["instance_name", "table_name", "app_profile_id"] +) + class BigtableDataClient(ClientWithProject): def __init__( @@ -131,10 +138,10 @@ def __init__( PooledBigtableGrpcAsyncIOTransport, self._gapic_client.transport ) # keep track of active instances to for warmup on channel refresh - self._active_instances: Set[str] = set() + self._active_instances: Set[_WarmedInstanceKey] = set() # keep track of table objects associated with each instance # only remove instance from _active_instances when all associated tables remove it - self._instance_owners: dict[str, Set[int]] = {} + self._instance_owners: dict[_WarmedInstanceKey, Set[int]] = {} # attempt to start background tasks self._channel_init_time = time.time() self._channel_refresh_tasks: list[asyncio.Task[None]] = [] @@ -194,7 +201,15 @@ async def _ping_and_warm_instances( "/google.bigtable.v2.Bigtable/PingAndWarm", request_serializer=PingAndWarmRequest.serialize, ) - tasks = [ping_rpc({"name": n}) for n in self._active_instances] + tasks = [] + for (instance_name, table_name, app_profile_id) in self._active_instances: + tasks.append( + ping_rpc( + request={"name": instance_name, "app_profile_id": app_profile_id}, + metadata=_make_metadata(table_name, app_profile_id), + wait_for_ready=True, + ) + ) result_list = await asyncio.gather(*tasks, return_exceptions=True) # return None in place of empty successful responses return [r or None for r in result_list] @@ -263,9 +278,12 @@ async def _register_instance(self, instance_id: str, owner: Table) -> None: owners call _remove_instance_registration """ instance_name = self._gapic_client.instance_path(self.project, instance_id) - self._instance_owners.setdefault(instance_name, set()).add(id(owner)) + instance_key = _WarmedInstanceKey( + instance_name, owner.table_name, owner.app_profile_id + ) + self._instance_owners.setdefault(instance_key, set()).add(id(owner)) if instance_name not in self._active_instances: - self._active_instances.add(instance_name) + self._active_instances.add(instance_key) if self._channel_refresh_tasks: # refresh tasks already running # call ping and warm on all existing channels From 4e1ec6f895736eadcf6bdff0ab10d88d65eede5d Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 22 Jun 2023 11:35:30 -0700 Subject: [PATCH 164/213] fixed _remove_instance_registration --- google/cloud/bigtable/client.py | 7 +++++-- 1 file changed, 5 
insertions(+), 2 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index 06af5f31e..b9d632799 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -311,11 +311,14 @@ async def _remove_instance_registration( - True if instance was removed """ instance_name = self._gapic_client.instance_path(self.project, instance_id) - owner_list = self._instance_owners.get(instance_name, set()) + instance_key = _WarmedInstanceKey( + instance_name, owner.table_name, owner.app_profile_id + ) + owner_list = self._instance_owners.get(instance_key, set()) try: owner_list.remove(id(owner)) if len(owner_list) == 0: - self._active_instances.remove(instance_name) + self._active_instances.remove(instance_key) return True except KeyError: return False From 8f4b34f5ec9cb35fe16b978efc72a90ae5d570d7 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 22 Jun 2023 11:35:46 -0700 Subject: [PATCH 165/213] improved tests --- tests/system/test_system.py | 1 + tests/unit/test_client.py | 215 +++++++++++++++++++++--------------- 2 files changed, 130 insertions(+), 86 deletions(-) diff --git a/tests/system/test_system.py b/tests/system/test_system.py index b8abebcd6..6b9d69242 100644 --- a/tests/system/test_system.py +++ b/tests/system/test_system.py @@ -239,6 +239,7 @@ async def test_ping_and_warm(client, table): assert len(results) == 1 assert results[0] is None + @retry.Retry(predicate=retry.if_exception_type(ClientError), initial=1, maximum=5) @pytest.mark.asyncio async def test_mutation_set_cell(table, temp_rows): diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index e1663b016..8f54cc4d0 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -290,28 +290,36 @@ async def test__ping_and_warm_instances(self): channel = mock.Mock() # test with no instances client_mock._active_instances = [] - result = await self._get_target_class()._ping_and_warm_instances(client_mock, channel) + result = await self._get_target_class()._ping_and_warm_instances( + client_mock, channel + ) assert len(result) == 0 gather.assert_called_once() gather.assert_awaited_once() assert not gather.call_args.args assert gather.call_args.kwargs == {"return_exceptions": True} # test with instances - client_mock._active_instances = [ - "instance-1", - "instance-2", - "instance-3", - "instance-4", - ] + client_mock._active_instances = [(mock.Mock(), mock.Mock(), mock.Mock())] * 4 gather.reset_mock() - result = await self._get_target_class()._ping_and_warm_instances(client_mock, channel) + channel.reset_mock() + result = await self._get_target_class()._ping_and_warm_instances( + client_mock, channel + ) assert len(result) == 4 gather.assert_called_once() gather.assert_awaited_once() assert len(gather.call_args.args) == 4 - assert gather.call_args.kwargs == {"return_exceptions": True} - for idx, call in enumerate(gather.call_args.args): - assert call == channel.unary_unary()() + # check grpc call arguments + grpc_call_args = channel.unary_unary().call_args_list + for idx, (_, kwargs) in enumerate(grpc_call_args): + expected_instance, expected_table, expected_app_profile = client_mock._active_instances[idx] + request = kwargs["request"] + assert request["name"] == expected_instance + assert request["app_profile_id"] == expected_app_profile + metadata = kwargs["metadata"] + assert len(metadata) == 1 + assert metadata[0][0] == 'x-goog-request-params' + assert metadata[0][1] == f'table_name={expected_table},app_profile_id={expected_app_profile}' 
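# A minimal sketch of the warming behaviour verified above, assuming placeholder ids and
# default credentials: opening a table registers an (instance, table, app_profile_id) key
# with the client, and each pooled channel is then warmed with one PingAndWarm call per
# registered key; a successful ping is reported as None in the result list.
import asyncio

from google.cloud.bigtable.client import BigtableDataClient


async def warm_example():
    async with BigtableDataClient(project="my-project", pool_size=3) as client:
        async with client.get_table(
            "my-instance", "my-table", app_profile_id="my-profile"
        ) as table:
            assert table.app_profile_id == "my-profile"
            # get_table registered the table's instance key; re-warm each pooled
            # channel by hand, as the background refresh task does on rotation
            for channel in client.transport.channels:
                results = await client._ping_and_warm_instances(channel)
                assert all(r is None for r in results)  # None means the ping succeeded


asyncio.run(warm_example())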
@pytest.mark.asyncio @pytest.mark.parametrize( @@ -508,58 +516,84 @@ async def test__manage_channel_refresh(self, num_cycles): await client.close() @pytest.mark.asyncio - @pytest.mark.filterwarnings("ignore::RuntimeWarning") async def test__register_instance(self): - # create the client without calling start_background_channel_refresh - with mock.patch.object(asyncio, "get_running_loop") as get_event_loop: - get_event_loop.side_effect = RuntimeError("no event loop") - client = self._make_one(project="project-id") - assert not client._channel_refresh_tasks - # first call should start background refresh - assert client._active_instances == set() - await client._register_instance("instance-1", mock.Mock()) - assert len(client._active_instances) == 1 - assert client._active_instances == {"projects/project-id/instances/instance-1"} - assert client._channel_refresh_tasks - # next call should not - with mock.patch.object( - type(self._make_one()), "start_background_channel_refresh" - ) as refresh_mock: - await client._register_instance("instance-2", mock.Mock()) - assert len(client._active_instances) == 2 - assert client._active_instances == { - "projects/project-id/instances/instance-1", - "projects/project-id/instances/instance-2", - } - refresh_mock.assert_not_called() - - @pytest.mark.asyncio - @pytest.mark.filterwarnings("ignore::RuntimeWarning") - async def test__register_instance_ping_and_warm(self): - # should ping and warm each new instance - pool_size = 7 - with mock.patch.object(asyncio, "get_running_loop") as get_event_loop: - get_event_loop.side_effect = RuntimeError("no event loop") - client = self._make_one(project="project-id", pool_size=pool_size) + """ + test instance registration + """ + # set up mock client + client_mock = mock.Mock() + client_mock._gapic_client.instance_path.side_effect = lambda a, b: f"prefix/{b}" + active_instances = set() + instance_owners = {} + client_mock._active_instances = active_instances + client_mock._instance_owners = instance_owners + client_mock._channel_refresh_tasks = [] + client_mock.start_background_channel_refresh.side_effect = lambda: client_mock._channel_refresh_tasks.append(mock.Mock) + mock_channels = [mock.Mock() for i in range(5)] + client_mock.transport.channels = mock_channels + client_mock._ping_and_warm_instances = AsyncMock() + table_mock = mock.Mock() + await self._get_target_class()._register_instance(client_mock, "instance-1", table_mock) # first call should start background refresh - assert not client._channel_refresh_tasks - await client._register_instance("instance-1", mock.Mock()) - client = self._make_one(project="project-id", pool_size=pool_size) - assert len(client._channel_refresh_tasks) == pool_size - assert not client._active_instances - # next calls should trigger ping and warm - with mock.patch.object( - type(self._make_one()), "_ping_and_warm_instances" - ) as ping_mock: - # new instance should trigger ping and warm - await client._register_instance("instance-2", mock.Mock()) - assert ping_mock.call_count == pool_size - await client._register_instance("instance-3", mock.Mock()) - assert ping_mock.call_count == pool_size * 2 - # duplcate instances should not trigger ping and warm - await client._register_instance("instance-3", mock.Mock()) - assert ping_mock.call_count == pool_size * 2 - await client.close() + assert client_mock.start_background_channel_refresh.call_count == 1 + # ensure active_instances and instance_owners were updated properly + expected_key = ("prefix/instance-1", table_mock.table_name, 
table_mock.app_profile_id) + assert len(active_instances) == 1 + assert expected_key == tuple(list(active_instances)[0]) + assert len(instance_owners) == 1 + assert expected_key == tuple(list(instance_owners)[0]) + # should be a new task set + assert client_mock._channel_refresh_tasks + # # next call should not call start_background_channel_refresh again + table_mock2 = mock.Mock() + await self._get_target_class()._register_instance(client_mock, "instance-2", table_mock2) + assert client_mock.start_background_channel_refresh.call_count == 1 + # but it should call ping and warm with new instance key + assert client_mock._ping_and_warm_instances.call_count == len(mock_channels) + for channel in mock_channels: + assert channel in [call[0][0] for call in client_mock._ping_and_warm_instances.call_args_list] + # check for updated lists + assert len(active_instances) == 2 + assert len(instance_owners) == 2 + expected_key2 = ("prefix/instance-2", table_mock2.table_name, table_mock2.app_profile_id) + assert any([expected_key2 == tuple(list(active_instances)[i]) for i in range(len(active_instances))]) + assert any([expected_key2 == tuple(list(instance_owners)[i]) for i in range(len(instance_owners))]) + + @pytest.mark.asyncio + @pytest.mark.parametrize("insert_instances,expected_active,expected_owner_keys", [ + ([('i','t',None)], [('i','t',None)], [('i','t',None)]), + ([('i','t','p')], [('i','t','p')], [('i','t','p')]), + ([('1','t','p'), ('1','t','p')], [('1','t','p')], [('1','t','p')]), + ([('1','t','p'), ('2','t','p')], [('1','t','p'), ('2','t','p')], [('1','t','p'), ('2','t','p')]), + ]) + async def test__register_instance_state(self, insert_instances, expected_active, expected_owner_keys): + """ + test that active_instances and instance_owners are updated as expected + """ + # set up mock client + client_mock = mock.Mock() + client_mock._gapic_client.instance_path.side_effect = lambda a, b: b + active_instances = set() + instance_owners = {} + client_mock._active_instances = active_instances + client_mock._instance_owners = instance_owners + client_mock._channel_refresh_tasks = [] + client_mock.start_background_channel_refresh.side_effect = lambda: client_mock._channel_refresh_tasks.append(mock.Mock) + mock_channels = [mock.Mock() for i in range(5)] + client_mock.transport.channels = mock_channels + client_mock._ping_and_warm_instances = AsyncMock() + table_mock = mock.Mock() + # register instances + for instance, table, profile in insert_instances: + table_mock.table_name = table + table_mock.app_profile_id = profile + await self._get_target_class()._register_instance(client_mock, instance, table_mock) + assert len(active_instances) == len(expected_active) + assert len(instance_owners) == len(expected_owner_keys) + for expected in expected_active: + assert any([expected == tuple(list(active_instances)[i]) for i in range(len(active_instances))]) + for expected in expected_owner_keys: + assert any([expected == tuple(list(instance_owners)[i]) for i in range(len(instance_owners))]) @pytest.mark.asyncio async def test__remove_instance_registration(self): @@ -572,20 +606,22 @@ async def test__remove_instance_registration(self): instance_1_path = client._gapic_client.instance_path( client.project, "instance-1" ) + instance_1_key = (instance_1_path, table.table_name, table.app_profile_id) instance_2_path = client._gapic_client.instance_path( client.project, "instance-2" ) - assert len(client._instance_owners[instance_1_path]) == 1 - assert list(client._instance_owners[instance_1_path])[0] == id(table) 
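The expectations above all follow from the owner bookkeeping introduced earlier in this series: both _instance_owners and _active_instances are keyed by the full (instance, table, app profile) tuple, and an instance only leaves the active set when its last owner is removed. A condensed sketch of that pairing, using simplified stand-in names rather than the client's actual methods:

    from collections import namedtuple

    WarmedKey = namedtuple("WarmedKey", ["instance_name", "table_name", "app_profile_id"])

    def register(instance_owners, active_instances, key, owner_id):
        # record the owner under the shared key and mark the instance active
        instance_owners.setdefault(key, set()).add(owner_id)
        active_instances.add(key)

    def unregister(instance_owners, active_instances, key, owner_id):
        # drop one owner; deactivate the instance only when no owners remain
        owners = instance_owners.get(key, set())
        try:
            owners.remove(owner_id)
            if not owners:
                active_instances.discard(key)
            return True
        except KeyError:
            return False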
- assert len(client._instance_owners[instance_2_path]) == 1 - assert list(client._instance_owners[instance_2_path])[0] == id(table) + instance_2_key = (instance_2_path, table.table_name, table.app_profile_id) + assert len(client._instance_owners[instance_1_key]) == 1 + assert list(client._instance_owners[instance_1_key])[0] == id(table) + assert len(client._instance_owners[instance_2_key]) == 1 + assert list(client._instance_owners[instance_2_key])[0] == id(table) success = await client._remove_instance_registration("instance-1", table) assert success assert len(client._active_instances) == 1 - assert len(client._instance_owners[instance_1_path]) == 0 - assert len(client._instance_owners[instance_2_path]) == 1 - assert client._active_instances == {"projects/project-id/instances/instance-2"} - success = await client._remove_instance_registration("nonexistant", table) + assert len(client._instance_owners[instance_1_key]) == 0 + assert len(client._instance_owners[instance_2_key]) == 1 + assert client._active_instances == {instance_2_key} + success = await client._remove_instance_registration("fake-key", table) assert not success assert len(client._active_instances) == 1 await client.close() @@ -874,11 +910,20 @@ def _make_client(self, *args, **kwargs): def _make_table(self, *args, **kwargs): from google.cloud.bigtable.client import Table + client_mock = mock.Mock() - client_mock._register_instance.side_effect = lambda *args, **kwargs: asyncio.sleep(0) - client_mock._remove_instance_registration.side_effect = lambda *args, **kwargs: asyncio.sleep(0) - kwargs["instance_id"] = kwargs.get("instance_id", args[0] if args else "instance") - kwargs["table_id"] = kwargs.get("table_id", args[1] if len(args) > 1 else "table") + client_mock._register_instance.side_effect = ( + lambda *args, **kwargs: asyncio.sleep(0) + ) + client_mock._remove_instance_registration.side_effect = ( + lambda *args, **kwargs: asyncio.sleep(0) + ) + kwargs["instance_id"] = kwargs.get( + "instance_id", args[0] if args else "instance" + ) + kwargs["table_id"] = kwargs.get( + "table_id", args[1] if len(args) > 1 else "table" + ) client_mock._gapic_client.table_path.return_value = kwargs["table_id"] client_mock._gapic_client.instance_path.return_value = kwargs["instance_id"] return Table(client_mock, *args, **kwargs) @@ -986,9 +1031,7 @@ async def test_read_rows_query_matches_request(self, include_app_profile): app_profile_id = "app_profile_id" if include_app_profile else None async with self._make_table(app_profile_id=app_profile_id) as table: read_rows = table.client._gapic_client.read_rows - read_rows.side_effect = lambda *args, **kwargs: self._make_gapic_stream( - [] - ) + read_rows.side_effect = lambda *args, **kwargs: self._make_gapic_stream([]) row_keys = [b"test_1", "test_2"] row_ranges = RowRange("start", "end") filter_ = {"test": "filter"} @@ -1066,10 +1109,8 @@ async def test_read_rows_per_request_timeout( with mock.patch("random.uniform", side_effect=lambda a, b: 0): async with self._make_table() as table: read_rows = table.client._gapic_client.read_rows - read_rows.side_effect = ( - lambda *args, **kwargs: self._make_gapic_stream( - chunks, sleep_time=per_request_t - ) + read_rows.side_effect = lambda *args, **kwargs: self._make_gapic_stream( + chunks, sleep_time=per_request_t ) query = ReadRowsQuery() chunks = [core_exceptions.DeadlineExceeded("mock deadline")] @@ -1232,10 +1273,7 @@ async def test_read_rows_revise_request(self): except InvalidChunk: revise_rowset.assert_called() revise_call_kwargs = 
revise_rowset.call_args_list[0].kwargs - assert ( - revise_call_kwargs["row_set"] - == query._to_dict()["rows"] - ) + assert revise_call_kwargs["row_set"] == query._to_dict()["rows"] assert revise_call_kwargs["last_seen_row_key"] == b"test_1" read_rows_request = read_rows.call_args_list[1].args[0] assert read_rows_request["rows"] == "modified" @@ -1251,7 +1289,10 @@ async def test_read_rows_default_timeouts(self): per_request_timeout = 4 with mock.patch.object(_ReadRowsOperation, "__init__") as mock_op: mock_op.side_effect = RuntimeError("mock error") - async with self._make_table(default_operation_timeout=operation_timeout,default_per_request_timeout=per_request_timeout) as table: + async with self._make_table( + default_operation_timeout=operation_timeout, + default_per_request_timeout=per_request_timeout, + ) as table: try: await table.read_rows(ReadRowsQuery()) except RuntimeError: @@ -1271,7 +1312,9 @@ async def test_read_rows_default_timeout_override(self): per_request_timeout = 4 with mock.patch.object(_ReadRowsOperation, "__init__") as mock_op: mock_op.side_effect = RuntimeError("mock error") - async with self._make_table(default_operation_timeout=99, default_per_request_timeout=97) as table: + async with self._make_table( + default_operation_timeout=99, default_per_request_timeout=97 + ) as table: try: await table.read_rows( ReadRowsQuery(), From 9c8df9fe6cafaedfdb9537ff9cf996135a6dcd55 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 22 Jun 2023 12:52:17 -0700 Subject: [PATCH 166/213] allow warming for single instance --- google/cloud/bigtable/client.py | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index b9d632799..e9f90098e 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -143,7 +143,7 @@ def __init__( # only remove instance from _active_instances when all associated tables remove it self._instance_owners: dict[_WarmedInstanceKey, Set[int]] = {} # attempt to start background tasks - self._channel_init_time = time.time() + self._channel_init_time = time.monotonic() self._channel_refresh_tasks: list[asyncio.Task[None]] = [] try: self.start_background_channel_refresh() @@ -185,7 +185,7 @@ async def close(self, timeout: float = 2.0): self._channel_refresh_tasks = [] async def _ping_and_warm_instances( - self, channel: grpc.aio.Channel + self, channel: grpc.aio.Channel, instance_key: _WarmedInstanceKey | None = None ) -> list[GoogleAPICallError | None]: """ Prepares the backend for requests on a channel @@ -193,23 +193,27 @@ async def _ping_and_warm_instances( Pings each Bigtable instance registered in `_active_instances` on the client Args: - channel: grpc channel to ping + - channel: grpc channel to warm + - instance_key: if provided, only warm the instance associated with the key Returns: - sequence of results or exceptions from the ping requests """ + instance_list = ( + [instance_key] if instance_key is not None else self._active_instances + ) ping_rpc = channel.unary_unary( "/google.bigtable.v2.Bigtable/PingAndWarm", request_serializer=PingAndWarmRequest.serialize, ) - tasks = [] - for (instance_name, table_name, app_profile_id) in self._active_instances: - tasks.append( - ping_rpc( - request={"name": instance_name, "app_profile_id": app_profile_id}, - metadata=_make_metadata(table_name, app_profile_id), - wait_for_ready=True, - ) - ) + # prepare list of coroutines to run + tasks = [ + ping_rpc( + request={"name": 
instance_name, "app_profile_id": app_profile_id}, + metadata=_make_metadata(table_name, app_profile_id), + wait_for_ready=True, + ) for (instance_name, table_name, app_profile_id) in instance_list + ] + # execute coroutines in parallel result_list = await asyncio.gather(*tasks, return_exceptions=True) # return None in place of empty successful responses return [r or None for r in result_list] @@ -243,7 +247,7 @@ async def _manage_channel( first_refresh = self._channel_init_time + random.uniform( refresh_interval_min, refresh_interval_max ) - next_sleep = max(first_refresh - time.time(), 0) + next_sleep = max(first_refresh - time.monotonic(), 0) if next_sleep > 0: # warm the current channel immediately channel = self.transport.channels[channel_idx] @@ -288,7 +292,7 @@ async def _register_instance(self, instance_id: str, owner: Table) -> None: # refresh tasks already running # call ping and warm on all existing channels for channel in self.transport.channels: - await self._ping_and_warm_instances(channel) + await self._ping_and_warm_instances(channel, instance_key) else: # refresh tasks aren't active. start them as background tasks self.start_background_channel_refresh() From 584f94df9dafb3f243896a0b900fc35b77fd5a3b Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 22 Jun 2023 12:56:34 -0700 Subject: [PATCH 167/213] updated tests --- google/cloud/bigtable/client.py | 3 +- tests/unit/test_client.py | 319 +++++++++++++++++++++++--------- 2 files changed, 234 insertions(+), 88 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index e9f90098e..326932f79 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -211,7 +211,8 @@ async def _ping_and_warm_instances( request={"name": instance_name, "app_profile_id": app_profile_id}, metadata=_make_metadata(table_name, app_profile_id), wait_for_ready=True, - ) for (instance_name, table_name, app_profile_id) in instance_list + ) + for (instance_name, table_name, app_profile_id) in instance_list ] # execute coroutines in parallel result_list = await asyncio.gather(*tasks, return_exceptions=True) diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 8f54cc4d0..9997205d7 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -299,7 +299,9 @@ async def test__ping_and_warm_instances(self): assert not gather.call_args.args assert gather.call_args.kwargs == {"return_exceptions": True} # test with instances - client_mock._active_instances = [(mock.Mock(), mock.Mock(), mock.Mock())] * 4 + client_mock._active_instances = [ + (mock.Mock(), mock.Mock(), mock.Mock()) + ] * 4 gather.reset_mock() channel.reset_mock() result = await self._get_target_class()._ping_and_warm_instances( @@ -312,14 +314,54 @@ async def test__ping_and_warm_instances(self): # check grpc call arguments grpc_call_args = channel.unary_unary().call_args_list for idx, (_, kwargs) in enumerate(grpc_call_args): - expected_instance, expected_table, expected_app_profile = client_mock._active_instances[idx] + ( + expected_instance, + expected_table, + expected_app_profile, + ) = client_mock._active_instances[idx] request = kwargs["request"] assert request["name"] == expected_instance assert request["app_profile_id"] == expected_app_profile metadata = kwargs["metadata"] assert len(metadata) == 1 - assert metadata[0][0] == 'x-goog-request-params' - assert metadata[0][1] == f'table_name={expected_table},app_profile_id={expected_app_profile}' + assert metadata[0][0] == "x-goog-request-params" 
+ assert ( + metadata[0][1] + == f"table_name={expected_table},app_profile_id={expected_app_profile}" + ) + + @pytest.mark.asyncio + async def test_ping_and_warm_single_instance(self): + """ + should be able to call ping and warm with single instance + """ + client_mock = mock.Mock() + with mock.patch.object(asyncio, "gather", AsyncMock()) as gather: + # simulate gather by returning the same number of items as passed in + gather.side_effect = lambda *args, **kwargs: [None for _ in args] + channel = mock.Mock() + # test with large set of instances + client_mock._active_instances = [mock.Mock()] * 100 + test_key = ("test-instance", "test-table", "test-app-profile") + result = await self._get_target_class()._ping_and_warm_instances( + client_mock, channel, test_key + ) + # should only have been called with test instance + assert len(result) == 1 + # check grpc call arguments + grpc_call_args = channel.unary_unary().call_args_list + assert len(grpc_call_args) == 1 + kwargs = grpc_call_args[0][1] + request = kwargs["request"] + assert request["name"] == "test-instance" + assert request["app_profile_id"] == "test-app-profile" + metadata = kwargs["metadata"] + assert len(metadata) == 1 + assert metadata[0][0] == "x-goog-request-params" + assert ( + metadata[0][1] + == "table_name=test-table,app_profile_id=test-app-profile" + ) @pytest.mark.asyncio @pytest.mark.parametrize( @@ -339,7 +381,7 @@ async def test__manage_channel_first_sleep( # first sleep time should be `refresh_interval` seconds after client init import time - with mock.patch.object(time, "time") as time: + with mock.patch.object(time, "monotonic") as time: time.return_value = 0 with mock.patch.object(asyncio, "sleep") as sleep: sleep.side_effect = asyncio.CancelledError @@ -358,46 +400,47 @@ async def test__manage_channel_first_sleep( @pytest.mark.asyncio async def test__manage_channel_ping_and_warm(self): - from google.cloud.bigtable_v2.services.bigtable.transports.pooled_grpc_asyncio import ( - PooledBigtableGrpcAsyncIOTransport, - ) + """ + _manage channel should call ping and warm internally + """ + import time + client_mock = mock.Mock() + client_mock._channel_init_time = time.monotonic() + channel_list = [mock.Mock(), mock.Mock()] + client_mock.transport.channels = channel_list + new_channel = mock.Mock() + client_mock.transport.grpc_channel._create_channel.return_value = new_channel # should ping an warm all new channels, and old channels if sleeping - client = self._make_one(project="project-id") - new_channel = grpc.aio.insecure_channel("localhost:8080") with mock.patch.object(asyncio, "sleep"): - create_channel = mock.Mock() - create_channel.return_value = new_channel - client.transport.grpc_channel._create_channel = create_channel - with mock.patch.object( - PooledBigtableGrpcAsyncIOTransport, "replace_channel" - ) as replace_channel: - replace_channel.side_effect = asyncio.CancelledError - # should ping and warm old channel then new if sleep > 0 - with mock.patch.object( - type(self._make_one()), "_ping_and_warm_instances" - ) as ping_and_warm: - try: - channel_idx = 2 - old_channel = client.transport._grpc_channel._pool[channel_idx] - await client._manage_channel(channel_idx, 10) - except asyncio.CancelledError: - pass - assert ping_and_warm.call_count == 2 - assert old_channel != new_channel - called_with = [call[0][0] for call in ping_and_warm.call_args_list] - assert old_channel in called_with - assert new_channel in called_with - # should ping and warm instantly new channel only if not sleeping - with 
mock.patch.object( - type(self._make_one()), "_ping_and_warm_instances" - ) as ping_and_warm: - try: - await client._manage_channel(0, 0, 0) - except asyncio.CancelledError: - pass - ping_and_warm.assert_called_once_with(new_channel) - await client.close() + # stop process after replace_channel is called + client_mock.transport.replace_channel.side_effect = asyncio.CancelledError + ping_and_warm = client_mock._ping_and_warm_instances = AsyncMock() + # should ping and warm old channel then new if sleep > 0 + try: + channel_idx = 1 + await self._get_target_class()._manage_channel( + client_mock, channel_idx, 10 + ) + except asyncio.CancelledError: + pass + # should have called at loop start, and after replacement + assert ping_and_warm.call_count == 2 + # should have replaced channel once + assert client_mock.transport.replace_channel.call_count == 1 + # make sure new and old channels were warmed + old_channel = channel_list[channel_idx] + assert old_channel != new_channel + called_with = [call[0][0] for call in ping_and_warm.call_args_list] + assert old_channel in called_with + assert new_channel in called_with + # should ping and warm instantly new channel only if not sleeping + ping_and_warm.reset_mock() + try: + await self._get_target_class()._manage_channel(client_mock, 0, 0, 0) + except asyncio.CancelledError: + pass + ping_and_warm.assert_called_once_with(new_channel) @pytest.mark.asyncio @pytest.mark.parametrize( @@ -528,16 +571,24 @@ async def test__register_instance(self): client_mock._active_instances = active_instances client_mock._instance_owners = instance_owners client_mock._channel_refresh_tasks = [] - client_mock.start_background_channel_refresh.side_effect = lambda: client_mock._channel_refresh_tasks.append(mock.Mock) + client_mock.start_background_channel_refresh.side_effect = ( + lambda: client_mock._channel_refresh_tasks.append(mock.Mock) + ) mock_channels = [mock.Mock() for i in range(5)] client_mock.transport.channels = mock_channels client_mock._ping_and_warm_instances = AsyncMock() table_mock = mock.Mock() - await self._get_target_class()._register_instance(client_mock, "instance-1", table_mock) + await self._get_target_class()._register_instance( + client_mock, "instance-1", table_mock + ) # first call should start background refresh assert client_mock.start_background_channel_refresh.call_count == 1 # ensure active_instances and instance_owners were updated properly - expected_key = ("prefix/instance-1", table_mock.table_name, table_mock.app_profile_id) + expected_key = ( + "prefix/instance-1", + table_mock.table_name, + table_mock.app_profile_id, + ) assert len(active_instances) == 1 assert expected_key == tuple(list(active_instances)[0]) assert len(instance_owners) == 1 @@ -546,27 +597,55 @@ async def test__register_instance(self): assert client_mock._channel_refresh_tasks # # next call should not call start_background_channel_refresh again table_mock2 = mock.Mock() - await self._get_target_class()._register_instance(client_mock, "instance-2", table_mock2) + await self._get_target_class()._register_instance( + client_mock, "instance-2", table_mock2 + ) assert client_mock.start_background_channel_refresh.call_count == 1 # but it should call ping and warm with new instance key assert client_mock._ping_and_warm_instances.call_count == len(mock_channels) for channel in mock_channels: - assert channel in [call[0][0] for call in client_mock._ping_and_warm_instances.call_args_list] + assert channel in [ + call[0][0] + for call in 
client_mock._ping_and_warm_instances.call_args_list + ] # check for updated lists assert len(active_instances) == 2 assert len(instance_owners) == 2 - expected_key2 = ("prefix/instance-2", table_mock2.table_name, table_mock2.app_profile_id) - assert any([expected_key2 == tuple(list(active_instances)[i]) for i in range(len(active_instances))]) - assert any([expected_key2 == tuple(list(instance_owners)[i]) for i in range(len(instance_owners))]) - - @pytest.mark.asyncio - @pytest.mark.parametrize("insert_instances,expected_active,expected_owner_keys", [ - ([('i','t',None)], [('i','t',None)], [('i','t',None)]), - ([('i','t','p')], [('i','t','p')], [('i','t','p')]), - ([('1','t','p'), ('1','t','p')], [('1','t','p')], [('1','t','p')]), - ([('1','t','p'), ('2','t','p')], [('1','t','p'), ('2','t','p')], [('1','t','p'), ('2','t','p')]), - ]) - async def test__register_instance_state(self, insert_instances, expected_active, expected_owner_keys): + expected_key2 = ( + "prefix/instance-2", + table_mock2.table_name, + table_mock2.app_profile_id, + ) + assert any( + [ + expected_key2 == tuple(list(active_instances)[i]) + for i in range(len(active_instances)) + ] + ) + assert any( + [ + expected_key2 == tuple(list(instance_owners)[i]) + for i in range(len(instance_owners)) + ] + ) + + @pytest.mark.asyncio + @pytest.mark.parametrize( + "insert_instances,expected_active,expected_owner_keys", + [ + ([("i", "t", None)], [("i", "t", None)], [("i", "t", None)]), + ([("i", "t", "p")], [("i", "t", "p")], [("i", "t", "p")]), + ([("1", "t", "p"), ("1", "t", "p")], [("1", "t", "p")], [("1", "t", "p")]), + ( + [("1", "t", "p"), ("2", "t", "p")], + [("1", "t", "p"), ("2", "t", "p")], + [("1", "t", "p"), ("2", "t", "p")], + ), + ], + ) + async def test__register_instance_state( + self, insert_instances, expected_active, expected_owner_keys + ): """ test that active_instances and instance_owners are updated as expected """ @@ -578,7 +657,9 @@ async def test__register_instance_state(self, insert_instances, expected_active, client_mock._active_instances = active_instances client_mock._instance_owners = instance_owners client_mock._channel_refresh_tasks = [] - client_mock.start_background_channel_refresh.side_effect = lambda: client_mock._channel_refresh_tasks.append(mock.Mock) + client_mock.start_background_channel_refresh.side_effect = ( + lambda: client_mock._channel_refresh_tasks.append(mock.Mock) + ) mock_channels = [mock.Mock() for i in range(5)] client_mock.transport.channels = mock_channels client_mock._ping_and_warm_instances = AsyncMock() @@ -587,13 +668,25 @@ async def test__register_instance_state(self, insert_instances, expected_active, for instance, table, profile in insert_instances: table_mock.table_name = table table_mock.app_profile_id = profile - await self._get_target_class()._register_instance(client_mock, instance, table_mock) + await self._get_target_class()._register_instance( + client_mock, instance, table_mock + ) assert len(active_instances) == len(expected_active) assert len(instance_owners) == len(expected_owner_keys) for expected in expected_active: - assert any([expected == tuple(list(active_instances)[i]) for i in range(len(active_instances))]) + assert any( + [ + expected == tuple(list(active_instances)[i]) + for i in range(len(active_instances)) + ] + ) for expected in expected_owner_keys: - assert any([expected == tuple(list(instance_owners)[i]) for i in range(len(instance_owners))]) + assert any( + [ + expected == tuple(list(instance_owners)[i]) + for i in range(len(instance_owners)) + 
] + ) @pytest.mark.asyncio async def test__remove_instance_registration(self): @@ -628,58 +721,96 @@ async def test__remove_instance_registration(self): @pytest.mark.asyncio async def test__multiple_table_registration(self): + """ + registering with multiple tables with the same key should + add multiple owners to instance_owners, but only keep one copy + of shared key in active_instances + """ + from google.cloud.bigtable.client import _WarmedInstanceKey + async with self._make_one(project="project-id") as client: async with client.get_table("instance_1", "table_1") as table_1: instance_1_path = client._gapic_client.instance_path( client.project, "instance_1" ) - assert len(client._instance_owners[instance_1_path]) == 1 + instance_1_key = _WarmedInstanceKey( + instance_1_path, table_1.table_name, table_1.app_profile_id + ) + assert len(client._instance_owners[instance_1_key]) == 1 assert len(client._active_instances) == 1 - assert id(table_1) in client._instance_owners[instance_1_path] - async with client.get_table("instance_1", "table_2") as table_2: - assert len(client._instance_owners[instance_1_path]) == 2 + assert id(table_1) in client._instance_owners[instance_1_key] + # duplicate table should register in instance_owners under same key + async with client.get_table("instance_1", "table_1") as table_2: + assert len(client._instance_owners[instance_1_key]) == 2 assert len(client._active_instances) == 1 - assert id(table_1) in client._instance_owners[instance_1_path] - assert id(table_2) in client._instance_owners[instance_1_path] - # table_2 should be unregistered, but instance should still be active + assert id(table_1) in client._instance_owners[instance_1_key] + assert id(table_2) in client._instance_owners[instance_1_key] + # unique table should register in instance_owners and active_instances + async with client.get_table("instance_1", "table_3") as table_3: + instance_3_path = client._gapic_client.instance_path( + client.project, "instance_1" + ) + instance_3_key = _WarmedInstanceKey( + instance_3_path, table_3.table_name, table_3.app_profile_id + ) + assert len(client._instance_owners[instance_1_key]) == 2 + assert len(client._instance_owners[instance_3_key]) == 1 + assert len(client._active_instances) == 2 + assert id(table_1) in client._instance_owners[instance_1_key] + assert id(table_2) in client._instance_owners[instance_1_key] + assert id(table_3) in client._instance_owners[instance_3_key] + # sub-tables should be unregistered, but instance should still be active assert len(client._active_instances) == 1 - assert instance_1_path in client._active_instances - assert id(table_2) not in client._instance_owners[instance_1_path] + assert instance_1_key in client._active_instances + assert id(table_2) not in client._instance_owners[instance_1_key] # both tables are gone. 
instance should be unregistered assert len(client._active_instances) == 0 - assert instance_1_path not in client._active_instances - assert len(client._instance_owners[instance_1_path]) == 0 + assert instance_1_key not in client._active_instances + assert len(client._instance_owners[instance_1_key]) == 0 @pytest.mark.asyncio async def test__multiple_instance_registration(self): + """ + registering with multiple instance keys should update the key + in instance_owners and active_instances + """ + from google.cloud.bigtable.client import _WarmedInstanceKey + async with self._make_one(project="project-id") as client: async with client.get_table("instance_1", "table_1") as table_1: async with client.get_table("instance_2", "table_2") as table_2: instance_1_path = client._gapic_client.instance_path( client.project, "instance_1" ) + instance_1_key = _WarmedInstanceKey( + instance_1_path, table_1.table_name, table_1.app_profile_id + ) instance_2_path = client._gapic_client.instance_path( client.project, "instance_2" ) - assert len(client._instance_owners[instance_1_path]) == 1 - assert len(client._instance_owners[instance_2_path]) == 1 + instance_2_key = _WarmedInstanceKey( + instance_2_path, table_2.table_name, table_2.app_profile_id + ) + assert len(client._instance_owners[instance_1_key]) == 1 + assert len(client._instance_owners[instance_2_key]) == 1 assert len(client._active_instances) == 2 - assert id(table_1) in client._instance_owners[instance_1_path] - assert id(table_2) in client._instance_owners[instance_2_path] + assert id(table_1) in client._instance_owners[instance_1_key] + assert id(table_2) in client._instance_owners[instance_2_key] # instance2 should be unregistered, but instance1 should still be active assert len(client._active_instances) == 1 - assert instance_1_path in client._active_instances - assert len(client._instance_owners[instance_2_path]) == 0 - assert len(client._instance_owners[instance_1_path]) == 1 - assert id(table_1) in client._instance_owners[instance_1_path] + assert instance_1_key in client._active_instances + assert len(client._instance_owners[instance_2_key]) == 0 + assert len(client._instance_owners[instance_1_key]) == 1 + assert id(table_1) in client._instance_owners[instance_1_key] # both tables are gone. 
instances should both be unregistered assert len(client._active_instances) == 0 - assert len(client._instance_owners[instance_1_path]) == 0 - assert len(client._instance_owners[instance_2_path]) == 0 + assert len(client._instance_owners[instance_1_key]) == 0 + assert len(client._instance_owners[instance_2_key]) == 0 @pytest.mark.asyncio async def test_get_table(self): from google.cloud.bigtable.client import Table + from google.cloud.bigtable.client import _WarmedInstanceKey client = self._make_one(project="project-id") assert not client._active_instances @@ -705,12 +836,17 @@ async def test_get_table(self): ) assert table.app_profile_id == expected_app_profile_id assert table.client is client - assert table.instance_name in client._active_instances + instance_key = _WarmedInstanceKey( + table.instance_name, table.table_name, table.app_profile_id + ) + assert instance_key in client._active_instances + assert client._instance_owners[instance_key] == {id(table)} await client.close() @pytest.mark.asyncio async def test_get_table_context_manager(self): from google.cloud.bigtable.client import Table + from google.cloud.bigtable.client import _WarmedInstanceKey expected_table_id = "table-id" expected_instance_id = "instance-id" @@ -738,7 +874,11 @@ async def test_get_table_context_manager(self): ) assert table.app_profile_id == expected_app_profile_id assert table.client is client - assert table.instance_name in client._active_instances + instance_key = _WarmedInstanceKey( + table.instance_name, table.table_name, table.app_profile_id + ) + assert instance_key in client._active_instances + assert client._instance_owners[instance_key] == {id(table)} assert close_mock.call_count == 1 @pytest.mark.asyncio @@ -829,6 +969,7 @@ class TestTable: async def test_table_ctor(self): from google.cloud.bigtable.client import BigtableDataClient from google.cloud.bigtable.client import Table + from google.cloud.bigtable.client import _WarmedInstanceKey expected_table_id = "table-id" expected_instance_id = "instance-id" @@ -851,7 +992,11 @@ async def test_table_ctor(self): assert table.instance_id == expected_instance_id assert table.app_profile_id == expected_app_profile_id assert table.client is client - assert table.instance_name in client._active_instances + instance_key = _WarmedInstanceKey( + table.instance_name, table.table_name, table.app_profile_id + ) + assert instance_key in client._active_instances + assert client._instance_owners[instance_key] == {id(table)} assert table.default_operation_timeout == expected_operation_timeout assert table.default_per_request_timeout == expected_per_request_timeout # ensure task reaches completion From 3c605ba6d98b01ca2020036693822246ef28efea Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 22 Jun 2023 13:00:37 -0700 Subject: [PATCH 168/213] fixed metadata format --- google/cloud/bigtable/_helpers.py | 2 +- tests/unit/test__helpers.py | 2 +- tests/unit/test_client.py | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/google/cloud/bigtable/_helpers.py b/google/cloud/bigtable/_helpers.py index dec4c2014..722fac9f4 100644 --- a/google/cloud/bigtable/_helpers.py +++ b/google/cloud/bigtable/_helpers.py @@ -35,7 +35,7 @@ def _make_metadata( params.append(f"table_name={table_name}") if app_profile_id is not None: params.append(f"app_profile_id={app_profile_id}") - params_str = ",".join(params) + params_str = "&".join(params) return [("x-goog-request-params", params_str)] diff --git a/tests/unit/test__helpers.py b/tests/unit/test__helpers.py index 
2765afe24..9aa1a7bb4 100644 --- a/tests/unit/test__helpers.py +++ b/tests/unit/test__helpers.py @@ -23,7 +23,7 @@ class TestMakeMetadata: @pytest.mark.parametrize( "table,profile,expected", [ - ("table", "profile", "table_name=table,app_profile_id=profile"), + ("table", "profile", "table_name=table&app_profile_id=profile"), ("table", None, "table_name=table"), ], ) diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 9997205d7..75adffc72 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -327,7 +327,7 @@ async def test__ping_and_warm_instances(self): assert metadata[0][0] == "x-goog-request-params" assert ( metadata[0][1] - == f"table_name={expected_table},app_profile_id={expected_app_profile}" + == f"table_name={expected_table}&app_profile_id={expected_app_profile}" ) @pytest.mark.asyncio @@ -360,7 +360,7 @@ async def test_ping_and_warm_single_instance(self): assert metadata[0][0] == "x-goog-request-params" assert ( metadata[0][1] - == "table_name=test-table,app_profile_id=test-app-profile" + == "table_name=test-table&app_profile_id=test-app-profile" ) @pytest.mark.asyncio From 12099f5120ccf774fc394e3a87e6b6b30132015c Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Thu, 22 Jun 2023 17:45:01 -0700 Subject: [PATCH 169/213] remove index from exceptions --- google/cloud/bigtable/mutations_batcher.py | 2 ++ tests/system/test_system.py | 5 ++++- tests/unit/test_mutations_batcher.py | 2 ++ 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigtable/mutations_batcher.py b/google/cloud/bigtable/mutations_batcher.py index e820cdb67..d52bcba8d 100644 --- a/google/cloud/bigtable/mutations_batcher.py +++ b/google/cloud/bigtable/mutations_batcher.py @@ -488,5 +488,7 @@ async def _wait_for_batch_results( found_errors.append(result) elif result: # completed requests will return a list of FailedMutationEntryError + for e in result: + e.index = None found_errors.extend(result) return found_errors diff --git a/tests/system/test_system.py b/tests/system/test_system.py index 5294d4ac5..97b5bd0de 100644 --- a/tests/system/test_system.py +++ b/tests/system/test_system.py @@ -26,7 +26,10 @@ @pytest.fixture(scope="session") def event_loop(): - return asyncio.get_event_loop() + loop = asyncio.get_event_loop() + yield loop + loop.stop() + loop.close() @pytest.fixture(scope="session") diff --git a/tests/unit/test_mutations_batcher.py b/tests/unit/test_mutations_batcher.py index 00d55d7db..a1c9fa947 100644 --- a/tests/unit/test_mutations_batcher.py +++ b/tests/unit/test_mutations_batcher.py @@ -978,6 +978,8 @@ async def gen(x): assert ( instance.exceptions[i] == expected_errors[i - num_starting] ) + # errors should have index stripped + assert instance.exceptions[i].index == i instance.exceptions = [] async def _mock_gapic_return(self, num=5): From 6833707ab2c358794760b0f5526b5b1ae39bb2aa Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 23 Jun 2023 08:42:59 -0700 Subject: [PATCH 170/213] added test for memory leaks --- tests/unit/test_mutations_batcher.py | 75 ++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) diff --git a/tests/unit/test_mutations_batcher.py b/tests/unit/test_mutations_batcher.py index a1c9fa947..6a972edf9 100644 --- a/tests/unit/test_mutations_batcher.py +++ b/tests/unit/test_mutations_batcher.py @@ -1216,3 +1216,78 @@ async def test_timeout_args_passed(self, mutate_rows): kwargs = mutate_rows.call_args[1] assert kwargs["operation_timeout"] == expected_operation_timeout assert kwargs["per_request_timeout"] == 
expected_per_request_timeout + + @pytest.mark.asyncio + async def test_batcher_task_memory_release(self): + """ + the batcher keeps a reference to the previous task, which may contain + a reference to another previous task. If we're not careful, this + could result in a memory leak. + + Test to ensure that old tasks are released + """ + import weakref + x + # use locks to control when tasks complete + task_locks = [asyncio.Lock() for _ in range(4)] + lock_idx = 0 + for lock in task_locks: + await lock.acquire() + async with self._make_one() as instance: + with mock.patch.object( + instance, "_execute_mutate_rows", AsyncMock() + ) as op_mock: + # mock network calls + async def mock_call(*args, **kwargs): + nonlocal lock_idx + lock_idx += 1 + await task_locks[lock_idx-1].acquire() + return [] + op_mock.side_effect = mock_call + # create a starting task + instance._staged_entries = [_make_mutation()] + try: + await instance.flush(timeout=0.01) + except asyncio.TimeoutError: + pass + # capture as weak reference + first_task_ref = weakref.ref(instance._prev_flush) + assert first_task_ref() is not None + assert first_task_ref().done() is False + assert len(first_task_ref().get_stack()) == 1 + # add more flushes to chain + middle_task = None + # add more flushes to chain + for i in range(3): + instance._staged_entries = [_make_mutation()] + try: + await instance.flush(timeout=0.01) + except asyncio.TimeoutError: + pass + if i == 1: + # save a reference to a task in the middle of the chain + middle_task = instance._prev_flush + assert instance._prev_flush != middle_task + # first_task should still be active + assert first_task_ref() is not None + assert first_task_ref().done() is False + # let it complete + task_locks[0].release() + await asyncio.sleep(0.01) + # should be complete, but still referenced in second task, which is still active + assert first_task_ref() is not None + assert first_task_ref().done() is True + # task's internal stack should be cleared + assert len(first_task_ref().get_stack()) == 0 + # finish locks up to middle task + task_locks[1].release() + task_locks[2].release() + await asyncio.sleep(0.01) + # first task should no longer be referenced + assert first_task_ref() is None + # but there should still be an active task + assert instance._prev_flush is not None + assert instance._prev_flush.done() is False + # clear locks + for lock in task_locks: + lock.release() From 4c402994bdfb5f13fe9b1698ed92ff5327bebf36 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 23 Jun 2023 08:53:45 -0700 Subject: [PATCH 171/213] clear task after completion --- google/cloud/bigtable/mutations_batcher.py | 35 ++++++++++++++-------- tests/unit/test_mutations_batcher.py | 8 +++-- 2 files changed, 27 insertions(+), 16 deletions(-) diff --git a/google/cloud/bigtable/mutations_batcher.py b/google/cloud/bigtable/mutations_batcher.py index d52bcba8d..871d4b900 100644 --- a/google/cloud/bigtable/mutations_batcher.py +++ b/google/cloud/bigtable/mutations_batcher.py @@ -235,9 +235,7 @@ def __init__( ) self.exceptions: list[Exception] = [] self._flush_timer = self._start_flush_timer(flush_interval) - # create empty previous flush to avoid None checks - self._prev_flush: asyncio.Future[None] = asyncio.Future() - self._prev_flush.set_result(None) + self._prev_flush: asyncio.Future[None] | None = None # MutationExceptionGroup reports number of successful entries along with failures self._entries_processed_since_last_raise: int = 0 # clean up on program exit @@ -317,17 +315,23 @@ async def flush(self, *, 
raise_exceptions: bool = True, timeout: float | None = - asyncio.TimeoutError if timeout is reached before flush task completes. """ # add recent staged entries to flush task, and wait for flush to complete - flush_job: Awaitable[None] = self._schedule_flush() - if timeout is not None: - # wait `timeout seconds for flush to complete - # if timeout is exceeded, flush task will still be running in the background - flush_job = asyncio.wait_for(asyncio.shield(flush_job), timeout=timeout) - await flush_job + flush_job: Awaitable[None] | None = self._schedule_flush() + if flush_job is not None: + if timeout is not None: + # wait `timeout seconds for flush to complete + # if timeout is exceeded, flush task will still be running in the background + flush_job = asyncio.wait_for(asyncio.shield(flush_job), timeout=timeout) + await flush_job # raise any unreported exceptions from this or previous flushes if raise_exceptions: self._raise_exceptions() - def _schedule_flush(self) -> asyncio.Future[None]: + def _on_flush_complete(self, flush_task: asyncio.Future[None]): + """if there are no more running flush tasks, clear _prev_flush""" + if self._prev_flush == flush_task: + self._prev_flush = None + + def _schedule_flush(self) -> asyncio.Future[None] | None: """Update the flush task to include the latest staged entries""" if self._staged_entries: entries, self._staged_entries = self._staged_entries, [] @@ -335,22 +339,26 @@ def _schedule_flush(self) -> asyncio.Future[None]: self._prev_flush = self._create_bg_task( self._flush_internal, entries, self._prev_flush ) + self._prev_flush.add_done_callback(self._on_flush_complete) return self._prev_flush async def _flush_internal( - self, new_entries: list[RowMutationEntry], prev_flush: asyncio.Future[None] + self, + new_entries: list[RowMutationEntry], + prev_flush: asyncio.Future[None] | None, ): """ Flushes a set of mutations to the server, and updates internal state Args: + - new_entries: list of RowMutationEntry objects to flush - prev_flush: the previous flush task, which will be awaited before a new flush is initiated """ # flush new entries in_process_requests: list[ asyncio.Future[list[FailedMutationEntryError]] | asyncio.Future[None] - ] = [prev_flush] + ] = ([prev_flush] if prev_flush else []) async for batch in self._flow_control.add_to_flow(new_entries): batch_task = self._create_bg_task(self._execute_mutate_rows, batch) in_process_requests.append(batch_task) @@ -428,7 +436,8 @@ async def close(self): self.closed = True self._flush_timer.cancel() self._schedule_flush() - await self._prev_flush + if self._prev_flush: + await self._prev_flush try: await self._flush_timer except asyncio.CancelledError: diff --git a/tests/unit/test_mutations_batcher.py b/tests/unit/test_mutations_batcher.py index 6a972edf9..ea264c075 100644 --- a/tests/unit/test_mutations_batcher.py +++ b/tests/unit/test_mutations_batcher.py @@ -979,7 +979,8 @@ async def gen(x): instance.exceptions[i] == expected_errors[i - num_starting] ) # errors should have index stripped - assert instance.exceptions[i].index == i + assert instance.exceptions[i].index is None + # clear out exceptions instance.exceptions = [] async def _mock_gapic_return(self, num=5): @@ -1227,7 +1228,7 @@ async def test_batcher_task_memory_release(self): Test to ensure that old tasks are released """ import weakref - x + # use locks to control when tasks complete task_locks = [asyncio.Lock() for _ in range(4)] lock_idx = 0 @@ -1241,8 +1242,9 @@ async def test_batcher_task_memory_release(self): async def 
mock_call(*args, **kwargs): nonlocal lock_idx lock_idx += 1 - await task_locks[lock_idx-1].acquire() + await task_locks[lock_idx - 1].acquire() return [] + op_mock.side_effect = mock_call # create a starting task instance._staged_entries = [_make_mutation()] From 2931aa89cb6c78ce024a23c7844c3965697d9830 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 23 Jun 2023 10:56:23 -0700 Subject: [PATCH 172/213] added exception list truncation --- google/cloud/bigtable/exceptions.py | 86 ++++++++++++++++++++-- google/cloud/bigtable/mutations_batcher.py | 60 +++++++++++++-- 2 files changed, 133 insertions(+), 13 deletions(-) diff --git a/google/cloud/bigtable/exceptions.py b/google/cloud/bigtable/exceptions.py index d314d2157..befe3dd25 100644 --- a/google/cloud/bigtable/exceptions.py +++ b/google/cloud/bigtable/exceptions.py @@ -89,17 +89,87 @@ class MutationsExceptionGroup(BigtableExceptionGroup): """ @staticmethod - def _format_message(excs: list[Exception], total_entries: int): - entry_str = "entry" if total_entries == 1 else "entries" - plural_str = "" if len(excs) == 1 else "s" - return f"{len(excs)} sub-exception{plural_str} (from {total_entries} {entry_str} attempted)" + def _format_message( + excs: list[Exception], total_entries: int, exc_count: int | None = None + ) -> str: + """ + Format a message for the exception group + + Args: + - excs: the exceptions in the group + - total_entries: the total number of entries attempted, successful or not + - exc_count: the number of exceptions associated with the request + if None, this will be len(excs) + """ + exc_count = exc_count if exc_count is not None else len(excs) + entry_str = "entry" if exc_count == 1 else "entries" + return f"{exc_count} failed {entry_str} from {total_entries} attempted." - def __init__(self, excs: list[Exception], total_entries: int): - super().__init__(self._format_message(excs, total_entries), excs) + def __init__( + self, excs: list[Exception], total_entries: int, message: str | None = None + ): + """ + Args: + - excs: the exceptions in the group + - total_entries: the total number of entries attempted, successful or not + - message: the message for the exception group. If None, a default message + will be generated + """ + message = ( + message + if message is not None + else self._format_message(excs, total_entries) + ) + super().__init__(message, excs) self.total_entries_attempted = total_entries - def __new__(cls, excs: list[Exception], total_entries: int): - return super().__new__(cls, cls._format_message(excs, total_entries), excs) + def __new__( + cls, excs: list[Exception], total_entries: int, message: str | None = None + ): + """ + Args: + - excs: the exceptions in the group + - total_entries: the total number of entries attempted, successful or not + - message: the message for the exception group. If None, a default message + """ + message = ( + message if message is not None else cls._format_message(excs, total_entries) + ) + instance = super().__new__(cls, message, excs) + instance.total_entries_attempted = total_entries + return instance + + @classmethod + def from_truncated_mutations( + cls, + first_list: list[Exception], + last_list: list[Exception], + total_excs: int, + entry_count: int, + ) -> MutationsExceptionGroup: + """ + Create a MutationsExceptionGroup from two lists of exceptions, representing + a larger set that has been truncated. 
The MutationsExceptionGroup will + contain the union of the two lists as sub-exceptions, and the error message + describe the number of exceptions that were truncated. + + Args: + - first_list: the set of oldest exceptions to add to the ExceptionGroup + - last_list: the set of newest exceptions to add to the ExceptionGroup + - total_excs: the total number of exceptions associated with the request + Should be len(first_list) + len(last_list) + number of dropped exceptions + in the middle + - entry_count: the total number of entries attempted, successful or not + """ + first_count, last_count = len(first_list), len(last_list) + if last_count == 0 and first_count == total_excs: + # not truncated; use default constructor + return cls(first_list, entry_count) + excs = first_list + last_list + truncation_count = total_excs - (first_count + last_count) + base_message = cls._format_message(excs, entry_count, len(last_list)) + message = f"{base_message} (First {first_count} and last {last_count} attached as sub-exceptions; {truncation_count} exceptions truncated)" + return cls(excs, entry_count, message) class FailedMutationEntryError(Exception): diff --git a/google/cloud/bigtable/mutations_batcher.py b/google/cloud/bigtable/mutations_batcher.py index 871d4b900..bed53c937 100644 --- a/google/cloud/bigtable/mutations_batcher.py +++ b/google/cloud/bigtable/mutations_batcher.py @@ -233,11 +233,15 @@ def __init__( if flush_limit_mutation_count is not None else float("inf") ) - self.exceptions: list[Exception] = [] self._flush_timer = self._start_flush_timer(flush_interval) self._prev_flush: asyncio.Future[None] | None = None # MutationExceptionGroup reports number of successful entries along with failures self._entries_processed_since_last_raise: int = 0 + self._exceptions_since_last_raise: int = 0 + # keep track of the first and last _exception_list_limit exceptions + self._exception_list_limit: int = 10 + self._oldest_exceptions: list[Exception] = [] + self._newest_exceptions: list[Exception] = [] # clean up on program exit atexit.register(self._on_exit) @@ -368,7 +372,7 @@ async def _flush_internal( await asyncio.sleep(0) # collect exception data for next raise, after previous flush tasks have completed self._entries_processed_since_last_raise += len(new_entries) - self.exceptions.extend(found_exceptions) + self._add_exceptions(found_exceptions) async def _execute_mutate_rows( self, batch: list[RowMutationEntry] @@ -406,6 +410,41 @@ async def _execute_mutate_rows( await self._flow_control.remove_from_flow(batch) return [] + def _add_exceptions(self, new_exceptions: list[Exception]): + """ + Add new list of exxceptions to internal store. To avoid unbounded memory, + the batcher will store the first and last _exception_list_limit exceptions, + and discard any in between. 
+ """ + # add indices to exceptions to track the failure ordering + for idx, exc in enumerate( + new_exceptions, start=self._entries_processed_since_last_raise + ): + if isinstance(exc, FailedMutationEntryError): + exc.index = idx + self._exceptions_since_last_raise += len(new_exceptions) + if new_exceptions and len(self._oldest_exceptions) < self._exception_list_limit: + # populate oldest_exceptions with found_exceptions + addition_count = self._exception_list_limit - len(self._oldest_exceptions) + self._oldest_exceptions.extend(new_exceptions[:addition_count]) + new_exceptions = new_exceptions[addition_count:] + if new_exceptions: + # populate newest_exceptions with remaining found_exceptions + keep_count = self._exception_list_limit - len(new_exceptions) + self._newest_exceptions = ( + new_exceptions[-self._exception_list_limit :] + + self._newest_exceptions[:keep_count] + ) + + @property + def exceptions(self) -> list[Exception]: + """ + Access the list of exceptions waiting to be flushed. If more than + 20 exceptions have been encountered since the last flush, only the + first and last 10 exceptions will be stored. + """ + return self._oldest_exceptions + self._newest_exceptions + def _raise_exceptions(self): """ Raise any unreported exceptions from background flush operations @@ -414,12 +453,22 @@ def _raise_exceptions(self): - MutationsExceptionGroup with all unreported exceptions """ if self.exceptions: - exc_list, self.exceptions = self.exceptions, [] - raise_count, self._entries_processed_since_last_raise = ( + oldest, self._oldest_exceptions = self._oldest_exceptions, [] + newest, self._newest_exceptions = self._newest_exceptions, [] + entry_count, self._entries_processed_since_last_raise = ( self._entries_processed_since_last_raise, 0, ) - raise MutationsExceptionGroup(exc_list, raise_count) + exc_count, self._exceptions_since_last_raise = ( + self._exceptions_since_last_raise, + 0, + ) + raise MutationsExceptionGroup.from_truncated_lists( + first_list=oldest, + last_list=newest, + total_excs=exc_count, + entry_count=entry_count, + ) async def __aenter__(self): """For context manager API""" @@ -498,6 +547,7 @@ async def _wait_for_batch_results( elif result: # completed requests will return a list of FailedMutationEntryError for e in result: + # strip index information e.index = None found_errors.extend(result) return found_errors From 1dc760e6eadbfb68c15670ef451efc1de92499bb Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 23 Jun 2023 11:20:56 -0700 Subject: [PATCH 173/213] removed index information --- google/cloud/bigtable/mutations_batcher.py | 8 +------- tests/unit/test_mutations_batcher.py | 3 +-- 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/google/cloud/bigtable/mutations_batcher.py b/google/cloud/bigtable/mutations_batcher.py index bed53c937..e04f61015 100644 --- a/google/cloud/bigtable/mutations_batcher.py +++ b/google/cloud/bigtable/mutations_batcher.py @@ -412,16 +412,10 @@ async def _execute_mutate_rows( def _add_exceptions(self, new_exceptions: list[Exception]): """ - Add new list of exxceptions to internal store. To avoid unbounded memory, + Add new list of exceptions to internal store. To avoid unbounded memory, the batcher will store the first and last _exception_list_limit exceptions, and discard any in between. 
""" - # add indices to exceptions to track the failure ordering - for idx, exc in enumerate( - new_exceptions, start=self._entries_processed_since_last_raise - ): - if isinstance(exc, FailedMutationEntryError): - exc.index = idx self._exceptions_since_last_raise += len(new_exceptions) if new_exceptions and len(self._oldest_exceptions) < self._exception_list_limit: # populate oldest_exceptions with found_exceptions diff --git a/tests/unit/test_mutations_batcher.py b/tests/unit/test_mutations_batcher.py index ea264c075..527da7223 100644 --- a/tests/unit/test_mutations_batcher.py +++ b/tests/unit/test_mutations_batcher.py @@ -1228,12 +1228,11 @@ async def test_batcher_task_memory_release(self): Test to ensure that old tasks are released """ import weakref - # use locks to control when tasks complete task_locks = [asyncio.Lock() for _ in range(4)] lock_idx = 0 for lock in task_locks: - await lock.acquire() + await asyncio.wait_for(lock.acquire(), timeout=2) async with self._make_one() as instance: with mock.patch.object( instance, "_execute_mutate_rows", AsyncMock() From badbb837c1b0723442c72a274b990f1f79f994b0 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 23 Jun 2023 12:07:12 -0700 Subject: [PATCH 174/213] fixed existing tests --- google/cloud/bigtable/exceptions.py | 2 +- google/cloud/bigtable/mutations_batcher.py | 16 ++++++++-------- tests/unit/test_exceptions.py | 6 +++--- tests/unit/test_mutations_batcher.py | 11 ++++++----- 4 files changed, 18 insertions(+), 17 deletions(-) diff --git a/google/cloud/bigtable/exceptions.py b/google/cloud/bigtable/exceptions.py index befe3dd25..18e95da8c 100644 --- a/google/cloud/bigtable/exceptions.py +++ b/google/cloud/bigtable/exceptions.py @@ -140,7 +140,7 @@ def __new__( return instance @classmethod - def from_truncated_mutations( + def from_truncated_lists( cls, first_list: list[Exception], last_list: list[Exception], diff --git a/google/cloud/bigtable/mutations_batcher.py b/google/cloud/bigtable/mutations_batcher.py index e04f61015..ae926a58f 100644 --- a/google/cloud/bigtable/mutations_batcher.py +++ b/google/cloud/bigtable/mutations_batcher.py @@ -410,23 +410,23 @@ async def _execute_mutate_rows( await self._flow_control.remove_from_flow(batch) return [] - def _add_exceptions(self, new_exceptions: list[Exception]): + def _add_exceptions(self, excs: list[Exception]): """ Add new list of exceptions to internal store. To avoid unbounded memory, the batcher will store the first and last _exception_list_limit exceptions, and discard any in between. 
""" - self._exceptions_since_last_raise += len(new_exceptions) - if new_exceptions and len(self._oldest_exceptions) < self._exception_list_limit: + self._exceptions_since_last_raise += len(excs) + if excs and len(self._oldest_exceptions) < self._exception_list_limit: # populate oldest_exceptions with found_exceptions addition_count = self._exception_list_limit - len(self._oldest_exceptions) - self._oldest_exceptions.extend(new_exceptions[:addition_count]) - new_exceptions = new_exceptions[addition_count:] - if new_exceptions: + self._oldest_exceptions.extend(excs[:addition_count]) + excs = excs[addition_count:] + if excs: # populate newest_exceptions with remaining found_exceptions - keep_count = self._exception_list_limit - len(new_exceptions) + keep_count = self._exception_list_limit - len(excs) self._newest_exceptions = ( - new_exceptions[-self._exception_list_limit :] + excs[-self._exception_list_limit :] + self._newest_exceptions[:keep_count] ) diff --git a/tests/unit/test_exceptions.py b/tests/unit/test_exceptions.py index 7cbf2d9b1..366c16125 100644 --- a/tests/unit/test_exceptions.py +++ b/tests/unit/test_exceptions.py @@ -130,12 +130,12 @@ def _make_one(self, excs=None, num_entries=3): @pytest.mark.parametrize( "exception_list,total_entries,expected_message", [ - ([Exception()], 1, "1 sub-exception (from 1 entry attempted)"), - ([Exception()], 2, "1 sub-exception (from 2 entries attempted)"), + ([Exception()], 1, "1 failed entry (from 1 attempted)"), + ([Exception()], 2, "1 failed entry (from 2 attempted)"), ( [Exception(), RuntimeError()], 2, - "2 sub-exceptions (from 2 entries attempted)", + "2 failed entries (from 2 attempted)", ), ], ) diff --git a/tests/unit/test_mutations_batcher.py b/tests/unit/test_mutations_batcher.py index 527da7223..4f05863be 100644 --- a/tests/unit/test_mutations_batcher.py +++ b/tests/unit/test_mutations_batcher.py @@ -938,6 +938,7 @@ async def gen(x): (1, 0, 1), (1, 1, 2), (10, 2, 12), + (10, 20, 20), # should cap at 20 ], ) @pytest.mark.asyncio @@ -954,7 +955,7 @@ async def test__flush_internal_with_errors( exceptions.FailedMutationEntryError(mock.Mock(), mock.Mock(), ValueError()) ] * num_new_errors async with self._make_one() as instance: - instance.exceptions = [mock.Mock()] * num_starting + instance._oldest_exceptions = [mock.Mock()] * num_starting with mock.patch.object(instance, "_execute_mutate_rows") as execute_mock: execute_mock.return_value = expected_errors with mock.patch.object( @@ -981,7 +982,7 @@ async def gen(x): # errors should have index stripped assert instance.exceptions[i].index is None # clear out exceptions - instance.exceptions = [] + instance._oldest_exceptions, instance._newest_exceptions = [], [] async def _mock_gapic_return(self, num=5): from google.cloud.bigtable_v2.types import MutateRowsResponse @@ -1098,7 +1099,7 @@ async def test__raise_exceptions(self): expected_total = 1201 expected_exceptions = [RuntimeError("mock")] * 3 async with self._make_one() as instance: - instance.exceptions = expected_exceptions + instance._oldest_exceptions = expected_exceptions instance._entries_processed_since_last_raise = expected_total try: instance._raise_exceptions() @@ -1133,7 +1134,7 @@ async def test_close(self): await instance.close() assert instance.closed is True assert instance._flush_timer.done() is True - assert instance._prev_flush.done() is True + assert instance._prev_flush is None assert flush_mock.call_count == 1 assert raise_mock.call_count == 1 @@ -1145,7 +1146,7 @@ async def test_close_w_exceptions(self): 
expected_total = 10 expected_exceptions = [RuntimeError("mock")] async with self._make_one() as instance: - instance.exceptions = expected_exceptions + instance._oldest_exceptions = expected_exceptions instance._entries_processed_since_last_raise = expected_total try: await instance.close() From 6da9a64b2f95bacade5ee9483c2025a16fc3e7dd Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 23 Jun 2023 14:18:20 -0700 Subject: [PATCH 175/213] removed manual flushing --- google/cloud/bigtable/client.py | 6 +- google/cloud/bigtable/mutations_batcher.py | 83 +---- tests/unit/test_mutations_batcher.py | 399 +++++---------------- 3 files changed, 103 insertions(+), 385 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index dd869d076..d0c04d7ff 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -57,7 +57,7 @@ from google.cloud.bigtable._helpers import _make_metadata from google.cloud.bigtable._helpers import _convert_retry_deadline from google.cloud.bigtable.mutations_batcher import MutationsBatcher -from google.cloud.bigtable.mutations_batcher import MB_SIZE +from google.cloud.bigtable.mutations_batcher import _MB_SIZE from google.cloud.bigtable._helpers import _attempt_timeout_generator from google.cloud.bigtable.read_modify_write_rules import ReadModifyWriteRule @@ -726,9 +726,9 @@ def mutations_batcher( *, flush_interval: float | None = 5, flush_limit_mutation_count: int | None = 1000, - flush_limit_bytes: int = 20 * MB_SIZE, + flush_limit_bytes: int = 20 * _MB_SIZE, flow_control_max_mutation_count: int = 100_000, - flow_control_max_bytes: int = 100 * MB_SIZE, + flow_control_max_bytes: int = 100 * _MB_SIZE, batch_operation_timeout: float | None = None, batch_per_request_timeout: float | None = None, ) -> MutationsBatcher: diff --git a/google/cloud/bigtable/mutations_batcher.py b/google/cloud/bigtable/mutations_batcher.py index ae926a58f..921eeddba 100644 --- a/google/cloud/bigtable/mutations_batcher.py +++ b/google/cloud/bigtable/mutations_batcher.py @@ -17,7 +17,7 @@ import asyncio import atexit import warnings -from typing import Any, Awaitable, TYPE_CHECKING +from typing import Any, TYPE_CHECKING from google.cloud.bigtable.mutations import RowMutationEntry from google.cloud.bigtable.exceptions import MutationsExceptionGroup @@ -31,7 +31,7 @@ from google.cloud.bigtable.client import Table # pragma: no cover # used to make more readable default values -MB_SIZE = 1024 * 1024 +_MB_SIZE = 1024 * 1024 class _FlowControl: @@ -183,9 +183,9 @@ def __init__( *, flush_interval: float | None = 5, flush_limit_mutation_count: int | None = 1000, - flush_limit_bytes: int = 20 * MB_SIZE, + flush_limit_bytes: int = 20 * _MB_SIZE, flow_control_max_mutation_count: int = 100_000, - flow_control_max_bytes: int = 100 * MB_SIZE, + flow_control_max_bytes: int = 100 * _MB_SIZE, batch_operation_timeout: float | None = None, batch_per_request_timeout: float | None = None, ): @@ -234,7 +234,7 @@ def __init__( else float("inf") ) self._flush_timer = self._start_flush_timer(flush_interval) - self._prev_flush: asyncio.Future[None] | None = None + self._flush_jobs: set[asyncio.Future[None]] = set() # MutationExceptionGroup reports number of successful entries along with failures self._entries_processed_since_last_raise: int = 0 self._exceptions_since_last_raise: int = 0 @@ -303,80 +303,37 @@ async def append(self, mutation_entry: RowMutationEntry): # yield to the event loop to allow flush to run await asyncio.sleep(0) - async def flush(self, *, 
raise_exceptions: bool = True, timeout: float | None = 60): - """ - Flush all staged entries - - Args: - - raise_exceptions: if True, will raise any unreported exceptions from this or previous flushes. - If False, exceptions will be stored in self.exceptions and raised on a future flush - or when the batcher is closed. - - timeout: maximum time to wait for flush to complete, in seconds. - If exceeded, flush will continue in the background and exceptions - will be surfaced on the next flush - Raises: - - MutationsExceptionGroup if raise_exceptions is True and any mutations fail - - asyncio.TimeoutError if timeout is reached before flush task completes. - """ - # add recent staged entries to flush task, and wait for flush to complete - flush_job: Awaitable[None] | None = self._schedule_flush() - if flush_job is not None: - if timeout is not None: - # wait `timeout seconds for flush to complete - # if timeout is exceeded, flush task will still be running in the background - flush_job = asyncio.wait_for(asyncio.shield(flush_job), timeout=timeout) - await flush_job - # raise any unreported exceptions from this or previous flushes - if raise_exceptions: - self._raise_exceptions() - - def _on_flush_complete(self, flush_task: asyncio.Future[None]): - """if there are no more running flush tasks, clear _prev_flush""" - if self._prev_flush == flush_task: - self._prev_flush = None - def _schedule_flush(self) -> asyncio.Future[None] | None: """Update the flush task to include the latest staged entries""" if self._staged_entries: entries, self._staged_entries = self._staged_entries, [] self._staged_count, self._staged_bytes = 0, 0 - self._prev_flush = self._create_bg_task( - self._flush_internal, entries, self._prev_flush - ) - self._prev_flush.add_done_callback(self._on_flush_complete) - return self._prev_flush + new_task = self._create_bg_task(self._flush_internal, entries) + new_task.add_done_callback(self._flush_jobs.remove) + self._flush_jobs.add(new_task) + return new_task + return None - async def _flush_internal( - self, - new_entries: list[RowMutationEntry], - prev_flush: asyncio.Future[None] | None, - ): + async def _flush_internal(self,new_entries: list[RowMutationEntry]): """ Flushes a set of mutations to the server, and updates internal state Args: - new_entries: list of RowMutationEntry objects to flush - - prev_flush: the previous flush task, which will be awaited before - a new flush is initiated """ # flush new entries - in_process_requests: list[ - asyncio.Future[list[FailedMutationEntryError]] | asyncio.Future[None] - ] = ([prev_flush] if prev_flush else []) + in_process_requests: list[asyncio.Future[list[FailedMutationEntryError]]] = [] async for batch in self._flow_control.add_to_flow(new_entries): batch_task = self._create_bg_task(self._execute_mutate_rows, batch) in_process_requests.append(batch_task) # wait for all inflight requests to complete found_exceptions = await self._wait_for_batch_results(*in_process_requests) # allow previous flush tasks to finalize before adding new exceptions to list - await asyncio.sleep(0) # collect exception data for next raise, after previous flush tasks have completed self._entries_processed_since_last_raise += len(new_entries) self._add_exceptions(found_exceptions) - async def _execute_mutate_rows( - self, batch: list[RowMutationEntry] - ) -> list[FailedMutationEntryError]: + async def _execute_mutate_rows(self, batch: list[RowMutationEntry]) -> list[FailedMutationEntryError]: """ Helper to execute mutation operation on a batch @@ -430,15 +387,6 
@@ def _add_exceptions(self, excs: list[Exception]): + self._newest_exceptions[:keep_count] ) - @property - def exceptions(self) -> list[Exception]: - """ - Access the list of exceptions waiting to be flushed. If more than - 20 exceptions have been encountered since the last flush, only the - first and last 10 exceptions will be stored. - """ - return self._oldest_exceptions + self._newest_exceptions - def _raise_exceptions(self): """ Raise any unreported exceptions from background flush operations @@ -446,7 +394,7 @@ def _raise_exceptions(self): Raises: - MutationsExceptionGroup with all unreported exceptions """ - if self.exceptions: + if self._oldest_exceptions or self._newest_exceptions: oldest, self._oldest_exceptions = self._oldest_exceptions, [] newest, self._newest_exceptions = self._newest_exceptions, [] entry_count, self._entries_processed_since_last_raise = ( @@ -479,8 +427,7 @@ async def close(self): self.closed = True self._flush_timer.cancel() self._schedule_flush() - if self._prev_flush: - await self._prev_flush + await asyncio.gather(*self._flush_jobs, return_exceptions=True) try: await self._flush_timer except asyncio.CancelledError: diff --git a/tests/unit/test_mutations_batcher.py b/tests/unit/test_mutations_batcher.py index 4f05863be..c35319dac 100644 --- a/tests/unit/test_mutations_batcher.py +++ b/tests/unit/test_mutations_batcher.py @@ -276,15 +276,18 @@ async def test_add_to_flow_oversize(self): class TestMutationsBatcher: - def _make_one(self, table=None, **kwargs): + + def _get_target_class(self): from google.cloud.bigtable.mutations_batcher import MutationsBatcher + return MutationsBatcher + def _make_one(self, table=None, **kwargs): if table is None: table = mock.Mock() table.default_operation_timeout = 10 table.default_per_request_timeout = 10 - return MutationsBatcher(table, **kwargs) + return self._get_target_class()(table, **kwargs) @mock.patch( "google.cloud.bigtable.mutations_batcher.MutationsBatcher._start_flush_timer" @@ -298,8 +301,12 @@ async def test_ctor_defaults(self, flush_timer_mock): async with self._make_one(table) as instance: assert instance._table == table assert instance.closed is False + assert instance._flush_jobs == set() assert instance._staged_entries == [] - assert instance.exceptions == [] + assert instance._oldest_exceptions == [] + assert instance._newest_exceptions == [] + assert instance._exception_list_limit == 10 + assert instance._exceptions_since_last_raise == 0 assert instance._flow_control._max_mutation_count == 100000 assert instance._flow_control._max_mutation_bytes == 104857600 assert instance._flow_control._in_flight_mutation_count == 0 @@ -339,8 +346,12 @@ async def test_ctor_explicit(self, flush_timer_mock): ) as instance: assert instance._table == table assert instance.closed is False + assert instance._flush_jobs == set() assert instance._staged_entries == [] - assert instance.exceptions == [] + assert instance._oldest_exceptions == [] + assert instance._newest_exceptions == [] + assert instance._exception_list_limit == 10 + assert instance._exceptions_since_last_raise == 0 assert ( instance._flow_control._max_mutation_count == flow_control_max_mutation_count @@ -378,7 +389,10 @@ async def test_ctor_no_flush_limits(self, flush_timer_mock): assert instance._table == table assert instance.closed is False assert instance._staged_entries == [] - assert instance.exceptions == [] + assert instance._oldest_exceptions == [] + assert instance._newest_exceptions == [] + assert instance._exception_list_limit == 10 + assert 
instance._exceptions_since_last_raise == 0 assert instance._flow_control._in_flight_mutation_count == 0 assert instance._flow_control._in_flight_mutation_bytes == 0 assert instance._entries_processed_since_last_raise == 0 @@ -579,7 +593,7 @@ async def mock_call(*args, **kwargs): for _ in range(num_entries): await instance.append(_make_mutation(size=1)) # let any flush jobs finish - await instance._prev_flush + await asyncio.gather(*instance._flush_jobs) # should have only flushed once, with large mutation and first mutation in loop assert op_mock.call_count == 1 sent_batch = op_mock.call_args[0][0] @@ -647,74 +661,6 @@ async def test_append_multiple_sequentially(self): assert len(instance._staged_entries) == 3 instance._staged_entries = [] - @pytest.mark.parametrize("raise_exceptions", [True, False]) - @pytest.mark.asyncio - async def test_flush_no_timeout(self, raise_exceptions): - """flush should internally call _schedule_flush""" - mock_obj = AsyncMock() - async with self._make_one() as instance: - with mock.patch.object(instance, "_schedule_flush") as flush_mock: - with mock.patch.object(instance, "_raise_exceptions") as raise_mock: - flush_mock.return_value = mock_obj.__call__() - await instance.flush( - raise_exceptions=raise_exceptions, timeout=None - ) - assert flush_mock.call_count == 1 - assert mock_obj.await_count == 1 - assert raise_mock.call_count == int(raise_exceptions) - - @pytest.mark.asyncio - async def test_flush_w_timeout(self): - """ - flush should raise TimeoutError if incomplete by timeline, but flush - task should continue internally - """ - async with self._make_one() as instance: - # create mock internal flush job - instance._prev_flush = asyncio.create_task(asyncio.sleep(0.5)) - with pytest.raises(asyncio.TimeoutError): - await instance.flush(timeout=0.01) - # ensure that underlying flush task is still running - assert not instance._prev_flush.done() - # ensure flush task can complete without error - await instance._prev_flush - assert instance._prev_flush.done() - assert instance._prev_flush.exception() is None - - @pytest.mark.asyncio - async def test_flush_concurrent_requests(self): - """ - requests should happen in parallel if multiple flushes overlap - """ - import time - - num_flushes = 10 - fake_mutations = [_make_mutation() for _ in range(num_flushes)] - async with self._make_one() as instance: - with mock.patch.object( - instance, "_execute_mutate_rows", AsyncMock() - ) as op_mock: - # mock network calls - async def mock_call(*args, **kwargs): - await asyncio.sleep(0.1) - return [] - - op_mock.side_effect = mock_call - start_time = time.monotonic() - # create a few concurrent flushes - for i in range(num_flushes): - instance._staged_entries = [fake_mutations[i]] - try: - await instance.flush(timeout=0.01) - except asyncio.TimeoutError: - pass - # allow flushes to complete - await instance.flush() - duration = time.monotonic() - start_time - # if flushes were sequential, total duration would be 1s - assert duration < 0.25 - assert op_mock.call_count == num_flushes - @pytest.mark.asyncio async def test_flush_flow_control_concurrent_requests(self): """ @@ -737,10 +683,8 @@ async def mock_call(*args, **kwargs): start_time = time.monotonic() # flush one large batch, that will be broken up into smaller batches instance._staged_entries = fake_mutations - try: - await instance.flush(timeout=0.01) - except asyncio.TimeoutError: - pass + instance._schedule_flush() + await asyncio.sleep(0.01) # make room for new mutations for i in range(num_calls): await 
instance._flow_control.remove_from_flow( @@ -748,142 +692,27 @@ async def mock_call(*args, **kwargs): ) await asyncio.sleep(0.01) # allow flushes to complete - await instance.flush() + await asyncio.gather(*instance._flush_jobs) duration = time.monotonic() - start_time + assert instance._oldest_exceptions == [] + assert instance._newest_exceptions == [] # if flushes were sequential, total duration would be 1s - assert instance.exceptions == [] assert duration < 0.25 assert op_mock.call_count == num_calls - @pytest.mark.asyncio - async def test_overlapping_flush_requests(self): - """ - Should allow multiple flushes to be scheduled concurrently, with - each flush raising the errors related to the mutations at flush time - """ - from google.cloud.bigtable.exceptions import ( - MutationsExceptionGroup, - FailedMutationEntryError, - ) - from google.cloud.bigtable.mutations_batcher import MutationsBatcher - - exception1 = RuntimeError("test error1") - exception2 = ValueError("test error2") - wrapped_exception_list = [ - FailedMutationEntryError(2, mock.Mock(), exc) - for exc in [exception1, exception2] - ] - # excpetion1 is flushed first, but finishes second - sleep_times = [0.1, 0.05] - with mock.patch.object( - MutationsBatcher, "_execute_mutate_rows", AsyncMock() - ) as op_mock: - async with self._make_one() as instance: - # mock network calls - async def mock_call(*args, **kwargs): - time, exception = sleep_times.pop(0), wrapped_exception_list.pop(0) - await asyncio.sleep(time) - return [exception] - - op_mock.side_effect = mock_call - # create a few concurrent flushes - instance._staged_entries = [_make_mutation()] - flush_task1 = asyncio.create_task(instance.flush()) - # let flush task initialize - await asyncio.sleep(0) - instance._staged_entries = [_make_mutation()] - flush_task2 = asyncio.create_task(instance.flush()) - # raise errors - with pytest.raises(MutationsExceptionGroup) as exc2: - await flush_task2 - assert len(exc2.value.exceptions) == 1 - assert exc2.value.total_entries_attempted == 1 - assert exc2.value.exceptions[0].__cause__ == exception2 - - # flushes should be finalized in order. flush_task1 should already be done - assert flush_task1.done() - with pytest.raises(MutationsExceptionGroup) as exc: - await flush_task1 - assert len(exc.value.exceptions) == 1 - assert exc2.value.total_entries_attempted == 1 - assert exc.value.exceptions[0].__cause__ == exception1 - # should have had two separate flush calls - assert op_mock.call_count == 2 - - @pytest.mark.asyncio - async def test_overlapping_flush_requests_background(self): - """ - Test scheduling multiple background flushes without yielding the event loop in between. - - Should result in first flush receiving both entries, and the second flush being an empty - request. - Entries added after a context switch should not be flushed until the next flush call. 
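The _flush_jobs bookkeeping introduced in _schedule_flush above can be seen in isolation in the following small sketch; fake_flush and the delay values are stand-ins for the real flush path, not code from the patch:

    import asyncio

    async def main():
        flush_jobs = set()

        async def fake_flush(delay):
            # stand-in for a background flush of staged entries
            await asyncio.sleep(delay)

        for delay in (0.01, 0.02, 0.03):
            # each background flush removes itself from the set once it finishes,
            # so completed tasks are not kept alive by the batcher
            task = asyncio.ensure_future(fake_flush(delay))
            task.add_done_callback(flush_jobs.remove)
            flush_jobs.add(task)

        # close() waits on whatever is still in flight before raising stored errors
        await asyncio.gather(*flush_jobs, return_exceptions=True)

    asyncio.run(main())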
- """ - from google.cloud.bigtable.exceptions import ( - MutationsExceptionGroup, - FailedMutationEntryError, - ) - from google.cloud.bigtable.mutations_batcher import MutationsBatcher - - test_error = RuntimeError("test error") - with mock.patch.object( - MutationsBatcher, "_execute_mutate_rows", AsyncMock() - ) as op_mock: - # mock network calls - async def mock_call(*args, **kwargs): - return [FailedMutationEntryError(2, mock.Mock(), test_error)] - - async with self._make_one() as instance: - mutations = [_make_mutation() for _ in range(4)] - op_mock.side_effect = mock_call - # create a few concurrent flushes - await instance.append(mutations[0]) - flush_task1 = asyncio.create_task(instance.flush()) - await instance.append(mutations[1]) - flush_task2 = asyncio.create_task(instance.flush()) - await instance.append(mutations[2]) - # should have mutations staged and ready - assert len(instance._staged_entries) == 3 - - # second task should be empty - await flush_task2 - # mutations should have been flushed - assert len(instance._staged_entries) == 0 - # mutations added after a context switch should not be in flush batch - await asyncio.sleep(0) - await instance.append(mutations[3]) - - # flushes should be finalized in order. flush_task1 should already be done - assert flush_task1.done() - # first task should have sent all mutations and raise exception - with pytest.raises(MutationsExceptionGroup) as exc: - await flush_task1 - assert exc.value.total_entries_attempted == 3 - assert len(exc.value.exceptions) == 1 - assert exc.value.exceptions[0].__cause__ == test_error - # should have just one flush call - assert op_mock.call_count == 1 - assert op_mock.call_args[0][0] == mutations[:3] - # final mutation should still be staged for next flush - assert instance._staged_entries == [mutations[3]] - instance._staged_entries = [] - @pytest.mark.asyncio async def test_schedule_flush_no_mutations(self): - """schedule flush should return prev_flush if no new mutations""" + """schedule flush should return None if no staged mutations""" async with self._make_one() as instance: - orig_flush = instance._prev_flush with mock.patch.object(instance, "_flush_internal") as flush_mock: for i in range(3): - instance._schedule_flush() + assert instance._schedule_flush() is None assert flush_mock.call_count == 0 - assert instance._prev_flush == orig_flush @pytest.mark.asyncio async def test_schedule_flush_with_mutations(self): - """if new mutations exist, should update prev_flush to a new flush task""" + """if new mutations exist, should add a new flush task to _flush_jobs""" async with self._make_one() as instance: - orig_flush = instance._prev_flush with mock.patch.object(instance, "_flush_internal") as flush_mock: for i in range(1, 4): mutation = mock.Mock() @@ -896,8 +725,6 @@ async def test_schedule_flush_with_mutations(self): assert instance._staged_count == 0 assert instance._staged_bytes == 0 assert flush_mock.call_count == i - assert instance._prev_flush != orig_flush - orig_flush = instance._prev_flush @pytest.mark.asyncio async def test__flush_internal(self): @@ -919,15 +746,12 @@ async def gen(x): yield x flow_mock.side_effect = lambda x: gen(x) - prev_flush_mock = AsyncMock() - prev_flush = prev_flush_mock.__call__() mutations = [_make_mutation(count=1, size=1)] * num_entries - await instance._flush_internal(mutations, prev_flush) - assert prev_flush_mock.await_count == 1 + await instance._flush_internal(mutations) assert instance._entries_processed_since_last_raise == num_entries assert 
execute_mock.call_count == 1 assert flow_mock.call_count == 1 - assert instance.exceptions == [] + instance._oldest_exceptions, instance._newest_exceptions = [], [] @pytest.mark.parametrize( "num_starting,num_new_errors,expected_total_errors", @@ -946,7 +770,7 @@ async def test__flush_internal_with_errors( self, num_starting, num_new_errors, expected_total_errors ): """ - errors returned from _execute_mutate_rows should be added to self.exceptions + errors returned from _execute_mutate_rows should be added to internal exceptions """ from google.cloud.bigtable import exceptions @@ -966,21 +790,19 @@ async def gen(x): yield x flow_mock.side_effect = lambda x: gen(x) - prev_flush_mock = AsyncMock() - prev_flush = prev_flush_mock.__call__() mutations = [_make_mutation(count=1, size=1)] * num_entries - await instance._flush_internal(mutations, prev_flush) - assert prev_flush_mock.await_count == 1 + await instance._flush_internal(mutations) assert instance._entries_processed_since_last_raise == num_entries assert execute_mock.call_count == 1 assert flow_mock.call_count == 1 - assert len(instance.exceptions) == expected_total_errors + found_exceptions = ( + instance._oldest_exceptions + instance._newest_exceptions + ) + assert len(found_exceptions) == expected_total_errors for i in range(num_starting, expected_total_errors): - assert ( - instance.exceptions[i] == expected_errors[i - num_starting] - ) + assert found_exceptions[i] == expected_errors[i - num_starting] # errors should have index stripped - assert instance.exceptions[i].index is None + assert found_exceptions[i].index is None # clear out exceptions instance._oldest_exceptions, instance._newest_exceptions = [], [] @@ -997,29 +819,6 @@ async def gen(num): return gen(num) - @pytest.mark.asyncio - async def test_manual_flush_end_to_end(self): - """Test full flush process with minimal mocking""" - num_nutations = 10 - mutations = [_make_mutation(count=2, size=2)] * num_nutations - - async with self._make_one( - flow_control_max_mutation_count=3, flow_control_max_bytes=3 - ) as instance: - instance._table.default_operation_timeout = 10 - instance._table.default_per_request_timeout = 9 - with mock.patch.object( - instance._table.client._gapic_client, "mutate_rows" - ) as gapic_mock: - gapic_mock.side_effect = ( - lambda *args, **kwargs: self._mock_gapic_return(num_nutations) - ) - for m in mutations: - await instance.append(m) - assert instance._entries_processed_since_last_raise == 0 - await instance.flush() - assert instance._entries_processed_since_last_raise == num_nutations - @pytest.mark.asyncio async def test_timer_flush_end_to_end(self): """Flush should automatically trigger after flush_interval""" @@ -1107,7 +906,7 @@ async def test__raise_exceptions(self): assert list(exc.exceptions) == expected_exceptions assert str(expected_total) in str(exc) assert instance._entries_processed_since_last_raise == 0 - assert instance.exceptions == [] + instance._oldest_exceptions, instance._newest_exceptions = ([], []) # try calling again instance._raise_exceptions() @@ -1134,7 +933,7 @@ async def test_close(self): await instance.close() assert instance.closed is True assert instance._flush_timer.done() is True - assert instance._prev_flush is None + assert instance._flush_jobs == set() assert flush_mock.call_count == 1 assert raise_mock.call_count == 1 @@ -1154,7 +953,8 @@ async def test_close_w_exceptions(self): assert list(exc.exceptions) == expected_exceptions assert str(expected_total) in str(exc) assert 
instance._entries_processed_since_last_raise == 0 - assert instance.exceptions == [] + # clear out exceptions + instance._oldest_exceptions, instance._newest_exceptions = ([], []) @pytest.mark.asyncio async def test__on_exit(self, recwarn): @@ -1219,77 +1019,48 @@ async def test_timeout_args_passed(self, mutate_rows): assert kwargs["operation_timeout"] == expected_operation_timeout assert kwargs["per_request_timeout"] == expected_per_request_timeout - @pytest.mark.asyncio - async def test_batcher_task_memory_release(self): + @pytest.mark.parametrize("limit,in_e,start_e,end_e", [ + (10, 0, (10, 0), (10, 0)), + (1, 10, (0, 0), (1, 1)), + (10, 1, (0, 0), (1, 0)), + (10, 10, (0, 0), (10, 0)), + (10, 11, (0, 0), (10, 1)), + (3, 20, (0, 0), (3, 3)), + (10, 20, (0, 0), (10, 10)), + (10, 21, (0, 0), (10, 10)), + (2, 1, (2, 0), (2, 1)), + (2, 1, (1, 0), (2, 0)), + (2, 2, (1, 0), (2, 1)), + (3, 1, (3, 1), (3, 2)), + (3, 3, (3, 1), (3, 3)), + ]) + def test__add_exceptions(self, limit, in_e, start_e, end_e): """ - the batcher keeps a reference to the previous task, which may contain - a reference to another previous task. If we're not careful, this - could result in a memory leak. - - Test to ensure that old tasks are released + Test that the _add_exceptions function properly updates the + _oldest_exceptions and _newest_exceptions lists + Args: + - limit: the _exception_list_limit representing the max size of either list + - in_e: size of list of exceptions to send to _add_exceptions + - start_e: a tuple of ints representing the initial sizes of _oldest_exceptions and _newest_exceptions + - end_e: a tuple of ints representing the expected sizes of _oldest_exceptions and _newest_exceptions """ - import weakref - # use locks to control when tasks complete - task_locks = [asyncio.Lock() for _ in range(4)] - lock_idx = 0 - for lock in task_locks: - await asyncio.wait_for(lock.acquire(), timeout=2) - async with self._make_one() as instance: - with mock.patch.object( - instance, "_execute_mutate_rows", AsyncMock() - ) as op_mock: - # mock network calls - async def mock_call(*args, **kwargs): - nonlocal lock_idx - lock_idx += 1 - await task_locks[lock_idx - 1].acquire() - return [] - - op_mock.side_effect = mock_call - # create a starting task - instance._staged_entries = [_make_mutation()] - try: - await instance.flush(timeout=0.01) - except asyncio.TimeoutError: - pass - # capture as weak reference - first_task_ref = weakref.ref(instance._prev_flush) - assert first_task_ref() is not None - assert first_task_ref().done() is False - assert len(first_task_ref().get_stack()) == 1 - # add more flushes to chain - middle_task = None - # add more flushes to chain - for i in range(3): - instance._staged_entries = [_make_mutation()] - try: - await instance.flush(timeout=0.01) - except asyncio.TimeoutError: - pass - if i == 1: - # save a reference to a task in the middle of the chain - middle_task = instance._prev_flush - assert instance._prev_flush != middle_task - # first_task should still be active - assert first_task_ref() is not None - assert first_task_ref().done() is False - # let it complete - task_locks[0].release() - await asyncio.sleep(0.01) - # should be complete, but still referenced in second task, which is still active - assert first_task_ref() is not None - assert first_task_ref().done() is True - # task's internal stack should be cleared - assert len(first_task_ref().get_stack()) == 0 - # finish locks up to middle task - task_locks[1].release() - task_locks[2].release() - await asyncio.sleep(0.01) - 
# first task should no longer be referenced - assert first_task_ref() is None - # but there should still be an active task - assert instance._prev_flush is not None - assert instance._prev_flush.done() is False - # clear locks - for lock in task_locks: - lock.release() + input_list = [RuntimeError(f"mock {i}") for i in range(in_e)] + mock_batcher = mock.Mock() + mock_batcher._oldest_exceptions = [RuntimeError(f"starting mock {i}") for i in range(start_e[0])] + mock_batcher._newest_exceptions = [RuntimeError(f"starting mock {i}") for i in range(start_e[1])] + mock_batcher._exception_list_limit = limit + mock_batcher._exceptions_since_last_raise = 0 + self._get_target_class()._add_exceptions(mock_batcher, input_list) + assert len(mock_batcher._oldest_exceptions) == end_e[0] + assert len(mock_batcher._newest_exceptions) == end_e[1] + assert mock_batcher._exceptions_since_last_raise == in_e + # make sure that the right items ended up in the right spots + # should fill the oldest slots first + oldest_list_diff = end_e[0] - start_e[0] + # new items should bump off starting items + newest_list_diff = min(max(in_e - oldest_list_diff, 0), limit) + for i in range(oldest_list_diff): + assert mock_batcher._oldest_exceptions[i + start_e[0]] == input_list[i] + # then, the newest slots should be filled with the last items of the input list + for i in range(newest_list_diff): + assert mock_batcher._newest_exceptions[i] == input_list[-(newest_list_diff - i)] From 0ecbbcdcff0651cc797b3e8931837bec09d3156b Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 23 Jun 2023 15:04:06 -0700 Subject: [PATCH 176/213] added tests --- google/cloud/bigtable/client.py | 2 +- google/cloud/bigtable/exceptions.py | 13 ++-- google/cloud/bigtable/mutations_batcher.py | 7 +- tests/system/test_system.py | 42 ++++-------- tests/unit/test_exceptions.py | 78 +++++++++++++++++++++- tests/unit/test_mutations_batcher.py | 64 ++++++++++++------ 6 files changed, 145 insertions(+), 61 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index d0c04d7ff..6470ab459 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -744,7 +744,7 @@ def mutations_batcher( - flush_limit_mutation_count: Flush immediately after flush_limit_mutation_count mutations are added across all entries. If None, this limit is ignored. - flush_limit_bytes: Flush immediately after flush_limit_bytes bytes are added. - - flow_control_max_mitation_count: Maximum number of inflight mutations. + - flow_control_max_mutation_count: Maximum number of inflight mutations. - flow_control_max_bytes: Maximum number of inflight bytes. - batch_operation_timeout: timeout for each mutate_rows operation, in seconds. 
If None, table default_operation_timeout will be used diff --git a/google/cloud/bigtable/exceptions.py b/google/cloud/bigtable/exceptions.py index 629d7eb15..fc4e368b9 100644 --- a/google/cloud/bigtable/exceptions.py +++ b/google/cloud/bigtable/exceptions.py @@ -164,13 +164,16 @@ def from_truncated_lists( - entry_count: the total number of entries attempted, successful or not """ first_count, last_count = len(first_list), len(last_list) - if last_count == 0 and first_count == total_excs: - # not truncated; use default constructor - return cls(first_list, entry_count) + if first_count + last_count >= total_excs: + # no exceptions were dropped + return cls(first_list + last_list, entry_count) excs = first_list + last_list truncation_count = total_excs - (first_count + last_count) - base_message = cls._format_message(excs, entry_count, len(last_list)) - message = f"{base_message} (First {first_count} and last {last_count} attached as sub-exceptions; {truncation_count} exceptions truncated)" + base_message = cls._format_message(excs, entry_count, total_excs) + first_message = f"first {first_count}" if first_count else "" + last_message = f"last {last_count}" if last_count else "" + conjunction = " and " if first_message and last_message else "" + message = f"{base_message} ({first_message}{conjunction}{last_message} attached as sub-exceptions; {truncation_count} truncated)" return cls(excs, entry_count, message) diff --git a/google/cloud/bigtable/mutations_batcher.py b/google/cloud/bigtable/mutations_batcher.py index 921eeddba..a6b844f7d 100644 --- a/google/cloud/bigtable/mutations_batcher.py +++ b/google/cloud/bigtable/mutations_batcher.py @@ -166,7 +166,6 @@ class MutationsBatcher: to use as few network requests as required Flushes: - - manually - every flush_interval seconds - after queue reaches flush_count in quantity - after queue reaches flush_size_bytes in storage size @@ -314,7 +313,7 @@ def _schedule_flush(self) -> asyncio.Future[None] | None: return new_task return None - async def _flush_internal(self,new_entries: list[RowMutationEntry]): + async def _flush_internal(self, new_entries: list[RowMutationEntry]): """ Flushes a set of mutations to the server, and updates internal state @@ -333,7 +332,9 @@ async def _flush_internal(self,new_entries: list[RowMutationEntry]): self._entries_processed_since_last_raise += len(new_entries) self._add_exceptions(found_exceptions) - async def _execute_mutate_rows(self, batch: list[RowMutationEntry]) -> list[FailedMutationEntryError]: + async def _execute_mutate_rows( + self, batch: list[RowMutationEntry] + ) -> list[FailedMutationEntryError]: """ Helper to execute mutation operation on a batch diff --git a/tests/system/test_system.py b/tests/system/test_system.py index 776a8bb9f..b086024ed 100644 --- a/tests/system/test_system.py +++ b/tests/system/test_system.py @@ -342,28 +342,6 @@ async def test_mutations_batcher_context_manager(client, table, temp_rows): assert len(batcher._staged_entries) == 0 -@retry.Retry(predicate=retry.if_exception_type(ClientError), initial=1, maximum=5) -@pytest.mark.asyncio -async def test_mutations_batcher_manual_flush(client, table, temp_rows): - """ - batcher should flush when manually requested - """ - from google.cloud.bigtable.mutations import RowMutationEntry - - new_value = uuid.uuid4().hex.encode() - row_key, mutation = await _create_row_and_mutation( - table, temp_rows, new_value=new_value - ) - bulk_mutation = RowMutationEntry(row_key, [mutation]) - async with table.mutations_batcher() as batcher: - await 
batcher.append(bulk_mutation) - assert len(batcher._staged_entries) == 1 - await batcher.flush() - assert len(batcher._staged_entries) == 0 - # ensure cell is updated - assert (await _retrieve_cell_value(table, row_key)) == new_value - - @retry.Retry(predicate=retry.if_exception_type(ClientError), initial=1, maximum=5) @pytest.mark.asyncio async def test_mutations_batcher_timer_flush(client, table, temp_rows): @@ -408,13 +386,15 @@ async def test_mutations_batcher_count_flush(client, table, temp_rows): async with table.mutations_batcher(flush_limit_mutation_count=2) as batcher: await batcher.append(bulk_mutation) + assert len(batcher._flush_jobs) == 0 # should be noop; flush not scheduled - await batcher._prev_flush assert len(batcher._staged_entries) == 1 await batcher.append(bulk_mutation2) # task should now be scheduled - await batcher._prev_flush + assert len(batcher._flush_jobs) == 1 + await asyncio.gather(*batcher._flush_jobs) assert len(batcher._staged_entries) == 0 + assert len(batcher._flush_jobs) == 0 # ensure cells were updated assert (await _retrieve_cell_value(table, row_key)) == new_value assert (await _retrieve_cell_value(table, row_key2)) == new_value2 @@ -442,13 +422,14 @@ async def test_mutations_batcher_bytes_flush(client, table, temp_rows): async with table.mutations_batcher(flush_limit_bytes=flush_limit) as batcher: await batcher.append(bulk_mutation) - # should be noop; flush not scheduled - await batcher._prev_flush + assert len(batcher._flush_jobs) == 0 assert len(batcher._staged_entries) == 1 await batcher.append(bulk_mutation2) # task should now be scheduled - await batcher._prev_flush + assert len(batcher._flush_jobs) == 1 assert len(batcher._staged_entries) == 0 + # let flush complete + await asyncio.gather(*batcher._flush_jobs) # ensure cells were updated assert (await _retrieve_cell_value(table, row_key)) == new_value assert (await _retrieve_cell_value(table, row_key2)) == new_value2 @@ -480,11 +461,12 @@ async def test_mutations_batcher_no_flush(client, table, temp_rows): await batcher.append(bulk_mutation) assert len(batcher._staged_entries) == 1 await batcher.append(bulk_mutation2) - # should be noop; flush not scheduled - await batcher._prev_flush + # flush not scheduled + assert len(batcher._flush_jobs) == 0 await asyncio.sleep(0.01) assert len(batcher._staged_entries) == 2 - # ensure cells were updated + assert len(batcher._flush_jobs) == 0 + # ensure cells were not updated assert (await _retrieve_cell_value(table, row_key)) == start_value assert (await _retrieve_cell_value(table, row_key2)) == start_value diff --git a/tests/unit/test_exceptions.py b/tests/unit/test_exceptions.py index 316b090e1..ef186a47c 100644 --- a/tests/unit/test_exceptions.py +++ b/tests/unit/test_exceptions.py @@ -136,12 +136,12 @@ def _make_one(self, excs=None, num_entries=3): @pytest.mark.parametrize( "exception_list,total_entries,expected_message", [ - ([Exception()], 1, "1 failed entry (from 1 attempted)"), - ([Exception()], 2, "1 failed entry (from 2 attempted)"), + ([Exception()], 1, "1 failed entry from 1 attempted."), + ([Exception()], 2, "1 failed entry from 2 attempted."), ( [Exception(), RuntimeError()], 2, - "2 failed entries (from 2 attempted)", + "2 failed entries from 2 attempted.", ), ], ) @@ -154,6 +154,77 @@ def test_raise(self, exception_list, total_entries, expected_message): assert str(e.value) == expected_message assert list(e.value.exceptions) == exception_list + def test_raise_custom_message(self): + """ + should be able to set a custom error message + """ 
+ custom_message = "custom message" + exception_list = [Exception()] + with pytest.raises(self._get_class()) as e: + raise self._get_class()(exception_list, 5, message=custom_message) + assert str(e.value) == custom_message + assert list(e.value.exceptions) == exception_list + + @pytest.mark.parametrize( + "first_list_len,second_list_len,total_excs,entry_count,expected_message", + [ + (3, 0, 3, 4, "3 failed entries from 4 attempted."), + (1, 0, 1, 2, "1 failed entry from 2 attempted."), + (0, 1, 1, 2, "1 failed entry from 2 attempted."), + (2, 2, 4, 4, "4 failed entries from 4 attempted."), + ( + 1, + 1, + 3, + 2, + "3 failed entries from 2 attempted. (first 1 and last 1 attached as sub-exceptions; 1 truncated)", + ), + ( + 1, + 2, + 100, + 2, + "100 failed entries from 2 attempted. (first 1 and last 2 attached as sub-exceptions; 97 truncated)", + ), + ( + 2, + 1, + 4, + 9, + "4 failed entries from 9 attempted. (first 2 and last 1 attached as sub-exceptions; 1 truncated)", + ), + ( + 3, + 0, + 10, + 10, + "10 failed entries from 10 attempted. (first 3 attached as sub-exceptions; 7 truncated)", + ), + ( + 0, + 3, + 10, + 10, + "10 failed entries from 10 attempted. (last 3 attached as sub-exceptions; 7 truncated)", + ), + ], + ) + def test_from_truncated_lists( + self, first_list_len, second_list_len, total_excs, entry_count, expected_message + ): + """ + Should be able to make MutationsExceptionGroup using a pair of + lists representing a larger truncated list of exceptions + """ + first_list = [Exception()] * first_list_len + second_list = [Exception()] * second_list_len + with pytest.raises(self._get_class()) as e: + raise self._get_class().from_truncated_lists( + first_list, second_list, total_excs, entry_count + ) + assert str(e.value) == expected_message + assert list(e.value.exceptions) == first_list + second_list + class TestRetryExceptionGroup(TestBigtableExceptionGroup): def _get_class(self): @@ -300,6 +371,7 @@ def test_no_index(self): assert isinstance(e.value, Exception) assert test_entry.is_idempotent.call_count == 1 + class TestFailedQueryShardError: def _get_class(self): from google.cloud.bigtable.exceptions import FailedQueryShardError diff --git a/tests/unit/test_mutations_batcher.py b/tests/unit/test_mutations_batcher.py index c35319dac..23f86d7fa 100644 --- a/tests/unit/test_mutations_batcher.py +++ b/tests/unit/test_mutations_batcher.py @@ -276,9 +276,9 @@ async def test_add_to_flow_oversize(self): class TestMutationsBatcher: - def _get_target_class(self): from google.cloud.bigtable.mutations_batcher import MutationsBatcher + return MutationsBatcher def _make_one(self, table=None, **kwargs): @@ -753,6 +753,22 @@ async def gen(x): assert flow_mock.call_count == 1 instance._oldest_exceptions, instance._newest_exceptions = [], [] + @pytest.mark.asyncio + async def test_flush_clears_job_list(self): + """ + a job should be added to _flush_jobs when _schedule_flush is called, + and removed when it completes + """ + async with self._make_one() as instance: + with mock.patch.object(instance, "_flush_internal", AsyncMock()): + mutations = [_make_mutation(count=1, size=1)] + instance._staged_entries = mutations + assert instance._flush_jobs == set() + new_job = instance._schedule_flush() + assert instance._flush_jobs == {new_job} + await new_job + assert instance._flush_jobs == set() + @pytest.mark.parametrize( "num_starting,num_new_errors,expected_total_errors", [ @@ -1019,21 +1035,24 @@ async def test_timeout_args_passed(self, mutate_rows): assert kwargs["operation_timeout"] == 
expected_operation_timeout assert kwargs["per_request_timeout"] == expected_per_request_timeout - @pytest.mark.parametrize("limit,in_e,start_e,end_e", [ - (10, 0, (10, 0), (10, 0)), - (1, 10, (0, 0), (1, 1)), - (10, 1, (0, 0), (1, 0)), - (10, 10, (0, 0), (10, 0)), - (10, 11, (0, 0), (10, 1)), - (3, 20, (0, 0), (3, 3)), - (10, 20, (0, 0), (10, 10)), - (10, 21, (0, 0), (10, 10)), - (2, 1, (2, 0), (2, 1)), - (2, 1, (1, 0), (2, 0)), - (2, 2, (1, 0), (2, 1)), - (3, 1, (3, 1), (3, 2)), - (3, 3, (3, 1), (3, 3)), - ]) + @pytest.mark.parametrize( + "limit,in_e,start_e,end_e", + [ + (10, 0, (10, 0), (10, 0)), + (1, 10, (0, 0), (1, 1)), + (10, 1, (0, 0), (1, 0)), + (10, 10, (0, 0), (10, 0)), + (10, 11, (0, 0), (10, 1)), + (3, 20, (0, 0), (3, 3)), + (10, 20, (0, 0), (10, 10)), + (10, 21, (0, 0), (10, 10)), + (2, 1, (2, 0), (2, 1)), + (2, 1, (1, 0), (2, 0)), + (2, 2, (1, 0), (2, 1)), + (3, 1, (3, 1), (3, 2)), + (3, 3, (3, 1), (3, 3)), + ], + ) def test__add_exceptions(self, limit, in_e, start_e, end_e): """ Test that the _add_exceptions function properly updates the @@ -1046,8 +1065,12 @@ def test__add_exceptions(self, limit, in_e, start_e, end_e): """ input_list = [RuntimeError(f"mock {i}") for i in range(in_e)] mock_batcher = mock.Mock() - mock_batcher._oldest_exceptions = [RuntimeError(f"starting mock {i}") for i in range(start_e[0])] - mock_batcher._newest_exceptions = [RuntimeError(f"starting mock {i}") for i in range(start_e[1])] + mock_batcher._oldest_exceptions = [ + RuntimeError(f"starting mock {i}") for i in range(start_e[0]) + ] + mock_batcher._newest_exceptions = [ + RuntimeError(f"starting mock {i}") for i in range(start_e[1]) + ] mock_batcher._exception_list_limit = limit mock_batcher._exceptions_since_last_raise = 0 self._get_target_class()._add_exceptions(mock_batcher, input_list) @@ -1063,4 +1086,7 @@ def test__add_exceptions(self, limit, in_e, start_e, end_e): assert mock_batcher._oldest_exceptions[i + start_e[0]] == input_list[i] # then, the newest slots should be filled with the last items of the input list for i in range(newest_list_diff): - assert mock_batcher._newest_exceptions[i] == input_list[-(newest_list_diff - i)] + assert ( + mock_batcher._newest_exceptions[i] + == input_list[-(newest_list_diff - i)] + ) From 61b367a4ed653b45b7c6566448ec037590902870 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 23 Jun 2023 15:52:23 -0700 Subject: [PATCH 177/213] pr comment improvements --- google/cloud/bigtable/mutations_batcher.py | 23 +++++----- tests/unit/test_mutations_batcher.py | 51 ++++++++++++---------- 2 files changed, 41 insertions(+), 33 deletions(-) diff --git a/google/cloud/bigtable/mutations_batcher.py b/google/cloud/bigtable/mutations_batcher.py index a6b844f7d..6c90d1a90 100644 --- a/google/cloud/bigtable/mutations_batcher.py +++ b/google/cloud/bigtable/mutations_batcher.py @@ -14,10 +14,11 @@ # from __future__ import annotations +from typing import Any, TYPE_CHECKING import asyncio import atexit import warnings -from typing import Any, TYPE_CHECKING +from collections import deque from google.cloud.bigtable.mutations import RowMutationEntry from google.cloud.bigtable.exceptions import MutationsExceptionGroup @@ -240,7 +241,9 @@ def __init__( # keep track of the first and last _exception_list_limit exceptions self._exception_list_limit: int = 10 self._oldest_exceptions: list[Exception] = [] - self._newest_exceptions: list[Exception] = [] + self._newest_exceptions: deque[Exception] = deque( + maxlen=self._exception_list_limit + ) # clean up on program exit 
atexit.register(self._on_exit) @@ -382,11 +385,7 @@ def _add_exceptions(self, excs: list[Exception]): excs = excs[addition_count:] if excs: # populate newest_exceptions with remaining found_exceptions - keep_count = self._exception_list_limit - len(excs) - self._newest_exceptions = ( - excs[-self._exception_list_limit :] - + self._newest_exceptions[:keep_count] - ) + self._newest_exceptions.extend(excs[-self._exception_list_limit :]) def _raise_exceptions(self): """ @@ -397,7 +396,8 @@ def _raise_exceptions(self): """ if self._oldest_exceptions or self._newest_exceptions: oldest, self._oldest_exceptions = self._oldest_exceptions, [] - newest, self._newest_exceptions = self._newest_exceptions, [] + newest = list(self._newest_exceptions) + self._newest_exceptions.clear() entry_count, self._entries_processed_since_last_raise = ( self._entries_processed_since_last_raise, 0, @@ -428,14 +428,15 @@ async def close(self): self.closed = True self._flush_timer.cancel() self._schedule_flush() - await asyncio.gather(*self._flush_jobs, return_exceptions=True) + if self._flush_jobs: + await asyncio.gather(*self._flush_jobs, return_exceptions=True) try: await self._flush_timer except asyncio.CancelledError: pass + atexit.unregister(self._on_exit) # raise unreported exceptions self._raise_exceptions() - atexit.unregister(self._on_exit) def _on_exit(self): """ @@ -480,6 +481,8 @@ async def _wait_for_batch_results( If a task fails with a different exception, it will be included in the output list. Successful tasks will not be represented in the output list. """ + if not tasks: + return [] all_results = await asyncio.gather(*tasks, return_exceptions=True) found_errors = [] for result in all_results: diff --git a/tests/unit/test_mutations_batcher.py b/tests/unit/test_mutations_batcher.py index 23f86d7fa..a900468d5 100644 --- a/tests/unit/test_mutations_batcher.py +++ b/tests/unit/test_mutations_batcher.py @@ -302,9 +302,9 @@ async def test_ctor_defaults(self, flush_timer_mock): assert instance._table == table assert instance.closed is False assert instance._flush_jobs == set() - assert instance._staged_entries == [] - assert instance._oldest_exceptions == [] - assert instance._newest_exceptions == [] + assert len(instance._staged_entries) == 0 + assert len(instance._oldest_exceptions) == 0 + assert len(instance._newest_exceptions) == 0 assert instance._exception_list_limit == 10 assert instance._exceptions_since_last_raise == 0 assert instance._flow_control._max_mutation_count == 100000 @@ -347,9 +347,9 @@ async def test_ctor_explicit(self, flush_timer_mock): assert instance._table == table assert instance.closed is False assert instance._flush_jobs == set() - assert instance._staged_entries == [] - assert instance._oldest_exceptions == [] - assert instance._newest_exceptions == [] + assert len(instance._staged_entries) == 0 + assert len(instance._oldest_exceptions) == 0 + assert len(instance._newest_exceptions) == 0 assert instance._exception_list_limit == 10 assert instance._exceptions_since_last_raise == 0 assert ( @@ -389,8 +389,8 @@ async def test_ctor_no_flush_limits(self, flush_timer_mock): assert instance._table == table assert instance.closed is False assert instance._staged_entries == [] - assert instance._oldest_exceptions == [] - assert instance._newest_exceptions == [] + assert len(instance._oldest_exceptions) == 0 + assert len(instance._newest_exceptions) == 0 assert instance._exception_list_limit == 10 assert instance._exceptions_since_last_raise == 0 assert 
instance._flow_control._in_flight_mutation_count == 0 @@ -694,8 +694,8 @@ async def mock_call(*args, **kwargs): # allow flushes to complete await asyncio.gather(*instance._flush_jobs) duration = time.monotonic() - start_time - assert instance._oldest_exceptions == [] - assert instance._newest_exceptions == [] + assert len(instance._oldest_exceptions) == 0 + assert len(instance._newest_exceptions) == 0 # if flushes were sequential, total duration would be 1s assert duration < 0.25 assert op_mock.call_count == num_calls @@ -751,7 +751,8 @@ async def gen(x): assert instance._entries_processed_since_last_raise == num_entries assert execute_mock.call_count == 1 assert flow_mock.call_count == 1 - instance._oldest_exceptions, instance._newest_exceptions = [], [] + instance._oldest_exceptions.clear() + instance._newest_exceptions.clear() @pytest.mark.asyncio async def test_flush_clears_job_list(self): @@ -811,8 +812,8 @@ async def gen(x): assert instance._entries_processed_since_last_raise == num_entries assert execute_mock.call_count == 1 assert flow_mock.call_count == 1 - found_exceptions = ( - instance._oldest_exceptions + instance._newest_exceptions + found_exceptions = instance._oldest_exceptions + list( + instance._newest_exceptions ) assert len(found_exceptions) == expected_total_errors for i in range(num_starting, expected_total_errors): @@ -820,7 +821,8 @@ async def gen(x): # errors should have index stripped assert found_exceptions[i].index is None # clear out exceptions - instance._oldest_exceptions, instance._newest_exceptions = [], [] + instance._oldest_exceptions.clear() + instance._newest_exceptions.clear() async def _mock_gapic_return(self, num=5): from google.cloud.bigtable_v2.types import MutateRowsResponse @@ -1051,6 +1053,9 @@ async def test_timeout_args_passed(self, mutate_rows): (2, 2, (1, 0), (2, 1)), (3, 1, (3, 1), (3, 2)), (3, 3, (3, 1), (3, 3)), + (1000, 5, (999, 0), (1000, 4)), + (1000, 5, (0, 0), (5, 0)), + (1000, 5, (1000, 0), (1000, 5)), ], ) def test__add_exceptions(self, limit, in_e, start_e, end_e): @@ -1063,14 +1068,17 @@ def test__add_exceptions(self, limit, in_e, start_e, end_e): - start_e: a tuple of ints representing the initial sizes of _oldest_exceptions and _newest_exceptions - end_e: a tuple of ints representing the expected sizes of _oldest_exceptions and _newest_exceptions """ + from collections import deque + input_list = [RuntimeError(f"mock {i}") for i in range(in_e)] mock_batcher = mock.Mock() mock_batcher._oldest_exceptions = [ RuntimeError(f"starting mock {i}") for i in range(start_e[0]) ] - mock_batcher._newest_exceptions = [ - RuntimeError(f"starting mock {i}") for i in range(start_e[1]) - ] + mock_batcher._newest_exceptions = deque( + [RuntimeError(f"starting mock {i}") for i in range(start_e[1])], + maxlen=limit, + ) mock_batcher._exception_list_limit = limit mock_batcher._exceptions_since_last_raise = 0 self._get_target_class()._add_exceptions(mock_batcher, input_list) @@ -1080,13 +1088,10 @@ def test__add_exceptions(self, limit, in_e, start_e, end_e): # make sure that the right items ended up in the right spots # should fill the oldest slots first oldest_list_diff = end_e[0] - start_e[0] - # new items should bump off starting items + # new items should by added on top of the starting list newest_list_diff = min(max(in_e - oldest_list_diff, 0), limit) for i in range(oldest_list_diff): assert mock_batcher._oldest_exceptions[i + start_e[0]] == input_list[i] # then, the newest slots should be filled with the last items of the input list - for i 
in range(newest_list_diff): - assert ( - mock_batcher._newest_exceptions[i] - == input_list[-(newest_list_diff - i)] - ) + for i in range(1, newest_list_diff + 1): + assert mock_batcher._newest_exceptions[-i] == input_list[-i] From 0ce16c61ab4c38bc4305462ef6429f4b6b19ff82 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 23 Jun 2023 16:04:23 -0700 Subject: [PATCH 178/213] fixed metadata --- google/cloud/bigtable/client.py | 7 ++++++- tests/unit/test_client.py | 7 +++---- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index a01bc0e61..3d33eebf9 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -217,7 +217,12 @@ async def _ping_and_warm_instances( tasks = [ ping_rpc( request={"name": instance_name, "app_profile_id": app_profile_id}, - metadata=_make_metadata(table_name, app_profile_id), + metadata=[ + ( + "x-goog-request-params", + f"name={instance_name}&app_profile_id={app_profile_id}", + ) + ], wait_for_ready=True, ) for (instance_name, table_name, app_profile_id) in instance_list diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 10711ce52..805a6340d 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -327,11 +327,11 @@ async def test__ping_and_warm_instances(self): assert metadata[0][0] == "x-goog-request-params" assert ( metadata[0][1] - == f"table_name={expected_table}&app_profile_id={expected_app_profile}" + == f"name={expected_instance}&app_profile_id={expected_app_profile}" ) @pytest.mark.asyncio - async def test_ping_and_warm_single_instance(self): + async def test__ping_and_warm_single_instance(self): """ should be able to call ping and warm with single instance """ @@ -359,8 +359,7 @@ async def test_ping_and_warm_single_instance(self): assert len(metadata) == 1 assert metadata[0][0] == "x-goog-request-params" assert ( - metadata[0][1] - == "table_name=test-table&app_profile_id=test-app-profile" + metadata[0][1] == "name=test-instance&app_profile_id=test-app-profile" ) @pytest.mark.asyncio From 9a407f0e889d98a4038de97054c4e208363ba276 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 23 Jun 2023 16:11:57 -0700 Subject: [PATCH 179/213] moved v3 client into /data folder --- google/cloud/bigtable/{ => data}/__init__.py | 0 google/cloud/bigtable/{ => data}/_helpers.py | 0 google/cloud/bigtable/{ => data}/_mutate_rows.py | 0 google/cloud/bigtable/{ => data}/_read_rows.py | 0 google/cloud/bigtable/{ => data}/client.py | 0 google/cloud/bigtable/{ => data}/exceptions.py | 0 google/cloud/bigtable/{ => data}/gapic_version.py | 0 google/cloud/bigtable/{ => data}/iterators.py | 0 google/cloud/bigtable/{ => data}/mutations.py | 0 google/cloud/bigtable/{ => data}/mutations_batcher.py | 0 google/cloud/bigtable/{ => data}/read_modify_write_rules.py | 0 google/cloud/bigtable/{ => data}/read_rows_query.py | 0 google/cloud/bigtable/{ => data}/row.py | 0 google/cloud/bigtable/{ => data}/row_filters.py | 0 14 files changed, 0 insertions(+), 0 deletions(-) rename google/cloud/bigtable/{ => data}/__init__.py (100%) rename google/cloud/bigtable/{ => data}/_helpers.py (100%) rename google/cloud/bigtable/{ => data}/_mutate_rows.py (100%) rename google/cloud/bigtable/{ => data}/_read_rows.py (100%) rename google/cloud/bigtable/{ => data}/client.py (100%) rename google/cloud/bigtable/{ => data}/exceptions.py (100%) rename google/cloud/bigtable/{ => data}/gapic_version.py (100%) rename google/cloud/bigtable/{ => data}/iterators.py (100%) 
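Note: the simplified `_add_exceptions` in the batcher change above works because `_newest_exceptions` is now a `collections.deque` with `maxlen` set to the exception list limit, as the updated test fixture sets up. A minimal standalone sketch of that behavior, with illustrative values only:

```python
from collections import deque

# Sketch only: a bounded deque silently drops its oldest entries on extend(),
# which is why the manual slicing previously done in _add_exceptions is no
# longer needed once _newest_exceptions is a deque(maxlen=limit).
limit = 3
newest_exceptions = deque(maxlen=limit)
newest_exceptions.extend(RuntimeError(f"mock {i}") for i in range(5))

assert len(newest_exceptions) == limit
assert str(newest_exceptions[-1]) == "mock 4"  # most recent entry is kept
assert str(newest_exceptions[0]) == "mock 2"   # "mock 0" and "mock 1" were evicted
```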
rename google/cloud/bigtable/{ => data}/mutations.py (100%) rename google/cloud/bigtable/{ => data}/mutations_batcher.py (100%) rename google/cloud/bigtable/{ => data}/read_modify_write_rules.py (100%) rename google/cloud/bigtable/{ => data}/read_rows_query.py (100%) rename google/cloud/bigtable/{ => data}/row.py (100%) rename google/cloud/bigtable/{ => data}/row_filters.py (100%) diff --git a/google/cloud/bigtable/__init__.py b/google/cloud/bigtable/data/__init__.py similarity index 100% rename from google/cloud/bigtable/__init__.py rename to google/cloud/bigtable/data/__init__.py diff --git a/google/cloud/bigtable/_helpers.py b/google/cloud/bigtable/data/_helpers.py similarity index 100% rename from google/cloud/bigtable/_helpers.py rename to google/cloud/bigtable/data/_helpers.py diff --git a/google/cloud/bigtable/_mutate_rows.py b/google/cloud/bigtable/data/_mutate_rows.py similarity index 100% rename from google/cloud/bigtable/_mutate_rows.py rename to google/cloud/bigtable/data/_mutate_rows.py diff --git a/google/cloud/bigtable/_read_rows.py b/google/cloud/bigtable/data/_read_rows.py similarity index 100% rename from google/cloud/bigtable/_read_rows.py rename to google/cloud/bigtable/data/_read_rows.py diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/data/client.py similarity index 100% rename from google/cloud/bigtable/client.py rename to google/cloud/bigtable/data/client.py diff --git a/google/cloud/bigtable/exceptions.py b/google/cloud/bigtable/data/exceptions.py similarity index 100% rename from google/cloud/bigtable/exceptions.py rename to google/cloud/bigtable/data/exceptions.py diff --git a/google/cloud/bigtable/gapic_version.py b/google/cloud/bigtable/data/gapic_version.py similarity index 100% rename from google/cloud/bigtable/gapic_version.py rename to google/cloud/bigtable/data/gapic_version.py diff --git a/google/cloud/bigtable/iterators.py b/google/cloud/bigtable/data/iterators.py similarity index 100% rename from google/cloud/bigtable/iterators.py rename to google/cloud/bigtable/data/iterators.py diff --git a/google/cloud/bigtable/mutations.py b/google/cloud/bigtable/data/mutations.py similarity index 100% rename from google/cloud/bigtable/mutations.py rename to google/cloud/bigtable/data/mutations.py diff --git a/google/cloud/bigtable/mutations_batcher.py b/google/cloud/bigtable/data/mutations_batcher.py similarity index 100% rename from google/cloud/bigtable/mutations_batcher.py rename to google/cloud/bigtable/data/mutations_batcher.py diff --git a/google/cloud/bigtable/read_modify_write_rules.py b/google/cloud/bigtable/data/read_modify_write_rules.py similarity index 100% rename from google/cloud/bigtable/read_modify_write_rules.py rename to google/cloud/bigtable/data/read_modify_write_rules.py diff --git a/google/cloud/bigtable/read_rows_query.py b/google/cloud/bigtable/data/read_rows_query.py similarity index 100% rename from google/cloud/bigtable/read_rows_query.py rename to google/cloud/bigtable/data/read_rows_query.py diff --git a/google/cloud/bigtable/row.py b/google/cloud/bigtable/data/row.py similarity index 100% rename from google/cloud/bigtable/row.py rename to google/cloud/bigtable/data/row.py diff --git a/google/cloud/bigtable/row_filters.py b/google/cloud/bigtable/data/row_filters.py similarity index 100% rename from google/cloud/bigtable/row_filters.py rename to google/cloud/bigtable/data/row_filters.py From c1587e5ed8922b7141bbe8f4cff8137b6f18a044 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 23 Jun 2023 16:14:14 -0700 
Subject: [PATCH 180/213] moved legacy client back into root --- .../bigtable/{deprecated => }/__init__.py | 2 +- .../bigtable/{deprecated => }/app_profile.py | 8 +- .../cloud/bigtable/{deprecated => }/backup.py | 20 +- google/cloud/bigtable/batcher.py | 395 ++++++++++++++++++ .../cloud/bigtable/{deprecated => }/client.py | 40 +- .../bigtable/{deprecated => }/cluster.py | 22 +- .../{deprecated => }/column_family.py | 2 +- google/cloud/bigtable/deprecated/batcher.py | 146 ------- google/cloud/bigtable/deprecated/py.typed | 2 - .../{deprecated => }/encryption_info.py | 4 +- .../cloud/bigtable/{deprecated => }/enums.py | 0 .../cloud/bigtable/{deprecated => }/error.py | 0 google/cloud/bigtable/gapic_version.py | 16 + .../bigtable/{deprecated => }/instance.py | 68 +-- .../cloud/bigtable/{deprecated => }/policy.py | 0 google/cloud/bigtable/{deprecated => }/row.py | 18 +- .../bigtable/{deprecated => }/row_data.py | 10 +- .../bigtable/{deprecated => }/row_filters.py | 0 .../bigtable/{deprecated => }/row_merger.py | 2 +- .../bigtable/{deprecated => }/row_set.py | 0 .../cloud/bigtable/{deprecated => }/table.py | 68 +-- 21 files changed, 541 insertions(+), 282 deletions(-) rename google/cloud/bigtable/{deprecated => }/__init__.py (92%) rename google/cloud/bigtable/{deprecated => }/app_profile.py (97%) rename google/cloud/bigtable/{deprecated => }/backup.py (96%) create mode 100644 google/cloud/bigtable/batcher.py rename google/cloud/bigtable/{deprecated => }/client.py (92%) rename google/cloud/bigtable/{deprecated => }/cluster.py (95%) rename google/cloud/bigtable/{deprecated => }/column_family.py (99%) delete mode 100644 google/cloud/bigtable/deprecated/batcher.py delete mode 100644 google/cloud/bigtable/deprecated/py.typed rename google/cloud/bigtable/{deprecated => }/encryption_info.py (93%) rename google/cloud/bigtable/{deprecated => }/enums.py (100%) rename google/cloud/bigtable/{deprecated => }/error.py (100%) create mode 100644 google/cloud/bigtable/gapic_version.py rename google/cloud/bigtable/{deprecated => }/instance.py (91%) rename google/cloud/bigtable/{deprecated => }/policy.py (100%) rename google/cloud/bigtable/{deprecated => }/row.py (98%) rename google/cloud/bigtable/{deprecated => }/row_data.py (97%) rename google/cloud/bigtable/{deprecated => }/row_filters.py (100%) rename google/cloud/bigtable/{deprecated => }/row_merger.py (99%) rename google/cloud/bigtable/{deprecated => }/row_set.py (100%) rename google/cloud/bigtable/{deprecated => }/table.py (95%) diff --git a/google/cloud/bigtable/deprecated/__init__.py b/google/cloud/bigtable/__init__.py similarity index 92% rename from google/cloud/bigtable/deprecated/__init__.py rename to google/cloud/bigtable/__init__.py index a54fffdf1..7331ff241 100644 --- a/google/cloud/bigtable/deprecated/__init__.py +++ b/google/cloud/bigtable/__init__.py @@ -14,7 +14,7 @@ """Google Cloud Bigtable API package.""" -from google.cloud.bigtable.deprecated.client import Client +from google.cloud.bigtable.client import Client from google.cloud.bigtable import gapic_version as package_version diff --git a/google/cloud/bigtable/deprecated/app_profile.py b/google/cloud/bigtable/app_profile.py similarity index 97% rename from google/cloud/bigtable/deprecated/app_profile.py rename to google/cloud/bigtable/app_profile.py index a5c3df356..8cde66146 100644 --- a/google/cloud/bigtable/deprecated/app_profile.py +++ b/google/cloud/bigtable/app_profile.py @@ -17,7 +17,7 @@ import re -from google.cloud.bigtable.deprecated.enums import RoutingPolicyType +from 
google.cloud.bigtable.enums import RoutingPolicyType from google.cloud.bigtable_admin_v2.types import instance from google.protobuf import field_mask_pb2 from google.api_core.exceptions import NotFound @@ -47,8 +47,8 @@ class AppProfile(object): :param: routing_policy_type: (Optional) The type of the routing policy. Possible values are represented by the following constants: - :data:`google.cloud.bigtable.deprecated.enums.RoutingPolicyType.ANY` - :data:`google.cloud.bigtable.deprecated.enums.RoutingPolicyType.SINGLE` + :data:`google.cloud.bigtable.enums.RoutingPolicyType.ANY` + :data:`google.cloud.bigtable.enums.RoutingPolicyType.SINGLE` :type: description: str :param: description: (Optional) Long form description of the use @@ -148,7 +148,7 @@ def from_pb(cls, app_profile_pb, instance): :type app_profile_pb: :class:`instance.app_profile_pb` :param app_profile_pb: An instance protobuf object. - :type instance: :class:`google.cloud.bigtable.deprecated.instance.Instance` + :type instance: :class:`google.cloud.bigtable.instance.Instance` :param instance: The instance that owns the cluster. :rtype: :class:`AppProfile` diff --git a/google/cloud/bigtable/deprecated/backup.py b/google/cloud/bigtable/backup.py similarity index 96% rename from google/cloud/bigtable/deprecated/backup.py rename to google/cloud/bigtable/backup.py index fc15318bc..6986d730a 100644 --- a/google/cloud/bigtable/deprecated/backup.py +++ b/google/cloud/bigtable/backup.py @@ -19,8 +19,8 @@ from google.cloud._helpers import _datetime_to_pb_timestamp # type: ignore from google.cloud.bigtable_admin_v2 import BigtableTableAdminClient from google.cloud.bigtable_admin_v2.types import table -from google.cloud.bigtable.deprecated.encryption_info import EncryptionInfo -from google.cloud.bigtable.deprecated.policy import Policy +from google.cloud.bigtable.encryption_info import EncryptionInfo +from google.cloud.bigtable.policy import Policy from google.cloud.exceptions import NotFound # type: ignore from google.protobuf import field_mask_pb2 @@ -50,7 +50,7 @@ class Backup(object): :type backup_id: str :param backup_id: The ID of the backup. - :type instance: :class:`~google.cloud.bigtable.deprecated.instance.Instance` + :type instance: :class:`~google.cloud.bigtable.instance.Instance` :param instance: The Instance that owns this Backup. :type cluster_id: str @@ -188,7 +188,7 @@ def expire_time(self, new_expire_time): def encryption_info(self): """Encryption info for this Backup. - :rtype: :class:`google.cloud.bigtable.deprecated.encryption.EncryptionInfo` + :rtype: :class:`google.cloud.bigtable.encryption.EncryptionInfo` :returns: The encryption information for this backup. """ return self._encryption_info @@ -238,10 +238,10 @@ def from_pb(cls, backup_pb, instance): :type backup_pb: :class:`table.Backup` :param backup_pb: A Backup protobuf object. - :type instance: :class:`Instance ` + :type instance: :class:`Instance ` :param instance: The Instance that owns the Backup. - :rtype: :class:`~google.cloud.bigtable.deprecated.backup.Backup` + :rtype: :class:`~google.cloud.bigtable.backup.Backup` :returns: The backup parsed from the protobuf response. :raises: ValueError: If the backup name does not match the expected format or the parsed project ID does not match the @@ -440,7 +440,7 @@ def restore(self, table_id, instance_id=None): def get_iam_policy(self): """Gets the IAM access control policy for this backup. 
- :rtype: :class:`google.cloud.bigtable.deprecated.policy.Policy` + :rtype: :class:`google.cloud.bigtable.policy.Policy` :returns: The current IAM policy of this backup. """ table_api = self._instance._client.table_admin_client @@ -452,13 +452,13 @@ def set_iam_policy(self, policy): existing policy. For more information about policy, please see documentation of - class `google.cloud.bigtable.deprecated.policy.Policy` + class `google.cloud.bigtable.policy.Policy` - :type policy: :class:`google.cloud.bigtable.deprecated.policy.Policy` + :type policy: :class:`google.cloud.bigtable.policy.Policy` :param policy: A new IAM policy to replace the current IAM policy of this backup. - :rtype: :class:`google.cloud.bigtable.deprecated.policy.Policy` + :rtype: :class:`google.cloud.bigtable.policy.Policy` :returns: The current IAM policy of this backup. """ table_api = self._instance._client.table_admin_client diff --git a/google/cloud/bigtable/batcher.py b/google/cloud/bigtable/batcher.py new file mode 100644 index 000000000..a6eb806e9 --- /dev/null +++ b/google/cloud/bigtable/batcher.py @@ -0,0 +1,395 @@ +# Copyright 2018 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""User friendly container for Google Cloud Bigtable MutationBatcher.""" +import threading +import queue +import concurrent.futures +import atexit + + +from google.api_core.exceptions import from_grpc_status +from dataclasses import dataclass + + +FLUSH_COUNT = 100 # after this many elements, send out the batch + +MAX_MUTATION_SIZE = 20 * 1024 * 1024 # 20MB # after this many bytes, send out the batch + +MAX_OUTSTANDING_BYTES = 100 * 1024 * 1024 # 100MB # max inflight byte size. + +MAX_OUTSTANDING_ELEMENTS = 100000 # max inflight mutations. + + +class MutationsBatchError(Exception): + """Error in the batch request""" + + def __init__(self, message, exc): + self.exc = exc + self.message = message + super().__init__(self.message) + + +class _MutationsBatchQueue(object): + """Private Threadsafe Queue to hold rows for batching.""" + + def __init__(self, max_mutation_bytes=MAX_MUTATION_SIZE, flush_count=FLUSH_COUNT): + """Specify the queue constraints""" + self._queue = queue.Queue() + self.total_mutation_count = 0 + self.total_size = 0 + self.max_mutation_bytes = max_mutation_bytes + self.flush_count = flush_count + + def get(self): + """Retrieve an item from the queue. Recalculate queue size.""" + row = self._queue.get() + mutation_size = row.get_mutations_size() + self.total_mutation_count -= len(row._get_mutations()) + self.total_size -= mutation_size + return row + + def put(self, item): + """Insert an item to the queue. 
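Note: the `MutationsBatchError` introduced in the new batcher module above simply wraps the list of per-mutation exceptions collected during flushes. A tiny illustration (the error values are made up):

```python
from google.cloud.bigtable.batcher import MutationsBatchError

errors = [ValueError("row 3 rejected"), ValueError("row 7 rejected")]
err = MutationsBatchError("Errors in batch mutations.", exc=errors)

assert err.message == "Errors in batch mutations."
assert err.exc is errors  # the original list of per-mutation exceptions
```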
Recalculate queue size.""" + + mutation_count = len(item._get_mutations()) + + self._queue.put(item) + + self.total_size += item.get_mutations_size() + self.total_mutation_count += mutation_count + + def full(self): + """Check if the queue is full.""" + if ( + self.total_mutation_count >= self.flush_count + or self.total_size >= self.max_mutation_bytes + ): + return True + return False + + def empty(self): + return self._queue.empty() + + +@dataclass +class _BatchInfo: + """Keeping track of size of a batch""" + + mutations_count: int = 0 + rows_count: int = 0 + mutations_size: int = 0 + + +class _FlowControl(object): + def __init__( + self, + max_mutations=MAX_OUTSTANDING_ELEMENTS, + max_mutation_bytes=MAX_OUTSTANDING_BYTES, + ): + """Control the inflight requests. Keep track of the mutations, row bytes and row counts. + As requests to backend are being made, adjust the number of mutations being processed. + + If threshold is reached, block the flow. + Reopen the flow as requests are finished. + """ + self.max_mutations = max_mutations + self.max_mutation_bytes = max_mutation_bytes + self.inflight_mutations = 0 + self.inflight_size = 0 + self.event = threading.Event() + self.event.set() + + def is_blocked(self): + """Returns True if: + + - inflight mutations >= max_mutations, or + - inflight bytes size >= max_mutation_bytes, or + """ + + return ( + self.inflight_mutations >= self.max_mutations + or self.inflight_size >= self.max_mutation_bytes + ) + + def control_flow(self, batch_info): + """ + Calculate the resources used by this batch + """ + + self.inflight_mutations += batch_info.mutations_count + self.inflight_size += batch_info.mutations_size + self.set_flow_control_status() + + def wait(self): + """ + Wait until flow control pushback has been released. + It awakens as soon as `event` is set. + """ + self.event.wait() + + def set_flow_control_status(self): + """Check the inflight mutations and size. + + If values exceed the allowed threshold, block the event. + """ + if self.is_blocked(): + self.event.clear() # sleep + else: + self.event.set() # awaken the threads + + def release(self, batch_info): + """ + Release the resources. + Decrement the row size to allow enqueued mutations to be run. + """ + self.inflight_mutations -= batch_info.mutations_count + self.inflight_size -= batch_info.mutations_size + self.set_flow_control_status() + + +class MutationsBatcher(object): + """A MutationsBatcher is used in batch cases where the number of mutations + is large or unknown. It will store :class:`DirectRow` in memory until one of the + size limits is reached, or an explicit call to :func:`flush()` is performed. When + a flush event occurs, the :class:`DirectRow` in memory will be sent to Cloud + Bigtable. Batching mutations is more efficient than sending individual + request. + + This class is not suited for usage in systems where each mutation + must be guaranteed to be sent, since calling mutate may only result in an + in-memory change. In a case of a system crash, any :class:`DirectRow` remaining in + memory will not necessarily be sent to the service, even after the + completion of the :func:`mutate()` method. + + Note on thread safety: The same :class:`MutationBatcher` cannot be shared by multiple end-user threads. + + :type table: class + :param table: class:`~google.cloud.bigtable.table.Table`. + + :type flush_count: int + :param flush_count: (Optional) Max number of rows to flush. If it + reaches the max number of rows it calls finish_batch() to mutate the + current row batch. 
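Note: the `_FlowControl` class above implements backpressure with a `threading.Event`: the event stays set while capacity is available and is cleared once inflight counts cross a threshold. A rough standalone sketch of that pattern follows; the class name and limits are illustrative, not the library's API:

```python
import threading

class _SimpleFlowControl:
    """Sketch of the Event-based backpressure pattern used by _FlowControl."""

    def __init__(self, max_inflight=3):
        self.max_inflight = max_inflight
        self.inflight = 0
        self._event = threading.Event()
        self._event.set()  # gate starts open

    def admit(self, count):
        # Mirrors wait() followed by control_flow(): the current caller proceeds
        # even if it pushes usage over the limit; only later callers block.
        self._event.wait()
        self.inflight += count
        if self.inflight >= self.max_inflight:
            self._event.clear()

    def release(self, count):
        self.inflight -= count
        if self.inflight < self.max_inflight:
            self._event.set()  # wake any blocked callers
```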
Default is FLUSH_COUNT (1000 rows). + + :type max_row_bytes: int + :param max_row_bytes: (Optional) Max number of row mutations size to + flush. If it reaches the max number of row mutations size it calls + finish_batch() to mutate the current row batch. Default is MAX_ROW_BYTES + (5 MB). + + :type flush_interval: float + :param flush_interval: (Optional) The interval (in seconds) between asynchronous flush. + Default is 1 second. + + :type batch_completed_callback: Callable[list:[`~google.rpc.status_pb2.Status`]] = None + :param batch_completed_callback: (Optional) A callable for handling responses + after the current batch is sent. The callable function expect a list of grpc + Status. + """ + + def __init__( + self, + table, + flush_count=FLUSH_COUNT, + max_row_bytes=MAX_MUTATION_SIZE, + flush_interval=1, + batch_completed_callback=None, + ): + self._rows = _MutationsBatchQueue( + max_mutation_bytes=max_row_bytes, flush_count=flush_count + ) + self.table = table + self._executor = concurrent.futures.ThreadPoolExecutor() + atexit.register(self.close) + self._timer = threading.Timer(flush_interval, self.flush) + self._timer.start() + self.flow_control = _FlowControl( + max_mutations=MAX_OUTSTANDING_ELEMENTS, + max_mutation_bytes=MAX_OUTSTANDING_BYTES, + ) + self.futures_mapping = {} + self.exceptions = queue.Queue() + self._user_batch_completed_callback = batch_completed_callback + + @property + def flush_count(self): + return self._rows.flush_count + + @property + def max_row_bytes(self): + return self._rows.max_mutation_bytes + + def __enter__(self): + """Starting the MutationsBatcher as a context manager""" + return self + + def mutate(self, row): + """Add a row to the batch. If the current batch meets one of the size + limits, the batch is sent asynchronously. + + For example: + + .. literalinclude:: snippets_table.py + :start-after: [START bigtable_api_batcher_mutate] + :end-before: [END bigtable_api_batcher_mutate] + :dedent: 4 + + :type row: class + :param row: :class:`~google.cloud.bigtable.row.DirectRow`. + + :raises: One of the following: + * :exc:`~.table._BigtableRetryableError` if any row returned a transient error. + * :exc:`RuntimeError` if the number of responses doesn't match the number of rows that were retried + """ + self._rows.put(row) + + if self._rows.full(): + self._flush_async() + + def mutate_rows(self, rows): + """Add multiple rows to the batch. If the current batch meets one of the size + limits, the batch is sent asynchronously. + + For example: + + .. literalinclude:: snippets_table.py + :start-after: [START bigtable_api_batcher_mutate_rows] + :end-before: [END bigtable_api_batcher_mutate_rows] + :dedent: 4 + + :type rows: list:[`~google.cloud.bigtable.row.DirectRow`] + :param rows: list:[`~google.cloud.bigtable.row.DirectRow`]. + + :raises: One of the following: + * :exc:`~.table._BigtableRetryableError` if any row returned a transient error. + * :exc:`RuntimeError` if the number of responses doesn't match the number of rows that were retried + """ + for row in rows: + self.mutate(row) + + def flush(self): + """Sends the current batch to Cloud Bigtable synchronously. + For example: + + .. literalinclude:: snippets_table.py + :start-after: [START bigtable_api_batcher_flush] + :end-before: [END bigtable_api_batcher_flush] + :dedent: 4 + + :raises: + * :exc:`.batcherMutationsBatchError` if there's any error in the mutations. 
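Note: a hedged usage sketch of the new batcher, based on the `__init__`, `mutate`, and context-manager methods shown above; the table and `DirectRow` objects are assumed to come from elsewhere, and the limits are illustrative:

```python
from google.cloud.bigtable.batcher import MutationsBatcher

def write_rows(table, rows):
    # rows: iterable of DirectRow instances with mutations already staged
    with MutationsBatcher(table, flush_count=100, flush_interval=1) as batcher:
        for row in rows:
            batcher.mutate(row)  # may trigger an asynchronous flush once limits are hit
    # leaving the with-block calls close(), which flushes remaining rows,
    # shuts down the thread pool, and raises MutationsBatchError on failures
```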
+ """ + rows_to_flush = [] + while not self._rows.empty(): + rows_to_flush.append(self._rows.get()) + response = self._flush_rows(rows_to_flush) + return response + + def _flush_async(self): + """Sends the current batch to Cloud Bigtable asynchronously. + + :raises: + * :exc:`.batcherMutationsBatchError` if there's any error in the mutations. + """ + + rows_to_flush = [] + mutations_count = 0 + mutations_size = 0 + rows_count = 0 + batch_info = _BatchInfo() + + while not self._rows.empty(): + row = self._rows.get() + mutations_count += len(row._get_mutations()) + mutations_size += row.get_mutations_size() + rows_count += 1 + rows_to_flush.append(row) + batch_info.mutations_count = mutations_count + batch_info.rows_count = rows_count + batch_info.mutations_size = mutations_size + + if ( + rows_count >= self.flush_count + or mutations_size >= self.max_row_bytes + or mutations_count >= self.flow_control.max_mutations + or mutations_size >= self.flow_control.max_mutation_bytes + or self._rows.empty() # submit when it reached the end of the queue + ): + # wait for resources to become available, before submitting any new batch + self.flow_control.wait() + # once unblocked, submit a batch + # event flag will be set by control_flow to block subsequent thread, but not blocking this one + self.flow_control.control_flow(batch_info) + future = self._executor.submit(self._flush_rows, rows_to_flush) + self.futures_mapping[future] = batch_info + future.add_done_callback(self._batch_completed_callback) + + # reset and start a new batch + rows_to_flush = [] + mutations_size = 0 + rows_count = 0 + mutations_count = 0 + batch_info = _BatchInfo() + + def _batch_completed_callback(self, future): + """Callback for when the mutation has finished to clean up the current batch + and release items from the flow controller. + + Raise exceptions if there's any. + Release the resources locked by the flow control and allow enqueued tasks to be run. + """ + + processed_rows = self.futures_mapping[future] + self.flow_control.release(processed_rows) + del self.futures_mapping[future] + + def _flush_rows(self, rows_to_flush): + """Mutate the specified rows. + + :raises: + * :exc:`.batcherMutationsBatchError` if there's any error in the mutations. + """ + responses = [] + if len(rows_to_flush) > 0: + response = self.table.mutate_rows(rows_to_flush) + + if self._user_batch_completed_callback: + self._user_batch_completed_callback(response) + + for result in response: + if result.code != 0: + exc = from_grpc_status(result.code, result.message) + self.exceptions.put(exc) + responses.append(result) + + return responses + + def __exit__(self, exc_type, exc_value, exc_traceback): + """Clean up resources. Flush and shutdown the ThreadPoolExecutor.""" + self.close() + + def close(self): + """Clean up resources. Flush and shutdown the ThreadPoolExecutor. + Any errors will be raised. + + :raises: + * :exc:`.batcherMutationsBatchError` if there's any error in the mutations. 
+ """ + self.flush() + self._executor.shutdown(wait=True) + atexit.unregister(self.close) + if self.exceptions.qsize() > 0: + exc = list(self.exceptions.queue) + raise MutationsBatchError("Errors in batch mutations.", exc=exc) diff --git a/google/cloud/bigtable/deprecated/client.py b/google/cloud/bigtable/client.py similarity index 92% rename from google/cloud/bigtable/deprecated/client.py rename to google/cloud/bigtable/client.py index c13e5f0da..c82a268c6 100644 --- a/google/cloud/bigtable/deprecated/client.py +++ b/google/cloud/bigtable/client.py @@ -18,14 +18,14 @@ In the hierarchy of API concepts -* a :class:`~google.cloud.bigtable.deprecated.client.Client` owns an - :class:`~google.cloud.bigtable.deprecated.instance.Instance` -* an :class:`~google.cloud.bigtable.deprecated.instance.Instance` owns a - :class:`~google.cloud.bigtable.deprecated.table.Table` -* a :class:`~google.cloud.bigtable.deprecated.table.Table` owns a +* a :class:`~google.cloud.bigtable.client.Client` owns an + :class:`~google.cloud.bigtable.instance.Instance` +* an :class:`~google.cloud.bigtable.instance.Instance` owns a + :class:`~google.cloud.bigtable.table.Table` +* a :class:`~google.cloud.bigtable.table.Table` owns a :class:`~.column_family.ColumnFamily` -* a :class:`~google.cloud.bigtable.deprecated.table.Table` owns a - :class:`~google.cloud.bigtable.deprecated.row.Row` (and all the cells in the row) +* a :class:`~google.cloud.bigtable.table.Table` owns a + :class:`~google.cloud.bigtable.row.Row` (and all the cells in the row) """ import os import warnings @@ -46,13 +46,13 @@ ) from google.cloud import bigtable -from google.cloud.bigtable.deprecated.instance import Instance -from google.cloud.bigtable.deprecated.cluster import Cluster +from google.cloud.bigtable.instance import Instance +from google.cloud.bigtable.cluster import Cluster from google.cloud.client import ClientWithProject # type: ignore from google.cloud.bigtable_admin_v2.types import instance -from google.cloud.bigtable.deprecated.cluster import _CLUSTER_NAME_RE +from google.cloud.bigtable.cluster import _CLUSTER_NAME_RE from google.cloud.environment_vars import BIGTABLE_EMULATOR # type: ignore @@ -91,9 +91,6 @@ def inner(self): class Client(ClientWithProject): """Client for interacting with Google Cloud Bigtable API. - DEPRECATED: This class is deprecated and may be removed in a future version - Please use `google.cloud.bigtable.BigtableDataClient` instead. - .. note:: Since the Cloud Bigtable API requires the gRPC transport, no @@ -162,11 +159,6 @@ def __init__( admin_client_options=None, channel=None, ): - warnings.warn( - "'Client' is deprecated. Please use 'google.cloud.bigtable.BigtableDataClient' instead.", - DeprecationWarning, - stacklevel=2, - ) if client_info is None: client_info = client_info_lib.ClientInfo( client_library_version=bigtable.__version__, @@ -438,10 +430,10 @@ def instance(self, instance_id, display_name=None, instance_type=None, labels=No :param instance_type: (Optional) The type of the instance. Possible values are represented by the following constants: - :data:`google.cloud.bigtable.deprecated.instance.InstanceType.PRODUCTION`. - :data:`google.cloud.bigtable.deprecated.instance.InstanceType.DEVELOPMENT`, + :data:`google.cloud.bigtable.instance.InstanceType.PRODUCTION`. + :data:`google.cloud.bigtable.instance.InstanceType.DEVELOPMENT`, Defaults to - :data:`google.cloud.bigtable.deprecated.instance.InstanceType.UNSPECIFIED`. + :data:`google.cloud.bigtable.instance.InstanceType.UNSPECIFIED`. 
:type labels: dict :param labels: (Optional) Labels are a flexible and lightweight @@ -454,7 +446,7 @@ def instance(self, instance_id, display_name=None, instance_type=None, labels=No be between 0 and 63 characters long. Keys and values must both be under 128 bytes. - :rtype: :class:`~google.cloud.bigtable.deprecated.instance.Instance` + :rtype: :class:`~google.cloud.bigtable.instance.Instance` :returns: an instance owned by this client. """ return Instance( @@ -478,7 +470,7 @@ def list_instances(self): :rtype: tuple :returns: (instances, failed_locations), where 'instances' is list of - :class:`google.cloud.bigtable.deprecated.instance.Instance`, and + :class:`google.cloud.bigtable.instance.Instance`, and 'failed_locations' is a list of locations which could not be resolved. """ @@ -501,7 +493,7 @@ def list_clusters(self): :rtype: tuple :returns: (clusters, failed_locations), where 'clusters' is list of - :class:`google.cloud.bigtable.deprecated.instance.Cluster`, and + :class:`google.cloud.bigtable.instance.Cluster`, and 'failed_locations' is a list of strings representing locations which could not be resolved. """ diff --git a/google/cloud/bigtable/deprecated/cluster.py b/google/cloud/bigtable/cluster.py similarity index 95% rename from google/cloud/bigtable/deprecated/cluster.py rename to google/cloud/bigtable/cluster.py index b60d3503c..11fb5492d 100644 --- a/google/cloud/bigtable/deprecated/cluster.py +++ b/google/cloud/bigtable/cluster.py @@ -42,7 +42,7 @@ class Cluster(object): :type cluster_id: str :param cluster_id: The ID of the cluster. - :type instance: :class:`~google.cloud.bigtable.deprecated.instance.Instance` + :type instance: :class:`~google.cloud.bigtable.instance.Instance` :param instance: The instance where the cluster resides. :type location_id: str @@ -62,10 +62,10 @@ class Cluster(object): :param default_storage_type: (Optional) The type of storage Possible values are represented by the following constants: - :data:`google.cloud.bigtable.deprecated.enums.StorageType.SSD`. - :data:`google.cloud.bigtable.deprecated.enums.StorageType.HDD`, + :data:`google.cloud.bigtable.enums.StorageType.SSD`. + :data:`google.cloud.bigtable.enums.StorageType.HDD`, Defaults to - :data:`google.cloud.bigtable.deprecated.enums.StorageType.UNSPECIFIED`. + :data:`google.cloud.bigtable.enums.StorageType.UNSPECIFIED`. :type kms_key_name: str :param kms_key_name: (Optional, Creation Only) The name of the KMS customer managed @@ -84,11 +84,11 @@ class Cluster(object): :param _state: (`OutputOnly`) The current state of the cluster. Possible values are represented by the following constants: - :data:`google.cloud.bigtable.deprecated.enums.Cluster.State.NOT_KNOWN`. - :data:`google.cloud.bigtable.deprecated.enums.Cluster.State.READY`. - :data:`google.cloud.bigtable.deprecated.enums.Cluster.State.CREATING`. - :data:`google.cloud.bigtable.deprecated.enums.Cluster.State.RESIZING`. - :data:`google.cloud.bigtable.deprecated.enums.Cluster.State.DISABLED`. + :data:`google.cloud.bigtable.enums.Cluster.State.NOT_KNOWN`. + :data:`google.cloud.bigtable.enums.Cluster.State.READY`. + :data:`google.cloud.bigtable.enums.Cluster.State.CREATING`. + :data:`google.cloud.bigtable.enums.Cluster.State.RESIZING`. + :data:`google.cloud.bigtable.enums.Cluster.State.DISABLED`. :type min_serve_nodes: int :param min_serve_nodes: (Optional) The minimum number of nodes to be set in the cluster for autoscaling. 
@@ -150,7 +150,7 @@ def from_pb(cls, cluster_pb, instance): :type cluster_pb: :class:`instance.Cluster` :param cluster_pb: An instance protobuf object. - :type instance: :class:`google.cloud.bigtable.deprecated.instance.Instance` + :type instance: :class:`google.cloud.bigtable.instance.Instance` :param instance: The instance that owns the cluster. :rtype: :class:`Cluster` @@ -236,7 +236,7 @@ def name(self): @property def state(self): - """google.cloud.bigtable.deprecated.enums.Cluster.State: state of cluster. + """google.cloud.bigtable.enums.Cluster.State: state of cluster. For example: diff --git a/google/cloud/bigtable/deprecated/column_family.py b/google/cloud/bigtable/column_family.py similarity index 99% rename from google/cloud/bigtable/deprecated/column_family.py rename to google/cloud/bigtable/column_family.py index 3d4c1a642..80232958d 100644 --- a/google/cloud/bigtable/deprecated/column_family.py +++ b/google/cloud/bigtable/column_family.py @@ -195,7 +195,7 @@ class ColumnFamily(object): :param column_family_id: The ID of the column family. Must be of the form ``[_a-zA-Z0-9][-_.a-zA-Z0-9]*``. - :type table: :class:`Table ` + :type table: :class:`Table ` :param table: The table that owns the column family. :type gc_rule: :class:`GarbageCollectionRule` diff --git a/google/cloud/bigtable/deprecated/batcher.py b/google/cloud/bigtable/deprecated/batcher.py deleted file mode 100644 index 58cf6b6e3..000000000 --- a/google/cloud/bigtable/deprecated/batcher.py +++ /dev/null @@ -1,146 +0,0 @@ -# Copyright 2018 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""User friendly container for Google Cloud Bigtable MutationBatcher.""" - - -FLUSH_COUNT = 1000 -MAX_MUTATIONS = 100000 -MAX_ROW_BYTES = 5242880 # 5MB - - -class MaxMutationsError(ValueError): - """The number of mutations for bulk request is too big.""" - - -class MutationsBatcher(object): - """A MutationsBatcher is used in batch cases where the number of mutations - is large or unknown. It will store DirectRows in memory until one of the - size limits is reached, or an explicit call to flush() is performed. When - a flush event occurs, the DirectRows in memory will be sent to Cloud - Bigtable. Batching mutations is more efficient than sending individual - request. - - This class is not suited for usage in systems where each mutation - must be guaranteed to be sent, since calling mutate may only result in an - in-memory change. In a case of a system crash, any DirectRows remaining in - memory will not necessarily be sent to the service, even after the - completion of the mutate() method. - - TODO: Performance would dramatically improve if this class had the - capability of asynchronous, parallel RPCs. - - :type table: class - :param table: class:`~google.cloud.bigtable.deprecated.table.Table`. - - :type flush_count: int - :param flush_count: (Optional) Max number of rows to flush. If it - reaches the max number of rows it calls finish_batch() to mutate the - current row batch. Default is FLUSH_COUNT (1000 rows). 
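Note: a hedged sketch of constructing a `Cluster` handle with the parameters documented above; IDs, zone, and node count are placeholders, and the `instance` handle is assumed from the earlier client sketch. Such a handle can then be passed to `Instance.create(clusters=[...])` as described later in this patch.

```python
from google.cloud.bigtable.cluster import Cluster
from google.cloud.bigtable import enums

cluster = Cluster(
    "my-cluster",
    instance,                      # assumed Instance handle from the earlier sketch
    location_id="us-central1-a",
    serve_nodes=3,
    default_storage_type=enums.StorageType.SSD,
)
```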
- - :type max_row_bytes: int - :param max_row_bytes: (Optional) Max number of row mutations size to - flush. If it reaches the max number of row mutations size it calls - finish_batch() to mutate the current row batch. Default is MAX_ROW_BYTES - (5 MB). - """ - - def __init__(self, table, flush_count=FLUSH_COUNT, max_row_bytes=MAX_ROW_BYTES): - self.rows = [] - self.total_mutation_count = 0 - self.total_size = 0 - self.table = table - self.flush_count = flush_count - self.max_row_bytes = max_row_bytes - - def mutate(self, row): - """Add a row to the batch. If the current batch meets one of the size - limits, the batch is sent synchronously. - - For example: - - .. literalinclude:: snippets.py - :start-after: [START bigtable_api_batcher_mutate] - :end-before: [END bigtable_api_batcher_mutate] - :dedent: 4 - - :type row: class - :param row: class:`~google.cloud.bigtable.deprecated.row.DirectRow`. - - :raises: One of the following: - * :exc:`~.table._BigtableRetryableError` if any - row returned a transient error. - * :exc:`RuntimeError` if the number of responses doesn't - match the number of rows that were retried - * :exc:`.batcher.MaxMutationsError` if any row exceeds max - mutations count. - """ - mutation_count = len(row._get_mutations()) - if mutation_count > MAX_MUTATIONS: - raise MaxMutationsError( - "The row key {} exceeds the number of mutations {}.".format( - row.row_key, mutation_count - ) - ) - - if (self.total_mutation_count + mutation_count) >= MAX_MUTATIONS: - self.flush() - - self.rows.append(row) - self.total_mutation_count += mutation_count - self.total_size += row.get_mutations_size() - - if self.total_size >= self.max_row_bytes or len(self.rows) >= self.flush_count: - self.flush() - - def mutate_rows(self, rows): - """Add multiple rows to the batch. If the current batch meets one of the size - limits, the batch is sent synchronously. - - For example: - - .. literalinclude:: snippets.py - :start-after: [START bigtable_api_batcher_mutate_rows] - :end-before: [END bigtable_api_batcher_mutate_rows] - :dedent: 4 - - :type rows: list:[`~google.cloud.bigtable.deprecated.row.DirectRow`] - :param rows: list:[`~google.cloud.bigtable.deprecated.row.DirectRow`]. - - :raises: One of the following: - * :exc:`~.table._BigtableRetryableError` if any - row returned a transient error. - * :exc:`RuntimeError` if the number of responses doesn't - match the number of rows that were retried - * :exc:`.batcher.MaxMutationsError` if any row exceeds max - mutations count. - """ - for row in rows: - self.mutate(row) - - def flush(self): - """Sends the current. batch to Cloud Bigtable. - For example: - - .. literalinclude:: snippets.py - :start-after: [START bigtable_api_batcher_flush] - :end-before: [END bigtable_api_batcher_flush] - :dedent: 4 - - """ - if len(self.rows) != 0: - self.table.mutate_rows(self.rows) - self.total_mutation_count = 0 - self.total_size = 0 - self.rows = [] diff --git a/google/cloud/bigtable/deprecated/py.typed b/google/cloud/bigtable/deprecated/py.typed deleted file mode 100644 index 7bd4705d4..000000000 --- a/google/cloud/bigtable/deprecated/py.typed +++ /dev/null @@ -1,2 +0,0 @@ -# Marker file for PEP 561. -# The google-cloud-bigtable package uses inline types. 
diff --git a/google/cloud/bigtable/deprecated/encryption_info.py b/google/cloud/bigtable/encryption_info.py similarity index 93% rename from google/cloud/bigtable/deprecated/encryption_info.py rename to google/cloud/bigtable/encryption_info.py index daa0d9232..1757297bc 100644 --- a/google/cloud/bigtable/deprecated/encryption_info.py +++ b/google/cloud/bigtable/encryption_info.py @@ -14,7 +14,7 @@ """Class for encryption info for tables and backups.""" -from google.cloud.bigtable.deprecated.error import Status +from google.cloud.bigtable.error import Status class EncryptionInfo: @@ -27,7 +27,7 @@ class EncryptionInfo: :type encryption_type: int :param encryption_type: See :class:`enums.EncryptionInfo.EncryptionType` - :type encryption_status: google.cloud.bigtable.deprecated.encryption.Status + :type encryption_status: google.cloud.bigtable.encryption.Status :param encryption_status: The encryption status. :type kms_key_version: str diff --git a/google/cloud/bigtable/deprecated/enums.py b/google/cloud/bigtable/enums.py similarity index 100% rename from google/cloud/bigtable/deprecated/enums.py rename to google/cloud/bigtable/enums.py diff --git a/google/cloud/bigtable/deprecated/error.py b/google/cloud/bigtable/error.py similarity index 100% rename from google/cloud/bigtable/deprecated/error.py rename to google/cloud/bigtable/error.py diff --git a/google/cloud/bigtable/gapic_version.py b/google/cloud/bigtable/gapic_version.py new file mode 100644 index 000000000..0f1a446f3 --- /dev/null +++ b/google/cloud/bigtable/gapic_version.py @@ -0,0 +1,16 @@ +# -*- coding: utf-8 -*- +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +__version__ = "2.19.0" # {x-release-please-version} diff --git a/google/cloud/bigtable/deprecated/instance.py b/google/cloud/bigtable/instance.py similarity index 91% rename from google/cloud/bigtable/deprecated/instance.py rename to google/cloud/bigtable/instance.py index 33475d261..6d092cefd 100644 --- a/google/cloud/bigtable/deprecated/instance.py +++ b/google/cloud/bigtable/instance.py @@ -16,9 +16,9 @@ import re -from google.cloud.bigtable.deprecated.app_profile import AppProfile -from google.cloud.bigtable.deprecated.cluster import Cluster -from google.cloud.bigtable.deprecated.table import Table +from google.cloud.bigtable.app_profile import AppProfile +from google.cloud.bigtable.cluster import Cluster +from google.cloud.bigtable.table import Table from google.protobuf import field_mask_pb2 @@ -28,7 +28,7 @@ from google.api_core.exceptions import NotFound -from google.cloud.bigtable.deprecated.policy import Policy +from google.cloud.bigtable.policy import Policy import warnings @@ -61,7 +61,7 @@ class Instance(object): :type instance_id: str :param instance_id: The ID of the instance. - :type client: :class:`Client ` + :type client: :class:`Client ` :param client: The client that owns the instance. Provides authorization and a project ID. @@ -75,10 +75,10 @@ class Instance(object): :param instance_type: (Optional) The type of the instance. 
Possible values are represented by the following constants: - :data:`google.cloud.bigtable.deprecated.enums.Instance.Type.PRODUCTION`. - :data:`google.cloud.bigtable.deprecated.enums.Instance.Type.DEVELOPMENT`, + :data:`google.cloud.bigtable.enums.Instance.Type.PRODUCTION`. + :data:`google.cloud.bigtable.enums.Instance.Type.DEVELOPMENT`, Defaults to - :data:`google.cloud.bigtable.deprecated.enums.Instance.Type.UNSPECIFIED`. + :data:`google.cloud.bigtable.enums.Instance.Type.UNSPECIFIED`. :type labels: dict :param labels: (Optional) Labels are a flexible and lightweight @@ -95,9 +95,9 @@ class Instance(object): :param _state: (`OutputOnly`) The current state of the instance. Possible values are represented by the following constants: - :data:`google.cloud.bigtable.deprecated.enums.Instance.State.STATE_NOT_KNOWN`. - :data:`google.cloud.bigtable.deprecated.enums.Instance.State.READY`. - :data:`google.cloud.bigtable.deprecated.enums.Instance.State.CREATING`. + :data:`google.cloud.bigtable.enums.Instance.State.STATE_NOT_KNOWN`. + :data:`google.cloud.bigtable.enums.Instance.State.READY`. + :data:`google.cloud.bigtable.enums.Instance.State.CREATING`. """ def __init__( @@ -141,7 +141,7 @@ def from_pb(cls, instance_pb, client): :type instance_pb: :class:`instance.Instance` :param instance_pb: An instance protobuf object. - :type client: :class:`Client ` + :type client: :class:`Client ` :param client: The client that owns the instance. :rtype: :class:`Instance` @@ -196,7 +196,7 @@ def name(self): @property def state(self): - """google.cloud.bigtable.deprecated.enums.Instance.State: state of Instance. + """google.cloud.bigtable.enums.Instance.State: state of Instance. For example: @@ -272,12 +272,12 @@ def create( persisting Bigtable data. Possible values are represented by the following constants: - :data:`google.cloud.bigtable.deprecated.enums.StorageType.SSD`. - :data:`google.cloud.bigtable.deprecated.enums.StorageType.HDD`, + :data:`google.cloud.bigtable.enums.StorageType.SSD`. + :data:`google.cloud.bigtable.enums.StorageType.HDD`, Defaults to - :data:`google.cloud.bigtable.deprecated.enums.StorageType.UNSPECIFIED`. + :data:`google.cloud.bigtable.enums.StorageType.UNSPECIFIED`. - :type clusters: class:`~[~google.cloud.bigtable.deprecated.cluster.Cluster]` + :type clusters: class:`~[~google.cloud.bigtable.cluster.Cluster]` :param clusters: List of clusters to be created. :rtype: :class:`~google.api_core.operation.Operation` @@ -478,7 +478,7 @@ def get_iam_policy(self, requested_policy_version=None): than the one that was requested, based on the feature syntax in the policy fetched. - :rtype: :class:`google.cloud.bigtable.deprecated.policy.Policy` + :rtype: :class:`google.cloud.bigtable.policy.Policy` :returns: The current IAM policy of this instance """ args = {"resource": self.name} @@ -497,7 +497,7 @@ def set_iam_policy(self, policy): existing policy. 
For more information about policy, please see documentation of - class `google.cloud.bigtable.deprecated.policy.Policy` + class `google.cloud.bigtable.policy.Policy` For example: @@ -506,11 +506,11 @@ class `google.cloud.bigtable.deprecated.policy.Policy` :end-before: [END bigtable_api_set_iam_policy] :dedent: 4 - :type policy: :class:`google.cloud.bigtable.deprecated.policy.Policy` + :type policy: :class:`google.cloud.bigtable.policy.Policy` :param policy: A new IAM policy to replace the current IAM policy of this instance - :rtype: :class:`google.cloud.bigtable.deprecated.policy.Policy` + :rtype: :class:`google.cloud.bigtable.policy.Policy` :returns: The current IAM policy of this instance. """ instance_admin_client = self._client.instance_admin_client @@ -586,12 +586,12 @@ def cluster( :param default_storage_type: (Optional) The type of storage Possible values are represented by the following constants: - :data:`google.cloud.bigtable.deprecated.enums.StorageType.SSD`. - :data:`google.cloud.bigtable.deprecated.enums.StorageType.HDD`, + :data:`google.cloud.bigtable.enums.StorageType.SSD`. + :data:`google.cloud.bigtable.enums.StorageType.HDD`, Defaults to - :data:`google.cloud.bigtable.deprecated.enums.StorageType.UNSPECIFIED`. + :data:`google.cloud.bigtable.enums.StorageType.UNSPECIFIED`. - :rtype: :class:`~google.cloud.bigtable.deprecated.instance.Cluster` + :rtype: :class:`~google.cloud.bigtable.instance.Cluster` :returns: a cluster owned by this instance. :type kms_key_name: str @@ -635,7 +635,7 @@ def list_clusters(self): :rtype: tuple :returns: (clusters, failed_locations), where 'clusters' is list of - :class:`google.cloud.bigtable.deprecated.instance.Cluster`, and + :class:`google.cloud.bigtable.instance.Cluster`, and 'failed_locations' is a list of locations which could not be resolved. """ @@ -664,7 +664,7 @@ def table(self, table_id, mutation_timeout=None, app_profile_id=None): :type app_profile_id: str :param app_profile_id: (Optional) The unique name of the AppProfile. - :rtype: :class:`Table ` + :rtype: :class:`Table ` :returns: The table owned by this instance. """ return Table( @@ -684,7 +684,7 @@ def list_tables(self): :end-before: [END bigtable_api_list_tables] :dedent: 4 - :rtype: list of :class:`Table ` + :rtype: list of :class:`Table ` :returns: The list of tables owned by the instance. :raises: :class:`ValueError ` if one of the returned tables has a name that is not of the expected format. @@ -731,8 +731,8 @@ def app_profile( :param: routing_policy_type: The type of the routing policy. Possible values are represented by the following constants: - :data:`google.cloud.bigtable.deprecated.enums.RoutingPolicyType.ANY` - :data:`google.cloud.bigtable.deprecated.enums.RoutingPolicyType.SINGLE` + :data:`google.cloud.bigtable.enums.RoutingPolicyType.ANY` + :data:`google.cloud.bigtable.enums.RoutingPolicyType.SINGLE` :type: description: str :param: description: (Optional) Long form description of the use @@ -753,7 +753,7 @@ def app_profile( transactional writes for ROUTING_POLICY_TYPE_SINGLE. - :rtype: :class:`~google.cloud.bigtable.deprecated.app_profile.AppProfile>` + :rtype: :class:`~google.cloud.bigtable.app_profile.AppProfile>` :returns: AppProfile for this instance. """ return AppProfile( @@ -776,10 +776,10 @@ def list_app_profiles(self): :end-before: [END bigtable_api_list_app_profiles] :dedent: 4 - :rtype: :list:[`~google.cloud.bigtable.deprecated.app_profile.AppProfile`] - :returns: A :list:[`~google.cloud.bigtable.deprecated.app_profile.AppProfile`]. 
+ :rtype: :list:[`~google.cloud.bigtable.app_profile.AppProfile`] + :returns: A :list:[`~google.cloud.bigtable.app_profile.AppProfile`]. By default, this is a list of - :class:`~google.cloud.bigtable.deprecated.app_profile.AppProfile` + :class:`~google.cloud.bigtable.app_profile.AppProfile` instances. """ resp = self._client.instance_admin_client.list_app_profiles( diff --git a/google/cloud/bigtable/deprecated/policy.py b/google/cloud/bigtable/policy.py similarity index 100% rename from google/cloud/bigtable/deprecated/policy.py rename to google/cloud/bigtable/policy.py diff --git a/google/cloud/bigtable/deprecated/row.py b/google/cloud/bigtable/row.py similarity index 98% rename from google/cloud/bigtable/deprecated/row.py rename to google/cloud/bigtable/row.py index 3b114a74a..752458a08 100644 --- a/google/cloud/bigtable/deprecated/row.py +++ b/google/cloud/bigtable/row.py @@ -51,7 +51,7 @@ class Row(object): :type row_key: bytes :param row_key: The key for the current row. - :type table: :class:`Table ` + :type table: :class:`Table ` :param table: (Optional) The table that owns the row. """ @@ -86,7 +86,7 @@ def table(self): :end-before: [END bigtable_api_row_table] :dedent: 4 - :rtype: table: :class:`Table ` + :rtype: table: :class:`Table ` :returns: table: The table that owns the row. """ return self._table @@ -105,7 +105,7 @@ class _SetDeleteRow(Row): :type row_key: bytes :param row_key: The key for the current row. - :type table: :class:`Table ` + :type table: :class:`Table ` :param table: The table that owns the row. """ @@ -275,11 +275,11 @@ class DirectRow(_SetDeleteRow): :type row_key: bytes :param row_key: The key for the current row. - :type table: :class:`Table ` + :type table: :class:`Table ` :param table: (Optional) The table that owns the row. This is used for the :meth: `commit` only. Alternatively, DirectRows can be persisted via - :meth:`~google.cloud.bigtable.deprecated.table.Table.mutate_rows`. + :meth:`~google.cloud.bigtable.table.Table.mutate_rows`. """ def __init__(self, row_key, table=None): @@ -519,7 +519,7 @@ class ConditionalRow(_SetDeleteRow): :type row_key: bytes :param row_key: The key for the current row. - :type table: :class:`Table ` + :type table: :class:`Table ` :param table: The table that owns the row. :type filter_: :class:`.RowFilter` @@ -791,7 +791,7 @@ class AppendRow(Row): :type row_key: bytes :param row_key: The key for the current row. - :type table: :class:`Table ` + :type table: :class:`Table ` :param table: The table that owns the row. """ @@ -1107,7 +1107,7 @@ def find_cells(self, column_family_id, column): are located. Returns: - List[~google.cloud.bigtable.deprecated.row_data.Cell]: The cells stored in the + List[~google.cloud.bigtable.row_data.Cell]: The cells stored in the specified column. Raises: @@ -1147,7 +1147,7 @@ def cell_value(self, column_family_id, column, index=0): not specified, will return the first cell. Returns: - ~google.cloud.bigtable.deprecated.row_data.Cell value: The cell value stored + ~google.cloud.bigtable.row_data.Cell value: The cell value stored in the specified column and specified index. 
Raises: diff --git a/google/cloud/bigtable/deprecated/row_data.py b/google/cloud/bigtable/row_data.py similarity index 97% rename from google/cloud/bigtable/deprecated/row_data.py rename to google/cloud/bigtable/row_data.py index 9daa1ed8f..e11379108 100644 --- a/google/cloud/bigtable/deprecated/row_data.py +++ b/google/cloud/bigtable/row_data.py @@ -23,10 +23,10 @@ from google.api_core import retry from google.cloud._helpers import _to_bytes # type: ignore -from google.cloud.bigtable.deprecated.row_merger import _RowMerger, _State +from google.cloud.bigtable.row_merger import _RowMerger, _State from google.cloud.bigtable_v2.types import bigtable as data_messages_v2_pb2 from google.cloud.bigtable_v2.types import data as data_v2_pb2 -from google.cloud.bigtable.deprecated.row import Cell, InvalidChunk, PartialRowData +from google.cloud.bigtable.row import Cell, InvalidChunk, PartialRowData # Some classes need to be re-exported here to keep backwards @@ -98,7 +98,7 @@ def _retry_read_rows_exception(exc): """The default retry strategy to be used on retry-able errors. Used by -:meth:`~google.cloud.bigtable.deprecated.row_data.PartialRowsData._read_next_response`. +:meth:`~google.cloud.bigtable.row_data.PartialRowsData._read_next_response`. """ @@ -157,7 +157,9 @@ def __init__(self, read_method, request, retry=DEFAULT_RETRY_READ_ROWS): # Otherwise there is a risk of entering an infinite loop that resets # the timeout counter just before it being triggered. The increment # by 1 second here is customary but should not be much less than that. - self.response_iterator = read_method(request, timeout=self.retry._deadline + 1) + self.response_iterator = read_method( + request, timeout=self.retry._deadline + 1, retry=self.retry + ) self.rows = {} diff --git a/google/cloud/bigtable/deprecated/row_filters.py b/google/cloud/bigtable/row_filters.py similarity index 100% rename from google/cloud/bigtable/deprecated/row_filters.py rename to google/cloud/bigtable/row_filters.py diff --git a/google/cloud/bigtable/deprecated/row_merger.py b/google/cloud/bigtable/row_merger.py similarity index 99% rename from google/cloud/bigtable/deprecated/row_merger.py rename to google/cloud/bigtable/row_merger.py index d29d64eb2..515b91df7 100644 --- a/google/cloud/bigtable/deprecated/row_merger.py +++ b/google/cloud/bigtable/row_merger.py @@ -1,6 +1,6 @@ from enum import Enum from collections import OrderedDict -from google.cloud.bigtable.deprecated.row import Cell, PartialRowData, InvalidChunk +from google.cloud.bigtable.row import Cell, PartialRowData, InvalidChunk _MISSING_COLUMN_FAMILY = "Column family {} is not among the cells stored in this row." 
_MISSING_COLUMN = ( diff --git a/google/cloud/bigtable/deprecated/row_set.py b/google/cloud/bigtable/row_set.py similarity index 100% rename from google/cloud/bigtable/deprecated/row_set.py rename to google/cloud/bigtable/row_set.py diff --git a/google/cloud/bigtable/deprecated/table.py b/google/cloud/bigtable/table.py similarity index 95% rename from google/cloud/bigtable/deprecated/table.py rename to google/cloud/bigtable/table.py index cf60b066e..e3191a729 100644 --- a/google/cloud/bigtable/deprecated/table.py +++ b/google/cloud/bigtable/table.py @@ -28,24 +28,24 @@ from google.api_core.retry import if_exception_type from google.api_core.retry import Retry from google.cloud._helpers import _to_bytes # type: ignore -from google.cloud.bigtable.deprecated.backup import Backup -from google.cloud.bigtable.deprecated.column_family import _gc_rule_from_pb -from google.cloud.bigtable.deprecated.column_family import ColumnFamily -from google.cloud.bigtable.deprecated.batcher import MutationsBatcher -from google.cloud.bigtable.deprecated.batcher import FLUSH_COUNT, MAX_ROW_BYTES -from google.cloud.bigtable.deprecated.encryption_info import EncryptionInfo -from google.cloud.bigtable.deprecated.policy import Policy -from google.cloud.bigtable.deprecated.row import AppendRow -from google.cloud.bigtable.deprecated.row import ConditionalRow -from google.cloud.bigtable.deprecated.row import DirectRow -from google.cloud.bigtable.deprecated.row_data import ( +from google.cloud.bigtable.backup import Backup +from google.cloud.bigtable.column_family import _gc_rule_from_pb +from google.cloud.bigtable.column_family import ColumnFamily +from google.cloud.bigtable.batcher import MutationsBatcher +from google.cloud.bigtable.batcher import FLUSH_COUNT, MAX_MUTATION_SIZE +from google.cloud.bigtable.encryption_info import EncryptionInfo +from google.cloud.bigtable.policy import Policy +from google.cloud.bigtable.row import AppendRow +from google.cloud.bigtable.row import ConditionalRow +from google.cloud.bigtable.row import DirectRow +from google.cloud.bigtable.row_data import ( PartialRowsData, _retriable_internal_server_error, ) -from google.cloud.bigtable.deprecated.row_data import DEFAULT_RETRY_READ_ROWS -from google.cloud.bigtable.deprecated.row_set import RowSet -from google.cloud.bigtable.deprecated.row_set import RowRange -from google.cloud.bigtable.deprecated import enums +from google.cloud.bigtable.row_data import DEFAULT_RETRY_READ_ROWS +from google.cloud.bigtable.row_set import RowSet +from google.cloud.bigtable.row_set import RowRange +from google.cloud.bigtable import enums from google.cloud.bigtable_v2.types import bigtable as data_messages_v2_pb2 from google.cloud.bigtable_admin_v2 import BigtableTableAdminClient from google.cloud.bigtable_admin_v2.types import table as admin_messages_v2_pb2 @@ -88,7 +88,7 @@ class _BigtableRetryableError(Exception): ) """The default retry strategy to be used on retry-able errors. -Used by :meth:`~google.cloud.bigtable.deprecated.table.Table.mutate_rows`. +Used by :meth:`~google.cloud.bigtable.table.Table.mutate_rows`. """ @@ -119,7 +119,7 @@ class Table(object): :type table_id: str :param table_id: The ID of the table. - :type instance: :class:`~google.cloud.bigtable.deprecated.instance.Instance` + :type instance: :class:`~google.cloud.bigtable.instance.Instance` :param instance: The instance that owns the table. 
:type app_profile_id: str @@ -172,7 +172,7 @@ def get_iam_policy(self): :end-before: [END bigtable_api_table_get_iam_policy] :dedent: 4 - :rtype: :class:`google.cloud.bigtable.deprecated.policy.Policy` + :rtype: :class:`google.cloud.bigtable.policy.Policy` :returns: The current IAM policy of this table. """ table_client = self._instance._client.table_admin_client @@ -184,7 +184,7 @@ def set_iam_policy(self, policy): existing policy. For more information about policy, please see documentation of - class `google.cloud.bigtable.deprecated.policy.Policy` + class `google.cloud.bigtable.policy.Policy` For example: @@ -193,11 +193,11 @@ class `google.cloud.bigtable.deprecated.policy.Policy` :end-before: [END bigtable_api_table_set_iam_policy] :dedent: 4 - :type policy: :class:`google.cloud.bigtable.deprecated.policy.Policy` + :type policy: :class:`google.cloud.bigtable.policy.Policy` :param policy: A new IAM policy to replace the current IAM policy of this table. - :rtype: :class:`google.cloud.bigtable.deprecated.policy.Policy` + :rtype: :class:`google.cloud.bigtable.policy.Policy` :returns: The current IAM policy of this table. """ table_client = self._instance._client.table_admin_client @@ -271,7 +271,7 @@ def row(self, row_key, filter_=None, append=False): .. warning:: At most one of ``filter_`` and ``append`` can be used in a - :class:`~google.cloud.bigtable.deprecated.row.Row`. + :class:`~google.cloud.bigtable.row.Row`. :type row_key: bytes :param row_key: The key for the row being created. @@ -284,7 +284,7 @@ def row(self, row_key, filter_=None, append=False): :param append: (Optional) Flag to determine if the row should be used for append mutations. - :rtype: :class:`~google.cloud.bigtable.deprecated.row.Row` + :rtype: :class:`~google.cloud.bigtable.row.Row` :returns: A row owned by this table. :raises: :class:`ValueError ` if both ``filter_`` and ``append`` are used. @@ -307,7 +307,7 @@ def row(self, row_key, filter_=None, append=False): return DirectRow(row_key, self) def append_row(self, row_key): - """Create a :class:`~google.cloud.bigtable.deprecated.row.AppendRow` associated with this table. + """Create a :class:`~google.cloud.bigtable.row.AppendRow` associated with this table. For example: @@ -325,7 +325,7 @@ def append_row(self, row_key): return AppendRow(row_key, self) def direct_row(self, row_key): - """Create a :class:`~google.cloud.bigtable.deprecated.row.DirectRow` associated with this table. + """Create a :class:`~google.cloud.bigtable.row.DirectRow` associated with this table. For example: @@ -343,7 +343,7 @@ def direct_row(self, row_key): return DirectRow(row_key, self) def conditional_row(self, row_key, filter_): - """Create a :class:`~google.cloud.bigtable.deprecated.row.ConditionalRow` associated with this table. + """Create a :class:`~google.cloud.bigtable.row.ConditionalRow` associated with this table. For example: @@ -515,7 +515,7 @@ def get_encryption_info(self): :rtype: dict :returns: Dictionary of encryption info for this table. Keys are cluster ids and - values are tuples of :class:`google.cloud.bigtable.deprecated.encryption.EncryptionInfo` instances. + values are tuples of :class:`google.cloud.bigtable.encryption.EncryptionInfo` instances. 
""" ENCRYPTION_VIEW = enums.Table.View.ENCRYPTION_VIEW table_client = self._instance._client.table_admin_client @@ -844,7 +844,9 @@ def drop_by_prefix(self, row_key_prefix, timeout=None): request={"name": self.name, "row_key_prefix": _to_bytes(row_key_prefix)} ) - def mutations_batcher(self, flush_count=FLUSH_COUNT, max_row_bytes=MAX_ROW_BYTES): + def mutations_batcher( + self, flush_count=FLUSH_COUNT, max_row_bytes=MAX_MUTATION_SIZE + ): """Factory to create a mutation batcher associated with this instance. For example: @@ -967,7 +969,7 @@ def list_backups(self, cluster_id=None, filter_=None, order_by=None, page_size=0 number of resources in a page. :rtype: :class:`~google.api_core.page_iterator.Iterator` - :returns: Iterator of :class:`~google.cloud.bigtable.deprecated.backup.Backup` + :returns: Iterator of :class:`~google.cloud.bigtable.backup.Backup` resources within the current Instance. :raises: :class:`ValueError ` if one of the returned Backups' name is not of the expected format. @@ -1367,8 +1369,8 @@ def _check_row_table_name(table_name, row): :type table_name: str :param table_name: The name of the table. - :type row: :class:`~google.cloud.bigtable.deprecated.row.Row` - :param row: An instance of :class:`~google.cloud.bigtable.deprecated.row.Row` + :type row: :class:`~google.cloud.bigtable.row.Row` + :param row: An instance of :class:`~google.cloud.bigtable.row.Row` subclasses. :raises: :exc:`~.table.TableMismatchError` if the row does not belong to @@ -1384,8 +1386,8 @@ def _check_row_table_name(table_name, row): def _check_row_type(row): """Checks that a row is an instance of :class:`.DirectRow`. - :type row: :class:`~google.cloud.bigtable.deprecated.row.Row` - :param row: An instance of :class:`~google.cloud.bigtable.deprecated.row.Row` + :type row: :class:`~google.cloud.bigtable.row.Row` + :param row: An instance of :class:`~google.cloud.bigtable.row.Row` subclasses. 
:raises: :class:`TypeError ` if the row is not an From a6a6b7bc717c44d32dff3121476977a2c8a9e8a5 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 23 Jun 2023 16:33:33 -0700 Subject: [PATCH 181/213] moved async classes into own sub-folder --- ...ad_rows.py => _read_rows_state_machine.py} | 276 +---------------- .../bigtable/data/{ => async}/_mutate_rows.py | 20 +- .../cloud/bigtable/data/async/_read_rows.py | 289 ++++++++++++++++++ .../cloud/bigtable/data/{ => async}/client.py | 26 +- .../bigtable/data/{ => async}/iterators.py | 2 +- .../data/{ => async}/mutations_batcher.py | 6 +- google/cloud/bigtable/data/exceptions.py | 8 + google/cloud/bigtable/data/mutations.py | 8 +- 8 files changed, 328 insertions(+), 307 deletions(-) rename google/cloud/bigtable/data/{_read_rows.py => _read_rows_state_machine.py} (54%) rename google/cloud/bigtable/data/{ => async}/_mutate_rows.py (96%) create mode 100644 google/cloud/bigtable/data/async/_read_rows.py rename google/cloud/bigtable/data/{ => async}/client.py (98%) rename google/cloud/bigtable/data/{ => async}/iterators.py (98%) rename google/cloud/bigtable/data/{ => async}/mutations_batcher.py (99%) diff --git a/google/cloud/bigtable/data/_read_rows.py b/google/cloud/bigtable/data/_read_rows_state_machine.py similarity index 54% rename from google/cloud/bigtable/data/_read_rows.py rename to google/cloud/bigtable/data/_read_rows_state_machine.py index ee094f1a7..bda0b1337 100644 --- a/google/cloud/bigtable/data/_read_rows.py +++ b/google/cloud/bigtable/data/_read_rows_state_machine.py @@ -14,35 +14,14 @@ # from __future__ import annotations -from typing import ( - List, - Any, - AsyncIterable, - AsyncIterator, - AsyncGenerator, - Iterator, - Callable, - Awaitable, - Type, -) - -import asyncio -from functools import partial -from grpc.aio import RpcContext +from typing import Type from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse -from google.cloud.bigtable_v2.services.bigtable.async_client import BigtableAsyncClient from google.cloud.bigtable.row import Row, Cell, _LastScannedRow from google.cloud.bigtable.exceptions import InvalidChunk -from google.cloud.bigtable.exceptions import _RowSetComplete -from google.api_core import retry_async as retries -from google.api_core import exceptions as core_exceptions -from google.cloud.bigtable._helpers import _make_metadata -from google.cloud.bigtable._helpers import _attempt_timeout_generator """ -This module provides a set of classes for merging ReadRowsResponse chunks -into Row objects. +This module provides classes for the read_rows state machine: - ReadRowsOperation is the highest level class, providing an interface for asynchronous merging end-to-end @@ -56,253 +35,6 @@ """ -class _ReadRowsOperation(AsyncIterable[Row]): - """ - ReadRowsOperation handles the logic of merging chunks from a ReadRowsResponse stream - into a stream of Row objects. - - ReadRowsOperation.merge_row_response_stream takes in a stream of ReadRowsResponse - and turns them into a stream of Row objects using an internal - StateMachine. - - ReadRowsOperation(request, client) handles row merging logic end-to-end, including - performing retries on stream errors. 
- """ - - def __init__( - self, - request: dict[str, Any], - client: BigtableAsyncClient, - *, - operation_timeout: float = 600.0, - per_request_timeout: float | None = None, - ): - """ - Args: - - request: the request dict to send to the Bigtable API - - client: the Bigtable client to use to make the request - - operation_timeout: the timeout to use for the entire operation, in seconds - - per_request_timeout: the timeout to use when waiting for each individual grpc request, in seconds - If not specified, defaults to operation_timeout - """ - self._last_emitted_row_key: bytes | None = None - self._emit_count = 0 - self._request = request - self.operation_timeout = operation_timeout - # use generator to lower per-attempt timeout as we approach operation_timeout deadline - attempt_timeout_gen = _attempt_timeout_generator( - per_request_timeout, operation_timeout - ) - row_limit = request.get("rows_limit", 0) - # lock in paramters for retryable wrapper - self._partial_retryable = partial( - self._read_rows_retryable_attempt, - client.read_rows, - attempt_timeout_gen, - row_limit, - ) - predicate = retries.if_exception_type( - core_exceptions.DeadlineExceeded, - core_exceptions.ServiceUnavailable, - core_exceptions.Aborted, - ) - - def on_error_fn(exc): - if predicate(exc): - self.transient_errors.append(exc) - - retry = retries.AsyncRetry( - predicate=predicate, - timeout=self.operation_timeout, - initial=0.01, - multiplier=2, - maximum=60, - on_error=on_error_fn, - is_stream=True, - ) - self._stream: AsyncGenerator[Row, None] | None = retry( - self._partial_retryable - )() - # contains the list of errors that were retried - self.transient_errors: List[Exception] = [] - - def __aiter__(self) -> AsyncIterator[Row]: - """Implements the AsyncIterable interface""" - return self - - async def __anext__(self) -> Row: - """Implements the AsyncIterator interface""" - if self._stream is not None: - return await self._stream.__anext__() - else: - raise asyncio.InvalidStateError("stream is closed") - - async def aclose(self): - """Close the stream and release resources""" - if self._stream is not None: - await self._stream.aclose() - self._stream = None - self._emitted_seen_row_key = None - - async def _read_rows_retryable_attempt( - self, - gapic_fn: Callable[..., Awaitable[AsyncIterable[ReadRowsResponse]]], - timeout_generator: Iterator[float], - total_row_limit: int, - ) -> AsyncGenerator[Row, None]: - """ - Retryable wrapper for merge_rows. This function is called each time - a retry is attempted. 
- - Some fresh state is created on each retry: - - grpc network stream - - state machine to hold merge chunks received from stream - Some state is shared between retries: - - _last_emitted_row_key is used to ensure that - duplicate rows are not emitted - - request is stored and (potentially) modified on each retry - """ - if self._last_emitted_row_key is not None: - # if this is a retry, try to trim down the request to avoid ones we've already processed - try: - self._request["rows"] = _ReadRowsOperation._revise_request_rowset( - row_set=self._request.get("rows", None), - last_seen_row_key=self._last_emitted_row_key, - ) - except _RowSetComplete: - # if there are no rows left to process, we're done - # This is not expected to happen often, but could occur if - # a retry is triggered quickly after the last row is emitted - return - # revise next request's row limit based on number emitted - if total_row_limit: - new_limit = total_row_limit - self._emit_count - if new_limit == 0: - # we have hit the row limit, so we're done - return - elif new_limit < 0: - raise RuntimeError("unexpected state: emit count exceeds row limit") - else: - self._request["rows_limit"] = new_limit - metadata = _make_metadata( - self._request.get("table_name", None), - self._request.get("app_profile_id", None), - ) - new_gapic_stream: RpcContext = await gapic_fn( - self._request, - timeout=next(timeout_generator), - metadata=metadata, - ) - try: - state_machine = _StateMachine() - stream = _ReadRowsOperation.merge_row_response_stream( - new_gapic_stream, state_machine - ) - # run until we get a timeout or the stream is exhausted - async for new_item in stream: - if ( - self._last_emitted_row_key is not None - and new_item.row_key <= self._last_emitted_row_key - ): - raise InvalidChunk("Last emitted row key out of order") - # don't yeild _LastScannedRow markers; they - # should only update last_seen_row_key - if not isinstance(new_item, _LastScannedRow): - yield new_item - self._emit_count += 1 - self._last_emitted_row_key = new_item.row_key - if total_row_limit and self._emit_count >= total_row_limit: - return - except (Exception, GeneratorExit) as exc: - # ensure grpc stream is closed - new_gapic_stream.cancel() - raise exc - - @staticmethod - def _revise_request_rowset( - row_set: dict[str, Any] | None, - last_seen_row_key: bytes, - ) -> dict[str, Any]: - """ - Revise the rows in the request to avoid ones we've already processed. 
- - Args: - - row_set: the row set from the request - - last_seen_row_key: the last row key encountered - Raises: - - _RowSetComplete: if there are no rows left to process after the revision - """ - # if user is doing a whole table scan, start a new one with the last seen key - if row_set is None or ( - len(row_set.get("row_ranges", [])) == 0 - and len(row_set.get("row_keys", [])) == 0 - ): - last_seen = last_seen_row_key - return { - "row_keys": [], - "row_ranges": [{"start_key_open": last_seen}], - } - # remove seen keys from user-specific key list - row_keys: list[bytes] = row_set.get("row_keys", []) - adjusted_keys = [k for k in row_keys if k > last_seen_row_key] - # adjust ranges to ignore keys before last seen - row_ranges: list[dict[str, Any]] = row_set.get("row_ranges", []) - adjusted_ranges = [] - for row_range in row_ranges: - end_key = row_range.get("end_key_closed", None) or row_range.get( - "end_key_open", None - ) - if end_key is None or end_key > last_seen_row_key: - # end range is after last seen key - new_range = row_range.copy() - start_key = row_range.get("start_key_closed", None) or row_range.get( - "start_key_open", None - ) - if start_key is None or start_key <= last_seen_row_key: - # replace start key with last seen - new_range["start_key_open"] = last_seen_row_key - new_range.pop("start_key_closed", None) - adjusted_ranges.append(new_range) - if len(adjusted_keys) == 0 and len(adjusted_ranges) == 0: - # if the query is empty after revision, raise an exception - # this will avoid an unwanted full table scan - raise _RowSetComplete() - return {"row_keys": adjusted_keys, "row_ranges": adjusted_ranges} - - @staticmethod - async def merge_row_response_stream( - response_generator: AsyncIterable[ReadRowsResponse], - state_machine: _StateMachine, - ) -> AsyncGenerator[Row, None]: - """ - Consume chunks from a ReadRowsResponse stream into a set of Rows - - Args: - - response_generator: AsyncIterable of ReadRowsResponse objects. Typically - this is a stream of chunks from the Bigtable API - Returns: - - AsyncGenerator of Rows - Raises: - - InvalidChunk: if the chunk stream is invalid - """ - async for row_response in response_generator: - # unwrap protoplus object for increased performance - response_pb = row_response._pb - last_scanned = response_pb.last_scanned_row_key - # if the server sends a scan heartbeat, notify the state machine. - if last_scanned: - yield state_machine.handle_last_scanned_row(last_scanned) - # process new chunks through the state machine. - for chunk in response_pb.chunks: - complete_row = state_machine.handle_chunk(chunk) - if complete_row is not None: - yield complete_row - # TODO: handle request stats - if not state_machine.is_terminal_state(): - # read rows is complete, but there's still data in the merger - raise InvalidChunk("read_rows completed with partial state remaining") - - class _StateMachine: """ State Machine converts chunks into Rows @@ -579,7 +311,7 @@ def reset(self) -> None: self.current_key: bytes | None = None self.working_cell: Cell | None = None self.working_value: bytearray | None = None - self.completed_cells: List[Cell] = [] + self.completed_cells: list[Cell] = [] def start_row(self, key: bytes) -> None: """Called to start a new row. 
This will be called once per row""" @@ -590,7 +322,7 @@ def start_cell( family: str, qualifier: bytes, timestamp_micros: int, - labels: List[str], + labels: list[str], ) -> None: """called to start a new cell in a row.""" if self.current_key is None: diff --git a/google/cloud/bigtable/data/_mutate_rows.py b/google/cloud/bigtable/data/async/_mutate_rows.py similarity index 96% rename from google/cloud/bigtable/data/_mutate_rows.py rename to google/cloud/bigtable/data/async/_mutate_rows.py index e34ebaeb6..acabc863e 100644 --- a/google/cloud/bigtable/data/_mutate_rows.py +++ b/google/cloud/bigtable/data/async/_mutate_rows.py @@ -24,6 +24,9 @@ from google.cloud.bigtable._helpers import _convert_retry_deadline from google.cloud.bigtable._helpers import _attempt_timeout_generator +# mutate_rows requests are limited to this number of mutations +from google.cloud.bigtable.mutations import MUTATE_ROWS_REQUEST_MUTATION_LIMIT + if TYPE_CHECKING: from google.cloud.bigtable_v2.services.bigtable.async_client import ( BigtableAsyncClient, @@ -31,19 +34,8 @@ from google.cloud.bigtable.client import Table from google.cloud.bigtable.mutations import RowMutationEntry -# mutate_rows requests are limited to this value -MUTATE_ROWS_REQUEST_MUTATION_LIMIT = 100_000 - - -class _MutateRowsIncomplete(RuntimeError): - """ - Exception raised when a mutate_rows call has unfinished work. - """ - - pass - -class _MutateRowsOperation: +class _MutateRowsOperationAsync: """ MutateRowsOperation manages the logic of sending a set of row mutations, and retrying on failed entries. It manages this using the _run_attempt @@ -93,7 +85,7 @@ def __init__( core_exceptions.DeadlineExceeded, core_exceptions.ServiceUnavailable, # Entry level errors - _MutateRowsIncomplete, + bt_exceptions._MutateRowsIncomplete, ) # build retryable operation retry = retries.AsyncRetry( @@ -199,7 +191,7 @@ async def _run_attempt(self): # check if attempt succeeded, or needs to be retried if self.remaining_indices: # unfinished work; raise exception to trigger retry - raise _MutateRowsIncomplete + raise bt_exceptions._MutateRowsIncomplete def _handle_entry_error(self, idx: int, exc: Exception): """ diff --git a/google/cloud/bigtable/data/async/_read_rows.py b/google/cloud/bigtable/data/async/_read_rows.py new file mode 100644 index 000000000..50673cab0 --- /dev/null +++ b/google/cloud/bigtable/data/async/_read_rows.py @@ -0,0 +1,289 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +from __future__ import annotations + +from typing import ( + List, + Any, + AsyncIterable, + AsyncIterator, + AsyncGenerator, + Iterator, + Callable, + Awaitable, + Type, +) + +import asyncio +from functools import partial +from grpc.aio import RpcContext + +from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse +from google.cloud.bigtable_v2.services.bigtable.async_client import BigtableAsyncClient +from google.cloud.bigtable.row import Row, _LastScannedRow +from google.cloud.bigtable.exceptions import InvalidChunk +from google.cloud.bigtable.exceptions import _RowSetComplete +from google.cloud.bigtable._read_rows_state_machine import _StateMachine +from google.api_core import retry_async as retries +from google.api_core import exceptions as core_exceptions +from google.cloud.bigtable._helpers import _make_metadata +from google.cloud.bigtable._helpers import _attempt_timeout_generator + + +class _ReadRowsOperationAsync(AsyncIterable[Row]): + """ + ReadRowsOperation handles the logic of merging chunks from a ReadRowsResponse stream + into a stream of Row objects. + + ReadRowsOperation.merge_row_response_stream takes in a stream of ReadRowsResponse + and turns them into a stream of Row objects using an internal + StateMachine. + + ReadRowsOperation(request, client) handles row merging logic end-to-end, including + performing retries on stream errors. + """ + + def __init__( + self, + request: dict[str, Any], + client: BigtableAsyncClient, + *, + operation_timeout: float = 600.0, + per_request_timeout: float | None = None, + ): + """ + Args: + - request: the request dict to send to the Bigtable API + - client: the Bigtable client to use to make the request + - operation_timeout: the timeout to use for the entire operation, in seconds + - per_request_timeout: the timeout to use when waiting for each individual grpc request, in seconds + If not specified, defaults to operation_timeout + """ + self._last_emitted_row_key: bytes | None = None + self._emit_count = 0 + self._request = request + self.operation_timeout = operation_timeout + # use generator to lower per-attempt timeout as we approach operation_timeout deadline + attempt_timeout_gen = _attempt_timeout_generator( + per_request_timeout, operation_timeout + ) + row_limit = request.get("rows_limit", 0) + # lock in paramters for retryable wrapper + self._partial_retryable = partial( + self._read_rows_retryable_attempt, + client.read_rows, + attempt_timeout_gen, + row_limit, + ) + predicate = retries.if_exception_type( + core_exceptions.DeadlineExceeded, + core_exceptions.ServiceUnavailable, + core_exceptions.Aborted, + ) + + def on_error_fn(exc): + if predicate(exc): + self.transient_errors.append(exc) + + retry = retries.AsyncRetry( + predicate=predicate, + timeout=self.operation_timeout, + initial=0.01, + multiplier=2, + maximum=60, + on_error=on_error_fn, + is_stream=True, + ) + self._stream: AsyncGenerator[Row, None] | None = retry( + self._partial_retryable + )() + # contains the list of errors that were retried + self.transient_errors: List[Exception] = [] + + def __aiter__(self) -> AsyncIterator[Row]: + """Implements the AsyncIterable interface""" + return self + + async def __anext__(self) -> Row: + """Implements the AsyncIterator interface""" + if self._stream is not None: + return await self._stream.__anext__() + else: + raise asyncio.InvalidStateError("stream is closed") + + async def aclose(self): + """Close the stream and release resources""" + if self._stream is not None: + await self._stream.aclose() + 
self._stream = None + self._emitted_seen_row_key = None + + async def _read_rows_retryable_attempt( + self, + gapic_fn: Callable[..., Awaitable[AsyncIterable[ReadRowsResponse]]], + timeout_generator: Iterator[float], + total_row_limit: int, + ) -> AsyncGenerator[Row, None]: + """ + Retryable wrapper for merge_rows. This function is called each time + a retry is attempted. + + Some fresh state is created on each retry: + - grpc network stream + - state machine to hold merge chunks received from stream + Some state is shared between retries: + - _last_emitted_row_key is used to ensure that + duplicate rows are not emitted + - request is stored and (potentially) modified on each retry + """ + if self._last_emitted_row_key is not None: + # if this is a retry, try to trim down the request to avoid ones we've already processed + try: + self._request["rows"] = _ReadRowsOperationAsync._revise_request_rowset( + row_set=self._request.get("rows", None), + last_seen_row_key=self._last_emitted_row_key, + ) + except _RowSetComplete: + # if there are no rows left to process, we're done + # This is not expected to happen often, but could occur if + # a retry is triggered quickly after the last row is emitted + return + # revise next request's row limit based on number emitted + if total_row_limit: + new_limit = total_row_limit - self._emit_count + if new_limit == 0: + # we have hit the row limit, so we're done + return + elif new_limit < 0: + raise RuntimeError("unexpected state: emit count exceeds row limit") + else: + self._request["rows_limit"] = new_limit + metadata = _make_metadata( + self._request.get("table_name", None), + self._request.get("app_profile_id", None), + ) + new_gapic_stream: RpcContext = await gapic_fn( + self._request, + timeout=next(timeout_generator), + metadata=metadata, + ) + try: + state_machine = _StateMachine() + stream = _ReadRowsOperationAsync.merge_row_response_stream( + new_gapic_stream, state_machine + ) + # run until we get a timeout or the stream is exhausted + async for new_item in stream: + if ( + self._last_emitted_row_key is not None + and new_item.row_key <= self._last_emitted_row_key + ): + raise InvalidChunk("Last emitted row key out of order") + # don't yeild _LastScannedRow markers; they + # should only update last_seen_row_key + if not isinstance(new_item, _LastScannedRow): + yield new_item + self._emit_count += 1 + self._last_emitted_row_key = new_item.row_key + if total_row_limit and self._emit_count >= total_row_limit: + return + except (Exception, GeneratorExit) as exc: + # ensure grpc stream is closed + new_gapic_stream.cancel() + raise exc + + @staticmethod + def _revise_request_rowset( + row_set: dict[str, Any] | None, + last_seen_row_key: bytes, + ) -> dict[str, Any]: + """ + Revise the rows in the request to avoid ones we've already processed. 
+ + Args: + - row_set: the row set from the request + - last_seen_row_key: the last row key encountered + Raises: + - _RowSetComplete: if there are no rows left to process after the revision + """ + # if user is doing a whole table scan, start a new one with the last seen key + if row_set is None or ( + len(row_set.get("row_ranges", [])) == 0 + and len(row_set.get("row_keys", [])) == 0 + ): + last_seen = last_seen_row_key + return { + "row_keys": [], + "row_ranges": [{"start_key_open": last_seen}], + } + # remove seen keys from user-specific key list + row_keys: list[bytes] = row_set.get("row_keys", []) + adjusted_keys = [k for k in row_keys if k > last_seen_row_key] + # adjust ranges to ignore keys before last seen + row_ranges: list[dict[str, Any]] = row_set.get("row_ranges", []) + adjusted_ranges = [] + for row_range in row_ranges: + end_key = row_range.get("end_key_closed", None) or row_range.get( + "end_key_open", None + ) + if end_key is None or end_key > last_seen_row_key: + # end range is after last seen key + new_range = row_range.copy() + start_key = row_range.get("start_key_closed", None) or row_range.get( + "start_key_open", None + ) + if start_key is None or start_key <= last_seen_row_key: + # replace start key with last seen + new_range["start_key_open"] = last_seen_row_key + new_range.pop("start_key_closed", None) + adjusted_ranges.append(new_range) + if len(adjusted_keys) == 0 and len(adjusted_ranges) == 0: + # if the query is empty after revision, raise an exception + # this will avoid an unwanted full table scan + raise _RowSetComplete() + return {"row_keys": adjusted_keys, "row_ranges": adjusted_ranges} + + @staticmethod + async def merge_row_response_stream( + response_generator: AsyncIterable[ReadRowsResponse], + state_machine: _StateMachine, + ) -> AsyncGenerator[Row, None]: + """ + Consume chunks from a ReadRowsResponse stream into a set of Rows + + Args: + - response_generator: AsyncIterable of ReadRowsResponse objects. Typically + this is a stream of chunks from the Bigtable API + Returns: + - AsyncGenerator of Rows + Raises: + - InvalidChunk: if the chunk stream is invalid + """ + async for row_response in response_generator: + # unwrap protoplus object for increased performance + response_pb = row_response._pb + last_scanned = response_pb.last_scanned_row_key + # if the server sends a scan heartbeat, notify the state machine. + if last_scanned: + yield state_machine.handle_last_scanned_row(last_scanned) + # process new chunks through the state machine. 
+ for chunk in response_pb.chunks: + complete_row = state_machine.handle_chunk(chunk) + if complete_row is not None: + yield complete_row + # TODO: handle request stats + if not state_machine.is_terminal_state(): + # read rows is complete, but there's still data in the merger + raise InvalidChunk("read_rows completed with partial state remaining") diff --git a/google/cloud/bigtable/data/client.py b/google/cloud/bigtable/data/async/client.py similarity index 98% rename from google/cloud/bigtable/data/client.py rename to google/cloud/bigtable/data/async/client.py index 4ec3cea27..198b63822 100644 --- a/google/cloud/bigtable/data/client.py +++ b/google/cloud/bigtable/data/async/client.py @@ -43,19 +43,19 @@ from google.api_core.exceptions import GoogleAPICallError from google.api_core import retry_async as retries from google.api_core import exceptions as core_exceptions -from google.cloud.bigtable._read_rows import _ReadRowsOperation +from google.cloud.bigtable._read_rows import _ReadRowsOperationAsync import google.auth.credentials import google.auth._default from google.api_core import client_options as client_options_lib from google.cloud.bigtable.row import Row from google.cloud.bigtable.read_rows_query import ReadRowsQuery -from google.cloud.bigtable.iterators import ReadRowsIterator +from google.cloud.bigtable.iterators import ReadRowsIteratorAsync from google.cloud.bigtable.exceptions import FailedQueryShardError from google.cloud.bigtable.exceptions import ShardedReadRowsExceptionGroup from google.cloud.bigtable.mutations import Mutation, RowMutationEntry -from google.cloud.bigtable._mutate_rows import _MutateRowsOperation +from google.cloud.bigtable._mutate_rows import _MutateRowsOperationAsync from google.cloud.bigtable._helpers import _make_metadata from google.cloud.bigtable._helpers import _convert_retry_deadline from google.cloud.bigtable.mutations_batcher import MutationsBatcher @@ -81,7 +81,7 @@ ) -class BigtableDataClient(ClientWithProject): +class BigtableDataClientAsync(ClientWithProject): def __init__( self, *, @@ -280,7 +280,7 @@ async def _manage_channel( next_refresh = random.uniform(refresh_interval_min, refresh_interval_max) next_sleep = next_refresh - (time.time() - start_timestamp) - async def _register_instance(self, instance_id: str, owner: Table) -> None: + async def _register_instance(self, instance_id: str, owner: TableAsync) -> None: """ Registers an instance with the client, and warms the channel pool for the instance @@ -311,7 +311,7 @@ async def _register_instance(self, instance_id: str, owner: Table) -> None: self.start_background_channel_refresh() async def _remove_instance_registration( - self, instance_id: str, owner: Table + self, instance_id: str, owner: TableAsync ) -> bool: """ Removes an instance from the client's registered instances, to prevent @@ -348,7 +348,7 @@ def get_table( app_profile_id: str | None = None, default_operation_timeout: float = 600, default_per_request_timeout: float | None = None, - ) -> Table: + ) -> TableAsync: """ Returns a table instance for making data API requests @@ -360,7 +360,7 @@ def get_table( app_profile_id: (Optional) The app profile to associate with requests. 
https://cloud.google.com/bigtable/docs/app-profiles """ - return Table( + return TableAsync( self, instance_id, table_id, @@ -378,7 +378,7 @@ async def __aexit__(self, exc_type, exc_val, exc_tb): await self._gapic_client.__aexit__(exc_type, exc_val, exc_tb) -class Table: +class TableAsync: """ Main Data API surface @@ -388,7 +388,7 @@ class Table: def __init__( self, - client: BigtableDataClient, + client: BigtableDataClientAsync, instance_id: str, table_id: str, app_profile_id: str | None = None, @@ -457,7 +457,7 @@ async def read_rows_stream( *, operation_timeout: float | None = None, per_request_timeout: float | None = None, - ) -> ReadRowsIterator: + ) -> ReadRowsIteratorAsync: """ Returns an iterator to asynchronously stream back row data. @@ -507,13 +507,13 @@ async def read_rows_stream( # - ReadRowsOperation.merge_row_response_stream: parses chunks into rows # - ReadRowsOperation.retryable_merge_rows: adds retries, caching, revised requests, per_request_timeout # - ReadRowsIterator: adds idle_timeout, moves stats out of stream and into attribute - row_merger = _ReadRowsOperation( + row_merger = _ReadRowsOperationAsync( request, self.client._gapic_client, operation_timeout=operation_timeout, per_request_timeout=per_request_timeout, ) - output_generator = ReadRowsIterator(row_merger) + output_generator = ReadRowsIteratorAsync(row_merger) # add idle timeout to clear resources if generator is abandoned idle_timeout_seconds = 300 await output_generator._start_idle_timer(idle_timeout_seconds) diff --git a/google/cloud/bigtable/data/iterators.py b/google/cloud/bigtable/data/async/iterators.py similarity index 98% rename from google/cloud/bigtable/data/iterators.py rename to google/cloud/bigtable/data/async/iterators.py index b20932fb2..6292b0289 100644 --- a/google/cloud/bigtable/data/iterators.py +++ b/google/cloud/bigtable/data/async/iterators.py @@ -26,7 +26,7 @@ from google.cloud.bigtable.row import Row -class ReadRowsIterator(AsyncIterable[Row]): +class ReadRowsIteratorAsync(AsyncIterable[Row]): """ Async iterator for ReadRows responses. """ diff --git a/google/cloud/bigtable/data/mutations_batcher.py b/google/cloud/bigtable/data/async/mutations_batcher.py similarity index 99% rename from google/cloud/bigtable/data/mutations_batcher.py rename to google/cloud/bigtable/data/async/mutations_batcher.py index 6c90d1a90..c6d617896 100644 --- a/google/cloud/bigtable/data/mutations_batcher.py +++ b/google/cloud/bigtable/data/async/mutations_batcher.py @@ -35,7 +35,7 @@ _MB_SIZE = 1024 * 1024 -class _FlowControl: +class _FlowControlAsync: """ Manages flow control for batched mutations. 
Mutations are registered against the FlowControl object before being sent, which will block if size or count @@ -159,7 +159,7 @@ async def add_to_flow(self, mutations: RowMutationEntry | list[RowMutationEntry] yield mutations[start_idx:end_idx] -class MutationsBatcher: +class MutationsBatcherAsync: """ Allows users to send batches using context manager API: @@ -224,7 +224,7 @@ def __init__( self._table = table self._staged_entries: list[RowMutationEntry] = [] self._staged_count, self._staged_bytes = 0, 0 - self._flow_control = _FlowControl( + self._flow_control = _FlowControlAsync( flow_control_max_mutation_count, flow_control_max_bytes ) self._flush_limit_bytes = flush_limit_bytes diff --git a/google/cloud/bigtable/data/exceptions.py b/google/cloud/bigtable/data/exceptions.py index fc4e368b9..15048a2e2 100644 --- a/google/cloud/bigtable/data/exceptions.py +++ b/google/cloud/bigtable/data/exceptions.py @@ -50,6 +50,14 @@ class _RowSetComplete(Exception): pass +class _MutateRowsIncomplete(RuntimeError): + """ + Exception raised when a mutate_rows call has unfinished work. + """ + + pass + + class BigtableExceptionGroup(ExceptionGroup if is_311_plus else Exception): # type: ignore # noqa: F821 """ Represents one or more exceptions that occur during a bulk Bigtable operation diff --git a/google/cloud/bigtable/data/mutations.py b/google/cloud/bigtable/data/mutations.py index a4c02cd74..c33bb61d7 100644 --- a/google/cloud/bigtable/data/mutations.py +++ b/google/cloud/bigtable/data/mutations.py @@ -19,16 +19,16 @@ from abc import ABC, abstractmethod from sys import getsizeof -# mutation entries above this should be rejected -from google.cloud.bigtable._mutate_rows import MUTATE_ROWS_REQUEST_MUTATION_LIMIT - - from google.cloud.bigtable.read_modify_write_rules import MAX_INCREMENT_VALUE # special value for SetCell mutation timestamps. 
If set, server will assign a timestamp SERVER_SIDE_TIMESTAMP = -1 +# mutation entries above this should be rejected +MUTATE_ROWS_REQUEST_MUTATION_LIMIT = 100_000 + + class Mutation(ABC): """Model class for mutations""" From 7e7a6676e96c737257222f8e3ba46c1e41a6ab73 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 23 Jun 2023 16:37:57 -0700 Subject: [PATCH 182/213] moved ReadRowsIterator into _read_rows --- .../cloud/bigtable/data/async/_read_rows.py | 108 ++++++++++++++- google/cloud/bigtable/data/async/iterators.py | 129 ------------------ 2 files changed, 107 insertions(+), 130 deletions(-) delete mode 100644 google/cloud/bigtable/data/async/iterators.py diff --git a/google/cloud/bigtable/data/async/_read_rows.py b/google/cloud/bigtable/data/async/_read_rows.py index 50673cab0..b3becbd89 100644 --- a/google/cloud/bigtable/data/async/_read_rows.py +++ b/google/cloud/bigtable/data/async/_read_rows.py @@ -25,7 +25,8 @@ Awaitable, Type, ) - +import sys +import time import asyncio from functools import partial from grpc.aio import RpcContext @@ -35,11 +36,13 @@ from google.cloud.bigtable.row import Row, _LastScannedRow from google.cloud.bigtable.exceptions import InvalidChunk from google.cloud.bigtable.exceptions import _RowSetComplete +from google.cloud.bigtable.exceptions import IdleTimeout from google.cloud.bigtable._read_rows_state_machine import _StateMachine from google.api_core import retry_async as retries from google.api_core import exceptions as core_exceptions from google.cloud.bigtable._helpers import _make_metadata from google.cloud.bigtable._helpers import _attempt_timeout_generator +from google.cloud.bigtable._helpers import _convert_retry_deadline class _ReadRowsOperationAsync(AsyncIterable[Row]): @@ -287,3 +290,106 @@ async def merge_row_response_stream( if not state_machine.is_terminal_state(): # read rows is complete, but there's still data in the merger raise InvalidChunk("read_rows completed with partial state remaining") + + +class ReadRowsIteratorAsync(AsyncIterable[Row]): + """ + Async iterator for ReadRows responses. + """ + + def __init__(self, merger: _ReadRowsOperationAsync): + self._merger: _ReadRowsOperationAsync = merger + self._error: Exception | None = None + self.last_interaction_time = time.time() + self._idle_timeout_task: asyncio.Task[None] | None = None + # wrap merger with a wrapper that properly formats exceptions + self._next_fn = _convert_retry_deadline( + self._merger.__anext__, + self._merger.operation_timeout, + self._merger.transient_errors, + ) + + async def _start_idle_timer(self, idle_timeout: float): + """ + Start a coroutine that will cancel a stream if no interaction + with the iterator occurs for the specified number of seconds. + + Subsequent access to the iterator will raise an IdleTimeout exception. 
+ + Args: + - idle_timeout: number of seconds of inactivity before cancelling the stream + """ + self.last_interaction_time = time.time() + if self._idle_timeout_task is not None: + self._idle_timeout_task.cancel() + self._idle_timeout_task = asyncio.create_task( + self._idle_timeout_coroutine(idle_timeout) + ) + if sys.version_info >= (3, 8): + self._idle_timeout_task.name = f"{self.__class__.__name__}.idle_timeout" + + @property + def active(self): + """ + Returns True if the iterator is still active and has not been closed + """ + return self._error is None + + async def _idle_timeout_coroutine(self, idle_timeout: float): + """ + Coroutine that will cancel a stream if no interaction with the iterator + in the last `idle_timeout` seconds. + """ + while self.active: + next_timeout = self.last_interaction_time + idle_timeout + await asyncio.sleep(next_timeout - time.time()) + if self.last_interaction_time + idle_timeout < time.time() and self.active: + # idle timeout has expired + await self._finish_with_error( + IdleTimeout( + ( + "Timed out waiting for next Row to be consumed. " + f"(idle_timeout={idle_timeout:0.1f}s)" + ) + ) + ) + + def __aiter__(self): + """Implement the async iterator protocol.""" + return self + + async def __anext__(self) -> Row: + """ + Implement the async iterator potocol. + + Return the next item in the stream if active, or + raise an exception if the stream has been closed. + """ + if self._error is not None: + raise self._error + try: + self.last_interaction_time = time.time() + return await self._next_fn() + except Exception as e: + await self._finish_with_error(e) + raise e + + async def _finish_with_error(self, e: Exception): + """ + Helper function to close the stream and clean up resources + after an error has occurred. + """ + if self.active: + await self._merger.aclose() + self._error = e + if self._idle_timeout_task is not None: + self._idle_timeout_task.cancel() + self._idle_timeout_task = None + + async def aclose(self): + """ + Support closing the stream with an explicit call to aclose() + """ + await self._finish_with_error( + StopAsyncIteration(f"{self.__class__.__name__} closed") + ) diff --git a/google/cloud/bigtable/data/async/iterators.py b/google/cloud/bigtable/data/async/iterators.py deleted file mode 100644 index 6292b0289..000000000 --- a/google/cloud/bigtable/data/async/iterators.py +++ /dev/null @@ -1,129 +0,0 @@ -# Copyright 2023 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -from __future__ import annotations - -from typing import AsyncIterable - -import asyncio -import time -import sys - -from google.cloud.bigtable._read_rows import _ReadRowsOperation -from google.cloud.bigtable.exceptions import IdleTimeout -from google.cloud.bigtable._helpers import _convert_retry_deadline -from google.cloud.bigtable.row import Row - - -class ReadRowsIteratorAsync(AsyncIterable[Row]): - """ - Async iterator for ReadRows responses. 
- """ - - def __init__(self, merger: _ReadRowsOperation): - self._merger: _ReadRowsOperation = merger - self._error: Exception | None = None - self.last_interaction_time = time.time() - self._idle_timeout_task: asyncio.Task[None] | None = None - # wrap merger with a wrapper that properly formats exceptions - self._next_fn = _convert_retry_deadline( - self._merger.__anext__, - self._merger.operation_timeout, - self._merger.transient_errors, - ) - - async def _start_idle_timer(self, idle_timeout: float): - """ - Start a coroutine that will cancel a stream if no interaction - with the iterator occurs for the specified number of seconds. - - Subsequent access to the iterator will raise an IdleTimeout exception. - - Args: - - idle_timeout: number of seconds of inactivity before cancelling the stream - """ - self.last_interaction_time = time.time() - if self._idle_timeout_task is not None: - self._idle_timeout_task.cancel() - self._idle_timeout_task = asyncio.create_task( - self._idle_timeout_coroutine(idle_timeout) - ) - if sys.version_info >= (3, 8): - self._idle_timeout_task.name = "ReadRowsIterator._idle_timeout" - - @property - def active(self): - """ - Returns True if the iterator is still active and has not been closed - """ - return self._error is None - - async def _idle_timeout_coroutine(self, idle_timeout: float): - """ - Coroutine that will cancel a stream if no interaction with the iterator - in the last `idle_timeout` seconds. - """ - while self.active: - next_timeout = self.last_interaction_time + idle_timeout - await asyncio.sleep(next_timeout - time.time()) - if self.last_interaction_time + idle_timeout < time.time() and self.active: - # idle timeout has expired - await self._finish_with_error( - IdleTimeout( - ( - "Timed out waiting for next Row to be consumed. " - f"(idle_timeout={idle_timeout:0.1f}s)" - ) - ) - ) - - def __aiter__(self): - """Implement the async iterator protocol.""" - return self - - async def __anext__(self) -> Row: - """ - Implement the async iterator potocol. - - Return the next item in the stream if active, or - raise an exception if the stream has been closed. - """ - if self._error is not None: - raise self._error - try: - self.last_interaction_time = time.time() - return await self._next_fn() - except Exception as e: - await self._finish_with_error(e) - raise e - - async def _finish_with_error(self, e: Exception): - """ - Helper function to close the stream and clean up resources - after an error has occurred. 
- """ - if self.active: - await self._merger.aclose() - self._error = e - if self._idle_timeout_task is not None: - self._idle_timeout_task.cancel() - self._idle_timeout_task = None - - async def aclose(self): - """ - Support closing the stream with an explicit call to aclose() - """ - await self._finish_with_error( - StopAsyncIteration(f"{self.__class__.__name__} closed") - ) From 66afd15d3377d1d63b8285e2a9e42a308b4f3714 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 23 Jun 2023 16:43:02 -0700 Subject: [PATCH 183/213] moved unit tests into data folder --- tests/unit/data/__init__.py | 15 ++ tests/unit/{ => data}/test__helpers.py | 0 tests/unit/{ => data}/test__mutate_rows.py | 0 tests/unit/{ => data}/test__read_rows.py | 0 tests/unit/{ => data}/test_client.py | 158 +++++++++--------- tests/unit/{ => data}/test_exceptions.py | 0 tests/unit/{ => data}/test_iterators.py | 0 tests/unit/{ => data}/test_mutations.py | 0 .../unit/{ => data}/test_mutations_batcher.py | 0 .../test_read_modify_write_rules.py | 0 .../{ => data}/test_read_rows_acceptance.py | 0 tests/unit/{ => data}/test_read_rows_query.py | 0 tests/unit/{ => data}/test_row.py | 0 tests/unit/{ => data}/test_row_filters.py | 0 14 files changed, 94 insertions(+), 79 deletions(-) create mode 100644 tests/unit/data/__init__.py rename tests/unit/{ => data}/test__helpers.py (100%) rename tests/unit/{ => data}/test__mutate_rows.py (100%) rename tests/unit/{ => data}/test__read_rows.py (100%) rename tests/unit/{ => data}/test_client.py (96%) rename tests/unit/{ => data}/test_exceptions.py (100%) rename tests/unit/{ => data}/test_iterators.py (100%) rename tests/unit/{ => data}/test_mutations.py (100%) rename tests/unit/{ => data}/test_mutations_batcher.py (100%) rename tests/unit/{ => data}/test_read_modify_write_rules.py (100%) rename tests/unit/{ => data}/test_read_rows_acceptance.py (100%) rename tests/unit/{ => data}/test_read_rows_query.py (100%) rename tests/unit/{ => data}/test_row.py (100%) rename tests/unit/{ => data}/test_row_filters.py (100%) diff --git a/tests/unit/data/__init__.py b/tests/unit/data/__init__.py new file mode 100644 index 000000000..89a37dc92 --- /dev/null +++ b/tests/unit/data/__init__.py @@ -0,0 +1,15 @@ +# -*- coding: utf-8 -*- +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# diff --git a/tests/unit/test__helpers.py b/tests/unit/data/test__helpers.py similarity index 100% rename from tests/unit/test__helpers.py rename to tests/unit/data/test__helpers.py diff --git a/tests/unit/test__mutate_rows.py b/tests/unit/data/test__mutate_rows.py similarity index 100% rename from tests/unit/test__mutate_rows.py rename to tests/unit/data/test__mutate_rows.py diff --git a/tests/unit/test__read_rows.py b/tests/unit/data/test__read_rows.py similarity index 100% rename from tests/unit/test__read_rows.py rename to tests/unit/data/test__read_rows.py diff --git a/tests/unit/test_client.py b/tests/unit/data/test_client.py similarity index 96% rename from tests/unit/test_client.py rename to tests/unit/data/test_client.py index 3557c1c16..0e39851e4 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/data/test_client.py @@ -20,15 +20,15 @@ import pytest -from google.cloud.bigtable import mutations +from google.cloud.bigtable.data import mutations from google.auth.credentials import AnonymousCredentials from google.cloud.bigtable_v2.types import ReadRowsResponse -from google.cloud.bigtable.read_rows_query import ReadRowsQuery +from google.cloud.bigtable.data.read_rows_query import ReadRowsQuery from google.api_core import exceptions as core_exceptions -from google.cloud.bigtable.exceptions import InvalidChunk +from google.cloud.bigtable.data.exceptions import InvalidChunk -from google.cloud.bigtable.read_modify_write_rules import IncrementRule -from google.cloud.bigtable.read_modify_write_rules import AppendValueRule +from google.cloud.bigtable.data.read_modify_write_rules import IncrementRule +from google.cloud.bigtable.data.read_modify_write_rules import AppendValueRule # try/except added for compatibility with python < 3.8 try: @@ -43,11 +43,11 @@ ) -class TestBigtableDataClient: +class TestBigtableDataClientAsync: def _get_target_class(self): - from google.cloud.bigtable.client import BigtableDataClient + from google.cloud.bigtable.data.client import BigtableDataClientAsync - return BigtableDataClient + return BigtableDataClientAsync def _make_one(self, *args, **kwargs): return self._get_target_class()(*args, **kwargs) @@ -118,7 +118,7 @@ async def test_ctor_dict_options(self): BigtableAsyncClient, ) from google.api_core.client_options import ClientOptions - from google.cloud.bigtable.client import BigtableDataClient + from google.cloud.bigtable.data.client import BigtableDataClientAsync client_options = {"api_endpoint": "foo.bar:1234"} with mock.patch.object(BigtableAsyncClient, "__init__") as bigtable_client_init: @@ -132,7 +132,7 @@ async def test_ctor_dict_options(self): assert called_options.api_endpoint == "foo.bar:1234" assert isinstance(called_options, ClientOptions) with mock.patch.object( - BigtableDataClient, "start_background_channel_refresh" + BigtableDataClientAsync, "start_background_channel_refresh" ) as start_background_refresh: client = self._make_one(client_options=client_options) start_background_refresh.assert_called_once() @@ -275,7 +275,7 @@ async def test_start_background_channel_refresh_tasks_names(self): for i in range(pool_size): name = client._channel_refresh_tasks[i].get_name() assert str(i) in name - assert "BigtableDataClient channel refresh " in name + assert "BigtableDataClientAsync channel refresh " in name await client.close() @pytest.mark.asyncio @@ -725,7 +725,7 @@ async def test__multiple_table_registration(self): add multiple owners to instance_owners, but only keep one copy of shared key in active_instances """ - from 
google.cloud.bigtable.client import _WarmedInstanceKey + from google.cloud.bigtable.data.client import _WarmedInstanceKey async with self._make_one(project="project-id") as client: async with client.get_table("instance_1", "table_1") as table_1: @@ -773,7 +773,7 @@ async def test__multiple_instance_registration(self): registering with multiple instance keys should update the key in instance_owners and active_instances """ - from google.cloud.bigtable.client import _WarmedInstanceKey + from google.cloud.bigtable.data.client import _WarmedInstanceKey async with self._make_one(project="project-id") as client: async with client.get_table("instance_1", "table_1") as table_1: @@ -808,8 +808,8 @@ async def test__multiple_instance_registration(self): @pytest.mark.asyncio async def test_get_table(self): - from google.cloud.bigtable.client import Table - from google.cloud.bigtable.client import _WarmedInstanceKey + from google.cloud.bigtable.data.client import TableAsync + from google.cloud.bigtable.data.client import _WarmedInstanceKey client = self._make_one(project="project-id") assert not client._active_instances @@ -822,7 +822,7 @@ async def test_get_table(self): expected_app_profile_id, ) await asyncio.sleep(0) - assert isinstance(table, Table) + assert isinstance(table, TableAsync) assert table.table_id == expected_table_id assert ( table.table_name @@ -844,15 +844,15 @@ async def test_get_table(self): @pytest.mark.asyncio async def test_get_table_context_manager(self): - from google.cloud.bigtable.client import Table - from google.cloud.bigtable.client import _WarmedInstanceKey + from google.cloud.bigtable.data.client import TableAsync + from google.cloud.bigtable.data.client import _WarmedInstanceKey expected_table_id = "table-id" expected_instance_id = "instance-id" expected_app_profile_id = "app-profile-id" expected_project_id = "project-id" - with mock.patch.object(Table, "close") as close_mock: + with mock.patch.object(TableAsync, "close") as close_mock: async with self._make_one(project=expected_project_id) as client: async with client.get_table( expected_instance_id, @@ -860,7 +860,7 @@ async def test_get_table_context_manager(self): expected_app_profile_id, ) as table: await asyncio.sleep(0) - assert isinstance(table, Table) + assert isinstance(table, TableAsync) assert table.table_id == expected_table_id assert ( table.table_name @@ -950,35 +950,35 @@ async def test_context_manager(self): def test_client_ctor_sync(self): # initializing client in a sync context should raise RuntimeError - from google.cloud.bigtable.client import BigtableDataClient + from google.cloud.bigtable.data.client import BigtableDataClientAsync with pytest.warns(RuntimeWarning) as warnings: - client = BigtableDataClient(project="project-id") + client = BigtableDataClientAsync(project="project-id") expected_warning = [w for w in warnings if "client.py" in w.filename] assert len(expected_warning) == 1 - assert "BigtableDataClient should be started in an asyncio event loop." in str( + assert "BigtableDataClientAsync should be started in an asyncio event loop." 
in str( expected_warning[0].message ) assert client.project == "project-id" assert client._channel_refresh_tasks == [] -class TestTable: +class TestTableAsync: @pytest.mark.asyncio async def test_table_ctor(self): - from google.cloud.bigtable.client import BigtableDataClient - from google.cloud.bigtable.client import Table - from google.cloud.bigtable.client import _WarmedInstanceKey + from google.cloud.bigtable.data.client import BigtableDataClientAsync + from google.cloud.bigtable.data.client import TableAsync + from google.cloud.bigtable.data.client import _WarmedInstanceKey expected_table_id = "table-id" expected_instance_id = "instance-id" expected_app_profile_id = "app-profile-id" expected_operation_timeout = 123 expected_per_request_timeout = 12 - client = BigtableDataClient() + client = BigtableDataClientAsync() assert not client._active_instances - table = Table( + table = TableAsync( client, expected_instance_id, expected_table_id, @@ -1007,19 +1007,19 @@ async def test_table_ctor(self): @pytest.mark.asyncio async def test_table_ctor_bad_timeout_values(self): - from google.cloud.bigtable.client import BigtableDataClient - from google.cloud.bigtable.client import Table + from google.cloud.bigtable.data.client import BigtableDataClientAsync + from google.cloud.bigtable.data.client import TableAsync - client = BigtableDataClient() + client = BigtableDataClientAsync() with pytest.raises(ValueError) as e: - Table(client, "", "", default_per_request_timeout=-1) + TableAsync(client, "", "", default_per_request_timeout=-1) assert "default_per_request_timeout must be greater than 0" in str(e.value) with pytest.raises(ValueError) as e: - Table(client, "", "", default_operation_timeout=-1) + TableAsync(client, "", "", default_operation_timeout=-1) assert "default_operation_timeout must be greater than 0" in str(e.value) with pytest.raises(ValueError) as e: - Table( + TableAsync( client, "", "", @@ -1034,12 +1034,12 @@ async def test_table_ctor_bad_timeout_values(self): def test_table_ctor_sync(self): # initializing client in a sync context should raise RuntimeError - from google.cloud.bigtable.client import Table + from google.cloud.bigtable.data.client import TableAsync client = mock.Mock() with pytest.raises(RuntimeError) as e: - Table(client, "instance-id", "table-id") - assert e.match("Table must be created within an async event loop context.") + TableAsync(client, "instance-id", "table-id") + assert e.match("TableAsync must be created within an async event loop context.") class TestReadRows: @@ -1048,12 +1048,12 @@ class TestReadRows: """ def _make_client(self, *args, **kwargs): - from google.cloud.bigtable.client import BigtableDataClient + from google.cloud.bigtable.data.client import BigtableDataClientAsync - return BigtableDataClient(*args, **kwargs) + return BigtableDataClientAsync(*args, **kwargs) def _make_table(self, *args, **kwargs): - from google.cloud.bigtable.client import Table + from google.cloud.bigtable.data.client import TableAsync client_mock = mock.Mock() client_mock._register_instance.side_effect = ( @@ -1070,7 +1070,7 @@ def _make_table(self, *args, **kwargs): ) client_mock._gapic_client.table_path.return_value = kwargs["table_id"] client_mock._gapic_client.instance_path.return_value = kwargs["instance_id"] - return Table(client_mock, *args, **kwargs) + return TableAsync(client_mock, *args, **kwargs) def _make_stats(self): from google.cloud.bigtable_v2.types import RequestStats @@ -1174,7 +1174,7 @@ async def test_read_rows_stream(self): 
@pytest.mark.parametrize("include_app_profile", [True, False]) @pytest.mark.asyncio async def test_read_rows_query_matches_request(self, include_app_profile): - from google.cloud.bigtable import RowRange + from google.cloud.bigtable.data import RowRange app_profile_id = "app_profile_id" if include_app_profile else None async with self._make_table(app_profile_id=app_profile_id) as table: @@ -1250,7 +1250,7 @@ async def test_read_rows_per_request_timeout( operation_timeout does not cancel the request, so we expect the number of requests to be the ceiling of operation_timeout / per_request_timeout. """ - from google.cloud.bigtable.exceptions import RetryExceptionGroup + from google.cloud.bigtable.data.exceptions import RetryExceptionGroup expected_last_timeout = operation_t - (expected_num - 1) * per_request_t @@ -1295,12 +1295,12 @@ async def test_read_rows_per_request_timeout( @pytest.mark.asyncio async def test_read_rows_idle_timeout(self): - from google.cloud.bigtable.client import ReadRowsIterator + from google.cloud.bigtable.data.client import ReadRowsIterator from google.cloud.bigtable_v2.services.bigtable.async_client import ( BigtableAsyncClient, ) - from google.cloud.bigtable.exceptions import IdleTimeout - from google.cloud.bigtable._read_rows import _ReadRowsOperation + from google.cloud.bigtable.data.exceptions import IdleTimeout + from google.cloud.bigtable.data._read_rows import _ReadRowsOperation chunks = [ self._make_chunk(row_key=b"test_1"), @@ -1398,8 +1398,8 @@ async def test_read_rows_revise_request(self): """ Ensure that _revise_request is called between retries """ - from google.cloud.bigtable._read_rows import _ReadRowsOperation - from google.cloud.bigtable.exceptions import InvalidChunk + from google.cloud.bigtable.data._read_rows import _ReadRowsOperation + from google.cloud.bigtable.data.exceptions import InvalidChunk with mock.patch.object( _ReadRowsOperation, "_revise_request_rowset" @@ -1432,7 +1432,7 @@ async def test_read_rows_default_timeouts(self): """ Ensure that the default timeouts are set on the read rows operation when not overridden """ - from google.cloud.bigtable._read_rows import _ReadRowsOperation + from google.cloud.bigtable.data._read_rows import _ReadRowsOperation operation_timeout = 8 per_request_timeout = 4 @@ -1455,7 +1455,7 @@ async def test_read_rows_default_timeout_override(self): """ When timeouts are passed, they overwrite default values """ - from google.cloud.bigtable._read_rows import _ReadRowsOperation + from google.cloud.bigtable.data._read_rows import _ReadRowsOperation operation_timeout = 8 per_request_timeout = 4 @@ -1653,9 +1653,9 @@ async def test_read_rows_metadata(self, include_app_profile): class TestReadRowsSharded: def _make_client(self, *args, **kwargs): - from google.cloud.bigtable.client import BigtableDataClient + from google.cloud.bigtable.data.client import BigtableDataClientAsync - return BigtableDataClient(*args, **kwargs) + return BigtableDataClientAsync(*args, **kwargs) @pytest.mark.asyncio async def test_read_rows_sharded_empty_query(self): @@ -1708,8 +1708,8 @@ async def test_read_rows_sharded_errors(self): """ Errors should be exposed as ShardedReadRowsExceptionGroups """ - from google.cloud.bigtable.exceptions import ShardedReadRowsExceptionGroup - from google.cloud.bigtable.exceptions import FailedQueryShardError + from google.cloud.bigtable.data.exceptions import ShardedReadRowsExceptionGroup + from google.cloud.bigtable.data.exceptions import FailedQueryShardError async with self._make_client() as client: 
async with client.get_table("instance", "table") as table: @@ -1785,8 +1785,8 @@ async def test_read_rows_sharded_batching(self): Large queries should be processed in batches to limit concurrency operation timeout should change between batches """ - from google.cloud.bigtable.client import Table - from google.cloud.bigtable.client import CONCURRENCY_LIMIT + from google.cloud.bigtable.data.client import TableAsync + from google.cloud.bigtable.data.client import CONCURRENCY_LIMIT assert CONCURRENCY_LIMIT == 10 # change this test if this changes @@ -1802,7 +1802,7 @@ async def test_read_rows_sharded_batching(self): # clock ticks one second on each check with mock.patch("time.monotonic", side_effect=range(0, 100000)): with mock.patch("asyncio.gather", AsyncMock()) as gather_mock: - await Table.read_rows_sharded(table_mock, query_list) + await TableAsync.read_rows_sharded(table_mock, query_list) # should have individual calls for each query assert table_mock.read_rows.call_count == n_queries # should have single gather call for each batch @@ -1843,9 +1843,9 @@ async def test_read_rows_sharded_batching(self): class TestSampleRowKeys: def _make_client(self, *args, **kwargs): - from google.cloud.bigtable.client import BigtableDataClient + from google.cloud.bigtable.data.client import BigtableDataClientAsync - return BigtableDataClient(*args, **kwargs) + return BigtableDataClientAsync(*args, **kwargs) async def _make_gapic_stream(self, sample_list: list[tuple[bytes, int]]): from google.cloud.bigtable_v2.types import SampleRowKeysResponse @@ -1980,7 +1980,7 @@ async def test_sample_row_keys_retryable_errors(self, retryable_exception): retryable errors should be retried until timeout """ from google.api_core.exceptions import DeadlineExceeded - from google.cloud.bigtable.exceptions import RetryExceptionGroup + from google.cloud.bigtable.data.exceptions import RetryExceptionGroup async with self._make_client() as client: async with client.get_table("instance", "table") as table: @@ -2023,9 +2023,9 @@ async def test_sample_row_keys_non_retryable_errors(self, non_retryable_exceptio class TestMutateRow: def _make_client(self, *args, **kwargs): - from google.cloud.bigtable.client import BigtableDataClient + from google.cloud.bigtable.data.client import BigtableDataClientAsync - return BigtableDataClient(*args, **kwargs) + return BigtableDataClientAsync(*args, **kwargs) @pytest.mark.asyncio @pytest.mark.parametrize( @@ -2085,7 +2085,7 @@ async def test_mutate_row(self, mutation_arg): @pytest.mark.asyncio async def test_mutate_row_retryable_errors(self, retryable_exception): from google.api_core.exceptions import DeadlineExceeded - from google.cloud.bigtable.exceptions import RetryExceptionGroup + from google.cloud.bigtable.data.exceptions import RetryExceptionGroup async with self._make_client(project="project") as client: async with client.get_table("instance", "table") as table: @@ -2190,9 +2190,9 @@ async def test_mutate_row_metadata(self, include_app_profile): class TestBulkMutateRows: def _make_client(self, *args, **kwargs): - from google.cloud.bigtable.client import BigtableDataClient + from google.cloud.bigtable.data.client import BigtableDataClientAsync - return BigtableDataClient(*args, **kwargs) + return BigtableDataClientAsync(*args, **kwargs) async def _mock_response(self, response_list): from google.cloud.bigtable_v2.types import MutateRowsResponse @@ -2300,7 +2300,7 @@ async def test_bulk_mutate_rows_idempotent_mutation_error_retryable( """ Individual idempotent mutations should be retried if 
they fail with a retryable error """ - from google.cloud.bigtable.exceptions import ( + from google.cloud.bigtable.data.exceptions import ( RetryExceptionGroup, FailedMutationEntryError, MutationsExceptionGroup, @@ -2347,7 +2347,7 @@ async def test_bulk_mutate_rows_idempotent_mutation_error_non_retryable( """ Individual idempotent mutations should not be retried if they fail with a non-retryable error """ - from google.cloud.bigtable.exceptions import ( + from google.cloud.bigtable.data.exceptions import ( FailedMutationEntryError, MutationsExceptionGroup, ) @@ -2386,7 +2386,7 @@ async def test_bulk_mutate_idempotent_retryable_request_errors( """ Individual idempotent mutations should be retried if the request fails with a retryable error """ - from google.cloud.bigtable.exceptions import ( + from google.cloud.bigtable.data.exceptions import ( RetryExceptionGroup, FailedMutationEntryError, MutationsExceptionGroup, @@ -2425,7 +2425,7 @@ async def test_bulk_mutate_rows_non_idempotent_retryable_errors( self, retryable_exception ): """Non-Idempotent mutations should never be retried""" - from google.cloud.bigtable.exceptions import ( + from google.cloud.bigtable.data.exceptions import ( FailedMutationEntryError, MutationsExceptionGroup, ) @@ -2467,7 +2467,7 @@ async def test_bulk_mutate_rows_non_retryable_errors(self, non_retryable_excepti """ If the request fails with a non-retryable error, mutations should not be retried """ - from google.cloud.bigtable.exceptions import ( + from google.cloud.bigtable.data.exceptions import ( FailedMutationEntryError, MutationsExceptionGroup, ) @@ -2502,7 +2502,7 @@ async def test_bulk_mutate_error_index(self): ServiceUnavailable, FailedPrecondition, ) - from google.cloud.bigtable.exceptions import ( + from google.cloud.bigtable.data.exceptions import ( RetryExceptionGroup, FailedMutationEntryError, MutationsExceptionGroup, @@ -2579,9 +2579,9 @@ async def test_bulk_mutate_row_metadata(self, include_app_profile): class TestCheckAndMutateRow: def _make_client(self, *args, **kwargs): - from google.cloud.bigtable.client import BigtableDataClient + from google.cloud.bigtable.data.client import BigtableDataClientAsync - return BigtableDataClient(*args, **kwargs) + return BigtableDataClientAsync(*args, **kwargs) @pytest.mark.parametrize("gapic_result", [True, False]) @pytest.mark.asyncio @@ -2660,7 +2660,7 @@ async def test_check_and_mutate_no_mutations(self): @pytest.mark.asyncio async def test_check_and_mutate_single_mutations(self): """if single mutations are passed, they should be internally wrapped in a list""" - from google.cloud.bigtable.mutations import SetCell + from google.cloud.bigtable.data.mutations import SetCell from google.cloud.bigtable_v2.types import CheckAndMutateRowResponse async with self._make_client() as client: @@ -2713,7 +2713,7 @@ async def test_check_and_mutate_predicate_object(self): async def test_check_and_mutate_mutations_parsing(self): """mutations objects should be converted to dicts""" from google.cloud.bigtable_v2.types import CheckAndMutateRowResponse - from google.cloud.bigtable.mutations import DeleteAllFromRow + from google.cloud.bigtable.data.mutations import DeleteAllFromRow mutations = [mock.Mock() for _ in range(5)] for idx, mutation in enumerate(mutations): @@ -2772,9 +2772,9 @@ async def test_check_and_mutate_metadata(self, include_app_profile): class TestReadModifyWriteRow: def _make_client(self, *args, **kwargs): - from google.cloud.bigtable.client import BigtableDataClient + from google.cloud.bigtable.data.client 
import BigtableDataClientAsync - return BigtableDataClient(*args, **kwargs) + return BigtableDataClientAsync(*args, **kwargs) @pytest.mark.parametrize( "call_rules,expected_rules", @@ -2886,7 +2886,7 @@ async def test_read_modify_write_row_building(self): """ results from gapic call should be used to construct row """ - from google.cloud.bigtable.row import Row + from google.cloud.bigtable.data.row import Row from google.cloud.bigtable_v2.types import ReadModifyWriteRowResponse from google.cloud.bigtable_v2.types import Row as RowPB diff --git a/tests/unit/test_exceptions.py b/tests/unit/data/test_exceptions.py similarity index 100% rename from tests/unit/test_exceptions.py rename to tests/unit/data/test_exceptions.py diff --git a/tests/unit/test_iterators.py b/tests/unit/data/test_iterators.py similarity index 100% rename from tests/unit/test_iterators.py rename to tests/unit/data/test_iterators.py diff --git a/tests/unit/test_mutations.py b/tests/unit/data/test_mutations.py similarity index 100% rename from tests/unit/test_mutations.py rename to tests/unit/data/test_mutations.py diff --git a/tests/unit/test_mutations_batcher.py b/tests/unit/data/test_mutations_batcher.py similarity index 100% rename from tests/unit/test_mutations_batcher.py rename to tests/unit/data/test_mutations_batcher.py diff --git a/tests/unit/test_read_modify_write_rules.py b/tests/unit/data/test_read_modify_write_rules.py similarity index 100% rename from tests/unit/test_read_modify_write_rules.py rename to tests/unit/data/test_read_modify_write_rules.py diff --git a/tests/unit/test_read_rows_acceptance.py b/tests/unit/data/test_read_rows_acceptance.py similarity index 100% rename from tests/unit/test_read_rows_acceptance.py rename to tests/unit/data/test_read_rows_acceptance.py diff --git a/tests/unit/test_read_rows_query.py b/tests/unit/data/test_read_rows_query.py similarity index 100% rename from tests/unit/test_read_rows_query.py rename to tests/unit/data/test_read_rows_query.py diff --git a/tests/unit/test_row.py b/tests/unit/data/test_row.py similarity index 100% rename from tests/unit/test_row.py rename to tests/unit/data/test_row.py diff --git a/tests/unit/test_row_filters.py b/tests/unit/data/test_row_filters.py similarity index 100% rename from tests/unit/test_row_filters.py rename to tests/unit/data/test_row_filters.py From fe4e280b361506b51163438e8a68e5bb60a25db9 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 23 Jun 2023 16:55:53 -0700 Subject: [PATCH 184/213] fixed v2 unit tests --- tests/unit/v2_client/test_app_profile.py | 44 ++-- tests/unit/v2_client/test_backup.py | 36 +-- tests/unit/v2_client/test_batcher.py | 237 ++++++++++++++----- tests/unit/v2_client/test_client.py | 59 ++--- tests/unit/v2_client/test_cluster.py | 68 +++--- tests/unit/v2_client/test_column_family.py | 62 ++--- tests/unit/v2_client/test_encryption_info.py | 8 +- tests/unit/v2_client/test_error.py | 2 +- tests/unit/v2_client/test_instance.py | 50 ++-- tests/unit/v2_client/test_policy.py | 28 +-- tests/unit/v2_client/test_row.py | 30 +-- tests/unit/v2_client/test_row_data.py | 62 ++--- tests/unit/v2_client/test_row_filters.py | 214 ++++++++--------- tests/unit/v2_client/test_row_merger.py | 8 +- tests/unit/v2_client/test_row_set.py | 60 ++--- tests/unit/v2_client/test_table.py | 188 +++++++-------- 16 files changed, 631 insertions(+), 525 deletions(-) diff --git a/tests/unit/v2_client/test_app_profile.py b/tests/unit/v2_client/test_app_profile.py index 575f25194..660ee7899 100644 --- 
a/tests/unit/v2_client/test_app_profile.py +++ b/tests/unit/v2_client/test_app_profile.py @@ -32,19 +32,19 @@ def _make_app_profile(*args, **kwargs): - from google.cloud.bigtable.deprecated.app_profile import AppProfile + from google.cloud.bigtable.app_profile import AppProfile return AppProfile(*args, **kwargs) def _make_client(*args, **kwargs): - from google.cloud.bigtable.deprecated.client import Client + from google.cloud.bigtable.client import Client return Client(*args, **kwargs) def test_app_profile_constructor_defaults(): - from google.cloud.bigtable.deprecated.app_profile import AppProfile + from google.cloud.bigtable.app_profile import AppProfile client = _Client(PROJECT) instance = _Instance(INSTANCE_ID, client) @@ -60,7 +60,7 @@ def test_app_profile_constructor_defaults(): def test_app_profile_constructor_explicit(): - from google.cloud.bigtable.deprecated.enums import RoutingPolicyType + from google.cloud.bigtable.enums import RoutingPolicyType ANY = RoutingPolicyType.ANY DESCRIPTION_1 = "routing policy any" @@ -99,7 +99,7 @@ def test_app_profile_constructor_explicit(): def test_app_profile_constructor_multi_cluster_ids(): - from google.cloud.bigtable.deprecated.enums import RoutingPolicyType + from google.cloud.bigtable.enums import RoutingPolicyType ANY = RoutingPolicyType.ANY DESCRIPTION_1 = "routing policy any" @@ -166,8 +166,8 @@ def test_app_profile___ne__(): def test_app_profile_from_pb_success_w_routing_any(): from google.cloud.bigtable_admin_v2.types import instance as data_v2_pb2 - from google.cloud.bigtable.deprecated.app_profile import AppProfile - from google.cloud.bigtable.deprecated.enums import RoutingPolicyType + from google.cloud.bigtable.app_profile import AppProfile + from google.cloud.bigtable.enums import RoutingPolicyType client = _Client(PROJECT) instance = _Instance(INSTANCE_ID, client) @@ -195,8 +195,8 @@ def test_app_profile_from_pb_success_w_routing_any(): def test_app_profile_from_pb_success_w_routing_any_multi_cluster_ids(): from google.cloud.bigtable_admin_v2.types import instance as data_v2_pb2 - from google.cloud.bigtable.deprecated.app_profile import AppProfile - from google.cloud.bigtable.deprecated.enums import RoutingPolicyType + from google.cloud.bigtable.app_profile import AppProfile + from google.cloud.bigtable.enums import RoutingPolicyType client = _Client(PROJECT) instance = _Instance(INSTANCE_ID, client) @@ -226,8 +226,8 @@ def test_app_profile_from_pb_success_w_routing_any_multi_cluster_ids(): def test_app_profile_from_pb_success_w_routing_single(): from google.cloud.bigtable_admin_v2.types import instance as data_v2_pb2 - from google.cloud.bigtable.deprecated.app_profile import AppProfile - from google.cloud.bigtable.deprecated.enums import RoutingPolicyType + from google.cloud.bigtable.app_profile import AppProfile + from google.cloud.bigtable.enums import RoutingPolicyType client = _Client(PROJECT) instance = _Instance(INSTANCE_ID, client) @@ -259,7 +259,7 @@ def test_app_profile_from_pb_success_w_routing_single(): def test_app_profile_from_pb_w_bad_app_profile_name(): from google.cloud.bigtable_admin_v2.types import instance as data_v2_pb2 - from google.cloud.bigtable.deprecated.app_profile import AppProfile + from google.cloud.bigtable.app_profile import AppProfile bad_app_profile_name = "BAD_NAME" @@ -271,7 +271,7 @@ def test_app_profile_from_pb_w_bad_app_profile_name(): def test_app_profile_from_pb_w_instance_id_mistmatch(): from google.cloud.bigtable_admin_v2.types import instance as data_v2_pb2 - from 
google.cloud.bigtable.deprecated.app_profile import AppProfile + from google.cloud.bigtable.app_profile import AppProfile ALT_INSTANCE_ID = "ALT_INSTANCE_ID" client = _Client(PROJECT) @@ -286,7 +286,7 @@ def test_app_profile_from_pb_w_instance_id_mistmatch(): def test_app_profile_from_pb_w_project_mistmatch(): from google.cloud.bigtable_admin_v2.types import instance as data_v2_pb2 - from google.cloud.bigtable.deprecated.app_profile import AppProfile + from google.cloud.bigtable.app_profile import AppProfile ALT_PROJECT = "ALT_PROJECT" client = _Client(project=ALT_PROJECT) @@ -304,7 +304,7 @@ def test_app_profile_reload_w_routing_any(): BigtableInstanceAdminClient, ) from google.cloud.bigtable_admin_v2.types import instance as data_v2_pb2 - from google.cloud.bigtable.deprecated.enums import RoutingPolicyType + from google.cloud.bigtable.enums import RoutingPolicyType api = mock.create_autospec(BigtableInstanceAdminClient) credentials = _make_credentials() @@ -400,8 +400,8 @@ def test_app_profile_create_w_routing_any(): from google.cloud.bigtable_admin_v2.services.bigtable_instance_admin import ( BigtableInstanceAdminClient, ) - from google.cloud.bigtable.deprecated.app_profile import AppProfile - from google.cloud.bigtable.deprecated.enums import RoutingPolicyType + from google.cloud.bigtable.app_profile import AppProfile + from google.cloud.bigtable.enums import RoutingPolicyType credentials = _make_credentials() client = _make_client(project=PROJECT, credentials=credentials, admin=True) @@ -461,8 +461,8 @@ def test_app_profile_create_w_routing_single(): from google.cloud.bigtable_admin_v2.services.bigtable_instance_admin import ( BigtableInstanceAdminClient, ) - from google.cloud.bigtable.deprecated.app_profile import AppProfile - from google.cloud.bigtable.deprecated.enums import RoutingPolicyType + from google.cloud.bigtable.app_profile import AppProfile + from google.cloud.bigtable.enums import RoutingPolicyType credentials = _make_credentials() client = _make_client(project=PROJECT, credentials=credentials, admin=True) @@ -533,7 +533,7 @@ def test_app_profile_update_w_routing_any(): from google.cloud.bigtable_admin_v2.types import ( bigtable_instance_admin as messages_v2_pb2, ) - from google.cloud.bigtable.deprecated.enums import RoutingPolicyType + from google.cloud.bigtable.enums import RoutingPolicyType from google.cloud.bigtable_admin_v2.services.bigtable_instance_admin import ( BigtableInstanceAdminClient, ) @@ -608,7 +608,7 @@ def test_app_profile_update_w_routing_any_multi_cluster_ids(): from google.cloud.bigtable_admin_v2.types import ( bigtable_instance_admin as messages_v2_pb2, ) - from google.cloud.bigtable.deprecated.enums import RoutingPolicyType + from google.cloud.bigtable.enums import RoutingPolicyType from google.cloud.bigtable_admin_v2.services.bigtable_instance_admin import ( BigtableInstanceAdminClient, ) @@ -684,7 +684,7 @@ def test_app_profile_update_w_routing_single(): from google.cloud.bigtable_admin_v2.types import ( bigtable_instance_admin as messages_v2_pb2, ) - from google.cloud.bigtable.deprecated.enums import RoutingPolicyType + from google.cloud.bigtable.enums import RoutingPolicyType from google.cloud.bigtable_admin_v2.services.bigtable_instance_admin import ( BigtableInstanceAdminClient, ) diff --git a/tests/unit/v2_client/test_backup.py b/tests/unit/v2_client/test_backup.py index 34cc8823a..9882ca339 100644 --- a/tests/unit/v2_client/test_backup.py +++ b/tests/unit/v2_client/test_backup.py @@ -48,7 +48,7 @@ def _make_table_admin_client(): def 
_make_backup(*args, **kwargs): - from google.cloud.bigtable.deprecated.backup import Backup + from google.cloud.bigtable.backup import Backup return Backup(*args, **kwargs) @@ -102,7 +102,7 @@ def test_backup_constructor_explicit(): def test_backup_from_pb_w_project_mismatch(): from google.cloud.bigtable_admin_v2.types import table - from google.cloud.bigtable.deprecated.backup import Backup + from google.cloud.bigtable.backup import Backup alt_project_id = "alt-project-id" client = _Client(project=alt_project_id) @@ -115,7 +115,7 @@ def test_backup_from_pb_w_project_mismatch(): def test_backup_from_pb_w_instance_mismatch(): from google.cloud.bigtable_admin_v2.types import table - from google.cloud.bigtable.deprecated.backup import Backup + from google.cloud.bigtable.backup import Backup alt_instance = "/projects/%s/instances/alt-instance" % PROJECT_ID client = _Client() @@ -128,7 +128,7 @@ def test_backup_from_pb_w_instance_mismatch(): def test_backup_from_pb_w_bad_name(): from google.cloud.bigtable_admin_v2.types import table - from google.cloud.bigtable.deprecated.backup import Backup + from google.cloud.bigtable.backup import Backup client = _Client() instance = _Instance(INSTANCE_NAME, client) @@ -139,10 +139,10 @@ def test_backup_from_pb_w_bad_name(): def test_backup_from_pb_success(): - from google.cloud.bigtable.deprecated.encryption_info import EncryptionInfo - from google.cloud.bigtable.deprecated.error import Status + from google.cloud.bigtable.encryption_info import EncryptionInfo + from google.cloud.bigtable.error import Status from google.cloud.bigtable_admin_v2.types import table - from google.cloud.bigtable.deprecated.backup import Backup + from google.cloud.bigtable.backup import Backup from google.cloud._helpers import _datetime_to_pb_timestamp from google.rpc.code_pb2 import Code @@ -190,7 +190,7 @@ def test_backup_from_pb_success(): def test_backup_name(): - from google.cloud.bigtable.deprecated.client import Client + from google.cloud.bigtable.client import Client from google.cloud.bigtable_admin_v2.services.bigtable_instance_admin import ( BigtableInstanceAdminClient, ) @@ -225,7 +225,7 @@ def test_backup_parent_none(): def test_backup_parent_w_cluster(): - from google.cloud.bigtable.deprecated.client import Client + from google.cloud.bigtable.client import Client from google.cloud.bigtable_admin_v2.services.bigtable_instance_admin import ( BigtableInstanceAdminClient, ) @@ -242,7 +242,7 @@ def test_backup_parent_w_cluster(): def test_backup_source_table_none(): - from google.cloud.bigtable.deprecated.client import Client + from google.cloud.bigtable.client import Client from google.cloud.bigtable_admin_v2.services.bigtable_instance_admin import ( BigtableInstanceAdminClient, ) @@ -258,7 +258,7 @@ def test_backup_source_table_none(): def test_backup_source_table_valid(): - from google.cloud.bigtable.deprecated.client import Client + from google.cloud.bigtable.client import Client from google.cloud.bigtable_admin_v2.services.bigtable_instance_admin import ( BigtableInstanceAdminClient, ) @@ -473,7 +473,7 @@ def test_backup_create_w_expire_time_not_set(): def test_backup_create_success(): from google.cloud._helpers import _datetime_to_pb_timestamp from google.cloud.bigtable_admin_v2.types import table - from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable import Client op_future = object() credentials = _make_credentials() @@ -806,12 +806,12 @@ def test_backup_restore_to_another_instance(): def test_backup_get_iam_policy(): - from 
google.cloud.bigtable.deprecated.client import Client + from google.cloud.bigtable.client import Client from google.cloud.bigtable_admin_v2.services.bigtable_table_admin import ( BigtableTableAdminClient, ) from google.iam.v1 import policy_pb2 - from google.cloud.bigtable.deprecated.policy import BIGTABLE_ADMIN_ROLE + from google.cloud.bigtable.policy import BIGTABLE_ADMIN_ROLE credentials = _make_credentials() client = Client(project=PROJECT_ID, credentials=credentials, admin=True) @@ -842,13 +842,13 @@ def test_backup_get_iam_policy(): def test_backup_set_iam_policy(): - from google.cloud.bigtable.deprecated.client import Client + from google.cloud.bigtable.client import Client from google.cloud.bigtable_admin_v2.services.bigtable_table_admin import ( BigtableTableAdminClient, ) from google.iam.v1 import policy_pb2 - from google.cloud.bigtable.deprecated.policy import Policy - from google.cloud.bigtable.deprecated.policy import BIGTABLE_ADMIN_ROLE + from google.cloud.bigtable.policy import Policy + from google.cloud.bigtable.policy import BIGTABLE_ADMIN_ROLE credentials = _make_credentials() client = Client(project=PROJECT_ID, credentials=credentials, admin=True) @@ -887,7 +887,7 @@ def test_backup_set_iam_policy(): def test_backup_test_iam_permissions(): - from google.cloud.bigtable.deprecated.client import Client + from google.cloud.bigtable.client import Client from google.cloud.bigtable_admin_v2.services.bigtable_table_admin import ( BigtableTableAdminClient, ) diff --git a/tests/unit/v2_client/test_batcher.py b/tests/unit/v2_client/test_batcher.py index 0793ed480..7284652f4 100644 --- a/tests/unit/v2_client/test_batcher.py +++ b/tests/unit/v2_client/test_batcher.py @@ -14,122 +14,139 @@ import mock +import time + import pytest -from google.cloud.bigtable.deprecated.row import DirectRow +from google.cloud.bigtable.row import DirectRow +from google.cloud.bigtable.batcher import ( + _FlowControl, + MutationsBatcher, + MutationsBatchError, +) TABLE_ID = "table-id" TABLE_NAME = "/tables/" + TABLE_ID -def _make_mutation_batcher(table, **kw): - from google.cloud.bigtable.deprecated.batcher import MutationsBatcher - - return MutationsBatcher(table, **kw) +def test_mutation_batcher_constructor(): + table = _Table(TABLE_NAME) + with MutationsBatcher(table) as mutation_batcher: + assert table is mutation_batcher.table -def test_mutation_batcher_constructor(): +def test_mutation_batcher_w_user_callback(): table = _Table(TABLE_NAME) - mutation_batcher = _make_mutation_batcher(table) - assert table is mutation_batcher.table + def callback_fn(response): + callback_fn.count = len(response) + + with MutationsBatcher( + table, flush_count=1, batch_completed_callback=callback_fn + ) as mutation_batcher: + rows = [ + DirectRow(row_key=b"row_key"), + DirectRow(row_key=b"row_key_2"), + DirectRow(row_key=b"row_key_3"), + DirectRow(row_key=b"row_key_4"), + ] + + mutation_batcher.mutate_rows(rows) + + assert callback_fn.count == 4 def test_mutation_batcher_mutate_row(): table = _Table(TABLE_NAME) - mutation_batcher = _make_mutation_batcher(table=table) + with MutationsBatcher(table=table) as mutation_batcher: - rows = [ - DirectRow(row_key=b"row_key"), - DirectRow(row_key=b"row_key_2"), - DirectRow(row_key=b"row_key_3"), - DirectRow(row_key=b"row_key_4"), - ] + rows = [ + DirectRow(row_key=b"row_key"), + DirectRow(row_key=b"row_key_2"), + DirectRow(row_key=b"row_key_3"), + DirectRow(row_key=b"row_key_4"), + ] - mutation_batcher.mutate_rows(rows) - mutation_batcher.flush() + 
mutation_batcher.mutate_rows(rows) assert table.mutation_calls == 1 def test_mutation_batcher_mutate(): table = _Table(TABLE_NAME) - mutation_batcher = _make_mutation_batcher(table=table) - - row = DirectRow(row_key=b"row_key") - row.set_cell("cf1", b"c1", 1) - row.set_cell("cf1", b"c2", 2) - row.set_cell("cf1", b"c3", 3) - row.set_cell("cf1", b"c4", 4) + with MutationsBatcher(table=table) as mutation_batcher: - mutation_batcher.mutate(row) + row = DirectRow(row_key=b"row_key") + row.set_cell("cf1", b"c1", 1) + row.set_cell("cf1", b"c2", 2) + row.set_cell("cf1", b"c3", 3) + row.set_cell("cf1", b"c4", 4) - mutation_batcher.flush() + mutation_batcher.mutate(row) assert table.mutation_calls == 1 def test_mutation_batcher_flush_w_no_rows(): table = _Table(TABLE_NAME) - mutation_batcher = _make_mutation_batcher(table=table) - mutation_batcher.flush() + with MutationsBatcher(table=table) as mutation_batcher: + mutation_batcher.flush() assert table.mutation_calls == 0 def test_mutation_batcher_mutate_w_max_flush_count(): table = _Table(TABLE_NAME) - mutation_batcher = _make_mutation_batcher(table=table, flush_count=3) + with MutationsBatcher(table=table, flush_count=3) as mutation_batcher: - row_1 = DirectRow(row_key=b"row_key_1") - row_2 = DirectRow(row_key=b"row_key_2") - row_3 = DirectRow(row_key=b"row_key_3") + row_1 = DirectRow(row_key=b"row_key_1") + row_2 = DirectRow(row_key=b"row_key_2") + row_3 = DirectRow(row_key=b"row_key_3") - mutation_batcher.mutate(row_1) - mutation_batcher.mutate(row_2) - mutation_batcher.mutate(row_3) + mutation_batcher.mutate(row_1) + mutation_batcher.mutate(row_2) + mutation_batcher.mutate(row_3) assert table.mutation_calls == 1 -@mock.patch("google.cloud.bigtable.deprecated.batcher.MAX_MUTATIONS", new=3) -def test_mutation_batcher_mutate_with_max_mutations_failure(): - from google.cloud.bigtable.deprecated.batcher import MaxMutationsError - +@mock.patch("google.cloud.bigtable.batcher.MAX_OUTSTANDING_ELEMENTS", new=3) +def test_mutation_batcher_mutate_w_max_mutations(): table = _Table(TABLE_NAME) - mutation_batcher = _make_mutation_batcher(table=table) + with MutationsBatcher(table=table) as mutation_batcher: - row = DirectRow(row_key=b"row_key") - row.set_cell("cf1", b"c1", 1) - row.set_cell("cf1", b"c2", 2) - row.set_cell("cf1", b"c3", 3) - row.set_cell("cf1", b"c4", 4) + row = DirectRow(row_key=b"row_key") + row.set_cell("cf1", b"c1", 1) + row.set_cell("cf1", b"c2", 2) + row.set_cell("cf1", b"c3", 3) - with pytest.raises(MaxMutationsError): mutation_batcher.mutate(row) + assert table.mutation_calls == 1 + -@mock.patch("google.cloud.bigtable.deprecated.batcher.MAX_MUTATIONS", new=3) -def test_mutation_batcher_mutate_w_max_mutations(): +def test_mutation_batcher_mutate_w_max_row_bytes(): table = _Table(TABLE_NAME) - mutation_batcher = _make_mutation_batcher(table=table) + with MutationsBatcher( + table=table, max_row_bytes=3 * 1024 * 1024 + ) as mutation_batcher: - row = DirectRow(row_key=b"row_key") - row.set_cell("cf1", b"c1", 1) - row.set_cell("cf1", b"c2", 2) - row.set_cell("cf1", b"c3", 3) + number_of_bytes = 1 * 1024 * 1024 + max_value = b"1" * number_of_bytes - mutation_batcher.mutate(row) - mutation_batcher.flush() + row = DirectRow(row_key=b"row_key") + row.set_cell("cf1", b"c1", max_value) + row.set_cell("cf1", b"c2", max_value) + row.set_cell("cf1", b"c3", max_value) + + mutation_batcher.mutate(row) assert table.mutation_calls == 1 -def test_mutation_batcher_mutate_w_max_row_bytes(): +def test_mutations_batcher_flushed_when_closed(): table = 
_Table(TABLE_NAME) - mutation_batcher = _make_mutation_batcher( - table=table, max_row_bytes=3 * 1024 * 1024 - ) + mutation_batcher = MutationsBatcher(table=table, max_row_bytes=3 * 1024 * 1024) number_of_bytes = 1 * 1024 * 1024 max_value = b"1" * number_of_bytes @@ -137,13 +154,108 @@ def test_mutation_batcher_mutate_w_max_row_bytes(): row = DirectRow(row_key=b"row_key") row.set_cell("cf1", b"c1", max_value) row.set_cell("cf1", b"c2", max_value) - row.set_cell("cf1", b"c3", max_value) mutation_batcher.mutate(row) + assert table.mutation_calls == 0 + + mutation_batcher.close() assert table.mutation_calls == 1 +def test_mutations_batcher_context_manager_flushed_when_closed(): + table = _Table(TABLE_NAME) + with MutationsBatcher( + table=table, max_row_bytes=3 * 1024 * 1024 + ) as mutation_batcher: + + number_of_bytes = 1 * 1024 * 1024 + max_value = b"1" * number_of_bytes + + row = DirectRow(row_key=b"row_key") + row.set_cell("cf1", b"c1", max_value) + row.set_cell("cf1", b"c2", max_value) + + mutation_batcher.mutate(row) + + assert table.mutation_calls == 1 + + +@mock.patch("google.cloud.bigtable.batcher.MutationsBatcher.flush") +def test_mutations_batcher_flush_interval(mocked_flush): + table = _Table(TABLE_NAME) + flush_interval = 0.5 + mutation_batcher = MutationsBatcher(table=table, flush_interval=flush_interval) + + assert mutation_batcher._timer.interval == flush_interval + mocked_flush.assert_not_called() + + time.sleep(0.4) + mocked_flush.assert_not_called() + + time.sleep(0.1) + mocked_flush.assert_called_once_with() + + mutation_batcher.close() + + +def test_mutations_batcher_response_with_error_codes(): + from google.rpc.status_pb2 import Status + + mocked_response = [Status(code=1), Status(code=5)] + + with mock.patch("test_batcher._Table") as mocked_table: + table = mocked_table.return_value + mutation_batcher = MutationsBatcher(table=table) + + row1 = DirectRow(row_key=b"row_key") + row2 = DirectRow(row_key=b"row_key") + table.mutate_rows.return_value = mocked_response + + mutation_batcher.mutate_rows([row1, row2]) + with pytest.raises(MutationsBatchError) as exc: + mutation_batcher.close() + assert exc.value.message == "Errors in batch mutations." + assert len(exc.value.exc) == 2 + + assert exc.value.exc[0].message == mocked_response[0].message + assert exc.value.exc[1].message == mocked_response[1].message + + +def test_flow_control_event_is_set_when_not_blocked(): + flow_control = _FlowControl() + + flow_control.set_flow_control_status() + assert flow_control.event.is_set() + + +def test_flow_control_event_is_not_set_when_blocked(): + flow_control = _FlowControl() + + flow_control.inflight_mutations = flow_control.max_mutations + flow_control.inflight_size = flow_control.max_mutation_bytes + + flow_control.set_flow_control_status() + assert not flow_control.event.is_set() + + +@mock.patch("concurrent.futures.ThreadPoolExecutor.submit") +def test_flush_async_batch_count(mocked_executor_submit): + table = _Table(TABLE_NAME) + mutation_batcher = MutationsBatcher(table=table, flush_count=2) + + number_of_bytes = 1 * 1024 * 1024 + max_value = b"1" * number_of_bytes + for index in range(5): + row = DirectRow(row_key=f"row_key_{index}") + row.set_cell("cf1", b"c1", max_value) + mutation_batcher.mutate(row) + mutation_batcher._flush_async() + + # 3 batches submitted. 2 batches of 2 items, and the last one a single item batch. 
+ assert mocked_executor_submit.call_count == 3 + + class _Instance(object): def __init__(self, client=None): self._client = client @@ -156,5 +268,8 @@ def __init__(self, name, client=None): self.mutation_calls = 0 def mutate_rows(self, rows): + from google.rpc.status_pb2 import Status + self.mutation_calls += 1 - return rows + + return [Status(code=0) for _ in rows] diff --git a/tests/unit/v2_client/test_client.py b/tests/unit/v2_client/test_client.py index 9deac6a25..5944c58a3 100644 --- a/tests/unit/v2_client/test_client.py +++ b/tests/unit/v2_client/test_client.py @@ -25,7 +25,7 @@ def _invoke_client_factory(client_class, **kw): - from google.cloud.bigtable.deprecated.client import _create_gapic_client + from google.cloud.bigtable.client import _create_gapic_client return _create_gapic_client(client_class, **kw) @@ -101,27 +101,23 @@ def __init__(self, credentials, emulator_host=None, emulator_channel=None): def _make_client(*args, **kwargs): - from google.cloud.bigtable.deprecated.client import Client + from google.cloud.bigtable.client import Client return Client(*args, **kwargs) @mock.patch("os.environ", {}) def test_client_constructor_defaults(): - import warnings from google.api_core import client_info - from google.cloud.bigtable.deprecated import __version__ - from google.cloud.bigtable.deprecated.client import DATA_SCOPE + from google.cloud.bigtable import __version__ + from google.cloud.bigtable.client import DATA_SCOPE credentials = _make_credentials() - with warnings.catch_warnings(record=True) as warned: - with mock.patch("google.auth.default") as mocked: - mocked.return_value = credentials, PROJECT - client = _make_client() + with mock.patch("google.auth.default") as mocked: + mocked.return_value = credentials, PROJECT + client = _make_client() - # warn about client deprecation - assert len(warned) == 1 assert client.project == PROJECT assert client._credentials is credentials.with_scopes.return_value assert not client._read_only @@ -135,8 +131,8 @@ def test_client_constructor_defaults(): def test_client_constructor_explicit(): import warnings - from google.cloud.bigtable.deprecated.client import ADMIN_SCOPE - from google.cloud.bigtable.deprecated.client import DATA_SCOPE + from google.cloud.bigtable.client import ADMIN_SCOPE + from google.cloud.bigtable.client import DATA_SCOPE credentials = _make_credentials() client_info = mock.Mock() @@ -151,8 +147,7 @@ def test_client_constructor_explicit(): channel=mock.sentinel.channel, ) - # deprecationw arnning for channel and Client deprecation - assert len(warned) == 2 + assert len(warned) == 1 assert client.project == PROJECT assert client._credentials is credentials.with_scopes.return_value @@ -176,10 +171,8 @@ def test_client_constructor_w_both_admin_and_read_only(): def test_client_constructor_w_emulator_host(): from google.cloud.environment_vars import BIGTABLE_EMULATOR - from google.cloud.bigtable.deprecated.client import ( - _DEFAULT_BIGTABLE_EMULATOR_CLIENT, - ) - from google.cloud.bigtable.deprecated.client import _GRPC_CHANNEL_OPTIONS + from google.cloud.bigtable.client import _DEFAULT_BIGTABLE_EMULATOR_CLIENT + from google.cloud.bigtable.client import _GRPC_CHANNEL_OPTIONS emulator_host = "localhost:8081" with mock.patch("os.environ", {BIGTABLE_EMULATOR: emulator_host}): @@ -202,7 +195,7 @@ def test_client_constructor_w_emulator_host(): def test_client_constructor_w_emulator_host_w_project(): from google.cloud.environment_vars import BIGTABLE_EMULATOR - from google.cloud.bigtable.deprecated.client import 
_GRPC_CHANNEL_OPTIONS + from google.cloud.bigtable.client import _GRPC_CHANNEL_OPTIONS emulator_host = "localhost:8081" with mock.patch("os.environ", {BIGTABLE_EMULATOR: emulator_host}): @@ -223,10 +216,8 @@ def test_client_constructor_w_emulator_host_w_project(): def test_client_constructor_w_emulator_host_w_credentials(): from google.cloud.environment_vars import BIGTABLE_EMULATOR - from google.cloud.bigtable.deprecated.client import ( - _DEFAULT_BIGTABLE_EMULATOR_CLIENT, - ) - from google.cloud.bigtable.deprecated.client import _GRPC_CHANNEL_OPTIONS + from google.cloud.bigtable.client import _DEFAULT_BIGTABLE_EMULATOR_CLIENT + from google.cloud.bigtable.client import _GRPC_CHANNEL_OPTIONS emulator_host = "localhost:8081" credentials = _make_credentials() @@ -247,15 +238,15 @@ def test_client_constructor_w_emulator_host_w_credentials(): def test_client__get_scopes_default(): - from google.cloud.bigtable.deprecated.client import DATA_SCOPE + from google.cloud.bigtable.client import DATA_SCOPE client = _make_client(project=PROJECT, credentials=_make_credentials()) assert client._get_scopes() == (DATA_SCOPE,) def test_client__get_scopes_w_admin(): - from google.cloud.bigtable.deprecated.client import ADMIN_SCOPE - from google.cloud.bigtable.deprecated.client import DATA_SCOPE + from google.cloud.bigtable.client import ADMIN_SCOPE + from google.cloud.bigtable.client import DATA_SCOPE client = _make_client(project=PROJECT, credentials=_make_credentials(), admin=True) expected_scopes = (DATA_SCOPE, ADMIN_SCOPE) @@ -263,7 +254,7 @@ def test_client__get_scopes_w_admin(): def test_client__get_scopes_w_read_only(): - from google.cloud.bigtable.deprecated.client import READ_ONLY_SCOPE + from google.cloud.bigtable.client import READ_ONLY_SCOPE client = _make_client( project=PROJECT, credentials=_make_credentials(), read_only=True @@ -353,7 +344,7 @@ def test_client__local_composite_credentials(): def _create_gapic_client_channel_helper(endpoint=None, emulator_host=None): - from google.cloud.bigtable.deprecated.client import _GRPC_CHANNEL_OPTIONS + from google.cloud.bigtable.client import _GRPC_CHANNEL_OPTIONS client_class = mock.Mock(spec=["DEFAULT_ENDPOINT"]) credentials = _make_credentials() @@ -627,7 +618,7 @@ def test_client_instance_admin_client_initialized(): def test_client_instance_factory_defaults(): - from google.cloud.bigtable.deprecated.instance import Instance + from google.cloud.bigtable.instance import Instance credentials = _make_credentials() client = _make_client(project=PROJECT, credentials=credentials) @@ -643,8 +634,8 @@ def test_client_instance_factory_defaults(): def test_client_instance_factory_non_defaults(): - from google.cloud.bigtable.deprecated.instance import Instance - from google.cloud.bigtable.deprecated import enums + from google.cloud.bigtable.instance import Instance + from google.cloud.bigtable import enums instance_type = enums.Instance.Type.DEVELOPMENT labels = {"foo": "bar"} @@ -674,7 +665,7 @@ def test_client_list_instances(): from google.cloud.bigtable_admin_v2.services.bigtable_instance_admin import ( BigtableInstanceAdminClient, ) - from google.cloud.bigtable.deprecated.instance import Instance + from google.cloud.bigtable.instance import Instance FAILED_LOCATION = "FAILED" INSTANCE_ID1 = "instance-id1" @@ -726,7 +717,7 @@ def test_client_list_clusters(): bigtable_instance_admin as messages_v2_pb2, ) from google.cloud.bigtable_admin_v2.types import instance as data_v2_pb2 - from google.cloud.bigtable.deprecated.instance import Cluster + from 
google.cloud.bigtable.instance import Cluster instance_api = mock.create_autospec(BigtableInstanceAdminClient) diff --git a/tests/unit/v2_client/test_cluster.py b/tests/unit/v2_client/test_cluster.py index e667c2af4..cb0312b0c 100644 --- a/tests/unit/v2_client/test_cluster.py +++ b/tests/unit/v2_client/test_cluster.py @@ -42,13 +42,13 @@ def _make_cluster(*args, **kwargs): - from google.cloud.bigtable.deprecated.cluster import Cluster + from google.cloud.bigtable.cluster import Cluster return Cluster(*args, **kwargs) def _make_client(*args, **kwargs): - from google.cloud.bigtable.deprecated.client import Client + from google.cloud.bigtable.client import Client return Client(*args, **kwargs) @@ -72,8 +72,8 @@ def test_cluster_constructor_defaults(): def test_cluster_constructor_explicit(): - from google.cloud.bigtable.deprecated.enums import StorageType - from google.cloud.bigtable.deprecated.enums import Cluster + from google.cloud.bigtable.enums import StorageType + from google.cloud.bigtable.enums import Cluster STATE = Cluster.State.READY STORAGE_TYPE_SSD = StorageType.SSD @@ -126,8 +126,8 @@ def test_cluster_kms_key_name_setter(): def test_cluster_from_pb_success(): from google.cloud.bigtable_admin_v2.types import instance as data_v2_pb2 - from google.cloud.bigtable.deprecated.cluster import Cluster - from google.cloud.bigtable.deprecated import enums + from google.cloud.bigtable.cluster import Cluster + from google.cloud.bigtable import enums client = _Client(PROJECT) instance = _Instance(INSTANCE_ID, client) @@ -162,7 +162,7 @@ def test_cluster_from_pb_success(): def test_cluster_from_pb_w_bad_cluster_name(): from google.cloud.bigtable_admin_v2.types import instance as data_v2_pb2 - from google.cloud.bigtable.deprecated.cluster import Cluster + from google.cloud.bigtable.cluster import Cluster bad_cluster_name = "BAD_NAME" @@ -174,7 +174,7 @@ def test_cluster_from_pb_w_bad_cluster_name(): def test_cluster_from_pb_w_instance_id_mistmatch(): from google.cloud.bigtable_admin_v2.types import instance as data_v2_pb2 - from google.cloud.bigtable.deprecated.cluster import Cluster + from google.cloud.bigtable.cluster import Cluster ALT_INSTANCE_ID = "ALT_INSTANCE_ID" client = _Client(PROJECT) @@ -189,7 +189,7 @@ def test_cluster_from_pb_w_instance_id_mistmatch(): def test_cluster_from_pb_w_project_mistmatch(): from google.cloud.bigtable_admin_v2.types import instance as data_v2_pb2 - from google.cloud.bigtable.deprecated.cluster import Cluster + from google.cloud.bigtable.cluster import Cluster ALT_PROJECT = "ALT_PROJECT" client = _Client(project=ALT_PROJECT) @@ -204,8 +204,8 @@ def test_cluster_from_pb_w_project_mistmatch(): def test_cluster_from_pb_w_autoscaling(): from google.cloud.bigtable_admin_v2.types import instance as data_v2_pb2 - from google.cloud.bigtable.deprecated.cluster import Cluster - from google.cloud.bigtable.deprecated import enums + from google.cloud.bigtable.cluster import Cluster + from google.cloud.bigtable import enums client = _Client(PROJECT) instance = _Instance(INSTANCE_ID, client) @@ -292,8 +292,8 @@ def _make_instance_admin_client(): def test_cluster_reload(): from google.cloud.bigtable_admin_v2.types import instance as data_v2_pb2 - from google.cloud.bigtable.deprecated.enums import StorageType - from google.cloud.bigtable.deprecated.enums import Cluster + from google.cloud.bigtable.enums import StorageType + from google.cloud.bigtable.enums import Cluster credentials = _make_credentials() client = _make_client(project=PROJECT, credentials=credentials, 
admin=True) @@ -349,7 +349,7 @@ def test_cluster_reload(): def test_cluster_exists_hit(): from google.cloud.bigtable_admin_v2.types import instance as data_v2_pb2 - from google.cloud.bigtable.deprecated.instance import Instance + from google.cloud.bigtable.instance import Instance credentials = _make_credentials() client = _make_client(project=PROJECT, credentials=credentials, admin=True) @@ -371,7 +371,7 @@ def test_cluster_exists_hit(): def test_cluster_exists_miss(): - from google.cloud.bigtable.deprecated.instance import Instance + from google.cloud.bigtable.instance import Instance from google.api_core import exceptions credentials = _make_credentials() @@ -390,7 +390,7 @@ def test_cluster_exists_miss(): def test_cluster_exists_w_error(): - from google.cloud.bigtable.deprecated.instance import Instance + from google.cloud.bigtable.instance import Instance from google.api_core import exceptions credentials = _make_credentials() @@ -416,9 +416,9 @@ def test_cluster_create(): bigtable_instance_admin as messages_v2_pb2, ) from google.cloud._helpers import _datetime_to_pb_timestamp - from google.cloud.bigtable.deprecated.instance import Instance + from google.cloud.bigtable.instance import Instance from google.cloud.bigtable_admin_v2.types import instance as instance_v2_pb2 - from google.cloud.bigtable.deprecated.enums import StorageType + from google.cloud.bigtable.enums import StorageType NOW = datetime.datetime.utcnow() NOW_PB = _datetime_to_pb_timestamp(NOW) @@ -471,9 +471,9 @@ def test_cluster_create_w_cmek(): bigtable_instance_admin as messages_v2_pb2, ) from google.cloud._helpers import _datetime_to_pb_timestamp - from google.cloud.bigtable.deprecated.instance import Instance + from google.cloud.bigtable.instance import Instance from google.cloud.bigtable_admin_v2.types import instance as instance_v2_pb2 - from google.cloud.bigtable.deprecated.enums import StorageType + from google.cloud.bigtable.enums import StorageType NOW = datetime.datetime.utcnow() NOW_PB = _datetime_to_pb_timestamp(NOW) @@ -531,9 +531,9 @@ def test_cluster_create_w_autoscaling(): bigtable_instance_admin as messages_v2_pb2, ) from google.cloud._helpers import _datetime_to_pb_timestamp - from google.cloud.bigtable.deprecated.instance import Instance + from google.cloud.bigtable.instance import Instance from google.cloud.bigtable_admin_v2.types import instance as instance_v2_pb2 - from google.cloud.bigtable.deprecated.enums import StorageType + from google.cloud.bigtable.enums import StorageType NOW = datetime.datetime.utcnow() NOW_PB = _datetime_to_pb_timestamp(NOW) @@ -600,7 +600,7 @@ def test_cluster_update(): from google.cloud.bigtable_admin_v2.types import ( bigtable_instance_admin as messages_v2_pb2, ) - from google.cloud.bigtable.deprecated.enums import StorageType + from google.cloud.bigtable.enums import StorageType NOW = datetime.datetime.utcnow() NOW_PB = _datetime_to_pb_timestamp(NOW) @@ -667,7 +667,7 @@ def test_cluster_update_w_autoscaling(): from google.cloud.bigtable_admin_v2.types import ( bigtable_instance_admin as messages_v2_pb2, ) - from google.cloud.bigtable.deprecated.enums import StorageType + from google.cloud.bigtable.enums import StorageType NOW = datetime.datetime.utcnow() NOW_PB = _datetime_to_pb_timestamp(NOW) @@ -726,7 +726,7 @@ def test_cluster_update_w_partial_autoscaling_config(): from google.cloud.bigtable_admin_v2.types import ( bigtable_instance_admin as messages_v2_pb2, ) - from google.cloud.bigtable.deprecated.enums import StorageType + from google.cloud.bigtable.enums 
import StorageType NOW = datetime.datetime.utcnow() NOW_PB = _datetime_to_pb_timestamp(NOW) @@ -811,7 +811,7 @@ def test_cluster_update_w_both_manual_and_autoscaling(): from google.cloud.bigtable_admin_v2.types import ( bigtable_instance_admin as messages_v2_pb2, ) - from google.cloud.bigtable.deprecated.enums import StorageType + from google.cloud.bigtable.enums import StorageType NOW = datetime.datetime.utcnow() NOW_PB = _datetime_to_pb_timestamp(NOW) @@ -871,8 +871,8 @@ def test_cluster_disable_autoscaling(): bigtable_instance_admin as messages_v2_pb2, ) from google.cloud._helpers import _datetime_to_pb_timestamp - from google.cloud.bigtable.deprecated.instance import Instance - from google.cloud.bigtable.deprecated.enums import StorageType + from google.cloud.bigtable.instance import Instance + from google.cloud.bigtable.enums import StorageType NOW = datetime.datetime.utcnow() NOW_PB = _datetime_to_pb_timestamp(NOW) @@ -928,8 +928,8 @@ def test_cluster_disable_autoscaling(): def test_create_cluster_with_both_manual_and_autoscaling(): - from google.cloud.bigtable.deprecated.instance import Instance - from google.cloud.bigtable.deprecated.enums import StorageType + from google.cloud.bigtable.instance import Instance + from google.cloud.bigtable.enums import StorageType credentials = _make_credentials() client = _make_client(project=PROJECT, credentials=credentials, admin=True) @@ -956,8 +956,8 @@ def test_create_cluster_with_both_manual_and_autoscaling(): def test_create_cluster_with_partial_autoscaling_config(): - from google.cloud.bigtable.deprecated.instance import Instance - from google.cloud.bigtable.deprecated.enums import StorageType + from google.cloud.bigtable.instance import Instance + from google.cloud.bigtable.enums import StorageType credentials = _make_credentials() client = _make_client(project=PROJECT, credentials=credentials, admin=True) @@ -997,8 +997,8 @@ def test_create_cluster_with_partial_autoscaling_config(): def test_create_cluster_with_no_scaling_config(): - from google.cloud.bigtable.deprecated.instance import Instance - from google.cloud.bigtable.deprecated.enums import StorageType + from google.cloud.bigtable.instance import Instance + from google.cloud.bigtable.enums import StorageType credentials = _make_credentials() client = _make_client(project=PROJECT, credentials=credentials, admin=True) diff --git a/tests/unit/v2_client/test_column_family.py b/tests/unit/v2_client/test_column_family.py index d16d2b20c..b164b2fc1 100644 --- a/tests/unit/v2_client/test_column_family.py +++ b/tests/unit/v2_client/test_column_family.py @@ -19,7 +19,7 @@ def _make_max_versions_gc_rule(*args, **kwargs): - from google.cloud.bigtable.deprecated.column_family import MaxVersionsGCRule + from google.cloud.bigtable.column_family import MaxVersionsGCRule return MaxVersionsGCRule(*args, **kwargs) @@ -51,7 +51,7 @@ def test_max_versions_gc_rule_to_pb(): def _make_max_age_gc_rule(*args, **kwargs): - from google.cloud.bigtable.deprecated.column_family import MaxAgeGCRule + from google.cloud.bigtable.column_family import MaxAgeGCRule return MaxAgeGCRule(*args, **kwargs) @@ -89,7 +89,7 @@ def test_max_age_gc_rule_to_pb(): def _make_gc_rule_union(*args, **kwargs): - from google.cloud.bigtable.deprecated.column_family import GCRuleUnion + from google.cloud.bigtable.column_family import GCRuleUnion return GCRuleUnion(*args, **kwargs) @@ -124,8 +124,8 @@ def test_gc_rule_union___ne__same_value(): def test_gc_rule_union_to_pb(): import datetime from google.protobuf import duration_pb2 - from 
google.cloud.bigtable.deprecated.column_family import MaxAgeGCRule - from google.cloud.bigtable.deprecated.column_family import MaxVersionsGCRule + from google.cloud.bigtable.column_family import MaxAgeGCRule + from google.cloud.bigtable.column_family import MaxVersionsGCRule max_num_versions = 42 rule1 = MaxVersionsGCRule(max_num_versions) @@ -145,8 +145,8 @@ def test_gc_rule_union_to_pb(): def test_gc_rule_union_to_pb_nested(): import datetime from google.protobuf import duration_pb2 - from google.cloud.bigtable.deprecated.column_family import MaxAgeGCRule - from google.cloud.bigtable.deprecated.column_family import MaxVersionsGCRule + from google.cloud.bigtable.column_family import MaxAgeGCRule + from google.cloud.bigtable.column_family import MaxVersionsGCRule max_num_versions1 = 42 rule1 = MaxVersionsGCRule(max_num_versions1) @@ -171,7 +171,7 @@ def test_gc_rule_union_to_pb_nested(): def _make_gc_rule_intersection(*args, **kwargs): - from google.cloud.bigtable.deprecated.column_family import GCRuleIntersection + from google.cloud.bigtable.column_family import GCRuleIntersection return GCRuleIntersection(*args, **kwargs) @@ -206,8 +206,8 @@ def test_gc_rule_intersection___ne__same_value(): def test_gc_rule_intersection_to_pb(): import datetime from google.protobuf import duration_pb2 - from google.cloud.bigtable.deprecated.column_family import MaxAgeGCRule - from google.cloud.bigtable.deprecated.column_family import MaxVersionsGCRule + from google.cloud.bigtable.column_family import MaxAgeGCRule + from google.cloud.bigtable.column_family import MaxVersionsGCRule max_num_versions = 42 rule1 = MaxVersionsGCRule(max_num_versions) @@ -227,8 +227,8 @@ def test_gc_rule_intersection_to_pb(): def test_gc_rule_intersection_to_pb_nested(): import datetime from google.protobuf import duration_pb2 - from google.cloud.bigtable.deprecated.column_family import MaxAgeGCRule - from google.cloud.bigtable.deprecated.column_family import MaxVersionsGCRule + from google.cloud.bigtable.column_family import MaxAgeGCRule + from google.cloud.bigtable.column_family import MaxVersionsGCRule max_num_versions1 = 42 rule1 = MaxVersionsGCRule(max_num_versions1) @@ -253,13 +253,13 @@ def test_gc_rule_intersection_to_pb_nested(): def _make_column_family(*args, **kwargs): - from google.cloud.bigtable.deprecated.column_family import ColumnFamily + from google.cloud.bigtable.column_family import ColumnFamily return ColumnFamily(*args, **kwargs) def _make_client(*args, **kwargs): - from google.cloud.bigtable.deprecated.client import Client + from google.cloud.bigtable.client import Client return Client(*args, **kwargs) @@ -323,7 +323,7 @@ def test_column_family_to_pb_no_rules(): def test_column_family_to_pb_with_rule(): - from google.cloud.bigtable.deprecated.column_family import MaxVersionsGCRule + from google.cloud.bigtable.column_family import MaxVersionsGCRule gc_rule = MaxVersionsGCRule(1) column_family = _make_column_family("column_family_id", None, gc_rule=gc_rule) @@ -397,7 +397,7 @@ def test_column_family_create(): def test_column_family_create_with_gc_rule(): - from google.cloud.bigtable.deprecated.column_family import MaxVersionsGCRule + from google.cloud.bigtable.column_family import MaxVersionsGCRule gc_rule = MaxVersionsGCRule(1337) _create_test_helper(gc_rule=gc_rule) @@ -467,7 +467,7 @@ def test_column_family_update(): def test_column_family_update_with_gc_rule(): - from google.cloud.bigtable.deprecated.column_family import MaxVersionsGCRule + from google.cloud.bigtable.column_family import 
MaxVersionsGCRule gc_rule = MaxVersionsGCRule(1337) _update_test_helper(gc_rule=gc_rule) @@ -530,15 +530,15 @@ def test_column_family_delete(): def test__gc_rule_from_pb_empty(): - from google.cloud.bigtable.deprecated.column_family import _gc_rule_from_pb + from google.cloud.bigtable.column_family import _gc_rule_from_pb gc_rule_pb = _GcRulePB() assert _gc_rule_from_pb(gc_rule_pb) is None def test__gc_rule_from_pb_max_num_versions(): - from google.cloud.bigtable.deprecated.column_family import _gc_rule_from_pb - from google.cloud.bigtable.deprecated.column_family import MaxVersionsGCRule + from google.cloud.bigtable.column_family import _gc_rule_from_pb + from google.cloud.bigtable.column_family import MaxVersionsGCRule orig_rule = MaxVersionsGCRule(1) gc_rule_pb = orig_rule.to_pb() @@ -549,8 +549,8 @@ def test__gc_rule_from_pb_max_num_versions(): def test__gc_rule_from_pb_max_age(): import datetime - from google.cloud.bigtable.deprecated.column_family import _gc_rule_from_pb - from google.cloud.bigtable.deprecated.column_family import MaxAgeGCRule + from google.cloud.bigtable.column_family import _gc_rule_from_pb + from google.cloud.bigtable.column_family import MaxAgeGCRule orig_rule = MaxAgeGCRule(datetime.timedelta(seconds=1)) gc_rule_pb = orig_rule.to_pb() @@ -561,10 +561,10 @@ def test__gc_rule_from_pb_max_age(): def test__gc_rule_from_pb_union(): import datetime - from google.cloud.bigtable.deprecated.column_family import _gc_rule_from_pb - from google.cloud.bigtable.deprecated.column_family import GCRuleUnion - from google.cloud.bigtable.deprecated.column_family import MaxAgeGCRule - from google.cloud.bigtable.deprecated.column_family import MaxVersionsGCRule + from google.cloud.bigtable.column_family import _gc_rule_from_pb + from google.cloud.bigtable.column_family import GCRuleUnion + from google.cloud.bigtable.column_family import MaxAgeGCRule + from google.cloud.bigtable.column_family import MaxVersionsGCRule rule1 = MaxVersionsGCRule(1) rule2 = MaxAgeGCRule(datetime.timedelta(seconds=1)) @@ -577,10 +577,10 @@ def test__gc_rule_from_pb_union(): def test__gc_rule_from_pb_intersection(): import datetime - from google.cloud.bigtable.deprecated.column_family import _gc_rule_from_pb - from google.cloud.bigtable.deprecated.column_family import GCRuleIntersection - from google.cloud.bigtable.deprecated.column_family import MaxAgeGCRule - from google.cloud.bigtable.deprecated.column_family import MaxVersionsGCRule + from google.cloud.bigtable.column_family import _gc_rule_from_pb + from google.cloud.bigtable.column_family import GCRuleIntersection + from google.cloud.bigtable.column_family import MaxAgeGCRule + from google.cloud.bigtable.column_family import MaxVersionsGCRule rule1 = MaxVersionsGCRule(1) rule2 = MaxAgeGCRule(datetime.timedelta(seconds=1)) @@ -592,7 +592,7 @@ def test__gc_rule_from_pb_intersection(): def test__gc_rule_from_pb_unknown_field_name(): - from google.cloud.bigtable.deprecated.column_family import _gc_rule_from_pb + from google.cloud.bigtable.column_family import _gc_rule_from_pb class MockProto(object): diff --git a/tests/unit/v2_client/test_encryption_info.py b/tests/unit/v2_client/test_encryption_info.py index 0b6a93e9e..8b92a83ed 100644 --- a/tests/unit/v2_client/test_encryption_info.py +++ b/tests/unit/v2_client/test_encryption_info.py @@ -14,7 +14,7 @@ import mock -from google.cloud.bigtable.deprecated import enums +from google.cloud.bigtable import enums EncryptionType = enums.EncryptionInfo.EncryptionType @@ -30,7 +30,7 @@ def 
_make_status_pb(code=_STATUS_CODE, message=_STATUS_MESSAGE): def _make_status(code=_STATUS_CODE, message=_STATUS_MESSAGE): - from google.cloud.bigtable.deprecated.error import Status + from google.cloud.bigtable.error import Status status_pb = _make_status_pb(code=code, message=message) return Status(status_pb) @@ -54,7 +54,7 @@ def _make_info_pb( def _make_encryption_info(*args, **kwargs): - from google.cloud.bigtable.deprecated.encryption_info import EncryptionInfo + from google.cloud.bigtable.encryption_info import EncryptionInfo return EncryptionInfo(*args, **kwargs) @@ -70,7 +70,7 @@ def _make_encryption_info_defaults( def test_encryption_info__from_pb(): - from google.cloud.bigtable.deprecated.encryption_info import EncryptionInfo + from google.cloud.bigtable.encryption_info import EncryptionInfo info_pb = _make_info_pb() diff --git a/tests/unit/v2_client/test_error.py b/tests/unit/v2_client/test_error.py index 072a3b3c3..8b148473c 100644 --- a/tests/unit/v2_client/test_error.py +++ b/tests/unit/v2_client/test_error.py @@ -20,7 +20,7 @@ def _make_status_pb(**kwargs): def _make_status(status_pb): - from google.cloud.bigtable.deprecated.error import Status + from google.cloud.bigtable.error import Status return Status(status_pb) diff --git a/tests/unit/v2_client/test_instance.py b/tests/unit/v2_client/test_instance.py index b43e8bb38..c577adca5 100644 --- a/tests/unit/v2_client/test_instance.py +++ b/tests/unit/v2_client/test_instance.py @@ -17,7 +17,7 @@ import pytest from ._testing import _make_credentials -from google.cloud.bigtable.deprecated.cluster import Cluster +from google.cloud.bigtable.cluster import Cluster PROJECT = "project" INSTANCE_ID = "instance-id" @@ -47,7 +47,7 @@ def _make_client(*args, **kwargs): - from google.cloud.bigtable.deprecated.client import Client + from google.cloud.bigtable.client import Client return Client(*args, **kwargs) @@ -61,7 +61,7 @@ def _make_instance_admin_api(): def _make_instance(*args, **kwargs): - from google.cloud.bigtable.deprecated.instance import Instance + from google.cloud.bigtable.instance import Instance return Instance(*args, **kwargs) @@ -79,7 +79,7 @@ def test_instance_constructor_defaults(): def test_instance_constructor_non_default(): - from google.cloud.bigtable.deprecated import enums + from google.cloud.bigtable import enums instance_type = enums.Instance.Type.DEVELOPMENT state = enums.Instance.State.READY @@ -104,7 +104,7 @@ def test_instance_constructor_non_default(): def test_instance__update_from_pb_success(): from google.cloud.bigtable_admin_v2.types import instance as data_v2_pb2 - from google.cloud.bigtable.deprecated import enums + from google.cloud.bigtable import enums instance_type = data_v2_pb2.Instance.Type.PRODUCTION state = enums.Instance.State.READY @@ -129,7 +129,7 @@ def test_instance__update_from_pb_success(): def test_instance__update_from_pb_success_defaults(): from google.cloud.bigtable_admin_v2.types import instance as data_v2_pb2 - from google.cloud.bigtable.deprecated import enums + from google.cloud.bigtable import enums instance_pb = data_v2_pb2.Instance(display_name=DISPLAY_NAME) @@ -156,8 +156,8 @@ def test_instance__update_from_pb_wo_display_name(): def test_instance_from_pb_success(): from google.cloud.bigtable_admin_v2.types import instance as data_v2_pb2 - from google.cloud.bigtable.deprecated import enums - from google.cloud.bigtable.deprecated.instance import Instance + from google.cloud.bigtable import enums + from google.cloud.bigtable.instance import Instance credentials = 
_make_credentials() client = _make_client(project=PROJECT, credentials=credentials, admin=True) @@ -184,7 +184,7 @@ def test_instance_from_pb_success(): def test_instance_from_pb_bad_instance_name(): from google.cloud.bigtable_admin_v2.types import instance as data_v2_pb2 - from google.cloud.bigtable.deprecated.instance import Instance + from google.cloud.bigtable.instance import Instance instance_name = "INCORRECT_FORMAT" instance_pb = data_v2_pb2.Instance(name=instance_name) @@ -195,7 +195,7 @@ def test_instance_from_pb_bad_instance_name(): def test_instance_from_pb_project_mistmatch(): from google.cloud.bigtable_admin_v2.types import instance as data_v2_pb2 - from google.cloud.bigtable.deprecated.instance import Instance + from google.cloud.bigtable.instance import Instance ALT_PROJECT = "ALT_PROJECT" credentials = _make_credentials() @@ -304,7 +304,7 @@ def _instance_api_response_for_create(): def test_instance_create(): - from google.cloud.bigtable.deprecated import enums + from google.cloud.bigtable import enums from google.cloud.bigtable_admin_v2.types import Instance from google.cloud.bigtable_admin_v2.types import Cluster import warnings @@ -353,8 +353,8 @@ def test_instance_create(): def test_instance_create_w_clusters(): - from google.cloud.bigtable.deprecated import enums - from google.cloud.bigtable.deprecated.cluster import Cluster + from google.cloud.bigtable import enums + from google.cloud.bigtable.cluster import Cluster from google.cloud.bigtable_admin_v2.types import Cluster as cluster_pb from google.cloud.bigtable_admin_v2.types import Instance as instance_pb @@ -473,7 +473,7 @@ def test_instance_exists_w_error(): def test_instance_reload(): from google.cloud.bigtable_admin_v2.types import instance as data_v2_pb2 - from google.cloud.bigtable.deprecated import enums + from google.cloud.bigtable import enums DISPLAY_NAME = "hey-hi-hello" credentials = _make_credentials() @@ -527,7 +527,7 @@ def _instance_api_response_for_update(): def test_instance_update(): - from google.cloud.bigtable.deprecated import enums + from google.cloud.bigtable import enums from google.protobuf import field_mask_pb2 from google.cloud.bigtable_admin_v2.types import Instance @@ -603,7 +603,7 @@ def test_instance_delete(): def test_instance_get_iam_policy(): from google.iam.v1 import policy_pb2 - from google.cloud.bigtable.deprecated.policy import BIGTABLE_ADMIN_ROLE + from google.cloud.bigtable.policy import BIGTABLE_ADMIN_ROLE credentials = _make_credentials() client = _make_client(project=PROJECT, credentials=credentials, admin=True) @@ -631,7 +631,7 @@ def test_instance_get_iam_policy(): def test_instance_get_iam_policy_w_requested_policy_version(): from google.iam.v1 import policy_pb2, options_pb2 - from google.cloud.bigtable.deprecated.policy import BIGTABLE_ADMIN_ROLE + from google.cloud.bigtable.policy import BIGTABLE_ADMIN_ROLE credentials = _make_credentials() client = _make_client(project=PROJECT, credentials=credentials, admin=True) @@ -665,8 +665,8 @@ def test_instance_get_iam_policy_w_requested_policy_version(): def test_instance_set_iam_policy(): from google.iam.v1 import policy_pb2 - from google.cloud.bigtable.deprecated.policy import Policy - from google.cloud.bigtable.deprecated.policy import BIGTABLE_ADMIN_ROLE + from google.cloud.bigtable.policy import Policy + from google.cloud.bigtable.policy import BIGTABLE_ADMIN_ROLE credentials = _make_credentials() client = _make_client(project=PROJECT, credentials=credentials, admin=True) @@ -721,7 +721,7 @@ def 
test_instance_test_iam_permissions(): def test_instance_cluster_factory(): - from google.cloud.bigtable.deprecated import enums + from google.cloud.bigtable import enums CLUSTER_ID = "{}-cluster".format(INSTANCE_ID) LOCATION_ID = "us-central1-c" @@ -749,8 +749,8 @@ def test_instance_list_clusters(): bigtable_instance_admin as messages_v2_pb2, ) from google.cloud.bigtable_admin_v2.types import instance as data_v2_pb2 - from google.cloud.bigtable.deprecated.instance import Instance - from google.cloud.bigtable.deprecated.instance import Cluster + from google.cloud.bigtable.instance import Instance + from google.cloud.bigtable.instance import Cluster credentials = _make_credentials() client = _make_client(project=PROJECT, credentials=credentials, admin=True) @@ -788,7 +788,7 @@ def test_instance_list_clusters(): def test_instance_table_factory(): - from google.cloud.bigtable.deprecated.table import Table + from google.cloud.bigtable.table import Table app_profile_id = "appProfileId1262094415" instance = _make_instance(INSTANCE_ID, None) @@ -857,7 +857,7 @@ def test_instance_list_tables_failure_name_bad_before(): def test_instance_app_profile_factory(): - from google.cloud.bigtable.deprecated.enums import RoutingPolicyType + from google.cloud.bigtable.enums import RoutingPolicyType instance = _make_instance(INSTANCE_ID, None) @@ -890,7 +890,7 @@ def test_instance_list_app_profiles(): from google.api_core.page_iterator import Iterator from google.api_core.page_iterator import Page from google.cloud.bigtable_admin_v2.types import instance as data_v2_pb2 - from google.cloud.bigtable.deprecated.app_profile import AppProfile + from google.cloud.bigtable.app_profile import AppProfile class _Iterator(Iterator): def __init__(self, pages): diff --git a/tests/unit/v2_client/test_policy.py b/tests/unit/v2_client/test_policy.py index ef3df2d2b..77674517e 100644 --- a/tests/unit/v2_client/test_policy.py +++ b/tests/unit/v2_client/test_policy.py @@ -14,7 +14,7 @@ def _make_policy(*args, **kw): - from google.cloud.bigtable.deprecated.policy import Policy + from google.cloud.bigtable.policy import Policy return Policy(*args, **kw) @@ -48,7 +48,7 @@ def test_policy_ctor_explicit(): def test_policy_bigtable_admins(): - from google.cloud.bigtable.deprecated.policy import BIGTABLE_ADMIN_ROLE + from google.cloud.bigtable.policy import BIGTABLE_ADMIN_ROLE MEMBER = "user:phred@example.com" expected = frozenset([MEMBER]) @@ -58,7 +58,7 @@ def test_policy_bigtable_admins(): def test_policy_bigtable_readers(): - from google.cloud.bigtable.deprecated.policy import BIGTABLE_READER_ROLE + from google.cloud.bigtable.policy import BIGTABLE_READER_ROLE MEMBER = "user:phred@example.com" expected = frozenset([MEMBER]) @@ -68,7 +68,7 @@ def test_policy_bigtable_readers(): def test_policy_bigtable_users(): - from google.cloud.bigtable.deprecated.policy import BIGTABLE_USER_ROLE + from google.cloud.bigtable.policy import BIGTABLE_USER_ROLE MEMBER = "user:phred@example.com" expected = frozenset([MEMBER]) @@ -78,7 +78,7 @@ def test_policy_bigtable_users(): def test_policy_bigtable_viewers(): - from google.cloud.bigtable.deprecated.policy import BIGTABLE_VIEWER_ROLE + from google.cloud.bigtable.policy import BIGTABLE_VIEWER_ROLE MEMBER = "user:phred@example.com" expected = frozenset([MEMBER]) @@ -89,7 +89,7 @@ def test_policy_bigtable_viewers(): def test_policy_from_pb_w_empty(): from google.iam.v1 import policy_pb2 - from google.cloud.bigtable.deprecated.policy import Policy + from google.cloud.bigtable.policy import Policy empty = 
frozenset() message = policy_pb2.Policy() @@ -106,8 +106,8 @@ def test_policy_from_pb_w_empty(): def test_policy_from_pb_w_non_empty(): from google.iam.v1 import policy_pb2 - from google.cloud.bigtable.deprecated.policy import BIGTABLE_ADMIN_ROLE - from google.cloud.bigtable.deprecated.policy import Policy + from google.cloud.bigtable.policy import BIGTABLE_ADMIN_ROLE + from google.cloud.bigtable.policy import Policy ETAG = b"ETAG" VERSION = 1 @@ -133,8 +133,8 @@ def test_policy_from_pb_w_condition(): import pytest from google.iam.v1 import policy_pb2 from google.api_core.iam import InvalidOperationException, _DICT_ACCESS_MSG - from google.cloud.bigtable.deprecated.policy import BIGTABLE_ADMIN_ROLE - from google.cloud.bigtable.deprecated.policy import Policy + from google.cloud.bigtable.policy import BIGTABLE_ADMIN_ROLE + from google.cloud.bigtable.policy import Policy ETAG = b"ETAG" VERSION = 3 @@ -184,7 +184,7 @@ def test_policy_to_pb_empty(): def test_policy_to_pb_explicit(): from google.iam.v1 import policy_pb2 - from google.cloud.bigtable.deprecated.policy import BIGTABLE_ADMIN_ROLE + from google.cloud.bigtable.policy import BIGTABLE_ADMIN_ROLE VERSION = 1 ETAG = b"ETAG" @@ -204,7 +204,7 @@ def test_policy_to_pb_explicit(): def test_policy_to_pb_w_condition(): from google.iam.v1 import policy_pb2 - from google.cloud.bigtable.deprecated.policy import BIGTABLE_ADMIN_ROLE + from google.cloud.bigtable.policy import BIGTABLE_ADMIN_ROLE VERSION = 3 ETAG = b"ETAG" @@ -234,7 +234,7 @@ def test_policy_to_pb_w_condition(): def test_policy_from_api_repr_wo_etag(): - from google.cloud.bigtable.deprecated.policy import Policy + from google.cloud.bigtable.policy import Policy VERSION = 1 empty = frozenset() @@ -252,7 +252,7 @@ def test_policy_from_api_repr_wo_etag(): def test_policy_from_api_repr_w_etag(): import base64 - from google.cloud.bigtable.deprecated.policy import Policy + from google.cloud.bigtable.policy import Policy ETAG = b"ETAG" empty = frozenset() diff --git a/tests/unit/v2_client/test_row.py b/tests/unit/v2_client/test_row.py index 4850b18c3..f04802f5c 100644 --- a/tests/unit/v2_client/test_row.py +++ b/tests/unit/v2_client/test_row.py @@ -20,13 +20,13 @@ def _make_client(*args, **kwargs): - from google.cloud.bigtable.deprecated.client import Client + from google.cloud.bigtable.client import Client return Client(*args, **kwargs) def _make_row(*args, **kwargs): - from google.cloud.bigtable.deprecated.row import Row + from google.cloud.bigtable.row import Row return Row(*args, **kwargs) @@ -42,7 +42,7 @@ def test_row_table_getter(): def _make__set_delete_row(*args, **kwargs): - from google.cloud.bigtable.deprecated.row import _SetDeleteRow + from google.cloud.bigtable.row import _SetDeleteRow return _SetDeleteRow(*args, **kwargs) @@ -54,7 +54,7 @@ def test__set_detlete_row__get_mutations_virtual(): def _make_direct_row(*args, **kwargs): - from google.cloud.bigtable.deprecated.row import DirectRow + from google.cloud.bigtable.row import DirectRow return DirectRow(*args, **kwargs) @@ -193,7 +193,7 @@ def test_direct_row_delete(): def test_direct_row_delete_cell(): - from google.cloud.bigtable.deprecated.row import DirectRow + from google.cloud.bigtable.row import DirectRow class MockRow(DirectRow): def __init__(self, *args, **kwargs): @@ -237,7 +237,7 @@ def test_direct_row_delete_cells_non_iterable(): def test_direct_row_delete_cells_all_columns(): - from google.cloud.bigtable.deprecated.row import DirectRow + from google.cloud.bigtable.row import DirectRow row_key = b"row_key" 
column_family_id = "column_family_id" @@ -293,7 +293,7 @@ def test_direct_row_delete_cells_no_time_range(): def test_direct_row_delete_cells_with_time_range(): import datetime from google.cloud._helpers import _EPOCH - from google.cloud.bigtable.deprecated.row_filters import TimestampRange + from google.cloud.bigtable.row_filters import TimestampRange microseconds = 30871000 # Makes sure already milliseconds granularity start = _EPOCH + datetime.timedelta(microseconds=microseconds) @@ -386,7 +386,7 @@ def test_direct_row_commit_with_exception(): def _make_conditional_row(*args, **kwargs): - from google.cloud.bigtable.deprecated.row import ConditionalRow + from google.cloud.bigtable.row import ConditionalRow return ConditionalRow(*args, **kwargs) @@ -417,7 +417,7 @@ def test_conditional_row__get_mutations(): def test_conditional_row_commit(): - from google.cloud.bigtable.deprecated.row_filters import RowSampleFilter + from google.cloud.bigtable.row_filters import RowSampleFilter from google.cloud.bigtable_v2.services.bigtable import BigtableClient project_id = "project-id" @@ -466,7 +466,7 @@ def test_conditional_row_commit(): def test_conditional_row_commit_too_many_mutations(): from google.cloud._testing import _Monkey - from google.cloud.bigtable.deprecated import row as MUT + from google.cloud.bigtable import row as MUT row_key = b"row_key" table = object() @@ -504,7 +504,7 @@ def test_conditional_row_commit_no_mutations(): def _make_append_row(*args, **kwargs): - from google.cloud.bigtable.deprecated.row import AppendRow + from google.cloud.bigtable.row import AppendRow return AppendRow(*args, **kwargs) @@ -564,7 +564,7 @@ def test_append_row_increment_cell_value(): def test_append_row_commit(): from google.cloud._testing import _Monkey - from google.cloud.bigtable.deprecated import row as MUT + from google.cloud.bigtable import row as MUT from google.cloud.bigtable_v2.services.bigtable import BigtableClient project_id = "project-id" @@ -630,7 +630,7 @@ def test_append_row_commit_no_rules(): def test_append_row_commit_too_many_mutations(): from google.cloud._testing import _Monkey - from google.cloud.bigtable.deprecated import row as MUT + from google.cloud.bigtable import row as MUT row_key = b"row_key" table = object() @@ -644,7 +644,7 @@ def test_append_row_commit_too_many_mutations(): def test__parse_rmw_row_response(): from google.cloud._helpers import _datetime_from_microseconds - from google.cloud.bigtable.deprecated.row import _parse_rmw_row_response + from google.cloud.bigtable.row import _parse_rmw_row_response col_fam1 = "col-fam-id" col_fam2 = "col-fam-id2" @@ -700,7 +700,7 @@ def test__parse_rmw_row_response(): def test__parse_family_pb(): from google.cloud._helpers import _datetime_from_microseconds - from google.cloud.bigtable.deprecated.row import _parse_family_pb + from google.cloud.bigtable.row import _parse_family_pb col_fam1 = "col-fam-id" col_name1 = b"col-name1" diff --git a/tests/unit/v2_client/test_row_data.py b/tests/unit/v2_client/test_row_data.py index ee9b065c8..fba69ceba 100644 --- a/tests/unit/v2_client/test_row_data.py +++ b/tests/unit/v2_client/test_row_data.py @@ -27,7 +27,7 @@ def _make_cell(*args, **kwargs): - from google.cloud.bigtable.deprecated.row_data import Cell + from google.cloud.bigtable.row_data import Cell return Cell(*args, **kwargs) @@ -36,7 +36,7 @@ def _cell_from_pb_test_helper(labels=None): import datetime from google.cloud._helpers import _EPOCH from google.cloud.bigtable_v2.types import data as data_v2_pb2 - from 
google.cloud.bigtable.deprecated.row_data import Cell + from google.cloud.bigtable.row_data import Cell timestamp = _EPOCH + datetime.timedelta(microseconds=TIMESTAMP_MICROS) value = b"value-bytes" @@ -100,7 +100,7 @@ def test_cell___ne__(): def _make_partial_row_data(*args, **kwargs): - from google.cloud.bigtable.deprecated.row_data import PartialRowData + from google.cloud.bigtable.row_data import PartialRowData return PartialRowData(*args, **kwargs) @@ -288,7 +288,7 @@ def trailing_metadata(self): def test__retry_read_rows_exception_miss(): from google.api_core.exceptions import Conflict - from google.cloud.bigtable.deprecated.row_data import _retry_read_rows_exception + from google.cloud.bigtable.row_data import _retry_read_rows_exception exception = Conflict("testing") assert not _retry_read_rows_exception(exception) @@ -296,7 +296,7 @@ def test__retry_read_rows_exception_miss(): def test__retry_read_rows_exception_service_unavailable(): from google.api_core.exceptions import ServiceUnavailable - from google.cloud.bigtable.deprecated.row_data import _retry_read_rows_exception + from google.cloud.bigtable.row_data import _retry_read_rows_exception exception = ServiceUnavailable("testing") assert _retry_read_rows_exception(exception) @@ -304,7 +304,7 @@ def test__retry_read_rows_exception_service_unavailable(): def test__retry_read_rows_exception_deadline_exceeded(): from google.api_core.exceptions import DeadlineExceeded - from google.cloud.bigtable.deprecated.row_data import _retry_read_rows_exception + from google.cloud.bigtable.row_data import _retry_read_rows_exception exception = DeadlineExceeded("testing") assert _retry_read_rows_exception(exception) @@ -312,7 +312,7 @@ def test__retry_read_rows_exception_deadline_exceeded(): def test__retry_read_rows_exception_internal_server_not_retriable(): from google.api_core.exceptions import InternalServerError - from google.cloud.bigtable.deprecated.row_data import ( + from google.cloud.bigtable.row_data import ( _retry_read_rows_exception, RETRYABLE_INTERNAL_ERROR_MESSAGES, ) @@ -325,7 +325,7 @@ def test__retry_read_rows_exception_internal_server_not_retriable(): def test__retry_read_rows_exception_internal_server_retriable(): from google.api_core.exceptions import InternalServerError - from google.cloud.bigtable.deprecated.row_data import ( + from google.cloud.bigtable.row_data import ( _retry_read_rows_exception, RETRYABLE_INTERNAL_ERROR_MESSAGES, ) @@ -337,7 +337,7 @@ def test__retry_read_rows_exception_internal_server_retriable(): def test__retry_read_rows_exception_miss_wrapped_in_grpc(): from google.api_core.exceptions import Conflict - from google.cloud.bigtable.deprecated.row_data import _retry_read_rows_exception + from google.cloud.bigtable.row_data import _retry_read_rows_exception wrapped = Conflict("testing") exception = _make_grpc_call_error(wrapped) @@ -346,7 +346,7 @@ def test__retry_read_rows_exception_miss_wrapped_in_grpc(): def test__retry_read_rows_exception_service_unavailable_wrapped_in_grpc(): from google.api_core.exceptions import ServiceUnavailable - from google.cloud.bigtable.deprecated.row_data import _retry_read_rows_exception + from google.cloud.bigtable.row_data import _retry_read_rows_exception wrapped = ServiceUnavailable("testing") exception = _make_grpc_call_error(wrapped) @@ -355,7 +355,7 @@ def test__retry_read_rows_exception_service_unavailable_wrapped_in_grpc(): def test__retry_read_rows_exception_deadline_exceeded_wrapped_in_grpc(): from google.api_core.exceptions import DeadlineExceeded - from 
google.cloud.bigtable.deprecated.row_data import _retry_read_rows_exception + from google.cloud.bigtable.row_data import _retry_read_rows_exception wrapped = DeadlineExceeded("testing") exception = _make_grpc_call_error(wrapped) @@ -363,7 +363,7 @@ def test__retry_read_rows_exception_deadline_exceeded_wrapped_in_grpc(): def _make_partial_rows_data(*args, **kwargs): - from google.cloud.bigtable.deprecated.row_data import PartialRowsData + from google.cloud.bigtable.row_data import PartialRowsData return PartialRowsData(*args, **kwargs) @@ -373,13 +373,13 @@ def _partial_rows_data_consume_all(yrd): def _make_client(*args, **kwargs): - from google.cloud.bigtable.deprecated.client import Client + from google.cloud.bigtable.client import Client return Client(*args, **kwargs) def test_partial_rows_data_constructor(): - from google.cloud.bigtable.deprecated.row_data import DEFAULT_RETRY_READ_ROWS + from google.cloud.bigtable.row_data import DEFAULT_RETRY_READ_ROWS client = _Client() client._data_stub = mock.MagicMock() @@ -436,7 +436,7 @@ def fake_read(*args, **kwargs): def test_partial_rows_data_constructor_with_retry(): - from google.cloud.bigtable.deprecated.row_data import DEFAULT_RETRY_READ_ROWS + from google.cloud.bigtable.row_data import DEFAULT_RETRY_READ_ROWS client = _Client() client._data_stub = mock.MagicMock() @@ -446,7 +446,9 @@ def test_partial_rows_data_constructor_with_retry(): client._data_stub.ReadRows, request, retry ) partial_rows_data.read_method.assert_called_once_with( - request, timeout=DEFAULT_RETRY_READ_ROWS.deadline + 1 + request, + timeout=DEFAULT_RETRY_READ_ROWS.deadline + 1, + retry=DEFAULT_RETRY_READ_ROWS, ) assert partial_rows_data.request is request assert partial_rows_data.rows == {} @@ -644,7 +646,7 @@ def test_partial_rows_data_valid_last_scanned_row_key_on_start(): def test_partial_rows_data_invalid_empty_chunk(): - from google.cloud.bigtable.deprecated.row_data import InvalidChunk + from google.cloud.bigtable.row_data import InvalidChunk from google.cloud.bigtable_v2.services.bigtable import BigtableClient client = _Client() @@ -755,14 +757,14 @@ def test_partial_rows_data_yield_retry_rows_data(): def _make_read_rows_request_manager(*args, **kwargs): - from google.cloud.bigtable.deprecated.row_data import _ReadRowsRequestManager + from google.cloud.bigtable.row_data import _ReadRowsRequestManager return _ReadRowsRequestManager(*args, **kwargs) @pytest.fixture(scope="session") def rrrm_data(): - from google.cloud.bigtable.deprecated import row_set + from google.cloud.bigtable import row_set row_range1 = row_set.RowRange(b"row_key21", b"row_key29") row_range2 = row_set.RowRange(b"row_key31", b"row_key39") @@ -851,7 +853,7 @@ def test_RRRM__filter_row_ranges_all_ranges_already_read(rrrm_data): def test_RRRM__filter_row_ranges_all_ranges_already_read_open_closed(): - from google.cloud.bigtable.deprecated import row_set + from google.cloud.bigtable import row_set last_scanned_key = b"row_key54" @@ -895,7 +897,7 @@ def test_RRRM__filter_row_ranges_some_ranges_already_read(rrrm_data): def test_RRRM_build_updated_request(rrrm_data): - from google.cloud.bigtable.deprecated.row_filters import RowSampleFilter + from google.cloud.bigtable.row_filters import RowSampleFilter from google.cloud.bigtable_v2 import types row_range1 = rrrm_data["row_range1"] @@ -944,7 +946,7 @@ def test_RRRM_build_updated_request_full_table(): def test_RRRM_build_updated_request_no_start_key(): - from google.cloud.bigtable.deprecated.row_filters import RowSampleFilter + from 
google.cloud.bigtable.row_filters import RowSampleFilter from google.cloud.bigtable_v2 import types row_filter = RowSampleFilter(0.33) @@ -972,7 +974,7 @@ def test_RRRM_build_updated_request_no_start_key(): def test_RRRM_build_updated_request_no_end_key(): - from google.cloud.bigtable.deprecated.row_filters import RowSampleFilter + from google.cloud.bigtable.row_filters import RowSampleFilter from google.cloud.bigtable_v2 import types row_filter = RowSampleFilter(0.33) @@ -998,7 +1000,7 @@ def test_RRRM_build_updated_request_no_end_key(): def test_RRRM_build_updated_request_rows(): - from google.cloud.bigtable.deprecated.row_filters import RowSampleFilter + from google.cloud.bigtable.row_filters import RowSampleFilter row_filter = RowSampleFilter(0.33) last_scanned_key = b"row_key4" @@ -1046,7 +1048,7 @@ def test_RRRM__key_already_read(): def test_RRRM__rows_limit_reached(): - from google.cloud.bigtable.deprecated.row_data import InvalidRetryRequest + from google.cloud.bigtable.row_data import InvalidRetryRequest last_scanned_key = b"row_key14" request = _ReadRowsRequestPB(table_name=TABLE_NAME) @@ -1059,7 +1061,7 @@ def test_RRRM__rows_limit_reached(): def test_RRRM_build_updated_request_last_row_read_raises_invalid_retry_request(): - from google.cloud.bigtable.deprecated.row_data import InvalidRetryRequest + from google.cloud.bigtable.row_data import InvalidRetryRequest last_scanned_key = b"row_key4" request = _ReadRowsRequestPB(table_name=TABLE_NAME) @@ -1073,8 +1075,8 @@ def test_RRRM_build_updated_request_last_row_read_raises_invalid_retry_request() def test_RRRM_build_updated_request_row_ranges_read_raises_invalid_retry_request(): - from google.cloud.bigtable.deprecated.row_data import InvalidRetryRequest - from google.cloud.bigtable.deprecated import row_set + from google.cloud.bigtable.row_data import InvalidRetryRequest + from google.cloud.bigtable import row_set row_range1 = row_set.RowRange(b"row_key21", b"row_key29") @@ -1095,7 +1097,7 @@ def test_RRRM_build_updated_request_row_ranges_read_raises_invalid_retry_request def test_RRRM_build_updated_request_row_ranges_valid(): - from google.cloud.bigtable.deprecated import row_set + from google.cloud.bigtable import row_set row_range1 = row_set.RowRange(b"row_key21", b"row_key29") @@ -1179,7 +1181,7 @@ def _ReadRowsResponseCellChunkPB(*args, **kw): def _make_cell_pb(value): - from google.cloud.bigtable.deprecated import row_data + from google.cloud.bigtable import row_data return row_data.Cell(value, TIMESTAMP_MICROS) diff --git a/tests/unit/v2_client/test_row_filters.py b/tests/unit/v2_client/test_row_filters.py index dfb16ba16..b312cb942 100644 --- a/tests/unit/v2_client/test_row_filters.py +++ b/tests/unit/v2_client/test_row_filters.py @@ -17,7 +17,7 @@ def test_bool_filter_constructor(): - from google.cloud.bigtable.deprecated.row_filters import _BoolFilter + from google.cloud.bigtable.row_filters import _BoolFilter flag = object() row_filter = _BoolFilter(flag) @@ -25,7 +25,7 @@ def test_bool_filter_constructor(): def test_bool_filter___eq__type_differ(): - from google.cloud.bigtable.deprecated.row_filters import _BoolFilter + from google.cloud.bigtable.row_filters import _BoolFilter flag = object() row_filter1 = _BoolFilter(flag) @@ -34,7 +34,7 @@ def test_bool_filter___eq__type_differ(): def test_bool_filter___eq__same_value(): - from google.cloud.bigtable.deprecated.row_filters import _BoolFilter + from google.cloud.bigtable.row_filters import _BoolFilter flag = object() row_filter1 = _BoolFilter(flag) @@ -43,7 +43,7 @@ def 
test_bool_filter___eq__same_value(): def test_bool_filter___ne__same_value(): - from google.cloud.bigtable.deprecated.row_filters import _BoolFilter + from google.cloud.bigtable.row_filters import _BoolFilter flag = object() row_filter1 = _BoolFilter(flag) @@ -52,7 +52,7 @@ def test_bool_filter___ne__same_value(): def test_sink_filter_to_pb(): - from google.cloud.bigtable.deprecated.row_filters import SinkFilter + from google.cloud.bigtable.row_filters import SinkFilter flag = True row_filter = SinkFilter(flag) @@ -62,7 +62,7 @@ def test_sink_filter_to_pb(): def test_pass_all_filter_to_pb(): - from google.cloud.bigtable.deprecated.row_filters import PassAllFilter + from google.cloud.bigtable.row_filters import PassAllFilter flag = True row_filter = PassAllFilter(flag) @@ -72,7 +72,7 @@ def test_pass_all_filter_to_pb(): def test_block_all_filter_to_pb(): - from google.cloud.bigtable.deprecated.row_filters import BlockAllFilter + from google.cloud.bigtable.row_filters import BlockAllFilter flag = True row_filter = BlockAllFilter(flag) @@ -82,7 +82,7 @@ def test_block_all_filter_to_pb(): def test_regex_filterconstructor(): - from google.cloud.bigtable.deprecated.row_filters import _RegexFilter + from google.cloud.bigtable.row_filters import _RegexFilter regex = b"abc" row_filter = _RegexFilter(regex) @@ -90,7 +90,7 @@ def test_regex_filterconstructor(): def test_regex_filterconstructor_non_bytes(): - from google.cloud.bigtable.deprecated.row_filters import _RegexFilter + from google.cloud.bigtable.row_filters import _RegexFilter regex = "abc" row_filter = _RegexFilter(regex) @@ -98,7 +98,7 @@ def test_regex_filterconstructor_non_bytes(): def test_regex_filter__eq__type_differ(): - from google.cloud.bigtable.deprecated.row_filters import _RegexFilter + from google.cloud.bigtable.row_filters import _RegexFilter regex = b"def-rgx" row_filter1 = _RegexFilter(regex) @@ -107,7 +107,7 @@ def test_regex_filter__eq__type_differ(): def test_regex_filter__eq__same_value(): - from google.cloud.bigtable.deprecated.row_filters import _RegexFilter + from google.cloud.bigtable.row_filters import _RegexFilter regex = b"trex-regex" row_filter1 = _RegexFilter(regex) @@ -116,7 +116,7 @@ def test_regex_filter__eq__same_value(): def test_regex_filter__ne__same_value(): - from google.cloud.bigtable.deprecated.row_filters import _RegexFilter + from google.cloud.bigtable.row_filters import _RegexFilter regex = b"abc" row_filter1 = _RegexFilter(regex) @@ -125,7 +125,7 @@ def test_regex_filter__ne__same_value(): def test_row_key_regex_filter_to_pb(): - from google.cloud.bigtable.deprecated.row_filters import RowKeyRegexFilter + from google.cloud.bigtable.row_filters import RowKeyRegexFilter regex = b"row-key-regex" row_filter = RowKeyRegexFilter(regex) @@ -135,7 +135,7 @@ def test_row_key_regex_filter_to_pb(): def test_row_sample_filter_constructor(): - from google.cloud.bigtable.deprecated.row_filters import RowSampleFilter + from google.cloud.bigtable.row_filters import RowSampleFilter sample = object() row_filter = RowSampleFilter(sample) @@ -143,7 +143,7 @@ def test_row_sample_filter_constructor(): def test_row_sample_filter___eq__type_differ(): - from google.cloud.bigtable.deprecated.row_filters import RowSampleFilter + from google.cloud.bigtable.row_filters import RowSampleFilter sample = object() row_filter1 = RowSampleFilter(sample) @@ -152,7 +152,7 @@ def test_row_sample_filter___eq__type_differ(): def test_row_sample_filter___eq__same_value(): - from google.cloud.bigtable.deprecated.row_filters import 
RowSampleFilter + from google.cloud.bigtable.row_filters import RowSampleFilter sample = object() row_filter1 = RowSampleFilter(sample) @@ -161,7 +161,7 @@ def test_row_sample_filter___eq__same_value(): def test_row_sample_filter___ne__(): - from google.cloud.bigtable.deprecated.row_filters import RowSampleFilter + from google.cloud.bigtable.row_filters import RowSampleFilter sample = object() other_sample = object() @@ -171,7 +171,7 @@ def test_row_sample_filter___ne__(): def test_row_sample_filter_to_pb(): - from google.cloud.bigtable.deprecated.row_filters import RowSampleFilter + from google.cloud.bigtable.row_filters import RowSampleFilter sample = 0.25 row_filter = RowSampleFilter(sample) @@ -181,7 +181,7 @@ def test_row_sample_filter_to_pb(): def test_family_name_regex_filter_to_pb(): - from google.cloud.bigtable.deprecated.row_filters import FamilyNameRegexFilter + from google.cloud.bigtable.row_filters import FamilyNameRegexFilter regex = "family-regex" row_filter = FamilyNameRegexFilter(regex) @@ -191,7 +191,7 @@ def test_family_name_regex_filter_to_pb(): def test_column_qualifier_regext_filter_to_pb(): - from google.cloud.bigtable.deprecated.row_filters import ColumnQualifierRegexFilter + from google.cloud.bigtable.row_filters import ColumnQualifierRegexFilter regex = b"column-regex" row_filter = ColumnQualifierRegexFilter(regex) @@ -201,7 +201,7 @@ def test_column_qualifier_regext_filter_to_pb(): def test_timestamp_range_constructor(): - from google.cloud.bigtable.deprecated.row_filters import TimestampRange + from google.cloud.bigtable.row_filters import TimestampRange start = object() end = object() @@ -211,7 +211,7 @@ def test_timestamp_range_constructor(): def test_timestamp_range___eq__(): - from google.cloud.bigtable.deprecated.row_filters import TimestampRange + from google.cloud.bigtable.row_filters import TimestampRange start = object() end = object() @@ -221,7 +221,7 @@ def test_timestamp_range___eq__(): def test_timestamp_range___eq__type_differ(): - from google.cloud.bigtable.deprecated.row_filters import TimestampRange + from google.cloud.bigtable.row_filters import TimestampRange start = object() end = object() @@ -231,7 +231,7 @@ def test_timestamp_range___eq__type_differ(): def test_timestamp_range___ne__same_value(): - from google.cloud.bigtable.deprecated.row_filters import TimestampRange + from google.cloud.bigtable.row_filters import TimestampRange start = object() end = object() @@ -243,7 +243,7 @@ def test_timestamp_range___ne__same_value(): def _timestamp_range_to_pb_helper(pb_kwargs, start=None, end=None): import datetime from google.cloud._helpers import _EPOCH - from google.cloud.bigtable.deprecated.row_filters import TimestampRange + from google.cloud.bigtable.row_filters import TimestampRange if start is not None: start = _EPOCH + datetime.timedelta(microseconds=start) @@ -291,7 +291,7 @@ def test_timestamp_range_to_pb_end_only(): def test_timestamp_range_filter_constructor(): - from google.cloud.bigtable.deprecated.row_filters import TimestampRangeFilter + from google.cloud.bigtable.row_filters import TimestampRangeFilter range_ = object() row_filter = TimestampRangeFilter(range_) @@ -299,7 +299,7 @@ def test_timestamp_range_filter_constructor(): def test_timestamp_range_filter___eq__type_differ(): - from google.cloud.bigtable.deprecated.row_filters import TimestampRangeFilter + from google.cloud.bigtable.row_filters import TimestampRangeFilter range_ = object() row_filter1 = TimestampRangeFilter(range_) @@ -308,7 +308,7 @@ def 
test_timestamp_range_filter___eq__type_differ(): def test_timestamp_range_filter___eq__same_value(): - from google.cloud.bigtable.deprecated.row_filters import TimestampRangeFilter + from google.cloud.bigtable.row_filters import TimestampRangeFilter range_ = object() row_filter1 = TimestampRangeFilter(range_) @@ -317,7 +317,7 @@ def test_timestamp_range_filter___eq__same_value(): def test_timestamp_range_filter___ne__(): - from google.cloud.bigtable.deprecated.row_filters import TimestampRangeFilter + from google.cloud.bigtable.row_filters import TimestampRangeFilter range_ = object() other_range_ = object() @@ -327,8 +327,8 @@ def test_timestamp_range_filter___ne__(): def test_timestamp_range_filter_to_pb(): - from google.cloud.bigtable.deprecated.row_filters import TimestampRangeFilter - from google.cloud.bigtable.deprecated.row_filters import TimestampRange + from google.cloud.bigtable.row_filters import TimestampRangeFilter + from google.cloud.bigtable.row_filters import TimestampRange range_ = TimestampRange() row_filter = TimestampRangeFilter(range_) @@ -338,7 +338,7 @@ def test_timestamp_range_filter_to_pb(): def test_column_range_filter_constructor_defaults(): - from google.cloud.bigtable.deprecated.row_filters import ColumnRangeFilter + from google.cloud.bigtable.row_filters import ColumnRangeFilter column_family_id = object() row_filter = ColumnRangeFilter(column_family_id) @@ -350,7 +350,7 @@ def test_column_range_filter_constructor_defaults(): def test_column_range_filter_constructor_explicit(): - from google.cloud.bigtable.deprecated.row_filters import ColumnRangeFilter + from google.cloud.bigtable.row_filters import ColumnRangeFilter column_family_id = object() start_column = object() @@ -372,7 +372,7 @@ def test_column_range_filter_constructor_explicit(): def test_column_range_filter_constructor_bad_start(): - from google.cloud.bigtable.deprecated.row_filters import ColumnRangeFilter + from google.cloud.bigtable.row_filters import ColumnRangeFilter column_family_id = object() with pytest.raises(ValueError): @@ -380,7 +380,7 @@ def test_column_range_filter_constructor_bad_start(): def test_column_range_filter_constructor_bad_end(): - from google.cloud.bigtable.deprecated.row_filters import ColumnRangeFilter + from google.cloud.bigtable.row_filters import ColumnRangeFilter column_family_id = object() with pytest.raises(ValueError): @@ -388,7 +388,7 @@ def test_column_range_filter_constructor_bad_end(): def test_column_range_filter___eq__(): - from google.cloud.bigtable.deprecated.row_filters import ColumnRangeFilter + from google.cloud.bigtable.row_filters import ColumnRangeFilter column_family_id = object() start_column = object() @@ -413,7 +413,7 @@ def test_column_range_filter___eq__(): def test_column_range_filter___eq__type_differ(): - from google.cloud.bigtable.deprecated.row_filters import ColumnRangeFilter + from google.cloud.bigtable.row_filters import ColumnRangeFilter column_family_id = object() row_filter1 = ColumnRangeFilter(column_family_id) @@ -422,7 +422,7 @@ def test_column_range_filter___eq__type_differ(): def test_column_range_filter___ne__(): - from google.cloud.bigtable.deprecated.row_filters import ColumnRangeFilter + from google.cloud.bigtable.row_filters import ColumnRangeFilter column_family_id = object() other_column_family_id = object() @@ -448,7 +448,7 @@ def test_column_range_filter___ne__(): def test_column_range_filter_to_pb(): - from google.cloud.bigtable.deprecated.row_filters import ColumnRangeFilter + from google.cloud.bigtable.row_filters 
import ColumnRangeFilter column_family_id = "column-family-id" row_filter = ColumnRangeFilter(column_family_id) @@ -458,7 +458,7 @@ def test_column_range_filter_to_pb(): def test_column_range_filter_to_pb_inclusive_start(): - from google.cloud.bigtable.deprecated.row_filters import ColumnRangeFilter + from google.cloud.bigtable.row_filters import ColumnRangeFilter column_family_id = "column-family-id" column = b"column" @@ -471,7 +471,7 @@ def test_column_range_filter_to_pb_inclusive_start(): def test_column_range_filter_to_pb_exclusive_start(): - from google.cloud.bigtable.deprecated.row_filters import ColumnRangeFilter + from google.cloud.bigtable.row_filters import ColumnRangeFilter column_family_id = "column-family-id" column = b"column" @@ -486,7 +486,7 @@ def test_column_range_filter_to_pb_exclusive_start(): def test_column_range_filter_to_pb_inclusive_end(): - from google.cloud.bigtable.deprecated.row_filters import ColumnRangeFilter + from google.cloud.bigtable.row_filters import ColumnRangeFilter column_family_id = "column-family-id" column = b"column" @@ -499,7 +499,7 @@ def test_column_range_filter_to_pb_inclusive_end(): def test_column_range_filter_to_pb_exclusive_end(): - from google.cloud.bigtable.deprecated.row_filters import ColumnRangeFilter + from google.cloud.bigtable.row_filters import ColumnRangeFilter column_family_id = "column-family-id" column = b"column" @@ -514,7 +514,7 @@ def test_column_range_filter_to_pb_exclusive_end(): def test_value_regex_filter_to_pb_w_bytes(): - from google.cloud.bigtable.deprecated.row_filters import ValueRegexFilter + from google.cloud.bigtable.row_filters import ValueRegexFilter value = regex = b"value-regex" row_filter = ValueRegexFilter(value) @@ -524,7 +524,7 @@ def test_value_regex_filter_to_pb_w_bytes(): def test_value_regex_filter_to_pb_w_str(): - from google.cloud.bigtable.deprecated.row_filters import ValueRegexFilter + from google.cloud.bigtable.row_filters import ValueRegexFilter value = "value-regex" regex = value.encode("ascii") @@ -535,7 +535,7 @@ def test_value_regex_filter_to_pb_w_str(): def test_exact_value_filter_to_pb_w_bytes(): - from google.cloud.bigtable.deprecated.row_filters import ExactValueFilter + from google.cloud.bigtable.row_filters import ExactValueFilter value = regex = b"value-regex" row_filter = ExactValueFilter(value) @@ -545,7 +545,7 @@ def test_exact_value_filter_to_pb_w_bytes(): def test_exact_value_filter_to_pb_w_str(): - from google.cloud.bigtable.deprecated.row_filters import ExactValueFilter + from google.cloud.bigtable.row_filters import ExactValueFilter value = "value-regex" regex = value.encode("ascii") @@ -557,7 +557,7 @@ def test_exact_value_filter_to_pb_w_str(): def test_exact_value_filter_to_pb_w_int(): import struct - from google.cloud.bigtable.deprecated.row_filters import ExactValueFilter + from google.cloud.bigtable.row_filters import ExactValueFilter value = 1 regex = struct.Struct(">q").pack(value) @@ -568,7 +568,7 @@ def test_exact_value_filter_to_pb_w_int(): def test_value_range_filter_constructor_defaults(): - from google.cloud.bigtable.deprecated.row_filters import ValueRangeFilter + from google.cloud.bigtable.row_filters import ValueRangeFilter row_filter = ValueRangeFilter() @@ -579,7 +579,7 @@ def test_value_range_filter_constructor_defaults(): def test_value_range_filter_constructor_explicit(): - from google.cloud.bigtable.deprecated.row_filters import ValueRangeFilter + from google.cloud.bigtable.row_filters import ValueRangeFilter start_value = object() end_value = object() 
@@ -600,7 +600,7 @@ def test_value_range_filter_constructor_explicit(): def test_value_range_filter_constructor_w_int_values(): - from google.cloud.bigtable.deprecated.row_filters import ValueRangeFilter + from google.cloud.bigtable.row_filters import ValueRangeFilter import struct start_value = 1 @@ -618,21 +618,21 @@ def test_value_range_filter_constructor_w_int_values(): def test_value_range_filter_constructor_bad_start(): - from google.cloud.bigtable.deprecated.row_filters import ValueRangeFilter + from google.cloud.bigtable.row_filters import ValueRangeFilter with pytest.raises(ValueError): ValueRangeFilter(inclusive_start=True) def test_value_range_filter_constructor_bad_end(): - from google.cloud.bigtable.deprecated.row_filters import ValueRangeFilter + from google.cloud.bigtable.row_filters import ValueRangeFilter with pytest.raises(ValueError): ValueRangeFilter(inclusive_end=True) def test_value_range_filter___eq__(): - from google.cloud.bigtable.deprecated.row_filters import ValueRangeFilter + from google.cloud.bigtable.row_filters import ValueRangeFilter start_value = object() end_value = object() @@ -654,7 +654,7 @@ def test_value_range_filter___eq__(): def test_value_range_filter___eq__type_differ(): - from google.cloud.bigtable.deprecated.row_filters import ValueRangeFilter + from google.cloud.bigtable.row_filters import ValueRangeFilter row_filter1 = ValueRangeFilter() row_filter2 = object() @@ -662,7 +662,7 @@ def test_value_range_filter___eq__type_differ(): def test_value_range_filter___ne__(): - from google.cloud.bigtable.deprecated.row_filters import ValueRangeFilter + from google.cloud.bigtable.row_filters import ValueRangeFilter start_value = object() other_start_value = object() @@ -685,7 +685,7 @@ def test_value_range_filter___ne__(): def test_value_range_filter_to_pb(): - from google.cloud.bigtable.deprecated.row_filters import ValueRangeFilter + from google.cloud.bigtable.row_filters import ValueRangeFilter row_filter = ValueRangeFilter() expected_pb = _RowFilterPB(value_range_filter=_ValueRangePB()) @@ -693,7 +693,7 @@ def test_value_range_filter_to_pb(): def test_value_range_filter_to_pb_inclusive_start(): - from google.cloud.bigtable.deprecated.row_filters import ValueRangeFilter + from google.cloud.bigtable.row_filters import ValueRangeFilter value = b"some-value" row_filter = ValueRangeFilter(start_value=value) @@ -703,7 +703,7 @@ def test_value_range_filter_to_pb_inclusive_start(): def test_value_range_filter_to_pb_exclusive_start(): - from google.cloud.bigtable.deprecated.row_filters import ValueRangeFilter + from google.cloud.bigtable.row_filters import ValueRangeFilter value = b"some-value" row_filter = ValueRangeFilter(start_value=value, inclusive_start=False) @@ -713,7 +713,7 @@ def test_value_range_filter_to_pb_exclusive_start(): def test_value_range_filter_to_pb_inclusive_end(): - from google.cloud.bigtable.deprecated.row_filters import ValueRangeFilter + from google.cloud.bigtable.row_filters import ValueRangeFilter value = b"some-value" row_filter = ValueRangeFilter(end_value=value) @@ -723,7 +723,7 @@ def test_value_range_filter_to_pb_inclusive_end(): def test_value_range_filter_to_pb_exclusive_end(): - from google.cloud.bigtable.deprecated.row_filters import ValueRangeFilter + from google.cloud.bigtable.row_filters import ValueRangeFilter value = b"some-value" row_filter = ValueRangeFilter(end_value=value, inclusive_end=False) @@ -733,7 +733,7 @@ def test_value_range_filter_to_pb_exclusive_end(): def test_cell_count_constructor(): - from 
google.cloud.bigtable.deprecated.row_filters import _CellCountFilter + from google.cloud.bigtable.row_filters import _CellCountFilter num_cells = object() row_filter = _CellCountFilter(num_cells) @@ -741,7 +741,7 @@ def test_cell_count_constructor(): def test_cell_count___eq__type_differ(): - from google.cloud.bigtable.deprecated.row_filters import _CellCountFilter + from google.cloud.bigtable.row_filters import _CellCountFilter num_cells = object() row_filter1 = _CellCountFilter(num_cells) @@ -750,7 +750,7 @@ def test_cell_count___eq__type_differ(): def test_cell_count___eq__same_value(): - from google.cloud.bigtable.deprecated.row_filters import _CellCountFilter + from google.cloud.bigtable.row_filters import _CellCountFilter num_cells = object() row_filter1 = _CellCountFilter(num_cells) @@ -759,7 +759,7 @@ def test_cell_count___eq__same_value(): def test_cell_count___ne__same_value(): - from google.cloud.bigtable.deprecated.row_filters import _CellCountFilter + from google.cloud.bigtable.row_filters import _CellCountFilter num_cells = object() row_filter1 = _CellCountFilter(num_cells) @@ -768,7 +768,7 @@ def test_cell_count___ne__same_value(): def test_cells_row_offset_filter_to_pb(): - from google.cloud.bigtable.deprecated.row_filters import CellsRowOffsetFilter + from google.cloud.bigtable.row_filters import CellsRowOffsetFilter num_cells = 76 row_filter = CellsRowOffsetFilter(num_cells) @@ -778,7 +778,7 @@ def test_cells_row_offset_filter_to_pb(): def test_cells_row_limit_filter_to_pb(): - from google.cloud.bigtable.deprecated.row_filters import CellsRowLimitFilter + from google.cloud.bigtable.row_filters import CellsRowLimitFilter num_cells = 189 row_filter = CellsRowLimitFilter(num_cells) @@ -788,7 +788,7 @@ def test_cells_row_limit_filter_to_pb(): def test_cells_column_limit_filter_to_pb(): - from google.cloud.bigtable.deprecated.row_filters import CellsColumnLimitFilter + from google.cloud.bigtable.row_filters import CellsColumnLimitFilter num_cells = 10 row_filter = CellsColumnLimitFilter(num_cells) @@ -798,7 +798,7 @@ def test_cells_column_limit_filter_to_pb(): def test_strip_value_transformer_filter_to_pb(): - from google.cloud.bigtable.deprecated.row_filters import StripValueTransformerFilter + from google.cloud.bigtable.row_filters import StripValueTransformerFilter flag = True row_filter = StripValueTransformerFilter(flag) @@ -808,7 +808,7 @@ def test_strip_value_transformer_filter_to_pb(): def test_apply_label_filter_constructor(): - from google.cloud.bigtable.deprecated.row_filters import ApplyLabelFilter + from google.cloud.bigtable.row_filters import ApplyLabelFilter label = object() row_filter = ApplyLabelFilter(label) @@ -816,7 +816,7 @@ def test_apply_label_filter_constructor(): def test_apply_label_filter___eq__type_differ(): - from google.cloud.bigtable.deprecated.row_filters import ApplyLabelFilter + from google.cloud.bigtable.row_filters import ApplyLabelFilter label = object() row_filter1 = ApplyLabelFilter(label) @@ -825,7 +825,7 @@ def test_apply_label_filter___eq__type_differ(): def test_apply_label_filter___eq__same_value(): - from google.cloud.bigtable.deprecated.row_filters import ApplyLabelFilter + from google.cloud.bigtable.row_filters import ApplyLabelFilter label = object() row_filter1 = ApplyLabelFilter(label) @@ -834,7 +834,7 @@ def test_apply_label_filter___eq__same_value(): def test_apply_label_filter___ne__(): - from google.cloud.bigtable.deprecated.row_filters import ApplyLabelFilter + from google.cloud.bigtable.row_filters import 
ApplyLabelFilter label = object() other_label = object() @@ -844,7 +844,7 @@ def test_apply_label_filter___ne__(): def test_apply_label_filter_to_pb(): - from google.cloud.bigtable.deprecated.row_filters import ApplyLabelFilter + from google.cloud.bigtable.row_filters import ApplyLabelFilter label = "label" row_filter = ApplyLabelFilter(label) @@ -854,14 +854,14 @@ def test_apply_label_filter_to_pb(): def test_filter_combination_constructor_defaults(): - from google.cloud.bigtable.deprecated.row_filters import _FilterCombination + from google.cloud.bigtable.row_filters import _FilterCombination row_filter = _FilterCombination() assert row_filter.filters == [] def test_filter_combination_constructor_explicit(): - from google.cloud.bigtable.deprecated.row_filters import _FilterCombination + from google.cloud.bigtable.row_filters import _FilterCombination filters = object() row_filter = _FilterCombination(filters=filters) @@ -869,7 +869,7 @@ def test_filter_combination_constructor_explicit(): def test_filter_combination___eq__(): - from google.cloud.bigtable.deprecated.row_filters import _FilterCombination + from google.cloud.bigtable.row_filters import _FilterCombination filters = object() row_filter1 = _FilterCombination(filters=filters) @@ -878,7 +878,7 @@ def test_filter_combination___eq__(): def test_filter_combination___eq__type_differ(): - from google.cloud.bigtable.deprecated.row_filters import _FilterCombination + from google.cloud.bigtable.row_filters import _FilterCombination filters = object() row_filter1 = _FilterCombination(filters=filters) @@ -887,7 +887,7 @@ def test_filter_combination___eq__type_differ(): def test_filter_combination___ne__(): - from google.cloud.bigtable.deprecated.row_filters import _FilterCombination + from google.cloud.bigtable.row_filters import _FilterCombination filters = object() other_filters = object() @@ -897,9 +897,9 @@ def test_filter_combination___ne__(): def test_row_filter_chain_to_pb(): - from google.cloud.bigtable.deprecated.row_filters import RowFilterChain - from google.cloud.bigtable.deprecated.row_filters import RowSampleFilter - from google.cloud.bigtable.deprecated.row_filters import StripValueTransformerFilter + from google.cloud.bigtable.row_filters import RowFilterChain + from google.cloud.bigtable.row_filters import RowSampleFilter + from google.cloud.bigtable.row_filters import StripValueTransformerFilter row_filter1 = StripValueTransformerFilter(True) row_filter1_pb = row_filter1.to_pb() @@ -917,10 +917,10 @@ def test_row_filter_chain_to_pb(): def test_row_filter_chain_to_pb_nested(): - from google.cloud.bigtable.deprecated.row_filters import CellsRowLimitFilter - from google.cloud.bigtable.deprecated.row_filters import RowFilterChain - from google.cloud.bigtable.deprecated.row_filters import RowSampleFilter - from google.cloud.bigtable.deprecated.row_filters import StripValueTransformerFilter + from google.cloud.bigtable.row_filters import CellsRowLimitFilter + from google.cloud.bigtable.row_filters import RowFilterChain + from google.cloud.bigtable.row_filters import RowSampleFilter + from google.cloud.bigtable.row_filters import StripValueTransformerFilter row_filter1 = StripValueTransformerFilter(True) row_filter2 = RowSampleFilter(0.25) @@ -941,9 +941,9 @@ def test_row_filter_chain_to_pb_nested(): def test_row_filter_union_to_pb(): - from google.cloud.bigtable.deprecated.row_filters import RowFilterUnion - from google.cloud.bigtable.deprecated.row_filters import RowSampleFilter - from 
google.cloud.bigtable.deprecated.row_filters import StripValueTransformerFilter + from google.cloud.bigtable.row_filters import RowFilterUnion + from google.cloud.bigtable.row_filters import RowSampleFilter + from google.cloud.bigtable.row_filters import StripValueTransformerFilter row_filter1 = StripValueTransformerFilter(True) row_filter1_pb = row_filter1.to_pb() @@ -961,10 +961,10 @@ def test_row_filter_union_to_pb(): def test_row_filter_union_to_pb_nested(): - from google.cloud.bigtable.deprecated.row_filters import CellsRowLimitFilter - from google.cloud.bigtable.deprecated.row_filters import RowFilterUnion - from google.cloud.bigtable.deprecated.row_filters import RowSampleFilter - from google.cloud.bigtable.deprecated.row_filters import StripValueTransformerFilter + from google.cloud.bigtable.row_filters import CellsRowLimitFilter + from google.cloud.bigtable.row_filters import RowFilterUnion + from google.cloud.bigtable.row_filters import RowSampleFilter + from google.cloud.bigtable.row_filters import StripValueTransformerFilter row_filter1 = StripValueTransformerFilter(True) row_filter2 = RowSampleFilter(0.25) @@ -985,7 +985,7 @@ def test_row_filter_union_to_pb_nested(): def test_conditional_row_filter_constructor(): - from google.cloud.bigtable.deprecated.row_filters import ConditionalRowFilter + from google.cloud.bigtable.row_filters import ConditionalRowFilter base_filter = object() true_filter = object() @@ -999,7 +999,7 @@ def test_conditional_row_filter_constructor(): def test_conditional_row_filter___eq__(): - from google.cloud.bigtable.deprecated.row_filters import ConditionalRowFilter + from google.cloud.bigtable.row_filters import ConditionalRowFilter base_filter = object() true_filter = object() @@ -1014,7 +1014,7 @@ def test_conditional_row_filter___eq__(): def test_conditional_row_filter___eq__type_differ(): - from google.cloud.bigtable.deprecated.row_filters import ConditionalRowFilter + from google.cloud.bigtable.row_filters import ConditionalRowFilter base_filter = object() true_filter = object() @@ -1027,7 +1027,7 @@ def test_conditional_row_filter___eq__type_differ(): def test_conditional_row_filter___ne__(): - from google.cloud.bigtable.deprecated.row_filters import ConditionalRowFilter + from google.cloud.bigtable.row_filters import ConditionalRowFilter base_filter = object() other_base_filter = object() @@ -1043,10 +1043,10 @@ def test_conditional_row_filter___ne__(): def test_conditional_row_filter_to_pb(): - from google.cloud.bigtable.deprecated.row_filters import ConditionalRowFilter - from google.cloud.bigtable.deprecated.row_filters import CellsRowOffsetFilter - from google.cloud.bigtable.deprecated.row_filters import RowSampleFilter - from google.cloud.bigtable.deprecated.row_filters import StripValueTransformerFilter + from google.cloud.bigtable.row_filters import ConditionalRowFilter + from google.cloud.bigtable.row_filters import CellsRowOffsetFilter + from google.cloud.bigtable.row_filters import RowSampleFilter + from google.cloud.bigtable.row_filters import StripValueTransformerFilter row_filter1 = StripValueTransformerFilter(True) row_filter1_pb = row_filter1.to_pb() @@ -1073,9 +1073,9 @@ def test_conditional_row_filter_to_pb(): def test_conditional_row_filter_to_pb_true_only(): - from google.cloud.bigtable.deprecated.row_filters import ConditionalRowFilter - from google.cloud.bigtable.deprecated.row_filters import RowSampleFilter - from google.cloud.bigtable.deprecated.row_filters import StripValueTransformerFilter + from 
google.cloud.bigtable.row_filters import ConditionalRowFilter + from google.cloud.bigtable.row_filters import RowSampleFilter + from google.cloud.bigtable.row_filters import StripValueTransformerFilter row_filter1 = StripValueTransformerFilter(True) row_filter1_pb = row_filter1.to_pb() @@ -1095,9 +1095,9 @@ def test_conditional_row_filter_to_pb_true_only(): def test_conditional_row_filter_to_pb_false_only(): - from google.cloud.bigtable.deprecated.row_filters import ConditionalRowFilter - from google.cloud.bigtable.deprecated.row_filters import RowSampleFilter - from google.cloud.bigtable.deprecated.row_filters import StripValueTransformerFilter + from google.cloud.bigtable.row_filters import ConditionalRowFilter + from google.cloud.bigtable.row_filters import RowSampleFilter + from google.cloud.bigtable.row_filters import StripValueTransformerFilter row_filter1 = StripValueTransformerFilter(True) row_filter1_pb = row_filter1.to_pb() diff --git a/tests/unit/v2_client/test_row_merger.py b/tests/unit/v2_client/test_row_merger.py index 26cedb34d..483c04536 100644 --- a/tests/unit/v2_client/test_row_merger.py +++ b/tests/unit/v2_client/test_row_merger.py @@ -5,13 +5,9 @@ import proto import pytest -from google.cloud.bigtable.deprecated.row_data import ( - PartialRowsData, - PartialRowData, - InvalidChunk, -) +from google.cloud.bigtable.row_data import PartialRowsData, PartialRowData, InvalidChunk from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse -from google.cloud.bigtable.deprecated.row_merger import _RowMerger +from google.cloud.bigtable.row_merger import _RowMerger # TODO: autogenerate protos from diff --git a/tests/unit/v2_client/test_row_set.py b/tests/unit/v2_client/test_row_set.py index ce0e9bfea..1a33be720 100644 --- a/tests/unit/v2_client/test_row_set.py +++ b/tests/unit/v2_client/test_row_set.py @@ -14,7 +14,7 @@ def test_row_set_constructor(): - from google.cloud.bigtable.deprecated.row_set import RowSet + from google.cloud.bigtable.row_set import RowSet row_set = RowSet() assert [] == row_set.row_keys @@ -22,8 +22,8 @@ def test_row_set_constructor(): def test_row_set__eq__(): - from google.cloud.bigtable.deprecated.row_set import RowRange - from google.cloud.bigtable.deprecated.row_set import RowSet + from google.cloud.bigtable.row_set import RowRange + from google.cloud.bigtable.row_set import RowSet row_key1 = b"row_key1" row_key2 = b"row_key1" @@ -42,7 +42,7 @@ def test_row_set__eq__(): def test_row_set__eq__type_differ(): - from google.cloud.bigtable.deprecated.row_set import RowSet + from google.cloud.bigtable.row_set import RowSet row_set1 = RowSet() row_set2 = object() @@ -50,7 +50,7 @@ def test_row_set__eq__type_differ(): def test_row_set__eq__len_row_keys_differ(): - from google.cloud.bigtable.deprecated.row_set import RowSet + from google.cloud.bigtable.row_set import RowSet row_key1 = b"row_key1" row_key2 = b"row_key1" @@ -66,8 +66,8 @@ def test_row_set__eq__len_row_keys_differ(): def test_row_set__eq__len_row_ranges_differ(): - from google.cloud.bigtable.deprecated.row_set import RowRange - from google.cloud.bigtable.deprecated.row_set import RowSet + from google.cloud.bigtable.row_set import RowRange + from google.cloud.bigtable.row_set import RowSet row_range1 = RowRange(b"row_key4", b"row_key9") row_range2 = RowRange(b"row_key4", b"row_key9") @@ -83,7 +83,7 @@ def test_row_set__eq__len_row_ranges_differ(): def test_row_set__eq__row_keys_differ(): - from google.cloud.bigtable.deprecated.row_set import RowSet + from google.cloud.bigtable.row_set 
import RowSet row_set1 = RowSet() row_set2 = RowSet() @@ -99,8 +99,8 @@ def test_row_set__eq__row_keys_differ(): def test_row_set__eq__row_ranges_differ(): - from google.cloud.bigtable.deprecated.row_set import RowRange - from google.cloud.bigtable.deprecated.row_set import RowSet + from google.cloud.bigtable.row_set import RowRange + from google.cloud.bigtable.row_set import RowSet row_range1 = RowRange(b"row_key4", b"row_key9") row_range2 = RowRange(b"row_key14", b"row_key19") @@ -119,8 +119,8 @@ def test_row_set__eq__row_ranges_differ(): def test_row_set__ne__(): - from google.cloud.bigtable.deprecated.row_set import RowRange - from google.cloud.bigtable.deprecated.row_set import RowSet + from google.cloud.bigtable.row_set import RowRange + from google.cloud.bigtable.row_set import RowSet row_key1 = b"row_key1" row_key2 = b"row_key1" @@ -139,8 +139,8 @@ def test_row_set__ne__(): def test_row_set__ne__same_value(): - from google.cloud.bigtable.deprecated.row_set import RowRange - from google.cloud.bigtable.deprecated.row_set import RowSet + from google.cloud.bigtable.row_set import RowRange + from google.cloud.bigtable.row_set import RowSet row_key1 = b"row_key1" row_key2 = b"row_key1" @@ -159,7 +159,7 @@ def test_row_set__ne__same_value(): def test_row_set_add_row_key(): - from google.cloud.bigtable.deprecated.row_set import RowSet + from google.cloud.bigtable.row_set import RowSet row_set = RowSet() row_set.add_row_key("row_key1") @@ -168,8 +168,8 @@ def test_row_set_add_row_key(): def test_row_set_add_row_range(): - from google.cloud.bigtable.deprecated.row_set import RowRange - from google.cloud.bigtable.deprecated.row_set import RowSet + from google.cloud.bigtable.row_set import RowRange + from google.cloud.bigtable.row_set import RowSet row_set = RowSet() row_range1 = RowRange(b"row_key1", b"row_key9") @@ -181,7 +181,7 @@ def test_row_set_add_row_range(): def test_row_set_add_row_range_from_keys(): - from google.cloud.bigtable.deprecated.row_set import RowSet + from google.cloud.bigtable.row_set import RowSet row_set = RowSet() row_set.add_row_range_from_keys( @@ -194,7 +194,7 @@ def test_row_set_add_row_range_from_keys(): def test_row_set_add_row_range_with_prefix(): - from google.cloud.bigtable.deprecated.row_set import RowSet + from google.cloud.bigtable.row_set import RowSet row_set = RowSet() row_set.add_row_range_with_prefix("row") @@ -203,8 +203,8 @@ def test_row_set_add_row_range_with_prefix(): def test_row_set__update_message_request(): from google.cloud._helpers import _to_bytes - from google.cloud.bigtable.deprecated.row_set import RowRange - from google.cloud.bigtable.deprecated.row_set import RowSet + from google.cloud.bigtable.row_set import RowRange + from google.cloud.bigtable.row_set import RowSet row_set = RowSet() table_name = "table_name" @@ -224,7 +224,7 @@ def test_row_set__update_message_request(): def test_row_range_constructor(): - from google.cloud.bigtable.deprecated.row_set import RowRange + from google.cloud.bigtable.row_set import RowRange start_key = "row_key1" end_key = "row_key9" @@ -236,7 +236,7 @@ def test_row_range_constructor(): def test_row_range___hash__set_equality(): - from google.cloud.bigtable.deprecated.row_set import RowRange + from google.cloud.bigtable.row_set import RowRange row_range1 = RowRange("row_key1", "row_key9") row_range2 = RowRange("row_key1", "row_key9") @@ -246,7 +246,7 @@ def test_row_range___hash__set_equality(): def test_row_range___hash__not_equals(): - from google.cloud.bigtable.deprecated.row_set import RowRange + 
from google.cloud.bigtable.row_set import RowRange row_range1 = RowRange("row_key1", "row_key9") row_range2 = RowRange("row_key1", "row_key19") @@ -256,7 +256,7 @@ def test_row_range___hash__not_equals(): def test_row_range__eq__(): - from google.cloud.bigtable.deprecated.row_set import RowRange + from google.cloud.bigtable.row_set import RowRange start_key = b"row_key1" end_key = b"row_key9" @@ -266,7 +266,7 @@ def test_row_range__eq__(): def test_row_range___eq__type_differ(): - from google.cloud.bigtable.deprecated.row_set import RowRange + from google.cloud.bigtable.row_set import RowRange start_key = b"row_key1" end_key = b"row_key9" @@ -276,7 +276,7 @@ def test_row_range___eq__type_differ(): def test_row_range__ne__(): - from google.cloud.bigtable.deprecated.row_set import RowRange + from google.cloud.bigtable.row_set import RowRange start_key = b"row_key1" end_key = b"row_key9" @@ -286,7 +286,7 @@ def test_row_range__ne__(): def test_row_range__ne__same_value(): - from google.cloud.bigtable.deprecated.row_set import RowRange + from google.cloud.bigtable.row_set import RowRange start_key = b"row_key1" end_key = b"row_key9" @@ -296,7 +296,7 @@ def test_row_range__ne__same_value(): def test_row_range_get_range_kwargs_closed_open(): - from google.cloud.bigtable.deprecated.row_set import RowRange + from google.cloud.bigtable.row_set import RowRange start_key = b"row_key1" end_key = b"row_key9" @@ -307,7 +307,7 @@ def test_row_range_get_range_kwargs_closed_open(): def test_row_range_get_range_kwargs_open_closed(): - from google.cloud.bigtable.deprecated.row_set import RowRange + from google.cloud.bigtable.row_set import RowRange start_key = b"row_key1" end_key = b"row_key9" diff --git a/tests/unit/v2_client/test_table.py b/tests/unit/v2_client/test_table.py index ad31e8bc9..3d7d2e8ee 100644 --- a/tests/unit/v2_client/test_table.py +++ b/tests/unit/v2_client/test_table.py @@ -50,11 +50,11 @@ STATUS_INTERNAL = StatusCode.INTERNAL.value[0] -@mock.patch("google.cloud.bigtable.deprecated.table._MAX_BULK_MUTATIONS", new=3) +@mock.patch("google.cloud.bigtable.table._MAX_BULK_MUTATIONS", new=3) def test__compile_mutation_entries_w_too_many_mutations(): - from google.cloud.bigtable.deprecated.row import DirectRow - from google.cloud.bigtable.deprecated.table import TooManyMutationsError - from google.cloud.bigtable.deprecated.table import _compile_mutation_entries + from google.cloud.bigtable.row import DirectRow + from google.cloud.bigtable.table import TooManyMutationsError + from google.cloud.bigtable.table import _compile_mutation_entries table = mock.Mock(name="table", spec=["name"]) table.name = "table" @@ -72,8 +72,8 @@ def test__compile_mutation_entries_w_too_many_mutations(): def test__compile_mutation_entries_normal(): - from google.cloud.bigtable.deprecated.row import DirectRow - from google.cloud.bigtable.deprecated.table import _compile_mutation_entries + from google.cloud.bigtable.row import DirectRow + from google.cloud.bigtable.table import _compile_mutation_entries from google.cloud.bigtable_v2.types import MutateRowsRequest from google.cloud.bigtable_v2.types import data @@ -109,9 +109,9 @@ def test__compile_mutation_entries_normal(): def test__check_row_table_name_w_wrong_table_name(): - from google.cloud.bigtable.deprecated.table import _check_row_table_name - from google.cloud.bigtable.deprecated.table import TableMismatchError - from google.cloud.bigtable.deprecated.row import DirectRow + from google.cloud.bigtable.table import _check_row_table_name + from 
google.cloud.bigtable.table import TableMismatchError + from google.cloud.bigtable.row import DirectRow table = mock.Mock(name="table", spec=["name"]) table.name = "table" @@ -122,8 +122,8 @@ def test__check_row_table_name_w_wrong_table_name(): def test__check_row_table_name_w_right_table_name(): - from google.cloud.bigtable.deprecated.row import DirectRow - from google.cloud.bigtable.deprecated.table import _check_row_table_name + from google.cloud.bigtable.row import DirectRow + from google.cloud.bigtable.table import _check_row_table_name table = mock.Mock(name="table", spec=["name"]) table.name = "table" @@ -133,8 +133,8 @@ def test__check_row_table_name_w_right_table_name(): def test__check_row_type_w_wrong_row_type(): - from google.cloud.bigtable.deprecated.row import ConditionalRow - from google.cloud.bigtable.deprecated.table import _check_row_type + from google.cloud.bigtable.row import ConditionalRow + from google.cloud.bigtable.table import _check_row_type row = ConditionalRow(row_key=b"row_key", table="table", filter_=None) with pytest.raises(TypeError): @@ -142,21 +142,21 @@ def test__check_row_type_w_wrong_row_type(): def test__check_row_type_w_right_row_type(): - from google.cloud.bigtable.deprecated.row import DirectRow - from google.cloud.bigtable.deprecated.table import _check_row_type + from google.cloud.bigtable.row import DirectRow + from google.cloud.bigtable.table import _check_row_type row = DirectRow(row_key=b"row_key", table="table") assert not _check_row_type(row) def _make_client(*args, **kwargs): - from google.cloud.bigtable.deprecated.client import Client + from google.cloud.bigtable.client import Client return Client(*args, **kwargs) def _make_table(*args, **kwargs): - from google.cloud.bigtable.deprecated.table import Table + from google.cloud.bigtable.table import Table return Table(*args, **kwargs) @@ -219,7 +219,7 @@ def _table_row_methods_helper(): def test_table_row_factory_direct(): - from google.cloud.bigtable.deprecated.row import DirectRow + from google.cloud.bigtable.row import DirectRow table, row_key = _table_row_methods_helper() with warnings.catch_warnings(record=True) as warned: @@ -234,7 +234,7 @@ def test_table_row_factory_direct(): def test_table_row_factory_conditional(): - from google.cloud.bigtable.deprecated.row import ConditionalRow + from google.cloud.bigtable.row import ConditionalRow table, row_key = _table_row_methods_helper() filter_ = object() @@ -251,7 +251,7 @@ def test_table_row_factory_conditional(): def test_table_row_factory_append(): - from google.cloud.bigtable.deprecated.row import AppendRow + from google.cloud.bigtable.row import AppendRow table, row_key = _table_row_methods_helper() @@ -278,7 +278,7 @@ def test_table_row_factory_failure(): def test_table_direct_row(): - from google.cloud.bigtable.deprecated.row import DirectRow + from google.cloud.bigtable.row import DirectRow table, row_key = _table_row_methods_helper() row = table.direct_row(row_key) @@ -289,7 +289,7 @@ def test_table_direct_row(): def test_table_conditional_row(): - from google.cloud.bigtable.deprecated.row import ConditionalRow + from google.cloud.bigtable.row import ConditionalRow table, row_key = _table_row_methods_helper() filter_ = object() @@ -301,7 +301,7 @@ def test_table_conditional_row(): def test_table_append_row(): - from google.cloud.bigtable.deprecated.row import AppendRow + from google.cloud.bigtable.row import AppendRow table, row_key = _table_row_methods_helper() row = table.append_row(row_key) @@ -357,7 +357,7 @@ def 
_create_table_helper(split_keys=[], column_families={}): from google.cloud.bigtable_admin_v2.types import ( bigtable_table_admin as table_admin_messages_v2_pb2, ) - from google.cloud.bigtable.deprecated.column_family import ColumnFamily + from google.cloud.bigtable.column_family import ColumnFamily credentials = _make_credentials() client = _make_client(project="project-id", credentials=credentials, admin=True) @@ -391,7 +391,7 @@ def test_table_create(): def test_table_create_with_families(): - from google.cloud.bigtable.deprecated.column_family import MaxVersionsGCRule + from google.cloud.bigtable.column_family import MaxVersionsGCRule families = {"family": MaxVersionsGCRule(5)} _create_table_helper(column_families=families) @@ -404,7 +404,7 @@ def test_table_create_with_split_keys(): def test_table_exists_hit(): from google.cloud.bigtable_admin_v2.types import ListTablesResponse from google.cloud.bigtable_admin_v2.types import Table - from google.cloud.bigtable.deprecated import enums + from google.cloud.bigtable import enums credentials = _make_credentials() client = _make_client(project="project-id", credentials=credentials, admin=True) @@ -426,7 +426,7 @@ def test_table_exists_hit(): def test_table_exists_miss(): from google.api_core.exceptions import NotFound - from google.cloud.bigtable.deprecated import enums + from google.cloud.bigtable import enums credentials = _make_credentials() client = _make_client(project="project-id", credentials=credentials, admin=True) @@ -447,7 +447,7 @@ def test_table_exists_miss(): def test_table_exists_error(): from google.api_core.exceptions import BadRequest - from google.cloud.bigtable.deprecated import enums + from google.cloud.bigtable import enums credentials = _make_credentials() client = _make_client(project="project-id", credentials=credentials, admin=True) @@ -512,8 +512,8 @@ def test_table_list_column_families(): def test_table_get_cluster_states(): - from google.cloud.bigtable.deprecated.enums import Table as enum_table - from google.cloud.bigtable.deprecated.table import ClusterState + from google.cloud.bigtable.enums import Table as enum_table + from google.cloud.bigtable.table import ClusterState INITIALIZING = enum_table.ReplicationState.INITIALIZING PLANNED_MAINTENANCE = enum_table.ReplicationState.PLANNED_MAINTENANCE @@ -557,10 +557,10 @@ def test_table_get_cluster_states(): def test_table_get_encryption_info(): from google.rpc.code_pb2 import Code - from google.cloud.bigtable.deprecated.encryption_info import EncryptionInfo - from google.cloud.bigtable.deprecated.enums import EncryptionInfo as enum_crypto - from google.cloud.bigtable.deprecated.enums import Table as enum_table - from google.cloud.bigtable.deprecated.error import Status + from google.cloud.bigtable.encryption_info import EncryptionInfo + from google.cloud.bigtable.enums import EncryptionInfo as enum_crypto + from google.cloud.bigtable.enums import Table as enum_table + from google.cloud.bigtable.error import Status ENCRYPTION_TYPE_UNSPECIFIED = enum_crypto.EncryptionType.ENCRYPTION_TYPE_UNSPECIFIED GOOGLE_DEFAULT_ENCRYPTION = enum_crypto.EncryptionType.GOOGLE_DEFAULT_ENCRYPTION @@ -640,9 +640,10 @@ def _make_data_api(): def _table_read_row_helper(chunks, expected_result, app_profile_id=None): from google.cloud._testing import _Monkey - from google.cloud.bigtable.deprecated import table as MUT - from google.cloud.bigtable.deprecated.row_set import RowSet - from google.cloud.bigtable.deprecated.row_filters import RowSampleFilter + from google.cloud.bigtable import 
table as MUT + from google.cloud.bigtable.row_set import RowSet + from google.cloud.bigtable.row_filters import RowSampleFilter + from google.cloud.bigtable.row_data import DEFAULT_RETRY_READ_ROWS credentials = _make_credentials() client = _make_client(project="project-id", credentials=credentials, admin=True) @@ -691,7 +692,9 @@ def mock_create_row_request(table_name, **kwargs): assert result == expected_result assert mock_created == expected_request - data_api.read_rows.assert_called_once_with(request_pb, timeout=61.0) + data_api.read_rows.assert_called_once_with( + request_pb, timeout=61.0, retry=DEFAULT_RETRY_READ_ROWS + ) def test_table_read_row_miss_no__responses(): @@ -704,8 +707,8 @@ def test_table_read_row_miss_no_chunks_in_response(): def test_table_read_row_complete(): - from google.cloud.bigtable.deprecated.row_data import Cell - from google.cloud.bigtable.deprecated.row_data import PartialRowData + from google.cloud.bigtable.row_data import Cell + from google.cloud.bigtable.row_data import PartialRowData app_profile_id = "app-profile-id" chunk = _ReadRowsResponseCellChunkPB( @@ -768,7 +771,7 @@ def _table_mutate_rows_helper( mutation_timeout=None, app_profile_id=None, retry=None, timeout=None ): from google.rpc.status_pb2 import Status - from google.cloud.bigtable.deprecated.table import DEFAULT_RETRY + from google.cloud.bigtable.table import DEFAULT_RETRY credentials = _make_credentials() client = _make_client(project="project-id", credentials=credentials, admin=True) @@ -787,7 +790,7 @@ def _table_mutate_rows_helper( response = [Status(code=0), Status(code=1)] instance_mock = mock.Mock(return_value=response) klass_mock = mock.patch( - "google.cloud.bigtable.deprecated.table._RetryableMutateRowsWorker", + "google.cloud.bigtable.table._RetryableMutateRowsWorker", new=mock.MagicMock(return_value=instance_mock), ) @@ -854,9 +857,9 @@ def test_table_mutate_rows_w_mutation_timeout_and_timeout_arg(): def test_table_read_rows(): from google.cloud._testing import _Monkey - from google.cloud.bigtable.deprecated.row_data import PartialRowsData - from google.cloud.bigtable.deprecated import table as MUT - from google.cloud.bigtable.deprecated.row_data import DEFAULT_RETRY_READ_ROWS + from google.cloud.bigtable.row_data import PartialRowsData + from google.cloud.bigtable import table as MUT + from google.cloud.bigtable.row_data import DEFAULT_RETRY_READ_ROWS credentials = _make_credentials() client = _make_client(project="project-id", credentials=credentials, admin=True) @@ -906,7 +909,7 @@ def mock_create_row_request(table_name, **kwargs): } assert mock_created == [(table.name, created_kwargs)] - data_api.read_rows.assert_called_once_with(request_pb, timeout=61.0) + data_api.read_rows.assert_called_once_with(request_pb, timeout=61.0, retry=retry) def test_table_read_retry_rows(): @@ -1017,7 +1020,7 @@ def test_table_read_retry_rows_no_full_table_scan(): def test_table_yield_retry_rows(): - from google.cloud.bigtable.deprecated.table import _create_row_request + from google.cloud.bigtable.table import _create_row_request credentials = _make_credentials() client = _make_client(project="project-id", credentials=credentials, admin=True) @@ -1079,9 +1082,10 @@ def test_table_yield_retry_rows(): def test_table_yield_rows_with_row_set(): - from google.cloud.bigtable.deprecated.row_set import RowSet - from google.cloud.bigtable.deprecated.row_set import RowRange - from google.cloud.bigtable.deprecated.table import _create_row_request + from google.cloud.bigtable.row_set import RowSet + from 
google.cloud.bigtable.row_set import RowRange + from google.cloud.bigtable.table import _create_row_request + from google.cloud.bigtable.row_data import DEFAULT_RETRY_READ_ROWS credentials = _make_credentials() client = _make_client(project="project-id", credentials=credentials, admin=True) @@ -1149,7 +1153,9 @@ def test_table_yield_rows_with_row_set(): end_key=ROW_KEY_2, ) expected_request.rows.row_keys.append(ROW_KEY_3) - data_api.read_rows.assert_called_once_with(expected_request, timeout=61.0) + data_api.read_rows.assert_called_once_with( + expected_request, timeout=61.0, retry=DEFAULT_RETRY_READ_ROWS + ) def test_table_sample_row_keys(): @@ -1174,9 +1180,7 @@ def test_table_truncate(): table = _make_table(TABLE_ID, instance) table_api = client._table_admin_client = _make_table_api() - with mock.patch( - "google.cloud.bigtable.deprecated.table.Table.name", new=TABLE_NAME - ): + with mock.patch("google.cloud.bigtable.table.Table.name", new=TABLE_NAME): result = table.truncate() assert result is None @@ -1257,7 +1261,7 @@ def test_table_mutations_batcher_factory(): def test_table_get_iam_policy(): from google.iam.v1 import policy_pb2 - from google.cloud.bigtable.deprecated.policy import BIGTABLE_ADMIN_ROLE + from google.cloud.bigtable.policy import BIGTABLE_ADMIN_ROLE credentials = _make_credentials() client = _make_client(project="project-id", credentials=credentials, admin=True) @@ -1288,8 +1292,8 @@ def test_table_get_iam_policy(): def test_table_set_iam_policy(): from google.iam.v1 import policy_pb2 - from google.cloud.bigtable.deprecated.policy import Policy - from google.cloud.bigtable.deprecated.policy import BIGTABLE_ADMIN_ROLE + from google.cloud.bigtable.policy import Policy + from google.cloud.bigtable.policy import BIGTABLE_ADMIN_ROLE credentials = _make_credentials() client = _make_client(project="project-id", credentials=credentials, admin=True) @@ -1351,7 +1355,7 @@ def test_table_test_iam_permissions(): def test_table_backup_factory_defaults(): - from google.cloud.bigtable.deprecated.backup import Backup + from google.cloud.bigtable.backup import Backup instance = _make_table(INSTANCE_ID, None) table = _make_table(TABLE_ID, instance) @@ -1375,8 +1379,8 @@ def test_table_backup_factory_defaults(): def test_table_backup_factory_non_defaults(): import datetime from google.cloud._helpers import UTC - from google.cloud.bigtable.deprecated.backup import Backup - from google.cloud.bigtable.deprecated.instance import Instance + from google.cloud.bigtable.backup import Backup + from google.cloud.bigtable.instance import Instance instance = Instance(INSTANCE_ID, None) table = _make_table(TABLE_ID, instance) @@ -1406,7 +1410,7 @@ def _table_list_backups_helper(cluster_id=None, filter_=None, **kwargs): Backup as backup_pb, bigtable_table_admin, ) - from google.cloud.bigtable.deprecated.backup import Backup + from google.cloud.bigtable.backup import Backup client = _make_client( project=PROJECT_ID, credentials=_make_credentials(), admin=True @@ -1468,7 +1472,7 @@ def test_table_list_backups_w_options(): def _table_restore_helper(backup_name=None): - from google.cloud.bigtable.deprecated.instance import Instance + from google.cloud.bigtable.instance import Instance op_future = object() credentials = _make_credentials() @@ -1504,7 +1508,7 @@ def test_table_restore_table_w_backup_name(): def _make_worker(*args, **kwargs): - from google.cloud.bigtable.deprecated.table import _RetryableMutateRowsWorker + from google.cloud.bigtable.table import _RetryableMutateRowsWorker return 
_RetryableMutateRowsWorker(*args, **kwargs) @@ -1545,7 +1549,7 @@ def test_rmrw_callable_empty_rows(): def test_rmrw_callable_no_retry_strategy(): - from google.cloud.bigtable.deprecated.row import DirectRow + from google.cloud.bigtable.row import DirectRow # Setup: # - Mutate 3 rows. @@ -1587,8 +1591,8 @@ def test_rmrw_callable_no_retry_strategy(): def test_rmrw_callable_retry(): - from google.cloud.bigtable.deprecated.row import DirectRow - from google.cloud.bigtable.deprecated.table import DEFAULT_RETRY + from google.cloud.bigtable.row import DirectRow + from google.cloud.bigtable.table import DEFAULT_RETRY # Setup: # - Mutate 3 rows. @@ -1642,8 +1646,8 @@ def _do_mutate_retryable_rows_helper( mutate_rows_side_effect=None, ): from google.api_core.exceptions import ServiceUnavailable - from google.cloud.bigtable.deprecated.row import DirectRow - from google.cloud.bigtable.deprecated.table import _BigtableRetryableError + from google.cloud.bigtable.row import DirectRow + from google.cloud.bigtable.table import _BigtableRetryableError from google.cloud.bigtable_v2.types import bigtable as data_messages_v2_pb2 # Setup: @@ -1799,9 +1803,7 @@ def test_rmrw_do_mutate_retryable_rows_w_retryable_error_internal_rst_stream_err # Raise internal server error with RST STREAM error messages # There should be no error raised and that the request is retried from google.api_core.exceptions import InternalServerError - from google.cloud.bigtable.deprecated.row_data import ( - RETRYABLE_INTERNAL_ERROR_MESSAGES, - ) + from google.cloud.bigtable.row_data import RETRYABLE_INTERNAL_ERROR_MESSAGES row_cells = [ (b"row_key_1", ("cf", b"col", b"value1")), @@ -2007,7 +2009,7 @@ def test_rmrw_do_mutate_retryable_rows_mismatch_num_responses(): def test__create_row_request_table_name_only(): - from google.cloud.bigtable.deprecated.table import _create_row_request + from google.cloud.bigtable.table import _create_row_request table_name = "table_name" result = _create_row_request(table_name) @@ -2016,14 +2018,14 @@ def test__create_row_request_table_name_only(): def test__create_row_request_row_range_row_set_conflict(): - from google.cloud.bigtable.deprecated.table import _create_row_request + from google.cloud.bigtable.table import _create_row_request with pytest.raises(ValueError): _create_row_request(None, end_key=object(), row_set=object()) def test__create_row_request_row_range_start_key(): - from google.cloud.bigtable.deprecated.table import _create_row_request + from google.cloud.bigtable.table import _create_row_request from google.cloud.bigtable_v2.types import RowRange table_name = "table_name" @@ -2036,7 +2038,7 @@ def test__create_row_request_row_range_start_key(): def test__create_row_request_row_range_end_key(): - from google.cloud.bigtable.deprecated.table import _create_row_request + from google.cloud.bigtable.table import _create_row_request from google.cloud.bigtable_v2.types import RowRange table_name = "table_name" @@ -2049,7 +2051,7 @@ def test__create_row_request_row_range_end_key(): def test__create_row_request_row_range_both_keys(): - from google.cloud.bigtable.deprecated.table import _create_row_request + from google.cloud.bigtable.table import _create_row_request from google.cloud.bigtable_v2.types import RowRange table_name = "table_name" @@ -2063,7 +2065,7 @@ def test__create_row_request_row_range_both_keys(): def test__create_row_request_row_range_both_keys_inclusive(): - from google.cloud.bigtable.deprecated.table import _create_row_request + from google.cloud.bigtable.table import 
_create_row_request from google.cloud.bigtable_v2.types import RowRange table_name = "table_name" @@ -2079,8 +2081,8 @@ def test__create_row_request_row_range_both_keys_inclusive(): def test__create_row_request_with_filter(): - from google.cloud.bigtable.deprecated.table import _create_row_request - from google.cloud.bigtable.deprecated.row_filters import RowSampleFilter + from google.cloud.bigtable.table import _create_row_request + from google.cloud.bigtable.row_filters import RowSampleFilter table_name = "table_name" row_filter = RowSampleFilter(0.33) @@ -2092,7 +2094,7 @@ def test__create_row_request_with_filter(): def test__create_row_request_with_limit(): - from google.cloud.bigtable.deprecated.table import _create_row_request + from google.cloud.bigtable.table import _create_row_request table_name = "table_name" limit = 1337 @@ -2102,8 +2104,8 @@ def test__create_row_request_with_limit(): def test__create_row_request_with_row_set(): - from google.cloud.bigtable.deprecated.table import _create_row_request - from google.cloud.bigtable.deprecated.row_set import RowSet + from google.cloud.bigtable.table import _create_row_request + from google.cloud.bigtable.row_set import RowSet table_name = "table_name" row_set = RowSet() @@ -2113,7 +2115,7 @@ def test__create_row_request_with_row_set(): def test__create_row_request_with_app_profile_id(): - from google.cloud.bigtable.deprecated.table import _create_row_request + from google.cloud.bigtable.table import _create_row_request table_name = "table_name" limit = 1337 @@ -2132,8 +2134,8 @@ def _ReadRowsRequestPB(*args, **kw): def test_cluster_state___eq__(): - from google.cloud.bigtable.deprecated.enums import Table as enum_table - from google.cloud.bigtable.deprecated.table import ClusterState + from google.cloud.bigtable.enums import Table as enum_table + from google.cloud.bigtable.table import ClusterState READY = enum_table.ReplicationState.READY state1 = ClusterState(READY) @@ -2142,8 +2144,8 @@ def test_cluster_state___eq__(): def test_cluster_state___eq__type_differ(): - from google.cloud.bigtable.deprecated.enums import Table as enum_table - from google.cloud.bigtable.deprecated.table import ClusterState + from google.cloud.bigtable.enums import Table as enum_table + from google.cloud.bigtable.table import ClusterState READY = enum_table.ReplicationState.READY state1 = ClusterState(READY) @@ -2152,8 +2154,8 @@ def test_cluster_state___eq__type_differ(): def test_cluster_state___ne__same_value(): - from google.cloud.bigtable.deprecated.enums import Table as enum_table - from google.cloud.bigtable.deprecated.table import ClusterState + from google.cloud.bigtable.enums import Table as enum_table + from google.cloud.bigtable.table import ClusterState READY = enum_table.ReplicationState.READY state1 = ClusterState(READY) @@ -2162,8 +2164,8 @@ def test_cluster_state___ne__same_value(): def test_cluster_state___ne__(): - from google.cloud.bigtable.deprecated.enums import Table as enum_table - from google.cloud.bigtable.deprecated.table import ClusterState + from google.cloud.bigtable.enums import Table as enum_table + from google.cloud.bigtable.table import ClusterState READY = enum_table.ReplicationState.READY INITIALIZING = enum_table.ReplicationState.INITIALIZING @@ -2173,8 +2175,8 @@ def test_cluster_state___ne__(): def test_cluster_state__repr__(): - from google.cloud.bigtable.deprecated.enums import Table as enum_table - from google.cloud.bigtable.deprecated.table import ClusterState + from google.cloud.bigtable.enums import Table as 
enum_table + from google.cloud.bigtable.table import ClusterState STATE_NOT_KNOWN = enum_table.ReplicationState.STATE_NOT_KNOWN INITIALIZING = enum_table.ReplicationState.INITIALIZING From badaf652c37c69ba2a36856df59783209c3554cb Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 23 Jun 2023 17:07:15 -0700 Subject: [PATCH 185/213] rewrote some test paths --- .../data/{ => _async}/test__mutate_rows.py | 18 +- .../unit/data/{ => _async}/test__read_rows.py | 82 ++-- tests/unit/data/{ => _async}/test_client.py | 63 ++- .../unit/data/{ => _async}/test_iterators.py | 8 +- tests/unit/data/test__helpers.py | 4 +- tests/unit/data/test_exceptions.py | 16 +- tests/unit/data/test_mutations.py | 18 +- tests/unit/data/test_mutations_batcher.py | 46 +-- .../unit/data/test_read_modify_write_rules.py | 6 +- tests/unit/data/test_read_rows_acceptance.py | 8 +- tests/unit/data/test_read_rows_query.py | 44 +-- tests/unit/data/test_row.py | 12 +- tests/unit/data/test_row_filters.py | 360 +++++++++--------- 13 files changed, 342 insertions(+), 343 deletions(-) rename tests/unit/data/{ => _async}/test__mutate_rows.py (94%) rename tests/unit/data/{ => _async}/test__read_rows.py (93%) rename tests/unit/data/{ => _async}/test_client.py (98%) rename tests/unit/data/{ => _async}/test_iterators.py (96%) diff --git a/tests/unit/data/test__mutate_rows.py b/tests/unit/data/_async/test__mutate_rows.py similarity index 94% rename from tests/unit/data/test__mutate_rows.py rename to tests/unit/data/_async/test__mutate_rows.py index 18b2beede..a31862b50 100644 --- a/tests/unit/data/test__mutate_rows.py +++ b/tests/unit/data/_async/test__mutate_rows.py @@ -36,7 +36,7 @@ def _make_mutation(count=1, size=1): class TestMutateRowsOperation: def _target_class(self): - from google.cloud.bigtable._mutate_rows import _MutateRowsOperation + from google.cloud.bigtable.data._mutate_rows import _MutateRowsOperation return _MutateRowsOperation @@ -73,7 +73,7 @@ def test_ctor(self): """ test that constructor sets all the attributes correctly """ - from google.cloud.bigtable._mutate_rows import _MutateRowsIncomplete + from google.cloud.bigtable.data._mutate_rows import _MutateRowsIncomplete from google.api_core.exceptions import DeadlineExceeded from google.api_core.exceptions import ServiceUnavailable @@ -116,7 +116,7 @@ def test_ctor_too_many_entries(self): """ should raise an error if an operation is created with more than 100,000 entries """ - from google.cloud.bigtable._mutate_rows import ( + from google.cloud.bigtable.data._mutate_rows import ( MUTATE_ROWS_REQUEST_MUTATION_LIMIT, ) @@ -168,8 +168,8 @@ async def test_mutate_rows_exception(self, exc_type): """ exceptions raised from retryable should be raised in MutationsExceptionGroup """ - from google.cloud.bigtable.exceptions import MutationsExceptionGroup - from google.cloud.bigtable.exceptions import FailedMutationEntryError + from google.cloud.bigtable.data.exceptions import MutationsExceptionGroup + from google.cloud.bigtable.data.exceptions import FailedMutationEntryError client = mock.Mock() table = mock.Mock() @@ -204,7 +204,7 @@ async def test_mutate_rows_exception_retryable_eventually_pass(self, exc_type): """ If an exception fails but eventually passes, it should not raise an exception """ - from google.cloud.bigtable._mutate_rows import _MutateRowsOperation + from google.cloud.bigtable.data._mutate_rows import _MutateRowsOperation client = mock.Mock() table = mock.Mock() @@ -229,8 +229,8 @@ async def test_mutate_rows_incomplete_ignored(self): """ MutateRowsIncomplete 
exceptions should not be added to error list """ - from google.cloud.bigtable._mutate_rows import _MutateRowsIncomplete - from google.cloud.bigtable.exceptions import MutationsExceptionGroup + from google.cloud.bigtable.data._mutate_rows import _MutateRowsIncomplete + from google.cloud.bigtable.data.exceptions import MutationsExceptionGroup from google.api_core.exceptions import DeadlineExceeded client = mock.Mock() @@ -286,7 +286,7 @@ async def test_run_attempt_empty_request(self): @pytest.mark.asyncio async def test_run_attempt_partial_success_retryable(self): """Some entries succeed, but one fails. Should report the proper index, and raise incomplete exception""" - from google.cloud.bigtable._mutate_rows import _MutateRowsIncomplete + from google.cloud.bigtable.data._mutate_rows import _MutateRowsIncomplete success_mutation = _make_mutation() success_mutation_2 = _make_mutation() diff --git a/tests/unit/data/test__read_rows.py b/tests/unit/data/_async/test__read_rows.py similarity index 93% rename from tests/unit/data/test__read_rows.py rename to tests/unit/data/_async/test__read_rows.py index c893c56cd..cd820006f 100644 --- a/tests/unit/data/test__read_rows.py +++ b/tests/unit/data/_async/test__read_rows.py @@ -1,10 +1,10 @@ import unittest import pytest -from google.cloud.bigtable.exceptions import InvalidChunk -from google.cloud.bigtable._read_rows import AWAITING_NEW_ROW -from google.cloud.bigtable._read_rows import AWAITING_NEW_CELL -from google.cloud.bigtable._read_rows import AWAITING_CELL_VALUE +from google.cloud.bigtable.data.exceptions import InvalidChunk +from google.cloud.bigtable.data._read_rows import AWAITING_NEW_ROW +from google.cloud.bigtable.data._read_rows import AWAITING_NEW_CELL +from google.cloud.bigtable.data._read_rows import AWAITING_CELL_VALUE # try/except added for compatibility with python < 3.8 try: @@ -29,7 +29,7 @@ class TestReadRowsOperation: @staticmethod def _get_target_class(): - from google.cloud.bigtable._read_rows import _ReadRowsOperation + from google.cloud.bigtable.data._read_rows import _ReadRowsOperation return _ReadRowsOperation @@ -44,7 +44,7 @@ def test_ctor_defaults(self): default_operation_timeout = 600 time_gen_mock = mock.Mock() with mock.patch( - "google.cloud.bigtable._read_rows._attempt_timeout_generator", time_gen_mock + "google.cloud.bigtable.data._read_rows._attempt_timeout_generator", time_gen_mock ): instance = self._make_one(request, client) assert time_gen_mock.call_count == 1 @@ -70,7 +70,7 @@ def test_ctor(self): expected_request_timeout = 44 time_gen_mock = mock.Mock() with mock.patch( - "google.cloud.bigtable._read_rows._attempt_timeout_generator", time_gen_mock + "google.cloud.bigtable.data._read_rows._attempt_timeout_generator", time_gen_mock ): instance = self._make_one( request, @@ -198,7 +198,7 @@ def test_revise_request_full_table(self, last_key): def test_revise_to_empty_rowset(self): """revising to an empty rowset should raise error""" - from google.cloud.bigtable.exceptions import _RowSetComplete + from google.cloud.bigtable.data.exceptions import _RowSetComplete row_keys = ["a", "b", "c"] row_set = {"row_keys": row_keys, "row_ranges": [{"end_key_open": "c"}]} @@ -320,9 +320,9 @@ async def test_retryable_ignore_repeated_rows(self): """ Duplicate rows should cause an invalid chunk error """ - from google.cloud.bigtable._read_rows import _ReadRowsOperation - from google.cloud.bigtable.row import Row - from google.cloud.bigtable.exceptions import InvalidChunk + from google.cloud.bigtable.data._read_rows import 
_ReadRowsOperation + from google.cloud.bigtable.data.row import Row + from google.cloud.bigtable.data.exceptions import InvalidChunk async def mock_stream(): while True: @@ -345,8 +345,8 @@ async def test_retryable_ignore_last_scanned_rows(self): """ Last scanned rows should not be emitted """ - from google.cloud.bigtable._read_rows import _ReadRowsOperation - from google.cloud.bigtable.row import Row, _LastScannedRow + from google.cloud.bigtable.data._read_rows import _ReadRowsOperation + from google.cloud.bigtable.data.row import Row, _LastScannedRow async def mock_stream(): while True: @@ -367,8 +367,8 @@ async def mock_stream(): @pytest.mark.asyncio async def test_retryable_cancel_on_close(self): """Underlying gapic call should be cancelled when stream is closed""" - from google.cloud.bigtable._read_rows import _ReadRowsOperation - from google.cloud.bigtable.row import Row + from google.cloud.bigtable.data._read_rows import _ReadRowsOperation + from google.cloud.bigtable.data.row import Row async def mock_stream(): while True: @@ -390,7 +390,7 @@ async def mock_stream(): class TestStateMachine(unittest.TestCase): @staticmethod def _get_target_class(): - from google.cloud.bigtable._read_rows import _StateMachine + from google.cloud.bigtable.data._read_rows import _StateMachine return _StateMachine @@ -398,7 +398,7 @@ def _make_one(self, *args, **kwargs): return self._get_target_class()(*args, **kwargs) def test_ctor(self): - from google.cloud.bigtable._read_rows import _RowBuilder + from google.cloud.bigtable.data._read_rows import _RowBuilder instance = self._make_one() assert instance.last_seen_row_key is None @@ -435,7 +435,7 @@ def test__reset_row(self): assert instance.adapter.reset.call_count == 1 def test_handle_last_scanned_row_wrong_state(self): - from google.cloud.bigtable.exceptions import InvalidChunk + from google.cloud.bigtable.data.exceptions import InvalidChunk instance = self._make_one() instance.current_state = AWAITING_NEW_CELL @@ -448,7 +448,7 @@ def test_handle_last_scanned_row_wrong_state(self): assert e.value.args[0] == "Last scanned row key received in invalid state" def test_handle_last_scanned_row_out_of_order(self): - from google.cloud.bigtable.exceptions import InvalidChunk + from google.cloud.bigtable.data.exceptions import InvalidChunk instance = self._make_one() instance.last_seen_row_key = b"b" @@ -460,7 +460,7 @@ def test_handle_last_scanned_row_out_of_order(self): assert e.value.args[0] == "Last scanned row key is out of order" def test_handle_last_scanned_row(self): - from google.cloud.bigtable.row import _LastScannedRow + from google.cloud.bigtable.data.row import _LastScannedRow instance = self._make_one() instance.adapter = mock.Mock() @@ -475,7 +475,7 @@ def test_handle_last_scanned_row(self): assert instance.adapter.reset.call_count == 1 def test__handle_complete_row(self): - from google.cloud.bigtable.row import Row + from google.cloud.bigtable.data.row import Row instance = self._make_one() instance.current_state = mock.Mock() @@ -490,7 +490,7 @@ def test__handle_complete_row(self): assert instance.adapter.reset.call_count == 1 def test__handle_reset_chunk_errors(self): - from google.cloud.bigtable.exceptions import InvalidChunk + from google.cloud.bigtable.data.exceptions import InvalidChunk from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse instance = self._make_one() @@ -528,7 +528,7 @@ def test__handle_reset_chunk_errors(self): assert e.value.args[0] == "Reset chunk has labels" def test_handle_chunk_out_of_order(self): - 
from google.cloud.bigtable.exceptions import InvalidChunk + from google.cloud.bigtable.data.exceptions import InvalidChunk from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse instance = self._make_one() @@ -570,7 +570,7 @@ def handle_chunk_with_commit_wrong_state(self, state): def test_handle_chunk_with_commit(self): from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse - from google.cloud.bigtable.row import Row + from google.cloud.bigtable.data.row import Row instance = self._make_one() with mock.patch.object(type(instance), "_reset_row") as mock_reset: @@ -587,7 +587,7 @@ def test_handle_chunk_with_commit(self): def test_handle_chunk_with_commit_empty_strings(self): from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse - from google.cloud.bigtable.row import Row + from google.cloud.bigtable.data.row import Row instance = self._make_one() with mock.patch.object(type(instance), "_reset_row") as mock_reset: @@ -648,7 +648,7 @@ def test_AWAITING_NEW_ROW(self): def test_AWAITING_NEW_CELL_family_without_qualifier(self): from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse - from google.cloud.bigtable._read_rows import _StateMachine + from google.cloud.bigtable.data._read_rows import _StateMachine state_machine = _StateMachine() state_machine.current_qualifier = b"q" @@ -660,7 +660,7 @@ def test_AWAITING_NEW_CELL_family_without_qualifier(self): def test_AWAITING_NEW_CELL_qualifier_without_family(self): from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse - from google.cloud.bigtable._read_rows import _StateMachine + from google.cloud.bigtable.data._read_rows import _StateMachine state_machine = _StateMachine() instance = AWAITING_NEW_CELL @@ -671,7 +671,7 @@ def test_AWAITING_NEW_CELL_qualifier_without_family(self): def test_AWAITING_NEW_CELL_no_row_state(self): from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse - from google.cloud.bigtable._read_rows import _StateMachine + from google.cloud.bigtable.data._read_rows import _StateMachine state_machine = _StateMachine() instance = AWAITING_NEW_CELL @@ -687,7 +687,7 @@ def test_AWAITING_NEW_CELL_no_row_state(self): def test_AWAITING_NEW_CELL_invalid_row_key(self): from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse - from google.cloud.bigtable._read_rows import _StateMachine + from google.cloud.bigtable.data._read_rows import _StateMachine state_machine = _StateMachine() instance = AWAITING_NEW_CELL @@ -699,7 +699,7 @@ def test_AWAITING_NEW_CELL_invalid_row_key(self): def test_AWAITING_NEW_CELL_success_no_split(self): from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse - from google.cloud.bigtable._read_rows import _StateMachine + from google.cloud.bigtable.data._read_rows import _StateMachine state_machine = _StateMachine() state_machine.adapter = mock.Mock() @@ -733,7 +733,7 @@ def test_AWAITING_NEW_CELL_success_no_split(self): def test_AWAITING_NEW_CELL_success_with_split(self): from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse - from google.cloud.bigtable._read_rows import _StateMachine + from google.cloud.bigtable.data._read_rows import _StateMachine state_machine = _StateMachine() state_machine.adapter = mock.Mock() @@ -768,7 +768,7 @@ def test_AWAITING_NEW_CELL_success_with_split(self): def test_AWAITING_CELL_VALUE_w_row_key(self): from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse - from google.cloud.bigtable._read_rows import _StateMachine + from 
google.cloud.bigtable.data._read_rows import _StateMachine state_machine = _StateMachine() instance = AWAITING_CELL_VALUE @@ -779,7 +779,7 @@ def test_AWAITING_CELL_VALUE_w_row_key(self): def test_AWAITING_CELL_VALUE_w_family(self): from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse - from google.cloud.bigtable._read_rows import _StateMachine + from google.cloud.bigtable.data._read_rows import _StateMachine state_machine = _StateMachine() instance = AWAITING_CELL_VALUE @@ -790,7 +790,7 @@ def test_AWAITING_CELL_VALUE_w_family(self): def test_AWAITING_CELL_VALUE_w_qualifier(self): from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse - from google.cloud.bigtable._read_rows import _StateMachine + from google.cloud.bigtable.data._read_rows import _StateMachine state_machine = _StateMachine() instance = AWAITING_CELL_VALUE @@ -801,7 +801,7 @@ def test_AWAITING_CELL_VALUE_w_qualifier(self): def test_AWAITING_CELL_VALUE_w_timestamp(self): from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse - from google.cloud.bigtable._read_rows import _StateMachine + from google.cloud.bigtable.data._read_rows import _StateMachine state_machine = _StateMachine() instance = AWAITING_CELL_VALUE @@ -812,7 +812,7 @@ def test_AWAITING_CELL_VALUE_w_timestamp(self): def test_AWAITING_CELL_VALUE_w_labels(self): from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse - from google.cloud.bigtable._read_rows import _StateMachine + from google.cloud.bigtable.data._read_rows import _StateMachine state_machine = _StateMachine() instance = AWAITING_CELL_VALUE @@ -823,7 +823,7 @@ def test_AWAITING_CELL_VALUE_w_labels(self): def test_AWAITING_CELL_VALUE_continuation(self): from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse - from google.cloud.bigtable._read_rows import _StateMachine + from google.cloud.bigtable.data._read_rows import _StateMachine state_machine = _StateMachine() state_machine.adapter = mock.Mock() @@ -838,7 +838,7 @@ def test_AWAITING_CELL_VALUE_continuation(self): def test_AWAITING_CELL_VALUE_final_chunk(self): from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse - from google.cloud.bigtable._read_rows import _StateMachine + from google.cloud.bigtable.data._read_rows import _StateMachine state_machine = _StateMachine() state_machine.adapter = mock.Mock() @@ -855,7 +855,7 @@ def test_AWAITING_CELL_VALUE_final_chunk(self): class TestRowBuilder(unittest.TestCase): @staticmethod def _get_target_class(): - from google.cloud.bigtable._read_rows import _RowBuilder + from google.cloud.bigtable.data._read_rows import _RowBuilder return _RowBuilder @@ -1003,7 +1003,7 @@ def test_reset(self): class TestChunkHasField: def test__chunk_has_field_empty(self): from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse - from google.cloud.bigtable._read_rows import _chunk_has_field + from google.cloud.bigtable.data._read_rows import _chunk_has_field chunk = ReadRowsResponse.CellChunk()._pb assert not _chunk_has_field(chunk, "family_name") @@ -1011,7 +1011,7 @@ def test__chunk_has_field_empty(self): def test__chunk_has_field_populated_empty_strings(self): from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse - from google.cloud.bigtable._read_rows import _chunk_has_field + from google.cloud.bigtable.data._read_rows import _chunk_has_field chunk = ReadRowsResponse.CellChunk(qualifier=b"", family_name="")._pb assert _chunk_has_field(chunk, "family_name") diff --git a/tests/unit/data/test_client.py 
b/tests/unit/data/_async/test_client.py similarity index 98% rename from tests/unit/data/test_client.py rename to tests/unit/data/_async/test_client.py index 0e39851e4..f94bb09dd 100644 --- a/tests/unit/data/test_client.py +++ b/tests/unit/data/_async/test_client.py @@ -45,7 +45,7 @@ class TestBigtableDataClientAsync: def _get_target_class(self): - from google.cloud.bigtable.data.client import BigtableDataClientAsync + from google.cloud.bigtable.data._async.client import BigtableDataClientAsync return BigtableDataClientAsync @@ -118,10 +118,9 @@ async def test_ctor_dict_options(self): BigtableAsyncClient, ) from google.api_core.client_options import ClientOptions - from google.cloud.bigtable.data.client import BigtableDataClientAsync client_options = {"api_endpoint": "foo.bar:1234"} - with mock.patch.object(BigtableAsyncClient, "__init__") as bigtable_client_init: + with mock.patch.object(self._get_target_class(), "__init__") as bigtable_client_init: try: self._make_one(client_options=client_options) except TypeError: @@ -132,7 +131,7 @@ async def test_ctor_dict_options(self): assert called_options.api_endpoint == "foo.bar:1234" assert isinstance(called_options, ClientOptions) with mock.patch.object( - BigtableDataClientAsync, "start_background_channel_refresh" + self._get_target_class(), "start_background_channel_refresh" ) as start_background_refresh: client = self._make_one(client_options=client_options) start_background_refresh.assert_called_once() @@ -725,7 +724,7 @@ async def test__multiple_table_registration(self): add multiple owners to instance_owners, but only keep one copy of shared key in active_instances """ - from google.cloud.bigtable.data.client import _WarmedInstanceKey + from google.cloud.bigtable.data._async.client import _WarmedInstanceKey async with self._make_one(project="project-id") as client: async with client.get_table("instance_1", "table_1") as table_1: @@ -773,7 +772,7 @@ async def test__multiple_instance_registration(self): registering with multiple instance keys should update the key in instance_owners and active_instances """ - from google.cloud.bigtable.data.client import _WarmedInstanceKey + from google.cloud.bigtable.data._async.client import _WarmedInstanceKey async with self._make_one(project="project-id") as client: async with client.get_table("instance_1", "table_1") as table_1: @@ -808,8 +807,8 @@ async def test__multiple_instance_registration(self): @pytest.mark.asyncio async def test_get_table(self): - from google.cloud.bigtable.data.client import TableAsync - from google.cloud.bigtable.data.client import _WarmedInstanceKey + from google.cloud.bigtable.data._async.client import TableAsync + from google.cloud.bigtable.data._async.client import _WarmedInstanceKey client = self._make_one(project="project-id") assert not client._active_instances @@ -844,8 +843,8 @@ async def test_get_table(self): @pytest.mark.asyncio async def test_get_table_context_manager(self): - from google.cloud.bigtable.data.client import TableAsync - from google.cloud.bigtable.data.client import _WarmedInstanceKey + from google.cloud.bigtable.data._async.client import TableAsync + from google.cloud.bigtable.data._async.client import _WarmedInstanceKey expected_table_id = "table-id" expected_instance_id = "instance-id" @@ -950,7 +949,7 @@ async def test_context_manager(self): def test_client_ctor_sync(self): # initializing client in a sync context should raise RuntimeError - from google.cloud.bigtable.data.client import BigtableDataClientAsync + from 
google.cloud.bigtable.data._async.client import BigtableDataClientAsync with pytest.warns(RuntimeWarning) as warnings: client = BigtableDataClientAsync(project="project-id") @@ -966,9 +965,9 @@ def test_client_ctor_sync(self): class TestTableAsync: @pytest.mark.asyncio async def test_table_ctor(self): - from google.cloud.bigtable.data.client import BigtableDataClientAsync - from google.cloud.bigtable.data.client import TableAsync - from google.cloud.bigtable.data.client import _WarmedInstanceKey + from google.cloud.bigtable.data._async.client import BigtableDataClientAsync + from google.cloud.bigtable.data._async.client import TableAsync + from google.cloud.bigtable.data._async.client import _WarmedInstanceKey expected_table_id = "table-id" expected_instance_id = "instance-id" @@ -1007,8 +1006,8 @@ async def test_table_ctor(self): @pytest.mark.asyncio async def test_table_ctor_bad_timeout_values(self): - from google.cloud.bigtable.data.client import BigtableDataClientAsync - from google.cloud.bigtable.data.client import TableAsync + from google.cloud.bigtable.data._async.client import BigtableDataClientAsync + from google.cloud.bigtable.data._async.client import TableAsync client = BigtableDataClientAsync() @@ -1034,7 +1033,7 @@ async def test_table_ctor_bad_timeout_values(self): def test_table_ctor_sync(self): # initializing client in a sync context should raise RuntimeError - from google.cloud.bigtable.data.client import TableAsync + from google.cloud.bigtable.data._async.client import TableAsync client = mock.Mock() with pytest.raises(RuntimeError) as e: @@ -1048,12 +1047,12 @@ class TestReadRows: """ def _make_client(self, *args, **kwargs): - from google.cloud.bigtable.data.client import BigtableDataClientAsync + from google.cloud.bigtable.data._async.client import BigtableDataClientAsync return BigtableDataClientAsync(*args, **kwargs) def _make_table(self, *args, **kwargs): - from google.cloud.bigtable.data.client import TableAsync + from google.cloud.bigtable.data._async.client import TableAsync client_mock = mock.Mock() client_mock._register_instance.side_effect = ( @@ -1295,12 +1294,12 @@ async def test_read_rows_per_request_timeout( @pytest.mark.asyncio async def test_read_rows_idle_timeout(self): - from google.cloud.bigtable.data.client import ReadRowsIterator + from google.cloud.bigtable.data._async.client import ReadRowsIterator from google.cloud.bigtable_v2.services.bigtable.async_client import ( BigtableAsyncClient, ) from google.cloud.bigtable.data.exceptions import IdleTimeout - from google.cloud.bigtable.data._read_rows import _ReadRowsOperation + from google.cloud.bigtable.data._async._read_rows import _ReadRowsOperation chunks = [ self._make_chunk(row_key=b"test_1"), @@ -1398,7 +1397,7 @@ async def test_read_rows_revise_request(self): """ Ensure that _revise_request is called between retries """ - from google.cloud.bigtable.data._read_rows import _ReadRowsOperation + from google.cloud.bigtable.data._async._read_rows import _ReadRowsOperation from google.cloud.bigtable.data.exceptions import InvalidChunk with mock.patch.object( @@ -1432,7 +1431,7 @@ async def test_read_rows_default_timeouts(self): """ Ensure that the default timeouts are set on the read rows operation when not overridden """ - from google.cloud.bigtable.data._read_rows import _ReadRowsOperation + from google.cloud.bigtable.data._async._read_rows import _ReadRowsOperation operation_timeout = 8 per_request_timeout = 4 @@ -1455,7 +1454,7 @@ async def test_read_rows_default_timeout_override(self): """ When 
timeouts are passed, they overwrite default values """ - from google.cloud.bigtable.data._read_rows import _ReadRowsOperation + from google.cloud.bigtable.data._async._read_rows import _ReadRowsOperation operation_timeout = 8 per_request_timeout = 4 @@ -1653,7 +1652,7 @@ async def test_read_rows_metadata(self, include_app_profile): class TestReadRowsSharded: def _make_client(self, *args, **kwargs): - from google.cloud.bigtable.data.client import BigtableDataClientAsync + from google.cloud.bigtable.data._async.client import BigtableDataClientAsync return BigtableDataClientAsync(*args, **kwargs) @@ -1785,8 +1784,8 @@ async def test_read_rows_sharded_batching(self): Large queries should be processed in batches to limit concurrency operation timeout should change between batches """ - from google.cloud.bigtable.data.client import TableAsync - from google.cloud.bigtable.data.client import CONCURRENCY_LIMIT + from google.cloud.bigtable.data._async.client import TableAsync + from google.cloud.bigtable.data._async.client import CONCURRENCY_LIMIT assert CONCURRENCY_LIMIT == 10 # change this test if this changes @@ -1843,7 +1842,7 @@ async def test_read_rows_sharded_batching(self): class TestSampleRowKeys: def _make_client(self, *args, **kwargs): - from google.cloud.bigtable.data.client import BigtableDataClientAsync + from google.cloud.bigtable.data._async.client import BigtableDataClientAsync return BigtableDataClientAsync(*args, **kwargs) @@ -2023,7 +2022,7 @@ async def test_sample_row_keys_non_retryable_errors(self, non_retryable_exceptio class TestMutateRow: def _make_client(self, *args, **kwargs): - from google.cloud.bigtable.data.client import BigtableDataClientAsync + from google.cloud.bigtable.data._async.client import BigtableDataClientAsync return BigtableDataClientAsync(*args, **kwargs) @@ -2190,7 +2189,7 @@ async def test_mutate_row_metadata(self, include_app_profile): class TestBulkMutateRows: def _make_client(self, *args, **kwargs): - from google.cloud.bigtable.data.client import BigtableDataClientAsync + from google.cloud.bigtable.data._async.client import BigtableDataClientAsync return BigtableDataClientAsync(*args, **kwargs) @@ -2579,7 +2578,7 @@ async def test_bulk_mutate_row_metadata(self, include_app_profile): class TestCheckAndMutateRow: def _make_client(self, *args, **kwargs): - from google.cloud.bigtable.data.client import BigtableDataClientAsync + from google.cloud.bigtable.data._async.client import BigtableDataClientAsync return BigtableDataClientAsync(*args, **kwargs) @@ -2772,7 +2771,7 @@ async def test_check_and_mutate_metadata(self, include_app_profile): class TestReadModifyWriteRow: def _make_client(self, *args, **kwargs): - from google.cloud.bigtable.data.client import BigtableDataClientAsync + from google.cloud.bigtable.data._async.client import BigtableDataClientAsync return BigtableDataClientAsync(*args, **kwargs) diff --git a/tests/unit/data/test_iterators.py b/tests/unit/data/_async/test_iterators.py similarity index 96% rename from tests/unit/data/test_iterators.py rename to tests/unit/data/_async/test_iterators.py index f7aee2822..712ba1baa 100644 --- a/tests/unit/data/test_iterators.py +++ b/tests/unit/data/_async/test_iterators.py @@ -17,7 +17,7 @@ import asyncio import pytest -from google.cloud.bigtable._read_rows import _ReadRowsOperation +from google.cloud.bigtable.data._read_rows import _ReadRowsOperation # try/except added for compatibility with python < 3.8 try: @@ -61,7 +61,7 @@ async def mock_stream(self, size=10): yield i def _make_one(self, *args, 
**kwargs): - from google.cloud.bigtable.iterators import ReadRowsIterator + from google.cloud.bigtable.data.iterators import ReadRowsIterator stream = MockStream(*args, **kwargs) return ReadRowsIterator(stream) @@ -117,7 +117,7 @@ async def test__start_idle_timer_duplicate(self): @pytest.mark.asyncio async def test__idle_timeout_coroutine(self): - from google.cloud.bigtable.exceptions import IdleTimeout + from google.cloud.bigtable.data.exceptions import IdleTimeout iterator = self._make_one() await iterator._idle_timeout_coroutine(0.05) @@ -174,7 +174,7 @@ async def test___anext__with_deadline_error_with_cause(self): Transient errors should be exposed as an error group """ from google.api_core import exceptions as core_exceptions - from google.cloud.bigtable.exceptions import RetryExceptionGroup + from google.cloud.bigtable.data.exceptions import RetryExceptionGroup items = [1, core_exceptions.RetryError("retry error", None)] expected_timeout = 99 diff --git a/tests/unit/data/test__helpers.py b/tests/unit/data/test__helpers.py index 9aa1a7bb4..dc688bb0c 100644 --- a/tests/unit/data/test__helpers.py +++ b/tests/unit/data/test__helpers.py @@ -13,8 +13,8 @@ # import pytest -import google.cloud.bigtable._helpers as _helpers -import google.cloud.bigtable.exceptions as bigtable_exceptions +import google.cloud.bigtable.data._helpers as _helpers +import google.cloud.bigtable.data.exceptions as bigtable_exceptions import mock diff --git a/tests/unit/data/test_exceptions.py b/tests/unit/data/test_exceptions.py index ef186a47c..38f07e674 100644 --- a/tests/unit/data/test_exceptions.py +++ b/tests/unit/data/test_exceptions.py @@ -16,7 +16,7 @@ import pytest import sys -import google.cloud.bigtable.exceptions as bigtable_exceptions +import google.cloud.bigtable.data.exceptions as bigtable_exceptions # try/except added for compatibility with python < 3.8 try: @@ -31,7 +31,7 @@ class TestBigtableExceptionGroup: """ def _get_class(self): - from google.cloud.bigtable.exceptions import BigtableExceptionGroup + from google.cloud.bigtable.data.exceptions import BigtableExceptionGroup return BigtableExceptionGroup @@ -74,7 +74,7 @@ def test_311_traceback(self): exc_group = self._make_one(excs=[sub_exc1, sub_exc2]) expected_traceback = ( - f" | google.cloud.bigtable.exceptions.{type(exc_group).__name__}: {str(exc_group)}", + f" | google.cloud.bigtable.data.exceptions.{type(exc_group).__name__}: {str(exc_group)}", " +-+---------------- 1 ----------------", " | RuntimeError: first sub exception", " +---------------- 2 ----------------", @@ -123,7 +123,7 @@ def test_exception_handling(self): class TestMutationsExceptionGroup(TestBigtableExceptionGroup): def _get_class(self): - from google.cloud.bigtable.exceptions import MutationsExceptionGroup + from google.cloud.bigtable.data.exceptions import MutationsExceptionGroup return MutationsExceptionGroup @@ -228,7 +228,7 @@ def test_from_truncated_lists( class TestRetryExceptionGroup(TestBigtableExceptionGroup): def _get_class(self): - from google.cloud.bigtable.exceptions import RetryExceptionGroup + from google.cloud.bigtable.data.exceptions import RetryExceptionGroup return RetryExceptionGroup @@ -269,7 +269,7 @@ def test_raise(self, exception_list, expected_message): class TestShardedReadRowsExceptionGroup(TestBigtableExceptionGroup): def _get_class(self): - from google.cloud.bigtable.exceptions import ShardedReadRowsExceptionGroup + from google.cloud.bigtable.data.exceptions import ShardedReadRowsExceptionGroup return ShardedReadRowsExceptionGroup @@ -306,7 
+306,7 @@ def test_raise(self, exception_list, succeeded, total_entries, expected_message) class TestFailedMutationEntryError: def _get_class(self): - from google.cloud.bigtable.exceptions import FailedMutationEntryError + from google.cloud.bigtable.data.exceptions import FailedMutationEntryError return FailedMutationEntryError @@ -374,7 +374,7 @@ def test_no_index(self): class TestFailedQueryShardError: def _get_class(self): - from google.cloud.bigtable.exceptions import FailedQueryShardError + from google.cloud.bigtable.data.exceptions import FailedQueryShardError return FailedQueryShardError diff --git a/tests/unit/data/test_mutations.py b/tests/unit/data/test_mutations.py index c8c6788b1..5a93c7881 100644 --- a/tests/unit/data/test_mutations.py +++ b/tests/unit/data/test_mutations.py @@ -14,7 +14,7 @@ import pytest -import google.cloud.bigtable.mutations as mutations +import google.cloud.bigtable.data.mutations as mutations # try/except added for compatibility with python < 3.8 try: @@ -25,7 +25,7 @@ class TestBaseMutation: def _target_class(self): - from google.cloud.bigtable.mutations import Mutation + from google.cloud.bigtable.data.mutations import Mutation return Mutation @@ -173,7 +173,7 @@ def test__from_dict_wrong_subclass(self): class TestSetCell: def _target_class(self): - from google.cloud.bigtable.mutations import SetCell + from google.cloud.bigtable.data.mutations import SetCell return SetCell @@ -336,7 +336,7 @@ def test___str__(self): class TestDeleteRangeFromColumn: def _target_class(self): - from google.cloud.bigtable.mutations import DeleteRangeFromColumn + from google.cloud.bigtable.data.mutations import DeleteRangeFromColumn return DeleteRangeFromColumn @@ -423,7 +423,7 @@ def test___str__(self): class TestDeleteAllFromFamily: def _target_class(self): - from google.cloud.bigtable.mutations import DeleteAllFromFamily + from google.cloud.bigtable.data.mutations import DeleteAllFromFamily return DeleteAllFromFamily @@ -460,7 +460,7 @@ def test___str__(self): class TestDeleteFromRow: def _target_class(self): - from google.cloud.bigtable.mutations import DeleteAllFromRow + from google.cloud.bigtable.data.mutations import DeleteAllFromRow return DeleteAllFromRow @@ -490,7 +490,7 @@ def test___str__(self): class TestRowMutationEntry: def _target_class(self): - from google.cloud.bigtable.mutations import RowMutationEntry + from google.cloud.bigtable.data.mutations import RowMutationEntry return RowMutationEntry @@ -506,7 +506,7 @@ def test_ctor(self): def test_ctor_over_limit(self): """Should raise error if mutations exceed MAX_MUTATIONS_PER_ENTRY""" - from google.cloud.bigtable._mutate_rows import ( + from google.cloud.bigtable.data._mutate_rows import ( MUTATE_ROWS_REQUEST_MUTATION_LIMIT, ) @@ -527,7 +527,7 @@ def test_ctor_str_key(self): assert list(instance.mutations) == expected_mutations def test_ctor_single_mutation(self): - from google.cloud.bigtable.mutations import DeleteAllFromRow + from google.cloud.bigtable.data.mutations import DeleteAllFromRow expected_key = b"row_key" expected_mutations = DeleteAllFromRow() diff --git a/tests/unit/data/test_mutations_batcher.py b/tests/unit/data/test_mutations_batcher.py index a900468d5..ae690e6fa 100644 --- a/tests/unit/data/test_mutations_batcher.py +++ b/tests/unit/data/test_mutations_batcher.py @@ -33,7 +33,7 @@ def _make_mutation(count=1, size=1): class Test_FlowControl: def _make_one(self, max_mutation_count=10, max_mutation_bytes=100): - from google.cloud.bigtable.mutations_batcher import _FlowControl + from 
google.cloud.bigtable.data.mutations_batcher import _FlowControl return _FlowControl(max_mutation_count, max_mutation_bytes) @@ -238,7 +238,7 @@ async def test_add_to_flow_max_mutation_limits( Should submit request early, even if the flow control has room for more """ with mock.patch( - "google.cloud.bigtable.mutations_batcher.MUTATE_ROWS_REQUEST_MUTATION_LIMIT", + "google.cloud.bigtable.data.mutations_batcher.MUTATE_ROWS_REQUEST_MUTATION_LIMIT", max_limit, ): mutation_objs = [_make_mutation(count=m[0], size=m[1]) for m in mutations] @@ -277,7 +277,7 @@ async def test_add_to_flow_oversize(self): class TestMutationsBatcher: def _get_target_class(self): - from google.cloud.bigtable.mutations_batcher import MutationsBatcher + from google.cloud.bigtable.data.mutations_batcher import MutationsBatcher return MutationsBatcher @@ -290,7 +290,7 @@ def _make_one(self, table=None, **kwargs): return self._get_target_class()(table, **kwargs) @mock.patch( - "google.cloud.bigtable.mutations_batcher.MutationsBatcher._start_flush_timer" + "google.cloud.bigtable.data.mutations_batcher.MutationsBatcher._start_flush_timer" ) @pytest.mark.asyncio async def test_ctor_defaults(self, flush_timer_mock): @@ -320,7 +320,7 @@ async def test_ctor_defaults(self, flush_timer_mock): assert isinstance(instance._flush_timer, asyncio.Future) @mock.patch( - "google.cloud.bigtable.mutations_batcher.MutationsBatcher._start_flush_timer", + "google.cloud.bigtable.data.mutations_batcher.MutationsBatcher._start_flush_timer", ) @pytest.mark.asyncio async def test_ctor_explicit(self, flush_timer_mock): @@ -368,7 +368,7 @@ async def test_ctor_explicit(self, flush_timer_mock): assert isinstance(instance._flush_timer, asyncio.Future) @mock.patch( - "google.cloud.bigtable.mutations_batcher.MutationsBatcher._start_flush_timer" + "google.cloud.bigtable.data.mutations_batcher.MutationsBatcher._start_flush_timer" ) @pytest.mark.asyncio async def test_ctor_no_flush_limits(self, flush_timer_mock): @@ -423,8 +423,8 @@ def test_default_argument_consistency(self): table.mutations_batcher. 
Make sure any changes to defaults are applied to both places """ - from google.cloud.bigtable.client import Table - from google.cloud.bigtable.mutations_batcher import MutationsBatcher + from google.cloud.bigtable.data.client import Table + from google.cloud.bigtable.data.mutations_batcher import MutationsBatcher import inspect get_batcher_signature = dict( @@ -446,7 +446,7 @@ def test_default_argument_consistency(self): ) @mock.patch( - "google.cloud.bigtable.mutations_batcher.MutationsBatcher._schedule_flush" + "google.cloud.bigtable.data.mutations_batcher.MutationsBatcher._schedule_flush" ) @pytest.mark.asyncio async def test__start_flush_timer_w_None(self, flush_mock): @@ -458,7 +458,7 @@ async def test__start_flush_timer_w_None(self, flush_mock): assert flush_mock.call_count == 0 @mock.patch( - "google.cloud.bigtable.mutations_batcher.MutationsBatcher._schedule_flush" + "google.cloud.bigtable.data.mutations_batcher.MutationsBatcher._schedule_flush" ) @pytest.mark.asyncio async def test__start_flush_timer_call_when_closed(self, flush_mock): @@ -472,7 +472,7 @@ async def test__start_flush_timer_call_when_closed(self, flush_mock): assert flush_mock.call_count == 0 @mock.patch( - "google.cloud.bigtable.mutations_batcher.MutationsBatcher._schedule_flush" + "google.cloud.bigtable.data.mutations_batcher.MutationsBatcher._schedule_flush" ) @pytest.mark.asyncio async def test__flush_timer(self, flush_mock): @@ -492,7 +492,7 @@ async def test__flush_timer(self, flush_mock): assert flush_mock.call_count == loop_num @mock.patch( - "google.cloud.bigtable.mutations_batcher.MutationsBatcher._schedule_flush" + "google.cloud.bigtable.data.mutations_batcher.MutationsBatcher._schedule_flush" ) @pytest.mark.asyncio async def test__flush_timer_no_mutations(self, flush_mock): @@ -511,7 +511,7 @@ async def test__flush_timer_no_mutations(self, flush_mock): assert flush_mock.call_count == 0 @mock.patch( - "google.cloud.bigtable.mutations_batcher.MutationsBatcher._schedule_flush" + "google.cloud.bigtable.data.mutations_batcher.MutationsBatcher._schedule_flush" ) @pytest.mark.asyncio async def test__flush_timer_close(self, flush_mock): @@ -541,7 +541,7 @@ async def test_append_wrong_mutation(self): Mutation objects should raise an exception. Only support RowMutationEntry """ - from google.cloud.bigtable.mutations import DeleteAllFromRow + from google.cloud.bigtable.data.mutations import DeleteAllFromRow async with self._make_one() as instance: expected_error = "invalid mutation type: DeleteAllFromRow. 
Only RowMutationEntry objects are supported by batcher" @@ -577,7 +577,7 @@ async def test_append_flush_runs_after_limit_hit(self): If the user appends a bunch of entries above the flush limits back-to-back, it should still flush in a single task """ - from google.cloud.bigtable.mutations_batcher import MutationsBatcher + from google.cloud.bigtable.data.mutations_batcher import MutationsBatcher with mock.patch.object(MutationsBatcher, "_execute_mutate_rows") as op_mock: async with self._make_one(flush_limit_bytes=100) as instance: @@ -789,7 +789,7 @@ async def test__flush_internal_with_errors( """ errors returned from _execute_mutate_rows should be added to internal exceptions """ - from google.cloud.bigtable import exceptions + from google.cloud.bigtable.data import exceptions num_entries = 10 expected_errors = [ @@ -861,7 +861,7 @@ async def test_timer_flush_end_to_end(self): @pytest.mark.asyncio @mock.patch( - "google.cloud.bigtable.mutations_batcher._MutateRowsOperation", + "google.cloud.bigtable.data.mutations_batcher._MutateRowsOperation", ) async def test__execute_mutate_rows(self, mutate_rows): mutate_rows.return_value = AsyncMock() @@ -884,10 +884,10 @@ async def test__execute_mutate_rows(self, mutate_rows): assert result == [] @pytest.mark.asyncio - @mock.patch("google.cloud.bigtable.mutations_batcher._MutateRowsOperation.start") + @mock.patch("google.cloud.bigtable.data.mutations_batcher._MutateRowsOperation.start") async def test__execute_mutate_rows_returns_errors(self, mutate_rows): """Errors from operation should be retruned as list""" - from google.cloud.bigtable.exceptions import ( + from google.cloud.bigtable.data.exceptions import ( MutationsExceptionGroup, FailedMutationEntryError, ) @@ -911,7 +911,7 @@ async def test__execute_mutate_rows_returns_errors(self, mutate_rows): @pytest.mark.asyncio async def test__raise_exceptions(self): """Raise exceptions and reset error state""" - from google.cloud.bigtable import exceptions + from google.cloud.bigtable.data import exceptions expected_total = 1201 expected_exceptions = [RuntimeError("mock")] * 3 @@ -958,7 +958,7 @@ async def test_close(self): @pytest.mark.asyncio async def test_close_w_exceptions(self): """Raise exceptions on close""" - from google.cloud.bigtable import exceptions + from google.cloud.bigtable.data import exceptions expected_total = 10 expected_exceptions = [RuntimeError("mock")] @@ -1002,7 +1002,7 @@ async def test_atexit_registration(self): import atexit with mock.patch( - "google.cloud.bigtable.mutations_batcher.MutationsBatcher._on_exit" + "google.cloud.bigtable.data.mutations_batcher.MutationsBatcher._on_exit" ) as on_exit_mock: async with self._make_one(): assert on_exit_mock.call_count == 0 @@ -1014,7 +1014,7 @@ async def test_atexit_registration(self): @pytest.mark.asyncio @mock.patch( - "google.cloud.bigtable.mutations_batcher._MutateRowsOperation", + "google.cloud.bigtable.data.mutations_batcher._MutateRowsOperation", ) async def test_timeout_args_passed(self, mutate_rows): """ diff --git a/tests/unit/data/test_read_modify_write_rules.py b/tests/unit/data/test_read_modify_write_rules.py index 02240df6d..dd12f017e 100644 --- a/tests/unit/data/test_read_modify_write_rules.py +++ b/tests/unit/data/test_read_modify_write_rules.py @@ -24,7 +24,7 @@ class TestBaseReadModifyWriteRule: def _target_class(self): - from google.cloud.bigtable.read_modify_write_rules import ReadModifyWriteRule + from google.cloud.bigtable.data.read_modify_write_rules import ReadModifyWriteRule return
ReadModifyWriteRule @@ -40,7 +40,7 @@ def test__to_dict(self): class TestIncrementRule: def _target_class(self): - from google.cloud.bigtable.read_modify_write_rules import IncrementRule + from google.cloud.bigtable.data.read_modify_write_rules import IncrementRule return IncrementRule @@ -98,7 +98,7 @@ def test__to_dict(self, args, expected): class TestAppendValueRule: def _target_class(self): - from google.cloud.bigtable.read_modify_write_rules import AppendValueRule + from google.cloud.bigtable.data.read_modify_write_rules import AppendValueRule return AppendValueRule diff --git a/tests/unit/data/test_read_rows_acceptance.py b/tests/unit/data/test_read_rows_acceptance.py index 2349d25c6..4aeba0d2d 100644 --- a/tests/unit/data/test_read_rows_acceptance.py +++ b/tests/unit/data/test_read_rows_acceptance.py @@ -21,10 +21,10 @@ from google.cloud.bigtable_v2 import ReadRowsResponse -from google.cloud.bigtable.client import BigtableDataClient -from google.cloud.bigtable.exceptions import InvalidChunk -from google.cloud.bigtable._read_rows import _ReadRowsOperation, _StateMachine -from google.cloud.bigtable.row import Row +from google.cloud.bigtable.data.client import BigtableDataClient +from google.cloud.bigtable.data.exceptions import InvalidChunk +from google.cloud.bigtable.data._read_rows import _ReadRowsOperation, _StateMachine +from google.cloud.bigtable.data.row import Row from .v2_client.test_row_merger import ReadRowsTest, TestFile diff --git a/tests/unit/data/test_read_rows_query.py b/tests/unit/data/test_read_rows_query.py index 7ecd91f8c..88fde2d24 100644 --- a/tests/unit/data/test_read_rows_query.py +++ b/tests/unit/data/test_read_rows_query.py @@ -23,7 +23,7 @@ class TestRowRange: @staticmethod def _get_target_class(): - from google.cloud.bigtable.read_rows_query import RowRange + from google.cloud.bigtable.data.read_rows_query import RowRange return RowRange @@ -139,7 +139,7 @@ def test__from_dict( start_is_inclusive, end_is_inclusive, ): - from google.cloud.bigtable.read_rows_query import RowRange + from google.cloud.bigtable.data.read_rows_query import RowRange row_range = RowRange._from_dict(input_dict) assert row_range._to_dict().keys() == input_dict.keys() @@ -172,7 +172,7 @@ def test__from_dict( ], ) def test__from_points(self, dict_repr): - from google.cloud.bigtable.read_rows_query import RowRange + from google.cloud.bigtable.data.read_rows_query import RowRange row_range_from_dict = RowRange._from_dict(dict_repr) row_range_from_points = RowRange._from_points( @@ -210,7 +210,7 @@ def test__from_points(self, dict_repr): ], ) def test___hash__(self, first_dict, second_dict, should_match): - from google.cloud.bigtable.read_rows_query import RowRange + from google.cloud.bigtable.data.read_rows_query import RowRange row_range1 = RowRange._from_dict(first_dict) row_range2 = RowRange._from_dict(second_dict) @@ -233,7 +233,7 @@ def test___bool__(self, dict_repr, expected): """ Only row range with both points empty should be falsy """ - from google.cloud.bigtable.read_rows_query import RowRange + from google.cloud.bigtable.data.read_rows_query import RowRange row_range = RowRange._from_dict(dict_repr) assert bool(row_range) is expected @@ -242,7 +242,7 @@ def test___bool__(self, dict_repr, expected): class TestReadRowsQuery: @staticmethod def _get_target_class(): - from google.cloud.bigtable.read_rows_query import ReadRowsQuery + from google.cloud.bigtable.data.read_rows_query import ReadRowsQuery return ReadRowsQuery @@ -257,8 +257,8 @@ def test_ctor_defaults(self): assert 
query.limit is None def test_ctor_explicit(self): - from google.cloud.bigtable.row_filters import RowFilterChain - from google.cloud.bigtable.read_rows_query import RowRange + from google.cloud.bigtable.data.row_filters import RowFilterChain + from google.cloud.bigtable.data.read_rows_query import RowRange filter_ = RowFilterChain() query = self._make_one( @@ -281,7 +281,7 @@ def test_ctor_invalid_limit(self): assert str(exc.value) == "limit must be >= 0" def test_set_filter(self): - from google.cloud.bigtable.row_filters import RowFilterChain + from google.cloud.bigtable.data.row_filters import RowFilterChain filter1 = RowFilterChain() query = self._make_one() @@ -300,7 +300,7 @@ def test_set_filter(self): assert str(exc.value) == "row_filter must be a RowFilter or dict" def test_set_filter_dict(self): - from google.cloud.bigtable.row_filters import RowSampleFilter + from google.cloud.bigtable.data.row_filters import RowSampleFilter from google.cloud.bigtable_v2.types.bigtable import ReadRowsRequest filter1 = RowSampleFilter(0.5) @@ -402,7 +402,7 @@ def test_duplicate_rows(self): assert len(query.row_keys) == 3 def test_add_range(self): - from google.cloud.bigtable.read_rows_query import RowRange + from google.cloud.bigtable.data.read_rows_query import RowRange query = self._make_one() assert query.row_ranges == set() @@ -419,7 +419,7 @@ def test_add_range(self): assert len(query.row_ranges) == 2 def test_add_range_dict(self): - from google.cloud.bigtable.read_rows_query import RowRange + from google.cloud.bigtable.data.read_rows_query import RowRange query = self._make_one() assert query.row_ranges == set() @@ -449,8 +449,8 @@ def test_to_dict_rows_default(self): def test_to_dict_rows_populated(self): # dictionary should be in rowset proto format from google.cloud.bigtable_v2.types.bigtable import ReadRowsRequest - from google.cloud.bigtable.row_filters import PassAllFilter - from google.cloud.bigtable.read_rows_query import RowRange + from google.cloud.bigtable.data.row_filters import PassAllFilter + from google.cloud.bigtable.data.read_rows_query import RowRange row_filter = PassAllFilter(False) query = self._make_one(limit=100, row_filter=row_filter) @@ -494,7 +494,7 @@ def test_to_dict_rows_populated(self): assert filter_proto == row_filter._to_pb() def _parse_query_string(self, query_string): - from google.cloud.bigtable.read_rows_query import ReadRowsQuery, RowRange + from google.cloud.bigtable.data.read_rows_query import ReadRowsQuery, RowRange query = ReadRowsQuery() segments = query_string.split(",") @@ -550,7 +550,7 @@ def test_shard_full_table_scan_empty_split(self): """ Sharding a full table scan with no split should return another full table scan. 
""" - from google.cloud.bigtable.read_rows_query import ReadRowsQuery + from google.cloud.bigtable.data.read_rows_query import ReadRowsQuery full_scan_query = ReadRowsQuery() split_points = [] @@ -563,7 +563,7 @@ def test_shard_full_table_scan_with_split(self): """ Test splitting a full table scan into two queries """ - from google.cloud.bigtable.read_rows_query import ReadRowsQuery + from google.cloud.bigtable.data.read_rows_query import ReadRowsQuery full_scan_query = ReadRowsQuery() split_points = [(b"a", None)] @@ -576,7 +576,7 @@ def test_shard_full_table_scan_with_multiple_split(self): """ Test splitting a full table scan into three queries """ - from google.cloud.bigtable.read_rows_query import ReadRowsQuery + from google.cloud.bigtable.data.read_rows_query import ReadRowsQuery full_scan_query = ReadRowsQuery() split_points = [(b"a", None), (b"z", None)] @@ -684,7 +684,7 @@ def test_shard_limit_exception(self): """ queries with a limit should raise an exception when a shard is attempted """ - from google.cloud.bigtable.read_rows_query import ReadRowsQuery + from google.cloud.bigtable.data.read_rows_query import ReadRowsQuery query = ReadRowsQuery(limit=10) with pytest.raises(AttributeError) as e: @@ -718,8 +718,8 @@ def test_shard_limit_exception(self): ], ) def test___eq__(self, first_args, second_args, expected): - from google.cloud.bigtable.read_rows_query import ReadRowsQuery - from google.cloud.bigtable.read_rows_query import RowRange + from google.cloud.bigtable.data.read_rows_query import ReadRowsQuery + from google.cloud.bigtable.data.read_rows_query import RowRange # replace row_range placeholders with a RowRange object if len(first_args) > 1: @@ -733,7 +733,7 @@ def test___eq__(self, first_args, second_args, expected): assert (first == second) == expected def test___repr__(self): - from google.cloud.bigtable.read_rows_query import ReadRowsQuery + from google.cloud.bigtable.data.read_rows_query import ReadRowsQuery instance = self._make_one(row_keys=["a", "b"], row_filter={}, limit=10) # should be able to recreate the instance from the repr diff --git a/tests/unit/data/test_row.py b/tests/unit/data/test_row.py index 0413b2889..c9c797b61 100644 --- a/tests/unit/data/test_row.py +++ b/tests/unit/data/test_row.py @@ -27,7 +27,7 @@ class TestRow(unittest.TestCase): @staticmethod def _get_target_class(): - from google.cloud.bigtable.row import Row + from google.cloud.bigtable.data.row import Row return Row @@ -45,7 +45,7 @@ def _make_cell( timestamp=TEST_TIMESTAMP, labels=TEST_LABELS, ): - from google.cloud.bigtable.row import Cell + from google.cloud.bigtable.data.row import Cell return Cell(value, row_key, family_id, qualifier, timestamp, labels) @@ -223,7 +223,7 @@ def test_to_dict(self): self.assertEqual(column.cells[1].labels, TEST_LABELS) def test_iteration(self): - from google.cloud.bigtable.row import Cell + from google.cloud.bigtable.data.row import Cell # should be able to iterate over the Row as a list cell1 = self._make_cell(value=b"1") @@ -499,7 +499,7 @@ def test_index_of(self): class TestCell(unittest.TestCase): @staticmethod def _get_target_class(): - from google.cloud.bigtable.row import Cell + from google.cloud.bigtable.data.row import Cell return Cell @@ -623,7 +623,7 @@ def test___str__(self): self.assertEqual(str(cell), str(test_value)) def test___repr__(self): - from google.cloud.bigtable.row import Cell # type: ignore # noqa: F401 + from google.cloud.bigtable.data.row import Cell # type: ignore # noqa: F401 cell = self._make_one() expected = ( @@ -637,7 
+637,7 @@ def test___repr__(self): self.assertEqual(result, cell) def test___repr___no_labels(self): - from google.cloud.bigtable.row import Cell # type: ignore # noqa: F401 + from google.cloud.bigtable.data.row import Cell # type: ignore # noqa: F401 cell_no_labels = self._make_one( TEST_VALUE, diff --git a/tests/unit/data/test_row_filters.py b/tests/unit/data/test_row_filters.py index 11ff9f2f1..a3e275e70 100644 --- a/tests/unit/data/test_row_filters.py +++ b/tests/unit/data/test_row_filters.py @@ -17,10 +17,10 @@ def test_abstract_class_constructors(): - from google.cloud.bigtable.row_filters import RowFilter - from google.cloud.bigtable.row_filters import _BoolFilter - from google.cloud.bigtable.row_filters import _FilterCombination - from google.cloud.bigtable.row_filters import _CellCountFilter + from google.cloud.bigtable.data.row_filters import RowFilter + from google.cloud.bigtable.data.row_filters import _BoolFilter + from google.cloud.bigtable.data.row_filters import _FilterCombination + from google.cloud.bigtable.data.row_filters import _CellCountFilter with pytest.raises(TypeError): RowFilter() @@ -64,7 +64,7 @@ def test_bool_filter___ne__same_value(): def test_sink_filter_to_pb(): - from google.cloud.bigtable.row_filters import SinkFilter + from google.cloud.bigtable.data.row_filters import SinkFilter flag = True row_filter = SinkFilter(flag) @@ -74,7 +74,7 @@ def test_sink_filter_to_pb(): def test_sink_filter_to_dict(): - from google.cloud.bigtable.row_filters import SinkFilter + from google.cloud.bigtable.data.row_filters import SinkFilter from google.cloud.bigtable_v2.types import data as data_v2_pb2 flag = True @@ -86,7 +86,7 @@ def test_sink_filter_to_dict(): def test_sink_filter___repr__(): - from google.cloud.bigtable.row_filters import SinkFilter + from google.cloud.bigtable.data.row_filters import SinkFilter flag = True row_filter = SinkFilter(flag) @@ -96,7 +96,7 @@ def test_sink_filter___repr__(): def test_pass_all_filter_to_pb(): - from google.cloud.bigtable.row_filters import PassAllFilter + from google.cloud.bigtable.data.row_filters import PassAllFilter flag = True row_filter = PassAllFilter(flag) @@ -106,7 +106,7 @@ def test_pass_all_filter_to_pb(): def test_pass_all_filter_to_dict(): - from google.cloud.bigtable.row_filters import PassAllFilter + from google.cloud.bigtable.data.row_filters import PassAllFilter from google.cloud.bigtable_v2.types import data as data_v2_pb2 flag = True @@ -118,7 +118,7 @@ def test_pass_all_filter_to_dict(): def test_pass_all_filter___repr__(): - from google.cloud.bigtable.row_filters import PassAllFilter + from google.cloud.bigtable.data.row_filters import PassAllFilter flag = True row_filter = PassAllFilter(flag) @@ -128,7 +128,7 @@ def test_pass_all_filter___repr__(): def test_block_all_filter_to_pb(): - from google.cloud.bigtable.row_filters import BlockAllFilter + from google.cloud.bigtable.data.row_filters import BlockAllFilter flag = True row_filter = BlockAllFilter(flag) @@ -138,7 +138,7 @@ def test_block_all_filter_to_pb(): def test_block_all_filter_to_dict(): - from google.cloud.bigtable.row_filters import BlockAllFilter + from google.cloud.bigtable.data.row_filters import BlockAllFilter from google.cloud.bigtable_v2.types import data as data_v2_pb2 flag = True @@ -150,7 +150,7 @@ def test_block_all_filter_to_dict(): def test_block_all_filter___repr__(): - from google.cloud.bigtable.row_filters import BlockAllFilter + from google.cloud.bigtable.data.row_filters import BlockAllFilter flag = True row_filter = 
BlockAllFilter(flag) @@ -198,7 +198,7 @@ def test_regex_filter__ne__same_value(): def test_row_key_regex_filter_to_pb(): - from google.cloud.bigtable.row_filters import RowKeyRegexFilter + from google.cloud.bigtable.data.row_filters import RowKeyRegexFilter regex = b"row-key-regex" row_filter = RowKeyRegexFilter(regex) @@ -208,7 +208,7 @@ def test_row_key_regex_filter_to_pb(): def test_row_key_regex_filter_to_dict(): - from google.cloud.bigtable.row_filters import RowKeyRegexFilter + from google.cloud.bigtable.data.row_filters import RowKeyRegexFilter from google.cloud.bigtable_v2.types import data as data_v2_pb2 regex = b"row-key-regex" @@ -220,7 +220,7 @@ def test_row_key_regex_filter_to_dict(): def test_row_key_regex_filter___repr__(): - from google.cloud.bigtable.row_filters import RowKeyRegexFilter + from google.cloud.bigtable.data.row_filters import RowKeyRegexFilter regex = b"row-key-regex" row_filter = RowKeyRegexFilter(regex) @@ -230,7 +230,7 @@ def test_row_key_regex_filter___repr__(): def test_row_sample_filter_constructor(): - from google.cloud.bigtable.row_filters import RowSampleFilter + from google.cloud.bigtable.data.row_filters import RowSampleFilter sample = object() row_filter = RowSampleFilter(sample) @@ -238,7 +238,7 @@ def test_row_sample_filter_constructor(): def test_row_sample_filter___eq__type_differ(): - from google.cloud.bigtable.row_filters import RowSampleFilter + from google.cloud.bigtable.data.row_filters import RowSampleFilter sample = object() row_filter1 = RowSampleFilter(sample) @@ -247,7 +247,7 @@ def test_row_sample_filter___eq__type_differ(): def test_row_sample_filter___eq__same_value(): - from google.cloud.bigtable.row_filters import RowSampleFilter + from google.cloud.bigtable.data.row_filters import RowSampleFilter sample = object() row_filter1 = RowSampleFilter(sample) @@ -256,7 +256,7 @@ def test_row_sample_filter___eq__same_value(): def test_row_sample_filter___ne__(): - from google.cloud.bigtable.row_filters import RowSampleFilter + from google.cloud.bigtable.data.row_filters import RowSampleFilter sample = object() other_sample = object() @@ -266,7 +266,7 @@ def test_row_sample_filter___ne__(): def test_row_sample_filter_to_pb(): - from google.cloud.bigtable.row_filters import RowSampleFilter + from google.cloud.bigtable.data.row_filters import RowSampleFilter sample = 0.25 row_filter = RowSampleFilter(sample) @@ -276,7 +276,7 @@ def test_row_sample_filter_to_pb(): def test_row_sample_filter___repr__(): - from google.cloud.bigtable.row_filters import RowSampleFilter + from google.cloud.bigtable.data.row_filters import RowSampleFilter sample = 0.25 row_filter = RowSampleFilter(sample) @@ -286,7 +286,7 @@ def test_row_sample_filter___repr__(): def test_family_name_regex_filter_to_pb(): - from google.cloud.bigtable.row_filters import FamilyNameRegexFilter + from google.cloud.bigtable.data.row_filters import FamilyNameRegexFilter regex = "family-regex" row_filter = FamilyNameRegexFilter(regex) @@ -296,7 +296,7 @@ def test_family_name_regex_filter_to_pb(): def test_family_name_regex_filter_to_dict(): - from google.cloud.bigtable.row_filters import FamilyNameRegexFilter + from google.cloud.bigtable.data.row_filters import FamilyNameRegexFilter from google.cloud.bigtable_v2.types import data as data_v2_pb2 regex = "family-regex" @@ -308,7 +308,7 @@ def test_family_name_regex_filter_to_dict(): def test_family_name_regex_filter___repr__(): - from google.cloud.bigtable.row_filters import FamilyNameRegexFilter + from 
google.cloud.bigtable.data.row_filters import FamilyNameRegexFilter regex = "family-regex" row_filter = FamilyNameRegexFilter(regex) @@ -319,7 +319,7 @@ def test_family_name_regex_filter___repr__(): def test_column_qualifier_regex_filter_to_pb(): - from google.cloud.bigtable.row_filters import ColumnQualifierRegexFilter + from google.cloud.bigtable.data.row_filters import ColumnQualifierRegexFilter regex = b"column-regex" row_filter = ColumnQualifierRegexFilter(regex) @@ -329,7 +329,7 @@ def test_column_qualifier_regex_filter_to_pb(): def test_column_qualifier_regex_filter_to_dict(): - from google.cloud.bigtable.row_filters import ColumnQualifierRegexFilter + from google.cloud.bigtable.data.row_filters import ColumnQualifierRegexFilter from google.cloud.bigtable_v2.types import data as data_v2_pb2 regex = b"column-regex" @@ -341,7 +341,7 @@ def test_column_qualifier_regex_filter_to_dict(): def test_column_qualifier_regex_filter___repr__(): - from google.cloud.bigtable.row_filters import ColumnQualifierRegexFilter + from google.cloud.bigtable.data.row_filters import ColumnQualifierRegexFilter regex = b"column-regex" row_filter = ColumnQualifierRegexFilter(regex) @@ -351,7 +351,7 @@ def test_column_qualifier_regex_filter___repr__(): def test_timestamp_range_constructor(): - from google.cloud.bigtable.row_filters import TimestampRange + from google.cloud.bigtable.data.row_filters import TimestampRange start = object() end = object() @@ -361,7 +361,7 @@ def test_timestamp_range_constructor(): def test_timestamp_range___eq__(): - from google.cloud.bigtable.row_filters import TimestampRange + from google.cloud.bigtable.data.row_filters import TimestampRange start = object() end = object() @@ -371,7 +371,7 @@ def test_timestamp_range___eq__(): def test_timestamp_range___eq__type_differ(): - from google.cloud.bigtable.row_filters import TimestampRange + from google.cloud.bigtable.data.row_filters import TimestampRange start = object() end = object() @@ -381,7 +381,7 @@ def test_timestamp_range___eq__type_differ(): def test_timestamp_range___ne__same_value(): - from google.cloud.bigtable.row_filters import TimestampRange + from google.cloud.bigtable.data.row_filters import TimestampRange start = object() end = object() @@ -393,7 +393,7 @@ def test_timestamp_range___ne__same_value(): def _timestamp_range_to_pb_helper(pb_kwargs, start=None, end=None): import datetime from google.cloud._helpers import _EPOCH - from google.cloud.bigtable.row_filters import TimestampRange + from google.cloud.bigtable.data.row_filters import TimestampRange if start is not None: start = _EPOCH + datetime.timedelta(microseconds=start) @@ -421,7 +421,7 @@ def test_timestamp_range_to_pb(): def test_timestamp_range_to_dict(): - from google.cloud.bigtable.row_filters import TimestampRange + from google.cloud.bigtable.data.row_filters import TimestampRange from google.cloud.bigtable_v2.types import data as data_v2_pb2 import datetime @@ -448,7 +448,7 @@ def test_timestamp_range_to_pb_start_only(): def test_timestamp_range_to_dict_start_only(): - from google.cloud.bigtable.row_filters import TimestampRange + from google.cloud.bigtable.data.row_filters import TimestampRange from google.cloud.bigtable_v2.types import data as data_v2_pb2 import datetime @@ -470,7 +470,7 @@ def test_timestamp_range_to_pb_end_only(): def test_timestamp_range_to_dict_end_only(): - from google.cloud.bigtable.row_filters import TimestampRange + from google.cloud.bigtable.data.row_filters import TimestampRange from google.cloud.bigtable_v2.types import 
data as data_v2_pb2 import datetime @@ -482,7 +482,7 @@ def test_timestamp_range_to_dict_end_only(): def timestamp_range___repr__(): - from google.cloud.bigtable.row_filters import TimestampRange + from google.cloud.bigtable.data.row_filters import TimestampRange start = object() end = object() @@ -493,7 +493,7 @@ def timestamp_range___repr__(): def test_timestamp_range_filter___eq__type_differ(): - from google.cloud.bigtable.row_filters import TimestampRangeFilter + from google.cloud.bigtable.data.row_filters import TimestampRangeFilter range_ = object() row_filter1 = TimestampRangeFilter(range_) @@ -502,7 +502,7 @@ def test_timestamp_range_filter___eq__type_differ(): def test_timestamp_range_filter___eq__same_value(): - from google.cloud.bigtable.row_filters import TimestampRangeFilter + from google.cloud.bigtable.data.row_filters import TimestampRangeFilter range_ = object() row_filter1 = TimestampRangeFilter(range_) @@ -511,7 +511,7 @@ def test_timestamp_range_filter___eq__same_value(): def test_timestamp_range_filter___ne__(): - from google.cloud.bigtable.row_filters import TimestampRangeFilter + from google.cloud.bigtable.data.row_filters import TimestampRangeFilter range_ = object() other_range_ = object() @@ -521,7 +521,7 @@ def test_timestamp_range_filter___ne__(): def test_timestamp_range_filter_to_pb(): - from google.cloud.bigtable.row_filters import TimestampRangeFilter + from google.cloud.bigtable.data.row_filters import TimestampRangeFilter row_filter = TimestampRangeFilter() pb_val = row_filter._to_pb() @@ -530,7 +530,7 @@ def test_timestamp_range_filter_to_pb(): def test_timestamp_range_filter_to_dict(): - from google.cloud.bigtable.row_filters import TimestampRangeFilter + from google.cloud.bigtable.data.row_filters import TimestampRangeFilter from google.cloud.bigtable_v2.types import data as data_v2_pb2 import datetime @@ -549,7 +549,7 @@ def test_timestamp_range_filter_to_dict(): def test_timestamp_range_filter_empty_to_dict(): - from google.cloud.bigtable.row_filters import TimestampRangeFilter + from google.cloud.bigtable.data.row_filters import TimestampRangeFilter from google.cloud.bigtable_v2.types import data as data_v2_pb2 row_filter = TimestampRangeFilter() @@ -560,7 +560,7 @@ def test_timestamp_range_filter_empty_to_dict(): def test_timestamp_range_filter___repr__(): - from google.cloud.bigtable.row_filters import TimestampRangeFilter + from google.cloud.bigtable.data.row_filters import TimestampRangeFilter import datetime start = datetime.datetime(2019, 1, 1) @@ -575,7 +575,7 @@ def test_timestamp_range_filter___repr__(): def test_column_range_filter_constructor_defaults(): - from google.cloud.bigtable.row_filters import ColumnRangeFilter + from google.cloud.bigtable.data.row_filters import ColumnRangeFilter family_id = object() row_filter = ColumnRangeFilter(family_id) @@ -587,7 +587,7 @@ def test_column_range_filter_constructor_defaults(): def test_column_range_filter_constructor_explicit(): - from google.cloud.bigtable.row_filters import ColumnRangeFilter + from google.cloud.bigtable.data.row_filters import ColumnRangeFilter family_id = object() start_qualifier = object() @@ -609,7 +609,7 @@ def test_column_range_filter_constructor_explicit(): def test_column_range_filter_constructor_(): - from google.cloud.bigtable.row_filters import ColumnRangeFilter + from google.cloud.bigtable.data.row_filters import ColumnRangeFilter family_id = object() with pytest.raises(ValueError): @@ -617,7 +617,7 @@ def test_column_range_filter_constructor_(): def 
test_column_range_filter_constructor_bad_end(): - from google.cloud.bigtable.row_filters import ColumnRangeFilter + from google.cloud.bigtable.data.row_filters import ColumnRangeFilter family_id = object() with pytest.raises(ValueError): @@ -625,7 +625,7 @@ def test_column_range_filter_constructor_bad_end(): def test_column_range_filter___eq__(): - from google.cloud.bigtable.row_filters import ColumnRangeFilter + from google.cloud.bigtable.data.row_filters import ColumnRangeFilter family_id = object() start_qualifier = object() @@ -650,7 +650,7 @@ def test_column_range_filter___eq__(): def test_column_range_filter___eq__type_differ(): - from google.cloud.bigtable.row_filters import ColumnRangeFilter + from google.cloud.bigtable.data.row_filters import ColumnRangeFilter family_id = object() row_filter1 = ColumnRangeFilter(family_id) @@ -659,7 +659,7 @@ def test_column_range_filter___eq__type_differ(): def test_column_range_filter___ne__(): - from google.cloud.bigtable.row_filters import ColumnRangeFilter + from google.cloud.bigtable.data.row_filters import ColumnRangeFilter family_id = object() other_family_id = object() @@ -685,7 +685,7 @@ def test_column_range_filter___ne__(): def test_column_range_filter_to_pb(): - from google.cloud.bigtable.row_filters import ColumnRangeFilter + from google.cloud.bigtable.data.row_filters import ColumnRangeFilter family_id = "column-family-id" row_filter = ColumnRangeFilter(family_id) @@ -695,7 +695,7 @@ def test_column_range_filter_to_pb(): def test_column_range_filter_to_dict(): - from google.cloud.bigtable.row_filters import ColumnRangeFilter + from google.cloud.bigtable.data.row_filters import ColumnRangeFilter from google.cloud.bigtable_v2.types import data as data_v2_pb2 family_id = "column-family-id" @@ -707,7 +707,7 @@ def test_column_range_filter_to_dict(): def test_column_range_filter_to_pb_inclusive_start(): - from google.cloud.bigtable.row_filters import ColumnRangeFilter + from google.cloud.bigtable.data.row_filters import ColumnRangeFilter family_id = "column-family-id" column = b"column" @@ -718,7 +718,7 @@ def test_column_range_filter_to_pb_inclusive_start(): def test_column_range_filter_to_pb_exclusive_start(): - from google.cloud.bigtable.row_filters import ColumnRangeFilter + from google.cloud.bigtable.data.row_filters import ColumnRangeFilter family_id = "column-family-id" column = b"column" @@ -731,7 +731,7 @@ def test_column_range_filter_to_pb_exclusive_start(): def test_column_range_filter_to_pb_inclusive_end(): - from google.cloud.bigtable.row_filters import ColumnRangeFilter + from google.cloud.bigtable.data.row_filters import ColumnRangeFilter family_id = "column-family-id" column = b"column" @@ -742,7 +742,7 @@ def test_column_range_filter_to_pb_inclusive_end(): def test_column_range_filter_to_pb_exclusive_end(): - from google.cloud.bigtable.row_filters import ColumnRangeFilter + from google.cloud.bigtable.data.row_filters import ColumnRangeFilter family_id = "column-family-id" column = b"column" @@ -753,7 +753,7 @@ def test_column_range_filter_to_pb_exclusive_end(): def test_column_range_filter___repr__(): - from google.cloud.bigtable.row_filters import ColumnRangeFilter + from google.cloud.bigtable.data.row_filters import ColumnRangeFilter family_id = "column-family-id" start_qualifier = b"column" @@ -766,7 +766,7 @@ def test_column_range_filter___repr__(): def test_value_regex_filter_to_pb_w_bytes(): - from google.cloud.bigtable.row_filters import ValueRegexFilter + from google.cloud.bigtable.data.row_filters import 
ValueRegexFilter value = regex = b"value-regex" row_filter = ValueRegexFilter(value) @@ -776,7 +776,7 @@ def test_value_regex_filter_to_pb_w_bytes(): def test_value_regex_filter_to_dict_w_bytes(): - from google.cloud.bigtable.row_filters import ValueRegexFilter + from google.cloud.bigtable.data.row_filters import ValueRegexFilter from google.cloud.bigtable_v2.types import data as data_v2_pb2 value = regex = b"value-regex" @@ -788,7 +788,7 @@ def test_value_regex_filter_to_dict_w_bytes(): def test_value_regex_filter_to_pb_w_str(): - from google.cloud.bigtable.row_filters import ValueRegexFilter + from google.cloud.bigtable.data.row_filters import ValueRegexFilter value = "value-regex" regex = value.encode("ascii") @@ -799,7 +799,7 @@ def test_value_regex_filter_to_pb_w_str(): def test_value_regex_filter_to_dict_w_str(): - from google.cloud.bigtable.row_filters import ValueRegexFilter + from google.cloud.bigtable.data.row_filters import ValueRegexFilter from google.cloud.bigtable_v2.types import data as data_v2_pb2 value = "value-regex" @@ -812,7 +812,7 @@ def test_value_regex_filter_to_dict_w_str(): def test_value_regex_filter___repr__(): - from google.cloud.bigtable.row_filters import ValueRegexFilter + from google.cloud.bigtable.data.row_filters import ValueRegexFilter value = "value-regex" row_filter = ValueRegexFilter(value) @@ -823,7 +823,7 @@ def test_value_regex_filter___repr__(): def test_literal_value_filter_to_pb_w_bytes(): - from google.cloud.bigtable.row_filters import LiteralValueFilter + from google.cloud.bigtable.data.row_filters import LiteralValueFilter value = regex = b"value_regex" row_filter = LiteralValueFilter(value) @@ -833,7 +833,7 @@ def test_literal_value_filter_to_pb_w_bytes(): def test_literal_value_filter_to_dict_w_bytes(): - from google.cloud.bigtable.row_filters import LiteralValueFilter + from google.cloud.bigtable.data.row_filters import LiteralValueFilter from google.cloud.bigtable_v2.types import data as data_v2_pb2 value = regex = b"value_regex" @@ -845,7 +845,7 @@ def test_literal_value_filter_to_dict_w_bytes(): def test_literal_value_filter_to_pb_w_str(): - from google.cloud.bigtable.row_filters import LiteralValueFilter + from google.cloud.bigtable.data.row_filters import LiteralValueFilter value = "value_regex" regex = value.encode("ascii") @@ -856,7 +856,7 @@ def test_literal_value_filter_to_pb_w_str(): def test_literal_value_filter_to_dict_w_str(): - from google.cloud.bigtable.row_filters import LiteralValueFilter + from google.cloud.bigtable.data.row_filters import LiteralValueFilter from google.cloud.bigtable_v2.types import data as data_v2_pb2 value = "value_regex" @@ -886,7 +886,7 @@ def test_literal_value_filter_to_dict_w_str(): ], ) def test_literal_value_filter_w_int(value, expected_byte_string): - from google.cloud.bigtable.row_filters import LiteralValueFilter + from google.cloud.bigtable.data.row_filters import LiteralValueFilter from google.cloud.bigtable_v2.types import data as data_v2_pb2 row_filter = LiteralValueFilter(value) @@ -901,7 +901,7 @@ def test_literal_value_filter_w_int(value, expected_byte_string): def test_literal_value_filter___repr__(): - from google.cloud.bigtable.row_filters import LiteralValueFilter + from google.cloud.bigtable.data.row_filters import LiteralValueFilter value = "value_regex" row_filter = LiteralValueFilter(value) @@ -912,7 +912,7 @@ def test_literal_value_filter___repr__(): def test_value_range_filter_constructor_defaults(): - from google.cloud.bigtable.row_filters import ValueRangeFilter + from 
google.cloud.bigtable.data.row_filters import ValueRangeFilter row_filter = ValueRangeFilter() @@ -923,7 +923,7 @@ def test_value_range_filter_constructor_defaults(): def test_value_range_filter_constructor_explicit(): - from google.cloud.bigtable.row_filters import ValueRangeFilter + from google.cloud.bigtable.data.row_filters import ValueRangeFilter start_value = object() end_value = object() @@ -944,7 +944,7 @@ def test_value_range_filter_constructor_explicit(): def test_value_range_filter_constructor_w_int_values(): - from google.cloud.bigtable.row_filters import ValueRangeFilter + from google.cloud.bigtable.data.row_filters import ValueRangeFilter import struct start_value = 1 @@ -962,21 +962,21 @@ def test_value_range_filter_constructor_w_int_values(): def test_value_range_filter_constructor_bad_start(): - from google.cloud.bigtable.row_filters import ValueRangeFilter + from google.cloud.bigtable.data.row_filters import ValueRangeFilter with pytest.raises(ValueError): ValueRangeFilter(inclusive_start=True) def test_value_range_filter_constructor_bad_end(): - from google.cloud.bigtable.row_filters import ValueRangeFilter + from google.cloud.bigtable.data.row_filters import ValueRangeFilter with pytest.raises(ValueError): ValueRangeFilter(inclusive_end=True) def test_value_range_filter___eq__(): - from google.cloud.bigtable.row_filters import ValueRangeFilter + from google.cloud.bigtable.data.row_filters import ValueRangeFilter start_value = object() end_value = object() @@ -998,7 +998,7 @@ def test_value_range_filter___eq__(): def test_value_range_filter___eq__type_differ(): - from google.cloud.bigtable.row_filters import ValueRangeFilter + from google.cloud.bigtable.data.row_filters import ValueRangeFilter row_filter1 = ValueRangeFilter() row_filter2 = object() @@ -1006,7 +1006,7 @@ def test_value_range_filter___eq__type_differ(): def test_value_range_filter___ne__(): - from google.cloud.bigtable.row_filters import ValueRangeFilter + from google.cloud.bigtable.data.row_filters import ValueRangeFilter start_value = object() other_start_value = object() @@ -1029,7 +1029,7 @@ def test_value_range_filter___ne__(): def test_value_range_filter_to_pb(): - from google.cloud.bigtable.row_filters import ValueRangeFilter + from google.cloud.bigtable.data.row_filters import ValueRangeFilter row_filter = ValueRangeFilter() expected_pb = _RowFilterPB(value_range_filter=_ValueRangePB()) @@ -1037,7 +1037,7 @@ def test_value_range_filter_to_pb(): def test_value_range_filter_to_dict(): - from google.cloud.bigtable.row_filters import ValueRangeFilter + from google.cloud.bigtable.data.row_filters import ValueRangeFilter from google.cloud.bigtable_v2.types import data as data_v2_pb2 row_filter = ValueRangeFilter() @@ -1048,7 +1048,7 @@ def test_value_range_filter_to_dict(): def test_value_range_filter_to_pb_inclusive_start(): - from google.cloud.bigtable.row_filters import ValueRangeFilter + from google.cloud.bigtable.data.row_filters import ValueRangeFilter value = b"some-value" row_filter = ValueRangeFilter(start_value=value) @@ -1058,7 +1058,7 @@ def test_value_range_filter_to_pb_inclusive_start(): def test_value_range_filter_to_pb_exclusive_start(): - from google.cloud.bigtable.row_filters import ValueRangeFilter + from google.cloud.bigtable.data.row_filters import ValueRangeFilter value = b"some-value" row_filter = ValueRangeFilter(start_value=value, inclusive_start=False) @@ -1068,7 +1068,7 @@ def test_value_range_filter_to_pb_exclusive_start(): def test_value_range_filter_to_pb_inclusive_end(): - 
from google.cloud.bigtable.row_filters import ValueRangeFilter + from google.cloud.bigtable.data.row_filters import ValueRangeFilter value = b"some-value" row_filter = ValueRangeFilter(end_value=value) @@ -1078,7 +1078,7 @@ def test_value_range_filter_to_pb_inclusive_end(): def test_value_range_filter_to_pb_exclusive_end(): - from google.cloud.bigtable.row_filters import ValueRangeFilter + from google.cloud.bigtable.data.row_filters import ValueRangeFilter value = b"some-value" row_filter = ValueRangeFilter(end_value=value, inclusive_end=False) @@ -1088,7 +1088,7 @@ def test_value_range_filter_to_pb_exclusive_end(): def test_value_range_filter___repr__(): - from google.cloud.bigtable.row_filters import ValueRangeFilter + from google.cloud.bigtable.data.row_filters import ValueRangeFilter start_value = b"some-value" end_value = b"some-other-value" @@ -1133,7 +1133,7 @@ def test_cell_count___ne__same_value(): def test_cells_row_offset_filter_to_pb(): - from google.cloud.bigtable.row_filters import CellsRowOffsetFilter + from google.cloud.bigtable.data.row_filters import CellsRowOffsetFilter num_cells = 76 row_filter = CellsRowOffsetFilter(num_cells) @@ -1143,7 +1143,7 @@ def test_cells_row_offset_filter_to_pb(): def test_cells_row_offset_filter_to_dict(): - from google.cloud.bigtable.row_filters import CellsRowOffsetFilter + from google.cloud.bigtable.data.row_filters import CellsRowOffsetFilter from google.cloud.bigtable_v2.types import data as data_v2_pb2 num_cells = 76 @@ -1155,7 +1155,7 @@ def test_cells_row_offset_filter_to_dict(): def test_cells_row_offset_filter___repr__(): - from google.cloud.bigtable.row_filters import CellsRowOffsetFilter + from google.cloud.bigtable.data.row_filters import CellsRowOffsetFilter num_cells = 76 row_filter = CellsRowOffsetFilter(num_cells) @@ -1166,7 +1166,7 @@ def test_cells_row_offset_filter___repr__(): def test_cells_row_limit_filter_to_pb(): - from google.cloud.bigtable.row_filters import CellsRowLimitFilter + from google.cloud.bigtable.data.row_filters import CellsRowLimitFilter num_cells = 189 row_filter = CellsRowLimitFilter(num_cells) @@ -1176,7 +1176,7 @@ def test_cells_row_limit_filter_to_pb(): def test_cells_row_limit_filter_to_dict(): - from google.cloud.bigtable.row_filters import CellsRowLimitFilter + from google.cloud.bigtable.data.row_filters import CellsRowLimitFilter from google.cloud.bigtable_v2.types import data as data_v2_pb2 num_cells = 189 @@ -1188,7 +1188,7 @@ def test_cells_row_limit_filter_to_dict(): def test_cells_row_limit_filter___repr__(): - from google.cloud.bigtable.row_filters import CellsRowLimitFilter + from google.cloud.bigtable.data.row_filters import CellsRowLimitFilter num_cells = 189 row_filter = CellsRowLimitFilter(num_cells) @@ -1199,7 +1199,7 @@ def test_cells_row_limit_filter___repr__(): def test_cells_column_limit_filter_to_pb(): - from google.cloud.bigtable.row_filters import CellsColumnLimitFilter + from google.cloud.bigtable.data.row_filters import CellsColumnLimitFilter num_cells = 10 row_filter = CellsColumnLimitFilter(num_cells) @@ -1209,7 +1209,7 @@ def test_cells_column_limit_filter_to_pb(): def test_cells_column_limit_filter_to_dict(): - from google.cloud.bigtable.row_filters import CellsColumnLimitFilter + from google.cloud.bigtable.data.row_filters import CellsColumnLimitFilter from google.cloud.bigtable_v2.types import data as data_v2_pb2 num_cells = 10 @@ -1221,7 +1221,7 @@ def test_cells_column_limit_filter_to_dict(): def test_cells_column_limit_filter___repr__(): - from 
google.cloud.bigtable.row_filters import CellsColumnLimitFilter + from google.cloud.bigtable.data.row_filters import CellsColumnLimitFilter num_cells = 10 row_filter = CellsColumnLimitFilter(num_cells) @@ -1232,7 +1232,7 @@ def test_cells_column_limit_filter___repr__(): def test_strip_value_transformer_filter_to_pb(): - from google.cloud.bigtable.row_filters import StripValueTransformerFilter + from google.cloud.bigtable.data.row_filters import StripValueTransformerFilter flag = True row_filter = StripValueTransformerFilter(flag) @@ -1242,7 +1242,7 @@ def test_strip_value_transformer_filter_to_pb(): def test_strip_value_transformer_filter_to_dict(): - from google.cloud.bigtable.row_filters import StripValueTransformerFilter + from google.cloud.bigtable.data.row_filters import StripValueTransformerFilter from google.cloud.bigtable_v2.types import data as data_v2_pb2 flag = True @@ -1254,7 +1254,7 @@ def test_strip_value_transformer_filter_to_dict(): def test_strip_value_transformer_filter___repr__(): - from google.cloud.bigtable.row_filters import StripValueTransformerFilter + from google.cloud.bigtable.data.row_filters import StripValueTransformerFilter flag = True row_filter = StripValueTransformerFilter(flag) @@ -1265,7 +1265,7 @@ def test_strip_value_transformer_filter___repr__(): def test_apply_label_filter_constructor(): - from google.cloud.bigtable.row_filters import ApplyLabelFilter + from google.cloud.bigtable.data.row_filters import ApplyLabelFilter label = object() row_filter = ApplyLabelFilter(label) @@ -1273,7 +1273,7 @@ def test_apply_label_filter_constructor(): def test_apply_label_filter___eq__type_differ(): - from google.cloud.bigtable.row_filters import ApplyLabelFilter + from google.cloud.bigtable.data.row_filters import ApplyLabelFilter label = object() row_filter1 = ApplyLabelFilter(label) @@ -1282,7 +1282,7 @@ def test_apply_label_filter___eq__type_differ(): def test_apply_label_filter___eq__same_value(): - from google.cloud.bigtable.row_filters import ApplyLabelFilter + from google.cloud.bigtable.data.row_filters import ApplyLabelFilter label = object() row_filter1 = ApplyLabelFilter(label) @@ -1291,7 +1291,7 @@ def test_apply_label_filter___eq__same_value(): def test_apply_label_filter___ne__(): - from google.cloud.bigtable.row_filters import ApplyLabelFilter + from google.cloud.bigtable.data.row_filters import ApplyLabelFilter label = object() other_label = object() @@ -1301,7 +1301,7 @@ def test_apply_label_filter___ne__(): def test_apply_label_filter_to_pb(): - from google.cloud.bigtable.row_filters import ApplyLabelFilter + from google.cloud.bigtable.data.row_filters import ApplyLabelFilter label = "label" row_filter = ApplyLabelFilter(label) @@ -1311,7 +1311,7 @@ def test_apply_label_filter_to_pb(): def test_apply_label_filter_to_dict(): - from google.cloud.bigtable.row_filters import ApplyLabelFilter + from google.cloud.bigtable.data.row_filters import ApplyLabelFilter from google.cloud.bigtable_v2.types import data as data_v2_pb2 label = "label" @@ -1323,7 +1323,7 @@ def test_apply_label_filter_to_dict(): def test_apply_label_filter___repr__(): - from google.cloud.bigtable.row_filters import ApplyLabelFilter + from google.cloud.bigtable.data.row_filters import ApplyLabelFilter label = "label" row_filter = ApplyLabelFilter(label) @@ -1399,7 +1399,7 @@ def test_filter_combination___getitem__(): def test_filter_combination___str__(): - from google.cloud.bigtable.row_filters import PassAllFilter + from google.cloud.bigtable.data.row_filters import PassAllFilter 
for FilterType in _get_filter_combination_filters(): filters = [PassAllFilter(True), PassAllFilter(False)] @@ -1411,9 +1411,9 @@ def test_filter_combination___str__(): def test_row_filter_chain_to_pb(): - from google.cloud.bigtable.row_filters import RowFilterChain - from google.cloud.bigtable.row_filters import RowSampleFilter - from google.cloud.bigtable.row_filters import StripValueTransformerFilter + from google.cloud.bigtable.data.row_filters import RowFilterChain + from google.cloud.bigtable.data.row_filters import RowSampleFilter + from google.cloud.bigtable.data.row_filters import StripValueTransformerFilter row_filter1 = StripValueTransformerFilter(True) row_filter1_pb = row_filter1._to_pb() @@ -1431,9 +1431,9 @@ def test_row_filter_chain_to_pb(): def test_row_filter_chain_to_dict(): - from google.cloud.bigtable.row_filters import RowFilterChain - from google.cloud.bigtable.row_filters import RowSampleFilter - from google.cloud.bigtable.row_filters import StripValueTransformerFilter + from google.cloud.bigtable.data.row_filters import RowFilterChain + from google.cloud.bigtable.data.row_filters import RowSampleFilter + from google.cloud.bigtable.data.row_filters import StripValueTransformerFilter from google.cloud.bigtable_v2.types import data as data_v2_pb2 row_filter1 = StripValueTransformerFilter(True) @@ -1452,10 +1452,10 @@ def test_row_filter_chain_to_dict(): def test_row_filter_chain_to_pb_nested(): - from google.cloud.bigtable.row_filters import CellsRowLimitFilter - from google.cloud.bigtable.row_filters import RowFilterChain - from google.cloud.bigtable.row_filters import RowSampleFilter - from google.cloud.bigtable.row_filters import StripValueTransformerFilter + from google.cloud.bigtable.data.row_filters import CellsRowLimitFilter + from google.cloud.bigtable.data.row_filters import RowFilterChain + from google.cloud.bigtable.data.row_filters import RowSampleFilter + from google.cloud.bigtable.data.row_filters import StripValueTransformerFilter row_filter1 = StripValueTransformerFilter(True) row_filter2 = RowSampleFilter(0.25) @@ -1476,10 +1476,10 @@ def test_row_filter_chain_to_pb_nested(): def test_row_filter_chain_to_dict_nested(): - from google.cloud.bigtable.row_filters import CellsRowLimitFilter - from google.cloud.bigtable.row_filters import RowFilterChain - from google.cloud.bigtable.row_filters import RowSampleFilter - from google.cloud.bigtable.row_filters import StripValueTransformerFilter + from google.cloud.bigtable.data.row_filters import CellsRowLimitFilter + from google.cloud.bigtable.data.row_filters import RowFilterChain + from google.cloud.bigtable.data.row_filters import RowSampleFilter + from google.cloud.bigtable.data.row_filters import StripValueTransformerFilter from google.cloud.bigtable_v2.types import data as data_v2_pb2 row_filter1 = StripValueTransformerFilter(True) @@ -1502,9 +1502,9 @@ def test_row_filter_chain_to_dict_nested(): def test_row_filter_chain___repr__(): - from google.cloud.bigtable.row_filters import RowFilterChain - from google.cloud.bigtable.row_filters import RowSampleFilter - from google.cloud.bigtable.row_filters import StripValueTransformerFilter + from google.cloud.bigtable.data.row_filters import RowFilterChain + from google.cloud.bigtable.data.row_filters import RowSampleFilter + from google.cloud.bigtable.data.row_filters import StripValueTransformerFilter row_filter1 = StripValueTransformerFilter(True) row_filter2 = RowSampleFilter(0.25) @@ -1516,9 +1516,9 @@ def test_row_filter_chain___repr__(): def 
test_row_filter_chain___str__(): - from google.cloud.bigtable.row_filters import RowFilterChain - from google.cloud.bigtable.row_filters import RowSampleFilter - from google.cloud.bigtable.row_filters import StripValueTransformerFilter + from google.cloud.bigtable.data.row_filters import RowFilterChain + from google.cloud.bigtable.data.row_filters import RowSampleFilter + from google.cloud.bigtable.data.row_filters import StripValueTransformerFilter row_filter1 = StripValueTransformerFilter(True) row_filter2 = RowSampleFilter(0.25) @@ -1533,9 +1533,9 @@ def test_row_filter_chain___str__(): def test_row_filter_union_to_pb(): - from google.cloud.bigtable.row_filters import RowFilterUnion - from google.cloud.bigtable.row_filters import RowSampleFilter - from google.cloud.bigtable.row_filters import StripValueTransformerFilter + from google.cloud.bigtable.data.row_filters import RowFilterUnion + from google.cloud.bigtable.data.row_filters import RowSampleFilter + from google.cloud.bigtable.data.row_filters import StripValueTransformerFilter row_filter1 = StripValueTransformerFilter(True) row_filter1_pb = row_filter1._to_pb() @@ -1553,9 +1553,9 @@ def test_row_filter_union_to_pb(): def test_row_filter_union_to_dict(): - from google.cloud.bigtable.row_filters import RowFilterUnion - from google.cloud.bigtable.row_filters import RowSampleFilter - from google.cloud.bigtable.row_filters import StripValueTransformerFilter + from google.cloud.bigtable.data.row_filters import RowFilterUnion + from google.cloud.bigtable.data.row_filters import RowSampleFilter + from google.cloud.bigtable.data.row_filters import StripValueTransformerFilter from google.cloud.bigtable_v2.types import data as data_v2_pb2 row_filter1 = StripValueTransformerFilter(True) @@ -1574,10 +1574,10 @@ def test_row_filter_union_to_dict(): def test_row_filter_union_to_pb_nested(): - from google.cloud.bigtable.row_filters import CellsRowLimitFilter - from google.cloud.bigtable.row_filters import RowFilterUnion - from google.cloud.bigtable.row_filters import RowSampleFilter - from google.cloud.bigtable.row_filters import StripValueTransformerFilter + from google.cloud.bigtable.data.row_filters import CellsRowLimitFilter + from google.cloud.bigtable.data.row_filters import RowFilterUnion + from google.cloud.bigtable.data.row_filters import RowSampleFilter + from google.cloud.bigtable.data.row_filters import StripValueTransformerFilter row_filter1 = StripValueTransformerFilter(True) row_filter2 = RowSampleFilter(0.25) @@ -1598,10 +1598,10 @@ def test_row_filter_union_to_pb_nested(): def test_row_filter_union_to_dict_nested(): - from google.cloud.bigtable.row_filters import CellsRowLimitFilter - from google.cloud.bigtable.row_filters import RowFilterUnion - from google.cloud.bigtable.row_filters import RowSampleFilter - from google.cloud.bigtable.row_filters import StripValueTransformerFilter + from google.cloud.bigtable.data.row_filters import CellsRowLimitFilter + from google.cloud.bigtable.data.row_filters import RowFilterUnion + from google.cloud.bigtable.data.row_filters import RowSampleFilter + from google.cloud.bigtable.data.row_filters import StripValueTransformerFilter from google.cloud.bigtable_v2.types import data as data_v2_pb2 row_filter1 = StripValueTransformerFilter(True) @@ -1624,9 +1624,9 @@ def test_row_filter_union_to_dict_nested(): def test_row_filter_union___repr__(): - from google.cloud.bigtable.row_filters import RowFilterUnion - from google.cloud.bigtable.row_filters import RowSampleFilter - from 
google.cloud.bigtable.row_filters import StripValueTransformerFilter + from google.cloud.bigtable.data.row_filters import RowFilterUnion + from google.cloud.bigtable.data.row_filters import RowSampleFilter + from google.cloud.bigtable.data.row_filters import StripValueTransformerFilter row_filter1 = StripValueTransformerFilter(True) row_filter2 = RowSampleFilter(0.25) @@ -1638,9 +1638,9 @@ def test_row_filter_union___repr__(): def test_row_filter_union___str__(): - from google.cloud.bigtable.row_filters import RowFilterUnion - from google.cloud.bigtable.row_filters import RowSampleFilter - from google.cloud.bigtable.row_filters import StripValueTransformerFilter + from google.cloud.bigtable.data.row_filters import RowFilterUnion + from google.cloud.bigtable.data.row_filters import RowSampleFilter + from google.cloud.bigtable.data.row_filters import StripValueTransformerFilter row_filter1 = StripValueTransformerFilter(True) row_filter2 = RowSampleFilter(0.25) @@ -1655,7 +1655,7 @@ def test_row_filter_union___str__(): def test_conditional_row_filter_constructor(): - from google.cloud.bigtable.row_filters import ConditionalRowFilter + from google.cloud.bigtable.data.row_filters import ConditionalRowFilter predicate_filter = object() true_filter = object() @@ -1669,7 +1669,7 @@ def test_conditional_row_filter_constructor(): def test_conditional_row_filter___eq__(): - from google.cloud.bigtable.row_filters import ConditionalRowFilter + from google.cloud.bigtable.data.row_filters import ConditionalRowFilter predicate_filter = object() true_filter = object() @@ -1684,7 +1684,7 @@ def test_conditional_row_filter___eq__(): def test_conditional_row_filter___eq__type_differ(): - from google.cloud.bigtable.row_filters import ConditionalRowFilter + from google.cloud.bigtable.data.row_filters import ConditionalRowFilter predicate_filter = object() true_filter = object() @@ -1697,7 +1697,7 @@ def test_conditional_row_filter___eq__type_differ(): def test_conditional_row_filter___ne__(): - from google.cloud.bigtable.row_filters import ConditionalRowFilter + from google.cloud.bigtable.data.row_filters import ConditionalRowFilter predicate_filter = object() other_predicate_filter = object() @@ -1713,10 +1713,10 @@ def test_conditional_row_filter___ne__(): def test_conditional_row_filter_to_pb(): - from google.cloud.bigtable.row_filters import ConditionalRowFilter - from google.cloud.bigtable.row_filters import CellsRowOffsetFilter - from google.cloud.bigtable.row_filters import RowSampleFilter - from google.cloud.bigtable.row_filters import StripValueTransformerFilter + from google.cloud.bigtable.data.row_filters import ConditionalRowFilter + from google.cloud.bigtable.data.row_filters import CellsRowOffsetFilter + from google.cloud.bigtable.data.row_filters import RowSampleFilter + from google.cloud.bigtable.data.row_filters import StripValueTransformerFilter row_filter1 = StripValueTransformerFilter(True) row_filter1_pb = row_filter1._to_pb() @@ -1743,10 +1743,10 @@ def test_conditional_row_filter_to_pb(): def test_conditional_row_filter_to_dict(): - from google.cloud.bigtable.row_filters import ConditionalRowFilter - from google.cloud.bigtable.row_filters import CellsRowOffsetFilter - from google.cloud.bigtable.row_filters import RowSampleFilter - from google.cloud.bigtable.row_filters import StripValueTransformerFilter + from google.cloud.bigtable.data.row_filters import ConditionalRowFilter + from google.cloud.bigtable.data.row_filters import CellsRowOffsetFilter + from 
google.cloud.bigtable.data.row_filters import RowSampleFilter + from google.cloud.bigtable.data.row_filters import StripValueTransformerFilter from google.cloud.bigtable_v2.types import data as data_v2_pb2 row_filter1 = StripValueTransformerFilter(True) @@ -1776,9 +1776,9 @@ def test_conditional_row_filter_to_dict(): def test_conditional_row_filter_to_pb_true_only(): - from google.cloud.bigtable.row_filters import ConditionalRowFilter - from google.cloud.bigtable.row_filters import RowSampleFilter - from google.cloud.bigtable.row_filters import StripValueTransformerFilter + from google.cloud.bigtable.data.row_filters import ConditionalRowFilter + from google.cloud.bigtable.data.row_filters import RowSampleFilter + from google.cloud.bigtable.data.row_filters import StripValueTransformerFilter row_filter1 = StripValueTransformerFilter(True) row_filter1_pb = row_filter1._to_pb() @@ -1798,9 +1798,9 @@ def test_conditional_row_filter_to_pb_true_only(): def test_conditional_row_filter_to_dict_true_only(): - from google.cloud.bigtable.row_filters import ConditionalRowFilter - from google.cloud.bigtable.row_filters import RowSampleFilter - from google.cloud.bigtable.row_filters import StripValueTransformerFilter + from google.cloud.bigtable.data.row_filters import ConditionalRowFilter + from google.cloud.bigtable.data.row_filters import RowSampleFilter + from google.cloud.bigtable.data.row_filters import StripValueTransformerFilter from google.cloud.bigtable_v2.types import data as data_v2_pb2 row_filter1 = StripValueTransformerFilter(True) @@ -1824,9 +1824,9 @@ def test_conditional_row_filter_to_dict_true_only(): def test_conditional_row_filter_to_pb_false_only(): - from google.cloud.bigtable.row_filters import ConditionalRowFilter - from google.cloud.bigtable.row_filters import RowSampleFilter - from google.cloud.bigtable.row_filters import StripValueTransformerFilter + from google.cloud.bigtable.data.row_filters import ConditionalRowFilter + from google.cloud.bigtable.data.row_filters import RowSampleFilter + from google.cloud.bigtable.data.row_filters import StripValueTransformerFilter row_filter1 = StripValueTransformerFilter(True) row_filter1_pb = row_filter1._to_pb() @@ -1846,9 +1846,9 @@ def test_conditional_row_filter_to_pb_false_only(): def test_conditional_row_filter_to_dict_false_only(): - from google.cloud.bigtable.row_filters import ConditionalRowFilter - from google.cloud.bigtable.row_filters import RowSampleFilter - from google.cloud.bigtable.row_filters import StripValueTransformerFilter + from google.cloud.bigtable.data.row_filters import ConditionalRowFilter + from google.cloud.bigtable.data.row_filters import RowSampleFilter + from google.cloud.bigtable.data.row_filters import StripValueTransformerFilter from google.cloud.bigtable_v2.types import data as data_v2_pb2 row_filter1 = StripValueTransformerFilter(True) @@ -1872,9 +1872,9 @@ def test_conditional_row_filter_to_dict_false_only(): def test_conditional_row_filter___repr__(): - from google.cloud.bigtable.row_filters import ConditionalRowFilter - from google.cloud.bigtable.row_filters import RowSampleFilter - from google.cloud.bigtable.row_filters import StripValueTransformerFilter + from google.cloud.bigtable.data.row_filters import ConditionalRowFilter + from google.cloud.bigtable.data.row_filters import RowSampleFilter + from google.cloud.bigtable.data.row_filters import StripValueTransformerFilter row_filter1 = StripValueTransformerFilter(True) row_filter2 = RowSampleFilter(0.25) @@ -1893,10 +1893,10 @@ def 
test_conditional_row_filter___repr__(): def test_conditional_row_filter___str__(): - from google.cloud.bigtable.row_filters import ConditionalRowFilter - from google.cloud.bigtable.row_filters import RowSampleFilter - from google.cloud.bigtable.row_filters import RowFilterUnion - from google.cloud.bigtable.row_filters import StripValueTransformerFilter + from google.cloud.bigtable.data.row_filters import ConditionalRowFilter + from google.cloud.bigtable.data.row_filters import RowSampleFilter + from google.cloud.bigtable.data.row_filters import RowFilterUnion + from google.cloud.bigtable.data.row_filters import StripValueTransformerFilter row_filter1 = StripValueTransformerFilter(True) row_filter2 = RowSampleFilter(0.25) @@ -1931,7 +1931,7 @@ def test_conditional_row_filter___str__(): ], ) def test_literal_value__write_literal_regex(input_arg, expected_bytes): - from google.cloud.bigtable.row_filters import LiteralValueFilter + from google.cloud.bigtable.data.row_filters import LiteralValueFilter filter_ = LiteralValueFilter(input_arg) assert filter_.regex == expected_bytes @@ -1980,7 +1980,7 @@ def _ValueRangePB(*args, **kw): def _get_regex_filters(): - from google.cloud.bigtable.row_filters import ( + from google.cloud.bigtable.data.row_filters import ( RowKeyRegexFilter, FamilyNameRegexFilter, ColumnQualifierRegexFilter, @@ -1998,7 +1998,7 @@ def _get_regex_filters(): def _get_bool_filters(): - from google.cloud.bigtable.row_filters import ( + from google.cloud.bigtable.data.row_filters import ( SinkFilter, PassAllFilter, BlockAllFilter, @@ -2014,7 +2014,7 @@ def _get_bool_filters(): def _get_cell_count_filters(): - from google.cloud.bigtable.row_filters import ( + from google.cloud.bigtable.data.row_filters import ( CellsRowLimitFilter, CellsRowOffsetFilter, CellsColumnLimitFilter, @@ -2028,7 +2028,7 @@ def _get_cell_count_filters(): def _get_filter_combination_filters(): - from google.cloud.bigtable.row_filters import ( + from google.cloud.bigtable.data.row_filters import ( RowFilterChain, RowFilterUnion, ) From 48b2d2cb5a6d0635617d843a112af991f2213e9e Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 23 Jun 2023 17:07:55 -0700 Subject: [PATCH 186/213] renamed async subpath to _async --- google/cloud/bigtable/data/{async => _async}/_mutate_rows.py | 0 google/cloud/bigtable/data/{async => _async}/_read_rows.py | 0 google/cloud/bigtable/data/{async => _async}/client.py | 0 google/cloud/bigtable/data/{async => _async}/mutations_batcher.py | 0 4 files changed, 0 insertions(+), 0 deletions(-) rename google/cloud/bigtable/data/{async => _async}/_mutate_rows.py (100%) rename google/cloud/bigtable/data/{async => _async}/_read_rows.py (100%) rename google/cloud/bigtable/data/{async => _async}/client.py (100%) rename google/cloud/bigtable/data/{async => _async}/mutations_batcher.py (100%) diff --git a/google/cloud/bigtable/data/async/_mutate_rows.py b/google/cloud/bigtable/data/_async/_mutate_rows.py similarity index 100% rename from google/cloud/bigtable/data/async/_mutate_rows.py rename to google/cloud/bigtable/data/_async/_mutate_rows.py diff --git a/google/cloud/bigtable/data/async/_read_rows.py b/google/cloud/bigtable/data/_async/_read_rows.py similarity index 100% rename from google/cloud/bigtable/data/async/_read_rows.py rename to google/cloud/bigtable/data/_async/_read_rows.py diff --git a/google/cloud/bigtable/data/async/client.py b/google/cloud/bigtable/data/_async/client.py similarity index 100% rename from google/cloud/bigtable/data/async/client.py rename to 
google/cloud/bigtable/data/_async/client.py diff --git a/google/cloud/bigtable/data/async/mutations_batcher.py b/google/cloud/bigtable/data/_async/mutations_batcher.py similarity index 100% rename from google/cloud/bigtable/data/async/mutations_batcher.py rename to google/cloud/bigtable/data/_async/mutations_batcher.py From c00b395a8927e686944470949b300098df282212 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 23 Jun 2023 17:24:16 -0700 Subject: [PATCH 187/213] fixed imports --- google/cloud/bigtable/data/__init__.py | 36 ++++++------- .../bigtable/data/_async/_mutate_rows.py | 14 ++--- .../cloud/bigtable/data/_async/_read_rows.py | 16 +++--- google/cloud/bigtable/data/_async/client.py | 54 +++++++++---------- .../bigtable/data/_async/mutations_batcher.py | 19 +++---- google/cloud/bigtable/data/_helpers.py | 2 +- .../bigtable/data/_read_rows_state_machine.py | 4 +- google/cloud/bigtable/data/exceptions.py | 6 +-- google/cloud/bigtable/data/gapic_version.py | 16 ------ google/cloud/bigtable/data/mutations.py | 2 +- google/cloud/bigtable/data/read_rows_query.py | 6 +-- tests/unit/data/_async/test_client.py | 22 ++++---- 12 files changed, 89 insertions(+), 108 deletions(-) delete mode 100644 google/cloud/bigtable/data/gapic_version.py diff --git a/google/cloud/bigtable/data/__init__.py b/google/cloud/bigtable/data/__init__.py index 06b45bc4d..ef7be687c 100644 --- a/google/cloud/bigtable/data/__init__.py +++ b/google/cloud/bigtable/data/__init__.py @@ -18,21 +18,21 @@ from google.cloud.bigtable import gapic_version as package_version -from google.cloud.bigtable.client import BigtableDataClient -from google.cloud.bigtable.client import Table - -from google.cloud.bigtable.read_rows_query import ReadRowsQuery -from google.cloud.bigtable.read_rows_query import RowRange -from google.cloud.bigtable.row import Row -from google.cloud.bigtable.row import Cell - -from google.cloud.bigtable.mutations_batcher import MutationsBatcher -from google.cloud.bigtable.mutations import Mutation -from google.cloud.bigtable.mutations import RowMutationEntry -from google.cloud.bigtable.mutations import SetCell -from google.cloud.bigtable.mutations import DeleteRangeFromColumn -from google.cloud.bigtable.mutations import DeleteAllFromFamily -from google.cloud.bigtable.mutations import DeleteAllFromRow +from google.cloud.bigtable.data._async.client import BigtableDataClientAsync +from google.cloud.bigtable.data._async.client import TableAsync + +from google.cloud.bigtable.data.read_rows_query import ReadRowsQuery +from google.cloud.bigtable.data.read_rows_query import RowRange +from google.cloud.bigtable.data.row import Row +from google.cloud.bigtable.data.row import Cell + +from google.cloud.bigtable.data._async.mutations_batcher import MutationsBatcherAsync +from google.cloud.bigtable.data.mutations import Mutation +from google.cloud.bigtable.data.mutations import RowMutationEntry +from google.cloud.bigtable.data.mutations import SetCell +from google.cloud.bigtable.data.mutations import DeleteRangeFromColumn +from google.cloud.bigtable.data.mutations import DeleteAllFromFamily +from google.cloud.bigtable.data.mutations import DeleteAllFromRow # Type alias for the output of sample_keys RowKeySamples = List[Tuple[bytes, int]] @@ -42,12 +42,12 @@ __version__: str = package_version.__version__ __all__ = ( - "BigtableDataClient", - "Table", + "BigtableDataClientAsync", + "TableAsync", "RowKeySamples", "ReadRowsQuery", "RowRange", - "MutationsBatcher", + "MutationsBatcherAsync", "Mutation", "RowMutationEntry", 
"SetCell", diff --git a/google/cloud/bigtable/data/_async/_mutate_rows.py b/google/cloud/bigtable/data/_async/_mutate_rows.py index acabc863e..75b6bef81 100644 --- a/google/cloud/bigtable/data/_async/_mutate_rows.py +++ b/google/cloud/bigtable/data/_async/_mutate_rows.py @@ -19,20 +19,20 @@ from google.api_core import exceptions as core_exceptions from google.api_core import retry_async as retries -import google.cloud.bigtable.exceptions as bt_exceptions -from google.cloud.bigtable._helpers import _make_metadata -from google.cloud.bigtable._helpers import _convert_retry_deadline -from google.cloud.bigtable._helpers import _attempt_timeout_generator +import google.cloud.bigtable.data.exceptions as bt_exceptions +from google.cloud.bigtable.data._helpers import _make_metadata +from google.cloud.bigtable.data._helpers import _convert_retry_deadline +from google.cloud.bigtable.data._helpers import _attempt_timeout_generator # mutate_rows requests are limited to this number of mutations -from google.cloud.bigtable.mutations import MUTATE_ROWS_REQUEST_MUTATION_LIMIT +from google.cloud.bigtable.data.mutations import MUTATE_ROWS_REQUEST_MUTATION_LIMIT if TYPE_CHECKING: from google.cloud.bigtable_v2.services.bigtable.async_client import ( BigtableAsyncClient, ) - from google.cloud.bigtable.client import Table - from google.cloud.bigtable.mutations import RowMutationEntry + from google.cloud.bigtable.data.client import Table + from google.cloud.bigtable.data.mutations import RowMutationEntry class _MutateRowsOperationAsync: diff --git a/google/cloud/bigtable/data/_async/_read_rows.py b/google/cloud/bigtable/data/_async/_read_rows.py index b3becbd89..450518b60 100644 --- a/google/cloud/bigtable/data/_async/_read_rows.py +++ b/google/cloud/bigtable/data/_async/_read_rows.py @@ -33,16 +33,16 @@ from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse from google.cloud.bigtable_v2.services.bigtable.async_client import BigtableAsyncClient -from google.cloud.bigtable.row import Row, _LastScannedRow -from google.cloud.bigtable.exceptions import InvalidChunk -from google.cloud.bigtable.exceptions import _RowSetComplete -from google.cloud.bigtable.exceptions import IdleTimeout -from google.cloud.bigtable._read_rows_state_machine import _StateMachine +from google.cloud.bigtable.data.row import Row, _LastScannedRow +from google.cloud.bigtable.data.exceptions import InvalidChunk +from google.cloud.bigtable.data.exceptions import _RowSetComplete +from google.cloud.bigtable.data.exceptions import IdleTimeout +from google.cloud.bigtable.data._read_rows_state_machine import _StateMachine from google.api_core import retry_async as retries from google.api_core import exceptions as core_exceptions -from google.cloud.bigtable._helpers import _make_metadata -from google.cloud.bigtable._helpers import _attempt_timeout_generator -from google.cloud.bigtable._helpers import _convert_retry_deadline +from google.cloud.bigtable.data._helpers import _make_metadata +from google.cloud.bigtable.data._helpers import _attempt_timeout_generator +from google.cloud.bigtable.data._helpers import _convert_retry_deadline class _ReadRowsOperationAsync(AsyncIterable[Row]): diff --git a/google/cloud/bigtable/data/_async/client.py b/google/cloud/bigtable/data/_async/client.py index 198b63822..c1f414d58 100644 --- a/google/cloud/bigtable/data/_async/client.py +++ b/google/cloud/bigtable/data/_async/client.py @@ -43,34 +43,34 @@ from google.api_core.exceptions import GoogleAPICallError from google.api_core import retry_async as 
retries from google.api_core import exceptions as core_exceptions -from google.cloud.bigtable._read_rows import _ReadRowsOperationAsync +from google.cloud.bigtable.data._async._read_rows import _ReadRowsOperationAsync +from google.cloud.bigtable.data._async._read_rows import ReadRowsIteratorAsync import google.auth.credentials import google.auth._default from google.api_core import client_options as client_options_lib -from google.cloud.bigtable.row import Row -from google.cloud.bigtable.read_rows_query import ReadRowsQuery -from google.cloud.bigtable.iterators import ReadRowsIteratorAsync -from google.cloud.bigtable.exceptions import FailedQueryShardError -from google.cloud.bigtable.exceptions import ShardedReadRowsExceptionGroup - -from google.cloud.bigtable.mutations import Mutation, RowMutationEntry -from google.cloud.bigtable._mutate_rows import _MutateRowsOperationAsync -from google.cloud.bigtable._helpers import _make_metadata -from google.cloud.bigtable._helpers import _convert_retry_deadline -from google.cloud.bigtable.mutations_batcher import MutationsBatcher -from google.cloud.bigtable.mutations_batcher import _MB_SIZE -from google.cloud.bigtable._helpers import _attempt_timeout_generator - -from google.cloud.bigtable.read_modify_write_rules import ReadModifyWriteRule -from google.cloud.bigtable.row_filters import RowFilter -from google.cloud.bigtable.row_filters import StripValueTransformerFilter -from google.cloud.bigtable.row_filters import CellsRowLimitFilter -from google.cloud.bigtable.row_filters import RowFilterChain +from google.cloud.bigtable.data.row import Row +from google.cloud.bigtable.data.read_rows_query import ReadRowsQuery +from google.cloud.bigtable.data.exceptions import FailedQueryShardError +from google.cloud.bigtable.data.exceptions import ShardedReadRowsExceptionGroup + +from google.cloud.bigtable.data.mutations import Mutation, RowMutationEntry +from google.cloud.bigtable.data._async._mutate_rows import _MutateRowsOperationAsync +from google.cloud.bigtable.data._helpers import _make_metadata +from google.cloud.bigtable.data._helpers import _convert_retry_deadline +from google.cloud.bigtable.data._async.mutations_batcher import MutationsBatcherAsync +from google.cloud.bigtable.data._async.mutations_batcher import _MB_SIZE +from google.cloud.bigtable.data._helpers import _attempt_timeout_generator + +from google.cloud.bigtable.data.read_modify_write_rules import ReadModifyWriteRule +from google.cloud.bigtable.data.row_filters import RowFilter +from google.cloud.bigtable.data.row_filters import StripValueTransformerFilter +from google.cloud.bigtable.data.row_filters import CellsRowLimitFilter +from google.cloud.bigtable.data.row_filters import RowFilterChain if TYPE_CHECKING: - from google.cloud.bigtable import RowKeySamples - from google.cloud.bigtable import ShardedQuery + from google.cloud.bigtable.data.data import RowKeySamples + from google.cloud.bigtable.data.data import ShardedQuery # used by read_rows_sharded to limit how many requests are attempted in parallel CONCURRENCY_LIMIT = 10 @@ -556,7 +556,7 @@ async def read_row( See read_rows_stream Raises: - - google.cloud.bigtable.exceptions.RowNotFound: if the row does not exist + - google.cloud.bigtable.data.exceptions.RowNotFound: if the row does not exist Returns: - the individual row requested, or None if it does not exist """ @@ -762,7 +762,7 @@ def mutations_batcher( flow_control_max_bytes: int = 100 * _MB_SIZE, batch_operation_timeout: float | None = None, batch_per_request_timeout: float | None 
= None, - ) -> MutationsBatcher: + ) -> MutationsBatcherAsync: """ Returns a new mutations batcher instance. @@ -782,9 +782,9 @@ def mutations_batcher( - batch_per_request_timeout: timeout for each individual request, in seconds. If None, table default_per_request_timeout will be used Returns: - - a MutationsBatcher context manager that can batch requests + - a MutationsBatcherAsync context manager that can batch requests """ - return MutationsBatcher( + return MutationsBatcherAsync( self, flush_interval=flush_interval, flush_limit_mutation_count=flush_limit_mutation_count, @@ -930,7 +930,7 @@ async def bulk_mutate_rows( if per_request_timeout is not None and per_request_timeout > operation_timeout: raise ValueError("per_request_timeout must be less than operation_timeout") - operation = _MutateRowsOperation( + operation = _MutateRowsOperationAsync( self.client._gapic_client, self, mutation_entries, diff --git a/google/cloud/bigtable/data/_async/mutations_batcher.py b/google/cloud/bigtable/data/_async/mutations_batcher.py index c6d617896..032a53c91 100644 --- a/google/cloud/bigtable/data/_async/mutations_batcher.py +++ b/google/cloud/bigtable/data/_async/mutations_batcher.py @@ -20,16 +20,13 @@ import warnings from collections import deque -from google.cloud.bigtable.mutations import RowMutationEntry -from google.cloud.bigtable.exceptions import MutationsExceptionGroup -from google.cloud.bigtable.exceptions import FailedMutationEntryError +from google.cloud.bigtable.data.mutations import RowMutationEntry +from google.cloud.bigtable.data.exceptions import MutationsExceptionGroup +from google.cloud.bigtable.data.exceptions import FailedMutationEntryError -from google.cloud.bigtable._mutate_rows import _MutateRowsOperation -from google.cloud.bigtable._mutate_rows import MUTATE_ROWS_REQUEST_MUTATION_LIMIT -from google.cloud.bigtable.mutations import Mutation - -if TYPE_CHECKING: - from google.cloud.bigtable.client import Table # pragma: no cover +from google.cloud.bigtable.data._async._mutate_rows import _MutateRowsOperationAsync +from google.cloud.bigtable.data._async._mutate_rows import MUTATE_ROWS_REQUEST_MUTATION_LIMIT +from google.cloud.bigtable.data.mutations import Mutation # used to make more readable default values _MB_SIZE = 1024 * 1024 @@ -179,7 +176,7 @@ class MutationsBatcherAsync: def __init__( self, - table: "Table", + table: TableAsync, *, flush_interval: float | None = 5, flush_limit_mutation_count: int | None = 1000, @@ -353,7 +350,7 @@ async def _execute_mutate_rows( if self._table.app_profile_id: request["app_profile_id"] = self._table.app_profile_id try: - operation = _MutateRowsOperation( + operation = _MutateRowsOperationAsync( self._table.client._gapic_client, self._table, batch, diff --git a/google/cloud/bigtable/data/_helpers.py b/google/cloud/bigtable/data/_helpers.py index 722fac9f4..64d91e108 100644 --- a/google/cloud/bigtable/data/_helpers.py +++ b/google/cloud/bigtable/data/_helpers.py @@ -18,7 +18,7 @@ import time from google.api_core import exceptions as core_exceptions -from google.cloud.bigtable.exceptions import RetryExceptionGroup +from google.cloud.bigtable.data.exceptions import RetryExceptionGroup """ Helper functions used in various places in the library. 
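For orientation, the following minimal sketch shows what the relocated imports in this commit look like from user code. It is illustrative only and not part of the patch; it uses only names that appear in the updated __init__.py and row_filters diffs above (BigtableDataClientAsync, ReadRowsQuery, RowSampleFilter, StripValueTransformerFilter), and the constructions mirror the values used in the test diffs.

# Illustrative only -- not part of this patch.
# Post-refactor, the data-client surface lives under google.cloud.bigtable.data.
from google.cloud.bigtable.data import BigtableDataClientAsync, ReadRowsQuery
from google.cloud.bigtable.data.row_filters import RowSampleFilter
from google.cloud.bigtable.data.row_filters import StripValueTransformerFilter

# The filter classes behave as before; only their module path moved under `data`.
row_filter = RowSampleFilter(0.25)
value_filter = StripValueTransformerFilter(True)

The subpackage split appears intended to keep the new asyncio data client and its helpers separate from the existing surface directly under google.cloud.bigtable, which is why every import in this commit gains the extra `.data` segment.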
diff --git a/google/cloud/bigtable/data/_read_rows_state_machine.py b/google/cloud/bigtable/data/_read_rows_state_machine.py index bda0b1337..7c0d05fb9 100644 --- a/google/cloud/bigtable/data/_read_rows_state_machine.py +++ b/google/cloud/bigtable/data/_read_rows_state_machine.py @@ -17,8 +17,8 @@ from typing import Type from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse -from google.cloud.bigtable.row import Row, Cell, _LastScannedRow -from google.cloud.bigtable.exceptions import InvalidChunk +from google.cloud.bigtable.data.row import Row, Cell, _LastScannedRow +from google.cloud.bigtable.data.exceptions import InvalidChunk """ This module provides classes for the read_rows state machine: diff --git a/google/cloud/bigtable/data/exceptions.py b/google/cloud/bigtable/data/exceptions.py index 15048a2e2..0dd085e8c 100644 --- a/google/cloud/bigtable/data/exceptions.py +++ b/google/cloud/bigtable/data/exceptions.py @@ -19,13 +19,13 @@ from typing import Any, TYPE_CHECKING from google.api_core import exceptions as core_exceptions -from google.cloud.bigtable.row import Row +from google.cloud.bigtable.data.row import Row is_311_plus = sys.version_info >= (3, 11) if TYPE_CHECKING: - from google.cloud.bigtable.mutations import RowMutationEntry - from google.cloud.bigtable.read_rows_query import ReadRowsQuery + from google.cloud.bigtable.data.mutations import RowMutationEntry + from google.cloud.bigtable.data.read_rows_query import ReadRowsQuery class IdleTimeout(core_exceptions.DeadlineExceeded): diff --git a/google/cloud/bigtable/data/gapic_version.py b/google/cloud/bigtable/data/gapic_version.py deleted file mode 100644 index 8d4f4cfb6..000000000 --- a/google/cloud/bigtable/data/gapic_version.py +++ /dev/null @@ -1,16 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright 2022 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -__version__ = "2.17.0" # {x-release-please-version} diff --git a/google/cloud/bigtable/data/mutations.py b/google/cloud/bigtable/data/mutations.py index c33bb61d7..de1b3b137 100644 --- a/google/cloud/bigtable/data/mutations.py +++ b/google/cloud/bigtable/data/mutations.py @@ -19,7 +19,7 @@ from abc import ABC, abstractmethod from sys import getsizeof -from google.cloud.bigtable.read_modify_write_rules import MAX_INCREMENT_VALUE +from google.cloud.bigtable.data.read_modify_write_rules import MAX_INCREMENT_VALUE # special value for SetCell mutation timestamps. 
If set, server will assign a timestamp SERVER_SIDE_TIMESTAMP = -1 diff --git a/google/cloud/bigtable/data/read_rows_query.py b/google/cloud/bigtable/data/read_rows_query.py index eb28eeda3..7d7e1f99f 100644 --- a/google/cloud/bigtable/data/read_rows_query.py +++ b/google/cloud/bigtable/data/read_rows_query.py @@ -18,11 +18,11 @@ from bisect import bisect_right from collections import defaultdict from dataclasses import dataclass -from google.cloud.bigtable.row_filters import RowFilter +from google.cloud.bigtable.data.row_filters import RowFilter if TYPE_CHECKING: - from google.cloud.bigtable import RowKeySamples - from google.cloud.bigtable import ShardedQuery + from google.cloud.bigtable.data import RowKeySamples + from google.cloud.bigtable.data import ShardedQuery @dataclass diff --git a/tests/unit/data/_async/test_client.py b/tests/unit/data/_async/test_client.py index f94bb09dd..78eaa6b20 100644 --- a/tests/unit/data/_async/test_client.py +++ b/tests/unit/data/_async/test_client.py @@ -1294,12 +1294,12 @@ async def test_read_rows_per_request_timeout( @pytest.mark.asyncio async def test_read_rows_idle_timeout(self): - from google.cloud.bigtable.data._async.client import ReadRowsIterator + from google.cloud.bigtable.data._async.client import ReadRowsIteratorAsync from google.cloud.bigtable_v2.services.bigtable.async_client import ( BigtableAsyncClient, ) from google.cloud.bigtable.data.exceptions import IdleTimeout - from google.cloud.bigtable.data._async._read_rows import _ReadRowsOperation + from google.cloud.bigtable.data._async._read_rows import _ReadRowsOperationAsync chunks = [ self._make_chunk(row_key=b"test_1"), @@ -1310,7 +1310,7 @@ async def test_read_rows_idle_timeout(self): chunks ) with mock.patch.object( - ReadRowsIterator, "_start_idle_timer" + ReadRowsIteratorAsync, "_start_idle_timer" ) as start_idle_timer: client = self._make_client() table = client.get_table("instance", "table") @@ -1318,7 +1318,7 @@ async def test_read_rows_idle_timeout(self): gen = await table.read_rows_stream(query) # should start idle timer on creation start_idle_timer.assert_called_once() - with mock.patch.object(_ReadRowsOperation, "aclose", AsyncMock()) as aclose: + with mock.patch.object(_ReadRowsOperationAsync, "aclose", AsyncMock()) as aclose: # start idle timer with our own value await gen._start_idle_timer(0.1) # should timeout after being abandoned @@ -1397,13 +1397,13 @@ async def test_read_rows_revise_request(self): """ Ensure that _revise_request is called between retries """ - from google.cloud.bigtable.data._async._read_rows import _ReadRowsOperation + from google.cloud.bigtable.data._async._read_rows import _ReadRowsOperationAsync from google.cloud.bigtable.data.exceptions import InvalidChunk with mock.patch.object( - _ReadRowsOperation, "_revise_request_rowset" + _ReadRowsOperationAsync, "_revise_request_rowset" ) as revise_rowset: - with mock.patch.object(_ReadRowsOperation, "aclose"): + with mock.patch.object(_ReadRowsOperationAsync, "aclose"): revise_rowset.return_value = "modified" async with self._make_table() as table: read_rows = table.client._gapic_client.read_rows @@ -1431,11 +1431,11 @@ async def test_read_rows_default_timeouts(self): """ Ensure that the default timeouts are set on the read rows operation when not overridden """ - from google.cloud.bigtable.data._async._read_rows import _ReadRowsOperation + from google.cloud.bigtable.data._async._read_rows import _ReadRowsOperationAsync operation_timeout = 8 per_request_timeout = 4 - with 
mock.patch.object(_ReadRowsOperation, "__init__") as mock_op: + with mock.patch.object(_ReadRowsOperationAsync, "__init__") as mock_op: mock_op.side_effect = RuntimeError("mock error") async with self._make_table( default_operation_timeout=operation_timeout, @@ -1454,11 +1454,11 @@ async def test_read_rows_default_timeout_override(self): """ When timeouts are passed, they overwrite default values """ - from google.cloud.bigtable.data._async._read_rows import _ReadRowsOperation + from google.cloud.bigtable.data._async._read_rows import _ReadRowsOperationAsync operation_timeout = 8 per_request_timeout = 4 - with mock.patch.object(_ReadRowsOperation, "__init__") as mock_op: + with mock.patch.object(_ReadRowsOperationAsync, "__init__") as mock_op: mock_op.side_effect = RuntimeError("mock error") async with self._make_table( default_operation_timeout=99, default_per_request_timeout=97 From e4a654621b90e469a2b0220d7f1943a5c7aa4046 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 23 Jun 2023 17:34:12 -0700 Subject: [PATCH 188/213] split up test files for state machine class --- tests/unit/data/_async/test__read_rows.py | 840 +++++------------- tests/unit/data/_async/test_iterators.py | 251 ------ .../data/test__read_rows_state_machine.py | 666 ++++++++++++++ 3 files changed, 889 insertions(+), 868 deletions(-) delete mode 100644 tests/unit/data/_async/test_iterators.py create mode 100644 tests/unit/data/test__read_rows_state_machine.py diff --git a/tests/unit/data/_async/test__read_rows.py b/tests/unit/data/_async/test__read_rows.py index cd820006f..ffc1286d5 100644 --- a/tests/unit/data/_async/test__read_rows.py +++ b/tests/unit/data/_async/test__read_rows.py @@ -1,10 +1,22 @@ -import unittest +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ import pytest +import sys +import asyncio from google.cloud.bigtable.data.exceptions import InvalidChunk -from google.cloud.bigtable.data._read_rows import AWAITING_NEW_ROW -from google.cloud.bigtable.data._read_rows import AWAITING_NEW_CELL -from google.cloud.bigtable.data._read_rows import AWAITING_CELL_VALUE +from google.cloud.bigtable.data._async._read_rows import _ReadRowsOperationAsync # try/except added for compatibility with python < 3.8 try: @@ -29,9 +41,9 @@ class TestReadRowsOperation: @staticmethod def _get_target_class(): - from google.cloud.bigtable.data._read_rows import _ReadRowsOperation + from google.cloud.bigtable.data._async._read_rows import _ReadRowsOperationAsync - return _ReadRowsOperation + return _ReadRowsOperationAsync def _make_one(self, *args, **kwargs): return self._get_target_class()(*args, **kwargs) @@ -44,7 +56,7 @@ def test_ctor_defaults(self): default_operation_timeout = 600 time_gen_mock = mock.Mock() with mock.patch( - "google.cloud.bigtable.data._read_rows._attempt_timeout_generator", time_gen_mock + "google.cloud.bigtable.data._async._read_rows._attempt_timeout_generator", time_gen_mock ): instance = self._make_one(request, client) assert time_gen_mock.call_count == 1 @@ -70,7 +82,7 @@ def test_ctor(self): expected_request_timeout = 44 time_gen_mock = mock.Mock() with mock.patch( - "google.cloud.bigtable.data._read_rows._attempt_timeout_generator", time_gen_mock + "google.cloud.bigtable.data._async._read_rows._attempt_timeout_generator", time_gen_mock ): instance = self._make_one( request, @@ -320,7 +332,7 @@ async def test_retryable_ignore_repeated_rows(self): """ Duplicate rows should cause an invalid chunk error """ - from google.cloud.bigtable.data._read_rows import _ReadRowsOperation + from google.cloud.bigtable.data._async._read_rows import _ReadRowsOperationAsync from google.cloud.bigtable.data.row import Row from google.cloud.bigtable.data.exceptions import InvalidChunk @@ -330,7 +342,7 @@ async def mock_stream(): yield Row(b"dup_key", cells=[]) with mock.patch.object( - _ReadRowsOperation, "merge_row_response_stream" + _ReadRowsOperationAsync, "merge_row_response_stream" ) as mock_stream_fn: mock_stream_fn.return_value = mock_stream() instance = self._make_one({}, mock.AsyncMock()) @@ -345,7 +357,7 @@ async def test_retryable_ignore_last_scanned_rows(self): """ Last scanned rows should not be emitted """ - from google.cloud.bigtable.data._read_rows import _ReadRowsOperation + from google.cloud.bigtable.data._async._read_rows import _ReadRowsOperationAsync from google.cloud.bigtable.data.row import Row, _LastScannedRow async def mock_stream(): @@ -355,7 +367,7 @@ async def mock_stream(): yield Row(b"key3", cells=[]) with mock.patch.object( - _ReadRowsOperation, "merge_row_response_stream" + _ReadRowsOperationAsync, "merge_row_response_stream" ) as mock_stream_fn: mock_stream_fn.return_value = mock_stream() instance = self._make_one({}, mock.AsyncMock()) @@ -367,7 +379,7 @@ async def mock_stream(): @pytest.mark.asyncio async def test_retryable_cancel_on_close(self): """Underlying gapic call should be cancelled when stream is closed""" - from google.cloud.bigtable.data._read_rows import _ReadRowsOperation + from google.cloud.bigtable.data._async._read_rows import _ReadRowsOperationAsync from google.cloud.bigtable.data.row import Row async def mock_stream(): @@ -375,7 +387,7 @@ async def mock_stream(): yield Row(b"key1", cells=[]) with mock.patch.object( - _ReadRowsOperation, "merge_row_response_stream" + _ReadRowsOperationAsync, 
"merge_row_response_stream" ) as mock_stream_fn: mock_stream_fn.return_value = mock_stream() mock_gapic = mock.AsyncMock() @@ -386,633 +398,227 @@ async def mock_stream(): await instance.aclose() assert mock_call.cancel.call_count == 1 +class MockStream(_ReadRowsOperationAsync): + """ + Mock a _ReadRowsOperationAsync stream for testing + """ -class TestStateMachine(unittest.TestCase): - @staticmethod - def _get_target_class(): - from google.cloud.bigtable.data._read_rows import _StateMachine + def __init__(self, items=None, errors=None, operation_timeout=None): + self.transient_errors = errors + self.operation_timeout = operation_timeout + self.next_idx = 0 + if items is None: + items = list(range(10)) + self.items = items - return _StateMachine + def __aiter__(self): + return self - def _make_one(self, *args, **kwargs): - return self._get_target_class()(*args, **kwargs) + async def __anext__(self): + if self.next_idx >= len(self.items): + raise StopAsyncIteration + item = self.items[self.next_idx] + self.next_idx += 1 + if isinstance(item, Exception): + raise item + return item - def test_ctor(self): - from google.cloud.bigtable.data._read_rows import _RowBuilder - - instance = self._make_one() - assert instance.last_seen_row_key is None - assert instance.current_state == AWAITING_NEW_ROW - assert instance.current_family is None - assert instance.current_qualifier is None - assert isinstance(instance.adapter, _RowBuilder) - assert instance.adapter.current_key is None - assert instance.adapter.working_cell is None - assert instance.adapter.working_value is None - assert instance.adapter.completed_cells == [] - - def test_is_terminal_state(self): - - instance = self._make_one() - assert instance.is_terminal_state() is True - instance.current_state = AWAITING_NEW_ROW - assert instance.is_terminal_state() is True - instance.current_state = AWAITING_NEW_CELL - assert instance.is_terminal_state() is False - instance.current_state = AWAITING_CELL_VALUE - assert instance.is_terminal_state() is False - - def test__reset_row(self): - instance = self._make_one() - instance.current_state = mock.Mock() - instance.current_family = "family" - instance.current_qualifier = "qualifier" - instance.adapter = mock.Mock() - instance._reset_row() - assert instance.current_state == AWAITING_NEW_ROW - assert instance.current_family is None - assert instance.current_qualifier is None - assert instance.adapter.reset.call_count == 1 - - def test_handle_last_scanned_row_wrong_state(self): - from google.cloud.bigtable.data.exceptions import InvalidChunk + async def aclose(self): + pass - instance = self._make_one() - instance.current_state = AWAITING_NEW_CELL - with pytest.raises(InvalidChunk) as e: - instance.handle_last_scanned_row("row_key") - assert e.value.args[0] == "Last scanned row key received in invalid state" - instance.current_state = AWAITING_CELL_VALUE - with pytest.raises(InvalidChunk) as e: - instance.handle_last_scanned_row("row_key") - assert e.value.args[0] == "Last scanned row key received in invalid state" - - def test_handle_last_scanned_row_out_of_order(self): - from google.cloud.bigtable.data.exceptions import InvalidChunk - instance = self._make_one() - instance.last_seen_row_key = b"b" - with pytest.raises(InvalidChunk) as e: - instance.handle_last_scanned_row(b"a") - assert e.value.args[0] == "Last scanned row key is out of order" - with pytest.raises(InvalidChunk) as e: - instance.handle_last_scanned_row(b"b") - assert e.value.args[0] == "Last scanned row key is out of order" - - def 
test_handle_last_scanned_row(self): - from google.cloud.bigtable.data.row import _LastScannedRow - - instance = self._make_one() - instance.adapter = mock.Mock() - instance.last_seen_row_key = b"a" - output_row = instance.handle_last_scanned_row(b"b") - assert instance.last_seen_row_key == b"b" - assert isinstance(output_row, _LastScannedRow) - assert output_row.row_key == b"b" - assert instance.current_state == AWAITING_NEW_ROW - assert instance.current_family is None - assert instance.current_qualifier is None - assert instance.adapter.reset.call_count == 1 - - def test__handle_complete_row(self): - from google.cloud.bigtable.data.row import Row - instance = self._make_one() - instance.current_state = mock.Mock() - instance.current_family = "family" - instance.current_qualifier = "qualifier" - instance.adapter = mock.Mock() - instance._handle_complete_row(Row(b"row_key", {})) - assert instance.last_seen_row_key == b"row_key" - assert instance.current_state == AWAITING_NEW_ROW - assert instance.current_family is None - assert instance.current_qualifier is None - assert instance.adapter.reset.call_count == 1 - - def test__handle_reset_chunk_errors(self): - from google.cloud.bigtable.data.exceptions import InvalidChunk - from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse +class TestReadRowsIteratorAsync: + async def mock_stream(self, size=10): + for i in range(size): + yield i - instance = self._make_one() - with pytest.raises(InvalidChunk) as e: - instance._handle_reset_chunk(mock.Mock()) - instance.current_state = mock.Mock() - assert e.value.args[0] == "Reset chunk received when not processing row" - with pytest.raises(InvalidChunk) as e: - instance._handle_reset_chunk( - ReadRowsResponse.CellChunk(row_key=b"row_key")._pb - ) - assert e.value.args[0] == "Reset chunk has a row key" - with pytest.raises(InvalidChunk) as e: - instance._handle_reset_chunk( - ReadRowsResponse.CellChunk(family_name="family")._pb - ) - assert e.value.args[0] == "Reset chunk has a family name" - with pytest.raises(InvalidChunk) as e: - instance._handle_reset_chunk( - ReadRowsResponse.CellChunk(qualifier=b"qualifier")._pb - ) - assert e.value.args[0] == "Reset chunk has a qualifier" - with pytest.raises(InvalidChunk) as e: - instance._handle_reset_chunk( - ReadRowsResponse.CellChunk(timestamp_micros=1)._pb - ) - assert e.value.args[0] == "Reset chunk has a timestamp" - with pytest.raises(InvalidChunk) as e: - instance._handle_reset_chunk(ReadRowsResponse.CellChunk(value=b"value")._pb) - assert e.value.args[0] == "Reset chunk has a value" - with pytest.raises(InvalidChunk) as e: - instance._handle_reset_chunk( - ReadRowsResponse.CellChunk(labels=["label"])._pb - ) - assert e.value.args[0] == "Reset chunk has labels" - - def test_handle_chunk_out_of_order(self): - from google.cloud.bigtable.data.exceptions import InvalidChunk - from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse - - instance = self._make_one() - instance.last_seen_row_key = b"b" - with pytest.raises(InvalidChunk) as e: - chunk = ReadRowsResponse.CellChunk(row_key=b"a")._pb - instance.handle_chunk(chunk) - assert "increasing" in e.value.args[0] - with pytest.raises(InvalidChunk) as e: - chunk = ReadRowsResponse.CellChunk(row_key=b"b")._pb - instance.handle_chunk(chunk) - assert "increasing" in e.value.args[0] - - def test_handle_chunk_reset(self): - """Should call _handle_reset_chunk when a chunk with reset_row is encountered""" - from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse + def 
_make_one(self, *args, **kwargs): + from google.cloud.bigtable.data._async._read_rows import ReadRowsIteratorAsync - instance = self._make_one() - with mock.patch.object(type(instance), "_handle_reset_chunk") as mock_reset: - chunk = ReadRowsResponse.CellChunk(reset_row=True)._pb - output = instance.handle_chunk(chunk) - assert output is None - assert mock_reset.call_count == 1 + stream = MockStream(*args, **kwargs) + return ReadRowsIteratorAsync(stream) - @pytest.mark.parametrize("state", [AWAITING_NEW_ROW, AWAITING_CELL_VALUE]) - def handle_chunk_with_commit_wrong_state(self, state): - from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse + def test_ctor(self): + with mock.patch("time.time", return_value=0): + iterator = self._make_one() + assert iterator.last_interaction_time == 0 + assert iterator._idle_timeout_task is None + assert iterator.active is True - instance = self._make_one() - with mock.patch.object( - type(instance.current_state), "handle_chunk" - ) as mock_state_handle: - mock_state_handle.return_value = state(mock.Mock()) - with pytest.raises(InvalidChunk) as e: - chunk = ReadRowsResponse.CellChunk(commit_row=True)._pb - instance.handle_chunk(mock.Mock(), chunk) - assert instance.current_state == state - assert e.value.args[0] == "Commit chunk received with in invalid state" - - def test_handle_chunk_with_commit(self): - from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse - from google.cloud.bigtable.data.row import Row + def test___aiter__(self): + iterator = self._make_one() + assert iterator.__aiter__() is iterator - instance = self._make_one() - with mock.patch.object(type(instance), "_reset_row") as mock_reset: - chunk = ReadRowsResponse.CellChunk( - row_key=b"row_key", family_name="f", qualifier=b"q", commit_row=True - )._pb - output = instance.handle_chunk(chunk) - assert isinstance(output, Row) - assert output.row_key == b"row_key" - assert output[0].family == "f" - assert output[0].qualifier == b"q" - assert instance.last_seen_row_key == b"row_key" - assert mock_reset.call_count == 1 - - def test_handle_chunk_with_commit_empty_strings(self): - from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse - from google.cloud.bigtable.data.row import Row + @pytest.mark.skipif( + sys.version_info < (3, 8), reason="mock coroutine requires python3.8 or higher" + ) + @pytest.mark.asyncio + async def test__start_idle_timer(self): + """Should start timer coroutine""" + iterator = self._make_one() + expected_timeout = 10 + with mock.patch("time.time", return_value=1): + with mock.patch.object(iterator, "_idle_timeout_coroutine") as mock_coro: + await iterator._start_idle_timer(expected_timeout) + assert mock_coro.call_count == 1 + assert mock_coro.call_args[0] == (expected_timeout,) + assert iterator.last_interaction_time == 1 + assert iterator._idle_timeout_task is not None + + @pytest.mark.skipif( + sys.version_info < (3, 8), reason="mock coroutine requires python3.8 or higher" + ) + @pytest.mark.asyncio + async def test__start_idle_timer_duplicate(self): + """Multiple calls should replace task""" + iterator = self._make_one() + with mock.patch.object(iterator, "_idle_timeout_coroutine") as mock_coro: + await iterator._start_idle_timer(1) + first_task = iterator._idle_timeout_task + await iterator._start_idle_timer(2) + second_task = iterator._idle_timeout_task + assert mock_coro.call_count == 2 + + assert first_task is not None + assert first_task != second_task + # old tasks hould be cancelled + with 
pytest.raises(asyncio.CancelledError): + await first_task + # new task should not be cancelled + await second_task - instance = self._make_one() - with mock.patch.object(type(instance), "_reset_row") as mock_reset: - chunk = ReadRowsResponse.CellChunk( - row_key=b"row_key", family_name="", qualifier=b"", commit_row=True - )._pb - output = instance.handle_chunk(chunk) - assert isinstance(output, Row) - assert output.row_key == b"row_key" - assert output[0].family == "" - assert output[0].qualifier == b"" - assert instance.last_seen_row_key == b"row_key" - assert mock_reset.call_count == 1 - - def handle_chunk_incomplete(self): - from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse + @pytest.mark.asyncio + async def test__idle_timeout_coroutine(self): + from google.cloud.bigtable.data.exceptions import IdleTimeout - instance = self._make_one() - chunk = ReadRowsResponse.CellChunk( - row_key=b"row_key", family_name="f", qualifier=b"q", commit_row=False - )._pb - output = instance.handle_chunk(chunk) - assert output is None - assert isinstance(instance.current_state, AWAITING_CELL_VALUE) - assert instance.current_family == "f" - assert instance.current_qualifier == b"q" + iterator = self._make_one() + await iterator._idle_timeout_coroutine(0.05) + await asyncio.sleep(0.1) + assert iterator.active is False + with pytest.raises(IdleTimeout): + await iterator.__anext__() + @pytest.mark.asyncio + async def test__idle_timeout_coroutine_extensions(self): + """touching the generator should reset the idle timer""" + iterator = self._make_one(items=list(range(100))) + await iterator._start_idle_timer(0.05) + for i in range(10): + # will not expire as long as it is in use + assert iterator.active is True + await iterator.__anext__() + await asyncio.sleep(0.03) + # now let it expire + await asyncio.sleep(0.5) + assert iterator.active is False -class TestState(unittest.TestCase): - def test_AWAITING_NEW_ROW_empty_key(self): - from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse + @pytest.mark.asyncio + async def test___anext__(self): + num_rows = 10 + iterator = self._make_one(items=list(range(num_rows))) + for i in range(num_rows): + assert await iterator.__anext__() == i + with pytest.raises(StopAsyncIteration): + await iterator.__anext__() - instance = AWAITING_NEW_ROW - with pytest.raises(InvalidChunk) as e: - chunk = ReadRowsResponse.CellChunk(row_key=b"")._pb - instance.handle_chunk(mock.Mock(), chunk) - assert "missing a row key" in e.value.args[0] - with pytest.raises(InvalidChunk) as e: - chunk = ReadRowsResponse.CellChunk()._pb - instance.handle_chunk(mock.Mock(), chunk) - assert "missing a row key" in e.value.args[0] - - def test_AWAITING_NEW_ROW(self): + @pytest.mark.asyncio + async def test___anext__with_deadline_error(self): """ - AWAITING_NEW_ROW should start a RowBuilder row, then - delegate the call to AWAITING_NEW_CELL + RetryErrors mean a deadline has been hit. 
+ Should be wrapped in a DeadlineExceeded exception """ - from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse - - instance = AWAITING_NEW_ROW - state_machine = mock.Mock() - with mock.patch.object(AWAITING_NEW_CELL, "handle_chunk") as mock_delegate: - chunk = ReadRowsResponse.CellChunk(row_key=b"row_key")._pb - instance.handle_chunk(state_machine, chunk) - assert state_machine.adapter.start_row.call_count == 1 - assert state_machine.adapter.start_row.call_args[0][0] == b"row_key" - mock_delegate.assert_called_once_with(state_machine, chunk) - - def test_AWAITING_NEW_CELL_family_without_qualifier(self): - from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse - from google.cloud.bigtable.data._read_rows import _StateMachine - - state_machine = _StateMachine() - state_machine.current_qualifier = b"q" - instance = AWAITING_NEW_CELL - with pytest.raises(InvalidChunk) as e: - chunk = ReadRowsResponse.CellChunk(family_name="fam")._pb - instance.handle_chunk(state_machine, chunk) - assert "New family must specify qualifier" in e.value.args[0] - - def test_AWAITING_NEW_CELL_qualifier_without_family(self): - from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse - from google.cloud.bigtable.data._read_rows import _StateMachine - - state_machine = _StateMachine() - instance = AWAITING_NEW_CELL - with pytest.raises(InvalidChunk) as e: - chunk = ReadRowsResponse.CellChunk(qualifier=b"q")._pb - instance.handle_chunk(state_machine, chunk) - assert "Family not found" in e.value.args[0] - - def test_AWAITING_NEW_CELL_no_row_state(self): - from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse - from google.cloud.bigtable.data._read_rows import _StateMachine - - state_machine = _StateMachine() - instance = AWAITING_NEW_CELL - with pytest.raises(InvalidChunk) as e: - chunk = ReadRowsResponse.CellChunk()._pb - instance.handle_chunk(state_machine, chunk) - assert "Missing family for new cell" in e.value.args[0] - state_machine.current_family = "fam" - with pytest.raises(InvalidChunk) as e: - chunk = ReadRowsResponse.CellChunk()._pb - instance.handle_chunk(state_machine, chunk) - assert "Missing qualifier for new cell" in e.value.args[0] - - def test_AWAITING_NEW_CELL_invalid_row_key(self): - from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse - from google.cloud.bigtable.data._read_rows import _StateMachine - - state_machine = _StateMachine() - instance = AWAITING_NEW_CELL - state_machine.adapter.current_key = b"abc" - with pytest.raises(InvalidChunk) as e: - chunk = ReadRowsResponse.CellChunk(row_key=b"123")._pb - instance.handle_chunk(state_machine, chunk) - assert "Row key changed mid row" in e.value.args[0] - - def test_AWAITING_NEW_CELL_success_no_split(self): - from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse - from google.cloud.bigtable.data._read_rows import _StateMachine - - state_machine = _StateMachine() - state_machine.adapter = mock.Mock() - instance = AWAITING_NEW_CELL - row_key = b"row_key" - family = "fam" - qualifier = b"q" - labels = ["label"] - timestamp = 123 - value = b"value" - chunk = ReadRowsResponse.CellChunk( - row_key=row_key, - family_name=family, - qualifier=qualifier, - timestamp_micros=timestamp, - value=value, - labels=labels, - )._pb - state_machine.adapter.current_key = row_key - new_state = instance.handle_chunk(state_machine, chunk) - assert state_machine.adapter.start_cell.call_count == 1 - kwargs = state_machine.adapter.start_cell.call_args[1] - assert kwargs["family"] == 
family - assert kwargs["qualifier"] == qualifier - assert kwargs["timestamp_micros"] == timestamp - assert kwargs["labels"] == labels - assert state_machine.adapter.cell_value.call_count == 1 - assert state_machine.adapter.cell_value.call_args[0][0] == value - assert state_machine.adapter.finish_cell.call_count == 1 - assert new_state == AWAITING_NEW_CELL - - def test_AWAITING_NEW_CELL_success_with_split(self): - from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse - from google.cloud.bigtable.data._read_rows import _StateMachine - - state_machine = _StateMachine() - state_machine.adapter = mock.Mock() - instance = AWAITING_NEW_CELL - row_key = b"row_key" - family = "fam" - qualifier = b"q" - labels = ["label"] - timestamp = 123 - value = b"value" - chunk = ReadRowsResponse.CellChunk( - value_size=1, - row_key=row_key, - family_name=family, - qualifier=qualifier, - timestamp_micros=timestamp, - value=value, - labels=labels, - )._pb - state_machine.adapter.current_key = row_key - new_state = instance.handle_chunk(state_machine, chunk) - assert state_machine.adapter.start_cell.call_count == 1 - kwargs = state_machine.adapter.start_cell.call_args[1] - assert kwargs["family"] == family - assert kwargs["qualifier"] == qualifier - assert kwargs["timestamp_micros"] == timestamp - assert kwargs["labels"] == labels - assert state_machine.adapter.cell_value.call_count == 1 - assert state_machine.adapter.cell_value.call_args[0][0] == value - assert state_machine.adapter.finish_cell.call_count == 0 - assert new_state == AWAITING_CELL_VALUE - - def test_AWAITING_CELL_VALUE_w_row_key(self): - from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse - from google.cloud.bigtable.data._read_rows import _StateMachine - - state_machine = _StateMachine() - instance = AWAITING_CELL_VALUE - with pytest.raises(InvalidChunk) as e: - chunk = ReadRowsResponse.CellChunk(row_key=b"123")._pb - instance.handle_chunk(state_machine, chunk) - assert "In progress cell had a row key" in e.value.args[0] - - def test_AWAITING_CELL_VALUE_w_family(self): - from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse - from google.cloud.bigtable.data._read_rows import _StateMachine - - state_machine = _StateMachine() - instance = AWAITING_CELL_VALUE - with pytest.raises(InvalidChunk) as e: - chunk = ReadRowsResponse.CellChunk(family_name="")._pb - instance.handle_chunk(state_machine, chunk) - assert "In progress cell had a family name" in e.value.args[0] - - def test_AWAITING_CELL_VALUE_w_qualifier(self): - from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse - from google.cloud.bigtable.data._read_rows import _StateMachine - - state_machine = _StateMachine() - instance = AWAITING_CELL_VALUE - with pytest.raises(InvalidChunk) as e: - chunk = ReadRowsResponse.CellChunk(qualifier=b"")._pb - instance.handle_chunk(state_machine, chunk) - assert "In progress cell had a qualifier" in e.value.args[0] - - def test_AWAITING_CELL_VALUE_w_timestamp(self): - from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse - from google.cloud.bigtable.data._read_rows import _StateMachine - - state_machine = _StateMachine() - instance = AWAITING_CELL_VALUE - with pytest.raises(InvalidChunk) as e: - chunk = ReadRowsResponse.CellChunk(timestamp_micros=123)._pb - instance.handle_chunk(state_machine, chunk) - assert "In progress cell had a timestamp" in e.value.args[0] - - def test_AWAITING_CELL_VALUE_w_labels(self): - from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse - 
from google.cloud.bigtable.data._read_rows import _StateMachine - - state_machine = _StateMachine() - instance = AWAITING_CELL_VALUE - with pytest.raises(InvalidChunk) as e: - chunk = ReadRowsResponse.CellChunk(labels=[""])._pb - instance.handle_chunk(state_machine, chunk) - assert "In progress cell had labels" in e.value.args[0] - - def test_AWAITING_CELL_VALUE_continuation(self): - from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse - from google.cloud.bigtable.data._read_rows import _StateMachine - - state_machine = _StateMachine() - state_machine.adapter = mock.Mock() - instance = AWAITING_CELL_VALUE - value = b"value" - chunk = ReadRowsResponse.CellChunk(value=value, value_size=1)._pb - new_state = instance.handle_chunk(state_machine, chunk) - assert state_machine.adapter.cell_value.call_count == 1 - assert state_machine.adapter.cell_value.call_args[0][0] == value - assert state_machine.adapter.finish_cell.call_count == 0 - assert new_state == AWAITING_CELL_VALUE - - def test_AWAITING_CELL_VALUE_final_chunk(self): - from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse - from google.cloud.bigtable.data._read_rows import _StateMachine - - state_machine = _StateMachine() - state_machine.adapter = mock.Mock() - instance = AWAITING_CELL_VALUE - value = b"value" - chunk = ReadRowsResponse.CellChunk(value=value, value_size=0)._pb - new_state = instance.handle_chunk(state_machine, chunk) - assert state_machine.adapter.cell_value.call_count == 1 - assert state_machine.adapter.cell_value.call_args[0][0] == value - assert state_machine.adapter.finish_cell.call_count == 1 - assert new_state == AWAITING_NEW_CELL - - -class TestRowBuilder(unittest.TestCase): - @staticmethod - def _get_target_class(): - from google.cloud.bigtable.data._read_rows import _RowBuilder + from google.api_core import exceptions as core_exceptions - return _RowBuilder + items = [1, core_exceptions.RetryError("retry error", None)] + expected_timeout = 99 + iterator = self._make_one(items=items, operation_timeout=expected_timeout) + assert await iterator.__anext__() == 1 + with pytest.raises(core_exceptions.DeadlineExceeded) as exc: + await iterator.__anext__() + assert f"operation_timeout of {expected_timeout:0.1f}s exceeded" in str( + exc.value + ) + assert exc.value.__cause__ is None - def _make_one(self, *args, **kwargs): - return self._get_target_class()(*args, **kwargs) + @pytest.mark.asyncio + async def test___anext__with_deadline_error_with_cause(self): + """ + Transient errors should be exposed as an error group + """ + from google.api_core import exceptions as core_exceptions + from google.cloud.bigtable.data.exceptions import RetryExceptionGroup - def test_ctor(self): - with mock.patch.object(self._get_target_class(), "reset") as reset: - self._make_one() - reset.assert_called_once() - row_builder = self._make_one() - self.assertIsNone(row_builder.current_key) - self.assertIsNone(row_builder.working_cell) - self.assertIsNone(row_builder.working_value) - self.assertEqual(row_builder.completed_cells, []) - - def test_start_row(self): - row_builder = self._make_one() - row_builder.start_row(b"row_key") - self.assertEqual(row_builder.current_key, b"row_key") - row_builder.start_row(b"row_key2") - self.assertEqual(row_builder.current_key, b"row_key2") - - def test_start_cell(self): - # test with no family - with self.assertRaises(InvalidChunk) as e: - self._make_one().start_cell("", TEST_QUALIFIER, TEST_TIMESTAMP, TEST_LABELS) - self.assertEqual(str(e.exception), "Missing family for a 
new cell") - # test with no row - with self.assertRaises(InvalidChunk) as e: - row_builder = self._make_one() - row_builder.start_cell( - TEST_FAMILY, TEST_QUALIFIER, TEST_TIMESTAMP, TEST_LABELS - ) - self.assertEqual(str(e.exception), "start_cell called without a row") - # test with valid row - row_builder = self._make_one() - row_builder.start_row(b"row_key") - row_builder.start_cell(TEST_FAMILY, TEST_QUALIFIER, TEST_TIMESTAMP, TEST_LABELS) - self.assertEqual(row_builder.working_cell.family, TEST_FAMILY) - self.assertEqual(row_builder.working_cell.qualifier, TEST_QUALIFIER) - self.assertEqual(row_builder.working_cell.timestamp_micros, TEST_TIMESTAMP) - self.assertEqual(row_builder.working_cell.labels, TEST_LABELS) - self.assertEqual(row_builder.working_value, b"") - - def test_cell_value(self): - row_builder = self._make_one() - row_builder.start_row(b"row_key") - with self.assertRaises(InvalidChunk): - # start_cell must be called before cell_value - row_builder.cell_value(b"cell_value") - row_builder.start_cell(TEST_FAMILY, TEST_QUALIFIER, TEST_TIMESTAMP, TEST_LABELS) - row_builder.cell_value(b"cell_value") - self.assertEqual(row_builder.working_value, b"cell_value") - # should be able to continuously append to the working value - row_builder.cell_value(b"appended") - self.assertEqual(row_builder.working_value, b"cell_valueappended") - - def test_finish_cell(self): - row_builder = self._make_one() - row_builder.start_row(b"row_key") - row_builder.start_cell(TEST_FAMILY, TEST_QUALIFIER, TEST_TIMESTAMP, TEST_LABELS) - row_builder.finish_cell() - self.assertEqual(len(row_builder.completed_cells), 1) - self.assertEqual(row_builder.completed_cells[0].family, TEST_FAMILY) - self.assertEqual(row_builder.completed_cells[0].qualifier, TEST_QUALIFIER) - self.assertEqual( - row_builder.completed_cells[0].timestamp_micros, TEST_TIMESTAMP + items = [1, core_exceptions.RetryError("retry error", None)] + expected_timeout = 99 + errors = [RuntimeError("error1"), ValueError("error2")] + iterator = self._make_one( + items=items, operation_timeout=expected_timeout, errors=errors ) - self.assertEqual(row_builder.completed_cells[0].labels, TEST_LABELS) - self.assertEqual(row_builder.completed_cells[0].value, b"") - self.assertEqual(row_builder.working_cell, None) - self.assertEqual(row_builder.working_value, None) - # add additional cell with value - row_builder.start_cell(TEST_FAMILY, TEST_QUALIFIER, TEST_TIMESTAMP, TEST_LABELS) - row_builder.cell_value(b"cell_value") - row_builder.cell_value(b"appended") - row_builder.finish_cell() - self.assertEqual(len(row_builder.completed_cells), 2) - self.assertEqual(row_builder.completed_cells[1].family, TEST_FAMILY) - self.assertEqual(row_builder.completed_cells[1].qualifier, TEST_QUALIFIER) - self.assertEqual( - row_builder.completed_cells[1].timestamp_micros, TEST_TIMESTAMP + assert await iterator.__anext__() == 1 + with pytest.raises(core_exceptions.DeadlineExceeded) as exc: + await iterator.__anext__() + assert f"operation_timeout of {expected_timeout:0.1f}s exceeded" in str( + exc.value ) - self.assertEqual(row_builder.completed_cells[1].labels, TEST_LABELS) - self.assertEqual(row_builder.completed_cells[1].value, b"cell_valueappended") - self.assertEqual(row_builder.working_cell, None) - self.assertEqual(row_builder.working_value, None) - - def test_finish_cell_no_cell(self): - with self.assertRaises(InvalidChunk) as e: - self._make_one().finish_cell() - self.assertEqual(str(e.exception), "finish_cell called before start_cell") - with 
self.assertRaises(InvalidChunk) as e: - row_builder = self._make_one() - row_builder.start_row(b"row_key") - row_builder.finish_cell() - self.assertEqual(str(e.exception), "finish_cell called before start_cell") - - def test_finish_row(self): - row_builder = self._make_one() - row_builder.start_row(b"row_key") - for i in range(3): - row_builder.start_cell(str(i), TEST_QUALIFIER, TEST_TIMESTAMP, TEST_LABELS) - row_builder.cell_value(b"cell_value: ") - row_builder.cell_value(str(i).encode("utf-8")) - row_builder.finish_cell() - self.assertEqual(len(row_builder.completed_cells), i + 1) - output = row_builder.finish_row() - self.assertEqual(row_builder.current_key, None) - self.assertEqual(row_builder.working_cell, None) - self.assertEqual(row_builder.working_value, None) - self.assertEqual(len(row_builder.completed_cells), 0) - - self.assertEqual(output.row_key, b"row_key") - self.assertEqual(len(output), 3) - for i in range(3): - self.assertEqual(output[i].family, str(i)) - self.assertEqual(output[i].qualifier, TEST_QUALIFIER) - self.assertEqual(output[i].timestamp_micros, TEST_TIMESTAMP) - self.assertEqual(output[i].labels, TEST_LABELS) - self.assertEqual(output[i].value, b"cell_value: " + str(i).encode("utf-8")) - - def test_finish_row_no_row(self): - with self.assertRaises(InvalidChunk) as e: - self._make_one().finish_row() - self.assertEqual(str(e.exception), "No row in progress") - - def test_reset(self): - row_builder = self._make_one() - row_builder.start_row(b"row_key") - for i in range(3): - row_builder.start_cell(str(i), TEST_QUALIFIER, TEST_TIMESTAMP, TEST_LABELS) - row_builder.cell_value(b"cell_value: ") - row_builder.cell_value(str(i).encode("utf-8")) - row_builder.finish_cell() - self.assertEqual(len(row_builder.completed_cells), i + 1) - row_builder.reset() - self.assertEqual(row_builder.current_key, None) - self.assertEqual(row_builder.working_cell, None) - self.assertEqual(row_builder.working_value, None) - self.assertEqual(len(row_builder.completed_cells), 0) - - -class TestChunkHasField: - def test__chunk_has_field_empty(self): - from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse - from google.cloud.bigtable.data._read_rows import _chunk_has_field + error_group = exc.value.__cause__ + assert isinstance(error_group, RetryExceptionGroup) + assert len(error_group.exceptions) == 2 + assert error_group.exceptions[0] is errors[0] + assert error_group.exceptions[1] is errors[1] + assert "2 failed attempts" in str(error_group) + + @pytest.mark.asyncio + async def test___anext__with_error(self): + """ + Other errors should be raised as-is + """ + from google.api_core import exceptions as core_exceptions - chunk = ReadRowsResponse.CellChunk()._pb - assert not _chunk_has_field(chunk, "family_name") - assert not _chunk_has_field(chunk, "qualifier") + items = [1, core_exceptions.InternalServerError("mock error")] + iterator = self._make_one(items=items) + assert await iterator.__anext__() == 1 + with pytest.raises(core_exceptions.InternalServerError) as exc: + await iterator.__anext__() + assert exc.value is items[1] + assert iterator.active is False + # next call should raise same error + with pytest.raises(core_exceptions.InternalServerError) as exc: + await iterator.__anext__() - def test__chunk_has_field_populated_empty_strings(self): - from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse - from google.cloud.bigtable.data._read_rows import _chunk_has_field + @pytest.mark.asyncio + async def test__finish_with_error(self): + iterator = 
self._make_one() + await iterator._start_idle_timer(10) + timeout_task = iterator._idle_timeout_task + assert await iterator.__anext__() == 0 + assert iterator.active is True + err = ZeroDivisionError("mock error") + await iterator._finish_with_error(err) + assert iterator.active is False + assert iterator._error is err + assert iterator._idle_timeout_task is None + with pytest.raises(ZeroDivisionError) as exc: + await iterator.__anext__() + assert exc.value is err + # timeout task should be cancelled + with pytest.raises(asyncio.CancelledError): + await timeout_task - chunk = ReadRowsResponse.CellChunk(qualifier=b"", family_name="")._pb - assert _chunk_has_field(chunk, "family_name") - assert _chunk_has_field(chunk, "qualifier") + @pytest.mark.asyncio + async def test_aclose(self): + iterator = self._make_one() + await iterator._start_idle_timer(10) + timeout_task = iterator._idle_timeout_task + assert await iterator.__anext__() == 0 + assert iterator.active is True + await iterator.aclose() + assert iterator.active is False + assert isinstance(iterator._error, StopAsyncIteration) + assert iterator._idle_timeout_task is None + with pytest.raises(StopAsyncIteration) as e: + await iterator.__anext__() + assert "closed" in str(e.value) + # timeout task should be cancelled + with pytest.raises(asyncio.CancelledError): + await timeout_task diff --git a/tests/unit/data/_async/test_iterators.py b/tests/unit/data/_async/test_iterators.py deleted file mode 100644 index 712ba1baa..000000000 --- a/tests/unit/data/_async/test_iterators.py +++ /dev/null @@ -1,251 +0,0 @@ -# Copyright 2023 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-from __future__ import annotations - -import sys -import asyncio -import pytest - -from google.cloud.bigtable.data._read_rows import _ReadRowsOperation - -# try/except added for compatibility with python < 3.8 -try: - from unittest import mock -except ImportError: # pragma: NO COVER - import mock # type: ignore - - -class MockStream(_ReadRowsOperation): - """ - Mock a _ReadRowsOperation stream for testing - """ - - def __init__(self, items=None, errors=None, operation_timeout=None): - self.transient_errors = errors - self.operation_timeout = operation_timeout - self.next_idx = 0 - if items is None: - items = list(range(10)) - self.items = items - - def __aiter__(self): - return self - - async def __anext__(self): - if self.next_idx >= len(self.items): - raise StopAsyncIteration - item = self.items[self.next_idx] - self.next_idx += 1 - if isinstance(item, Exception): - raise item - return item - - async def aclose(self): - pass - - -class TestReadRowsIterator: - async def mock_stream(self, size=10): - for i in range(size): - yield i - - def _make_one(self, *args, **kwargs): - from google.cloud.bigtable.data.iterators import ReadRowsIterator - - stream = MockStream(*args, **kwargs) - return ReadRowsIterator(stream) - - def test_ctor(self): - with mock.patch("time.time", return_value=0): - iterator = self._make_one() - assert iterator.last_interaction_time == 0 - assert iterator._idle_timeout_task is None - assert iterator.active is True - - def test___aiter__(self): - iterator = self._make_one() - assert iterator.__aiter__() is iterator - - @pytest.mark.skipif( - sys.version_info < (3, 8), reason="mock coroutine requires python3.8 or higher" - ) - @pytest.mark.asyncio - async def test__start_idle_timer(self): - """Should start timer coroutine""" - iterator = self._make_one() - expected_timeout = 10 - with mock.patch("time.time", return_value=1): - with mock.patch.object(iterator, "_idle_timeout_coroutine") as mock_coro: - await iterator._start_idle_timer(expected_timeout) - assert mock_coro.call_count == 1 - assert mock_coro.call_args[0] == (expected_timeout,) - assert iterator.last_interaction_time == 1 - assert iterator._idle_timeout_task is not None - - @pytest.mark.skipif( - sys.version_info < (3, 8), reason="mock coroutine requires python3.8 or higher" - ) - @pytest.mark.asyncio - async def test__start_idle_timer_duplicate(self): - """Multiple calls should replace task""" - iterator = self._make_one() - with mock.patch.object(iterator, "_idle_timeout_coroutine") as mock_coro: - await iterator._start_idle_timer(1) - first_task = iterator._idle_timeout_task - await iterator._start_idle_timer(2) - second_task = iterator._idle_timeout_task - assert mock_coro.call_count == 2 - - assert first_task is not None - assert first_task != second_task - # old tasks hould be cancelled - with pytest.raises(asyncio.CancelledError): - await first_task - # new task should not be cancelled - await second_task - - @pytest.mark.asyncio - async def test__idle_timeout_coroutine(self): - from google.cloud.bigtable.data.exceptions import IdleTimeout - - iterator = self._make_one() - await iterator._idle_timeout_coroutine(0.05) - await asyncio.sleep(0.1) - assert iterator.active is False - with pytest.raises(IdleTimeout): - await iterator.__anext__() - - @pytest.mark.asyncio - async def test__idle_timeout_coroutine_extensions(self): - """touching the generator should reset the idle timer""" - iterator = self._make_one(items=list(range(100))) - await iterator._start_idle_timer(0.05) - for i in range(10): - # will 
not expire as long as it is in use - assert iterator.active is True - await iterator.__anext__() - await asyncio.sleep(0.03) - # now let it expire - await asyncio.sleep(0.5) - assert iterator.active is False - - @pytest.mark.asyncio - async def test___anext__(self): - num_rows = 10 - iterator = self._make_one(items=list(range(num_rows))) - for i in range(num_rows): - assert await iterator.__anext__() == i - with pytest.raises(StopAsyncIteration): - await iterator.__anext__() - - @pytest.mark.asyncio - async def test___anext__with_deadline_error(self): - """ - RetryErrors mean a deadline has been hit. - Should be wrapped in a DeadlineExceeded exception - """ - from google.api_core import exceptions as core_exceptions - - items = [1, core_exceptions.RetryError("retry error", None)] - expected_timeout = 99 - iterator = self._make_one(items=items, operation_timeout=expected_timeout) - assert await iterator.__anext__() == 1 - with pytest.raises(core_exceptions.DeadlineExceeded) as exc: - await iterator.__anext__() - assert f"operation_timeout of {expected_timeout:0.1f}s exceeded" in str( - exc.value - ) - assert exc.value.__cause__ is None - - @pytest.mark.asyncio - async def test___anext__with_deadline_error_with_cause(self): - """ - Transient errors should be exposed as an error group - """ - from google.api_core import exceptions as core_exceptions - from google.cloud.bigtable.data.exceptions import RetryExceptionGroup - - items = [1, core_exceptions.RetryError("retry error", None)] - expected_timeout = 99 - errors = [RuntimeError("error1"), ValueError("error2")] - iterator = self._make_one( - items=items, operation_timeout=expected_timeout, errors=errors - ) - assert await iterator.__anext__() == 1 - with pytest.raises(core_exceptions.DeadlineExceeded) as exc: - await iterator.__anext__() - assert f"operation_timeout of {expected_timeout:0.1f}s exceeded" in str( - exc.value - ) - error_group = exc.value.__cause__ - assert isinstance(error_group, RetryExceptionGroup) - assert len(error_group.exceptions) == 2 - assert error_group.exceptions[0] is errors[0] - assert error_group.exceptions[1] is errors[1] - assert "2 failed attempts" in str(error_group) - - @pytest.mark.asyncio - async def test___anext__with_error(self): - """ - Other errors should be raised as-is - """ - from google.api_core import exceptions as core_exceptions - - items = [1, core_exceptions.InternalServerError("mock error")] - iterator = self._make_one(items=items) - assert await iterator.__anext__() == 1 - with pytest.raises(core_exceptions.InternalServerError) as exc: - await iterator.__anext__() - assert exc.value is items[1] - assert iterator.active is False - # next call should raise same error - with pytest.raises(core_exceptions.InternalServerError) as exc: - await iterator.__anext__() - - @pytest.mark.asyncio - async def test__finish_with_error(self): - iterator = self._make_one() - await iterator._start_idle_timer(10) - timeout_task = iterator._idle_timeout_task - assert await iterator.__anext__() == 0 - assert iterator.active is True - err = ZeroDivisionError("mock error") - await iterator._finish_with_error(err) - assert iterator.active is False - assert iterator._error is err - assert iterator._idle_timeout_task is None - with pytest.raises(ZeroDivisionError) as exc: - await iterator.__anext__() - assert exc.value is err - # timeout task should be cancelled - with pytest.raises(asyncio.CancelledError): - await timeout_task - - @pytest.mark.asyncio - async def test_aclose(self): - iterator = self._make_one() - 
await iterator._start_idle_timer(10) - timeout_task = iterator._idle_timeout_task - assert await iterator.__anext__() == 0 - assert iterator.active is True - await iterator.aclose() - assert iterator.active is False - assert isinstance(iterator._error, StopAsyncIteration) - assert iterator._idle_timeout_task is None - with pytest.raises(StopAsyncIteration) as e: - await iterator.__anext__() - assert "closed" in str(e.value) - # timeout task should be cancelled - with pytest.raises(asyncio.CancelledError): - await timeout_task diff --git a/tests/unit/data/test__read_rows_state_machine.py b/tests/unit/data/test__read_rows_state_machine.py new file mode 100644 index 000000000..b3873b287 --- /dev/null +++ b/tests/unit/data/test__read_rows_state_machine.py @@ -0,0 +1,666 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +import pytest + +from google.cloud.bigtable.data.exceptions import InvalidChunk +from google.cloud.bigtable.data._read_rows_state_machine import AWAITING_NEW_ROW +from google.cloud.bigtable.data._read_rows_state_machine import AWAITING_NEW_CELL +from google.cloud.bigtable.data._read_rows_state_machine import AWAITING_CELL_VALUE + +# try/except added for compatibility with python < 3.8 +try: + from unittest import mock + from unittest.mock import AsyncMock # type: ignore +except ImportError: # pragma: NO COVER + import mock # type: ignore + from mock import AsyncMock # type: ignore # noqa F401 + +TEST_FAMILY = "family_name" +TEST_QUALIFIER = b"qualifier" +TEST_TIMESTAMP = 123456789 +TEST_LABELS = ["label1", "label2"] + + +class TestStateMachine(unittest.TestCase): + @staticmethod + def _get_target_class(): + from google.cloud.bigtable.data._async._read_rows import _StateMachine + + return _StateMachine + + def _make_one(self, *args, **kwargs): + return self._get_target_class()(*args, **kwargs) + + def test_ctor(self): + from google.cloud.bigtable.data._async._read_rows import _RowBuilder + + instance = self._make_one() + assert instance.last_seen_row_key is None + assert instance.current_state == AWAITING_NEW_ROW + assert instance.current_family is None + assert instance.current_qualifier is None + assert isinstance(instance.adapter, _RowBuilder) + assert instance.adapter.current_key is None + assert instance.adapter.working_cell is None + assert instance.adapter.working_value is None + assert instance.adapter.completed_cells == [] + + def test_is_terminal_state(self): + + instance = self._make_one() + assert instance.is_terminal_state() is True + instance.current_state = AWAITING_NEW_ROW + assert instance.is_terminal_state() is True + instance.current_state = AWAITING_NEW_CELL + assert instance.is_terminal_state() is False + instance.current_state = AWAITING_CELL_VALUE + assert instance.is_terminal_state() is False + + def test__reset_row(self): + instance = self._make_one() + instance.current_state = mock.Mock() + instance.current_family = "family" + instance.current_qualifier = "qualifier" + instance.adapter = mock.Mock() + instance._reset_row() + assert 
instance.current_state == AWAITING_NEW_ROW + assert instance.current_family is None + assert instance.current_qualifier is None + assert instance.adapter.reset.call_count == 1 + + def test_handle_last_scanned_row_wrong_state(self): + from google.cloud.bigtable.data.exceptions import InvalidChunk + + instance = self._make_one() + instance.current_state = AWAITING_NEW_CELL + with pytest.raises(InvalidChunk) as e: + instance.handle_last_scanned_row("row_key") + assert e.value.args[0] == "Last scanned row key received in invalid state" + instance.current_state = AWAITING_CELL_VALUE + with pytest.raises(InvalidChunk) as e: + instance.handle_last_scanned_row("row_key") + assert e.value.args[0] == "Last scanned row key received in invalid state" + + def test_handle_last_scanned_row_out_of_order(self): + from google.cloud.bigtable.data.exceptions import InvalidChunk + + instance = self._make_one() + instance.last_seen_row_key = b"b" + with pytest.raises(InvalidChunk) as e: + instance.handle_last_scanned_row(b"a") + assert e.value.args[0] == "Last scanned row key is out of order" + with pytest.raises(InvalidChunk) as e: + instance.handle_last_scanned_row(b"b") + assert e.value.args[0] == "Last scanned row key is out of order" + + def test_handle_last_scanned_row(self): + from google.cloud.bigtable.data.row import _LastScannedRow + + instance = self._make_one() + instance.adapter = mock.Mock() + instance.last_seen_row_key = b"a" + output_row = instance.handle_last_scanned_row(b"b") + assert instance.last_seen_row_key == b"b" + assert isinstance(output_row, _LastScannedRow) + assert output_row.row_key == b"b" + assert instance.current_state == AWAITING_NEW_ROW + assert instance.current_family is None + assert instance.current_qualifier is None + assert instance.adapter.reset.call_count == 1 + + def test__handle_complete_row(self): + from google.cloud.bigtable.data.row import Row + + instance = self._make_one() + instance.current_state = mock.Mock() + instance.current_family = "family" + instance.current_qualifier = "qualifier" + instance.adapter = mock.Mock() + instance._handle_complete_row(Row(b"row_key", {})) + assert instance.last_seen_row_key == b"row_key" + assert instance.current_state == AWAITING_NEW_ROW + assert instance.current_family is None + assert instance.current_qualifier is None + assert instance.adapter.reset.call_count == 1 + + def test__handle_reset_chunk_errors(self): + from google.cloud.bigtable.data.exceptions import InvalidChunk + from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse + + instance = self._make_one() + with pytest.raises(InvalidChunk) as e: + instance._handle_reset_chunk(mock.Mock()) + instance.current_state = mock.Mock() + assert e.value.args[0] == "Reset chunk received when not processing row" + with pytest.raises(InvalidChunk) as e: + instance._handle_reset_chunk( + ReadRowsResponse.CellChunk(row_key=b"row_key")._pb + ) + assert e.value.args[0] == "Reset chunk has a row key" + with pytest.raises(InvalidChunk) as e: + instance._handle_reset_chunk( + ReadRowsResponse.CellChunk(family_name="family")._pb + ) + assert e.value.args[0] == "Reset chunk has a family name" + with pytest.raises(InvalidChunk) as e: + instance._handle_reset_chunk( + ReadRowsResponse.CellChunk(qualifier=b"qualifier")._pb + ) + assert e.value.args[0] == "Reset chunk has a qualifier" + with pytest.raises(InvalidChunk) as e: + instance._handle_reset_chunk( + ReadRowsResponse.CellChunk(timestamp_micros=1)._pb + ) + assert e.value.args[0] == "Reset chunk has a timestamp" + with 
pytest.raises(InvalidChunk) as e: + instance._handle_reset_chunk(ReadRowsResponse.CellChunk(value=b"value")._pb) + assert e.value.args[0] == "Reset chunk has a value" + with pytest.raises(InvalidChunk) as e: + instance._handle_reset_chunk( + ReadRowsResponse.CellChunk(labels=["label"])._pb + ) + assert e.value.args[0] == "Reset chunk has labels" + + def test_handle_chunk_out_of_order(self): + from google.cloud.bigtable.data.exceptions import InvalidChunk + from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse + + instance = self._make_one() + instance.last_seen_row_key = b"b" + with pytest.raises(InvalidChunk) as e: + chunk = ReadRowsResponse.CellChunk(row_key=b"a")._pb + instance.handle_chunk(chunk) + assert "increasing" in e.value.args[0] + with pytest.raises(InvalidChunk) as e: + chunk = ReadRowsResponse.CellChunk(row_key=b"b")._pb + instance.handle_chunk(chunk) + assert "increasing" in e.value.args[0] + + def test_handle_chunk_reset(self): + """Should call _handle_reset_chunk when a chunk with reset_row is encountered""" + from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse + + instance = self._make_one() + with mock.patch.object(type(instance), "_handle_reset_chunk") as mock_reset: + chunk = ReadRowsResponse.CellChunk(reset_row=True)._pb + output = instance.handle_chunk(chunk) + assert output is None + assert mock_reset.call_count == 1 + + @pytest.mark.parametrize("state", [AWAITING_NEW_ROW, AWAITING_CELL_VALUE]) + def handle_chunk_with_commit_wrong_state(self, state): + from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse + + instance = self._make_one() + with mock.patch.object( + type(instance.current_state), "handle_chunk" + ) as mock_state_handle: + mock_state_handle.return_value = state(mock.Mock()) + with pytest.raises(InvalidChunk) as e: + chunk = ReadRowsResponse.CellChunk(commit_row=True)._pb + instance.handle_chunk(mock.Mock(), chunk) + assert instance.current_state == state + assert e.value.args[0] == "Commit chunk received with in invalid state" + + def test_handle_chunk_with_commit(self): + from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse + from google.cloud.bigtable.data.row import Row + + instance = self._make_one() + with mock.patch.object(type(instance), "_reset_row") as mock_reset: + chunk = ReadRowsResponse.CellChunk( + row_key=b"row_key", family_name="f", qualifier=b"q", commit_row=True + )._pb + output = instance.handle_chunk(chunk) + assert isinstance(output, Row) + assert output.row_key == b"row_key" + assert output[0].family == "f" + assert output[0].qualifier == b"q" + assert instance.last_seen_row_key == b"row_key" + assert mock_reset.call_count == 1 + + def test_handle_chunk_with_commit_empty_strings(self): + from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse + from google.cloud.bigtable.data.row import Row + + instance = self._make_one() + with mock.patch.object(type(instance), "_reset_row") as mock_reset: + chunk = ReadRowsResponse.CellChunk( + row_key=b"row_key", family_name="", qualifier=b"", commit_row=True + )._pb + output = instance.handle_chunk(chunk) + assert isinstance(output, Row) + assert output.row_key == b"row_key" + assert output[0].family == "" + assert output[0].qualifier == b"" + assert instance.last_seen_row_key == b"row_key" + assert mock_reset.call_count == 1 + + def handle_chunk_incomplete(self): + from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse + + instance = self._make_one() + chunk = ReadRowsResponse.CellChunk( + 
row_key=b"row_key", family_name="f", qualifier=b"q", commit_row=False + )._pb + output = instance.handle_chunk(chunk) + assert output is None + assert isinstance(instance.current_state, AWAITING_CELL_VALUE) + assert instance.current_family == "f" + assert instance.current_qualifier == b"q" + + +class TestState(unittest.TestCase): + def test_AWAITING_NEW_ROW_empty_key(self): + from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse + + instance = AWAITING_NEW_ROW + with pytest.raises(InvalidChunk) as e: + chunk = ReadRowsResponse.CellChunk(row_key=b"")._pb + instance.handle_chunk(mock.Mock(), chunk) + assert "missing a row key" in e.value.args[0] + with pytest.raises(InvalidChunk) as e: + chunk = ReadRowsResponse.CellChunk()._pb + instance.handle_chunk(mock.Mock(), chunk) + assert "missing a row key" in e.value.args[0] + + def test_AWAITING_NEW_ROW(self): + """ + AWAITING_NEW_ROW should start a RowBuilder row, then + delegate the call to AWAITING_NEW_CELL + """ + from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse + + instance = AWAITING_NEW_ROW + state_machine = mock.Mock() + with mock.patch.object(AWAITING_NEW_CELL, "handle_chunk") as mock_delegate: + chunk = ReadRowsResponse.CellChunk(row_key=b"row_key")._pb + instance.handle_chunk(state_machine, chunk) + assert state_machine.adapter.start_row.call_count == 1 + assert state_machine.adapter.start_row.call_args[0][0] == b"row_key" + mock_delegate.assert_called_once_with(state_machine, chunk) + + def test_AWAITING_NEW_CELL_family_without_qualifier(self): + from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse + from google.cloud.bigtable.data._async._read_rows import _StateMachine + + state_machine = _StateMachine() + state_machine.current_qualifier = b"q" + instance = AWAITING_NEW_CELL + with pytest.raises(InvalidChunk) as e: + chunk = ReadRowsResponse.CellChunk(family_name="fam")._pb + instance.handle_chunk(state_machine, chunk) + assert "New family must specify qualifier" in e.value.args[0] + + def test_AWAITING_NEW_CELL_qualifier_without_family(self): + from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse + from google.cloud.bigtable.data._async._read_rows import _StateMachine + + state_machine = _StateMachine() + instance = AWAITING_NEW_CELL + with pytest.raises(InvalidChunk) as e: + chunk = ReadRowsResponse.CellChunk(qualifier=b"q")._pb + instance.handle_chunk(state_machine, chunk) + assert "Family not found" in e.value.args[0] + + def test_AWAITING_NEW_CELL_no_row_state(self): + from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse + from google.cloud.bigtable.data._async._read_rows import _StateMachine + + state_machine = _StateMachine() + instance = AWAITING_NEW_CELL + with pytest.raises(InvalidChunk) as e: + chunk = ReadRowsResponse.CellChunk()._pb + instance.handle_chunk(state_machine, chunk) + assert "Missing family for new cell" in e.value.args[0] + state_machine.current_family = "fam" + with pytest.raises(InvalidChunk) as e: + chunk = ReadRowsResponse.CellChunk()._pb + instance.handle_chunk(state_machine, chunk) + assert "Missing qualifier for new cell" in e.value.args[0] + + def test_AWAITING_NEW_CELL_invalid_row_key(self): + from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse + from google.cloud.bigtable.data._async._read_rows import _StateMachine + + state_machine = _StateMachine() + instance = AWAITING_NEW_CELL + state_machine.adapter.current_key = b"abc" + with pytest.raises(InvalidChunk) as e: + chunk = 
ReadRowsResponse.CellChunk(row_key=b"123")._pb + instance.handle_chunk(state_machine, chunk) + assert "Row key changed mid row" in e.value.args[0] + + def test_AWAITING_NEW_CELL_success_no_split(self): + from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse + from google.cloud.bigtable.data._async._read_rows import _StateMachine + + state_machine = _StateMachine() + state_machine.adapter = mock.Mock() + instance = AWAITING_NEW_CELL + row_key = b"row_key" + family = "fam" + qualifier = b"q" + labels = ["label"] + timestamp = 123 + value = b"value" + chunk = ReadRowsResponse.CellChunk( + row_key=row_key, + family_name=family, + qualifier=qualifier, + timestamp_micros=timestamp, + value=value, + labels=labels, + )._pb + state_machine.adapter.current_key = row_key + new_state = instance.handle_chunk(state_machine, chunk) + assert state_machine.adapter.start_cell.call_count == 1 + kwargs = state_machine.adapter.start_cell.call_args[1] + assert kwargs["family"] == family + assert kwargs["qualifier"] == qualifier + assert kwargs["timestamp_micros"] == timestamp + assert kwargs["labels"] == labels + assert state_machine.adapter.cell_value.call_count == 1 + assert state_machine.adapter.cell_value.call_args[0][0] == value + assert state_machine.adapter.finish_cell.call_count == 1 + assert new_state == AWAITING_NEW_CELL + + def test_AWAITING_NEW_CELL_success_with_split(self): + from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse + from google.cloud.bigtable.data._async._read_rows import _StateMachine + + state_machine = _StateMachine() + state_machine.adapter = mock.Mock() + instance = AWAITING_NEW_CELL + row_key = b"row_key" + family = "fam" + qualifier = b"q" + labels = ["label"] + timestamp = 123 + value = b"value" + chunk = ReadRowsResponse.CellChunk( + value_size=1, + row_key=row_key, + family_name=family, + qualifier=qualifier, + timestamp_micros=timestamp, + value=value, + labels=labels, + )._pb + state_machine.adapter.current_key = row_key + new_state = instance.handle_chunk(state_machine, chunk) + assert state_machine.adapter.start_cell.call_count == 1 + kwargs = state_machine.adapter.start_cell.call_args[1] + assert kwargs["family"] == family + assert kwargs["qualifier"] == qualifier + assert kwargs["timestamp_micros"] == timestamp + assert kwargs["labels"] == labels + assert state_machine.adapter.cell_value.call_count == 1 + assert state_machine.adapter.cell_value.call_args[0][0] == value + assert state_machine.adapter.finish_cell.call_count == 0 + assert new_state == AWAITING_CELL_VALUE + + def test_AWAITING_CELL_VALUE_w_row_key(self): + from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse + from google.cloud.bigtable.data._async._read_rows import _StateMachine + + state_machine = _StateMachine() + instance = AWAITING_CELL_VALUE + with pytest.raises(InvalidChunk) as e: + chunk = ReadRowsResponse.CellChunk(row_key=b"123")._pb + instance.handle_chunk(state_machine, chunk) + assert "In progress cell had a row key" in e.value.args[0] + + def test_AWAITING_CELL_VALUE_w_family(self): + from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse + from google.cloud.bigtable.data._async._read_rows import _StateMachine + + state_machine = _StateMachine() + instance = AWAITING_CELL_VALUE + with pytest.raises(InvalidChunk) as e: + chunk = ReadRowsResponse.CellChunk(family_name="")._pb + instance.handle_chunk(state_machine, chunk) + assert "In progress cell had a family name" in e.value.args[0] + + def 
test_AWAITING_CELL_VALUE_w_qualifier(self): + from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse + from google.cloud.bigtable.data._async._read_rows import _StateMachine + + state_machine = _StateMachine() + instance = AWAITING_CELL_VALUE + with pytest.raises(InvalidChunk) as e: + chunk = ReadRowsResponse.CellChunk(qualifier=b"")._pb + instance.handle_chunk(state_machine, chunk) + assert "In progress cell had a qualifier" in e.value.args[0] + + def test_AWAITING_CELL_VALUE_w_timestamp(self): + from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse + from google.cloud.bigtable.data._async._read_rows import _StateMachine + + state_machine = _StateMachine() + instance = AWAITING_CELL_VALUE + with pytest.raises(InvalidChunk) as e: + chunk = ReadRowsResponse.CellChunk(timestamp_micros=123)._pb + instance.handle_chunk(state_machine, chunk) + assert "In progress cell had a timestamp" in e.value.args[0] + + def test_AWAITING_CELL_VALUE_w_labels(self): + from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse + from google.cloud.bigtable.data._async._read_rows import _StateMachine + + state_machine = _StateMachine() + instance = AWAITING_CELL_VALUE + with pytest.raises(InvalidChunk) as e: + chunk = ReadRowsResponse.CellChunk(labels=[""])._pb + instance.handle_chunk(state_machine, chunk) + assert "In progress cell had labels" in e.value.args[0] + + def test_AWAITING_CELL_VALUE_continuation(self): + from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse + from google.cloud.bigtable.data._async._read_rows import _StateMachine + + state_machine = _StateMachine() + state_machine.adapter = mock.Mock() + instance = AWAITING_CELL_VALUE + value = b"value" + chunk = ReadRowsResponse.CellChunk(value=value, value_size=1)._pb + new_state = instance.handle_chunk(state_machine, chunk) + assert state_machine.adapter.cell_value.call_count == 1 + assert state_machine.adapter.cell_value.call_args[0][0] == value + assert state_machine.adapter.finish_cell.call_count == 0 + assert new_state == AWAITING_CELL_VALUE + + def test_AWAITING_CELL_VALUE_final_chunk(self): + from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse + from google.cloud.bigtable.data._async._read_rows import _StateMachine + + state_machine = _StateMachine() + state_machine.adapter = mock.Mock() + instance = AWAITING_CELL_VALUE + value = b"value" + chunk = ReadRowsResponse.CellChunk(value=value, value_size=0)._pb + new_state = instance.handle_chunk(state_machine, chunk) + assert state_machine.adapter.cell_value.call_count == 1 + assert state_machine.adapter.cell_value.call_args[0][0] == value + assert state_machine.adapter.finish_cell.call_count == 1 + assert new_state == AWAITING_NEW_CELL + + +class TestRowBuilder(unittest.TestCase): + @staticmethod + def _get_target_class(): + from google.cloud.bigtable.data._async._read_rows import _RowBuilder + + return _RowBuilder + + def _make_one(self, *args, **kwargs): + return self._get_target_class()(*args, **kwargs) + + def test_ctor(self): + with mock.patch.object(self._get_target_class(), "reset") as reset: + self._make_one() + reset.assert_called_once() + row_builder = self._make_one() + self.assertIsNone(row_builder.current_key) + self.assertIsNone(row_builder.working_cell) + self.assertIsNone(row_builder.working_value) + self.assertEqual(row_builder.completed_cells, []) + + def test_start_row(self): + row_builder = self._make_one() + row_builder.start_row(b"row_key") + self.assertEqual(row_builder.current_key, b"row_key") + 
row_builder.start_row(b"row_key2") + self.assertEqual(row_builder.current_key, b"row_key2") + + def test_start_cell(self): + # test with no family + with self.assertRaises(InvalidChunk) as e: + self._make_one().start_cell("", TEST_QUALIFIER, TEST_TIMESTAMP, TEST_LABELS) + self.assertEqual(str(e.exception), "Missing family for a new cell") + # test with no row + with self.assertRaises(InvalidChunk) as e: + row_builder = self._make_one() + row_builder.start_cell( + TEST_FAMILY, TEST_QUALIFIER, TEST_TIMESTAMP, TEST_LABELS + ) + self.assertEqual(str(e.exception), "start_cell called without a row") + # test with valid row + row_builder = self._make_one() + row_builder.start_row(b"row_key") + row_builder.start_cell(TEST_FAMILY, TEST_QUALIFIER, TEST_TIMESTAMP, TEST_LABELS) + self.assertEqual(row_builder.working_cell.family, TEST_FAMILY) + self.assertEqual(row_builder.working_cell.qualifier, TEST_QUALIFIER) + self.assertEqual(row_builder.working_cell.timestamp_micros, TEST_TIMESTAMP) + self.assertEqual(row_builder.working_cell.labels, TEST_LABELS) + self.assertEqual(row_builder.working_value, b"") + + def test_cell_value(self): + row_builder = self._make_one() + row_builder.start_row(b"row_key") + with self.assertRaises(InvalidChunk): + # start_cell must be called before cell_value + row_builder.cell_value(b"cell_value") + row_builder.start_cell(TEST_FAMILY, TEST_QUALIFIER, TEST_TIMESTAMP, TEST_LABELS) + row_builder.cell_value(b"cell_value") + self.assertEqual(row_builder.working_value, b"cell_value") + # should be able to continuously append to the working value + row_builder.cell_value(b"appended") + self.assertEqual(row_builder.working_value, b"cell_valueappended") + + def test_finish_cell(self): + row_builder = self._make_one() + row_builder.start_row(b"row_key") + row_builder.start_cell(TEST_FAMILY, TEST_QUALIFIER, TEST_TIMESTAMP, TEST_LABELS) + row_builder.finish_cell() + self.assertEqual(len(row_builder.completed_cells), 1) + self.assertEqual(row_builder.completed_cells[0].family, TEST_FAMILY) + self.assertEqual(row_builder.completed_cells[0].qualifier, TEST_QUALIFIER) + self.assertEqual( + row_builder.completed_cells[0].timestamp_micros, TEST_TIMESTAMP + ) + self.assertEqual(row_builder.completed_cells[0].labels, TEST_LABELS) + self.assertEqual(row_builder.completed_cells[0].value, b"") + self.assertEqual(row_builder.working_cell, None) + self.assertEqual(row_builder.working_value, None) + # add additional cell with value + row_builder.start_cell(TEST_FAMILY, TEST_QUALIFIER, TEST_TIMESTAMP, TEST_LABELS) + row_builder.cell_value(b"cell_value") + row_builder.cell_value(b"appended") + row_builder.finish_cell() + self.assertEqual(len(row_builder.completed_cells), 2) + self.assertEqual(row_builder.completed_cells[1].family, TEST_FAMILY) + self.assertEqual(row_builder.completed_cells[1].qualifier, TEST_QUALIFIER) + self.assertEqual( + row_builder.completed_cells[1].timestamp_micros, TEST_TIMESTAMP + ) + self.assertEqual(row_builder.completed_cells[1].labels, TEST_LABELS) + self.assertEqual(row_builder.completed_cells[1].value, b"cell_valueappended") + self.assertEqual(row_builder.working_cell, None) + self.assertEqual(row_builder.working_value, None) + + def test_finish_cell_no_cell(self): + with self.assertRaises(InvalidChunk) as e: + self._make_one().finish_cell() + self.assertEqual(str(e.exception), "finish_cell called before start_cell") + with self.assertRaises(InvalidChunk) as e: + row_builder = self._make_one() + row_builder.start_row(b"row_key") + row_builder.finish_cell() + 
self.assertEqual(str(e.exception), "finish_cell called before start_cell") + + def test_finish_row(self): + row_builder = self._make_one() + row_builder.start_row(b"row_key") + for i in range(3): + row_builder.start_cell(str(i), TEST_QUALIFIER, TEST_TIMESTAMP, TEST_LABELS) + row_builder.cell_value(b"cell_value: ") + row_builder.cell_value(str(i).encode("utf-8")) + row_builder.finish_cell() + self.assertEqual(len(row_builder.completed_cells), i + 1) + output = row_builder.finish_row() + self.assertEqual(row_builder.current_key, None) + self.assertEqual(row_builder.working_cell, None) + self.assertEqual(row_builder.working_value, None) + self.assertEqual(len(row_builder.completed_cells), 0) + + self.assertEqual(output.row_key, b"row_key") + self.assertEqual(len(output), 3) + for i in range(3): + self.assertEqual(output[i].family, str(i)) + self.assertEqual(output[i].qualifier, TEST_QUALIFIER) + self.assertEqual(output[i].timestamp_micros, TEST_TIMESTAMP) + self.assertEqual(output[i].labels, TEST_LABELS) + self.assertEqual(output[i].value, b"cell_value: " + str(i).encode("utf-8")) + + def test_finish_row_no_row(self): + with self.assertRaises(InvalidChunk) as e: + self._make_one().finish_row() + self.assertEqual(str(e.exception), "No row in progress") + + def test_reset(self): + row_builder = self._make_one() + row_builder.start_row(b"row_key") + for i in range(3): + row_builder.start_cell(str(i), TEST_QUALIFIER, TEST_TIMESTAMP, TEST_LABELS) + row_builder.cell_value(b"cell_value: ") + row_builder.cell_value(str(i).encode("utf-8")) + row_builder.finish_cell() + self.assertEqual(len(row_builder.completed_cells), i + 1) + row_builder.reset() + self.assertEqual(row_builder.current_key, None) + self.assertEqual(row_builder.working_cell, None) + self.assertEqual(row_builder.working_value, None) + self.assertEqual(len(row_builder.completed_cells), 0) + + +class TestChunkHasField: + def test__chunk_has_field_empty(self): + from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse + from google.cloud.bigtable.data._async._read_rows import _chunk_has_field + + chunk = ReadRowsResponse.CellChunk()._pb + assert not _chunk_has_field(chunk, "family_name") + assert not _chunk_has_field(chunk, "qualifier") + + def test__chunk_has_field_populated_empty_strings(self): + from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse + from google.cloud.bigtable.data._async._read_rows import _chunk_has_field + + chunk = ReadRowsResponse.CellChunk(qualifier=b"", family_name="")._pb + assert _chunk_has_field(chunk, "family_name") + assert _chunk_has_field(chunk, "qualifier") + + + From 11914444a41ac401a3eac11f53d6ad49339e1f59 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Fri, 23 Jun 2023 17:38:44 -0700 Subject: [PATCH 189/213] got unit tests running --- .../{ => data}/read-rows-acceptance-test.json | 0 tests/unit/data/test_read_rows_acceptance.py | 15 ++++++++------- 2 files changed, 8 insertions(+), 7 deletions(-) rename tests/unit/{ => data}/read-rows-acceptance-test.json (100%) diff --git a/tests/unit/read-rows-acceptance-test.json b/tests/unit/data/read-rows-acceptance-test.json similarity index 100% rename from tests/unit/read-rows-acceptance-test.json rename to tests/unit/data/read-rows-acceptance-test.json diff --git a/tests/unit/data/test_read_rows_acceptance.py b/tests/unit/data/test_read_rows_acceptance.py index 4aeba0d2d..813e5b93e 100644 --- a/tests/unit/data/test_read_rows_acceptance.py +++ b/tests/unit/data/test_read_rows_acceptance.py @@ -21,12 +21,13 @@ from 
google.cloud.bigtable_v2 import ReadRowsResponse
 
-from google.cloud.bigtable.data.client import BigtableDataClient
+from google.cloud.bigtable.data._async.client import BigtableDataClientAsync
 from google.cloud.bigtable.data.exceptions import InvalidChunk
-from google.cloud.bigtable.data._read_rows import _ReadRowsOperation, _StateMachine
+from google.cloud.bigtable.data._async._read_rows import _ReadRowsOperationAsync
+from google.cloud.bigtable.data._async._read_rows import _StateMachine
 from google.cloud.bigtable.data.row import Row
 
-from .v2_client.test_row_merger import ReadRowsTest, TestFile
+from ..v2_client.test_row_merger import ReadRowsTest, TestFile
 
 
 def parse_readrows_acceptance_tests():
@@ -67,7 +68,7 @@ async def _scenerio_stream():
     try:
         state = _StateMachine()
         results = []
-        async for row in _ReadRowsOperation.merge_row_response_stream(
+        async for row in _ReadRowsOperationAsync.merge_row_response_stream(
             _scenerio_stream(), state
         ):
             for cell in row:
@@ -117,7 +118,7 @@ def cancel(self):
         return mock_stream(chunk_list)
 
     try:
-        client = BigtableDataClient()
+        client = BigtableDataClientAsync()
         table = client.get_table("instance", "table")
         results = []
         with mock.patch.object(table.client._gapic_client, "read_rows") as read_rows:
@@ -150,7 +151,7 @@ async def _row_stream():
     state = _StateMachine()
     state.last_seen_row_key = b"a"
     with pytest.raises(InvalidChunk):
-        async for _ in _ReadRowsOperation.merge_row_response_stream(
+        async for _ in _ReadRowsOperationAsync.merge_row_response_stream(
            _row_stream(), state
         ):
             pass
@@ -309,6 +310,6 @@ async def _row_stream():
 
     state = _StateMachine()
     results = []
-    async for row in _ReadRowsOperation.merge_row_response_stream(_row_stream(), state):
+    async for row in _ReadRowsOperationAsync.merge_row_response_stream(_row_stream(), state):
         results.append(row)
     return results
From 2ca05d856466c71f558fb53331c5bc5725090301 Mon Sep 17 00:00:00 2001
From: Daniel Sanche
Date: Fri, 23 Jun 2023 17:43:34 -0700
Subject: [PATCH 190/213] fixed batcher tests

---
 .../{ => _async}/test_mutations_batcher.py    | 56 +++++++++----------
 1 file changed, 28 insertions(+), 28 deletions(-)
 rename tests/unit/data/{ => _async}/test_mutations_batcher.py (95%)

diff --git a/tests/unit/data/test_mutations_batcher.py b/tests/unit/data/_async/test_mutations_batcher.py
similarity index 95%
rename from tests/unit/data/test_mutations_batcher.py
rename to tests/unit/data/_async/test_mutations_batcher.py
index ae690e6fa..030f0037a 100644
--- a/tests/unit/data/test_mutations_batcher.py
+++ b/tests/unit/data/_async/test_mutations_batcher.py
@@ -33,9 +33,9 @@ def _make_mutation(count=1, size=1):
 
 class Test_FlowControl:
     def _make_one(self, max_mutation_count=10, max_mutation_bytes=100):
-        from google.cloud.bigtable.data.mutations_batcher import _FlowControl
+        from google.cloud.bigtable.data._async.mutations_batcher import _FlowControlAsync
 
-        return _FlowControl(max_mutation_count, max_mutation_bytes)
+        return _FlowControlAsync(max_mutation_count, max_mutation_bytes)
 
     def test_ctor(self):
         max_mutation_count = 9
@@ -238,7 +238,7 @@ async def test_add_to_flow_max_mutation_limits(
         Should submit request early, even if the flow control has room for more
         """
         with mock.patch(
-            "google.cloud.bigtable.data.mutations_batcher.MUTATE_ROWS_REQUEST_MUTATION_LIMIT",
+            "google.cloud.bigtable.data._async.mutations_batcher.MUTATE_ROWS_REQUEST_MUTATION_LIMIT",
             max_limit,
         ):
             mutation_objs = [_make_mutation(count=m[0], size=m[1]) for m in mutations]
@@ -275,11 +275,11 @@ async def test_add_to_flow_oversize(self):
         assert 
len(count_results) == 1 -class TestMutationsBatcher: +class TestMutationsBatcherAsync: def _get_target_class(self): - from google.cloud.bigtable.data.mutations_batcher import MutationsBatcher + from google.cloud.bigtable.data._async.mutations_batcher import MutationsBatcherAsync - return MutationsBatcher + return MutationsBatcherAsync def _make_one(self, table=None, **kwargs): if table is None: @@ -290,7 +290,7 @@ def _make_one(self, table=None, **kwargs): return self._get_target_class()(table, **kwargs) @mock.patch( - "google.cloud.bigtable.data.mutations_batcher.MutationsBatcher._start_flush_timer" + "google.cloud.bigtable.data._async.mutations_batcher.MutationsBatcherAsync._start_flush_timer" ) @pytest.mark.asyncio async def test_ctor_defaults(self, flush_timer_mock): @@ -320,7 +320,7 @@ async def test_ctor_defaults(self, flush_timer_mock): assert isinstance(instance._flush_timer, asyncio.Future) @mock.patch( - "google.cloud.bigtable.data.mutations_batcher.MutationsBatcher._start_flush_timer", + "google.cloud.bigtable.data._async.mutations_batcher.MutationsBatcherAsync._start_flush_timer", ) @pytest.mark.asyncio async def test_ctor_explicit(self, flush_timer_mock): @@ -368,7 +368,7 @@ async def test_ctor_explicit(self, flush_timer_mock): assert isinstance(instance._flush_timer, asyncio.Future) @mock.patch( - "google.cloud.bigtable.data.mutations_batcher.MutationsBatcher._start_flush_timer" + "google.cloud.bigtable.data._async.mutations_batcher.MutationsBatcherAsync._start_flush_timer" ) @pytest.mark.asyncio async def test_ctor_no_flush_limits(self, flush_timer_mock): @@ -419,19 +419,19 @@ async def test_ctor_invalid_values(self): def test_default_argument_consistency(self): """ - We supply default arguments in MutationsBatcher.__init__, and in + We supply default arguments in MutationsBatcherAsync.__init__, and in table.mutations_batcher. 
Make sure any changes to defaults are applied to both places """ - from google.cloud.bigtable.data.client import Table - from google.cloud.bigtable.data.mutations_batcher import MutationsBatcher + from google.cloud.bigtable.data._async.client import TableAsync + from google.cloud.bigtable.data._async.mutations_batcher import MutationsBatcherAsync import inspect get_batcher_signature = dict( - inspect.signature(Table.mutations_batcher).parameters + inspect.signature(TableAsync.mutations_batcher).parameters ) get_batcher_signature.pop("self") - batcher_init_signature = dict(inspect.signature(MutationsBatcher).parameters) + batcher_init_signature = dict(inspect.signature(MutationsBatcherAsync).parameters) batcher_init_signature.pop("table") # both should have same number of arguments assert len(get_batcher_signature.keys()) == len(batcher_init_signature.keys()) @@ -446,7 +446,7 @@ def test_default_argument_consistency(self): ) @mock.patch( - "google.cloud.bigtable.data.mutations_batcher.MutationsBatcher._schedule_flush" + "google.cloud.bigtable.data._async.mutations_batcher.MutationsBatcherAsync._schedule_flush" ) @pytest.mark.asyncio async def test__start_flush_timer_w_None(self, flush_mock): @@ -458,7 +458,7 @@ async def test__start_flush_timer_w_None(self, flush_mock): assert flush_mock.call_count == 0 @mock.patch( - "google.cloud.bigtable.data.mutations_batcher.MutationsBatcher._schedule_flush" + "google.cloud.bigtable.data._async.mutations_batcher.MutationsBatcherAsync._schedule_flush" ) @pytest.mark.asyncio async def test__start_flush_timer_call_when_closed(self, flush_mock): @@ -472,7 +472,7 @@ async def test__start_flush_timer_call_when_closed(self, flush_mock): assert flush_mock.call_count == 0 @mock.patch( - "google.cloud.bigtable.data.mutations_batcher.MutationsBatcher._schedule_flush" + "google.cloud.bigtable.data._async.mutations_batcher.MutationsBatcherAsync._schedule_flush" ) @pytest.mark.asyncio async def test__flush_timer(self, flush_mock): @@ -492,7 +492,7 @@ async def test__flush_timer(self, flush_mock): assert flush_mock.call_count == loop_num @mock.patch( - "google.cloud.bigtable.data.mutations_batcher.MutationsBatcher._schedule_flush" + "google.cloud.bigtable.data._async.mutations_batcher.MutationsBatcherAsync._schedule_flush" ) @pytest.mark.asyncio async def test__flush_timer_no_mutations(self, flush_mock): @@ -511,7 +511,7 @@ async def test__flush_timer_no_mutations(self, flush_mock): assert flush_mock.call_count == 0 @mock.patch( - "google.cloud.bigtable.data.mutations_batcher.MutationsBatcher._schedule_flush" + "google.cloud.bigtable.data._async.mutations_batcher.MutationsBatcherAsync._schedule_flush" ) @pytest.mark.asyncio async def test__flush_timer_close(self, flush_mock): @@ -577,9 +577,9 @@ async def test_append_flush_runs_after_limit_hit(self): If the user appends a bunch of entries above the flush limits back-to-back, it should still flush in a single task """ - from google.cloud.bigtable.data.mutations_batcher import MutationsBatcher + from google.cloud.bigtable.data._async.mutations_batcher import MutationsBatcherAsync - with mock.patch.object(MutationsBatcher, "_execute_mutate_rows") as op_mock: + with mock.patch.object(MutationsBatcherAsync, "_execute_mutate_rows") as op_mock: async with self._make_one(flush_limit_bytes=100) as instance: # mock network calls async def mock_call(*args, **kwargs): @@ -789,7 +789,7 @@ async def test__flush_internal_with_errors( """ errors returned from _execute_mutate_rows should be added to internal exceptions """ - from 
google.cloud.bigtable.data.data import exceptions + from google.cloud.bigtable.data import exceptions num_entries = 10 expected_errors = [ @@ -861,7 +861,7 @@ async def test_timer_flush_end_to_end(self): @pytest.mark.asyncio @mock.patch( - "google.cloud.bigtable.data.mutations_batcher._MutateRowsOperation", + "google.cloud.bigtable.data._async.mutations_batcher._MutateRowsOperationAsync", ) async def test__execute_mutate_rows(self, mutate_rows): mutate_rows.return_value = AsyncMock() @@ -884,7 +884,7 @@ async def test__execute_mutate_rows(self, mutate_rows): assert result == [] @pytest.mark.asyncio - @mock.patch("google.cloud.bigtable.data.mutations_batcher._MutateRowsOperation.start") + @mock.patch("google.cloud.bigtable.data._async.mutations_batcher._MutateRowsOperationAsync.start") async def test__execute_mutate_rows_returns_errors(self, mutate_rows): """Errors from operation should be retruned as list""" from google.cloud.bigtable.data.exceptions import ( @@ -911,7 +911,7 @@ async def test__execute_mutate_rows_returns_errors(self, mutate_rows): @pytest.mark.asyncio async def test__raise_exceptions(self): """Raise exceptions and reset error state""" - from google.cloud.bigtable.data.data import exceptions + from google.cloud.bigtable.data import exceptions expected_total = 1201 expected_exceptions = [RuntimeError("mock")] * 3 @@ -958,7 +958,7 @@ async def test_close(self): @pytest.mark.asyncio async def test_close_w_exceptions(self): """Raise exceptions on close""" - from google.cloud.bigtable.data.data import exceptions + from google.cloud.bigtable.data import exceptions expected_total = 10 expected_exceptions = [RuntimeError("mock")] @@ -1002,7 +1002,7 @@ async def test_atexit_registration(self): import atexit with mock.patch( - "google.cloud.bigtable.data.mutations_batcher.MutationsBatcher._on_exit" + "google.cloud.bigtable.data._async.mutations_batcher.MutationsBatcherAsync._on_exit" ) as on_exit_mock: async with self._make_one(): assert on_exit_mock.call_count == 0 @@ -1014,7 +1014,7 @@ async def test_atexit_registration(self): @pytest.mark.asyncio @mock.patch( - "google.cloud.bigtable.data.mutations_batcher._MutateRowsOperation", + "google.cloud.bigtable.data._async.mutations_batcher._MutateRowsOperationAsync", ) async def test_timeout_args_passed(self, mutate_rows): """ From 6f12cdaaaa56e0a94317c91c03565544a733e402 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Sat, 24 Jun 2023 10:13:20 -0700 Subject: [PATCH 191/213] got unit tests passing --- tests/unit/data/_async/test__mutate_rows.py | 16 ++++++++-------- tests/unit/data/_async/test_client.py | 2 +- tests/unit/data/test__read_rows_state_machine.py | 10 +++++----- tests/unit/data/test_mutations.py | 2 +- tests/unit/data/test_read_rows_acceptance.py | 2 +- 5 files changed, 16 insertions(+), 16 deletions(-) diff --git a/tests/unit/data/_async/test__mutate_rows.py b/tests/unit/data/_async/test__mutate_rows.py index a31862b50..cffe2c258 100644 --- a/tests/unit/data/_async/test__mutate_rows.py +++ b/tests/unit/data/_async/test__mutate_rows.py @@ -36,9 +36,9 @@ def _make_mutation(count=1, size=1): class TestMutateRowsOperation: def _target_class(self): - from google.cloud.bigtable.data._mutate_rows import _MutateRowsOperation + from google.cloud.bigtable.data._async._mutate_rows import _MutateRowsOperationAsync - return _MutateRowsOperation + return _MutateRowsOperationAsync def _make_one(self, *args, **kwargs): if not args: @@ -73,7 +73,7 @@ def test_ctor(self): """ test that constructor sets all the attributes correctly """ - 
from google.cloud.bigtable.data._mutate_rows import _MutateRowsIncomplete + from google.cloud.bigtable.data.exceptions import _MutateRowsIncomplete from google.api_core.exceptions import DeadlineExceeded from google.api_core.exceptions import ServiceUnavailable @@ -116,7 +116,7 @@ def test_ctor_too_many_entries(self): """ should raise an error if an operation is created with more than 100,000 entries """ - from google.cloud.bigtable.data._mutate_rows import ( + from google.cloud.bigtable.data._async._mutate_rows import ( MUTATE_ROWS_REQUEST_MUTATION_LIMIT, ) @@ -204,7 +204,7 @@ async def test_mutate_rows_exception_retryable_eventually_pass(self, exc_type): """ If an exception fails but eventually passes, it should not raise an exception """ - from google.cloud.bigtable.data._mutate_rows import _MutateRowsOperation + from google.cloud.bigtable.data._async._mutate_rows import _MutateRowsOperationAsync client = mock.Mock() table = mock.Mock() @@ -213,7 +213,7 @@ async def test_mutate_rows_exception_retryable_eventually_pass(self, exc_type): expected_cause = exc_type("retry") num_retries = 2 with mock.patch.object( - _MutateRowsOperation, + _MutateRowsOperationAsync, "_run_attempt", AsyncMock(), ) as attempt_mock: @@ -229,7 +229,7 @@ async def test_mutate_rows_incomplete_ignored(self): """ MutateRowsIncomplete exceptions should not be added to error list """ - from google.cloud.bigtable.data._mutate_rows import _MutateRowsIncomplete + from google.cloud.bigtable.data.exceptions import _MutateRowsIncomplete from google.cloud.bigtable.data.exceptions import MutationsExceptionGroup from google.api_core.exceptions import DeadlineExceeded @@ -286,7 +286,7 @@ async def test_run_attempt_empty_request(self): @pytest.mark.asyncio async def test_run_attempt_partial_success_retryable(self): """Some entries succeed, but one fails. 
Should report the proper index, and raise incomplete exception""" - from google.cloud.bigtable.data._mutate_rows import _MutateRowsIncomplete + from google.cloud.bigtable.data.exceptions import _MutateRowsIncomplete success_mutation = _make_mutation() success_mutation_2 = _make_mutation() diff --git a/tests/unit/data/_async/test_client.py b/tests/unit/data/_async/test_client.py index 78eaa6b20..0d63d51e0 100644 --- a/tests/unit/data/_async/test_client.py +++ b/tests/unit/data/_async/test_client.py @@ -120,7 +120,7 @@ async def test_ctor_dict_options(self): from google.api_core.client_options import ClientOptions client_options = {"api_endpoint": "foo.bar:1234"} - with mock.patch.object(self._get_target_class(), "__init__") as bigtable_client_init: + with mock.patch.object(BigtableAsyncClient, "__init__") as bigtable_client_init: try: self._make_one(client_options=client_options) except TypeError: diff --git a/tests/unit/data/test__read_rows_state_machine.py b/tests/unit/data/test__read_rows_state_machine.py index b3873b287..03ca4120b 100644 --- a/tests/unit/data/test__read_rows_state_machine.py +++ b/tests/unit/data/test__read_rows_state_machine.py @@ -35,7 +35,7 @@ class TestStateMachine(unittest.TestCase): @staticmethod def _get_target_class(): - from google.cloud.bigtable.data._async._read_rows import _StateMachine + from google.cloud.bigtable.data._read_rows_state_machine import _StateMachine return _StateMachine @@ -43,7 +43,7 @@ def _make_one(self, *args, **kwargs): return self._get_target_class()(*args, **kwargs) def test_ctor(self): - from google.cloud.bigtable.data._async._read_rows import _RowBuilder + from google.cloud.bigtable.data._read_rows_state_machine import _RowBuilder instance = self._make_one() assert instance.last_seen_row_key is None @@ -500,7 +500,7 @@ def test_AWAITING_CELL_VALUE_final_chunk(self): class TestRowBuilder(unittest.TestCase): @staticmethod def _get_target_class(): - from google.cloud.bigtable.data._async._read_rows import _RowBuilder + from google.cloud.bigtable.data._read_rows_state_machine import _RowBuilder return _RowBuilder @@ -648,7 +648,7 @@ def test_reset(self): class TestChunkHasField: def test__chunk_has_field_empty(self): from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse - from google.cloud.bigtable.data._async._read_rows import _chunk_has_field + from google.cloud.bigtable.data._read_rows_state_machine import _chunk_has_field chunk = ReadRowsResponse.CellChunk()._pb assert not _chunk_has_field(chunk, "family_name") @@ -656,7 +656,7 @@ def test__chunk_has_field_empty(self): def test__chunk_has_field_populated_empty_strings(self): from google.cloud.bigtable_v2.types.bigtable import ReadRowsResponse - from google.cloud.bigtable.data._async._read_rows import _chunk_has_field + from google.cloud.bigtable.data._read_rows_state_machine import _chunk_has_field chunk = ReadRowsResponse.CellChunk(qualifier=b"", family_name="")._pb assert _chunk_has_field(chunk, "family_name") diff --git a/tests/unit/data/test_mutations.py b/tests/unit/data/test_mutations.py index 5a93c7881..8365dbd02 100644 --- a/tests/unit/data/test_mutations.py +++ b/tests/unit/data/test_mutations.py @@ -506,7 +506,7 @@ def test_ctor(self): def test_ctor_over_limit(self): """Should raise error if mutations exceed MAX_MUTATIONS_PER_ENTRY""" - from google.cloud.bigtable.data._mutate_rows import ( + from google.cloud.bigtable.data.mutations import ( MUTATE_ROWS_REQUEST_MUTATION_LIMIT, ) diff --git a/tests/unit/data/test_read_rows_acceptance.py 
b/tests/unit/data/test_read_rows_acceptance.py index 813e5b93e..a644b89d2 100644 --- a/tests/unit/data/test_read_rows_acceptance.py +++ b/tests/unit/data/test_read_rows_acceptance.py @@ -24,7 +24,7 @@ from google.cloud.bigtable.data._async.client import BigtableDataClientAsync from google.cloud.bigtable.data.exceptions import InvalidChunk from google.cloud.bigtable.data._async._read_rows import _ReadRowsOperationAsync -from google.clout.bigtable.data import StateMachine +from google.cloud.bigtable.data._read_rows_state_machine import _StateMachine from google.cloud.bigtable.data.row import Row from ..v2_client.test_row_merger import ReadRowsTest, TestFile From 67f5a3d04de1dcff6dedfdeef5e65b75c60625f4 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Sat, 24 Jun 2023 10:17:29 -0700 Subject: [PATCH 192/213] fixed system tests --- tests/system/data/__init__.py | 15 ++++++ tests/system/{ => data}/test_system.py | 66 +++++++++++++------------- tests/unit/v2_client/test_batcher.py | 31 ++++++------ 3 files changed, 63 insertions(+), 49 deletions(-) create mode 100644 tests/system/data/__init__.py rename tests/system/{ => data}/test_system.py (93%) diff --git a/tests/system/data/__init__.py b/tests/system/data/__init__.py new file mode 100644 index 000000000..89a37dc92 --- /dev/null +++ b/tests/system/data/__init__.py @@ -0,0 +1,15 @@ +# -*- coding: utf-8 -*- +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# diff --git a/tests/system/test_system.py b/tests/system/data/test_system.py similarity index 93% rename from tests/system/test_system.py rename to tests/system/data/test_system.py index e1771202a..548433444 100644 --- a/tests/system/test_system.py +++ b/tests/system/data/test_system.py @@ -20,7 +20,7 @@ from google.api_core import retry from google.api_core.exceptions import ClientError -from google.cloud.bigtable.read_modify_write_rules import MAX_INCREMENT_VALUE +from google.cloud.bigtable.data.read_modify_write_rules import MAX_INCREMENT_VALUE TEST_FAMILY = "test-family" TEST_FAMILY_2 = "test-family-2" @@ -135,10 +135,10 @@ def table_id(table_admin_client, project_id, instance_id): @pytest_asyncio.fixture(scope="session") async def client(): - from google.cloud.bigtable import BigtableDataClient + from google.cloud.bigtable.data import BigtableDataClientAsync project = os.getenv("GOOGLE_CLOUD_PROJECT") or None - async with BigtableDataClient(project=project) as client: + async with BigtableDataClientAsync(project=project) as client: yield client @@ -201,7 +201,7 @@ async def _retrieve_cell_value(table, row_key): """ Helper to read an individual row """ - from google.cloud.bigtable import ReadRowsQuery + from google.cloud.bigtable.data import ReadRowsQuery row_list = await table.read_rows(ReadRowsQuery(row_keys=row_key)) assert len(row_list) == 1 @@ -216,7 +216,7 @@ async def _create_row_and_mutation( """ Helper to create a new row, and a sample set_cell mutation to change its value """ - from google.cloud.bigtable.mutations import SetCell + from google.cloud.bigtable.data.mutations import SetCell row_key = uuid.uuid4().hex.encode() family = TEST_FAMILY @@ -303,7 +303,7 @@ async def test_bulk_mutations_set_cell(client, table, temp_rows): """ Ensure cells can be set properly """ - from google.cloud.bigtable.mutations import RowMutationEntry + from google.cloud.bigtable.data.mutations import RowMutationEntry new_value = uuid.uuid4().hex.encode() row_key, mutation = await _create_row_and_mutation( @@ -323,7 +323,7 @@ async def test_mutations_batcher_context_manager(client, table, temp_rows): """ test batcher with context manager. 
Should flush on exit """ - from google.cloud.bigtable.mutations import RowMutationEntry + from google.cloud.bigtable.data.mutations import RowMutationEntry new_value, new_value2 = [uuid.uuid4().hex.encode() for _ in range(2)] row_key, mutation = await _create_row_and_mutation( @@ -349,7 +349,7 @@ async def test_mutations_batcher_timer_flush(client, table, temp_rows): """ batch should occur after flush_interval seconds """ - from google.cloud.bigtable.mutations import RowMutationEntry + from google.cloud.bigtable.data.mutations import RowMutationEntry new_value = uuid.uuid4().hex.encode() row_key, mutation = await _create_row_and_mutation( @@ -373,7 +373,7 @@ async def test_mutations_batcher_count_flush(client, table, temp_rows): """ batch should flush after flush_limit_mutation_count mutations """ - from google.cloud.bigtable.mutations import RowMutationEntry + from google.cloud.bigtable.data.mutations import RowMutationEntry new_value, new_value2 = [uuid.uuid4().hex.encode() for _ in range(2)] row_key, mutation = await _create_row_and_mutation( @@ -407,7 +407,7 @@ async def test_mutations_batcher_bytes_flush(client, table, temp_rows): """ batch should flush after flush_limit_bytes bytes """ - from google.cloud.bigtable.mutations import RowMutationEntry + from google.cloud.bigtable.data.mutations import RowMutationEntry new_value, new_value2 = [uuid.uuid4().hex.encode() for _ in range(2)] row_key, mutation = await _create_row_and_mutation( @@ -442,7 +442,7 @@ async def test_mutations_batcher_no_flush(client, table, temp_rows): """ test with no flush requirements met """ - from google.cloud.bigtable.mutations import RowMutationEntry + from google.cloud.bigtable.data.mutations import RowMutationEntry new_value = uuid.uuid4().hex.encode() start_value = b"unchanged" @@ -494,7 +494,7 @@ async def test_read_modify_write_row_increment( """ test read_modify_write_row """ - from google.cloud.bigtable.read_modify_write_rules import IncrementRule + from google.cloud.bigtable.data.read_modify_write_rules import IncrementRule row_key = b"test-row-key" family = TEST_FAMILY @@ -531,7 +531,7 @@ async def test_read_modify_write_row_append( """ test read_modify_write_row """ - from google.cloud.bigtable.read_modify_write_rules import AppendValueRule + from google.cloud.bigtable.data.read_modify_write_rules import AppendValueRule row_key = b"test-row-key" family = TEST_FAMILY @@ -554,8 +554,8 @@ async def test_read_modify_write_row_chained(client, table, temp_rows): """ test read_modify_write_row with multiple rules """ - from google.cloud.bigtable.read_modify_write_rules import AppendValueRule - from google.cloud.bigtable.read_modify_write_rules import IncrementRule + from google.cloud.bigtable.data.read_modify_write_rules import AppendValueRule + from google.cloud.bigtable.data.read_modify_write_rules import IncrementRule row_key = b"test-row-key" family = TEST_FAMILY @@ -599,8 +599,8 @@ async def test_check_and_mutate( """ test that check_and_mutate_row works applies the right mutations, and returns the right result """ - from google.cloud.bigtable.mutations import SetCell - from google.cloud.bigtable.row_filters import ValueRangeFilter + from google.cloud.bigtable.data.mutations import SetCell + from google.cloud.bigtable.data.row_filters import ValueRangeFilter row_key = b"test-row-key" family = TEST_FAMILY @@ -671,7 +671,7 @@ async def test_read_rows_sharded_simple(table, temp_rows): """ Test read rows sharded with two queries """ - from google.cloud.bigtable.read_rows_query import ReadRowsQuery + from 
google.cloud.bigtable.data.read_rows_query import ReadRowsQuery await temp_rows.add_row(b"a") await temp_rows.add_row(b"b") @@ -693,8 +693,8 @@ async def test_read_rows_sharded_from_sample(table, temp_rows): """ Test end-to-end sharding """ - from google.cloud.bigtable.read_rows_query import ReadRowsQuery - from google.cloud.bigtable.read_rows_query import RowRange + from google.cloud.bigtable.data.read_rows_query import ReadRowsQuery + from google.cloud.bigtable.data.read_rows_query import RowRange await temp_rows.add_row(b"a") await temp_rows.add_row(b"b") @@ -717,8 +717,8 @@ async def test_read_rows_sharded_filters_limits(table, temp_rows): """ Test read rows sharded with filters and limits """ - from google.cloud.bigtable.read_rows_query import ReadRowsQuery - from google.cloud.bigtable.row_filters import ApplyLabelFilter + from google.cloud.bigtable.data.read_rows_query import ReadRowsQuery + from google.cloud.bigtable.data.row_filters import ApplyLabelFilter await temp_rows.add_row(b"a") await temp_rows.add_row(b"b") @@ -745,8 +745,8 @@ async def test_read_rows_range_query(table, temp_rows): """ Ensure that the read_rows method works """ - from google.cloud.bigtable import ReadRowsQuery - from google.cloud.bigtable import RowRange + from google.cloud.bigtable.data import ReadRowsQuery + from google.cloud.bigtable.data import RowRange await temp_rows.add_row(b"a") await temp_rows.add_row(b"b") @@ -766,7 +766,7 @@ async def test_read_rows_single_key_query(table, temp_rows): """ Ensure that the read_rows method works with specified query """ - from google.cloud.bigtable import ReadRowsQuery + from google.cloud.bigtable.data import ReadRowsQuery await temp_rows.add_row(b"a") await temp_rows.add_row(b"b") @@ -786,8 +786,8 @@ async def test_read_rows_with_filter(table, temp_rows): """ ensure filters are applied """ - from google.cloud.bigtable import ReadRowsQuery - from google.cloud.bigtable.row_filters import ApplyLabelFilter + from google.cloud.bigtable.data import ReadRowsQuery + from google.cloud.bigtable.data.row_filters import ApplyLabelFilter await temp_rows.add_row(b"a") await temp_rows.add_row(b"b") @@ -828,7 +828,7 @@ async def test_read_rows_stream_inactive_timer(table, temp_rows): """ Ensure that the read_rows_stream method works """ - from google.cloud.bigtable.exceptions import IdleTimeout + from google.cloud.bigtable.data.exceptions import IdleTimeout await temp_rows.add_row(b"row_key_1") await temp_rows.add_row(b"row_key_2") @@ -848,7 +848,7 @@ async def test_read_row(table, temp_rows): """ Test read_row (single row helper) """ - from google.cloud.bigtable import Row + from google.cloud.bigtable.data import Row await temp_rows.add_row(b"row_key_1", value=b"value") row = await table.read_row(b"row_key_1") @@ -877,8 +877,8 @@ async def test_read_row_w_filter(table, temp_rows): """ Test read_row (single row helper) """ - from google.cloud.bigtable import Row - from google.cloud.bigtable.row_filters import ApplyLabelFilter + from google.cloud.bigtable.data import Row + from google.cloud.bigtable.data.row_filters import ApplyLabelFilter await temp_rows.add_row(b"row_key_1", value=b"value") expected_label = "test-label" @@ -943,8 +943,8 @@ async def test_literal_value_filter( Literal value filter does complex escaping on re2 strings. 
Make sure inputs are properly interpreted by the server """ - from google.cloud.bigtable.row_filters import LiteralValueFilter - from google.cloud.bigtable import ReadRowsQuery + from google.cloud.bigtable.data.row_filters import LiteralValueFilter + from google.cloud.bigtable.data import ReadRowsQuery f = LiteralValueFilter(filter_input) await temp_rows.add_row(b"row_key_1", value=cell_value) diff --git a/tests/unit/v2_client/test_batcher.py b/tests/unit/v2_client/test_batcher.py index 7284652f4..ab511e030 100644 --- a/tests/unit/v2_client/test_batcher.py +++ b/tests/unit/v2_client/test_batcher.py @@ -204,22 +204,21 @@ def test_mutations_batcher_response_with_error_codes(): mocked_response = [Status(code=1), Status(code=5)] - with mock.patch("test_batcher._Table") as mocked_table: - table = mocked_table.return_value - mutation_batcher = MutationsBatcher(table=table) - - row1 = DirectRow(row_key=b"row_key") - row2 = DirectRow(row_key=b"row_key") - table.mutate_rows.return_value = mocked_response - - mutation_batcher.mutate_rows([row1, row2]) - with pytest.raises(MutationsBatchError) as exc: - mutation_batcher.close() - assert exc.value.message == "Errors in batch mutations." - assert len(exc.value.exc) == 2 - - assert exc.value.exc[0].message == mocked_response[0].message - assert exc.value.exc[1].message == mocked_response[1].message + table = mock.Mock() + mutation_batcher = MutationsBatcher(table=table) + + row1 = DirectRow(row_key=b"row_key") + row2 = DirectRow(row_key=b"row_key") + table.mutate_rows.return_value = mocked_response + + mutation_batcher.mutate_rows([row1, row2]) + with pytest.raises(MutationsBatchError) as exc: + mutation_batcher.close() + assert exc.value.message == "Errors in batch mutations." + assert len(exc.value.exc) == 2 + + assert exc.value.exc[0].message == mocked_response[0].message + assert exc.value.exc[1].message == mocked_response[1].message def test_flow_control_event_is_set_when_not_blocked(): From d7b6dca43c4709cd6a8b4bdaf53ae624fcdd589c Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Sat, 24 Jun 2023 10:37:49 -0700 Subject: [PATCH 193/213] fixed v2 system tests --- noxfile.py | 1 - tests/system/v2_client/conftest.py | 2 +- tests/system/v2_client/test_data_api.py | 20 +++++++++---------- tests/system/v2_client/test_instance_admin.py | 6 +++--- tests/system/v2_client/test_table_admin.py | 12 +++++------ 5 files changed, 20 insertions(+), 21 deletions(-) diff --git a/noxfile.py b/noxfile.py index 164d138bd..6ec253e21 100644 --- a/noxfile.py +++ b/noxfile.py @@ -318,7 +318,6 @@ def system(session): "py.test", "--quiet", f"--junitxml=system_{session.python}_sponge_log.xml", - "--ignore=tests/system/v2_client", system_test_folder_path, *session.posargs, ) diff --git a/tests/system/v2_client/conftest.py b/tests/system/v2_client/conftest.py index bb4f54b41..f39fcba88 100644 --- a/tests/system/v2_client/conftest.py +++ b/tests/system/v2_client/conftest.py @@ -17,7 +17,7 @@ import pytest from test_utils.system import unique_resource_id -from google.cloud.bigtable.deprecated.client import Client +from google.cloud.bigtable.client import Client from google.cloud.environment_vars import BIGTABLE_EMULATOR from . 
import _helpers diff --git a/tests/system/v2_client/test_data_api.py b/tests/system/v2_client/test_data_api.py index 551a221ee..2ca7e1504 100644 --- a/tests/system/v2_client/test_data_api.py +++ b/tests/system/v2_client/test_data_api.py @@ -60,7 +60,7 @@ def rows_to_delete(): def test_table_read_rows_filter_millis(data_table): - from google.cloud.bigtable.deprecated import row_filters + from google.cloud.bigtable import row_filters end = datetime.datetime.now() start = end - datetime.timedelta(minutes=60) @@ -158,8 +158,8 @@ def test_table_drop_by_prefix(data_table, rows_to_delete): def test_table_read_rows_w_row_set(data_table, rows_to_delete): - from google.cloud.bigtable.deprecated.row_set import RowSet - from google.cloud.bigtable.deprecated.row_set import RowRange + from google.cloud.bigtable.row_set import RowSet + from google.cloud.bigtable.row_set import RowRange row_keys = [ b"row_key_1", @@ -189,7 +189,7 @@ def test_table_read_rows_w_row_set(data_table, rows_to_delete): def test_rowset_add_row_range_w_pfx(data_table, rows_to_delete): - from google.cloud.bigtable.deprecated.row_set import RowSet + from google.cloud.bigtable.row_set import RowSet row_keys = [ b"row_key_1", @@ -234,7 +234,7 @@ def _write_to_row(row1, row2, row3, row4): from google.cloud._helpers import _datetime_from_microseconds from google.cloud._helpers import _microseconds_from_datetime from google.cloud._helpers import UTC - from google.cloud.bigtable.deprecated.row_data import Cell + from google.cloud.bigtable.row_data import Cell timestamp1 = datetime.datetime.utcnow().replace(tzinfo=UTC) timestamp1_micros = _microseconds_from_datetime(timestamp1) @@ -290,7 +290,7 @@ def test_table_read_row(data_table, rows_to_delete): def test_table_read_rows(data_table, rows_to_delete): - from google.cloud.bigtable.deprecated.row_data import PartialRowData + from google.cloud.bigtable.row_data import PartialRowData row = data_table.direct_row(ROW_KEY) rows_to_delete.append(row) @@ -326,10 +326,10 @@ def test_table_read_rows(data_table, rows_to_delete): def test_read_with_label_applied(data_table, rows_to_delete, skip_on_emulator): - from google.cloud.bigtable.deprecated.row_filters import ApplyLabelFilter - from google.cloud.bigtable.deprecated.row_filters import ColumnQualifierRegexFilter - from google.cloud.bigtable.deprecated.row_filters import RowFilterChain - from google.cloud.bigtable.deprecated.row_filters import RowFilterUnion + from google.cloud.bigtable.row_filters import ApplyLabelFilter + from google.cloud.bigtable.row_filters import ColumnQualifierRegexFilter + from google.cloud.bigtable.row_filters import RowFilterChain + from google.cloud.bigtable.row_filters import RowFilterUnion row = data_table.direct_row(ROW_KEY) rows_to_delete.append(row) diff --git a/tests/system/v2_client/test_instance_admin.py b/tests/system/v2_client/test_instance_admin.py index debe1ab56..e5e311213 100644 --- a/tests/system/v2_client/test_instance_admin.py +++ b/tests/system/v2_client/test_instance_admin.py @@ -12,8 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -from google.cloud.bigtable.deprecated import enums -from google.cloud.bigtable.deprecated.table import ClusterState +from google.cloud.bigtable import enums +from google.cloud.bigtable.table import ClusterState from . 
import _helpers @@ -149,7 +149,7 @@ def test_instance_create_prod( instances_to_delete, skip_on_emulator, ): - from google.cloud.bigtable.deprecated import enums + from google.cloud.bigtable import enums alt_instance_id = f"ndef{unique_suffix}" instance = admin_client.instance(alt_instance_id, labels=instance_labels) diff --git a/tests/system/v2_client/test_table_admin.py b/tests/system/v2_client/test_table_admin.py index 107ed41bf..c50189013 100644 --- a/tests/system/v2_client/test_table_admin.py +++ b/tests/system/v2_client/test_table_admin.py @@ -97,7 +97,7 @@ def test_table_create_w_families( data_instance_populated, tables_to_delete, ): - from google.cloud.bigtable.deprecated.column_family import MaxVersionsGCRule + from google.cloud.bigtable.column_family import MaxVersionsGCRule temp_table_id = "test-create-table-with-failies" column_family_id = "col-fam-id1" @@ -134,7 +134,7 @@ def test_table_create_w_split_keys( def test_column_family_create(data_instance_populated, tables_to_delete): - from google.cloud.bigtable.deprecated.column_family import MaxVersionsGCRule + from google.cloud.bigtable.column_family import MaxVersionsGCRule temp_table_id = "test-create-column-family" temp_table = data_instance_populated.table(temp_table_id) @@ -158,7 +158,7 @@ def test_column_family_create(data_instance_populated, tables_to_delete): def test_column_family_update(data_instance_populated, tables_to_delete): - from google.cloud.bigtable.deprecated.column_family import MaxVersionsGCRule + from google.cloud.bigtable.column_family import MaxVersionsGCRule temp_table_id = "test-update-column-family" temp_table = data_instance_populated.table(temp_table_id) @@ -219,8 +219,8 @@ def test_table_get_iam_policy( def test_table_set_iam_policy( service_account, data_instance_populated, tables_to_delete, skip_on_emulator ): - from google.cloud.bigtable.deprecated.policy import BIGTABLE_ADMIN_ROLE - from google.cloud.bigtable.deprecated.policy import Policy + from google.cloud.bigtable.policy import BIGTABLE_ADMIN_ROLE + from google.cloud.bigtable.policy import Policy temp_table_id = "test-set-iam-policy-table" temp_table = data_instance_populated.table(temp_table_id) @@ -264,7 +264,7 @@ def test_table_backup( skip_on_emulator, ): from google.cloud._helpers import _datetime_to_pb_timestamp - from google.cloud.bigtable.deprecated import enums + from google.cloud.bigtable import enums temp_table_id = "test-backup-table" temp_table = data_instance_populated.table(temp_table_id) From 14a3d3d6a7c12173d33b534f540dc79e8bc99be6 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Sat, 24 Jun 2023 10:56:33 -0700 Subject: [PATCH 194/213] pin to api_core with streaming retries --- noxfile.py | 4 ++-- setup.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/noxfile.py b/noxfile.py index 6ec253e21..db24f50e0 100644 --- a/noxfile.py +++ b/noxfile.py @@ -40,7 +40,7 @@ "pytest-asyncio", ] UNIT_TEST_EXTERNAL_DEPENDENCIES = [ - "git+https://github.com/googleapis/python-api-core.git@retry_generators" + # "git+https://github.com/googleapis/python-api-core.git@retry_generators" ] UNIT_TEST_LOCAL_DEPENDENCIES = [] UNIT_TEST_DEPENDENCIES = [] @@ -55,7 +55,7 @@ "google-cloud-testutils", ] SYSTEM_TEST_EXTERNAL_DEPENDENCIES = [ - "git+https://github.com/googleapis/python-api-core.git@retry_generators" + # "git+https://github.com/googleapis/python-api-core.git@retry_generators" ] SYSTEM_TEST_LOCAL_DEPENDENCIES = [] UNIT_TEST_DEPENDENCIES = [] diff --git a/setup.py b/setup.py index 49bb10adc..0193516dd 100644 --- 
a/setup.py +++ b/setup.py @@ -37,7 +37,7 @@ # 'Development Status :: 5 - Production/Stable' release_status = "Development Status :: 5 - Production/Stable" dependencies = [ - "google-api-core[grpc] >= 1.34.0, <3.0.0dev,!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,!=2.8.*,!=2.9.*,!=2.10.*", + "google-api-core[grpc] == 2.12.0.dev0", #TODO: change to >= after streaming retries is merged "google-cloud-core >= 1.4.1, <3.0.0dev", "grpc-google-iam-v1 >= 0.12.4, <1.0.0dev", "proto-plus >= 1.22.0, <2.0.0dev", From 563a13395b9034a0c3fc35f750dd5151a43c0f99 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Sat, 24 Jun 2023 11:18:34 -0700 Subject: [PATCH 195/213] fixed type errors --- .../bigtable/data/_async/_mutate_rows.py | 4 +-- .../cloud/bigtable/data/_async/_read_rows.py | 1 - google/cloud/bigtable/data/_async/client.py | 4 +-- .../bigtable/data/_async/mutations_batcher.py | 9 ++++-- setup.py | 2 +- tests/unit/data/_async/test__mutate_rows.py | 8 ++++-- tests/unit/data/_async/test__read_rows.py | 9 +++--- tests/unit/data/_async/test_client.py | 9 ++++-- .../data/_async/test_mutations_batcher.py | 28 ++++++++++++++----- .../data/test__read_rows_state_machine.py | 3 -- .../unit/data/test_read_modify_write_rules.py | 4 ++- tests/unit/data/test_read_rows_acceptance.py | 4 ++- 12 files changed, 56 insertions(+), 29 deletions(-) diff --git a/google/cloud/bigtable/data/_async/_mutate_rows.py b/google/cloud/bigtable/data/_async/_mutate_rows.py index 75b6bef81..ac491adaf 100644 --- a/google/cloud/bigtable/data/_async/_mutate_rows.py +++ b/google/cloud/bigtable/data/_async/_mutate_rows.py @@ -31,8 +31,8 @@ from google.cloud.bigtable_v2.services.bigtable.async_client import ( BigtableAsyncClient, ) - from google.cloud.bigtable.data.client import Table from google.cloud.bigtable.data.mutations import RowMutationEntry + from google.cloud.bigtable.data._async.client import TableAsync class _MutateRowsOperationAsync: @@ -49,7 +49,7 @@ class _MutateRowsOperationAsync: def __init__( self, gapic_client: "BigtableAsyncClient", - table: "Table", + table: "TableAsync", mutation_entries: list["RowMutationEntry"], operation_timeout: float, per_request_timeout: float | None, diff --git a/google/cloud/bigtable/data/_async/_read_rows.py b/google/cloud/bigtable/data/_async/_read_rows.py index 450518b60..c50af21ec 100644 --- a/google/cloud/bigtable/data/_async/_read_rows.py +++ b/google/cloud/bigtable/data/_async/_read_rows.py @@ -23,7 +23,6 @@ Iterator, Callable, Awaitable, - Type, ) import sys import time diff --git a/google/cloud/bigtable/data/_async/client.py b/google/cloud/bigtable/data/_async/client.py index c1f414d58..fa9d9270d 100644 --- a/google/cloud/bigtable/data/_async/client.py +++ b/google/cloud/bigtable/data/_async/client.py @@ -69,8 +69,8 @@ from google.cloud.bigtable.data.row_filters import RowFilterChain if TYPE_CHECKING: - from google.cloud.bigtable.data.data import RowKeySamples - from google.cloud.bigtable.data.data import ShardedQuery + from google.cloud.bigtable.data import RowKeySamples + from google.cloud.bigtable.data import ShardedQuery # used by read_rows_sharded to limit how many requests are attempted in parallel CONCURRENCY_LIMIT = 10 diff --git a/google/cloud/bigtable/data/_async/mutations_batcher.py b/google/cloud/bigtable/data/_async/mutations_batcher.py index 032a53c91..8842f8ddc 100644 --- a/google/cloud/bigtable/data/_async/mutations_batcher.py +++ b/google/cloud/bigtable/data/_async/mutations_batcher.py @@ -25,9 +25,14 @@ from google.cloud.bigtable.data.exceptions 
import FailedMutationEntryError from google.cloud.bigtable.data._async._mutate_rows import _MutateRowsOperationAsync -from google.cloud.bigtable.data._async._mutate_rows import MUTATE_ROWS_REQUEST_MUTATION_LIMIT +from google.cloud.bigtable.data._async._mutate_rows import ( + MUTATE_ROWS_REQUEST_MUTATION_LIMIT, +) from google.cloud.bigtable.data.mutations import Mutation +if TYPE_CHECKING: + from google.cloud.bigtable.data._async.client import TableAsync + # used to make more readable default values _MB_SIZE = 1024 * 1024 @@ -176,7 +181,7 @@ class MutationsBatcherAsync: def __init__( self, - table: TableAsync, + table: "TableAsync", *, flush_interval: float | None = 5, flush_limit_mutation_count: int | None = 1000, diff --git a/setup.py b/setup.py index 0193516dd..e05b37c79 100644 --- a/setup.py +++ b/setup.py @@ -37,7 +37,7 @@ # 'Development Status :: 5 - Production/Stable' release_status = "Development Status :: 5 - Production/Stable" dependencies = [ - "google-api-core[grpc] == 2.12.0.dev0", #TODO: change to >= after streaming retries is merged + "google-api-core[grpc] == 2.12.0.dev0", # TODO: change to >= after streaming retries is merged "google-cloud-core >= 1.4.1, <3.0.0dev", "grpc-google-iam-v1 >= 0.12.4, <1.0.0dev", "proto-plus >= 1.22.0, <2.0.0dev", diff --git a/tests/unit/data/_async/test__mutate_rows.py b/tests/unit/data/_async/test__mutate_rows.py index cffe2c258..f77455d60 100644 --- a/tests/unit/data/_async/test__mutate_rows.py +++ b/tests/unit/data/_async/test__mutate_rows.py @@ -36,7 +36,9 @@ def _make_mutation(count=1, size=1): class TestMutateRowsOperation: def _target_class(self): - from google.cloud.bigtable.data._async._mutate_rows import _MutateRowsOperationAsync + from google.cloud.bigtable.data._async._mutate_rows import ( + _MutateRowsOperationAsync, + ) return _MutateRowsOperationAsync @@ -204,7 +206,9 @@ async def test_mutate_rows_exception_retryable_eventually_pass(self, exc_type): """ If an exception fails but eventually passes, it should not raise an exception """ - from google.cloud.bigtable.data._async._mutate_rows import _MutateRowsOperationAsync + from google.cloud.bigtable.data._async._mutate_rows import ( + _MutateRowsOperationAsync, + ) client = mock.Mock() table = mock.Mock() diff --git a/tests/unit/data/_async/test__read_rows.py b/tests/unit/data/_async/test__read_rows.py index ffc1286d5..f35244227 100644 --- a/tests/unit/data/_async/test__read_rows.py +++ b/tests/unit/data/_async/test__read_rows.py @@ -15,7 +15,6 @@ import sys import asyncio -from google.cloud.bigtable.data.exceptions import InvalidChunk from google.cloud.bigtable.data._async._read_rows import _ReadRowsOperationAsync # try/except added for compatibility with python < 3.8 @@ -56,7 +55,8 @@ def test_ctor_defaults(self): default_operation_timeout = 600 time_gen_mock = mock.Mock() with mock.patch( - "google.cloud.bigtable.data._async._read_rows._attempt_timeout_generator", time_gen_mock + "google.cloud.bigtable.data._async._read_rows._attempt_timeout_generator", + time_gen_mock, ): instance = self._make_one(request, client) assert time_gen_mock.call_count == 1 @@ -82,7 +82,8 @@ def test_ctor(self): expected_request_timeout = 44 time_gen_mock = mock.Mock() with mock.patch( - "google.cloud.bigtable.data._async._read_rows._attempt_timeout_generator", time_gen_mock + "google.cloud.bigtable.data._async._read_rows._attempt_timeout_generator", + time_gen_mock, ): instance = self._make_one( request, @@ -398,6 +399,7 @@ async def mock_stream(): await instance.aclose() assert 
mock_call.cancel.call_count == 1 + class MockStream(_ReadRowsOperationAsync): """ Mock a _ReadRowsOperationAsync stream for testing @@ -427,7 +429,6 @@ async def aclose(self): pass - class TestReadRowsIteratorAsync: async def mock_stream(self, size=10): for i in range(size): diff --git a/tests/unit/data/_async/test_client.py b/tests/unit/data/_async/test_client.py index 0d63d51e0..da91334a4 100644 --- a/tests/unit/data/_async/test_client.py +++ b/tests/unit/data/_async/test_client.py @@ -955,8 +955,9 @@ def test_client_ctor_sync(self): client = BigtableDataClientAsync(project="project-id") expected_warning = [w for w in warnings if "client.py" in w.filename] assert len(expected_warning) == 1 - assert "BigtableDataClientAsync should be started in an asyncio event loop." in str( - expected_warning[0].message + assert ( + "BigtableDataClientAsync should be started in an asyncio event loop." + in str(expected_warning[0].message) ) assert client.project == "project-id" assert client._channel_refresh_tasks == [] @@ -1318,7 +1319,9 @@ async def test_read_rows_idle_timeout(self): gen = await table.read_rows_stream(query) # should start idle timer on creation start_idle_timer.assert_called_once() - with mock.patch.object(_ReadRowsOperationAsync, "aclose", AsyncMock()) as aclose: + with mock.patch.object( + _ReadRowsOperationAsync, "aclose", AsyncMock() + ) as aclose: # start idle timer with our own value await gen._start_idle_timer(0.1) # should timeout after being abandoned diff --git a/tests/unit/data/_async/test_mutations_batcher.py b/tests/unit/data/_async/test_mutations_batcher.py index 030f0037a..c99513713 100644 --- a/tests/unit/data/_async/test_mutations_batcher.py +++ b/tests/unit/data/_async/test_mutations_batcher.py @@ -33,7 +33,9 @@ def _make_mutation(count=1, size=1): class Test_FlowControl: def _make_one(self, max_mutation_count=10, max_mutation_bytes=100): - from google.cloud.bigtable.data._async.mutations_batcher import _FlowControlAsync + from google.cloud.bigtable.data._async.mutations_batcher import ( + _FlowControlAsync, + ) return _FlowControlAsync(max_mutation_count, max_mutation_bytes) @@ -277,7 +279,9 @@ async def test_add_to_flow_oversize(self): class TestMutationsBatcherAsync: def _get_target_class(self): - from google.cloud.bigtable.data._async.mutations_batcher import MutationsBatcherAsync + from google.cloud.bigtable.data._async.mutations_batcher import ( + MutationsBatcherAsync, + ) return MutationsBatcherAsync @@ -424,14 +428,18 @@ def test_default_argument_consistency(self): both places """ from google.cloud.bigtable.data._async.client import TableAsync - from google.cloud.bigtable.data._async.mutations_batcher import MutationsBatcherAsync + from google.cloud.bigtable.data._async.mutations_batcher import ( + MutationsBatcherAsync, + ) import inspect get_batcher_signature = dict( inspect.signature(TableAsync.mutations_batcher).parameters ) get_batcher_signature.pop("self") - batcher_init_signature = dict(inspect.signature(MutationsBatcherAsync).parameters) + batcher_init_signature = dict( + inspect.signature(MutationsBatcherAsync).parameters + ) batcher_init_signature.pop("table") # both should have same number of arguments assert len(get_batcher_signature.keys()) == len(batcher_init_signature.keys()) @@ -577,9 +585,13 @@ async def test_append_flush_runs_after_limit_hit(self): If the user appends a bunch of entries above the flush limits back-to-back, it should still flush in a single task """ - from google.cloud.bigtable.data._async.mutations_batcher import 
MutationsBatcherAsync + from google.cloud.bigtable.data._async.mutations_batcher import ( + MutationsBatcherAsync, + ) - with mock.patch.object(MutationsBatcherAsync, "_execute_mutate_rows") as op_mock: + with mock.patch.object( + MutationsBatcherAsync, "_execute_mutate_rows" + ) as op_mock: async with self._make_one(flush_limit_bytes=100) as instance: # mock network calls async def mock_call(*args, **kwargs): @@ -884,7 +896,9 @@ async def test__execute_mutate_rows(self, mutate_rows): assert result == [] @pytest.mark.asyncio - @mock.patch("google.cloud.bigtable.data._async.mutations_batcher._MutateRowsOperationAsync.start") + @mock.patch( + "google.cloud.bigtable.data._async.mutations_batcher._MutateRowsOperationAsync.start" + ) async def test__execute_mutate_rows_returns_errors(self, mutate_rows): """Errors from operation should be retruned as list""" from google.cloud.bigtable.data.exceptions import ( diff --git a/tests/unit/data/test__read_rows_state_machine.py b/tests/unit/data/test__read_rows_state_machine.py index 03ca4120b..0d1ee6b06 100644 --- a/tests/unit/data/test__read_rows_state_machine.py +++ b/tests/unit/data/test__read_rows_state_machine.py @@ -661,6 +661,3 @@ def test__chunk_has_field_populated_empty_strings(self): chunk = ReadRowsResponse.CellChunk(qualifier=b"", family_name="")._pb assert _chunk_has_field(chunk, "family_name") assert _chunk_has_field(chunk, "qualifier") - - - diff --git a/tests/unit/data/test_read_modify_write_rules.py b/tests/unit/data/test_read_modify_write_rules.py index dd12f017e..aeb41f19c 100644 --- a/tests/unit/data/test_read_modify_write_rules.py +++ b/tests/unit/data/test_read_modify_write_rules.py @@ -24,7 +24,9 @@ class TestBaseReadModifyWriteRule: def _target_class(self): - from google.cloud.bigtable.data.read_modify_write_rules import ReadModifyWriteRule + from google.cloud.bigtable.data.read_modify_write_rules import ( + ReadModifyWriteRule, + ) return ReadModifyWriteRule diff --git a/tests/unit/data/test_read_rows_acceptance.py b/tests/unit/data/test_read_rows_acceptance.py index a644b89d2..804e4e0fb 100644 --- a/tests/unit/data/test_read_rows_acceptance.py +++ b/tests/unit/data/test_read_rows_acceptance.py @@ -310,6 +310,8 @@ async def _row_stream(): state = _StateMachine() results = [] - async for row in _ReadRowsOperationAsync.merge_row_response_stream(_row_stream(), state): + async for row in _ReadRowsOperationAsync.merge_row_response_stream( + _row_stream(), state + ): results.append(row) return results From b030fb561e880235dcff748a80d0e3249758cedb Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Sat, 24 Jun 2023 11:20:27 -0700 Subject: [PATCH 196/213] updated submodule --- python-api-core | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python-api-core b/python-api-core index 9ba76760f..a526d6593 160000 --- a/python-api-core +++ b/python-api-core @@ -1 +1 @@ -Subproject commit 9ba76760f5b7ba8128be85ca780811a0b9ec9087 +Subproject commit a526d659320939cd7f47ee775b250e8a3e3ab16b From ac0ef64b6d2e338318136e99ea563b357886ace3 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 26 Jun 2023 10:35:53 -0700 Subject: [PATCH 197/213] fixed batcher comments --- google/cloud/bigtable/data/_async/mutations_batcher.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigtable/data/_async/mutations_batcher.py b/google/cloud/bigtable/data/_async/mutations_batcher.py index 8842f8ddc..25aafc2a1 100644 --- a/google/cloud/bigtable/data/_async/mutations_batcher.py +++ 
b/google/cloud/bigtable/data/_async/mutations_batcher.py @@ -283,6 +283,8 @@ async def append(self, mutation_entry: RowMutationEntry): """ Add a new set of mutations to the internal queue + TODO: return a future to track completion of this entry + Args: - mutation_entry: new entry to add to flush queue Raises: @@ -332,8 +334,7 @@ async def _flush_internal(self, new_entries: list[RowMutationEntry]): in_process_requests.append(batch_task) # wait for all inflight requests to complete found_exceptions = await self._wait_for_batch_results(*in_process_requests) - # allow previous flush tasks to finalize before adding new exceptions to list - # collect exception data for next raise, after previous flush tasks have completed + # update exception data to reflect any new errors self._entries_processed_since_last_raise += len(new_entries) self._add_exceptions(found_exceptions) From 4122a167f86f550450e945cfe3d96b21745616c7 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 26 Jun 2023 12:28:34 -0700 Subject: [PATCH 198/213] updated docs folder --- docs/app-profile.rst | 2 +- docs/backup.rst | 2 +- docs/client-intro.rst | 18 ++--- docs/client.rst | 2 +- docs/cluster.rst | 2 +- docs/column-family.rst | 22 +++--- docs/data-api.rst | 82 ++++++++++----------- docs/encryption-info.rst | 2 +- docs/instance-api.rst | 32 ++++---- docs/instance.rst | 2 +- docs/row-data.rst | 2 +- docs/row-filters.rst | 12 +-- docs/row-set.rst | 2 +- docs/row.rst | 2 +- docs/snippets.py | 118 +++++++++++++++--------------- docs/snippets_table.py | 154 +++++++++++++++++++-------------------- docs/table-api.rst | 40 +++++----- docs/table.rst | 2 +- docs/usage.rst | 16 ++-- noxfile.py | 2 - 20 files changed, 257 insertions(+), 259 deletions(-) diff --git a/docs/app-profile.rst b/docs/app-profile.rst index 50e57c179..5c9d426c2 100644 --- a/docs/app-profile.rst +++ b/docs/app-profile.rst @@ -1,6 +1,6 @@ App Profile ~~~~~~~~~~~ -.. automodule:: google.cloud.bigtable.deprecated.app_profile +.. automodule:: google.cloud.bigtable.app_profile :members: :show-inheritance: diff --git a/docs/backup.rst b/docs/backup.rst index 46c32c91b..e75abd431 100644 --- a/docs/backup.rst +++ b/docs/backup.rst @@ -1,6 +1,6 @@ Backup ~~~~~~~~ -.. automodule:: google.cloud.bigtable.deprecated.backup +.. automodule:: google.cloud.bigtable.backup :members: :show-inheritance: diff --git a/docs/client-intro.rst b/docs/client-intro.rst index d75cf5f96..242068499 100644 --- a/docs/client-intro.rst +++ b/docs/client-intro.rst @@ -1,21 +1,21 @@ Base for Everything =================== -To use the API, the :class:`Client ` +To use the API, the :class:`Client ` class defines a high-level interface which handles authorization and creating other objects: .. code:: python - from google.cloud.bigtable.deprecated.client import Client + from google.cloud.bigtable.client import Client client = Client() Long-lived Defaults ------------------- -When creating a :class:`Client `, the +When creating a :class:`Client `, the ``user_agent`` argument has sensible a default -(:data:`DEFAULT_USER_AGENT `). +(:data:`DEFAULT_USER_AGENT `). However, you may over-ride it and the value will be used throughout all API requests made with the ``client`` you create. @@ -38,14 +38,14 @@ Configuration .. code:: - >>> import google.cloud.deprecated as bigtable + >>> from google.cloud import bigtable >>> client = bigtable.Client() or pass in ``credentials`` and ``project`` explicitly .. 
code:: - >>> import google.cloud.deprecated as bigtable + >>> from google.cloud import bigtable >>> client = bigtable.Client(project='my-project', credentials=creds) .. tip:: @@ -73,15 +73,15 @@ you can pass the ``read_only`` argument: client = bigtable.Client(read_only=True) This will ensure that the -:data:`READ_ONLY_SCOPE ` is used +:data:`READ_ONLY_SCOPE ` is used for API requests (so any accidental requests that would modify data will fail). Next Step --------- -After a :class:`Client `, the next highest-level -object is an :class:`Instance `. You'll need +After a :class:`Client `, the next highest-level +object is an :class:`Instance `. You'll need one before you can interact with tables or data. Head next to learn about the :doc:`instance-api`. diff --git a/docs/client.rst b/docs/client.rst index df92a9861..c48595c8a 100644 --- a/docs/client.rst +++ b/docs/client.rst @@ -1,6 +1,6 @@ Client ~~~~~~ -.. automodule:: google.cloud.bigtable.deprecated.client +.. automodule:: google.cloud.bigtable.client :members: :show-inheritance: diff --git a/docs/cluster.rst b/docs/cluster.rst index 9747b226f..ad33aae5e 100644 --- a/docs/cluster.rst +++ b/docs/cluster.rst @@ -1,6 +1,6 @@ Cluster ~~~~~~~ -.. automodule:: google.cloud.bigtable.deprecated.cluster +.. automodule:: google.cloud.bigtable.cluster :members: :show-inheritance: diff --git a/docs/column-family.rst b/docs/column-family.rst index 39095000d..de6c1eb1f 100644 --- a/docs/column-family.rst +++ b/docs/column-family.rst @@ -2,7 +2,7 @@ Column Families =============== When creating a -:class:`ColumnFamily `, it is +:class:`ColumnFamily `, it is possible to set garbage collection rules for expired data. By setting a rule, cells in the table matching the rule will be deleted @@ -10,19 +10,19 @@ during periodic garbage collection (which executes opportunistically in the background). The types -:class:`MaxAgeGCRule `, -:class:`MaxVersionsGCRule `, -:class:`GarbageCollectionRuleUnion ` and -:class:`GarbageCollectionRuleIntersection ` +:class:`MaxAgeGCRule `, +:class:`MaxVersionsGCRule `, +:class:`GarbageCollectionRuleUnion ` and +:class:`GarbageCollectionRuleIntersection ` can all be used as the optional ``gc_rule`` argument in the -:class:`ColumnFamily ` +:class:`ColumnFamily ` constructor. This value is then used in the -:meth:`create() ` and -:meth:`update() ` methods. +:meth:`create() ` and +:meth:`update() ` methods. These rules can be nested arbitrarily, with a -:class:`MaxAgeGCRule ` or -:class:`MaxVersionsGCRule ` +:class:`MaxAgeGCRule ` or +:class:`MaxVersionsGCRule ` at the lowest level of the nesting: .. code:: python @@ -44,6 +44,6 @@ at the lowest level of the nesting: ---- -.. automodule:: google.cloud.bigtable.deprecated.column_family +.. automodule:: google.cloud.bigtable.column_family :members: :show-inheritance: diff --git a/docs/data-api.rst b/docs/data-api.rst index e68835d1a..01a49178f 100644 --- a/docs/data-api.rst +++ b/docs/data-api.rst @@ -1,7 +1,7 @@ Data API ======== -After creating a :class:`Table ` and some +After creating a :class:`Table ` and some column families, you are ready to store and retrieve data. Cells vs. Columns vs. Column Families @@ -27,7 +27,7 @@ Modifying Data Since data is stored in cells, which are stored in rows, we use the metaphor of a **row** in classes that are used to modify (write, update, delete) data in a -:class:`Table `. +:class:`Table `. Direct vs. Conditional vs. Append --------------------------------- @@ -38,26 +38,26 @@ methods. 
* The **direct** way is via `MutateRow`_ which involves simply adding, overwriting or deleting cells. The - :class:`DirectRow ` class + :class:`DirectRow ` class handles direct mutations. * The **conditional** way is via `CheckAndMutateRow`_. This method first checks if some filter is matched in a given row, then applies one of two sets of mutations, depending on if a match occurred or not. (These mutation sets are called the "true mutations" and "false mutations".) The - :class:`ConditionalRow ` class + :class:`ConditionalRow ` class handles conditional mutations. * The **append** way is via `ReadModifyWriteRow`_. This simply appends (as bytes) or increments (as an integer) data in a presumed existing cell in a row. The - :class:`AppendRow ` class + :class:`AppendRow ` class handles append mutations. Row Factory ----------- A single factory can be used to create any of the three row types. -To create a :class:`DirectRow `: +To create a :class:`DirectRow `: .. code:: python @@ -66,15 +66,15 @@ To create a :class:`DirectRow `: Unlike the previous string values we've used before, the row key must be ``bytes``. -To create a :class:`ConditionalRow `, -first create a :class:`RowFilter ` and +To create a :class:`ConditionalRow `, +first create a :class:`RowFilter ` and then .. code:: python cond_row = table.row(row_key, filter_=filter_) -To create an :class:`AppendRow ` +To create an :class:`AppendRow ` .. code:: python @@ -95,7 +95,7 @@ Direct Mutations Direct mutations can be added via one of four methods -* :meth:`set_cell() ` allows a +* :meth:`set_cell() ` allows a single value to be written to a column .. code:: python @@ -109,7 +109,7 @@ Direct mutations can be added via one of four methods The value can either be bytes or an integer, which will be converted to bytes as a signed 64-bit integer. -* :meth:`delete_cell() ` deletes +* :meth:`delete_cell() ` deletes all cells (i.e. for all timestamps) in a given column .. code:: python @@ -119,7 +119,7 @@ Direct mutations can be added via one of four methods Remember, this only happens in the ``row`` we are using. If we only want to delete cells from a limited range of time, a - :class:`TimestampRange ` can + :class:`TimestampRange ` can be used .. code:: python @@ -127,9 +127,9 @@ Direct mutations can be added via one of four methods row.delete_cell(column_family_id, column, time_range=time_range) -* :meth:`delete_cells() ` does +* :meth:`delete_cells() ` does the same thing as - :meth:`delete_cell() `, + :meth:`delete_cell() `, but accepts a list of columns in a column family rather than a single one. .. code:: python @@ -138,7 +138,7 @@ Direct mutations can be added via one of four methods time_range=time_range) In addition, if we want to delete cells from every column in a column family, - the special :attr:`ALL_COLUMNS ` + the special :attr:`ALL_COLUMNS ` value can be used .. code:: python @@ -146,7 +146,7 @@ Direct mutations can be added via one of four methods row.delete_cells(column_family_id, row.ALL_COLUMNS, time_range=time_range) -* :meth:`delete() ` will delete the +* :meth:`delete() ` will delete the entire row .. code:: python @@ -177,14 +177,14 @@ Append Mutations Append mutations can be added via one of two methods -* :meth:`append_cell_value() ` +* :meth:`append_cell_value() ` appends a bytes value to an existing cell: .. code:: python append_row.append_cell_value(column_family_id, column, bytes_value) -* :meth:`increment_cell_value() ` +* :meth:`increment_cell_value() ` increments an integer value in an existing cell: .. 
code:: python @@ -217,7 +217,7 @@ Read Single Row from a Table ---------------------------- To make a `ReadRows`_ API request for a single row key, use -:meth:`Table.read_row() `: +:meth:`Table.read_row() `: .. code:: python @@ -226,34 +226,34 @@ To make a `ReadRows`_ API request for a single row key, use { u'fam1': { b'col1': [ - , - , + , + , ], b'col2': [ - , + , ], }, u'fam2': { b'col3': [ - , - , - , + , + , + , ], }, } >>> cell = row_data.cells[u'fam1'][b'col1'][0] >>> cell - + >>> cell.value b'val1' >>> cell.timestamp datetime.datetime(2016, 2, 27, 3, 41, 18, 122823, tzinfo=) -Rather than returning a :class:`DirectRow ` +Rather than returning a :class:`DirectRow ` or similar class, this method returns a -:class:`PartialRowData ` +:class:`PartialRowData ` instance. This class is used for reading and parsing data rather than for -modifying data (as :class:`DirectRow ` is). +modifying data (as :class:`DirectRow ` is). A filter can also be applied to the results: @@ -262,15 +262,15 @@ A filter can also be applied to the results: row_data = table.read_row(row_key, filter_=filter_val) The allowable ``filter_`` values are the same as those used for a -:class:`ConditionalRow `. For +:class:`ConditionalRow `. For more information, see the -:meth:`Table.read_row() ` documentation. +:meth:`Table.read_row() ` documentation. Stream Many Rows from a Table ----------------------------- To make a `ReadRows`_ API request for a stream of rows, use -:meth:`Table.read_rows() `: +:meth:`Table.read_rows() `: .. code:: python @@ -279,32 +279,32 @@ To make a `ReadRows`_ API request for a stream of rows, use Using gRPC over HTTP/2, a continual stream of responses will be delivered. In particular -* :meth:`consume_next() ` +* :meth:`consume_next() ` pulls the next result from the stream, parses it and stores it on the - :class:`PartialRowsData ` instance -* :meth:`consume_all() ` + :class:`PartialRowsData ` instance +* :meth:`consume_all() ` pulls results from the stream until there are no more -* :meth:`cancel() ` closes +* :meth:`cancel() ` closes the stream -See the :class:`PartialRowsData ` +See the :class:`PartialRowsData ` documentation for more information. As with -:meth:`Table.read_row() `, an optional +:meth:`Table.read_row() `, an optional ``filter_`` can be applied. In addition a ``start_key`` and / or ``end_key`` can be supplied for the stream, a ``limit`` can be set and a boolean ``allow_row_interleaving`` can be specified to allow faster streamed results at the potential cost of non-sequential reads. -See the :meth:`Table.read_rows() ` +See the :meth:`Table.read_rows() ` documentation for more information on the optional arguments. Sample Keys in a Table ---------------------- Make a `SampleRowKeys`_ API request with -:meth:`Table.sample_row_keys() `: +:meth:`Table.sample_row_keys() `: .. code:: python @@ -315,7 +315,7 @@ approximately equal size, which can be used to break up the data for distributed tasks like mapreduces. As with -:meth:`Table.read_rows() `, the +:meth:`Table.read_rows() `, the returned ``keys_iterator`` is connected to a cancellable HTTP/2 stream. The next key in the result can be accessed via diff --git a/docs/encryption-info.rst b/docs/encryption-info.rst index 62b77ea0c..46f19880f 100644 --- a/docs/encryption-info.rst +++ b/docs/encryption-info.rst @@ -1,6 +1,6 @@ Encryption Info ~~~~~~~~~~~~~~~ -.. automodule:: google.cloud.bigtable.deprecated.encryption_info +.. 
automodule:: google.cloud.bigtable.encryption_info :members: :show-inheritance: diff --git a/docs/instance-api.rst b/docs/instance-api.rst index 78123e8ca..88b4eb4dc 100644 --- a/docs/instance-api.rst +++ b/docs/instance-api.rst @@ -1,7 +1,7 @@ Instance Admin API ================== -After creating a :class:`Client `, you can +After creating a :class:`Client `, you can interact with individual instances for a project. List Instances @@ -9,7 +9,7 @@ List Instances If you want a comprehensive list of all existing instances, make a `ListInstances`_ API request with -:meth:`Client.list_instances() `: +:meth:`Client.list_instances() `: .. code:: python @@ -18,7 +18,7 @@ If you want a comprehensive list of all existing instances, make a Instance Factory ---------------- -To create an :class:`Instance ` object: +To create an :class:`Instance ` object: .. code:: python @@ -40,7 +40,7 @@ Create a new Instance --------------------- After creating the instance object, make a `CreateInstance`_ API request -with :meth:`create() `: +with :meth:`create() `: .. code:: python @@ -54,14 +54,14 @@ Check on Current Operation When modifying an instance (via a `CreateInstance`_ request), the Bigtable API will return a `long-running operation`_ and a corresponding - :class:`Operation ` object + :class:`Operation ` object will be returned by - :meth:`create() `. + :meth:`create() `. You can check if a long-running operation (for a -:meth:`create() ` has finished +:meth:`create() ` has finished by making a `GetOperation`_ request with -:meth:`Operation.finished() `: +:meth:`Operation.finished() `: .. code:: python @@ -71,18 +71,18 @@ by making a `GetOperation`_ request with .. note:: - Once an :class:`Operation ` object + Once an :class:`Operation ` object has returned :data:`True` from - :meth:`finished() `, the + :meth:`finished() `, the object should not be re-used. Subsequent calls to - :meth:`finished() ` + :meth:`finished() ` will result in a :class:`ValueError `. Get metadata for an existing Instance ------------------------------------- After creating the instance object, make a `GetInstance`_ API request -with :meth:`reload() `: +with :meth:`reload() `: .. code:: python @@ -94,7 +94,7 @@ Update an existing Instance --------------------------- After creating the instance object, make an `UpdateInstance`_ API request -with :meth:`update() `: +with :meth:`update() `: .. code:: python @@ -105,7 +105,7 @@ Delete an existing Instance --------------------------- Make a `DeleteInstance`_ API request with -:meth:`delete() `: +:meth:`delete() `: .. code:: python @@ -115,8 +115,8 @@ Next Step --------- Now we go down the hierarchy from -:class:`Instance ` to a -:class:`Table `. +:class:`Instance ` to a +:class:`Table `. Head next to learn about the :doc:`table-api`. diff --git a/docs/instance.rst b/docs/instance.rst index 3a61faf1c..f9be9672f 100644 --- a/docs/instance.rst +++ b/docs/instance.rst @@ -1,6 +1,6 @@ Instance ~~~~~~~~ -.. automodule:: google.cloud.bigtable.deprecated.instance +.. automodule:: google.cloud.bigtable.instance :members: :show-inheritance: diff --git a/docs/row-data.rst b/docs/row-data.rst index b9013ebf5..503f9b1cb 100644 --- a/docs/row-data.rst +++ b/docs/row-data.rst @@ -1,6 +1,6 @@ Row Data ~~~~~~~~ -.. automodule:: google.cloud.bigtable.deprecated.row_data +.. 
automodule:: google.cloud.bigtable.row_data :members: :show-inheritance: diff --git a/docs/row-filters.rst b/docs/row-filters.rst index 8d1fac46b..9884ce400 100644 --- a/docs/row-filters.rst +++ b/docs/row-filters.rst @@ -2,11 +2,11 @@ Bigtable Row Filters ==================== It is possible to use a -:class:`RowFilter ` +:class:`RowFilter ` when adding mutations to a -:class:`ConditionalRow ` and when -reading row data with :meth:`read_row() ` -or :meth:`read_rows() `. +:class:`ConditionalRow ` and when +reading row data with :meth:`read_row() ` +or :meth:`read_rows() `. As laid out in the `RowFilter definition`_, the following basic filters are provided: @@ -60,8 +60,8 @@ level. For example: ---- -.. automodule:: google.cloud.bigtable.deprecated.row_filters +.. automodule:: google.cloud.bigtable.row_filters :members: :show-inheritance: -.. _RowFilter definition: https://googleapis.dev/python/bigtable/latest/row-filters.html?highlight=rowfilter#google.cloud.bigtable.deprecated.row_filters.RowFilter +.. _RowFilter definition: https://googleapis.dev/python/bigtable/latest/row-filters.html?highlight=rowfilter#google.cloud.bigtable.row_filters.RowFilter diff --git a/docs/row-set.rst b/docs/row-set.rst index 92cd107e8..5f7a16a02 100644 --- a/docs/row-set.rst +++ b/docs/row-set.rst @@ -1,6 +1,6 @@ Row Set ~~~~~~~~ -.. automodule:: google.cloud.bigtable.deprecated.row_set +.. automodule:: google.cloud.bigtable.row_set :members: :show-inheritance: diff --git a/docs/row.rst b/docs/row.rst index e8fa48cdd..33686608b 100644 --- a/docs/row.rst +++ b/docs/row.rst @@ -1,7 +1,7 @@ Bigtable Row ============ -.. automodule:: google.cloud.bigtable.deprecated.row +.. automodule:: google.cloud.bigtable.row :members: :show-inheritance: :inherited-members: diff --git a/docs/snippets.py b/docs/snippets.py index 084f10270..1d93fdf12 100644 --- a/docs/snippets.py +++ b/docs/snippets.py @@ -16,7 +16,7 @@ """Testable usage examples for Google Cloud Bigtable API wrapper Each example function takes a ``client`` argument (which must be an instance -of :class:`google.cloud.bigtable.deprecated.client.Client`) and uses it to perform a task +of :class:`google.cloud.bigtable.client.Client`) and uses it to perform a task with the API. To facilitate running the examples as system tests, each example is also passed @@ -40,8 +40,8 @@ from test_utils.retry import RetryErrors from google.cloud._helpers import UTC -from google.cloud.bigtable.deprecated import Client -from google.cloud.bigtable.deprecated import enums +from google.cloud.bigtable import Client +from google.cloud.bigtable import enums UNIQUE_SUFFIX = unique_resource_id("-") @@ -110,8 +110,8 @@ def teardown_module(): def test_bigtable_create_instance(): # [START bigtable_api_create_prod_instance] - from google.cloud.bigtable.deprecated import Client - from google.cloud.bigtable.deprecated import enums + from google.cloud.bigtable import Client + from google.cloud.bigtable import enums my_instance_id = "inst-my-" + UNIQUE_SUFFIX my_cluster_id = "clus-my-" + UNIQUE_SUFFIX @@ -144,8 +144,8 @@ def test_bigtable_create_instance(): def test_bigtable_create_additional_cluster(): # [START bigtable_api_create_cluster] - from google.cloud.bigtable.deprecated import Client - from google.cloud.bigtable.deprecated import enums + from google.cloud.bigtable import Client + from google.cloud.bigtable import enums # Assuming that there is an existing instance with `INSTANCE_ID` # on the server already. 
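
The snippet hunks above and below make one mechanical change throughout docs/snippets.py: imports move from ``google.cloud.bigtable.deprecated`` back to ``google.cloud.bigtable``. For orientation, here is a rough, self-contained sketch of the instance-and-cluster creation pattern these snippets exercise with the restored import path; the project, instance, and cluster IDs are placeholders, and the location and node settings are illustrative assumptions rather than values taken from the snippets:

    from google.cloud.bigtable import Client
    from google.cloud.bigtable import enums

    # Placeholder identifiers; the real snippets build theirs with unique_resource_id().
    client = Client(project="my-project", admin=True)
    instance = client.instance(
        "my-instance-id",
        instance_type=enums.Instance.Type.PRODUCTION,
        labels={"env": "docs"},
    )
    cluster = instance.cluster(
        "my-cluster-id",
        location_id="us-central1-f",
        serve_nodes=3,
        default_storage_type=enums.StorageType.SSD,
    )
    # create() starts a long-running operation; block until it completes.
    operation = instance.create(clusters=[cluster])
    operation.result(timeout=120)
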
@@ -181,8 +181,8 @@ def test_bigtable_create_reload_delete_app_profile(): import re # [START bigtable_api_create_app_profile] - from google.cloud.bigtable.deprecated import Client - from google.cloud.bigtable.deprecated import enums + from google.cloud.bigtable import Client + from google.cloud.bigtable import enums routing_policy_type = enums.RoutingPolicyType.ANY @@ -202,7 +202,7 @@ def test_bigtable_create_reload_delete_app_profile(): # [END bigtable_api_create_app_profile] # [START bigtable_api_app_profile_name] - from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -219,7 +219,7 @@ def test_bigtable_create_reload_delete_app_profile(): assert _profile_name_re.match(app_profile_name) # [START bigtable_api_app_profile_exists] - from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -230,7 +230,7 @@ def test_bigtable_create_reload_delete_app_profile(): assert app_profile_exists # [START bigtable_api_reload_app_profile] - from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -241,7 +241,7 @@ def test_bigtable_create_reload_delete_app_profile(): assert app_profile.routing_policy_type == ROUTING_POLICY_TYPE # [START bigtable_api_update_app_profile] - from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -255,7 +255,7 @@ def test_bigtable_create_reload_delete_app_profile(): assert app_profile.description == description # [START bigtable_api_delete_app_profile] - from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -269,7 +269,7 @@ def test_bigtable_create_reload_delete_app_profile(): def test_bigtable_list_instances(): # [START bigtable_api_list_instances] - from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable import Client client = Client(admin=True) (instances_list, failed_locations_list) = client.list_instances() @@ -280,7 +280,7 @@ def test_bigtable_list_instances(): def test_bigtable_list_clusters_on_instance(): # [START bigtable_api_list_clusters_on_instance] - from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -292,7 +292,7 @@ def test_bigtable_list_clusters_on_instance(): def test_bigtable_list_clusters_in_project(): # [START bigtable_api_list_clusters_in_project] - from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable import Client client = Client(admin=True) (clusters_list, failed_locations_list) = client.list_clusters() @@ -309,7 +309,7 @@ def test_bigtable_list_app_profiles(): app_profile = app_profile.create(ignore_warnings=True) # [START bigtable_api_list_app_profiles] - from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -325,7 +325,7 @@ def test_bigtable_list_app_profiles(): def test_bigtable_instance_exists(): # [START bigtable_api_check_instance_exists] - from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable import 
Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -337,7 +337,7 @@ def test_bigtable_instance_exists(): def test_bigtable_cluster_exists(): # [START bigtable_api_check_cluster_exists] - from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -350,7 +350,7 @@ def test_bigtable_cluster_exists(): def test_bigtable_reload_instance(): # [START bigtable_api_reload_instance] - from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -362,7 +362,7 @@ def test_bigtable_reload_instance(): def test_bigtable_reload_cluster(): # [START bigtable_api_reload_cluster] - from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -375,7 +375,7 @@ def test_bigtable_reload_cluster(): def test_bigtable_update_instance(): # [START bigtable_api_update_instance] - from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -389,7 +389,7 @@ def test_bigtable_update_instance(): def test_bigtable_update_cluster(): # [START bigtable_api_update_cluster] - from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -403,7 +403,7 @@ def test_bigtable_update_cluster(): def test_bigtable_cluster_disable_autoscaling(): # [START bigtable_api_cluster_disable_autoscaling] - from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -424,8 +424,8 @@ def test_bigtable_create_table(): # [START bigtable_api_create_table] from google.api_core import exceptions from google.api_core import retry - from google.cloud.bigtable.deprecated import Client - from google.cloud.bigtable.deprecated import column_family + from google.cloud.bigtable import Client + from google.cloud.bigtable import column_family client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -450,7 +450,7 @@ def test_bigtable_create_table(): def test_bigtable_list_tables(): # [START bigtable_api_list_tables] - from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -463,7 +463,7 @@ def test_bigtable_list_tables(): def test_bigtable_delete_cluster(): - from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -480,7 +480,7 @@ def test_bigtable_delete_cluster(): operation.result(timeout=1000) # [START bigtable_api_delete_cluster] - from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -493,7 +493,7 @@ def test_bigtable_delete_cluster(): def test_bigtable_delete_instance(): - from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable import Client client = Client(admin=True) @@ -515,7 +515,7 @@ def test_bigtable_delete_instance(): INSTANCES_TO_DELETE.append(instance) # [START bigtable_api_delete_instance] - from google.cloud.bigtable.deprecated import 
Client + from google.cloud.bigtable import Client client = Client(admin=True) @@ -531,7 +531,7 @@ def test_bigtable_delete_instance(): def test_bigtable_test_iam_permissions(): # [START bigtable_api_test_iam_permissions] - from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -547,9 +547,9 @@ def test_bigtable_set_iam_policy_then_get_iam_policy(): service_account_email = Config.CLIENT._credentials.service_account_email # [START bigtable_api_set_iam_policy] - from google.cloud.bigtable.deprecated import Client - from google.cloud.bigtable.deprecated.policy import Policy - from google.cloud.bigtable.deprecated.policy import BIGTABLE_ADMIN_ROLE + from google.cloud.bigtable import Client + from google.cloud.bigtable.policy import Policy + from google.cloud.bigtable.policy import BIGTABLE_ADMIN_ROLE client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -563,7 +563,7 @@ def test_bigtable_set_iam_policy_then_get_iam_policy(): assert len(policy_latest.bigtable_admins) > 0 # [START bigtable_api_get_iam_policy] - from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -577,7 +577,7 @@ def test_bigtable_project_path(): import re # [START bigtable_api_project_path] - from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable import Client client = Client(admin=True) project_path = client.project_path @@ -586,7 +586,7 @@ def test_bigtable_project_path(): def test_bigtable_table_data_client(): # [START bigtable_api_table_data_client] - from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable import Client client = Client(admin=True) table_data_client = client.table_data_client @@ -595,7 +595,7 @@ def test_bigtable_table_data_client(): def test_bigtable_table_admin_client(): # [START bigtable_api_table_admin_client] - from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable import Client client = Client(admin=True) table_admin_client = client.table_admin_client @@ -604,7 +604,7 @@ def test_bigtable_table_admin_client(): def test_bigtable_instance_admin_client(): # [START bigtable_api_instance_admin_client] - from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable import Client client = Client(admin=True) instance_admin_client = client.instance_admin_client @@ -615,9 +615,9 @@ def test_bigtable_admins_policy(): service_account_email = Config.CLIENT._credentials.service_account_email # [START bigtable_api_admins_policy] - from google.cloud.bigtable.deprecated import Client - from google.cloud.bigtable.deprecated.policy import Policy - from google.cloud.bigtable.deprecated.policy import BIGTABLE_ADMIN_ROLE + from google.cloud.bigtable import Client + from google.cloud.bigtable.policy import Policy + from google.cloud.bigtable.policy import BIGTABLE_ADMIN_ROLE client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -636,9 +636,9 @@ def test_bigtable_readers_policy(): service_account_email = Config.CLIENT._credentials.service_account_email # [START bigtable_api_readers_policy] - from google.cloud.bigtable.deprecated import Client - from google.cloud.bigtable.deprecated.policy import Policy - from google.cloud.bigtable.deprecated.policy import BIGTABLE_READER_ROLE + from google.cloud.bigtable import Client + from google.cloud.bigtable.policy import Policy + from 
google.cloud.bigtable.policy import BIGTABLE_READER_ROLE client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -657,9 +657,9 @@ def test_bigtable_users_policy(): service_account_email = Config.CLIENT._credentials.service_account_email # [START bigtable_api_users_policy] - from google.cloud.bigtable.deprecated import Client - from google.cloud.bigtable.deprecated.policy import Policy - from google.cloud.bigtable.deprecated.policy import BIGTABLE_USER_ROLE + from google.cloud.bigtable import Client + from google.cloud.bigtable.policy import Policy + from google.cloud.bigtable.policy import BIGTABLE_USER_ROLE client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -678,9 +678,9 @@ def test_bigtable_viewers_policy(): service_account_email = Config.CLIENT._credentials.service_account_email # [START bigtable_api_viewers_policy] - from google.cloud.bigtable.deprecated import Client - from google.cloud.bigtable.deprecated.policy import Policy - from google.cloud.bigtable.deprecated.policy import BIGTABLE_VIEWER_ROLE + from google.cloud.bigtable import Client + from google.cloud.bigtable.policy import Policy + from google.cloud.bigtable.policy import BIGTABLE_VIEWER_ROLE client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -699,7 +699,7 @@ def test_bigtable_instance_name(): import re # [START bigtable_api_instance_name] - from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -711,7 +711,7 @@ def test_bigtable_cluster_name(): import re # [START bigtable_api_cluster_name] - from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -722,7 +722,7 @@ def test_bigtable_cluster_name(): def test_bigtable_instance_from_pb(): # [START bigtable_api_instance_from_pb] - from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable import Client from google.cloud.bigtable_admin_v2.types import instance as data_v2_pb2 client = Client(admin=True) @@ -741,7 +741,7 @@ def test_bigtable_instance_from_pb(): def test_bigtable_cluster_from_pb(): # [START bigtable_api_cluster_from_pb] - from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable import Client from google.cloud.bigtable_admin_v2.types import instance as data_v2_pb2 client = Client(admin=True) @@ -767,7 +767,7 @@ def test_bigtable_cluster_from_pb(): def test_bigtable_instance_state(): # [START bigtable_api_instance_state] - from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -779,7 +779,7 @@ def test_bigtable_instance_state(): def test_bigtable_cluster_state(): # [START bigtable_api_cluster_state] - from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) diff --git a/docs/snippets_table.py b/docs/snippets_table.py index 72c342907..f27260425 100644 --- a/docs/snippets_table.py +++ b/docs/snippets_table.py @@ -16,7 +16,7 @@ """Testable usage examples for Google Cloud Bigtable API wrapper Each example function takes a ``client`` argument (which must be an instance -of :class:`google.cloud.bigtable.deprecated.client.Client`) and uses it to perform a task +of :class:`google.cloud.bigtable.client.Client`) and uses it to perform a task with 
the API. To facilitate running the examples as system tests, each example is also passed @@ -38,9 +38,9 @@ from test_utils.retry import RetryErrors from google.cloud._helpers import UTC -from google.cloud.bigtable.deprecated import Client -from google.cloud.bigtable.deprecated import enums -from google.cloud.bigtable.deprecated import column_family +from google.cloud.bigtable import Client +from google.cloud.bigtable import enums +from google.cloud.bigtable import column_family INSTANCE_ID = "snippet" + unique_resource_id("-") @@ -113,8 +113,8 @@ def teardown_module(): def test_bigtable_create_table(): # [START bigtable_api_create_table] - from google.cloud.bigtable.deprecated import Client - from google.cloud.bigtable.deprecated import column_family + from google.cloud.bigtable import Client + from google.cloud.bigtable import column_family client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -143,7 +143,7 @@ def test_bigtable_sample_row_keys(): assert table_sample.exists() # [START bigtable_api_sample_row_keys] - from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -159,7 +159,7 @@ def test_bigtable_sample_row_keys(): def test_bigtable_write_read_drop_truncate(): # [START bigtable_api_mutate_rows] - from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -190,7 +190,7 @@ def test_bigtable_write_read_drop_truncate(): # [END bigtable_api_mutate_rows] assert len(response) == len(rows) # [START bigtable_api_read_row] - from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -200,7 +200,7 @@ def test_bigtable_write_read_drop_truncate(): # [END bigtable_api_read_row] assert row.row_key.decode("utf-8") == row_key # [START bigtable_api_read_rows] - from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -218,7 +218,7 @@ def test_bigtable_write_read_drop_truncate(): # [END bigtable_api_read_rows] assert len(total_rows) == len(rows) # [START bigtable_api_drop_by_prefix] - from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -231,7 +231,7 @@ def test_bigtable_write_read_drop_truncate(): assert row.row_key.decode("utf-8") not in dropped_row_keys # [START bigtable_api_truncate_table] - from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -246,7 +246,7 @@ def test_bigtable_write_read_drop_truncate(): def test_bigtable_mutations_batcher(): # [START bigtable_api_mutations_batcher] - from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -297,7 +297,7 @@ def test_bigtable_mutations_batcher(): def test_bigtable_table_column_family(): # [START bigtable_api_table_column_family] - from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -311,7 +311,7 @@ def test_bigtable_table_column_family(): def 
test_bigtable_list_tables(): # [START bigtable_api_list_tables] - from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -324,7 +324,7 @@ def test_bigtable_table_name(): import re # [START bigtable_api_table_name] - from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -342,7 +342,7 @@ def test_bigtable_table_name(): def test_bigtable_list_column_families(): # [START bigtable_api_list_column_families] - from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -356,7 +356,7 @@ def test_bigtable_list_column_families(): def test_bigtable_get_cluster_states(): # [START bigtable_api_get_cluster_states] - from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -374,7 +374,7 @@ def test_bigtable_table_test_iam_permissions(): assert table_policy.exists # [START bigtable_api_table_test_iam_permissions] - from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -392,9 +392,9 @@ def test_bigtable_table_set_iam_policy_then_get_iam_policy(): service_account_email = Config.CLIENT._credentials.service_account_email # [START bigtable_api_table_set_iam_policy] - from google.cloud.bigtable.deprecated import Client - from google.cloud.bigtable.deprecated.policy import Policy - from google.cloud.bigtable.deprecated.policy import BIGTABLE_ADMIN_ROLE + from google.cloud.bigtable import Client + from google.cloud.bigtable.policy import Policy + from google.cloud.bigtable.policy import BIGTABLE_ADMIN_ROLE client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -407,7 +407,7 @@ def test_bigtable_table_set_iam_policy_then_get_iam_policy(): assert len(policy_latest.bigtable_admins) > 0 # [START bigtable_api_table_get_iam_policy] - from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -419,7 +419,7 @@ def test_bigtable_table_set_iam_policy_then_get_iam_policy(): def test_bigtable_table_exists(): # [START bigtable_api_check_table_exists] - from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -435,7 +435,7 @@ def test_bigtable_delete_table(): assert table_del.exists() # [START bigtable_api_delete_table] - from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -448,7 +448,7 @@ def test_bigtable_delete_table(): def test_bigtable_table_row(): # [START bigtable_api_table_row] - from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -475,7 +475,7 @@ def test_bigtable_table_row(): def test_bigtable_table_append_row(): # [START bigtable_api_table_append_row] - from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -502,7 
+502,7 @@ def test_bigtable_table_append_row(): def test_bigtable_table_direct_row(): # [START bigtable_api_table_direct_row] - from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -529,8 +529,8 @@ def test_bigtable_table_direct_row(): def test_bigtable_table_conditional_row(): # [START bigtable_api_table_conditional_row] - from google.cloud.bigtable.deprecated import Client - from google.cloud.bigtable.deprecated.row_filters import PassAllFilter + from google.cloud.bigtable import Client + from google.cloud.bigtable.row_filters import PassAllFilter client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -558,7 +558,7 @@ def test_bigtable_table_conditional_row(): def test_bigtable_column_family_name(): # [START bigtable_api_column_family_name] - from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -581,8 +581,8 @@ def test_bigtable_column_family_name(): def test_bigtable_create_update_delete_column_family(): # [START bigtable_api_create_column_family] - from google.cloud.bigtable.deprecated import Client - from google.cloud.bigtable.deprecated import column_family + from google.cloud.bigtable import Client + from google.cloud.bigtable import column_family client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -598,8 +598,8 @@ def test_bigtable_create_update_delete_column_family(): assert column_families[column_family_id].gc_rule == gc_rule # [START bigtable_api_update_column_family] - from google.cloud.bigtable.deprecated import Client - from google.cloud.bigtable.deprecated import column_family + from google.cloud.bigtable import Client + from google.cloud.bigtable import column_family client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -617,8 +617,8 @@ def test_bigtable_create_update_delete_column_family(): assert updated_families[column_family_id].gc_rule == max_age_rule # [START bigtable_api_delete_column_family] - from google.cloud.bigtable.deprecated import Client - from google.cloud.bigtable.deprecated import column_family + from google.cloud.bigtable import Client + from google.cloud.bigtable import column_family client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -653,8 +653,8 @@ def test_bigtable_add_row_add_row_range_add_row_range_from_keys(): Config.TABLE.mutate_rows(rows) # [START bigtable_api_add_row_key] - from google.cloud.bigtable.deprecated import Client - from google.cloud.bigtable.deprecated.row_set import RowSet + from google.cloud.bigtable import Client + from google.cloud.bigtable.row_set import RowSet client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -670,9 +670,9 @@ def test_bigtable_add_row_add_row_range_add_row_range_from_keys(): assert found_row_keys == expected_row_keys # [START bigtable_api_add_row_range] - from google.cloud.bigtable.deprecated import Client - from google.cloud.bigtable.deprecated.row_set import RowSet - from google.cloud.bigtable.deprecated.row_set import RowRange + from google.cloud.bigtable import Client + from google.cloud.bigtable.row_set import RowSet + from google.cloud.bigtable.row_set import RowRange client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -688,8 +688,8 @@ def test_bigtable_add_row_add_row_range_add_row_range_from_keys(): assert found_row_keys == expected_row_keys # [START 
bigtable_api_row_range_from_keys] - from google.cloud.bigtable.deprecated import Client - from google.cloud.bigtable.deprecated.row_set import RowSet + from google.cloud.bigtable import Client + from google.cloud.bigtable.row_set import RowSet client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -723,8 +723,8 @@ def test_bigtable_add_row_range_with_prefix(): Config.TABLE.mutate_rows(rows) # [START bigtable_api_add_row_range_with_prefix] - from google.cloud.bigtable.deprecated import Client - from google.cloud.bigtable.deprecated.row_set import RowSet + from google.cloud.bigtable import Client + from google.cloud.bigtable.row_set import RowSet client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -747,7 +747,7 @@ def test_bigtable_add_row_range_with_prefix(): def test_bigtable_batcher_mutate_flush_mutate_rows(): # [START bigtable_api_batcher_mutate] - from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -769,7 +769,7 @@ def test_bigtable_batcher_mutate_flush_mutate_rows(): # [END bigtable_api_batcher_mutate] # [START bigtable_api_batcher_flush] - from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -795,7 +795,7 @@ def test_bigtable_batcher_mutate_flush_mutate_rows(): table.truncate(timeout=200) # [START bigtable_api_batcher_mutate_rows] - from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -829,8 +829,8 @@ def test_bigtable_batcher_mutate_flush_mutate_rows(): def test_bigtable_create_family_gc_max_age(): # [START bigtable_api_create_family_gc_max_age] - from google.cloud.bigtable.deprecated import Client - from google.cloud.bigtable.deprecated import column_family + from google.cloud.bigtable import Client + from google.cloud.bigtable import column_family client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -851,8 +851,8 @@ def test_bigtable_create_family_gc_max_age(): def test_bigtable_create_family_gc_max_versions(): # [START bigtable_api_create_family_gc_max_versions] - from google.cloud.bigtable.deprecated import Client - from google.cloud.bigtable.deprecated import column_family + from google.cloud.bigtable import Client + from google.cloud.bigtable import column_family client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -872,8 +872,8 @@ def test_bigtable_create_family_gc_max_versions(): def test_bigtable_create_family_gc_union(): # [START bigtable_api_create_family_gc_union] - from google.cloud.bigtable.deprecated import Client - from google.cloud.bigtable.deprecated import column_family + from google.cloud.bigtable import Client + from google.cloud.bigtable import column_family client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -898,8 +898,8 @@ def test_bigtable_create_family_gc_union(): def test_bigtable_create_family_gc_intersection(): # [START bigtable_api_create_family_gc_intersection] - from google.cloud.bigtable.deprecated import Client - from google.cloud.bigtable.deprecated import column_family + from google.cloud.bigtable import Client + from google.cloud.bigtable import column_family client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -927,8 +927,8 @@ def test_bigtable_create_family_gc_intersection(): def 
test_bigtable_create_family_gc_nested(): # [START bigtable_api_create_family_gc_nested] - from google.cloud.bigtable.deprecated import Client - from google.cloud.bigtable.deprecated import column_family + from google.cloud.bigtable import Client + from google.cloud.bigtable import column_family client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -978,7 +978,7 @@ def test_bigtable_row_data_cells_cell_value_cell_values(): row.commit() # [START bigtable_api_row_data_cells] - from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -993,7 +993,7 @@ def test_bigtable_row_data_cells_cell_value_cell_values(): assert actual_cell_value == value # [START bigtable_api_row_cell_value] - from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -1006,7 +1006,7 @@ def test_bigtable_row_data_cells_cell_value_cell_values(): assert cell_value == value # [START bigtable_api_row_cell_values] - from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -1025,7 +1025,7 @@ def test_bigtable_row_data_cells_cell_value_cell_values(): row.commit() # [START bigtable_api_row_find_cells] - from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -1042,7 +1042,7 @@ def test_bigtable_row_data_cells_cell_value_cell_values(): def test_bigtable_row_setcell_rowkey(): # [START bigtable_api_row_set_cell] - from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -1061,7 +1061,7 @@ def test_bigtable_row_setcell_rowkey(): assert status.code == 0 # [START bigtable_api_row_row_key] - from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -1073,7 +1073,7 @@ def test_bigtable_row_setcell_rowkey(): assert row_key == ROW_KEY1 # [START bigtable_api_row_table] - from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -1098,7 +1098,7 @@ def test_bigtable_row_delete(): assert written_row_keys == [b"row_key_1"] # [START bigtable_api_row_delete] - from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -1130,7 +1130,7 @@ def test_bigtable_row_delete_cell(): assert written_row_keys == [row_key1] # [START bigtable_api_row_delete_cell] - from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -1163,7 +1163,7 @@ def test_bigtable_row_delete_cells(): assert written_row_keys == [row_key1] # [START bigtable_api_row_delete_cells] - from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -1189,7 +1189,7 @@ def test_bigtable_row_clear(): assert mutation_size > 0 # [START bigtable_api_row_clear] - from google.cloud.bigtable.deprecated import Client + from 
google.cloud.bigtable import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -1208,7 +1208,7 @@ def test_bigtable_row_clear(): def test_bigtable_row_clear_get_mutations_size(): # [START bigtable_api_row_get_mutations_size] - from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -1230,7 +1230,7 @@ def test_bigtable_row_clear_get_mutations_size(): def test_bigtable_row_setcell_commit_rowkey(): # [START bigtable_api_row_set_cell] - from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -1244,7 +1244,7 @@ def test_bigtable_row_setcell_commit_rowkey(): row_obj.commit() # [START bigtable_api_row_commit] - from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -1264,7 +1264,7 @@ def test_bigtable_row_setcell_commit_rowkey(): assert written_row_keys == [b"row_key_1", b"row_key_2"] # [START bigtable_api_row_row_key] - from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -1286,7 +1286,7 @@ def test_bigtable_row_append_cell_value(): row.commit() # [START bigtable_api_row_append_cell_value] - from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -1303,7 +1303,7 @@ def test_bigtable_row_append_cell_value(): assert actual_value == cell_val1 + cell_val2 # [START bigtable_api_row_commit] - from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) @@ -1315,7 +1315,7 @@ def test_bigtable_row_append_cell_value(): # [END bigtable_api_row_commit] # [START bigtable_api_row_increment_cell_value] - from google.cloud.bigtable.deprecated import Client + from google.cloud.bigtable import Client client = Client(admin=True) instance = client.instance(INSTANCE_ID) diff --git a/docs/table-api.rst b/docs/table-api.rst index ce05a3419..1bbf85146 100644 --- a/docs/table-api.rst +++ b/docs/table-api.rst @@ -1,7 +1,7 @@ Table Admin API =============== -After creating an :class:`Instance `, you can +After creating an :class:`Instance `, you can interact with individual tables, groups of tables or column families within a table. @@ -10,33 +10,33 @@ List Tables If you want a comprehensive list of all existing tables in a instance, make a `ListTables`_ API request with -:meth:`Instance.list_tables() `: +:meth:`Instance.list_tables() `: .. code:: python >>> instance.list_tables() - [, - ] + [, + ] Table Factory ------------- -To create a :class:`Table ` object: +To create a :class:`Table ` object: .. code:: python table = instance.table(table_id) -Even if this :class:`Table ` already +Even if this :class:`Table ` already has been created with the API, you'll want this object to use as a -parent of a :class:`ColumnFamily ` -or :class:`Row `. +parent of a :class:`ColumnFamily ` +or :class:`Row `. Create a new Table ------------------ After creating the table object, make a `CreateTable`_ API request -with :meth:`create() `: +with :meth:`create() `: .. 
code:: python @@ -53,7 +53,7 @@ Delete an existing Table ------------------------ Make a `DeleteTable`_ API request with -:meth:`delete() `: +:meth:`delete() `: .. code:: python @@ -67,7 +67,7 @@ associated with a table, the `GetTable`_ API method returns a table object with the names of the column families. To retrieve the list of column families use -:meth:`list_column_families() `: +:meth:`list_column_families() `: .. code:: python @@ -77,7 +77,7 @@ Column Family Factory --------------------- To create a -:class:`ColumnFamily ` object: +:class:`ColumnFamily ` object: .. code:: python @@ -87,7 +87,7 @@ There is no real reason to use this factory unless you intend to create or delete a column family. In addition, you can specify an optional ``gc_rule`` (a -:class:`GarbageCollectionRule ` +:class:`GarbageCollectionRule ` or similar): .. code:: python @@ -99,7 +99,7 @@ This rule helps the backend determine when and how to clean up old cells in the column family. See :doc:`column-family` for more information about -:class:`GarbageCollectionRule ` +:class:`GarbageCollectionRule ` and related classes. Create a new Column Family @@ -107,7 +107,7 @@ Create a new Column Family After creating the column family object, make a `CreateColumnFamily`_ API request with -:meth:`ColumnFamily.create() ` +:meth:`ColumnFamily.create() ` .. code:: python @@ -117,7 +117,7 @@ Delete an existing Column Family -------------------------------- Make a `DeleteColumnFamily`_ API request with -:meth:`ColumnFamily.delete() ` +:meth:`ColumnFamily.delete() ` .. code:: python @@ -127,7 +127,7 @@ Update an existing Column Family -------------------------------- Make an `UpdateColumnFamily`_ API request with -:meth:`ColumnFamily.delete() ` +:meth:`ColumnFamily.delete() ` .. code:: python @@ -137,9 +137,9 @@ Next Step --------- Now we go down the final step of the hierarchy from -:class:`Table ` to -:class:`Row ` as well as streaming -data directly via a :class:`Table `. +:class:`Table ` to +:class:`Row ` as well as streaming +data directly via a :class:`Table `. Head next to learn about the :doc:`data-api`. diff --git a/docs/table.rst b/docs/table.rst index 0d938e0af..c230725d1 100644 --- a/docs/table.rst +++ b/docs/table.rst @@ -1,6 +1,6 @@ Table ~~~~~ -.. automodule:: google.cloud.bigtable.deprecated.table +.. 
automodule:: google.cloud.bigtable.table :members: :show-inheritance: diff --git a/docs/usage.rst b/docs/usage.rst index 2a6ce88ec..73a32b039 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -22,12 +22,12 @@ Using the API In the hierarchy of API concepts -* a :class:`Client ` owns an - :class:`Instance ` -* an :class:`Instance ` owns a - :class:`Table ` -* a :class:`Table ` owns a - :class:`ColumnFamily ` -* a :class:`Table ` owns a - :class:`Row ` +* a :class:`Client ` owns an + :class:`Instance ` +* an :class:`Instance ` owns a + :class:`Table ` +* a :class:`Table ` owns a + :class:`ColumnFamily ` +* a :class:`Table ` owns a + :class:`Row ` (and all the cells in the row) diff --git a/noxfile.py b/noxfile.py index db24f50e0..82c1c3434 100644 --- a/noxfile.py +++ b/noxfile.py @@ -143,8 +143,6 @@ def mypy(session): "--warn-unreachable", "--disallow-any-generics", "--exclude", - "google/cloud/bigtable/deprecated", - "--exclude", "tests/system/v2_client", "--exclude", "tests/unit/v2_client", From ff325ef49346f732d2fdc8bd580d09b8b01e7002 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 26 Jun 2023 13:15:31 -0700 Subject: [PATCH 199/213] changed mypy to only check data client --- noxfile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/noxfile.py b/noxfile.py index 82c1c3434..95fac1e3c 100644 --- a/noxfile.py +++ b/noxfile.py @@ -138,7 +138,7 @@ def mypy(session): session.install("google-cloud-testutils") session.run( "mypy", - "google/cloud/bigtable", + "google/cloud/bigtable/data", "--check-untyped-defs", "--warn-unreachable", "--disallow-any-generics", From e996946ab9cfc0a9989143525e5ea5274b65a346 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 26 Jun 2023 13:25:45 -0700 Subject: [PATCH 200/213] fixed some test issues --- testing/constraints-3.7.txt | 5 ++--- tests/unit/v2_client/test_batcher.py | 31 ++++++++++++++-------------- 2 files changed, 17 insertions(+), 19 deletions(-) diff --git a/testing/constraints-3.7.txt b/testing/constraints-3.7.txt index 7bf769c9b..92b616563 100644 --- a/testing/constraints-3.7.txt +++ b/testing/constraints-3.7.txt @@ -5,9 +5,8 @@ # # e.g., if setup.py has "foo >= 1.14.0, < 2.0.0dev", # Then this file should have foo==1.14.0 -# TODO: reset after merging api-core submodule -# google-api-core==2.11.0 -google-cloud-core==1.4.1 +google-api-core==2.12.0.dev0 +google-cloud-core==2.3.2 grpc-google-iam-v1==0.12.4 proto-plus==1.22.0 libcst==0.2.5 diff --git a/tests/unit/v2_client/test_batcher.py b/tests/unit/v2_client/test_batcher.py index 998748141..ab511e030 100644 --- a/tests/unit/v2_client/test_batcher.py +++ b/tests/unit/v2_client/test_batcher.py @@ -204,22 +204,21 @@ def test_mutations_batcher_response_with_error_codes(): mocked_response = [Status(code=1), Status(code=5)] - with mock.patch("tests.unit.test_batcher._Table") as mocked_table: - table = mocked_table.return_value - mutation_batcher = MutationsBatcher(table=table) - - row1 = DirectRow(row_key=b"row_key") - row2 = DirectRow(row_key=b"row_key") - table.mutate_rows.return_value = mocked_response - - mutation_batcher.mutate_rows([row1, row2]) - with pytest.raises(MutationsBatchError) as exc: - mutation_batcher.close() - assert exc.value.message == "Errors in batch mutations." 
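
This hunk swaps the ``mock.patch``-based ``_Table`` for a plain ``mock.Mock()`` table while keeping the same expectations about ``MutationsBatchError``. A rough end-to-end sketch of the behaviour the test pins down, assuming ``Status`` is the protobuf message from ``google.rpc.status_pb2`` and ``MutationsBatchError`` is importable from ``google.cloud.bigtable.batcher`` (neither import is visible in the hunk):

    from unittest import mock

    from google.rpc.status_pb2 import Status  # assumed import path
    from google.cloud.bigtable.batcher import MutationsBatcher, MutationsBatchError  # assumed import path
    from google.cloud.bigtable.row import DirectRow

    table = mock.Mock()
    # Two non-zero status codes stand in for two failed row mutations.
    table.mutate_rows.return_value = [Status(code=1), Status(code=5)]

    batcher = MutationsBatcher(table=table)
    batcher.mutate_rows([DirectRow(row_key=b"row_key"), DirectRow(row_key=b"row_key")])

    try:
        batcher.close()  # flushes pending rows; non-zero statuses surface here
    except MutationsBatchError as exc:
        assert exc.message == "Errors in batch mutations."
        assert len(exc.exc) == 2
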
- assert len(exc.value.exc) == 2 - - assert exc.value.exc[0].message == mocked_response[0].message - assert exc.value.exc[1].message == mocked_response[1].message + table = mock.Mock() + mutation_batcher = MutationsBatcher(table=table) + + row1 = DirectRow(row_key=b"row_key") + row2 = DirectRow(row_key=b"row_key") + table.mutate_rows.return_value = mocked_response + + mutation_batcher.mutate_rows([row1, row2]) + with pytest.raises(MutationsBatchError) as exc: + mutation_batcher.close() + assert exc.value.message == "Errors in batch mutations." + assert len(exc.value.exc) == 2 + + assert exc.value.exc[0].message == mocked_response[0].message + assert exc.value.exc[1].message == mocked_response[1].message def test_flow_control_event_is_set_when_not_blocked(): From d63d68c1689de999572f223032d04975e83595e8 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 26 Jun 2023 14:46:32 -0700 Subject: [PATCH 201/213] reverted gapic tests --- tests/unit/gapic/bigtable_v2/test_bigtable.py | 505 +----------------- 1 file changed, 4 insertions(+), 501 deletions(-) diff --git a/tests/unit/gapic/bigtable_v2/test_bigtable.py b/tests/unit/gapic/bigtable_v2/test_bigtable.py index b1500aa48..03ba3044f 100644 --- a/tests/unit/gapic/bigtable_v2/test_bigtable.py +++ b/tests/unit/gapic/bigtable_v2/test_bigtable.py @@ -100,7 +100,6 @@ def test__get_default_mtls_endpoint(): [ (BigtableClient, "grpc"), (BigtableAsyncClient, "grpc_asyncio"), - (BigtableAsyncClient, "pooled_grpc_asyncio"), (BigtableClient, "rest"), ], ) @@ -117,7 +116,7 @@ def test_bigtable_client_from_service_account_info(client_class, transport_name) assert client.transport._host == ( "bigtable.googleapis.com:443" - if transport_name in ["grpc", "grpc_asyncio", "pooled_grpc_asyncio"] + if transport_name in ["grpc", "grpc_asyncio"] else "https://bigtable.googleapis.com" ) @@ -127,7 +126,6 @@ def test_bigtable_client_from_service_account_info(client_class, transport_name) [ (transports.BigtableGrpcTransport, "grpc"), (transports.BigtableGrpcAsyncIOTransport, "grpc_asyncio"), - (transports.PooledBigtableGrpcAsyncIOTransport, "pooled_grpc_asyncio"), (transports.BigtableRestTransport, "rest"), ], ) @@ -154,7 +152,6 @@ def test_bigtable_client_service_account_always_use_jwt( [ (BigtableClient, "grpc"), (BigtableAsyncClient, "grpc_asyncio"), - (BigtableAsyncClient, "pooled_grpc_asyncio"), (BigtableClient, "rest"), ], ) @@ -178,7 +175,7 @@ def test_bigtable_client_from_service_account_file(client_class, transport_name) assert client.transport._host == ( "bigtable.googleapis.com:443" - if transport_name in ["grpc", "grpc_asyncio", "pooled_grpc_asyncio"] + if transport_name in ["grpc", "grpc_asyncio"] else "https://bigtable.googleapis.com" ) @@ -200,11 +197,6 @@ def test_bigtable_client_get_transport_class(): [ (BigtableClient, transports.BigtableGrpcTransport, "grpc"), (BigtableAsyncClient, transports.BigtableGrpcAsyncIOTransport, "grpc_asyncio"), - ( - BigtableAsyncClient, - transports.PooledBigtableGrpcAsyncIOTransport, - "pooled_grpc_asyncio", - ), (BigtableClient, transports.BigtableRestTransport, "rest"), ], ) @@ -340,12 +332,6 @@ def test_bigtable_client_client_options(client_class, transport_class, transport "grpc_asyncio", "true", ), - ( - BigtableAsyncClient, - transports.PooledBigtableGrpcAsyncIOTransport, - "pooled_grpc_asyncio", - "true", - ), (BigtableClient, transports.BigtableGrpcTransport, "grpc", "false"), ( BigtableAsyncClient, @@ -353,12 +339,6 @@ def test_bigtable_client_client_options(client_class, transport_class, transport 
"grpc_asyncio", "false", ), - ( - BigtableAsyncClient, - transports.PooledBigtableGrpcAsyncIOTransport, - "pooled_grpc_asyncio", - "false", - ), (BigtableClient, transports.BigtableRestTransport, "rest", "true"), (BigtableClient, transports.BigtableRestTransport, "rest", "false"), ], @@ -550,11 +530,6 @@ def test_bigtable_client_get_mtls_endpoint_and_cert_source(client_class): [ (BigtableClient, transports.BigtableGrpcTransport, "grpc"), (BigtableAsyncClient, transports.BigtableGrpcAsyncIOTransport, "grpc_asyncio"), - ( - BigtableAsyncClient, - transports.PooledBigtableGrpcAsyncIOTransport, - "pooled_grpc_asyncio", - ), (BigtableClient, transports.BigtableRestTransport, "rest"), ], ) @@ -591,12 +566,6 @@ def test_bigtable_client_client_options_scopes( "grpc_asyncio", grpc_helpers_async, ), - ( - BigtableAsyncClient, - transports.PooledBigtableGrpcAsyncIOTransport, - "pooled_grpc_asyncio", - grpc_helpers_async, - ), (BigtableClient, transports.BigtableRestTransport, "rest", None), ], ) @@ -743,35 +712,6 @@ def test_read_rows(request_type, transport: str = "grpc"): assert isinstance(message, bigtable.ReadRowsResponse) -def test_read_rows_pooled_rotation(transport: str = "pooled_grpc_asyncio"): - with mock.patch.object( - transports.pooled_grpc_asyncio.PooledChannel, "next_channel" - ) as next_channel: - client = BigtableClient( - credentials=ga_credentials.AnonymousCredentials(), - transport=transport, - ) - - # Everything is optional in proto3 as far as the runtime is concerned, - # and we are mocking out the actual API, so just send an empty request. - request = {} - - channel = client.transport._grpc_channel._pool[ - client.transport._grpc_channel._next_idx - ] - next_channel.return_value = channel - - response = client.read_rows(request) - - # Establish that next_channel was called - next_channel.assert_called_once() - # Establish that subsequent calls all call next_channel - starting_idx = client.transport._grpc_channel._next_idx - for i in range(2, 10): - response = client.read_rows(request) - assert next_channel.call_count == i - - def test_read_rows_empty_call(): # This test is a coverage failsafe to make sure that totally empty calls, # i.e. request == None and no flattened fields passed, work. @@ -991,35 +931,6 @@ def test_sample_row_keys(request_type, transport: str = "grpc"): assert isinstance(message, bigtable.SampleRowKeysResponse) -def test_sample_row_keys_pooled_rotation(transport: str = "pooled_grpc_asyncio"): - with mock.patch.object( - transports.pooled_grpc_asyncio.PooledChannel, "next_channel" - ) as next_channel: - client = BigtableClient( - credentials=ga_credentials.AnonymousCredentials(), - transport=transport, - ) - - # Everything is optional in proto3 as far as the runtime is concerned, - # and we are mocking out the actual API, so just send an empty request. - request = {} - - channel = client.transport._grpc_channel._pool[ - client.transport._grpc_channel._next_idx - ] - next_channel.return_value = channel - - response = client.sample_row_keys(request) - - # Establish that next_channel was called - next_channel.assert_called_once() - # Establish that subsequent calls all call next_channel - starting_idx = client.transport._grpc_channel._next_idx - for i in range(2, 10): - response = client.sample_row_keys(request) - assert next_channel.call_count == i - - def test_sample_row_keys_empty_call(): # This test is a coverage failsafe to make sure that totally empty calls, # i.e. request == None and no flattened fields passed, work. 
@@ -1238,35 +1149,6 @@ def test_mutate_row(request_type, transport: str = "grpc"): assert isinstance(response, bigtable.MutateRowResponse) -def test_mutate_row_pooled_rotation(transport: str = "pooled_grpc_asyncio"): - with mock.patch.object( - transports.pooled_grpc_asyncio.PooledChannel, "next_channel" - ) as next_channel: - client = BigtableClient( - credentials=ga_credentials.AnonymousCredentials(), - transport=transport, - ) - - # Everything is optional in proto3 as far as the runtime is concerned, - # and we are mocking out the actual API, so just send an empty request. - request = {} - - channel = client.transport._grpc_channel._pool[ - client.transport._grpc_channel._next_idx - ] - next_channel.return_value = channel - - response = client.mutate_row(request) - - # Establish that next_channel was called - next_channel.assert_called_once() - # Establish that subsequent calls all call next_channel - starting_idx = client.transport._grpc_channel._next_idx - for i in range(2, 10): - response = client.mutate_row(request) - assert next_channel.call_count == i - - def test_mutate_row_empty_call(): # This test is a coverage failsafe to make sure that totally empty calls, # i.e. request == None and no flattened fields passed, work. @@ -1530,35 +1412,6 @@ def test_mutate_rows(request_type, transport: str = "grpc"): assert isinstance(message, bigtable.MutateRowsResponse) -def test_mutate_rows_pooled_rotation(transport: str = "pooled_grpc_asyncio"): - with mock.patch.object( - transports.pooled_grpc_asyncio.PooledChannel, "next_channel" - ) as next_channel: - client = BigtableClient( - credentials=ga_credentials.AnonymousCredentials(), - transport=transport, - ) - - # Everything is optional in proto3 as far as the runtime is concerned, - # and we are mocking out the actual API, so just send an empty request. - request = {} - - channel = client.transport._grpc_channel._pool[ - client.transport._grpc_channel._next_idx - ] - next_channel.return_value = channel - - response = client.mutate_rows(request) - - # Establish that next_channel was called - next_channel.assert_called_once() - # Establish that subsequent calls all call next_channel - starting_idx = client.transport._grpc_channel._next_idx - for i in range(2, 10): - response = client.mutate_rows(request) - assert next_channel.call_count == i - - def test_mutate_rows_empty_call(): # This test is a coverage failsafe to make sure that totally empty calls, # i.e. request == None and no flattened fields passed, work. @@ -1792,35 +1645,6 @@ def test_check_and_mutate_row(request_type, transport: str = "grpc"): assert response.predicate_matched is True -def test_check_and_mutate_row_pooled_rotation(transport: str = "pooled_grpc_asyncio"): - with mock.patch.object( - transports.pooled_grpc_asyncio.PooledChannel, "next_channel" - ) as next_channel: - client = BigtableClient( - credentials=ga_credentials.AnonymousCredentials(), - transport=transport, - ) - - # Everything is optional in proto3 as far as the runtime is concerned, - # and we are mocking out the actual API, so just send an empty request. 
- request = {} - - channel = client.transport._grpc_channel._pool[ - client.transport._grpc_channel._next_idx - ] - next_channel.return_value = channel - - response = client.check_and_mutate_row(request) - - # Establish that next_channel was called - next_channel.assert_called_once() - # Establish that subsequent calls all call next_channel - starting_idx = client.transport._grpc_channel._next_idx - for i in range(2, 10): - response = client.check_and_mutate_row(request) - assert next_channel.call_count == i - - def test_check_and_mutate_row_empty_call(): # This test is a coverage failsafe to make sure that totally empty calls, # i.e. request == None and no flattened fields passed, work. @@ -2198,35 +2022,6 @@ def test_ping_and_warm(request_type, transport: str = "grpc"): assert isinstance(response, bigtable.PingAndWarmResponse) -def test_ping_and_warm_pooled_rotation(transport: str = "pooled_grpc_asyncio"): - with mock.patch.object( - transports.pooled_grpc_asyncio.PooledChannel, "next_channel" - ) as next_channel: - client = BigtableClient( - credentials=ga_credentials.AnonymousCredentials(), - transport=transport, - ) - - # Everything is optional in proto3 as far as the runtime is concerned, - # and we are mocking out the actual API, so just send an empty request. - request = {} - - channel = client.transport._grpc_channel._pool[ - client.transport._grpc_channel._next_idx - ] - next_channel.return_value = channel - - response = client.ping_and_warm(request) - - # Establish that next_channel was called - next_channel.assert_called_once() - # Establish that subsequent calls all call next_channel - starting_idx = client.transport._grpc_channel._next_idx - for i in range(2, 10): - response = client.ping_and_warm(request) - assert next_channel.call_count == i - - def test_ping_and_warm_empty_call(): # This test is a coverage failsafe to make sure that totally empty calls, # i.e. request == None and no flattened fields passed, work. @@ -2447,35 +2242,6 @@ def test_read_modify_write_row(request_type, transport: str = "grpc"): assert isinstance(response, bigtable.ReadModifyWriteRowResponse) -def test_read_modify_write_row_pooled_rotation(transport: str = "pooled_grpc_asyncio"): - with mock.patch.object( - transports.pooled_grpc_asyncio.PooledChannel, "next_channel" - ) as next_channel: - client = BigtableClient( - credentials=ga_credentials.AnonymousCredentials(), - transport=transport, - ) - - # Everything is optional in proto3 as far as the runtime is concerned, - # and we are mocking out the actual API, so just send an empty request. - request = {} - - channel = client.transport._grpc_channel._pool[ - client.transport._grpc_channel._next_idx - ] - next_channel.return_value = channel - - response = client.read_modify_write_row(request) - - # Establish that next_channel was called - next_channel.assert_called_once() - # Establish that subsequent calls all call next_channel - starting_idx = client.transport._grpc_channel._next_idx - for i in range(2, 10): - response = client.read_modify_write_row(request) - assert next_channel.call_count == i - - def test_read_modify_write_row_empty_call(): # This test is a coverage failsafe to make sure that totally empty calls, # i.e. request == None and no flattened fields passed, work. 
@@ -2735,37 +2501,6 @@ def test_generate_initial_change_stream_partitions( ) -def test_generate_initial_change_stream_partitions_pooled_rotation( - transport: str = "pooled_grpc_asyncio", -): - with mock.patch.object( - transports.pooled_grpc_asyncio.PooledChannel, "next_channel" - ) as next_channel: - client = BigtableClient( - credentials=ga_credentials.AnonymousCredentials(), - transport=transport, - ) - - # Everything is optional in proto3 as far as the runtime is concerned, - # and we are mocking out the actual API, so just send an empty request. - request = {} - - channel = client.transport._grpc_channel._pool[ - client.transport._grpc_channel._next_idx - ] - next_channel.return_value = channel - - response = client.generate_initial_change_stream_partitions(request) - - # Establish that next_channel was called - next_channel.assert_called_once() - # Establish that subsequent calls all call next_channel - starting_idx = client.transport._grpc_channel._next_idx - for i in range(2, 10): - response = client.generate_initial_change_stream_partitions(request) - assert next_channel.call_count == i - - def test_generate_initial_change_stream_partitions_empty_call(): # This test is a coverage failsafe to make sure that totally empty calls, # i.e. request == None and no flattened fields passed, work. @@ -3025,35 +2760,6 @@ def test_read_change_stream(request_type, transport: str = "grpc"): assert isinstance(message, bigtable.ReadChangeStreamResponse) -def test_read_change_stream_pooled_rotation(transport: str = "pooled_grpc_asyncio"): - with mock.patch.object( - transports.pooled_grpc_asyncio.PooledChannel, "next_channel" - ) as next_channel: - client = BigtableClient( - credentials=ga_credentials.AnonymousCredentials(), - transport=transport, - ) - - # Everything is optional in proto3 as far as the runtime is concerned, - # and we are mocking out the actual API, so just send an empty request. - request = {} - - channel = client.transport._grpc_channel._pool[ - client.transport._grpc_channel._next_idx - ] - next_channel.return_value = channel - - response = client.read_change_stream(request) - - # Establish that next_channel was called - next_channel.assert_called_once() - # Establish that subsequent calls all call next_channel - starting_idx = client.transport._grpc_channel._next_idx - for i in range(2, 10): - response = client.read_change_stream(request) - assert next_channel.call_count == i - - def test_read_change_stream_empty_call(): # This test is a coverage failsafe to make sure that totally empty calls, # i.e. request == None and no flattened fields passed, work. 
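The *_pooled_rotation tests removed above all assert the same round-robin behavior of the pooled transport's channel list: next_channel() hands back the channel at _next_idx and then advances the index, wrapping around at the end of _pool (see test_pooled_transport_next_channel further down). A minimal sketch of that rotation pattern, using a hypothetical standalone class rather than the real transport and assuming nothing beyond what the deleted assertions show:

    # Illustrative sketch only; mirrors the behavior the removed tests asserted:
    # next_channel() returns the channel at _next_idx, then advances the index
    # with wrap-around so successive requests rotate through the pool.
    class _RoundRobinPool:
        def __init__(self, channels):
            self._pool = list(channels)  # e.g. grpc.aio channels
            self._next_idx = 0

        def next_channel(self):
            channel = self._pool[self._next_idx]
            self._next_idx = (self._next_idx + 1) % len(self._pool)
            return channel

    # usage: each call hands out the next channel in order, wrapping at the end
    pool = _RoundRobinPool(["ch0", "ch1", "ch2"])
    assert [pool.next_channel() for _ in range(4)] == ["ch0", "ch1", "ch2", "ch0"]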
@@ -5957,7 +5663,6 @@ def test_transport_get_channel(): [ transports.BigtableGrpcTransport, transports.BigtableGrpcAsyncIOTransport, - transports.PooledBigtableGrpcAsyncIOTransport, transports.BigtableRestTransport, ], ) @@ -6105,7 +5810,6 @@ def test_bigtable_auth_adc(): [ transports.BigtableGrpcTransport, transports.BigtableGrpcAsyncIOTransport, - transports.PooledBigtableGrpcAsyncIOTransport, ], ) def test_bigtable_transport_auth_adc(transport_class): @@ -6133,7 +5837,6 @@ def test_bigtable_transport_auth_adc(transport_class): [ transports.BigtableGrpcTransport, transports.BigtableGrpcAsyncIOTransport, - transports.PooledBigtableGrpcAsyncIOTransport, transports.BigtableRestTransport, ], ) @@ -6236,61 +5939,6 @@ def test_bigtable_grpc_transport_client_cert_source_for_mtls(transport_class): ) -@pytest.mark.parametrize( - "transport_class", [transports.PooledBigtableGrpcAsyncIOTransport] -) -def test_bigtable_pooled_grpc_transport_client_cert_source_for_mtls(transport_class): - cred = ga_credentials.AnonymousCredentials() - - # test with invalid pool size - with pytest.raises(ValueError): - transport_class( - host="squid.clam.whelk", - credentials=cred, - pool_size=0, - ) - - # Check ssl_channel_credentials is used if provided. - for pool_num in range(1, 5): - with mock.patch.object( - transport_class, "create_channel" - ) as mock_create_channel: - mock_ssl_channel_creds = mock.Mock() - transport_class( - host="squid.clam.whelk", - credentials=cred, - ssl_channel_credentials=mock_ssl_channel_creds, - pool_size=pool_num, - ) - mock_create_channel.assert_called_with( - pool_num, - "squid.clam.whelk:443", - credentials=cred, - credentials_file=None, - scopes=None, - ssl_credentials=mock_ssl_channel_creds, - quota_project_id=None, - options=[ - ("grpc.max_send_message_length", -1), - ("grpc.max_receive_message_length", -1), - ], - ) - assert mock_create_channel.call_count == 1 - - # Check if ssl_channel_credentials is not provided, then client_cert_source_for_mtls - # is used. 
- with mock.patch.object(transport_class, "create_channel", return_value=mock.Mock()): - with mock.patch("grpc.ssl_channel_credentials") as mock_ssl_cred: - transport_class( - credentials=cred, - client_cert_source_for_mtls=client_cert_source_callback, - ) - expected_cert, expected_key = client_cert_source_callback() - mock_ssl_cred.assert_called_once_with( - certificate_chain=expected_cert, private_key=expected_key - ) - - def test_bigtable_http_transport_client_cert_source_for_mtls(): cred = ga_credentials.AnonymousCredentials() with mock.patch( @@ -6307,7 +5955,6 @@ def test_bigtable_http_transport_client_cert_source_for_mtls(): [ "grpc", "grpc_asyncio", - "pooled_grpc_asyncio", "rest", ], ) @@ -6321,7 +5968,7 @@ def test_bigtable_host_no_port(transport_name): ) assert client.transport._host == ( "bigtable.googleapis.com:443" - if transport_name in ["grpc", "grpc_asyncio", "pooled_grpc_asyncio"] + if transport_name in ["grpc", "grpc_asyncio"] else "https://bigtable.googleapis.com" ) @@ -6331,7 +5978,6 @@ def test_bigtable_host_no_port(transport_name): [ "grpc", "grpc_asyncio", - "pooled_grpc_asyncio", "rest", ], ) @@ -6345,7 +5991,7 @@ def test_bigtable_host_with_port(transport_name): ) assert client.transport._host == ( "bigtable.googleapis.com:8000" - if transport_name in ["grpc", "grpc_asyncio", "pooled_grpc_asyncio"] + if transport_name in ["grpc", "grpc_asyncio"] else "https://bigtable.googleapis.com:8000" ) @@ -6701,24 +6347,6 @@ async def test_transport_close_async(): async with client: close.assert_not_called() close.assert_called_once() - close.assert_awaited() - - -@pytest.mark.asyncio -async def test_pooled_transport_close_async(): - client = BigtableAsyncClient( - credentials=ga_credentials.AnonymousCredentials(), - transport="pooled_grpc_asyncio", - ) - num_channels = len(client.transport._grpc_channel._pool) - with mock.patch.object( - type(client.transport._grpc_channel._pool[0]), "close" - ) as close: - async with client: - close.assert_not_called() - close.assert_called() - assert close.call_count == num_channels - close.assert_awaited() def test_transport_close(): @@ -6785,128 +6413,3 @@ def test_api_key_credentials(client_class, transport_class): always_use_jwt_access=True, api_audience=None, ) - - -@pytest.mark.asyncio -async def test_pooled_transport_replace_default(): - client = BigtableClient( - credentials=ga_credentials.AnonymousCredentials(), - transport="pooled_grpc_asyncio", - ) - num_channels = len(client.transport._grpc_channel._pool) - for replace_idx in range(num_channels): - prev_pool = [channel for channel in client.transport._grpc_channel._pool] - grace_period = 4 - with mock.patch.object( - type(client.transport._grpc_channel._pool[0]), "close" - ) as close: - await client.transport.replace_channel(replace_idx, grace=grace_period) - close.assert_called_once() - close.assert_awaited() - close.assert_called_with(grace=grace_period) - assert isinstance( - client.transport._grpc_channel._pool[replace_idx], grpc.aio.Channel - ) - # only the specified channel should be replaced - for i in range(num_channels): - if i == replace_idx: - assert client.transport._grpc_channel._pool[i] != prev_pool[i] - else: - assert client.transport._grpc_channel._pool[i] == prev_pool[i] - with pytest.raises(ValueError): - await client.transport.replace_channel(num_channels + 1) - with pytest.raises(ValueError): - await client.transport.replace_channel(-1) - - -@pytest.mark.asyncio -async def test_pooled_transport_replace_explicit(): - client = BigtableClient( - 
credentials=ga_credentials.AnonymousCredentials(), - transport="pooled_grpc_asyncio", - ) - num_channels = len(client.transport._grpc_channel._pool) - for replace_idx in range(num_channels): - prev_pool = [channel for channel in client.transport._grpc_channel._pool] - grace_period = 0 - with mock.patch.object( - type(client.transport._grpc_channel._pool[0]), "close" - ) as close: - new_channel = grpc.aio.insecure_channel("localhost:8080") - await client.transport.replace_channel( - replace_idx, grace=grace_period, new_channel=new_channel - ) - close.assert_called_once() - close.assert_awaited() - close.assert_called_with(grace=grace_period) - assert client.transport._grpc_channel._pool[replace_idx] == new_channel - # only the specified channel should be replaced - for i in range(num_channels): - if i == replace_idx: - assert client.transport._grpc_channel._pool[i] != prev_pool[i] - else: - assert client.transport._grpc_channel._pool[i] == prev_pool[i] - - -def test_pooled_transport_next_channel(): - num_channels = 10 - transport = transports.PooledBigtableGrpcAsyncIOTransport( - credentials=ga_credentials.AnonymousCredentials(), - pool_size=num_channels, - ) - assert len(transport._grpc_channel._pool) == num_channels - transport._grpc_channel._next_idx = 0 - # rotate through all channels multiple times - num_cycles = 4 - for _ in range(num_cycles): - for i in range(num_channels - 1): - assert transport._grpc_channel._next_idx == i - got_channel = transport._grpc_channel.next_channel() - assert got_channel == transport._grpc_channel._pool[i] - assert transport._grpc_channel._next_idx == (i + 1) - # test wrap around - assert transport._grpc_channel._next_idx == num_channels - 1 - got_channel = transport._grpc_channel.next_channel() - assert got_channel == transport._grpc_channel._pool[num_channels - 1] - assert transport._grpc_channel._next_idx == 0 - - -def test_pooled_transport_pool_unique_channels(): - num_channels = 50 - - transport = transports.PooledBigtableGrpcAsyncIOTransport( - credentials=ga_credentials.AnonymousCredentials(), - pool_size=num_channels, - ) - channel_list = [channel for channel in transport._grpc_channel._pool] - channel_set = set(channel_list) - assert len(channel_list) == num_channels - assert len(channel_set) == num_channels - for channel in channel_list: - assert isinstance(channel, grpc.aio.Channel) - - -def test_pooled_transport_pool_creation(): - # channels should be created with the specified options - num_channels = 50 - creds = ga_credentials.AnonymousCredentials() - scopes = ["test1", "test2"] - quota_project_id = "test3" - host = "testhost:8080" - with mock.patch( - "google.api_core.grpc_helpers_async.create_channel" - ) as create_channel: - transport = transports.PooledBigtableGrpcAsyncIOTransport( - credentials=creds, - pool_size=num_channels, - scopes=scopes, - quota_project_id=quota_project_id, - host=host, - ) - assert create_channel.call_count == num_channels - for i in range(num_channels): - kwargs = create_channel.call_args_list[i][1] - assert kwargs["target"] == host - assert kwargs["credentials"] == creds - assert kwargs["scopes"] == scopes - assert kwargs["quota_project_id"] == quota_project_id From 5b7db3e25cb4d28ee47dfd450725683785f20dad Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 26 Jun 2023 14:47:33 -0700 Subject: [PATCH 202/213] added verbose logging for unit tests --- noxfile.py | 1 + 1 file changed, 1 insertion(+) diff --git a/noxfile.py b/noxfile.py index 95fac1e3c..637dc9ab5 100644 --- a/noxfile.py +++ b/noxfile.py @@ 
-209,6 +209,7 @@ def default(session): "--cov-config=.coveragerc", "--cov-report=", "--cov-fail-under=0", + "-vvvvv", # TODO: for testing, remove os.path.join("tests", "unit"), *session.posargs, ) From 9947afdabef1319b91483a6e9d402ab5fe6bf73d Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 26 Jun 2023 16:14:38 -0700 Subject: [PATCH 203/213] fixed atexit test that was breaking coverage reports --- noxfile.py | 1 - tests/unit/data/_async/test_mutations_batcher.py | 12 +++--------- 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/noxfile.py b/noxfile.py index 637dc9ab5..95fac1e3c 100644 --- a/noxfile.py +++ b/noxfile.py @@ -209,7 +209,6 @@ def default(session): "--cov-config=.coveragerc", "--cov-report=", "--cov-fail-under=0", - "-vvvvv", # TODO: for testing, remove os.path.join("tests", "unit"), *session.posargs, ) diff --git a/tests/unit/data/_async/test_mutations_batcher.py b/tests/unit/data/_async/test_mutations_batcher.py index c99513713..1b14cc128 100644 --- a/tests/unit/data/_async/test_mutations_batcher.py +++ b/tests/unit/data/_async/test_mutations_batcher.py @@ -1015,16 +1015,10 @@ async def test_atexit_registration(self): """Should run _on_exit on program termination""" import atexit - with mock.patch( - "google.cloud.bigtable.data._async.mutations_batcher.MutationsBatcherAsync._on_exit" - ) as on_exit_mock: + with mock.patch.object(atexit, "register") as register_mock: + assert register_mock.call_count == 0 async with self._make_one(): - assert on_exit_mock.call_count == 0 - atexit._run_exitfuncs() - assert on_exit_mock.call_count == 1 - # should not call after close - atexit._run_exitfuncs() - assert on_exit_mock.call_count == 1 + assert register_mock.call_count == 1 @pytest.mark.asyncio @mock.patch( From 9e8a0b3473d6ec5e6144298e443be60c8c7564f3 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 26 Jun 2023 16:24:59 -0700 Subject: [PATCH 204/213] removed external api-core installation for prerelease-deps --- noxfile.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/noxfile.py b/noxfile.py index 95fac1e3c..8499a610f 100644 --- a/noxfile.py +++ b/noxfile.py @@ -463,11 +463,6 @@ def prerelease_deps(session): ) session.run("python", "-c", "import grpc; print(grpc.__version__)") - # TODO: remove adter merging api-core - session.install( - "--upgrade", "--no-deps", "--force-reinstall", *UNIT_TEST_EXTERNAL_DEPENDENCIES - ) - session.run("py.test", "tests/unit") system_test_path = os.path.join("tests", "system.py") From 30b9b11f4645ca0cefd0b24aaa64a686663dd1a8 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Tue, 27 Jun 2023 11:34:13 -0700 Subject: [PATCH 205/213] improve timeouts --- .../bigtable/data/_async/_mutate_rows.py | 6 +- .../cloud/bigtable/data/_async/_read_rows.py | 6 +- google/cloud/bigtable/data/_async/client.py | 329 +++++++++++------- .../bigtable/data/_async/mutations_batcher.py | 28 +- google/cloud/bigtable/data/_helpers.py | 27 ++ tests/unit/data/_async/test__mutate_rows.py | 4 +- tests/unit/data/_async/test__read_rows.py | 2 +- tests/unit/data/_async/test_client.py | 193 ++++++---- .../data/_async/test_mutations_batcher.py | 62 ++-- tests/unit/data/test__helpers.py | 45 +++ 10 files changed, 458 insertions(+), 244 deletions(-) diff --git a/google/cloud/bigtable/data/_async/_mutate_rows.py b/google/cloud/bigtable/data/_async/_mutate_rows.py index ac491adaf..db7728ee9 100644 --- a/google/cloud/bigtable/data/_async/_mutate_rows.py +++ b/google/cloud/bigtable/data/_async/_mutate_rows.py @@ -52,7 +52,7 @@ def __init__( table: 
"TableAsync", mutation_entries: list["RowMutationEntry"], operation_timeout: float, - per_request_timeout: float | None, + attempt_timeout: float | None, ): """ Args: @@ -60,7 +60,7 @@ def __init__( - table: the table associated with the request - mutation_entries: a list of RowMutationEntry objects to send to the server - operation_timeout: the timeout t o use for the entire operation, in seconds. - - per_request_timeout: the timeoutto use for each mutate_rows attempt, in seconds. + - attempt_timeout: the timeoutto use for each mutate_rows attempt, in seconds. If not specified, the request will run until operation_timeout is reached. """ # check that mutations are within limits @@ -99,7 +99,7 @@ def __init__( self._operation = _convert_retry_deadline(retry_wrapped, operation_timeout) # initialize state self.timeout_generator = _attempt_timeout_generator( - per_request_timeout, operation_timeout + attempt_timeout, operation_timeout ) self.mutations = mutation_entries self.remaining_indices = list(range(len(self.mutations))) diff --git a/google/cloud/bigtable/data/_async/_read_rows.py b/google/cloud/bigtable/data/_async/_read_rows.py index c50af21ec..282c0bca4 100644 --- a/google/cloud/bigtable/data/_async/_read_rows.py +++ b/google/cloud/bigtable/data/_async/_read_rows.py @@ -63,14 +63,14 @@ def __init__( client: BigtableAsyncClient, *, operation_timeout: float = 600.0, - per_request_timeout: float | None = None, + attempt_timeout: float | None = None, ): """ Args: - request: the request dict to send to the Bigtable API - client: the Bigtable client to use to make the request - operation_timeout: the timeout to use for the entire operation, in seconds - - per_request_timeout: the timeout to use when waiting for each individual grpc request, in seconds + - attempt_timeout: the timeout to use when waiting for each individual grpc request, in seconds If not specified, defaults to operation_timeout """ self._last_emitted_row_key: bytes | None = None @@ -79,7 +79,7 @@ def __init__( self.operation_timeout = operation_timeout # use generator to lower per-attempt timeout as we approach operation_timeout deadline attempt_timeout_gen = _attempt_timeout_generator( - per_request_timeout, operation_timeout + attempt_timeout, operation_timeout ) row_limit = request.get("rows_limit", 0) # lock in paramters for retryable wrapper diff --git a/google/cloud/bigtable/data/_async/client.py b/google/cloud/bigtable/data/_async/client.py index fa9d9270d..eaff4f809 100644 --- a/google/cloud/bigtable/data/_async/client.py +++ b/google/cloud/bigtable/data/_async/client.py @@ -58,6 +58,7 @@ from google.cloud.bigtable.data._async._mutate_rows import _MutateRowsOperationAsync from google.cloud.bigtable.data._helpers import _make_metadata from google.cloud.bigtable.data._helpers import _convert_retry_deadline +from google.cloud.bigtable.data._helpers import _validate_timeouts from google.cloud.bigtable.data._async.mutations_batcher import MutationsBatcherAsync from google.cloud.bigtable.data._async.mutations_batcher import _MB_SIZE from google.cloud.bigtable.data._helpers import _attempt_timeout_generator @@ -340,14 +341,18 @@ async def _remove_instance_registration( except KeyError: return False - # TODO: revisit timeouts https://github.com/googleapis/python-bigtable/issues/782 def get_table( self, instance_id: str, table_id: str, app_profile_id: str | None = None, - default_operation_timeout: float = 600, - default_per_request_timeout: float | None = None, + *, + default_read_rows_operation_timeout: float = 600, + 
default_read_rows_attempt_timeout: float | None = None, + default_mutate_rows_operation_timeout: float = 600, + default_mutate_rows_attempt_timeout: float | None = None, + default_operation_timeout: float = 60, + default_attempt_timeout: float | None = None, ) -> TableAsync: """ Returns a table instance for making data API requests @@ -366,7 +371,7 @@ def get_table( table_id, app_profile_id, default_operation_timeout=default_operation_timeout, - default_per_request_timeout=default_per_request_timeout, + default_attempt_timeout=default_attempt_timeout, ) async def __aenter__(self): @@ -393,8 +398,12 @@ def __init__( table_id: str, app_profile_id: str | None = None, *, - default_operation_timeout: float = 600, - default_per_request_timeout: float | None = None, + default_read_rows_operation_timeout: float = 600, + default_read_rows_attempt_timeout: float | None = None, + default_mutate_rows_operation_timeout: float = 600, + default_mutate_rows_attempt_timeout: float | None = None, + default_operation_timeout: float = 60, + default_attempt_timeout: float | None = None, ): """ Initialize a Table instance @@ -410,23 +419,26 @@ def __init__( app_profile_id: (Optional) The app profile to associate with requests. https://cloud.google.com/bigtable/docs/app-profiles default_operation_timeout: (Optional) The default timeout, in seconds - default_per_request_timeout: (Optional) The default timeout for individual + default_attempt_timeout: (Optional) The default timeout for individual rpc requests, in seconds Raises: - RuntimeError if called outside of an async context (no running event loop) """ # validate timeouts - if default_operation_timeout <= 0: - raise ValueError("default_operation_timeout must be greater than 0") - if default_per_request_timeout is not None and default_per_request_timeout <= 0: - raise ValueError("default_per_request_timeout must be greater than 0") - if ( - default_per_request_timeout is not None - and default_per_request_timeout > default_operation_timeout - ): - raise ValueError( - "default_per_request_timeout must be less than default_operation_timeout" - ) + _validate_timeouts( + default_operation_timeout, default_attempt_timeout, allow_none=True + ) + _validate_timeouts( + default_read_rows_operation_timeout, + default_read_rows_attempt_timeout, + allow_none=True, + ) + _validate_timeouts( + default_mutate_rows_operation_timeout, + default_mutate_rows_attempt_timeout, + allow_none=True, + ) + self.client = client self.instance_id = instance_id self.instance_name = self.client._gapic_client.instance_path( @@ -439,7 +451,13 @@ def __init__( self.app_profile_id = app_profile_id self.default_operation_timeout = default_operation_timeout - self.default_per_request_timeout = default_per_request_timeout + self.default_attempt_timeout = default_attempt_timeout + self.default_read_rows_operation_timeout = default_read_rows_operation_timeout + self.default_read_rows_attempt_timeout = default_read_rows_attempt_timeout + self.default_mutate_rows_operation_timeout = ( + default_mutate_rows_operation_timeout + ) + self.default_mutate_rows_attempt_timeout = default_mutate_rows_attempt_timeout # raises RuntimeError if called outside of an async context (no running event loop) try: @@ -456,24 +474,24 @@ async def read_rows_stream( query: ReadRowsQuery | dict[str, Any], *, operation_timeout: float | None = None, - per_request_timeout: float | None = None, + attempt_timeout: float | None = None, ) -> ReadRowsIteratorAsync: """ + Read a set of rows from the table, based on the specified 
query. Returns an iterator to asynchronously stream back row data. - Failed requests within operation_timeout and operation_deadline policies will be retried. + Failed requests within operation_timeout will be retried. Args: - query: contains details about which rows to return - operation_timeout: the time budget for the entire operation, in seconds. Failed requests will be retried within the budget. - time is only counted while actively waiting on the network. - If None, defaults to the Table's default_operation_timeout - - per_request_timeout: the time budget for an individual network request, in seconds. + If None, defaults to the Table's default_read_rows_operation_timeout + - attempt_timeout: the time budget for an individual network request, in seconds. If it takes longer than this time to complete, the request will be cancelled with a DeadlineExceeded exception, and a retry will be attempted. - If None, defaults to the Table's default_per_request_timeout - + If None, defaults to the Table's default_read_rows_attempt_timeout, + or the operation_timeout if that is also None. Returns: - an asynchronous iterator that yields rows returned by the query Raises: @@ -484,34 +502,31 @@ async def read_rows_stream( - IdleTimeout: if iterator was abandoned """ - operation_timeout = operation_timeout or self.default_operation_timeout - per_request_timeout = per_request_timeout or self.default_per_request_timeout + operation_timeout = ( + operation_timeout or self.default_read_rows_operation_timeout + ) + attempt_timeout = ( + attempt_timeout + or self.default_read_rows_attempt_timeout + or operation_timeout + ) + _validate_timeouts(operation_timeout, attempt_timeout) - if operation_timeout <= 0: - raise ValueError("operation_timeout must be greater than 0") - if per_request_timeout is not None and per_request_timeout <= 0: - raise ValueError("per_request_timeout must be greater than 0") - if per_request_timeout is not None and per_request_timeout > operation_timeout: - raise ValueError( - "per_request_timeout must not be greater than operation_timeout" - ) - if per_request_timeout is None: - per_request_timeout = operation_timeout request = query._to_dict() if isinstance(query, ReadRowsQuery) else query request["table_name"] = self.table_name if self.app_profile_id: request["app_profile_id"] = self.app_profile_id # read_rows smart retries is implemented using a series of iterators: - # - client.read_rows: outputs raw ReadRowsResponse objects from backend. Has per_request_timeout + # - client.read_rows: outputs raw ReadRowsResponse objects from backend. 
Has attempt_timeout # - ReadRowsOperation.merge_row_response_stream: parses chunks into rows - # - ReadRowsOperation.retryable_merge_rows: adds retries, caching, revised requests, per_request_timeout + # - ReadRowsOperation.retryable_merge_rows: adds retries, caching, revised requests, attempt_timeout # - ReadRowsIterator: adds idle_timeout, moves stats out of stream and into attribute row_merger = _ReadRowsOperationAsync( request, self.client._gapic_client, operation_timeout=operation_timeout, - per_request_timeout=per_request_timeout, + attempt_timeout=attempt_timeout, ) output_generator = ReadRowsIteratorAsync(row_merger) # add idle timeout to clear resources if generator is abandoned @@ -524,20 +539,37 @@ async def read_rows( query: ReadRowsQuery | dict[str, Any], *, operation_timeout: float | None = None, - per_request_timeout: float | None = None, + attempt_timeout: float | None = None, ) -> list[Row]: """ - Helper function that returns a full list instead of a generator + Read a set of rows from the table, based on the specified query. + Retruns results as a list of Row objects when the request is complete. + For streamed results, use read_rows_stream. - See read_rows_stream + Failed requests within operation_timeout and operation_deadline policies will be retried. + Args: + - query: contains details about which rows to return + - operation_timeout: the time budget for the entire operation, in seconds. + Failed requests will be retried within the budget. + If None, defaults to the Table's default_read_rows_operation_timeout + - attempt_timeout: the time budget for an individual network request, in seconds. + If it takes longer than this time to complete, the request will be cancelled with + a DeadlineExceeded exception, and a retry will be attempted. + If None, defaults to the Table's default_read_rows_attempt_timeout, + or the operation_timeout if that is also None. Returns: - - a list of the rows returned by the query + - a list of Rows returned by the query + Raises: + - DeadlineExceeded: raised after operation timeout + will be chained with a RetryExceptionGroup containing GoogleAPIError exceptions + from any retries that failed + - GoogleAPIError: raised if the request encounters an unrecoverable error """ row_generator = await self.read_rows_stream( query, operation_timeout=operation_timeout, - per_request_timeout=per_request_timeout, + attempt_timeout=attempt_timeout, ) results = [row async for row in row_generator] return results @@ -547,18 +579,31 @@ async def read_row( row_key: str | bytes, *, row_filter: RowFilter | None = None, - operation_timeout: int | float | None = 60, - per_request_timeout: int | float | None = None, + operation_timeout: int | float | None = None, + attempt_timeout: int | float | None = None, ) -> Row | None: """ - Helper function to return a single row + Read a single row from the table, based on the specified key. - See read_rows_stream + Failed requests within operation_timeout and operation_deadline policies will be retried. - Raises: - - google.cloud.bigtable.data.exceptions.RowNotFound: if the row does not exist + Args: + - query: contains details about which rows to return + - operation_timeout: the time budget for the entire operation, in seconds. + Failed requests will be retried within the budget. + If None, defaults to the Table's default_read_rows_operation_timeout + - attempt_timeout: the time budget for an individual network request, in seconds. 
+ If it takes longer than this time to complete, the request will be cancelled with + a DeadlineExceeded exception, and a retry will be attempted. + If None, defaults to the Table's default_read_rows_attempt_timeout, or the operation_timeout + if that is also None. Returns: - - the individual row requested, or None if it does not exist + - a Row object if the row exists, otherwise None + Raises: + - DeadlineExceeded: raised after operation timeout + will be chained with a RetryExceptionGroup containing GoogleAPIError exceptions + from any retries that failed + - GoogleAPIError: raised if the request encounters an unrecoverable error """ if row_key is None: raise ValueError("row_key must be string or bytes") @@ -566,7 +611,7 @@ async def read_row( results = await self.read_rows( query, operation_timeout=operation_timeout, - per_request_timeout=per_request_timeout, + attempt_timeout=attempt_timeout, ) if len(results) == 0: return None @@ -577,7 +622,7 @@ async def read_rows_sharded( sharded_query: ShardedQuery, *, operation_timeout: int | float | None = None, - per_request_timeout: int | float | None = None, + attempt_timeout: int | float | None = None, ) -> list[Row]: """ Runs a sharded query in parallel, then return the results in a single list. @@ -594,6 +639,14 @@ async def read_rows_sharded( Args: - sharded_query: a sharded query to execute + - operation_timeout: the time budget for the entire operation, in seconds. + Failed requests will be retried within the budget. + If None, defaults to the Table's default_read_rows_operation_timeout + - attempt_timeout: the time budget for an individual network request, in seconds. + If it takes longer than this time to complete, the request will be cancelled with + a DeadlineExceeded exception, and a retry will be attempted. + If None, defaults to the Table's default_read_rows_attempt_timeout, or the operation_timeout + if that is also None. Raises: - ShardedReadRowsExceptionGroup: if any of the queries failed - ValueError: if the query_list is empty @@ -601,10 +654,15 @@ async def read_rows_sharded( if not sharded_query: raise ValueError("empty sharded_query") # reduce operation_timeout between batches - operation_timeout = operation_timeout or self.default_operation_timeout - per_request_timeout = ( - per_request_timeout or self.default_per_request_timeout or operation_timeout + operation_timeout = ( + operation_timeout or self.default_read_rows_operation_timeout + ) + attempt_timeout = ( + attempt_timeout + or self.default_read_rows_attempt_timeout + or operation_timeout ) + _validate_timeouts(operation_timeout, attempt_timeout) timeout_generator = _attempt_timeout_generator( operation_timeout, operation_timeout ) @@ -623,9 +681,7 @@ async def read_rows_sharded( self.read_rows( query, operation_timeout=batch_operation_timeout, - per_request_timeout=min( - per_request_timeout, batch_operation_timeout - ), + attempt_timeout=min(attempt_timeout, batch_operation_timeout), ) for query in batch ] @@ -652,19 +708,33 @@ async def row_exists( self, row_key: str | bytes, *, - operation_timeout: int | float | None = 60, - per_request_timeout: int | float | None = None, + operation_timeout: int | float | None = None, + attempt_timeout: int | float | None = None, ) -> bool: """ - Helper function to determine if a row exists - + Return a boolean indicating whether the specified row exists in the table. 
uses the filters: chain(limit cells per row = 1, strip value) - + Args: + - row_key: the key of the row to check + - operation_timeout: the time budget for the entire operation, in seconds. + Failed requests will be retried within the budget. + If None, defaults to the Table's default_read_rows_operation_timeout + - attempt_timeout: the time budget for an individual network request, in seconds. + If it takes longer than this time to complete, the request will be cancelled with + a DeadlineExceeded exception, and a retry will be attempted. + If None, defaults to the Table's default_read_rows_attempt_timeout, or the operation_timeout + if that is also None. Returns: - a bool indicating whether the row exists + Raises: + - DeadlineExceeded: raised after operation timeout + will be chained with a RetryExceptionGroup containing GoogleAPIError exceptions + from any retries that failed + - GoogleAPIError: raised if the request encounters an unrecoverable error """ if row_key is None: raise ValueError("row_key must be string or bytes") + strip_filter = StripValueTransformerFilter(flag=True) limit_filter = CellsRowLimitFilter(1) chain_filter = RowFilterChain(filters=[limit_filter, strip_filter]) @@ -672,7 +742,7 @@ async def row_exists( results = await self.read_rows( query, operation_timeout=operation_timeout, - per_request_timeout=per_request_timeout, + attempt_timeout=attempt_timeout, ) return len(results) > 0 @@ -680,7 +750,7 @@ async def sample_row_keys( self, *, operation_timeout: float | None = None, - per_request_timeout: float | None = None, + attempt_timeout: float | None = None, ) -> RowKeySamples: """ Return a set of RowKeySamples that delimit contiguous sections of the table of @@ -693,25 +763,32 @@ async def sample_row_keys( RowKeySamples is simply a type alias for list[tuple[bytes, int]]; a list of row_keys, along with offset positions in the table + Args: + - operation_timeout: the time budget for the entire operation, in seconds. + Failed requests will be retried within the budget. + If None, defaults to the Table's default_operation_timeout + - attempt_timeout: the time budget for an individual network request, in seconds. + If it takes longer than this time to complete, the request will be cancelled with + a DeadlineExceeded exception, and a retry will be attempted. + If None, defaults to the Table's default_attempt_timeout, or the operation_timeout + if that is also None. 
Returns: - a set of RowKeySamples the delimit contiguous sections of the table Raises: - - GoogleAPICallError: if the sample_row_keys request fails + - DeadlineExceeded: raised after operation timeout + will be chained with a RetryExceptionGroup containing GoogleAPIError exceptions + from any retries that failed + - GoogleAPIError: raised if the request encounters an unrecoverable error """ # prepare timeouts operation_timeout = operation_timeout or self.default_operation_timeout - per_request_timeout = per_request_timeout or self.default_per_request_timeout + attempt_timeout = ( + attempt_timeout or self.default_attempt_timeout or operation_timeout + ) + _validate_timeouts(operation_timeout, attempt_timeout) - if operation_timeout <= 0: - raise ValueError("operation_timeout must be greater than 0") - if per_request_timeout is not None and per_request_timeout <= 0: - raise ValueError("per_request_timeout must be greater than 0") - if per_request_timeout is not None and per_request_timeout > operation_timeout: - raise ValueError( - "per_request_timeout must not be greater than operation_timeout" - ) attempt_timeout_gen = _attempt_timeout_generator( - per_request_timeout, operation_timeout + attempt_timeout, operation_timeout ) # prepare retryable predicate = retries.if_exception_type( @@ -761,7 +838,7 @@ def mutations_batcher( flow_control_max_mutation_count: int = 100_000, flow_control_max_bytes: int = 100 * _MB_SIZE, batch_operation_timeout: float | None = None, - batch_per_request_timeout: float | None = None, + batch_attempt_timeout: float | None = None, ) -> MutationsBatcherAsync: """ Returns a new mutations batcher instance. @@ -778,9 +855,10 @@ def mutations_batcher( - flow_control_max_mutation_count: Maximum number of inflight mutations. - flow_control_max_bytes: Maximum number of inflight bytes. - batch_operation_timeout: timeout for each mutate_rows operation, in seconds. If None, - table default_operation_timeout will be used - - batch_per_request_timeout: timeout for each individual request, in seconds. If None, - table default_per_request_timeout will be used + table default_mutate_rows_operation_timeout will be used + - batch_attempt_timeout: timeout for each individual request, in seconds. If None, + table default_mutate_rows_attempt_timeout will be used, or batch_operation_timeout + if that is also None. Returns: - a MutationsBatcherAsync context manager that can batch requests """ @@ -792,7 +870,7 @@ def mutations_batcher( flow_control_max_mutation_count=flow_control_max_mutation_count, flow_control_max_bytes=flow_control_max_bytes, batch_operation_timeout=batch_operation_timeout, - batch_per_request_timeout=batch_per_request_timeout, + batch_attempt_timeout=batch_attempt_timeout, ) async def mutate_row( @@ -800,8 +878,8 @@ async def mutate_row( row_key: str | bytes, mutations: list[Mutation] | Mutation, *, - operation_timeout: float | None = 60, - per_request_timeout: float | None = None, + operation_timeout: float | None = None, + attempt_timeout: float | None = None, ): """ Mutates a row atomically. @@ -815,15 +893,14 @@ async def mutate_row( Args: - row_key: the row to apply mutations to - mutations: the set of mutations to apply to the row - - operation_timeout: the time budget for the entire operation, in seconds. - Failed requests will be retried within the budget. - time is only counted while actively waiting on the network. - DeadlineExceeded exception raised after timeout - - per_request_timeout: the time budget for an individual network request, - in seconds. 
If it takes longer than this time to complete, the request - will be cancelled with a DeadlineExceeded exception, and a retry will be - attempted if within operation_timeout budget - + - operation_timeout: the time budget for the entire operation, in seconds. + Failed requests will be retried within the budget. + If None, defaults to the Table's default_operation_timeout + - attempt_timeout: the time budget for an individual network request, in seconds. + If it takes longer than this time to complete, the request will be cancelled with + a DeadlineExceeded exception, and a retry will be attempted. + If None, defaults to the Table's default_attempt_timeout, or the operation_timeout + if that is also None. Raises: - DeadlineExceeded: raised after operation timeout will be chained with a RetryExceptionGroup containing all @@ -832,14 +909,10 @@ async def mutate_row( safely retried. """ operation_timeout = operation_timeout or self.default_operation_timeout - per_request_timeout = per_request_timeout or self.default_per_request_timeout - - if operation_timeout <= 0: - raise ValueError("operation_timeout must be greater than 0") - if per_request_timeout is not None and per_request_timeout <= 0: - raise ValueError("per_request_timeout must be greater than 0") - if per_request_timeout is not None and per_request_timeout > operation_timeout: - raise ValueError("per_request_timeout must be less than operation_timeout") + attempt_timeout = ( + attempt_timeout or self.default_attempt_timeout or operation_timeout + ) + _validate_timeouts(operation_timeout, attempt_timeout) if isinstance(row_key, str): row_key = row_key.encode("utf-8") @@ -883,14 +956,16 @@ def on_error_fn(exc): ) metadata = _make_metadata(self.table_name, self.app_profile_id) # trigger rpc - await deadline_wrapped(request, timeout=per_request_timeout, metadata=metadata) + await deadline_wrapped( + request, timeout=attempt_timeout, metadata=metadata, retry=None + ) async def bulk_mutate_rows( self, mutation_entries: list[RowMutationEntry], *, - operation_timeout: float | None = 60, - per_request_timeout: float | None = None, + operation_timeout: float | None = None, + attempt_timeout: float | None = None, ): """ Applies mutations for multiple rows in a single batched request. @@ -910,32 +985,32 @@ async def bulk_mutate_rows( in arbitrary order - operation_timeout: the time budget for the entire operation, in seconds. Failed requests will be retried within the budget. - time is only counted while actively waiting on the network. - DeadlineExceeded exception raised after timeout - - per_request_timeout: the time budget for an individual network request, - in seconds. If it takes longer than this time to complete, the request - will be cancelled with a DeadlineExceeded exception, and a retry will - be attempted if within operation_timeout budget + If None, defaults to the Table's default_mutate_rows_operation_timeout + - attempt_timeout: the time budget for an individual network request, in seconds. + If it takes longer than this time to complete, the request will be cancelled with + a DeadlineExceeded exception, and a retry will be attempted. + If None, defaults to the Table's default_mutate_rows_attempt_timeout, + or the operation_timeout if that is also None. 
Raises: - MutationsExceptionGroup if one or more mutations fails Contains details about any failed entries in .exceptions """ - operation_timeout = operation_timeout or self.default_operation_timeout - per_request_timeout = per_request_timeout or self.default_per_request_timeout - - if operation_timeout <= 0: - raise ValueError("operation_timeout must be greater than 0") - if per_request_timeout is not None and per_request_timeout <= 0: - raise ValueError("per_request_timeout must be greater than 0") - if per_request_timeout is not None and per_request_timeout > operation_timeout: - raise ValueError("per_request_timeout must be less than operation_timeout") + operation_timeout = ( + operation_timeout or self.default_mutate_rows_operation_timeout + ) + attempt_timeout = ( + attempt_timeout + or self.default_mutate_rows_attempt_timeout + or operation_timeout + ) + _validate_timeouts(operation_timeout, attempt_timeout) operation = _MutateRowsOperationAsync( self.client._gapic_client, self, mutation_entries, operation_timeout, - per_request_timeout, + attempt_timeout, ) await operation.start() @@ -946,7 +1021,7 @@ async def check_and_mutate_row( *, true_case_mutations: Mutation | list[Mutation] | None = None, false_case_mutations: Mutation | list[Mutation] | None = None, - operation_timeout: int | float | None = 20, + operation_timeout: int | float | None = None, ) -> bool: """ Mutates a row atomically based on the output of a predicate filter @@ -974,7 +1049,8 @@ async def check_and_mutate_row( ones. Must contain at least one entry if `true_case_mutations is empty, and at most 100000. - operation_timeout: the time budget for the entire operation, in seconds. - Failed requests will not be retried. + Failed requests will not be retried. Defaults to the Table's default_operation_timeout + if None. Returns: - bool indicating whether the predicate was true or false Raises: @@ -1016,7 +1092,7 @@ async def read_modify_write_row( row_key: str | bytes, rules: ReadModifyWriteRule | list[ReadModifyWriteRule], *, - operation_timeout: int | float | None = 20, + operation_timeout: int | float | None = None, ) -> Row: """ Reads and modifies a row atomically according to input ReadModifyWriteRules, @@ -1033,7 +1109,8 @@ async def read_modify_write_row( Rules are applied in order, meaning that earlier rules will affect the results of later ones. - operation_timeout: the time budget for the entire operation, in seconds. - Failed requests will not be retried. + Failed requests will not be retried. Defaults to the Table's default_operation_timeout + if None. 
Returns: - Row: containing cell data that was modified as part of the operation diff --git a/google/cloud/bigtable/data/_async/mutations_batcher.py b/google/cloud/bigtable/data/_async/mutations_batcher.py index 25aafc2a1..e13675ef1 100644 --- a/google/cloud/bigtable/data/_async/mutations_batcher.py +++ b/google/cloud/bigtable/data/_async/mutations_batcher.py @@ -23,6 +23,7 @@ from google.cloud.bigtable.data.mutations import RowMutationEntry from google.cloud.bigtable.data.exceptions import MutationsExceptionGroup from google.cloud.bigtable.data.exceptions import FailedMutationEntryError +from google.cloud.bigtable.data._helpers import _validate_timeouts from google.cloud.bigtable.data._async._mutate_rows import _MutateRowsOperationAsync from google.cloud.bigtable.data._async._mutate_rows import ( @@ -189,7 +190,7 @@ def __init__( flow_control_max_mutation_count: int = 100_000, flow_control_max_bytes: int = 100 * _MB_SIZE, batch_operation_timeout: float | None = None, - batch_per_request_timeout: float | None = None, + batch_attempt_timeout: float | None = None, ): """ Args: @@ -203,25 +204,18 @@ def __init__( - flow_control_max_bytes: Maximum number of inflight bytes. - batch_operation_timeout: timeout for each mutate_rows operation, in seconds. If None, table default_operation_timeout will be used - - batch_per_request_timeout: timeout for each individual request, in seconds. If None, - table default_per_request_timeout will be used + - batch_attempt_timeout: timeout for each individual request, in seconds. If None, + table default_attempt_timeout will be used """ self._operation_timeout: float = ( - batch_operation_timeout or table.default_operation_timeout + batch_operation_timeout or table.default_mutate_rows_operation_timeout ) - self._per_request_timeout: float = ( - batch_per_request_timeout - or table.default_per_request_timeout + self._attempt_timeout: float = ( + batch_attempt_timeout + or table.default_mutate_rows_attempt_timeout or self._operation_timeout ) - if self._operation_timeout <= 0: - raise ValueError("batch_operation_timeout must be greater than 0") - if self._per_request_timeout <= 0: - raise ValueError("batch_per_request_timeout must be greater than 0") - if self._per_request_timeout > self._operation_timeout: - raise ValueError( - "batch_per_request_timeout must be less than batch_operation_timeout" - ) + _validate_timeouts(self._operation_timeout, self._attempt_timeout) self.closed: bool = False self._table = table self._staged_entries: list[RowMutationEntry] = [] @@ -346,7 +340,7 @@ async def _execute_mutate_rows( Args: - batch: list of RowMutationEntry objects to send to server - - timeout: timeout in seconds. Used as operation_timeout and per_request_timeout. + - timeout: timeout in seconds. Used as operation_timeout and attempt_timeout. If not given, will use table defaults Returns: - list of FailedMutationEntryError objects for mutations that failed. 
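Taken together, the client.py and mutations_batcher.py hunks in this commit replace the single per_request_timeout with an attempt_timeout that falls back first to a per-RPC-type table default and then to the operation timeout. A usage sketch of the resulting keyword surface, assuming an existing client and query; only the parameter names come from the diff, the ids and numeric values are placeholders:

    # Sketch of the renamed timeout arguments introduced in this commit.
    # `client` and `query` are assumed to exist; ids and numbers are placeholders.
    async def timeout_example(client, query):
        table = client.get_table(
            "my-instance",
            "my-table",
            default_read_rows_operation_timeout=600,    # read_rows / read_row / row_exists
            default_read_rows_attempt_timeout=20,       # per-attempt budget for reads
            default_mutate_rows_operation_timeout=120,  # bulk_mutate_rows and the batcher
            default_operation_timeout=60,               # everything else (mutate_row, sample_row_keys, ...)
        )
        # explicit arguments still win; attempt_timeout falls back to the per-RPC
        # default, then to operation_timeout when both are None
        rows = await table.read_rows(query, operation_timeout=30, attempt_timeout=5)
        async with table.mutations_batcher(
            batch_operation_timeout=120,  # None -> table.default_mutate_rows_operation_timeout
            batch_attempt_timeout=10,     # None -> table.default_mutate_rows_attempt_timeout,
                                          # then the operation timeout
        ) as batcher:
            ...  # queue RowMutationEntry objects on the batcher
        return rows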
@@ -361,7 +355,7 @@ async def _execute_mutate_rows( self._table, batch, operation_timeout=self._operation_timeout, - per_request_timeout=self._per_request_timeout, + attempt_timeout=self._attempt_timeout, ) await operation.start() except MutationsExceptionGroup as e: diff --git a/google/cloud/bigtable/data/_helpers.py b/google/cloud/bigtable/data/_helpers.py index 64d91e108..ab816f9a7 100644 --- a/google/cloud/bigtable/data/_helpers.py +++ b/google/cloud/bigtable/data/_helpers.py @@ -109,3 +109,30 @@ def wrapper(*args, **kwargs): handle_error() return wrapper_async if iscoroutinefunction(func) else wrapper + + +def _validate_timeouts( + operation_timeout: float, attempt_timeout: float | None, allow_none: bool = False +): + """ + Helper function that will verify that timeout values are valid, and raise + an exception if they are not. + + Args: + - operation_timeout: The timeout value to use for the entire operation, in seconds. + - attempt_timeout: The timeout value to use for each attempt, in seconds. + - allow_none: If True, attempt_timeout can be None. If False, None values will raise an exception. + Raises: + - ValueError if operation_timeout or attempt_timeout are invalid. + """ + if operation_timeout <= 0: + raise ValueError("operation_timeout must be greater than 0") + if not allow_none and attempt_timeout is None: + raise ValueError("attempt_timeout must not be None") + elif attempt_timeout is not None: + if attempt_timeout <= 0: + raise ValueError("attempt_timeout must be greater than 0") + if attempt_timeout > operation_timeout: + raise ValueError( + "attempt_timeout must not be greater than operation_timeout" + ) diff --git a/tests/unit/data/_async/test__mutate_rows.py b/tests/unit/data/_async/test__mutate_rows.py index f77455d60..9bebd35e6 100644 --- a/tests/unit/data/_async/test__mutate_rows.py +++ b/tests/unit/data/_async/test__mutate_rows.py @@ -48,7 +48,7 @@ def _make_one(self, *args, **kwargs): kwargs["table"] = kwargs.pop("table", AsyncMock()) kwargs["mutation_entries"] = kwargs.pop("mutation_entries", []) kwargs["operation_timeout"] = kwargs.pop("operation_timeout", 5) - kwargs["per_request_timeout"] = kwargs.pop("per_request_timeout", 0.1) + kwargs["attempt_timeout"] = kwargs.pop("attempt_timeout", 0.1) return self._target_class()(*args, **kwargs) async def _mock_stream(self, mutation_list, error_dict): @@ -267,7 +267,7 @@ async def test_run_attempt_single_entry_success(self): mock_gapic_fn = self._make_mock_gapic({0: mutation}) instance = self._make_one( mutation_entries=[mutation], - per_request_timeout=expected_timeout, + attempt_timeout=expected_timeout, ) with mock.patch.object(instance, "_gapic_fn", mock_gapic_fn): await instance._run_attempt() diff --git a/tests/unit/data/_async/test__read_rows.py b/tests/unit/data/_async/test__read_rows.py index f35244227..9af93317b 100644 --- a/tests/unit/data/_async/test__read_rows.py +++ b/tests/unit/data/_async/test__read_rows.py @@ -89,7 +89,7 @@ def test_ctor(self): request, client, operation_timeout=expected_operation_timeout, - per_request_timeout=expected_request_timeout, + attempt_timeout=expected_request_timeout, ) assert time_gen_mock.call_count == 1 time_gen_mock.assert_called_once_with( diff --git a/tests/unit/data/_async/test_client.py b/tests/unit/data/_async/test_client.py index da91334a4..9ebdfcdec 100644 --- a/tests/unit/data/_async/test_client.py +++ b/tests/unit/data/_async/test_client.py @@ -974,7 +974,11 @@ async def test_table_ctor(self): expected_instance_id = "instance-id" expected_app_profile_id = 
"app-profile-id" expected_operation_timeout = 123 - expected_per_request_timeout = 12 + expected_attempt_timeout = 12 + expected_read_rows_operation_timeout = 1.5 + expected_read_rows_attempt_timeout = 0.5 + expected_mutate_rows_operation_timeout = 2.5 + expected_mutate_rows_attempt_timeout = 0.75 client = BigtableDataClientAsync() assert not client._active_instances @@ -984,7 +988,11 @@ async def test_table_ctor(self): expected_table_id, expected_app_profile_id, default_operation_timeout=expected_operation_timeout, - default_per_request_timeout=expected_per_request_timeout, + default_attempt_timeout=expected_attempt_timeout, + default_read_rows_operation_timeout=expected_read_rows_operation_timeout, + default_read_rows_attempt_timeout=expected_read_rows_attempt_timeout, + default_mutate_rows_operation_timeout=expected_mutate_rows_operation_timeout, + default_mutate_rows_attempt_timeout=expected_mutate_rows_attempt_timeout, ) await asyncio.sleep(0) assert table.table_id == expected_table_id @@ -997,7 +1005,23 @@ async def test_table_ctor(self): assert instance_key in client._active_instances assert client._instance_owners[instance_key] == {id(table)} assert table.default_operation_timeout == expected_operation_timeout - assert table.default_per_request_timeout == expected_per_request_timeout + assert table.default_attempt_timeout == expected_attempt_timeout + assert ( + table.default_read_rows_operation_timeout + == expected_read_rows_operation_timeout + ) + assert ( + table.default_read_rows_attempt_timeout + == expected_read_rows_attempt_timeout + ) + assert ( + table.default_mutate_rows_operation_timeout + == expected_mutate_rows_operation_timeout + ) + assert ( + table.default_mutate_rows_attempt_timeout + == expected_mutate_rows_attempt_timeout + ) # ensure task reaches completion await table._register_instance_task assert table._register_instance_task.done() @@ -1006,30 +1030,74 @@ async def test_table_ctor(self): await client.close() @pytest.mark.asyncio - async def test_table_ctor_bad_timeout_values(self): + async def test_table_ctor_defaults(self): + """ + should provide default timeout values and app_profile_id + """ from google.cloud.bigtable.data._async.client import BigtableDataClientAsync from google.cloud.bigtable.data._async.client import TableAsync + expected_table_id = "table-id" + expected_instance_id = "instance-id" client = BigtableDataClientAsync() + assert not client._active_instances - with pytest.raises(ValueError) as e: - TableAsync(client, "", "", default_per_request_timeout=-1) - assert "default_per_request_timeout must be greater than 0" in str(e.value) - with pytest.raises(ValueError) as e: - TableAsync(client, "", "", default_operation_timeout=-1) - assert "default_operation_timeout must be greater than 0" in str(e.value) - with pytest.raises(ValueError) as e: - TableAsync( - client, - "", - "", - default_operation_timeout=1, - default_per_request_timeout=2, - ) - assert ( - "default_per_request_timeout must be less than default_operation_timeout" - in str(e.value) + table = TableAsync( + client, + expected_instance_id, + expected_table_id, ) + await asyncio.sleep(0) + assert table.table_id == expected_table_id + assert table.instance_id == expected_instance_id + assert table.app_profile_id is None + assert table.client is client + assert table.default_operation_timeout == 60 + assert table.default_read_rows_operation_timeout == 600 + assert table.default_mutate_rows_operation_timeout == 600 + assert table.default_attempt_timeout is None + assert 
table.default_read_rows_attempt_timeout is None + assert table.default_mutate_rows_attempt_timeout is None + await client.close() + + @pytest.mark.asyncio + async def test_table_ctor_invalid_timeout_values(self): + """ + bad timeout values should raise ValueError + """ + from google.cloud.bigtable.data._async.client import BigtableDataClientAsync + from google.cloud.bigtable.data._async.client import TableAsync + + client = BigtableDataClientAsync() + + timeout_pairs = [ + ("default_operation_timeout", "default_attempt_timeout"), + ( + "default_read_rows_operation_timeout", + "default_read_rows_attempt_timeout", + ), + ( + "default_mutate_rows_operation_timeout", + "default_mutate_rows_attempt_timeout", + ), + ] + for operation_timeout, attempt_timeout in timeout_pairs: + with pytest.raises(ValueError) as e: + TableAsync(client, "", "", **{attempt_timeout: -1}) + assert "attempt_timeout must be greater than 0" in str(e.value) + with pytest.raises(ValueError) as e: + TableAsync(client, "", "", **{operation_timeout: -1}) + assert "operation_timeout must be greater than 0" in str(e.value) + with pytest.raises(ValueError) as e: + TableAsync( + client, + "", + "", + **{operation_timeout: 1, attempt_timeout: 2}, + ) + assert "attempt_timeout must not be greater than operation_timeout" in str( + e.value + ) await client.close() def test_table_ctor_sync(self): @@ -1240,15 +1308,15 @@ async def test_read_rows_timeout(self, operation_timeout): ], ) @pytest.mark.asyncio - async def test_read_rows_per_request_timeout( + async def test_read_rows_attempt_timeout( self, per_request_t, operation_t, expected_num ): """ - Ensures that the per_request_timeout is respected and that the number of + Ensures that the attempt_timeout is respected and that the number of requests is as expected. operation_timeout does not cancel the request, so we expect the number of - requests to be the ceiling of operation_timeout / per_request_timeout. + requests to be the ceiling of operation_timeout / attempt_timeout. 
""" from google.cloud.bigtable.data.exceptions import RetryExceptionGroup @@ -1268,7 +1336,7 @@ async def test_read_rows_per_request_timeout( await table.read_rows( query, operation_timeout=operation_t, - per_request_timeout=per_request_t, + attempt_timeout=per_request_t, ) except core_exceptions.DeadlineExceeded as e: retry_exc = e.__cause__ @@ -1437,12 +1505,12 @@ async def test_read_rows_default_timeouts(self): from google.cloud.bigtable.data._async._read_rows import _ReadRowsOperationAsync operation_timeout = 8 - per_request_timeout = 4 + attempt_timeout = 4 with mock.patch.object(_ReadRowsOperationAsync, "__init__") as mock_op: mock_op.side_effect = RuntimeError("mock error") async with self._make_table( - default_operation_timeout=operation_timeout, - default_per_request_timeout=per_request_timeout, + default_read_rows_operation_timeout=operation_timeout, + default_read_rows_attempt_timeout=attempt_timeout, ) as table: try: await table.read_rows(ReadRowsQuery()) @@ -1450,7 +1518,7 @@ async def test_read_rows_default_timeouts(self): pass kwargs = mock_op.call_args_list[0].kwargs assert kwargs["operation_timeout"] == operation_timeout - assert kwargs["per_request_timeout"] == per_request_timeout + assert kwargs["attempt_timeout"] == attempt_timeout @pytest.mark.asyncio async def test_read_rows_default_timeout_override(self): @@ -1460,23 +1528,23 @@ async def test_read_rows_default_timeout_override(self): from google.cloud.bigtable.data._async._read_rows import _ReadRowsOperationAsync operation_timeout = 8 - per_request_timeout = 4 + attempt_timeout = 4 with mock.patch.object(_ReadRowsOperationAsync, "__init__") as mock_op: mock_op.side_effect = RuntimeError("mock error") async with self._make_table( - default_operation_timeout=99, default_per_request_timeout=97 + default_operation_timeout=99, default_attempt_timeout=97 ) as table: try: await table.read_rows( ReadRowsQuery(), operation_timeout=operation_timeout, - per_request_timeout=per_request_timeout, + attempt_timeout=attempt_timeout, ) except RuntimeError: pass kwargs = mock_op.call_args_list[0].kwargs assert kwargs["operation_timeout"] == operation_timeout - assert kwargs["per_request_timeout"] == per_request_timeout + assert kwargs["attempt_timeout"] == attempt_timeout @pytest.mark.asyncio async def test_read_row(self): @@ -1492,13 +1560,13 @@ async def test_read_row(self): row = await table.read_row( row_key, operation_timeout=expected_op_timeout, - per_request_timeout=expected_req_timeout, + attempt_timeout=expected_req_timeout, ) assert row == expected_result assert read_rows.call_count == 1 args, kwargs = read_rows.call_args_list[0] assert kwargs["operation_timeout"] == expected_op_timeout - assert kwargs["per_request_timeout"] == expected_req_timeout + assert kwargs["attempt_timeout"] == expected_req_timeout assert len(args) == 1 assert isinstance(args[0], ReadRowsQuery) assert args[0]._to_dict() == { @@ -1523,14 +1591,14 @@ async def test_read_row_w_filter(self): row = await table.read_row( row_key, operation_timeout=expected_op_timeout, - per_request_timeout=expected_req_timeout, + attempt_timeout=expected_req_timeout, row_filter=expected_filter, ) assert row == expected_result assert read_rows.call_count == 1 args, kwargs = read_rows.call_args_list[0] assert kwargs["operation_timeout"] == expected_op_timeout - assert kwargs["per_request_timeout"] == expected_req_timeout + assert kwargs["attempt_timeout"] == expected_req_timeout assert len(args) == 1 assert isinstance(args[0], ReadRowsQuery) assert args[0]._to_dict() == { 
@@ -1553,13 +1621,13 @@ async def test_read_row_no_response(self): result = await table.read_row( row_key, operation_timeout=expected_op_timeout, - per_request_timeout=expected_req_timeout, + attempt_timeout=expected_req_timeout, ) assert result is None assert read_rows.call_count == 1 args, kwargs = read_rows.call_args_list[0] assert kwargs["operation_timeout"] == expected_op_timeout - assert kwargs["per_request_timeout"] == expected_req_timeout + assert kwargs["attempt_timeout"] == expected_req_timeout assert isinstance(args[0], ReadRowsQuery) assert args[0]._to_dict() == { "rows": {"row_keys": [row_key], "row_ranges": []}, @@ -1598,13 +1666,13 @@ async def test_row_exists(self, return_value, expected_result): result = await table.row_exists( row_key, operation_timeout=expected_op_timeout, - per_request_timeout=expected_req_timeout, + attempt_timeout=expected_req_timeout, ) assert expected_result == result assert read_rows.call_count == 1 args, kwargs = read_rows.call_args_list[0] assert kwargs["operation_timeout"] == expected_op_timeout - assert kwargs["per_request_timeout"] == expected_req_timeout + assert kwargs["attempt_timeout"] == expected_req_timeout assert isinstance(args[0], ReadRowsQuery) expected_filter = { "chain": { @@ -1798,9 +1866,9 @@ async def test_read_rows_sharded_batching(self): table_mock = AsyncMock() start_operation_timeout = 10 - start_per_request_timeout = 3 - table_mock.default_operation_timeout = start_operation_timeout - table_mock.default_per_request_timeout = start_per_request_timeout + start_attempt_timeout = 3 + table_mock.default_read_rows_operation_timeout = start_operation_timeout + table_mock.default_read_rows_attempt_timeout = start_attempt_timeout # clock ticks one second on each check with mock.patch("time.monotonic", side_effect=range(0, 100000)): with mock.patch("asyncio.gather", AsyncMock()) as gather_mock: @@ -1829,14 +1897,11 @@ async def test_read_rows_sharded_batching(self): req_kwargs["operation_timeout"] == expected_operation_timeout ) - # each per_request_timeout should start with default value, but decrease when operation_timeout reaches it - expected_per_request_timeout = min( - start_per_request_timeout, expected_operation_timeout - ) - assert ( - req_kwargs["per_request_timeout"] - == expected_per_request_timeout + # each attempt_timeout should start with default value, but decrease when operation_timeout reaches it + expected_attempt_timeout = min( + start_attempt_timeout, expected_operation_timeout ) + assert req_kwargs["attempt_timeout"] == expected_attempt_timeout # await all created coroutines to avoid warnings for i in range(len(gather_mock.call_args_list)): for j in range(len(gather_mock.call_args_list[i][0])): @@ -1891,14 +1956,14 @@ async def test_sample_row_keys_bad_timeout(self): await table.sample_row_keys(operation_timeout=-1) assert "operation_timeout must be greater than 0" in str(e.value) with pytest.raises(ValueError) as e: - await table.sample_row_keys(per_request_timeout=-1) - assert "per_request_timeout must be greater than 0" in str(e.value) + await table.sample_row_keys(attempt_timeout=-1) + assert "attempt_timeout must be greater than 0" in str(e.value) with pytest.raises(ValueError) as e: await table.sample_row_keys( - operation_timeout=10, per_request_timeout=20 + operation_timeout=10, attempt_timeout=20 ) assert ( - "per_request_timeout must not be greater than operation_timeout" + "attempt_timeout must not be greater than operation_timeout" in str(e.value) ) @@ -1936,7 +2001,7 @@ async def 
test_sample_row_keys_gapic_params(self): table.client._gapic_client, "sample_row_keys", AsyncMock() ) as sample_row_keys: sample_row_keys.return_value = self._make_gapic_stream([]) - await table.sample_row_keys(per_request_timeout=expected_timeout) + await table.sample_row_keys(attempt_timeout=expected_timeout) args, kwargs = sample_row_keys.call_args assert len(args) == 0 assert len(kwargs) == 4 @@ -2049,7 +2114,7 @@ def _make_client(self, *args, **kwargs): ) async def test_mutate_row(self, mutation_arg): """Test mutations with no errors""" - expected_per_request_timeout = 19 + expected_attempt_timeout = 19 async with self._make_client(project="project") as client: async with client.get_table("instance", "table") as table: with mock.patch.object( @@ -2059,9 +2124,10 @@ async def test_mutate_row(self, mutation_arg): await table.mutate_row( "row_key", mutation_arg, - per_request_timeout=expected_per_request_timeout, + attempt_timeout=expected_attempt_timeout, ) assert mock_gapic.call_count == 1 + kwargs = mock_gapic.call_args_list[0].kwargs request = mock_gapic.call_args[0][0] assert ( request["table_name"] @@ -2074,8 +2140,9 @@ async def test_mutate_row(self, mutation_arg): else [mutation_arg._to_dict()] ) assert request["mutations"] == formatted_mutations - found_per_request_timeout = mock_gapic.call_args[1]["timeout"] - assert found_per_request_timeout == expected_per_request_timeout + assert kwargs["timeout"] == expected_attempt_timeout + # make sure gapic layer is not retrying + assert kwargs["retry"] is None @pytest.mark.parametrize( "retryable_exception", @@ -2243,7 +2310,7 @@ async def generator(): ) async def test_bulk_mutate_rows(self, mutation_arg): """Test mutations with no errors""" - expected_per_request_timeout = 19 + expected_attempt_timeout = 19 async with self._make_client(project="project") as client: async with client.get_table("instance", "table") as table: with mock.patch.object( @@ -2253,7 +2320,7 @@ async def test_bulk_mutate_rows(self, mutation_arg): bulk_mutation = mutations.RowMutationEntry(b"row_key", mutation_arg) await table.bulk_mutate_rows( [bulk_mutation], - per_request_timeout=expected_per_request_timeout, + attempt_timeout=expected_attempt_timeout, ) assert mock_gapic.call_count == 1 kwargs = mock_gapic.call_args[1] @@ -2262,7 +2329,7 @@ async def test_bulk_mutate_rows(self, mutation_arg): == "projects/project/instances/instance/tables/table" ) assert kwargs["entries"] == [bulk_mutation._to_dict()] - assert kwargs["timeout"] == expected_per_request_timeout + assert kwargs["timeout"] == expected_attempt_timeout @pytest.mark.asyncio async def test_bulk_mutate_rows_multiple_entries(self): diff --git a/tests/unit/data/_async/test_mutations_batcher.py b/tests/unit/data/_async/test_mutations_batcher.py index 1b14cc128..25492c4e2 100644 --- a/tests/unit/data/_async/test_mutations_batcher.py +++ b/tests/unit/data/_async/test_mutations_batcher.py @@ -288,8 +288,8 @@ def _get_target_class(self): def _make_one(self, table=None, **kwargs): if table is None: table = mock.Mock() - table.default_operation_timeout = 10 - table.default_per_request_timeout = 10 + table.default_mutate_rows_operation_timeout = 10 + table.default_mutate_rows_attempt_timeout = 10 return self._get_target_class()(table, **kwargs) @@ -300,8 +300,8 @@ def _make_one(self, table=None, **kwargs): async def test_ctor_defaults(self, flush_timer_mock): flush_timer_mock.return_value = asyncio.create_task(asyncio.sleep(0)) table = mock.Mock() - table.default_operation_timeout = 10 - 
table.default_per_request_timeout = 8 + table.default_mutate_rows_operation_timeout = 10 + table.default_mutate_rows_attempt_timeout = 8 async with self._make_one(table) as instance: assert instance._table == table assert instance.closed is False @@ -316,8 +316,13 @@ async def test_ctor_defaults(self, flush_timer_mock): assert instance._flow_control._in_flight_mutation_count == 0 assert instance._flow_control._in_flight_mutation_bytes == 0 assert instance._entries_processed_since_last_raise == 0 - assert instance._operation_timeout == table.default_operation_timeout - assert instance._per_request_timeout == table.default_per_request_timeout + assert ( + instance._operation_timeout + == table.default_mutate_rows_operation_timeout + ) + assert ( + instance._attempt_timeout == table.default_mutate_rows_attempt_timeout + ) await asyncio.sleep(0) assert flush_timer_mock.call_count == 1 assert flush_timer_mock.call_args[0][0] == 5 @@ -337,7 +342,7 @@ async def test_ctor_explicit(self, flush_timer_mock): flow_control_max_mutation_count = 1001 flow_control_max_bytes = 12 operation_timeout = 11 - per_request_timeout = 2 + attempt_timeout = 2 async with self._make_one( table, flush_interval=flush_interval, @@ -346,7 +351,7 @@ async def test_ctor_explicit(self, flush_timer_mock): flow_control_max_mutation_count=flow_control_max_mutation_count, flow_control_max_bytes=flow_control_max_bytes, batch_operation_timeout=operation_timeout, - batch_per_request_timeout=per_request_timeout, + batch_attempt_timeout=attempt_timeout, ) as instance: assert instance._table == table assert instance.closed is False @@ -365,7 +370,7 @@ async def test_ctor_explicit(self, flush_timer_mock): assert instance._flow_control._in_flight_mutation_bytes == 0 assert instance._entries_processed_since_last_raise == 0 assert instance._operation_timeout == operation_timeout - assert instance._per_request_timeout == per_request_timeout + assert instance._attempt_timeout == attempt_timeout await asyncio.sleep(0) assert flush_timer_mock.call_count == 1 assert flush_timer_mock.call_args[0][0] == flush_interval @@ -379,8 +384,8 @@ async def test_ctor_no_flush_limits(self, flush_timer_mock): """Test with None for flush limits""" flush_timer_mock.return_value = asyncio.create_task(asyncio.sleep(0)) table = mock.Mock() - table.default_operation_timeout = 10 - table.default_per_request_timeout = 8 + table.default_mutate_rows_operation_timeout = 10 + table.default_mutate_rows_attempt_timeout = 8 flush_interval = None flush_limit_count = None flush_limit_bytes = None @@ -410,15 +415,14 @@ async def test_ctor_invalid_values(self): """Test that timeout values are positive, and fit within expected limits""" with pytest.raises(ValueError) as e: self._make_one(batch_operation_timeout=-1) - assert "batch_operation_timeout must be greater than 0" in str(e.value) + assert "operation_timeout must be greater than 0" in str(e.value) with pytest.raises(ValueError) as e: - self._make_one(batch_per_request_timeout=-1) - assert "batch_per_request_timeout must be greater than 0" in str(e.value) + self._make_one(batch_attempt_timeout=-1) + assert "attempt_timeout must be greater than 0" in str(e.value) with pytest.raises(ValueError) as e: - self._make_one(batch_operation_timeout=1, batch_per_request_timeout=2) - assert ( - "batch_per_request_timeout must be less than batch_operation_timeout" - in str(e.value) + self._make_one(batch_operation_timeout=1, batch_attempt_timeout=2) + assert "attempt_timeout must not be greater than operation_timeout" in str( + 
e.value ) def test_default_argument_consistency(self): @@ -857,7 +861,7 @@ async def test_timer_flush_end_to_end(self): async with self._make_one(flush_interval=0.05) as instance: instance._table.default_operation_timeout = 10 - instance._table.default_per_request_timeout = 9 + instance._table.default_attempt_timeout = 9 with mock.patch.object( instance._table.client._gapic_client, "mutate_rows" ) as gapic_mock: @@ -881,8 +885,8 @@ async def test__execute_mutate_rows(self, mutate_rows): table = mock.Mock() table.table_name = "test-table" table.app_profile_id = "test-app-profile" - table.default_operation_timeout = 17 - table.default_per_request_timeout = 13 + table.default_mutate_rows_operation_timeout = 17 + table.default_mutate_rows_attempt_timeout = 13 async with self._make_one(table) as instance: batch = [_make_mutation()] result = await instance._execute_mutate_rows(batch) @@ -892,7 +896,7 @@ async def test__execute_mutate_rows(self, mutate_rows): assert args[1] == table assert args[2] == batch kwargs["operation_timeout"] == 17 - kwargs["per_request_timeout"] == 13 + kwargs["attempt_timeout"] == 13 assert result == [] @pytest.mark.asyncio @@ -910,8 +914,8 @@ async def test__execute_mutate_rows_returns_errors(self, mutate_rows): err2 = FailedMutationEntryError(1, mock.Mock(), RuntimeError("test error")) mutate_rows.side_effect = MutationsExceptionGroup([err1, err2], 10) table = mock.Mock() - table.default_operation_timeout = 17 - table.default_per_request_timeout = 13 + table.default_mutate_rows_operation_timeout = 17 + table.default_mutate_rows_attempt_timeout = 13 async with self._make_one(table) as instance: batch = [_make_mutation()] result = await instance._execute_mutate_rows(batch) @@ -1026,24 +1030,24 @@ async def test_atexit_registration(self): ) async def test_timeout_args_passed(self, mutate_rows): """ - batch_operation_timeout and batch_per_request_timeout should be used + batch_operation_timeout and batch_attempt_timeout should be used in api calls """ mutate_rows.return_value = AsyncMock() expected_operation_timeout = 17 - expected_per_request_timeout = 13 + expected_attempt_timeout = 13 async with self._make_one( batch_operation_timeout=expected_operation_timeout, - batch_per_request_timeout=expected_per_request_timeout, + batch_attempt_timeout=expected_attempt_timeout, ) as instance: assert instance._operation_timeout == expected_operation_timeout - assert instance._per_request_timeout == expected_per_request_timeout + assert instance._attempt_timeout == expected_attempt_timeout # make simulated gapic call await instance._execute_mutate_rows([_make_mutation()]) assert mutate_rows.call_count == 1 kwargs = mutate_rows.call_args[1] assert kwargs["operation_timeout"] == expected_operation_timeout - assert kwargs["per_request_timeout"] == expected_per_request_timeout + assert kwargs["attempt_timeout"] == expected_attempt_timeout @pytest.mark.parametrize( "limit,in_e,start_e,end_e", diff --git a/tests/unit/data/test__helpers.py b/tests/unit/data/test__helpers.py index dc688bb0c..1671bf6f7 100644 --- a/tests/unit/data/test__helpers.py +++ b/tests/unit/data/test__helpers.py @@ -143,3 +143,48 @@ async def test_func(): assert isinstance(cause, bigtable_exceptions.RetryExceptionGroup) assert cause.exceptions == tuple(associated_errors) assert f"operation_timeout of {timeout}s exceeded" in str(e.value) + + +class TestValidateTimeouts: + def test_validate_timeouts_error_messages(self): + with pytest.raises(ValueError) as e: + _helpers._validate_timeouts(operation_timeout=1, 
attempt_timeout=-1)
+        assert "attempt_timeout must be greater than 0" in str(e.value)
+        with pytest.raises(ValueError) as e:
+            _helpers._validate_timeouts(operation_timeout=-1, attempt_timeout=1)
+        assert "operation_timeout must be greater than 0" in str(e.value)
+        with pytest.raises(ValueError) as e:
+            _helpers._validate_timeouts(operation_timeout=1, attempt_timeout=2)
+        assert "attempt_timeout must not be greater than operation_timeout" in str(
+            e.value
+        )
+
+    @pytest.mark.parametrize(
+        "args,expected",
+        [
+            ([1, None, False], False),
+            ([1, None, True], True),
+            ([1, 1, False], True),
+            ([1, 1, True], True),
+            ([1, 1], True),
+            ([1, None], False),
+            ([2, 1], True),
+            ([1, 2], False),
+            ([0, 1], False),
+            ([1, 0], False),
+            ([60, None], False),
+            ([600, None], False),
+            ([600, 600], True),
+        ],
+    )
+    def test_validate_with_inputs(self, args, expected):
+        """
+        test whether an exception is thrown with different inputs
+        """
+        success = False
+        try:
+            _helpers._validate_timeouts(*args)
+            success = True
+        except ValueError:
+            pass
+        assert success == expected

From b6ad653ce61b494914ec407fb68def2b229dc5ad Mon Sep 17 00:00:00 2001
From: Daniel Sanche
Date: Tue, 27 Jun 2023 11:43:16 -0700
Subject: [PATCH 206/213] fixed docstrings

---
 google/cloud/bigtable/data/_async/client.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/google/cloud/bigtable/data/_async/client.py b/google/cloud/bigtable/data/_async/client.py
index eaff4f809..c4309d70b 100644
--- a/google/cloud/bigtable/data/_async/client.py
+++ b/google/cloud/bigtable/data/_async/client.py
@@ -546,7 +546,7 @@ async def read_rows(
         Retruns results as a list of Row objects when the request is complete.
         For streamed results, use read_rows_stream.

-        Failed requests within operation_timeout and operation_deadline policies will be retried.
+        Failed requests within operation_timeout will be retried.

         Args:
             - query: contains details about which rows to return
@@ -585,7 +585,7 @@ async def read_row(
         """
         Read a single row from the table, based on the specified key.

-        Failed requests within operation_timeout and operation_deadline policies will be retried.
+        Failed requests within operation_timeout will be retried.

         Args:
             - query: contains details about which rows to return
@@ -891,8 +891,8 @@ async def mutate_row(
             retried on server failure. Non-idempotent operations will not.

         Args:
-            - row_key: the row to apply mutations to
-            - mutations: the set of mutations to apply to the row
+            - row_key: the row to apply mutations to
+            - mutations: the set of mutations to apply to the row
             - operation_timeout: the time budget for the entire operation, in seconds.
                 Failed requests will be retried within the budget.
                 If None, defaults to the Table's default_operation_timeout
@@ -1108,7 +1108,7 @@ async def read_modify_write_row(
             - rules: A rule or set of rules to apply to the row.
                 Rules are applied in order, meaning that earlier rules will
                 affect the results of later ones.
-            - operation_timeout: the time budget for the entire operation, in seconds.
+            - operation_timeout: the time budget for the entire operation, in seconds.
                 Failed requests will not be retried. Defaults to the Table's default_operation_timeout
                 if None.
         Returns:
From eb55460b18a40ec1cde298593a0ad49ec45f7ef3 Mon Sep 17 00:00:00 2001
From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com>
Date: Wed, 28 Jun 2023 03:44:12 -0400
Subject: [PATCH 207/213] chore: remove pinned Sphinx version [autoapprove] (#818)

Source-Link: https://github.com/googleapis/synthtool/commit/909573ce9da2819eeb835909c795d29aea5c724e
Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:ddf4551385d566771dc713090feb7b4c1164fb8a698fe52bbe7670b24236565b
Co-authored-by: Owl Bot
---
 .github/.OwlBot.lock.yaml | 4 ++--
 noxfile.py                | 3 +--
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml
index 02a4dedce..1b3cb6c52 100644
--- a/.github/.OwlBot.lock.yaml
+++ b/.github/.OwlBot.lock.yaml
@@ -13,5 +13,5 @@
 # limitations under the License.
 docker:
   image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest
-  digest: sha256:240b5bcc2bafd450912d2da2be15e62bc6de2cf839823ae4bf94d4f392b451dc
-# created: 2023-06-03T21:25:37.968717478Z
+  digest: sha256:ddf4551385d566771dc713090feb7b4c1164fb8a698fe52bbe7670b24236565b
+# created: 2023-06-27T13:04:21.96690344Z
diff --git a/noxfile.py b/noxfile.py
index 18f489e19..a33d64638 100644
--- a/noxfile.py
+++ b/noxfile.py
@@ -345,10 +345,9 @@ def docfx(session):

     session.install("-e", ".")
     session.install(
-        "sphinx==4.0.1",
+        "gcp-sphinx-docfx-yaml",
         "alabaster",
         "recommonmark",
-        "gcp-sphinx-docfx-yaml",
     )

     shutil.rmtree(os.path.join("docs", "_build"), ignore_errors=True)
From a723cb28df4e6f12e645074bfef6cd4203fab947 Mon Sep 17 00:00:00 2001
From: Daniel Sanche
Date: Wed, 28 Jun 2023 13:26:51 -0700
Subject: [PATCH 208/213] improve exception messages

---
 google/cloud/bigtable/data/exceptions.py | 43 ++++++++++++++++++------
 1 file changed, 32 insertions(+), 11 deletions(-)

diff --git a/google/cloud/bigtable/data/exceptions.py b/google/cloud/bigtable/data/exceptions.py
index 9b6b4fe3f..0840f975d 100644
--- a/google/cloud/bigtable/data/exceptions.py
+++ b/google/cloud/bigtable/data/exceptions.py
@@ -74,7 +74,22 @@ def __init__(self, message, excs):
         if len(excs) == 0:
             raise ValueError("exceptions must be a non-empty sequence")
         self.exceptions = tuple(excs)
-        super().__init__(message)
+        # simulate an exception group in Python < 3.11 by adding exception info
+        # to the message
+        first_line = "--+---------------- 1 ----------------"
+        last_line = "+------------------------------------"
+        message_parts = [message + "\n" + first_line]
+        for idx, e in enumerate(excs[:15]):
+            if idx != 0:
+                message_parts.append(f"+---------------- {str(idx+1).rjust(2)} ----------------")
+            for sub_exc_line in f"| {e.__class__.__name__}: {str(e)}".split("\n"):
+                message_parts.append(sub_exc_line)
+        if len(excs) > 15:
+            message_parts.append("+---------------- ... ---------------")
+            message_parts.append(f"| and {len(excs) - 15} more")
+        if last_line not in message_parts[-1]:
+            message_parts.append(last_line)
+        super().__init__("\n  ".join(message_parts))

     def __new__(cls, message, excs):
         if is_311_plus:
@@ -82,12 +97,14 @@ def __new__(cls, message, excs):
         else:
             return super().__new__(cls)

-    def __str__(self):
+    def __repr__(self):
         """
-        String representation doesn't display sub-exceptions.
Subexceptions are - described in message + repr representation should strip out sub-exception details """ - return self.args[0] + if is_311_plus: + return super().__repr__() + message = self.args[0].split("\n")[0] + return f"{self.__class__.__name__}({message!r}, {self.exceptions!r})" class MutationsExceptionGroup(_BigtableExceptionGroup): @@ -200,10 +217,11 @@ def __init__( idempotent_msg = ( "idempotent" if failed_mutation_entry.is_idempotent() else "non-idempotent" ) - index_msg = f" at index {failed_idx} " if failed_idx is not None else " " - message = ( - f"Failed {idempotent_msg} mutation entry{index_msg}with cause: {cause!r}" - ) + index_msg = f" at index {failed_idx}" if failed_idx is not None else " " + message = f"Failed {idempotent_msg} mutation entry{index_msg}." + if not is_311_plus: + # attach the cause to the message, since it won't be displayed in the stack trace by default + message = f"{message}\n| Caused by {cause.__class__.__name__}: {cause}" super().__init__(message) self.index = failed_idx self.entry = failed_mutation_entry @@ -220,7 +238,7 @@ def _format_message(excs: list[Exception]): if len(excs) == 1: return f"1 failed attempt: {type(excs[0]).__name__}" else: - return f"{len(excs)} failed attempts. Latest: {type(excs[-1]).__name__}" + return f"{len(excs)} failed attempts." def __init__(self, excs: list[Exception]): super().__init__(self._format_message(excs), excs) @@ -268,7 +286,10 @@ def __init__( failed_query: "ReadRowsQuery" | dict[str, Any], cause: Exception, ): - message = f"Failed query at index {failed_index} with cause: {cause!r}" + message = f"Failed query at index {failed_index}" + if not is_311_plus: + # attach the cause to the message, since it won't be displayed in the stack trace by default + message = f"{message}\n| Caused by {cause.__class__.__name__}: {cause}" super().__init__(message) self.index = failed_index self.query = failed_query From ccc9ae4280042f241188cd1d4ebde12120c899fb Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 28 Jun 2023 14:13:04 -0700 Subject: [PATCH 209/213] attach cause in more general way --- google/cloud/bigtable/data/exceptions.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/google/cloud/bigtable/data/exceptions.py b/google/cloud/bigtable/data/exceptions.py index 0840f975d..b32a3d0fe 100644 --- a/google/cloud/bigtable/data/exceptions.py +++ b/google/cloud/bigtable/data/exceptions.py @@ -82,8 +82,10 @@ def __init__(self, message, excs): for idx, e in enumerate(excs[:15]): if idx != 0: message_parts.append(f"+---------------- {str(idx+1).rjust(2)} ----------------") - for sub_exc_line in f"| {e.__class__.__name__}: {str(e)}".split("\n"): - message_parts.append(sub_exc_line) + message_parts.extend(f"| {type(e).__name__}: {e}".splitlines()) + cause = e.__cause__ + if cause is not None: + message_parts.extend(f"| Caused by {type(cause).__name__}: {cause}".splitlines()) if len(excs) > 15: message_parts.append("+---------------- ... ---------------") message_parts.append(f"| and {len(excs) - 15} more") @@ -219,9 +221,6 @@ def __init__( ) index_msg = f" at index {failed_idx}" if failed_idx is not None else " " message = f"Failed {idempotent_msg} mutation entry{index_msg}." 
- if not is_311_plus: - # attach the cause to the message, since it won't be displayed in the stack trace by default - message = f"{message}\n| Caused by {cause.__class__.__name__}: {cause}" super().__init__(message) self.index = failed_idx self.entry = failed_mutation_entry @@ -287,9 +286,6 @@ def __init__( cause: Exception, ): message = f"Failed query at index {failed_index}" - if not is_311_plus: - # attach the cause to the message, since it won't be displayed in the stack trace by default - message = f"{message}\n| Caused by {cause.__class__.__name__}: {cause}" super().__init__(message) self.index = failed_index self.query = failed_query From f0b355889f0a3369ca4050f8d5b89d58d624406b Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 28 Jun 2023 14:32:15 -0700 Subject: [PATCH 210/213] fixed tests for 3.11 --- google/cloud/bigtable/data/exceptions.py | 16 +++++++++------ tests/unit/data/test_exceptions.py | 25 ++++++++---------------- 2 files changed, 18 insertions(+), 23 deletions(-) diff --git a/google/cloud/bigtable/data/exceptions.py b/google/cloud/bigtable/data/exceptions.py index b32a3d0fe..2c51875f2 100644 --- a/google/cloud/bigtable/data/exceptions.py +++ b/google/cloud/bigtable/data/exceptions.py @@ -99,6 +99,12 @@ def __new__(cls, message, excs): else: return super().__new__(cls) + def __str__(self): + if is_311_plus: + # don't return built-in sub-exception message + return self.args[0] + return super().__str__() + def __repr__(self): """ repr representation should strip out sub-exception details @@ -219,8 +225,8 @@ def __init__( idempotent_msg = ( "idempotent" if failed_mutation_entry.is_idempotent() else "non-idempotent" ) - index_msg = f" at index {failed_idx}" if failed_idx is not None else " " - message = f"Failed {idempotent_msg} mutation entry{index_msg}." + index_msg = f" at index {failed_idx}" if failed_idx is not None else "" + message = f"Failed {idempotent_msg} mutation entry{index_msg}" super().__init__(message) self.index = failed_idx self.entry = failed_mutation_entry @@ -234,10 +240,8 @@ class RetryExceptionGroup(_BigtableExceptionGroup): def _format_message(excs: list[Exception]): if len(excs) == 0: return "No exceptions" - if len(excs) == 1: - return f"1 failed attempt: {type(excs[0]).__name__}" - else: - return f"{len(excs)} failed attempts." + plural = "s" if len(excs) > 1 else "" + return f"{len(excs)} failed attempt{plural}" def __init__(self, excs: list[Exception]): super().__init__(self._format_message(excs), excs) diff --git a/tests/unit/data/test_exceptions.py b/tests/unit/data/test_exceptions.py index 9d1145e36..2914fcb09 100644 --- a/tests/unit/data/test_exceptions.py +++ b/tests/unit/data/test_exceptions.py @@ -241,11 +241,11 @@ def _make_one(self, excs=None): @pytest.mark.parametrize( "exception_list,expected_message", [ - ([Exception()], "1 failed attempt: Exception"), - ([Exception(), RuntimeError()], "2 failed attempts. Latest: RuntimeError"), + ([Exception()], "1 failed attempt"), + ([Exception(), RuntimeError()], "2 failed attempts"), ( [Exception(), ValueError("test")], - "2 failed attempts. 
Latest: ValueError", + "2 failed attempts", ), ( [ @@ -253,7 +253,7 @@ def _make_one(self, excs=None): [Exception(), ValueError("test")] ) ], - "1 failed attempt: RetryExceptionGroup", + "1 failed attempt", ), ], ) @@ -323,10 +323,7 @@ def test_raise(self): test_exc = ValueError("test") with pytest.raises(self._get_class()) as e: raise self._get_class()(test_idx, test_entry, test_exc) - assert ( - str(e.value) - == "Failed idempotent mutation entry at index 2 with cause: ValueError('test')" - ) + assert str(e.value) == "Failed idempotent mutation entry at index 2" assert e.value.index == test_idx assert e.value.entry == test_entry assert e.value.__cause__ == test_exc @@ -343,10 +340,7 @@ def test_raise_idempotent(self): test_exc = ValueError("test") with pytest.raises(self._get_class()) as e: raise self._get_class()(test_idx, test_entry, test_exc) - assert ( - str(e.value) - == "Failed non-idempotent mutation entry at index 2 with cause: ValueError('test')" - ) + assert str(e.value) == "Failed non-idempotent mutation entry at index 2" assert e.value.index == test_idx assert e.value.entry == test_entry assert e.value.__cause__ == test_exc @@ -361,10 +355,7 @@ def test_no_index(self): test_exc = ValueError("test") with pytest.raises(self._get_class()) as e: raise self._get_class()(test_idx, test_entry, test_exc) - assert ( - str(e.value) - == "Failed idempotent mutation entry with cause: ValueError('test')" - ) + assert str(e.value) == "Failed idempotent mutation entry" assert e.value.index == test_idx assert e.value.entry == test_entry assert e.value.__cause__ == test_exc @@ -391,7 +382,7 @@ def test_raise(self): test_exc = ValueError("test") with pytest.raises(self._get_class()) as e: raise self._get_class()(test_idx, test_query, test_exc) - assert str(e.value) == "Failed query at index 2 with cause: ValueError('test')" + assert str(e.value) == "Failed query at index 2" assert e.value.index == test_idx assert e.value.query == test_query assert e.value.__cause__ == test_exc From e2f263cf2d88f7c26ffa8db489e38b3a21f88242 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 28 Jun 2023 14:41:52 -0700 Subject: [PATCH 211/213] added improved output tests for 3.11 --- tests/unit/data/test_exceptions.py | 42 +++++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/tests/unit/data/test_exceptions.py b/tests/unit/data/test_exceptions.py index 2914fcb09..ac69d7116 100644 --- a/tests/unit/data/test_exceptions.py +++ b/tests/unit/data/test_exceptions.py @@ -71,11 +71,51 @@ def test_311_traceback(self): sub_exc1 = RuntimeError("first sub exception") sub_exc2 = ZeroDivisionError("second sub exception") + sub_group = self._make_one(excs=[sub_exc2]) + exc_group = self._make_one(excs=[sub_exc1, sub_group]) + + expected_traceback = ( + f" | google.cloud.bigtable.data.exceptions.{type(exc_group).__name__}: {str(exc_group)}", + " +-+---------------- 1 ----------------", + " | RuntimeError: first sub exception", + " +---------------- 2 ----------------", + f" | google.cloud.bigtable.data.exceptions.{type(sub_group).__name__}: {str(sub_group)}", + " +-+---------------- 1 ----------------", + " | ZeroDivisionError: second sub exception", + " +------------------------------------", + ) + exception_caught = False + try: + raise exc_group + except self._get_class(): + exception_caught = True + tb = traceback.format_exc() + tb_relevant_lines = tuple(tb.splitlines()[3:]) + assert expected_traceback == tb_relevant_lines + assert exception_caught + + @pytest.mark.skipif( + 
sys.version_info < (3, 11), reason="requires python3.11 or higher" + ) + def test_311_traceback_with_cause(self): + """ + traceback should display nicely with sub-exceptions with __cause__ set + """ + import traceback + + sub_exc1 = RuntimeError("first sub exception") + cause_exc = ImportError("cause exception") + sub_exc1.__cause__ = cause_exc + sub_exc2 = ZeroDivisionError("second sub exception") exc_group = self._make_one(excs=[sub_exc1, sub_exc2]) expected_traceback = ( - f" | google.cloud.bigtable.data.exceptions.{type(exc_group).__name__}: {str(exc_group)}", + f" | google.cloud.bigtable.data.exceptions.{type(exc_group).__name__}: {str(exc_group)}", " +-+---------------- 1 ----------------", + " | ImportError: cause exception", + " | ", + " | The above exception was the direct cause of the following exception:", + " | ", " | RuntimeError: first sub exception", " +---------------- 2 ----------------", " | ZeroDivisionError: second sub exception", From 6f269a8c5597d50d5a08bb63b6218f1206b4edd3 Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 28 Jun 2023 14:49:32 -0700 Subject: [PATCH 212/213] got 3.10 tests passing --- tests/unit/data/test_exceptions.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/tests/unit/data/test_exceptions.py b/tests/unit/data/test_exceptions.py index ac69d7116..262702093 100644 --- a/tests/unit/data/test_exceptions.py +++ b/tests/unit/data/test_exceptions.py @@ -49,7 +49,8 @@ def test_raise(self): test_excs = [Exception(test_msg)] with pytest.raises(self._get_class()) as e: raise self._get_class()(test_msg, test_excs) - assert str(e.value) == test_msg + found_message = str(e.value).splitlines()[0] # added to prase out subexceptions in <3.11 + assert found_message == test_msg assert list(e.value.exceptions) == test_excs def test_raise_empty_list(self): @@ -191,7 +192,8 @@ def test_raise(self, exception_list, total_entries, expected_message): """ with pytest.raises(self._get_class()) as e: raise self._get_class()(exception_list, total_entries) - assert str(e.value) == expected_message + found_message = str(e.value).splitlines()[0] # added to prase out subexceptions in <3.11 + assert found_message == expected_message assert list(e.value.exceptions) == exception_list def test_raise_custom_message(self): @@ -202,7 +204,8 @@ def test_raise_custom_message(self): exception_list = [Exception()] with pytest.raises(self._get_class()) as e: raise self._get_class()(exception_list, 5, message=custom_message) - assert str(e.value) == custom_message + found_message = str(e.value).splitlines()[0] # added to prase out subexceptions in <3.11 + assert found_message == custom_message assert list(e.value.exceptions) == exception_list @pytest.mark.parametrize( @@ -262,7 +265,8 @@ def test_from_truncated_lists( raise self._get_class().from_truncated_lists( first_list, second_list, total_excs, entry_count ) - assert str(e.value) == expected_message + found_message = str(e.value).splitlines()[0] # added to prase out subexceptions in <3.11 + assert found_message == expected_message assert list(e.value.exceptions) == first_list + second_list @@ -303,7 +307,8 @@ def test_raise(self, exception_list, expected_message): """ with pytest.raises(self._get_class()) as e: raise self._get_class()(exception_list) - assert str(e.value) == expected_message + found_message = str(e.value).splitlines()[0] # added to prase out subexceptions in <3.11 + assert found_message == expected_message assert list(e.value.exceptions) == exception_list @@ -339,7 +344,8 @@ 
def test_raise(self, exception_list, succeeded, total_entries, expected_message) """ with pytest.raises(self._get_class()) as e: raise self._get_class()(exception_list, succeeded, total_entries) - assert str(e.value) == expected_message + found_message = str(e.value).splitlines()[0] # added to prase out subexceptions in <3.11 + assert found_message == expected_message assert list(e.value.exceptions) == exception_list assert e.value.successful_rows == succeeded From e0906892dd1c5b5755b4163ef4a8b4c1b36c247c Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Wed, 28 Jun 2023 15:33:28 -0700 Subject: [PATCH 213/213] added tests for 3.10- output --- google/cloud/bigtable/data/exceptions.py | 11 +- tests/unit/data/test_exceptions.py | 154 ++++++++++++++++++----- 2 files changed, 128 insertions(+), 37 deletions(-) diff --git a/google/cloud/bigtable/data/exceptions.py b/google/cloud/bigtable/data/exceptions.py index 2c51875f2..6933d4a2a 100644 --- a/google/cloud/bigtable/data/exceptions.py +++ b/google/cloud/bigtable/data/exceptions.py @@ -82,10 +82,13 @@ def __init__(self, message, excs): for idx, e in enumerate(excs[:15]): if idx != 0: message_parts.append(f"+---------------- {str(idx+1).rjust(2)} ----------------") - message_parts.extend(f"| {type(e).__name__}: {e}".splitlines()) cause = e.__cause__ if cause is not None: - message_parts.extend(f"| Caused by {type(cause).__name__}: {cause}".splitlines()) + message_parts.extend(f"| {type(cause).__name__}: {cause}".splitlines()) + message_parts.append("| ") + message_parts.append("| The above exception was the direct cause of the following exception:") + message_parts.append("| ") + message_parts.extend(f"| {type(e).__name__}: {e}".splitlines()) if len(excs) > 15: message_parts.append("+---------------- ... 
---------------") message_parts.append(f"| and {len(excs) - 15} more") @@ -228,9 +231,9 @@ def __init__( index_msg = f" at index {failed_idx}" if failed_idx is not None else "" message = f"Failed {idempotent_msg} mutation entry{index_msg}" super().__init__(message) + self.__cause__ = cause self.index = failed_idx self.entry = failed_mutation_entry - self.__cause__ = cause class RetryExceptionGroup(_BigtableExceptionGroup): @@ -291,6 +294,6 @@ def __init__( ): message = f"Failed query at index {failed_index}" super().__init__(message) + self.__cause__ = cause self.index = failed_index self.query = failed_query - self.__cause__ = cause diff --git a/tests/unit/data/test_exceptions.py b/tests/unit/data/test_exceptions.py index 262702093..da543ce6e 100644 --- a/tests/unit/data/test_exceptions.py +++ b/tests/unit/data/test_exceptions.py @@ -25,42 +25,12 @@ import mock # type: ignore -class TestBigtableExceptionGroup: +class TracebackTests311: """ - Subclass for MutationsExceptionGroup, RetryExceptionGroup, and ShardedReadRowsExceptionGroup + Provides a set of tests that should be run on python 3.11 and above, + to verify that the exception traceback looks as expected """ - def _get_class(self): - from google.cloud.bigtable.data.exceptions import _BigtableExceptionGroup - - return _BigtableExceptionGroup - - def _make_one(self, message="test_message", excs=None): - if excs is None: - excs = [RuntimeError("mock")] - - return self._get_class()(message, excs=excs) - - def test_raise(self): - """ - Create exception in raise statement, which calls __new__ and __init__ - """ - test_msg = "test message" - test_excs = [Exception(test_msg)] - with pytest.raises(self._get_class()) as e: - raise self._get_class()(test_msg, test_excs) - found_message = str(e.value).splitlines()[0] # added to prase out subexceptions in <3.11 - assert found_message == test_msg - assert list(e.value.exceptions) == test_excs - - def test_raise_empty_list(self): - """ - Empty exception lists are not supported - """ - with pytest.raises(ValueError) as e: - raise self._make_one(excs=[]) - assert "non-empty sequence" in str(e.value) - @pytest.mark.skipif( sys.version_info < (3, 11), reason="requires python3.11 or higher" ) @@ -146,6 +116,124 @@ def test_311_exception_group(self): assert runtime_error.exceptions[0] == exceptions[0] assert others.exceptions[0] == exceptions[1] + +class TracebackTests310: + """ + Provides a set of tests that should be run on python 3.10 and under, + to verify that the exception traceback looks as expected + """ + + @pytest.mark.skipif( + sys.version_info >= (3, 11), reason="requires python3.10 or lower" + ) + def test_310_traceback(self): + """ + Exception customizations should not break rich exception group traceback in python 3.10 + """ + import traceback + + sub_exc1 = RuntimeError("first sub exception") + sub_exc2 = ZeroDivisionError("second sub exception") + sub_group = self._make_one(excs=[sub_exc2]) + exc_group = self._make_one(excs=[sub_exc1, sub_group]) + found_message = str(exc_group).splitlines()[0] + found_sub_message = str(sub_group).splitlines()[0] + + expected_traceback = ( + f"google.cloud.bigtable.data.exceptions.{type(exc_group).__name__}: {found_message}", + "--+---------------- 1 ----------------", + " | RuntimeError: first sub exception", + " +---------------- 2 ----------------", + f" | {type(sub_group).__name__}: {found_sub_message}", + " --+---------------- 1 ----------------", + " | ZeroDivisionError: second sub exception", + " +------------------------------------", + ) + 
exception_caught = False + try: + raise exc_group + except self._get_class(): + exception_caught = True + tb = traceback.format_exc() + tb_relevant_lines = tuple(tb.splitlines()[3:]) + assert expected_traceback == tb_relevant_lines + assert exception_caught + + @pytest.mark.skipif( + sys.version_info >= (3, 11), reason="requires python3.10 or lower" + ) + def test_310_traceback_with_cause(self): + """ + traceback should display nicely with sub-exceptions with __cause__ set + """ + import traceback + + sub_exc1 = RuntimeError("first sub exception") + cause_exc = ImportError("cause exception") + sub_exc1.__cause__ = cause_exc + sub_exc2 = ZeroDivisionError("second sub exception") + exc_group = self._make_one(excs=[sub_exc1, sub_exc2]) + found_message = str(exc_group).splitlines()[0] + + expected_traceback = ( + f"google.cloud.bigtable.data.exceptions.{type(exc_group).__name__}: {found_message}", + "--+---------------- 1 ----------------", + " | ImportError: cause exception", + " | ", + " | The above exception was the direct cause of the following exception:", + " | ", + " | RuntimeError: first sub exception", + " +---------------- 2 ----------------", + " | ZeroDivisionError: second sub exception", + " +------------------------------------", + ) + exception_caught = False + try: + raise exc_group + except self._get_class(): + exception_caught = True + tb = traceback.format_exc() + tb_relevant_lines = tuple(tb.splitlines()[3:]) + assert expected_traceback == tb_relevant_lines + assert exception_caught + + +class TestBigtableExceptionGroup(TracebackTests311, TracebackTests310): + """ + Subclass for MutationsExceptionGroup, RetryExceptionGroup, and ShardedReadRowsExceptionGroup + """ + + def _get_class(self): + from google.cloud.bigtable.data.exceptions import _BigtableExceptionGroup + + return _BigtableExceptionGroup + + def _make_one(self, message="test_message", excs=None): + if excs is None: + excs = [RuntimeError("mock")] + + return self._get_class()(message, excs=excs) + + def test_raise(self): + """ + Create exception in raise statement, which calls __new__ and __init__ + """ + test_msg = "test message" + test_excs = [Exception(test_msg)] + with pytest.raises(self._get_class()) as e: + raise self._get_class()(test_msg, test_excs) + found_message = str(e.value).splitlines()[0] # added to prase out subexceptions in <3.11 + assert found_message == test_msg + assert list(e.value.exceptions) == test_excs + + def test_raise_empty_list(self): + """ + Empty exception lists are not supported + """ + with pytest.raises(ValueError) as e: + raise self._make_one(excs=[]) + assert "non-empty sequence" in str(e.value) + def test_exception_handling(self): """ All versions should inherit from exception