From 79e09283aae7996a271ba9251c2768d7467be9bd Mon Sep 17 00:00:00 2001 From: Mark Chappell Date: Mon, 15 Mar 2021 17:29:27 +0100 Subject: [PATCH] Attempt to improve the stability of ec2_vpc_vgw and ec2_vpc_vpn (#162) * Add a custom Retry so we can retry when we receive 'The maximum number of mutating objects has been reached' * Update ec2_vpc_vpn unit test to use a connection with an AWSRetry decorator * changelog --- changelogs/fragments/162-vgw-retries.yml | 3 + plugins/modules/ec2_vpc_vgw.py | 78 +++++++++++-------- plugins/modules/ec2_vpc_vpn.py | 57 ++++++++++---- .../unit/plugins/modules/test_ec2_vpc_vpn.py | 20 ++++- 4 files changed, 108 insertions(+), 50 deletions(-) create mode 100644 changelogs/fragments/162-vgw-retries.yml diff --git a/changelogs/fragments/162-vgw-retries.yml b/changelogs/fragments/162-vgw-retries.yml new file mode 100644 index 00000000000..93a38970451 --- /dev/null +++ b/changelogs/fragments/162-vgw-retries.yml @@ -0,0 +1,3 @@ +minor_changes: +- ec2_vpc_vpn - Add automatic retries for recoverable errors (https://github.com/ansible-collections/community.aws/pull/162). +- ec2_vpc_vgw - Add automatic retries for recoverable errors (https://github.com/ansible-collections/community.aws/pull/162). diff --git a/plugins/modules/ec2_vpc_vgw.py b/plugins/modules/ec2_vpc_vgw.py index ce68833bcfc..4dd9a2cb456 100644 --- a/plugins/modules/ec2_vpc_vgw.py +++ b/plugins/modules/ec2_vpc_vgw.py @@ -124,6 +124,29 @@ from ansible_collections.amazon.aws.plugins.module_utils.waiters import get_waiter +# AWS uses VpnGatewayLimitExceeded for both 'Too many VGWs' and 'Too many concurrent changes' +# we need to look at the mesage to tell the difference. +class VGWRetry(AWSRetry): + @staticmethod + def status_code_from_exception(error): + return (error.response['Error']['Code'], error.response['Error']['Message'],) + + @staticmethod + def found(response_codes, catch_extra_error_codes=None): + retry_on = ['The maximum number of mutating objects has been reached.'] + + if catch_extra_error_codes: + retry_on.extend(catch_extra_error_codes) + if not isinstance(response_codes, tuple): + response_codes = (response_codes,) + + for code in response_codes: + if super().found(response_codes, catch_extra_error_codes): + return True + + return False + + def get_vgw_info(vgws): if not isinstance(vgws, list): return @@ -174,7 +197,7 @@ def attach_vgw(client, module, vpn_gateway_id): # Immediately after a detachment, the EC2 API sometimes will report the VpnGateways[0].State # as available several seconds before actually permitting a new attachment. # So we catch and retry that error. See https://github.com/ansible/ansible/issues/53185 - response = AWSRetry.jittered_backoff(retries=5, + response = VGWRetry.jittered_backoff(retries=5, catch_extra_error_codes=['InvalidParameterValue'] )(client.attach_vpn_gateway)(VpnGatewayId=vpn_gateway_id, VpcId=params['VpcId']) @@ -193,16 +216,13 @@ def detach_vgw(client, module, vpn_gateway_id, vpc_id=None): params = dict() params['VpcId'] = module.params.get('vpc_id') - if vpc_id: - try: - response = client.detach_vpn_gateway(VpnGatewayId=vpn_gateway_id, VpcId=vpc_id) - except (botocore.exceptions.ClientError, botocore.exceptions.BotoCoreError) as e: - module.fail_json_aws(e, msg='Failed to detach gateway') - else: - try: - response = client.detach_vpn_gateway(VpnGatewayId=vpn_gateway_id, VpcId=params['VpcId']) - except (botocore.exceptions.ClientError, botocore.exceptions.BotoCoreError) as e: - module.fail_json_aws(e, msg='Failed to detach gateway') + try: + if vpc_id: + response = client.detach_vpn_gateway(VpnGatewayId=vpn_gateway_id, VpcId=vpc_id, aws_retry=True) + else: + response = client.detach_vpn_gateway(VpnGatewayId=vpn_gateway_id, VpcId=params['VpcId'], aws_retry=True) + except (botocore.exceptions.ClientError, botocore.exceptions.BotoCoreError) as e: + module.fail_json_aws(e, 'Failed to detach gateway') status_achieved, vgw = wait_for_status(client, module, [vpn_gateway_id], 'detached') if not status_achieved: @@ -219,7 +239,7 @@ def create_vgw(client, module): params['AmazonSideAsn'] = module.params.get('asn') try: - response = client.create_vpn_gateway(**params) + response = client.create_vpn_gateway(aws_retry=True, **params) get_waiter( client, 'vpn_gateway_exists' ).wait( @@ -239,7 +259,7 @@ def create_vgw(client, module): def delete_vgw(client, module, vpn_gateway_id): try: - response = client.delete_vpn_gateway(VpnGatewayId=vpn_gateway_id) + response = client.delete_vpn_gateway(VpnGatewayId=vpn_gateway_id, aws_retry=True) except (botocore.exceptions.ClientError, botocore.exceptions.BotoCoreError) as e: module.fail_json_aws(e, msg='Failed to delete gateway') @@ -252,7 +272,7 @@ def create_tags(client, module, vpn_gateway_id): params = dict() try: - response = client.create_tags(Resources=[vpn_gateway_id], Tags=load_tags(module)) + response = client.create_tags(Resources=[vpn_gateway_id], Tags=load_tags(module), aws_retry=True) except (botocore.exceptions.ClientError, botocore.exceptions.BotoCoreError) as e: module.fail_json_aws(e, msg="Failed to add tags") @@ -263,16 +283,13 @@ def create_tags(client, module, vpn_gateway_id): def delete_tags(client, module, vpn_gateway_id, tags_to_delete=None): params = dict() - if tags_to_delete: - try: - response = client.delete_tags(Resources=[vpn_gateway_id], Tags=tags_to_delete) - except (botocore.exceptions.ClientError, botocore.exceptions.BotoCoreError) as e: - module.fail_json_aws(e, msg='Failed to delete tags') - else: - try: - response = client.delete_tags(Resources=[vpn_gateway_id]) - except (botocore.exceptions.ClientError, botocore.exceptions.BotoCoreError) as e: - module.fail_json_aws(e, msg='Failed to delete all tags') + try: + if tags_to_delete: + response = client.delete_tags(Resources=[vpn_gateway_id], Tags=tags_to_delete, aws_retry=True) + else: + response = client.delete_tags(Resources=[vpn_gateway_id], aws_retry=True) + except (botocore.exceptions.ClientError, botocore.exceptions.BotoCoreError) as e: + module.fail_json_aws(e, msg='Unable to remove tags from gateway') result = response return result @@ -294,8 +311,8 @@ def find_tags(client, module, resource_id=None): if resource_id: try: - response = client.describe_tags(Filters=[ - {'Name': 'resource-id', 'Values': [resource_id]} + response = client.describe_tags(aws_retry=True, Filters=[ + {'Name': 'resource-id', 'Values': [resource_id]}, ]) except (botocore.exceptions.ClientError, botocore.exceptions.BotoCoreError) as e: module.fail_json_aws(e, msg='Failed to describe tags searching by resource') @@ -343,7 +360,7 @@ def find_vpc(client, module): if params['vpc_id']: try: - response = client.describe_vpcs(VpcIds=[params['vpc_id']]) + response = client.describe_vpcs(VpcIds=[params['vpc_id']], aws_retry=True) except (botocore.exceptions.ClientError, botocore.exceptions.BotoCoreError) as e: module.fail_json_aws(e, msg='Failed to describe VPC') @@ -363,7 +380,7 @@ def find_vgw(client, module, vpn_gateway_id=None): if module.params.get('state') == 'present': params['Filters'].append({'Name': 'state', 'Values': ['pending', 'available']}) try: - response = client.describe_vpn_gateways(**params) + response = client.describe_vpn_gateways(aws_retry=True, **params) except (botocore.exceptions.ClientError, botocore.exceptions.BotoCoreError) as e: module.fail_json_aws(e, msg='Failed to describe gateway using filters') @@ -549,10 +566,7 @@ def main(): state = module.params.get('state').lower() - try: - client = module.client('ec2') - except (botocore.exceptions.ClientError, botocore.exceptions.BotoCoreError) as e: - module.fail_json_aws(e, msg='Failed to connect to AWS') + client = module.client('ec2', retry_decorator=VGWRetry.jittered_backoff(retries=10)) if state == 'present': (changed, results) = ensure_vgw_present(client, module) diff --git a/plugins/modules/ec2_vpc_vpn.py b/plugins/modules/ec2_vpc_vpn.py index 6e18e724258..56bb4e9b8fd 100644 --- a/plugins/modules/ec2_vpc_vpn.py +++ b/plugins/modules/ec2_vpc_vpn.py @@ -298,14 +298,13 @@ vpn_connection_id: vpn-781e0e19 """ -from ansible_collections.amazon.aws.plugins.module_utils.core import AnsibleAWSModule from ansible.module_utils._text import to_text -from ansible_collections.amazon.aws.plugins.module_utils.ec2 import ( - camel_dict_to_snake_dict, - boto3_tag_list_to_ansible_dict, - compare_aws_tags, - ansible_dict_to_boto3_tag_list, -) +from ansible_collections.amazon.aws.plugins.module_utils.core import AnsibleAWSModule +from ansible_collections.amazon.aws.plugins.module_utils.ec2 import AWSRetry +from ansible_collections.amazon.aws.plugins.module_utils.ec2 import ansible_dict_to_boto3_tag_list +from ansible_collections.amazon.aws.plugins.module_utils.ec2 import boto3_tag_list_to_ansible_dict +from ansible_collections.amazon.aws.plugins.module_utils.ec2 import camel_dict_to_snake_dict +from ansible_collections.amazon.aws.plugins.module_utils.ec2 import compare_aws_tags try: from botocore.exceptions import BotoCoreError, ClientError, WaiterError @@ -319,6 +318,29 @@ def __init__(self, msg, exception=None): self.exception = exception +# AWS uses VpnGatewayLimitExceeded for both 'Too many VGWs' and 'Too many concurrent changes' +# we need to look at the mesage to tell the difference. +class VPNRetry(AWSRetry): + @staticmethod + def status_code_from_exception(error): + return (error.response['Error']['Code'], error.response['Error']['Message'],) + + @staticmethod + def found(response_codes, catch_extra_error_codes=None): + retry_on = ['The maximum number of mutating objects has been reached.'] + + if catch_extra_error_codes: + retry_on.extend(catch_extra_error_codes) + if not isinstance(response_codes, tuple): + response_codes = (response_codes,) + + for code in response_codes: + if super().found(response_codes, catch_extra_error_codes): + return True + + return False + + def find_connection(connection, module_params, vpn_connection_id=None): ''' Looks for a unique VPN connection. Uses find_connection_response() to return the connection found, None, or raise an error if there were multiple viable connections. ''' @@ -342,10 +364,11 @@ def find_connection(connection, module_params, vpn_connection_id=None): # see if there is a unique matching connection try: if vpn_connection_id: - existing_conn = connection.describe_vpn_connections(VpnConnectionIds=vpn_connection_id, + existing_conn = connection.describe_vpn_connections(aws_retry=True, + VpnConnectionIds=vpn_connection_id, Filters=formatted_filter) else: - existing_conn = connection.describe_vpn_connections(Filters=formatted_filter) + existing_conn = connection.describe_vpn_connections(aws_retry=True, Filters=formatted_filter) except (BotoCoreError, ClientError) as e: raise VPNConnectionException(msg="Failed while describing VPN connection.", exception=e) @@ -356,7 +379,8 @@ def find_connection(connection, module_params, vpn_connection_id=None): def add_routes(connection, vpn_connection_id, routes_to_add): for route in routes_to_add: try: - connection.create_vpn_connection_route(VpnConnectionId=vpn_connection_id, + connection.create_vpn_connection_route(aws_retry=True, + VpnConnectionId=vpn_connection_id, DestinationCidrBlock=route) except (BotoCoreError, ClientError) as e: raise VPNConnectionException(msg="Failed while adding route {0} to the VPN connection {1}.".format(route, vpn_connection_id), @@ -366,7 +390,8 @@ def add_routes(connection, vpn_connection_id, routes_to_add): def remove_routes(connection, vpn_connection_id, routes_to_remove): for route in routes_to_remove: try: - connection.delete_vpn_connection_route(VpnConnectionId=vpn_connection_id, + connection.delete_vpn_connection_route(aws_retry=True, + VpnConnectionId=vpn_connection_id, DestinationCidrBlock=route) except (BotoCoreError, ClientError) as e: raise VPNConnectionException(msg="Failed to remove route {0} from the VPN connection {1}.".format(route, vpn_connection_id), @@ -504,7 +529,7 @@ def create_connection(connection, customer_gateway_id, static_only, vpn_gateway_ def delete_connection(connection, vpn_connection_id, delay, max_attempts): """ Deletes a VPN connection """ try: - connection.delete_vpn_connection(VpnConnectionId=vpn_connection_id) + connection.delete_vpn_connection(aws_retry=True, VpnConnectionId=vpn_connection_id) connection.get_waiter('vpn_connection_deleted').wait( VpnConnectionIds=[vpn_connection_id], WaiterConfig={'Delay': delay, 'MaxAttempts': max_attempts} @@ -519,7 +544,8 @@ def delete_connection(connection, vpn_connection_id, delay, max_attempts): def add_tags(connection, vpn_connection_id, add): try: - connection.create_tags(Resources=[vpn_connection_id], + connection.create_tags(aws_retry=True, + Resources=[vpn_connection_id], Tags=add) except (BotoCoreError, ClientError) as e: raise VPNConnectionException(msg="Failed to add the tags: {0}.".format(add), @@ -530,7 +556,8 @@ def remove_tags(connection, vpn_connection_id, remove): # format tags since they are a list in the format ['tag1', 'tag2', 'tag3'] key_dict_list = [{'Key': tag} for tag in remove] try: - connection.delete_tags(Resources=[vpn_connection_id], + connection.delete_tags(aws_retry=True, + Resources=[vpn_connection_id], Tags=key_dict_list) except (BotoCoreError, ClientError) as e: raise VPNConnectionException(msg="Failed to remove the tags: {0}.".format(remove), @@ -755,7 +782,7 @@ def main(): ) module = AnsibleAWSModule(argument_spec=argument_spec, supports_check_mode=True) - connection = module.client('ec2') + connection = module.client('ec2', retry_decorator=VPNRetry.jittered_backoff(retries=10)) state = module.params.get('state') parameters = dict(module.params) diff --git a/tests/unit/plugins/modules/test_ec2_vpc_vpn.py b/tests/unit/plugins/modules/test_ec2_vpc_vpn.py index 1e50d42906a..ee8f284a1f6 100644 --- a/tests/unit/plugins/modules/test_ec2_vpc_vpn.py +++ b/tests/unit/plugins/modules/test_ec2_vpc_vpn.py @@ -6,15 +6,27 @@ import pytest import os -from ansible_collections.amazon.aws.tests.unit.utils.amazon_placebo_fixtures import placeboify, maybe_sleep +from ansible_collections.amazon.aws.tests.unit.utils.amazon_placebo_fixtures import placeboify +from ansible_collections.amazon.aws.tests.unit.utils.amazon_placebo_fixtures import maybe_sleep + +import ansible_collections.amazon.aws.plugins.module_utils.core as aws_core +import ansible_collections.amazon.aws.plugins.module_utils.ec2 as aws_ec2 +from ansible_collections.amazon.aws.plugins.module_utils.ec2 import get_aws_connection_info +from ansible_collections.amazon.aws.plugins.module_utils.ec2 import boto3_conn +from ansible_collections.amazon.aws.plugins.module_utils.ec2 import boto3_tag_list_to_ansible_dict + from ansible_collections.community.aws.plugins.modules import ec2_vpc_vpn -from ansible_collections.amazon.aws.plugins.module_utils.ec2 import get_aws_connection_info, boto3_conn, boto3_tag_list_to_ansible_dict class FakeModule(object): def __init__(self, **kwargs): self.params = kwargs + def fail_json_aws(self, *args, **kwargs): + self.exit_args = args + self.exit_kwargs = kwargs + raise Exception('FAIL') + def fail_json(self, *args, **kwargs): self.exit_args = args self.exit_kwargs = kwargs @@ -68,8 +80,10 @@ def get_dependencies(): def setup_mod_conn(placeboify, params): conn = placeboify.client('ec2') + retry_decorator = aws_ec2.AWSRetry.jittered_backoff() + wrapped_conn = aws_core._RetryingBotoClientWrapper(conn, retry_decorator) m = FakeModule(**params) - return m, conn + return m, wrapped_conn def make_params(cgw, vgw, tags=None, filters=None, routes=None):