From 3291f20632ebcfcf80692682071d2b7bf1fc2f6a Mon Sep 17 00:00:00 2001
From: ford prior
Date: Fri, 6 Sep 2024 10:31:50 -0400
Subject: [PATCH] Python - EC2: Audit results implemented (Pt. 2) (#6789)

* This update applies coding standards to the source code and test code.

---------

Co-authored-by: David Souther
---
 .../resilient_service/.gitignore              |   4 +-
 .../resilient_service/auto_scaler.py          | 631 +++++++++++-------
 .../resilient_service/{test => }/conftest.py  |  17 +-
 .../resilient_service/load_balancer.py        | 352 ++++++----
 .../resilient_service/parameters.py           |  43 +-
 .../recommendation_service.py                 |  41 +-
 .../resilient_service/requirements.txt        |   3 +-
 .../cross_service/resilient_service/runner.py | 393 ++++++-----
 .../resilient_service/test/pytest.ini         |   2 +
 .../test/resources/server.py                  |   4 +-
 .../resilient_service/test/test_demo.py       | 253 -------
 .../resilient_service/test/test_deploy.py     | 264 --------
 .../resilient_service/test/test_destroy.py    | 179 -----
 .../test/test_runner_integ.py                 | 139 ++--
 python/example_code/auto-scaling/README.md    |  22 +-
 .../auto-scaling/action_wrapper.py            | 296 +++++---
 .../auto-scaling/hello/hello_autoscaling.py   |   2 +-
 .../scenario_groups_and_instances.py          | 129 +++-
 .../auto-scaling/test/conftest.py             |   3 +-
 .../auto-scaling/test/test_action_wrapper.py  |   9 +-
 .../test_scenario_groups_and_instances.py     |   3 +-
 python/example_code/ec2/README.md             |  16 +-
 .../elastic-load-balancing/README.md          |  12 +-
 python/example_code/iam/README.md             |   4 +-
 24 files changed, 1303 insertions(+), 1518 deletions(-)
 rename python/cross_service/resilient_service/{test => }/conftest.py (91%)
 create mode 100644 python/cross_service/resilient_service/test/pytest.ini
 delete mode 100644 python/cross_service/resilient_service/test/test_demo.py
 delete mode 100644 python/cross_service/resilient_service/test/test_deploy.py
 delete mode 100644 python/cross_service/resilient_service/test/test_destroy.py

diff --git a/python/cross_service/resilient_service/.gitignore b/python/cross_service/resilient_service/.gitignore
index cfaad761185..8806444ac2b 100644
--- a/python/cross_service/resilient_service/.gitignore
+++ b/python/cross_service/resilient_service/.gitignore
@@ -1 +1,3 @@
-*.pem
+doc-example-resilience-key-pair-*
+test-doc-example-resilience-key-pair-*
+
diff --git a/python/cross_service/resilient_service/auto_scaler.py b/python/cross_service/resilient_service/auto_scaler.py
index 81ba58b5d37..d4dfd06577b 100644
--- a/python/cross_service/resilient_service/auto_scaler.py
+++ b/python/cross_service/resilient_service/auto_scaler.py
@@ -5,7 +5,8 @@
 import json
 import logging
 import time
-from os import remove, chmod
+from os import chmod, remove
+from typing import Any, Dict, List, Tuple
 
 import boto3
 from botocore.exceptions import ClientError
@@ -13,32 +14,29 @@
 log = logging.getLogger(__name__)
 
 
-class AutoScalerError(Exception):
-    pass
-
-
 # snippet-start:[python.example_code.workflow.ResilientService_AutoScaler]
 # snippet-start:[python.cross_service.resilient_service.AutoScaler.decl]
-class AutoScaler:
+class AutoScalingWrapper:
     """
    Encapsulates Amazon EC2 Auto Scaling and EC2 management actions.
     """
 
     def __init__(
         self,
-        resource_prefix,
-        inst_type,
-        ami_param,
-        autoscaling_client,
-        ec2_client,
-        ssm_client,
-        iam_client,
+        resource_prefix: str,
+        inst_type: str,
+        ami_param: str,
+        autoscaling_client: boto3.client,
+        ec2_client: boto3.client,
+        ssm_client: boto3.client,
+        iam_client: boto3.client,
     ):
         """
+        Initializes the AutoScalingWrapper class with the necessary parameters.
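+
+        Example (an illustrative construction sketch; the prefix shown is a
+        hypothetical value, and the clients can be any configured Boto3 clients):
+
+            wrapper = AutoScalingWrapper(
+                "doc-example-resilience",
+                "t3.micro",
+                "/aws/service/ami-amazon-linux-latest/amzn2-ami-hvm-x86_64-gp2",
+                boto3.client("autoscaling"),
+                boto3.client("ec2"),
+                boto3.client("ssm"),
+                boto3.client("iam"),
+            )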
+ :param resource_prefix: The prefix for naming AWS resources that are created by this class. :param inst_type: The type of EC2 instance to create, such as t3.micro. - :param ami_param: The Systems Manager parameter used to look up the AMI that is - created. + :param ami_param: The Systems Manager parameter used to look up the AMI that is created. :param autoscaling_client: A Boto3 EC2 Auto Scaling client. :param ec2_client: A Boto3 EC2 client. :param ssm_client: A Boto3 Systems Manager client. @@ -50,43 +48,118 @@ def __init__( self.ec2_client = ec2_client self.ssm_client = ssm_client self.iam_client = iam_client - self.launch_template_name = f"{resource_prefix}-template" + sts_client = boto3.client("sts") + self.account_id = sts_client.get_caller_identity()["Account"] + + self.key_pair_name = f"{resource_prefix}-key-pair" + self.launch_template_name = f"{resource_prefix}-template-" self.group_name = f"{resource_prefix}-group" + + # Happy path self.instance_policy_name = f"{resource_prefix}-pol" self.instance_role_name = f"{resource_prefix}-role" self.instance_profile_name = f"{resource_prefix}-prof" + + # Failure mode self.bad_creds_policy_name = f"{resource_prefix}-bc-pol" self.bad_creds_role_name = f"{resource_prefix}-bc-role" self.bad_creds_profile_name = f"{resource_prefix}-bc-prof" - self.key_pair_name = f"{resource_prefix}-key-pair" # snippet-end:[python.cross_service.resilient_service.AutoScaler.decl] - @classmethod - def from_client(cls, resource_prefix): + def create_policy(self, policy_file: str, policy_name: str) -> str: """ - Creates this class from Boto3 clients. + Creates a new IAM policy or retrieves the ARN of an existing policy. - :param resource_prefix: The prefix for naming AWS resources that are created by this class. + :param policy_file: The path to a JSON file that contains the policy definition. + :param policy_name: The name to give the created policy. + :return: The ARN of the created or existing policy. """ - as_client = boto3.client("autoscaling") - ec2_client = boto3.client("ec2") - ssm_client = boto3.client("ssm") - iam_client = boto3.client("iam") - return cls( - resource_prefix, - "t3.micro", - "/aws/service/ami-amazon-linux-latest/amzn2-ami-hvm-x86_64-gp2", - as_client, - ec2_client, - ssm_client, - iam_client, - ) + with open(policy_file) as file: + policy_doc = file.read() + + try: + response = self.iam_client.create_policy( + PolicyName=policy_name, PolicyDocument=policy_doc + ) + policy_arn = response["Policy"]["Arn"] + log.info(f"Policy '{policy_name}' created successfully. ARN: {policy_arn}") + return policy_arn + + except ClientError as err: + if err.response["Error"]["Code"] == "EntityAlreadyExists": + # If the policy already exists, get its ARN + response = self.iam_client.get_policy( + PolicyArn=f"arn:aws:iam::{self.account_id}:policy/{policy_name}" + ) + policy_arn = response["Policy"]["Arn"] + log.info(f"Policy '{policy_name}' already exists. ARN: {policy_arn}") + return policy_arn + log.error(f"Full error:\n\t{err}") + pass + + def create_role(self, role_name: str, assume_role_doc: dict) -> str: + """ + Creates a new IAM role or retrieves the ARN of an existing role. + + :param role_name: The name to give the created role. + :param assume_role_doc: The assume role policy document that specifies which + entities can assume the role. + :return: The ARN of the created or existing role. 
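+
+        Example (illustrative; the role name is hypothetical and the trust
+        policy mirrors the document built by create_instance_profile):
+
+            trust_policy = {
+                "Version": "2012-10-17",
+                "Statement": [
+                    {
+                        "Effect": "Allow",
+                        "Principal": {"Service": "ec2.amazonaws.com"},
+                        "Action": "sts:AssumeRole",
+                    }
+                ],
+            }
+            role_arn = wrapper.create_role("doc-example-resilience-role", trust_policy)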
+ """ + try: + response = self.iam_client.create_role( + RoleName=role_name, AssumeRolePolicyDocument=json.dumps(assume_role_doc) + ) + role_arn = response["Role"]["Arn"] + log.info(f"Role '{role_name}' created successfully. ARN: {role_arn}") + return role_arn + + except ClientError as err: + if err.response["Error"]["Code"] == "EntityAlreadyExists": + # If the role already exists, get its ARN + response = self.iam_client.get_role(RoleName=role_name) + role_arn = response["Role"]["Arn"] + log.info(f"Role '{role_name}' already exists. ARN: {role_arn}") + return role_arn + log.error(f"Full error:\n\t{err}") + pass + + def attach_policy( + self, + role_name: str, + policy_arn: str, + aws_managed_policies: Tuple[str, ...] = (), + ) -> None: + """ + Attaches an IAM policy to a role and optionally attaches additional AWS-managed policies. + + :param role_name: The name of the role to attach the policy to. + :param policy_arn: The ARN of the policy to attach. + :param aws_managed_policies: A tuple of AWS-managed policy names to attach to the role. + """ + try: + self.iam_client.attach_role_policy(RoleName=role_name, PolicyArn=policy_arn) + for aws_policy in aws_managed_policies: + self.iam_client.attach_role_policy( + RoleName=role_name, + PolicyArn=f"arn:aws:iam::aws:policy/{aws_policy}", + ) + log.info(f"Attached policy {policy_arn} to role {role_name}.") + except ClientError as err: + log.error(f"Failed to attach policy {policy_arn} to role {role_name}.") + log.error(f"Full error:\n\t{err}") + pass # snippet-start:[python.cross_service.resilient_service.iam.CreateInstanceProfile] def create_instance_profile( - self, policy_file, policy_name, role_name, profile_name, aws_managed_policies=() - ): + self, + policy_file: str, + policy_name: str, + role_name: str, + profile_name: str, + aws_managed_policies: Tuple[str, ...] = (), + ) -> str: """ Creates a policy, role, and profile that is associated with instances created by this class. 
An instance's associated profile defines a role that is assumed by the @@ -113,43 +186,9 @@ def create_instance_profile( } ], } - with open(policy_file) as file: - instance_policy_doc = file.read() - - policy_arn = None - try: - pol_response = self.iam_client.create_policy( - PolicyName=policy_name, PolicyDocument=instance_policy_doc - ) - policy_arn = pol_response["Policy"]["Arn"] - log.info("Created policy with ARN %s.", policy_arn) - except ClientError as err: - if err.response["Error"]["Code"] == "EntityAlreadyExists": - log.info("Policy %s already exists, nothing to do.", policy_name) - list_pol_response = self.iam_client.list_policies(Scope="Local") - for pol in list_pol_response["Policies"]: - if pol["PolicyName"] == policy_name: - policy_arn = pol["Arn"] - break - if policy_arn is None: - raise AutoScalerError(f"Couldn't create policy {policy_name}: {err}") - - try: - self.iam_client.create_role( - RoleName=role_name, AssumeRolePolicyDocument=json.dumps(assume_role_doc) - ) - self.iam_client.attach_role_policy(RoleName=role_name, PolicyArn=policy_arn) - for aws_policy in aws_managed_policies: - self.iam_client.attach_role_policy( - RoleName=role_name, - PolicyArn=f"arn:aws:iam::aws:policy/{aws_policy}", - ) - log.info("Created role %s and attached policy %s.", role_name, policy_arn) - except ClientError as err: - if err.response["Error"]["Code"] == "EntityAlreadyExists": - log.info("Role %s already exists, nothing to do.", role_name) - else: - raise AutoScalerError(f"Couldn't create role {role_name}: {err}") + policy_arn = self.create_policy(policy_file, policy_name) + self.create_role(role_name, assume_role_doc) + self.attach_policy(role_name, policy_arn, aws_managed_policies) try: profile_response = self.iam_client.create_instance_profile( @@ -172,17 +211,14 @@ def create_instance_profile( log.info( "Instance profile %s already exists, nothing to do.", profile_name ) - else: - raise AutoScalerError( - f"Couldn't create profile {profile_name} and attach it to role\n" - f"{role_name}: {err}" - ) + log.error(f"Full error:\n\t{err}") + pass return profile_arn # snippet-end:[python.cross_service.resilient_service.iam.CreateInstanceProfile] # snippet-start:[python.cross_service.resilient_service.ec2.DescribeIamInstanceProfileAssociations] - def get_instance_profile(self, instance_id): + def get_instance_profile(self, instance_id: str) -> Dict[str, Any]: """ Gets data about the profile associated with an instance. @@ -193,25 +229,36 @@ def get_instance_profile(self, instance_id): response = self.ec2_client.describe_iam_instance_profile_associations( Filters=[{"Name": "instance-id", "Values": [instance_id]}] ) + if not response["IamInstanceProfileAssociations"]: + log.info(f"No instance profile found for instance {instance_id}.") + profile_data = response["IamInstanceProfileAssociations"][0] + log.info(f"Retrieved instance profile for instance {instance_id}.") + return profile_data except ClientError as err: - raise AutoScalerError( - f"Couldn't get instance profile association for instance {instance_id}: {err}" + log.error( + f"Failed to retrieve instance profile for instance {instance_id}." 
) - else: - return response["IamInstanceProfileAssociations"][0] + error_code = err.response["Error"]["Code"] + if error_code == "InvalidInstanceID.NotFound": + log.error(f"The instance ID '{instance_id}' does not exist.") + log.error(f"Full error:\n\t{err}") + pass # snippet-end:[python.cross_service.resilient_service.ec2.DescribeIamInstanceProfileAssociations] # snippet-start:[python.cross_service.resilient_service.ec2.ReplaceIamInstanceProfileAssociation] def replace_instance_profile( - self, instance_id, new_instance_profile_name, profile_association_id - ): + self, + instance_id: str, + new_instance_profile_name: str, + profile_association_id: str, + ) -> None: """ Replaces the profile associated with a running instance. After the profile is replaced, the instance is rebooted to ensure that it uses the new profile. When the instance is ready, Systems Manager is used to restart the Python web server. - :param instance_id: The ID of the instance to update. + :param instance_id: The ID of the instance to restart. :param new_instance_profile_name: The name of the new profile to associate with the specified instance. :param profile_association_id: The ID of the existing profile association for the @@ -228,36 +275,40 @@ def replace_instance_profile( new_instance_profile_name, ) time.sleep(5) - inst_ready = False - tries = 0 - while not inst_ready: - if tries % 6 == 0: - self.ec2_client.reboot_instances(InstanceIds=[instance_id]) - log.info( - "Rebooting instance %s and waiting for it to to be ready.", - instance_id, - ) - tries += 1 - time.sleep(10) - response = self.ssm_client.describe_instance_information() - for info in response["InstanceInformationList"]: - if info["InstanceId"] == instance_id: - inst_ready = True + + self.ec2_client.reboot_instances(InstanceIds=[instance_id]) + log.info("Rebooting instance %s.", instance_id) + waiter = self.ec2_client.get_waiter("instance_running") + log.info("Waiting for instance %s to be running.", instance_id) + waiter.wait(InstanceIds=[instance_id]) + log.info("Instance %s is now running.", instance_id) + self.ssm_client.send_command( InstanceIds=[instance_id], DocumentName="AWS-RunShellScript", Parameters={"commands": ["cd / && sudo python3 server.py 80"]}, ) - log.info("Restarted the Python web server on instance %s.", instance_id) + log.info(f"Restarted the Python web server on instance '{instance_id}'.") except ClientError as err: - raise AutoScalerError( - f"Couldn't replace instance profile for association {profile_association_id}: {err}" - ) + log.error("Failed to replace instance profile.") + error_code = err.response["Error"]["Code"] + if error_code == "InvalidAssociationID.NotFound": + log.error( + f"Association ID '{profile_association_id}' does not exist." + "Please check the association ID and try again." + ) + if error_code == "InvalidInstanceId": + log.error( + f"The specified instance ID '{instance_id}' does not exist or is not available for SSM. " + f"Please verify the instance ID and try again." + ) + log.error(f"Full error:\n\t{err}") + pass # snippet-end:[python.cross_service.resilient_service.ec2.ReplaceIamInstanceProfileAssociation] # snippet-start:[python.cross_service.resilient_service.iam.DeleteInstanceProfile] - def delete_instance_profile(self, profile_name, role_name): + def delete_instance_profile(self, profile_name: str, role_name: str) -> None: """ Detaches a role from an instance profile, detaches policies from the role, and deletes all the resources. 
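+
+        Example (illustrative; the names follow the prefix-based defaults set
+        in the constructor):
+
+            wrapper.delete_instance_profile(
+                "doc-example-resilience-prof", "doc-example-resilience-role"
+            )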
@@ -284,25 +335,24 @@ def delete_instance_profile(self, profile_name, role_name): self.iam_client.delete_role(RoleName=role_name) log.info("Deleted role %s.", role_name) except ClientError as err: + log.error( + f"Couldn't delete instance profile {profile_name} or detach " + f"policies and delete role {role_name}: {err}" + ) if err.response["Error"]["Code"] == "NoSuchEntity": log.info( "Instance profile %s doesn't exist, nothing to do.", profile_name ) - else: - raise AutoScalerError( - f"Couldn't delete instance profile {profile_name} or detach " - f"policies and delete role {role_name}: {err}" - ) + pass # snippet-end:[python.cross_service.resilient_service.iam.DeleteInstanceProfile] # snippet-start:[python.cross_service.resilient_service.ec2.CreateKeyPair] - def create_key_pair(self, key_pair_name): + def create_key_pair(self, key_pair_name: str) -> None: """ Creates a new key pair. :param key_pair_name: The name of the key pair to create. - :return: The newly created key pair. """ try: response = self.ec2_client.create_key_pair(KeyName=key_pair_name) @@ -311,40 +361,39 @@ def create_key_pair(self, key_pair_name): chmod(f"{key_pair_name}.pem", 0o600) log.info("Created key pair %s.", key_pair_name) except ClientError as err: - raise AutoScalerError(f"Couldn't create key pair {key_pair_name}: {err}") + error_code = err.response["Error"]["Code"] + log.error(f"Failed to create key pair {key_pair_name}.") + if error_code == "InvalidKeyPair.Duplicate": + log.error(f"A key pair with the name '{key_pair_name}' already exists.") + log.error(f"Full error:\n\t{err}") + pass # snippet-end:[python.cross_service.resilient_service.ec2.CreateKeyPair] # snippet-start:[python.cross_service.resilient_service.ec2.DeleteKeyPair] - def delete_key_pair(self): + def delete_key_pair(self) -> None: """ Deletes a key pair. - - :param key_pair_name: The name of the key pair to delete. """ try: self.ec2_client.delete_key_pair(KeyName=self.key_pair_name) remove(f"{self.key_pair_name}.pem") log.info("Deleted key pair %s.", self.key_pair_name) except ClientError as err: - raise AutoScalerError( - f"Couldn't delete key pair {self.key_pair_name}: {err}" - ) - except FileNotFoundError: + log.error(f"Couldn't delete key pair '{self.key_pair_name}'.") + log.error(f"Full error:\n\t{err}") + pass + except FileNotFoundError as err: log.info("Key pair %s doesn't exist, nothing to do.", self.key_pair_name) - except PermissionError: - log.info( - "Inadequate permissions to delete key pair %s.", self.key_pair_name - ) - except Exception as err: - raise AutoScalerError( - f"Couldn't delete key pair {self.key_pair_name}: {err}" - ) + log.error(f"Full error:\n\t{err}") + pass # snippet-end:[python.cross_service.resilient_service.ec2.DeleteKeyPair] # snippet-start:[python.cross_service.resilient_service.ec2.CreateLaunchTemplate] - def create_template(self, server_startup_script_file, instance_policy_file): + def create_template( + self, server_startup_script_file: str, instance_policy_file: str + ) -> Dict[str, Any]: """ Creates an Amazon EC2 launch template to use with Amazon EC2 Auto Scaling. 
The launch template specifies a Bash script in its user data field that runs after @@ -359,6 +408,7 @@ def create_template(self, server_startup_script_file, instance_policy_file): """ template = {} try: + # Create key pair and instance profile self.create_key_pair(self.key_pair_name) self.create_instance_profile( instance_policy_file, @@ -366,10 +416,16 @@ def create_template(self, server_startup_script_file, instance_policy_file): self.instance_role_name, self.instance_profile_name, ) + + # Read the startup script with open(server_startup_script_file) as file: start_server_script = file.read() + + # Get the latest AMI ID ami_latest = self.ssm_client.get_parameter(Name=self.ami_param) ami_id = ami_latest["Parameter"]["Value"] + + # Create the launch template lt_response = self.ec2_client.create_launch_template( LaunchTemplateName=self.launch_template_name, LaunchTemplateData={ @@ -384,24 +440,17 @@ def create_template(self, server_startup_script_file, instance_policy_file): ) template = lt_response["LaunchTemplate"] log.info( - "Created launch template %s for AMI %s on %s.", - self.launch_template_name, - ami_id, - self.inst_type, + f"Created launch template {self.launch_template_name} for AMI {ami_id} on {self.inst_type}." ) except ClientError as err: - if ( - err.response["Error"]["Code"] - == "InvalidLaunchTemplateName.AlreadyExistsException" - ): + log.error(f"Failed to create launch template {self.launch_template_name}.") + error_code = err.response["Error"]["Code"] + if error_code == "InvalidLaunchTemplateName.AlreadyExistsException": log.info( - "Launch template %s already exists, nothing to do.", - self.launch_template_name, - ) - else: - raise AutoScalerError( - f"Couldn't create launch template {self.launch_template_name}: {err}." + f"Launch template {self.launch_template_name} already exists, nothing to do." ) + log.error(f"Full error:\n\t{err}") + pass return template # snippet-end:[python.cross_service.resilient_service.ec2.CreateLaunchTemplate] @@ -428,15 +477,13 @@ def delete_template(self): "Launch template %s does not exist, nothing to do.", self.launch_template_name, ) - else: - raise AutoScalerError( - f"Couldn't delete launch template {self.launch_template_name}: {err}." - ) + log.error(f"Full error:\n\t{err}") + pass # snippet-end:[python.cross_service.resilient_service.ec2.DeleteLaunchTemplate] # snippet-start:[python.cross_service.resilient_service.ec2.DescribeAvailabilityZones] - def get_availability_zones(self): + def get_availability_zones(self) -> List[str]: """ Gets a list of Availability Zones in the AWS Region of the Amazon EC2 client. @@ -445,15 +492,18 @@ def get_availability_zones(self): try: response = self.ec2_client.describe_availability_zones() zones = [zone["ZoneName"] for zone in response["AvailabilityZones"]] + log.info(f"Retrieved {len(zones)} availability zones: {zones}.") except ClientError as err: - raise AutoScalerError(f"Couldn't get availability zones: {err}.") + log.error("Failed to retrieve availability zones.") + log.error(f"Full error:\n\t{err}") + pass else: return zones # snippet-end:[python.cross_service.resilient_service.ec2.DescribeAvailabilityZones] # snippet-start:[python.cross_service.resilient_service.auto-scaling.CreateAutoScalingGroup] - def create_group(self, group_size): + def create_autoscaling_group(self, group_size: int) -> List[str]: """ Creates an EC2 Auto Scaling group with the specified size. @@ -461,7 +511,6 @@ def create_group(self, group_size): the group. 
:return: The list of Availability Zones specified for the group. """ - zones = [] try: zones = self.get_availability_zones() self.autoscaling_client.create_auto_scaling_group( @@ -475,30 +524,29 @@ def create_group(self, group_size): MaxSize=group_size, ) log.info( - "Created EC2 Auto Scaling group %s with availability zones %s.", - self.launch_template_name, - zones, + f"Created EC2 Auto Scaling group {self.group_name} with availability zones {zones}." ) except ClientError as err: - if err.response["Error"]["Code"] == "AlreadyExists": + error_code = err.response["Error"]["Code"] + if error_code == "AlreadyExists": log.info( - "EC2 Auto Scaling group %s already exists, nothing to do.", - self.group_name, + f"EC2 Auto Scaling group {self.group_name} already exists, nothing to do." ) else: - raise AutoScalerError( - f"Couldn't create EC2 Auto Scaling group {self.group_name}: {err}" - ) - return zones + log.error(f"Failed to create EC2 Auto Scaling group {self.group_name}.") + log.error(f"Full error:\n\t{err}") + pass + else: + return zones # snippet-end:[python.cross_service.resilient_service.auto-scaling.CreateAutoScalingGroup] # snippet-start:[python.cross_service.resilient_service.auto-scaling.DescribeAutoScalingGroups] - def get_instances(self): + def get_instances(self) -> List[str]: """ Gets data about the instances in the EC2 Auto Scaling group. - :return: Data about the instances. + :return: A list of instance IDs in the Auto Scaling group. """ try: as_response = self.autoscaling_client.describe_auto_scaling_groups( @@ -508,35 +556,69 @@ def get_instances(self): i["InstanceId"] for i in as_response["AutoScalingGroups"][0]["Instances"] ] - except ClientError as err: - raise AutoScalerError( - f"Couldn't get instances for Auto Scaling group {self.group_name}: {err}" + log.info( + f"Retrieved {len(instance_ids)} instances for Auto Scaling group {self.group_name}." ) + except ClientError as err: + error_code = err.response["Error"]["Code"] + log.error( + f"Failed to retrieve instances for Auto Scaling group {self.group_name}." + ) + if error_code == "ResourceNotFound": + log.error(f"The Auto Scaling group '{self.group_name}' does not exist.") + log.error(f"Full error:\n\t{err}") + pass else: return instance_ids # snippet-end:[python.cross_service.resilient_service.auto-scaling.DescribeAutoScalingGroups] - def terminate_instance(self, instance_id): + def terminate_instance(self, instance_id: str, decrementsetting=False) -> None: """ - Terminates and instances in an EC2 Auto Scaling group. After an instance is + Terminates an instance in an EC2 Auto Scaling group. After an instance is terminated, it can no longer be accessed. :param instance_id: The ID of the instance to terminate. + :param decrementsetting: If True, do not replace terminated instances. """ try: self.autoscaling_client.terminate_instance_in_auto_scaling_group( - InstanceId=instance_id, ShouldDecrementDesiredCapacity=False + InstanceId=instance_id, + ShouldDecrementDesiredCapacity=decrementsetting, ) log.info("Terminated instance %s.", instance_id) + + # Adding a waiter to ensure the instance is terminated + waiter = self.ec2_client.get_waiter("instance_terminated") + log.info("Waiting for instance %s to be terminated...", instance_id) + waiter.wait(InstanceIds=[instance_id]) + log.info( + f"Instance '{instance_id}' has been terminated and will be replaced." 
+ ) + except ClientError as err: - raise AutoScalerError(f"Couldn't terminate instance {instance_id}: {err}") + error_code = err.response["Error"]["Code"] + log.error(f"Failed to terminate instance '{instance_id}'.") + if error_code == "ScalingActivityInProgressFault": + log.error( + "Scaling activity is currently in progress. " + "Wait for the scaling activity to complete before attempting to terminate the instance again." + ) + elif error_code == "ResourceContentionFault": + log.error( + "The request failed due to a resource contention issue. " + "Ensure that no conflicting operations are being performed on the resource." + ) + log.error(f"Full error:\n\t{err}") + pass # snippet-start:[python.cross_service.resilient_service.auto-scaling.AttachLoadBalancerTargetGroups] - def attach_load_balancer_target_group(self, lb_target_group): + def attach_load_balancer_target_group( + self, lb_target_group: Dict[str, Any] + ) -> None: """ Attaches an Elastic Load Balancing (ELB) target group to this EC2 Auto Scaling group. - The target group specifies how the load balancer forward requests to the instances + The target group specifies how the load balancer forwards requests to the instances in the group. :param lb_target_group: Data about the ELB target group to attach. @@ -552,83 +634,73 @@ def attach_load_balancer_target_group(self, lb_target_group): self.group_name, ) except ClientError as err: - raise AutoScalerError( - f"Couldn't attach load balancer target group {lb_target_group['TargetGroupName']}\n" - f"to auto scaling group {self.group_name}" - ) + error_code = err.response["Error"]["Code"] + log.error( + f"Failed to attach load balancer target group '{lb_target_group['TargetGroupName']}'." + ) + if error_code == "ResourceContentionFault": + log.error( + "The request failed due to a resource contention issue. " + "Ensure that no conflicting operations are being performed on the resource." + ) + elif error_code == "ServiceLinkedRoleFailure": + log.error( + "The operation failed because the service-linked role is not ready or does not exist. " + "Check that the service-linked role exists and is correctly configured." + ) + log.error(f"Full error:\n\t{err}") + pass # snippet-end:[python.cross_service.resilient_service.auto-scaling.AttachLoadBalancerTargetGroups] # snippet-start:[python.cross_service.resilient_service.auto-scaling.DeleteAutoScalingGroup] - def _try_terminate_instance(self, inst_id): - stopping = False - log.info(f"Stopping {inst_id}.") - while not stopping: - try: - self.autoscaling_client.terminate_instance_in_auto_scaling_group( - InstanceId=inst_id, ShouldDecrementDesiredCapacity=True - ) - stopping = True - except ClientError as err: - if err.response["Error"]["Code"] == "ScalingActivityInProgress": - log.info("Scaling activity in progress for %s. Waiting...", inst_id) - time.sleep(10) - else: - raise AutoScalerError(f"Couldn't stop instance {inst_id}: {err}.") - - def _try_delete_group(self): - """ - Tries to delete the EC2 Auto Scaling group. If the group is in use or in progress, - the function waits and retries until the group is successfully deleted. 
- """ - stopped = False - while not stopped: - try: - self.autoscaling_client.delete_auto_scaling_group( - AutoScalingGroupName=self.group_name - ) - stopped = True - log.info("Deleted EC2 Auto Scaling group %s.", self.group_name) - except ClientError as err: - if ( - err.response["Error"]["Code"] == "ResourceInUse" - or err.response["Error"]["Code"] == "ScalingActivityInProgress" - ): - log.info( - "Some instances are still running. Waiting for them to stop..." - ) - time.sleep(10) - else: - raise AutoScalerError( - f"Couldn't delete group {self.group_name}: {err}." - ) - - def delete_group(self): - """ - Terminates all instances in the group, deletes the EC2 Auto Scaling group. + def delete_autoscaling_group(self, group_name: str) -> None: + """ + Terminates all instances in the group, then deletes the EC2 Auto Scaling group. + + :param group_name: The name of the group to delete. """ try: response = self.autoscaling_client.describe_auto_scaling_groups( - AutoScalingGroupNames=[self.group_name] + AutoScalingGroupNames=[group_name] ) groups = response.get("AutoScalingGroups", []) if len(groups) > 0: self.autoscaling_client.update_auto_scaling_group( - AutoScalingGroupName=self.group_name, MinSize=0 + AutoScalingGroupName=group_name, MinSize=0 ) instance_ids = [inst["InstanceId"] for inst in groups[0]["Instances"]] for inst_id in instance_ids: - self._try_terminate_instance(inst_id) - self._try_delete_group() + self.terminate_instance(inst_id) + + # Wait for all instances to be terminated + if instance_ids: + waiter = self.ec2_client.get_waiter("instance_terminated") + log.info("Waiting for all instances to be terminated...") + waiter.wait(InstanceIds=instance_ids) + log.info("All instances have been terminated.") else: - log.info("No groups found named %s, nothing to do.", self.group_name) + log.info(f"No groups found named '{group_name}'! Nothing to do.") except ClientError as err: - raise AutoScalerError(f"Couldn't delete group {self.group_name}: {err}.") + error_code = err.response["Error"]["Code"] + log.error(f"Failed to delete Auto Scaling group '{group_name}'.") + if error_code == "ScalingActivityInProgressFault": + log.error( + "Scaling activity is currently in progress. " + "Wait for the scaling activity to complete before attempting to delete the group again." + ) + elif error_code == "ResourceContentionFault": + log.error( + "The request failed due to a resource contention issue. " + "Ensure that no conflicting operations are being performed on the group." + ) + log.error(f"Full error:\n\t{err}") + pass # snippet-end:[python.cross_service.resilient_service.auto-scaling.DeleteAutoScalingGroup] # snippet-start:[python.cross_service.resilient_service.ec2.DescribeVpcs] - def get_default_vpc(self): + def get_default_vpc(self) -> Dict[str, Any]: """ Gets the default VPC for the account. @@ -639,14 +711,33 @@ def get_default_vpc(self): Filters=[{"Name": "is-default", "Values": ["true"]}] ) except ClientError as err: - raise AutoScalerError(f"Couldn't get default VPC: {err}") + error_code = err.response["Error"]["Code"] + log.error("Failed to retrieve the default VPC.") + if error_code == "UnauthorizedOperation": + log.error( + "You do not have the necessary permissions to describe VPCs. " + "Ensure that your AWS IAM user or role has the correct permissions." + ) + elif error_code == "InvalidParameterValue": + log.error( + "One or more parameters are invalid. Check the request parameters." 
+ ) + + log.error(f"Full error:\n\t{err}") + pass else: - return response["Vpcs"][0] + if "Vpcs" in response and response["Vpcs"]: + log.info(f"Retrieved default VPC: {response['Vpcs'][0]['VpcId']}") + return response["Vpcs"][0] + else: + pass # snippet-end:[python.cross_service.resilient_service.ec2.DescribeVpcs] # snippet-start:[python.cross_service.resilient_service.ec2.DescribeSecurityGroups] - def verify_inbound_port(self, vpc, port, ip_address): + def verify_inbound_port( + self, vpc: Dict[str, Any], port: int, ip_address: str + ) -> Tuple[Dict[str, Any], bool]: """ Verify the default security group of the specified VPC allows ingress from this computer. This can be done by allowing ingress from this computer's IP @@ -658,7 +749,7 @@ def verify_inbound_port(self, vpc, port, ip_address): :param vpc: The VPC used by this example. :param port: The port to verify. :param ip_address: This computer's IP address. - :return: The default security group of the specific VPC, and a value that indicates + :return: The default security group of the specified VPC, and a value that indicates whether the specified port is open. """ try: @@ -670,10 +761,11 @@ def verify_inbound_port(self, vpc, port, ip_address): ) sec_group = response["SecurityGroups"][0] port_is_open = False - log.info("Found default security group %s.", sec_group["GroupId"]) + log.info(f"Found default security group {sec_group['GroupId']}.") + for ip_perm in sec_group["IpPermissions"]: if ip_perm.get("FromPort", 0) == port: - log.info("Found inbound rule: %s", ip_perm) + log.info(f"Found inbound rule: {ip_perm}") for ip_range in ip_perm["IpRanges"]: cidr = ip_range.get("CidrIp", "") if cidr.startswith(ip_address) or cidr == "0.0.0.0/0": @@ -682,23 +774,29 @@ def verify_inbound_port(self, vpc, port, ip_address): port_is_open = True if not port_is_open: log.info( - "The inbound rule does not appear to be open to either this computer's IP\n" - "address of %s, to all IP addresses (0.0.0.0/0), or to a prefix list ID.", - ip_address, + f"The inbound rule does not appear to be open to either this computer's IP " + f"address of {ip_address}, to all IP addresses (0.0.0.0/0), or to a prefix list ID." ) else: break except ClientError as err: - raise AutoScalerError( - f"Couldn't verify inbound rule for port {port} for VPC {vpc['VpcId']}: {err}" + error_code = err.response["Error"]["Code"] + log.error( + f"Failed to verify inbound rule for port {port} for VPC {vpc['VpcId']}." ) + if error_code == "InvalidVpcID.NotFound": + log.error( + f"The specified VPC ID '{vpc['VpcId']}' does not exist. Please check the VPC ID." + ) + log.error(f"Full error:\n\t{err}") + pass else: return sec_group, port_is_open # snippet-end:[python.cross_service.resilient_service.ec2.DescribeSecurityGroups] # snippet-start:[python.cross_service.resilient_service.ec2.AuthorizeSecurityGroupIngress] - def open_inbound_port(self, sec_group_id, port, ip_address): + def open_inbound_port(self, sec_group_id: str, port: int, ip_address: str) -> None: """ Add an ingress rule to the specified security group that allows access on the specified port from the specified IP address. @@ -722,14 +820,27 @@ def open_inbound_port(self, sec_group_id, port, ip_address): ip_address, ) except ClientError as err: - raise AutoScalerError( - f"Couldn't authorize ingress to {sec_group_id} on port {port} from {ip_address}: {err}" - ) + error_code = err.response["Error"]["Code"] + log.error( + f"Failed to authorize ingress to security group '{sec_group_id}' on port {port} from {ip_address}." 
+ ) + if error_code == "InvalidGroupId.Malformed": + log.error( + "The security group ID is malformed. " + "Please verify that the security group ID is correct." + ) + elif error_code == "InvalidPermission.Duplicate": + log.error( + "The specified rule already exists in the security group. " + "Check the existing rules for this security group." + ) + log.error(f"Full error:\n\t{err}") + pass # snippet-end:[python.cross_service.resilient_service.ec2.AuthorizeSecurityGroupIngress] # snippet-start:[python.cross_service.resilient_service.ec2.DescribeSubnets] - def get_subnets(self, vpc_id, zones): + def get_subnets(self, vpc_id: str, zones: List[str] = None) -> List[Dict[str, Any]]: """ Gets the default subnets in a VPC for a specified list of Availability Zones. @@ -737,20 +848,38 @@ def get_subnets(self, vpc_id, zones): :param zones: The list of Availability Zones to look up. :return: The list of subnets found. """ + # Ensure that 'zones' is a list, even if None is passed + if zones is None: + zones = [] try: - response = self.ec2_client.describe_subnets( + paginator = self.ec2_client.get_paginator("describe_subnets") + page_iterator = paginator.paginate( Filters=[ {"Name": "vpc-id", "Values": [vpc_id]}, {"Name": "availability-zone", "Values": zones}, {"Name": "default-for-az", "Values": ["true"]}, ] ) - subnets = response["Subnets"] + + subnets = [] + for page in page_iterator: + subnets.extend(page["Subnets"]) + log.info("Found %s subnets for the specified zones.", len(subnets)) - except ClientError as err: - raise AutoScalerError(f"Couldn't get subnets: {err}") - else: return subnets + except ClientError as err: + log.error( + f"Failed to retrieve subnets for VPC '{vpc_id}' in zones {zones}." + ) + error_code = err.response["Error"]["Code"] + if error_code == "InvalidVpcID.NotFound": + log.error( + "The specified VPC ID does not exist. " + "Please check the VPC ID and try again." + ) + # Add more error-specific handling as needed + log.error(f"Full error:\n\t{err}") + pass # snippet-end:[python.cross_service.resilient_service.ec2.DescribeSubnets] diff --git a/python/cross_service/resilient_service/test/conftest.py b/python/cross_service/resilient_service/conftest.py similarity index 91% rename from python/cross_service/resilient_service/test/conftest.py rename to python/cross_service/resilient_service/conftest.py index 07332610590..8afcfaf575a 100644 --- a/python/cross_service/resilient_service/test/conftest.py +++ b/python/cross_service/resilient_service/conftest.py @@ -4,21 +4,16 @@ """ Contains common test fixtures used to run unit tests. """ - from datetime import datetime -import sys + import boto3 import pytest -from auto_scaler import AutoScaler -from load_balancer import LoadBalancer +import runner +from auto_scaler import AutoScalingWrapper +from load_balancer import ElasticLoadBalancerWrapper from parameters import ParameterHelper from recommendation_service import RecommendationService -import runner - -# This is needed so Python can find test_tools on the path. 
-sys.path.append("../..") -from test_tools.fixtures.common import * class ScenarioData: @@ -72,7 +67,7 @@ def __init__(self, auto_scaling, elb, ddb, ec2, ssm, iam): self.scenario = runner.Runner( self.test_resource_path, RecommendationService(self.table_name, self.ddb.client), - AutoScaler( + AutoScalingWrapper( self.resource_prefix, self.inst_type, self.ami_param, @@ -81,7 +76,7 @@ def __init__(self, auto_scaling, elb, ddb, ec2, ssm, iam): self.ssm.client, self.iam.client, ), - LoadBalancer(self.tg_name, self.lb_name, self.elb.client), + ElasticLoadBalancerWrapper(self.elb.client), ParameterHelper(self.table_name, self.ssm.client), ) diff --git a/python/cross_service/resilient_service/load_balancer.py b/python/cross_service/resilient_service/load_balancer.py index 4e177bc665d..4d5bb2f18e8 100644 --- a/python/cross_service/resilient_service/load_balancer.py +++ b/python/cross_service/resilient_service/load_balancer.py @@ -3,78 +3,42 @@ import logging import time +from typing import Any, Dict, List import boto3 -from botocore.exceptions import ClientError import requests +from botocore.exceptions import ClientError log = logging.getLogger(__name__) -class LoadBalancerError(Exception): - pass - - # snippet-start:[python.example_code.workflow.ResilientService_LoadBalancer] # snippet-start:[python.cross_service.resilient_service.LoadBalancer.decl] -class LoadBalancer: +class ElasticLoadBalancerWrapper: """Encapsulates Elastic Load Balancing (ELB) actions.""" - def __init__(self, target_group_name, load_balancer_name, elb_client): + def __init__(self, elb_client: boto3.client): """ - :param target_group_name: The name of the target group associated with the load balancer. - :param load_balancer_name: The name of the load balancer. - :param elb_client: A Boto3 Elastic Load Balancing client. + Initializes the LoadBalancer class with the necessary parameters. """ - self.target_group_name = target_group_name - self.load_balancer_name = load_balancer_name self.elb_client = elb_client - self._endpoint = None # snippet-end:[python.cross_service.resilient_service.LoadBalancer.decl] - @classmethod - def from_client(cls, resource_prefix): - """ - Creates this class from a Boto3 client. - - :param resource_prefix: The prefix to give to AWS resources created by this class. - """ - elb_client = boto3.client("elbv2") - return cls(f"{resource_prefix}-tg", f"{resource_prefix}-lb", elb_client) - - # snippet-start:[python.cross_service.resilient_service.elbv2.DescribeLoadBalancers] - def endpoint(self): - """ - Gets the HTTP endpoint of the load balancer. - - :return: The endpoint. - """ - if self._endpoint is None: - try: - response = self.elb_client.describe_load_balancers( - Names=[self.load_balancer_name] - ) - self._endpoint = response["LoadBalancers"][0]["DNSName"] - except ClientError as err: - raise LoadBalancerError( - f"Couldn't get the endpoint for load balancer {self.load_balancer_name}: {err}" - ) - return self._endpoint - - # snippet-end:[python.cross_service.resilient_service.elbv2.DescribeLoadBalancers] - # snippet-start:[python.cross_service.resilient_service.elbv2.CreateTargetGroup] - def create_target_group(self, protocol, port, vpc_id): + def create_target_group( + self, target_group_name: str, protocol: str, port: int, vpc_id: str + ) -> Dict[str, Any]: """ Creates an Elastic Load Balancing target group. 
The target group specifies how - the load balancer forward requests to instances in the group and how instance + the load balancer forwards requests to instances in the group and how instance health is checked. To speed up this demo, the health check is configured with shortened times and lower thresholds. In production, you might want to decrease the sensitivity of your health checks to avoid unwanted failures. + :param target_group_name: The name of the target group to create. :param protocol: The protocol to use to forward requests, such as 'HTTP'. :param port: The port to use to forward requests, such as 80. :param vpc_id: The ID of the VPC in which the load balancer exists. @@ -82,7 +46,7 @@ def create_target_group(self, protocol, port, vpc_id): """ try: response = self.elb_client.create_target_group( - Name=self.target_group_name, + Name=target_group_name, Protocol=protocol, Port=port, HealthCheckPath="/healthcheck", @@ -93,76 +57,167 @@ def create_target_group(self, protocol, port, vpc_id): VpcId=vpc_id, ) target_group = response["TargetGroups"][0] - log.info("Created load balancing target group %s.", self.target_group_name) + log.info(f"Created load balancing target group '{target_group_name}'.") + return target_group except ClientError as err: - raise LoadBalancerError( - f"Couldn't create load balancing target group {self.target_group_name}: {err}" + log.error( + f"Couldn't create load balancing target group '{target_group_name}'." ) - else: - return target_group + error_code = err.response["Error"]["Code"] + + if error_code == "DuplicateTargetGroupName": + log.error( + f"Target group name {target_group_name} already exists. " + "Check if the target group already exists." + "Consider using a different name or deleting the existing target group if appropriate." + ) + elif error_code == "TooManyTargetGroups": + log.error( + "Too many target groups exist in the account. " + "Consider deleting unused target groups to create space for new ones." + ) + log.error(f"Full error:\n\t{err}") + pass # snippet-end:[python.cross_service.resilient_service.elbv2.CreateTargetGroup] # snippet-start:[python.cross_service.resilient_service.elbv2.DeleteTargetGroup] - def delete_target_group(self): + def delete_target_group(self, target_group_name) -> None: """ Deletes the target group. """ - done = False - while not done: - try: - response = self.elb_client.describe_target_groups( - Names=[self.target_group_name] + try: + # Describe the target group to get its ARN + response = self.elb_client.describe_target_groups(Names=[target_group_name]) + tg_arn = response["TargetGroups"][0]["TargetGroupArn"] + + # Delete the target group + self.elb_client.delete_target_group(TargetGroupArn=tg_arn) + log.info("Deleted load balancing target group %s.", target_group_name) + + # Use a custom waiter to wait until the target group is no longer available + self.wait_for_target_group_deletion(self.elb_client, tg_arn) + log.info("Target group %s successfully deleted.", target_group_name) + + except ClientError as err: + error_code = err.response["Error"]["Code"] + log.error(f"Failed to delete target group '{target_group_name}'.") + if error_code == "TargetGroupNotFound": + log.error( + "Load balancer target group either already deleted or never existed. " + "Verify the name and check that the resource exists in the AWS Console." 
) - tg_arn = response["TargetGroups"][0]["TargetGroupArn"] - self.elb_client.delete_target_group(TargetGroupArn=tg_arn) - log.info( - "Deleted load balancing target group %s.", self.target_group_name + elif error_code == "ResourceInUseException": + log.error( + "Target group still in use by another resource. " + "Ensure that the target group is no longer associated with any load balancers or resources.", ) - done = True - except ClientError as err: - if err.response["Error"]["Code"] == "TargetGroupNotFound": - log.info( - "Load balancer target group %s not found, nothing to do.", - self.target_group_name, - ) - done = True - elif err.response["Error"]["Code"] == "ResourceInUse": - log.info( - "Target group not yet released from load balancer, waiting..." + log.error(f"Full error:\n\t{err}") + pass + + def wait_for_target_group_deletion( + self, elb_client, target_group_arn, max_attempts=10, delay=30 + ): + for attempt in range(max_attempts): + try: + elb_client.describe_target_groups(TargetGroupArns=[target_group_arn]) + print( + f"Attempt {attempt + 1}: Target group {target_group_arn} still exists." + ) + except ClientError as e: + if e.response["Error"]["Code"] == "TargetGroupNotFound": + print( + f"Target group {target_group_arn} has been successfully deleted." ) - time.sleep(10) + return else: - raise LoadBalancerError( - f"Couldn't delete load balancing target group {self.target_group_name}: {err}" - ) + raise + time.sleep(delay) + raise TimeoutError( + f"Target group {target_group_arn} was not deleted after {max_attempts * delay} seconds." + ) # snippet-end:[python.cross_service.resilient_service.elbv2.DeleteTargetGroup] # snippet-start:[python.cross_service.resilient_service.elbv2.CreateLoadBalancer] - # snippet-start:[python.cross_service.resilient_service.elbv2.CreateListener] - def create_load_balancer(self, subnet_ids, target_group): + def create_load_balancer( + self, + load_balancer_name: str, + subnet_ids: List[str], + ) -> Dict[str, Any]: """ Creates an Elastic Load Balancing load balancer that uses the specified subnets and forwards requests to the specified target group. + :param load_balancer_name: The name of the load balancer to create. :param subnet_ids: A list of subnets to associate with the load balancer. - :param target_group: An existing target group that is added as a listener to the - load balancer. :return: Data about the newly created load balancer. """ try: response = self.elb_client.create_load_balancer( - Name=self.load_balancer_name, Subnets=subnet_ids + Name=load_balancer_name, Subnets=subnet_ids ) load_balancer = response["LoadBalancers"][0] - log.info("Created load balancer %s.", self.load_balancer_name) + log.info(f"Created load balancer '{load_balancer_name}'.") + waiter = self.elb_client.get_waiter("load_balancer_available") - log.info("Waiting for load balancer to be available...") - waiter.wait(Names=[self.load_balancer_name]) - log.info("Load balancer is available!") - self.elb_client.create_listener( - LoadBalancerArn=load_balancer["LoadBalancerArn"], + log.info( + f"Waiting for load balancer '{load_balancer_name}' to be available..." + ) + waiter.wait(Names=[load_balancer_name]) + log.info(f"Load balancer '{load_balancer_name}' is now available!") + + except ClientError as err: + error_code = err.response["Error"]["Code"] + log.error( + f"Failed to create load balancer '{load_balancer_name}'. 
Error code: {error_code}, Message: {err.response['Error']['Message']}" + ) + + if error_code == "DuplicateLoadBalancerNameException": + log.error( + f"A load balancer with the name '{load_balancer_name}' already exists. " + "Load balancer names must be unique within the AWS region. " + "Please choose a different name and try again." + ) + if error_code == "TooManyLoadBalancersException": + log.error( + "The maximum number of load balancers has been reached in this account and region. " + "You can delete unused load balancers or request an increase in the service quota from AWS Support." + ) + log.error(f"Full error:\n\t{err}") + pass + else: + return load_balancer + + # snippet-end:[python.cross_service.resilient_service.elbv2.CreateLoadBalancer] + + # snippet-start:[python.cross_service.resilient_service.elbv2.CreateListener] + def create_listener( + self, + load_balancer_name: str, + target_group: Dict[str, Any], + ) -> Dict[str, Any]: + """ + Creates a listener for the specified load balancer that forwards requests to the + specified target group. + + :param load_balancer_name: The name of the load balancer to create a listener for. + :param target_group: An existing target group that is added as a listener to the + load balancer. + :return: Data about the newly created listener. + """ + try: + # Retrieve the load balancer ARN + load_balancer_response = self.elb_client.describe_load_balancers( + Names=[load_balancer_name] + ) + load_balancer_arn = load_balancer_response["LoadBalancers"][0][ + "LoadBalancerArn" + ] + + # Create the listener + response = self.elb_client.create_listener( + LoadBalancerArn=load_balancer_arn, Protocol=target_group["Protocol"], Port=target_group["Port"], DefaultActions=[ @@ -173,65 +228,107 @@ def create_load_balancer(self, subnet_ids, target_group): ], ) log.info( - "Created listener to forward traffic from load balancer %s to target group %s.", - self.load_balancer_name, - target_group["TargetGroupName"], + f"Created listener to forward traffic from load balancer '{load_balancer_name}' to target group '{target_group['TargetGroupName']}'." ) + return response["Listeners"][0] except ClientError as err: - raise LoadBalancerError( - f"Failed to create load balancer {self.load_balancer_name}" - f"and add a listener for target group {target_group['TargetGroupName']}: {err}" + error_code = err.response["Error"]["Code"] + log.error( + f"Failed to add a listener on '{load_balancer_name}' for target group '{target_group['TargetGroupName']}'." ) - else: - self._endpoint = load_balancer["DNSName"] - return load_balancer + + if error_code == "ListenerNotFoundException": + log.error( + f"The listener could not be found for the load balancer '{load_balancer_name}'. " + "Please check the load balancer name and target group configuration." + ) + if error_code == "InvalidConfigurationRequestException": + log.error( + f"The configuration provided for the listener on load balancer '{load_balancer_name}' is invalid. " + "Please review the provided protocol, port, and target group settings." + ) + log.error(f"Full error:\n\t{err}") + pass # snippet-end:[python.cross_service.resilient_service.elbv2.CreateListener] - # snippet-end:[python.cross_service.resilient_service.elbv2.CreateLoadBalancer] # snippet-start:[python.cross_service.resilient_service.elbv2.DeleteLoadBalancer] - def delete_load_balancer(self): + def delete_load_balancer(self, load_balancer_name) -> None: """ Deletes a load balancer. + + :param load_balancer_name: The name of the load balancer to delete. 
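+
+        Example (illustrative; the wrapper instance and load balancer name are
+        hypothetical):
+
+            elb_wrapper.delete_load_balancer("doc-example-resilience-lb")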
""" try: response = self.elb_client.describe_load_balancers( - Names=[self.load_balancer_name] + Names=[load_balancer_name] ) lb_arn = response["LoadBalancers"][0]["LoadBalancerArn"] self.elb_client.delete_load_balancer(LoadBalancerArn=lb_arn) - log.info("Deleted load balancer %s.", self.load_balancer_name) + log.info("Deleted load balancer %s.", load_balancer_name) waiter = self.elb_client.get_waiter("load_balancers_deleted") log.info("Waiting for load balancer to be deleted...") - waiter.wait(Names=[self.load_balancer_name]) + waiter.wait(Names=[load_balancer_name]) except ClientError as err: - if err.response["Error"]["Code"] == "LoadBalancerNotFound": - log.info( - "Load balancer %s does not exist, nothing to do.", - self.load_balancer_name, - ) - else: - raise LoadBalancerError( - f"Couldn't delete load balancer {self.load_balancer_name}: {err}" + error_code = err.response["Error"]["Code"] + log.error( + f"Couldn't delete load balancer '{load_balancer_name}'. Error code: {error_code}, Message: {err.response['Error']['Message']}" + ) + + if error_code == "LoadBalancerNotFoundException": + log.error( + f"The load balancer '{load_balancer_name}' does not exist. " + "Please check the name and try again." ) + log.error(f"Full error:\n\t{err}") + pass # snippet-end:[python.cross_service.resilient_service.elbv2.DeleteLoadBalancer] - def verify_load_balancer_endpoint(self): + # snippet-start:[python.cross_service.resilient_service.elbv2.DescribeLoadBalancers] + def get_endpoint(self, load_balancer_name) -> str: + """ + Gets the HTTP endpoint of the load balancer. + + :return: The endpoint. + """ + try: + response = self.elb_client.describe_load_balancers( + Names=[load_balancer_name] + ) + return response["LoadBalancers"][0]["DNSName"] + except ClientError as err: + log.error( + f"Couldn't get the endpoint for load balancer {load_balancer_name}" + ) + error_code = err.response["Error"]["Code"] + if error_code == "LoadBalancerNotFoundException": + log.error( + "Verify load balancer name and ensure it exists in the AWS console." + ) + log.error(f"Full error:\n\t{err}") + pass + + # snippet-end:[python.cross_service.resilient_service.elbv2.DescribeLoadBalancers] + @staticmethod + def verify_load_balancer_endpoint(endpoint) -> bool: """ Verify this computer can successfully send a GET request to the load balancer endpoint. + + :param endpoint: The endpoint to verify. + :return: True if the GET request is successful, False otherwise. """ - success = False retries = 3 - while not success and retries > 0: + verified = False + while not verified and retries > 0: try: - lb_response = requests.get(f"http://{self.endpoint()}") + lb_response = requests.get(f"http://{endpoint}") log.info( "Got response %s from load balancer endpoint.", lb_response.status_code, ) if lb_response.status_code == 200: - success = True + verified = True else: retries = 0 except requests.exceptions.ConnectionError: @@ -240,10 +337,10 @@ def verify_load_balancer_endpoint(self): ) retries -= 1 time.sleep(10) - return success + return verified # snippet-start:[python.cross_service.resilient_service.elbv2.DescribeTargetHealth] - def check_target_health(self): + def check_target_health(self, target_group_name: str) -> List[Dict[str, Any]]: """ Checks the health of the instances in the target group. 

@@ -251,15 +348,28 @@
        :return: The health status of the instances.
        """
        try:
            tg_response = self.elb_client.describe_target_groups(
-                Names=[self.target_group_name]
+                Names=[target_group_name]
            )
            health_response = self.elb_client.describe_target_health(
                TargetGroupArn=tg_response["TargetGroups"][0]["TargetGroupArn"]
            )
        except ClientError as err:
-            raise LoadBalancerError(
-                f"Couldn't check health of {self.target_group_name} targets: {err}"
-            )
+            log.error(f"Couldn't check health of {target_group_name} target(s).")
+            error_code = err.response["Error"]["Code"]
+            if error_code == "LoadBalancerNotFoundException":
+                log.error(
+                    "Load balancer associated with the target group was not found. "
+                    "Ensure the load balancer exists, is in the correct AWS region, and "
+                    "that you have the necessary permissions to access it.",
+                )
+            elif error_code == "TargetGroupNotFoundException":
+                log.error(
+                    "Target group was not found. "
+                    "Verify the target group name, check that it exists in the correct region, "
+                    "and ensure it has not been deleted or created in a different account.",
+                )
+            log.error(f"Full error:\n\t{err}")
+            pass
        else:
            return health_response["TargetHealthDescriptions"]
diff --git a/python/cross_service/resilient_service/parameters.py b/python/cross_service/resilient_service/parameters.py
index 5b873ef4679..75e6fc65299 100644
--- a/python/cross_service/resilient_service/parameters.py
+++ b/python/cross_service/resilient_service/parameters.py
@@ -9,10 +9,6 @@
 log = logging.getLogger(__name__)
 
 
-class ParameterHelperError(Exception):
-    pass
-
-
 # snippet-start:[python.example_code.workflow.ResilientService_ParameterHelper]
 class ParameterHelper:
     """
@@ -21,12 +17,14 @@ class ParameterHelper:
     how the service responds to a health check.
     """
 
-    table = "doc-example-resilient-architecture-table"
-    failure_response = "doc-example-resilient-architecture-failure-response"
-    health_check = "doc-example-resilient-architecture-health-check"
+    table: str = "doc-example-resilient-architecture-table"
+    failure_response: str = "doc-example-resilient-architecture-failure-response"
+    health_check: str = "doc-example-resilient-architecture-health-check"
 
-    def __init__(self, table_name, ssm_client):
+    def __init__(self, table_name: str, ssm_client: boto3.client):
         """
+        Initializes the ParameterHelper class with the necessary parameters.
+
         :param table_name: The name of the DynamoDB table that is used as a
                            recommendation service.
         :param ssm_client: A Boto3 Systems Manager client.
@@ -34,12 +32,7 @@ class ParameterHelper:
         self.ssm_client = ssm_client
         self.table_name = table_name
 
-    @classmethod
-    def from_client(cls, table_name):
-        ssm_client = boto3.client("ssm")
-        return cls(table_name, ssm_client)
-
-    def reset(self):
+    def reset(self) -> None:
         """
         Resets the Systems Manager parameters to starting values for the demo.
         These are the name of the DynamoDB recommendation table, no response when a
@@ -49,22 +42,34 @@ class ParameterHelper:
         self.put(self.failure_response, "none")
         self.put(self.health_check, "shallow")
 
-    def put(self, name, value):
+    def put(self, name: str, value: str) -> None:
         """
         Sets the value of a named Systems Manager parameter.
 
         :param name: The name of the parameter.
         :param value: The new value of the parameter.
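+
+        Example (illustrative; uses the class-level parameter names defined
+        above):
+
+            param_helper.put(ParameterHelper.health_check, "shallow")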
""" try: self.ssm_client.put_parameter( Name=name, Value=value, Overwrite=True, Type="String" ) - log.info("Setting demo parameter %s to '%s'.", name, value) + log.info("Setting parameter %s to '%s'.", name, value) except ClientError as err: - raise ParameterHelperError( - f"Couldn't set parameter {name} to {value}: {err}" - ) + error_code = err.response["Error"]["Code"] + log.error(f"Failed to set parameter {name}.") + if error_code == "ParameterLimitExceeded": + log.error( + "The parameter limit has been exceeded. " + "Consider deleting unused parameters or request a limit increase." + ) + elif error_code == "ParameterAlreadyExists": + log.error( + "The parameter already exists and overwrite is set to False. " + "Use Overwrite=True to update the parameter." + ) + log.error(f"Full error:\n\t{err}") + pass # snippet-end:[python.example_code.workflow.ResilientService_ParameterHelper] diff --git a/python/cross_service/resilient_service/recommendation_service.py b/python/cross_service/resilient_service/recommendation_service.py index dea782a042b..58aa3d78e2f 100644 --- a/python/cross_service/resilient_service/recommendation_service.py +++ b/python/cross_service/resilient_service/recommendation_service.py @@ -3,6 +3,7 @@ import json import logging +from typing import Any, Dict import boto3 from botocore.exceptions import ClientError @@ -11,7 +12,17 @@ class RecommendationServiceError(Exception): - def __init__(self, table_name, message): + """ + Custom exception for errors related to the RecommendationService. + """ + + def __init__(self, table_name: str, message: str): + """ + Initializes the RecommendationServiceError. + + :param table_name: The name of the DynamoDB table where the error occurred. + :param message: The error message. + """ self.table_name = table_name self.message = message super().__init__(self.message) @@ -24,32 +35,25 @@ class RecommendationService: and songs. """ - def __init__(self, table_name, dynamodb_client): + def __init__(self, table_name: str, dynamodb_client: boto3.client): """ + Initializes the RecommendationService class with the necessary parameters. + :param table_name: The name of the DynamoDB recommendations table. :param dynamodb_client: A Boto3 DynamoDB client. """ self.table_name = table_name self.dynamodb_client = dynamodb_client - @classmethod - def from_client(cls, table_name): + def create(self) -> Dict[str, Any]: """ - Creates this class from a Boto3 client. - - :param table_name: The name of the DynamoDB recommendations table. - """ - ddb_client = boto3.client("dynamodb") - return cls(table_name, ddb_client) - - def create(self): - """ - Creates a DynamoDB table to use a recommendation service. The table has a + Creates a DynamoDB table to use as a recommendation service. The table has a hash key named 'MediaType' that defines the type of media recommended, such as Book or Movie, and a range key named 'ItemId' that, combined with the MediaType, forms a unique identifier for the recommended item. :return: Data about the newly created table. + :raises RecommendationServiceError: If the table creation fails. 
""" try: response = self.dynamodb_client.create_table( @@ -70,7 +74,7 @@ def create(self): log.info("Table %s created.", self.table_name) except ClientError as err: if err.response["Error"]["Code"] == "ResourceInUseException": - log.info("Table %s exists, nothing to be do.", self.table_name) + log.info("Table %s exists, nothing to be done.", self.table_name) else: raise RecommendationServiceError( self.table_name, f"ClientError when creating table: {err}." @@ -78,11 +82,12 @@ def create(self): else: return response - def populate(self, data_file): + def populate(self, data_file: str) -> None: """ Populates the recommendations table from a JSON file. :param data_file: The path to the data file. + :raises RecommendationServiceError: If the table population fails. """ try: with open(data_file) as data: @@ -97,9 +102,11 @@ def populate(self, data_file): self.table_name, f"Couldn't populate table from {data_file}: {err}" ) - def destroy(self): + def destroy(self) -> None: """ Deletes the recommendations table. + + :raises RecommendationServiceError: If the table deletion fails. """ try: self.dynamodb_client.delete_table(TableName=self.table_name) diff --git a/python/cross_service/resilient_service/requirements.txt b/python/cross_service/resilient_service/requirements.txt index 8dd2ddb2241..a58421ca7c0 100644 --- a/python/cross_service/resilient_service/requirements.txt +++ b/python/cross_service/resilient_service/requirements.txt @@ -1,3 +1,4 @@ boto3>=1.26.79 pytest>=7.2.1 -requests>=2.29.0 \ No newline at end of file +requests>=2.29.0 +coloredlogs>=15.0.1 \ No newline at end of file diff --git a/python/cross_service/resilient_service/runner.py b/python/cross_service/resilient_service/runner.py index 74cc37964d1..1e14e97a924 100644 --- a/python/cross_service/resilient_service/runner.py +++ b/python/cross_service/resilient_service/runner.py @@ -12,26 +12,50 @@ import argparse import logging -from pprint import pp import sys +from pprint import pp +import boto3 +import coloredlogs import requests -from auto_scaler import AutoScaler -from load_balancer import LoadBalancer +from auto_scaler import AutoScalingWrapper +from load_balancer import ElasticLoadBalancerWrapper from parameters import ParameterHelper from recommendation_service import RecommendationService -# Add relative path to include demo_tools in this code example without need for setup. sys.path.append("../..") -import demo_tools.question as q +import demo_tools.question as q # noqa + +# Configure coloredlogs +coloredlogs.install( + level="INFO", fmt="%(asctime)s %(levelname)s: %(message)s", datefmt="%H:%M:%S" +) # snippet-start:[python.example_code.workflow.ResilientService_Runner] class Runner: + """ + Manages the deployment, demonstration, and destruction of resources for the resilient service. + """ + def __init__( - self, resource_path, recommendation, autoscaler, loadbalancer, param_helper + self, + resource_path: str, + recommendation: RecommendationService, + autoscaler: AutoScalingWrapper, + loadbalancer: ElasticLoadBalancerWrapper, + param_helper: ParameterHelper, ): + """ + Initializes the Runner class with the necessary parameters. + + :param resource_path: The path to resource files used by this example, such as IAM policies and instance scripts. + :param recommendation: An instance of the RecommendationService class. + :param autoscaler: An instance of the AutoScaler class. + :param loadbalancer: An instance of the LoadBalancer class. + :param param_helper: An instance of the ParameterHelper class. 
+ """ self.resource_path = resource_path self.recommendation = recommendation self.autoscaler = autoscaler @@ -41,93 +65,65 @@ def __init__( self.port = 80 self.ssh_port = 22 - def deploy(self): + prefix = "doc-example-resilience" + self.target_group_name = f"{prefix}-tg" + self.load_balancer_name = f"{prefix}-lb" + + def deploy(self) -> None: + """ + Deploys the resources required for the resilient service, including the DynamoDB table, + EC2 instances, Auto Scaling group, and load balancer. + """ recommendations_path = f"{self.resource_path}/recommendations.json" startup_script = f"{self.resource_path}/server_startup_script.sh" instance_policy = f"{self.resource_path}/instance_policy.json" - print( - "\nFor this demo, we'll use the AWS SDK for Python (Boto3) to create several AWS resources\n" - "to set up a load-balanced web service endpoint and explore some ways to make it resilient\n" - "against various kinds of failures.\n\n" - "Some of the resources create by this demo are:\n" - ) - print( - "\t* A DynamoDB table that the web service depends on to provide book, movie, and song recommendations." - ) - print( - "\t* An EC2 launch template that defines EC2 instances that each contain a Python web server." - ) - print( - "\t* An EC2 Auto Scaling group that manages EC2 instances across several Availability Zones." - ) - print( - "\t* An Elastic Load Balancing (ELB) load balancer that targets the Auto Scaling group to distribute requests." - ) - print("-" * 88) - q.ask("Press Enter when you're ready to start deploying resources.") + logging.info("Starting deployment of resources for the resilient service.") - print( - f"Creating and populating a DynamoDB table named '{self.recommendation.table_name}'." + logging.info( + "Creating and populating DynamoDB table '%s'.", + self.recommendation.table_name, ) self.recommendation.create() self.recommendation.populate(recommendations_path) - print("-" * 88) - - print( - f"Creating an EC2 launch template that runs '{startup_script}' when an instance starts.\n" - f"This script starts a Python web server defined in the `server.py` script. The web server\n" - f"listens to HTTP requests on port 80 and responds to requests to '/' and to '/healthcheck'.\n" - f"For demo purposes, this server is run as the root user. In production, the best practice is to\n" - f"run a web server, such as Apache, with least-privileged credentials.\n" - ) - print( - f"The template also defines an IAM policy that each instance uses to assume a role that grants\n" - f"permissions to access the DynamoDB recommendation table and Systems Manager parameters\n" - f"that control the flow of the demo.\n" + + logging.info( + "Creating an EC2 launch template with the startup script '%s'.", + startup_script, ) self.autoscaler.create_template(startup_script, instance_policy) - print("-" * 88) - print( - f"Creating an EC2 Auto Scaling group that maintains three EC2 instances, each in a different\n" - f"Availability Zone." - ) - zones = self.autoscaler.create_group(3) - print("-" * 88) - print( - "At this point, you have EC2 instances created. Once each instance starts, it listens for\n" - "HTTP requests. You can see these instances in the console or continue with the demo." + logging.info( + "Creating an EC2 Auto Scaling group across multiple Availability Zones." 
         )
-        print("-" * 88)
-        q.ask("Press Enter when you're ready to continue.")
+        zones = self.autoscaler.create_autoscaling_group(3)
 
-        print(f"Creating variables that control the flow of the demo.\n")
+        logging.info("Creating variables that control the flow of the demo.")
         self.param_helper.reset()
-        print(
-            "\nCreating an Elastic Load Balancing target group and load balancer. The target group\n"
-            "defines how the load balancer connects to instances. The load balancer provides a\n"
-            "single endpoint where clients connect and dispatches requests to instances in the group.\n"
-        )
+        logging.info("Creating Elastic Load Balancing target group and load balancer.")
+
         vpc = self.autoscaler.get_default_vpc()
         subnets = self.autoscaler.get_subnets(vpc["VpcId"], zones)
         target_group = self.loadbalancer.create_target_group(
-            self.protocol, self.port, vpc["VpcId"]
+            self.target_group_name, self.protocol, self.port, vpc["VpcId"]
         )
         self.loadbalancer.create_load_balancer(
-            [subnet["SubnetId"] for subnet in subnets], target_group
+            self.load_balancer_name, [subnet["SubnetId"] for subnet in subnets]
         )
+        self.loadbalancer.create_listener(self.load_balancer_name, target_group)
+
         self.autoscaler.attach_load_balancer_target_group(target_group)
-        print(f"Verifying access to the load balancer endpoint...")
-        lb_success = self.loadbalancer.verify_load_balancer_endpoint()
+
+        logging.info("Verifying access to the load balancer endpoint.")
+        endpoint = self.loadbalancer.get_endpoint(self.load_balancer_name)
+        lb_success = self.loadbalancer.verify_load_balancer_endpoint(endpoint)
+        current_ip_address = requests.get("http://checkip.amazonaws.com").text.strip()
+
         if not lb_success:
-            print(
-                "Couldn't connect to the load balancer, verifying that the port is open..."
+            logging.warning(
+                "Couldn't connect to the load balancer. Verifying that the port is open..."
             )
-            current_ip_address = requests.get(
-                "http://checkip.amazonaws.com"
-            ).text.strip()
             sec_group, port_is_open = self.autoscaler.verify_inbound_port(
                 vpc, self.port, current_ip_address
             )
@@ -135,10 +131,8 @@ def deploy(self):
                 vpc, self.ssh_port, current_ip_address
             )
             if not port_is_open:
-                print(
-                    "For this example to work, the default security group for your default VPC must\n"
-                    "allows access from this computer. You can either add it automatically from this\n"
-                    "example or add it yourself using the AWS Management Console.\n"
+                logging.warning(
                    "The default security group for your VPC must allow access from this computer."
                 )
                 if q.ask(
                     f"Do you want to add a rule to security group {sec_group['GroupId']} to allow\n"
@@ -157,21 +151,22 @@ def deploy(self):
                     self.autoscaler.open_inbound_port(
                         sec_group["GroupId"], self.ssh_port, current_ip_address
                     )
-            lb_success = self.loadbalancer.verify_load_balancer_endpoint(endpoint)
+            lb_success = self.loadbalancer.verify_load_balancer_endpoint(endpoint)
+
         if lb_success:
-            print("Your load balancer is ready. You can access it by browsing to:\n")
-            print(f"\thttp://{self.loadbalancer.endpoint()}\n")
+            logging.info(
+                "Load balancer is ready. Access it at: http://%s", endpoint
+            )
         else:
-            print(
-                "Couldn't get a successful response from the load balancer endpoint. Troubleshoot by\n"
-                "manually verifying that your VPC and security group are configured correctly and that\n"
-                "you can successfully make a GET request to the load balancer endpoint:\n"
+            logging.error(
                "Couldn't get a successful response from the load balancer endpoint. Please verify your VPC and security group settings."
) - print(f"\thttp://{self.loadbalancer.endpoint()}\n") - print("-" * 88) - q.ask("Press Enter when you're ready to continue with the demo.") - def demo_choices(self): + def demo_choices(self) -> None: + """ + Presents choices for interacting with the deployed service, such as sending requests to + the load balancer or checking the health of the targets. + """ actions = [ "Send a GET request to the load balancer endpoint.", "Check the health of load balancer targets.", @@ -179,86 +174,71 @@ def demo_choices(self): ] choice = 0 while choice != 2: - print("-" * 88) - print( - "\nSee the current state of the service by selecting one of the following choices:\n" - ) - choice = q.choose("\nWhich action would you like to take? ", actions) - print("-" * 88) + logging.info("Choose an action to interact with the service.") + choice = q.choose("Which action would you like to take? ", actions) if choice == 0: - print("Request:\n") - print(f"GET http://{self.loadbalancer.endpoint()}") - response = requests.get(f"http://{self.loadbalancer.endpoint()}") - print("\nResponse:\n") - print(f"{response.status_code}") + logging.info("Sending a GET request to the load balancer endpoint.") + endpoint = self.loadbalancer.get_endpoint(self.load_balancer_name) + logging.info("GET http://%s", endpoint) + response = requests.get(f"http://{endpoint}") + logging.info("Response: %s", response.status_code) if response.headers.get("content-type") == "application/json": pp(response.json()) elif choice == 1: - print("\nChecking the health of load balancer targets:\n") - health = self.loadbalancer.check_target_health() + logging.info("Checking the health of load balancer targets.") + health = self.loadbalancer.check_target_health(self.target_group_name) for target in health: state = target["TargetHealth"]["State"] - print( - f"\tTarget {target['Target']['Id']} on port {target['Target']['Port']} is {state}" + logging.info( + "Target %s on port %d is %s", + target["Target"]["Id"], + target["Target"]["Port"], + state, ) if state != "healthy": - print( - f"\t\t{target['TargetHealth']['Reason']}: {target['TargetHealth']['Description']}\n" + logging.warning( + "%s: %s", + target["TargetHealth"]["Reason"], + target["TargetHealth"]["Description"], ) - print( - f"\nNote that it can take a minute or two for the health check to update\n" - f"after changes are made.\n" + logging.info( + "Note that it can take a minute or two for the health check to update." ) elif choice == 2: - print("\nOkay, let's move on.") - print("-" * 88) + logging.info("Proceeding to the next part of the demo.") - def demo(self): + def demo(self) -> None: + """ + Runs the demonstration, showing how the service responds to different failure scenarios + and how a resilient architecture can keep the service running. + """ ssm_only_policy = f"{self.resource_path}/ssm_only_policy.json" - print("\nResetting parameters to starting values for demo.\n") + logging.info("Resetting parameters to starting values for the demo.") self.param_helper.reset() - print( - "\nThis part of the demonstration shows how to toggle different parts of the system\n" - "to create situations where the web service fails, and shows how using a resilient\n" - "architecture can keep the web service running in spite of these failures." - ) - print("-" * 88) - - print( - "At the start, the load balancer endpoint returns recommendations and reports that all targets are healthy." + logging.info( + "Starting demonstration of the service's resilience under various failure conditions." 
) self.demo_choices() - print( - f"The web service running on the EC2 instances gets recommendations by querying a DynamoDB table.\n" - f"The table name is contained in a Systems Manager parameter named '{self.param_helper.table}'.\n" - f"To simulate a failure of the recommendation service, let's set this parameter to name a non-existent table.\n" + logging.info( + "Simulating failure by changing the Systems Manager parameter to a non-existent table." ) self.param_helper.put(self.param_helper.table, "this-is-not-a-table") - print( - "\nNow, sending a GET request to the load balancer endpoint returns a failure code. But, the service reports as\n" - "healthy to the load balancer because shallow health checks don't check for failure of the recommendation service." - ) + logging.info("Sending GET requests will now return failure codes.") self.demo_choices() - print( - f"Instead of failing when the recommendation service fails, the web service can return a static response.\n" - f"While this is not a perfect solution, it presents the customer with a somewhat better experience than failure.\n" - ) + logging.info("Switching to static response mode to mitigate failure.") self.param_helper.put(self.param_helper.failure_response, "static") - print( - f"\nNow, sending a GET request to the load balancer endpoint returns a static response.\n" - f"The service still reports as healthy because health checks are still shallow.\n" - ) + logging.info("Sending GET requests will now return static responses.") self.demo_choices() - print("Let's reinstate the recommendation service.\n") + logging.info("Restoring normal operation of the recommendation service.") self.param_helper.put(self.param_helper.table, self.recommendation.table_name) - print( - "\nLet's also substitute bad credentials for one of the instances in the target group so that it can't\n" - "access the DynamoDB recommendation table.\n" + + logging.info( + "Introducing a failure by assigning bad credentials to one of the instances." ) self.autoscaler.create_instance_profile( ssm_only_policy, @@ -270,93 +250,78 @@ def demo(self): instances = self.autoscaler.get_instances() bad_instance_id = instances[0] instance_profile = self.autoscaler.get_instance_profile(bad_instance_id) - print( - f"\nReplacing the profile for instance {bad_instance_id} with a profile that contains\n" - f"bad credentials...\n" + logging.info( + "Replacing instance profile with bad credentials for instance %s.", + bad_instance_id, ) self.autoscaler.replace_instance_profile( bad_instance_id, self.autoscaler.bad_creds_profile_name, instance_profile["AssociationId"], ) - print( - "Now, sending a GET request to the load balancer endpoint returns either a recommendation or a static response,\n" - "depending on which instance is selected by the load balancer.\n" + logging.info( + "Sending GET requests may return either a valid recommendation or a static response." ) self.demo_choices() - print( - "\nLet's implement a deep health check. For this demo, a deep health check tests whether\n" - "the web service can access the DynamoDB table that it depends on for recommendations. 
Note that\n" - "the deep health check is only for ELB routing and not for Auto Scaling instance health.\n" - "This kind of deep health check is not recommended for Auto Scaling instance health, because it\n" - "risks accidental termination of all instances in the Auto Scaling group when a dependent service fails.\n" - ) - print( - "By implementing deep health checks, the load balancer can detect when one of the instances is failing\n" - "and take that instance out of rotation.\n" - ) + logging.info("Implementing deep health checks to detect unhealthy instances.") self.param_helper.put(self.param_helper.health_check, "deep") - print( - f"\nNow, checking target health indicates that the instance with bad credentials ({bad_instance_id})\n" - f"is unhealthy. Note that it might take a minute or two for the load balancer to detect the unhealthy \n" - f"instance. Sending a GET request to the load balancer endpoint always returns a recommendation, because\n" - "the load balancer takes unhealthy instances out of its rotation.\n" - ) + logging.info("Checking the health of the load balancer targets.") self.demo_choices() - print( - "\nBecause the instances in this demo are controlled by an auto scaler, the simplest way to fix an unhealthy\n" - "instance is to terminate it and let the auto scaler start a new instance to replace it.\n" + logging.info( + "Terminating the unhealthy instance to let the auto scaler replace it." ) self.autoscaler.terminate_instance(bad_instance_id) - print( - "\nEven while the instance is terminating and the new instance is starting, sending a GET\n" - "request to the web service continues to get a successful recommendation response because\n" - "the load balancer routes requests to the healthy instances. After the replacement instance\n" - "starts and reports as healthy, it is included in the load balancing rotation.\n" - "\nNote that terminating and replacing an instance typically takes several minutes, during which time you\n" - "can see the changing health check status until the new instance is running and healthy.\n" - ) + logging.info("The service remains resilient during instance replacement.") self.demo_choices() - print( - "\nIf the recommendation service fails now, deep health checks mean all instances report as unhealthy.\n" - ) + logging.info("Simulating a complete failure of the recommendation service.") self.param_helper.put(self.param_helper.table, "this-is-not-a-table") - print( - "\nWhen all instances are unhealthy, the load balancer continues to route requests even to\n" - "unhealthy instances, allowing them to fail open and return a static response rather than fail\n" - "closed and report failure to the customer." + logging.info( + "All instances will report as unhealthy, but the service will still return static responses." ) self.demo_choices() self.param_helper.reset() - def destroy(self): - print( - "This concludes the demo of how to build and manage a resilient service.\n" - "To keep things tidy and to avoid unwanted charges on your account, we can clean up all AWS resources\n" - "that were created for this demo." + def destroy(self, automation=False) -> None: + """ + Destroys all resources created for the demo, including the load balancer, Auto Scaling group, + EC2 instances, and DynamoDB table. + """ + logging.info( + "This concludes the demo. Preparing to clean up all AWS resources created during the demo." ) - if q.ask("Do you want to clean up all demo resources? 
(y/n) ", q.is_yesno): - self.loadbalancer.delete_load_balancer() - self.loadbalancer.delete_target_group() - self.autoscaler.delete_group() + if automation: + cleanup = True + else: + cleanup = q.ask( + "Do you want to clean up all demo resources? (y/n) ", q.is_yesno + ) + + if cleanup: + logging.info("Deleting load balancer and related resources.") + self.loadbalancer.delete_load_balancer(self.load_balancer_name) + self.loadbalancer.delete_target_group(self.target_group_name) + self.autoscaler.delete_autoscaling_group(self.autoscaler.group_name) self.autoscaler.delete_key_pair() self.autoscaler.delete_template() self.autoscaler.delete_instance_profile( self.autoscaler.bad_creds_profile_name, self.autoscaler.bad_creds_role_name, ) + logging.info("Deleting DynamoDB table and other resources.") self.recommendation.destroy() else: - print( - "Okay, we'll leave the resources intact.\n" - "Don't forget to delete them when you're done with them or you might incur unexpected charges." + logging.warning( + "Resources have not been deleted. Ensure you clean them up manually to avoid unexpected charges." ) -def main(): +def main() -> None: + """ + Main function to parse arguments and run the appropriate actions for the demo. + """ parser = argparse.ArgumentParser() parser.add_argument( "--action", @@ -373,21 +338,41 @@ def main(): ) args = parser.parse_args() - print("-" * 88) - print( - "Welcome to the demonstration of How to Build and Manage a Resilient Service!" - ) - print("-" * 88) + logging.info("Starting the Resilient Service demo.") prefix = "doc-example-resilience" - recommendation = RecommendationService.from_client( - "doc-example-recommendation-service" + + # Service Clients + ddb_client = boto3.client("dynamodb") + elb_client = boto3.client("elbv2") + autoscaling_client = boto3.client("autoscaling") + ec2_client = boto3.client("ec2") + ssm_client = boto3.client("ssm") + iam_client = boto3.client("iam") + + # Wrapper instantiations + recommendation = RecommendationService( + "doc-example-recommendation-service", ddb_client ) - autoscaler = AutoScaler.from_client(prefix) - loadbalancer = LoadBalancer.from_client(prefix) - param_helper = ParameterHelper.from_client(recommendation.table_name) + autoscaling_wrapper = AutoScalingWrapper( + prefix, + "t3.micro", + "/aws/service/ami-amazon-linux-latest/amzn2-ami-hvm-x86_64-gp2", + autoscaling_client, + ec2_client, + ssm_client, + iam_client, + ) + elb_wrapper = ElasticLoadBalancerWrapper(elb_client) + param_helper = ParameterHelper(recommendation.table_name, ssm_client) + + # Demo invocation runner = Runner( - args.resource_path, recommendation, autoscaler, loadbalancer, param_helper + args.resource_path, + recommendation, + autoscaling_wrapper, + elb_wrapper, + param_helper, ) actions = [args.action] if args.action != "all" else ["deploy", "demo", "destroy"] for action in actions: @@ -398,9 +383,7 @@ def main(): elif action == "destroy": runner.destroy() - print("-" * 88) - print("Thanks for watching!") - print("-" * 88) + logging.info("Demo completed successfully.") if __name__ == "__main__": diff --git a/python/cross_service/resilient_service/test/pytest.ini b/python/cross_service/resilient_service/test/pytest.ini new file mode 100644 index 00000000000..ca0c9f17105 --- /dev/null +++ b/python/cross_service/resilient_service/test/pytest.ini @@ -0,0 +1,2 @@ +[pytest] +addopts = -s diff --git a/python/cross_service/resilient_service/test/resources/server.py b/python/cross_service/resilient_service/test/resources/server.py index 
97f0baae9b2..366c0c4fafa 100644 --- a/python/cross_service/resilient_service/test/resources/server.py +++ b/python/cross_service/resilient_service/test/resources/server.py @@ -10,10 +10,10 @@ """ import argparse -from http.server import BaseHTTPRequestHandler, HTTPServer import json -from functools import partial import random +from functools import partial +from http.server import BaseHTTPRequestHandler, HTTPServer import boto3 from botocore.exceptions import ClientError diff --git a/python/cross_service/resilient_service/test/test_demo.py b/python/cross_service/resilient_service/test/test_demo.py deleted file mode 100644 index 7d2bee6638d..00000000000 --- a/python/cross_service/resilient_service/test/test_demo.py +++ /dev/null @@ -1,253 +0,0 @@ -# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -# SPDX-License-Identifier: Apache-2.0 - -from datetime import datetime -import time -from unittest.mock import MagicMock -from botocore.exceptions import ClientError, WaiterError -from botocore.stub import ANY -import pytest -import requests - -from auto_scaler import AutoScalerError -from load_balancer import LoadBalancerError -from parameters import ParameterHelper, ParameterHelperError - - -class MockManager: - def __init__(self, stub_runner, scenario_data, input_mocker): - self.scenario_data = scenario_data - self.scenario_data.endpoint = "test-endpoint" - self.scenario_data.bad_profile_name = ( - f"{self.scenario_data.resource_prefix}-bc-prof" - ) - self.scenario_data.bad_profile_arn = ( - "arn:aws:iam:us-west-2:123456789012:instance-profile/test-bad-profile" - ) - self.scenario_data.association_id = "test-association-id" - self.scenario_args = [] - self.scenario_out = {} - answers = ["1", "2", "3", "3", "3", "3", "3", "3", "3"] - input_mocker.mock_answers(answers) - self.stub_runner = stub_runner - - def setup_stubs(self, error, stop_on): - with self.stub_runner(error, stop_on) as runner: - runner.add( - self.scenario_data.ssm.stubber.stub_put_parameter, - ParameterHelper.table, - self.scenario_data.table_name, - ) - runner.add( - self.scenario_data.ssm.stubber.stub_put_parameter, - ParameterHelper.failure_response, - "none", - ) - runner.add( - self.scenario_data.ssm.stubber.stub_put_parameter, - ParameterHelper.health_check, - "shallow", - ) - runner.add( - self.scenario_data.elb.stubber.stub_describe_load_balancers, - [self.scenario_data.lb_name], - [self.scenario_data.endpoint], - ) - runner.add( - self.scenario_data.elb.stubber.stub_describe_target_groups, - [self.scenario_data.tg_name], - [self.scenario_data.tg_arn], - ) - runner.add( - self.scenario_data.elb.stubber.stub_describe_target_health, - self.scenario_data.tg_arn, - [ - { - "id": "test-id", - "port": 80, - "state": "unhealthy", - "reason": "test reason", - "desc": "test desc", - } - ], - ) - runner.add( - self.scenario_data.ssm.stubber.stub_put_parameter, - ParameterHelper.table, - "this-is-not-a-table", - ) - runner.add( - self.scenario_data.ssm.stubber.stub_put_parameter, - ParameterHelper.failure_response, - "static", - ) - runner.add( - self.scenario_data.ssm.stubber.stub_put_parameter, - ParameterHelper.table, - self.scenario_data.table_name, - ) - runner.add( - self.scenario_data.iam.stubber.stub_create_policy, - f"{self.scenario_data.resource_prefix}-bc-pol", - self.scenario_data.bad_policy_arn, - ) - runner.add( - self.scenario_data.iam.stubber.stub_create_role, - self.scenario_data.bad_role_name, - ) - runner.add( - self.scenario_data.iam.stubber.stub_attach_role_policy, - 
self.scenario_data.bad_role_name, - self.scenario_data.bad_policy_arn, - ) - runner.add( - self.scenario_data.iam.stubber.stub_attach_role_policy, - self.scenario_data.bad_role_name, - ANY, - ) - runner.add( - self.scenario_data.iam.stubber.stub_create_instance_profile, - self.scenario_data.bad_profile_name, - self.scenario_data.bad_profile_arn, - ) - runner.add( - self.scenario_data.iam.stubber.stub_get_instance_profile, - self.scenario_data.bad_profile_name, - self.scenario_data.bad_profile_arn, - ) - runner.add( - self.scenario_data.iam.stubber.stub_add_role_to_instance_profile, - self.scenario_data.bad_profile_name, - self.scenario_data.bad_role_name, - ) - runner.add( - self.scenario_data.auto_scaling.stubber.stub_describe_auto_scaling_groups, - [self.scenario_data.asg_name], - [ - { - "AutoScalingGroupName": self.scenario_data.asg_name, - "MinSize": 3, - "MaxSize": 3, - "DesiredCapacity": 0, - "DefaultCooldown": 0, - "AvailabilityZones": ["test-zone"], - "HealthCheckType": "EC2", - "CreatedTime": datetime.now(), - "Instances": [self.scenario_data.instance], - } - ], - ) - runner.add( - self.scenario_data.ec2.stubber.stub_describe_iam_instance_profile_associations, - self.scenario_data.instance["InstanceId"], - self.scenario_data.association_id, - ) - runner.add( - self.scenario_data.ec2.stubber.stub_replace_iam_instance_profile_association, - self.scenario_data.bad_profile_name, - self.scenario_data.association_id, - ) - runner.add( - self.scenario_data.ec2.stubber.stub_reboot_instances, - [self.scenario_data.instance["InstanceId"]], - ) - runner.add( - self.scenario_data.ssm.stubber.stub_describe_instance_information, - [self.scenario_data.instance["InstanceId"]], - ) - runner.add( - self.scenario_data.ssm.stubber.stub_send_command, - [self.scenario_data.instance["InstanceId"]], - commands=ANY, - timeout=None, - ) - runner.add( - self.scenario_data.ssm.stubber.stub_put_parameter, - ParameterHelper.health_check, - "deep", - ) - runner.add( - self.scenario_data.auto_scaling.stubber.stub_terminate_instance_in_auto_scaling_group, - self.scenario_data.instance["InstanceId"], - False, - None, - ) - runner.add( - self.scenario_data.ssm.stubber.stub_put_parameter, - ParameterHelper.table, - "this-is-not-a-table", - ) - runner.add( - self.scenario_data.ssm.stubber.stub_put_parameter, - ParameterHelper.table, - self.scenario_data.table_name, - ) - runner.add( - self.scenario_data.ssm.stubber.stub_put_parameter, - ParameterHelper.failure_response, - "none", - ) - runner.add( - self.scenario_data.ssm.stubber.stub_put_parameter, - ParameterHelper.health_check, - "shallow", - ) - - -@pytest.fixture -def mock_mgr(stub_runner, scenario_data, input_mocker): - return MockManager(stub_runner, scenario_data, input_mocker) - - -def test_demo(mock_mgr, monkeypatch): - monkeypatch.setattr(time, "sleep", lambda x: None) - monkeypatch.setattr( - requests, "get", lambda x: MagicMock(status_code=200, text="test text") - ) - mock_mgr.setup_stubs(None, None) - - mock_mgr.scenario_data.scenario.demo() - - -@pytest.mark.parametrize( - "error, stub_name, stop_on_index", - [ - (ParameterHelperError, "stub_put_parameter", 0), - (ParameterHelperError, "stub_put_parameter", 1), - (ParameterHelperError, "stub_put_parameter", 2), - (LoadBalancerError, "stub_describe_load_balancers", 3), - (LoadBalancerError, "stub_describe_target_groups", 4), - (LoadBalancerError, "stub_describe_target_health", 5), - (ParameterHelperError, "stub_put_parameter", 6), - (ParameterHelperError, "stub_put_parameter", 7), - 
(ParameterHelperError, "stub_put_parameter", 8), - (AutoScalerError, "stub_create_policy", 9), - (AutoScalerError, "stub_create_role", 10), - (AutoScalerError, "stub_attach_role_policy", 11), - (AutoScalerError, "stub_attach_role_policy", 12), - (AutoScalerError, "stub_create_instance_profile", 13), - (WaiterError, "stub_get_instance_profile", 14), - (AutoScalerError, "stub_add_role_to_instance_profile", 15), - (AutoScalerError, "stub_describe_auto_scaling_groups", 16), - (AutoScalerError, "stub_describe_iam_instance_profile_associations", 17), - (AutoScalerError, "stub_replace_iam_instance_profile_association", 18), - (AutoScalerError, "stub_reboot_instances", 19), - (AutoScalerError, "stub_describe_instance_information", 20), - (AutoScalerError, "stub_send_command", 21), - (ParameterHelperError, "stub_put_parameter", 22), - (AutoScalerError, "stub_terminate_instance_in_auto_scaling_group", 23), - (ParameterHelperError, "stub_put_parameter", 24), - (ParameterHelperError, "stub_put_parameter", 25), - (ParameterHelperError, "stub_put_parameter", 26), - (ParameterHelperError, "stub_put_parameter", 27), - ], -) -def test_demo_error(mock_mgr, caplog, error, stub_name, stop_on_index, monkeypatch): - monkeypatch.setattr(time, "sleep", lambda x: None) - monkeypatch.setattr( - requests, "get", lambda x: MagicMock(status_code=200, text="test text") - ) - mock_mgr.setup_stubs(error, stop_on_index) - - with pytest.raises(error): - mock_mgr.scenario_data.scenario.demo() diff --git a/python/cross_service/resilient_service/test/test_deploy.py b/python/cross_service/resilient_service/test/test_deploy.py deleted file mode 100644 index f462662d0ad..00000000000 --- a/python/cross_service/resilient_service/test/test_deploy.py +++ /dev/null @@ -1,264 +0,0 @@ -# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
-# SPDX-License-Identifier: Apache-2.0 - -import logging -import time -from unittest.mock import MagicMock -from botocore.exceptions import ClientError, WaiterError -from botocore.stub import ANY -import pytest -import requests - -from auto_scaler import AutoScalerError -from load_balancer import LoadBalancerError -from recommendation_service import RecommendationServiceError - - -class MockManager: - def __init__(self, stub_runner, scenario_data, input_mocker): - self.scenario_data = scenario_data - self.scenario_data.profile_arn = ( - "arn:aws:iam:us-west-2:123456789012:instance-profile/test-profile" - ) - self.scenario_data.ami_id = "test-ami-id" - self.scenario_data.zones = ["test-zone-1", "test-zone-2"] - self.scenario_data.vpc_id = "test-vpc" - self.scenario_data.subnet_ids = ["subnet-test-id"] - self.scenario_data.lb_endpoint = "test-endpoint" - self.scenario_data.sg_id = "test-sg-id" - self.scenario_data.ip_address = "test-address" - self.scenario_args = [] - self.scenario_out = {} - answers = ["", "", "y", ""] - input_mocker.mock_answers(answers) - self.stub_runner = stub_runner - - def setup_stubs(self, error, stop_on): - with self.stub_runner(error, stop_on) as runner: - runner.add( - self.scenario_data.ddb.stubber.stub_create_table, - self.scenario_data.table_name, - [ - {"name": "MediaType", "type": "S", "key_type": "HASH"}, - {"name": "ItemId", "type": "N", "key_type": "RANGE"}, - ], - {"read": 5, "write": 5}, - ) - runner.add( - self.scenario_data.ddb.stubber.stub_describe_table, - self.scenario_data.table_name, - ) - runner.add(self.scenario_data.ddb.stubber.stub_batch_write_item, ANY) - runner.add( - self.scenario_data.iam.stubber.stub_create_policy, - f"{self.scenario_data.resource_prefix}-pol", - self.scenario_data.policy_arn, - ) - runner.add( - self.scenario_data.iam.stubber.stub_create_role, - self.scenario_data.role_name, - ) - runner.add( - self.scenario_data.iam.stubber.stub_attach_role_policy, - self.scenario_data.role_name, - self.scenario_data.policy_arn, - ) - runner.add( - self.scenario_data.iam.stubber.stub_create_instance_profile, - self.scenario_data.profile_name, - self.scenario_data.profile_arn, - ) - runner.add( - self.scenario_data.iam.stubber.stub_get_instance_profile, - self.scenario_data.profile_name, - self.scenario_data.profile_arn, - ) - runner.add( - self.scenario_data.iam.stubber.stub_add_role_to_instance_profile, - self.scenario_data.profile_name, - self.scenario_data.role_name, - ) - runner.add( - self.scenario_data.ssm.stubber.stub_get_parameter, - self.scenario_data.ami_param, - self.scenario_data.ami_id, - ) - runner.add( - self.scenario_data.ec2.stubber.stub_create_launch_template, - self.scenario_data.lt_name, - self.scenario_data.inst_type, - self.scenario_data.ami_id, - inst_profile=self.scenario_data.profile_name, - user_data=ANY, - ) - runner.add( - self.scenario_data.ec2.stubber.stub_describe_availability_zones, - self.scenario_data.zones, - ) - runner.add( - self.scenario_data.auto_scaling.stubber.stub_create_auto_scaling_group, - self.scenario_data.asg_name, - self.scenario_data.zones, - self.scenario_data.lt_name, - 3, - 3, - ) - runner.add( - self.scenario_data.ec2.stubber.stub_describe_vpcs, - {self.scenario_data.vpc_id: True}, - vpc_filters=[{"Name": "is-default", "Values": ["true"]}], - ) - runner.add( - self.scenario_data.ec2.stubber.stub_describe_subnets, - self.scenario_data.vpc_id, - self.scenario_data.zones, - self.scenario_data.subnet_ids, - ) - runner.add( - self.scenario_data.elb.stubber.stub_create_target_group, - 
self.scenario_data.tg_name, - "HTTP", - 80, - self.scenario_data.vpc_id, - { - "path": "/healthcheck", - "interval": 10, - "timeout": 5, - "thresh_healthy": 2, - "thresh_unhealthy": 2, - }, - self.scenario_data.tg_arn, - ) - runner.add( - self.scenario_data.elb.stubber.stub_create_load_balancer, - self.scenario_data.lb_name, - self.scenario_data.subnet_ids, - "HTTP", - 80, - self.scenario_data.lb_arn, - self.scenario_data.lb_endpoint, - ) - runner.add( - self.scenario_data.elb.stubber.stub_describe_load_balancers, - [self.scenario_data.lb_name], - ) - runner.add( - self.scenario_data.elb.stubber.stub_create_listener, - self.scenario_data.lb_arn, - "HTTP", - 80, - self.scenario_data.tg_arn, - ) - runner.add( - self.scenario_data.auto_scaling.stubber.stub_attach_load_balancer_target_groups, - self.scenario_data.asg_name, - [self.scenario_data.tg_arn], - ) - runner.add( - self.scenario_data.ec2.stubber.stub_describe_security_groups, - [ - { - "id": self.scenario_data.sg_id, - "group_name": "default", - "ip_permissions": [ - { - "FromPort": 80, - "IpRanges": [{"CidrIp": "test"}], - "PrefixListIds": [], - } - ], - } - ], - self.scenario_data.vpc_id, - ) - runner.add( - self.scenario_data.ec2.stubber.stub_authorize_security_group_ingress, - self.scenario_data.sg_id, - cidr_ip=f"{self.scenario_data.ip_address}/32", - port=80, - ip_protocol="tcp", - ) - - -@pytest.fixture -def mock_mgr(stub_runner, scenario_data, input_mocker): - return MockManager(stub_runner, scenario_data, input_mocker) - - -def test_deploy(mock_mgr, caplog, monkeypatch): - caplog.set_level(logging.INFO) - monkeypatch.setattr(time, "sleep", lambda x: None) - monkeypatch.setattr( - requests, - "get", - lambda x: MagicMock(status_code=404, text=mock_mgr.scenario_data.ip_address), - ) - mock_mgr.setup_stubs(None, None) - - mock_mgr.scenario_data.scenario.deploy() - - assert len(caplog.records) > 0 - attrs = [ - attr - for attr in dir(mock_mgr.scenario_data) - if not callable(getattr(mock_mgr.scenario_data, attr)) - and not attr.startswith("__") - and attr - not in [ - "ami_param", - "lb_arn", - "lb_endpoint", - "profile_arn", - "tg_arn", - "vpc_id", - "bad_policy_arn", - "bad_prof_name", - "bad_role_name", - ] - ] - for attr in attrs: - val = getattr(mock_mgr.scenario_data, attr) - if isinstance(val, str): - assert any( - val in rec for rec in caplog.messages - ), f"'{val}' not in log messages!" 
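The deleted unit tests above all followed one error-injection pattern: a `stub_runner` queues successful stubbed responses up to `stop_on_index`, injects an error at that index, and the test asserts the error surfaces. The same idea reduced to a self-contained sketch using botocore's `Stubber` directly; the service, operation, and error codes here are illustrative choices, and the dummy credentials exist only so the stubbed client never needs real ones:

```python
import boto3
import pytest
from botocore.exceptions import ClientError
from botocore.stub import Stubber


@pytest.mark.parametrize(
    "error_code", ["ResourceInUseException", "LimitExceededException"]
)
def test_create_table_error(error_code):
    client = boto3.client(
        "dynamodb",
        region_name="us-east-1",
        aws_access_key_id="test",
        aws_secret_access_key="test",
    )
    with Stubber(client) as stubber:
        # Queue an error for the next create_table call instead of a response.
        stubber.add_client_error("create_table", service_error_code=error_code)
        with pytest.raises(ClientError) as exc_info:
            client.create_table(
                TableName="test-table",
                KeySchema=[{"AttributeName": "Id", "KeyType": "HASH"}],
                AttributeDefinitions=[{"AttributeName": "Id", "AttributeType": "S"}],
                BillingMode="PAY_PER_REQUEST",
            )
        assert exc_info.value.response["Error"]["Code"] == error_code
```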
- - -@pytest.mark.parametrize( - "error, stub_name, stop_on_index", - [ - (RecommendationServiceError, "stub_create_table", 0), - (WaiterError, "stub_describe_table", 1), - (RecommendationServiceError, "stub_batch_write_item", 2), - (AutoScalerError, "stub_create_policy", 3), - (AutoScalerError, "stub_create_role", 4), - (AutoScalerError, "stub_attach_role_policy", 5), - (AutoScalerError, "stub_create_instance_profile", 6), - (WaiterError, "stub_get_instance_profile", 7), - (AutoScalerError, "stub_add_role_to_instance_profile", 8), - (AutoScalerError, "stub_get_parameter", 9), - (AutoScalerError, "stub_create_launch_template", 10), - (AutoScalerError, "stub_describe_availability_zones", 11), - (AutoScalerError, "stub_create_auto_scaling_group", 12), - (AutoScalerError, "stub_describe_vpcs", 13), - (AutoScalerError, "stub_describe_subnets", 14), - (LoadBalancerError, "stub_create_target_group", 15), - (LoadBalancerError, "stub_create_load_balancer", 16), - (WaiterError, "stub_describe_load_balancers", 17), - (LoadBalancerError, "stub_create_listener", 18), - (AutoScalerError, "stub_attach_load_balancer_target_groups", 19), - (AutoScalerError, "stub_describe_security_groups", 20), - (AutoScalerError, "stub_authorize_security_group_ingress", 21), - ], -) -def test_deploy_error(mock_mgr, capsys, monkeypatch, error, stub_name, stop_on_index): - monkeypatch.setattr(time, "sleep", lambda x: None) - monkeypatch.setattr( - requests, - "get", - lambda x: MagicMock(status_code=404, text=mock_mgr.scenario_data.ip_address), - ) - mock_mgr.setup_stubs(error, stop_on_index) - - with pytest.raises(error): - mock_mgr.scenario_data.scenario.deploy() diff --git a/python/cross_service/resilient_service/test/test_destroy.py b/python/cross_service/resilient_service/test/test_destroy.py deleted file mode 100644 index 7d065aa9d8d..00000000000 --- a/python/cross_service/resilient_service/test/test_destroy.py +++ /dev/null @@ -1,179 +0,0 @@ -# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
-# SPDX-License-Identifier: Apache-2.0 - -from botocore.exceptions import ClientError, WaiterError -import pytest - -from auto_scaler import AutoScalerError -from load_balancer import LoadBalancerError -from recommendation_service import RecommendationServiceError - - -class MockManager: - def __init__(self, stub_runner, scenario_data, input_mocker): - self.scenario_data = scenario_data - self.scenario_args = [] - self.scenario_out = {} - answers = ["y"] - input_mocker.mock_answers(answers) - self.stub_runner = stub_runner - - def setup_stubs(self, error, stop_on): - with self.stub_runner(error, stop_on) as runner: - runner.add( - self.scenario_data.elb.stubber.stub_describe_load_balancers, - [self.scenario_data.lb_name], - arns=[self.scenario_data.lb_arn], - ) - runner.add( - self.scenario_data.elb.stubber.stub_delete_load_balancer, - self.scenario_data.lb_arn, - ) - runner.add( - self.scenario_data.elb.stubber.stub_describe_load_balancers, - [self.scenario_data.lb_name], - error_code="LoadBalancerNotFound", - ) - runner.add( - self.scenario_data.elb.stubber.stub_describe_target_groups, - [self.scenario_data.tg_name], - [self.scenario_data.tg_arn], - ) - runner.add( - self.scenario_data.elb.stubber.stub_delete_target_group, - self.scenario_data.tg_arn, - ) - runner.add( - self.scenario_data.auto_scaling.stubber.stub_describe_auto_scaling_groups, - [self.scenario_data.asg_name], - [self.scenario_data.asg_group], - ) - runner.add( - self.scenario_data.auto_scaling.stubber.stub_update_auto_scaling_group, - self.scenario_data.asg_name, - 0, - ) - runner.add( - self.scenario_data.auto_scaling.stubber.stub_terminate_instance_in_auto_scaling_group, - self.scenario_data.instance["InstanceId"], - True, - None, - ) - runner.add( - self.scenario_data.auto_scaling.stubber.stub_delete_auto_scaling_group, - self.scenario_data.asg_name, - ) - runner.add( - self.scenario_data.ec2.stubber.stub_delete_launch_template, - self.scenario_data.lt_name, - ) - runner.add( - self.scenario_data.iam.stubber.stub_remove_role_from_instance_profile, - self.scenario_data.profile_name, - self.scenario_data.role_name, - ) - runner.add( - self.scenario_data.iam.stubber.stub_delete_instance_profile, - self.scenario_data.profile_name, - ) - runner.add( - self.scenario_data.iam.stubber.stub_list_attached_role_policies, - self.scenario_data.role_name, - {"1": self.scenario_data.policy_arn}, - ) - runner.add( - self.scenario_data.iam.stubber.stub_detach_role_policy, - self.scenario_data.role_name, - self.scenario_data.policy_arn, - ) - runner.add( - self.scenario_data.iam.stubber.stub_delete_policy, - self.scenario_data.policy_arn, - ) - runner.add( - self.scenario_data.iam.stubber.stub_delete_role, - self.scenario_data.role_name, - ) - runner.add( - self.scenario_data.iam.stubber.stub_remove_role_from_instance_profile, - self.scenario_data.bad_prof_name, - self.scenario_data.bad_role_name, - ) - runner.add( - self.scenario_data.iam.stubber.stub_delete_instance_profile, - self.scenario_data.bad_prof_name, - ) - runner.add( - self.scenario_data.iam.stubber.stub_list_attached_role_policies, - self.scenario_data.bad_role_name, - {"1": self.scenario_data.bad_policy_arn}, - ) - runner.add( - self.scenario_data.iam.stubber.stub_detach_role_policy, - self.scenario_data.bad_role_name, - self.scenario_data.bad_policy_arn, - ) - runner.add( - self.scenario_data.iam.stubber.stub_delete_policy, - self.scenario_data.bad_policy_arn, - ) - runner.add( - self.scenario_data.iam.stubber.stub_delete_role, - self.scenario_data.bad_role_name, - ) - 
runner.add( - self.scenario_data.ddb.stubber.stub_delete_table, - self.scenario_data.table_name, - ) - runner.add( - self.scenario_data.ddb.stubber.stub_describe_table, - self.scenario_data.table_name, - error_code="ResourceNotFoundException", - ) - - -@pytest.fixture -def mock_mgr(stub_runner, scenario_data, input_mocker): - return MockManager(stub_runner, scenario_data, input_mocker) - - -def test_destroy(mock_mgr, capsys): - mock_mgr.setup_stubs(None, None) - - mock_mgr.scenario_data.scenario.destroy() - - -@pytest.mark.parametrize( - "error, stub_name, stop_on_index", - [ - (LoadBalancerError, "stub_describe_load_balancers", 0), - (LoadBalancerError, "stub_delete_load_balancer", 1), - (WaiterError, "stub_describe_load_balancers", 2), - (LoadBalancerError, "stub_describe_target_groups", 3), - (LoadBalancerError, "stub_delete_target_group", 4), - (AutoScalerError, "stub_describe_auto_scaling_groups", 5), - (AutoScalerError, "stub_update_auto_scaling_group", 6), - (AutoScalerError, "stub_terminate_instance_in_auto_scaling_group", 7), - (AutoScalerError, "stub_delete_auto_scaling_group", 8), - (AutoScalerError, "stub_delete_launch_template", 9), - (AutoScalerError, "stub_remove_role_from_instance_profile", 10), - (AutoScalerError, "stub_delete_instance_profile", 11), - (AutoScalerError, "stub_list_attached_role_policies", 12), - (AutoScalerError, "stub_detach_role_policy", 13), - (AutoScalerError, "stub_delete_policy", 14), - (AutoScalerError, "stub_delete_role", 15), - (AutoScalerError, "stub_remove_role_from_instance_profile", 16), - (AutoScalerError, "stub_delete_instance_profile", 17), - (AutoScalerError, "stub_list_attached_role_policies", 18), - (AutoScalerError, "stub_detach_role_policy", 19), - (AutoScalerError, "stub_delete_policy", 20), - (AutoScalerError, "stub_delete_role", 21), - (RecommendationServiceError, "stub_delete_table", 22), - (WaiterError, "stub_describe_table", 23), - ], -) -def test_destroy_error(mock_mgr, caplog, error, stub_name, stop_on_index): - mock_mgr.setup_stubs(error, stop_on_index) - - with pytest.raises(error): - mock_mgr.scenario_data.scenario.destroy() diff --git a/python/cross_service/resilient_service/test/test_runner_integ.py b/python/cross_service/resilient_service/test/test_runner_integ.py index b8a822b2bf9..d247a698c54 100644 --- a/python/cross_service/resilient_service/test/test_runner_integ.py +++ b/python/cross_service/resilient_service/test/test_runner_integ.py @@ -1,51 +1,102 @@ # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
# SPDX-License-Identifier: Apache-2.0 +import unittest +from itertools import cycle +from unittest.mock import patch -import logging - +import boto3 import pytest +from botocore.exceptions import ClientError -from auto_scaler import AutoScaler -from load_balancer import LoadBalancer +from auto_scaler import AutoScalingWrapper +from load_balancer import ElasticLoadBalancerWrapper from parameters import ParameterHelper from recommendation_service import RecommendationService -import runner - - -@pytest.mark.integ -def test_runner_integ(input_mocker, caplog): - caplog.set_level(logging.INFO) - prefix = "doc-example-test-resilience" - table_name = "doc-example-test-recommendation-service" - recommendation = RecommendationService.from_client(table_name) - autoscaler = AutoScaler.from_client(prefix) - loadbalancer = LoadBalancer.from_client(prefix) - param_helper = ParameterHelper.from_client(recommendation.table_name) - scenario = runner.Runner( - "test/resources", recommendation, autoscaler, loadbalancer, param_helper - ) - - input_mocker.mock_answers( - [ - "", - "", - "y", - "", # deploy - "1", - "2", - "3", - "3", - "3", - "3", - "3", - "3", - "3", # demo - "y", # destroy - ] - ) - - scenario.deploy() - scenario.demo() - scenario.destroy() - - assert f"Table {table_name} deleted." in caplog.text +from runner import Runner + + +@pytest.fixture(autouse=True) +def disable_capture(pytestconfig): + pytestconfig.option.capture = "no" + + +class TestRunnerIntegration(unittest.TestCase): + @classmethod + def setUpClass(cls): + # Set up AWS clients + cls.ddb_client = boto3.client("dynamodb") + cls.elb_client = boto3.client("elbv2") + cls.autoscaling_client = boto3.client("autoscaling") + cls.ec2_client = boto3.client("ec2") + cls.ssm_client = boto3.client("ssm") + cls.iam_client = boto3.client("iam") + + # Initialize the services and runner + cls.prefix = "test-doc-example-resilience" + cls.resource_path = "../../../workflows/resilient_service/resources" + cls.recommendation = RecommendationService( + "test-recommendation-service", cls.ddb_client + ) + cls.autoscaling_wrapper = AutoScalingWrapper( + cls.prefix, + "t3.micro", + "/aws/service/ami-amazon-linux-latest/amzn2-ami-hvm-x86_64-gp2", + cls.autoscaling_client, + cls.ec2_client, + cls.ssm_client, + cls.iam_client, + ) + cls.elb_wrapper = ElasticLoadBalancerWrapper(cls.elb_client) + cls.param_helper = ParameterHelper( + cls.recommendation.table_name, cls.ssm_client + ) + cls.runner = Runner( + cls.resource_path, + cls.recommendation, + cls.autoscaling_wrapper, + cls.elb_wrapper, + cls.param_helper, + ) + + @pytest.mark.integ + @pytest.mark.usefixtures("disable_capture") + @patch("builtins.input", side_effect=cycle(["3"])) + def test_deploy_resources(self, mock_input): + try: + self.runner.deploy() + # Verify that resources were created + table = self.ddb_client.describe_table( + TableName=self.recommendation.table_name + ) + self.assertEqual(table["Table"]["TableStatus"], "ACTIVE") + + lb = self.elb_client.describe_load_balancers( + Names=[self.runner.load_balancer_name] + ) + self.assertEqual(lb["LoadBalancers"][0]["State"]["Code"], "active") + except ClientError as e: + self.fail(f"Deployment failed with error: {e}") + + @pytest.mark.integ + @pytest.mark.usefixtures("disable_capture") + @patch("builtins.input", side_effect=cycle(["3"])) + def test_service_resilience(self, mock_input): + self.runner.deploy() + try: + self.runner.demo() + except Exception as e: + self.fail(f"Service resilience test failed with error: {e}") + finally: + 
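            # Tear down even if demo() raised, so the test doesn't leak AWS resources.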
self.runner.destroy(automation=True) + + @classmethod + def tearDownClass(cls): + # Clean up in case any resources were left + try: + cls.runner.destroy(automation=True) + except Exception as e: + print(f"Cleanup failed in tearDown with error: {e}") + + +if __name__ == "__main__": + unittest.main() diff --git a/python/example_code/auto-scaling/README.md b/python/example_code/auto-scaling/README.md index 699f059dd85..90818c5420c 100644 --- a/python/example_code/auto-scaling/README.md +++ b/python/example_code/auto-scaling/README.md @@ -50,17 +50,17 @@ Code examples that show you how to perform the essential operations within a ser Code excerpts that show you how to call individual service functions. -- [AttachLoadBalancerTargetGroups](../../cross_service/resilient_service/auto_scaler.py#L535) -- [CreateAutoScalingGroup](action_wrapper.py#L30) -- [DeleteAutoScalingGroup](../../cross_service/resilient_service/auto_scaler.py#L562) -- [DescribeAutoScalingGroups](action_wrapper.py#L113) -- [DescribeAutoScalingInstances](action_wrapper.py#L195) -- [DescribeScalingActivities](action_wrapper.py#L220) -- [DisableMetricsCollection](action_wrapper.py#L271) -- [EnableMetricsCollection](action_wrapper.py#L248) -- [SetDesiredCapacity](action_wrapper.py#L169) -- [TerminateInstanceInAutoScalingGroup](action_wrapper.py#L139) -- [UpdateAutoScalingGroup](action_wrapper.py#L67) +- [AttachLoadBalancerTargetGroups](../../cross_service/resilient_service/auto_scaler.py#L615) +- [CreateAutoScalingGroup](action_wrapper.py#L31) +- [DeleteAutoScalingGroup](../../cross_service/resilient_service/auto_scaler.py#L656) +- [DescribeAutoScalingGroups](action_wrapper.py#L167) +- [DescribeAutoScalingInstances](action_wrapper.py#L284) +- [DescribeScalingActivities](action_wrapper.py#L317) +- [DisableMetricsCollection](action_wrapper.py#L399) +- [EnableMetricsCollection](action_wrapper.py#L359) +- [SetDesiredCapacity](action_wrapper.py#L248) +- [TerminateInstanceInAutoScalingGroup](action_wrapper.py#L204) +- [UpdateAutoScalingGroup](action_wrapper.py#L92) ### Scenarios diff --git a/python/example_code/auto-scaling/action_wrapper.py b/python/example_code/auto-scaling/action_wrapper.py index d55a8abf5a6..a1f2b6ff178 100644 --- a/python/example_code/auto-scaling/action_wrapper.py +++ b/python/example_code/auto-scaling/action_wrapper.py @@ -9,6 +9,7 @@ """ import logging +from typing import Any, Dict, List, Optional from botocore.exceptions import ClientError @@ -29,8 +30,13 @@ def __init__(self, autoscaling_client): # snippet-start:[python.example_code.auto-scaling.CreateAutoScalingGroup] def create_group( - self, group_name, group_zones, launch_template_name, min_size, max_size - ): + self, + group_name: str, + group_zones: List[str], + launch_template_name: str, + min_size: int, + max_size: int, + ) -> None: """ Creates an Auto Scaling group. @@ -41,6 +47,8 @@ def create_group( instances that are created by auto scaling activities. :param min_size: The minimum number of active instances in the group. :param max_size: The maximum number of active instances in the group. + :return: None + :raises ClientError: If there is an error creating the Auto Scaling group. """ try: self.autoscaling_client.create_auto_scaling_group( @@ -53,91 +61,150 @@ def create_group( MinSize=min_size, MaxSize=max_size, ) + + # Wait for the group to exist. 
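+            # The 'group_exists' waiter polls DescribeAutoScalingGroups until the
+            # new group is visible, so callers can act on it as soon as this returns.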
+            waiter = self.autoscaling_client.get_waiter("group_exists")
+            waiter.wait(AutoScalingGroupNames=[group_name])
+
+            logger.info(f"Successfully created Auto Scaling group {group_name}.")
+
         except ClientError as err:
-            logger.error(
-                "Couldn't create group %s. Here's why: %s: %s",
-                group_name,
-                err.response["Error"]["Code"],
-                err.response["Error"]["Message"],
-            )
+            error_code = err.response["Error"]["Code"]
+            logger.error(f"Failed to create Auto Scaling group {group_name}.")
+            if error_code == "AlreadyExistsFault":
+                logger.error(
+                    f"An Auto Scaling group with the name '{group_name}' already exists. "
+                    "Please use a different name or update the existing group.",
+                )
+            elif error_code == "LimitExceededFault":
+                logger.error(
+                    "The request failed because you have reached the limit "
+                    "on the number of Auto Scaling groups or launch configurations. "
+                    "Consider deleting unused resources or requesting a limit increase. "
+                    "\nSee the Auto Scaling service quotas documentation here:"
+                    "\n\thttps://docs.aws.amazon.com/autoscaling/ec2/userguide/ec2-auto-scaling-quotas.html"
+                )
+            logger.error(f"Full error:\n\t{err}")
             raise

     # snippet-end:[python.example_code.auto-scaling.CreateAutoScalingGroup]

     # snippet-start:[python.example_code.auto-scaling.UpdateAutoScalingGroup]
-    def update_group(self, group_name, **kwargs):
+    def update_group(self, group_name: str, **kwargs: Any) -> None:
         """
         Updates an Auto Scaling group.

         :param group_name: The name of the group to update.
         :param kwargs: Keyword arguments to pass through to the service.
+        :return: None
+        :raises ClientError: If there is an error updating the Auto Scaling group.
         """
         try:
             self.autoscaling_client.update_auto_scaling_group(
                 AutoScalingGroupName=group_name, **kwargs
             )
+            logger.info(f"Successfully updated Auto Scaling group {group_name}.")
+
         except ClientError as err:
-            logger.error(
-                "Couldn't update group %s. Here's why: %s: %s",
-                group_name,
-                err.response["Error"]["Code"],
-                err.response["Error"]["Message"],
-            )
+            error_code = err.response["Error"]["Code"]
+            logger.error(f"Failed to update Auto Scaling group {group_name}.")
+            if error_code == "ResourceInUse":
+                logger.error(
+                    "The Auto Scaling group '%s' is currently in use and cannot be modified. Please try again later.",
+                    group_name,
+                )
+            elif error_code == "ScalingActivityInProgress":
+                logger.error(
+                    f"A scaling activity is currently in progress for the Auto Scaling group '{group_name}'. "
+                    "Please wait for the activity to complete before attempting to update the group."
+                )
+            logger.error(f"Full error:\n\t{err}")
             raise

     # snippet-end:[python.example_code.auto-scaling.UpdateAutoScalingGroup]

     # snippet-start:[python.example_code.auto-scaling.DeleteAutoScalingGroup]
-    def delete_group(self, group_name):
+    def delete_group(self, group_name: str) -> None:
         """
         Deletes an Auto Scaling group. All instances must be stopped before the
         group can be deleted.

         :param group_name: The name of the group to delete.
+        :return: None
+        :raises ClientError: If there is an error deleting the Auto Scaling group.
         """
         try:
             self.autoscaling_client.delete_auto_scaling_group(
                 AutoScalingGroupName=group_name
             )
+
+            # Wait for the group to be deleted.
+            waiter = self.autoscaling_client.get_waiter("group_not_exists")
+            waiter.wait(AutoScalingGroupNames=[group_name])
+
+            logger.info(f"Successfully deleted Auto Scaling group {group_name}.")
+
         except ClientError as err:
-            logger.error(
-                "Couldn't delete group %s. Here's why: %s: %s",
-                group_name,
-                err.response["Error"]["Code"],
-                err.response["Error"]["Message"],
-            )
+            error_code = err.response["Error"]["Code"]
+            logger.error(f"Failed to delete Auto Scaling group {group_name}.")
+            if error_code == "ScalingActivityInProgress":
+                logger.error(
+                    "A scaling activity is currently in progress for the Auto Scaling group '%s'. "
+                    "Please wait for the activity to complete before attempting to delete the group.",
+                    group_name,
+                )
+            elif error_code == "ResourceInUse":
+                logger.error(
+                    "The Auto Scaling group '%s' or one of its associated resources is currently in use and cannot be deleted. "
+                    "Ensure all instances are stopped and no other operations are pending before retrying.",
+                    group_name,
+                )
+            logger.error(f"Full error:\n\t{err}")
             raise

     # snippet-end:[python.example_code.auto-scaling.DeleteAutoScalingGroup]

     # snippet-start:[python.example_code.auto-scaling.DescribeAutoScalingGroups]
-    def describe_group(self, group_name):
+    def describe_group(self, group_name: str) -> Optional[Dict[str, Any]]:
         """
         Gets information about an Auto Scaling group.

         :param group_name: The name of the group to look up.
-        :return: Information about the group, if found.
+        :return: A dictionary with information about the group if found, otherwise None.
+        :raises ClientError: If there is an error describing the Auto Scaling group.
         """
         try:
-            response = self.autoscaling_client.describe_auto_scaling_groups(
-                AutoScalingGroupNames=[group_name]
+            paginator = self.autoscaling_client.get_paginator(
+                "describe_auto_scaling_groups"
             )
-        except ClientError as err:
-            logger.error(
-                "Couldn't describe group %s. Here's why: %s: %s",
-                group_name,
-                err.response["Error"]["Code"],
-                err.response["Error"]["Message"],
+            response_iterator = paginator.paginate(AutoScalingGroupNames=[group_name])
+            groups = []
+            for response in response_iterator:
+                groups.extend(response.get("AutoScalingGroups", []))
+
+            logger.info(
+                f"Successfully retrieved information for Auto Scaling group {group_name}."
             )
+
+        except ClientError as err:
+            error_code = err.response["Error"]["Code"]
+            logger.error(f"Failed to describe Auto Scaling group {group_name}.")
+            if error_code == "ResourceContentionFault":
+                logger.error(
+                    "There is a conflict with another operation that is modifying the "
+                    f"Auto Scaling group '{group_name}'. Please try again later."
+                )
+            logger.error(f"Full error:\n\t{err}")
             raise
         else:
-            groups = response.get("AutoScalingGroups", [])
             return groups[0] if len(groups) > 0 else None

     # snippet-end:[python.example_code.auto-scaling.DescribeAutoScalingGroups]

     # snippet-start:[python.example_code.auto-scaling.TerminateInstanceInAutoScalingGroup]
-    def terminate_instance(self, instance_id, decrease_capacity):
+    def terminate_instance(
+        self, instance_id: str, decrease_capacity: bool
+    ) -> Dict[str, Any]:
         """
         Stops an instance.

@@ -147,33 +214,47 @@ def terminate_instance(self, instance_id, decrease_capacity):
             you can stop an instance without having a replacement instance start
             when the desired capacity threshold is crossed.

-        :return: The scaling activity that occurs in response to this action.
+        :return: A dictionary containing details of the scaling activity that occurs
+            in response to this action.
+        :raises ClientError: If there is an error terminating the instance.
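+
+        Example (illustrative; "wrapper" is an AutoScalingWrapper instance and
+        the instance ID is a placeholder):
+
+            activity = wrapper.terminate_instance("i-0123456789abcdef0", False)
+            print(activity["Description"])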
""" try: response = self.autoscaling_client.terminate_instance_in_auto_scaling_group( InstanceId=instance_id, ShouldDecrementDesiredCapacity=decrease_capacity ) + logger.info(f"Successfully terminated instance {instance_id}.") + return response["Activity"] + except ClientError as err: - logger.error( - "Couldn't terminate instance %s. Here's why: %s: %s", - instance_id, - err.response["Error"]["Code"], - err.response["Error"]["Message"], - ) + error_code = err.response["Error"]["Code"] + logger.error(f"Failed to terminate instance {instance_id}.") + if error_code == "ScalingActivityInProgress": + logger.error( + "A scaling activity is currently in progress for the Auto Scaling group " + f"associated with instance '{instance_id}'. " + "Please wait for the activity to complete before attempting to terminate the instance." + ) + elif error_code == "ResourceInUse": + logger.error( + f"The instance '{instance_id}' or an associated resource is currently in use " + "and cannot be terminated. " + "Ensure the instance is not involved in any ongoing processes and try again." + ) + logger.error(f"Full error:\n\t{err}") raise - else: - return response["Activity"] # snippet-end:[python.example_code.auto-scaling.TerminateInstanceInAutoScalingGroup] # snippet-start:[python.example_code.auto-scaling.SetDesiredCapacity] - def set_desired_capacity(self, group_name, capacity): + def set_desired_capacity(self, group_name: str, capacity: int) -> None: """ Sets the desired capacity of the group. Amazon EC2 Auto Scaling tries to keep the number of running instances equal to the desired capacity. :param group_name: The name of the group to update. :param capacity: The desired number of running instances. + :return: None + :raises ClientError: If there is an error setting the desired capacity. """ try: self.autoscaling_client.set_desired_capacity( @@ -181,111 +262,168 @@ def set_desired_capacity(self, group_name, capacity): DesiredCapacity=capacity, HonorCooldown=False, ) + logger.info( + f"Successfully set desired capacity of {capacity} for Auto Scaling group '{group_name}'." + ) + except ClientError as err: + error_code = err.response["Error"]["Code"] logger.error( - "Couldn't set desired capacity %s. Here's why: %s: %s", - group_name, - err.response["Error"]["Code"], - err.response["Error"]["Message"], + f"Failed to set desired capacity for Auto Scaling group '{group_name}'." ) + if error_code == "ScalingActivityInProgress": + logger.error( + f"A scaling activity is currently in progress for the Auto Scaling group '{group_name}'. " + "Please wait for the activity to complete before attempting to set the desired capacity." + ) + logger.error(f"Full error:\n\t{err}") raise # snippet-end:[python.example_code.auto-scaling.SetDesiredCapacity] # snippet-start:[python.example_code.auto-scaling.DescribeAutoScalingInstances] - def describe_instances(self, instance_ids): + def describe_instances(self, instance_ids: List[str]) -> List[Dict[str, Any]]: """ Gets information about instances. :param instance_ids: A list of instance IDs to look up. - :return: Information about instances, or an empty list if none are found. + :return: A list of dictionaries with information about each instance, + or an empty list if none are found. + :raises ClientError: If there is an error describing the instances. 
""" try: - response = self.autoscaling_client.describe_auto_scaling_instances( - InstanceIds=instance_ids + paginator = self.autoscaling_client.get_paginator( + "describe_auto_scaling_instances" ) + response_iterator = paginator.paginate(InstanceIds=instance_ids) + + instances = [] + for response in response_iterator: + instances.extend(response.get("AutoScalingInstances", [])) + + logger.info(f"Successfully described instances: {instance_ids}") + except ClientError as err: + error_code = err.response["Error"]["Code"] logger.error( - "Couldn't describe instances %s. Here's why: %s: %s", - instance_ids, - err.response["Error"]["Code"], - err.response["Error"]["Message"], + f"Couldn't describe instances {instance_ids}. Error code: {error_code}, Message: {err.response['Error']['Message']}" ) raise else: - return response["AutoScalingInstances"] + return instances # snippet-end:[python.example_code.auto-scaling.DescribeAutoScalingInstances] # snippet-start:[python.example_code.auto-scaling.DescribeScalingActivities] - def describe_scaling_activities(self, group_name): + def describe_scaling_activities(self, group_name: str) -> List[Dict[str, Any]]: """ Gets information about scaling activities for the group. Scaling activities are things like instances stopping or starting in response to user requests or capacity changes. :param group_name: The name of the group to look up. - :return: The list of scaling activities for the group, ordered with the most - recent activity first. + :return: A list of dictionaries representing the scaling activities for the + group, ordered with the most recent activity first. + :raises ClientError: If there is an error describing the scaling activities. """ try: - response = self.autoscaling_client.describe_scaling_activities( - AutoScalingGroupName=group_name + paginator = self.autoscaling_client.get_paginator( + "describe_scaling_activities" ) + response_iterator = paginator.paginate(AutoScalingGroupName=group_name) + activities = [] + for response in response_iterator: + activities.extend(response.get("Activities", [])) + + logger.info( + f"Successfully described scaling activities for group '{group_name}'." + ) + except ClientError as err: + error_code = err.response["Error"]["Code"] logger.error( - "Couldn't describe scaling activities %s. Here's why: %s: %s", - group_name, - err.response["Error"]["Code"], - err.response["Error"]["Message"], + f"Couldn't describe scaling activities for group '{group_name}'. Error code: {error_code}, Message: {err.response['Error']['Message']}" ) + + if error_code == "ResourceContentionFault": + logger.error( + f"There is a conflict with another operation that is modifying the Auto Scaling group '{group_name}'. " + "Please try again later." + ) raise else: - return response["Activities"] + return activities # snippet-end:[python.example_code.auto-scaling.DescribeScalingActivities] # snippet-start:[python.example_code.auto-scaling.EnableMetricsCollection] - def enable_metrics(self, group_name, metrics): + def enable_metrics(self, group_name: str, metrics: List[str]) -> Dict[str, Any]: """ Enables CloudWatch metric collection for Amazon EC2 Auto Scaling activities. :param group_name: The name of the group to enable. :param metrics: A list of metrics to collect. + :return: A dictionary with the response from enabling the metrics collection. + :raises ClientError: If there is an error enabling metrics collection. 
""" try: - self.autoscaling_client.enable_metrics_collection( + response = self.autoscaling_client.enable_metrics_collection( AutoScalingGroupName=group_name, Metrics=metrics, Granularity="1Minute" ) + logger.info( + f"Successfully enabled metrics for Auto Scaling group '{group_name}'." + ) + except ClientError as err: + error_code = err.response["Error"]["Code"] logger.error( - "Couldn't enable metrics on %s. Here's why: %s: %s", - group_name, - err.response["Error"]["Code"], - err.response["Error"]["Message"], + f"Couldn't enable metrics on '{group_name}'. Error code: {error_code}, Message: {err.response['Error']['Message']}" ) + + if error_code == "ResourceContentionFault": + logger.error( + f"There is a conflict with another operation that is modifying the Auto Scaling group '{group_name}'. " + "Please try again later." + ) + elif error_code == "InvalidParameterCombination": + logger.error( + f"The combination of parameters provided for enabling metrics on '{group_name}' is not valid. " + "Please check the parameters and try again." + ) raise + else: + return response # snippet-end:[python.example_code.auto-scaling.EnableMetricsCollection] # snippet-start:[python.example_code.auto-scaling.DisableMetricsCollection] - def disable_metrics(self, group_name): + def disable_metrics(self, group_name: str) -> Dict[str, Any]: """ Stops CloudWatch metric collection for the Auto Scaling group. :param group_name: The name of the group. + :return: A dictionary with the response from disabling the metrics collection. + :raises ClientError: If there is an error disabling metrics collection. """ try: - self.autoscaling_client.disable_metrics_collection( + response = self.autoscaling_client.disable_metrics_collection( AutoScalingGroupName=group_name ) + logger.info( + f"Successfully disabled metrics collection for group '{group_name}'." + ) + return response except ClientError as err: + error_code = err.response["Error"]["Code"] logger.error( - "Couldn't disable metrics %s. Here's why: %s: %s", - group_name, - err.response["Error"]["Code"], - err.response["Error"]["Message"], + f"Couldn't disable metrics for group '{group_name}'. Error code: {error_code}, Message: {err.response['Error']['Message']}" ) + + if error_code == "ResourceContentionFault": + logger.error( + f"There is a conflict with another operation that is modifying the Auto Scaling group '{group_name}'. " + "Please try again later." + ) raise # snippet-end:[python.example_code.auto-scaling.DisableMetricsCollection] diff --git a/python/example_code/auto-scaling/hello/hello_autoscaling.py b/python/example_code/auto-scaling/hello/hello_autoscaling.py index 5beb6fa498c..2e5867dd2f8 100644 --- a/python/example_code/auto-scaling/hello/hello_autoscaling.py +++ b/python/example_code/auto-scaling/hello/hello_autoscaling.py @@ -12,7 +12,7 @@ def hello_autoscaling(autoscaling_client): This example uses the default settings specified in your shared credentials and config files. - :param auto-scaling_client: A Boto3 Amazon EC2 Auto Scaling client object. + :param autoscaling_client: A Boto3 Amazon EC2 Auto Scaling client object. """ print( "Hello, Amazon EC2 Auto Scaling! 
Let's list up to ten of you Auto Scaling groups:" diff --git a/python/example_code/auto-scaling/scenario_groups_and_instances.py b/python/example_code/auto-scaling/scenario_groups_and_instances.py index 7bd0bc3135e..c78bc3f4707 100644 --- a/python/example_code/auto-scaling/scenario_groups_and_instances.py +++ b/python/example_code/auto-scaling/scenario_groups_and_instances.py @@ -20,19 +20,26 @@ 9. Stop collecting metrics, terminate all instances, and delete the group. """ -from datetime import datetime, timedelta, timezone import logging -from pprint import pp import sys +from datetime import datetime, timedelta, timezone +from pprint import pp + import boto3 +import coloredlogs from botocore.exceptions import ClientError from action_wrapper import AutoScalingWrapper -# Add relative path to include demo_tools in this code example without needing to setup. +# Add relative path to include demo_tools in this code example without needing to set up. sys.path.append("../..") -import demo_tools.question as q -from demo_tools.retries import wait +import demo_tools.question as q # noqa +from demo_tools.retries import wait # noqa + +# Configure coloredlogs +coloredlogs.install( + level="INFO", fmt="%(asctime)s %(levelname)s: %(message)s", datefmt="%H:%M:%S" +) logger = logging.getLogger(__name__) @@ -49,19 +56,22 @@ def __init__(self, ec2_client, cloudwatch_resource): self.ec2_client = ec2_client self.cloudwatch_resource = cloudwatch_resource - def get_template(self, template_name): + def get_template(self, template_name: str) -> dict: """ Gets a launch template. Launch templates specify configuration for instances that are launched by Amazon EC2 Auto Scaling. :param template_name: The name of the template to look up. :return: The template, if it exists. + :raises ClientError: If there is an error retrieving the launch template. """ try: response = self.ec2_client.describe_launch_templates( LaunchTemplateNames=[template_name] ) template = response["LaunchTemplates"][0] + logger.info("Launch template %s retrieved successfully.", template_name) + return template except ClientError as err: if ( err.response["Error"]["Code"] @@ -70,16 +80,14 @@ def get_template(self, template_name): logger.warning("Launch template %s does not exist.", template_name) else: logger.error( - "Couldn't verify launch template %s. Here's why: %s: %s", + "Couldn't verify launch template %s. Error: %s: %s", template_name, err.response["Error"]["Code"], err.response["Error"]["Message"], ) raise - else: - return template - def create_template(self, template_name, inst_type, ami_id): + def create_template(self, template_name: str, inst_type: str, ami_id: str) -> dict: """ Creates an Amazon EC2 launch template to use with Amazon EC2 Auto Scaling. @@ -88,6 +96,7 @@ def create_template(self, template_name, inst_type, ami_id): :param ami_id: The ID of the Amazon Machine Image (AMI) to use when creating an instance. :return: Information about the newly created template. + :raises ClientError: If there is an error creating the launch template. """ try: response = self.ec2_client.create_launch_template( @@ -95,60 +104,69 @@ def create_template(self, template_name, inst_type, ami_id): LaunchTemplateData={"InstanceType": inst_type, "ImageId": ami_id}, ) template = response["LaunchTemplate"] + logger.info( + "Created launch template %s with instance type %s and AMI ID %s.", + template_name, + inst_type, + ami_id, + ) + return template except ClientError as err: logger.error( - "Couldn't create launch template %s. 
Here's why: %s: %s", + "Couldn't create launch template %s. Error: %s: %s", template_name, err.response["Error"]["Code"], err.response["Error"]["Message"], ) raise - else: - return template - def delete_template(self, template_name): + def delete_template(self, template_name: str) -> None: """ Deletes a launch template. :param template_name: The name of the template to delete. + :raises ClientError: If there is an error deleting the launch template. """ try: self.ec2_client.delete_launch_template(LaunchTemplateName=template_name) + logger.info("Deleted launch template %s.", template_name) except ClientError as err: logger.error( - "Couldn't delete launch template %s. Here's why: %s: %s", + "Couldn't delete launch template %s. Error: %s: %s", template_name, err.response["Error"]["Code"], err.response["Error"]["Message"], ) raise - def get_availability_zones(self): + def get_availability_zones(self) -> list: """ Gets a list of Availability Zones in the AWS Region of the Amazon EC2 client. :return: The list of Availability Zones for the client Region. + :raises ClientError: If there is an error retrieving availability zones. """ try: response = self.ec2_client.describe_availability_zones() zones = [zone["ZoneName"] for zone in response["AvailabilityZones"]] + logger.info("Retrieved availability zones: %s.", ", ".join(zones)) + return zones except ClientError as err: logger.error( - "Couldn't get availability zones. Here's why: %s: %s", + "Couldn't get availability zones. Error: %s: %s", err.response["Error"]["Code"], err.response["Error"]["Message"], ) raise - else: - return zones - def get_metrics(self, namespace, dimensions): + def get_metrics(self, namespace: str, dimensions: list) -> list: """ Gets a list of CloudWatch metrics filtered by namespace and dimensions. :param namespace: The namespace of the metrics to look up. :param dimensions: The dimensions of the metrics to look up. :return: The list of metrics. + :raises ClientError: If there is an error retrieving CloudWatch metrics. """ try: metrics = list( @@ -156,20 +174,26 @@ def get_metrics(self, namespace, dimensions): Namespace=namespace, Dimensions=dimensions ) ) + logger.info( + "Retrieved metrics for namespace %s with dimensions %s.", + namespace, + dimensions, + ) + return metrics except ClientError as err: logger.error( - "Couldn't get metrics for %s, %s. Here's why: %s: %s", + "Couldn't get metrics for %s, %s. Error: %s: %s", namespace, dimensions, err.response["Error"]["Code"], err.response["Error"]["Message"], ) raise - else: - return metrics @staticmethod - def get_metric_statistics(dimensions, metric, start, end): + def get_metric_statistics( + dimensions: list, metric, start: datetime, end: datetime + ) -> list: """ Gets statistics for a CloudWatch metric within a specified time span. @@ -178,6 +202,7 @@ def get_metric_statistics(dimensions, metric, start, end): :param start: The start of the time span for retrieved metrics. :param end: The end of the time span for retrieved metrics. :return: The list of data points found for the specified metric. + :raises ClientError: If there is an error retrieving metric statistics. """ try: response = metric.get_statistics( @@ -188,21 +213,24 @@ def get_metric_statistics(dimensions, metric, start, end): Statistics=["Sum"], ) data = response["Datapoints"] + logger.info("Retrieved statistics for metric %s.", metric.name) + return data except ClientError as err: logger.error( - "Couldn't get statistics for metric %s. Here's why: %s: %s", + "Couldn't get statistics for metric %s. 
Error: %s: %s", metric.name, err.response["Error"]["Code"], err.response["Error"]["Message"], ) raise - else: - return data -def print_simplified_group(group): +def print_simplified_group(group: dict) -> None: """ Prints a subset of data for an Auto Scaling group. + + :param group: The Auto Scaling group data to print. + :return: None """ print(group["AutoScalingGroupName"]) print(f"\tLaunch template: {group['LaunchTemplate']['LaunchTemplateName']}") @@ -215,20 +243,28 @@ def print_simplified_group(group): print(f"\t\t{inst['InstanceId']}: {inst['LifecycleState']}") -def wait_for_group(group_name, as_wrapper): +def wait_for_group(group_name: str, as_wrapper: AutoScalingWrapper) -> list: """ Waits for instances to start or stop in an Auto Scaling group. Prints the data for each instance after scaling activities are complete. + + :param group_name: The name of the Auto Scaling group. + :param as_wrapper: The AutoScalingWrapper that manages Auto Scaling groups. + :return: A list of instance IDs in the group. """ group = as_wrapper.describe_group(group_name) instance_ids = [i["InstanceId"] for i in group["Instances"]] return wait_for_instances(instance_ids, as_wrapper) -def wait_for_instances(instance_ids, as_wrapper): +def wait_for_instances(instance_ids: list, as_wrapper: AutoScalingWrapper) -> list: """ Waits for instances to start or stop in an Auto Scaling group. Prints the data for each instance after scaling activities are complete. + + :param instance_ids: A list of instance IDs to wait for. + :param as_wrapper: The AutoScalingWrapper that manages Auto Scaling groups. + :return: A list of instance IDs that were waited on. """ ready = False instances = [] @@ -251,9 +287,18 @@ def wait_for_instances(instance_ids, as_wrapper): # snippet-start:[python.example_code.auto-scaling.Scenario_GroupsAndInstances] -def run_scenario(as_wrapper, svc_helper): +def run_scenario(as_wrapper: AutoScalingWrapper, svc_helper: ServiceHelper) -> None: + """ + Runs the scenario demonstrating the management of Auto Scaling groups and instances. + + :param as_wrapper: An instance of the AutoScalingWrapper that manages Auto Scaling groups. + :param svc_helper: An instance of the ServiceHelper that interacts with AWS services. + :return: None + """ logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s") + logger.info("Starting the Amazon EC2 Auto Scaling demo.") + print("-" * 88) print( "Welcome to the Amazon EC2 Auto Scaling demo for managing groups and instances." @@ -261,7 +306,7 @@ def run_scenario(as_wrapper, svc_helper): print("-" * 88) print( - "This example requires a launch template that specifies how to create\n" + "This example requires a launch template that specifies how to create " "EC2 instances. You can use an existing template or create a new one." 
     )
     template_name = q.ask(
@@ -292,9 +337,10 @@ def run_scenario(as_wrapper, svc_helper):
     )
     group_zones = [zones[zone_sel - 1]] if zone_sel <= len(zones) else zones
     print(f"Creating group {group_name}...")
     as_wrapper.create_group(group_name, group_zones, template_name, 1, 1)
     wait(10)
     group = as_wrapper.describe_group(group_name)
+    logger.info("Created Auto Scaling group %s.", group_name)
     print("Created group:")
     pp(group)
     print("Waiting for instance to start...")
@@ -316,6 +362,7 @@ def run_scenario(as_wrapper, svc_helper):
             "GroupTotalInstances",
         ],
     )
+    logger.info("Enabled metrics for Auto Scaling group %s.", group_name)
     print(f"Metrics enabled for {group_name}.")
     print("-" * 88)

@@ -323,6 +370,7 @@ def run_scenario(as_wrapper, svc_helper):
     q.ask("Press Enter when you're ready.")
     as_wrapper.update_group(group_name, MaxSize=3)
     group = as_wrapper.describe_group(group_name)
+    logger.info("Updated maximum size for group %s to 3.", group_name)
     print("The group still has one running instance, but can have up to three:")
     print_simplified_group(group)
     print("-" * 88)
@@ -332,6 +380,7 @@ def run_scenario(as_wrapper, svc_helper):
     as_wrapper.set_desired_capacity(group_name, 2)
     wait(10)
     group = as_wrapper.describe_group(group_name)
+    logger.info("Set desired capacity for group %s to 2.", group_name)
     print("Here's the current state of the group:")
     print_simplified_group(group)
     print("-" * 88)
@@ -353,6 +402,9 @@ def run_scenario(as_wrapper, svc_helper):
     as_wrapper.terminate_instance(instance_ids[inst_sel - 1], False)
     wait(10)
     group = as_wrapper.describe_group(group_name)
+    logger.info(
+        "Terminated instance %s in group %s.", instance_ids[inst_sel - 1], group_name
+    )
     print(f"Here's the state of {group_name}:")
     print_simplified_group(group)
     print("Waiting for the scaling activities to complete...")
@@ -362,6 +414,9 @@ def run_scenario(as_wrapper, svc_helper):
     print(f"Let's get a report of scaling activities for {group_name}.")
     q.ask("Press Enter when you're ready.")
     activities = as_wrapper.describe_scaling_activities(group_name)
+    logger.info(
+        "Retrieved %d scaling activities for group %s.", len(activities), group_name
+    )
     print(
         f"Found {len(activities)} activities.\n"
         f"Activities are ordered with the most recent one first:"
@@ -406,6 +461,7 @@ def run_scenario(as_wrapper, svc_helper):
     if use_metrics:
         print(f"Stopping metrics collection for {group_name}.")
         as_wrapper.disable_metrics(group_name)
+        logger.info("Disabled metrics collection for group %s.", group_name)

     print(
         "You must terminate all instances in the group before you can delete the group."
@@ -417,10 +473,12 @@ def run_scenario(as_wrapper, svc_helper):
     for inst_id in instance_ids:
         print(f"Stopping {inst_id}.")
         as_wrapper.terminate_instance(inst_id, True)
+        logger.info("Terminated instance %s in group %s.", inst_id, group_name)
     print("Waiting for instances to stop...")
     wait_for_instances(instance_ids, as_wrapper)
     print(f"Deleting {group_name}.")
     as_wrapper.delete_group(group_name)
+    logger.info("Deleted Auto Scaling group %s.", group_name)
     print("-" * 88)

     if template is not None:
         if q.ask(
             f"Do you want to delete launch template {template_name} used in this demo (y/n)? 
" ): svc_helper.delete_template(template_name) + logger.info("Deleted launch template %s.", template_name) print("Template deleted.") print("\nThanks for watching!") @@ -440,5 +499,5 @@ def run_scenario(as_wrapper, svc_helper): helper = ServiceHelper(boto3.client("ec2"), boto3.resource("cloudwatch")) run_scenario(wrapper, helper) except Exception: - logging.exception("Something went wrong with the demo!") + logger.exception("Something went wrong with the demo!") # snippet-end:[python.example_code.auto-scaling.Scenario_GroupsAndInstances] diff --git a/python/example_code/auto-scaling/test/conftest.py b/python/example_code/auto-scaling/test/conftest.py index cc2910b2309..0ca27fb479e 100644 --- a/python/example_code/auto-scaling/test/conftest.py +++ b/python/example_code/auto-scaling/test/conftest.py @@ -5,9 +5,8 @@ Contains common test fixtures used to run unit tests. """ -import random import sys # This is needed so that Python can find test_tools on the path. sys.path.append("../..") -from test_tools.fixtures.common import * +from test_tools.fixtures.common import * # noqa diff --git a/python/example_code/auto-scaling/test/test_action_wrapper.py b/python/example_code/auto-scaling/test/test_action_wrapper.py index ce12af05a0c..27a41f4a76a 100644 --- a/python/example_code/auto-scaling/test/test_action_wrapper.py +++ b/python/example_code/auto-scaling/test/test_action_wrapper.py @@ -6,9 +6,10 @@ """ from datetime import datetime + import boto3 -from botocore.exceptions import ClientError import pytest +from botocore.exceptions import ClientError from action_wrapper import AutoScalingWrapper @@ -24,7 +25,7 @@ def test_create_group(make_stubber, error_code): min_size = 1 max_size = 3 - autoscaling_stubber.stub_create_auto_scaling_group( + autoscaling_stubber.stub_create_group( group_name, zones, launch_template_name, @@ -73,9 +74,7 @@ def test_delete_group(make_stubber, error_code): wrapper = AutoScalingWrapper(autoscaling_client) group_name = "test-group_name" - autoscaling_stubber.stub_delete_auto_scaling_group( - group_name, error_code=error_code - ) + autoscaling_stubber.stub_delete_group(group_name, error_code=error_code) if error_code is None: wrapper.delete_group(group_name) diff --git a/python/example_code/auto-scaling/test/test_scenario_groups_and_instances.py b/python/example_code/auto-scaling/test/test_scenario_groups_and_instances.py index 73546336288..063597ab143 100644 --- a/python/example_code/auto-scaling/test/test_scenario_groups_and_instances.py +++ b/python/example_code/auto-scaling/test/test_scenario_groups_and_instances.py @@ -3,10 +3,11 @@ from datetime import datetime from unittest.mock import MagicMock, patch + import boto3 +import pytest from botocore.exceptions import ClientError from botocore.stub import ANY -import pytest import scenario_groups_and_instances as scenario diff --git a/python/example_code/ec2/README.md b/python/example_code/ec2/README.md index 6271628ade1..590ff78290d 100644 --- a/python/example_code/ec2/README.md +++ b/python/example_code/ec2/README.md @@ -54,24 +54,24 @@ Code excerpts that show you how to call individual service functions. 
- [AssociateAddress](elastic_ip.py#L82) - [AuthorizeSecurityGroupIngress](security_group.py#L68) - [CreateKeyPair](key_pair.py#L57) -- [CreateLaunchTemplate](../../cross_service/resilient_service/auto_scaler.py#L346) +- [CreateLaunchTemplate](../../cross_service/resilient_service/auto_scaler.py#L393) - [CreateSecurityGroup](security_group.py#L42) - [DeleteKeyPair](key_pair.py#L118) -- [DeleteLaunchTemplate](../../cross_service/resilient_service/auto_scaler.py#L409) +- [DeleteLaunchTemplate](../../cross_service/resilient_service/auto_scaler.py#L458) - [DeleteSecurityGroup](security_group.py#L148) -- [DescribeAvailabilityZones](../../cross_service/resilient_service/auto_scaler.py#L438) -- [DescribeIamInstanceProfileAssociations](../../cross_service/resilient_service/auto_scaler.py#L184) +- [DescribeAvailabilityZones](../../cross_service/resilient_service/auto_scaler.py#L485) +- [DescribeIamInstanceProfileAssociations](../../cross_service/resilient_service/auto_scaler.py#L220) - [DescribeImages](instance.py#L253) - [DescribeInstanceTypes](instance.py#L274) - [DescribeInstances](instance.py#L106) - [DescribeKeyPairs](key_pair.py#L89) - [DescribeSecurityGroups](security_group.py#L108) -- [DescribeSubnets](../../cross_service/resilient_service/auto_scaler.py#L731) -- [DescribeVpcs](../../cross_service/resilient_service/auto_scaler.py#L630) +- [DescribeSubnets](../../cross_service/resilient_service/auto_scaler.py#L842) +- [DescribeVpcs](../../cross_service/resilient_service/auto_scaler.py#L702) - [DisassociateAddress](elastic_ip.py#L121) -- [RebootInstances](../../cross_service/resilient_service/auto_scaler.py#L21) +- [RebootInstances](../../cross_service/resilient_service/auto_scaler.py#L18) - [ReleaseAddress](elastic_ip.py#L162) -- [ReplaceIamInstanceProfileAssociation](../../cross_service/resilient_service/auto_scaler.py#L205) +- [ReplaceIamInstanceProfileAssociation](../../cross_service/resilient_service/auto_scaler.py#L249) - [RunInstances](instance.py#L43) - [StartInstances](instance.py#L190) - [StopInstances](instance.py#L221) diff --git a/python/example_code/elastic-load-balancing/README.md b/python/example_code/elastic-load-balancing/README.md index 1b82dd7bbd7..a38b6073b4b 100644 --- a/python/example_code/elastic-load-balancing/README.md +++ b/python/example_code/elastic-load-balancing/README.md @@ -47,13 +47,13 @@ python -m pip install -r requirements.txt Code excerpts that show you how to call individual service functions. 
-- [CreateListener](../../cross_service/resilient_service/load_balancer.py#L143) +- [CreateListener](../../cross_service/resilient_service/load_balancer.py#L194) - [CreateLoadBalancer](../../cross_service/resilient_service/load_balancer.py#L142) -- [CreateTargetGroup](../../cross_service/resilient_service/load_balancer.py#L67) -- [DeleteLoadBalancer](../../cross_service/resilient_service/load_balancer.py#L192) -- [DeleteTargetGroup](../../cross_service/resilient_service/load_balancer.py#L106) -- [DescribeLoadBalancers](../../cross_service/resilient_service/load_balancer.py#L46) -- [DescribeTargetHealth](../../cross_service/resilient_service/load_balancer.py#L245) +- [CreateTargetGroup](../../cross_service/resilient_service/load_balancer.py#L28) +- [DeleteLoadBalancer](../../cross_service/resilient_service/load_balancer.py#L255) +- [DeleteTargetGroup](../../cross_service/resilient_service/load_balancer.py#L84) +- [DescribeLoadBalancers](../../cross_service/resilient_service/load_balancer.py#L288) +- [DescribeTargetHealth](../../cross_service/resilient_service/load_balancer.py#L342) ### Scenarios diff --git a/python/example_code/iam/README.md b/python/example_code/iam/README.md index 54fe379d338..611634ed2e3 100644 --- a/python/example_code/iam/README.md +++ b/python/example_code/iam/README.md @@ -47,7 +47,7 @@ Code excerpts that show you how to call individual service functions. - [AttachUserPolicy](user_wrapper.py#L107) - [CreateAccessKey](access_key_wrapper.py#L21) - [CreateAccountAlias](account_wrapper.py#L23) -- [CreateInstanceProfile](../../cross_service/resilient_service/auto_scaler.py#L86) +- [CreateInstanceProfile](../../cross_service/resilient_service/auto_scaler.py#L154) - [CreatePolicy](policy_wrapper.py#L25) - [CreatePolicyVersion](policy_wrapper.py#L79) - [CreateRole](role_wrapper.py#L23) @@ -55,7 +55,7 @@ Code excerpts that show you how to call individual service functions. - [CreateUser](user_wrapper.py#L25) - [DeleteAccessKey](access_key_wrapper.py#L47) - [DeleteAccountAlias](account_wrapper.py#L44) -- [DeleteInstanceProfile](../../cross_service/resilient_service/auto_scaler.py#L259) +- [DeleteInstanceProfile](../../cross_service/resilient_service/auto_scaler.py#L310) - [DeletePolicy](policy_wrapper.py#L61) - [DeleteRole](role_wrapper.py#L102) - [DeleteUser](user_wrapper.py#L46)