From c09442ab1f2a40b05a2f7c7c6bc2dacd4e6ff171 Mon Sep 17 00:00:00 2001 From: James Green Date: Mon, 9 Oct 2023 12:05:46 +0100 Subject: [PATCH] Added new max scaling metric to ECS During load testing we found that the average increase takes too long to hit the threshold, so we needed a new "max" metric. This should hit the threshold sooner when there is a spike in traffic, thus allowing it to scale. --- modules/dns/ecs_auto_scaling.tf | 57 +++++++++++++++++++++++++++++---- 1 file changed, 51 insertions(+), 6 deletions(-) diff --git a/modules/dns/ecs_auto_scaling.tf b/modules/dns/ecs_auto_scaling.tf index f6b88143..f40db42d 100644 --- a/modules/dns/ecs_auto_scaling.tf +++ b/modules/dns/ecs_auto_scaling.tf @@ -6,8 +6,8 @@ resource "aws_appautoscaling_target" "auth_ecs_target" { scalable_dimension = "ecs:service:DesiredCount" } -resource "aws_appautoscaling_policy" "ecs_policy_up" { - name = "ECS Scale Up" +resource "aws_appautoscaling_policy" "ecs_policy_up_average" { + name = "ECS Scale Up Average" service_namespace = "ecs" policy_type = "StepScaling" resource_id = "service/${aws_ecs_cluster.server_cluster.name}/${aws_ecs_service.service.name}" @@ -26,6 +26,27 @@ resource "aws_appautoscaling_policy" "ecs_policy_up" { depends_on = [aws_appautoscaling_target.auth_ecs_target] } +resource "aws_appautoscaling_policy" "ecs_policy_up_max" { + name = "ECS Scale Up Max" + service_namespace = "ecs" + policy_type = "StepScaling" + resource_id = "service/${aws_ecs_cluster.server_cluster.name}/${aws_ecs_service.service.name}" + scalable_dimension = "ecs:service:DesiredCount" + + step_scaling_policy_configuration { + adjustment_type = "ChangeInCapacity" + metric_aggregation_type = "Maximum" + cooldown = 300 + + step_adjustment { + metric_interval_lower_bound = 0 + scaling_adjustment = 1 + } + } + + depends_on = [aws_appautoscaling_target.auth_ecs_target] +} + resource "aws_appautoscaling_policy" "ecs_policy_down" { name = "ECS Scale Down" service_namespace = "ecs" @@ -46,8 +67,8
@@ resource "aws_appautoscaling_policy" "ecs_policy_down" { depends_on = [aws_appautoscaling_target.auth_ecs_target] } -resource "aws_cloudwatch_metric_alarm" "ecs_cpu_alarm_high" { - alarm_name = "${var.prefix}-ecs-cpu-alarm-high" +resource "aws_cloudwatch_metric_alarm" "ecs_cpu_average_alarm_high" { + alarm_name = "${var.prefix}-ecs-cpu-average-alarm-high" comparison_operator = "GreaterThanOrEqualToThreshold" evaluation_periods = "1" metric_name = "CPUUtilization" @@ -61,10 +82,34 @@ resource "aws_cloudwatch_metric_alarm" "ecs_cpu_alarm_high" { ServiceName = aws_ecs_service.service.name } - alarm_description = "This alarm tells ECS to scale up based on high CPU" + alarm_description = "This alarm tells ECS to scale up based on average high CPU" + + alarm_actions = [ + aws_appautoscaling_policy.ecs_policy_up_average.arn + ] + + treat_missing_data = "breaching" +} + +resource "aws_cloudwatch_metric_alarm" "ecs_cpu_maximum_alarm_high" { + alarm_name = "${var.prefix}-ecs-cpu-maximum-alarm-high" + comparison_operator = "GreaterThanOrEqualToThreshold" + evaluation_periods = "1" + metric_name = "CPUUtilization" + namespace = "AWS/ECS" + period = "60" + statistic = "Maximum" + threshold = "80" + + dimensions = { + ClusterName = aws_ecs_cluster.server_cluster.name + ServiceName = aws_ecs_service.service.name + } + + alarm_description = "This alarm tells ECS to scale up based on maximum high CPU" alarm_actions = [ - aws_appautoscaling_policy.ecs_policy_up.arn + aws_appautoscaling_policy.ecs_policy_up_max.arn ] treat_missing_data = "breaching"