From ff85ff17e71bbd995a73a0dc1e0909ae061d6a5f Mon Sep 17 00:00:00 2001 From: jodiejones-moj Date: Thu, 31 Oct 2024 11:44:24 +0000 Subject: [PATCH 001/103] Added EC2 instance to host Defect Dojo --- .../panda-cyber-appsec-lab/ec2.tf | 49 ++++++++++++++++++- 1 file changed, 48 insertions(+), 1 deletion(-) diff --git a/terraform/environments/panda-cyber-appsec-lab/ec2.tf b/terraform/environments/panda-cyber-appsec-lab/ec2.tf index 90201539a5b..d87e5b5bf93 100644 --- a/terraform/environments/panda-cyber-appsec-lab/ec2.tf +++ b/terraform/environments/panda-cyber-appsec-lab/ec2.tf @@ -1,6 +1,5 @@ # Kali Linux Instance resource "aws_instance" "kali_linux" { - #checkov:skip=CKV_AWS_88:instance requires internet access ami = "ami-0f398bcc12f72f967" // aws-marketplace/kali-last-snapshot-amd64-2024.2.0-804fcc46-63fc-4eb6-85a1-50e66d6c7215 associate_public_ip_address = true instance_type = "t2.micro" @@ -50,6 +49,54 @@ resource "aws_instance" "kali_linux" { } } + +# Defect Dojo Instance +resource "aws_instance" "defect_dojo" { + ami = "ami-0e8d228ad90af673b" + associate_public_ip_address = true + instance_type = "t2.micro" + subnet_id = module.vpc.private_subnets.0 + vpc_security_group_ids = [aws_security_group.kali_linux_sg.id] + iam_instance_profile = aws_iam_instance_profile.ssm_instance_profile.name + ebs_optimized = true + metadata_options { + http_tokens = "required" + } + root_block_device { + encrypted = true + volume_size = 60 + } + ebs_block_device { + device_name = "/dev/xvda" + volume_size = 5 + encrypted = true + } + user_data = <<-EOF + #!/bin/bash + # Update and install dependencies + apt-get update + apt-get upgrade + apt-get install -y docker.io docker-compose + + # Start Docker + systemctl start docker + systemctl enable docker + + # Clone DefectDojo Docker repo + git clone https://github.com/DefectDojo/django-DefectDojo.git /opt/defectdojo + cd /opt/defectdojo + + + # Run DefectDojo using Docker Compose + docker-compose up -d + EOF + + tags = { + Name = 
"Defect-Dojo" + } +} + + # Security Group for Kali instance # trivy:ignore:AVD-AWS-0104 resource "aws_security_group" "kali_linux_sg" { From 18fbd84bcb0d8344558d06af317a11744405125c Mon Sep 17 00:00:00 2001 From: Bill Buchan Date: Mon, 4 Nov 2024 17:05:13 +0000 Subject: [PATCH 002/103] Get notification for events in DMS --- .../modules/components/dms/cloudwatch-alarms.tf | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf index be99cd4ee4f..ffce2b41dfe 100644 --- a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf +++ b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf @@ -106,6 +106,16 @@ resource "aws_cloudwatch_metric_alarm" "dms_cdc_latency_target" { tags = var.tags } +resource "aws_dms_event_subscription" "dms_task_event_subscription" { + name = "dms-task-event-alerts" + sns_topic_arn = aws_sns_topic.dms_alerting.arn + source_type = "replication-task" + + # We do not filter by event type or replication task as we wish + # to be notified by any event on any replication task + enabled = true +} + # Pager duty integration # Get the map of pagerduty integration keys from the modernisation platform account From 24e5822619c11b3755c79fe19aa527dbc62a4af4 Mon Sep 17 00:00:00 2001 From: Bill Buchan Date: Mon, 4 Nov 2024 17:27:10 +0000 Subject: [PATCH 003/103] Restrict event categories --- .../delius-core/modules/components/dms/cloudwatch-alarms.tf | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf index ffce2b41dfe..0fcf0f0656f 100644 --- a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf +++ 
b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf @@ -110,9 +110,7 @@ resource "aws_dms_event_subscription" "dms_task_event_subscription" { name = "dms-task-event-alerts" sns_topic_arn = aws_sns_topic.dms_alerting.arn source_type = "replication-task" - - # We do not filter by event type or replication task as we wish - # to be notified by any event on any replication task + event_categories = ["state change", "failure"] enabled = true } From a1dad194ea455a0290b728087eaf1d158f403f8f Mon Sep 17 00:00:00 2001 From: Bill Buchan Date: Mon, 4 Nov 2024 17:44:01 +0000 Subject: [PATCH 004/103] Modify definition of production with respect to audit data --- terraform/environments/delius-core/locals_development.tf | 2 +- terraform/environments/delius-core/locals_preproduction.tf | 3 ++- terraform/environments/delius-core/locals_stage.tf | 3 ++- terraform/environments/delius-core/locals_test.tf | 2 +- .../delius-core/modules/components/dms/cloudwatch-alarms.tf | 5 ++++- 5 files changed, 10 insertions(+), 5 deletions(-) diff --git a/terraform/environments/delius-core/locals_development.tf b/terraform/environments/delius-core/locals_development.tf index 5ad2a0cb6be..df009ab3dc5 100644 --- a/terraform/environments/delius-core/locals_development.tf +++ b/terraform/environments/delius-core/locals_development.tf @@ -139,6 +139,6 @@ locals { user_target_endpoint = { write_database = "DMDNDA" } - is-production = local.is-production + is-production = false } } diff --git a/terraform/environments/delius-core/locals_preproduction.tf b/terraform/environments/delius-core/locals_preproduction.tf index f24addabc45..439fa1d281d 100644 --- a/terraform/environments/delius-core/locals_preproduction.tf +++ b/terraform/environments/delius-core/locals_preproduction.tf @@ -150,7 +150,8 @@ locals { user_target_endpoint = { write_database = "PRENDA" } - is-production = local.is-production + # Auditing from the Pre-Prod environment is considered production data + 
is-production = true } } diff --git a/terraform/environments/delius-core/locals_stage.tf b/terraform/environments/delius-core/locals_stage.tf index f083e3e0687..4220c9d49b7 100644 --- a/terraform/environments/delius-core/locals_stage.tf +++ b/terraform/environments/delius-core/locals_stage.tf @@ -150,6 +150,7 @@ locals { user_target_endpoint = { write_database = "STGNDA" } - is-production = local.is-production + # Auditing from the Stage environment is considered production data + is-production = true } } diff --git a/terraform/environments/delius-core/locals_test.tf b/terraform/environments/delius-core/locals_test.tf index b441b13a5d0..030520bb504 100644 --- a/terraform/environments/delius-core/locals_test.tf +++ b/terraform/environments/delius-core/locals_test.tf @@ -138,6 +138,6 @@ locals { read_database = "TSTNDA" } user_target_endpoint = {} - is-production = local.is-production + is-production = false } } diff --git a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf index 0fcf0f0656f..d9bae4500d7 100644 --- a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf +++ b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf @@ -110,7 +110,10 @@ resource "aws_dms_event_subscription" "dms_task_event_subscription" { name = "dms-task-event-alerts" sns_topic_arn = aws_sns_topic.dms_alerting.arn source_type = "replication-task" - event_categories = ["state change", "failure"] + # If this is production then we expect to see starting and stopping of replication tasks + # as this would not be normal behaviour. + # For non-production this will happen nightly due to automated stop/start + event_categories = var.dms_config.is-production ? 
["state change", "failure"] : ["failure"] enabled = true } From 944609155e2d6d067860e4b3e74a3b31951e1c59 Mon Sep 17 00:00:00 2001 From: Bill Buchan Date: Thu, 7 Nov 2024 10:59:19 +0000 Subject: [PATCH 005/103] Add logging --- .../components/dms/cloudwatch-alarms.tf | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf index 20e8466cd76..862797ae8f1 100644 --- a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf +++ b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf @@ -2,6 +2,32 @@ resource "aws_sns_topic" "dms_alerting" { name = "delius-dms-alerting" kms_master_key_id = var.account_config.kms_keys.general_shared + + http_success_feedback_role_arn = aws_iam_role.sns_logging_role.arn + http_failure_feedback_role_arn = aws_iam_role.sns_logging_role.arn +} + +resource "aws_iam_role" "sns_logging_role" { + name = "sns-logging-role" + + assume_role_policy = jsonencode({ + "Version": "2012-10-17", + "Statement": [ + { + "Action": "sts:AssumeRole", + "Principal": { + "Service": "sns.amazonaws.com" + }, + "Effect": "Allow", + "Sid": "" + } + ] + }) +} + +resource "aws_iam_role_policy_attachment" "attach_sns_policy" { + role = aws_iam_role.sns_logging_role.name + policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonSNSRole" } # Create a map of all possible replication tasks, so those that exist may have alarms applied to them. 
From 1d1ecd4149b326f314c2a1e8009ce563326a2c56 Mon Sep 17 00:00:00 2001 From: Bill Buchan Date: Thu, 7 Nov 2024 11:37:04 +0000 Subject: [PATCH 006/103] Log all notifications --- .../delius-core/modules/components/dms/cloudwatch-alarms.tf | 1 + 1 file changed, 1 insertion(+) diff --git a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf index 862797ae8f1..4e0a93adee3 100644 --- a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf +++ b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf @@ -4,6 +4,7 @@ resource "aws_sns_topic" "dms_alerting" { kms_master_key_id = var.account_config.kms_keys.general_shared http_success_feedback_role_arn = aws_iam_role.sns_logging_role.arn + http_success_feedback_sample_rate = 100 http_failure_feedback_role_arn = aws_iam_role.sns_logging_role.arn } From 915c4f2df241ff42a8d36ec8be10beab449dfa31 Mon Sep 17 00:00:00 2001 From: Bill Buchan Date: Thu, 7 Nov 2024 14:11:21 +0000 Subject: [PATCH 007/103] Add Debug --- .../components/dms/cloudwatch-alarms.tf | 110 +++++++++++++++++- 1 file changed, 105 insertions(+), 5 deletions(-) diff --git a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf index 4e0a93adee3..b2081520e7a 100644 --- a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf +++ b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf @@ -101,8 +101,8 @@ resource "aws_cloudwatch_metric_alarm" "dms_cdc_latency_source" { evaluation_periods = 3 period = 120 actions_enabled = true - alarm_actions = [aws_sns_topic.dms_alerting.arn] - ok_actions = [aws_sns_topic.dms_alerting.arn] + alarm_actions = [aws_sns_topic.dms_alerts.arn] + ok_actions = [aws_sns_topic.dms_alerts.arn] dimensions = { ReplicationInstanceIdentifier 
= aws_dms_replication_instance.dms_replication_instance.replication_instance_id # We only need to final element of the replication task ID (after the last :) @@ -123,8 +123,8 @@ resource "aws_cloudwatch_metric_alarm" "dms_cdc_latency_target" { evaluation_periods = 3 period = 120 actions_enabled = true - alarm_actions = [aws_sns_topic.dms_alerting.arn] - ok_actions = [aws_sns_topic.dms_alerting.arn] + alarm_actions = [aws_sns_topic.dms_alerts.arn] + ok_actions = [aws_sns_topic.dms_alerts.arn] dimensions = { ReplicationInstanceIdentifier = aws_dms_replication_instance.dms_replication_instance.replication_instance_id # We only need to final element of the replication task ID (after the last :) @@ -135,7 +135,7 @@ resource "aws_cloudwatch_metric_alarm" "dms_cdc_latency_target" { resource "aws_dms_event_subscription" "dms_task_event_subscription" { name = "dms-task-event-alerts" - sns_topic_arn = aws_sns_topic.dms_alerting.arn + sns_topic_arn = aws_sns_topic.dms_alerts.arn source_type = "replication-task" # If this is production then we expect to see starting and stopping of replication tasks # as this would not be normal behaviour. 
@@ -175,3 +175,103 @@ module "pagerduty_core_alerts" { sns_topics = [aws_sns_topic.dms_alerting.name] pagerduty_integration_key = local.pagerduty_integration_keys[local.integration_key_lookup] } + + +# DEBUG BELOW - WRITE MESSAGE PAYLOAD + +# Step 1: Create an IAM Role for the Lambda function with necessary permissions +resource "aws_iam_role" "lambda_sns_role" { + name = "lambda-sns-role" + assume_role_policy = jsonencode({ + "Version": "2012-10-17", + "Statement": [ + { + "Action": "sts:AssumeRole", + "Principal": { + "Service": "lambda.amazonaws.com" + }, + "Effect": "Allow", + "Sid": "" + } + ] + }) +} + +# Attach policies for Lambda logging and SNS access +resource "aws_iam_role_policy_attachment" "lambda_logging" { + role = aws_iam_role.lambda_sns_role.name + policy_arn = "arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole" +} + +resource "aws_iam_role_policy_attachment" "sns_publish" { + role = aws_iam_role.lambda_sns_role.name + policy_arn = "arn:aws:iam::aws:policy/service-role/AWSLambdaSNSPolicy" +} + + +resource "local_file" "lambda_handler_py" { + filename = "${path.module}/lambda_function_payload_logger.py" + content = < Date: Thu, 7 Nov 2024 14:19:00 +0000 Subject: [PATCH 008/103] Policy not needed --- .../delius-core/modules/components/dms/cloudwatch-alarms.tf | 6 ------ 1 file changed, 6 deletions(-) diff --git a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf index b2081520e7a..bd4850c4f81 100644 --- a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf +++ b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf @@ -203,12 +203,6 @@ resource "aws_iam_role_policy_attachment" "lambda_logging" { policy_arn = "arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole" } -resource "aws_iam_role_policy_attachment" "sns_publish" { - role = aws_iam_role.lambda_sns_role.name 
- policy_arn = "arn:aws:iam::aws:policy/service-role/AWSLambdaSNSPolicy" -} - - resource "local_file" "lambda_handler_py" { filename = "${path.module}/lambda_function_payload_logger.py" content = < Date: Thu, 7 Nov 2024 14:40:27 +0000 Subject: [PATCH 009/103] Use index file name --- .../delius-core/modules/components/dms/cloudwatch-alarms.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf index bd4850c4f81..bcd63761b35 100644 --- a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf +++ b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf @@ -204,7 +204,7 @@ resource "aws_iam_role_policy_attachment" "lambda_logging" { } resource "local_file" "lambda_handler_py" { - filename = "${path.module}/lambda_function_payload_logger.py" + filename = "${path.module}/index.py" content = < Date: Thu, 7 Nov 2024 15:02:27 +0000 Subject: [PATCH 010/103] Force code change --- .../modules/components/dms/cloudwatch-alarms.tf | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf index bcd63761b35..fbc6a30d1bf 100644 --- a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf +++ b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf @@ -209,7 +209,7 @@ resource "local_file" "lambda_handler_py" { import json def lambda_handler(event, context): - print("Received event: " + json.dumps(event, indent=2)) + print("Received handler event: " + json.dumps(event, indent=2)) return { 'statusCode': 200, 'body': 'Success' @@ -240,12 +240,6 @@ resource "aws_lambda_function" "sns_handler" { } } -# Sample Python code for the Lambda function: -# 
def lambda_handler(event, context): -# import json -# print("Received event:", json.dumps(event, indent=2)) -# return {"statusCode": 200, "body": "Success"} - # Step 3: Create the SNS topic resource "aws_sns_topic" "dms_alerts" { name = "dms-alerts-topic" From 01f6fa0678289d3f704faca598f9acabbfaa4e39 Mon Sep 17 00:00:00 2001 From: Bill Buchan Date: Thu, 7 Nov 2024 15:14:19 +0000 Subject: [PATCH 011/103] Replace handler name --- .../delius-core/modules/components/dms/cloudwatch-alarms.tf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf index fbc6a30d1bf..d7e6924cd24 100644 --- a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf +++ b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf @@ -204,7 +204,7 @@ resource "aws_iam_role_policy_attachment" "lambda_logging" { } resource "local_file" "lambda_handler_py" { - filename = "${path.module}/index.py" + filename = "${path.module}/lambda_function_payload_logger.py" content = < Date: Thu, 7 Nov 2024 18:03:57 +0000 Subject: [PATCH 012/103] Use Lambda to create Cloudwatch Metric from Event --- .../components/dms/cloudwatch-alarms.tf | 179 ++++++++++-------- 1 file changed, 102 insertions(+), 77 deletions(-) diff --git a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf index d7e6924cd24..c2d207a9d73 100644 --- a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf +++ b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf @@ -1,6 +1,6 @@ # SNS topic for monitoring to send alarms to -resource "aws_sns_topic" "dms_alerting" { - name = "delius-dms-alerting" +resource "aws_sns_topic" "dms_alerts_topic" { + name = 
"delius-dms-alerts-topic" kms_master_key_id = var.account_config.kms_keys.general_shared http_success_feedback_role_arn = aws_iam_role.sns_logging_role.arn @@ -101,8 +101,8 @@ resource "aws_cloudwatch_metric_alarm" "dms_cdc_latency_source" { evaluation_periods = 3 period = 120 actions_enabled = true - alarm_actions = [aws_sns_topic.dms_alerts.arn] - ok_actions = [aws_sns_topic.dms_alerts.arn] + alarm_actions = [aws_sns_topic.dms_alerts_topic.arn] + ok_actions = [aws_sns_topic.dms_alerts_topic.arn] dimensions = { ReplicationInstanceIdentifier = aws_dms_replication_instance.dms_replication_instance.replication_instance_id # We only need to final element of the replication task ID (after the last :) @@ -123,8 +123,8 @@ resource "aws_cloudwatch_metric_alarm" "dms_cdc_latency_target" { evaluation_periods = 3 period = 120 actions_enabled = true - alarm_actions = [aws_sns_topic.dms_alerts.arn] - ok_actions = [aws_sns_topic.dms_alerts.arn] + alarm_actions = [aws_sns_topic.dms_alerts_topic.arn] + ok_actions = [aws_sns_topic.dms_alerts_topic.arn] dimensions = { ReplicationInstanceIdentifier = aws_dms_replication_instance.dms_replication_instance.replication_instance_id # We only need to final element of the replication task ID (after the last :) @@ -133,17 +133,6 @@ resource "aws_cloudwatch_metric_alarm" "dms_cdc_latency_target" { tags = var.tags } -resource "aws_dms_event_subscription" "dms_task_event_subscription" { - name = "dms-task-event-alerts" - sns_topic_arn = aws_sns_topic.dms_alerts.arn - source_type = "replication-task" - # If this is production then we expect to see starting and stopping of replication tasks - # as this would not be normal behaviour. - # For non-production this will happen nightly due to automated stop/start - event_categories = var.dms_config.is-production ? 
["state change", "failure"] : ["failure"] - enabled = true -} - # Pager duty integration # Get the map of pagerduty integration keys from the modernisation platform account @@ -169,97 +158,133 @@ locals { module "pagerduty_core_alerts" { #checkov:skip=CKV_TF_1 depends_on = [ - aws_sns_topic.dms_alerting + aws_sns_topic.dms_alerts_topic ] source = "github.com/ministryofjustice/modernisation-platform-terraform-pagerduty-integration?ref=v2.0.0" - sns_topics = [aws_sns_topic.dms_alerting.name] + sns_topics = [aws_sns_topic.dms_alerts_topic.name] pagerduty_integration_key = local.pagerduty_integration_keys[local.integration_key_lookup] } +resource "aws_iam_role" "lambda_put_metric_data_role" { + name = "lambda-put-metric-data-role" -# DEBUG BELOW - WRITE MESSAGE PAYLOAD - -# Step 1: Create an IAM Role for the Lambda function with necessary permissions -resource "aws_iam_role" "lambda_sns_role" { - name = "lambda-sns-role" assume_role_policy = jsonencode({ - "Version": "2012-10-17", - "Statement": [ + Version = "2012-10-17", + Statement = [ { - "Action": "sts:AssumeRole", - "Principal": { - "Service": "lambda.amazonaws.com" - }, - "Effect": "Allow", - "Sid": "" + Action = "sts:AssumeRole", + Effect = "Allow", + Principal = { + Service = "lambda.amazonaws.com" + } + } + ] + }) +} + +resource "aws_iam_policy" "lambda_put_metric_data_policy" { + name = "lambda-put-metric-data-policy" + + policy = jsonencode({ + Version = "2012-10-17", + Statement = [ + { + Effect = "Allow", + Action = [ + "cloudwatch:PutMetricData" + ], + Resource = "*" } ] }) } -# Attach policies for Lambda logging and SNS access -resource "aws_iam_role_policy_attachment" "lambda_logging" { - role = aws_iam_role.lambda_sns_role.name - policy_arn = "arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole" +resource "aws_iam_role_policy_attachment" "lambda_put_metric_data_policy_attach" { + role = aws_iam_role.lambda_put_metric_data_role.name + policy_arn = 
aws_iam_policy.lambda_put_metric_data_policy.arn } -resource "local_file" "lambda_handler_py" { - filename = "${path.module}/lambda_function_payload_logger.py" +resource "local_file" "lambda_dms_replication_metric_py" { + filename = "${path.module}/lambda_dms_replication_metric.py" content = < Date: Thu, 7 Nov 2024 18:28:49 +0000 Subject: [PATCH 013/103] Add logging --- .../delius-core/modules/components/dms/cloudwatch-alarms.tf | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf index c2d207a9d73..36cdea4b629 100644 --- a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf +++ b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf @@ -269,6 +269,10 @@ resource "aws_cloudwatch_metric_alarm" "dms_replication_alarm" { # DMS Replication (Events are NOT detected by CloudWatch Alarms) resource "aws_sns_topic" "dms_events_topic" { name = "dms_events_topic" + + http_success_feedback_role_arn = aws_iam_role.sns_logging_role.arn + http_success_feedback_sample_rate = 100 + http_failure_feedback_role_arn = aws_iam_role.sns_logging_role.arn } resource "aws_sns_topic_subscription" "dms_events_lambda_subscription" { From 88175822304227db64043f81e0a9072f18396e5c Mon Sep 17 00:00:00 2001 From: jodiejones-moj Date: Fri, 8 Nov 2024 14:30:07 +0000 Subject: [PATCH 014/103] Removed installation of docker and cloning of defect dojo from user_data --- .../environments/panda-cyber-appsec-lab/ec2.tf | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/terraform/environments/panda-cyber-appsec-lab/ec2.tf b/terraform/environments/panda-cyber-appsec-lab/ec2.tf index d87e5b5bf93..ba5d51e5cdd 100644 --- a/terraform/environments/panda-cyber-appsec-lab/ec2.tf +++ b/terraform/environments/panda-cyber-appsec-lab/ec2.tf @@ -76,19 +76,6 @@ resource "aws_instance" "defect_dojo" { 
# Update and install dependencies apt-get update apt-get upgrade - apt-get install -y docker.io docker-compose - - # Start Docker - systemctl start docker - systemctl enable docker - - # Clone DefectDojo Docker repo - git clone https://github.com/DefectDojo/django-DefectDojo.git /opt/defectdojo - cd /opt/defectdojo - - - # Run DefectDojo using Docker Compose - docker-compose up -d EOF tags = { From 0333241f9e3f397fdc099fe8eb5b68cefacdb3f0 Mon Sep 17 00:00:00 2001 From: jodiejones-moj Date: Fri, 8 Nov 2024 15:00:54 +0000 Subject: [PATCH 015/103] Removed associate_public_ip_address --- terraform/environments/panda-cyber-appsec-lab/ec2.tf | 2 -- 1 file changed, 2 deletions(-) diff --git a/terraform/environments/panda-cyber-appsec-lab/ec2.tf b/terraform/environments/panda-cyber-appsec-lab/ec2.tf index ba5d51e5cdd..ce8abfeaecc 100644 --- a/terraform/environments/panda-cyber-appsec-lab/ec2.tf +++ b/terraform/environments/panda-cyber-appsec-lab/ec2.tf @@ -1,7 +1,6 @@ # Kali Linux Instance resource "aws_instance" "kali_linux" { ami = "ami-0f398bcc12f72f967" // aws-marketplace/kali-last-snapshot-amd64-2024.2.0-804fcc46-63fc-4eb6-85a1-50e66d6c7215 - associate_public_ip_address = true instance_type = "t2.micro" subnet_id = module.vpc.private_subnets.0 vpc_security_group_ids = [aws_security_group.kali_linux_sg.id] @@ -53,7 +52,6 @@ resource "aws_instance" "kali_linux" { # Defect Dojo Instance resource "aws_instance" "defect_dojo" { ami = "ami-0e8d228ad90af673b" - associate_public_ip_address = true instance_type = "t2.micro" subnet_id = module.vpc.private_subnets.0 vpc_security_group_ids = [aws_security_group.kali_linux_sg.id] From 913842db009cf9571d7dc152a817fe21c272cc7b Mon Sep 17 00:00:00 2001 From: Bill Buchan Date: Fri, 8 Nov 2024 15:21:17 +0000 Subject: [PATCH 016/103] Log lambda success and failure instead of https --- .../delius-core/modules/components/dms/cloudwatch-alarms.tf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git 
a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf index 36cdea4b629..b5cca6e8062 100644 --- a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf +++ b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf @@ -270,9 +270,9 @@ resource "aws_cloudwatch_metric_alarm" "dms_replication_alarm" { resource "aws_sns_topic" "dms_events_topic" { name = "dms_events_topic" - http_success_feedback_role_arn = aws_iam_role.sns_logging_role.arn - http_success_feedback_sample_rate = 100 - http_failure_feedback_role_arn = aws_iam_role.sns_logging_role.arn + lambda_success_feedback_role_arn = aws_iam_role.sns_logging_role.arn + lambda_success_feedback_sample_rate = 100 + lambda_failure_feedback_role_arn = aws_iam_role.sns_logging_role.arn } resource "aws_sns_topic_subscription" "dms_events_lambda_subscription" { From 42562bbeef76ffb8441a617c87d0ebd2de4928e6 Mon Sep 17 00:00:00 2001 From: jodiejones-moj Date: Fri, 8 Nov 2024 15:50:47 +0000 Subject: [PATCH 017/103] Set associate_public_ip_address to false and added lifecycle block --- terraform/environments/panda-cyber-appsec-lab/ec2.tf | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/terraform/environments/panda-cyber-appsec-lab/ec2.tf b/terraform/environments/panda-cyber-appsec-lab/ec2.tf index ce8abfeaecc..4808cdc8f84 100644 --- a/terraform/environments/panda-cyber-appsec-lab/ec2.tf +++ b/terraform/environments/panda-cyber-appsec-lab/ec2.tf @@ -1,6 +1,7 @@ # Kali Linux Instance resource "aws_instance" "kali_linux" { ami = "ami-0f398bcc12f72f967" // aws-marketplace/kali-last-snapshot-amd64-2024.2.0-804fcc46-63fc-4eb6-85a1-50e66d6c7215 + associate_public_ip_address = false instance_type = "t2.micro" subnet_id = module.vpc.private_subnets.0 vpc_security_group_ids = [aws_security_group.kali_linux_sg.id] @@ -52,6 +53,7 @@ resource "aws_instance" "kali_linux" { # Defect 
Dojo Instance resource "aws_instance" "defect_dojo" { ami = "ami-0e8d228ad90af673b" + associate_public_ip_address = false instance_type = "t2.micro" subnet_id = module.vpc.private_subnets.0 vpc_security_group_ids = [aws_security_group.kali_linux_sg.id] @@ -79,6 +81,9 @@ resource "aws_instance" "defect_dojo" { tags = { Name = "Defect-Dojo" } + lifecycle { + replace_triggered_by = [aws_instance.defect_dojo.user_data] +} } From baab6a0b9ac2e92f163780e58c69baf24c9eb0da Mon Sep 17 00:00:00 2001 From: jodiejones-moj Date: Fri, 8 Nov 2024 16:07:54 +0000 Subject: [PATCH 018/103] Removed lifecycle code block --- terraform/environments/panda-cyber-appsec-lab/ec2.tf | 3 --- 1 file changed, 3 deletions(-) diff --git a/terraform/environments/panda-cyber-appsec-lab/ec2.tf b/terraform/environments/panda-cyber-appsec-lab/ec2.tf index 4808cdc8f84..e0ef2484906 100644 --- a/terraform/environments/panda-cyber-appsec-lab/ec2.tf +++ b/terraform/environments/panda-cyber-appsec-lab/ec2.tf @@ -81,9 +81,6 @@ resource "aws_instance" "defect_dojo" { tags = { Name = "Defect-Dojo" } - lifecycle { - replace_triggered_by = [aws_instance.defect_dojo.user_data] -} } From f3cf2ca1d5b996a74bf525ce1a86fe56ac0f8bd2 Mon Sep 17 00:00:00 2001 From: Bill Buchan Date: Fri, 8 Nov 2024 17:10:19 +0000 Subject: [PATCH 019/103] Add lambda permission --- .../modules/components/dms/cloudwatch-alarms.tf | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf index b5cca6e8062..0887c6e0ca3 100644 --- a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf +++ b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf @@ -249,6 +249,16 @@ resource "aws_lambda_function" "dms_replication_metric_publisher" { depends_on = [data.archive_file.lambda_dms_replication_metric_zip] } +resource "aws_lambda_permission" 
"allow_sns_invoke_dms_replication_metric_publisher_handler" { + statement_id = "AllowSNSInvoke" + action = "lambda:InvokeFunction" + function_name = aws_lambda_function.dms_replication_metric_publisher.function_name + principal = "sns.amazonaws.com" + + source_arn = aws_sns_topic.dms_alerts_topic.arn +} + + resource "aws_cloudwatch_metric_alarm" "dms_replication_alarm" { alarm_name = "DMSReplicationEventAlarm" comparison_operator = "GreaterThanOrEqualToThreshold" From 0f7eca1d1ed0542d602892b1eac6fd3b180aea11 Mon Sep 17 00:00:00 2001 From: Bill Buchan Date: Fri, 8 Nov 2024 17:49:03 +0000 Subject: [PATCH 020/103] This is for events --- .../delius-core/modules/components/dms/cloudwatch-alarms.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf index 0887c6e0ca3..6238249f13d 100644 --- a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf +++ b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf @@ -255,7 +255,7 @@ resource "aws_lambda_permission" "allow_sns_invoke_dms_replication_metric_publis function_name = aws_lambda_function.dms_replication_metric_publisher.function_name principal = "sns.amazonaws.com" - source_arn = aws_sns_topic.dms_alerts_topic.arn + source_arn = aws_sns_topic.dms_events_topic.arn } From adfe2c0a3eeec7cbe16a19f2fdf27988303c6755 Mon Sep 17 00:00:00 2001 From: Bill Buchan Date: Mon, 11 Nov 2024 09:07:07 +0000 Subject: [PATCH 021/103] Allow Lambda Function to log failures --- .../delius-core/modules/components/dms/cloudwatch-alarms.tf | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf index 6238249f13d..a69d2a9b68b 100644 --- 
a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf +++ b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf @@ -204,6 +204,12 @@ resource "aws_iam_role_policy_attachment" "lambda_put_metric_data_policy_attach" policy_arn = aws_iam_policy.lambda_put_metric_data_policy.arn } +# Allow Cloudwatch Logging +resource "aws_iam_role_policy_attachment" "lambda_put_metric_data_logging_attach" { + role = aws_iam_role.lambda_put_metric_data_role.name + policy_arn = "arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole" +} + resource "local_file" "lambda_dms_replication_metric_py" { filename = "${path.module}/lambda_dms_replication_metric.py" content = < Date: Mon, 11 Nov 2024 15:56:13 +0000 Subject: [PATCH 022/103] Place lambda in a file --- .../components/dms/cloudwatch-alarms.tf | 46 +-- .../dms/lambda/dms_replication_metric.py | 310 ++++++++++++++++++ .../dms/lambda_dms_replication_metric.zip | Bin 0 -> 3048 bytes 3 files changed, 333 insertions(+), 23 deletions(-) create mode 100644 terraform/environments/delius-core/modules/components/dms/lambda/dms_replication_metric.py create mode 100644 terraform/environments/delius-core/modules/components/dms/lambda_dms_replication_metric.zip diff --git a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf index a69d2a9b68b..7acc9e9b9b7 100644 --- a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf +++ b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf @@ -210,32 +210,32 @@ resource "aws_iam_role_policy_attachment" "lambda_put_metric_data_logging_attach policy_arn = "arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole" } -resource "local_file" "lambda_dms_replication_metric_py" { - filename = "${path.module}/lambda_dms_replication_metric.py" - content = < { + replication_task_arn = 
aws_dms_replication_task.business_interaction_inbound_replication[k].replication_task_arn + replication_task_id = aws_dms_replication_task.business_interaction_inbound_replication[k].replication_task_id + } + }, + { for k in keys(local.client_account_map) : + "audited_interaction_inbound_replication_from_${k}" => { + replication_task_arn = aws_dms_replication_task.audited_interaction_inbound_replication[k].replication_task_arn + replication_task_id = aws_dms_replication_task.audited_interaction_inbound_replication[k].replication_task_id + } + }, + { for k in keys(local.client_account_map) : + "audited_interaction_checksum_inbound_replication_from_${k}" => { + replication_task_arn = aws_dms_replication_task.audited_interaction_checksum_inbound_replication[k].replication_task_arn + replication_task_id = aws_dms_replication_task.audited_interaction_checksum_inbound_replication[k].replication_task_id + } + }, + try(var.dms_config.audit_source_endpoint.read_database, null) == null ? {} : { + audited_interaction_outbound_replication = { + replication_task_arn = aws_dms_replication_task.audited_interaction_outbound_replication[0].replication_task_arn + replication_task_id = aws_dms_replication_task.audited_interaction_outbound_replication[0].replication_task_id + } + }, + { for k in keys(local.client_account_map) : + "user_outbound_replication_to_${k}" => { + replication_task_arn = aws_dms_replication_task.user_outbound_replication[k].replication_task_arn + replication_task_id = aws_dms_replication_task.user_outbound_replication[k].replication_task_id + } + }, + try(var.dms_config.audit_source_endpoint.read_database, null) == null ? 
{} : { + business_interaction_outbound_replication = { + replication_task_arn = aws_dms_replication_task.business_interaction_outbound_replication[0].replication_task_arn + replication_task_id = aws_dms_replication_task.business_interaction_outbound_replication[0].replication_task_id + } + }, + try(var.dms_config.audit_source_endpoint.read_database, null) == null ? {} : { + audited_interaction_checksum_outbound_replication = { + replication_task_arn = aws_dms_replication_task.audited_interaction_checksum_outbound_replication[0].replication_task_arn + replication_task_id = aws_dms_replication_task.audited_interaction_checksum_outbound_replication[0].replication_task_id + } + } + ) +} + + + +resource "aws_cloudwatch_metric_alarm" "dms_cdc_latency_source" { + for_each = local.aws_dms_replication_tasks + alarm_name = "dms-cdc-latency-source-${each.value.replication_task_id}" + alarm_description = "High CDC source latency for dms replication task for ${each.value.replication_task_id}" + namespace = "AWS/DMS" + statistic = "Average" + metric_name = "CDCLatencySource" + comparison_operator = "GreaterThanThreshold" + threshold = 15 + evaluation_periods = 3 + period = 120 + actions_enabled = true + alarm_actions = [aws_sns_topic.dms_alerts_topic.arn] + ok_actions = [aws_sns_topic.dms_alerts_topic.arn] + dimensions = { + ReplicationInstanceIdentifier = aws_dms_replication_instance.dms_replication_instance.replication_instance_id + # We only need to final element of the replication task ID (after the last :) + ReplicationTaskIdentifier = split(":", each.value.replication_task_arn)[length(split(":", each.value.replication_task_arn)) - 1] + } + tags = var.tags +} + +resource "aws_cloudwatch_metric_alarm" "dms_cdc_latency_target" { + for_each = local.aws_dms_replication_tasks + alarm_name = "dms-cdc-latency-target-${each.value.replication_task_id}" + alarm_description = "High CDC target latency for dms replication task for ${each.value.replication_task_id}" + namespace = 
"AWS/DMS" + statistic = "Average" + metric_name = "CDCLatencyTarget" + comparison_operator = "GreaterThanThreshold" + threshold = 15 + evaluation_periods = 3 + period = 120 + actions_enabled = true + alarm_actions = [aws_sns_topic.dms_alerts_topic.arn] + ok_actions = [aws_sns_topic.dms_alerts_topic.arn] + dimensions = { + ReplicationInstanceIdentifier = aws_dms_replication_instance.dms_replication_instance.replication_instance_id + # We only need to final element of the replication task ID (after the last :) + ReplicationTaskIdentifier = split(":", each.value.replication_task_arn)[length(split(":", each.value.replication_task_arn)) - 1] + } + tags = var.tags +} + +# Pager duty integration + +# Get the map of pagerduty integration keys from the modernisation platform account +data "aws_secretsmanager_secret" "pagerduty_integration_keys" { + provider = aws.modernisation-platform + name = "pagerduty_integration_keys" +} + +data "aws_secretsmanager_secret_version" "pagerduty_integration_keys" { + provider = aws.modernisation-platform + secret_id = data.aws_secretsmanager_secret.pagerduty_integration_keys.id +} + +# Add a local to get the keys +locals { + pagerduty_integration_keys = jsondecode(data.aws_secretsmanager_secret_version.pagerduty_integration_keys.secret_string) + integration_key_lookup = var.dms_config.is-production ? 
"delius_oracle_prod_alarms" : "delius_oracle_nonprod_alarms" +} + +# link the sns topic to the service +# Non-Prod alerts channel: #delius-aws-oracle-dev-alerts +# Prod alerts channel: #delius-aws-oracle-prod-alerts +module "pagerduty_core_alerts" { + #checkov:skip=CKV_TF_1 + depends_on = [ + aws_sns_topic.dms_alerts_topic + ] + source = "github.com/ministryofjustice/modernisation-platform-terraform-pagerduty-integration?ref=v2.0.0" + sns_topics = [aws_sns_topic.dms_alerts_topic.name] + pagerduty_integration_key = local.pagerduty_integration_keys[local.integration_key_lookup] +} + +resource "aws_iam_role" "lambda_put_metric_data_role" { + name = "lambda-put-metric-data-role" + + assume_role_policy = jsonencode({ + Version = "2012-10-17", + Statement = [ + { + Action = "sts:AssumeRole", + Effect = "Allow", + Principal = { + Service = "lambda.amazonaws.com" + } + } + ] + }) +} + +resource "aws_iam_policy" "lambda_put_metric_data_policy" { + name = "lambda-put-metric-data-policy" + + policy = jsonencode({ + Version = "2012-10-17", + Statement = [ + { + Effect = "Allow", + Action = [ + "cloudwatch:PutMetricData" + ], + Resource = "*" + } + ] + }) +} + +resource "aws_iam_role_policy_attachment" "lambda_put_metric_data_policy_attach" { + role = aws_iam_role.lambda_put_metric_data_role.name + policy_arn = aws_iam_policy.lambda_put_metric_data_policy.arn +} + +# Allow Cloudwatch Logging +resource "aws_iam_role_policy_attachment" "lambda_put_metric_data_logging_attach" { + role = aws_iam_role.lambda_put_metric_data_role.name + policy_arn = "arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole" +} + +resource "local_file" "lambda_dms_replication_metric_py" { + filename = "${path.module}/lambda_dms_replication_metric.py" + content = <<@Ag47(=6_ve-eHUoGqD;IjrWhk*mdBtHkUsj&p2TrYcazC!XBm z&Q%Y6&h^Ros+!tJLtdC;{DpYB#DNMLP|}esMZ5oJu-W9t-v+>GE0e6Yeq~-0i9}Sy zU4)_CnIWrquK^mVxon(mh6yQJQZh!VIleunUX5Td8B{U9Xw(H0R+31Zhx+&5c(Gv5 
z2iVfXXJo^Wz`19m4eGxqEAJn;ZcZqs@jK*6InJ@t?v10av=1iEMc0T_IfbHc3&m0B z>8{F~Am2+07OSbk6$O{Epw!{(uyYxOF^f2zLGe|RE0dnOg@@60uKaSkt>Vps z@ADVi{C5vaShix`c+}Y`@Hk+5Dkr6*4j$hLYz=m`zxnh^=@{nH^Z;qN>t{r8b_Hm&JIR3R4-AC^iWmn}WQf`5ED2V56=A**ki7|c z<&p)L+zPKD!Gs~M3~p$bn~?=_VX(nU{+;)o=pwsnSjXLbP4jGyP6o z!Q77PgFNYFMo_idRV_?&ome%o=eSZQbKJPXUs8${uVnL@mOjiY!Dc`~`M7zY~!erAegpDjOC$_{df5gxX=Hgf!7exYZzfz0{^eT-qIN$zu!p z^!J4qtDnRY_3tMbc@1vG&DAc{jc=-7(#z|x%(qV((39XQqY3V46Pr4dHZN3dXuOY7 z{6KFFiWOYm#Q%0>r-Y=W&4dt559ppyWIQA~kVeaT`gHvjiB7O-S~*BSEWgKb3vjMJ zSi5mvHjcG?-s*H=?w6L`{BWxFr@Vxd?Z+gMnas}A-AL6QGCdNN&{ik}LMUW`C5J^8Etclo5pAJ|dP!b@$d zY(q&7;`vE}mp+et!1PQ^wMPC9j0uf~jf3GlrUd-9a1SFWza$w|bz4>vxVf~m^gJgl zIA@mWVYDaMkdQ`Z1UXy_pm^Dq$kE{MXz+NIm8L|yFsDo9fA77%1aT%GG$&cI}@jZwmu2Qo$WOyZJU(DG6c<(#I){*dPPzGOt z)DvbB+adSG1P@px>*H-L!?N$$_zB)SpBb3l1zX~3@YUQ!lw1l&NW4A)=kp*xaG4ky zGy3JFlD9<}ThdkXLnmb|#C*4C=58*%zQ02FuGFSb1WA@>Hs-~FG%R~a+`NIi zDU`cumm}bYLevY0L2$u1S*yDxMBy3dGBtZf24VS&Fj0meutV=r zn|xvHxE!zCFQcteRZU=ACG$-lK?qrSBiE&c(IlVxerdcw?YocBaKDMY$~<Xgz89nYK68uzVrX`1{Yqg`oCv zI;0$D1mvOV2+`RjHF7ab){bN(M_re znJ+Y&k|>3(jcb-Z7uBlR-}cu1JD7hRaNC45wK6X2Tm5EMEwaWBJH0nus7V`s#YAQJ zJlflRt7-gd%HXZqI`Dp~*!^m;_AV>uUkUL!JP zKGH5?I(ask<}#bt$!+!=<#3UGZt~0yLxlSf{xpx;fy($5NGF4TYiFu<9OLd(&Jx6e z>LLPpom2@!g$;2@;TV2Jc~um@u)Yqz2_aq=@#0DZFDsT%Yn(SC#w|hL8y!O;_r0GB zUX#f~y}!SaKkCPp=49oTRU)*+n6K5;Uo_@0cIZjcP867L97stR8(9co(sFbB`HIK5 z+1W#YOVO$_DlQt<6c1tP<0Lwxl7_&EE*y1wOzd@)+-AzewsY#V@2+?6DXgna`Fa#{ zn7txb9P4n(wJ7>?rFm~Tc#VkGb%n948voVJM%{1|l#axZ{mR~=_OA;Lb_zKc3@3Gj zagU1^6keD)HP;O}8gq8*k@Z;kK;L3lsFAAjb?{O!+S=RP&*{#nvJ2RSPJEj+T$yMo zmCrc`%IB-#xKIa;2Gl>ci=Hjy-5u8>5=di+SU?7C#E8)y6IUzCl(!`8b4((N1nX&ZMpk5yzYx=0<*Wdsq=x zqfVxu*G@NL9@S(DyY_{hR3P18Y6=haz_|fz1hmqx^@OvFq62(z7A}e`xbHk1ezqS1 zAy5v7I~K?7&v2x|A>eqk79f8w8(*G$ty7UOT$CCuJA+G$7;bo7HAZ`P>|V5gTWWOw zRr4SuC3edFheXy&=|OK3{Fdx9!0Mbg*tP8tA~H&F)LFeMm0m!M=tlmWDxG{zz9rFe z;5YTgqxuTfb%};A>565dwOG`?91KD)s5G`VBrT|IDi@@d&7=e!uIE?VsBDXB{=>5a(Iaql^Aa#hM 
z&HU-8;}9T4^ooUv`S{D;PTQdu{Xy%~Q+sO`RzdgjT@!Wyz(g4Uu(xJlJj?W782{hf gKQR7pmB#;*{x6!_TZ5SYd0_Zs^FRCXpLzh`U%DT-^#A|> literal 0 HcmV?d00001 From 32430e1fb8aa7d836fe5c701967d8ec26c416c73 Mon Sep 17 00:00:00 2001 From: Bill Buchan Date: Mon, 11 Nov 2024 15:56:47 +0000 Subject: [PATCH 023/103] Remove zip file --- .../dms/lambda_dms_replication_metric.zip | Bin 3048 -> 0 bytes 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 terraform/environments/delius-core/modules/components/dms/lambda_dms_replication_metric.zip diff --git a/terraform/environments/delius-core/modules/components/dms/lambda_dms_replication_metric.zip b/terraform/environments/delius-core/modules/components/dms/lambda_dms_replication_metric.zip deleted file mode 100644 index bc2d09f1d2c8fd3736a33c7616c5b1aff6a878b6..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 3048 zcmZ{mc{me}AIFCcIfonxIW}M9%E*1+XUy4zlryx&T!k`MtvN$TVPA7g5jJNcS6M^k zKBLIyCanDW{_#8h`MuxI^ZC4==ks~qf4}UlnSf^jtbYsuNMQer|LEMG<@Ag47(=6_ve-eHUoGqD;IjrWhk*mdBtHkUsj&p2TrYcazC!XBm z&Q%Y6&h^Ros+!tJLtdC;{DpYB#DNMLP|}esMZ5oJu-W9t-v+>GE0e6Yeq~-0i9}Sy zU4)_CnIWrquK^mVxon(mh6yQJQZh!VIleunUX5Td8B{U9Xw(H0R+31Zhx+&5c(Gv5 z2iVfXXJo^Wz`19m4eGxqEAJn;ZcZqs@jK*6InJ@t?v10av=1iEMc0T_IfbHc3&m0B z>8{F~Am2+07OSbk6$O{Epw!{(uyYxOF^f2zLGe|RE0dnOg@@60uKaSkt>Vps z@ADVi{C5vaShix`c+}Y`@Hk+5Dkr6*4j$hLYz=m`zxnh^=@{nH^Z;qN>t{r8b_Hm&JIR3R4-AC^iWmn}WQf`5ED2V56=A**ki7|c z<&p)L+zPKD!Gs~M3~p$bn~?=_VX(nU{+;)o=pwsnSjXLbP4jGyP6o z!Q77PgFNYFMo_idRV_?&ome%o=eSZQbKJPXUs8${uVnL@mOjiY!Dc`~`M7zY~!erAegpDjOC$_{df5gxX=Hgf!7exYZzfz0{^eT-qIN$zu!p z^!J4qtDnRY_3tMbc@1vG&DAc{jc=-7(#z|x%(qV((39XQqY3V46Pr4dHZN3dXuOY7 z{6KFFiWOYm#Q%0>r-Y=W&4dt559ppyWIQA~kVeaT`gHvjiB7O-S~*BSEWgKb3vjMJ zSi5mvHjcG?-s*H=?w6L`{BWxFr@Vxd?Z+gMnas}A-AL6QGCdNN&{ik}LMUW`C5J^8Etclo5pAJ|dP!b@$d zY(q&7;`vE}mp+et!1PQ^wMPC9j0uf~jf3GlrUd-9a1SFWza$w|bz4>vxVf~m^gJgl zIA@mWVYDaMkdQ`Z1UXy_pm^Dq$kE{MXz+NIm8L|yFsDo9fA77%1aT%GG$&cI}@jZwmu2Qo$WOyZJU(DG6c<(#I){*dPPzGOt 
z)DvbB+adSG1P@px>*H-L!?N$$_zB)SpBb3l1zX~3@YUQ!lw1l&NW4A)=kp*xaG4ky zGy3JFlD9<}ThdkXLnmb|#C*4C=58*%zQ02FuGFSb1WA@>Hs-~FG%R~a+`NIi zDU`cumm}bYLevY0L2$u1S*yDxMBy3dGBtZf24VS&Fj0meutV=r zn|xvHxE!zCFQcteRZU=ACG$-lK?qrSBiE&c(IlVxerdcw?YocBaKDMY$~<Xgz89nYK68uzVrX`1{Yqg`oCv zI;0$D1mvOV2+`RjHF7ab){bN(M_re znJ+Y&k|>3(jcb-Z7uBlR-}cu1JD7hRaNC45wK6X2Tm5EMEwaWBJH0nus7V`s#YAQJ zJlflRt7-gd%HXZqI`Dp~*!^m;_AV>uUkUL!JP zKGH5?I(ask<}#bt$!+!=<#3UGZt~0yLxlSf{xpx;fy($5NGF4TYiFu<9OLd(&Jx6e z>LLPpom2@!g$;2@;TV2Jc~um@u)Yqz2_aq=@#0DZFDsT%Yn(SC#w|hL8y!O;_r0GB zUX#f~y}!SaKkCPp=49oTRU)*+n6K5;Uo_@0cIZjcP867L97stR8(9co(sFbB`HIK5 z+1W#YOVO$_DlQt<6c1tP<0Lwxl7_&EE*y1wOzd@)+-AzewsY#V@2+?6DXgna`Fa#{ zn7txb9P4n(wJ7>?rFm~Tc#VkGb%n948voVJM%{1|l#axZ{mR~=_OA;Lb_zKc3@3Gj zagU1^6keD)HP;O}8gq8*k@Z;kK;L3lsFAAjb?{O!+S=RP&*{#nvJ2RSPJEj+T$yMo zmCrc`%IB-#xKIa;2Gl>ci=Hjy-5u8>5=di+SU?7C#E8)y6IUzCl(!`8b4((N1nX&ZMpk5yzYx=0<*Wdsq=x zqfVxu*G@NL9@S(DyY_{hR3P18Y6=haz_|fz1hmqx^@OvFq62(z7A}e`xbHk1ezqS1 zAy5v7I~K?7&v2x|A>eqk79f8w8(*G$ty7UOT$CCuJA+G$7;bo7HAZ`P>|V5gTWWOw zRr4SuC3edFheXy&=|OK3{Fdx9!0Mbg*tP8tA~H&F)LFeMm0m!M=tlmWDxG{zz9rFe z;5YTgqxuTfb%};A>565dwOG`?91KD)s5G`VBrT|IDi@@d&7=e!uIE?VsBDXB{=>5a(Iaql^Aa#hM z&HU-8;}9T4^ooUv`S{D;PTQdu{Xy%~Q+sO`RzdgjT@!Wyz(g4Uu(xJlJj?W782{hf gKQR7pmB#;*{x6!_TZ5SYd0_Zs^FRCXpLzh`U%DT-^#A|> From 22082340e2f36599fc881d1a7a090ae69b718d85 Mon Sep 17 00:00:00 2001 From: Bill Buchan Date: Mon, 11 Nov 2024 16:11:49 +0000 Subject: [PATCH 024/103] Put the local file back --- .../components/dms/cloudwatch-alarms.tf | 46 +++++++++---------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf index 7acc9e9b9b7..a69d2a9b68b 100644 --- a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf +++ b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf @@ -210,32 +210,32 @@ resource 
"aws_iam_role_policy_attachment" "lambda_put_metric_data_logging_attach policy_arn = "arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole" } -# resource "local_file" "lambda_dms_replication_metric_py" { -# filename = "${path.module}/lambda_dms_replication_metric.py" -# content = < Date: Mon, 11 Nov 2024 16:18:03 +0000 Subject: [PATCH 025/103] Refer to a file --- .../components/dms/cloudwatch-alarms.tf | 52 +++++++++++-------- 1 file changed, 29 insertions(+), 23 deletions(-) diff --git a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf index a69d2a9b68b..611cda135c8 100644 --- a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf +++ b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf @@ -210,32 +210,38 @@ resource "aws_iam_role_policy_attachment" "lambda_put_metric_data_logging_attach policy_arn = "arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole" } -resource "local_file" "lambda_dms_replication_metric_py" { - filename = "${path.module}/lambda_dms_replication_metric.py" - content = < Date: Mon, 11 Nov 2024 16:25:54 +0000 Subject: [PATCH 026/103] Add the local_file resource to allow it to run --- .../components/dms/cloudwatch-alarms.tf | 44 +++++++++---------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf index 611cda135c8..6e5f9c2ecc9 100644 --- a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf +++ b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf @@ -210,28 +210,28 @@ resource "aws_iam_role_policy_attachment" "lambda_put_metric_data_logging_attach policy_arn = "arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole" } -# 
resource "local_file" "lambda_dms_replication_metric_py" { -# filename = "${path.module}/lambda_dms_replication_metric.py" -# content = < Date: Mon, 11 Nov 2024 16:55:54 +0000 Subject: [PATCH 027/103] Workaround --- .../components/dms/cloudwatch-alarms.tf | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf index 6e5f9c2ecc9..f8055e99e8e 100644 --- a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf +++ b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf @@ -213,23 +213,7 @@ resource "aws_iam_role_policy_attachment" "lambda_put_metric_data_logging_attach resource "local_file" "lambda_dms_replication_metric_py" { filename = "${path.module}/lambda_dms_replication_metric.py" content = < Date: Mon, 11 Nov 2024 17:04:01 +0000 Subject: [PATCH 028/103] Force dependency on zip file --- .../delius-core/modules/components/dms/cloudwatch-alarms.tf | 3 +++ 1 file changed, 3 insertions(+) diff --git a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf index f8055e99e8e..8704bfd2643 100644 --- a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf +++ b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf @@ -240,6 +240,9 @@ resource "aws_lambda_function" "dms_replication_metric_publisher" { METRIC_NAMESPACE = "CustomDMSMetrics", METRIC_NAME = "DMSReplicationEvent" } + depends_on = [ + archive_file.lambda_dms_replication_metric_zip + ] } depends_on = [data.archive_file.lambda_dms_replication_metric_zip] From c83788b8e10d22ee4245255e8c79350c01d66580 Mon Sep 17 00:00:00 2001 From: Bill Buchan Date: Mon, 11 Nov 2024 17:09:15 +0000 Subject: [PATCH 029/103] Add source file --- 
.../dms/lambda/dms_replication_metric.py | 327 +----------------- 1 file changed, 17 insertions(+), 310 deletions(-) diff --git a/terraform/environments/delius-core/modules/components/dms/lambda/dms_replication_metric.py b/terraform/environments/delius-core/modules/components/dms/lambda/dms_replication_metric.py index a69d2a9b68b..4710a732ec5 100644 --- a/terraform/environments/delius-core/modules/components/dms/lambda/dms_replication_metric.py +++ b/terraform/environments/delius-core/modules/components/dms/lambda/dms_replication_metric.py @@ -1,310 +1,17 @@ -# SNS topic for monitoring to send alarms to -resource "aws_sns_topic" "dms_alerts_topic" { - name = "delius-dms-alerts-topic" - kms_master_key_id = var.account_config.kms_keys.general_shared - - http_success_feedback_role_arn = aws_iam_role.sns_logging_role.arn - http_success_feedback_sample_rate = 100 - http_failure_feedback_role_arn = aws_iam_role.sns_logging_role.arn -} - -resource "aws_iam_role" "sns_logging_role" { - name = "sns-logging-role" - - assume_role_policy = jsonencode({ - "Version": "2012-10-17", - "Statement": [ - { - "Action": "sts:AssumeRole", - "Principal": { - "Service": "sns.amazonaws.com" - }, - "Effect": "Allow", - "Sid": "" - } - ] - }) -} - -resource "aws_iam_role_policy_attachment" "attach_sns_policy" { - role = aws_iam_role.sns_logging_role.name - policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonSNSRole" -} - -# Create a map of all possible replication tasks, so those that exist may have alarms applied to them. -# Note that the key of this map cannot be an apply time value, so cannot be the ARN or ID of the -# replication tasks - these should appear only as values. -locals { - aws_dms_replication_tasks = merge( - try(var.dms_config.user_target_endpoint.write_database, null) == null ? 
{} : { - user_inbound_replication = { - replication_task_arn = aws_dms_replication_task.user_inbound_replication[0].replication_task_arn, - replication_task_id = aws_dms_replication_task.user_inbound_replication[0].replication_task_id - } - }, - { for k in keys(local.client_account_map) : - "business_interaction_inbound_replication_from_${k}" => { - replication_task_arn = aws_dms_replication_task.business_interaction_inbound_replication[k].replication_task_arn - replication_task_id = aws_dms_replication_task.business_interaction_inbound_replication[k].replication_task_id - } - }, - { for k in keys(local.client_account_map) : - "audited_interaction_inbound_replication_from_${k}" => { - replication_task_arn = aws_dms_replication_task.audited_interaction_inbound_replication[k].replication_task_arn - replication_task_id = aws_dms_replication_task.audited_interaction_inbound_replication[k].replication_task_id - } - }, - { for k in keys(local.client_account_map) : - "audited_interaction_checksum_inbound_replication_from_${k}" => { - replication_task_arn = aws_dms_replication_task.audited_interaction_checksum_inbound_replication[k].replication_task_arn - replication_task_id = aws_dms_replication_task.audited_interaction_checksum_inbound_replication[k].replication_task_id - } - }, - try(var.dms_config.audit_source_endpoint.read_database, null) == null ? 
{} : { - audited_interaction_outbound_replication = { - replication_task_arn = aws_dms_replication_task.audited_interaction_outbound_replication[0].replication_task_arn - replication_task_id = aws_dms_replication_task.audited_interaction_outbound_replication[0].replication_task_id - } - }, - { for k in keys(local.client_account_map) : - "user_outbound_replication_to_${k}" => { - replication_task_arn = aws_dms_replication_task.user_outbound_replication[k].replication_task_arn - replication_task_id = aws_dms_replication_task.user_outbound_replication[k].replication_task_id - } - }, - try(var.dms_config.audit_source_endpoint.read_database, null) == null ? {} : { - business_interaction_outbound_replication = { - replication_task_arn = aws_dms_replication_task.business_interaction_outbound_replication[0].replication_task_arn - replication_task_id = aws_dms_replication_task.business_interaction_outbound_replication[0].replication_task_id - } - }, - try(var.dms_config.audit_source_endpoint.read_database, null) == null ? 
{} : { - audited_interaction_checksum_outbound_replication = { - replication_task_arn = aws_dms_replication_task.audited_interaction_checksum_outbound_replication[0].replication_task_arn - replication_task_id = aws_dms_replication_task.audited_interaction_checksum_outbound_replication[0].replication_task_id - } - } - ) -} - - - -resource "aws_cloudwatch_metric_alarm" "dms_cdc_latency_source" { - for_each = local.aws_dms_replication_tasks - alarm_name = "dms-cdc-latency-source-${each.value.replication_task_id}" - alarm_description = "High CDC source latency for dms replication task for ${each.value.replication_task_id}" - namespace = "AWS/DMS" - statistic = "Average" - metric_name = "CDCLatencySource" - comparison_operator = "GreaterThanThreshold" - threshold = 15 - evaluation_periods = 3 - period = 120 - actions_enabled = true - alarm_actions = [aws_sns_topic.dms_alerts_topic.arn] - ok_actions = [aws_sns_topic.dms_alerts_topic.arn] - dimensions = { - ReplicationInstanceIdentifier = aws_dms_replication_instance.dms_replication_instance.replication_instance_id - # We only need to final element of the replication task ID (after the last :) - ReplicationTaskIdentifier = split(":", each.value.replication_task_arn)[length(split(":", each.value.replication_task_arn)) - 1] - } - tags = var.tags -} - -resource "aws_cloudwatch_metric_alarm" "dms_cdc_latency_target" { - for_each = local.aws_dms_replication_tasks - alarm_name = "dms-cdc-latency-target-${each.value.replication_task_id}" - alarm_description = "High CDC target latency for dms replication task for ${each.value.replication_task_id}" - namespace = "AWS/DMS" - statistic = "Average" - metric_name = "CDCLatencyTarget" - comparison_operator = "GreaterThanThreshold" - threshold = 15 - evaluation_periods = 3 - period = 120 - actions_enabled = true - alarm_actions = [aws_sns_topic.dms_alerts_topic.arn] - ok_actions = [aws_sns_topic.dms_alerts_topic.arn] - dimensions = { - ReplicationInstanceIdentifier = 
aws_dms_replication_instance.dms_replication_instance.replication_instance_id - # We only need to final element of the replication task ID (after the last :) - ReplicationTaskIdentifier = split(":", each.value.replication_task_arn)[length(split(":", each.value.replication_task_arn)) - 1] - } - tags = var.tags -} - -# Pager duty integration - -# Get the map of pagerduty integration keys from the modernisation platform account -data "aws_secretsmanager_secret" "pagerduty_integration_keys" { - provider = aws.modernisation-platform - name = "pagerduty_integration_keys" -} - -data "aws_secretsmanager_secret_version" "pagerduty_integration_keys" { - provider = aws.modernisation-platform - secret_id = data.aws_secretsmanager_secret.pagerduty_integration_keys.id -} - -# Add a local to get the keys -locals { - pagerduty_integration_keys = jsondecode(data.aws_secretsmanager_secret_version.pagerduty_integration_keys.secret_string) - integration_key_lookup = var.dms_config.is-production ? "delius_oracle_prod_alarms" : "delius_oracle_nonprod_alarms" -} - -# link the sns topic to the service -# Non-Prod alerts channel: #delius-aws-oracle-dev-alerts -# Prod alerts channel: #delius-aws-oracle-prod-alerts -module "pagerduty_core_alerts" { - #checkov:skip=CKV_TF_1 - depends_on = [ - aws_sns_topic.dms_alerts_topic - ] - source = "github.com/ministryofjustice/modernisation-platform-terraform-pagerduty-integration?ref=v2.0.0" - sns_topics = [aws_sns_topic.dms_alerts_topic.name] - pagerduty_integration_key = local.pagerduty_integration_keys[local.integration_key_lookup] -} - -resource "aws_iam_role" "lambda_put_metric_data_role" { - name = "lambda-put-metric-data-role" - - assume_role_policy = jsonencode({ - Version = "2012-10-17", - Statement = [ - { - Action = "sts:AssumeRole", - Effect = "Allow", - Principal = { - Service = "lambda.amazonaws.com" - } - } - ] - }) -} - -resource "aws_iam_policy" "lambda_put_metric_data_policy" { - name = "lambda-put-metric-data-policy" - - policy = 
jsonencode({ - Version = "2012-10-17", - Statement = [ - { - Effect = "Allow", - Action = [ - "cloudwatch:PutMetricData" - ], - Resource = "*" - } - ] - }) -} - -resource "aws_iam_role_policy_attachment" "lambda_put_metric_data_policy_attach" { - role = aws_iam_role.lambda_put_metric_data_role.name - policy_arn = aws_iam_policy.lambda_put_metric_data_policy.arn -} - -# Allow Cloudwatch Logging -resource "aws_iam_role_policy_attachment" "lambda_put_metric_data_logging_attach" { - role = aws_iam_role.lambda_put_metric_data_role.name - policy_arn = "arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole" -} - -resource "local_file" "lambda_dms_replication_metric_py" { - filename = "${path.module}/lambda_dms_replication_metric.py" - content = < Date: Mon, 11 Nov 2024 17:11:19 +0000 Subject: [PATCH 030/103] Remove dependency --- .../delius-core/modules/components/dms/cloudwatch-alarms.tf | 3 --- 1 file changed, 3 deletions(-) diff --git a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf index 8704bfd2643..f8055e99e8e 100644 --- a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf +++ b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf @@ -240,9 +240,6 @@ resource "aws_lambda_function" "dms_replication_metric_publisher" { METRIC_NAMESPACE = "CustomDMSMetrics", METRIC_NAME = "DMSReplicationEvent" } - depends_on = [ - archive_file.lambda_dms_replication_metric_zip - ] } depends_on = [data.archive_file.lambda_dms_replication_metric_zip] From d4510b4978e575d0cf17f5bb641715298abdabfa Mon Sep 17 00:00:00 2001 From: Bill Buchan Date: Mon, 11 Nov 2024 17:17:42 +0000 Subject: [PATCH 031/103] Add hash value for lambda source --- .../delius-core/modules/components/dms/cloudwatch-alarms.tf | 1 + 1 file changed, 1 insertion(+) diff --git 
a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf index f8055e99e8e..b6a65e247ad 100644 --- a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf +++ b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf @@ -235,6 +235,7 @@ resource "aws_lambda_function" "dms_replication_metric_publisher" { handler = "lambda_dms_replication_metric.lambda_handler" runtime = "python3.8" filename = data.archive_file.lambda_dms_replication_metric_zip.output_path + source_code_hash = data.archive_file.lambda_dms_replication_metric_zip.output_base64sha256 environment { variables = { METRIC_NAMESPACE = "CustomDMSMetrics", From 2518bdd95c924450d0bddd44ca5cc091d56ed89d Mon Sep 17 00:00:00 2001 From: Bill Buchan Date: Mon, 11 Nov 2024 17:30:43 +0000 Subject: [PATCH 032/103] Put the zip file somewhere else --- .../delius-core/modules/components/dms/cloudwatch-alarms.tf | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf index b6a65e247ad..9f8b7ffb4ae 100644 --- a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf +++ b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf @@ -226,7 +226,8 @@ EOF data "archive_file" "lambda_dms_replication_metric_zip" { type = "zip" source_file = "${path.module}/lambda/dms_replication_metric.py" - output_path = "${path.module}/lambda_dms_replication_metric.zip" + output_path = "${path.module}/lambda/dms_replication_metric.zip" + excludes = ["lambda_dms_replication_metric.zip"] } resource "aws_lambda_function" "dms_replication_metric_publisher" { From 76ec5c43fada46fd97bd1d5e81bd2eb5fb48acfd Mon Sep 17 00:00:00 2001 From: Bill Buchan Date: Mon, 11 Nov 2024 17:33:01 +0000 
Subject: [PATCH 033/103] Specify source directory --- .../delius-core/modules/components/dms/cloudwatch-alarms.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf index 9f8b7ffb4ae..046c546eafd 100644 --- a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf +++ b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf @@ -225,7 +225,7 @@ EOF data "archive_file" "lambda_dms_replication_metric_zip" { type = "zip" - source_file = "${path.module}/lambda/dms_replication_metric.py" + source_dir = "${path.module}/lambda" output_path = "${path.module}/lambda/dms_replication_metric.zip" excludes = ["lambda_dms_replication_metric.zip"] } From 766656374a58a82e8d715c4afdd9cc895d73048e Mon Sep 17 00:00:00 2001 From: Bill Buchan Date: Mon, 11 Nov 2024 17:46:27 +0000 Subject: [PATCH 034/103] Update handler name --- .../delius-core/modules/components/dms/cloudwatch-alarms.tf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf index 046c546eafd..66af6d3fc34 100644 --- a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf +++ b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf @@ -227,13 +227,13 @@ data "archive_file" "lambda_dms_replication_metric_zip" { type = "zip" source_dir = "${path.module}/lambda" output_path = "${path.module}/lambda/dms_replication_metric.zip" - excludes = ["lambda_dms_replication_metric.zip"] + excludes = ["dms_replication_metric.zip"] } resource "aws_lambda_function" "dms_replication_metric_publisher" { function_name = "dms-replication-metric-publisher" role = 
aws_iam_role.lambda_put_metric_data_role.arn - handler = "lambda_dms_replication_metric.lambda_handler" + handler = "dms_replication_metric.lambda_handler" runtime = "python3.8" filename = data.archive_file.lambda_dms_replication_metric_zip.output_path source_code_hash = data.archive_file.lambda_dms_replication_metric_zip.output_base64sha256 From f9369aff4b6888ecbb9183f0edec242b3bc2de68 Mon Sep 17 00:00:00 2001 From: Bill Buchan Date: Mon, 11 Nov 2024 18:17:35 +0000 Subject: [PATCH 035/103] Reset metric if the task starts --- .../dms/lambda/dms_replication_metric.py | 47 ++++++++++++++----- 1 file changed, 34 insertions(+), 13 deletions(-) diff --git a/terraform/environments/delius-core/modules/components/dms/lambda/dms_replication_metric.py b/terraform/environments/delius-core/modules/components/dms/lambda/dms_replication_metric.py index 4710a732ec5..135ea33ee23 100644 --- a/terraform/environments/delius-core/modules/components/dms/lambda/dms_replication_metric.py +++ b/terraform/environments/delius-core/modules/components/dms/lambda/dms_replication_metric.py @@ -1,17 +1,38 @@ import boto3 +import json def lambda_handler(event, context): + cloudwatch = boto3.client('cloudwatch') - cloudwatch.put_metric_data( - Namespace='CustomDMSMetrics', - MetricData=[ - { - 'MetricName': 'DMSReplicationEvent', - 'Dimensions': [ - {'Name': 'Service', 'Value': 'DMS'} - ], - 'Value': 1, # Trigger threshold - 'Unit': 'Count' - } - ] - ) \ No newline at end of file + for record in event['Records']: + + message = json.loads(record['Sns']['Message']) + + if message.get("EventType") == "replication-task-state-change" and message.get("status") == "STARTED": + cloudwatch.put_metric_data( + Namespace='CustomDMSMetrics', + MetricData=[ + { + 'MetricName': 'DMSReplicationFailure', + 'Dimensions': [ + {'Name': 'Service', 'Value': 'DMS'} + ], + 'Value': 0, # Reset Below Trigger threshold (Task Started) + 'Unit': 'Count' + } + ] + ) + elif message.get("EventType") == "failure": + 
cloudwatch.put_metric_data( + Namespace='CustomDMSMetrics', + MetricData=[ + { + 'MetricName': 'DMSReplicationFailure', + 'Dimensions': [ + {'Name': 'Service', 'Value': 'DMS'} + ], + 'Value': 1, # Trigger threshold (Task Failed) + 'Unit': 'Count' + } + ] + ) \ No newline at end of file From ba7e8f5c50c009b75e62813c86eb1dcbbfe40ef8 Mon Sep 17 00:00:00 2001 From: Bill Buchan Date: Mon, 11 Nov 2024 18:19:34 +0000 Subject: [PATCH 036/103] Always report on the state change event (filter at SNS) --- .../delius-core/modules/components/dms/cloudwatch-alarms.tf | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf index 66af6d3fc34..943e27a9833 100644 --- a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf +++ b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf @@ -294,9 +294,6 @@ resource "aws_dms_event_subscription" "dms_task_event_subscription" { name = "dms-task-event-alerts" sns_topic_arn = aws_sns_topic.dms_events_topic.arn source_type = "replication-task" - # If this is production then we expect to see starting and stopping of replication tasks - # as this would not be normal behaviour. - # For non-production this will happen nightly due to automated stop/start - event_categories = var.dms_config.is-production ? 
["state change", "failure"] : ["failure"] + event_categories = ["state change", "failure"] enabled = true } \ No newline at end of file From f649a99e53102fac2fe225b9c0c83635f6610194 Mon Sep 17 00:00:00 2001 From: Bill Buchan Date: Mon, 11 Nov 2024 18:45:49 +0000 Subject: [PATCH 037/103] Change to metric name --- .../modules/components/dms/cloudwatch-alarms.tf | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf index 943e27a9833..a08dcdb0a7f 100644 --- a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf +++ b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf @@ -240,7 +240,7 @@ resource "aws_lambda_function" "dms_replication_metric_publisher" { environment { variables = { METRIC_NAMESPACE = "CustomDMSMetrics", - METRIC_NAME = "DMSReplicationEvent" + METRIC_NAME = "DMSReplicationFailure" } } @@ -258,15 +258,15 @@ resource "aws_lambda_permission" "allow_sns_invoke_dms_replication_metric_publis resource "aws_cloudwatch_metric_alarm" "dms_replication_alarm" { - alarm_name = "DMSReplicationEventAlarm" + alarm_name = "DMSReplicationFailureAlarm" comparison_operator = "GreaterThanOrEqualToThreshold" evaluation_periods = "1" - metric_name = "DMSReplicationEvent" + metric_name = "DMSReplicationFailure" namespace = "CustomDMSMetrics" period = "60" statistic = "Sum" threshold = 1 - alarm_description = "Alarm when DMSReplicationEvent metric is >= 1" + alarm_description = "Alarm when DMSReplicationFailure metric is >= 1" alarm_actions = [aws_sns_topic.dms_alerts_topic.arn] } From 7b72e1c994d1df49b6cc151498f89a1a703bf79c Mon Sep 17 00:00:00 2001 From: Bill Buchan Date: Mon, 11 Nov 2024 19:06:29 +0000 Subject: [PATCH 038/103] Add debug --- .../components/dms/lambda/dms_replication_metric.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 
deletions(-) diff --git a/terraform/environments/delius-core/modules/components/dms/lambda/dms_replication_metric.py b/terraform/environments/delius-core/modules/components/dms/lambda/dms_replication_metric.py index 135ea33ee23..90185a5bd2e 100644 --- a/terraform/environments/delius-core/modules/components/dms/lambda/dms_replication_metric.py +++ b/terraform/environments/delius-core/modules/components/dms/lambda/dms_replication_metric.py @@ -1,5 +1,9 @@ import boto3 import json +import logging + +logger = logging.getLogger() +logger.setLevel(logging.INFO) def lambda_handler(event, context): @@ -8,7 +12,12 @@ def lambda_handler(event, context): message = json.loads(record['Sns']['Message']) - if message.get("EventType") == "replication-task-state-change" and message.get("status") == "STARTED": + event_type = message.get("EventType") + status = message.get("status") + + logger.info("SNS Message: %",message) + + if event_type == "replication-task-state-change" and status == "STARTED": cloudwatch.put_metric_data( Namespace='CustomDMSMetrics', MetricData=[ @@ -22,7 +31,7 @@ def lambda_handler(event, context): } ] ) - elif message.get("EventType") == "failure": + elif event_type == "failure": cloudwatch.put_metric_data( Namespace='CustomDMSMetrics', MetricData=[ From 66f61b16498e89860ed8c3f82d9827157fff017a Mon Sep 17 00:00:00 2001 From: Bill Buchan Date: Mon, 11 Nov 2024 19:20:05 +0000 Subject: [PATCH 039/103] Missing s --- .../modules/components/dms/lambda/dms_replication_metric.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/terraform/environments/delius-core/modules/components/dms/lambda/dms_replication_metric.py b/terraform/environments/delius-core/modules/components/dms/lambda/dms_replication_metric.py index 90185a5bd2e..e1dea16e042 100644 --- a/terraform/environments/delius-core/modules/components/dms/lambda/dms_replication_metric.py +++ b/terraform/environments/delius-core/modules/components/dms/lambda/dms_replication_metric.py @@ -15,7 +15,7 
@@ def lambda_handler(event, context): event_type = message.get("EventType") status = message.get("status") - logger.info("SNS Message: %",message) + logger.info("SNS Message: %s",message) if event_type == "replication-task-state-change" and status == "STARTED": cloudwatch.put_metric_data( From 16ebed12633b948895fd36fc1ceb5ddf8fb3b587 Mon Sep 17 00:00:00 2001 From: Bill Buchan Date: Mon, 11 Nov 2024 19:33:32 +0000 Subject: [PATCH 040/103] Use correct attributes --- .../components/dms/lambda/dms_replication_metric.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/terraform/environments/delius-core/modules/components/dms/lambda/dms_replication_metric.py b/terraform/environments/delius-core/modules/components/dms/lambda/dms_replication_metric.py index e1dea16e042..a4cae889425 100644 --- a/terraform/environments/delius-core/modules/components/dms/lambda/dms_replication_metric.py +++ b/terraform/environments/delius-core/modules/components/dms/lambda/dms_replication_metric.py @@ -1,6 +1,7 @@ import boto3 import json import logging +import re logger = logging.getLogger() logger.setLevel(logging.INFO) @@ -12,12 +13,12 @@ def lambda_handler(event, context): message = json.loads(record['Sns']['Message']) - event_type = message.get("EventType") - status = message.get("status") + event_message = message.get("Event Message") logger.info("SNS Message: %s",message) - if event_type == "replication-task-state-change" and status == "STARTED": + if re.search(r"^Replication task has started.$",event_message): + logger.info("Task started") cloudwatch.put_metric_data( Namespace='CustomDMSMetrics', MetricData=[ @@ -31,7 +32,8 @@ def lambda_handler(event, context): } ] ) - elif event_type == "failure": + elif re.search(r"^Replication task has failed..*$",event_message): + logger.info("Task failed") cloudwatch.put_metric_data( Namespace='CustomDMSMetrics', MetricData=[ From 7c2ec68432cae4ff4417864cdbc1542313e933f4 Mon Sep 17 00:00:00 2001 From: Bill Buchan 
Date: Tue, 12 Nov 2024 09:55:38 +0000 Subject: [PATCH 041/103] More detailed metric --- .../dms/lambda/dms_replication_metric.py | 36 +++++++++++++++---- 1 file changed, 29 insertions(+), 7 deletions(-) diff --git a/terraform/environments/delius-core/modules/components/dms/lambda/dms_replication_metric.py b/terraform/environments/delius-core/modules/components/dms/lambda/dms_replication_metric.py index a4cae889425..2e445e60a0e 100644 --- a/terraform/environments/delius-core/modules/components/dms/lambda/dms_replication_metric.py +++ b/terraform/environments/delius-core/modules/components/dms/lambda/dms_replication_metric.py @@ -12,35 +12,57 @@ def lambda_handler(event, context): for record in event['Records']: message = json.loads(record['Sns']['Message']) + logger.info("SNS Message: %s",message) event_message = message.get("Event Message") + event_source = message.get("Event Source") + source_id = message.get("SourceId") - logger.info("SNS Message: %s",message) + dms_event_id = re.search(r"#(DMS-EVENT-\d+) $",message.get("Event ID")) + + # DMS Event IDs are documented at https://docs.aws.amazon.com/dms/latest/userguide/CHAP_Events.html + # + # Those relevant for this metric are: + # + # Running Replication: + # DMS-EVENT-0069: The replication task has started. + # DMS-EVENT-0081: A reload of table details has been requested. + # DMS-EVENT-0093: Reading resumed. + running_replication = ["DMS-EVENT-0069","DMS-EVENT-0081","DMS-EVENT-0093"] + # + # Stopped Replication: + # DMS-EVENT-0079: The replication task has stopped. + # DMS-EVENT-0091: Reading paused, swap files limit reached. + # DMS-EVENT-0092: Reading paused, disk usage limit reached. + # DMS-EVENT-0078: A replication task has failed. 
+ stopped_replication = ["DMS-EVENT-0079","DMS-EVENT-0091","DMS-EVENT-0092","DMS-EVENT-0078"] - if re.search(r"^Replication task has started.$",event_message): + if dms_event_id in running_replication: logger.info("Task started") cloudwatch.put_metric_data( Namespace='CustomDMSMetrics', MetricData=[ { - 'MetricName': 'DMSReplicationFailure', + 'MetricName': 'DMSReplicationStopped', 'Dimensions': [ - {'Name': 'Service', 'Value': 'DMS'} + {'Name': 'EventSource', 'Value': event_source}, + {'Name': 'SourceId', 'Value': source_id} ], 'Value': 0, # Reset Below Trigger threshold (Task Started) 'Unit': 'Count' } ] ) - elif re.search(r"^Replication task has failed..*$",event_message): + elif dms_event_id in stopped_replication: logger.info("Task failed") cloudwatch.put_metric_data( Namespace='CustomDMSMetrics', MetricData=[ { - 'MetricName': 'DMSReplicationFailure', + 'MetricName': 'DMSReplicationStopped', 'Dimensions': [ - {'Name': 'Service', 'Value': 'DMS'} + {'Name': 'EventSource', 'Value': event_source}, + {'Name': 'SourceId', 'Value': source_id} ], 'Value': 1, # Trigger threshold (Task Failed) 'Unit': 'Count' From 7acb03dcab7d756ac735e7582e9b59e073c3f302 Mon Sep 17 00:00:00 2001 From: Bill Buchan Date: Tue, 12 Nov 2024 10:28:47 +0000 Subject: [PATCH 042/103] Use the matched pattern --- .../modules/components/dms/lambda/dms_replication_metric.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/terraform/environments/delius-core/modules/components/dms/lambda/dms_replication_metric.py b/terraform/environments/delius-core/modules/components/dms/lambda/dms_replication_metric.py index 2e445e60a0e..2eb8bdd1881 100644 --- a/terraform/environments/delius-core/modules/components/dms/lambda/dms_replication_metric.py +++ b/terraform/environments/delius-core/modules/components/dms/lambda/dms_replication_metric.py @@ -37,7 +37,7 @@ def lambda_handler(event, context): # DMS-EVENT-0078: A replication task has failed. 
stopped_replication = ["DMS-EVENT-0079","DMS-EVENT-0091","DMS-EVENT-0092","DMS-EVENT-0078"] - if dms_event_id in running_replication: + if dms_event_id.group(1) in running_replication: logger.info("Task started") cloudwatch.put_metric_data( Namespace='CustomDMSMetrics', @@ -53,7 +53,7 @@ def lambda_handler(event, context): } ] ) - elif dms_event_id in stopped_replication: + elif dms_event_id.group(1) in stopped_replication: logger.info("Task failed") cloudwatch.put_metric_data( Namespace='CustomDMSMetrics', From 68939917d68f9b672f158d3544f0f5d3de5173cc Mon Sep 17 00:00:00 2001 From: Bill Buchan Date: Tue, 12 Nov 2024 11:24:09 +0000 Subject: [PATCH 043/103] Set Alarm for Any Dimension --- .../components/dms/cloudwatch-alarms.tf | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf index a08dcdb0a7f..4aa696a550d 100644 --- a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf +++ b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf @@ -256,17 +256,17 @@ resource "aws_lambda_permission" "allow_sns_invoke_dms_replication_metric_publis source_arn = aws_sns_topic.dms_events_topic.arn } - -resource "aws_cloudwatch_metric_alarm" "dms_replication_alarm" { - alarm_name = "DMSReplicationFailureAlarm" - comparison_operator = "GreaterThanOrEqualToThreshold" - evaluation_periods = "1" - metric_name = "DMSReplicationFailure" +resource "aws_cloudwatch_metric_alarm" "dms_replication_stopped_alarm" { + alarm_name = "DMSReplicationStoppedAlarm" + comparison_operator = "GreaterThanThreshold" + evaluation_periods = 1 + metric_name = "DMSReplicationStopped" namespace = "CustomDMSMetrics" - period = "60" - statistic = "Sum" - threshold = 1 - alarm_description = "Alarm when DMSReplicationFailure metric is >= 1" + period = 60 + statistic = 
"Maximum" + threshold = 0 + treat_missing_data = "missing" + alarm_description = "Alarm when Any DMS Replication Task has Stopped or Failed" alarm_actions = [aws_sns_topic.dms_alerts_topic.arn] } From 79076888fb0146e94b9e49aa3fcbe05ee91a3416 Mon Sep 17 00:00:00 2001 From: Bill Buchan Date: Tue, 12 Nov 2024 11:41:42 +0000 Subject: [PATCH 044/103] Treat missing data as not breaching Events will occur rarely --- .../delius-core/modules/components/dms/cloudwatch-alarms.tf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf index 4aa696a550d..7a7e2dd8b99 100644 --- a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf +++ b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf @@ -262,10 +262,10 @@ resource "aws_cloudwatch_metric_alarm" "dms_replication_stopped_alarm" { evaluation_periods = 1 metric_name = "DMSReplicationStopped" namespace = "CustomDMSMetrics" - period = 60 + period = 300 statistic = "Maximum" threshold = 0 - treat_missing_data = "missing" + treat_missing_data = "notBreaching" alarm_description = "Alarm when Any DMS Replication Task has Stopped or Failed" alarm_actions = [aws_sns_topic.dms_alerts_topic.arn] From c91f55143b5d3d79abfe52813aa153cf309d3bf4 Mon Sep 17 00:00:00 2001 From: Bill Buchan Date: Tue, 12 Nov 2024 11:45:15 +0000 Subject: [PATCH 045/103] Keep existing alarm status --- .../delius-core/modules/components/dms/cloudwatch-alarms.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf index 7a7e2dd8b99..a15549ea90a 100644 --- a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf +++ 
b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf @@ -265,7 +265,7 @@ resource "aws_cloudwatch_metric_alarm" "dms_replication_stopped_alarm" { period = 300 statistic = "Maximum" threshold = 0 - treat_missing_data = "notBreaching" + treat_missing_data = "ignore" alarm_description = "Alarm when Any DMS Replication Task has Stopped or Failed" alarm_actions = [aws_sns_topic.dms_alerts_topic.arn] From 821f01b8f48614772010f8dee99282381c24e3b7 Mon Sep 17 00:00:00 2001 From: Bill Buchan Date: Tue, 12 Nov 2024 11:46:11 +0000 Subject: [PATCH 046/103] Reduce period whilst testing --- .../delius-core/modules/components/dms/cloudwatch-alarms.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf index a15549ea90a..4ed9be0ae83 100644 --- a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf +++ b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf @@ -262,7 +262,7 @@ resource "aws_cloudwatch_metric_alarm" "dms_replication_stopped_alarm" { evaluation_periods = 1 metric_name = "DMSReplicationStopped" namespace = "CustomDMSMetrics" - period = 300 + period = 60 statistic = "Maximum" threshold = 0 treat_missing_data = "ignore" From 03cf0c6aa59a2da564ab39f7b106f054fdbd151f Mon Sep 17 00:00:00 2001 From: Bill Buchan Date: Tue, 12 Nov 2024 14:22:53 +0000 Subject: [PATCH 047/103] Put more information into the log --- .../modules/components/dms/lambda/dms_replication_metric.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/terraform/environments/delius-core/modules/components/dms/lambda/dms_replication_metric.py b/terraform/environments/delius-core/modules/components/dms/lambda/dms_replication_metric.py index 2eb8bdd1881..bff16021b90 100644 --- 
a/terraform/environments/delius-core/modules/components/dms/lambda/dms_replication_metric.py +++ b/terraform/environments/delius-core/modules/components/dms/lambda/dms_replication_metric.py @@ -38,7 +38,7 @@ def lambda_handler(event, context): stopped_replication = ["DMS-EVENT-0079","DMS-EVENT-0091","DMS-EVENT-0092","DMS-EVENT-0078"] if dms_event_id.group(1) in running_replication: - logger.info("Task started") + logger.info("TASK START: " + event_source + " task " + source_id + " started") cloudwatch.put_metric_data( Namespace='CustomDMSMetrics', MetricData=[ @@ -54,7 +54,7 @@ def lambda_handler(event, context): ] ) elif dms_event_id.group(1) in stopped_replication: - logger.info("Task failed") + logger.info("TASK STOPPED: " + event_source + " task " + source_id + " stopped") cloudwatch.put_metric_data( Namespace='CustomDMSMetrics', MetricData=[ From 121fce7066f5ce6869b92b88ab75efa315822441 Mon Sep 17 00:00:00 2001 From: Bill Buchan Date: Tue, 12 Nov 2024 14:59:14 +0000 Subject: [PATCH 048/103] Sum stopped tasks --- .../components/dms/cloudwatch-alarms.tf | 26 +++++++++++++++---- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf index 4ed9be0ae83..f33074c94ce 100644 --- a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf +++ b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf @@ -260,13 +260,29 @@ resource "aws_cloudwatch_metric_alarm" "dms_replication_stopped_alarm" { alarm_name = "DMSReplicationStoppedAlarm" comparison_operator = "GreaterThanThreshold" evaluation_periods = 1 - metric_name = "DMSReplicationStopped" - namespace = "CustomDMSMetrics" - period = 60 - statistic = "Maximum" threshold = 0 - treat_missing_data = "ignore" alarm_description = "Alarm when Any DMS Replication Task has Stopped or Failed" + actions_enabled = true + 
treat_missing_data = "ignore" + threshold_metric_id = "m1" + + # Defining the metric query to sum across all dimensions (replication tasks) + metric_query { + id = "m1" + expression = "SUM(m2)" + label = "Sum of Stopped Replication Tasks across all defined Tasks" + return_data = true + } + + metric_query { + id = "m2" + metric { + namespace = "CustomDMSMetrics" + metric_name = "DMSReplicationStopped" + period = 60 + stat = "Maximum" + } + } alarm_actions = [aws_sns_topic.dms_alerts_topic.arn] } From 3bbd1863aff1cefdbb6104ffe7e129942cb3fca0 Mon Sep 17 00:00:00 2001 From: Bill Buchan Date: Tue, 12 Nov 2024 15:04:30 +0000 Subject: [PATCH 049/103] Threshold is zero --- .../delius-core/modules/components/dms/cloudwatch-alarms.tf | 1 - 1 file changed, 1 deletion(-) diff --git a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf index f33074c94ce..c3b2abbf877 100644 --- a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf +++ b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf @@ -264,7 +264,6 @@ resource "aws_cloudwatch_metric_alarm" "dms_replication_stopped_alarm" { alarm_description = "Alarm when Any DMS Replication Task has Stopped or Failed" actions_enabled = true treat_missing_data = "ignore" - threshold_metric_id = "m1" # Defining the metric query to sum across all dimensions (replication tasks) metric_query { From cda4f4a86f834685e0856d818a60eda4b8344448 Mon Sep 17 00:00:00 2001 From: Bill Buchan Date: Tue, 12 Nov 2024 15:13:00 +0000 Subject: [PATCH 050/103] Sum over all dimensions --- .../components/dms/cloudwatch-alarms.tf | 26 +++++-------------- 1 file changed, 6 insertions(+), 20 deletions(-) diff --git a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf index c3b2abbf877..0ddd1c8d3ef 
100644 --- a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf +++ b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf @@ -260,29 +260,15 @@ resource "aws_cloudwatch_metric_alarm" "dms_replication_stopped_alarm" { alarm_name = "DMSReplicationStoppedAlarm" comparison_operator = "GreaterThanThreshold" evaluation_periods = 1 + metric_name = "DMSReplicationStopped" + namespace = "CustomDMSMetrics" + period = 60 + statistic = "Sum" threshold = 0 + treat_missing_data = "ignore" alarm_description = "Alarm when Any DMS Replication Task has Stopped or Failed" actions_enabled = true - treat_missing_data = "ignore" - - # Defining the metric query to sum across all dimensions (replication tasks) - metric_query { - id = "m1" - expression = "SUM(m2)" - label = "Sum of Stopped Replication Tasks across all defined Tasks" - return_data = true - } - - metric_query { - id = "m2" - metric { - namespace = "CustomDMSMetrics" - metric_name = "DMSReplicationStopped" - period = 60 - stat = "Maximum" - } - } - + alarm_actions = [aws_sns_topic.dms_alerts_topic.arn] } From 85d1b0f60c17d7eb65a4b3cea4dea44d7736e795 Mon Sep 17 00:00:00 2001 From: Bill Buchan Date: Tue, 12 Nov 2024 15:50:58 +0000 Subject: [PATCH 051/103] Use maths expression for metric --- .../components/dms/cloudwatch-alarms.tf | 54 ++++++++++++++++--- 1 file changed, 47 insertions(+), 7 deletions(-) diff --git a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf index 0ddd1c8d3ef..5b0a456854b 100644 --- a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf +++ b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf @@ -256,22 +256,62 @@ resource "aws_lambda_permission" "allow_sns_invoke_dms_replication_metric_publis source_arn = aws_sns_topic.dms_events_topic.arn } + +# CloudWatch won't aggregate across 
dimensions for custom metrics (it will do so for some metrics published by other services, like EC2). +# resource "aws_cloudwatch_metric_alarm" "dms_replication_stopped_alarm" { +# alarm_name = "DMSReplicationStoppedAlarm" +# comparison_operator = "GreaterThanThreshold" +# evaluation_periods = 1 +# metric_name = "DMSReplicationStopped" +# namespace = "CustomDMSMetrics" +# period = 60 +# statistic = "Sum" +# threshold = 0 +# treat_missing_data = "ignore" +# alarm_description = "Alarm when Any DMS Replication Task has Stopped or Failed" +# actions_enabled = true + +# alarm_actions = [aws_sns_topic.dms_alerts_topic.arn] +# } + + +# Define a CloudWatch metric alarm with a metric math expression resource "aws_cloudwatch_metric_alarm" "dms_replication_stopped_alarm" { alarm_name = "DMSReplicationStoppedAlarm" + alarm_description = "Alarm when Stopped Replication Task across all Dimensions (tasks)" comparison_operator = "GreaterThanThreshold" evaluation_periods = 1 - metric_name = "DMSReplicationStopped" - namespace = "CustomDMSMetrics" - period = 60 - statistic = "Sum" threshold = 0 treat_missing_data = "ignore" - alarm_description = "Alarm when Any DMS Replication Task has Stopped or Failed" - actions_enabled = true - + + # Query for the custom metric across all dimensions + metric_query { + id = "m1" + metric_name = "DMSReplicationStopped" + namespace = "CustomDMSMetrics" + period = 60 + stat = "Sum" + } + + # Metric math expression to sum the metric across all dimensions + metric_query { + id = "e1" + expression = "SUM(METRICS('CustomDMSMetrics', 'DMSReplicationStopped', {}, 60))" + label = "TotalDMSReplicationStoppedAcrossAllDimensions" + } + + # Use the expression query result as the metric for the alarm + alarm_rule { + metric_query_id = "e1" + } + alarm_actions = [aws_sns_topic.dms_alerts_topic.arn] } + + + + # SNS Topic for DMS replication events # This is NOT the same as for DMS Cloudwatch Alarms (dms_alerting) # and is used to trigger the Lamda function if an 
event happens during From 94461318ce84fc3423717946abbb90510f1ade0a Mon Sep 17 00:00:00 2001 From: Bill Buchan Date: Tue, 12 Nov 2024 16:00:03 +0000 Subject: [PATCH 052/103] Define which metric to use for the alarm --- .../modules/components/dms/cloudwatch-alarms.tf | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf index 5b0a456854b..a7caa65b3f8 100644 --- a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf +++ b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf @@ -291,18 +291,15 @@ resource "aws_cloudwatch_metric_alarm" "dms_replication_stopped_alarm" { namespace = "CustomDMSMetrics" period = 60 stat = "Sum" + return_data = "false" } # Metric math expression to sum the metric across all dimensions metric_query { - id = "e1" - expression = "SUM(METRICS('CustomDMSMetrics', 'DMSReplicationStopped', {}, 60))" - label = "TotalDMSReplicationStoppedAcrossAllDimensions" - } - - # Use the expression query result as the metric for the alarm - alarm_rule { - metric_query_id = "e1" + id = "e1" + expression = "SUM(METRICS('CustomDMSMetrics', 'DMSReplicationStopped', {}, 60))" + label = "TotalDMSReplicationStoppedAcrossAllDimensions" + return_data = "false" } alarm_actions = [aws_sns_topic.dms_alerts_topic.arn] From 7b155f4719e6cefbc073efb58f82165e99437df6 Mon Sep 17 00:00:00 2001 From: Bill Buchan Date: Tue, 12 Nov 2024 16:08:03 +0000 Subject: [PATCH 053/103] Wrong attribute name --- .../delius-core/modules/components/dms/cloudwatch-alarms.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf index a7caa65b3f8..b9c43a4aea0 100644 --- 
a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf +++ b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf @@ -290,7 +290,7 @@ resource "aws_cloudwatch_metric_alarm" "dms_replication_stopped_alarm" { metric_name = "DMSReplicationStopped" namespace = "CustomDMSMetrics" period = 60 - stat = "Sum" + statistic = "Sum" return_data = "false" } From 9d14a66a37bf011cddcc3e9395451537ff614887 Mon Sep 17 00:00:00 2001 From: Bill Buchan Date: Tue, 12 Nov 2024 16:13:41 +0000 Subject: [PATCH 054/103] Metric needs indenting --- .../modules/components/dms/cloudwatch-alarms.tf | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf index b9c43a4aea0..7c5a469807e 100644 --- a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf +++ b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf @@ -287,10 +287,12 @@ resource "aws_cloudwatch_metric_alarm" "dms_replication_stopped_alarm" { # Query for the custom metric across all dimensions metric_query { id = "m1" - metric_name = "DMSReplicationStopped" - namespace = "CustomDMSMetrics" - period = 60 - statistic = "Sum" + metric { + metric_name = "DMSReplicationStopped" + namespace = "CustomDMSMetrics" + period = 60 + stat = "Sum" + } return_data = "false" } From fda1535ea59e7a72061dbe77887c91cf0caf0e5b Mon Sep 17 00:00:00 2001 From: Bill Buchan Date: Tue, 12 Nov 2024 16:14:42 +0000 Subject: [PATCH 055/103] Need to return the data --- .../delius-core/modules/components/dms/cloudwatch-alarms.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf index 7c5a469807e..26cde0670fd 100644 --- 
a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf +++ b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf @@ -301,7 +301,7 @@ resource "aws_cloudwatch_metric_alarm" "dms_replication_stopped_alarm" { id = "e1" expression = "SUM(METRICS('CustomDMSMetrics', 'DMSReplicationStopped', {}, 60))" label = "TotalDMSReplicationStoppedAcrossAllDimensions" - return_data = "false" + return_data = "true" } alarm_actions = [aws_sns_topic.dms_alerts_topic.arn] From 64fb45ce40b11b9c7beba1a28dc3bb66fc94749b Mon Sep 17 00:00:00 2001 From: Bill Buchan Date: Wed, 13 Nov 2024 10:42:43 +0000 Subject: [PATCH 056/103] Remove unnecessary local_file resource --- .../modules/components/dms/cloudwatch-alarms.tf | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf index 26cde0670fd..1edccccc1b8 100644 --- a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf +++ b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf @@ -210,12 +210,12 @@ resource "aws_iam_role_policy_attachment" "lambda_put_metric_data_logging_attach policy_arn = "arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole" } -resource "local_file" "lambda_dms_replication_metric_py" { - filename = "${path.module}/lambda_dms_replication_metric.py" - content = < Date: Wed, 13 Nov 2024 11:46:33 +0000 Subject: [PATCH 057/103] Fix metric query definition --- .../components/dms/cloudwatch-alarms.tf | 22 +++---------------- 1 file changed, 3 insertions(+), 19 deletions(-) diff --git a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf index 1edccccc1b8..bdeed1ee304 100644 --- 
a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf +++ b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf @@ -284,33 +284,17 @@ resource "aws_cloudwatch_metric_alarm" "dms_replication_stopped_alarm" { threshold = 0 treat_missing_data = "ignore" - # Query for the custom metric across all dimensions metric_query { id = "m1" - metric { - metric_name = "DMSReplicationStopped" - namespace = "CustomDMSMetrics" - period = 60 - stat = "Sum" - } - return_data = "false" - } - - # Metric math expression to sum the metric across all dimensions - metric_query { - id = "e1" - expression = "SUM(METRICS('CustomDMSMetrics', 'DMSReplicationStopped', {}, 60))" - label = "TotalDMSReplicationStoppedAcrossAllDimensions" - return_data = "true" + expression = "SEARCH('{CustomDMSMetrics, DMSReplicationStopped}', 'Sum', 60)" + label = "Sum of DMSReplicationStopped across all task dimensions" + return_data = true } alarm_actions = [aws_sns_topic.dms_alerts_topic.arn] } - - - # SNS Topic for DMS replication events # This is NOT the same as for DMS Cloudwatch Alarms (dms_alerting) # and is used to trigger the Lamda function if an event happens during From efa6b4875883f91bedf33995cda1cd2496364975 Mon Sep 17 00:00:00 2001 From: Bill Buchan Date: Wed, 13 Nov 2024 11:54:30 +0000 Subject: [PATCH 058/103] Add evaluation period --- .../delius-core/modules/components/dms/cloudwatch-alarms.tf | 1 + 1 file changed, 1 insertion(+) diff --git a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf index bdeed1ee304..5b0d92dd749 100644 --- a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf +++ b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf @@ -282,6 +282,7 @@ resource "aws_cloudwatch_metric_alarm" "dms_replication_stopped_alarm" { comparison_operator = "GreaterThanThreshold" 
evaluation_periods = 1 threshold = 0 + period = 60 treat_missing_data = "ignore" metric_query { From facf0367accc57839cec5a539d5384129d32a5a9 Mon Sep 17 00:00:00 2001 From: Bill Buchan Date: Wed, 13 Nov 2024 11:59:36 +0000 Subject: [PATCH 059/103] Redefine sum --- .../modules/components/dms/cloudwatch-alarms.tf | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf index 5b0d92dd749..0269b19b5e3 100644 --- a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf +++ b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf @@ -287,7 +287,16 @@ resource "aws_cloudwatch_metric_alarm" "dms_replication_stopped_alarm" { metric_query { id = "m1" - expression = "SEARCH('{CustomDMSMetrics, DMSReplicationStopped}', 'Sum', 60)" + metric_name = "CustomDMSMetrics" + namespace = "DMSReplicationStopped" + period = 60 + stat = "Sum" + return_data = false + } + + metric_query { + id = "e1" + expression = "m1" label = "Sum of DMSReplicationStopped across all task dimensions" return_data = true } From b300ca4967c996c9ee5753f1041f9b061f58967a Mon Sep 17 00:00:00 2001 From: Bill Buchan Date: Wed, 13 Nov 2024 12:11:52 +0000 Subject: [PATCH 060/103] Refactor --- .../modules/components/dms/cloudwatch-alarms.tf | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf index 0269b19b5e3..5b0d92dd749 100644 --- a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf +++ b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf @@ -287,16 +287,7 @@ resource "aws_cloudwatch_metric_alarm" "dms_replication_stopped_alarm" { metric_query { id = 
"m1" - metric_name = "CustomDMSMetrics" - namespace = "DMSReplicationStopped" - period = 60 - stat = "Sum" - return_data = false - } - - metric_query { - id = "e1" - expression = "m1" + expression = "SEARCH('{CustomDMSMetrics, DMSReplicationStopped}', 'Sum', 60)" label = "Sum of DMSReplicationStopped across all task dimensions" return_data = true } From 8ddb9b1a9f0e9906d5b48c0204069e15e8d0db10 Mon Sep 17 00:00:00 2001 From: Bill Buchan Date: Wed, 13 Nov 2024 13:58:04 +0000 Subject: [PATCH 061/103] Create alarm for each task --- .../components/dms/cloudwatch-alarms.tf | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf index 5b0d92dd749..4835c7c8771 100644 --- a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf +++ b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf @@ -274,22 +274,26 @@ resource "aws_lambda_permission" "allow_sns_invoke_dms_replication_metric_publis # alarm_actions = [aws_sns_topic.dms_alerts_topic.arn] # } +# Fetch all DMS replication tasks +data "aws_dms_replication_tasks" "all_tasks" {} # Define a CloudWatch metric alarm with a metric math expression resource "aws_cloudwatch_metric_alarm" "dms_replication_stopped_alarm" { - alarm_name = "DMSReplicationStoppedAlarm" - alarm_description = "Alarm when Stopped Replication Task across all Dimensions (tasks)" + for_each = { for task in data.aws_dms_replication_tasks.all_tasks.replication_tasks : task.replication_task_id => task } + alarm_name = "DMSReplicationStoppedAlarm_${each.key}" + alarm_description = "Alarm when Stopped Replication Task for ${each.key}" comparison_operator = "GreaterThanThreshold" evaluation_periods = 1 threshold = 0 period = 60 + statistic = "Maximum" treat_missing_data = "ignore" - metric_query { - id = "m1" - expression = 
"SEARCH('{CustomDMSMetrics, DMSReplicationStopped}', 'Sum', 60)" - label = "Sum of DMSReplicationStopped across all task dimensions" - return_data = true + metric_name = "DMSReplicationStopped" + namespace = "CustomDMSMetrics" + dimensions = { + SourceId = each.key + EventSouce = "replication-task" } alarm_actions = [aws_sns_topic.dms_alerts_topic.arn] From b2d1678363324fd5f2e921dc5d31222cc4ea8157 Mon Sep 17 00:00:00 2001 From: Bill Buchan Date: Wed, 13 Nov 2024 13:59:27 +0000 Subject: [PATCH 062/103] typo --- .../delius-core/modules/components/dms/cloudwatch-alarms.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf index 4835c7c8771..61044e9670c 100644 --- a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf +++ b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf @@ -275,7 +275,7 @@ resource "aws_lambda_permission" "allow_sns_invoke_dms_replication_metric_publis # } # Fetch all DMS replication tasks -data "aws_dms_replication_tasks" "all_tasks" {} +data "aws_dms_replication_task" "all_tasks" {} # Define a CloudWatch metric alarm with a metric math expression resource "aws_cloudwatch_metric_alarm" "dms_replication_stopped_alarm" { From 524488959ba6e3b532be7ddf3d73dd618c41d713 Mon Sep 17 00:00:00 2001 From: Bill Buchan Date: Wed, 13 Nov 2024 14:00:02 +0000 Subject: [PATCH 063/103] typo --- .../delius-core/modules/components/dms/cloudwatch-alarms.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf index 61044e9670c..72971c6553e 100644 --- a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf +++ 
b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf @@ -279,7 +279,7 @@ data "aws_dms_replication_task" "all_tasks" {} # Define a CloudWatch metric alarm with a metric math expression resource "aws_cloudwatch_metric_alarm" "dms_replication_stopped_alarm" { - for_each = { for task in data.aws_dms_replication_tasks.all_tasks.replication_tasks : task.replication_task_id => task } + for_each = { for task in data.aws_dms_replication_task.all_tasks.replication_tasks : task.replication_task_id => task } alarm_name = "DMSReplicationStoppedAlarm_${each.key}" alarm_description = "Alarm when Stopped Replication Task for ${each.key}" comparison_operator = "GreaterThanThreshold" From 7b87142676e1eea90e0028eb080b2eaf244e4b58 Mon Sep 17 00:00:00 2001 From: Bill Buchan Date: Wed, 13 Nov 2024 14:20:37 +0000 Subject: [PATCH 064/103] Loop through list of task names --- .../modules/components/dms/cloudwatch-alarms.tf | 4 +--- .../delius-core/modules/components/dms/locals.tf | 11 +++++++++++ 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf index 72971c6553e..1c6de5c623f 100644 --- a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf +++ b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf @@ -274,12 +274,10 @@ resource "aws_lambda_permission" "allow_sns_invoke_dms_replication_metric_publis # alarm_actions = [aws_sns_topic.dms_alerts_topic.arn] # } -# Fetch all DMS replication tasks -data "aws_dms_replication_task" "all_tasks" {} # Define a CloudWatch metric alarm with a metric math expression resource "aws_cloudwatch_metric_alarm" "dms_replication_stopped_alarm" { - for_each = { for task in data.aws_dms_replication_task.all_tasks.replication_tasks : task.replication_task_id => task } + for_each = toset(local.replication_task_names) 
alarm_name = "DMSReplicationStoppedAlarm_${each.key}" alarm_description = "Alarm when Stopped Replication Task for ${each.key}" comparison_operator = "GreaterThanThreshold" diff --git a/terraform/environments/delius-core/modules/components/dms/locals.tf b/terraform/environments/delius-core/modules/components/dms/locals.tf index 7d7b9f0a42a..1516a8e8441 100644 --- a/terraform/environments/delius-core/modules/components/dms/locals.tf +++ b/terraform/environments/delius-core/modules/components/dms/locals.tf @@ -41,4 +41,15 @@ locals { dms_s3_writer_role_name = "${var.env_name}-dms-s3-writer-role" dms_s3_reader_role_name = "${var.env_name}-dms-s3-reader-role" + replication_task_names = concat( + try([aws_dms_replication_task.user_inbound_replication[0].replication_task_id],[]), + try([aws_dms_replication_task.business_interaction_inbound_replication[0].replication_task_id],[]), + try([aws_dms_replication_task.audited_interaction_inbound_replication[0].replication_task_id],[]), + try([aws_dms_replication_task.audited_interaction_checksum_inbound_replication[0].replication_task_id],[]), + try([aws_dms_replication_task.audited_interaction_outbound_replication[0].replication_task_id],[]), + try([aws_dms_replication_task.business_interaction_outbound_replication[0].replication_task_id],[]), + try([aws_dms_replication_task.audited_interaction_outbound_replication[0].replication_task_id],[]), + try([aws_dms_replication_task.audited_interaction_checksum_outbound_replication[0].replication_task_id],[]) + ) + } \ No newline at end of file From 216e3e7643637608b5e417ccc838221d1f540099 Mon Sep 17 00:00:00 2001 From: Bill Buchan Date: Tue, 19 Nov 2024 07:36:36 +0000 Subject: [PATCH 065/103] Base alarm on math --- .../components/dms/cloudwatch-alarms.tf | 30 +++++++++++++------ 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf 
b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf index 1c6de5c623f..d673d485a3f 100644 --- a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf +++ b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf @@ -283,21 +283,33 @@ resource "aws_cloudwatch_metric_alarm" "dms_replication_stopped_alarm" { comparison_operator = "GreaterThanThreshold" evaluation_periods = 1 threshold = 0 - period = 60 - statistic = "Maximum" treat_missing_data = "ignore" - metric_name = "DMSReplicationStopped" - namespace = "CustomDMSMetrics" - dimensions = { - SourceId = each.key - EventSouce = "replication-task" - } + metric_query { + id = "e1" + expression = "FILL(m1,REPEAT)" + label = "DMSReplicationStoppedInterpolated" + return_data = "true" + } + + metric_query { + id = "m1" + + metric { + metric_name = "DMSReplicationStopped" + namespace = "CustmDMSMetrics" + period = 60 + stat = "Maximum" + + dimensions = { + SourceId = each.key + EventSource = "replication-task" + } + } alarm_actions = [aws_sns_topic.dms_alerts_topic.arn] } - # SNS Topic for DMS replication events # This is NOT the same as for DMS Cloudwatch Alarms (dms_alerting) # and is used to trigger the Lamda function if an event happens during From c99e7314d698dad47984dff3764df92826c6034f Mon Sep 17 00:00:00 2001 From: Bill Buchan Date: Tue, 19 Nov 2024 07:42:46 +0000 Subject: [PATCH 066/103] Missing bracket --- .../delius-core/modules/components/dms/cloudwatch-alarms.tf | 1 + 1 file changed, 1 insertion(+) diff --git a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf index d673d485a3f..83ce2fba90a 100644 --- a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf +++ b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf @@ -306,6 +306,7 @@ resource "aws_cloudwatch_metric_alarm" 
"dms_replication_stopped_alarm" { EventSource = "replication-task" } } + } alarm_actions = [aws_sns_topic.dms_alerts_topic.arn] } From 37c481e3a95b6afbcec436535da60318f656c101 Mon Sep 17 00:00:00 2001 From: Bill Buchan Date: Tue, 19 Nov 2024 08:05:48 +0000 Subject: [PATCH 067/103] Missing data is not breaching --- .../delius-core/modules/components/dms/cloudwatch-alarms.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf index 83ce2fba90a..346dadc69ea 100644 --- a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf +++ b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf @@ -283,7 +283,7 @@ resource "aws_cloudwatch_metric_alarm" "dms_replication_stopped_alarm" { comparison_operator = "GreaterThanThreshold" evaluation_periods = 1 threshold = 0 - treat_missing_data = "ignore" + treat_missing_data = "notBreaching" metric_query { id = "e1" From 01e9291b337e782dcb2c3445f5f551d2d728f4d7 Mon Sep 17 00:00:00 2001 From: Bill Buchan Date: Tue, 19 Nov 2024 09:12:01 +0000 Subject: [PATCH 068/103] Data Points to Alarm --- .../modules/components/dms/cloudwatch-alarms.tf | 1 + .../dms/lambda/dms_replication_metric.zip | Bin 0 -> 1035 bytes 2 files changed, 1 insertion(+) create mode 100644 terraform/environments/delius-core/modules/components/dms/lambda/dms_replication_metric.zip diff --git a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf index 346dadc69ea..e0bea7f9fb8 100644 --- a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf +++ b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf @@ -284,6 +284,7 @@ resource "aws_cloudwatch_metric_alarm" "dms_replication_stopped_alarm" { 
evaluation_periods = 1 threshold = 0 treat_missing_data = "notBreaching" + datapoints_to_alarm = 1 metric_query { id = "e1" diff --git a/terraform/environments/delius-core/modules/components/dms/lambda/dms_replication_metric.zip b/terraform/environments/delius-core/modules/components/dms/lambda/dms_replication_metric.zip new file mode 100644 index 0000000000000000000000000000000000000000..639ae399f939a9ec11c996f5dcf7bf396b7d6d21 GIT binary patch literal 1035 zcmWIWW@Zs#-~d7f2E{HQ0SA&mR!VMhd{JsaPG)jqNoIatd~RwALNk z!~;K4?)DekS5%zo4$9;{e`?id$Il;U2X6YSdnN1W(^Y@3SV&k1XGv_|SJC>dd3$T% z;~w5y{6CtMa+2S@yIb6N0&u>KK)jcCvWa%SB^z< zRTgW_Rs5p9^xn;rD;JB;`MG}X9IIb_taHyrB#M&W4Qove zbzjW)IyO_wZHUPydRJaH~-zG3SYVNAdF_J93Oz|tW%wsZp z&c?N6R{FZ$CFbupPMUOD)Fw%@!c8}PlQ74In+IRMy`t7b_CP&*9pL0r% z&XitOUc$bO#Y#_mdD*+Wx)NJW?sOGDpC^{#HS6JwJ0ZtbB^miHw>voDVO+`EnE~F% z{j2@T_kD@Ft5~{jwY!kQ-LG}FF~!oWMRScxzL{S8<+M-k$V_?3)vcDYRh%x(Vs$PH;}Vzc{BOK{r`u%h zg9>vj#g=@X>)viyKk?44zWfkX-Iu+$^UL<7zWnpEGuh;QWqQ5;=YD?v?UU=hO5&f_ zFUp_$U+Lm`n~VGtfA78iv3}<5dR7L8|NjHL**X4QK4*~5%)k)I&A<@g&B!FejL5IZ ga-jSQ14|k~ES$MFz?+o~q<|3!-GTHvpz#b00M9$g2LJ#7 literal 0 HcmV?d00001 From ee9aa8e39782e01b71306e748bee85c795b04278 Mon Sep 17 00:00:00 2001 From: Bill Buchan Date: Tue, 19 Nov 2024 09:33:09 +0000 Subject: [PATCH 069/103] Typo --- .../delius-core/modules/components/dms/cloudwatch-alarms.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf index e0bea7f9fb8..32d2912f1ea 100644 --- a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf +++ b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf @@ -298,7 +298,7 @@ resource "aws_cloudwatch_metric_alarm" "dms_replication_stopped_alarm" { metric { metric_name = "DMSReplicationStopped" - namespace = "CustmDMSMetrics" + namespace = "CustomDMSMetrics" period = 
60 stat = "Maximum" From b65666bc3e9a1cb588286a87b3293762f46c03f7 Mon Sep 17 00:00:00 2001 From: Bill Buchan Date: Tue, 19 Nov 2024 10:28:56 +0000 Subject: [PATCH 070/103] Add OK Action --- .../delius-core/modules/components/dms/cloudwatch-alarms.tf | 1 + 1 file changed, 1 insertion(+) diff --git a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf index 32d2912f1ea..588146394cb 100644 --- a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf +++ b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf @@ -310,6 +310,7 @@ resource "aws_cloudwatch_metric_alarm" "dms_replication_stopped_alarm" { } alarm_actions = [aws_sns_topic.dms_alerts_topic.arn] + ok_actions = [aws_sns_topic.dms_alerts_topic.arn] } # SNS Topic for DMS replication events From d60cb122b13dd9d36f18559c6f9597117eab0dd2 Mon Sep 17 00:00:00 2001 From: Bill Buchan Date: Tue, 19 Nov 2024 11:17:54 +0000 Subject: [PATCH 071/103] Add commentary --- .../components/dms/cloudwatch-alarms.tf | 61 ++++++++---------- .../dms/lambda/dms_replication_metric.zip | Bin 1035 -> 0 bytes 2 files changed, 26 insertions(+), 35 deletions(-) delete mode 100644 terraform/environments/delius-core/modules/components/dms/lambda/dms_replication_metric.zip diff --git a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf index 588146394cb..008badfd48f 100644 --- a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf +++ b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf @@ -165,6 +165,13 @@ module "pagerduty_core_alerts" { pagerduty_integration_key = local.pagerduty_integration_keys[local.integration_key_lookup] } + +# Raising a Cloudwatch Alarm on a DMS Replication Task Event is not directly possible using the +# 
Cloudwatch Alarm Integration in PagerDuty as the JSON payload is different. Therefore, as +# workaround for this we create a custom Cloudwatch Metric which is populated by the replication event and +# create a Cloudwatch Alarm on this Metric in the usual way to allow for raising alarms. + +# Create Role which allows Lamdba to put a custom cloudwatch metric resource "aws_iam_role" "lambda_put_metric_data_role" { name = "lambda-put-metric-data-role" @@ -210,19 +217,8 @@ resource "aws_iam_role_policy_attachment" "lambda_put_metric_data_logging_attach policy_arn = "arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole" } -# resource "local_file" "lambda_dms_replication_metric_py" { -# filename = "${path.module}/lambda_dms_replication_metric.py" -# content = <ALNk z!~;K4?)DekS5%zo4$9;{e`?id$Il;U2X6YSdnN1W(^Y@3SV&k1XGv_|SJC>dd3$T% z;~w5y{6CtMa+2S@yIb6N0&u>KK)jcCvWa%SB^z< zRTgW_Rs5p9^xn;rD;JB;`MG}X9IIb_taHyrB#M&W4Qove zbzjW)IyO_wZHUPydRJaH~-zG3SYVNAdF_J93Oz|tW%wsZp z&c?N6R{FZ$CFbupPMUOD)Fw%@!c8}PlQ74In+IRMy`t7b_CP&*9pL0r% z&XitOUc$bO#Y#_mdD*+Wx)NJW?sOGDpC^{#HS6JwJ0ZtbB^miHw>voDVO+`EnE~F% z{j2@T_kD@Ft5~{jwY!kQ-LG}FF~!oWMRScxzL{S8<+M-k$V_?3)vcDYRh%x(Vs$PH;}Vzc{BOK{r`u%h zg9>vj#g=@X>)viyKk?44zWfkX-Iu+$^UL<7zWnpEGuh;QWqQ5;=YD?v?UU=hO5&f_ zFUp_$U+Lm`n~VGtfA78iv3}<5dR7L8|NjHL**X4QK4*~5%)k)I&A<@g&B!FejL5IZ ga-jSQ14|k~ES$MFz?+o~q<|3!-GTHvpz#b00M9$g2LJ#7 From cf1da5d23747911f3562371e0e8e4ea5a0494b88 Mon Sep 17 00:00:00 2001 From: Bill Buchan Date: Tue, 19 Nov 2024 13:53:57 +0000 Subject: [PATCH 072/103] Remove Math metric --- .../components/dms/cloudwatch-alarms.tf | 69 ++++++++++++------ .../dms/lambda/dms_replication_metric.zip | Bin 0 -> 1035 bytes 2 files changed, 47 insertions(+), 22 deletions(-) create mode 100644 terraform/environments/delius-core/modules/components/dms/lambda/dms_replication_metric.zip diff --git a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf 
b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf index 008badfd48f..96e8ffd7adf 100644 --- a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf +++ b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf @@ -267,6 +267,44 @@ resource "aws_lambda_permission" "allow_sns_invoke_dms_replication_metric_publis # The SNS topic dms_alerts_topic is used to handle state changes into our out # of the alarm state. This is the same topic as used for the standard # CDC Latency alarms. +# resource "aws_cloudwatch_metric_alarm" "dms_replication_stopped_alarm" { +# for_each = toset(local.replication_task_names) +# alarm_name = "DMSReplicationStoppedAlarm_${each.key}" +# alarm_description = "Alarm when Stopped Replication Task for ${each.key}" +# comparison_operator = "GreaterThanThreshold" +# evaluation_periods = 1 +# threshold = 0 +# treat_missing_data = "notBreaching" +# datapoints_to_alarm = 1 + +# metric_query { +# id = "e1" +# expression = "FILL(m1,REPEAT)" +# label = "DMSReplicationStoppedInterpolated" +# return_data = "true" +# } + +# metric_query { +# id = "m1" + +# metric { +# metric_name = "DMSReplicationStopped" +# namespace = "CustomDMSMetrics" +# period = 60 +# stat = "Maximum" + +# dimensions = { +# SourceId = each.key +# EventSource = "replication-task" +# } +# } +# } + +# alarm_actions = [aws_sns_topic.dms_alerts_topic.arn] +# ok_actions = [aws_sns_topic.dms_alerts_topic.arn] +# } + + resource "aws_cloudwatch_metric_alarm" "dms_replication_stopped_alarm" { for_each = toset(local.replication_task_names) alarm_name = "DMSReplicationStoppedAlarm_${each.key}" @@ -274,36 +312,23 @@ resource "aws_cloudwatch_metric_alarm" "dms_replication_stopped_alarm" { comparison_operator = "GreaterThanThreshold" evaluation_periods = 1 threshold = 0 - treat_missing_data = "notBreaching" + treat_missing_data = "ignore" datapoints_to_alarm = 1 + namespace = "CustomDMSMetrics" + metric_name = 
"DMSReplicationStopped" + statistic = "Maximum" - metric_query { - id = "e1" - expression = "FILL(m1,REPEAT)" - label = "DMSReplicationStoppedInterpolated" - return_data = "true" - } - - metric_query { - id = "m1" - - metric { - metric_name = "DMSReplicationStopped" - namespace = "CustomDMSMetrics" - period = 60 - stat = "Maximum" - - dimensions = { - SourceId = each.key - EventSource = "replication-task" - } + dimensions = { + SourceId = each.key + EventSource = "replication-task" } - } alarm_actions = [aws_sns_topic.dms_alerts_topic.arn] ok_actions = [aws_sns_topic.dms_alerts_topic.arn] } + + # SNS Topic for DMS replication events # This is NOT the same as for DMS Cloudwatch Alarms (dms_alerting) # and is used to trigger the Lamda function if an event happens during diff --git a/terraform/environments/delius-core/modules/components/dms/lambda/dms_replication_metric.zip b/terraform/environments/delius-core/modules/components/dms/lambda/dms_replication_metric.zip new file mode 100644 index 0000000000000000000000000000000000000000..639ae399f939a9ec11c996f5dcf7bf396b7d6d21 GIT binary patch literal 1035 zcmWIWW@Zs#-~d7f2E{HQ0SA&mR!VMhd{JsaPG)jqNoIatd~RwALNk z!~;K4?)DekS5%zo4$9;{e`?id$Il;U2X6YSdnN1W(^Y@3SV&k1XGv_|SJC>dd3$T% z;~w5y{6CtMa+2S@yIb6N0&u>KK)jcCvWa%SB^z< zRTgW_Rs5p9^xn;rD;JB;`MG}X9IIb_taHyrB#M&W4Qove zbzjW)IyO_wZHUPydRJaH~-zG3SYVNAdF_J93Oz|tW%wsZp z&c?N6R{FZ$CFbupPMUOD)Fw%@!c8}PlQ74In+IRMy`t7b_CP&*9pL0r% z&XitOUc$bO#Y#_mdD*+Wx)NJW?sOGDpC^{#HS6JwJ0ZtbB^miHw>voDVO+`EnE~F% z{j2@T_kD@Ft5~{jwY!kQ-LG}FF~!oWMRScxzL{S8<+M-k$V_?3)vcDYRh%x(Vs$PH;}Vzc{BOK{r`u%h zg9>vj#g=@X>)viyKk?44zWfkX-Iu+$^UL<7zWnpEGuh;QWqQ5;=YD?v?UU=hO5&f_ zFUp_$U+Lm`n~VGtfA78iv3}<5dR7L8|NjHL**X4QK4*~5%)k)I&A<@g&B!FejL5IZ ga-jSQ14|k~ES$MFz?+o~q<|3!-GTHvpz#b00M9$g2LJ#7 literal 0 HcmV?d00001 From d936c83ed1eb37f5b7cd95bcb937c3d20298cb33 Mon Sep 17 00:00:00 2001 From: Bill Buchan Date: Tue, 19 Nov 2024 14:00:09 +0000 Subject: [PATCH 073/103] Period must not be null --- 
.../delius-core/modules/components/dms/cloudwatch-alarms.tf | 1 + 1 file changed, 1 insertion(+) diff --git a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf index 96e8ffd7adf..8c3f7c71920 100644 --- a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf +++ b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf @@ -317,6 +317,7 @@ resource "aws_cloudwatch_metric_alarm" "dms_replication_stopped_alarm" { namespace = "CustomDMSMetrics" metric_name = "DMSReplicationStopped" statistic = "Maximum" + period = "60" dimensions = { SourceId = each.key From 0bc67f09d9b8f150e0c9c7eebbbbb46f27439b15 Mon Sep 17 00:00:00 2001 From: Bill Buchan Date: Tue, 19 Nov 2024 16:30:54 +0000 Subject: [PATCH 074/103] Remove unneeded dimension replication-task --- .../components/dms/cloudwatch-alarms.tf | 48 ------------------ .../dms/lambda/dms_replication_metric.py | 2 - .../dms/lambda/dms_replication_metric.zip | Bin 1035 -> 0 bytes 3 files changed, 50 deletions(-) delete mode 100644 terraform/environments/delius-core/modules/components/dms/lambda/dms_replication_metric.zip diff --git a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf index 8c3f7c71920..ac7017bfbc7 100644 --- a/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf +++ b/terraform/environments/delius-core/modules/components/dms/cloudwatch-alarms.tf @@ -258,53 +258,6 @@ resource "aws_lambda_permission" "allow_sns_invoke_dms_replication_metric_publis source_arn = aws_sns_topic.dms_events_topic.arn } -# Define a CloudWatch metric alarm with a metric math expression. -# Because the Lambda function is only called intermittently when a DMS Replication -# Event is fired (i.e. 
it may be very infrequent), we use the FILL function -# to interpolate between data points - we assume the metric stays in the -# same state unless an event fires which causes it be changed. -# We loop through all Replication Tasks and create a separate alarm for each one. -# The SNS topic dms_alerts_topic is used to handle state changes into our out -# of the alarm state. This is the same topic as used for the standard -# CDC Latency alarms. -# resource "aws_cloudwatch_metric_alarm" "dms_replication_stopped_alarm" { -# for_each = toset(local.replication_task_names) -# alarm_name = "DMSReplicationStoppedAlarm_${each.key}" -# alarm_description = "Alarm when Stopped Replication Task for ${each.key}" -# comparison_operator = "GreaterThanThreshold" -# evaluation_periods = 1 -# threshold = 0 -# treat_missing_data = "notBreaching" -# datapoints_to_alarm = 1 - -# metric_query { -# id = "e1" -# expression = "FILL(m1,REPEAT)" -# label = "DMSReplicationStoppedInterpolated" -# return_data = "true" -# } - -# metric_query { -# id = "m1" - -# metric { -# metric_name = "DMSReplicationStopped" -# namespace = "CustomDMSMetrics" -# period = 60 -# stat = "Maximum" - -# dimensions = { -# SourceId = each.key -# EventSource = "replication-task" -# } -# } -# } - -# alarm_actions = [aws_sns_topic.dms_alerts_topic.arn] -# ok_actions = [aws_sns_topic.dms_alerts_topic.arn] -# } - - resource "aws_cloudwatch_metric_alarm" "dms_replication_stopped_alarm" { for_each = toset(local.replication_task_names) alarm_name = "DMSReplicationStoppedAlarm_${each.key}" @@ -321,7 +274,6 @@ resource "aws_cloudwatch_metric_alarm" "dms_replication_stopped_alarm" { dimensions = { SourceId = each.key - EventSource = "replication-task" } alarm_actions = [aws_sns_topic.dms_alerts_topic.arn] diff --git a/terraform/environments/delius-core/modules/components/dms/lambda/dms_replication_metric.py b/terraform/environments/delius-core/modules/components/dms/lambda/dms_replication_metric.py index bff16021b90..80195163a82 
100644 --- a/terraform/environments/delius-core/modules/components/dms/lambda/dms_replication_metric.py +++ b/terraform/environments/delius-core/modules/components/dms/lambda/dms_replication_metric.py @@ -45,7 +45,6 @@ def lambda_handler(event, context): { 'MetricName': 'DMSReplicationStopped', 'Dimensions': [ - {'Name': 'EventSource', 'Value': event_source}, {'Name': 'SourceId', 'Value': source_id} ], 'Value': 0, # Reset Below Trigger threshold (Task Started) @@ -61,7 +60,6 @@ def lambda_handler(event, context): { 'MetricName': 'DMSReplicationStopped', 'Dimensions': [ - {'Name': 'EventSource', 'Value': event_source}, {'Name': 'SourceId', 'Value': source_id} ], 'Value': 1, # Trigger threshold (Task Failed) diff --git a/terraform/environments/delius-core/modules/components/dms/lambda/dms_replication_metric.zip b/terraform/environments/delius-core/modules/components/dms/lambda/dms_replication_metric.zip deleted file mode 100644 index 639ae399f939a9ec11c996f5dcf7bf396b7d6d21..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1035 zcmWIWW@Zs#-~d7f2E{HQ0SA&mR!VMhd{JsaPG)jqNoIatd~RwALNk z!~;K4?)DekS5%zo4$9;{e`?id$Il;U2X6YSdnN1W(^Y@3SV&k1XGv_|SJC>dd3$T% z;~w5y{6CtMa+2S@yIb6N0&u>KK)jcCvWa%SB^z< zRTgW_Rs5p9^xn;rD;JB;`MG}X9IIb_taHyrB#M&W4Qove zbzjW)IyO_wZHUPydRJaH~-zG3SYVNAdF_J93Oz|tW%wsZp z&c?N6R{FZ$CFbupPMUOD)Fw%@!c8}PlQ74In+IRMy`t7b_CP&*9pL0r% z&XitOUc$bO#Y#_mdD*+Wx)NJW?sOGDpC^{#HS6JwJ0ZtbB^miHw>voDVO+`EnE~F% z{j2@T_kD@Ft5~{jwY!kQ-LG}FF~!oWMRScxzL{S8<+M-k$V_?3)vcDYRh%x(Vs$PH;}Vzc{BOK{r`u%h zg9>vj#g=@X>)viyKk?44zWfkX-Iu+$^UL<7zWnpEGuh;QWqQ5;=YD?v?UU=hO5&f_ zFUp_$U+Lm`n~VGtfA78iv3}<5dR7L8|NjHL**X4QK4*~5%)k)I&A<@g&B!FejL5IZ ga-jSQ14|k~ES$MFz?+o~q<|3!-GTHvpz#b00M9$g2LJ#7 From 3af4cef54fe7443712b39476824ee1102ce513ec Mon Sep 17 00:00:00 2001 From: Bill Buchan Date: Wed, 20 Nov 2024 09:45:52 +0000 Subject: [PATCH 075/103] Add a transformation to remove the USER_ID column from USER_ data --- .../files/user_outbound_table_mapping.json | 14 
+++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/terraform/environments/delius-core/files/user_outbound_table_mapping.json b/terraform/environments/delius-core/files/user_outbound_table_mapping.json index f8a2e966334..4d8b7128d6e 100644 --- a/terraform/environments/delius-core/files/user_outbound_table_mapping.json +++ b/terraform/environments/delius-core/files/user_outbound_table_mapping.json @@ -21,6 +21,18 @@ }, "rule-action": "include", "filters": [] - } + }, + { + "rule-type": "transformation", + "rule-id": "32", + "rule-name": "remove_staff_id", + "rule-target": "column", + "object-locator": { + "schema-name": "DELIUS_APP_SCHEMA", + "table-name": "USER_", + "column-name": "STAFF_ID" + }, + "rule-action": "remove-column" + } ] } From 99d196828625a5f17d4814cfb49826be365e4061 Mon Sep 17 00:00:00 2001 From: Buckingham Date: Fri, 22 Nov 2024 15:03:31 +0000 Subject: [PATCH 076/103] Update_221124_3 --- terraform/environments/ppud/iam.tf | 21 ++++++++++++++++++++- terraform/environments/ppud/lambda.tf | 6 +++--- 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/terraform/environments/ppud/iam.tf b/terraform/environments/ppud/iam.tf index aa7104e6601..15ecce0c131 100644 --- a/terraform/environments/ppud/iam.tf +++ b/terraform/environments/ppud/iam.tf @@ -1172,15 +1172,34 @@ resource "aws_iam_policy" "iam_policy_for_lambda_cloudwatch_get_metric_data_dev" policy = jsonencode({ "Version" : "2012-10-17", "Statement" : [{ + "Sid" : "CloudwatchMetricPolicy", "Effect" : "Allow", "Action" : [ - "cloudwatch:GetMetricData" + "cloudwatch:GetMetricData", + "cloudwatch:ListMetrics" ], "Resource" : [ "arn:aws:ssm:eu-west-2:${local.environment_management.account_ids["ppud-development"]}:*" ] }, { + "Sid" : "SQSPolicy", + "Effect" : "Allow", + "Action" : [ + "sqs:ChangeMessageVisibility", + "sqs:DeleteMessage", + "sqs:GetQueueAttributes", + "sqs:GetQueueUrl", + "sqs:ListQueueTags", + "sqs:ReceiveMessage", + "sqs:SendMessage" + ], + "Resource" : 
[ + "arn:aws:sqs:eu-west-2:${local.environment_management.account_ids["ppud-production"]}:Lambda-Queue-Production" + ] + }, + { + "Sid" : "SESPolicy", "Effect" : "Allow", "Action" : [ "ses:SendEmail" diff --git a/terraform/environments/ppud/lambda.tf b/terraform/environments/ppud/lambda.tf index 302ec946fa5..0cce3c4a951 100644 --- a/terraform/environments/ppud/lambda.tf +++ b/terraform/environments/ppud/lambda.tf @@ -236,11 +236,11 @@ resource "aws_lambda_function" "terraform_lambda_enable_cpu_alarm" { resource "aws_lambda_permission" "allow_cloudwatch_to_call_lambda_terminate_cpu_process_dev" { count = local.is-development == true ? 1 : 0 - statement_id = "AllowExecutionFromCloudWatch" + statement_id = "AllowCloudWatchAccess" action = "lambda:InvokeFunction" function_name = aws_lambda_function.terraform_lambda_func_terminate_cpu_process_dev[0].function_name - principal = "lambda.alarms.cloudwatch.amazonaws.com" - source_arn = "arn:aws:cloudwatch:eu-west-2:${local.environment_management.account_ids["ppud-development"]}:alarm:*" + principal = "cloudwatch.amazonaws.com" + source_arn = "arn:aws:cloudwatch:eu-west-2:${local.environment_management.account_ids["ppud-development"]}:*" } resource "aws_lambda_function" "terraform_lambda_func_terminate_cpu_process_dev" { From a38616a99207a73b2ea9f143a599a2f1e3e4a1b5 Mon Sep 17 00:00:00 2001 From: Buckingham Date: Fri, 22 Nov 2024 15:27:36 +0000 Subject: [PATCH 077/103] Update_221124_4 --- terraform/environments/ppud/lambda.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/terraform/environments/ppud/lambda.tf b/terraform/environments/ppud/lambda.tf index 0cce3c4a951..39fd5346772 100644 --- a/terraform/environments/ppud/lambda.tf +++ b/terraform/environments/ppud/lambda.tf @@ -504,7 +504,7 @@ resource "aws_lambda_function" "terraform_lambda_func_send_cpu_graph_dev" { handler = "send_cpu_graph_dev.lambda_handler" runtime = "python3.12" timeout = 300 - depends_on = 
[aws_iam_role_policy_attachment.attach_lambda_policy_cloudwatch_invoke_lambda_to_lambda_role_cloudwatch_invoke_lambda_dev] + depends_on = [aws_iam_role_policy_attachment.attach_lambda_policy_cloudwatch_get_metric_data_to_lambda_role_cloudwatch_get_metric_data_dev] reserved_concurrent_executions = 5 # code_signing_config_arn = "arn:aws:lambda:eu-west-2:${local.environment_management.account_ids["ppud-development"]}:code-signing-config:csc-0c7136ccff2de748f" dead_letter_config { From 24c527f5ccdcbce6d2e5b2aab465453c98761c31 Mon Sep 17 00:00:00 2001 From: Tom Webber Date: Tue, 19 Nov 2024 13:19:10 +0000 Subject: [PATCH 078/103] feat: add athena query buckets (london) for environments, kms encrypted with bucket keys --- .../analytical-platform-compute/kms-keys.tf | 16 ++++ .../analytical-platform-compute/s3-buckets.tf | 76 +++++++++++++------ 2 files changed, 69 insertions(+), 23 deletions(-) diff --git a/terraform/environments/analytical-platform-compute/kms-keys.tf b/terraform/environments/analytical-platform-compute/kms-keys.tf index a7b2d2bf3da..77f8052e839 100644 --- a/terraform/environments/analytical-platform-compute/kms-keys.tf +++ b/terraform/environments/analytical-platform-compute/kms-keys.tf @@ -275,6 +275,22 @@ module "mlflow_s3_kms" { tags = local.tags } +module "mojap_compute_athena_s3_kms_eu_west_2" { + #checkov:skip=CKV_TF_1:Module registry does not support commit hashes for versions + #checkov:skip=CKV_TF_2:Module registry does not support tags for versions + + source = "terraform-aws-modules/kms/aws" + version = "3.1.1" + + aliases = ["s3/mlflow"] + description = "Mojap Athena query bucket S3 KMS key for eu-west-2" + enable_default_policy = true + + deletion_window_in_days = 7 + + tags = local.tags +} + module "mojap_compute_logs_s3_kms_eu_west_2" { #checkov:skip=CKV_TF_1:Module registry does not support commit hashes for versions #checkov:skip=CKV_TF_2:Module registry does not support tags for versions diff --git 
a/terraform/environments/analytical-platform-compute/s3-buckets.tf b/terraform/environments/analytical-platform-compute/s3-buckets.tf index e0bafa32fc1..811de3f16bc 100644 --- a/terraform/environments/analytical-platform-compute/s3-buckets.tf +++ b/terraform/environments/analytical-platform-compute/s3-buckets.tf @@ -136,28 +136,58 @@ module "mojap_compute_logs_bucket_eu_west_1" { ) } -moved { - from = module.mojap_compute_logs_bucket.aws_s3_bucket.this[0] - to = module.mojap_compute_logs_bucket_eu_west_2.aws_s3_bucket.this[0] -} -moved { - from = module.mojap_compute_logs_bucket.aws_s3_bucket_policy.this[0] - to = module.mojap_compute_logs_bucket_eu_west_2.aws_s3_bucket_policy.this[0] -} -moved { - from = module.mojap_compute_logs_bucket.aws_s3_bucket_public_access_block.this[0] - to = module.mojap_compute_logs_bucket_eu_west_2.aws_s3_bucket_public_access_block.this[0] -} -moved { - from = module.mojap_compute_logs_bucket.aws_s3_bucket_server_side_encryption_configuration.this[0] - to = module.mojap_compute_logs_bucket_eu_west_2.aws_s3_bucket_server_side_encryption_configuration.this[0] -} -moved { - from = module.mojap_compute_logs_bucket.aws_s3_bucket_versioning.this[0] - to = module.mojap_compute_logs_bucket_eu_west_2.aws_s3_bucket_versioning.this[0] -} -moved { - from = aws_iam_policy_document.s3_server_access_logs_policy - to = aws_iam_policy_document.s3_server_access_logs_eu_west_2_policy +data "aws_iam_policy_document" "athena_query_results_policy_eu_west_2" { + #checkov:skip=CKV_AWS_356:resource "*" limited by condition + statement { + sid = "DenyInsecureTransport" + effect = "Deny" + actions = ["s3:*"] + resources = [ + "arn:aws:s3:::mojap-compute-${local.environment}-athena-query-results-eu-west-2/*", + "arn:aws:s3:::mojap-compute-${local.environment}-athena-query-results-eu-west-2" + ] + principals { + type = "*" + identifiers = ["*"] + } + condition { + test = "Bool" + variable = "aws:SecureTransport" + values = ["false"] + } + } } + +module 
"mojap_compute_athena_query_results_bucket_eu_west_2" { + #checkov:skip=CKV_TF_1:Module registry does not support commit hashes for versions + #checkov:skip=CKV_TF_2:Module registry does not support tags for versions + + source = "terraform-aws-modules/s3-bucket/aws" + version = "4.2.2" + + bucket = "mojap-compute-${local.environment}-athena-query-results-eu-west-2" + + force_destroy = false + + attach_policy = true + policy = data.aws_iam_policy_document.athena_query_results_policy_eu_west_2.json + + object_lock_enabled = false + + versioning = { + status = "Disabled" + } + + server_side_encryption_configuration = { + rule = { + bucket_key_enabled = true + apply_server_side_encryption_by_default = { + kms_master_key_id = module.mojap_compute_athena_s3_kms_eu_west_2.key_arn + sse_algorithm = "aws:kms" + } + } + } + + tags = local.tags +} \ No newline at end of file From 0142b6d80b13f9612593fe50dc345eb784102d1d Mon Sep 17 00:00:00 2001 From: Tom Webber Date: Fri, 22 Nov 2024 10:28:18 +0000 Subject: [PATCH 079/103] feat: no bucket backup (configured via tags) --- .../environments/analytical-platform-compute/s3-buckets.tf | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/terraform/environments/analytical-platform-compute/s3-buckets.tf b/terraform/environments/analytical-platform-compute/s3-buckets.tf index 811de3f16bc..448bb8b2221 100644 --- a/terraform/environments/analytical-platform-compute/s3-buckets.tf +++ b/terraform/environments/analytical-platform-compute/s3-buckets.tf @@ -189,5 +189,8 @@ module "mojap_compute_athena_query_results_bucket_eu_west_2" { } } - tags = local.tags + tags = merge( + local.tags, + { "backup" = "false" } + ) } \ No newline at end of file From e0ccaa42ff6e684937a5413e1517d0018a5a72f9 Mon Sep 17 00:00:00 2001 From: Tom Webber Date: Fri, 22 Nov 2024 10:28:29 +0000 Subject: [PATCH 080/103] feat: no bucket backup (configured via tags) --- .../environments/analytical-platform-compute/s3-buckets.tf | 4 ++-- 1 file changed, 
2 insertions(+), 2 deletions(-) diff --git a/terraform/environments/analytical-platform-compute/s3-buckets.tf b/terraform/environments/analytical-platform-compute/s3-buckets.tf index 448bb8b2221..03712ea8813 100644 --- a/terraform/environments/analytical-platform-compute/s3-buckets.tf +++ b/terraform/environments/analytical-platform-compute/s3-buckets.tf @@ -168,7 +168,7 @@ module "mojap_compute_athena_query_results_bucket_eu_west_2" { bucket = "mojap-compute-${local.environment}-athena-query-results-eu-west-2" - force_destroy = false + force_destroy = true attach_policy = true policy = data.aws_iam_policy_document.athena_query_results_policy_eu_west_2.json @@ -193,4 +193,4 @@ module "mojap_compute_athena_query_results_bucket_eu_west_2" { local.tags, { "backup" = "false" } ) -} \ No newline at end of file +} From 78d515d58cc5f4b1183e5f8ce38f7e2623fdc14d Mon Sep 17 00:00:00 2001 From: Tom Webber Date: Fri, 22 Nov 2024 12:18:10 +0000 Subject: [PATCH 081/103] fix: update key alias --- terraform/environments/analytical-platform-compute/kms-keys.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/terraform/environments/analytical-platform-compute/kms-keys.tf b/terraform/environments/analytical-platform-compute/kms-keys.tf index 77f8052e839..7ee087235b5 100644 --- a/terraform/environments/analytical-platform-compute/kms-keys.tf +++ b/terraform/environments/analytical-platform-compute/kms-keys.tf @@ -282,7 +282,7 @@ module "mojap_compute_athena_s3_kms_eu_west_2" { source = "terraform-aws-modules/kms/aws" version = "3.1.1" - aliases = ["s3/mlflow"] + aliases = ["s3/mojap-compute-athena-query-results-eu-west-2"] description = "Mojap Athena query bucket S3 KMS key for eu-west-2" enable_default_policy = true From a424ff64beedaa54602d628f25ea7b3a4c2517fd Mon Sep 17 00:00:00 2001 From: George Taylor Date: Fri, 22 Nov 2024 16:09:54 +0000 Subject: [PATCH 082/103] chore: pin weblogic task def revision (#8757) * Update variables.tf * tidy up alb healthchecks * pin 
for dev for now * correct version --- .../delius-core/locals_development.tf | 9 ++-- .../modules/delius_environment/ldap_ecs.tf | 2 +- .../modules/delius_environment/pwm.tf | 24 ++++++--- .../modules/delius_environment/weblogic.tf | 16 +++++- .../delius_environment/weblogic_eis.tf | 13 +++-- .../helpers/delius_microservice/ecs.tf | 12 ++--- .../delius_microservice/load_balancing.tf | 14 +++--- .../helpers/delius_microservice/variables.tf | 50 ++++++++++++------- 8 files changed, 90 insertions(+), 50 deletions(-) diff --git a/terraform/environments/delius-core/locals_development.tf b/terraform/environments/delius-core/locals_development.tf index 5eda99ec858..f2dc9fd2a72 100644 --- a/terraform/environments/delius-core/locals_development.tf +++ b/terraform/environments/delius-core/locals_development.tf @@ -76,10 +76,11 @@ locals { delius_microservices_configs_dev = { weblogic = { - image_tag = "6.2.0.3" - container_port = 8080 - container_memory = 4096 - container_cpu = 2048 + image_tag = "6.2.0.3" + container_port = 8080 + container_memory = 4096 + container_cpu = 2048 + task_definition_revision = 9 } weblogic_eis = { diff --git a/terraform/environments/delius-core/modules/delius_environment/ldap_ecs.tf b/terraform/environments/delius-core/modules/delius_environment/ldap_ecs.tf index 4e9b328bd62..b143dfe5e30 100644 --- a/terraform/environments/delius-core/modules/delius_environment/ldap_ecs.tf +++ b/terraform/environments/delius-core/modules/delius_environment/ldap_ecs.tf @@ -51,7 +51,7 @@ module "ldap_ecs" { container_image = "${var.platform_vars.environment_management.account_ids["core-shared-services-production"]}.dkr.ecr.eu-west-2.amazonaws.com/delius-core-openldap-ecr-repo:${var.delius_microservice_configs.ldap.image_tag}" account_config = var.account_config - health_check = { + container_health_check = { command = ["CMD-SHELL", "ldapsearch -x -H ldap://localhost:389 -b '' -s base '(objectclass=*)' namingContexts"] interval = 30 retries = 3 diff --git 
a/terraform/environments/delius-core/modules/delius_environment/pwm.tf b/terraform/environments/delius-core/modules/delius_environment/pwm.tf index 8a8af54987b..c4d7ecd4796 100644 --- a/terraform/environments/delius-core/modules/delius_environment/pwm.tf +++ b/terraform/environments/delius-core/modules/delius_environment/pwm.tf @@ -54,14 +54,22 @@ module "pwm" { platform_vars = var.platform_vars - container_image = "${var.platform_vars.environment_management.account_ids["core-shared-services-production"]}.dkr.ecr.eu-west-2.amazonaws.com/delius-core-password-management:${var.delius_microservice_configs.pwm.image_tag}" - account_config = var.account_config - health_check_path = "/" - health_check_interval = "15" - account_info = var.account_info - - target_group_protocol_version = "HTTP1" - health_check_grace_period_seconds = 10 + container_image = "${var.platform_vars.environment_management.account_ids["core-shared-services-production"]}.dkr.ecr.eu-west-2.amazonaws.com/delius-core-password-management:${var.delius_microservice_configs.pwm.image_tag}" + account_config = var.account_config + account_info = var.account_info + + target_group_protocol_version = "HTTP1" + + alb_health_check = { + path = "/NDelius-war/delius/JSP/healthcheck.jsp?ping" + healthy_threshold = 5 + interval = 30 + protocol = "HTTP" + unhealthy_threshold = 5 + matcher = "200-499" + timeout = 10 + grace_period_seconds = 180 + } container_cpu = var.delius_microservice_configs.pwm.container_cpu container_memory = var.delius_microservice_configs.pwm.container_memory diff --git a/terraform/environments/delius-core/modules/delius_environment/weblogic.tf b/terraform/environments/delius-core/modules/delius_environment/weblogic.tf index a9001a6f926..2d4314603c2 100644 --- a/terraform/environments/delius-core/modules/delius_environment/weblogic.tf +++ b/terraform/environments/delius-core/modules/delius_environment/weblogic.tf @@ -19,8 +19,20 @@ module "weblogic" { ecs_cluster_arn = 
module.ecs.ecs_cluster_arn env_name = var.env_name - health_check_path = "/NDelius-war/delius/JSP/healthcheck.jsp?ping" - microservice_lb = aws_lb.delius_core_frontend + pin_task_definition_revision = try(var.delius_microservice_configs.weblogic.task_definition_revision, 0) + + alb_health_check = { + path = "/NDelius-war/delius/JSP/healthcheck.jsp?ping" + healthy_threshold = 5 + interval = 30 + protocol = "HTTP" + unhealthy_threshold = 5 + matcher = "200-499" + timeout = 5 + grace_period_seconds = 300 + } + + microservice_lb = aws_lb.delius_core_frontend target_group_protocol_version = "HTTP1" diff --git a/terraform/environments/delius-core/modules/delius_environment/weblogic_eis.tf b/terraform/environments/delius-core/modules/delius_environment/weblogic_eis.tf index cd68c989724..ce08ba24080 100644 --- a/terraform/environments/delius-core/modules/delius_environment/weblogic_eis.tf +++ b/terraform/environments/delius-core/modules/delius_environment/weblogic_eis.tf @@ -68,9 +68,16 @@ module "weblogic_eis" { container_memory = var.delius_microservice_configs.weblogic_eis.container_memory container_cpu = var.delius_microservice_configs.weblogic_eis.container_cpu - health_check_path = "/NDelius-war/delius/JSP/healthcheck.jsp?ping" - health_check_grace_period_seconds = 600 - health_check_interval = 30 + alb_health_check = { + path = "/NDelius-war/delius/JSP/healthcheck.jsp?ping" + healthy_threshold = 5 + interval = 30 + protocol = "HTTP" + unhealthy_threshold = 5 + matcher = "200-499" + timeout = 10 + grace_period_seconds = 300 + } db_ingress_security_groups = [] diff --git a/terraform/environments/delius-core/modules/helpers/delius_microservice/ecs.tf b/terraform/environments/delius-core/modules/helpers/delius_microservice/ecs.tf index 6496b869f32..9fdfde284ab 100644 --- a/terraform/environments/delius-core/modules/helpers/delius_microservice/ecs.tf +++ b/terraform/environments/delius-core/modules/helpers/delius_microservice/ecs.tf @@ -1,5 +1,5 @@ module 
"container_definition" { - source = "git::https://github.com/ministryofjustice/modernisation-platform-terraform-ecs-cluster//container?ref=main" + source = "git::https://github.com/ministryofjustice/modernisation-platform-terraform-ecs-cluster//container?ref=v5.0.0" name = var.name image = var.container_image memory = var.container_memory @@ -9,7 +9,7 @@ module "container_definition" { environment = local.calculated_container_vars_list - health_check = var.health_check + health_check = var.container_health_check secrets = local.calculated_container_secrets_list port_mappings = var.container_port_config @@ -35,7 +35,7 @@ module "ecs_policies" { } module "ecs_service" { - source = "git::https://github.com/ministryofjustice/modernisation-platform-terraform-ecs-cluster//service?ref=main" + source = "git::https://github.com/ministryofjustice/modernisation-platform-terraform-ecs-cluster//service?ref=v5.0.0" container_definitions = nonsensitive(module.container_definition.json_encoded_list) cluster_arn = var.ecs_cluster_arn name = "${var.env_name}-${var.name}" @@ -43,6 +43,8 @@ module "ecs_service" { task_cpu = var.container_cpu task_memory = var.container_memory + pin_task_definition_revision = var.pin_task_definition_revision + desired_count = var.desired_count deployment_maximum_percent = var.deployment_maximum_percent deployment_minimum_healthy_percent = var.deployment_minimum_healthy_percent @@ -51,7 +53,7 @@ module "ecs_service" { task_role_arn = "arn:aws:iam::${var.account_info.id}:role/${module.ecs_policies.task_role.name}" task_exec_role_arn = "arn:aws:iam::${var.account_info.id}:role/${module.ecs_policies.task_exec_role.name}" - health_check_grace_period_seconds = var.health_check_grace_period_seconds + health_check_grace_period_seconds = var.alb_health_check.grace_period_seconds service_load_balancers = var.microservice_lb != null ? 
concat([{ target_group_arn = aws_lb_target_group.frontend[0].arn @@ -68,7 +70,5 @@ module "ecs_service" { enable_execute_command = true - ignore_changes = var.ignore_changes_service_task_definition - tags = var.tags } diff --git a/terraform/environments/delius-core/modules/helpers/delius_microservice/load_balancing.tf b/terraform/environments/delius-core/modules/helpers/delius_microservice/load_balancing.tf index 7f46a8cd665..90019fa63f9 100644 --- a/terraform/environments/delius-core/modules/helpers/delius_microservice/load_balancing.tf +++ b/terraform/environments/delius-core/modules/helpers/delius_microservice/load_balancing.tf @@ -28,13 +28,13 @@ resource "aws_lb_target_group" "frontend" { } health_check { - path = var.health_check_path - healthy_threshold = "5" - interval = var.health_check_interval - protocol = "HTTP" - unhealthy_threshold = "5" - matcher = "200-499" - timeout = "5" + path = var.alb_health_check.path + healthy_threshold = var.alb_health_check.healthy_threshold + interval = var.alb_health_check.interval + protocol = var.alb_health_check.protocol + unhealthy_threshold = var.alb_health_check.unhealthy_threshold + matcher = var.alb_health_check.matcher + timeout = var.alb_health_check.timeout } lifecycle { diff --git a/terraform/environments/delius-core/modules/helpers/delius_microservice/variables.tf b/terraform/environments/delius-core/modules/helpers/delius_microservice/variables.tf index 8676dcb62ed..93ac9c86fe9 100644 --- a/terraform/environments/delius-core/modules/helpers/delius_microservice/variables.tf +++ b/terraform/environments/delius-core/modules/helpers/delius_microservice/variables.tf @@ -217,12 +217,6 @@ variable "platform_vars" { }) } -variable "health_check_grace_period_seconds" { - description = "The amount of time, in seconds, that Amazon ECS waits before unhealthy instances are shut down." 
- type = number - default = 60 -} - variable "ecs_cluster_arn" { description = "The ARN of the ECS cluster" type = string @@ -376,18 +370,6 @@ variable "alb_security_group_id" { default = null } -variable "health_check_path" { - description = "The health check path for the alb target group" - type = string - default = "/" -} - -variable "health_check_interval" { - description = "The health check interval for the alb target group" - type = string - default = "300" -} - variable "alb_stickiness_enabled" { description = "Enable or disable stickiness" type = string @@ -581,7 +563,7 @@ variable "extra_task_exec_role_policies" { default = {} } -variable "health_check" { +variable "container_health_check" { description = "The health check configuration for the container" type = object({ command = list(string) @@ -593,6 +575,30 @@ variable "health_check" { default = null } +variable "alb_health_check" { + description = "The health check configuration for the ALB" + type = object({ + path = string + interval = number + timeout = number + healthy_threshold = number + unhealthy_threshold = number + matcher = string + protocol = string + grace_period_seconds = number + }) + default = { + path = "/" + interval = 30 + timeout = 5 + healthy_threshold = 5 + unhealthy_threshold = 5 + matcher = "200-499" + protocol = "HTTP" + grace_period_seconds = 120 + } +} + variable "nlb_ingress_security_group_ids" { description = "Security group ids to allow ingress to the ECS service" type = list(object({ @@ -622,3 +628,9 @@ variable "system_controls" { type = list(any) default = [] } + +variable "pin_task_definition_revision" { + type = number + description = "The revision of the task definition to use" + default = 0 +} From 564a0e823027d3d5e90330f080464ee456049a2c Mon Sep 17 00:00:00 2001 From: George Taylor Date: Fri, 22 Nov 2024 16:48:21 +0000 Subject: [PATCH 083/103] Update pwm.tf (#8766) --- .../environments/delius-core/modules/delius_environment/pwm.tf | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/terraform/environments/delius-core/modules/delius_environment/pwm.tf b/terraform/environments/delius-core/modules/delius_environment/pwm.tf index c4d7ecd4796..c5c6164fcbf 100644 --- a/terraform/environments/delius-core/modules/delius_environment/pwm.tf +++ b/terraform/environments/delius-core/modules/delius_environment/pwm.tf @@ -61,7 +61,7 @@ module "pwm" { target_group_protocol_version = "HTTP1" alb_health_check = { - path = "/NDelius-war/delius/JSP/healthcheck.jsp?ping" + path = "/" healthy_threshold = 5 interval = 30 protocol = "HTTP" unhealthy_threshold = 5 From aec7b9b7a5870bac1a7b78e5f9e8219a7289052f Mon Sep 17 00:00:00 2001 From: Bill Buchan Date: Wed, 20 Nov 2024 09:45:52 +0000 Subject: [PATCH 084/103] Add a transformation to remove the STAFF_ID column from USER_ data --- .../files/user_outbound_table_mapping.json | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/terraform/environments/delius-core/files/user_outbound_table_mapping.json b/terraform/environments/delius-core/files/user_outbound_table_mapping.json index f8a2e966334..4d8b7128d6e 100644 --- a/terraform/environments/delius-core/files/user_outbound_table_mapping.json +++ b/terraform/environments/delius-core/files/user_outbound_table_mapping.json @@ -21,6 +21,18 @@ }, "rule-action": "include", "filters": [] - } + }, + { + "rule-type": "transformation", + "rule-id": "32", + "rule-name": "remove_staff_id", + "rule-target": "column", + "object-locator": { + "schema-name": "DELIUS_APP_SCHEMA", + "table-name": "USER_", + "column-name": "STAFF_ID" + }, + "rule-action": "remove-column" + } ] } From b8119fc4d09b3160a334bbcfc37784e35ce2e0d8 Mon Sep 17 00:00:00 2001 From: Dominic Robinson <65237317+drobinson-moj@users.noreply.github.com> Date: Fri, 22 Nov 2024 16:54:57 +0000 Subject: [PATCH 085/103] TM-720: remove ssm command monitoring from nomis (#8764) --- terraform/environments/nomis/locals.tf | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git
a/terraform/environments/nomis/locals.tf b/terraform/environments/nomis/locals.tf index ada318258c9..9b644f9da66 100644 --- a/terraform/environments/nomis/locals.tf +++ b/terraform/environments/nomis/locals.tf @@ -48,7 +48,6 @@ locals { enable_resource_explorer = true } - cloudwatch_metric_alarms = module.baseline_presets.cloudwatch_metric_alarms.ssm - security_groups = local.security_groups + security_groups = local.security_groups } } From 1e870b5ca6a607697e4ff3c2d85fa2b18f6a3288 Mon Sep 17 00:00:00 2001 From: Dominic Robinson <65237317+drobinson-moj@users.noreply.github.com> Date: Fri, 22 Nov 2024 16:55:14 +0000 Subject: [PATCH 086/103] TM-720: add ssm endpoint monitoring to hmpp-oem (#8763) * add enable_ssm_command_monitoring option * enable ssm command monitoring --- terraform/environments/hmpps-oem/locals.tf | 1 + terraform/environments/hmpps-oem/main.tf | 1 + .../cloudwatch_metric_alarms.tf | 21 +++++++++++++++++++ terraform/modules/baseline_presets/outputs.tf | 16 ++++++-------- .../modules/baseline_presets/sns_topics.tf | 8 ++++++- .../modules/baseline_presets/variables.tf | 1 + 6 files changed, 37 insertions(+), 11 deletions(-) diff --git a/terraform/environments/hmpps-oem/locals.tf b/terraform/environments/hmpps-oem/locals.tf index 73791385114..e432fdb9efe 100644 --- a/terraform/environments/hmpps-oem/locals.tf +++ b/terraform/environments/hmpps-oem/locals.tf @@ -49,6 +49,7 @@ locals { enable_s3_db_backup_bucket = true enable_s3_shared_bucket = true enable_s3_software_bucket = true + enable_ssm_command_monitoring = true s3_iam_policies = ["EC2S3BucketWriteAndDeleteAccessPolicy"] } } diff --git a/terraform/environments/hmpps-oem/main.tf b/terraform/environments/hmpps-oem/main.tf index a23f7e6d41b..cd706d8538e 100644 --- a/terraform/environments/hmpps-oem/main.tf +++ b/terraform/environments/hmpps-oem/main.tf @@ -74,6 +74,7 @@ module "baseline" { ) cloudwatch_metric_alarms = merge( + module.baseline_presets.cloudwatch_metric_alarms_baseline, 
lookup(local.baseline_all_environments, "cloudwatch_metric_alarms", {}), lookup(local.baseline_environment_specific, "cloudwatch_metric_alarms", {}), ) diff --git a/terraform/modules/baseline_presets/cloudwatch_metric_alarms.tf b/terraform/modules/baseline_presets/cloudwatch_metric_alarms.tf index 2e16b5271e8..795670a383f 100644 --- a/terraform/modules/baseline_presets/cloudwatch_metric_alarms.tf +++ b/terraform/modules/baseline_presets/cloudwatch_metric_alarms.tf @@ -449,4 +449,25 @@ locals { } } } + + cloudwatch_metric_alarms_by_sns_topic = { + for sns_key, sns_value in local.sns_topics : sns_key => { + for namespace_key, namespace_value in local.cloudwatch_metric_alarms : namespace_key => { + for alarm_key, alarm_value in namespace_value : alarm_key => merge(alarm_value, { + alarm_actions = [sns_key] + ok_actions = [sns_key] + }) + } + } + } + + # alarms added via baseline. Put SSM command alerts in dso-pipelines so it doesn't clutter main application alerts + cloudwatch_metric_alarms_baseline = merge( + var.options.enable_ssm_command_monitoring ? { + "failed-ssm-command-${var.environment.account_name}" = local.cloudwatch_metric_alarms_by_sns_topic["dso-pipelines-pagerduty"].ssm.failed-ssm-command + } : {}, + var.options.enable_ssm_command_monitoring ? 
{ + "ssm-command-metrics-missing-${var.environment.account_name}" = local.cloudwatch_metric_alarms_by_sns_topic["dso-pipelines-pagerduty"].ssm.ssm-command-metrics-missing + } : {}, + ) } diff --git a/terraform/modules/baseline_presets/outputs.tf b/terraform/modules/baseline_presets/outputs.tf index d085c54f7c9..01cc5560724 100644 --- a/terraform/modules/baseline_presets/outputs.tf +++ b/terraform/modules/baseline_presets/outputs.tf @@ -44,19 +44,15 @@ output "cloudwatch_metric_alarms" { value = local.cloudwatch_metric_alarms } +output "cloudwatch_metric_alarms_baseline" { + description = "Map of common cloudwatch metric alarms that can be passed into baseline directly as specified by var.options.enable_ssm_command_monitoring for example" + value = local.cloudwatch_metric_alarms_baseline +} + output "cloudwatch_metric_alarms_by_sns_topic" { description = "Map of sns topic key to cloudwatch metric alarms grouped by namespace, where the default action is the sns topic key" - value = { - for sns_key, sns_value in local.sns_topics : sns_key => { - for namespace_key, namespace_value in local.cloudwatch_metric_alarms : namespace_key => { - for alarm_key, alarm_value in namespace_value : alarm_key => merge(alarm_value, { - alarm_actions = [sns_key] - ok_actions = [sns_key] - }) - } - } - } + value = local.cloudwatch_metric_alarms_by_sns_topic } output "iam_roles" { diff --git a/terraform/modules/baseline_presets/sns_topics.tf b/terraform/modules/baseline_presets/sns_topics.tf index 89b75eade1a..346a1c9e403 100644 --- a/terraform/modules/baseline_presets/sns_topics.tf +++ b/terraform/modules/baseline_presets/sns_topics.tf @@ -6,8 +6,14 @@ # from the modernisation platform managed pagerduty_integration_keys locals { + + pagerduty_integrations = merge( + var.options.enable_ssm_command_monitoring ? 
{ dso-pipelines-pagerduty = "dso-pipelines" } : {}, + var.options.sns_topics.pagerduty_integrations + ) + sns_topics_pagerduty_integrations = { - for key, value in var.options.sns_topics.pagerduty_integrations : key => { + for key, value in local.pagerduty_integrations : key => { display_name = "Pager duty integration for ${value}" kms_master_key_id = "general" subscriptions = { diff --git a/terraform/modules/baseline_presets/variables.tf b/terraform/modules/baseline_presets/variables.tf index 097391f56d0..250569f6f1c 100644 --- a/terraform/modules/baseline_presets/variables.tf +++ b/terraform/modules/baseline_presets/variables.tf @@ -42,6 +42,7 @@ variable "options" { enable_s3_db_backup_bucket = optional(bool, false) # create db-backup S3 buckets enable_s3_shared_bucket = optional(bool, false) # create devtest and preprodprod S3 bucket for sharing between accounts enable_s3_software_bucket = optional(bool, false) # create software S3 bucket in test account for image builder/configuration-management + enable_ssm_command_monitoring = optional(bool, false) # create SNS topic and alarms for SSM command monitoring enable_vmimport = optional(bool, false) # create role for vm imports route53_resolver_rules = optional(map(list(string)), {}) # create route53 resolver rules; list of map keys to filter local.route53_resolver_rules_all iam_service_linked_roles = optional(list(string)) # create iam service linked roles; list of map keys to filter local.iam_service_linked_roles; default is to create all From 4f60ea3937ef20e841d18c7b8d1f099edc4c6d52 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 25 Nov 2024 00:16:26 +0000 Subject: [PATCH 087/103] Bump oxsecurity/megalinter from 8.2.0 to 8.3.0 Bumps [oxsecurity/megalinter](https://github.com/oxsecurity/megalinter) from 8.2.0 to 8.3.0. 
- [Release notes](https://github.com/oxsecurity/megalinter/releases) - [Changelog](https://github.com/oxsecurity/megalinter/blob/main/CHANGELOG.md) - [Commits](https://github.com/oxsecurity/megalinter/compare/d8c95fc6f2237031fb9e9322b0f97100168afa6e...1fc052d03c7a43c78fe0fee19c9d648b749e0c01) --- updated-dependencies: - dependency-name: oxsecurity/megalinter dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- .github/workflows/format-code.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/format-code.yml b/.github/workflows/format-code.yml index b1dea43397b..291666fbab0 100644 --- a/.github/workflows/format-code.yml +++ b/.github/workflows/format-code.yml @@ -40,7 +40,7 @@ jobs: id: ml # You can override MegaLinter flavor used to have faster performances # More info at https://megalinter.io/flavors/ - uses: oxsecurity/megalinter/flavors/terraform@d8c95fc6f2237031fb9e9322b0f97100168afa6e #v8.2.0 + uses: oxsecurity/megalinter/flavors/terraform@1fc052d03c7a43c78fe0fee19c9d648b749e0c01 #v8.3.0 env: # All available variables are described in documentation # https://megalinter.io/configuration/#shared-variables From 79f9aee49890b17258ae21209661cc7447b4fb31 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 25 Nov 2024 00:17:08 +0000 Subject: [PATCH 088/103] Bump bridgecrewio/checkov-action from 12.2917.0 to 12.2918.0 Bumps [bridgecrewio/checkov-action](https://github.com/bridgecrewio/checkov-action) from 12.2917.0 to 12.2918.0. - [Release notes](https://github.com/bridgecrewio/checkov-action/releases) - [Commits](https://github.com/bridgecrewio/checkov-action/compare/cc23a656ff707900310d6870ca2b4289fa070396...05decb42b761b4c4ce4927c084165bb4705bbcef) --- updated-dependencies: - dependency-name: bridgecrewio/checkov-action dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] --- .github/workflows/code-scanning.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/code-scanning.yml b/.github/workflows/code-scanning.yml index a80a099382c..fa761adf798 100644 --- a/.github/workflows/code-scanning.yml +++ b/.github/workflows/code-scanning.yml @@ -81,7 +81,7 @@ jobs: fetch-depth: 0 - name: Run Checkov action id: checkov - uses: bridgecrewio/checkov-action@cc23a656ff707900310d6870ca2b4289fa070396 # v12.2917.0 + uses: bridgecrewio/checkov-action@05decb42b761b4c4ce4927c084165bb4705bbcef # v12.2918.0 with: directory: ./ framework: terraform From 2d450695d9f70c96e534416945a4d6734240ba88 Mon Sep 17 00:00:00 2001 From: Buckingham Date: Mon, 25 Nov 2024 08:37:22 +0000 Subject: [PATCH 089/103] Update_251124_1 --- terraform/environments/ppud/lambda.tf | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/terraform/environments/ppud/lambda.tf b/terraform/environments/ppud/lambda.tf index 39fd5346772..9f93dcc960d 100644 --- a/terraform/environments/ppud/lambda.tf +++ b/terraform/environments/ppud/lambda.tf @@ -488,11 +488,11 @@ data "archive_file" "zip_the_send_cpu_notification_code_prod" { resource "aws_lambda_permission" "allow_cloudwatch_to_call_lambda_send_cpu_graph_dev" { count = local.is-development == true ? 
1 : 0 - statement_id = "AllowExecutionFromCloudWatch" + statement_id = "AllowAccesstoCloudWatch" action = "lambda:InvokeFunction" function_name = aws_lambda_function.terraform_lambda_func_send_cpu_graph_dev[0].function_name - principal = "lambda.alarms.cloudwatch.amazonaws.com" - source_arn = "arn:aws:cloudwatch:eu-west-2:${local.environment_management.account_ids["ppud-development"]}:alarm:*" + principal = "cloudwatch.amazonaws.com" + source_arn = "arn:aws:cloudwatch:eu-west-2:${local.environment_management.account_ids["ppud-development"]}:*" } resource "aws_lambda_function" "terraform_lambda_func_send_cpu_graph_dev" { @@ -507,9 +507,9 @@ resource "aws_lambda_function" "terraform_lambda_func_send_cpu_graph_dev" { depends_on = [aws_iam_role_policy_attachment.attach_lambda_policy_cloudwatch_get_metric_data_to_lambda_role_cloudwatch_get_metric_data_dev] reserved_concurrent_executions = 5 # code_signing_config_arn = "arn:aws:lambda:eu-west-2:${local.environment_management.account_ids["ppud-development"]}:code-signing-config:csc-0c7136ccff2de748f" - dead_letter_config { - target_arn = aws_sqs_queue.lambda_queue_dev[0].arn - } + # dead_letter_config { + # target_arn = aws_sqs_queue.lambda_queue_dev[0].arn + # } tracing_config { mode = "Active" } From 2bce6cc327ec45567661cf061ced3cfb92cbe1e0 Mon Sep 17 00:00:00 2001 From: julialawrence Date: Mon, 25 Nov 2024 09:40:24 +0000 Subject: [PATCH 090/103] Removing external collaborators. 
Trying in one PR --- .../environment-configuration.tf | 14 --- .../ext-user-2024.tf | 102 ------------------ 2 files changed, 116 deletions(-) delete mode 100644 terraform/environments/analytical-platform-ingestion/ext-user-2024.tf diff --git a/terraform/environments/analytical-platform-ingestion/environment-configuration.tf b/terraform/environments/analytical-platform-ingestion/environment-configuration.tf index 71ad724ce2a..252fc58ef8c 100644 --- a/terraform/environments/analytical-platform-ingestion/environment-configuration.tf +++ b/terraform/environments/analytical-platform-ingestion/environment-configuration.tf @@ -92,20 +92,6 @@ locals { egress_bucket = module.bold_egress_bucket.s3_bucket_id egress_bucket_kms_key = module.s3_bold_egress_kms.key_arn } - "darren-brooke" = { - ssh_key = "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDAxeaj85/JshqYMQ1B97TtHyy81oF3L33s89NWCIiHSM/Hql6aFfxCCivsN4Y1OZic8S5drgxe7MdETaWeEKfaWIMgqESGOw5yhCuNSEvt896cc0hSU8/ZwUZrTzYfiCAwqBQHI13JBAP7VcWBR6v6CYQL8JB7lSEvq7vY2BJJ4N9HchlXBHvxHHOu7Y6+ta7BrODvCc0zLHWANE65U4DmZpXmwHHsBao4cOUIlrBIDIAGtXAJB/L+cByH2OPMsRPhUe2UMfTgRHCJdekics/7DzrR+hhZRnHM9du52TFT89eAKpQGpp0wEkFoYKntXesGFr1R/uhRtqzanzBggXIv db@ubuntu" - cidr_blocks = ["54.37.241.156/30"] - egress_bucket = module.ext_2024_egress_bucket.s3_bucket_id - egress_bucket_kms_key = module.s3_ext_2024_egress_kms.key_arn - - } - "aaron-willetts" = { - ssh_key = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIAtHz+QozotArRIjRcmD4GDdiQLtXPTX+GGAXqpeqpBZ aaron@kali" - cidr_blocks = ["167.71.136.237/32"] - egress_bucket = module.ext_2024_egress_bucket.s3_bucket_id - egress_bucket_kms_key = module.s3_ext_2024_egress_kms.key_arn - - } } /* DataSync */ diff --git a/terraform/environments/analytical-platform-ingestion/ext-user-2024.tf b/terraform/environments/analytical-platform-ingestion/ext-user-2024.tf deleted file mode 100644 index dd723bd1057..00000000000 --- a/terraform/environments/analytical-platform-ingestion/ext-user-2024.tf +++ /dev/null @@ -1,102 +0,0 @@ 
-#tfsec:ignore:avd-aws-0088 - The bucket policy is attached to the bucket -#tfsec:ignore:avd-aws-0132 - The bucket policy is attached to the bucket -module "ext_2024_egress_bucket" { - #checkov:skip=CKV_TF_1:Module registry does not support commit hashes for versions - - source = "terraform-aws-modules/s3-bucket/aws" - version = "4.1.2" - - bucket = "mojap-ingestion-${local.environment}-ext-2024-egress" - - force_destroy = true - - versioning = { - enabled = true - } - - server_side_encryption_configuration = { - rule = { - apply_server_side_encryption_by_default = { - kms_master_key_id = module.s3_ext_2024_egress_kms.key_arn - sse_algorithm = "aws:kms" - } - } - } -} - -module "s3_ext_2024_egress_kms" { - #checkov:skip=CKV_TF_1:Module registry does not support commit hashes for versions - - source = "terraform-aws-modules/kms/aws" - version = "3.1.0" - - aliases = ["s3/ext-2024-egress"] - description = "Used in the External 2024 Egress Solution" - enable_default_policy = true - key_statements = [ - { - sid = "AllowReadOnlyRole" - actions = [ - "kms:Encrypt", - "kms:GenerateDataKey" - ] - resources = ["*"] - effect = "Allow" - principals = [ - { - type = "AWS" - identifiers = ["arn:aws:iam::${local.environment_management.account_ids[terraform.workspace]}:role/security-read-only"] - } - ] - } - ] - deletion_window_in_days = 7 -} - -data "aws_iam_policy_document" "ext_2024_target_bucket_policy" { - statement { - sid = "LandingPermissions" - effect = "Allow" - principals { - type = "AWS" - identifiers = ["arn:aws:iam::471112983409:role/transfer"] - } - actions = [ - "s3:GetObject", - "s3:PutObject", - "s3:DeleteObject", - "s3:PutObjectTagging" - ] - resources = [ - "arn:aws:s3:::mojap-ingestion-${local.environment}-ext-2024-target/*", - "arn:aws:s3:::mojap-ingestion-${local.environment}-ext-2024-target" - ] - } -} - -#tfsec:ignore:avd-aws-0088 - The bucket policy is attached to the bucket -#tfsec:ignore:avd-aws-0132 - The bucket policy is attached to the bucket 
-module "ext_2024_target_bucket" { - #checkov:skip=CKV_TF_1:Module registry does not support commit hashes for versions - - source = "terraform-aws-modules/s3-bucket/aws" - version = "4.1.2" - - bucket = "mojap-ingestion-${local.environment}-ext-2024-target" - - force_destroy = true - - versioning = { - enabled = true - } - attach_policy = true - policy = data.aws_iam_policy_document.ext_2024_target_bucket_policy.json - - server_side_encryption_configuration = { - rule = { - apply_server_side_encryption_by_default = { - sse_algorithm = "AES256" - } - } - } -} From 6778c4772d6f5e33c8fb276085541e3bb220ef33 Mon Sep 17 00:00:00 2001 From: Jacob Woffenden Date: Mon, 25 Nov 2024 09:48:00 +0000 Subject: [PATCH 091/103] Add cloudwatch_custom_namespaces for DPR Signed-off-by: Jacob Woffenden --- .../observability-platform/environment-configurations.tf | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/terraform/environments/observability-platform/environment-configurations.tf b/terraform/environments/observability-platform/environment-configurations.tf index a50b07efc2e..da150b3d9f5 100644 --- a/terraform/environments/observability-platform/environment-configurations.tf +++ b/terraform/environments/observability-platform/environment-configurations.tf @@ -62,6 +62,7 @@ locals { "aws_accounts" = { "digital-prison-reporting-development" = { cloudwatch_enabled = true + cloudwatch_custom_namespaces = "DPRAgentCustomMetrics,DPRDataReconciliationCustom" prometheus_push_enabled = false amazon_prometheus_query_enabled = false xray_enabled = false @@ -69,6 +70,7 @@ locals { }, "digital-prison-reporting-preproduction" = { cloudwatch_enabled = true + cloudwatch_custom_namespaces = "DPRAgentCustomMetrics,DPRDataReconciliationCustom" prometheus_push_enabled = false amazon_prometheus_query_enabled = false xray_enabled = false @@ -76,6 +78,7 @@ locals { }, "digital-prison-reporting-test" = { cloudwatch_enabled = true + cloudwatch_custom_namespaces = 
"DPRAgentCustomMetrics,DPRDataReconciliationCustom" prometheus_push_enabled = false amazon_prometheus_query_enabled = false xray_enabled = false @@ -154,6 +157,7 @@ locals { "aws_accounts" = { "digital-prison-reporting-production" = { cloudwatch_enabled = true + cloudwatch_custom_namespaces = "DPRAgentCustomMetrics,DPRDataReconciliationCustom" prometheus_push_enabled = false amazon_prometheus_query_enabled = false xray_enabled = false From 7694fabb529fd5f9561d79ee4ebc8c83e46c5cee Mon Sep 17 00:00:00 2001 From: Jacob Woffenden Date: Mon, 25 Nov 2024 10:56:27 +0000 Subject: [PATCH 092/103] Initial update --- .devcontainer/README.md | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/.devcontainer/README.md b/.devcontainer/README.md index bf6a524f218..1d71e3c733e 100644 --- a/.devcontainer/README.md +++ b/.devcontainer/README.md @@ -1,28 +1,32 @@ # Dev Container +> [!NOTE] > This is a community supported feature -To assist in the development of `modernisation-platform-environments`, the community have built a [dev container](https://containers.dev/) with the required tooling +To assist with working on this repository, the community has configured a [dev container](https://containers.dev/) with the required tooling. -## Prerequisites +You can run this locally, or with [GitHub Codespaces](https://docs.github.com/en/codespaces/overview). -- GitHub Codespaces +## GitHub Codespaces -or +To launch a GitHub Codespace, use the button below: -- Docker +[![Open in Codespace](https://github.com/codespaces/badge.svg)](https://codespaces.new/ministryofjustice/modernisation-platform-environments) -- Visual Studio Code +## Locally - - Dev Containers Extention +> [!WARNING] +> This has only been tested on macOS -## Running +### Prerequisites -### GitHub Codespaces +- Docker -Launch from GitHub +- Visual Studio Code + + - Dev Containers Extention -### Locally +### Steps 1. 
Ensure prerequisites are met From 680937858a3dfec5cae5844f902a3c99ea6bcb1b Mon Sep 17 00:00:00 2001 From: Jacob Woffenden Date: Mon, 25 Nov 2024 11:00:15 +0000 Subject: [PATCH 093/103] Add button --- .devcontainer/README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.devcontainer/README.md b/.devcontainer/README.md index 1d71e3c733e..5aac7ac37f4 100644 --- a/.devcontainer/README.md +++ b/.devcontainer/README.md @@ -9,7 +9,7 @@ You can run this locally, or with [GitHub Codespaces](https://docs.github.com/en ## GitHub Codespaces -To launch a GitHub Codespace, use the button below: +To launch a GitHub Codespace, use the button below [![Open in Codespace](https://github.com/codespaces/badge.svg)](https://codespaces.new/ministryofjustice/modernisation-platform-environments) @@ -28,6 +28,8 @@ To launch a GitHub Codespace, use the button below: ### Steps +[![Open in Dev Container](https://raw.githubusercontent.com/ministryofjustice/.devcontainer/refs/heads/main/contrib/badge.svg)](https://vscode.dev/redirect?url=vscode://ms-vscode-remote.remote-containers/cloneInVolume?url=https://github.com/ministryofjustice/modernisation-platform-environments) + 1. Ensure prerequisites are met 1. Clone repository From 544a5418d7e0d3dd2749f77b7cab41c0a67deaa4 Mon Sep 17 00:00:00 2001 From: Jacob Woffenden Date: Mon, 25 Nov 2024 11:26:59 +0000 Subject: [PATCH 094/103] Update wording --- .devcontainer/README.md | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/.devcontainer/README.md b/.devcontainer/README.md index 5aac7ac37f4..a175a0a9dcc 100644 --- a/.devcontainer/README.md +++ b/.devcontainer/README.md @@ -7,12 +7,6 @@ To assist with working on this repository, the community has configured a [dev c You can run this locally, or with [GitHub Codespaces](https://docs.github.com/en/codespaces/overview). 
-## GitHub Codespaces - -To launch a GitHub Codespace, use the button below - -[![Open in Codespace](https://github.com/codespaces/badge.svg)](https://codespaces.new/ministryofjustice/modernisation-platform-environments) - ## Locally > [!WARNING] @@ -26,17 +20,18 @@ To launch a GitHub Codespace, use the button below - Dev Containers Extention -### Steps +To launch locally, ensure the prerequisites are met, and then click the button below [![Open in Dev Container](https://raw.githubusercontent.com/ministryofjustice/.devcontainer/refs/heads/main/contrib/badge.svg)](https://vscode.dev/redirect?url=vscode://ms-vscode-remote.remote-containers/cloneInVolume?url=https://github.com/ministryofjustice/modernisation-platform-environments) -1. Ensure prerequisites are met +## GitHub Codespaces -1. Clone repository +> [!IMPORTANT] +> GitHub Codespaces are not currently paid for by the Ministry of Justice and are subject to the quotas [here](https://docs.github.com/en/billing/managing-billing-for-your-products/managing-billing-for-github-codespaces/about-billing-for-github-codespaces#monthly-included-storage-and-core-hours-for-personal-accounts) -1. Open repository in Visual Studio Code +To launch a GitHub Codespace, click the button below -1. 
Reopen in container +[![Open in Codespace](https://github.com/codespaces/badge.svg)](https://codespaces.new/ministryofjustice/modernisation-platform-environments) ## Tools From f9a34e3ccf179e427ab34bb09973f1651731fc30 Mon Sep 17 00:00:00 2001 From: Robert Sweetman Date: Mon, 25 Nov 2024 11:27:15 +0000 Subject: [PATCH 095/103] deploy t2-onr-bods-1 and enable asg for testing adding 2nd bods machine (#8776) --- .../oasys-national-reporting/locals_test.tf | 44 +++++++++---------- 1 file changed, 20 insertions(+), 24 deletions(-) diff --git a/terraform/environments/oasys-national-reporting/locals_test.tf b/terraform/environments/oasys-national-reporting/locals_test.tf index 096a578a62d..cfc069059fc 100644 --- a/terraform/environments/oasys-national-reporting/locals_test.tf +++ b/terraform/environments/oasys-national-reporting/locals_test.tf @@ -133,10 +133,10 @@ locals { instance_profile_policies = concat(local.ec2_autoscaling_groups.bods.config.instance_profile_policies, [ "Ec2SecretPolicy", ]) - # user_data_raw = base64encode(templatefile( - # "./templates/user-data-onr-bods-pwsh.yaml.tftpl", { - # branch = "TM/TM-620/test-pagefile-change" - # })) + user_data_raw = base64encode(templatefile( + "./templates/user-data-onr-bods-pwsh.yaml.tftpl", { + branch = "TM/TM-660/onr-bods-second-server" + })) }) instance = merge(local.ec2_autoscaling_groups.bods.instance, { instance_type = "m4.xlarge" @@ -151,26 +151,22 @@ locals { ec2_instances = { - # t2-onr-bods-1 = merge(local.ec2_instances.bods, { - # config = merge(local.ec2_instances.bods.config, { - # availability_zone = "eu-west-2a" - # user_data_raw = base64encode(templatefile( - # "./templates/user-data-onr-bods-pwsh.yaml.tftpl", { - # } - # )) - # instance_profile_policies = concat(local.ec2_instances.bods.config.instance_profile_policies, [ - # "Ec2SecretPolicy", - # ]) - # }) - # instance = merge(local.ec2_instances.bods.instance, { - # instance_type = "m4.xlarge" - # }) - # cloudwatch_metric_alarms = null - # tags = 
merge(local.ec2_instances.bods.tags, { - # oasys-national-reporting-environment = "t2" - # domain-name = "azure.noms.root" - # }) - # }) + t2-onr-bods-1 = merge(local.ec2_instances.bods, { + config = merge(local.ec2_instances.bods.config, { + availability_zone = "eu-west-2a" + instance_profile_policies = concat(local.ec2_instances.bods.config.instance_profile_policies, [ + "Ec2SecretPolicy", + ]) + }) + instance = merge(local.ec2_instances.bods.instance, { + instance_type = "m4.xlarge" + }) + cloudwatch_metric_alarms = null + tags = merge(local.ec2_instances.bods.tags, { + oasys-national-reporting-environment = "t2" + domain-name = "azure.noms.root" + }) + }) # Pending sorting out cluster install of Bods in modernisation-platform-configuration-management repo # t2-onr-bods-2 = merge(local.ec2_instances.bods, { From 9b5b07fae1e60d21685c627b8806dbf22304ef31 Mon Sep 17 00:00:00 2001 From: Dominic Robinson <65237317+drobinson-moj@users.noreply.github.com> Date: Mon, 25 Nov 2024 11:38:56 +0000 Subject: [PATCH 096/103] TM-720: add endpoint dashboard (#8780) * TM-720: add endpoint and pipeline dashboard * fix * revert * fix * fix * fix oem dash --- terraform/environments/hmpps-oem/locals.tf | 48 ++++++++++++++++++---- terraform/environments/hmpps-oem/main.tf | 5 +++ terraform/modules/baseline/ssm.tf | 2 +- 3 files changed, 46 insertions(+), 9 deletions(-) diff --git a/terraform/environments/hmpps-oem/locals.tf b/terraform/environments/hmpps-oem/locals.tf index e432fdb9efe..2fa555f3fd2 100644 --- a/terraform/environments/hmpps-oem/locals.tf +++ b/terraform/environments/hmpps-oem/locals.tf @@ -19,6 +19,7 @@ locals { baseline_environment_specific = local.baseline_environments_specific[local.environment] cloudwatch_dashboard_default_widget_groups = [ + "ec2_instance_endpoint_monitoring", "network_lb", "lb", "ec2", @@ -72,6 +73,16 @@ locals { module.baseline_presets.cloudwatch_dashboard_widget_groups.ec2_instance_oracle_db_with_backup, ] } + "endpoints-and-pipelines" = { + 
account_name = "hmpps-oem-${local.environment}" + periodOverride = "auto" + start = "-PT6H" + widget_groups = [ + module.baseline_presets.cloudwatch_dashboard_widget_groups.ec2_instance_endpoint_monitoring, + module.baseline_presets.cloudwatch_dashboard_widget_groups.ssm_command, + module.baseline_presets.cloudwatch_dashboard_widget_groups.github_workflows, + ] + } "hmpps-domain-services-${local.environment}" = { account_name = "hmpps-domain-services-${local.environment}" periodOverride = "auto" @@ -83,16 +94,37 @@ locals { ] } "hmpps-oem-${local.environment}" = { - account_name = "hmpps-oem-${local.environment}" + account_name = null periodOverride = "auto" start = "-PT6H" - widget_groups = [ - module.baseline_presets.cloudwatch_dashboard_widget_groups.ec2, - module.baseline_presets.cloudwatch_dashboard_widget_groups.ec2_linux, - module.baseline_presets.cloudwatch_dashboard_widget_groups.ec2_instance_linux, - module.baseline_presets.cloudwatch_dashboard_widget_groups.ec2_instance_oracle_db_with_backup, - module.baseline_presets.cloudwatch_dashboard_widget_groups.ec2_instance_textfile_monitoring, - ] + widget_groups = [{ + header_markdown = "## EC2 Oracle Enterprise Management" + width = 8 + height = 8 + add_ebs_widgets = { + iops = true + throughput = true + } + search_filter = { + ec2_tag = [ + { tag_name = "server-type", tag_value = "hmpps-oem" }, + ] + } + widgets = [ + module.baseline_presets.cloudwatch_dashboard_widgets.ec2.cpu-utilization-high, + module.baseline_presets.cloudwatch_dashboard_widgets.ec2.instance-status-check-failed, + module.baseline_presets.cloudwatch_dashboard_widgets.ec2.system-status-check-failed, + module.baseline_presets.cloudwatch_dashboard_widgets.ec2_cwagent_linux.free-disk-space-low, + module.baseline_presets.cloudwatch_dashboard_widgets.ec2_cwagent_linux.high-memory-usage, + module.baseline_presets.cloudwatch_dashboard_widgets.ec2_cwagent_linux.cpu-iowait-high, + 
module.baseline_presets.cloudwatch_dashboard_widgets.ec2_instance_cwagent_linux.free-disk-space-low, + module.baseline_presets.cloudwatch_dashboard_widgets.ec2_instance_cwagent_collectd_service_status_os.service-status-error-os-layer, + module.baseline_presets.cloudwatch_dashboard_widgets.ec2_instance_cwagent_collectd_service_status_app.service-status-error-app-layer, + module.baseline_presets.cloudwatch_dashboard_widgets.ec2_instance_cwagent_collectd_oracle_db_connected.oracle-db-disconnected, + module.baseline_presets.cloudwatch_dashboard_widgets.ec2_instance_cwagent_collectd_oracle_db_backup.oracle-db-rman-backup-error, + module.baseline_presets.cloudwatch_dashboard_widgets.ec2_instance_cwagent_collectd_oracle_db_backup.oracle-db-rman-backup-did-not-run, + ] + }] } "nomis-${local.environment}" = { account_name = "nomis-${local.environment}" diff --git a/terraform/environments/hmpps-oem/main.tf b/terraform/environments/hmpps-oem/main.tf index cd706d8538e..f6c8672c4bd 100644 --- a/terraform/environments/hmpps-oem/main.tf +++ b/terraform/environments/hmpps-oem/main.tf @@ -178,6 +178,11 @@ module "baseline" { lookup(local.baseline_environment_specific, "s3_buckets", {}), ) + schedule_alarms_lambda = merge( + lookup(local.baseline_all_environments, "schedule_alarms", {}), + lookup(local.baseline_environment_specific, "schedule_alarms", {}), + ) + secretsmanager_secrets = merge( module.baseline_presets.secretsmanager_secrets, lookup(local.baseline_all_environments, "secretsmanager_secrets", {}), diff --git a/terraform/modules/baseline/ssm.tf b/terraform/modules/baseline/ssm.tf index 588f02e66a2..ffecf9b6634 100644 --- a/terraform/modules/baseline/ssm.tf +++ b/terraform/modules/baseline/ssm.tf @@ -59,7 +59,7 @@ resource "aws_ssm_association" "this" { apply_only_at_cron_interval = each.value.apply_only_at_cron_interval association_name = each.key - name = each.value.name + name = try(aws_ssm_document.this[each.value.name].name, each.value.name) # so ssm_doc is created 
first max_concurrency = each.value.max_concurrency max_errors = each.value.max_errors schedule_expression = each.value.schedule_expression From 82eb9d6f9e50de2c52fa3f1b9b8fdc7236d82d8f Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 25 Nov 2024 12:10:48 +0000 Subject: [PATCH 097/103] Updates from GitHub Actions Format Code workflow (#8772) Co-authored-by: modernisation-platform-ci Co-authored-by: dms1981 --- .../corporate-information-system/iam.tf | 72 +++++++++---------- .../application_variables.json | 20 ++---- .../modules/dms_s3_v2/versions.tf | 2 +- .../domains/dms-endpoints/variables.tf | 2 +- .../modules/s3_bucket/main.tf | 6 +- terraform/environments/edw/ec2.tf | 2 +- .../electronic-monitoring-data/s3.tf | 6 +- terraform/environments/ppud/iam.tf | 28 ++++---- .../environments/tribunals/cloudfront.tf | 12 ++-- 9 files changed, 69 insertions(+), 81 deletions(-) diff --git a/terraform/environments/corporate-information-system/iam.tf b/terraform/environments/corporate-information-system/iam.tf index aa2fdb4a3b8..4a6865831b6 100644 --- a/terraform/environments/corporate-information-system/iam.tf +++ b/terraform/environments/corporate-information-system/iam.tf @@ -74,44 +74,44 @@ resource "aws_iam_role_policy" "cis_s3fs_policy" { Version = "2012-10-17" Statement = [ { - "Action": [ - "s3:*" + "Action" : [ + "s3:*" ], - "Resource": [ - "arn:aws:s3:::laa-software-bucket2", - "arn:aws:s3:::laa-software-bucket2/*", - "arn:aws:s3:::laa-software-library", - "arn:aws:s3:::laa-software-library/*", - "arn:aws:s3:::laa-cis-inbound-production", - "arn:aws:s3:::laa-cis-inbound-production/*", - "arn:aws:s3:::laa-cis-outbound-production", - "arn:aws:s3:::laa-cis-outbound-production/*", - "arn:aws:s3:::laa-ccms-outbound-production", - "arn:aws:s3:::laa-ccms-outbound-production/*", - "arn:aws:s3:::laa-ccms-inbound-production", - "arn:aws:s3:::laa-ccms-inbound-production/*" + "Resource" : [ + 
"arn:aws:s3:::laa-software-bucket2", + "arn:aws:s3:::laa-software-bucket2/*", + "arn:aws:s3:::laa-software-library", + "arn:aws:s3:::laa-software-library/*", + "arn:aws:s3:::laa-cis-inbound-production", + "arn:aws:s3:::laa-cis-inbound-production/*", + "arn:aws:s3:::laa-cis-outbound-production", + "arn:aws:s3:::laa-cis-outbound-production/*", + "arn:aws:s3:::laa-ccms-outbound-production", + "arn:aws:s3:::laa-ccms-outbound-production/*", + "arn:aws:s3:::laa-ccms-inbound-production", + "arn:aws:s3:::laa-ccms-inbound-production/*" ], - "Effect": "Allow" - }, - { - "Action": [ - "logs:CreateLogGroup", - "logs:CreateLogStream", - "logs:DescribeLogStreams", - "logs:PutRetentionPolicy", - "logs:PutLogEvents", - "ec2:DescribeInstances" - ], - "Resource": "*", - "Effect": "Allow" - }, - { - "Action": [ - "ec2:CreateTags" - ], - "Resource": "*", - "Effect": "Allow" - } + "Effect" : "Allow" + }, + { + "Action" : [ + "logs:CreateLogGroup", + "logs:CreateLogStream", + "logs:DescribeLogStreams", + "logs:PutRetentionPolicy", + "logs:PutLogEvents", + "ec2:DescribeInstances" + ], + "Resource" : "*", + "Effect" : "Allow" + }, + { + "Action" : [ + "ec2:CreateTags" + ], + "Resource" : "*", + "Effect" : "Allow" + } ] }) } \ No newline at end of file diff --git a/terraform/environments/digital-prison-reporting/application_variables.json b/terraform/environments/digital-prison-reporting/application_variables.json index 5ef69277647..18c80c7cd78 100644 --- a/terraform/environments/digital-prison-reporting/application_variables.json +++ b/terraform/environments/digital-prison-reporting/application_variables.json @@ -94,10 +94,7 @@ "setup_sonatype_secrets": true, "setup_scheduled_action_iam_role": true, "setup_redshift_schedule": true, - "dps_domains": [ - "dps-activities", - "dps-case-notes" - ], + "dps_domains": ["dps-activities", "dps-case-notes"], "alarms": { "setup_cw_alarms": true, "redshift": { @@ -272,10 +269,7 @@ "setup_sonatype_secrets": false, "setup_scheduled_action_iam_role": 
true, "setup_redshift_schedule": true, - "dps_domains": [ - "dps-activities", - "dps-case-notes" - ], + "dps_domains": ["dps-activities", "dps-case-notes"], "alarms": { "setup_cw_alarms": true, "redshift": { @@ -452,10 +446,7 @@ "setup_scheduled_action_iam_role": true, "setup_redshift_schedule": true, "enable_redshift_health_check": true, - "dps_domains": [ - "dps-activities", - "dps-case-notes" - ], + "dps_domains": ["dps-activities", "dps-case-notes"], "alarms": { "setup_cw_alarms": true, "redshift": { @@ -648,10 +639,7 @@ "setup_sonatype_secrets": false, "setup_scheduled_action_iam_role": false, "setup_redshift_schedule": false, - "dps_domains": [ - "dps-activities", - "dps-case-notes" - ], + "dps_domains": ["dps-activities", "dps-case-notes"], "alarms": { "setup_cw_alarms": true, "redshift": { diff --git a/terraform/environments/digital-prison-reporting/modules/dms_s3_v2/versions.tf b/terraform/environments/digital-prison-reporting/modules/dms_s3_v2/versions.tf index d2163a87985..bf68a137672 100644 --- a/terraform/environments/digital-prison-reporting/modules/dms_s3_v2/versions.tf +++ b/terraform/environments/digital-prison-reporting/modules/dms_s3_v2/versions.tf @@ -6,7 +6,7 @@ terraform { } template = { - source = "hashicorp/template" + source = "hashicorp/template" version = "~> 2.2" } diff --git a/terraform/environments/digital-prison-reporting/modules/domains/dms-endpoints/variables.tf b/terraform/environments/digital-prison-reporting/modules/domains/dms-endpoints/variables.tf index aa7f9023442..a2dc57b6c98 100644 --- a/terraform/environments/digital-prison-reporting/modules/domains/dms-endpoints/variables.tf +++ b/terraform/environments/digital-prison-reporting/modules/domains/dms-endpoints/variables.tf @@ -117,7 +117,7 @@ variable "identifier" { #-------------------------------------------------------------- variable "target_backup_retention_period" { - type = string + type = string # Days default = "30" description = "Retention of RDS backups" diff 
--git a/terraform/environments/digital-prison-reporting/modules/s3_bucket/main.tf b/terraform/environments/digital-prison-reporting/modules/s3_bucket/main.tf index 4cf22f0992a..50c941d071e 100644 --- a/terraform/environments/digital-prison-reporting/modules/s3_bucket/main.tf +++ b/terraform/environments/digital-prison-reporting/modules/s3_bucket/main.tf @@ -46,7 +46,7 @@ resource "aws_s3_bucket_lifecycle_configuration" "lifecycle" { # - Transitions objects to STANDARD_IA after 30 days (cost-effective storage for infrequent access). # - Deletes objects after 90 days. dynamic "transition" { - for_each = var.lifecycle_category == "short_term" ? [ { days = 30, storage_class = "STANDARD_IA" } ] : [] + for_each = var.lifecycle_category == "short_term" ? [{ days = 30, storage_class = "STANDARD_IA" }] : [] content { days = transition.value.days storage_class = transition.value.storage_class @@ -54,8 +54,8 @@ resource "aws_s3_bucket_lifecycle_configuration" "lifecycle" { } dynamic "expiration" { - for_each = var.lifecycle_category == "short_term" ? [ { days = 90 } ] : ( - var.lifecycle_category == "temporary" ? [ { days = 30 } ] : []) + for_each = var.lifecycle_category == "short_term" ? [{ days = 90 }] : ( + var.lifecycle_category == "temporary" ? 
[{ days = 30 }] : []) content { days = expiration.value.days } diff --git a/terraform/environments/edw/ec2.tf b/terraform/environments/edw/ec2.tf index 73dd891836b..67aab63cb4d 100644 --- a/terraform/environments/edw/ec2.tf +++ b/terraform/environments/edw/ec2.tf @@ -347,7 +347,7 @@ EOF ####### IAM role ####### resource "aws_iam_role" "edw_ec2_role" { - name = "${local.application_name}-ec2-instance-role" + name = "${local.application_name}-ec2-instance-role" tags = merge( local.tags, { diff --git a/terraform/environments/electronic-monitoring-data/s3.tf b/terraform/environments/electronic-monitoring-data/s3.tf index b9d23c5236b..a35631c8a09 100644 --- a/terraform/environments/electronic-monitoring-data/s3.tf +++ b/terraform/environments/electronic-monitoring-data/s3.tf @@ -5,15 +5,15 @@ locals { "production" = null "preproduction" = { "account_number" = 173142358744 - "role_name" = "juniper-datatransfer-lambda-role" + "role_name" = "juniper-datatransfer-lambda-role" } "test" = { "account_number" = 173142358744 - "role_name" = "dev-datatransfer-lambda-role" + "role_name" = "dev-datatransfer-lambda-role" } "development" = { "account_number" = 173142358744 - "role_name" = "dev-datatransfer-lambda-role" + "role_name" = "dev-datatransfer-lambda-role" } } } diff --git a/terraform/environments/ppud/iam.tf b/terraform/environments/ppud/iam.tf index 15ecce0c131..cd6af3eac6f 100644 --- a/terraform/environments/ppud/iam.tf +++ b/terraform/environments/ppud/iam.tf @@ -1183,20 +1183,20 @@ resource "aws_iam_policy" "iam_policy_for_lambda_cloudwatch_get_metric_data_dev" ] }, { - "Sid" : "SQSPolicy", - "Effect" : "Allow", - "Action" : [ - "sqs:ChangeMessageVisibility", - "sqs:DeleteMessage", - "sqs:GetQueueAttributes", - "sqs:GetQueueUrl", - "sqs:ListQueueTags", - "sqs:ReceiveMessage", - "sqs:SendMessage" - ], - "Resource" : [ - "arn:aws:sqs:eu-west-2:${local.environment_management.account_ids["ppud-production"]}:Lambda-Queue-Production" - ] + "Sid" : "SQSPolicy", + "Effect" : 
"Allow", + "Action" : [ + "sqs:ChangeMessageVisibility", + "sqs:DeleteMessage", + "sqs:GetQueueAttributes", + "sqs:GetQueueUrl", + "sqs:ListQueueTags", + "sqs:ReceiveMessage", + "sqs:SendMessage" + ], + "Resource" : [ + "arn:aws:sqs:eu-west-2:${local.environment_management.account_ids["ppud-production"]}:Lambda-Queue-Production" + ] }, { "Sid" : "SESPolicy", diff --git a/terraform/environments/tribunals/cloudfront.tf b/terraform/environments/tribunals/cloudfront.tf index 489a37f5dc1..c81f76104f9 100644 --- a/terraform/environments/tribunals/cloudfront.tf +++ b/terraform/environments/tribunals/cloudfront.tf @@ -10,12 +10,12 @@ resource "aws_cloudfront_distribution" "tribunals_distribution" { origin_id = "tribunalsOrigin" custom_origin_config { - http_port = 80 - https_port = 443 - origin_protocol_policy = "https-only" - origin_ssl_protocols = ["TLSv1.2"] + http_port = 80 + https_port = 443 + origin_protocol_policy = "https-only" + origin_ssl_protocols = ["TLSv1.2"] origin_keepalive_timeout = 60 - origin_read_timeout = 60 + origin_read_timeout = 60 } custom_header { @@ -27,7 +27,7 @@ resource "aws_cloudfront_distribution" "tribunals_distribution" { default_cache_behavior { target_origin_id = "tribunalsOrigin" - cache_policy_id = data.aws_cloudfront_cache_policy.caching_disabled.id + cache_policy_id = data.aws_cloudfront_cache_policy.caching_disabled.id origin_request_policy_id = data.aws_cloudfront_origin_request_policy.all_viewer.id viewer_protocol_policy = "redirect-to-https" From cc41b2beb3fd602063d7112cbac28852f721c511 Mon Sep 17 00:00:00 2001 From: Dominic Robinson <65237317+drobinson-moj@users.noreply.github.com> Date: Mon, 25 Nov 2024 12:38:18 +0000 Subject: [PATCH 098/103] Remote deleted roles from secrets manager sharing (#8782) --- .../hmpps-domain-services/locals_secretsmanager.tf | 4 ---- 1 file changed, 4 deletions(-) diff --git a/terraform/environments/hmpps-domain-services/locals_secretsmanager.tf 
b/terraform/environments/hmpps-domain-services/locals_secretsmanager.tf index 0273edb078c..9a1ccda3578 100644 --- a/terraform/environments/hmpps-domain-services/locals_secretsmanager.tf +++ b/terraform/environments/hmpps-domain-services/locals_secretsmanager.tf @@ -9,8 +9,6 @@ locals { "arn:aws:iam::${module.environment.account_ids.corporate-staff-rostering-test}:role/EC2HmppsDomainSecretsRole", "arn:aws:iam::${module.environment.account_ids.planetfm-development}:role/EC2HmppsDomainSecretsRole", "arn:aws:iam::${module.environment.account_ids.planetfm-test}:role/EC2HmppsDomainSecretsRole", - "arn:aws:iam::${module.environment.account_ids.corporate-staff-rostering-development}:role/LambdaFunctionADObjectCleanUp", - "arn:aws:iam::${module.environment.account_ids.corporate-staff-rostering-test}:role/LambdaFunctionADObjectCleanUp", "arn:aws:iam::${module.environment.account_ids.core-shared-services-production}:role/ad-fixngo-ec2-nonlive-role", "arn:aws:iam::${module.environment.account_ids.nomis-development}:role/EC2HmppsDomainSecretsRole", "arn:aws:iam::${module.environment.account_ids.nomis-test}:role/EC2HmppsDomainSecretsRole", @@ -26,8 +24,6 @@ locals { "arn:aws:iam::${module.environment.account_ids.corporate-staff-rostering-production}:role/EC2HmppsDomainSecretsRole", "arn:aws:iam::${module.environment.account_ids.planetfm-preproduction}:role/EC2HmppsDomainSecretsRole", "arn:aws:iam::${module.environment.account_ids.planetfm-production}:role/EC2HmppsDomainSecretsRole", - "arn:aws:iam::${module.environment.account_ids.corporate-staff-rostering-preproduction}:role/LambdaFunctionADObjectCleanUp", - "arn:aws:iam::${module.environment.account_ids.corporate-staff-rostering-production}:role/LambdaFunctionADObjectCleanUp", "arn:aws:iam::${module.environment.account_ids.core-shared-services-production}:role/ad-fixngo-ec2-live-role", "arn:aws:iam::${module.environment.account_ids.nomis-preproduction}:role/EC2HmppsDomainSecretsRole", 
"arn:aws:iam::${module.environment.account_ids.nomis-production}:role/EC2HmppsDomainSecretsRole", From 08f7720e915de7277982651f3e91a0385b901768 Mon Sep 17 00:00:00 2001 From: jodiejones-moj Date: Mon, 25 Nov 2024 13:59:56 +0000 Subject: [PATCH 099/103] Change assocaited_public_ip_address to true --- terraform/environments/panda-cyber-appsec-lab/ec2.tf | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/terraform/environments/panda-cyber-appsec-lab/ec2.tf b/terraform/environments/panda-cyber-appsec-lab/ec2.tf index e0ef2484906..12f11452239 100644 --- a/terraform/environments/panda-cyber-appsec-lab/ec2.tf +++ b/terraform/environments/panda-cyber-appsec-lab/ec2.tf @@ -1,12 +1,13 @@ # Kali Linux Instance resource "aws_instance" "kali_linux" { ami = "ami-0f398bcc12f72f967" // aws-marketplace/kali-last-snapshot-amd64-2024.2.0-804fcc46-63fc-4eb6-85a1-50e66d6c7215 - associate_public_ip_address = false + associate_public_ip_address = true instance_type = "t2.micro" subnet_id = module.vpc.private_subnets.0 vpc_security_group_ids = [aws_security_group.kali_linux_sg.id] iam_instance_profile = aws_iam_instance_profile.ssm_instance_profile.name ebs_optimized = true + metadata_options { http_tokens = "required" } @@ -53,12 +54,13 @@ resource "aws_instance" "kali_linux" { # Defect Dojo Instance resource "aws_instance" "defect_dojo" { ami = "ami-0e8d228ad90af673b" - associate_public_ip_address = false + associate_public_ip_address = true instance_type = "t2.micro" subnet_id = module.vpc.private_subnets.0 vpc_security_group_ids = [aws_security_group.kali_linux_sg.id] iam_instance_profile = aws_iam_instance_profile.ssm_instance_profile.name ebs_optimized = true + metadata_options { http_tokens = "required" } From 7513bca1e1344efeb2a911f03552f77d61aa1e1d Mon Sep 17 00:00:00 2001 From: jodiejones-moj Date: Mon, 25 Nov 2024 14:44:51 +0000 Subject: [PATCH 100/103] Updated associate_public_ip_address to true --- terraform/environments/panda-cyber-appsec-lab/ec2.tf | 4 
++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/terraform/environments/panda-cyber-appsec-lab/ec2.tf b/terraform/environments/panda-cyber-appsec-lab/ec2.tf index 447d2ff3e7b..12f11452239 100644 --- a/terraform/environments/panda-cyber-appsec-lab/ec2.tf +++ b/terraform/environments/panda-cyber-appsec-lab/ec2.tf @@ -1,7 +1,7 @@ # Kali Linux Instance resource "aws_instance" "kali_linux" { ami = "ami-0f398bcc12f72f967" // aws-marketplace/kali-last-snapshot-amd64-2024.2.0-804fcc46-63fc-4eb6-85a1-50e66d6c7215 - associate_public_ip_address = false + associate_public_ip_address = true instance_type = "t2.micro" subnet_id = module.vpc.private_subnets.0 vpc_security_group_ids = [aws_security_group.kali_linux_sg.id] @@ -54,7 +54,7 @@ resource "aws_instance" "kali_linux" { # Defect Dojo Instance resource "aws_instance" "defect_dojo" { ami = "ami-0e8d228ad90af673b" - associate_public_ip_address = false + associate_public_ip_address = true instance_type = "t2.micro" subnet_id = module.vpc.private_subnets.0 vpc_security_group_ids = [aws_security_group.kali_linux_sg.id] From 89fb79a4b7e198aa44f6c4fdf62fb4ea2df5b50c Mon Sep 17 00:00:00 2001 From: Dominic Robinson <65237317+drobinson-moj@users.noreply.github.com> Date: Mon, 25 Nov 2024 14:47:52 +0000 Subject: [PATCH 101/103] TM-720: enable scheduled ssm command monitoring (#8785) * align main.tf across accounts * enable ssm monitoring and widgets * fix * fix * remove alarm --- .../environments/corporate-staff-rostering/locals.tf | 2 ++ terraform/environments/corporate-staff-rostering/main.tf | 6 ++++++ terraform/environments/hmpps-domain-services/locals.tf | 2 ++ .../hmpps-domain-services/locals_preproduction.tf | 5 +++-- terraform/environments/hmpps-domain-services/main.tf | 8 +++----- terraform/environments/hmpps-oem/locals.tf | 1 + .../hmpps-oem/locals_cloudwatch_metric_alarms.tf | 1 - terraform/environments/hmpps-oem/main.tf | 4 ++-- terraform/environments/nomis-combined-reporting/locals.tf | 2 ++ 
terraform/environments/nomis-combined-reporting/main.tf | 6 ++++++ terraform/environments/nomis-data-hub/locals.tf | 2 ++ terraform/environments/nomis-data-hub/main.tf | 6 ++++++ terraform/environments/nomis/locals.tf | 1 + terraform/environments/nomis/main.tf | 6 ++++++ terraform/environments/oasys-national-reporting/locals.tf | 2 ++ terraform/environments/oasys-national-reporting/main.tf | 6 ++++++ terraform/environments/oasys/locals.tf | 2 ++ terraform/environments/oasys/main.tf | 6 ++++++ terraform/environments/planetfm/locals.tf | 2 ++ terraform/environments/planetfm/main.tf | 6 ++++++ .../modules/baseline_presets/cloudwatch_metric_alarms.tf | 2 +- terraform/modules/baseline_presets/variables.tf | 1 + 22 files changed, 68 insertions(+), 11 deletions(-) diff --git a/terraform/environments/corporate-staff-rostering/locals.tf b/terraform/environments/corporate-staff-rostering/locals.tf index 27bb2195852..e44f27dc5e9 100644 --- a/terraform/environments/corporate-staff-rostering/locals.tf +++ b/terraform/environments/corporate-staff-rostering/locals.tf @@ -26,6 +26,7 @@ locals { "ec2_linux", "ec2_instance_linux", "ec2_instance_oracle_db_with_backup", + "ssm_command", ] cloudwatch_metric_alarms_default_actions = ["pagerduty"] cloudwatch_metric_oam_links_ssm_parameters = ["hmpps-oem-${local.environment}"] @@ -45,6 +46,7 @@ locals { enable_s3_db_backup_bucket = true enable_s3_shared_bucket = true enable_s3_software_bucket = true + enable_ssm_command_monitoring = true s3_iam_policies = ["EC2S3BucketWriteAndDeleteAccessPolicy"] software_bucket_name = "csr-software" } diff --git a/terraform/environments/corporate-staff-rostering/main.tf b/terraform/environments/corporate-staff-rostering/main.tf index a23f7e6d41b..d234a1e3a07 100644 --- a/terraform/environments/corporate-staff-rostering/main.tf +++ b/terraform/environments/corporate-staff-rostering/main.tf @@ -74,6 +74,7 @@ module "baseline" { ) cloudwatch_metric_alarms = merge( + 
module.baseline_presets.cloudwatch_metric_alarms_baseline, lookup(local.baseline_all_environments, "cloudwatch_metric_alarms", {}), lookup(local.baseline_environment_specific, "cloudwatch_metric_alarms", {}), ) @@ -177,6 +178,11 @@ module "baseline" { lookup(local.baseline_environment_specific, "s3_buckets", {}), ) + schedule_alarms_lambda = merge( + lookup(local.baseline_all_environments, "schedule_alarms_lambda", {}), + lookup(local.baseline_environment_specific, "schedule_alarms_lambda", {}), + ) + secretsmanager_secrets = merge( module.baseline_presets.secretsmanager_secrets, lookup(local.baseline_all_environments, "secretsmanager_secrets", {}), diff --git a/terraform/environments/hmpps-domain-services/locals.tf b/terraform/environments/hmpps-domain-services/locals.tf index d6e5ed777f9..b06f71f4ad6 100644 --- a/terraform/environments/hmpps-domain-services/locals.tf +++ b/terraform/environments/hmpps-domain-services/locals.tf @@ -24,6 +24,7 @@ locals { "lb", "ec2", "ec2_windows", + "ssm_command", ] cloudwatch_metric_alarms_default_actions = ["pagerduty"] cloudwatch_metric_oam_links_ssm_parameters = ["hmpps-oem-${local.environment}"] @@ -38,6 +39,7 @@ locals { enable_hmpps_domain = true enable_image_builder = true enable_s3_bucket = true + enable_ssm_command_monitoring = true s3_iam_policies = ["EC2S3BucketWriteAndDeleteAccessPolicy"] } } diff --git a/terraform/environments/hmpps-domain-services/locals_preproduction.tf b/terraform/environments/hmpps-domain-services/locals_preproduction.tf index bb72afcc886..33544ecf6b1 100644 --- a/terraform/environments/hmpps-domain-services/locals_preproduction.tf +++ b/terraform/environments/hmpps-domain-services/locals_preproduction.tf @@ -153,9 +153,10 @@ locals { }) } - schedule_alarms = { + schedule_alarms_lambda = { + function_name = "schedule-alarms" alarm_patterns = [ - "public-https-*-https-unhealthy-load-balancer-host", + "public-https-*-unhealthy-load-balancer-host", ] } diff --git 
a/terraform/environments/hmpps-domain-services/main.tf b/terraform/environments/hmpps-domain-services/main.tf index 04db08b1919..d234a1e3a07 100644 --- a/terraform/environments/hmpps-domain-services/main.tf +++ b/terraform/environments/hmpps-domain-services/main.tf @@ -74,6 +74,7 @@ module "baseline" { ) cloudwatch_metric_alarms = merge( + module.baseline_presets.cloudwatch_metric_alarms_baseline, lookup(local.baseline_all_environments, "cloudwatch_metric_alarms", {}), lookup(local.baseline_environment_specific, "cloudwatch_metric_alarms", {}), ) @@ -178,11 +179,8 @@ module "baseline" { ) schedule_alarms_lambda = merge( - { - function_name = "schedule-alarms" - }, - lookup(local.baseline_all_environments, "schedule_alarms", {}), - lookup(local.baseline_environment_specific, "schedule_alarms", {}), + lookup(local.baseline_all_environments, "schedule_alarms_lambda", {}), + lookup(local.baseline_environment_specific, "schedule_alarms_lambda", {}), ) secretsmanager_secrets = merge( diff --git a/terraform/environments/hmpps-oem/locals.tf b/terraform/environments/hmpps-oem/locals.tf index 2fa555f3fd2..73264f3b4cb 100644 --- a/terraform/environments/hmpps-oem/locals.tf +++ b/terraform/environments/hmpps-oem/locals.tf @@ -51,6 +51,7 @@ locals { enable_s3_shared_bucket = true enable_s3_software_bucket = true enable_ssm_command_monitoring = true + enable_ssm_missing_metric_monitoring = true s3_iam_policies = ["EC2S3BucketWriteAndDeleteAccessPolicy"] } } diff --git a/terraform/environments/hmpps-oem/locals_cloudwatch_metric_alarms.tf b/terraform/environments/hmpps-oem/locals_cloudwatch_metric_alarms.tf index 0bbea69f30f..9921b4b0510 100644 --- a/terraform/environments/hmpps-oem/locals_cloudwatch_metric_alarms.tf +++ b/terraform/environments/hmpps-oem/locals_cloudwatch_metric_alarms.tf @@ -25,7 +25,6 @@ locals { csr-r4-pp = ["r4.pp.csr.service.justice.gov.uk", false, "corporate-staff-rostering-pagerduty"] csr-r5-pp = ["r5.pp.csr.service.justice.gov.uk", false, 
"corporate-staff-rostering-pagerduty"] csr-r6-pp = ["r6.pp.csr.service.justice.gov.uk", false, "corporate-staff-rostering-pagerduty"] - hpa-preprod = ["hpa-preprod.service.hmpps.dsd.io", true, "azure-fixngo-pagerduty"] nomis-lsast = ["c.lsast-nomis.az.justice.gov.uk", true, "nomis-pagerduty"] nomis-pp = ["c.pp-nomis.az.justice.gov.uk", true, "nomis-pagerduty"] nomis-reporting-pp = ["reporting.pp-nomis.az.justice.gov.uk", true, "nomis-combined-reporting-pagerduty"] diff --git a/terraform/environments/hmpps-oem/main.tf b/terraform/environments/hmpps-oem/main.tf index f6c8672c4bd..d234a1e3a07 100644 --- a/terraform/environments/hmpps-oem/main.tf +++ b/terraform/environments/hmpps-oem/main.tf @@ -179,8 +179,8 @@ module "baseline" { ) schedule_alarms_lambda = merge( - lookup(local.baseline_all_environments, "schedule_alarms", {}), - lookup(local.baseline_environment_specific, "schedule_alarms", {}), + lookup(local.baseline_all_environments, "schedule_alarms_lambda", {}), + lookup(local.baseline_environment_specific, "schedule_alarms_lambda", {}), ) secretsmanager_secrets = merge( diff --git a/terraform/environments/nomis-combined-reporting/locals.tf b/terraform/environments/nomis-combined-reporting/locals.tf index 68b4c09eeca..3acc84b9578 100644 --- a/terraform/environments/nomis-combined-reporting/locals.tf +++ b/terraform/environments/nomis-combined-reporting/locals.tf @@ -27,6 +27,7 @@ locals { "ec2_instance_linux", "ec2_instance_oracle_db_with_backup", "ec2_windows", + "ssm_command", ] cloudwatch_metric_alarms_default_actions = ["pagerduty"] cloudwatch_metric_oam_links_ssm_parameters = ["hmpps-oem-${local.environment}"] @@ -44,6 +45,7 @@ locals { enable_s3_bucket = true enable_s3_db_backup_bucket = true enable_s3_software_bucket = true + enable_ssm_command_monitoring = true s3_iam_policies = ["EC2S3BucketWriteAndDeleteAccessPolicy"] } } diff --git a/terraform/environments/nomis-combined-reporting/main.tf b/terraform/environments/nomis-combined-reporting/main.tf 
index a23f7e6d41b..d234a1e3a07 100644 --- a/terraform/environments/nomis-combined-reporting/main.tf +++ b/terraform/environments/nomis-combined-reporting/main.tf @@ -74,6 +74,7 @@ module "baseline" { ) cloudwatch_metric_alarms = merge( + module.baseline_presets.cloudwatch_metric_alarms_baseline, lookup(local.baseline_all_environments, "cloudwatch_metric_alarms", {}), lookup(local.baseline_environment_specific, "cloudwatch_metric_alarms", {}), ) @@ -177,6 +178,11 @@ module "baseline" { lookup(local.baseline_environment_specific, "s3_buckets", {}), ) + schedule_alarms_lambda = merge( + lookup(local.baseline_all_environments, "schedule_alarms_lambda", {}), + lookup(local.baseline_environment_specific, "schedule_alarms_lambda", {}), + ) + secretsmanager_secrets = merge( module.baseline_presets.secretsmanager_secrets, lookup(local.baseline_all_environments, "secretsmanager_secrets", {}), diff --git a/terraform/environments/nomis-data-hub/locals.tf b/terraform/environments/nomis-data-hub/locals.tf index 794da1cd6bf..2ca908e66c0 100644 --- a/terraform/environments/nomis-data-hub/locals.tf +++ b/terraform/environments/nomis-data-hub/locals.tf @@ -26,6 +26,7 @@ locals { "ec2_instance_linux", "ec2_instance_textfile_monitoring", "ec2_windows", + "ssm_command", ] cloudwatch_metric_alarms_default_actions = ["pagerduty"] cloudwatch_metric_oam_links_ssm_parameters = ["hmpps-oem-${local.environment}"] @@ -42,6 +43,7 @@ locals { enable_image_builder = true enable_s3_bucket = true enable_s3_software_bucket = true + enable_ssm_command_monitoring = true s3_iam_policies = ["EC2S3BucketWriteAndDeleteAccessPolicy"] } } diff --git a/terraform/environments/nomis-data-hub/main.tf b/terraform/environments/nomis-data-hub/main.tf index a23f7e6d41b..d234a1e3a07 100644 --- a/terraform/environments/nomis-data-hub/main.tf +++ b/terraform/environments/nomis-data-hub/main.tf @@ -74,6 +74,7 @@ module "baseline" { ) cloudwatch_metric_alarms = merge( + 
module.baseline_presets.cloudwatch_metric_alarms_baseline, lookup(local.baseline_all_environments, "cloudwatch_metric_alarms", {}), lookup(local.baseline_environment_specific, "cloudwatch_metric_alarms", {}), ) @@ -177,6 +178,11 @@ module "baseline" { lookup(local.baseline_environment_specific, "s3_buckets", {}), ) + schedule_alarms_lambda = merge( + lookup(local.baseline_all_environments, "schedule_alarms_lambda", {}), + lookup(local.baseline_environment_specific, "schedule_alarms_lambda", {}), + ) + secretsmanager_secrets = merge( module.baseline_presets.secretsmanager_secrets, lookup(local.baseline_all_environments, "secretsmanager_secrets", {}), diff --git a/terraform/environments/nomis/locals.tf b/terraform/environments/nomis/locals.tf index 9b644f9da66..2f5c27143a0 100644 --- a/terraform/environments/nomis/locals.tf +++ b/terraform/environments/nomis/locals.tf @@ -37,6 +37,7 @@ locals { enable_s3_bucket = true enable_s3_db_backup_bucket = true enable_s3_software_bucket = true + enable_ssm_command_monitoring = true route53_resolver_rules = { outbound-data-and-private-subnets = ["azure-fixngo-domain"] } s3_iam_policies = ["EC2S3BucketWriteAndDeleteAccessPolicy"] software_bucket_name = "ec2-image-builder-nomis" diff --git a/terraform/environments/nomis/main.tf b/terraform/environments/nomis/main.tf index a23f7e6d41b..d234a1e3a07 100644 --- a/terraform/environments/nomis/main.tf +++ b/terraform/environments/nomis/main.tf @@ -74,6 +74,7 @@ module "baseline" { ) cloudwatch_metric_alarms = merge( + module.baseline_presets.cloudwatch_metric_alarms_baseline, lookup(local.baseline_all_environments, "cloudwatch_metric_alarms", {}), lookup(local.baseline_environment_specific, "cloudwatch_metric_alarms", {}), ) @@ -177,6 +178,11 @@ module "baseline" { lookup(local.baseline_environment_specific, "s3_buckets", {}), ) + schedule_alarms_lambda = merge( + lookup(local.baseline_all_environments, "schedule_alarms_lambda", {}), + lookup(local.baseline_environment_specific, 
"schedule_alarms_lambda", {}), + ) + secretsmanager_secrets = merge( module.baseline_presets.secretsmanager_secrets, lookup(local.baseline_all_environments, "secretsmanager_secrets", {}), diff --git a/terraform/environments/oasys-national-reporting/locals.tf b/terraform/environments/oasys-national-reporting/locals.tf index c8298624dd8..05c0cc63abe 100644 --- a/terraform/environments/oasys-national-reporting/locals.tf +++ b/terraform/environments/oasys-national-reporting/locals.tf @@ -26,6 +26,7 @@ locals { "ec2_linux", "ec2_instance_linux", "ec2_windows", + "ssm_command", ] cloudwatch_metric_alarms_default_actions = ["pagerduty"] cloudwatch_metric_oam_links_ssm_parameters = ["hmpps-oem-${local.environment}"] @@ -41,6 +42,7 @@ locals { enable_image_builder = true enable_s3_bucket = true enable_s3_shared_bucket = true + enable_ssm_command_monitoring = true s3_iam_policies = ["EC2S3BucketWriteAndDeleteAccessPolicy"] } } diff --git a/terraform/environments/oasys-national-reporting/main.tf b/terraform/environments/oasys-national-reporting/main.tf index a23f7e6d41b..d234a1e3a07 100644 --- a/terraform/environments/oasys-national-reporting/main.tf +++ b/terraform/environments/oasys-national-reporting/main.tf @@ -74,6 +74,7 @@ module "baseline" { ) cloudwatch_metric_alarms = merge( + module.baseline_presets.cloudwatch_metric_alarms_baseline, lookup(local.baseline_all_environments, "cloudwatch_metric_alarms", {}), lookup(local.baseline_environment_specific, "cloudwatch_metric_alarms", {}), ) @@ -177,6 +178,11 @@ module "baseline" { lookup(local.baseline_environment_specific, "s3_buckets", {}), ) + schedule_alarms_lambda = merge( + lookup(local.baseline_all_environments, "schedule_alarms_lambda", {}), + lookup(local.baseline_environment_specific, "schedule_alarms_lambda", {}), + ) + secretsmanager_secrets = merge( module.baseline_presets.secretsmanager_secrets, lookup(local.baseline_all_environments, "secretsmanager_secrets", {}), diff --git 
a/terraform/environments/oasys/locals.tf b/terraform/environments/oasys/locals.tf index 150199c622a..21c2dff9d24 100644 --- a/terraform/environments/oasys/locals.tf +++ b/terraform/environments/oasys/locals.tf @@ -28,6 +28,7 @@ locals { "ec2_instance_linux", "ec2_instance_oracle_db_with_backup", "ec2_instance_textfile_monitoring", + "ssm_command", ] cloudwatch_metric_alarms_default_actions = ["pagerduty"] cloudwatch_metric_oam_links_ssm_parameters = ["hmpps-oem-${local.environment}"] @@ -45,6 +46,7 @@ locals { enable_s3_bucket = true enable_s3_db_backup_bucket = true enable_s3_shared_bucket = true + enable_ssm_command_monitoring = true enable_vmimport = true s3_bucket_name = "${local.application_name}-${local.environment}" s3_iam_policies = ["EC2S3BucketWriteAndDeleteAccessPolicy"] diff --git a/terraform/environments/oasys/main.tf b/terraform/environments/oasys/main.tf index a23f7e6d41b..d234a1e3a07 100644 --- a/terraform/environments/oasys/main.tf +++ b/terraform/environments/oasys/main.tf @@ -74,6 +74,7 @@ module "baseline" { ) cloudwatch_metric_alarms = merge( + module.baseline_presets.cloudwatch_metric_alarms_baseline, lookup(local.baseline_all_environments, "cloudwatch_metric_alarms", {}), lookup(local.baseline_environment_specific, "cloudwatch_metric_alarms", {}), ) @@ -177,6 +178,11 @@ module "baseline" { lookup(local.baseline_environment_specific, "s3_buckets", {}), ) + schedule_alarms_lambda = merge( + lookup(local.baseline_all_environments, "schedule_alarms_lambda", {}), + lookup(local.baseline_environment_specific, "schedule_alarms_lambda", {}), + ) + secretsmanager_secrets = merge( module.baseline_presets.secretsmanager_secrets, lookup(local.baseline_all_environments, "secretsmanager_secrets", {}), diff --git a/terraform/environments/planetfm/locals.tf b/terraform/environments/planetfm/locals.tf index f0e16d6eda3..3fc3d4c065e 100644 --- a/terraform/environments/planetfm/locals.tf +++ b/terraform/environments/planetfm/locals.tf @@ -24,6 +24,7 @@ locals { 
"network_lb", "ec2", "ec2_windows", + "ssm_command", ] cloudwatch_metric_alarms_default_actions = ["pagerduty"] cloudwatch_metric_oam_links_ssm_parameters = ["hmpps-oem-${local.environment}"] @@ -39,6 +40,7 @@ locals { enable_image_builder = true enable_s3_bucket = true enable_s3_software_bucket = true + enable_ssm_command_monitoring = true s3_iam_policies = ["EC2S3BucketWriteAndDeleteAccessPolicy"] } } diff --git a/terraform/environments/planetfm/main.tf b/terraform/environments/planetfm/main.tf index a23f7e6d41b..d234a1e3a07 100644 --- a/terraform/environments/planetfm/main.tf +++ b/terraform/environments/planetfm/main.tf @@ -74,6 +74,7 @@ module "baseline" { ) cloudwatch_metric_alarms = merge( + module.baseline_presets.cloudwatch_metric_alarms_baseline, lookup(local.baseline_all_environments, "cloudwatch_metric_alarms", {}), lookup(local.baseline_environment_specific, "cloudwatch_metric_alarms", {}), ) @@ -177,6 +178,11 @@ module "baseline" { lookup(local.baseline_environment_specific, "s3_buckets", {}), ) + schedule_alarms_lambda = merge( + lookup(local.baseline_all_environments, "schedule_alarms_lambda", {}), + lookup(local.baseline_environment_specific, "schedule_alarms_lambda", {}), + ) + secretsmanager_secrets = merge( module.baseline_presets.secretsmanager_secrets, lookup(local.baseline_all_environments, "secretsmanager_secrets", {}), diff --git a/terraform/modules/baseline_presets/cloudwatch_metric_alarms.tf b/terraform/modules/baseline_presets/cloudwatch_metric_alarms.tf index 795670a383f..8a29488c93b 100644 --- a/terraform/modules/baseline_presets/cloudwatch_metric_alarms.tf +++ b/terraform/modules/baseline_presets/cloudwatch_metric_alarms.tf @@ -466,7 +466,7 @@ locals { var.options.enable_ssm_command_monitoring ? { "failed-ssm-command-${var.environment.account_name}" = local.cloudwatch_metric_alarms_by_sns_topic["dso-pipelines-pagerduty"].ssm.failed-ssm-command } : {}, - var.options.enable_ssm_command_monitoring ? 
{ + var.options.enable_ssm_missing_metric_monitoring ? { "ssm-command-metrics-missing-${var.environment.account_name}" = local.cloudwatch_metric_alarms_by_sns_topic["dso-pipelines-pagerduty"].ssm.ssm-command-metrics-missing } : {}, ) diff --git a/terraform/modules/baseline_presets/variables.tf b/terraform/modules/baseline_presets/variables.tf index 250569f6f1c..e1a08a9bc19 100644 --- a/terraform/modules/baseline_presets/variables.tf +++ b/terraform/modules/baseline_presets/variables.tf @@ -43,6 +43,7 @@ variable "options" { enable_s3_shared_bucket = optional(bool, false) # create devtest and preprodprod S3 bucket for sharing between accounts enable_s3_software_bucket = optional(bool, false) # create software S3 bucket in test account for image builder/configuration-management enable_ssm_command_monitoring = optional(bool, false) # create SNS topic and alarms for SSM command monitoring + enable_ssm_missing_metric_monitoring = optional(bool, false) # create alarm if SSM command metrics are missing enable_vmimport = optional(bool, false) # create role for vm imports route53_resolver_rules = optional(map(list(string)), {}) # create route53 resolver rules; list of map keys to filter local.route53_resolver_rules_all iam_service_linked_roles = optional(list(string)) # create iam service linked roles; list of map keys to filter local.iam_service_linked_roles; default is to create all From fe8d815fcb86fe8a9657e1ce15efdee2aecb961f Mon Sep 17 00:00:00 2001 From: Jacob Woffenden Date: Mon, 25 Nov 2024 14:53:45 +0000 Subject: [PATCH 102/103] Migrate tenants Signed-off-by: Jacob Woffenden --- .../environment-configurations.tf | 78 +++++++++++++++++++ 1 file changed, 78 insertions(+) diff --git a/terraform/environments/observability-platform/environment-configurations.tf b/terraform/environments/observability-platform/environment-configurations.tf index da150b3d9f5..4266af984e0 100644 --- a/terraform/environments/observability-platform/environment-configurations.tf +++ 
b/terraform/environments/observability-platform/environment-configurations.tf @@ -127,6 +127,13 @@ locals { "analytical-platform" = { identity_centre_team = "analytical-platform" aws_accounts = { + "analytical-platform-ingestion-development" = { + cloudwatch_enabled = true + prometheus_push_enabled = false + amazon_prometheus_query_enabled = false + xray_enabled = true + athena_enabled = false + }, "analytical-platform-ingestion-production" = { cloudwatch_enabled = true prometheus_push_enabled = false @@ -134,6 +141,14 @@ locals { xray_enabled = true athena_enabled = false }, + "analytical-platform-compute-development" = { + cloudwatch_enabled = true + prometheus_push_enabled = false + amazon_prometheus_query_enabled = true + amazon_prometheus_workspace_id = "ws-bfdd5d7a-5571-4686-bfd4-43ab07cf8d54ba" + xray_enabled = true + athena_enabled = false + }, "analytical-platform-compute-production" = { cloudwatch_enabled = true prometheus_push_enabled = false @@ -142,6 +157,14 @@ locals { xray_enabled = true athena_enabled = false }, + "analytical-platform-compute-test" = { + cloudwatch_enabled = true + prometheus_push_enabled = false + amazon_prometheus_query_enabled = true + amazon_prometheus_workspace_id = "ws-a9d7f576-58b7-4748-b4c1-b02bbdc54a2922" + xray_enabled = true + athena_enabled = false + }, "analytical-platform-production" = { cloudwatch_enabled = true prometheus_push_enabled = false @@ -152,9 +175,37 @@ locals { } } }, + "data-engineering" = { + "identity_centre_team" = "data-engineering", + "aws_accounts" = { + "analytical-platform-data-engineering-sandboxa" = { + cloudwatch_enabled = true + prometheus_push_enabled = false + amazon_prometheus_query_enabled = false + xray_enabled = false + athena_enabled = false + } + } + }, "digital-prison-reporting" = { "identity_centre_team" = "hmpps-digital-prison-reporting", "aws_accounts" = { + "digital-prison-reporting-development" = { + cloudwatch_enabled = true + cloudwatch_custom_namespaces = 
"DPRAgentCustomMetrics,DPRDataReconciliationCustom" + prometheus_push_enabled = false + amazon_prometheus_query_enabled = false + xray_enabled = false + athena_enabled = false + }, + "digital-prison-reporting-preproduction" = { + cloudwatch_enabled = true + cloudwatch_custom_namespaces = "DPRAgentCustomMetrics,DPRDataReconciliationCustom" + prometheus_push_enabled = false + amazon_prometheus_query_enabled = false + xray_enabled = false + athena_enabled = false + }, "digital-prison-reporting-production" = { cloudwatch_enabled = true cloudwatch_custom_namespaces = "DPRAgentCustomMetrics,DPRDataReconciliationCustom" @@ -162,6 +213,33 @@ locals { amazon_prometheus_query_enabled = false xray_enabled = false athena_enabled = false + }, + "digital-prison-reporting-test" = { + cloudwatch_enabled = true + cloudwatch_custom_namespaces = "DPRAgentCustomMetrics,DPRDataReconciliationCustom" + prometheus_push_enabled = false + amazon_prometheus_query_enabled = false + xray_enabled = false + athena_enabled = false + } + } + }, + "digital-studio-operations" = { + "identity_centre_team" = "studio-webops" + "aws_accounts" = { + "nomis-test" = { + cloudwatch_enabled = true + prometheus_push_enabled = false + amazon_prometheus_query_enabled = false + xray_enabled = false + athena_enabled = false + } + "oasys-test" = { + cloudwatch_enabled = true + prometheus_push_enabled = false + amazon_prometheus_query_enabled = false + xray_enabled = false + athena_enabled = false } } }, From 038d8a151825c7b805df57b48165ba2890394631 Mon Sep 17 00:00:00 2001 From: Matthew Price Date: Mon, 25 Nov 2024 15:50:05 +0000 Subject: [PATCH 103/103] Add kms key generation for use with landing bucket (#8755) * Add kms key generation for use with landing bucket * Change kms permission to use lambda role not lambda * Add cross account encyption grant * Add lambda decrypt * alternate lambda policy * Final tidy * Remove context as lamdba would need to use context also. 
--- .../modules/landing_bucket/main.tf | 51 +++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/terraform/environments/electronic-monitoring-data/modules/landing_bucket/main.tf b/terraform/environments/electronic-monitoring-data/modules/landing_bucket/main.tf index 1438a386f02..3560af40d81 100644 --- a/terraform/environments/electronic-monitoring-data/modules/landing_bucket/main.tf +++ b/terraform/environments/electronic-monitoring-data/modules/landing_bucket/main.tf @@ -77,6 +77,46 @@ module "this-bucket" { ) } +#----------------------------------------------------------------------------------- +# KMS - customer managed key for use with cross account data +#----------------------------------------------------------------------------------- + +module "kms_key" { + #checkov:skip=CKV_TF_1:Module registry does not support commit hashes for versions + #checkov:skip=CKV_TF_2:Module registry does not support tags for versions + + source = "terraform-aws-modules/kms/aws" + version = "3.1.1" + + aliases = ["s3/landing_bucket_${var.data_feed}_${var.order_type}"] + description = "${var.data_feed} ${var.order_type} landing bucket KMS key" + + # Give full access to key for root account, and lambda role ability to use. + enable_default_policy = true + key_users = [aws_iam_role.process_landing_bucket_files.arn] + + deletion_window_in_days = 7 + + # Grant external account role specific operations. + # To view grants, need to use cli: + # aws kms list-grants --region=eu-west-2 --key-id + grants = var.cross_account_access_role != null ? 
{ + cross_account_access_role = { + grantee_principal = "arn:aws:iam::${var.cross_account_access_role.account_number}:role/${var.cross_account_access_role.role_name}" + operations = [ + "Encrypt", + "GenerateDataKey", + ] + } + } : {} + + tags = merge( + var.local_tags, + { order_type = var.order_type }, + { data_feed = var.data_feed } + ) +} + #----------------------------------------------------------------------------------- # Process landing bucket files - lambda triggers #----------------------------------------------------------------------------------- @@ -155,6 +195,17 @@ data "aws_iam_policy_document" "process_landing_bucket_files_s3_policy_document" "arn:aws:s3:::${var.received_files_bucket_id}/*", ] } + + statement { + sid = "KMSDecryptObjects" + effect = "Allow" + actions = [ + "kms:Decrypt", + ] + resources = [ + module.kms_key.key_arn, + ] + } } resource "aws_iam_policy" "process_landing_bucket_files_s3" {