From 89fb79a4b7e198aa44f6c4fdf62fb4ea2df5b50c Mon Sep 17 00:00:00 2001 From: Dominic Robinson <65237317+drobinson-moj@users.noreply.github.com> Date: Mon, 25 Nov 2024 14:47:52 +0000 Subject: [PATCH] TM-720: enable scheduled ssm command monitoring (#8785) * align main.tf across accounts * enable ssm monitoring and widgets * fix * fix * remove alarm --- .../environments/corporate-staff-rostering/locals.tf | 2 ++ terraform/environments/corporate-staff-rostering/main.tf | 6 ++++++ terraform/environments/hmpps-domain-services/locals.tf | 2 ++ .../hmpps-domain-services/locals_preproduction.tf | 5 +++-- terraform/environments/hmpps-domain-services/main.tf | 8 +++----- terraform/environments/hmpps-oem/locals.tf | 1 + .../hmpps-oem/locals_cloudwatch_metric_alarms.tf | 1 - terraform/environments/hmpps-oem/main.tf | 4 ++-- terraform/environments/nomis-combined-reporting/locals.tf | 2 ++ terraform/environments/nomis-combined-reporting/main.tf | 6 ++++++ terraform/environments/nomis-data-hub/locals.tf | 2 ++ terraform/environments/nomis-data-hub/main.tf | 6 ++++++ terraform/environments/nomis/locals.tf | 1 + terraform/environments/nomis/main.tf | 6 ++++++ terraform/environments/oasys-national-reporting/locals.tf | 2 ++ terraform/environments/oasys-national-reporting/main.tf | 6 ++++++ terraform/environments/oasys/locals.tf | 2 ++ terraform/environments/oasys/main.tf | 6 ++++++ terraform/environments/planetfm/locals.tf | 2 ++ terraform/environments/planetfm/main.tf | 6 ++++++ .../modules/baseline_presets/cloudwatch_metric_alarms.tf | 2 +- terraform/modules/baseline_presets/variables.tf | 1 + 22 files changed, 68 insertions(+), 11 deletions(-) diff --git a/terraform/environments/corporate-staff-rostering/locals.tf b/terraform/environments/corporate-staff-rostering/locals.tf index 27bb2195852..e44f27dc5e9 100644 --- a/terraform/environments/corporate-staff-rostering/locals.tf +++ b/terraform/environments/corporate-staff-rostering/locals.tf @@ -26,6 +26,7 @@ locals { "ec2_linux", "ec2_instance_linux", "ec2_instance_oracle_db_with_backup", + "ssm_command", ] cloudwatch_metric_alarms_default_actions = ["pagerduty"] cloudwatch_metric_oam_links_ssm_parameters = ["hmpps-oem-${local.environment}"] @@ -45,6 +46,7 @@ locals { enable_s3_db_backup_bucket = true enable_s3_shared_bucket = true enable_s3_software_bucket = true + enable_ssm_command_monitoring = true s3_iam_policies = ["EC2S3BucketWriteAndDeleteAccessPolicy"] software_bucket_name = "csr-software" } diff --git a/terraform/environments/corporate-staff-rostering/main.tf b/terraform/environments/corporate-staff-rostering/main.tf index a23f7e6d41b..d234a1e3a07 100644 --- a/terraform/environments/corporate-staff-rostering/main.tf +++ b/terraform/environments/corporate-staff-rostering/main.tf @@ -74,6 +74,7 @@ module "baseline" { ) cloudwatch_metric_alarms = merge( + module.baseline_presets.cloudwatch_metric_alarms_baseline, lookup(local.baseline_all_environments, "cloudwatch_metric_alarms", {}), lookup(local.baseline_environment_specific, "cloudwatch_metric_alarms", {}), ) @@ -177,6 +178,11 @@ module "baseline" { lookup(local.baseline_environment_specific, "s3_buckets", {}), ) + schedule_alarms_lambda = merge( + lookup(local.baseline_all_environments, "schedule_alarms_lambda", {}), + lookup(local.baseline_environment_specific, "schedule_alarms_lambda", {}), + ) + secretsmanager_secrets = merge( module.baseline_presets.secretsmanager_secrets, lookup(local.baseline_all_environments, "secretsmanager_secrets", {}), diff --git a/terraform/environments/hmpps-domain-services/locals.tf b/terraform/environments/hmpps-domain-services/locals.tf index d6e5ed777f9..b06f71f4ad6 100644 --- a/terraform/environments/hmpps-domain-services/locals.tf +++ b/terraform/environments/hmpps-domain-services/locals.tf @@ -24,6 +24,7 @@ locals { "lb", "ec2", "ec2_windows", + "ssm_command", ] cloudwatch_metric_alarms_default_actions = ["pagerduty"] cloudwatch_metric_oam_links_ssm_parameters = ["hmpps-oem-${local.environment}"] @@ -38,6 +39,7 @@ locals { enable_hmpps_domain = true enable_image_builder = true enable_s3_bucket = true + enable_ssm_command_monitoring = true s3_iam_policies = ["EC2S3BucketWriteAndDeleteAccessPolicy"] } } diff --git a/terraform/environments/hmpps-domain-services/locals_preproduction.tf b/terraform/environments/hmpps-domain-services/locals_preproduction.tf index bb72afcc886..33544ecf6b1 100644 --- a/terraform/environments/hmpps-domain-services/locals_preproduction.tf +++ b/terraform/environments/hmpps-domain-services/locals_preproduction.tf @@ -153,9 +153,10 @@ locals { }) } - schedule_alarms = { + schedule_alarms_lambda = { + function_name = "schedule-alarms" alarm_patterns = [ - "public-https-*-https-unhealthy-load-balancer-host", + "public-https-*-unhealthy-load-balancer-host", ] } diff --git a/terraform/environments/hmpps-domain-services/main.tf b/terraform/environments/hmpps-domain-services/main.tf index 04db08b1919..d234a1e3a07 100644 --- a/terraform/environments/hmpps-domain-services/main.tf +++ b/terraform/environments/hmpps-domain-services/main.tf @@ -74,6 +74,7 @@ module "baseline" { ) cloudwatch_metric_alarms = merge( + module.baseline_presets.cloudwatch_metric_alarms_baseline, lookup(local.baseline_all_environments, "cloudwatch_metric_alarms", {}), lookup(local.baseline_environment_specific, "cloudwatch_metric_alarms", {}), ) @@ -178,11 +179,8 @@ module "baseline" { ) schedule_alarms_lambda = merge( - { - function_name = "schedule-alarms" - }, - lookup(local.baseline_all_environments, "schedule_alarms", {}), - lookup(local.baseline_environment_specific, "schedule_alarms", {}), + lookup(local.baseline_all_environments, "schedule_alarms_lambda", {}), + lookup(local.baseline_environment_specific, "schedule_alarms_lambda", {}), ) secretsmanager_secrets = merge( diff --git a/terraform/environments/hmpps-oem/locals.tf b/terraform/environments/hmpps-oem/locals.tf index 2fa555f3fd2..73264f3b4cb 100644 --- a/terraform/environments/hmpps-oem/locals.tf +++ b/terraform/environments/hmpps-oem/locals.tf @@ -51,6 +51,7 @@ locals { enable_s3_shared_bucket = true enable_s3_software_bucket = true enable_ssm_command_monitoring = true + enable_ssm_missing_metric_monitoring = true s3_iam_policies = ["EC2S3BucketWriteAndDeleteAccessPolicy"] } } diff --git a/terraform/environments/hmpps-oem/locals_cloudwatch_metric_alarms.tf b/terraform/environments/hmpps-oem/locals_cloudwatch_metric_alarms.tf index 0bbea69f30f..9921b4b0510 100644 --- a/terraform/environments/hmpps-oem/locals_cloudwatch_metric_alarms.tf +++ b/terraform/environments/hmpps-oem/locals_cloudwatch_metric_alarms.tf @@ -25,7 +25,6 @@ locals { csr-r4-pp = ["r4.pp.csr.service.justice.gov.uk", false, "corporate-staff-rostering-pagerduty"] csr-r5-pp = ["r5.pp.csr.service.justice.gov.uk", false, "corporate-staff-rostering-pagerduty"] csr-r6-pp = ["r6.pp.csr.service.justice.gov.uk", false, "corporate-staff-rostering-pagerduty"] - hpa-preprod = ["hpa-preprod.service.hmpps.dsd.io", true, "azure-fixngo-pagerduty"] nomis-lsast = ["c.lsast-nomis.az.justice.gov.uk", true, "nomis-pagerduty"] nomis-pp = ["c.pp-nomis.az.justice.gov.uk", true, "nomis-pagerduty"] nomis-reporting-pp = ["reporting.pp-nomis.az.justice.gov.uk", true, "nomis-combined-reporting-pagerduty"] diff --git a/terraform/environments/hmpps-oem/main.tf b/terraform/environments/hmpps-oem/main.tf index f6c8672c4bd..d234a1e3a07 100644 --- a/terraform/environments/hmpps-oem/main.tf +++ b/terraform/environments/hmpps-oem/main.tf @@ -179,8 +179,8 @@ module "baseline" { ) schedule_alarms_lambda = merge( - lookup(local.baseline_all_environments, "schedule_alarms", {}), - lookup(local.baseline_environment_specific, "schedule_alarms", {}), + lookup(local.baseline_all_environments, "schedule_alarms_lambda", {}), + lookup(local.baseline_environment_specific, "schedule_alarms_lambda", {}), ) secretsmanager_secrets = merge( diff --git a/terraform/environments/nomis-combined-reporting/locals.tf b/terraform/environments/nomis-combined-reporting/locals.tf index 68b4c09eeca..3acc84b9578 100644 --- a/terraform/environments/nomis-combined-reporting/locals.tf +++ b/terraform/environments/nomis-combined-reporting/locals.tf @@ -27,6 +27,7 @@ locals { "ec2_instance_linux", "ec2_instance_oracle_db_with_backup", "ec2_windows", + "ssm_command", ] cloudwatch_metric_alarms_default_actions = ["pagerduty"] cloudwatch_metric_oam_links_ssm_parameters = ["hmpps-oem-${local.environment}"] @@ -44,6 +45,7 @@ locals { enable_s3_bucket = true enable_s3_db_backup_bucket = true enable_s3_software_bucket = true + enable_ssm_command_monitoring = true s3_iam_policies = ["EC2S3BucketWriteAndDeleteAccessPolicy"] } } diff --git a/terraform/environments/nomis-combined-reporting/main.tf b/terraform/environments/nomis-combined-reporting/main.tf index a23f7e6d41b..d234a1e3a07 100644 --- a/terraform/environments/nomis-combined-reporting/main.tf +++ b/terraform/environments/nomis-combined-reporting/main.tf @@ -74,6 +74,7 @@ module "baseline" { ) cloudwatch_metric_alarms = merge( + module.baseline_presets.cloudwatch_metric_alarms_baseline, lookup(local.baseline_all_environments, "cloudwatch_metric_alarms", {}), lookup(local.baseline_environment_specific, "cloudwatch_metric_alarms", {}), ) @@ -177,6 +178,11 @@ module "baseline" { lookup(local.baseline_environment_specific, "s3_buckets", {}), ) + schedule_alarms_lambda = merge( + lookup(local.baseline_all_environments, "schedule_alarms_lambda", {}), + lookup(local.baseline_environment_specific, "schedule_alarms_lambda", {}), + ) + secretsmanager_secrets = merge( module.baseline_presets.secretsmanager_secrets, lookup(local.baseline_all_environments, "secretsmanager_secrets", {}), diff --git a/terraform/environments/nomis-data-hub/locals.tf b/terraform/environments/nomis-data-hub/locals.tf index 794da1cd6bf..2ca908e66c0 100644 --- a/terraform/environments/nomis-data-hub/locals.tf +++ b/terraform/environments/nomis-data-hub/locals.tf @@ -26,6 +26,7 @@ locals { "ec2_instance_linux", "ec2_instance_textfile_monitoring", "ec2_windows", + "ssm_command", ] cloudwatch_metric_alarms_default_actions = ["pagerduty"] cloudwatch_metric_oam_links_ssm_parameters = ["hmpps-oem-${local.environment}"] @@ -42,6 +43,7 @@ locals { enable_image_builder = true enable_s3_bucket = true enable_s3_software_bucket = true + enable_ssm_command_monitoring = true s3_iam_policies = ["EC2S3BucketWriteAndDeleteAccessPolicy"] } } diff --git a/terraform/environments/nomis-data-hub/main.tf b/terraform/environments/nomis-data-hub/main.tf index a23f7e6d41b..d234a1e3a07 100644 --- a/terraform/environments/nomis-data-hub/main.tf +++ b/terraform/environments/nomis-data-hub/main.tf @@ -74,6 +74,7 @@ module "baseline" { ) cloudwatch_metric_alarms = merge( + module.baseline_presets.cloudwatch_metric_alarms_baseline, lookup(local.baseline_all_environments, "cloudwatch_metric_alarms", {}), lookup(local.baseline_environment_specific, "cloudwatch_metric_alarms", {}), ) @@ -177,6 +178,11 @@ module "baseline" { lookup(local.baseline_environment_specific, "s3_buckets", {}), ) + schedule_alarms_lambda = merge( + lookup(local.baseline_all_environments, "schedule_alarms_lambda", {}), + lookup(local.baseline_environment_specific, "schedule_alarms_lambda", {}), + ) + secretsmanager_secrets = merge( module.baseline_presets.secretsmanager_secrets, lookup(local.baseline_all_environments, "secretsmanager_secrets", {}), diff --git a/terraform/environments/nomis/locals.tf b/terraform/environments/nomis/locals.tf index 9b644f9da66..2f5c27143a0 100644 --- a/terraform/environments/nomis/locals.tf +++ b/terraform/environments/nomis/locals.tf @@ -37,6 +37,7 @@ locals { enable_s3_bucket = true enable_s3_db_backup_bucket = true enable_s3_software_bucket = true + enable_ssm_command_monitoring = true route53_resolver_rules = { outbound-data-and-private-subnets = ["azure-fixngo-domain"] } s3_iam_policies = ["EC2S3BucketWriteAndDeleteAccessPolicy"] software_bucket_name = "ec2-image-builder-nomis" diff --git a/terraform/environments/nomis/main.tf b/terraform/environments/nomis/main.tf index a23f7e6d41b..d234a1e3a07 100644 --- a/terraform/environments/nomis/main.tf +++ b/terraform/environments/nomis/main.tf @@ -74,6 +74,7 @@ module "baseline" { ) cloudwatch_metric_alarms = merge( + module.baseline_presets.cloudwatch_metric_alarms_baseline, lookup(local.baseline_all_environments, "cloudwatch_metric_alarms", {}), lookup(local.baseline_environment_specific, "cloudwatch_metric_alarms", {}), ) @@ -177,6 +178,11 @@ module "baseline" { lookup(local.baseline_environment_specific, "s3_buckets", {}), ) + schedule_alarms_lambda = merge( + lookup(local.baseline_all_environments, "schedule_alarms_lambda", {}), + lookup(local.baseline_environment_specific, "schedule_alarms_lambda", {}), + ) + secretsmanager_secrets = merge( module.baseline_presets.secretsmanager_secrets, lookup(local.baseline_all_environments, "secretsmanager_secrets", {}), diff --git a/terraform/environments/oasys-national-reporting/locals.tf b/terraform/environments/oasys-national-reporting/locals.tf index c8298624dd8..05c0cc63abe 100644 --- a/terraform/environments/oasys-national-reporting/locals.tf +++ b/terraform/environments/oasys-national-reporting/locals.tf @@ -26,6 +26,7 @@ locals { "ec2_linux", "ec2_instance_linux", "ec2_windows", + "ssm_command", ] cloudwatch_metric_alarms_default_actions = ["pagerduty"] cloudwatch_metric_oam_links_ssm_parameters = ["hmpps-oem-${local.environment}"] @@ -41,6 +42,7 @@ locals { enable_image_builder = true enable_s3_bucket = true enable_s3_shared_bucket = true + enable_ssm_command_monitoring = true s3_iam_policies = ["EC2S3BucketWriteAndDeleteAccessPolicy"] } } diff --git a/terraform/environments/oasys-national-reporting/main.tf b/terraform/environments/oasys-national-reporting/main.tf index a23f7e6d41b..d234a1e3a07 100644 --- a/terraform/environments/oasys-national-reporting/main.tf +++ b/terraform/environments/oasys-national-reporting/main.tf @@ -74,6 +74,7 @@ module "baseline" { ) cloudwatch_metric_alarms = merge( + module.baseline_presets.cloudwatch_metric_alarms_baseline, lookup(local.baseline_all_environments, "cloudwatch_metric_alarms", {}), lookup(local.baseline_environment_specific, "cloudwatch_metric_alarms", {}), ) @@ -177,6 +178,11 @@ module "baseline" { lookup(local.baseline_environment_specific, "s3_buckets", {}), ) + schedule_alarms_lambda = merge( + lookup(local.baseline_all_environments, "schedule_alarms_lambda", {}), + lookup(local.baseline_environment_specific, "schedule_alarms_lambda", {}), + ) + secretsmanager_secrets = merge( module.baseline_presets.secretsmanager_secrets, lookup(local.baseline_all_environments, "secretsmanager_secrets", {}), diff --git a/terraform/environments/oasys/locals.tf b/terraform/environments/oasys/locals.tf index 150199c622a..21c2dff9d24 100644 --- a/terraform/environments/oasys/locals.tf +++ b/terraform/environments/oasys/locals.tf @@ -28,6 +28,7 @@ locals { "ec2_instance_linux", "ec2_instance_oracle_db_with_backup", "ec2_instance_textfile_monitoring", + "ssm_command", ] cloudwatch_metric_alarms_default_actions = ["pagerduty"] cloudwatch_metric_oam_links_ssm_parameters = ["hmpps-oem-${local.environment}"] @@ -45,6 +46,7 @@ locals { enable_s3_bucket = true enable_s3_db_backup_bucket = true enable_s3_shared_bucket = true + enable_ssm_command_monitoring = true enable_vmimport = true s3_bucket_name = "${local.application_name}-${local.environment}" s3_iam_policies = ["EC2S3BucketWriteAndDeleteAccessPolicy"] diff --git a/terraform/environments/oasys/main.tf b/terraform/environments/oasys/main.tf index a23f7e6d41b..d234a1e3a07 100644 --- a/terraform/environments/oasys/main.tf +++ b/terraform/environments/oasys/main.tf @@ -74,6 +74,7 @@ module "baseline" { ) cloudwatch_metric_alarms = merge( + module.baseline_presets.cloudwatch_metric_alarms_baseline, lookup(local.baseline_all_environments, "cloudwatch_metric_alarms", {}), lookup(local.baseline_environment_specific, "cloudwatch_metric_alarms", {}), ) @@ -177,6 +178,11 @@ module "baseline" { lookup(local.baseline_environment_specific, "s3_buckets", {}), ) + schedule_alarms_lambda = merge( + lookup(local.baseline_all_environments, "schedule_alarms_lambda", {}), + lookup(local.baseline_environment_specific, "schedule_alarms_lambda", {}), + ) + secretsmanager_secrets = merge( module.baseline_presets.secretsmanager_secrets, lookup(local.baseline_all_environments, "secretsmanager_secrets", {}), diff --git a/terraform/environments/planetfm/locals.tf b/terraform/environments/planetfm/locals.tf index f0e16d6eda3..3fc3d4c065e 100644 --- a/terraform/environments/planetfm/locals.tf +++ b/terraform/environments/planetfm/locals.tf @@ -24,6 +24,7 @@ locals { "network_lb", "ec2", "ec2_windows", + "ssm_command", ] cloudwatch_metric_alarms_default_actions = ["pagerduty"] cloudwatch_metric_oam_links_ssm_parameters = ["hmpps-oem-${local.environment}"] @@ -39,6 +40,7 @@ locals { enable_image_builder = true enable_s3_bucket = true enable_s3_software_bucket = true + enable_ssm_command_monitoring = true s3_iam_policies = ["EC2S3BucketWriteAndDeleteAccessPolicy"] } } diff --git a/terraform/environments/planetfm/main.tf b/terraform/environments/planetfm/main.tf index a23f7e6d41b..d234a1e3a07 100644 --- a/terraform/environments/planetfm/main.tf +++ b/terraform/environments/planetfm/main.tf @@ -74,6 +74,7 @@ module "baseline" { ) cloudwatch_metric_alarms = merge( + module.baseline_presets.cloudwatch_metric_alarms_baseline, lookup(local.baseline_all_environments, "cloudwatch_metric_alarms", {}), lookup(local.baseline_environment_specific, "cloudwatch_metric_alarms", {}), ) @@ -177,6 +178,11 @@ module "baseline" { lookup(local.baseline_environment_specific, "s3_buckets", {}), ) + schedule_alarms_lambda = merge( + lookup(local.baseline_all_environments, "schedule_alarms_lambda", {}), + lookup(local.baseline_environment_specific, "schedule_alarms_lambda", {}), + ) + secretsmanager_secrets = merge( module.baseline_presets.secretsmanager_secrets, lookup(local.baseline_all_environments, "secretsmanager_secrets", {}), diff --git a/terraform/modules/baseline_presets/cloudwatch_metric_alarms.tf b/terraform/modules/baseline_presets/cloudwatch_metric_alarms.tf index 795670a383f..8a29488c93b 100644 --- a/terraform/modules/baseline_presets/cloudwatch_metric_alarms.tf +++ b/terraform/modules/baseline_presets/cloudwatch_metric_alarms.tf @@ -466,7 +466,7 @@ locals { var.options.enable_ssm_command_monitoring ? { "failed-ssm-command-${var.environment.account_name}" = local.cloudwatch_metric_alarms_by_sns_topic["dso-pipelines-pagerduty"].ssm.failed-ssm-command } : {}, - var.options.enable_ssm_command_monitoring ? { + var.options.enable_ssm_missing_metric_monitoring ? { "ssm-command-metrics-missing-${var.environment.account_name}" = local.cloudwatch_metric_alarms_by_sns_topic["dso-pipelines-pagerduty"].ssm.ssm-command-metrics-missing } : {}, ) diff --git a/terraform/modules/baseline_presets/variables.tf b/terraform/modules/baseline_presets/variables.tf index 250569f6f1c..e1a08a9bc19 100644 --- a/terraform/modules/baseline_presets/variables.tf +++ b/terraform/modules/baseline_presets/variables.tf @@ -43,6 +43,7 @@ variable "options" { enable_s3_shared_bucket = optional(bool, false) # create devtest and preprodprod S3 bucket for sharing between accounts enable_s3_software_bucket = optional(bool, false) # create software S3 bucket in test account for image builder/configuration-management enable_ssm_command_monitoring = optional(bool, false) # create SNS topic and alarms for SSM command monitoring + enable_ssm_missing_metric_monitoring = optional(bool, false) # create alarm if SSM command metrics are missing enable_vmimport = optional(bool, false) # create role for vm imports route53_resolver_rules = optional(map(list(string)), {}) # create route53 resolver rules; list of map keys to filter local.route53_resolver_rules_all iam_service_linked_roles = optional(list(string)) # create iam service linked roles; list of map keys to filter local.iam_service_linked_roles; default is to create all