diff --git a/.github/workflows/awsnuke.yml b/.github/workflows/awsnuke.yml index d49ab888f49..49558eaa5b7 100644 --- a/.github/workflows/awsnuke.yml +++ b/.github/workflows/awsnuke.yml @@ -133,11 +133,11 @@ jobs: - name: Slack failure notification uses: slackapi/slack-github-action@485a9d42d3a73031f12ec201c457e2162c45d02d # v2.0.0 with: + webhook-type: incoming-webhook payload: | {"blocks":[{"type": "section","text": {"type": "mrkdwn","text": ":no_entry: Failed GitHub Action:"}},{"type": "section","fields":[{"type": "mrkdwn","text": "*Workflow:*\n<${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|${{ github.workflow }}>"},{"type": "mrkdwn","text": "*Job:*\n${{ github.job }}"},{"type": "mrkdwn","text": "*Repo:*\n${{ github.repository }}"}]}]} env: SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }} - SLACK_WEBHOOK_TYPE: INCOMING_WEBHOOK if: ${{ failure() }} env: ACCOUNT_NAME: ${{ matrix.nuke_accts }} @@ -217,11 +217,11 @@ jobs: - name: Slack failure notification uses: slackapi/slack-github-action@485a9d42d3a73031f12ec201c457e2162c45d02d # v2.0.0 with: + webhook-type: incoming-webhook payload: | {"blocks":[{"type": "section","text": {"type": "mrkdwn","text": ":no_entry: Failed GitHub Action:"}},{"type": "section","fields":[{"type": "mrkdwn","text": "*Workflow:*\n<${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|${{ github.workflow }}>"},{"type": "mrkdwn","text": "*Job:*\n${{ github.job }}"},{"type": "mrkdwn","text": "*Repo:*\n${{ github.repository }}"}]}]} env: SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }} - SLACK_WEBHOOK_TYPE: INCOMING_WEBHOOK if: ${{ failure() }} env: AWS_ACCESS_KEY_ID: ${{ secrets.TESTING_AWS_ACCESS_KEY_ID }} diff --git a/.github/workflows/code-scanning.yml b/.github/workflows/code-scanning.yml index e7dd1d1e13e..53abb75e399 100644 --- a/.github/workflows/code-scanning.yml +++ b/.github/workflows/code-scanning.yml @@ -81,7 +81,7 @@ jobs: fetch-depth: 0 - name: Run Checkov action id: checkov - uses: bridgecrewio/checkov-action@5ae57a8860ce0657cb09591f5b8b8d9ead999a68 # v12.2920.0 + uses: bridgecrewio/checkov-action@b8f970b660bc01f598fc2f108eabd9e8dee728f8 # v12.2924.0 with: directory: ./ framework: terraform diff --git a/.github/workflows/nuke-redeploy.yml b/.github/workflows/nuke-redeploy.yml index 80c2bb6772c..c95fcb7965b 100644 --- a/.github/workflows/nuke-redeploy.yml +++ b/.github/workflows/nuke-redeploy.yml @@ -93,11 +93,11 @@ jobs: - name: Slack failure notification uses: slackapi/slack-github-action@485a9d42d3a73031f12ec201c457e2162c45d02d # v2.0.0 with: + webhook-type: incoming-webhook payload: | {"blocks":[{"type": "section","text": {"type": "mrkdwn","text": ":no_entry: Failed GitHub Action:"}},{"type": "section","fields":[{"type": "mrkdwn","text": "*Workflow:*\n<${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|${{ github.workflow }}>"},{"type": "mrkdwn","text": "*Job:*\n${{ github.job }}"},{"type": "mrkdwn","text": "*Repo:*\n${{ github.repository }}"}]}]} env: SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }} - SLACK_WEBHOOK_TYPE: INCOMING_WEBHOOK if: ${{ failure() }} env: diff --git a/terraform/environments/analytical-platform-compute/environment-configuration.tf b/terraform/environments/analytical-platform-compute/environment-configuration.tf index 220d4b3a868..0cde0ad3c9f 100644 --- a/terraform/environments/analytical-platform-compute/environment-configuration.tf +++ b/terraform/environments/analytical-platform-compute/environment-configuration.tf 
@@ -43,6 +43,7 @@ locals { /* UI */ ui_hostname = "development.analytical-platform.service.justice.gov.uk" + } test = { /* VPC */ @@ -131,6 +132,11 @@ locals { /* UI */ ui_hostname = "analytical-platform.service.justice.gov.uk" + + /* LF Domain Tags */ + cadet_lf_tags = { + domain = ["bold", "civil", "courts", "general", "criminal_history", "development_sandpit", "electronic_monitoring", "finance", "interventions", "opg", "performance", "risk", "people", "prison", "probation", "victims", "victims_case_management"] # extracted from bucket paths + } } } } diff --git a/terraform/environments/analytical-platform-compute/helm-charts-applications.tf b/terraform/environments/analytical-platform-compute/helm-charts-applications.tf deleted file mode 100644 index 967af2b0763..00000000000 --- a/terraform/environments/analytical-platform-compute/helm-charts-applications.tf +++ /dev/null @@ -1,17 +0,0 @@ -resource "helm_release" "ui" { - /* https://github.com/ministryofjustice/analytical-platform-ui */ - name = "ui" - repository = "oci://ghcr.io/ministryofjustice/analytical-platform-charts" - version = "0.2.6" - chart = "analytical-platform-ui" - namespace = kubernetes_namespace.ui.metadata[0].name - values = [ - templatefile( - "${path.module}/src/helm/values/ui/values.yml.tftpl", - { - ui_hostname = local.environment_configuration.ui_hostname - eks_role_arn = module.analytical_platform_ui_service_role.iam_role_arn - } - ) - ] -} diff --git a/terraform/environments/analytical-platform-compute/iam-policies.tf b/terraform/environments/analytical-platform-compute/iam-policies.tf index 9885f6511c0..607480dde57 100644 --- a/terraform/environments/analytical-platform-compute/iam-policies.tf +++ b/terraform/environments/analytical-platform-compute/iam-policies.tf @@ -350,7 +350,7 @@ module "data_production_mojap_derived_bucket_lake_formation_policy" { tags = local.tags } -data "aws_iam_policy_document" "analytical_platform_cadet_runner_compute_policy" { +data "aws_iam_policy_document" "copy_apdp_cadet_metadata_to_compute_policy" { #checkov:skip=CKV_TF_1:Module registry does not support commit hashes for versions #checkov:skip=CKV_TF_2:Module registry does not support tags for versions statement { @@ -396,6 +396,7 @@ data "aws_iam_policy_document" "analytical_platform_cadet_runner_compute_policy" ] } statement { + sid = "GlueFetchMetadataAccess" effect = "Allow" actions = [ "glue:GetTable", @@ -404,18 +405,52 @@ data "aws_iam_policy_document" "analytical_platform_cadet_runner_compute_policy" ] resources = ["arn:aws:glue:eu-west-2:${data.aws_caller_identity.current.account_id}:*"] } + statement { + sid = "AthenaQueryBucketAccess" + effect = "Allow" + actions = [ + "s3:GetBucketLocation", + "s3:GetObject", + "s3:ListBucket", + "s3:ListBucketMultipartUploads", + "s3:ListMultipartUploadParts", + "s3:AbortMultipartUpload", + "s3:PutObject" + ] + resources = [ + module.mojap_compute_athena_query_results_bucket_eu_west_2.s3_bucket_arn, + "${module.mojap_compute_athena_query_results_bucket_eu_west_2.s3_bucket_arn}/*" + ] + } + statement { + sid = "AlterLFTags" + effect = "Allow" + actions = [ + "lakeformation:ListLFTags", + "lakeformation:GetLFTag", + "lakeformation:CreateLFTag", + "lakeformation:UpdateLFTag", + "lakeformation:AddLFTagsToResource", + "lakeformation:RemoveLFTagsFromResource", + "lakeformation:GetResourceLFTags", + "lakeformation:SearchTablesByLFTags", + "lakeformation:SearchDatabasesByLFTags", + ] + resources = ["*"] + } + } -module "analytical_platform_cadet_runner_compute_policy" { +module 
"copy_apdp_cadet_metadata_to_compute_policy" { #checkov:skip=CKV_TF_1:Module registry does not support commit hashes for versions #checkov:skip=CKV_TF_2:Module registry does not support tags for versions source = "terraform-aws-modules/iam/aws//modules/iam-policy" version = "5.48.0" - name_prefix = "analytical-platform-cadet-runner-compute-policy" + name_prefix = "copy-apdp-cadet-metadata-to-compute-" - policy = data.aws_iam_policy_document.analytical_platform_cadet_runner_compute_policy.json + policy = data.aws_iam_policy_document.copy_apdp_cadet_metadata_to_compute_policy.json tags = local.tags } diff --git a/terraform/environments/analytical-platform-compute/iam-roles.tf b/terraform/environments/analytical-platform-compute/iam-roles.tf index 74671b1f0b0..e8dbb45c0d5 100644 --- a/terraform/environments/analytical-platform-compute/iam-roles.tf +++ b/terraform/environments/analytical-platform-compute/iam-roles.tf @@ -374,7 +374,7 @@ module "lake_formation_to_data_production_mojap_derived_tables_role" { tags = local.tags } -module "analytical_platform_cadet_runner" { +module "copy_apdp_cadet_metadata_to_compute_assumable_role" { #checkov:skip=CKV_TF_1:Module registry does not support commit hashes for versions #checkov:skip=CKV_TF_2:Module registry does not support tags for versions source = "terraform-aws-modules/iam/aws//modules/iam-assumable-role" @@ -384,9 +384,13 @@ module "analytical_platform_cadet_runner" { trusted_role_arns = ["arn:aws:iam::${local.environment_management.account_ids["analytical-platform-data-production"]}:role/create-a-derived-table"] create_role = true role_requires_mfa = false - role_name = "analytical-platform-cadet-runner-assumable" + role_name = "copy-apdp-cadet-metadata-to-compute" - custom_role_policy_arns = [module.analytical_platform_cadet_runner_compute_policy.arn] + custom_role_policy_arns = [module.copy_apdp_cadet_metadata_to_compute_policy.arn] # number_of_custom_role_policy_arns = 1 +} +moved { + from = module.analytical_platform_cadet_runner + to = module.copy_apdp_cadet_metadata_to_compute_assumable_role } diff --git a/terraform/environments/analytical-platform-compute/lakeformation-data-lake-settings.tf b/terraform/environments/analytical-platform-compute/lakeformation-data-lake-settings.tf index 8d7f889e1a2..bd8fc44557a 100644 --- a/terraform/environments/analytical-platform-compute/lakeformation-data-lake-settings.tf +++ b/terraform/environments/analytical-platform-compute/lakeformation-data-lake-settings.tf @@ -5,7 +5,8 @@ resource "aws_lakeformation_data_lake_settings" "london" { module.analytical_platform_ui_service_role.iam_role_arn, module.analytical_platform_data_eng_dba_service_role.iam_role_arn, "arn:aws:iam::${data.aws_caller_identity.current.account_id}:role/aws-reserved/sso.amazonaws.com/${data.aws_region.current.name}/${one(data.aws_iam_roles.data_engineering_sso_role.names)}", - "arn:aws:iam::${data.aws_caller_identity.current.account_id}:role/aws-reserved/sso.amazonaws.com/${data.aws_region.current.name}/${one(data.aws_iam_roles.eks_sso_access_role.names)}" + "arn:aws:iam::${data.aws_caller_identity.current.account_id}:role/aws-reserved/sso.amazonaws.com/${data.aws_region.current.name}/${one(data.aws_iam_roles.eks_sso_access_role.names)}", + module.copy_apdp_cadet_metadata_to_compute_assumable_role.iam_role_arn ] } diff --git a/terraform/environments/analytical-platform-compute/lakeformation-permissions.tf b/terraform/environments/analytical-platform-compute/lakeformation-permissions.tf new file mode 100644 index 
00000000000..bf586290764 --- /dev/null +++ b/terraform/environments/analytical-platform-compute/lakeformation-permissions.tf @@ -0,0 +1,64 @@ + +resource "aws_lakeformation_lf_tag" "source" { + count = terraform.workspace == "analytical-platform-compute-production" ? 1 : 0 + key = "source" + values = ["create-a-derived-table"] +} + +resource "aws_lakeformation_permissions" "cadet_all_data" { + for_each = (terraform.workspace == "analytical-platform-compute-production" ? + toset(["TABLE", "DATABASE"]) : toset([])) + + principal = module.copy_apdp_cadet_metadata_to_compute_assumable_role.iam_role_arn + permissions = ["ALL"] # https://docs.aws.amazon.com/lake-formation/latest/dg/lf-permissions-reference.html + + lf_tag_policy { + resource_type = each.value + expression { + key = "source" + values = ["create-a-derived-table"] + } + } +} + +resource "aws_lakeformation_lf_tag" "domain" { + for_each = try(local.environment_configuration.cadet_lf_tags, {}) + key = each.key + values = each.value +} + +resource "aws_lakeformation_permissions" "cadet_domain_database_data" { + for_each = try(local.environment_configuration.cadet_lf_tags, {}) + + principal = module.copy_apdp_cadet_metadata_to_compute_assumable_role.iam_role_arn + permissions = ["ALL"] # https://docs.aws.amazon.com/lake-formation/latest/dg/lf-permissions-reference.html + + lf_tag_policy { + resource_type = "DATABASE" + expression { + key = each.key + values = each.value + } + } +} + +resource "aws_lakeformation_permissions" "cadet_domain_table_data" { + for_each = try(local.environment_configuration.cadet_lf_tags, {}) + + principal = module.copy_apdp_cadet_metadata_to_compute_assumable_role.iam_role_arn + permissions = ["ALL"] # https://docs.aws.amazon.com/lake-formation/latest/dg/lf-permissions-reference.html + + lf_tag_policy { + resource_type = "TABLE" + expression { + key = each.key + values = each.value + } + } +} + +import { + for_each = try(local.environment_configuration.cadet_lf_tags, {}) + to = aws_lakeformation_lf_tag.domain[each.key] + id = "${local.environment_management.account_ids[terraform.workspace]}:${each.key}" +} diff --git a/terraform/environments/analytical-platform-compute/src/helm/values/amazon-prometheus-proxy/values.yml.tftpl b/terraform/environments/analytical-platform-compute/src/helm/values/amazon-prometheus-proxy/values.yml.tftpl index 469f7871cf6..0a6f30fe7ad 100644 --- a/terraform/environments/analytical-platform-compute/src/helm/values/amazon-prometheus-proxy/values.yml.tftpl +++ b/terraform/environments/analytical-platform-compute/src/helm/values/amazon-prometheus-proxy/values.yml.tftpl @@ -14,6 +14,10 @@ alertmanager: grafana: enabled: false +kube-state-metrics: + extraArgs: + - --metric-labels-allowlist=pods=[*] + prometheus: agentMode: true serviceAccount: diff --git a/terraform/environments/apex/application_variables.json b/terraform/environments/apex/application_variables.json index 88dc67f6171..8696d1cc328 100644 --- a/terraform/environments/apex/application_variables.json +++ b/terraform/environments/apex/application_variables.json @@ -153,17 +153,17 @@ "lz_account_id": "484221692666" }, "production": { - "ec2amiid": "ami-0fd11105aa6dcd77d", + "ec2amiid": "ami-0484bb2dca8e69b20", "ec2instancetype": "t3.xlarge", "workspace_cidr": "10.200.16.0/20", "u01_orahome_size": "20", - "u01_orahome_snapshot": "snap-0ff8db461fc1cc4af", + "u01_orahome_snapshot": "snap-0f65cdb141493bd72", "u02_oradata_size": "100", - "u02_oradata_snapshot": "snap-00aee055837c0f329", + "u02_oradata_snapshot": 
"snap-0497aeb9f2f4029e3", "u03_redo_size": "50", - "u03_redo_snapshot": "snap-07292192ef32185e2", + "u03_redo_snapshot": "snap-0f0890c80d8c4efe9", "u04_arch_size": "50", - "u04_arch_snapshot": "snap-0d419d52da8066a22", + "u04_arch_snapshot": "snap-07f384ea32af67b25", "container_instance_type": "linux", "instance_type": "m5.large", "key_name": "", diff --git a/terraform/environments/apex/ec2.tf b/terraform/environments/apex/ec2.tf index 0aeb376f0bd..9a30ed3597e 100644 --- a/terraform/environments/apex/ec2.tf +++ b/terraform/environments/apex/ec2.tf @@ -65,14 +65,14 @@ resource "aws_vpc_security_group_ingress_rule" "db_ecs" { to_port = 1521 } -# resource "aws_vpc_security_group_ingress_rule" "db_mp_vpc" { -# security_group_id = aws_security_group.database.id -# description = "Allow MP VPC (OAS) to access database instance" -# cidr_ipv4 = data.aws_vpc.shared.cidr_block -# from_port = 1521 -# ip_protocol = "tcp" -# to_port = 1521 -# } +resource "aws_vpc_security_group_ingress_rule" "db_mp_vpc" { + security_group_id = aws_security_group.database.id + description = "Allow MP VPC (OAS) to access database instance" + cidr_ipv4 = data.aws_vpc.shared.cidr_block + from_port = 1521 + ip_protocol = "tcp" + to_port = 1521 +} resource "aws_vpc_security_group_ingress_rule" "db_lambda" { security_group_id = aws_security_group.database.id @@ -83,24 +83,24 @@ resource "aws_vpc_security_group_ingress_rule" "db_lambda" { to_port = 22 } -# resource "aws_vpc_security_group_ingress_rule" "db_workspace" { -# security_group_id = aws_security_group.database.id -# description = "Database listener port access to Workspaces" -# cidr_ipv4 = local.application_data.accounts[local.environment].workspace_cidr -# from_port = 1521 -# ip_protocol = "tcp" -# to_port = 1521 -# } +resource "aws_vpc_security_group_ingress_rule" "db_workspace" { + security_group_id = aws_security_group.database.id + description = "Database listener port access to Workspaces" + cidr_ipv4 = local.application_data.accounts[local.environment].workspace_cidr + from_port = 1521 + ip_protocol = "tcp" + to_port = 1521 +} # This is a temp rule whilst OAS resides in LZ -# resource "aws_vpc_security_group_ingress_rule" "oas_lz" { -# security_group_id = aws_security_group.database.id -# description = "Allow OAS in LZ to access APEX" -# cidr_ipv4 = local.application_data.accounts[local.environment].oas_lz_cidr -# from_port = 1521 -# ip_protocol = "tcp" -# to_port = 1521 -# } +resource "aws_vpc_security_group_ingress_rule" "oas_lz" { + security_group_id = aws_security_group.database.id + description = "Allow OAS in LZ to access APEX" + cidr_ipv4 = local.application_data.accounts[local.environment].oas_lz_cidr + from_port = 1521 + ip_protocol = "tcp" + to_port = 1521 +} resource "aws_vpc_security_group_egress_rule" "db_outbound" { security_group_id = aws_security_group.database.id diff --git a/terraform/environments/contract-work-administration/backup_lambda.tf b/terraform/environments/contract-work-administration/backup_lambda.tf deleted file mode 100644 index 30c2eb38708..00000000000 --- a/terraform/environments/contract-work-administration/backup_lambda.tf +++ /dev/null @@ -1,300 +0,0 @@ -locals { - create_db_snapshots_script_prefix = "dbsnapshot" - delete_db_snapshots_script_prefix = "deletesnapshots" - db_connect_script_prefix = "dbconnect" -} - -resource "aws_ssm_parameter" "ssh_key" { - name = "EC2_SSH_KEY" # This needs to match the name supplied to the dbconnect.js script - description = "SSH Key used by Lambda function to access database instance for 
backup. Value is updated manually." - type = "SecureString" - value = "Placeholder" - - tags = merge( - local.tags, - { Name = "EC2_SSH_KEY" } - ) - lifecycle { - ignore_changes = [ - value, - ] - } -} - -################################## -### IAM Role for BackUp Lambda -################################## - -data "aws_iam_policy_document" "backup_lambda" { - statement { - effect = "Allow" - - principals { - type = "Service" - identifiers = ["lambda.amazonaws.com", "ssm.amazonaws.com"] - } - - actions = ["sts:AssumeRole"] - } -} - -resource "aws_iam_role" "backup_lambda" { - name = "${local.application_name_short}-backup-lambda-role" - assume_role_policy = data.aws_iam_policy_document.backup_lambda.json - tags = merge( - local.tags, - { Name = "${local.application_name_short}-backup-lambda-role" } - ) -} - -resource "aws_iam_policy" "backup_lambda" { #tfsec:ignore:aws-iam-no-policy-wildcards - name = "${local.application_name_short}-${local.environment}-backup-lambda-policy" - tags = merge( - local.tags, - { Name = "${local.application_name_short}-${local.environment}-backup-lambda-policy" } - ) - policy = <> " + address); - const conn = new Client(); - console.log(`[+] Running "begin backup commands" as Oracle`); - conn.on('ready', () => { - console.log('Client :: ready'); - conn.exec('sudo su - oracle -c "sqlplus / as sysdba < { - if (err) { - reject(err); - } - stream.on('close', (code, signal) => { - conn.end(); - console.log('Stream :: close :: code: ' + code + ', signal: ' + signal); - setTimeout(() => { resolve(); }, 2000); // Ugly solution to wait until the ssh socket closes before resolving... - }).on('data', (data) => { - console.log('STDOUT: ' + data); - if (data.toString().toUpperCase().includes("ERROR")) exec_error = true; - }).stderr.on('data', (data) => { - console.log('STDERR: ' + data); - if (data.toString().toUpperCase().includes("ERROR")) exec_error = true; - }) - ; - }); - }).connect({ - host: address, - port: 22, - username: username, - privateKey: myKey, - // debug: console.log, // Uncomment to get more detailed logs - algorithms: { - kex: ["diffie-hellman-group1-sha1"] - } - }); - } else if (action == "end"){ - console.log("[+] Trying connecting to EC2 ==>> " + address); - console.log(`[+] Running "begin backup commands" as Oracle`); - - const conn = new Client(); - conn.on('ready', () => { - console.log('Client :: ready'); - conn.exec('sudo su - oracle -c "sqlplus / as sysdba < { - if (err) { - reject(err); - } - stream.on('close', (code, signal) => { - conn.end(); - console.log('Stream :: close :: code: ' + code + ', signal: ' + signal); - setTimeout(() => { resolve(); }, 2000); // Ugly solution to wait until the ssh socket closes before resolving... 
- }).on('data', (data) => { - console.log('STDOUT: ' + data); - if (data.toString().toUpperCase().includes("ERROR")) exec_error = true; - }).stderr.on('data', (data) => { - console.log('STDERR: ' + data); - if (data.toString().toUpperCase().includes("ERROR")) exec_error = true; - }) - ; - }); - }).connect({ - host: address, - port: 22, - username: username, - privateKey: myKey, - // debug: console.log, // Uncomment to get more detailed logs - algorithms: { - kex: ["diffie-hellman-group1-sha1"] - } - }); - } - }); - try { - await prom; - console.log('EXEC_ERROR: ' + exec_error); - if (exec_error) { - throw new Error('Please see logs above for more detail.') - } - console.log(`[+] Completed DB alter state: ${action} ==>> ` + address); - } catch (e) { - throw new Error(`SSH Exec did not run successfully on the instance ${address}: ` + e ); - } - } -} - - -exports.handler = async (event, context) => { - try { - console.log("[+} Received event:", JSON.stringify(event, null, 2)); - await connSSH(event.action, event.appname); - - context.done(); - } catch (error) { - throw new Error(error); - } -}; diff --git a/terraform/environments/contract-work-administration/scripts/dbsnapshot.js b/terraform/environments/contract-work-administration/scripts/dbsnapshot.js deleted file mode 100644 index d276e578688..00000000000 --- a/terraform/environments/contract-work-administration/scripts/dbsnapshot.js +++ /dev/null @@ -1,315 +0,0 @@ -///////////////////////////////////////////////////////////////////// -// Automated backup script -// - Calls dbconnect lambda to put DB in backup mode -// - Triggers volume snapshots for all volumes connected to instance -// -// version: 1.0 (for migration to MP) -///////////////////////////////////////////////////////////////////// - -const AWS = require("aws-sdk"); - -//Set date format -var date_ob = new Date(); -var day = ("0" + date_ob.getDate()).slice(-2); -var month = ("0" + (date_ob.getMonth() + 1)).slice(-2); -var year = date_ob.getFullYear(); - -var date = day + "/" + month + "/" + year; - -//lambda object -let lambda = new AWS.Lambda({ apiVersion: "2015-03-31" }); - -//EC2 object -let ec2 = new AWS.EC2({ apiVersion: "2014-10-31" }); - -async function invokeLambdaStart(appname) { - // try { - console.log("[+] Putting DB into backup mode"); - - const lambdaInvokeStart = await lambda - .invoke({ - FunctionName: "connectDBFunction", - InvocationType: "RequestResponse", // This means invoking the function synchronously. Note that if Lambda was able to run the function, the status code is 200, even if the function returned an error. - Payload: JSON.stringify({ action: "begin", appname: appname }), - }) - .promise(); - - //Check lambda returns success - if (lambdaInvokeStart["FunctionError"] == null) - { - // Run the volume snapshots - console.log("[+] Creating volume snapshot"); - await handleSnapshot(appname); - } else { - console.log("Return output: ", lambdaInvokeStart); - throw new Error("The connectDBFunction (begin) Lambda function has an error. 
Please see that function's logs for more information."); - } - - // } catch (e) { - // throw new Error("[-] " + e); - // } -} - -async function invokeLambdaStop(appname) { - // try { - console.log("[+] Putting DB into normal operations mode"); - - // setTimeout(() => { - // console.log("[+] Waiting for DB....."); - // }, 7000); - - const lambdaInvokeStop = await lambda - .invoke({ - FunctionName: "connectDBFunction", - InvocationType: "RequestResponse", - Payload: JSON.stringify({ action: "end", appname: appname }), - }) - .promise(); - - //Check lambda returns success - if (lambdaInvokeStop["FunctionError"] == null) - { - // Run the volume snapshots - console.log("[+] Datatbase is back in normal operations mode"); - } else { - console.log("Return output: ", lambdaInvokeStop); - throw new Error("The connectDBFunction (end) Lambda function has an error. Please see that function's logs for more information."); - } - - // } catch (e) { - // console.log("[-] " + e); - // throw new Error("The connectDBFunction Lambda (end) function has an error. Please see that function's logs for more information."); - // } -} - -async function invokeLambdaFinal(appname) { - try { - console.log("Waiting for DB to be ready"); - await new Promise(resolve => setTimeout(resolve, 30000)); - console.log("[+] Taking final snapshots out of backup mode"); - await handleSnapshot2(appname); - } catch (e) { - console.log("[-]" + e); - throw new Error("There is an error taking final shapshots."); - } -} - - -// Grab volume id all volumes attached to the instance and snapshot - -async function handleSnapshot(appname) { - try { - // Get all instances of our app - const instances = await getInstanceId(appname); - - // Get all volumes on all instances of our app - var volumes_list = []; - var snapshot_list = []; - for (const instance of instances) { - const volumes = await listVolumes(instance); - volumes_list.push(volumes); - } - - // Loop over instance, if more than 1 instance returned - for (const instance_list of volumes_list) { - for (const volume of instance_list["Volumes"]) { - console.log("Taking snapshot of Volume: ", volume); - var volume_id = volume["VolumeId"]; - var volume_device = volume["Attachments"][0]["Device"]; - var volume_name = ''; - for(var tag of volume['Tags']){ - if(tag['Key'].includes('Name')){ - volume_name = tag['Value']; - } - } - // Trigger EBS snapshots - let snap = await ec2CreateSnapshot(volume_id, appname, volume_device, volume_name, date); - snapshot_list.push(snap.SnapshotId); - } - } - } catch (error) { - console.log(error); - } -} - -//Get instanceId for EC2 instances tagged with Name:{ appname } -// May return more than 1 instance if there are multiple instances with the same name -async function getInstance(appname) { - console.log("Getting all instances tagged with Name:", appname); - return ec2 - .describeInstances({ Filters: [{ Name: "tag:Name", Values: [appname] }] }) - .promise(); -} - -// Capture all app instance IPs in a list -async function getInstanceId(appname) { - var instance_id_list = []; - var instance_data = await getInstance(appname); - for (const res of instance_data["Reservations"]) { - for (const instance of res["Instances"]) { - instance_id_list.push(instance["InstanceId"]); - } - } - console.log("Found ", instance_id_list.length, " instances"); - return instance_id_list; -} - -// List all volumes for EC2 instance - -async function listVolumes(instance_id) { - console.log("getting volumes for ", instance_id); - return ec2 - .describeVolumes({ - Filters: [{ Name: 
"attachment.instance-id", Values: [instance_id] }], - }) - .promise(); -} - -// Create EC2 snapshot based on volume id - -async function ec2CreateSnapshot(volume, appname, volume_device, volume_name, date) { - console.log("Creating snapshot of volume:", volume, volume_device, volume_name, date); - let params = { - VolumeId: volume, - Description: - appname + " automatically created snapshot and resource volume id: " + volume, - TagSpecifications: [ - { - ResourceType: "snapshot", - Tags: [ - { - Key: "Name", - Value: appname + "-" + volume_name + "-" + volume_device + "-" + date - }, - { - Key: "Application", - Value: appname - }, - { - Key: "Date", - Value: date - }, - { - Key: "dlm:snapshot-with:volume-hourly-35-day-retention", - Value: "yes" - }, - { - Key: "Created_by", - Value: "Automated snapshot created by DBSnapshotFunction Lambda" - } - ], - }, - ], - }; - return ec2.createSnapshot(params).promise(); -} - -async function handleSnapshot2(appname) { - try { - // Get all instances of our app - const instances = await getInstanceId(appname); - - // Get all volumes on all instances of our app - var volumes_list = []; - for (const instance of instances) { - const volumes = await listVolumes(instance); - volumes_list.push(volumes); - } - - // Loop over instance, if more than 1 instance returned - for (const instance_list of volumes_list) { - for (const volume of instance_list["Volumes"]) { - var volume_id = volume["VolumeId"]; - var volume_device = volume["Attachments"][0]["Device"]; - var volume_name=''; - for(var tag of volume['Tags']){ - if(tag['Key'].includes('Name')){ - volume_name = tag['Value']; - } - } - // if the drive is oraarch/oraredo trigger an EBS snapsot - for(const tag of volume['Tags']){ - if (tag['Value'].includes('arch')){ - console.log(volume_id, "is oraarch volume"); - let snap = await ec2CreateSnapshot2(volume_id, appname, volume_device, volume_name, date); - console.log("[+] Taking snapshot " + snap.SnapshotId); - break; - }} - for(const tag of volume['Tags']){ - if (tag['Value'].includes('redo')){ - console.log(volume_id, "is oraredo volume"); - let snap = await ec2CreateSnapshot2(volume_id, appname, volume_device, volume_name, date); - console.log("[+] Taking snapshot " + snap.SnapshotId); - break; - } - } - } - } - } catch (error) { - console.log(error); - } -} - -async function ec2CreateSnapshot2(volume, appname, volume_device, volume_name, date) { - console.log("Creating snapshot of volume:", volume, volume_device, volume_name, date); - let params = { - VolumeId: volume, - Description: - appname + " automatically created snapshot OUT OF BACKUPMODE and resource volume id: " + volume, - TagSpecifications: [ - { - ResourceType: "snapshot", - Tags: [ - { - Key: "Name", - Value: appname + "-" + volume_name + "-" + volume_device + "-" + date - }, - { - Key: "Application", - Value: appname - }, - { - Key: "Date", - Value: date - }, - { - Key: "dlm:snapshot-with:volume-hourly-35-day-retention", - Value: "yes" - }, - { - Key: "Created_by", - Value: "Automated OUT OF BACKUPMODE snapshot created by DBSnapshotFunction Lambda" - } - ], - }, - ], - }; - return ec2.createSnapshot(params).promise(); -} - -exports.handler = async (event, context) => { - const appname = event.appname; - try { - console.log("Putting DB into Hotbackup mode and taking snapshot"); - await invokeLambdaStart(appname); - } - catch (error) { - throw new Error(error); - } - try{ - console.log("Taking DB out of Hotbackup mode"); - await invokeLambdaStop(appname); - } catch (error) { - throw new 
Error(error); - } - ////////////////////////////////// - // Unsure why this part is required to take a second set of oraarch and oraredo snapshots, thus disabling it for now - ////////////////////////////////// - // try{ - // console.log("Operating outside of Hotbackup mode"); - // await invokeLambdaFinal(appname); - // console.log("Snapshots Complete"); - // } catch (error) { - // throw new Error(error); - // } -}; diff --git a/terraform/environments/contract-work-administration/scripts/deletesnapshots.py b/terraform/environments/contract-work-administration/scripts/deletesnapshots.py deleted file mode 100755 index e222aa8bed8..00000000000 --- a/terraform/environments/contract-work-administration/scripts/deletesnapshots.py +++ /dev/null @@ -1,27 +0,0 @@ -import boto3 -from datetime import datetime - -ec2 = boto3.client('ec2', 'eu-west-2') -paginator = ec2.get_paginator('describe_snapshots') -page_iterator = paginator.paginate(OwnerIds=['self']) - -def lambda_handler(event, context): - count = 0 - for page in page_iterator: - for snapshot in page['Snapshots']: - a = snapshot['StartTime'] - b = a.date() - c = datetime.now().date() - d = c-b - try: - if d.days > 35 and "automatically created snapshot" in snapshot['Description']: - id = snapshot['SnapshotId'] - print("Found an automatically created snapshot older than 35 days", id) - ec2.delete_snapshot(SnapshotId=id) - count += 1 - except Exception as e: - print(e) - if 'InvalidSnapshot.InUse' in str(e): - print("skipping this snapshot") - continue - print(f"Deleted a total of {count} snapshots") \ No newline at end of file diff --git a/terraform/environments/contract-work-administration/versions.tf b/terraform/environments/contract-work-administration/versions.tf index 6161ef3bc02..f54ae34948d 100644 --- a/terraform/environments/contract-work-administration/versions.tf +++ b/terraform/environments/contract-work-administration/versions.tf @@ -8,6 +8,10 @@ terraform { version = "~> 3.0" source = "hashicorp/http" } + archive = { + source = "hashicorp/archive" + version = "~> 2.0" + } } required_version = "~> 1.0" } diff --git a/terraform/environments/contract-work-administration/zipfiles/nodejs.zip b/terraform/environments/contract-work-administration/zipfiles/nodejs.zip deleted file mode 100644 index a9b62c12f7f..00000000000 Binary files a/terraform/environments/contract-work-administration/zipfiles/nodejs.zip and /dev/null differ diff --git a/terraform/environments/delius-core/locals_environments_all.tf b/terraform/environments/delius-core/locals_environments_all.tf index 878b62a5b17..09cda09fd84 100644 --- a/terraform/environments/delius-core/locals_environments_all.tf +++ b/terraform/environments/delius-core/locals_environments_all.tf @@ -17,7 +17,7 @@ locals { ordered_subnets = [local.ordered_subnet_ids] data_subnet_ids = data.aws_subnets.shared-data.ids data_subnet_a_id = data.aws_subnet.data_subnets_a.id - route53_inner_zone_info = data.aws_route53_zone.inner + route53_inner_zone = data.aws_route53_zone.inner route53_network_services_zone = data.aws_route53_zone.network-services route53_external_zone = data.aws_route53_zone.external shared_vpc_id = data.aws_vpc.shared.id @@ -26,7 +26,8 @@ locals { general_shared = data.aws_kms_key.general_shared.arn rds_shared = data.aws_kms_key.rds_shared.arn } - dns_suffix = "${local.application_name}.${var.networking[0].business-unit}-${local.environment}.modernisation-platform.service.justice.gov.uk" + dns_suffix = 
"${local.application_name}.${var.networking[0].business-unit}-${local.environment}.modernisation-platform.service.justice.gov.uk" + internal_dns_suffix = "${local.application_name}.${var.networking[0].business-unit}-${local.environment}.modernisation-platform.internal" } platform_vars = { diff --git a/terraform/environments/delius-core/locals_preproduction.tf b/terraform/environments/delius-core/locals_preproduction.tf index 2b3601c34eb..ab4a084988b 100644 --- a/terraform/environments/delius-core/locals_preproduction.tf +++ b/terraform/environments/delius-core/locals_preproduction.tf @@ -21,13 +21,13 @@ locals { encrypted = true migration_source_account_id = "010587221707" migration_lambda_role = "ldap-data-migration-lambda-role" - efs_throughput_mode = "bursting" + efs_throughput_mode = "elastic" efs_provisioned_throughput = null efs_backup_schedule = "cron(0 19 * * ? *)", efs_backup_retention_period = "30" port = 389 tls_port = 636 - desired_count = 0 + desired_count = 1 } diff --git a/terraform/environments/delius-core/modules/components/dms/dms_db_source_endpoints.tf b/terraform/environments/delius-core/modules/components/dms/dms_db_source_endpoints.tf index 23d86e73ca7..b9595237945 100644 --- a/terraform/environments/delius-core/modules/components/dms/dms_db_source_endpoints.tf +++ b/terraform/environments/delius-core/modules/components/dms/dms_db_source_endpoints.tf @@ -13,9 +13,9 @@ resource "aws_dms_endpoint" "dms_audit_source_endpoint_db" { engine_name = "oracle" username = local.dms_audit_username password = join(",", [jsondecode(data.aws_secretsmanager_secret_version.delius_core_application_passwords.secret_string)[local.dms_audit_username], jsondecode(data.aws_secretsmanager_secret_version.delius_core_application_passwords.secret_string)[local.dms_audit_username]]) - server_name = join(".", [var.oracle_db_server_names[var.dms_config.audit_source_endpoint.read_host], var.account_config.route53_inner_zone_info.name]) + server_name = join(".", [var.oracle_db_server_names[var.dms_config.audit_source_endpoint.read_host], var.account_config.route53_inner_zone.name]) port = local.db_tcps_port - extra_connection_attributes = "ArchivedLogDestId=1;AdditionalArchivedLogDestId=32;asm_server=${join(".", [var.oracle_db_server_names[var.dms_config.audit_source_endpoint.read_host], var.account_config.route53_inner_zone_info.name])}:${local.db_tcps_port}/+ASM;asm_user=${local.dms_audit_username};UseBFile=true;UseLogminerReader=false;" + extra_connection_attributes = "ArchivedLogDestId=1;AdditionalArchivedLogDestId=32;asm_server=${join(".", [var.oracle_db_server_names[var.dms_config.audit_source_endpoint.read_host], var.account_config.route53_inner_zone.name])}:${local.db_tcps_port}/+ASM;asm_user=${local.dms_audit_username};UseBFile=true;UseLogminerReader=false;" # We initially use an empty wallet for encryption - a populated wallet will be added by DMS configuration ssl_mode = "verify-ca" certificate_arn = aws_dms_certificate.empty_oracle_wallet.certificate_arn @@ -37,9 +37,9 @@ resource "aws_dms_endpoint" "dms_user_source_endpoint_db" { engine_name = "oracle" username = local.dms_audit_username password = join(",", [jsondecode(data.aws_secretsmanager_secret_version.delius_core_application_passwords.secret_string)[local.dms_audit_username], jsondecode(data.aws_secretsmanager_secret_version.delius_core_application_passwords.secret_string)[local.dms_audit_username]]) - server_name = join(".", [var.oracle_db_server_names[var.dms_config.user_source_endpoint.read_host], 
var.account_config.route53_inner_zone_info.name]) + server_name = join(".", [var.oracle_db_server_names[var.dms_config.user_source_endpoint.read_host], var.account_config.route53_inner_zone.name]) port = local.db_tcps_port - extra_connection_attributes = "ArchivedLogDestId=1;AdditionalArchivedLogDestId=32;asm_server=${join(".", [var.oracle_db_server_names[var.dms_config.user_source_endpoint.read_host], var.account_config.route53_inner_zone_info.name])}:${local.db_tcps_port}/+ASM;asm_user=${local.dms_audit_username};UseBFile=true;UseLogminerReader=false;" + extra_connection_attributes = "ArchivedLogDestId=1;AdditionalArchivedLogDestId=32;asm_server=${join(".", [var.oracle_db_server_names[var.dms_config.user_source_endpoint.read_host], var.account_config.route53_inner_zone.name])}:${local.db_tcps_port}/+ASM;asm_user=${local.dms_audit_username};UseBFile=true;UseLogminerReader=false;" # We initially use an empty wallet for encryption - a populated wallet will be added by DMS configuration ssl_mode = "verify-ca" certificate_arn = aws_dms_certificate.empty_oracle_wallet.certificate_arn diff --git a/terraform/environments/delius-core/modules/components/dms/dms_db_target_endpoints.tf b/terraform/environments/delius-core/modules/components/dms/dms_db_target_endpoints.tf index 8610fea4df1..4a963e03ed0 100644 --- a/terraform/environments/delius-core/modules/components/dms/dms_db_target_endpoints.tf +++ b/terraform/environments/delius-core/modules/components/dms/dms_db_target_endpoints.tf @@ -9,9 +9,9 @@ resource "aws_dms_endpoint" "dms_user_target_endpoint_db" { engine_name = "oracle" username = local.dms_audit_username password = join(",", [jsondecode(data.aws_secretsmanager_secret_version.delius_core_application_passwords.secret_string)[local.dms_audit_username], jsondecode(data.aws_secretsmanager_secret_version.delius_core_application_passwords.secret_string)[local.dms_audit_username]]) - server_name = join(".", [var.oracle_db_server_names["primarydb"], var.account_config.route53_inner_zone_info.name]) + server_name = join(".", [var.oracle_db_server_names["primarydb"], var.account_config.route53_inner_zone.name]) port = local.db_tcps_port - extra_connection_attributes = "UseDirectPathFullLoad=false;ArchivedLogDestId=1;AdditionalArchivedLogDestId=32;asm_server=${join(".", [var.oracle_db_server_names["primarydb"], var.account_config.route53_inner_zone_info.name])}:${local.db_tcps_port}/+ASM;asm_user=${local.dms_audit_username};UseBFile=true;UseLogminerReader=false;" + extra_connection_attributes = "UseDirectPathFullLoad=false;ArchivedLogDestId=1;AdditionalArchivedLogDestId=32;asm_server=${join(".", [var.oracle_db_server_names["primarydb"], var.account_config.route53_inner_zone.name])}:${local.db_tcps_port}/+ASM;asm_user=${local.dms_audit_username};UseBFile=true;UseLogminerReader=false;" # We initially use an empty wallet for encryption - a populated wallet will be added by DMS configuration ssl_mode = "verify-ca" certificate_arn = aws_dms_certificate.empty_oracle_wallet.certificate_arn @@ -32,9 +32,9 @@ resource "aws_dms_endpoint" "dms_audit_target_endpoint_db" { engine_name = "oracle" username = local.dms_audit_username password = join(",", [jsondecode(data.aws_secretsmanager_secret_version.delius_core_application_passwords.secret_string)[local.dms_audit_username], jsondecode(data.aws_secretsmanager_secret_version.delius_core_application_passwords.secret_string)[local.dms_audit_username]]) - server_name = join(".", [var.oracle_db_server_names["primarydb"], 
var.account_config.route53_inner_zone_info.name]) + server_name = join(".", [var.oracle_db_server_names["primarydb"], var.account_config.route53_inner_zone.name]) port = local.db_tcps_port - extra_connection_attributes = "UseDirectPathFullLoad=false;ArchivedLogDestId=1;AdditionalArchivedLogDestId=32;asm_server=${join(".", [var.oracle_db_server_names["primarydb"], var.account_config.route53_inner_zone_info.name])}:${local.db_tcps_port}/+ASM;asm_user=${local.dms_audit_username};UseBFile=true;UseLogminerReader=false;" + extra_connection_attributes = "UseDirectPathFullLoad=false;ArchivedLogDestId=1;AdditionalArchivedLogDestId=32;asm_server=${join(".", [var.oracle_db_server_names["primarydb"], var.account_config.route53_inner_zone.name])}:${local.db_tcps_port}/+ASM;asm_user=${local.dms_audit_username};UseBFile=true;UseLogminerReader=false;" # We initially use an empty wallet for encryption - a populated wallet will be added by DMS configuration ssl_mode = "verify-ca" certificate_arn = aws_dms_certificate.empty_oracle_wallet.certificate_arn @@ -43,4 +43,4 @@ resource "aws_dms_endpoint" "dms_audit_target_endpoint_db" { ignore_changes = [certificate_arn] } depends_on = [aws_dms_certificate.empty_oracle_wallet] -} \ No newline at end of file +} diff --git a/terraform/environments/delius-core/modules/components/oracle_db_instance/dns.tf b/terraform/environments/delius-core/modules/components/oracle_db_instance/dns.tf index 4db68e76596..0a7b0ea8f94 100644 --- a/terraform/environments/delius-core/modules/components/oracle_db_instance/dns.tf +++ b/terraform/environments/delius-core/modules/components/oracle_db_instance/dns.tf @@ -1,7 +1,7 @@ resource "aws_route53_record" "db_ec2_instance_internal" { provider = aws.core-vpc - zone_id = var.account_config.route53_inner_zone_info.zone_id - name = var.db_type == "primary" ? "${var.account_info.application_name}-${var.env_name}-${var.db_suffix}-${var.db_count_index}.${var.account_config.route53_inner_zone_info.name}" : "${var.account_info.application_name}-${var.env_name}-${var.db_suffix}-${var.db_count_index + 1}.${var.account_config.route53_inner_zone_info.name}" + zone_id = var.account_config.route53_inner_zone.zone_id + name = var.db_type == "primary" ? 
"${var.account_info.application_name}-${var.env_name}-${var.db_suffix}-${var.db_count_index}.${var.account_config.route53_inner_zone.name}" : "${var.account_info.application_name}-${var.env_name}-${var.db_suffix}-${var.db_count_index + 1}.${var.account_config.route53_inner_zone.name}" type = "CNAME" ttl = 60 records = [module.instance.aws_instance.private_dns] diff --git a/terraform/environments/delius-core/modules/delius_environment/alfresco.tf b/terraform/environments/delius-core/modules/delius_environment/alfresco.tf new file mode 100644 index 00000000000..c0024d20346 --- /dev/null +++ b/terraform/environments/delius-core/modules/delius_environment/alfresco.tf @@ -0,0 +1,264 @@ +module "alfresco_efs" { + source = "../helpers/efs" + + name = "alfresco" + env_name = var.env_name + creation_token = "${var.env_name}-sfs" + + kms_key_arn = var.account_config.kms_keys.general_shared + throughput_mode = "elastic" + provisioned_throughput_in_mibps = null + tags = var.tags + enable_platform_backups = false + + vpc_id = var.account_config.shared_vpc_id + subnet_ids = var.account_config.private_subnet_ids + vpc_cidr = var.account_config.shared_vpc_cidr + account_info = var.account_info +} + + +module "alfresco_sfs_ecs" { + source = "../helpers/delius_microservice" + + name = "alfresco-sfs" + env_name = var.env_name + + container_cpu = 2048 + container_memory = 4096 + + container_vars_default = { + "scheduler.content.age.millis" = 518400000 # 6 days + "scheduler.cleanup.interval" = 259200000 # 3 days + } + + container_vars_env_specific = {} + + container_secrets_default = {} + container_secrets_env_specific = {} + + desired_count = 1 + deployment_minimum_healthy_percent = 0 + deployment_maximum_percent = 200 + + container_port_config = [ + { + containerPort = 8099 + protocol = "tcp" + } + ] + + microservice_lb = aws_lb.alfresco_sfs + microservice_lb_https_listener_arn = aws_lb_listener.alfresco_sfs_listener_https.arn + + alb_listener_rule_host_header = "alf-sfs.${var.env_name}.${var.account_config.dns_suffix}" + + target_group_protocol_version = "HTTP1" + + + alb_health_check = { + path = "/" + healthy_threshold = 5 + interval = 30 + protocol = "HTTP" + unhealthy_threshold = 5 + matcher = "200-499" + timeout = 10 + grace_period_seconds = 180 + } + + ecs_cluster_arn = module.ecs.ecs_cluster_arn + cluster_security_group_id = aws_security_group.cluster.id + + bastion_sg_id = module.bastion_linux.bastion_security_group + tags = var.tags + + platform_vars = var.platform_vars + container_image = "ghcr.io/ministryofjustice/hmpps-delius-alfresco-shared-file-store:2.1.2-4" + account_config = var.account_config + + account_info = var.account_info + + ignore_changes_service_task_definition = true + + extra_task_exec_role_policies = { + efs = data.aws_iam_policy_document.alfresco_efs_access_policy + } + + providers = { + aws.core-vpc = aws.core-vpc + aws.core-network-services = aws.core-network-services + } + + log_error_pattern = "%${join("|", local.ldap_formatted_error_codes)}%" + sns_topic_arn = aws_sns_topic.delius_core_alarms.arn + enable_platform_backups = false + frontend_lb_arn_suffix = aws_lb.alfresco_sfs.arn_suffix + + efs_volumes = [ + { + host_path = null + name = "sfs" + efs_volume_configuration = [{ + file_system_id = module.alfresco_efs.fs_id + root_directory = "/" + transit_encryption = "ENABLED" + transit_encryption_port = 2049 + authorization_config = [{ + access_point_id = module.alfresco_efs.access_point_id + iam = "DISABLED" + }] + }] + } + ] + + mount_points = [{ + sourceVolume = "sfs" + 
containerPath = "/tmp/Alfresco" + readOnly = false + }] + + ecs_service_egress_security_group_ids = [ + { + ip_protocol = "-1" + cidr_ipv4 = "0.0.0.0/0" + description = "Allow all outbound traffic to any IPv4 address" + } + ] + + nlb_ingress_security_group_ids = [ + { + port = 8099 + ip_protocol = "tcp" + cidr_ipv4 = var.account_config.shared_vpc_cidr + description = "Allow inbound traffic from VPC" + }, + { + port = 8099 + ip_protocol = "udp" + cidr_ipv4 = var.account_config.shared_vpc_cidr + description = "Allow inbound traffic from VPC" + }, + { + port = 8099 + ip_protocol = "tcp" + cidr_ipv4 = var.account_info.cp_cidr + description = "Allow inbound LDAP traffic from CP" + }, + { + port = 8099 + ip_protocol = "udp" + cidr_ipv4 = var.account_info.cp_cidr + description = "Allow inbound LDAP traffic from CP" + }, + { + port = 2049 + ip_protocol = "tcp" + referenced_security_group_id = module.ldap.efs_sg_id + description = "EFS ingress" + } + ] + + ecs_service_ingress_security_group_ids = [ + { + port = 8099 + ip_protocol = "tcp" + cidr_ipv4 = var.account_config.shared_vpc_cidr + description = "Allow inbound traffic from VPC" + }, + { + port = 8099 + ip_protocol = "udp" + cidr_ipv4 = var.account_config.shared_vpc_cidr + description = "Allow inbound traffic from VPC" + }, + { + port = 8099 + ip_protocol = "tcp" + cidr_ipv4 = var.account_info.cp_cidr + description = "Allow inbound web traffic from CP" + }, + { + port = 8099 + ip_protocol = "udp" + cidr_ipv4 = var.account_info.cp_cidr + description = "Allow inbound web traffic from CP" + }, + { + port = 2049 + ip_protocol = "tcp" + referenced_security_group_id = module.ldap.efs_sg_id + description = "EFS ingress" + } + ] +} + +data "aws_iam_policy_document" "alfresco_efs_access_policy" { + statement { + actions = [ + "elasticfilesystem:ClientRootAccess", + "elasticfilesystem:ClientWrite", + "elasticfilesystem:ClientMount" + ] + resources = [ + module.ldap.efs_fs_arn + ] + effect = "Allow" + } +} + +resource "aws_security_group" "alfresco_sfs_alb" { + name = "${var.env_name}-alf-sfs-alb" + description = "controls access to and from alfresco sfs load balancer" + vpc_id = var.account_config.shared_vpc_id + tags = local.tags + lifecycle { + create_before_destroy = true + } +} + +resource "aws_vpc_security_group_ingress_rule" "alfresco_sfs_alb" { + for_each = toset([var.account_info.cp_cidr, var.account_config.shared_vpc_cidr]) + security_group_id = aws_security_group.alfresco_sfs_alb.id + description = "Access into alb over https" + from_port = "443" + to_port = "443" + ip_protocol = "tcp" + cidr_ipv4 = each.key +} + +resource "aws_vpc_security_group_egress_rule" "alfresco_sfs_alb" { + security_group_id = aws_security_group.alfresco_sfs_alb.id + description = "egress from alb to ecs cluster" + ip_protocol = "-1" + cidr_ipv4 = var.account_config.shared_vpc_cidr +} + +# internal application load balancer +resource "aws_lb" "alfresco_sfs" { + name = "${var.app_name}-${var.env_name}-alf-sfs-alb" + internal = true + load_balancer_type = "application" + security_groups = [aws_security_group.alfresco_sfs_alb.id] + subnets = var.account_config.private_subnet_ids + + enable_deletion_protection = false + drop_invalid_header_fields = true +} + + +resource "aws_lb_listener" "alfresco_sfs_listener_https" { + load_balancer_arn = aws_lb.alfresco_sfs.id + port = 443 + protocol = "HTTPS" + certificate_arn = local.certificate_arn + ssl_policy = "ELBSecurityPolicy-TLS-1-2-2017-01" + + default_action { + type = "fixed-response" + fixed_response { + content_type = 
"text/plain" + status_code = "404" + } + } +} diff --git a/terraform/environments/delius-core/modules/delius_environment/db_ec2.tf b/terraform/environments/delius-core/modules/delius_environment/db_ec2.tf index b9083d57c68..6aa6ceca16a 100644 --- a/terraform/environments/delius-core/modules/delius_environment/db_ec2.tf +++ b/terraform/environments/delius-core/modules/delius_environment/db_ec2.tf @@ -140,8 +140,8 @@ # for item in var.db_config : item.name => item # } # provider = aws.core-vpc -# zone_id = var.account_config.route53_inner_zone_info.zone_id -# name = each.key == "primary-db" ? "delius-${var.env_name}-db-${index(var.db_config, each.value) + 1}.${var.account_config.route53_inner_zone_info.name}" : "delius-${var.env_name}-db-${index(var.db_config, each.value) + 1}.${var.account_config.route53_inner_zone_info.name}" +# zone_id = var.account_config.route53_inner_zone.zone_id +# name = each.key == "primary-db" ? "delius-${var.env_name}-db-${index(var.db_config, each.value) + 1}.${var.account_config.route53_inner_zone.name}" : "delius-${var.env_name}-db-${index(var.db_config, each.value) + 1}.${var.account_config.route53_inner_zone.name}" # type = "CNAME" # ttl = 300 # records = [aws_instance.db_ec2_instance[each.key].private_dns] @@ -350,8 +350,8 @@ # for item in var.db_config : item.name => item # } # provider = aws.core-vpc -# zone_id = var.account_config.route53_inner_zone_info.zone_id -# name = each.key == "primary-db" ? "delius-${var.env_name}-db-${index(var.db_config, each.value) + 1}.${var.account_config.route53_inner_zone_info.name}" : "delius-${var.env_name}-db-${index(var.db_config, each.value) + 1}.${var.account_config.route53_inner_zone_info.name}" +# zone_id = var.account_config.route53_inner_zone.zone_id +# name = each.key == "primary-db" ? 
"delius-${var.env_name}-db-${index(var.db_config, each.value) + 1}.${var.account_config.route53_inner_zone.name}" : "delius-${var.env_name}-db-${index(var.db_config, each.value) + 1}.${var.account_config.route53_inner_zone.name}" # type = "CNAME" # ttl = 300 # records = [aws_instance.db_ec2_instance[each.key].private_dns] diff --git a/terraform/environments/delius-core/modules/helpers/delius_microservice/load_balancing.tf b/terraform/environments/delius-core/modules/helpers/delius_microservice/load_balancing.tf index 90019fa63f9..5b48d9eb624 100644 --- a/terraform/environments/delius-core/modules/helpers/delius_microservice/load_balancing.tf +++ b/terraform/environments/delius-core/modules/helpers/delius_microservice/load_balancing.tf @@ -149,7 +149,7 @@ resource "aws_lb_listener" "services" { resource "aws_route53_record" "services_nlb_r53_record" { provider = aws.core-vpc - zone_id = var.account_config.route53_inner_zone_info.zone_id + zone_id = var.account_config.route53_inner_zone.zone_id name = "${var.name}.service.${var.env_name}" type = "A" alias { diff --git a/terraform/environments/delius-mis/locals_environments_all.tf b/terraform/environments/delius-mis/locals_environments_all.tf index ed82f49e100..fe53dee7e0c 100644 --- a/terraform/environments/delius-mis/locals_environments_all.tf +++ b/terraform/environments/delius-mis/locals_environments_all.tf @@ -18,7 +18,7 @@ locals { subnet_set = local.subnet_set data_subnet_ids = data.aws_subnets.shared-data.ids data_subnet_a_id = data.aws_subnet.data_subnets_a.id - route53_inner_zone_info = data.aws_route53_zone.inner + route53_inner_zone = data.aws_route53_zone.inner route53_network_services_zone = data.aws_route53_zone.network-services route53_external_zone = data.aws_route53_zone.external shared_vpc_id = data.aws_vpc.shared.id diff --git a/terraform/environments/delius-nextcloud/locals_environments_all.tf b/terraform/environments/delius-nextcloud/locals_environments_all.tf index 7960768e454..9063ac740bb 100644 --- a/terraform/environments/delius-nextcloud/locals_environments_all.tf +++ b/terraform/environments/delius-nextcloud/locals_environments_all.tf @@ -18,7 +18,7 @@ locals { subnet_set = local.subnet_set data_subnet_ids = data.aws_subnets.shared-data.ids data_subnet_a_id = data.aws_subnet.data_subnets_a.id - route53_inner_zone_info = data.aws_route53_zone.inner + route53_inner_zone = data.aws_route53_zone.inner route53_network_services_zone = data.aws_route53_zone.network-services route53_external_zone = data.aws_route53_zone.external shared_vpc_id = data.aws_vpc.shared.id diff --git a/terraform/environments/digital-prison-reporting/glue-connections.tf b/terraform/environments/digital-prison-reporting/glue-connections.tf index 0ba8d5028c6..71ab036c02b 100644 --- a/terraform/environments/digital-prison-reporting/glue-connections.tf +++ b/terraform/environments/digital-prison-reporting/glue-connections.tf @@ -79,6 +79,7 @@ resource "aws_glue_connection" "glue_dps_connection" { resource "aws_security_group" "glue_job_connection_sg" { #checkov:skip=CKV2_AWS_5 + #checkov:skip=CKV_AWS_382: "Ensure no security groups allow egress from 0.0.0.0:0 to port -1" name = "${local.project}-glue-connection_sg" description = "Security group for glue jobs when using Glue Connections" vpc_id = data.aws_vpc.shared.id diff --git a/terraform/environments/digital-prison-reporting/locals.tf b/terraform/environments/digital-prison-reporting/locals.tf index 4296b1ec895..1113141203b 100644 --- a/terraform/environments/digital-prison-reporting/locals.tf +++ 
b/terraform/environments/digital-prison-reporting/locals.tf @@ -403,19 +403,15 @@ locals { environment_configuration = local.environment_configurations[local.environment] environment_configurations = { development = { - observability_platform_account_id = local.environment_management.account_ids["observability-platform-development"] analytical_platform_runner_suffix = "-dev" } test = { - observability_platform_account_id = local.environment_management.account_ids["observability-platform-development"] analytical_platform_runner_suffix = "-test" } preproduction = { - observability_platform_account_id = local.environment_management.account_ids["observability-platform-development"] analytical_platform_runner_suffix = "-pp" } production = { - observability_platform_account_id = local.environment_management.account_ids["observability-platform-production"] analytical_platform_runner_suffix = "" } } diff --git a/terraform/environments/digital-prison-reporting/main.tf b/terraform/environments/digital-prison-reporting/main.tf index 5968653ae62..2007d32f1ab 100644 --- a/terraform/environments/digital-prison-reporting/main.tf +++ b/terraform/environments/digital-prison-reporting/main.tf @@ -1231,7 +1231,7 @@ module "dms_nomis_ingestor" { dms_target_name = "kinesis" short_name = "nomis" migration_type = "full-load-and-cdc" - replication_instance_version = "3.4.7" # Upgrade + replication_instance_version = "3.5.2" replication_instance_class = "dms.t3.medium" subnet_ids = [ data.aws_subnet.data_subnets_a.id, data.aws_subnet.data_subnets_b.id, data.aws_subnet.data_subnets_c.id @@ -1249,10 +1249,6 @@ module "dms_nomis_ingestor" { "kinesis_target_stream" = "arn:aws:kinesis:eu-west-2:${data.aws_caller_identity.current.account_id}:stream/${local.kinesis_stream_ingestor}" } - availability_zones = { - 0 = "eu-west-2a" - } - tags = merge( local.all_tags, { @@ -1300,10 +1296,6 @@ module "dms_nomis_to_s3_ingestor" { bucket_name = module.s3_raw_bucket.bucket_id - availability_zones = { - 0 = "eu-west-2a" - } - depends_on = [ module.s3_raw_bucket.bucket_id ] diff --git a/terraform/environments/digital-prison-reporting/modules/dms/main.tf b/terraform/environments/digital-prison-reporting/modules/dms/main.tf index 504ad6b9441..c5c313de978 100644 --- a/terraform/environments/digital-prison-reporting/modules/dms/main.tf +++ b/terraform/environments/digital-prison-reporting/modules/dms/main.tf @@ -104,7 +104,6 @@ resource "aws_dms_s3_endpoint" "dms-s3-target-endpoint" { max_file_size = 120000 cdc_max_batch_interval = 10 - cdc_inserts_and_updates = true depends_on = [aws_iam_policy.dms-s3-target-policy, aws_iam_policy.dms-operator-s3-policy] @@ -126,6 +125,7 @@ resource "aws_dms_replication_subnet_group" "dms-s3-target-subnet-group" { resource "aws_security_group" "dms_s3_target_sec_group" { #checkov:skip=CKV2_AWS_5 #checkov:skip=CKV_AWS_23: "Ensure every security group and rule has a description" + #checkov:skip=CKV_AWS_382: "Ensure no security groups allow egress from 0.0.0.0:0 to port -1" count = var.setup_dms_instance ? 1 : 0 diff --git a/terraform/environments/digital-prison-reporting/modules/dms/variables.tf b/terraform/environments/digital-prison-reporting/modules/dms/variables.tf index ce82700e249..d482c2b5da1 100644 --- a/terraform/environments/digital-prison-reporting/modules/dms/variables.tf +++ b/terraform/environments/digital-prison-reporting/modules/dms/variables.tf @@ -1,4 +1,5 @@ variable "name" { + type = string description = "DMS Replication name." 
} @@ -56,14 +57,6 @@ variable "migration_type" { description = "DMS Migration Type" } -variable "availability_zones" { - default = [ - { - 0 = "eu-west-2a" - } - ] -} - variable "rename_rule_source_schema" { description = "The source schema we will rename to a target output 'space'" type = string @@ -81,19 +74,26 @@ variable "subnet_ids" { default = [] } -variable "source_address" {} +variable "source_address" { + type = string +} -variable "vpc" {} +variable "vpc" { + type = string +} variable "availability_zone" { + type = string default = null } variable "create" { + type = bool default = true } variable "create_iam_roles" { + type = bool default = true } @@ -106,11 +106,13 @@ variable "iam_role_permissions_boundary" { # Used in tagginga and naming the resources variable "stack_name" { + type = string description = "The name of our application" default = "dblink" } variable "owner" { + type = string description = "A group email address to be used in tags" default = "autobots@ga.gov.au" } @@ -120,6 +122,7 @@ variable "owner" { #-------------------------------------------------------------- variable "identifier" { + type = string default = "rds" description = "Name of the database in the RDS" } @@ -129,51 +132,42 @@ variable "identifier" { #-------------------------------------------------------------- variable "target_backup_retention_period" { + type = string # Days default = "30" description = "Retention of RDS backups" } variable "target_backup_window" { + type = string default = "14:00-17:00" description = "RDS backup window" } variable "target_db_port" { + type = number description = "The port the Application Server will access the database on" default = 5432 } variable "target_engine_version" { + type = string description = "Engine version" default = "9.3.14" } variable "target_instance_class" { + type = string default = "db.t2.micro" description = "Instance class" } variable "target_maintenance_window" { + type = string default = "Mon:00:00-Mon:03:00" description = "RDS maintenance window" } -variable "target_rds_is_multi_az" { - description = "Create backup database in separate availability zone" - default = "false" -} - -variable "target_storage" { - default = "10" - description = "Storage size in GB" -} - -variable "target_storage_encrypted" { - description = "Encrypt storage or leave unencrypted" - default = false -} - #variable "target_username" { # description = "Username to access the target database" #} @@ -183,81 +177,78 @@ variable "target_storage_encrypted" { #-------------------------------------------------------------- variable "source_app_password" { + type = string description = "Password for the endpoint to access the source database" } variable "source_app_username" { + type = string description = "Username for the endpoint to access the source database" } -variable "source_backup_retention_period" { - # Days - default = "1" - description = "Retention of RDS backups" -} - variable "source_backup_window" { + type = string # 12:00AM-03:00AM AEST default = "14:00-17:00" description = "RDS backup window" } variable "source_db_name" { + type = string description = "Name of the target database" default = "oracle" } variable "source_db_port" { + type = number description = "The port the Application Server will access the database on" default = null } variable "source_engine" { + type = string default = "oracle-se2" description = "Engine type, example values mysql, postgres" } variable "source_engine_name" { + type = string default = "" description = "Engine name for 
DMS" } variable "source_engine_version" { + type = string description = "Engine version" default = "12.1.0.2.v8" } variable "source_instance_class" { + type = string default = "db.t2.micro" description = "Instance class" } variable "source_maintenance_window" { + type = string default = "Mon:00:00-Mon:03:00" description = "RDS maintenance window" } variable "source_password" { + type = string description = "Password of the source database" default = "" } -variable "source_rds_is_multi_az" { - description = "Create backup database in separate availability zone" - default = "false" -} - -variable "source_storage" { - default = "10" - description = "Storage size in GB" -} - variable "source_storage_encrypted" { + type = bool description = "Encrypt storage or leave unencrypted" default = false } variable "source_username" { + type = string description = "Username to access the source database" default = "" } @@ -267,21 +258,25 @@ variable "source_username" { #-------------------------------------------------------------- variable "replication_instance_maintenance_window" { + type = string description = "Maintenance window for the replication instance" default = "sun:10:30-sun:14:30" } variable "replication_instance_storage" { + type = number description = "Size of the replication instance in GB" - default = "10" + default = 10 } variable "replication_instance_version" { + type = string description = "Engine version of the replication instance" default = "3.4.6" } variable "replication_instance_class" { + type = string description = "Instance class of replication instance" default = "dms.t2.micro" } @@ -297,6 +292,7 @@ variable "allow_major_version_upgrade" { #-------------------------------------------------------------- variable "database_subnet_cidr" { + type = list(string) default = ["10.26.25.208/28", "10.26.25.224/28", "10.26.25.240/28"] description = "List of subnets to be used for databases" } diff --git a/terraform/environments/digital-prison-reporting/modules/dms/versions.tf b/terraform/environments/digital-prison-reporting/modules/dms/versions.tf new file mode 100644 index 00000000000..bf68a137672 --- /dev/null +++ b/terraform/environments/digital-prison-reporting/modules/dms/versions.tf @@ -0,0 +1,15 @@ +terraform { + required_providers { + aws = { + version = "~> 5.0" + source = "hashicorp/aws" + } + + template = { + source = "hashicorp/template" + version = "~> 2.2" + } + + } + required_version = "~> 1.0" +} diff --git a/terraform/environments/digital-prison-reporting/modules/dms_dps/main.tf b/terraform/environments/digital-prison-reporting/modules/dms_dps/main.tf index e003d334d96..f4061811053 100644 --- a/terraform/environments/digital-prison-reporting/modules/dms_dps/main.tf +++ b/terraform/environments/digital-prison-reporting/modules/dms_dps/main.tf @@ -135,6 +135,7 @@ resource "aws_dms_replication_subnet_group" "dms" { resource "aws_security_group" "dms_sec_group" { #checkov:skip=CKV_AWS_23: "Ensure every security group and rule has a description" + #checkov:skip=CKV_AWS_382: "Ensure no security groups allow egress from 0.0.0.0:0 to port -1" count = var.setup_dms_instance ? 
1 : 0 name = "${var.project_id}-dms-${var.short_name}-${var.dms_source_name}-${var.dms_target_name}-security-group" diff --git a/terraform/environments/digital-prison-reporting/modules/dms_dps/variables.tf b/terraform/environments/digital-prison-reporting/modules/dms_dps/variables.tf index f3939ab9acb..cb7a96375bf 100644 --- a/terraform/environments/digital-prison-reporting/modules/dms_dps/variables.tf +++ b/terraform/environments/digital-prison-reporting/modules/dms_dps/variables.tf @@ -1,4 +1,5 @@ variable "name" { + type = string description = "DMS Replication name." } @@ -52,38 +53,36 @@ variable "migration_type" { description = "DMS Migration Type" } -variable "availability_zones" { - default = [ - { - 0 = "eu-west-2a" - 1 = "eu-west-2b" - 2 = "eu-west-2c" - } - ] -} - - variable "subnet_ids" { description = "An List of VPC subnet IDs to use in the subnet group" type = list(string) default = [] } -variable "source_address" {} +variable "source_address" { + type = string +} -variable "vpc" {} +variable "vpc" { + type = string +} -variable "kinesis_stream_policy" {} +variable "kinesis_stream_policy" { + type = string +} variable "availability_zone" { + type = string default = null } variable "create" { + type = bool default = true } variable "create_iam_roles" { + type = bool default = true } @@ -96,11 +95,13 @@ variable "iam_role_permissions_boundary" { # Used in tagginga and naming the resources variable "stack_name" { + type = string description = "The name of our application" default = "dblink" } variable "owner" { + type = string description = "A group email address to be used in tags" default = "autobots@ga.gov.au" } @@ -110,6 +111,7 @@ variable "owner" { #-------------------------------------------------------------- variable "identifier" { + type = string default = "rds" description = "Name of the database in the RDS" } @@ -118,13 +120,8 @@ variable "identifier" { # DMS target config #-------------------------------------------------------------- -variable "target_backup_retention_period" { - # Days - default = "30" - description = "Retention of RDS backups" -} - variable "target_backup_window" { + type = string # 12:00AM-03:00AM AEST default = "14:00-17:00" description = "RDS backup window" @@ -135,26 +132,31 @@ variable "target_backup_window" { #} variable "target_db_port" { + type = number description = "The port the Application Server will access the database on" default = 5432 } variable "target_engine" { + type = string default = "kinesis" description = "Engine type, example values mysql, postgres" } variable "target_engine_version" { + type = string description = "Engine version" default = "9.3.14" } variable "target_instance_class" { + type = string default = "db.t2.micro" description = "Instance class" } variable "target_maintenance_window" { + type = string default = "Mon:00:00-Mon:03:00" description = "RDS maintenance window" } @@ -163,21 +165,6 @@ variable "target_maintenance_window" { # description = "Password of the target database" #} -variable "target_rds_is_multi_az" { - description = "Create backup database in separate availability zone" - default = "false" -} - -variable "target_storage" { - default = "10" - description = "Storage size in GB" -} - -variable "target_storage_encrypted" { - description = "Encrypt storage or leave unencrypted" - default = false -} - #variable "target_username" { # description = "Username to access the target database" #} @@ -192,81 +179,78 @@ variable "kinesis_settings" { 
#-------------------------------------------------------------- variable "source_app_password" { + type = string description = "Password for the endpoint to access the source database" } variable "source_app_username" { + type = string description = "Username for the endpoint to access the source database" } -variable "source_backup_retention_period" { - # Days - default = "1" - description = "Retention of RDS backups" -} - variable "source_backup_window" { + type = string # 12:00AM-03:00AM AEST default = "14:00-17:00" description = "RDS backup window" } variable "source_db_name" { + type = string description = "Name of the target database" default = "oracle" } variable "source_db_port" { + type = number description = "The port the Application Server will access the database on" default = null } variable "source_engine" { + type = string default = "oracle-se2" description = "Engine type, example values mysql, postgres" } variable "source_engine_name" { + type = string default = "" description = "Engine name for DMS" } variable "source_engine_version" { + type = string description = "Engine version" default = "12.1.0.2.v8" } variable "source_instance_class" { + type = string default = "db.t2.micro" description = "Instance class" } variable "source_maintenance_window" { + type = string default = "Mon:00:00-Mon:03:00" description = "RDS maintenance window" } variable "source_password" { + type = string description = "Password of the source database" default = "" } -variable "source_rds_is_multi_az" { - description = "Create backup database in separate availability zone" - default = "false" -} - -variable "source_storage" { - default = "10" - description = "Storage size in GB" -} - variable "source_storage_encrypted" { + type = bool description = "Encrypt storage or leave unencrypted" default = false } variable "source_username" { + type = string description = "Username to access the source database" default = "" } @@ -276,21 +260,25 @@ variable "source_username" { #-------------------------------------------------------------- variable "replication_instance_maintenance_window" { + type = string description = "Maintenance window for the replication instance" default = "sun:10:30-sun:14:30" } variable "replication_instance_storage" { + type = number description = "Size of the replication instance in GB" - default = "10" + default = 10 } variable "replication_instance_version" { + type = string description = "Engine version of the replication instance" default = "3.4.6" } variable "replication_instance_class" { + type = string description = "Instance class of replication instance" default = "dms.t2.micro" } @@ -300,6 +288,7 @@ variable "replication_instance_class" { #-------------------------------------------------------------- variable "database_subnet_cidr" { + type = list(string) default = ["10.26.25.208/28", "10.26.25.224/28", "10.26.25.240/28"] description = "List of subnets to be used for databases" } diff --git a/terraform/environments/digital-prison-reporting/modules/dms_dps/versions.tf b/terraform/environments/digital-prison-reporting/modules/dms_dps/versions.tf new file mode 100644 index 00000000000..bf68a137672 --- /dev/null +++ b/terraform/environments/digital-prison-reporting/modules/dms_dps/versions.tf @@ -0,0 +1,15 @@ +terraform { + required_providers { + aws = { + version = "~> 5.0" + source = "hashicorp/aws" + } + + template = { + source = "hashicorp/template" + version = "~> 2.2" + } + + } + required_version = "~> 1.0" +} diff --git 
a/terraform/environments/digital-prison-reporting/modules/dms_s3_v2/main.tf b/terraform/environments/digital-prison-reporting/modules/dms_s3_v2/main.tf index 1a0acdf776b..1bb4c8ebe78 100644 --- a/terraform/environments/digital-prison-reporting/modules/dms_s3_v2/main.tf +++ b/terraform/environments/digital-prison-reporting/modules/dms_s3_v2/main.tf @@ -134,10 +134,15 @@ resource "aws_dms_endpoint" "dms-s3-target-source" { ssl_mode = var.source_ssl_mode username = var.source_app_username - postgres_settings { - map_boolean_as_boolean = true - heartbeat_enable = true - heartbeat_frequency = 5 + dynamic "postgres_settings" { + for_each = var.source_engine_name == "postgres" ? [1] : [] + + content { + map_boolean_as_boolean = true + fail_tasks_on_lob_truncation = true + heartbeat_enable = true + heartbeat_frequency = 5 + } } extra_connection_attributes = var.extra_attributes @@ -164,7 +169,6 @@ resource "aws_dms_s3_endpoint" "dms-s3-target-endpoint" { max_file_size = 120000 cdc_max_batch_interval = 10 - cdc_inserts_and_updates = true tags = merge( var.tags, diff --git a/terraform/environments/digital-prison-reporting/modules/dms_s3_v2/variables.tf b/terraform/environments/digital-prison-reporting/modules/dms_s3_v2/variables.tf index 346d5f716d3..a27b4da5ffe 100644 --- a/terraform/environments/digital-prison-reporting/modules/dms_s3_v2/variables.tf +++ b/terraform/environments/digital-prison-reporting/modules/dms_s3_v2/variables.tf @@ -291,7 +291,7 @@ variable "source_engine" { variable "source_engine_name" { default = "" type = string - description = "Engine name for DMS" + description = "Type of engine for the source endpoint. Example valid values are postgres, oracle" } diff --git a/terraform/environments/digital-prison-reporting/modules/domains/maintenance-pipeline/pipeline.tf b/terraform/environments/digital-prison-reporting/modules/domains/maintenance-pipeline/pipeline.tf index 4281d701e6f..84b4f581882 100644 --- a/terraform/environments/digital-prison-reporting/modules/domains/maintenance-pipeline/pipeline.tf +++ b/terraform/environments/digital-prison-reporting/modules/domains/maintenance-pipeline/pipeline.tf @@ -26,7 +26,7 @@ module "maintenance_pipeline" { }, "Check All Pending Files Have Been Processed" : { "Type" : "Task", - "Resource" : "arn:aws:states:::glue:startJobRun", + "Resource" : "arn:aws:states:::glue:startJobRun.sync", "Parameters" : { "JobName" : var.glue_unprocessed_raw_files_check_job, "Arguments" : { diff --git a/terraform/environments/digital-prison-reporting/modules/domains/reload-pipeline/pipeline.tf b/terraform/environments/digital-prison-reporting/modules/domains/reload-pipeline/pipeline.tf index 0c08a0b111b..40b941765bf 100644 --- a/terraform/environments/digital-prison-reporting/modules/domains/reload-pipeline/pipeline.tf +++ b/terraform/environments/digital-prison-reporting/modules/domains/reload-pipeline/pipeline.tf @@ -49,7 +49,7 @@ module "reload_pipeline" { }, "Check All Pending Files Have Been Processed" : { "Type" : "Task", - "Resource" : "arn:aws:states:::glue:startJobRun", + "Resource" : "arn:aws:states:::glue:startJobRun.sync", "Parameters" : { "JobName" : var.glue_unprocessed_raw_files_check_job, "Arguments" : { diff --git a/terraform/environments/digital-prison-reporting/modules/domains/replay-pipeline/pipeline.tf b/terraform/environments/digital-prison-reporting/modules/domains/replay-pipeline/pipeline.tf index ea6f367fa08..e6a20b0c2fc 100644 --- a/terraform/environments/digital-prison-reporting/modules/domains/replay-pipeline/pipeline.tf +++
b/terraform/environments/digital-prison-reporting/modules/domains/replay-pipeline/pipeline.tf @@ -48,7 +48,7 @@ module "replay_pipeline" { }, "Check All Pending Files Have Been Processed" : { "Type" : "Task", - "Resource" : "arn:aws:states:::glue:startJobRun", + "Resource" : "arn:aws:states:::glue:startJobRun.sync", "Parameters" : { "JobName" : var.glue_unprocessed_raw_files_check_job, "Arguments" : { @@ -246,7 +246,7 @@ module "replay_pipeline" { }, "Check All Files Have Been Replayed" : { "Type" : "Task", - "Resource" : "arn:aws:states:::glue:startJobRun", + "Resource" : "arn:aws:states:::glue:startJobRun.sync", "Parameters" : { "JobName" : var.glue_unprocessed_raw_files_check_job, "Arguments" : { diff --git a/terraform/environments/digital-prison-reporting/modules/domains/start-cdc-pipeline/versions.tf b/terraform/environments/digital-prison-reporting/modules/domains/start-cdc-pipeline/versions.tf new file mode 100644 index 00000000000..ea265eb2f9b --- /dev/null +++ b/terraform/environments/digital-prison-reporting/modules/domains/start-cdc-pipeline/versions.tf @@ -0,0 +1,10 @@ +terraform { + required_providers { + aws = { + version = "~> 5.0" + source = "hashicorp/aws" + } + + } + required_version = "~> 1.0" +} diff --git a/terraform/environments/digital-prison-reporting/modules/domains/stop-cdc-pipeline/pipeline.tf b/terraform/environments/digital-prison-reporting/modules/domains/stop-cdc-pipeline/pipeline.tf index 92b1179f255..3143ffbbd55 100644 --- a/terraform/environments/digital-prison-reporting/modules/domains/stop-cdc-pipeline/pipeline.tf +++ b/terraform/environments/digital-prison-reporting/modules/domains/stop-cdc-pipeline/pipeline.tf @@ -25,7 +25,7 @@ module "cdc_stop_pipeline" { }, "Check All Pending Files Have Been Processed" : { "Type" : "Task", - "Resource" : "arn:aws:states:::glue:startJobRun", + "Resource" : "arn:aws:states:::glue:startJobRun.sync", "Parameters" : { "JobName" : var.glue_unprocessed_raw_files_check_job, "Arguments" : { diff --git a/terraform/environments/digital-prison-reporting/modules/domains/stop-cdc-pipeline/variables.tf b/terraform/environments/digital-prison-reporting/modules/domains/stop-cdc-pipeline/variables.tf index 22af2a709f4..62ab1e00c38 100644 --- a/terraform/environments/digital-prison-reporting/modules/domains/stop-cdc-pipeline/variables.tf +++ b/terraform/environments/digital-prison-reporting/modules/domains/stop-cdc-pipeline/variables.tf @@ -24,11 +24,6 @@ variable "glue_reporting_hub_cdc_jobname" { type = string } -variable "s3_glue_bucket_id" { - description = "S3, Glue Bucket ID" - type = string -} - variable "glue_stop_glue_instance_job" { description = "Name of job to stop the current running instance of the streaming job" type = string @@ -48,9 +43,4 @@ variable "tags" { type = map(string) default = {} description = "(Optional) Key-value map of resource tags" -} - -variable "domain" { - type = string - description = "Domain Name" } \ No newline at end of file diff --git a/terraform/environments/digital-prison-reporting/modules/domains/stop-cdc-pipeline/versions.tf b/terraform/environments/digital-prison-reporting/modules/domains/stop-cdc-pipeline/versions.tf new file mode 100644 index 00000000000..ea265eb2f9b --- /dev/null +++ b/terraform/environments/digital-prison-reporting/modules/domains/stop-cdc-pipeline/versions.tf @@ -0,0 +1,10 @@ +terraform { + required_providers { + aws = { + version = "~> 5.0" + source = "hashicorp/aws" + } + + } + required_version = "~> 1.0" +} diff --git 
a/terraform/environments/digital-prison-reporting/modules/rds/postgres/sg.tf b/terraform/environments/digital-prison-reporting/modules/rds/postgres/sg.tf index 2403b86bce7..7328ffb11a8 100644 --- a/terraform/environments/digital-prison-reporting/modules/rds/postgres/sg.tf +++ b/terraform/environments/digital-prison-reporting/modules/rds/postgres/sg.tf @@ -37,6 +37,7 @@ resource "aws_security_group_rule" "rule" { } resource "aws_security_group_rule" "rds_allow_all" { + #checkov:skip=CKV_AWS_382: "Ensure no security groups allow egress from 0.0.0.0:0 to port -1" count = var.enable_rds ? 1 : 0 type = "egress" diff --git a/terraform/environments/digital-prison-reporting/observability-platform.tf b/terraform/environments/digital-prison-reporting/observability-platform.tf index 58035f4b337..17803453dc6 100644 --- a/terraform/environments/digital-prison-reporting/observability-platform.tf +++ b/terraform/environments/digital-prison-reporting/observability-platform.tf @@ -5,9 +5,9 @@ module "observability_platform_tenant" { source = "ministryofjustice/observability-platform-tenant/aws" version = "1.2.0" - observability_platform_account_id = local.environment_configuration.observability_platform_account_id + observability_platform_account_id = local.environment_management.account_ids["observability-platform-production"] enable_xray = true enable_prometheus = true tags = local.tags -} \ No newline at end of file +} diff --git a/terraform/environments/edw/ec2.tf b/terraform/environments/edw/ec2.tf index 67aab63cb4d..480b063eb69 100644 --- a/terraform/environments/edw/ec2.tf +++ b/terraform/environments/edw/ec2.tf @@ -227,24 +227,30 @@ else fi #### Prevent timeout on DB + +# Increase ssh session timeout +sed -i 's/#ClientAliveInterval.*/ClientAliveInterval 1200/' /etc/ssh/sshd_config +sed -i 's/#ClientAliveCountMax.*/ClientAliveCountMax 5/' /etc/ssh/sshd_config +service sshd restart + + # Add TCP keepalive time to sysctl.conf ---> keepalive solution -echo "net.ipv4.tcp_keepalive_time = 300" >> /etc/sysctl.conf +echo "net.ipv4.tcp_keepalive_time = 120" >> /etc/sysctl.conf sysctl -p + # Add SQLNET.EXPIRE_TIME to sqlnet.ora ---> keepalive solution # Check if SQLNET.EXPIRE_TIME exists in the file and update it, otherwise add it if grep -q "^SQLNET.EXPIRE_TIME" /oracle/software/product/10.2.0/network/admin/sqlnet.ora; then - # If the line exists, update it to "SQLNET.EXPIRE_TIME = 1" - sed -i 's/^SQLNET\.EXPIRE_TIME.*/SQLNET.EXPIRE_TIME = 1/' /oracle/software/product/10.2.0/network/admin/sqlnet.ora + # If the line exists, update it to "SQLNET.EXPIRE_TIME = 2" + sed -i 's/^SQLNET\.EXPIRE_TIME.*/SQLNET.EXPIRE_TIME = 2/' /oracle/software/product/10.2.0/network/admin/sqlnet.ora else # If the line does not exist, append it to the end of the file echo "SQLNET.EXPIRE_TIME = 1" >> /oracle/software/product/10.2.0/network/admin/sqlnet.ora fi + # Modify tnsnames.ora to insert (ENABLE=broken) ---> keepalive solution -grep -q '(ENABLE *= *broken)' /oracle/software/product/10.2.0/network/admin/tnsnames.ora || sed -i '/(DESCRIPTION =/a\\ (ENABLE = broken)' /oracle/software/product/10.2.0/network/admin/tnsnames.ora -# Add inbound connection timeout option to sqlnet -grep -qxF "SQLNET.INBOUND_CONNECT_TIMEOUT = 0" /oracle/software/product/10.2.0/network/admin/sqlnet.ora || echo "SQLNET.INBOUND_CONNECT_TIMEOUT = 0" >> /oracle/software/product/10.2.0/network/admin/sqlnet.ora -# Add inbound connection timeout option to listener -grep -qxF "INBOUND_CONNECT_TIMEOUT_LISTENER = 0" 
/oracle/software/product/10.2.0/network/admin/listener.ora || echo "INBOUND_CONNECT_TIMEOUT_LISTENER = 0" >> /oracle/software/product/10.2.0/network/admin/listener.ora +grep -q '(ENABLE = broken)' /oracle/software/product/10.2.0/network/admin/tnsnames.ora || sed -i '/(DESCRIPTION =/a\\ (ENABLE = broken)' /oracle/software/product/10.2.0/network/admin/tnsnames.ora + sudo mkdir -p /var/opt/oracle chown oracle:dba /var/opt/oracle @@ -287,6 +293,7 @@ cat < /etc/cron.d/backup_cron 0 06 * * 01 /home/oracle/backup_scripts/rman_full_backup.sh $APPNAME 00 07,10,13,16 * * * /home/oracle/scripts/freespace_alert.sh 00,15,30,45 * * * * /home/oracle/scripts/pmon_check.sh +# 0 7 * * 1 /home/oracle/scripts/maat_05365_ware_db_changes.sh EOC3 chown root:root /etc/cron.d/backup_cron diff --git a/terraform/environments/electronic-monitoring-data/ap_airflow_iam.tf b/terraform/environments/electronic-monitoring-data/ap_airflow_iam.tf index 95995067e04..5d216d0f0c2 100644 --- a/terraform/environments/electronic-monitoring-data/ap_airflow_iam.tf +++ b/terraform/environments/electronic-monitoring-data/ap_airflow_iam.tf @@ -200,7 +200,7 @@ module "load_unstructured_atrium_database" { module "load_fms" { - count = local.is-test ? 1 : 0 + count = local.is-test || local.is-production ? 1 : 0 source = "./modules/ap_airflow_load_data_iam_role" name = "fms" diff --git a/terraform/environments/electronic-monitoring-data/dms_data_validation_glue_job_v2.tf b/terraform/environments/electronic-monitoring-data/dms_data_validation_glue_job_v2.tf index a36b3d400fd..787fc2183e2 100644 --- a/terraform/environments/electronic-monitoring-data/dms_data_validation_glue_job_v2.tf +++ b/terraform/environments/electronic-monitoring-data/dms_data_validation_glue_job_v2.tf @@ -29,31 +29,31 @@ resource "aws_glue_job" "dms_dv_rds_to_s3_parquet_v1" { worker_type = "G.1X" number_of_workers = 4 default_arguments = { - "--script_bucket_name" = module.s3-glue-job-script-bucket.bucket.id - "--rds_db_host_ep" = split(":", aws_db_instance.database_2022.endpoint)[0] - "--rds_db_pwd" = aws_db_instance.database_2022.password - "--rds_sqlserver_db" = "" - "--rds_sqlserver_db_schema" = "dbo" - "--rds_exclude_db_tbls" = "" - "--rds_select_db_tbls" = "" - "--rds_db_tbl_pkeys_col_list" = "" - "--rds_df_trim_str_columns" = "false" - "--rds_df_trim_micro_sec_ts_col_list" = "" - "--num_of_repartitions" = 0 - "--read_partition_size_mb" = 128 - "--max_table_size_mb" = 4000 - "--parquet_tbl_folder_if_different" = "" - "--extra-py-files" = "s3://${module.s3-glue-job-script-bucket.bucket.id}/${aws_s3_object.aws_s3_object_pyzipfile_to_s3folder.id}" - "--parquet_src_bucket_name" = module.s3-dms-target-store-bucket.bucket.id - "--parquet_output_bucket_name" = module.s3-dms-data-validation-bucket.bucket.id - "--glue_catalog_db_name" = aws_glue_catalog_database.dms_dv_glue_catalog_db.name - "--glue_catalog_tbl_name" = "glue_df_output" - "--continuous-log-logGroup" = "/aws-glue/jobs/${aws_cloudwatch_log_group.dms_dv_rds_to_s3_parquet_v1.name}" - "--enable-continuous-cloudwatch-log" = "true" - "--enable-continuous-log-filter" = "true" - "--enable-metrics" = "true" - "--enable-auto-scaling" = "true" - "--conf" = <> given_skip_columns_comparison_list = {given_skip_columns_comparison_list}<<""") + + select_compare_columns = [col for col in df_rds_temp.columns + if col not in given_skip_columns_comparison_list] + LOGGER.warn(f""">> Only the below selected columns are compared \n{select_compare_columns}<<""") + skip_columns_msg = f"""; columns_skipped = 
{given_skip_columns_comparison_list}""" + + final_select_columns = df_rds_temp.columns if select_compare_columns is None \ + else select_compare_columns + + df_rds_temp = df_rds_temp.select(*final_select_columns) df_rds_temp_t1 = df_rds_temp.selectExpr( *CustomPysparkMethods.get_nvl_select_list( df_rds_temp, @@ -249,34 +268,8 @@ def process_dv_for_table(rds_jdbc_conn_obj, trim_str_msg = "; [str column(s) - extra spaces trimmed]" t2_rds_str_col_trimmed = True # ------------------------------------------------------- - - trim_ts_ms_msg = "" - t3_rds_ts_col_msec_trimmed = False - if args.get("rds_df_trim_micro_sec_ts_col_list", None) is not None: - - msg_prefix = f"""Given -> rds_df_trim_micro_sec_ts_col_list = {given_rds_df_trim_micro_seconds_col_list}""" - given_rds_df_trim_micro_seconds_col_str = args["rds_df_trim_micro_sec_ts_col_list"] - given_rds_df_trim_micro_seconds_col_list = [f"""{col.strip().strip("'").strip('"')}""" - for col in given_rds_df_trim_micro_seconds_col_str.split(",")] - LOGGER.info(f"""{msg_prefix}, {type(given_rds_df_trim_micro_seconds_col_list)}""") - - if t2_rds_str_col_trimmed == True: - df_rds_temp_t3 = CustomPysparkMethods.rds_df_trim_microseconds_timestamp( - df_rds_temp_t2, - given_rds_df_trim_micro_seconds_col_list) - else: - df_rds_temp_t3 = CustomPysparkMethods.rds_df_trim_microseconds_timestamp( - df_rds_temp_t1, - given_rds_df_trim_micro_seconds_col_list) - # ------------------------------------------------------- - - trim_ts_ms_msg = "; [timestamp column(s) - micro-seconds trimmed]" - t3_rds_ts_col_msec_trimmed = True - # ------------------------------------------------------- - - if t3_rds_ts_col_msec_trimmed: - df_rds_temp_t4 = df_rds_temp_t3 - elif t2_rds_str_col_trimmed: + + if t2_rds_str_col_trimmed: df_rds_temp_t4 = df_rds_temp_t2 else: df_rds_temp_t4 = df_rds_temp_t1 @@ -284,6 +277,7 @@ def process_dv_for_table(rds_jdbc_conn_obj, df_rds_temp_t5 = df_rds_temp_t4.cache() + df_prq_temp = df_prq_temp.select(*final_select_columns) df_prq_temp_t1 = df_prq_temp.selectExpr( *CustomPysparkMethods.get_nvl_select_list( df_rds_temp, @@ -296,6 +290,7 @@ def process_dv_for_table(rds_jdbc_conn_obj, df_prq_temp_count = df_prq_temp_t1.count() # ------------------------------------------------------- + validated_msg = f"""{rds_tbl_name} - Validated.\n{skip_columns_msg}\n{trim_str_msg}""" if df_rds_temp_count == df_prq_temp_count: df_rds_prq_subtract_t1 = df_rds_temp_t5.subtract(df_prq_temp_t1) @@ -305,7 +300,7 @@ def process_dv_for_table(rds_jdbc_conn_obj, df_temp = df_dv_output.selectExpr( "current_timestamp as run_datetime", "'' as json_row", - f"""'{rds_tbl_name} - Validated.\n{trim_str_msg}\n{trim_ts_ms_msg}' as validation_msg""", + f""""{validated_msg}" as validation_msg""", f"""'{rds_db_name}' as database_name""", f"""'{db_sch_tbl}' as full_table_name""", """'False' as table_to_ap""" @@ -314,7 +309,10 @@ def process_dv_for_table(rds_jdbc_conn_obj, df_dv_output = df_dv_output.union(df_temp) else: df_subtract_temp = (df_rds_prq_subtract_t1 - .withColumn('json_row', F.to_json(F.struct(*[F.col(c) for c in df_rds_temp.columns]))) + .withColumn('json_row', + F.to_json( + F.struct(*[F.col(c) + for c in df_rds_temp.columns]))) .selectExpr("json_row") .limit(100)) @@ -322,7 +320,7 @@ def process_dv_for_table(rds_jdbc_conn_obj, df_subtract_temp = df_subtract_temp.selectExpr( "current_timestamp as run_datetime", "json_row", - f""""{subtract_validation_msg} - Dataframe(s)-Subtract Non-Zero Row Count!" 
as validation_msg""", + f""""{subtract_validation_msg}: - Rows not matched!" as validation_msg""", f"""'{rds_db_name}' as database_name""", f"""'{db_sch_tbl}' as full_table_name""", """'False' as table_to_ap""" @@ -536,7 +534,7 @@ def write_parquet_to_s3(df_dv_output: DataFrame, database, table): total_files, total_size = S3Methods.get_s3_folder_info( PRQ_FILES_SRC_S3_BUCKET_NAME, - f"{rds_db_name}/{rds_sqlserver_db_schema}/{rds_tbl_name}") + f"{rds_db_name}/{rds_sqlserver_db_schema}/{rds_tbl_name}/") total_size_mb = total_size/1024/1024 # ------------------------------------------------------- diff --git a/terraform/environments/electronic-monitoring-data/glue-job/dms_dv_rds_to_s3_parquet_v2.py b/terraform/environments/electronic-monitoring-data/glue-job/dms_dv_rds_to_s3_parquet_v2.py index 610779f27f1..3503518d11d 100644 --- a/terraform/environments/electronic-monitoring-data/glue-job/dms_dv_rds_to_s3_parquet_v2.py +++ b/terraform/environments/electronic-monitoring-data/glue-job/dms_dv_rds_to_s3_parquet_v2.py @@ -653,7 +653,7 @@ def write_parquet_to_s3(df_dv_output: DataFrame, database, db_sch_tbl_name): total_files, total_size = S3Methods.get_s3_folder_info( PRQ_FILES_SRC_S3_BUCKET_NAME, - f"{rds_db_name}/{rds_sqlserver_db_schema}/{rds_sqlserver_db_table}") + f"{rds_db_name}/{rds_sqlserver_db_schema}/{rds_sqlserver_db_table}/") total_size_mb = total_size/1024/1024 LOGGER.warn(f""">> '{db_sch_tbl}' Size: {total_size_mb} MB <<""") diff --git a/terraform/environments/electronic-monitoring-data/glue-job/etl_dv_rds_to_s3_parquet_partitionby_yyyy_mm.py b/terraform/environments/electronic-monitoring-data/glue-job/etl_dv_rds_to_s3_parquet_partitionby_yyyy_mm.py index f7db41b2546..1199cecfcb0 100644 --- a/terraform/environments/electronic-monitoring-data/glue-job/etl_dv_rds_to_s3_parquet_partitionby_yyyy_mm.py +++ b/terraform/environments/electronic-monitoring-data/glue-job/etl_dv_rds_to_s3_parquet_partitionby_yyyy_mm.py @@ -639,7 +639,7 @@ def write_to_s3_parquet(df_dv_output: DataFrame, LOGGER.info(f"""prq_table_folder_path = {prq_table_folder_path}""") total_files, total_size = S3Methods.get_s3_folder_info(PARQUET_OUTPUT_S3_BUCKET_NAME, - prq_table_folder_path) + f"{prq_table_folder_path}/") msg_part_1 = f"""> total_files={total_files}""" msg_part_2 = f"""> total_size_mb={total_size/1024/1024:.2f}""" LOGGER.info(f"""{msg_part_1}, {msg_part_2}""") diff --git a/terraform/environments/electronic-monitoring-data/glue-job/etl_rds_sqlserver_query_to_s3_parquet.py b/terraform/environments/electronic-monitoring-data/glue-job/etl_rds_sqlserver_query_to_s3_parquet.py new file mode 100644 index 00000000000..c3fdffe3607 --- /dev/null +++ b/terraform/environments/electronic-monitoring-data/glue-job/etl_rds_sqlserver_query_to_s3_parquet.py @@ -0,0 +1,473 @@ + +import sys + +# from logging import getLogger +# import pandas as pd + +from glue_data_validation_lib import RDSConn_Constants +from glue_data_validation_lib import SparkSession +from glue_data_validation_lib import Logical_Constants +from glue_data_validation_lib import RDS_JDBC_CONNECTION +from glue_data_validation_lib import S3Methods +from glue_data_validation_lib import CustomPysparkMethods +from rds_transform_queries import SQLServer_Extract_Transform + +from awsglue.utils import getResolvedOptions +from awsglue.transforms import * + +from awsglue.dynamicframe import DynamicFrame +from awsglue.job import Job + +# from pyspark.conf import SparkConf +from pyspark.sql import DataFrame +import pyspark.sql.functions as F +import pyspark.sql.types as 
T + +# from pyspark.storagelevel import StorageLevel + +# =============================================================================== + +sc = SparkSession.sc +sc._jsc.hadoopConfiguration().set("spark.dynamicAllocation.enabled", "true") + +spark = SparkSession.spark + +glueContext = SparkSession.glueContext +LOGGER = glueContext.get_logger() + +# =============================================================================== + +# =============================================================================== + +# Organise capturing input parameters. +DEFAULT_INPUTS_LIST = ["JOB_NAME", + "script_bucket_name", + "rds_db_host_ep", + "rds_db_pwd", + "jdbc_read_partitions_num", + "rds_sqlserver_db", + "rds_sqlserver_db_schema", + "rds_sqlserver_db_table", + "rds_db_tbl_pkey_column", + "rds_df_repartition_num", + "rds_to_parquet_output_s3_bucket", + "validation_only_run", + "validation_sample_fraction_float", + "validation_sample_df_repartition_num", + "glue_catalog_db_name", + "glue_catalog_tbl_name", + "glue_catalog_dv_bucket" + ] + +OPTIONAL_INPUTS = [ + "rename_migrated_prq_tbl_folder" +] + +AVAILABLE_ARGS_LIST = CustomPysparkMethods.resolve_args(DEFAULT_INPUTS_LIST+OPTIONAL_INPUTS) + +args = getResolvedOptions(sys.argv, AVAILABLE_ARGS_LIST) + +# ------------------------------ + +job = Job(glueContext) +job.init(args["JOB_NAME"], args) + +# ------------------------------ + +RDS_DB_HOST_ENDPOINT = args["rds_db_host_ep"] +RDS_DB_PORT = RDSConn_Constants.RDS_DB_PORT +RDS_DB_INSTANCE_USER = RDSConn_Constants.RDS_DB_INSTANCE_USER +RDS_DB_INSTANCE_PWD = args["rds_db_pwd"] +RDS_DB_INSTANCE_DRIVER = RDSConn_Constants.RDS_DB_INSTANCE_DRIVER + +PARQUET_OUTPUT_S3_BUCKET_NAME = args["rds_to_parquet_output_s3_bucket"] + +GLUE_CATALOG_DB_NAME = args["glue_catalog_db_name"] +GLUE_CATALOG_TBL_NAME = args["glue_catalog_tbl_name"] +GLUE_CATALOG_DV_BUCKET = args["glue_catalog_dv_bucket"] + +CATALOG_DB_TABLE_PATH = f"""{GLUE_CATALOG_DB_NAME}/{GLUE_CATALOG_TBL_NAME}""" +CATALOG_TABLE_S3_FULL_PATH = f'''s3://{GLUE_CATALOG_DV_BUCKET}/{CATALOG_DB_TABLE_PATH}''' + + +NVL_DTYPE_DICT = Logical_Constants.NVL_DTYPE_DICT + +INT_DATATYPES_LIST = Logical_Constants.INT_DATATYPES_LIST + +RECORDED_PKEYS_LIST = Logical_Constants.RECORDED_PKEYS_LIST + +QUERY_STR_DICT = SQLServer_Extract_Transform.QUERY_STR_DICT + +# ================================================================== +# USER-DEFINED-FUNCTIONS +# ---------------------- + +def print_existing_s3parquet_stats(prq_table_folder_path): + total_files, total_size = S3Methods.get_s3_folder_info( + PARQUET_OUTPUT_S3_BUCKET_NAME, + prq_table_folder_path) + + msg_part_1 = f"""> total_files={total_files}""" + msg_part_2 = f"""> total_size_mb={total_size/1024/1024:.2f}""" + LOGGER.info(f"""{msg_part_1}, {msg_part_2}""") + + +def compare_rds_parquet_samples(rds_jdbc_conn_obj, + rds_db_table_name, + df_rds_query_read: DataFrame, + jdbc_partition_column, + prq_table_folder_path, + validation_sample_fraction_float) -> DataFrame: + + df_dv_output_schema = T.StructType( + [T.StructField("run_datetime", T.TimestampType(), True), + T.StructField("json_row", T.StringType(), True), + T.StructField("validation_msg", T.StringType(), True), + T.StructField("database_name", T.StringType(), True), + T.StructField("full_table_name", T.StringType(), True), + T.StructField("table_to_ap", T.StringType(), True)]) + + df_dv_output = CustomPysparkMethods.get_pyspark_empty_df(df_dv_output_schema) + + s3_table_folder_path = f"""s3://{PARQUET_OUTPUT_S3_BUCKET_NAME}/{prq_table_folder_path}""" + 
LOGGER.info(f"""Parquet Source being used for comparison: {s3_table_folder_path}""") + + df_parquet_read = spark.read.schema(df_rds_query_read.schema).parquet(s3_table_folder_path) + + df_parquet_read_sample = df_parquet_read.sample(validation_sample_fraction_float) + + df_parquet_read_sample_t1 = df_parquet_read_sample.selectExpr( + *CustomPysparkMethods.get_nvl_select_list( + df_parquet_read_sample, + rds_jdbc_conn_obj, + rds_db_table_name + ) + ) + + validation_sample_df_repartition_num = int(args['validation_sample_df_repartition_num']) + if validation_sample_df_repartition_num != 0: + df_parquet_read_sample_t1 = df_parquet_read_sample_t1.repartition( + validation_sample_df_repartition_num, + jdbc_partition_column + ) + # -------- + + df_rds_read_sample = df_rds_query_read.join(df_parquet_read_sample, + on=jdbc_partition_column, + how='leftsemi') + + df_rds_read_sample_t1 = df_rds_read_sample.selectExpr( + *CustomPysparkMethods.get_nvl_select_list( + df_rds_read_sample, + rds_jdbc_conn_obj, + rds_db_table_name + ) + ) + if validation_sample_df_repartition_num != 0: + df_rds_read_sample_t1 = df_rds_read_sample_t1.repartition( + validation_sample_df_repartition_num, + jdbc_partition_column + ) + # -------- + + df_prq_leftanti_rds = df_parquet_read_sample_t1.alias("L")\ + .join(df_rds_read_sample_t1.alias("R"), + on=df_parquet_read_sample_t1.columns, + how='leftanti') + + # df_prq_leftanti_rds = df_parquet_read_sample_t1.alias("L")\ + # .join(df_rds_read_sample_t1.alias("R"), + # on=jdbc_partition_column, how='left')\ + # .where(" or ".join([f"L.{column} != R.{column}" + # for column in df_rds_read_sample_t1.columns + # if column != jdbc_partition_column]))\ + # .select("L.*") + + df_prq_read_filtered_count = df_prq_leftanti_rds.count() + + LOGGER.info(f"""Rows sample taken = {df_parquet_read_sample.count()}""") + + if df_prq_read_filtered_count == 0: + temp_msg = f"""{validation_sample_fraction_float} - Sample Rows Validated.""" + df_temp_row = spark.sql(f"""select + current_timestamp() as run_datetime, + '' as json_row, + "{temp_msg}" as validation_msg, + '{rds_jdbc_conn_obj.rds_db_name}' as database_name, + '{db_sch_tbl}' as full_table_name, + 'False' as table_to_ap + """.strip()) + + LOGGER.info(f"{rds_db_table_name}: Validation Successful - 1") + df_dv_output = df_dv_output.union(df_temp_row) + else: + + LOGGER.warn( + f"""Parquet-RDS Subtract Report: ({df_prq_read_filtered_count}): Row(s) differences found!""") + + df_subtract_temp = (df_prq_leftanti_rds + .withColumn('json_row', F.to_json(F.struct(*[F.col(c) + for c in df_rds_query_read.columns]))) + .selectExpr("json_row") + .limit(100)) + + temp_msg = f"""{validation_sample_fraction_float}-Rows Sample Used:\n""" + df_subtract_temp = df_subtract_temp.selectExpr( + "current_timestamp as run_datetime", + "json_row", + f""""{temp_msg}>{df_prq_read_filtered_count} Rows - Validation Failed !" 
as validation_msg""", + f"""'{rds_jdbc_conn_obj.rds_db_name}' as database_name""", + f"""'{db_sch_tbl}' as full_table_name""", + """'False' as table_to_ap""" + ) + LOGGER.warn(f"{rds_db_table_name}: Validation Failed - 2") + df_dv_output = df_dv_output.union(df_subtract_temp) + # ----------------------------------------------------- + + return df_dv_output + + +def write_rds_to_s3parquet(df_rds_query_read: DataFrame, prq_table_folder_path): + + s3_table_folder_path = f"""s3://{PARQUET_OUTPUT_S3_BUCKET_NAME}/{prq_table_folder_path}""" + + if S3Methods.check_s3_folder_path_if_exists(PARQUET_OUTPUT_S3_BUCKET_NAME, + prq_table_folder_path): + + LOGGER.info(f"""Purging S3-path: {s3_table_folder_path}""") + glueContext.purge_s3_path(s3_table_folder_path, options={"retentionPeriod": 0}) + # -------------------------------------------------------------------- + + dydf = DynamicFrame.fromDF(df_rds_query_read, glueContext, "final_spark_df") + + glueContext.write_dynamic_frame.from_options(frame=dydf, connection_type='s3', format='parquet', + connection_options={ + 'path': f"""{s3_table_folder_path}/""" + }, + format_options={ + 'useGlueParquetWriter': True, + 'compression': 'snappy', + 'blockSize': 13421773, + 'pageSize': 1048576 + }) + LOGGER.info(f"""df_rds_query_read - dataframe written to -> {s3_table_folder_path}/""") + + +def write_dv_report_to_s3parquet(df_dv_output: DataFrame, + rds_jdbc_conn_obj, + db_sch_tbl_name): + + db_name = rds_jdbc_conn_obj.rds_db_name + df_dv_output = df_dv_output.repartition(1) + + prq_table_folder_path = f"""{args["glue_catalog_db_name"]}/{args["glue_catalog_tbl_name"]}""" + s3_table_folder_path = f'''s3://{GLUE_CATALOG_DV_BUCKET}/{prq_table_folder_path}''' + + if S3Methods.check_s3_folder_path_if_exists(GLUE_CATALOG_DV_BUCKET, + f'''{prq_table_folder_path}/database_name={db_name}/full_table_name={db_sch_tbl_name}''' + ): + LOGGER.info( + f"""Purging S3-path: {s3_table_folder_path}/database_name={db_name}/full_table_name={db_sch_tbl_name}""") + + glueContext.purge_s3_path(f"""{s3_table_folder_path}/database_name={db_name}/full_table_name={db_sch_tbl_name}""", + options={"retentionPeriod": 0} + ) + # --------------------------------------------------------------------- + + dydf = DynamicFrame.fromDF(df_dv_output, glueContext, "final_spark_df") + + glueContext.write_dynamic_frame.from_options(frame=dydf, connection_type='s3', format='parquet', + connection_options={ + 'path': f"""{s3_table_folder_path}/""", + "partitionKeys": ["database_name", "full_table_name"] + }, + format_options={ + 'useGlueParquetWriter': True, + 'compression': 'snappy', + 'blockSize': 13421773, + 'pageSize': 1048576 + }) + LOGGER.info( + f"""'{db_sch_tbl_name}' validation report written to -> {s3_table_folder_path}/""") + +# =================================================================================================== + + +if __name__ == "__main__": + + # ------------------------------------------- + if args.get("rds_sqlserver_db", None) is None: + LOGGER.error(f"""'rds_sqlserver_db' runtime input is missing! Exiting ...""") + sys.exit(1) + else: + rds_sqlserver_db = args["rds_sqlserver_db"] + LOGGER.info(f"""Given rds_sqlserver_db = {rds_sqlserver_db}""") + + if args.get("rds_sqlserver_db_schema", None) is None: + LOGGER.error( + f"""'rds_sqlserver_db_schema' runtime input is missing! 
Exiting ...""") + sys.exit(1) + else: + rds_sqlserver_db_schema = args["rds_sqlserver_db_schema"] + LOGGER.info( + f"""Given rds_sqlserver_db_schema = {rds_sqlserver_db_schema}""") + # ------------------------------------------- + + rds_jdbc_conn_obj = RDS_JDBC_CONNECTION(RDS_DB_HOST_ENDPOINT, + RDS_DB_INSTANCE_PWD, + rds_sqlserver_db, + rds_sqlserver_db_schema) + # ------------------------------------------- + + try: + rds_db_name = rds_jdbc_conn_obj.check_if_rds_db_exists()[0] + except IndexError: + LOGGER.error( + f"""Given database name not found! >> {args['rds_sqlserver_db']} <<""") + sys.exit(1) + except Exception as e: + LOGGER.error(e) + # ------------------------------------------------------- + + rds_sqlserver_db_tbl_list = rds_jdbc_conn_obj.get_rds_db_tbl_list() + if not rds_sqlserver_db_tbl_list: + LOGGER.error(f"""rds_sqlserver_db_tbl_list - is empty. Exiting ...!""") + sys.exit(1) + else: + message_prefix = f"""Total List of tables available in {rds_db_name}.{rds_sqlserver_db_schema}""" + LOGGER.info(f"""{message_prefix}\n{rds_sqlserver_db_tbl_list}""") + # ------------------------------------------------------- + + if args.get("rds_sqlserver_db_table", None) is None: + LOGGER.error( + f"""'rds_sqlserver_db_table' runtime input is missing! Exiting ...""") + sys.exit(1) + else: + rds_sqlserver_db_table = args["rds_sqlserver_db_table"] + table_name_prefix = f"""{rds_db_name}_{rds_sqlserver_db_schema}""" + db_sch_tbl = f"""{table_name_prefix}_{rds_sqlserver_db_table}""" + # ------------------------------------------------------- + + if db_sch_tbl not in rds_sqlserver_db_tbl_list: + LOGGER.error(f"""'{db_sch_tbl}' - is not an existing table! Exiting ...""") + sys.exit(1) + else: + LOGGER.info(f""">> Given RDS SqlServer-DB Table: {rds_sqlserver_db_table} <<""") + # ------------------------------------------------------- + + rds_db_tbl_pkey_column = args['rds_db_tbl_pkey_column'] + LOGGER.info(f"""rds_db_tbl_pkey_column = {rds_db_tbl_pkey_column}""") + # ----------------------------------------- + + rds_db_table_empty_df = rds_jdbc_conn_obj.get_rds_db_table_empty_df(rds_sqlserver_db_table) + + df_rds_dtype_dict = CustomPysparkMethods.get_dtypes_dict(rds_db_table_empty_df) + int_dtypes_colname_list = [colname for colname, dtype in df_rds_dtype_dict.items() + if dtype in INT_DATATYPES_LIST] + + if rds_db_tbl_pkey_column not in int_dtypes_colname_list: + LOGGER.error(f"""rds_db_tbl_pkey_column = {rds_db_tbl_pkey_column} is not an integer datatype column! 
+ """.strip()) + sys.exit(1) + # ---------------------------------------------------- + + jdbc_read_partitions_num = int(args.get('jdbc_read_partitions_num', 0)) + + jdbc_read_partitions_num = 1 if jdbc_read_partitions_num <= 0 \ + else jdbc_read_partitions_num + LOGGER.info(f"""jdbc_read_partitions_num = {jdbc_read_partitions_num}""") + + agg_row_dict = rds_jdbc_conn_obj.get_min_max_pkey_filter( + rds_sqlserver_db_table, + rds_db_tbl_pkey_column + ) + min_pkey = agg_row_dict['min_value'] + LOGGER.info(f"""min_pkey = {min_pkey}""") + + max_pkey = agg_row_dict['max_value'] + LOGGER.info(f"""max_pkey = {max_pkey}""") + + rds_transformed_query = QUERY_STR_DICT[f"{db_sch_tbl}"] + LOGGER.info(f"""rds_transformed_query = \n{rds_transformed_query}""") + + df_rds_query_read = rds_jdbc_conn_obj.get_rds_df_read_query_pkey_parallel( + rds_transformed_query, + rds_db_tbl_pkey_column, + min_pkey, + max_pkey, + jdbc_read_partitions_num + ) + + LOGGER.info( + f"""df_rds_query_read-{db_sch_tbl}: READ PARTITIONS = {df_rds_query_read.rdd.getNumPartitions()}""") + + df_rds_query_read_columns = df_rds_query_read.columns + LOGGER.info(f"""df_rds_query_read_columns = {df_rds_query_read_columns}""") + + df_rds_query_read_schema = df_rds_query_read.schema + LOGGER.info(f"""df_rds_query_read_schema = \n{[obj for obj in df_rds_query_read_schema]}""") + + rds_df_repartition_num = int(args['rds_df_repartition_num']) + + if rds_df_repartition_num != 0: + df_rds_query_read = df_rds_query_read.repartition(rds_df_repartition_num, + rds_db_tbl_pkey_column) + int_repartitions = df_rds_query_read.rdd.getNumPartitions() + LOGGER.info( + f"""df_rds_query_read: After Repartitioning -> {int_repartitions} partitions.""") + # ---------------------------------------------------- + + rename_output_table_folder = args.get('rename_migrated_prq_tbl_folder', None) + prq_table_folder_name = rds_sqlserver_db_table if rename_output_table_folder is None \ + else rename_output_table_folder + # --------------------------------------- + + prq_table_folder_path = f"""{rds_db_name}/{rds_sqlserver_db_schema}/{prq_table_folder_name}""" + LOGGER.info(f"""prq_table_folder_path = {prq_table_folder_path}""") + + validation_only_run = args['validation_only_run'] + + validation_sample_fraction_float = float(args.get('validation_sample_fraction_float', 0)) + validation_sample_fraction_float = 1.0 if validation_sample_fraction_float > 1 \ + else validation_sample_fraction_float + + temp_msg = f"""validation_sample_fraction_float = {validation_sample_fraction_float}""" + if validation_only_run != "true": + if validation_sample_fraction_float != 0: + df_rds_query_read = df_rds_query_read.cache() + write_rds_to_s3parquet(df_rds_query_read, prq_table_folder_path) + print_existing_s3parquet_stats(prq_table_folder_path) + LOGGER.info(f"""> Starting validation: {temp_msg}""") + df_dv_output = compare_rds_parquet_samples(rds_jdbc_conn_obj, + rds_sqlserver_db_table, + df_rds_query_read, + rds_db_tbl_pkey_column, + prq_table_folder_path, + validation_sample_fraction_float + ) + write_dv_report_to_s3parquet(df_dv_output, rds_jdbc_conn_obj, db_sch_tbl) + df_rds_query_read.unpersist() + else: + write_rds_to_s3parquet(df_rds_query_read, prq_table_folder_path) + print_existing_s3parquet_stats(prq_table_folder_path) + LOGGER.warn(f"""{temp_msg}\nValidation not enabled. 
Skipping ...""") + + else: + LOGGER.warn(f""">> validation_only_run - ENABLED <<""") + print_existing_s3parquet_stats(prq_table_folder_path) + + if validation_sample_fraction_float != 0: + LOGGER.info(f"""> Starting validation: {temp_msg}""") + df_dv_output = compare_rds_parquet_samples(rds_jdbc_conn_obj, + rds_sqlserver_db_table, + df_rds_query_read, + rds_db_tbl_pkey_column, + prq_table_folder_path, + validation_sample_fraction_float + ) + write_dv_report_to_s3parquet(df_dv_output, rds_jdbc_conn_obj, db_sch_tbl) + else: + LOGGER.warn(f"""{temp_msg} => Skipping Validation !""") + # --------------------------------------------------------------- + + job.commit() diff --git a/terraform/environments/electronic-monitoring-data/glue-job/etl_rds_tbl_hash_rows_to_s3_prq_partitionby_yyyy_mm.py b/terraform/environments/electronic-monitoring-data/glue-job/etl_rds_tbl_hash_rows_to_s3_prq_partitionby_yyyy_mm.py new file mode 100644 index 00000000000..f53680b494b --- /dev/null +++ b/terraform/environments/electronic-monitoring-data/glue-job/etl_rds_tbl_hash_rows_to_s3_prq_partitionby_yyyy_mm.py @@ -0,0 +1,422 @@ + +import sys + +# from logging import getLogger +# import pandas as pd + +from glue_data_validation_lib import RDSConn_Constants +from glue_data_validation_lib import SparkSession +from glue_data_validation_lib import Logical_Constants +from glue_data_validation_lib import RDS_JDBC_CONNECTION +from glue_data_validation_lib import S3Methods +from glue_data_validation_lib import CustomPysparkMethods + +from awsglue.utils import getResolvedOptions +from awsglue.transforms import * + +from awsglue.dynamicframe import DynamicFrame +from awsglue.job import Job + +# from pyspark.conf import SparkConf +from pyspark.sql import DataFrame +import pyspark.sql.functions as F +# import pyspark.sql.types as T + +# from pyspark.storagelevel import StorageLevel + +# =============================================================================== + +sc = SparkSession.sc +sc._jsc.hadoopConfiguration().set("spark.dynamicAllocation.enabled", "true") + +spark = SparkSession.spark + +glueContext = SparkSession.glueContext +LOGGER = glueContext.get_logger() + +# =============================================================================== + + +# =============================================================================== + + +# Organise capturing input parameters. 
+DEFAULT_INPUTS_LIST = ["JOB_NAME", + "script_bucket_name", + "rds_db_host_ep", + "rds_db_pwd", + "rds_sqlserver_db", + "rds_sqlserver_db_schema", + "rds_sqlserver_db_table", + "rds_db_tbl_pkey_column", + "date_partition_column_name", + "parallel_jdbc_conn_num", + "rds_yyyy_mm_df_repartition_num", + "year_partition_bool", + "month_partition_bool", + "hashed_output_s3_bucket_name", + "rds_db_table_hashed_rows_parent_dir" + ] + +OPTIONAL_INPUTS = [ + "rds_query_where_clause", + "coalesce_int" +] + +AVAILABLE_ARGS_LIST = CustomPysparkMethods.resolve_args(DEFAULT_INPUTS_LIST+OPTIONAL_INPUTS) + +args = getResolvedOptions(sys.argv, AVAILABLE_ARGS_LIST) + +# ------------------------------ + +job = Job(glueContext) +job.init(args["JOB_NAME"], args) + +# ------------------------------ + +RDS_DB_HOST_ENDPOINT = args["rds_db_host_ep"] +RDS_DB_PORT = RDSConn_Constants.RDS_DB_PORT +RDS_DB_INSTANCE_USER = RDSConn_Constants.RDS_DB_INSTANCE_USER +RDS_DB_INSTANCE_PWD = args["rds_db_pwd"] +RDS_DB_INSTANCE_DRIVER = RDSConn_Constants.RDS_DB_INSTANCE_DRIVER + +HASHED_OUTPUT_S3_BUCKET_NAME = args["hashed_output_s3_bucket_name"] +RDS_DB_TABLE_HASHED_ROWS_PARENT_DIR = args["rds_db_table_hashed_rows_parent_dir"] + +ATHENA_RUN_OUTPUT_LOCATION = f"s3://{HASHED_OUTPUT_S3_BUCKET_NAME}/athena_temp_store/" + +INT_DATATYPES_LIST = Logical_Constants.INT_DATATYPES_LIST + +TBL_COLS_CONVERT_FMT_DICT = {'GPSPosition': + {'Latitude': 'CONVERT(VARCHAR(MAX), CONVERT(DECIMAL(10,7), Latitude))', + 'RecordedDatetime':'CONVERT(VARCHAR, RecordedDatetime, 120)', + 'AuditDateTime':'CONVERT(VARCHAR, AuditDateTime, 121)' + } + } + +# =============================================================================== + + +def write_rds_df_to_s3_parquet_v2(df_rds_write: DataFrame, + partition_by_cols, + prq_table_folder_path): + """ + Write dynamic frame in S3 and catalog it. + """ + + # s3://dms-rds-to-parquet-20240606144708618700000001/g4s_emsys_mvp/dbo/GPSPosition_V2/ + # s3://dms-rds-to-parquet-20240606144708618700000001/g4s_emsys_mvp/dbo/GPSPosition_V2/year=2019/month=10/ + + s3_table_folder_path = f"""s3://{HASHED_OUTPUT_S3_BUCKET_NAME}/{prq_table_folder_path}""" + + # Note: The below block of code erases the existing partition & use cautiously. 
+ # partition_path = f"""{s3_table_folder_path}/year=2019/month=10/""" + # if check_s3_folder_path_if_exists(PARQUET_OUTPUT_S3_BUCKET_NAME, partition_path): + + # LOGGER.info(f"""Purging S3-path: {partition_path}""") + # glueContext.purge_s3_path(partition_path, options={"retentionPeriod": 0}) + # # -------------------------------------------------------------------- + + dynamic_df_write = glueContext.getSink( + format_options={ + "compression": "snappy", + "useGlueParquetWriter": True + }, + path=f"""{s3_table_folder_path}/""", + connection_type="s3", + updateBehavior="UPDATE_IN_DATABASE", + partitionKeys=partition_by_cols, + enableUpdateCatalog=True, + transformation_ctx="dynamic_df_write", + ) + + catalog_db, catalog_db_tbl = prq_table_folder_path.split(f"""/{args['rds_sqlserver_db_schema']}/""") + dynamic_df_write.setCatalogInfo( + catalogDatabase=catalog_db.lower(), + catalogTableName=catalog_db_tbl.lower() + ) + + dynamic_df_write.setFormat("glueparquet") + + dydf_rds_read = DynamicFrame.fromDF(df_rds_write, glueContext, "final_spark_df") + dynamic_df_write.writeFrame(dydf_rds_read) + + LOGGER.info(f"""'{db_sch_tbl}' table data written to -> {s3_table_folder_path}/""") + + # ddl_refresh_table_partitions = f"msck repair table {catalog_db.lower()}.{catalog_db_tbl.lower()}" + # LOGGER.info(f"""ddl_refresh_table_partitions:> \n{ddl_refresh_table_partitions}""") + + # # Refresh table prtitions + # execution_id = run_athena_query(ddl_refresh_table_partitions) + # LOGGER.info(f"SQL-Statement execution id: {execution_id}") + + # # Check query execution + # query_status = has_query_succeeded(execution_id=execution_id) + # LOGGER.info(f"Query state: {query_status}") + + +def write_rds_df_to_s3_parquet(df_rds_write: DataFrame, + partition_by_cols, + prq_table_folder_path): + + # s3://dms-rds-to-parquet-20240606144708618700000001/g4s_cap_dw/dbo/F_History/ + + s3_table_folder_path = f"""s3://{HASHED_OUTPUT_S3_BUCKET_NAME}/{prq_table_folder_path}""" + + if S3Methods.check_s3_folder_path_if_exists(HASHED_OUTPUT_S3_BUCKET_NAME, + prq_table_folder_path): + + LOGGER.info(f"""Purging S3-path: {s3_table_folder_path}""") + glueContext.purge_s3_path(s3_table_folder_path, options={"retentionPeriod": 0}) + # -------------------------------------------------------------------- + + # catalog_db, catalog_db_tbl = prq_table_folder_path.split(f"""/{args['rds_sqlserver_db_schema']}/""") + + dydf = DynamicFrame.fromDF(df_rds_write, glueContext, "final_spark_df") + + glueContext.write_dynamic_frame.from_options(frame=dydf, connection_type='s3', format='parquet', + connection_options={ + 'path': f"""{s3_table_folder_path}/""", + "partitionKeys": partition_by_cols + }, + format_options={ + 'useGlueParquetWriter': True, + 'compression': 'snappy', + 'blockSize': 13421773, + 'pageSize': 1048576 + }) + LOGGER.info(f"""'{db_sch_tbl}' table data written to -> {s3_table_folder_path}/""") + +# =================================================================================================== + + +if __name__ == "__main__": + + # VERIFY GIVEN INPUTS - START + # ------------------------------------------- + + if args.get("rds_sqlserver_db", None) is None: + LOGGER.error(f"""'rds_sqlserver_db' runtime input is missing! Exiting ...""") + sys.exit(1) + else: + rds_sqlserver_db = args["rds_sqlserver_db"] + LOGGER.info(f"""Given rds_sqlserver_db = {rds_sqlserver_db}""") + + if args.get("rds_sqlserver_db_schema", None) is None: + LOGGER.error(f"""'rds_sqlserver_db_schema' runtime input is missing! 
Exiting ...""") + sys.exit(1) + else: + rds_sqlserver_db_schema = args["rds_sqlserver_db_schema"] + LOGGER.info(f"""Given rds_sqlserver_db_schema = {rds_sqlserver_db_schema}""") + # ------------------------------------------- + + rds_jdbc_conn_obj = RDS_JDBC_CONNECTION(RDS_DB_HOST_ENDPOINT, + RDS_DB_INSTANCE_PWD, + rds_sqlserver_db, + rds_sqlserver_db_schema) + + try: + rds_db_name = rds_jdbc_conn_obj.check_if_rds_db_exists()[0] + except IndexError: + LOGGER.error(f"""Given database name not found! >> {args['rds_sqlserver_db']} <<""") + sys.exit(1) + except Exception as e: + LOGGER.error(e) + # ------------------------------------------------------- + + rds_sqlserver_db_tbl_list = rds_jdbc_conn_obj.get_rds_db_tbl_list() + if not rds_sqlserver_db_tbl_list: + LOGGER.error(f"""rds_sqlserver_db_tbl_list - is empty. Exiting ...!""") + sys.exit(1) + else: + message_prefix = f"""Total List of tables available in {rds_db_name}.{rds_sqlserver_db_schema}""" + LOGGER.info(f"""{message_prefix}\n{rds_sqlserver_db_tbl_list}""") + # ------------------------------------------------------- + + if args.get("rds_sqlserver_db_table", None) is None: + LOGGER.error(f"""'rds_sqlserver_db_table' runtime input is missing! Exiting ...""") + sys.exit(1) + else: + rds_sqlserver_db_table = args["rds_sqlserver_db_table"] + table_name_prefix = f"""{rds_db_name}_{rds_sqlserver_db_schema}""" + db_sch_tbl = f"""{table_name_prefix}_{rds_sqlserver_db_table}""" + # -------------------------------------------------------------------- + + if db_sch_tbl not in rds_sqlserver_db_tbl_list: + LOGGER.error(f"""'{db_sch_tbl}' - is not an existing table! Exiting ...""") + sys.exit(1) + else: + LOGGER.info(f""">> Given RDS SqlServer-DB Table: {rds_sqlserver_db_table} <<""") + # ------------------------------------------------------- + + rds_db_tbl_pkey_column = args['rds_db_tbl_pkey_column'] + LOGGER.info(f""">> rds_db_tbl_pkey_column = {rds_db_tbl_pkey_column} <<""") + + rds_db_table_empty_df = rds_jdbc_conn_obj.get_rds_db_table_empty_df( + rds_sqlserver_db_table) + + df_rds_dtype_dict = CustomPysparkMethods.get_dtypes_dict(rds_db_table_empty_df) + int_dtypes_colname_list = [colname for colname, dtype in df_rds_dtype_dict.items() + if dtype in INT_DATATYPES_LIST] + + if rds_db_tbl_pkey_column not in int_dtypes_colname_list: + LOGGER.error( + f"""PrimaryKey column-'{rds_db_tbl_pkey_column}' is not an integer datatype !""") + sys.exit(1) + # --------------------------------------- + + all_columns_except_pkey = list() + conversion_col_list = list() + if TBL_COLS_CONVERT_FMT_DICT.get( + f"{rds_sqlserver_db_table}", None) is not None: + conversion_col_list = list( + TBL_COLS_CONVERT_FMT_DICT[ + f"{rds_sqlserver_db_table}"].keys() + ) + for e in rds_db_table_empty_df.schema.fields: + if e.name == rds_db_tbl_pkey_column: + continue + + if e.name in conversion_col_list: + all_columns_except_pkey.append( + TBL_COLS_CONVERT_FMT_DICT[f"{rds_sqlserver_db_table}"][f"{e.name}"] + ) + else: + all_columns_except_pkey.append(f"{e.name}") + + LOGGER.info(f""">> all_columns_except_pkey = {all_columns_except_pkey} <<""") + # --------------------------------------- + + date_partition_column_name = args['date_partition_column_name'] + LOGGER.info(f"""date_partition_column_name = {date_partition_column_name}""") + + parallel_jdbc_conn_num = int(args['parallel_jdbc_conn_num']) + LOGGER.info(f"""parallel_jdbc_conn_num = {parallel_jdbc_conn_num}""") + + rds_yyyy_mm_df_repartition_num = int(args['rds_yyyy_mm_df_repartition_num']) + 
LOGGER.info(f"""rds_yyyy_mm_df_repartition_num = {rds_yyyy_mm_df_repartition_num}""") + + yyyy_mm_partition_by_cols = list() + if args['year_partition_bool'] == 'true': + yyyy_mm_partition_by_cols.append("year") + + if args['month_partition_bool'] == 'true': + yyyy_mm_partition_by_cols.append("month") + + LOGGER.info(f"""yyyy_mm_partition_by_cols = {yyyy_mm_partition_by_cols}""") + + prq_table_folder_path = f""" + {RDS_DB_TABLE_HASHED_ROWS_PARENT_DIR}/{rds_db_name}/{rds_sqlserver_db_schema}/{rds_sqlserver_db_table}""".lstrip() + # ----------------------------------------- + # VERIFY GIVEN INPUTS - END + # ----------------------------------------- + + agg_row_dict_list = rds_jdbc_conn_obj.get_min_max_groupby_month( + rds_sqlserver_db_table, + date_partition_column_name, + rds_db_tbl_pkey_column, + args.get('rds_query_where_clause', None) + ) + LOGGER.info(f"""agg_row_dict_list:>\n{[agg_row_dict for agg_row_dict in agg_row_dict_list]}""") + + rds_db_select_query_str = f""" + SELECT {rds_db_tbl_pkey_column}, + LOWER(SUBSTRING(CONVERT(VARCHAR(66), + HASHBYTES('SHA2_256', CONCAT_WS('', {', '.join(all_columns_except_pkey)})), 1), 3, 66)) AS RowHash, + YEAR({date_partition_column_name}) AS year, + MONTH({date_partition_column_name}) AS month + FROM {rds_sqlserver_db_schema}.[{rds_sqlserver_db_table}] + """.strip() + + rds_query_where_clause = args.get('rds_query_where_clause', None) + + + for agg_row_dict in agg_row_dict_list: + + agg_row_year = agg_row_dict['year'] + agg_row_month = agg_row_dict['month'] + min_pkey_value = agg_row_dict['min_pkey_value'] + max_pkey_value = agg_row_dict['max_pkey_value'] + LOGGER.info(f"""agg_row_year = {agg_row_year}""") + LOGGER.info(f"""agg_row_month = {agg_row_month}""") + LOGGER.info(f"""min_pkey_value = {min_pkey_value}""") + LOGGER.info(f"""max_pkey_value = {max_pkey_value}""") + + pkey_between_clause_str = f""" + WHERE {rds_db_tbl_pkey_column} between {min_pkey_value} and {max_pkey_value}""".strip() + + rds_db_select_query_str = rds_db_select_query_str + pkey_between_clause_str + + if rds_query_where_clause is not None: + rds_query_where_clause = rds_query_where_clause.strip() + rds_db_select_query_str = rds_db_select_query_str + \ + f""" AND {rds_query_where_clause}""" + + rds_hashed_rows_df = rds_jdbc_conn_obj.get_rds_df_read_query_pkey_parallel( + rds_db_select_query_str, + rds_db_tbl_pkey_column, + min_pkey_value, + max_pkey_value, + parallel_jdbc_conn_num + ) + # ---------------------------------------------------------- + temp_msg = f"""{agg_row_year}_{agg_row_month}-rds_hashed_rows_df""" + LOGGER.info( + f"""{temp_msg}: READ PARTITIONS = {rds_hashed_rows_df.rdd.getNumPartitions()}""") + + if 'year' in yyyy_mm_partition_by_cols \ + and 'year' not in rds_hashed_rows_df.columns: + rds_hashed_rows_df = rds_hashed_rows_df.withColumn( + "year", F.year(date_partition_column_name)) + + if 'month' in yyyy_mm_partition_by_cols \ + and 'month' not in rds_hashed_rows_df.columns: + rds_hashed_rows_df = rds_hashed_rows_df.withColumn( + "month", F.month(date_partition_column_name)) + + rds_hashed_rows_df = rds_hashed_rows_df.where( + f"""year = {agg_row_year} and month = {agg_row_month}""") + + if rds_yyyy_mm_df_repartition_num != 0: + # Note: Default 'partitionby_columns' values may not be appropriate for all the scenarios. + # So, the user can edit the list-'partitionby_columns' value(s) if required at runtime. + # Example: partitionby_columns = ['month'] + # The above scenario may be when the rds-source-dataframe filtered on single 'year' value. 
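A minimal, standalone PySpark sketch (not part of this change) illustrating the repartition call described in the note above; the toy rows, column names and local session are illustrative only. The assignment that follows applies the job's default partitionby_columns.

# Illustrative sketch only, not part of the change set.
from pyspark.sql import SparkSession

spark = SparkSession.builder.master("local[2]").appName("repartition-sketch").getOrCreate()

toy_df = spark.createDataFrame(
    [(1, 2019, 10), (2, 2019, 10), (3, 2019, 11), (4, 2020, 1)],
    ["GPSPositionID", "year", "month"],
)

# Hash-partition the rows into 4 Spark partitions on (year, month, GPSPositionID),
# the default column set used below, so rows sharing those values land in the same partition.
repartitioned_df = toy_df.repartition(4, "year", "month", "GPSPositionID")
print(repartitioned_df.rdd.getNumPartitions())  # 4

spark.stop()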
+ partitionby_columns = yyyy_mm_partition_by_cols + [rds_db_tbl_pkey_column] + + LOGGER.info(f"""{temp_msg}: Repartitioning on {partitionby_columns}""") + rds_hashed_rows_df = rds_hashed_rows_df.repartition(rds_yyyy_mm_df_repartition_num, *partitionby_columns) + + LOGGER.info( + f"""{temp_msg}: After Repartitioning -> {rds_hashed_rows_df.rdd.getNumPartitions()} partitions.""") + # ---------------------------------------------------- + + # Note: If many small size parquet files are created for each partition, + # consider using 'orderBy', 'coalesce' features appropriately before writing dataframe into S3 bucket. + # df_rds_write = rds_hashed_rows_df.coalesce(1) + + # NOTE: When filtered rows (ex: based on 'year') are used in separate consecutive batch runs, + # consider to appropriately use the parquet write functions with features in built as per the below details. + # - write_rds_df_to_s3_parquet(): Overwrites the existing partitions by default. + # - write_rds_df_to_s3_parquet_v2(): Adds the new partitions & also the corresponding partitions are updated in athena tables. + coalesce_int = int(args.get('coalesce_int', 0)) + if coalesce_int != 0: + LOGGER.warn(f"""{temp_msg}:> coalesce_int = {coalesce_int}""") + rds_hashed_rows_df_write = rds_hashed_rows_df.coalesce(coalesce_int) + else: + rds_hashed_rows_df_write = rds_hashed_rows_df.alias("rds_hashed_rows_df_write") + + write_rds_df_to_s3_parquet(rds_hashed_rows_df_write, + yyyy_mm_partition_by_cols, + prq_table_folder_path) + + LOGGER.info(f"""Partition - '{prq_table_folder_path}/{agg_row_year}/{agg_row_month}' writing completed.""") + # ----------------------------------------------- + + total_files, total_size = S3Methods.get_s3_folder_info(HASHED_OUTPUT_S3_BUCKET_NAME, + f"{prq_table_folder_path}/") + msg_part_1 = f"""total_files={total_files}""" + msg_part_2 = f"""total_size_mb={total_size/1024/1024:.2f}""" + LOGGER.info(f"""'{prq_table_folder_path}': {msg_part_1}, {msg_part_2}""") + + job.commit() diff --git a/terraform/environments/electronic-monitoring-data/glue-job/etl_rds_to_s3_parquet_partitionby_yyyy_mm.py b/terraform/environments/electronic-monitoring-data/glue-job/etl_rds_to_s3_parquet_partitionby_yyyy_mm.py index a9d461e091c..5003568823b 100644 --- a/terraform/environments/electronic-monitoring-data/glue-job/etl_rds_to_s3_parquet_partitionby_yyyy_mm.py +++ b/terraform/environments/electronic-monitoring-data/glue-job/etl_rds_to_s3_parquet_partitionby_yyyy_mm.py @@ -420,7 +420,7 @@ def write_rds_df_to_s3_parquet(df_rds_write: DataFrame, # ----------------------------------------------- total_files, total_size = S3Methods.get_s3_folder_info(PARQUET_OUTPUT_S3_BUCKET_NAME, - prq_table_folder_path) + f"{prq_table_folder_path}/") msg_part_1 = f"""total_files={total_files}""" msg_part_2 = f"""total_size_mb={total_size/1024/1024:.2f}""" LOGGER.info(f"""'{prq_table_folder_path}': {msg_part_1}, {msg_part_2}""") diff --git a/terraform/environments/electronic-monitoring-data/glue-job/etl_table_rows_hashvalue_to_parquet.py b/terraform/environments/electronic-monitoring-data/glue-job/etl_table_rows_hashvalue_to_parquet.py index bc6896c1eb6..ffba0e5cad5 100644 --- a/terraform/environments/electronic-monitoring-data/glue-job/etl_table_rows_hashvalue_to_parquet.py +++ b/terraform/environments/electronic-monitoring-data/glue-job/etl_table_rows_hashvalue_to_parquet.py @@ -201,7 +201,7 @@ def write_parquet_to_s3(hashed_rows_prq_df_write: DataFrame, hashed_rows_prq_ful FROM {rds_sqlserver_db_schema}.[{rds_sqlserver_db_table}] """.strip() - 
parallel_jdbc_conn_num = args['parallel_jdbc_conn_num'] + parallel_jdbc_conn_num = int(args['parallel_jdbc_conn_num']) parquet_df_write_repartition_num = int(args.get('parquet_df_write_repartition_num', 0)) diff --git a/terraform/environments/electronic-monitoring-data/glue-job/glue_data_validation_lib.zip b/terraform/environments/electronic-monitoring-data/glue-job/glue_data_validation_lib.zip index 3bf9af07730..b3757dd3162 100644 Binary files a/terraform/environments/electronic-monitoring-data/glue-job/glue_data_validation_lib.zip and b/terraform/environments/electronic-monitoring-data/glue-job/glue_data_validation_lib.zip differ diff --git a/terraform/environments/electronic-monitoring-data/glue-job/parquet_resize_or_partitionby_yyyy_mm_dd.py b/terraform/environments/electronic-monitoring-data/glue-job/parquet_resize_or_partitionby_yyyy_mm_dd.py index 5523b6a8560..6362f375934 100644 --- a/terraform/environments/electronic-monitoring-data/glue-job/parquet_resize_or_partitionby_yyyy_mm_dd.py +++ b/terraform/environments/electronic-monitoring-data/glue-job/parquet_resize_or_partitionby_yyyy_mm_dd.py @@ -215,7 +215,7 @@ def write_to_s3_parquet(df_prq_write: DataFrame, # ----------------------------------------------- total_files, total_size = S3Methods.get_s3_folder_info(PARQUET_WRITE_S3_BUCKET_NAME, - output_partition_path) + f"{output_partition_path}/") msg_part_1 = f"""total_files={total_files}""" msg_part_2 = f"""total_size_mb={total_size/1024/1024:.2f}""" LOGGER.info(f"""'{PRQ_WRITE_TABLE_FOLDER_PATH}': {msg_part_1}, {msg_part_2}""") diff --git a/terraform/environments/electronic-monitoring-data/glue-job/reusable_module/glue_data_validation_lib.py b/terraform/environments/electronic-monitoring-data/glue-job/reusable_module/glue_data_validation_lib.py index 3862c16a1b3..a78700aaac3 100644 --- a/terraform/environments/electronic-monitoring-data/glue-job/reusable_module/glue_data_validation_lib.py +++ b/terraform/environments/electronic-monitoring-data/glue-job/reusable_module/glue_data_validation_lib.py @@ -193,7 +193,7 @@ def get_rds_df_read_query_pkey_parallel(self, jdbc_partition_column, jdbc_partition_col_lowerbound, jdbc_partition_col_upperbound, - jdbc_read_partitions_num + jdbc_read_partitions_num=1 ) -> DataFrame: numPartitions = jdbc_read_partitions_num @@ -219,6 +219,16 @@ def get_rds_df_read_query_pkey_parallel(self, .option("numPartitions", numPartitions) .load()) + def get_rds_df_read_query(self, in_db_query) -> DataFrame: + + return (self.spark.read.format("jdbc") + .option("url", self.rds_jdbc_url_v2) + .option("driver", self.RDS_DB_INSTANCE_DRIVER) + .option("user", self.RDS_DB_INSTANCE_USER) + .option("password", self.RDS_DB_INSTANCE_PWD) + .option("dbtable", f"""({in_db_query}) as t""") + .load()) + def get_rds_df_query_min_max_count(self, rds_table_name, @@ -695,10 +705,8 @@ def get_rds_tbl_col_attr_dict(df_col_stats: DataFrame) -> DataFrame: def get_nvl_select_list(in_rds_df: DataFrame, rds_jdbc_conn_obj, in_rds_tbl_name): - df_col_attr = rds_jdbc_conn_obj.get_rds_tbl_col_attributes( - in_rds_tbl_name) - df_col_attr_dict = CustomPysparkMethods.get_rds_tbl_col_attr_dict( - df_col_attr) + df_col_attr = rds_jdbc_conn_obj.get_rds_tbl_col_attributes(in_rds_tbl_name) + df_col_attr_dict = CustomPysparkMethods.get_rds_tbl_col_attr_dict(df_col_attr) df_col_dtype_dict = CustomPysparkMethods.get_dtypes_dict(in_rds_df) temp_select_list = list() diff --git a/terraform/environments/electronic-monitoring-data/glue-job/reusable_module/rds_transform_queries.py 
b/terraform/environments/electronic-monitoring-data/glue-job/reusable_module/rds_transform_queries.py new file mode 100644 index 00000000000..96f1f3b8ad8 --- /dev/null +++ b/terraform/environments/electronic-monitoring-data/glue-job/reusable_module/rds_transform_queries.py @@ -0,0 +1,48 @@ + +class SQLServer_Extract_Transform: + + QUERY_STR_DICT = { + "g4s_emsys_tpims_dbo_CurfewSegment": """ + SELECT [CurfewSegmentID] + ,[CurfewID] + ,[CurfewSegmentType] + ,[BeginDatetime] + ,[EndDatetime] + ,[LastModifiedDatetime] + ,[DayFlags] + ,[AdditionalInfo] + ,[WeeksOn] + ,[WeeksOff] + ,[WeeksOffset] + ,[ExportToGovernment] + ,[PublicHolidaySegmentID] + ,[IsPublicHoliday] + ,[RowVersion] + ,CAST(StartTime as varchar(8)) as StartTime + ,CAST(EndTime as varchar(8)) as EndTime + ,[SegmentCategoryLookupID] + ,[ParentCurfewSegmentID] + ,[TravelTimeBefore] + ,[TravelTimeAfter] + FROM [g4s_emsys_tpims].[dbo].[CurfewSegment] + """.strip(), + "g4s_emsys_tpims_dbo_GPSPositionLatest": """ + SELECT [GPSPositionID] + ,[PersonID] + ,[DeviceID] + ,[Latitude] + ,[Longitude] + ,[RecordedDatetime] + ,[Source] + ,[Pdop] + ,[Hdop] + ,[Vdop] + ,[Speed] + ,[Direction] + ,[SequenceNumber] + ,[AuditDateTime] + , SpatialPosition.STAsText() AS SpatialPosition + ,[SeparationViolation] + FROM [g4s_emsys_tpims].[dbo].[GPSPositionLatest] + """.strip() + } \ No newline at end of file diff --git a/terraform/environments/electronic-monitoring-data/lake_formation.tf b/terraform/environments/electronic-monitoring-data/lake_formation.tf new file mode 100644 index 00000000000..534d696659d --- /dev/null +++ b/terraform/environments/electronic-monitoring-data/lake_formation.tf @@ -0,0 +1,22 @@ +# ------------------------------------------------------------------------ +# Lake Formation - admin permissions +# https://user-guide.modernisation-platform.service.justice.gov.uk/runbooks/adding-admin-data-lake-formation-permissions.html +# ------------------------------------------------------------------------ + +data "aws_iam_role" "github_actions_role" { + name = "github-actions" +} + +data "aws_iam_roles" "modernisation_platform_sandbox_role" { + name_regex = "AWSReservedSSO_modernisation-platform-sandbox_.*" + path_prefix = "/aws-reserved/sso.amazonaws.com/" +} + +resource "aws_lakeformation_data_lake_settings" "emds_development" { + count = local.is-development ? 
1 : 0 + + admins = [ + "arn:aws:iam::${data.aws_caller_identity.current.account_id}:role/aws-reserved/sso.amazonaws.com/${data.aws_region.current.name}/${one(data.aws_iam_roles.modernisation_platform_sandbox_role.names)}", + data.aws_iam_role.github_actions_role.arn + ] +} diff --git a/terraform/environments/electronic-monitoring-data/modules/ap_airflow_load_data_iam_role/main.tf b/terraform/environments/electronic-monitoring-data/modules/ap_airflow_load_data_iam_role/main.tf index d4fc62fbaa8..e9a60188021 100644 --- a/terraform/environments/electronic-monitoring-data/modules/ap_airflow_load_data_iam_role/main.tf +++ b/terraform/environments/electronic-monitoring-data/modules/ap_airflow_load_data_iam_role/main.tf @@ -50,7 +50,7 @@ data "aws_iam_policy_document" "load_data" { ] } statement { - sid = "GluePermissionsForLoadAtriumUnstructured${local.camel-sid}" + sid = "GluePermissionsForLoad${local.camel-sid}" effect = "Allow" actions = [ "glue:GetTable", diff --git a/terraform/environments/hmpps-domain-services/locals_test.tf b/terraform/environments/hmpps-domain-services/locals_test.tf index 37caffd9ad0..c1b3dbb1a9d 100644 --- a/terraform/environments/hmpps-domain-services/locals_test.tf +++ b/terraform/environments/hmpps-domain-services/locals_test.tf @@ -180,6 +180,12 @@ locals { }) } + schedule_alarms_lambda = { + alarm_patterns = [ + "public-https-*-unhealthy-load-balancer-host", + ] + } + route53_zones = { "test.hmpps-domain.service.justice.gov.uk" = { lb_alias_records = [ diff --git a/terraform/environments/hmpps-oem/locals_cloudwatch_metric_alarms.tf b/terraform/environments/hmpps-oem/locals_cloudwatch_metric_alarms.tf index b2021f033ae..e480dabc3a6 100644 --- a/terraform/environments/hmpps-oem/locals_cloudwatch_metric_alarms.tf +++ b/terraform/environments/hmpps-oem/locals_cloudwatch_metric_alarms.tf @@ -26,13 +26,13 @@ locals { } preproduction = { - # corporate-staff-rostering - csr-r1-pp = ["r1.pp.csr.service.justice.gov.uk", false, "corporate-staff-rostering-pagerduty"] - csr-r2-pp = ["r2.pp.csr.service.justice.gov.uk", false, "corporate-staff-rostering-pagerduty"] - csr-r3-pp = ["r3.pp.csr.service.justice.gov.uk", false, "corporate-staff-rostering-pagerduty"] - csr-r4-pp = ["r4.pp.csr.service.justice.gov.uk", false, "corporate-staff-rostering-pagerduty"] - csr-r5-pp = ["r5.pp.csr.service.justice.gov.uk", false, "corporate-staff-rostering-pagerduty"] - csr-r6-pp = ["r6.pp.csr.service.justice.gov.uk", false, "corporate-staff-rostering-pagerduty"] + # corporate-staff-rostering - alarms disabled on request from Glenn + #csr-r1-pp = ["r1.pp.csr.service.justice.gov.uk", false, "corporate-staff-rostering-pagerduty"] + #csr-r2-pp = ["r2.pp.csr.service.justice.gov.uk", false, "corporate-staff-rostering-pagerduty"] + #csr-r3-pp = ["r3.pp.csr.service.justice.gov.uk", false, "corporate-staff-rostering-pagerduty"] + #csr-r4-pp = ["r4.pp.csr.service.justice.gov.uk", false, "corporate-staff-rostering-pagerduty"] + #csr-r5-pp = ["r5.pp.csr.service.justice.gov.uk", false, "corporate-staff-rostering-pagerduty"] + #csr-r6-pp = ["r6.pp.csr.service.justice.gov.uk", false, "corporate-staff-rostering-pagerduty"] csr-traina = ["traina.csr.service.justice.gov.uk", false, "corporate-staff-rostering-pagerduty"] # hmpps-domain-services @@ -51,9 +51,9 @@ locals { # oasys-national-reporting onr-pp = ["onr.pp-oasys.az.justice.gov.uk", true, "oasys-national-reporting-pagerduty"] - # planetfm - cafmtx-pp = ["cafmtx.pp.planetfm.service.justice.gov.uk", true, "planetfm-pagerduty"] - cafmwebx-pp = 
["cafmwebx.pp.planetfm.service.justice.gov.uk", true, "planetfm-pagerduty"] + # planetfm - alarms disabled on request from Glenn + #cafmtx-pp = ["cafmtx.pp.planetfm.service.justice.gov.uk", true, "planetfm-pagerduty"] + #cafmwebx-pp = ["cafmwebx.pp.planetfm.service.justice.gov.uk", true, "planetfm-pagerduty"] } production = { @@ -88,10 +88,10 @@ locals { # oasys-national-reporting onr = ["onr.oasys.az.justice.gov.uk", true, "oasys-national-reporting-pagerduty"] - # planetfm - cafmtrainweb = ["cafmtrainweb.planetfm.service.justice.gov.uk", true, "planetfm-pagerduty"] - cafmtx = ["cafmtx.planetfm.service.justice.gov.uk", true, "planetfm-pagerduty"] - cafmwebx2 = ["cafmwebx2.planetfm.service.justice.gov.uk", true, "planetfm-pagerduty"] + # planetfm - alarms disabled on request from Glenn + #cafmtrainweb = ["cafmtrainweb.planetfm.service.justice.gov.uk", true, "planetfm-pagerduty"] + #cafmtx = ["cafmtx.planetfm.service.justice.gov.uk", true, "planetfm-pagerduty"] + #cafmwebx2 = ["cafmwebx2.planetfm.service.justice.gov.uk", true, "planetfm-pagerduty"] } } diff --git a/terraform/environments/long-term-storage/call-centre-migration.tf b/terraform/environments/long-term-storage/call-centre-migration.tf new file mode 100644 index 00000000000..355872cca1c --- /dev/null +++ b/terraform/environments/long-term-storage/call-centre-migration.tf @@ -0,0 +1,64 @@ +resource "aws_cloudwatch_log_group" "call_centre" { + name_prefix = "call-centre-migration" + retention_in_days = 365 + tags = local.tags +} + +resource "aws_kms_key" "call_centre" { + enable_key_rotation = true + rotation_period_in_days = 90 + tags = local.tags +} + +resource "aws_kms_key_policy" "call_centre" { + key_id = aws_kms_key.call_centre.id + policy = data.aws_iam_policy_document.call_centre_kms_policy.json +} + +resource "aws_s3_bucket" "call_centre" { + bucket_prefix = "call-centre-migration" + tags = local.tags +} + +resource "aws_s3_bucket_policy" "call_centre" { + bucket = aws_s3_bucket.call_centre.id + policy = data.aws_iam_policy_document.call_centre_bucket_policy.json +} + +resource "aws_s3_bucket_server_side_encryption_configuration" "call_centre" { + bucket = aws_s3_bucket.call_centre.id + rule { + apply_server_side_encryption_by_default { + kms_master_key_id = aws_kms_key.call_centre.arn + sse_algorithm = "aws:kms" + } + } +} + +resource "aws_secretsmanager_secret" "call_centre" { + description = "Secret containing key-value pairs for AWS Transfer connector." 
+ force_overwrite_replica_secret = true + name = "aws/transfer/${aws_transfer_server.call_centre.id}/call-centre" + recovery_window_in_days = 0 + tags = local.tags +} + +resource "aws_transfer_server" "call_centre" { + logging_role = aws_iam_role.call_centre_transfer_logging.arn + structured_log_destinations = ["${aws_cloudwatch_log_group.call_centre.arn}:*"] + tags = merge( + local.tags, + { Name = "call-centre-migration" } + ) +} + +resource "aws_iam_role" "call_centre_transfer_logging" { + name_prefix = "call-centre-migration-logging" + assume_role_policy = data.aws_iam_policy_document.aws_transfer_assume_role_policy.json + tags = local.tags +} + +resource "aws_iam_role_policy_attachments_exclusive" "call_centre_transfer_logging" { + policy_arns = ["arn:aws:iam::aws:policy/service-role/AWSTransferLoggingAccess"] + role_name = aws_iam_role.call_centre_transfer_logging.name +} diff --git a/terraform/environments/long-term-storage/data.tf b/terraform/environments/long-term-storage/data.tf index 96a2521d17e..b1890ef759b 100644 --- a/terraform/environments/long-term-storage/data.tf +++ b/terraform/environments/long-term-storage/data.tf @@ -1 +1,97 @@ #### This file can be used to store data specific to the member account #### +data "aws_iam_policy_document" "aws_transfer_assume_role_policy" { + statement { + effect = "Allow" + + principals { + type = "Service" + identifiers = ["transfer.amazonaws.com"] + } + actions = ["sts:AssumeRole"] + condition { + test = "StringEquals" + values = [data.aws_caller_identity.current.account_id] + variable = "aws:SourceAccount" + } + condition { + test = "ArnLike" + values = ["arn:aws:transfer:${data.aws_region.current.name}:${data.aws_caller_identity.current.account_id}:user/*"] + variable = "aws:SourceArn" + } + } +} + +data "aws_iam_policy_document" "call_centre_bucket_policy" { + statement { + actions = [ + "s3:ListBucket", + "s3:GetBucketLocation" + ] + effect = "Allow" + principals { + type = "Service" + identifiers = ["transfer.amazonaws.com"] + } + resources = [aws_s3_bucket.call_centre.arn] + sid = "AllowListingOfBucket" + } + statement { + actions = [ + "s3:PutObject", + "s3:GetObject", + "s3:DeleteObject", + "s3:DeleteObjectVersion", + "s3:GetObjectVersion", + "s3:GetObjectACL", + "s3:PutObjectACL" + ] + effect = "Allow" + principals { + type = "Service" + identifiers = ["transfer.amazonaws.com"] + } + resources = ["${aws_s3_bucket.call_centre.arn}/*"] + sid = "AllowAccessToBucketObjects" + } +} + +data "aws_iam_policy_document" "call_centre_kms_policy" { + statement { + sid = "KeyAdministration" + effect = "Allow" + + principals { + type = "AWS" + identifiers = ["arn:aws:iam::${data.aws_caller_identity.current.account_id}:root"] + } + + actions = ["kms:*"] + resources = [aws_kms_key.call_centre.arn] + } + statement { + sid = "AllowAWSServiceAccess" + effect = "Allow" + principals { + type = "Service" + identifiers = ["transfer.amazonaws.com", "s3.amazonaws.com"] + } + actions = [ + "kms:Decrypt", + "kms:DescribeKey", + "kms:Encrypt", + "kms:GenerateDataKey", + "kms:ReEncrypt*" + ] + condition { + test = "StringEquals" + values = [data.aws_caller_identity.current.account_id] + variable = "kms:CallerAccount" + } + condition { + test = "StringLike" + values = ["transfer.amazonaws.com", "s3.amazonaws.com"] + variable = "kms:ViaService" + } + resources = [aws_kms_key.call_centre.arn] + } +} \ No newline at end of file diff --git a/terraform/environments/nomis-combined-reporting/locals_secretsmanager.tf 
b/terraform/environments/nomis-combined-reporting/locals_secretsmanager.tf index bd30bf49807..722662cdb32 100644 --- a/terraform/environments/nomis-combined-reporting/locals_secretsmanager.tf +++ b/terraform/environments/nomis-combined-reporting/locals_secretsmanager.tf @@ -3,26 +3,34 @@ locals { secretsmanager_secrets = { bip = { secrets = { - passwords = { description = "BIP Passwords" } - config = { description = "BIP Configuration" } - } - } - bip_app = { - secrets = { - passwords = { description = "BIP Passwords" } - config = { description = "BIP Configuration" } - } - } - bip_web = { - secrets = { - passwords = { description = "Web Passwords" } - config = { description = "Web Configuration" } + passwords = { + description = "BIP Passwords" + tags = { + instance-access-policy = "full" + } + } + config = { + description = "BIP Configuration" + tags = { + instance-access-policy = "limited" + } + } } } bods = { secrets = { - passwords = { description = "BODS Passwords" } - config = { description = "BODS Configuration" } + passwords = { + description = "BODS Passwords" + tags = { + instance-access-policy = "full" + } + } + config = { + description = "BODS Configuration" + tags = { + instance-access-policy = "limited" + } + } } } db = { diff --git a/terraform/environments/nomis-combined-reporting/locals_test.tf b/terraform/environments/nomis-combined-reporting/locals_test.tf index d5b47b89bbc..8262f32ef38 100644 --- a/terraform/environments/nomis-combined-reporting/locals_test.tf +++ b/terraform/environments/nomis-combined-reporting/locals_test.tf @@ -13,7 +13,39 @@ locals { # please keep resources in alphabetical order baseline_test = { + acm_certificates = { + nomis_combined_reporting_wildcard_cert = { + cloudwatch_metric_alarms = module.baseline_presets.cloudwatch_metric_alarms.acm + domain_name = "modernisation-platform.service.justice.gov.uk" + subject_alternate_names = [ + "test.reporting.nomis.service.justice.gov.uk", + "*.test.reporting.nomis.service.justice.gov.uk", + ] + tags = { + description = "Wildcard certificate for the test environment" + } + } + } + ec2_instances = { + t1-ncr-cms-1 = merge(local.ec2_instances.bip_cms, { + config = merge(local.ec2_instances.bip_cms.config, { + availability_zone = "eu-west-2a" + instance_profile_policies = concat(local.ec2_instances.bip_cms.config.instance_profile_policies, [ + "Ec2T1ReportingPolicy", + ]) + }) + user_data_cloud_init = merge(local.ec2_instances.bip_cms.user_data_cloud_init, { + args = merge(local.ec2_instances.bip_cms.user_data_cloud_init.args, { + branch = "main" + }) + }) + tags = merge(local.ec2_instances.bip_cms.tags, { + instance-scheduling = "skip-scheduling" + nomis-combined-reporting-environment = "t1" + }) + }) + t1-ncr-db-1-a = merge(local.ec2_instances.db, { cloudwatch_metric_alarms = merge( local.cloudwatch_metric_alarms.db, @@ -41,10 +73,35 @@ locals { tags = merge(local.ec2_instances.db.tags, { description = "T1 NCR DATABASE" nomis-combined-reporting-environment = "t1" - oracle-sids = "T1BIPSYS T1BIPAUD" + oracle-sids = "T1BIPSYS T1BIPAUD T1BISYS T1BIAUD" instance-scheduling = "skip-scheduling" }) }) + + t1-ncr-web-1 = merge(local.ec2_instances.bip_web, { + config = merge(local.ec2_instances.bip_web.config, { + availability_zone = "eu-west-2a" + instance_profile_policies = concat(local.ec2_instances.bip_web.config.instance_profile_policies, [ + "Ec2T1ReportingPolicy", + ]) + }) + instance = merge(local.ec2_instances.bip_web.instance, { + instance_type = "r6i.large" + }) + user_data_cloud_init = 
merge(local.ec2_instances.bip_cms.user_data_cloud_init, { + args = merge(local.ec2_instances.bip_cms.user_data_cloud_init.args, { + branch = "main" + }) + }) + tags = merge(local.ec2_instances.bip_web.tags, { + instance-scheduling = "skip-scheduling" + nomis-combined-reporting-environment = "t1" + }) + }) + } + + efs = { + t1-ncr-sap-share = local.efs.sap_share } iam_policies = { @@ -90,14 +147,46 @@ locals { instance_target_groups = {} listeners = {} }) + + public = merge(local.lbs.public, { + instance_target_groups = { + t1-http-7777 = merge(local.lbs.public.instance_target_groups.http-7777, { + attachments = [ + { ec2_instance_name = "t1-ncr-web-1" }, + ] + }) + } + listeners = merge(local.lbs.public.listeners, { + https = merge(local.lbs.public.listeners.https, { + alarm_target_group_names = [] + rules = { + web = { + priority = 200 + actions = [{ + type = "forward" + target_group_name = "t1-http-7777" + }] + conditions = [{ + host_header = { + values = [ + "t1.test.reporting.nomis.service.justice.gov.uk", + ] + } + }] + } + } + }) + }) + }) } route53_zones = { "test.reporting.nomis.service.justice.gov.uk" = { records = [ { name = "db", type = "CNAME", ttl = "3600", records = ["t1-ncr-db-1-a.nomis-combined-reporting.hmpps-test.modernisation-platform.service.justice.gov.uk"] }, - { name = "web", type = "CNAME", ttl = "3600", records = ["t1-ncr-web-1-a.nomis-combined-reporting.hmpps-test.modernisation-platform.service.justice.gov.uk"] }, - { name = "etl", type = "CNAME", ttl = "3600", records = ["t1-ncr-etl-1-a.nomis-combined-reporting.hmpps-test.modernisation-platform.service.justice.gov.uk"] } + ] + lb_alias_records = [ + { name = "t1", type = "A", lbs_map_key = "public" }, ] } } diff --git a/terraform/environments/nomis/locals_test.tf b/terraform/environments/nomis/locals_test.tf index e691faaa08a..9153c57fd68 100644 --- a/terraform/environments/nomis/locals_test.tf +++ b/terraform/environments/nomis/locals_test.tf @@ -7,7 +7,6 @@ locals { baseline_presets_test = { options = { - enable_observability_platform_monitoring = true sns_topics = { pagerduty_integrations = { pagerduty = "nomis-test" diff --git a/terraform/environments/oasys-national-reporting/locals_ec2_instances.tf b/terraform/environments/oasys-national-reporting/locals_ec2_instances.tf index 4a925b3718b..c9971660068 100644 --- a/terraform/environments/oasys-national-reporting/locals_ec2_instances.tf +++ b/terraform/environments/oasys-national-reporting/locals_ec2_instances.tf @@ -22,9 +22,9 @@ locals { } ebs_volumes = { "/dev/sda1" = { type = "gp3", size = 128 } # root volume - "/dev/xvdk" = { type = "gp3", size = 128 } # D:/ Temp - "/dev/xvdl" = { type = "gp3", size = 128 } # E:/ App - "/dev/xvdm" = { type = "gp3", size = 700 } # F:/ Storage + "xvdd" = { type = "gp3", size = 128 } # D:/ Temp + "xvde" = { type = "gp3", size = 128 } # E:/ App + "xvdf" = { type = "gp3", size = 700 } # F:/ Storage } instance = { disable_api_termination = false diff --git a/terraform/environments/oasys-national-reporting/locals_preproduction.tf b/terraform/environments/oasys-national-reporting/locals_preproduction.tf index a61ee51aa57..eb4895ee310 100644 --- a/terraform/environments/oasys-national-reporting/locals_preproduction.tf +++ b/terraform/environments/oasys-national-reporting/locals_preproduction.tf @@ -87,6 +87,13 @@ locals { "Ec2SecretPolicy", ]) }) + # IMPORTANT: EBS volume initialization, labelling, formatting was carried out manually on this instance. It was not automated so these ebs_volume settings are bespoke. 
Additional volumes should NOT be /dev/xvd* see the local.ec2_instances.bods.ebs_volumes setting for the correct device names. + ebs_volumes = { + "/dev/sda1" = { type = "gp3", size = 128 } # root volume + "/dev/xvdk" = { type = "gp3", size = 128 } # D:/ Temp + "/dev/xvdl" = { type = "gp3", size = 128 } # E:/ App + "/dev/xvdm" = { type = "gp3", size = 700 } # F:/ Storage + } instance = merge(local.ec2_instances.bods.instance, { instance_type = "r6i.2xlarge" }) diff --git a/terraform/environments/oasys-national-reporting/locals_test.tf b/terraform/environments/oasys-national-reporting/locals_test.tf index cfc069059fc..6b8016b3d18 100644 --- a/terraform/environments/oasys-national-reporting/locals_test.tf +++ b/terraform/environments/oasys-national-reporting/locals_test.tf @@ -168,15 +168,9 @@ locals { }) }) - # Pending sorting out cluster install of Bods in modernisation-platform-configuration-management repo # t2-onr-bods-2 = merge(local.ec2_instances.bods, { # config = merge(local.ec2_instances.bods.config, { - # availability_zone = "eu-west-2b" - # user_data_raw = base64encode(templatefile( - # "./templates/user-data-onr-bods-pwsh.yaml.tftpl", { - # branch = "main" - # } - # )) + # availability_zone = "eu-west-2a" # instance_profile_policies = concat(local.ec2_instances.bods.config.instance_profile_policies, [ # "Ec2SecretPolicy", # ]) @@ -184,55 +178,60 @@ locals { # instance = merge(local.ec2_instances.bods.instance, { # instance_type = "m4.xlarge" # }) + # user_data_raw = base64encode(templatefile( + # "./templates/user-data-onr-bods-pwsh.yaml.tftpl", { + # branch = "TM/TM-660/onr-bods-second-server" + # })) # cloudwatch_metric_alarms = null # tags = merge(local.ec2_instances.bods.tags, { # oasys-national-reporting-environment = "t2" - # domain-name = "azure.noms.root" + # domain-name = "azure.noms.root" # }) # }) - t2-onr-boe-1-a = merge(local.ec2_instances.boe_app, { - config = merge(local.ec2_instances.boe_app.config, { - availability_zone = "eu-west-2a" - instance_profile_policies = setunion(local.ec2_instances.boe_app.config.instance_profile_policies, [ - "Ec2SecretPolicy", - ]) - }) - instance = merge(local.ec2_instances.boe_app.instance, { - instance_type = "m4.xlarge" - }) - tags = merge(local.ec2_instances.boe_app.tags, { - oasys-national-reporting-environment = "t2" - }) - }) + # NOTE: These are all BOE 3.1 instances and are not currently needed + # t2-onr-boe-1-a = merge(local.ec2_instances.boe_app, { + # config = merge(local.ec2_instances.boe_app.config, { + # availability_zone = "eu-west-2a" + # instance_profile_policies = setunion(local.ec2_instances.boe_app.config.instance_profile_policies, [ + # "Ec2SecretPolicy", + # ]) + # }) + # instance = merge(local.ec2_instances.boe_app.instance, { + # instance_type = "m4.xlarge" + # }) + # tags = merge(local.ec2_instances.boe_app.tags, { + # oasys-national-reporting-environment = "t2" + # }) + # }) - # NOTE: currently using a Rhel 6 instance for onr-web instances, not Rhel 7 & independent Tomcat install - t2-onr-web-1-a = merge(local.ec2_instances.boe_web, { - config = merge(local.ec2_instances.boe_web.config, { - ami_name = "base_rhel_6_10_*" - availability_zone = "eu-west-2a" - instance_profile_policies = setunion(local.ec2_instances.boe_web.config.instance_profile_policies, [ - "Ec2SecretPolicy", - ]) - }) - instance = merge(local.ec2_instances.boe_web.instance, { - instance_type = "m4.large" - metadata_options_http_tokens = "optional" # required as Rhel 6 cloud-init does not support IMDSv2 - }) - tags = 
merge(local.ec2_instances.boe_web.tags, { - ami = "base_rhel_6_10" - oasys-national-reporting-environment = "t2" - }) - }) - t2-onr-client-a = merge(local.ec2_instances.jumpserver, { - config = merge(local.ec2_instances.jumpserver.config, { - ami_name = "base_windows_server_2012_r2_release_2024-06-01T00-00-32.450Z" - availability_zone = "eu-west-2a" - }) - tags = merge(local.ec2_instances.jumpserver.tags, { - domain-name = "azure.noms.root" - }) - }) + # # NOTE: currently using a Rhel 6 instance for onr-web instances, not Rhel 7 & independent Tomcat install + # t2-onr-web-1-a = merge(local.ec2_instances.boe_web, { + # config = merge(local.ec2_instances.boe_web.config, { + # ami_name = "base_rhel_6_10_*" + # availability_zone = "eu-west-2a" + # instance_profile_policies = setunion(local.ec2_instances.boe_web.config.instance_profile_policies, [ + # "Ec2SecretPolicy", + # ]) + # }) + # instance = merge(local.ec2_instances.boe_web.instance, { + # instance_type = "m4.large" + # metadata_options_http_tokens = "optional" # required as Rhel 6 cloud-init does not support IMDSv2 + # }) + # tags = merge(local.ec2_instances.boe_web.tags, { + # ami = "base_rhel_6_10" + # oasys-national-reporting-environment = "t2" + # }) + # }) + # t2-onr-client-a = merge(local.ec2_instances.jumpserver, { + # config = merge(local.ec2_instances.jumpserver.config, { + # ami_name = "base_windows_server_2012_r2_release_2024-06-01T00-00-32.450Z" + # availability_zone = "eu-west-2a" + # }) + # tags = merge(local.ec2_instances.jumpserver.tags, { + # domain-name = "azure.noms.root" + # }) + # }) } iam_policies = { diff --git a/terraform/environments/oasys-national-reporting/templates/user-data-onr-bods-pwsh.yaml.tftpl b/terraform/environments/oasys-national-reporting/templates/user-data-onr-bods-pwsh.yaml.tftpl index 9329a892d73..a8085a34a5e 100644 --- a/terraform/environments/oasys-national-reporting/templates/user-data-onr-bods-pwsh.yaml.tftpl +++ b/terraform/environments/oasys-national-reporting/templates/user-data-onr-bods-pwsh.yaml.tftpl @@ -4,22 +4,26 @@ # See C:\Windows\System32\config\systemprofile\AppData\Local\Temp\EC2Launch* for script output version: 1.0 # version 1.0 is required as this executes AFTER the SSM Agent is running tasks: - - task: initializeVolume + - task: executeScript inputs: - initialize: devices - devices: - - device: xvdk - name: Temp - letter: D - partition: gpt - - device: xvdl - name: App - letter: E - partition: gpt - - device: xvdm - name: Storage - letter: F - partition: gpt + - frequency: once + type: powershell + runAs: admin + content: |- + # Initialize all offline disks + $offlineDisks = Get-Disk | Where-Object IsOffline -Eq $true + foreach ($disk in $offlineDisks) { + Initialize-Disk -Number $disk.Number -PartitionStyle GPT + } + # Create partitions and assign drive letters + $letters = @('D', 'E', 'F') + $labels = @('Temp', 'App', 'Storage') + $disks = Get-Disk | Where-Object PartitionStyle -Eq 'GPT' | Where-Object IsSystem -Eq $false + for ($i = 0; $i -lt $disks.Count; $i++) { + $partition = New-Partition -DiskNumber $disks[$i].Number -UseMaximumSize + Format-Volume -Partition $partition -FileSystem NTFS -NewFileSystemLabel $labels[$i] -Confirm:$false + Set-Partition -InputObject $partition -NewDriveLetter $letters[$i] + } - task: executeScript inputs: - frequency: once diff --git a/terraform/environments/oasys/iam.tf b/terraform/environments/oasys/iam.tf deleted file mode 100644 index 5d7491e0574..00000000000 --- a/terraform/environments/oasys/iam.tf +++ /dev/null @@ -1,45 +0,0 @@ 
-# Create user for MGN - for mgn agents running on azure vms -#tfsec:ignore:aws-iam-no-user-attached-policies -#tfsec:ignore:AWS273 -resource "aws_iam_user" "mgn_user" { - #checkov:skip=CKV_AWS_273: "Skipping as tfsec check is also set to ignore" - name = "MGN-Test" - tags = local.tags -} -#tfsec:ignore:aws-iam-no-user-attached-policies -resource "aws_iam_user_policy_attachment" "mgn_attach_policy_migration" { - #tfsec:ignore:aws-iam-no-user-attached-policies "This is a short lived user, so allowing IAM policies attached directly to a user." - #checkov:skip=CKV_AWS_40: "Skipping as tfsec check is also ignored" - user = aws_iam_user.mgn_user.name - policy_arn = "arn:aws:iam::aws:policy/AWSApplicationMigrationAgentInstallationPolicy" -} - -#tfsec:ignore:aws-iam-no-user-attached-policies -resource "aws_iam_user_policy_attachment" "mgn_attach_policy_discovery" { - #tfsec:ignore:aws-iam-no-user-attached-policies "This is a short lived user, so allowing IAM policies attached directly to a user." - #checkov:skip=CKV_AWS_40: "Skipping as tfsec check is also ignored" - user = aws_iam_user.mgn_user.name - policy_arn = "arn:aws:iam::aws:policy/AWSApplicationDiscoveryAgentAccess" -} - -resource "aws_iam_user_policy_attachment" "mgn_attach_policy_service_access" { - #tfsec:ignore:aws-iam-no-user-attached-policies "This is a short lived user, so allowing IAM policies attached directly to a user." - #checkov:skip=CKV_AWS_40: "Skipping as tfsec check is also ignored" - user = aws_iam_user.mgn_user.name - policy_arn = "arn:aws:iam::aws:policy/AWSApplicationDiscoveryServiceFullAccess" -} - -resource "aws_iam_user_policy_attachment" "mgn_attach_policy_migrationhub_access" { - #tfsec:ignore:aws-iam-no-user-attached-policies "This is a short lived user, so allowing IAM policies attached directly to a user." - #checkov:skip=CKV_AWS_40: "Skipping as tfsec check is also ignored" - user = aws_iam_user.mgn_user.name - policy_arn = "arn:aws:iam::aws:policy/AWSMigrationHubFullAccess" -} - -resource "aws_iam_user_policy_attachment" "mgn_attach_policy_app_migrationfull_access" { - #tfsec:ignore:aws-iam-no-user-attached-policies "This is a short lived user, so allowing IAM policies attached directly to a user." 
- #checkov:skip=CKV_AWS_40: "Skipping as tfsec check is also ignored" - user = aws_iam_user.mgn_user.name - policy_arn = "arn:aws:iam::aws:policy/AWSApplicationMigrationFullAccess" -} - diff --git a/terraform/environments/oasys/locals_test.tf b/terraform/environments/oasys/locals_test.tf index e67c876c39e..8caed99b4ba 100644 --- a/terraform/environments/oasys/locals_test.tf +++ b/terraform/environments/oasys/locals_test.tf @@ -3,7 +3,6 @@ locals { baseline_presets_test = { options = { - enable_observability_platform_monitoring = true sns_topics = { pagerduty_integrations = { pagerduty = "oasys-test" diff --git a/terraform/environments/observability-platform/environment-configurations.tf b/terraform/environments/observability-platform/environment-configurations.tf index 97163b141d1..656dd941e2d 100644 --- a/terraform/environments/observability-platform/environment-configurations.tf +++ b/terraform/environments/observability-platform/environment-configurations.tf @@ -16,96 +16,6 @@ locals { athena_enabled = false } } - }, - "analytical-platform" = { - identity_centre_team = "analytical-platform" - aws_accounts = { - "analytical-platform-ingestion-development" = { - cloudwatch_enabled = true - prometheus_push_enabled = false - amazon_prometheus_query_enabled = false - xray_enabled = true - athena_enabled = false - }, - "analytical-platform-compute-development" = { - cloudwatch_enabled = true - prometheus_push_enabled = false - amazon_prometheus_query_enabled = true - amazon_prometheus_workspace_region = "eu-west-2" - amazon_prometheus_workspace_id = "ws-bfdd5d7a-5571-4686-bfd4-43ab07cf8d54ba" - xray_enabled = true - athena_enabled = false - }, - "analytical-platform-compute-test" = { - cloudwatch_enabled = true - prometheus_push_enabled = false - amazon_prometheus_query_enabled = true - amazon_prometheus_workspace_region = "eu-west-2" - amazon_prometheus_workspace_id = "ws-a9d7f576-58b7-4748-b4c1-b02bbdc54a2922" - xray_enabled = true - athena_enabled = false - } - } - }, - "data-engineering" = { - "identity_centre_team" = "data-engineering", - "aws_accounts" = { - "analytical-platform-data-engineering-sandboxa" = { - cloudwatch_enabled = true - prometheus_push_enabled = false - amazon_prometheus_query_enabled = false - xray_enabled = false - athena_enabled = false - } - } - }, - "digital-prison-reporting" = { - "identity_centre_team" = "hmpps-digital-prison-reporting", - "aws_accounts" = { - "digital-prison-reporting-development" = { - cloudwatch_enabled = true - cloudwatch_custom_namespaces = "DPRAgentCustomMetrics,DPRDataReconciliationCustom" - prometheus_push_enabled = false - amazon_prometheus_query_enabled = false - xray_enabled = false - athena_enabled = false - }, - "digital-prison-reporting-preproduction" = { - cloudwatch_enabled = true - cloudwatch_custom_namespaces = "DPRAgentCustomMetrics,DPRDataReconciliationCustom" - prometheus_push_enabled = false - amazon_prometheus_query_enabled = false - xray_enabled = false - athena_enabled = false - }, - "digital-prison-reporting-test" = { - cloudwatch_enabled = true - cloudwatch_custom_namespaces = "DPRAgentCustomMetrics,DPRDataReconciliationCustom" - prometheus_push_enabled = false - amazon_prometheus_query_enabled = false - xray_enabled = false - athena_enabled = false - } - } - }, - "digital-studio-operations" = { - "identity_centre_team" = "studio-webops" - "aws_accounts" = { - "nomis-test" = { - cloudwatch_enabled = true - prometheus_push_enabled = false - amazon_prometheus_query_enabled = false - xray_enabled = false - athena_enabled 
= false - } - "oasys-test" = { - cloudwatch_enabled = true - prometheus_push_enabled = false - amazon_prometheus_query_enabled = false - xray_enabled = false - athena_enabled = false - } - } } } grafana_version = "10.4" @@ -272,25 +182,6 @@ locals { } } }, - "digital-studio-operations" = { - "identity_centre_team" = "studio-webops" - "aws_accounts" = { - "nomis-test" = { - cloudwatch_enabled = true - prometheus_push_enabled = false - amazon_prometheus_query_enabled = false - xray_enabled = false - athena_enabled = false - } - "oasys-test" = { - cloudwatch_enabled = true - prometheus_push_enabled = false - amazon_prometheus_query_enabled = false - xray_enabled = false - athena_enabled = false - } - } - }, "modernisation-platform" = { identity_centre_team = "modernisation-platform" slack_channels = ["mod-plat-observ-test"] diff --git a/terraform/environments/panda-cyber-appsec-lab/ec2.tf b/terraform/environments/panda-cyber-appsec-lab/ec2.tf index 12f11452239..0dc90c123b8 100644 --- a/terraform/environments/panda-cyber-appsec-lab/ec2.tf +++ b/terraform/environments/panda-cyber-appsec-lab/ec2.tf @@ -22,27 +22,39 @@ resource "aws_instance" "kali_linux" { } user_data = <<-EOF #!/bin/bash - # Update and install dependencies - apt-get update - apt-get upgrade - apt-get install -y wget + set -e + exec > >(tee /var/log/user-data.log | logger -t user-data) 2>&1 + + # Update system packages + echo "Updating and upgrading system packages..." + apt-get update -y + apt-get upgrade -y + + # Install necessary tools and Kali default tools + echo "Installing wget, git, and kali-linux-default tools..." + apt-get install -y wget git kali-linux-default + + # Check if 'kali' user exists + if id "kali" &>/dev/null; then + echo "User 'kali' exists. Proceeding to create tooling directory..." + + # Create tooling directory and set ownership + mkdir -p /home/kali/tooling + chown -R kali:kali /home/kali + echo "Tooling directory created under /home/kali and ownership set." + + # Clone the repository as 'kali' user + echo "Cloning gotestwaf repository into /home/kali/tooling..." + sudo -u kali git clone https://github.com/wallarm/gotestwaf.git /home/kali/tooling + echo "Repository cloned successfully." + else + echo "User 'kali' does not exist. Exiting." + exit 1 + fi + + echo "User data script completed successfully." 
- # Download the SSM agent - wget https://s3.amazonaws.com/ec2-downloads-windows/SSMAgent/latest/debian_amd64/amazon-ssm-agent.deb - - # Install the agent - dpkg -i amazon-ssm-agent.deb - - # Start the SSM service - systemctl enable amazon-ssm-agent - systemctl start amazon-ssm-agent - - # Check the status - systemctl status amazon-ssm-agent - - # Install kali-linux-default tools - apt-get install -y kali-linux-default EOF tags = { diff --git a/terraform/environments/ppud/iam.tf b/terraform/environments/ppud/iam.tf index 5ab6eb99243..44ac6ffe45a 100644 --- a/terraform/environments/ppud/iam.tf +++ b/terraform/environments/ppud/iam.tf @@ -1175,14 +1175,35 @@ resource "aws_iam_policy" "iam_policy_for_lambda_cloudwatch_get_metric_data_dev" "Sid" : "CloudwatchMetricPolicy", "Effect" : "Allow", "Action" : [ - "cloudwatch:GetMetricData", - "cloudwatch:GetMetricStatistics", - "cloudwatch:ListMetrics" + "cloudwatch:*" ], "Resource" : [ "arn:aws:cloudwatch:eu-west-2:${local.environment_management.account_ids["ppud-development"]}:*" ] }, + { + "Sid" : "S3BucketPolicy", + "Effect" : "Allow", + "Action" : [ + "s3:GetObject", + "s3:PutObject", + "s3:DeleteObject" + ], + "Resource" : [ + "arn:aws:s3:::moj-lambda-layers-dev", + "arn:aws:s3:::moj-lambda-layers-dev/*" + ] + }, + { + "Sid" : "SSMPolicy", + "Effect" : "Allow", + "Action" : [ + "ssm:GetParameter" + ], + "Resource" : [ + "arn:aws:ssm:eu-west-2:${local.environment_management.account_ids["ppud-development"]}:parameter/klayers-account" + ] + }, { "Sid" : "LogPolicy", "Effect" : "Allow", @@ -1215,10 +1236,11 @@ resource "aws_iam_policy" "iam_policy_for_lambda_cloudwatch_get_metric_data_dev" "Sid" : "SESPolicy", "Effect" : "Allow", "Action" : [ - "ses:SendEmail" + "ses:*" ], "Resource" : [ - "arn:aws:ses:eu-west-2:${local.environment_management.account_ids["ppud-development"]}:*" + "arn:aws:ses:eu-west-2:${local.environment_management.account_ids["ppud-development"]}:*", + "arn:aws:ses:eu-west-2:${local.environment_management.account_ids["ppud-development"]}:identity/internaltest.ppud.justice.gov.uk" ] }] }) @@ -1229,3 +1251,17 @@ resource "aws_iam_role_policy_attachment" "attach_lambda_policy_cloudwatch_get_m role = aws_iam_role.lambda_role_cloudwatch_get_metric_data_dev[0].name policy_arn = aws_iam_policy.iam_policy_for_lambda_cloudwatch_get_metric_data_dev[0].arn } + +#resource "aws_iam_policy_attachment" "attach_lambda_read_only_access" { +# count = local.is-development == true ? 1 : 0 +# name = "lambda-read-only-access-iam-attachment" +# roles = [aws_iam_role.lambda_role_cloudwatch_get_metric_data_dev[0].id] +# policy_arn = "arn:aws:iam::aws:policy/AWSLambda_ReadOnlyAccess" +#} + +#resource "aws_iam_policy_attachment" "attach_ses_full_access" { +# count = local.is-development == true ? 
1 : 0 +# name = "ses-full-access-iam-attachment" +# roles = [aws_iam_role.lambda_role_cloudwatch_get_metric_data_dev[0].id] +# policy_arn = "arn:aws:iam::aws:policy/AmazonSESFullAccess" +#} \ No newline at end of file diff --git a/terraform/environments/ppud/lambda.tf b/terraform/environments/ppud/lambda.tf index c10322387f0..d38b0fd223e 100644 --- a/terraform/environments/ppud/lambda.tf +++ b/terraform/environments/ppud/lambda.tf @@ -514,8 +514,9 @@ resource "aws_lambda_function" "terraform_lambda_func_send_cpu_graph_dev" { mode = "Active" } layers = [ - "arn:aws:lambda:eu-west-2:770693421928:layer:Klayers-p312-numpy:8", #Publically available ARN for numpy package - "arn:aws:lambda:eu-west-2:770693421928:layer:Klayers-p312-pillow:1" #Publically available ARN for pillow package + "arn:aws:lambda:eu-west-2:${data.aws_ssm_parameter.klayers_account_dev[0].value}:layer:Klayers-p312-numpy:8", + "arn:aws:lambda:eu-west-2:${data.aws_ssm_parameter.klayers_account_dev[0].value}:layer:Klayers-p312-pillow:1", + aws_lambda_layer_version.lambda_layer_matplotlib_dev[0].arn ] } @@ -526,4 +527,15 @@ data "archive_file" "zip_the_send_cpu_graph_code_dev" { type = "zip" source_dir = "${path.module}/lambda_scripts/" output_path = "${path.module}/lambda_scripts/send_cpu_graph_dev.zip" -} \ No newline at end of file +} + +# Lambda Layer for Matplotlib + +resource "aws_lambda_layer_version" "lambda_layer_matplotlib_dev" { + count = local.is-development == true ? 1 : 0 + layer_name = "matplotlib-layer" + description = "matplotlib-layer for python 3.12" + s3_bucket = aws_s3_bucket.moj-lambda-layers-dev[0].id + s3_key = "matplotlib-layer.zip" + compatible_runtimes = ["python3.12"] +} diff --git a/terraform/environments/ppud/lambda_scripts/send_cpu_graph_dev.py b/terraform/environments/ppud/lambda_scripts/send_cpu_graph_dev.py index 8e758c2ee42..c707aa7f453 100644 --- a/terraform/environments/ppud/lambda_scripts/send_cpu_graph_dev.py +++ b/terraform/environments/ppud/lambda_scripts/send_cpu_graph_dev.py @@ -1,5 +1,7 @@ import boto3 import datetime +import os +os.environ['MPLCONFIGDIR'] = "/tmp/graph" import matplotlib.pyplot as plt import io import base64 diff --git a/terraform/environments/ppud/platform_secrets.tf b/terraform/environments/ppud/platform_secrets.tf index bb006856534..bac34e1a259 100644 --- a/terraform/environments/ppud/platform_secrets.tf +++ b/terraform/environments/ppud/platform_secrets.tf @@ -15,3 +15,16 @@ data "aws_secretsmanager_secret_version" "environment_management" { provider = aws.modernisation-platform secret_id = data.aws_secretsmanager_secret.environment_management.id } + +# Klayers Account ID - used by lambda layer ARNs - https://github.com/keithrozario/Klayers?tab=readme-ov-file +data "aws_ssm_parameter" "klayers_account_dev" { + count = local.is-development == true ? 
1 : 0 + name = "klayers-account" + with_decryption = true +} + +# This ID is the elb-account-id for eu-west-2 obtained from https://docs.aws.amazon.com/elasticloadbalancing/latest/application/enable-access-logging.html +data "aws_ssm_parameter" "elb-account-eu-west-2" { + name = "elb-account-eu-west-2" + with_decryption = true +} \ No newline at end of file diff --git a/terraform/environments/ppud/s3.tf b/terraform/environments/ppud/s3.tf index c27b3b03114..70748c59531 100644 --- a/terraform/environments/ppud/s3.tf +++ b/terraform/environments/ppud/s3.tf @@ -899,3 +899,131 @@ resource "aws_s3_bucket_policy" "moj-log-files-dev" { ] }) } + +# S3 Bucket for Lambda Layers for Development + +resource "aws_s3_bucket" "moj-lambda-layers-dev" { + # checkov:skip=CKV_AWS_145: "S3 bucket is not public facing, does not contain any sensitive information and does not need encryption" + # checkov:skip=CKV_AWS_62: "S3 bucket event notification is not required" + # checkov:skip=CKV2_AWS_62: "S3 bucket event notification is not required" + # checkov:skip=CKV_AWS_144: "PPUD has a UK Sovereignty requirement so cross region replication is prohibited" + # checkov:skip=CKV_AWS_18: "S3 bucket logging is not required" + count = local.is-development == true ? 1 : 0 + bucket = "moj-lambda-layers-dev" + tags = merge( + local.tags, + { + Name = "${local.application_name}-moj-lambda-layers-dev" + } + ) +} + +resource "aws_s3_bucket_versioning" "moj-lambda-layers-dev" { + count = local.is-development == true ? 1 : 0 + bucket = aws_s3_bucket.moj-lambda-layers-dev[0].id + versioning_configuration { + status = "Enabled" + } +} + +resource "aws_s3_bucket_public_access_block" "moj-lambda-layers-dev" { + count = local.is-development == true ? 1 : 0 + bucket = aws_s3_bucket.moj-lambda-layers-dev[0].id + block_public_acls = true + block_public_policy = true + ignore_public_acls = true + restrict_public_buckets = true +} + +resource "aws_s3_bucket_lifecycle_configuration" "moj-lambda-layers-dev" { + # checkov:skip=CKV_AWS_300: "S3 bucket has a set period for aborting failed uploads, this is a false positive finding" + count = local.is-development == true ? 1 : 0 + bucket = aws_s3_bucket.moj-lambda-layers-dev[0].id + rule { + id = "Move-to-IA-then-delete-moj-lambda-layers-dev" + status = "Enabled" + abort_incomplete_multipart_upload { + days_after_initiation = 7 + } + noncurrent_version_transition { + noncurrent_days = 30 + storage_class = "STANDARD_IA" + } + transition { + days = 30 + storage_class = "STANDARD_IA" + } + expiration { + days = 60 + } + } +} + +resource "aws_s3_bucket_policy" "moj-lambda-layers-dev" { + count = local.is-development == true ? 
1 : 0 + bucket = aws_s3_bucket.moj-lambda-layers-dev[0].id + + policy = jsonencode({ + + "Version" : "2012-10-17", + "Statement" : [ + { + "Action" : [ + "s3:PutBucketNotification", + "s3:GetBucketNotification", + "s3:GetBucketAcl", + "s3:DeleteObject", + "s3:GetObject", + "s3:PutObject", + "s3:ListBucket" + ], + "Effect" : "Allow", + "Resource" : [ + "arn:aws:s3:::moj-lambda-layers-dev", + "arn:aws:s3:::moj-lambda-layers-dev/*" + ], + "Principal" : { + Service = "logging.s3.amazonaws.com" + } + }, + { + "Action" : [ + "s3:PutBucketNotification", + "s3:GetBucketNotification", + "s3:GetBucketAcl", + "s3:DeleteObject", + "s3:GetObject", + "s3:PutObject", + "s3:ListBucket" + ], + "Effect" : "Allow", + "Resource" : [ + "arn:aws:s3:::moj-lambda-layers-dev", + "arn:aws:s3:::moj-lambda-layers-dev/*" + ], + "Principal" : { + Service = "sns.amazonaws.com" + } + }, + { + "Action" : [ + "s3:GetBucketAcl", + "s3:DeleteObject", + "s3:GetObject", + "s3:PutObject", + "s3:ListBucket" + ], + "Effect" : "Allow", + "Resource" : [ + "arn:aws:s3:::moj-lambda-layers-dev", + "arn:aws:s3:::moj-lambda-layers-dev/*" + ], + "Principal" : { + "AWS" : [ + "arn:aws:iam::${local.environment_management.account_ids["ppud-development"]}:role/ec2-iam-role" + ] + } + } + ] + }) +} diff --git a/terraform/environments/ppud/shield.tf b/terraform/environments/ppud/shield.tf index 00708681268..4a731e41297 100644 --- a/terraform/environments/ppud/shield.tf +++ b/terraform/environments/ppud/shield.tf @@ -20,9 +20,3 @@ module "shield" { } } } - -import { - for_each = local.is-production ? { "build" = true } : {} - id = "60a72081-57ea-4a38-b04a-778796012304/FMManagedWebACLV2-shield_advanced_auto_remediate-1649415357278/REGIONAL" - to = module.shield["build"].aws_wafv2_web_acl.main -} diff --git a/terraform/environments/tribunals/cloudfront.tf b/terraform/environments/tribunals/cloudfront.tf index c81f76104f9..3d8d5c8005f 100644 --- a/terraform/environments/tribunals/cloudfront.tf +++ b/terraform/environments/tribunals/cloudfront.tf @@ -1,5 +1,7 @@ resource "aws_cloudfront_distribution" "tribunals_distribution" { + web_acl_id = aws_wafv2_web_acl.tribunals_web_acl.arn + aliases = local.is-production ? [ "*.decisions.tribunals.gov.uk", "*.venues.tribunals.gov.uk", diff --git a/terraform/environments/tribunals/dns-delegate-route53.tf b/terraform/environments/tribunals/dns-delegate-route53.tf index a73f3ff77dd..0753dda5c05 100644 --- a/terraform/environments/tribunals/dns-delegate-route53.tf +++ b/terraform/environments/tribunals/dns-delegate-route53.tf @@ -32,6 +32,9 @@ locals { ] nginx_records = [ + ] + + nginx_records_pre_migration = [ "", "adjudicationpanel", "charity", @@ -117,6 +120,20 @@ resource "aws_route53_record" "nginx_instances" { } } +resource "aws_route53_record" "nginx_instances_pre_migration" { + count = local.is-production ? length(local.nginx_records_pre_migration) : 0 + provider = aws.core-network-services + zone_id = local.production_zone_id + name = local.nginx_records_pre_migration[count.index] + type = "A" + + alias { + name = "tribunals-nginx-1184258455.eu-west-1.elb.amazonaws.com" + zone_id = "Z32O12XQLNTSW2" + evaluate_target_health = false + } +} + # 'A' records for tribunals www. URLs redirects to existing entries - subtract the "www." resource "aws_route53_record" "www_instances" { count = local.is-production ? 
diff --git a/terraform/environments/tribunals/load_balancer.tf b/terraform/environments/tribunals/load_balancer.tf
index 79c4debc3b7..2ea0a08d8a8 100644
--- a/terraform/environments/tribunals/load_balancer.tf
+++ b/terraform/environments/tribunals/load_balancer.tf
@@ -123,8 +123,3 @@ resource "aws_lb_listener_rule" "tribunals_lb_rule" {
     }
   }
 }
-
-resource "aws_wafv2_web_acl_association" "web_acl_association_my_lb" {
-  resource_arn = aws_lb.tribunals_lb.arn
-  web_acl_arn  = aws_wafv2_web_acl.tribunals_web_acl.arn
-}
diff --git a/terraform/environments/tribunals/waf.tf b/terraform/environments/tribunals/waf.tf
index fd723b99a41..c072624461f 100644
--- a/terraform/environments/tribunals/waf.tf
+++ b/terraform/environments/tribunals/waf.tf
@@ -1,6 +1,7 @@
 resource "aws_wafv2_ip_set" "allowed_ip_set" {
-  name  = "allowed-ip-set"
-  scope = "REGIONAL"
+  provider = aws.us-east-1
+  name     = "allowed-ip-set"
+  scope    = "CLOUDFRONT"
   addresses = [
     "20.26.11.71/32", "20.26.11.108/32", "20.49.214.199/32", "20.49.214.228/32", "51.149.249.0/29", "51.149.249.32/29",
@@ -12,8 +13,9 @@ resource "aws_wafv2_ip_set" "allowed_ip_set" {
 }
 
 resource "aws_wafv2_web_acl" "tribunals_web_acl" {
-  name  = "tribunals-web-acl"
-  scope = "REGIONAL"
+  provider = aws.us-east-1
+  name     = "tribunals-web-acl"
+  scope    = "CLOUDFRONT"
 
   default_action {
     allow {}
@@ -21,7 +23,7 @@ resource "aws_wafv2_web_acl" "tribunals_web_acl" {
 
   rule {
     name     = "common-rule-set"
-    priority = 1
+    priority = 2
 
     override_action {
       none {}
@@ -61,7 +63,7 @@ resource "aws_wafv2_web_acl" "tribunals_web_acl" {
 
   rule {
     name     = "AllowSpecificIPsForAdminAndSecurePaths"
-    priority = 2
+    priority = 3
 
     action {
       allow {}
@@ -122,7 +124,7 @@ resource "aws_wafv2_web_acl" "tribunals_web_acl" {
 
   rule {
     name     = "BlockNonAllowedIPsForAdminAndSecurePaths"
-    priority = 3
+    priority = 4
 
     action {
       block {
@@ -168,14 +170,15 @@ resource "aws_wafv2_web_acl" "tribunals_web_acl" {
 }
 
 resource "aws_wafv2_regex_pattern_set" "blocked_paths" {
-  name  = "blocked-paths"
-  scope = "REGIONAL"
+  provider = aws.us-east-1
+  name     = "blocked-paths"
+  scope    = "CLOUDFRONT"
 
   regular_expression {
-    regex_string = "^/admin(/.*)?$"
+    regex_string = "(?i)^/admin(/.*)?$"
   }
 
   regular_expression {
-    regex_string = "^/secure(/.*)?$"
+    regex_string = "(?i)^/secure(/.*)?$"
   }
 }
\ No newline at end of file
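Reviewer note: besides moving the pattern set to CLOUDFRONT scope, the only functional change to the blocked-path regexes is the (?i) prefix, which makes matching case-insensitive so requests such as /Admin or /SECURE/login are no longer missed. A quick illustration in Python (the sample paths are made up):

import re

pattern = re.compile(r"(?i)^/admin(/.*)?$")
for path in ["/admin", "/Admin/users", "/ADMIN/", "/administrator"]:
    # /administrator should not match: the pattern only allows /admin or /admin/<anything>.
    print(path, bool(pattern.match(path)))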
["vmimport"] : [], ])) @@ -92,21 +91,6 @@ locals { ] } - # allow Observability Plaform read-only access to Cloudwatch metrics - observability-platform = { - assume_role_policy = [{ - effect = "Allow" - actions = ["sts:AssumeRole"] - principals = { - type = "AWS" - identifiers = ["observability-platform-development"] - } - }] - policy_attachments = [ - "arn:aws:iam::aws:policy/CloudWatchReadOnlyAccess", - ] - } - vmimport = { assume_role_policy = [{ effect = "Allow" diff --git a/terraform/modules/baseline_presets/variables.tf b/terraform/modules/baseline_presets/variables.tf index e1a08a9bc19..41f697eeca3 100644 --- a/terraform/modules/baseline_presets/variables.tf +++ b/terraform/modules/baseline_presets/variables.tf @@ -37,7 +37,6 @@ variable "options" { enable_ec2_session_manager_cloudwatch_logs = optional(bool, false) # create SSM doc and log group for session manager logs enable_ec2_ssm_agent_update = optional(bool, false) # create SSM association for auto-update of SSM agent. update-ssm-agent tag needs to be set on EC2s also enable_ec2_user_keypair = optional(bool, false) # create secret and key-pair for ec2-user - enable_observability_platform_monitoring = optional(bool, false) # create role for observability platform monitroing enable_s3_bucket = optional(bool, false) # create s3-bucket S3 bucket for general use enable_s3_db_backup_bucket = optional(bool, false) # create db-backup S3 buckets enable_s3_shared_bucket = optional(bool, false) # create devtest and preprodprod S3 bucket for sharing between accounts diff --git a/terraform/modules/fargate_graceful_retirement/eventbridge.tf b/terraform/modules/fargate_graceful_retirement/eventbridge.tf index 4310d4894e7..2386c3685ac 100644 --- a/terraform/modules/fargate_graceful_retirement/eventbridge.tf +++ b/terraform/modules/fargate_graceful_retirement/eventbridge.tf @@ -1,5 +1,5 @@ resource "aws_cloudwatch_event_rule" "ecs_restart_rule" { - name = "ecs_task_retirement_rul" + name = "ecs_task_retirement_rule" description = "Rule to catch AWS ECS Task Patching Retirement events" event_pattern = jsonencode({ @@ -122,3 +122,8 @@ resource "aws_iam_policy" "eventbridge_execution_role_policy" { ] }) } + +resource "aws_iam_role_policy_attachment" "eventbridge_execution_role_policy" { + policy_arn = aws_iam_policy.eventbridge_execution_role_policy.arn + role = aws_iam_role.eventbridge_execution_role.name +} diff --git a/terraform/modules/fargate_graceful_retirement/files/ecs_restart/lambda_function.py b/terraform/modules/fargate_graceful_retirement/files/ecs_restart/lambda_function.py index 4e490ba8023..b7703d5fc55 100644 --- a/terraform/modules/fargate_graceful_retirement/files/ecs_restart/lambda_function.py +++ b/terraform/modules/fargate_graceful_retirement/files/ecs_restart/lambda_function.py @@ -1,25 +1,27 @@ import json -import boto3 import os +import boto3 + + def lambda_handler(event, context): print("Event received:", json.dumps(event)) try: # Create an ECS client using boto3 - ecs_client = boto3.client('ecs') + ecs_client = boto3.client("ecs") # Extract the affected entities from the event - affected_entities = event['detail']['affectedEntities'] + affected_entities = event["detail"]["affectedEntities"] # Iterate over each affected entity for entity in affected_entities: # Get the entity value - entity_value = entity.get('entityValue') + entity_value = entity.get("entityValue") if entity_value is not None: # Extract cluster name and service name from the entity value - cluster_name = entity_value.split('|')[0] - service_name = 
diff --git a/terraform/modules/fargate_graceful_retirement/files/ecs_restart/lambda_function.py b/terraform/modules/fargate_graceful_retirement/files/ecs_restart/lambda_function.py
index 4e490ba8023..b7703d5fc55 100644
--- a/terraform/modules/fargate_graceful_retirement/files/ecs_restart/lambda_function.py
+++ b/terraform/modules/fargate_graceful_retirement/files/ecs_restart/lambda_function.py
@@ -1,25 +1,27 @@
 import json
-import boto3
 import os
 
+import boto3
+
+
 def lambda_handler(event, context):
     print("Event received:", json.dumps(event))
 
     try:
         # Create an ECS client using boto3
-        ecs_client = boto3.client('ecs')
+        ecs_client = boto3.client("ecs")
 
         # Extract the affected entities from the event
-        affected_entities = event['detail']['affectedEntities']
+        affected_entities = event["detail"]["affectedEntities"]
 
         # Iterate over each affected entity
         for entity in affected_entities:
             # Get the entity value
-            entity_value = entity.get('entityValue')
+            entity_value = entity.get("entityValue")
 
             if entity_value is not None:
                 # Extract cluster name and service name from the entity value
-                cluster_name = entity_value.split('|')[0]
-                service_name = entity_value.split('|')[1]
+                cluster_name = entity_value.split("|")[0]
+                service_name = entity_value.split("|")[1]
 
                 print("Cluster name:", cluster_name)
                 print("Service name:", service_name)
@@ -29,22 +31,19 @@ def lambda_handler(event, context):
                 response = ecs_client.update_service(
                     cluster=cluster_name,
                     service=service_name,
-                    forceNewDeployment=True
+                    forceNewDeployment=True,
                 )
-                if os.environ.get('DEBUG_LOGGING', False):
+                if os.environ.get("DEBUG_LOGGING", False):
                     print("[DEBUG] Update service response:", response)
             else:
                 print("No entity value found in the event")
 
         return {
-            'statusCode': 200,
-            'body': json.dumps('Handled ECS Task Patching Retirement')
-            'restarted_services': affected_entities
+            "statusCode": 200,
+            "body": json.dumps("Handled ECS Task Patching Retirement"),
+            "restarted_services": affected_entities,
         }
     except Exception as e:
         print("Error updating service:", e)
-        return {
-            'statusCode': 500,
-            'body': json.dumps('Error updating service')
-        }
+        return {"statusCode": 500, "body": json.dumps("Error updating service")}
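Reviewer note: beyond the quote-style and import reordering, this hunk also fixes a syntax error in the success response (the old code was missing a comma after the "body" entry). A small, self-contained sketch for exercising the handler locally with boto3 stubbed out so no real ECS call is made; it assumes lambda_function.py is on the import path, and the cluster/service names in the event are made up, only illustrating the "cluster|service" entityValue shape the function expects:

import json
from unittest import mock

import lambda_function

sample_event = {
    "detail": {
        "affectedEntities": [
            {"entityValue": "example-cluster|example-service"}  # cluster|service, names made up
        ]
    }
}

with mock.patch("lambda_function.boto3.client") as mock_client:
    # Stub update_service so the handler runs without AWS credentials or side effects.
    mock_client.return_value.update_service.return_value = {"service": {"serviceName": "example-service"}}
    result = lambda_function.lambda_handler(sample_event, None)

print(json.dumps(result, indent=2))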