From 85fb7a792589d807a5349c44de89a843f8b57625 Mon Sep 17 00:00:00 2001 From: koladeadewuyi-moj <136330532+koladeadewuyi-moj@users.noreply.github.com> Date: Fri, 16 Aug 2024 15:49:17 +0100 Subject: [PATCH] DPR2-1053: Stop pipeline before ingestion (#7537) --- .../domains/ingestion-pipeline/pipeline.tf | 71 ++++++++++++++++++- .../domains/ingestion-pipeline/variables.tf | 62 +++++++++++++++- .../domains/reload-pipeline/pipeline.tf | 6 +- 3 files changed, 134 insertions(+), 5 deletions(-) diff --git a/terraform/environments/digital-prison-reporting/modules/domains/ingestion-pipeline/pipeline.tf b/terraform/environments/digital-prison-reporting/modules/domains/ingestion-pipeline/pipeline.tf index 8c77cf136e9..21cdc8ed4d9 100644 --- a/terraform/environments/digital-prison-reporting/modules/domains/ingestion-pipeline/pipeline.tf +++ b/terraform/environments/digital-prison-reporting/modules/domains/ingestion-pipeline/pipeline.tf @@ -23,8 +23,65 @@ module "data_ingestion_pipeline" { definition = jsonencode( { "Comment" : "Data Ingestion Pipeline Step Function", - "StartAt" : "Start DMS Replication Task", + "StartAt" : "Deactivate Archive Trigger", "States" : { + "Deactivate Archive Trigger" : { + "Type" : "Task", + "Resource" : "arn:aws:states:::glue:startJobRun.sync", + "Parameters" : { + "JobName" : var.glue_trigger_activation_job, + "Arguments" : { + "--dpr.glue.trigger.name" : var.archive_job_trigger_name, + "--dpr.glue.trigger.activate" : "false" + } + }, + "Next" : "Stop Archive Job" + }, + "Stop Archive Job" : { + "Type" : "Task", + "Resource" : "arn:aws:states:::glue:startJobRun.sync", + "Parameters" : { + "JobName" : var.glue_stop_glue_instance_job, + "Arguments" : { + "--dpr.stop.glue.instance.job.name" : var.glue_archive_job + } + }, + "Next" : "Stop DMS Replication Task" + }, + "Stop DMS Replication Task" : { + "Type" : "Task", + "Resource" : "arn:aws:states:::glue:startJobRun.sync", + "Parameters" : { + "JobName" : var.stop_dms_task_job, + "Arguments" : { + "--dpr.dms.replication.task.id" : var.replication_task_id + } + }, + "Next" : "Stop Glue Streaming Job" + }, + "Stop Glue Streaming Job" : { + "Type" : "Task", + "Resource" : "arn:aws:states:::glue:startJobRun.sync", + "Parameters" : { + "JobName" : var.glue_stop_glue_instance_job, + "Arguments" : { + "--dpr.stop.glue.instance.job.name" : var.glue_reporting_hub_cdc_jobname + } + }, + "Next" : "Empty All Data" + }, + "Empty All Data" : { + "Type" : "Task", + "Resource" : "arn:aws:states:::glue:startJobRun.sync", + "Parameters" : { + "JobName" : var.glue_s3_data_deletion_job, + "Arguments" : { + "--dpr.file.deletion.buckets" : "${var.s3_raw_bucket_id},${var.s3_raw_archive_bucket_id},${var.s3_structured_bucket_id},${var.s3_curated_bucket_id},${var.s3_temp_reload_bucket_id}", + "--dpr.config.key" : var.domain + } + }, + "Next" : "Start DMS Replication Task" + }, "Start DMS Replication Task" : { "Type" : "Task", "Resource" : "arn:aws:states:::aws-sdk:databasemigration:startReplicationTask", @@ -120,6 +177,18 @@ module "data_ingestion_pipeline" { "--dpr.config.key" : var.domain } }, + "Next" : "Reactivate Archive Trigger" + }, + "Reactivate Archive Trigger" : { + "Type" : "Task", + "Resource" : "arn:aws:states:::glue:startJobRun.sync", + "Parameters" : { + "JobName" : var.glue_trigger_activation_job, + "Arguments" : { + "--dpr.glue.trigger.name" : var.archive_job_trigger_name, + "--dpr.glue.trigger.activate" : "true" + } + }, "End" : true } } diff --git a/terraform/environments/digital-prison-reporting/modules/domains/ingestion-pipeline/variables.tf b/terraform/environments/digital-prison-reporting/modules/domains/ingestion-pipeline/variables.tf index ed511865a64..3e54df4a7a1 100644 --- a/terraform/environments/digital-prison-reporting/modules/domains/ingestion-pipeline/variables.tf +++ b/terraform/environments/digital-prison-reporting/modules/domains/ingestion-pipeline/variables.tf @@ -23,7 +23,19 @@ variable "pipeline_additional_policies" { default = [] } -variable "dms_replication_task_arn" {} +variable "glue_s3_data_deletion_job" { + description = "Name of glue job which deletes parquet files from s3 bucket(s)" + type = string + default = "" +} + +variable "dms_replication_task_arn" { + type = string +} + +variable "replication_task_id" { + type = string +} variable "pipeline_notification_lambda_function" { description = "Pipeline Notification Lambda Name" @@ -61,6 +73,54 @@ variable "s3_raw_archive_bucket_id" { default = "" } +variable "s3_structured_bucket_id" { + description = "S3, Structured Bucket ID" + type = string + default = "" +} + +variable "s3_curated_bucket_id" { + description = "S3, Curated Bucket ID" + type = string + default = "" +} + +variable "s3_temp_reload_bucket_id" { + description = "S3 Bucket ID for the temporary location to store reload data" + type = string + default = "" +} + +variable "glue_stop_glue_instance_job" { + description = "Name of job to stop the current running instance of the streaming job" + type = string + default = "" +} + +variable "stop_dms_task_job" { + description = "Name of job to stop a running DMS task" + type = string + default = "" +} + +variable "glue_trigger_activation_job" { + description = "Name of job to which activates/deactivates a glue trigger" + type = string + default = "" +} + +variable "archive_job_trigger_name" { + description = "Name of the trigger for a glue trigger" + type = string + default = "" +} + +variable "glue_archive_job" { + description = "Name of the glue job which archives the raw data" + type = string + default = "" +} + variable "glue_s3_file_transfer_job" { description = "Name of s3 file transfer job" type = string diff --git a/terraform/environments/digital-prison-reporting/modules/domains/reload-pipeline/pipeline.tf b/terraform/environments/digital-prison-reporting/modules/domains/reload-pipeline/pipeline.tf index 7294b188c97..ba529eed42e 100644 --- a/terraform/environments/digital-prison-reporting/modules/domains/reload-pipeline/pipeline.tf +++ b/terraform/environments/digital-prison-reporting/modules/domains/reload-pipeline/pipeline.tf @@ -134,15 +134,15 @@ module "reload_pipeline" { "--dpr.config.key" : var.domain } }, - "Next" : "Empty Structured and Curated Data" + "Next" : "Empty Raw, Structured and Curated Data" }, - "Empty Structured and Curated Data" : { + "Empty Raw, Structured and Curated Data" : { "Type" : "Task", "Resource" : "arn:aws:states:::glue:startJobRun.sync", "Parameters" : { "JobName" : var.glue_s3_data_deletion_job, "Arguments" : { - "--dpr.file.deletion.buckets" : "${var.s3_structured_bucket_id},${var.s3_curated_bucket_id}", + "--dpr.file.deletion.buckets" : "${var.s3_raw_bucket_id},${var.s3_structured_bucket_id},${var.s3_curated_bucket_id}", "--dpr.config.key" : var.domain } },