diff --git a/terraform/environments/data-and-insights-wepi/bastion_linux.json b/terraform/environments/data-and-insights-wepi/bastion_linux.json index 1e8f42cf8f0..4d21b49e29b 100644 --- a/terraform/environments/data-and-insights-wepi/bastion_linux.json +++ b/terraform/environments/data-and-insights-wepi/bastion_linux.json @@ -1,9 +1,9 @@ { "keys": { - "development": { + "preproduction": { "simonytta": "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIKeBrsu9K2qVs2r/fueve0V+5WBY/ZZTNq1UJYhuXIZp simona.treivase@justice.gov.uk" }, - "test": {}, - "production": {} + "development": {"simonytta": "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIKeBrsu9K2qVs2r/fueve0V+5WBY/ZZTNq1UJYhuXIZp simona.treivase@justice.gov.uk"}, + "production": {"simonytta": "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIKeBrsu9K2qVs2r/fueve0V+5WBY/ZZTNq1UJYhuXIZp simona.treivase@justice.gov.uk"} } } diff --git a/terraform/environments/data-and-insights-wepi/crawler.tf b/terraform/environments/data-and-insights-wepi/crawler.tf index e69de29bb2d..9ef709e122a 100644 --- a/terraform/environments/data-and-insights-wepi/crawler.tf +++ b/terraform/environments/data-and-insights-wepi/crawler.tf @@ -0,0 +1,50 @@ +resource "aws_glue_catalog_database" "mojap_all_tables" { + name = "ap-mojap-tables-all" + description = "" +} + +resource "aws_glue_catalog_database" "sop_redshift" { + name = "sop-redshift" + description = "" +} + +resource "aws_glue_crawler" "mojap_import_all" { + database_name = aws_glue_catalog_database.mojap_all_tables.name + name = "sop-import-all" + role = aws_iam_role.wepi_iam_role_glue.arn + description = "Identifies schema for SOP datasets on AP" + s3_target { + path = "s3://mojap-mp-redshift/" + connection_name = aws_glue_connection.wepi_glue_conn_redshift.name + } +} + +resource "aws_glue_crawler" "absence_crawler" { + database_name = aws_glue_catalog_database.sop_redshift.name + name = "absence-redshift-schema" + role = aws_iam_role.wepi_iam_role_glue.arn + description = "Crawl AWS Redshift \"absence\" table using JDBC" + jdbc_target { + path = "wepidevelopmentdb/public/absence" + connection_name = aws_glue_connection.wepi_glue_conn_redshift.name + } +} + +resource "aws_glue_crawler" "leavers_crawler" { + database_name = aws_glue_catalog_database.sop_redshift.name + name = "leavers-redshift-schema" + role = aws_iam_role.wepi_iam_role_glue.arn + description = "Crawl AWS Redshift \"leavers\" table using JDBC" + jdbc_target { + path = "wepidevelopmentdb/public/leavers" + connection_name = aws_glue_connection.wepi_glue_conn_redshift.name + } +} + +# resource "null_resource" "setup_leavers_redshift" { +# depends_on = ["aws_redshift_cluster"] #wait for the db to be ready +# provisioner "local-exec" { +# command = "mysql -u ${aws_redshift_cluster.wepi_redshift_cluster.master_username} -p${aws_secretsmanager_secret_version.wepi_redshift_admin_pw.secret_string} < create_table_leavers.sql" +# } +# } + diff --git a/terraform/environments/data-and-insights-wepi/glue.tf b/terraform/environments/data-and-insights-wepi/glue.tf index ebf7cbc4b4e..ae53baaf1f4 100644 --- a/terraform/environments/data-and-insights-wepi/glue.tf +++ b/terraform/environments/data-and-insights-wepi/glue.tf @@ -13,9 +13,28 @@ resource "aws_glue_connection" "wepi_glue_conn_redshift" { name = "wepi-redshift-${local.environment}-conn" physical_connection_requirements { + availability_zone = data.aws_subnet.data_subnets_a.availability_zone security_group_id_list = ["${aws_security_group.wepi_sg_allow_redshift.id}"] subnet_id = data.aws_subnet.data_subnets_a.id } tags = local.tags -} \ No newline at end of file +} + +# resource "aws_glue_job" "absence_glue_job" { +# name = "absence-sop-glue-job" +# role_arn = aws_iam_role.wepi_iam_role_glue.arn + +# command { +# script_location = "s3://mojap-sop-data-glue-job-scripts/absence-glue-job.py" +# } +# } + +# resource "aws_glue_job" "leavers_glue_job" { +# name = "leavers-sop-glue-job" +# role_arn = aws_iam_role.wepi_iam_role_glue.arn + +# command { +# script_location = "s3://mojap-sop-data-glue-job-scripts/leavers-glue-job.py" +# } +# } diff --git a/terraform/environments/data-and-insights-wepi/json/wepi_iam_role_glue.json b/terraform/environments/data-and-insights-wepi/json/wepi_iam_role_glue.json index eee59753995..4873267832e 100644 --- a/terraform/environments/data-and-insights-wepi/json/wepi_iam_role_glue.json +++ b/terraform/environments/data-and-insights-wepi/json/wepi_iam_role_glue.json @@ -1,193 +1,13 @@ { - "Version": "2012-10-17", - "Statement": [ + "Version": "2012-10-17", + "Statement": [ { - "Effect": "Allow", - "Action": [ - "s3:*", - "s3-object-lambda:*", - "glue:*", - "s3:GetBucketLocation", - "s3:ListBucket", - "s3:ListAllMyBuckets", - "s3:GetBucketAcl", - "ec2:DescribeVpcEndpoints", - "ec2:DescribeRouteTables", - "ec2:CreateNetworkInterface", - "ec2:DeleteNetworkInterface", - "ec2:DescribeNetworkInterfaces", - "ec2:DescribeSecurityGroups", - "ec2:DescribeSubnets", - "ec2:DescribeVpcAttribute", - "iam:ListRolePolicies", - "iam:GetRole", - "iam:GetRolePolicy", - "cloudwatch:PutMetricData" - ], - "Resource": "*" - }, - { - "Effect": "Allow", - "Action": [ - "s3:CreateBucket" - ], - "Resource": [ - "arn:aws:s3:::aws-glue-*" - ] - }, - { - "Effect": "Allow", - "Action": [ - "s3:GetObject", - "s3:PutObject", - "s3:DeleteObject" - ], - "Resource": [ - "arn:aws:s3:::aws-glue-*/*", - "arn:aws:s3:::*/*aws-glue-*/*" - ] - }, - { - "Effect": "Allow", - "Action": [ - "s3:GetObject", - "s3:PutObject", - "s3:DeleteObject" - ], - "Resource": [ - "arn:aws:s3:::aws-glue-*/*", - "arn:aws:s3:::*/*aws-glue-*/*" - ] - }, - { - "Effect": "Allow", - "Action": [ - "s3:GetObject" - ], - "Resource": [ - "arn:aws:s3:::crawler-public*", - "arn:aws:s3:::aws-glue-*" - ] - }, - { - "Effect": "Allow", - "Action": [ - "logs:CreateLogGroup", - "logs:CreateLogStream", - "logs:PutLogEvents" - ], - "Resource": [ - "arn:aws:logs:*:*:/aws-glue/*" - ] - }, - { - "Effect": "Allow", - "Action": [ - "ec2:CreateTags", - "ec2:DeleteTags" - ], - "Condition": { - "ForAllValues:StringEquals": { - "aws:TagKeys": [ - "aws-glue-service-resource" - ] - } + "Action": "sts:AssumeRole", + "Principal": { + "Service": "glue.amazonaws.com" }, - "Resource": [ - "arn:aws:ec2:*:*:network-interface/*", - "arn:aws:ec2:*:*:security-group/*", - "arn:aws:ec2:*:*:instance/*" - ] - }, - { - "Effect": "Allow", - "Action": [ - "glue:CreateDatabase", - "glue:CreatePartition", - "glue:CreateTable", - "glue:DeleteDatabase", - "glue:DeletePartition", - "glue:DeleteTable", - "glue:GetDatabase", - "glue:GetDatabases", - "glue:GetPartition", - "glue:GetPartitions", - "glue:GetTable", - "glue:GetTableVersions", - "glue:GetTables", - "glue:UpdateDatabase", - "glue:UpdatePartition", - "glue:UpdateTable", - "glue:CreateConnection", - "glue:CreateJob", - "glue:DeleteConnection", - "glue:DeleteJob", - "glue:GetConnection", - "glue:GetConnections", - "glue:GetDevEndpoint", - "glue:GetDevEndpoints", - "glue:GetJob", - "glue:GetJobs", - "glue:UpdateJob", - "glue:BatchDeleteConnection", - "glue:UpdateConnection", - "glue:GetUserDefinedFunction", - "glue:UpdateUserDefinedFunction", - "glue:GetUserDefinedFunctions", - "glue:DeleteUserDefinedFunction", - "glue:CreateUserDefinedFunction", - "glue:BatchGetPartition", - "glue:BatchDeletePartition", - "glue:BatchCreatePartition", - "glue:BatchDeleteTable", - "glue:UpdateDevEndpoint", - "s3:GetBucketLocation", - "s3:ListBucket", - "s3:ListAllMyBuckets", - "s3:GetBucketAcl" - ], - "Resource": [ - "*" - ] - }, - { - "Effect": "Allow", - "Action": [ - "s3:GetObject" - ], - "Resource": [ - "arn:aws:s3:::crawler-public*", - "arn:aws:s3:::aws-glue*" - ] - }, - { - "Effect": "Allow", - "Action": [ - "s3:PutObject", - "s3:DeleteObject" - ], - "Resource": [ - "arn:aws:s3:::aws-glue*" - ] - }, - { - "Effect": "Allow", - "Action": [ - "ec2:CreateTags", - "ec2:DeleteTags" - ], - "Condition": { - "ForAllValues:StringEquals": { - "aws:TagKeys": [ - "aws-glue-service-resource" - ] - } - }, - "Resource": [ - "arn:aws:ec2:*:*:network-interface/*", - "arn:aws:ec2:*:*:security-group/*", - "arn:aws:ec2:*:*:instance/*" - ] - } - ] -} \ No newline at end of file + "Effect": "Allow", + "Sid": "" + } + ] + } \ No newline at end of file diff --git a/terraform/environments/data-and-insights-wepi/network.tf b/terraform/environments/data-and-insights-wepi/network.tf index c82c178d671..7124a5ca078 100644 --- a/terraform/environments/data-and-insights-wepi/network.tf +++ b/terraform/environments/data-and-insights-wepi/network.tf @@ -5,23 +5,42 @@ resource "aws_security_group" "wepi_sg_allow_redshift" { description = "Allow Redshift inbound traffic from bastion" vpc_id = data.aws_vpc.shared.id + # ingress { + # description = "Redshift ingress from bastion" + # from_port = 0 + # to_port = 65535 + # protocol = "tcp" + # cidr_blocks = ["0.0.0.0/0"] + # security_groups = [ + # module.wepi_bastion.bastion_security_group + # ] + # } + + # egress { + # description = "Redshift egress to S3 endpoint" + # from_port = 0 + # to_port = 0 + # protocol = "-1" + # cidr_blocks = ["0.0.0.0/0"] + # prefix_list_ids = [ + # data.aws_vpc_endpoint.s3.prefix_list_id + # ] + # } ingress { - description = "Redshift ingress from bastion" - from_port = 0 - to_port = 65535 - protocol = "tcp" - cidr_blocks = ["0.0.0.0/0"] - security_groups = [ - module.wepi_bastion.bastion_security_group - ] - } + description = "Redshift ingress from bastion" + from_port = 5439 + to_port = 5439 + protocol = "tcp" + security_groups = [ + module.wepi_bastion.bastion_security_group + ] + } egress { description = "Redshift egress to S3 endpoint" - from_port = 0 - to_port = 0 - protocol = "-1" - cidr_blocks = ["0.0.0.0/0"] + from_port = 443 + to_port = 443 + protocol = "tcp" prefix_list_ids = [ data.aws_vpc_endpoint.s3.prefix_list_id ] diff --git a/terraform/environments/data-and-insights-wepi/redshift.tf b/terraform/environments/data-and-insights-wepi/redshift.tf index 6931b9b183a..ee288e8ece1 100644 --- a/terraform/environments/data-and-insights-wepi/redshift.tf +++ b/terraform/environments/data-and-insights-wepi/redshift.tf @@ -48,8 +48,8 @@ resource "aws_redshift_cluster" "wepi_redshift_cluster" { encrypted = true kms_key_id = aws_kms_key.wepi_kms_cmk.arn - publicly_accessible = false - enhanced_vpc_routing = true + publicly_accessible = false + enhanced_vpc_routing = false vpc_security_group_ids = [ aws_security_group.wepi_sg_allow_redshift.id ] @@ -57,8 +57,6 @@ resource "aws_redshift_cluster" "wepi_redshift_cluster" { cluster_parameter_group_name = aws_redshift_parameter_group.wepi_redshift_param_group.name - aqua_configuration_status = "enabled" - automated_snapshot_retention_period = local.application_data.accounts[local.environment].redshift_auto_snapshot_retention manual_snapshot_retention_period = local.application_data.accounts[local.environment].redshift_manual_snapshot_retention skip_final_snapshot = true @@ -155,20 +153,20 @@ resource "aws_security_group" "redshift-data-lb" { tags = local.tags } -resource "aws_security_group_rule" "tcp-5439" { - cidr_blocks = ["0.0.0.0/0"] - from_port = 5439 - protocol = "tcp" - security_group_id = aws_security_group.redshift-data-lb.id - to_port = 5439 - type = "ingress" -} +# resource "aws_security_group_rule" "tcp-5439" { +# cidr_blocks = ["0.0.0.0/0"] +# from_port = 5439 +# protocol = "tcp" +# security_group_id = aws_security_group.redshift-data-lb.id +# to_port = 5439 +# type = "ingress" +# } resource "aws_lb" "redshift-data" { name = format("%s-redshift-lb", local.environment) internal = true load_balancer_type = "application" - security_groups = [aws_security_group.redshift-data-lb.id] #extra line added late at night + security_groups = [aws_security_group.redshift-data-lb.id] subnets = data.aws_subnets.shared-private.ids tags = local.tags } @@ -176,7 +174,7 @@ resource "aws_lb" "redshift-data" { resource "aws_lb_target_group" "redshift-data" { name = "redshift-lb-tg-5439" port = 5439 - protocol = "tcp" + protocol = "TCP" target_type = "ip" vpc_id = data.aws_vpc.shared.id } @@ -186,4 +184,4 @@ resource "aws_lb_target_group_attachment" "redshift-data" { target_group_arn = aws_lb_target_group.redshift-data.arn target_id = each.value.private_ip port = 5439 -} \ No newline at end of file +}