Skip to content

Commit

Permalink
Merge pull request #3022 from ministryofjustice/data-and-insights-wep…
Browse files Browse the repository at this point in the history
…i/glue-ap

Data and insights wepi/glue ap
  • Loading branch information
gwionap authored Aug 21, 2023
2 parents fcb09b5 + 5444239 commit 7c2e021
Show file tree
Hide file tree
Showing 6 changed files with 128 additions and 222 deletions.
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
{
"keys": {
"development": {
"preproduction": {
"simonytta": "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIKeBrsu9K2qVs2r/fueve0V+5WBY/ZZTNq1UJYhuXIZp [email protected]"
},
"test": {},
"production": {}
"development": {"simonytta": "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIKeBrsu9K2qVs2r/fueve0V+5WBY/ZZTNq1UJYhuXIZp [email protected]"},
"production": {"simonytta": "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIKeBrsu9K2qVs2r/fueve0V+5WBY/ZZTNq1UJYhuXIZp [email protected]"}
}
}
50 changes: 50 additions & 0 deletions terraform/environments/data-and-insights-wepi/crawler.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Glue Data Catalog database "ap-mojap-tables-all".
# Target database of the "sop-import-all" crawler (aws_glue_crawler.mojap_import_all).
resource "aws_glue_catalog_database" "mojap_all_tables" {
  description = ""
  name        = "ap-mojap-tables-all"
}

# Glue Data Catalog database "sop-redshift".
# Target database of the per-table Redshift crawlers (absence_crawler, leavers_crawler).
resource "aws_glue_catalog_database" "sop_redshift" {
  description = ""
  name        = "sop-redshift"
}

# Glue crawler "sop-import-all": crawls the s3://mojap-mp-redshift/ bucket and
# catalogues what it finds in the "ap-mojap-tables-all" database, running as the
# wepi Glue IAM role.
resource "aws_glue_crawler" "mojap_import_all" {
  name          = "sop-import-all"
  description   = "Identifies schema for SOP datasets on AP"
  role          = aws_iam_role.wepi_iam_role_glue.arn
  database_name = aws_glue_catalog_database.mojap_all_tables.name

  s3_target {
    # NOTE(review): this S3 target reuses the Redshift Glue connection. Glue
    # documents the S3-target connection as a way to reach S3 from inside a VPC;
    # confirm this connection's type is accepted for S3 targets.
    connection_name = aws_glue_connection.wepi_glue_conn_redshift.name
    path            = "s3://mojap-mp-redshift/"
  }
}

# Glue crawler "absence-redshift-schema": crawls the "absence" table over the
# Redshift Glue connection and catalogues it in the "sop-redshift" database.
resource "aws_glue_crawler" "absence_crawler" {
  name          = "absence-redshift-schema"
  description   = "Crawl AWS Redshift \"absence\" table using JDBC"
  role          = aws_iam_role.wepi_iam_role_glue.arn
  database_name = aws_glue_catalog_database.sop_redshift.name

  jdbc_target {
    connection_name = aws_glue_connection.wepi_glue_conn_redshift.name
    # JDBC include path, presumably <database>/<schema>/<table>.
    # NOTE(review): the database name is hard-coded to the development database;
    # confirm it is also correct for the other environments this file is applied to.
    path = "wepidevelopmentdb/public/absence"
  }
}

# Glue crawler "leavers-redshift-schema": crawls the "leavers" table over the
# Redshift Glue connection and catalogues it in the "sop-redshift" database.
resource "aws_glue_crawler" "leavers_crawler" {
  name          = "leavers-redshift-schema"
  description   = "Crawl AWS Redshift \"leavers\" table using JDBC"
  role          = aws_iam_role.wepi_iam_role_glue.arn
  database_name = aws_glue_catalog_database.sop_redshift.name

  jdbc_target {
    connection_name = aws_glue_connection.wepi_glue_conn_redshift.name
    # JDBC include path, presumably <database>/<schema>/<table>.
    # NOTE(review): the database name is hard-coded to the development database;
    # confirm it is also correct for the other environments this file is applied to.
    path = "wepidevelopmentdb/public/leavers"
  }
}

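# NOTE(review): disabled draft, kept for reference. Before re-enabling:
#   - Redshift is PostgreSQL-compatible, so the `mysql` client below will not
#     connect; use `psql` (or the Redshift Data API) instead.
#   - `depends_on` must list a resource address (e.g.
#     aws_redshift_cluster.wepi_redshift_cluster), not a quoted type name.
#   - Passing the admin password on the command line exposes it in process
#     listings and Terraform logs; supply it via an environment variable.
# resource "null_resource" "setup_leavers_redshift" {
# depends_on = ["aws_redshift_cluster"] #wait for the db to be ready
# provisioner "local-exec" {
# command = "mysql -u ${aws_redshift_cluster.wepi_redshift_cluster.master_username} -p${aws_secretsmanager_secret_version.wepi_redshift_admin_pw.secret_string} < create_table_leavers.sql"
# }
# }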

21 changes: 20 additions & 1 deletion terraform/environments/data-and-insights-wepi/glue.tf
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,28 @@ resource "aws_glue_connection" "wepi_glue_conn_redshift" {

name = "wepi-redshift-${local.environment}-conn"
# Network placement for the Glue connection: uses the availability zone and ID of
# data subnet "a" and attaches the wepi_sg_allow_redshift security group.
physical_connection_requirements {
  availability_zone = data.aws_subnet.data_subnets_a.availability_zone
  # Direct reference; the interpolation-only form "${...}" is deprecated since
  # Terraform 0.12 and produces a warning on plan.
  security_group_id_list = [aws_security_group.wepi_sg_allow_redshift.id]
  subnet_id              = data.aws_subnet.data_subnets_a.id
}

tags = local.tags
}
}

# resource "aws_glue_job" "absence_glue_job" {
# name = "absence-sop-glue-job"
# role_arn = aws_iam_role.wepi_iam_role_glue.arn

# command {
# script_location = "s3://mojap-sop-data-glue-job-scripts/absence-glue-job.py"
# }
# }

# resource "aws_glue_job" "leavers_glue_job" {
# name = "leavers-sop-glue-job"
# role_arn = aws_iam_role.wepi_iam_role_glue.arn

# command {
# script_location = "s3://mojap-sop-data-glue-job-scripts/leavers-glue-job.py"
# }
# }
Original file line number Diff line number Diff line change
@@ -1,193 +1,13 @@
{
"Version": "2012-10-17",
"Statement": [
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Action": [
"s3:*",
"s3-object-lambda:*",
"glue:*",
"s3:GetBucketLocation",
"s3:ListBucket",
"s3:ListAllMyBuckets",
"s3:GetBucketAcl",
"ec2:DescribeVpcEndpoints",
"ec2:DescribeRouteTables",
"ec2:CreateNetworkInterface",
"ec2:DeleteNetworkInterface",
"ec2:DescribeNetworkInterfaces",
"ec2:DescribeSecurityGroups",
"ec2:DescribeSubnets",
"ec2:DescribeVpcAttribute",
"iam:ListRolePolicies",
"iam:GetRole",
"iam:GetRolePolicy",
"cloudwatch:PutMetricData"
],
"Resource": "*"
},
{
"Effect": "Allow",
"Action": [
"s3:CreateBucket"
],
"Resource": [
"arn:aws:s3:::aws-glue-*"
]
},
{
"Effect": "Allow",
"Action": [
"s3:GetObject",
"s3:PutObject",
"s3:DeleteObject"
],
"Resource": [
"arn:aws:s3:::aws-glue-*/*",
"arn:aws:s3:::*/*aws-glue-*/*"
]
},
{
"Effect": "Allow",
"Action": [
"s3:GetObject",
"s3:PutObject",
"s3:DeleteObject"
],
"Resource": [
"arn:aws:s3:::aws-glue-*/*",
"arn:aws:s3:::*/*aws-glue-*/*"
]
},
{
"Effect": "Allow",
"Action": [
"s3:GetObject"
],
"Resource": [
"arn:aws:s3:::crawler-public*",
"arn:aws:s3:::aws-glue-*"
]
},
{
"Effect": "Allow",
"Action": [
"logs:CreateLogGroup",
"logs:CreateLogStream",
"logs:PutLogEvents"
],
"Resource": [
"arn:aws:logs:*:*:/aws-glue/*"
]
},
{
"Effect": "Allow",
"Action": [
"ec2:CreateTags",
"ec2:DeleteTags"
],
"Condition": {
"ForAllValues:StringEquals": {
"aws:TagKeys": [
"aws-glue-service-resource"
]
}
"Action": "sts:AssumeRole",
"Principal": {
"Service": "glue.amazonaws.com"
},
"Resource": [
"arn:aws:ec2:*:*:network-interface/*",
"arn:aws:ec2:*:*:security-group/*",
"arn:aws:ec2:*:*:instance/*"
]
},
{
"Effect": "Allow",
"Action": [
"glue:CreateDatabase",
"glue:CreatePartition",
"glue:CreateTable",
"glue:DeleteDatabase",
"glue:DeletePartition",
"glue:DeleteTable",
"glue:GetDatabase",
"glue:GetDatabases",
"glue:GetPartition",
"glue:GetPartitions",
"glue:GetTable",
"glue:GetTableVersions",
"glue:GetTables",
"glue:UpdateDatabase",
"glue:UpdatePartition",
"glue:UpdateTable",
"glue:CreateConnection",
"glue:CreateJob",
"glue:DeleteConnection",
"glue:DeleteJob",
"glue:GetConnection",
"glue:GetConnections",
"glue:GetDevEndpoint",
"glue:GetDevEndpoints",
"glue:GetJob",
"glue:GetJobs",
"glue:UpdateJob",
"glue:BatchDeleteConnection",
"glue:UpdateConnection",
"glue:GetUserDefinedFunction",
"glue:UpdateUserDefinedFunction",
"glue:GetUserDefinedFunctions",
"glue:DeleteUserDefinedFunction",
"glue:CreateUserDefinedFunction",
"glue:BatchGetPartition",
"glue:BatchDeletePartition",
"glue:BatchCreatePartition",
"glue:BatchDeleteTable",
"glue:UpdateDevEndpoint",
"s3:GetBucketLocation",
"s3:ListBucket",
"s3:ListAllMyBuckets",
"s3:GetBucketAcl"
],
"Resource": [
"*"
]
},
{
"Effect": "Allow",
"Action": [
"s3:GetObject"
],
"Resource": [
"arn:aws:s3:::crawler-public*",
"arn:aws:s3:::aws-glue*"
]
},
{
"Effect": "Allow",
"Action": [
"s3:PutObject",
"s3:DeleteObject"
],
"Resource": [
"arn:aws:s3:::aws-glue*"
]
},
{
"Effect": "Allow",
"Action": [
"ec2:CreateTags",
"ec2:DeleteTags"
],
"Condition": {
"ForAllValues:StringEquals": {
"aws:TagKeys": [
"aws-glue-service-resource"
]
}
},
"Resource": [
"arn:aws:ec2:*:*:network-interface/*",
"arn:aws:ec2:*:*:security-group/*",
"arn:aws:ec2:*:*:instance/*"
]
}
]
}
"Effect": "Allow",
"Sid": ""
}
]
}
45 changes: 32 additions & 13 deletions terraform/environments/data-and-insights-wepi/network.tf
Original file line number Diff line number Diff line change
Expand Up @@ -5,23 +5,42 @@ resource "aws_security_group" "wepi_sg_allow_redshift" {
description = "Allow Redshift inbound traffic from bastion"
vpc_id = data.aws_vpc.shared.id

# ingress {
# description = "Redshift ingress from bastion"
# from_port = 0
# to_port = 65535
# protocol = "tcp"
# cidr_blocks = ["0.0.0.0/0"]
# security_groups = [
# module.wepi_bastion.bastion_security_group
# ]
# }

# egress {
# description = "Redshift egress to S3 endpoint"
# from_port = 0
# to_port = 0
# protocol = "-1"
# cidr_blocks = ["0.0.0.0/0"]
# prefix_list_ids = [
# data.aws_vpc_endpoint.s3.prefix_list_id
# ]
# }
ingress {
description = "Redshift ingress from bastion"
from_port = 0
to_port = 65535
protocol = "tcp"
cidr_blocks = ["0.0.0.0/0"]
security_groups = [
module.wepi_bastion.bastion_security_group
]
}
description = "Redshift ingress from bastion"
from_port = 5439
to_port = 5439
protocol = "tcp"
security_groups = [
module.wepi_bastion.bastion_security_group
]
}

egress {
description = "Redshift egress to S3 endpoint"
from_port = 0
to_port = 0
protocol = "-1"
cidr_blocks = ["0.0.0.0/0"]
from_port = 443
to_port = 443
protocol = "tcp"
prefix_list_ids = [
data.aws_vpc_endpoint.s3.prefix_list_id
]
Expand Down
Loading

0 comments on commit 7c2e021

Please sign in to comment.