Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Data and insights wepi/glue ap #3022

Merged
merged 18 commits into from
Aug 21, 2023
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
{
"keys": {
"development": {
"preproduction": {
"simonytta": "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIKeBrsu9K2qVs2r/fueve0V+5WBY/ZZTNq1UJYhuXIZp [email protected]"
},
"test": {},
"production": {}
"development": {"simonytta": "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIKeBrsu9K2qVs2r/fueve0V+5WBY/ZZTNq1UJYhuXIZp [email protected]"},
"production": {"simonytta": "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIKeBrsu9K2qVs2r/fueve0V+5WBY/ZZTNq1UJYhuXIZp [email protected]"}
}
}
50 changes: 50 additions & 0 deletions terraform/environments/data-and-insights-wepi/crawler.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Glue Data Catalog database that the "sop-import-all" crawler populates
# with the table definitions it discovers.
resource "aws_glue_catalog_database" "mojap_all_tables" {
  name        = "ap-mojap-tables-all"
  description = ""
}

# Glue Data Catalog database shared by the JDBC crawlers for the Redshift
# "absence" and "leavers" tables.
resource "aws_glue_catalog_database" "sop_redshift" {
  name        = "sop-redshift"
  description = ""
}

# Crawler that scans the s3://mojap-mp-redshift/ bucket and records the
# schemas it finds in the "ap-mojap-tables-all" catalog database.
resource "aws_glue_crawler" "mojap_import_all" {
  name          = "sop-import-all"
  description   = "Identifies schema for SOP datasets on AP"
  role          = aws_iam_role.wepi_iam_role_glue.arn
  database_name = aws_glue_catalog_database.mojap_all_tables.name

  s3_target {
    # NOTE(review): this is the same Glue connection the JDBC crawlers in this
    # file use for Redshift; confirm its type is one an S3 target accepts.
    connection_name = aws_glue_connection.wepi_glue_conn_redshift.name
    path            = "s3://mojap-mp-redshift/"
  }
}

# JDBC crawler for the "absence" table; results are written to the
# "sop-redshift" catalog database.
resource "aws_glue_crawler" "absence_crawler" {
  name          = "absence-redshift-schema"
  description   = "Crawl AWS Redshift \"absence\" table using JDBC"
  role          = aws_iam_role.wepi_iam_role_glue.arn
  database_name = aws_glue_catalog_database.sop_redshift.name

  jdbc_target {
    connection_name = aws_glue_connection.wepi_glue_conn_redshift.name
    # Include path, written as <database>/<schema>/<table>.
    path = "wepidevelopmentdb/public/absence"
  }
}

# JDBC crawler for the "leavers" table; results are written to the
# "sop-redshift" catalog database.
resource "aws_glue_crawler" "leavers_crawler" {
  name          = "leavers-redshift-schema"
  description   = "Crawl AWS Redshift \"leavers\" table using JDBC"
  role          = aws_iam_role.wepi_iam_role_glue.arn
  database_name = aws_glue_catalog_database.sop_redshift.name

  jdbc_target {
    connection_name = aws_glue_connection.wepi_glue_conn_redshift.name
    # Include path, written as <database>/<schema>/<table>.
    path = "wepidevelopmentdb/public/leavers"
  }
}

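# NOTE: disabled draft. Before enabling it, `depends_on` must reference the
# full resource address (aws_redshift_cluster.wepi_redshift_cluster) without
# quotes, and the `mysql` client should be swapped for a PostgreSQL-compatible
# client such as `psql`, since that is the protocol Redshift accepts.
# resource "null_resource" "setup_leavers_redshift" {
# depends_on = ["aws_redshift_cluster"] #wait for the db to be ready
# provisioner "local-exec" {
# command = "mysql -u ${aws_redshift_cluster.wepi_redshift_cluster.master_username} -p${aws_secretsmanager_secret_version.wepi_redshift_admin_pw.secret_string} < create_table_leavers.sql"
# }
# }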

Original file line number Diff line number Diff line change
@@ -1,193 +1,13 @@
{
"Version": "2012-10-17",
"Statement": [
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Action": [
"s3:*",
"s3-object-lambda:*",
"glue:*",
"s3:GetBucketLocation",
"s3:ListBucket",
"s3:ListAllMyBuckets",
"s3:GetBucketAcl",
"ec2:DescribeVpcEndpoints",
"ec2:DescribeRouteTables",
"ec2:CreateNetworkInterface",
"ec2:DeleteNetworkInterface",
"ec2:DescribeNetworkInterfaces",
"ec2:DescribeSecurityGroups",
"ec2:DescribeSubnets",
"ec2:DescribeVpcAttribute",
"iam:ListRolePolicies",
"iam:GetRole",
"iam:GetRolePolicy",
"cloudwatch:PutMetricData"
],
"Resource": "*"
},
{
"Effect": "Allow",
"Action": [
"s3:CreateBucket"
],
"Resource": [
"arn:aws:s3:::aws-glue-*"
]
},
{
"Effect": "Allow",
"Action": [
"s3:GetObject",
"s3:PutObject",
"s3:DeleteObject"
],
"Resource": [
"arn:aws:s3:::aws-glue-*/*",
"arn:aws:s3:::*/*aws-glue-*/*"
]
},
{
"Effect": "Allow",
"Action": [
"s3:GetObject",
"s3:PutObject",
"s3:DeleteObject"
],
"Resource": [
"arn:aws:s3:::aws-glue-*/*",
"arn:aws:s3:::*/*aws-glue-*/*"
]
},
{
"Effect": "Allow",
"Action": [
"s3:GetObject"
],
"Resource": [
"arn:aws:s3:::crawler-public*",
"arn:aws:s3:::aws-glue-*"
]
},
{
"Effect": "Allow",
"Action": [
"logs:CreateLogGroup",
"logs:CreateLogStream",
"logs:PutLogEvents"
],
"Resource": [
"arn:aws:logs:*:*:/aws-glue/*"
]
},
{
"Effect": "Allow",
"Action": [
"ec2:CreateTags",
"ec2:DeleteTags"
],
"Condition": {
"ForAllValues:StringEquals": {
"aws:TagKeys": [
"aws-glue-service-resource"
]
}
"Action": "sts:AssumeRole",
"Principal": {
"Service": "glue.amazonaws.com"
},
"Resource": [
"arn:aws:ec2:*:*:network-interface/*",
"arn:aws:ec2:*:*:security-group/*",
"arn:aws:ec2:*:*:instance/*"
]
},
{
"Effect": "Allow",
"Action": [
"glue:CreateDatabase",
"glue:CreatePartition",
"glue:CreateTable",
"glue:DeleteDatabase",
"glue:DeletePartition",
"glue:DeleteTable",
"glue:GetDatabase",
"glue:GetDatabases",
"glue:GetPartition",
"glue:GetPartitions",
"glue:GetTable",
"glue:GetTableVersions",
"glue:GetTables",
"glue:UpdateDatabase",
"glue:UpdatePartition",
"glue:UpdateTable",
"glue:CreateConnection",
"glue:CreateJob",
"glue:DeleteConnection",
"glue:DeleteJob",
"glue:GetConnection",
"glue:GetConnections",
"glue:GetDevEndpoint",
"glue:GetDevEndpoints",
"glue:GetJob",
"glue:GetJobs",
"glue:UpdateJob",
"glue:BatchDeleteConnection",
"glue:UpdateConnection",
"glue:GetUserDefinedFunction",
"glue:UpdateUserDefinedFunction",
"glue:GetUserDefinedFunctions",
"glue:DeleteUserDefinedFunction",
"glue:CreateUserDefinedFunction",
"glue:BatchGetPartition",
"glue:BatchDeletePartition",
"glue:BatchCreatePartition",
"glue:BatchDeleteTable",
"glue:UpdateDevEndpoint",
"s3:GetBucketLocation",
"s3:ListBucket",
"s3:ListAllMyBuckets",
"s3:GetBucketAcl"
],
"Resource": [
"*"
]
},
{
"Effect": "Allow",
"Action": [
"s3:GetObject"
],
"Resource": [
"arn:aws:s3:::crawler-public*",
"arn:aws:s3:::aws-glue*"
]
},
{
"Effect": "Allow",
"Action": [
"s3:PutObject",
"s3:DeleteObject"
],
"Resource": [
"arn:aws:s3:::aws-glue*"
]
},
{
"Effect": "Allow",
"Action": [
"ec2:CreateTags",
"ec2:DeleteTags"
],
"Condition": {
"ForAllValues:StringEquals": {
"aws:TagKeys": [
"aws-glue-service-resource"
]
}
},
"Resource": [
"arn:aws:ec2:*:*:network-interface/*",
"arn:aws:ec2:*:*:security-group/*",
"arn:aws:ec2:*:*:instance/*"
]
}
]
}
"Effect": "Allow",
"Sid": ""
}
]
}
45 changes: 32 additions & 13 deletions terraform/environments/data-and-insights-wepi/network.tf
Original file line number Diff line number Diff line change
Expand Up @@ -5,23 +5,42 @@ resource "aws_security_group" "wepi_sg_allow_redshift" {
description = "Allow Redshift inbound traffic from bastion"
vpc_id = data.aws_vpc.shared.id

# ingress {
# description = "Redshift ingress from bastion"
# from_port = 0
# to_port = 65535
# protocol = "tcp"
# cidr_blocks = ["0.0.0.0/0"]
# security_groups = [
# module.wepi_bastion.bastion_security_group
# ]
# }

# egress {
# description = "Redshift egress to S3 endpoint"
# from_port = 0
# to_port = 0
# protocol = "-1"
# cidr_blocks = ["0.0.0.0/0"]
# prefix_list_ids = [
# data.aws_vpc_endpoint.s3.prefix_list_id
# ]
# }
ingress {
description = "Redshift ingress from bastion"
from_port = 0
to_port = 65535
protocol = "tcp"
cidr_blocks = ["0.0.0.0/0"]
security_groups = [
module.wepi_bastion.bastion_security_group
]
}
description = "Redshift ingress from bastion"
from_port = 5439
to_port = 5439
protocol = "tcp"
security_groups = [
module.wepi_bastion.bastion_security_group
]
}

egress {
description = "Redshift egress to S3 endpoint"
from_port = 0
to_port = 0
protocol = "-1"
cidr_blocks = ["0.0.0.0/0"]
from_port = 443
to_port = 443
protocol = "tcp"
prefix_list_ids = [
data.aws_vpc_endpoint.s3.prefix_list_id
]
Expand Down
14 changes: 6 additions & 8 deletions terraform/environments/data-and-insights-wepi/redshift.tf
Original file line number Diff line number Diff line change
Expand Up @@ -45,20 +45,18 @@ resource "aws_redshift_cluster" "wepi_redshift_cluster" {
cluster_type = local.application_data.accounts[local.environment].redshift_cluster_node_count > 1 ? "multi-node" : "single-node"
number_of_nodes = local.application_data.accounts[local.environment].redshift_cluster_node_count

encrypted = true
encrypted = false
kms_key_id = aws_kms_key.wepi_kms_cmk.arn

publicly_accessible = false
enhanced_vpc_routing = true
publicly_accessible = true
enhanced_vpc_routing = false
vpc_security_group_ids = [
aws_security_group.wepi_sg_allow_redshift.id
]
cluster_subnet_group_name = aws_redshift_subnet_group.wepi_redhsift_subnet_group.name

cluster_parameter_group_name = aws_redshift_parameter_group.wepi_redshift_param_group.name

aqua_configuration_status = "enabled"

automated_snapshot_retention_period = local.application_data.accounts[local.environment].redshift_auto_snapshot_retention
manual_snapshot_retention_period = local.application_data.accounts[local.environment].redshift_manual_snapshot_retention
skip_final_snapshot = true
Expand Down Expand Up @@ -167,7 +165,7 @@ resource "aws_security_group_rule" "tcp-5439" {
resource "aws_lb" "redshift-data" {
name = format("%s-redshift-lb", local.environment)
internal = true
load_balancer_type = "application"
load_balancer_type = "network"
security_groups = [aws_security_group.redshift-data-lb.id] #extra line added late at night
subnets = data.aws_subnets.shared-private.ids
tags = local.tags
Expand All @@ -176,7 +174,7 @@ resource "aws_lb" "redshift-data" {
resource "aws_lb_target_group" "redshift-data" {
name = "redshift-lb-tg-5439"
port = 5439
protocol = "tcp"
protocol = "TCP"
target_type = "ip"
vpc_id = data.aws_vpc.shared.id
}
Expand All @@ -186,4 +184,4 @@ resource "aws_lb_target_group_attachment" "redshift-data" {
target_group_arn = aws_lb_target_group.redshift-data.arn
target_id = each.value.private_ip
port = 5439
}
}