diff --git a/README.md b/README.md index e6028ede..54880abe 100644 --- a/README.md +++ b/README.md @@ -2,9 +2,9 @@ [![repo standards badge](https://img.shields.io/endpoint?labelColor=231f20&color=005ea5&style=for-the-badge&label=MoJ%20Compliant&url=https%3A%2F%2Foperations-engineering-reports.cloud-platform.service.justice.gov.uk%2Fapi%2Fv1%2Fcompliant_public_repositories%2Fendpoint%2Fmodernisation-platform-terraform-loadbalancer&logo=)](https://operations-engineering-reports.cloud-platform.service.justice.gov.uk/public-report/modernisation-platform-terraform-loadbalancer) -A Terraform module that creates application loadbalancer (with loadbalancer security groups) in AWS with logging enabled, s3 to store logs and Athena DB to query logs. +A Terraform module that creates an application loadbalancer (with loadbalancer security groups) or network loadbalancer in AWS with logging enabled, s3 to store logs and Athena DB to query logs. -An s3 bucket name can be provided in the module by adding the `existing_bucket_name` variable and adding the bucket name. Otherwise, if no bucket exists one will be created and no variable needs to be set in the module. If using an existing bucket the logs will need to be moved with the following folder structure {application_name}/AWSLogs/{account_number}/elasticloadbalancing/ otherwise you will experience errors with the gluecrawler function. +An s3 bucket name can be provided in the module by adding the `existing_bucket_name` variable and adding the bucket name. Otherwise, if no bucket exists one will be created and no variable needs to be set in the module. Application loadbalancers and network loadbalancers do not log to the same S3 bucket location. If you're using existing buckets they also need to have specific permissions applied to them. See the [External buckets](#external-buckets) section for more information. 
Either pass in existing security group(s) to attach to the load balancer using the `security_groups` variable, or define `loadbalancer_ingress_rules` and `loadbalancer_egress_rules` variables to create a new security group within the module. @@ -45,14 +45,91 @@ locals { Loadbalancer target groups and listeners need to be created separately. -To run queries in Athena do the following: -Go to the Athena console and click on Saved Queries https://console.aws.amazon.com/athena/saved-queries/home +The use of "aws_glue_catalog_table" resources for application and network loadbalancers means that logs appearing in the S3 bucket will be available to query via Athena without having to carry out any manual Athena config steps. -Click the new saved query that is named ``-create-table and Run it. You only have to do it once. +## Module created S3 access_logs bucket -Try a query like `select * from lb_logs limit 100;` +By default the loadbalancer will set up an access_logs bucket for you, unless you set access_logs = false initially for testing or some other reason. Setting this back to true after the lb has been deployed will then create the bucket for you. The reason for the 'depends_on' here is that without the module.s3-bucket resource being created first, the module.lb resource will fail with a validation error. +```hcl + depends_on = [ + module.s3-bucket + ] +``` + +## External buckets + +If you decide to use externally created buckets they need to have been created and have appropriate permissions applied to them BEFORE `access_logs = true` and `existing_bucket_name` values are added to the lb code. If you add these values before the bucket is created you will get an error because the lb module will run a check to see if the s3 bucket is writeable and if it is not it will fail. + +So to use `existing_bucket_name` the deployment steps are: +1. Set `access_logs = false` in the lb create code & create the lb +2. 
Create the bucket - making sure the appropriate permissions are applied +3. Set `access_logs = true` and set `existing_bucket_name` in the lb create code as your-bucket-name-GUID + +### External bucket permissions + +For simplicity the bucket can be created with the following policy attached to it. This applies whether the loadbalancer is an "application" or "network" loadbalancer. This uses the bucket_policy_v2 implementation using the s3_bucket module: + +```hcl + public-lb-logs-bucket = { + sse_algorithm = "AES256" # required for Network Loadbalancers + bucket_policy_v2 = [ + { + effect = "Allow" + actions = [ + "s3:PutObject", + ] + principals = { + identifiers = ["arn:aws:iam::652711504416:root"] + type = "AWS" + } + }, + { + effect = "Allow" + actions = [ + "s3:PutObject" + ] + principals = { + identifiers = ["delivery.logs.amazonaws.com"] + type = "Service" + } + + conditions = [ + { + test = "StringEquals" + variable = "s3:x-amz-acl" + values = ["bucket-owner-full-control"] + } + ] + }, + { + effect = "Allow" + actions = [ + "s3:GetBucketAcl" + ] + principals = { + identifiers = ["delivery.logs.amazonaws.com"] + type = "Service" + } + } + ] + iam_policies = module.baseline_presets.s3_iam_policies + } +``` + +If you want to see exactly what policies are needed for each then refer to [NLB Requirements](https://docs.aws.amazon.com/elasticloadbalancing/latest/network/load-balancer-access-logs.html#access-logging-bucket-requirements) and [ALB Requirements](https://docs.aws.amazon.com/elasticloadbalancing/latest/application/enable-access-logging.html#attach-bucket-policy) + +## Network Loadbalancer caveats + +* Access logs are created only if the load balancer has a TLS listener and they contain information only about TLS requests. +* Network loadbalancers only support SSE-S3 encryption for access logs, not aws:kms (AWS managed keys). +* They can support customer managed keys but this is not currently supported by this module. 
+* No "verify bucket permissions" test file is created in the relevant bucket; instead, the terraform apply step will fail with a validation error if the permissions and the bucket encryption parameters are not correct. + +## Application Loadbalancer caveats + +* It's worth noting that Application LBs will create a test file in the S3 bucket to verify that the bucket permissions are correct. ## Usage @@ -163,28 +240,28 @@ If you're looking to raise an issue with this module, please create a new issue | Name | Source | Version | |------|--------|---------| -| [s3-bucket](#module\_s3-bucket) | github.com/ministryofjustice/modernisation-platform-terraform-s3-bucket | 8688bc15a08fbf5a4f4eef9b7433c5a417df8df1 | +| [s3-bucket](#module\_s3-bucket) | github.com/ministryofjustice/modernisation-platform-terraform-s3-bucket | 568694e50e03630d99cb569eafa06a0b879a1239 | ## Resources | Name | Type | |------|------| | [aws_athena_database.lb-access-logs](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/athena_database) | resource | -| [aws_athena_named_query.main](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/athena_named_query) | resource | | [aws_athena_workgroup.lb-access-logs](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/athena_workgroup) | resource | -| [aws_glue_crawler.ssm_resource_sync](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/glue_crawler) | resource | -| [aws_iam_policy.lb_glue_crawler](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_policy) | resource | -| [aws_iam_role.lb_glue_crawler](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role) | resource | -| [aws_iam_role_policy_attachment.lb_glue_crawler](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy_attachment) | resource | -| 
[aws_iam_role_policy_attachment.lb_glue_service](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy_attachment) | resource | +| [aws_glue_catalog_table.application_lb_logs](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/glue_catalog_table) | resource | +| [aws_glue_catalog_table.network_lb_logs](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/glue_catalog_table) | resource | +| [aws_iam_policy.glue_s3](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_policy) | resource | +| [aws_iam_role.glue](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role) | resource | +| [aws_iam_role_policy_attachment.glue_s3](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy_attachment) | resource | +| [aws_iam_role_policy_attachment.glue_service](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy_attachment) | resource | | [aws_lb.loadbalancer](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/lb) | resource | | [aws_lb_target_group.this](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/lb_target_group) | resource | | [aws_lb_target_group_attachment.this](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/lb_target_group_attachment) | resource | | [aws_security_group.lb](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/security_group) | resource | | [aws_elb_service_account.default](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/elb_service_account) | data source | | [aws_iam_policy_document.bucket_policy](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source | -| 
[aws_iam_policy_document.lb_glue_crawler](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source | -| [aws_iam_policy_document.lb_glue_crawler_assume](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source | +| [aws_iam_policy_document.glue_assume](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source | +| [aws_iam_policy_document.glue_s3](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source | | [aws_vpc.shared](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/vpc) | data source | ## Inputs @@ -205,7 +282,6 @@ If you're looking to raise an issue with this module, please create a new issue | [load\_balancer\_type](#input\_load\_balancer\_type) | application or network | `string` | `"application"` | no | | [loadbalancer\_egress\_rules](#input\_loadbalancer\_egress\_rules) | Create new security group with these egress rules for the loadbalancer. Or use the security\_groups var to attach existing group(s) |
map(object({
description = string
from_port = number
to_port = number
protocol = string
security_groups = list(string)
cidr_blocks = list(string)
}))
| `{}` | no | | [loadbalancer\_ingress\_rules](#input\_loadbalancer\_ingress\_rules) | Create new security group with these ingress rules for the loadbalancer. Or use the security\_groups var to attach existing group(s) |
map(object({
description = string
from_port = number
to_port = number
protocol = string
security_groups = list(string)
cidr_blocks = list(string)
}))
| `{}` | no | -| [log\_schedule](#input\_log\_schedule) | n/a | `string` | `"cron(15 1 ? * MON *)"` | no | | [public\_subnets](#input\_public\_subnets) | Badly named variable, use subnets instead. Keeping for backward compatibility | `list(string)` | `[]` | no | | [region](#input\_region) | AWS Region where resources are to be created | `string` | n/a | yes | | [s3\_versioning](#input\_s3\_versioning) | A boolean that determines whether s3 will have versioning | `bool` | `true` | no | diff --git a/main.tf b/main.tf index ead91384..77f65f11 100644 --- a/main.tf +++ b/main.tf @@ -8,7 +8,7 @@ data "aws_vpc" "shared" { module "s3-bucket" { count = var.existing_bucket_name == "" && var.access_logs ? 1 : 0 - source = "github.com/ministryofjustice/modernisation-platform-terraform-s3-bucket?ref=8688bc15a08fbf5a4f4eef9b7433c5a417df8df1" # v7.0.0 + source = "github.com/ministryofjustice/modernisation-platform-terraform-s3-bucket?ref=568694e50e03630d99cb569eafa06a0b879a1239" # v7.1.0 providers = { aws.bucket-replication = aws.bucket-replication @@ -18,6 +18,7 @@ module "s3-bucket" { replication_enabled = false versioning_enabled = var.s3_versioning force_destroy = var.force_destroy_bucket + sse_algorithm = var.load_balancer_type == "network" ? "AES256" : "aws:kms" lifecycle_rule = [ { id = "main" @@ -69,14 +70,24 @@ data "aws_iam_policy_document" "bucket_policy" { actions = [ "s3:PutObject" ] - resources = [var.existing_bucket_name != "" ? "arn:aws:s3:::${var.existing_bucket_name}/${var.application_name}/AWSLogs/${var.account_number}/*" : "${module.s3-bucket[0].bucket.arn}/${var.application_name}/AWSLogs/${var.account_number}/*"] + resources = flatten([var.existing_bucket_name != "" + ? 
[ + "arn:aws:s3:::${var.existing_bucket_name}/${var.application_name}/AWSLogs/${var.account_number}/*", + "arn:aws:s3:::${var.existing_bucket_name}/AWSLogs/${var.account_number}/*" + ] + : [ + "${module.s3-bucket[0].bucket.arn}/${var.application_name}/AWSLogs/${var.account_number}/*", + "${module.s3-bucket[0].bucket.arn}/AWSLogs/${var.account_number}/*" + ] + ]) principals { type = "AWS" identifiers = [data.aws_elb_service_account.default.arn] } } statement { - sid = "AWSLogDeliveryWrite" + effect = "Allow" + sid = "AWSLogDeliveryWrite" principals { type = "Service" @@ -87,7 +98,16 @@ data "aws_iam_policy_document" "bucket_policy" { "s3:PutObject" ] - resources = [var.existing_bucket_name != "" ? "arn:aws:s3:::${var.existing_bucket_name}/${var.application_name}/AWSLogs/${var.account_number}/*" : "${module.s3-bucket[0].bucket.arn}/${var.application_name}/AWSLogs/${var.account_number}/*"] + resources = flatten([var.existing_bucket_name != "" + ? [ + "arn:aws:s3:::${var.existing_bucket_name}/${var.application_name}/AWSLogs/${var.account_number}/*", + "arn:aws:s3:::${var.existing_bucket_name}/AWSLogs/${var.account_number}/*" + ] + : [ + "${module.s3-bucket[0].bucket.arn}/${var.application_name}/AWSLogs/${var.account_number}/*", + "${module.s3-bucket[0].bucket.arn}/AWSLogs/${var.account_number}/*" + ] + ]) condition { test = "StringEquals" @@ -100,7 +120,8 @@ data "aws_iam_policy_document" "bucket_policy" { } statement { - sid = "AWSLogDeliveryAclCheck" + sid = "AWSLogDeliveryAclCheck" + effect = "Allow" principals { type = "Service" @@ -149,6 +170,8 @@ resource "aws_lb" "loadbalancer" { Name = "${var.application_name}-lb" }, ) + + depends_on = [module.s3-bucket] } resource "aws_security_group" "lb" { @@ -189,7 +212,6 @@ resource "aws_security_group" "lb" { ) } - resource "aws_athena_database" "lb-access-logs" { count = var.access_logs ? 
1 : 0 name = replace("${var.application_name}-lb-access-logs", "-", "_") # dashes not allowed in name @@ -199,24 +221,6 @@ resource "aws_athena_database" "lb-access-logs" { } } -resource "aws_athena_named_query" "main" { - count = var.access_logs ? 1 : 0 - name = "${var.application_name}-create-table" - database = aws_athena_database.lb-access-logs[0].name - workgroup = aws_athena_workgroup.lb-access-logs[0].id - - query = templatefile( - "${path.module}/templates/create_table.sql", - { - bucket = var.existing_bucket_name != "" ? var.existing_bucket_name : module.s3-bucket[0].bucket.id - account_id = var.account_number - region = var.region - application_name = var.application_name - database = aws_athena_database.lb-access-logs[0].name - } - ) -} - resource "aws_athena_workgroup" "lb-access-logs" { count = var.access_logs ? 1 : 0 name = "${var.application_name}-lb-access-logs" @@ -224,6 +228,9 @@ resource "aws_athena_workgroup" "lb-access-logs" { configuration { enforce_workgroup_configuration = true publish_cloudwatch_metrics_enabled = true + engine_version { + selected_engine_version = "Athena engine version 3" + } result_configuration { output_location = var.existing_bucket_name != "" ? "s3://${var.existing_bucket_name}/output/" : "s3://${module.s3-bucket[0].bucket.id}/output/" @@ -290,15 +297,15 @@ resource "aws_lb_target_group_attachment" "this" { port = coalesce(each.value.attachment_port, each.value.port) } -# Glue crawler to update Athena Table -# Role for crawler -resource "aws_iam_role" "lb_glue_crawler" { +# Glue Permissions +resource "aws_iam_role" "glue" { count = var.access_logs ? 1 : 0 - name = "ssm-glue-crawler" - assume_role_policy = data.aws_iam_policy_document.lb_glue_crawler_assume.json + name = "glue-${var.application_name}" + assume_role_policy = data.aws_iam_policy_document.glue_assume[count.index].json } -data "aws_iam_policy_document" "lb_glue_crawler_assume" { +data "aws_iam_policy_document" "glue_assume" { + count = var.access_logs ? 
1 : 0 statement { effect = "Allow" actions = ["sts:AssumeRole"] @@ -310,13 +317,7 @@ data "aws_iam_policy_document" "lb_glue_crawler_assume" { } } -resource "aws_iam_policy" "lb_glue_crawler" { - count = var.access_logs ? 1 : 0 - name = "LbGlueCrawler" - policy = data.aws_iam_policy_document.lb_glue_crawler[count.index].json -} - -data "aws_iam_policy_document" "lb_glue_crawler" { +data "aws_iam_policy_document" "glue_s3" { count = var.access_logs ? 1 : 0 statement { effect = "Allow" @@ -324,33 +325,320 @@ data "aws_iam_policy_document" "lb_glue_crawler" { "s3:GetObject", "s3:PutObject" ] - resources = [var.existing_bucket_name != "" ? "arn:aws:s3:::${var.existing_bucket_name}/${var.application_name}/AWSLogs/${var.account_number}/*" : "${module.s3-bucket[0].bucket.arn}/${var.application_name}/AWSLogs/${var.account_number}/*"] + resources = flatten([var.existing_bucket_name != "" + ? [ + "arn:aws:s3:::${var.existing_bucket_name}/${var.application_name}/AWSLogs/${var.account_number}/*", + "arn:aws:s3:::${var.existing_bucket_name}/AWSLogs/${var.account_number}/*" + ] + : [ + "${module.s3-bucket[0].bucket.arn}/${var.application_name}/AWSLogs/${var.account_number}/*", + "${module.s3-bucket[0].bucket.arn}/AWSLogs/${var.account_number}/*" + ] + ]) } } -# Glue Crawler Policy -resource "aws_iam_role_policy_attachment" "lb_glue_crawler" { - count = var.access_logs ? 1 : 0 - role = aws_iam_role.lb_glue_crawler[count.index].name - policy_arn = aws_iam_policy.lb_glue_crawler[count.index].arn +resource "aws_iam_policy" "glue_s3" { + count = var.access_logs && length(data.aws_iam_policy_document.glue_s3) > 0 ? 1 : 0 + name = "glue-s3-${var.application_name}" + policy = data.aws_iam_policy_document.glue_s3[count.index].json +} + +resource "aws_iam_role_policy_attachment" "glue_s3" { + count = var.access_logs && length(data.aws_iam_policy_document.glue_s3) > 0 ? 
1 : 0 + role = aws_iam_role.glue[count.index].name + policy_arn = aws_iam_policy.glue_s3[count.index].arn } -resource "aws_iam_role_policy_attachment" "lb_glue_service" { +resource "aws_iam_role_policy_attachment" "glue_service" { count = var.access_logs ? 1 : 0 - role = aws_iam_role.lb_glue_crawler[count.index].id + role = aws_iam_role.glue[count.index].id policy_arn = "arn:aws:iam::aws:policy/service-role/AWSGlueServiceRole" } -# Glue Crawler -resource "aws_glue_crawler" "ssm_resource_sync" { - #checkov:skip=CKV_AWS_195 - count = var.access_logs ? 1 : 0 +# Catalog Tables +resource "aws_glue_catalog_table" "application_lb_logs" { + count = var.access_logs && var.load_balancer_type == "application" ? 1 : 0 + name = "${var.application_name}-application-lb-logs" + database_name = aws_athena_database.lb-access-logs[0].name + + table_type = "EXTERNAL_TABLE" + + partition_keys { + name = "day" + type = "string" + } + + parameters = { + "projection.enabled" = "true" + "projection.day.format" = "yyyy/MM/dd" + "projection.day.interval" = "1" + "projection.day.interval.unit" = "DAYS" + "projection.day.type" = "date" + "projection.day.range" = "2023/01/01,NOW" + "storage.location.template" = var.existing_bucket_name != "" ? "s3://${var.existing_bucket_name}/${var.application_name}/AWSLogs/${var.account_number}/elasticloadbalancing/${var.region}/$${day}" : "s3://${module.s3-bucket[0].bucket.id}/${var.application_name}/AWSLogs/${var.account_number}/elasticloadbalancing/${var.region}/$${day}" + } + storage_descriptor { + location = var.existing_bucket_name != "" ? 
"s3://${var.existing_bucket_name}/${var.application_name}/AWSLogs/${var.account_number}/elasticloadbalancing/${var.region}" : "s3://${module.s3-bucket[0].bucket.id}/${var.application_name}/AWSLogs/${var.account_number}/elasticloadbalancing/${var.region}" + input_format = "org.apache.hadoop.mapred.TextInputFormat" + output_format = "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat" + ser_de_info { + name = "application_lb_logs" + parameters = { + "serialization.format" = "1", + "input.regex" = "([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*):([0-9]*) ([^ ]*)[:-]([0-9]*) ([-.0-9]*) ([-.0-9]*) ([-.0-9]*) (|[-0-9]*) (-|[-0-9]*) ([-0-9]*) ([-0-9]*) \"([^ ]*) (.*) (- |[^ ]*)\" \"([^\"]*)\" ([A-Z0-9-_]+) ([A-Za-z0-9.-]*) ([^ ]*) \"([^\"]*)\" \"([^\"]*)\" \"([^\"]*)\" ([-.0-9]*) ([^ ]*) \"([^\"]*)\" \"([^\"]*)\" \"([^ ]*)\" \"([^s]+?)\" \"([^s]+)\" \"([^ ]*)\" \"([^ ]*)\"" + } + serialization_library = "org.apache.hadoop.hive.serde2.RegexSerDe" + } + columns { + name = "type" + type = "string" + } + columns { + name = "time" + type = "string" + } + columns { + name = "elb" + type = "string" + } + columns { + name = "client_ip" + type = "string" + } + columns { + name = "client_port" + type = "int" + } + columns { + name = "target_ip" + type = "string" + } + columns { + name = "target_port" + type = "int" + } + columns { + name = "request_processing_time" + type = "double" + } + columns { + name = "target_processing_time" + type = "double" + } + columns { + name = "response_processing_time" + type = "double" + } + columns { + name = "elb_status_code" + type = "int" + } + columns { + name = "target_status_code" + type = "int" + } + columns { + name = "received_bytes" + type = "bigint" + } + columns { + name = "sent_bytes" + type = "bigint" + } + columns { + name = "request_verb" + type = "string" + } + columns { + name = "request_url" + type = "string" + } + columns { + name = "request_proto" + type = "string" + } + columns { + name = "user_agent" + type = "string" + } + columns { 
+ name = "ssl_cipher" + type = "string" + } + columns { + name = "ssl_protocol" + type = "string" + } + columns { + name = "target_group_arn" + type = "string" + } + columns { + name = "trace_id" + type = "string" + } + columns { + name = "domain_name" + type = "string" + } + columns { + name = "chosen_cert_arn" + type = "string" + } + columns { + name = "matched_rule_priority" + type = "int" + } + columns { + name = "request_creation_time" + type = "string" + } + columns { + name = "actions_executed" + type = "string" + } + columns { + name = "redirect_url" + type = "string" + } + columns { + name = "lambda_error_reason" + type = "string" + } + columns { + name = "target_port_list" + type = "string" + } + columns { + name = "target_status_code_list" + type = "string" + } + columns { + name = "classification" + type = "string" + } + columns { + name = "classification_reason" + type = "string" + } + } +} + +resource "aws_glue_catalog_table" "network_lb_logs" { + count = var.access_logs && var.load_balancer_type == "network" ? 1 : 0 + name = "${var.application_name}-network-lb-logs" database_name = aws_athena_database.lb-access-logs[0].name - name = "lb_resource_sync" - role = aws_iam_role.lb_glue_crawler[count.index].arn - schedule = var.log_schedule - s3_target { - path = var.existing_bucket_name != "" ? "s3://${var.existing_bucket_name}/${var.application_name}/AWSLogs/${var.account_number}/elasticloadbalancing/" : "s3://${module.s3-bucket[0].bucket.id}/${var.application_name}/AWSLogs/${var.account_number}/elasticloadbalancing/" + table_type = "EXTERNAL_TABLE" + + storage_descriptor { + location = var.existing_bucket_name != "" ? 
"s3://${var.existing_bucket_name}/${var.application_name}/AWSLogs/${var.account_number}/elasticloadbalancing/${var.region}" : "s3://${module.s3-bucket[0].bucket.id}/${var.application_name}/AWSLogs/${var.account_number}/elasticloadbalancing/${var.region}" + input_format = "org.apache.hadoop.mapred.TextInputFormat" + output_format = "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat" + ser_de_info { + name = "network_lb_logs" + parameters = { + "serialization.format" = "1", + "input.regex" = "([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*):([0-9]*) ([^ ]*):([0-9]*) ([-.0-9]*) ([-.0-9]*) ([-0-9]*) ([-0-9]*) ([-0-9]*) ([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*)$" + } + serialization_library = "org.apache.hadoop.hive.serde2.RegexSerDe" + } + columns { + name = "type" + type = "string" + } + columns { + name = "version" + type = "string" + } + columns { + name = "time" + type = "string" + } + columns { + name = "elb" + type = "string" + } + columns { + name = "listener_id" + type = "string" + } + columns { + name = "client_ip" + type = "string" + } + columns { + name = "client_port" + type = "int" + } + columns { + name = "target_ip" + type = "string" + } + columns { + name = "target_port" + type = "int" + } + columns { + name = "tcp_connection_time" + type = "double" + } + columns { + name = "tls_handshake_time" + type = "double" + } + columns { + name = "received_bytes" + type = "bigint" + } + columns { + name = "sent_bytes" + type = "bigint" + } + columns { + name = "incoming_tls_alert" + type = "int" + } + columns { + name = "cert_arn" + type = "string" + } + columns { + name = "certificate_serial" + type = "string" + } + columns { + name = "tls_cipher_suite" + type = "string" + } + columns { + name = "tls_protocol_version" + type = "string" + } + columns { + name = "tls_named_group" + type = "string" + } + columns { + name = "domain_name" + type = "string" + } + columns { + name = "alpn_fe_protocol" + type = "string" + } 
+ columns { + name = "alpn_be_protocol" + type = "string" + } + columns { + name = "alpn_client_preference_list" + type = "string" + } + columns { + name = "tls_connection_creation_time" + type = "string" + } } } diff --git a/templates/create_table.sql b/templates/create_table.sql deleted file mode 100644 index 25476c6b..00000000 --- a/templates/create_table.sql +++ /dev/null @@ -1,36 +0,0 @@ -CREATE EXTERNAL TABLE IF NOT EXISTS ${database}.lb_logs ( - type string, - time string, - elb string, - client_ip string, - client_port int, - target_ip string, - target_port int, - request_processing_time double, - target_processing_time double, - response_processing_time double, - elb_status_code string, - target_status_code string, - received_bytes bigint, - sent_bytes bigint, - request_verb string, - request_url string, - request_proto string, - user_agent string, - ssl_cipher string, - ssl_protocol string, - target_group_arn string, - trace_id string, - domain_name string, - chosen_cert_arn string, - matched_rule_priority string, - request_creation_time string, - actions_executed string, - redirect_url string, - new_field string - ) - ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.RegexSerDe' - WITH SERDEPROPERTIES ( - 'serialization.format' = '1', - 'input.regex' = '([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*):([0-9]*) ([^ ]*)[:-]([0-9]*) ([-.0-9]*) ([-.0-9]*) ([-.0-9]*) (|[-0-9]*) (-|[-0-9]*) ([-0-9]*) ([-0-9]*) \"([^ ]*) ([^ ]*) (- |[^ ]*)\" \"([^\"]*)\" ([A-Z0-9-]+) ([A-Za-z0-9.-]*) ([^ ]*) \"([^\"]*)\" \"([^\"]*)\" \"([^\"]*)\" ([-.0-9]*) ([^ ]*) \"([^\"]*)\"($| \"[^ ]*\")(.*)') - LOCATION 's3://${bucket}/${application_name}/AWSLogs/${account_id}/elasticloadbalancing/${region}/'; diff --git a/test/unit-test/locals.tf b/test/unit-test/locals.tf index d4e5bcb0..eee08677 100644 --- a/test/unit-test/locals.tf +++ b/test/unit-test/locals.tf @@ -34,11 +34,7 @@ locals { vpc_all = "${local.vpc_name}-${local.environment}" subnet_set_name = 
"${var.networking[0].business-unit}-${local.environment}-${var.networking[0].set}" - is_live = [substr(terraform.workspace, length(local.application_name), length(terraform.workspace)) == "-production" || substr(terraform.workspace, length(local.application_name), length(terraform.workspace)) == "-preproduction" ? "live" : "non-live"] - provider_name = "core-vpc-${local.environment}" - lb_target_groups = { - https-80 = { - port = 80 - } - } + is_live = [substr(terraform.workspace, length(local.application_name), length(terraform.workspace)) == "-production" || substr(terraform.workspace, length(local.application_name), length(terraform.workspace)) == "-preproduction" ? "live" : "non-live"] + provider_name = "core-vpc-${local.environment}" + lb_target_groups = {} } diff --git a/test/unit-test/main.tf b/test/unit-test/main.tf index 990c9193..389b8397 100644 --- a/test/unit-test/main.tf +++ b/test/unit-test/main.tf @@ -76,5 +76,4 @@ module "lb_access_logs_enabled" { idle_timeout = 60 force_destroy_bucket = true lb_target_groups = local.lb_target_groups - log_schedule = "cron(15 1 ? * MON *)" } diff --git a/variables.tf b/variables.tf index 78122179..959631ab 100644 --- a/variables.tf +++ b/variables.tf @@ -122,10 +122,6 @@ variable "lb_target_groups" { })) default = {} } -variable "log_schedule" { - type = string - default = "cron(15 1 ? * MON *)" -} variable "enable_cross_zone_load_balancing" { type = bool