From d6dedc712a8575006af17eb2b182af3c6c5bfc3c Mon Sep 17 00:00:00 2001 From: robertsweetman Date: Wed, 13 Dec 2023 14:07:20 +0000 Subject: [PATCH] fix de-serialisation, change names and types for columns --- main.tf | 90 +++++++++++++++++++++------------------------------------ 1 file changed, 33 insertions(+), 57 deletions(-) diff --git a/main.tf b/main.tf index 81c3a66b..4f779651 100644 --- a/main.tf +++ b/main.tf @@ -355,8 +355,8 @@ resource "aws_glue_crawler" "ssm_resource_sync" { } } -resource "aws_glue_catalog_table" "lb_log_table" { - name = "${var.application_name}-lb-log-table" +resource "aws_glue_catalog_table" "app_lb_logs" { + name = "${var.application_name}-app-lb-logs" database_name = aws_athena_database.lb-access-logs[0].name table_type = "EXTERNAL_TABLE" @@ -366,150 +366,126 @@ resource "aws_glue_catalog_table" "lb_log_table" { input_format = "org.apache.hadoop.mapred.TextInputFormat" output_format = "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat" ser_de_info { - serialization_library = "org.apache.hadoop.hive.serde2.RegexSerDe" + name = "app_lb_logs" + parameters = { + "field.delim" = " " + } + serialization_library = "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe" } columns { name = "type" type = "string" } - columns { - name = "time" - type = "string" + name = "timestamp" + type = "timestamp" } - columns { name = "elb" type = "string" } - columns { name = "client_ip" type = "string" } - - columns { - name = "client_port" - type = "int" - } - columns { name = "target_ip" type = "string" } - - columns { - name = "target_port" - type = "int" - } - columns { name = "request_processing_time" type = "double" } - columns { name = "target_processing_time" type = "double" } - columns { name = "response_processing_time" type = "double" } - columns { name = "elb_status_code" - type = "string" + type = "int" } - columns { name = "target_status_code" - type = "string" + type = "int" } - columns { name = "received_bytes" - type = "bigint" + type = "int" } - columns { name = "sent_bytes" - type = "bigint" - } - - columns { - name = "request_verb" - type = "string" - } - - columns { - name = "request_url" - type = "string" + type = "int" } - columns { - name = "request_proto" + name = "request" type = "string" } - columns { name = "user_agent" type = "string" } - columns { name = "ssl_cipher" type = "string" } - columns { name = "ssl_protocol" type = "string" } - columns { name = "target_group_arn" type = "string" } - columns { name = "trace_id" type = "string" } - columns { name = "domain_name" type = "string" } - columns { name = "chosen_cert_arn" type = "string" } - columns { name = "matched_rule_priority" - type = "string" + type = "int" } - columns { name = "request_creation_time" - type = "string" + type = "timestamp" } - columns { name = "actions_executed" type = "string" } - columns { name = "redirect_url" type = "string" } - columns { - name = "new_field" + name = "error_reason" + type = "string" + } + columns { + name = "target_port_list" + type = "string" + } + columns { + name = "target_status_code_list" + type = "string" + } + columns { + name = "classification" + type = "string" + } + columns { + name = "classification_reason" type = "string" } }