From c9bd167b4357da6629f2105967721c2fdcba68e4 Mon Sep 17 00:00:00 2001 From: Hari Chintala Date: Thu, 17 Aug 2023 09:20:42 +0100 Subject: [PATCH 1/2] Add Spark Resource Parameters for Glue Job --- .../digital-prison-reporting/application_variables.json | 8 ++++++++ terraform/environments/digital-prison-reporting/locals.tf | 4 ++++ terraform/environments/digital-prison-reporting/main.tf | 2 ++ 3 files changed, 14 insertions(+) diff --git a/terraform/environments/digital-prison-reporting/application_variables.json b/terraform/environments/digital-prison-reporting/application_variables.json index ffa9f69faed..5a386de68c7 100644 --- a/terraform/environments/digital-prison-reporting/application_variables.json +++ b/terraform/environments/digital-prison-reporting/application_variables.json @@ -6,6 +6,8 @@ "db_description": "DPR Data Catalog", "create_job": true, "glue_job_name": "glue-job-ingester", + "reporting_hub_spark_driver_mem": "2g", + "reporting_hub_spark_executor_mem": "2g", "create_security_conf": true, "setup_buckets": true, "create_kinesis_streams": true, @@ -36,6 +38,8 @@ "db_description": "DPR Data Catalog", "create_job": true, "glue_job_name": "glue-job-ingester", + "reporting_hub_spark_driver_mem": "2g", + "reporting_hub_spark_executor_mem": "2g", "create_security_conf": true, "setup_buckets": true, "create_kinesis_streams": true, @@ -66,6 +70,8 @@ "db_description": "DPR Data Catalog", "create_job": true, "glue_job_name": "glue-job-ingester", + "reporting_hub_spark_driver_mem": "2g", + "reporting_hub_spark_executor_mem": "2g", "create_security_conf": true, "setup_buckets": true, "create_kinesis_streams": true, @@ -96,6 +102,8 @@ "db_description": "DPR Data Catalog", "create_job": true, "glue_job_name": "glue-job-ingester", + "reporting_hub_spark_driver_mem": "26g", + "reporting_hub_spark_executor_mem": "26g", "create_security_conf": true, "setup_buckets": true, "create_kinesis_streams": true, diff --git a/terraform/environments/digital-prison-reporting/locals.tf b/terraform/environments/digital-prison-reporting/locals.tf index de266ad9b41..66ecbefae02 100644 --- a/terraform/environments/digital-prison-reporting/locals.tf +++ b/terraform/environments/digital-prison-reporting/locals.tf @@ -39,6 +39,10 @@ locals { datamart_username = jsondecode(data.aws_secretsmanager_secret_version.datamart.secret_string)["username"] datamart_password = jsondecode(data.aws_secretsmanager_secret_version.datamart.secret_string)["password"] + # Glue Job parameters + reporting_hub_driver_mem = local.application_data.accounts[local.environment].reporting_hub_spark_driver_mem + reporting_hub_executor_mem = local.application_data.accounts[local.environment].reporting_hub_spark_executor_mem + # Common Policies kms_read_access_policy = "${local.project}_kms_read_policy" s3_read_access_policy = "${local.project}_s3_read_policy" diff --git a/terraform/environments/digital-prison-reporting/main.tf b/terraform/environments/digital-prison-reporting/main.tf index eb588399d4d..d84c4119b81 100644 --- a/terraform/environments/digital-prison-reporting/main.tf +++ b/terraform/environments/digital-prison-reporting/main.tf @@ -64,6 +64,8 @@ module "glue_reporting_hub_job" { "--dpr.domain.catalog.db" = module.glue_data_domain_database.db_name "--dpr.redshift.secrets.name" = "${local.project}-redshift-secret-${local.environment}" "--dpr.datamart.db.name" = "datamart" + "--spark.driver.memory" = local.reporting_hub_driver_mem + "--spark.executor.memory" = local.reporting_hub_executor_mem } } From 1d8122664a802f45ce0f0bab8167c65aff815e1b Mon Sep 17 00:00:00 2001 From: Hari Chintala Date: Thu, 17 Aug 2023 09:37:02 +0100 Subject: [PATCH 2/2] Add Spark Log Level Support --- .../application_variables.json | 10 +++++++--- .../environments/digital-prison-reporting/locals.tf | 1 + .../environments/digital-prison-reporting/main.tf | 3 ++- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/terraform/environments/digital-prison-reporting/application_variables.json b/terraform/environments/digital-prison-reporting/application_variables.json index 5a386de68c7..64ba3020c02 100644 --- a/terraform/environments/digital-prison-reporting/application_variables.json +++ b/terraform/environments/digital-prison-reporting/application_variables.json @@ -7,7 +7,8 @@ "create_job": true, "glue_job_name": "glue-job-ingester", "reporting_hub_spark_driver_mem": "2g", - "reporting_hub_spark_executor_mem": "2g", + "reporting_hub_spark_executor_mem": "2g", + "reporting_hub_spark_log_level": "INFO", "create_security_conf": true, "setup_buckets": true, "create_kinesis_streams": true, @@ -40,6 +41,7 @@ "glue_job_name": "glue-job-ingester", "reporting_hub_spark_driver_mem": "2g", "reporting_hub_spark_executor_mem": "2g", + "reporting_hub_spark_log_level": "INFO", "create_security_conf": true, "setup_buckets": true, "create_kinesis_streams": true, @@ -71,7 +73,8 @@ "create_job": true, "glue_job_name": "glue-job-ingester", "reporting_hub_spark_driver_mem": "2g", - "reporting_hub_spark_executor_mem": "2g", + "reporting_hub_spark_executor_mem": "2g", + "reporting_hub_spark_log_level": "WARN", "create_security_conf": true, "setup_buckets": true, "create_kinesis_streams": true, @@ -103,7 +106,8 @@ "create_job": true, "glue_job_name": "glue-job-ingester", "reporting_hub_spark_driver_mem": "26g", - "reporting_hub_spark_executor_mem": "26g", + "reporting_hub_spark_executor_mem": "26g", + "reporting_hub_spark_log_level": "WARN", "create_security_conf": true, "setup_buckets": true, "create_kinesis_streams": true, diff --git a/terraform/environments/digital-prison-reporting/locals.tf b/terraform/environments/digital-prison-reporting/locals.tf index 66ecbefae02..f62fa2fedee 100644 --- a/terraform/environments/digital-prison-reporting/locals.tf +++ b/terraform/environments/digital-prison-reporting/locals.tf @@ -42,6 +42,7 @@ locals { # Glue Job parameters reporting_hub_driver_mem = local.application_data.accounts[local.environment].reporting_hub_spark_driver_mem reporting_hub_executor_mem = local.application_data.accounts[local.environment].reporting_hub_spark_executor_mem + reporting_hub_log_level = local.application_data.accounts[local.environment].reporting_hub_spark_log_level # Common Policies kms_read_access_policy = "${local.project}_kms_read_policy" diff --git a/terraform/environments/digital-prison-reporting/main.tf b/terraform/environments/digital-prison-reporting/main.tf index d84c4119b81..c578fd8b60f 100644 --- a/terraform/environments/digital-prison-reporting/main.tf +++ b/terraform/environments/digital-prison-reporting/main.tf @@ -65,7 +65,8 @@ module "glue_reporting_hub_job" { "--dpr.redshift.secrets.name" = "${local.project}-redshift-secret-${local.environment}" "--dpr.datamart.db.name" = "datamart" "--spark.driver.memory" = local.reporting_hub_driver_mem - "--spark.executor.memory" = local.reporting_hub_executor_mem + "--spark.executor.memory" = local.reporting_hub_executor_mem + "--dpr.log.level" = local.reporting_hub_log_level } }