Skip to content

Commit

Permalink
Merge pull request #7143 from ministryofjustice/Migration_GlueJob_Corrections-4
Browse files Browse the repository at this point in the history

CodeLogicImprovements
  • Loading branch information
madhu-k-sr2 authored Jul 19, 2024
2 parents 6b0c0c4 + 3049ac4 commit 0523539
Show file tree
Hide file tree
Showing 2 changed files with 182 additions and 97 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -222,38 +222,41 @@ resource "aws_glue_job" "rds_to_s3_parquet_migration" {
worker_type = "G.2X"
number_of_workers = 5
default_arguments = {
"--script_bucket_name" = aws_s3_bucket.dms_dv_glue_job_s3_bucket.id
"--rds_db_host_ep" = split(":", aws_db_instance.database_2022.endpoint)[0]
"--rds_db_pwd" = aws_db_instance.database_2022.password
"--rds_sqlserver_db" = ""
"--rds_sqlserver_db_schema" = "dbo"
"--rds_sqlserver_db_table" = ""
"--rds_db_tbl_pkeys_col_list" = ""
"--rds_table_total_size_mb" = ""
"--rds_table_total_rows" = ""
"--date_partition_column_name" = ""
"--other_partitionby_columns" = ""
"--validation_sample_fraction_float" = 0
"--validation_sample_df_repartition" = 0
"--jdbc_read_256mb_partitions" = "false"
"--jdbc_read_512mb_partitions" = "false"
"--jdbc_read_1gb_partitions" = "false"
"--jdbc_read_2gb_partitions" = "false"
"--rename_migrated_prq_tbl_folder" = ""
"--year_partition" = "false"
"--month_partition" = "false"
"--day_partition" = "false"
"--rds_to_parquet_output_s3_bucket" = aws_s3_bucket.dms_target_ep_s3_bucket.id
"--dv_parquet_output_s3_bucket" = aws_s3_bucket.dms_dv_parquet_s3_bucket.id
"--glue_catalog_db_name" = aws_glue_catalog_database.dms_dv_glue_catalog_db.name
"--glue_catalog_tbl_name" = "glue_df_output"
"--continuous-log-logGroup" = "/aws-glue/jobs/${aws_cloudwatch_log_group.rds_to_s3_parquet_migration.name}"
"--enable-continuous-cloudwatch-log" = "true"
"--enable-continuous-log-filter" = "true"
"--enable-metrics" = "true"
"--enable-auto-scaling" = "true"
"--conf" = <<EOF
"--script_bucket_name" = aws_s3_bucket.dms_dv_glue_job_s3_bucket.id
"--rds_db_host_ep" = split(":", aws_db_instance.database_2022.endpoint)[0]
"--rds_db_pwd" = aws_db_instance.database_2022.password
"--rds_sqlserver_db" = ""
"--rds_sqlserver_db_schema" = "dbo"
"--rds_sqlserver_db_table" = ""
"--rds_db_tbl_pkeys_col_list" = ""
"--rds_table_total_size_mb" = ""
"--rds_table_total_rows" = ""
"--rds_df_repartition_num" = 0
"--date_partition_column_name" = ""
"--other_partitionby_columns" = ""
"--validation_sample_fraction_float" = 0
"--validation_sample_df_repartition_num" = 0
"--jdbc_read_256mb_partitions" = "false"
"--jdbc_read_512mb_partitions" = "false"
"--jdbc_read_1gb_partitions" = "false"
"--jdbc_read_2gb_partitions" = "false"
"--rename_migrated_prq_tbl_folder" = ""
"--year_partition_bool" = "false"
"--month_partition_bool" = "false"
"--day_partition_bool" = "false"
"--validation_only_run" = "false"
"--rds_to_parquet_output_s3_bucket" = aws_s3_bucket.dms_target_ep_s3_bucket.id
"--dv_parquet_output_s3_bucket" = aws_s3_bucket.dms_dv_parquet_s3_bucket.id
"--glue_catalog_db_name" = aws_glue_catalog_database.dms_dv_glue_catalog_db.name
"--glue_catalog_tbl_name" = "glue_df_output"
"--continuous-log-logGroup" = "/aws-glue/jobs/${aws_cloudwatch_log_group.rds_to_s3_parquet_migration.name}"
"--enable-continuous-cloudwatch-log" = "true"
"--enable-continuous-log-filter" = "true"
"--enable-metrics" = "true"
"--enable-auto-scaling" = "true"
"--conf" = <<EOF
spark.sql.legacy.parquet.datetimeRebaseModeInRead=CORRECTED
--conf spark.sql.sources.partitionOverwriteMode=dynamic
--conf spark.sql.parquet.aggregatePushdown=true
--conf spark.sql.shuffle.partitions=2001
--conf spark.sql.files.maxPartitionBytes=256m
Expand Down
Loading

0 comments on commit 0523539

Please sign in to comment.