From 0ceac3b04c1cf56a65187462a692ccac5f3eb505 Mon Sep 17 00:00:00 2001 From: pnadolny13 Date: Tue, 27 Jun 2023 13:15:26 -0400 Subject: [PATCH 1/3] add more staging columns to dynamo and fix bugs --- .../marts/telemetry/base/cloud_executions_base.sql | 13 ++++--------- .../stg_dynamodb__project_schedules_table.sql | 3 ++- .../dynamodb/stg_dynamodb__projects_table.sql | 4 +++- .../stg_dynamodb__workload_metadata_table.sql | 4 +++- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/data/transform/models/marts/telemetry/base/cloud_executions_base.sql b/data/transform/models/marts/telemetry/base/cloud_executions_base.sql index 978b4db2..bedfbbb7 100644 --- a/data/transform/models/marts/telemetry/base/cloud_executions_base.sql +++ b/data/transform/models/marts/telemetry/base/cloud_executions_base.sql @@ -8,8 +8,8 @@ WITH open_source_agg AS ( MIN( CASE WHEN cli_command = 'schedule' THEN started_ts END ) AS oss_run_started_ts, - MIN( - CASE WHEN cli_command = 'schedule' THEN finished_ts END + MAX( + finished_ts ) AS oss_run_finished_ts FROM {{ ref('fact_cli_executions') }} WHERE cloud_execution_id IS NOT NULL @@ -34,6 +34,7 @@ SELECT stg_dynamodb__workload_metadata_table.command_text_hash, stg_dynamodb__workload_metadata_table.cloud_job_name_hash, stg_dynamodb__workload_metadata_table.cloud_schedule_name_hash, + stg_dynamodb__workload_metadata_table.cloud_deployment_name_hash, cloud_schedule_frequency.schedule_freq_day, cloud_schedule_frequency.schedule_freq_rolling_avg, COALESCE( @@ -102,12 +103,6 @@ INNER JOIN {{ ref('stg_dynamodb__projects_table') }} ON stg_dynamodb__workload_metadata_table.cloud_project_id = stg_dynamodb__projects_table.cloud_project_id -LEFT JOIN {{ ref('stg_dynamodb__project_deployments') }} - ON - stg_dynamodb__workload_metadata_table.cloud_project_id - = stg_dynamodb__project_deployments.cloud_project_id - AND stg_dynamodb__workload_metadata_table.cloud_environment_name_hash - = 
stg_dynamodb__project_deployments.cloud_environment_name_hash LEFT JOIN {{ ref('stg_dynamodb__project_schedules_table') }} ON stg_dynamodb__workload_metadata_table.cloud_project_id @@ -116,7 +111,7 @@ LEFT JOIN {{ ref('stg_dynamodb__project_schedules_table') }} = stg_dynamodb__project_schedules_table.tenant_resource_key AND stg_dynamodb__workload_metadata_table.cloud_schedule_name_hash = stg_dynamodb__project_schedules_table.cloud_schedule_name_hash - AND stg_dynamodb__project_deployments.cloud_deployment_name_hash + AND stg_dynamodb__workload_metadata_table.cloud_deployment_name_hash = stg_dynamodb__project_schedules_table.cloud_deployment_name_hash LEFT JOIN open_source_agg ON diff --git a/data/transform/models/staging/dynamodb/stg_dynamodb__project_schedules_table.sql b/data/transform/models/staging/dynamodb/stg_dynamodb__project_schedules_table.sql index 945e86e4..e056a4a8 100644 --- a/data/transform/models/staging/dynamodb/stg_dynamodb__project_schedules_table.sql +++ b/data/transform/models/staging/dynamodb/stg_dynamodb__project_schedules_table.sql @@ -43,7 +43,8 @@ renamed AS ( tenant_resource_key, cloud_project_id, SHA2_HEX(cloud_deployment_name) AS cloud_deployment_name_hash, - SHA2_HEX(cloud_schedule_name) AS cloud_schedule_name_hash + SHA2_HEX(cloud_schedule_name) AS cloud_schedule_name_hash, + eventbridge_name FROM clean_source WHERE row_num = 1 diff --git a/data/transform/models/staging/dynamodb/stg_dynamodb__projects_table.sql b/data/transform/models/staging/dynamodb/stg_dynamodb__projects_table.sql index 6de34bf5..f4a7f3e7 100644 --- a/data/transform/models/staging/dynamodb/stg_dynamodb__projects_table.sql +++ b/data/transform/models/staging/dynamodb/stg_dynamodb__projects_table.sql @@ -18,7 +18,9 @@ renamed AS ( project_surrogate_key, project_id AS cloud_project_id, project_name, - tenant_resource_key + tenant_resource_key, + SHA2_HEX(git_repository) AS git_repository_hash, + SHA2_HEX(project_root_path) AS project_root_path_hash FROM source WHERE 
row_num = 1 diff --git a/data/transform/models/staging/dynamodb/stg_dynamodb__workload_metadata_table.sql b/data/transform/models/staging/dynamodb/stg_dynamodb__workload_metadata_table.sql index f3d4ee08..7e3601ac 100644 --- a/data/transform/models/staging/dynamodb/stg_dynamodb__workload_metadata_table.sql +++ b/data/transform/models/staging/dynamodb/stg_dynamodb__workload_metadata_table.sql @@ -27,7 +27,9 @@ renamed AS ( ) AS tenant_resource_key, SPLIT_PART( "TENANT_RESOURCE_KEY::PROJECT_ID", '::', 2 -- noqa: RF05 - ) AS cloud_project_id + ) AS cloud_project_id, + SHA2_HEX(deployment_name) AS cloud_deployment_name_hash, + stopped_reason FROM source WHERE row_num = 1 From cbf3f49ec147a0d358345972806d3201ce73c936 Mon Sep 17 00:00:00 2001 From: pnadolny13 Date: Tue, 27 Jun 2023 14:16:44 -0400 Subject: [PATCH 2/3] fix sqlfluff errors --- .../dynamodb/stg_dynamodb__project_schedules_table.sql | 4 ++-- .../dynamodb/stg_dynamodb__workload_metadata_table.sql | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/data/transform/models/staging/dynamodb/stg_dynamodb__project_schedules_table.sql b/data/transform/models/staging/dynamodb/stg_dynamodb__project_schedules_table.sql index e056a4a8..4c18e504 100644 --- a/data/transform/models/staging/dynamodb/stg_dynamodb__project_schedules_table.sql +++ b/data/transform/models/staging/dynamodb/stg_dynamodb__project_schedules_table.sql @@ -42,9 +42,9 @@ renamed AS ( enabled AS is_enabled, tenant_resource_key, cloud_project_id, + eventbridge_name, SHA2_HEX(cloud_deployment_name) AS cloud_deployment_name_hash, - SHA2_HEX(cloud_schedule_name) AS cloud_schedule_name_hash, - eventbridge_name + SHA2_HEX(cloud_schedule_name) AS cloud_schedule_name_hash FROM clean_source WHERE row_num = 1 diff --git a/data/transform/models/staging/dynamodb/stg_dynamodb__workload_metadata_table.sql b/data/transform/models/staging/dynamodb/stg_dynamodb__workload_metadata_table.sql index 7e3601ac..e800bf30 100644 --- 
a/data/transform/models/staging/dynamodb/stg_dynamodb__workload_metadata_table.sql +++ b/data/transform/models/staging/dynamodb/stg_dynamodb__workload_metadata_table.sql @@ -18,6 +18,7 @@ renamed AS ( NULLIF(exit_code, 'N/A')::INT AS cloud_exit_code, NULLIF(start_time, 'N/A')::TIMESTAMP_NTZ AS started_ts, ttl::INT AS cloud_run_ttl, + stopped_reason, SHA2_HEX(command_text) AS command_text_hash, SHA2_HEX(environment_name) AS cloud_environment_name_hash, SHA2_HEX(job_name) AS cloud_job_name_hash, @@ -28,8 +29,7 @@ renamed AS ( SPLIT_PART( "TENANT_RESOURCE_KEY::PROJECT_ID", '::', 2 -- noqa: RF05 ) AS cloud_project_id, - SHA2_HEX(deployment_name) AS cloud_deployment_name_hash, - stopped_reason + SHA2_HEX(deployment_name) AS cloud_deployment_name_hash FROM source WHERE row_num = 1 From 915622367ff9095711ec3ba0e6492a0b52a76f48 Mon Sep 17 00:00:00 2001 From: pnadolny13 Date: Tue, 27 Jun 2023 14:39:00 -0400 Subject: [PATCH 3/3] refactor deployments join that's no longer needed --- .../marts/telemetry/base/cloud_schedule_frequency.sql | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/data/transform/models/marts/telemetry/base/cloud_schedule_frequency.sql b/data/transform/models/marts/telemetry/base/cloud_schedule_frequency.sql index 70e3e736..7627b08e 100644 --- a/data/transform/models/marts/telemetry/base/cloud_schedule_frequency.sql +++ b/data/transform/models/marts/telemetry/base/cloud_schedule_frequency.sql @@ -4,10 +4,6 @@ WITH joined AS ( stg_dynamodb__workload_metadata_table.started_ts::DATE AS date_day, stg_dynamodb__workload_metadata_table.cloud_execution_id FROM {{ ref('stg_dynamodb__workload_metadata_table') }} - LEFT JOIN {{ ref('stg_dynamodb__project_deployments') }} - ON - stg_dynamodb__workload_metadata_table.cloud_environment_name_hash - = stg_dynamodb__project_deployments.cloud_environment_name_hash LEFT JOIN {{ ref('stg_dynamodb__project_schedules_table') }} ON stg_dynamodb__workload_metadata_table.cloud_schedule_name_hash @@ -17,7 +13,7 
@@ WITH joined AS ( AND stg_dynamodb__workload_metadata_table.cloud_project_id = stg_dynamodb__project_schedules_table.cloud_project_id AND - stg_dynamodb__project_deployments.cloud_deployment_name_hash + stg_dynamodb__workload_metadata_table.cloud_deployment_name_hash = stg_dynamodb__project_schedules_table.cloud_deployment_name_hash WHERE stg_dynamodb__project_schedules_table.cloud_schedule_name_hash