From b69f9134f9dd6b3ee80f8b656ba58ae038bf1317 Mon Sep 17 00:00:00 2001 From: Ian Whitestone Date: Sun, 14 Jan 2024 15:20:30 -0500 Subject: [PATCH] [v5.0.0] Add query acceleration costs, incrementalize cost per query, account for new service type (#141) * Lotta tings * Fix typos * Add changelog * Update desc * Make sqlfluff happy --- .changes/5.0.0.md | 19 +++++++ .../unreleased/Fixes-20231130-140709.yaml | 6 --- CHANGELOG.md | 21 ++++++++ dbt_project.yml | 2 +- models/cost_per_query.sql | 53 ++++++++++++++----- models/dbt_queries.sql | 4 +- models/hourly_spend.sql | 2 +- models/query_history_enriched.sql | 10 ++-- models/staging/stg_metering_history.sql | 15 ++++-- models/staging/stg_rate_sheet_daily.sql | 7 ++- .../stg_warehouse_metering_history.sql | 8 ++- 11 files changed, 113 insertions(+), 34 deletions(-) create mode 100644 .changes/5.0.0.md delete mode 100644 .changes/unreleased/Fixes-20231130-140709.yaml diff --git a/.changes/5.0.0.md b/.changes/5.0.0.md new file mode 100644 index 0000000..f42caf4 --- /dev/null +++ b/.changes/5.0.0.md @@ -0,0 +1,19 @@ +## dbt-snowflake-monitoring 5.0.0 - January 14, 2024 + +### Features + +- Add query acceleration costs and update cost per query algorithm to include them ([#141](https://github.com/get-select/dbt-snowflake-monitoring/pull/141)) +- Make cost_per_query model incremental ([#141](https://github.com/get-select/dbt-snowflake-monitoring/pull/141)) + +### Breaking Changes + +- Add entity_id to stg_metering_history ([#141](https://github.com/get-select/dbt-snowflake-monitoring/pull/141)) + +To upgrade from 4.x.x, you'll need to full refresh the `stg_metering_history` model. + +### Fixes + +- Support quoting: true ([#139](https://github.com/get-select/dbt-snowflake-monitoring/pull/139)) + +### Contributors +- [@ernestoongaro](https://github.com/ernestoongaro) (Fixes) \ No newline at end of file diff --git a/.changes/unreleased/Fixes-20231130-140709.yaml b/.changes/unreleased/Fixes-20231130-140709.yaml deleted file mode 100644 index 451627f..0000000 --- a/.changes/unreleased/Fixes-20231130-140709.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: Fixes -body: 'Support quoting: true' -time: 2023-11-30T14:07:09.323447Z -custom: - Author: ernestoongaro - PR: "139" diff --git a/CHANGELOG.md b/CHANGELOG.md index fa56ad1..6051d49 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,27 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html), and is generated by [Changie](https://github.com/miniscruff/changie). +## dbt-snowflake-monitoring 5.0.0 - January 14, 2024 + +### Features + +- Add query acceleration costs and update cost per query algorithm to include them ([#141](https://github.com/get-select/dbt-snowflake-monitoring/pull/141)) +- Make cost_per_query model incremental ([#141](https://github.com/get-select/dbt-snowflake-monitoring/pull/141)) + +### Breaking Changes + +- Add entity_id to stg_metering_history ([#141](https://github.com/get-select/dbt-snowflake-monitoring/pull/141)) + +To upgrade from 4.x.x, you'll need to full refresh the `stg_metering_history` model. + +### Fixes + +- Support quoting: true ([#139](https://github.com/get-select/dbt-snowflake-monitoring/pull/139)) + +### Contributors +- [@ernestoongaro](https://github.com/ernestoongaro) (Fixes) + + ## dbt-snowflake-monitoring 4.6.0 - November 09, 2023 ### Features diff --git a/dbt_project.yml b/dbt_project.yml index a51ced3..92a3303 100644 --- a/dbt_project.yml +++ b/dbt_project.yml @@ -1,5 +1,5 @@ name: 'dbt_snowflake_monitoring' -version: '4.6.0' +version: '5.0.0' config-version: 2 profile: dbt_snowflake_monitoring diff --git a/models/cost_per_query.sql b/models/cost_per_query.sql index abc9c70..79d938b 100644 --- a/models/cost_per_query.sql +++ b/models/cost_per_query.sql @@ -1,4 +1,7 @@ -{{ config(materialized='table') }} +{{ config( + materialized='incremental', + unique_key=['query_id', 'start_time'], +) }} with stop_threshold as ( @@ -21,9 +24,15 @@ filtered_queries as ( start_time ) as execution_start_time, start_time, - end_time + end_time, + query_acceleration_bytes_scanned from {{ ref('stg_query_history') }} - where end_time <= (select latest_ts from stop_threshold) + where true + and end_time <= (select latest_ts from stop_threshold) + {% if is_incremental() %} + -- account for late arriving queries + and end_time > (select coalesce(dateadd(day, -3, max(end_time)), '1970-01-01') from {{ this }}) + {% endif %} ), hours_list as ( @@ -34,7 +43,12 @@ hours_list as ( dateadd('day', '+1', current_date::timestamp_tz) ) as hour_start, dateadd('hour', '+1', hour_start) as hour_end + + {% if is_incremental() %} + from table(generator(rowcount => (24 * 7))) + {% else %} from table(generator(rowcount => (24 * 730))) + {% endif %} ), -- 1 row per hour a query ran @@ -56,6 +70,8 @@ query_seconds_per_hour as ( datediff('millisecond', greatest(execution_start_time, hour_start), least(end_time, hour_end)) as num_milliseconds_query_ran, sum(num_milliseconds_query_ran) over (partition by warehouse_id, hour_start) as total_query_milliseconds_in_hour, div0(num_milliseconds_query_ran, total_query_milliseconds_in_hour) as fraction_of_total_query_time_in_hour, + sum(query_acceleration_bytes_scanned) over (partition by warehouse_id, hour_start) as total_query_acceleration_bytes_scanned_in_hour, + div0(query_acceleration_bytes_scanned, total_query_acceleration_bytes_scanned_in_hour) as fraction_of_total_query_acceleration_bytes_scanned_in_hour, hour_start as hour from query_hours ), @@ -63,18 +79,23 @@ query_seconds_per_hour as ( credits_billed_hourly as ( select start_time as hour, - warehouse_id, - credits_used_compute, - credits_used_cloud_services - from {{ ref('stg_warehouse_metering_history') }} + entity_id as warehouse_id, + sum(iff(service_type = 'WAREHOUSE_METERING', credits_used_compute, 0)) as credits_used_compute, + sum(iff(service_type = 'WAREHOUSE_METERING', credits_used_cloud_services, 0)) as credits_used_cloud_services, + sum(iff(service_type = 'QUERY_ACCELERATION', credits_used_compute, 0)) as credits_used_query_acceleration + from {{ ref('stg_metering_history') }} + where true + and service_type in ('QUERY_ACCELERATION', 'WAREHOUSE_METERING') + group by 1, 2 ), query_cost as ( select query_seconds_per_hour.*, - credits_billed_hourly.credits_used_compute * daily_rates.effective_rate as actual_warehouse_cost, - credits_billed_hourly.credits_used_compute * query_seconds_per_hour.fraction_of_total_query_time_in_hour * daily_rates.effective_rate as allocated_compute_cost_in_hour, - credits_billed_hourly.credits_used_compute * query_seconds_per_hour.fraction_of_total_query_time_in_hour as allocated_compute_credits_in_hour + credits_billed_hourly.credits_used_compute * query_seconds_per_hour.fraction_of_total_query_time_in_hour as allocated_compute_credits_in_hour, + allocated_compute_credits_in_hour * daily_rates.effective_rate as allocated_compute_cost_in_hour, + credits_billed_hourly.credits_used_query_acceleration * query_seconds_per_hour.fraction_of_total_query_acceleration_bytes_scanned_in_hour as allocated_query_acceleration_credits_in_hour, + allocated_query_acceleration_credits_in_hour * daily_rates.effective_rate as allocated_query_acceleration_cost_in_hour from query_seconds_per_hour inner join credits_billed_hourly on query_seconds_per_hour.warehouse_id = credits_billed_hourly.warehouse_id @@ -93,6 +114,8 @@ cost_per_query as ( any_value(execution_start_time) as execution_start_time, sum(allocated_compute_cost_in_hour) as compute_cost, sum(allocated_compute_credits_in_hour) as compute_credits, + sum(allocated_query_acceleration_cost_in_hour) as query_acceleration_cost, + sum(allocated_query_acceleration_credits_in_hour) as query_acceleration_credits, any_value(credits_used_cloud_services) as credits_used_cloud_services, any_value(ran_on_warehouse) as ran_on_warehouse from query_cost @@ -117,6 +140,8 @@ all_queries as ( execution_start_time, compute_cost, compute_credits, + query_acceleration_cost, + query_acceleration_credits, credits_used_cloud_services, ran_on_warehouse from cost_per_query @@ -130,6 +155,8 @@ all_queries as ( execution_start_time, 0 as compute_cost, 0 as compute_credits, + 0 as query_acceleration_cost, + 0 as query_acceleration_credits, credits_used_cloud_services, ran_on_warehouse from filtered_queries @@ -144,14 +171,16 @@ select all_queries.execution_start_time, all_queries.compute_cost, all_queries.compute_credits, + all_queries.query_acceleration_cost, + all_queries.query_acceleration_credits, -- For the most recent day, which is not yet complete, this calculation won't be perfect. -- For example, at 12PM on the latest day, it's possible that cloud credits make up <10% of compute cost, so the queries -- from that day are not allocated any cloud_services_cost. The next time the model runs, after we have the full day of data, -- this may change if cloud credits make up >10% of compute cost. (div0(all_queries.credits_used_cloud_services, credits_billed_daily.daily_credits_used_cloud_services) * credits_billed_daily.daily_billable_cloud_services) * coalesce(daily_rates.effective_rate, current_rates.effective_rate) as cloud_services_cost, div0(all_queries.credits_used_cloud_services, credits_billed_daily.daily_credits_used_cloud_services) * credits_billed_daily.daily_billable_cloud_services as cloud_services_credits, - all_queries.compute_cost + cloud_services_cost as query_cost, - all_queries.compute_credits + cloud_services_credits as query_credits, + all_queries.compute_cost + all_queries.query_acceleration_cost + cloud_services_cost as query_cost, + all_queries.compute_credits + all_queries.query_acceleration_credits + cloud_services_credits as query_credits, all_queries.ran_on_warehouse, coalesce(daily_rates.currency, current_rates.currency) as currency from all_queries diff --git a/models/dbt_queries.sql b/models/dbt_queries.sql index d299fb7..86c5127 100644 --- a/models/dbt_queries.sql +++ b/models/dbt_queries.sql @@ -51,7 +51,7 @@ select from {{ ref('query_history_enriched') }} where dbt_metadata is not null {% if is_incremental() %} - -- Conservatively re-process the last 7 days to account for late arriving rates data + -- Conservatively re-process the last 3 days to account for late arriving rates data -- which changes the cost per query - and end_time > (select dateadd(day, -7, max(end_time)) from {{ this }}) + and end_time > (select dateadd(day, -3, max(end_time)) from {{ this }}) {% endif %} diff --git a/models/hourly_spend.sql b/models/hourly_spend.sql index cbb0ac1..222edec 100644 --- a/models/hourly_spend.sql +++ b/models/hourly_spend.sql @@ -365,7 +365,7 @@ query_acceleration_spend_hourly as ( hours.hour, 'Query Acceleration' as service, null as storage_type, - null as warehouse_name, + stg_metering_history.name as warehouse_name, null as database_name, coalesce( sum( diff --git a/models/query_history_enriched.sql b/models/query_history_enriched.sql index d047537..8a05e45 100644 --- a/models/query_history_enriched.sql +++ b/models/query_history_enriched.sql @@ -24,9 +24,9 @@ query_history as ( from {{ ref('stg_query_history') }} {% if is_incremental() %} - -- Conservatively re-process the last 7 days to account for late arriving rates data + -- Conservatively re-process the last 3 days to account for late arriving rates data -- which changes the cost per query - where end_time > (select dateadd(day, -7, max(end_time)) from {{ this }}) + where end_time > (select dateadd(day, -3, max(end_time)) from {{ this }}) {% endif %} ), @@ -34,9 +34,9 @@ cost_per_query as ( select * from {{ ref('cost_per_query') }} {% if is_incremental() %} - -- Conservatively re-process the last 7 days to account for late arriving rates data + -- Conservatively re-process the last 3 days to account for late arriving rates data -- which changes the cost per query - where end_time > (select dateadd(day, -7, max(end_time)) from {{ this }}) + where end_time > (select dateadd(day, -3, max(end_time)) from {{ this }}) {% endif %} ) @@ -44,6 +44,8 @@ select cost_per_query.query_id, cost_per_query.compute_cost, cost_per_query.compute_credits, + cost_per_query.query_acceleration_cost, + cost_per_query.query_acceleration_credits, cost_per_query.cloud_services_cost, cost_per_query.cloud_services_credits, cost_per_query.query_cost, diff --git a/models/staging/stg_metering_history.sql b/models/staging/stg_metering_history.sql index 6af2bbf..ca8c3d1 100644 --- a/models/staging/stg_metering_history.sql +++ b/models/staging/stg_metering_history.sql @@ -1,17 +1,22 @@ -{{ config(materialized='incremental') }} +{{ config( + materialized='incremental', + unique_key=['service_type', 'start_time', 'entity_id'], +) }} select - name, - credits_used_compute, + service_type, start_time, end_time, - service_type, + entity_id, + name, + credits_used_compute, credits_used_cloud_services, credits_used from {{ source('snowflake_account_usage', 'metering_history') }} {% if is_incremental() %} - where end_time > (select max(end_time) from {{ this }}) + -- account for changing metering data + where end_time > (select coalesce(dateadd(day, -7, max(end_time)), '1970-01-01') from {{ this }}) {% endif %} order by start_time asc diff --git a/models/staging/stg_rate_sheet_daily.sql b/models/staging/stg_rate_sheet_daily.sql index b0d2566..be876e8 100644 --- a/models/staging/stg_rate_sheet_daily.sql +++ b/models/staging/stg_rate_sheet_daily.sql @@ -12,6 +12,11 @@ select usage_type, currency, effective_rate, - service_type + case + -- Have only seen this on one account. Normally it is COMPUTE, and all our downstream models rely on that + -- May adjust this in the future if Snowflake is permanently changing these fields for all accounts + when service_type = 'WAREHOUSE_METERING' then 'COMPUTE' + else service_type + end as service_type from {{ source('snowflake_organization_usage', 'rate_sheet_daily') }} order by date diff --git a/models/staging/stg_warehouse_metering_history.sql b/models/staging/stg_warehouse_metering_history.sql index 1f6fc7e..a75d7b9 100644 --- a/models/staging/stg_warehouse_metering_history.sql +++ b/models/staging/stg_warehouse_metering_history.sql @@ -1,4 +1,7 @@ -{{ config(materialized='incremental') }} +{{ config( + materialized='incremental', + unique_key=['start_time', 'warehouse_id'], +) }} select start_time, @@ -11,7 +14,8 @@ select from {{ source('snowflake_account_usage', 'warehouse_metering_history') }} {% if is_incremental() %} - where end_time > (select max(end_time) from {{ this }}) + -- account for changing metering data + where end_time > (select coalesce(dateadd(day, -7, max(end_time)), '1970-01-01') from {{ this }}) {% endif %} order by start_time