Skip to content

Commit

Permalink
Merge pull request #51 from bqbooster/rework_compute_cost_and_billing
Browse files Browse the repository at this point in the history
Rework compute cost & billing
  • Loading branch information
Kayrnt authored Nov 4, 2024
2 parents d68b850 + d2d6fec commit c50bf6b
Show file tree
Hide file tree
Showing 13 changed files with 209 additions and 27 deletions.
4 changes: 2 additions & 2 deletions macros/jobs_done_incremental_hourly.sql
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@ FROM
{{ ref('jobs_by_project_with_cost') }}
WHERE
{% if is_incremental() %}
creation_time > TIMESTAMP_TRUNC(_dbt_max_partition, HOUR)
creation_time >= TIMESTAMP_TRUNC(_dbt_max_partition, HOUR)
{% else %}
creation_time > TIMESTAMP_SUB(
creation_time >= TIMESTAMP_SUB(
TIMESTAMP_TRUNC(CURRENT_TIMESTAMP(), HOUR),
INTERVAL {{ var('lookback_window_days') }} DAY)
{% endif %}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,16 @@
The downstream jobs will use that model to aggregate all finished jobs as pending/running jobs metrics will evolve
So the lookback is based on the max partition of the downstream model
#}
{% macro jobs_done_incremental_daily() -%}
{% macro jobs_done_incremental_minute() -%}
(SELECT *
FROM
{{ ref('jobs_by_project_with_cost') }}
WHERE
{% if is_incremental() %}
creation_time > TIMESTAMP_TRUNC(_dbt_max_partition, DAY)
creation_time >= TIMESTAMP_TRUNC(_dbt_max_partition, MINUTE)
{% else %}
creation_time > TIMESTAMP_SUB(
TIMESTAMP_TRUNC(CURRENT_TIMESTAMP(), DAY),
creation_time >= TIMESTAMP_SUB(
TIMESTAMP_TRUNC(CURRENT_TIMESTAMP(), MINUTE),
INTERVAL {{ var('lookback_window_days') }} DAY)
{% endif %}
AND state = 'DONE'
Expand Down
12 changes: 12 additions & 0 deletions models/compute/datamart/compute_cost_per_hour_view.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{{
config(
materialized='view',
)
}}

SELECT
TIMESTAMP_TRUNC(HOUR, YEAR) AS year,
TIMESTAMP_TRUNC(HOUR, MONTH) AS month,
TIMESTAMP_TRUNC(HOUR, DAY) AS day,
*
FROM {{ ref('compute_cost_per_hour') }}
32 changes: 32 additions & 0 deletions models/compute/datamart/compute_cost_per_hour_view.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
version: 2

models:
- name: compute_cost_per_hour_view
description: >
An enriched view over the model that stores the compute cost per hour.
meta:
label: "Compute cost per hour"
order_fields_by: "label"
group_label: "Compute cost"
columns:
- name: year
description: The year of the compute cost.
- name: month
description: The month of the compute cost.
- name: day
description: The day of the compute cost.
- name: hour
description: The hour of the compute cost.
- name: project_id
description: The project id of the job.
- name: total_query_cost
description: The total cost of all queries run during the hour.
- name: failing_query_cost
description: The total cost of all queries that failed during the hour.
- name: total_slot_ms
description: The total number of slot time milliseconds used by all queries during the hour.
- name: total_slot_time
description: The total number of slot time in human readable format used by all queries during the hour.
- name: query_count
description: The total number of queries run during the hour.

12 changes: 12 additions & 0 deletions models/compute/datamart/compute_cost_per_minute_view.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{{
config(
materialized='view',
)
}}
SELECT
TIMESTAMP_TRUNC(MINUTE, YEAR) AS year,
TIMESTAMP_TRUNC(MINUTE, MONTH) AS month,
TIMESTAMP_TRUNC(MINUTE, DAY) AS day,
TIMESTAMP_TRUNC(MINUTE, HOUR) AS hour,
*
FROM {{ ref('compute_cost_per_minute') }}
34 changes: 34 additions & 0 deletions models/compute/datamart/compute_cost_per_minute_view.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
version: 2

models:
- name: compute_cost_per_minute_view
description: >
An enriched view over the model that stores the compute cost per minute.
meta:
label: "Compute cost per minute"
order_fields_by: "label"
group_label: "Compute cost"
columns:
- name: year
description: The year of the compute cost.
- name: month
description: The month of the compute cost.
- name: day
description: The day of the compute cost.
- name: hour
description: The hour of the compute cost.
- name: minute
description: The minute of the compute cost.
- name: project_id
description: The project id of the job.
- name: total_query_cost
description: The total cost of all queries run during the minute.
- name: failing_query_cost
description: The total cost of all queries that failed during the minute.
- name: total_slot_ms
description: The total number of slot time milliseconds used by all queries during the minute.
- name: total_slot_time
description: The total number of slot time in human readable format used by all queries during the minute.
- name: query_count
description: The total number of queries run during the minute.

Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,6 @@ WHERE
((service.description LIKE 'BigQuery' AND LOWER(sku.description) LIKE '%analysis%')
OR (service.description LIKE 'BigQuery Reservation API'))
{% if is_incremental() %}
AND TIMESTAMP_TRUNC(usage_start_time, HOUR) > _dbt_max_partition
AND TIMESTAMP_TRUNC(usage_start_time, HOUR) >= _dbt_max_partition
{% endif %}
GROUP BY ALL
28 changes: 28 additions & 0 deletions models/compute/intermediate/cost/compute_cost_per_hour.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
{{
config(
materialized='incremental',
incremental_strategy = 'insert_overwrite',
on_schema_change='append_new_columns',
partition_by={
"field": "hour",
"granularity": "day",
"data_type": "timestamp",
"copy_partitions": should_use_copy_partitions()
},
cluster_by = ['hour', 'project_id'],
partition_expiration_days = var('output_partition_expiration_days')
)
}}
{%- call set_sql_header(config) %}
{{ milliseconds_to_readable_time_udf() }}
{%- endcall %}
SELECT
TIMESTAMP_TRUNC(MINUTE, HOUR) AS hour,
project_id,
SUM(ROUND(total_query_cost, 2)) AS total_query_cost,
SUM(ROUND(failing_query_cost, 2)) AS failing_query_cost,
SUM(total_slot_ms) AS total_slot_ms,
MILLISECONDS_TO_READABLE_TIME_UDF(total_slot_ms, 2) AS total_slot_time,
SUM(query_count) AS query_count
FROM {{ ref("compute_cost_per_minute") }}
GROUP BY ALL
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ version: 2
models:
- name: compute_cost_per_hour
description: >
A model that store the compute cost per hour.
A model that stores the compute cost per hour.
meta:
label: "Compute cost per hour"
order_fields_by: "label"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,25 +4,25 @@
incremental_strategy = 'insert_overwrite',
on_schema_change='append_new_columns',
partition_by={
"field": "day",
"field": "minute",
"granularity": "hour",
"data_type": "timestamp",
"copy_partitions": should_use_copy_partitions()
},
cluster_by = ['hour', 'project_id'],
cluster_by = ['minute', 'project_id'],
partition_expiration_days = var('output_partition_expiration_days')
)
}}
{%- call set_sql_header(config) %}
{{ milliseconds_to_readable_time_udf() }}
{%- endcall %}
SELECT
TIMESTAMP_TRUNC(creation_time, DAY) AS day,
TIMESTAMP_TRUNC(creation_time, HOUR) AS hour,
TIMESTAMP_TRUNC(creation_time, MINUTE) AS minute,
project_id,
SUM(ROUND(query_cost, 2)) AS total_query_cost,
SUM(IF(error_result IS NOT NULL, ROUND(query_cost, 2), 0)) AS failing_query_cost,
SUM(total_slot_ms) AS total_slot_ms,
MILLISECONDS_TO_READABLE_TIME_UDF(SUM(total_slot_ms), 2) AS total_slot_time,
MILLISECONDS_TO_READABLE_TIME_UDF(total_slot_ms, 2) AS total_slot_time,
COUNT(*) AS query_count
FROM {{ jobs_done_incremental_daily() }}
GROUP BY day, hour, project_id
FROM {{ jobs_done_incremental_minute() }}
GROUP BY ALL
28 changes: 28 additions & 0 deletions models/compute/intermediate/cost/compute_cost_per_minute.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
version: 2

models:
- name: compute_cost_per_minute
description: >
A model that stores the compute cost per hour.
meta:
label: "Compute cost per minute"
order_fields_by: "label"
group_label: "Compute cost"
columns:
- name: hour
description: The hour of the compute cost.
- name: minute
description: The minute of the compute cost.
- name: project_id
description: The project id of the job.
- name: total_query_cost
description: The total cost of all queries run during the minute.
- name: failing_query_cost
description: The total cost of all queries that failed during the minute.
- name: total_slot_ms
description: The total number of slot time milliseconds used by all queries during the minute.
- name: total_slot_time
description: The total number of slot time in human readable format used by all queries during the hour.
- name: query_count
description: The total number of queries run during the minute.

60 changes: 48 additions & 12 deletions models/global/datamart/daily_spend.sql
Original file line number Diff line number Diff line change
Expand Up @@ -10,16 +10,52 @@
},
)
}}
{%- call set_sql_header(config) %}
{{ milliseconds_to_readable_time_udf() }}
{%- endcall %}
SELECT
with compute_cost as (
{#- we use the billing export if possible else fallback to the estimated comput cost #}
{%- if enable_gcp_billing_export() %}
SELECT day,
'compute' AS cost_category,
SUM(compute_cost) AS cost
FROM {{ ref('compute_billing_per_hour') }}
{% if is_incremental() %}
WHERE hour >= TIMESTAMP_SUB(_dbt_max_partition, INTERVAL 1 DAY)
{% endif %}
GROUP BY ALL
{%- else %}
select
day,
'compute' as cost_category,
SUM(total_query_cost) as cost
from {{ ref('compute_cost_per_hour_view') }}
{% if is_incremental() %}
where hour >= TIMESTAMP_SUB(_dbt_max_partition, interval 1 day)
{% endif %}
group by all
{% endif %}
)
{%- if enable_gcp_billing_export() %}
,
storage_cost AS (
SELECT
day,
'storage' AS cost_category,
SUM(storage_cost) AS cost
FROM {{ ref('storage_billing_per_hour') }}
{% if is_incremental() %}
WHERE hour >= TIMESTAMP_SUB(_dbt_max_partition, INTERVAL 1 DAY)
{% endif %}
GROUP BY day
)
{%- endif %}
select
day,
'compute' AS cost_category,
SUM(total_query_cost) AS cost
FROM {{ ref('compute_cost_per_hour') }}
{% if is_incremental() %}
WHERE hour >= TIMESTAMP_SUB(_dbt_max_partition, INTERVAL 1 DAY)
{% endif %}
GROUP BY day
{# TODO - add a union with storage_cost_per_hour once added #}
cost_category,
SUM(cost) as cost
from (
select * from compute_cost
{%- if enable_gcp_billing_export() %}
UNION ALL
SELECT * FROM storage_cost
{%- endif %}
)
group by all

0 comments on commit c50bf6b

Please sign in to comment.