From e89563ba13fff53d3559de62f750163ad3c28708 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matu=CC=81s=CC=8C=20Tomlein?= Date: Fri, 14 Jun 2024 06:26:54 +0200 Subject: [PATCH] Fix counting impressions based using distinct play_id instead of page_view_id (close#72) --- CHANGELOG | 8 ++++++++ docs/markdown/snowplow_media_player_common_cols.md | 4 ++-- .../snowplow_media_player_media_stats_expected.csv | 4 ++-- models/media_stats/snowplow_media_player_media_stats.sql | 4 ++-- 4 files changed, 14 insertions(+), 6 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 079172a..dae69e0 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,11 @@ +snowplow-media-player 0.8.0 (2024-06-17) +--------------------------------------- +## Fix +- Fix counting impressions based using distinct play_id instead of page_view_id (#72) + +## 🚨 Breaking Changes 🚨 +This version requires a full refresh run if you have been using any previous versions. Otherwise the calculation of the `impressions` and `play_rate` columns in the media stats table will be inconsistent with historical data. Check out the [migration guide](https://docs.snowplow.io/docs/modeling-your-data/modeling-your-data-with-dbt/migration-guides/media-player/) for more information when you upgrade. + snowplow-media-player 0.7.2 (2024-03-18) --------------------------------------- ## Summary diff --git a/docs/markdown/snowplow_media_player_common_cols.md b/docs/markdown/snowplow_media_player_common_cols.md index 97d4e41..c80b1b3 100644 --- a/docs/markdown/snowplow_media_player_common_cols.md +++ b/docs/markdown/snowplow_media_player_common_cols.md @@ -207,7 +207,7 @@ The sum of all media plays that exceeds the minimum media length set within the {% enddocs %} {% docs col_impressions %} -The number of pageviews where a media content was rendered regardless of whether the media was actually played or not. 
+The number of player instances (media sessions) in which media content was rendered, regardless of whether the media was actually played. {% enddocs %} {% docs col_avg_playback_rate %} @@ -215,7 +215,7 @@ Average playback rate (1 is normal speed). {% enddocs %} {% docs col_play_rate %} -Total plays divided by impressions. Please note that as the base for media plays is pageview / media_identifier, in case the same video is played multiple times within the same pageview, it will still count as one play. +Total plays divided by impressions. Please note that if the media session ID is not available, the base for media plays is pageview / media_identifier, so if the same video is played multiple times within the same pageview, it will still count as one play. {% enddocs %} {% docs col_complete_plays %} diff --git a/integration_tests/data/expected/snowplow_media_player_media_stats_expected.csv b/integration_tests/data/expected/snowplow_media_player_media_stats_expected.csv index e754b95..c767acd 100644 --- a/integration_tests/data/expected/snowplow_media_player_media_stats_expected.csv +++ b/integration_tests/data/expected/snowplow_media_player_media_stats_expected.csv @@ -3,5 +3,5 @@ ae9ad2ba4e69068fa1807c9f41d9235a,html-dbt,dbt Coalesce 2021 - Data modeling at S 9b71f6dbf74a346e7da60a5a744b362d,yt-dbt-coalesce-2022,dbt Coalesce 2022 - Data modeling at Scale 2,1889,video,com.youtube-youtube,,,0,0,0,1,2022-01-20 19:17:13.125,0,0,0,0,0,0,,,,,0,, 57a978825bca2d327f303441c310b33c,yt-dbt-coalesce-2022,dbt Coalesce 2022 - Data modeling at Scale 3,1889,video,com.youtube-youtube,,,0,0,0,1,2022-01-20 19:17:13.125,0,0,0,0,0,0,,,,,0,, 68efef79c990d2f2542070769fbda051,yt-dbt-coalesce-2021,dbt Coalesce 2021 - Data modeling at Scale,1887,video,com.youtube-youtube,2022-01-18 21:23:57.381,2022-01-20 19:13:21.293,38,5,0,39,2022-01-20 19:17:13.125,2,2,1,0,4,55.067,1.449,,1.039,0.046,0.974,0,0.003 -12a63eafc0d70abcfc379a23ae9788e3,,For bigger fun,60,video,html5,2022-10-08 
11:09:03.425,2023-08-04 13:47:32.066,38,18,14,24,2023-08-04 13:47:32.066,34,24,16,14,14,24.036,0.633,0.512,1.050,0.512,1.583,0.368,0.326 -712ae6a1ffdbd7446c3d4767f7c0c264,,For bigger fun,60,audio,vimeo,2023-08-04 11:11:21.138,2023-08-04 11:11:21.138,2,0,0,1,2023-08-04 13:47:32.066,0,0,0,0,0,0.007,0.003,0.017,1.000,0.017,2.000,0.000,0.000 +12a63eafc0d70abcfc379a23ae9788e3,,For bigger fun,60,video,html5,2022-10-08 11:09:03.425,2023-08-04 13:47:32.066,38,18,14,48,2023-08-04 13:47:32.066,34,24,16,14,14,24.036,0.633,0.512,1.050,0.512,0.792,0.368,0.326 +712ae6a1ffdbd7446c3d4767f7c0c264,,For bigger fun,60,audio,vimeo,2023-08-04 11:11:21.138,2023-08-04 11:11:21.138,2,0,0,2,2023-08-04 13:47:32.066,0,0,0,0,0,0.007,0.003,0.017,1.000,0.017,1.000,0.000,0.000 diff --git a/models/media_stats/snowplow_media_player_media_stats.sql b/models/media_stats/snowplow_media_player_media_stats.sql index 818636e..57af62e 100644 --- a/models/media_stats/snowplow_media_player_media_stats.sql +++ b/models/media_stats/snowplow_media_player_media_stats.sql @@ -42,7 +42,7 @@ with new_data as ( sum(case when is_played then 1 else 0 end) as plays, sum(case when is_valid_play then 1 else 0 end) as valid_plays, sum(case when p.is_complete_play then 1 else 0 end) as complete_plays, - count(distinct p.page_view_id) as impressions, + count(distinct p.play_id) as impressions, avg(case when is_played then coalesce(p.content_watched_secs, p.play_time_secs, 0) / nullif(p.duration_secs, 0) end) as avg_percent_played, avg(case when is_played then p.retention_rate end) as avg_retention_rate, avg(case when is_played then p.avg_playback_rate end) as avg_playback_rate, @@ -192,7 +192,7 @@ with prep as ( sum(case when is_played then 1 else 0 end) as plays, sum(case when is_valid_play then 1 else 0 end) as valid_plays, sum(case when p.is_complete_play then 1 else 0 end) as complete_plays, - count(distinct p.page_view_id) as impressions, + count(distinct p.play_id) as impressions, avg(case when is_played then 
coalesce(p.content_watched_secs, p.play_time_secs, 0) / nullif(p.duration_secs, 0) end) as avg_percent_played, avg(case when is_played then p.retention_rate end) as avg_retention_rate, avg(case when is_played then p.avg_playback_rate end) as avg_playback_rate,