From 568d0a2b6c48335a0b144f0eb77d3bb43047eac3 Mon Sep 17 00:00:00 2001 From: Jeff Klukas Date: Wed, 22 Apr 2020 12:48:18 -0400 Subject: [PATCH] Fenix baseline_daily and clients_last_seen tables As announced in: https://mail.mozilla.org/pipermail/fx-data-dev/2020-April/000339.html --- GRAVEYARD.md | 8 + .../clients_last_seen/view.sql | 21 --- .../clients_daily_v1/query.sql | 104 ----------- .../clients_last_seen_v1/init.sql | 25 --- .../clients_last_seen_v1/query.sql | 49 ----- .../baseline_daily/view.sql | 7 - .../clients_last_seen/view.sql | 17 -- .../metrics_daily/view.sql | 7 - .../baseline_daily_v1/metadata.yaml | 10 -- .../baseline_daily_v1/query.sql | 169 ------------------ .../clients_first_seen_v1/init.sql | 101 ----------- .../clients_first_seen_v1/metadata.yaml | 11 -- .../clients_first_seen_v1/query.sql | 107 ----------- .../clients_last_seen_v1/init.sql | 34 ---- .../clients_last_seen_v1/metadata.yaml | 11 -- .../clients_last_seen_v1/query.sql | 85 --------- .../metrics_daily_v1/metadata.yaml | 10 -- .../metrics_daily_v1/query.sql | 90 ---------- 18 files changed, 8 insertions(+), 858 deletions(-) delete mode 100755 sql/org_mozilla_fenix/clients_last_seen/view.sql delete mode 100755 sql/org_mozilla_fenix_derived/clients_daily_v1/query.sql delete mode 100755 sql/org_mozilla_fenix_derived/clients_last_seen_v1/init.sql delete mode 100755 sql/org_mozilla_fenix_derived/clients_last_seen_v1/query.sql delete mode 100644 sql/org_mozilla_firefox/baseline_daily/view.sql delete mode 100644 sql/org_mozilla_firefox/clients_last_seen/view.sql delete mode 100644 sql/org_mozilla_firefox/metrics_daily/view.sql delete mode 100755 sql/org_mozilla_firefox_derived/baseline_daily_v1/metadata.yaml delete mode 100644 sql/org_mozilla_firefox_derived/baseline_daily_v1/query.sql delete mode 100644 sql/org_mozilla_firefox_derived/clients_first_seen_v1/init.sql delete mode 100755 sql/org_mozilla_firefox_derived/clients_first_seen_v1/metadata.yaml delete mode 100644 sql/org_mozilla_firefox_derived/clients_first_seen_v1/query.sql delete mode 100644 sql/org_mozilla_firefox_derived/clients_last_seen_v1/init.sql delete mode 100755 sql/org_mozilla_firefox_derived/clients_last_seen_v1/metadata.yaml delete mode 100644 sql/org_mozilla_firefox_derived/clients_last_seen_v1/query.sql delete mode 100755 sql/org_mozilla_firefox_derived/metrics_daily_v1/metadata.yaml delete mode 100644 sql/org_mozilla_firefox_derived/metrics_daily_v1/query.sql diff --git a/GRAVEYARD.md b/GRAVEYARD.md index 09fe1fac543..3aa41b9f95e 100644 --- a/GRAVEYARD.md +++ b/GRAVEYARD.md @@ -2,6 +2,14 @@ This document records interesting code that we've deleted for the sake of discoverability for the future. +## 2020-04 Fenix baseline_daily and clients_last_seen + +- [Removal PR](https://github.com/mozilla/bigquery-etl/pull/925) + +We are now using dynamically generated queries for generic Glean +ETL on top of baseline pings, so we have deprecated previous versions +of daily and last_seen tables. + ## Smoot Usage v1 - [Removal PR](https://github.com/mozilla/bigquery-etl/pull/460) diff --git a/sql/org_mozilla_fenix/clients_last_seen/view.sql b/sql/org_mozilla_fenix/clients_last_seen/view.sql deleted file mode 100755 index 3a6727ce2ff..00000000000 --- a/sql/org_mozilla_fenix/clients_last_seen/view.sql +++ /dev/null @@ -1,21 +0,0 @@ -CREATE OR REPLACE VIEW - `moz-fx-data-shared-prod.org_mozilla_fenix.clients_last_seen` -AS -WITH with_days_since AS ( - SELECT - -- We cannot use UDFs in a view, so we paste the body of udf.bitpos(bits) literally here. - CAST(SAFE.LOG(days_seen_bits & -days_seen_bits, 2) AS INT64) AS days_since_seen, - CAST( - SAFE.LOG(days_created_profile_bits & -days_created_profile_bits, 2) AS INT64 - ) AS days_since_created_profile, - * - FROM - `moz-fx-data-shared-prod.org_mozilla_fenix_derived.clients_last_seen_v1` -) - -- -SELECT - -- Include date_last_seen for compatibility with existing queries. - DATE_SUB(submission_date, INTERVAL days_since_seen DAY) AS date_last_seen, - * -FROM - with_days_since diff --git a/sql/org_mozilla_fenix_derived/clients_daily_v1/query.sql b/sql/org_mozilla_fenix_derived/clients_daily_v1/query.sql deleted file mode 100755 index fc95d544374..00000000000 --- a/sql/org_mozilla_fenix_derived/clients_daily_v1/query.sql +++ /dev/null @@ -1,104 +0,0 @@ -WITH unioned AS ( - SELECT - submission_timestamp, - document_id, - client_info, - sample_id, - metadata, - normalized_channel, - metrics AS baseline_metrics, - NULL AS metrics - FROM - org_mozilla_fenix_stable.baseline_v1 - UNION ALL - SELECT - submission_timestamp, - document_id, - client_info, - sample_id, - metadata, - normalized_channel, - NULL AS baseline_metrics, - metrics - FROM - org_mozilla_fenix_stable.metrics_v1 -), - -- -base AS ( - SELECT - DATE(submission_timestamp) AS submission_date, - LOWER(client_info.client_id) AS client_id, - * - FROM - unioned - WHERE - client_info.client_id IS NOT NULL -), - -- -windowed AS ( - SELECT - submission_date, - client_id, - sample_id, - ROW_NUMBER() OVER w1_unframed AS _n, - 'Fenix' AS app_name, - -- - -- Take the earliest first_run_date if ambiguous. - MIN(SAFE.PARSE_DATE('%F', SUBSTR(client_info.first_run_date, 1, 10))) OVER w1 AS first_run_date, - -- - -- Sums over distinct baseline pings. - SUM( - udf.glean_timespan_seconds(baseline_metrics.timespan.glean_baseline_duration) - ) OVER w1 AS durations, - -- - -- For all other dimensions, we use the mode of observed values in the day. - udf.mode_last(ARRAY_AGG(client_info.os) OVER w1) AS os, - udf.mode_last(ARRAY_AGG(client_info.os_version) OVER w1) AS os_version, - udf.mode_last(ARRAY_AGG(baseline_metrics.string.glean_baseline_locale) OVER w1) AS locale, - udf.json_mode_last( - ARRAY_AGG(udf.geo_struct(metadata.geo.country, metadata.geo.city, NULL, NULL)) OVER w1 - ).* EXCEPT (geo_subdivision1, geo_subdivision2), - udf.mode_last(ARRAY_AGG(client_info.device_manufacturer) OVER w1) AS device_manufacturer, - udf.mode_last(ARRAY_AGG(client_info.device_model) OVER w1) AS device_model, - udf.mode_last(ARRAY_AGG(client_info.app_build) OVER w1) AS app_build, - udf.mode_last(ARRAY_AGG(normalized_channel) OVER w1) AS normalized_channel, - udf.mode_last(ARRAY_AGG(client_info.architecture) OVER w1) AS architecture, - udf.mode_last( - ARRAY_AGG(metrics.string.search_default_engine_name) OVER w1 - ) AS search_default_engine_name, - udf.mode_last( - ARRAY_AGG(metrics.string.search_default_engine_name) OVER w1 - ) AS search_default_engine_code, - udf.mode_last(ARRAY_AGG(metrics.boolean.metrics_default_browser) OVER w1) AS default_browser, - udf.mode_last(ARRAY_AGG(client_info.app_display_version) OVER w1) AS app_display_version - FROM - base - WHERE - -- Reprocess all dates by running this query with --parameter=submission_date:DATE:NULL - (@submission_date IS NULL OR @submission_date = submission_date) - WINDOW - w1 AS ( - PARTITION BY - client_id, - submission_date - ORDER BY - submission_timestamp - ROWS BETWEEN - UNBOUNDED PRECEDING - AND UNBOUNDED FOLLOWING - ), - -- We must provide a modified window for ROW_NUMBER which cannot accept a frame clause. - w1_unframed AS ( - PARTITION BY - client_id, - submission_date - ORDER BY - submission_timestamp - ) -) -SELECT - * EXCEPT (_n) -FROM - windowed -WHERE - _n = 1 diff --git a/sql/org_mozilla_fenix_derived/clients_last_seen_v1/init.sql b/sql/org_mozilla_fenix_derived/clients_last_seen_v1/init.sql deleted file mode 100755 index 658cff419c7..00000000000 --- a/sql/org_mozilla_fenix_derived/clients_last_seen_v1/init.sql +++ /dev/null @@ -1,25 +0,0 @@ -CREATE TABLE - `moz-fx-data-shared-prod.org_mozilla_fenix_derived.clients_last_seen_v1` -PARTITION BY - submission_date -CLUSTER BY - app_name, - os, - sample_id -OPTIONS - (require_partition_filter = TRUE) -AS -SELECT - CAST(NULL AS DATE) AS submission_date, - 0 AS days_seen_bits, - 0 AS days_created_profile_bits, - -- We make sure to delay * until the end so that as new columns are added - -- to clients_daily, we can add those columns in the same order to the end - -- of this schema, which may be necessary for the daily join query between - -- the two tables to validate. - * EXCEPT (submission_date) -FROM - `moz-fx-data-shared-prod.org_mozilla_fenix_derived.clients_daily_v1` -WHERE - -- Output empty table and read no input rows - FALSE diff --git a/sql/org_mozilla_fenix_derived/clients_last_seen_v1/query.sql b/sql/org_mozilla_fenix_derived/clients_last_seen_v1/query.sql deleted file mode 100755 index e04aace1ce8..00000000000 --- a/sql/org_mozilla_fenix_derived/clients_last_seen_v1/query.sql +++ /dev/null @@ -1,49 +0,0 @@ -WITH _current AS ( - SELECT - -- In this raw table, we capture the history of activity over the past - -- 28 days for each usage criterion as a single 64-bit integer. The - -- rightmost bit represents whether the user was active in the current day. - CAST(TRUE AS INT64) AS days_seen_bits, - udf.days_since_created_profile_as_28_bits( - DATE_DIFF(submission_date, first_run_date, DAY) - ) AS days_created_profile_bits, - * EXCEPT (submission_date) - FROM - clients_daily_v1 - WHERE - submission_date = @submission_date -), - -- -_previous AS ( - SELECT - * EXCEPT (submission_date) - FROM - clients_last_seen_v1 AS cls - WHERE - submission_date = DATE_SUB(@submission_date, INTERVAL 1 DAY) - -- Filter out rows from yesterday that have now fallen outside the 28-day window. - AND udf.shift_28_bits_one_day(days_seen_bits) > 0 -) - -- -SELECT - @submission_date AS submission_date, - IF( - _current.client_id IS NOT NULL, - _current, - _previous - ).* REPLACE ( -- - udf.combine_adjacent_days_28_bits( - _previous.days_seen_bits, - _current.days_seen_bits - ) AS days_seen_bits, - udf.coalesce_adjacent_days_28_bits( - _previous.days_created_profile_bits, - _current.days_created_profile_bits - ) AS days_created_profile_bits - ) -FROM - _current -FULL JOIN - _previous -USING - (client_id) diff --git a/sql/org_mozilla_firefox/baseline_daily/view.sql b/sql/org_mozilla_firefox/baseline_daily/view.sql deleted file mode 100644 index 9bb6f5cf600..00000000000 --- a/sql/org_mozilla_firefox/baseline_daily/view.sql +++ /dev/null @@ -1,7 +0,0 @@ -CREATE OR REPLACE VIEW - `moz-fx-data-shared-prod.org_mozilla_firefox.baseline_daily` -AS -SELECT - * -FROM - `moz-fx-data-shared-prod.org_mozilla_firefox_derived.baseline_daily_v1` diff --git a/sql/org_mozilla_firefox/clients_last_seen/view.sql b/sql/org_mozilla_firefox/clients_last_seen/view.sql deleted file mode 100644 index c601bfc1624..00000000000 --- a/sql/org_mozilla_firefox/clients_last_seen/view.sql +++ /dev/null @@ -1,17 +0,0 @@ -CREATE OR REPLACE VIEW - `moz-fx-data-shared-prod.org_mozilla_firefox.clients_last_seen` -AS -SELECT - `moz-fx-data-shared-prod.udf.pos_of_trailing_set_bit`(baseline.days_seen_bits) AS days_since_seen, - `moz-fx-data-shared-prod.udf.pos_of_trailing_set_bit`( - baseline.days_seen_session_start_bits - ) AS days_since_seen_session_start, - `moz-fx-data-shared-prod.udf.pos_of_trailing_set_bit`( - baseline.days_seen_session_end_bits - ) AS days_since_seen_session_end, - DATE_DIFF(submission_date, baseline.first_run_date, DAY) AS days_since_created_profile, - * EXCEPT (baseline, metrics), - baseline.*, - metrics.* -FROM - `moz-fx-data-shared-prod.org_mozilla_firefox_derived.clients_last_seen_v1` diff --git a/sql/org_mozilla_firefox/metrics_daily/view.sql b/sql/org_mozilla_firefox/metrics_daily/view.sql deleted file mode 100644 index c32110d2d36..00000000000 --- a/sql/org_mozilla_firefox/metrics_daily/view.sql +++ /dev/null @@ -1,7 +0,0 @@ -CREATE OR REPLACE VIEW - `moz-fx-data-shared-prod.org_mozilla_firefox.metrics_daily` -AS -SELECT - * -FROM - `moz-fx-data-shared-prod.org_mozilla_firefox_derived.metrics_daily_v1` diff --git a/sql/org_mozilla_firefox_derived/baseline_daily_v1/metadata.yaml b/sql/org_mozilla_firefox_derived/baseline_daily_v1/metadata.yaml deleted file mode 100755 index 91081af2c62..00000000000 --- a/sql/org_mozilla_firefox_derived/baseline_daily_v1/metadata.yaml +++ /dev/null @@ -1,10 +0,0 @@ -friendly_name: Baseline Daily -description: > - A daily aggregate of baseline pings from each Firefox for Android (Fenix) - client across all release channels, partitioned by day -owners: - - jklukas@mozilla.com -labels: - application: fenix - schedule: daily - incremental: true diff --git a/sql/org_mozilla_firefox_derived/baseline_daily_v1/query.sql b/sql/org_mozilla_firefox_derived/baseline_daily_v1/query.sql deleted file mode 100644 index 86ab183ca24..00000000000 --- a/sql/org_mozilla_firefox_derived/baseline_daily_v1/query.sql +++ /dev/null @@ -1,169 +0,0 @@ -/* - -This is a daily aggregation of just baseline pings from across the various -apps that correspond to the different channels of "Fenix", the new Firefox for Android. - -*/ --- The schemas for the different apps differ slightly such that we can't --- blindly union over the baseline views; instead, we extract out the set of --- fields we want via a UDF before we union. -CREATE TEMP FUNCTION extract_fields(baseline ANY TYPE) AS ( - ( - SELECT AS STRUCT - baseline.submission_timestamp, - DATE(baseline.submission_timestamp) AS submission_date, - LOWER(baseline.client_info.client_id) AS client_id, - baseline.sample_id, - SAFE.PARSE_DATE('%F', SUBSTR(baseline.client_info.first_run_date, 1, 10)) AS first_run_date, - baseline.ping_info.parsed_end_time, - udf.glean_timespan_seconds(baseline.metrics.timespan.glean_baseline_duration) AS duration, - baseline.client_info.android_sdk_version, - baseline.client_info.app_build, - baseline.client_info.app_channel, - baseline.client_info.app_display_version, - baseline.client_info.architecture, - baseline.client_info.device_manufacturer, - baseline.client_info.device_model, - baseline.client_info.telemetry_sdk_build, - baseline.client_info.locale, - baseline.metadata.geo.city, - baseline.metadata.geo.country, - baseline.metrics.string.glean_baseline_locale, - baseline.normalized_os, - baseline.normalized_os_version, - ) -); - -WITH unioned AS ( - SELECT - extract_fields(baseline).*, - 'release' AS normalized_channel - FROM - org_mozilla_firefox.baseline AS baseline - UNION ALL - SELECT - extract_fields(baseline).*, - 'beta' AS normalized_channel - FROM - org_mozilla_firefox_beta.baseline AS baseline - UNION ALL - SELECT - extract_fields(baseline).*, - 'nightly' AS normalized_channel - FROM - org_mozilla_fenix.baseline AS baseline - -- These final two apps should be retired in February. - UNION ALL - SELECT - extract_fields(baseline).*, - 'aurora nightly' AS normalized_channel - FROM - org_mozilla_fennec_aurora.baseline AS baseline - UNION ALL - SELECT - extract_fields(baseline).*, - 'preview nightly' AS normalized_channel - FROM - org_mozilla_fenix_nightly.baseline AS baseline -), --- -with_dates AS ( - SELECT - *, - -- For explanation of session start time calculation, see Glean docs: - -- https://mozilla.github.io/glean/book/user/pings/baseline.html#contents - DATE(TIMESTAMP_SUB(parsed_end_time, INTERVAL duration SECOND)) AS session_start_date, - DATE(parsed_end_time) AS session_end_date, - FROM - unioned -), --- -base AS ( - SELECT - *, - DATE_DIFF(submission_date, session_start_date, DAY) AS session_start_date_offset, - DATE_DIFF(submission_date, session_end_date, DAY) AS session_end_date_offset, - FROM - with_dates -), --- -windowed AS ( - SELECT - submission_date, - client_id, - sample_id, - ROW_NUMBER() OVER w1_unframed AS _n, - -- - -- Take the earliest first_run_date if ambiguous. - MIN(first_run_date) OVER w1 AS first_run_date, - -- - -- Sums over distinct baseline pings. - SUM(IF(duration BETWEEN 0 AND 100000, duration, 0)) OVER w1 AS durations, - -- - -- Bit patterns capturing activity dates relative to the submission date. - BIT_OR( - 1 << IF(session_start_date_offset BETWEEN 0 AND 27, session_start_date_offset, NULL) - ) OVER w1 AS days_seen_session_start_bits, - BIT_OR( - 1 << IF(session_end_date_offset BETWEEN 0 AND 27, session_end_date_offset, NULL) - ) OVER w1 AS days_seen_session_end_bits, - -- - -- For all other dimensions, we use the mode of observed values in the day. - udf.mode_last(ARRAY_AGG(normalized_channel) OVER w1) AS normalized_channel, - udf.mode_last(ARRAY_AGG(android_sdk_version) OVER w1) AS android_sdk_version, - udf.mode_last(ARRAY_AGG(app_build) OVER w1) AS app_build, - udf.mode_last(ARRAY_AGG(app_channel) OVER w1) AS app_channel, - udf.mode_last(ARRAY_AGG(app_display_version) OVER w1) AS app_display_version, - udf.mode_last(ARRAY_AGG(architecture) OVER w1) AS architecture, - udf.mode_last(ARRAY_AGG(device_manufacturer) OVER w1) AS device_manufacturer, - udf.mode_last(ARRAY_AGG(device_model) OVER w1) AS device_model, - udf.mode_last(ARRAY_AGG(telemetry_sdk_build) OVER w1) AS telemetry_sdk_build, - udf.mode_last(ARRAY_AGG(locale) OVER w1) AS locale, - udf.mode_last(ARRAY_AGG(city) OVER w1) AS city, - udf.mode_last(ARRAY_AGG(country) OVER w1) AS country, - udf.mode_last(ARRAY_AGG(glean_baseline_locale) OVER w1) AS glean_baseline_locale, - udf.mode_last(ARRAY_AGG(normalized_os) OVER w1) AS os, - udf.mode_last(ARRAY_AGG(normalized_os_version) OVER w1) AS os_version, - FROM - base - WHERE - -- Reprocess all dates by running this query with --parameter=submission_date:DATE:NULL - (@submission_date IS NULL OR @submission_date = submission_date) - WINDOW - w1 AS ( - PARTITION BY - sample_id, - client_id, - submission_date - ORDER BY - submission_timestamp - ROWS BETWEEN - UNBOUNDED PRECEDING - AND UNBOUNDED FOLLOWING - ), - -- We must provide a modified window for ROW_NUMBER which cannot accept a frame clause. - w1_unframed AS ( - PARTITION BY - sample_id, - client_id, - submission_date - ORDER BY - submission_timestamp - ) -) --- -SELECT - wnd.submission_date, - cfs.fenix_first_seen_date, - cfs.fennec_first_seen_date, - wnd.* EXCEPT (_n, submission_date) -FROM - windowed AS wnd --- We incur the expense of joining in first_seen dates here so that we can --- identify returning users cheaply in further views on top of baseline_daily. -LEFT JOIN - clients_first_seen_v1 AS cfs -USING - (client_id) -WHERE - _n = 1 diff --git a/sql/org_mozilla_firefox_derived/clients_first_seen_v1/init.sql b/sql/org_mozilla_firefox_derived/clients_first_seen_v1/init.sql deleted file mode 100644 index dcf0c7bc6ec..00000000000 --- a/sql/org_mozilla_firefox_derived/clients_first_seen_v1/init.sql +++ /dev/null @@ -1,101 +0,0 @@ --- See query.sql for more explanation of what's going on here. -CREATE TABLE - `moz-fx-data-shared-prod.org_mozilla_firefox_derived.clients_first_seen_v1` -PARTITION BY - (fenix_first_seen_date) -AS -WITH base AS ( - SELECT - client_info.client_id, - submission_timestamp, - CAST(NULL AS STRING) AS fennec_id - FROM - org_mozilla_firefox.baseline - UNION ALL - SELECT - client_info.client_id, - submission_timestamp, - CAST(NULL AS STRING) AS fennec_id - FROM - org_mozilla_firefox_beta.baseline - UNION ALL - SELECT - client_info.client_id, - submission_timestamp, - CAST(NULL AS STRING) AS fennec_id - FROM - org_mozilla_fennec_aurora.baseline - UNION ALL - SELECT - client_info.client_id, - submission_timestamp, - CAST(NULL AS STRING) AS fennec_id - FROM - org_mozilla_fenix.baseline - UNION ALL - SELECT - client_info.client_id, - submission_timestamp, - CAST(NULL AS STRING) AS fennec_id - FROM - org_mozilla_fenix_nightly.baseline - UNION ALL - SELECT - client_info.client_id, - submission_timestamp, - metrics.uuid.migration_telemetry_identifiers_fennec_client_id AS fennec_id - FROM - org_mozilla_firefox.migration - UNION ALL - SELECT - client_info.client_id, - submission_timestamp, - metrics.uuid.migration_telemetry_identifiers_fennec_client_id AS fennec_id - FROM - org_mozilla_firefox_beta.migration - UNION ALL - SELECT - client_info.client_id, - submission_timestamp, - metrics.uuid.migration_telemetry_identifiers_fennec_client_id AS fennec_id - FROM - org_mozilla_fennec_aurora.migration - UNION ALL - SELECT - client_info.client_id, - submission_timestamp, - metrics.uuid.migration_telemetry_identifiers_fennec_client_id AS fennec_id - FROM - org_mozilla_fenix.migration - UNION ALL - SELECT - client_info.client_id, - submission_timestamp, - metrics.uuid.migration_telemetry_identifiers_fennec_client_id AS fennec_id - FROM - org_mozilla_fenix_nightly.migration -), --- -per_client_id AS ( - SELECT - client_id, - DATE(MIN(submission_timestamp)) AS fenix_first_seen_date, - udf.mode_last(ARRAY_AGG(LOWER(fennec_id))) AS fennec_id, - FROM - base - WHERE - submission_timestamp > '2010-01-01' - GROUP BY - client_id -) --- -SELECT - per_client_id.client_id, - per_client_id.fenix_first_seen_date, - core_clients_first_seen.first_seen_date AS fennec_first_seen_date, -FROM - per_client_id -LEFT JOIN - telemetry_derived.core_clients_first_seen_v1 AS core_clients_first_seen -ON - (fennec_id = core_clients_first_seen.client_id) diff --git a/sql/org_mozilla_firefox_derived/clients_first_seen_v1/metadata.yaml b/sql/org_mozilla_firefox_derived/clients_first_seen_v1/metadata.yaml deleted file mode 100755 index 8fa3f1329d5..00000000000 --- a/sql/org_mozilla_firefox_derived/clients_first_seen_v1/metadata.yaml +++ /dev/null @@ -1,11 +0,0 @@ -friendly_name: Clients First Seen -description: > - A brief summary of the first date each Firefox for Android (Fenix) client - sent a ping to the telemetry infrastructure. Each client will appear exactly - once in this table over all time. -owners: - - jklukas@mozilla.com -labels: - application: fenix - schedule: daily - incremental: true diff --git a/sql/org_mozilla_firefox_derived/clients_first_seen_v1/query.sql b/sql/org_mozilla_firefox_derived/clients_first_seen_v1/query.sql deleted file mode 100644 index 29b2c2dcb50..00000000000 --- a/sql/org_mozilla_firefox_derived/clients_first_seen_v1/query.sql +++ /dev/null @@ -1,107 +0,0 @@ -/* - -This clients_first_seen table is incrementally populated every day such that -each client only appears once in the entire table. For each day's query, we -scan the entire history of the table to exclude clients we've already recorded -as seen. - -This table is convenient for retention calculations where we want to be able to -tell the difference between a lapsed user returning and a truly new user. - -*/ --- We union over baseline pings and migration pings so that we can pull the --- user's fennec user ID from the migration ping if they are being upgraded --- from fennec. -WITH base AS ( - SELECT - client_info.client_id, - submission_timestamp, - CAST(NULL AS STRING) AS fennec_id - FROM - org_mozilla_firefox.baseline - UNION ALL - SELECT - client_info.client_id, - submission_timestamp, - CAST(NULL AS STRING) AS fennec_id - FROM - org_mozilla_firefox_beta.baseline - UNION ALL - SELECT - client_info.client_id, - submission_timestamp, - CAST(NULL AS STRING) AS fennec_id - FROM - org_mozilla_fennec_aurora.baseline - UNION ALL - SELECT - client_info.client_id, - submission_timestamp, - CAST(NULL AS STRING) AS fennec_id - FROM - org_mozilla_fenix.baseline - UNION ALL - SELECT - client_info.client_id, - submission_timestamp, - CAST(NULL AS STRING) AS fennec_id - FROM - org_mozilla_fenix_nightly.baseline - UNION ALL - SELECT - client_info.client_id, - submission_timestamp, - metrics.uuid.migration_telemetry_identifiers_fennec_client_id AS fennec_id - FROM - org_mozilla_firefox.migration - UNION ALL - SELECT - client_info.client_id, - submission_timestamp, - metrics.uuid.migration_telemetry_identifiers_fennec_client_id AS fennec_id - FROM - org_mozilla_firefox_beta.migration - UNION ALL - SELECT - client_info.client_id, - submission_timestamp, - metrics.uuid.migration_telemetry_identifiers_fennec_client_id AS fennec_id - FROM - org_mozilla_fennec_aurora.migration - UNION ALL - SELECT - client_info.client_id, - submission_timestamp, - metrics.uuid.migration_telemetry_identifiers_fennec_client_id AS fennec_id - FROM - org_mozilla_fenix.migration - UNION ALL - SELECT - client_info.client_id, - submission_timestamp, - metrics.uuid.migration_telemetry_identifiers_fennec_client_id AS fennec_id - FROM - org_mozilla_fenix_nightly.migration -), -per_client AS ( - SELECT - client_id, - DATE(MIN(submission_timestamp)) AS fenix_first_seen_date, - udf.mode_last(ARRAY_AGG(LOWER(fennec_id))) AS fennec_id, - FROM - base - WHERE - DATE(submission_timestamp) = @submission_date - GROUP BY - client_id -) -SELECT - per_client.client_id, - per_client.fenix_first_seen_date, - core_clients_first_seen.first_seen_date AS fennec_first_seen_date, -FROM - per_client -LEFT JOIN - telemetry_derived.core_clients_first_seen_v1 AS core_clients_first_seen -ON - (fennec_id = core_clients_first_seen.client_id) diff --git a/sql/org_mozilla_firefox_derived/clients_last_seen_v1/init.sql b/sql/org_mozilla_firefox_derived/clients_last_seen_v1/init.sql deleted file mode 100644 index 0605b47e62e..00000000000 --- a/sql/org_mozilla_firefox_derived/clients_last_seen_v1/init.sql +++ /dev/null @@ -1,34 +0,0 @@ -CREATE TABLE - `moz-fx-data-shared-prod.org_mozilla_firefox_derived.clients_last_seen_v1` -PARTITION BY - submission_date -CLUSTER BY - normalized_channel, - sample_id -OPTIONS - (require_partition_filter = TRUE) -AS -SELECT - CAST(NULL AS DATE) AS submission_date, - client_id, - sample_id, - baseline.normalized_channel, --- We make sure to delay * until the end so that as new columns are added --- to clients_daily, we can add those columns in the same order to the end --- of this schema, which may be necessary for the daily join query between --- the two tables to validate. - ( - SELECT AS STRUCT - 0 AS days_seen_bits, - baseline.* EXCEPT (submission_date, client_id, sample_id, normalized_channel) - ) AS baseline, - (SELECT AS STRUCT metrics.* EXCEPT (submission_date, client_id, sample_id)) AS metrics, -FROM - `moz-fx-data-shared-prod.org_mozilla_firefox_derived.baseline_daily_v1` AS baseline -LEFT JOIN - `moz-fx-data-shared-prod.org_mozilla_firefox_derived.metrics_daily_v1` AS metrics -USING - (client_id, sample_id) -WHERE --- Output empty table and read no input rows - FALSE diff --git a/sql/org_mozilla_firefox_derived/clients_last_seen_v1/metadata.yaml b/sql/org_mozilla_firefox_derived/clients_last_seen_v1/metadata.yaml deleted file mode 100755 index 48414033f5c..00000000000 --- a/sql/org_mozilla_firefox_derived/clients_last_seen_v1/metadata.yaml +++ /dev/null @@ -1,11 +0,0 @@ -friendly_name: Clients Last Seen -description: > - A rolling view of aggregated activity over the past 28 days - of each Firefox for Android (Fenix) client across all release channels, - partitioned by day -owners: - - jklukas@mozilla.com -labels: - application: fenix - schedule: daily - incremental: true diff --git a/sql/org_mozilla_firefox_derived/clients_last_seen_v1/query.sql b/sql/org_mozilla_firefox_derived/clients_last_seen_v1/query.sql deleted file mode 100644 index a0784079c88..00000000000 --- a/sql/org_mozilla_firefox_derived/clients_last_seen_v1/query.sql +++ /dev/null @@ -1,85 +0,0 @@ -/* - -This query carries information about each client over a 28-day window, -encoding various forms of usage over those 28 days as bit patterns. - -For Glean, the concept of a user being active or not is determined only -from baseline pings; we also record some dimensions here based on metrics -pings, but we are careful to keep the baseline info separate from the -metrics info so that metrics pings only have the effect of providing -updated dimension values, but never have the effect of marking a user as -active for a given day. - -*/ -WITH cls_yesterday AS ( - SELECT - * EXCEPT (submission_date) - FROM - clients_last_seen_v1 AS cls - WHERE - submission_date = DATE_SUB(@submission_date, INTERVAL 1 DAY) -), --- -cls_today AS ( - SELECT - client_id, - sample_id, - baseline_today.normalized_channel, - baseline_today.client_id IS NOT NULL AS baseline_received_today, - metrics_today.client_id IS NOT NULL AS metrics_received_today, - ( - SELECT AS STRUCT - -- In this raw table, we capture the history of activity over the past - -- 28 days for each usage criterion as a single 64-bit integer. The - -- rightmost bit represents whether the user was active in the current day. - CAST(baseline_today.client_id IS NOT NULL AS INT64) AS days_seen_bits, - baseline_today.* EXCEPT (submission_date, client_id, sample_id, normalized_channel) - ) AS baseline, - (SELECT AS STRUCT metrics_today.* EXCEPT (submission_date, client_id, sample_id)) AS metrics, - FROM - baseline_daily_v1 AS baseline_today - FULL JOIN - metrics_daily_v1 AS metrics_today - USING - (submission_date, client_id, sample_id) - WHERE - submission_date = @submission_date -), - -- -adjacent_days_combined AS ( - SELECT - @submission_date AS submission_date, - client_id, - sample_id, - COALESCE(cls_today.normalized_channel, cls_yesterday.normalized_channel) AS normalized_channel, - ( - SELECT AS STRUCT - IF(baseline_received_today, cls_today.baseline, cls_yesterday.baseline).* REPLACE ( - udf.combine_adjacent_days_28_bits( - cls_yesterday.baseline.days_seen_bits, - cls_today.baseline.days_seen_bits - ) AS days_seen_bits, - udf.combine_adjacent_days_28_bits( - cls_yesterday.baseline.days_seen_session_start_bits, - cls_today.baseline.days_seen_session_start_bits - ) AS days_seen_session_start_bits, - udf.combine_adjacent_days_28_bits( - cls_yesterday.baseline.days_seen_session_end_bits, - cls_today.baseline.days_seen_session_end_bits - ) AS days_seen_session_end_bits - ) - ) AS baseline, - IF(metrics_received_today, cls_today.metrics, cls_yesterday.metrics) AS metrics, - FROM - cls_today - FULL JOIN - cls_yesterday - USING - (client_id, sample_id) -) -SELECT - * -FROM - adjacent_days_combined -WHERE - baseline.days_seen_bits > 0 diff --git a/sql/org_mozilla_firefox_derived/metrics_daily_v1/metadata.yaml b/sql/org_mozilla_firefox_derived/metrics_daily_v1/metadata.yaml deleted file mode 100755 index 6c4d00235d4..00000000000 --- a/sql/org_mozilla_firefox_derived/metrics_daily_v1/metadata.yaml +++ /dev/null @@ -1,10 +0,0 @@ -friendly_name: Metrics Daily -description: > - A daily aggregate of metrics pings from each Firefox for Android (Fenix) - client across all release channels, partitioned by day -owners: - - jklukas@mozilla.com -labels: - application: fenix - schedule: daily - incremental: true diff --git a/sql/org_mozilla_firefox_derived/metrics_daily_v1/query.sql b/sql/org_mozilla_firefox_derived/metrics_daily_v1/query.sql deleted file mode 100644 index 3b89b8e1f59..00000000000 --- a/sql/org_mozilla_firefox_derived/metrics_daily_v1/query.sql +++ /dev/null @@ -1,90 +0,0 @@ -/* - -Very similar to baseline_daily, but considers only metrics pings. - -*/ -CREATE TEMP FUNCTION extract_fields(metrics ANY TYPE) AS ( - ( - SELECT AS STRUCT - metrics.submission_timestamp, - DATE(metrics.submission_timestamp) AS submission_date, - LOWER(metrics.client_info.client_id) AS client_id, - metrics.sample_id, - metrics.metrics.string.search_default_engine_name, - metrics.metrics.string.search_default_engine_code, - metrics.metrics.boolean.metrics_default_browser, - ) -); - -WITH base AS ( - SELECT - extract_fields(metrics).*, - FROM - org_mozilla_firefox.metrics AS metrics - UNION ALL - SELECT - extract_fields(metrics).*, - FROM - org_mozilla_firefox_beta.metrics AS metrics - UNION ALL - SELECT - extract_fields(metrics).*, - FROM - org_mozilla_fennec_aurora.metrics AS metrics - UNION ALL - SELECT - extract_fields(metrics).*, - FROM - org_mozilla_fenix.metrics AS metrics - UNION ALL - SELECT - extract_fields(metrics).*, - FROM - org_mozilla_fenix_nightly.metrics AS metrics -), --- -windowed AS ( - SELECT - submission_date, - client_id, - sample_id, - ROW_NUMBER() OVER w1_unframed AS _n, - -- - -- For all other dimensions, we use the mode of observed values in the day. - udf.mode_last(ARRAY_AGG(search_default_engine_name) OVER w1) AS search_default_engine_name, - udf.mode_last(ARRAY_AGG(search_default_engine_code) OVER w1) AS search_default_engine_code, - udf.mode_last(ARRAY_AGG(metrics_default_browser) OVER w1) AS metrics_default_browser, - FROM - base - WHERE - -- Reprocess all dates by running this query with --parameter=submission_date:DATE:NULL - (@submission_date IS NULL OR @submission_date = submission_date) - WINDOW - w1 AS ( - PARTITION BY - sample_id, - client_id, - submission_date - ORDER BY - submission_timestamp - ROWS BETWEEN - UNBOUNDED PRECEDING - AND UNBOUNDED FOLLOWING - ), - -- We must provide a modified window for ROW_NUMBER which cannot accept a frame clause. - w1_unframed AS ( - PARTITION BY - sample_id, - client_id, - submission_date - ORDER BY - submission_timestamp - ) -) --- -SELECT - * EXCEPT (_n) -FROM - windowed -WHERE - _n = 1