
Commit

Updates
ilias1111 committed Nov 13, 2024
1 parent fb55b41 commit ffff3fb
Showing 4 changed files with 8 additions and 5 deletions.
4 changes: 2 additions & 2 deletions integration_tests/.scripts/integration_tests.sh
@@ -23,8 +23,8 @@ fi

for db in ${DATABASES[@]}; do

if [[ "$db" == "bigquery" || "$db" == "spark_iceberg" ]]; then
echo "Snowplow web integration tests: Seeding data and doing first run"
if [[ "$db" == "bigquery" ]]; then
echo "Snowplow integration tests: Seeding data and doing first run"

eval "dbt seed --target $db --full-refresh" || exit 1
eval "dbt run --target $db --full-refresh" || exit 1
2 changes: 2 additions & 0 deletions integration_tests/macros/test_normalize_events.sql
@@ -122,6 +122,8 @@ It runs 9 tests:
{% macro spark__test_normalize_events() %}
-- Main difference here is that spark doesnt need the catalog in the from clause
{% set expected_dict = {
"flat_cols_only" : "select event_id , collector_tstamp , DATE(collector_tstamp) as collector_tstamp_date -- Flat columns from event table , app_id -- self describing events columns from event table -- context column(s) from the event table from "~target.schema~"_scratch.snowplow_normalize_base_events_this_run where event_name in ('event_name')",
"sde_plus_cols" : "select event_id , collector_tstamp , DATE(collector_tstamp) as collector_tstamp_date -- Flat columns from event table , app_id -- self describing events columns from event table , UNSTRUCT_EVENT_TEST_1.test_id as test_id , UNSTRUCT_EVENT_TEST_1.test_class as test_class -- context column(s) from the event table from "~target.schema~"_scratch.snowplow_normalize_base_events_this_run where event_name in ('event_name')",
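
The Spark expectation differs from the other targets only in how the base table is referenced: there is no catalog prefix in the from clause. A minimal sketch of the contrast, using hypothetical catalog and schema names rather than anything taken from the package:

-- targets that use a catalog (e.g. Databricks): assumed three-part table reference
select event_id, collector_tstamp
from my_catalog.my_schema_scratch.snowplow_normalize_base_events_this_run

-- spark: two-part reference, as in the expected strings above
select event_id, collector_tstamp
from my_schema_scratch.snowplow_normalize_base_events_this_run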
2 changes: 1 addition & 1 deletion integration_tests/macros/test_users_table.sql
@@ -72,7 +72,7 @@ It runs 6 tests:
{% macro spark__test_users_table() %}
-- Main difference here is that spark doesnt need the catalog in the from clause
{% set expected_dict = {
"1_context" : "with defined_user_id as ( select user_id as user_id , collector_tstamp as latest_collector_tstamp , DATE(collector_tstamp) as latest_collector_tstamp_date -- Flat columns from event table -- user column(s) from the event table , CONTEXTS_TEST_1[0].context_test_id as context_test_id , CONTEXTS_TEST_1[0].context_test_class as context_test_class from "~target.schema~"_scratch.snowplow_normalize_base_events_this_run where 1 = 1 ), users_ordering as ( select a.* , row_number() over (partition by user_id order by latest_collector_tstamp desc) as rn from defined_user_id a where user_id is not null ) select * except (rn) from users_ordering where rn = 1",
"2_context" : "with defined_user_id as ( select user_id as user_id , collector_tstamp as latest_collector_tstamp , DATE(collector_tstamp) as latest_collector_tstamp_date -- Flat columns from event table -- user column(s) from the event table , CONTEXTS_TEST_1[0].context_test_id as context_test_id , CONTEXTS_TEST_1[0].context_test_class as context_test_class , CONTEXT_TEST2_1[0].context_test_id2 as context_test_id2 , CONTEXT_TEST2_1[0].context_test_class2 as context_test_class2 from "~target.schema~"_scratch.snowplow_normalize_base_events_this_run where 1 = 1 ), users_ordering as ( select a.* , row_number() over (partition by user_id order by latest_collector_tstamp desc) as rn from defined_user_id a where user_id is not null ) select * except (rn) from users_ordering where rn = 1",
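
The expected users-table SQL above follows a latest-record-per-user pattern: rank each user's rows by collector timestamp, keep the newest, and drop the helper column. A stripped-down sketch of that pattern, with illustrative table and column names:

with defined_user_id as (
    select
        user_id,
        collector_tstamp as latest_collector_tstamp,
        some_context_field  -- illustrative user/context column
    from my_schema_scratch.snowplow_normalize_base_events_this_run
), users_ordering as (
    select
        a.*,
        row_number() over (partition by user_id order by latest_collector_tstamp desc) as rn
    from defined_user_id a
    where user_id is not null
)
select * except (rn)  -- except() drops the ranking column on BigQuery/Databricks/Spark
from users_ordering
where rn = 1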
5 changes: 3 additions & 2 deletions (filename not shown)
@@ -14,7 +14,7 @@ You may obtain a copy of the Snowplow Personal and Academic License Version 1.0

{%- set lower_limit, upper_limit, session_start_limit = snowplow_utils.return_base_new_event_limits(ref('snowplow_normalize_base_new_event_limits')) %}

- with final_cte AS (
+ with prep AS (

select
a.*
@@ -38,13 +38,14 @@ with final_cte AS (
and a.derived_tstamp <= {{ upper_limit }}
{% endif %}
and {{ snowplow_utils.app_id_filter(var("snowplow__app_id",[])) }}
+ -- We are doing the branching in order not to do the qualify in the case of spark, as it does not support it
{% if target.type in ['databricks','snowflake','bigquery'] %}
qualify row_number() over (partition by a.event_id order by a.collector_tstamp{% if target.type in ['databricks', 'spark'] -%}, a.etl_tstamp {%- endif %}) = 1
{% endif %}
)

SELECT *
- FROM final_cte
+ FROM prep
{% if target.type not in ['databricks','snowflake','bigquery'] %}
WHERE rn = 1
{% endif %}
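
The branching above exists because QUALIFY lets Snowflake, BigQuery and Databricks filter on a window function inline, while Spark has to materialise the row number in the CTE and filter it in the outer select. A rough sketch of the two equivalent de-duplication forms (the events table name is a placeholder, not the model's actual source):

-- with QUALIFY (snowflake / bigquery / databricks): filter inline on the window function
select a.*
from events a
qualify row_number() over (partition by a.event_id order by a.collector_tstamp) = 1

-- without QUALIFY (spark): expose the row number, then filter it outside the CTE
with prep as (
    select
        a.*,
        row_number() over (partition by a.event_id order by a.collector_tstamp) as rn
    from events a
)
select *
from prep
where rn = 1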
