From 56f6fd7fc7db3316d43b5beffc7e2bd40c0d6452 Mon Sep 17 00:00:00 2001 From: Agnes Kiss Date: Wed, 11 Dec 2024 18:39:32 +0000 Subject: [PATCH] Add unit tests for manifest --- integration_tests/.scripts/unit_tests.sh | 53 +++++++++++++++++- ...data_get_incremental_manifest_status_t.csv | 5 ++ ...incremental_manifest_status_t_expected.csv | 3 + .../data_get_run_limits_t.csv | 9 +++ integration_tests/dbt_project.yml | 18 ++++++ .../incremental_hooks/incremental_hooks.yml | 10 ---- .../schema.yml | 7 +++ ...get_incremental_manifest_status_macro.sql} | 0 .../schema.yml | 7 +++ ...et_incremental_manifest_status_t_macro.sql | 46 ++++++++++++++++ .../test_get_run_limits_macro/schema.yml | 9 +++ .../test_get_run_limits_macro.sql} | 0 .../test_get_run_limits_t_macro/schema.yml | 9 +++ .../test_get_run_limits_t_macro.sql | 55 +++++++++++++++++++ unit_tests/unit_tests.sh | 22 -------- 15 files changed, 219 insertions(+), 34 deletions(-) create mode 100644 integration_tests/data/incremental_hooks/data_get_incremental_manifest_status_t.csv create mode 100644 integration_tests/data/incremental_hooks/data_get_incremental_manifest_status_t_expected.csv create mode 100644 integration_tests/data/incremental_hooks/data_get_run_limits_t.csv create mode 100644 integration_tests/models/unit_tests/test_get_incremental_manifest_status_macro/schema.yml rename integration_tests/models/{incremental_hooks/test_get_incremental_manifest_status.sql => unit_tests/test_get_incremental_manifest_status_macro/test_get_incremental_manifest_status_macro.sql} (100%) create mode 100644 integration_tests/models/unit_tests/test_get_incremental_manifest_status_t_macro/schema.yml create mode 100644 integration_tests/models/unit_tests/test_get_incremental_manifest_status_t_macro/test_get_incremental_manifest_status_t_macro.sql create mode 100644 integration_tests/models/unit_tests/test_get_run_limits_macro/schema.yml rename integration_tests/models/{incremental_hooks/test_get_run_limits.sql => 
unit_tests/test_get_run_limits_macro/test_get_run_limits_macro.sql} (100%) create mode 100644 integration_tests/models/unit_tests/test_get_run_limits_t_macro/schema.yml create mode 100644 integration_tests/models/unit_tests/test_get_run_limits_t_macro/test_get_run_limits_t_macro.sql delete mode 100644 unit_tests/unit_tests.sh diff --git a/integration_tests/.scripts/unit_tests.sh b/integration_tests/.scripts/unit_tests.sh index bfd3fdcc..3f74e396 100644 --- a/integration_tests/.scripts/unit_tests.sh +++ b/integration_tests/.scripts/unit_tests.sh @@ -26,14 +26,63 @@ fi for db in ${DATABASES[@]}; do + # Run dbt seed to set up the database, this prepares the ground for int tests that come after unit tests + + echo "Snowplow unified unit tests: Seeding data" + eval "dbt seed --full-refresh --target $db" || exit 1; + # In order to test this macro we need a model reference first and also a timestamp column which the macro takes the min and max of # We need to make sure that the correct result is returned even if the table is empty and whether they want the output to be a low or a high set date in that case # All in the models folder - if [[ $BRANCH == "release" || $BRANCH == "fix/return_limits" ]]; then + if [[ $BRANCH == "release" || $BRANCH == "utils_revamp" ]]; then echo "Snowplow-utils unit tests: Run test_return_limits_from_model_macro" eval "dbt run --select +test_return_limits_from_model_macro expected_return_limits_from_model_macro --target $db --full-refresh" || exit 1; - eval "dbt test --select +test_return_limits_from_model_macro --store-failures --target $db" || exit 1; + eval "dbt test --select test_return_limits_from_model_macro --store-failures --target $db" || exit 1; + fi + + # This macro returns different queries for different states which will be used to create the base_new_event_limits table + # We need to make sure that the correct result is returned from this query depending on different inputs + # Inputs are given based on the 
get_incremental_manifest_status macro but we can just fake it as it returns an array + # Input example: ['9999-01-01 00:00:00', '9999-01-01 00:00:00', 0, false] + # Inputs are read from a seed file + + if [[ $BRANCH == "release" || $BRANCH == "utils_revamp" ]]; then + echo "Snowplow-utils unit tests: Run test_get_run_limits_macro" + eval "dbt run --select test_get_run_limits_macro --target $db --full-refresh" || exit 1; + eval "dbt test --select test_get_run_limits_macro --store-failures --target $db" || exit 1; + fi + + # This macro returns different queries for different states which will be used to create the base_new_event_limits table + # We need to make sure that the correct result is returned from this query depending on different inputs + # Inputs are given based on the get_incremental_manifest_status_t macro but we can just fake it as it returns an array + # Input example: ['9999-01-01 00:00:00', '9999-01-01 00:00:00', '9999-01-01 00:00:00', '9999-01-01 00:00:00', 0, 0, false] + # Inputs are read from a seed file + + if [[ $BRANCH == "release" || $BRANCH == "utils_revamp" ]]; then + echo "Snowplow-utils unit tests: Run test_get_run_limits_t_macro" + eval "dbt run --select test_get_run_limits_t_macro --target $db --full-refresh" || exit 1; + eval "dbt test --select test_get_run_limits_t_macro --store-failures --target $db" || exit 1; + fi + + # This macro returns the array: [min_last_success, max_last_success, models_matched_from_manifest, has_matched_all_models] + # Not too important to test, it effectively returns a min/max/count from values in the manifest based on the models in the run + # Inputs are read from a seed file, we can selectively test the different inputs depending on the models in run array so no need for it to contain exact scenarios upfront + + if [[ $BRANCH == "release" || $BRANCH == "utils_revamp" ]]; then + echo "Snowplow-utils unit tests: Run test_get_incremental_manifest_status_macro" + eval "dbt run --select test_get_incremental_manifest_status_macro --target $db 
--full-refresh" || exit 1; + eval "dbt test --select test_get_incremental_manifest_status_macro --store-failures --target $db" || exit 1; + fi + + # This macro returns the array: [min_first_processed_load_tstamp, max_first_processed_load_tstamp, min_last_processed_load_tstamp, max_last_processed_load_tstamp, models_matched_from_manifest, sync_count, has_matched_all_models] + # Not too important to test, it effectively returns a min/max/count from values in the manifest based on the models in the run + # Inputs are read from a seed file, we can selectively test the different inputs depending on the models in run array so no need for it to contain exact scenarios upfront + + if [[ $BRANCH == "release" || $BRANCH == "utils_revamp" ]]; then + echo "Snowplow-utils unit tests: Run test_get_incremental_manifest_status_t_macro" + eval "dbt run --select test_get_incremental_manifest_status_t_macro --target $db --full-refresh" || exit 1; + eval "dbt test --select test_get_incremental_manifest_status_t_macro --store-failures --target $db" || exit 1; fi done diff --git a/integration_tests/data/incremental_hooks/data_get_incremental_manifest_status_t.csv b/integration_tests/data/incremental_hooks/data_get_incremental_manifest_status_t.csv new file mode 100644 index 00000000..33048b85 --- /dev/null +++ b/integration_tests/data/incremental_hooks/data_get_incremental_manifest_status_t.csv @@ -0,0 +1,5 @@ +model,first_processed_load_tstamp,last_processed_load_tstamp +a,2020-01-01 00:00:00,2020-01-02 00:00:00 +b,2020-01-02 00:00:00,2020-01-03 00:00:00 +c,2020-01-03 00:00:00,2020-01-04 00:00:00 +d,2020-01-01 00:00:00,2020-01-02 00:00:00 diff --git a/integration_tests/data/incremental_hooks/data_get_incremental_manifest_status_t_expected.csv b/integration_tests/data/incremental_hooks/data_get_incremental_manifest_status_t_expected.csv new file mode 100644 index 00000000..3c70e017 --- /dev/null +++ 
b/integration_tests/data/incremental_hooks/data_get_incremental_manifest_status_t_expected.csv @@ -0,0 +1,3 @@ +test_case,min_first_processed_load_tstamp,max_first_processed_load_tstamp,min_last_processed_load_tstamp,max_last_processed_load_tstamp,models_matched_from_manifest,sync_count,has_matched_all_models +all model_in_run exist in manifest,2020-01-01 00:00:00,2020-01-03 00:00:00,2020-01-02 00:00:00,2020-01-04 00:00:00,3,3,true +some model_in_run exist in manifest,2020-01-01 00:00:00,2020-01-02 00:00:00,2020-01-02 00:00:00,2020-01-03 00:00:00,2,2,false diff --git a/integration_tests/data/incremental_hooks/data_get_run_limits_t.csv b/integration_tests/data/incremental_hooks/data_get_run_limits_t.csv new file mode 100644 index 00000000..2c43e469 --- /dev/null +++ b/integration_tests/data/incremental_hooks/data_get_run_limits_t.csv @@ -0,0 +1,9 @@ +min_last_processed_load_tstamp,max_last_processed_load_tstamp,models_matched_from_manifest,has_matched_all_models,sync_count,start_date,lower_limit,upper_limit +,,0,FALSE,0,2021-01-01,2021-01-01 00:00:00+00:00,2021-01-31 00:00:00+00:00 +2021-03-01 00:00:00+00:00,2021-03-01 00:00:00+00:00,10,FALSE,1,2021-01-01,2021-01-01 00:00:00+00:00,2021-01-31 00:00:00+00:00 +2021-03-01 18:00:00+00:00,2021-03-01 18:00:00+00:00,10,TRUE,1,2021-01-01,2021-03-01 18:00:00+00:00,2021-03-31 18:00:00+00:00 +2021-03-01 00:00:00+00:00,2021-03-01 00:00:00+00:00,10,FALSE,2,2021-01-01,1999-01-01 00:00:00+00:00,1999-01-02 00:00:00+00:00 +2021-03-01 00:00:00+00:00,2021-03-01 00:00:00+00:00,10,FALSE,3,2021-01-01,1999-01-01 00:00:00+00:00,1999-01-02 00:00:00+00:00 +2021-03-01 00:00:00+00:00,2021-03-05 00:00:00+00:00,10,TRUE,3,2021-01-01,1999-01-01 00:00:00+00:00,1999-01-02 00:00:00+00:00 +2021-03-01 00:00:00+00:00,2021-05-01 00:00:00+00:00,10,TRUE,3,2021-01-01,1999-01-01 00:00:00+00:00,1999-01-02 00:00:00+00:00 +2021-03-01 00:00:00+00:00,2021-03-01 00:00:00+00:00,10,TRUE,3,2021-01-01,1999-01-01 00:00:00+00:00,1999-01-02 00:00:00+00:00 diff --git 
a/integration_tests/dbt_project.yml b/integration_tests/dbt_project.yml index 3072bcd0..2100b2f4 100644 --- a/integration_tests/dbt_project.yml +++ b/integration_tests/dbt_project.yml @@ -71,6 +71,7 @@ vars: snowplow__dev_target_name: dev snowplow__databricks_catalog: 'hive_metastore' snowplow__query_tag: 'snowplow_dbt' + snowplow__testing: true models: snowplow_utils_integration_tests: @@ -138,6 +139,16 @@ seeds: +column_types: min_last_success: timestamp max_last_success: timestamp + data_get_incremental_manifest_status_t: + +column_types: + first_processed_load_tstamp: timestamp + last_processed_load_tstamp: timestamp + data_get_incremental_manifest_status_t_expected: + +column_types: + min_first_processed_load_tstamp: timestamp + max_first_processed_load_tstamp: timestamp + min_last_processed_load_tstamp: timestamp + max_last_processed_load_tstamp: timestamp data_get_run_limits: +column_types: min_last_success: timestamp @@ -145,6 +156,13 @@ seeds: start_date: date lower_limit: timestamp upper_limit: timestamp + data_get_run_limits_t: + +column_types: + min_last_processed_load_tstamp: timestamp + max_last_processed_load_tstamp: timestamp + start_date: date + lower_limit: timestamp + upper_limit: timestamp data_update_incremental_manifest_table: +column_types: is_in_manifest: boolean diff --git a/integration_tests/models/incremental_hooks/incremental_hooks.yml b/integration_tests/models/incremental_hooks/incremental_hooks.yml index d68119da..f9cd6853 100644 --- a/integration_tests/models/incremental_hooks/incremental_hooks.yml +++ b/integration_tests/models/incremental_hooks/incremental_hooks.yml @@ -1,16 +1,6 @@ version: 2 models: - - name: test_get_incremental_manifest_status - tests: - - dbt_utils.equality: - compare_model: ref('data_get_incremental_manifest_status_expected') - - name: test_get_run_limits - tests: - - dbt_utils.expression_is_true: - expression: "expected_lower_limit = actual_lower_limit" - - dbt_utils.expression_is_true: - expression: 
"expected_upper_limit = 
actual_upper_limit" - name: test_update_incremental_manifest_table tests: - dbt_utils.equality: diff --git a/integration_tests/models/unit_tests/test_get_incremental_manifest_status_macro/schema.yml b/integration_tests/models/unit_tests/test_get_incremental_manifest_status_macro/schema.yml new file mode 100644 index 00000000..6e42400f --- /dev/null +++ b/integration_tests/models/unit_tests/test_get_incremental_manifest_status_macro/schema.yml @@ -0,0 +1,7 @@ +version: 2 + +models: + - name: test_get_incremental_manifest_status_macro + tests: + - dbt_utils.equality: + compare_model: ref('data_get_incremental_manifest_status_expected') diff --git a/integration_tests/models/incremental_hooks/test_get_incremental_manifest_status.sql b/integration_tests/models/unit_tests/test_get_incremental_manifest_status_macro/test_get_incremental_manifest_status_macro.sql similarity index 100% rename from integration_tests/models/incremental_hooks/test_get_incremental_manifest_status.sql rename to integration_tests/models/unit_tests/test_get_incremental_manifest_status_macro/test_get_incremental_manifest_status_macro.sql diff --git a/integration_tests/models/unit_tests/test_get_incremental_manifest_status_t_macro/schema.yml b/integration_tests/models/unit_tests/test_get_incremental_manifest_status_t_macro/schema.yml new file mode 100644 index 00000000..e702e8e2 --- /dev/null +++ b/integration_tests/models/unit_tests/test_get_incremental_manifest_status_t_macro/schema.yml @@ -0,0 +1,7 @@ +version: 2 + +models: + - name: test_get_incremental_manifest_status_t_macro + tests: + - dbt_utils.equality: + compare_model: ref('data_get_incremental_manifest_status_t_expected') diff --git a/integration_tests/models/unit_tests/test_get_incremental_manifest_status_t_macro/test_get_incremental_manifest_status_t_macro.sql b/integration_tests/models/unit_tests/test_get_incremental_manifest_status_t_macro/test_get_incremental_manifest_status_t_macro.sql new file mode 100644 index 00000000..7f74736a 
--- /dev/null +++ b/integration_tests/models/unit_tests/test_get_incremental_manifest_status_t_macro/test_get_incremental_manifest_status_t_macro.sql @@ -0,0 +1,46 @@ +{# +Copyright (c) 2021-present Snowplow Analytics Ltd. All rights reserved. +This program is licensed to you under the Snowplow Personal and Academic License Version 1.0, +and you may not use this file except in compliance with the Snowplow Personal and Academic License Version 1.0. +You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 at https://docs.snowplow.io/personal-and-academic-license-1.0/ +#} + +{%- set all_models = snowplow_utils.get_incremental_manifest_status_t(ref('data_get_incremental_manifest_status_t'), ['a','b','c']) -%} +{%- set partial_models = snowplow_utils.get_incremental_manifest_status_t(ref('data_get_incremental_manifest_status_t'), ['b','d','e']) -%} + +with prep as ( +select + 'all model_in_run exist in manifest' as test_case, + {{ snowplow_utils.cast_to_tstamp(all_models[0]) }} as min_first_processed_load_tstamp, + {{ snowplow_utils.cast_to_tstamp(all_models[1]) }} as max_first_processed_load_tstamp, + {{ snowplow_utils.cast_to_tstamp(all_models[2]) }} as min_last_processed_load_tstamp, + {{ snowplow_utils.cast_to_tstamp(all_models[3]) }} as max_last_processed_load_tstamp, + {{all_models[4]}} as models_matched_from_manifest, + {{all_models[5]}} as sync_count, + {{all_models[6]}} as has_matched_all_models + +union all + +select + 'some model_in_run exist in manifest' as test_case, + {{ snowplow_utils.cast_to_tstamp(partial_models[0]) }} as min_first_processed_load_tstamp, + {{ snowplow_utils.cast_to_tstamp(partial_models[1]) }} as max_first_processed_load_tstamp, + {{ snowplow_utils.cast_to_tstamp(partial_models[2]) }} as min_last_processed_load_tstamp, + {{ snowplow_utils.cast_to_tstamp(partial_models[3]) }} as max_last_processed_load_tstamp, + {{partial_models[4]}} as models_matched_from_manifest, + {{partial_models[5]}} as sync_count, + 
{{partial_models[6]}} as has_matched_all_models + +) + +select + test_case, + min_first_processed_load_tstamp, + max_first_processed_load_tstamp, + min_last_processed_load_tstamp, + max_last_processed_load_tstamp, + models_matched_from_manifest, + sync_count, + cast(has_matched_all_models as {{ dbt.type_boolean() }}) as has_matched_all_models + +from prep diff --git a/integration_tests/models/unit_tests/test_get_run_limits_macro/schema.yml b/integration_tests/models/unit_tests/test_get_run_limits_macro/schema.yml new file mode 100644 index 00000000..ecfed83b --- /dev/null +++ b/integration_tests/models/unit_tests/test_get_run_limits_macro/schema.yml @@ -0,0 +1,9 @@ +version: 2 + +models: + - name: test_get_run_limits_macro + tests: + - dbt_utils.expression_is_true: + expression: "expected_lower_limit = actual_lower_limit" + - dbt_utils.expression_is_true: + expression: "expected_upper_limit = actual_upper_limit" diff --git a/integration_tests/models/incremental_hooks/test_get_run_limits.sql b/integration_tests/models/unit_tests/test_get_run_limits_macro/test_get_run_limits_macro.sql similarity index 100% rename from integration_tests/models/incremental_hooks/test_get_run_limits.sql rename to integration_tests/models/unit_tests/test_get_run_limits_macro/test_get_run_limits_macro.sql diff --git a/integration_tests/models/unit_tests/test_get_run_limits_t_macro/schema.yml b/integration_tests/models/unit_tests/test_get_run_limits_t_macro/schema.yml new file mode 100644 index 00000000..fba5f48f --- /dev/null +++ b/integration_tests/models/unit_tests/test_get_run_limits_t_macro/schema.yml @@ -0,0 +1,9 @@ +version: 2 + +models: + - name: test_get_run_limits_t_macro + tests: + - dbt_utils.expression_is_true: + expression: "expected_lower_limit = actual_lower_limit" + - dbt_utils.expression_is_true: + expression: "expected_upper_limit = actual_upper_limit" diff --git a/integration_tests/models/unit_tests/test_get_run_limits_t_macro/test_get_run_limits_t_macro.sql 
b/integration_tests/models/unit_tests/test_get_run_limits_t_macro/test_get_run_limits_t_macro.sql new file mode 100644 index 00000000..9bfc80cf --- /dev/null +++ b/integration_tests/models/unit_tests/test_get_run_limits_t_macro/test_get_run_limits_t_macro.sql @@ -0,0 +1,55 @@ +{# +Copyright (c) 2021-present Snowplow Analytics Ltd. All rights reserved. +This program is licensed to you under the Snowplow Personal and Academic License Version 1.0, +and you may not use this file except in compliance with the Snowplow Personal and Academic License Version 1.0. +You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 at https://docs.snowplow.io/personal-and-academic-license-1.0/ +#} + +{%- set data_query -%} + select * from {{ ref('data_get_run_limits_t') }} +{%- endset -%} + +{# fetch test data set as dict. dict form {column_name: (tuple_of_results) #} +{%- set raw_test_data = dbt_utils.get_query_results_as_dict(data_query) -%} + +{# Snowflake returns keys as uppercase. 
Iterate and set to lowercase #} +{% set test_data = {} %} +{% for key, value in raw_test_data.items() %} + {% do test_data.update({key.lower(): value}) %} +{% endfor %} + +{% for i in range(test_data.min_last_processed_load_tstamp|length) %} + + {# iteratively pass each row of test data into get_run_limits_t() and execute returned query + min_first_processed_load_tstamp and max_first_processed_load_tstamp are not yet used, placeholder in place #} + {%- set results = run_query(snowplow_utils.get_run_limits_t('9999-01-01 00:00:00', + '9999-01-01 00:00:00', + test_data.min_last_processed_load_tstamp[i], + test_data.max_last_processed_load_tstamp[i], + test_data.models_matched_from_manifest[i], + test_data.sync_count[i], + test_data.has_matched_all_models[i], + test_data.start_date[i])) -%} + + {# expected limits taken from test data #} + {%- set expected_lower_limit = test_data.lower_limit[i] -%} + {%- set expected_upper_limit = test_data.upper_limit[i] -%} + + {# actual limits taken from get_run_limits_t() results #} + {%- if execute -%} + {%- set actual_lower_limit = results.columns[0].values()[0] -%} + {%- set actual_upper_limit = results.columns[1].values()[0] -%} + {%- else -%} + {%- set actual_lower_limit = none -%} + {%- set actual_upper_limit = none -%} + {%- endif -%} + + {# union expected vs. actual for each test case #} + select + {{snowplow_utils.cast_to_tstamp(expected_lower_limit)}} as expected_lower_limit, + {{snowplow_utils.cast_to_tstamp(expected_upper_limit)}} as expected_upper_limit, + {{snowplow_utils.cast_to_tstamp(actual_lower_limit)}} as actual_lower_limit, + {{snowplow_utils.cast_to_tstamp(actual_upper_limit)}} as actual_upper_limit + {% if not loop.last %} union all {% endif %} + +{% endfor %} diff --git a/unit_tests/unit_tests.sh b/unit_tests/unit_tests.sh deleted file mode 100644 index 908a0324..00000000 --- a/unit_tests/unit_tests.sh +++ /dev/null @@ -1,22 +0,0 @@ -#!/bin/bash - -# Expected input: -# -d (database) target database for dbt. 
Set to 'all' to test all supported databases. - -while getopts 'd:' opt -do - case $opt in - d) DATABASE=$OPTARG - esac -done - -declare -a SUPPORTED_DATABASES=("bigquery" "databricks" "postgres" "redshift" "snowflake", "spark_iceberg") - -# set to lower case -DATABASE="$(echo $DATABASE | tr '[:upper:]' '[:lower:]')" - -if [[ $DATABASE == "all" ]]; then - DATABASES=( "${SUPPORTED_DATABASES[@]}" ) -else - DATABASES=$DATABASE -fi