dbt_project.yml
name: '<YOUR PACKAGE NAME>'
version: '0.0.1'
config-version: 2

require-dbt-version: [">=1.6.0", "<2.0.0"]

profile: 'default'

dispatch:
  - macro_namespace: dbt
    search_order: ['snowplow_utils', 'dbt']
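# Note: this search_order tells dbt to look in snowplow_utils before dbt core when resolving
# dispatched macros, letting the package override core macro implementations where needed.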

model-paths: ["models"]
analysis-paths: ["analyses"]
test-paths: ["tests"]
seed-paths: ["seeds"]
macro-paths: ["macros"]
snapshot-paths: ["snapshots"]

target-path: "target"
clean-targets:
  - "target"
  - "dbt_packages"

vars:
  <YOUR PACKAGE NAME>:
    # See https://docs.snowplow.io/docs/modeling-your-data/modeling-your-data-with-dbt/dbt-configuration/<YOUR PACKAGE NAME> for more information and an interactive tool to help you with the variable setup
    # Please add only the variables whose values you change to your root dbt_project.yml file; copying all of them can lead to unexpected issues
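    # A minimal sketch (hypothetical values) of such an override in your root dbt_project.yml:
    # vars:
    #   <YOUR PACKAGE NAME>:
    #     snowplow__start_date: '2023-06-01'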

    # WAREHOUSE & TRACKER
    # snowplow__atomic_schema: 'atomic_data_sample' # Only set if not using the 'atomic' schema for Snowplow events data
    # snowplow__database: # Only set if not using target.database for Snowplow events data -- WILL BE IGNORED FOR DATABRICKS
    snowplow__dev_target_name: dev
    snowplow__events: "{{ source('atomic', 'events') }}"
    # snowplow__events_table: "events" # Only set if not using the 'events' table for Snowplow events data

    # OPERATION & LOGIC
    snowplow__allow_refresh: false
    snowplow__start_date: '2020-01-01'
    snowplow__backfill_limit_days: 30
    snowplow__upsert_lookback_days: 30
    snowplow__days_late_allowed: 3
    snowplow__max_session_days: 3
    snowplow__session_timestamp: 'collector_tstamp'
    snowplow__session_identifiers: [{"schema": "atomic", "field": "domain_sessionid"}]
    snowplow__user_identifiers: [{"schema": "atomic", "field": "domain_userid"}]
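    # Each identifier is a {"schema": ..., "field": ...} pair; "atomic" reads the field straight off
    # the events table. A hypothetical custom-context alternative (example schema/field names):
    # snowplow__session_identifiers: [{"schema": "com_mycompany_session_1", "field": "session_id"}, {"schema": "atomic", "field": "domain_sessionid"}]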
    snowplow__app_id: []
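    # An empty list applies no app_id filter; e.g. snowplow__app_id: ['website', 'mobile'] (example values) would limit processing to those two.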
    # snowplow__session_sql: 'e.domain_sessionid'
    # snowplow__user_sql: 'e.domain_userid'
    # snowplow__custom_sql: ''

    # WAREHOUSE SPECIFIC
    # Bigquery:
    snowplow__derived_tstamp_partitioned: true
    # Snowflake:
    snowplow__query_tag: snowplow_dbt
    # Databricks:
    # Depending on your use case, this should be either the catalog (for Unity Catalog users on the Databricks connector 1.1.1 and later) or the same value as your snowplow__atomic_schema (unless changed, it should be 'atomic')
    snowplow__databricks_catalog: 'hive_metastore'
    # Redshift/Postgres:
    snowplow__entities_or_sdes: []
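    # A hypothetical entry, assuming a custom entity table exists in your warehouse (key names per your package's docs):
    # snowplow__entities_or_sdes: [{'name': 'contexts_com_mycompany_click_1', 'prefix': 'click', 'alias': 'mc', 'single_entity': true}]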

# Completely or partially remove models from the manifest during run start.
on-run-start:
  - "{{ snowplow_utils.snowplow_delete_from_manifest(var('models_to_remove', [])) }}"

# Update the manifest table with the last event consumed per successfully executed node/model.
on-run-end:
  - "{{ snowplow_utils.snowplow_incremental_post_hook(package_name='<YOUR PACKAGE NAME>', incremental_manifest_table_name='<YOUR PACKAGE NAME>_incremental_manifest', base_events_this_run_table_name='<YOUR PACKAGE NAME>_base_events_this_run', session_timestamp=var('snowplow__session_timestamp')) }}"

# The 'snowplow_<YOUR PACKAGE NAME>_incremental' tag allows snowplow_incremental_post_hook to identify Snowplow models and add their last successful collector_tstamp to the manifest.
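# A minimal sketch of applying that tag in a model's config block:
# {{ config(tags=["snowplow_<YOUR PACKAGE NAME>_incremental"]) }}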