From 849f23f8b7638ce07e5381b3ad00aff5f2f87f6f Mon Sep 17 00:00:00 2001 From: swanderz Date: Fri, 23 Jul 2021 17:19:10 -0700 Subject: [PATCH 1/7] fewer adapters will need to re-implement basic_load_csv_rows --- CHANGELOG.md | 6 ++++++ .../macros/materializations/seed/seed.sql | 10 ++++++++-- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 545a4361e0d..9e26eee2357 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -36,6 +36,12 @@ Contributors: - Fix `store_failures` config when defined as a modifier for `unique` and `not_null` tests ([#3575](https://github.com/fishtown-analytics/dbt/issues/3575), [#3577](https://github.com/fishtown-analytics/dbt/pull/3577)) - Fix `where` config with `relationships` test by refactoring test SQL. Note: The default `relationships` test now includes CTEs, and may need reimplementing on adapters that don't support CTEs nested inside subqueries. ([#3579](https://github.com/fishtown-analytics/dbt/issues/3579), [#3583](https://github.com/fishtown-analytics/dbt/pull/3583)) +### Under the hood +- Allow the default seed macro's SQL parameter, `%s`, to be replaced by dispatching a new macro, `get_binding_char()`. This enables adapters with parameter marker characters such as `?` to not have to override `basic_load_csv_rows`. 
([#3622](https://github.com/fishtown-analytics/dbt/issues/3622), [#3623](https://github.com/fishtown-analytics/dbt/pull/3623)) + + +Contributors: +- [@swanderz](https://github.com/swanderz) [#3623](https://github.com/fishtown-analytics/dbt/pull/3623) ## dbt 0.20.0 (July 12, 2021) diff --git a/core/dbt/include/global_project/macros/materializations/seed/seed.sql b/core/dbt/include/global_project/macros/materializations/seed/seed.sql index d91f1915e79..e7c19eeab75 100644 --- a/core/dbt/include/global_project/macros/materializations/seed/seed.sql +++ b/core/dbt/include/global_project/macros/materializations/seed/seed.sql @@ -11,6 +11,10 @@ {{ adapter.dispatch('load_csv_rows')(model, agate_table) }} {%- endmacro %} +{% macro get_binding_char() -%} + {{ adapter.dispatch('get_binding_char')() }} +{%- endmacro %} + {% macro default__create_csv_table(model, agate_table) %} {%- set column_override = model['config'].get('column_types', {}) -%} {%- set quote_seed_column = model['config'].get('quote_columns', None) -%} @@ -47,6 +51,9 @@ {{ return(sql) }} {% endmacro %} +{% macro default__get_binding_char() %} + {{ return('%s') }} +{% endmacro %} {% macro get_seed_column_quoted_csv(model, column_names) %} {%- set quote_seed_column = model['config'].get('quote_columns', None) -%} @@ -59,7 +66,6 @@ {{ return(dest_cols_csv) }} {% endmacro %} - {% macro basic_load_csv_rows(model, batch_size, agate_table) %} {% set cols_sql = get_seed_column_quoted_csv(model, agate_table.column_names) %} {% set bindings = [] %} @@ -77,7 +83,7 @@ insert into {{ this.render() }} ({{ cols_sql }}) values {% for row in chunk -%} ({%- for column in agate_table.column_names -%} - %s + {{ get_binding_char() }} {%- if not loop.last%},{%- endif %} {%- endfor -%}) {%- if not loop.last%},{%- endif %} From ff0c3bddc880c5a5b805a8ae386ad2e4b9188bd6 Mon Sep 17 00:00:00 2001 From: swanderz Date: Mon, 26 Jul 2021 09:23:47 -0700 Subject: [PATCH 2/7] hack version --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/setup.py b/setup.py index a0867a86451..9d6c037ce4a 100644 --- a/setup.py +++ b/setup.py @@ -24,7 +24,7 @@ package_name = "dbt" -package_version = "0.21.0a1" +package_version = "0.20.0" description = """With dbt, data analysts and engineers can build analytics \ the way engineers build applications.""" From ae4c6399a6617eb6a141506175b9e7f3b60f70f4 Mon Sep 17 00:00:00 2001 From: swanderz Date: Sun, 1 Aug 2021 12:36:10 -0700 Subject: [PATCH 3/7] reordering per convention --- .../macros/materializations/seed/seed.sql | 23 +++++++++---------- 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/core/dbt/include/global_project/macros/materializations/seed/seed.sql b/core/dbt/include/global_project/macros/materializations/seed/seed.sql index e7c19eeab75..03e6156c510 100644 --- a/core/dbt/include/global_project/macros/materializations/seed/seed.sql +++ b/core/dbt/include/global_project/macros/materializations/seed/seed.sql @@ -3,18 +3,6 @@ {{ adapter.dispatch('create_csv_table')(model, agate_table) }} {%- endmacro %} -{% macro reset_csv_table(model, full_refresh, old_relation, agate_table) -%} - {{ adapter.dispatch('reset_csv_table')(model, full_refresh, old_relation, agate_table) }} -{%- endmacro %} - -{% macro load_csv_rows(model, agate_table) -%} - {{ adapter.dispatch('load_csv_rows')(model, agate_table) }} -{%- endmacro %} - -{% macro get_binding_char() -%} - {{ adapter.dispatch('get_binding_char')() }} -{%- endmacro %} - {% macro default__create_csv_table(model, agate_table) %} {%- set column_override = model['config'].get('column_types', {}) -%} {%- set quote_seed_column = model['config'].get('quote_columns', None) -%} @@ -37,6 +25,9 @@ {{ return(sql) }} {% endmacro %} +{% macro reset_csv_table(model, full_refresh, old_relation, agate_table) -%} + {{ adapter.dispatch('reset_csv_table')(model, full_refresh, old_relation, agate_table) }} +{%- endmacro %} {% macro default__reset_csv_table(model, full_refresh, old_relation, agate_table) 
%} {% set sql = "" %} @@ -51,6 +42,10 @@ {{ return(sql) }} {% endmacro %} +{% macro get_binding_char() -%} + {{ adapter.dispatch('get_binding_char')() }} +{%- endmacro %} + {% macro default__get_binding_char() %} {{ return('%s') }} {% endmacro %} @@ -66,6 +61,10 @@ {{ return(dest_cols_csv) }} {% endmacro %} +{% macro load_csv_rows(model, agate_table) -%} + {{ adapter.dispatch('load_csv_rows')(model, agate_table) }} +{%- endmacro %} + {% macro basic_load_csv_rows(model, batch_size, agate_table) %} {% set cols_sql = get_seed_column_quoted_csv(model, agate_table.column_names) %} {% set bindings = [] %} From b019a1397ac31cb6b99f8b5fe61bfa16861b07bb Mon Sep 17 00:00:00 2001 From: swanderz Date: Sun, 1 Aug 2021 12:42:38 -0700 Subject: [PATCH 4/7] make redundant basic_load_csv_rows --- .../macros/materializations/seed/seed.sql | 65 ++++++++++--------- 1 file changed, 35 insertions(+), 30 deletions(-) diff --git a/core/dbt/include/global_project/macros/materializations/seed/seed.sql b/core/dbt/include/global_project/macros/materializations/seed/seed.sql index 03e6156c510..825f79ae644 100644 --- a/core/dbt/include/global_project/macros/materializations/seed/seed.sql +++ b/core/dbt/include/global_project/macros/materializations/seed/seed.sql @@ -50,6 +50,14 @@ {{ return('%s') }} {% endmacro %} +{% macro get_batch_size() -%} + {{ adapter.dispatch('get_batch_size')() }} +{%- endmacro %} + +{% macro default__get_batch_size() %} + {{ return(var("batch_size", 10000)) }} +{% endmacro %} + {% macro get_seed_column_quoted_csv(model, column_names) %} {%- set quote_seed_column = model['config'].get('quote_columns', None) -%} {% set quoted = [] %} @@ -65,47 +73,44 @@ {{ adapter.dispatch('load_csv_rows')(model, agate_table) }} {%- endmacro %} -{% macro basic_load_csv_rows(model, batch_size, agate_table) %} - {% set cols_sql = get_seed_column_quoted_csv(model, agate_table.column_names) %} - {% set bindings = [] %} +{% macro default__load_csv_rows(model, agate_table) %} - {% set 
statements = [] %} + {% set batch_size = get_batch_size() %} - {% for chunk in agate_table.rows | batch(batch_size) %} - {% set bindings = [] %} + {% set cols_sql = get_seed_column_quoted_csv(model, agate_table.column_names) %} + {% set bindings = [] %} - {% for row in chunk %} - {% do bindings.extend(row) %} - {% endfor %} + {% set statements = [] %} - {% set sql %} - insert into {{ this.render() }} ({{ cols_sql }}) values - {% for row in chunk -%} - ({%- for column in agate_table.column_names -%} - {{ get_binding_char() }} - {%- if not loop.last%},{%- endif %} - {%- endfor -%}) - {%- if not loop.last%},{%- endif %} - {%- endfor %} - {% endset %} + {% for chunk in agate_table.rows | batch(batch_size) %} + {% set bindings = [] %} - {% do adapter.add_query(sql, bindings=bindings, abridge_sql_log=True) %} + {% for row in chunk %} + {% do bindings.extend(row) %} + {% endfor %} - {% if loop.index0 == 0 %} - {% do statements.append(sql) %} - {% endif %} - {% endfor %} + {% set sql %} + insert into {{ this.render() }} ({{ cols_sql }}) values + {% for row in chunk -%} + ({%- for column in agate_table.column_names -%} + {{ get_binding_char() }} + {%- if not loop.last%},{%- endif %} + {%- endfor -%}) + {%- if not loop.last%},{%- endif %} + {%- endfor %} + {% endset %} - {# Return SQL so we can render it out into the compiled files #} - {{ return(statements[0]) }} -{% endmacro %} + {% do adapter.add_query(sql, bindings=bindings, abridge_sql_log=True) %} + {% if loop.index0 == 0 %} + {% do statements.append(sql) %} + {% endif %} + {% endfor %} -{% macro default__load_csv_rows(model, agate_table) %} - {{ return(basic_load_csv_rows(model, 10000, agate_table) )}} + {# Return SQL so we can render it out into the compiled files #} + {{ return(statements[0]) }} {% endmacro %} - {% materialization seed, default %} {%- set identifier = model['alias'] -%} From 6b0101e2b925943bd16e8df91972b375442b54e1 Mon Sep 17 00:00:00 2001 From: swanderz Date: Sun, 1 Aug 2021 12:46:09 -0700 Subject: 
[PATCH 5/7] for next version --- CHANGELOG.md | 10 ++-------- setup.py | 2 +- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9e26eee2357..8735e552e3a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,7 @@ - Move the example project used by `dbt init` into `dbt` repository, to avoid cloning an external repo ([#3005](https://github.com/fishtown-analytics/dbt/pull/3005), [#3474](https://github.com/fishtown-analytics/dbt/pull/3474), [#3536](https://github.com/fishtown-analytics/dbt/pull/3536)) - Better interaction between `dbt init` and adapters. Avoid raising errors while initializing a project ([#2814](https://github.com/fishtown-analytics/dbt/pull/2814), [#3483](https://github.com/fishtown-analytics/dbt/pull/3483)) - Update `create_adapter_plugins` script to include latest accessories, and stay up to date with latest dbt-core version ([#3002](https://github.com/fishtown-analytics/dbt/issues/3002), [#3509](https://github.com/fishtown-analytics/dbt/pull/3509)) +- Allow the default seed macro's SQL parameter, `%s`, to be replaced by dispatching a new macro, `get_binding_char()`. This enables adapters with parameter marker characters such as `?` to not have to override `basic_load_csv_rows`. 
([#3622](https://github.com/fishtown-analytics/dbt/issues/3622), [#3623](https://github.com/fishtown-analytics/dbt/pull/3623)) ### Dependencies - Require `werkzeug>=1` @@ -28,7 +29,7 @@ Contributors: - [@tconbeer](https://github.com/tconbeer) [#3468](https://github.com/fishtown-analytics/dbt/pull/3468)) - [@JLDLaughlin](https://github.com/JLDLaughlin) ([#3473](https://github.com/fishtown-analytics/dbt/pull/3473)) - [@jmriego](https://github.com/jmriego) ([#3526](https://github.com/dbt-labs/dbt/pull/3526)) - +- [@swanderz](https://github.com/swanderz) [#3623](https://github.com/fishtown-analytics/dbt/pull/3623) ## dbt 0.20.1 (Release TBD) @@ -36,13 +37,6 @@ Contributors: - Fix `store_failures` config when defined as a modifier for `unique` and `not_null` tests ([#3575](https://github.com/fishtown-analytics/dbt/issues/3575), [#3577](https://github.com/fishtown-analytics/dbt/pull/3577)) - Fix `where` config with `relationships` test by refactoring test SQL. Note: The default `relationships` test now includes CTEs, and may need reimplementing on adapters that don't support CTEs nested inside subqueries. ([#3579](https://github.com/fishtown-analytics/dbt/issues/3579), [#3583](https://github.com/fishtown-analytics/dbt/pull/3583)) -### Under the hood -- Allow the default seed macro's SQL parameter, `%s`, to be replaced by dispatching a new macro, `get_binding_char()`. This enables adapters with parameter marker characters such as `?` to not have to override `basic_load_csv_rows`. 
([#3622](https://github.com/fishtown-analytics/dbt/issues/3622), [#3623](https://github.com/fishtown-analytics/dbt/pull/3623)) - - -Contributors: -- [@swanderz](https://github.com/swanderz) [#3623](https://github.com/fishtown-analytics/dbt/pull/3623) - ## dbt 0.20.0 (July 12, 2021) ### Fixes diff --git a/setup.py b/setup.py index 9d6c037ce4a..a0867a86451 100644 --- a/setup.py +++ b/setup.py @@ -24,7 +24,7 @@ package_name = "dbt" -package_version = "0.20.0" +package_version = "0.21.0a1" description = """With dbt, data analysts and engineers can build analytics \ the way engineers build applications.""" From 6d7ca82b7c0f550fe7bfc0b2d545d5dfdd488ee8 Mon Sep 17 00:00:00 2001 From: Anders Date: Mon, 30 Aug 2021 15:36:19 -0700 Subject: [PATCH 6/7] Update core/dbt/include/global_project/macros/materializations/seed/seed.sql Co-authored-by: Jeremy Cohen --- .../global_project/macros/materializations/seed/seed.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/dbt/include/global_project/macros/materializations/seed/seed.sql b/core/dbt/include/global_project/macros/materializations/seed/seed.sql index 825f79ae644..bb028b3debf 100644 --- a/core/dbt/include/global_project/macros/materializations/seed/seed.sql +++ b/core/dbt/include/global_project/macros/materializations/seed/seed.sql @@ -55,7 +55,7 @@ {%- endmacro %} {% macro default__get_batch_size() %} - {{ return(var("batch_size", 10000)) }} + {{ return(10000) }} {% endmacro %} {% macro get_seed_column_quoted_csv(model, column_names) %} From 10f8c3c2279c1e2777785f0ba54d32121c91713a Mon Sep 17 00:00:00 2001 From: Jeremy Cohen Date: Tue, 31 Aug 2021 14:28:23 +0200 Subject: [PATCH 7/7] Move up changelog entry --- CHANGELOG.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 23aa679b335..7af79593fce 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,10 +9,12 @@ ### Under the hood - Use GitHub Actions for CI 
([#3688](https://github.com/dbt-labs/dbt/issues/3688), [#3669](https://github.com/dbt-labs/dbt/pull/3669)) +- Allow the default seed macro's SQL parameter, `%s`, to be replaced by dispatching a new macro, `get_binding_char()`. This enables adapters with parameter marker characters such as `?` to not have to override `basic_load_csv_rows`. ([#3622](https://github.com/fishtown-analytics/dbt/issues/3622), [#3623](https://github.com/fishtown-analytics/dbt/pull/3623)) Contributors: - [@xemuliam](https://github.com/xemuliam) ([#3606](https://github.com/dbt-labs/dbt/pull/3606)) +- [@swanderz](https://github.com/swanderz) [#3623](https://github.com/fishtown-analytics/dbt/pull/3623) ## dbt 0.21.0b2 (August 19, 2021) @@ -59,7 +61,6 @@ Contributors: - Better interaction between `dbt init` and adapters. Avoid raising errors while initializing a project ([#2814](https://github.com/dbt-labs/dbt/pull/2814), [#3483](https://github.com/dbt-labs/dbt/pull/3483)) - Update `create_adapter_plugins` script to include latest accessories, and stay up to date with latest dbt-core version ([#3002](https://github.com/dbt-labs/dbt/issues/3002), [#3509](https://github.com/dbt-labs/dbt/pull/3509)) - Scrub environment secrets from logs and console output ([#3617](https://github.com/dbt-labs/dbt/pull/3617)) -- Allow the default seed macro's SQL parameter, `%s`, to be replaced by dispatching a new macro, `get_binding_char()`. This enables adapters with parameter marker characters such as `?` to not have to override `basic_load_csv_rows`. 
([#3622](https://github.com/fishtown-analytics/dbt/issues/3622), [#3623](https://github.com/fishtown-analytics/dbt/pull/3623)) ### Dependencies @@ -72,7 +73,6 @@ Contributors: - [@tconbeer](https://github.com/tconbeer) [#3468](https://github.com/dbt-labs/dbt/pull/3468)) - [@JLDLaughlin](https://github.com/JLDLaughlin) ([#3473](https://github.com/dbt-labs/dbt/pull/3473)) - [@jmriego](https://github.com/jmriego) ([#3526](https://github.com/dbt-labs/dbt/pull/3526)) -- [@swanderz](https://github.com/swanderz) [#3623](https://github.com/fishtown-analytics/dbt/pull/3623) - [@danielefrigo](https://github.com/danielefrigo) ([#3547](https://github.com/dbt-labs/dbt/pull/3547)) ## dbt 0.20.2 (Release TBD)