From 45bb6fdb3a15f432f8e12bdd7b2f1dabfb45865d Mon Sep 17 00:00:00 2001 From: Jesse Whitehouse Date: Tue, 29 Aug 2023 16:23:25 -0400 Subject: [PATCH] Liquid Clustering config for table materialization (#398) (#415) Signed-off-by: Ammar Chalifah Co-authored-by: Ammar Chalifah <38188988+ammarchalifah@users.noreply.github.com> (cherry picked from commit b632484c26b21d92f65b049ad5946daee0901b3d) --- CHANGELOG.md | 2 + dbt/adapters/databricks/impl.py | 1 + dbt/include/databricks/macros/adapters.sql | 20 +++++ tests/unit/macros/test_adapters_macros.py | 92 +++++++++++++++++++--- 4 files changed, 102 insertions(+), 13 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a2554fe3..b828ae1f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ ### Features - Follow up: re-implement fix for issue where the show tables extended command is limited to 2048 characters. ([#326](https://github.com/databricks/dbt-databricks/pull/326)). Set `DBT_DESCRIBE_TABLE_2048_CHAR_BYPASS` to `true` to enable this behaviour. +- Add `liquid_clustered_by` config to enable Liquid Clustering for Delta-based dbt models. ## dbt-databricks 1.6.1 (August 2, 2023) @@ -29,6 +30,7 @@ - Drop support for Python 3.7 - Support for revamped `dbt debug` + ## dbt-databricks 1.5.5 (July 7, 2023) ### Fixes diff --git a/dbt/adapters/databricks/impl.py b/dbt/adapters/databricks/impl.py index 38d8488f..2c6739b7 100644 --- a/dbt/adapters/databricks/impl.py +++ b/dbt/adapters/databricks/impl.py @@ -67,6 +67,7 @@ class DatabricksConfig(AdapterConfig): location_root: Optional[str] = None partition_by: Optional[Union[List[str], str]] = None clustered_by: Optional[Union[List[str], str]] = None + liquid_clustered_by: Optional[Union[List[str], str]] = None buckets: Optional[int] = None options: Optional[Dict[str, str]] = None merge_update_columns: Optional[str] = None diff --git a/dbt/include/databricks/macros/adapters.sql b/dbt/include/databricks/macros/adapters.sql index bf742c80..82bc94fe 100644 --- a/dbt/include/databricks/macros/adapters.sql +++ b/dbt/include/databricks/macros/adapters.sql @@ -43,6 +43,25 @@ {%- endif %} {%- endmacro -%} +{% macro liquid_clustered_cols(label, required=false) -%} + {{ return(adapter.dispatch('liquid_clustered_cols', 'dbt')(label, required)) }} +{%- endmacro -%} + +{% macro databricks__liquid_clustered_cols(label, required=false) -%} + {%- set cols = config.get('liquid_clustered_by', validator=validation.any[list, basestring]) -%} + {%- if cols is not none %} + {%- if cols is string -%} + {%- set cols = [cols] -%} + {%- endif -%} + {{ label }} ( + {%- for item in cols -%} + {{ item }} + {%- if not loop.last -%},{%- endif -%} + {%- endfor -%} + ) + {%- endif %} +{%- endmacro -%} + {% macro databricks__create_table_as(temporary, relation, compiled_code, language='sql') -%} {%- if language == 'sql' -%} @@ -62,6 +81,7 @@ {{ file_format_clause() }} {{ options_clause() }} {{ partition_cols(label="partitioned by") }} + {{ liquid_clustered_cols(label="cluster by") }} {{ clustered_cols(label="clustered by") }} {{ location_clause() }} {{ comment_clause() }} diff --git a/tests/unit/macros/test_adapters_macros.py b/tests/unit/macros/test_adapters_macros.py index 4cad1013..a092fdec 100644 --- a/tests/unit/macros/test_adapters_macros.py +++ b/tests/unit/macros/test_adapters_macros.py @@ -105,6 +105,26 @@ def test_macros_create_table_as_clusters(self): "using delta clustered by (cluster_1,cluster_2) into 1 buckets as select 1", ) + def test_macros_create_table_as_liquid_cluster(self): + self.config["liquid_clustered_by"] = "cluster_1" + sql = self._render_create_table_as() + + self.assertEqual( + sql, + "create or replace table my_table " "using delta cluster by (cluster_1) as select 1", + ) + + def test_macros_create_table_as_liquid_clusters(self): + self.config["liquid_clustered_by"] = ["cluster_1", "cluster_2"] + self.config["buckets"] = "1" + sql = self._render_create_table_as() + + self.assertEqual( + sql, + "create or replace table my_table " + "using delta cluster by (cluster_1,cluster_2) as select 1", + ) + def test_macros_create_table_as_location(self): self.config["location_root"] = "/mnt/root" sql = self._render_create_table_as() @@ -140,6 +160,7 @@ def test_macros_create_table_as_tblproperties(self): def test_macros_create_table_as_all_delta(self): self.config["location_root"] = "/mnt/root" self.config["partition_by"] = ["partition_1", "partition_2"] + self.config["liquid_clustered_by"] = ["cluster_1", "cluster_2"] self.config["clustered_by"] = ["cluster_1", "cluster_2"] self.config["buckets"] = "1" self.config["persist_docs"] = {"relation": True} @@ -154,6 +175,7 @@ def test_macros_create_table_as_all_delta(self): "create or replace table my_table " "using delta " "partitioned by (partition_1,partition_2) " + "cluster by (cluster_1,cluster_2) " "clustered by (cluster_1,cluster_2) into 1 buckets " "location '/mnt/root/my_table' " "comment 'Description Test' " @@ -434,7 +456,11 @@ def __model(self): def test_macros_get_constraint_sql_not_null_with_columns(self): model = self.__model() r = self.__render_constraint_sql({"type": "not_null", "columns": ["id", "name"]}, model) - expected = "['alter table `some_database`.`some_schema`.`some_table` change column id set not null ;', 'alter table `some_database`.`some_schema`.`some_table` change column name set not null ;']" # noqa: E501 + expected = ( + "['alter table `some_database`.`some_schema`.`some_table` change column id " + "set not null ;', 'alter table `some_database`.`some_schema`.`some_table` " + "change column name set not null ;']" + ) # noqa: E501 assert expected in r @@ -442,7 +468,10 @@ def test_macros_get_constraint_sql_not_null_with_column(self): model = self.__model() r = self.__render_constraint_sql({"type": "not_null"}, model, model["columns"]["id"]) - expected = "['alter table `some_database`.`some_schema`.`some_table` change column id set not null ;']" # noqa: E501 + expected = ( + "['alter table `some_database`.`some_schema`.`some_table` change column id " + "set not null ;']" + ) # noqa: E501 assert expected in r def test_macros_get_constraint_sql_not_null_mismatched_columns(self): @@ -451,7 +480,10 @@ def test_macros_get_constraint_sql_not_null_mismatched_columns(self): {"type": "not_null", "columns": ["name"]}, model, model["columns"]["id"] ) - expected = "['alter table `some_database`.`some_schema`.`some_table` change column name set not null ;']" # noqa: E501 + expected = ( + "['alter table `some_database`.`some_schema`.`some_table` change column name " + "set not null ;']" + ) # noqa: E501 assert expected in r def test_macros_get_constraint_sql_check(self): @@ -464,7 +496,10 @@ def test_macros_get_constraint_sql_check(self): } r = self.__render_constraint_sql(constraint, model) - expected = "['alter table `some_database`.`some_schema`.`some_table` add constraint myconstraint check (id != name);']" # noqa: E501 + expected = ( + "['alter table `some_database`.`some_schema`.`some_table` add constraint " + "myconstraint check (id != name);']" + ) # noqa: E501 assert expected in r def test_macros_get_constraint_sql_check_named_constraint(self): @@ -476,7 +511,10 @@ def test_macros_get_constraint_sql_check_named_constraint(self): } r = self.__render_constraint_sql(constraint, model) - expected = "['alter table `some_database`.`some_schema`.`some_table` add constraint myconstraint check (id != name);']" # noqa: E501 + expected = ( + "['alter table `some_database`.`some_schema`.`some_table` add constraint " + "myconstraint check (id != name);']" + ) # noqa: E501 assert expected in r def test_macros_get_constraint_sql_check_none_constraint(self): @@ -487,7 +525,10 @@ def test_macros_get_constraint_sql_check_none_constraint(self): } r = self.__render_constraint_sql(constraint, model) - expected = "['alter table `some_database`.`some_schema`.`some_table` add constraint None check (id != name);']" # noqa: E501 + expected = ( + "['alter table `some_database`.`some_schema`.`some_table` add constraint None " + "check (id != name);']" + ) # noqa: E501 assert expected in r def test_macros_get_constraint_sql_check_missing_expression(self): @@ -509,7 +550,10 @@ def test_macros_get_constraint_sql_primary_key(self): } r = self.__render_constraint_sql(constraint, model) - expected = "['alter table `some_database`.`some_schema`.`some_table` add constraint myconstraint primary key(name);']" # noqa: E501 + expected = ( + "['alter table `some_database`.`some_schema`.`some_table` add constraint " + "myconstraint primary key(name);']" + ) # noqa: E501 assert expected in r def test_macros_get_constraint_sql_primary_key_with_specified_column(self): @@ -522,7 +566,10 @@ def test_macros_get_constraint_sql_primary_key_with_specified_column(self): column = {"name": "id"} r = self.__render_constraint_sql(constraint, model, column) - expected = "['alter table `some_database`.`some_schema`.`some_table` add constraint myconstraint primary key(name);']" # noqa: E501 + expected = ( + "['alter table `some_database`.`some_schema`.`some_table` add constraint " + "myconstraint primary key(name);']" + ) # noqa: E501 assert expected in r def test_macros_get_constraint_sql_primary_key_with_name(self): @@ -534,7 +581,10 @@ def test_macros_get_constraint_sql_primary_key_with_name(self): column = {"name": "id"} r = self.__render_constraint_sql(constraint, model, column) - expected = "['alter table `some_database`.`some_schema`.`some_table` add constraint myconstraint primary key(id);']" # noqa: E501 + expected = ( + "['alter table `some_database`.`some_schema`.`some_table` add constraint " + "myconstraint primary key(id);']" + ) # noqa: E501 assert expected in r def test_macros_get_constraint_sql_foreign_key(self): @@ -547,7 +597,11 @@ def test_macros_get_constraint_sql_foreign_key(self): } r = self.__render_constraint_sql(constraint, model) - expected = "['alter table `some_database`.`some_schema`.`some_table` add constraint myconstraint foreign key(name) references some_schema.parent_table;']" # noqa: E501 + expected = ( + "['alter table `some_database`.`some_schema`.`some_table` add " + "constraint myconstraint foreign key(name) references " + "some_schema.parent_table;']" + ) # noqa: E501 assert expected in r def test_macros_get_constraint_sql_foreign_key_parent_column(self): @@ -561,7 +615,11 @@ def test_macros_get_constraint_sql_foreign_key_parent_column(self): } r = self.__render_constraint_sql(constraint, model) - expected = "['alter table `some_database`.`some_schema`.`some_table` add constraint myconstraint foreign key(name) references some_schema.parent_table(parent_name);']" # noqa: E501 + expected = ( + "['alter table `some_database`.`some_schema`.`some_table` add " + "constraint myconstraint foreign key(name) references " + "some_schema.parent_table(parent_name);']" + ) # noqa: E501 assert expected in r def test_macros_get_constraint_sql_foreign_key_multiple_columns(self): @@ -575,7 +633,11 @@ def test_macros_get_constraint_sql_foreign_key_multiple_columns(self): } r = self.__render_constraint_sql(constraint, model) - expected = "['alter table `some_database`.`some_schema`.`some_table` add constraint myconstraint foreign key(name, id) references some_schema.parent_table(parent_name, parent_id);']" # noqa: E501 + expected = ( + "['alter table `some_database`.`some_schema`.`some_table` add constraint " + "myconstraint foreign key(name, id) " + "references some_schema.parent_table(parent_name, parent_id);']" + ) # noqa: E501 assert expected in r def test_macros_get_constraint_sql_foreign_key_columns_supplied_separately(self): @@ -589,5 +651,9 @@ def test_macros_get_constraint_sql_foreign_key_columns_supplied_separately(self) column = {"name": "id"} r = self.__render_constraint_sql(constraint, model, column) - expected = "['alter table `some_database`.`some_schema`.`some_table` add constraint myconstraint foreign key(id) references some_schema.parent_table(parent_name);']" # noqa: E501 + expected = ( + "['alter table `some_database`.`some_schema`.`some_table` add constraint " + "myconstraint foreign key(id) references " + "some_schema.parent_table(parent_name);']" + ) # noqa: E501 assert expected in r