Skip to content

Commit

Permalink
Liquid Clustering config for table materialization (#398) (#415)
Browse files Browse the repository at this point in the history
Signed-off-by: Ammar Chalifah <[email protected]>
Co-authored-by: Ammar Chalifah <[email protected]>
(cherry picked from commit b632484)
  • Loading branch information
Jesse Whitehouse committed Aug 29, 2023
1 parent b10eabf commit 45bb6fd
Show file tree
Hide file tree
Showing 4 changed files with 102 additions and 13 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
### Features

- Follow up: re-implement fix for issue where the show tables extended command is limited to 2048 characters. ([#326](https://github.com/databricks/dbt-databricks/pull/326)). Set `DBT_DESCRIBE_TABLE_2048_CHAR_BYPASS` to `true` to enable this behaviour.
- Add `liquid_clustered_by` config to enable Liquid Clustering for Delta-based dbt models. ([#398](https://github.com/databricks/dbt-databricks/pull/398))

## dbt-databricks 1.6.1 (August 2, 2023)

Expand All @@ -29,6 +30,7 @@
- Drop support for Python 3.7
- Support for revamped `dbt debug`


## dbt-databricks 1.5.5 (July 7, 2023)

### Fixes
Expand Down
1 change: 1 addition & 0 deletions dbt/adapters/databricks/impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ class DatabricksConfig(AdapterConfig):
location_root: Optional[str] = None
partition_by: Optional[Union[List[str], str]] = None
clustered_by: Optional[Union[List[str], str]] = None
liquid_clustered_by: Optional[Union[List[str], str]] = None
buckets: Optional[int] = None
options: Optional[Dict[str, str]] = None
merge_update_columns: Optional[str] = None
Expand Down
20 changes: 20 additions & 0 deletions dbt/include/databricks/macros/adapters.sql
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,25 @@
{%- endif %}
{%- endmacro -%}

{% macro liquid_clustered_cols(label, required=false) -%}
{#- Dispatch wrapper: resolves the `liquid_clustered_cols` implementation through `adapter.dispatch` (macro namespace 'dbt') and returns its result, forwarding `label` and `required` unchanged. -#}
{{ return(adapter.dispatch('liquid_clustered_cols', 'dbt')(label, required)) }}
{%- endmacro -%}

{% macro databricks__liquid_clustered_cols(label, required=false) -%}
  {#- Renders `<label> (col_1,col_2,...)` from the `liquid_clustered_by` model config.
      The config may be a single column name or a list of names; nothing is
      emitted when it is unset. `required` is accepted but not consulted here. -#}
  {%- set configured = config.get('liquid_clustered_by', validator=validation.any[list, basestring]) -%}
  {%- if configured is not none -%}
    {#- A bare string is treated as a one-element column list. -#}
    {%- set column_names = [configured] if configured is string else configured -%}
    {{ label }} ({{ column_names | join(',') }})
  {%- endif %}
{%- endmacro -%}


{% macro databricks__create_table_as(temporary, relation, compiled_code, language='sql') -%}
{%- if language == 'sql' -%}
Expand All @@ -62,6 +81,7 @@
{{ file_format_clause() }}
{{ options_clause() }}
{{ partition_cols(label="partitioned by") }}
{{ liquid_clustered_cols(label="cluster by") }}
{{ clustered_cols(label="clustered by") }}
{{ location_clause() }}
{{ comment_clause() }}
Expand Down
92 changes: 79 additions & 13 deletions tests/unit/macros/test_adapters_macros.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,26 @@ def test_macros_create_table_as_clusters(self):
"using delta clustered by (cluster_1,cluster_2) into 1 buckets as select 1",
)

def test_macros_create_table_as_liquid_cluster(self):
    # A single column name given as a plain string must render one
    # `cluster by (...)` clause right after the file format.
    self.config["liquid_clustered_by"] = "cluster_1"
    rendered = self._render_create_table_as()

    expected_sql = (
        "create or replace table my_table "
        "using delta cluster by (cluster_1) as select 1"
    )
    self.assertEqual(rendered, expected_sql)

def test_macros_create_table_as_liquid_clusters(self):
    # A list of columns must render as a comma-separated `cluster by (...)`.
    # `buckets` is set as well; the expected SQL below contains no bucket clause.
    self.config.update(
        {
            "liquid_clustered_by": ["cluster_1", "cluster_2"],
            "buckets": "1",
        }
    )
    rendered = self._render_create_table_as()

    expected_sql = (
        "create or replace table my_table "
        "using delta cluster by (cluster_1,cluster_2) as select 1"
    )
    self.assertEqual(rendered, expected_sql)

def test_macros_create_table_as_location(self):
self.config["location_root"] = "/mnt/root"
sql = self._render_create_table_as()
Expand Down Expand Up @@ -140,6 +160,7 @@ def test_macros_create_table_as_tblproperties(self):
def test_macros_create_table_as_all_delta(self):
self.config["location_root"] = "/mnt/root"
self.config["partition_by"] = ["partition_1", "partition_2"]
self.config["liquid_clustered_by"] = ["cluster_1", "cluster_2"]
self.config["clustered_by"] = ["cluster_1", "cluster_2"]
self.config["buckets"] = "1"
self.config["persist_docs"] = {"relation": True}
Expand All @@ -154,6 +175,7 @@ def test_macros_create_table_as_all_delta(self):
"create or replace table my_table "
"using delta "
"partitioned by (partition_1,partition_2) "
"cluster by (cluster_1,cluster_2) "
"clustered by (cluster_1,cluster_2) into 1 buckets "
"location '/mnt/root/my_table' "
"comment 'Description Test' "
Expand Down Expand Up @@ -434,15 +456,22 @@ def __model(self):
def test_macros_get_constraint_sql_not_null_with_columns(self):
    # A `not_null` constraint that lists several columns should produce one
    # `alter table ... change column <col> set not null` statement per column.
    model = self.__model()
    r = self.__render_constraint_sql({"type": "not_null", "columns": ["id", "name"]}, model)

    # Single assignment: the earlier one-line form of this string was
    # immediately overwritten, and the wrapped form needs no E501 suppression.
    expected = (
        "['alter table `some_database`.`some_schema`.`some_table` change column id "
        "set not null ;', 'alter table `some_database`.`some_schema`.`some_table` "
        "change column name set not null ;']"
    )

    assert expected in r

def test_macros_get_constraint_sql_not_null_with_column(self):
    # A `not_null` constraint with no `columns` key, rendered together with the
    # model's `id` column, should target that column.
    model = self.__model()
    r = self.__render_constraint_sql({"type": "not_null"}, model, model["columns"]["id"])

    # Single assignment: the earlier one-line form of this string was
    # immediately overwritten, and the wrapped form needs no E501 suppression.
    expected = (
        "['alter table `some_database`.`some_schema`.`some_table` change column id "
        "set not null ;']"
    )
    assert expected in r

def test_macros_get_constraint_sql_not_null_mismatched_columns(self):
Expand All @@ -451,7 +480,10 @@ def test_macros_get_constraint_sql_not_null_mismatched_columns(self):
{"type": "not_null", "columns": ["name"]}, model, model["columns"]["id"]
)

expected = "['alter table `some_database`.`some_schema`.`some_table` change column name set not null ;']" # noqa: E501
expected = (
"['alter table `some_database`.`some_schema`.`some_table` change column name "
"set not null ;']"
) # noqa: E501
assert expected in r

def test_macros_get_constraint_sql_check(self):
Expand All @@ -464,7 +496,10 @@ def test_macros_get_constraint_sql_check(self):
}
r = self.__render_constraint_sql(constraint, model)

expected = "['alter table `some_database`.`some_schema`.`some_table` add constraint myconstraint check (id != name);']" # noqa: E501
expected = (
"['alter table `some_database`.`some_schema`.`some_table` add constraint "
"myconstraint check (id != name);']"
) # noqa: E501
assert expected in r

def test_macros_get_constraint_sql_check_named_constraint(self):
Expand All @@ -476,7 +511,10 @@ def test_macros_get_constraint_sql_check_named_constraint(self):
}
r = self.__render_constraint_sql(constraint, model)

expected = "['alter table `some_database`.`some_schema`.`some_table` add constraint myconstraint check (id != name);']" # noqa: E501
expected = (
"['alter table `some_database`.`some_schema`.`some_table` add constraint "
"myconstraint check (id != name);']"
) # noqa: E501
assert expected in r

def test_macros_get_constraint_sql_check_none_constraint(self):
Expand All @@ -487,7 +525,10 @@ def test_macros_get_constraint_sql_check_none_constraint(self):
}
r = self.__render_constraint_sql(constraint, model)

expected = "['alter table `some_database`.`some_schema`.`some_table` add constraint None check (id != name);']" # noqa: E501
expected = (
"['alter table `some_database`.`some_schema`.`some_table` add constraint None "
"check (id != name);']"
) # noqa: E501
assert expected in r

def test_macros_get_constraint_sql_check_missing_expression(self):
Expand All @@ -509,7 +550,10 @@ def test_macros_get_constraint_sql_primary_key(self):
}
r = self.__render_constraint_sql(constraint, model)

expected = "['alter table `some_database`.`some_schema`.`some_table` add constraint myconstraint primary key(name);']" # noqa: E501
expected = (
"['alter table `some_database`.`some_schema`.`some_table` add constraint "
"myconstraint primary key(name);']"
) # noqa: E501
assert expected in r

def test_macros_get_constraint_sql_primary_key_with_specified_column(self):
Expand All @@ -522,7 +566,10 @@ def test_macros_get_constraint_sql_primary_key_with_specified_column(self):
column = {"name": "id"}
r = self.__render_constraint_sql(constraint, model, column)

expected = "['alter table `some_database`.`some_schema`.`some_table` add constraint myconstraint primary key(name);']" # noqa: E501
expected = (
"['alter table `some_database`.`some_schema`.`some_table` add constraint "
"myconstraint primary key(name);']"
) # noqa: E501
assert expected in r

def test_macros_get_constraint_sql_primary_key_with_name(self):
Expand All @@ -534,7 +581,10 @@ def test_macros_get_constraint_sql_primary_key_with_name(self):
column = {"name": "id"}
r = self.__render_constraint_sql(constraint, model, column)

expected = "['alter table `some_database`.`some_schema`.`some_table` add constraint myconstraint primary key(id);']" # noqa: E501
expected = (
"['alter table `some_database`.`some_schema`.`some_table` add constraint "
"myconstraint primary key(id);']"
) # noqa: E501
assert expected in r

def test_macros_get_constraint_sql_foreign_key(self):
Expand All @@ -547,7 +597,11 @@ def test_macros_get_constraint_sql_foreign_key(self):
}
r = self.__render_constraint_sql(constraint, model)

expected = "['alter table `some_database`.`some_schema`.`some_table` add constraint myconstraint foreign key(name) references some_schema.parent_table;']" # noqa: E501
expected = (
"['alter table `some_database`.`some_schema`.`some_table` add "
"constraint myconstraint foreign key(name) references "
"some_schema.parent_table;']"
) # noqa: E501
assert expected in r

def test_macros_get_constraint_sql_foreign_key_parent_column(self):
Expand All @@ -561,7 +615,11 @@ def test_macros_get_constraint_sql_foreign_key_parent_column(self):
}
r = self.__render_constraint_sql(constraint, model)

expected = "['alter table `some_database`.`some_schema`.`some_table` add constraint myconstraint foreign key(name) references some_schema.parent_table(parent_name);']" # noqa: E501
expected = (
"['alter table `some_database`.`some_schema`.`some_table` add "
"constraint myconstraint foreign key(name) references "
"some_schema.parent_table(parent_name);']"
) # noqa: E501
assert expected in r

def test_macros_get_constraint_sql_foreign_key_multiple_columns(self):
Expand All @@ -575,7 +633,11 @@ def test_macros_get_constraint_sql_foreign_key_multiple_columns(self):
}
r = self.__render_constraint_sql(constraint, model)

expected = "['alter table `some_database`.`some_schema`.`some_table` add constraint myconstraint foreign key(name, id) references some_schema.parent_table(parent_name, parent_id);']" # noqa: E501
expected = (
"['alter table `some_database`.`some_schema`.`some_table` add constraint "
"myconstraint foreign key(name, id) "
"references some_schema.parent_table(parent_name, parent_id);']"
) # noqa: E501
assert expected in r

def test_macros_get_constraint_sql_foreign_key_columns_supplied_separately(self):
Expand All @@ -589,5 +651,9 @@ def test_macros_get_constraint_sql_foreign_key_columns_supplied_separately(self)
column = {"name": "id"}
r = self.__render_constraint_sql(constraint, model, column)

expected = "['alter table `some_database`.`some_schema`.`some_table` add constraint myconstraint foreign key(id) references some_schema.parent_table(parent_name);']" # noqa: E501
expected = (
"['alter table `some_database`.`some_schema`.`some_table` add constraint "
"myconstraint foreign key(id) references "
"some_schema.parent_table(parent_name);']"
) # noqa: E501
assert expected in r

0 comments on commit 45bb6fd

Please sign in to comment.