From 82b3740353fdaf01523fd743749b9032e08a547f Mon Sep 17 00:00:00 2001 From: Jeremy Cohen Date: Fri, 18 Mar 2022 08:30:11 +0100 Subject: [PATCH 1/8] Support user-supplied name for generic tests --- core/dbt/parser/generic_test_builders.py | 32 +++++++++++++++++------- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/core/dbt/parser/generic_test_builders.py b/core/dbt/parser/generic_test_builders.py index 3d522dec7ce..63bfbf26300 100644 --- a/core/dbt/parser/generic_test_builders.py +++ b/core/dbt/parser/generic_test_builders.py @@ -25,9 +25,12 @@ from dbt.parser.search import FileBlock -def get_nice_generic_test_name( +def synthesize_generic_test_names( test_type: str, test_name: str, args: Dict[str, Any] ) -> Tuple[str, str]: + # Using the type, name, and arguments to this generic test, synthesize a (hopefully) unique name + # Returns a shorter version (hashed/truncated, for the compiled file) + # as well as the full name (for the unique_id + FQN) flat_args = [] for arg_name in sorted(args): # the model is already embedded in the name, so skip it @@ -263,13 +266,23 @@ def __init__( if self.namespace is not None: self.package_name = self.namespace - compiled_name, fqn_name = self.get_test_name() - self.compiled_name: str = compiled_name - self.fqn_name: str = fqn_name + # If the user has provided a custom name for this generic test, use it + # Then delete the "name" argument to avoid passing it into the test macro + # Otherwise, use an auto-generated name synthesized from test inputs + self.compiled_name: str = "" + self.fqn_name: str = "" - # use hashed name as alias if too long - if compiled_name != fqn_name and "alias" not in self.config: - self.config["alias"] = compiled_name + if "name" in self.args: + self.compiled_name = self.args["name"] + self.fqn_name = self.args["name"] + del self.args["name"] + else: + short_name, full_name = self.get_synthetic_test_names() + self.compiled_name = short_name + self.fqn_name = full_name + # use hashed name as alias if full name is too long + if short_name != full_name and "alias" not in self.config: + self.config["alias"] = short_name def _bad_type(self) -> TypeError: return TypeError('invalid target type "{}"'.format(type(self.target))) @@ -401,7 +414,8 @@ def macro_name(self) -> str: macro_name = "{}.{}".format(self.namespace, macro_name) return macro_name - def get_test_name(self) -> Tuple[str, str]: + def get_synthetic_test_names(self) -> Tuple[str, str]: + # Returns two names: shorter (for the compiled file), full (for the unique_id + FQN) if isinstance(self.target, UnparsedNodeUpdate): name = self.name elif isinstance(self.target, UnpatchedSourceDefinition): @@ -410,7 +424,7 @@ def get_test_name(self) -> Tuple[str, str]: raise self._bad_type() if self.namespace is not None: name = "{}_{}".format(self.namespace, name) - return get_nice_generic_test_name(name, self.target.name, self.args) + return synthesize_generic_test_names(name, self.target.name, self.args) def construct_config(self) -> str: configs = ",".join( From 705e6ed0b9ad2fc25697edc00e72c9d0cda196fa Mon Sep 17 00:00:00 2001 From: Jeremy Cohen Date: Fri, 18 Mar 2022 08:30:59 +0100 Subject: [PATCH 2/8] Support dict-style generic test spec --- core/dbt/parser/generic_test_builders.py | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/core/dbt/parser/generic_test_builders.py b/core/dbt/parser/generic_test_builders.py index 63bfbf26300..afcf24e6808 100644 --- a/core/dbt/parser/generic_test_builders.py +++ b/core/dbt/parser/generic_test_builders.py @@ -294,13 +294,23 @@ def extract_test_args(test, name=None) -> Tuple[str, Dict[str, Any]]: "test must be dict or str, got {} (value {})".format(type(test), test) ) - test = list(test.items()) - if len(test) != 1: - raise_parsing_error( - "test definition dictionary must have exactly one key, got" - " {} instead ({} keys)".format(test, len(test)) - ) - test_name, test_args = test[0] + # If the test is a dictionary with top-level keys, the test name is "test_name" + # and the rest are arguments + # {'name': 'my_favorite_test', 'test_name': 'unique', 'config': {'where': '1=1'}} + if "test_name" in test.keys(): + test_name = test.pop("test_name") + test_args = test + # If the test is a nested dictionary with one top-level key, the test name + # is the dict name, and nested keys are arguments + # {'unique': {'name': 'my_favorite_test', 'config': {'where': '1=1'}}} + else: + test = list(test.items()) + if len(test) != 1: + raise_parsing_error( + "test definition dictionary must have exactly one key, got" + " {} instead ({} keys)".format(test, len(test)) + ) + test_name, test_args = test[0] if not isinstance(test_args, dict): raise_parsing_error( From ed3a1a5750f7d9fb54e2dc2158d64d30d0ae2eed Mon Sep 17 00:00:00 2001 From: Jeremy Cohen Date: Fri, 18 Mar 2022 08:58:03 +0100 Subject: [PATCH 3/8] Add changelog entry --- .changes/unreleased/Features-20220318-085756.yaml | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 .changes/unreleased/Features-20220318-085756.yaml diff --git a/.changes/unreleased/Features-20220318-085756.yaml b/.changes/unreleased/Features-20220318-085756.yaml new file mode 100644 index 00000000000..8b734db7797 --- /dev/null +++ b/.changes/unreleased/Features-20220318-085756.yaml @@ -0,0 +1,7 @@ +kind: Features +body: Support custom names for generic tests +time: 2022-03-18T08:57:56.05584+01:00 +custom: + Author: jtcohen6 + Issue: "3348" + PR: "4898" From d0e1fde4ffae07a0dd28137970fd5da18ceda86f Mon Sep 17 00:00:00 2001 From: Jeremy Cohen Date: Fri, 18 Mar 2022 09:05:47 +0100 Subject: [PATCH 4/8] Add TODO comment --- core/dbt/parser/generic_test_builders.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/core/dbt/parser/generic_test_builders.py b/core/dbt/parser/generic_test_builders.py index afcf24e6808..2e4d9588e25 100644 --- a/core/dbt/parser/generic_test_builders.py +++ b/core/dbt/parser/generic_test_builders.py @@ -273,6 +273,10 @@ def __init__( self.fqn_name: str = "" if "name" in self.args: + # TODO: Should we append the model name to the test name here? + # Or trust the user to have globally unique names within their project? + # Logic like this, with accounting for source table targets: + # generic_test_name = f"{self.args["name"]}_{target.name}" self.compiled_name = self.args["name"] self.fqn_name = self.args["name"] del self.args["name"] From 444c79d45dc98b9cef587757a0f91750fdda2899 Mon Sep 17 00:00:00 2001 From: Jeremy Cohen Date: Thu, 24 Mar 2022 13:29:13 +0100 Subject: [PATCH 5/8] Rework raise_duplicate_resource_name --- core/dbt/exceptions.py | 55 ++++++++++++------- .../test_duplicate_exposure.py | 2 +- .../test_duplicate_model.py | 4 +- .../test_duplicate_source.py | 2 +- 4 files changed, 39 insertions(+), 24 deletions(-) diff --git a/core/dbt/exceptions.py b/core/dbt/exceptions.py index 79f9a6c0c62..c2b4160ac0b 100644 --- a/core/dbt/exceptions.py +++ b/core/dbt/exceptions.py @@ -838,31 +838,46 @@ def raise_duplicate_macro_name(node_1, node_2, namespace) -> NoReturn: def raise_duplicate_resource_name(node_1, node_2): duped_name = node_1.name + node_type = NodeType(node_1.resource_type) + pluralized = ( + node_type.pluralize() + if node_1.resource_type == node_2.resource_type + else "resources" # still raise if ref() collision, e.g. model + seed + ) - if node_1.resource_type in NodeType.refable(): - get_func = 'ref("{}")'.format(duped_name) - elif node_1.resource_type == NodeType.Source: + action = "looking for" + # duplicate 'ref' targets + if node_type in NodeType.refable(): + formatted_name = f'ref("{duped_name}")' + # duplicate sources + elif node_type == NodeType.Source: duped_name = node_1.get_full_source_name() - get_func = node_1.get_source_representation() - elif node_1.resource_type == NodeType.Documentation: - get_func = 'doc("{}")'.format(duped_name) - elif node_1.resource_type == NodeType.Test and "schema" in node_1.tags: - return + formatted_name = node_1.get_source_representation() + # duplicate docs blocks + elif node_type == NodeType.Documentation: + formatted_name = f'doc("{duped_name}")' + # duplicate generic tests + elif node_type == NodeType.Test and hasattr(node_1, "test_metadata"): + column_name = f'column "{node_1.column_name}" in ' if node_1.column_name else "" + # TODO: we need a better way of storing the target model/source/etc in generic tests + model_name = ".".join((node_1.refs or node_1.sources)[0]) + duped_name = f'{node_1.name}" defined on {column_name}"{model_name}' + action = "running" + formatted_name = "tests" + # all other resource types else: - get_func = '"{}"'.format(duped_name) + formatted_name = duped_name + # should this be raise_parsing_error instead? raise_compiler_error( - 'dbt found two resources with the name "{}". Since these resources ' - "have the same name,\ndbt will be unable to find the correct resource " - "when {} is used. To fix this,\nchange the name of one of " - "these resources:\n- {} ({})\n- {} ({})".format( - duped_name, - get_func, - node_1.unique_id, - node_1.original_file_path, - node_2.unique_id, - node_2.original_file_path, - ) + f'dbt found two {pluralized} with the name "{duped_name}". ' + f"\n" + f"\nSince these resources have the same name, dbt will be unable to find the correct resource " + f"\nwhen {action} {formatted_name}. " + f"\n" + f"\nTo fix this, change the name of one of these resources: " + f"\n- {node_1.unique_id} ({node_1.original_file_path}) " + f"\n- {node_2.unique_id} ({node_2.original_file_path})" ) diff --git a/test/integration/025_duplicate_model_tests/test_duplicate_exposure.py b/test/integration/025_duplicate_model_tests/test_duplicate_exposure.py index 33bcb3b7892..d9890cf906c 100644 --- a/test/integration/025_duplicate_model_tests/test_duplicate_exposure.py +++ b/test/integration/025_duplicate_model_tests/test_duplicate_exposure.py @@ -14,7 +14,7 @@ def models(self): @use_profile("postgres") def test_postgres_duplicate_exposure(self): - message = "dbt found two resources with the name" + message = "dbt found two exposures with the name" try: self.run_dbt(["compile"]) self.assertTrue(False, "dbt did not throw for duplicate exposures") diff --git a/test/integration/025_duplicate_model_tests/test_duplicate_model.py b/test/integration/025_duplicate_model_tests/test_duplicate_model.py index ca34186adf8..8088ab91929 100644 --- a/test/integration/025_duplicate_model_tests/test_duplicate_model.py +++ b/test/integration/025_duplicate_model_tests/test_duplicate_model.py @@ -14,7 +14,7 @@ def models(self): @use_profile("postgres") def test_postgres_duplicate_model_enabled(self): - message = "dbt found two resources with the name" + message = "dbt found two models with the name" try: self.run_dbt(["run"]) self.assertTrue(False, "dbt did not throw for duplicate models") @@ -80,7 +80,7 @@ def packages_config(self): @use_profile("postgres") def test_postgres_duplicate_model_enabled_across_packages(self): self.run_dbt(["deps"]) - message = "dbt found two resources with the name" + message = "dbt found two models with the name" try: self.run_dbt(["run"]) self.assertTrue(False, "dbt did not throw for duplicate models") diff --git a/test/integration/025_duplicate_model_tests/test_duplicate_source.py b/test/integration/025_duplicate_model_tests/test_duplicate_source.py index 614f600f0c7..7d4facd8af2 100644 --- a/test/integration/025_duplicate_model_tests/test_duplicate_source.py +++ b/test/integration/025_duplicate_model_tests/test_duplicate_source.py @@ -14,7 +14,7 @@ def models(self): @use_profile("postgres") def test_postgres_duplicate_source_enabled(self): - message = "dbt found two resources with the name" + message = "dbt found two sources with the name" try: self.run_dbt(["compile"]) self.assertTrue(False, "dbt did not throw for duplicate sources") From 667493237578d56b1283a5c27f6ca94ba1582327 Mon Sep 17 00:00:00 2001 From: Jeremy Cohen Date: Thu, 24 Mar 2022 13:54:58 +0100 Subject: [PATCH 6/8] Add functional tests --- tests/functional/schema_tests/fixtures.py | 91 +++++++++++++++++++ .../schema_tests/test_schema_v2_tests.py | 59 +++++++++++- 2 files changed, 149 insertions(+), 1 deletion(-) diff --git a/tests/functional/schema_tests/fixtures.py b/tests/functional/schema_tests/fixtures.py index e97cc2fe01e..d024dec5468 100644 --- a/tests/functional/schema_tests/fixtures.py +++ b/tests/functional/schema_tests/fixtures.py @@ -420,6 +420,73 @@ SELECT 'NOT_NULL' AS id """ + +dupe_generic_tests_collide__schema_yml = """ +version: 2 +models: +- name: model_a + columns: + - name: id + tests: + - not_null: + config: + where: "1=1" + - not_null: + config: + where: "1=2" + +""" + +dupe_generic_tests_collide__model_a = """ +SELECT 'NOT_NULL' AS id +""" + + +custom_generic_test_names__schema_yml = """ +version: 2 +models: +- name: model_a + columns: + - name: id + tests: + - not_null: + name: not_null_where_1_equals_1 + config: + where: "1=1" + - not_null: + name: not_null_where_1_equals_2 + config: + where: "1=2" + +""" + +custom_generic_test_names__model_a = """ +SELECT 'NOT_NULL' AS id +""" + +custom_generic_test_names_alt_format__schema_yml = """ +version: 2 +models: +- name: model_a + columns: + - name: id + tests: + - name: not_null_where_1_equals_1 + test_name: not_null + config: + where: "1=1" + - name: not_null_where_1_equals_2 + test_name: not_null + config: + where: "1=2" + +""" + +custom_generic_test_names_alt_format__model_a = """ +SELECT 'NOT_NULL' AS id +""" + + test_context_where_subq_macros__custom_generic_test_sql = """ /*{# This test will fail if get_where_subquery() is missing from TestContext + TestMacroNamespace #}*/ @@ -1266,6 +1333,30 @@ def name_collision(): } +@pytest.fixture(scope="class") +def dupe_tests_collide(): + return { + "schema.yml": dupe_generic_tests_collide__schema_yml, + "model_a.sql": dupe_generic_tests_collide__model_a, + } + + +@pytest.fixture(scope="class") +def custom_generic_test_names(): + return { + "schema.yml": custom_generic_test_names__schema_yml, + "model_a.sql": custom_generic_test_names__model_a, + } + + +@pytest.fixture(scope="class") +def custom_generic_test_names_alt_format(): + return { + "schema.yml": custom_generic_test_names_alt_format__schema_yml, + "model_a.sql": custom_generic_test_names_alt_format__model_a, + } + + @pytest.fixture(scope="class") def test_context_where_subq_macros(): return {"custom_generic_test.sql": test_context_where_subq_macros__custom_generic_test_sql} diff --git a/tests/functional/schema_tests/test_schema_v2_tests.py b/tests/functional/schema_tests/test_schema_v2_tests.py index 5b49d44242e..538f68fce22 100644 --- a/tests/functional/schema_tests/test_schema_v2_tests.py +++ b/tests/functional/schema_tests/test_schema_v2_tests.py @@ -16,6 +16,9 @@ seeds, test_context_models, name_collision, + dupe_tests_collide, + custom_generic_test_names, + custom_generic_test_names_alt_format, test_context_where_subq_macros, invalid_schema_models, all_models, @@ -25,7 +28,7 @@ project_files, case_sensitive_models__uppercase_SQL, ) -from dbt.exceptions import ParsingException +from dbt.exceptions import ParsingException, CompilationException from dbt.contracts.results import TestStatus @@ -658,6 +661,60 @@ def test_collision_test_names_get_hash( assert test_results[1].node.unique_id in expected_unique_ids +class TestGenericTestsCollide: + @pytest.fixture(scope="class") + def models(self, dupe_tests_collide): # noqa: F811 + return dupe_tests_collide + + def test_generic_test_collision( + self, + project, + ): + """These tests collide, since only the configs differ""" + with pytest.raises(CompilationException) as exc: + run_dbt() + assert "dbt found two tests with the name" in str(exc) + + +class TestGenericTestsCustomNames: + @pytest.fixture(scope="class") + def models(self, custom_generic_test_names): # noqa: F811 + return custom_generic_test_names + + def test_collision_test_names_get_hash( + self, + project, + ): + """These tests don't collide, since they have user-provided custom names""" + results = run_dbt() + test_results = run_dbt(["test"]) + + # model + both tests run + assert len(results) == 1 + assert len(test_results) == 2 + + # custom names propagate to the unique_id + expected_unique_ids = [ + "test.test.not_null_where_1_equals_1.7b96089006", + "test.test.not_null_where_1_equals_2.8ae586e17f", + ] + assert test_results[0].node.unique_id in expected_unique_ids + assert test_results[1].node.unique_id in expected_unique_ids + + +class TestGenericTestsCustomNamesAltFormat(TestGenericTestsCustomNames): + @pytest.fixture(scope="class") + def models(self, custom_generic_test_names_alt_format): # noqa: F811 + return custom_generic_test_names_alt_format + + # exactly as above, just alternative format for yaml definition + def test_collision_test_names_get_hash( + self, + project, + ): + super().test_collision_test_names_get_hash(project) + + class TestInvalidSchema: @pytest.fixture(scope="class") def models(self, invalid_schema_models): # noqa: F811 From 75b2f437635c3c01c6100be5fa320cc9040fa36f Mon Sep 17 00:00:00 2001 From: Jeremy Cohen Date: Thu, 24 Mar 2022 13:57:38 +0100 Subject: [PATCH 7/8] Update comments, rm TODO --- core/dbt/parser/generic_test_builders.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/core/dbt/parser/generic_test_builders.py b/core/dbt/parser/generic_test_builders.py index 2e4d9588e25..c2f6fe25fe4 100644 --- a/core/dbt/parser/generic_test_builders.py +++ b/core/dbt/parser/generic_test_builders.py @@ -29,6 +29,7 @@ def synthesize_generic_test_names( test_type: str, test_name: str, args: Dict[str, Any] ) -> Tuple[str, str]: # Using the type, name, and arguments to this generic test, synthesize a (hopefully) unique name + # Will not be unique if multiple tests have same name + arguments, and only configs differ # Returns a shorter version (hashed/truncated, for the compiled file) # as well as the full name (for the unique_id + FQN) flat_args = [] @@ -273,10 +274,8 @@ def __init__( self.fqn_name: str = "" if "name" in self.args: - # TODO: Should we append the model name to the test name here? - # Or trust the user to have globally unique names within their project? - # Logic like this, with accounting for source table targets: - # generic_test_name = f"{self.args["name"]}_{target.name}" + # Trust the user to have a unique name for this model + column combo + # otherwise, raise_duplicate_resource_name later on self.compiled_name = self.args["name"] self.fqn_name = self.args["name"] del self.args["name"] From 2117072297be55f2de6e54cced69a300ae30266a Mon Sep 17 00:00:00 2001 From: Jeremy Cohen Date: Fri, 25 Mar 2022 11:32:31 +0100 Subject: [PATCH 8/8] PR feedback --- core/dbt/exceptions.py | 21 ++++++++++--------- core/dbt/parser/generic_test_builders.py | 4 ++-- .../schema_tests/test_schema_v2_tests.py | 7 +++++-- 3 files changed, 18 insertions(+), 14 deletions(-) diff --git a/core/dbt/exceptions.py b/core/dbt/exceptions.py index c2b4160ac0b..b760c3a9d3a 100644 --- a/core/dbt/exceptions.py +++ b/core/dbt/exceptions.py @@ -859,8 +859,7 @@ def raise_duplicate_resource_name(node_1, node_2): # duplicate generic tests elif node_type == NodeType.Test and hasattr(node_1, "test_metadata"): column_name = f'column "{node_1.column_name}" in ' if node_1.column_name else "" - # TODO: we need a better way of storing the target model/source/etc in generic tests - model_name = ".".join((node_1.refs or node_1.sources)[0]) + model_name = node_1.file_key_name duped_name = f'{node_1.name}" defined on {column_name}"{model_name}' action = "running" formatted_name = "tests" @@ -870,14 +869,16 @@ def raise_duplicate_resource_name(node_1, node_2): # should this be raise_parsing_error instead? raise_compiler_error( - f'dbt found two {pluralized} with the name "{duped_name}". ' - f"\n" - f"\nSince these resources have the same name, dbt will be unable to find the correct resource " - f"\nwhen {action} {formatted_name}. " - f"\n" - f"\nTo fix this, change the name of one of these resources: " - f"\n- {node_1.unique_id} ({node_1.original_file_path}) " - f"\n- {node_2.unique_id} ({node_2.original_file_path})" + f""" +dbt found two {pluralized} with the name "{duped_name}". + +Since these resources have the same name, dbt will be unable to find the correct resource +when {action} {formatted_name}. + +To fix this, change the name of one of these resources: +- {node_1.unique_id} ({node_1.original_file_path}) +- {node_2.unique_id} ({node_2.original_file_path}) + """.strip() ) diff --git a/core/dbt/parser/generic_test_builders.py b/core/dbt/parser/generic_test_builders.py index c2f6fe25fe4..fce7905e575 100644 --- a/core/dbt/parser/generic_test_builders.py +++ b/core/dbt/parser/generic_test_builders.py @@ -274,8 +274,8 @@ def __init__( self.fqn_name: str = "" if "name" in self.args: - # Trust the user to have a unique name for this model + column combo - # otherwise, raise_duplicate_resource_name later on + # Assign the user-defined name here, which will be checked for uniqueness later + # we will raise an error if two tests have same name for same model + column combo self.compiled_name = self.args["name"] self.fqn_name = self.args["name"] del self.args["name"] diff --git a/tests/functional/schema_tests/test_schema_v2_tests.py b/tests/functional/schema_tests/test_schema_v2_tests.py index 538f68fce22..3fa5a361da1 100644 --- a/tests/functional/schema_tests/test_schema_v2_tests.py +++ b/tests/functional/schema_tests/test_schema_v2_tests.py @@ -681,7 +681,8 @@ class TestGenericTestsCustomNames: def models(self, custom_generic_test_names): # noqa: F811 return custom_generic_test_names - def test_collision_test_names_get_hash( + # users can define custom names for specific instances of generic tests + def test_generic_tests_with_custom_names( self, project, ): @@ -712,7 +713,9 @@ def test_collision_test_names_get_hash( self, project, ): - super().test_collision_test_names_get_hash(project) + """These tests don't collide, since they have user-provided custom names, + defined using an alternative format""" + super().test_generic_tests_with_custom_names(project) class TestInvalidSchema: