Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Custom names for generic tests #4898

Merged
merged 8 commits into from
Mar 25, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .changes/unreleased/Features-20220318-085756.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
kind: Features
body: Support custom names for generic tests
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it just generic tests or all tests?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Singular tests take their name from the file in which they're defined (tests/my_custom_test.sql), just like models — so they've always required custom names

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Even more evidence that this PR is a good idea!

time: 2022-03-18T08:57:56.05584+01:00
custom:
Author: jtcohen6
Issue: "3348"
PR: "4898"
56 changes: 36 additions & 20 deletions core/dbt/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -838,31 +838,47 @@ def raise_duplicate_macro_name(node_1, node_2, namespace) -> NoReturn:

def raise_duplicate_resource_name(node_1, node_2):
duped_name = node_1.name
node_type = NodeType(node_1.resource_type)
pluralized = (
node_type.pluralize()
if node_1.resource_type == node_2.resource_type
else "resources" # still raise if ref() collision, e.g. model + seed
)

if node_1.resource_type in NodeType.refable():
get_func = 'ref("{}")'.format(duped_name)
elif node_1.resource_type == NodeType.Source:
action = "looking for"
# duplicate 'ref' targets
if node_type in NodeType.refable():
formatted_name = f'ref("{duped_name}")'
# duplicate sources
elif node_type == NodeType.Source:
duped_name = node_1.get_full_source_name()
get_func = node_1.get_source_representation()
elif node_1.resource_type == NodeType.Documentation:
get_func = 'doc("{}")'.format(duped_name)
elif node_1.resource_type == NodeType.Test and "schema" in node_1.tags:
return
formatted_name = node_1.get_source_representation()
# duplicate docs blocks
elif node_type == NodeType.Documentation:
formatted_name = f'doc("{duped_name}")'
# duplicate generic tests
elif node_type == NodeType.Test and hasattr(node_1, "test_metadata"):
column_name = f'column "{node_1.column_name}" in ' if node_1.column_name else ""
model_name = node_1.file_key_name
duped_name = f'{node_1.name}" defined on {column_name}"{model_name}'
action = "running"
formatted_name = "tests"
# all other resource types
else:
get_func = '"{}"'.format(duped_name)
formatted_name = duped_name

# should this be raise_parsing_error instead?
raise_compiler_error(
'dbt found two resources with the name "{}". Since these resources '
"have the same name,\ndbt will be unable to find the correct resource "
"when {} is used. To fix this,\nchange the name of one of "
"these resources:\n- {} ({})\n- {} ({})".format(
duped_name,
get_func,
node_1.unique_id,
node_1.original_file_path,
node_2.unique_id,
node_2.original_file_path,
)
f"""
dbt found two {pluralized} with the name "{duped_name}".

Since these resources have the same name, dbt will be unable to find the correct resource
when {action} {formatted_name}.

To fix this, change the name of one of these resources:
- {node_1.unique_id} ({node_1.original_file_path})
- {node_2.unique_id} ({node_2.original_file_path})
""".strip()
)


Expand Down
61 changes: 44 additions & 17 deletions core/dbt/parser/generic_test_builders.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,13 @@
from dbt.parser.search import FileBlock


def get_nice_generic_test_name(
def synthesize_generic_test_names(
test_type: str, test_name: str, args: Dict[str, Any]
) -> Tuple[str, str]:
# Using the type, name, and arguments to this generic test, synthesize a (hopefully) unique name
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[nit] If we're going to use the word "hopefully" in this comment let's lay out exactly when this would not be unique.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

thanks for adding that!

# Will not be unique if multiple tests have same name + arguments, and only configs differ
# Returns a shorter version (hashed/truncated, for the compiled file)
# as well as the full name (for the unique_id + FQN)
flat_args = []
for arg_name in sorted(args):
# the model is already embedded in the name, so skip it
Expand Down Expand Up @@ -263,13 +267,25 @@ def __init__(
if self.namespace is not None:
self.package_name = self.namespace

compiled_name, fqn_name = self.get_test_name()
self.compiled_name: str = compiled_name
self.fqn_name: str = fqn_name

# use hashed name as alias if too long
if compiled_name != fqn_name and "alias" not in self.config:
self.config["alias"] = compiled_name
# If the user has provided a custom name for this generic test, use it
# Then delete the "name" argument to avoid passing it into the test macro
# Otherwise, use an auto-generated name synthesized from test inputs
self.compiled_name: str = ""
self.fqn_name: str = ""

if "name" in self.args:
# Assign the user-defined name here, which will be checked for uniqueness later
# we will raise an error if two tests have same name for same model + column combo
self.compiled_name = self.args["name"]
self.fqn_name = self.args["name"]
del self.args["name"]
else:
short_name, full_name = self.get_synthetic_test_names()
self.compiled_name = short_name
self.fqn_name = full_name
# use hashed name as alias if full name is too long
if short_name != full_name and "alias" not in self.config:
self.config["alias"] = short_name

def _bad_type(self) -> TypeError:
return TypeError('invalid target type "{}"'.format(type(self.target)))
Expand All @@ -281,13 +297,23 @@ def extract_test_args(test, name=None) -> Tuple[str, Dict[str, Any]]:
"test must be dict or str, got {} (value {})".format(type(test), test)
)

test = list(test.items())
if len(test) != 1:
raise_parsing_error(
"test definition dictionary must have exactly one key, got"
" {} instead ({} keys)".format(test, len(test))
)
test_name, test_args = test[0]
# If the test is a dictionary with top-level keys, the test name is "test_name"
# and the rest are arguments
# {'name': 'my_favorite_test', 'test_name': 'unique', 'config': {'where': '1=1'}}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

test_name and test name are confusing, so this example really helps 👍

if "test_name" in test.keys():
test_name = test.pop("test_name")
test_args = test
# If the test is a nested dictionary with one top-level key, the test name
# is the dict name, and nested keys are arguments
# {'unique': {'name': 'my_favorite_test', 'config': {'where': '1=1'}}}
else:
test = list(test.items())
if len(test) != 1:
raise_parsing_error(
"test definition dictionary must have exactly one key, got"
" {} instead ({} keys)".format(test, len(test))
)
test_name, test_args = test[0]

if not isinstance(test_args, dict):
raise_parsing_error(
Expand Down Expand Up @@ -401,7 +427,8 @@ def macro_name(self) -> str:
macro_name = "{}.{}".format(self.namespace, macro_name)
return macro_name

def get_test_name(self) -> Tuple[str, str]:
def get_synthetic_test_names(self) -> Tuple[str, str]:
# Returns two names: shorter (for the compiled file), full (for the unique_id + FQN)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This type signature is a very easy-to-misuse one. good call adding a comment.

if isinstance(self.target, UnparsedNodeUpdate):
name = self.name
elif isinstance(self.target, UnpatchedSourceDefinition):
Expand All @@ -410,7 +437,7 @@ def get_test_name(self) -> Tuple[str, str]:
raise self._bad_type()
if self.namespace is not None:
name = "{}_{}".format(self.namespace, name)
return get_nice_generic_test_name(name, self.target.name, self.args)
return synthesize_generic_test_names(name, self.target.name, self.args)

def construct_config(self) -> str:
configs = ",".join(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ def models(self):

@use_profile("postgres")
def test_postgres_duplicate_exposure(self):
message = "dbt found two resources with the name"
message = "dbt found two exposures with the name"
try:
self.run_dbt(["compile"])
self.assertTrue(False, "dbt did not throw for duplicate exposures")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ def models(self):

@use_profile("postgres")
def test_postgres_duplicate_model_enabled(self):
message = "dbt found two resources with the name"
message = "dbt found two models with the name"
try:
self.run_dbt(["run"])
self.assertTrue(False, "dbt did not throw for duplicate models")
Expand Down Expand Up @@ -80,7 +80,7 @@ def packages_config(self):
@use_profile("postgres")
def test_postgres_duplicate_model_enabled_across_packages(self):
self.run_dbt(["deps"])
message = "dbt found two resources with the name"
message = "dbt found two models with the name"
try:
self.run_dbt(["run"])
self.assertTrue(False, "dbt did not throw for duplicate models")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ def models(self):

@use_profile("postgres")
def test_postgres_duplicate_source_enabled(self):
message = "dbt found two resources with the name"
message = "dbt found two sources with the name"
try:
self.run_dbt(["compile"])
self.assertTrue(False, "dbt did not throw for duplicate sources")
Expand Down
91 changes: 91 additions & 0 deletions tests/functional/schema_tests/fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -420,6 +420,73 @@
SELECT 'NOT_NULL' AS id
"""


dupe_generic_tests_collide__schema_yml = """
version: 2
models:
- name: model_a
columns:
- name: id
tests:
- not_null:
config:
where: "1=1"
- not_null:
config:
where: "1=2"

"""

dupe_generic_tests_collide__model_a = """
SELECT 'NOT_NULL' AS id
"""


custom_generic_test_names__schema_yml = """
version: 2
models:
- name: model_a
columns:
- name: id
tests:
- not_null:
name: not_null_where_1_equals_1
config:
where: "1=1"
- not_null:
name: not_null_where_1_equals_2
config:
where: "1=2"

"""

custom_generic_test_names__model_a = """
SELECT 'NOT_NULL' AS id
"""

custom_generic_test_names_alt_format__schema_yml = """
version: 2
models:
- name: model_a
columns:
- name: id
tests:
- name: not_null_where_1_equals_1
test_name: not_null
config:
where: "1=1"
- name: not_null_where_1_equals_2
test_name: not_null
config:
where: "1=2"

"""

custom_generic_test_names_alt_format__model_a = """
SELECT 'NOT_NULL' AS id
"""


test_context_where_subq_macros__custom_generic_test_sql = """
/*{# This test will fail if get_where_subquery() is missing from TestContext + TestMacroNamespace #}*/

Expand Down Expand Up @@ -1266,6 +1333,30 @@ def name_collision():
}


@pytest.fixture(scope="class")
def dupe_tests_collide():
return {
"schema.yml": dupe_generic_tests_collide__schema_yml,
"model_a.sql": dupe_generic_tests_collide__model_a,
}


@pytest.fixture(scope="class")
def custom_generic_test_names():
return {
"schema.yml": custom_generic_test_names__schema_yml,
"model_a.sql": custom_generic_test_names__model_a,
}


@pytest.fixture(scope="class")
def custom_generic_test_names_alt_format():
return {
"schema.yml": custom_generic_test_names_alt_format__schema_yml,
"model_a.sql": custom_generic_test_names_alt_format__model_a,
}


@pytest.fixture(scope="class")
def test_context_where_subq_macros():
return {"custom_generic_test.sql": test_context_where_subq_macros__custom_generic_test_sql}
Expand Down
62 changes: 61 additions & 1 deletion tests/functional/schema_tests/test_schema_v2_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@
seeds,
test_context_models,
name_collision,
dupe_tests_collide,
custom_generic_test_names,
custom_generic_test_names_alt_format,
test_context_where_subq_macros,
invalid_schema_models,
all_models,
Expand All @@ -25,7 +28,7 @@
project_files,
case_sensitive_models__uppercase_SQL,
)
from dbt.exceptions import ParsingException
from dbt.exceptions import ParsingException, CompilationException
from dbt.contracts.results import TestStatus


Expand Down Expand Up @@ -658,6 +661,63 @@ def test_collision_test_names_get_hash(
assert test_results[1].node.unique_id in expected_unique_ids


class TestGenericTestsCollide:
@pytest.fixture(scope="class")
def models(self, dupe_tests_collide): # noqa: F811
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[nit] # noqa: F811 is a little unexpected here since that's usually for import statements. Does it work without this comment?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I copied this from the other test definitions, it looks like this is a flake8 complaint:

tests/functional/schema_tests/test_schema_v2_tests.py:666:22: F811 redefinition of unused 'dupe_tests_collide' from line 6

Indeed because dupe_tests_collide is being imported at the top, from the separate file of fixtures (tests.functional.schema_tests.fixtures)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

interesting. that seems like a flake8 bug counting a reference as a redefinition. thanks for double checking!

return dupe_tests_collide

def test_generic_test_collision(
self,
project,
):
"""These tests collide, since only the configs differ"""
with pytest.raises(CompilationException) as exc:
run_dbt()
assert "dbt found two tests with the name" in str(exc)


class TestGenericTestsCustomNames:
@pytest.fixture(scope="class")
def models(self, custom_generic_test_names): # noqa: F811
return custom_generic_test_names

# users can define custom names for specific instances of generic tests
def test_generic_tests_with_custom_names(
self,
project,
):
"""These tests don't collide, since they have user-provided custom names"""
results = run_dbt()
test_results = run_dbt(["test"])

# model + both tests run
assert len(results) == 1
assert len(test_results) == 2

# custom names propagate to the unique_id
expected_unique_ids = [
"test.test.not_null_where_1_equals_1.7b96089006",
"test.test.not_null_where_1_equals_2.8ae586e17f",
]
assert test_results[0].node.unique_id in expected_unique_ids
assert test_results[1].node.unique_id in expected_unique_ids


class TestGenericTestsCustomNamesAltFormat(TestGenericTestsCustomNames):
@pytest.fixture(scope="class")
def models(self, custom_generic_test_names_alt_format): # noqa: F811
return custom_generic_test_names_alt_format

# exactly as above, just alternative format for yaml definition
def test_collision_test_names_get_hash(
self,
project,
):
"""These tests don't collide, since they have user-provided custom names,
defined using an alternative format"""
super().test_generic_tests_with_custom_names(project)


class TestInvalidSchema:
@pytest.fixture(scope="class")
def models(self, invalid_schema_models): # noqa: F811
Expand Down