Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/schema tests are more unique #3335

Merged
merged 4 commits into from
May 13, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ Contributors:
- Ensure that schema test macros are properly processed ([#3229](https://github.com/fishtown-analytics/dbt/issues/3229), [#3272](https://github.com/fishtown-analytics/dbt/pull/3272))
- Use absolute path for profiles directory instead of a path relative to the project directory. Note: If a user supplies a relative path to the profiles directory, the value of `args.profiles_dir` will still be absolute. ([#3133](https://github.com/fishtown-analytics/dbt/issues/3133))
- Fix FQN selector unable to find models whose name contains dots ([#3246](https://github.com/fishtown-analytics/dbt/issues/3246))
- Fix unique_id generation for generic tests so tests with the same FQN but different configuration will run. ([#3254](https://github.com/fishtown-analytics/dbt/issues/3254))

### Features
- Support commit hashes in dbt deps package revision ([#3268](https://github.com/fishtown-analytics/dbt/issues/3268), [#3270](https://github.com/fishtown-analytics/dbt/pull/3270))
Expand Down
26 changes: 20 additions & 6 deletions core/dbt/parser/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import itertools
import os
from typing import (
List, Dict, Any, Generic, TypeVar
List, Dict, Any, Generic, Optional, TypeVar
)

from dbt.dataclass_schema import ValidationError
Expand Down Expand Up @@ -55,11 +55,25 @@ def parse_file(self, block: FileBlock) -> None:
def resource_type(self) -> NodeType:
pass

def generate_unique_id(self, resource_name: str) -> str:
"""Returns a unique identifier for a resource"""
return "{}.{}.{}".format(self.resource_type,
self.project.project_name,
resource_name)
def generate_unique_id(
self,
resource_name: str,
hash: Optional[str] = None
) -> str:
"""Returns a unique identifier for a resource
An optional hash may be passed in to ensure uniqueness for edge cases"""

return '.'.join(
filter(
None,
[
self.resource_type,
self.project.project_name,
resource_name,
hash
]
)
)


class Parser(BaseParser[FinalValue], Generic[FinalValue]):
Expand Down
4 changes: 2 additions & 2 deletions core/dbt/parser/docs.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Iterable
from typing import Iterable, Optional

import re

Expand All @@ -23,7 +23,7 @@ def resource_type(self) -> NodeType:
def get_compiled_path(cls, block: FileBlock):
return block.path.relative_path

def generate_unique_id(self, resource_name: str) -> str:
def generate_unique_id(self, resource_name: str, _: Optional[str] = None) -> str:
# because docs are in their own graph namespace, node type doesn't
# need to be part of the unique ID.
return '{}.{}'.format(self.project.project_name, resource_name)
Expand Down
22 changes: 21 additions & 1 deletion core/dbt/parser/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import os

from abc import ABCMeta, abstractmethod
from hashlib import md5
from typing import (
Iterable, Dict, Any, Union, List, Optional, Generic, TypeVar, Type
)
Expand Down Expand Up @@ -351,6 +352,25 @@ def create_test_node(
column_name: Optional[str],
) -> ParsedSchemaTestNode:

HASH_LENGTH = 10

# N.B: This function builds a hashable string from any given test_metadata dict.
# it's a bit fragile for general use (only supports str, int, float, List, Dict)
# but it gets the job done here without the overhead of complete ser(de).
def get_hashable_md(
nathaniel-may marked this conversation as resolved.
Show resolved Hide resolved
data: Union[str, int, float, List, Dict]
) -> Union[str, List, Dict]:
if type(data) == dict:
return {k: get_hashable_md(data[k]) for k in sorted(data.keys())} # type: ignore
iknox-fa marked this conversation as resolved.
Show resolved Hide resolved
elif type(data) == list:
return [get_hashable_md(val) for val in data] # type: ignore
else:
return str(data)

hashable_metadata = repr(get_hashable_md(test_metadata))
hash_string = ''.join([name, hashable_metadata]).encode('utf-8')
test_hash = md5(hash_string).hexdigest()[-HASH_LENGTH:]
nathaniel-may marked this conversation as resolved.
Show resolved Hide resolved

dct = {
'alias': name,
'schema': self.default_schema,
Expand All @@ -364,7 +384,7 @@ def create_test_node(
'original_file_path': target.original_file_path,
'package_name': self.project.project_name,
'raw_sql': raw_sql,
'unique_id': self.generate_unique_id(name),
'unique_id': self.generate_unique_id(name, test_hash),
'config': self.config_dict(config),
'test_metadata': test_metadata,
'column_name': column_name,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
SELECT 'hello_world' AS extension_id
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
SELECT 'NOT_NULL' AS id
12 changes: 12 additions & 0 deletions test/integration/008_schema_tests_test/name_collision/schema.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
version: 2
models:
- name: base
columns:
- name: extension_id
tests:
- not_null
- name: base_extension
columns:
- name: id
tests:
- not_null
33 changes: 32 additions & 1 deletion test/integration/008_schema_tests_test/test_schema_v2_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ def models(self):

def run_schema_validations(self):
args = FakeArgs()

test_task = TestTask(args, self.config)
return test_task.run()

Expand Down Expand Up @@ -377,3 +376,35 @@ def test_postgres_test_context_tests(self):
# was rendered correctly
self.assertRegex(result.node.compiled_sql, r'union all')


class TestSchemaTestNameCollision(DBTIntegrationTest):
@property
def schema(self):
return "schema_tests_008"

@property
def models(self):
return "name_collision"

def run_schema_tests(self):
args = FakeArgs()
test_task = TestTask(args, self.config)
return test_task.run()

@use_profile('postgres')
def test_postgres_collision_test_names_get_hash(self):
"""The models should produce unique IDs with a has appended"""
results = self.run_dbt()
test_results = self.run_schema_tests()

# both models and both tests run
self.assertEqual(len(results), 2)
self.assertEqual(len(test_results), 2)

# both tests have the same unique id except for the hash
expected_unique_ids = [
'test.test.not_null_base_extension_id.2dbb9627b6',
'test.test.not_null_base_extension_id.d70fc39f40'
]
self.assertIn(test_results[0].node.unique_id, expected_unique_ids)
self.assertIn(test_results[1].node.unique_id, expected_unique_ids)
38 changes: 19 additions & 19 deletions test/integration/029_docs_generate_tests/test_docs_generate.py
Original file line number Diff line number Diff line change
Expand Up @@ -1333,7 +1333,7 @@ def expected_seeded_manifest(self, model_database=None, quote_model=False):
'checksum': self._checksum_file(seed_path),
'unrendered_config': unrendered_seed_config,
},
'test.test.not_null_model_id': {
'test.test.not_null_model_id.8ab00aacfe': {
iknox-fa marked this conversation as resolved.
Show resolved Hide resolved
'alias': 'not_null_model_id',
'build_path': Normalized('target/compiled/test/models/schema.yml/schema_test/not_null_model_id.sql'),
'column_name': 'id',
Expand Down Expand Up @@ -1361,7 +1361,7 @@ def expected_seeded_manifest(self, model_database=None, quote_model=False):
'database': self.default_database,
'tags': ['schema'],
'meta': {},
'unique_id': 'test.test.not_null_model_id',
'unique_id': 'test.test.not_null_model_id.8ab00aacfe',
'docs': {'show': True},
'compiled': True,
'compiled_sql': AnyStringWith('where id is null'),
Expand Down Expand Up @@ -1420,7 +1420,7 @@ def expected_seeded_manifest(self, model_database=None, quote_model=False):
'unique_id': 'snapshot.test.snapshot_seed',
'unrendered_config': unrendered_snapshot_config,
},
'test.test.test_nothing_model_': {
'test.test.test_nothing_model_.596f819934': {
'alias': 'test_nothing_model_',
'build_path': Normalized('target/compiled/test/models/schema.yml/schema_test/test_nothing_model_.sql'),
'column_name': None,
Expand Down Expand Up @@ -1448,7 +1448,7 @@ def expected_seeded_manifest(self, model_database=None, quote_model=False):
'database': self.default_database,
'tags': ['schema'],
'meta': {},
'unique_id': 'test.test.test_nothing_model_',
'unique_id': 'test.test.test_nothing_model_.596f819934',
'docs': {'show': True},
'compiled': True,
'compiled_sql': AnyStringWith('select 0'),
Expand All @@ -1464,7 +1464,7 @@ def expected_seeded_manifest(self, model_database=None, quote_model=False):
'checksum': {'name': 'none', 'checksum': ''},
'unrendered_config': unrendered_test_config,
},
'test.test.unique_model_id': {
'test.test.unique_model_id.3de6fa785f': {
'alias': 'unique_model_id',
'build_path': Normalized('target/compiled/test/models/schema.yml/schema_test/unique_model_id.sql'),
'column_name': 'id',
Expand Down Expand Up @@ -1492,7 +1492,7 @@ def expected_seeded_manifest(self, model_database=None, quote_model=False):
'database': self.default_database,
'tags': ['schema'],
'meta': {},
'unique_id': 'test.test.unique_model_id',
'unique_id': 'test.test.unique_model_id.3de6fa785f',
'docs': {'show': True},
'compiled': True,
'compiled_sql': AnyStringWith('count(*)'),
Expand Down Expand Up @@ -1621,17 +1621,17 @@ def expected_seeded_manifest(self, model_database=None, quote_model=False):
'seed.test.seed': [],
'snapshot.test.snapshot_seed': ['seed.test.seed'],
'source.test.my_source.my_table': [],
'test.test.not_null_model_id': ['model.test.model'],
'test.test.test_nothing_model_': ['model.test.model'],
'test.test.unique_model_id': ['model.test.model'],
'test.test.not_null_model_id.8ab00aacfe': ['model.test.model'],
'test.test.test_nothing_model_.596f819934': ['model.test.model'],
'test.test.unique_model_id.3de6fa785f': ['model.test.model'],
},
'child_map': {
'model.test.model': [
'exposure.test.notebook_exposure',
'exposure.test.simple_exposure',
'test.test.not_null_model_id',
'test.test.test_nothing_model_',
'test.test.unique_model_id',
'test.test.not_null_model_id.8ab00aacfe',
'test.test.test_nothing_model_.596f819934',
'test.test.unique_model_id.3de6fa785f',
],
'model.test.second_model': ['exposure.test.notebook_exposure'],
'exposure.test.notebook_exposure': [],
Expand All @@ -1641,9 +1641,9 @@ def expected_seeded_manifest(self, model_database=None, quote_model=False):
'snapshot.test.snapshot_seed'],
'snapshot.test.snapshot_seed': [],
'source.test.my_source.my_table': ['exposure.test.simple_exposure'],
'test.test.not_null_model_id': [],
'test.test.test_nothing_model_': [],
'test.test.unique_model_id': [],
'test.test.not_null_model_id.8ab00aacfe': [],
'test.test.test_nothing_model_.596f819934': [],
'test.test.unique_model_id.3de6fa785f': [],
},
'docs': {
'dbt.__overview__': ANY,
Expand Down Expand Up @@ -2921,7 +2921,7 @@ def verify_manifest(self, expected_manifest):
else:
self.assertIn(key, expected_manifest) # sanity check
self.assertEqual(manifest[key], expected_manifest[key])

def _quote(self, value):
quote_char = '`' if self.adapter_type == 'bigquery' else '"'
return '{0}{1}{0}'.format(quote_char, value)
Expand Down Expand Up @@ -2972,7 +2972,7 @@ def expected_run_results(self):
'status': 'success',
'message': None,
'execution_time': AnyFloat(),
'unique_id': 'test.test.not_null_model_id',
'unique_id': 'test.test.not_null_model_id.8ab00aacfe',
'adapter_response': ANY,
'thread_id': ANY,
'timing': [ANY, ANY],
Expand All @@ -2981,7 +2981,7 @@ def expected_run_results(self):
'status': 'success',
'message': None,
'execution_time': AnyFloat(),
'unique_id': 'test.test.test_nothing_model_',
'unique_id': 'test.test.test_nothing_model_.596f819934',
'adapter_response': ANY,
'thread_id': ANY,
'timing': [ANY, ANY],
Expand All @@ -2990,7 +2990,7 @@ def expected_run_results(self):
'status': 'success',
'message': None,
'execution_time': AnyFloat(),
'unique_id': 'test.test.unique_model_id',
'unique_id': 'test.test.unique_model_id.3de6fa785f',
'adapter_response': ANY,
'thread_id': ANY,
'timing': [ANY, ANY],
Expand Down
6 changes: 3 additions & 3 deletions test/unit/test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -393,7 +393,7 @@ def test__parse_basic_model_tests(self):
self.assertEqual(tests[0].package_name, 'snowplow')
self.assertTrue(tests[0].name.startswith('accepted_values_'))
self.assertEqual(tests[0].fqn, ['snowplow', 'schema_test', tests[0].name])
self.assertEqual(tests[0].unique_id.split('.'), ['test', 'snowplow', tests[0].name])
self.assertEqual(tests[0].unique_id.split('.'), ['test', 'snowplow', tests[0].name, '468aa12dc7'])
self.assertEqual(tests[0].test_metadata.name, 'accepted_values')
self.assertIsNone(tests[0].test_metadata.namespace)
self.assertEqual(
Expand All @@ -415,7 +415,7 @@ def test__parse_basic_model_tests(self):
self.assertEqual(tests[1].fqn, ['snowplow', 'schema_test', tests[1].name])
self.assertTrue(tests[1].name.startswith('foreign_package_test_case_'))
self.assertEqual(tests[1].package_name, 'snowplow')
self.assertEqual(tests[1].unique_id.split('.'), ['test', 'snowplow', tests[1].name])
self.assertEqual(tests[1].unique_id.split('.'), ['test', 'snowplow', tests[1].name, 'b1daa30e01'])
self.assertEqual(tests[1].test_metadata.name, 'test_case')
self.assertEqual(tests[1].test_metadata.namespace, 'foreign_package')
self.assertEqual(
Expand All @@ -434,7 +434,7 @@ def test__parse_basic_model_tests(self):
self.assertEqual(tests[2].package_name, 'snowplow')
self.assertTrue(tests[2].name.startswith('not_null_'))
self.assertEqual(tests[2].fqn, ['snowplow', 'schema_test', tests[2].name])
self.assertEqual(tests[2].unique_id.split('.'), ['test', 'snowplow', tests[2].name])
self.assertEqual(tests[2].unique_id.split('.'), ['test', 'snowplow', tests[2].name, 'bb12aa4687'])
self.assertEqual(tests[2].test_metadata.name, 'not_null')
self.assertIsNone(tests[2].test_metadata.namespace)
self.assertEqual(
Expand Down