dbt-labs · iknox-fa · May 13, 2021 · May 10, 2021 · May 10, 2021 · May 12, 2021
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -26,6 +26,7 @@ Contributors:
 - Ensure that schema test macros are properly processed ([#3229](https://github.com/fishtown-analytics/dbt/issues/3229), [#3272](https://github.com/fishtown-analytics/dbt/pull/3272))
 - Use absolute path for profiles directory instead of a path relative to the project directory. Note: If a user supplies a relative path to the profiles directory, the value of `args.profiles_dir` will still be absolute. ([#3133](https://github.com/fishtown-analytics/dbt/issues/3133))
 - Fix FQN selector unable to find models whose name contains dots ([#3246](https://github.com/fishtown-analytics/dbt/issues/3246))
+- Fix unique_id generation for generic tests so tests with the same FQN but different configuration will run. ([#3254](https://github.com/fishtown-analytics/dbt/issues/3254)) 
 
 ### Features
 - Support commit hashes in dbt deps package revision ([#3268](https://github.com/fishtown-analytics/dbt/issues/3268), [#3270](https://github.com/fishtown-analytics/dbt/pull/3270))

diff --git a/core/dbt/parser/base.py b/core/dbt/parser/base.py
@@ -2,7 +2,7 @@
 import itertools
 import os
 from typing import (
-    List, Dict, Any, Generic, TypeVar
+    List, Dict, Any, Generic, Optional, TypeVar
 )
 
 from dbt.dataclass_schema import ValidationError
@@ -55,11 +55,25 @@ def parse_file(self, block: FileBlock) -> None:
     def resource_type(self) -> NodeType:
         pass
 
-    def generate_unique_id(self, resource_name: str) -> str:
-        """Returns a unique identifier for a resource"""
-        return "{}.{}.{}".format(self.resource_type,
-                                 self.project.project_name,
-                                 resource_name)
+    def generate_unique_id(
+        self,
+        resource_name: str,
+        hash: Optional[str] = None
+    ) -> str:
+        """Returns a unique identifier for a resource
+        An optional hash may be passed in to ensure uniqueness for edge cases"""
+
+        return '.'.join(
+            filter(
+                None,
+                [
+                    self.resource_type,
+                    self.project.project_name,
+                    resource_name,
+                    hash
+                ]
+            )
+        )
 
 
 class Parser(BaseParser[FinalValue], Generic[FinalValue]):

diff --git a/core/dbt/parser/docs.py b/core/dbt/parser/docs.py
@@ -1,4 +1,4 @@
-from typing import Iterable
+from typing import Iterable, Optional
 
 import re
 
@@ -23,7 +23,7 @@ def resource_type(self) -> NodeType:
     def get_compiled_path(cls, block: FileBlock):
         return block.path.relative_path
 
-    def generate_unique_id(self, resource_name: str) -> str:
+    def generate_unique_id(self, resource_name: str, _: Optional[str] = None) -> str:
         # because docs are in their own graph namespace, node type doesn't
         # need to be part of the unique ID.
         return '{}.{}'.format(self.project.project_name, resource_name)

diff --git a/core/dbt/parser/schemas.py b/core/dbt/parser/schemas.py
@@ -2,6 +2,7 @@
 import os
 
 from abc import ABCMeta, abstractmethod
+from hashlib import md5
 from typing import (
     Iterable, Dict, Any, Union, List, Optional, Generic, TypeVar, Type
 )
@@ -351,6 +352,25 @@ def create_test_node(
         column_name: Optional[str],
     ) -> ParsedSchemaTestNode:
 
+        HASH_LENGTH = 10
+
+        # N.B: This function builds a hashable string from any given test_metadata dict.
+        #   it's a bit fragile for general use (only supports str, int, float, List, Dict)
+        #   but it gets the job done here without the overhead of complete ser(de).
+        def get_hashable_md(
+            data: Union[str, int, float, List, Dict]
+        ) -> Union[str, List, Dict]:
+            if type(data) == dict:
+                return {k: get_hashable_md(data[k]) for k in sorted(data.keys())}  # type: ignore
+            elif type(data) == list:
+                return [get_hashable_md(val) for val in data]  # type: ignore
+            else:
+                return str(data)
+
+        hashable_metadata = repr(get_hashable_md(test_metadata))
+        hash_string = ''.join([name, hashable_metadata]).encode('utf-8')
+        test_hash = md5(hash_string).hexdigest()[-HASH_LENGTH:]
+
         dct = {
             'alias': name,
             'schema': self.default_schema,
@@ -364,7 +384,7 @@ def create_test_node(
             'original_file_path': target.original_file_path,
             'package_name': self.project.project_name,
             'raw_sql': raw_sql,
-            'unique_id': self.generate_unique_id(name),
+            'unique_id': self.generate_unique_id(name, test_hash),
             'config': self.config_dict(config),
             'test_metadata': test_metadata,
             'column_name': column_name,

diff --git a/test/integration/008_schema_tests_test/name_collision/base.sql b/test/integration/008_schema_tests_test/name_collision/base.sql
@@ -0,0 +1 @@
+SELECT 'hello_world' AS extension_id
diff --git a/test/integration/008_schema_tests_test/name_collision/base_extension.sql b/test/integration/008_schema_tests_test/name_collision/base_extension.sql
@@ -0,0 +1 @@
+SELECT 'NOT_NULL' AS id
diff --git a/test/integration/008_schema_tests_test/name_collision/schema.yml b/test/integration/008_schema_tests_test/name_collision/schema.yml
@@ -0,0 +1,12 @@
+version: 2
+models:
+- name: base
+  columns:
+  - name: extension_id
+    tests:
+    - not_null
+- name: base_extension
+  columns:
+  - name: id
+    tests:
+    - not_null
diff --git a/test/integration/008_schema_tests_test/test_schema_v2_tests.py b/test/integration/008_schema_tests_test/test_schema_v2_tests.py
@@ -22,7 +22,6 @@ def models(self):
 
     def run_schema_validations(self):
         args = FakeArgs()
-
         test_task = TestTask(args, self.config)
         return test_task.run()
 
@@ -377,3 +376,35 @@ def test_postgres_test_context_tests(self):
                 # was rendered correctly
                 self.assertRegex(result.node.compiled_sql, r'union all')
 
+
+class TestSchemaTestNameCollision(DBTIntegrationTest):
+    @property
+    def schema(self):
+        return "schema_tests_008"
+
+    @property
+    def models(self):
+        return "name_collision"
+
+    def run_schema_tests(self):
+        args = FakeArgs()
+        test_task = TestTask(args, self.config)
+        return test_task.run()
+
+    @use_profile('postgres')
+    def test_postgres_collision_test_names_get_hash(self):
+        """The models should produce unique IDs with a has appended"""
+        results = self.run_dbt()
+        test_results = self.run_schema_tests()
+
+        # both models and both tests run
+        self.assertEqual(len(results), 2)
+        self.assertEqual(len(test_results), 2)
+
+        # both tests have the same unique id except for the hash
+        expected_unique_ids = [
+            'test.test.not_null_base_extension_id.2dbb9627b6',
+            'test.test.not_null_base_extension_id.d70fc39f40'
+            ]
+        self.assertIn(test_results[0].node.unique_id, expected_unique_ids)
+        self.assertIn(test_results[1].node.unique_id, expected_unique_ids)
diff --git a/test/integration/029_docs_generate_tests/test_docs_generate.py b/test/integration/029_docs_generate_tests/test_docs_generate.py
@@ -1333,7 +1333,7 @@ def expected_seeded_manifest(self, model_database=None, quote_model=False):
                     'checksum': self._checksum_file(seed_path),
                     'unrendered_config': unrendered_seed_config,
                 },
-                'test.test.not_null_model_id': {
+                'test.test.not_null_model_id.8ab00aacfe': {
                     'alias': 'not_null_model_id',
                     'build_path': Normalized('target/compiled/test/models/schema.yml/schema_test/not_null_model_id.sql'),
                     'column_name': 'id',
@@ -1361,7 +1361,7 @@ def expected_seeded_manifest(self, model_database=None, quote_model=False):
                     'database': self.default_database,
                     'tags': ['schema'],
                     'meta': {},
-                    'unique_id': 'test.test.not_null_model_id',
+                    'unique_id': 'test.test.not_null_model_id.8ab00aacfe',
                     'docs': {'show': True},
                     'compiled': True,
                     'compiled_sql': AnyStringWith('where id is null'),
@@ -1420,7 +1420,7 @@ def expected_seeded_manifest(self, model_database=None, quote_model=False):
                     'unique_id': 'snapshot.test.snapshot_seed',
                     'unrendered_config': unrendered_snapshot_config,
                 },
-                'test.test.test_nothing_model_': {
+                'test.test.test_nothing_model_.596f819934': {
                     'alias': 'test_nothing_model_',
                     'build_path': Normalized('target/compiled/test/models/schema.yml/schema_test/test_nothing_model_.sql'),
                     'column_name': None,
@@ -1448,7 +1448,7 @@ def expected_seeded_manifest(self, model_database=None, quote_model=False):
                     'database': self.default_database,
                     'tags': ['schema'],
                     'meta': {},
-                    'unique_id': 'test.test.test_nothing_model_',
+                    'unique_id': 'test.test.test_nothing_model_.596f819934',
                     'docs': {'show': True},
                     'compiled': True,
                     'compiled_sql': AnyStringWith('select 0'),
@@ -1464,7 +1464,7 @@ def expected_seeded_manifest(self, model_database=None, quote_model=False):
                     'checksum': {'name': 'none', 'checksum': ''},
                     'unrendered_config': unrendered_test_config,
                 },
-                'test.test.unique_model_id': {
+                'test.test.unique_model_id.3de6fa785f': {
                     'alias': 'unique_model_id',
                     'build_path': Normalized('target/compiled/test/models/schema.yml/schema_test/unique_model_id.sql'),
                     'column_name': 'id',
@@ -1492,7 +1492,7 @@ def expected_seeded_manifest(self, model_database=None, quote_model=False):
                     'database': self.default_database,
                     'tags': ['schema'],
                     'meta': {},
-                    'unique_id': 'test.test.unique_model_id',
+                    'unique_id': 'test.test.unique_model_id.3de6fa785f',
                     'docs': {'show': True},
                     'compiled': True,
                     'compiled_sql': AnyStringWith('count(*)'),
@@ -1621,17 +1621,17 @@ def expected_seeded_manifest(self, model_database=None, quote_model=False):
                 'seed.test.seed': [],
                 'snapshot.test.snapshot_seed': ['seed.test.seed'],
                 'source.test.my_source.my_table': [],
-                'test.test.not_null_model_id': ['model.test.model'],
-                'test.test.test_nothing_model_': ['model.test.model'],
-                'test.test.unique_model_id': ['model.test.model'],
+                'test.test.not_null_model_id.8ab00aacfe': ['model.test.model'],
+                'test.test.test_nothing_model_.596f819934': ['model.test.model'],
+                'test.test.unique_model_id.3de6fa785f': ['model.test.model'],
             },
             'child_map': {
                 'model.test.model': [
                     'exposure.test.notebook_exposure',
                     'exposure.test.simple_exposure',
-                    'test.test.not_null_model_id',
-                    'test.test.test_nothing_model_',
-                    'test.test.unique_model_id',
+                    'test.test.not_null_model_id.8ab00aacfe',
+                    'test.test.test_nothing_model_.596f819934',
+                    'test.test.unique_model_id.3de6fa785f',
                 ],
                 'model.test.second_model': ['exposure.test.notebook_exposure'],
                 'exposure.test.notebook_exposure': [],
@@ -1641,9 +1641,9 @@ def expected_seeded_manifest(self, model_database=None, quote_model=False):
                                    'snapshot.test.snapshot_seed'],
                 'snapshot.test.snapshot_seed': [],
                 'source.test.my_source.my_table': ['exposure.test.simple_exposure'],
-                'test.test.not_null_model_id': [],
-                'test.test.test_nothing_model_': [],
-                'test.test.unique_model_id': [],
+                'test.test.not_null_model_id.8ab00aacfe': [],
+                'test.test.test_nothing_model_.596f819934': [],
+                'test.test.unique_model_id.3de6fa785f': [],
             },
             'docs': {
                 'dbt.__overview__': ANY,
@@ -2921,7 +2921,7 @@ def verify_manifest(self, expected_manifest):
             else:
                 self.assertIn(key, expected_manifest)  # sanity check
                 self.assertEqual(manifest[key], expected_manifest[key])
-
+                
     def _quote(self, value):
         quote_char = '`' if self.adapter_type == 'bigquery' else '"'
         return '{0}{1}{0}'.format(quote_char, value)
@@ -2972,7 +2972,7 @@ def expected_run_results(self):
                 'status': 'success',
                 'message': None,
                 'execution_time': AnyFloat(),
-                'unique_id': 'test.test.not_null_model_id',
+                'unique_id': 'test.test.not_null_model_id.8ab00aacfe',
                 'adapter_response': ANY,
                 'thread_id': ANY,
                 'timing': [ANY, ANY],
@@ -2981,7 +2981,7 @@ def expected_run_results(self):
                 'status': 'success',
                 'message': None,
                 'execution_time': AnyFloat(),
-                'unique_id': 'test.test.test_nothing_model_',
+                'unique_id': 'test.test.test_nothing_model_.596f819934',
                 'adapter_response': ANY,
                 'thread_id': ANY,
                 'timing': [ANY, ANY],
@@ -2990,7 +2990,7 @@ def expected_run_results(self):
                 'status': 'success',
                 'message': None,
                 'execution_time': AnyFloat(),
-                'unique_id': 'test.test.unique_model_id',
+                'unique_id': 'test.test.unique_model_id.3de6fa785f',
                 'adapter_response': ANY,
                 'thread_id': ANY,
                 'timing': [ANY, ANY],

diff --git a/test/unit/test_parser.py b/test/unit/test_parser.py
@@ -393,7 +393,7 @@ def test__parse_basic_model_tests(self):
         self.assertEqual(tests[0].package_name, 'snowplow')
         self.assertTrue(tests[0].name.startswith('accepted_values_'))
         self.assertEqual(tests[0].fqn, ['snowplow', 'schema_test', tests[0].name])
-        self.assertEqual(tests[0].unique_id.split('.'), ['test', 'snowplow', tests[0].name])
+        self.assertEqual(tests[0].unique_id.split('.'), ['test', 'snowplow', tests[0].name, '468aa12dc7'])
         self.assertEqual(tests[0].test_metadata.name, 'accepted_values')
         self.assertIsNone(tests[0].test_metadata.namespace)
         self.assertEqual(
@@ -415,7 +415,7 @@ def test__parse_basic_model_tests(self):
         self.assertEqual(tests[1].fqn, ['snowplow', 'schema_test', tests[1].name])
         self.assertTrue(tests[1].name.startswith('foreign_package_test_case_'))
         self.assertEqual(tests[1].package_name, 'snowplow')
-        self.assertEqual(tests[1].unique_id.split('.'), ['test', 'snowplow', tests[1].name])
+        self.assertEqual(tests[1].unique_id.split('.'), ['test', 'snowplow', tests[1].name, 'b1daa30e01'])
         self.assertEqual(tests[1].test_metadata.name, 'test_case')
         self.assertEqual(tests[1].test_metadata.namespace, 'foreign_package')
         self.assertEqual(
@@ -434,7 +434,7 @@ def test__parse_basic_model_tests(self):
         self.assertEqual(tests[2].package_name, 'snowplow')
         self.assertTrue(tests[2].name.startswith('not_null_'))
         self.assertEqual(tests[2].fqn, ['snowplow', 'schema_test', tests[2].name])
-        self.assertEqual(tests[2].unique_id.split('.'), ['test', 'snowplow', tests[2].name])
+        self.assertEqual(tests[2].unique_id.split('.'), ['test', 'snowplow', tests[2].name, 'bb12aa4687'])
         self.assertEqual(tests[2].test_metadata.name, 'not_null')
         self.assertIsNone(tests[2].test_metadata.namespace)
         self.assertEqual(