Raise CompilationException on duplicate model #568

Merged
24 changes: 24 additions & 0 deletions dbt/loader.py
@@ -72,6 +72,30 @@ def load_project(cls, root_project, all_projects, project, project_name,

class ModelLoader(ResourceLoader):

@classmethod
def load_all(cls, root_project, all_projects, macros=None):
to_return = {}

for project_name, project in all_projects.items():
project_loaded = cls.load_project(root_project,
all_projects,
project, project_name,
macros)

to_return.update(project_loaded)

# Check for duplicate model names
names_models = {}
for model, attribs in to_return.items():
name = attribs['name']
existing_name = names_models.get(name)
if existing_name is not None:
raise dbt.exceptions.CompilationException(
'Found models with the same name: \n- %s\n- %s' % (
model, existing_name))
names_models[name] = model
return to_return
Member:
this looks good. @drewbanin we should expand on this error message later on. specifically it'd be nice to tell the user what to do (disable one of these in your dbt_project.yml, or rename one of the models)


@classmethod
def load_project(cls, root_project, all_projects, project, project_name,
macros):
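
The review comment above suggests expanding this error message later on. A possible shape for that expanded message is sketched below; the exact wording and the dbt_project.yml hint are assumptions, not code from this PR:

    # Hypothetical helper showing one way the message could be expanded.
    # `name` is the duplicated model name; `model` and `existing_name` are the
    # two colliding entries, as in the check in ModelLoader.load_all above.
    def duplicate_model_message(name, model, existing_name):
        return (
            'Found two models with the name "%s":\n- %s\n- %s\n'
            'Either rename one of the models, or disable one of them '
            'in your dbt_project.yml.' % (name, model, existing_name))

    # Example:
    # duplicate_model_message('table', 'model.proj_a.table', 'model.proj_b.table')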
30 changes: 22 additions & 8 deletions dbt/parser.py
@@ -4,6 +4,7 @@
import hashlib
import collections

import dbt.exceptions
import dbt.flags
import dbt.model
import dbt.utils
@@ -272,14 +273,27 @@ def parse_sql_nodes(nodes, root_project, projects, tags=None, macros=None):
package_name,
node.get('name'))

# TODO if this is set, raise a compiler error
to_return[node_path] = parse_node(node,
node_path,
root_project,
projects.get(package_name),
projects,
tags=tags,
macros=macros)
node_parsed = parse_node(node,
node_path,
root_project,
projects.get(package_name),
projects,
tags=tags,
macros=macros)

# Ignore disabled nodes
if not node_parsed['config']['enabled']:
continue

# Check for duplicate model names
existing_node = to_return.get(node_path)
if existing_node is not None:
raise dbt.exceptions.CompilationException(
'Found models with the same name:\n- %s\n- %s' % (
existing_node.get('original_file_path'),
node.get('original_file_path')))
Member:

do we need this check in addition to the one in ModelLoader? if not let's just rip this out to DRY this up

Contributor Author:

I think we still need this check because to_return is a dictionary and duplicate models resolve to the same key. Without this check, only one of the models would be left in the dictionary after this stage and the user would not be notified about the existing duplicate.


to_return[node_path] = node_parsed

dbt.contracts.graph.parsed.validate_nodes(to_return)
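
To make the exchange above concrete (a standalone sketch, not code from this PR): duplicate models resolve to the same node_path, so without the check in parse_sql_nodes a plain dict assignment would silently keep only the last one parsed:

    # Hypothetical collision: both files map to the same key, so the second
    # assignment overwrites the first and no error is ever raised.
    to_return = {}
    to_return['model.test.table'] = {'original_file_path': 'models-a/table.sql'}
    to_return['model.test.table'] = {'original_file_path': 'models-b/table.sql'}
    assert len(to_return) == 1  # only one of the duplicates survives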

6 changes: 0 additions & 6 deletions test/integration/008_schema_tests_test/models/disabled.sql

This file was deleted.

7 changes: 0 additions & 7 deletions test/integration/008_schema_tests_test/models/schema.yml
@@ -53,10 +53,3 @@ table_failure_summary:

relationships:
- { from: favorite_color, to: ref('table_copy'), field: favorite_color }


# this will fail if run, but we've disabled this model
disabled:
constraints:
unique:
- not_unique
@@ -53,4 +53,4 @@ def test_view_with_incremental_attributes(self):
# should throw
self.assertTrue(False)
except RuntimeError as e:
self.assertTrue("which is disabled" in str(e))
self.assertTrue("which was not found" in str(e))
@@ -0,0 +1,8 @@
{{
config(
enabled=True,
materialized="table",
)
}}

select 1
@@ -0,0 +1,8 @@
{{
config(
enabled=True,
materialized="table",
)
}}

select 1
@@ -0,0 +1,8 @@
{{
config(
enabled=False,
materialized="table",
)
}}

select 1
@@ -0,0 +1,8 @@
{{
config(
enabled=True,
materialized="table",
)
}}

select 1 as value
7 changes: 7 additions & 0 deletions test/integration/025_duplicate_model_test/models-3/table.sql
@@ -0,0 +1,7 @@
{{
config(
materialized = 'table',
)
}}

select 1 as item
8 changes: 8 additions & 0 deletions test/integration/025_duplicate_model_test/models-4/table.sql
@@ -0,0 +1,8 @@
{{
config(
materialized = 'table',
enabled = False,
)
}}

select 1 as item
587 changes: 587 additions & 0 deletions test/integration/025_duplicate_model_test/seed.sql

Large diffs are not rendered by default.

155 changes: 155 additions & 0 deletions test/integration/025_duplicate_model_test/test_duplicate_model.py
@@ -0,0 +1,155 @@
from nose.plugins.attrib import attr

from dbt.exceptions import CompilationException
from test.integration.base import DBTIntegrationTest


class TestDuplicateModelEnabled(DBTIntegrationTest):

def setUp(self):
DBTIntegrationTest.setUp(self)

@property
def schema(self):
return "duplicate_model_025"

@property
def models(self):
return "test/integration/025_duplicate_model_test/models-1"

@property
def profile_config(self):
return {
"test": {
"outputs": {
"dev": {
"type": "postgres",
"threads": 1,
"host": "database",
"port": 5432,
"user": "root",
"pass": "password",
"dbname": "dbt",
"schema": self.unique_schema()
},
},
"target": "dev"
}
}

@attr(type="postgres")
def test_duplicate_model_enabled(self):
message = "Found models with the same name:.*"
with self.assertRaisesRegexp(CompilationException, message):
self.run_dbt(["run"])


class TestDuplicateModelDisabled(DBTIntegrationTest):

def setUp(self):
DBTIntegrationTest.setUp(self)

@property
def schema(self):
return "duplicate_model_025"

@property
def models(self):
return "test/integration/025_duplicate_model_test/models-2"

@property
def profile_config(self):
return {
"test": {
"outputs": {
"dev": {
"type": "postgres",
"threads": 1,
"host": "database",
"port": 5432,
"user": "root",
"pass": "password",
"dbname": "dbt",
"schema": self.unique_schema()
},
},
"target": "dev"
}
}

@attr(type="postgres")
def test_duplicate_model_disabled(self):
try:
self.run_dbt(["run"])
except CompilationException:
self.fail(
"Compilation Exception raised on disabled model")
query = "select value from {schema}.model" \
.format(schema=self.unique_schema())
result = self.run_sql(query, fetch="one")[0]
assert result == 1


class TestDuplicateModelEnabledAcrossPackages(DBTIntegrationTest):

def setUp(self):
DBTIntegrationTest.setUp(self)

@property
def schema(self):
return "duplicate_model_025"

@property
def models(self):
return "test/integration/025_duplicate_model_test/models-3"

@property
def project_config(self):
return {
"repositories": [
'https://github.com/fishtown-analytics/dbt-integration-project@master'
]
}

@attr(type="postgres")
def test_duplicate_model_enabled_across_packages(self):
self.run_dbt(["deps"])
message = "Found models with the same name:.*"
with self.assertRaisesRegexp(CompilationException, message):
self.run_dbt(["run"])


class TestDuplicateModelDisabledAcrossPackages(DBTIntegrationTest):

def setUp(self):
DBTIntegrationTest.setUp(self)
self.run_sql_file("test/integration/025_duplicate_model_test/seed.sql")

@property
def schema(self):
return "duplicate_model_025"

@property
def models(self):
return "test/integration/025_duplicate_model_test/models-4"

@property
def project_config(self):
return {
"repositories": [
'https://github.com/fishtown-analytics/dbt-integration-project@master'
]
}

@attr(type="postgres")
def test_duplicate_model_disabled_across_packages(self):
self.run_dbt(["deps"])
try:
self.run_dbt(["run"])
except CompilationException:
self.fail(
"Compilation Exception raised on disabled model")
query = "select 1 from {schema}.table" \
.format(schema=self.unique_schema())
result = self.run_sql(query, fetch="one")[0]
assert result == 1
29 changes: 0 additions & 29 deletions test/unit/test_graph.py
@@ -190,35 +190,6 @@ def test__model_materializations(self):
.get('materialized')
self.assertEquals(actual, expected)

def test__model_enabled(self):
self.use_models({
'model_one': 'select * from events',
'model_two': "select * from {{ref('model_one')}}",
})

cfg = {
"models": {
"materialized": "table",
"test_models_compile": {
"model_one": {"enabled": True},
"model_two": {"enabled": False},
}
}
}

compiler = self.get_compiler(self.get_project(cfg))
graph, linker = compiler.compile()

six.assertCountEqual(
self, linker.nodes(),
['model.test_models_compile.model_one',
'model.test_models_compile.model_two'])

six.assertCountEqual(
self, linker.edges(),
[('model.test_models_compile.model_one',
'model.test_models_compile.model_two',)])

def test__model_incremental(self):
self.use_models({
'model_one': 'select * from events'
42 changes: 0 additions & 42 deletions test/unit/test_parser.py
@@ -1101,48 +1101,6 @@ def test__other_project_config(self):
'raw_sql': self.find_input_by_name(
models, 'view').get('raw_sql')
},
'model.snowplow.disabled': {
'name': 'disabled',
'schema': 'analytics',
'resource_type': 'model',
'unique_id': 'model.snowplow.disabled',
'fqn': ['snowplow', 'disabled'],
'empty': False,
'package_name': 'snowplow',
'refs': [],
'depends_on': {
'nodes': [],
'macros': []
},
'path': 'disabled.sql',
'original_file_path': 'disabled.sql',
'root_path': get_os_path('/usr/src/app'),
'config': disabled_config,
'tags': set(),
'raw_sql': self.find_input_by_name(
models, 'disabled').get('raw_sql')
},
'model.snowplow.package': {
'name': 'package',
'schema': 'analytics',
'resource_type': 'model',
'unique_id': 'model.snowplow.package',
'fqn': ['snowplow', 'views', 'package'],
'empty': False,
'package_name': 'snowplow',
'refs': [],
'depends_on': {
'nodes': [],
'macros': []
},
'path': get_os_path('views/package.sql'),
'original_file_path': get_os_path('views/package.sql'),
'root_path': get_os_path('/usr/src/app'),
'config': sort_config,
'tags': set(),
'raw_sql': self.find_input_by_name(
models, 'package').get('raw_sql')
},
'model.snowplow.multi_sort': {
'name': 'multi_sort',
'schema': 'analytics',