Skip to content

Commit

Permalink
[Backport 1.4.latest] Add back depends_on for seeds - only macros
Browse files Browse the repository at this point in the history
…, never `nodes` (#6920)

* Add back `depends_on` for seeds - only `macros`, never `nodes` (#6851)

* Extend functional tests for seeds w hooks

* Add MacroDependsOn to seeds, raise exception for other deps

* Add changelog entry

* Fix unit tests

* Update upgrade_seed_content

* Cleanup

* Regen manifest v8 schema. Fix tests

* Be less magical

* PR feedback

(cherry picked from commit 298bf8a)

* Update manifest v8 for v1.4.x

* Cleanup

---------

Co-authored-by: Jeremy Cohen <[email protected]>
  • Loading branch information
github-actions[bot] and jtcohen6 authored Feb 9, 2023
1 parent 2bb2e73 commit ddb2f0f
Show file tree
Hide file tree
Showing 9 changed files with 227 additions and 75 deletions.
6 changes: 6 additions & 0 deletions .changes/unreleased/Fixes-20230203-135557.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Fixes
body: Readd depends_on.macros to SeedNode, to support seeds with hooks calling macros
time: 2023-02-03T13:55:57.853715+01:00
custom:
Author: jtcohen6
Issue: "6806"
39 changes: 37 additions & 2 deletions core/dbt/contracts/graph/nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
from dbt.contracts.util import Replaceable, AdditionalPropertiesMixin
from dbt.events.proto_types import NodeInfo
from dbt.events.functions import warn_or_error
from dbt.exceptions import ParsingError
from dbt.events.types import (
SeedIncreased,
SeedExceedsLimitSamePath,
Expand Down Expand Up @@ -482,6 +483,7 @@ class SeedNode(ParsedNode): # No SQLDefaults!
# seeds need the root_path because the contents are not loaded initially
# and we need the root_path to load the seed later
root_path: Optional[str] = None
depends_on: MacroDependsOn = field(default_factory=MacroDependsOn)

def same_seeds(self, other: "SeedNode") -> bool:
# for seeds, we check the hashes. If the hashes are different types,
Expand Down Expand Up @@ -523,6 +525,39 @@ def empty(self):
"""Seeds are never empty"""
return False

def _disallow_implicit_dependencies(self):
"""Disallow seeds to take implicit upstream dependencies via pre/post hooks"""
# Seeds are root nodes in the DAG. They cannot depend on other nodes.
# However, it's possible to define pre- and post-hooks on seeds, and for those
# hooks to include {{ ref(...) }}. This worked in previous versions, but it
# was never officially documented or supported behavior. Let's raise an explicit error,
# which will surface during parsing if the user has written code such that we attempt
# to capture & record a ref/source/metric call on the SeedNode.
# For more details: https://github.com/dbt-labs/dbt-core/issues/6806
hooks = [f'- pre_hook: "{hook.sql}"' for hook in self.config.pre_hook] + [
f'- post_hook: "{hook.sql}"' for hook in self.config.post_hook
]
hook_list = "\n".join(hooks)
message = f"""
Seeds cannot depend on other nodes. dbt detected a seed with a pre- or post-hook
that calls 'ref', 'source', or 'metric', either directly or indirectly via other macros.
Error raised for '{self.unique_id}', which has these hooks defined: \n{hook_list}
"""
raise ParsingError(message)

@property
def refs(self):
self._disallow_implicit_dependencies()

@property
def sources(self):
self._disallow_implicit_dependencies()

@property
def metrics(self):
self._disallow_implicit_dependencies()

def same_body(self, other) -> bool:
return self.same_seeds(other)

Expand All @@ -531,8 +566,8 @@ def depends_on_nodes(self):
return []

@property
def depends_on_macros(self):
return []
def depends_on_macros(self) -> List[str]:
return self.depends_on.macros

@property
def extra_ctes(self):
Expand Down
3 changes: 2 additions & 1 deletion core/dbt/contracts/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,6 @@ def upgrade_seed_content(node_content):
"refs",
"sources",
"metrics",
"depends_on",
"compiled_path",
"compiled",
"compiled_code",
Expand All @@ -260,6 +259,8 @@ def upgrade_seed_content(node_content):
):
if attr_name in node_content:
del node_content[attr_name]
# In v1.4, we switched SeedNode.depends_on from DependsOn to MacroDependsOn
node_content.get("depends_on", {}).pop("nodes", None)


def upgrade_manifest_json(manifest: dict) -> dict:
Expand Down
212 changes: 142 additions & 70 deletions schemas/dbt/manifest/v8.json

Large diffs are not rendered by default.

4 changes: 4 additions & 0 deletions test/unit/test_contracts_graph_parsed.py
Original file line number Diff line number Diff line change
Expand Up @@ -432,6 +432,7 @@ def basic_parsed_seed_dict():
'unique_id': 'seed.test.foo',
'fqn': ['test', 'seeds', 'foo'],
'database': 'test_db',
'depends_on': {'macros': []},
'description': '',
'schema': 'test_schema',
'tags': [],
Expand Down Expand Up @@ -472,6 +473,7 @@ def basic_parsed_seed_object():
raw_code='',
unique_id='seed.test.foo',
fqn=['test', 'seeds', 'foo'],
depends_on=MacroDependsOn(),
database='test_db',
description='',
schema='test_schema',
Expand Down Expand Up @@ -520,6 +522,7 @@ def complex_parsed_seed_dict():
'unique_id': 'seed.test.foo',
'fqn': ['test', 'seeds', 'foo'],
'database': 'test_db',
'depends_on': {'macros': []},
'description': 'a description',
'schema': 'test_schema',
'tags': ['mytag'],
Expand Down Expand Up @@ -564,6 +567,7 @@ def complex_parsed_seed_object():
unique_id='seed.test.foo',
fqn=['test', 'seeds', 'foo'],
database='test_db',
depends_on=MacroDependsOn(),
description='a description',
schema='test_schema',
tags=['mytag'],
Expand Down
2 changes: 2 additions & 0 deletions tests/functional/artifacts/expected_manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -414,6 +414,7 @@ def expected_seeded_manifest(project, model_database=None, quote_model=False):
"fqn": ["test", "seed"],
"tags": [],
"meta": {},
"depends_on": {"macros": []},
"schema": my_schema_name,
"database": project.database,
"alias": "seed",
Expand Down Expand Up @@ -1043,6 +1044,7 @@ def expected_references_manifest(project):
},
"config": get_rendered_seed_config(),
"deferred": False,
"depends_on": {"macros": []},
"description": "The test seed",
"docs": {"node_color": None, "show": True},
"fqn": ["test", "seed"],
Expand Down
1 change: 0 additions & 1 deletion tests/functional/artifacts/test_artifacts.py
Original file line number Diff line number Diff line change
Expand Up @@ -503,7 +503,6 @@ def test_run_and_generate(self, project, manifest_schema_path, run_results_schem
start_time = datetime.utcnow()
results = run_dbt(["compile"])
assert len(results) == 7

verify_manifest(
project,
expected_seeded_manifest(project, quote_model=False),
Expand Down
34 changes: 33 additions & 1 deletion tests/functional/hooks/test_model_hooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from pathlib import Path

from dbt.exceptions import CompilationError
from dbt.exceptions import CompilationError, ParsingError

from dbt.tests.util import (
run_dbt,
Expand Down Expand Up @@ -251,6 +251,8 @@ def project_config_update(self):
"post-hook": [
"alter table {{ this }} add column new_col int",
"update {{ this }} set new_col = 1",
# call any macro to track dependency: https://github.com/dbt-labs/dbt-core/issues/6806
"select null::{{ dbt.type_int() }} as id",
],
"quote_columns": False,
},
Expand All @@ -263,6 +265,36 @@ def test_hooks_on_seeds(self, project):
assert len(res) == 1, "Expected exactly one item"


class TestHooksRefsOnSeeds:
"""
This should not succeed, and raise an explicit error
https://github.com/dbt-labs/dbt-core/issues/6806
"""

@pytest.fixture(scope="class")
def seeds(self):
return {"example_seed.csv": seeds__example_seed_csv}

@pytest.fixture(scope="class")
def models(self):
return {"schema.yml": properties__seed_models, "post.sql": models__post}

@pytest.fixture(scope="class")
def project_config_update(self):
return {
"seeds": {
"post-hook": [
"select * from {{ ref('post') }}",
],
},
}

def test_hook_with_ref_on_seeds(self, project):
with pytest.raises(ParsingError) as excinfo:
run_dbt(["parse"])
assert "Seeds cannot depend on other nodes" in str(excinfo.value)


class TestPrePostModelHooksOnSeedsPlusPrefixed(TestPrePostModelHooksOnSeeds):
@pytest.fixture(scope="class")
def project_config_update(self):
Expand Down
1 change: 1 addition & 0 deletions tests/functional/list/test_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -380,6 +380,7 @@ def expect_seed_output(self):
"incremental_strategy": None,
"docs": {"node_color": None, "show": True},
},
"depends_on": {"macros": []},
"unique_id": "seed.test.seed",
"original_file_path": normalize("seeds/seed.csv"),
"alias": "seed",
Expand Down

0 comments on commit ddb2f0f

Please sign in to comment.