From 62aeebff6331280d5eddb6ec7192fbb0104b734c Mon Sep 17 00:00:00 2001 From: Michelle Ark Date: Tue, 3 Oct 2023 15:10:48 -0400 Subject: [PATCH 1/9] respect project root when loading seeds --- core/dbt/context/providers.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/core/dbt/context/providers.py b/core/dbt/context/providers.py index 996d5027c58..7a6344f1a8e 100644 --- a/core/dbt/context/providers.py +++ b/core/dbt/context/providers.py @@ -862,8 +862,7 @@ def try_or_compiler_error( def load_agate_table(self) -> agate.Table: if not isinstance(self.model, SeedNode): raise LoadAgateTableNotSeedError(self.model.resource_type, node=self.model) - assert self.model.root_path - path = os.path.join(self.model.root_path, self.model.original_file_path) + path = os.path.join(self.config.project_root, self.model.original_file_path) column_types = self.model.config.column_types delimiter = self.model.config.delimiter try: From 7100ff953ca53de53503b7ff30d4f1b65c778bdf Mon Sep 17 00:00:00 2001 From: Michelle Ark Date: Tue, 3 Oct 2023 15:45:35 -0400 Subject: [PATCH 2/9] preserve relative path in seed root_path --- core/dbt/context/providers.py | 5 ++++- core/dbt/parser/seeds.py | 4 +++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/core/dbt/context/providers.py b/core/dbt/context/providers.py index 7a6344f1a8e..1d4820ee52e 100644 --- a/core/dbt/context/providers.py +++ b/core/dbt/context/providers.py @@ -862,7 +862,10 @@ def try_or_compiler_error( def load_agate_table(self) -> agate.Table: if not isinstance(self.model, SeedNode): raise LoadAgateTableNotSeedError(self.model.resource_type, node=self.model) - path = os.path.join(self.config.project_root, self.model.original_file_path) + assert self.model.root_path + path = os.path.join( + self.config.project_root, self.model.root_path, self.model.original_file_path + ) column_types = self.model.config.column_types delimiter = self.model.config.delimiter try: diff --git a/core/dbt/parser/seeds.py b/core/dbt/parser/seeds.py index 23c77e1ed7c..c11e97df757 100644 --- a/core/dbt/parser/seeds.py +++ b/core/dbt/parser/seeds.py @@ -1,3 +1,5 @@ +from os.path import relpath + from dbt.context.context_config import ContextConfig from dbt.contracts.graph.nodes import SeedNode from dbt.node_types import NodeType @@ -8,7 +10,7 @@ class SeedParser(SimpleSQLParser[SeedNode]): def parse_from_dict(self, dct, validate=True) -> SeedNode: # seeds need the root_path because the contents are not loaded - dct["root_path"] = self.project.project_root + dct["root_path"] = relpath(self.project.project_root, self.root_project.project_root) if "language" in dct: del dct["language"] # raw_code is not currently used, but it might be in the future From 86eebaee06c287eafef7693eda4e3480c76fb841 Mon Sep 17 00:00:00 2001 From: Michelle Ark Date: Tue, 3 Oct 2023 16:25:51 -0400 Subject: [PATCH 3/9] fix artifact tests --- tests/functional/artifacts/expected_manifest.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/functional/artifacts/expected_manifest.py b/tests/functional/artifacts/expected_manifest.py index 6082ae4b8d4..5b667a36ef5 100644 --- a/tests/functional/artifacts/expected_manifest.py +++ b/tests/functional/artifacts/expected_manifest.py @@ -443,7 +443,7 @@ def expected_seeded_manifest(project, model_database=None, quote_model=False): "patch_path": "test://" + seed_schema_yml_path, "path": "seed.csv", "name": "seed", - "root_path": project.project_root, + "root_path": ".", "resource_type": "seed", "raw_code": "", "package_name": "test", @@ -1158,7 +1158,7 @@ def expected_references_manifest(project): "path": "seed.csv", "raw_code": "", "resource_type": "seed", - "root_path": project.project_root, + "root_path": ".", "schema": my_schema_name, "database": project.database, "tags": [], From 60bbe0ef95a07a8936eaaa9275efe1399510a218 Mon Sep 17 00:00:00 2001 From: Michelle Ark Date: Tue, 3 Oct 2023 17:46:15 -0400 Subject: [PATCH 4/9] restore root_path, use packages_install_path in load_agate_table instead --- core/dbt/context/providers.py | 11 ++++++++--- core/dbt/contracts/graph/nodes.py | 1 + core/dbt/parser/seeds.py | 4 +--- tests/functional/artifacts/expected_manifest.py | 4 ++-- 4 files changed, 12 insertions(+), 8 deletions(-) diff --git a/core/dbt/context/providers.py b/core/dbt/context/providers.py index 1d4820ee52e..c6ffe4e35b8 100644 --- a/core/dbt/context/providers.py +++ b/core/dbt/context/providers.py @@ -862,10 +862,15 @@ def try_or_compiler_error( def load_agate_table(self) -> agate.Table: if not isinstance(self.model, SeedNode): raise LoadAgateTableNotSeedError(self.model.resource_type, node=self.model) - assert self.model.root_path - path = os.path.join( - self.config.project_root, self.model.root_path, self.model.original_file_path + + # include package_path for seeds defined in packages + package_path = ( + os.path.join(self.config.packages_install_path, self.model.package_name) + if self.model.package_name != self.config.project_name + else "." ) + path = os.path.join(self.config.project_root, package_path, self.model.original_file_path) + column_types = self.model.config.column_types delimiter = self.model.config.delimiter try: diff --git a/core/dbt/contracts/graph/nodes.py b/core/dbt/contracts/graph/nodes.py index 065a60a6d33..830237f8fd6 100644 --- a/core/dbt/contracts/graph/nodes.py +++ b/core/dbt/contracts/graph/nodes.py @@ -875,6 +875,7 @@ class SeedNode(ParsedNode): # No SQLDefaults! config: SeedConfig = field(default_factory=SeedConfig) # seeds need the root_path because the contents are not loaded initially # and we need the root_path to load the seed later + # TODO: remove root_path as it is unused, and instead computed dynamically in load_agate_table root_path: Optional[str] = None depends_on: MacroDependsOn = field(default_factory=MacroDependsOn) defer_relation: Optional[DeferRelation] = None diff --git a/core/dbt/parser/seeds.py b/core/dbt/parser/seeds.py index c11e97df757..23c77e1ed7c 100644 --- a/core/dbt/parser/seeds.py +++ b/core/dbt/parser/seeds.py @@ -1,5 +1,3 @@ -from os.path import relpath - from dbt.context.context_config import ContextConfig from dbt.contracts.graph.nodes import SeedNode from dbt.node_types import NodeType @@ -10,7 +8,7 @@ class SeedParser(SimpleSQLParser[SeedNode]): def parse_from_dict(self, dct, validate=True) -> SeedNode: # seeds need the root_path because the contents are not loaded - dct["root_path"] = relpath(self.project.project_root, self.root_project.project_root) + dct["root_path"] = self.project.project_root if "language" in dct: del dct["language"] # raw_code is not currently used, but it might be in the future diff --git a/tests/functional/artifacts/expected_manifest.py b/tests/functional/artifacts/expected_manifest.py index 5b667a36ef5..6082ae4b8d4 100644 --- a/tests/functional/artifacts/expected_manifest.py +++ b/tests/functional/artifacts/expected_manifest.py @@ -443,7 +443,7 @@ def expected_seeded_manifest(project, model_database=None, quote_model=False): "patch_path": "test://" + seed_schema_yml_path, "path": "seed.csv", "name": "seed", - "root_path": ".", + "root_path": project.project_root, "resource_type": "seed", "raw_code": "", "package_name": "test", @@ -1158,7 +1158,7 @@ def expected_references_manifest(project): "path": "seed.csv", "raw_code": "", "resource_type": "seed", - "root_path": ".", + "root_path": project.project_root, "schema": my_schema_name, "database": project.database, "tags": [], From 0494a172d7b3779da79522ec8e05568eb9ce5d59 Mon Sep 17 00:00:00 2001 From: Michelle Ark Date: Fri, 6 Oct 2023 12:06:23 -0400 Subject: [PATCH 5/9] add functional test: TestPortablePartialParsing --- core/dbt/tests/util.py | 4 ++ .../partial_parsing/test_partial_parsing.py | 72 ++++++++++++++++++- 2 files changed, 75 insertions(+), 1 deletion(-) diff --git a/core/dbt/tests/util.py b/core/dbt/tests/util.py index 7c22a4df94f..21a23b58010 100644 --- a/core/dbt/tests/util.py +++ b/core/dbt/tests/util.py @@ -224,6 +224,10 @@ def rm_dir(directory_path): raise FileNotFoundError(f"{directory_path} does not exist.") +def rename_dir(src_directory_path, dest_directory_path): + os.rename(src_directory_path, dest_directory_path) + + # Get an artifact (usually from the target directory) such as # manifest.json or catalog.json to use in a test def get_artifact(*paths): diff --git a/tests/functional/partial_parsing/test_partial_parsing.py b/tests/functional/partial_parsing/test_partial_parsing.py index cd89fd0d014..12d896f8b00 100644 --- a/tests/functional/partial_parsing/test_partial_parsing.py +++ b/tests/functional/partial_parsing/test_partial_parsing.py @@ -1,7 +1,16 @@ +from argparse import Namespace import pytest from unittest import mock -from dbt.tests.util import run_dbt, get_manifest, write_file, rm_file, run_dbt_and_capture +import dbt.flags as flags +from dbt.tests.util import ( + run_dbt, + get_manifest, + write_file, + rm_file, + run_dbt_and_capture, + rename_dir, +) from dbt.tests.fixtures.project import write_project_files from tests.functional.partial_parsing.fixtures import ( model_one_sql, @@ -846,3 +855,64 @@ def test_pp_external_models( run_dbt(["--partial-parse", "parse"]) assert len(manifest.nodes) == 6 assert len(manifest.external_node_unique_ids) == 4 + + +class TestPortablePartialParsing: + @pytest.fixture(scope="class") + def models(self): + return { + "model_one.sql": model_one_sql, + } + + @pytest.fixture(scope="class") + def packages(self): + return {"packages": [{"local": "local_dependency"}]} + + @pytest.fixture(scope="class") + def local_dependency_files(self): + return { + "dbt_project.yml": local_dependency__dbt_project_yml, + "models": { + "schema.yml": local_dependency__models__schema_yml, + "model_to_import.sql": local_dependency__models__model_to_import_sql, + }, + "macros": {"dep_macro.sql": local_dependency__macros__dep_macro_sql}, + "seeds": {"seed.csv": local_dependency__seeds__seed_csv}, + } + + def rename_project_root(self, project, new_project_root): + rename_dir(project.project_root, new_project_root) + project.project_root = new_project_root + # flags.project_dir is set during the project test fixture, and is persisted across run_dbt calls, + # so it needs to be reset between + flags.set_from_args(Namespace(PROJECT_DIR=new_project_root), None) + + @pytest.fixture(scope="class", autouse=True) + def initial_run_and_rename_project_dir(self, project, local_dependency_files): + initial_project_root = project.project_root + renamed_project_root = os.path.join(project.project_root.dirname, "renamed_project_dir") + + write_project_files(project.project_root, "local_dependency", local_dependency_files) + + # initial run + run_dbt(["deps"]) + assert len(run_dbt(["seed"])) == 1 + assert len(run_dbt(["run"])) == 2 + + self.rename_project_root(project, renamed_project_root) + yield + self.rename_project_root(project, initial_project_root) + + def test_pp_renamed_project_dir_unchanged_project_contents(self, project): + # partial parse same project in new absolute dir location, using partial_parse.msgpack created in previous dir + run_dbt(["deps"]) + assert len(run_dbt(["--partial-parse", "seed"])) == 1 + assert len(run_dbt(["--partial-parse", "run"])) == 2 + + def test_pp_renamed_project_dir_changed_project_contents(self, project): + write_file(model_two_sql, project.project_root, "models", "model_two.sql") + + # partial parse changed project in new absolute dir location, using partial_parse.msgpack created in previous dir + run_dbt(["deps"]) + len(run_dbt(["--partial-parse", "seed"])) == 1 + len(run_dbt(["--partial-parse", "run"])) == 3 From d316e77cbc77ee616eea6fa58e08fe2ce96cd1f2 Mon Sep 17 00:00:00 2001 From: Michelle Ark Date: Fri, 6 Oct 2023 13:44:01 -0400 Subject: [PATCH 6/9] fallback to existing model.root_path behaviour for seed loading --- core/dbt/context/providers.py | 3 +++ core/dbt/contracts/graph/nodes.py | 1 - tests/functional/partial_parsing/test_partial_parsing.py | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/core/dbt/context/providers.py b/core/dbt/context/providers.py index c6ffe4e35b8..0863b465b53 100644 --- a/core/dbt/context/providers.py +++ b/core/dbt/context/providers.py @@ -870,6 +870,9 @@ def load_agate_table(self) -> agate.Table: else "." ) path = os.path.join(self.config.project_root, package_path, self.model.original_file_path) + if not os.path.exists(path): + assert self.model.root_path + path = os.path.join(self.model.root_path, self.model.original_file_path) column_types = self.model.config.column_types delimiter = self.model.config.delimiter diff --git a/core/dbt/contracts/graph/nodes.py b/core/dbt/contracts/graph/nodes.py index 830237f8fd6..065a60a6d33 100644 --- a/core/dbt/contracts/graph/nodes.py +++ b/core/dbt/contracts/graph/nodes.py @@ -875,7 +875,6 @@ class SeedNode(ParsedNode): # No SQLDefaults! config: SeedConfig = field(default_factory=SeedConfig) # seeds need the root_path because the contents are not loaded initially # and we need the root_path to load the seed later - # TODO: remove root_path as it is unused, and instead computed dynamically in load_agate_table root_path: Optional[str] = None depends_on: MacroDependsOn = field(default_factory=MacroDependsOn) defer_relation: Optional[DeferRelation] = None diff --git a/tests/functional/partial_parsing/test_partial_parsing.py b/tests/functional/partial_parsing/test_partial_parsing.py index 12d896f8b00..62834f04106 100644 --- a/tests/functional/partial_parsing/test_partial_parsing.py +++ b/tests/functional/partial_parsing/test_partial_parsing.py @@ -884,7 +884,7 @@ def rename_project_root(self, project, new_project_root): rename_dir(project.project_root, new_project_root) project.project_root = new_project_root # flags.project_dir is set during the project test fixture, and is persisted across run_dbt calls, - # so it needs to be reset between + # so it needs to be reset between invocations flags.set_from_args(Namespace(PROJECT_DIR=new_project_root), None) @pytest.fixture(scope="class", autouse=True) From 55789a70e870c6fa90b2a8b3c4a13b9d5b52a201 Mon Sep 17 00:00:00 2001 From: Michelle Ark Date: Fri, 6 Oct 2023 13:45:57 -0400 Subject: [PATCH 7/9] changelog entry --- .changes/unreleased/Fixes-20231006-134551.yaml | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 .changes/unreleased/Fixes-20231006-134551.yaml diff --git a/.changes/unreleased/Fixes-20231006-134551.yaml b/.changes/unreleased/Fixes-20231006-134551.yaml new file mode 100644 index 00000000000..9dea4f6e194 --- /dev/null +++ b/.changes/unreleased/Fixes-20231006-134551.yaml @@ -0,0 +1,6 @@ +kind: Fixes +body: Enable seeds to be handled from stored manifest data +time: 2023-10-06T13:45:51.925546-04:00 +custom: + Author: michelleark + Issue: "6875" From bb92c0409d2d423bf4a3902f62b7d240c97a3559 Mon Sep 17 00:00:00 2001 From: Michelle Ark Date: Mon, 9 Oct 2023 12:44:55 -0400 Subject: [PATCH 8/9] restructure portable partial parsing tests --- tests/functional/partial_parsing/test_partial_parsing.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/functional/partial_parsing/test_partial_parsing.py b/tests/functional/partial_parsing/test_partial_parsing.py index 62834f04106..9e24a7aaaa2 100644 --- a/tests/functional/partial_parsing/test_partial_parsing.py +++ b/tests/functional/partial_parsing/test_partial_parsing.py @@ -857,7 +857,7 @@ def test_pp_external_models( assert len(manifest.external_node_unique_ids) == 4 -class TestPortablePartialParsing: +class TestPortablePartialParsingBase: @pytest.fixture(scope="class") def models(self): return { @@ -903,12 +903,16 @@ def initial_run_and_rename_project_dir(self, project, local_dependency_files): yield self.rename_project_root(project, initial_project_root) + +class TestPortablePartialParsingUnchangedProject(TestPortablePartialParsingBase): def test_pp_renamed_project_dir_unchanged_project_contents(self, project): # partial parse same project in new absolute dir location, using partial_parse.msgpack created in previous dir run_dbt(["deps"]) assert len(run_dbt(["--partial-parse", "seed"])) == 1 assert len(run_dbt(["--partial-parse", "run"])) == 2 + +class TestPortablePartialParsingChangedProject(TestPortablePartialParsingBase): def test_pp_renamed_project_dir_changed_project_contents(self, project): write_file(model_two_sql, project.project_root, "models", "model_two.sql") From f778deceaa3f44c73973e02ff69c93e3434c0fe9 Mon Sep 17 00:00:00 2001 From: Michelle Ark Date: Mon, 9 Oct 2023 15:23:17 -0400 Subject: [PATCH 9/9] try: fix permission issue w up_one --- .../partial_parsing/test_partial_parsing.py | 18 ++++++++---------- tests/functional/utils.py | 5 +++-- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/tests/functional/partial_parsing/test_partial_parsing.py b/tests/functional/partial_parsing/test_partial_parsing.py index 9e24a7aaaa2..4efc3577be9 100644 --- a/tests/functional/partial_parsing/test_partial_parsing.py +++ b/tests/functional/partial_parsing/test_partial_parsing.py @@ -11,6 +11,7 @@ run_dbt_and_capture, rename_dir, ) +from tests.functional.utils import up_one from dbt.tests.fixtures.project import write_project_files from tests.functional.partial_parsing.fixtures import ( model_one_sql, @@ -857,7 +858,7 @@ def test_pp_external_models( assert len(manifest.external_node_unique_ids) == 4 -class TestPortablePartialParsingBase: +class TestPortablePartialParsing: @pytest.fixture(scope="class") def models(self): return { @@ -881,11 +882,12 @@ def local_dependency_files(self): } def rename_project_root(self, project, new_project_root): - rename_dir(project.project_root, new_project_root) - project.project_root = new_project_root - # flags.project_dir is set during the project test fixture, and is persisted across run_dbt calls, - # so it needs to be reset between invocations - flags.set_from_args(Namespace(PROJECT_DIR=new_project_root), None) + with up_one(new_project_root): + rename_dir(project.project_root, new_project_root) + project.project_root = new_project_root + # flags.project_dir is set during the project test fixture, and is persisted across run_dbt calls, + # so it needs to be reset between invocations + flags.set_from_args(Namespace(PROJECT_DIR=new_project_root), None) @pytest.fixture(scope="class", autouse=True) def initial_run_and_rename_project_dir(self, project, local_dependency_files): @@ -903,16 +905,12 @@ def initial_run_and_rename_project_dir(self, project, local_dependency_files): yield self.rename_project_root(project, initial_project_root) - -class TestPortablePartialParsingUnchangedProject(TestPortablePartialParsingBase): def test_pp_renamed_project_dir_unchanged_project_contents(self, project): # partial parse same project in new absolute dir location, using partial_parse.msgpack created in previous dir run_dbt(["deps"]) assert len(run_dbt(["--partial-parse", "seed"])) == 1 assert len(run_dbt(["--partial-parse", "run"])) == 2 - -class TestPortablePartialParsingChangedProject(TestPortablePartialParsingBase): def test_pp_renamed_project_dir_changed_project_contents(self, project): write_file(model_two_sql, project.project_root, "models", "model_two.sql") diff --git a/tests/functional/utils.py b/tests/functional/utils.py index 26d31c752ac..ddfe367856b 100644 --- a/tests/functional/utils.py +++ b/tests/functional/utils.py @@ -1,13 +1,14 @@ import os from contextlib import contextmanager +from typing import Optional from pathlib import Path @contextmanager -def up_one(): +def up_one(return_path: Optional[Path] = None): current_path = Path.cwd() os.chdir("../") try: yield finally: - os.chdir(current_path) + os.chdir(return_path or current_path)