From 39e0c22353d2230fe1bc8b4852bbb0afa27fd2c0 Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Thu, 15 Jun 2023 14:41:57 -0400 Subject: [PATCH] Allow setting packages in dependencies.yml and move dependencies to runtime config (#7857) --- .../unreleased/Features-20230613-151917.yaml | 6 ++ core/dbt/config/project.py | 70 ++++++++++++++++--- core/dbt/config/renderer.py | 4 ++ core/dbt/config/runtime.py | 1 + core/dbt/contracts/graph/manifest.py | 3 +- core/dbt/parser/manifest.py | 57 ++++++--------- core/dbt/tests/fixtures/project.py | 19 +++++ .../dependencies/test_simple_dependency.py | 22 ++++++ .../multi_project/test_publication.py | 15 ++-- .../partial_parsing/test_partial_parsing.py | 9 ++- tests/unit/test_config.py | 3 + tests/unit/utils.py | 1 + 12 files changed, 152 insertions(+), 58 deletions(-) create mode 100644 .changes/unreleased/Features-20230613-151917.yaml diff --git a/.changes/unreleased/Features-20230613-151917.yaml b/.changes/unreleased/Features-20230613-151917.yaml new file mode 100644 index 00000000000..9aa0259966c --- /dev/null +++ b/.changes/unreleased/Features-20230613-151917.yaml @@ -0,0 +1,6 @@ +kind: Features +body: Enable setting packages in dependencies.yml +time: 2023-06-13T15:19:17.199297-04:00 +custom: + Author: gshank + Issue: 7372 7736 diff --git a/core/dbt/config/project.py b/core/dbt/config/project.py index f3723e41eb7..12c002ea665 100644 --- a/core/dbt/config/project.py +++ b/core/dbt/config/project.py @@ -38,6 +38,7 @@ SemverString, ) from dbt.contracts.project import PackageConfig, ProjectPackageMetadata +from dbt.contracts.publication import ProjectDependencies from dbt.dataclass_schema import ValidationError from .renderer import DbtProjectYamlRenderer, PackageRenderer from .selectors import ( @@ -93,17 +94,37 @@ def _load_yaml(path): return load_yaml_text(contents) -def package_data_from_root(project_root): +def package_and_project_data_from_root(project_root): package_filepath = resolve_path_from_base("packages.yml", project_root) + dependencies_filepath = resolve_path_from_base("dependencies.yml", project_root) + packages_yml_dict = {} + dependencies_yml_dict = {} if path_exists(package_filepath): - packages_dict = _load_yaml(package_filepath) - else: - packages_dict = None - return packages_dict + packages_yml_dict = _load_yaml(package_filepath) + if path_exists(dependencies_filepath): + dependencies_yml_dict = _load_yaml(dependencies_filepath) + + if "packages" in packages_yml_dict and "packages" in dependencies_yml_dict: + msg = "The 'packages' key cannot be specified in both packages.yml and dependencies.yml" + raise DbtProjectError(msg) + if "projects" in packages_yml_dict: + msg = "The 'projects' key cannot be specified in packages.yml" + raise DbtProjectError(msg) + packages_dict = {} + dependent_projects_dict = {} + if "packages" in dependencies_yml_dict: + packages_dict["packages"] = dependencies_yml_dict["packages"] + else: # don't check for "packages" here so we capture invalid keys in packages.yml + packages_dict = packages_yml_dict + if "projects" in dependencies_yml_dict: + dependent_projects_dict["projects"] = dependencies_yml_dict["projects"] -def package_config_from_data(packages_data: Dict[str, Any]): + return packages_dict, dependent_projects_dict + + +def package_config_from_data(packages_data: Dict[str, Any]) -> PackageConfig: if not packages_data: packages_data = {"packages": []} @@ -115,6 +136,21 @@ def package_config_from_data(packages_data: Dict[str, Any]): return packages +def dependent_project_config_from_data( + dependent_projects_data: Dict[str, Any] +) -> ProjectDependencies: + if not dependent_projects_data: + dependent_projects_data = {"projects": []} + + try: + ProjectDependencies.validate(dependent_projects_data) + dependent_projects = ProjectDependencies.from_dict(dependent_projects_data) + except ValidationError as e: + msg = f"Malformed dependencies.yml: {e}" + raise DbtProjectError(msg) + return dependent_projects + + def _parse_versions(versions: Union[List[str], str]) -> List[VersionSpecifier]: """Parse multiple versions as read from disk. The versions value may be any one of: @@ -239,6 +275,9 @@ def _get_required_version( class RenderComponents: project_dict: Dict[str, Any] = field(metadata=dict(description="The project dictionary")) packages_dict: Dict[str, Any] = field(metadata=dict(description="The packages dictionary")) + dependent_projects_dict: Dict[str, Any] = field( + metadata=dict(description="The dependent projects dictionary") + ) selectors_dict: Dict[str, Any] = field(metadata=dict(description="The selectors dictionary")) @@ -273,11 +312,15 @@ def get_rendered( rendered_project = renderer.render_project(self.project_dict, self.project_root) rendered_packages = renderer.render_packages(self.packages_dict) + rendered_dependent_projects = renderer.render_dependent_projects( + self.dependent_projects_dict + ) rendered_selectors = renderer.render_selectors(self.selectors_dict) return RenderComponents( project_dict=rendered_project, packages_dict=rendered_packages, + dependent_projects_dict=rendered_dependent_projects, selectors_dict=rendered_selectors, ) @@ -324,6 +367,7 @@ def create_project(self, rendered: RenderComponents) -> "Project": unrendered = RenderComponents( project_dict=self.project_dict, packages_dict=self.packages_dict, + dependent_projects_dict=self.dependent_projects_dict, selectors_dict=self.selectors_dict, ) dbt_version = _get_required_version( @@ -424,7 +468,10 @@ def create_project(self, rendered: RenderComponents) -> "Project": query_comment = _query_comment_from_cfg(cfg.query_comment) - packages = package_config_from_data(rendered.packages_dict) + packages: PackageConfig = package_config_from_data(rendered.packages_dict) + dependent_projects: ProjectDependencies = dependent_project_config_from_data( + rendered.dependent_projects_dict + ) selectors = selector_config_from_data(rendered.selectors_dict) manifest_selectors: Dict[str, Any] = {} if rendered.selectors_dict and rendered.selectors_dict["selectors"]: @@ -459,6 +506,7 @@ def create_project(self, rendered: RenderComponents) -> "Project": snapshots=snapshots, dbt_version=dbt_version, packages=packages, + dependent_projects=dependent_projects, manifest_selectors=manifest_selectors, selectors=selectors, query_comment=query_comment, @@ -481,6 +529,7 @@ def from_dicts( project_root: str, project_dict: Dict[str, Any], packages_dict: Dict[str, Any], + dependent_projects_dict: Dict[str, Any], selectors_dict: Dict[str, Any], *, verify_version: bool = False, @@ -495,6 +544,7 @@ def from_dicts( project_root=project_root, project_dict=project_dict, packages_dict=packages_dict, + dependent_projects_dict=dependent_projects_dict, selectors_dict=selectors_dict, verify_version=verify_version, ) @@ -505,13 +555,14 @@ def from_project_root( ) -> "PartialProject": project_root = os.path.normpath(project_root) project_dict = load_raw_project(project_root) - packages_dict = package_data_from_root(project_root) + packages_dict, dependent_projects_dict = package_and_project_data_from_root(project_root) selectors_dict = selector_data_from_root(project_root) return cls.from_dicts( project_root=project_root, project_dict=project_dict, selectors_dict=selectors_dict, packages_dict=packages_dict, + dependent_projects_dict=dependent_projects_dict, verify_version=verify_version, ) @@ -565,7 +616,8 @@ class Project: exposures: Dict[str, Any] vars: VarProvider dbt_version: List[VersionSpecifier] - packages: Dict[str, Any] + packages: PackageConfig + dependent_projects: ProjectDependencies manifest_selectors: Dict[str, Any] selectors: SelectorConfig query_comment: QueryComment diff --git a/core/dbt/config/renderer.py b/core/dbt/config/renderer.py index f41ac020662..3899d627b03 100644 --- a/core/dbt/config/renderer.py +++ b/core/dbt/config/renderer.py @@ -137,6 +137,10 @@ def render_packages(self, packages: Dict[str, Any]): package_renderer = self.get_package_renderer() return package_renderer.render_data(packages) + def render_dependent_projects(self, dependent_projects: Dict[str, Any]): + """This is a no-op to maintain regularity in the interfaces. We don't render dependencies.yml.""" + return dependent_projects + def render_selectors(self, selectors: Dict[str, Any]): return self.render_data(selectors) diff --git a/core/dbt/config/runtime.py b/core/dbt/config/runtime.py index 2f95ab6d664..2f20de2c81e 100644 --- a/core/dbt/config/runtime.py +++ b/core/dbt/config/runtime.py @@ -159,6 +159,7 @@ def from_parts( snapshots=project.snapshots, dbt_version=project.dbt_version, packages=project.packages, + dependent_projects=project.dependent_projects, manifest_selectors=project.manifest_selectors, selectors=project.selectors, query_comment=project.query_comment, diff --git a/core/dbt/contracts/graph/manifest.py b/core/dbt/contracts/graph/manifest.py index 617a7cf63f8..5df82b79672 100644 --- a/core/dbt/contracts/graph/manifest.py +++ b/core/dbt/contracts/graph/manifest.py @@ -22,7 +22,7 @@ from typing_extensions import Protocol from uuid import UUID -from dbt.contracts.publication import ProjectDependencies, PublicationConfig, PublicModel +from dbt.contracts.publication import PublicationConfig, PublicModel from dbt.contracts.graph.nodes import ( BaseNode, @@ -708,7 +708,6 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin): disabled: MutableMapping[str, List[GraphMemberNode]] = field(default_factory=dict) env_vars: MutableMapping[str, str] = field(default_factory=dict) public_nodes: MutableMapping[str, PublicModel] = field(default_factory=dict) - project_dependencies: Optional[ProjectDependencies] = None publications: MutableMapping[str, PublicationConfig] = field(default_factory=dict) semantic_nodes: MutableMapping[str, SemanticModel] = field(default_factory=dict) diff --git a/core/dbt/parser/manifest.py b/core/dbt/parser/manifest.py index e11acfddaa3..48596f3e29b 100644 --- a/core/dbt/parser/manifest.py +++ b/core/dbt/parser/manifest.py @@ -35,13 +35,11 @@ get_adapter_package_names, ) from dbt.constants import ( - DEPENDENCIES_FILE_NAME, MANIFEST_FILE_NAME, PARTIAL_PARSE_FILE_NAME, SEMANTIC_MANIFEST_FILE_NAME, ) from dbt.helper_types import PathSet -from dbt.clients.yaml_helper import load_yaml_text from dbt.events.functions import fire_event, get_invocation_id, warn_or_error from dbt.events.helpers import datetime_to_json_string from dbt.events.types import ( @@ -72,8 +70,6 @@ path_exists, read_json, write_file, - resolve_path_from_base, - load_file_contents, ) from dbt.config import Project, RuntimeConfig from dbt.context.docs import generate_runtime_docs_context @@ -114,7 +110,6 @@ PublicationArtifact, PublicationMetadata, PublicModel, - ProjectDependencies, ) from dbt.exceptions import ( TargetNotFoundError, @@ -767,33 +762,27 @@ def build_public_nodes(self) -> bool: public nodes have been rebuilt.""" public_nodes_changed = False - # Load the dependencies from the dependencies.yml file - # TODO: dependencies might be better in the RuntimeConfig and - # loaded somewhere earlier, but leaving this here for later refactoring. - # Loading it elsewhere would make it harder to detect that there were - # no dependencies previously and still are none, though that could be - # inferred from the manifest publication configs. - dependencies_filepath = resolve_path_from_base( - DEPENDENCIES_FILE_NAME, self.root_project.project_root - ) - saved_manifest_dependencies = self.manifest.project_dependencies - if path_exists(dependencies_filepath): - contents = load_file_contents(dependencies_filepath) - dependencies_dict = load_yaml_text(contents) - dependencies = ProjectDependencies.from_dict(dependencies_dict) - self.manifest.project_dependencies = dependencies - else: - self.manifest.project_dependencies = None + # Construct a dictionary of project_names to generated_at timestamps in + # order to determine what has changed. The dependent_projects in the RuntimeConfig + # are new for this run. + saved_dependent_projects = {} + for pub in self.manifest.publications.values(): + saved_dependent_projects[pub.project_name] = pub.metadata.generated_at # Return False if there weren't any dependencies before and aren't any now. - if saved_manifest_dependencies is None and self.manifest.project_dependencies is None: + if ( + len(saved_dependent_projects) == 0 + and len(self.root_project.dependent_projects.projects) == 0 + ): return False - # collect the names of the projects for later use - project_dependency_names = [] - if self.manifest.project_dependencies: - for project in self.manifest.project_dependencies.projects: - project_dependency_names.append(project.name) + # Collect current dependent projects + current_dependent_projects = [] + if ( + self.root_project.dependent_projects + ): # ManifestLoader has been passed some publication artifacts + for project in self.root_project.dependent_projects.projects: + current_dependent_projects.append(project.name) # Save previous publications, for later removal of references saved_manifest_publications: MutableMapping[str, PublicationConfig] = deepcopy( @@ -801,7 +790,7 @@ def build_public_nodes(self) -> bool: ) if self.manifest.publications: for project_name, publication in self.manifest.publications.items(): - if project_name not in project_dependency_names: + if project_name not in current_dependent_projects: remove_dependent_project_references(self.manifest, publication) saved_manifest_publications.pop(project_name) fire_event( @@ -820,7 +809,7 @@ def build_public_nodes(self) -> bool: # Empty public_nodes since we're re-generating them all self.manifest.public_nodes = {} - if self.manifest.project_dependencies: + if len(self.root_project.dependent_projects.projects) > 0: self.load_new_public_nodes() # Now that we've loaded the current publications and public_nodes, look for @@ -855,7 +844,7 @@ def build_public_nodes(self) -> bool: return public_nodes_changed def load_new_public_nodes(self): - for project in self.manifest.project_dependencies.projects: + for project in self.root_project.dependent_projects.projects: try: publication = self.publications[project.name] except KeyError: @@ -864,7 +853,7 @@ def load_new_public_nodes(self): publication_config = PublicationConfig.from_publication(publication) self.manifest.publications[project.name] = publication_config - # Add to dictionary of public_nodes and save id in PublicationConfig + # Add public models to dictionary of public_nodes for public_node in publication.public_models.values(): self.manifest.public_nodes[public_node.unique_id] = public_node @@ -1857,8 +1846,8 @@ def log_publication_artifact(root_project: RuntimeConfig, manifest: Manifest): dependencies = [] # Get dependencies from dependencies.yml - if manifest.project_dependencies: - for dep_project in manifest.project_dependencies.projects: + if root_project.dependent_projects.projects: + for dep_project in root_project.dependent_projects.projects: dependencies.append(dep_project.name) # Get dependencies from publication dependencies for pub_project in manifest.publications.values(): diff --git a/core/dbt/tests/fixtures/project.py b/core/dbt/tests/fixtures/project.py index 23243caef58..19e418003fe 100644 --- a/core/dbt/tests/fixtures/project.py +++ b/core/dbt/tests/fixtures/project.py @@ -192,6 +192,24 @@ def dbt_project_yml(project_root, project_config_update): return project_config +# Fixture to provide dependencies +@pytest.fixture(scope="class") +def dependencies(): + return {} + + +# Write out the dependencies.yml file +# Write out the packages.yml file +@pytest.fixture(scope="class") +def dependencies_yml(project_root, dependencies): + if dependencies: + if isinstance(dependencies, str): + data = dependencies + else: + data = yaml.safe_dump(dependencies) + write_file(data, project_root, "dependencies.yml") + + # Fixture to provide packages as either yaml or dictionary @pytest.fixture(scope="class") def packages(): @@ -461,6 +479,7 @@ def project( profiles_yml, dbt_project_yml, packages_yml, + dependencies_yml, selectors_yml, adapter, project_files, diff --git a/tests/functional/dependencies/test_simple_dependency.py b/tests/functional/dependencies/test_simple_dependency.py index 140966e1c07..a88575ff0fb 100644 --- a/tests/functional/dependencies/test_simple_dependency.py +++ b/tests/functional/dependencies/test_simple_dependency.py @@ -105,6 +105,28 @@ def test_simple_dependency(self, run_deps, project, run_clean): check_relations_equal(project.adapter, ["seed", "incremental"]) +class TestSimpleDependencyWithDependenciesFile(SimpleDependencyBase): + @pytest.fixture(scope="class") + def packages(self): + return {} + + @pytest.fixture(scope="class") + def dependencies(self): + return { + "packages": [ + { + "git": "https://github.com/dbt-labs/dbt-integration-project", + "warn-unpinned": True, + } + ] + } + + def test_dependency_with_dependencies_file(self, run_deps, project): + # Tests that "packages" defined in a dependencies.yml file works + results = run_dbt() + assert len(results) == 4 + + class TestSimpleDependencyNoProfile(SimpleDependencyBase): """dbt deps and clean commands should not require a profile.""" diff --git a/tests/functional/multi_project/test_publication.py b/tests/functional/multi_project/test_publication.py index 7e2a51c0ae2..a50475b2529 100644 --- a/tests/functional/multi_project/test_publication.py +++ b/tests/functional/multi_project/test_publication.py @@ -7,7 +7,7 @@ run_dbt_and_capture, get_logging_events, ) -from dbt.contracts.publication import ProjectDependency, PublicationArtifact, PublicModel +from dbt.contracts.publication import PublicationArtifact, PublicModel from dbt.exceptions import ( PublicationConfigNotFound, TargetNotFoundError, @@ -130,8 +130,11 @@ def models(self): "models.yml": models_yml, } + @pytest.fixture(scope="class") + def dependencies(self): + return dependencies_yml + def test_pub_artifacts(self, project): - write_file(dependencies_yml, "dependencies.yml") # Dependencies lists "marketing" project, but no publications provided with pytest.raises(PublicationConfigNotFound): @@ -157,14 +160,6 @@ def test_pub_artifacts(self, project): publication = PublicationArtifact.from_dict(publication_dict) assert publication.dependencies == ["marketing"] - # check project_dependencies in manifest - project_dependencies = manifest.project_dependencies - assert project_dependencies - project_dependency = project_dependencies.projects[0] - assert isinstance(project_dependency, ProjectDependency) - assert project_dependency.name == "marketing" - assert "custom_field" in project_dependency._extra - # source_node, target_model_name, target_model_package, target_model_version, current_project, node_package resolved_node = manifest.resolve_ref(None, "fct_one", "marketing", None, "test", "test") assert resolved_node diff --git a/tests/functional/partial_parsing/test_partial_parsing.py b/tests/functional/partial_parsing/test_partial_parsing.py index 6d7532edee5..2714a64cca4 100644 --- a/tests/functional/partial_parsing/test_partial_parsing.py +++ b/tests/functional/partial_parsing/test_partial_parsing.py @@ -822,16 +822,19 @@ def models(self): def marketing_publication(self): return PublicationArtifact.from_dict(json.loads(marketing_pub_json)) + @pytest.fixture(scope="class") + def dependencies(self): + return dependencies_yml + def test_dependencies(self, project, marketing_publication): # initial run with dependencies - write_file(dependencies_yml, "dependencies.yml") manifest = run_dbt(["parse"], publications=[marketing_publication]) - assert len(manifest.project_dependencies.projects) == 1 + assert len(manifest.publications) == 1 # remove dependencies write_file(empty_dependencies_yml, "dependencies.yml") manifest = run_dbt(["parse"], publications=[marketing_publication]) - assert len(manifest.project_dependencies.projects) == 0 + assert len(manifest.publications) == 0 class TestPublicationArtifactAvailable: diff --git a/tests/unit/test_config.py b/tests/unit/test_config.py index 409b0759bc0..bb7c6eb3f0e 100644 --- a/tests/unit/test_config.py +++ b/tests/unit/test_config.py @@ -575,6 +575,7 @@ def project_from_config_norender( project_dict=cfg, packages_dict=packages, selectors_dict={}, + dependent_projects_dict={}, verify_version=verify_version, ) # no rendering @@ -582,6 +583,7 @@ def project_from_config_norender( project_dict=partial.project_dict, packages_dict=partial.packages_dict, selectors_dict=partial.selectors_dict, + dependent_projects_dict=partial.dependent_projects_dict, ) return partial.create_project(rendered) @@ -596,6 +598,7 @@ def project_from_config_rendered( project_dict=cfg, packages_dict=packages, selectors_dict={}, + dependent_projects_dict={}, verify_version=verify_version, ) return partial.render(empty_project_renderer()) diff --git a/tests/unit/utils.py b/tests/unit/utils.py index 0f5c12ebbfd..48571cb0d54 100644 --- a/tests/unit/utils.py +++ b/tests/unit/utils.py @@ -70,6 +70,7 @@ def project_from_dict(project, profile, packages=None, selectors=None, cli_vars= project_root=project_root, project_dict=project, packages_dict=packages, + dependent_projects_dict={}, selectors_dict=selectors, ) return partial.render(renderer)