From 28c35f47bdbbb334ebb8e1b2eeba580701e24f1b Mon Sep 17 00:00:00 2001
From: Tatiana Al-Chueyr
Date: Thu, 14 Sep 2023 16:02:44 +0100
Subject: [PATCH] fix: Support parsing dbt dbt_project.yml without target-path (#2106)

As of dbt v1.5, usage of target-path in the dbt_project.yml file has been deprecated,
now preferring a CLI flag or env var. It will be removed in a future version.
See dbt-labs/dbt-core#6882

Docs: https://docs.getdbt.com/reference/project-configs/target-path

This change allows users to run DbtLocalArtifactProcessor in dbt projects that don't declare target-path

Fix: #2093

Signed-off-by: Tatiana Al-Chueyr
Signed-off-by: Sheeri K. Cabral
---
Note: build_target_path() honors an explicit target_path argument first,
then falls back to the value given to the constructor, the DBT_TARGET_PATH
environment variable, target-path in dbt_project.yml and finally "target".

 .../openlineage/common/provider/dbt/local.py  | 32 ++++++++++++++--
 .../common/tests/dbt/small/dbt_project.yml    |  1 -
 .../common/tests/dbt/test_dbt_local.py        | 38 +++++++++++++++++++
 3 files changed, 66 insertions(+), 5 deletions(-)

diff --git a/integration/common/openlineage/common/provider/dbt/local.py b/integration/common/openlineage/common/provider/dbt/local.py
index cc87b9f59b..e7acd911bd 100644
--- a/integration/common/openlineage/common/provider/dbt/local.py
+++ b/integration/common/openlineage/common/provider/dbt/local.py
@@ -11,6 +11,9 @@
 from openlineage.common.provider.dbt.processor import DbtArtifactProcessor
 from openlineage.common.utils import get_from_nullable_chain
 
+DBT_TARGET_PATH_ENVVAR = "DBT_TARGET_PATH"
+DEFAULT_TARGET_PATH = "target"
+
 
 class SkipUndefined(Undefined):
     def __getattr__(self, name):
@@ -43,6 +46,7 @@ def __init__(
         project_dir: str,
         profile_name: Optional[str] = None,
         target: Optional[str] = None,
+        target_path: Optional[str] = None,
         *args,
         **kwargs,
     ):
@@ -53,24 +57,44 @@ def __init__(
         dbt_project = self.load_yaml_with_jinja(
             os.path.join(project_dir, "dbt_project.yml")
         )
+        self.target_path = target_path
+        target_path = self.build_target_path(dbt_project)
 
         self.manifest_path = os.path.join(
-            absolute_dir, dbt_project["target-path"], "manifest.json"
+            absolute_dir, target_path, "manifest.json"
         )
         self.run_result_path = os.path.join(
-            absolute_dir, dbt_project["target-path"], "run_results.json"
+            absolute_dir, target_path, "run_results.json"
         )
         self.catalog_path = os.path.join(
-            absolute_dir, dbt_project["target-path"], "catalog.json"
+            absolute_dir, target_path, "catalog.json"
         )
 
         self.target = target
-
         self.project_name = dbt_project["name"]
         self.profile_name = profile_name or dbt_project.get("profile")
         if not self.profile_name:
             raise KeyError(f"profile not found in {dbt_project}")
 
+    def build_target_path(self, dbt_project: dict, target_path: Optional[str] = None) -> str:
+        """
+        Build dbt target path. Uses the following:
+        1. target_path (user-defined value, normally given in --target-path CLI flag)
+        2. DBT_TARGET_PATH environment variable
+        3. target-path in dbt_project.yml
+        4. default ("target")
+
+        Precedence order: user-defined target_path > env var > dbt_project.yml > default
+
+        Reference:
+        https://docs.getdbt.com/reference/project-configs/target-path
+        """
+        return target_path or self.target_path or \
+            os.getenv(DBT_TARGET_PATH_ENVVAR) or \
+            dbt_project.get("target-path") or \
+            DEFAULT_TARGET_PATH
+
     @classmethod
     def load_metadata(
         cls, path: str, desired_schema_versions: List[int], logger: logging.Logger
diff --git a/integration/common/tests/dbt/small/dbt_project.yml b/integration/common/tests/dbt/small/dbt_project.yml
index 3a28e18138..c67a647dc0 100644
--- a/integration/common/tests/dbt/small/dbt_project.yml
+++ b/integration/common/tests/dbt/small/dbt_project.yml
@@ -19,7 +19,6 @@ data-paths: ["data"]
 macro-paths: ["macros"]
 snapshot-paths: ["snapshots"]
 
-target-path: "target" # directory which will store compiled SQL files
 clean-targets: # directories to be removed by `dbt clean`
     - "target"
     - "dbt_modules"
diff --git a/integration/common/tests/dbt/test_dbt_local.py b/integration/common/tests/dbt/test_dbt_local.py
index 2490b35a30..51b2085a37 100644
--- a/integration/common/tests/dbt/test_dbt_local.py
+++ b/integration/common/tests/dbt/test_dbt_local.py
@@ -238,3 +238,41 @@ def test_logging_handler_does_not_warn():
     DbtLocalArtifactProcessor.load_metadata(path, [2], logger)
 
     logger.warning.assert_not_called()
+
+
+@mock.patch.dict(os.environ, {"DBT_TARGET_PATH": "target-from-envvar"}, clear=True)
+def test_build_target_path_with_user_defined():
+    processor = DbtLocalArtifactProcessor(
+        producer='https://github.com/OpenLineage/OpenLineage/tree/0.0.1/integration/dbt',
+        project_dir='tests/dbt/env_vars',
+        target='prod',
+        target_path="arg-target-name",
+        job_namespace="ol-namespace"
+    )
+    assert processor.build_target_path({}) == "arg-target-name"
+
+@mock.patch.dict(os.environ, {"DBT_TARGET_PATH": "target-from-envvar"}, clear=True)
+def test_build_target_path_with_envvar():
+    processor = DbtLocalArtifactProcessor(
+        producer='https://github.com/OpenLineage/OpenLineage/tree/0.0.1/integration/dbt',
+        project_dir='tests/dbt/env_vars',
+        target='prod',
+        job_namespace="ol-namespace"
+    )
+    assert processor.build_target_path({}) == "target-from-envvar"
+
+@pytest.mark.parametrize(
+    "test_name,dbt_project,expected",
+    [
+        ("with_dbt_project", {"target-path": "from-dbt-project"}, "from-dbt-project"),
+        ("with_default", {}, "target")
+    ]
+)
+def test_build_target_path(test_name, dbt_project, expected):
+    processor = DbtLocalArtifactProcessor(
+        producer='https://github.com/OpenLineage/OpenLineage/tree/0.0.1/integration/dbt',
+        project_dir='tests/dbt/env_vars',
+        target='prod',
+        job_namespace="ol-namespace"
+    )
+    assert processor.build_target_path(dbt_project) == expected