Skip to content

Commit

Permalink
fix(dbt): cache the file reads of dbt manifest
Browse files Browse the repository at this point in the history
  • Loading branch information
rexledesma committed Nov 14, 2023
1 parent bcf6fcb commit 75f0341
Showing 1 changed file with 16 additions and 1 deletion.
17 changes: 16 additions & 1 deletion python_modules/libraries/dagster-dbt/dagster_dbt/dbt_manifest.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from functools import lru_cache
from pathlib import Path
from typing import Any, Mapping, Union, cast

Expand All @@ -7,13 +8,27 @@
# Accepted ways to supply a dbt manifest: an already-parsed mapping, or the
# location of a manifest file given as a ``str`` or ``Path``.
DbtManifestParam = Union[Mapping[str, Any], str, Path]


@lru_cache(maxsize=None)
def read_manifest_path(manifest_path: Path) -> Mapping[str, Any]:
    """Load the dbt manifest file at ``manifest_path`` and return its parsed JSON.

    The result is memoized per path, so reading the same path repeatedly yields the same
    parsed object instead of creating multiple copies of the manifest in memory.

    If the manifest stops being held in memory (i.e. it becomes garbage collected), this
    cache can be removed.

    Args:
        manifest_path: Location of the manifest file. It doubles as the cache key, so
            equivalent files should be passed using the same path value.

    Returns:
        The parsed manifest JSON.
    """
    raw_manifest = manifest_path.read_bytes()
    parsed_manifest = orjson.loads(raw_manifest)

    return cast(Mapping[str, Any], parsed_manifest)


def validate_manifest(manifest: DbtManifestParam) -> Mapping[str, Any]:
    """Normalize a dbt manifest parameter into its parsed form.

    Args:
        manifest: Either an already-parsed manifest ``dict``, or the location of a
            manifest file given as a ``str`` or ``Path``.

    Returns:
        The ``dict`` unchanged if one was passed; otherwise the parsed contents of the
        manifest file, obtained through the cached ``read_manifest_path``.

    Note:
        ``check.inst_param`` is expected to reject any other argument type -- confirm the
        exact exception against the ``check`` module.
    """
    check.inst_param(manifest, "manifest", (Path, str, dict))

    if isinstance(manifest, str):
        manifest = Path(manifest)

    if isinstance(manifest, Path):
        # Resolve the path to ensure a consistent key for the cache. The file must be read
        # only through the cached reader: parsing it here first would rebind ``manifest``
        # to a mapping (which has no ``resolve``) and bypass the cache entirely.
        manifest = read_manifest_path(manifest.resolve())

    return manifest

0 comments on commit 75f0341

Please sign in to comment.