Skip to content

Commit

Permalink
Support custom YAML formatting (#42)
Browse files Browse the repository at this point in the history
Closes #33
  • Loading branch information
kokorin authored Jul 18, 2024
1 parent 2cf292a commit 6dad404
Show file tree
Hide file tree
Showing 9 changed files with 377 additions and 123 deletions.
33 changes: 24 additions & 9 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -200,9 +200,9 @@ Or, if you need, you can specify `dbt-pumpkin-types` for specific Resources:
models:
"<YOUR_PROJECT_NAME>":
my_models:
+dbt-pumpkin-types:
numeric-precision-and-scale: true
string-length: true
+dbt-pumpkin-types:
numeric-precision-and-scale: true
string-length: true
sources:
"<YOUR_PROJECT_NAME>":
my_source:
Expand All @@ -212,15 +212,30 @@ sources:
seeds:
"<YOUR_PROJECT_NAME>":
my_seed:
+dbt-pumpkin-types:
numeric-precision-and-scale: true
string-length: true
+dbt-pumpkin-types:
numeric-precision-and-scale: true
string-length: true
snapshots:
"<YOUR_PROJECT_NAME>":
my_snapshot:
+dbt-pumpkin-types:
numeric-precision-and-scale: true
string-length: true
+dbt-pumpkin-types:
numeric-precision-and-scale: true
string-length: true
```

### YAML Format

You can configure how `dbt-pumpkin` formats YAML files. To do so, add the following dbt variable to your
project (when the `yaml` section is present, both `indent` and `offset` must be specified):

```yaml
vars:
dbt-pumpkin:
yaml:
# indent of properties in a map, default 2
indent: 2
# offset of items in a list, default 0
offset: 2
```

## Development
Expand Down
6 changes: 6 additions & 0 deletions dbt_pumpkin/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,12 @@
from pathlib import Path


@dataclass
class YamlFormat:
    """
    Indentation settings used when YAML files are written.
    """

    # Indent of properties in a map.
    indent: int
    # Offset of items in a list.
    offset: int


class ResourceType(Enum):
SEED = "seed"
SOURCE = "source"
Expand Down
36 changes: 32 additions & 4 deletions dbt_pumpkin/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,16 @@

from ruamel.yaml import YAML

from dbt_pumpkin.data import Resource, ResourceColumn, ResourceConfig, ResourceID, ResourceType, Table, TableColumn
from dbt_pumpkin.data import (
Resource,
ResourceColumn,
ResourceConfig,
ResourceID,
ResourceType,
Table,
TableColumn,
YamlFormat,
)
from dbt_pumpkin.dbt_compat import (
EventMsg,
Manifest,
Expand Down Expand Up @@ -191,7 +200,7 @@ def locate_project_dir(self) -> Path:

def _create_pumpkin_project(self, project_vars: dict[str, any]) -> Path:
"""
Creates fake DBT project with some important configurations copied to "vars" section.
Creates fake DBT project with provided "vars" section.
Allows hacking into DBT without using any internal DBT API.
"""
src_macros_path = Path(__file__).parent / "macros"
Expand Down Expand Up @@ -226,13 +235,32 @@ def _create_pumpkin_project(self, project_vars: dict[str, any]) -> Path:

return pumpkin_dir

def _load_project_yml(self):
def _load_project_yml(self) -> dict[str, any]:
project_yml_path = self.locate_project_dir() / "dbt_project.yml"
if not project_yml_path.exists() or not project_yml_path.is_file():
msg = f"dbt_project.yml is not found or doesn't exist: {project_yml_path}"
msg = f"dbt_project.yml not found: {project_yml_path}"
raise PumpkinError(msg)
return self._yaml.load(project_yml_path)

def detect_yaml_format(self) -> YamlFormat | None:
    """
    Reads the optional YAML formatting settings from `vars.dbt-pumpkin.yaml` in dbt_project.yml.

    Returns None when the project doesn't configure a YAML format.
    Raises PumpkinError when the configuration is present but malformed: a section is not a mapping,
    `indent` or `offset` is missing, or either value can't be converted to an integer.
    """
    # An empty `vars:` key is loaded as None rather than as a missing key, so normalize it to an empty mapping.
    project_vars = (self._load_project_yml() or {}).get("vars") or {}
    if not isinstance(project_vars, dict):
        msg = "YAML property is not an object: vars"
        raise PumpkinError(msg)

    pumpkin_var = project_vars.get("dbt-pumpkin", {})
    if not isinstance(pumpkin_var, dict):
        msg = "YAML property is not an object: vars.dbt-pumpkin"
        raise PumpkinError(msg)

    yaml_format = pumpkin_var.get("yaml")
    if not yaml_format:
        return None

    # A scalar or list here would otherwise fail with AttributeError on `.get` below.
    if not isinstance(yaml_format, dict):
        msg = "YAML property is not an object: vars.dbt-pumpkin.yaml"
        raise PumpkinError(msg)

    indent = yaml_format.get("indent")
    offset = yaml_format.get("offset")

    if indent is None or offset is None:
        msg = "Both indent and offset must be specified"
        raise PumpkinError(msg)

    try:
        return YamlFormat(indent=int(indent), offset=int(offset))
    except (TypeError, ValueError) as e:
        # Report bad values as a configuration error instead of leaking a raw conversion error.
        msg = f"indent and offset must be integers: indent={indent!r}, offset={offset!r}"
        raise PumpkinError(msg) from e

def _run_operation(
self, operation_name: str, project_vars: dict[str, any] | None, result_callback: Callable[[any], None]
):
Expand Down
12 changes: 6 additions & 6 deletions dbt_pumpkin/pumpkin.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,33 +16,33 @@ def __init__(self, project_params: ProjectParams, resource_params: ResourceParam
def bootstrap(self, *, dry_run: bool):
loader = ResourceLoader(self.project_params, self.resource_params)

logger.info("Loading resource")
logger.info("Loading resources")
resources = loader.select_resources()

planner = BootstrapPlanner(resources)
plan = planner.plan()

storage = DiskStorage(loader.locate_project_dir(), read_only=dry_run)
storage = DiskStorage(loader.locate_project_dir(), loader.detect_yaml_format(), read_only=dry_run)
logger.info("Executing actions")
plan.execute(storage)

def relocate(self, *, dry_run: bool):
loader = ResourceLoader(self.project_params, self.resource_params)

logger.info("Loading resource")
logger.info("Loading resources")
resources = loader.select_resources()

planner = RelocationPlanner(resources)
plan = planner.plan()

storage = DiskStorage(loader.locate_project_dir(), read_only=dry_run)
storage = DiskStorage(loader.locate_project_dir(), loader.detect_yaml_format(), read_only=dry_run)
logger.info("Executing actions")
plan.execute(storage)

def synchronize(self, *, dry_run: bool):
loader = ResourceLoader(self.project_params, self.resource_params)

logger.info("Loading resource")
logger.info("Loading resources")
resources = loader.select_resources()

logger.info("Looking up tables")
Expand All @@ -51,6 +51,6 @@ def synchronize(self, *, dry_run: bool):
planner = SynchronizationPlanner(resources, tables)
plan = planner.plan()

storage = DiskStorage(loader.locate_project_dir(), read_only=dry_run)
storage = DiskStorage(loader.locate_project_dir(), loader.detect_yaml_format(), read_only=dry_run)
logger.info("Executing actions")
plan.execute(storage)
27 changes: 19 additions & 8 deletions dbt_pumpkin/storage.py
Original file line number Diff line number Diff line change
@@ -1,32 +1,43 @@
from __future__ import annotations

import logging
from abc import abstractmethod
from pathlib import Path
from typing import Any
from typing import TYPE_CHECKING

from ruamel.yaml import YAML

if TYPE_CHECKING:
from pathlib import Path

from dbt_pumpkin.data import YamlFormat

logger = logging.getLogger(__name__)


class Storage:
@abstractmethod
def load_yaml(self, files: set[Path]) -> dict[Path, Any]:
def load_yaml(self, files: set[Path]) -> dict[Path, any]:
raise NotImplementedError

@abstractmethod
def save_yaml(self, files: dict[Path, Any]):
def save_yaml(self, files: dict[Path, any]):
raise NotImplementedError


class DiskStorage(Storage):
def __init__(self, root_dir: Path, *, read_only: bool):
def __init__(self, root_dir: Path, yaml_format: YamlFormat | None, *, read_only: bool):
self._root_dir = root_dir
self._read_only = read_only

self._yaml = YAML(typ="rt")
self._yaml.preserve_quotes = True
if yaml_format:
self._yaml.map_indent = yaml_format.indent
self._yaml.sequence_indent = yaml_format.indent + yaml_format.offset
self._yaml.sequence_dash_offset = yaml_format.offset

def load_yaml(self, files: set[Path]) -> dict[Path, Any]:
result: dict[Path, Any] = {}
def load_yaml(self, files: set[Path]) -> dict[Path, any]:
result: dict[Path, any] = {}

for file in files:
resolved_file = self._root_dir / file
Expand All @@ -39,7 +50,7 @@ def load_yaml(self, files: set[Path]) -> dict[Path, Any]:

return result

def save_yaml(self, files: dict[Path, Any]):
def save_yaml(self, files: dict[Path, any]):
if self._read_only:
return

Expand Down
73 changes: 73 additions & 0 deletions tests/mock_project.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
from __future__ import annotations

import shutil
from dataclasses import dataclass
from pathlib import Path
from tempfile import mkdtemp

from ruamel.yaml import YAML


@dataclass
class Project:
    """
    Description of a DBT project that the test helpers write to disk.
    """

    # Content of dbt_project.yml; must contain "name" when the project is used as a local package.
    project_yml: dict[str, any]
    # Files to create, keyed by path relative to the project root; values are the text content.
    project_files: dict[str, any]
    # Optional content for profiles.yml.
    profiles_yml: dict[str, any] | None = None
    # Optional projects created as local packages of this one.
    local_packages: list[Project] | None = None


# Shared YAML writer for the generated fixture files (dbt_project.yml, packages.yml, profiles.yml).
_yaml = YAML(typ="safe")


def _do_create_project(root: Path, project: Project):
    """
    Writes `project` into the existing directory `root`, recursing into its local packages.

    Creates dbt_project.yml and every entry of `project.project_files`. When the project has local packages,
    each one is created under `root / "sub_packages" / <package name>`, listed in packages.yml,
    and the whole tree is copied to `root / "dbt_packages"`.
    """
    # Keys from project.project_yml are unpacked last, so a fixture may override the install path.
    project_yaml = {"packages-install-path": str(root / "dbt_packages"), **project.project_yml}
    _yaml.dump(project_yaml, root / "dbt_project.yml")

    for path_str, content in project.project_files.items():
        path = root / path_str
        # parents=True: fixture files may live several directories deep, e.g. models/staging/x.sql
        path.parent.mkdir(parents=True, exist_ok=True)
        path.write_text(content, encoding="utf-8")

    if not project.local_packages:
        return

    package_entries = []

    for package in project.local_packages:
        package_name = package.project_yml["name"]
        package_root = root / "sub_packages" / package_name
        package_root.mkdir(parents=True, exist_ok=True)

        _do_create_project(package_root, package)

        package_entries.append({"local": str(package_root)})

    _yaml.dump({"packages": package_entries}, root / "packages.yml")

    # Packages are copied into place instead of being installed with `dbt deps`:
    # - DBT 1.5 can't install local deps on Windows
    # - DBT 1.8 and earlier change CWD when executing `dbt deps`
    #   https://github.com/dbt-labs/dbt-core/issues/8997
    # Copying the file tree is the easiest fix for both.
    shutil.copytree(root / "sub_packages", root / "dbt_packages")


def mock_project(project: Project) -> Path:
    """
    Creates `project` in a fresh temporary directory and returns the path to that directory.

    profiles.yml is written from `project.profiles_yml` when it is provided. Otherwise a default
    "test_pumpkin" profile is written, with a DuckDB database file located inside the project directory.
    The directory is created with mkdtemp and is not removed automatically; cleanup is up to the caller.
    """
    project_dir = Path(mkdtemp(prefix="test_pumpkin_"))

    default_profiles = {
        "test_pumpkin": {
            "target": "test",
            "outputs": {
                "test": {
                    # This comment stops the formatter from collapsing the dict into one line
                    "type": "duckdb",
                    "path": f"{project_dir}/dev.duckdb",
                    "threads": 1,
                }
            },
        }
    }

    _do_create_project(project_dir, project)

    # Honor explicitly provided profiles; previously this field was accepted but silently ignored.
    profiles = default_profiles if project.profiles_yml is None else project.profiles_yml
    _yaml.dump(profiles, project_dir / "profiles.yml")

    return project_dir
Loading

0 comments on commit 6dad404

Please sign in to comment.