Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add get_catalog_for_single_relation macro and capability #231

Merged
merged 22 commits into from
Jun 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .changes/unreleased/Features-20240528-013623.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Features
body: Add get_catalog_for_single_relation macro and capability to enable adapters to optimize catalog generation
time: 2024-05-28T01:36:23.588295+01:00
custom:
  Author: aranke
  Issue: "231"
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
import pytest

from dbt.tests.util import run_dbt, get_connection

# SQL for the `my_table_model.sql` model: selects every column from the
# `my_seed` seed. It carries no `config(...)` block of its own.
models__my_table_model_sql = """
select * from {{ ref('my_seed') }}
"""


# SQL for the `my_view_model.sql` model: same select as above, but explicitly
# configured with `materialized='view'`.
models__my_view_model_sql = """
{{
config(
materialized='view',
)
}}

select * from {{ ref('my_seed') }}
"""

# CSV content for the `my_seed.csv` seed: a header row with five columns
# followed by a single data row.
# NOTE(review): the email field below reads `[email protected]`, which looks
# like an obfuscation placeholder rather than real seed data -- confirm against
# the original file before relying on its value.
seed__my_seed_csv = """id,first_name,email,ip_address,updated_at
1,Larry,[email protected],69.135.206.194,2008-09-12 19:08:31
"""


class BaseGetCatalogForSingleRelation:
    """Reusable functional test for ``adapter.get_catalog_for_single_relation``.

    The test seeds ``my_seed``, builds ``my_view_model`` and ``my_table_model``,
    and compares the catalog returned by the adapter for the seed and for the
    view model against expected values supplied through fixtures.

    Adapter test suites subclass this class and override
    ``expected_catalog_my_seed`` and ``expected_catalog_my_view_model`` (the
    legacy name ``expected_catalog_my_model`` is still honoured) to return the
    expected catalog objects for their platform.
    """

    @pytest.fixture(scope="class")
    def project_config_update(self):
        """Name the test project after the feature under test."""
        return {"name": "get_catalog_for_single_relation"}

    @pytest.fixture(scope="class")
    def seeds(self):
        """Provide the single seed file used by both models."""
        return {
            "my_seed.csv": seed__my_seed_csv,
        }

    @pytest.fixture(scope="class")
    def models(self):
        """Provide the view model and the table model built on the seed."""
        return {
            "my_view_model.sql": models__my_view_model_sql,
            "my_table_model.sql": models__my_table_model_sql,
        }

    @pytest.fixture(scope="class")
    def expected_catalog_my_seed(self, project):
        """Expected catalog for ``my_seed``; must be overridden by subclasses.

        :raises NotImplementedError: Always, in this base implementation.
        """
        raise NotImplementedError(
            "To use this test, please implement `get_catalog_for_single_relation`, inherited from `SQLAdapter`."
        )

    @pytest.fixture(scope="class")
    def expected_catalog_my_model(self, project):
        """Legacy name for the expected catalog of ``my_view_model``.

        Kept so subclasses that already override this name keep working; new
        subclasses should override ``expected_catalog_my_view_model`` instead.

        :raises NotImplementedError: Always, in this base implementation.
        """
        raise NotImplementedError(
            "To use this test, please implement `get_catalog_for_single_relation`, inherited from `SQLAdapter`."
        )

    @pytest.fixture(scope="class")
    def expected_catalog_my_view_model(self, expected_catalog_my_model):
        """Expected catalog for ``my_view_model``, as requested by the test.

        The test method asks for this fixture by name, so it has to exist on
        the class. By default it forwards to ``expected_catalog_my_model`` so
        that overriding either fixture in a subclass is sufficient.
        """
        return expected_catalog_my_model

    def get_relation_for_identifier(self, project, identifier):
        """Look up ``identifier`` in the test project's database and schema.

        :param project: The dbt test project fixture.
        :param identifier: Name of the relation to fetch.
        :returns: Whatever ``project.adapter.get_relation`` returns for it.
        """
        return project.adapter.get_relation(
            database=project.database,
            schema=project.test_schema,
            identifier=identifier,
        )

    def test_get_catalog_for_single_relation(
        self, project, expected_catalog_my_seed, expected_catalog_my_view_model
    ):
        """Check the adapter's single-relation catalog for a seed and a view."""
        # Load the seed first; exactly one seed is defined for this project.
        results = run_dbt(["seed"])
        assert len(results) == 1

        my_seed_relation = self.get_relation_for_identifier(project, "my_seed")

        # The catalog lookup is performed inside an open adapter connection.
        with get_connection(project.adapter):
            actual_catalog_my_seed = project.adapter.get_catalog_for_single_relation(
                my_seed_relation
            )

        assert actual_catalog_my_seed == expected_catalog_my_seed

        # Build both models (view + table) before inspecting the view.
        results = run_dbt(["run"])
        assert len(results) == 2

        my_view_model_relation = self.get_relation_for_identifier(project, "my_view_model")

        with get_connection(project.adapter):
            actual_catalog_my_view_model = project.adapter.get_catalog_for_single_relation(
                my_view_model_relation
            )

        assert actual_catalog_my_view_model == expected_catalog_my_view_model
15 changes: 12 additions & 3 deletions dbt/adapters/base/impl.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import abc
import time
from concurrent.futures import as_completed, Future
from contextlib import contextmanager
from datetime import datetime
from enum import Enum
from multiprocessing.context import SpawnContext
import time
from typing import (
Any,
Callable,
Expand All @@ -23,12 +23,15 @@
TYPE_CHECKING,
)

import pytz
from dbt_common.clients.jinja import CallableMacroGenerator
from dbt_common.contracts.constraints import (
ColumnLevelConstraint,
ConstraintType,
ModelLevelConstraint,
)
from dbt_common.contracts.metadata import CatalogTable
from dbt_common.events.functions import fire_event, warn_or_error
from dbt_common.exceptions import (
DbtInternalError,
DbtRuntimeError,
Expand All @@ -38,14 +41,12 @@
NotImplementedError,
UnexpectedNullError,
)
from dbt_common.events.functions import fire_event, warn_or_error
from dbt_common.utils import (
AttrDict,
cast_to_str,
executor,
filter_null_values,
)
import pytz

from dbt.adapters.base.column import Column as BaseColumn
from dbt.adapters.base.connections import (
Expand Down Expand Up @@ -222,6 +223,7 @@ class BaseAdapter(metaclass=AdapterMeta):
- truncate_relation
- rename_relation
- get_columns_in_relation
- get_catalog_for_single_relation
- get_column_schema_from_query
- expand_column_types
- list_relations_without_caching
Expand Down Expand Up @@ -627,6 +629,13 @@ def get_columns_in_relation(self, relation: BaseRelation) -> List[BaseColumn]:
"""Get a list of the columns in the given Relation."""
raise NotImplementedError("`get_columns_in_relation` is not implemented for this adapter!")

@abc.abstractmethod
def get_catalog_for_single_relation(self, relation: BaseRelation) -> Optional[CatalogTable]:
    """Return catalog information for a single relation.

    The catalog information covers both table-level and column-level
    metadata. This base implementation provides no behaviour of its own and
    only raises.

    :param relation: The relation whose catalog information is requested.
    :raises NotImplementedError: Always, in this base implementation.
    """
    message = "`get_catalog_for_single_relation` is not implemented for this adapter!"
    raise NotImplementedError(message)

@available.deprecated("get_columns_in_relation", lambda *a, **k: [])
def get_columns_in_table(self, schema: str, identifier: str) -> List[BaseColumn]:
"""DEPRECATED: Get a list of the columns in the given table."""
Expand Down
7 changes: 6 additions & 1 deletion dbt/adapters/capability.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,12 @@ class Capability(str, Enum):
"""Indicates support for determining the time of the last table modification by querying database metadata."""

TableLastModifiedMetadataBatch = "TableLastModifiedMetadataBatch"
"""Indicates support for performantly determining the time of the last table modification by querying database metadata in batch."""
"""Indicates support for performantly determining the time of the last table modification by querying database
metadata in batch."""

GetCatalogForSingleRelation = "GetCatalogForSingleRelation"
"""Indicates support for getting catalog information including table-level and column-level metadata for a single
relation."""


class Support(str, Enum):
Expand Down
9 changes: 8 additions & 1 deletion dbt/adapters/sql/impl.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from typing import Any, List, Optional, Tuple, Type, TYPE_CHECKING

from dbt_common.contracts.metadata import CatalogTable
from dbt_common.events.functions import fire_event

from dbt.adapters.base import BaseAdapter, BaseRelation, available
Expand All @@ -9,9 +10,9 @@
from dbt.adapters.exceptions import RelationTypeNullError
from dbt.adapters.sql.connections import SQLConnectionManager


LIST_RELATIONS_MACRO_NAME = "list_relations_without_caching"
GET_COLUMNS_IN_RELATION_MACRO_NAME = "get_columns_in_relation"
GET_CATALOG_FOR_SINGLE_RELATION_NAME = "get_catalog_for_single_relation"
LIST_SCHEMAS_MACRO_NAME = "list_schemas"
CHECK_SCHEMA_EXISTS_MACRO_NAME = "check_schema_exists"
CREATE_SCHEMA_MACRO_NAME = "create_schema"
Expand Down Expand Up @@ -41,6 +42,7 @@ class SQLAdapter(BaseAdapter):
- get_catalog
- list_relations_without_caching
- get_columns_in_relation
- get_catalog_for_single_relation
"""

ConnectionManager: Type[SQLConnectionManager]
Expand Down Expand Up @@ -158,6 +160,11 @@ def get_columns_in_relation(self, relation):
GET_COLUMNS_IN_RELATION_MACRO_NAME, kwargs={"relation": relation}
)

def get_catalog_for_single_relation(self, relation: BaseRelation) -> Optional[CatalogTable]:
    """Get catalog information for one relation by executing a macro.

    Runs the macro named by ``GET_CATALOG_FOR_SINGLE_RELATION_NAME`` with
    ``relation`` as its only keyword argument and returns the macro's result
    unchanged.

    :param relation: The relation to pass to the macro.
    """
    macro_kwargs = {"relation": relation}
    return self.execute_macro(GET_CATALOG_FOR_SINGLE_RELATION_NAME, kwargs=macro_kwargs)

def create_schema(self, relation: BaseRelation) -> None:
relation = relation.without_identifier()
fire_event(SchemaCreation(relation=_make_ref_key_dict(relation)))
Expand Down
9 changes: 9 additions & 0 deletions dbt/include/global_project/macros/adapters/metadata.sql
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,15 @@
'list_relations_without_caching macro not implemented for adapter '+adapter.type()) }}
{% endmacro %}

{% macro get_catalog_for_single_relation(relation) %}
  {#- Resolve the adapter-specific implementation via dispatch, then call it with the relation. -#}
  {%- set dispatched_macro = adapter.dispatch('get_catalog_for_single_relation', 'dbt') -%}
  {{ return(dispatched_macro(relation)) }}
{% endmacro %}

{% macro default__get_catalog_for_single_relation(relation) %}
  {#- Default implementation: report that the macro is not implemented, naming the adapter type. -#}
  {%- set not_implemented_message = 'get_catalog_for_single_relation macro not implemented for adapter '+adapter.type() -%}
  {{ exceptions.raise_not_implemented(not_implemented_message) }}
{% endmacro %}

{% macro get_relations() %}
{{ return(adapter.dispatch('get_relations', 'dbt')()) }}
{% endmacro %}
Expand Down
5 changes: 4 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ classifiers = [
"Programming Language :: Python :: 3.12",
]
dependencies = [
"dbt-common<2.0",
"dbt-common>=1.3,<2.0",
"pytz>=2015.7",
# installed via dbt-common but used directly
"agate>=1.0,<2.0",
Expand All @@ -43,6 +43,9 @@ Changelog = "https://github.com/dbt-labs/dbt-adapters/blob/main/CHANGELOG.md"
requires = ["hatchling"]
build-backend = "hatchling.build"

[tool.hatch.metadata]
allow-direct-references = true

[tool.hatch.version]
path = "dbt/adapters/__about__.py"

Expand Down
Loading