Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add get_catalog_for_single_relation macro and capability #231

Merged
merged 22 commits into from
Jun 18, 2024
Merged
Show file tree
Hide file tree
Changes from 17 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .changes/unreleased/Features-20240528-013623.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Features
body: Add get_catalog_for_single_relation macro and capability
time: 2024-05-28T01:36:23.588295+01:00
custom:
  Author: aranke
  Issue: "231"
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
import pytest

from dbt.adapters.capability import Capability
from dbt_common.contracts.metadata import (
TableMetadata,
StatsItem,
CatalogTable,
ColumnMetadata,
)
from dbt.tests.util import run_dbt, get_connection

# Model under test: a view that selects every column from the `my_seed` seed.
models__my_model_sql = """
{{
config(
materialized='view',
)
}}

select * from {{ ref('my_seed') }}
"""

# Single-row seed backing the view; the header row defines its five columns.
# NOTE(review): the email field reads "[email protected]", which looks like an
# address-obfuscation placeholder rather than real data — confirm the intended value.
seed__my_seed_csv = """id,first_name,email,ip_address,updated_at
1,Larry,[email protected],69.135.206.194,2008-09-12 19:08:31
"""


class BaseGetCatalogForSingleRelation:
    """Functional test for ``adapter.get_catalog_for_single_relation``.

    The test seeds one table, builds a view on top of it, and compares the
    catalog entry the adapter returns for that view against a hand-written
    expectation. Adapters that do not declare
    ``Capability.GetCatalogForSingleRelation`` skip the catalog comparison
    instead of passing silently.
    """

    @pytest.fixture(scope="class")
    def project_config_update(self):
        """Give the test project a fixed name."""
        return {"name": "get_catalog_for_single_relation"}

    @pytest.fixture(scope="class")
    def seeds(self):
        """Provide the single seed file the model selects from."""
        return {
            "my_seed.csv": seed__my_seed_csv,
        }

    @pytest.fixture(scope="class")
    def models(self):
        """Provide the single view model whose catalog entry is inspected."""
        return {
            "my_model.sql": models__my_model_sql,
        }

    @pytest.fixture(autouse=True)
    def clean_up(self, project):
        """Drop the test schema once the test body has finished."""
        yield
        with project.adapter.connection_named("__test"):
            relation = project.adapter.Relation.create(
                database=project.database, schema=project.test_schema
            )
            project.adapter.drop_schema(relation)

    def _expected_catalog_table(self, project):
        """Build the catalog entry expected for the ``MY_MODEL`` view.

        The literal values (upper-cased identifiers, ``NUMBER``/``VARCHAR``/
        ``TIMESTAMP_NTZ`` types, ``TESTER`` owner, "reported by Snowflake"
        descriptions) are kept exactly as the test originally asserted them.
        Subclasses targeting an adapter with different conventions can
        override this helper.
        """
        return CatalogTable(
            metadata=TableMetadata(
                type="VIEW",
                schema=project.test_schema.upper(),
                name="MY_MODEL",
                database=project.database,
                comment="",
                owner="TESTER",
            ),
            columns={
                "ID": ColumnMetadata(type="NUMBER", index=1, name="ID", comment=None),
                "FIRST_NAME": ColumnMetadata(
                    type="VARCHAR", index=2, name="FIRST_NAME", comment=None
                ),
                "EMAIL": ColumnMetadata(type="VARCHAR", index=3, name="EMAIL", comment=None),
                "IP_ADDRESS": ColumnMetadata(
                    type="VARCHAR", index=4, name="IP_ADDRESS", comment=None
                ),
                "UPDATED_AT": ColumnMetadata(
                    type="TIMESTAMP_NTZ", index=5, name="UPDATED_AT", comment=None
                ),
            },
            stats={
                "has_stats": StatsItem(
                    id="has_stats",
                    label="Has Stats?",
                    value=True,
                    include=False,
                    description="Indicates whether there are statistics for this table",
                ),
                "row_count": StatsItem(
                    id="row_count",
                    label="Row Count",
                    value=0,
                    include=True,
                    description="Number of rows in the table as reported by Snowflake",
                ),
                "bytes": StatsItem(
                    id="bytes",
                    label="Approximate Size",
                    value=0,
                    include=True,
                    description="Size of the table as reported by Snowflake",
                ),
            },
            unique_id=None,
        )

    def test_get_catalog_for_single_relation(self, project):
        """Seed, build the view, then compare the adapter's catalog entry."""
        results = run_dbt(["seed"])
        assert len(results) == 1

        results = run_dbt(["run"])
        assert len(results) == 1

        # Report unsupported adapters as skipped rather than as a vacuous pass.
        if not project.adapter.supports(Capability.GetCatalogForSingleRelation):
            pytest.skip("adapter does not support Capability.GetCatalogForSingleRelation")

        expected = self._expected_catalog_table(project)

        with get_connection(project.adapter):
            my_model_relation = project.adapter.get_relation(
                database=project.database,
                schema=project.test_schema,
                identifier="MY_MODEL",
            )
            # Fail with a clear message instead of passing None to the adapter.
            assert my_model_relation is not None, "MY_MODEL was not found after `dbt run`"
            actual = project.adapter.get_catalog_for_single_relation(my_model_relation)

        assert actual == expected


class TestGetCatalogForSingleRelation(BaseGetCatalogForSingleRelation):
    """Concrete, collectable run of the shared base test with no overrides."""
15 changes: 12 additions & 3 deletions dbt/adapters/base/impl.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import abc
import time
from concurrent.futures import as_completed, Future
from contextlib import contextmanager
from datetime import datetime
from enum import Enum
from multiprocessing.context import SpawnContext
import time
from typing import (
Any,
Callable,
Expand All @@ -23,12 +23,15 @@
TYPE_CHECKING,
)

import pytz
from dbt_common.clients.jinja import CallableMacroGenerator
from dbt_common.contracts.constraints import (
ColumnLevelConstraint,
ConstraintType,
ModelLevelConstraint,
)
from dbt_common.contracts.metadata import CatalogTable
from dbt_common.events.functions import fire_event, warn_or_error
from dbt_common.exceptions import (
DbtInternalError,
DbtRuntimeError,
Expand All @@ -38,14 +41,12 @@
NotImplementedError,
UnexpectedNullError,
)
from dbt_common.events.functions import fire_event, warn_or_error
from dbt_common.utils import (
AttrDict,
cast_to_str,
executor,
filter_null_values,
)
import pytz

from dbt.adapters.base.column import Column as BaseColumn
from dbt.adapters.base.connections import (
Expand Down Expand Up @@ -222,6 +223,7 @@ class BaseAdapter(metaclass=AdapterMeta):
- truncate_relation
- rename_relation
- get_columns_in_relation
- get_catalog_for_single_relation
- get_column_schema_from_query
- expand_column_types
- list_relations_without_caching
Expand Down Expand Up @@ -627,6 +629,13 @@ def get_columns_in_relation(self, relation: BaseRelation) -> List[BaseColumn]:
"""Get a list of the columns in the given Relation."""
raise NotImplementedError("`get_columns_in_relation` is not implemented for this adapter!")

@abc.abstractmethod
def get_catalog_for_single_relation(self, relation: BaseRelation) -> Optional[CatalogTable]:
    """Return catalog information for exactly one relation.

    The result covers both table-level and column-level metadata.

    :param relation: the relation whose catalog entry is requested.
    :return: the catalog entry, or ``None`` (the annotation allows it).
    :raises NotImplementedError: always, in this base declaration.
    """
    # NOTE(review): being declared abstract, adapters that subclass this base
    # directly presumably must provide their own implementation — confirm that
    # every such adapter does before release.
    message = "`get_catalog_for_single_relation` is not implemented for this adapter!"
    raise NotImplementedError(message)

@available.deprecated("get_columns_in_relation", lambda *a, **k: [])
def get_columns_in_table(self, schema: str, identifier: str) -> List[BaseColumn]:
"""DEPRECATED: Get a list of the columns in the given table."""
Expand Down
7 changes: 6 additions & 1 deletion dbt/adapters/capability.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,12 @@ class Capability(str, Enum):
"""Indicates support for determining the time of the last table modification by querying database metadata."""

TableLastModifiedMetadataBatch = "TableLastModifiedMetadataBatch"
"""Indicates support for performantly determining the time of the last table modification by querying database metadata in batch."""
"""Indicates support for performantly determining the time of the last table modification by querying database
metadata in batch."""

GetCatalogForSingleRelation = "GetCatalogForSingleRelation"
"""Indicates support for getting catalog information including table-level and column-level metadata for a single
relation."""


class Support(str, Enum):
Expand Down
9 changes: 8 additions & 1 deletion dbt/adapters/sql/impl.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from typing import Any, List, Optional, Tuple, Type, TYPE_CHECKING

from dbt_common.contracts.metadata import CatalogTable
from dbt_common.events.functions import fire_event

from dbt.adapters.base import BaseAdapter, BaseRelation, available
Expand All @@ -9,9 +10,9 @@
from dbt.adapters.exceptions import RelationTypeNullError
from dbt.adapters.sql.connections import SQLConnectionManager


# Names of the macros the adapter methods in this module execute.
LIST_RELATIONS_MACRO_NAME = "list_relations_without_caching"
GET_COLUMNS_IN_RELATION_MACRO_NAME = "get_columns_in_relation"
# NOTE(review): unlike its siblings this constant has no `_MACRO` infix;
# consider renaming it (together with its usages) for consistency.
GET_CATALOG_FOR_SINGLE_RELATION_NAME = "get_catalog_for_single_relation"
LIST_SCHEMAS_MACRO_NAME = "list_schemas"
CHECK_SCHEMA_EXISTS_MACRO_NAME = "check_schema_exists"
CREATE_SCHEMA_MACRO_NAME = "create_schema"
Expand Down Expand Up @@ -41,6 +42,7 @@ class SQLAdapter(BaseAdapter):
- get_catalog
- list_relations_without_caching
- get_columns_in_relation
- get_catalog_for_single_relation
"""

ConnectionManager: Type[SQLConnectionManager]
Expand Down Expand Up @@ -158,6 +160,11 @@ def get_columns_in_relation(self, relation):
GET_COLUMNS_IN_RELATION_MACRO_NAME, kwargs={"relation": relation}
)

def get_catalog_for_single_relation(self, relation: BaseRelation) -> Optional[CatalogTable]:
    """Fetch the catalog entry for one relation by executing the adapter macro.

    :param relation: passed to the macro as its ``relation`` keyword argument.
    :return: whatever the macro execution returns.
    """
    macro_kwargs = {"relation": relation}
    return self.execute_macro(GET_CATALOG_FOR_SINGLE_RELATION_NAME, kwargs=macro_kwargs)

def create_schema(self, relation: BaseRelation) -> None:
relation = relation.without_identifier()
fire_event(SchemaCreation(relation=_make_ref_key_dict(relation)))
Expand Down
9 changes: 9 additions & 0 deletions dbt/include/global_project/macros/adapters/metadata.sql
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,15 @@
'list_relations_without_caching macro not implemented for adapter '+adapter.type()) }}
{% endmacro %}

{# Entry point: dispatches to the adapter-specific get_catalog_for_single_relation
   implementation (looked up in the 'dbt' namespace) and returns its result. #}
{% macro get_catalog_for_single_relation(relation) %}
{{ return(adapter.dispatch('get_catalog_for_single_relation', 'dbt')(relation)) }}
{% endmacro %}

{# Default implementation: raises a not-implemented error that names the adapter type. #}
{% macro default__get_catalog_for_single_relation(relation) %}
{{ exceptions.raise_not_implemented(
'get_catalog_for_single_relation macro not implemented for adapter '+adapter.type()) }}
{% endmacro %}

{% macro get_relations() %}
{{ return(adapter.dispatch('get_relations', 'dbt')()) }}
{% endmacro %}
Expand Down
9 changes: 6 additions & 3 deletions pyproject.toml
aranke marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ classifiers = [
"Programming Language :: Python :: 3.12",
]
dependencies = [
"dbt-common<2.0",
"dbt-common@git+https://github.com/dbt-labs/dbt-common.git@main",
"pytz>=2015.7",
# installed via dbt-common but used directly
"agate>=1.0,<2.0",
Expand All @@ -43,6 +43,9 @@ Changelog = "https://github.com/dbt-labs/dbt-adapters/blob/main/CHANGELOG.md"
requires = ["hatchling"]
build-backend = "hatchling.build"

[tool.hatch.metadata]
allow-direct-references = true

[tool.hatch.version]
path = "dbt/adapters/__about__.py"

Expand All @@ -54,7 +57,7 @@ include = ["dbt/adapters", "dbt/include", "dbt/__init__.py"]

[tool.hatch.envs.default]
dependencies = [
"dbt_common @ git+https://github.com/dbt-labs/dbt-common.git",
"dbt_common @ git+https://github.com/dbt-labs/dbt-common.git@main",
'pre-commit==3.7.0;python_version>="3.9"',
'pre-commit==3.5.0;python_version=="3.8"',
]
Expand All @@ -64,7 +67,7 @@ code-quality = "pre-commit run --all-files"

[tool.hatch.envs.unit-tests]
dependencies = [
"dbt_common @ git+https://github.com/dbt-labs/dbt-common.git",
"dbt_common @ git+https://github.com/dbt-labs/dbt-common.git@main",
"pytest",
"pytest-dotenv",
"pytest-xdist",
Expand Down
Loading