Skip to content

Commit

Permalink
Implementation of metadata-based freshness (#1072)
Browse files Browse the repository at this point in the history
* changelog

* turn on metadata-based source freshness capability

* add boundary test to confirm get_table raises an error properly

* add metadata-based source freshness for a relation

* remove unnecessary test setup

* remove unnecessary fixture from test

---------

Co-authored-by: colin-rogers-dbt <[email protected]>
  • Loading branch information
mikealfare and colin-rogers-dbt authored Jan 18, 2024
1 parent 8a2b9ed commit 24748d2
Show file tree
Hide file tree
Showing 5 changed files with 108 additions and 1 deletion.
6 changes: 6 additions & 0 deletions .changes/unreleased/Features-20231218-155409.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Features
body: Add support for checking table-last-modified by metadata
time: 2023-12-18T15:54:09.69635-05:00
custom:
Author: mikealfare
Issue: "938"
32 changes: 31 additions & 1 deletion dbt/adapters/bigquery/impl.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
from dataclasses import dataclass
from datetime import datetime
import json
import threading
import time
from typing import Any, Dict, List, Optional, Type, Set, Union
from typing import Any, Dict, List, Optional, Tuple, Type, Set, Union

import agate
from dbt import ui # type: ignore
Expand All @@ -16,7 +17,9 @@
SchemaSearchMap,
available,
)
from dbt.adapters.base.impl import FreshnessResponse
from dbt.adapters.cache import _make_ref_key_dict # type: ignore
from dbt.adapters.capability import CapabilityDict, CapabilitySupport, Support, Capability
import dbt.clients.agate_helper
from dbt.contracts.connection import AdapterResponse
from dbt.contracts.graph.manifest import Manifest
Expand All @@ -34,6 +37,7 @@
import google.cloud.bigquery
from google.cloud.bigquery import AccessEntry, SchemaField, Table as BigQueryTable
import google.cloud.exceptions
import pytz

from dbt.adapters.bigquery import BigQueryColumn, BigQueryConnectionManager
from dbt.adapters.bigquery.column import get_nested_column_data_types
Expand Down Expand Up @@ -116,6 +120,12 @@ class BigQueryAdapter(BaseAdapter):
ConstraintType.foreign_key: ConstraintSupport.ENFORCED,
}

_capabilities: CapabilityDict = CapabilityDict(
{
Capability.TableLastModifiedMetadata: CapabilitySupport(support=Support.Full),
}
)

def __init__(self, config) -> None:
super().__init__(config)
self.connections: BigQueryConnectionManager = self.connections
Expand Down Expand Up @@ -704,6 +714,26 @@ def _get_catalog_schemas(self, manifest: Manifest) -> SchemaSearchMap:
)
return result

def calculate_freshness_from_metadata(
self,
source: BaseRelation,
manifest: Optional[Manifest] = None,
) -> Tuple[Optional[AdapterResponse], FreshnessResponse]:
conn = self.connections.get_thread_connection()
client: google.cloud.bigquery.Client = conn.handle

table_ref = self.get_table_ref_from_relation(source)
table = client.get_table(table_ref)
snapshot = datetime.now(tz=pytz.UTC)

freshness = FreshnessResponse(
max_loaded_at=table.modified,
snapshotted_at=snapshot,
age=(snapshot - table.modified).total_seconds(),
)

return None, freshness

@available.parse(lambda *a, **k: {})
def get_common_options(
self, config: Dict[str, Any], node: Dict[str, Any], temporary: bool = False
Expand Down
18 changes: 18 additions & 0 deletions tests/boundary/test_bigquery_sdk.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import pytest

from dbt.tests.util import get_connection
from google.cloud.bigquery import Client, DatasetReference, TableReference
from google.api_core.exceptions import NotFound


@pytest.mark.parametrize("table_name", ["this_table_does_not_exist"])
def test_get_table_does_not_exist(project, table_name):
"""
TODO: replace dbt project methods with direct connection instantiation
"""
with get_connection(project.adapter) as conn:
client: Client = conn.handle
dataset_ref = DatasetReference(project.database, project.test_schema)
table_ref = TableReference(dataset_ref, table_name)
with pytest.raises(NotFound):
client.get_table(table_ref)
23 changes: 23 additions & 0 deletions tests/functional/adapter/sources_freshness_tests/files.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
SCHEMA_YML = """version: 2
sources:
- name: test_source
freshness:
warn_after: {count: 10, period: hour}
error_after: {count: 1, period: day}
schema: "{{ env_var('DBT_GET_LAST_RELATION_TEST_SCHEMA') }}"
tables:
- name: test_source
"""

SEED_TEST_SOURCE_CSV = """
id,name
1,Martin
2,Jeter
3,Ruth
4,Gehrig
5,DiMaggio
6,Torre
7,Mantle
8,Berra
9,Maris
""".strip()
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import os
import pytest

from dbt.tests.util import run_dbt

from tests.functional.adapter.sources_freshness_tests import files


class TestGetLastRelationModified:
@pytest.fixture(scope="class")
def seeds(self):
return {"test_source.csv": files.SEED_TEST_SOURCE_CSV}

@pytest.fixture(scope="class")
def models(self):
return {"schema.yml": files.SCHEMA_YML}

@pytest.fixture(scope="class", autouse=True)
def setup(self, project):
# we need the schema name for the sources section
os.environ["DBT_GET_LAST_RELATION_TEST_SCHEMA"] = project.test_schema
run_dbt(["seed"])
yield
del os.environ["DBT_GET_LAST_RELATION_TEST_SCHEMA"]

def test_get_last_relation_modified(self, project):
results = run_dbt(["source", "freshness"])
assert len(results) == 1
result = results[0]
assert result.status == "pass"

0 comments on commit 24748d2

Please sign in to comment.