Skip to content

Commit

Permalink
Update SemanticModel node to match DSI 0.1.0dev3 protocols (#7848)
Browse files Browse the repository at this point in the history
* Add tests to ensure our semantic layer nodes satisfy the DSI protocols

These tests create runtime checkable versions of the protocols defined in
DSI. Thus we can instantiate instances of our semantic layer nodes and
use `isinstance` to check that they satisfy the protocol. These `runtime_checkable`
versions of the protocols should only exist in testing and should never
be used in the actual package code.

* Update the `Dimension` object of `SemanticModel` node to match DSI protocol

* Make `UnparsedDimension` more strict and update schema readers accordingly

* Update the `Entity` object of `SemanticModel` node to match DSI protocol

* Make `UnparsedEntity` more strict and update schema readers accordingly

* Update the `Measure` object of `SemanticModel` node to match DSI protocol

* Make `UnparsedMeasure` more strict and update schema readers accordingly

* Update the `SemanticModel` node to match DSI protocol

A lot of the additions are helper functions which we don't actually
use in core. This is a known issue. We're in the process of removing
a fair number of them from the DSI protocol spec. However, in the meantime
we need to implement them to satisfy the protocol unfortunately.

* Make `UnparsedSemanticModel` more strict and update schema readers accordingly

* Changie entry for updating SemanticModel node
  • Loading branch information
QMalcolm authored Jun 13, 2023
1 parent 83d163a commit 38c0600
Show file tree
Hide file tree
Showing 7 changed files with 472 additions and 54 deletions.
7 changes: 7 additions & 0 deletions .changes/unreleased/Fixes-20230612-175854.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
kind: Fixes
body: Update SemanticModel node to properly implement the DSI 0.1.0dev3 SemanticModel
protocol spec
time: 2023-06-12T17:58:54.289704-07:00
custom:
Author: QMalcolm
Issue: 7833 7827
100 changes: 67 additions & 33 deletions core/dbt/contracts/graph/nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,17 +12,21 @@

from dbt.clients.system import write_file
from dbt.contracts.files import FileHash
from dbt.contracts.graph.unparsed import (
from dbt.contracts.graph.semantic_models import (
Defaults,
Dimension,
Docs,
Entity,
Measure,
SourceFileMetadata,
)
from dbt.contracts.graph.unparsed import (
Docs,
ExposureType,
ExternalTable,
FreshnessThreshold,
HasYamlMetadata,
MacroArgument,
MaturityType,
Measure,
Owner,
Quoting,
TestDef,
Expand All @@ -43,7 +47,11 @@
from dbt.events.contextvars import set_contextvars
from dbt.flags import get_flags
from dbt.node_types import ModelLanguage, NodeType, AccessType
from dbt_semantic_interfaces.references import MeasureReference
from dbt_semantic_interfaces.references import (
MeasureReference,
LinkableElementReference,
SemanticModelReference,
)
from dbt_semantic_interfaces.references import MetricReference as DSIMetricReference
from dbt_semantic_interfaces.type_enums.metric_type import MetricType
from dbt_semantic_interfaces.type_enums.time_granularity import TimeGranularity
Expand Down Expand Up @@ -554,30 +562,6 @@ def depends_on_macros(self):
return self.depends_on.macros


@dataclass
class FileSlice(dbtClassMixin, Replaceable):
"""Provides file slice level context about what something was created from.
Implementation of the dbt-semantic-interfaces `FileSlice` protocol
"""

filename: str
content: str
start_line_number: int
end_line_number: int


@dataclass
class SourceFileMetadata(dbtClassMixin, Replaceable):
"""Provides file context about what something was created from.
Implementation of the dbt-semantic-interfaces `Metadata` protocol
"""

repo_file_path: str
file_slice: FileSlice


# ====================================
# CompiledNode subclasses
# ====================================
Expand Down Expand Up @@ -703,7 +687,6 @@ def same_contract(self, old, adapter_type=None) -> bool:
and old_value.constraints != self.columns[old_key].constraints
and old.materialization_enforces_constraints
):

for old_constraint in old_value.constraints:
if (
old_constraint not in self.columns[old_key].constraints
Expand Down Expand Up @@ -1493,12 +1476,63 @@ class NodeRelation(dbtClassMixin):

@dataclass
class SemanticModel(GraphNode):
description: Optional[str]
model: str
node_relation: Optional[NodeRelation]
entities: Sequence[Entity]
measures: Sequence[Measure]
dimensions: Sequence[Dimension]
description: Optional[str] = None
defaults: Optional[Defaults] = None
entities: Sequence[Entity] = field(default_factory=list)
measures: Sequence[Measure] = field(default_factory=list)
dimensions: Sequence[Dimension] = field(default_factory=list)
metadata: Optional[SourceFileMetadata] = None

@property
def entity_references(self) -> List[LinkableElementReference]:
    """Return the `reference` of each entity, in the order of `self.entities`."""
    return [ent.reference for ent in self.entities]

@property
def dimension_references(self) -> List[LinkableElementReference]:
    """Return the `reference` of each dimension, in the order of `self.dimensions`."""
    return [dim.reference for dim in self.dimensions]

@property
def measure_references(self) -> List[MeasureReference]:
    """Return the `reference` of each measure, in the order of `self.measures`."""
    return [msr.reference for msr in self.measures]

@property
def has_validity_dimensions(self) -> bool:
    """Return True when at least one dimension has non-None `validity_params`.

    Returns False when `self.dimensions` is empty.
    """
    # A generator (not a list) lets `any` stop at the first matching dimension
    # instead of evaluating `validity_params` on every dimension up front.
    return any(dim.validity_params is not None for dim in self.dimensions)

@property
def validity_start_dimension(self) -> Optional[Dimension]:
    """Return the first dimension whose validity params have `is_start` set.

    Returns None when no dimension qualifies.
    """
    for candidate in self.dimensions:
        params = candidate.validity_params
        if params and params.is_start:
            return candidate
    return None

@property
def validity_end_dimension(self) -> Optional[Dimension]:
    """Return the first dimension whose validity params have `is_end` set.

    Returns None when no dimension qualifies.
    """
    for candidate in self.dimensions:
        params = candidate.validity_params
        if params and params.is_end:
            return candidate
    return None

@property
def partitions(self) -> List[Dimension]:  # noqa: D
    """Return the dimensions flagged `is_partition` (empty list when there are none)."""
    # `or []` tolerates a falsy `dimensions` value such as None.
    candidates = self.dimensions or []
    return [candidate for candidate in candidates if candidate.is_partition]

@property
def partition(self) -> Optional[Dimension]:
    """Return the first entry of `self.partitions`, or None when it is empty."""
    found = self.partitions
    return found[0] if found else None

@property
def reference(self) -> SemanticModelReference:
    """Build a `SemanticModelReference` keyed by this node's `name`."""
    model_name = self.name
    return SemanticModelReference(semantic_model_name=model_name)


# ====================================
Expand Down
152 changes: 152 additions & 0 deletions core/dbt/contracts/graph/semantic_models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
from dataclasses import dataclass
from dbt.dataclass_schema import dbtClassMixin
from dbt_semantic_interfaces.references import (
DimensionReference,
EntityReference,
MeasureReference,
TimeDimensionReference,
)
from dbt_semantic_interfaces.type_enums.aggregation_type import AggregationType
from dbt_semantic_interfaces.type_enums.dimension_type import DimensionType
from dbt_semantic_interfaces.type_enums.entity_type import EntityType
from dbt_semantic_interfaces.type_enums.time_granularity import TimeGranularity
from typing import List, Optional


@dataclass
class FileSlice(dbtClassMixin):
    """Provides file slice level context about what something was created from.
    Implementation of the dbt-semantic-interfaces `FileSlice` protocol
    """

    # All four fields are required (no defaults are declared); their order is
    # the positional order of the generated constructor.
    filename: str
    content: str
    start_line_number: int
    end_line_number: int


@dataclass
class SourceFileMetadata(dbtClassMixin):
    """Provides file context about what something was created from.
    Implementation of the dbt-semantic-interfaces `Metadata` protocol
    """

    # Both fields are required; `file_slice` nests a `FileSlice` instance.
    repo_file_path: str
    file_slice: FileSlice


@dataclass
class Defaults(dbtClassMixin):
    # Optional name of an aggregation time dimension.
    # NOTE(review): presumably the fallback for measures that do not set their
    # own `agg_time_dimension` -- confirm against the parser.
    agg_time_dimension: Optional[str] = None


# ====================================
# Dimension objects
# ====================================


@dataclass
class DimensionValidityParams(dbtClassMixin):
    # Boolean markers, both off by default. `SemanticModel` reads them (through
    # `Dimension.validity_params`) to pick its validity start / end dimension.
    is_start: bool = False
    is_end: bool = False


@dataclass
class DimensionTypeParams(dbtClassMixin):
    # `time_granularity` is required; `validity_params` is optional and is what
    # `Dimension.validity_params` returns when type params are present.
    time_granularity: TimeGranularity
    validity_params: Optional[DimensionValidityParams] = None


@dataclass
class Dimension(dbtClassMixin):
    # Parsed dimension of a semantic model. Only `name` and `type` are
    # required; every other field has a default.
    name: str
    type: DimensionType
    description: Optional[str] = None
    is_partition: bool = False
    type_params: Optional[DimensionTypeParams] = None
    expr: Optional[str] = None
    metadata: Optional[SourceFileMetadata] = None

    @property
    def reference(self) -> DimensionReference:
        """Build a `DimensionReference` whose element name is this dimension's name."""
        element_name = self.name
        return DimensionReference(element_name=element_name)

    @property
    def time_dimension_reference(self) -> Optional[TimeDimensionReference]:
        """Return a `TimeDimensionReference` for TIME dimensions, otherwise None."""
        is_time = self.type == DimensionType.TIME
        return TimeDimensionReference(element_name=self.name) if is_time else None

    @property
    def validity_params(self) -> Optional[DimensionValidityParams]:
        """Return `type_params.validity_params`, or None when `type_params` is falsy."""
        params = self.type_params
        return params.validity_params if params else None


# ====================================
# Entity objects
# ====================================


@dataclass
class Entity(dbtClassMixin):
    # Parsed entity of a semantic model. Only `name` and `type` are required.
    name: str
    type: EntityType
    description: Optional[str] = None
    role: Optional[str] = None
    expr: Optional[str] = None

    @property
    def reference(self) -> EntityReference:
        """Build an `EntityReference` whose element name is this entity's name."""
        element_name = self.name
        return EntityReference(element_name=element_name)

    @property
    def is_linkable_entity_type(self) -> bool:
        """Return True when `type` is PRIMARY, UNIQUE or NATURAL."""
        linkable_types = (EntityType.PRIMARY, EntityType.UNIQUE, EntityType.NATURAL)
        return self.type in linkable_types


# ====================================
# Measure objects
# ====================================


@dataclass
class MeasureAggregationParameters(dbtClassMixin):
    # Every field is optional and defaults to None, so "not provided" can be
    # told apart from an explicit False / 0.0.
    percentile: Optional[float] = None
    use_discrete_percentile: Optional[bool] = None
    use_approximate_percentile: Optional[bool] = None


@dataclass
class NonAdditiveDimension(dbtClassMixin):
    # All three fields are required.
    name: str
    window_choice: AggregationType
    # NOTE(review): `window_grouples` looks like a typo for the DSI protocol's
    # `window_groupings` -- confirm against the dbt-semantic-interfaces spec.
    # Renaming it must be done together with `UnparsedNonAdditiveDimension`
    # and the schema reader that copies the value across.
    window_grouples: List[str]


@dataclass
class Measure(dbtClassMixin):
    # Parsed measure of a semantic model. Only `name` and `agg` are required.
    name: str
    agg: AggregationType
    description: Optional[str] = None
    create_metric: bool = False
    expr: Optional[str] = None
    agg_params: Optional[MeasureAggregationParameters] = None
    non_additive_dimension: Optional[NonAdditiveDimension] = None
    agg_time_dimension: Optional[str] = None

    @property
    def checked_agg_time_dimension(self) -> TimeDimensionReference:
        """Return a `TimeDimensionReference` for `agg_time_dimension`.

        Raises:
            Exception: if `agg_time_dimension` is None.
        """
        time_dimension_name = self.agg_time_dimension
        if time_dimension_name is None:
            raise Exception("Measure is missing agg_time_dimension!")
        return TimeDimensionReference(element_name=time_dimension_name)

    @property
    def reference(self) -> MeasureReference:
        """Build a `MeasureReference` whose element name is this measure's name."""
        element_name = self.name
        return MeasureReference(element_name=element_name)
45 changes: 28 additions & 17 deletions core/dbt/contracts/graph/unparsed.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,11 @@

from dbt import deprecations
from dbt.node_types import NodeType
from dbt.contracts.graph.semantic_models import (
Defaults,
DimensionValidityParams,
MeasureAggregationParameters,
)
from dbt.contracts.util import (
AdditionalPropertiesMixin,
Mergeable,
Expand Down Expand Up @@ -673,52 +678,58 @@ def validate(cls, data):


@dataclass
class Entity(dbtClassMixin):
class UnparsedEntity(dbtClassMixin):
name: str
type: str # actually an enum
type: str # EntityType enum
description: Optional[str] = None
role: Optional[str] = None
expr: Optional[str] = None


@dataclass
class MeasureAggregationParameters(dbtClassMixin):
percentile: Optional[float] = None
use_discrete_percentile: bool = False
use_approximate_percentile: bool = False
class UnparsedNonAdditiveDimension(dbtClassMixin):
name: str
window_choice: str # AggregationType enum
window_grouples: List[str]


@dataclass
class Measure(dbtClassMixin):
class UnparsedMeasure(dbtClassMixin):
name: str
agg: str # actually an enum
description: Optional[str] = None
create_metric: Optional[bool] = None
create_metric: bool = False
expr: Optional[str] = None
agg_params: Optional[MeasureAggregationParameters] = None
non_additive_dimension: Optional[Dict[str, Any]] = None
non_additive_dimension: Optional[UnparsedNonAdditiveDimension] = None
agg_time_dimension: Optional[str] = None


@dataclass
class Dimension(dbtClassMixin):
class UnparsedDimensionTypeParams(dbtClassMixin):
time_granularity: str # TimeGranularity enum
validity_params: Optional[DimensionValidityParams] = None


@dataclass
class UnparsedDimension(dbtClassMixin):
name: str
type: str # actually an enum
description: Optional[str] = None
is_partition: Optional[bool] = False
type_params: Optional[Dict[str, Any]] = None
is_partition: bool = False
type_params: Optional[UnparsedDimensionTypeParams] = None
expr: Optional[str] = None
# TODO metadata: Optional[Metadata] (this would actually be the YML for the dimension)


@dataclass
class UnparsedSemanticModel(dbtClassMixin):
name: str
description: Optional[str]
model: str # looks like "ref(...)"
entities: List[Entity] = field(default_factory=list)
measures: List[Measure] = field(default_factory=list)
dimensions: List[Dimension] = field(default_factory=list)
description: Optional[str] = None
defaults: Optional[Defaults] = None
entities: List[UnparsedEntity] = field(default_factory=list)
measures: List[UnparsedMeasure] = field(default_factory=list)
dimensions: List[UnparsedDimension] = field(default_factory=list)


def normalize_date(d: Optional[datetime.date]) -> Optional[datetime.datetime]:
Expand Down
Loading

0 comments on commit 38c0600

Please sign in to comment.