Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update SemanticModel node to match DSI 0.1.0dev3 protocols #7848

Merged
merged 10 commits into from
Jun 13, 2023
Merged
7 changes: 7 additions & 0 deletions .changes/unreleased/Fixes-20230612-175854.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
kind: Fixes
body: Update SemanticModel node to properly implement the DSI 0.1.0dev3 SemanticModel
protocol spec
time: 2023-06-12T17:58:54.289704-07:00
custom:
Author: QMalcolm
Issue: 7833 7827
100 changes: 67 additions & 33 deletions core/dbt/contracts/graph/nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,17 +12,21 @@

from dbt.clients.system import write_file
from dbt.contracts.files import FileHash
from dbt.contracts.graph.unparsed import (
from dbt.contracts.graph.semantic_models import (
Defaults,
Dimension,
Docs,
Entity,
Measure,
SourceFileMetadata,
)
from dbt.contracts.graph.unparsed import (
Docs,
ExposureType,
ExternalTable,
FreshnessThreshold,
HasYamlMetadata,
MacroArgument,
MaturityType,
Measure,
Owner,
Quoting,
TestDef,
Expand All @@ -43,7 +47,11 @@
from dbt.events.contextvars import set_contextvars
from dbt.flags import get_flags
from dbt.node_types import ModelLanguage, NodeType, AccessType
from dbt_semantic_interfaces.references import MeasureReference
from dbt_semantic_interfaces.references import (
MeasureReference,
LinkableElementReference,
SemanticModelReference,
)
from dbt_semantic_interfaces.references import MetricReference as DSIMetricReference
from dbt_semantic_interfaces.type_enums.metric_type import MetricType
from dbt_semantic_interfaces.type_enums.time_granularity import TimeGranularity
Expand Down Expand Up @@ -554,30 +562,6 @@ def depends_on_macros(self):
return self.depends_on.macros


@dataclass
class FileSlice(dbtClassMixin, Replaceable):
"""Provides file slice level context about what something was created from.

Implementation of the dbt-semantic-interfaces `FileSlice` protocol
"""

filename: str
content: str
start_line_number: int
end_line_number: int


@dataclass
class SourceFileMetadata(dbtClassMixin, Replaceable):
"""Provides file context about what something was created from.

Implementation of the dbt-semantic-interfaces `Metadata` protocol
"""

repo_file_path: str
file_slice: FileSlice


# ====================================
# CompiledNode subclasses
# ====================================
Expand Down Expand Up @@ -703,7 +687,6 @@ def same_contract(self, old, adapter_type=None) -> bool:
and old_value.constraints != self.columns[old_key].constraints
and old.materialization_enforces_constraints
):

for old_constraint in old_value.constraints:
if (
old_constraint not in self.columns[old_key].constraints
Expand Down Expand Up @@ -1493,12 +1476,63 @@ class NodeRelation(dbtClassMixin):

@dataclass
class SemanticModel(GraphNode):
description: Optional[str]
model: str
node_relation: Optional[NodeRelation]
entities: Sequence[Entity]
measures: Sequence[Measure]
dimensions: Sequence[Dimension]
description: Optional[str] = None
defaults: Optional[Defaults] = None
entities: Sequence[Entity] = field(default_factory=list)
measures: Sequence[Measure] = field(default_factory=list)
dimensions: Sequence[Dimension] = field(default_factory=list)
metadata: Optional[SourceFileMetadata] = None

@property
def entity_references(self) -> List[LinkableElementReference]:
    """Return the ``reference`` of every entity, in ``self.entities`` order."""
    references = [item.reference for item in self.entities]
    return references

@property
def dimension_references(self) -> List[LinkableElementReference]:
    """Return the ``reference`` of every dimension, in ``self.dimensions`` order."""
    references = [item.reference for item in self.dimensions]
    return references

@property
def measure_references(self) -> List[MeasureReference]:
    """Return the ``reference`` of every measure, in ``self.measures`` order."""
    references = [item.reference for item in self.measures]
    return references

@property
def has_validity_dimensions(self) -> bool:
    """Return True when at least one dimension carries validity params.

    A dimension counts when its ``validity_params`` is not ``None``; an
    empty ``self.dimensions`` yields False.
    """
    # Generator instead of a list literal: no throwaway list is built and
    # ``any`` stops at the first dimension that qualifies.
    return any(dim.validity_params is not None for dim in self.dimensions)

@property
def validity_start_dimension(self) -> Optional[Dimension]:
    """Return the first dimension flagged as a validity start, or ``None``.

    A dimension qualifies when its ``validity_params`` is truthy and that
    object's ``is_start`` is truthy.
    """
    for candidate in self.dimensions:
        params = candidate.validity_params
        if params and params.is_start:
            return candidate
    return None

@property
def validity_end_dimension(self) -> Optional[Dimension]:
    """Return the first dimension flagged as a validity end, or ``None``.

    A dimension qualifies when its ``validity_params`` is truthy and that
    object's ``is_end`` is truthy.
    """
    for candidate in self.dimensions:
        params = candidate.validity_params
        if params and params.is_end:
            return candidate
    return None

@property
def partitions(self) -> List[Dimension]:  # noqa: D
    """Return every dimension whose ``is_partition`` flag is truthy."""
    # ``or []`` keeps the property usable when ``dimensions`` is falsy (e.g. None).
    candidates = self.dimensions or []
    return [item for item in candidates if item.is_partition]

@property
def partition(self) -> Optional[Dimension]:
    """Return the first entry of ``self.partitions``, or ``None`` if it is empty."""
    found = self.partitions
    return found[0] if found else None

@property
def reference(self) -> SemanticModelReference:
    """Return a ``SemanticModelReference`` built from this node's ``name``."""
    model_name = self.name
    return SemanticModelReference(semantic_model_name=model_name)


# ====================================
Expand Down
152 changes: 152 additions & 0 deletions core/dbt/contracts/graph/semantic_models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
from dataclasses import dataclass
Copy link
Contributor

@peterallenwebb peterallenwebb Jun 13, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am very happy about moving the following classes out of nodes.py, but I wonder if we should move them out of the graph subdirectory? I could see arguments either way. We don't need to decide now.

Copy link
Contributor Author

@QMalcolm QMalcolm Jun 13, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point! I mostly moved them to where they are because they were cluttering nodes.py and there was some precedent given the separate graph/metrics.py file. I'm not sure if there is a right "other" place for them though 🤔 mostly because they are direct dependencies of specific graph nodes and not much else

from dbt.dataclass_schema import dbtClassMixin
from dbt_semantic_interfaces.references import (
DimensionReference,
EntityReference,
MeasureReference,
TimeDimensionReference,
)
from dbt_semantic_interfaces.type_enums.aggregation_type import AggregationType
from dbt_semantic_interfaces.type_enums.dimension_type import DimensionType
from dbt_semantic_interfaces.type_enums.entity_type import EntityType
from dbt_semantic_interfaces.type_enums.time_granularity import TimeGranularity
from typing import List, Optional


@dataclass
class FileSlice(dbtClassMixin):
    """File-slice level context describing what an object was created from.

    Implements the dbt-semantic-interfaces `FileSlice` protocol.
    """

    # NOTE(review): field meanings below are read off the names only —
    # confirm against the DSI `FileSlice` protocol.
    filename: str  # presumably the file the slice was taken from
    content: str  # presumably the text of the slice
    start_line_number: int
    end_line_number: int


@dataclass
class SourceFileMetadata(dbtClassMixin):
    """File-level context describing what an object was created from.

    Implements the dbt-semantic-interfaces `Metadata` protocol.
    """

    # NOTE(review): presumably a path relative to the repository root — confirm.
    repo_file_path: str
    # The slice of that file; see `FileSlice`.
    file_slice: FileSlice


@dataclass
class Defaults(dbtClassMixin):
    """Optional defaults attached to a semantic model.

    Holds a single optional field, ``agg_time_dimension``.
    """

    # NOTE(review): presumably the fallback aggregation time dimension for
    # measures that do not set their own — confirm against the DSI spec.
    agg_time_dimension: Optional[str] = None


# ====================================
# Dimension objects
# ====================================


@dataclass
class DimensionValidityParams(dbtClassMixin):
    """Flags marking a dimension as a validity-window start and/or end.

    Both flags default to False.
    """

    is_start: bool = False
    is_end: bool = False


@dataclass
class DimensionTypeParams(dbtClassMixin):
    """Type-specific parameters for a dimension.

    ``time_granularity`` is required; ``validity_params`` is optional and
    defaults to ``None``.
    """

    time_granularity: TimeGranularity
    validity_params: Optional[DimensionValidityParams] = None


@dataclass
class Dimension(dbtClassMixin):
    """A dimension declared on a semantic model.

    Besides its declared fields, it exposes reference objects derived from
    ``name`` and a shortcut to the validity params nested in ``type_params``.
    """

    name: str
    type: DimensionType
    description: Optional[str] = None
    is_partition: bool = False
    type_params: Optional[DimensionTypeParams] = None
    expr: Optional[str] = None
    metadata: Optional[SourceFileMetadata] = None

    @property
    def reference(self) -> DimensionReference:
        """Return a ``DimensionReference`` keyed on this dimension's name."""
        return DimensionReference(element_name=self.name)

    @property
    def time_dimension_reference(self) -> Optional[TimeDimensionReference]:
        """Return a ``TimeDimensionReference`` for TIME dimensions, else ``None``."""
        is_time = self.type == DimensionType.TIME
        return TimeDimensionReference(element_name=self.name) if is_time else None

    @property
    def validity_params(self) -> Optional[DimensionValidityParams]:
        """Return ``type_params.validity_params``, or ``None`` without type params."""
        params = self.type_params
        return params.validity_params if params else None


# ====================================
# Entity objects
# ====================================


@dataclass
class Entity(dbtClassMixin):
    """An entity declared on a semantic model."""

    name: str
    type: EntityType
    description: Optional[str] = None
    role: Optional[str] = None
    expr: Optional[str] = None

    @property
    def reference(self) -> EntityReference:
        """Return an ``EntityReference`` keyed on this entity's name."""
        return EntityReference(element_name=self.name)

    @property
    def is_linkable_entity_type(self) -> bool:
        """Return True when ``type`` is PRIMARY, UNIQUE or NATURAL."""
        linkable_types = (EntityType.PRIMARY, EntityType.UNIQUE, EntityType.NATURAL)
        return self.type in linkable_types


# ====================================
# Measure objects
# ====================================


@dataclass
class MeasureAggregationParameters(dbtClassMixin):
    """Optional aggregation parameters for a measure.

    Every field defaults to ``None``, i.e. "not specified".
    """

    percentile: Optional[float] = None
    use_discrete_percentile: Optional[bool] = None
    use_approximate_percentile: Optional[bool] = None


@dataclass
class NonAdditiveDimension(dbtClassMixin):
    """Non-additive dimension settings attached to a measure.

    All three fields are required.
    """

    name: str
    window_choice: AggregationType
    # NOTE(review): "grouples" looks like a typo for "groupings" (the DSI
    # protocol appears to call this `window_groupings`) — confirm before
    # renaming, since the same spelling is used by the unparsed counterpart.
    window_grouples: List[str]


@dataclass
class Measure(dbtClassMixin):
    """A measure declared on a semantic model."""

    name: str
    agg: AggregationType
    description: Optional[str] = None
    create_metric: bool = False
    expr: Optional[str] = None
    agg_params: Optional[MeasureAggregationParameters] = None
    non_additive_dimension: Optional[NonAdditiveDimension] = None
    agg_time_dimension: Optional[str] = None

    @property
    def checked_agg_time_dimension(self) -> TimeDimensionReference:
        """Return ``agg_time_dimension`` wrapped in a ``TimeDimensionReference``.

        Raises:
            Exception: if ``agg_time_dimension`` is ``None``.
        """
        if self.agg_time_dimension is None:
            raise Exception("Measure is missing agg_time_dimension!")
        return TimeDimensionReference(element_name=self.agg_time_dimension)

    @property
    def reference(self) -> MeasureReference:
        """Return a ``MeasureReference`` keyed on this measure's name."""
        return MeasureReference(element_name=self.name)
45 changes: 28 additions & 17 deletions core/dbt/contracts/graph/unparsed.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,11 @@

from dbt import deprecations
from dbt.node_types import NodeType
from dbt.contracts.graph.semantic_models import (
Defaults,
DimensionValidityParams,
MeasureAggregationParameters,
)
from dbt.contracts.util import (
AdditionalPropertiesMixin,
Mergeable,
Expand Down Expand Up @@ -673,52 +678,58 @@ def validate(cls, data):


@dataclass
class Entity(dbtClassMixin):
class UnparsedEntity(dbtClassMixin):
name: str
type: str # actually an enum
type: str # EntityType enum
description: Optional[str] = None
role: Optional[str] = None
expr: Optional[str] = None


@dataclass
class MeasureAggregationParameters(dbtClassMixin):
percentile: Optional[float] = None
use_discrete_percentile: bool = False
use_approximate_percentile: bool = False
class UnparsedNonAdditiveDimension(dbtClassMixin):
name: str
window_choice: str # AggregationType enum
window_grouples: List[str]


@dataclass
class Measure(dbtClassMixin):
class UnparsedMeasure(dbtClassMixin):
name: str
agg: str # actually an enum
description: Optional[str] = None
create_metric: Optional[bool] = None
create_metric: bool = False
expr: Optional[str] = None
agg_params: Optional[MeasureAggregationParameters] = None
non_additive_dimension: Optional[Dict[str, Any]] = None
non_additive_dimension: Optional[UnparsedNonAdditiveDimension] = None
agg_time_dimension: Optional[str] = None


@dataclass
class Dimension(dbtClassMixin):
class UnparsedDimensionTypeParams(dbtClassMixin):
time_granularity: str # TimeGranularity enum
validity_params: Optional[DimensionValidityParams] = None


@dataclass
class UnparsedDimension(dbtClassMixin):
name: str
type: str # actually an enum
description: Optional[str] = None
is_partition: Optional[bool] = False
type_params: Optional[Dict[str, Any]] = None
is_partition: bool = False
type_params: Optional[UnparsedDimensionTypeParams] = None
expr: Optional[str] = None
# TODO metadata: Optional[Metadata] (this would actually be the YML for the dimension)


@dataclass
class UnparsedSemanticModel(dbtClassMixin):
name: str
description: Optional[str]
model: str # looks like "ref(...)"
entities: List[Entity] = field(default_factory=list)
measures: List[Measure] = field(default_factory=list)
dimensions: List[Dimension] = field(default_factory=list)
description: Optional[str] = None
defaults: Optional[Defaults] = None
entities: List[UnparsedEntity] = field(default_factory=list)
measures: List[UnparsedMeasure] = field(default_factory=list)
dimensions: List[UnparsedDimension] = field(default_factory=list)


def normalize_date(d: Optional[datetime.date]) -> Optional[datetime.datetime]:
Expand Down
Loading