Skip to content

Commit

Permalink
Update spec MetricNode for dbt x MetricFlow integration and begin out…
Browse files Browse the repository at this point in the history
…putting semantic manifest artifact (#7812)

* Refactor MetricNode definition to satisfy DSI Metric protocol

* Fix tests involving metrics to have updated properties

* Update UnparsedMetricNode to match new metric yaml spec

* Update MetricParser for new unparsed and parsed MetricNodes

* Remove `rename_metric_attr`

We're intentionally breaking the spec. There will be a separate tool provided
for migrating from dbt-metrics to dbt x metricflow. This bit of code was renaming
things like `type` to `calculation_method`. This is problematic because `type` is
on the new spec, while `calculation_method` is not. Additionally, since we're
intentionally breaking the spec, this function, `rename_metric_attr`, shouldn't be
used for any property renaming.

* Fix tests for Metrics (1.6) changes

* Regenerated v10 manifest schema and associated functional test artifact state

* Remove no longer needed tests

* Skip / comment out tests for metrics functionality that we'll be implementing later

* Begin outputting semantic manifest artifact on every run

* Drop metrics during upgrade_manifest_json if manifest is v9 or before

* Update properties of `minimal_parsed_metric_dict` to match new metric spec

* Add changie entry for metric node breaking changes

* Add semantic model nodes to semantic manifest
  • Loading branch information
QMalcolm authored Jun 8, 2023
1 parent a89da7c commit 98d1a94
Show file tree
Hide file tree
Showing 26 changed files with 915 additions and 888 deletions.
6 changes: 6 additions & 0 deletions .changes/unreleased/Breaking Changes-20230607-190309.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Breaking Changes
body: Switch from dbt-metrics to dbt-semantic-interfaces for MetricNode definitions
time: 2023-06-07T19:03:09.680189-07:00
custom:
Author: QMalcolm
Issue: 7500 7404
1 change: 1 addition & 0 deletions core/dbt/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,5 @@

DEPENDENCIES_FILE_NAME = "dependencies.yml"
MANIFEST_FILE_NAME = "manifest.json"
SEMANTIC_MANIFEST_FILE_NAME = "semantic_manifest.json"
PARTIAL_PARSE_FILE_NAME = "partial_parse.msgpack"
22 changes: 20 additions & 2 deletions core/dbt/contracts/graph/manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,9 @@
from dbt.flags import get_flags, MP_CONTEXT
from dbt import tracking
import dbt.utils
from dbt_semantic_interfaces.implementations.metric import PydanticMetric
from dbt_semantic_interfaces.implementations.semantic_manifest import PydanticSemanticManifest
from dbt_semantic_interfaces.implementations.semantic_model import PydanticSemanticModel

NodeEdgeMap = Dict[str, List[str]]
PackageName = str
Expand Down Expand Up @@ -985,6 +988,20 @@ def analysis_lookup(self) -> AnalysisLookup:
self._analysis_lookup = AnalysisLookup(self)
return self._analysis_lookup

@property
def pydantic_semantic_manifest(self) -> PydanticSemanticManifest:
pydantic_semantic_manifest = PydanticSemanticManifest(metrics=[], semantic_models=[])

for semantic_model in self.semantic_nodes.values():
pydantic_semantic_manifest.semantic_models.append(
PydanticSemanticModel.parse_obj(semantic_model.to_dict())
)

for metric in self.metrics.values():
pydantic_semantic_manifest.metrics.append(PydanticMetric.parse_obj(metric.to_dict()))

return pydantic_semantic_manifest

def resolve_refs(
self, source_node: GraphMemberNode, current_project: str
) -> List[MaybeNonSource]:
Expand Down Expand Up @@ -1377,8 +1394,9 @@ def compatible_previous_versions(self):
def upgrade_schema_version(cls, data):
"""This overrides the "upgrade_schema_version" call in VersionedSchema (via
ArtifactMixin) to modify the dictionary passed in from earlier versions of the manifest."""
if get_manifest_schema_version(data) <= 9:
data = upgrade_manifest_json(data)
manifest_schema_version = get_manifest_schema_version(data)
if manifest_schema_version <= 9:
data = upgrade_manifest_json(data, manifest_schema_version)
return cls.from_dict(data)

def __post_serialize__(self, dct):
Expand Down
59 changes: 18 additions & 41 deletions core/dbt/contracts/graph/manifest_upgrade.py
Original file line number Diff line number Diff line change
@@ -1,42 +1,3 @@
from dbt import deprecations
from dbt.dataclass_schema import ValidationError


# we renamed these properties in v1.3
# this method allows us to be nice to the early adopters
def rename_metric_attr(data: dict, raise_deprecation_warning: bool = False) -> dict:
metric_name = data["name"]
if raise_deprecation_warning and (
"sql" in data.keys()
or "type" in data.keys()
or data.get("calculation_method") == "expression"
):
deprecations.warn("metric-attr-renamed", metric_name=metric_name)
duplicated_attribute_msg = """\n
The metric '{}' contains both the deprecated metric property '{}'
and the up-to-date metric property '{}'. Please remove the deprecated property.
"""
if "sql" in data.keys():
if "expression" in data.keys():
raise ValidationError(
duplicated_attribute_msg.format(metric_name, "sql", "expression")
)
else:
data["expression"] = data.pop("sql")
if "type" in data.keys():
if "calculation_method" in data.keys():
raise ValidationError(
duplicated_attribute_msg.format(metric_name, "type", "calculation_method")
)
else:
calculation_method = data.pop("type")
data["calculation_method"] = calculation_method
# we also changed "type: expression" -> "calculation_method: derived"
if data.get("calculation_method") == "expression":
data["calculation_method"] = "derived"
return data


def rename_sql_attr(node_content: dict) -> dict:
if "raw_sql" in node_content:
node_content["raw_code"] = node_content.pop("raw_sql")
Expand Down Expand Up @@ -88,7 +49,24 @@ def upgrade_seed_content(node_content):
node_content.get("depends_on", {}).pop("nodes", None)


def upgrade_manifest_json(manifest: dict) -> dict:
def drop_v9_and_prior_metrics(manifest: dict) -> None:
manifest["metrics"] = {}
filtered_disabled_entries = {}
for entry_name, resource_list in manifest.get("disabled", {}).items():
filtered_resource_list = []
for resource in resource_list:
if resource.get("resource_type") != "metric":
filtered_resource_list.append(resource)
filtered_disabled_entries[entry_name] = filtered_resource_list

manifest["disabled"] = filtered_disabled_entries


def upgrade_manifest_json(manifest: dict, manifest_schema_version: int) -> dict:
# this should remain 9 while the check in `upgrade_schema_version` may change
if manifest_schema_version <= 9:
drop_v9_and_prior_metrics(manifest=manifest)

for node_content in manifest.get("nodes", {}).values():
upgrade_node_content(node_content)
if node_content["resource_type"] == "seed":
Expand All @@ -109,7 +87,6 @@ def upgrade_manifest_json(manifest: dict) -> dict:
manifest["public_nodes"] = {}
for metric_content in manifest.get("metrics", {}).values():
# handle attr renames + value translation ("expression" -> "derived")
metric_content = rename_metric_attr(metric_content)
metric_content = upgrade_ref_content(metric_content)
if "root_path" in metric_content:
del metric_content["root_path"]
Expand Down
138 changes: 97 additions & 41 deletions core/dbt/contracts/graph/nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,6 @@
MacroArgument,
MaturityType,
Measure,
MetricFilter,
MetricTime,
Owner,
Quoting,
TestDef,
Expand All @@ -45,6 +43,10 @@
from dbt.events.contextvars import set_contextvars
from dbt.flags import get_flags
from dbt.node_types import ModelLanguage, NodeType, AccessType
from dbt_semantic_interfaces.references import MeasureReference
from dbt_semantic_interfaces.references import MetricReference as DSIMetricReference
from dbt_semantic_interfaces.type_enums.metric_type import MetricType
from dbt_semantic_interfaces.type_enums.time_granularity import TimeGranularity

from .model_config import (
NodeConfig,
Expand Down Expand Up @@ -1309,27 +1311,76 @@ def same_contents(self, old: Optional["Exposure"]) -> bool:
# ====================================


@dataclass
class WhereFilter(dbtClassMixin):
where_sql_template: str


@dataclass
class MetricInputMeasure(dbtClassMixin):
name: str
filter: Optional[WhereFilter] = None
alias: Optional[str] = None

def measure_reference(self) -> MeasureReference:
return MeasureReference(element_name=self.name)

def post_aggregation_measure_referenc(self) -> MeasureReference:
return MeasureReference(element_name=self.alias or self.name)


@dataclass
class MetricTimeWindow(dbtClassMixin):
count: int
granularity: TimeGranularity


@dataclass
class MetricInput(dbtClassMixin):
name: str
filter: Optional[WhereFilter] = None
alias: Optional[str] = None
offset_window: Optional[MetricTimeWindow] = None
offset_to_grain: Optional[TimeGranularity] = None

def as_reference(self) -> DSIMetricReference:
return DSIMetricReference(element_name=self.name)


@dataclass
class MetricTypeParams(dbtClassMixin):
measure: Optional[MetricInputMeasure] = None
measures: Optional[List[MetricInputMeasure]] = None
numerator: Optional[MetricInputMeasure] = None
denominator: Optional[MetricInputMeasure] = None
expr: Optional[str] = None
window: Optional[MetricTimeWindow] = None
grain_to_date: Optional[TimeGranularity] = None
metrics: Optional[List[MetricInput]] = None

def numerator_measure_reference(self) -> Optional[MeasureReference]:
return self.numerator.measure_reference() if self.numerator else None

def denominator_measure_reference(self) -> Optional[MeasureReference]:
return self.denominator.measure_reference() if self.denominator else None


@dataclass
class MetricReference(dbtClassMixin, Replaceable):
sql: Optional[Union[str, int]]
unique_id: Optional[str]
sql: Optional[Union[str, int]] = None
unique_id: Optional[str] = None


@dataclass
class Metric(GraphNode):
name: str
description: str
label: str
calculation_method: str
expression: str
filters: List[MetricFilter]
time_grains: List[str]
dimensions: List[str]
type: MetricType
type_params: MetricTypeParams
filter: Optional[WhereFilter] = None
metadata: Optional[SourceFileMetadata] = None
resource_type: NodeType = field(metadata={"restrict": [NodeType.Metric]})
timestamp: Optional[str] = None
window: Optional[MetricTime] = None
model: Optional[str] = None
model_unique_id: Optional[str] = None
meta: Dict[str, Any] = field(default_factory=dict)
tags: List[str] = field(default_factory=list)
config: MetricConfig = field(default_factory=MetricConfig)
Expand All @@ -1353,59 +1404,64 @@ def depends_on_public_nodes(self):
def search_name(self):
return self.name

def same_model(self, old: "Metric") -> bool:
return self.model == old.model
@property
def input_measures(self) -> List[MetricInputMeasure]:
tp = self.type_params
res = tp.measures or []
if tp.measure:
res.append(tp.measure)
if tp.numerator:
res.append(tp.numerator)
if tp.denominator:
res.append(tp.denominator)

def same_window(self, old: "Metric") -> bool:
return self.window == old.window
return res

def same_dimensions(self, old: "Metric") -> bool:
return self.dimensions == old.dimensions
@property
def measure_references(self) -> List[MeasureReference]:
return [x.measure_reference() for x in self.input_measures]

def same_filters(self, old: "Metric") -> bool:
return self.filters == old.filters
@property
def input_metrics(self) -> List[MetricInput]:
return self.type_params.metrics or []

def same_description(self, old: "Metric") -> bool:
return self.description == old.description

def same_label(self, old: "Metric") -> bool:
return self.label == old.label

def same_calculation_method(self, old: "Metric") -> bool:
return self.calculation_method == old.calculation_method

def same_expression(self, old: "Metric") -> bool:
return self.expression == old.expression

def same_timestamp(self, old: "Metric") -> bool:
return self.timestamp == old.timestamp

def same_time_grains(self, old: "Metric") -> bool:
return self.time_grains == old.time_grains

def same_config(self, old: "Metric") -> bool:
return self.config.same_contents(
self.unrendered_config,
old.unrendered_config,
)

def same_filter(self, old: "Metric") -> bool:
return True # TODO

def same_metadata(self, old: "Metric") -> bool:
return True # TODO

def same_type(self, old: "Metric") -> bool:
return self.type == old.type

def same_type_params(self, old: "Metric") -> bool:
return True # TODO

def same_contents(self, old: Optional["Metric"]) -> bool:
# existing when it didn't before is a change!
# metadata/tags changes are not "changes"
if old is None:
return True

return (
self.same_model(old)
and self.same_window(old)
and self.same_dimensions(old)
and self.same_filters(old)
self.same_filter(old)
and self.same_metadata(old)
and self.same_type(old)
and self.same_type_params(old)
and self.same_description(old)
and self.same_label(old)
and self.same_calculation_method(old)
and self.same_expression(old)
and self.same_timestamp(old)
and self.same_time_grains(old)
and self.same_config(old)
and True
)
Expand Down
Loading

0 comments on commit 98d1a94

Please sign in to comment.