Skip to content

Commit

Permalink
Update support for custom granularity in other types (#365)
Browse files Browse the repository at this point in the history
<!---
Include the number of the issue addressed by this PR above if
applicable.
  PRs for code changes without an associated issue *will not be merged*.
  See CONTRIBUTING.md for more information.
-->

### Description
Support custom grain by switching the typing to be not on
`TimeGranularity`
- `Metric.time_granularity` - `TimeGranularity -> str`
- `Metric.type_params.cumulative_type_params.grain_to_date` -
`TimeGranularity -> str`
- `MetricInput.offset_to_grain` - `TimeGranularity -> str`
- `MetricTimeWindow.granularity` - `TimeGranularity -> str` affects the
following
  - `Metric.type_params.conversion_type_params.window`
  - `Metric.type_params.cumulative_type_params.window`
  - `MetricInput.offset_window`

#### Changes to `MetricTimeWindow`
Because `MetricTimeWindow.granularity` is now a string, we allow it to
be not as strict when ensuring it's a valid granularity during initial
yaml parsing (since we don't have access to the custom granularities).
Then during validations, we will properly validate that it's a valid
grain, and during the transformation step we will remove trailing 's'.


<!---
Describe the Pull Request here. Add any references and info to help
reviewers
  understand your changes. Include any tradeoffs you considered.
-->

### Checklist

- [x] I have read [the contributing
guide](https://github.com/dbt-labs/dbt-semantic-interfaces/blob/main/CONTRIBUTING.md)
and understand what's expected of me
- [x] I have signed the
[CLA](https://docs.getdbt.com/docs/contributor-license-agreements)
- [x] This PR includes tests, or tests are not required/relevant for
this PR
- [x] I have run `changie new` to [create a changelog
entry](https://github.com/dbt-labs/dbt-semantic-interfaces/blob/main/CONTRIBUTING.md#adding-a-changelog-entry)

Resolves SL-2826
  • Loading branch information
WilliamDee authored Nov 12, 2024
1 parent 5083005 commit b7ce0aa
Show file tree
Hide file tree
Showing 13 changed files with 552 additions and 124 deletions.
6 changes: 6 additions & 0 deletions .changes/unreleased/Breaking Changes-20241108-215906.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Breaking Changes
body: Adding support for custom grain in windows/grain_to_dates
time: 2024-11-08T21:59:06.162076-05:00
custom:
Author: WilliamDee
Issue: None
45 changes: 32 additions & 13 deletions dbt_semantic_interfaces/implementations/metric.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ class PydanticMetricTimeWindow(PydanticCustomInputParser, HashableBaseModel):
"""Describes the window of time the metric should be accumulated over, e.g., '1 day', '2 weeks', etc."""

count: int
granularity: TimeGranularity
granularity: str

@classmethod
def _from_yaml_value(cls, input: PydanticParseableValueType) -> PydanticMetricTimeWindow:
Expand All @@ -80,44 +80,61 @@ def _from_yaml_value(cls, input: PydanticParseableValueType) -> PydanticMetricTi
The MetricTimeWindow is always expected to be provided as a string in user-defined YAML configs.
"""
if isinstance(input, str):
return PydanticMetricTimeWindow.parse(input)
return PydanticMetricTimeWindow.parse(window=input.lower(), custom_granularity_names=(), strict=False)
else:
raise ValueError(
f"MetricTimeWindow inputs from model configs are expected to always be of type string, but got "
f"type {type(input)} with value: {input}"
)

@property
def is_standard_granularity(self) -> bool:
"""Returns whether the window uses standard TimeGranularity."""
return self.granularity.casefold() in {item.value.casefold() for item in TimeGranularity}

@property
def window_string(self) -> str:
"""Returns the string value of the time window."""
return f"{self.count} {self.granularity}"

@staticmethod
def parse(window: str) -> PydanticMetricTimeWindow:
def parse(window: str, custom_granularity_names: Sequence[str], strict: bool = True) -> PydanticMetricTimeWindow:
"""Returns window values if parsing succeeds, None otherwise.
Output of the form: (<time unit count>, <time granularity>, <error message>) - error message is None if window
is formatted properly
If strict=True, then the granularity in the window must exist as a valid granularity.
Use strict=True for when you have all valid granularities, otherwise use strict=False.
"""
parts = window.split(" ")
parts = window.lower().split(" ")
if len(parts) != 2:
raise ParsingException(
f"Invalid window ({window}) in cumulative metric. Should be of the form `<count> <granularity>`, "
"e.g., `28 days`",
)

granularity = parts[1]

valid_time_granularities = {item.value.lower() for item in TimeGranularity} | set(
c.lower() for c in custom_granularity_names
)

# if we switched to python 3.9 this could just be `granularity = parts[0].removesuffix('s')
if granularity.endswith("s"):
if granularity.endswith("s") and granularity[:-1] in valid_time_granularities:
# months -> month
granularity = granularity[:-1]
if granularity not in [item.value for item in TimeGranularity]:

if strict and granularity not in valid_time_granularities:
raise ParsingException(
f"Invalid time granularity {granularity} in cumulative metric window string: ({window})",
f"Invalid time granularity {granularity} in metric window string: ({window})",
)
# If not strict and not standard granularity, it may be a custom grain, so validations happens later

count = parts[0]
if not count.isdigit():
raise ParsingException(f"Invalid count ({count}) in cumulative metric window string: ({window})")

return PydanticMetricTimeWindow(
count=int(count),
granularity=TimeGranularity(granularity),
granularity=granularity,
)


Expand All @@ -135,7 +152,7 @@ class PydanticMetricInput(HashableBaseModel):
filter: Optional[PydanticWhereFilterIntersection]
alias: Optional[str]
offset_window: Optional[PydanticMetricTimeWindow]
offset_to_grain: Optional[TimeGranularity]
offset_to_grain: Optional[str]

@property
def as_reference(self) -> MetricReference:
Expand Down Expand Up @@ -163,7 +180,7 @@ class PydanticCumulativeTypeParams(HashableBaseModel):
"""Type params to provide context for cumulative metrics properties."""

window: Optional[PydanticMetricTimeWindow]
grain_to_date: Optional[TimeGranularity]
grain_to_date: Optional[str]
period_agg: PeriodAggregation = PeriodAggregation.FIRST


Expand All @@ -174,7 +191,9 @@ class PydanticMetricTypeParams(HashableBaseModel):
numerator: Optional[PydanticMetricInput]
denominator: Optional[PydanticMetricInput]
expr: Optional[str]
# Legacy, supports custom grain through PydanticMetricTimeWindow changes (should deprecate though)
window: Optional[PydanticMetricTimeWindow]
# Legacy, will not support custom granularity
grain_to_date: Optional[TimeGranularity]
metrics: Optional[List[PydanticMetricInput]]
conversion_type_params: Optional[PydanticConversionTypeParams]
Expand Down Expand Up @@ -206,7 +225,7 @@ def _implements_protocol(self) -> Metric: # noqa: D
metadata: Optional[PydanticMetadata]
label: Optional[str] = None
config: Optional[PydanticMetricConfig]
time_granularity: Optional[TimeGranularity] = None
time_granularity: Optional[str] = None

@property
def input_measures(self) -> Sequence[PydanticMetricInputMeasure]:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,30 +77,7 @@
"additionalProperties": false,
"properties": {
"grain_to_date": {
"enum": [
"NANOSECOND",
"MICROSECOND",
"MILLISECOND",
"SECOND",
"MINUTE",
"HOUR",
"DAY",
"WEEK",
"MONTH",
"QUARTER",
"YEAR",
"nanosecond",
"microsecond",
"millisecond",
"second",
"minute",
"hour",
"day",
"week",
"month",
"quarter",
"year"
]
"type": "string"
},
"period_agg": {
"enum": [
Expand Down Expand Up @@ -488,30 +465,7 @@
"type": "string"
},
"time_granularity": {
"enum": [
"NANOSECOND",
"MICROSECOND",
"MILLISECOND",
"SECOND",
"MINUTE",
"HOUR",
"DAY",
"WEEK",
"MONTH",
"QUARTER",
"YEAR",
"nanosecond",
"microsecond",
"millisecond",
"second",
"minute",
"hour",
"day",
"week",
"month",
"quarter",
"year"
]
"type": "string"
},
"type": {
"enum": [
Expand Down
4 changes: 2 additions & 2 deletions dbt_semantic_interfaces/parsing/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@
"type": "object",
"properties": {
"window": {"type": "string"},
"grain_to_date": {"enum": time_granularity_values},
"grain_to_date": {"type": "string"},
"period_agg": {"enum": period_agg_values},
},
"additionalProperties": False,
Expand Down Expand Up @@ -304,7 +304,7 @@
"description": {"type": "string"},
"label": {"type": "string"},
"config": {"$ref": "metric_config_schema"},
"time_granularity": {"enum": time_granularity_values},
"time_granularity": {"type": "string"},
},
"additionalProperties": False,
"required": ["name", "type", "type_params"],
Expand Down
18 changes: 14 additions & 4 deletions dbt_semantic_interfaces/protocols/metric.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,17 @@ def count(self) -> int: # noqa: D

@property
@abstractmethod
def granularity(self) -> TimeGranularity: # noqa: D
def granularity(self) -> str: # noqa: D
pass

@property
@abstractmethod
def window_string(self) -> str: # noqa: D
pass

@property
@abstractmethod
def is_standard_granularity(self) -> bool: # noqa: D
pass


Expand Down Expand Up @@ -102,7 +112,7 @@ def offset_window(self) -> Optional[MetricTimeWindow]: # noqa: D

@property
@abstractmethod
def offset_to_grain(self) -> Optional[TimeGranularity]: # noqa: D
def offset_to_grain(self) -> Optional[str]: # noqa: D
pass

@property
Expand Down Expand Up @@ -189,7 +199,7 @@ def window(self) -> Optional[MetricTimeWindow]: # noqa: D

@property
@abstractmethod
def grain_to_date(self) -> Optional[TimeGranularity]: # noqa: D
def grain_to_date(self) -> Optional[str]: # noqa: D
pass

@property
Expand Down Expand Up @@ -328,7 +338,7 @@ def label(self) -> Optional[str]:

@property
@abstractmethod
def time_granularity(self) -> Optional[TimeGranularity]:
def time_granularity(self) -> Optional[str]:
"""Default grain used for the metric.
This will be used in a couple of circumstances:
Expand Down
4 changes: 2 additions & 2 deletions dbt_semantic_interfaces/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
PydanticSemanticModel,
)
from dbt_semantic_interfaces.parsing.objects import YamlConfigFile
from dbt_semantic_interfaces.type_enums import MetricType, TimeGranularity
from dbt_semantic_interfaces.type_enums import MetricType
from dbt_semantic_interfaces.validations.validator_helpers import (
SemanticManifestValidationResults,
ValidationIssue,
Expand Down Expand Up @@ -127,7 +127,7 @@ def metric_with_guaranteed_meta(
type_params: PydanticMetricTypeParams,
metadata: PydanticMetadata = default_meta(),
description: str = "adhoc metric",
time_granularity: Optional[TimeGranularity] = None,
time_granularity: Optional[str] = None,
) -> PydanticMetric:
"""Creates a metric with the given input.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,6 @@ def transform_model(semantic_manifest: PydanticSemanticManifest) -> PydanticSema
if metric.type_params.window and not metric.type_params.cumulative_type_params.window:
metric.type_params.cumulative_type_params.window = metric.type_params.window
if metric.type_params.grain_to_date and not metric.type_params.cumulative_type_params.grain_to_date:
metric.type_params.cumulative_type_params.grain_to_date = metric.type_params.grain_to_date
metric.type_params.cumulative_type_params.grain_to_date = metric.type_params.grain_to_date.value

return semantic_manifest
4 changes: 4 additions & 0 deletions dbt_semantic_interfaces/transformations/pydantic_rule_set.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@
)
from dbt_semantic_interfaces.transformations.names import LowerCaseNamesRule
from dbt_semantic_interfaces.transformations.proxy_measure import CreateProxyMeasureRule
from dbt_semantic_interfaces.transformations.remove_plural_from_window_granularity import (
RemovePluralFromWindowGranularityRule,
)
from dbt_semantic_interfaces.transformations.rule_set import (
SemanticManifestTransformRuleSet,
)
Expand Down Expand Up @@ -54,6 +57,7 @@ def secondary_rules(self) -> Sequence[SemanticManifestTransformRule[PydanticSema
ConvertMedianToPercentileRule(),
AddInputMetricMeasuresRule(),
SetCumulativeTypeParamsRule(),
RemovePluralFromWindowGranularityRule(),
)

@property
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
from typing import Sequence

from typing_extensions import override

from dbt_semantic_interfaces.enum_extension import assert_values_exhausted
from dbt_semantic_interfaces.errors import ModelTransformError
from dbt_semantic_interfaces.implementations.metric import PydanticMetricTimeWindow
from dbt_semantic_interfaces.implementations.semantic_manifest import (
PydanticSemanticManifest,
)
from dbt_semantic_interfaces.protocols import ProtocolHint
from dbt_semantic_interfaces.transformations.transform_rule import (
SemanticManifestTransformRule,
)
from dbt_semantic_interfaces.type_enums import MetricType


class RemovePluralFromWindowGranularityRule(ProtocolHint[SemanticManifestTransformRule[PydanticSemanticManifest]]):
"""Remove trailing s from granularity in MetricTimeWindow.
During parsing, MetricTimeWindow.granularity can still contain he trailing 's' (ie., 3 days).
This is because with the introduction of custom granularities, we don't have access to the valid
custom grains during parsing. This transformation rule is introduced to remove the trailing 's'
from `MetricTimeWindow.granularity` if necessary.
"""

@override
def _implements_protocol(self) -> SemanticManifestTransformRule[PydanticSemanticManifest]: # noqa: D
return self

@staticmethod
def _update_metric(
semantic_manifest: PydanticSemanticManifest, metric_name: str, custom_granularity_names: Sequence[str]
) -> None:
"""Mutates all the MetricTimeWindow by reparsing to remove the trailing 's'."""

def reparse_window(window: PydanticMetricTimeWindow) -> PydanticMetricTimeWindow:
"""Reparse the window to remove the trailing 's'."""
return PydanticMetricTimeWindow.parse(
window=window.window_string, custom_granularity_names=custom_granularity_names
)

matched_metric = next(
iter((metric for metric in semantic_manifest.metrics if metric.name == metric_name)), None
)
if matched_metric:
if matched_metric.type is MetricType.CUMULATIVE:
if (
matched_metric.type_params.cumulative_type_params
and matched_metric.type_params.cumulative_type_params.window
):
matched_metric.type_params.cumulative_type_params.window = reparse_window(
matched_metric.type_params.cumulative_type_params.window
)
elif matched_metric.type is MetricType.CONVERSION:
if (
matched_metric.type_params.conversion_type_params
and matched_metric.type_params.conversion_type_params.window
):
matched_metric.type_params.conversion_type_params.window = reparse_window(
matched_metric.type_params.conversion_type_params.window
)

elif matched_metric.type is MetricType.DERIVED or matched_metric.type is MetricType.RATIO:
for input_metric in matched_metric.input_metrics:
if input_metric.offset_window:
input_metric.offset_window = reparse_window(input_metric.offset_window)
elif matched_metric.type is MetricType.SIMPLE:
pass
else:
assert_values_exhausted(matched_metric.type)
else:
raise ModelTransformError(f"Metric '{metric_name}' is not configured as a metric in the model.")

@staticmethod
def transform_model(semantic_manifest: PydanticSemanticManifest) -> PydanticSemanticManifest: # noqa: D
custom_granularity_names = [
granularity.name
for time_spine in semantic_manifest.project_configuration.time_spines
for granularity in time_spine.custom_granularities
]

for metric in semantic_manifest.metrics:
RemovePluralFromWindowGranularityRule._update_metric(
semantic_manifest=semantic_manifest,
metric_name=metric.name,
custom_granularity_names=custom_granularity_names,
)
return semantic_manifest
Loading

0 comments on commit b7ce0aa

Please sign in to comment.