Skip to content

Commit

Permalink
Width and Coverage metrics for prediction intervals (#638)
Browse files Browse the repository at this point in the history
  • Loading branch information
martins0n authored Apr 13, 2022
1 parent e5ec89a commit 8ade7d3
Show file tree
Hide file tree
Showing 5 changed files with 229 additions and 2 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## Unreleased
### Added
-
- `Width` and `Coverage` metrics for prediction intervals ([#638](https://github.com/tinkoff-ai/etna/pull/638))
-
- Masked backtest ([#613](https://github.com/tinkoff-ai/etna/pull/613))
-
Expand Down
3 changes: 2 additions & 1 deletion docs/source/metrics.rst
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,5 @@ See the API documentation for further details on available metrics:
:template: custom-module-template.rst
:recursive:

etna.metrics.metrics
etna.metrics.metrics
etna.metrics.intervals_metrics
2 changes: 2 additions & 0 deletions etna/metrics/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
from etna.metrics.base import MetricAggregationMode
from etna.metrics.functional_metrics import mape
from etna.metrics.functional_metrics import smape
from etna.metrics.intervals_metrics import Coverage
from etna.metrics.intervals_metrics import Width
from etna.metrics.metrics import MAE
from etna.metrics.metrics import MAPE
from etna.metrics.metrics import MSE
Expand Down
152 changes: 152 additions & 0 deletions etna/metrics/intervals_metrics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
from typing import Dict
from typing import Sequence
from typing import Tuple
from typing import Union

import numpy as np

from etna.datasets import TSDataset
from etna.metrics.base import Metric
from etna.metrics.base import MetricAggregationMode


def dummy():
    """Return NaN.

    Placeholder callable handed to the base metric as ``metric_fn`` by the
    interval metrics in this module, which compute their values themselves.
    """
    placeholder_value = np.nan
    return placeholder_value


class _QuantileMetricMixin:
def _validate_tsdataset_quantiles(self, ts: TSDataset, quantiles: Sequence[float]) -> None:
"""Check if quantiles presented in y_pred."""
features = set(ts.df.columns.get_level_values("feature"))
for quantile in quantiles:
assert f"target_{quantile:.4g}" in features, f"Quantile {quantile} is not presented in tsdataset."


class Coverage(Metric, _QuantileMetricMixin):
    r"""Coverage metric for prediction intervals - fraction of samples in the interval ``[lower quantile, upper quantile]``.

    .. math::
        Coverage(y\_true, y\_pred) = \frac{\sum_{i=0}^{n-1}{[ y\_true_i \ge y\_pred_i^{lower\_quantile}] * [y\_true_i \le y\_pred_i^{upper\_quantile}] }}{n}

    Notes
    -----
    Works just if quantiles presented in y_pred
    """

    def __init__(
        self, quantiles: Tuple[float, float] = (0.025, 0.975), mode: str = MetricAggregationMode.per_segment, **kwargs
    ):
        """Init metric.

        Parameters
        ----------
        quantiles:
            lower and upper quantile levels of the interval; the bounds are read from the
            ``y_pred`` features named ``target_<level>`` with the level formatted as ``.4g``
        mode: 'macro' or 'per-segment'
            metrics aggregation mode
        kwargs:
            metric's computation arguments
        """
        # ``dummy`` only fills the base-class ``metric_fn`` argument: the value
        # is computed directly in ``__call__`` below.
        super().__init__(mode=mode, metric_fn=dummy, **kwargs)
        self.quantiles = quantiles

    def __call__(self, y_true: TSDataset, y_pred: TSDataset) -> Union[float, Dict[str, float]]:
        """
        Compute metric's value with y_true and y_pred.

        Parameters
        ----------
        y_true:
            dataset with true time series values
        y_pred:
            dataset with predicted time series values; must contain both quantile features

        Returns
        -------
        metric's value aggregated over segments or not (depends on mode)

        Raises
        ------
        AssertionError:
            if one of the quantiles is not presented in ``y_pred``
        """
        self._validate_segment_columns(y_true=y_true, y_pred=y_pred)
        self._validate_tsdataset_quantiles(ts=y_pred, quantiles=self.quantiles)

        # Column names do not depend on the segment, so build them once.
        lower_column = f"target_{self.quantiles[0]:.4g}"
        upper_column = f"target_{self.quantiles[1]:.4g}"

        segments = set(y_true.df.columns.get_level_values("segment"))
        metrics_per_segment = {}
        for segment in segments:
            true_target = y_true[:, segment, "target"]
            self._validate_timestamp_columns(
                timestamp_true=true_target.dropna().index,
                timestamp_pred=y_pred[:, segment, "target"].dropna().index,
            )
            # NOTE(review): comparisons against NaN presumably evaluate to False here,
            # so such points would count as "not covered" - confirm this is intended.
            below_upper = true_target <= y_pred[:, segment, upper_column]
            above_lower = true_target >= y_pred[:, segment, lower_column]

            # A point is covered only when both bounds hold.
            metrics_per_segment[segment] = np.mean(below_upper & above_lower)
        metrics = self._aggregate_metrics(metrics_per_segment)
        return metrics


class Width(Metric, _QuantileMetricMixin):
    r"""Mean width of prediction intervals.

    .. math::
        Width(y\_true, y\_pred) = \frac{\sum_{i=0}^{n-1}\mid y\_pred_i^{upper\_quantile} - y\_pred_i^{lower\_quantile} \mid}{n}

    Notes
    -----
    Works just if quantiles presented in y_pred
    """

    def __init__(
        self, quantiles: Tuple[float, float] = (0.025, 0.975), mode: str = MetricAggregationMode.per_segment, **kwargs
    ):
        """Init metric.

        Parameters
        ----------
        quantiles:
            lower and upper quantile levels of the interval; the bounds are read from the
            ``y_pred`` features named ``target_<level>`` with the level formatted as ``.4g``
        mode: 'macro' or 'per-segment'
            metrics aggregation mode
        kwargs:
            metric's computation arguments
        """
        # ``dummy`` only fills the base-class ``metric_fn`` argument: the value
        # is computed directly in ``__call__`` below.
        super().__init__(mode=mode, metric_fn=dummy, **kwargs)
        self.quantiles = quantiles

    def __call__(self, y_true: TSDataset, y_pred: TSDataset) -> Union[float, Dict[str, float]]:
        """
        Compute metric's value with y_true and y_pred.

        Parameters
        ----------
        y_true:
            dataset with true time series values; used here only for the list of segments
            and for the timestamp validation
        y_pred:
            dataset with predicted time series values; must contain both quantile features

        Returns
        -------
        metric's value aggregated over segments or not (depends on mode)

        Raises
        ------
        AssertionError:
            if one of the quantiles is not presented in ``y_pred``
        """
        self._validate_segment_columns(y_true=y_true, y_pred=y_pred)
        self._validate_tsdataset_quantiles(ts=y_pred, quantiles=self.quantiles)

        # Column names do not depend on the segment, so build them once.
        lower_column = f"target_{self.quantiles[0]:.4g}"
        upper_column = f"target_{self.quantiles[1]:.4g}"

        segments = set(y_true.df.columns.get_level_values("segment"))
        metrics_per_segment = {}
        for segment in segments:
            self._validate_timestamp_columns(
                timestamp_true=y_true[:, segment, "target"].dropna().index,
                timestamp_pred=y_pred[:, segment, "target"].dropna().index,
            )
            upper_quantile = y_pred[:, segment, upper_column]
            lower_quantile = y_pred[:, segment, lower_column]

            # Absolute value keeps the width non-negative even if the bounds are swapped.
            metrics_per_segment[segment] = np.abs(lower_quantile - upper_quantile).mean()

        metrics = self._aggregate_metrics(metrics_per_segment)
        return metrics


# Names exported by ``from <this module> import *``.
__all__ = ["Coverage", "Width"]
72 changes: 72 additions & 0 deletions tests/test_metrics/test_intervals_metrics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
import pytest

from etna.datasets import TSDataset
from etna.metrics import Coverage
from etna.metrics import Width


@pytest.fixture
def tsdataset_with_zero_width_quantiles(example_df):
    """Return ``(ts_train, ts_test)`` where both quantile columns of ``ts_test`` equal ``target``.

    ``ts_train`` is built from ``example_df`` as is; ``ts_test`` additionally has the
    ``target_0.025`` and ``target_0.975`` features, both copied from ``target``.
    """
    ts_train = TSDataset(TSDataset.to_dataset(example_df), freq="H")

    # Work on a copy so that the injected ``example_df`` fixture is not mutated.
    df_with_quantiles = example_df.copy()
    df_with_quantiles["target_0.025"] = df_with_quantiles["target"]
    df_with_quantiles["target_0.975"] = df_with_quantiles["target"]

    ts_test = TSDataset(TSDataset.to_dataset(df_with_quantiles), freq="H")
    return ts_train, ts_test


@pytest.fixture
def tsdataset_with_differnt_width_and_shifted_quantiles(example_df):
    """Return ``(ts_train, ts_test)`` with a shifted interval for ``segment_1`` only.

    In ``ts_test`` the quantile features start as copies of ``target``. For rows of
    ``segment_1`` the lower bound is then shifted by +1 and the upper bound by +2,
    so the interval has width 1 and lies above ``target``; other rows keep a
    zero-width interval equal to ``target``.
    """
    ts_train = TSDataset(TSDataset.to_dataset(example_df), freq="H")

    # Work on a copy so that the injected ``example_df`` fixture is not mutated.
    df_with_quantiles = example_df.copy()
    df_with_quantiles["target_0.025"] = df_with_quantiles["target"]
    df_with_quantiles["target_0.975"] = df_with_quantiles["target"]

    segment_one_index = df_with_quantiles[lambda x: x.segment == "segment_1"].index

    df_with_quantiles.loc[segment_one_index, "target_0.025"] = (
        df_with_quantiles.loc[segment_one_index, "target_0.025"] + 1
    )
    df_with_quantiles.loc[segment_one_index, "target_0.975"] = (
        df_with_quantiles.loc[segment_one_index, "target_0.975"] + 2
    )

    ts_test = TSDataset(TSDataset.to_dataset(df_with_quantiles), freq="H")
    return ts_train, ts_test


def test_width_metric_with_zero_width_quantiles(tsdataset_with_zero_width_quantiles):
    """Width is zero for every segment when both quantile columns equal the target."""
    y_true, y_pred = tsdataset_with_zero_width_quantiles

    per_segment_width = Width(mode="per-segment")(y_true, y_pred)
    zero_width = 0.0

    for width in per_segment_width.values():
        assert width == zero_width


def test_width_metric_with_differnt_width_and_shifted_quantiles(tsdataset_with_differnt_width_and_shifted_quantiles):
    """Width is 1 for the segment with shifted bounds and 0 for the untouched one."""
    ts_train, ts_test = tsdataset_with_differnt_width_and_shifted_quantiles

    expected_metric = {"segment_1": 1.0, "segment_2": 0.0}
    width_metric = Width(mode="per-segment")(ts_train, ts_test)

    # Compare the whole mapping: a loop over the result would pass vacuously if
    # a segment were missing, and widths are float differences, so use a tolerance.
    assert width_metric == pytest.approx(expected_metric)


def test_coverage_metric_with_differnt_width_and_shifted_quantiles(tsdataset_with_differnt_width_and_shifted_quantiles):
    """Coverage is 0 when the interval lies above the target and 1 when it equals the target."""
    ts_train, ts_test = tsdataset_with_differnt_width_and_shifted_quantiles

    expected_metric = {"segment_1": 0.0, "segment_2": 1.0}
    coverage_metric = Coverage(mode="per-segment")(ts_train, ts_test)

    # Compare the whole mapping: a loop over the result would pass vacuously if
    # a segment were missing from it.
    assert coverage_metric == pytest.approx(expected_metric)


@pytest.mark.parametrize("metric", [metric_cls(quantiles=(0.1, 0.3)) for metric_cls in (Coverage, Width)])
def test_using_not_presented_quantiles(metric, tsdataset_with_zero_width_quantiles):
    """Both metrics fail when the requested quantiles are missing from the dataset."""
    y_true, y_pred = tsdataset_with_zero_width_quantiles
    with pytest.raises(AssertionError, match="Quantile .* is not presented in tsdataset."):
        metric(y_true, y_pred)

1 comment on commit 8ade7d3

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.