Skip to content

Commit

Permalink
Add ConfidenceIntervalOutliersTransform (#196)
Browse files Browse the repository at this point in the history
* Add ConfidenceIntervalOutliersTransform

* Fixes

* Fix tests

* Update CHANGELOG

* Update Outliers notebook
  • Loading branch information
alex-hse-repository authored Oct 15, 2021
1 parent a049761 commit 4a0db38
Show file tree
Hide file tree
Showing 6 changed files with 153 additions and 26 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Examples for CatBoostModelMultiSegment and CatBoostModelPerSegment ([#181](https://github.com/tinkoff-ai/etna-ts/pull/181))
- Simplify TSDataset.train_test_split method by allowing to pass not all values ([#191](https://github.com/tinkoff-ai/etna-ts/pull/191))
- Confidence interval anomalies detection to EDA ([#182](https://github.com/tinkoff-ai/etna-ts/pull/182))
- ConfidenceIntervalOutliersTransform ([#196](https://github.com/tinkoff-ai/etna-ts/pull/196))

### Changed
- Delete offset from WindowStatisticsTransform ([#111](https://github.com/tinkoff-ai/etna-ts/pull/111))
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ We have also prepared a set of tutorials for an easy introduction:
- Point outliers
- Median method
- Density method
- Confidence interval method
- Sequence outliers
- Interactive visualization
- Outliers imputation
Expand Down
1 change: 1 addition & 0 deletions etna/transforms/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from etna.transforms.imputation import TimeSeriesImputerTransform
from etna.transforms.lags import LagTransform
from etna.transforms.log import LogTransform
from etna.transforms.outliers import ConfidenceIntervalOutliersTransform
from etna.transforms.outliers import DensityOutliersTransform
from etna.transforms.outliers import MedianOutliersTransform
from etna.transforms.outliers import SAXOutliersTransform
Expand Down
55 changes: 54 additions & 1 deletion etna/transforms/outliers.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,19 @@
from typing import Dict
from typing import List
from typing import Optional
from typing import Type
from typing import Union

import numpy as np
import pandas as pd

from etna.analysis import get_anomalies_confidence_interval
from etna.analysis import get_anomalies_density
from etna.analysis import get_anomalies_median
from etna.analysis import get_sequence_anomalies
from etna.datasets import TSDataset
from etna.models import ProphetModel
from etna.models import SARIMAXModel
from etna.transforms.base import Transform


Expand Down Expand Up @@ -265,4 +270,52 @@ def detect_outliers(self, ts: TSDataset) -> Dict[str, List[pd.Timestamp]]:
)


__all__ = ["MedianOutliersTransform", "DensityOutliersTransform"]
class ConfidenceIntervalOutliersTransform(OutliersTransform):
    """Transform that uses get_anomalies_confidence_interval to find anomalies in data."""

    def __init__(
        self,
        in_column: str,
        model: Union[Type["ProphetModel"], Type["SARIMAXModel"]],
        interval_width: float = 0.95,
        **model_kwargs,
    ):
        """Create instance of ConfidenceIntervalOutliersTransform.

        Parameters
        ----------
        in_column:
            name of the column to process; passed on to ``OutliersTransform.__init__``
        model:
            model class (not an instance) for confidence interval estimation
        interval_width:
            width of the confidence interval
        model_kwargs:
            extra keyword arguments forwarded unchanged to
            ``get_anomalies_confidence_interval`` (presumably used to configure
            ``model`` -- confirm against that function's documentation)
        """
        self.in_column = in_column
        self.model = model
        self.interval_width = interval_width
        self.model_kwargs = model_kwargs
        super().__init__(in_column=self.in_column)

    def detect_outliers(self, ts: TSDataset) -> Dict[str, List[pd.Timestamp]]:
        """Call `get_anomalies_confidence_interval` function with self parameters.

        Parameters
        ----------
        ts:
            dataset to process

        Returns
        -------
        dict of outliers:
            dict of outliers in format {segment: [outliers_timestamps]}
        """
        # NOTE(review): ``self.in_column`` is not forwarded here, so the column the
        # detection runs on is decided by get_anomalies_confidence_interval itself --
        # confirm it matches ``in_column``.
        return get_anomalies_confidence_interval(ts, self.model, self.interval_width, **self.model_kwargs)


# Names exported by ``from etna.transforms.outliers import *``.
__all__ = [
"MedianOutliersTransform",
"DensityOutliersTransform",
"SAXOutliersTransform",
"ConfidenceIntervalOutliersTransform",
]
98 changes: 79 additions & 19 deletions examples/outliers.ipynb

Large diffs are not rendered by default.

23 changes: 17 additions & 6 deletions tests/test_transforms/test_outliers_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,13 @@
import pandas as pd
import pytest

from etna.analysis import get_anomalies_confidence_interval
from etna.analysis import get_anomalies_density
from etna.analysis import get_anomalies_median
from etna.analysis import get_sequence_anomalies
from etna.datasets.tsdataset import TSDataset
from etna.models import ProphetModel
from etna.transforms import ConfidenceIntervalOutliersTransform
from etna.transforms import DensityOutliersTransform
from etna.transforms import MedianOutliersTransform
from etna.transforms import SAXOutliersTransform
Expand Down Expand Up @@ -43,6 +46,7 @@ def outliers_solid_tsds():
MedianOutliersTransform(in_column="target"),
DensityOutliersTransform(in_column="target"),
SAXOutliersTransform(in_column="target"),
ConfidenceIntervalOutliersTransform(in_column="target", model=ProphetModel),
],
)
def test_interface(transform, example_tsds: TSDataset):
Expand All @@ -53,16 +57,21 @@ def test_interface(transform, example_tsds: TSDataset):


@pytest.mark.parametrize(
"transform, method",
"transform, method, method_kwargs",
[
(MedianOutliersTransform(in_column="target"), get_anomalies_median),
(DensityOutliersTransform(in_column="target"), get_anomalies_density),
(SAXOutliersTransform(in_column="target"), get_sequence_anomalies),
(MedianOutliersTransform(in_column="target"), get_anomalies_median, {}),
(DensityOutliersTransform(in_column="target"), get_anomalies_density, {}),
(SAXOutliersTransform(in_column="target"), get_sequence_anomalies, {}),
(
ConfidenceIntervalOutliersTransform(in_column="target", model=ProphetModel),
get_anomalies_confidence_interval,
{"model": ProphetModel},
),
],
)
def test_outliers_detection(transform, method, outliers_tsds):
def test_outliers_detection(transform, method, outliers_tsds, method_kwargs):
"""Checks that outliers transforms detect anomalies according to methods from etna.analysis."""
detectiom_method_results = method(outliers_tsds)
detectiom_method_results = method(outliers_tsds, **method_kwargs)

# save for each segment index without existing nans
non_nan_index = {}
Expand All @@ -82,6 +91,7 @@ def test_outliers_detection(transform, method, outliers_tsds):
MedianOutliersTransform(in_column="target"),
DensityOutliersTransform(in_column="target"),
SAXOutliersTransform(in_column="target"),
ConfidenceIntervalOutliersTransform(in_column="target", model=ProphetModel),
],
)
def test_inverse_transform_train(transform, outliers_solid_tsds):
Expand All @@ -99,6 +109,7 @@ def test_inverse_transform_train(transform, outliers_solid_tsds):
MedianOutliersTransform(in_column="target"),
DensityOutliersTransform(in_column="target"),
SAXOutliersTransform(in_column="target"),
ConfidenceIntervalOutliersTransform(in_column="target", model=ProphetModel),
],
)
def test_inverse_transform_future(transform, outliers_solid_tsds):
Expand Down

0 comments on commit 4a0db38

Please sign in to comment.