Skip to content

Commit

Permalink
Add inverse_transform in TimeSeriesImputerTransform (#135)
Browse files Browse the repository at this point in the history
* Add inverse_transform and tests for it, fixed bug with running_mean option

* Update changelog
  • Loading branch information
Mr-Geekman authored Oct 5, 2021
1 parent d4b98a4 commit 84fbd7d
Show file tree
Hide file tree
Showing 3 changed files with 77 additions and 7 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Property regressors to TSDataset ([#82](https://github.com/tinkoff-ai/etna-ts/pull/82))
- Clustering ([#110](https://github.com/tinkoff-ai/etna-ts/pull/110))
- Outliers notebook ([#123](https://github.com/tinkoff-ai/etna-ts/pull/123))
- Method inverse_transform in TimeSeriesImputerTransform ([#135](https://github.com/tinkoff-ai/etna-ts/pull/135))

### Changed
- SklearnTransform out column names ([#99](https://github.com/tinkoff-ai/etna-ts/pull/99))
Expand Down
24 changes: 23 additions & 1 deletion etna/transforms/imputation.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from enum import Enum

import numpy as np
import pandas as pd

from etna.transforms.base import PerSegmentWrapper
Expand Down Expand Up @@ -46,6 +47,7 @@ def __init__(self, in_column: str = "target", strategy: str = ImputerMode.zero,
self.strategy = ImputerMode(strategy)
self.window = window
self.fill_value = None
self.nan_timestamps = None

def fit(self, df: pd.DataFrame) -> "_OneSegmentTimeSeriesImputerTransform":
"""
Expand Down Expand Up @@ -81,8 +83,28 @@ def transform(self, df: pd.DataFrame) -> pd.DataFrame:
result: pd.DataFrame
dataframe with in_column series with filled gaps
"""
self.nan_timestamps = df[df[self.in_column].isna()].index
result_df = df.copy()
result_df[self.in_column] = self._fill(df[self.in_column])
result_df[self.in_column] = self._fill(result_df[self.in_column])
return result_df

def inverse_transform(self, df: pd.DataFrame) -> pd.DataFrame:
    """
    Put NaN back at the timestamps whose gaps were filled by ``transform``.

    Parameters
    ----------
    df: pd.DataFrame
        dataframe whose in_column series should have its imputed values reset

    Returns
    -------
    result: pd.DataFrame
        copy of df in which in_column is NaN at every timestamp that was
        missing in the dataframe most recently passed to ``transform``
    """
    result_df = df.copy()
    # nan_timestamps stays None until transform has been called; nothing was
    # imputed in that case, so there is nothing to undo.
    if self.nan_timestamps is None:
        return result_df
    # Restrict to timestamps that are present in df: it does not have to cover
    # the same range as the frame that transform saw.
    index = result_df.index.intersection(self.nan_timestamps)
    result_df.loc[index, self.in_column] = np.nan
    return result_df

def _fill(self, df: pd.Series) -> pd.Series:
Expand Down
59 changes: 53 additions & 6 deletions tests/test_transforms/test_impute_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import pandas as pd
import pytest

from etna.datasets import TSDataset
from etna.transforms.imputation import TimeSeriesImputerTransform
from etna.transforms.imputation import _OneSegmentTimeSeriesImputerTransform

Expand Down Expand Up @@ -37,10 +38,7 @@ def all_date_present_df_two_segments(all_date_present_df: pd.Series) -> pd.DataF
df_2["segment"] = "segment_2"

classic_df = pd.concat([df_1, df_2], ignore_index=True)
df = classic_df.pivot(index="timestamp", columns="segment")
df = df.reorder_levels([1, 0], axis=1)
df = df.sort_index(axis=1)
df.columns.names = ["segment", "feature"]
df = TSDataset.to_dataset(classic_df)
return df


Expand Down Expand Up @@ -78,6 +76,21 @@ def df_with_missing_range_x_index(all_date_present_df: pd.DataFrame) -> Tuple[pd
return df, rng


@pytest.fixture()
def df_with_missing_range_x_index_two_segments(
    df_with_missing_range_x_index: Tuple[pd.DataFrame, list]
) -> Tuple[pd.DataFrame, list]:
    """Create a two-segment pd.DataFrame where both segments hold the same target with a range of gaps.

    The second element of the returned tuple is passed through unchanged from
    the one-segment fixture.
    """
    df_one_segment, rng = df_with_missing_range_x_index
    # reset_index returns a new frame each time, so adding the "segment" column
    # below never touches the source fixture or the other segment.
    df_1 = df_one_segment.reset_index()
    df_2 = df_one_segment.reset_index()
    df_1["segment"] = "segment_1"
    df_2["segment"] = "segment_2"
    classic_df = pd.concat([df_1, df_2], ignore_index=True)
    df = TSDataset.to_dataset(classic_df)
    return df, rng


@pytest.mark.smoke
@pytest.mark.parametrize("fill_strategy", ["mean", "zero", "running_mean", "forward_fill"])
def test_all_dates_present_impute(all_date_present_df: pd.DataFrame, fill_strategy: str):
Expand Down Expand Up @@ -193,7 +206,41 @@ def test_range_missing_running_mean(df_with_missing_range_x_index: pd.DataFrame,
assert not result.isna().any()
for idx in timestamp_idxs:
if window == -1:
expected_value = df.loc[: timestamps[idx - 1], "target"].mean()
expected_value = result.loc[: timestamps[idx - 1]].mean()
else:
expected_value = df.loc[timestamps[idx - window] : timestamps[idx - 1], "target"].mean()
expected_value = result.loc[timestamps[idx - window] : timestamps[idx - 1]].mean()
assert result.loc[timestamps[idx]] == expected_value


@pytest.mark.parametrize("fill_strategy", ["mean", "zero", "running_mean", "forward_fill"])
def test_inverse_transform_one_segment(
    df_with_missing_range_x_index: Tuple[pd.DataFrame, list], fill_strategy: str
):
    """Check that transform + inverse_transform don't change original df for one segment."""
    # The fixture yields (dataframe, gap positions); only the dataframe is needed here.
    df, _ = df_with_missing_range_x_index
    imputer = _OneSegmentTimeSeriesImputerTransform(strategy=fill_strategy)
    transform_result = imputer.fit_transform(df)
    inverse_transform_result = imputer.inverse_transform(transform_result)
    # assert_array_equal treats NaN in matching positions as equal, so the
    # restored gaps compare equal to the original ones.
    np.testing.assert_array_equal(df, inverse_transform_result)


@pytest.mark.parametrize("fill_strategy", ["mean", "zero", "running_mean", "forward_fill"])
def test_inverse_transform_many_segments(
    df_with_missing_range_x_index_two_segments: Tuple[pd.DataFrame, list], fill_strategy: str
):
    """Check that transform + inverse_transform don't change original df for two segments."""
    # The fixture yields (dataframe, gap positions); only the dataframe is needed here.
    df, _ = df_with_missing_range_x_index_two_segments
    imputer = TimeSeriesImputerTransform(strategy=fill_strategy)
    transform_result = imputer.fit_transform(df)
    inverse_transform_result = imputer.inverse_transform(transform_result)
    # assert_array_equal treats NaN in matching positions as equal, so the
    # restored gaps compare equal to the original ones.
    np.testing.assert_array_equal(df, inverse_transform_result)


@pytest.mark.parametrize("fill_strategy", ["mean", "zero", "running_mean", "forward_fill"])
def test_inverse_transform_make_future(
    df_with_missing_range_x_index_two_segments: Tuple[pd.DataFrame, list], fill_strategy: str
):
    """Check that inverse_transform return nan-s back after make_future."""
    # The fixture yields (dataframe, gap positions); only the dataframe is needed here.
    df, _ = df_with_missing_range_x_index_two_segments
    ts = TSDataset(df, freq=pd.infer_freq(df.index))
    imputer = TimeSeriesImputerTransform(strategy=fill_strategy)
    ts.fit_transform(transforms=[imputer])
    ts_test = ts.make_future(3)
    ts_test.inverse_transform()
    df_test_final = ts_test.to_pandas()
    # After inverse_transform every target value of the future frame must be NaN
    # in each segment.
    for segment in ts.segments:
        assert np.all(df_test_final.loc[:, pd.IndexSlice[segment, "target"]].isna())

0 comments on commit 84fbd7d

Please sign in to comment.