Skip to content

Commit

Permalink
fix statistics (#523)
Browse files Browse the repository at this point in the history
* fix statistics
* fix old test and style
* changelog
  • Loading branch information
iKintosh authored Feb 11, 2022
1 parent d80a910 commit 3d13428
Show file tree
Hide file tree
Showing 4 changed files with 38 additions and 32 deletions.
14 changes: 7 additions & 7 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,14 +31,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Fixed
-
-
-
- Fixed the unnecessary lag=1 shift being added in statistics transforms ([#523](https://github.com/tinkoff-ai/etna/pull/523))
- Fixed wrong MeanTransform behaviour when using alpha parameter ([#523](https://github.com/tinkoff-ai/etna/pull/523))
- Fixed processing of the add_noise=True parameter in dataset generation ([#520](https://github.com/tinkoff-ai/etna/pull/520))
-
-
-
-
-
-
-
-
-
-

## [1.6.2] - 2022-02-09
### Added
Expand Down
7 changes: 3 additions & 4 deletions etna/transforms/math/statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,9 +77,8 @@ def transform(self, df: pd.DataFrame) -> pd.DataFrame:
"""
features = (
df.xs(self.in_column, level=1, axis=1)
.shift(1)
.rolling(
window=self.seasonality * self.window if self.window != -1 else len(df) - 1,
window=self.seasonality * self.window if self.window != -1 else len(df),
min_periods=self.min_required_len,
)
.aggregate(self._aggregate_window)
Expand Down Expand Up @@ -167,7 +166,7 @@ def transform(self, df: pd.DataFrame) -> pd.DataFrame:
result: pd.DataFrame
dataframe with results
"""
size = self.window if self.window != -1 else len(df) - 1
size = self.window if self.window != -1 else len(df)
self._alpha_range = [self.alpha ** i for i in range(0, size)]
return super().transform(df=df)

Expand All @@ -177,7 +176,7 @@ def _aggregate_window(self, series: pd.Series) -> float:
raise ValueError("Something went wrong generating the alphas!")
tmp_series = self._get_required_lags(series)
size = len(tmp_series)
tmp = tmp_series * self._alpha_range[-size:]
tmp = tmp_series * self._alpha_range[:size]
return tmp.mean(**self.kwargs)


Expand Down
4 changes: 2 additions & 2 deletions tests/test_transforms/test_encoders/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,11 +44,11 @@ def transformed_simple_df() -> pd.DataFrame:
df_1["segment"] = "Moscow"
df_1["target"] = [1.0, 2.0, 3.0, 4.0, 5.0, np.NAN, np.NAN]
df_1["exog"] = [6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0]
df_1["regressor_segment_mean"] = [0, 1, 1.5, 2, 2.5, 3, 3]
df_1["regressor_segment_mean"] = [1, 1.5, 2, 2.5, 3, 3, 3]
df_2["segment"] = "Omsk"
df_2["target"] = [10.0, 20.0, 30.0, 40.0, 50.0, np.NAN, np.NAN]
df_2["exog"] = [60.0, 70.0, 80.0, 90.0, 100.0, 110.0, 120.0]
df_2["regressor_segment_mean"] = [0.0, 10.0, 15.0, 20.0, 25.0, 30, 30]
df_2["regressor_segment_mean"] = [10.0, 15.0, 20.0, 25.0, 30, 30, 30]
classic_df = pd.concat([df_1, df_2], ignore_index=True)
df = TSDataset.to_dataset(classic_df)
return df
45 changes: 26 additions & 19 deletions tests/test_transforms/test_math/test_statistics_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,12 +79,19 @@ def test_interface_quantile(simple_df_for_agg: pd.DataFrame, out_column: str):
@pytest.mark.parametrize(
"window,seasonality,alpha,periods,fill_na,expected",
(
(10, 1, 1, 1, 0, np.array([0, 0, 0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4])),
(-1, 1, 1, 1, 0, np.array([0, 0, 0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4])),
(3, 1, 1, 1, -17, np.array([-17, 0, 0.5, 1, 2, 3, 4, 5, 6, 7])),
(3, 1, 0.5, 1, -17, np.array([-17, 0, 0.5, 2.5 / 3, 4.25 / 3, 2, 7.75 / 3, 9.5 / 3, 11.25 / 3, 13 / 3])),
(3, 1, 0.5, 3, -12, np.array([-12, -12, -12, 2.5 / 3, 4.25 / 3, 2, 7.75 / 3, 9.5 / 3, 11.25 / 3, 13 / 3])),
(3, 2, 1, 1, -17, np.array([-17, 0, 1, 1, 2, 2, 3, 4, 5, 6])),
(10, 1, 1, 1, 0, np.array([0, 0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4, 4.5])),
(-1, 1, 1, 1, 0, np.array([0, 0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4, 4.5])),
(3, 1, 1, 1, -17, np.array([0, 0.5, 1, 2, 3, 4, 5, 6, 7, 8])),
(3, 1, 0.5, 1, -17, np.array([0, 0.5, 2.5 / 3, 4.25 / 3, 2, 7.75 / 3, 9.5 / 3, 11.25 / 3, 13 / 3, 14.75 / 3])),
(
3,
1,
0.5,
3,
-12,
np.array([-12, -12, 2.5 / 3, 4.25 / 3, 2, 7.75 / 3, 9.5 / 3, 11.25 / 3, 13 / 3, 14.75 / 3]),
),
(3, 2, 1, 1, -17, np.array([0, 1, 1, 2, 2, 3, 4, 5, 6, 7])),
),
)
def test_mean_feature(
Expand Down Expand Up @@ -115,8 +122,8 @@ def test_mean_feature(
(
(10, 1, 1, 0, np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])),
(-1, 1, 1, 0, np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])),
(3, 1, 1, -17, np.array([-17, 0, 0, 0, 1, 2, 3, 4, 5, 6])),
(3, 2, 1, -17, np.array([-17, 0, 1, 0, 1, 0, 1, 2, 3, 4])),
(3, 1, 1, -17, np.array([0, 0, 0, 1, 2, 3, 4, 5, 6, 7])),
(3, 2, 1, -17, np.array([0, 1, 0, 1, 0, 1, 2, 3, 4, 5])),
),
)
def test_min_feature(
Expand All @@ -138,9 +145,9 @@ def test_min_feature(
@pytest.mark.parametrize(
"window,periods,fill_na,expected",
(
(10, 1, 0, np.array([0, 0, 1, 2, 3, 4, 5, 6, 7, 8])),
(-1, 1, 0, np.array([0, 0, 1, 2, 3, 4, 5, 6, 7, 8])),
(3, 2, -17, np.array([-17, -17, 1, 2, 3, 4, 5, 6, 7, 8])),
(10, 1, 0, np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])),
(-1, 1, 0, np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])),
(3, 2, -17, np.array([-17, 1, 2, 3, 4, 5, 6, 7, 8, 9])),
),
)
def test_max_feature(simple_df_for_agg: pd.DataFrame, window: int, periods: int, fill_na: float, expected: np.array):
Expand All @@ -155,8 +162,8 @@ def test_max_feature(simple_df_for_agg: pd.DataFrame, window: int, periods: int,
@pytest.mark.parametrize(
"window,periods,fill_na,expected",
(
(3, 3, -17, np.array([-17, -17, -17, 1, 2, 3, 4, 5, 6, 7])),
(-1, 1, -17, np.array([-17, 0, 0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4])),
(3, 3, -17, np.array([-17, -17, 1, 2, 3, 4, 5, 6, 7, 8])),
(-1, 1, -17, np.array([0, 0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4, 4.5])),
),
)
def test_median_feature(simple_df_for_agg: pd.DataFrame, window: int, periods: int, fill_na: float, expected: np.array):
Expand All @@ -171,8 +178,8 @@ def test_median_feature(simple_df_for_agg: pd.DataFrame, window: int, periods: i
@pytest.mark.parametrize(
"window,periods,fill_na,expected",
(
(3, 3, -17, np.array([-17, -17, -17, 1, 1, 1, 1, 1, 1, 1])),
(3, 1, -17, np.array([-17, -17, np.sqrt(0.5 ** 2 * 2), 1, 1, 1, 1, 1, 1, 1])),
(3, 3, -17, np.array([-17, -17, 1, 1, 1, 1, 1, 1, 1, 1])),
(3, 1, -17, np.array([-17, np.sqrt(0.5 ** 2 * 2), 1, 1, 1, 1, 1, 1, 1, 1])),
),
)
def test_std_feature(simple_df_for_agg: pd.DataFrame, window: int, periods: int, fill_na: float, expected: np.array):
Expand All @@ -187,9 +194,9 @@ def test_std_feature(simple_df_for_agg: pd.DataFrame, window: int, periods: int,
@pytest.mark.parametrize(
"window,periods,fill_na,expected",
(
(3, 3, -17, [-17, -17, -17, 4 / 3, 2 / 3, 2 / 3, 8 / 3, 2, 14 / 9, 10 / 9]),
(4, 1, -17, [-17, 0, 1, 4 / 3, 1.25, 1, 2.25, 2.75, 2, 1.5]),
(-1, 1, 0, [0, 0, 1, 4 / 3, 1.25, 1.44, 7 / 3, 138 / 49, 2.625, 208 / 81]),
(3, 3, -17, [-17, -17, 4 / 3, 2 / 3, 2 / 3, 8 / 3, 2, 14 / 9, 10 / 9, 22 / 9]),
(4, 1, -17, [0, 1, 4 / 3, 1.25, 1, 2.25, 2.75, 2, 1.5, 9.5 / 4]),
(-1, 1, 0, [0, 1, 4 / 3, 1.25, 1.44, 7 / 3, 138 / 49, 2.625, 208 / 81, 27 / 10]),
),
)
def test_mad_transform(df_for_agg: pd.DataFrame, window: int, periods: int, fill_na: float, expected: np.ndarray):
Expand All @@ -202,7 +209,7 @@ def test_mad_transform(df_for_agg: pd.DataFrame, window: int, periods: int, fill

@pytest.mark.parametrize(
"window,periods,fill_na,expected",
((3, 3, -17, [-17, -17, -17, 4 / 3, -17, -17, -17, 2, 14 / 9, 10 / 9]),),
((3, 3, -17, [-17, -17, 4 / 3, -17, -17, -17, 2, 14 / 9, 10 / 9, 22 / 9]),),
)
def test_mad_transform_with_nans(
df_for_agg_with_nan: pd.DataFrame, window: int, periods: int, fill_na: float, expected: np.ndarray
Expand Down

0 comments on commit 3d13428

Please sign in to comment.