From 201bb79a917ab3fbabc46cc8e8097e9717e717d5 Mon Sep 17 00:00:00 2001 From: iKintosh Date: Fri, 11 Feb 2022 14:27:39 +0300 Subject: [PATCH 1/4] fix statistics --- etna/transforms/math/statistics.py | 7 ++-- .../test_math/test_statistics_transform.py | 38 +++++++++---------- 2 files changed, 22 insertions(+), 23 deletions(-) diff --git a/etna/transforms/math/statistics.py b/etna/transforms/math/statistics.py index 9163283ec..0ff6fcd64 100644 --- a/etna/transforms/math/statistics.py +++ b/etna/transforms/math/statistics.py @@ -77,9 +77,8 @@ def transform(self, df: pd.DataFrame) -> pd.DataFrame: """ features = ( df.xs(self.in_column, level=1, axis=1) - .shift(1) .rolling( - window=self.seasonality * self.window if self.window != -1 else len(df) - 1, + window=self.seasonality * self.window if self.window != -1 else len(df), min_periods=self.min_required_len, ) .aggregate(self._aggregate_window) @@ -167,7 +166,7 @@ def transform(self, df: pd.DataFrame) -> pd.DataFrame: result: pd.DataFrame dataframe with results """ - size = self.window if self.window != -1 else len(df) - 1 + size = self.window if self.window != -1 else len(df) self._alpha_range = [self.alpha ** i for i in range(0, size)] return super().transform(df=df) @@ -177,7 +176,7 @@ def _aggregate_window(self, series: pd.Series) -> float: raise ValueError("Something went wrong generating the alphas!") tmp_series = self._get_required_lags(series) size = len(tmp_series) - tmp = tmp_series * self._alpha_range[-size:] + tmp = tmp_series * self._alpha_range[:size] return tmp.mean(**self.kwargs) diff --git a/tests/test_transforms/test_math/test_statistics_transform.py b/tests/test_transforms/test_math/test_statistics_transform.py index 8cfa4b92a..9070aba44 100644 --- a/tests/test_transforms/test_math/test_statistics_transform.py +++ b/tests/test_transforms/test_math/test_statistics_transform.py @@ -79,12 +79,12 @@ def test_interface_quantile(simple_df_for_agg: pd.DataFrame, out_column: str): @pytest.mark.parametrize( "window,seasonality,alpha,periods,fill_na,expected", ( - (10, 1, 1, 1, 0, np.array([0, 0, 0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4])), - (-1, 1, 1, 1, 0, np.array([0, 0, 0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4])), - (3, 1, 1, 1, -17, np.array([-17, 0, 0.5, 1, 2, 3, 4, 5, 6, 7])), - (3, 1, 0.5, 1, -17, np.array([-17, 0, 0.5, 2.5 / 3, 4.25 / 3, 2, 7.75 / 3, 9.5 / 3, 11.25 / 3, 13 / 3])), - (3, 1, 0.5, 3, -12, np.array([-12, -12, -12, 2.5 / 3, 4.25 / 3, 2, 7.75 / 3, 9.5 / 3, 11.25 / 3, 13 / 3])), - (3, 2, 1, 1, -17, np.array([-17, 0, 1, 1, 2, 2, 3, 4, 5, 6])), + (10, 1, 1, 1, 0, np.array([0, 0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4, 4.5])), + (-1, 1, 1, 1, 0, np.array([0, 0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4, 4.5])), + (3, 1, 1, 1, -17, np.array([0, 0.5, 1, 2, 3, 4, 5, 6, 7, 8])), + (3, 1, 0.5, 1, -17, np.array([0, 0.5, 2.5 / 3, 4.25 / 3, 2, 7.75 / 3, 9.5 / 3, 11.25 / 3, 13 / 3, 14.75 / 3])), + (3, 1, 0.5, 3, -12, np.array([-12, -12, 2.5 / 3, 4.25 / 3, 2, 7.75 / 3, 9.5 / 3, 11.25 / 3, 13 / 3, 14.75 / 3])), + (3, 2, 1, 1, -17, np.array([0, 1, 1, 2, 2, 3, 4, 5, 6, 7])), ), ) def test_mean_feature( @@ -115,8 +115,8 @@ def test_mean_feature( ( (10, 1, 1, 0, np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])), (-1, 1, 1, 0, np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])), - (3, 1, 1, -17, np.array([-17, 0, 0, 0, 1, 2, 3, 4, 5, 6])), - (3, 2, 1, -17, np.array([-17, 0, 1, 0, 1, 0, 1, 2, 3, 4])), + (3, 1, 1, -17, np.array([0, 0, 0, 1, 2, 3, 4, 5, 6, 7])), + (3, 2, 1, -17, np.array([0, 1, 0, 1, 0, 1, 2, 3, 4, 5])), ), ) def test_min_feature( @@ -138,9 +138,9 @@ def test_min_feature( @pytest.mark.parametrize( "window,periods,fill_na,expected", ( - (10, 1, 0, np.array([0, 0, 1, 2, 3, 4, 5, 6, 7, 8])), - (-1, 1, 0, np.array([0, 0, 1, 2, 3, 4, 5, 6, 7, 8])), - (3, 2, -17, np.array([-17, -17, 1, 2, 3, 4, 5, 6, 7, 8])), + (10, 1, 0, np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])), + (-1, 1, 0, np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])), + (3, 2, -17, np.array([-17, 1, 2, 3, 4, 5, 6, 7, 8, 9])), ), ) def test_max_feature(simple_df_for_agg: pd.DataFrame, window: int, periods: int, fill_na: float, expected: np.array): @@ -155,8 +155,8 @@ def test_max_feature(simple_df_for_agg: pd.DataFrame, window: int, periods: int, @pytest.mark.parametrize( "window,periods,fill_na,expected", ( - (3, 3, -17, np.array([-17, -17, -17, 1, 2, 3, 4, 5, 6, 7])), - (-1, 1, -17, np.array([-17, 0, 0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4])), + (3, 3, -17, np.array([-17, -17, 1, 2, 3, 4, 5, 6, 7, 8])), + (-1, 1, -17, np.array([0, 0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4, 4.5])), ), ) def test_median_feature(simple_df_for_agg: pd.DataFrame, window: int, periods: int, fill_na: float, expected: np.array): @@ -171,8 +171,8 @@ def test_median_feature(simple_df_for_agg: pd.DataFrame, window: int, periods: i @pytest.mark.parametrize( "window,periods,fill_na,expected", ( - (3, 3, -17, np.array([-17, -17, -17, 1, 1, 1, 1, 1, 1, 1])), - (3, 1, -17, np.array([-17, -17, np.sqrt(0.5 ** 2 * 2), 1, 1, 1, 1, 1, 1, 1])), + (3, 3, -17, np.array([-17, -17, 1, 1, 1, 1, 1, 1, 1, 1])), + (3, 1, -17, np.array([-17, np.sqrt(0.5 ** 2 * 2), 1, 1, 1, 1, 1, 1, 1, 1])), ), ) def test_std_feature(simple_df_for_agg: pd.DataFrame, window: int, periods: int, fill_na: float, expected: np.array): @@ -187,9 +187,9 @@ def test_std_feature(simple_df_for_agg: pd.DataFrame, window: int, periods: int, @pytest.mark.parametrize( "window,periods,fill_na,expected", ( - (3, 3, -17, [-17, -17, -17, 4 / 3, 2 / 3, 2 / 3, 8 / 3, 2, 14 / 9, 10 / 9]), - (4, 1, -17, [-17, 0, 1, 4 / 3, 1.25, 1, 2.25, 2.75, 2, 1.5]), - (-1, 1, 0, [0, 0, 1, 4 / 3, 1.25, 1.44, 7 / 3, 138 / 49, 2.625, 208 / 81]), + (3, 3, -17, [-17, -17, 4 / 3, 2 / 3, 2 / 3, 8 / 3, 2, 14 / 9, 10 / 9, 22 / 9]), + (4, 1, -17, [0, 1, 4 / 3, 1.25, 1, 2.25, 2.75, 2, 1.5, 9.5 / 4]), + (-1, 1, 0, [0, 1, 4 / 3, 1.25, 1.44, 7 / 3, 138 / 49, 2.625, 208 / 81, 27 / 10]), ), ) def test_mad_transform(df_for_agg: pd.DataFrame, window: int, periods: int, fill_na: float, expected: np.ndarray): @@ -202,7 +202,7 @@ def test_mad_transform(df_for_agg: pd.DataFrame, window: int, periods: int, fill @pytest.mark.parametrize( "window,periods,fill_na,expected", - ((3, 3, -17, [-17, -17, -17, 4 / 3, -17, -17, -17, 2, 14 / 9, 10 / 9]),), + ((3, 3, -17, [-17, -17, 4 / 3, -17, -17, -17, 2, 14 / 9, 10 / 9, 22 / 9]),), ) def test_mad_transform_with_nans( df_for_agg_with_nan: pd.DataFrame, window: int, periods: int, fill_na: float, expected: np.ndarray From d48231b4f7198b6885e8166421887bf10ac0f728 Mon Sep 17 00:00:00 2001 From: iKintosh Date: Fri, 11 Feb 2022 14:29:41 +0300 Subject: [PATCH 2/4] changelog) --- CHANGELOG.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 78ea0ac41..fe18f0a9e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -31,14 +31,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed - -- -- +- Fixed adding unnecessary lag=1 in statistics ([#523](https://github.com/tinkoff-ai/etna/pull/523)) +- - Fix processing add_noise=True parameter in datasets generation ([#520](https://github.com/tinkoff-ai/etna/pull/520)) -- -- -- -- -- +- +- +- +- +- ## [1.6.2] - 2022-02-09 ### Added From aaa38563b1d71c1b0001c6d261c2adf2901c0d69 Mon Sep 17 00:00:00 2001 From: iKintosh Date: Fri, 11 Feb 2022 14:57:20 +0300 Subject: [PATCH 3/4] fix old test and style --- tests/test_transforms/test_encoders/conftest.py | 4 ++-- .../test_math/test_statistics_transform.py | 11 +++++++++-- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/tests/test_transforms/test_encoders/conftest.py b/tests/test_transforms/test_encoders/conftest.py index fb2c482c5..4bdea21dd 100644 --- a/tests/test_transforms/test_encoders/conftest.py +++ b/tests/test_transforms/test_encoders/conftest.py @@ -44,11 +44,11 @@ def transformed_simple_df() -> pd.DataFrame: df_1["segment"] = "Moscow" df_1["target"] = [1.0, 2.0, 3.0, 4.0, 5.0, np.NAN, np.NAN] df_1["exog"] = [6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0] - df_1["regressor_segment_mean"] = [0, 1, 1.5, 2, 2.5, 3, 3] + df_1["regressor_segment_mean"] = [1, 1.5, 2, 2.5, 3, 3, 3] df_2["segment"] = "Omsk" df_2["target"] = [10.0, 20.0, 30.0, 40.0, 50.0, np.NAN, np.NAN] df_2["exog"] = [60.0, 70.0, 80.0, 90.0, 100.0, 110.0, 120.0] - df_2["regressor_segment_mean"] = [0.0, 10.0, 15.0, 20.0, 25.0, 30, 30] + df_2["regressor_segment_mean"] = [10.0, 15.0, 20.0, 25.0, 30, 30, 30] classic_df = pd.concat([df_1, df_2], ignore_index=True) df = TSDataset.to_dataset(classic_df) return df diff --git a/tests/test_transforms/test_math/test_statistics_transform.py b/tests/test_transforms/test_math/test_statistics_transform.py index 9070aba44..992f6612b 100644 --- a/tests/test_transforms/test_math/test_statistics_transform.py +++ b/tests/test_transforms/test_math/test_statistics_transform.py @@ -83,7 +83,14 @@ def test_interface_quantile(simple_df_for_agg: pd.DataFrame, out_column: str): (-1, 1, 1, 1, 0, np.array([0, 0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4, 4.5])), (3, 1, 1, 1, -17, np.array([0, 0.5, 1, 2, 3, 4, 5, 6, 7, 8])), (3, 1, 0.5, 1, -17, np.array([0, 0.5, 2.5 / 3, 4.25 / 3, 2, 7.75 / 3, 9.5 / 3, 11.25 / 3, 13 / 3, 14.75 / 3])), - (3, 1, 0.5, 3, -12, np.array([-12, -12, 2.5 / 3, 4.25 / 3, 2, 7.75 / 3, 9.5 / 3, 11.25 / 3, 13 / 3, 14.75 / 3])), + ( + 3, + 1, + 0.5, + 3, + -12, + np.array([-12, -12, 2.5 / 3, 4.25 / 3, 2, 7.75 / 3, 9.5 / 3, 11.25 / 3, 13 / 3, 14.75 / 3]), + ), (3, 2, 1, 1, -17, np.array([0, 1, 1, 2, 2, 3, 4, 5, 6, 7])), ), ) @@ -189,7 +196,7 @@ def test_std_feature(simple_df_for_agg: pd.DataFrame, window: int, periods: int, ( (3, 3, -17, [-17, -17, 4 / 3, 2 / 3, 2 / 3, 8 / 3, 2, 14 / 9, 10 / 9, 22 / 9]), (4, 1, -17, [0, 1, 4 / 3, 1.25, 1, 2.25, 2.75, 2, 1.5, 9.5 / 4]), - (-1, 1, 0, [0, 1, 4 / 3, 1.25, 1.44, 7 / 3, 138 / 49, 2.625, 208 / 81, 27 / 10]), + (-1, 1, 0, [0, 1, 4 / 3, 1.25, 1.44, 7 / 3, 138 / 49, 2.625, 208 / 81, 27 / 10]), ), ) def test_mad_transform(df_for_agg: pd.DataFrame, window: int, periods: int, fill_na: float, expected: np.ndarray): From afe35a149adf499af503a7c65e933c3ae9002978 Mon Sep 17 00:00:00 2001 From: iKintosh Date: Fri, 11 Feb 2022 14:58:38 +0300 Subject: [PATCH 4/4] changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fe18f0a9e..709d0b505 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -32,7 +32,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed - - Fixed adding unnecessary lag=1 in statistics ([#523](https://github.com/tinkoff-ai/etna/pull/523)) -- +- Fixed wrong MeanTransform behaviour when using alpha parameter ([#523](https://github.com/tinkoff-ai/etna/pull/523)) - Fix processing add_noise=True parameter in datasets generation ([#520](https://github.com/tinkoff-ai/etna/pull/520)) - -