diff --git a/CHANGELOG.md b/CHANGELOG.md index 78ea0ac41..709d0b505 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -31,14 +31,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed - -- -- +- Fixed adding unnecessary lag=1 in statistics ([#523](https://github.com/tinkoff-ai/etna/pull/523)) +- Fixed wrong MeanTransform behaviour when using alpha parameter ([#523](https://github.com/tinkoff-ai/etna/pull/523)) - Fix processing add_noise=True parameter in datasets generation ([#520](https://github.com/tinkoff-ai/etna/pull/520)) -- -- -- -- -- +- +- +- +- +- ## [1.6.2] - 2022-02-09 ### Added diff --git a/etna/transforms/math/statistics.py b/etna/transforms/math/statistics.py index 9163283ec..0ff6fcd64 100644 --- a/etna/transforms/math/statistics.py +++ b/etna/transforms/math/statistics.py @@ -77,9 +77,8 @@ def transform(self, df: pd.DataFrame) -> pd.DataFrame: """ features = ( df.xs(self.in_column, level=1, axis=1) - .shift(1) .rolling( - window=self.seasonality * self.window if self.window != -1 else len(df) - 1, + window=self.seasonality * self.window if self.window != -1 else len(df), min_periods=self.min_required_len, ) .aggregate(self._aggregate_window) @@ -167,7 +166,7 @@ def transform(self, df: pd.DataFrame) -> pd.DataFrame: result: pd.DataFrame dataframe with results """ - size = self.window if self.window != -1 else len(df) - 1 + size = self.window if self.window != -1 else len(df) self._alpha_range = [self.alpha ** i for i in range(0, size)] return super().transform(df=df) @@ -177,7 +176,7 @@ def _aggregate_window(self, series: pd.Series) -> float: raise ValueError("Something went wrong generating the alphas!") tmp_series = self._get_required_lags(series) size = len(tmp_series) - tmp = tmp_series * self._alpha_range[-size:] + tmp = tmp_series * self._alpha_range[:size] return tmp.mean(**self.kwargs) diff --git a/tests/test_transforms/test_encoders/conftest.py b/tests/test_transforms/test_encoders/conftest.py index fb2c482c5..4bdea21dd 100644 --- a/tests/test_transforms/test_encoders/conftest.py +++ b/tests/test_transforms/test_encoders/conftest.py @@ -44,11 +44,11 @@ def transformed_simple_df() -> pd.DataFrame: df_1["segment"] = "Moscow" df_1["target"] = [1.0, 2.0, 3.0, 4.0, 5.0, np.NAN, np.NAN] df_1["exog"] = [6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0] - df_1["regressor_segment_mean"] = [0, 1, 1.5, 2, 2.5, 3, 3] + df_1["regressor_segment_mean"] = [1, 1.5, 2, 2.5, 3, 3, 3] df_2["segment"] = "Omsk" df_2["target"] = [10.0, 20.0, 30.0, 40.0, 50.0, np.NAN, np.NAN] df_2["exog"] = [60.0, 70.0, 80.0, 90.0, 100.0, 110.0, 120.0] - df_2["regressor_segment_mean"] = [0.0, 10.0, 15.0, 20.0, 25.0, 30, 30] + df_2["regressor_segment_mean"] = [10.0, 15.0, 20.0, 25.0, 30, 30, 30] classic_df = pd.concat([df_1, df_2], ignore_index=True) df = TSDataset.to_dataset(classic_df) return df diff --git a/tests/test_transforms/test_math/test_statistics_transform.py b/tests/test_transforms/test_math/test_statistics_transform.py index 8cfa4b92a..992f6612b 100644 --- a/tests/test_transforms/test_math/test_statistics_transform.py +++ b/tests/test_transforms/test_math/test_statistics_transform.py @@ -79,12 +79,19 @@ def test_interface_quantile(simple_df_for_agg: pd.DataFrame, out_column: str): @pytest.mark.parametrize( "window,seasonality,alpha,periods,fill_na,expected", ( - (10, 1, 1, 1, 0, np.array([0, 0, 0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4])), - (-1, 1, 1, 1, 0, np.array([0, 0, 0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4])), - (3, 1, 1, 1, -17, np.array([-17, 0, 0.5, 1, 2, 3, 4, 5, 6, 7])), - (3, 1, 0.5, 1, -17, np.array([-17, 0, 0.5, 2.5 / 3, 4.25 / 3, 2, 7.75 / 3, 9.5 / 3, 11.25 / 3, 13 / 3])), - (3, 1, 0.5, 3, -12, np.array([-12, -12, -12, 2.5 / 3, 4.25 / 3, 2, 7.75 / 3, 9.5 / 3, 11.25 / 3, 13 / 3])), - (3, 2, 1, 1, -17, np.array([-17, 0, 1, 1, 2, 2, 3, 4, 5, 6])), + (10, 1, 1, 1, 0, np.array([0, 0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4, 4.5])), + (-1, 1, 1, 1, 0, np.array([0, 0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4, 4.5])), + (3, 1, 1, 1, -17, np.array([0, 0.5, 1, 2, 3, 4, 5, 6, 7, 8])), + (3, 1, 0.5, 1, -17, np.array([0, 0.5, 2.5 / 3, 4.25 / 3, 2, 7.75 / 3, 9.5 / 3, 11.25 / 3, 13 / 3, 14.75 / 3])), + ( + 3, + 1, + 0.5, + 3, + -12, + np.array([-12, -12, 2.5 / 3, 4.25 / 3, 2, 7.75 / 3, 9.5 / 3, 11.25 / 3, 13 / 3, 14.75 / 3]), + ), + (3, 2, 1, 1, -17, np.array([0, 1, 1, 2, 2, 3, 4, 5, 6, 7])), ), ) def test_mean_feature( @@ -115,8 +122,8 @@ def test_mean_feature( ( (10, 1, 1, 0, np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])), (-1, 1, 1, 0, np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])), - (3, 1, 1, -17, np.array([-17, 0, 0, 0, 1, 2, 3, 4, 5, 6])), - (3, 2, 1, -17, np.array([-17, 0, 1, 0, 1, 0, 1, 2, 3, 4])), + (3, 1, 1, -17, np.array([0, 0, 0, 1, 2, 3, 4, 5, 6, 7])), + (3, 2, 1, -17, np.array([0, 1, 0, 1, 0, 1, 2, 3, 4, 5])), ), ) def test_min_feature( @@ -138,9 +145,9 @@ def test_min_feature( @pytest.mark.parametrize( "window,periods,fill_na,expected", ( - (10, 1, 0, np.array([0, 0, 1, 2, 3, 4, 5, 6, 7, 8])), - (-1, 1, 0, np.array([0, 0, 1, 2, 3, 4, 5, 6, 7, 8])), - (3, 2, -17, np.array([-17, -17, 1, 2, 3, 4, 5, 6, 7, 8])), + (10, 1, 0, np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])), + (-1, 1, 0, np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])), + (3, 2, -17, np.array([-17, 1, 2, 3, 4, 5, 6, 7, 8, 9])), ), ) def test_max_feature(simple_df_for_agg: pd.DataFrame, window: int, periods: int, fill_na: float, expected: np.array): @@ -155,8 +162,8 @@ def test_max_feature(simple_df_for_agg: pd.DataFrame, window: int, periods: int, @pytest.mark.parametrize( "window,periods,fill_na,expected", ( - (3, 3, -17, np.array([-17, -17, -17, 1, 2, 3, 4, 5, 6, 7])), - (-1, 1, -17, np.array([-17, 0, 0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4])), + (3, 3, -17, np.array([-17, -17, 1, 2, 3, 4, 5, 6, 7, 8])), + (-1, 1, -17, np.array([0, 0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4, 4.5])), ), ) def test_median_feature(simple_df_for_agg: pd.DataFrame, window: int, periods: int, fill_na: float, expected: np.array): @@ -171,8 +178,8 @@ def test_median_feature(simple_df_for_agg: pd.DataFrame, window: int, periods: i @pytest.mark.parametrize( "window,periods,fill_na,expected", ( - (3, 3, -17, np.array([-17, -17, -17, 1, 1, 1, 1, 1, 1, 1])), - (3, 1, -17, np.array([-17, -17, np.sqrt(0.5 ** 2 * 2), 1, 1, 1, 1, 1, 1, 1])), + (3, 3, -17, np.array([-17, -17, 1, 1, 1, 1, 1, 1, 1, 1])), + (3, 1, -17, np.array([-17, np.sqrt(0.5 ** 2 * 2), 1, 1, 1, 1, 1, 1, 1, 1])), ), ) def test_std_feature(simple_df_for_agg: pd.DataFrame, window: int, periods: int, fill_na: float, expected: np.array): @@ -187,9 +194,9 @@ def test_std_feature(simple_df_for_agg: pd.DataFrame, window: int, periods: int, @pytest.mark.parametrize( "window,periods,fill_na,expected", ( - (3, 3, -17, [-17, -17, -17, 4 / 3, 2 / 3, 2 / 3, 8 / 3, 2, 14 / 9, 10 / 9]), - (4, 1, -17, [-17, 0, 1, 4 / 3, 1.25, 1, 2.25, 2.75, 2, 1.5]), - (-1, 1, 0, [0, 0, 1, 4 / 3, 1.25, 1.44, 7 / 3, 138 / 49, 2.625, 208 / 81]), + (3, 3, -17, [-17, -17, 4 / 3, 2 / 3, 2 / 3, 8 / 3, 2, 14 / 9, 10 / 9, 22 / 9]), + (4, 1, -17, [0, 1, 4 / 3, 1.25, 1, 2.25, 2.75, 2, 1.5, 9.5 / 4]), + (-1, 1, 0, [0, 1, 4 / 3, 1.25, 1.44, 7 / 3, 138 / 49, 2.625, 208 / 81, 27 / 10]), ), ) def test_mad_transform(df_for_agg: pd.DataFrame, window: int, periods: int, fill_na: float, expected: np.ndarray): @@ -202,7 +209,7 @@ def test_mad_transform(df_for_agg: pd.DataFrame, window: int, periods: int, fill @pytest.mark.parametrize( "window,periods,fill_na,expected", - ((3, 3, -17, [-17, -17, -17, 4 / 3, -17, -17, -17, 2, 14 / 9, 10 / 9]),), + ((3, 3, -17, [-17, -17, 4 / 3, -17, -17, -17, 2, 14 / 9, 10 / 9, 22 / 9]),), ) def test_mad_transform_with_nans( df_for_agg_with_nan: pd.DataFrame, window: int, periods: int, fill_na: float, expected: np.ndarray