Skip to content

fix statistics #523

Merged
merged 4 commits into from
Feb 11, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,14 +31,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Fixed
-
-
-
- Fixed an unnecessary lag=1 being added in statistics transforms ([#523](https://github.com/tinkoff-ai/etna/pull/523))
- Fixed wrong MeanTransform behaviour when using alpha parameter ([#523](https://github.com/tinkoff-ai/etna/pull/523))
- Fixed processing of the add_noise=True parameter in dataset generation ([#520](https://github.com/tinkoff-ai/etna/pull/520))
-
-
-
-
-
-
-
-
-
-

## [1.6.2] - 2022-02-09
### Added
Expand Down
7 changes: 3 additions & 4 deletions etna/transforms/math/statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,9 +77,8 @@ def transform(self, df: pd.DataFrame) -> pd.DataFrame:
"""
features = (
df.xs(self.in_column, level=1, axis=1)
.shift(1)
.rolling(
window=self.seasonality * self.window if self.window != -1 else len(df) - 1,
window=self.seasonality * self.window if self.window != -1 else len(df),
min_periods=self.min_required_len,
)
.aggregate(self._aggregate_window)
Expand Down Expand Up @@ -167,7 +166,7 @@ def transform(self, df: pd.DataFrame) -> pd.DataFrame:
result: pd.DataFrame
dataframe with results
"""
size = self.window if self.window != -1 else len(df) - 1
size = self.window if self.window != -1 else len(df)
self._alpha_range = [self.alpha ** i for i in range(0, size)]
return super().transform(df=df)

Expand All @@ -177,7 +176,7 @@ def _aggregate_window(self, series: pd.Series) -> float:
raise ValueError("Something went wrong generating the alphas!")
tmp_series = self._get_required_lags(series)
size = len(tmp_series)
tmp = tmp_series * self._alpha_range[-size:]
tmp = tmp_series * self._alpha_range[:size]
return tmp.mean(**self.kwargs)


Expand Down
4 changes: 2 additions & 2 deletions tests/test_transforms/test_encoders/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,11 +44,11 @@ def transformed_simple_df() -> pd.DataFrame:
df_1["segment"] = "Moscow"
df_1["target"] = [1.0, 2.0, 3.0, 4.0, 5.0, np.NAN, np.NAN]
df_1["exog"] = [6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0]
df_1["regressor_segment_mean"] = [0, 1, 1.5, 2, 2.5, 3, 3]
df_1["regressor_segment_mean"] = [1, 1.5, 2, 2.5, 3, 3, 3]
df_2["segment"] = "Omsk"
df_2["target"] = [10.0, 20.0, 30.0, 40.0, 50.0, np.NAN, np.NAN]
df_2["exog"] = [60.0, 70.0, 80.0, 90.0, 100.0, 110.0, 120.0]
df_2["regressor_segment_mean"] = [0.0, 10.0, 15.0, 20.0, 25.0, 30, 30]
df_2["regressor_segment_mean"] = [10.0, 15.0, 20.0, 25.0, 30, 30, 30]
classic_df = pd.concat([df_1, df_2], ignore_index=True)
df = TSDataset.to_dataset(classic_df)
return df
45 changes: 26 additions & 19 deletions tests/test_transforms/test_math/test_statistics_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,12 +79,19 @@ def test_interface_quantile(simple_df_for_agg: pd.DataFrame, out_column: str):
@pytest.mark.parametrize(
"window,seasonality,alpha,periods,fill_na,expected",
(
(10, 1, 1, 1, 0, np.array([0, 0, 0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4])),
(-1, 1, 1, 1, 0, np.array([0, 0, 0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4])),
(3, 1, 1, 1, -17, np.array([-17, 0, 0.5, 1, 2, 3, 4, 5, 6, 7])),
(3, 1, 0.5, 1, -17, np.array([-17, 0, 0.5, 2.5 / 3, 4.25 / 3, 2, 7.75 / 3, 9.5 / 3, 11.25 / 3, 13 / 3])),
(3, 1, 0.5, 3, -12, np.array([-12, -12, -12, 2.5 / 3, 4.25 / 3, 2, 7.75 / 3, 9.5 / 3, 11.25 / 3, 13 / 3])),
(3, 2, 1, 1, -17, np.array([-17, 0, 1, 1, 2, 2, 3, 4, 5, 6])),
(10, 1, 1, 1, 0, np.array([0, 0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4, 4.5])),
(-1, 1, 1, 1, 0, np.array([0, 0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4, 4.5])),
(3, 1, 1, 1, -17, np.array([0, 0.5, 1, 2, 3, 4, 5, 6, 7, 8])),
(3, 1, 0.5, 1, -17, np.array([0, 0.5, 2.5 / 3, 4.25 / 3, 2, 7.75 / 3, 9.5 / 3, 11.25 / 3, 13 / 3, 14.75 / 3])),
(
3,
1,
0.5,
3,
-12,
np.array([-12, -12, 2.5 / 3, 4.25 / 3, 2, 7.75 / 3, 9.5 / 3, 11.25 / 3, 13 / 3, 14.75 / 3]),
),
(3, 2, 1, 1, -17, np.array([0, 1, 1, 2, 2, 3, 4, 5, 6, 7])),
),
)
def test_mean_feature(
Expand Down Expand Up @@ -115,8 +122,8 @@ def test_mean_feature(
(
(10, 1, 1, 0, np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])),
(-1, 1, 1, 0, np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])),
(3, 1, 1, -17, np.array([-17, 0, 0, 0, 1, 2, 3, 4, 5, 6])),
(3, 2, 1, -17, np.array([-17, 0, 1, 0, 1, 0, 1, 2, 3, 4])),
(3, 1, 1, -17, np.array([0, 0, 0, 1, 2, 3, 4, 5, 6, 7])),
(3, 2, 1, -17, np.array([0, 1, 0, 1, 0, 1, 2, 3, 4, 5])),
),
)
def test_min_feature(
Expand All @@ -138,9 +145,9 @@ def test_min_feature(
@pytest.mark.parametrize(
"window,periods,fill_na,expected",
(
(10, 1, 0, np.array([0, 0, 1, 2, 3, 4, 5, 6, 7, 8])),
(-1, 1, 0, np.array([0, 0, 1, 2, 3, 4, 5, 6, 7, 8])),
(3, 2, -17, np.array([-17, -17, 1, 2, 3, 4, 5, 6, 7, 8])),
(10, 1, 0, np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])),
(-1, 1, 0, np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])),
(3, 2, -17, np.array([-17, 1, 2, 3, 4, 5, 6, 7, 8, 9])),
),
)
def test_max_feature(simple_df_for_agg: pd.DataFrame, window: int, periods: int, fill_na: float, expected: np.array):
Expand All @@ -155,8 +162,8 @@ def test_max_feature(simple_df_for_agg: pd.DataFrame, window: int, periods: int,
@pytest.mark.parametrize(
"window,periods,fill_na,expected",
(
(3, 3, -17, np.array([-17, -17, -17, 1, 2, 3, 4, 5, 6, 7])),
(-1, 1, -17, np.array([-17, 0, 0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4])),
(3, 3, -17, np.array([-17, -17, 1, 2, 3, 4, 5, 6, 7, 8])),
(-1, 1, -17, np.array([0, 0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4, 4.5])),
),
)
def test_median_feature(simple_df_for_agg: pd.DataFrame, window: int, periods: int, fill_na: float, expected: np.array):
Expand All @@ -171,8 +178,8 @@ def test_median_feature(simple_df_for_agg: pd.DataFrame, window: int, periods: i
@pytest.mark.parametrize(
"window,periods,fill_na,expected",
(
(3, 3, -17, np.array([-17, -17, -17, 1, 1, 1, 1, 1, 1, 1])),
(3, 1, -17, np.array([-17, -17, np.sqrt(0.5 ** 2 * 2), 1, 1, 1, 1, 1, 1, 1])),
(3, 3, -17, np.array([-17, -17, 1, 1, 1, 1, 1, 1, 1, 1])),
(3, 1, -17, np.array([-17, np.sqrt(0.5 ** 2 * 2), 1, 1, 1, 1, 1, 1, 1, 1])),
),
)
def test_std_feature(simple_df_for_agg: pd.DataFrame, window: int, periods: int, fill_na: float, expected: np.array):
Expand All @@ -187,9 +194,9 @@ def test_std_feature(simple_df_for_agg: pd.DataFrame, window: int, periods: int,
@pytest.mark.parametrize(
"window,periods,fill_na,expected",
(
(3, 3, -17, [-17, -17, -17, 4 / 3, 2 / 3, 2 / 3, 8 / 3, 2, 14 / 9, 10 / 9]),
(4, 1, -17, [-17, 0, 1, 4 / 3, 1.25, 1, 2.25, 2.75, 2, 1.5]),
(-1, 1, 0, [0, 0, 1, 4 / 3, 1.25, 1.44, 7 / 3, 138 / 49, 2.625, 208 / 81]),
(3, 3, -17, [-17, -17, 4 / 3, 2 / 3, 2 / 3, 8 / 3, 2, 14 / 9, 10 / 9, 22 / 9]),
(4, 1, -17, [0, 1, 4 / 3, 1.25, 1, 2.25, 2.75, 2, 1.5, 9.5 / 4]),
(-1, 1, 0, [0, 1, 4 / 3, 1.25, 1.44, 7 / 3, 138 / 49, 2.625, 208 / 81, 27 / 10]),
),
)
def test_mad_transform(df_for_agg: pd.DataFrame, window: int, periods: int, fill_na: float, expected: np.ndarray):
Expand All @@ -202,7 +209,7 @@ def test_mad_transform(df_for_agg: pd.DataFrame, window: int, periods: int, fill

@pytest.mark.parametrize(
"window,periods,fill_na,expected",
((3, 3, -17, [-17, -17, -17, 4 / 3, -17, -17, -17, 2, 14 / 9, 10 / 9]),),
((3, 3, -17, [-17, -17, 4 / 3, -17, -17, -17, 2, 14 / 9, 10 / 9, 22 / 9]),),
)
def test_mad_transform_with_nans(
df_for_agg_with_nan: pd.DataFrame, window: int, periods: int, fill_na: float, expected: np.ndarray
Expand Down