From 9483e6a1f1cc661682ca91ac134c1fdc68043292 Mon Sep 17 00:00:00 2001 From: alex-hse-repository <55380696+alex-hse-repository@users.noreply.github.com> Date: Fri, 31 Mar 2023 10:26:56 +0200 Subject: [PATCH] Add target components logic into AutoRegressivePipeline (#1188) --- CHANGELOG.md | 1 + etna/pipeline/autoregressive_pipeline.py | 23 +++++++++++----- .../test_autoregressive_pipeline.py | 27 +++++++++++++------ 3 files changed, 36 insertions(+), 15 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 61f5f3ef8..be484d550 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## Unreleased ### Added +- Target components logic into `AutoRegressivePipeline` ([#1188](https://github.com/tinkoff-ai/etna/pull/1188)) - Add target components handling in `get_level_dataframe` ([#1179](https://github.com/tinkoff-ai/etna/pull/1179)) - Forecast decomposition for `SeasonalMovingAverageModel`([#1180](https://github.com/tinkoff-ai/etna/pull/1180)) - Target components logic into base classes of pipelines ([#1173](https://github.com/tinkoff-ai/etna/pull/1173)) diff --git a/etna/pipeline/autoregressive_pipeline.py b/etna/pipeline/autoregressive_pipeline.py index 59d7d2d42..2b574621b 100644 --- a/etna/pipeline/autoregressive_pipeline.py +++ b/etna/pipeline/autoregressive_pipeline.py @@ -108,11 +108,9 @@ def _create_predictions_template(self, ts: TSDataset) -> pd.DataFrame: def _forecast(self, ts: TSDataset, return_components: bool) -> TSDataset: """Make predictions.""" - if return_components: - raise NotImplementedError("Adding target components is not currently implemented!") - prediction_df = self._create_predictions_template(ts) + target_components_dfs = [] for idx_start in range(0, self.horizon, self.step): current_step = min(self.step, self.horizon - idx_start) current_idx_border = ts.index.shape[0] + idx_start @@ -137,21 +135,34 @@ def _forecast(self, ts: TSDataset, return_components: bool) -> TSDataset: current_ts_forecast = current_ts.make_future( future_steps=current_step, tail_steps=self.model.context_size, transforms=self.transforms ) - current_ts_future = self.model.forecast(ts=current_ts_forecast, prediction_size=current_step) + current_ts_future = self.model.forecast( + ts=current_ts_forecast, prediction_size=current_step, return_components=return_components + ) else: self.model = cast(ContextIgnorantModelType, self.model) current_ts_forecast = current_ts.make_future(future_steps=current_step, transforms=self.transforms) - current_ts_future = self.model.forecast(ts=current_ts_forecast) + current_ts_future = self.model.forecast(ts=current_ts_forecast, return_components=return_components) current_ts_future.inverse_transform(self.transforms) + + if return_components: + target_components_dfs.append(current_ts_future.get_target_components()) + current_ts_future.drop_target_components() + prediction_df = prediction_df.combine_first(current_ts_future.to_pandas()[prediction_df.columns]) # construct dataset and add all features prediction_ts = TSDataset(df=prediction_df, freq=ts.freq, df_exog=ts.df_exog, known_future=ts.known_future) prediction_ts.transform(self.transforms) prediction_ts.inverse_transform(self.transforms) + # cut only last timestamps from result dataset prediction_ts.df = prediction_ts.df.tail(self.horizon) prediction_ts.raw_df = prediction_ts.raw_df.tail(self.horizon) + + if return_components: + target_components_df = pd.concat(target_components_dfs) + prediction_ts.add_target_components(target_components_df=target_components_df) + return prediction_ts def _predict( @@ -163,8 +174,6 @@ def _predict( quantiles: Sequence[float], return_components: bool = False, ) -> TSDataset: - if return_components: - raise NotImplementedError("Adding target components is not currently implemented!") return super()._predict( ts=ts, start_timestamp=start_timestamp, diff --git a/tests/test_pipeline/test_autoregressive_pipeline.py b/tests/test_pipeline/test_autoregressive_pipeline.py index ffe5ca8bd..82c842682 100644 --- a/tests/test_pipeline/test_autoregressive_pipeline.py +++ b/tests/test_pipeline/test_autoregressive_pipeline.py @@ -30,7 +30,6 @@ from tests.test_pipeline.utils import assert_pipeline_equals_loaded_original from tests.test_pipeline.utils import assert_pipeline_forecasts_given_ts from tests.test_pipeline.utils import assert_pipeline_forecasts_given_ts_with_prediction_intervals -from tests.utils import to_be_fixed DEFAULT_METRICS = [MAE(mode=MetricAggregationMode.per_segment)] @@ -43,7 +42,7 @@ def test_fit(example_tsds): pipeline.fit(example_tsds) -def fake_forecast(ts: TSDataset, prediction_size: Optional[int] = None): +def fake_forecast(ts: TSDataset, prediction_size: Optional[int] = None, return_components: bool = False): df = ts.to_pandas() df.loc[:, pd.IndexSlice[:, "target"]] = 0 @@ -84,7 +83,7 @@ def test_private_forecast_context_ignorant_model(model_class, example_tsds): assert make_future.mock.call_count == 5 make_future.mock.assert_called_with(future_steps=pipeline.step, transforms=()) assert model.forecast.call_count == 5 - model.forecast.assert_called_with(ts=ANY) + model.forecast.assert_called_with(ts=ANY, return_components=False) @pytest.mark.parametrize( @@ -106,7 +105,7 @@ def test_private_forecast_context_required_model(model_class, example_tsds): assert make_future.mock.call_count == 5 make_future.mock.assert_called_with(future_steps=pipeline.step, transforms=(), tail_steps=model.context_size) assert model.forecast.call_count == 5 - model.forecast.assert_called_with(ts=ANY, prediction_size=pipeline.step) + model.forecast.assert_called_with(ts=ANY, prediction_size=pipeline.step, return_components=False) def test_forecast_columns(example_reg_tsds): @@ -347,7 +346,6 @@ def test_forecast_given_ts_with_prediction_interval(model, transforms, example_t assert_pipeline_forecasts_given_ts_with_prediction_intervals(pipeline=pipeline, ts=example_tsds, horizon=horizon) -@to_be_fixed(NotImplementedError, "Adding target components is not currently implemented!") @pytest.mark.parametrize( "model_fixture", ( @@ -357,14 +355,20 @@ def test_forecast_given_ts_with_prediction_interval(model, transforms, example_t "prediction_interval_context_required_dummy_model", ), ) -def test_forecast_return_components(example_tsds, model_fixture, request): +def test_forecast_return_components( + example_tsds, model_fixture, request, expected_component_a=10, expected_component_b=90 +): model = request.getfixturevalue(model_fixture) pipeline = AutoRegressivePipeline(model=model, horizon=10) pipeline.fit(example_tsds) forecast = pipeline.forecast(return_components=True) + assert sorted(forecast.target_components_names) == sorted(["target_component_a", "target_component_b"]) + + target_components_df = TSDataset.to_flatten(forecast.get_target_components()) + assert (target_components_df["target_component_a"] == expected_component_a).all() + assert (target_components_df["target_component_b"] == expected_component_b).all() -@to_be_fixed(NotImplementedError, "Adding target components is not currently implemented!") @pytest.mark.parametrize( "model_fixture", ( @@ -374,8 +378,15 @@ def test_forecast_return_components(example_tsds, model_fixture, request): "prediction_interval_context_required_dummy_model", ), ) -def test_predict_return_components(example_tsds, model_fixture, request): +def test_predict_return_components( + example_tsds, model_fixture, request, expected_component_a=20, expected_component_b=180 +): model = request.getfixturevalue(model_fixture) pipeline = AutoRegressivePipeline(model=model, horizon=10) pipeline.fit(example_tsds) forecast = pipeline.predict(ts=example_tsds, return_components=True) + assert sorted(forecast.target_components_names) == sorted(["target_component_a", "target_component_b"]) + + target_components_df = TSDataset.to_flatten(forecast.get_target_components()) + assert (target_components_df["target_component_a"] == expected_component_a).all() + assert (target_components_df["target_component_b"] == expected_component_b).all()