Add target components logic into AutoRegressivePipeline (#1188)

tinkoff-ai · Mar 31, 2023 · 9483e6a · 9483e6a · github-actions · Mar 31, 2023
1 parent d10ce3f
commit 9483e6a
Show file tree

Hide file tree

Showing 3 changed files with 36 additions and 15 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## Unreleased
 ### Added
+- Target components logic into `AutoRegressivePipeline` ([#1188](https://github.com/tinkoff-ai/etna/pull/1188))
 - Add target components handling in `get_level_dataframe` ([#1179](https://github.com/tinkoff-ai/etna/pull/1179))
 - Forecast decomposition for `SeasonalMovingAverageModel`([#1180](https://github.com/tinkoff-ai/etna/pull/1180))
 - Target components logic into base classes of pipelines ([#1173](https://github.com/tinkoff-ai/etna/pull/1173))

diff --git a/etna/pipeline/autoregressive_pipeline.py b/etna/pipeline/autoregressive_pipeline.py
@@ -108,11 +108,9 @@ def _create_predictions_template(self, ts: TSDataset) -> pd.DataFrame:
 
     def _forecast(self, ts: TSDataset, return_components: bool) -> TSDataset:
         """Make predictions."""
-        if return_components:
-            raise NotImplementedError("Adding target components is not currently implemented!")
-
         prediction_df = self._create_predictions_template(ts)
 
+        target_components_dfs = []
         for idx_start in range(0, self.horizon, self.step):
             current_step = min(self.step, self.horizon - idx_start)
             current_idx_border = ts.index.shape[0] + idx_start
@@ -137,21 +135,34 @@ def _forecast(self, ts: TSDataset, return_components: bool) -> TSDataset:
                     current_ts_forecast = current_ts.make_future(
                         future_steps=current_step, tail_steps=self.model.context_size, transforms=self.transforms
                     )
-                    current_ts_future = self.model.forecast(ts=current_ts_forecast, prediction_size=current_step)
+                    current_ts_future = self.model.forecast(
+                        ts=current_ts_forecast, prediction_size=current_step, return_components=return_components
+                    )
                 else:
                     self.model = cast(ContextIgnorantModelType, self.model)
                     current_ts_forecast = current_ts.make_future(future_steps=current_step, transforms=self.transforms)
-                    current_ts_future = self.model.forecast(ts=current_ts_forecast)
+                    current_ts_future = self.model.forecast(ts=current_ts_forecast, return_components=return_components)
             current_ts_future.inverse_transform(self.transforms)
+
+            if return_components:
+                target_components_dfs.append(current_ts_future.get_target_components())
+                current_ts_future.drop_target_components()
+
             prediction_df = prediction_df.combine_first(current_ts_future.to_pandas()[prediction_df.columns])
 
         # construct dataset and add all features
         prediction_ts = TSDataset(df=prediction_df, freq=ts.freq, df_exog=ts.df_exog, known_future=ts.known_future)
         prediction_ts.transform(self.transforms)
         prediction_ts.inverse_transform(self.transforms)
+
         # cut only last timestamps from result dataset
         prediction_ts.df = prediction_ts.df.tail(self.horizon)
         prediction_ts.raw_df = prediction_ts.raw_df.tail(self.horizon)
+
+        if return_components:
+            target_components_df = pd.concat(target_components_dfs)
+            prediction_ts.add_target_components(target_components_df=target_components_df)
+
         return prediction_ts
 
     def _predict(
@@ -163,8 +174,6 @@ def _predict(
         quantiles: Sequence[float],
         return_components: bool = False,
     ) -> TSDataset:
-        if return_components:
-            raise NotImplementedError("Adding target components is not currently implemented!")
         return super()._predict(
             ts=ts,
             start_timestamp=start_timestamp,

diff --git a/tests/test_pipeline/test_autoregressive_pipeline.py b/tests/test_pipeline/test_autoregressive_pipeline.py
@@ -30,7 +30,6 @@
 from tests.test_pipeline.utils import assert_pipeline_equals_loaded_original
 from tests.test_pipeline.utils import assert_pipeline_forecasts_given_ts
 from tests.test_pipeline.utils import assert_pipeline_forecasts_given_ts_with_prediction_intervals
-from tests.utils import to_be_fixed
 
 DEFAULT_METRICS = [MAE(mode=MetricAggregationMode.per_segment)]
 
@@ -43,7 +42,7 @@ def test_fit(example_tsds):
     pipeline.fit(example_tsds)
 
 
-def fake_forecast(ts: TSDataset, prediction_size: Optional[int] = None):
+def fake_forecast(ts: TSDataset, prediction_size: Optional[int] = None, return_components: bool = False):
     df = ts.to_pandas()
 
     df.loc[:, pd.IndexSlice[:, "target"]] = 0
@@ -84,7 +83,7 @@ def test_private_forecast_context_ignorant_model(model_class, example_tsds):
     assert make_future.mock.call_count == 5
     make_future.mock.assert_called_with(future_steps=pipeline.step, transforms=())
     assert model.forecast.call_count == 5
-    model.forecast.assert_called_with(ts=ANY)
+    model.forecast.assert_called_with(ts=ANY, return_components=False)
 
 
 @pytest.mark.parametrize(
@@ -106,7 +105,7 @@ def test_private_forecast_context_required_model(model_class, example_tsds):
     assert make_future.mock.call_count == 5
     make_future.mock.assert_called_with(future_steps=pipeline.step, transforms=(), tail_steps=model.context_size)
     assert model.forecast.call_count == 5
-    model.forecast.assert_called_with(ts=ANY, prediction_size=pipeline.step)
+    model.forecast.assert_called_with(ts=ANY, prediction_size=pipeline.step, return_components=False)
 
 
 def test_forecast_columns(example_reg_tsds):
@@ -347,7 +346,6 @@ def test_forecast_given_ts_with_prediction_interval(model, transforms, example_t
     assert_pipeline_forecasts_given_ts_with_prediction_intervals(pipeline=pipeline, ts=example_tsds, horizon=horizon)
 
 
-@to_be_fixed(NotImplementedError, "Adding target components is not currently implemented!")
 @pytest.mark.parametrize(
     "model_fixture",
     (
@@ -357,14 +355,20 @@ def test_forecast_given_ts_with_prediction_interval(model, transforms, example_t
         "prediction_interval_context_required_dummy_model",
     ),
 )
-def test_forecast_return_components(example_tsds, model_fixture, request):
+def test_forecast_return_components(
+    example_tsds, model_fixture, request, expected_component_a=10, expected_component_b=90
+):
     model = request.getfixturevalue(model_fixture)
     pipeline = AutoRegressivePipeline(model=model, horizon=10)
     pipeline.fit(example_tsds)
     forecast = pipeline.forecast(return_components=True)
+    assert sorted(forecast.target_components_names) == sorted(["target_component_a", "target_component_b"])
+
+    target_components_df = TSDataset.to_flatten(forecast.get_target_components())
+    assert (target_components_df["target_component_a"] == expected_component_a).all()
+    assert (target_components_df["target_component_b"] == expected_component_b).all()
 
 
-@to_be_fixed(NotImplementedError, "Adding target components is not currently implemented!")
 @pytest.mark.parametrize(
     "model_fixture",
     (
@@ -374,8 +378,15 @@ def test_forecast_return_components(example_tsds, model_fixture, request):
         "prediction_interval_context_required_dummy_model",
     ),
 )
-def test_predict_return_components(example_tsds, model_fixture, request):
+def test_predict_return_components(
+    example_tsds, model_fixture, request, expected_component_a=20, expected_component_b=180
+):
     model = request.getfixturevalue(model_fixture)
     pipeline = AutoRegressivePipeline(model=model, horizon=10)
     pipeline.fit(example_tsds)
     forecast = pipeline.predict(ts=example_tsds, return_components=True)
+    assert sorted(forecast.target_components_names) == sorted(["target_component_a", "target_component_b"])
+
+    target_components_df = TSDataset.to_flatten(forecast.get_target_components())
+    assert (target_components_df["target_component_a"] == expected_component_a).all()
+    assert (target_components_df["target_component_b"] == expected_component_b).all()