From c63530ac08d505642db64f1918c59d0add532df2 Mon Sep 17 00:00:00 2001 From: alex-hse-repository Date: Wed, 12 Jan 2022 13:09:45 +0300 Subject: [PATCH 1/7] Update make_future and train_test_split --- etna/datasets/tsdataset.py | 10 ++++++---- tests/test_datasets/test_dataset.py | 15 +++++++++++++++ 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/etna/datasets/tsdataset.py b/etna/datasets/tsdataset.py index 38c83d16f..cffd1444b 100644 --- a/etna/datasets/tsdataset.py +++ b/etna/datasets/tsdataset.py @@ -278,7 +278,9 @@ def make_future(self, future_steps: int) -> "TSDataset": future_dataset = df.tail(future_steps).copy(deep=True) future_dataset = future_dataset.sort_index(axis=1, level=(0, 1)) - future_ts = TSDataset(future_dataset, freq=self.freq) + future_ts = TSDataset(df=future_dataset, freq=self.freq) + future_ts.known_future = self.regressors + future_ts._regressors = self.regressors future_ts.transforms = self.transforms future_ts.df_exog = self.df_exog return future_ts @@ -332,7 +334,7 @@ def _check_regressors(df: pd.DataFrame, df_regressors: pd.DataFrame): def _merge_exog(self, df: pd.DataFrame) -> pd.DataFrame: segments = sorted(set(df.columns.get_level_values("segment"))) - df_regressors = self.df_exog.loc[:, pd.IndexSlice[segments, self.known_future]] + df_regressors = self.df_exog.loc[:, pd.IndexSlice[segments, self.regressors]] self._check_regressors(df=df, df_regressors=df_regressors) df = pd.merge(df, self.df_exog, left_index=True, right_index=True, how="left").sort_index(axis=1, level=(0, 1)) return df @@ -730,12 +732,12 @@ def train_test_split( train_df = self.df[train_start_defined:train_end_defined][self.raw_df.columns] # type: ignore train_raw_df = self.raw_df[train_start_defined:train_end_defined] # type: ignore - train = TSDataset(df=train_df, df_exog=self.df_exog, freq=self.freq) + train = TSDataset(df=train_df, df_exog=self.df_exog, freq=self.freq, known_future=self.regressors) train.raw_df = train_raw_df test_df = self.df[test_start_defined:test_end_defined][self.raw_df.columns] # type: ignore test_raw_df = self.raw_df[train_start_defined:test_end_defined] # type: ignore - test = TSDataset(df=test_df, df_exog=self.df_exog, freq=self.freq) + test = TSDataset(df=test_df, df_exog=self.df_exog, freq=self.freq, known_future=self.regressors) test.raw_df = test_raw_df return train, test diff --git a/tests/test_datasets/test_dataset.py b/tests/test_datasets/test_dataset.py index 317a82068..fbda1b861 100644 --- a/tests/test_datasets/test_dataset.py +++ b/tests/test_datasets/test_dataset.py @@ -289,6 +289,14 @@ def test_train_test_split_failed(test_size, borders, match, tsdf_with_exog): ) +def test_train_test_split_pass_regressors_to_output(df_and_regressors): + df, df_exog, known_future = df_and_regressors + ts = TSDataset(df=df, df_exog=df_exog, freq="D", known_future=known_future) + train, test = ts.train_test_split(test_size=5) + assert train.regressors == ts.regressors + assert test.regressors == ts.regressors + + def test_dataset_datetime_conversion(): classic_df = generate_ar_df(periods=30, start_time="2021-06-01", n_segments=2) classic_df["timestamp"] = classic_df["timestamp"].astype(str) @@ -355,6 +363,13 @@ def test_make_future_with_regressors(df_and_regressors): assert set(ts_future.columns.get_level_values("feature")) == {"target", "regressor_1", "regressor_2"} +def test_make_future_inherits_regressors(df_and_regressors): + df, df_exog, known_future = df_and_regressors + ts = TSDataset(df=df, df_exog=df_exog, freq="D", known_future=known_future) + ts_future = ts.make_future(10) + assert ts_future.regressors == ts.regressors + + def test_make_future_warn_not_enough_regressors(df_and_regressors): """Check that warning is thrown if regressors don't have enough values for the future.""" df, df_exog, known_future = df_and_regressors From dccbd5d1f612fe7810fe0cf58522926c94f9761f Mon Sep 17 00:00:00 2001 From: alex-hse-repository Date: Fri, 14 Jan 2022 11:40:01 +0300 Subject: [PATCH 2/7] Fix known_future --- etna/datasets/tsdataset.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/etna/datasets/tsdataset.py b/etna/datasets/tsdataset.py index cffd1444b..bd73d92ea 100644 --- a/etna/datasets/tsdataset.py +++ b/etna/datasets/tsdataset.py @@ -279,7 +279,7 @@ def make_future(self, future_steps: int) -> "TSDataset": future_dataset = df.tail(future_steps).copy(deep=True) future_dataset = future_dataset.sort_index(axis=1, level=(0, 1)) future_ts = TSDataset(df=future_dataset, freq=self.freq) - future_ts.known_future = self.regressors + future_ts.known_future = self.known_future future_ts._regressors = self.regressors future_ts.transforms = self.transforms future_ts.df_exog = self.df_exog @@ -732,13 +732,15 @@ def train_test_split( train_df = self.df[train_start_defined:train_end_defined][self.raw_df.columns] # type: ignore train_raw_df = self.raw_df[train_start_defined:train_end_defined] # type: ignore - train = TSDataset(df=train_df, df_exog=self.df_exog, freq=self.freq, known_future=self.regressors) + train = TSDataset(df=train_df, df_exog=self.df_exog, freq=self.freq, known_future=self.known_future) train.raw_df = train_raw_df + train._regressors = self.regressors test_df = self.df[test_start_defined:test_end_defined][self.raw_df.columns] # type: ignore test_raw_df = self.raw_df[train_start_defined:test_end_defined] # type: ignore - test = TSDataset(df=test_df, df_exog=self.df_exog, freq=self.freq, known_future=self.regressors) + test = TSDataset(df=test_df, df_exog=self.df_exog, freq=self.freq, known_future=self.known_future) test.raw_df = test_raw_df + test._regressors = self.regressors return train, test From 74c652d6321d655670bf3b6e58a4a82b3f2f0287 Mon Sep 17 00:00:00 2001 From: alex-hse-repository Date: Fri, 14 Jan 2022 14:33:28 +0300 Subject: [PATCH 3/7] Fix pipeline --- etna/pipeline/pipeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/etna/pipeline/pipeline.py b/etna/pipeline/pipeline.py index 5c93eecb0..28428122f 100644 --- a/etna/pipeline/pipeline.py +++ b/etna/pipeline/pipeline.py @@ -112,7 +112,7 @@ def fit(self, ts: TSDataset) -> "Pipeline": def _forecast_prediction_interval(self, future: TSDataset) -> TSDataset: """Forecast prediction interval for the future.""" - _, forecasts, _ = self.backtest(self.ts, metrics=[MAE()], n_folds=self.n_folds) + _, forecasts, _ = self.backtest(ts=deepcopy(self.ts), metrics=[MAE()], n_folds=self.n_folds) forecasts = TSDataset(df=forecasts, freq=self.ts.freq) residuals = ( forecasts.loc[:, pd.IndexSlice[:, "target"]] From 9df9ab3477d43314c7f4b0b1ccd7310f4aa357c0 Mon Sep 17 00:00:00 2001 From: alex-hse-repository Date: Tue, 18 Jan 2022 08:47:07 +0300 Subject: [PATCH 4/7] Fixes --- etna/datasets/tsdataset.py | 1 + etna/pipeline/pipeline.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/etna/datasets/tsdataset.py b/etna/datasets/tsdataset.py index bd73d92ea..55df47e00 100644 --- a/etna/datasets/tsdataset.py +++ b/etna/datasets/tsdataset.py @@ -190,6 +190,7 @@ def _update_regressors(self, transform: "Transform", columns_before: Set[str], c else: raise ValueError("Transform is not FutureMixin and does not have in_column attribute!") + new_regressors = [regressor for regressor in new_regressors if regressor not in self.regressors] self._regressors.extend(new_regressors) def __repr__(self): diff --git a/etna/pipeline/pipeline.py b/etna/pipeline/pipeline.py index 28428122f..ee0fb44be 100644 --- a/etna/pipeline/pipeline.py +++ b/etna/pipeline/pipeline.py @@ -112,7 +112,7 @@ def fit(self, ts: TSDataset) -> "Pipeline": def _forecast_prediction_interval(self, future: TSDataset) -> TSDataset: """Forecast prediction interval for the future.""" - _, forecasts, _ = self.backtest(ts=deepcopy(self.ts), metrics=[MAE()], n_folds=self.n_folds) + _, forecasts, _ = self.backtest(ts=self.ts, metrics=[MAE()], n_folds=self.n_folds) forecasts = TSDataset(df=forecasts, freq=self.ts.freq) residuals = ( forecasts.loc[:, pd.IndexSlice[:, "target"]] From ee811328caf794f4cb8bd3093aaedbbd88c3149c Mon Sep 17 00:00:00 2001 From: alex-hse-repository Date: Wed, 19 Jan 2022 10:14:07 +0300 Subject: [PATCH 5/7] Add comment about known_future --- etna/datasets/tsdataset.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/etna/datasets/tsdataset.py b/etna/datasets/tsdataset.py index 55df47e00..c328d71cf 100644 --- a/etna/datasets/tsdataset.py +++ b/etna/datasets/tsdataset.py @@ -280,6 +280,8 @@ def make_future(self, future_steps: int) -> "TSDataset": future_dataset = df.tail(future_steps).copy(deep=True) future_dataset = future_dataset.sort_index(axis=1, level=(0, 1)) future_ts = TSDataset(df=future_dataset, freq=self.freq) + + # Can't put known_future into constructor, _check_known_future fails with df_exog=None future_ts.known_future = self.known_future future_ts._regressors = self.regressors future_ts.transforms = self.transforms From d3a824873868b6391b899dbdf36cfe9625f91a61 Mon Sep 17 00:00:00 2001 From: alex-hse-repository Date: Wed, 19 Jan 2022 10:16:20 +0300 Subject: [PATCH 6/7] Fix _merge_exog --- etna/datasets/tsdataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/etna/datasets/tsdataset.py b/etna/datasets/tsdataset.py index c328d71cf..bccd9daa9 100644 --- a/etna/datasets/tsdataset.py +++ b/etna/datasets/tsdataset.py @@ -337,7 +337,7 @@ def _check_regressors(df: pd.DataFrame, df_regressors: pd.DataFrame): def _merge_exog(self, df: pd.DataFrame) -> pd.DataFrame: segments = sorted(set(df.columns.get_level_values("segment"))) - df_regressors = self.df_exog.loc[:, pd.IndexSlice[segments, self.regressors]] + df_regressors = self.df_exog.loc[:, pd.IndexSlice[segments, self.known_future]] self._check_regressors(df=df, df_regressors=df_regressors) df = pd.merge(df, self.df_exog, left_index=True, right_index=True, how="left").sort_index(axis=1, level=(0, 1)) return df From 60ae7fae41389f9405ba21dd082093d7b24eed8f Mon Sep 17 00:00:00 2001 From: alex-hse-repository Date: Wed, 19 Jan 2022 10:54:33 +0300 Subject: [PATCH 7/7] Fix --- etna/datasets/tsdataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/etna/datasets/tsdataset.py b/etna/datasets/tsdataset.py index bccd9daa9..605882e28 100644 --- a/etna/datasets/tsdataset.py +++ b/etna/datasets/tsdataset.py @@ -281,7 +281,7 @@ def make_future(self, future_steps: int) -> "TSDataset": future_dataset = future_dataset.sort_index(axis=1, level=(0, 1)) future_ts = TSDataset(df=future_dataset, freq=self.freq) - # Can't put known_future into constructor, _check_known_future fails with df_exog=None + # can't put known_future into constructor, _check_known_future fails with df_exog=None future_ts.known_future = self.known_future future_ts._regressors = self.regressors future_ts.transforms = self.transforms