From 5e7035a22e98a9f26223d5668e8920f4f3d8c29f Mon Sep 17 00:00:00 2001 From: binbin Date: Wed, 31 Aug 2022 17:04:41 +0800 Subject: [PATCH] default roll=True --- .../how_to_train_forecaster_on_one_node.ipynb | 4 ++-- .../data_processing_feature_engineering.md | 4 ++-- ...ctive_hpo_with_builtin_latency_tutorial.ipynb | 4 ++-- .../src/bigdl/chronos/autots/tspipeline.py | 1 - .../chronos/src/bigdl/chronos/data/tsdataset.py | 15 +++++++-------- .../test/bigdl/chronos/data/test_tsdataset.py | 16 +++++----------- .../forecaster/test_autoformer_forecaster.py | 6 +++--- .../chronos/forecaster/test_lstm_forecaster.py | 6 ++---- .../chronos/forecaster/test_nbeats_forecaster.py | 6 ++---- .../forecaster/test_seq2seq_forecaster.py | 6 ++---- .../chronos/forecaster/test_tcn_forecaster.py | 6 ++---- .../chronos/use-case/electricity/autoformer.py | 6 +++--- python/chronos/use-case/electricity/tcn.py | 4 ++-- 13 files changed, 34 insertions(+), 50 deletions(-) diff --git a/docs/readthedocs/source/doc/Chronos/Howto/how_to_train_forecaster_on_one_node.ipynb b/docs/readthedocs/source/doc/Chronos/Howto/how_to_train_forecaster_on_one_node.ipynb index 1a9351c9bbf..3af545fa532 100644 --- a/docs/readthedocs/source/doc/Chronos/Howto/how_to_train_forecaster_on_one_node.ipynb +++ b/docs/readthedocs/source/doc/Chronos/Howto/how_to_train_forecaster_on_one_node.ipynb @@ -108,7 +108,7 @@ "train_data = tsdata_train\n", "\n", "# uncomment this line to change `train_data` as pytorch dataloader\n", - "# train_data = tsdata_train.to_torch_data_loader(roll=True, lookback=48, horizon=1)\n", + "# train_data = tsdata_train.to_torch_data_loader(lookback=48, horizon=1)\n", "\n", "# uncomment this line to change `train_data` as numpy ndarray\n", "# train_data = tsdata_train.roll(lookback=48, horizon=1).to_numpy()" @@ -185,7 +185,7 @@ "outputs": [], "source": [ "# comment this line if you want to try other formats\n", - "val_data = tsdata_val.to_torch_data_loader(roll=True, lookback=48, horizon=1)\n", + "val_data = tsdata_val.to_torch_data_loader(lookback=48, horizon=1)\n", "\n", "# uncomment this line to change `val_data` as numpy ndarray\n", "# val_data = tsdata_val.roll(lookback=48, horizon=1).to_numpy()" diff --git a/docs/readthedocs/source/doc/Chronos/Overview/data_processing_feature_engineering.md b/docs/readthedocs/source/doc/Chronos/Overview/data_processing_feature_engineering.md index 416c1a46e4b..ae7595003bf 100644 --- a/docs/readthedocs/source/doc/Chronos/Overview/data_processing_feature_engineering.md +++ b/docs/readthedocs/source/doc/Chronos/Overview/data_processing_feature_engineering.md @@ -189,9 +189,9 @@ Please refer to the API doc [`roll`](../../PythonAPI/Chronos/tsdataset.html#bigd .. note:: **Difference between `roll` and `to_torch_data_loader`**: - `.roll(...)` performs the rolling before RR forecasters/auto models training while `.to_torch_data_loader(roll=True, ...)` performs rolling during the training. + `.roll(...)` performs the rolling before RR forecasters/auto models training while `.to_torch_data_loader(...)` performs rolling during the training. - It is fine to use either of them when you have a relatively small dataset (less than 1G). `.to_torch_data_loader(roll=True, ...)` is recommended when you have a large dataset (larger than 1G) to save memory usage. + It is fine to use either of them when you have a relatively small dataset (less than 1G). `.to_torch_data_loader(...)` is recommended when you have a large dataset (larger than 1G) to save memory usage. ``` ```eval_rst diff --git a/python/chronos/example/hpo/muti_objective_hpo_with_builtin_latency_tutorial.ipynb b/python/chronos/example/hpo/muti_objective_hpo_with_builtin_latency_tutorial.ipynb index ac0f53296dd..e740b5c4e2f 100644 --- a/python/chronos/example/hpo/muti_objective_hpo_with_builtin_latency_tutorial.ipynb +++ b/python/chronos/example/hpo/muti_objective_hpo_with_builtin_latency_tutorial.ipynb @@ -96,8 +96,8 @@ " train_data = tsdata_train.roll(lookback=look_back, horizon=horizon).to_numpy()\n", " val_data = tsdata_val.roll(lookback=look_back, horizon=horizon).to_numpy()\n", " \n", - " train_loader = tsdata_train.to_torch_data_loader(roll=True, lookback=look_back, horizon=horizon)\n", - " test_loader = tsdata_test.to_torch_data_loader(batch_size=1, roll=True, lookback=look_back, horizon=horizon, shuffle=False)\n", + " train_loader = tsdata_train.to_torch_data_loader(lookback=look_back, horizon=horizon)\n", + " test_loader = tsdata_test.to_torch_data_loader(batch_size=1, lookback=look_back, horizon=horizon, shuffle=False)\n", " \n", " return train_loader, test_loader, train_data, val_data\n", "\n", diff --git a/python/chronos/src/bigdl/chronos/autots/tspipeline.py b/python/chronos/src/bigdl/chronos/autots/tspipeline.py index 528daccaf2b..cdf33270544 100644 --- a/python/chronos/src/bigdl/chronos/autots/tspipeline.py +++ b/python/chronos/src/bigdl/chronos/autots/tspipeline.py @@ -557,7 +557,6 @@ def _tsdataset_to_loader(self, data, is_predict=False, batch_size=32): horizon = 0 if is_predict else self._best_config["future_seq_len"] selected_features = self._best_config["selected_features"] data_loader = data.to_torch_data_loader(batch_size=batch_size, - roll=True, lookback=lookback, horizon=horizon, feature_col=selected_features) diff --git a/python/chronos/src/bigdl/chronos/data/tsdataset.py b/python/chronos/src/bigdl/chronos/data/tsdataset.py index 4041cd999f1..9910632f6e9 100644 --- a/python/chronos/src/bigdl/chronos/data/tsdataset.py +++ b/python/chronos/src/bigdl/chronos/data/tsdataset.py @@ -681,7 +681,7 @@ def roll(self, def to_torch_data_loader(self, batch_size=32, - roll=False, + roll=True, lookback='auto', horizon=None, feature_col=None, @@ -692,14 +692,14 @@ def to_torch_data_loader(self, is_predict=False): """ Convert TSDataset to a PyTorch DataLoader with or without rolling. We recommend to use - to_torch_data_loader(roll=True) if you don't need to output the rolled numpy array. It is - much more efficient than rolling separately, especially when the dataframe or lookback + to_torch_data_loader(default roll=True) if you don't need to output the rolled numpy array. + It is much more efficient than rolling separately, especially when the dataframe or lookback is large. :param batch_size: int, the batch_size for a Pytorch DataLoader. It defaults to 32. :param roll: Boolean. Whether to roll the dataframe before converting to DataLoader. If True, you must also specify lookback and horizon for rolling. If False, you must - have called tsdataset.roll() before calling to_torch_data_loader(). Default to False. + have called tsdataset.roll() before calling to_torch_data_loader(). Default to True. :param lookback: int, lookback value. Default to 'auto', the mode of time series' cycle length will be taken as the lookback. :param horizon: int or list, @@ -742,14 +742,13 @@ def to_torch_data_loader(self, >>> "extra feature 2"]) >>> horizon, lookback = 1, 1 >>> data_loader = tsdataset.to_torch_data_loader(batch_size=32, - >>> roll=True, >>> lookback=lookback, >>> horizon=horizon) >>> # or roll outside. That might be less efficient than the way above. >>> tsdataset.roll(lookback=lookback, horizon=horizon, id_sensitive=False) >>> x, y = tsdataset.to_numpy() >>> print(x, y) # x = [[[1.9, 1, 2 ]], [[2.3, 0, 9 ]]] y = [[[ 2.4 ]], [[ 2.6 ]]] - >>> data_loader = tsdataset.to_torch_data_loader(batch_size=32) + >>> data_loader = tsdataset.to_torch_data_loader(batch_size=32, roll=False) """ from torch.utils.data import TensorDataset, DataLoader @@ -758,7 +757,7 @@ def to_torch_data_loader(self, if roll: if horizon is None: invalidInputError(False, - "You must input horizon if roll is True") + "You must input horizon if roll is True (default roll=True)!") from bigdl.chronos.data.utils.roll_dataset import RollDataset feature_col = _to_list(feature_col, "feature_col") if feature_col is not None \ else self.feature_col @@ -808,7 +807,7 @@ def to_torch_data_loader(self, if self.numpy_x is None: invalidInputError(False, "Please call 'roll' method before transforming a TSDataset to " - "torch DataLoader without rolling (default roll=False)!") + "torch DataLoader if roll is False!") x, y = self.to_numpy() return DataLoader(TensorDataset(torch.from_numpy(x).float(), torch.from_numpy(y).float()), diff --git a/python/chronos/test/bigdl/chronos/data/test_tsdataset.py b/python/chronos/test/bigdl/chronos/data/test_tsdataset.py index 1d736a499a6..fe83b7d98eb 100644 --- a/python/chronos/test/bigdl/chronos/data/test_tsdataset.py +++ b/python/chronos/test/bigdl/chronos/data/test_tsdataset.py @@ -387,7 +387,7 @@ def test_tsdata_roll_timeenc_to_torch_data_loader(self): df = get_int_target_df() tsdata = TSDataset.from_pandas(df, dt_col='datetime', target_col='value', id_col="id") dataloader =\ - tsdata.to_torch_data_loader(roll=True, lookback=lookback, horizon=horizon, + tsdata.to_torch_data_loader(lookback=lookback, horizon=horizon, time_enc=True, label_len=lookback-horizon) x, y, x_time, y_time = next(iter(dataloader)) assert x.shape[1:] == (lookback, 1) @@ -401,7 +401,7 @@ def test_tsdata_roll_timeenc_to_torch_data_loader_predict(self): df = get_int_target_df() tsdata = TSDataset.from_pandas(df, dt_col='datetime', target_col='value', id_col="id") dataloader =\ - tsdata.to_torch_data_loader(roll=True, lookback=lookback, horizon=horizon, + tsdata.to_torch_data_loader(lookback=lookback, horizon=horizon, time_enc=True, label_len=5, is_predict=True, batch_size=len(df)) x, y, x_time, y_time = next(iter(dataloader)) assert x.shape[1:] == (lookback, 1) @@ -423,7 +423,6 @@ def test_tsdataset_to_torch_loader_roll(self): # train torch_loader = tsdata.to_torch_data_loader(batch_size=batch_size, - roll=True, lookback=lookback, horizon=horizon) for x_batch, y_batch in torch_loader: @@ -433,7 +432,6 @@ def test_tsdataset_to_torch_loader_roll(self): # test torch_loader = tsdata.to_torch_data_loader(batch_size=batch_size, - roll=True, lookback=lookback, horizon=0) for x_batch in torch_loader: @@ -442,7 +440,6 @@ def test_tsdataset_to_torch_loader_roll(self): # specify feature_col torch_loader = tsdata.to_torch_data_loader(batch_size=batch_size, - roll=True, lookback=lookback, horizon=horizon, feature_col=[]) @@ -454,7 +451,6 @@ def test_tsdataset_to_torch_loader_roll(self): # Non-subset relationship with pytest.raises(ValueError): tsdata.to_torch_data_loader(batch_size=batch_size, - roll=True, lookback=lookback, horizon=horizon, target_col=['value', 'extra feature']) @@ -462,7 +458,6 @@ def test_tsdataset_to_torch_loader_roll(self): # specify horizon_list horizon_list = [1, 3, 5] torch_loader = tsdata.to_torch_data_loader(batch_size=batch_size, - roll=True, lookback=lookback, horizon=horizon_list) for x_batch, y_batch in torch_loader: @@ -474,7 +469,6 @@ def test_tsdataset_to_torch_loader_roll(self): tsdata = TSDataset.from_pandas(df, dt_col="datetime", target_col=["value", "extra feature"], id_col="id") torch_loader = tsdata.to_torch_data_loader(batch_size=batch_size, - roll=True, lookback=lookback, horizon=horizon) for x_batch, y_batch in torch_loader: @@ -492,10 +486,11 @@ def test_tsdataset_to_torch_loader(self): extra_feature_col=["extra feature"], id_col="id") with pytest.raises(RuntimeError): - tsdata.to_torch_data_loader() + tsdata.to_torch_data_loader(roll=False) tsdata.roll(lookback=lookback, horizon=horizon) loader = tsdata.to_torch_data_loader(batch_size=batch_size, + roll=False, lookback=lookback, horizon=horizon) for x_batch, y_batch in loader: @@ -510,7 +505,7 @@ def test_tsdataset_to_torch_loader_lessthansample(self): df.insert(0, "datetime", pd.date_range(start="2022-7-22", periods=100, freq="H")) tsdata = TSDataset.from_pandas(df, dt_col="datetime", target_col="target") with pytest.raises(RuntimeError): - tsdata.to_torch_data_loader(roll=True, lookback=lookback, horizon=horizon) + tsdata.to_torch_data_loader(lookback=lookback, horizon=horizon) def test_tsdata_multi_unscale_numpy_torch_load(self): @@ -536,7 +531,6 @@ def test_tsdata_multi_unscale_numpy_torch_load(self): tsdata.scale(stand, fit=tsdata is tsdata_train) test_loader = tsdata_test.to_torch_data_loader(batch_size=batch_size, - roll=True, lookback=lookback, horizon=horizon) import torch diff --git a/python/chronos/test/bigdl/chronos/forecaster/test_autoformer_forecaster.py b/python/chronos/test/bigdl/chronos/forecaster/test_autoformer_forecaster.py index 51d8750f8e5..70eeea8dd44 100644 --- a/python/chronos/test/bigdl/chronos/forecaster/test_autoformer_forecaster.py +++ b/python/chronos/test/bigdl/chronos/forecaster/test_autoformer_forecaster.py @@ -41,11 +41,11 @@ def create_data(loader=False): TSDataset.from_pandas(df, dt_col="datetime", target_col=target, with_split=True, test_ratio=0.1, val_ratio=0.1) if loader: - train_loader = tsdata_train.to_torch_data_loader(roll=True, lookback=24, horizon=5, + train_loader = tsdata_train.to_torch_data_loader(lookback=24, horizon=5, time_enc=True, label_len=12) - val_loader = tsdata_val.to_torch_data_loader(roll=True, lookback=24, horizon=5, + val_loader = tsdata_val.to_torch_data_loader(lookback=24, horizon=5, time_enc=True, label_len=12, shuffle=False) - test_loader = tsdata_test.to_torch_data_loader(roll=True, lookback=24, horizon=5, + test_loader = tsdata_test.to_torch_data_loader(lookback=24, horizon=5, time_enc=True, label_len=12, shuffle=False, is_predict=True) return train_loader, val_loader, test_loader diff --git a/python/chronos/test/bigdl/chronos/forecaster/test_lstm_forecaster.py b/python/chronos/test/bigdl/chronos/forecaster/test_lstm_forecaster.py index 59d44c2649e..dacba48bd4b 100644 --- a/python/chronos/test/bigdl/chronos/forecaster/test_lstm_forecaster.py +++ b/python/chronos/test/bigdl/chronos/forecaster/test_lstm_forecaster.py @@ -452,11 +452,9 @@ def test_forecaster_from_tsdataset_data_loader_onnx(self): train, test = create_tsdataset(roll=False) train.gen_dt_feature(one_hot_features=['WEEK']) test.gen_dt_feature(one_hot_features=['WEEK']) - loader = train.to_torch_data_loader(roll=True, - lookback=24, + loader = train.to_torch_data_loader(lookback=24, horizon=1) - test_loader = test.to_torch_data_loader(roll=True, - lookback=24, + test_loader = test.to_torch_data_loader(lookback=24, horizon=1) lstm = LSTMForecaster.from_tsdataset(train) diff --git a/python/chronos/test/bigdl/chronos/forecaster/test_nbeats_forecaster.py b/python/chronos/test/bigdl/chronos/forecaster/test_nbeats_forecaster.py index b5f8812ae68..e89371e2e90 100644 --- a/python/chronos/test/bigdl/chronos/forecaster/test_nbeats_forecaster.py +++ b/python/chronos/test/bigdl/chronos/forecaster/test_nbeats_forecaster.py @@ -436,11 +436,9 @@ def test_forecaster_from_tsdataset(self): @skip_onnxrt def test_forecaster_from_tsdataset_data_loader_onnx(self): train, test = create_tsdataset(roll=False) - loader = train.to_torch_data_loader(roll=True, - lookback=24, + loader = train.to_torch_data_loader(lookback=24, horizon=5) - test_loader = test.to_torch_data_loader(roll=True, - lookback=24, + test_loader = test.to_torch_data_loader(lookback=24, horizon=5) nbeats = NBeatsForecaster.from_tsdataset(train) nbeats.fit(loader, epochs=2) diff --git a/python/chronos/test/bigdl/chronos/forecaster/test_seq2seq_forecaster.py b/python/chronos/test/bigdl/chronos/forecaster/test_seq2seq_forecaster.py index ed70a870238..f0390559375 100644 --- a/python/chronos/test/bigdl/chronos/forecaster/test_seq2seq_forecaster.py +++ b/python/chronos/test/bigdl/chronos/forecaster/test_seq2seq_forecaster.py @@ -395,11 +395,9 @@ def test_forecaster_from_tsdataset_data_loader_onnx(self): train, test = create_tsdataset(roll=False) train.gen_dt_feature(one_hot_features=['WEEK']) test.gen_dt_feature(one_hot_features=['WEEK']) - loader = train.to_torch_data_loader(roll=True, - lookback=24, + loader = train.to_torch_data_loader(lookback=24, horizon=5) - test_loader = test.to_torch_data_loader(roll=True, - lookback=24, + test_loader = test.to_torch_data_loader(lookback=24, horizon=5) s2s = Seq2SeqForecaster.from_tsdataset(train) s2s.fit(loader, epochs=2) diff --git a/python/chronos/test/bigdl/chronos/forecaster/test_tcn_forecaster.py b/python/chronos/test/bigdl/chronos/forecaster/test_tcn_forecaster.py index aa5245f3c5a..900cbda450e 100644 --- a/python/chronos/test/bigdl/chronos/forecaster/test_tcn_forecaster.py +++ b/python/chronos/test/bigdl/chronos/forecaster/test_tcn_forecaster.py @@ -589,11 +589,9 @@ def test_forecaster_from_tsdataset_data_loader_onnx(self): train.gen_dt_feature(one_hot_features=['WEEK']) test.gen_dt_feature(one_hot_features=['WEEK']) - loader = train.to_torch_data_loader(roll=True, - lookback=24, + loader = train.to_torch_data_loader(lookback=24, horizon=5) - test_loader = test.to_torch_data_loader(roll=True, - lookback=24, + test_loader = test.to_torch_data_loader(lookback=24, horizon=5) tcn = TCNForecaster.from_tsdataset(train) diff --git a/python/chronos/use-case/electricity/autoformer.py b/python/chronos/use-case/electricity/autoformer.py index 0457132595e..a622681cbc7 100644 --- a/python/chronos/use-case/electricity/autoformer.py +++ b/python/chronos/use-case/electricity/autoformer.py @@ -45,11 +45,11 @@ def generate_data(): .scale(standard_scaler, fit=(tsdata is tsdata_train)) # get dataset for training, evaluation and prediction - train_loader = tsdata_train.to_torch_data_loader(roll=True, lookback=look_back, horizon=horizon, + train_loader = tsdata_train.to_torch_data_loader(lookback=look_back, horizon=horizon, time_enc=True, label_len=label_len) - test_loader = tsdata_test.to_torch_data_loader(roll=True, lookback=look_back, horizon=horizon, + test_loader = tsdata_test.to_torch_data_loader(lookback=look_back, horizon=horizon, time_enc=True, label_len=label_len, shuffle=False) - pred_loader = tsdata_test.to_torch_data_loader(roll=True, lookback=look_back, horizon=horizon, + pred_loader = tsdata_test.to_torch_data_loader(lookback=look_back, horizon=horizon, time_enc=True, label_len=label_len, is_predict=True, shuffle=False, batch_size=1) diff --git a/python/chronos/use-case/electricity/tcn.py b/python/chronos/use-case/electricity/tcn.py index ccb6a9fe736..6b62832f4eb 100644 --- a/python/chronos/use-case/electricity/tcn.py +++ b/python/chronos/use-case/electricity/tcn.py @@ -46,8 +46,8 @@ def generate_data(): tsdata.impute(mode="last")\ .scale(standard_scaler, fit=(tsdata is tsdata_train)) - train_loader = tsdata_train.to_torch_data_loader(roll=True, lookback=look_back, horizon=horizon) - test_loader = tsdata_test.to_torch_data_loader(batch_size=1, roll=True, lookback=look_back, horizon=horizon, shuffle=False) + train_loader = tsdata_train.to_torch_data_loader(lookback=look_back, horizon=horizon) + test_loader = tsdata_test.to_torch_data_loader(batch_size=1, lookback=look_back, horizon=horizon, shuffle=False) return train_loader, test_loader