Skip to content

Commit

Permalink
Chronos: make roll=True the default value for to_torch_data_loader
Browse files Browse the repository at this point in the history
  • Loading branch information
plusbang authored and ForJadeForest committed Sep 20, 2022
1 parent 99b810d commit 6e45868
Show file tree
Hide file tree
Showing 13 changed files with 34 additions and 50 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@
"train_data = tsdata_train\n",
"\n",
"# uncomment this line to change `train_data` as pytorch dataloader\n",
"# train_data = tsdata_train.to_torch_data_loader(roll=True, lookback=48, horizon=1)\n",
"# train_data = tsdata_train.to_torch_data_loader(lookback=48, horizon=1)\n",
"\n",
"# uncomment this line to change `train_data` as numpy ndarray\n",
"# train_data = tsdata_train.roll(lookback=48, horizon=1).to_numpy()"
Expand Down Expand Up @@ -185,7 +185,7 @@
"outputs": [],
"source": [
"# comment this line if you want to try other formats\n",
"val_data = tsdata_val.to_torch_data_loader(roll=True, lookback=48, horizon=1)\n",
"val_data = tsdata_val.to_torch_data_loader(lookback=48, horizon=1)\n",
"\n",
"# uncomment this line to change `val_data` as numpy ndarray\n",
"# val_data = tsdata_val.roll(lookback=48, horizon=1).to_numpy()"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -189,9 +189,9 @@ Please refer to the API doc [`roll`](../../PythonAPI/Chronos/tsdataset.html#bigd
.. note::
**Difference between `roll` and `to_torch_data_loader`**:
`.roll(...)` performs the rolling before RR forecasters/auto models training while `.to_torch_data_loader(roll=True, ...)` performs rolling during the training.
`.roll(...)` performs the rolling before the training of RR forecasters/auto models, while `.to_torch_data_loader(...)` performs the rolling during the training.
It is fine to use either of them when you have a relatively small dataset (less than 1G). `.to_torch_data_loader(roll=True, ...)` is recommended when you have a large dataset (larger than 1G) to save memory usage.
It is fine to use either of them when you have a relatively small dataset (less than 1G). `.to_torch_data_loader(...)` is recommended when you have a large dataset (larger than 1G) to save memory usage.
```

```eval_rst
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,8 +96,8 @@
" train_data = tsdata_train.roll(lookback=look_back, horizon=horizon).to_numpy()\n",
" val_data = tsdata_val.roll(lookback=look_back, horizon=horizon).to_numpy()\n",
" \n",
" train_loader = tsdata_train.to_torch_data_loader(roll=True, lookback=look_back, horizon=horizon)\n",
" test_loader = tsdata_test.to_torch_data_loader(batch_size=1, roll=True, lookback=look_back, horizon=horizon, shuffle=False)\n",
" train_loader = tsdata_train.to_torch_data_loader(lookback=look_back, horizon=horizon)\n",
" test_loader = tsdata_test.to_torch_data_loader(batch_size=1, lookback=look_back, horizon=horizon, shuffle=False)\n",
" \n",
" return train_loader, test_loader, train_data, val_data\n",
"\n",
Expand Down
1 change: 0 additions & 1 deletion python/chronos/src/bigdl/chronos/autots/tspipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -557,7 +557,6 @@ def _tsdataset_to_loader(self, data, is_predict=False, batch_size=32):
horizon = 0 if is_predict else self._best_config["future_seq_len"]
selected_features = self._best_config["selected_features"]
data_loader = data.to_torch_data_loader(batch_size=batch_size,
roll=True,
lookback=lookback,
horizon=horizon,
feature_col=selected_features)
Expand Down
15 changes: 7 additions & 8 deletions python/chronos/src/bigdl/chronos/data/tsdataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -681,7 +681,7 @@ def roll(self,

def to_torch_data_loader(self,
batch_size=32,
roll=False,
roll=True,
lookback='auto',
horizon=None,
feature_col=None,
Expand All @@ -692,14 +692,14 @@ def to_torch_data_loader(self,
is_predict=False):
"""
Convert TSDataset to a PyTorch DataLoader with or without rolling. We recommend using
to_torch_data_loader(roll=True) if you don't need to output the rolled numpy array. It is
much more efficient than rolling separately, especially when the dataframe or lookback
to_torch_data_loader (roll=True by default) if you don't need to output the rolled numpy array.
It is much more efficient than rolling separately, especially when the dataframe or lookback
is large.
:param batch_size: int, the batch_size for a Pytorch DataLoader. It defaults to 32.
:param roll: Boolean. Whether to roll the dataframe before converting to DataLoader.
If True, you must also specify horizon for rolling (lookback defaults to 'auto'). If False, you must
have called tsdataset.roll() before calling to_torch_data_loader(). Default to False.
have called tsdataset.roll() before calling to_torch_data_loader(). Defaults to True.
:param lookback: int, lookback value. Defaults to 'auto', in which case
the mode of the time series' cycle length will be taken as the lookback.
:param horizon: int or list,
Expand Down Expand Up @@ -742,14 +742,13 @@ def to_torch_data_loader(self,
>>> "extra feature 2"])
>>> horizon, lookback = 1, 1
>>> data_loader = tsdataset.to_torch_data_loader(batch_size=32,
>>> roll=True,
>>> lookback=lookback,
>>> horizon=horizon)
>>> # or roll outside. That might be less efficient than the way above.
>>> tsdataset.roll(lookback=lookback, horizon=horizon, id_sensitive=False)
>>> x, y = tsdataset.to_numpy()
>>> print(x, y) # x = [[[1.9, 1, 2 ]], [[2.3, 0, 9 ]]] y = [[[ 2.4 ]], [[ 2.6 ]]]
>>> data_loader = tsdataset.to_torch_data_loader(batch_size=32)
>>> data_loader = tsdataset.to_torch_data_loader(batch_size=32, roll=False)
"""
from torch.utils.data import TensorDataset, DataLoader
Expand All @@ -758,7 +757,7 @@ def to_torch_data_loader(self,
if roll:
if horizon is None:
invalidInputError(False,
"You must input horizon if roll is True")
"You must input horizon if roll is True (default roll=True)!")
from bigdl.chronos.data.utils.roll_dataset import RollDataset
feature_col = _to_list(feature_col, "feature_col") if feature_col is not None \
else self.feature_col
Expand Down Expand Up @@ -808,7 +807,7 @@ def to_torch_data_loader(self,
if self.numpy_x is None:
invalidInputError(False,
"Please call 'roll' method before transforming a TSDataset to "
"torch DataLoader without rolling (default roll=False)!")
"torch DataLoader if roll is False!")
x, y = self.to_numpy()
return DataLoader(TensorDataset(torch.from_numpy(x).float(),
torch.from_numpy(y).float()),
Expand Down
16 changes: 5 additions & 11 deletions python/chronos/test/bigdl/chronos/data/test_tsdataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -387,7 +387,7 @@ def test_tsdata_roll_timeenc_to_torch_data_loader(self):
df = get_int_target_df()
tsdata = TSDataset.from_pandas(df, dt_col='datetime', target_col='value', id_col="id")
dataloader =\
tsdata.to_torch_data_loader(roll=True, lookback=lookback, horizon=horizon,
tsdata.to_torch_data_loader(lookback=lookback, horizon=horizon,
time_enc=True, label_len=lookback-horizon)
x, y, x_time, y_time = next(iter(dataloader))
assert x.shape[1:] == (lookback, 1)
Expand All @@ -401,7 +401,7 @@ def test_tsdata_roll_timeenc_to_torch_data_loader_predict(self):
df = get_int_target_df()
tsdata = TSDataset.from_pandas(df, dt_col='datetime', target_col='value', id_col="id")
dataloader =\
tsdata.to_torch_data_loader(roll=True, lookback=lookback, horizon=horizon,
tsdata.to_torch_data_loader(lookback=lookback, horizon=horizon,
time_enc=True, label_len=5, is_predict=True, batch_size=len(df))
x, y, x_time, y_time = next(iter(dataloader))
assert x.shape[1:] == (lookback, 1)
Expand All @@ -423,7 +423,6 @@ def test_tsdataset_to_torch_loader_roll(self):

# train
torch_loader = tsdata.to_torch_data_loader(batch_size=batch_size,
roll=True,
lookback=lookback,
horizon=horizon)
for x_batch, y_batch in torch_loader:
Expand All @@ -433,7 +432,6 @@ def test_tsdataset_to_torch_loader_roll(self):

# test
torch_loader = tsdata.to_torch_data_loader(batch_size=batch_size,
roll=True,
lookback=lookback,
horizon=0)
for x_batch in torch_loader:
Expand All @@ -442,7 +440,6 @@ def test_tsdataset_to_torch_loader_roll(self):

# specify feature_col
torch_loader = tsdata.to_torch_data_loader(batch_size=batch_size,
roll=True,
lookback=lookback,
horizon=horizon,
feature_col=[])
Expand All @@ -454,15 +451,13 @@ def test_tsdataset_to_torch_loader_roll(self):
# Non-subset relationship
with pytest.raises(ValueError):
tsdata.to_torch_data_loader(batch_size=batch_size,
roll=True,
lookback=lookback,
horizon=horizon,
target_col=['value', 'extra feature'])

# specify horizon_list
horizon_list = [1, 3, 5]
torch_loader = tsdata.to_torch_data_loader(batch_size=batch_size,
roll=True,
lookback=lookback,
horizon=horizon_list)
for x_batch, y_batch in torch_loader:
Expand All @@ -474,7 +469,6 @@ def test_tsdataset_to_torch_loader_roll(self):
tsdata = TSDataset.from_pandas(df, dt_col="datetime",
target_col=["value", "extra feature"], id_col="id")
torch_loader = tsdata.to_torch_data_loader(batch_size=batch_size,
roll=True,
lookback=lookback,
horizon=horizon)
for x_batch, y_batch in torch_loader:
Expand All @@ -492,10 +486,11 @@ def test_tsdataset_to_torch_loader(self):
extra_feature_col=["extra feature"], id_col="id")

with pytest.raises(RuntimeError):
tsdata.to_torch_data_loader()
tsdata.to_torch_data_loader(roll=False)

tsdata.roll(lookback=lookback, horizon=horizon)
loader = tsdata.to_torch_data_loader(batch_size=batch_size,
roll=False,
lookback=lookback,
horizon=horizon)
for x_batch, y_batch in loader:
Expand All @@ -510,7 +505,7 @@ def test_tsdataset_to_torch_loader_lessthansample(self):
df.insert(0, "datetime", pd.date_range(start="2022-7-22", periods=100, freq="H"))
tsdata = TSDataset.from_pandas(df, dt_col="datetime", target_col="target")
with pytest.raises(RuntimeError):
tsdata.to_torch_data_loader(roll=True, lookback=lookback, horizon=horizon)
tsdata.to_torch_data_loader(lookback=lookback, horizon=horizon)


def test_tsdata_multi_unscale_numpy_torch_load(self):
Expand All @@ -536,7 +531,6 @@ def test_tsdata_multi_unscale_numpy_torch_load(self):
tsdata.scale(stand, fit=tsdata is tsdata_train)

test_loader = tsdata_test.to_torch_data_loader(batch_size=batch_size,
roll=True,
lookback=lookback,
horizon=horizon)
import torch
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,11 @@ def create_data(loader=False):
TSDataset.from_pandas(df, dt_col="datetime", target_col=target,
with_split=True, test_ratio=0.1, val_ratio=0.1)
if loader:
train_loader = tsdata_train.to_torch_data_loader(roll=True, lookback=24, horizon=5,
train_loader = tsdata_train.to_torch_data_loader(lookback=24, horizon=5,
time_enc=True, label_len=12)
val_loader = tsdata_val.to_torch_data_loader(roll=True, lookback=24, horizon=5,
val_loader = tsdata_val.to_torch_data_loader(lookback=24, horizon=5,
time_enc=True, label_len=12, shuffle=False)
test_loader = tsdata_test.to_torch_data_loader(roll=True, lookback=24, horizon=5,
test_loader = tsdata_test.to_torch_data_loader(lookback=24, horizon=5,
time_enc=True, label_len=12, shuffle=False,
is_predict=True)
return train_loader, val_loader, test_loader
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -454,11 +454,9 @@ def test_forecaster_from_tsdataset_data_loader_onnx(self):
train, test = create_tsdataset(roll=False)
train.gen_dt_feature(one_hot_features=['WEEK'])
test.gen_dt_feature(one_hot_features=['WEEK'])
loader = train.to_torch_data_loader(roll=True,
lookback=24,
loader = train.to_torch_data_loader(lookback=24,
horizon=1)
test_loader = test.to_torch_data_loader(roll=True,
lookback=24,
test_loader = test.to_torch_data_loader(lookback=24,
horizon=1)
lstm = LSTMForecaster.from_tsdataset(train)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -438,11 +438,9 @@ def test_forecaster_from_tsdataset(self):
@op_onnxrt16
def test_forecaster_from_tsdataset_data_loader_onnx(self):
train, test = create_tsdataset(roll=False)
loader = train.to_torch_data_loader(roll=True,
lookback=24,
loader = train.to_torch_data_loader(lookback=24,
horizon=5)
test_loader = test.to_torch_data_loader(roll=True,
lookback=24,
test_loader = test.to_torch_data_loader(lookback=24,
horizon=5)
nbeats = NBeatsForecaster.from_tsdataset(train)
nbeats.fit(loader, epochs=2)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -395,11 +395,9 @@ def test_forecaster_from_tsdataset_data_loader_onnx(self):
train, test = create_tsdataset(roll=False)
train.gen_dt_feature(one_hot_features=['WEEK'])
test.gen_dt_feature(one_hot_features=['WEEK'])
loader = train.to_torch_data_loader(roll=True,
lookback=24,
loader = train.to_torch_data_loader(lookback=24,
horizon=5)
test_loader = test.to_torch_data_loader(roll=True,
lookback=24,
test_loader = test.to_torch_data_loader(lookback=24,
horizon=5)
s2s = Seq2SeqForecaster.from_tsdataset(train)
s2s.fit(loader, epochs=2)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -594,11 +594,9 @@ def test_forecaster_from_tsdataset_data_loader_onnx(self):
train.gen_dt_feature(one_hot_features=['WEEK'])
test.gen_dt_feature(one_hot_features=['WEEK'])

loader = train.to_torch_data_loader(roll=True,
lookback=24,
loader = train.to_torch_data_loader(lookback=24,
horizon=5)
test_loader = test.to_torch_data_loader(roll=True,
lookback=24,
test_loader = test.to_torch_data_loader(lookback=24,
horizon=5)
tcn = TCNForecaster.from_tsdataset(train)

Expand Down
6 changes: 3 additions & 3 deletions python/chronos/use-case/electricity/autoformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,11 +45,11 @@ def generate_data():
.scale(standard_scaler, fit=(tsdata is tsdata_train))

# get dataset for training, evaluation and prediction
train_loader = tsdata_train.to_torch_data_loader(roll=True, lookback=look_back, horizon=horizon,
train_loader = tsdata_train.to_torch_data_loader(lookback=look_back, horizon=horizon,
time_enc=True, label_len=label_len)
test_loader = tsdata_test.to_torch_data_loader(roll=True, lookback=look_back, horizon=horizon,
test_loader = tsdata_test.to_torch_data_loader(lookback=look_back, horizon=horizon,
time_enc=True, label_len=label_len, shuffle=False)
pred_loader = tsdata_test.to_torch_data_loader(roll=True, lookback=look_back, horizon=horizon,
pred_loader = tsdata_test.to_torch_data_loader(lookback=look_back, horizon=horizon,
time_enc=True, label_len=label_len, is_predict=True,
shuffle=False, batch_size=1)

Expand Down
4 changes: 2 additions & 2 deletions python/chronos/use-case/electricity/tcn.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,8 @@ def generate_data():
tsdata.impute(mode="last")\
.scale(standard_scaler, fit=(tsdata is tsdata_train))

train_loader = tsdata_train.to_torch_data_loader(roll=True, lookback=look_back, horizon=horizon)
test_loader = tsdata_test.to_torch_data_loader(batch_size=1, roll=True, lookback=look_back, horizon=horizon, shuffle=False)
train_loader = tsdata_train.to_torch_data_loader(lookback=look_back, horizon=horizon)
test_loader = tsdata_test.to_torch_data_loader(batch_size=1, lookback=look_back, horizon=horizon, shuffle=False)

return train_loader, test_loader

Expand Down

0 comments on commit 6e45868

Please sign in to comment.