Skip to content
This repository has been archived by the owner on Oct 9, 2023. It is now read-only.

Fix bug in number of tabular forecasting prediction samples #1149

Merged
merged 6 commits into from
Feb 4, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).

- Fixed a bug where `TabularData` would not work correctly with no categorical variables ([#1144](https://github.com/PyTorchLightning/lightning-flash/pull/1144))

- Fixed a bug where loading `TabularForecastingData` for prediction would only yield a single sample per series ([#1149](https://github.com/PyTorchLightning/lightning-flash/pull/1149))

### Removed

- Removed the `Seq2SeqData` base class (use `TranslationData` or `SummarizationData` directly) ([#1128](https://github.com/PyTorchLightning/lightning-flash/pull/1128))
Expand Down
1 change: 0 additions & 1 deletion flash/tabular/forecasting/input.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,6 @@ def load_data(
time_series_dataset = TimeSeriesDataSet.from_parameters(
parameters,
data,
predict=True,
stop_randomization=True,
)
return time_series_dataset
Expand Down
12 changes: 9 additions & 3 deletions flash_examples/tabular_forecasting.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
data = generate_ar_data(seasonality=10.0, timesteps=400, n_series=100, seed=42)
data["date"] = pd.Timestamp("2020-01-01") + pd.to_timedelta(data.time_idx, "D")

max_encoder_length = 60
max_prediction_length = 20

training_cutoff = data["time_idx"].max() - max_prediction_length
Expand All @@ -39,10 +40,11 @@
group_ids=["series"],
# the only unknown variable is "value" - N-Beats cannot take any additional variables anyway
time_varying_unknown_reals=["value"],
max_encoder_length=60,
max_encoder_length=max_encoder_length,
max_prediction_length=max_prediction_length,
train_data_frame=data[lambda x: x.time_idx <= training_cutoff],
val_data_frame=data,
# validate on the last sequence only: the final prediction window plus its encoder context
val_data_frame=data[lambda x: x.time_idx > training_cutoff - max_encoder_length],
batch_size=32,
)

Expand All @@ -58,7 +60,11 @@
trainer.fit(model, datamodule=datamodule)

# 4. Generate predictions
datamodule = TabularForecastingData.from_data_frame(predict_data_frame=data, parameters=datamodule.parameters)
datamodule = TabularForecastingData.from_data_frame(
predict_data_frame=data[lambda x: x.time_idx > training_cutoff - max_encoder_length],
parameters=datamodule.parameters,
batch_size=32,
)
predictions = trainer.predict(model, datamodule=datamodule)
print(predictions)

Expand Down
4 changes: 2 additions & 2 deletions requirements/test.txt
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
coverage
codecov>=2.1
pytest>=5.0
pytest>=5.0,<7.0
pytest-flake8
flake8
pytest-doctestplus
pytest-doctestplus>=0.9.0
pytest-rerunfailures>=10.0

# install pkg
Expand Down
4 changes: 4 additions & 0 deletions tests/examples/test_scripts.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,10 @@
"tabular_regression.py",
marks=pytest.mark.skipif(not _TABULAR_TESTING, reason="tabular libraries aren't installed"),
),
pytest.param(
"tabular_forecasting.py",
marks=pytest.mark.skipif(not _TABULAR_TESTING, reason="tabular libraries aren't installed"),
),
pytest.param("template.py", marks=pytest.mark.skipif(not _SKLEARN_AVAILABLE, reason="sklearn isn't installed")),
pytest.param(
"text_classification.py",
Expand Down
4 changes: 2 additions & 2 deletions tests/tabular/forecasting/test_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def test_from_data_frame_time_series_data_set_single_call(patch_time_series_data
)

patch_time_series_data_set.from_parameters.assert_called_once_with(
{"test": None}, val_data, predict=True, stop_randomization=True
{"test": None}, val_data, stop_randomization=True
)


Expand Down Expand Up @@ -79,7 +79,7 @@ def test_from_data_frame_time_series_data_set_multi_call(patch_time_series_data_
)

patch_time_series_data_set.from_parameters.assert_called_once_with(
{"test": None}, val_data, predict=True, stop_randomization=True
{"test": None}, val_data, stop_randomization=True
)


Expand Down