Skip to content

Commit

Permalink
added auto support; updated docs
Browse files Browse the repository at this point in the history
  • Loading branch information
americast committed Sep 29, 2023
1 parent fda2b40 commit 736d9e0
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 22 deletions.
22 changes: 12 additions & 10 deletions docs/source/reference/ai/model-forecasting.rst
Original file line number Diff line number Diff line change
Expand Up @@ -47,20 +47,22 @@ EvaDB's default forecast framework is `statsforecast <https://nixtla.github.io/s
.. list-table:: Available Parameters
:widths: 25 75

* - PREDICT (**required**)
* - PREDICT (required)
- The name of the column we wish to forecast.
* - TIME
- The name of the column that contains the datestamp, wihch should be of a format expected by Pandas, ideally YYYY-MM-DD for a date or YYYY-MM-DD HH:MM:SS for a timestamp. Please visit the `pandas documentation <https://pandas.pydata.org/docs/reference/api/pandas.to_datetime.html>`_ for details. If not provided, an auto increasing ID column will be used.
* - ID
- The name of column that represents an identifier for the series. If not provided, the whole table is considered as one series of data.
* - LIBRARY
* - TIME (default: 'ds')
- The name of the column that contains the datestamp, which should be of a format expected by Pandas, ideally YYYY-MM-DD for a date or YYYY-MM-DD HH:MM:SS for a timestamp. Please visit the `pandas documentation <https://pandas.pydata.org/docs/reference/api/pandas.to_datetime.html>`_ for details. If the relevant column is not found, an auto-increasing ID column will be used.
* - ID (default: 'unique_id')
- The name of the column that represents an identifier for the series. If the relevant column is not found, the whole table is considered as one series of data.
* - LIBRARY (default: 'statsforecast')
- We can select one of `statsforecast` (default) or `neuralforecast`. `statsforecast` provides access to statistical forecasting methods, while `neuralforecast` gives access to deep-learning based forecasting methods.
* - MODEL
* - MODEL (default: 'AutoARIMA')
- If LIBRARY is `statsforecast`, we can select one of AutoARIMA, AutoCES, AutoETS, AutoTheta. The default is AutoARIMA. Check `Automatic Forecasting <https://nixtla.github.io/statsforecast/src/core/models_intro.html#automatic-forecasting>`_ to learn details about these models. If LIBRARY is `neuralforecast`, we can select one of NHITS or NBEATS. The default is NBEATS. Check `NBEATS docs <https://nixtla.github.io/neuralforecast/models.nbeats.html>`_ for details.
* - EXOGENOUS
* - AUTO (default: 'F')
- The names of columns to be treated as exogenous variables, separated by comma. These columns would be considered for forecasting by the backend only for LIBRARY `neuralforecast`.
* - Frequency
- A string indicating the frequency of the data. The common used ones are D, W, M, Y, which repestively represents day-, week-, month- and year- end frequency. The default value is M. Check `pandas available frequencies <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`_ for all available frequencies.
* - Frequency (default: 'auto')
- A string indicating the frequency of the data. The commonly used ones are D, W, M, Y, which respectively represent day-, week-, month- and year-end frequency. The default value is M. Check `pandas available frequencies <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`_ for all available frequencies. If it is not provided, an attempt is made to determine the frequency automatically.

Note: If columns other than the ones required as mentioned above are passed while creating the function, they will be treated as exogenous variables if LIBRARY is `neuralforecast` and AUTO is set to F. In other situations, they will be ignored.

Below is an example query specifying the above parameters:

Expand Down
37 changes: 27 additions & 10 deletions evadb/executor/create_function_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,7 @@ def handle_forecasting_function(self):
Set or infer data frequency
"""

if "frequency" not in arg_map.keys():
if "frequency" not in arg_map.keys() or arg_map["frequency"] == "auto":
arg_map["frequency"] = pd.infer_freq(data["ds"])
frequency = arg_map["frequency"]
if frequency is None:
Expand Down Expand Up @@ -290,6 +290,12 @@ def handle_forecasting_function(self):
if "model" not in arg_map.keys():
arg_map["model"] = "NBEATS"

if (
arg_map["model"].lower()[0] == "t"
and "auto" not in arg_map["model"].lower()
):
arg_map["model"] = "Auto" + arg_map["model"]

try:
model_here = model_dict[arg_map["model"]]
except Exception:
Expand All @@ -298,16 +304,17 @@ def handle_forecasting_function(self):
raise FunctionIODefinitionError(err_msg)
model_args = {}

if len(data.columns) >= 4:
exogenous_columns = [
x for x in list(data.columns) if x not in ["ds", "y", "unique_id"]
]
model_args["hist_exog_list"] = exogenous_columns

if "auto" not in arg_map["model"].lower():
model_args["input_size"] = 2 * horizon
if len(data.columns) >= 4:
exogenous_columns = [
x
for x in list(data.columns)
if x not in ["ds", "y", "unique_id"]
]
model_args["hist_exog_list"] = exogenous_columns

model_args["early_stop_patience_steps"] = 20
model_args["early_stop_patience_steps"] = 20

model_args["h"] = horizon

Expand All @@ -333,6 +340,12 @@ def handle_forecasting_function(self):
if "model" not in arg_map.keys():
arg_map["model"] = "AutoARIMA"

if (
arg_map["model"].lower()[0] == "t"
and "auto" not in arg_map["model"].lower()
):
arg_map["model"] = "Auto" + arg_map["model"]

try:
model_here = model_dict[arg_map["model"]]
except Exception:
Expand All @@ -348,7 +361,11 @@ def handle_forecasting_function(self):
data["ds"] = pd.to_datetime(data["ds"])

model_save_dir_name = library + "_" + arg_map["model"] + "_" + new_freq
if len(data.columns) >= 4:
if (
len(data.columns) >= 4
and "auto" not in arg_map["model"].lower()
and library == "neuralforecast"
):
model_save_dir_name += "_exogenous_" + str(sorted(exogenous_columns))

model_dir = os.path.join(
Expand All @@ -373,7 +390,7 @@ def handle_forecasting_function(self):
if int(x.split("horizon")[1].split(".pkl")[0]) >= horizon
]
if len(existing_model_files) == 0:
print("Training")
print("Training, please wait...")
if library == "neuralforecast":
model.fit(df=data, val_size=horizon)
else:
Expand Down
4 changes: 2 additions & 2 deletions test/integration_tests/long/test_model_forecasting.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,6 @@ def test_forecast(self):
HORIZON 12
PREDICT 'y'
LIBRARY 'neuralforecast'
EXOGENOUS 'trend'
FREQUENCY 'M';
"""
execute_query_fetch_all(self.evadb, create_predict_udf)
Expand All @@ -128,6 +127,7 @@ def test_forecast_with_column_rename(self):
WHERE bedrooms = 2
)
TYPE Forecasting
HORIZON 24
PREDICT 'ma'
ID 'type'
TIME 'saledate'
Expand All @@ -136,7 +136,7 @@ def test_forecast_with_column_rename(self):
execute_query_fetch_all(self.evadb, create_predict_udf)

predict_query = """
SELECT HomeForecast(12);
SELECT HomeForecast();
"""
result = execute_query_fetch_all(self.evadb, predict_query)
self.assertEqual(len(result), 24)
Expand Down

0 comments on commit 736d9e0

Please sign in to comment.