Skip to content

Commit

Permalink
breaking: add hist_exog_list argument to forecast (#505)
Browse files Browse the repository at this point in the history
  • Loading branch information
jmoralez authored Oct 29, 2024
1 parent dc30aa0 commit 8834114
Show file tree
Hide file tree
Showing 2 changed files with 126 additions and 35 deletions.
102 changes: 86 additions & 16 deletions nbs/src/nixtla_client.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -417,28 +417,46 @@
" id_col: str,\n",
" time_col: str,\n",
" target_col: str,\n",
" hist_exog: Optional[List[str]],\n",
") -> Tuple[DFType, Optional[DFType]]:\n",
"\n",
" exog_list = [c for c in df.columns if c not in (id_col, time_col, target_col)]\n",
"\n",
" exogs = [c for c in df.columns if c not in (id_col, time_col, target_col)]\n",
" if hist_exog is None:\n",
" hist_exog = []\n",
" if X_df is None:\n",
" df = df[[id_col, time_col, target_col, *exog_list]]\n",
" # all exogs must be historic\n",
" ignored_exogs = [c for c in exogs if c not in hist_exog]\n",
" if ignored_exogs:\n",
" warnings.warn(\n",
" f\"`df` contains the following exogenous features: {ignored_exogs}, \"\n",
" \"but `X_df` was not provided and they were not declared in `hist_exog_list`. \"\n",
" \"They will be ignored.\"\n",
" )\n",
" exogs = [c for c in exogs if c in hist_exog]\n",
" df = df[[id_col, time_col, target_col, *exogs]]\n",
" return df, None\n",
"\n",
" futr_exog_list = [c for c in X_df.columns if c not in (id_col, time_col)]\n",
" hist_exog_list = list(set(exog_list) - set(futr_exog_list))\n",
" # exogs in df that weren't declared as historic nor future\n",
" futr_exog = [c for c in X_df.columns if c not in (id_col, time_col)]\n",
" declared_exogs = {*hist_exog, *futr_exog}\n",
" ignored_exogs = [c for c in exogs if c not in declared_exogs]\n",
" if ignored_exogs:\n",
" warnings.warn(\n",
" f\"`df` contains the following exogenous features: {ignored_exogs}, \"\n",
" \"but they were not found in `X_df` nor declared in `hist_exog_list`. \"\n",
" \"They will be ignored.\"\n",
" )\n",
"\n",
" # Capture case where future exogenous are provided in X_df that are not in df\n",
" missing_futr = set(futr_exog_list) - set(exog_list)\n",
" # future exogenous are provided in X_df that are not in df\n",
" missing_futr = set(futr_exog) - set(exogs)\n",
" if missing_futr:\n",
" raise ValueError(\n",
" \"The following exogenous features are present in `X_df` \"\n",
" f\"but not in `df`: {missing_futr}.\"\n",
" )\n",
"\n",
" # Make sure df and X_df are in right order\n",
" df = df[[id_col, time_col, target_col, *futr_exog_list, *hist_exog_list]]\n",
" X_df = X_df[[id_col, time_col, *futr_exog_list]]\n",
" df = df[[id_col, time_col, target_col, *futr_exog, *hist_exog]]\n",
" X_df = X_df[[id_col, time_col, *futr_exog]]\n",
"\n",
" return df, X_df\n",
"\n",
Expand Down Expand Up @@ -929,6 +947,7 @@
" finetune_depth: _Finetune_Depth = 1,\n",
" finetune_loss: _Loss = 'default',\n",
" clean_ex_first: bool = True,\n",
" hist_exog_list: Optional[List[str]] = None,\n",
" validate_api_key: bool = False,\n",
" add_history: bool = False,\n",
" date_features: Union[bool, List[Union[str, Callable]]] = False,\n",
Expand Down Expand Up @@ -985,6 +1004,8 @@
" Loss function to use for finetuning. Options are: `default`, `mae`, `mse`, `rmse`, `mape`, and `smape`.\n",
" clean_ex_first : bool (default=True)\n",
" Clean exogenous signal before making forecasts using TimeGPT.\n",
" hist_exog_list : list of str, optional (default=None)\n",
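" Column names of the historical exogenous features.\n",
" Exogenous columns in `df` that are neither listed here nor present in `X_df` are ignored (a warning is issued).\n",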
" validate_api_key : bool (default=False)\n",
" If True, validates api_key before sending requests.\n",
" add_history : bool (default=False)\n",
Expand Down Expand Up @@ -1055,7 +1076,12 @@
" model=model,\n",
" )\n",
" df, X_df = _validate_exog(\n",
" df, X_df, id_col=id_col, time_col=time_col, target_col=target_col\n",
" df=df,\n",
" X_df=X_df,\n",
" id_col=id_col,\n",
" time_col=time_col,\n",
" target_col=target_col,\n",
" hist_exog=hist_exog_list,\n",
" )\n",
" level, quantiles = _prepare_level_and_quantiles(level, quantiles)\n",
" freq = _maybe_infer_freq(df, freq=freq, id_col=id_col, time_col=time_col)\n",
Expand Down Expand Up @@ -1095,12 +1121,9 @@
" if processed.data.shape[1] > 1:\n",
" X = processed.data[:, 1:].T\n",
" if futr_cols is not None:\n",
" hist_exog_set= set(x_cols) - set(futr_cols)\n",
" if hist_exog_set:\n",
" logger.info(f'Using historical exogenous features: {list(hist_exog_set)}')\n",
" logger.info(f'Using future exogenous features: {futr_cols}')\n",
" else:\n",
" logger.info(f'Using historical exogenous features: {x_cols}')\n",
" if hist_exog_list is not None:\n",
" logger.info(f'Using historical exogenous features: {hist_exog_list}')\n",
" else:\n",
" X = None\n",
"\n",
Expand Down Expand Up @@ -2572,6 +2595,53 @@
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#| hide\n",
"# future and historic exogs\n",
"df = generate_series(n_series=2, min_length=5, max_length=20)\n",
"train, future = time_features(df, freq='D', features=['year', 'month'], h=5)\n",
"\n",
"# features in df but not in X_df\n",
"missing_exogenous = train.columns.drop(['unique_id', 'ds', 'y']).tolist()\n",
"expected_warning = (\n",
" f'`df` contains the following exogenous features: {missing_exogenous}, '\n",
" 'but `X_df` was not provided and they were not declared in `hist_exog_list`. '\n",
" 'They will be ignored.'\n",
")\n",
"with warnings.catch_warnings(record=True) as w:\n",
" forecasts = nixtla_client.forecast(train, h=5)\n",
" assert any(expected_warning in str(warning.message) for warning in w)\n",
"\n",
"# features in df not set as historic nor in X_df\n",
"expected_warning = (\n",
" f\"`df` contains the following exogenous features: ['month'], \"\n",
" 'but they were not found in `X_df` nor declared in `hist_exog_list`. '\n",
" 'They will be ignored.'\n",
")\n",
"with warnings.catch_warnings(record=True) as w:\n",
" forecasts = nixtla_client.forecast(train, h=5, X_df=future[['unique_id', 'ds', 'year']])\n",
" assert any(expected_warning in str(warning.message) for warning in w)\n",
"\n",
"# features in X_df not in df\n",
"test_fail(\n",
" lambda: nixtla_client.forecast(\n",
" train[['unique_id', 'ds', 'y']],\n",
" h=5,\n",
" X_df=future,\n",
" ),\n",
" contains='features are present in `X_df` but not in `df`'\n",
")\n",
"\n",
"# test setting one as historic and other as future\n",
"nixtla_client.forecast(train, h=5, X_df=future[['unique_id', 'ds', 'year']], hist_exog_list=['month'])\n",
"test_eq(nixtla_client.weights_x['features'].tolist(), ['year', 'month'])"
]
},
{
"cell_type": "markdown",
"metadata": {},
Expand Down
59 changes: 40 additions & 19 deletions nixtla/nixtla_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -347,28 +347,46 @@ def _validate_exog(
id_col: str,
time_col: str,
target_col: str,
hist_exog: Optional[List[str]],
) -> Tuple[DFType, Optional[DFType]]:

exog_list = [c for c in df.columns if c not in (id_col, time_col, target_col)]

exogs = [c for c in df.columns if c not in (id_col, time_col, target_col)]
if hist_exog is None:
hist_exog = []
if X_df is None:
df = df[[id_col, time_col, target_col, *exog_list]]
# all exogs must be historic
ignored_exogs = [c for c in exogs if c not in hist_exog]
if ignored_exogs:
warnings.warn(
f"`df` contains the following exogenous features: {ignored_exogs}, "
"but `X_df` was not provided and they were not declared in `hist_exog_list`. "
"They will be ignored."
)
exogs = [c for c in exogs if c in hist_exog]
df = df[[id_col, time_col, target_col, *exogs]]
return df, None

futr_exog_list = [c for c in X_df.columns if c not in (id_col, time_col)]
hist_exog_list = list(set(exog_list) - set(futr_exog_list))
# exogs in df that weren't declared as historic nor future
futr_exog = [c for c in X_df.columns if c not in (id_col, time_col)]
declared_exogs = {*hist_exog, *futr_exog}
ignored_exogs = [c for c in exogs if c not in declared_exogs]
if ignored_exogs:
warnings.warn(
f"`df` contains the following exogenous features: {ignored_exogs}, "
"but they were not found in `X_df` nor declared in `hist_exog_list`. "
"They will be ignored."
)

# Capture case where future exogenous are provided in X_df that are not in df
missing_futr = set(futr_exog_list) - set(exog_list)
# future exogenous are provided in X_df that are not in df
missing_futr = set(futr_exog) - set(exogs)
if missing_futr:
raise ValueError(
"The following exogenous features are present in `X_df` "
f"but not in `df`: {missing_futr}."
)

# Make sure df and X_df are in right order
df = df[[id_col, time_col, target_col, *futr_exog_list, *hist_exog_list]]
X_df = X_df[[id_col, time_col, *futr_exog_list]]
df = df[[id_col, time_col, target_col, *futr_exog, *hist_exog]]
X_df = X_df[[id_col, time_col, *futr_exog]]

return df, X_df

Expand Down Expand Up @@ -859,6 +877,7 @@ def forecast(
finetune_depth: _Finetune_Depth = 1,
finetune_loss: _Loss = "default",
clean_ex_first: bool = True,
hist_exog_list: Optional[List[str]] = None,
validate_api_key: bool = False,
add_history: bool = False,
date_features: Union[bool, List[Union[str, Callable]]] = False,
Expand Down Expand Up @@ -915,6 +934,8 @@ def forecast(
Loss function to use for finetuning. Options are: `default`, `mae`, `mse`, `rmse`, `mape`, and `smape`.
clean_ex_first : bool (default=True)
Clean exogenous signal before making forecasts using TimeGPT.
hist_exog_list : list of str, optional (default=None)
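Column names of the historical exogenous features.
Exogenous columns in `df` that are neither listed here nor present in `X_df` are ignored (a warning is issued).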
validate_api_key : bool (default=False)
If True, validates api_key before sending requests.
add_history : bool (default=False)
Expand Down Expand Up @@ -985,7 +1006,12 @@ def forecast(
model=model,
)
df, X_df = _validate_exog(
df, X_df, id_col=id_col, time_col=time_col, target_col=target_col
df=df,
X_df=X_df,
id_col=id_col,
time_col=time_col,
target_col=target_col,
hist_exog=hist_exog_list,
)
level, quantiles = _prepare_level_and_quantiles(level, quantiles)
freq = _maybe_infer_freq(df, freq=freq, id_col=id_col, time_col=time_col)
Expand Down Expand Up @@ -1025,14 +1051,9 @@ def forecast(
if processed.data.shape[1] > 1:
X = processed.data[:, 1:].T
if futr_cols is not None:
hist_exog_set = set(x_cols) - set(futr_cols)
if hist_exog_set:
logger.info(
f"Using historical exogenous features: {list(hist_exog_set)}"
)
logger.info(f"Using future exogenous features: {futr_cols}")
else:
logger.info(f"Using historical exogenous features: {x_cols}")
if hist_exog_list is not None:
logger.info(f"Using historical exogenous features: {hist_exog_list}")
else:
X = None

Expand Down Expand Up @@ -1632,7 +1653,7 @@ def plot(
ax=ax,
)

# %% ../nbs/src/nixtla_client.ipynb 50
# %% ../nbs/src/nixtla_client.ipynb 51
def _forecast_wrapper(
df: pd.DataFrame,
client: NixtlaClient,
Expand Down

0 comments on commit 8834114

Please sign in to comment.