Skip to content

Commit

Permalink
Merge branch 'master' into issue-841
Browse files Browse the repository at this point in the history
  • Loading branch information
Mr-Geekman authored Aug 11, 2022
2 parents 020a8d2 + 1565b18 commit e1525e2
Show file tree
Hide file tree
Showing 6 changed files with 181 additions and 301 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
-
-
-
-
- Teach AutoARIMAModel to work with out-sample predictions ([#830](https://github.com/tinkoff-ai/etna/pull/830))
-
-
-
Expand Down
133 changes: 7 additions & 126 deletions etna/models/autoarima.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,12 @@
import warnings
from typing import List
from typing import Optional
from typing import Sequence

import numpy as np
import pandas as pd
import pmdarima as pm
from pmdarima.arima import ARIMA
from statsmodels.tools.sm_exceptions import ValueWarning
from statsmodels.tsa.statespace.sarimax import SARIMAXResultsWrapper

from etna.models.base import BaseAdapter
from etna.models.base import PerSegmentPredictionIntervalModel
from etna.models.sarimax import _SARIMAXBaseAdapter

warnings.filterwarnings(
message="No frequency information was provided, so inferred frequency .* will be used",
Expand All @@ -20,7 +16,7 @@
)


class _AutoARIMAAdapter(BaseAdapter):
class _AutoARIMAAdapter(_SARIMAXBaseAdapter):
"""
Class for holding auto arima model.
Expand All @@ -45,126 +41,11 @@ def __init__(
Training parameters for auto_arima from pmdarima package.
"""
self.kwargs = kwargs
self._model: Optional[ARIMA] = None
self.regressor_columns: List[str] = []
super().__init__()

def fit(self, df: pd.DataFrame, regressors: List[str]) -> "_AutoARIMAAdapter":
    """
    Fit auto ARIMA model.

    Parameters
    ----------
    df:
        Features dataframe; its ``target`` and ``timestamp`` columns are read here
    regressors:
        List of the columns with regressors

    Returns
    -------
    :
        Fitted model

    Raises
    ------
    ValueError:
        If categorical columns can not be converted to int
    """
    self.regressor_columns = regressors

    categorical_cols = df.select_dtypes(include=["category"]).columns.tolist()
    if categorical_cols:
        try:
            # ``astype`` returns a new frame, so the caller's dataframe keeps its categorical dtypes.
            df = df.astype({column: int for column in categorical_cols})
        except ValueError as err:
            raise ValueError(
                f"Categorical columns {categorical_cols} can not been converted to int.\n "
                "Try to encode this columns manually."
            ) from err

    self._check_df(df)

    # Build the timestamp-indexed target on a copy and pass it explicitly, rather than
    # re-indexing the Series returned by ``df["target"]`` and then fetching the column again.
    targets = df["target"].copy()
    targets.index = df["timestamp"]

    exog_train = self._select_regressors(df)

    self._model = pm.auto_arima(targets, X=exog_train, **self.kwargs)
    return self

def predict(self, df: pd.DataFrame, prediction_interval: bool, quantiles: Sequence[float]) -> pd.DataFrame:
    """
    Compute predictions from auto ARIMA model.

    Parameters
    ----------
    df:
        Features dataframe; its length is used as the forecast horizon
    prediction_interval:
        If True returns prediction interval for forecast
    quantiles:
        Levels of prediction distribution

    Returns
    -------
    :
        DataFrame with ``target`` and ``timestamp`` columns and, when ``prediction_interval``
        is True, one ``target_<quantile>`` column per requested quantile

    Raises
    ------
    ValueError:
        If the model is not fitted or categorical columns can not be converted to int
    """
    if self._model is None:
        raise ValueError("AutoARIMA model is not fitted! Fit the model before calling predict method!")
    horizon = len(df)
    self._check_df(df, horizon)

    categorical_cols = df.select_dtypes(include=["category"]).columns.tolist()
    if categorical_cols:
        try:
            # ``astype`` returns a new frame, so the caller's dataframe is left untouched.
            df = df.astype({column: int for column in categorical_cols})
        except ValueError as err:
            raise ValueError(
                f"Categorical columns {categorical_cols} can not been converted to int.\n "
                "Try to encode this columns manually."
            ) from err

    exog_future = self._select_regressors(df)

    # Combine forecast values and timestamps positionally; label-based alignment between
    # the model output and ``df`` could otherwise introduce NaNs.
    y_pred = pd.DataFrame(
        {
            "target": np.asarray(self._model.predict(horizon, X=exog_future)),
            "timestamp": df["timestamp"].reset_index(drop=True),
        }
    )

    if prediction_interval:
        # Remember which quantiles asked for each alpha instead of re-deriving them with
        # float equality checks, which may miss a quantile after the ``2 * (1 - q)`` round trip.
        quantiles_by_alpha: dict = {}
        for quantile in quantiles:
            alpha = 2 * quantile if quantile < 0.5 else 2 * (1 - quantile)
            quantiles_by_alpha.setdefault(alpha, []).append(quantile)

        for alpha in sorted(quantiles_by_alpha):
            _, borders = self._model.predict(horizon, X=exog_future, return_conf_int=True, alpha=alpha)
            borders = np.asarray(borders)
            for quantile in quantiles_by_alpha[alpha]:
                # Column 0 holds the lower border, column 1 the upper one.
                border_column = 0 if quantile < 0.5 else 1
                y_pred[f"target_{quantile:.4g}"] = borders[:, border_column]

    return y_pred

def _check_df(self, df: pd.DataFrame, horizon: Optional[int] = None):
    """
    Validate the columns of ``df`` against the stored regressor list.

    Warns about every column that is neither ``target``, ``timestamp`` nor a stored regressor.
    When a truthy ``horizon`` is given, additionally requires each regressor to have at least
    ``horizon`` non-missing values.

    Parameters
    ----------
    df:
        Features dataframe
    horizon:
        Number of steps to check regressors against; the check is skipped if it is falsy

    Raises
    ------
    ValueError:
        If some regressor has fewer non-missing values than ``horizon``
    """
    allowed = {"target", "timestamp", *self.regressor_columns}
    unexpected = [name for name in df.columns if name not in allowed]
    if unexpected:
        warnings.warn(
            message=f"AutoARIMA model does not work with exogenous features (features unknown in future).\n "
            f"{unexpected} will be dropped"
        )

    if not horizon:
        return

    too_short = [name for name in self.regressor_columns if df[name].count() < horizon]
    if too_short:
        raise ValueError(
            f"Regressors {too_short} are too short for chosen horizon value.\n "
            "Try lower horizon value, or drop this regressors."
        )

def _select_regressors(self, df: pd.DataFrame) -> Optional[pd.DataFrame]:
    """
    Extract the stored regressor columns from ``df``, indexed by its ``timestamp`` column.

    Parameters
    ----------
    df:
        Features dataframe

    Returns
    -------
    :
        Dataframe with regressor columns, or None if no regressors are stored
    """
    if not self.regressor_columns:
        return None

    # Selecting with a list yields a new frame, so re-indexing it leaves ``df`` untouched.
    regressors_frame = df[self.regressor_columns]
    regressors_frame.index = df["timestamp"]
    return regressors_frame

def get_model(self) -> ARIMA:
    """
    Return the object stored in ``self._model``.

    Returns
    -------
    :
        Internal pmdarima.arima.arima.ARIMA model that is used inside etna class
    """
    return self._model
def _get_fit_results(self, endog: pd.Series, exog: pd.DataFrame) -> SARIMAXResultsWrapper:
    """
    Run ``pm.auto_arima`` on the given data and return its ``arima_res_`` attribute.

    Parameters
    ----------
    endog:
        Series passed to ``pm.auto_arima`` as the first positional argument
    exog:
        Data passed to ``pm.auto_arima`` as ``X``

    Returns
    -------
    :
        The ``arima_res_`` attribute of the model returned by ``pm.auto_arima``
    """
    # ``self.kwargs`` holds the training parameters for auto_arima given to the constructor.
    return pm.auto_arima(endog, X=exog, **self.kwargs).arima_res_


class AutoARIMAModel(PerSegmentPredictionIntervalModel):
Expand Down
Loading

0 comments on commit e1525e2

Please sign in to comment.