Skip to content

Commit

Permalink
Merge pull request #244 from winedarksea/dev
Browse files Browse the repository at this point in the history
0.6.13
  • Loading branch information
winedarksea authored May 14, 2024
2 parents 7a843d0 + 0c94d37 commit bd26a75
Show file tree
Hide file tree
Showing 41 changed files with 336 additions and 150 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

AutoTS is a time series package for Python designed for rapidly deploying high-accuracy forecasts at scale.

In 2023, AutoTS has won in the M6 forecasting competition, delivering the highest performance investment decisions across 12 months of stock market forecasting.
In 2023, AutoTS won in the M6 forecasting competition, delivering the highest performance investment decisions across 12 months of stock market forecasting.

There are dozens of forecasting models usable in the `sklearn` style of `.fit()` and `.predict()`.
These include naive, statistical, machine learning, and deep learning models.
Expand Down
10 changes: 3 additions & 7 deletions TODO.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,10 @@
* Forecasts are desired for the future immediately following the most recent data.
* trimmed_mean to AverageValueNaive

# 0.6.12 🇺🇦 🇺🇦 🇺🇦
# 0.6.13 🇺🇦 🇺🇦 🇺🇦
* trend_phi directly into Prophet
* subset arg to make KalmanStateSpace more scalable to memory
* bug fixes
* added DMD model
* modified the `constraints` options so it now accepts a list of dictionaries of constraints with new last_window and slope options
* 'dampening' as a constraint method to dampen all forecasts, fixed Cassandra trend_phi dampening
* new med_diff anomaly method and 'laplace' added as distribution option
* modified fourier_df to now work with sub daily data
* some madness with wavelets attempting to use them like fourier series for seasonality

### Unstable Upstream Packages (those that are frequently broken by maintainers)
* Pytorch-Forecasting
Expand Down
2 changes: 1 addition & 1 deletion autots/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
from autots.models.cassandra import Cassandra


__version__ = '0.6.12'
__version__ = '0.6.13'

TransformTS = GeneralTransformer

Expand Down
28 changes: 14 additions & 14 deletions autots/evaluator/auto_ts.py
Original file line number Diff line number Diff line change
Expand Up @@ -418,9 +418,9 @@ def __init__(

full_params['transformations'] = transformations
full_params['transformation_params'] = transformation_params
self.initial_template.loc[index, 'TransformationParameters'] = (
json.dumps(full_params)
)
self.initial_template.loc[
index, 'TransformationParameters'
] = json.dumps(full_params)

self.regressor_used = False
self.grouping_ids = None
Expand Down Expand Up @@ -1827,10 +1827,10 @@ def _run_template(
self.model_count = template_result.model_count
# capture results from lower-level template run
if "TotalRuntime" in template_result.model_results.columns:
template_result.model_results['TotalRuntime'] = (
template_result.model_results['TotalRuntime'].fillna(
pd.Timedelta(seconds=60)
)
template_result.model_results[
'TotalRuntime'
] = template_result.model_results['TotalRuntime'].fillna(
pd.Timedelta(seconds=60)
)
else:
# trying to catch a rare and sneaky bug (perhaps some variety of beetle?)
Expand Down Expand Up @@ -1930,9 +1930,9 @@ def _run_validations(
frac=0.8, random_state=self.random_seed
).reindex(idx)
nan_frac = val_df_train.shape[1] / num_validations
val_df_train.iloc[-2:, int(nan_frac * y) : int(nan_frac * (y + 1))] = (
np.nan
)
val_df_train.iloc[
-2:, int(nan_frac * y) : int(nan_frac * (y + 1))
] = np.nan

# run validation template on current slice
result = self._run_template(
Expand Down Expand Up @@ -2195,7 +2195,7 @@ def export_template(
max_per_model_class: int = None,
include_results: bool = False,
unpack_ensembles: bool = False,
min_metrics: list = ['smape', 'spl'],
min_metrics: list = ['smape', 'spl', 'wasserstein', 'mle', 'imle', 'ewmae'],
max_metrics: list = None,
):
"""Export top results as a reusable template.
Expand Down Expand Up @@ -3851,9 +3851,9 @@ def diagnose_params(self, target='runtime', waterfall_plots=True):
)
y = pd.json_normalize(json.loads(row["ModelParameters"]))
y.index = [row['ID']]
y['Model'] = (
x # might need to remove this and do analysis independently for each
)
y[
'Model'
] = x # might need to remove this and do analysis independently for each
res.append(
pd.DataFrame(
{
Expand Down
12 changes: 6 additions & 6 deletions autots/models/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -690,18 +690,18 @@ def long_form_results(
value_name=value_name,
id_vars="datetime",
).set_index("datetime")
upload_upper[interval_name] = (
f"{round(100 - ((1- self.prediction_interval)/2) * 100, 0)}%"
)
upload_upper[
interval_name
] = f"{round(100 - ((1- self.prediction_interval)/2) * 100, 0)}%"
upload_lower = pd.melt(
self.lower_forecast.rename_axis(index='datetime').reset_index(),
var_name=id_name,
value_name=value_name,
id_vars="datetime",
).set_index("datetime")
upload_lower[interval_name] = (
f"{round(((1- self.prediction_interval)/2) * 100, 0)}%"
)
upload_lower[
interval_name
] = f"{round(((1- self.prediction_interval)/2) * 100, 0)}%"

upload = pd.concat([upload, upload_upper, upload_lower], axis=0)
if datetime_column is not None:
Expand Down
104 changes: 81 additions & 23 deletions autots/models/basics.py
Original file line number Diff line number Diff line change
Expand Up @@ -2057,7 +2057,7 @@ class KalmanStateSpace(ModelObject):
name (str): String to identify class
frequency (str): String alias of datetime index frequency or else 'infer'
prediction_interval (float): Confidence interval for probabilistic forecast
subset (int): if not None, forecasts in chunks of this size. Reduces memory at the expense of compute time.
"""

def __init__(
Expand All @@ -2075,6 +2075,7 @@ def __init__(
em_iter: int = 10,
model_name: str = "undefined",
forecast_length: int = None,
subset=None,
**kwargs,
):
ModelObject.__init__(
Expand All @@ -2093,6 +2094,7 @@ def __init__(
self.em_iter = em_iter
self.model_name = model_name
self.forecast_length = forecast_length
self.subset = subset

def fit(self, df, future_regressor=None):
"""Train algorithm given data supplied.
Expand All @@ -2102,31 +2104,55 @@ def fit(self, df, future_regressor=None):
"""
self.fit_data(df)

if self.subset is None:
self.kf = self._fit(df, future_regressor=None)
elif isinstance(self.subset, (float, int)):
if self.subset < 1 and self.subset > 0:
self.subset = self.subset * df.shape[1]
chunks = df.shape[1] // self.subset
if chunks > 1:
self.kf = {}
self.subset_columns = {}
if (df.shape[1] % self.subset) != 0:
chunks += 1
for x in range(chunks):
subset = df.iloc[:, self.subset * x : (self.subset * (x + 1))]
self.subset_columns[str(x)] = subset.columns.tolist()
self.kf[str(x)] = self._fit(subset, future_regressor=None)
else:
self.kf = self._fit(df, future_regressor=None)
else:
raise ValueError(f"subset arg {self.subset} not recognized")

self.fit_runtime = datetime.datetime.now() - self.startTime
return self

def _fit(self, df, future_regressor=None):
if self.observation_noise == "auto":
self.fit_noise = self.tune_observational_noise(df)[0]
else:
self.fit_noise = self.observation_noise
self.kf = KalmanFilter(
kf = KalmanFilter(
state_transition=self.state_transition, # matrix A
process_noise=self.process_noise, # Q
observation_model=self.observation_model, # H
observation_noise=self.fit_noise, # R
)
if self.em_iter is not None:
self.kf = self.kf.em(self.df_train, n_iter=self.em_iter)
kf = kf.em(df.to_numpy().T, n_iter=self.em_iter)

self.fit_runtime = datetime.datetime.now() - self.startTime
return self
return kf

def fit_data(self, df, future_regressor=None):
df = self.basic_profile(df)
self.df_train = df.to_numpy().T
self.df_train = df # df.to_numpy().T
self.train_index = df.index
return self

def cost_function(self, param, df):
try:
# evaluating on a single, most recent holdout only, for simplicity
local = df.to_numpy().T
kf = KalmanFilter(
state_transition=self.state_transition, # matrix A
process_noise=self.process_noise, # Q
Expand All @@ -2135,16 +2161,12 @@ def cost_function(self, param, df):
# covariances=False,
)
if self.em_iter is not None:
kf = kf.em(
self.df_train[:, : -self.forecast_length], n_iter=self.em_iter
)
result = kf.predict(
self.df_train[:, : -self.forecast_length], self.forecast_length
)
kf = kf.em(local[:, : -self.forecast_length], n_iter=self.em_iter)
result = kf.predict(local[:, : -self.forecast_length], self.forecast_length)
df_smooth = pd.DataFrame(
result.observations.mean.T,
index=df.index[-self.forecast_length :],
columns=self.column_names,
columns=df.columns,
)
df_stdev = np.sqrt(result.observations.cov).T
bound = df_stdev * norm.ppf(self.prediction_interval)
Expand Down Expand Up @@ -2219,20 +2241,48 @@ def predict(
"must provide forecast_length to KalmanStateSpace predict"
)
predictStartTime = datetime.datetime.now()
result = self.kf.predict(self.df_train, forecast_length)
df = pd.DataFrame(
result.observations.mean.T,
index=self.create_forecast_index(forecast_length),
columns=self.column_names,
)

if just_point_forecast:
return df
future_index = self.create_forecast_index(forecast_length)
if isinstance(self.kf, dict):
forecasts = []
uppers = []
lowers = []
for x in self.subset_columns:
current_cols = self.subset_columns[x]
result = self.kf[x].predict(
self.df_train.reindex(columns=current_cols).to_numpy().T,
forecast_length,
)
df = pd.DataFrame(
result.observations.mean.T,
index=future_index,
columns=current_cols,
)
forecasts.append(df)
df_stdev = np.sqrt(result.observations.cov).T
bound = df_stdev * norm.ppf(self.prediction_interval)
uppers.append(df + bound)
lowers.append(df - bound)
df = pd.concat(forecasts, axis=1).reindex(columns=self.column_names)
upper_forecast = pd.concat(uppers, axis=1).reindex(
columns=self.column_names
)
lower_forecast = pd.concat(lowers, axis=1).reindex(
columns=self.column_names
)
else:
result = self.kf.predict(self.df_train.to_numpy().T, forecast_length)
df = pd.DataFrame(
result.observations.mean.T,
index=future_index,
columns=self.column_names,
)
df_stdev = np.sqrt(result.observations.cov).T
bound = df_stdev * norm.ppf(self.prediction_interval)
upper_forecast = df + bound
lower_forecast = df - bound
if just_point_forecast:
return df
else:
predict_runtime = datetime.datetime.now() - predictStartTime
prediction = PredictionObject(
model_name=self.name,
Expand All @@ -2251,7 +2301,14 @@ def predict(

def get_new_params(self, method: str = "random"):
# predefined, or random
return new_kalman_params(method=method)
new_params = new_kalman_params(method=method)
if method in ['deep']:
new_params['subset'] = random.choices([None, 200, 300], [0.3, 0.3, 0.3])[0]
else:
new_params['subset'] = random.choices([100, 200, 300], [0.3, 0.3, 0.3])[
0
] # probably no difference
return new_params

def get_params(self):
"""Return dict of current parameters."""
Expand All @@ -2261,6 +2318,7 @@ def get_params(self):
"process_noise": self.process_noise,
"observation_model": self.observation_model,
"observation_noise": self.observation_noise,
"subset": self.subset,
}


Expand Down
18 changes: 9 additions & 9 deletions autots/models/ensemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -1838,15 +1838,15 @@ def MosaicEnsemble(
f"Mosaic Ensemble failed on model {row[3]} series {row[2]} and period {row[1]} due to missing model: {e} "
+ mi
)
melted['forecast'] = (
fore # [forecasts[row[3]][row[2]].iloc[row[1]] for row in melted.itertuples()]
)
melted['upper_forecast'] = (
u_fore # [upper_forecasts[row[3]][row[2]].iloc[row[1]] for row in melted.itertuples()]
)
melted['lower_forecast'] = (
l_fore # [lower_forecasts[row[3]][row[2]].iloc[row[1]] for row in melted.itertuples()]
)
melted[
'forecast'
] = fore # [forecasts[row[3]][row[2]].iloc[row[1]] for row in melted.itertuples()]
melted[
'upper_forecast'
] = u_fore # [upper_forecasts[row[3]][row[2]].iloc[row[1]] for row in melted.itertuples()]
melted[
'lower_forecast'
] = l_fore # [lower_forecasts[row[3]][row[2]].iloc[row[1]] for row in melted.itertuples()]

forecast_df = melted.pivot(
values="forecast", columns="series_id", index="forecast_period"
Expand Down
4 changes: 2 additions & 2 deletions autots/models/matrix_var.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,8 +236,8 @@ def predict(
def get_new_params(self, method: str = 'random'):
"""Return dict of new parameters for parameter tuning."""
return {
'method': random.choice(['als', 'dmd']),
'rank': random.choice([2, 4, 0.1, 0.2, 0.5]),
'method': random.choices(['als', 'dmd'], [0.7, 0.3])[0],
'rank': random.choice([2, 4, 6, 0.1, 0.2, 0.5]),
'maxiter': 200,
}

Expand Down
Loading

0 comments on commit bd26a75

Please sign in to comment.