diff --git a/pyforecaster/forecasting_models/fast_adaptive_models.py b/pyforecaster/forecasting_models/fast_adaptive_models.py
index e156823..767534d 100644
--- a/pyforecaster/forecasting_models/fast_adaptive_models.py
+++ b/pyforecaster/forecasting_models/fast_adaptive_models.py
@@ -192,7 +192,7 @@ def store_basis(self):
     def predict(self, x_pd, **kwargs):
         x = x_pd.values
         y = x_pd[self.target_name].values
-        return self.run(x, y, start_from=0, fit=False)
+        return pd.DataFrame(self.run(x, y, start_from=0, fit=False), index=x_pd.index, columns=[f'{self.target_name}_t+{i}' for i in range(1, self.n_sa+1)])
 
 
     def run(self, x, y, return_coeffs=False, start_from=0, fit=True):
@@ -360,7 +360,7 @@ def store_basis(self):
     def predict(self, x_pd, **kwargs):
         x = x_pd.values
         y = x_pd[self.target_name].values
-        return self.run(x, y, start_from=0, fit=False)
+        return pd.DataFrame(self.run(x, y, start_from=0, fit=False), index=x_pd.index, columns=[f'{self.target_name}_t+{i}' for i in range(1, self.n_sa+1)])
 
 
     def run(self, x, y, return_coeffs=False, start_from=0, fit=True):
@@ -508,7 +508,7 @@ def fit(self, x_pd, y_pd=None, **kwargs):
         return self
 
     def predict(self, x_pd, **kwargs):
-        return self.run(x_pd, fit=False, return_coeffs=True)[0]
+        return pd.DataFrame(self.run(x_pd, fit=False, return_coeffs=True)[0], index=x_pd.index, columns=[f'{self.target_name}_t+{i}' for i in range(1, self.n_sa+1)])
 
 
     def run(self, x_pd, return_coeffs=True, fit=True):
diff --git a/pyforecaster/forecasting_models/holtwinters.py b/pyforecaster/forecasting_models/holtwinters.py
index 66701a0..8296dec 100644
--- a/pyforecaster/forecasting_models/holtwinters.py
+++ b/pyforecaster/forecasting_models/holtwinters.py
@@ -121,7 +121,7 @@ def score_autoregressive(model, x, tr_ratio=0.7, target_name=None, n_sa=1):
     model.fit(x_tr, x_tr)
     y_hat = model.predict(x_te)
-    return np.mean((y_hat[:len(target), :] - target) ** 2)
+    return np.mean((y_hat.values[:len(target), :] - target) ** 2)
 
 class HoltWinters(ScenarioGenerator):
     def __init__(self, periods, target_name, targets_names=None, q_vect=None, val_ratio=None, nodes_at_step=None,
                  optimization_budget=800, n_sa=1, constraints=None,
@@ -230,7 +230,7 @@ def predict(self, x_pd, **kwargs):
 
         self.y_hat_te = y_hat
 
-        return y_hat
+        return pd.DataFrame(y_hat, index=x_pd.index, columns=self.target_cols)
 
     def _predict_quantiles(self, x, **kwargs):
         preds = self.predict(x)
@@ -441,9 +441,9 @@ def predict(self, x, **kwargs):
         for i,m in enumerate(self.models):
             y_hat_m = m.predict(x)
             selection = np.arange(k, y_hat_m.shape[1])
-            y_hat[:, selection] = y_hat_m[:, selection]
+            y_hat[:, selection] = y_hat_m.iloc[:, selection]
             k = y_hat_m.shape[1]
-        return y_hat
+        return pd.DataFrame(y_hat, index=x.index, columns=['{}_{}'.format(self.target_name, t) for t in np.arange(self.n_sa)])
 
     def reinit(self, x):
         for i,m in enumerate(self.models):
diff --git a/pyforecaster/forecasting_models/neural_models/base_nn.py b/pyforecaster/forecasting_models/neural_models/base_nn.py
index d7e23ac..2d339e9 100644
--- a/pyforecaster/forecasting_models/neural_models/base_nn.py
+++ b/pyforecaster/forecasting_models/neural_models/base_nn.py
@@ -88,6 +88,7 @@ class FeedForwardModule(nn.Module):
     n_layers: Union[int, np.array, list]
     n_out: int=None
     n_neurons: int=None
+    split_heads: bool = False
     @nn.compact
     def __call__(self, x):
         if isinstance(self.n_layers, int):
@@ -97,9 +98,31 @@ def __call__(self, x):
         else:
             layers = self.n_layers
         for i, n in enumerate(layers):
-            x = nn.Dense(features=n, name='dense_{}'.format(i))(x)
-            x = nn.relu(x)
+            if i < len(layers)-1:
+                x = nn.Dense(features=n, name='dense_{}'.format(i))(x)
+                x = nn.relu(x)
+            else:
+                if self.split_heads:
+                    n_out = self.n_out if self.n_out is not None else layers[-1]
+                    # split into n_out heads to predict the output independently
+                    subnets = [nn.relu(nn.Dense(features=layers[np.maximum(-2, -len(layers))], name='subnet_in_{}'.format(k))(x)) for k in range(n_out)]
+                    out = [nn.Dense(features=1, name='subnet_out_{}'.format(k))(subnets[k]) for k in range(n_out)]
+                    x = jnp.hstack(out)
+                    """
+                    # Combine the outputs in a single dense layer
+                    n_last = layers[np.maximum(-2, -len(layers))]
+                    x = nn.Dense(features=self.n_out * n_last, name='combined_dense')(x)
+                    x = nn.relu(x)
+                    # Reshape into separate heads
+                    x = x.reshape((-1, self.n_out, n_last))
+                    x = nn.relu(x)
+                    # Final layer to predict the output for each head
+                    x = nn.Dense(features=1, name='final_out')(x).squeeze(-1)
+                    """
+                else:
+                    x = nn.Dense(features=n, name='dense_{}'.format(i))(x)
+
         return x
 
 
 class NN(ScenarioGenerator):
@@ -384,3 +407,12 @@ def __init__(self, n_out=None, q_vect=None, n_epochs=10, val_ratio=None, nodes_a
                  scengen_dict={}, batch_size=None, **model_kwargs):
         super().__init__(n_out=n_out, q_vect=q_vect, n_epochs=n_epochs, val_ratio=val_ratio, nodes_at_step=nodes_at_step, learning_rate=learning_rate,
                          nn_module=FeedForwardModule, scengen_dict=scengen_dict, batch_size=batch_size, **model_kwargs)
+
+    def set_arch(self):
+        self.optimizer = optax.adamw(learning_rate=self.learning_rate)
+        self.model = FeedForwardModule(n_layers=self.n_layers, n_neurons=self.n_hidden_x,
+                                       n_out=self.n_out, split_heads=True)
+        self.predict_batch = vmap(jitting_wrapper(predict_batch, self.model), in_axes=(None, 0))
+        self.loss_fn = jitting_wrapper(probabilistic_loss_fn, self.predict_batch) if self.probabilistic else (
+            jitting_wrapper(loss_fn, self.predict_batch))
+        self.train_step = jitting_wrapper(partial(train_step, loss_fn=self.loss_fn), self.optimizer)
\ No newline at end of file
diff --git a/tests/test_models.py b/tests/test_models.py
index b04f591..2ec8e4c 100644
--- a/tests/test_models.py
+++ b/tests/test_models.py
@@ -48,7 +48,7 @@ def test_hw(self):
         #hw.reinit(x_tr['target'])
         y_hat = hw.predict(pd.concat([x_te,y_te], axis=1))
 
-        ts_animation([y_hat], names=['y_hat', 'target'], target=y_te.values, frames=100, repeat=False)
+        ts_animation([y_hat.values], names=['y_hat', 'target'], target=y_te.values, frames=100, repeat=False)
 
 
     def test_fast_linreg(self):
@@ -120,7 +120,7 @@ def test_hw_difficult(self):
                                        target_name='target', models_periods=np.array([1,2,3,5, 10, 24]), constraints=[0, np.inf]).fit(y_tr,y_tr)
         y_hat_multi = hw_multi.predict(y_te)
 
-        ts_animation([y_hat, y_hat_multi], names=['y_hat', 'y_hat_multi', 'target'], target=y_te.values, frames=100, repeat=False)
+        ts_animation([y_hat.values, y_hat_multi.values], names=['y_hat', 'y_hat_multi', 'target'], target=y_te.values, frames=100, repeat=False)
 
 
     def test_hw_multi(self):
@@ -155,7 +155,7 @@ def test_hw_multi(self):
         y_hat_fks_multi = fks_multi.predict(df_te)
         y_hat_fks_multi_q = fks_multi.predict_quantiles(df_te)
 
-        ys = [y_hat, y_hat_multi, y_hat_fes, y_hat_fks, y_hat_fks_multi]
+        ys = [y_hat.values, y_hat_multi.values, y_hat_fes.values, y_hat_fks.values, y_hat_fks_multi.values]
         ts_animation(ys, target = df_te['all'].values, names = ['hw', 'hw_multi', 'fes', 'fks', 'fks_multi', 'target'], frames = 120, interval = 1, step = 1, repeat = False)
 
     def test_linear_val_split(self):
diff --git a/tests/test_nns.py b/tests/test_nns.py
index cd04e65..a473599 100644
--- a/tests/test_nns.py
+++ b/tests/test_nns.py
@@ -497,24 +497,24 @@ def test_invertible_causal_nn(self):
             ax.plot(y_invert.iloc[i, 144:].values, linestyle='--')
             plt.pause(1e-6)
         """
-        m = FFNN(n_layers=1, learning_rate=1e-3, batch_size=100, load_path=None, n_out=120, rel_tol=-1, stopping_rounds=20).fit(e_tr.iloc[:, :184], e_tr.iloc[:, -120:])
-        y_hat = m.predict(e_te.iloc[:, :184])
+        m = FFNN(n_hidden_x=50, n_layers=1, learning_rate=1e-3, batch_size=100, load_path=None, n_out=143, rel_tol=-1, stopping_rounds=20,n_epochs=1).fit(e_tr.iloc[:, :145], e_tr.iloc[:, -143:])
+        y_hat = m.predict(e_te.iloc[:, :145])
 
-        m = CausalInvertibleNN(learning_rate=1e-2, batch_size=200, load_path=None, n_in=184,
-                               n_layers=3, normalize_target=False, n_epochs=1, stopping_rounds=20, rel_tol=-1,
-                               end_to_end='full', n_hidden_y=300, n_prediction_layers=3, n_out=120,names_exogenous=['all_lag_000']).fit(e_tr.iloc[:, :184], e_tr.iloc[:, -120:])
+        m = CausalInvertibleNN(learning_rate=1e-2, batch_size=300, load_path=None, n_in=145,
+                               n_layers=2, normalize_target=False, n_epochs=5, stopping_rounds=30, rel_tol=-1,
+                               end_to_end='full', n_hidden_y=300, n_prediction_layers=3, n_out=143,names_exogenous=['all_lag_000']).fit(e_tr.iloc[:, :145], e_tr.iloc[:, -143:])
 
-        z_hat_ete = m.predict(e_te.iloc[:, :184])
+        z_hat_ete = m.predict(e_te.iloc[:, :145])
 
-        np.mean((z_hat_ete.values- e_te.iloc[:, -120:].values)**2)
-        np.mean((y_hat.values- e_te.iloc[:, -120:].values)**2)
-        np.mean((y_hat_lin.values- e_te.iloc[:, -120:].values)**2)
+        np.mean((z_hat_ete.values- e_te.iloc[:, -143:].values)**2)
+        np.mean((y_hat.values- e_te.iloc[:, -143:].values)**2)
+        np.mean((y_hat_lin.values- e_te.iloc[:, -143:].values)**2)
 
         fig, ax = plt.subplots(1, 1, figsize=(4, 3))
         for i in range(100):
-            if i%10 == 0:
+            if i%5 == 0:
                 plt.cla()
-            ax.plot(e_te.iloc[i, -120:].values)
+            ax.plot(e_te.iloc[i, -143:].values)
             ax.plot(y_hat_lin.iloc[i, :].values, linewidth=1)
             ax.plot(z_hat_ete.iloc[i, :].values, linestyle='--')
             plt.pause(1e-6)
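
Reviewer note on the overall contract: every patched `predict` now wraps its raw `(n_samples, n_sa)` ndarray in a `pd.DataFrame` built on the caller's index, with one named column per step ahead, which is what lets the tests and `score_autoregressive` call `.values` / `.iloc` uniformly. Below is a minimal sketch of that shared contract; `wrap_predictions` is a hypothetical helper written for illustration, not a function added by this diff:

```python
import numpy as np
import pandas as pd

def wrap_predictions(raw, x_pd, target_name, n_sa):
    # Hypothetical helper mirroring what each patched predict() now does:
    # take an (n_samples, n_sa) ndarray and return a DataFrame aligned with
    # the caller's index, one named column per step ahead.
    cols = [f'{target_name}_t+{i}' for i in range(1, n_sa + 1)]
    return pd.DataFrame(raw, index=x_pd.index, columns=cols)

# Example: 5 samples, 3 steps ahead
x_pd = pd.DataFrame({'target': np.arange(5.0)},
                    index=pd.date_range('2024-01-01', periods=5, freq='h'))
y_hat = wrap_predictions(np.random.rand(5, 3), x_pd, 'target', 3)
print(list(y_hat.columns))  # ['target_t+1', 'target_t+2', 'target_t+3']
```

Two things worth flagging. First, the column conventions differ between files: the fast adaptive models name columns `'{target}_t+{i}'` for `i` in `1..n_sa`, while the multi-model HoltWinters `predict` uses `'{}_{}'.format(self.target_name, t)` for `t` in `0..n_sa-1`, so callers selecting by column name should not assume the two schemes match. Second, the `split_heads` change in `FeedForwardModule` is orthogonal to the DataFrame wrapping: it replaces the single last dense layer with `n_out` small per-horizon subnetworks whose scalar outputs are concatenated with `jnp.hstack`, and the new `FFNN.set_arch` builds the module with `split_heads=True`, giving each forecast horizon its own head.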