Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/main' into main
Browse files: browse the repository at this point in the history
  • Loading branch information
nepslor committed Nov 4, 2024
2 parents 7a24a8d + 406a1a3 commit f68f0e4
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 13 deletions.
12 changes: 8 additions & 4 deletions pyforecaster/formatter.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -76,7 +76,7 @@ def add_time_features(self, x):
def add_holidays(self, x, state_code='CH', **kwargs):
self.logger.info('Adding holidays')
holidays = holidays_api.country_holidays(country=state_code, years=x.index.year.unique(), **kwargs)
bridges, long_weekends = spot_holiday_bridges(start=x.index[0]-pd.Timedelta('2D'), end=x.index[-1]+pd.Timedelta('2D'), holidays=holidays)
bridges, long_weekends = spot_holiday_bridges(start=x.index[0]-pd.Timedelta('2D'), end=x.index[-1]+pd.Timedelta('2D'), holidays=pd.DatetimeIndex(holidays.keys()))
bridges = np.array([b.date() for b in bridges])
long_weekends = np.array([b.date() for b in long_weekends])

Expand Down Expand Up @@ -794,12 +794,16 @@ def transform(self, x=None, augment=True, simulate=False):
self.logger.info('Added {} to the dataframe'.format(trans_names))

if self.agg_bins is None:
lags_and_fun = product([0] if self.lags is None else self.lags, function_names)
lags_and_fun = product([None] if self.lags is None else self.lags, function_names)
lags_aux = np.array([lf[0] for lf in product([0] if self.lags is None else self.lags, function_names)])

metadata_n = pd.DataFrame(lags_and_fun, columns=['lag', 'function'], index=trans_names)

metadata_n['aggregation_time'] = self.agg_freq
metadata_n['spacing_time'] = pd.Timedelta(spacing_time)
metadata_n['start_time'] = - spacing_time * metadata_n['lag'] - agg_steps * dt + dt
metadata_n['end_time'] = - spacing_time * metadata_n['lag'] + dt

metadata_n['start_time'] = - spacing_time * lags_aux - agg_steps * dt + dt
metadata_n['end_time'] = - spacing_time * lags_aux + dt
else:
lags_expanded = np.outer(lag_steps, np.ones(len(self.agg_bins) - 1)).ravel()
lags_and_fun =product(function_names, lags_expanded)
Expand Down
18 changes: 9 additions & 9 deletions tests/test_formatter.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -220,7 +220,7 @@ def test_holidays(self):
formatter = pyf.Formatter().add_transform([0], lags=np.arange(10), agg_freq='20min',
relative_lags=True)
formatter.add_transform([0], ['min', 'max'], agg_bins=[-10, -15, -20])
df = formatter.transform(self.x2, time_features=True, holidays=True, prov='ZH')
df = formatter.transform(self.x2, time_features=True, holidays=True, subdiv='ZH')



Expand All @@ -236,7 +236,7 @@ def test_global_multiindex(self):
agg_freq='20min',
relative_lags=True)
formatter.add_target_transform(['target'], ['mean'], agg_bins=[-10, -15, -20])
df = formatter.transform(df_mi, time_features=True, holidays=True, prov='ZH',global_form=True, parallel=False)
df = formatter.transform(df_mi, time_features=True, holidays=True, subdiv='ZH',global_form=True, parallel=False)

def test_global_multiindex_with_col_reordering(self):
x_private = pd.DataFrame(np.random.randn(500, 15), index=pd.date_range('01-01-2020', '01-05-2020', 500, tz='Europe/Zurich'), columns=pd.MultiIndex.from_product([['b1', 'b2', 'b3'], ['a', 'b', 'c', 'd', 'e']]))
Expand All @@ -250,7 +250,7 @@ def test_global_multiindex_with_col_reordering(self):
agg_freq='20min',
relative_lags=True)
formatter.add_target_transform(['target'], ['mean'], agg_bins=[-10, -15, -20])
df = formatter.transform(df_mi, time_features=True, holidays=True, prov='ZH',global_form=True, corr_reorder=True, parallel=False ,reduce_memory=False)
df = formatter.transform(df_mi, time_features=True, holidays=True, subdiv='ZH',global_form=True, corr_reorder=True, parallel=False ,reduce_memory=False)


def test_normalizers(self):
Expand All @@ -259,11 +259,11 @@ def test_normalizers(self):
formatter.add_target_transform(['a'], lags=-np.arange(1, 5), agg_freq='20min')
formatter.add_target_normalizer(['a'], 'mean', agg_freq='10H', name='a_movingavg')
formatter.add_target_normalizer(['a'], 'std', agg_freq='10H', name='a_movingstd')
x, y = formatter.transform(df, time_features=True, holidays=True, prov='ZH')
x, y = formatter.transform(df, time_features=True, holidays=True, subdiv='ZH')

formatter.add_normalizing_fun(expr="(df[t] - df['a_movingavg']) / (df['a_movingstd'] + 1)",
inv_expr="df[t]*(df['a_movingstd']+1) + df['a_movingavg']")
x, y_norm = formatter.transform(df, time_features=True, holidays=True, prov='ZH')
x, y_norm = formatter.transform(df, time_features=True, holidays=True, subdiv='ZH')

y_unnorm = formatter.denormalize(x, y_norm)

Expand All @@ -278,10 +278,10 @@ def test_normalizers_complex(self):
formatter.add_target_normalizer(['a'], 'mean', agg_freq='10H', name='a_n')
formatter.add_target_normalizer(['a'], 'std', agg_freq='5H', name='b_n')

x, y = formatter.transform(df, time_features=True, holidays=True, prov='ZH')
x, y = formatter.transform(df, time_features=True, holidays=True, subdiv='ZH')

formatter.add_normalizing_fun(expr="np.exp(df[t]+df['a_n']) + df['b_n']", inv_expr="np.log(df[t]-df['b_n']) -df['a_n']")
x, y_norm = formatter.transform(df, time_features=True, holidays=True, prov='ZH')
x, y_norm = formatter.transform(df, time_features=True, holidays=True, subdiv='ZH')
y_unnorm = formatter.denormalize(x, y_norm)

# check if back-transform works
Expand All @@ -308,9 +308,9 @@ def test_normalizers_impossible(self):
formatter.add_target_normalizer(['target'], 'mean', agg_freq='10H', name='mean')
formatter.add_target_normalizer(['target'], 'std', agg_freq='5H', name='std')

x, y = formatter.transform(df_mi, time_features=True, holidays=True, prov='ZH',global_form=True)
x, y = formatter.transform(df_mi, time_features=True, holidays=True, subdiv='ZH',global_form=True)
formatter.add_normalizing_fun("(df[t] - df['mean'])/(df['std']+1)", "df[t]*(df['std']+1) + df['mean']")
x, y_norm = formatter.transform(df_mi, time_features=True, holidays=True, prov='ZH',global_form=True)
x, y_norm = formatter.transform(df_mi, time_features=True, holidays=True, subdiv='ZH',global_form=True)

xs = formatter.global_form_preprocess(df_mi)

Expand Down

0 comments on commit f68f0e4

Please sign in to comment.