From 4aebcb8b15934f3c93d3e3659421c43eff3d3049 Mon Sep 17 00:00:00 2001 From: vascomedici Date: Mon, 21 Oct 2024 15:42:42 +0200 Subject: [PATCH 1/3] consistently put None in the metadata for lags if not specified --- pyforecaster/formatter.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/pyforecaster/formatter.py b/pyforecaster/formatter.py index 65a7e10..053aa35 100644 --- a/pyforecaster/formatter.py +++ b/pyforecaster/formatter.py @@ -794,12 +794,17 @@ def transform(self, x=None, augment=True, simulate=False): self.logger.info('Added {} to the dataframe'.format(trans_names)) if self.agg_bins is None: - lags_and_fun = product([0] if self.lags is None else self.lags, function_names) + lags_and_fun = product([None] if self.lags is None else self.lags, function_names) + lags_aux = np.array([lf[0] for lf in product([0] if self.lags is None else self.lags, function_names)]) + metadata_n = pd.DataFrame(lags_and_fun, columns=['lag', 'function'], index=trans_names) + metadata_n['aggregation_time'] = self.agg_freq metadata_n['spacing_time'] = pd.Timedelta(spacing_time) - metadata_n['start_time'] = - spacing_time * metadata_n['lag'] - agg_steps * dt + dt - metadata_n['end_time'] = - spacing_time * metadata_n['lag'] + dt + + metadata_n['start_time'] = - spacing_time * lags_aux - agg_steps * dt + dt + metadata_n['end_time'] = - spacing_time * lags_aux + dt + print(metadata_n) else: lags_expanded = np.outer(lag_steps, np.ones(len(self.agg_bins) - 1)).ravel() lags_and_fun =product(function_names, lags_expanded) From 2aa3d4037d4d2ae2d121f19f3e529d9b60fe81ee Mon Sep 17 00:00:00 2001 From: vascomedici Date: Mon, 21 Oct 2024 16:15:07 +0200 Subject: [PATCH 2/3] removed print --- pyforecaster/formatter.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pyforecaster/formatter.py b/pyforecaster/formatter.py index 053aa35..bd4d4bf 100644 --- a/pyforecaster/formatter.py +++ b/pyforecaster/formatter.py @@ -804,7 +804,6 @@ def transform(self, x=None, augment=True, simulate=False): metadata_n['start_time'] = - spacing_time * lags_aux - agg_steps * dt + dt metadata_n['end_time'] = - spacing_time * lags_aux + dt - print(metadata_n) else: lags_expanded = np.outer(lag_steps, np.ones(len(self.agg_bins) - 1)).ravel() lags_and_fun =product(function_names, lags_expanded) From 406a1a377c71edf623ab400e2d701f5a5a515ca6 Mon Sep 17 00:00:00 2001 From: vascomedici Date: Thu, 24 Oct 2024 18:03:13 +0200 Subject: [PATCH 3/3] fixed deprecations in holidays handling --- pyforecaster/formatter.py | 2 +- tests/test_formatter.py | 18 +++++++++--------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/pyforecaster/formatter.py b/pyforecaster/formatter.py index bd4d4bf..c61f9e9 100644 --- a/pyforecaster/formatter.py +++ b/pyforecaster/formatter.py @@ -76,7 +76,7 @@ def add_time_features(self, x): def add_holidays(self, x, state_code='CH', **kwargs): self.logger.info('Adding holidays') holidays = holidays_api.country_holidays(country=state_code, years=x.index.year.unique(), **kwargs) - bridges, long_weekends = spot_holiday_bridges(start=x.index[0]-pd.Timedelta('2D'), end=x.index[-1]+pd.Timedelta('2D'), holidays=holidays) + bridges, long_weekends = spot_holiday_bridges(start=x.index[0]-pd.Timedelta('2D'), end=x.index[-1]+pd.Timedelta('2D'), holidays=pd.DatetimeIndex(holidays.keys())) bridges = np.array([b.date() for b in bridges]) long_weekends = np.array([b.date() for b in long_weekends]) diff --git a/tests/test_formatter.py b/tests/test_formatter.py index 2afc2d9..ef106e4 100644 --- a/tests/test_formatter.py +++ b/tests/test_formatter.py @@ -220,7 +220,7 @@ def test_holidays(self): formatter = pyf.Formatter().add_transform([0], lags=np.arange(10), agg_freq='20min', relative_lags=True) formatter.add_transform([0], ['min', 'max'], agg_bins=[-10, -15, -20]) - df = formatter.transform(self.x2, time_features=True, holidays=True, prov='ZH') + df = formatter.transform(self.x2, time_features=True, holidays=True, subdiv='ZH') @@ -236,7 +236,7 @@ def test_global_multiindex(self): agg_freq='20min', relative_lags=True) formatter.add_target_transform(['target'], ['mean'], agg_bins=[-10, -15, -20]) - df = formatter.transform(df_mi, time_features=True, holidays=True, prov='ZH',global_form=True, parallel=False) + df = formatter.transform(df_mi, time_features=True, holidays=True, subdiv='ZH',global_form=True, parallel=False) def test_global_multiindex_with_col_reordering(self): x_private = pd.DataFrame(np.random.randn(500, 15), index=pd.date_range('01-01-2020', '01-05-2020', 500, tz='Europe/Zurich'), columns=pd.MultiIndex.from_product([['b1', 'b2', 'b3'], ['a', 'b', 'c', 'd', 'e']])) @@ -250,7 +250,7 @@ def test_global_multiindex_with_col_reordering(self): agg_freq='20min', relative_lags=True) formatter.add_target_transform(['target'], ['mean'], agg_bins=[-10, -15, -20]) - df = formatter.transform(df_mi, time_features=True, holidays=True, prov='ZH',global_form=True, corr_reorder=True, parallel=False ,reduce_memory=False) + df = formatter.transform(df_mi, time_features=True, holidays=True, subdiv='ZH',global_form=True, corr_reorder=True, parallel=False ,reduce_memory=False) def test_normalizers(self): @@ -259,11 +259,11 @@ def test_normalizers(self): formatter.add_target_transform(['a'], lags=-np.arange(1, 5), agg_freq='20min') formatter.add_target_normalizer(['a'], 'mean', agg_freq='10H', name='a_movingavg') formatter.add_target_normalizer(['a'], 'std', agg_freq='10H', name='a_movingstd') - x, y = formatter.transform(df, time_features=True, holidays=True, prov='ZH') + x, y = formatter.transform(df, time_features=True, holidays=True, subdiv='ZH') formatter.add_normalizing_fun(expr="(df[t] - df['a_movingavg']) / (df['a_movingstd'] + 1)", inv_expr="df[t]*(df['a_movingstd']+1) + df['a_movingavg']") - x, y_norm = formatter.transform(df, time_features=True, holidays=True, prov='ZH') + x, y_norm = formatter.transform(df, time_features=True, holidays=True, subdiv='ZH') y_unnorm = formatter.denormalize(x, y_norm) @@ -278,10 +278,10 @@ def test_normalizers_complex(self): formatter.add_target_normalizer(['a'], 'mean', agg_freq='10H', name='a_n') formatter.add_target_normalizer(['a'], 'std', agg_freq='5H', name='b_n') - x, y = formatter.transform(df, time_features=True, holidays=True, prov='ZH') + x, y = formatter.transform(df, time_features=True, holidays=True, subdiv='ZH') formatter.add_normalizing_fun(expr="np.exp(df[t]+df['a_n']) + df['b_n']", inv_expr="np.log(df[t]-df['b_n']) -df['a_n']") - x, y_norm = formatter.transform(df, time_features=True, holidays=True, prov='ZH') + x, y_norm = formatter.transform(df, time_features=True, holidays=True, subdiv='ZH') y_unnorm = formatter.denormalize(x, y_norm) # check if back-transform works @@ -308,9 +308,9 @@ def test_normalizers_impossible(self): formatter.add_target_normalizer(['target'], 'mean', agg_freq='10H', name='mean') formatter.add_target_normalizer(['target'], 'std', agg_freq='5H', name='std') - x, y = formatter.transform(df_mi, time_features=True, holidays=True, prov='ZH',global_form=True) + x, y = formatter.transform(df_mi, time_features=True, holidays=True, subdiv='ZH',global_form=True) formatter.add_normalizing_fun("(df[t] - df['mean'])/(df['std']+1)", "df[t]*(df['std']+1) + df['mean']") - x, y_norm = formatter.transform(df_mi, time_features=True, holidays=True, prov='ZH',global_form=True) + x, y_norm = formatter.transform(df_mi, time_features=True, holidays=True, subdiv='ZH',global_form=True) xs = formatter.global_form_preprocess(df_mi)