From 3aa91e21f7730c281e3037693e5fe249272f87cf Mon Sep 17 00:00:00 2001 From: nepslor Date: Fri, 26 Jul 2024 23:50:11 +0200 Subject: [PATCH] loosened causality --- pyforecaster/formatter.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/pyforecaster/formatter.py b/pyforecaster/formatter.py index 55932bf..b31eaee 100644 --- a/pyforecaster/formatter.py +++ b/pyforecaster/formatter.py @@ -414,18 +414,19 @@ def prune_dataset_at_stepahead(self, df, target_col_num, metadata_features, meth c1 = (target_start_time - metadata['start_time']) % pd.Timedelta(period) <= pd.Timedelta(tol_period) c2 = (target_end_time - metadata['end_time']) % pd.Timedelta(period) <= pd.Timedelta(tol_period) causality = metadata['end_time'] <= target_start_time - metadata['keep'] = (c1 | c2) & causality + metadata['keep'] = (c1 | c2) # & causality elif method == 'up_to': - metadata['keep'] = metadata['end_time'] <= target_start_time + metadata['keep'] = metadata['end_time'] <= target_end_time else: metadata['keep'] = True if keep_last_n_lags > 0: metadata['keep'] = metadata['keep'] | metadata['lag'].isin(np.arange(keep_last_n_lags)) - if keep_last_seconds >0: - close = (metadata['end_time'] <= pd.Timedelta(keep_last_seconds, unit='s')) | (metadata['end_time'] <= pd.Timedelta(keep_last_seconds, unit='s')) - predecessor = metadata['end_time'] <= target_start_time + if keep_last_seconds > 0: + close = (metadata['end_time'] <= pd.Timedelta(keep_last_seconds, unit='s')) | ( + metadata['end_time'] <= pd.Timedelta(keep_last_seconds, unit='s')) + predecessor = metadata['end_time'] <= target_end_time metadata['keep'] = metadata['keep'] | (close & predecessor) features = list(metadata.loc[metadata['keep']].index) + metadata_features