From f62b7def62a3290278f154aade82a5442fa0f0dc Mon Sep 17 00:00:00 2001 From: Helena Peic Tukuljac Date: Fri, 21 Feb 2020 20:57:15 +0100 Subject: [PATCH 1/2] moving custom function applied to acoustics --- gtime/custom/CrestFactorDetrending.py | 80 +++++++++++++++++++++++++++ gtime/custom/SortedDensity.py | 75 +++++++++++++++++++++++++ 2 files changed, 155 insertions(+) create mode 100644 gtime/custom/CrestFactorDetrending.py create mode 100644 gtime/custom/SortedDensity.py diff --git a/gtime/custom/CrestFactorDetrending.py b/gtime/custom/CrestFactorDetrending.py new file mode 100644 index 0000000..f579afa --- /dev/null +++ b/gtime/custom/CrestFactorDetrending.py @@ -0,0 +1,80 @@ +import pandas as pd +from sklearn.utils.validation import check_is_fitted +from gtime.feature_extraction import MovingCustomFunction + +class CrestFactorDetrending(MovingCustomFunction): + """Crest factor detrending model. + This class removes the trend from the data by using the crest factor definition. + Each sample is normalized by its weighted surroundings. + Generalized detrending is defined in (eq. 1) of: H. P. Tukuljac, V. Pulkki, + H. Gamper, K. Godin, I. J. Tashev and N. Raghuvanshi, "A Sparsity Measure for Echo + Density Growth in General Environments," ICASSP 2019 - 2019 IEEE International + Conference on Acoustics, Speech and Signal Processing (ICASSP), Brighton, United + Kingdom, 2019, pp. 1-5. + Parameters + ---------- + window_size : int, optional, default: ``1`` + The number of previous points on which to compute the crest factor detrending. + is_causal : bool, optional, default: ``True`` + Whether the current sample is computed based only on the past or also on the future. 
+ Examples + >>> import pandas as pd + >>> from CrestFactorDetrending import CrestFactorDetrending + >>> ts = pd.DataFrame([0, 1, 2, 3, 4, 5]) + >>> gnrl_dtr = CrestFactorDetrending(window_size=2) + >>> gnrl_dtr.fit_transform(ts) + 0__CrestFactorDetrending + 0 NaN + 1 1.000000 + 2 0.800000 + 3 0.692308 + 4 0.640000 + 5 0.609756 + -------- + """ + + def __init__(self, window_size: int = 1, is_causal: bool = True): + super().__init__(self.detrend) + self.window_size = window_size + self.is_causal = is_causal + + def detrend(self, signal): + import numpy as np + N = 2 + signal = np.array(signal) + large_signal_segment = signal**N + large_segment_mean = np.sum(large_signal_segment) + if (self.is_causal): + ref_index = -1 + else: + ref_index = int(len(signal)/2) + small_signal_segment = signal[ref_index]**N + return small_signal_segment/large_segment_mean # (eq. 1) + + def transform(self, time_series: pd.DataFrame) -> pd.DataFrame: + """For every row of ``time_series``, compute the moving crest factor detrending function of the + previous ``window_size`` elements. + Parameters + ---------- + time_series : pd.DataFrame, shape (n_samples, 1), required + The DataFrame on which to compute the rolling moving custom function. + Returns + ------- + time_series_t : pd.DataFrame, shape (n_samples, 1) + A DataFrame, with the same length as ``time_series``, containing the rolling + moving custom function for each element. 
+ """ + check_is_fitted(self) + + if(self.is_causal): + time_series_mvg_dtr = time_series.rolling(self.window_size).apply( + self.detrend, raw=self.raw + ) + else: + time_series_mvg_dtr = time_series.rolling(self.window_size, min_periods = int(self.window_size/2)).apply( + self.detrend, raw=self.raw + ) + time_series_mvg_dtr = time_series_mvg_dtr.dropna() + + time_series_t = time_series_mvg_dtr + return time_series_t \ No newline at end of file diff --git a/gtime/custom/SortedDensity.py b/gtime/custom/SortedDensity.py new file mode 100644 index 0000000..4b5024e --- /dev/null +++ b/gtime/custom/SortedDensity.py @@ -0,0 +1,75 @@ +import pandas as pd +from sklearn.utils.validation import check_is_fitted +from gtime.feature_extraction import MovingCustomFunction + +class SortedDensity(MovingCustomFunction): + """For each row in ``time_series``, compute the sorted density function of the + previous ``window_size`` rows. If there are not enough rows, the value is ``NaN``. + The sorted density measure is defined in (eq. 2) of: H. P. Tukuljac, V. Pulkki, + H. Gamper, K. Godin, I. J. Tashev and N. Raghuvanshi, "A Sparsity Measure for Echo + Density Growth in General Environments," ICASSP 2019 - 2019 IEEE International + Conference on Acoustics, Speech and Signal Processing (ICASSP), Brighton, United + Kingdom, 2019, pp. 1-5. + Parameters + ---------- + window_size : int, optional, default: ``1`` + The number of previous points on which to compute the sorted density. + is_causal : bool, optional, default: ``True`` + Whether the current sample is computed based only on the past or also on the future. 
+ Examples + -------- + >>> import pandas as pd + >>> from gtime.feature_extraction import SortedDensity + >>> ts = pd.DataFrame([0, 1, 2, 3, 4, 5]) + >>> mv_avg = SortedDensity(window_size=2) + >>> mv_avg.fit_transform(ts) + 0__SortedDensity + 0 NaN + 1 0.500000 + 2 0.666667 + 3 0.700000 + 4 0.714286 + 5 0.722222 + -------- + """ + def __init__(self, window_size: int = 1, is_causal: bool = True): + super().__init__(self.sorted_density) + self.window_size = window_size + self.is_causal = is_causal + + def sorted_density(self, signal): + import numpy as np + t = (np.array(range(len(signal))) + 1) + signal = signal[signal.argsort()[::-1]] + t = np.reshape(t, signal.shape) + SD = np.sum(np.multiply(t, signal))/np.sum(signal) # (eq. 2) + SD = SD/(len(signal)) + return SD + + def transform(self, time_series: pd.DataFrame) -> pd.DataFrame: + """For every row of ``time_series``, compute the moving sorted density function of the + previous ``window_size`` elements. + Parameters + ---------- + time_series : pd.DataFrame, shape (n_samples, 1), required + The DataFrame on which to compute the rolling moving custom function. + Returns + ------- + time_series_t : pd.DataFrame, shape (n_samples, 1) + A DataFrame, with the same length as ``time_series``, containing the rolling + moving custom function for each element. 
+ """ + check_is_fitted(self) + + if(self.is_causal): + time_series_mvg_sd = time_series.rolling(self.window_size).apply( + self.sorted_density, raw=self.raw + ) + else: + time_series_mvg_sd = time_series.rolling(self.window_size, min_periods = int(self.window_size/2)).apply( + self.sorted_density, raw=self.raw + ) + time_series_mvg_sd = time_series_mvg_sd.dropna() + + time_series_t = time_series_mvg_sd + return time_series_t \ No newline at end of file From 4e710d198d77185f3413394b9bfa779eac6f0676 Mon Sep 17 00:00:00 2001 From: Helena Peic Tukuljac Date: Wed, 18 Mar 2020 12:48:34 +0100 Subject: [PATCH 2/2] Crest factor detrending and sorted density by MovingCustomFunction. --- gtime/custom/__init__.py | 12 ++++ ...trending.py => crest_factor_detrending.py} | 36 ++++++----- .../{SortedDensity.py => sorted_density.py} | 29 +++++---- gtime/custom/tests/__init__.py | 0 .../tests/test_crest_factor_detrending.py | 63 +++++++++++++++++++ gtime/custom/tests/test_sorted_density.py | 63 +++++++++++++++++++ 6 files changed, 173 insertions(+), 30 deletions(-) create mode 100644 gtime/custom/__init__.py rename gtime/custom/{CrestFactorDetrending.py => crest_factor_detrending.py} (79%) rename gtime/custom/{SortedDensity.py => sorted_density.py} (82%) create mode 100644 gtime/custom/tests/__init__.py create mode 100644 gtime/custom/tests/test_crest_factor_detrending.py create mode 100644 gtime/custom/tests/test_sorted_density.py diff --git a/gtime/custom/__init__.py b/gtime/custom/__init__.py new file mode 100644 index 0000000..2f60d66 --- /dev/null +++ b/gtime/custom/__init__.py @@ -0,0 +1,12 @@ +""" +The :mod:`gtime.custom` module implements custom methods for time +series. 
+""" + +from .crest_factor_detrending import CrestFactorDetrending +from .sorted_density import SortedDensity + +__all__ = [ + "CrestFactorDetrending", + "SortedDensity", +] \ No newline at end of file diff --git a/gtime/custom/CrestFactorDetrending.py b/gtime/custom/crest_factor_detrending.py similarity index 79% rename from gtime/custom/CrestFactorDetrending.py rename to gtime/custom/crest_factor_detrending.py index f579afa..b3bdb1f 100644 --- a/gtime/custom/CrestFactorDetrending.py +++ b/gtime/custom/crest_factor_detrending.py @@ -1,5 +1,7 @@ import pandas as pd from sklearn.utils.validation import check_is_fitted + +from ..base import add_class_name from gtime.feature_extraction import MovingCustomFunction class CrestFactorDetrending(MovingCustomFunction): @@ -34,23 +36,23 @@ class CrestFactorDetrending(MovingCustomFunction): """ def __init__(self, window_size: int = 1, is_causal: bool = True): - super().__init__(self.detrend) + def detrend(signal): + import numpy as np + N = 2 + signal = np.array(signal) + large_signal_segment = signal**N + large_segment_mean = np.sum(large_signal_segment) + if (self.is_causal): + ref_index = -1 + else: + ref_index = int(len(signal)/2) + small_signal_segment = signal[ref_index]**N + return small_signal_segment/large_segment_mean # (eq. 1) + super().__init__(detrend) self.window_size = window_size self.is_causal = is_causal - def detrend(self, signal): - import numpy as np - N = 2 - signal = np.array(signal) - large_signal_segment = signal**N - large_segment_mean = np.sum(large_signal_segment) - if (self.is_causal): - ref_index = -1 - else: - ref_index = int(len(signal)/2) - small_signal_segment = signal[ref_index]**N - return small_signal_segment/large_segment_mean # (eq. 1) - + @add_class_name def transform(self, time_series: pd.DataFrame) -> pd.DataFrame: """For every row of ``time_series``, compute the moving crest factor detrending function of the previous ``window_size`` elements. 
@@ -64,15 +66,15 @@ def transform(self, time_series: pd.DataFrame) -> pd.DataFrame: A DataFrame, with the same length as ``time_series``, containing the rolling moving custom function for each element. """ - check_is_fitted(self) + check_is_fitted(self) if(self.is_causal): time_series_mvg_dtr = time_series.rolling(self.window_size).apply( - self.detrend, raw=self.raw + self.custom_feature_function, raw=self.raw ) else: time_series_mvg_dtr = time_series.rolling(self.window_size, min_periods = int(self.window_size/2)).apply( - self.detrend, raw=self.raw + self.custom_feature_function, raw=self.raw ) time_series_mvg_dtr = time_series_mvg_dtr.dropna() diff --git a/gtime/custom/SortedDensity.py b/gtime/custom/sorted_density.py similarity index 82% rename from gtime/custom/SortedDensity.py rename to gtime/custom/sorted_density.py index 4b5024e..104b03e 100644 --- a/gtime/custom/SortedDensity.py +++ b/gtime/custom/sorted_density.py @@ -1,5 +1,7 @@ import pandas as pd from sklearn.utils.validation import check_is_fitted + +from ..base import add_class_name from gtime.feature_extraction import MovingCustomFunction class SortedDensity(MovingCustomFunction): @@ -32,20 +34,20 @@ class SortedDensity(MovingCustomFunction): 5 0.722222 -------- """ - def __init__(self, window_size: int = 1, is_causal: bool = True): - super().__init__(self.sorted_density) + def __init__(self, window_size: int = 1, is_causal: bool = True): + def sorted_density(signal): + import numpy as np + t = (np.array(range(len(signal))) + 1) + signal = signal[signal.argsort()[::-1]] + t = np.reshape(t, signal.shape) + SD = np.sum(np.multiply(t, signal))/np.sum(signal) # (eq. 
2) + SD = SD/(len(signal)) + return SD + super().__init__(sorted_density) self.window_size = window_size self.is_causal = is_causal - - def sorted_density(self, signal): - import numpy as np - t = (np.array(range(len(signal))) + 1) - signal = signal[signal.argsort()[::-1]] - t = np.reshape(t, signal.shape) - SD = np.sum(np.multiply(t, signal))/np.sum(signal) # (eq. 2) - SD = SD/(len(signal)) - return SD + @add_class_name def transform(self, time_series: pd.DataFrame) -> pd.DataFrame: """For every row of ``time_series``, compute the moving sorted density function of the previous ``window_size`` elements. @@ -60,14 +62,15 @@ def transform(self, time_series: pd.DataFrame) -> pd.DataFrame: moving custom function for each element. """ check_is_fitted(self) + if(self.is_causal): time_series_mvg_sd = time_series.rolling(self.window_size).apply( - self.sorted_density, raw=self.raw + self.custom_feature_function, raw=self.raw ) else: time_series_mvg_sd = time_series.rolling(self.window_size, min_periods = int(self.window_size/2)).apply( - self.sorted_density, raw=self.raw + self.custom_feature_function, raw=self.raw ) time_series_mvg_sd = time_series_mvg_sd.dropna() diff --git a/gtime/custom/tests/__init__.py b/gtime/custom/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/gtime/custom/tests/test_crest_factor_detrending.py b/gtime/custom/tests/test_crest_factor_detrending.py new file mode 100644 index 0000000..5093d8a --- /dev/null +++ b/gtime/custom/tests/test_crest_factor_detrending.py @@ -0,0 +1,63 @@ +import numpy as np +import pandas as pd +import pandas.util.testing as testing +import pytest + +from gtime.custom.crest_factor_detrending import CrestFactorDetrending + +def get_input_data(): + input_data = pd.DataFrame.from_dict({"x_1": [0, 7, 2], "x_2": [2, 10, 4]}) + input_data.index = [ + pd.Timestamp(2000, 1, 1), + pd.Timestamp(2000, 2, 1), + pd.Timestamp(2000, 3, 1), + ] + return input_data + +def get_output_causal(): + custom_feature = 
CrestFactorDetrending(window_size=2, is_causal = True) + feature_name = custom_feature.__class__.__name__ + output_causal = pd.DataFrame.from_dict( + { + f"x_1__{feature_name}": [np.nan, 1.0, 0.07547169811320754], + f"x_2__{feature_name}": [np.nan, 0.9615384615384616, 0.13793103448275862], + } + ) + output_causal.index = [ + pd.Timestamp(2000, 1, 1), + pd.Timestamp(2000, 2, 1), + pd.Timestamp(2000, 3, 1), + ] + return output_causal + +def get_output_anticausal(): + custom_feature = CrestFactorDetrending(window_size=2, is_causal = False) + feature_name = custom_feature.__class__.__name__ + output_anticausal = pd.DataFrame.from_dict( + { + f"x_1__{feature_name}": [1.0, 0.07547169811320754], + f"x_2__{feature_name}": [0.9615384615384616, 0.13793103448275862], + } + ) + output_anticausal.index = [ + pd.Timestamp(2000, 2, 1), + pd.Timestamp(2000, 3, 1), + ] + return output_anticausal + +input_data = get_input_data() +output_causal = get_output_causal() +output_anticausal = get_output_anticausal() + +class TestCrestFactorDetrending: + @pytest.mark.parametrize("test_input, expected", [(input_data, output_causal)]) + def test_crest_factor_detrending_causal(self, test_input, expected): + feature = CrestFactorDetrending(window_size=2, is_causal = True) + output = feature.fit_transform(test_input) + testing.assert_frame_equal(output, expected) + + @pytest.mark.parametrize("test_input, expected", [(input_data, output_anticausal)]) + def test_crest_factor_detrending_anticausal(self, test_input, expected): + feature = CrestFactorDetrending(window_size=2, is_causal = False) + output = feature.fit_transform(test_input) + testing.assert_frame_equal(output, expected) \ No newline at end of file diff --git a/gtime/custom/tests/test_sorted_density.py b/gtime/custom/tests/test_sorted_density.py new file mode 100644 index 0000000..e012b7d --- /dev/null +++ b/gtime/custom/tests/test_sorted_density.py @@ -0,0 +1,63 @@ +import numpy as np +import pandas as pd +import pandas.util.testing as 
testing +import pytest + +from gtime.custom.sorted_density import SortedDensity + +def get_input_data(): + input_data = pd.DataFrame.from_dict({"x_1": [0, 7, 2], "x_2": [2, 10, 4]}) + input_data.index = [ + pd.Timestamp(2000, 1, 1), + pd.Timestamp(2000, 2, 1), + pd.Timestamp(2000, 3, 1), + ] + return input_data + +def get_output_causal(): + custom_feature = SortedDensity(window_size=2, is_causal = True) + feature_name = custom_feature.__class__.__name__ + output_causal = pd.DataFrame.from_dict( + { + f"x_1__{feature_name}": [np.nan, 0.5, 0.6111111111111112], + f"x_2__{feature_name}": [np.nan, 0.5833333333333334, 0.6428571428571429], + } + ) + output_causal.index = [ + pd.Timestamp(2000, 1, 1), + pd.Timestamp(2000, 2, 1), + pd.Timestamp(2000, 3, 1), + ] + return output_causal + +def get_output_anticausal(): + custom_feature = SortedDensity(window_size=2, is_causal = False) + feature_name = custom_feature.__class__.__name__ + output_anticausal = pd.DataFrame.from_dict( + { + f"x_1__{feature_name}": [0.5, 0.6111111111111112], + f"x_2__{feature_name}": [0.5833333333333334, 0.6428571428571429], + } + ) + output_anticausal.index = [ + pd.Timestamp(2000, 2, 1), + pd.Timestamp(2000, 3, 1), + ] + return output_anticausal + +input_data = get_input_data() +output_causal = get_output_causal() +output_anticausal = get_output_anticausal() + +class TestSortedDensity: + @pytest.mark.parametrize("test_input, expected", [(input_data, output_causal)]) + def test_crest_factor_detrending_causal(self, test_input, expected): + feature = SortedDensity(window_size=2, is_causal = True) + output = feature.fit_transform(test_input) + testing.assert_frame_equal(output, expected) + + @pytest.mark.parametrize("test_input, expected", [(input_data, output_anticausal)]) + def test_crest_factor_detrending_anticausal(self, test_input, expected): + feature = SortedDensity(window_size=2, is_causal = False) + output = feature.fit_transform(test_input) + testing.assert_frame_equal(output, expected) \ No 
newline at end of file