From f62b7def62a3290278f154aade82a5442fa0f0dc Mon Sep 17 00:00:00 2001
From: Helena Peic Tukuljac <helena.peictukuljac@epfl.ch>
Date: Fri, 21 Feb 2020 20:57:15 +0100
Subject: [PATCH 1/2] moving custom function applied to acoustics

---
 gtime/custom/CrestFactorDetrending.py | 80 +++++++++++++++++++++++++++
 gtime/custom/SortedDensity.py         | 75 +++++++++++++++++++++++++
 2 files changed, 155 insertions(+)
 create mode 100644 gtime/custom/CrestFactorDetrending.py
 create mode 100644 gtime/custom/SortedDensity.py

diff --git a/gtime/custom/CrestFactorDetrending.py b/gtime/custom/CrestFactorDetrending.py
new file mode 100644
index 0000000..f579afa
--- /dev/null
+++ b/gtime/custom/CrestFactorDetrending.py
@@ -0,0 +1,80 @@
+import pandas as pd
+from sklearn.utils.validation import check_is_fitted
+from gtime.feature_extraction import MovingCustomFunction
+
+class CrestFactorDetrending(MovingCustomFunction):
+    """Crest factor detrending model.
+    This class removes the trend from the data by using the crest factor definition.
+    Each sample is normalized by its weighted surroundings.
+    Generalized detrending is defined in (eq. 1) of: H. P. Tukuljac, V. Pulkki,
+    H. Gamper, K. Godin, I. J. Tashev and N. Raghuvanshi, "A Sparsity Measure for Echo 
+    Density Growth in General Environments," ICASSP 2019 - 2019 IEEE International 
+    Conference on Acoustics, Speech and Signal Processing (ICASSP), Brighton, United 
+    Kingdom, 2019, pp. 1-5.
+    Parameters
+    ----------
+    window_size : int, optional, default: ``1``
+        The number of previous points on which to compute the crest factor detrending.    
+    is_causal : bool, optional, default: ``True``
+        Whether the current sample is computed based only on the past or also on the future.
+    Examples
+    --------
+    >>> import pandas as pd
+    >>> from gtime.custom import CrestFactorDetrending
+    >>> ts = pd.DataFrame([0, 1, 2, 3, 4, 5])
+    >>> gnrl_dtr = CrestFactorDetrending(window_size=2)
+    >>> gnrl_dtr.fit_transform(ts)
+       0__CrestFactorDetrending
+    0                       NaN
+    1                  1.000000
+    2                  0.800000
+    3                  0.692308
+    4                  0.640000
+    5                  0.609756
+    """
+
+    def __init__(self, window_size: int = 1, is_causal: bool = True):
+        super().__init__(self.detrend)
+        self.window_size = window_size
+        self.is_causal = is_causal
+    
+    def detrend(self, signal):
+        import numpy as np
+        N = 2
+        signal = np.array(signal)
+        large_signal_segment = signal**N
+        large_segment_mean = np.sum(large_signal_segment)
+        if (self.is_causal):
+            ref_index = -1
+        else:
+            ref_index = int(len(signal)/2) 
+        small_signal_segment = signal[ref_index]**N
+        return small_signal_segment/large_segment_mean # (eq. 1)
+     
+    def transform(self, time_series: pd.DataFrame) -> pd.DataFrame:
+        """For every row of ``time_series``, compute the moving crest factor detrending function of the
+         previous ``window_size`` elements.
+        Parameters
+        ----------
+        time_series : pd.DataFrame, shape (n_samples, 1), required
+            The DataFrame on which to compute the moving crest factor detrending.
+        Returns
+        -------
+        time_series_t : pd.DataFrame, shape (n_samples, 1)
+            A DataFrame, with the same length as ``time_series``, containing the rolling
+            moving custom function for each element.
+        """
+        check_is_fitted(self)
+
+        if(self.is_causal):
+            time_series_mvg_dtr = time_series.rolling(self.window_size).apply(
+                self.detrend, raw=self.raw
+            )
+        else:
+            time_series_mvg_dtr = time_series.rolling(self.window_size, min_periods = int(self.window_size/2)).apply(
+                self.detrend, raw=self.raw
+            )
+            time_series_mvg_dtr = time_series_mvg_dtr.dropna()
+            
+        time_series_t = time_series_mvg_dtr
+        return time_series_t
\ No newline at end of file
diff --git a/gtime/custom/SortedDensity.py b/gtime/custom/SortedDensity.py
new file mode 100644
index 0000000..4b5024e
--- /dev/null
+++ b/gtime/custom/SortedDensity.py
@@ -0,0 +1,75 @@
+import pandas as pd
+from sklearn.utils.validation import check_is_fitted
+from gtime.feature_extraction import MovingCustomFunction
+
+class SortedDensity(MovingCustomFunction):
+    """For each row in ``time_series``, compute the sorted density function of the
+    previous ``window_size`` rows. If there are not enough rows, the value is ``NaN``.
+    The sorted density measure is defined in (eq. 2) of: H. P. Tukuljac, V. Pulkki,
+    H. Gamper, K. Godin, I. J. Tashev and N. Raghuvanshi, "A Sparsity Measure for Echo 
+    Density Growth in General Environments," ICASSP 2019 - 2019 IEEE International 
+    Conference on Acoustics, Speech and Signal Processing (ICASSP), Brighton, United 
+    Kingdom, 2019, pp. 1-5.
+    Parameters
+    ----------
+    window_size : int, optional, default: ``1``
+        The number of previous points on which to compute the sorted density.    
+    is_causal : bool, optional, default: ``True``
+        Whether the current sample is computed based only on the past or also on the future.
+    Examples
+    --------
+    >>> import pandas as pd
+    >>> from gtime.custom import SortedDensity
+    >>> ts = pd.DataFrame([0, 1, 2, 3, 4, 5])
+    >>> mv_avg = SortedDensity(window_size=2)
+    >>> mv_avg.fit_transform(ts)
+       0__SortedDensity
+    0                      NaN
+    1                 0.500000
+    2                 0.666667
+    3                 0.700000
+    4                 0.714286
+    5                 0.722222
+    --------
+    """    
+    def __init__(self, window_size: int = 1, is_causal: bool = True):        
+        super().__init__(self.sorted_density)
+        self.window_size = window_size
+        self.is_causal = is_causal
+        
+    def sorted_density(self, signal):
+        import numpy as np
+        t = (np.array(range(len(signal))) + 1)
+        signal = signal[signal.argsort()[::-1]]
+        t = np.reshape(t, signal.shape)
+        SD = np.sum(np.multiply(t, signal))/np.sum(signal) # (eq. 2)
+        SD = SD/(len(signal))
+        return SD
+    
+    def transform(self, time_series: pd.DataFrame) -> pd.DataFrame:
+        """For every row of ``time_series``, compute the moving sorted density function of the
+         previous ``window_size`` elements.
+        Parameters
+        ----------
+        time_series : pd.DataFrame, shape (n_samples, 1), required
+            The DataFrame on which to compute the moving sorted density.
+        Returns
+        -------
+        time_series_t : pd.DataFrame, shape (n_samples, 1)
+            A DataFrame, with the same length as ``time_series``, containing the rolling
+            moving custom function for each element.
+        """
+        check_is_fitted(self)
+
+        if(self.is_causal):
+            time_series_mvg_sd = time_series.rolling(self.window_size).apply(
+                self.sorted_density, raw=self.raw
+            )
+        else:
+            time_series_mvg_sd = time_series.rolling(self.window_size, min_periods = int(self.window_size/2)).apply(
+                self.sorted_density, raw=self.raw
+            )
+            time_series_mvg_sd = time_series_mvg_sd.dropna()
+            
+        time_series_t = time_series_mvg_sd
+        return time_series_t
\ No newline at end of file

From 4e710d198d77185f3413394b9bfa779eac6f0676 Mon Sep 17 00:00:00 2001
From: Helena Peic Tukuljac <hela_su@yahoo.com>
Date: Wed, 18 Mar 2020 12:48:34 +0100
Subject: [PATCH 2/2] Crest factor detrending and sorted density by
 MovingCustomFunction.

---
 gtime/custom/__init__.py                      | 12 ++++
 ...trending.py => crest_factor_detrending.py} | 36 ++++++-----
 .../{SortedDensity.py => sorted_density.py}   | 29 +++++----
 gtime/custom/tests/__init__.py                |  0
 .../tests/test_crest_factor_detrending.py     | 63 +++++++++++++++++++
 gtime/custom/tests/test_sorted_density.py     | 63 +++++++++++++++++++
 6 files changed, 173 insertions(+), 30 deletions(-)
 create mode 100644 gtime/custom/__init__.py
 rename gtime/custom/{CrestFactorDetrending.py => crest_factor_detrending.py} (79%)
 rename gtime/custom/{SortedDensity.py => sorted_density.py} (82%)
 create mode 100644 gtime/custom/tests/__init__.py
 create mode 100644 gtime/custom/tests/test_crest_factor_detrending.py
 create mode 100644 gtime/custom/tests/test_sorted_density.py

diff --git a/gtime/custom/__init__.py b/gtime/custom/__init__.py
new file mode 100644
index 0000000..2f60d66
--- /dev/null
+++ b/gtime/custom/__init__.py
@@ -0,0 +1,12 @@
+"""
+The :mod:`gtime.custom` module implements custom methods for time
+series.
+"""
+
+from .crest_factor_detrending import CrestFactorDetrending
+from .sorted_density import SortedDensity
+
+__all__ = [
+    "CrestFactorDetrending",
+    "SortedDensity",
+]
\ No newline at end of file
diff --git a/gtime/custom/CrestFactorDetrending.py b/gtime/custom/crest_factor_detrending.py
similarity index 79%
rename from gtime/custom/CrestFactorDetrending.py
rename to gtime/custom/crest_factor_detrending.py
index f579afa..b3bdb1f 100644
--- a/gtime/custom/CrestFactorDetrending.py
+++ b/gtime/custom/crest_factor_detrending.py
@@ -1,5 +1,7 @@
 import pandas as pd
 from sklearn.utils.validation import check_is_fitted
+
+from ..base import add_class_name
 from gtime.feature_extraction import MovingCustomFunction
 
 class CrestFactorDetrending(MovingCustomFunction):
@@ -34,23 +36,23 @@ class CrestFactorDetrending(MovingCustomFunction):
     """
 
     def __init__(self, window_size: int = 1, is_causal: bool = True):
-        super().__init__(self.detrend)
+        def detrend(signal):
+            import numpy as np
+            N = 2
+            signal = np.array(signal)
+            large_signal_segment = signal**N
+            large_segment_mean = np.sum(large_signal_segment)
+            if (self.is_causal):
+                ref_index = -1
+            else:
+                ref_index = int(len(signal)/2) 
+            small_signal_segment = signal[ref_index]**N
+            return small_signal_segment/large_segment_mean # (eq. 1)
+        super().__init__(detrend)
         self.window_size = window_size
         self.is_causal = is_causal
     
-    def detrend(self, signal):
-        import numpy as np
-        N = 2
-        signal = np.array(signal)
-        large_signal_segment = signal**N
-        large_segment_mean = np.sum(large_signal_segment)
-        if (self.is_causal):
-            ref_index = -1
-        else:
-            ref_index = int(len(signal)/2) 
-        small_signal_segment = signal[ref_index]**N
-        return small_signal_segment/large_segment_mean # (eq. 1)
-     
+    @add_class_name 
     def transform(self, time_series: pd.DataFrame) -> pd.DataFrame:
         """For every row of ``time_series``, compute the moving crest factor detrending function of the
          previous ``window_size`` elements.
@@ -64,15 +66,15 @@ def transform(self, time_series: pd.DataFrame) -> pd.DataFrame:
             A DataFrame, with the same length as ``time_series``, containing the rolling
             moving custom function for each element.
         """
-        check_is_fitted(self)
+        check_is_fitted(self) 
 
         if(self.is_causal):
             time_series_mvg_dtr = time_series.rolling(self.window_size).apply(
-                self.detrend, raw=self.raw
+                self.custom_feature_function, raw=self.raw
             )
         else:
             time_series_mvg_dtr = time_series.rolling(self.window_size, min_periods = int(self.window_size/2)).apply(
-                self.detrend, raw=self.raw
+                self.custom_feature_function, raw=self.raw
             )
             time_series_mvg_dtr = time_series_mvg_dtr.dropna()
             
diff --git a/gtime/custom/SortedDensity.py b/gtime/custom/sorted_density.py
similarity index 82%
rename from gtime/custom/SortedDensity.py
rename to gtime/custom/sorted_density.py
index 4b5024e..104b03e 100644
--- a/gtime/custom/SortedDensity.py
+++ b/gtime/custom/sorted_density.py
@@ -1,5 +1,7 @@
 import pandas as pd
 from sklearn.utils.validation import check_is_fitted
+
+from ..base import add_class_name
 from gtime.feature_extraction import MovingCustomFunction
 
 class SortedDensity(MovingCustomFunction):
@@ -32,20 +34,20 @@ class SortedDensity(MovingCustomFunction):
     5                 0.722222
     --------
     """    
-    def __init__(self, window_size: int = 1, is_causal: bool = True):        
-        super().__init__(self.sorted_density)
+    def __init__(self, window_size: int = 1, is_causal: bool = True):   
+        def sorted_density(signal):
+            import numpy as np
+            t = (np.array(range(len(signal))) + 1)
+            signal = signal[signal.argsort()[::-1]]
+            t = np.reshape(t, signal.shape)
+            SD = np.sum(np.multiply(t, signal))/np.sum(signal) # (eq. 2)
+            SD = SD/(len(signal))
+            return SD     
+        super().__init__(sorted_density)
         self.window_size = window_size
         self.is_causal = is_causal
-        
-    def sorted_density(self, signal):
-        import numpy as np
-        t = (np.array(range(len(signal))) + 1)
-        signal = signal[signal.argsort()[::-1]]
-        t = np.reshape(t, signal.shape)
-        SD = np.sum(np.multiply(t, signal))/np.sum(signal) # (eq. 2)
-        SD = SD/(len(signal))
-        return SD
     
+    @add_class_name 
     def transform(self, time_series: pd.DataFrame) -> pd.DataFrame:
         """For every row of ``time_series``, compute the moving sorted density function of the
          previous ``window_size`` elements.
@@ -60,14 +62,15 @@ def transform(self, time_series: pd.DataFrame) -> pd.DataFrame:
             moving custom function for each element.
         """
         check_is_fitted(self)
+        
 
         if(self.is_causal):
             time_series_mvg_sd = time_series.rolling(self.window_size).apply(
-                self.sorted_density, raw=self.raw
+                self.custom_feature_function, raw=self.raw
             )
         else:
             time_series_mvg_sd = time_series.rolling(self.window_size, min_periods = int(self.window_size/2)).apply(
-                self.sorted_density, raw=self.raw
+                self.custom_feature_function, raw=self.raw
             )
             time_series_mvg_sd = time_series_mvg_sd.dropna()
             
diff --git a/gtime/custom/tests/__init__.py b/gtime/custom/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/gtime/custom/tests/test_crest_factor_detrending.py b/gtime/custom/tests/test_crest_factor_detrending.py
new file mode 100644
index 0000000..5093d8a
--- /dev/null
+++ b/gtime/custom/tests/test_crest_factor_detrending.py
@@ -0,0 +1,63 @@
+import numpy as np
+import pandas as pd
+import pandas.util.testing as testing
+import pytest
+
+from gtime.custom.crest_factor_detrending import CrestFactorDetrending
+
+def get_input_data():
+    input_data = pd.DataFrame.from_dict({"x_1": [0, 7, 2], "x_2": [2, 10, 4]})
+    input_data.index = [
+        pd.Timestamp(2000, 1, 1),
+        pd.Timestamp(2000, 2, 1),
+        pd.Timestamp(2000, 3, 1),
+    ]
+    return input_data
+
+def get_output_causal():
+    custom_feature = CrestFactorDetrending(window_size=2, is_causal = True)
+    feature_name = custom_feature.__class__.__name__
+    output_causal = pd.DataFrame.from_dict(
+        {
+            f"x_1__{feature_name}": [np.nan, 1.0, 0.07547169811320754],
+            f"x_2__{feature_name}": [np.nan, 0.9615384615384616, 0.13793103448275862],
+        }
+    )
+    output_causal.index = [
+        pd.Timestamp(2000, 1, 1),
+        pd.Timestamp(2000, 2, 1),
+        pd.Timestamp(2000, 3, 1),
+    ]
+    return output_causal
+    
+def get_output_anticausal():
+    custom_feature = CrestFactorDetrending(window_size=2, is_causal = False)
+    feature_name = custom_feature.__class__.__name__
+    output_anticausal = pd.DataFrame.from_dict(
+        {
+            f"x_1__{feature_name}": [1.0, 0.07547169811320754],
+            f"x_2__{feature_name}": [0.9615384615384616, 0.13793103448275862],
+        }
+    )
+    output_anticausal.index = [
+        pd.Timestamp(2000, 2, 1),
+        pd.Timestamp(2000, 3, 1),
+    ]    
+    return output_anticausal
+
+input_data = get_input_data()
+output_causal = get_output_causal()
+output_anticausal = get_output_anticausal()
+
+class TestCrestFactorDetrending:
+    @pytest.mark.parametrize("test_input, expected", [(input_data, output_causal)])
+    def test_crest_factor_detrending_causal(self, test_input, expected):
+        feature = CrestFactorDetrending(window_size=2, is_causal = True)
+        output = feature.fit_transform(test_input)
+        testing.assert_frame_equal(output, expected)
+
+    @pytest.mark.parametrize("test_input, expected", [(input_data, output_anticausal)])
+    def test_crest_factor_detrending_anticausal(self, test_input, expected):
+        feature = CrestFactorDetrending(window_size=2, is_causal = False)
+        output = feature.fit_transform(test_input)
+        testing.assert_frame_equal(output, expected)
\ No newline at end of file
diff --git a/gtime/custom/tests/test_sorted_density.py b/gtime/custom/tests/test_sorted_density.py
new file mode 100644
index 0000000..e012b7d
--- /dev/null
+++ b/gtime/custom/tests/test_sorted_density.py
@@ -0,0 +1,63 @@
+import numpy as np
+import pandas as pd
+import pandas.util.testing as testing
+import pytest
+
+from gtime.custom.sorted_density import SortedDensity
+
+def get_input_data():
+    input_data = pd.DataFrame.from_dict({"x_1": [0, 7, 2], "x_2": [2, 10, 4]})
+    input_data.index = [
+        pd.Timestamp(2000, 1, 1),
+        pd.Timestamp(2000, 2, 1),
+        pd.Timestamp(2000, 3, 1),
+    ]
+    return input_data
+
+def get_output_causal():
+    custom_feature = SortedDensity(window_size=2, is_causal = True)
+    feature_name = custom_feature.__class__.__name__
+    output_causal = pd.DataFrame.from_dict(
+        {
+            f"x_1__{feature_name}": [np.nan, 0.5, 0.6111111111111112],
+            f"x_2__{feature_name}": [np.nan, 0.5833333333333334, 0.6428571428571429],
+        }
+    )
+    output_causal.index = [
+        pd.Timestamp(2000, 1, 1),
+        pd.Timestamp(2000, 2, 1),
+        pd.Timestamp(2000, 3, 1),
+    ]
+    return output_causal
+    
+def get_output_anticausal():
+    custom_feature = SortedDensity(window_size=2, is_causal = False)
+    feature_name = custom_feature.__class__.__name__
+    output_anticausal = pd.DataFrame.from_dict(
+        {
+            f"x_1__{feature_name}": [0.5, 0.6111111111111112],
+            f"x_2__{feature_name}": [0.5833333333333334, 0.6428571428571429],
+        }
+    )
+    output_anticausal.index = [
+        pd.Timestamp(2000, 2, 1),
+        pd.Timestamp(2000, 3, 1),
+    ]    
+    return output_anticausal
+
+input_data = get_input_data()
+output_causal = get_output_causal()
+output_anticausal = get_output_anticausal()
+
+class TestSortedDensity:
+    @pytest.mark.parametrize("test_input, expected", [(input_data, output_causal)])
+    def test_crest_factor_detrending_causal(self, test_input, expected):
+        feature = SortedDensity(window_size=2, is_causal = True)
+        output = feature.fit_transform(test_input)
+        testing.assert_frame_equal(output, expected)
+
+    @pytest.mark.parametrize("test_input, expected", [(input_data, output_anticausal)])
+    def test_crest_factor_detrending_anticausal(self, test_input, expected):
+        feature = SortedDensity(window_size=2, is_causal = False)
+        output = feature.fit_transform(test_input)
+        testing.assert_frame_equal(output, expected)
\ No newline at end of file