From 97ff8a58d67d9e979387da8315611f6975890c16 Mon Sep 17 00:00:00 2001
From: Ihssan
Date: Wed, 26 Dec 2018 21:06:40 +0200
Subject: [PATCH 1/5] wrote methods for dynamic error thresholding

---
 mlprimitives/timeseries_errors.py | 244 ++++++++++++++++++++++++++++++
 1 file changed, 244 insertions(+)
 create mode 100644 mlprimitives/timeseries_errors.py

diff --git a/mlprimitives/timeseries_errors.py b/mlprimitives/timeseries_errors.py
new file mode 100644
index 00000000..b79e2e22
--- /dev/null
+++ b/mlprimitives/timeseries_errors.py
@@ -0,0 +1,244 @@
+"""
+Methods to do dynamic error thresholding on timeseries data.
+Implementation inspired by: https://arxiv.org/pdf/1802.04431.pdf
+"""
+
+import numpy as np
+import more_itertools as mit
+
+def get_forecast_error(y_hat, y_true, window_size=5, batch_size=30, smoothing_percent=0.05, smoothed=True):
+    """
+    Calculates the forecasting error for two arrays of data. If smoothed errors are desired,
+    applies a moving average to the residuals.
+    Args:
+        y_hat (list): forecasted values. len(y_hat)==len(y_true).
+        y_true (list): true values. len(y_hat)==len(y_true).
+        window_size (int): number of batches per window; controls the smoothing span.
+        batch_size (int): number of values per batch.
+        smoothing_percent (float): fraction of a window used as the smoothing half-width.
+        smoothed (bool): whether the returned errors should be smoothed.
+    Returns:
+        (list): error residuals. Smoothed if specified by user.
+    """
+    errors = [abs(y_h - y_t) for y_h, y_t in zip(y_hat, y_true)]
+
+    if not smoothed:
+        return errors
+
+    historical_error_window = int(window_size * batch_size * smoothing_percent)
+    moving_avg = []
+    for i in range(len(errors)):
+        left_window = i - historical_error_window
+        right_window = i + historical_error_window + 1
+        if left_window < 0:
+            left_window = 0
+        if right_window > len(errors):
+            right_window = len(errors)
+        moving_avg.append(np.mean(errors[left_window:right_window]))
+
+    return moving_avg
+
+def extract_anomalies(y_true, smoothed_errors, window_size, batch_size, error_buffer):
+    """
+    Extracts anomalies from the errors.
+    Args:
+        y_true (list): true values.
+        smoothed_errors (list): smoothed error residuals.
+        window_size (int): number of batches per window.
+        batch_size (int): number of values per batch.
+        error_buffer (int): number of indices around an anomaly that are also flagged.
+    Returns: (tuple) anomaly sequences and a score for each sequence.
+    """
+    if len(y_true) <= batch_size * window_size:
+        raise ValueError("Window size (%s) larger than y_true (len=%s)." % (window_size * batch_size, len(y_true)))
+    num_windows = (len(y_true) - (batch_size * window_size)) // batch_size
+
+    anomalies_indices = []
+
+
+    for i in range(num_windows + 1):
+        prev_index = i * batch_size
+        curr_index = (window_size * batch_size) + (i * batch_size)
+
+        if i == num_windows:
+            curr_index = len(y_true)
+
+
+        window_smoothed_errors = smoothed_errors[prev_index:curr_index]
+        window_y_true = y_true[prev_index:curr_index]
+
+        epsilon, sd_threshold = compute_threshold(window_smoothed_errors, error_buffer)
+        window_anom_indices = get_anomalies(window_smoothed_errors, window_y_true, sd_threshold, i, anomalies_indices, len(y_true))
+
+        # get anomalies from inverse of smoothed errors
+        # This was done in the implementation of NASA paper but
+        # wasn't referenced in the paper
+
+        # we get the inverse by flipping around the mean
+        smoothed_errors_inv = [mu + (mu-e) for e in window_smoothed_errors]
+        epsilon_inv, sd_inv = compute_threshold(smoothed_errors_inv, error_buffer)
+        inv_anom_indices = get_anomalies(smoothed_errors_inv, window_y_true, sd_inv, i, anomalies_indices, len(y_true))
+
+        anomalies_indices = list(set(anomalies_indices + inv_anom_indices))
+
+        anomalies_indices.extend([i_a + i*batch_size for i_a in window_anom_indices])
+
+    # group anomalies
+    anomalies_indices = sorted(list(set(anomalies_indices)))
+    anomalies_groups = [list(group) for group in mit.consecutive_groups(anomalies_indices)]
+    anomaly_sequences = [(g[0], g[-1]) for g in anomalies_groups if not g[0] == g[-1]]
+
+    # generate "scores" for anomalies based on the max distance from epsilon for each sequence
+    anomalies_scores = []
+    for e_seq in anomaly_sequences:
+        denominator = np.mean(smoothed_errors) + np.std(smoothed_errors)
+        score = max([abs(smoothed_errors[x] - epsilon) / denominator
+                    for x in range(e_seq[0], e_seq[1])])
+        anomalies_scores.append(score)
+
+    return anomaly_sequences, anomalies_scores
+
+def compute_threshold(smoothed_errors, error_buffer, sd_limit=12.0):
+    """
+    Helper method for extract_anomalies().
+    Calculates the epsilon (threshold) for anomalies.
+    """
+    mu = np.mean(smoothed_errors)
+    sigma = np.std(smoothed_errors)
+
+    max_epsilon = 0
+    sigma_threshold = sd_limit
+
+    # The threshold is determined dynamically by testing multiple Zs.
+    # z is drawn from an ordered set of positive values representing the
+    # number of standard deviations above mean(smoothed_errors)
+
+    # here we iterate in increments of 0.5 on the range that the NASA paper found to be good
+    for z in np.arange(2.5, sd_limit, 0.5):
+        epsilon = mu + (sigma * z)
+        below_epsilon, below_indices, above_epsilon = [], [], []
+
+        for i in range(len(smoothed_errors)):
+            e = smoothed_errors[i]
+            if e < epsilon:
+                # save to compute delta mean and delta std
+                # these are important for epsilon calculation
+                below_epsilon.append(e)
+                below_indices.append(i)
+            if e > epsilon:
+                # above_epsilon values are anomalies
+                for j in range(0, error_buffer):
+                    if (i + j) not in above_epsilon and (i + j) < len(smoothed_errors):
+                        above_epsilon.append(i + j)
+                    if (i - j) not in above_epsilon and (i - j) >= 0:
+                        above_epsilon.append(i - j)
+
+        if len(above_epsilon) == 0:
+            continue
+
+        # generate sequences
+        above_epsilon = sorted(list(set(above_epsilon)))
+        groups = [list(group) for group in mit.consecutive_groups(above_epsilon)]
+        above_sequences = [(g[0], g[-1]) for g in groups if not g[0] == g[-1]]
+
+        mean_perc_decrease = (mu - np.mean(pruned_errors)) / mu
+        sd_perc_decrease = (sigma - np.std(pruned_errors)) / sd
+        epsilon = (mean_perc_decrease + sd_perc_decrease) / (len(above_sequences)**2 + len(anomaly_indices))
+
+        # update the largest epsilon we've seen so far
+        if epsilon > max_epsilon:
+            sd_threshold = z
+            max_epsilon = epsilon
+
+    # sd_threshold can be multiplied by sigma to get epsilon
+    return max_epsilon, sd_threshold
+
+
+def get_anomalies(smoothed_errors, y_true, z):
+    """
+    Helper method to get anomalies.
+    """
+
+    mu = np.mean(smoothed_errors)
+    sigma = np.std(smoothed_errors)
+
+    epsilon = mu + z*sigma
+
+    # compare to epsilon
+    errors_seq, anomaly_indices, max_error_below_e = group_consecutive_anomalies(smoothed_errors, epsilon, y_true, error_buffer, window, all_anomalies)
+
+    if len(errors_seq) > 0:
+        anomaly_indices = prune_anomalies(errors_seq, smoothed_errors, non_anomaly_max, anomaly_indices)
+    else:
+        print("No anomalies found.")
+
+    return anomaly_indices
+
+def group_consecutive_anomalies(smoothed_errors, epsilon, y_true, error_buffer, window, all_anomalies, batch_size=30):
+    channel_std = np.std(y_true)
+    upper_percentile, lower_percentile = np.percentile(y_true, [95, 5]
+    accepted_range = upper_percentile - lower_percentile
+
+    anomaly_indices = []
+    max_error_below_e = 0
+
+    for i in range(len(smoothed_errors)):
+        if smoothed_errors[i] <= epsilon or smoothed_errors[i] <= 0.05*accepted_range:
+            # not an anomaly
+            continue
+        for j in range(error_buffer):
+            if (i + j) < len(smoothed_errors) and (i + j) not in anomaly_indices:
+                anomaly_indices.append(i + j)
+            if (i - j) >= 0 and (i - j) not in anomaly_indices:
+                anomaly_indices.append(i - j)
+
+    # get all the errors that are below epsilon and which weren't identified as anomalies to process them
+    for i in range(len(smoothed_errors)):
+        adjusted_index = i + (window - 1) * batch_size
+        if smoothed_errors[i] > max_error_below_e and adjusted_index not in all_anomalies and i not in anomaly_indices:
+            max_error_below_e = smoothed_errors[i]
+
+    # group anomalies into continuous sequences
+    anomaly_indices = sorted(list(set(anomaly_indices)))
+    groups = [list(group) for group in mit.consecutive_groups(anomaly_indices)]
+    E_seq = [(g[0], g[-1]) for g in group if g[0] != g[-1]]
+
+    return E_seq, anomaly_indices, max_error_below_e
+
+def prune_anomalies(E_seq, smoothed_errors, max_error_below_e, anomaly_indices):
+    """ Helper method that prunes anomalous sequences whose max error does not
+    show a minimum percent decrease from the next largest error.
+    """
+    # min accepted perc decrease btwn max errors in anomalous sequences
+    MIN_PERCENT_DECREASE = 0.05
+    E_seq_max, smoothed_errors_max = [], []
+
+    for error_seq in E_seq:
+        sliced_errors = smoothed_errors[error_seq[0]:error_seq[1]]
+        E_seq_max.append(max(sliced_errors))
+        smoothed_errors_max.append(max(sliced_errors))
+
+    smoothed_errors_max.sort(reverse=True)
+
+    if max_error_below_e > 0:
+        smoothed_errors_max.append(max_error_below_e)
+    indices_remove = []
+
+    for i in range(len(smoothed_errors_max)):
+        if i < len(smoothed_errors_max) - 1:
+            delta = smoothed_errors_max[i] - smoothed_errors_max[i + 1]
+            perc_change = delta / smoothed_errors_max[i]
+            if perc_change < MIN_PERCENT_DECREASE:
+                indices_remove.append(E_seq_max.index(smoothed_errors_max[i]))
+
+    for index in sorted(indices_remove, reverse = True):
+        del E_seq[index]
+
+    pruned_indices = []
+
+    for i in anomaly_indices:
+        for error_seq in E_seq:
+            if i >= error_seq[0] and i <= error_seq[1]:
+                pruned_indices.append(i)
+
+    return pruned_indices

From 715a4cbd11742c070849777c5539f110498902e5 Mon Sep 17 00:00:00 2001
From: Ihssan
Date: Wed, 26 Dec 2018 22:20:54 +0200
Subject: [PATCH 2/5] Issue 48: created json primitive files

---
 ...s.timeseries_errors.extract_anomalies.json | 62 +++++++++++++++++
 ....timeseries_errors.get_forecast_error.json | 66 +++++++++++++++++++
 mlprimitives/timeseries_errors.py             | 66 ++++++++++---------
 3 files changed, 163 insertions(+), 31 deletions(-)
 create mode 100644 mlblocks_primitives/mlprimitives.timeseries_errors.extract_anomalies.json
 create mode 100644 mlblocks_primitives/mlprimitives.timeseries_errors.get_forecast_error.json

diff --git a/mlblocks_primitives/mlprimitives.timeseries_errors.extract_anomalies.json b/mlblocks_primitives/mlprimitives.timeseries_errors.extract_anomalies.json
new file mode 100644
index 00000000..6848c154
--- /dev/null
+++ b/mlblocks_primitives/mlprimitives.timeseries_errors.extract_anomalies.json
@@ -0,0 +1,62 @@
+{
+    "name": "mlprimitives.timeseries_errors.extract_anomalies",
+    "author": "Ihssan Tinawi",
+    "description": "mlprimitives.timeseries_errors.extract_anomalies",
+    "classifiers": {
+        "type": "preprocessor",
+        "subtype": "feature_extractor"
+    },
+    "modalities": ["timeseries"],
+    "primitive": "mlprimitives.timeseries_errors.extract_anomalies",
+    "produce": {
+        "args": [
+            {
+                "name": "y_true",
+                "type": "np.array"
+            },
+            {
+                "name": "smoothed_errors",
+                "type": "list"
+            },
+            {
+                "name": "window_size",
+                "type": "int"
+            },
+            {
+                "name": "batch_size",
+                "type": "int"
+            },
+            {
+                "name": "error_buffer",
+                "type": "int"
+            }
+        ],
+        "output": [
+            {
+                "name": "anomaly_sequences",
+                "type": "list"
+            },
+            {
+                "name": "anomalies_scores",
+                "type": "list"
+            }
+        ]
+    },
+    "hyperparameters": {
+        "fixed": {
+            "window_size": {
+                "type": "int",
+                "default": 50
+            },
+            "batch_size": {
+                "type": "int",
+                "default": 200
+            },
+            "error_buffer": {
+                "type": "int",
+                "default": 10
+            }
+        },
+        "tunable": {}
+    }
+}
diff --git a/mlblocks_primitives/mlprimitives.timeseries_errors.get_forecast_error.json b/mlblocks_primitives/mlprimitives.timeseries_errors.get_forecast_error.json
new file mode 100644
index 00000000..eb176fbf
--- /dev/null
+++ b/mlblocks_primitives/mlprimitives.timeseries_errors.get_forecast_error.json
@@ -0,0 +1,66 @@
+{
+    "name": "mlprimitives.timeseries_errors.get_forecast_errors",
+    "author": "Ihssan Tinawi",
+    "description": "mlprimitives.timeseries_errors.get_forecast_errors",
+    "classifiers": {
+        "type": "preprocessor",
+        "subtype": "feature_extractor"
+    },
+    "modalities": ["timeseries"],
+    "primitive": "mlprimitives.timeseries_errors.get_forecast_errors",
+    "produce": {
+        "args": [
+            {
+                "name": "y_hat",
+                "type": "np.array"
+            },
+            {
+                "name": "y_true",
+                "type": "np.array"
+            },
+            {
+                "name": "window_size",
+                "type": "int"
+            },
+            {
+                "name": "batch_size",
+                "type": "int"
+            },
+            {
+                "name": "smoothing_percent",
+                "type": "float"
+            },
+            {
+                "name": "smoothed",
+                "type": "bool"
+            }
+        ],
+        "output": [
+            {
+                "name": "moving_avg",
+                "type": "list"
+            }
+        ]
+    },
+    "hyperparameters": {
+        "fixed": {
+            "window_size": {
+                "type": "int",
+                "default": 5
+            },
+            "batch_size": {
+                "type": "int",
+                "default": 30
+            },
+            "smoothing_percent": {
+                "type": "float",
+                "default": 0.05
+            },
+            "smoothed": {
+                "type": "bool",
+                "default": True
+            }
+        },
+        "tunable": {}
+    }
+}
diff --git a/mlprimitives/timeseries_errors.py b/mlprimitives/timeseries_errors.py
index b79e2e22..52b76dfe 100644
--- a/mlprimitives/timeseries_errors.py
+++ b/mlprimitives/timeseries_errors.py
@@ -1,10 +1,9 @@
-"""
-Methods to do dynamic error thresholding on timeseries data.
-Implementation inspired by: https://arxiv.org/pdf/1802.04431.pdf
-"""
-
-import numpy as np
 import more_itertools as mit
+import numpy as np
+
+# Methods to do dynamic error thresholding on timeseries data
+# Implementation inspired by: https://arxiv.org/pdf/1802.04431.pdf
+

 def get_forecast_error(y_hat, y_true, window_size=5, batch_size=30, smoothing_percent=0.05, smoothed=True):
     """
@@ -38,6 +37,7 @@ def get_forecast_error(y_hat, y_true, window_size=5, batch_size=30, smoothing_pe

     return moving_avg

+
 def extract_anomalies(y_true, smoothed_errors, window_size, batch_size, error_buffer):
     """
     Extracts anomalies from the errors.
@@ -55,7 +55,6 @@ def extract_anomalies(y_true, smoothed_errors, window_size, batch_size, error_bu

     anomalies_indices = []

-
     for i in range(num_windows + 1):
         prev_index = i * batch_size
         curr_index = (window_size * batch_size) + (i * batch_size)
@@ -63,25 +62,26 @@ def extract_anomalies(y_true, smoothed_errors, window_size, batch_size, error_bu
         if i == num_windows:
             curr_index = len(y_true)

-
         window_smoothed_errors = smoothed_errors[prev_index:curr_index]
         window_y_true = y_true[prev_index:curr_index]

         epsilon, sd_threshold = compute_threshold(window_smoothed_errors, error_buffer)
-        window_anom_indices = get_anomalies(window_smoothed_errors, window_y_true, sd_threshold, i, anomalies_indices, len(y_true))
+
+        window_anom_indices = get_anomalies(window_smoothed_errors, window_y_true, sd_threshold, i, anomalies_indices, error_buffer)

         # get anomalies from inverse of smoothed errors
         # This was done in the implementation of NASA paper but
         # wasn't referenced in the paper
-
+
         # we get the inverse by flipping around the mean
-        smoothed_errors_inv = [mu + (mu-e) for e in window_smoothed_errors]
+        mu = np.mean(window_smoothed_errors)
+        smoothed_errors_inv = [mu + (mu - e) for e in window_smoothed_errors]
         epsilon_inv, sd_inv = compute_threshold(smoothed_errors_inv, error_buffer)
         inv_anom_indices = get_anomalies(smoothed_errors_inv, window_y_true, sd_inv, i, anomalies_indices, len(y_true))

         anomalies_indices = list(set(anomalies_indices + inv_anom_indices))

-        anomalies_indices.extend([i_a + i*batch_size for i_a in window_anom_indices])
+        anomalies_indices.extend([i_a + i * batch_size for i_a in window_anom_indices])

     # group anomalies
     anomalies_indices = sorted(list(set(anomalies_indices)))
@@ -98,6 +98,7 @@ def extract_anomalies(y_true, smoothed_errors, window_size, batch_size, error_bu

     return anomaly_sequences, anomalies_scores

+
 def compute_threshold(smoothed_errors, error_buffer, sd_limit=12.0):
     """
     Helper method for extract_anomalies().
@@ -107,10 +108,10 @@ def compute_threshold(smoothed_errors, error_buffer, sd_limit=12.0):
     sigma = np.std(smoothed_errors)

     max_epsilon = 0
-    sigma_threshold = sd_limit
+    sd_threshold = sd_limit

     # The threshold is determined dynamically by testing multiple Zs.
-    # z is drawn from an ordered set of positive values representing the 
+    # z is drawn from an ordered set of positive values representing the
     # number of standard deviations above mean(smoothed_errors)

     # here we iterate in increments of 0.5 on the range that the NASA paper found to be good
@@ -140,10 +141,10 @@ def compute_threshold(smoothed_errors, error_buffer, sd_limit=12.0):
         above_epsilon = sorted(list(set(above_epsilon)))
         groups = [list(group) for group in mit.consecutive_groups(above_epsilon)]
         above_sequences = [(g[0], g[-1]) for g in groups if not g[0] == g[-1]]
-
-        mean_perc_decrease = (mu - np.mean(pruned_errors)) / mu
-        sd_perc_decrease = (sigma - np.std(pruned_errors)) / sd
-        epsilon = (mean_perc_decrease + sd_perc_decrease) / (len(above_sequences)**2 + len(anomaly_indices))
+
+        mean_perc_decrease = (mu - np.mean(below_epsilon)) / mu
+        sd_perc_decrease = (sigma - np.std(below_epsilon)) / sigma
+        epsilon = (mean_perc_decrease + sd_perc_decrease) / (len(above_sequences)**2 + len(above_epsilon))

         # update the largest epsilon we've seen so far
         if epsilon > max_epsilon:
@@ -154,7 +155,7 @@ def compute_threshold(smoothed_errors, error_buffer, sd_limit=12.0):
     return max_epsilon, sd_threshold


-def get_anomalies(smoothed_errors, y_true, z):
+def get_anomalies(smoothed_errors, y_true, z, window, all_anomalies, error_buffer):
     """
     Helper method to get anomalies.
     """
@@ -162,28 +163,28 @@ def get_anomalies(smoothed_errors, y_true, z):
     mu = np.mean(smoothed_errors)
     sigma = np.std(smoothed_errors)

-    epsilon = mu + z*sigma
+    epsilon = mu + (z * sigma)

     # compare to epsilon
     errors_seq, anomaly_indices, max_error_below_e = group_consecutive_anomalies(smoothed_errors, epsilon, y_true, error_buffer, window, all_anomalies)

     if len(errors_seq) > 0:
-        anomaly_indices = prune_anomalies(errors_seq, smoothed_errors, non_anomaly_max, anomaly_indices)
+        anomaly_indices = prune_anomalies(errors_seq, smoothed_errors, max_error_below_e, anomaly_indices)
     else:
         print("No anomalies found.")

     return anomaly_indices

+
 def group_consecutive_anomalies(smoothed_errors, epsilon, y_true, error_buffer, window, all_anomalies, batch_size=30):
-    channel_std = np.std(y_true)
-    upper_percentile, lower_percentile = np.percentile(y_true, [95, 5]
+    upper_percentile, lower_percentile = np.percentile(y_true, [95, 5])
     accepted_range = upper_percentile - lower_percentile

     anomaly_indices = []
     max_error_below_e = 0
-
+
     for i in range(len(smoothed_errors)):
-        if smoothed_errors[i] <= epsilon or smoothed_errors[i] <= 0.05*accepted_range:
+        if smoothed_errors[i] <= epsilon or smoothed_errors[i] <= 0.05 * accepted_range:
             # not an anomaly
             continue
         for j in range(error_buffer):
             if (i + j) < len(smoothed_errors) and (i + j) not in anomaly_indices:
                 anomaly_indices.append(i + j)
@@ -192,19 +193,22 @@ def group_consecutive_anomalies(smoothed_errors, epsilon, y_true, error_buffer,
             if (i - j) >= 0 and (i - j) not in anomaly_indices:
                 anomaly_indices.append(i - j)

-    # get all the errors that are below epsilon and which weren't identified as anomalies to process them
+    # get all the errors that are below epsilon and which
+    # weren't identified as anomalies to process them
    for i in range(len(smoothed_errors)):
         adjusted_index = i + (window - 1) * batch_size
-        if smoothed_errors[i] > max_error_below_e and adjusted_index not in all_anomalies and i not in anomaly_indices:
-            max_error_below_e = smoothed_errors[i]
+        if smoothed_errors[i] > max_error_below_e and adjusted_index not in all_anomalies:
+            if i not in anomaly_indices:
+                max_error_below_e = smoothed_errors[i]

     # group anomalies into continuous sequences
     anomaly_indices = sorted(list(set(anomaly_indices)))
     groups = [list(group) for group in mit.consecutive_groups(anomaly_indices)]
-    E_seq = [(g[0], g[-1]) for g in group if g[0] != g[-1]]
+    E_seq = [(g[0], g[-1]) for g in groups if g[0] != g[-1]]

     return E_seq, anomaly_indices, max_error_below_e

+
 def prune_anomalies(E_seq, smoothed_errors, max_error_below_e, anomaly_indices):
     """ Helper method that prunes anomalous sequences whose max error does not
     show a minimum percent decrease from the next largest error.
@@ -231,7 +235,7 @@ def prune_anomalies(E_seq, smoothed_errors, max_error_below_e, anomaly_indices):
         if perc_change < MIN_PERCENT_DECREASE:
             indices_remove.append(E_seq_max.index(smoothed_errors_max[i]))

-    for index in sorted(indices_remove, reverse = True):
+    for index in sorted(indices_remove, reverse=True):
         del E_seq[index]

     pruned_indices = []
@@ -239,6 +243,6 @@ def prune_anomalies(E_seq, smoothed_errors, max_error_below_e, anomaly_indices):
     for i in anomaly_indices:
         for error_seq in E_seq:
             if i >= error_seq[0] and i <= error_seq[1]:
-                pruned_indices.append(i) 
+                pruned_indices.append(i)

     return pruned_indices

From 0aa74a5ef921f70d87571107410a1339e76ada8b Mon Sep 17 00:00:00 2001
From: Ihssan
Date: Thu, 3 Jan 2019 20:12:26 +0200
Subject: [PATCH 3/5] fixed lint errors, removed args that should be
 hyperparameters

---
 ...s.timeseries_errors.extract_anomalies.json | 16 +---
 ....timeseries_errors.get_forecast_error.json | 27 ++----
 mlprimitives/timeseries_errors.py             | 92 ++++++++++++++-----
 3 files changed, 78 insertions(+), 57 deletions(-)

diff --git a/mlblocks_primitives/mlprimitives.timeseries_errors.extract_anomalies.json b/mlblocks_primitives/mlprimitives.timeseries_errors.extract_anomalies.json
index 6848c154..7655396a 100644
--- a/mlblocks_primitives/mlprimitives.timeseries_errors.extract_anomalies.json
+++ b/mlblocks_primitives/mlprimitives.timeseries_errors.extract_anomalies.json
@@ -17,18 +17,6 @@
             {
                 "name": "smoothed_errors",
                 "type": "list"
-            },
-            {
-                "name": "window_size",
-                "type": "int"
-            },
-            {
-                "name": "batch_size",
-                "type": "int"
-            },
-            {
-                "name": "error_buffer",
-                "type": "int"
             }
         ],
         "output": [
@@ -43,7 +31,7 @@
         ]
     },
     "hyperparameters": {
-        "fixed": {
+        "tunable": {
            "window_size": {
                "type": "int",
                "default": 50
@@ -57,6 +45,6 @@
                "default": 10
            }
        },
-        "tunable": {}
+        "fixed": {}
    }
}
diff --git a/mlblocks_primitives/mlprimitives.timeseries_errors.get_forecast_error.json b/mlblocks_primitives/mlprimitives.timeseries_errors.get_forecast_error.json
index eb176fbf..ec3b9289 100644
--- a/mlblocks_primitives/mlprimitives.timeseries_errors.get_forecast_error.json
+++ b/mlblocks_primitives/mlprimitives.timeseries_errors.get_forecast_error.json
@@ -17,22 +17,6 @@
             {
                 "name": "y_true",
                 "type": "np.array"
-            },
-            {
-                "name": "window_size",
-                "type": "int"
-            },
-            {
-                "name": "batch_size",
-                "type": "int"
-            },
-            {
-                "name": "smoothing_percent",
-                "type": "float"
-            },
-            {
-                "name": "smoothed",
-                "type": "bool"
             }
         ],
         "output": [
@@ -43,7 +27,7 @@
         ]
     },
     "hyperparameters": {
-        "fixed": {
+        "tunable": {
            "window_size": {
                "type": "int",
                "default": 5
@@ -55,12 +39,13 @@
            "smoothing_percent": {
                "type": "float",
                "default": 0.05
-            },
+            }
+        },
+        "fixed": {
            "smoothed": {
                "type": "bool",
-                "default": True
+                "default": true
            }
-        },
-        "tunable": {}
+        }
    }
}
diff --git a/mlprimitives/timeseries_errors.py b/mlprimitives/timeseries_errors.py
index 52b76dfe..1d296bf3 100644
--- a/mlprimitives/timeseries_errors.py
+++ b/mlprimitives/timeseries_errors.py
@@ -5,7 +5,12 @@
 # Implementation inspired by: https://arxiv.org/pdf/1802.04431.pdf


-def get_forecast_error(y_hat, y_true, window_size=5, batch_size=30, smoothing_percent=0.05, smoothed=True):
+def get_forecast_error(y_hat,
+                       y_true,
+                       window_size=5,
+                       batch_size=30,
+                       smoothing_percent=0.05,
+                       smoothed=True):
     """
     Calculates the forecasting error for two arrays of data. If smoothed errors are desired,
     applies a moving average to the residuals.
@@ -31,8 +36,10 @@ def get_forecast_error(y_hat, y_true, window_size=5, batch_size=30, smoothing_pe
         right_window = i + historical_error_window + 1
         if left_window < 0:
             left_window = 0
+
         if right_window > len(errors):
             right_window = len(errors)
+
         moving_avg.append(np.mean(errors[left_window:right_window]))

     return moving_avg
@@ -50,7 +57,9 @@ def extract_anomalies(y_true, smoothed_errors, window_size, batch_size, error_bu
     Returns: (tuple) anomaly sequences and a score for each sequence.
     """
     if len(y_true) <= batch_size * window_size:
-        raise ValueError("Window size (%s) larger than y_true (len=%s)." % (window_size * batch_size, len(y_true)))
+        raise ValueError("Window size (%s) larger than y_true (len=%s)."
+                         % (window_size * batch_size, len(y_true)))
+
     num_windows = (len(y_true) - (batch_size * window_size)) // batch_size

     anomalies_indices = []
@@ -67,7 +76,14 @@ def extract_anomalies(y_true, smoothed_errors, window_size, batch_size, error_bu

         epsilon, sd_threshold = compute_threshold(window_smoothed_errors, error_buffer)

-        window_anom_indices = get_anomalies(window_smoothed_errors, window_y_true, sd_threshold, i, anomalies_indices, error_buffer)
+        window_anom_indices = get_anomalies(
+            window_smoothed_errors,
+            window_y_true,
+            sd_threshold,
+            i,
+            anomalies_indices,
+            error_buffer
+        )

         # get anomalies from inverse of smoothed errors
         # This was done in the implementation of NASA paper but
@@ -77,7 +93,14 @@ def extract_anomalies(y_true, smoothed_errors, window_size, batch_size, error_bu
         mu = np.mean(window_smoothed_errors)
         smoothed_errors_inv = [mu + (mu - e) for e in window_smoothed_errors]
         epsilon_inv, sd_inv = compute_threshold(smoothed_errors_inv, error_buffer)
-        inv_anom_indices = get_anomalies(smoothed_errors_inv, window_y_true, sd_inv, i, anomalies_indices, len(y_true))
+        inv_anom_indices = get_anomalies(
+            smoothed_errors_inv,
+            window_y_true,
+            sd_inv,
+            i,
+            anomalies_indices,
+            error_buffer
+        )

         anomalies_indices = list(set(anomalies_indices + inv_anom_indices))
@@ -92,17 +115,19 @@ def extract_anomalies(y_true, smoothed_errors, window_size, batch_size, error_bu
     anomalies_scores = []
     for e_seq in anomaly_sequences:
         denominator = np.mean(smoothed_errors) + np.std(smoothed_errors)
-        score = max([abs(smoothed_errors[x] - epsilon) / denominator
-                    for x in range(e_seq[0], e_seq[1])])
+        score = max([
+            abs(smoothed_errors[x] - epsilon) / denominator
+            for x in range(e_seq[0], e_seq[1])
+        ])
+
         anomalies_scores.append(score)

     return anomaly_sequences, anomalies_scores


 def compute_threshold(smoothed_errors, error_buffer, sd_limit=12.0):
-    """
-    Helper method for extract_anomalies().
-    Calculates the epsilon (threshold) for anomalies.
+    """Helper method for `extract_anomalies`.
+    Calculates the epsilon (threshold) for anomalies.
     """
     mu = np.mean(smoothed_errors)
     sigma = np.std(smoothed_errors)
@@ -126,11 +151,13 @@ def compute_threshold(smoothed_errors, error_buffer, sd_limit=12.0):
                 # these are important for epsilon calculation
                 below_epsilon.append(e)
                 below_indices.append(i)
+
             if e > epsilon:
                 # above_epsilon values are anomalies
                 for j in range(0, error_buffer):
                     if (i + j) not in above_epsilon and (i + j) < len(smoothed_errors):
                         above_epsilon.append(i + j)
+
                     if (i - j) not in above_epsilon and (i - j) >= 0:
                         above_epsilon.append(i - j)

@@ -144,7 +171,8 @@ def compute_threshold(smoothed_errors, error_buffer, sd_limit=12.0):

         mean_perc_decrease = (mu - np.mean(below_epsilon)) / mu
         sd_perc_decrease = (sigma - np.std(below_epsilon)) / sigma
-        epsilon = (mean_perc_decrease + sd_perc_decrease) / (len(above_sequences)**2 + len(above_epsilon))
+        epsilon = (mean_perc_decrease + sd_perc_decrease) /\
+            (len(above_sequences)**2 + len(above_epsilon))

         # update the largest epsilon we've seen so far
         if epsilon > max_epsilon:
@@ -166,17 +194,35 @@ def get_anomalies(smoothed_errors, y_true, z, window, all_anomalies, error_buffe
     epsilon = mu + (z * sigma)

     # compare to epsilon
-    errors_seq, anomaly_indices, max_error_below_e = group_consecutive_anomalies(smoothed_errors, epsilon, y_true, error_buffer, window, all_anomalies)
+    errors_seq, anomaly_indices, max_error_below_e = group_consecutive_anomalies(
+        smoothed_errors,
+        epsilon,
+        y_true,
+        error_buffer,
+        window,
+        all_anomalies
+    )

     if len(errors_seq) > 0:
-        anomaly_indices = prune_anomalies(errors_seq, smoothed_errors, max_error_below_e, anomaly_indices)
+        anomaly_indices = prune_anomalies(
+            errors_seq,
+            smoothed_errors,
+            max_error_below_e,
+            anomaly_indices
+        )
     else:
         print("No anomalies found.")

     return anomaly_indices


-def group_consecutive_anomalies(smoothed_errors, epsilon, y_true, error_buffer, window, all_anomalies, batch_size=30):
+def group_consecutive_anomalies(smoothed_errors,
+                                epsilon,
+                                y_true,
+                                error_buffer,
+                                window,
+                                all_anomalies,
+                                batch_size=30):
     upper_percentile, lower_percentile = np.percentile(y_true, [95, 5])
     accepted_range = upper_percentile - lower_percentile

@@ -187,9 +233,11 @@ def group_consecutive_anomalies(smoothed_errors, epsilon, y_true, error_buffer,
         if smoothed_errors[i] <= epsilon or smoothed_errors[i] <= 0.05 * accepted_range:
             # not an anomaly
             continue
+
         for j in range(error_buffer):
             if (i + j) < len(smoothed_errors) and (i + j) not in anomaly_indices:
                 anomaly_indices.append(i + j)
+
             if (i - j) >= 0 and (i - j) not in anomaly_indices:
                 anomaly_indices.append(i - j)

@@ -204,22 +252,22 @@ def group_consecutive_anomalies(smoothed_errors, epsilon, y_true, error_buffer,
     # group anomalies into continuous sequences
     anomaly_indices = sorted(list(set(anomaly_indices)))
     groups = [list(group) for group in mit.consecutive_groups(anomaly_indices)]
-    E_seq = [(g[0], g[-1]) for g in groups if g[0] != g[-1]]
+    e_seq = [(g[0], g[-1]) for g in groups if g[0] != g[-1]]

-    return E_seq, anomaly_indices, max_error_below_e
+    return e_seq, anomaly_indices, max_error_below_e


-def prune_anomalies(E_seq, smoothed_errors, max_error_below_e, anomaly_indices):
+def prune_anomalies(e_seq, smoothed_errors, max_error_below_e, anomaly_indices):
     """ Helper method that prunes anomalous sequences whose max error does not
     show a minimum percent decrease from the next largest error.
""" # min accepted perc decrease btwn max errors in anomalous sequences MIN_PERCENT_DECREASE = 0.05 - E_seq_max, smoothed_errors_max = [], [] + e_seq_max, smoothed_errors_max = [], [] - for error_seq in E_seq: + for error_seq in e_seq: sliced_errors = smoothed_errors[error_seq[0]:error_seq[1]] - E_seq_max.append(max(sliced_errors)) + e_seq_max.append(max(sliced_errors)) smoothed_errors_max.append(max(sliced_errors)) smoothed_errors_max.sort(reverse=True) @@ -233,15 +281,15 @@ def prune_anomalies(E_seq, smoothed_errors, max_error_below_e, anomaly_indices): delta = smoothed_errors_max[i] - smoothed_errors_max[i + 1] perc_change = delta / smoothed_errors_max[i] if perc_change < MIN_PERCENT_DECREASE: - indices_remove.append(E_seq_max.index(smoothed_errors_max[i])) + indices_remove.append(e_seq_max.index(smoothed_errors_max[i])) for index in sorted(indices_remove, reverse=True): - del E_seq[index] + del e_seq[index] pruned_indices = [] for i in anomaly_indices: - for error_seq in E_seq: + for error_seq in e_seq: if i >= error_seq[0] and i <= error_seq[1]: pruned_indices.append(i) From b8aa84b3f781b31da98d70a3f80854933a124478 Mon Sep 17 00:00:00 2001 From: Ihssan Date: Thu, 3 Jan 2019 20:24:25 +0200 Subject: [PATCH 4/5] changed names of files to avoid failing tests --- ... => mlprimitives.timeseries_errors.get_forecast_errors.json} | 0 mlprimitives/timeseries_errors.py | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename mlblocks_primitives/{mlprimitives.timeseries_errors.get_forecast_error.json => mlprimitives.timeseries_errors.get_forecast_errors.json} (100%) diff --git a/mlblocks_primitives/mlprimitives.timeseries_errors.get_forecast_error.json b/mlblocks_primitives/mlprimitives.timeseries_errors.get_forecast_errors.json similarity index 100% rename from mlblocks_primitives/mlprimitives.timeseries_errors.get_forecast_error.json rename to mlblocks_primitives/mlprimitives.timeseries_errors.get_forecast_errors.json diff --git a/mlprimitives/timeseries_errors.py b/mlprimitives/timeseries_errors.py index 1d296bf3..3ba0498a 100644 --- a/mlprimitives/timeseries_errors.py +++ b/mlprimitives/timeseries_errors.py @@ -5,7 +5,7 @@ # Implementation inspired by: https://arxiv.org/pdf/1802.04431.pdf -def get_forecast_error(y_hat, +def get_forecast_errors(y_hat, y_true, window_size=5, batch_size=30, From b3d08e701abc30de8c696a14969c1850974f4488 Mon Sep 17 00:00:00 2001 From: Ihssan Date: Thu, 3 Jan 2019 21:04:47 +0200 Subject: [PATCH 5/5] fixed lint error: under indented lines --- mlprimitives/timeseries_errors.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/mlprimitives/timeseries_errors.py b/mlprimitives/timeseries_errors.py index 3ba0498a..19eb2ae7 100644 --- a/mlprimitives/timeseries_errors.py +++ b/mlprimitives/timeseries_errors.py @@ -6,11 +6,11 @@ def get_forecast_errors(y_hat, - y_true, - window_size=5, - batch_size=30, - smoothing_percent=0.05, - smoothed=True): + y_true, + window_size=5, + batch_size=30, + smoothing_percent=0.05, + smoothed=True): """ Calculates the forecasting error for two arrays of data. If smoothed errors desired, runs EWMA.