From 361428749ef952b6c952672f3058a2a7080a8464 Mon Sep 17 00:00:00 2001
From: Plamen Valentinov <plamen@pythiac.com>
Date: Wed, 19 Sep 2018 17:09:49 +0200
Subject: [PATCH 01/32] Working on a new better tested RandomForestClassifier
 with correct min max range of values

---
 ...learn.ensemble.RandomForestClassifier.json | 28 +++++++++++++------
 1 file changed, 19 insertions(+), 9 deletions(-)

diff --git a/mlblocks_primitives/sklearn.ensemble.RandomForestClassifier.json b/mlblocks_primitives/sklearn.ensemble.RandomForestClassifier.json
index df01686a..9cbb693a 100644
--- a/mlblocks_primitives/sklearn.ensemble.RandomForestClassifier.json
+++ b/mlblocks_primitives/sklearn.ensemble.RandomForestClassifier.json
@@ -46,9 +46,14 @@
             }
         },
         "tunable": {
+            "n_estimators": {
+                "type": "int",
+                "default": 10,
+                "range": [1, 500]
+            },
             "criterion": {
                 "type": "str",
-                "default": "entropy",
+                "default": "gini",
                 "values": ["entropy", "gini"]
             },
             "max_features": {
@@ -62,19 +67,24 @@
                 "range": [1, 30]
             },
             "min_samples_split": {
-                "type": "float",
-                "default": 0.1,
-                "range": [0.0001, 0.5]
+                "type": "int",
+                "default": 2,
+                "range": [2, 100]
             },
             "min_samples_leaf": {
+                "type": "int",
+                "default": 1,
+                "range": [1, 100]
+            },
+            "min_weight_fraction_leaf": {
                 "type": "float",
-                "default": 0.1,
-                "range": [0.0001, 0.5]
+                "default": 0.0,
+                "range": [0.0, 0.5]
             },
-            "n_estimators": {
+            "max_leaf_nodes": {
                 "type": "int",
-                "default": 30,
-                "values": [2, 500]
+                "default": null,
+                "range": [1, 100]
             },
             "class_weight": {
                 "type": "str",

From feaf11d21a39952f7fb48843f848b35a69279806 Mon Sep 17 00:00:00 2001
From: Plamen Valentinov <plamen@pythiac.com>
Date: Thu, 20 Sep 2018 12:42:02 +0200
Subject: [PATCH 02/32] Modified and created primitives that refer to
 sklearn.ensemble that don't require base_estimator or estimator.

---
 ...sklearn.ensemble.ExtraTreesClassifier.json | 116 ++++++++++++++++
 .../sklearn.ensemble.ExtraTreesRegressor.json | 112 ++++++++++++++++
 ...n.ensemble.GradientBoostingClassifier.json | 120 +++++++++++++++++
 ...rn.ensemble.GradientBoostingRegressor.json | 124 ++++++++++++++++++
 .../sklearn.ensemble.IsolationForest.json     |  77 +++++++++++
 ...learn.ensemble.RandomForestClassifier.json |  38 ++++--
 ...klearn.ensemble.RandomForestRegressor.json |  54 ++++++--
 ...sklearn.ensemble.RandomTreesEmbedding.json |  98 ++++++++++++++
 8 files changed, 716 insertions(+), 23 deletions(-)
 create mode 100644 mlblocks_primitives/sklearn.ensemble.ExtraTreesClassifier.json
 create mode 100644 mlblocks_primitives/sklearn.ensemble.ExtraTreesRegressor.json
 create mode 100644 mlblocks_primitives/sklearn.ensemble.GradientBoostingClassifier.json
 create mode 100644 mlblocks_primitives/sklearn.ensemble.GradientBoostingRegressor.json
 create mode 100644 mlblocks_primitives/sklearn.ensemble.IsolationForest.json
 create mode 100644 mlblocks_primitives/sklearn.ensemble.RandomTreesEmbedding.json

diff --git a/mlblocks_primitives/sklearn.ensemble.ExtraTreesClassifier.json b/mlblocks_primitives/sklearn.ensemble.ExtraTreesClassifier.json
new file mode 100644
index 00000000..efa755a4
--- /dev/null
+++ b/mlblocks_primitives/sklearn.ensemble.ExtraTreesClassifier.json
@@ -0,0 +1,116 @@
+{
+    "name": "sklearn.ensemble.ExtraTreesClassifier",
+    "author": "Carles Sala <carles@pythiac.com>",
+    "documentation": "http://scikit-learn.org/stable/modules/generated/sklearn.ensemble.ExtraTreesClassifier.html",
+    "description": "Scikit-learn ExtraTreesClassifier. Implements a meta estimator that fits a number of randomized decision trees.",
+    "classifiers": {
+        "type": "estimator",
+        "subtype": "classifier"
+    },
+    "modalities": [],
+    "primitive": "sklearn.ensemble.ExtraTreesClassifier",
+    "fit": {
+        "method": "fit",
+        "args": [
+            {
+                "name": "X",
+                "type": "ndarray"
+            },
+            {
+                "name": "y",
+                "type": "ndarray"
+            }
+        ]
+    },
+    "produce": {
+        "method": "predict",
+        "args": [
+            {
+                "name": "X",
+                "type": "ndarray"
+            }
+        ],
+        "output": [
+            {
+                "name": "y",
+                "type": "ndarray"
+            }
+        ]
+    },
+    "hyperparameters": {
+        "fixed": {
+            "n_jobs": {
+                "type": "int",
+                "default": -1
+            }
+        },
+        "tunable": {
+            "n_estimators": {
+                "type": "int",
+                "default": 10,
+                "range": [1, 500]
+            },
+            "criterion": {
+                "type": "str",
+                "default": "gini",
+                "values": ["entropy", "gini"]
+            },
+            "max_features": {
+                "type": "str",
+                "default": null,
+                "values": [null, "auto", "log2", "sqrt"]
+            },
+            "max_depth": {
+                "type": "int",
+                "default": null,
+                "range": [1, 30]
+            },
+            "min_samples_split": {
+                "type": "int",
+                "default": 2,
+                "range": [2, 100]
+            },
+            "min_samples_leaf": {
+                "type": "int",
+                "default": 1,
+                "range": [1, 100]
+            },
+            "min_weight_fraction_leaf": {
+                "type": "float",
+                "default": 0.0,
+                "range": [0.0, 0.5]
+            },
+            "max_leaf_nodes": {
+                "type": "int",
+                "default": null,
+                "range": [2, 1000]
+            },
+            "min_impurity_decrease": {
+                "type": "float",
+                "default": 0.0,
+                "range": [0.0, 1000.0]
+            },
+            "bootstrap": {
+                "type": "bool",
+                "default": false
+            },
+            "oob_score": {
+                "type": "bool",
+                "default": false
+            },
+            "verbose": {
+                "type": "int",
+                "default": 0,
+                "range": [0, 1000]
+            },
+            "warm_start": {
+                "type": "bool",
+                "default": false
+            },
+            "class_weight": {
+                "type": "iterable",
+                "default": null
+            }
+        }
+    }
+}
diff --git a/mlblocks_primitives/sklearn.ensemble.ExtraTreesRegressor.json b/mlblocks_primitives/sklearn.ensemble.ExtraTreesRegressor.json
new file mode 100644
index 00000000..f8ef31cd
--- /dev/null
+++ b/mlblocks_primitives/sklearn.ensemble.ExtraTreesRegressor.json
@@ -0,0 +1,112 @@
+{
+    "name": "sklearn.ensemble.ExtraTreesRegressor",
+    "author": "Carles Sala <carles@pythiac.com>",
+    "documentation": "http://scikit-learn.org/stable/modules/generated/sklearn.ensemble.ExtraTreesRegressor.html",
+    "description": "Scikit-learn ExtraTreesRegressor. Implements a meta estimator that fits a number of randomized decision trees (a.k.a. extra-trees) on various sub-samples of the dataset and use averaging to improve the predictive accuracy and control over-fitting.",
+    "classifiers": {
+        "type": "estimator",
+        "subtype": "regressor"
+    },
+    "modalities": [],
+    "primitive": "sklearn.ensemble.ExtraTreesRegressor",
+    "fit": {
+        "method": "fit",
+        "args": [
+            {
+                "name": "X",
+                "type": "ndarray"
+            },
+            {
+                "name": "y",
+                "type": "ndarray"
+            }
+        ]
+    },
+    "produce": {
+        "method": "predict",
+        "args": [
+            {
+                "name": "X",
+                "type": "ndarray"
+            }
+        ],
+        "output": [
+            {
+                "name": "y",
+                "type": "ndarray"
+            }
+        ]
+    },
+    "hyperparameters": {
+        "fixed": {
+            "n_jobs": {
+                "type": "int",
+                "default": -1
+            }
+        },
+        "tunable": {
+            "n_estimators": {
+                "type": "int",
+                "default": 10,
+                "range": [1, 500]
+            },
+            "criterion": {
+                "type": "str",
+                "default": "mse",
+                "values": ["mae", "mse"]
+            },
+            "max_features": {
+                "type": "str",
+                "default": null,
+                "values": [null, "auto", "log2", "sqrt"]
+            },
+            "max_depth": {
+                "type": "int",
+                "default": null,
+                "range": [1, 30]
+            },
+            "min_samples_split": {
+                "type": "int",
+                "default": 2,
+                "range": [2, 100]
+            },
+            "min_samples_leaf": {
+                "type": "int",
+                "default": 1,
+                "range": [1, 100]
+            },
+            "min_weight_fraction_leaf": {
+                "type": "float",
+                "default": 0.0,
+                "range": [0.0, 0.5]
+            },
+            "max_leaf_nodes": {
+                "type": "int",
+                "default": null,
+                "range": [2, 1000]
+            },
+            "min_impurity_decrease": {
+                "type": "float",
+                "default": 0.0,
+                "range": [0.0, 1000.0]
+            },
+            "bootstrap": {
+                "type": "bool",
+                "default": false
+            },
+            "oob_score": {
+                "type": "bool",
+                "default": false
+            },
+            "verbose": {
+                "type": "int",
+                "default": 0,
+                "range": [0, 1000]
+            },
+            "warm_start": {
+                "type": "bool",
+                "default": false
+            }
+        }
+    }
+}
diff --git a/mlblocks_primitives/sklearn.ensemble.GradientBoostingClassifier.json b/mlblocks_primitives/sklearn.ensemble.GradientBoostingClassifier.json
new file mode 100644
index 00000000..9eb9a98e
--- /dev/null
+++ b/mlblocks_primitives/sklearn.ensemble.GradientBoostingClassifier.json
@@ -0,0 +1,120 @@
+{
+    "name": "sklearn.ensemble.GradientBoostingClassifier",
+    "author": "Carles Sala <carles@pythiac.com>",
+    "documentation": "http://scikit-learn.org/stable/modules/generated/sklearn.ensemble.GradientBoostingClassifier.html",
+    "description": "Scikit-learn GradientBoostingClassifier.",
+    "classifiers": {
+        "type": "estimator",
+        "subtype": "classifier"
+    },
+    "modalities": [],
+    "primitive": "sklearn.ensemble.GradientBoostingClassifier",
+    "fit": {
+        "method": "fit",
+        "args": [
+            {
+                "name": "X",
+                "type": "ndarray"
+            },
+            {
+                "name": "y",
+                "type": "ndarray"
+            }
+        ]
+    },
+    "produce": {
+        "method": "predict",
+        "args": [
+            {
+                "name": "X",
+                "type": "ndarray"
+            }
+        ],
+        "output": [
+            {
+                "name": "y",
+                "type": "ndarray"
+            }
+        ]
+    },
+    "hyperparameters": {
+        "fixed": {},
+        "tunable": {
+            "loss": {
+                "type": "str",
+                "default": "deviance",
+                "values": ["deviance", "exponential"]
+            },
+            "learning_rate": {
+                "type": "float",
+                "default": 0.1
+            },
+            "n_estimators": {
+                "type": "int",
+                "default": 100,
+                "range": [1, 500]
+            },
+            "max_depth": {
+                "type": "int",
+                "default": 3,
+                "range": [1, 30]
+            },
+            "criterion": {
+                "type": "str",
+                "default": "friedman_mse",
+                "values": ["friedman_mse", "mse", "mae"]
+            },
+            "min_samples_split": {
+                "type": "int",
+                "default": 2,
+                "range": [2, 100]
+            },
+            "min_samples_leaf": {
+                "type": "int",
+                "default": 1,
+                "range": [1, 100]
+            },
+            "min_weight_fraction_leaf": {
+                "type": "float",
+                "default": 0.0,
+                "range": [0.0, 0.5]
+            },
+            "subsample": {
+                "type": "float",
+                "default": 1.0
+            },
+            "max_features": {
+                "type": "str",
+                "default": null,
+                "values": [null, "auto", "log2", "sqrt"]
+            },
+            "max_leaf_nodes": {
+                "type": "int",
+                "default": null,
+                "range": [2, 1000]
+            },
+            "min_impurity_decrease": {
+                "type": "float",
+                "default": 0.0,
+                "range": [0.0, 1000.0]
+            },
+            "init": {
+                "type": "object",
+                "default": null
+            },
+            "verbose": {
+                "type": "int",
+                "default": 0,
+                "range": [0, 100]
+            },
+            "warm_start": {
+                "type": "bool",
+                "default": false
+            },
+            "presort": {
+                "type": "bool",
+                "default": false
+            }
+        }
+    }
+}
diff --git a/mlblocks_primitives/sklearn.ensemble.GradientBoostingRegressor.json b/mlblocks_primitives/sklearn.ensemble.GradientBoostingRegressor.json
new file mode 100644
index 00000000..8a73bdd0
--- /dev/null
+++ b/mlblocks_primitives/sklearn.ensemble.GradientBoostingRegressor.json
@@ -0,0 +1,124 @@
+{
+    "name": "sklearn.ensemble.GradientBoostingRegressor",
+    "author": "Carles Sala <carles@pythiac.com>",
+    "documentation": "http://scikit-learn.org/stable/modules/generated/sklearn.ensemble.GradientBoostingRegressor.html",
+    "description": "Scikit-learn GradientBoostingRegressor.",
+    "classifiers": {
+        "type": "estimator",
+        "subtype": "regressor"
+    },
+    "modalities": [],
+    "primitive": "sklearn.ensemble.GradientBoostingRegressor",
+    "fit": {
+        "method": "fit",
+        "args": [
+            {
+                "name": "X",
+                "type": "ndarray"
+            },
+            {
+                "name": "y",
+                "type": "ndarray"
+            }
+        ]
+    },
+    "produce": {
+        "method": "predict",
+        "args": [
+            {
+                "name": "X",
+                "type": "ndarray"
+            }
+        ],
+        "output": [
+            {
+                "name": "y",
+                "type": "ndarray"
+            }
+        ]
+    },
+    "hyperparameters": {
+        "fixed": {},
+        "tunable": {
+            "loss": {
+                "type": "str",
+                "default": "ls",
+                "values": ["ls", "lad", "huber", "quantile"]
+            },
+            "learning_rate": {
+                "type": "float",
+                "default": 0.1
+            },
+            "n_estimators": {
+                "type": "int",
+                "default": 100,
+                "range": [1, 500]
+            },
+            "max_depth": {
+                "type": "int",
+                "default": 3,
+                "range": [1, 30]
+            },
+            "criterion": {
+                "type": "str",
+                "default": "friedman_mse",
+                "values": ["friedman_mse", "mse", "mae"]
+            },
+            "min_samples_split": {
+                "type": "int",
+                "default": 2,
+                "range": [2, 100]
+            },
+            "min_samples_leaf": {
+                "type": "int",
+                "default": 1,
+                "range": [1, 100]
+            },
+            "min_weight_fraction_leaf": {
+                "type": "float",
+                "default": 0.0,
+                "range": [0.0, 0.5]
+            },
+            "subsample": {
+                "type": "float",
+                "default": 1.0
+            },
+            "max_features": {
+                "type": "str",
+                "default": null,
+                "values": [null, "auto", "log2", "sqrt"]
+            },
+            "max_leaf_nodes": {
+                "type": "int",
+                "default": null,
+                "range": [2, 1000]
+            },
+            "min_impurity_decrease": {
+                "type": "float",
+                "default": 0.0,
+                "range": [0.0, 1000.0]
+            },
+            "alpha": {
+                "type": "float",
+                "default": 0.9
+            },
+            "init": {
+                "type": "object",
+                "default": null
+            },
+            "verbose": {
+                "type": "int",
+                "default": 0,
+                "range": [0, 100]
+            },
+            "warm_start": {
+                "type": "bool",
+                "default": false
+            },
+            "presort": {
+                "type": "bool",
+                "default": false
+            }
+        }
+    }
+}
diff --git a/mlblocks_primitives/sklearn.ensemble.IsolationForest.json b/mlblocks_primitives/sklearn.ensemble.IsolationForest.json
new file mode 100644
index 00000000..b7a22fb9
--- /dev/null
+++ b/mlblocks_primitives/sklearn.ensemble.IsolationForest.json
@@ -0,0 +1,77 @@
+{
+    "name": "sklearn.ensemble.IsolationForest",
+    "author": "Carles Sala <carles@pythiac.com>",
+    "documentation": "http://scikit-learn.org/stable/modules/generated/sklearn.ensemble.IsolationForest.html",
+    "description": "Scikit-learn IsolationForest. The IsolationForest ‘isolates’ observations by randomly selecting a feature and then randomly selecting a split value between the maximum and minimum values of the selected feature.",
+    "classifiers": {
+        "type": "estimator",
+        "subtype": "classifier"
+    },
+    "modalities": [],
+    "primitive": "sklearn.ensemble.IsolationForest",
+    "fit": {
+        "method": "fit",
+        "args": [
+            {
+                "name": "X",
+                "type": "ndarray"
+            },
+            {
+                "name": "y",
+                "type": "ndarray"
+            }
+        ]
+    },
+    "produce": {
+        "method": "predict",
+        "args": [
+            {
+                "name": "X",
+                "type": "ndarray"
+            }
+        ],
+        "output": [
+            {
+                "name": "y",
+                "type": "ndarray"
+            }
+        ]
+    },
+    "hyperparameters": {
+        "fixed": {
+            "n_jobs": {
+                "type": "int",
+                "default": -1
+            },
+            "contamination": {
+                "type": "float",
+                "default": 0.1,
+                "range": [0.0, 0.5]
+            }
+        },
+        "tunable": {
+            "n_estimators": {
+                "type": "int",
+                "default": 100,
+                "range": [1, 500]
+            },
+            "max_samples": {
+                "type": "str",
+                "default": "auto"
+            },
+            "max_features": {
+                "type": "float",
+                "default": 1.0
+            },
+            "bootstrap": {
+                "type": "bool",
+                "default": false
+            },
+            "verbose": {
+                "type": "int",
+                "default": 0,
+                "range": [0, 100]
+            }
+        }
+    }
+}
diff --git a/mlblocks_primitives/sklearn.ensemble.RandomForestClassifier.json b/mlblocks_primitives/sklearn.ensemble.RandomForestClassifier.json
index 9cbb693a..e9822a15 100644
--- a/mlblocks_primitives/sklearn.ensemble.RandomForestClassifier.json
+++ b/mlblocks_primitives/sklearn.ensemble.RandomForestClassifier.json
@@ -9,7 +9,6 @@
     },
     "modalities": [],
     "primitive": "sklearn.ensemble.RandomForestClassifier",
-    "validation_dataset": "wine",
     "fit": {
         "method": "fit",
         "args": [
@@ -19,7 +18,7 @@
             },
             {
                 "name": "y",
-                "type": "array"
+                "type": "ndarray"
             }
         ]
     },
@@ -34,7 +33,7 @@
         "output": [
             {
                 "name": "y",
-                "type": "array"
+                "type": "ndarray"
             }
         ]
     },
@@ -59,11 +58,11 @@
             "max_features": {
                 "type": "str",
                 "default": null,
-                "range": [null, "auto", "log2"]
+                "values": [null, "auto", "log2", "sqrt"]
             },
             "max_depth": {
                 "type": "int",
-                "default": 10,
+                "default": null,
                 "range": [1, 30]
             },
             "min_samples_split": {
@@ -84,12 +83,33 @@
             "max_leaf_nodes": {
                 "type": "int",
                 "default": null,
-                "range": [1, 100]
+                "range": [2, 1000]
+            },
+            "min_impurity_decrease": {
+                "type": "float",
+                "default": 0.0,
+                "range": [0.0, 1000.0]
+            },
+            "bootstrap": {
+                "type": "bool",
+                "default": true
+            },
+            "oob_score": {
+                "type": "bool",
+                "default": false
+            },
+            "verbose": {
+                "type": "int",
+                "default": 0,
+                "range": [0, 1000]
+            },
+            "warm_start": {
+                "type": "bool",
+                "default": false
             },
             "class_weight": {
-                "type": "str",
-                "default": null,
-                "range": [null, "balanced"]
+                "type": "iterable",
+                "default": null
             }
         }
     }
diff --git a/mlblocks_primitives/sklearn.ensemble.RandomForestRegressor.json b/mlblocks_primitives/sklearn.ensemble.RandomForestRegressor.json
index 7cc9e08f..d9728d65 100644
--- a/mlblocks_primitives/sklearn.ensemble.RandomForestRegressor.json
+++ b/mlblocks_primitives/sklearn.ensemble.RandomForestRegressor.json
@@ -9,7 +9,6 @@
     },
     "modalities": [],
     "primitive": "sklearn.ensemble.RandomForestRegressor",
-    "validation_dataset": "boston",
     "fit": {
         "method": "fit",
         "args": [
@@ -19,7 +18,7 @@
             },
             {
                 "name": "y",
-                "type": "array"
+                "type": "ndarray"
             }
         ]
     },
@@ -34,7 +33,7 @@
         "output": [
             {
                 "name": "y",
-                "type": "array"
+                "type": "ndarray"
             }
         ]
     },
@@ -46,6 +45,10 @@
             }
         },
         "tunable": {
+            "n_estimators": {
+                "type": "int",
+                "default": 10
+            },
             "criterion": {
                 "type": "str",
                 "default": "mse",
@@ -53,28 +56,51 @@
             },
             "max_features": {
                 "type": "str",
-                "default": null,
-                "range": [null, "auto", "log2"]
+                "default": "auto",
+                "values": [null, "auto", "log2", "sqrt"]
             },
             "max_depth": {
                 "type": "int",
-                "default": 10,
+                "default": null,
                 "range": [1, 30]
             },
             "min_samples_split": {
-                "type": "float",
-                "default": 0.1,
-                "range": [0.0001, 0.5]
+                "type": "int",
+                "default": 2,
+                "range": [2, 1000]
             },
             "min_samples_leaf": {
+                "type": "int",
+                "default": 1,
+                "range": [1, 1000]
+            },
+            "min_weight_fraction_leaf": {
                 "type": "float",
-                "default": 0.1,
-                "range": [0.0001, 0.5]
+                "default": 0.0
             },
-            "n_estimators": {
+            "max_leaf_nodes": {
                 "type": "int",
-                "default": 30,
-                "values": [2, 500]
+                "default": null
+            },
+            "min_impurity_decrease": {
+                "type": "float",
+                "default": 0.0
+            },
+            "bootstrap": {
+                "type": "bool",
+                "default": true
+            },
+            "oob_score": {
+                "type": "bool",
+                "default": false
+            },
+            "verbose": {
+                "type": "int",
+                "default": 0
+            },
+            "warm_start": {
+                "type": "bool",
+                "default": false
             }
         }
     }
diff --git a/mlblocks_primitives/sklearn.ensemble.RandomTreesEmbedding.json b/mlblocks_primitives/sklearn.ensemble.RandomTreesEmbedding.json
new file mode 100644
index 00000000..49601790
--- /dev/null
+++ b/mlblocks_primitives/sklearn.ensemble.RandomTreesEmbedding.json
@@ -0,0 +1,98 @@
+{
+    "name": "sklearn.ensemble.RandomTreesEmbedding",
+    "author": "Carles Sala <carles@pythiac.com>",
+    "documentation": "http://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomTreesEmbedding.html",
+    "description": "Scikit-learn RandomTreesEmbedding. An unsupervised transformation of a dataset to a high-dimensional sparse representation.",
+    "classifiers": {
+        "type": "estimator",
+        "subtype": "classifier"
+    },
+    "modalities": [],
+    "primitive": "sklearn.ensemble.RandomTreesEmbedding",
+    "fit": {
+        "method": "fit",
+        "args": [
+            {
+                "name": "X",
+                "type": "ndarray"
+            },
+            {
+                "name": "y",
+                "type": "ndarray"
+            }
+        ]
+    },
+    "produce": {
+        "method": "transform",
+        "args": [
+            {
+                "name": "X",
+                "type": "ndarray"
+            }
+        ],
+        "output": [
+            {
+                "name": "X",
+                "type": "Sparse"
+            }
+        ]
+    },
+    "hyperparameters": {
+        "fixed": {
+            "n_jobs": {
+                "type": "int",
+                "default": -1
+            },
+            "sparse_output": {
+                "type": "bool",
+                "default": true
+            }
+        },
+        "tunable": {
+            "n_estimators": {
+                "type": "int",
+                "default": 10,
+                "range": [1, 500]
+            },
+            "max_depth": {
+                "type": "int",
+                "default": 5,
+                "range": [1, 30]
+            },
+            "min_samples_split": {
+                "type": "int",
+                "default": 2,
+                "range": [2, 100]
+            },
+            "min_samples_leaf": {
+                "type": "int",
+                "default": 1,
+                "range": [1, 100]
+            },
+            "min_weight_fraction_leaf": {
+                "type": "float",
+                "default": 0.0,
+                "range": [0.0, 0.5]
+            },
+            "max_leaf_nodes": {
+                "type": "int",
+                "default": null,
+                "range": [2, 1000]
+            },
+            "min_impurity_decrease": {
+                "type": "float",
+                "default": 0.0,
+                "range": [0.0, 1000.0]
+            },
+            "verbose": {
+                "type": "int",
+                "default": 0,
+                "range": [0, 1000]
+            },
+            "warm_start": {
+                "type": "bool",
+                "default": false
+            }
+        }
+    }
+}

From 725de7b61ece6784b9979b28a424f1e80e9ca9c5 Mon Sep 17 00:00:00 2001
From: Plamen Valentinov <plamen@pythiac.com>
Date: Thu, 20 Sep 2018 16:05:12 +0200
Subject: [PATCH 03/32] Decomposition PCA and DL done

---
 ...earn.decomposition.DictionaryLearning.json | 96 +++++++++++++++++++
 .../sklearn.decomposition.PCA.json            | 25 ++++-
 2 files changed, 119 insertions(+), 2 deletions(-)
 create mode 100644 mlblocks_primitives/sklearn.decomposition.DictionaryLearning.json

diff --git a/mlblocks_primitives/sklearn.decomposition.DictionaryLearning.json b/mlblocks_primitives/sklearn.decomposition.DictionaryLearning.json
new file mode 100644
index 00000000..112908ee
--- /dev/null
+++ b/mlblocks_primitives/sklearn.decomposition.DictionaryLearning.json
@@ -0,0 +1,96 @@
+{
+    "name": "sklearn.decomposition.DictionaryLearning",
+    "author": "Carles Sala <carles@pythiac.com>",
+    "documentation": "http://scikit-learn.org/stable/modules/generated/sklearn.decomposition.DictionaryLearning.html",
+    "description": "Dictionary learning.",
+    "classifiers": {
+        "type": "preprocessor",
+        "subtype": "feature_selector"
+    },
+    "modalities": [],
+    "primitive": "sklearn.decomposition.DictionaryLearning",
+    "fit": {
+        "method": "fit",
+        "args": [
+            {
+                "name": "X",
+                "type": "ndarray"
+            }
+        ]
+    },
+    "produce": {
+        "method": "transform",
+        "args": [
+            {
+                "name": "X",
+                "type": "ndarray"
+            }
+        ],
+        "output": [
+            {
+                "name": "X",
+                "type": "ndarray"
+            }
+        ]
+    },
+    "hyperparameters": {
+        "fixed": {
+            "split_sign": {
+                "type": "bool",
+                "default": false
+            },
+            "n_jobs": {
+                "type": "int",
+                "default": -1
+            }
+        },
+        "tunable": {
+            "n_components": {
+                "type": "int",
+                "default": null
+            },
+            "alpha": {
+                "type": "float",
+                "default": 1.0
+            },
+            "max_iter": {
+                "type": "int",
+                "default": 1000
+            },
+            "tol": {
+                "type": "float",
+                "default": 1e-08
+            },
+            "fit_algorithm": {
+                "type": "str",
+                "default": "lars",
+                "values": ["lars", "cd"]
+            },
+            "transform_algorithm": {
+                "type": "str",
+                "default": "omp",
+                "values": ["lasso_lars", "lasso_cd", "lars", "omp", "threshold"]
+            },
+            "transform_n_nonzero_coefs": {
+                "type": "int",
+                "default": null
+            },
+            "transform_alpha": {
+                "type": "float",
+                "default": 1.0
+            },
+            "code_init": {
+                "type": "iterable",
+                "default": null
+            },
+            "dict_init": {
+                "type": "iterable",
+                "default": null
+            },
+            "verbose": {
+                "type": "bool",
+                "default": false
+            }
+        }
+    }
+}
diff --git a/mlblocks_primitives/sklearn.decomposition.PCA.json b/mlblocks_primitives/sklearn.decomposition.PCA.json
index 57958b46..27355d9b 100644
--- a/mlblocks_primitives/sklearn.decomposition.PCA.json
+++ b/mlblocks_primitives/sklearn.decomposition.PCA.json
@@ -9,7 +9,6 @@
     },
     "modalities": [],
     "primitive": "sklearn.decomposition.PCA",
-    "validation_dataset": "wine",
     "fit": {
         "method": "fit",
         "args": [
@@ -35,10 +34,32 @@
         ]
     },
     "hyperparameters": {
-        "FIXME": "This needs to be reviewed",
         "fixed": {
+            "copy": {
+                "type": "bool",
+                "default": true
+            }
         },
         "tunable": {
+            "tol": {
+                "type": "float",
+                "default": 0.0,
+                "range": [0.0, 100.0]
+            },
+            "iterated_power": {
+                "type": "int",
+                "default": "auto",
+                "range": [0, 1000]
+            },
+            "whiten": {
+                "type": "bool",
+                "default": false
+            },
+            "svd_solver": {
+                "type": "str",
+                "default": "auto",
+                "values": ["auto", "arpack", "full", "randomized"]
+            }
         }
     }
 }

From 88cf33d68b139df4fdb6cb041c75a43262266d88 Mon Sep 17 00:00:00 2001
From: Plamen Valentinov <plamen@pythiac.com>
Date: Thu, 20 Sep 2018 16:35:15 +0200
Subject: [PATCH 04/32] Added AdaBoost and Bagging from sklearn

---
 .../sklearn.ensemble.AdaBoostClassifier.json  | 64 ++++++++++++++
 .../sklearn.ensemble.AdaBoostRegressor.json   | 64 ++++++++++++++
 .../sklearn.ensemble.BaggingClassifier.json   | 87 +++++++++++++++++++
 .../sklearn.ensemble.BaggingRegressor.json    | 87 +++++++++++++++++++
 4 files changed, 302 insertions(+)
 create mode 100644 mlblocks_primitives/sklearn.ensemble.AdaBoostClassifier.json
 create mode 100644 mlblocks_primitives/sklearn.ensemble.AdaBoostRegressor.json
 create mode 100644 mlblocks_primitives/sklearn.ensemble.BaggingClassifier.json
 create mode 100644 mlblocks_primitives/sklearn.ensemble.BaggingRegressor.json

diff --git a/mlblocks_primitives/sklearn.ensemble.AdaBoostClassifier.json b/mlblocks_primitives/sklearn.ensemble.AdaBoostClassifier.json
new file mode 100644
index 00000000..f7962be2
--- /dev/null
+++ b/mlblocks_primitives/sklearn.ensemble.AdaBoostClassifier.json
@@ -0,0 +1,64 @@
+{
+    "name": "sklearn.ensemble.AdaBoostClassifier",
+    "author": "Carles Sala <carles@pythiac.com>",
+    "documentation": "http://scikit-learn.org/stable/modules/generated/sklearn.ensemble.AdaBoostClassifier.html",
+    "description": "Scikit-learn AdaBoostClassifier.",
+    "classifiers": {
+        "type": "estimator",
+        "subtype": "classifier"
+    },
+    "modalities": [],
+    "primitive": "sklearn.ensemble.AdaBoostClassifier",
+    "fit": {
+        "method": "fit",
+        "args": [
+            {
+                "name": "X",
+                "type": "ndarray"
+            },
+            {
+                "name": "y",
+                "type": "ndarray"
+            }
+        ]
+    },
+    "produce": {
+        "method": "predict",
+        "args": [
+            {
+                "name": "X",
+                "type": "ndarray"
+            }
+        ],
+        "output": [
+            {
+                "name": "y",
+                "type": "ndarray"
+            }
+        ]
+    },
+    "hyperparameters": {
+        "fixed": {
+            "base_estimator": {
+                "type": "object",
+                "default": null
+            }
+        },
+        "tunable": {
+            "n_estimators": {
+                "type": "int",
+                "default": 50,
+                "range": [1, 500]
+            },
+            "learning_rate": {
+                "type": "float",
+                "default": 1.0
+            },
+            "algorithm": {
+                "type": "str",
+                "default": "SAMME.R",
+                "values": ["SAMME", "SAMME.R"]
+            }
+        }
+    }
+}
diff --git a/mlblocks_primitives/sklearn.ensemble.AdaBoostRegressor.json b/mlblocks_primitives/sklearn.ensemble.AdaBoostRegressor.json
new file mode 100644
index 00000000..a7c6eb93
--- /dev/null
+++ b/mlblocks_primitives/sklearn.ensemble.AdaBoostRegressor.json
@@ -0,0 +1,64 @@
+{
+    "name": "sklearn.ensemble.AdaBoostRegressor",
+    "author": "Carles Sala <carles@pythiac.com>",
+    "documentation": "http://scikit-learn.org/stable/modules/generated/sklearn.ensemble.AdaBoostRegressor.html",
+    "description": "Scikit-learn AdaBoostRegressor.",
+    "classifiers": {
+        "type": "estimator",
+        "subtype": "regressor"
+    },
+    "modalities": [],
+    "primitive": "sklearn.ensemble.AdaBoostRegressor",
+    "fit": {
+        "method": "fit",
+        "args": [
+            {
+                "name": "X",
+                "type": "ndarray"
+            },
+            {
+                "name": "y",
+                "type": "ndarray"
+            }
+        ]
+    },
+    "produce": {
+        "method": "predict",
+        "args": [
+            {
+                "name": "X",
+                "type": "ndarray"
+            }
+        ],
+        "output": [
+            {
+                "name": "y",
+                "type": "ndarray"
+            }
+        ]
+    },
+    "hyperparameters": {
+        "fixed": {
+            "base_estimator": {
+                "type": "object",
+                "default": null
+            }
+        },
+        "tunable": {
+            "n_estimators": {
+                "type": "int",
+                "default": 50,
+                "range": [1, 500]
+            },
+            "learning_rate": {
+                "type": "float",
+                "default": 1.0
+            },
+            "loss": {
+                "type": "str",
+                "default": "linear",
+                "values": ["linear", "square", "exponential"]
+            }
+        }
+    }
+}
diff --git a/mlblocks_primitives/sklearn.ensemble.BaggingClassifier.json b/mlblocks_primitives/sklearn.ensemble.BaggingClassifier.json
new file mode 100644
index 00000000..6686ea4c
--- /dev/null
+++ b/mlblocks_primitives/sklearn.ensemble.BaggingClassifier.json
@@ -0,0 +1,87 @@
+{
+    "name": "sklearn.ensemble.BaggingClassifier",
+    "author": "Carles Sala <carles@pythiac.com>",
+    "documentation": "http://scikit-learn.org/stable/modules/generated/sklearn.ensemble.BaggingClassifier.html",
+    "description": "Scikit-learn BaggingClassifier.",
+    "classifiers": {
+        "type": "estimator",
+        "subtype": "classifier"
+    },
+    "modalities": [],
+    "primitive": "sklearn.ensemble.BaggingClassifier",
+    "fit": {
+        "method": "fit",
+        "args": [
+            {
+                "name": "X",
+                "type": "ndarray"
+            },
+            {
+                "name": "y",
+                "type": "ndarray"
+            }
+        ]
+    },
+    "produce": {
+        "method": "predict",
+        "args": [
+            {
+                "name": "X",
+                "type": "ndarray"
+            }
+        ],
+        "output": [
+            {
+                "name": "y",
+                "type": "ndarray"
+            }
+        ]
+    },
+    "hyperparameters": {
+        "fixed": {
+            "base_estimator": {
+                "type": "object",
+                "default": null
+            },
+            "n_jobs": {
+                "type": "int",
+                "default": 1
+            }
+        },
+        "tunable": {
+            "n_estimators": {
+                "type": "int",
+                "default": 50,
+                "range": [1, 500]
+            },
+            "max_samples": {
+                "type": "float",
+                "default": 1.0
+            },
+            "max_features": {
+                "type": "int",
+                "default": 1.0
+            },
+            "bootstrap": {
+                "type": "bool",
+                "default": true
+            },
+            "bootstrap_features": {
+                "type": "bool",
+                "default": false
+            },
+            "oob_score": {
+                "type": "bool",
+                "default": false
+            },
+            "warm_start": {
+                "type": "bool",
+                "default": false
+            },
+            "verbose": {
+                "type": "int",
+                "default": 0
+            }
+        }
+    }
+}
diff --git a/mlblocks_primitives/sklearn.ensemble.BaggingRegressor.json b/mlblocks_primitives/sklearn.ensemble.BaggingRegressor.json
new file mode 100644
index 00000000..904aaa9a
--- /dev/null
+++ b/mlblocks_primitives/sklearn.ensemble.BaggingRegressor.json
@@ -0,0 +1,87 @@
+{
+    "name": "sklearn.ensemble.BaggingRegressor",
+    "author": "Carles Sala <carles@pythiac.com>",
+    "documentation": "http://scikit-learn.org/stable/modules/generated/sklearn.ensemble.BaggingRegressor.html",
+    "description": "Scikit-learn BaggingRegressor.",
+    "classifiers": {
+        "type": "estimator",
+        "subtype": "regressor"
+    },
+    "modalities": [],
+    "primitive": "sklearn.ensemble.BaggingRegressor",
+    "fit": {
+        "method": "fit",
+        "args": [
+            {
+                "name": "X",
+                "type": "ndarray"
+            },
+            {
+                "name": "y",
+                "type": "ndarray"
+            }
+        ]
+    },
+    "produce": {
+        "method": "predict",
+        "args": [
+            {
+                "name": "X",
+                "type": "ndarray"
+            }
+        ],
+        "output": [
+            {
+                "name": "y",
+                "type": "ndarray"
+            }
+        ]
+    },
+    "hyperparameters": {
+        "fixed": {
+            "base_estimator": {
+                "type": "object",
+                "default": null
+            },
+            "n_jobs": {
+                "type": "int",
+                "default": 1
+            }
+        },
+        "tunable": {
+            "n_estimators": {
+                "type": "int",
+                "default": 10,
+                "range": [1, 500]
+            },
+            "max_samples": {
+                "type": "float",
+                "default": 1.0
+            },
+            "max_features": {
+                "type": "int",
+                "default": 1.0
+            },
+            "bootstrap": {
+                "type": "bool",
+                "default": true
+            },
+            "bootstrap_features": {
+                "type": "bool",
+                "default": false
+            },
+            "oob_score": {
+                "type": "bool",
+                "default": false
+            },
+            "warm_start": {
+                "type": "bool",
+                "default": false
+            },
+            "verbose": {
+                "type": "int",
+                "default": 0
+            }
+        }
+    }
+}

From e200bdf3acd0bc23f614ddab3860ae775565d785 Mon Sep 17 00:00:00 2001
From: Plamen Valentinov <plamen@pythiac.com>
Date: Fri, 21 Sep 2018 10:16:23 +0200
Subject: [PATCH 05/32] Modified min / max range for int / floats

---
 .../sklearn.ensemble.AdaBoostClassifier.json      |  3 ++-
 .../sklearn.ensemble.AdaBoostRegressor.json       |  3 ++-
 .../sklearn.ensemble.BaggingClassifier.json       | 12 ++++++++----
 .../sklearn.ensemble.BaggingRegressor.json        | 14 +++++++++-----
 .../sklearn.ensemble.ExtraTreesClassifier.json    |  3 ++-
 .../sklearn.ensemble.ExtraTreesRegressor.json     |  3 ++-
 ...learn.ensemble.GradientBoostingClassifier.json |  8 +++++---
 ...klearn.ensemble.GradientBoostingRegressor.json | 13 ++++++++-----
 .../sklearn.ensemble.IsolationForest.json         | 11 +++++++----
 .../sklearn.ensemble.RandomForestClassifier.json  |  3 ++-
 ...arn.ensemble.RandomForestClassifier_proba.json |  9 +++++----
 .../sklearn.ensemble.RandomForestRegressor.json   | 15 ++++++++++-----
 .../sklearn.ensemble.RandomTreesEmbedding.json    |  3 ++-
 13 files changed, 64 insertions(+), 36 deletions(-)

diff --git a/mlblocks_primitives/sklearn.ensemble.AdaBoostClassifier.json b/mlblocks_primitives/sklearn.ensemble.AdaBoostClassifier.json
index f7962be2..d0de949e 100644
--- a/mlblocks_primitives/sklearn.ensemble.AdaBoostClassifier.json
+++ b/mlblocks_primitives/sklearn.ensemble.AdaBoostClassifier.json
@@ -52,7 +52,8 @@
             },
             "learning_rate": {
                 "type": "float",
-                "default": 1.0
+                "default": 1.0,
+                "range": [1.0, 10.0]
             },
             "algorithm": {
                 "type": "str",
diff --git a/mlblocks_primitives/sklearn.ensemble.AdaBoostRegressor.json b/mlblocks_primitives/sklearn.ensemble.AdaBoostRegressor.json
index a7c6eb93..e9635b4f 100644
--- a/mlblocks_primitives/sklearn.ensemble.AdaBoostRegressor.json
+++ b/mlblocks_primitives/sklearn.ensemble.AdaBoostRegressor.json
@@ -52,7 +52,8 @@
             },
             "learning_rate": {
                 "type": "float",
-                "default": 1.0
+                "default": 1.0,
+                "range": [1.0, 10.0]
             },
             "loss": {
                 "type": "str",
diff --git a/mlblocks_primitives/sklearn.ensemble.BaggingClassifier.json b/mlblocks_primitives/sklearn.ensemble.BaggingClassifier.json
index 6686ea4c..1f13d56a 100644
--- a/mlblocks_primitives/sklearn.ensemble.BaggingClassifier.json
+++ b/mlblocks_primitives/sklearn.ensemble.BaggingClassifier.json
@@ -45,7 +45,8 @@
             },
             "n_jobs": {
                 "type": "int",
-                "default": 1
+                "default": 1,
+                "range": [-1, 10]
             }
         },
         "tunable": {
@@ -56,11 +57,13 @@
             },
             "max_samples": {
                 "type": "float",
-                "default": 1.0
+                "default": 1.0,
+                "range": [1.0, 100.0]
             },
             "max_features": {
                 "type": "int",
-                "default": 1.0
+                "default": 1.0,
+                "range": [1.0, 1000.0]
             },
             "bootstrap": {
                 "type": "bool",
@@ -80,7 +83,8 @@
             },
             "verbose": {
                 "type": "int",
-                "default": 0
+                "default": 0,
+                "range": [0, 100]
             }
         }
     }
diff --git a/mlblocks_primitives/sklearn.ensemble.BaggingRegressor.json b/mlblocks_primitives/sklearn.ensemble.BaggingRegressor.json
index 904aaa9a..d7357d49 100644
--- a/mlblocks_primitives/sklearn.ensemble.BaggingRegressor.json
+++ b/mlblocks_primitives/sklearn.ensemble.BaggingRegressor.json
@@ -45,7 +45,8 @@
             },
             "n_jobs": {
                 "type": "int",
-                "default": 1
+                "default": 1,
+                "range": [-1, 10]
             }
         },
         "tunable": {
@@ -56,11 +57,13 @@
             },
             "max_samples": {
                 "type": "float",
-                "default": 1.0
+                "default": 1.0,
+                "range": [1.0, 100.0]
             },
             "max_features": {
-                "type": "int",
-                "default": 1.0
+                "type": "float",
+                "default": 1.0,
+                "range": [1.0, 1000.0]
             },
             "bootstrap": {
                 "type": "bool",
@@ -80,7 +83,8 @@
             },
             "verbose": {
                 "type": "int",
-                "default": 0
+                "default": 0,
+                "range": [0, 100]
             }
         }
     }
diff --git a/mlblocks_primitives/sklearn.ensemble.ExtraTreesClassifier.json b/mlblocks_primitives/sklearn.ensemble.ExtraTreesClassifier.json
index efa755a4..b38f6bc8 100644
--- a/mlblocks_primitives/sklearn.ensemble.ExtraTreesClassifier.json
+++ b/mlblocks_primitives/sklearn.ensemble.ExtraTreesClassifier.json
@@ -41,7 +41,8 @@
         "fixed": {
             "n_jobs": {
                 "type": "int",
-                "default": -1
+                "default": -1,
+                "range": [-1, 10]
             }
         },
         "tunable": {
diff --git a/mlblocks_primitives/sklearn.ensemble.ExtraTreesRegressor.json b/mlblocks_primitives/sklearn.ensemble.ExtraTreesRegressor.json
index f8ef31cd..ff964555 100644
--- a/mlblocks_primitives/sklearn.ensemble.ExtraTreesRegressor.json
+++ b/mlblocks_primitives/sklearn.ensemble.ExtraTreesRegressor.json
@@ -41,7 +41,8 @@
         "fixed": {
             "n_jobs": {
                 "type": "int",
-                "default": -1
+                "default": -1,
+                "range": [-1, 10]
             }
         },
         "tunable": {
diff --git a/mlblocks_primitives/sklearn.ensemble.GradientBoostingClassifier.json b/mlblocks_primitives/sklearn.ensemble.GradientBoostingClassifier.json
index 9eb9a98e..985517b5 100644
--- a/mlblocks_primitives/sklearn.ensemble.GradientBoostingClassifier.json
+++ b/mlblocks_primitives/sklearn.ensemble.GradientBoostingClassifier.json
@@ -47,7 +47,8 @@
             },
             "learning_rate": {
                 "type": "float",
-                "default": 0.1
+                "default": 0.1,
+                "range": [0.01, 10.0]
             },
             "n_estimators": {
                 "type": "int",
@@ -77,11 +78,12 @@
             "min_weight_fraction_leaf": {
                 "type": "float",
                 "default": 0.0,
-                "range": [0.0, 0.5]
+                "range": [0.0, 10.0]
             },
             "subsample": {
                 "type": "float",
-                "default": 1.0
+                "default": 1.0,
+                "range": [0.001, 100.0]
             },
             "max_features": {
                 "type": "str",
diff --git a/mlblocks_primitives/sklearn.ensemble.GradientBoostingRegressor.json b/mlblocks_primitives/sklearn.ensemble.GradientBoostingRegressor.json
index 8a73bdd0..72227af3 100644
--- a/mlblocks_primitives/sklearn.ensemble.GradientBoostingRegressor.json
+++ b/mlblocks_primitives/sklearn.ensemble.GradientBoostingRegressor.json
@@ -47,7 +47,8 @@
             },
             "learning_rate": {
                 "type": "float",
-                "default": 0.1
+                "default": 0.1,
+                "range": [0.01, 10.0]
             },
             "n_estimators": {
                 "type": "int",
@@ -77,11 +78,12 @@
             "min_weight_fraction_leaf": {
                 "type": "float",
                 "default": 0.0,
-                "range": [0.0, 0.5]
+                "range": [0.0, 10.5]
             },
             "subsample": {
                 "type": "float",
-                "default": 1.0
+                "default": 1.0,
+                "range": [0.01, 100.0]
             },
             "max_features": {
                 "type": "str",
@@ -96,11 +98,12 @@
             "min_impurity_decrease": {
                 "type": "float",
                 "default": 0.0,
-                "range": [0.0, 1000.0]
+                "range": [0.0, 100.0]
             },
             "alpha": {
                 "type": "float",
-                "default": 0.9
+                "default": 0.9,
+                "range": [0.01, 10.0]
             },
             "init": {
                 "type": "object",
diff --git a/mlblocks_primitives/sklearn.ensemble.IsolationForest.json b/mlblocks_primitives/sklearn.ensemble.IsolationForest.json
index b7a22fb9..79952909 100644
--- a/mlblocks_primitives/sklearn.ensemble.IsolationForest.json
+++ b/mlblocks_primitives/sklearn.ensemble.IsolationForest.json
@@ -41,7 +41,8 @@
         "fixed": {
             "n_jobs": {
                 "type": "int",
-                "default": -1
+                "default": -1,
+                "range": [-1, 10]
             },
             "contamination": {
                 "type": "float",
@@ -56,12 +57,14 @@
                 "range": [1, 500]
             },
             "max_samples": {
-                "type": "str",
-                "default": "auto"
+                "type": "int",
+                "default": 1,
+                "range": [0, 100]
             },
             "max_features": {
                 "type": "float",
-                "default": 1.0
+                "default": 1.0,
+                "range": [1.0, 1000.0]
             },
             "bootstrap": {
                 "type": "bool",
diff --git a/mlblocks_primitives/sklearn.ensemble.RandomForestClassifier.json b/mlblocks_primitives/sklearn.ensemble.RandomForestClassifier.json
index e9822a15..308a484b 100644
--- a/mlblocks_primitives/sklearn.ensemble.RandomForestClassifier.json
+++ b/mlblocks_primitives/sklearn.ensemble.RandomForestClassifier.json
@@ -41,7 +41,8 @@
         "fixed": {
             "n_jobs": {
                 "type": "int",
-                "default": -1
+                "default": -1,
+                "range": [-1, 10]
             }
         },
         "tunable": {
diff --git a/mlblocks_primitives/sklearn.ensemble.RandomForestClassifier_proba.json b/mlblocks_primitives/sklearn.ensemble.RandomForestClassifier_proba.json
index 9fe13eac..56d31a9b 100644
--- a/mlblocks_primitives/sklearn.ensemble.RandomForestClassifier_proba.json
+++ b/mlblocks_primitives/sklearn.ensemble.RandomForestClassifier_proba.json
@@ -19,7 +19,7 @@
             },
             {
                 "name": "y",
-                "type": "array"
+                "type": "ndarray"
             }
         ]
     },
@@ -34,7 +34,7 @@
         "output": [
             {
                 "name": "y",
-                "type": "array"
+                "type": "ndarray"
             }
         ]
     },
@@ -42,13 +42,14 @@
         "fixed": {
             "n_jobs": {
                 "type": "int",
-                "default": -1
+                "default": -1,
+                "range": [-1, 10]
             }
         },
         "tunable": {
             "criterion": {
                 "type": "str",
-                "default": "entropy",
+                "default": "gini",
                 "values": ["entropy", "gini"]
             },
             "max_features": {
diff --git a/mlblocks_primitives/sklearn.ensemble.RandomForestRegressor.json b/mlblocks_primitives/sklearn.ensemble.RandomForestRegressor.json
index d9728d65..c8db2aeb 100644
--- a/mlblocks_primitives/sklearn.ensemble.RandomForestRegressor.json
+++ b/mlblocks_primitives/sklearn.ensemble.RandomForestRegressor.json
@@ -41,13 +41,15 @@
         "fixed": {
             "n_jobs": {
                 "type": "int",
-                "default": -1
+                "default": -1,
+                "range": [-1, 10]
             }
         },
         "tunable": {
             "n_estimators": {
                 "type": "int",
-                "default": 10
+                "default": 10,
+                "range": [1, 500]
             },
             "criterion": {
                 "type": "str",
@@ -76,7 +78,8 @@
             },
             "min_weight_fraction_leaf": {
                 "type": "float",
-                "default": 0.0
+                "default": 0.0,
+                "range": [0.0, 100.0]
             },
             "max_leaf_nodes": {
                 "type": "int",
@@ -84,7 +87,8 @@
             },
             "min_impurity_decrease": {
                 "type": "float",
-                "default": 0.0
+                "default": 0.0,
+                "range": [0.0, 10.0]
             },
             "bootstrap": {
                 "type": "bool",
@@ -96,7 +100,8 @@
             },
             "verbose": {
                 "type": "int",
-                "default": 0
+                "default": 0,
+                "range": [0, 100]
             },
             "warm_start": {
                 "type": "bool",
diff --git a/mlblocks_primitives/sklearn.ensemble.RandomTreesEmbedding.json b/mlblocks_primitives/sklearn.ensemble.RandomTreesEmbedding.json
index 49601790..abebfdfe 100644
--- a/mlblocks_primitives/sklearn.ensemble.RandomTreesEmbedding.json
+++ b/mlblocks_primitives/sklearn.ensemble.RandomTreesEmbedding.json
@@ -41,7 +41,8 @@
         "fixed": {
             "n_jobs": {
                 "type": "int",
-                "default": -1
+                "default": -1,
+                "range": [-1, 10]
             },
             "sparse_output": {
                 "type": "bool",

From 43d28d97f86e63e858a94df405f9da734ae97237 Mon Sep 17 00:00:00 2001
From: Plamen Valentinov <pvkdeveloper@gmx.com>
Date: Fri, 21 Sep 2018 12:48:46 +0200
Subject: [PATCH 06/32] Added myself as contributor.

---
 ...earn.decomposition.DictionaryLearning.json |  20 ++--
 .../sklearn.decomposition.FactorAnalysis.json |  75 ++++++++++++
 .../sklearn.decomposition.FastICA.json        |  78 +++++++++++++
 .../sklearn.decomposition.KernelPCA.json      | 110 ++++++++++++++++++
 .../sklearn.decomposition.PCA.json            |   2 +-
 .../sklearn.decomposition.TruncatedSVD.json   |  61 ++++++++++
 6 files changed, 338 insertions(+), 8 deletions(-)
 create mode 100644 mlblocks_primitives/sklearn.decomposition.FactorAnalysis.json
 create mode 100644 mlblocks_primitives/sklearn.decomposition.FastICA.json
 create mode 100644 mlblocks_primitives/sklearn.decomposition.KernelPCA.json
 create mode 100644 mlblocks_primitives/sklearn.decomposition.TruncatedSVD.json

diff --git a/mlblocks_primitives/sklearn.decomposition.DictionaryLearning.json b/mlblocks_primitives/sklearn.decomposition.DictionaryLearning.json
index 112908ee..88f77d10 100644
--- a/mlblocks_primitives/sklearn.decomposition.DictionaryLearning.json
+++ b/mlblocks_primitives/sklearn.decomposition.DictionaryLearning.json
@@ -1,6 +1,6 @@
 {
     "name": "sklearn.decomposition.DictionaryLearning",
-    "author": "Carles Sala <carles@pythiac.com>",
+    "contributors": ["Carles Sala <carles@pythiac.com>", "Plamen Valentinov <pvkdeveloper@gmx.com>"],
     "documentation": "http://scikit-learn.org/stable/modules/generated/sklearn.decomposition.DictionaryLearning.html",
     "description": "Dictionary learning.",
     "classifiers": {
@@ -41,7 +41,8 @@
             },
             "n_jobs": {
                 "type": "int",
-                "default": -1
+                "default": -1,
+                "range": [-1, 10]
             }
         },
         "tunable": {
@@ -51,15 +52,18 @@
             },
             "alpha": {
                 "type": "float",
-                "default": 1.0
+                "default": 1.0,
+                "range": [0.0, 10.0]
             },
             "max_iter": {
                 "type": "int",
-                "default": 1000
+                "default": 1000,
+                "range": [0, 10000]
             },
             "tol": {
                 "type": "float",
-                "default": 1e-08
+                "default": 1e-08,
+                "range": [0.0, 1.0]
             },
             "fit_algorithm": {
                 "type": "str",
@@ -73,11 +77,13 @@
             },
             "transform_n_nonzero_coefs": {
                 "type": "int",
-                "default": null
+                "default": null,
+                "range": [0, 100]
             },
             "transform_alpha": {
                 "type": "float",
-                "default": 1.0
+                "default": 1.0,
+                "range": [0.0, 10.0]
             },
             "code_init": {
                 "type": "iterable",
diff --git a/mlblocks_primitives/sklearn.decomposition.FactorAnalysis.json b/mlblocks_primitives/sklearn.decomposition.FactorAnalysis.json
new file mode 100644
index 00000000..6a9e7b72
--- /dev/null
+++ b/mlblocks_primitives/sklearn.decomposition.FactorAnalysis.json
@@ -0,0 +1,75 @@
+{
+    "name": "sklearn.decomposition.FactorAnalysis",
+    "contributors": ["Carles Sala <carles@pythiac.com>", "Plamen Valentinov <pvkdeveloper@gmx.com>"],
+    "documentation": "http://scikit-learn.org/stable/modules/generated/sklearn.decomposition.FactorAnalysis.html",
+    "description": "Factor Analysis. A simple linear generative model with Gaussian latent variables.",
+    "classifiers": {
+        "type": "preprocessor",
+        "subtype": "feature_selector"
+    },
+    "modalities": [],
+    "primitive": "sklearn.decomposition.FactorAnalysis",
+    "fit": {
+        "method": "fit",
+        "args": [
+            {
+                "name": "X",
+                "type": "ndarray"
+            }
+        ]
+    },
+    "produce": {
+        "method": "transform",
+        "args": [
+            {
+                "name": "X",
+                "type": "ndarray"
+            }
+        ],
+        "output": [
+            {
+                "name": "X",
+                "type": "ndarray"
+            }
+        ]
+    },
+    "hyperparameters": {
+        "fixed": {
+            "copy": {
+                "type": "bool",
+                "default": true
+            }
+        },
+        "tunable": {
+            "n_components": {
+                "type": "int",
+                "default": null,
+                "range": [0, 500]
+            },
+            "tol": {
+                "type": "float",
+                "default": 0.01,
+                "range": [0.0, 0.5]
+            },
+            "max_iter": {
+                "type": "int",
+                "default": 1000,
+                "range": [10, 10000]
+            },
+            "noise_variance_init": {
+                "type": "iterable",
+                "default": null
+            },
+            "svd_method": {
+                "type": "str",
+                "default": "randomized",
+                "values": ["lapack", "randomized"]
+            },
+            "iterated_power": {
+                "type": "int",
+                "default": 3,
+                "range": [0, 10]
+            }
+        }
+    }
+}
diff --git a/mlblocks_primitives/sklearn.decomposition.FastICA.json b/mlblocks_primitives/sklearn.decomposition.FastICA.json
new file mode 100644
index 00000000..a73b3c2a
--- /dev/null
+++ b/mlblocks_primitives/sklearn.decomposition.FastICA.json
@@ -0,0 +1,78 @@
+{
+    "name": "sklearn.decomposition.FastICA",
+    "contributors": ["Carles Sala <carles@pythiac.com>", "Plamen Valentinov <pvkdeveloper@gmx.com>"],
+    "documentation": "http://scikit-learn.org/stable/modules/generated/sklearn.decomposition.FastICA.html",
+    "description": "FastICA: a fast algorithm for Independent Component Analysis.",
+    "classifiers": {
+        "type": "preprocessor",
+        "subtype": "feature_selector"
+    },
+    "modalities": [],
+    "primitive": "sklearn.decomposition.FastICA",
+    "fit": {
+        "method": "fit",
+        "args": [
+            {
+                "name": "X",
+                "type": "ndarray"
+            }
+        ]
+    },
+    "produce": {
+        "method": "transform",
+        "args": [
+            {
+                "name": "X",
+                "type": "ndarray"
+            }
+        ],
+        "output": [
+            {
+                "name": "X",
+                "type": "ndarray"
+            }
+        ]
+    },
+    "hyperparameters": {
+        "fixed": {},
+        "tunable": {
+            "n_components": {
+                "type": "int",
+                "default": null,
+                "range": [1, 500]
+            },
+            "algorithm": {
+                "type": "str",
+                "default": "parallel",
+                "values": ["parallel", "deflation"]
+            },
+            "whiten": {
+                "type": "bool",
+                "default": true
+            },
+            "fun": {
+                "type": "str",
+                "default": "logcosh",
+                "values": ["logcosh", "exp", "cube"]
+            },
+            "fun_args": {
+                "type": "iterable",
+                "default": null
+            },
+            "max_iter": {
+                "type": "int",
+                "default": 200,
+                "range": [1, 1000]
+            },
+            "tol": {
+                "type": "float",
+                "default": 0.0001,
+                "range": [0.00001, 0.5]
+            },
+            "w_init": {
+                "type": "iterable",
+                "default": null
+            }
+        }
+    }
+}
diff --git a/mlblocks_primitives/sklearn.decomposition.KernelPCA.json b/mlblocks_primitives/sklearn.decomposition.KernelPCA.json
new file mode 100644
index 00000000..184085ac
--- /dev/null
+++ b/mlblocks_primitives/sklearn.decomposition.KernelPCA.json
@@ -0,0 +1,110 @@
+{
+    "name": "sklearn.decomposition.KernelPCA",
+    "contributors": ["Carles Sala <carles@pythiac.com>", "Plamen Valentinov <pvkdeveloper@gmx.com>"],
+    "documentation": "http://scikit-learn.org/stable/modules/generated/sklearn.decomposition.KernelPCA.html",
+    "description": "Kernel Principal Component Analysis.",
+    "classifiers": {
+        "type": "preprocessor",
+        "subtype": "feature_selector"
+    },
+    "modalities": [],
+    "primitive": "sklearn.decomposition.KernelPCA",
+    "fit": {
+        "method": "fit",
+        "args": [
+            {
+                "name": "X",
+                "type": "ndarray"
+            }
+        ]
+    },
+    "produce": {
+        "method": "transform",
+        "args": [
+            {
+                "name": "X",
+                "type": "ndarray"
+            }
+        ],
+        "output": [
+            {
+                "name": "X",
+                "type": "ndarray"
+            }
+        ]
+    },
+    "hyperparameters": {
+        "fixed": {
+            "n_jobs": {
+                "type": "int",
+                "default": 1,
+                "range": [-1, 10]
+            }
+        },
+        "tunable": {
+           "n_components": {
+                "type": "int",
+                "default": null,
+                "range": [1, 500]
+           },
+           "kernel": {
+               "type": "str",
+               "default": "linear",
+               "values": [
+                   "linear",
+                   "poly",
+                   "rbf",
+                   "sigmoid",
+                   "cosine",
+                   "precomputed"
+               ]
+           },
+           "gamma": {
+               "type": "float",
+               "default": null,
+               "range": [0.0, 0.5]
+           },
+           "coef0": {
+               "type": "float",
+               "default": 1.0,
+               "range": [0.0, 10.0]
+           },
+           "kernel_params": {
+               "type": "str",
+               "default": null
+           },
+           "alpha": {
+               "type": "int",
+               "default": 1,
+               "range": [0, 10]
+           },
+           "fit_inverse_transform": {
+               "type": "bool",
+               "default": false
+           },
+           "eigen_solver": {
+               "type": "str",
+               "default": "auto",
+               "values": ["auto", "arpack", "dense"]
+           },
+           "tol": {
+               "type": "float",
+               "default": 0.0,
+               "range": [0.0, 10.0]
+           },
+           "max_iter": {
+               "type": "int",
+               "default": null,
+               "range": [0, 100]
+           },
+           "remove_zero_eig": {
+               "type": "bool",
+               "default": false
+           },
+           "copy_X": {
+               "type": "bool",
+               "default": true
+           }
+        }
+    }
+}
diff --git a/mlblocks_primitives/sklearn.decomposition.PCA.json b/mlblocks_primitives/sklearn.decomposition.PCA.json
index 27355d9b..83287d69 100644
--- a/mlblocks_primitives/sklearn.decomposition.PCA.json
+++ b/mlblocks_primitives/sklearn.decomposition.PCA.json
@@ -1,6 +1,6 @@
 {
     "name": "sklearn.decomposition.PCA",
-    "author": "Carles Sala <carles@pythiac.com>",
+    "contributors": ["Carles Sala <carles@pythiac.com>", "Plamen Valentinov <pvkdeveloper@gmx.com>"],
     "documentation": "http://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html",
     "description": "Principal component analysis (PCA)",
     "classifiers": {
diff --git a/mlblocks_primitives/sklearn.decomposition.TruncatedSVD.json b/mlblocks_primitives/sklearn.decomposition.TruncatedSVD.json
new file mode 100644
index 00000000..6342162e
--- /dev/null
+++ b/mlblocks_primitives/sklearn.decomposition.TruncatedSVD.json
@@ -0,0 +1,61 @@
+{
+    "name": "sklearn.decomposition.TruncatedSVD",
+    "contributors": ["Carles Sala <carles@pythiac.com>", "Plamen Valentinov <pvkdeveloper@gmx.com>"],
+    "documentation": "http://scikit-learn.org/stable/modules/generated/sklearn.decomposition.TruncatedSVD.html",
+    "description": "Dimensionality reduction using truncated SVD.",
+    "classifiers": {
+        "type": "preprocessor",
+        "subtype": "feature_selector"
+    },
+    "modalities": [],
+    "primitive": "sklearn.decomposition.TruncatedSVD",
+    "fit": {
+        "method": "fit",
+        "args": [
+            {
+                "name": "X",
+                "type": "ndarray"
+            }
+        ]
+    },
+    "produce": {
+        "method": "transform",
+        "args": [
+            {
+                "name": "X",
+                "type": "ndarray"
+            }
+        ],
+        "output": [
+            {
+                "name": "X",
+                "type": "ndarray"
+            }
+        ]
+    },
+    "hyperparameters": {
+        "fixed": {},
+        "tunable": {
+            "n_components": {
+                "type": "int",
+                "default": 2,
+                "range": [1, 500]
+            },
+            "algorithm": {
+                "type": "str",
+                "default": "randomized",
+                "values": ["arpack", "randomized"]
+            },
+            "n_iter": {
+                "type": "int",
+                "default": 5,
+                "range": [1, 100]
+            },
+            "tol": {
+                "type": "float",
+                "default": 0.0,
+                "range": [0.0, 0.5]
+            }
+        }
+    }
+}

From 334d4822054d86040ddd2b20ff8d459d72cf0659 Mon Sep 17 00:00:00 2001
From: Plamen Valentinov <pvkdeveloper@gmx.com>
Date: Wed, 3 Oct 2018 09:54:10 +0200
Subject: [PATCH 07/32] Changed some hyperparameters to Fixed instead of
 Tunable

---
 .../sklearn.ensemble.BaggingClassifier.json   | 20 +++---
 .../sklearn.ensemble.BaggingRegressor.json    | 20 +++---
 ...sklearn.ensemble.ExtraTreesClassifier.json | 29 ++++-----
 .../sklearn.ensemble.ExtraTreesRegressor.json | 20 +++---
 ...n.ensemble.GradientBoostingClassifier.json | 36 +++++------
 ...rn.ensemble.GradientBoostingRegressor.json | 36 +++++------
 .../sklearn.ensemble.IsolationForest.json     | 13 ++--
 ...learn.ensemble.RandomForestClassifier.json | 28 ++++----
 ...ensemble.RandomForestClassifier_proba.json | 64 +++++++++++++------
 ...klearn.ensemble.RandomForestRegressor.json | 21 +++---
 ...sklearn.ensemble.RandomTreesEmbedding.json | 18 +++---
 11 files changed, 161 insertions(+), 144 deletions(-)

diff --git a/mlblocks_primitives/sklearn.ensemble.BaggingClassifier.json b/mlblocks_primitives/sklearn.ensemble.BaggingClassifier.json
index 90a27ee9..e3a4ff0c 100644
--- a/mlblocks_primitives/sklearn.ensemble.BaggingClassifier.json
+++ b/mlblocks_primitives/sklearn.ensemble.BaggingClassifier.json
@@ -45,8 +45,15 @@
             },
             "n_jobs": {
                 "type": "int",
-                "default": 1,
-                "range": [-1, 10]
+                "default": 1
+            },
+            "warm_start": {
+                "type": "bool",
+                "default": false
+            },
+            "verbose": {
+                "type": "int",
+                "default": 0
             }
         },
         "tunable": {
@@ -76,15 +83,6 @@
             "oob_score": {
                 "type": "bool",
                 "default": false
-            },
-            "warm_start": {
-                "type": "bool",
-                "default": false
-            },
-            "verbose": {
-                "type": "int",
-                "default": 0,
-                "range": [0, 100]
             }
         }
     }
diff --git a/mlblocks_primitives/sklearn.ensemble.BaggingRegressor.json b/mlblocks_primitives/sklearn.ensemble.BaggingRegressor.json
index 3dececf1..7d5d6a18 100644
--- a/mlblocks_primitives/sklearn.ensemble.BaggingRegressor.json
+++ b/mlblocks_primitives/sklearn.ensemble.BaggingRegressor.json
@@ -45,8 +45,15 @@
             },
             "n_jobs": {
                 "type": "int",
-                "default": 1,
-                "range": [-1, 10]
+                "default": 1
+            },
+            "warm_start": {
+                "type": "bool",
+                "default": false
+            },
+            "verbose": {
+                "type": "int",
+                "default": 0
             }
         },
         "tunable": {
@@ -76,15 +83,6 @@
             "oob_score": {
                 "type": "bool",
                 "default": false
-            },
-            "warm_start": {
-                "type": "bool",
-                "default": false
-            },
-            "verbose": {
-                "type": "int",
-                "default": 0,
-                "range": [0, 100]
             }
         }
     }
diff --git a/mlblocks_primitives/sklearn.ensemble.ExtraTreesClassifier.json b/mlblocks_primitives/sklearn.ensemble.ExtraTreesClassifier.json
index 9d53b333..4057fb5e 100644
--- a/mlblocks_primitives/sklearn.ensemble.ExtraTreesClassifier.json
+++ b/mlblocks_primitives/sklearn.ensemble.ExtraTreesClassifier.json
@@ -41,8 +41,20 @@
         "fixed": {
             "n_jobs": {
                 "type": "int",
-                "default": -1,
-                "range": [-1, 10]
+                "default": 1
+            },
+            "verbose": {
+                "type": "int",
+                "default": 0,
+                "range": [0, 1000]
+            },
+            "warm_start": {
+                "type": "bool",
+                "default": false
+            },
+            "class_weight": {
+                "type": "iterable",
+                "default": null
             }
         },
         "tunable": {
@@ -98,19 +110,6 @@
             "oob_score": {
                 "type": "bool",
                 "default": false
-            },
-            "verbose": {
-                "type": "int",
-                "default": 0,
-                "range": [0, 1000]
-            },
-            "warm_start": {
-                "type": "bool",
-                "default": false
-            },
-            "class_weight": {
-                "type": "iterable",
-                "default": null
             }
         }
     }
diff --git a/mlblocks_primitives/sklearn.ensemble.ExtraTreesRegressor.json b/mlblocks_primitives/sklearn.ensemble.ExtraTreesRegressor.json
index 39811888..d21fef9f 100644
--- a/mlblocks_primitives/sklearn.ensemble.ExtraTreesRegressor.json
+++ b/mlblocks_primitives/sklearn.ensemble.ExtraTreesRegressor.json
@@ -41,8 +41,15 @@
         "fixed": {
             "n_jobs": {
                 "type": "int",
-                "default": -1,
-                "range": [-1, 10]
+                "default": 1
+            },
+            "verbose": {
+                "type": "int",
+                "default": 0
+            },
+            "warm_start": {
+                "type": "bool",
+                "default": false
             }
         },
         "tunable": {
@@ -98,15 +105,6 @@
             "oob_score": {
                 "type": "bool",
                 "default": false
-            },
-            "verbose": {
-                "type": "int",
-                "default": 0,
-                "range": [0, 1000]
-            },
-            "warm_start": {
-                "type": "bool",
-                "default": false
             }
         }
     }
diff --git a/mlblocks_primitives/sklearn.ensemble.GradientBoostingClassifier.json b/mlblocks_primitives/sklearn.ensemble.GradientBoostingClassifier.json
index dae8ed39..9ea2dc21 100644
--- a/mlblocks_primitives/sklearn.ensemble.GradientBoostingClassifier.json
+++ b/mlblocks_primitives/sklearn.ensemble.GradientBoostingClassifier.json
@@ -38,7 +38,24 @@
         ]
     },
     "hyperparameters": {
-        "fixed": {},
+        "fixed": {
+            "warm_start": {
+                "type": "bool",
+                "default": false
+            },
+            "init": {
+                "type": "object",
+                "default": null
+            },
+            "verbose": {
+                "type": "int",
+                "default": 0
+            },
+            "presort": {
+                "type": "bool",
+                "default": false
+            }
+        },
         "tunable": {
             "loss": {
                 "type": "str",
@@ -99,23 +116,6 @@
                 "type": "float",
                 "default": 0.0,
                 "range": [0.0, 1000.0]
-            },
-            "init": {
-                "type": "object",
-                "default": null
-            },
-            "verbose": {
-                "type": "int",
-                "default": 0,
-                "range": [0, 100]
-            },
-            "warm_start": {
-                "type": "bool",
-                "default": false
-            },
-            "presort": {
-                "type": "bool",
-                "default": "false"
             }
         }
     }
diff --git a/mlblocks_primitives/sklearn.ensemble.GradientBoostingRegressor.json b/mlblocks_primitives/sklearn.ensemble.GradientBoostingRegressor.json
index ae2d170b..44c86610 100644
--- a/mlblocks_primitives/sklearn.ensemble.GradientBoostingRegressor.json
+++ b/mlblocks_primitives/sklearn.ensemble.GradientBoostingRegressor.json
@@ -38,7 +38,24 @@
         ]
     },
     "hyperparameters": {
-        "fixed": {},
+        "fixed": {
+            "init": {
+                "type": "object",
+                "default": null
+            },
+            "verbose": {
+                "type": "int",
+                "default": 0
+            },
+            "warm_start": {
+                "type": "bool",
+                "default": false
+            },
+            "presort": {
+                "type": "bool",
+                "default": false
+            }
+        },
         "tunable": {
             "loss": {
                 "type": "str",
@@ -104,23 +121,6 @@
                 "type": "float",
                 "default": 0.9,
                 "range": [0.01, 10]
-            },
-            "init": {
-                "type": "object",
-                "default": null
-            },
-            "verbose": {
-                "type": "int",
-                "default": 0,
-                "range": [0, 100]
-            },
-            "warm_start": {
-                "type": "bool",
-                "default": false
-            },
-            "presort": {
-                "type": "bool",
-                "default": "false"
             }
         }
     }
diff --git a/mlblocks_primitives/sklearn.ensemble.IsolationForest.json b/mlblocks_primitives/sklearn.ensemble.IsolationForest.json
index 0f6dada6..d4509fe9 100644
--- a/mlblocks_primitives/sklearn.ensemble.IsolationForest.json
+++ b/mlblocks_primitives/sklearn.ensemble.IsolationForest.json
@@ -41,13 +41,17 @@
         "fixed": {
             "n_jobs": {
                 "type": "int",
-                "default": -1,
-                "range": [-1, 10]
+                "default": 1
             },
             "contamination": {
                 "type": "float",
                 "default": 0.1,
                 "range": [0.0, 0.5]
+            },
+            "verbose": {
+                "type": "int",
+                "default": 0,
+                "range": [0, 100]
             }
         },
         "tunable": {
@@ -69,11 +73,6 @@
             "bootstrap": {
                 "type": "bool",
                 "default": false
-            },
-            "verbose": {
-                "type": "int",
-                "default": 0,
-                "range": [0, 100]
             }
         }
     }
diff --git a/mlblocks_primitives/sklearn.ensemble.RandomForestClassifier.json b/mlblocks_primitives/sklearn.ensemble.RandomForestClassifier.json
index 28ee9bff..b8d23228 100644
--- a/mlblocks_primitives/sklearn.ensemble.RandomForestClassifier.json
+++ b/mlblocks_primitives/sklearn.ensemble.RandomForestClassifier.json
@@ -41,8 +41,19 @@
         "fixed": {
             "n_jobs": {
                 "type": "int",
-                "default": -1,
-                "range": [-1, 10]
+                "default": 1
+            },
+            "verbose": {
+                "type": "int",
+                "default": 0
+            },
+            "warm_start": {
+                "type": "bool",
+                "default": false
+            },
+            "class_weight": {
+                "type": "iterable",
+                "default": null
             }
         },
         "tunable": {
@@ -98,19 +109,6 @@
             "oob_score": {
                 "type": "bool",
                 "default": false
-            },
-            "verbose": {
-                "type": "int",
-                "default": 0,
-                "range": [0, 1000]
-            },
-            "warm_start": {
-                "type": "bool",
-                "default": false
-            },
-            "class_weight": {
-                "type": "iterable",
-                "default": null
             }
         }
     }
diff --git a/mlblocks_primitives/sklearn.ensemble.RandomForestClassifier_proba.json b/mlblocks_primitives/sklearn.ensemble.RandomForestClassifier_proba.json
index f00490d9..ab4a79e3 100644
--- a/mlblocks_primitives/sklearn.ensemble.RandomForestClassifier_proba.json
+++ b/mlblocks_primitives/sklearn.ensemble.RandomForestClassifier_proba.json
@@ -8,8 +8,7 @@
         "subtype": "classifier"
     },
     "modalities": [],
-    "primitive": "sklearn.ensemble.RandomForestClassifier",
-    "validation_dataset": "wine",
+    "primitive": "sklearn.ensemble.RandomForestClassifier",
     "fit": {
         "method": "fit",
         "args": [
@@ -42,11 +41,27 @@
         "fixed": {
             "n_jobs": {
                 "type": "int",
-                "default": -1,
-                "range": [-1, 10]
+                "default": 1
+            },
+            "verbose": {
+                "type": "int",
+                "default": 0
+            },
+            "warm_start": {
+                "type": "bool",
+                "default": false
+            },
+            "class_weight": {
+                "type": "iterable",
+                "default": null
             }
         },
         "tunable": {
+            "n_estimators": {
+                "type": "int",
+                "default": 10,
+                "range": [1, 500]
+            },
             "criterion": {
                 "type": "str",
                 "default": "gini",
@@ -55,32 +70,45 @@
             "max_features": {
                 "type": "str",
                 "default": null,
-                "range": [null, "auto", "log2"]
+                "values": [null, "auto", "log2", "sqrt"]
             },
             "max_depth": {
                 "type": "int",
-                "default": 10,
+                "default": null,
                 "range": [1, 30]
             },
             "min_samples_split": {
-                "type": "float",
-                "default": 0.1,
-                "range": [0.0001, 0.5]
+                "type": "int",
+                "default": 2,
+                "range": [2, 100]
             },
             "min_samples_leaf": {
+                "type": "int",
+                "default": 1,
+                "range": [1, 100]
+            },
+            "min_weight_fraction_leaf": {
                 "type": "float",
-                "default": 0.1,
-                "range": [0.0001, 0.5]
+                "default": 0.0,
+                "range": [0.0, 0.5]
             },
-            "n_estimators": {
+            "max_leaf_nodes": {
                 "type": "int",
-                "default": 30,
-                "values": [2, 500]
-            },
-            "class_weight": {
-                "type": "str",
                 "default": null,
-                "range": [null, "balanced"]
+                "range": [2, 1000]
+            },
+            "min_impurity_decrease": {
+                "type": "float",
+                "default": 0.0,
+                "range": [0.0, 1000.0]
+            },
+            "bootstrap": {
+                "type": "bool",
+                "default": true
+            },
+            "oob_score": {
+                "type": "bool",
+                "default": false
             }
         }
     }
diff --git a/mlblocks_primitives/sklearn.ensemble.RandomForestRegressor.json b/mlblocks_primitives/sklearn.ensemble.RandomForestRegressor.json
index 947fec9d..5ed09b96 100644
--- a/mlblocks_primitives/sklearn.ensemble.RandomForestRegressor.json
+++ b/mlblocks_primitives/sklearn.ensemble.RandomForestRegressor.json
@@ -41,8 +41,16 @@
         "fixed": {
             "n_jobs": {
                 "type": "int",
-                "default": -1,
-                "range": [-1, 10]
+                "default": 1
+            },
+            "verbose": {
+                "type": "int",
+                "default": 0,
+                "range": [0, 100]
+            },
+            "warm_start": {
+                "type": "bool",
+                "default": false
             }
         },
         "tunable": {
@@ -97,15 +105,6 @@
             "oob_score": {
                 "type": "bool",
                 "default": false
-            },
-            "verbose": {
-                "type": "int",
-                "default": 0,
-                "range": [0, 100]
-            },
-            "warm_start": {
-                "type": "bool",
-                "default": false
             }
         }
     }
diff --git a/mlblocks_primitives/sklearn.ensemble.RandomTreesEmbedding.json b/mlblocks_primitives/sklearn.ensemble.RandomTreesEmbedding.json
index 9a6c2009..551a551c 100644
--- a/mlblocks_primitives/sklearn.ensemble.RandomTreesEmbedding.json
+++ b/mlblocks_primitives/sklearn.ensemble.RandomTreesEmbedding.json
@@ -47,6 +47,15 @@
             "sparse_output": {
                 "type": "bool",
                 "default": true
+            },
+            "verbose": {
+                "type": "int",
+                "default": 0,
+                "range": [0, 1000]
+            },
+            "warm_start": {
+                "type": "bool",
+                "default": false
             }
         },
         "tunable": {
@@ -84,15 +93,6 @@
                 "type": "float",
                 "default": 0.0,
                 "range": [0.0, 1000.0]
-            },
-            "verbose": {
-                "type": "int",
-                "default": 0,
-                "range": [0, 1000]
-            },
-            "warm_start": {
-                "type": "bool",
-                "default": false
             }
         }
     }

From d93dd795e32d211307050b2ee7f7d99527ce7c13 Mon Sep 17 00:00:00 2001
From: Plamen Valentinov <pvkdeveloper@gmx.com>
Date: Wed, 3 Oct 2018 10:34:09 +0200
Subject: [PATCH 08/32] Added new hyperparams from scikit-learn 0.20

---
 ...learn.ensemble.GradientBoostingClassifier.json | 15 +++++++++++++++
 ...klearn.ensemble.GradientBoostingRegressor.json | 15 +++++++++++++++
 .../sklearn.ensemble.IsolationForest.json         |  2 +-
 3 files changed, 31 insertions(+), 1 deletion(-)

diff --git a/mlblocks_primitives/sklearn.ensemble.GradientBoostingClassifier.json b/mlblocks_primitives/sklearn.ensemble.GradientBoostingClassifier.json
index 9ea2dc21..9d79ec83 100644
--- a/mlblocks_primitives/sklearn.ensemble.GradientBoostingClassifier.json
+++ b/mlblocks_primitives/sklearn.ensemble.GradientBoostingClassifier.json
@@ -116,6 +116,21 @@
                 "type": "float",
                 "default": 0.0,
                 "range": [0.0, 1000.0]
+            },
+            "validation_fraction": {
+                "type": "float",
+                "default": 0.1,
+                "range": [0.0, 1.0]
+            },
+            "n_iter_no_change": {
+                "type": "int",
+                "default": null,
+                "range": [1, 1000]
+            },
+            "tol": {
+                "type": "float",
+                "default": 1e-4,
+                "range": [0.0, 2.0]
             }
         }
     }
diff --git a/mlblocks_primitives/sklearn.ensemble.GradientBoostingRegressor.json b/mlblocks_primitives/sklearn.ensemble.GradientBoostingRegressor.json
index 44c86610..6766bf02 100644
--- a/mlblocks_primitives/sklearn.ensemble.GradientBoostingRegressor.json
+++ b/mlblocks_primitives/sklearn.ensemble.GradientBoostingRegressor.json
@@ -121,6 +121,21 @@
                 "type": "float",
                 "default": 0.9,
                 "range": [0.01, 10]
+            },
+            "validation_fraction": {
+                "type": "float",
+                "default": 0.1,
+                "range": [0.0, 1.0]
+            },
+            "n_iter_no_change": {
+                "type": "int",
+                "default": null,
+                "range": [1, 1000]
+            },
+            "tol": {
+                "type": "float",
+                "default": 1e-4,
+                "range": [0.0, 2.0]
             }
         }
     }
diff --git a/mlblocks_primitives/sklearn.ensemble.IsolationForest.json b/mlblocks_primitives/sklearn.ensemble.IsolationForest.json
index d4509fe9..e2b803dc 100644
--- a/mlblocks_primitives/sklearn.ensemble.IsolationForest.json
+++ b/mlblocks_primitives/sklearn.ensemble.IsolationForest.json
@@ -1,7 +1,7 @@
 {
     "name": "sklearn.ensemble.IsolationForest",
     "contributors": ["Carles Sala <carles@pythiac.com>", "Plamen Valentinov <pvkdeveloper@gmx.com>"],
-    "documentation": "http://scikit-learn.org/stable/modules/generated/sklearn.ensemble.GradientBoostingClassifier.html",
+    "documentation": "http://scikit-learn.org/stable/modules/generated/sklearn.ensemble.IsolationForest.html",
     "description": "Scikit-learn IsolationForest. The IsolationForest ‘isolates’ observations by randomly selecting a feature and then randomly selecting a split value between the maximum and minimum values of the selected feature.",
     "classifiers": {
         "type": "estimator",

From eaf50aa098f640dac21aef7101b704199c8c3194 Mon Sep 17 00:00:00 2001
From: Plamen Valentinov <pvkdeveloper@gmx.com>
Date: Wed, 3 Oct 2018 12:09:03 +0200
Subject: [PATCH 09/32] Fixed n_jobs

---
 mlblocks_primitives/sklearn.ensemble.BaggingClassifier.json    | 2 +-
 mlblocks_primitives/sklearn.ensemble.BaggingRegressor.json     | 2 +-
 mlblocks_primitives/sklearn.ensemble.ExtraTreesClassifier.json | 2 +-
 mlblocks_primitives/sklearn.ensemble.ExtraTreesRegressor.json  | 2 +-
 mlblocks_primitives/sklearn.ensemble.IsolationForest.json      | 2 +-
 .../sklearn.ensemble.RandomForestClassifier.json               | 2 +-
 .../sklearn.ensemble.RandomForestClassifier_proba.json         | 2 +-
 .../sklearn.ensemble.RandomForestRegressor.json                | 2 +-
 mlblocks_primitives/sklearn.ensemble.RandomTreesEmbedding.json | 3 +--
 9 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/mlblocks_primitives/sklearn.ensemble.BaggingClassifier.json b/mlblocks_primitives/sklearn.ensemble.BaggingClassifier.json
index e3a4ff0c..55757a7a 100644
--- a/mlblocks_primitives/sklearn.ensemble.BaggingClassifier.json
+++ b/mlblocks_primitives/sklearn.ensemble.BaggingClassifier.json
@@ -45,7 +45,7 @@
             },
             "n_jobs": {
                 "type": "int",
-                "default": 1
+                "default": null
             },
             "warm_start": {
                 "type": "bool",
diff --git a/mlblocks_primitives/sklearn.ensemble.BaggingRegressor.json b/mlblocks_primitives/sklearn.ensemble.BaggingRegressor.json
index 7d5d6a18..eb16c812 100644
--- a/mlblocks_primitives/sklearn.ensemble.BaggingRegressor.json
+++ b/mlblocks_primitives/sklearn.ensemble.BaggingRegressor.json
@@ -45,7 +45,7 @@
             },
             "n_jobs": {
                 "type": "int",
-                "default": 1
+                "default": null
             },
             "warm_start": {
                 "type": "bool",
diff --git a/mlblocks_primitives/sklearn.ensemble.ExtraTreesClassifier.json b/mlblocks_primitives/sklearn.ensemble.ExtraTreesClassifier.json
index 4057fb5e..f061fc58 100644
--- a/mlblocks_primitives/sklearn.ensemble.ExtraTreesClassifier.json
+++ b/mlblocks_primitives/sklearn.ensemble.ExtraTreesClassifier.json
@@ -41,7 +41,7 @@
         "fixed": {
             "n_jobs": {
                 "type": "int",
-                "default": 1
+                "default": null
             },
             "verbose": {
                 "type": "int",
diff --git a/mlblocks_primitives/sklearn.ensemble.ExtraTreesRegressor.json b/mlblocks_primitives/sklearn.ensemble.ExtraTreesRegressor.json
index d21fef9f..5cf7cbd3 100644
--- a/mlblocks_primitives/sklearn.ensemble.ExtraTreesRegressor.json
+++ b/mlblocks_primitives/sklearn.ensemble.ExtraTreesRegressor.json
@@ -41,7 +41,7 @@
         "fixed": {
             "n_jobs": {
                 "type": "int",
-                "default": 1
+                "default": null
             },
             "verbose": {
                 "type": "int",
diff --git a/mlblocks_primitives/sklearn.ensemble.IsolationForest.json b/mlblocks_primitives/sklearn.ensemble.IsolationForest.json
index e2b803dc..3f1d6366 100644
--- a/mlblocks_primitives/sklearn.ensemble.IsolationForest.json
+++ b/mlblocks_primitives/sklearn.ensemble.IsolationForest.json
@@ -41,7 +41,7 @@
         "fixed": {
             "n_jobs": {
                 "type": "int",
-                "default": 1
+                "default": null
             },
             "contamination": {
                 "type": "float",
diff --git a/mlblocks_primitives/sklearn.ensemble.RandomForestClassifier.json b/mlblocks_primitives/sklearn.ensemble.RandomForestClassifier.json
index b8d23228..256278f8 100644
--- a/mlblocks_primitives/sklearn.ensemble.RandomForestClassifier.json
+++ b/mlblocks_primitives/sklearn.ensemble.RandomForestClassifier.json
@@ -41,7 +41,7 @@
         "fixed": {
             "n_jobs": {
                 "type": "int",
-                "default": 1
+                "default": null
             },
             "verbose": {
                 "type": "int",
diff --git a/mlblocks_primitives/sklearn.ensemble.RandomForestClassifier_proba.json b/mlblocks_primitives/sklearn.ensemble.RandomForestClassifier_proba.json
index ab4a79e3..f79fad20 100644
--- a/mlblocks_primitives/sklearn.ensemble.RandomForestClassifier_proba.json
+++ b/mlblocks_primitives/sklearn.ensemble.RandomForestClassifier_proba.json
@@ -41,7 +41,7 @@
         "fixed": {
             "n_jobs": {
                 "type": "int",
-                "default": 1
+                "default": null
             },
             "verbose": {
                 "type": "int",
diff --git a/mlblocks_primitives/sklearn.ensemble.RandomForestRegressor.json b/mlblocks_primitives/sklearn.ensemble.RandomForestRegressor.json
index 5ed09b96..4db89c45 100644
--- a/mlblocks_primitives/sklearn.ensemble.RandomForestRegressor.json
+++ b/mlblocks_primitives/sklearn.ensemble.RandomForestRegressor.json
@@ -41,7 +41,7 @@
         "fixed": {
             "n_jobs": {
                 "type": "int",
-                "default": 1
+                "default": null
             },
             "verbose": {
                 "type": "int",
diff --git a/mlblocks_primitives/sklearn.ensemble.RandomTreesEmbedding.json b/mlblocks_primitives/sklearn.ensemble.RandomTreesEmbedding.json
index 551a551c..6a9168d9 100644
--- a/mlblocks_primitives/sklearn.ensemble.RandomTreesEmbedding.json
+++ b/mlblocks_primitives/sklearn.ensemble.RandomTreesEmbedding.json
@@ -41,8 +41,7 @@
         "fixed": {
             "n_jobs": {
                 "type": "int",
-                "default": -1,
-                "range": [-1, 10]
+                "default": null
             },
             "sparse_output": {
                 "type": "bool",

From e071532a510872f4c7cffd4f81d36fab67eb2e6a Mon Sep 17 00:00:00 2001
From: Plamen Valentinov <pvkdeveloper@gmx.com>
Date: Wed, 3 Oct 2018 12:26:59 +0200
Subject: [PATCH 10/32] Fixed some hyperparams and added new ones from scikit-learn 0.20

---
 ...earn.decomposition.DictionaryLearning.json | 35 +++++++++++--------
 .../sklearn.decomposition.KernelPCA.json      | 13 ++++---
 2 files changed, 27 insertions(+), 21 deletions(-)

diff --git a/mlblocks_primitives/sklearn.decomposition.DictionaryLearning.json b/mlblocks_primitives/sklearn.decomposition.DictionaryLearning.json
index 88f77d10..9515a05a 100644
--- a/mlblocks_primitives/sklearn.decomposition.DictionaryLearning.json
+++ b/mlblocks_primitives/sklearn.decomposition.DictionaryLearning.json
@@ -35,14 +35,29 @@
     },
     "hyperparameters": {
         "fixed": {
-            "split_sign": {
+            "n_jobs": {
+                "type": "int",
+                "default": null
+            },
+            "code_init": {
+                "type": "iterable",
+                "default": null
+            },
+            "dict_init": {
+                "type": "iterable",
+                "default": null
+            },
+            "verbose": {
                 "type": "bool",
                 "default": false
             },
-            "n_jobs": {
-                "type": "int",
-                "default": -1,
-                "range": [-1, 10]
+            "positive_code": {
+                "type": "bool",
+                "default": false
+            },
+            "positive_dict": {
+                "type": "bool",
+                "default": false
             }
         },
         "tunable": {
@@ -85,15 +100,7 @@
                 "default": 1.0,
                 "range": [0.0, 10.0]
             },
-            "code_init": {
-                "type": "iterable",
-                "default": null
-            },
-            "dict_init": {
-                "type": "iterable",
-                "default": null
-            },
-            "verbose": {
+            "split_sign": {
                 "type": "bool",
                 "default": false
             }
diff --git a/mlblocks_primitives/sklearn.decomposition.KernelPCA.json b/mlblocks_primitives/sklearn.decomposition.KernelPCA.json
index 184085ac..a8e5394b 100644
--- a/mlblocks_primitives/sklearn.decomposition.KernelPCA.json
+++ b/mlblocks_primitives/sklearn.decomposition.KernelPCA.json
@@ -37,9 +37,12 @@
         "fixed": {
             "n_jobs": {
                 "type": "int",
-                "default": 1,
-                "range": [-1, 10]
-            }
+                "default": null
+            },
+            "copy_X": {
+                "type": "bool",
+                "default": true
+           }
         },
         "tunable": {
            "n_components": {
@@ -100,10 +103,6 @@
            "remove_zero_eig": {
                "type": "bool",
                "default": false
-           },
-           "copy_X": {
-               "type": "bool",
-               "default": true
            }
         }
     }

From 289eabd5e8c2b8def75f42e5bd87a66770153684 Mon Sep 17 00:00:00 2001
From: Plamen Valentinov <pvkdeveloper@gmx.com>
Date: Wed, 3 Oct 2018 12:30:42 +0200
Subject: [PATCH 11/32] Moved contamination to tunable

---
 .../sklearn.ensemble.IsolationForest.json              | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/mlblocks_primitives/sklearn.ensemble.IsolationForest.json b/mlblocks_primitives/sklearn.ensemble.IsolationForest.json
index 3f1d6366..0522cb89 100644
--- a/mlblocks_primitives/sklearn.ensemble.IsolationForest.json
+++ b/mlblocks_primitives/sklearn.ensemble.IsolationForest.json
@@ -43,11 +43,6 @@
                 "type": "int",
                 "default": null
             },
-            "contamination": {
-                "type": "float",
-                "default": 0.1,
-                "range": [0.0, 0.5]
-            },
             "verbose": {
                 "type": "int",
                 "default": 0,
@@ -73,6 +68,11 @@
             "bootstrap": {
                 "type": "bool",
                 "default": false
+            },
+            "contamination": {
+                "type": "float",
+                "default": 0.1,
+                "range": [0.0, 0.5]
             }
         }
     }

From 3d9dbd2861cb139d7792d28698b332d73ae3ffc6 Mon Sep 17 00:00:00 2001
From: Plamen Valentinov <pvkdeveloper@gmx.com>
Date: Wed, 3 Oct 2018 13:03:03 +0200
Subject: [PATCH 12/32] Fixes on hyperparams

---
 .../sklearn.decomposition.FactorAnalysis.json |  8 ++++----
 .../sklearn.decomposition.FastICA.json        | 19 ++++++++++---------
 .../sklearn.decomposition.KernelPCA.json      |  8 ++++----
 3 files changed, 18 insertions(+), 17 deletions(-)

diff --git a/mlblocks_primitives/sklearn.decomposition.FactorAnalysis.json b/mlblocks_primitives/sklearn.decomposition.FactorAnalysis.json
index 6a9e7b72..9ad0d89a 100644
--- a/mlblocks_primitives/sklearn.decomposition.FactorAnalysis.json
+++ b/mlblocks_primitives/sklearn.decomposition.FactorAnalysis.json
@@ -38,6 +38,10 @@
             "copy": {
                 "type": "bool",
                 "default": true
+            },
+            "noise_variance_init": {
+                "type": "iterable",
+                "default": null
             }
         },
         "tunable": {
@@ -56,10 +60,6 @@
                 "default": 1000,
                 "range": [10, 10000]
             },
-            "noise_variance_init": {
-                "type": "iterable",
-                "default": null
-            },
             "svd_method": {
                 "type": "str",
                 "default": "randomized",
diff --git a/mlblocks_primitives/sklearn.decomposition.FastICA.json b/mlblocks_primitives/sklearn.decomposition.FastICA.json
index a73b3c2a..5b081c05 100644
--- a/mlblocks_primitives/sklearn.decomposition.FastICA.json
+++ b/mlblocks_primitives/sklearn.decomposition.FastICA.json
@@ -34,7 +34,16 @@
         ]
     },
     "hyperparameters": {
-        "fixed": {},
+        "fixed": {
+            "w_init": {
+                "type": "iterable",
+                "default": null
+            },
+            "fun_args": {
+                "type": "iterable",
+                "default": null
+            }
+        },
         "tunable": {
             "n_components": {
                 "type": "int",
@@ -55,10 +64,6 @@
                 "default": "logcosh",
                 "values": ["logcosh", "exp", "cube"]
             },
-            "fun_args": {
-                "type": "iterable",
-                "default": null
-            },
             "max_iter": {
                 "type": "int",
                 "default": 200,
@@ -68,10 +73,6 @@
                 "type": "float",
                 "default": 0.0001,
                 "range": [0.00001, 0.5]
-            },
-            "w_init": {
-                "type": "iterable",
-                "default": null
             }
         }
     }
diff --git a/mlblocks_primitives/sklearn.decomposition.KernelPCA.json b/mlblocks_primitives/sklearn.decomposition.KernelPCA.json
index a8e5394b..3fc2cbde 100644
--- a/mlblocks_primitives/sklearn.decomposition.KernelPCA.json
+++ b/mlblocks_primitives/sklearn.decomposition.KernelPCA.json
@@ -42,6 +42,10 @@
             "copy_X": {
                 "type": "bool",
                 "default": true
+           },
+           "kernel_params": {
+               "type": "str",
+               "default": null
            }
         },
         "tunable": {
@@ -72,10 +76,6 @@
                "default": 1.0,
                "range": [0.0, 10.0]
            },
-           "kernel_params": {
-               "type": "str",
-               "default": null
-           },
            "alpha": {
                "type": "int",
                "default": 1,

From c4a3a0e45918924f6725661e9ce22b674adc6744 Mon Sep 17 00:00:00 2001
From: Plamen Valentinov <pvkdeveloper@gmx.com>
Date: Wed, 17 Oct 2018 14:23:18 +0200
Subject: [PATCH 13/32] Updated scikit requirement

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index b8a1257d..b5a261d8 100644
--- a/setup.py
+++ b/setup.py
@@ -24,7 +24,7 @@
     'opencv-python>=3.4.0.12',
     'python-louvain>=0.10',
     'scikit-image>=0.13.1',
-    'scikit-learn>=0.19.1',
+    'scikit-learn>=0.20',
     'scipy>=1.1.0',
     'tensorflow==1.8.0',
     'xgboost>=0.72.1',

From 08bf926d665fed814beec338dc2d76f6700f0f8d Mon Sep 17 00:00:00 2001
From: Plamen Valentinov <pvkdeveloper@gmx.com>
Date: Wed, 17 Oct 2018 14:27:28 +0200
Subject: [PATCH 14/32] Updated scikit requirement.

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 797d6086..86b396b7 100644
--- a/setup.py
+++ b/setup.py
@@ -24,7 +24,7 @@
     'opencv-python>=3.4.0.12',
     'python-louvain>=0.10',
     'scikit-image>=0.13.1',
-    'scikit-learn>=0.19.1',
+    'scikit-learn>=0.20',
     'scipy>=1.1.0',
     'tensorflow==1.8.0',
     'xgboost>=0.72.1',

From 738eceaddb214be137b1efcea94fc94fb706ee3c Mon Sep 17 00:00:00 2001
From: Plamen Valentinov <pvkdeveloper@gmx.com>
Date: Fri, 30 Nov 2018 15:03:24 +0100
Subject: [PATCH 15/32] Created pipelines with sklearn.ensemble classifiers

---
 .../sklearn.ensemble.AdaBoostClassifier.json  | 24 +++++++++++++++++
 .../sklearn.ensemble.BaggingClassifier.json   | 26 ++++++++++++++++++
 ...sklearn.ensemble.ExtraTreesClassifier.json | 24 +++++++++++++++++
 ...n.ensemble.GradientBoostingClassifier.json | 27 +++++++++++++++++++
 ...learn.ensemble.RandomForestClassifier.json | 26 ++++++++++++++++++
 ...n.ensemble.GradientBoostingClassifier.json |  2 +-
 6 files changed, 128 insertions(+), 1 deletion(-)
 create mode 100644 mlblocks_pipelines/sklearn.ensemble.AdaBoostClassifier.json
 create mode 100644 mlblocks_pipelines/sklearn.ensemble.BaggingClassifier.json
 create mode 100644 mlblocks_pipelines/sklearn.ensemble.ExtraTreesClassifier.json
 create mode 100644 mlblocks_pipelines/sklearn.ensemble.GradientBoostingClassifier.json
 create mode 100644 mlblocks_pipelines/sklearn.ensemble.RandomForestClassifier.json

diff --git a/mlblocks_pipelines/sklearn.ensemble.AdaBoostClassifier.json b/mlblocks_pipelines/sklearn.ensemble.AdaBoostClassifier.json
new file mode 100644
index 00000000..54d79d18
--- /dev/null
+++ b/mlblocks_pipelines/sklearn.ensemble.AdaBoostClassifier.json
@@ -0,0 +1,24 @@
+{
+    "metadata": {
+        "name": "AdaBoostClassifier/classification/default",
+        "data_type": "tabular",
+        "task_type": "classification"
+    },
+    "validation": {
+        "dataset": "iris",
+        "context": {}
+    },
+    "primitives": [
+        "mlprimitives.preprocessing.ClassEncoder",
+        "sklearn.preprocessing.Imputer",
+        "sklearn.preprocessing.StandardScaler",
+        "sklearn.ensemble.AdaBoostClassifier",
+        "mlprimitives.preprocessing.ClassDecoder"
+    ],
+    "hyperparameters": {
+        "sklearn.ensemble.AdaBoostClassifier#1": {
+            "learning_rate": 0.1,
+            "n_estimators": 300
+        }
+    }
+}
diff --git a/mlblocks_pipelines/sklearn.ensemble.BaggingClassifier.json b/mlblocks_pipelines/sklearn.ensemble.BaggingClassifier.json
new file mode 100644
index 00000000..c8a8d4a5
--- /dev/null
+++ b/mlblocks_pipelines/sklearn.ensemble.BaggingClassifier.json
@@ -0,0 +1,26 @@
+{
+    "metadata": {
+        "name": "BaggingClassifier/classification/default",
+        "data_type": "tabular",
+        "task_type": "classification"
+    },
+    "validation": {
+        "dataset": "iris",
+        "context": {}
+    },
+    "primitives": [
+        "mlprimitives.preprocessing.ClassEncoder",
+        "sklearn.preprocessing.Imputer",
+        "sklearn.preprocessing.StandardScaler",
+        "sklearn.ensemble.BaggingClassifier",
+        "mlprimitives.preprocessing.ClassDecoder"
+    ],
+    "hyperparameters": {
+        "sklearn.ensemble.BaggingClassifier#1": {
+            "n_jobs": -1,
+            "n_estimators": 300,
+            "max_samples": 1.0,
+            "bootstrap_features": true
+        }
+    }
+}
diff --git a/mlblocks_pipelines/sklearn.ensemble.ExtraTreesClassifier.json b/mlblocks_pipelines/sklearn.ensemble.ExtraTreesClassifier.json
new file mode 100644
index 00000000..50660710
--- /dev/null
+++ b/mlblocks_pipelines/sklearn.ensemble.ExtraTreesClassifier.json
@@ -0,0 +1,24 @@
+{
+    "metadata": {
+        "name": "ExtraTreesClassifier/classification/default",
+        "data_type": "tabular",
+        "task_type": "classification"
+    },
+    "validation": {
+        "dataset": "iris",
+        "context": {}
+    },
+    "primitives": [
+        "mlprimitives.preprocessing.ClassEncoder",
+        "sklearn.preprocessing.Imputer",
+        "sklearn.preprocessing.StandardScaler",
+        "sklearn.ensemble.ExtraTreesClassifier",
+        "mlprimitives.preprocessing.ClassDecoder"
+    ],
+    "hyperparameters": {
+        "sklearn.ensemble.ExtraTreesClassifier#1": {
+            "n_jobs": -1,
+            "n_estimators": 300
+        }
+    }
+}
diff --git a/mlblocks_pipelines/sklearn.ensemble.GradientBoostingClassifier.json b/mlblocks_pipelines/sklearn.ensemble.GradientBoostingClassifier.json
new file mode 100644
index 00000000..8cc73441
--- /dev/null
+++ b/mlblocks_pipelines/sklearn.ensemble.GradientBoostingClassifier.json
@@ -0,0 +1,27 @@
+{
+    "metadata": {
+        "name": "GradientBoosting/classification/default",
+        "data_type": "tabular",
+        "task_type": "classification"
+    },
+    "validation": {
+        "dataset": "iris",
+        "context": {}
+    },
+    "primitives": [
+        "mlprimitives.preprocessing.ClassEncoder",
+        "sklearn.preprocessing.Imputer",
+        "sklearn.preprocessing.StandardScaler",
+        "sklearn.ensemble.GradientBoostingClassifier",
+        "mlprimitives.preprocessing.ClassDecoder"
+    ],
+    "hyperparameters": {
+        "sklearn.ensemble.GradientBoostingClassifier#1": {
+            "warm_start": true,
+            "presort": true,
+            "learning_rate": 0.1,
+            "n_estimators": 300,
+            "max_depth": 5
+        }
+    }
+}
diff --git a/mlblocks_pipelines/sklearn.ensemble.RandomForestClassifier.json b/mlblocks_pipelines/sklearn.ensemble.RandomForestClassifier.json
new file mode 100644
index 00000000..1ea222eb
--- /dev/null
+++ b/mlblocks_pipelines/sklearn.ensemble.RandomForestClassifier.json
@@ -0,0 +1,26 @@
+{
+    "metadata": {
+        "name": "RandomForestClassifier/classification/default",
+        "data_type": "tabular",
+        "task_type": "classification"
+    },
+    "validation": {
+        "dataset": "iris",
+        "context": {}
+    },
+    "primitives": [
+        "mlprimitives.preprocessing.ClassEncoder",
+        "sklearn.preprocessing.Imputer",
+        "sklearn.preprocessing.StandardScaler",
+        "sklearn.ensemble.RandomForestClassifier",
+        "mlprimitives.preprocessing.ClassDecoder"
+    ],
+    "hyperparameters": {
+        "sklearn.ensemble.RandomForestClassifier#1": {
+            "n_jobs": -1,
+            "n_estimators": 300,
+            "criterion": "entropy",
+            "warm_start": true
+        }
+    }
+}
diff --git a/mlblocks_primitives/sklearn.ensemble.GradientBoostingClassifier.json b/mlblocks_primitives/sklearn.ensemble.GradientBoostingClassifier.json
index 9d79ec83..574bcaa0 100644
--- a/mlblocks_primitives/sklearn.ensemble.GradientBoostingClassifier.json
+++ b/mlblocks_primitives/sklearn.ensemble.GradientBoostingClassifier.json
@@ -53,7 +53,7 @@
             },
             "presort": {
                 "type": "bool",
-                "default": "false"
+                "default": false
             }
         },
         "tunable": {

From a293c7221639b53baa10beefa7d5d9e2b8fa3363 Mon Sep 17 00:00:00 2001
From: Plamen Valentinov <pvkdeveloper@gmx.com>
Date: Fri, 30 Nov 2018 17:19:26 +0100
Subject: [PATCH 16/32] Created pipelines with sklearn.ensemble regressors

---
 .../sklearn.ensemble.AdaBoostRegressor.json   | 22 ++++++++++++++++
 .../sklearn.ensemble.BaggingRegressor.json    | 25 +++++++++++++++++++
 .../sklearn.ensemble.ExtraTreesRegressor.json | 22 ++++++++++++++++
 ...rn.ensemble.GradientBoostingRegressor.json | 23 +++++++++++++++++
 ...klearn.ensemble.RandomForestRegressor.json | 23 +++++++++++++++++
 ...rn.ensemble.GradientBoostingRegressor.json |  2 +-
 6 files changed, 116 insertions(+), 1 deletion(-)
 create mode 100644 mlblocks_pipelines/sklearn.ensemble.AdaBoostRegressor.json
 create mode 100644 mlblocks_pipelines/sklearn.ensemble.BaggingRegressor.json
 create mode 100644 mlblocks_pipelines/sklearn.ensemble.ExtraTreesRegressor.json
 create mode 100644 mlblocks_pipelines/sklearn.ensemble.GradientBoostingRegressor.json
 create mode 100644 mlblocks_pipelines/sklearn.ensemble.RandomForestRegressor.json

diff --git a/mlblocks_pipelines/sklearn.ensemble.AdaBoostRegressor.json b/mlblocks_pipelines/sklearn.ensemble.AdaBoostRegressor.json
new file mode 100644
index 00000000..87155b9a
--- /dev/null
+++ b/mlblocks_pipelines/sklearn.ensemble.AdaBoostRegressor.json
@@ -0,0 +1,22 @@
+{
+    "metadata": {
+        "name": "AdaBoostRegressor/regression/default",
+        "data_type": "tabular",
+        "task_type": "regression"
+    },
+    "validation": {
+        "dataset": "boston",
+        "context": {}
+    },
+    "primitives": [
+        "sklearn.preprocessing.Imputer",
+        "sklearn.preprocessing.StandardScaler",
+        "sklearn.ensemble.AdaBoostRegressor"
+    ],
+    "hyperparameters": {
+        "sklearn.ensemble.AdaBoostRegressor#1": {
+            "learning_rate": 0.1,
+            "n_estimators": 300
+        }
+    }
+}
diff --git a/mlblocks_pipelines/sklearn.ensemble.BaggingRegressor.json b/mlblocks_pipelines/sklearn.ensemble.BaggingRegressor.json
new file mode 100644
index 00000000..3e3feb59
--- /dev/null
+++ b/mlblocks_pipelines/sklearn.ensemble.BaggingRegressor.json
@@ -0,0 +1,25 @@
+{
+    "metadata": {
+        "name": "BaggingRegressor/regression/default",
+        "data_type": "tabular",
+        "task_type": "regression"
+    },
+    "validation": {
+        "dataset": "boston",
+        "context": {}
+    },
+    "primitives": [
+        "sklearn.preprocessing.Imputer",
+        "sklearn.preprocessing.StandardScaler",
+        "sklearn.ensemble.BaggingRegressor"
+    ],
+    "hyperparameters": {
+        "sklearn.ensemble.BaggingRegressor#1": {
+            "n_jobs": -1,
+            "warm_start": true,
+            "n_estimators": 300
+
+        }
+    }
+}
+
diff --git a/mlblocks_pipelines/sklearn.ensemble.ExtraTreesRegressor.json b/mlblocks_pipelines/sklearn.ensemble.ExtraTreesRegressor.json
new file mode 100644
index 00000000..c58e75d0
--- /dev/null
+++ b/mlblocks_pipelines/sklearn.ensemble.ExtraTreesRegressor.json
@@ -0,0 +1,22 @@
+{
+    "metadata": {
+        "name": "ExtraTreesRegressor/regression/default",
+        "data_type": "tabular",
+        "task_type": "regression"
+    },
+    "validation": {
+        "dataset": "boston",
+        "context": {}
+    },
+    "primitives": [
+        "sklearn.preprocessing.Imputer",
+        "sklearn.preprocessing.StandardScaler",
+        "sklearn.ensemble.ExtraTreesRegressor"
+    ],
+    "hyperparameters": {
+        "sklearn.ensemble.ExtraTreesRegressor#1": {
+            "n_jobs": -1,
+            "n_estimators": 300
+        }
+    }
+}
\ No newline at end of file
diff --git a/mlblocks_pipelines/sklearn.ensemble.GradientBoostingRegressor.json b/mlblocks_pipelines/sklearn.ensemble.GradientBoostingRegressor.json
new file mode 100644
index 00000000..ec56d0d6
--- /dev/null
+++ b/mlblocks_pipelines/sklearn.ensemble.GradientBoostingRegressor.json
@@ -0,0 +1,23 @@
+{
+    "metadata": {
+        "name": "GradientBoostingRegressor/regression/default",
+        "data_type": "tabular",
+        "task_type": "regression"
+    },
+    "validation": {
+        "dataset": "boston",
+        "context": {}
+    },
+    "primitives": [
+        "sklearn.preprocessing.Imputer",
+        "sklearn.preprocessing.StandardScaler",
+        "sklearn.ensemble.GradientBoostingRegressor"
+    ],
+    "hyperparameters": {
+        "sklearn.ensemble.GradientBoostingRegressor#1": {
+            "learning_rate": 0.1,
+            "n_estimators": 300,
+            "verbose": 1
+        }
+    }
+}
\ No newline at end of file
diff --git a/mlblocks_pipelines/sklearn.ensemble.RandomForestRegressor.json b/mlblocks_pipelines/sklearn.ensemble.RandomForestRegressor.json
new file mode 100644
index 00000000..be89b1d1
--- /dev/null
+++ b/mlblocks_pipelines/sklearn.ensemble.RandomForestRegressor.json
@@ -0,0 +1,23 @@
+{
+    "metadata": {
+        "name": "RandomForestRegressor/regression/default",
+        "data_type": "tabular",
+        "task_type": "regression"
+    },
+    "validation": {
+        "dataset": "boston",
+        "context": {}
+    },
+    "primitives": [
+        "sklearn.preprocessing.Imputer",
+        "sklearn.preprocessing.StandardScaler",
+        "sklearn.ensemble.RandomForestRegressor"
+    ],
+    "hyperparameters": {
+        "sklearn.ensemble.RandomForestRegressor#1": {
+            "n_jobs": -1,
+            "warm_start": true,
+            "n_estimators": 300
+        }
+    }
+}
diff --git a/mlblocks_primitives/sklearn.ensemble.GradientBoostingRegressor.json b/mlblocks_primitives/sklearn.ensemble.GradientBoostingRegressor.json
index 6766bf02..8659dd74 100644
--- a/mlblocks_primitives/sklearn.ensemble.GradientBoostingRegressor.json
+++ b/mlblocks_primitives/sklearn.ensemble.GradientBoostingRegressor.json
@@ -53,7 +53,7 @@
             },
             "presort": {
                 "type": "bool",
-                "default": "false"
+                "default": false
             }
         },
         "tunable": {

From 8965a4df92aa29f4a2122c06d3b55da3affb53d7 Mon Sep 17 00:00:00 2001
From: Plamen Valentinov <pvkdeveloper@gmx.com>
Date: Fri, 30 Nov 2018 17:33:34 +0100
Subject: [PATCH 17/32] Pipeline for Isolation Forest

---
 .../sklearn.ensemble.IsolationForest.json     | 24 +++++++++++++++++++
 1 file changed, 24 insertions(+)
 create mode 100644 mlblocks_pipelines/sklearn.ensemble.IsolationForest.json

diff --git a/mlblocks_pipelines/sklearn.ensemble.IsolationForest.json b/mlblocks_pipelines/sklearn.ensemble.IsolationForest.json
new file mode 100644
index 00000000..49135795
--- /dev/null
+++ b/mlblocks_pipelines/sklearn.ensemble.IsolationForest.json
@@ -0,0 +1,24 @@
+{
+    "metadata": {
+        "name": "IsolationForest/classification/default",
+        "data_type": "tabular",
+        "task_type": "classification"
+    },
+    "validation": {
+        "dataset": "iris",
+        "context": {}
+    },
+    "primitives": [
+        "mlprimitives.preprocessing.ClassEncoder",
+        "sklearn.preprocessing.Imputer",
+        "sklearn.preprocessing.StandardScaler",
+        "sklearn.ensemble.IsolationForest"
+    ],
+    "hyperparameters": {
+        "sklearn.ensemble.IsolationForest#1": {
+            "n_jobs": -1,
+            "n_estimators": 300,
+            "contamination": 0.2
+        }
+    }
+}

From 844f2e9cda1b41355e614c7ef778ebdd1e147ba3 Mon Sep 17 00:00:00 2001
From: Plamen Valentinov <pvkdeveloper@gmx.com>
Date: Fri, 30 Nov 2018 17:34:37 +0100
Subject: [PATCH 18/32] Fix isolation forest hyperparameter

---
 mlblocks_primitives/sklearn.ensemble.IsolationForest.json | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/mlblocks_primitives/sklearn.ensemble.IsolationForest.json b/mlblocks_primitives/sklearn.ensemble.IsolationForest.json
index 0522cb89..44090512 100644
--- a/mlblocks_primitives/sklearn.ensemble.IsolationForest.json
+++ b/mlblocks_primitives/sklearn.ensemble.IsolationForest.json
@@ -56,9 +56,8 @@
                 "range": [1, 500]
             },
             "max_samples": {
-                "type": "int",
-                "default": "1",
-                "range": [0, 100]
+                "type": "str",
+                "default": "auto"
             },
             "max_features": {
                 "type": "float",

From 2dc8f90114c5b2e8434b134b2235627d62682378 Mon Sep 17 00:00:00 2001
From: Plamen Valentinov <pvkdeveloper@gmx.com>
Date: Mon, 3 Dec 2018 12:57:43 +0100
Subject: [PATCH 19/32] Fix classification pipelines

---
 mlblocks_pipelines/sklearn.ensemble.AdaBoostClassifier.json    | 3 +--
 mlblocks_pipelines/sklearn.ensemble.BaggingClassifier.json     | 3 +--
 mlblocks_pipelines/sklearn.ensemble.ExtraTreesClassifier.json  | 3 +--
 .../sklearn.ensemble.GradientBoostingClassifier.json           | 3 +--
 .../sklearn.ensemble.RandomForestClassifier.json               | 3 +--
 5 files changed, 5 insertions(+), 10 deletions(-)

diff --git a/mlblocks_pipelines/sklearn.ensemble.AdaBoostClassifier.json b/mlblocks_pipelines/sklearn.ensemble.AdaBoostClassifier.json
index 54d79d18..2e1e025e 100644
--- a/mlblocks_pipelines/sklearn.ensemble.AdaBoostClassifier.json
+++ b/mlblocks_pipelines/sklearn.ensemble.AdaBoostClassifier.json
@@ -12,8 +12,7 @@
         "mlprimitives.preprocessing.ClassEncoder",
         "sklearn.preprocessing.Imputer",
         "sklearn.preprocessing.StandardScaler",
-        "sklearn.ensemble.AdaBoostClassifier",
-        "mlprimitives.preprocessing.ClassDecoder"
+        "sklearn.ensemble.AdaBoostClassifier"
     ],
     "hyperparameters": {
         "sklearn.ensemble.AdaBoostClassifier#1": {
diff --git a/mlblocks_pipelines/sklearn.ensemble.BaggingClassifier.json b/mlblocks_pipelines/sklearn.ensemble.BaggingClassifier.json
index c8a8d4a5..74546278 100644
--- a/mlblocks_pipelines/sklearn.ensemble.BaggingClassifier.json
+++ b/mlblocks_pipelines/sklearn.ensemble.BaggingClassifier.json
@@ -12,8 +12,7 @@
         "mlprimitives.preprocessing.ClassEncoder",
         "sklearn.preprocessing.Imputer",
         "sklearn.preprocessing.StandardScaler",
-        "sklearn.ensemble.BaggingClassifier",
-        "mlprimitives.preprocessing.ClassDecoder"
+        "sklearn.ensemble.BaggingClassifier"
     ],
     "hyperparameters": {
         "sklearn.ensemble.BaggingClassifier#1": {
diff --git a/mlblocks_pipelines/sklearn.ensemble.ExtraTreesClassifier.json b/mlblocks_pipelines/sklearn.ensemble.ExtraTreesClassifier.json
index 50660710..d2126353 100644
--- a/mlblocks_pipelines/sklearn.ensemble.ExtraTreesClassifier.json
+++ b/mlblocks_pipelines/sklearn.ensemble.ExtraTreesClassifier.json
@@ -12,8 +12,7 @@
         "mlprimitives.preprocessing.ClassEncoder",
         "sklearn.preprocessing.Imputer",
         "sklearn.preprocessing.StandardScaler",
-        "sklearn.ensemble.ExtraTreesClassifier",
-        "mlprimitives.preprocessing.ClassDecoder"
+        "sklearn.ensemble.ExtraTreesClassifier"
     ],
     "hyperparameters": {
         "sklearn.ensemble.ExtraTreesClassifier#1": {
diff --git a/mlblocks_pipelines/sklearn.ensemble.GradientBoostingClassifier.json b/mlblocks_pipelines/sklearn.ensemble.GradientBoostingClassifier.json
index 8cc73441..ad8090c3 100644
--- a/mlblocks_pipelines/sklearn.ensemble.GradientBoostingClassifier.json
+++ b/mlblocks_pipelines/sklearn.ensemble.GradientBoostingClassifier.json
@@ -12,8 +12,7 @@
         "mlprimitives.preprocessing.ClassEncoder",
         "sklearn.preprocessing.Imputer",
         "sklearn.preprocessing.StandardScaler",
-        "sklearn.ensemble.GradientBoostingClassifier",
-        "mlprimitives.preprocessing.ClassDecoder"
+        "sklearn.ensemble.GradientBoostingClassifier"
     ],
     "hyperparameters": {
         "sklearn.ensemble.GradientBoostingClassifier#1": {
diff --git a/mlblocks_pipelines/sklearn.ensemble.RandomForestClassifier.json b/mlblocks_pipelines/sklearn.ensemble.RandomForestClassifier.json
index 1ea222eb..655b52d7 100644
--- a/mlblocks_pipelines/sklearn.ensemble.RandomForestClassifier.json
+++ b/mlblocks_pipelines/sklearn.ensemble.RandomForestClassifier.json
@@ -12,8 +12,7 @@
         "mlprimitives.preprocessing.ClassEncoder",
         "sklearn.preprocessing.Imputer",
         "sklearn.preprocessing.StandardScaler",
-        "sklearn.ensemble.RandomForestClassifier",
-        "mlprimitives.preprocessing.ClassDecoder"
+        "sklearn.ensemble.RandomForestClassifier"
     ],
     "hyperparameters": {
         "sklearn.ensemble.RandomForestClassifier#1": {

From 41ab90fe544834844f5b779a19eb28e8e55f0745 Mon Sep 17 00:00:00 2001
From: Plamen Valentinov <pvkdeveloper@gmx.com>
Date: Mon, 3 Dec 2018 14:55:54 +0100
Subject: [PATCH 20/32] Created pipeline tests for sklearn.decomposition

---
 ...earn.decomposition.DictionaryLearning.json | 20 +++++++++++++++++++
 .../sklearn.decomposition.FactorAnalysis.json | 20 +++++++++++++++++++
 .../sklearn.decomposition.FastICA.json        | 19 ++++++++++++++++++
 .../sklearn.decomposition.KernelPCA.json      | 19 ++++++++++++++++++
 .../sklearn.decomposition.PCA.json            | 20 +++++++++++++++++++
 .../sklearn.decomposition.TruncatedSVD.json   | 20 +++++++++++++++++++
 6 files changed, 118 insertions(+)
 create mode 100644 mlblocks_pipelines/sklearn.decomposition.DictionaryLearning.json
 create mode 100644 mlblocks_pipelines/sklearn.decomposition.FactorAnalysis.json
 create mode 100644 mlblocks_pipelines/sklearn.decomposition.FastICA.json
 create mode 100644 mlblocks_pipelines/sklearn.decomposition.KernelPCA.json
 create mode 100644 mlblocks_pipelines/sklearn.decomposition.PCA.json
 create mode 100644 mlblocks_pipelines/sklearn.decomposition.TruncatedSVD.json

diff --git a/mlblocks_pipelines/sklearn.decomposition.DictionaryLearning.json b/mlblocks_pipelines/sklearn.decomposition.DictionaryLearning.json
new file mode 100644
index 00000000..3658d1da
--- /dev/null
+++ b/mlblocks_pipelines/sklearn.decomposition.DictionaryLearning.json
@@ -0,0 +1,20 @@
+{
+    "metadata": {
+        "name": "FactorAnalysis/classification/default",
+        "data_type": "tabular",
+        "task_type": "classification"
+    },
+    "validation": {
+        "dataset": "iris",
+        "context": {}
+    },
+    "primitives": [
+        "sklearn.decomposition.DictionaryLearning",
+        "sklearn.decomposition.FactorAnalysis",
+        "sklearn.preprocessing.StandardScaler",
+        "sklearn.ensemble.RandomForestClassifier"
+    ],
+    "hyperparameters": {
+        "sklearn.decomposition.FactorAnalysis#1": {}
+    }
+}
diff --git a/mlblocks_pipelines/sklearn.decomposition.FactorAnalysis.json b/mlblocks_pipelines/sklearn.decomposition.FactorAnalysis.json
new file mode 100644
index 00000000..3658d1da
--- /dev/null
+++ b/mlblocks_pipelines/sklearn.decomposition.FactorAnalysis.json
@@ -0,0 +1,20 @@
+{
+    "metadata": {
+        "name": "FactorAnalysis/classification/default",
+        "data_type": "tabular",
+        "task_type": "classification"
+    },
+    "validation": {
+        "dataset": "iris",
+        "context": {}
+    },
+    "primitives": [
+        "sklearn.decomposition.DictionaryLearning",
+        "sklearn.decomposition.FactorAnalysis",
+        "sklearn.preprocessing.StandardScaler",
+        "sklearn.ensemble.RandomForestClassifier"
+    ],
+    "hyperparameters": {
+        "sklearn.decomposition.FactorAnalysis#1": {}
+    }
+}
diff --git a/mlblocks_pipelines/sklearn.decomposition.FastICA.json b/mlblocks_pipelines/sklearn.decomposition.FastICA.json
new file mode 100644
index 00000000..488b1751
--- /dev/null
+++ b/mlblocks_pipelines/sklearn.decomposition.FastICA.json
@@ -0,0 +1,19 @@
+{
+    "metadata": {
+        "name": "FastICA/classification/default",
+        "data_type": "tabular",
+        "task_type": "classification"
+    },
+    "validation": {
+        "dataset": "iris",
+        "context": {}
+    },
+    "primitives": [
+        "sklearn.decomposition.DictionaryLearning",
+        "sklearn.decomposition.FastICA",
+        "sklearn.ensemble.RandomForestClassifier"
+    ],
+    "hyperparameters": {
+        "sklearn.decomposition.FastICA#1": {}
+    }
+}
diff --git a/mlblocks_pipelines/sklearn.decomposition.KernelPCA.json b/mlblocks_pipelines/sklearn.decomposition.KernelPCA.json
new file mode 100644
index 00000000..13e2c99f
--- /dev/null
+++ b/mlblocks_pipelines/sklearn.decomposition.KernelPCA.json
@@ -0,0 +1,19 @@
+{
+    "metadata": {
+        "name": "KernelPCA/classification/default",
+        "data_type": "tabular",
+        "task_type": "classification"
+    },
+    "validation": {
+        "dataset": "iris",
+        "context": {}
+    },
+    "primitives": [
+        "sklearn.decomposition.KernelPCA",
+        "sklearn.preprocessing.StandardScaler",
+        "sklearn.ensemble.RandomForestClassifier"
+    ],
+    "hyperparameters": {
+        "sklearn.decomposition.KernelPCA#1": {}
+    }
+}
diff --git a/mlblocks_pipelines/sklearn.decomposition.PCA.json b/mlblocks_pipelines/sklearn.decomposition.PCA.json
new file mode 100644
index 00000000..1cfb9084
--- /dev/null
+++ b/mlblocks_pipelines/sklearn.decomposition.PCA.json
@@ -0,0 +1,20 @@
+{
+    "metadata": {
+        "name": "PCA/classification/default",
+        "data_type": "tabular",
+        "task_type": "classification"
+    },
+    "validation": {
+        "dataset": "iris",
+        "context": {}
+    },
+    "primitives": [
+        "sklearn.decomposition.DictionaryLearning",
+        "sklearn.decomposition.PCA",
+        "sklearn.preprocessing.StandardScaler",
+        "sklearn.ensemble.RandomForestClassifier"
+    ],
+    "hyperparameters": {
+        "sklearn.decomposition.PCA#1": {}
+    }
+}
diff --git a/mlblocks_pipelines/sklearn.decomposition.TruncatedSVD.json b/mlblocks_pipelines/sklearn.decomposition.TruncatedSVD.json
new file mode 100644
index 00000000..ccecc11f
--- /dev/null
+++ b/mlblocks_pipelines/sklearn.decomposition.TruncatedSVD.json
@@ -0,0 +1,20 @@
+{
+    "metadata": {
+        "name": "TruncatedSVD/classification/default",
+        "data_type": "tabular",
+        "task_type": "classification"
+    },
+    "validation": {
+        "dataset": "iris",
+        "context": {}
+    },
+    "primitives": [
+        "sklearn.decomposition.DictionaryLearning",
+        "sklearn.decomposition.TruncatedSVD",
+        "sklearn.preprocessing.StandardScaler",
+        "sklearn.ensemble.RandomForestClassifier"
+    ],
+    "hyperparameters": {
+        "sklearn.decomposition.TruncatedSVD#1": {}
+    }
+}

From d65828815e3a12247069e3e4422e99f77e2d71ae Mon Sep 17 00:00:00 2001
From: Plamen Valentinov <pvkdeveloper@gmx.com>
Date: Mon, 3 Dec 2018 15:57:58 +0100
Subject: [PATCH 21/32] Changed sklearn.ensemble pipelines to have default
 values and added RandomTreesEmbedding

---
 .../sklearn.ensemble.AdaBoostClassifier.json  |  5 +----
 .../sklearn.ensemble.AdaBoostRegressor.json   |  5 +----
 .../sklearn.ensemble.BaggingClassifier.json   |  7 +------
 .../sklearn.ensemble.BaggingRegressor.json    |  7 +------
 ...sklearn.ensemble.ExtraTreesClassifier.json |  5 +----
 .../sklearn.ensemble.ExtraTreesRegressor.json |  5 +----
 ...n.ensemble.GradientBoostingClassifier.json |  8 +------
 ...rn.ensemble.GradientBoostingRegressor.json |  6 +-----
 .../sklearn.ensemble.IsolationForest.json     |  6 +-----
 ...learn.ensemble.RandomForestClassifier.json |  7 +------
 ...klearn.ensemble.RandomForestRegressor.json |  6 +-----
 ...sklearn.ensemble.RandomTreesEmbedding.json | 21 +++++++++++++++++++
 12 files changed, 32 insertions(+), 56 deletions(-)
 create mode 100644 mlblocks_pipelines/sklearn.ensemble.RandomTreesEmbedding.json

diff --git a/mlblocks_pipelines/sklearn.ensemble.AdaBoostClassifier.json b/mlblocks_pipelines/sklearn.ensemble.AdaBoostClassifier.json
index 2e1e025e..414524fe 100644
--- a/mlblocks_pipelines/sklearn.ensemble.AdaBoostClassifier.json
+++ b/mlblocks_pipelines/sklearn.ensemble.AdaBoostClassifier.json
@@ -15,9 +15,6 @@
         "sklearn.ensemble.AdaBoostClassifier"
     ],
     "hyperparameters": {
-        "sklearn.ensemble.AdaBoostClassifier#1": {
-            "learning_rate": 0.1,
-            "n_estimators": 300
-        }
+        "sklearn.ensemble.AdaBoostClassifier#1": {}
     }
 }
diff --git a/mlblocks_pipelines/sklearn.ensemble.AdaBoostRegressor.json b/mlblocks_pipelines/sklearn.ensemble.AdaBoostRegressor.json
index 87155b9a..74277d7f 100644
--- a/mlblocks_pipelines/sklearn.ensemble.AdaBoostRegressor.json
+++ b/mlblocks_pipelines/sklearn.ensemble.AdaBoostRegressor.json
@@ -14,9 +14,6 @@
         "sklearn.ensemble.AdaBoostRegressor"
     ],
     "hyperparameters": {
-        "sklearn.ensemble.AdaBoostRegressor#1": {
-            "learning_rate": 0.1,
-            "n_estimators": 300
-        }
+        "sklearn.ensemble.AdaBoostRegressor#1": {}
     }
 }
diff --git a/mlblocks_pipelines/sklearn.ensemble.BaggingClassifier.json b/mlblocks_pipelines/sklearn.ensemble.BaggingClassifier.json
index 74546278..c3cd98b4 100644
--- a/mlblocks_pipelines/sklearn.ensemble.BaggingClassifier.json
+++ b/mlblocks_pipelines/sklearn.ensemble.BaggingClassifier.json
@@ -15,11 +15,6 @@
         "sklearn.ensemble.BaggingClassifier"
     ],
     "hyperparameters": {
-        "sklearn.ensemble.BaggingClassifier#1": {
-            "n_jobs": -1,
-            "n_estimators": 300,
-            "max_samples": 1.0,
-            "bootstrap_features": true
-        }
+        "sklearn.ensemble.BaggingClassifier#1": {}
     }
 }
diff --git a/mlblocks_pipelines/sklearn.ensemble.BaggingRegressor.json b/mlblocks_pipelines/sklearn.ensemble.BaggingRegressor.json
index 3e3feb59..4f6dfdb4 100644
--- a/mlblocks_pipelines/sklearn.ensemble.BaggingRegressor.json
+++ b/mlblocks_pipelines/sklearn.ensemble.BaggingRegressor.json
@@ -14,12 +14,7 @@
         "sklearn.ensemble.BaggingRegressor"
     ],
     "hyperparameters": {
-        "sklearn.ensemble.BaggingRegressor#1": {
-            "n_jobs": -1,
-            "warm_start": true,
-            "n_estimators": 300
-
-        }
+        "sklearn.ensemble.BaggingRegressor#1": {}
     }
 }
 
diff --git a/mlblocks_pipelines/sklearn.ensemble.ExtraTreesClassifier.json b/mlblocks_pipelines/sklearn.ensemble.ExtraTreesClassifier.json
index d2126353..49790e86 100644
--- a/mlblocks_pipelines/sklearn.ensemble.ExtraTreesClassifier.json
+++ b/mlblocks_pipelines/sklearn.ensemble.ExtraTreesClassifier.json
@@ -15,9 +15,6 @@
         "sklearn.ensemble.ExtraTreesClassifier"
     ],
     "hyperparameters": {
-        "sklearn.ensemble.ExtraTreesClassifier#1": {
-            "n_jobs": -1,
-            "n_estimators": 300
-        }
+        "sklearn.ensemble.ExtraTreesClassifier#1": {}
     }
 }
diff --git a/mlblocks_pipelines/sklearn.ensemble.ExtraTreesRegressor.json b/mlblocks_pipelines/sklearn.ensemble.ExtraTreesRegressor.json
index c58e75d0..00b305c5 100644
--- a/mlblocks_pipelines/sklearn.ensemble.ExtraTreesRegressor.json
+++ b/mlblocks_pipelines/sklearn.ensemble.ExtraTreesRegressor.json
@@ -14,9 +14,6 @@
         "sklearn.ensemble.ExtraTreesRegressor"
     ],
     "hyperparameters": {
-        "sklearn.ensemble.ExtraTreesRegressor#1": {
-            "n_jobs": -1,
-            "n_estimators": 300
-        }
+        "sklearn.ensemble.ExtraTreesRegressor#1": {}
     }
 }
\ No newline at end of file
diff --git a/mlblocks_pipelines/sklearn.ensemble.GradientBoostingClassifier.json b/mlblocks_pipelines/sklearn.ensemble.GradientBoostingClassifier.json
index ad8090c3..7a92adfa 100644
--- a/mlblocks_pipelines/sklearn.ensemble.GradientBoostingClassifier.json
+++ b/mlblocks_pipelines/sklearn.ensemble.GradientBoostingClassifier.json
@@ -15,12 +15,6 @@
         "sklearn.ensemble.GradientBoostingClassifier"
     ],
     "hyperparameters": {
-        "sklearn.ensemble.GradientBoostingClassifier#1": {
-            "warm_start": true,
-            "presort": true,
-            "learning_rate": 0.1,
-            "n_estimators": 300,
-            "max_depth": 5
-        }
+        "sklearn.ensemble.GradientBoostingClassifier#1": {}
     }
 }
diff --git a/mlblocks_pipelines/sklearn.ensemble.GradientBoostingRegressor.json b/mlblocks_pipelines/sklearn.ensemble.GradientBoostingRegressor.json
index ec56d0d6..6d4ff0d7 100644
--- a/mlblocks_pipelines/sklearn.ensemble.GradientBoostingRegressor.json
+++ b/mlblocks_pipelines/sklearn.ensemble.GradientBoostingRegressor.json
@@ -14,10 +14,6 @@
         "sklearn.ensemble.GradientBoostingRegressor"
     ],
     "hyperparameters": {
-        "sklearn.ensemble.GradientBoostingRegressor#1": {
-            "learning_rate": 0.1,
-            "n_estimators": 300,
-            "verbose": 1
-        }
+        "sklearn.ensemble.GradientBoostingRegressor#1": {}
     }
 }
\ No newline at end of file
diff --git a/mlblocks_pipelines/sklearn.ensemble.IsolationForest.json b/mlblocks_pipelines/sklearn.ensemble.IsolationForest.json
index 49135795..9b684039 100644
--- a/mlblocks_pipelines/sklearn.ensemble.IsolationForest.json
+++ b/mlblocks_pipelines/sklearn.ensemble.IsolationForest.json
@@ -15,10 +15,6 @@
         "sklearn.ensemble.IsolationForest"
     ],
     "hyperparameters": {
-        "sklearn.ensemble.IsolationForest#1": {
-            "n_jobs": -1,
-            "n_estimators": 300,
-            "contamination": 0.2
-        }
+        "sklearn.ensemble.IsolationForest#1": {}
     }
 }
diff --git a/mlblocks_pipelines/sklearn.ensemble.RandomForestClassifier.json b/mlblocks_pipelines/sklearn.ensemble.RandomForestClassifier.json
index 655b52d7..747f6118 100644
--- a/mlblocks_pipelines/sklearn.ensemble.RandomForestClassifier.json
+++ b/mlblocks_pipelines/sklearn.ensemble.RandomForestClassifier.json
@@ -15,11 +15,6 @@
         "sklearn.ensemble.RandomForestClassifier"
     ],
     "hyperparameters": {
-        "sklearn.ensemble.RandomForestClassifier#1": {
-            "n_jobs": -1,
-            "n_estimators": 300,
-            "criterion": "entropy",
-            "warm_start": true
-        }
+        "sklearn.ensemble.RandomForestClassifier#1": {}
     }
 }
diff --git a/mlblocks_pipelines/sklearn.ensemble.RandomForestRegressor.json b/mlblocks_pipelines/sklearn.ensemble.RandomForestRegressor.json
index be89b1d1..0c9985e5 100644
--- a/mlblocks_pipelines/sklearn.ensemble.RandomForestRegressor.json
+++ b/mlblocks_pipelines/sklearn.ensemble.RandomForestRegressor.json
@@ -14,10 +14,6 @@
         "sklearn.ensemble.RandomForestRegressor"
     ],
     "hyperparameters": {
-        "sklearn.ensemble.RandomForestRegressor#1": {
-            "n_jobs": -1,
-            "warm_start": true,
-            "n_estimators": 300
-        }
+        "sklearn.ensemble.RandomForestRegressor#1": {}
     }
 }
diff --git a/mlblocks_pipelines/sklearn.ensemble.RandomTreesEmbedding.json b/mlblocks_pipelines/sklearn.ensemble.RandomTreesEmbedding.json
new file mode 100644
index 00000000..d78157de
--- /dev/null
+++ b/mlblocks_pipelines/sklearn.ensemble.RandomTreesEmbedding.json
@@ -0,0 +1,21 @@
+{
+    "metadata": {
+        "name": "RandomTreesEmbedding/classification/default",
+        "data_type": "tabular",
+        "task_type": "classification"
+    },
+    "validation": {
+        "dataset": "iris",
+        "context": {}
+    },
+    "primitives": [
+        "mlprimitives.preprocessing.ClassEncoder",
+        "sklearn.preprocessing.Imputer",
+        "sklearn.preprocessing.StandardScaler",
+        "sklearn.ensemble.RandomTreesEmbedding",
+        "sklearn.ensemble.RandomForestClassifier"
+    ],
+    "hyperparameters": {
+        "sklearn.ensemble.RandomTreesEmbedding#1": {}
+    }
+}

From 669b719bd022b985655bc5d223e7dd3672cfa090 Mon Sep 17 00:00:00 2001
From: Ihssan <itinawi@mit.edu>
Date: Tue, 18 Dec 2018 15:54:27 +0200
Subject: [PATCH 22/32] Issue 47: added primitive for LSTM TimeSeries Regressor

---
 ...as.Sequential.LSTMTimeSeriesRegressor.json | 122 ++++++++++++++++++
 1 file changed, 122 insertions(+)
 create mode 100644 mlblocks_primitives/keras.Sequential.LSTMTimeSeriesRegressor.json

diff --git a/mlblocks_primitives/keras.Sequential.LSTMTimeSeriesRegressor.json b/mlblocks_primitives/keras.Sequential.LSTMTimeSeriesRegressor.json
new file mode 100644
index 00000000..82fc5948
--- /dev/null
+++ b/mlblocks_primitives/keras.Sequential.LSTMTimeSeriesRegressor.json
@@ -0,0 +1,122 @@
+{
+    "name": "keras.Sequential.LSTMTimeSeriesRegressor",
+    "author": "Ihssan Tinawi <itinawi@mit.edu>",
+    "documentation": "",
+    "description": "This primitive consists of multiple Keras layers that can pass time-series data through an LSTM in order to predict the value at x_{t+1}",
+    "classifiers": {
+        "type": "estimator",
+        "subtype": "regressor"
+    },
+    "modalities": [],
+    "primitive": "mlprimitives.adapters.keras.Sequential",
+    "fit": {
+        "method": "fit",
+        "args": [
+            {
+                "name": "X",
+                "type": "ndarray"
+            },
+            {
+                "name": "y",
+                "type": "array"
+            }
+        ]
+    },
+    "produce": {
+        "method": "predict",
+        "args": [
+            {
+                "name": "X",
+                "type": "ndarray"
+            }
+        ],
+        "output": [
+            {
+                "name": "y",
+                "type": "array"
+            }
+        ]
+    },
+    "hyperparameters": {
+        "fixed": {
+            "input_length": {
+                "type": "int",
+                "default": 1500
+            },
+            "dense_units": {
+                "type": "int",
+                "description": "Number of units in the final Dense (output) layer"
+            },
+            "classification": {
+                "type": "bool",
+                "default": false
+            },
+            "dense_activation": {
+                "type": "str",
+                "default": "tanh"
+            },
+            "optimizer": {
+                "type": "str",
+                "default": "keras.optimizers.Adam"
+            },
+            "loss": {
+                "type": "str",
+                "default": "keras.losses.mean_squared_error"
+            },
+            "metrics": {
+                "type": "list",
+                "default": [
+                    "accuracy"
+                ]
+            },
+            "layers": {
+                "type": "list",
+                "default": [
+                    {
+                        "class": "keras.layers.Input",
+                        "parameters": {
+                            "shape": "input_shape"
+                        }
+                    },
+                    {
+                        "class": "keras.layers.Dropout",
+                        "parameters": {
+                            "rate": "dropout_rate"
+                        }
+                    },
+                    {
+                        "class": "keras.layers.LSTM",
+                        "parameters": {
+                            "units": "lstm_units"
+                        }
+                    },
+                    {
+                        "class": "keras.layers.Dense",
+                        "parameters": {
+                            "units": "dense_units",
+                            "activation": "dense_activation"
+                        }
+                    }
+                ]
+            }
+        },
+        "tunable": {
+            "lstm_units": {
+                "type": "int",
+                "default": 50,
+                "range": [
+                    1,
+                    500
+                ]
+            },
+            "dropout_rate": {
+                "type": "float",
+                "default": 0.1,
+                "range": [
+                    0.01,
+                    0.75
+                ]
+            }
+        }
+    }
+}

From 31631c97b95b9fd538e8c8459f5aaa5efd2868d4 Mon Sep 17 00:00:00 2001
From: Ihssan <itinawi@mit.edu>
Date: Wed, 19 Dec 2018 18:15:27 +0200
Subject: [PATCH 23/32] Issue 53: added time series primitives and function

---
 ...ves.timeseries.aggregate_average_time.json | 42 +++++++++++++++
 ...ves.timeseries.create_window_sequence.json | 41 +++++++++++++++
 mlprimitives/timeseries.py                    | 52 +++++++++++++++++++
 3 files changed, 135 insertions(+)
 create mode 100644 mlblocks_primitives/mlprimitives.timeseries.aggregate_average_time.json
 create mode 100644 mlblocks_primitives/mlprimitives.timeseries.create_window_sequence.json
 create mode 100644 mlprimitives/timeseries.py

diff --git a/mlblocks_primitives/mlprimitives.timeseries.aggregate_average_time.json b/mlblocks_primitives/mlprimitives.timeseries.aggregate_average_time.json
new file mode 100644
index 00000000..63e1a3af
--- /dev/null
+++ b/mlblocks_primitives/mlprimitives.timeseries.aggregate_average_time.json
@@ -0,0 +1,42 @@
+{
+    "name": "mlprimitives.timeseries.aggregate_average_time",
+    "author": "Ihssan Tinawi <itinawi@mit.edu>",
+    "description": "mlprimitives.timeseries.aggregate_average_time",
+    "classifiers": {
+        "type": "preprocessor",
+        "subtype": "feature_extractor"
+    },
+    "modalities": ["timeseries"],
+    "primitive": "mlprimitives.timeseries.aggregate_average_time",
+    "produce": {
+        "args": [
+            {
+                "name": "df_time_value",
+                "type": "Pandas.DataFrame"
+            },
+            {
+                "name": "interval_time_delta",
+                "type": "int"
+            },
+            {
+                "name": "start_time",
+                "type": "int"
+            },
+            {
+                "name": "end_time",
+                "type": "int"
+            }
+
+        ],
+        "output": [
+            {
+                "name": "aggregated_df",
+                "type": "Pandas.DataFrame"
+            }
+        ]
+    },
+    "hyperparameters": {
+        "fixed": {},
+        "tunable": {}
+    }
+}
diff --git a/mlblocks_primitives/mlprimitives.timeseries.create_window_sequence.json b/mlblocks_primitives/mlprimitives.timeseries.create_window_sequence.json
new file mode 100644
index 00000000..8d96f29f
--- /dev/null
+++ b/mlblocks_primitives/mlprimitives.timeseries.create_window_sequence.json
@@ -0,0 +1,41 @@
+{
+    "name": "mlprimitives.timeseries.create_window_sequence",
+    "author": "Ihssan Tinawi <itinawi@mit.edu>",
+    "description": "mlprimitives.timeseries.create_window_sequence",
+    "classifiers": {
+        "type": "preprocessor",
+        "subtype": "feature_extractor"
+    },
+    "modalities": ["timeseries"],
+    "primitive": "mlprimitives.timeseries.create_window_sequence",
+    "produce": {
+        "args": [
+            {
+                "name": "df_timeseries",
+                "type": "Pandas.DataFrame"
+            },
+            {
+                "name": "window_size",
+                "type": "int"
+            }
+        ],
+        "output": [
+            {
+                "name": "X",
+                "type": "ndarray"
+            },
+            {
+                "name": "Y",
+                "type": "ndarray"
+            },
+            {
+                "name": "time",
+                "type": "ndarray"
+            }
+        ]
+    },
+    "hyperparameters": {
+        "fixed": {},
+        "tunable": {}
+    }
+}
diff --git a/mlprimitives/timeseries.py b/mlprimitives/timeseries.py
new file mode 100644
index 00000000..4681c296
--- /dev/null
+++ b/mlprimitives/timeseries.py
@@ -0,0 +1,52 @@
+import pandas as pd
+import time
+
+
+def create_window_sequences(df_timeseries, window_size):
+    """
+        Function that takes in a Pandas.DataFrame and a window_size then creates output arrays that correspond to a timeseries sequence with window_size overlap. The output arrays can be fed into a timeseries forecasting model.
+        Inputs:
+            df_timeseries (Pandas.DataFrame): a Pandas dataframe which has 'timestamp' and 'value' columns, and is sorted based on timestamp. The timestamp column is in UNIX format (in seconds).
+            window_size (int): number of values that overlap to create the sequence.
+        Outputs:
+            x (numpy.ndarray): contains the time series sequenced data.
+            y (numpy.ndarray): acts as the label for the forecasting problem.
+            time (numpy.ndarray): the corresponding timestamps series.
+    """
+    X = []
+    Y = []
+    time = []
+    for i in range(len(df) - window_size):
+        X.append(df[i:i+window_size]['value'].values.copy().reshape([-1, 1]))
+        Y.append(df[i+1:i+window_size+1]['value'].values.copy().reshape([-1, 1]))
+        time.append(df.iloc[i+window_size]['timestamp'])
+    
+    return np.asarray(X), np.asarray(Y), np.asarray(time)
+
+
+def aggregate_average_time(df_time_value, interval_time_delta, start_time, end_time):
+    """
+        Function that aggregates data in a Pandas dataframe by averaging over a given interval. It starts averaging from specified start_time.
+        Inputs:
+            df_time_value (Pandas.DataFrame): a Pandas dataframe which has 'timestamp' and 'value' columns, and is sorted based on timestamp. The timestamp column is in UNIX format (in seconds).
+            interval_time_delta (int): an Integer denoting the number of seconds in the desired interval.
+            start_time (int): a UNIX time stamp indicating the time to start aggregating. Can be smaller than the smallest time stamp value in the dataframe.
+            end_time (int): a UNIX time stamp indicating the time to end aggregating. Can be larger than the largest time stamp value in the dataframe.
+            
+        Outputs:
+            aggregated_df (Pandas.DataFrame): a Pandas dataframe with two colums ('timestamp' and 'value'), where each `timestamp` is the starting time of an interval and the `value` is the result of aggregation. For intervals that don't have data in df_time_value but are still included in start_time and end_time then the value will be NaN.
+            
+    """
+    start_ts = start_time
+    accepted_points = []
+    while start_ts < end_time:
+        # average the values between start_ts, [start_ts + timedelta (e.g. 6hrs)]
+        upper_ts = start_ts + time_delta
+        mask = (df_time_value['timestamp'] > start_ts) & (df_time_value['timestamp'] <= upper_ts)
+        average_value = df.loc[mask]['value'].mean(skipna=True)
+
+        accepted_points.append([start_ts, average_value])
+        start_ts = upper_ts # update the timestamp
+
+    new_df = pd.DataFrame(accepted_points, columns=['timestamp','value']) 
+    return new_df

From feb64b8b0c8c984f50feb8c9a229ebab248dab80 Mon Sep 17 00:00:00 2001
From: Ihssan <itinawi@mit.edu>
Date: Wed, 19 Dec 2018 19:32:13 +0200
Subject: [PATCH 24/32] Issue 53: fixed lint and syntax errors

---
 ...s.timeseries.create_window_sequences.json} |  6 +--
 mlprimitives/timeseries.py                    | 51 +++++++++++--------
 2 files changed, 34 insertions(+), 23 deletions(-)
 rename mlblocks_primitives/{mlprimitives.timeseries.create_window_sequence.json => mlprimitives.timeseries.create_window_sequences.json} (92%)

diff --git a/mlblocks_primitives/mlprimitives.timeseries.create_window_sequence.json b/mlblocks_primitives/mlprimitives.timeseries.create_window_sequences.json
similarity index 92%
rename from mlblocks_primitives/mlprimitives.timeseries.create_window_sequence.json
rename to mlblocks_primitives/mlprimitives.timeseries.create_window_sequences.json
index 8d96f29f..51ccd336 100644
--- a/mlblocks_primitives/mlprimitives.timeseries.create_window_sequence.json
+++ b/mlblocks_primitives/mlprimitives.timeseries.create_window_sequences.json
@@ -1,13 +1,13 @@
 {
-    "name": "mlprimitives.timeseries.create_window_sequence",
+    "name": "mlprimitives.timeseries.create_window_sequences",
     "author": "Ihssan Tinawi <itinawi@mit.edu>",
-    "description": "mlprimitives.timeseries.create_window_sequence",
+    "description": "mlprimitives.timeseries.create_window_sequences",
     "classifiers": {
         "type": "preprocessor",
         "subtype": "feature_extractor"
     },
     "modalities": ["timeseries"],
-    "primitive": "mlprimitives.timeseries.create_window_sequence",
+    "primitive": "mlprimitives.timeseries.create_window_sequences",
     "produce": {
         "args": [
             {
diff --git a/mlprimitives/timeseries.py b/mlprimitives/timeseries.py
index 4681c296..317a422e 100644
--- a/mlprimitives/timeseries.py
+++ b/mlprimitives/timeseries.py
@@ -1,12 +1,16 @@
 import pandas as pd
-import time
+import numpy as np
 
 
 def create_window_sequences(df_timeseries, window_size):
     """
-        Function that takes in a Pandas.DataFrame and a window_size then creates output arrays that correspond to a timeseries sequence with window_size overlap. The output arrays can be fed into a timeseries forecasting model.
+        Function that takes in a Pandas.DataFrame and a window_size then creates
+            output arrays that correspond to a timeseries sequence with window_size overlap.
+            The output arrays can be fed into a timeseries forecasting model.
         Inputs:
-            df_timeseries (Pandas.DataFrame): a Pandas dataframe which has 'timestamp' and 'value' columns, and is sorted based on timestamp. The timestamp column is in UNIX format (in seconds).
+            df_timeseries (Pandas.DataFrame): a Pandas dataframe which has 'timestamp'
+                and 'value' columns, and is sorted based on timestamp. 
+                The timestamp column is in UNIX format (in seconds).
             window_size (int): number of values that overlap to create the sequence.
         Outputs:
             x (numpy.ndarray): contains the time series sequenced data.
@@ -16,37 +20,44 @@ def create_window_sequences(df_timeseries, window_size):
     X = []
     Y = []
     time = []
-    for i in range(len(df) - window_size):
-        X.append(df[i:i+window_size]['value'].values.copy().reshape([-1, 1]))
-        Y.append(df[i+1:i+window_size+1]['value'].values.copy().reshape([-1, 1]))
-        time.append(df.iloc[i+window_size]['timestamp'])
-    
+    for i in range(len(df_timeseries) - window_size):
+        X.append(df_timeseries[i: i + window_size]['value'].values.copy().reshape([-1, 1]))
+        Y.append(df_timeseries[i + 1: i + window_size + 1]['value'].values.copy().reshape([-1, 1]))
+        time.append(df_timeseries.iloc[i + window_size]['timestamp'])
     return np.asarray(X), np.asarray(Y), np.asarray(time)
 
 
 def aggregate_average_time(df_time_value, interval_time_delta, start_time, end_time):
     """
-        Function that aggregates data in a Pandas dataframe by averaging over a given interval. It starts averaging from specified start_time.
+        Function that aggregates data in a Pandas dataframe by averaging over a given interval. 
+            It starts averaging from specified start_time.
         Inputs:
-            df_time_value (Pandas.DataFrame): a Pandas dataframe which has 'timestamp' and 'value' columns, and is sorted based on timestamp. The timestamp column is in UNIX format (in seconds).
-            interval_time_delta (int): an Integer denoting the number of seconds in the desired interval.
-            start_time (int): a UNIX time stamp indicating the time to start aggregating. Can be smaller than the smallest time stamp value in the dataframe.
-            end_time (int): a UNIX time stamp indicating the time to end aggregating. Can be larger than the largest time stamp value in the dataframe.
-            
+            df_time_value (Pandas.DataFrame): a Pandas dataframe which has 'timestamp' 
+                and 'value' columns, and is sorted based on timestamp. The timestamp
+                column is in UNIX format (in seconds).
+            interval_time_delta (int): an Integer denoting the number of seconds 
+                in the desired interval.
+            start_time (int): a UNIX time stamp indicating the time to start
+                aggregating. Can be smaller than the smallest time stamp value in the dataframe.
+            end_time (int): a UNIX time stamp indicating the time to end aggregating. 
+                Can be larger than the largest time stamp value in the dataframe.
         Outputs:
-            aggregated_df (Pandas.DataFrame): a Pandas dataframe with two colums ('timestamp' and 'value'), where each `timestamp` is the starting time of an interval and the `value` is the result of aggregation. For intervals that don't have data in df_time_value but are still included in start_time and end_time then the value will be NaN.
-            
+            aggregated_df (Pandas.DataFrame): a Pandas dataframe with two colums 
+                ('timestamp' and 'value'), where each `timestamp` is the starting time of 
+                an interval and the `value` is the result of aggregation. For intervals that 
+                don't have data in df_time_value but are still included in start_time 
+                and end_time then the value will be NaN.
     """
     start_ts = start_time
     accepted_points = []
     while start_ts < end_time:
         # average the values between start_ts, [start_ts + timedelta (e.g. 6hrs)]
-        upper_ts = start_ts + time_delta
+        upper_ts = start_ts + interval_time_delta
         mask = (df_time_value['timestamp'] > start_ts) & (df_time_value['timestamp'] <= upper_ts)
-        average_value = df.loc[mask]['value'].mean(skipna=True)
+        average_value = df_time_value.loc[mask]['value'].mean(skipna=True)
 
         accepted_points.append([start_ts, average_value])
-        start_ts = upper_ts # update the timestamp
+        start_ts = upper_ts  # update the timestamp
 
-    new_df = pd.DataFrame(accepted_points, columns=['timestamp','value']) 
+    new_df = pd.DataFrame(accepted_points, columns=['timestamp', 'value'])
     return new_df

From 4e6f32102fddd5104e3b76d48c8489863889088c Mon Sep 17 00:00:00 2001
From: Carles Sala <carles@pythiac.com>
Date: Wed, 19 Dec 2018 18:55:36 +0100
Subject: [PATCH 25/32] add google-compute-engine which travis complains about

---
 setup.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/setup.py b/setup.py
index 4d028100..ece8de69 100644
--- a/setup.py
+++ b/setup.py
@@ -39,6 +39,7 @@
 tests_require = [
     'mlblocks>=0.2.0',
     'pytest>=3.4.2',
+    'google-compute-engine==2.8.12',    # required by travis
 ]
 
 

From 46c1e98c22d6d6f68a38dfec3ba0ebf04d3dcd8e Mon Sep 17 00:00:00 2001
From: Ihssan <itinawi@mit.edu>
Date: Thu, 20 Dec 2018 08:03:29 +0200
Subject: [PATCH 26/32] rewrote signatures of methods and adjusted json files
 accordingly

---
 ...ves.timeseries.aggregate_average_time.json | 33 ++++----
 ....timeseries.rolling_window_sequences.json} | 22 +++---
 mlprimitives/timeseries.py                    | 78 ++++++++++---------
 3 files changed, 70 insertions(+), 63 deletions(-)
 rename mlblocks_primitives/{mlprimitives.timeseries.create_window_sequences.json => mlprimitives.timeseries.rolling_window_sequences.json} (58%)

diff --git a/mlblocks_primitives/mlprimitives.timeseries.aggregate_average_time.json b/mlblocks_primitives/mlprimitives.timeseries.aggregate_average_time.json
index 63e1a3af..1703e604 100644
--- a/mlblocks_primitives/mlprimitives.timeseries.aggregate_average_time.json
+++ b/mlblocks_primitives/mlprimitives.timeseries.aggregate_average_time.json
@@ -1,42 +1,43 @@
 {
-    "name": "mlprimitives.timeseries.aggregate_average_time",
+    "name": "mlprimitives.timeseries.time_segments_average",
     "author": "Ihssan Tinawi <itinawi@mit.edu>",
-    "description": "mlprimitives.timeseries.aggregate_average_time",
+    "description": "mlprimitives.timeseries.time_segments_average",
     "classifiers": {
         "type": "preprocessor",
         "subtype": "feature_extractor"
     },
     "modalities": ["timeseries"],
-    "primitive": "mlprimitives.timeseries.aggregate_average_time",
+    "primitive": "mlprimitives.timeseries.time_segments_average",
     "produce": {
         "args": [
             {
-                "name": "df_time_value",
-                "type": "Pandas.DataFrame"
+                "name": "X",
+                "keyword": "time_value",
+                "type": "pandas.DataFrame"
             },
             {
-                "name": "interval_time_delta",
-                "type": "int"
+                "name": "value_column",
+                "type": "str"
             },
             {
-                "name": "start_time",
-                "type": "int"
-            },
-            {
-                "name": "end_time",
-                "type": "int"
+                "name": "time_column",
+                "type": "str"
             }
-
         ],
         "output": [
             {
                 "name": "aggregated_df",
-                "type": "Pandas.DataFrame"
+                "type": "pandas.DataFrame"
             }
         ]
     },
     "hyperparameters": {
-        "fixed": {},
+        "fixed": {
+            "interval": {
+                "type": "int",
+                "default": 3600
+            }
+        },
         "tunable": {}
     }
 }
diff --git a/mlblocks_primitives/mlprimitives.timeseries.create_window_sequences.json b/mlblocks_primitives/mlprimitives.timeseries.rolling_window_sequences.json
similarity index 58%
rename from mlblocks_primitives/mlprimitives.timeseries.create_window_sequences.json
rename to mlblocks_primitives/mlprimitives.timeseries.rolling_window_sequences.json
index 51ccd336..92b4f034 100644
--- a/mlblocks_primitives/mlprimitives.timeseries.create_window_sequences.json
+++ b/mlblocks_primitives/mlprimitives.timeseries.rolling_window_sequences.json
@@ -1,22 +1,19 @@
 {
-    "name": "mlprimitives.timeseries.create_window_sequences",
+    "name": "mlprimitives.timeseries.rolling_window_sequences",
     "author": "Ihssan Tinawi <itinawi@mit.edu>",
-    "description": "mlprimitives.timeseries.create_window_sequences",
+    "description": "mlprimitives.timeseries.rolling_window_sequences",
     "classifiers": {
         "type": "preprocessor",
         "subtype": "feature_extractor"
     },
     "modalities": ["timeseries"],
-    "primitive": "mlprimitives.timeseries.create_window_sequences",
+    "primitive": "mlprimitives.timeseries.rolling_window_sequences",
     "produce": {
         "args": [
             {
-                "name": "df_timeseries",
-                "type": "Pandas.DataFrame"
-            },
-            {
-                "name": "window_size",
-                "type": "int"
+                "name": "X",
+                "keyword": "time_value",
+                "type": "pandas.DataFrame"
             }
         ],
         "output": [
@@ -35,7 +32,12 @@
         ]
     },
     "hyperparameters": {
-        "fixed": {},
+        "fixed": {
+            "window_size": {
+                "type": "int",
+                "default": 50
+            }
+        },
         "tunable": {}
     }
 }
diff --git a/mlprimitives/timeseries.py b/mlprimitives/timeseries.py
index 317a422e..c982b914 100644
--- a/mlprimitives/timeseries.py
+++ b/mlprimitives/timeseries.py
@@ -2,62 +2,66 @@
 import numpy as np
 
 
-def create_window_sequences(df_timeseries, window_size):
+def rolling_window_sequences(X, window_size, value_column, time_column):
     """
-        Function that takes in a Pandas.DataFrame and a window_size then creates
+        Function that takes in a pandas.DataFrame and a window_size then creates
             output arrays that correspond to a timeseries sequence with window_size overlap.
             The output arrays can be fed into a timeseries forecasting model.
-        Inputs:
-            df_timeseries (Pandas.DataFrame): a Pandas dataframe which has 'timestamp'
+            Assumes the input is timeseries sorted.
+        Args:
+            X (pandas.DataFrame): a pandas dataframe which has 'timestamp'
                 and 'value' columns, and is sorted based on timestamp. 
                 The timestamp column is in UNIX format (in seconds).
             window_size (int): number of values that overlap to create the sequence.
-        Outputs:
-            x (numpy.ndarray): contains the time series sequenced data.
-            y (numpy.ndarray): acts as the label for the forecasting problem.
-            time (numpy.ndarray): the corresponding timestamps series.
+            value_column (string): name of column that has the value field.
+            time_column (string): name of column that has the time field.
+        Returns:
+            (numpy.ndarray): contains the time series sequenced data with each 
+                entry having window_size rows.
+            (numpy.ndarray): acts as the label for the forecasting problem with 
+                each entry having window_size rows.
+            (numpy.ndarray): the corresponding timestamps series.
     """
-    X = []
+    output_X = []
     Y = []
     time = []
-    for i in range(len(df_timeseries) - window_size):
-        X.append(df_timeseries[i: i + window_size]['value'].values.copy().reshape([-1, 1]))
-        Y.append(df_timeseries[i + 1: i + window_size + 1]['value'].values.copy().reshape([-1, 1]))
-        time.append(df_timeseries.iloc[i + window_size]['timestamp'])
-    return np.asarray(X), np.asarray(Y), np.asarray(time)
+    for i in range(len(X) - window_size):
+        # reshape into a vector to fit into a neural network model (vectorize it)
+        output_X.append(X[i: i + window_size][value_column].values.copy().reshape([-1, 1])) 
+        Y.append(X[i + window_size + 1][value_column].values.copy().reshape([-1, 1]))
+        time.append(X.iloc[i + window_size][time_column])
 
+    return np.asarray(output_X), np.asarray(Y), np.asarray(time)
 
-def aggregate_average_time(df_time_value, interval_time_delta, start_time, end_time):
+
+def time_segments_average(X, interval, value_column, time_column):
     """
-        Function that aggregates data in a Pandas dataframe by averaging over a given interval. 
-            It starts averaging from specified start_time.
-        Inputs:
-            df_time_value (Pandas.DataFrame): a Pandas dataframe which has 'timestamp' 
-                and 'value' columns, and is sorted based on timestamp. The timestamp
-                column is in UNIX format (in seconds).
-            interval_time_delta (int): an Integer denoting the number of seconds 
+        function that aggregates data in a pandas dataframe by averaging over a given interval. 
+            it starts averaging from the smallest timestamp in the dataframe and ends at the
+            largest timestamp. assumes the input is timeseries sorted.
+        args:
+            X (pandas.dataframe): a pandas dataframe which has 'timestamp' 
+                and 'value' columns, and is sorted based on timestamp. the timestamp
+                column is in unix format (in seconds).
+            interval (int): an integer denoting the number of seconds 
                 in the desired interval.
-            start_time (int): a UNIX time stamp indicating the time to start
-                aggregating. Can be smaller than the smallest time stamp value in the dataframe.
-            end_time (int): a UNIX time stamp indicating the time to end aggregating. 
-                Can be larger than the largest time stamp value in the dataframe.
-        Outputs:
-            aggregated_df (Pandas.DataFrame): a Pandas dataframe with two colums 
+            value_column (string): name of column that has the value field.
+            time_column (string): name of column that has the time field.
+        returns:
+            pandas.dataframe: a pandas dataframe with two colums 
                 ('timestamp' and 'value'), where each `timestamp` is the starting time of 
-                an interval and the `value` is the result of aggregation. For intervals that 
-                don't have data in df_time_value but are still included in start_time 
-                and end_time then the value will be NaN.
+                an interval and the `value` is the result of aggregation.
     """
-    start_ts = start_time
+    start_ts = X[time_column].iloc[0]   # min value
+    end_time = X[time_column].iloc[-1]  # max value in dataframe
     accepted_points = []
     while start_ts < end_time:
         # average the values between start_ts, [start_ts + timedelta (e.g. 6hrs)]
-        upper_ts = start_ts + interval_time_delta
-        mask = (df_time_value['timestamp'] > start_ts) & (df_time_value['timestamp'] <= upper_ts)
-        average_value = df_time_value.loc[mask]['value'].mean(skipna=True)
+        upper_ts = start_ts + interval
+        mask = (X[time_column] > start_ts) & (X[time_column] <= upper_ts)
+        average_value = X.loc[mask][value_column].mean(skipna=True)
 
         accepted_points.append([start_ts, average_value])
         start_ts = upper_ts  # update the timestamp
 
-    new_df = pd.DataFrame(accepted_points, columns=['timestamp', 'value'])
-    return new_df
+    return pd.DataFrame(accepted_points, columns=[time_column, value_column])

From 3372d04f11165688c7b7d8f6c2ece134ae102745 Mon Sep 17 00:00:00 2001
From: Ihssan <itinawi@mit.edu>
Date: Thu, 20 Dec 2018 11:43:59 +0200
Subject: [PATCH 27/32] Removed instances of copy() from dataframes

---
 mlprimitives/timeseries.py | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/mlprimitives/timeseries.py b/mlprimitives/timeseries.py
index c982b914..9b0750d8 100644
--- a/mlprimitives/timeseries.py
+++ b/mlprimitives/timeseries.py
@@ -1,5 +1,5 @@
-import pandas as pd
 import numpy as np
+import pandas as pd
 
 
 def rolling_window_sequences(X, window_size, value_column, time_column):
@@ -10,46 +10,46 @@ def rolling_window_sequences(X, window_size, value_column, time_column):
             Assumes the input is timeseries sorted.
         Args:
             X (pandas.DataFrame): a pandas dataframe which has 'timestamp'
-                and 'value' columns, and is sorted based on timestamp. 
+                and 'value' columns, and is sorted based on timestamp.
                 The timestamp column is in UNIX format (in seconds).
             window_size (int): number of values that overlap to create the sequence.
             value_column (string): name of column that has the value field.
             time_column (string): name of column that has the time field.
         Returns:
-            (numpy.ndarray): contains the time series sequenced data with each 
+            (numpy.ndarray): contains the time series sequenced data with each
                 entry having window_size rows.
-            (numpy.ndarray): acts as the label for the forecasting problem with 
+            (numpy.ndarray): acts as the label for the forecasting problem with
                 each entry having window_size rows.
             (numpy.ndarray): the corresponding timestamps series.
     """
     output_X = []
-    Y = []
+    y = []
     time = []
     for i in range(len(X) - window_size):
         # reshape into a vector to fit into a neural network model (vectorize it)
-        output_X.append(X[i: i + window_size][value_column].values.copy().reshape([-1, 1])) 
-        Y.append(X[i + window_size + 1][value_column].values.copy().reshape([-1, 1]))
+        output_X.append(X[i: i + window_size][value_column].values.reshape([-1, 1]))
+        y.append(X[i + window_size + 1][value_column].values.reshape([-1, 1]))
         time.append(X.iloc[i + window_size][time_column])
 
-    return np.asarray(output_X), np.asarray(Y), np.asarray(time)
+    return np.asarray(output_X), np.asarray(y), np.asarray(time)
 
 
 def time_segments_average(X, interval, value_column, time_column):
     """
-        function that aggregates data in a pandas dataframe by averaging over a given interval. 
+        function that aggregates data in a pandas dataframe by averaging over a given interval.
             it starts averaging from the smallest timestamp in the dataframe and ends at the
             largest timestamp. assumes the input is timeseries sorted.
         args:
-            X (pandas.dataframe): a pandas dataframe which has 'timestamp' 
+            X (pandas.dataframe): a pandas dataframe which has 'timestamp'
                 and 'value' columns, and is sorted based on timestamp. the timestamp
                 column is in unix format (in seconds).
-            interval (int): an integer denoting the number of seconds 
+            interval (int): an integer denoting the number of seconds
                 in the desired interval.
             value_column (string): name of column that has the value field.
             time_column (string): name of column that has the time field.
         returns:
-            pandas.dataframe: a pandas dataframe with two colums 
-                ('timestamp' and 'value'), where each `timestamp` is the starting time of 
+            pandas.DataFrame: a pandas dataframe with two columns
+                ('timestamp' and 'value'), where each `timestamp` is the starting time of
                 an interval and the `value` is the result of aggregation.
     """
     start_ts = X[time_column].iloc[0]   # min value

From 7724bceb260388d3f4a1e98fb2068c132003461a Mon Sep 17 00:00:00 2001
From: Ihssan <itinawi@mit.edu>
Date: Thu, 20 Dec 2018 15:54:38 +0200
Subject: [PATCH 28/32] Issue 53: fixed naming issue with json files

---
 ...me.json => mlprimitives.timeseries.time_segments_average.json} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename mlblocks_primitives/{mlprimitives.timeseries.aggregate_average_time.json => mlprimitives.timeseries.time_segments_average.json} (100%)

diff --git a/mlblocks_primitives/mlprimitives.timeseries.aggregate_average_time.json b/mlblocks_primitives/mlprimitives.timeseries.time_segments_average.json
similarity index 100%
rename from mlblocks_primitives/mlprimitives.timeseries.aggregate_average_time.json
rename to mlblocks_primitives/mlprimitives.timeseries.time_segments_average.json

From 33e3dc1e2cb01f9b3e02d23735735ee02d63d2fc Mon Sep 17 00:00:00 2001
From: Ihssan <itinawi@mit.edu>
Date: Thu, 20 Dec 2018 19:04:58 +0200
Subject: [PATCH 29/32] Issue 47: changed description of dense_units in
 primitive json

---
 .../keras.Sequential.LSTMTimeSeriesRegressor.json            | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/mlblocks_primitives/keras.Sequential.LSTMTimeSeriesRegressor.json b/mlblocks_primitives/keras.Sequential.LSTMTimeSeriesRegressor.json
index 82fc5948..a97e27ce 100644
--- a/mlblocks_primitives/keras.Sequential.LSTMTimeSeriesRegressor.json
+++ b/mlblocks_primitives/keras.Sequential.LSTMTimeSeriesRegressor.json
@@ -2,7 +2,7 @@
     "name": "keras.Sequential.LSTMTimeSeriesRegressor",
     "author": "Ihssan Tinawi <itinawi@mit.edu>",
     "documentation": "",
-    "description": "This primitive consists of multiple Keras layers that can pass time-series data through an LSTM in order to predict the value at x_{t+1}",
+    "description": "This primitive consists of multiple Keras layers that can pass time-series data through an LSTM in order to predict the next n values.",
     "classifiers": {
         "type": "estimator",
         "subtype": "regressor"
@@ -45,7 +45,8 @@
             },
             "dense_units": {
                 "type": "int",
-                "description": "Number of classes"
+                "description": "Number of values ahead to predict",
+                "default": 1
             },
             "classification": {
                 "type": "bool",

From b2c5c109face49378ad7085b380959ac437c80f1 Mon Sep 17 00:00:00 2001
From: Carles Sala <carles@pythiac.com>
Date: Thu, 3 Jan 2019 16:20:27 +0100
Subject: [PATCH 30/32] Fix index usage in featuretools.dfs

---
 .../multi_table.classification.default.json   | 34 +++++++++++++++++++
 ... single_table.classification.default.json} |  4 +--
 mlblocks_primitives/featuretools.dfs.json     |  6 ++++
 mlprimitives/adapters/featuretools.py         | 23 ++++++++++---
 setup.py                                      |  2 +-
 5 files changed, 61 insertions(+), 8 deletions(-)
 create mode 100644 mlblocks_pipelines/multi_table.classification.default.json
 rename mlblocks_pipelines/{tabular.classification.default.json => single_table.classification.default.json} (87%)

diff --git a/mlblocks_pipelines/multi_table.classification.default.json b/mlblocks_pipelines/multi_table.classification.default.json
new file mode 100644
index 00000000..d1f4aadd
--- /dev/null
+++ b/mlblocks_pipelines/multi_table.classification.default.json
@@ -0,0 +1,34 @@
+{
+    "metadata": {
+        "name": "multi_table/classification/default",
+        "data_type": "multi_table",
+        "task_type": "classification"
+    },
+    "validation": {
+        "dataset": "wikiqa",
+        "context": {
+            "entities": "$entities",
+            "relationships": "$relationships",
+            "target_entity": "data"
+        }
+    },
+    "primitives": [
+        "mlprimitives.preprocessing.ClassEncoder",
+        "featuretools.dfs",
+        "xgboost.XGBClassifier",
+        "mlprimitives.preprocessing.ClassDecoder"
+    ],
+    "hyperparameters": {
+        "featuretools.dfs#1": {
+            "encode": true
+        },
+        "xgboost.XGBClassifier#1": {
+            "n_jobs": -1,
+            "learning_rate": 0.1,
+            "n_estimators": 300,
+            "max_depth": 3,
+            "gamma": 0,
+            "min_child_weight": 1
+        }
+    }
+}
diff --git a/mlblocks_pipelines/tabular.classification.default.json b/mlblocks_pipelines/single_table.classification.default.json
similarity index 87%
rename from mlblocks_pipelines/tabular.classification.default.json
rename to mlblocks_pipelines/single_table.classification.default.json
index 6c4f00cf..b5b8830b 100644
--- a/mlblocks_pipelines/tabular.classification.default.json
+++ b/mlblocks_pipelines/single_table.classification.default.json
@@ -1,7 +1,7 @@
 {
     "metadata": {
-        "name": "tabular/classification/default",
-        "data_type": "tabular",
+        "name": "single_table/classification/default",
+        "data_type": "single_table",
         "task_type": "classification"
     },
     "validation": {
diff --git a/mlblocks_primitives/featuretools.dfs.json b/mlblocks_primitives/featuretools.dfs.json
index 4c63e729..d79bab43 100644
--- a/mlblocks_primitives/featuretools.dfs.json
+++ b/mlblocks_primitives/featuretools.dfs.json
@@ -75,6 +75,12 @@
         ]
     },
     "hyperparameters": {
+        "fixed": {
+            "copy": {
+                "type": "bool",
+                "default": false
+            }
+        },
         "tunable": {
             "max_depth": {
                 "type": "int",
diff --git a/mlprimitives/adapters/featuretools.py b/mlprimitives/adapters/featuretools.py
index 498deae6..c2250bde 100644
--- a/mlprimitives/adapters/featuretools.py
+++ b/mlprimitives/adapters/featuretools.py
@@ -8,7 +8,8 @@ class DFS(object):
 
     features = None
 
-    def __init__(self, max_depth=None, encode=True, remove_low_information=True):
+    def __init__(self, max_depth=None, encode=True, remove_low_information=True, copy=False):
+        self.copy = copy
         self.max_depth = max_depth
         self.encode = encode
         self.remove_low_information = remove_low_information
@@ -19,10 +20,22 @@ def __repr__(self):
             "    remove_low_information={remove_low_information})"
         ).format(**self.__dict__)
 
+    def _get_index(self, X):
+        if self.copy:
+            X = X.copy()
+
+        index = X.index.name or 'index'
+        while index in X.columns:
+            index = '_' + index
+
+        X.index.name = index
+        X.reset_index(inplace=True)
+
+        return X, index
+
     def _get_entityset(self, X, target_entity, entities, relationships):
         if entities is None:
-            index = X.index.name
-            X = X.reset_index()
+            X, index = self._get_index(X)
             entities = {
                 target_entity: (X, index)
             }
@@ -32,7 +45,7 @@ def _get_entityset(self, X, target_entity, entities, relationships):
 
         return ft.EntitySet('entityset', entities, relationships)
 
-    def dfs(self, X=None, target_entity=None, entityset=None, entities=None, relationships=None):
+    def dfs(self, X=None, target_entity='X', entityset=None, entities=None, relationships=None):
         if entityset is None:
             entityset = self._get_entityset(X, target_entity, entities, relationships)
 
@@ -44,7 +57,7 @@ def dfs(self, X=None, target_entity=None, entityset=None, entities=None, relatio
         if time_index:
             cutoff_time = target.df[[index, time_index]]
 
-        instance_ids = X.index.values.copy()
+        instance_ids = X[index].values.copy()
 
         self.features = ft.dfs(
             cutoff_time=cutoff_time,
diff --git a/setup.py b/setup.py
index ece8de69..220c6655 100644
--- a/setup.py
+++ b/setup.py
@@ -37,7 +37,7 @@
 
 
 tests_require = [
-    'mlblocks>=0.2.0',
+    'mlblocks>=0.2.4',
     'pytest>=3.4.2',
     'google-compute-engine==2.8.12',    # required by travis
 ]

From c05dcee01b2516242fa92ed38f6611d3e4858747 Mon Sep 17 00:00:00 2001
From: Carles Sala <carles@pythiac.com>
Date: Thu, 3 Jan 2019 19:48:52 +0100
Subject: [PATCH 31/32] Fix stopwrods typo

---
 mlblocks_primitives/mlprimitives.text.TextCleaner.json | 2 +-
 mlprimitives/text.py                                   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/mlblocks_primitives/mlprimitives.text.TextCleaner.json b/mlblocks_primitives/mlprimitives.text.TextCleaner.json
index d9b3e77e..11849764 100644
--- a/mlblocks_primitives/mlprimitives.text.TextCleaner.json
+++ b/mlblocks_primitives/mlprimitives.text.TextCleaner.json
@@ -52,7 +52,7 @@
                 "type": "bool",
                 "default": true
             },
-            "stopwrods": {
+            "stopwords": {
                 "type": "bool",
                 "default": true
             },
diff --git a/mlprimitives/text.py b/mlprimitives/text.py
index 4bdcfea6..1120272e 100644
--- a/mlprimitives/text.py
+++ b/mlprimitives/text.py
@@ -24,7 +24,7 @@ class TextCleaner(object):
     STOPWORDS = dict()
 
     def __init__(self, column=None, language='multi', lower=True, accents=True,
-                 stopwrods=True, non_alpha=True, single_chars=True):
+                 stopwords=True, non_alpha=True, single_chars=True):
         self.column = column
         self.language = language
         self.language_code = None

From e30267fc0ea907a505b431204d72c2d689291685 Mon Sep 17 00:00:00 2001
From: Carles Sala <carles@pythiac.com>
Date: Thu, 3 Jan 2019 20:40:04 +0100
Subject: [PATCH 32/32] Fix SingleLayerCNNImageClassifier annotation

---
 ...ential.SingleLayerCNNImagelClassifier.json | 33 +++++++++++++++++++
 ...uential.SingleLayerCNNImageClassifier.json |  2 +-
 2 files changed, 34 insertions(+), 1 deletion(-)
 create mode 100644 mlblocks_pipelines/keras.Sequential.SingleLayerCNNImagelClassifier.json

diff --git a/mlblocks_pipelines/keras.Sequential.SingleLayerCNNImagelClassifier.json b/mlblocks_pipelines/keras.Sequential.SingleLayerCNNImagelClassifier.json
new file mode 100644
index 00000000..0ae8cd78
--- /dev/null
+++ b/mlblocks_pipelines/keras.Sequential.SingleLayerCNNImagelClassifier.json
@@ -0,0 +1,33 @@
+{
+    "metadata": {
+        "name": "keras.Sequential.SingleLayerCNNImageClassifier",
+        "data_type": "image",
+        "task_type": "classification"
+    },
+    "validation": {
+        "dataset": "usps",
+        "context": {}
+    },
+    "primitives": [
+        "mlprimitives.counters.UniqueCounter",
+        "keras.Sequential.SingleLayerCNNImageClassifier"
+    ],
+    "input_names": {
+        "mlprimitives.counters.UniqueCounter#1": {
+            "X": "y"
+        }
+    },
+    "output_names": {
+        "mlprimitives.counters.UniqueCounter#1": {
+            "counts": "classes"
+        }
+    },
+    "init_params": {
+        "mlprimitives.counters.UniqueCounter#1": {
+            "add": 1
+        },
+        "keras.Sequential.SingleLayerCNNImageClassifier#1": {
+            "epochs": 5
+        }
+    }
+}
diff --git a/mlblocks_primitives/keras.Sequential.SingleLayerCNNImageClassifier.json b/mlblocks_primitives/keras.Sequential.SingleLayerCNNImageClassifier.json
index 21f7c166..8141c508 100644
--- a/mlblocks_primitives/keras.Sequential.SingleLayerCNNImageClassifier.json
+++ b/mlblocks_primitives/keras.Sequential.SingleLayerCNNImageClassifier.json
@@ -110,7 +110,7 @@
                     {
                         "class": "keras.layers.Dense",
                         "parameters": {
-                            "units": "dense_units",
+                            "units": "classes",
                             "activation": "dense_activation"
                         }
                     }