diff --git a/src/bindings/bind_dataset.cpp b/src/bindings/bind_dataset.cpp
index 3c405f8f..872750d5 100644
--- a/src/bindings/bind_dataset.cpp
+++ b/src/bindings/bind_dataset.cpp
@@ -9,29 +9,90 @@ namespace nl = nlohmann;
 void bind_dataset(py::module & m)
 {
     py::class_<br::Data::Dataset>(m, "Dataset")
+        // construct from X
-        .def(py::init<const Ref<const ArrayXXf>&>())
+        // .def(py::init<const Ref<const ArrayXXf>&>())
+        // construct from X (and optional validation and batch sizes) with constructor 3.
+        .def(py::init([](const Ref<const ArrayXXf>& X,
+                         const float validation_size=0.0,
+                         const float batch_size=1.0){
+                return br::Data::Dataset(
+                    X, {}, validation_size, batch_size);
+            }),
+            py::arg("X"),
+            py::arg("validation_size") = 0.0,
+            py::arg("batch_size") = 1.0
+        )
         // construct from X, feature names
-        .def(py::init<
-            const Ref<const ArrayXXf>&,
-            const vector<string>&
-        >()
+        // .def(py::init<
+        //     const Ref<const ArrayXXf>&,
+        //     const vector<string>&
+        // >()
+        // )
+        // construct from X, feature names (and optional validation and batch sizes) with constructor 3.
+        .def(py::init([](const Ref<const ArrayXXf>& X,
+                         const vector<string>& feature_names,
+                         const float validation_size=0.0,
+                         const float batch_size=1.0){
+                return br::Data::Dataset(
+                    X, feature_names, validation_size, batch_size);
+            }),
+            py::arg("X"),
+            py::arg("feature_names"),
+            py::arg("validation_size") = 0.0,
+            py::arg("batch_size") = 1.0
         )
-        // construct from X, y arrays
-        .def(py::init<Ref<const ArrayXXf>&, Ref<const ArrayXf>&>())
+
+        // construct from X, y arrays
+        // .def(py::init<Ref<const ArrayXXf>&, Ref<const ArrayXf>&>())
+        // construct from X, y arrays (and optional validation and batch sizes) with constructor 2.
+        .def(py::init([](const Ref<const ArrayXXf>& X,
+                         const Ref<const ArrayXf>& y,
+                         const float validation_size=0.0,
+                         const float batch_size=1.0){
+                return br::Data::Dataset(
+                    X, y, {}, {}, false, validation_size, batch_size);
+            }),
+            py::arg("X"),
+            py::arg("y"),
+            py::arg("validation_size") = 0.0,
+            py::arg("batch_size") = 1.0
+        )
+
         // construct from X, y, feature names
-        .def(py::init<
-            const Ref<const ArrayXXf>&,
-            const Ref<const ArrayXf>&,
-            const vector<string>&
-        >()
+        // .def(py::init<
+        //     const Ref<const ArrayXXf>&,
+        //     const Ref<const ArrayXf>&,
+        //     const vector<string>&
+        // >()
+        // )
+        // construct from X, y, feature names (and optional validation and batch sizes) with constructor 2.
+        .def(py::init([](const Ref<const ArrayXXf>& X,
+                         const Ref<const ArrayXf>& y,
+                         const vector<string>& feature_names,
+                         const float validation_size=0.0,
+                         const float batch_size=1.0){
+                return br::Data::Dataset(
+                    X, y, feature_names, {}, false, validation_size, batch_size);
+            }),
+            py::arg("X"),
+            py::arg("y"),
+            py::arg("feature_names"),
+            py::arg("validation_size") = 0.0,
+            py::arg("batch_size") = 1.0
         )
+
         .def_readwrite("y", &br::Data::Dataset::y)
-        // .def_readwrite("features", &br::Data::Dataset::features)
+        // .def_readwrite("features", &br::Data::Dataset::features)
         .def("get_n_samples", &br::Data::Dataset::get_n_samples)
         .def("get_n_features", &br::Data::Dataset::get_n_features)
         .def("print", &br::Data::Dataset::print)
         .def("get_batch", &br::Data::Dataset::get_batch)
+        .def("get_training_data", &br::Data::Dataset::get_training_data)
+        .def("get_validation_data", &br::Data::Dataset::get_validation_data)
+        .def("get_batch_size", &br::Data::Dataset::get_batch_size)
+        .def("set_batch_size", &br::Data::Dataset::set_batch_size)
+        .def("split", &br::Data::Dataset::split)
         .def("get_X", &br::Data::Dataset::get_X)
         ;
diff --git a/src/bindings/bind_params.cpp b/src/bindings/bind_params.cpp
index 8c054f0f..75521ab3 100644
--- a/src/bindings/bind_params.cpp
+++ b/src/bindings/bind_params.cpp
@@ -1,5 +1,8 @@
 #include "module.h"
 #include "../params.h"
+#include "../util/rnd.h"
+
+namespace br = Brush;
 
 void bind_params(py::module& m)
 {
@@ -9,5 +12,10 @@ void bind_params(py::module& m)
     //     py::class_(m, "Params", py::dynamic_attr())
     //     .def(py::init<>())
 
-    m.def("set_params", &Brush::set_params);
+    m.def("set_params", &br::set_params);
+    m.def("get_params", &br::get_params);
+    m.def("set_random_state", [](unsigned int seed)
+        { br::Util::r = *br::Util::Rnd::initRand();
+          br::Util::r.set_seed(seed); });
+    m.def("rnd_flt", [](){ return br::Util::r.rnd_flt(); });
 }
\ No newline at end of file
diff --git a/src/bindings/bind_programs.h b/src/bindings/bind_programs.h
index 35fd1c0c..96a36b71 100644
--- a/src/bindings/bind_programs.h
+++ b/src/bindings/bind_programs.h
@@ -45,9 +45,12 @@ void bind_program(py::module& m, string name)
          )
         .def("get_dot_model", &T::get_dot_model, py::arg("extras")="")
         .def("get_weights", &T::get_weights)
-        .def("size", &T::size)
-        .def("cross", &T::cross)
-        .def("mutate", &T::mutate) // static_cast(&T::mutate))
+        .def("size", &T::size, py::arg("include_weight")=true)
+        .def("depth", &T::depth)
+        .def("cross", &T::cross, py::return_value_policy::automatic,
+             "Performs one attempt to stochastically swap subtrees between two programs and generate a child")
+        .def("mutate", &T::mutate, py::return_value_policy::automatic,
+             "Performs one attempt to stochastically mutate the program and generate a child")
         .def("set_search_space", &T::set_search_space)
         .def(py::pickle(
             [](const T &p) { // __getstate__
diff --git a/src/brush/deap_api/nsga2.py b/src/brush/deap_api/nsga2.py
index 29a275d3..e45d011b 100644
--- a/src/brush/deap_api/nsga2.py
+++ b/src/brush/deap_api/nsga2.py
@@ -1,29 +1,45 @@
 from deap import tools
 from deap.benchmarks.tools import diversity, convergence, hypervolume
 import numpy as np
-import random
+import functools
 
-def nsga2(toolbox, NGEN, MU, CXPB, verbosity):
+
+def nsga2(toolbox, NGEN, MU, CXPB, use_batch, verbosity, rnd_flt):
     # NGEN = 250
-    # MU = 100
+    # MU = 100
     # CXPB = 0.9
+    # rnd_flt: random number generator to sample crossover prob
+
+    def calculate_statistics(ind):
+        on_train = ind.fitness.values
+        on_val = toolbox.evaluateValidation(ind)
+
+        return (*on_train, *on_val)
 
-    stats = tools.Statistics(lambda ind: ind.fitness.values)
-    stats.register("ave", np.mean, axis=0)
+    stats = tools.Statistics(calculate_statistics)
+
+    stats.register("avg", np.mean, axis=0)
+    stats.register("med", np.median, axis=0)
     stats.register("std", np.std, axis=0)
     stats.register("min", np.min, axis=0)
-    # stats.register("max", np.max, axis=0)
+    stats.register("max", np.max, axis=0)
 
     logbook = tools.Logbook()
-    logbook.header = "gen", "evals", "ave", "std", "min"
+    logbook.header = "gen", "evals", "avg (O1 train, O2 train, O1 val, O2 val)", \
+                     "med (O1 train, O2 train, O1 val, O2 val)", \
+                     "std (O1 train, O2 train, O1 val, O2 val)", \
+                     "min (O1 train, O2 train, O1 val, O2 val)", \
+                     "max (O1 train, O2 train, O1 val, O2 val)"
 
     pop = toolbox.population(n=MU)
-
-    # Evaluate the individuals with an invalid fitness
-    invalid_ind = [ind for ind in pop if not ind.fitness.valid]
-    fitnesses = toolbox.map(toolbox.evaluate, invalid_ind)
-    for ind, fit in zip(invalid_ind, fitnesses):
+    batch = toolbox.getBatch() # every time this function is called, a new random batch is generated
+
+    # OBS: evaluate calls fit on the individual, which is different from using it
+    # only to predict. The evaluateValidation function does not call fit
+    fitnesses = toolbox.map(functools.partial(toolbox.evaluate, data=batch), pop)
+
+    for ind, fit in zip(pop, fitnesses):
         ind.fitness.values = fit
 
     # This is just to assign the crowding distance to the individuals
@@ -31,12 +47,22 @@ def nsga2(toolbox, NGEN, MU, CXPB, verbosity):
    pop = toolbox.survive(pop, len(pop))
 
     record = stats.compile(pop)
-    logbook.record(gen=0, evals=len(invalid_ind), **record)
+    logbook.record(gen=0, evals=len(pop), **record)
+
     if verbosity > 0:
         print(logbook.stream)
 
     # Begin the generational process
     for gen in range(1, NGEN):
+        # The batch will be random only if it is not the size of the entire train set.
+        # In this case, we don't need to reevaluate the whole pop
+        if (use_batch):
+            batch = toolbox.getBatch()
+            fitnesses = toolbox.map(functools.partial(toolbox.evaluate, data=batch), pop)
+
+            for ind, fit in zip(pop, fitnesses):
+                ind.fitness.values = fit
+
         # Vary the population
         # offspring = tools.selTournamentDCD(pop, len(pop))
         parents = toolbox.select(pop, len(pop))
@@ -44,25 +70,29 @@ def nsga2(toolbox, NGEN, MU, CXPB, verbosity):
         offspring = []
         for ind1, ind2 in zip(parents[::2], parents[1::2]):
-            if random.random() <= CXPB:
-                ind1, ind2 = toolbox.mate(ind1, ind2)
-
-            off1 = toolbox.mutate(ind1)
-            off2 = toolbox.mutate(ind2)
-            # del ind1.fitness.values, ind2.fitness.values
-            offspring.extend([off2, off2])
+            off1, off2 = None, None
+            if rnd_flt() < CXPB:
+                off1, off2 = toolbox.mate(ind1, ind2)
+            else:
+                off1 = toolbox.mutate(ind1)
+                off2 = toolbox.mutate(ind2)
+
+            # avoid inserting empty solutions
+            if off1 is not None: offspring.extend([off1])
+            if off2 is not None: offspring.extend([off2])
 
         # archive.update(offspring)
         # Evaluate the individuals with an invalid fitness
         invalid_ind = [ind for ind in offspring if not ind.fitness.valid]
-        fitnesses = toolbox.map(toolbox.evaluate, invalid_ind)
+        fitnesses = toolbox.map(functools.partial(toolbox.evaluate, data=batch), invalid_ind)
         for ind, fit in zip(invalid_ind, fitnesses):
             ind.fitness.values = fit
 
         # Select the next generation population
         pop = toolbox.survive(pop + offspring, MU)
         record = stats.compile(pop)
-        logbook.record(gen=gen, evals=len(invalid_ind), **record)
+        logbook.record(gen=gen, evals=len(offspring)+(len(pop) if use_batch else 0), **record)
+
         if verbosity > 0:
             print(logbook.stream)
diff --git a/src/brush/estimator.py b/src/brush/estimator.py
index f1ff642b..fd4913af 100644
--- a/src/brush/estimator.py
+++ b/src/brush/estimator.py
@@ -23,7 +23,6 @@ class BrushEstimator(BaseEstimator):
     This class shouldn't be called directly; instead, call a child class like
     :py:class:`BrushRegressor <brush.estimator.BrushRegressor>` or :py:class:`BrushClassifier <brush.estimator.BrushClassifier>`.
     All of the shared parameters are documented here.
-
     Parameters
     ----------
@@ -39,12 +38,44 @@ class BrushEstimator(BaseEstimator):
         Maximum depth of GP trees in the GP program. Use 0 for no limit.
     max_size : int, default 0
         Maximum number of nodes in a tree. Use 0 for no limit.
-    mutation_options : dict, default {"point":0.5, "insert": 0.25, "delete": 0.25}
+    cx_prob : float, default 1/7
+        Probability of applying the crossover variation when generating the offspring;
+        must be between 0 and 1.
+        Given that there are `n` mutations, and either crossover or mutation is
+        used to generate each individual in the offspring (but not both at the
+        same time), we want to have, by default, a uniform probability between
+        crossover and every possible mutation. By setting `cx_prob=1/(n+1)`, and
+        `1/n` for each mutation, we can achieve a uniform distribution.
+    mutation_options : dict, default {"point":1/6, "insert":1/6, "delete":1/6, "subtree":1/6, "toggle_weight_on":1/6, "toggle_weight_off":1/6}
         A dictionary with keys naming the types of mutation and floating point
-        values specifying the fraction of total mutations to do with that method.
+        values specifying the fraction of total mutations to do with that method.
+        The probability of having a mutation is `(1-cx_prob)` and, in case a mutation
+        is applied, each mutation option is sampled based on the probabilities
+        defined in `mutation_options`. The set of probabilities should add up to 1.0.
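+        As a worked example of this default: with the six default mutation
+        options and `cx_prob=1/7`, every variation event is equally likely,
+        since the chance of a given mutation being applied is
+        (1 - 1/7) * (1/6) = 1/7, the same as the chance of crossover.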
     functions: dict[str,float] or list[str], default {}
-        A dictionary with keys naming the function set and values giving the probability of sampling them, or a list of functions which will be weighted uniformly.
+        A dictionary with keys naming the function set and values giving the probability
+        of sampling them, or a list of functions which will be weighted uniformly.
         If empty, all available functions are included in the search space.
+    initialization : {"grow", "full"}, default "grow"
+        Strategy to create the initial population. If `full`, then every expression is created
+        with `max_size` nodes. If `grow`, size will be uniformly distributed.
+    validation_size : float, default 0.0
+        Fraction of samples to use as a hold-out partition. These samples are used
+        to calculate statistics during evolution, but not used to train the models.
+        The `best_estimator_` will be selected using this partition. If zero, then
+        the same data used for training is used for validation.
+    batch_size : float, default 1.0
+        Fraction of training data to sample every generation. If `1.0`, then
+        all data is used. Very small values can improve execution time, but
+        may also lead to underfitting.
+    random_state : int or None, default None
+        If int, the value is used to seed the C++ random generator; if None,
+        a seed will be generated using a non-deterministic generator. It is
+        important to notice that, even if the random state is fixed, running
+        brush with multiple threads is unlikely to produce the same results
+        across runs. This happens because the Operating System's scheduler is
+        responsible for choosing which thread runs at any given time, so
+        reproducibility is not guaranteed.
 
     Attributes
     ----------
@@ -53,7 +84,11 @@ class BrushEstimator(BaseEstimator):
     archive_ : list[deap_api.DeapIndividual]
         The final population from training.
     data_ : _brush.Dataset
-        The training data in Brush format.
+        The complete data in Brush format.
+    train_ : _brush.Dataset
+        Partition of `data_` containing a `(1-validation_size)` fraction of the data, in Brush format.
+    validation_ : _brush.Dataset
+        Partition of `data_` containing a `(validation_size)` fraction of the data, in Brush format.
     search_space_ : a Brush `SearchSpace` object.
         Holds the operators and terminals and sampling utilities to update programs.
     toolbox_ : deap.Toolbox
@@ -69,9 +104,14 @@ def __init__(
         self,
         verbosity=0,
         max_depth=3,
         max_size=20,
-        mutation_options = {"point":0.4, "insert": 0.25, "delete": 0.25, "toggle_weight": 0.1},
+        cx_prob= 1/7,
+        mutation_options = {"point":1/6, "insert":1/6, "delete":1/6, "subtree":1/6,
+                            "toggle_weight_on":1/6, "toggle_weight_off":1/6},
         functions: list[str]|dict[str,float] = {},
-        batch_size: int = 0
+        initialization="grow",
+        random_state=None,
+        validation_size: float = 0.0,
+        batch_size: float = 1.0
         ):
         self.pop_size=pop_size
         self.max_gen=max_gen
@@ -79,17 +119,26 @@ def __init__(
         self.mode=mode
         self.max_depth=max_depth
         self.max_size=max_size
+        self.cx_prob=cx_prob
         self.mutation_options=mutation_options
         self.functions=functions
+        self.initialization=initialization
+        self.random_state=random_state
         self.batch_size=batch_size
+        self.validation_size=validation_size
 
-    def _setup_toolbox(self, data):
+    def _setup_toolbox(self, data_train, data_validation):
         """Setup the deap toolbox"""
         toolbox: base.Toolbox = base.Toolbox()
 
-        # minimize MAE, minimize size
-        creator.create("FitnessMulti", base.Fitness, weights=(-1.0,-1.0))
+        # creator.create is used to "create new functions", and takes at least
+        # 2 arguments: the name of the newly created class and a base class
+
+        # Minimizing/maximizing problem: negative/positive weight, respectively.
+        # Each child class defines its own metrics and directions in `self.weights`
+        # Comparing fitnesses: https://deap.readthedocs.io/en/master/api/base.html#deap.base.Fitness
+        creator.create("FitnessMulti", base.Fitness, weights=self.weights)
 
         # create Individual class, inheriting from self.Individual with a fitness attribute
         creator.create("Individual", DeapIndividual, fitness=creator.FitnessMulti)
@@ -97,29 +146,45 @@ def _setup_toolbox(self, data):
         toolbox.register("mate", self._crossover)
         toolbox.register("mutate", self._mutate)
 
+        # When solving multi-objective problems, selection and survival must
+        # support this feature. This means that these selection operators must
+        # accept a tuple of fitnesses as argument.
         toolbox.register("select", tools.selTournamentDCD)
         toolbox.register("survive", tools.selNSGA2)
 
         # toolbox.population will return a list of elements by calling toolbox.individual
-        toolbox.register("population", tools.initRepeat, list, self._make_individual)
-        toolbox.register( "evaluate", self._fitness_function, data=data)
+        toolbox.register("createRandom", self._make_individual)
+        toolbox.register("population", tools.initRepeat, list, toolbox.createRandom)
+
+        toolbox.register("getBatch", data_train.get_batch)
+        toolbox.register("evaluate", self._fitness_function, data=data_train)
+        toolbox.register("evaluateValidation", self._fitness_validation, data=data_validation)
 
         return toolbox
 
+
     def _crossover(self, ind1, ind2):
         offspring = []
 
         for i,j in [(ind1,ind2),(ind2,ind1)]:
-            off = creator.Individual(i.prg.cross(j.prg))
-            # off.fitness.valid = False
-            offspring.append(off)
+            child = i.prg.cross(j.prg)
+            if child:
+                offspring.append(creator.Individual(child))
+            else: # so we'll always have two elements to unpack in `offspring`
+                offspring.append(None)
 
         return offspring[0], offspring[1]
 
+
     def _mutate(self, ind1):
         # offspring = (creator.Individual(ind1.prg.mutate(self.search_space_)),)
-        offspring = creator.Individual(ind1.prg.mutate())
-        return offspring
+        offspring = ind1.prg.mutate()
+
+        if offspring:
+            return creator.Individual(offspring)
+
+        return None
+
 
     def fit(self, X, y):
         """
         Fit an estimator to X,y.
 
         Parameters
         ----------
         X : np.ndarray
             2-d array of input data.
         y : np.ndarray
             1-d array of (boolean) target values.
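+
+        Examples
+        --------
+        An illustrative sketch (assumes `X` and `y` are numpy arrays with a
+        matching number of samples):
+
+        >>> est = BrushRegressor(validation_size=0.2, batch_size=0.5)
+        >>> est.fit(X, y)                       # doctest: +SKIP
+        >>> y_pred = est.predict(X)             # doctest: +SKIP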
         """
         _brush.set_params(self.get_params())
-        self.data_ = self._make_data(X,y)
+
+        if self.random_state is not None:
+            _brush.set_random_state(self.random_state)
+
+        self.data_ = self._make_data(X,y, validation_size=self.validation_size)
 
         # set n classes if relevant
         if self.mode=="classification":
             self.n_classes_ = len(np.unique(y))
 
+        # These have a default behavior to return something meaningful if
+        # no values are set
+        self.train_ = self.data_.get_training_data()
+        self.train_.set_batch_size(self.batch_size)
+        self.validation_ = self.data_.get_validation_data()
+
         if isinstance(self.functions, list):
             self.functions_ = {k:1.0 for k in self.functions}
         else:
             self.functions_ = self.functions
 
-        self.search_space_ = _brush.SearchSpace(self.data_, self.functions_)
-        self.toolbox_ = self._setup_toolbox(data=self.data_)
+        self.search_space_ = _brush.SearchSpace(self.train_, self.functions_)
+        self.toolbox_ = self._setup_toolbox(data_train=self.train_, data_validation=self.validation_)
 
-        archive, logbook = nsga2(self.toolbox_, self.max_gen, self.pop_size, 0.9, self.verbosity)
+        archive, logbook = nsga2(
+            self.toolbox_, self.max_gen, self.pop_size, self.cx_prob,
+            (0.0 < self.batch_size < 1.0), self.verbosity, _brush.rnd_flt)
 
         self.archive_ = archive
         self.logbook_ = logbook
         self.best_estimator_ = self.archive_[0].prg
 
+        if self.verbosity > 0:
+            print(f'best model {self.best_estimator_.get_model()}'+
+                  f' with size {self.best_estimator_.size()}, ' +
+                  f' depth {self.best_estimator_.depth()}, ' +
+                  f' and fitness {self.archive_[0].fitness}' )
 
-        print('best model:',self.best_estimator_.get_model())
         return self
 
-    def _make_data(self, X, y=None):
+    def _make_data(self, X, y=None, validation_size=0.0):
+        # This function should not partition data (as it is used in predict).
+        # Partitioning is done in fit().
+
         if isinstance(y, pd.Series):
             y = y.values
         if isinstance(X, pd.DataFrame):
@@ -162,16 +275,20 @@ def _make_data(self, X, y=None):
             feature_names = X.columns.to_list()
             X = X.values
             if isinstance(y, NoneType):
-                return _brush.Dataset(X, feature_names)
+                return _brush.Dataset(X,
+                    feature_names=feature_names, validation_size=validation_size)
             else:
-                return _brush.Dataset(X, y, feature_names)
+                return _brush.Dataset(X, y,
+                    feature_names=feature_names, validation_size=validation_size)
 
         assert isinstance(X, np.ndarray)
 
+        # if there is no label, don't include it in the library call to Dataset
         if isinstance(y, NoneType):
-            return _brush.Dataset(X)
+            return _brush.Dataset(X, validation_size=validation_size)
+
+        return _brush.Dataset(X, y, validation_size=validation_size)
 
-        return _brush.Dataset(X,y)
 
     def predict(self, X):
         """Predict using the best estimator in the archive. """
@@ -194,6 +311,7 @@ def predict(self, X):
     def get_params(self):
         return {k:v for k,v in self.__dict__.items() if not k.endswith('_')}
 
+
 class BrushClassifier(BrushEstimator,ClassifierMixin):
     """Brush for classification.
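A minimal usage sketch of the keyword-based Dataset construction bound above
(illustrative only; it assumes a locally built `_brush` module and mirrors what
`fit()` does with the training/validation split):

    import numpy as np
    import _brush

    X = np.random.rand(100, 3).astype(np.float32)
    y = np.random.rand(100).astype(np.float32)

    # hold out 20% of samples for validation
    data = _brush.Dataset(X, y, validation_size=0.2)

    train = data.get_training_data()
    train.set_batch_size(0.5)       # sample 50% of the training split
    batch = train.get_batch()       # a new random subset on every call

    validation = data.get_validation_data()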
@@ -214,19 +332,39 @@ class BrushClassifier(BrushEstimator,ClassifierMixin):
     def __init__( self, **kwargs):
         super().__init__(mode='classification',**kwargs)
 
+        # Weight of each objective (+ for maximization, - for minimization)
+        self.weights = (+1.0,-1.0)
+
+    def _fitness_validation(self, ind, data: _brush.Dataset):
+        # Fitness without fitting the expression, used with validation data
+        return ( # (accuracy, size)
+            (data.y==ind.prg.predict(data)).sum() / data.y.shape[0],
+            ind.prg.size()
+        )
+
     def _fitness_function(self, ind, data: _brush.Dataset):
         ind.prg.fit(data)
-        return (
-            np.abs(data.y-ind.prg.predict(data)).sum(),
+        return ( # (accuracy, size)
+            (data.y==ind.prg.predict(data)).sum() / data.y.shape[0],
             ind.prg.size()
         )
 
     def _make_individual(self):
+        # C++'s PTC2-based `make_individual` will create a tree of at least
+        # the given size. By uniformly sampling the size, we can instantiate a
+        # population with more diversity
+
+        if self.initialization not in ["grow", "full"]:
+            raise ValueError(f"Invalid argument value for `initialization`. "
+                             f"Expected 'full' or 'grow'. Got {self.initialization}")
+
         return creator.Individual(
-            self.search_space_.make_classifier(self.max_depth, self.max_size)
-            if self.n_classes_ == 2 else
-            self.search_space_.make_multiclass_classifier(self.max_depth, self.max_size)
-        )
+            self.search_space_.make_classifier(
+                self.max_depth,(0 if self.initialization=='grow' else self.max_size))
+            if self.n_classes_ == 2 else
+            self.search_space_.make_multiclass_classifier(
+                self.max_depth, (0 if self.initialization=='grow' else self.max_size))
+            )
 
     def predict_proba(self, X):
         """Predict class probabilities for X.
@@ -266,16 +404,35 @@ class BrushRegressor(BrushEstimator, RegressorMixin):
     def __init__(self, **kwargs):
         super().__init__(mode='regressor',**kwargs)
 
+        # Weight of each objective (+ for maximization, - for minimization)
+        self.weights = (-1.0,-1.0)
+
+    def _fitness_validation(self, ind, data: _brush.Dataset):
+        # Fitness without fitting the expression, used with validation data
+
+        MSE = np.mean( (data.y-ind.prg.predict(data))**2 )
+        if not np.isfinite(MSE): # numeric errors, np.nan, +-np.inf
+            MSE = np.inf
+
+        return ( MSE, ind.prg.size() )
+
     def _fitness_function(self, ind, data: _brush.Dataset):
         ind.prg.fit(data)
-        return (
-            np.sum((data.y- ind.prg.predict(data))**2),
-            ind.prg.size()
-        )
+
+        MSE = np.mean( (data.y-ind.prg.predict(data))**2 )
+        if not np.isfinite(MSE): # numeric errors, np.nan, +-np.inf
+            MSE = np.inf
+
+        return ( MSE, ind.prg.size() )
 
     def _make_individual(self):
-        return creator.Individual(
-            self.search_space_.make_regressor(self.max_depth, self.max_size)
+        if self.initialization not in ["grow", "full"]:
+            raise ValueError(f"Invalid argument value for `initialization`. "
+                             f"Expected 'full' or 'grow'. Got {self.initialization}")
+
+        return creator.Individual( # No arguments (or zero): brush will use PARAMS passed in set_params.
+                                   # max_size is sampled between 1 and params['max_size'] if zero is provided
+            self.search_space_.make_regressor(
+                self.max_depth, (0 if self.initialization=='grow' else self.max_size))
         )
 
 # Under development
diff --git a/src/data/data.cpp b/src/data/data.cpp
index 610293e9..b80668df 100644
--- a/src/data/data.cpp
+++ b/src/data/data.cpp
@@ -90,7 +90,7 @@ State check_type(const ArrayXf& x)
     }
     else
     {
-        if(isCategorical && uniqueMap.size() < 10)
+        if(isCategorical && uniqueMap.size() <= 10)
         {
             tmp = ArrayXi(x.cast<int>());
         }
@@ -130,20 +130,36 @@ Dataset Dataset::operator()(const vector<size_t>& idx) const
     return Dataset(new_features, new_y, this->classification);
 }
 
-Dataset Dataset::get_batch(int batch_size) const
+Dataset Dataset::get_batch() const
 {
+    // will always return a new dataset, even when use_batch is false (in this case, it returns itself)
 
-    batch_size =  std::min(batch_size,int(this->get_n_samples()));
-    return (*this)(r.shuffled_index(get_n_samples()));
+    if (!use_batch)
+        return (*this);
+
+    auto n_samples = int(this->get_n_samples());
+    // guarantee that at least one sample is going to be returned, since
+    // use_batch is true only if batch_size is in (0, 1), and ceil will round
+    // up
+    n_samples = int(ceil(n_samples*batch_size));
+
+    return (*this)(r.shuffled_index(n_samples));
 }
 
 array<Dataset, 2> Dataset::split(const ArrayXb& mask) const
 {
+    // TODO: assert that mask is not filled with zeros or ones (that would create
+    // one empty partition)
+
     // split data into two based on mask.
     auto idx1 = Util::mask_to_index(mask);
     auto idx2 = Util::mask_to_index((!mask));
     return std::array<Dataset, 2>{ (*this)(idx1), (*this)(idx2) };
 }
+
+Dataset Dataset::get_training_data() const { return (*this)(training_data_idx); }
+Dataset Dataset::get_validation_data() const { return (*this)(validation_data_idx); }
+
 /// call init at the end of constructors
 /// to define metafeatures of the data.
 void Dataset::init()
@@ -153,6 +169,13 @@ void Dataset::init()
     // note this will have to change in unsupervised settings
     // n_samples = this->y.size();
 
+    if (this->features.size() == 0){
+        HANDLE_ERROR_THROW(
+            fmt::format("Error during the initialization of the dataset. It "
It " + "does not contain any data\n") + ); + } + // fmt::print("Dataset::init()\n"); for (const auto& [name, value]: this->features) { @@ -165,6 +188,36 @@ void Dataset::init() // add feature to appropriate map list this->features_of_type[feature_type].push_back(name); } + + // setting the training and validation data indexes + auto n_samples = int(this->get_n_samples()); + auto idx = r.shuffled_index(n_samples); + + // garantee that at least one sample is going to be returned, since + // use_batch is true only if batch_size is (0, 1), and ceil will round + // up + auto n_train_samples = int(ceil(n_samples*(1-validation_size))); + + training_data_idx.resize(0); + std::transform(idx.begin(), idx.begin() + n_train_samples, + back_inserter(training_data_idx), + [&](int element) { return element; }); + + if ( use_validation && (n_samples - n_train_samples != 0) ) { + validation_data_idx.resize(0); + std::transform(idx.begin() + n_train_samples, idx.end(), + back_inserter(validation_data_idx), + [&](int element) { return element; }); + } + else { + validation_data_idx = training_data_idx; + } +} + +float Dataset::get_batch_size() { return batch_size; } +void Dataset::set_batch_size(float new_size) { + batch_size = new_size; + use_batch = batch_size > 0.0 && batch_size < 1.0; } /// turns input data into a feature map diff --git a/src/data/data.h b/src/data/data.h index 52a37f20..629c02b5 100644 --- a/src/data/data.h +++ b/src/data/data.h @@ -50,27 +50,39 @@ class Dataset //Dataset(ArrayXXf& X, ArrayXf& y, std::map, vector>>& Z): X(X), y(y), Z(Z){} private: + vector training_data_idx; + vector validation_data_idx; + public: /// @brief keeps track of the unique data types in the dataset. std::vector unique_data_types; + /// @brief types of data in the features. std::vector feature_types; + /// @brief map from data types to features having that type. std::unordered_map> features_of_type; - /// @brief dataset features, as key value pairs std::map features; // TODO: this should probably be a more complex type to include feature type // and potentially other info, like arbitrary relations between features - /// @brief length N array, the target label ArrayXf y; + /// @brief whether this is a classification problem bool classification; std::optional> Xref; + /// @brief percentage of original data used for train. if 0.0, then all data is used for train and validation + float validation_size; + bool use_validation; + + /// @brief percentage of training data size to use in each batch. if 1.0, then all data is used + float batch_size; + bool use_batch; + Dataset operator()(const vector& idx) const; /// call init at the end of constructors /// to define metafeatures of the data. @@ -82,34 +94,53 @@ class Dataset const vector& vn = {} ); - /// initialize data from a map. + /// 1. initialize data from a map. Dataset(std::map& d, const Ref& y_ = ArrayXf(), - bool c = false + bool c = false, + float validation_size = 0.0, + float batch_size = 1.0 ) : features(d) , y(y_) , classification(c) + , validation_size(validation_size) + , use_validation(validation_size > 0.0 && validation_size < 1.0) + , batch_size(batch_size) + , use_batch(batch_size > 0.0 && batch_size < 1.0) {init();}; - /// initialize data from a matrix with feature columns. + /// 2. initialize data from a matrix with feature columns. 
        Dataset(const ArrayXXf& X,
                const Ref<const ArrayXf>& y_ = ArrayXf(),
                const vector<string>& vn = {},
                const map<string, State>& Z = {},
-                bool c = false
+                bool c = false,
+                float validation_size = 0.0,
+                float batch_size = 1.0
                )
            : features(make_features(X,Z,vn))
            , y(y_)
            , classification(c)
+            , validation_size(validation_size)
+            , use_validation(validation_size > 0.0 && validation_size < 1.0)
+            , batch_size(batch_size)
+            , use_batch(batch_size > 0.0 && batch_size < 1.0)
        {
            init();
            Xref = optional<reference_wrapper<const ArrayXXf>>{X};
        }
 
-        Dataset(const ArrayXXf& X, const vector<string>& vn)
+        /// 3. initialize data from X and feature names
+        Dataset(const ArrayXXf& X, const vector<string>& vn,
+                float validation_size = 0.0,
+                float batch_size = 1.0)
            : classification(false)
            , features(make_features(X,map<string,State>{},vn))
+            , validation_size(validation_size)
+            , use_validation(validation_size > 0.0 && validation_size < 1.0)
+            , batch_size(batch_size)
+            , use_batch(batch_size > 0.0 && batch_size < 1.0)
        {
            init();
            Xref = optional<reference_wrapper<const ArrayXXf>>{X};
@@ -123,22 +154,26 @@ class Dataset
            for (auto& [key, value] : this->features)
            {
                if (std::holds_alternative<ArrayXf>(value))
-                    fmt::print("{}: {}\n", key, std::get<ArrayXf>(value));
+                    fmt::print("{} : {}\n", key, std::get<ArrayXf>(value));
                else if (std::holds_alternative<ArrayXi>(value))
-                    fmt::print("{}: {}\n", key, std::get<ArrayXi>(value));
+                    fmt::print("{} : {}\n", key, std::get<ArrayXi>(value));
                else if (std::holds_alternative<ArrayXb>(value))
-                    fmt::print("{}: {}\n", key, std::get<ArrayXb>(value));
+                    fmt::print("{} : {}\n", key, std::get<ArrayXb>(value));
            }
        };
 
        auto get_X() const
        {
-            if (Xref.has_value())
-                return this->Xref.value().get();
-            else
+            if (!Xref.has_value())
                HANDLE_ERROR_THROW("Dataset does not hold a reference to X.");
+            return this->Xref.value().get();
        }
-        void set_validation(bool v=true);
+
+        // inner partition of original dataset for train and validation.
+        // if split is not set, then training = validation.
+        Dataset get_training_data() const;
+        Dataset get_validation_data() const;
+
        inline int get_n_samples() const {
            return std::visit(
                [&](auto&& arg) -> int { return int(arg.size());},
@@ -147,7 +182,10 @@ class Dataset
        };
        inline int get_n_features() const { return this->features.size(); };
        /// select random subset of data for training weights.
-        Dataset get_batch(int batch_size) const;
+        Dataset get_batch() const;
+
+        float get_batch_size();
+        void set_batch_size(float new_size);
 
        std::array<Dataset, 2> split(const ArrayXb& mask) const;
diff --git a/src/program/dispatch_table.h b/src/program/dispatch_table.h
index cd7e9cd0..f4217057 100644
--- a/src/program/dispatch_table.h
+++ b/src/program/dispatch_table.h
@@ -216,7 +216,7 @@ extern DispatchTable<false> dtable_predict;
//                     ArgsName[args_type],
//                     node_type,
//                     node,
-//                     SS.weight_map.at(ret_type).at(args_type).at(node_type)
+//                     SS.node_map_weights.at(ret_type).at(args_type).at(node_type)
//                 );
//             }
//         }
diff --git a/src/program/node.cpp b/src/program/node.cpp
index 68becad2..2c632249 100644
--- a/src/program/node.cpp
+++ b/src/program/node.cpp
@@ -183,8 +183,10 @@ void from_json(const json &j, Node& p)
     if (j.contains("center_op"))
         j.at("center_op").get_to(p.center_op);
+
     if (j.contains("fixed"))
         j.at("fixed").get_to(p.fixed);
+
     if (j.contains("feature"))
     {
         // j.at("feature").get_to(p.feature);
@@ -195,6 +197,12 @@ void from_json(const json &j, Node& p)
     else
         p.is_weighted=false;
 
+    if (j.contains("prob_change"))
+        j.at("prob_change").get_to(p.prob_change);
+    else
+        p.prob_change=1.0;
+
+
     // if the node has a ret_type and arg_types, get them. If not, we need to make
     // a signature
     bool make_signature=false;
diff --git a/src/program/node.h b/src/program/node.h
index 00e5a8fc..cf4541ef 100644
--- a/src/program/node.h
+++ b/src/program/node.h
@@ -39,6 +39,28 @@ using Brush::Data::Dataset;
 
 namespace Brush{
 
+// TODO: should I move this declaration to another place?
+template<DataType... T>
+inline auto Isnt(DataType dt) -> bool { return !((dt == T) || ...); }
+
+template<DataType DT>
+inline auto IsWeighable() noexcept -> bool {
+    return Isnt(DT);
+}
+inline auto IsWeighable(DataType dt) noexcept -> bool {
+    return Isnt(dt);
+}
+
 struct uint32_vector_hasher {
     std::size_t operator()(std::vector<uint32_t> const& vec) const {
         std::size_t seed = vec.size();
@@ -134,9 +156,12 @@ struct Node {
         W = 1.0;
         // set_node_hash();
-        set_prob_change(1.0);
         fixed=false;
+        set_prob_change(1.0);
 
+        // can't weight a boolean terminal
+        if (!IsWeighable(this->ret_type))
+            this->is_weighted = false;
     }
 
     /// @brief gets a string version of the node for printing.
@@ -212,20 +237,26 @@ struct Node {
     ////////////////////////////////////////////////////////////////////////////////
     // getters and setters
     //TODO revisit
-    float get_prob_change() const { return this->prob_change;};
+    float get_prob_change() const { return fixed ? 0.0 : this->prob_change;};
     void set_prob_change(float w){ if (!fixed) this->prob_change = w;};
     float get_prob_keep() const { return 1-this->prob_change;};
 
     inline void set_feature(string f){ feature = f; };
     inline string get_feature() const { return feature; };
 
+    inline bool get_is_weighted() const {return this->is_weighted;};
+    inline void set_is_weighted(bool is_weighted){
+        // can't change the weight of a boolean terminal
+        if (IsWeighable(this->ret_type))
+            this->is_weighted = is_weighted;
+    };
+
     private:
         /// @brief feature name for terminals or splitting nodes
         string feature;
 };
 
-//TODO: add nt to template as first argument, make these constexpr
 template<NodeType... T>
 inline auto Is(NodeType nt) -> bool { return ((nt == T) || ...); }
@@ -278,6 +309,7 @@ inline auto IsWeighable(NodeType nt) noexcept -> bool {
                 NodeType::SplitBest
                 >(nt);
 }
+
 ostream& operator<<(ostream& os, const Node& n);
 ostream& operator<<(ostream& os, const NodeType& nt);
diff --git a/src/program/operator.h b/src/program/operator.h
index 28061605..195afc6a 100644
--- a/src/program/operator.h
+++ b/src/program/operator.h
@@ -16,6 +16,7 @@ namespace util{
     /// @param weights optional pointer to a weight array, used in place of node weight
     /// @return
     template
+        requires (!is_one_of_v)
     Scalar get_weight(const TreeNode& tn, const W** weights=nullptr)
     {
         Scalar w;
@@ -31,7 +32,7 @@ namespace util{
             w = **weights;
         // NLS case 2: a Jet/Dual weight is stored in weights, but this constant is a
         // integer type. We need to do some casting
-        else if constexpr (is_same_v && is_same_v) {
+        else if constexpr (is_same_v && is_same_v) {
             using WScalar = typename Scalar::Scalar;
             WScalar tmp = WScalar((**weights).a);
             w = Scalar(tmp);
@@ -44,6 +45,23 @@ namespace util{
         }
         return w;
     };
+    template
+        requires (is_one_of_v)
+    Scalar get_weight(const TreeNode& tn, const W** weights=nullptr)
+    {
+        // we cannot weight a boolean feature. Nevertheless, we need to provide
+        // an implementation for get_weight behavior, so the metaprogramming
+        // doesn't fail to get a matching signature.
+
+        if (tn.data.get_is_weighted())
+            // Node's init() function avoids the creation of weighted nodes,
+            // and the setter for `is_weighted` prevents enabling weights on
+            // boolean values.
+            HANDLE_ERROR_THROW(fmt::format("boolean terminal is weighted, but "
+                                           "it should not be\n"));
+
+        return Scalar(true);
+    };
 }
 ////////////////////////////////////////////////////////////////////////////////
 // Operator class
@@ -199,7 +217,7 @@ struct Operator
         auto inputs = get_kids(d, tn, weights);
         if constexpr (is_one_of_v)
         {
-            if (tn.data.is_weighted)
+            if (tn.data.get_is_weighted())
             {
                 auto w = util::get_weight(tn, weights);
                 return this->apply(inputs)*w;
@@ -218,13 +236,14 @@ struct Operator
     using RetType = typename S::RetType;
     using W = typename S::WeightType;
 
+    // Standard C++ types
     template
         requires (is_one_of_v)
     RetType eval(const Dataset& d, const TreeNode& tn, const W** weights=nullptr) const
     {
         if constexpr (is_one_of_v)
         {
-            if (tn.data.is_weighted)
+            if (tn.data.get_is_weighted())
             {
                 auto w = util::get_weight(tn, weights);
                 return this->get(d, tn.data.get_feature())*w;
@@ -233,6 +252,7 @@ struct Operator
         return this->get(d,tn.data.get_feature());
     };
 
+    // Jet types
     template
         requires( is_one_of_v)
     RetType eval(const Dataset &d, const TreeNode &tn, const W **weights = nullptr) const
     {
         using nonJetType = UnJetify_t;
         if constexpr (is_one_of_v)
         {
-            if (tn.data.is_weighted)
+            if (tn.data.get_is_weighted())
             {
                 auto w = util::get_weight(tn, weights);
                 return this->get(d, tn.data.get_feature()).template cast()*w;
@@ -249,6 +269,7 @@ struct Operator
         return this->get(d, tn.data.get_feature()).template cast();
     };
 
+    // Accessing dataset directly
     template<typename T>
     auto get(const Dataset& d, const string& feature) const
     {
@@ -261,7 +282,6 @@ struct Operator
             ));
             return T();
-
         }
     };
diff --git a/src/program/program.h b/src/program/program.h
index 296b7aad..d1330cc2 100644
--- a/src/program/program.h
+++ b/src/program/program.h
@@ -87,8 +87,74 @@ template<PT PType> struct Program
         SSref = std::optional<std::reference_wrapper<SearchSpace>>{s};
     }
 
-    int size(){
-        return Tree.size();
+    /// @brief count the tree size of the program, including the weights in weighted nodes.
+    /// @param include_weight whether to include the node's weight in the count.
+    /// @return int number of nodes.
+    int size(bool include_weight=true) const{
+        int acc = 0;
+
+        std::for_each(Tree.begin(), Tree.end(),
+            [include_weight, &acc](auto& node){
+                ++acc; // the node operator or terminal
+
+                if (include_weight && node.get_is_weighted()==true)
+                    acc += 2; // weight and multiplication, if enabled
+            });
+
+        return acc;
+    }
+
+    /// @brief count the size of a given subtree, optionally including the
+    /// weights in weighted nodes. This function is not exposed to the python wrapper.
+    /// @param top root node of the subtree.
+    /// @param include_weight whether to include the node's weight in the count.
+    /// @return int number of nodes.
+    int size_at(Iter& top, bool include_weight=true) const{
+
+        int acc = 0;
+
+        // inspired by tree.hh's size(). First create two identical iterators
+        Iter it=top, eit=top;
+
+        // Then make the second one point to the next sibling
+        eit.skip_children();
+        ++eit;
+
+        // calculate the tree size for each node until we reach the next sibling
+        while(it!=eit) {
+            ++acc; // counting the node operator/terminal
+
+            if (include_weight && it.node->data.get_is_weighted()==true)
+                acc += 2; // weight and multiplication, if enabled
+
+            ++it;
+        }
+
+        return acc;
+    }
+
+    /// @brief count the tree depth of the program. The depth is not influenced by weighted nodes.
+    /// @return int tree depth.
+    int depth() const{
+        // tree.hh counts the number of edges; we need to ensure that a single-node
+        // tree has depth>0
+        return 1+Tree.max_depth();
+    }
+
+    /// @brief count the depth of a given subtree. The depth is not influenced by
+    /// weighted nodes. This function is not exposed to the python wrapper.
+    /// @param top root node of the subtree.
+    /// @return int tree depth.
+    int depth_at(Iter& top) const{
+        return 1+Tree.max_depth(top);
+    }
+
+    /// @brief count the depth until reaching the given subtree. The depth is
+    /// not influenced by weighted nodes. This function is not exposed to the python wrapper.
+    /// @param top root node of the subtree.
+    /// @return int tree depth.
+    int depth_to_reach(Iter& top) const{
+        return 1+Tree.depth(top);
     }
 
     Program& fit(const Dataset& d)
@@ -215,7 +281,7 @@ template<PT PType> struct Program
         for (PostIter i = Tree.begin_post(); i != Tree.end_post(); ++i)
         {
             const auto& node = i.node->data;
-            if (node.is_weighted)
+            if (node.get_is_weighted())
                 ++count;
         }
         return count;
@@ -233,7 +299,7 @@ template<PT PType> struct Program
         for (PostIter t = Tree.begin_post(); t != Tree.end_post(); ++t)
         {
             const auto& node = t.node->data;
-            if (node.is_weighted)
+            if (node.get_is_weighted())
             {
                 weights(i) = node.W;
                 ++i;
@@ -258,7 +324,7 @@ template<PT PType> struct Program
         for (PostIter i = Tree.begin_post(); i != Tree.end_post(); ++i)
         {
             auto& node = i.node->data;
-            if (node.is_weighted)
+            if (node.get_is_weighted())
             {
                 node.W = weights(j);
                 ++j;
@@ -327,7 +393,7 @@ template<PT PType> struct Program
         // if the first node is weighted, make a dummy output node so that the
         // first node's weight can be shown
-        if (i==0 && parent->data.is_weighted)
+        if (i==0 && parent->data.get_is_weighted())
         {
             out += "y [shape=box];\n";
             out += fmt::format("y -> \"{}\" [label=\"{:.2f}\"];\n",
@@ -360,7 +426,7 @@ template<PT PType> struct Program
             // string kid_id = fmt::format("{}",fmt::ptr(kid));
             // kid_id = kid_id.substr(2);
-            if (kid->data.is_weighted && Isnt(kid->data.node_type)){
+            if (kid->data.get_is_weighted() && Isnt(kid->data.node_type)){
                 edge_label = fmt::format("{:.2f}",kid->data.W);
             }
@@ -415,15 +481,15 @@ template<PT PType> struct Program
     /// @brief convenience wrapper for :cpp:func:`variation:mutate()` in variation.h
     /// @return a mutated version of this program
-    Program mutate() const;
+    std::optional<Program> mutate() const;
 
     /**
      * @brief convenience wrapper for :cpp:func:`variation:cross` in variation.h
      *
      * @param other another program to cross with this one.
-     * @return a mutated version of this and the other program
+     * @return a new version of this and the other program
      */
-    Program cross(Program other) const;
+    std::optional<Program> cross(Program other) const;
 
     /// @brief turns program tree into a linear program.
     /// @return a vector of nodes encoding the program in reverse polish notation
@@ -455,14 +521,14 @@ void Program<PType>::update_weights(const Dataset& d)
 // mutation and crossover
 #include "../variation.h"
 template<PT PType>
-Program<PType> Program<PType>::mutate() const
+std::optional<Program<PType>> Program<PType>::mutate() const
 {
     return variation::mutate(*this, this->SSref.value().get());
};
 
 /// swaps subtrees between this and other (note the pass by copy)
 template<PT PType>
-Program<PType> Program<PType>::cross(Program<PType> other) const
+std::optional<Program<PType>> Program<PType>::cross(Program<PType> other) const
 {
     return variation::cross(*this, other);
};
diff --git a/src/program/signatures.h b/src/program/signatures.h
index 1d9e13cc..a46e58a9 100644
--- a/src/program/signatures.h
+++ b/src/program/signatures.h
@@ -195,8 +195,9 @@ template struct Signatures;
 template struct Signatures>>{
     using type = std::tuple<
-        Signature,
-        Signature
+        Signature,
+        Signature,
+        Signature
         >;
 };
diff --git a/src/search_space.cpp b/src/search_space.cpp
index cb4a448a..4ea0b518 100644
--- a/src/search_space.cpp
+++ b/src/search_space.cpp
@@ -4,6 +4,35 @@
 
 namespace Brush{
 
+
+float calc_initial_weight(const ArrayXf& value, const ArrayXf& y)
+{
+    // weights are initialized as the slope of the z-score of x and y.
+
+    // If y has a different length from X, we get a core dump here.
+    // TODO: need to make SS (or Dataset) check for this when loading the data
+
+    vector<char> dtypes = {'f', 'f'};
+
+    MatrixXf data(value.size(), 2);
+
+    data.col(0) << value;
+    data.col(1) << y;
+
+    Normalizer n(true);
+    n.fit_normalize(data, dtypes); // normalize works row-wise
+
+    // In the slope function, argument order matters (if not normalized with z-score).
+    // The feature should be the first value, and the true value the second
+    // (it will divide covar(arg1, arg2) by var(arg2)).
+    // Since z-score normalizes so mean=0 and std=1, the order doesn't matter.
+    float prob_change = std::abs(slope(data.col(0).array() ,   // x=variable
+                                       data.col(1).array() )); // y=target
+
+    return prob_change;
+}
+
+
 /// @brief generate terminals from the dataset features and random constants.
 /// @param d a dataset
 /// @return a vector of nodes
@@ -19,20 +48,90 @@ vector<Node> generate_terminals(const Dataset& d)
         using Scalar = typename T::Scalar;
         constexpr bool weighted = std::is_same_v<Scalar, float>;
         // if constexpr (T::Scalar == float)
-        terminals.push_back(Node(
+        auto n = Node(
             NodeType::Terminal,
             Signature<T()>{},
            weighted,
            feature_name
-        ));
+        );
+
+        float prob_change = 1.0; // default value
+
+        // if the value can be cast to a float array, we can calculate the slope
+        if (std::holds_alternative<ArrayXf>(value))
+        {
+            prob_change = calc_initial_weight(std::get<ArrayXf>(value), d.y);
+        }
+        else if (std::holds_alternative<ArrayXi>(value))
+        {
+            // for each variable we create a one-vs-all binary variable, then
+            // calculate the slope. The final value will be the average of the slopes
+
+            auto tmp = std::get<ArrayXi>(value);
+
+            // get number of unique values
+            std::map<float, bool> uniqueMap;
+            for(int i = 0; i < tmp.size(); i++)
+                uniqueMap[(float)tmp(i)] = true;
+
+            ArrayXf slopes = ArrayXf::Ones(uniqueMap.size());
+            int slopesIterator = 0;
+            for (const auto& pair : uniqueMap)
+            {
+                auto one_vs_all = ArrayXf::Ones(tmp.size()).array() * (tmp.array()==pair.first).cast<float>();
+
+                slopes[slopesIterator++] = calc_initial_weight(one_vs_all, d.y);
+            }
+
+            prob_change = slopes.mean();
+        }
+        else if (std::holds_alternative<ArrayXb>(value))
+        {
+            auto tmp = std::get<ArrayXb>(value).template cast<float>();
+            prob_change = calc_initial_weight(tmp, d.y);
+        }
+        else
+        {
+            auto msg = fmt::format("Brush couldn't calculate the initial weight of variable {}\n",feature_name);
+            HANDLE_ERROR_THROW(msg);
+        }
+
+        n.set_prob_change( prob_change );
+
+        terminals.push_back(n);
         },
         value
     );
     ++i;
     };
 
-    // add a constant
-    terminals.push_back( Node(NodeType::Constant, Signature<ArrayXf()>{}, true, "C"));
+    // iterate through terminals and take the average of the values with the same signature
+    auto signature_avg = [terminals](DataType ret_type){
+        float sum = 0.0;
+        int count = 0;
+
+        for (const auto& n : terminals) {
+            if (n.ret_type == ret_type) {
+                sum += n.get_prob_change();
+                count++;
+            }
+        }
+
+        return sum / count;
+    };
+
+    auto cXf = Node(NodeType::Constant, Signature<ArrayXf()>{}, true, "C");
+    cXf.set_prob_change(signature_avg(cXf.ret_type));
+    terminals.push_back(cXf);
+
+    auto cXi = Node(NodeType::Constant, Signature<ArrayXi()>{}, true, "C");
+    cXi.set_prob_change(signature_avg(cXi.ret_type));
+    terminals.push_back(cXi);
+
+    auto cXb = Node(NodeType::Constant, Signature<ArrayXb()>{}, false, "C");
+    cXb.set_prob_change(signature_avg(cXb.ret_type));
+    terminals.push_back(cXb);
+
     return terminals;
};
 
@@ -46,7 +145,7 @@ void SearchSpace::init(const Dataset& d, const unordered_map<string,float>& user_ops)
{
     // fmt::print("constructing search space...\n");
     this->node_map.clear();
-    this->weight_map.clear();
+    this->node_map_weights.clear();
     this->terminal_map.clear();
     this->terminal_types.clear();
     this->terminal_weights.clear();
@@ -56,7 +155,6 @@ void SearchSpace::init(const Dataset& d, const unordered_map<string,float>& user_ops)
     for (const auto& [op, weight] : user_ops)
         op_names.push_back(op);
 
-
     // create nodes based on data types
     terminal_types = d.unique_data_types;
 
@@ -72,12 +170,173 @@ void SearchSpace::init(const Dataset& d, const unordered_map<string,float>& user_ops)
         /* fmt::print("adding {} to search space...\n", term.get_name()); */
         if (terminal_map.find(term.ret_type) == terminal_map.end())
             terminal_map[term.ret_type] = vector<Node>();
+
         /* fmt::print("terminal ret_type: {}\n", DataTypeName[term.ret_type]); */
         terminal_map[term.ret_type].push_back(term);
-        terminal_weights[term.ret_type].push_back(1.0);
+        terminal_weights[term.ret_type].push_back(term.get_prob_change());
     }
};
+
+std::optional<tree<Node>> SearchSpace::sample_subtree(Node root, int max_d, int max_size) const
+{
+    // public interface to use the PTC2 algorithm
+
+    // PTC2 is designed not to fail (it will persistently try to find nodes with
+    // sampling functions). In pop initialization, this shouldn't be a problem, but
+    // during evolution, due to dynamic changes in node weights by the learners,
+    // it now may fail.
+    // We need to check, before calling it, that the search space has
+    // elements to sample
+    auto ret_match = node_map.at(root.ret_type);
+
+    vector<float> args_w = get_weights(root.ret_type);
+
+    // at least one operator that matches the weight must have positive probability
+    if (!has_solution_space(args_w.begin(), args_w.end()))
+        return std::nullopt;
+
+    if ( (terminal_map.find(root.ret_type) == terminal_map.end())
+    ||   (!has_solution_space(terminal_weights.at(root.ret_type).begin(),
+                              terminal_weights.at(root.ret_type).end())) )
+        return std::nullopt;
+
+    // we should note the difference between the size of a PROGRAM and a TREE:
+    // a program counts weights in its size, while the TREE structure doesn't. Whenever
+    // using the size of a program/tree, make sure you use the function from the correct class
+    return PTC2(root, max_d, max_size);
+};
+
+tree<Node> SearchSpace::PTC2(Node root, int max_d, int max_size) const
+{
+    // PTC2 is agnostic of program type
+
+    // A comment about the PTC2 method:
+    // PTC2 can work with depth or size restrictions, but it does not strictly
+    // satisfy these conditions at all times. Given the `max_size` and `max_depth`
+    // parameters, the real maximum size that can occur is `max_size` plus the
+    // highest operator arity, and the real maximum depth is `max_depth` plus one.
+
+    auto Tree = tree<Node>();
+
+    /* fmt::print("building program with max size {}, max depth {}",max_size,max_d); */
+
+    // Queue of nodes that need children
+    vector<tuple<TreeIter, DataType, int>> queue;
+
+    /* cout << "chose " << n.name << endl; */
+    // auto spot = Tree.set_head(n);
+    /* cout << "inserting...\n"; */
+    auto spot = Tree.insert(Tree.begin(), root);
+    // node depth
+    int d = 1;
+    // current tree size
+    int s = 1;
+    //For each argument position a of n, Enqueue(a; g)
+    for (auto a : root.arg_types)
+    {
+        /* cout << "queuing a node of type " << DataTypeName[a] << endl; */
+        auto child_spot = Tree.append_child(spot);
+        queue.push_back(make_tuple(child_spot, a, d));
+    }
+
+    Node n;
+    // Now we actually start the PTC2 procedure to create the program tree
+    /* cout << "queue size: " << queue.size() << endl; */
+    /* cout << "entering first while loop...\n"; */
+    while ( 3*(queue.size()-1) + s < max_size && queue.size() > 0)
+    {
+        // by default, terminals are weighted (counting as 3 nodes in program size).
+        // since every spot in the queue has the potential to be a terminal, we multiply
+        // its size by 3. We subtract one because this loop will
+        // always insert a non-terminal (which by default has weights off).
+        // This way, PTC2 works properly.
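+        // Worked example (illustrative): right after the root is inserted,
+        // s = 1; if the root has two pending argument slots, queue.size() = 2
+        // and the check is 3*(2-1) + 1 = 4 < max_size. Each remaining queued
+        // slot may still become a weighted terminal costing 3 nodes, so the
+        // loop only adds operators while that worst case fits the budget.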
+ + /* cout << "queue size: " << queue.size() << endl; */ + auto [qspot, t, d] = RandomDequeue(queue); + + /* cout << "current depth: " << d << endl; */ + if (d == max_d) + { + // choose terminal of matching type + /* cout << "getting " << DataTypeName[t] << " terminal\n"; */ + // qspot = sample_terminal(t); + // Tree.replace(qspot, sample_terminal(t)); + // Tree.append_child(qspot, sample_terminal(t)); + + auto opt = sample_terminal(t); + while (!opt) + opt = sample_terminal(t); + + // If we successfully get a terminal, use it + n = opt.value(); + + Tree.replace(qspot, n); + } + else + { + //choose a nonterminal of matching type + /* cout << "getting op of type " << DataTypeName[t] << endl; */ + auto opt = sample_op(t); + /* cout << "chose " << n.name << endl; */ + // TreeIter new_spot = Tree.append_child(qspot, n); + // qspot = n; + + while (!opt) + opt = sample_op(t); + + n = opt.value(); + + auto newspot = Tree.replace(qspot, n); + + // For each arg of n, add to queue + for (auto a : n.arg_types) + { + /* cout << "queing a node of type " << DataTypeName[a] << endl; */ + // queue.push_back(make_tuple(new_spot, a, d+1)); + auto child_spot = Tree.append_child(newspot); + + queue.push_back(make_tuple(child_spot, a, d+1)); + } + } + + // increment is different based on node weights + ++s; + if (n.get_is_weighted()) + s += 2; + + /* cout << "current tree size: " << s << endl; */ + } + /* cout << "entering second while loop...\n"; */ + while (queue.size() > 0) + { + if (queue.size() == 0) + break; + + /* cout << "queue size: " << queue.size() << endl; */ + + auto [qspot, t, d] = RandomDequeue(queue); + + /* cout << "getting " << DataTypeName[t] << " terminal\n"; */ + // Tree.append_child(qspot, sample_terminal(t)); + // qspot = sample_terminal(t); + // auto newspot = Tree.replace(qspot, sample_terminal(t)); + + auto opt = sample_terminal(t); + while (!opt) { + opt = sample_terminal(t); + } + + n = opt.value(); + + auto newspot = Tree.replace(qspot, n); } + /* cout << "final tree:\n" */ + /* << Tree.begin().node->get_model() << "\n" */ + /* << Tree.begin().node->get_tree_model(true) << endl; */ + /* << Tree.get_model() << "\n" */ + /* << Tree.get_model(true) << endl; // pretty */ + return Tree; }; RegressorProgram SearchSpace::make_regressor(int max_d, int max_size) diff --git a/src/search_space.h b/src/search_space.h index d99b03de..ac751a65 100644 --- a/src/search_space.h +++ b/src/search_space.h @@ -16,7 +16,6 @@ license: GNU/GPL v3 #include #include - /* Defines the search space of Brush. * The search spaces consists of nodes and their accompanying probability * distribution. @@ -53,26 +52,32 @@ vector generate_terminals(const Dataset& d); extern std::unordered_map ArgsName; /*! @brief Holds a search space, consisting of operations and terminals - * and functions, and methods to sample that space to create programs. - * - * The set of operators is a user controlled parameter; however, we can - * automate, to some extent, the set of possible operators based on the - * data types in the problem. - * Constraints on operators based on data types: - * - only user specified operators are included. - * - operators whose arguments are covered by terminal types are included - * first. Then, a second pass includes any operators whose arguments - * are covered by terminal_types + return types of the current set of - * operators. One could imagine this continuing ad infinitum, but we - * just do two passes for simplicity. 
- *  - assertion check to make sure there is at least one operator that
- *  returns the output type of the model.
- *
- *
- * Parameters
- * ----------
- *
-*/
+ *  - assertion check to make sure there is at least one operator that
+ *  returns the output type of the model.
+ *
+ * When sampling in the search space (using any of the sampling functions
+ * `sample_op` or `sample_terminal`), some methods can fail to return a
+ * value --- given a specific set of parameters to a function, the candidate
+ * solutions set may be empty --- and, for these methods, the return type is
+ * either a valid value or `std::nullopt`. This is controlled by wrapping
+ * the return type with `std::optional`.
+ *
+ * Parameters
+ * ----------
+ *
+ */
struct SearchSpace
{
    using ArgsHash = std::size_t;
@@ -93,7 +98,7 @@ struct SearchSpace
    Map<Node> node_map;
 
    /// @brief A map of weights corresponding to elements in @ref node_map, used to weight probabilities of each node being sampled from the map.
-    Map<float> weight_map;
+    Map<float> node_map_weights;
 
    /**
     * @brief Maps return types to terminals.
@@ -102,11 +107,10 @@ struct SearchSpace
     *
     * { return_type : vector of Nodes }
     *
-     *
     */
    unordered_map<DataType, vector<Node>> terminal_map;
 
-    /// @brief A map of weights corresponding to elements in @ref terminal_map, used to weight probabilities of each node being sampled from the map.
+    /// @brief A map of weights corresponding to elements in @ref terminal_map, used to weight probabilities of each terminal being sampled from the map.
    unordered_map<DataType, vector<float>> terminal_weights;
 
    /// @brief A vector storing the available return types of terminals.
@@ -117,7 +121,7 @@ struct SearchSpace
    NLOHMANN_DEFINE_TYPE_INTRUSIVE(SearchSpace,
        node_map,
-        weight_map,
+        node_map_weights,
        terminal_map,
        terminal_weights,
        terminal_types
    )
@@ -222,6 +226,18 @@ struct SearchSpace
        return true;
    }
 
+    /// @brief Takes iterators to weight vectors and checks if they have a
+    /// non-empty solution space. An empty solution space is defined as
+    /// having no non-zero, positive values
+    /// @tparam Iter type of the iterator.
+    /// @param start Start iterator
+    /// @param end End iterator
+    /// @return true if at least one weight is positive
+    template <typename Iter>
+    bool has_solution_space(Iter start, Iter end) const {
+        return !std::all_of(start, end, [](const auto& w) { return w<=0.0; });
+    }
+
    template<typename T> Node get(const string& name);
 
    /// @brief get a typed node
@@ -244,66 +260,12 @@ struct SearchSpace
    template<typename S>
    Node get(NodeType type, DataType R, S sig){ return get(type, R, sig.hash()); };
 
-    /// @brief Get a specific node type that matches a return value.
-    /// @param type the node type
-    /// @param R the return type
-    /// @return A Node of type `type` with return type `R`.
-    Node get(NodeType type, DataType R)
-    {
-        check(R);
-        auto ret_match = node_map.at(R);
-        vector<Node> matches;
-        vector<float> weights;
-        for (const auto& kv: ret_match)
-        {
-            auto arg_hash = kv.first;
-            auto node_type_map = kv.second;
-            if (node_type_map.find(type) != node_type_map.end())
-            {
-                matches.push_back(node_type_map.at(type));
-                weights.push_back(weight_map.at(R).at(arg_hash).at(type));
-            }
-        }
-
-        return (*r.select_randomly(matches.begin(),
-                                   matches.end(),
-                                   weights.begin(),
-                                   weights.end()));
-    };
-
-    /// get a random terminal
-    Node get_terminal() const
-    {
-        //TODO: match terminal args_type (probably '{}' or something?)
-        // make a separate terminal_map
-        auto match = *r.select_randomly(terminal_map.begin(), terminal_map.end());
-        return *r.select_randomly(
-            match.second.begin(), match.second.end(),
-            terminal_weights.at(match.first).begin(),
-            terminal_weights.at(match.first).end()
-        );
-    };
-
-    /// get a terminal with return type `R`
-    Node get_terminal(DataType R) const
-    {
-        if (terminal_map.find(R) == terminal_map.end()){
-            auto msg = fmt::format("{} not in terminal_map\n",R);
-            HANDLE_ERROR_THROW(msg);
-        }
-        auto rval = *r.select_randomly(terminal_map.at(R).begin(),
-                                       terminal_map.at(R).end(),
-                                       terminal_weights.at(R).begin(),
-                                       terminal_weights.at(R).end());
-        return rval;
-    };
-
    /// @brief get weights of the return types
    /// @return a weight vector, each element corresponding to a return type.
    vector<float> get_weights() const
    {
        vector<float> v;
-        for (auto& [ret, arg_w_map]: weight_map)
+        for (auto& [ret, arg_w_map]: node_map_weights)
        {
            v.push_back(0);
            for (const auto& [arg, name_map] : arg_w_map)
            {
                for (const auto& [name, w]: name_map)
                {
                    v.back() += w;
                }
-
            }
        }
        return v;
    };
@@ -324,7 +285,7 @@ struct SearchSpace
    vector<float> get_weights(DataType ret) const
    {
        vector<float> v;
-        for (const auto& [arg, name_map] : weight_map.at(ret))
+        for (const auto& [arg, name_map] : node_map_weights.at(ret))
        {
            v.push_back(0);
            for (const auto& [name, w]: name_map)
@@ -343,66 +304,167 @@ struct SearchSpace
    vector<float> get_weights(DataType ret, ArgsHash sig_hash) const
    {
        vector<float> v;
-        for (const auto& [name, w]: weight_map.at(ret).at(sig_hash))
+        for (const auto& [name, w]: node_map_weights.at(ret).at(sig_hash))
            v.push_back(w);
        return v;
    };
 
+    /// @brief Get a random terminal
+    /// @return `std::optional` that may contain a terminal Node.
+    std::optional<Node> sample_terminal() const
+    {
+        //TODO: match terminal args_type (probably '{}' or something?)
+        // make a separate terminal_map
+
+        // We weight each terminal type in proportion to the sum of the
+        // weights of the terminals it holds
+        vector<float> data_type_weights(terminal_weights.size());
+        std::transform(
+            terminal_weights.begin(),
+            terminal_weights.end(),
+            data_type_weights.begin(),
+            [](const auto& tw){
+                return std::reduce(tw.second.begin(), tw.second.end()); }
+        );
+
+        if (!has_solution_space(data_type_weights.begin(),
+                                data_type_weights.end()))
+            return std::nullopt;
+
+        // If we got this far, then it is guaranteed that we'll return something.
+        // The match takes into account datatypes with non-zero weights
+        auto match = *r.select_randomly(
+            terminal_map.begin(),
+            terminal_map.end(),
+            data_type_weights.begin(),
+            data_type_weights.end()
+        );
+
+        return *r.select_randomly(
+            match.second.begin(), match.second.end(),
+            terminal_weights.at(match.first).begin(),
+            terminal_weights.at(match.first).end()
+        );
+    };
+
+    /// @brief Get a random terminal with return type `R`
+    /// @return `std::optional` that may contain a terminal Node of type `R`.
+ std::optional sample_terminal(DataType R) const + { + // should I keep doing this check? + // if (terminal_map.find(R) == terminal_map.end()){ + // auto msg = fmt::format("{} not in terminal_map\n",R); + // HANDLE_ERROR_THROW(msg); + // } + + // TODO: try to combine with above function + if ( (terminal_map.find(R) == terminal_map.end()) + || (!has_solution_space(terminal_weights.at(R).begin(), + terminal_weights.at(R).end())) ) + return std::nullopt; + + return *r.select_randomly(terminal_map.at(R).begin(), + terminal_map.at(R).end(), + terminal_weights.at(R).begin(), + terminal_weights.at(R).end()); + }; + /// @brief get an operator matching return type `ret`. /// @param ret return type - /// @return a randomly chosen operator - Node get_op(DataType ret) const + /// @return `std::optional` that may contain a randomly chosen operator matching return type `ret` + std::optional sample_op(DataType ret) const { - check(ret); + // check(ret); + //TODO: match terminal args_type (probably '{}' or something?) auto ret_match = node_map.at(ret); vector args_w = get_weights(ret); + if (!has_solution_space(args_w.begin(), args_w.end())) + return std::nullopt; + auto arg_match = *r.select_randomly(ret_match.begin(), ret_match.end(), args_w.begin(), args_w.end()); vector name_w = get_weights(ret, arg_match.first); + + if (!has_solution_space(name_w.begin(), name_w.end())) + return std::nullopt; + return (*r.select_randomly(arg_match.second.begin(), arg_match.second.end(), name_w.begin(), name_w.end())).second; }; - + /// @brief Get a specific node type that matches a return value. + /// @param type the node type + /// @param R the return type + /// @return `std::optional` that may contain a Node of type `type` with return type `R`. + std::optional sample_op(NodeType type, DataType R) + { + // check(R); + + auto ret_match = node_map.at(R); + + vector matches; + vector weights; + for (const auto& kv: ret_match) + { + auto arg_hash = kv.first; + auto node_type_map = kv.second; + if (node_type_map.find(type) != node_type_map.end()) + { + matches.push_back(node_type_map.at(type)); + weights.push_back(node_map_weights.at(R).at(arg_hash).at(type)); + } + } + + if ( (weights.size()==0) + || (!has_solution_space(weights.begin(), + weights.end())) ) + return std::nullopt; + + return (*r.select_randomly(matches.begin(), + matches.end(), + weights.begin(), + weights.end())); + }; + /// @brief get operator with at least one argument matching arg /// @param ret return type /// @param arg argument type to match /// @param terminal_compatible if true, the other args the returned operator takes must exist in the terminal types. - /// @param max_arg_count if zero, there is no limit on number of arguments of the operator. If not, the operator can have at most `max_arg_count` arguments. - /// @return a matching operator. - Node get_op_with_arg(DataType ret, DataType arg, + /// @param max_args if zero, there is no limit on number of arguments of the operator. If not, the operator can have at most `max_args` arguments. + /// @return `std::optional` that may contain a matching operator respecting all restrictions. + std::optional sample_op_with_arg(DataType ret, DataType arg, bool terminal_compatible=true, - int max_arg_count=0) const + int max_args=0) const { // thoughts (TODO): // this could be templated by return type and arg. although the lookup in the map should be // fairly fast. 
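        // Minimal caller-side sketch (illustrative only, not part of this
        // patch): every `sample_*` member follows the same optional-returning
        // convention described in the class comment, so callers should test
        // the result before dereferencing, e.g.:
        //
        //   std::optional<Node> op = SS.sample_op(DataType::ArrayF);
        //   if (op)
        //       use(op.value());  // `use` is a hypothetical caller function
        //   else
        //       ;                 // empty solution space: no positive weights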
        //TODO: these need to be overhauled
-        // fmt::print("get_op_with_arg");
+        // fmt::print("sample_op_with_arg");
        check(ret);

        auto args_map = node_map.at(ret);
-        vector matches;
-        vector weights;
+        vector matches;
+        vector weights;

        for (const auto& [args_type, name_map]: args_map) {
            for (const auto& [name, node]: name_map) {
                auto node_arg_types = node.get_arg_types();
-
                // has no size limit (max_arg_count==0) or the number of
                // arguments wouldn't exceed the maximum number of arguments
-                auto within_size_limit = !(max_arg_count) || (node.get_arg_count() <= max_arg_count);
-
-                if ( in(node_arg_types, arg) && within_size_limit ) {
+                auto within_size_limit = !(max_args) || (node.get_arg_count() <= max_args);
+
+                if ( in(node_arg_types, arg) && within_size_limit) {
                    // if checking terminal compatibility, make sure there's
                    // a compatible terminal for the node's other arguments
                    if (terminal_compatible) {
@@ -420,26 +482,37 @@
                    }
                    // if we made it this far, include the node as a match!
                    matches.push_back(node);
-                    weights.push_back(weight_map.at(ret).at(args_type).at(name));
+                    weights.push_back(node_map_weights.at(ret).at(args_type).at(name));
                }
            }
        }

+        if ( (weights.size()==0)
+        ||   (!has_solution_space(weights.begin(),
+                                  weights.end())) )
+            return std::nullopt;
+
        return (*r.select_randomly(matches.begin(), matches.end(),
                                   weights.begin(), weights.end()));
    };

    /// @brief get a node with a signature matching `node`
    /// @param node the node to match
-    /// @return a Node
-    Node get_node_like(Node node) const
+    /// @return `std::optional` that may contain a Node
+    std::optional get_node_like(Node node) const
    {
        if (Is(node.node_type)){
-            return get_terminal(node.ret_type);
+            return sample_terminal(node.ret_type);
        }

        auto matches = node_map.at(node.ret_type).at(node.args_type());
        auto match_weights = get_weights(node.ret_type, node.args_type());
+
+        if ( (match_weights.size()==0)
+        ||   (!has_solution_space(match_weights.begin(),
+                                  match_weights.end())) )
+            return std::nullopt;
+
        return (*r.select_randomly(matches.begin(),
                                   matches.end(),
                                   match_weights.begin(),
@@ -447,10 +520,18 @@ struct SearchSpace
                                  ).second;
    };

+    /// @brief create a subtree with maximum size and depth restrictions, rooted at a node like `root`
+    /// @param root the root node of the subtree
+    /// @param max_d the maximum depth
+    /// @param max_size the maximum size of the tree (will be sampled between [1, max_size])
+    /// @return `std::optional` that may contain a tree
+    std::optional> sample_subtree(Node root, int max_d, int max_size) const;
+
    /// @brief prints the search space map.
    void print() const;

private:
+    tree PTC2(Node root, int max_d, int max_size) const;

    template
        requires (!is_in_v)
@@ -490,7 +571,7 @@ struct SearchSpace
                node_map[n.ret_type][n.args_type()][n.node_type] = n;
                // sampling probability map
                float w = use_all? 1.0 : user_ops.at(name);
-                weight_map[n.ret_type][n.args_type()][n.node_type] = w;
+                node_map_weights[n.ret_type][n.args_type()][n.node_type] = w;
            }
        }
@@ -550,125 +631,63 @@ T RandomDequeue(std::vector& Q)
template
P SearchSpace::make_program(int max_d, int max_size)
{
-    // A comment about PTC2 method:
-    // PTC2 can work with depth or size restriction, but it does not strictly
-    // satisfies these conditions all time. Given a `max_size` and `max_depth`
-    // parameters, the real maximum size that can occur is `max_size` plus the
-    // highest operator arity, and the real maximum depth is `max_depth` plus one.
-    if (max_d == 0)
-        max_d = r.rnd_int(1, PARAMS["max_depth"].get());
+        max_d = PARAMS["max_depth"].get<int>();
    if (max_size == 0)
        max_size = r.rnd_int(1, PARAMS["max_size"].get());

    DataType root_type = DataTypeEnum::value;
    ProgramType program_type = P::program_type;
    // ProgramType program_type = ProgramTypeEnum::value;
-
+    auto Tree = tree<Node>();
+    if (max_size == 1)
+    {
+        // auto root = Tree.insert(Tree.begin(), sample_terminal(root_type));

-    /* fmt::print("building program with max size {}, max depth {}",max_size,max_d); */
+        // We can only have a terminal here, but the terminal must be compatible
+        auto opt = sample_terminal(root_type);

-    // Queue of nodes that need children
-    vector> queue;
+        if (!opt){
+            auto msg = fmt::format("Program with size=1 could not be created. "
+                "The search space does not contain any terminal with data type {}.\n",
+                root_type);
+            HANDLE_ERROR_THROW(msg);
+        }

-    if (max_size == 1)
-    {
-        auto root = Tree.insert(Tree.begin(), get_terminal(root_type));
+        Tree.insert(Tree.begin(), opt.value());
    }
-    else
-    {
-        Node n;
+    else {// Our program can (and will) be greater than 1 node
+
+        // building the root node for each program case. We give the root, and it
+        // fills the rest of the tree
+        Node root;

        if (P::program_type == ProgramType::BinaryClassifier)
        {
-            n = get(NodeType::Logistic, DataType::ArrayF, Signature());
-            n.set_prob_change(0.0);
-            n.fixed=true;
+            root = get(NodeType::Logistic, DataType::ArrayF, Signature());
+            root.set_prob_change(0.0);
+            root.fixed=true;
+        }
        else if (P::program_type == ProgramType::MulticlassClassifier)
        {
-            n = get(NodeType::Softmax, DataType::MatrixF);
-            n.set_prob_change(0.0);
-            n.fixed=true;
-        }
-        else
-            n = get_op(root_type);
-
-        /* cout << "chose " << n.name << endl; */
-        // auto spot = Tree.set_head(n);
-        /* cout << "inserting...\n"; */
-        auto spot = Tree.insert(Tree.begin(), n);
-        // node depth
-        int d = 1;
-        // current tree size
-        int s = 1;
-        //For each argument position a of n, Enqueue(a; g)
-        for (auto a : n.arg_types)
-        {
-            /* cout << "queing a node of type " << DataTypeName[a] << endl; */
-            auto child_spot = Tree.append_child(spot);
-            queue.push_back(make_tuple(child_spot, a, d));
+            root = get(NodeType::Softmax, DataType::MatrixF, Signature());
+            root.set_prob_change(0.0);
+            root.fixed=true;
        }
-
-        /* cout << "queue size: " << queue.size() << endl; */
-        /* cout << "entering first while loop...\n"; */
-        while (queue.size() + s < max_size && queue.size() > 0)
-        {
-            /* cout << "queue size: " << queue.size() << endl; */
-            auto [qspot, t, d] = RandomDequeue(queue);
-
-            /* cout << "current depth: " << d << endl; */
-            if (d == max_d)
-            {
-                // choose terminal of matching type
-                /* cout << "getting " << DataTypeName[t] << " terminal\n"; */
-                // qspot = get_terminal(t);
-                Tree.replace(qspot, get_terminal(t));
-                // Tree.append_child(qspot, get_terminal(t));
+        else {
+            // we start with a non-terminal (can be replaced inside PTC2 though, if max_size==1)
+            auto opt = sample_op(root_type);
+            while (!opt) {
+                opt = sample_op(root_type);
            }
-            else
-            {
-                //choose a nonterminal of matching type
-                /* cout << "getting op of type " << DataTypeName[t] << endl; */
-                auto n = get_op(t);
-                /* cout << "chose " << n.name << endl; */
-                // TreeIter new_spot = Tree.append_child(qspot, n);
-                // qspot = n;
-                auto newspot = Tree.replace(qspot, n);
-                // For each arg of n, add to queue
-                for (auto a : n.arg_types)
-                {
-                    /* cout << "queing a node of type " << DataTypeName[a] << endl; */
-                    //
queue.push_back(make_tuple(new_spot, a, d+1)); - auto child_spot = Tree.append_child(newspot); - queue.push_back(make_tuple(child_spot, a, d+1)); - } - } - ++s; - /* cout << "current tree size: " << s << endl; */ - } - /* cout << "entering second while loop...\n"; */ - while (queue.size() > 0) - { - if (queue.size() == 0) - break; - - /* cout << "queue size: " << queue.size() << endl; */ - - auto [qspot, t, d] = RandomDequeue(queue); - - /* cout << "getting " << DataTypeName[t] << " terminal\n"; */ - // Tree.append_child(qspot, get_terminal(t)); - // qspot = get_terminal(t); - auto newspot = Tree.replace(qspot, get_terminal(t)); - + root = opt.value(); } + + Tree = PTC2(root, max_d, max_size); } - /* cout << "final tree:\n" */ - /* << Tree.begin().node->get_model() << "\n" */ - /* << Tree.begin().node->get_tree_model(true) << endl; */ - /* << Tree.get_model() << "\n" */ - /* << Tree.get_model(true) << endl; // pretty */ return P(*this,Tree); }; @@ -692,7 +711,7 @@ template <> struct fmt::formatter: formatter { ArgsName[args_type], node_type, node, - SS.weight_map.at(ret_type).at(args_type).at(node_type) + SS.node_map_weights.at(ret_type).at(args_type).at(node_type) ); } } diff --git a/src/types.h b/src/types.h index 31664384..5badc481 100644 --- a/src/types.h +++ b/src/types.h @@ -179,6 +179,12 @@ template<> struct DataEnumType{ using type = ArrayXXf; }; template<> struct DataEnumType{ using type = Data::TimeSeriesb; }; template<> struct DataEnumType{ using type = Data::TimeSeriesi; }; template<> struct DataEnumType{ using type = Data::TimeSeriesf; }; +template<> struct DataEnumType{ using type = ArrayXbJet; }; +template<> struct DataEnumType{ using type = ArrayXiJet; }; +template<> struct DataEnumType{ using type = ArrayXfJet; }; +template<> struct DataEnumType{ using type = ArrayXXbJet; }; +template<> struct DataEnumType{ using type = ArrayXXiJet; }; +template<> struct DataEnumType{ using type = ArrayXXfJet; }; template struct DataTypeEnum; template <> struct DataTypeEnum { static constexpr DT value = DT::ArrayB; }; diff --git a/src/util/error.cpp b/src/util/error.cpp index a36cc220..e1fb8f55 100644 --- a/src/util/error.cpp +++ b/src/util/error.cpp @@ -13,7 +13,12 @@ namespace Brush{ namespace Util{ void HandleErrorThrow(string err, const char *file, int line ) { fmt::print(stderr, "FATAL ERROR {}:{}: {}\n", file, line, err); - throw; + + // when called with no arguments, will call terminate(), which + // throws a std::terminate_handler (and can't be handled in GTEST). + // Here we throw a runtime_error with same information printed on + // screen. + throw std::runtime_error(fmt::format("FATAL ERROR {}:{}: {}\n", file, line, err)); } ///prints error to stderr and returns diff --git a/src/util/rnd.cpp b/src/util/rnd.cpp index aedb2c32..ac95b699 100644 --- a/src/util/rnd.cpp +++ b/src/util/rnd.cpp @@ -12,17 +12,24 @@ namespace Brush { namespace Util{ Rnd::Rnd() { /*! - * need a random generator for each core to do multiprocessing + * need a random generator for each core to do multiprocessing. + * The constructor will resize the random generators based on + * the number of available cores. */ + //cout << "Max threads are " <set_seed(0); // setting a random initial state } return instance; @@ -36,28 +43,25 @@ namespace Brush { namespace Util{ instance = NULL; } - void Rnd::set_seed(int seed) + void Rnd::set_seed(unsigned int seed) { /*! - * set seeds for each core's random number generator + * set seeds for each core's random number generator. 
*/ - if (seed == 0) - { + if (seed == 0) { + // use a non-deterministic random generator to seed the deterministics std::random_device rd; - - for (auto& r : rg) - r.seed(rd()); + seed = rd(); } - else // seed first rg with seed, then seed rest with random ints from rg[0]. - { - rg[0].seed(seed); - - int imax = std::numeric_limits::max(); - - std::uniform_int_distribution<> dist(0, imax); - for (size_t i = 1; i < rg.size(); ++i) - rg[i].seed(dist(rg[0])); + // generating a seed sequence + std::seed_seq seq{seed}; + + std::vector seeds(rg.size()); + seq.generate(seeds.begin(), seeds.end()); + + for (size_t i = 0; i < rg.size(); ++i) { + rg[i].seed(seeds[i]); } } diff --git a/src/util/rnd.h b/src/util/rnd.h index d4b61f84..0a682e54 100644 --- a/src/util/rnd.h +++ b/src/util/rnd.h @@ -12,6 +12,8 @@ license: GNU/GPL v3 #include "../init.h" +// Defines a multi-core random number generator and its operators. + using namespace std; using std::swap; @@ -32,7 +34,7 @@ namespace Brush { namespace Util{ static void destroy(); - void set_seed(int seed); + void set_seed(unsigned int seed); int rnd_int( int lowerLimit, int upperLimit ); @@ -123,7 +125,6 @@ namespace Brush { namespace Util{ && " attemping to return random choice from empty vector"); return *select_randomly(v.begin(),v.end()); } - template class C, class T> T random_choice(const C>& v, const vector& w ) @@ -163,10 +164,15 @@ namespace Brush { namespace Util{ // Vector of pseudo-random number generators, one for each thread vector rg; + // private static attribute used by every instance of the class. + // All threads share common static members of the class static Rnd* instance; - }; + // `Brush.Util` static attribute holding an singleton instance of Rnd. + // the instance is created by calling `initRand`, which creates + // an instance of the private static attribute `instance`. 
`r` will contain
+    // one generator for each thread (since the constructor creates one per thread)
    static Rnd &r = *Rnd::initRand();
} // Util
} // Brush
diff --git a/src/util/utils.cpp b/src/util/utils.cpp
index 05243649..9f469618 100644
--- a/src/util/utils.cpp
+++ b/src/util/utils.cpp
@@ -105,10 +105,10 @@ void Normalizer::fit(MatrixXf& X, const vector& dt)
    for (unsigned int i=0; iscale_all || dtypes.at(i)=='f')
        {
-            X.row(i) = X.row(i).array() - offset.at(i);
+            X.col(i) = X.col(i).array() - offset.at(i);
            if (scale.at(i) > NEAR_ZERO)
-                X.row(i) = X.row(i).array()/scale.at(i);
+                X.col(i) = X.col(i).array()/scale.at(i);
        }
    }
}
diff --git a/src/variation.h b/src/variation.h
index 8048f21d..dcfd8288 100644
--- a/src/variation.h
+++ b/src/variation.h
@@ -11,6 +11,8 @@ license: GNU/GPL v3
// #include "program/tree_node.h"
// #include "node.h"

+#include <optional>
+
// namespace Brush{

// typedef tree::pre_order_iterator Iter;
@@ -27,83 +29,202 @@ namespace variation {

typedef tree::pre_order_iterator Iter;

-/// point mutation: replace node with same typed node
-inline void point_mutation(tree& Tree, Iter spot, const SearchSpace& SS)
+/// @brief replace node with same typed node
+/// @param Tree the program tree
+/// @param spot an iterator to the node that is being mutated
+/// @param SS the search space to sample a node like `spot`
+/// @return boolean indicating the success (true) or fail (false) of the operation
+inline bool point_mutation(tree<Node>& Tree, Iter spot, const SearchSpace& SS)
{
    // cout << "point mutation\n";
-    auto newNode = SS.get_node_like(spot.node->data);
-    Tree.replace(spot, newNode);
+
+    // get_node_like will sample a similar node based on node_map_weights or
+    // terminal_weights, and may return a Node.
+    std::optional<Node> newNode = SS.get_node_like(spot.node->data);
+
+    if (!newNode) // newNode == std::nullopt
+        return false;
+
+    // if optional contains a Node, we access its contained value
+    Tree.replace(spot, *newNode);
+
+    return true;
}

-/// insert a node with spot as a child
-inline void insert_mutation(tree& Tree, Iter spot, const SearchSpace& SS)
+/// @brief insert a node with spot as a child
+/// @param Tree the program tree
+/// @param spot an iterator to the node that is being mutated
+/// @param SS the search space to sample a node like `spot`
+/// @return boolean indicating the success (true) or fail (false) of the operation
+inline bool insert_mutation(tree<Node>& Tree, Iter spot, const SearchSpace& SS)
{
    // cout << "insert mutation\n";
    auto spot_type = spot.node->data.ret_type;

-    // pick a compatible random node to insert. We subtract one to count
-    // the op node that is already being inserted.
-    auto n = SS.get_op_with_arg(spot_type, spot_type, true,
+    // pick a random compatible node to insert (with probabilities given by
+    // node_map_weights). The `-1` represents the node being inserted.
+    // Ideally, it should always find at least one match (the same node
+    // used as a reference when calling the function). However, we have a
+    // size restriction, which will be relaxed here (just as it is in the PTC2
+    // algorithm). This mutation can create a new expression that exceeds the
+    // maximum size by the highest arity among the operators.
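+    // Numeric illustration (an assumption for this sketch, not taken from the
+    // code): with PARAMS["max_size"] == 20 and a current tree of size 10, the
+    // call below receives max_args = 20 - 10 - 1 = 9, i.e. the sampled
+    // operator may take at most 9 arguments:
+    //
+    //   auto n = SS.sample_op_with_arg(spot_type, spot_type, true, 9);
+    //   // n may be std::nullopt if no operator fits these restrictions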
+    std::optional<Node> n = SS.sample_op_with_arg(spot_type, spot_type, true,
                                        PARAMS["max_size"].get<int>()-Tree.size()-1);

-    // make node n wrap the subtree at the chosen spot
-    auto parent_node = Tree.wrap(spot, n);
+    if (!n) // there is no operator with compatible arguments
+        return false;

-    // GUI TODO: spot_filled should be any random spot with same type
+    // make node n wrap the subtree at the chosen spot
+    auto parent_node = Tree.wrap(spot, *n);

    // now fill the arguments of n appropriately
    bool spot_filled = false;
-    for (auto a: n.arg_types)
+    for (auto a: (*n).arg_types)
    {
        if (spot_filled)
        {
-            // if spot is in its child position, append children
-            Tree.append_child(parent_node, SS.get_terminal(a));
+            // if spot is in its child position, append children.
+            // TODO: note that sample_terminal may fail as well
+            auto opt = SS.sample_terminal(a);
+
+            if (!opt)
+                return false;
+
+            Tree.append_child(parent_node, opt.value());
        }
        // if types match, treat this spot as filled by the spot node
        else if (a == spot_type)
            spot_filled = true;
        // otherwise, add siblings before spot node
-        else
-            Tree.insert(spot, SS.get_terminal(a));
+        else {
+            auto opt = SS.sample_terminal(a);
+
+            if (!opt)
+                return false;
+
+            Tree.insert(spot, opt.value());
+        }
    }
+
+    return true;
}

-/// delete subtree and replace it with a terminal of the same return type
-inline void delete_mutation(tree& Tree, Iter spot, const SearchSpace& SS)
+/// @brief delete subtree and replace it with a terminal of the same return type
+/// @param Tree the program tree
+/// @param spot an iterator to the node that is being mutated
+/// @param SS the search space to sample a node like `spot`
+/// @return boolean indicating the success (true) or fail (false) of the operation
+inline bool delete_mutation(tree<Node>& Tree, Iter spot, const SearchSpace& SS)
{
    // cout << "delete mutation\n";
-    auto terminal = SS.get_terminal(spot.node->data.ret_type);
+
+    // sample_terminal will sample based on terminal_weights. If it succeeds,
+    // then the new terminal will be in `opt.value()`
+    auto opt = SS.sample_terminal(spot.node->data.ret_type);
+
+    if (!opt) // there is no terminal with compatible arguments
+        return false;
+
    Tree.erase_children(spot);
-    Tree.replace(spot, terminal);
+
+    Tree.replace(spot, opt.value());
+
+    return true;
};

-/// @brief toggle the node's weight on or off.
+/// @brief toggle the node's weight ON.
/// @param Tree the program tree
/// @param spot an iterator to the node that is being mutated
/// @param SS the search space (unused)
-inline void toggle_weight_mutation(tree& Tree, Iter spot, const SearchSpace& SS)
+/// @return boolean indicating the success (true) or fail (false) of the operation
+inline bool toggle_weight_on_mutation(tree<Node>& Tree, Iter spot, const SearchSpace& SS)
{
-    spot.node->data.is_weighted = !spot.node->data.is_weighted;
+    if (spot.node->data.get_is_weighted()==true // can't turn on what's already on
+    ||  !IsWeighable(spot.node->data.ret_type)) // does not accept weights (e.g. boolean)
+        return false; // false indicates that mutation failed and should return std::nullopt
+
+    spot.node->data.set_is_weighted(true);
+    return true;
+}
+
+/// @brief toggle the node's weight OFF.
+/// @param Tree the program tree
+/// @param spot an iterator to the node that is being mutated
+/// @param SS the search space (unused)
+/// @return boolean indicating the success (true) or fail (false) of the operation
+inline bool toggle_weight_off_mutation(tree<Node>& Tree, Iter spot, const SearchSpace& SS)
+{
+    if (spot.node->data.get_is_weighted()==false)
+        return false;
+
+    spot.node->data.set_is_weighted(false);
+    return true;
+}
+
+/// @brief replaces the subtree rooted in `spot`
+/// @param Tree the program tree
+/// @param spot an iterator to the node that is being mutated
+/// @param SS the search space to generate a compatible subtree
+/// @return boolean indicating the success (true) or fail (false) of the operation
+inline bool subtree_mutation(tree<Node>& Tree, Iter spot, const SearchSpace& SS)
+{
+    auto spot_type = spot.node->data.ret_type;
+    auto max_size  = PARAMS["max_size"].get<int>() - (Tree.size() - Tree.size(spot));
+    auto max_depth = PARAMS["max_depth"].get<int>() - (Tree.depth(spot));
+
+    // sample subtree uses PTC2, which operates on depth and size of the tree
+    // (and not on the program!). We shouldn't care about weights here
+    auto subtree = SS.sample_subtree(spot.node->data, max_depth, max_size);
+
+    if (!subtree) // no compatible subtree could be sampled
+        return false;
+
+    // if the optional contains a tree, we access its contained value
+    Tree.erase_children(spot);
+    Tree.replace(spot, subtree.value().begin());
+
+    return true;
}

/**
- * @brief Mutate a program.
+ * @brief Stochastically mutate a program.
 *
 * Types of mutation:
 *
 *  - point mutation changes a single node.
 *  - insertion mutation inserts a node as the parent of an existing node, and fills in the other arguments.
- *  - deletion mutation deletes a node
- *  - toggle_weight mutation turns a node's weight on or off.
+ *  - deletion mutation deletes a node.
+ *  - subtree mutation inserts a new subtree into the program.
+ *  - toggle_weight_on mutation turns a node's weight ON.
+ *  - toggle_weight_off mutation turns a node's weight OFF.
+ *
+ * Every mutation has a probability (weight) based on global parameters. The
+ * spot where the mutation will take place is sampled based on attribute
+ * `get_prob_change` of each node in the tree. Inside each type of mutation,
+ * when a new node is inserted, it is sampled based on `terminal_weights`.
+ *
+ * Due to the stochastic behavior, and the several sampling steps, it may happen
+ * that the search space does not hold any possible modification to do in
+ * the program. In this case, the method returns `std::nullopt` (and has overloads
+ * so it can be used in a boolean context).
+ *
+ * If the mutation succeeds, the mutated program can be accessed through the
+ * `.value()` attribute of the `std::optional`.
+ *
+ * This means that, if you use the mutation as `auto opt = mutate(parent, SS)`,
+ * either `opt==false` or `opt.value()` contains the child program.
+ *
 * @tparam T program type
 * @param parent the program to be mutated
 * @param SS a search space
- * @return `child`, the mutated program
+ * @return `std::optional` that may contain the child program of type `T`
 */
template
-Program mutate(const Program& parent, const SearchSpace& SS)
+std::optional<Program<T>> mutate(const Program<T>& parent, const SearchSpace& SS)
{
+    // all mutation validation and setup should be done here. Specific mutation
+    // functions are intended to work on the program tree, and thus cannot access
+    // program functions and attributes.
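+    //
+    // Caller-side sketch (illustrative; it mirrors the doc comment above):
+    //
+    //   auto opt = mutate(parent, SS);    // or parent.mutate() on a Program
+    //   if (opt) {
+    //       auto child = opt.value();     // mutation and limit checks passed
+    //   } else {
+    //       // resample or skip: no valid mutation was possible
+    //   }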
    Program<T> child(parent);

    // choose location by weighted sampling of program
@@ -113,46 +234,91 @@ Program mutate(const Program& parent, const SearchSpace& SS)
                   [](const auto& n){ return n.get_prob_change(); }
                  );

-    auto spot = r.select_randomly(child.Tree.begin(), child.Tree.end(),
-                                  weights.begin(), weights.end());
-
    auto options = PARAMS["mutation_options"].get<std::map<string, float>>();

-    // Setting to zero the weight of variations that increase the expression
-    // if the expression is already at the maximum size or depth
-    if (child.Tree.size()+1 >= PARAMS["max_size"].get()
-    ||  child.Tree.max_depth()+1 >= PARAMS["max_depth"].get())
-    {
-        // avoid using mutations that increase size/depth
-        options["insert"] = 0.0;
+    if (std::all_of(weights.begin(), weights.end(), [](const auto& w) {
+        return w<=0.0;
+    }))
+    { // There is no spot that has a probability to be selected
+        return std::nullopt;
    }

+    auto spot = r.select_randomly(child.Tree.begin(), child.Tree.end(),
+                                  weights.begin(), weights.end());
+
+    if (std::all_of(options.begin(), options.end(), [](const auto& kv) {
+        return kv.second<=0.0;
+    }))
+    { // No mutation can be successfully applied to this solution
+        return std::nullopt;
+    }
+
    // choose a valid mutation option
    string choice = r.random_choice(options);

-    if (choice == "insert")
-        insert_mutation(child.Tree, spot, SS);
-    else if (choice == "delete")
-        delete_mutation(child.Tree, spot, SS);
-    else if (choice == "point")
-        point_mutation(child.Tree, spot, SS);
-    else if (choice == "toggle_weight")
-        toggle_weight_mutation(child.Tree, spot, SS);
-    else{
-        string msg = fmt::format("{} not a valid mutation choice", choice);
+    // std::cout << "mutation configuration (choice was " << choice << "):" << std::endl;
+    // for (const auto& [k, v] : options)
+    //     std::cout << " - " << k << " : " << v << std::endl;
+
+    // Every mutation here works in place, so they return bool instead of
+    // std::optional to indicate the result of their manipulation of the
+    // program tree. Here we call the mutation function and return the result
+    using MutationFunc = std::function<bool(tree<Node>&, Iter, const SearchSpace&)>;
+
+    std::map<string, MutationFunc> mutations{
+        {"insert",            insert_mutation},
+        {"delete",            delete_mutation},
+        {"point",             point_mutation},
+        {"subtree",           subtree_mutation},
+        {"toggle_weight_on",  toggle_weight_on_mutation},
+        {"toggle_weight_off", toggle_weight_off_mutation}
+    };
+
+    // Try to find the mutation function based on the choice
+    auto it = mutations.find(choice);
+    if (it == mutations.end()) {
+        std::string msg = fmt::format("{} not a valid mutation choice", choice);
        HANDLE_ERROR_THROW(msg);
    }

-    return child;
+    // apply the mutation and check if it succeeded
+    bool success = it->second(child.Tree, spot, SS);
+
+    if (success
+    && ( (child.size()  <= PARAMS["max_size"].get<int>() )
+    &&   (child.depth() <= PARAMS["max_depth"].get<int>()) )){
+
+        return child;
+    } else {
+        return std::nullopt;
+    }
};

-/// @brief swaps subtrees between root and other, returning new program
-/// @tparam T the program type
-/// @param root the root parent
-/// @param other the donating parent
-/// @return new program of type `T`
+/**
+ * @brief Stochastically swaps subtrees between root and other, returning a new program.
+ *
+ * The spot where the cross will take place in the `root` parent is sampled
+ * based on attribute `get_prob_change` of each node in the tree. After selecting
+ * the cross spot, the program will iterate through the `other` parent searching
+ * for all compatible sub-trees to replace.
+ *
+ * Due to the stochastic behavior, it may come to a case where there is no
+ * candidate to replace the spot node. In this case, the method returns
+ * `std::nullopt` (and has overloads so it can be used in a boolean context).
+ *
+ * If the cross succeeds, the child program can be accessed through the
+ * `.value()` attribute of the `std::optional`.
+ *
+ * This means that, if you use the cross as `auto opt = cross(root, other)`,
+ * either `opt==false` or `opt.value()` contains the child.
+ *
+ * @tparam T the program type
+ * @param root the root parent
+ * @param other the donating parent
+ * @return `std::optional` that may contain the child program of type `T`
+ */
template
-Program cross(const Program& root, const Program& other)
+std::optional<Program<T>> cross(const Program<T>& root, const Program<T>& other)
{
    /* subtree crossover between this and other, producing new Program */
    // choose location by weighted sampling of program
@@ -162,65 +328,64 @@ Program cross(const Program& root, const Program& other)
    // pick a subtree to replace
    vector<float> child_weights(child.Tree.size());
    std::transform(child.Tree.begin(), child.Tree.end(),
-            child_weights.begin(),
-            [](const auto& n){ return n.get_prob_change(); }
-            );
+                   child_weights.begin(),
+                   [](const auto& n){ return n.get_prob_change(); }
+                  );

-    // GUI TODO: Keep doing random attempts, or test all possilities?
-    bool matching_spots_found = false;
-    for (int tries = 0; tries < 3; ++tries)
-    {
-        auto child_spot = r.select_randomly(child.Tree.begin(),
-                                            child.Tree.end(),
-                                            child_weights.begin(),
-                                            child_weights.end()
-                                           );
-
-        auto child_ret_type = child_spot.node->data.ret_type;
-
-        auto allowed_size = PARAMS["max_size"].get() -
-                            ( child.Tree.size() - child.Tree.size(child_spot) );
-        auto allowed_depth = PARAMS["max_depth"].get() -
-                             ( child.Tree.max_depth() - child.Tree.depth(child_spot) );
-
-        // pick a subtree to insert. Selection is based on other_weights
-        vector other_weights(other.Tree.size());
-
-        // iterator to get the size of subtrees inside transform
-        auto other_iter = other.Tree.begin();
-
-        // lambda function to check feasibility of solution and increment the iterator
-        const auto check_and_incrm = [other, &other_iter, allowed_size, allowed_depth]() -> bool {
-            int s = other.Tree.size(other_iter);
-            int d = other.Tree.depth(other_iter);
-
-            std::advance(other_iter, 1);
-            return (s <= allowed_size) && (d <= allowed_depth);
-        };
-
-        std::transform(other.Tree.begin(), other.Tree.end(),
-                other_weights.begin(),
-                [child_ret_type, check_and_incrm](const auto& n){
-                    // need to pick a node that has a matching output type to the child_spot.
- // also need to check if swaping this node wouldn't exceed max_size - if (check_and_incrm() && (n.ret_type == child_ret_type)) - return n.get_prob_change(); - else - // setting the weight to zero to indicate a non-feasible crossover point - return float(0.0); - } - ); - - for (const auto& w: other_weights) - { - matching_spots_found = w > 0.0; + if (std::all_of(child_weights.begin(), child_weights.end(), [](const auto& w) { + return w<=0.0; + })) + { // There is no spot that has a probability to be selected + return std::nullopt; + } - if (matching_spots_found) - break; + auto child_spot = r.select_randomly(child.Tree.begin(), + child.Tree.end(), + child_weights.begin(), + child_weights.end() + ); + + auto child_ret_type = child_spot.node->data.ret_type; + + auto allowed_size = PARAMS["max_size"].get() - + ( child.size() - child.size_at(child_spot) ); + auto allowed_depth = PARAMS["max_depth"].get() - + ( child.depth_to_reach(child_spot) ); + + // pick a subtree to insert. Selection is based on other_weights + vector other_weights(other.Tree.size()); + + // iterator to get the size of subtrees inside transform + auto other_iter = other.Tree.begin(); + + // lambda function to check feasibility of solution and increment the iterator + const auto check_and_incrm = [other, &other_iter, allowed_size, allowed_depth]() -> bool { + int s = other.size_at( other_iter ); + int d = other.depth_at( other_iter ); + + std::advance(other_iter, 1); + return (s <= allowed_size) && (d <= allowed_depth); + }; + + std::transform(other.Tree.begin(), other.Tree.end(), + other_weights.begin(), + [child_ret_type, check_and_incrm](const auto& n){ + // need to pick a node that has a matching output type to the child_spot. + // also need to check if swaping this node wouldn't exceed max_size + if (check_and_incrm() && (n.ret_type == child_ret_type)) + return n.get_prob_change(); + else + // setting the weight to zero to indicate a non-feasible crossover point + return float(0.0); } + ); - if (matching_spots_found) - { + bool matching_spots_found = false; + for (const auto& w: other_weights) + { + matching_spots_found = w > 0.0; + + if (matching_spots_found) { auto other_spot = r.select_randomly( other.Tree.begin(), other.Tree.end(), @@ -233,10 +398,9 @@ Program cross(const Program& root, const Program& other) child.Tree.move_ontop(child_spot, other_spot); return child; } - // fmt::print("try {} failed\n",tries); } - return child; + return std::nullopt; }; -} //namespace vary +} //namespace variation #endif \ No newline at end of file diff --git a/tests/cpp/test_data.cpp b/tests/cpp/test_data.cpp index e69de29b..09893c2c 100644 --- a/tests/cpp/test_data.cpp +++ b/tests/cpp/test_data.cpp @@ -0,0 +1,117 @@ +#include "testsHeader.h" +#include "../../src/search_space.h" +#include "../../src/program/program.h" +#include "../../src/program/dispatch_table.h" + +TEST(Data, ErrorHandling) +{ + // Creating an empty dataset throws error + EXPECT_THROW({ + MatrixXf X(0,0); + ArrayXf y(0); + + try + { + Dataset dt(X, y); + } + catch( const std::runtime_error& err ) + { + const string msg = err.what(); + ASSERT_NE( + msg.find("Error during the initialization of the dataset"), + std::string::npos); + throw; + } + }, std::runtime_error); +} + +TEST(Data, MixedVariableTypes) +{ + // We need to set at least the mutation options (and respective + // probabilities) in order to call PRG.predict() + PARAMS["mutation_options"] = { + {"point",0.25}, {"insert", 0.25}, {"delete", 0.25}, {"toggle_weight_on", 0.125}, {"toggle_weight_off", 
0.125} + }; + + MatrixXf X(5,3); + X << 0 , 1, 0 , // binary with integer values + 0.0, 1.0, 1.0, // binary with float values + 2 , 1.0, -3.0, // integer with float and negative values + 2 , 1 , 3 , // integer with integer values + 2.1, 3.7, -5.2; // float values + + X.transposeInPlace(); + + ArrayXf y(3); + + y << 6.1, 7.7, -4.2; // y = x_0 + x_1 + x_2 + + unordered_map user_ops = { + {"Add", 1}, + {"Sub", 1}, + {"SplitOn", 1} + }; + + Dataset dt(X, y); + SearchSpace SS; + SS.init(dt, user_ops); + + dt.print(); + SS.print(); + + for (int d = 1; d < 5; ++d) + for (int s = 1; s < 5; ++s) + { + + PARAMS["max_size"] = s; + PARAMS["max_depth"] = d; + + RegressorProgram PRG = SS.make_regressor(d, s); + fmt::print( + "=================================================\n" + "Tree model for depth = {}, size= {}: {}\n", + d, s, PRG.get_model("compact", true) + ); + + // visualizing detailed information for the model + std::for_each(PRG.Tree.begin(), PRG.Tree.end(), + [](const auto& n) { + fmt::print("Name {}, node {}, feature {}\n" + " sig_hash {}\n ret_type {}\n ret_type type {}\n", + n.name, n.node_type, n.get_feature(), + n.sig_hash, n.ret_type, typeid(n.ret_type).name()); + }); + + std::cout << std::endl; + + fmt::print( "PRG fit\n"); + PRG.fit(dt); + fmt::print( "PRG predict\n"); + ArrayXf y_pred = PRG.predict(dt); + fmt::print( "y_pred: {}\n", y_pred); + + // creating and fitting a child + auto opt = PRG.mutate(); + + if (!opt){ + fmt::print("Mutation failed to create a child\n"); + } + else { + auto Child = opt.value(); + + fmt::print("Child model: {}\n", Child.get_model("compact", true)); + + fmt::print( "Child fit\n"); + Child.fit(dt); + fmt::print( "Child predict\n"); + ArrayXf y_pred_child = Child.predict(dt); + fmt::print( "y_pred: {}\n", y_pred); + } + } + + // Brush exports two DispatchTable structs named dtable_fit and dtable_predict. + // These structures holds the mapping between nodes and its corresponding + // operations, and are used to resolve the evaluation of an expression. 
+ // dtable_fit.print(); + // dtable_predict.print(); +} \ No newline at end of file diff --git a/tests/cpp/test_optimization.cpp b/tests/cpp/test_optimization.cpp index e087cec0..857ea47d 100644 --- a/tests/cpp/test_optimization.cpp +++ b/tests/cpp/test_optimization.cpp @@ -7,9 +7,9 @@ using testing::TestWithParam; // Hashes corresponding to a 3-ary Prod operator -const std::size_t sig_hash = 5617655905677279916; -const std::size_t sig_dual_hash = 10188582206427064428; -const std::size_t complete_hash = 1786662244046809282; +const std::size_t sig_hash = 5617655905677279916u; +const std::size_t sig_dual_hash = 10188582206427064428u; +const std::size_t complete_hash = 1786662244046809282u; class OptimizerTest : public TestWithParam< std::tuple> > { diff --git a/tests/cpp/test_program.cpp b/tests/cpp/test_program.cpp index acd0f67b..3fb2de8b 100644 --- a/tests/cpp/test_program.cpp +++ b/tests/cpp/test_program.cpp @@ -9,8 +9,6 @@ TEST(Program, MakeRegressor) Dataset data = Data::read_csv("docs/examples/datasets/d_enc.csv","label"); - - SearchSpace SS; SS.init(data); @@ -179,10 +177,6 @@ TEST(Operators, ProgramSizeAndDepthPARAMS) SearchSpace SS; SS.init(data); - // split operator --> arity 3 - // prod operator --> arity 4 - int max_arity = 4; - for (int d = 1; d < 10; ++d) { for (int s = 1; s < 10; ++s) @@ -201,23 +195,21 @@ TEST(Operators, ProgramSizeAndDepthPARAMS) "Model size : {}\n" "=================================================\n", d, s, - PRG.get_model("compact", true), PRG.Tree.max_depth(), PRG.Tree.size() + PRG.get_model("compact", true), PRG.depth(), PRG.size() ); - // There are two ways of assessing the size of a program - // GUI TODO: which style are we going to use? i) implement - // PRG.max_depth() (or PRG.depth()); or ii) get rid of PRG.size() - // and use always PRG.Tree.<>) - ASSERT_TRUE(PRG.Tree.size() > 0); - ASSERT_TRUE(PRG.Tree.size() <= s+max_arity); - - ASSERT_TRUE(PRG.size() > 0); - ASSERT_TRUE(PRG.size() <= s+max_arity); + // Terminals are weighted by default, while operators not. Since we + // include the weights in the calculation of the size of the program, + // and PTC2 uses the tree size (not the program size), it is not + // expected that initial trees will strictly respect `max_size`. + ASSERT_TRUE(PRG.size() > 0); // size is always positive + + // PTC2: maximum size is s+max(arity). Since in Brush terminals are + // weighted by default, we set it to 3*max(arity) + ASSERT_TRUE(PRG.size() <= s+3*4); - // GUI TODO: max_depth() counts the number of edges (so a program with - // one node has max_depth()==0). Is this how it should behave? 
-            ASSERT_TRUE(PRG.Tree.max_depth() >= 0);
-            ASSERT_TRUE(PRG.Tree.max_depth() <= d+1);
+            ASSERT_TRUE(PRG.depth() <= d+1);
+            ASSERT_TRUE(PRG.depth() > 0); // depth is always positive
        }
    }
}
\ No newline at end of file
diff --git a/tests/cpp/test_search_space.cpp b/tests/cpp/test_search_space.cpp
index 9904684b..02eaaf19 100644
--- a/tests/cpp/test_search_space.cpp
+++ b/tests/cpp/test_search_space.cpp
@@ -5,40 +5,64 @@
TEST(SearchSpace, Initialization)
{
-
-    MatrixXf X(4,2);
-    MatrixXf X_v(3,2);
-    X << 0,1,
-         0.47942554,0.87758256,
-         0.84147098, 0.54030231,
-         0.99749499, 0.0707372;
-    X_v << 0.90929743, -0.41614684,
-           0.59847214, -0.80114362,
-           0.14112001,-0.9899925;
-
-    X.transposeInPlace();
-    X_v.transposeInPlace();
    ArrayXf y(4);
-    ArrayXf y_v(3);
-    // y = 2*x1 + 3.x2
-    y << 3.0, 3.59159876, 3.30384889, 2.20720158;
-    y_v << 0.57015434, -1.20648656, -2.68773747;
-
+    y << 3.00000, 3.59876, 7.18622, 15.19294;
+
+    // variables have different pairwise correlations with y. The idea is to
+    // see the mutation weights for each floating variable. The slopes were
+    // calculated using Python's np.cov(xprime, yprime)[0][1]/np.var(xprime),
+    // where xprime and yprime are the z-score normalized arrays obtained
+    // from x and y.
+    MatrixXf X(5,4);
+    X << 0,       0,       1,       1,       // x0, binary, expected weight=1
+         2,       0,       1,       2,       // x1, categorical, expected weight=1
+         0.05699, 0.62737, 0.72406, 0.99294, // x2, slope ~= 1.069
+         0.03993, 0.36558, 0.01393, 0.25878, // x3, slope ~= 0.25
+         5.17539, 7.63579,-2.82560, 0.24645; // x4, slope ~= -0.799
+    X.transposeInPlace(); // 4 rows x 5 variables

    Dataset dt(X, y);
-    Dataset dv(X_v, y_v);
-
+
+    // different weights to check if the search space is initialized correctly
    unordered_map user_ops = {
-        {"Add", 1},
-        {"Sub", 1},
-        {"Div", .5},
-        {"Times", 0.5}
+        {"Add", 1},
+        {"Sub", 1},
+        {"Div", .5},
+        {"Mul", 0.5}
    };

-    // SearchSpace SS;
    SearchSpace SS;
    SS.init(dt, user_ops);

-/* dtable_fit.print(); */
-/* dtable_predict.print(); */
-}
+    dt.print();
+    SS.print();
+    // dtable_fit.print();
+    // dtable_predict.print();
+
+    // manually calculated. The last value is the average of the previous values
+    ArrayXf expected_weights_Xf(4); // 4 elements (x2, x3, x4 and c)
+    expected_weights_Xf << 0.80240685, 0.19270448, 0.5994426, 0.531518;
+
+    auto actual_weights_f = SS.terminal_weights.at(DataType::ArrayF);
+    Eigen::Map<ArrayXf> actual_weights_Xf(actual_weights_f.data(), actual_weights_f.size());
+
+    ASSERT_TRUE(expected_weights_Xf.isApprox(actual_weights_Xf));
+
+
+    ArrayXf expected_weights_Xi(2); // 2 elements (x1 and c)
+    expected_weights_Xi << 0.2736814, 0.2736814;
+
+    auto actual_weights_i = SS.terminal_weights.at(DataType::ArrayI);
+    Eigen::Map<ArrayXf> actual_weights_Xi(actual_weights_i.data(), actual_weights_i.size());
+
+    ASSERT_TRUE(expected_weights_Xi.isApprox(actual_weights_Xi));
+
+
+    ArrayXf expected_weights_Xb(2); // 2 elements (x0 and c)
+    expected_weights_Xb << 0.8117065, 0.8117065;
+
+    auto actual_weights_b = SS.terminal_weights.at(DataType::ArrayB);
+    Eigen::Map<ArrayXf> actual_weights_Xb(actual_weights_b.data(), actual_weights_b.size());
+
+    ASSERT_TRUE(expected_weights_Xb.isApprox(actual_weights_Xb));
+}
\ No newline at end of file
diff --git a/tests/cpp/test_variation.cpp b/tests/cpp/test_variation.cpp
index 1f505754..d0eb9bcf 100644
--- a/tests/cpp/test_variation.cpp
+++ b/tests/cpp/test_variation.cpp
@@ -4,46 +4,69 @@
#include "../../src/program/dispatch_table.h"
#include "../../src/data/io.h"

-TEST(Operators, Mutation)
+TEST(Operators, InsertMutationWorks)
{
+    // TODO: this test could be parameterized (see the sketch below).
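+    // A possible parameterization sketch (hypothetical, using the GTest
+    // value-parameterized API already used in test_optimization.cpp):
+    //
+    //   class MutationTest : public testing::TestWithParam<std::string> {};
+    //   INSTANTIATE_TEST_SUITE_P(Mutations, MutationTest,
+    //       testing::Values("insert", "delete", "point", "subtree"));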
+
+    // To understand the design of this test, check the Mutation test
+
    PARAMS["mutation_options"] = {
-        {"point",0.25}, {"insert", 0.25}, {"delete", 0.25}, {"toggle_weight", 0.25}
+        {"point", 0.0}, {"insert", 1.0}, {"delete", 0.0}, {"subtree", 0.0}, {"toggle_weight_on", 0.0}, {"toggle_weight_off", 0.0}
    };
-    // test mutation
-    // TODO: set random seed
+
+    // retrieving the options to check if everything was set right
+    std::cout << "Initial mutation configuration" << std::endl;
+    auto options = PARAMS["mutation_options"].get<std::map<string, float>>();
+    for (const auto& [k, v] : options)
+        std::cout << k << " : " << v << std::endl;
+
    MatrixXf X(10,2);
    ArrayXf y(10);
    X << 0.85595296, 0.55417453, 0.8641915 , 0.99481109, 0.99123376,
         0.9742618 , 0.70894019, 0.94940306, 0.99748867, 0.54205151,
+
         0.5170537 , 0.8324005 , 0.50316305, 0.10173936, 0.13211973,
         0.2254195 , 0.70526861, 0.31406024, 0.07082619, 0.84034526;
+
    y << 3.55634251, 3.13854087, 3.55887523, 3.29462895, 3.33443517,
-        3.4378868 , 3.41092345, 3.5087468 , 3.25110243, 3.11382179;
+         3.4378868 , 3.41092345, 3.5087468 , 3.25110243, 3.11382179;
+
    Dataset data(X,y);

    SearchSpace SS;
    SS.init(data);

-    for (int d = 1; d < 10; ++d)
+    int successes = 0;
+    for (int attempt = 0; attempt < 100; ++attempt)
    {
-        for (int s = 1; s < 10; ++s)
-        {
-            fmt::print("d={},s={}\n",d,s);
-            fmt::print("make_regressor\n");
-            RegressorProgram PRG = SS.make_regressor(d, s);
-            fmt::print("PRG.fit(data);\n");
-            PRG.fit(data);
-            ArrayXf y_pred = PRG.predict(data);
-            fmt::print("auto Child = PRG.mutate(SS);\n");
-            auto Child = PRG.mutate();
+        // we need to have big values here so the mutation will work
+        // (when the mutated child exceeds the maximum limits, mutation returns
+        // std::nullopt)
+        PARAMS["max_size"]  = 20;
+        PARAMS["max_depth"] = 10;
+
+        fmt::print("d={},s={}\n", PARAMS["max_depth"].get<int>(), PARAMS["max_size"].get<int>());
+        fmt::print("make_regressor\n");
+
+        // creating a "small" program (with plenty of room to insert stuff)
+        RegressorProgram PRG = SS.make_regressor(5, 5);
+
+        fmt::print("PRG.fit(data);\n");
+        PRG.fit(data);
+        ArrayXf y_pred = PRG.predict(data);
+
+        // applying mutation and checking if the optional result is non-empty
+        fmt::print("auto Child = PRG.mutate();\n");
+        auto opt = PRG.mutate(); // it should always be the insert mutation

-            fmt::print("print\n");
+        if (opt){
+            successes += 1;
+            auto Child = opt.value();
            fmt::print(
                "=================================================\n"
                "depth = {}, size= {}\n"
                "Initial Model: {}\n"
                "Mutated Model: {}\n",
-                d, s,
+                PARAMS["max_depth"].get<int>(), PARAMS["max_size"].get<int>(),
                PRG.get_model("compact", true),
                Child.get_model("compact", true)
            );
@@ -51,15 +74,52 @@ TEST(Operators, Mutation)
            fmt::print("child fit\n");
            Child.fit(data);
            y_pred = Child.predict(data);
+
+            // since we successfully inserted a node, this should always be true
+            ASSERT_TRUE(Child.size() > PRG.size());
+
+            // maybe the insertion spot was a shorter branch than the maximum
+            // depth. At least, the child's depth should be no smaller than its parent's
+            ASSERT_TRUE(Child.depth() >= PRG.depth());
+        }
+
+        // let's also check that it always fails when the child would exceed the maximum limits
+        PARAMS["max_size"]  = PRG.size();
+        PARAMS["max_depth"] = PRG.depth();
+
+        auto opt2 = PRG.mutate();
+        if (opt2){ // This shouldn't happen. We'll print the error
+            auto Child2 = opt2.value();
+
+            std::cout << "Mutation should have failed. 
Mutation weights:" << std::endl; + auto options2 = PARAMS["mutation_options"].get>(); + for (const auto& [k, v] : options2) + std::cout << k << " : " << v << std::endl; + + fmt::print( + "=================================================\n" + "depth = {}, size= {}\n" + "Initial Model: {}\n" + "Mutated Model: {}\n", + PARAMS["max_depth"].get(), PARAMS["max_size"].get(), + PRG.get_model("compact", true), + Child2.get_model("compact", true) + ); + ASSERT_TRUE(opt2==std::nullopt); } } + ASSERT_TRUE(successes > 0); } -TEST(Operators, Crossover) +TEST(Operators, Mutation) { // test mutation // TODO: set random seed + PARAMS["mutation_options"] = { + {"point",0.25}, {"insert", 0.25}, {"delete", 0.25}, {"subtree", 0.0}, {"toggle_weight_on", 0.125}, {"toggle_weight_off", 0.125} + }; + MatrixXf X(10,2); ArrayXf y(10); X << 0.85595296, 0.55417453, 0.8641915 , 0.99481109, 0.99123376, @@ -78,54 +138,62 @@ TEST(Operators, Crossover) for (int d = 1; d < 10; ++d) { + int successes = 0; for (int s = 1; s < 10; ++s) { - RegressorProgram PRG1 = SS.make_regressor(d, s); - RegressorProgram PRG2 = SS.make_regressor(d, s); - PRG1.fit(data); - PRG2.fit(data); - - fmt::print( - "=================================================\n" - "depth = {}, size= {}\n" - "Initial Model 1: {}\n" - "Initial Model 2: {}\n", - d, s, - PRG1.get_model("compact", true), - PRG2.get_model("compact", true) - ); - ArrayXf y_pred = PRG1.predict(data); - fmt::print("cross one\n"); - auto Child1 = PRG1.cross(PRG2); - fmt::print( - "Model 1 after cross: {}\n" - "Model 2 after cross: {}\n", - PRG1.get_model("compact", true), - PRG2.get_model("compact", true) - ); - fmt::print("cross two\n"); - auto Child2 = PRG2.cross(PRG1); + fmt::print("d={},s={}\n",d,s); + fmt::print("make_regressor\n"); - fmt::print( - "Crossed Model 1: {}\n" - "Crossed Model 2: {}\n" - "=================================================\n", - Child1.get_model("compact", true), - Child2.get_model("compact", true) - ); + // if we set max_size and max_depth to zero, it will use the + // values in the global PARAMS. Otherwise, it will respect the + // values passed as argument. 
+            RegressorProgram PRG = SS.make_regressor(d, s);

-            Child1.fit(data);
-            Child2.fit(data);
-            auto child_pred1 = Child1.predict(data);
-            auto child_pred2 = Child2.predict(data);
+            fmt::print("PRG.fit(data);\n");
+            PRG.fit(data);
+            ArrayXf y_pred = PRG.predict(data);
+
+            // applying mutation and checking if the optional result is non-empty
+            fmt::print("auto Child = PRG.mutate();\n");
+            auto opt = PRG.mutate();
+
+            if (!opt){
+                fmt::print(
+                    "=================================================\n"
+                    "depth = {}, size= {}\n"
+                    "Initial Model: {}\n"
+                    "Mutation failed to create a child",
+                    d, s,
+                    PRG.get_model("compact", true)
+                );
+            }
+            else {
+                successes += 1;
+                auto Child = opt.value();
+                fmt::print(
+                    "=================================================\n"
+                    "depth = {}, size= {}\n"
+                    "Initial Model: {}\n"
+                    "Mutated Model: {}\n",
+                    d, s,
+                    PRG.get_model("compact", true),
+                    Child.get_model("compact", true)
+                );
+
+                fmt::print("child fit\n");
+                Child.fit(data);
+                y_pred = Child.predict(data);
+            }
        }
+        // since x1 and x2 have the same type, we shouldn't get failures
+        ASSERT_TRUE(successes > 0);
    }
}

TEST(Operators, MutationSizeAndDepthLimit)
{
    PARAMS["mutation_options"] = {
-        {"point",0.25}, {"insert", 0.25}, {"delete", 0.25}, {"toggle_weight", 0.25}
+        {"point",0.25}, {"insert", 0.25}, {"delete", 0.25}, {"subtree", 0.0}, {"toggle_weight_on", 0.125}, {"toggle_weight_off", 0.125}
    };

    MatrixXf X(10,2);
@@ -150,6 +218,7 @@ TEST(Operators, MutationSizeAndDepthLimit)

    for (int d = 5; d < 15; ++d)
    {
+        int successes = 0;
        for (int s = 5; s < 15; ++s)
        {
            PARAMS["max_size"] = s;
@@ -165,43 +234,59 @@ TEST(Operators, MutationSizeAndDepthLimit)

            auto PRG_model = PRG.get_model("compact", true);

-            auto Child = PRG.mutate();
-
-            fmt::print("print\n");
-            fmt::print(
-                "=================================================\n"
-                "depth = {}, size= {}\n"
-                "Initial Model: {}\n"
-                "Mutated Model: {}\n"
-                "Mutated depth: {}\n"
-                "Mutated size : {}\n",
-                d, s,
-                PRG.get_model("compact", true),
-                Child.get_model("compact", true),
-                Child.Tree.max_depth(),
-                Child.Tree.size()
-            );
-
-            // Original didn't change
-            ASSERT_TRUE(PRG_model == PRG.get_model("compact", true));
-
-            // Child is within restrictions. Here we expect the generated
-            // expression to have at most max_size nodes (there is no tolerance
-            // gap as PTC2 has). Notice that this is only valid if the original
-            // parent is already respecting the max_size
-            ASSERT_TRUE(Child.size() > 0);
-            ASSERT_TRUE(Child.size() <= s);
-
-            ASSERT_TRUE(Child.Tree.size() > 0);
-            ASSERT_TRUE(Child.Tree.size() <= s);
-
-            ASSERT_TRUE(Child.Tree.max_depth() >= 0);
-            ASSERT_TRUE(Child.Tree.max_depth() <= d);
+            auto opt = PRG.mutate();
+
+            if (!opt){
+                fmt::print(
+                    "=================================================\n"
+                    "depth = {}, size= {}\n"
+                    "Initial Model: {}\n"
+                    "Mutation failed to create a child",
+                    d, s,
+                    PRG.get_model("compact", true)
+                );
+            }
+            else {
+                successes += 1;
+
+                // Extracting the child from the std::optional and checking
+                // if it is within size and depth restrictions. There is no
+                // margin for having slightly bigger expressions.
+                auto Child = opt.value();
+
+                fmt::print("print\n");
+                fmt::print(
+                    "=================================================\n"
+                    "depth = {}, size= {}\n"
+                    "Initial Model: {}\n"
+                    "Mutated Model: {}\n"
+                    "Mutated depth: {}\n"
+                    "Mutated size : {}\n",
+                    d, s,
+                    PRG.get_model("compact", true),
+                    Child.get_model("compact", true),
+                    Child.depth(),
+                    Child.size()
+                );
+
+                // Original didn't change
+                ASSERT_TRUE(PRG_model == PRG.get_model("compact", true));
+
+                ASSERT_TRUE(Child.size() > 0);
+                ASSERT_TRUE(Child.size() <= s);
+
+                ASSERT_TRUE(Child.depth() >= 0);
+                ASSERT_TRUE(Child.depth() <= d);
+            }
        }
+        ASSERT_TRUE(successes > 0);
    }
}

-TEST(Operators, CrossoverSizeAndDepthLimit)
+TEST(Operators, Crossover)
{
    MatrixXf X(10,2);
    ArrayXf y(10);
@@ -219,24 +304,15 @@
    SearchSpace SS;
    SS.init(data);

-    // split operator --> arity 3
-    // prod operator --> arity 4
-    int max_arity = 4;
-
-    for (int d = 5; d < 15; ++d)
+    for (int d = 1; d < 10; ++d)
    {
-        for (int s = 5; s < 15; ++s)
+        int successes = 0;
+        for (int s = 1; s < 10; ++s)
        {
-            PARAMS["max_size"] = s;
-            PARAMS["max_depth"] = d;
-
-            // Enforcing that the parents does not exceed max_size by
-            // taking into account the highest arity of the function nodes
-            RegressorProgram PRG1 = SS.make_regressor(d-1, s-max_arity);
-            RegressorProgram PRG2 = SS.make_regressor(d-1, s-max_arity);
-
-            auto PRG1_model = PRG1.get_model("compact", true);
-            auto PRG2_model = PRG2.get_model("compact", true);
+            RegressorProgram PRG1 = SS.make_regressor(d, s);
+            RegressorProgram PRG2 = SS.make_regressor(d, s);
+            PRG1.fit(data);
+            PRG2.fit(data);

            fmt::print(
                "=================================================\n"
                "depth = {}, size= {}\n"
                "Initial Model 1: {}\n"
                "Initial Model 2: {}\n",
                d, s,
                PRG1.get_model("compact", true),
                PRG2.get_model("compact", true)
            );

+            ArrayXf y_pred = PRG1.predict(data);
            fmt::print("cross one\n");
-            auto Child1 = PRG1.cross(PRG2);
-            fmt::print(
-                "Model 1 after cross: {}\n"
-                "Model 2 after cross: {}\n",
-                PRG1.get_model("compact", true),
-                PRG2.get_model("compact", true)
-            );
-
-            fmt::print("cross two\n");
-            auto Child2 = PRG2.cross(PRG1);
-            fmt::print(
-                "Crossed Model 1      : {}\n"
-                "Crossed Model 1 depth: {}\n"
-                "Crossed Model 1 size : {}\n"
-                "Crossed Model 2      : {}\n"
-                "Crossed Model 2 depth: {}\n"
-                "Crossed Model 2 size : {}\n"
-                "=================================================\n",
-                Child1.get_model("compact", true),
-                Child1.Tree.max_depth(), Child1.Tree.size(),
-                Child2.get_model("compact", true),
-                Child2.Tree.max_depth(), Child2.Tree.size()
-            );
-
-            // Original didn't change
-            ASSERT_TRUE(PRG1_model == PRG1.get_model("compact", true));
-            ASSERT_TRUE(PRG2_model == PRG2.get_model("compact", true));
-
-            // Child1 is within restrictions
-            ASSERT_TRUE(Child1.size() > 0);
-            ASSERT_TRUE(Child1.size() <= s);
-            ASSERT_TRUE(Child1.Tree.size() > 0);
-            ASSERT_TRUE(Child1.Tree.size() <= s);
-            ASSERT_TRUE(Child1.Tree.max_depth() >= 0);
-            ASSERT_TRUE(Child1.Tree.max_depth() <= d);
-
-            // Child2 is within restrictions
-            ASSERT_TRUE(Child2.size() > 0);
-            ASSERT_TRUE(Child2.size() <= s);
-            ASSERT_TRUE(Child2.Tree.size() > 0);
-            ASSERT_TRUE(Child2.Tree.size() <= s);
-
-            ASSERT_TRUE(Child2.Tree.max_depth() >= 0);
-            ASSERT_TRUE(Child2.Tree.max_depth() <= d);
+            auto opt = PRG1.cross(PRG2);
+            if (!opt){
+                fmt::print(
+                    "=================================================\n"
+                    "depth = {}, size= {}\n"
+                    "Original model 1: {}\n"
+                    "Original model 2: {}\n"
+                    "Crossover failed to create a child\n",
+                    d, s,
+
PRG1.get_model("compact", true), + PRG2.get_model("compact", true) + ); + } + else { + successes += 1; + auto Child = opt.value(); + fmt::print( + "Original model 1 after cross: {}\n" + "Original model 2 after cross: {}\n", + PRG1.get_model("compact", true), + PRG2.get_model("compact", true) + ); + fmt::print( + "Crossed Model: {}\n" + "=================================================\n", + Child.get_model("compact", true) + ); + Child.fit(data); + auto child_pred1 = Child.predict(data); + } } + ASSERT_TRUE(successes > 0); } } -TEST(Operators, MutationSizeAndDepthPARAMS) -{ - PARAMS["mutation_options"] = { - {"point",0.25}, {"insert", 0.25}, {"delete", 0.25}, {"toggle_weight", 0.25} - }; - - MatrixXf X(10,2); - ArrayXf y(10); - X << 0.85595296, 0.55417453, 0.8641915 , 0.99481109, 0.99123376, - 0.9742618 , 0.70894019, 0.94940306, 0.99748867, 0.54205151, - - 0.5170537 , 0.8324005 , 0.50316305, 0.10173936, 0.13211973, - 0.2254195 , 0.70526861, 0.31406024, 0.07082619, 0.84034526; - - y << 3.55634251, 3.13854087, 3.55887523, 3.29462895, 3.33443517, - 3.4378868 , 3.41092345, 3.5087468 , 3.25110243, 3.11382179; - - Dataset data(X,y); - - SearchSpace SS; - SS.init(data); - - // split operator --> arity 3 - // prod operator --> arity 4 - int max_arity = 4; - - for (int d = 1; d < 10; ++d) - { - for (int s = 1; s < 10; ++s) - { - PARAMS["max_size"] = s; - PARAMS["max_depth"] = d; - - fmt::print("d={},s={}\n",d,s); - fmt::print("make_regressor\n"); - - RegressorProgram PRG = SS.make_regressor(0, 0); - - auto PRG_model = PRG.get_model("compact", true); - - auto Child = PRG.mutate(); - - // Child is within restrictions. Here we allow the mutation - // to generate slightly bigger expressions (because the original - // parents can also have this offset due to PTC2 generation method) - ASSERT_TRUE(Child.size() > 0); - ASSERT_TRUE(Child.size() <= s+max_arity); - - ASSERT_TRUE(Child.Tree.size() > 0); - ASSERT_TRUE(Child.Tree.size() <= s+max_arity); - - ASSERT_TRUE(Child.Tree.max_depth() >= 0); - ASSERT_TRUE(Child.Tree.max_depth() <= d+1); - } - } -} - -TEST(Operators, CrossoverSizeAndDepthPARAMS) +TEST(Operators, CrossoverSizeAndDepthLimit) { MatrixXf X(10,2); ArrayXf y(10); @@ -377,39 +384,67 @@ TEST(Operators, CrossoverSizeAndDepthPARAMS) // prod operator --> arity 4 int max_arity = 4; - for (int d = 1; d < 10; ++d) + for (int d = 5; d < 15; ++d) { - for (int s = 1; s < 10; ++s) + int successes = 0; + for (int s = 5; s < 15; ++s) { PARAMS["max_size"] = s; PARAMS["max_depth"] = d; - RegressorProgram PRG1 = SS.make_regressor(0, 0); - RegressorProgram PRG2 = SS.make_regressor(0, 0); + // Enforcing that the parents does not exceed max_size by + // taking into account the highest arity of the function nodes + RegressorProgram PRG1 = SS.make_regressor(d-1, s-max_arity); + RegressorProgram PRG2 = SS.make_regressor(d-1, s-max_arity); auto PRG1_model = PRG1.get_model("compact", true); auto PRG2_model = PRG2.get_model("compact", true); - auto Child1 = PRG1.cross(PRG2); - auto Child2 = PRG2.cross(PRG1); - - // Child1 is within restrictions - ASSERT_TRUE(Child1.size() > 0); - ASSERT_TRUE(Child1.size() <= s+max_arity); - ASSERT_TRUE(Child1.Tree.size() > 0); - ASSERT_TRUE(Child1.Tree.size() <= s+max_arity); - - ASSERT_TRUE(Child1.Tree.max_depth() >= 0); - ASSERT_TRUE(Child1.Tree.max_depth() <= d+1); - - // Child2 is within restrictions - ASSERT_TRUE(Child2.size() > 0); - ASSERT_TRUE(Child2.size() <= s+max_arity); - ASSERT_TRUE(Child2.Tree.size() > 0); - ASSERT_TRUE(Child2.Tree.size() <= s+max_arity); + fmt::print( + 
"=================================================\n" + "settings: depth = {}, size= {}\n" + "Original model 1: {}\n" + "depth = {}, size= {}\n" + "Original model 2: {}\n" + "depth = {}, size= {}\n", + d, s, + PRG1.get_model("compact", true), + PRG1.depth(), PRG1.size(), + PRG2.get_model("compact", true), + PRG2.depth(), PRG2.size() + ); - ASSERT_TRUE(Child2.Tree.max_depth() >= 0); - ASSERT_TRUE(Child2.Tree.max_depth() <= d+1); + fmt::print("cross\n"); + auto opt = PRG1.cross(PRG2); + + if (!opt){ + fmt::print("Crossover failed to create a child" + "=================================================\n"); + } + else { + successes += 1; + auto Child = opt.value(); + fmt::print( + "Child Model : {}\n" + "Child Model depth: {}\n" + "Child Model size : {}\n" + "=================================================\n", + Child.get_model("compact", true), + Child.depth(), Child.size() + ); + + // Original didn't change + ASSERT_TRUE(PRG1_model == PRG1.get_model("compact", true)); + ASSERT_TRUE(PRG2_model == PRG2.get_model("compact", true)); + + // Child is within restrictions + ASSERT_TRUE(Child.size() > 0); + ASSERT_TRUE(Child.size() <= s); + + ASSERT_TRUE(Child.depth() >= 0); + ASSERT_TRUE(Child.depth() <= d); + } } + ASSERT_TRUE(successes > 0); } -} +} \ No newline at end of file diff --git a/tests/python/test_brush.py b/tests/python/test_brush.py index 2fadd1e4..5e38898f 100644 --- a/tests/python/test_brush.py +++ b/tests/python/test_brush.py @@ -60,3 +60,14 @@ def test_fit(setup, brush_args, request): pytest.fail(f"Unexpected Exception caught: {e}") logging.error(traceback.format_exc()) + +# def test_random_state(): # TODO: make it work +# test_y = np.array( [1. , 0. , 1.4, 1. , 0. , 1. , 1. , 0. , 0. , 0. ]) +# test_X = np.array([[1.1, 2.0, 3.0, 4.0, 5.0, 6.5, 7.0, 8.0, 9.0, 10.0], +# [2.0, 1.2, 6.0, 4.0, 5.0, 8.0, 7.0, 5.0, 9.0, 10.0]]).T + +# est1 = brush.BrushRegressor(random_state=42).fit(test_X, test_y) +# est2 = brush.BrushRegressor(random_state=42).fit(test_X, test_y) + +# assert est1.best_estimator_.get_model() == est2.best_estimator_.get_model(), \ +# "random state failed to generate same results" \ No newline at end of file diff --git a/tests/python/test_params.py b/tests/python/test_params.py new file mode 100644 index 00000000..03d08bc4 --- /dev/null +++ b/tests/python/test_params.py @@ -0,0 +1,95 @@ +import pytest + +import _brush +import time +from multiprocessing import Pool +import numpy as np + + +def test_param_random_state(): + # Check if make_regressor, mutation and crossover will create the same expressions + test_y = np.array( [1. , 0. , 1.4, 1. , 0. , 1. , 1. , 0. , 0. , 0. 
+    test_X = np.array([[1.1, 2.0, 3.0, 4.0, 5.0, 6.5, 7.0, 8.0, 9.0, 10.0],
+                       [2.0, 1.2, 6.0, 4.0, 5.0, 8.0, 7.0, 5.0, 9.0, 10.0]]).T
+
+    data = _brush.Dataset(test_X, test_y)
+    SS = _brush.SearchSpace(data)
+
+    _brush.set_random_state(123)
+
+    first_run = []
+    for d in range(1,4):
+        for s in range(1,20):
+            prg = SS.make_regressor(d, s)
+            prg = prg.mutate()
+
+            if prg is not None: prg = prg.cross(prg)
+            if prg is not None: first_run.append(prg.get_model())
+
+    assert len(first_run) > 0, "either mutation or crossover is always failing"
+
+    _brush.set_random_state(123)
+
+    second_run = []
+    for d in range(1,4):
+        for s in range(1,20):
+            prg = SS.make_regressor(d, s)
+            prg = prg.mutate()
+
+            if prg is not None: prg = prg.cross(prg)
+            if prg is not None: second_run.append(prg.get_model())
+
+    assert len(second_run) > 0, "either mutation or crossover is always failing"
+
+    for fr, sr in zip(first_run, second_run):
+        assert fr == sr, "random state failed to generate same expressions"
+
+
+def _change_and_wait(config):
+    """Set only the mutation weight at `index` to 1, then wait `seconds`
+    seconds before retrieving the _brush PARAMS and printing the weight values."""
+    index, seconds = config
+
+    # Sample configuration
+    params = {
+        'verbosity': False,
+        'pop_size' : 100,
+        'max_gen'  : 100,
+        'max_depth': 5,
+        'max_size' : 50,
+        'mutation_options': {'point'            : 0.0,
+                             'insert'           : 0.0,
+                             'delete'           : 0.0,
+                             'subtree'          : 0.0,
+                             'toggle_weight_on' : 0.0,
+                             'toggle_weight_off': 0.0}
+    }
+
+    # We need a guaranteed order to use the index correctly
+    mutations = ['point', 'insert', 'delete', 'subtree', 'toggle_weight_on', 'toggle_weight_off']
+
+    for i, m in enumerate(mutations):
+        params['mutation_options'][m] = 0.0 if i != index else 1.0
+
+    print(f"(Process {index}) Setting mutation {mutations[index]} to 1 and waiting {seconds} seconds")
+
+    _brush.set_params(params)
+    time.sleep(seconds)
+
+    print(f"(Process {index}) Retrieving PARAMS: {_brush.get_params()['mutation_options']}")
+
+    assert params['mutation_options'] == _brush.get_params()['mutation_options'], \
+        f"(Process {index}) BRUSH FAILED TO KEEP SEPARATE INSTANCES OF `PARAMS` BETWEEN MULTIPLE PROCESSES"
+
+def test_global_PARAMS_sharing():
+    print("By default, all processes start with all mutations having weight zero.")
+
+    scale = 0.25 # Scale the wait time of each process (for manual inspection)
+
+    # Check that brush's PARAMS can be modified inside a pool without collateral
+    # effects. The configurations start in the order they are listed, but they
+    # finish at different times. All of them modify brush's PARAMS.
+    Pool(processes=3).map(_change_and_wait, [(0, 3*scale),
+                                             (1, 1*scale),
+                                             (2, 2*scale)])
+    
\ No newline at end of file
diff --git a/tests/python/test_program.py b/tests/python/test_program.py
index 5c7e51b7..78356bee 100644
--- a/tests/python/test_program.py
+++ b/tests/python/test_program.py
@@ -13,7 +13,7 @@ def test_data():
     test_y = np.array([1.,0.,1.4,1.,0.,1.,1.,0.,0.,0.])
 
     test_X = np.array([[1.1,2.0,3.0,4.0,5.0,6.5,7.0,8.0,9.0,10.0],
-                       [2.0,1.2,6.0,4.0,5.0,8.0,7.0,5.0,9.0,10.0]])
+                       [2.0,1.2,6.0,4.0,5.0,8.0,7.0,5.0,9.0,10.0]]).T
 
     return (test_X, test_y)
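
A note on the variation API these tests exercise: on the Python side, `mutate()` and `cross()` return None whenever no child satisfying `max_size`/`max_depth` can be produced, so callers are expected to check the result and retry, as test_param_random_state does inline. Below is a minimal sketch of that retry pattern, assuming only the binding behavior shown in the tests above; the helper name `variate_with_retries` and the `max_tries` parameter are illustrative, not part of this patch.

def variate_with_retries(prg, other=None, max_tries=10):
    # Retry mutation (or crossover, when `other` is given) until a child
    # is produced; both operators return None when they fail.
    for _ in range(max_tries):
        child = prg.mutate() if other is None else prg.cross(other)
        if child is not None:
            return child
    return None  # every attempt failed

With the fixtures above, `child = variate_with_retries(SS.make_regressor(3, 10))` would centralize the `is not None` checks that the tests currently repeat.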