From c1e4366771b79bae334dd8f328ee12467b9de85e Mon Sep 17 00:00:00 2001
From: Guilherme Aldeia
Date: Tue, 26 Sep 2023 09:27:17 -0400
Subject: [PATCH 1/3] Initial structure to implement an Island GA. taskflow added as dependency.

---
 environment.yml |  1 +
 src/brushGA.cpp | 11 ++++++++
 src/brushGA.h   | 69 +++++++++++++++++++++++++++++++++++++++++++++++++
 src/selection.h | 17 ++++++++++++
 4 files changed, 98 insertions(+)
 create mode 100644 src/brushGA.cpp
 create mode 100644 src/brushGA.h
 create mode 100644 src/selection.h

diff --git a/environment.yml b/environment.yml
index 53e9d812..8e6a62d6 100644
--- a/environment.yml
+++ b/environment.yml
@@ -10,6 +10,7 @@ dependencies:
   - gxx >= 12.0
   - ninja
   - ceres-solver
+  - taskflow
   - pybind11 #=2.6.2
   - pytest #=6.2.4
   - pydot
diff --git a/src/brushGA.cpp b/src/brushGA.cpp
new file mode 100644
index 00000000..1b16a4d7
--- /dev/null
+++ b/src/brushGA.cpp
@@ -0,0 +1,11 @@
+#include "brushGA.h"
+#include
+
+
+using namespace Brush;
+
+/// @brief initialize BrushGA object for fitting.
+void BrushGA::init()
+{
+
+}
diff --git a/src/brushGA.h b/src/brushGA.h
new file mode 100644
index 00000000..02d0c536
--- /dev/null
+++ b/src/brushGA.h
@@ -0,0 +1,69 @@
+/* Brush
+copyright 2020 William La Cava
+license: GNU/GPL v3
+*/
+
+#ifndef BrushGA_H
+#define BrushGA_H
+
+#include "init.h"
+#include "taskflow/taskflow.hpp"
+
+// TODO: improve the includes (why do the lines below not work?)
+// #include "variation.h"
+// #include "selection.h"
+
+// using namespace selection;
+// using namespace variation;
+
+namespace Brush
+{
+
+class BrushGA{
+public:
+
+    BrushGA(){}
+    /// destructor
+    ~BrushGA(){}
+
+    void init();
+
+    // getters and setters for GA configuration.
+    // getters and setters for the best solution found after evolution
+    // predict, transform, predict_proba, etc.
+    // get statistics
+    // load and save best individuals
+    // logger, save to file
+    // execution archive
+    // random state control
+    // score functions
+    // fit methods (this will run the evolution), run a single generation
+private:
+    // attributes (hyperparameters)
+    // update best
+    // calculate/print stats
+};
+
+int main(){
+
+    tf::Executor executor;
+    tf::Taskflow taskflow;
+
+    auto [A, B, C, D] = taskflow.emplace( // create four tasks
+        [] () { std::cout << "TaskA\n"; },
+        [] () { std::cout << "TaskB\n"; },
+        [] () { std::cout << "TaskC\n"; },
+        [] () { std::cout << "TaskD\n"; }
+    );
+
+    A.precede(B, C); // A runs before B and C
+    D.succeed(B, C); // D runs after B and C
+
+    executor.run(taskflow).wait();
+
+    return 0;
+}
+
+} // Brush
+
+#endif
diff --git a/src/selection.h b/src/selection.h
new file mode 100644
index 00000000..3ce1f849
--- /dev/null
+++ b/src/selection.h
@@ -0,0 +1,17 @@
+/* Brush
+copyright 2020 William La Cava
+license: GNU/GPL v3
+*/
+
+#ifndef SELECTION_H
+#define SELECTION_H
+
+namespace selection {
+
+class SelectorBase {
+public:
+private:
+};
+
+} // selection
+#endif
\ No newline at end of file

From 2a579b9cc7e7135dbf04e44987be4cb70f8b5b9a Mon Sep 17 00:00:00 2001
From: Guilherme Aldeia
Date: Sun, 8 Oct 2023 15:53:10 -0400
Subject: [PATCH 2/3] Python implementation of island model.
 Fixed API and tests.

---
 src/brush/deap_api/__init__.py    |   2 +
 src/brush/deap_api/nsga2island.py | 143 ++++++++++++++++++++++++++++++
 src/brush/estimator.py            |  25 ++++--
 tests/python/test_brush.py        |  11 ++-
 4 files changed, 172 insertions(+), 9 deletions(-)
 create mode 100644 src/brush/deap_api/nsga2island.py

diff --git a/src/brush/deap_api/__init__.py b/src/brush/deap_api/__init__.py
index b2b2dfa8..6cbb48db 100644
--- a/src/brush/deap_api/__init__.py
+++ b/src/brush/deap_api/__init__.py
@@ -1,2 +1,4 @@
 from .nsga2 import nsga2
+from .ga import ga
+from .nsga2island import nsga2island
 from .utils import DeapIndividual
\ No newline at end of file
diff --git a/src/brush/deap_api/nsga2island.py b/src/brush/deap_api/nsga2island.py
new file mode 100644
index 00000000..c976b211
--- /dev/null
+++ b/src/brush/deap_api/nsga2island.py
@@ -0,0 +1,143 @@
+from deap import tools
+from deap.benchmarks.tools import diversity, convergence, hypervolume
+import numpy as np
+import functools
+
+
+def nsga2island(toolbox, NGEN, MU, N_ISLANDS, MIGPX, CXPB, use_batch, verbosity, rnd_flt):
+    # NGEN = 250
+    # MU = 100
+    # CXPB = 0.9
+    # N_ISLANDS: number of independent islands. Islands are controlled by indexes.
+    #   Setting N_ISLANDS=1 would be the same as the original nsga2.
+    # rnd_flt: random number generator to sample crossover prob
+
+    def calculate_statistics(ind):
+        on_train = ind.fitness.values
+        on_val = toolbox.evaluateValidation(ind)
+
+        return (*on_train, *on_val)
+
+    stats = tools.Statistics(calculate_statistics)
+
+    stats.register("avg", np.mean, axis=0)
+    stats.register("med", np.median, axis=0)
+    stats.register("std", np.std, axis=0)
+    stats.register("min", np.min, axis=0)
+    stats.register("max", np.max, axis=0)
+
+    logbook = tools.Logbook()
+    logbook.header = "gen", "evals", "avg (O1 train, O2 train, O1 val, O2 val)", \
+                     "med (O1 train, O2 train, O1 val, O2 val)", \
+                     "std (O1 train, O2 train, O1 val, O2 val)", \
+                     "min (O1 train, O2 train, O1 val, O2 val)", \
+                     "max (O1 train, O2 train, O1 val, O2 val)"
+
+    # Tuples with start and end indexes for each island. Number of individuals
+    # in each island can slightly differ if N_ISLANDS is not a divisor of MU
+    island_indexes = [((i*MU)//N_ISLANDS, ((i+1)*MU)//N_ISLANDS)
+                      for i in range(N_ISLANDS)]
+
+    print("island_indexes", island_indexes)
+    pop = toolbox.population(n=MU)
+
+    fitnesses = toolbox.map(functools.partial(toolbox.evaluate), pop)
+    for ind, fit in zip(pop, fitnesses):
+        ind.fitness.values = fit
+
+    survived = []
+    for (idx_start, idx_end) in island_indexes:
+        survived_parents = toolbox.survive(pop[idx_start:idx_end],
+                                           idx_end-idx_start)
+        survived.extend(survived_parents)
+    pop = survived
+
+    record = stats.compile(pop)
+    logbook.record(gen=0, evals=len(pop), **record)
+
+    if verbosity > 0:
+        print(logbook.stream)
+
+    # Begin the generational process
+    for gen in range(1, NGEN):
+        batch = toolbox.getBatch() # batch will be a random subset only if it was not
+                                   # defined as the size of the train set. Every time
+                                   # this function is called, a new random batch is generated.
+
+        if (use_batch): # recalculate the fitness for the parents
+            # use_batch is true only when batch_size is smaller than the train set size.
+            # If we're using batch, we need to re-evaluate every model (without
+            # changing its weights).
+            # evaluateValidation doesn't fit the weights.
+            fitnesses = toolbox.map(
+                functools.partial(toolbox.evaluateValidation, data=batch), pop)
+
+            for ind, fit in zip(pop, fitnesses):
+                ind.fitness.values = fit
+
+        # Vary the population inside each island
+        parents = []
+        for (idx_start, idx_end) in island_indexes:
+            island_parents = toolbox.select(pop[idx_start:idx_end],
+                                            idx_end-idx_start)
+            parents.extend(island_parents)
+
+        offspring = [] # Will have the same size as pop
+        for (idx_start, idx_end) in island_indexes:
+            for ind1, ind2 in zip(parents[idx_start:idx_end:2],
+                                  parents[idx_start+1:idx_end:2]
+                                  ):
+                off1, off2 = None, None
+                if rnd_flt() < CXPB: # either mutation or crossover
+                    off1, off2 = toolbox.mate(ind1, ind2)
+                else:
+                    off1 = toolbox.mutate(ind1)
+                    off2 = toolbox.mutate(ind2)
+
+                # Inserting parent if variation failed
+                offspring.extend([off1 if off1 is not None else toolbox.Clone(ind1)])
+                offspring.extend([off2 if off2 is not None else toolbox.Clone(ind2)])
+
+        # Evaluate (instead of evaluateValidation) to fit the weights of the offspring
+        fitnesses = toolbox.map(functools.partial(toolbox.evaluate), offspring)
+        if (use_batch): # calculating objectives based on batch
+            fitnesses = toolbox.map(
+                functools.partial(toolbox.evaluateValidation, data=batch), offspring)
+
+        for ind, fit in zip(offspring, fitnesses):
+            ind.fitness.values = fit
+
+        # Select the next generation population
+        new_pop = []
+        for (idx_start, idx_end) in island_indexes:
+            island_new_pop = toolbox.survive(pop[idx_start:idx_end] \
+                                             +offspring[idx_start:idx_end],
+                                             idx_end-idx_start)
+            new_pop.extend(island_new_pop)
+
+        # Migration to fill up the islands for the next generation
+        pop = []
+        for (idx_start, idx_end) in island_indexes:
+            other_islands = list(range(0, idx_start)) + list(range(idx_end, MU))
+            for idx_individual in range(idx_start, idx_end):
+                if rnd_flt() < MIGPX: # replace by someone not from the same island
+                    idx_other_individual = other_islands[
+                        int(rnd_flt() * len(other_islands))]
+                    pop.append(new_pop[idx_other_individual])
+                else:
+                    pop.append(new_pop[idx_individual])
+        print(len(pop))
+        for p in pop:
+            print(type(p), p)
+        record = stats.compile(pop)
+        logbook.record(gen=gen,
+                       evals=len(offspring)+(len(pop) if use_batch else 0), **record)
+
+        if verbosity > 0:
+            print(logbook.stream)
+
+    if verbosity > 0:
+        print("Final population hypervolume is %f" % hypervolume(pop, [1000.0, 50.0]))
+
+    archive = tools.ParetoFront()
+    archive.update(pop)
+
+    return archive, logbook
\ No newline at end of file
diff --git a/src/brush/estimator.py b/src/brush/estimator.py
index 5f07eabc..6249d2f9 100644
--- a/src/brush/estimator.py
+++ b/src/brush/estimator.py
@@ -13,7 +13,7 @@
 # from tqdm import tqdm
 from types import NoneType
 import _brush
-from .deap_api import nsga2, ga, DeapIndividual
+from .deap_api import nsga2, ga, nsga2island, DeapIndividual
 # from _brush import Dataset, SearchSpace


@@ -38,6 +38,12 @@ class BrushEstimator(BaseEstimator):
         Maximum depth of GP trees in the GP program. Use 0 for no limit.
     max_size : int, default 0
         Maximum number of nodes in a tree. Use 0 for no limit.
+    n_islands : int, default 5
+        Number of independent islands to use in the evolutionary framework.
+        Ignored if `algorithm!="nsga2island"`.
+    mig_prob : float, default 0.05
+        Probability of a migration occurring between two random islands at the
+        end of a generation, must be between 0 and 1.
     cx_prob : float, default 1/7
         Probability of applying the crossover variation when generating the offspring,
         must be between 0 and 1.
@@ -59,7 +65,7 @@ class BrushEstimator(BaseEstimator):
     initialization : {"grow", "full"}, default "grow"
         Strategy to create the initial population. If `full`, then every expression is created
         with `max_size` nodes. If `grow`, size will be uniformly distributed.
-    algorithm : {"nsga2", "ga"}, default "nsga2"
+    algorithm : {"nsga2island", "nsga2", "ga"}, default "nsga2island"
         Which Evolutionary Algorithm framework to use to evolve the population.
     validation_size : float, default 0.0
         Percentage of samples to use as a hold-out partition. These samples are used
@@ -106,12 +112,14 @@ def __init__(
         verbosity=0,
         max_depth=3,
         max_size=20,
+        n_islands=5,
+        mig_prob=0.05,
         cx_prob= 1/7,
         mutation_options = {"point":1/6, "insert":1/6, "delete":1/6, "subtree":1/6,
                             "toggle_weight_on":1/6, "toggle_weight_off":1/6},
         functions: list[str]|dict[str,float] = {},
         initialization="grow",
-        algorithm="nsga2",
+        algorithm="nsga2island",
         random_state=None,
         validation_size: float = 0.0,
         batch_size: float = 1.0
@@ -123,6 +131,8 @@ def __init__(
         self.mode=mode
         self.max_depth=max_depth
         self.max_size=max_size
+        self.n_islands=n_islands
+        self.mig_prob=mig_prob
         self.cx_prob=cx_prob
         self.mutation_options=mutation_options
         self.functions=functions
@@ -155,7 +165,7 @@ def _setup_toolbox(self, data_train, data_validation):
         # When solving multi-objective problems, selection and survival must
         # support this feature. This means that these selection operators must
         # accept a tuple of fitnesses as argument)
-        if self.algorithm=="nsga2":
+        if self.algorithm=="nsga2" or self.algorithm=="nsga2island":
             toolbox.register("select", tools.selTournamentDCD)
             toolbox.register("survive", tools.selNSGA2)
         elif self.algorithm=="ga":
             toolbox.register("select", tools.selTournament, tournsize=3)
             def offspring(pop, MU): return pop[-MU:]
             toolbox.register("survive", offspring)
@@ -232,7 +242,12 @@ def fit(self, X, y):
         self.search_space_ = _brush.SearchSpace(self.train_, self.functions_)
         self.toolbox_ = self._setup_toolbox(data_train=self.train_, data_validation=self.validation_)

-        if self.algorithm=="nsga2":
+        if self.algorithm=="nsga2island":
+            self.archive_, self.logbook_ = nsga2island(
+                self.toolbox_, self.max_gen, self.pop_size, self.n_islands,
+                self.mig_prob, self.cx_prob,
+                (0.0
Date: Wed, 11 Oct 2023 12:48:07 -0400
Subject: [PATCH 3/3] Option to use nsga or ga with island divisions

---
 src/brush/deap_api/__init__.py    | 1 -
 src/brush/deap_api/nsga2island.py | 5 +----
 src/brush/estimator.py            | 6 +++---
 src/variation.h                   | 1 +
 tests/cpp/test_program.cpp        | 8 ++++++++
 tests/python/test_brush.py        | 2 ++
 6 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/src/brush/deap_api/__init__.py b/src/brush/deap_api/__init__.py
index 6cbb48db..b011dec5 100644
--- a/src/brush/deap_api/__init__.py
+++ b/src/brush/deap_api/__init__.py
@@ -1,4 +1,3 @@
 from .nsga2 import nsga2
-from .ga import ga
 from .nsga2island import nsga2island
 from .utils import DeapIndividual
\ No newline at end of file
diff --git a/src/brush/deap_api/nsga2island.py b/src/brush/deap_api/nsga2island.py
index c976b211..d215d3f7 100644
--- a/src/brush/deap_api/nsga2island.py
+++ b/src/brush/deap_api/nsga2island.py
@@ -38,7 +38,6 @@ def calculate_statistics(ind):
     island_indexes = [((i*MU)//N_ISLANDS, ((i+1)*MU)//N_ISLANDS)
                       for i in range(N_ISLANDS)]

-    print("island_indexes", island_indexes)
     pop = toolbox.population(n=MU)

     fitnesses = toolbox.map(functools.partial(toolbox.evaluate), pop)
@@ -125,9 +124,7 @@ def calculate_statistics(ind):
                     pop.append(new_pop[idx_other_individual])
                 else:
                     pop.append(new_pop[idx_individual])
-        print(len(pop))
-        for p in pop:
-            print(type(p), p)
+
         record = stats.compile(pop)
         logbook.record(gen=gen,
                        evals=len(offspring)+(len(pop) if use_batch else 0), **record)
diff --git a/src/brush/estimator.py b/src/brush/estimator.py
index 5c810d70..a5520067 100644
--- a/src/brush/estimator.py
+++ b/src/brush/estimator.py
@@ -66,7 +66,7 @@ class BrushEstimator(BaseEstimator):
     initialization : {"grow", "full"}, default "grow"
         Strategy to create the initial population. If `full`, then every expression is created
         with `max_size` nodes. If `grow`, size will be uniformly distributed.
-    algorithm : {"nsga2island", "nsga2", "ga"}, default "nsga2island"
+    algorithm : {"nsga2island", "nsga2", "gaisland", "ga"}, default "nsga2island"
         Which Evolutionary Algorithm framework to use to evolve the population.
     validation_size : float, default 0.0
         Percentage of samples to use as a hold-out partition. These samples are used
@@ -169,7 +169,7 @@ def _setup_toolbox(self, data_train, data_validation):
         if self.algorithm=="nsga2" or self.algorithm=="nsga2island":
             toolbox.register("select", tools.selTournamentDCD)
             toolbox.register("survive", tools.selNSGA2)
-        elif self.algorithm=="ga":
+        elif self.algorithm=="ga" or self.algorithm=="gaisland":
             toolbox.register("select", tools.selTournament, tournsize=3)
             def offspring(pop, MU): return pop[-MU:]
             toolbox.register("survive", offspring)
@@ -250,7 +250,7 @@ def fit(self, X, y):
         self.search_space_ = _brush.SearchSpace(self.train_, self.functions_)
         self.toolbox_ = self._setup_toolbox(data_train=self.train_, data_validation=self.validation_)

-        if self.algorithm=="nsga2island":
+        if self.algorithm=="nsga2island" or self.algorithm=="gaisland":
             self.archive_, self.logbook_ = nsga2island(
                 self.toolbox_, self.max_gen, self.pop_size, self.n_islands,
                 self.mig_prob, self.cx_prob,
diff --git a/src/variation.h b/src/variation.h
index 22d42a77..90421011 100644
--- a/src/variation.h
+++ b/src/variation.h
@@ -680,6 +680,7 @@ std::optional> cross(const Program& root, const Program& other)
     // fmt::print("other_spot : {}\n",other_spot.node->data);

     // swap subtrees at child_spot and other_spot
+    // TODO: do I need to delete the removed node?
     child.Tree.move_ontop(child_spot, other_spot);
     return child;
 }
diff --git a/tests/cpp/test_program.cpp b/tests/cpp/test_program.cpp
index de7084f9..1509ac02 100644
--- a/tests/cpp/test_program.cpp
+++ b/tests/cpp/test_program.cpp
@@ -31,6 +31,14 @@ TEST(Program, MakeRegressor)
         );

         ASSERT_TRUE( PRG.get_model("compact", true)==clone.get_model("compact", true) );
+
+        // probabilities are the same
+        vector PRG_weights(PRG.Tree.size());
+        std::transform(PRG.Tree.begin(), PRG.Tree.end(),
+                       PRG_weights.begin(),
+                       [](const auto& n){ return n.get_prob_change(); }
+                       );
+        ASSERT_TRUE( PRG.get_model("compact", true)==clone.get_model("compact", true) );
     }
 }
diff --git a/tests/python/test_brush.py b/tests/python/test_brush.py
index 1c7f759e..c9b55103 100644
--- a/tests/python/test_brush.py
+++ b/tests/python/test_brush.py
@@ -46,9 +46,11 @@ def regression_setup():

 @pytest.mark.parametrize('setup,algorithm', [('classification_setup', 'nsga2island'),
                                              ('classification_setup', 'nsga2'      ),
+                                             ('classification_setup', 'gaisland'   ),
                                              ('classification_setup', 'ga'         ),
                                              ('regression_setup',     'nsga2island'),
                                              ('regression_setup',     'nsga2'      ),
+                                             ('regression_setup',     'gaisland'   ),
                                              ('regression_setup',     'ga'         )])
 def test_fit(setup, algorithm, brush_args, request):
     """Testing common utilities related to fitting and generic brush estimator.
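
Editor's note on the island bookkeeping introduced in these patches: below is a minimal, standalone Python sketch of the partitioning and migration logic from nsga2island.py, using the same names (MU, N_ISLANDS, MIGPX, rnd_flt). The helper names island_slices and migrate, the use of random.random in place of the estimator's random-float generator, and the guard for the single-island case are illustrative assumptions, not part of the patches.

import random

def island_slices(MU, N_ISLANDS):
    # Contiguous (start, end) index pairs over one flat population;
    # island sizes differ by at most one when N_ISLANDS does not divide MU.
    return [((i * MU) // N_ISLANDS, ((i + 1) * MU) // N_ISLANDS)
            for i in range(N_ISLANDS)]

def migrate(new_pop, island_indexes, MIGPX, rnd_flt=random.random):
    # With probability MIGPX each slot is refilled by an individual drawn
    # from a different island; otherwise the survivor keeps its slot.
    MU = len(new_pop)
    pop = []
    for (idx_start, idx_end) in island_indexes:
        other_islands = list(range(0, idx_start)) + list(range(idx_end, MU))
        for idx_individual in range(idx_start, idx_end):
            if other_islands and rnd_flt() < MIGPX:
                idx_other = other_islands[int(rnd_flt() * len(other_islands))]
                pop.append(new_pop[idx_other])
            else:
                pop.append(new_pop[idx_individual])
    return pop

# Example: 10 individuals split over 3 islands -> [(0, 3), (3, 6), (6, 10)]
slices = island_slices(10, 3)
print(slices)
print(migrate(list(range(10)), slices, MIGPX=0.5))

Unlike the patched code, the sketch skips migration when other_islands is empty (N_ISLANDS=1), where indexing into the empty list would otherwise fail.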