Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Island ga implementation in python (cpp version is WIP) #50

Merged
merged 5 commits into from
Oct 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ dependencies:
- gxx >= 12.0
- ninja
- ceres-solver
- taskflow
- pybind11 #=2.6.2
- pytest #=6.2.4
- pydot
Expand Down
1 change: 1 addition & 0 deletions src/brush/deap_api/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
from .nsga2 import nsga2
from .nsga2island import nsga2island
from .utils import DeapIndividual
140 changes: 140 additions & 0 deletions src/brush/deap_api/nsga2island.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
from deap import tools
from deap.benchmarks.tools import diversity, convergence, hypervolume
import numpy as np
import functools


def nsga2island(toolbox, NGEN: int, MU: int, N_ISLANDS: int, MIGPX: float,
                CXPB: float, use_batch: bool, verbosity: int, rnd_flt):
    """Run an island-model NSGA-II evolutionary loop.

    The population of size ``MU`` is partitioned into ``N_ISLANDS`` contiguous
    index ranges. Selection, variation and survival happen independently per
    island each generation; afterwards each slot may be replaced (with
    probability ``MIGPX``) by a random individual from another island.

    Parameters
    ----------
    toolbox : deap.base.Toolbox
        Must provide ``population``, ``evaluate``, ``evaluateValidation``,
        ``select``, ``survive``, ``mate``, ``mutate``, ``Clone``, ``map`` and
        ``getBatch`` (registered by the caller, see estimator._setup_toolbox).
    NGEN : int
        Number of generations to run (generation 0 is the initial population).
    MU : int
        Total population size summed over all islands.
    N_ISLANDS : int
        Number of independent islands; ``N_ISLANDS=1`` behaves like plain nsga2.
    MIGPX : float
        Per-individual migration probability applied at the end of a generation.
    CXPB : float
        Probability of crossover; otherwise both parents are mutated.
    use_batch : bool
        When True, fitnesses are recomputed on a fresh mini-batch each
        generation via ``evaluateValidation`` (which does not refit weights).
    verbosity : int
        Print the logbook stream when > 0.
    rnd_flt : callable
        Zero-argument callable returning a float in [0, 1).

    Returns
    -------
    (archive, logbook) : (deap.tools.ParetoFront, deap.tools.Logbook)
        Pareto archive of the final population and the per-generation stats.
    """
    # NGEN = 250
    # MU = 100
    # CXPB = 0.9
    # N_ISLANDS: number of independent islands. Islands are controlled by indexes.
    # setting N_ISLANDS=1 would be the same as the original nsga2
    # rnd_flt: random number generator to sample crossover prob

    def calculate_statistics(ind):
        # Each logbook row aggregates train objectives followed by validation
        # objectives, hence the "(O1 train, O2 train, O1 val, O2 val)" headers.
        on_train = ind.fitness.values
        on_val = toolbox.evaluateValidation(ind)

        return (*on_train, *on_val)

    stats = tools.Statistics(calculate_statistics)

    stats.register("avg", np.mean, axis=0)
    stats.register("med", np.median, axis=0)
    stats.register("std", np.std, axis=0)
    stats.register("min", np.min, axis=0)
    stats.register("max", np.max, axis=0)

    logbook = tools.Logbook()
    logbook.header = "gen", "evals", "avg (O1 train, O2 train, O1 val, O2 val)", \
                     "med (O1 train, O2 train, O1 val, O2 val)", \
                     "std (O1 train, O2 train, O1 val, O2 val)", \
                     "min (O1 train, O2 train, O1 val, O2 val)", \
                     "max (O1 train, O2 train, O1 val, O2 val)"

    # Tuples with start and end indexes for each island. Number of individuals
    # in each island can slightly differ if N_ISLANDS is not a divisor of MU
    island_indexes = [((i*MU)//N_ISLANDS, ((i+1)*MU)//N_ISLANDS)
                      for i in range(N_ISLANDS)]

    pop = toolbox.population(n=MU)

    # NOTE(review): if toolbox.map is the lazy builtin map, this generator is
    # consumed by the zip below, which is what actually triggers evaluation —
    # confirm toolbox.map is registered to something that executes eagerly or
    # that every downstream consumer exhausts it.
    fitnesses = toolbox.map(functools.partial(toolbox.evaluate), pop)
    for ind, fit in zip(pop, fitnesses):
        ind.fitness.values = fit

    # Initial per-island survival pass so rank/crowding attributes are set
    # before the first selection. NOTE(review): assumes toolbox.survive
    # (e.g. selNSGA2) assigns those attributes — verify against registration.
    survived = []
    for (idx_start, idx_end) in island_indexes:
        survived_parents = toolbox.survive(pop[idx_start:idx_end],
                                           idx_end-idx_start)
        survived.extend(survived_parents)
    pop = survived

    record = stats.compile(pop)
    logbook.record(gen=0, evals=len(pop), **record)

    if verbosity > 0:
        print(logbook.stream)

    # Begin the generational process
    for gen in range(1, NGEN):
        batch = toolbox.getBatch() # batch will be a random subset only if it was not
                                   # defined as the size of the train set. everytime
                                   # this function is called, a new random batch is generated.

        if (use_batch): # recalculate the fitness for the parents
            # use_batch is True when batch_size is smaller than the train set
            # (caller passes 0.0 < batch_size < 1.0) — NOTE(review): the
            # original comment here stated the opposite; confirm with caller.
            # If we're using batch, we need to re-evaluate every model (without
            # changing its weights). evaluateValidation doesnt fit the weights
            fitnesses = toolbox.map(
                functools.partial(toolbox.evaluateValidation, data=batch), pop)

            for ind, fit in zip(pop, fitnesses):
                ind.fitness.values = fit

        # Vary the population inside each island
        parents = []
        for (idx_start, idx_end) in island_indexes:
            island_parents = toolbox.select(pop[idx_start:idx_end],
                                            idx_end-idx_start)
            parents.extend(island_parents)

        offspring = [] # Will have the same size as pop
        for (idx_start, idx_end) in island_indexes:
            # Pair consecutive parents (even, odd) within the island.
            # NOTE(review): if an island holds an odd number of parents, zip
            # drops the last one and len(offspring) < MU; the survive() call
            # below still returns idx_end-idx_start per island, but the
            # offspring[idx_start:idx_end] slices then shift across islands —
            # confirm MU/N_ISLANDS parity assumptions.
            for ind1, ind2 in zip(parents[idx_start:idx_end:2],
                                  parents[idx_start+1:idx_end:2]
                                  ):
                off1, off2 = None, None
                if rnd_flt() < CXPB: # either mutation or crossover
                    off1, off2 = toolbox.mate(ind1, ind2)
                else:
                    off1 = toolbox.mutate(ind1)
                    off2 = toolbox.mutate(ind2)

                # Inserting parent if mutation failed
                offspring.extend([off1 if off1 is not None else toolbox.Clone(ind1)])
                offspring.extend([off2 if off2 is not None else toolbox.Clone(ind2)])

        # Evaluate (instead of evaluateValidation) to fit the weights of the offspring
        # NOTE(review): when use_batch is True the evaluate() result is
        # immediately overwritten below; if toolbox.map is lazy, evaluate()
        # may never run and weights would not be fit — confirm.
        fitnesses = toolbox.map(functools.partial(toolbox.evaluate), offspring)
        if (use_batch): #calculating objectives based on batch
            fitnesses = toolbox.map(
                functools.partial(toolbox.evaluateValidation, data=batch), offspring)

        for ind, fit in zip(offspring, fitnesses):
            ind.fitness.values = fit

        # Select the next generation population: per-island (mu+lambda)
        # survival over parents plus that island's offspring slice.
        new_pop = []
        for (idx_start, idx_end) in island_indexes:
            island_new_pop = toolbox.survive(pop[idx_start:idx_end] \
                                             +offspring[idx_start:idx_end],
                                             idx_end-idx_start)
            new_pop.extend(island_new_pop)

        # Migration to fill up the islands for the next generation
        pop = []
        for (idx_start, idx_end) in island_indexes:
            # Candidate donors: every index outside the current island.
            other_islands = list(range(0, idx_start)) + list(range(idx_end, MU))
            for idx_individual in range(idx_start, idx_end):
                if rnd_flt() < MIGPX: # replace by someone not from the same island
                    idx_other_individual = other_islands[
                        int(rnd_flt() * len(other_islands))]
                    pop.append(new_pop[idx_other_individual])
                else:
                    pop.append(new_pop[idx_individual])

        record = stats.compile(pop)
        # evals counts offspring evaluations, plus the parent re-evaluations
        # performed when batching is enabled.
        logbook.record(gen=gen, evals=len(offspring)+(len(pop) if use_batch else 0), **record)

        if verbosity > 0:
            print(logbook.stream)

    if verbosity > 0:
        # Reference point [1000.0, 50.0] is hard-coded — NOTE(review): presumes
        # both objectives stay below these bounds; verify for new objectives.
        print("Final population hypervolume is %f" % hypervolume(pop, [1000.0, 50.0]))

    archive = tools.ParetoFront()
    archive.update(pop)

    return archive, logbook
34 changes: 26 additions & 8 deletions src/brush/estimator.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
# from tqdm import tqdm
from types import NoneType
import _brush
from .deap_api import nsga2, DeapIndividual
from .deap_api import nsga2, nsga2island, DeapIndividual
# from _brush import Dataset, SearchSpace


Expand All @@ -39,6 +39,12 @@ class BrushEstimator(BaseEstimator):
Maximum depth of GP trees in the GP program. Use 0 for no limit.
max_size : int, default 0
Maximum number of nodes in a tree. Use 0 for no limit.
n_islands : int, default 5
Number of independent islands to use in evolutionary framework.
Ignored if `algorithm!="nsga2island"`.
mig_prob : float, default 0.05
Probability of a migration occurring between two random islands at the
end of a generation; must be between 0 and 1.
cx_prob : float, default 1/7
Probability of applying the crossover variation when generating the offspring,
must be between 0 and 1.
Expand All @@ -60,7 +66,7 @@ class BrushEstimator(BaseEstimator):
initialization : {"grow", "full"}, default "grow"
Strategy to create the initial population. If `full`, then every expression is created
with `max_size` nodes. If `grow`, size will be uniformly distributed.
algorithm : {"nsga2", "ga"}, default "nsga2"
algorithm : {"nsga2island", "nsga2", "gaisland", "ga"}, default "nsga2island"
Which Evolutionary Algorithm framework to use to evolve the population.
validation_size : float, default 0.0
Percentage of samples to use as a hold-out partition. These samples are used
Expand Down Expand Up @@ -107,12 +113,14 @@ def __init__(
verbosity=0,
max_depth=3,
max_size=20,
n_islands=5,
mig_prob=0.05,
cx_prob= 1/7,
mutation_options = {"point":1/6, "insert":1/6, "delete":1/6, "subtree":1/6,
"toggle_weight_on":1/6, "toggle_weight_off":1/6},
functions: list[str]|dict[str,float] = {},
initialization="grow",
algorithm="nsga2",
algorithm="nsga2island",
random_state=None,
validation_size: float = 0.0,
batch_size: float = 1.0
Expand All @@ -124,6 +132,8 @@ def __init__(
self.mode=mode
self.max_depth=max_depth
self.max_size=max_size
self.n_islands=n_islands
self.mig_prob=mig_prob
self.cx_prob=cx_prob
self.mutation_options=mutation_options
self.functions=functions
Expand Down Expand Up @@ -156,10 +166,10 @@ def _setup_toolbox(self, data_train, data_validation):
# When solving multi-objective problems, selection and survival must
# support this feature. This means that these selection operators must
# accept a tuple of fitnesses as argument)
if self.algorithm=="nsga2":
if self.algorithm=="nsga2" or self.algorithm=="nsga2island":
toolbox.register("select", tools.selTournamentDCD)
toolbox.register("survive", tools.selNSGA2)
elif self.algorithm=="ga":
elif self.algorithm=="ga" or self.algorithm=="gaisland":
toolbox.register("select", tools.selTournament, tournsize=3)
def offspring(pop, MU): return pop[-MU:]
toolbox.register("survive", offspring)
Expand Down Expand Up @@ -249,9 +259,17 @@ def fit(self, X, y):
self.search_space_ = _brush.SearchSpace(self.train_, self.functions_)
self.toolbox_ = self._setup_toolbox(data_train=self.train_, data_validation=self.validation_)

self.archive_, self.logbook_ = nsga2(
self.toolbox_, self.max_gen, self.pop_size, self.cx_prob,
(0.0<self.batch_size<1.0), self.verbosity, _brush.rnd_flt)
if self.algorithm=="nsga2island" or self.algorithm=="gaisland":
self.archive_, self.logbook_ = nsga2island(
self.toolbox_, self.max_gen, self.pop_size, self.n_islands,
self.mig_prob, self.cx_prob,
(0.0<self.batch_size<1.0), self.verbosity, _brush.rnd_flt)
elif self.algorithm=="nsga2" or self.algorithm=="ga":
# nsga2 and ga differ in the toolbox
self.archive_, self.logbook_ = nsga2(
self.toolbox_, self.max_gen, self.pop_size, self.cx_prob,
(0.0<self.batch_size<1.0), self.verbosity, _brush.rnd_flt)


final_ind_idx = 0

Expand Down
11 changes: 11 additions & 0 deletions src/brushGA.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#include "brushGA.h"
#include <iostream>


using namespace Brush;

/// @brief Initialize the BrushGA object for fitting.
/// Currently a stub — the C++ island GA is work-in-progress (see brushGA.h).
/// NOTE(review): original comment said "Feat object", apparently copied from
/// the FEAT project.
void BrushGA::init()
{

}
69 changes: 69 additions & 0 deletions src/brushGA.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
/* Brush
copyright 2020 William La Cava
license: GNU/GPL v3
*/

#ifndef BrushGA_H
#define BrushGA_H

#include "init.h"
#include "taskflow/taskflow.hpp"

// TODO: improve the includes (why does this lines below does not work?)
// #include "variation.h"
// #include "selection.h"

// using namespace selection;
// using namespace variation;

namespace Brush
{

/// Skeleton of the C++ island-GA runner (WIP port of the Python
/// nsga2island implementation). Only construction and init() exist so far;
/// the commented lists below outline the planned public API and internals.
class BrushGA{
public:

    BrushGA(){}
    /// destructor
    ~BrushGA(){}

    /// Initialize the GA before fitting (stub; defined in brushGA.cpp).
    void init();

    //getters and setters for GA configuration.
    // getters and setters for the best solution found after evolution
    // predict, transform, predict_proba, etc.
    // get statistics
    // load and save best individuals
    // logger, save to file
    // execution archive
    // random state control
    // score functions
    // fit methods (this will run the evolution), run a single generation
private:
    // attributes (hyperparameters)
    // update best
    // calculate/print stats
};

int main(){

tf::Executor executor;
tf::Taskflow taskflow;

auto [A, B, C, D] = taskflow.emplace( // create four tasks
[] () { std::cout << "TaskA\n"; },
[] () { std::cout << "TaskB\n"; },
[] () { std::cout << "TaskC\n"; },
[] () { std::cout << "TaskD\n"; }
);

A.precede(B, C); // A runs before B and C
D.succeed(B, C); // D runs after B and C

executor.run(taskflow).wait();

return 0;
}

} // Brush

#endif
17 changes: 17 additions & 0 deletions src/selection.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
/* Brush
copyright 2020 William La Cava
license: GNU/GPL v3
*/

#ifndef SELECTION_H
#define SELECTION_H

namespace selection {

// Base class for selection operators (placeholder for the WIP C++ port of
// the Python toolbox's select/survive operators; no interface defined yet).
class SelectorBase {
public:
private:
};

} // selection
#endif
1 change: 1 addition & 0 deletions src/variation.h
Original file line number Diff line number Diff line change
Expand Up @@ -680,6 +680,7 @@ std::optional<Program<T>> cross(const Program<T>& root, const Program<T>& other)

// fmt::print("other_spot : {}\n",other_spot.node->data);
// swap subtrees at child_spot and other_spot
// TODO: do I need to delete the removed node?
child.Tree.move_ontop(child_spot, other_spot);
return child;
}
Expand Down
1 change: 1 addition & 0 deletions tests/cpp/test_program.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ TEST(Program, MakeRegressor)
);

ASSERT_TRUE( PRG.get_model("compact", true)==clone.get_model("compact", true) );

fmt::print("Models have the same representation\n");

// weights didnt changed
Expand Down
13 changes: 9 additions & 4 deletions tests/python/test_brush.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,16 +44,21 @@ def regression_setup():
return brush.BrushRegressor, X, y

@pytest.mark.parametrize('setup,algorithm',
[('classification_setup', 'nsga2'),
('classification_setup', 'ga' ),
('regression_setup', 'nsga2'),
('regression_setup', 'ga' )])
[('classification_setup', 'nsga2island'),
('classification_setup', 'nsga2' ),
('classification_setup', 'gaisland' ),
('classification_setup', 'ga' ),
('regression_setup', 'nsga2island'),
('regression_setup', 'nsga2' ),
('regression_setup', 'gaisland' ),
('regression_setup', 'ga' )])
def test_fit(setup, algorithm, brush_args, request):
"""Testing common utilities related to fitting and generic brush estimator.
"""

Estimator, X, y = request.getfixturevalue(setup)

brush_args["algorithm"] = algorithm
try:
est = Estimator(**brush_args)
est.fit(X, y)
Expand Down
Loading