Skip to content

Commit

Permalink
Merge pull request #43 from cavalab/coherence_and_error_handling
Browse files Browse the repository at this point in the history
Coherence and error handling
  • Loading branch information
lacava authored Apr 19, 2024
2 parents 646b2df + 3a9ebc7 commit 88c8e0c
Show file tree
Hide file tree
Showing 29 changed files with 1,970 additions and 756 deletions.
87 changes: 74 additions & 13 deletions src/bindings/bind_dataset.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,29 +9,90 @@ namespace nl = nlohmann;
void bind_dataset(py::module & m)
{
py::class_<br::Data::Dataset>(m, "Dataset")

// construct from X
.def(py::init<Ref<const ArrayXXf> &>())
// .def(py::init<Ref<const ArrayXXf> &>())
// construct from X (and optional validation and batch sizes) with constructor 3.
.def(py::init([](const Ref<const ArrayXXf>& X,
const float validation_size=0.0,
const float batch_size=1.0){
return br::Data::Dataset(
X, {}, validation_size, batch_size);
}),
py::arg("X"),
py::arg("validation_size") = 0.0,
py::arg("batch_size") = 1.0
)
// construct from X, feature names
.def(py::init<
const Ref<const ArrayXXf>&,
const vector<string>&
>()
// .def(py::init<
// const Ref<const ArrayXXf>&,
// const vector<string>&
// >()
// )
// construct from X, feature names (and optional validation and batch sizes) with constructor 3.
.def(py::init([](const Ref<const ArrayXXf>& X,
const vector<string>& feature_names,
const float validation_size=0.0,
const float batch_size=1.0){
return br::Data::Dataset(
X, feature_names, validation_size, batch_size);
}),
py::arg("X"),
py::arg("feature_names"),
py::arg("validation_size") = 0.0,
py::arg("batch_size") = 1.0
)
// construct from X,y arrays
.def(py::init<Ref<const ArrayXXf> &, Ref<const ArrayXf> &>())

// construct from X, y arrays
// .def(py::init<Ref<const ArrayXXf> &, Ref<const ArrayXf> &>())
// construct from X, y arrays (and optional validation and batch sizes) with constructor 2.
.def(py::init([](const Ref<const ArrayXXf>& X,
const Ref<const ArrayXf>& y,
const float validation_size=0.0,
const float batch_size=1.0){
return br::Data::Dataset(
X, y, {}, {}, false, validation_size, batch_size);
}),
py::arg("X"),
py::arg("y"),
py::arg("validation_size") = 0.0,
py::arg("batch_size") = 1.0
)

// construct from X, y, feature names
.def(py::init<
const Ref<const ArrayXXf>&,
const Ref<const ArrayXf>&,
const vector<string>&
>()
// .def(py::init<
// const Ref<const ArrayXXf>&,
// const Ref<const ArrayXf>&,
// const vector<string>&
// >()
// )
// construct from X, y, feature names (and optional validation and batch sizes) with constructor 2.
.def(py::init([](const Ref<const ArrayXXf>& X,
const Ref<const ArrayXf>& y,
const vector<string>& feature_names,
const float validation_size=0.0,
const float batch_size=1.0){
return br::Data::Dataset(
X, y, feature_names, {}, false, validation_size, batch_size);
}),
py::arg("X"),
py::arg("y"),
py::arg("feature_names"),
py::arg("validation_size") = 0.0,
py::arg("batch_size") = 1.0
)

.def_readwrite("y", &br::Data::Dataset::y)
// .def_readwrite("features", &br::Data::Dataset::features)
// .def_readwrite("features", &br::Data::Dataset::features)
.def("get_n_samples", &br::Data::Dataset::get_n_samples)
.def("get_n_features", &br::Data::Dataset::get_n_features)
.def("print", &br::Data::Dataset::print)
.def("get_batch", &br::Data::Dataset::get_batch)
.def("get_training_data", &br::Data::Dataset::get_training_data)
.def("get_validation_data", &br::Data::Dataset::get_validation_data)
.def("get_batch_size", &br::Data::Dataset::get_batch_size)
.def("set_batch_size", &br::Data::Dataset::set_batch_size)
.def("split", &br::Data::Dataset::split)
.def("get_X", &br::Data::Dataset::get_X)
;

Expand Down
10 changes: 9 additions & 1 deletion src/bindings/bind_params.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
#include "module.h"
#include "../params.h"
#include "../util/rnd.h"

namespace br = Brush;

void bind_params(py::module& m)
{
Expand All @@ -9,5 +12,10 @@ void bind_params(py::module& m)
// py::class_<br::Params>(m, "Params", py::dynamic_attr())
// .def(py::init<>())

m.def("set_params", &Brush::set_params);
m.def("set_params", &br::set_params);
m.def("get_params", &br::get_params);
m.def("set_random_state", [](unsigned int seed)
{ br::Util::r = *br::Util::Rnd::initRand();
br::Util::r.set_seed(seed); });
m.def("rnd_flt", [](){ return br::Util::r.rnd_flt(); });
}
9 changes: 6 additions & 3 deletions src/bindings/bind_programs.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,12 @@ void bind_program(py::module& m, string name)
)
.def("get_dot_model", &T::get_dot_model, py::arg("extras")="")
.def("get_weights", &T::get_weights)
.def("size", &T::size)
.def("cross", &T::cross)
.def("mutate", &T::mutate) // static_cast<T &(T::*)()>(&T::mutate))
.def("size", &T::size, py::arg("include_weight")=true)
.def("depth", &T::depth)
.def("cross", &T::cross, py::return_value_policy::automatic,
"Performs one attempt to stochastically swap subtrees between two programs and generate a child")
.def("mutate", &T::mutate, py::return_value_policy::automatic,
"Performs one attempt to stochastically mutate the program and generate a child")
.def("set_search_space", &T::set_search_space)
.def(py::pickle(
[](const T &p) { // __getstate__
Expand Down
74 changes: 52 additions & 22 deletions src/brush/deap_api/nsga2.py
Original file line number Diff line number Diff line change
@@ -1,68 +1,98 @@
from deap import tools
from deap.benchmarks.tools import diversity, convergence, hypervolume
import numpy as np
import random
import functools

def nsga2(toolbox, NGEN, MU, CXPB, verbosity):

def nsga2(toolbox, NGEN, MU, CXPB, use_batch, verbosity, rnd_flt):
# NGEN = 250
# MU = 100
# MU = 100
# CXPB = 0.9
# rnd_flt: random number generator to sample crossover prob

def calculate_statistics(ind):
on_train = ind.fitness.values
on_val = toolbox.evaluateValidation(ind)

return (*on_train, *on_val)

stats = tools.Statistics(lambda ind: ind.fitness.values)
stats.register("ave", np.mean, axis=0)
stats = tools.Statistics(calculate_statistics)

stats.register("avg", np.mean, axis=0)
stats.register("med", np.median, axis=0)
stats.register("std", np.std, axis=0)
stats.register("min", np.min, axis=0)
# stats.register("max", np.max, axis=0)
stats.register("max", np.max, axis=0)

logbook = tools.Logbook()
logbook.header = "gen", "evals", "ave", "std", "min"
logbook.header = "gen", "evals", "avg (O1 train, O2 train, O1 val, O2 val)", \
"med (O1 train, O2 train, O1 val, O2 val)", \
"std (O1 train, O2 train, O1 val, O2 val)", \
"min (O1 train, O2 train, O1 val, O2 val)", \
"max (O1 train, O2 train, O1 val, O2 val)"

pop = toolbox.population(n=MU)


# Evaluate the individuals with an invalid fitness
invalid_ind = [ind for ind in pop if not ind.fitness.valid]
fitnesses = toolbox.map(toolbox.evaluate, invalid_ind)
for ind, fit in zip(invalid_ind, fitnesses):
batch = toolbox.getBatch() # every time this function is called, a new random batch is generated

# OBS: evaluate calls fit on the individual. It is different from using it to predict. The
# function evaluateValidation does not call fit
fitnesses = toolbox.map(functools.partial(toolbox.evaluate, data=batch), pop)

for ind, fit in zip(pop, fitnesses):
ind.fitness.values = fit

# This is just to assign the crowding distance to the individuals
# no actual selection is done
pop = toolbox.survive(pop, len(pop))

record = stats.compile(pop)
logbook.record(gen=0, evals=len(invalid_ind), **record)
logbook.record(gen=0, evals=len(pop), **record)

if verbosity > 0:
print(logbook.stream)

# Begin the generational process
for gen in range(1, NGEN):
# The batch will be random only if it is not the size of the entire train set.
# In this case, we don't need to reevaluate the whole pop
if (use_batch):
batch = toolbox.getBatch()
fitnesses = toolbox.map(functools.partial(toolbox.evaluate, data=batch), pop)

for ind, fit in zip(pop, fitnesses):
ind.fitness.values = fit

# Vary the population
# offspring = tools.selTournamentDCD(pop, len(pop))
parents = toolbox.select(pop, len(pop))
# offspring = [toolbox.clone(ind) for ind in offspring]
offspring = []

for ind1, ind2 in zip(parents[::2], parents[1::2]):
if random.random() <= CXPB:
ind1, ind2 = toolbox.mate(ind1, ind2)

off1 = toolbox.mutate(ind1)
off2 = toolbox.mutate(ind2)
# del ind1.fitness.values, ind2.fitness.values
offspring.extend([off2, off2])
off1, off2 = None, None
if rnd_flt() < CXPB:
off1, off2 = toolbox.mate(ind1, ind2)
else:
off1 = toolbox.mutate(ind1)
off2 = toolbox.mutate(ind2)

# avoid inserting empty solutions
if off1 is not None: offspring.extend([off1])
if off2 is not None: offspring.extend([off2])

# archive.update(offspring)
# Evaluate the individuals with an invalid fitness
invalid_ind = [ind for ind in offspring if not ind.fitness.valid]
fitnesses = toolbox.map(toolbox.evaluate, invalid_ind)
fitnesses = toolbox.map(functools.partial(toolbox.evaluate, data=batch), invalid_ind)
for ind, fit in zip(invalid_ind, fitnesses):
ind.fitness.values = fit

# Select the next generation population
pop = toolbox.survive(pop + offspring, MU)
record = stats.compile(pop)
logbook.record(gen=gen, evals=len(invalid_ind), **record)
logbook.record(gen=gen, evals=len(offspring)+(len(pop) if use_batch else 0), **record)

if verbosity > 0:
print(logbook.stream)

Expand Down
Loading

0 comments on commit 88c8e0c

Please sign in to comment.