Initializing weights #45

Merged (17 commits) on Aug 8, 2023
Changes from all commits
87 changes: 74 additions & 13 deletions src/bindings/bind_dataset.cpp
@@ -9,29 +9,90 @@ namespace nl = nlohmann;
void bind_dataset(py::module & m)
{
py::class_<br::Data::Dataset>(m, "Dataset")

// .def(py::init<Ref<const ArrayXXf> &>())
// construct from X (and optional validation and batch sizes) with constructor 3.
.def(py::init([](const Ref<const ArrayXXf>& X,
const float validation_size=0.0,
const float batch_size=1.0){
return br::Data::Dataset(
X, {}, validation_size, batch_size);
}),
py::arg("X"),
py::arg("validation_size") = 0.0,
py::arg("batch_size") = 1.0
)
// .def(py::init<
// const Ref<const ArrayXXf>&,
// const vector<string>&
// >()
// )
// construct from X, feature names (and optional validation and batch sizes) with constructor 3.
.def(py::init([](const Ref<const ArrayXXf>& X,
const vector<string>& feature_names,
const float validation_size=0.0,
const float batch_size=1.0){
return br::Data::Dataset(
X, feature_names, validation_size, batch_size);
}),
py::arg("X"),
py::arg("feature_names"),
py::arg("validation_size") = 0.0,
py::arg("batch_size") = 1.0
)

// construct from X, y arrays
// .def(py::init<Ref<const ArrayXXf> &, Ref<const ArrayXf> &>())
// construct from X, y arrays (and optional validation and batch sizes) with constructor 2.
.def(py::init([](const Ref<const ArrayXXf>& X,
const Ref<const ArrayXf>& y,
const float validation_size=0.0,
const float batch_size=1.0){
return br::Data::Dataset(
X, y, {}, {}, false, validation_size, batch_size);
}),
py::arg("X"),
py::arg("y"),
py::arg("validation_size") = 0.0,
py::arg("batch_size") = 1.0
)

// .def(py::init<
// const Ref<const ArrayXXf>&,
// const Ref<const ArrayXf>&,
// const vector<string>&
// >()
// )
// construct from X, y, feature names (and optional validation and batch sizes) with constructor 2.
.def(py::init([](const Ref<const ArrayXXf>& X,
const Ref<const ArrayXf>& y,
const vector<string>& feature_names,
const float validation_size=0.0,
const float batch_size=1.0){
return br::Data::Dataset(
X, y, feature_names, {}, false, validation_size, batch_size);
}),
py::arg("X"),
py::arg("y"),
py::arg("feature_names"),
py::arg("validation_size") = 0.0,
py::arg("batch_size") = 1.0
)

.def_readwrite("y", &br::Data::Dataset::y)
// .def_readwrite("features", &br::Data::Dataset::features)
.def("get_n_samples", &br::Data::Dataset::get_n_samples)
.def("get_n_features", &br::Data::Dataset::get_n_features)
.def("print", &br::Data::Dataset::print)
.def("get_batch", &br::Data::Dataset::get_batch)
.def("get_training_data", &br::Data::Dataset::get_training_data)
.def("get_validation_data", &br::Data::Dataset::get_validation_data)
.def("get_batch_size", &br::Data::Dataset::get_batch_size)
.def("set_batch_size", &br::Data::Dataset::set_batch_size)
.def("split", &br::Data::Dataset::split)
.def("get_X", &br::Data::Dataset::get_X)
;
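Taken together, these overloads expose the validation/batch split to Python. A minimal usage sketch, assuming the compiled module imports as `_brush` and that X is laid out samples × features (both are assumptions, not confirmed by this diff):

import numpy as np
import _brush  # assumed module name

X = np.random.rand(100, 3).astype(np.float32)  # ArrayXXf binds to float32
y = np.random.rand(100).astype(np.float32)

# hold out 20% of the data for validation; batches cover 50% of the training partition
data = _brush.Dataset(X, y, validation_size=0.2, batch_size=0.5)
print(data.get_n_samples(), data.get_n_features())
batch = data.get_batch()  # a new random batch on each call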

1 change: 1 addition & 0 deletions src/bindings/bind_params.cpp
@@ -17,4 +17,5 @@ void bind_params(py::module& m)
m.def("set_random_state", [](unsigned int seed)
{ br::Util::r = *br::Util::Rnd::initRand();
br::Util::r.set_seed(seed); });
m.def("rnd_flt", [](){ return br::Util::r.rnd_flt(); });
}
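Exposing rnd_flt lets the Python layer (the NSGA-II loop below) draw from the same seeded generator as the C++ core, so a run stays reproducible end to end. A sketch, again assuming the module imports as `_brush`:

import _brush  # assumed module name

_brush.set_random_state(42)
a = [_brush.rnd_flt() for _ in range(3)]
_brush.set_random_state(42)
b = [_brush.rnd_flt() for _ in range(3)]
assert a == b  # same seed, same stream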
49 changes: 36 additions & 13 deletions src/brush/deap_api/nsga2.py
@@ -1,49 +1,71 @@
from deap import tools
from deap.benchmarks.tools import diversity, convergence, hypervolume
import numpy as np
import random
import functools

def nsga2(toolbox, NGEN, MU, CXPB, use_batch, verbosity, rnd_flt):
    # NGEN = 250
    # MU = 100
    # CXPB = 0.9
    # rnd_flt: random number generator used to sample the crossover probability

    def calculate_statistics(ind):
        on_train = ind.fitness.values
        on_val = toolbox.evaluateValidation(ind)

        return (*on_train, *on_val)

    stats = tools.Statistics(calculate_statistics)

    stats.register("ave", np.mean, axis=0)
    stats.register("std", np.std, axis=0)
    stats.register("min", np.min, axis=0)
    stats.register("max", np.max, axis=0)

    logbook = tools.Logbook()
    logbook.header = "gen", "evals", "ave (O1 train, O2 train, O1 val, O2 val)", \
                     "std (O1 train, O2 train, O1 val, O2 val)", \
                     "min (O1 train, O2 train, O1 val, O2 val)"

    pop = toolbox.population(n=MU)

    batch = toolbox.getBatch()  # every time this function is called, a new random batch is generated

    # NOTE: evaluate calls fit on the individual, which is different from using it
    # to predict; evaluateValidation does not call fit
    fitnesses = toolbox.map(functools.partial(toolbox.evaluate, data=batch), pop)

    for ind, fit in zip(pop, fitnesses):
        ind.fitness.values = fit

    # This is just to assign the crowding distance to the individuals;
    # no actual selection is done
    pop = toolbox.survive(pop, len(pop))

    record = stats.compile(pop)
    logbook.record(gen=0, evals=len(pop), **record)

    if verbosity > 0:
        print(logbook.stream)

    # Begin the generational process
    for gen in range(1, NGEN):
        # the batch is random only when it is smaller than the entire training set;
        # otherwise the population would not need to be re-evaluated each generation
        if (use_batch):
            batch = toolbox.getBatch()
            fitnesses = toolbox.map(functools.partial(toolbox.evaluate, data=batch), pop)

            for ind, fit in zip(pop, fitnesses):
                ind.fitness.values = fit

        # Vary the population
        # offspring = tools.selTournamentDCD(pop, len(pop))
        parents = toolbox.select(pop, len(pop))
        # offspring = [toolbox.clone(ind) for ind in offspring]
        offspring = []

        for ind1, ind2 in zip(parents[::2], parents[1::2]):
            if rnd_flt() < CXPB:
                off1, off2 = toolbox.mate(ind1, ind2)
            else:
                off1, off2 = ind1, ind2
@@ -58,14 +80,15 @@ def nsga2(toolbox, NGEN, MU, CXPB, verbosity):
        # archive.update(offspring)
        # Evaluate the individuals with an invalid fitness
        invalid_ind = [ind for ind in offspring if not ind.fitness.valid]
        fitnesses = toolbox.map(functools.partial(toolbox.evaluate, data=batch), invalid_ind)
        for ind, fit in zip(invalid_ind, fitnesses):
            ind.fitness.values = fit

        # Select the next generation population
        pop = toolbox.survive(pop + offspring, MU)
        record = stats.compile(pop)
        logbook.record(gen=gen, evals=len(offspring)+(len(pop) if use_batch else 0), **record)

        if verbosity > 0:
            print(logbook.stream)

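A hedged sketch of how a caller might invoke the new signature, feeding the seeded C++ generator into the crossover decisions. The import paths and the make_toolbox helper are placeholders, not part of this diff; whatever builds the toolbox must register the population, evaluate, evaluateValidation, getBatch, select, survive, mate, and map entries used above:

import _brush                            # assumed module name
from brush.deap_api.nsga2 import nsga2   # assumed import path

_brush.set_random_state(42)              # seed the shared C++ RNG
toolbox = make_toolbox()                 # hypothetical helper; registers the entries listed above
nsga2(toolbox, NGEN=100, MU=50, CXPB=0.9,
      use_batch=True, verbosity=1,
      rnd_flt=_brush.rnd_flt)            # crossover probabilities drawn from the seeded generator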