Accuracy metric. Fixed hall of fame when merging fronts of multiple islands
gAldeia committed Sep 3, 2024
1 parent f29ab5a commit c0e0ec8
Showing 9 changed files with 125 additions and 4 deletions.
3 changes: 2 additions & 1 deletion pybrush/EstimatorInterface.py
@@ -254,7 +254,8 @@ def _wrap_parameters(self, **extra_kwargs):
assert self.scorer in ['mse'], \
"Invalid scorer for the regression mode"
else:
assert self.scorer in ['log', 'multi_log', 'average_precision_score'], \
assert self.scorer in ['log', 'multi_log',
'accuracy', 'average_precision_score'], \
"Invalid scorer for the classification mode"

params.scorer = self.scorer
1 change: 1 addition & 0 deletions src/eval/evaluation.h
@@ -33,6 +33,7 @@ class Evaluation {
Evaluation(){
// TODO: make eval update loss_v accordingly, and set it to the same as the train loss if there is no batch or no validation

// TODO: make accuracy the main classification metric?
string scorer;
if ( (T == Brush::ProgramType::MulticlassClassifier)
|| (T == Brush::ProgramType::Representer) )
62 changes: 62 additions & 0 deletions src/eval/metrics.cpp
@@ -57,6 +57,18 @@ float mean_log_loss(const VectorXf& y,
return loss.mean();
}

float zero_one_loss(const VectorXf& y,
const VectorXf& predict_proba, VectorXf& loss,
const vector<float>& class_weights )
{
// threshold the predicted probabilities at 0.5 to get hard labels
VectorXi yhat = (predict_proba.array() > 0.5).cast<int>();

// per-sample zero-one loss: 1 if misclassified, 0 otherwise
loss = (yhat.array() != y.cast<int>().array()).cast<float>();

// TODO: weight loss by sample weights
// note that the return value is the accuracy, not the mean loss
return 1.0 - loss.mean();
}
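As a quick sanity check of the semantics, here is a minimal standalone sketch (not part of the commit) that inlines a local copy of zero_one_loss so the snippet compiles on its own; only Eigen is assumed:

#include <Eigen/Dense>
#include <iostream>
#include <vector>

using Eigen::VectorXf;
using Eigen::VectorXi;

// local copy of the commit's zero_one_loss, so the sketch is self-contained
float zero_one_loss(const VectorXf& y, const VectorXf& predict_proba,
                    VectorXf& loss,
                    const std::vector<float>& /*class_weights*/ = {})
{
    VectorXi yhat = (predict_proba.array() > 0.5).cast<int>();
    loss = (yhat.array() != y.cast<int>().array()).cast<float>();
    return 1.0 - loss.mean(); // per-sample losses out, accuracy back
}

int main()
{
    VectorXf y(4), proba(4), loss;
    y     << 1, 0, 1, 0;         // true binary labels
    proba << 0.9, 0.2, 0.4, 0.6; // predicted P(y == 1)

    // yhat = [1, 0, 0, 1] after thresholding at 0.5, so accuracy = 2/4
    float acc = zero_one_loss(y, proba, loss);
    std::cout << "accuracy: " << acc << "\n"; // prints 0.5
}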

float average_precision_score(const VectorXf& y, const VectorXf& predict_proba,
VectorXf& loss,
const vector<float>& class_weights) {
@@ -199,5 +211,55 @@ float mean_multi_log_loss(const VectorXf& y,
return loss.mean();
}

float bal_zero_one_loss(const VectorXf& y,
const ArrayXXf& predict_proba, VectorXf& loss,
const vector<float>& class_weights )
{
// TODO: implement this
// vector<float> uc = unique(y);
// vector<int> c;
// for (const auto& i : uc)
// c.push_back(int(i));

// // sensitivity (TP) and specificity (TN)
// vector<float> TP(c.size(),0.0), TN(c.size(), 0.0), P(c.size(),0.0), N(c.size(),0.0);
// ArrayXf class_accuracies(c.size());

// // get class counts

// for (unsigned i=0; i< c.size(); ++i)
// {
// P.at(i) = (y.array().cast<int>() == c.at(i)).count(); // total positives for this class
// N.at(i) = (y.array().cast<int>() != c.at(i)).count(); // total negatives for this class
// }


// for (unsigned i = 0; i < y.rows(); ++i)
// {
// if (yhat(i) == y(i)) // true positive
// ++TP.at(y(i) == -1 ? 0 : y(i)); // if-then ? accounts for -1 class encoding

// for (unsigned j = 0; j < c.size(); ++j)
// if ( y(i) !=c.at(j) && yhat(i) != c.at(j) ) // true negative
// ++TN.at(j);

// }

// // class-wise accuracy = 1/2 ( true positive rate + true negative rate)
// for (unsigned i=0; i< c.size(); ++i){
// class_accuracies(i) = (TP.at(i)/P.at(i) + TN.at(i)/N.at(i))/2;
// //std::cout << "TP(" << i << "): " << TP.at(i) << ", P[" << i << "]: " << P.at(i) << "\n";
// //std::cout << "TN(" << i << "): " << TN.at(i) << ", N[" << i << "]: " << N.at(i) << "\n";
// //std::cout << "class accuracy(" << i << "): " << class_accuracies(i) << "\n";
// }

// // set loss vectors if third argument supplied
// loss = (yhat.cast<int>().array() != y.cast<int>().array()).cast<float>();

// return 1.0 - class_accuracies.mean();

return 0.0;
}

} // metrics
} // Brush
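bal_zero_one_loss above is left as a stub. For reference, a hedged standalone sketch of the balanced accuracy its commented-out body outlines (argmax predictions, then the per-class mean of (TPR + TNR) / 2); the function name is hypothetical, every class is assumed to occur in y, and whether the commit will ultimately return accuracy or 1 - accuracy is left open:

#include <Eigen/Dense>
#include <iostream>

using Eigen::ArrayXXf;
using Eigen::VectorXf;
using Eigen::VectorXi;

// hypothetical name; mirrors the commented-out bal_zero_one_loss outline
float balanced_accuracy_sketch(const VectorXf& y, const ArrayXXf& predict_proba)
{
    const int n_classes = predict_proba.cols();

    // argmax over class probabilities -> predicted class per sample
    VectorXi yhat(y.size());
    for (int i = 0; i < y.size(); ++i) {
        int best;
        predict_proba.row(i).maxCoeff(&best);
        yhat(i) = best;
    }

    float total = 0.0f;
    for (int c = 0; c < n_classes; ++c) {
        float TP = 0, TN = 0, P = 0, N = 0; // assumes every class occurs in y
        for (int i = 0; i < y.size(); ++i) {
            const bool positive = (int(y(i)) == c);
            if (positive) ++P; else ++N;
            if (positive && yhat(i) == c)  ++TP; // true positive for class c
            if (!positive && yhat(i) != c) ++TN; // true negative for class c
        }
        // class-wise accuracy = (TPR + TNR) / 2
        total += (TP / P + TN / N) / 2.0f;
    }
    return total / n_classes; // one-vs-all average
}

int main()
{
    ArrayXXf proba(4, 2);
    proba << 0.8f, 0.2f,
             0.3f, 0.7f,
             0.6f, 0.4f,
             0.1f, 0.9f;
    VectorXf y(4);
    y << 0, 1, 1, 1;
    // yhat = [0, 1, 0, 1]: both classes get (TPR + TNR) / 2 = 5/6
    std::cout << balanced_accuracy_sketch(y, proba) << "\n"; // ~0.8333
}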
25 changes: 25 additions & 0 deletions src/eval/metrics.h
@@ -60,6 +60,18 @@ float average_precision_score(const VectorXf& y, const VectorXf& predict_proba,
VectorXf& loss,
const vector<float>& class_weights=vector<float>());

/**
* @brief Accuracy for binary classification
* @param y The true labels.
* @param predict_proba The predicted probabilities.
* @param loss Reference to store the calculated losses for each sample.
* @param class_weights The optional class weights.
* @return The final accuracy.
*/
float zero_one_loss(const VectorXf& y, const VectorXf& predict_proba,
VectorXf& loss,
const vector<float>& class_weights=vector<float>() );

// multiclass classification ---------------------------------------------------

/**
@@ -84,6 +96,19 @@ float mean_multi_log_loss(const VectorXf& y, const ArrayXXf& predict_proba,
VectorXf& loss,
const vector<float>& class_weights=vector<float>());

/**
* @brief Accuracy for multiclass classification
* @param y The true labels.
* @param predict_proba The predicted probabilities.
* @param loss Reference to store the calculated losses for each sample.
* @param class_weights The optional class weights.
* @return The average class-wise accuracy in a one-vs-all scheme.
*/
float bal_zero_one_loss(const VectorXf& y, const ArrayXXf& predict_proba,
VectorXf& loss,
const vector<float>& class_weights=vector<float>() );


} // metrics
} // Brush

3 changes: 3 additions & 0 deletions src/eval/scorer.h
@@ -89,6 +89,7 @@ typedef float (*funcPointer)(const VectorXf&,
Scorer(string scorer="log") {
score_hash["log"] = &mean_log_loss;
score_hash["average_precision_score"] = &average_precision_score;
score_hash["accuracy"] = &zero_one_loss;

this->set_scorer(scorer);
};
@@ -138,8 +139,10 @@ typedef float (*funcPointer)(const VectorXf&,
std::map<string, funcPointer> score_hash;
string scorer;

// TODO: I actually need to test this stuff
Scorer(string scorer="multi_log") {
score_hash["multi_log"] = &mean_multi_log_loss;
score_hash["accuracy"] = &bal_zero_one_loss;

this->set_scorer(scorer);
};
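Both Scorer specializations dispatch through a string-keyed map of function pointers, which is how the new "accuracy" keys route to zero_one_loss and bal_zero_one_loss. A simplified self-contained sketch of that pattern, with stub metrics standing in for the real Brush signatures:

#include <Eigen/Dense>
#include <iostream>
#include <map>
#include <stdexcept>
#include <string>
#include <vector>

using Eigen::VectorXf;

// simplified stand-in for the funcPointer typedef in scorer.h
using MetricFn = float (*)(const VectorXf&, const VectorXf&,
                           VectorXf&, const std::vector<float>&);

// toy metrics with the shared signature (the real ones live in metrics.cpp)
float log_loss_stub(const VectorXf&, const VectorXf&, VectorXf&,
                    const std::vector<float>&) { return 0.69f; }
float accuracy_stub(const VectorXf&, const VectorXf&, VectorXf&,
                    const std::vector<float>&) { return 0.50f; }

int main()
{
    std::map<std::string, MetricFn> score_hash{
        {"log",      &log_loss_stub},
        {"accuracy", &accuracy_stub},
    };

    // set_scorer boils down to a keyed lookup; unknown names can be rejected
    const std::string scorer = "accuracy";
    if (score_hash.count(scorer) == 0)
        throw std::invalid_argument("unknown scorer: " + scorer);

    VectorXf y, proba, loss;
    std::cout << score_hash.at(scorer)(y, proba, loss, {}) << "\n"; // 0.5
}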
2 changes: 1 addition & 1 deletion src/ind/individual.h
@@ -141,7 +141,7 @@ class Individual{
{"log", -1.0},
{"multi_log", -1.0},
{"average_precision_score", +1.0},
{"accuracy", +1.0}
{"accuracy", +1.0} // TODO: make sure we are maximizing accuracy
// {"error", -1.0}
};

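A small illustration of the sign convention this map presumably encodes (the surrounding Individual code is not in the diff, so this is an assumption): multiplying each raw metric by its weight makes every objective larger-is-better, hence +1.0 for accuracy and -1.0 for log loss:

#include <iostream>
#include <map>
#include <string>

int main()
{
    const std::map<std::string, float> weightsMap{
        {"log", -1.0f},
        {"average_precision_score", +1.0f},
        {"accuracy", +1.0f},
    };

    // raw metric values: log loss is better when lower,
    // accuracy is better when higher
    const float raw_log = 0.35f, raw_acc = 0.90f;

    // after weighting, "larger is better" holds for both objectives
    std::cout << weightsMap.at("log") * raw_log << "\n";      // -0.35
    std::cout << weightsMap.at("accuracy") * raw_acc << "\n"; //  0.90
}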
1 change: 1 addition & 0 deletions src/pop/archive.cpp
@@ -110,6 +110,7 @@ void Archive<T>::update(Population<T>& pop, const Parameters& params)
individuals.resize(0); // clear archive

// refill archive with new pareto fronts (one pareto front for each island!)
// TODO: refill with fast nds just like hall of fame
for (int island =0; island< pop.num_islands; ++island) {
vector<size_t> indices = pop.get_island_indexes(island);

30 changes: 29 additions & 1 deletion src/pop/population.cpp
@@ -260,7 +260,8 @@ vector<vector<size_t>> Population<T>::sorted_front(unsigned rank)
template<ProgramType T>
vector<size_t> Population<T>::hall_of_fame(unsigned rank)
{
// TODO: hall of fame should unify all pareto fronts by doing a new fast_nds.
// Inspired by the fast non-dominated sort (fast_nds) from NSGA-II

// TODO: use hall of fame instead of re-implementing this feature in
// archive init and update functions

@@ -279,6 +280,33 @@ vector<size_t> Population<T>::hall_of_fame(unsigned rank)
}
}

// keep only the individuals that no other merged-front member dominates
// (without updating their fitness objects)
vector<size_t> hof; // indices that survive the dominance check

for (int i = 0; i < pf.size(); ++i) {

int dcount = 0; // how many merged-front members dominate p

auto p = individuals.at(pf[i]);

for (int j = 0; j < pf.size(); ++j) {
const Individual<T>& q = (*individuals.at(pf[j]));

int compare = p->fitness.dominates(q.fitness);
if (compare == -1) { // q dominates p
//p.dcounter += 1;
dcount += 1;
}
}

if (dcount == 0) {
hof.push_back(pf[i]);
}
}

// TODO: should I sort the hall of fame by complexity? or error?
std::sort(pf.begin(),pf.end(),SortComplexity(*this));

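The block added above pools the rank-0 fronts of every island and keeps only individuals that no pooled individual dominates. A standalone sketch of that merge step, using plain vectors for objectives and a boolean dominates helper in place of Brush's Fitness::dominates (whose -1 return the diff reads as "q dominates p"):

#include <iostream>
#include <vector>

// true if a dominates b: no worse on every objective, strictly better on one
// (all objectives maximized here for simplicity)
bool dominates(const std::vector<float>& a, const std::vector<float>& b)
{
    bool strictly_better = false;
    for (size_t k = 0; k < a.size(); ++k) {
        if (a[k] < b[k]) return false;
        if (a[k] > b[k]) strictly_better = true;
    }
    return strictly_better;
}

// keep only the points that no other pooled point dominates
std::vector<size_t> merged_front(const std::vector<std::vector<float>>& pooled)
{
    std::vector<size_t> hof;
    for (size_t i = 0; i < pooled.size(); ++i) {
        int dcount = 0; // how many pooled points dominate point i
        for (size_t j = 0; j < pooled.size(); ++j)
            if (j != i && dominates(pooled[j], pooled[i]))
                ++dcount;
        if (dcount == 0)
            hof.push_back(i);
    }
    return hof;
}

int main()
{
    // rank-0 fronts of two islands, pooled (two objectives each)
    std::vector<std::vector<float>> pooled = {
        {0.9f, 0.1f}, {0.5f, 0.5f}, {0.4f, 0.4f}, {0.1f, 0.9f}
    };
    for (size_t idx : merged_front(pooled))
        std::cout << idx << " "; // 0 1 3 (point 2 is dominated by point 1)
    std::cout << "\n";
}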
2 changes: 1 addition & 1 deletion src/selection/lexicase.cpp
@@ -116,7 +116,7 @@ vector<size_t> Lexicase<T>::select(Population<T>& pop, int island,
// minimum error on case
float minfit = std::numeric_limits<float>::max();

// get minimum
// get minimum (assuming minimization of individual errors)
for (size_t j = 0; j<pool.size(); ++j)
if (pop.individuals.at(pool[j])->error(cases[h]) < minfit)
minfit = pop.individuals.at(pool[j])->error(cases[h]);
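For context on the fixed comment, a compact sketch of the lexicase filtering loop it refers to: cases are visited in a pre-shuffled order, and only pool members that tie the per-case minimum error survive; names are illustrative, not Brush's implementation:

#include <algorithm>
#include <iostream>
#include <limits>
#include <vector>

// error[ind][c]: error of individual `ind` on fitness case `c`
size_t lexicase_pick(const std::vector<std::vector<float>>& error,
                     const std::vector<size_t>& cases) // pre-shuffled order
{
    std::vector<size_t> pool(error.size());
    for (size_t i = 0; i < pool.size(); ++i) pool[i] = i;

    for (size_t h : cases) {
        // minimum error on this case (minimization of individual errors)
        float minfit = std::numeric_limits<float>::max();
        for (size_t j : pool)
            minfit = std::min(minfit, error[j][h]);

        // keep only the individuals that achieve the minimum
        std::vector<size_t> survivors;
        for (size_t j : pool)
            if (error[j][h] <= minfit)
                survivors.push_back(j);
        pool.swap(survivors);

        if (pool.size() == 1) break; // selection is decided
    }
    return pool.front(); // with >1 survivors, a random one would be drawn
}

int main()
{
    // individual 2 ties the minimum on case 0, then wins case 1
    std::vector<std::vector<float>> error = {
        {0.9f, 0.1f},
        {0.2f, 0.8f},
        {0.2f, 0.3f},
    };
    std::cout << lexicase_pick(error, {0, 1}) << "\n"; // prints 2
}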
