From b31d3102aac843af7935cddb0b22566d80b6c261 Mon Sep 17 00:00:00 2001
From: gAldeia
Date: Mon, 23 Sep 2024 09:16:47 -0300
Subject: [PATCH] Improved variation attempts. Fixed segfault in subtree
 mutation. Zero rewards if the mutation fails.

---
 pybrush/EstimatorInterface.py  |  3 +-
 src/bandit/bandit.cpp          |  4 +-
 src/bandit/bandit.h            |  2 +-
 src/bandit/bandit_operator.cpp |  2 +-
 src/bandit/bandit_operator.h   |  2 +-
 src/bandit/dummy.cpp           |  2 +-
 src/bandit/dummy.h             |  2 +-
 src/bandit/linear_thompson.cpp |  2 +-
 src/bandit/linear_thompson.h   |  2 +-
 src/bandit/thompson.cpp        |  2 +-
 src/bandit/thompson.h          |  2 +-
 src/vary/variation.cpp         | 97 +++++++++++++++++++---------------
 src/vary/variation.h           | 37 +++++++++----
 tests/cpp/test_variation.cpp   |  2 +-
 14 files changed, 94 insertions(+), 67 deletions(-)

diff --git a/pybrush/EstimatorInterface.py b/pybrush/EstimatorInterface.py
index 062dd5b5..efb6159c 100644
--- a/pybrush/EstimatorInterface.py
+++ b/pybrush/EstimatorInterface.py
@@ -82,7 +82,8 @@ class EstimatorInterface():
     weights_init : bool, default True
         Whether the search space should initialize the sampling weights of terminal nodes
         based on the correlation with the output y. If `False`, then all terminal nodes
-        will have the same probability of 1.0.
+        will have the same probability of 1.0. This parameter is ignored if a bandit
+        strategy is used, in which case the weights are learned dynamically during the run.
     validation_size : float, default 0.0
         Percentage of samples to use as a hold-out partition. These samples are used
         to calculate statistics during evolution, but not used to train the models.
diff --git a/src/bandit/bandit.cpp b/src/bandit/bandit.cpp
index 7555a4bb..d5b92037 100644
--- a/src/bandit/bandit.cpp
+++ b/src/bandit/bandit.cpp
@@ -87,8 +87,8 @@ map<T, float> Bandit<T>::sample_probs(bool update) {
 }
 
 template <typename T>
-T Bandit<T>::choose(tree<Node>& tree, Fitness& f) {
-    return this->pbandit->choose(tree, f);
+T Bandit<T>::choose(VectorXf& context) {
+    return this->pbandit->choose(context);
 }
 
 template <typename T>
diff --git a/src/bandit/bandit.h b/src/bandit/bandit.h
index 310819a1..951eed28 100644
--- a/src/bandit/bandit.h
+++ b/src/bandit/bandit.h
@@ -107,7 +107,7 @@ struct Bandit
      * @param f The fitness function used to evaluate and select nodes.
      * @return T The selected arm from the tree.
      */
-    T choose(tree<Node>& tree, Fitness& f);
+    T choose(VectorXf& context);
 
     /**
      * @brief Updates the bandit's state based on the chosen arm and the received reward.
diff --git a/src/bandit/bandit_operator.cpp b/src/bandit/bandit_operator.cpp
index 01c87018..41988786 100644
--- a/src/bandit/bandit_operator.cpp
+++ b/src/bandit/bandit_operator.cpp
@@ -38,7 +38,7 @@ std::map<T, float> BanditOperator<T>::sample_probs(bool update)
 }
 
 template <typename T>
-T BanditOperator<T>::choose(tree<Node>& tree, Fitness& f)
+T BanditOperator<T>::choose(VectorXf& context)
 {
     // TODO: Implement the logic for sampling probabilities
     // based on the bandit operator's strategy
diff --git a/src/bandit/bandit_operator.h b/src/bandit/bandit_operator.h
index a88245eb..52ef1d26 100644
--- a/src/bandit/bandit_operator.h
+++ b/src/bandit/bandit_operator.h
@@ -54,7 +54,7 @@ class BanditOperator
      * @param f The fitness value used to influence the choice.
      * @return The arm with highest probability.
      */
-    virtual T choose(tree<Node>& tree, Fitness& f);
+    virtual T choose(VectorXf& context);
 
     /**
     * @brief Updates the reward for a specific arm.
diff --git a/src/bandit/dummy.cpp b/src/bandit/dummy.cpp
index d392b7cb..fc1e79a6 100644
--- a/src/bandit/dummy.cpp
+++ b/src/bandit/dummy.cpp
@@ -9,7 +9,7 @@ std::map<T, float> DummyBandit<T>::sample_probs(bool update) {
 }
 
 template <typename T>
-T DummyBandit<T>::choose(tree<Node>& tree, Fitness& f) {
+T DummyBandit<T>::choose(VectorXf& context) {
     // std::map<T, float> probs = this->sample_probs(false);
 
     return r.random_choice(this->probabilities);
diff --git a/src/bandit/dummy.h b/src/bandit/dummy.h
index 53e01d5e..9c3f9729 100644
--- a/src/bandit/dummy.h
+++ b/src/bandit/dummy.h
@@ -19,7 +19,7 @@ class DummyBandit : public BanditOperator<T>
     ~DummyBandit(){};
 
     std::map<T, float> sample_probs(bool update);
-    T choose(tree<Node>& tree, Fitness& f);
+    T choose(VectorXf& context);
     void update(T arm, float reward, VectorXf& context);
 
 private:
diff --git a/src/bandit/linear_thompson.cpp b/src/bandit/linear_thompson.cpp
index 1016fc42..d971230b 100644
--- a/src/bandit/linear_thompson.cpp
+++ b/src/bandit/linear_thompson.cpp
@@ -22,7 +22,7 @@ std::map<T, float> LinearThompsonSamplingBandit<T>::sample_probs(bool update) {
 }
 
 template <typename T>
-T LinearThompsonSamplingBandit<T>::choose(tree<Node>& tree, Fitness& f) {
+T LinearThompsonSamplingBandit<T>::choose(VectorXf& context) {
     // TODO: use context here
     std::map<T, float> probs = this->sample_probs(true);
 
diff --git a/src/bandit/linear_thompson.h b/src/bandit/linear_thompson.h
index 7691ce65..2d389a97 100644
--- a/src/bandit/linear_thompson.h
+++ b/src/bandit/linear_thompson.h
@@ -25,7 +25,7 @@ class LinearThompsonSamplingBandit : public BanditOperator<T>
     ~LinearThompsonSamplingBandit(){};
 
     std::map<T, float> sample_probs(bool update);
-    T choose(tree<Node>& tree, Fitness& f);
+    T choose(VectorXf& context);
     void update(T arm, float reward, VectorXf& context);
 private:
 };
diff --git a/src/bandit/thompson.cpp b/src/bandit/thompson.cpp
index 630b6e30..18e730e4 100644
--- a/src/bandit/thompson.cpp
+++ b/src/bandit/thompson.cpp
@@ -78,7 +78,7 @@ std::map<T, float> ThompsonSamplingBandit<T>::sample_probs(bool update) {
 }
 
 template <typename T>
-T ThompsonSamplingBandit<T>::choose(tree<Node>& tree, Fitness& f) {
+T ThompsonSamplingBandit<T>::choose(VectorXf& context) {
     std::map<T, float> probs = this->sample_probs(true);
 
     return r.random_choice(probs);
diff --git a/src/bandit/thompson.h b/src/bandit/thompson.h
index 421467db..3f7b317f 100644
--- a/src/bandit/thompson.h
+++ b/src/bandit/thompson.h
@@ -25,7 +25,7 @@ class ThompsonSamplingBandit : public BanditOperator<T>
     ~ThompsonSamplingBandit(){};
 
     std::map<T, float> sample_probs(bool update);
-    T choose(tree<Node>& tree, Fitness& f);
+    T choose(VectorXf& context);
     void update(T arm, float reward, VectorXf& context);
 private:
     bool dynamic_update;
diff --git a/src/vary/variation.cpp b/src/vary/variation.cpp
index cc13f54b..d19dc42e 100644
--- a/src/vary/variation.cpp
+++ b/src/vary/variation.cpp
@@ -537,57 +537,63 @@ std::tuple<std::optional<Individual<T>>, VectorXf> Variation<T>::cross(
  * @return `std::optional` that may contain the child program of type `T`
  */
 template <Brush::ProgramType T>
-std::tuple<std::optional<Individual<T>>, VectorXf> Variation<T>::mutate(const Individual<T>& parent)
+std::tuple<std::optional<Individual<T>>, VectorXf> Variation<T>::mutate(const Individual<T>& parent, string choice)
 {
-    auto options = parameters.mutation_probs;
+    if (choice.empty())
+    {
+        // std::cout << "Will sample a mut choice" << std::endl;
+        auto options = parameters.mutation_probs;
 
-    bool all_zero = true;
-    for (auto &it : parameters.mutation_probs) {
-        if (it.second > 0.0) {
-            all_zero = false;
-            break;
+        bool all_zero = true;
+        for (auto &it : parameters.mutation_probs) {
+            if (it.second > 0.0) {
+                all_zero = false;
+                break;
+            }
         }
+
+        if (all_zero) { // No mutation can be successfully applied to this solution
+            return std::make_tuple(std::nullopt, VectorXf());
+        }
+
+        // picking a valid mutation option
+        choice = r.random_choice(parameters.mutation_probs);
+    }
+
+    // std::cout << "Mutation choice: " << choice << std::endl;
+
+    Program<T> copy(parent.program);
+
+    vector<float> weights; // choose location by weighted sampling of program
+    if (choice == "point") // TODO: use enum here to optimize
+        weights = PointMutation::find_spots(copy.Tree, search_space, parameters);
+    else if (choice == "insert")
+        weights = InsertMutation::find_spots(copy.Tree, search_space, parameters);
+    else if (choice == "delete")
+        weights = DeleteMutation::find_spots(copy.Tree, search_space, parameters);
+    else if (choice == "subtree")
+        weights = SubtreeMutation::find_spots(copy.Tree, search_space, parameters);
+    else if (choice == "toggle_weight_on")
+        weights = ToggleWeightOnMutation::find_spots(copy.Tree, search_space, parameters);
+    else if (choice == "toggle_weight_off")
+        weights = ToggleWeightOffMutation::find_spots(copy.Tree, search_space, parameters);
+    else {
+        std::string msg = fmt::format("{} not a valid mutation choice", choice);
+        HANDLE_ERROR_THROW(msg);
     }
 
-    if (all_zero)
-    { // No mutation can be successfully applied to this solution
+    if (std::all_of(weights.begin(), weights.end(), [](const auto& w) {
+        return w<=0.0;
+    }))
+    { // There is no spot that has a probability to be selected
         return std::make_tuple(std::nullopt, VectorXf());
     }
-
-    Program<T> child(parent.program);
 
     int attempts = 0;
     while(++attempts <= 3)
     {
-        // choose a valid mutation option
-        string choice = r.random_choice(parameters.mutation_probs);
-
-        vector<float> weights;
-
-        // choose location by weighted sampling of program
-        if (choice == "point") // TODO: use enum here to optimize
-            weights = PointMutation::find_spots(child.Tree, search_space, parameters);
-        else if (choice == "insert")
-            weights = InsertMutation::find_spots(child.Tree, search_space, parameters);
-        else if (choice == "delete")
-            weights = DeleteMutation::find_spots(child.Tree, search_space, parameters);
-        else if (choice == "subtree")
-            weights = SubtreeMutation::find_spots(child.Tree, search_space, parameters);
-        else if (choice == "toggle_weight_on")
-            weights = ToggleWeightOnMutation::find_spots(child.Tree, search_space, parameters);
-        else if (choice == "toggle_weight_off")
-            weights = ToggleWeightOffMutation::find_spots(child.Tree, search_space, parameters);
-        else {
-            std::string msg = fmt::format("{} not a valid mutation choice", choice);
-            HANDLE_ERROR_THROW(msg);
-        }
-
-        if (std::all_of(weights.begin(), weights.end(), [](const auto& w) {
-            return w<=0.0;
-        }))
-        { // There is no spot that has a probability to be selected
-            continue;
-        }
+        // std::cout << "Attempt: " << attempts << std::endl;
+        Program<T> child(parent.program);
 
         // apply the mutation and check if it succeeded
         auto spot = r.select_randomly(child.Tree.begin(), child.Tree.end(),
@@ -614,7 +620,8 @@ std::tuple<std::optional<Individual<T>>, VectorXf> Variation<T>::mutate(const In
 
         if (success && ( (child.size() <= parameters.max_size) &&
             (child.depth() <= parameters.max_depth) )){
-
+            // std::cout << "Mutation succeeded on attempt " << attempts << std::endl;
+
             Individual<T> ind(child);
             ind.set_variation(choice);
 
@@ -626,8 +633,9 @@ std::tuple<std::optional<Individual<T>>, VectorXf> Variation<T>::mutate(const In
 
             VectorXf context = this->variation_bandit.get_context(parent.program.Tree, spot);
             return std::make_tuple(ind, context);
-        } else {
-            continue;
+        }
+        else { // resetting
+            // std::cout << "Mutation failed on attempt " << attempts << std::endl;
         }
     }
 
@@ -668,9 +676,12 @@ void Variation<T>::vary(Population<T>& pop, int island,
         }
         else
         {
+            // std::cout << "Performing mutation " << std::endl;
             auto variation_result = mutate(mom);
+            // cout << "finished mutation" << endl;
             ind_parents = {mom};
             tie(opt, context) = variation_result;
+            // cout << "unpacked" << endl;
         }
 
         // this assumes that islands do not share indexes before doing variation
diff --git a/src/vary/variation.h b/src/vary/variation.h
index 4ba5ecf7..fcffd51a 100644
--- a/src/vary/variation.h
+++ b/src/vary/variation.h
@@ -172,7 +172,7 @@ class Variation {
      * successful, or an empty optional otherwise.
      */
     std::tuple<std::optional<Individual<T>>, VectorXf> mutate(
-        const Individual<T>& parent);
+        const Individual<T>& parent, string choice="");
 
     /**
      * @brief Handles variation of a population.
@@ -234,10 +234,11 @@ class Variation {
                 *r.select_randomly(parents.begin(), parents.end())];
 
             vector<Individual<T>> ind_parents;
-            VectorXf context = {};
+            VectorXf context = this->variation_bandit.get_context(mom.program.Tree, mom.program.Tree.begin());
 
-            bool crossover = (r() < parameters.cx_prob);
-            if (crossover)
+            string choice = this->variation_bandit.choose(context);
+
+            if (choice == "cx")
             {
                 const Individual<T>& dad = pop[
                     *r.select_randomly(parents.begin(), parents.end())];
@@ -249,10 +250,12 @@ class Variation {
             }
             else
             {
-                // std::cout << "Performing mutation" << std::endl;
-                auto variation_result = mutate(mom);
+                // std::cout << "Performing mutation " << choice << std::endl;
+                auto variation_result = mutate(mom, choice);
+                // cout << "finished mutation" << endl;
                 ind_parents = {mom};
                 tie(opt, context) = variation_result;
+                // cout << "unpacked" << endl;
             }
 
             // this assumes that islands do not share indexes before doing variation
@@ -327,10 +330,17 @@ class Variation {
                         r = 1.0;
 
                     // std::cout << "Updating variation bandit with reward: " << r << std::endl;
-                    this->variation_bandit.update(ind.get_variation(), r, context);
 
-                    if (ind.get_variation() != "born" && ind.get_variation() != "cx"
-                        && ind.get_variation() != "subtree")
+                    if (ind.get_variation() != "born")
+                    {
+                        this->variation_bandit.update(ind.get_variation(), r, context);
+                    }
+                    else
+                    { // giving zero reward if the variation failed
+                        this->variation_bandit.update(choice, 0.0, context);
+                    }
+
+                    if (ind.get_variation() != "born" && ind.get_variation() != "cx")
                     {
                         if (ind.get_sampled_nodes().size() > 0) {
                             const auto& changed_nodes = ind.get_sampled_nodes();
@@ -366,9 +376,14 @@ class Variation {
 
     map> terminal_bandits;
     map> op_bandits;
 
-    // these functions will extract context and use it to choose the nodes to replace
+    // the functions below will extract context and use it to choose the nodes to replace
+
+    // bandit_get_node_like
+    // bandit_sample_op_with_arg
     //bandit_sample_terminal
-    //bandit_sample_op
+    // bandit_sample_op
+    // bandit_sample_subtree
+    // etc.
 
 };
diff --git a/tests/cpp/test_variation.cpp b/tests/cpp/test_variation.cpp
index 97c2fd48..ca72d3c8 100644
--- a/tests/cpp/test_variation.cpp
+++ b/tests/cpp/test_variation.cpp
@@ -33,7 +33,7 @@ TEST(Variation, FixedRootDoesntChange)
     Variation variator = Variation(params, SS);
 
     int successes = 0;
-    for (int attempt = 0; attempt < 10; ++attempt)
+    for (int attempt = 0; attempt < 50; ++attempt)
     {
         // different program types changes how predict works (and the rettype of predict)
         ClassifierProgram PRG = SS.make_classifier(0, 0, params);
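
Note (editor's sketch, not part of the patch): the snippet below is a minimal, self-contained C++ approximation of the control flow this patch moves toward -- a bandit picks the variation operator from a context vector, the chosen mutation is attempted, and the bandit receives the real reward on success or zero reward on failure. Names such as SimpleBandit, Program, and apply_mutation are hypothetical stand-ins and are not Brush/pybrush APIs.

#include <iostream>
#include <map>
#include <optional>
#include <random>
#include <string>
#include <utility>
#include <vector>

struct Program { int size = 1; };                 // stand-in for a GP program

class SimpleBandit {                              // stand-in for a Bandit over operator names
public:
    explicit SimpleBandit(std::vector<std::string> arms) : arms_(std::move(arms)) {}
    std::string choose(const std::vector<float>& /*context*/) {
        // a real contextual bandit would use the context; this one picks uniformly
        std::uniform_int_distribution<size_t> d(0, arms_.size() - 1);
        return arms_[d(rng_)];
    }
    void update(const std::string& arm, float reward) {
        rewards_[arm] += reward;                  // accumulate reward per arm
    }
private:
    std::vector<std::string> arms_;
    std::map<std::string, float> rewards_;
    std::mt19937 rng_{42};
};

// Attempted mutation: may fail, mirroring mutate() returning an empty optional.
std::optional<Program> apply_mutation(const Program& parent, const std::string& /*choice*/) {
    static std::mt19937 rng{0};
    std::bernoulli_distribution ok(0.5);
    if (!ok(rng)) return std::nullopt;            // variation failed
    Program child = parent;
    child.size += 1;
    return child;
}

int main() {
    SimpleBandit bandit({"point", "insert", "delete", "subtree", "cx"});
    Program mom;
    std::vector<float> context = {0.0f, 1.0f};    // in the patch, derived from the parent tree

    for (int i = 0; i < 5; ++i) {
        std::string choice = bandit.choose(context);
        auto child = apply_mutation(mom, choice);
        // zero reward if the variation failed, otherwise reward the chosen arm
        bandit.update(choice, child ? 1.0f : 0.0f);
        std::cout << choice << (child ? " succeeded\n" : " failed\n");
    }
}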