From eaba77589be3c6c1686ebb907d0752c9e3fbb5cc Mon Sep 17 00:00:00 2001 From: gAldeia Date: Mon, 29 Jul 2024 11:43:37 -0300 Subject: [PATCH] dynamic bandit implemented --- src/bandit/bandit.cpp | 4 +++- src/bandit/thompson.cpp | 28 +++++++++++++++++++++++----- src/bandit/thompson.h | 8 ++++---- src/engine.cpp | 2 +- src/params.h | 2 +- src/vary/variation.h | 7 ++++++- 6 files changed, 38 insertions(+), 13 deletions(-) diff --git a/src/bandit/bandit.cpp b/src/bandit/bandit.cpp index 4c61e12a..e360d66f 100644 --- a/src/bandit/bandit.cpp +++ b/src/bandit/bandit.cpp @@ -5,7 +5,7 @@ namespace MAB { template Bandit::Bandit() { - set_type("dummy"); + set_type("dynamic_thompson"); set_arms({}); set_probs({}); set_bandit(); @@ -42,6 +42,8 @@ void Bandit::set_bandit() { // other methods to raise an error if bandit was not set if (type == "thompson") { pbandit = make_unique>(probabilities); + } else if (type == "dynamic_thompson") { + pbandit = make_unique>(probabilities, true); } else if (type == "dummy") { pbandit = make_unique>(probabilities); } else { diff --git a/src/bandit/thompson.cpp b/src/bandit/thompson.cpp index f297c5e9..51fd17bb 100644 --- a/src/bandit/thompson.cpp +++ b/src/bandit/thompson.cpp @@ -4,8 +4,9 @@ namespace Brush { namespace MAB { template -ThompsonSamplingBandit::ThompsonSamplingBandit(vector arms) +ThompsonSamplingBandit::ThompsonSamplingBandit(vector arms, bool dynamic) : BanditOperator(arms) + , dynamic_update(dynamic) { for (const auto& arm : arms) { alphas[arm] = 2; @@ -14,8 +15,9 @@ ThompsonSamplingBandit::ThompsonSamplingBandit(vector arms) } template -ThompsonSamplingBandit::ThompsonSamplingBandit(map arms_probs) +ThompsonSamplingBandit::ThompsonSamplingBandit(map arms_probs, bool dynamic) : BanditOperator(arms_probs) + , dynamic_update(dynamic) { for (const auto& pair : arms_probs) { alphas[pair.first] = 2; @@ -26,7 +28,8 @@ ThompsonSamplingBandit::ThompsonSamplingBandit(map arms_probs) template std::map ThompsonSamplingBandit::sample_probs(bool update) { - + // gets sampling probabilities using the bandit + // from https://stackoverflow.com/questions/4181403/generate-random-number-based-on-beta-distribution-using-boost // You'll first want to draw a random number uniformly from the // range (0,1). Given any distribution, you can then plug that number @@ -59,8 +62,16 @@ std::map ThompsonSamplingBandit::sample_probs(bool update) { prob = X/(X+Y); - this->probabilities[arm] = prob; + // avoiding deadlocks when sampling from search space + this->probabilities[arm] = std::max(prob, 0.01f); + } + + // assert that the sum is not zero + float totalProb = 0.0f; + for (const auto& pair : this->probabilities) { + totalProb += pair.second; } + assert(totalProb != 0.0f && "Sum of probabilities is zero!"); } return this->probabilities; @@ -69,8 +80,15 @@ std::map ThompsonSamplingBandit::sample_probs(bool update) { template void ThompsonSamplingBandit::update(T arm, float reward) { // reward must be either 0 or 1 + alphas[arm] += reward; - betas[arm] += 1 - reward; + betas[arm] += 1-reward; + + if (dynamic_update && alphas[arm] + betas[arm] >= C) + { + alphas[arm] *= C/(C+1) ; + betas[arm] *= C/(C+1) ; + } } } // MAB diff --git a/src/bandit/thompson.h b/src/bandit/thompson.h index f12fa2e1..17ff4fb7 100644 --- a/src/bandit/thompson.h +++ b/src/bandit/thompson.h @@ -20,15 +20,15 @@ template class ThompsonSamplingBandit : public BanditOperator { public: - ThompsonSamplingBandit(vector arms); - ThompsonSamplingBandit(map arms_probs); + ThompsonSamplingBandit(vector arms, bool dynamic=false); + ThompsonSamplingBandit(map arms_probs, bool dynamic=false); ~ThompsonSamplingBandit(){}; std::map sample_probs(bool update); void update(T arm, float reward); - private: - // additional stuff should come here + bool dynamic_update; + float C = 1000; std::map alphas; std::map betas; diff --git a/src/engine.cpp b/src/engine.cpp index 7c70addc..a216551e 100644 --- a/src/engine.cpp +++ b/src/engine.cpp @@ -531,7 +531,7 @@ void Engine::run(Dataset &data) [&]() { // getting the updated versions - if (params.bandit != "duummy") + if (params.bandit != "dummy") { // TODO: make the probabilities add up to 1 (this doesnt matter for the cpp side, but it is a good practice and helps comparing different probabilities) this->ss = variator.search_space; diff --git a/src/params.h b/src/params.h index cb46e294..f417d391 100644 --- a/src/params.h +++ b/src/params.h @@ -36,7 +36,7 @@ struct Parameters unsigned int max_size = 50; vector objectives{"error","complexity"}; // error should be generic and deducted based on mode - string bandit = "dummy"; // should I rename that? + string bandit = "dynamic_thompson"; // TODO: should I rename dummy? string sel = "lexicase"; //selection method string surv = "nsga2"; //survival method std::unordered_map functions; diff --git a/src/vary/variation.h b/src/vary/variation.h index ea5350c5..3241ebab 100644 --- a/src/vary/variation.h +++ b/src/vary/variation.h @@ -100,6 +100,11 @@ class Variation { this->variation_bandit = Bandit(parameters.bandit, variation_probs); + // TODO: should I set C parameter based on pop size or leave it fixed? + // TODO: update string comparisons to use .compare method + // if (parameters.bandit.compare("dynamic_thompson")==0) + // this->variation_bandit.pbandit.set_C(parameters.pop_size); + // initializing one bandit for each terminal type for (const auto& entry : this->search_space.terminal_weights) { // entry is a tuple > where the vector is the weights @@ -119,7 +124,7 @@ class Variation { } } - + // TODO: op bandit? // this->op_bandit = Bandit(this->parameters.bandit, // this->search_space.node_map_weights.size() );