Skip to content

Commit

Permalink
Bandit update needs to know the context (tree and fitness) for some learners
Browse files Browse the repository at this point in the history
  • Loading branch information
gAldeia committed Sep 19, 2024
1 parent bbfa112 commit f3c8383
Show file tree
Hide file tree
Showing 10 changed files with 12 additions and 11 deletions.
4 changes: 2 additions & 2 deletions src/bandit/bandit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,8 @@ T Bandit<T>::choose(tree<Node>& tree, Fitness& f) {
}

template <typename T>
void Bandit<T>::update(T arm, float reward) {
this->pbandit->update(arm, reward);
void Bandit<T>::update(T arm, float reward, tree<Node>* tree, Fitness* f) {
this->pbandit->update(arm, reward, tree, f);
}

} // MAB
Expand Down
2 changes: 1 addition & 1 deletion src/bandit/bandit.h
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ struct Bandit
* @param arm The chosen arm.
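* @param reward The received reward.
* @param tree Optional pointer to the tree giving the context of the update; defaults to nullptr and is forwarded to the underlying bandit.
* @param f Optional pointer to the fitness giving the context of the update; defaults to nullptr and is forwarded to the underlying bandit.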
*/
void update(T arm, float reward);
void update(T arm, float reward, tree<Node>* tree=nullptr, Fitness* f=nullptr);
};

//TODO: serialization should save the type of bandit and its parameters
Expand Down
2 changes: 1 addition & 1 deletion src/bandit/bandit_operator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ T BanditOperator<T>::choose(tree<Node>& tree, Fitness& f)


template<typename T>
void BanditOperator<T>::update(T arm, float reward)
void BanditOperator<T>::update(T arm, float reward, tree<Node>* tree, Fitness* f)
{
// TODO: Implement the logic for updating the bandit operator's internal state
// based on the received rewards
Expand Down
2 changes: 1 addition & 1 deletion src/bandit/bandit_operator.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ class BanditOperator
* @param arm The arm for which to update the reward.
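* @param reward The reward value.
* @param tree Optional pointer to the tree used as context for the update; defaults to nullptr.
* @param f Optional pointer to the fitness used as context for the update; defaults to nullptr.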
*/
virtual void update(T arm, float reward);
virtual void update(T arm, float reward, tree<Node>* tree=nullptr, Fitness* f=nullptr); // TODO: this should not have a default value in the future
protected:
std::map<T, float> probabilities;
};
Expand Down
3 changes: 2 additions & 1 deletion src/bandit/dummy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ T DummyBandit<T>::choose(tree<Node>& tree, Fitness& f) {
}

template <typename T>
void DummyBandit<T>::update(T arm, float reward) {
void DummyBandit<T>::update(T arm, float reward, tree<Node>* tree, Fitness* f) {
// Do nothing
}

} // MAB
Expand Down
2 changes: 1 addition & 1 deletion src/bandit/dummy.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ class DummyBandit : public BanditOperator<T>

std::map<T, float> sample_probs(bool update);
T choose(tree<Node>& tree, Fitness& f);
void update(T arm, float reward);
void update(T arm, float reward, tree<Node>* tree=nullptr, Fitness* f=nullptr);

private:
// additional stuff should come here
Expand Down
2 changes: 1 addition & 1 deletion src/bandit/linear_thompson.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ T LinearThompsonSamplingBandit<T>::choose(tree<Node>& tree, Fitness& f) {
}

template <typename T>
void LinearThompsonSamplingBandit<T>::update(T arm, float reward) {
void LinearThompsonSamplingBandit<T>::update(T arm, float reward, tree<Node>* tree, Fitness* f) {

}

Expand Down
2 changes: 1 addition & 1 deletion src/bandit/linear_thompson.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ class LinearThompsonSamplingBandit : public BanditOperator<T>

std::map<T, float> sample_probs(bool update);
T choose(tree<Node>& tree, Fitness& f);
void update(T arm, float reward);
void update(T arm, float reward, tree<Node>* tree=nullptr, Fitness* f=nullptr);
private:
};

Expand Down
2 changes: 1 addition & 1 deletion src/bandit/thompson.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ T ThompsonSamplingBandit<T>::choose(tree<Node>& tree, Fitness& f) {
}

template <typename T>
void ThompsonSamplingBandit<T>::update(T arm, float reward) {
void ThompsonSamplingBandit<T>::update(T arm, float reward, tree<Node>* tree, Fitness* f) {
// reward must be either 0 or 1

alphas[arm] += reward;
Expand Down
2 changes: 1 addition & 1 deletion src/bandit/thompson.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ class ThompsonSamplingBandit : public BanditOperator<T>

std::map<T, float> sample_probs(bool update);
T choose(tree<Node>& tree, Fitness& f);
void update(T arm, float reward);
void update(T arm, float reward, tree<Node>* tree=nullptr, Fitness* f=nullptr);
private:
bool dynamic_update;
float C = 1000;
Expand Down

0 comments on commit f3c8383

Please sign in to comment.