Skip to content

Commit

Permalink
Fixed bandit using same probability for all arms
Browse files (browse the repository at this point in the history)
  • Loading branch information
gAldeia committed Oct 2, 2024
1 parent 7429ba6 commit 4ccf09c
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 29 deletions.
57 changes: 29 additions & 28 deletions src/bandit/linear_thompson.cpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -65,30 +65,31 @@ std::map<T, float> LinearThompsonSamplingBandit<T>::sample_probs(bool update) {
if (update)
{
MatrixXf w(n_arms, context_size);
MatrixXf r = 0.5 + 0.5*MatrixXf::Random(n_arms, context_size); // TODO: use random generator here
MatrixXf r = MatrixXf::Random(n_arms, context_size); // TODO: use random generator here
for (int i = 0; i < n_arms; ++i) {
w.row(i) = B_inv_sqrt[i] * r; // mat mul
w.row(i) = (B_inv_sqrt[i] * r.row(i)).transpose(); // mat mul
}

w = mean + w;

VectorXf u(n_arms);
// u = w * last_context; // mat mul
u = (w * last_context).transpose(); // mat mul

for (int i = 0; i < n_arms; ++i) {
// cout << "Dot product for row " << i;
float dot_product = w.row(i).dot(last_context);
if (std::isnan(dot_product))
{
dot_product = 0.0f;
// cout << "(nan)";
}
// cout << "Dot product for row " << i << ": " << dot_product << endl;
// for (int i = 0; i < n_arms; ++i) {
// // cout << "Dot product for row " << i;
// float dot_product = w.row(i).dot(last_context);
// if (std::isnan(dot_product))
// {
// dot_product = 0.0f;
// // cout << "(nan)";
// }
// // cout << "Dot product for row " << i << ": " << dot_product << endl;

u(i) = dot_product;
}
// u(i) = dot_product;
// }

for (int i = 0; i < n_arms; ++i) {
this->probabilities[arm_index_to_key[i]] = u(i);
this->probabilities[arm_index_to_key[i]] = std::exp(u(i));
}

// // Calculate probabilities
Expand Down Expand Up @@ -125,29 +126,29 @@ T LinearThompsonSamplingBandit<T>::choose(const VectorXf& context) {
MatrixXf w(n_arms, context_size);
MatrixXf r = MatrixXf::Random(n_arms, context_size); // TODO: use random generator here
for (int i = 0; i < n_arms; ++i) {
w.row(i) = B_inv_sqrt[i] * r; // mat mul
w.row(i) = (B_inv_sqrt[i] * r.row(i)).transpose(); // mat mul
}

w = mean + w;

// cout << "w: " << w << endl;
VectorXf u(n_arms);
// u = w * context; // mat mul
u = (w * context).transpose(); // mat mul
// cout << "u: " << u << endl;

for (int i = 0; i < n_arms; ++i) {
// cout << "Dot product for row " << i;
float dot_product = w.row(i).dot(context);
if (std::isnan(dot_product))
{
dot_product = 0.0f;
// cout << "(nan)";
}
// for (int i = 0; i < n_arms; ++i) {
// // cout << "Dot product for row " << i;
// float dot_product = w.row(i).dot(context);
// if (std::isnan(dot_product))
// {
// dot_product = 0.0f;
// // cout << "(nan)";
// }

// cout << "Dot product for row " << i << ": " << dot_product << endl;
// // cout << "Dot product for row " << i << ": " << dot_product << endl;

u(i) = dot_product;
}
// u(i) = dot_product;
// }

Eigen::Index max_index;
float max_value = u.maxCoeff(&max_index);
Expand Down
2 changes: 1 addition & 1 deletion src/engine.cpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -294,7 +294,7 @@ bool Engine<T>::update_best()
// TODO: use intermediary variables for wvalues
// Iterate over the weighted values to compare (everything is a maximization problem here)
passed = false;
for (size_t j = 0; j < this->best_ind.fitness.get_wvalues().size(); ++j) {
for (size_t j = 0; j < ind.fitness.get_wvalues().size(); ++j) {
if (ind.fitness.get_wvalues()[j] > this->best_ind.fitness.get_wvalues()[j]) {
passed = true;
break;
Expand Down

0 comments on commit 4ccf09c

Please sign in to comment.