Skip to content

Commit

Permalink
test: [epsilon decay] find champ change in simulator (#4228)
Browse files Browse the repository at this point in the history
* test: [epsilon decay] find champ change in simulator

* clang

* fix audit string

* help string

* clang
  • Loading branch information
bassmang authored Oct 18, 2022
1 parent 873c0de commit 28e7a79
Show file tree
Hide file tree
Showing 6 changed files with 51 additions and 79 deletions.
3 changes: 3 additions & 0 deletions test/train-sets/ref/help.stdout
Original file line number Diff line number Diff line change
Expand Up @@ -572,6 +572,9 @@ Weight Options:
--min_champ_examples arg Minimum number of examples for any challenger to become champion
(type: uint, default: 0, keep, experimental)
--initial_epsilon arg Initial epsilon value (type: float, default: 1, keep, experimental)
--shift_model_bounds arg Shift maximum update_count for model i from champ_update_count^(i
/ num_models) to champ_update_count^((i + shift) / (num_models
+ shift)) (type: uint, default: 0, keep, experimental)
[Reduction] Error Correcting Tournament Options:
--ect arg Error correcting tournament with <k> labels (type: uint, keep,
necessary)
Expand Down
75 changes: 16 additions & 59 deletions test/unit_test/epsilon_decay_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -39,82 +39,39 @@ BOOST_AUTO_TEST_CASE(epsilon_decay_test_init)
float with_save = ctr.back();
}

// TODO: Current simulator cannot create champ changes with confidence sequence estimator
/*BOOST_AUTO_TEST_CASE(epsilon_decay_test_champ_change)
BOOST_AUTO_TEST_CASE(epsilon_decay_test_champ_change)
{
const size_t num_iterations = 10000;
const std::vector<uint64_t> swap_after = {200, 500, 1000, 5000};
const size_t num_iterations = 8000;
const std::vector<uint64_t> swap_after = {5000};
const float scale_reward = 0.2f;
const size_t seed = 100;
const size_t deterministic_champ_switch = 5781;
const size_t deterministic_champ_switch = 7920;
callback_map test_hooks;

test_hooks.emplace(deterministic_champ_switch - 1, [&](cb_sim&, VW::workspace& all, VW::multi_ex&) {
epsilon_decay_data* epsilon_decay = epsilon_decay_test::get_epsilon_decay_data(all);
BOOST_CHECK_EQUAL(epsilon_decay->conf_seq_estimators[0][0].update_count, 15);
BOOST_CHECK_EQUAL(epsilon_decay->conf_seq_estimators[1][0].update_count, 15);
BOOST_CHECK_EQUAL(epsilon_decay->conf_seq_estimators[2][0].update_count, 15);
BOOST_CHECK_EQUAL(epsilon_decay->conf_seq_estimators[3][0].update_count, 15);
BOOST_CHECK_EQUAL(epsilon_decay->conf_seq_estimators[1][1].update_count, 41);
BOOST_CHECK_EQUAL(epsilon_decay->conf_seq_estimators[2][1].update_count, 41);
BOOST_CHECK_EQUAL(epsilon_decay->conf_seq_estimators[3][1].update_count, 41);
BOOST_CHECK_EQUAL(epsilon_decay->conf_seq_estimators[2][2].update_count, 459);
BOOST_CHECK_EQUAL(epsilon_decay->conf_seq_estimators[3][2].update_count, 459);
BOOST_CHECK_EQUAL(epsilon_decay->conf_seq_estimators[3][3].update_count, 5780);
BOOST_CHECK_EQUAL(epsilon_decay->conf_seq_estimators[0][0].update_count, 2183);
BOOST_CHECK_EQUAL(epsilon_decay->conf_seq_estimators[1][0].update_count, 2183);
BOOST_CHECK_EQUAL(epsilon_decay->conf_seq_estimators[1][1].update_count, 7919);
return true;
});

test_hooks.emplace(deterministic_champ_switch, [&](cb_sim&, VW::workspace& all, VW::multi_ex&) {
epsilon_decay_data* epsilon_decay = epsilon_decay_test::get_epsilon_decay_data(all);
BOOST_CHECK_EQUAL(epsilon_decay->conf_seq_estimators[0][0].update_count, 0);
BOOST_CHECK_EQUAL(epsilon_decay->conf_seq_estimators[1][0].update_count, 0);
BOOST_CHECK_EQUAL(epsilon_decay->conf_seq_estimators[2][0].update_count, 0);
BOOST_CHECK_EQUAL(epsilon_decay->conf_seq_estimators[3][0].update_count, 0);
BOOST_CHECK_EQUAL(epsilon_decay->conf_seq_estimators[1][1].update_count, 16);
BOOST_CHECK_EQUAL(epsilon_decay->conf_seq_estimators[2][1].update_count, 16);
BOOST_CHECK_EQUAL(epsilon_decay->conf_seq_estimators[3][1].update_count, 16);
BOOST_CHECK_EQUAL(epsilon_decay->conf_seq_estimators[2][2].update_count, 42);
BOOST_CHECK_EQUAL(epsilon_decay->conf_seq_estimators[3][2].update_count, 42);
BOOST_CHECK_EQUAL(epsilon_decay->conf_seq_estimators[3][3].update_count, 460);
BOOST_CHECK_EQUAL(epsilon_decay->conf_seq_estimators[1][1].update_count, 2184);
return true;
});

// The reduction is initialized with position 0 as champ; that config is hard-coded to empty.
auto ctr = simulator::_test_helper_hook(
"--epsilon_decay --epsilon_decay_significance_level .9 --model_count 4 --cb_explore_adf --quiet -q ::",
test_hooks, num_iterations, seed, swap_after);
BOOST_CHECK_GT(ctr.back(), 0.8f);
}*/

// TODO: Current simulator cannot create champ changes with confidence sequence estimator
/*BOOST_AUTO_TEST_CASE(epsilon_decay_test_champ_change_with_min)
{
const size_t num_iterations = 6000;
const std::vector<uint64_t> swap_after = {5000};
const size_t seed = 100;
const size_t deterministic_champ_switch = 5782;
callback_map test_hooks;
"--epsilon_decay --epsilon_decay_significance_level .9 --model_count 2 --cb_explore_adf --quiet -q :: "
"--shift_model_bounds 10",
test_hooks, num_iterations, seed, swap_after, scale_reward);

test_hooks.emplace(deterministic_champ_switch - 1, [&](cb_sim&, VW::workspace& all, VW::multi_ex&) {
epsilon_decay_data* epsilon_decay = epsilon_decay_test::get_epsilon_decay_data(all);
BOOST_CHECK_EQUAL(epsilon_decay->conf_seq_estimators[2][2].update_count, 460);
BOOST_CHECK_EQUAL(epsilon_decay->conf_seq_estimators[3][3].update_count, 5781);
return true;
});
test_hooks.emplace(deterministic_champ_switch, [&](cb_sim&, VW::workspace& all, VW::multi_ex&) {
epsilon_decay_data* epsilon_decay = epsilon_decay_test::get_epsilon_decay_data(all);
BOOST_CHECK_EQUAL(epsilon_decay->conf_seq_estimators[3][3].update_count, 461);
return true;
});
// we initialize the reduction pointing to position 0 as champ, that config is hard-coded to empty
auto ctr = simulator::_test_helper_hook(
"--epsilon_decay --model_count 4 --min_champ_examples 461 --cb_explore_adf --quiet -q ::", test_hooks,
num_iterations, seed, swap_after);
BOOST_CHECK_GT(ctr.back(), 0.8f);
}*/
BOOST_CHECK_GT(ctr.back(), 0.6f);
}

BOOST_AUTO_TEST_CASE(epsilon_decay_test_update_count)
{
Expand Down Expand Up @@ -252,7 +209,7 @@ BOOST_AUTO_TEST_CASE(epsilon_decay_test_score_bounds_unit)
uint64_t num_models = 5;
uint32_t wpp = 8;
dense_parameters dense_weights(num_models);
epsilon_decay_data ep_data(num_models, 100, .05, .1, dense_weights, "", false, wpp, false, 0, 1.f);
epsilon_decay_data ep_data(num_models, 100, .05, .1, dense_weights, "", false, wpp, false, 0, 1.f, 0);

// Set update counts to fixed values with expected horizon bound violation
size_t score_idx = 0;
Expand Down Expand Up @@ -336,7 +293,7 @@ BOOST_AUTO_TEST_CASE(epsilon_decay_test_horizon_bounds_unit)
uint64_t num_models = 5;
uint32_t wpp = 8;
dense_parameters dense_weights(num_models);
epsilon_decay_data ep_data(num_models, 100, .05, .1, dense_weights, "", false, wpp, false, 0, 1.f);
epsilon_decay_data ep_data(num_models, 100, .05, .1, dense_weights, "", false, wpp, false, 0, 1.f, 0);

// Set update counts to fixed values with expected horizon bound violation
size_t score_idx = 0;
Expand Down
15 changes: 8 additions & 7 deletions test/unit_test/simulator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ cb_sim::cb_sim(uint64_t seed)
callback_count = 0;
}

float cb_sim::get_reaction(
const std::map<std::string, std::string>& context, const std::string& action, bool add_noise, bool swap_reward)
float cb_sim::get_reaction(const std::map<std::string, std::string>& context, const std::string& action, bool add_noise,
bool swap_reward, float scale_reward)
{
float like_reward = USER_LIKED_ARTICLE;
float dislike_reward = USER_DISLIKED_ARTICLE;
Expand Down Expand Up @@ -53,7 +53,7 @@ float cb_sim::get_reaction(
}
}

if (swap_reward) { return (reward == like_reward) ? dislike_reward : like_reward; }
if (swap_reward) { return scale_reward * ((reward == like_reward) ? dislike_reward : like_reward); }
return reward;
}

Expand Down Expand Up @@ -131,7 +131,8 @@ void cb_sim::call_if_exists(VW::workspace& vw, VW::multi_ex& ex, const callback_
}

std::vector<float> cb_sim::run_simulation_hook(VW::workspace* vw, size_t num_iterations, callback_map& callbacks,
bool do_learn, size_t shift, bool add_noise, uint64_t num_useless_features, const std::vector<uint64_t>& swap_after)
bool do_learn, size_t shift, bool add_noise, uint64_t num_useless_features, const std::vector<uint64_t>& swap_after,
float scale_reward)
{
// check if there's a callback for the first possible element,
// in this case most likely 0th event
Expand Down Expand Up @@ -169,7 +170,7 @@ std::vector<float> cb_sim::run_simulation_hook(VW::workspace* vw, size_t num_ite

// 4. Get cost of the action we chose
// Check for reward swap
float cost = get_reaction(context, chosen_action, add_noise, swap_reward);
float cost = get_reaction(context, chosen_action, add_noise, swap_reward, scale_reward);
cost_sum += cost;

if (do_learn)
Expand Down Expand Up @@ -241,12 +242,12 @@ std::vector<float> _test_helper_save_load(const std::string& vw_arg, size_t num_
}

std::vector<float> _test_helper_hook(const std::string& vw_arg, callback_map& hooks, size_t num_iterations, int seed,
const std::vector<uint64_t>& swap_after)
const std::vector<uint64_t>& swap_after, float scale_reward)
{
BOOST_CHECK(true);
auto* vw = VW::initialize(vw_arg);
simulator::cb_sim sim(seed);
auto ctr = sim.run_simulation_hook(vw, num_iterations, hooks, true, 1, false, 0, swap_after);
auto ctr = sim.run_simulation_hook(vw, num_iterations, hooks, true, 1, false, 0, swap_after, scale_reward);
VW::finish(*vw);
return ctr;
}
Expand Down
6 changes: 3 additions & 3 deletions test/unit_test/simulator.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ class cb_sim

cb_sim(uint64_t seed = 0);
float get_reaction(const std::map<std::string, std::string>& context, const std::string& action,
bool add_noise = false, bool swap_reward = false);
bool add_noise = false, bool swap_reward = false, float scale_reward = 1.f);
std::vector<std::string> to_vw_example_format(const std::map<std::string, std::string>& context,
const std::string& chosen_action, float cost = 0.f, float prob = 0.f);
std::pair<int, float> sample_custom_pmf(std::vector<float>& pmf);
Expand All @@ -55,7 +55,7 @@ class cb_sim
const std::vector<uint64_t>& swap_after = std::vector<uint64_t>());
std::vector<float> run_simulation_hook(VW::workspace* vw, size_t num_iterations, callback_map& callbacks,
bool do_learn = true, size_t shift = 1, bool add_noise = false, uint64_t num_useless_features = 0,
const std::vector<uint64_t>& swap_after = std::vector<uint64_t>());
const std::vector<uint64_t>& swap_after = std::vector<uint64_t>(), float scale_reward = 1.f);

private:
void call_if_exists(VW::workspace& vw, VW::multi_ex& ex, const callback_map& callbacks, const size_t event);
Expand All @@ -65,5 +65,5 @@ std::vector<float> _test_helper(const std::string& vw_arg, size_t num_iterations
std::vector<float> _test_helper_save_load(const std::string& vw_arg, size_t num_iterations = 3000, int seed = 10,
const std::vector<uint64_t>& swap_after = std::vector<uint64_t>(), const size_t split = 1500);
std::vector<float> _test_helper_hook(const std::string& vw_arg, callback_map& hooks, size_t num_iterations = 3000,
int seed = 10, const std::vector<uint64_t>& swap_after = std::vector<uint64_t>());
int seed = 10, const std::vector<uint64_t>& swap_after = std::vector<uint64_t>(), float scale_reward = 1.f);
} // namespace simulator
4 changes: 3 additions & 1 deletion vowpalwabbit/core/include/vw/core/reductions/epsilon_decay.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ class epsilon_decay_data
public:
epsilon_decay_data(uint64_t model_count, uint64_t min_scope, double epsilon_decay_significance_level,
double epsilon_decay_estimator_decay, dense_parameters& weights, std::string epsilon_decay_audit_str,
bool constant_epsilon, uint32_t& wpp, bool lb_trick, uint64_t _min_champ_examples, float initial_epsilon);
bool constant_epsilon, uint32_t& wpp, bool lb_trick, uint64_t _min_champ_examples, float initial_epsilon,
uint64_t shift_model_bounds);
void update_weights(float init_ep, VW::LEARNER::multi_learner& base, VW::multi_ex& examples);
void promote_model(int64_t model_ind, int64_t swap_dist);
void rebalance_greater_models(int64_t model_ind, int64_t swap_dist, int64_t model_count);
Expand All @@ -44,6 +45,7 @@ class epsilon_decay_data
bool _lb_trick;
uint64_t _min_champ_examples;
float _initial_epsilon;
uint64_t _shift_model_bounds;

// TODO: delete all this, gd and cb_adf must respect ft_offset, see header import of automl.cc
std::vector<double> per_live_model_state_double;
Expand Down
27 changes: 18 additions & 9 deletions vowpalwabbit/core/src/reductions/epsilon_decay.cc
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ float decayed_epsilon(float init_ep, uint64_t update_count)
epsilon_decay_data::epsilon_decay_data(uint64_t model_count, uint64_t min_scope,
double epsilon_decay_significance_level, double epsilon_decay_estimator_decay, dense_parameters& weights,
std::string epsilon_decay_audit_str, bool constant_epsilon, uint32_t& wpp, bool lb_trick,
uint64_t min_champ_examples, float initial_epsilon)
uint64_t min_champ_examples, float initial_epsilon, uint64_t shift_model_bounds)
: _min_scope(min_scope)
, _epsilon_decay_significance_level(epsilon_decay_significance_level)
, _epsilon_decay_estimator_decay(epsilon_decay_estimator_decay)
Expand All @@ -49,6 +49,7 @@ epsilon_decay_data::epsilon_decay_data(uint64_t model_count, uint64_t min_scope,
, _lb_trick(lb_trick)
, _min_champ_examples(min_champ_examples)
, _initial_epsilon(initial_epsilon)
, _shift_model_bounds(shift_model_bounds)
{
_weight_indices.resize(model_count);
conf_seq_estimators.reserve(model_count);
Expand Down Expand Up @@ -117,15 +118,15 @@ void epsilon_decay_data::update_weights(float init_ep, VW::LEARNER::multi_learne
}
if (_epsilon_decay_audit_str != "")
{
if (model_ind == model_count - 1) { _audit_msg << "champ "; }
else
if (model_ind != model_count - 1)
{
_audit_msg << "challenger[" << (model_ind + 1) << "] ";

_audit_msg << "update_count: " << conf_seq_estimators[model_ind][model_ind].update_count
<< " lb: " << conf_seq_estimators[model_ind][model_ind].lower_bound()
<< " champ_ub: " << conf_seq_estimators[model_count - 1][model_ind].upper_bound()
<< " p_pred: " << a_s.score << "\n";
}
_audit_msg << "update_count: " << conf_seq_estimators[model_ind][model_ind].update_count
<< " lb: " << conf_seq_estimators[model_ind][model_ind].lower_bound()
<< " ub: " << conf_seq_estimators[model_ind][model_ind].upper_bound() << " p_pred: " << a_s.score
<< "\n";
}
break;
}
Expand Down Expand Up @@ -223,7 +224,7 @@ void epsilon_decay_data::check_horizon_bounds()
if (conf_seq_estimators[i][i].update_count > _min_scope &&
conf_seq_estimators[i][i].update_count >
std::pow(conf_seq_estimators[final_model_idx][final_model_idx].update_count,
static_cast<float>(i + 1) / model_count))
static_cast<float>(i + 1 + _shift_model_bounds) / (model_count + _shift_model_bounds)))
{
shift_model(i - 1, 1, model_count);
break;
Expand Down Expand Up @@ -321,6 +322,7 @@ VW::LEARNER::base_learner* VW::reductions::epsilon_decay_setup(VW::setup_base_i&
bool fixed_significance_level = false;
uint64_t min_champ_examples;
float initial_epsilon;
uint64_t shift_model_bounds;

option_group_definition new_options("[Reduction] Epsilon-Decaying Exploration");
new_options
Expand Down Expand Up @@ -374,6 +376,12 @@ VW::LEARNER::base_learner* VW::reductions::epsilon_decay_setup(VW::setup_base_i&
.default_value(1.0)
.keep()
.help("Initial epsilon value")
.experimental())
.add(make_option("shift_model_bounds", shift_model_bounds)
.default_value(0)
.keep()
.help("Shift maximum update_count for model i from champ_update_count^(i / num_models) to "
"champ_update_count^((i + shift) / (num_models + shift))")
.experimental());

if (!options.add_parse_and_check_necessary(new_options)) { return nullptr; }
Expand All @@ -384,7 +392,8 @@ VW::LEARNER::base_learner* VW::reductions::epsilon_decay_setup(VW::setup_base_i&

auto data = VW::make_unique<VW::reductions::epsilon_decay::epsilon_decay_data>(model_count, min_scope,
epsilon_decay_significance_level, epsilon_decay_estimator_decay, all.weights.dense_weights,
epsilon_decay_audit_str, constant_epsilon, all.wpp, lb_trick, min_champ_examples, initial_epsilon);
epsilon_decay_audit_str, constant_epsilon, all.wpp, lb_trick, min_champ_examples, initial_epsilon,
shift_model_bounds);

// make sure we set up the rest of the stack with cleared interactions
// so that there are no subtle bugs
Expand Down

0 comments on commit 28e7a79

Please sign in to comment.