test: [epsilon decay] find champ change in simulator (#4228)

* test: [epsilon decay] find champ change in simulator
* clang
* fix audit string
* help string
* clang
VowpalWabbit · Oct 18, 2022 · 28e7a79 · 28e7a79
1 parent 873c0de
commit 28e7a79
Show file tree

Hide file tree

Showing 6 changed files with 51 additions and 79 deletions.
diff --git a/test/train-sets/ref/help.stdout b/test/train-sets/ref/help.stdout
@@ -572,6 +572,9 @@ Weight Options:
     --min_champ_examples arg                Minimum number of examples for any challenger to become champion
                                             (type: uint, default: 0, keep, experimental)
     --initial_epsilon arg                   Initial epsilon value (type: float, default: 1, keep, experimental)
+    --shift_model_bounds arg                Shift maximum update_count for model i from champ_update_count^(i
+                                            / num_models) to champ_update_count^((i + shift) / (num_models
+                                            + shift)) (type: uint, default: 0, keep, experimental)
 [Reduction] Error Correcting Tournament Options:
     --ect arg                               Error correcting tournament with <k> labels (type: uint, keep,
                                             necessary)

diff --git a/test/unit_test/epsilon_decay_test.cc b/test/unit_test/epsilon_decay_test.cc
@@ -39,82 +39,39 @@ BOOST_AUTO_TEST_CASE(epsilon_decay_test_init)
   float with_save = ctr.back();
 }
 
-// TODO: Current simulator cannot create champ changes with confidence sequence estimator
-/*BOOST_AUTO_TEST_CASE(epsilon_decay_test_champ_change)
+BOOST_AUTO_TEST_CASE(epsilon_decay_test_champ_change)
 {
-  const size_t num_iterations = 10000;
-  const std::vector<uint64_t> swap_after = {200, 500, 1000, 5000};
+  const size_t num_iterations = 8000;
+  const std::vector<uint64_t> swap_after = {5000};
+  const float scale_reward = 0.2f;
   const size_t seed = 100;
-  const size_t deterministic_champ_switch = 5781;
+  const size_t deterministic_champ_switch = 7920;
   callback_map test_hooks;
 
   test_hooks.emplace(deterministic_champ_switch - 1, [&](cb_sim&, VW::workspace& all, VW::multi_ex&) {
     epsilon_decay_data* epsilon_decay = epsilon_decay_test::get_epsilon_decay_data(all);
-    BOOST_CHECK_EQUAL(epsilon_decay->conf_seq_estimators[0][0].update_count, 15);
-    BOOST_CHECK_EQUAL(epsilon_decay->conf_seq_estimators[1][0].update_count, 15);
-    BOOST_CHECK_EQUAL(epsilon_decay->conf_seq_estimators[2][0].update_count, 15);
-    BOOST_CHECK_EQUAL(epsilon_decay->conf_seq_estimators[3][0].update_count, 15);
-    BOOST_CHECK_EQUAL(epsilon_decay->conf_seq_estimators[1][1].update_count, 41);
-    BOOST_CHECK_EQUAL(epsilon_decay->conf_seq_estimators[2][1].update_count, 41);
-    BOOST_CHECK_EQUAL(epsilon_decay->conf_seq_estimators[3][1].update_count, 41);
-    BOOST_CHECK_EQUAL(epsilon_decay->conf_seq_estimators[2][2].update_count, 459);
-    BOOST_CHECK_EQUAL(epsilon_decay->conf_seq_estimators[3][2].update_count, 459);
-    BOOST_CHECK_EQUAL(epsilon_decay->conf_seq_estimators[3][3].update_count, 5780);
+    BOOST_CHECK_EQUAL(epsilon_decay->conf_seq_estimators[0][0].update_count, 2183);
+    BOOST_CHECK_EQUAL(epsilon_decay->conf_seq_estimators[1][0].update_count, 2183);
+    BOOST_CHECK_EQUAL(epsilon_decay->conf_seq_estimators[1][1].update_count, 7919);
     return true;
   });
 
   test_hooks.emplace(deterministic_champ_switch, [&](cb_sim&, VW::workspace& all, VW::multi_ex&) {
     epsilon_decay_data* epsilon_decay = epsilon_decay_test::get_epsilon_decay_data(all);
     BOOST_CHECK_EQUAL(epsilon_decay->conf_seq_estimators[0][0].update_count, 0);
     BOOST_CHECK_EQUAL(epsilon_decay->conf_seq_estimators[1][0].update_count, 0);
-    BOOST_CHECK_EQUAL(epsilon_decay->conf_seq_estimators[2][0].update_count, 0);
-    BOOST_CHECK_EQUAL(epsilon_decay->conf_seq_estimators[3][0].update_count, 0);
-    BOOST_CHECK_EQUAL(epsilon_decay->conf_seq_estimators[1][1].update_count, 16);
-    BOOST_CHECK_EQUAL(epsilon_decay->conf_seq_estimators[2][1].update_count, 16);
-    BOOST_CHECK_EQUAL(epsilon_decay->conf_seq_estimators[3][1].update_count, 16);
-    BOOST_CHECK_EQUAL(epsilon_decay->conf_seq_estimators[2][2].update_count, 42);
-    BOOST_CHECK_EQUAL(epsilon_decay->conf_seq_estimators[3][2].update_count, 42);
-    BOOST_CHECK_EQUAL(epsilon_decay->conf_seq_estimators[3][3].update_count, 460);
+    BOOST_CHECK_EQUAL(epsilon_decay->conf_seq_estimators[1][1].update_count, 2184);
     return true;
   });
 
   // we initialize the reduction pointing to position 0 as champ, that config is hard-coded to empty
   auto ctr = simulator::_test_helper_hook(
-      "--epsilon_decay --epsilon_decay_significance_level .9 --model_count 4 --cb_explore_adf --quiet  -q ::",
-      test_hooks, num_iterations, seed, swap_after);
-
-  BOOST_CHECK_GT(ctr.back(), 0.8f);
-}*/
-
-// TODO: Current simulator cannot create champ changes with confidence sequence estimator
-/*BOOST_AUTO_TEST_CASE(epsilon_decay_test_champ_change_with_min)
-{
-  const size_t num_iterations = 6000;
-  const std::vector<uint64_t> swap_after = {5000};
-  const size_t seed = 100;
-  const size_t deterministic_champ_switch = 5782;
-  callback_map test_hooks;
+      "--epsilon_decay --epsilon_decay_significance_level .9 --model_count 2 --cb_explore_adf --quiet  -q :: "
+      "--shift_model_bounds 10",
+      test_hooks, num_iterations, seed, swap_after, scale_reward);
 
-  test_hooks.emplace(deterministic_champ_switch - 1, [&](cb_sim&, VW::workspace& all, VW::multi_ex&) {
-    epsilon_decay_data* epsilon_decay = epsilon_decay_test::get_epsilon_decay_data(all);
-    BOOST_CHECK_EQUAL(epsilon_decay->conf_seq_estimators[2][2].update_count, 460);
-    BOOST_CHECK_EQUAL(epsilon_decay->conf_seq_estimators[3][3].update_count, 5781);
-    return true;
-  });
-
-  test_hooks.emplace(deterministic_champ_switch, [&](cb_sim&, VW::workspace& all, VW::multi_ex&) {
-    epsilon_decay_data* epsilon_decay = epsilon_decay_test::get_epsilon_decay_data(all);
-    BOOST_CHECK_EQUAL(epsilon_decay->conf_seq_estimators[3][3].update_count, 461);
-    return true;
-  });
-
-  // we initialize the reduction pointing to position 0 as champ, that config is hard-coded to empty
-  auto ctr = simulator::_test_helper_hook(
-      "--epsilon_decay --model_count 4 --min_champ_examples 461 --cb_explore_adf --quiet  -q ::", test_hooks,
-      num_iterations, seed, swap_after);
-
-  BOOST_CHECK_GT(ctr.back(), 0.8f);
-}*/
+  BOOST_CHECK_GT(ctr.back(), 0.6f);
+}
 
 BOOST_AUTO_TEST_CASE(epsilon_decay_test_update_count)
 {
@@ -252,7 +209,7 @@ BOOST_AUTO_TEST_CASE(epsilon_decay_test_score_bounds_unit)
   uint64_t num_models = 5;
   uint32_t wpp = 8;
   dense_parameters dense_weights(num_models);
-  epsilon_decay_data ep_data(num_models, 100, .05, .1, dense_weights, "", false, wpp, false, 0, 1.f);
+  epsilon_decay_data ep_data(num_models, 100, .05, .1, dense_weights, "", false, wpp, false, 0, 1.f, 0);
 
   // Set update counts to fixed values with expected horizon bound violation
   size_t score_idx = 0;
@@ -336,7 +293,7 @@ BOOST_AUTO_TEST_CASE(epsilon_decay_test_horizon_bounds_unit)
   uint64_t num_models = 5;
   uint32_t wpp = 8;
   dense_parameters dense_weights(num_models);
-  epsilon_decay_data ep_data(num_models, 100, .05, .1, dense_weights, "", false, wpp, false, 0, 1.f);
+  epsilon_decay_data ep_data(num_models, 100, .05, .1, dense_weights, "", false, wpp, false, 0, 1.f, 0);
 
   // Set update counts to fixed values with expected horizon bound violation
   size_t score_idx = 0;

diff --git a/test/unit_test/simulator.cc b/test/unit_test/simulator.cc
@@ -24,8 +24,8 @@ cb_sim::cb_sim(uint64_t seed)
   callback_count = 0;
 }
 
-float cb_sim::get_reaction(
-    const std::map<std::string, std::string>& context, const std::string& action, bool add_noise, bool swap_reward)
+float cb_sim::get_reaction(const std::map<std::string, std::string>& context, const std::string& action, bool add_noise,
+    bool swap_reward, float scale_reward)
 {
   float like_reward = USER_LIKED_ARTICLE;
   float dislike_reward = USER_DISLIKED_ARTICLE;
@@ -53,7 +53,7 @@ float cb_sim::get_reaction(
     }
   }
 
-  if (swap_reward) { return (reward == like_reward) ? dislike_reward : like_reward; }
+  if (swap_reward) { return scale_reward * ((reward == like_reward) ? dislike_reward : like_reward); }
   return reward;
 }
 
@@ -131,7 +131,8 @@ void cb_sim::call_if_exists(VW::workspace& vw, VW::multi_ex& ex, const callback_
 }
 
 std::vector<float> cb_sim::run_simulation_hook(VW::workspace* vw, size_t num_iterations, callback_map& callbacks,
-    bool do_learn, size_t shift, bool add_noise, uint64_t num_useless_features, const std::vector<uint64_t>& swap_after)
+    bool do_learn, size_t shift, bool add_noise, uint64_t num_useless_features, const std::vector<uint64_t>& swap_after,
+    float scale_reward)
 {
   // check if there's a callback for the first possible element,
   // in this case most likely 0th event
@@ -169,7 +170,7 @@ std::vector<float> cb_sim::run_simulation_hook(VW::workspace* vw, size_t num_ite
 
     // 4. Get cost of the action we chose
     // Check for reward swap
-    float cost = get_reaction(context, chosen_action, add_noise, swap_reward);
+    float cost = get_reaction(context, chosen_action, add_noise, swap_reward, scale_reward);
     cost_sum += cost;
 
     if (do_learn)
@@ -241,12 +242,12 @@ std::vector<float> _test_helper_save_load(const std::string& vw_arg, size_t num_
 }
 
 std::vector<float> _test_helper_hook(const std::string& vw_arg, callback_map& hooks, size_t num_iterations, int seed,
-    const std::vector<uint64_t>& swap_after)
+    const std::vector<uint64_t>& swap_after, float scale_reward)
 {
   BOOST_CHECK(true);
   auto* vw = VW::initialize(vw_arg);
   simulator::cb_sim sim(seed);
-  auto ctr = sim.run_simulation_hook(vw, num_iterations, hooks, true, 1, false, 0, swap_after);
+  auto ctr = sim.run_simulation_hook(vw, num_iterations, hooks, true, 1, false, 0, swap_after, scale_reward);
   VW::finish(*vw);
   return ctr;
 }

diff --git a/test/unit_test/simulator.h b/test/unit_test/simulator.h
@@ -44,7 +44,7 @@ class cb_sim
 
   cb_sim(uint64_t seed = 0);
   float get_reaction(const std::map<std::string, std::string>& context, const std::string& action,
-      bool add_noise = false, bool swap_reward = false);
+      bool add_noise = false, bool swap_reward = false, float scale_reward = 1.f);
   std::vector<std::string> to_vw_example_format(const std::map<std::string, std::string>& context,
       const std::string& chosen_action, float cost = 0.f, float prob = 0.f);
   std::pair<int, float> sample_custom_pmf(std::vector<float>& pmf);
@@ -55,7 +55,7 @@ class cb_sim
       const std::vector<uint64_t>& swap_after = std::vector<uint64_t>());
   std::vector<float> run_simulation_hook(VW::workspace* vw, size_t num_iterations, callback_map& callbacks,
       bool do_learn = true, size_t shift = 1, bool add_noise = false, uint64_t num_useless_features = 0,
-      const std::vector<uint64_t>& swap_after = std::vector<uint64_t>());
+      const std::vector<uint64_t>& swap_after = std::vector<uint64_t>(), float scale_reward = 1.f);
 
 private:
   void call_if_exists(VW::workspace& vw, VW::multi_ex& ex, const callback_map& callbacks, const size_t event);
@@ -65,5 +65,5 @@ std::vector<float> _test_helper(const std::string& vw_arg, size_t num_iterations
 std::vector<float> _test_helper_save_load(const std::string& vw_arg, size_t num_iterations = 3000, int seed = 10,
     const std::vector<uint64_t>& swap_after = std::vector<uint64_t>(), const size_t split = 1500);
 std::vector<float> _test_helper_hook(const std::string& vw_arg, callback_map& hooks, size_t num_iterations = 3000,
-    int seed = 10, const std::vector<uint64_t>& swap_after = std::vector<uint64_t>());
+    int seed = 10, const std::vector<uint64_t>& swap_after = std::vector<uint64_t>(), float scale_reward = 1.f);
 }  // namespace simulator
diff --git a/vowpalwabbit/core/include/vw/core/reductions/epsilon_decay.h b/vowpalwabbit/core/include/vw/core/reductions/epsilon_decay.h
@@ -21,7 +21,8 @@ class epsilon_decay_data
 public:
   epsilon_decay_data(uint64_t model_count, uint64_t min_scope, double epsilon_decay_significance_level,
       double epsilon_decay_estimator_decay, dense_parameters& weights, std::string epsilon_decay_audit_str,
-      bool constant_epsilon, uint32_t& wpp, bool lb_trick, uint64_t _min_champ_examples, float initial_epsilon);
+      bool constant_epsilon, uint32_t& wpp, bool lb_trick, uint64_t _min_champ_examples, float initial_epsilon,
+      uint64_t shift_model_bounds);
   void update_weights(float init_ep, VW::LEARNER::multi_learner& base, VW::multi_ex& examples);
   void promote_model(int64_t model_ind, int64_t swap_dist);
   void rebalance_greater_models(int64_t model_ind, int64_t swap_dist, int64_t model_count);
@@ -44,6 +45,7 @@ class epsilon_decay_data
   bool _lb_trick;
   uint64_t _min_champ_examples;
   float _initial_epsilon;
+  uint64_t _shift_model_bounds;
 
   // TODO: delete all this, gd and cb_adf must respect ft_offset, see header import of automl.cc
   std::vector<double> per_live_model_state_double;

diff --git a/vowpalwabbit/core/src/reductions/epsilon_decay.cc b/vowpalwabbit/core/src/reductions/epsilon_decay.cc
@@ -38,7 +38,7 @@ float decayed_epsilon(float init_ep, uint64_t update_count)
 epsilon_decay_data::epsilon_decay_data(uint64_t model_count, uint64_t min_scope,
     double epsilon_decay_significance_level, double epsilon_decay_estimator_decay, dense_parameters& weights,
     std::string epsilon_decay_audit_str, bool constant_epsilon, uint32_t& wpp, bool lb_trick,
-    uint64_t min_champ_examples, float initial_epsilon)
+    uint64_t min_champ_examples, float initial_epsilon, uint64_t shift_model_bounds)
     : _min_scope(min_scope)
     , _epsilon_decay_significance_level(epsilon_decay_significance_level)
     , _epsilon_decay_estimator_decay(epsilon_decay_estimator_decay)
@@ -49,6 +49,7 @@ epsilon_decay_data::epsilon_decay_data(uint64_t model_count, uint64_t min_scope,
     , _lb_trick(lb_trick)
     , _min_champ_examples(min_champ_examples)
     , _initial_epsilon(initial_epsilon)
+    , _shift_model_bounds(shift_model_bounds)
 {
   _weight_indices.resize(model_count);
   conf_seq_estimators.reserve(model_count);
@@ -117,15 +118,15 @@ void epsilon_decay_data::update_weights(float init_ep, VW::LEARNER::multi_learne
           }
           if (_epsilon_decay_audit_str != "")
           {
-            if (model_ind == model_count - 1) { _audit_msg << "champ "; }
-            else
+            if (model_ind != model_count - 1)
             {
               _audit_msg << "challenger[" << (model_ind + 1) << "] ";
+
+              _audit_msg << "update_count: " << conf_seq_estimators[model_ind][model_ind].update_count
+                         << " lb: " << conf_seq_estimators[model_ind][model_ind].lower_bound()
+                         << " champ_ub: " << conf_seq_estimators[model_count - 1][model_ind].upper_bound()
+                         << " p_pred: " << a_s.score << "\n";
             }
-            _audit_msg << "update_count: " << conf_seq_estimators[model_ind][model_ind].update_count
-                       << " lb: " << conf_seq_estimators[model_ind][model_ind].lower_bound()
-                       << " ub: " << conf_seq_estimators[model_ind][model_ind].upper_bound() << " p_pred: " << a_s.score
-                       << "\n";
           }
           break;
         }
@@ -223,7 +224,7 @@ void epsilon_decay_data::check_horizon_bounds()
     if (conf_seq_estimators[i][i].update_count > _min_scope &&
         conf_seq_estimators[i][i].update_count >
             std::pow(conf_seq_estimators[final_model_idx][final_model_idx].update_count,
-                static_cast<float>(i + 1) / model_count))
+                static_cast<float>(i + 1 + _shift_model_bounds) / (model_count + _shift_model_bounds)))
     {
       shift_model(i - 1, 1, model_count);
       break;
@@ -321,6 +322,7 @@ VW::LEARNER::base_learner* VW::reductions::epsilon_decay_setup(VW::setup_base_i&
   bool fixed_significance_level = false;
   uint64_t min_champ_examples;
   float initial_epsilon;
+  uint64_t shift_model_bounds;
 
   option_group_definition new_options("[Reduction] Epsilon-Decaying Exploration");
   new_options
@@ -374,6 +376,12 @@ VW::LEARNER::base_learner* VW::reductions::epsilon_decay_setup(VW::setup_base_i&
                .default_value(1.0)
                .keep()
                .help("Initial epsilon value")
+               .experimental())
+      .add(make_option("shift_model_bounds", shift_model_bounds)
+               .default_value(0)
+               .keep()
+               .help("Shift maximum update_count for model i from champ_update_count^(i / num_models) to "
+                     "champ_update_count^((i + shift) / (num_models + shift))")
                .experimental());
 
   if (!options.add_parse_and_check_necessary(new_options)) { return nullptr; }
@@ -384,7 +392,8 @@ VW::LEARNER::base_learner* VW::reductions::epsilon_decay_setup(VW::setup_base_i&
 
   auto data = VW::make_unique<VW::reductions::epsilon_decay::epsilon_decay_data>(model_count, min_scope,
       epsilon_decay_significance_level, epsilon_decay_estimator_decay, all.weights.dense_weights,
-      epsilon_decay_audit_str, constant_epsilon, all.wpp, lb_trick, min_champ_examples, initial_epsilon);
+      epsilon_decay_audit_str, constant_epsilon, all.wpp, lb_trick, min_champ_examples, initial_epsilon,
+      shift_model_bounds);
 
   // make sure we setup the rest of the stack with cleared interactions
   // to make sure there are not subtle bugs