diff --git a/src/cpp/src/scheduler.hpp b/src/cpp/src/scheduler.hpp index 7ae7a40a51..c5ee8a6250 100644 --- a/src/cpp/src/scheduler.hpp +++ b/src/cpp/src/scheduler.hpp @@ -474,7 +474,7 @@ class Scheduler { auto gen_config = sequence_groups[idx]->get_sampling_parameters(); seq_length = std::min(seq_length, sequence_groups[idx]->get_prompt_len() + gen_config.get_max_new_tokens(sequence_groups[idx]->get_prompt_len())); size_t blocks_num = std::ceil((float)seq_length / m_block_manager.get_block_size()); - if (gen_config.do_sample && gen_config.is_beam_search()) { + if (gen_config.is_beam_search()) { blocks_num *= gen_config.num_beams; } else if (gen_config.do_sample && gen_config.is_multinomial()) { blocks_num *= gen_config.num_return_sequences;