diff --git a/src/cpp/src/scheduler.hpp b/src/cpp/src/scheduler.hpp index ba0880b58a..da65c68bec 100644 --- a/src/cpp/src/scheduler.hpp +++ b/src/cpp/src/scheduler.hpp @@ -250,13 +250,13 @@ class Scheduler { OPENVINO_ASSERT(currently_allocated_token_slots >= occupied_token_slots, "internal error"); size_t available_slots = currently_allocated_token_slots - occupied_token_slots, required_slots = num_scheduled_tokens > available_slots ? num_scheduled_tokens - available_slots : 0; - size_t num_required_blocks = (required_slots + block_size - 1) / block_size, num_free_blocks = m_block_manager.num_free_blocks(); + size_t num_required_blocks = (required_slots + block_size - 1) / block_size; while (num_required_blocks > m_block_manager.num_free_blocks()) { if (!_try_increase_cache()) { break; } } - size_t num_scheduled_blocks = std::min(num_required_blocks, num_free_blocks); + size_t num_scheduled_blocks = std::min(num_required_blocks, m_block_manager.num_free_blocks()); // some scheduled blocks can be no fully occupied, so we need to take min between num_scheduled_blocks // and total "scheduled capacity" num_scheduled_tokens = std::min(num_scheduled_tokens, available_slots + num_scheduled_blocks * block_size);