diff --git a/vllm/worker/habana_model_runner.py b/vllm/worker/habana_model_runner.py index a0e039ac8ba07..ac923a667867b 100644 --- a/vllm/worker/habana_model_runner.py +++ b/vllm/worker/habana_model_runner.py @@ -712,7 +712,7 @@ def _prepare_decode( block_usage = [[self.block_size] * (bu - 1) + [lb] for bu, lb in zip(blocks_used, last_block)] block_usage = list(itertools.chain(*block_usage)) - block_bucket_size = self.decode_block_bucket_cfg[1] + block_bucket_size = find_bucket(len(block_list), self.decode_block_bucket_cfg) block_list = pad_list(block_list, block_bucket_size, _PAD_SLOT_ID) block_mapping = pad_list(block_mapping, block_bucket_size, 0) block_usage = pad_list(block_usage, block_bucket_size, 0)