From cf9edaf4b0f5851217b79f0d8ef8e71bff765492 Mon Sep 17 00:00:00 2001 From: "bongwon.jang" Date: Tue, 11 Jun 2024 04:39:38 +0000 Subject: [PATCH 1/4] flashinfer bug at _prepare_model_input function is fixed --- vllm/worker/model_runner.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/vllm/worker/model_runner.py b/vllm/worker/model_runner.py index 7879a5de5b7bd..90ae250337807 100644 --- a/vllm/worker/model_runner.py +++ b/vllm/worker/model_runner.py @@ -384,9 +384,14 @@ def _prepare_model_input( block_table = block_table[ -curr_sliding_window_blocks:] if self.attn_backend.get_name() == "flashinfer": - paged_kv_indices.extend(block_table) + block_table_bound = seq_data.get_len( + ) // self.block_size + 1 if seq_data.get_len( + ) % self.block_size != 0 else seq_data.get_len( + ) // self.block_size + paged_kv_indices.extend( + block_table[:block_table_bound]) paged_kv_indptr.append(paged_kv_indptr[-1] + - len(block_table)) + block_table_bound) last_page_len = seq_data.get_len( ) % self.block_size if last_page_len == 0: From d9f14fc1af94a8328ba965ab953b1fcd84b98b0a Mon Sep 17 00:00:00 2001 From: "bongwon.jang" Date: Thu, 13 Jun 2024 01:21:50 +0000 Subject: [PATCH 2/4] fix comment --- vllm/worker/model_runner.py | 1 + 1 file changed, 1 insertion(+) diff --git a/vllm/worker/model_runner.py b/vllm/worker/model_runner.py index 90ae250337807..0a326558c5810 100644 --- a/vllm/worker/model_runner.py +++ b/vllm/worker/model_runner.py @@ -384,6 +384,7 @@ def _prepare_model_input( block_table = block_table[ -curr_sliding_window_blocks:] if self.attn_backend.get_name() == "flashinfer": + # flashinfer bugfix block_table_bound = seq_data.get_len( ) // self.block_size + 1 if seq_data.get_len( ) % self.block_size != 0 else seq_data.get_len( From 12469af206be5a2b4d86f8ad38182a7d9ccd0e7a Mon Sep 17 00:00:00 2001 From: "bongwon.jang" Date: Fri, 14 Jun 2024 06:05:31 +0000 Subject: [PATCH 3/4] fix comment and applied new commits --- vllm/worker/model_runner.py | 1 - 1 file changed, 1 deletion(-) diff --git a/vllm/worker/model_runner.py b/vllm/worker/model_runner.py index b6ec90782235f..e02983eb56f0a 100644 --- a/vllm/worker/model_runner.py +++ b/vllm/worker/model_runner.py @@ -395,7 +395,6 @@ def _prepare_model_input( block_table = block_table[ -curr_sliding_window_blocks:] if self.attn_backend.get_name() == "flashinfer": - # flashinfer bugfix block_table_bound = seq_data.get_len( ) // self.block_size + 1 if seq_data.get_len( ) % self.block_size != 0 else seq_data.get_len( From 170c9421db03bedf5b67535b3236f38de306ebf2 Mon Sep 17 00:00:00 2001 From: "bongwon.jang" Date: Sat, 15 Jun 2024 02:36:09 +0000 Subject: [PATCH 4/4] merged with upstream and modified comment --- vllm/worker/model_runner.py | 1 + 1 file changed, 1 insertion(+) diff --git a/vllm/worker/model_runner.py b/vllm/worker/model_runner.py index e02983eb56f0a..d555fbe6d83c5 100644 --- a/vllm/worker/model_runner.py +++ b/vllm/worker/model_runner.py @@ -395,6 +395,7 @@ def _prepare_model_input( block_table = block_table[ -curr_sliding_window_blocks:] if self.attn_backend.get_name() == "flashinfer": + # flashinfer wrong indexing bug fix block_table_bound = seq_data.get_len( ) // self.block_size + 1 if seq_data.get_len( ) % self.block_size != 0 else seq_data.get_len(