From 13e3c96d91fae32713ac7ec79133a97037e902aa Mon Sep 17 00:00:00 2001
From: yudian0504
Date: Mon, 21 Oct 2024 19:07:08 +0800
Subject: [PATCH] bugfix: graph capture for decoder

---
 vllm/worker/model_runner.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/worker/model_runner.py b/vllm/worker/model_runner.py
index dc1674cd1ea20..551f54b68ce67 100644
--- a/vllm/worker/model_runner.py
+++ b/vllm/worker/model_runner.py
@@ -828,7 +828,7 @@ def build(self) -> ModelInputForGPU:
 
         cuda_graph_pad_size = self._get_cuda_graph_pad_size(
             num_seqs=len(seq_lens),
-            max_decode_seq_len=max_encoder_seq_len,
+            max_decode_seq_len=max_decode_seq_len,
             max_encoder_seq_len=max_encoder_seq_len)
 
         batch_size = len(input_tokens)