From 94db7309b050ff7d7f37f048659ab07ffaecf468 Mon Sep 17 00:00:00 2001
From: Chen <12011125@mail.sustech.edu.cn>
Date: Tue, 3 Sep 2024 18:17:26 +0800
Subject: [PATCH 1/2] Fix bug in detokenizer.py

Fix the bug where, in some cases, the lengths of prev_tokens and
next_iter_tokens grow exponentially.

---
 vllm/transformers_utils/detokenizer.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/vllm/transformers_utils/detokenizer.py b/vllm/transformers_utils/detokenizer.py
index d27d7ba9e67bb..64b0e378bdc8b 100644
--- a/vllm/transformers_utils/detokenizer.py
+++ b/vllm/transformers_utils/detokenizer.py
@@ -5,6 +5,7 @@
 from .tokenizer import AnyTokenizer
 from .tokenizer_group import BaseTokenizerGroup
+from copy import deepcopy

 # Used eg. for marking rejected tokens in spec decoding.
 INVALID_TOKEN_ID = -1
@@ -90,7 +91,7 @@ def decode_prompt_logprobs_inplace(self, seq_group: SequenceGroup,
             prefix_offset = next_iter_prefix_offset
             read_offset = next_iter_read_offset
             if prev_tokens is None:
-                prev_tokens = next_iter_tokens
+                prev_tokens = deepcopy(next_iter_tokens)
             else:
                 prev_tokens.extend(next_iter_tokens)

From 26fbeb2a18b10416f88ee1ff876c1de1a7595855 Mon Sep 17 00:00:00 2001
From: Cafeii <12011125@mail.sustech.edu.cn>
Date: Tue, 3 Sep 2024 19:50:32 +0800
Subject: [PATCH 2/2] Run yapf and ruff

---
 vllm/transformers_utils/detokenizer.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vllm/transformers_utils/detokenizer.py b/vllm/transformers_utils/detokenizer.py
index 64b0e378bdc8b..a24cdbc05bfce 100644
--- a/vllm/transformers_utils/detokenizer.py
+++ b/vllm/transformers_utils/detokenizer.py
@@ -1,3 +1,4 @@
+import copy
 from typing import Dict, List, Optional, Tuple

 from vllm.sequence import Logprob, SamplingParams, Sequence, SequenceGroup
@@ -5,7 +6,6 @@
 from .tokenizer import AnyTokenizer
 from .tokenizer_group import BaseTokenizerGroup
-from copy import deepcopy

 # Used eg. for marking rejected tokens in spec decoding.
 INVALID_TOKEN_ID = -1
@@ -91,7 +91,7 @@ def decode_prompt_logprobs_inplace(self, seq_group: SequenceGroup,
             prefix_offset = next_iter_prefix_offset
             read_offset = next_iter_read_offset
             if prev_tokens is None:
-                prev_tokens = deepcopy(next_iter_tokens)
+                prev_tokens = copy.deepcopy(next_iter_tokens)
             else:
                 prev_tokens.extend(next_iter_tokens)
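
Note (added for context, not part of either patch): the sketch below is a minimal
standalone illustration of the aliasing pitfall the fix addresses; the grow()
helper and its arguments are hypothetical, not vLLM code. Without a copy,
prev_tokens can end up being the very same list object as next_iter_tokens, so the
later prev_tokens.extend(next_iter_tokens) extends the list with itself and doubles
it on every iteration; copying first keeps the growth linear.

import copy

def grow(tokens, iterations, use_copy):
    # Simplified stand-in for the loop in decode_prompt_logprobs_inplace.
    prev_tokens = None
    for _ in range(iterations):
        next_iter_tokens = tokens  # aliases the same list, as in the buggy path
        if prev_tokens is None:
            # Buggy version assigned the same object: prev_tokens = next_iter_tokens
            prev_tokens = (copy.deepcopy(next_iter_tokens)
                           if use_copy else next_iter_tokens)
        else:
            # When prev_tokens is next_iter_tokens, this doubles the list each time.
            prev_tokens.extend(next_iter_tokens)
    return len(prev_tokens)

print(grow(["a", "b"], 10, use_copy=False))  # 1024 = 2 * 2**9, exponential growth
print(grow(["a", "b"], 10, use_copy=True))   # 20 = 2 + 9 * 2, linear growth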