From eae1f07ee9b05829617b07e6513e98416661648d Mon Sep 17 00:00:00 2001
From: youkaichao
Date: Fri, 29 Mar 2024 18:46:39 -0700
Subject: [PATCH] [Core][Bugfix] cache len of tokenizer (#3741)

---
 vllm/transformers_utils/tokenizer.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/vllm/transformers_utils/tokenizer.py b/vllm/transformers_utils/tokenizer.py
index dad20a5699013..3bda3f419d8a2 100644
--- a/vllm/transformers_utils/tokenizer.py
+++ b/vllm/transformers_utils/tokenizer.py
@@ -26,6 +26,7 @@ def get_cached_tokenizer(
     tokenizer_all_special_tokens_extended = (
         tokenizer.all_special_tokens_extended)
     tokenizer_all_special_tokens = set(tokenizer.all_special_tokens)
+    tokenizer_len = len(tokenizer)
 
     class CachedTokenizer(tokenizer.__class__):
 
@@ -41,6 +42,9 @@ def all_special_tokens(self):
         def all_special_tokens_extended(self):
             return tokenizer_all_special_tokens_extended
 
+        def __len__(self):
+            return tokenizer_len
+
     CachedTokenizer.__name__ = f"Cached{tokenizer.__class__.__name__}"
 
     tokenizer.__class__ = CachedTokenizer