[Core][Bugfix] cache len of tokenizer (vllm-project#3741)
youkaichao authored Mar 30, 2024
1 parent bf98867 commit eae1f07
Showing 1 changed file with 4 additions and 0 deletions.
vllm/transformers_utils/tokenizer.py: 4 additions & 0 deletions
@@ -26,6 +26,7 @@ def get_cached_tokenizer(
     tokenizer_all_special_tokens_extended = (
         tokenizer.all_special_tokens_extended)
     tokenizer_all_special_tokens = set(tokenizer.all_special_tokens)
+    tokenizer_len = len(tokenizer)

     class CachedTokenizer(tokenizer.__class__):

@@ -41,6 +42,9 @@ def all_special_tokens(self):
         def all_special_tokens_extended(self):
             return tokenizer_all_special_tokens_extended

+        def __len__(self):
+            return tokenizer_len
+
     CachedTokenizer.__name__ = f"Cached{tokenizer.__class__.__name__}"

     tokenizer.__class__ = CachedTokenizer
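For context, the point of this helper is that Hugging Face tokenizers re-derive attributes such as all_special_tokens, and the vocabulary size behind len(tokenizer), on each access, presumably a measurable cost when they are queried frequently; the helper captures the values once and serves them from a dynamic subclass. Below is a minimal sketch of the patched function, reconstructed from the two hunks above; anything outside the visible diff context (the @property decorators, the return statement) is an assumption, not taken verbatim from the commit.

def get_cached_tokenizer(tokenizer):
    """Freeze tokenizer attributes that are expensive to recompute."""
    tokenizer_all_special_tokens_extended = (
        tokenizer.all_special_tokens_extended)
    tokenizer_all_special_tokens = set(tokenizer.all_special_tokens)
    # The value this commit starts caching: len() on a Hugging Face
    # tokenizer recomputes the vocabulary size (including added tokens)
    # on every call.
    tokenizer_len = len(tokenizer)

    class CachedTokenizer(tokenizer.__class__):

        @property
        def all_special_tokens(self):
            return tokenizer_all_special_tokens

        @property
        def all_special_tokens_extended(self):
            return tokenizer_all_special_tokens_extended

        def __len__(self):
            return tokenizer_len

    CachedTokenizer.__name__ = f"Cached{tokenizer.__class__.__name__}"

    # Swap the instance's class in place so existing references to the
    # tokenizer pick up the cached behavior without re-wrapping.
    tokenizer.__class__ = CachedTokenizer
    return tokenizer

The trade-off is that tokens added after wrapping are not reflected in the cached values; that already held for the cached special-token attributes, and this commit extends the same behavior to len().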
