Skip to content

Commit

Permalink
[Misc][Log] Add log for tokenizer length not equal to vocabulary size
Browse files: browse the repository at this point in the history
  • Loading branch information
esmeetu authored Mar 21, 2024
1 parent 4c07dd2 commit 8657323
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 0 deletions.
8 changes: 8 additions & 0 deletions vllm/engine/llm_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,14 @@ def _init_tokenizer(self, **tokenizer_init_kwargs):
self.tokenizer: BaseTokenizerGroup = get_tokenizer_group(
self.parallel_config.tokenizer_pool_config, **init_kwargs)

if len(self.get_tokenizer()) != self.model_config.get_vocab_size():
logger.warning(
f"The tokenizer's vocabulary size {len(self.get_tokenizer())}"
f" does not match the model's vocabulary size "
f"{self.model_config.get_vocab_size()}. This might "
f"cause an error in decoding. Please change config.json "
"to match the tokenizer's vocabulary size.")

def _verify_args(self) -> None:
self.model_config.verify_with_parallel_config(self.parallel_config)
self.cache_config.verify_with_parallel_config(self.parallel_config)
Expand Down
8 changes: 8 additions & 0 deletions vllm/entrypoints/openai/serving_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,14 @@ async def _post_init(self):
tokenizer_mode=engine_model_config.tokenizer_mode,
trust_remote_code=engine_model_config.trust_remote_code)

if len(self.tokenizer) != engine_model_config.get_vocab_size():
logger.warning(
f"The tokenizer's vocabulary size {len(self.tokenizer)}"
f" does not match the model's vocabulary size "
f"{engine_model_config.get_vocab_size()}. This might "
f"cause an error in decoding. Please change config.json "
"to match the tokenizer's vocabulary size.")

async def show_available_models(self) -> ModelList:
"""Show available models. Right now we only have one model."""
model_cards = [
Expand Down

0 comments on commit 8657323

Please sign in to comment.