Skip to content

Commit

Permalink
Revert "[Bugfix] Fix edge-case crash when using chat with the Mistral…
Browse files Browse the repository at this point in the history
… Tekken Tokenizer (vllm-project#10051)"

This reverts commit 2bcbae7.
  • Loading branch information
flaviabeo committed Nov 6, 2024
1 parent bcb2fc5 commit a888b63
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 12 deletions.
9 changes: 3 additions & 6 deletions tests/models/decoder_only/language/test_mistral.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,22 +10,19 @@

MODELS = [
"mistralai/Mistral-7B-Instruct-v0.1",
"mistralai/Mistral-7B-Instruct-v0.3",
# Mistral-Nemo is too big for CI, but passes locally
# "mistralai/Mistral-Nemo-Instruct-2407"
]

MISTRAL_FORMAT_MODELS = [
"mistralai/Mistral-7B-Instruct-v0.3",
# uses the v3-Tekken tokenizer
"mistralai/Ministral-8B-Instruct-2410",
# Mistral-Nemo is too big for CI, but passes locally
# "mistralai/Mistral-Nemo-Instruct-2407"
]

SAMPLING_PARAMS = SamplingParams(max_tokens=512, temperature=0.0, logprobs=5)
SYMBOLIC_LANG_PROMPTS = [
"勇敢な船乗りについての詩を書く", # japanese
"寫一首關於勇敢的水手的詩", # chinese
"ပုံပြင်လေးပြောပြပါ်:\n", # burmese
"Repeat the phrase 'URGENCY🌶️':\nURGENCY🌶️\nURGENCY🌶️\n", # see https://github.com/vllm-project/vllm/pull/9625
]

# for function calling
Expand Down
8 changes: 2 additions & 6 deletions vllm/transformers_utils/tokenizers/mistral.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,7 @@ def decode(self,
skip_special_tokens: bool = True) -> str:
assert (
skip_special_tokens
), "skip_special_tokens=False is not supported for Mistral tokenizers."
), "Skipping special tokens is not supported for Mistral tokenizers."

if isinstance(ids, int):
ids = [ids]
Expand All @@ -268,16 +268,12 @@ def convert_ids_to_tokens(
# TODO(Patrick) - potentially allow special tokens to not be skipped
assert (
skip_special_tokens
), "skip_special_tokens=False is not supported for Mistral tokenizers."
), "Skipping special tokens is not supported for Mistral tokenizers."

assert isinstance(self.tokenizer,
(Tekkenizer, SentencePieceTokenizer)), type(
self.tokenizer)

if isinstance(self.tokenizer, Tekkenizer):
# skip special tokens
ids = [i for i in ids if i > self.tokenizer.num_special_tokens]

tokens = [self.tokenizer.id_to_piece(id) for id in ids]

if any("�" in t for t in tokens):
Expand Down

0 comments on commit a888b63

Please sign in to comment.