From a888b63ffa86ce736cc007aed2b40b36746bdfbd Mon Sep 17 00:00:00 2001
From: Flavia Beo
Date: Wed, 6 Nov 2024 14:59:45 -0300
Subject: [PATCH] Revert "[Bugfix] Fix edge-case crash when using chat with
 the Mistral Tekken Tokenizer (#10051)"

This reverts commit 2bcbae704c0d52913c6a2887260fc6bde6c20361.
---
 tests/models/decoder_only/language/test_mistral.py | 9 +++------
 vllm/transformers_utils/tokenizers/mistral.py      | 8 ++------
 2 files changed, 5 insertions(+), 12 deletions(-)

diff --git a/tests/models/decoder_only/language/test_mistral.py b/tests/models/decoder_only/language/test_mistral.py
index 6ec4b7e7e3f71..5be44c54a717c 100644
--- a/tests/models/decoder_only/language/test_mistral.py
+++ b/tests/models/decoder_only/language/test_mistral.py
@@ -10,22 +10,19 @@
 MODELS = [
     "mistralai/Mistral-7B-Instruct-v0.1",
+    "mistralai/Mistral-7B-Instruct-v0.3",
+    # Mistral-Nemo is too big for CI, but passes locally
+    # "mistralai/Mistral-Nemo-Instruct-2407"
 ]
 
 MISTRAL_FORMAT_MODELS = [
     "mistralai/Mistral-7B-Instruct-v0.3",
-    # uses the v3-Tekken tokenizer
-    "mistralai/Ministral-8B-Instruct-2410",
-    # Mistral-Nemo is to big for CI, but passes locally
-    # "mistralai/Mistral-Nemo-Instruct-2407"
 ]
 
 SAMPLING_PARAMS = SamplingParams(max_tokens=512, temperature=0.0, logprobs=5)
 SYMBOLIC_LANG_PROMPTS = [
     "勇敢な船乗りについての詩を書く", # japanese
     "寫一首關於勇敢的水手的詩", # chinese
-    "ပုံပြင်လေးပြောပြပါ်:\n", # burmese
-    "Repeat the phrase 'URGENCY🌶️':\nURGENCY🌶️\nURGENCY🌶️\n", # see https://github.com/vllm-project/vllm/pull/9625
 ]
 
 # for function calling
diff --git a/vllm/transformers_utils/tokenizers/mistral.py b/vllm/transformers_utils/tokenizers/mistral.py
index ccffdcc2a4df2..896f70bc1dafd 100644
--- a/vllm/transformers_utils/tokenizers/mistral.py
+++ b/vllm/transformers_utils/tokenizers/mistral.py
@@ -254,7 +254,7 @@ def decode(self,
                skip_special_tokens: bool = True) -> str:
         assert (
             skip_special_tokens
-        ), "skip_special_tokens=False is not supported for Mistral tokenizers."
+        ), "Skipping special tokens is not supported for Mistral tokenizers."
 
         if isinstance(ids, int):
             ids = [ids]
@@ -268,16 +268,12 @@ def convert_ids_to_tokens(
         # TODO(Patrick) - potentially allow special tokens to not be skipped
         assert (
             skip_special_tokens
-        ), "skip_special_tokens=False is not supported for Mistral tokenizers."
+        ), "Skipping special tokens is not supported for Mistral tokenizers."
 
         assert isinstance(self.tokenizer,
                           (Tekkenizer, SentencePieceTokenizer)), type(
                               self.tokenizer)
 
-        if isinstance(self.tokenizer, Tekkenizer):
-            # skip special tokens
-            ids = [i for i in ids if i > self.tokenizer.num_special_tokens]
-
         tokens = [self.tokenizer.id_to_piece(id) for id in ids]
 
         if any("�" in t for t in tokens):
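
For context on the behavioral change: the final hunk restores the unguarded
path in convert_ids_to_tokens, where every token ID is passed straight to
id_to_piece. The guard being removed was added in #10051 to avoid an
edge-case crash during chat with the Tekken tokenizer. Below is a minimal
sketch of the two paths, assuming only what the hunk shows (a
Tekkenizer-like tokenizer exposing num_special_tokens and id_to_piece, with
special-token IDs occupying the lowest range); the helper names are
illustrative, not vLLM API.

    from typing import List

    def pieces_with_guard(tokenizer, ids: List[int]) -> List[str]:
        # Pre-revert behavior: mirror the deleted filter by dropping IDs
        # in the special-token range before piece lookup.
        ids = [i for i in ids if i > tokenizer.num_special_tokens]
        return [tokenizer.id_to_piece(i) for i in ids]

    def pieces_without_guard(tokenizer, ids: List[int]) -> List[str]:
        # Post-revert behavior: all IDs, special tokens included, go
        # directly to id_to_piece.
        return [tokenizer.id_to_piece(i) for i in ids]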