From 9cc49632d9bd60ed0cbbeb48c4c11e270ad8a459 Mon Sep 17 00:00:00 2001
From: Roger Wang <136131678+ywang96@users.noreply.github.com>
Date: Mon, 16 Sep 2024 22:22:45 -0700
Subject: [PATCH] [Misc][Bugfix] Disable guided decoding for mistral tokenizer
 (#8521)

---
 .../guided_decoding/__init__.py | 23 +++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/vllm/model_executor/guided_decoding/__init__.py b/vllm/model_executor/guided_decoding/__init__.py
index 7161e83952a3d..f4fe8a7307c04 100644
--- a/vllm/model_executor/guided_decoding/__init__.py
+++ b/vllm/model_executor/guided_decoding/__init__.py
@@ -6,6 +6,7 @@
 from vllm.model_executor.guided_decoding.guided_fields import (
     GuidedDecodingRequest)
 from vllm.sampling_params import LogitsProcessor
+from vllm.transformers_utils.tokenizer import MistralTokenizer


 async def get_guided_decoding_logits_processor(
@@ -15,12 +16,23 @@ async def get_guided_decoding_logits_processor(
     request = _adapt_request_for_tool_use(request)

     if guided_decoding_backend == 'outlines':
+        if isinstance(tokenizer, MistralTokenizer):
+            raise NotImplementedError(
+                "Guided decoding with 'outlines' is currently not supported "
+                "for Mistral tokenizer. Please consider contributing to the "
+                "'outlines' project if you are interested in this feature.")
         # NOTE: lazy import outlines to avoid https://github.com/vllm-project/vllm/issues/4193
         from vllm.model_executor.guided_decoding.outlines_decoding import (  # noqa
             get_outlines_guided_decoding_logits_processor)
         return await get_outlines_guided_decoding_logits_processor(
             request, tokenizer)
     if guided_decoding_backend == 'lm-format-enforcer':
+        if isinstance(tokenizer, MistralTokenizer):
+            raise NotImplementedError(
+                "Guided decoding with 'lm-format-enforcer' is currently not "
+                "supported for Mistral tokenizer. Please consider contributing "
+                "to the 'lm-format-enforcer' project if you are interested "
+                "in this feature.")
         from vllm.model_executor.guided_decoding.lm_format_enforcer_decoding import (  # noqa
             get_lm_format_enforcer_guided_decoding_logits_processor)
         return await get_lm_format_enforcer_guided_decoding_logits_processor(
@@ -37,12 +49,23 @@ def get_local_guided_decoding_logits_processor(
     # request = _adapt_request_for_tool_use(request)

     if guided_decoding_backend == 'outlines':
+        if isinstance(tokenizer, MistralTokenizer):
+            raise NotImplementedError(
+                "Guided decoding with 'outlines' is currently not supported "
+                "for Mistral tokenizer. Please consider contributing to the "
+                "'outlines' project if you are interested in this feature.")
         # NOTE: lazy import outlines to avoid https://github.com/vllm-project/vllm/issues/4193
         from vllm.model_executor.guided_decoding.outlines_decoding import (  # noqa
             get_local_outlines_guided_decoding_logits_processor)
         return get_local_outlines_guided_decoding_logits_processor(
             guided_options, tokenizer)
     if guided_decoding_backend == 'lm-format-enforcer':
+        if isinstance(tokenizer, MistralTokenizer):
+            raise NotImplementedError(
+                "Guided decoding with 'lm-format-enforcer' is currently not "
+                "supported for Mistral tokenizer. Please consider contributing "
+                "to the 'lm-format-enforcer' project if you are interested "
+                "in this feature.")
         from vllm.model_executor.guided_decoding.lm_format_enforcer_decoding import (  # noqa
             get_local_lm_format_enforcer_guided_decoding_logits_processor)
         return get_local_lm_format_enforcer_guided_decoding_logits_processor(
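
A minimal sketch of the behavior this patch introduces, for a vLLM checkout that
includes it. Because the isinstance check runs before the lazy backend import,
the error is raised even when 'outlines' is not installed. The model id below is
illustrative (any repo shipping a Mistral-native tokenizer file would do), and
loading it downloads the tokenizer from the Hugging Face Hub:

    from vllm.model_executor.guided_decoding import (
        get_local_guided_decoding_logits_processor)
    from vllm.model_executor.guided_decoding.guided_fields import (
        GuidedDecodingRequest)
    from vllm.transformers_utils.tokenizer import MistralTokenizer

    # Load the Mistral-native tokenizer rather than a Hugging Face one.
    tokenizer = MistralTokenizer.from_pretrained(
        "mistralai/Mistral-7B-Instruct-v0.3")

    # Any guided-decoding option triggers the same code path; a regex
    # constraint is used here purely as an example.
    guided_options = GuidedDecodingRequest(guided_regex=r"\d+")

    try:
        get_local_guided_decoding_logits_processor(
            "outlines", guided_options, tokenizer)
    except NotImplementedError as e:
        # "Guided decoding with 'outlines' is currently not supported
        #  for Mistral tokenizer. ..."
        print(e)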