From 9bf6cb153a7f641d112dace94c147729d16575f6 Mon Sep 17 00:00:00 2001
From: Junichi Sato
Date: Mon, 22 Jul 2024 17:05:56 +0900
Subject: [PATCH] feat: add add_special_tokens parameter to completions

---
 vllm/entrypoints/openai/protocol.py           | 6 ++++++
 vllm/entrypoints/openai/serving_completion.py | 1 +
 2 files changed, 7 insertions(+)

diff --git a/vllm/entrypoints/openai/protocol.py b/vllm/entrypoints/openai/protocol.py
index 212483109a799..d84d91249ba60 100644
--- a/vllm/entrypoints/openai/protocol.py
+++ b/vllm/entrypoints/openai/protocol.py
@@ -369,6 +369,12 @@ class CompletionRequest(OpenAIBaseModel):
             "Whether to include the stop string in the output. "
             "This is only applied when the stop or stop_token_ids is set."),
     )
+    add_special_tokens: Optional[bool] = Field(
+        default=True,
+        description=(
+            "If true, special tokens (e.g. BOS) will be added to the prompt. "
+            "Default is True (same as HuggingFace tokenizer)."),
+    )
     response_format: Optional[ResponseFormat] = Field(
         default=None,
         description=
diff --git a/vllm/entrypoints/openai/serving_completion.py b/vllm/entrypoints/openai/serving_completion.py
index e61f3fdbf6666..53f35d66f2975 100644
--- a/vllm/entrypoints/openai/serving_completion.py
+++ b/vllm/entrypoints/openai/serving_completion.py
@@ -131,6 +131,7 @@ async def create_completion(self, request: CompletionRequest,
                     tokenizer,
                     truncate_prompt_tokens=sampling_params.
                     truncate_prompt_tokens,
+                    add_special_tokens=request.add_special_tokens,
                     **{prompt_arg: prompt})
                 prompt_ids, prompt_text = prompt_formats
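
Usage note (not part of the patch): a minimal sketch of how a client might
exercise the new field once a server runs with this change. It assumes a vLLM
OpenAI-compatible server at http://localhost:8000 and a Llama-style model
whose prompt string already carries its own BOS token; the URL and model name
below are placeholders, not values taken from the patch.

    import requests

    # By default add_special_tokens=True, so the server-side tokenizer adds
    # special tokens (e.g. BOS) to the prompt, matching HuggingFace tokenizer
    # behavior. Passing False skips them: useful when the prompt already
    # contains a fully formatted template with its own BOS marker.
    resp = requests.post(
        "http://localhost:8000/v1/completions",  # assumed server address
        json={
            "model": "meta-llama/Llama-2-7b-hf",   # placeholder model name
            "prompt": "<s>[INST] Hello! [/INST]",  # BOS already present
            "max_tokens": 16,
            "add_special_tokens": False,  # the field added by this patch
        },
        timeout=60,
    )
    print(resp.json()["choices"][0]["text"])

Without the new field, the only workarounds were pre-tokenizing the prompt
client-side or accepting a duplicated BOS token, so exposing the flag on the
request keeps the fix server-side and per-request.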