From 54d3544784ff20e7038abf72793eaf734e727269 Mon Sep 17 00:00:00 2001
From: Sherry <503147114@qq.com>
Date: Fri, 1 Mar 2024 15:52:22 +0800
Subject: [PATCH] Fix: Output text is always truncated in some models (#3016)

---
 vllm/engine/llm_engine.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py
index 9bf19b932d35b..df4858a696530 100644
--- a/vllm/engine/llm_engine.py
+++ b/vllm/engine/llm_engine.py
@@ -980,7 +980,10 @@ def _check_stop(self, seq: Sequence,
     def _finalize_sequence(self, seq: Sequence,
                            sampling_params: SamplingParams,
                            stop_string: str) -> None:
-        if not sampling_params.include_stop_str_in_output and stop_string:
+        if sampling_params.include_stop_str_in_output:
+            return
+
+        if stop_string and seq.output_text.endswith(stop_string):
             # Truncate the output text so that the stop string is
             # not included in the output.
             seq.output_text = seq.output_text[:-len(stop_string)]
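
As a quick illustration of the behavior change (not part of the patch itself): the sketch below mirrors the old and new truncation conditions on a toy output string. The helper names `finalize_old` and `finalize_new` are hypothetical, standalone stand-ins for the logic inside `_finalize_sequence`, not vLLM APIs.

```python
# Illustrative sketch only -- helper names are hypothetical, not vLLM APIs.

def finalize_old(output_text: str, stop_string: str,
                 include_stop_str_in_output: bool) -> str:
    # Old condition: truncate whenever a stop string is set and the flag is
    # off, even if the output does not actually end with that stop string.
    if not include_stop_str_in_output and stop_string:
        return output_text[:-len(stop_string)]
    return output_text


def finalize_new(output_text: str, stop_string: str,
                 include_stop_str_in_output: bool) -> str:
    # New condition: keep the text as-is when the stop string should be kept,
    # and only truncate when the output really ends with the stop string.
    if include_stop_str_in_output:
        return output_text
    if stop_string and output_text.endswith(stop_string):
        return output_text[:-len(stop_string)]
    return output_text


if __name__ == "__main__":
    # Output that stopped for another reason (e.g. EOS), so the stop string
    # never made it into the text.
    text = "The answer is 42."
    print(finalize_old(text, "</s>", False))  # "The answer is" -- trailing " 42." wrongly cut off
    print(finalize_new(text, "</s>", False))  # "The answer is 42." -- left intact
```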