From 54d3544784ff20e7038abf72793eaf734e727269 Mon Sep 17 00:00:00 2001
From: Sherry <503147114@qq.com>
Date: Fri, 1 Mar 2024 15:52:22 +0800
Subject: [PATCH] Fix: Output text is always truncated in some models (#3016)

---
 vllm/engine/llm_engine.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py
index 9bf19b932d35b..df4858a696530 100644
--- a/vllm/engine/llm_engine.py
+++ b/vllm/engine/llm_engine.py
@@ -980,7 +980,10 @@ def _check_stop(self, seq: Sequence,
     def _finalize_sequence(self, seq: Sequence,
                            sampling_params: SamplingParams,
                            stop_string: str) -> None:
-        if not sampling_params.include_stop_str_in_output and stop_string:
+        if sampling_params.include_stop_str_in_output:
+            return
+
+        if stop_string and seq.output_text.endswith(stop_string):
             # Truncate the output text so that the stop string is
             # not included in the output.
             seq.output_text = seq.output_text[:-len(stop_string)]
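
As a quick illustration of the behavior change (not part of the patch itself): the sketch below mirrors the old and new truncation conditions on a toy output string. The helper names `finalize_old` and `finalize_new` are hypothetical, standalone stand-ins for the logic inside `_finalize_sequence`, not vLLM APIs.

```python
# Illustrative sketch only -- helper names are hypothetical, not vLLM APIs.

def finalize_old(output_text: str, stop_string: str,
                 include_stop_str_in_output: bool) -> str:
    # Old condition: truncate whenever a stop string is set and the flag is
    # off, even if the output does not actually end with that stop string.
    if not include_stop_str_in_output and stop_string:
        return output_text[:-len(stop_string)]
    return output_text


def finalize_new(output_text: str, stop_string: str,
                 include_stop_str_in_output: bool) -> str:
    # New condition: keep the text as-is when the stop string should be kept,
    # and only truncate when the output really ends with the stop string.
    if include_stop_str_in_output:
        return output_text
    if stop_string and output_text.endswith(stop_string):
        return output_text[:-len(stop_string)]
    return output_text


if __name__ == "__main__":
    # Output that stopped for another reason (e.g. EOS), so the stop string
    # never made it into the text.
    text = "The answer is 42."
    print(finalize_old(text, "</s>", False))  # "The answer is" -- trailing " 42." wrongly cut off
    print(finalize_new(text, "</s>", False))  # "The answer is 42." -- left intact
```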