diff --git a/src/model.py b/src/model.py index 80f51320..92a1f59d 100644 --- a/src/model.py +++ b/src/model.py @@ -213,7 +213,7 @@ def create_response(self, vllm_output): """ prompt = vllm_output.prompt text_outputs = [ - (prompt + output.text).encode("utf-8") for output in vllm_output.outputs + output.text.encode("utf-8") for output in vllm_output.outputs ] triton_output_tensor = pb_utils.Tensor( "text_output", np.asarray(text_outputs, dtype=self.output_dtype)