From a14fbab8810727187d749e0f5fe93ea6e54746d0 Mon Sep 17 00:00:00 2001
From: William Lin <SolitaryThinker@users.noreply.github.com>
Date: Mon, 19 Aug 2024 09:51:02 -0700
Subject: [PATCH] Update vllm/worker/multi_step_model_runner.py

Co-authored-by: afeldman-nm <156691304+afeldman-nm@users.noreply.github.com>
---
 vllm/worker/multi_step_model_runner.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/worker/multi_step_model_runner.py b/vllm/worker/multi_step_model_runner.py
index 0561df0bceed7..20332085b9c05 100644
--- a/vllm/worker/multi_step_model_runner.py
+++ b/vllm/worker/multi_step_model_runner.py
@@ -412,7 +412,7 @@ def vocab_size(self) -> int:
 def _pythonize_sampler_output(
         model_input: StatefulModelInput, output: SamplerOutput,
         pinned_sampled_token_buffer: torch.Tensor,
-        sampled_token_ids: torch.Tensor) -> SamplerOutput:
+        sampled_token_ids: torch.Tensor) -> None:
     """ This function is only called when the output tensors are ready. 
     See ModelOutput
     """