From 54eeec0d879fe64712699e930d71582e8a2fe95f Mon Sep 17 00:00:00 2001
From: Uranus <109661872+UranusSeven@users.noreply.github.com>
Date: Fri, 19 Apr 2024 16:18:33 +0800
Subject: [PATCH] [Misc] fix docstrings (#4191)

Co-authored-by: Zhong Wang
---
 vllm/sequence.py | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/vllm/sequence.py b/vllm/sequence.py
index 92362a9a5d2a3..7dcacab6f2ab6 100644
--- a/vllm/sequence.py
+++ b/vllm/sequence.py
@@ -160,7 +160,7 @@ def reset_state_for_recompute(self) -> None:
         self._stage = SequenceStage.PREFILL
 
     def get_num_uncomputed_tokens(self) -> int:
-        """Return the number of prefil tokens that are not computed."""
+        """Return the number of prefill tokens that are not computed."""
         # we use `get_len()` which includes prompt_len + output_len instead
         # of prompt_len here. This is because during recompute we need to
         # prefill for both prompt and output.
@@ -345,12 +345,9 @@ def fork(self, new_seq_id: int) -> "Sequence":
     def get_num_new_tokens(self) -> int:
         """Get the number of new tokens to be computed.
 
-        Args:
-            remainig_token_budget: The remaining token budgets.
         Returns:
-            The new number of tokens to be computed. I.e., 1 for decode, prompt
-            size for prefill. If there's not enough remainig_token_budget, it
-            can return the chunked number of new tokens.
+            The new number of tokens to be computed. I.e., 1 for decode, or
+            the remaining prompt size for prefill.
         """
         if self.data.stage == SequenceStage.DECODE:
             return 1
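
Note (not part of the patch): the sketch below is a minimal illustration of the behavior the corrected docstring describes, using a simplified stand-in class rather than the real vLLM Sequence/SequenceData; the names ToySequenceData, prompt_len, and computed_len are hypothetical, and the real get_num_uncomputed_tokens also accounts for output tokens during recompute.

# Illustrative sketch only: 1 new token for decode, or the remaining
# prompt size for prefill, as stated in the fixed docstring.
from enum import Enum, auto


class SequenceStage(Enum):
    PREFILL = auto()
    DECODE = auto()


class ToySequenceData:
    """Simplified stand-in tracking prompt length and computed tokens."""

    def __init__(self, prompt_len: int, computed_len: int,
                 stage: SequenceStage) -> None:
        self.prompt_len = prompt_len
        self.computed_len = computed_len
        self.stage = stage

    def get_num_uncomputed_tokens(self) -> int:
        # Prompt tokens that still need to be prefilled (simplified; the
        # real method also counts output tokens when recomputing).
        return self.prompt_len - self.computed_len


def get_num_new_tokens(data: ToySequenceData) -> int:
    # 1 for decode, or the remaining prompt size for prefill.
    if data.stage == SequenceStage.DECODE:
        return 1
    return data.get_num_uncomputed_tokens()


# Prefill with 5 of 8 prompt tokens already computed: 3 tokens remain.
assert get_num_new_tokens(ToySequenceData(8, 5, SequenceStage.PREFILL)) == 3
# Decode computes exactly one new token per step.
assert get_num_new_tokens(ToySequenceData(8, 8, SequenceStage.DECODE)) == 1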