[Fix] Fix a condition for ignored sequences (vllm-project#867)

liuyanyi · Sep 12, 2023 · d75f5ce
1 parent c377526
commit d75f5ce
Showing 1 changed file with 6 additions and 6 deletions.
diff --git a/vllm/core/scheduler.py b/vllm/core/scheduler.py
@@ -64,6 +64,9 @@ def __init__(
         self.scheduler_config = scheduler_config
         self.cache_config = cache_config
 
+        self.prompt_limit = min(self.scheduler_config.max_model_len,
+                                self.scheduler_config.max_num_batched_tokens)
+
         # Instantiate the scheduling policy.
         self.policy = PolicyFactory.get_policy(policy_name="fcfs")
         # Create the block space manager.
@@ -123,18 +126,15 @@ def _schedule(self) -> SchedulerOutputs:
                 seq_group = self.waiting[0]
 
                 num_prompt_tokens = seq_group.get_seqs()[0].get_len()
-                prompt_limit = min(
-                    self.scheduler_config.max_model_len,
-                    self.scheduler_config.max_num_batched_tokens)
-                if num_prompt_tokens > prompt_limit:
+                if num_prompt_tokens > self.prompt_limit:
                     logger.warning(
                         f"Input prompt ({num_prompt_tokens} tokens) is too long"
-                        f" and exceeds limit of {prompt_limit}")
+                        f" and exceeds limit of {self.prompt_limit}")
                     for seq in seq_group.get_seqs():
                         seq.status = SequenceStatus.FINISHED_IGNORED
                     ignored_seq_groups.append(seq_group)
                     self.waiting.pop(0)
-                    break
+                    continue
 
                 # If the sequence group cannot be allocated, stop.
                 if not self.block_manager.can_allocate(seq_group):