[Fix] Fix a condition for ignored sequences (vllm-project#867)
zhuohan123 authored Aug 28, 2023
1 parent 4b6f069 commit d2b2eed
Showing 1 changed file with 6 additions and 6 deletions.
12 changes: 6 additions & 6 deletions vllm/core/scheduler.py
@@ -64,6 +64,9 @@ def __init__(
         self.scheduler_config = scheduler_config
         self.cache_config = cache_config

+        self.prompt_limit = min(self.scheduler_config.max_model_len,
+                                self.scheduler_config.max_num_batched_tokens)
+
         # Instantiate the scheduling policy.
         self.policy = PolicyFactory.get_policy(policy_name="fcfs")
         # Create the block space manager.
@@ -123,18 +126,15 @@ def _schedule(self) -> SchedulerOutputs:
             seq_group = self.waiting[0]

             num_prompt_tokens = seq_group.get_seqs()[0].get_len()
-            prompt_limit = min(
-                self.scheduler_config.max_model_len,
-                self.scheduler_config.max_num_batched_tokens)
-            if num_prompt_tokens > prompt_limit:
+            if num_prompt_tokens > self.prompt_limit:
                 logger.warning(
                     f"Input prompt ({num_prompt_tokens} tokens) is too long"
-                    f" and exceeds limit of {prompt_limit}")
+                    f" and exceeds limit of {self.prompt_limit}")
                 for seq in seq_group.get_seqs():
                     seq.status = SequenceStatus.FINISHED_IGNORED
                 ignored_seq_groups.append(seq_group)
                 self.waiting.pop(0)
-                break
+                continue

             # If the sequence group cannot be allocated, stop.
             if not self.block_manager.can_allocate(seq_group):
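Why the one-line break -> continue matters: self.waiting is a FCFS queue, so the old break ended the scheduling pass as soon as one over-long prompt was ignored, leaving batch capacity unused and delaying every valid sequence group queued behind it until the next step. With continue, only the offending group is marked FINISHED_IGNORED and popped, and the scheduler keeps scanning the rest of the queue. Below is a minimal, self-contained sketch of that behavior; Seq, PROMPT_LIMIT, and schedule_step are illustrative stand-ins, not vLLM's actual Scheduler, which also exits the loop for other reasons (e.g. when can_allocate fails).

    # Sketch of the loop behavior this commit fixes (illustrative only).
    from collections import deque
    from dataclasses import dataclass

    # Stands in for min(max_model_len, max_num_batched_tokens), which the
    # commit hoists out of the loop into Scheduler.__init__.
    PROMPT_LIMIT = 8

    @dataclass
    class Seq:
        name: str
        num_prompt_tokens: int

    def schedule_step(waiting: deque) -> tuple[list, list]:
        scheduled, ignored = [], []
        while waiting:
            seq = waiting[0]
            if seq.num_prompt_tokens > PROMPT_LIMIT:
                # Prompt too long: ignore it and keep scanning the queue.
                # Before the fix this was `break`, which also stalled every
                # valid sequence queued behind the over-long one.
                ignored.append(waiting.popleft())
                continue
            scheduled.append(waiting.popleft())
        return scheduled, ignored

    waiting = deque([Seq("a", 4), Seq("b", 99), Seq("c", 5)])
    scheduled, ignored = schedule_step(waiting)
    print([s.name for s in scheduled])  # ['a', 'c']
    print([s.name for s in ignored])    # ['b']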
