Skip to content
This repository has been archived by the owner on Oct 11, 2024. It is now read-only.

Commit

Permalink
[Core] Scheduler perf fix (vllm-project#4270)
Browse files Browse the repository at this point in the history
  • Loading branch information
rkooo567 authored and robertgshaw2-neuralmagic committed Apr 26, 2024
1 parent 91e2e19 commit 542dc70
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 14 deletions.
18 changes: 9 additions & 9 deletions tests/core/test_scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -540,7 +540,7 @@ def test_decode_schedule_preempted():
curr_loras = None
for i in range(3):
_, seq_group = create_dummy_prompt(str(i), prompt_length=60)
scheduler._allocate_and_set_running(seq_group, 60)
scheduler._allocate_and_set_running(seq_group)
append_new_token_seq_group(60, seq_group, 1)
running.append(seq_group)
scheduler.block_manager.can_append_slots = MagicMock()
Expand Down Expand Up @@ -581,7 +581,7 @@ def test_decode_swap_beam_search():
budget = create_token_budget()
for i in range(3):
_, seq_group = create_dummy_prompt(str(i), prompt_length=60, best_of=2)
scheduler._allocate_and_set_running(seq_group, 60)
scheduler._allocate_and_set_running(seq_group)
running.append(seq_group)
append_new_token_seq_group(60, seq_group, 1)
budget.add_num_seqs(seq_group.request_id,
Expand Down Expand Up @@ -629,7 +629,7 @@ def test_schedule_decode_blocks_to_copy_update():
running = deque()
policy = PolicyFactory.get_policy(policy_name="fcfs")
curr_loras = None
scheduler._allocate_and_set_running(seq_group, 60)
scheduler._allocate_and_set_running(seq_group)
append_new_token_seq_group(60, seq_group, 1)
running.append(seq_group)

Expand Down Expand Up @@ -659,7 +659,7 @@ def test_schedule_swapped_simple():
curr_loras = None
blocks_to_swap_out = {}
_, seq_group = create_dummy_prompt("1", prompt_length=60, best_of=2)
scheduler._allocate_and_set_running(seq_group, 60)
scheduler._allocate_and_set_running(seq_group)
append_new_token_seq_group(60, seq_group, 1)
scheduler._swap_out(seq_group, blocks_to_swap_out)
swapped.append(seq_group)
Expand Down Expand Up @@ -687,7 +687,7 @@ def test_schedule_swapped_max_token_budget():
blocks_to_swap_out = {}
for _ in range(2):
_, seq_group = create_dummy_prompt("1", prompt_length=60, best_of=2)
scheduler._allocate_and_set_running(seq_group, 60)
scheduler._allocate_and_set_running(seq_group)
append_new_token_seq_group(60, seq_group, 1)
scheduler._swap_out(seq_group, blocks_to_swap_out)
swapped.append(seq_group)
Expand Down Expand Up @@ -721,7 +721,7 @@ def test_schedule_swapped_max_seqs():
blocks_to_swap_out = {}
for i in range(4):
_, seq_group = create_dummy_prompt(str(i), prompt_length=60)
scheduler._allocate_and_set_running(seq_group, 60)
scheduler._allocate_and_set_running(seq_group)
append_new_token_seq_group(60, seq_group, 1)
scheduler._swap_out(seq_group, blocks_to_swap_out)
swapped.append(seq_group)
Expand Down Expand Up @@ -759,7 +759,7 @@ def test_schedule_swapped_max_loras():
lora_name=str(i),
lora_int_id=i + 1,
lora_local_path="abc"))
scheduler._allocate_and_set_running(seq_group, 60)
scheduler._allocate_and_set_running(seq_group)
append_new_token_seq_group(60, seq_group, 1)
scheduler._swap_out(seq_group, blocks_to_swap_out)
swapped.append(seq_group)
Expand All @@ -783,7 +783,7 @@ def test_schedule_swapped_cannot_swap_in():
blocks_to_swap_out = {}
for _ in range(2):
_, seq_group = create_dummy_prompt("1", prompt_length=60, best_of=2)
scheduler._allocate_and_set_running(seq_group, 60)
scheduler._allocate_and_set_running(seq_group)
append_new_token_seq_group(60, seq_group, 1)
scheduler._swap_out(seq_group, blocks_to_swap_out)
swapped.append(seq_group)
Expand All @@ -808,7 +808,7 @@ def test_schedule_swapped_blocks_to_copy():
policy = PolicyFactory.get_policy(policy_name="fcfs")
curr_loras = None
_, seq_group = create_dummy_prompt("1", prompt_length=60, best_of=2)
scheduler._allocate_and_set_running(seq_group, 60)
scheduler._allocate_and_set_running(seq_group)
append_new_token_seq_group(60, seq_group, 1)
blocks_to_swap_out = {}
scheduler._swap_out(seq_group, blocks_to_swap_out)
Expand Down
7 changes: 2 additions & 5 deletions vllm/core/scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -297,7 +297,6 @@ def num_decoding_tokens_per_seq(self) -> int:

def add_seq_group(self, seq_group: SequenceGroup) -> None:
# Add sequence groups to the waiting queue.
logger.debug(f"add_seq_group {seq_group.request_id}")
self.waiting.append(seq_group)

def abort_seq_group(self, request_id: Union[str, Iterable[str]]) -> None:
Expand Down Expand Up @@ -427,7 +426,6 @@ def _schedule_running(
swapped_out.append(seq_group)
break
else:
logger.debug(f"append slot for {seq_group}")
self._append_slots(seq_group, blocks_to_copy)
is_prefill = seq_group.is_prefill()
if is_prefill:
Expand Down Expand Up @@ -659,7 +657,7 @@ def _schedule_prefills(
if curr_loras is not None and lora_int_id > 0:
curr_loras.add(lora_int_id)
waiting_queue.popleft()
self._allocate_and_set_running(seq_group, num_new_tokens)
self._allocate_and_set_running(seq_group)
seq_groups.append(
ScheduledSequenceGroup(seq_group=seq_group,
token_chunk_size=num_new_tokens))
Expand Down Expand Up @@ -973,8 +971,7 @@ def free_finished_seq_groups(self) -> None:
self.running = deque(seq_group for seq_group in self.running
if not seq_group.is_finished())

def _allocate_and_set_running(self, seq_group: SequenceGroup,
num_new_tokens: int) -> None:
def _allocate_and_set_running(self, seq_group: SequenceGroup) -> None:
self.block_manager.allocate(seq_group)
for seq in seq_group.get_seqs(status=SequenceStatus.WAITING):
seq.status = SequenceStatus.RUNNING
Expand Down

0 comments on commit 542dc70

Please sign in to comment.