diff --git a/tests/core/test_scheduler.py b/tests/core/test_scheduler.py index 9588a1bead5f6..a2511238506b0 100644 --- a/tests/core/test_scheduler.py +++ b/tests/core/test_scheduler.py @@ -540,7 +540,7 @@ def test_decode_schedule_preempted(): curr_loras = None for i in range(3): _, seq_group = create_dummy_prompt(str(i), prompt_length=60) - scheduler._allocate_and_set_running(seq_group, 60) + scheduler._allocate_and_set_running(seq_group) append_new_token_seq_group(60, seq_group, 1) running.append(seq_group) scheduler.block_manager.can_append_slots = MagicMock() @@ -581,7 +581,7 @@ def test_decode_swap_beam_search(): budget = create_token_budget() for i in range(3): _, seq_group = create_dummy_prompt(str(i), prompt_length=60, best_of=2) - scheduler._allocate_and_set_running(seq_group, 60) + scheduler._allocate_and_set_running(seq_group) running.append(seq_group) append_new_token_seq_group(60, seq_group, 1) budget.add_num_seqs(seq_group.request_id, @@ -629,7 +629,7 @@ def test_schedule_decode_blocks_to_copy_update(): running = deque() policy = PolicyFactory.get_policy(policy_name="fcfs") curr_loras = None - scheduler._allocate_and_set_running(seq_group, 60) + scheduler._allocate_and_set_running(seq_group) append_new_token_seq_group(60, seq_group, 1) running.append(seq_group) @@ -659,7 +659,7 @@ def test_schedule_swapped_simple(): curr_loras = None blocks_to_swap_out = {} _, seq_group = create_dummy_prompt("1", prompt_length=60, best_of=2) - scheduler._allocate_and_set_running(seq_group, 60) + scheduler._allocate_and_set_running(seq_group) append_new_token_seq_group(60, seq_group, 1) scheduler._swap_out(seq_group, blocks_to_swap_out) swapped.append(seq_group) @@ -687,7 +687,7 @@ def test_schedule_swapped_max_token_budget(): blocks_to_swap_out = {} for _ in range(2): _, seq_group = create_dummy_prompt("1", prompt_length=60, best_of=2) - scheduler._allocate_and_set_running(seq_group, 60) + scheduler._allocate_and_set_running(seq_group) append_new_token_seq_group(60, seq_group, 1) scheduler._swap_out(seq_group, blocks_to_swap_out) swapped.append(seq_group) @@ -721,7 +721,7 @@ def test_schedule_swapped_max_seqs(): blocks_to_swap_out = {} for i in range(4): _, seq_group = create_dummy_prompt(str(i), prompt_length=60) - scheduler._allocate_and_set_running(seq_group, 60) + scheduler._allocate_and_set_running(seq_group) append_new_token_seq_group(60, seq_group, 1) scheduler._swap_out(seq_group, blocks_to_swap_out) swapped.append(seq_group) @@ -759,7 +759,7 @@ def test_schedule_swapped_max_loras(): lora_name=str(i), lora_int_id=i + 1, lora_local_path="abc")) - scheduler._allocate_and_set_running(seq_group, 60) + scheduler._allocate_and_set_running(seq_group) append_new_token_seq_group(60, seq_group, 1) scheduler._swap_out(seq_group, blocks_to_swap_out) swapped.append(seq_group) @@ -783,7 +783,7 @@ def test_schedule_swapped_cannot_swap_in(): blocks_to_swap_out = {} for _ in range(2): _, seq_group = create_dummy_prompt("1", prompt_length=60, best_of=2) - scheduler._allocate_and_set_running(seq_group, 60) + scheduler._allocate_and_set_running(seq_group) append_new_token_seq_group(60, seq_group, 1) scheduler._swap_out(seq_group, blocks_to_swap_out) swapped.append(seq_group) @@ -808,7 +808,7 @@ def test_schedule_swapped_blocks_to_copy(): policy = PolicyFactory.get_policy(policy_name="fcfs") curr_loras = None _, seq_group = create_dummy_prompt("1", prompt_length=60, best_of=2) - scheduler._allocate_and_set_running(seq_group, 60) + scheduler._allocate_and_set_running(seq_group) append_new_token_seq_group(60, seq_group, 1) blocks_to_swap_out = {} scheduler._swap_out(seq_group, blocks_to_swap_out) diff --git a/vllm/core/scheduler.py b/vllm/core/scheduler.py index 4198550621030..8d7db09bbea08 100644 --- a/vllm/core/scheduler.py +++ b/vllm/core/scheduler.py @@ -297,7 +297,6 @@ def num_decoding_tokens_per_seq(self) -> int: def add_seq_group(self, seq_group: SequenceGroup) -> None: # Add sequence groups to the waiting queue. - logger.debug(f"add_seq_group {seq_group.request_id}") self.waiting.append(seq_group) def abort_seq_group(self, request_id: Union[str, Iterable[str]]) -> None: @@ -427,7 +426,6 @@ def _schedule_running( swapped_out.append(seq_group) break else: - logger.debug(f"append slot for {seq_group}") self._append_slots(seq_group, blocks_to_copy) is_prefill = seq_group.is_prefill() if is_prefill: @@ -659,7 +657,7 @@ def _schedule_prefills( if curr_loras is not None and lora_int_id > 0: curr_loras.add(lora_int_id) waiting_queue.popleft() - self._allocate_and_set_running(seq_group, num_new_tokens) + self._allocate_and_set_running(seq_group) seq_groups.append( ScheduledSequenceGroup(seq_group=seq_group, token_chunk_size=num_new_tokens)) @@ -952,8 +950,7 @@ def free_finished_seq_groups(self) -> None: self.running = deque(seq_group for seq_group in self.running if not seq_group.is_finished()) - def _allocate_and_set_running(self, seq_group: SequenceGroup, - num_new_tokens: int) -> None: + def _allocate_and_set_running(self, seq_group: SequenceGroup) -> None: self.block_manager.allocate(seq_group) for seq in seq_group.get_seqs(status=SequenceStatus.WAITING): seq.status = SequenceStatus.RUNNING