diff --git a/vllm/core/scheduler.py b/vllm/core/scheduler.py
index 06df11a8f0e73..0764eb3a6bf11 100644
--- a/vllm/core/scheduler.py
+++ b/vllm/core/scheduler.py
@@ -1086,7 +1086,7 @@ def _can_append_slots(self, seq_group: SequenceGroup) -> bool:
         )
 
     def _allow_async_output_proc(self, seq_group: SequenceGroup) -> bool:
-        no_beam_search = (seq_group.sampling_params.n == 1
+        no_beam_search = (seq_group.sampling_params.best_of == 1
                           and not seq_group.sampling_params.use_beam_search)
         return no_beam_search
 