From d18d646038c07cc7026fc5daac4f8d569a370f17 Mon Sep 17 00:00:00 2001 From: Nick Hill Date: Sat, 10 Aug 2024 08:44:57 -0700 Subject: [PATCH] Fix ignored sequence case --- vllm/engine/llm_engine.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py index 40fa20311a879..bacb031f51cc3 100644 --- a/vllm/engine/llm_engine.py +++ b/vllm/engine/llm_engine.py @@ -1236,9 +1236,19 @@ def _process_model_outputs( if request_output: request_outputs.append(request_output) for seq_group in ignored_seq_groups: - if seq_group.sampling_params.output_kind == ( - RequestOutputKind.CUMULATIVE): - request_output = RequestOutputFactory.create(seq_group) + params = seq_group.sampling_params + if params is not None and params.output_kind == ( + RequestOutputKind.DELTA): + if not seq_group.is_finished(): + continue + # Ignored seq groups have no delta, but we must still return + # an "empty" RequestOutput when finished + for seq in seq_group.seqs: + previous_output_lens[seq.seq_id] = (seq.get_output_len(), + seq.output_text) + request_output = RequestOutputFactory.create( + seq_group, previous_output_lens) + if request_output: request_outputs.append(request_output) return request_outputs