Skip to content

Commit

Permalink
[Bugfix] Qwen-vl output is inconsistent in speculative decoding (#10350)
Browse files (browse the repository at this point in the history)
  • Loading branch information
skylee-01 authored Nov 15, 2024
1 parent b40cf64 commit 2ec8827
Showing 1 changed file with 2 additions and 0 deletions.
2 changes: 2 additions & 0 deletions vllm/spec_decode/batch_expansion.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -353,6 +353,7 @@ def _create_single_target_seq_group_metadata(
seq_data = seq_group_metadata.seq_data[seq_id]
prompt_token_ids = seq_data.prompt_token_ids_array
new_output_token_ids = [*seq_data.get_output_token_ids(), *token_ids]
mrope_position_delta = seq_data.mrope_position_delta

new_seq_data_dict = {
target_seq_id:
Expand All @@ -368,6 +369,7 @@ def _create_single_target_seq_group_metadata(
# the kv cache is filled by a previous batch in the batch expansion.
for data in new_seq_data_dict.values():
data.update_num_computed_tokens(data.get_len() - 1)
data.mrope_position_delta = mrope_position_delta

return SequenceGroupMetadata(
request_id=seq_group_metadata.request_id,
Expand Down

0 comments on commit 2ec8827

Please sign in to comment.