diff --git a/vllm/model_executor/input_metadata.py b/vllm/model_executor/input_metadata.py index 1aba86bf361c7..265f3d357ad4c 100644 --- a/vllm/model_executor/input_metadata.py +++ b/vllm/model_executor/input_metadata.py @@ -66,7 +66,6 @@ def __init__( else: self.max_num_blocks_per_seq = 0 assert block_tables.shape[0] == self.num_generation_tokens - assert context_lens.shape[0] == self.num_generation_tokens # Set during the execution of the first attention op. self.attn_bias: Optional[AttentionBias] = None