Fix for breaking changes in xformers 0.0.21 (vllm-project#834)
WoosukKwon authored Aug 23, 2023
1 parent b483a2f commit e212a09
Showing 2 changed files with 4 additions and 3 deletions.
2 changes: 1 addition & 1 deletion requirements.txt
@@ -5,7 +5,7 @@ sentencepiece # Required for LLaMA tokenizer.
 numpy
 torch >= 2.0.0
 transformers >= 4.31.0 # Required for LLaMA-2.
-xformers >= 0.0.19
+xformers >= 0.0.21
 fastapi
 uvicorn
 pydantic < 2 # Required for OpenAI server.
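For anyone syncing a local environment to this commit, the dependency bump amounts to re-resolving the new constraint; a plain pip invocation along these lines should suffice (hypothetical, exact setup steps vary by environment):

pip install --upgrade "xformers>=0.0.21"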
5 changes: 3 additions & 2 deletions vllm/model_executor/layers/attention.py
@@ -357,11 +357,12 @@ def set_attn_bias(self, input_metadata: InputMetadata) -> None:
             # be sliced from a tensor whose length is a multiple of 8.
             padded_len = (prompt_len + 7) // 8 * 8
             bias = torch.empty(
+                1,  # batch_size
                 self.num_heads,
-                padded_len,
+                prompt_len,
                 padded_len,
                 device=self.alibi_slopes.device,
-            )[:, :prompt_len, :prompt_len].copy_(bias)
+            )[:, :, :, :prompt_len].copy_(bias)
             bias.mul_(self.alibi_slopes[:, None, None])
             attn_bias = LowerTriangularMaskWithTensorBias(bias)
             input_metadata.attn_bias.append(attn_bias)
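As a sanity check on the new tensor layout, here is a minimal standalone sketch of what the changed lines now build. It uses only PyTorch; num_heads, prompt_len, and alibi_slopes are illustrative placeholders rather than vLLM's actual attributes, and the 4-D shape with the leading batch dimension mirrors the layout this commit hands to xformers 0.0.21.

import torch

# Hypothetical standalone sketch (not vLLM's API): reproduce the shape logic
# of the changed lines above.
num_heads = 8
prompt_len = 13
alibi_slopes = torch.rand(num_heads)  # placeholder ALiBi slopes, one per head

# Relative-position distances, shape (prompt_len, prompt_len).
pos = torch.arange(prompt_len)
bias = pos[None, :] - pos[:, None]

# xformers requires the bias to be sliced from a tensor whose last
# dimension is a multiple of 8.
padded_len = (prompt_len + 7) // 8 * 8  # 16

bias = torch.empty(
    1,            # batch_size (the dimension added by this commit)
    num_heads,
    prompt_len,
    padded_len,
    dtype=torch.float32,
)[:, :, :, :prompt_len].copy_(bias)

# Scale each head's bias by its slope; broadcasting absorbs the extra batch
# dimension, which is why the mul_ line did not need to change.
bias.mul_(alibi_slopes[:, None, None])

print(bias.shape)  # torch.Size([1, 8, 13, 13])

In vLLM the resulting tensor is then wrapped in xformers' LowerTriangularMaskWithTensorBias, as in the unchanged lines at the end of the hunk.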