This repository has been archived by the owner on Oct 11, 2024. It is now read-only.

Commit

[Bugfix] Enable Proper attention_bias Usage in Llama Model Configuration (vllm-project#3767)

Co-authored-by: roy <[email protected]>
2 people authored and andy-neuma committed Apr 12, 2024
1 parent 209c356 commit b4b4e33
Showing 1 changed file with 5 additions and 1 deletion.
6 changes: 5 additions & 1 deletion vllm/model_executor/models/llama.py
@@ -184,6 +184,10 @@ def __init__(
         max_position_embeddings = getattr(config, "max_position_embeddings",
                                           8192)
         sliding_window = getattr(config, "sliding_window", None)
+        # Support abacusai/Smaug-72B-v0.1 with attention_bias
+        # Support internlm/internlm-7b with bias
+        attention_bias = getattr(config, "attention_bias", False) or getattr(
+            config, "bias", False)
         self.self_attn = LlamaAttention(
             hidden_size=self.hidden_size,
             num_heads=config.num_attention_heads,
@@ -193,7 +197,7 @@ def __init__(
             rope_scaling=rope_scaling,
             max_position_embeddings=max_position_embeddings,
             linear_method=linear_method,
-            bias=getattr(config, "bias", False),
+            bias=attention_bias,
             sliding_window=sliding_window,
         )
         self.mlp = LlamaMLP(
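
For reference, a minimal standalone sketch of the fallback behavior this patch introduces, assuming a HuggingFace-style config object (the FakeConfig helper and resolve_attention_bias name are illustrative, not part of vLLM): both attention_bias (as set by abacusai/Smaug-72B-v0.1) and the older bias field (as set by internlm/internlm-7b) now enable bias on the attention projections, while a plain Llama config keeps it disabled.

# Minimal sketch, not vLLM code. FakeConfig stands in for a
# LlamaConfig-like object; resolve_attention_bias mirrors the patched
# getattr fallback added in llama.py above.

class FakeConfig:
    def __init__(self, **kwargs):
        for key, value in kwargs.items():
            setattr(self, key, value)


def resolve_attention_bias(config) -> bool:
    # Prefer the newer `attention_bias` attribute, fall back to `bias`.
    return getattr(config, "attention_bias", False) or getattr(
        config, "bias", False)


assert resolve_attention_bias(FakeConfig(attention_bias=True))  # Smaug-72B-style config
assert resolve_attention_bias(FakeConfig(bias=True))            # InternLM-style config
assert not resolve_attention_bias(FakeConfig())                 # plain Llama config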
