diff --git a/vllm/model_executor/models/llama.py b/vllm/model_executor/models/llama.py
index 57857deb9eb86..96d229701c32d 100644
--- a/vllm/model_executor/models/llama.py
+++ b/vllm/model_executor/models/llama.py
@@ -172,6 +172,10 @@ def __init__(
         max_position_embeddings = getattr(config, "max_position_embeddings",
                                           8192)
         sliding_window = getattr(config, "sliding_window", None)
+        # Support abacusai/Smaug-72B-v0.1 with attention_bias
+        # Support internlm/internlm-7b with bias
+        attention_bias = getattr(config, "attention_bias", False) or getattr(
+            config, "bias", False)
         self.self_attn = LlamaAttention(
             hidden_size=self.hidden_size,
             num_heads=config.num_attention_heads,
@@ -181,7 +185,7 @@ def __init__(
             rope_scaling=rope_scaling,
             max_position_embeddings=max_position_embeddings,
             linear_method=linear_method,
-            bias=getattr(config, "bias", False),
+            bias=attention_bias,
             sliding_window=sliding_window,
         )
         self.mlp = LlamaMLP(
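
For context, the change prefers the `attention_bias` config field (the name used by Llama-style configs such as abacusai/Smaug-72B-v0.1) and falls back to the older `bias` field (used by internlm/internlm-7b), so either spelling enables bias on the attention projections. Below is a minimal standalone sketch of that resolution logic; the `SimpleNamespace` configs and their attribute values are illustrative stand-ins, not copied from the real model repos:

```python
from types import SimpleNamespace


def resolve_attention_bias(config) -> bool:
    # Mirrors the diff: prefer `attention_bias`, fall back to `bias`.
    # Missing attributes default to False, preserving prior behavior
    # for configs that set neither field.
    return getattr(config, "attention_bias", False) or getattr(
        config, "bias", False)


# Illustrative configs (attribute values assumed for demonstration):
smaug_like = SimpleNamespace(attention_bias=True)  # Smaug-72B-v0.1 style
internlm_like = SimpleNamespace(bias=True)         # internlm-7b style
plain_llama = SimpleNamespace()                    # neither field set

assert resolve_attention_bias(smaug_like) is True
assert resolve_attention_bias(internlm_like) is True
assert resolve_attention_bias(plain_llama) is False
```

Note that `or` short-circuits, so a config defining both fields resolves to `attention_bias` whenever it is truthy, which matches the new default `bias=attention_bias` passed to `LlamaAttention`.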