From bc0c0192d13ca6ea4aeea4725f752a89483895bc Mon Sep 17 00:00:00 2001
From: Kiran R
Date: Tue, 9 Apr 2024 01:12:35 +0530
Subject: [PATCH] [Bugfix] Enable Proper `attention_bias` Usage in Llama Model
 Configuration (#3767)

Co-authored-by: roy
---
 vllm/model_executor/models/llama.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/vllm/model_executor/models/llama.py b/vllm/model_executor/models/llama.py
index ef19c41e67ae6..72fe21df67d8a 100644
--- a/vllm/model_executor/models/llama.py
+++ b/vllm/model_executor/models/llama.py
@@ -184,6 +184,10 @@ def __init__(
         max_position_embeddings = getattr(config, "max_position_embeddings",
                                           8192)
         sliding_window = getattr(config, "sliding_window", None)
+        # Support abacusai/Smaug-72B-v0.1 with attention_bias
+        # Support internlm/internlm-7b with bias
+        attention_bias = getattr(config, "attention_bias", False) or getattr(
+            config, "bias", False)
         self.self_attn = LlamaAttention(
             hidden_size=self.hidden_size,
             num_heads=config.num_attention_heads,
@@ -193,7 +197,7 @@ def __init__(
             rope_scaling=rope_scaling,
             max_position_embeddings=max_position_embeddings,
             linear_method=linear_method,
-            bias=getattr(config, "bias", False),
+            bias=attention_bias,
             sliding_window=sliding_window,
         )
         self.mlp = LlamaMLP(
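
Note (not part of the patch): the sketch below illustrates the fallback the diff
introduces, namely that configs exposing `attention_bias` (e.g.
abacusai/Smaug-72B-v0.1), configs exposing a legacy `bias` attribute (e.g.
internlm/internlm-7b), and plain Llama configs with neither attribute all resolve
correctly. The helper name `resolve_attention_bias` and the SimpleNamespace configs
are hypothetical stand-ins for the HF config objects, used only for illustration.

    from types import SimpleNamespace

    def resolve_attention_bias(config) -> bool:
        # Mirrors the patched lookup: prefer `attention_bias`, fall back to `bias`.
        return bool(
            getattr(config, "attention_bias", False)
            or getattr(config, "bias", False))

    smaug_cfg = SimpleNamespace(attention_bias=True)   # exposes attention_bias
    internlm_cfg = SimpleNamespace(bias=True)          # exposes legacy bias
    plain_llama_cfg = SimpleNamespace()                # exposes neither attribute

    assert resolve_attention_bias(smaug_cfg) is True
    assert resolve_attention_bias(internlm_cfg) is True
    assert resolve_attention_bias(plain_llama_cfg) is False

The resolved flag is then passed as `bias=attention_bias` to LlamaAttention, which
is the single change the second hunk makes.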