From aa9af07cac7b681d7195dbd5de621fc5a2acde99 Mon Sep 17 00:00:00 2001
From: Woosuk Kwon
Date: Mon, 30 Oct 2023 00:24:18 +0100
Subject: [PATCH] Fix bias in InternLM (#1501)

---
 vllm/model_executor/models/internlm.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/vllm/model_executor/models/internlm.py b/vllm/model_executor/models/internlm.py
index ce35eaf4f6a38..4a595a37730da 100644
--- a/vllm/model_executor/models/internlm.py
+++ b/vllm/model_executor/models/internlm.py
@@ -62,6 +62,7 @@ def __init__(
         self,
         hidden_size: int,
         num_heads: int,
+        bias: bool,
         rope_theta: float = 10000,
         max_position_embeddings: int = 8192,
     ):
@@ -81,13 +82,13 @@ def __init__(
         self.qkv_proj = ColumnParallelLinear(
             hidden_size,
             3 * self.total_num_heads * self.head_dim,
-            bias=True,
+            bias=bias,
             gather_output=False,
         )
         self.o_proj = RowParallelLinear(
             self.total_num_heads * self.head_dim,
             hidden_size,
-            bias=True,
+            bias=bias,
             input_is_parallel=True,
         )
         self.attn = PagedAttentionWithRoPE(
@@ -126,6 +127,7 @@ def __init__(self, config: LlamaConfig):
         self.self_attn = InternLMAttention(
            hidden_size=self.hidden_size,
             num_heads=config.num_attention_heads,
+            bias=config.bias,
             rope_theta=rope_theta,
             max_position_embeddings=max_position_embeddings,
         )
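
Note: the sketch below is not part of the patch; it is a minimal stand-alone illustration of the pattern the patch introduces, namely threading a model-config `bias` flag into the attention projections instead of hard-coding `bias=True`. It uses plain `torch.nn.Linear` and a hypothetical `SimpleAttention` class as stand-ins for vLLM's `ColumnParallelLinear`, `RowParallelLinear`, and `InternLMAttention`.

# Minimal sketch (assumed names, not vLLM code): the config-level `bias`
# flag decides whether the QKV and output projections allocate bias terms.
import torch.nn as nn


class SimpleAttention(nn.Module):
    def __init__(self, hidden_size: int, num_heads: int, bias: bool):
        super().__init__()
        self.head_dim = hidden_size // num_heads
        # Before the patch the equivalent layers always used bias=True;
        # now the flag propagates from the checkpoint's config.
        self.qkv_proj = nn.Linear(hidden_size, 3 * num_heads * self.head_dim, bias=bias)
        self.o_proj = nn.Linear(num_heads * self.head_dim, hidden_size, bias=bias)


# Usage: a checkpoint whose config sets bias = False now builds bias-free
# projections, matching its weights, instead of always allocating biases.
attn = SimpleAttention(hidden_size=4096, num_heads=32, bias=False)
assert attn.qkv_proj.bias is None and attn.o_proj.bias is None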