From aa9af07cac7b681d7195dbd5de621fc5a2acde99 Mon Sep 17 00:00:00 2001
From: Woosuk Kwon
Date: Mon, 30 Oct 2023 00:24:18 +0100
Subject: [PATCH] Fix bias in InternLM (#1501)

---
 vllm/model_executor/models/internlm.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/vllm/model_executor/models/internlm.py b/vllm/model_executor/models/internlm.py
index ce35eaf4f6a38..4a595a37730da 100644
--- a/vllm/model_executor/models/internlm.py
+++ b/vllm/model_executor/models/internlm.py
@@ -62,6 +62,7 @@ def __init__(
         self,
         hidden_size: int,
         num_heads: int,
+        bias: bool,
         rope_theta: float = 10000,
         max_position_embeddings: int = 8192,
     ):
@@ -81,13 +82,13 @@ def __init__(
         self.qkv_proj = ColumnParallelLinear(
             hidden_size,
             3 * self.total_num_heads * self.head_dim,
-            bias=True,
+            bias=bias,
             gather_output=False,
         )
         self.o_proj = RowParallelLinear(
             self.total_num_heads * self.head_dim,
             hidden_size,
-            bias=True,
+            bias=bias,
             input_is_parallel=True,
         )
         self.attn = PagedAttentionWithRoPE(
@@ -126,6 +127,7 @@ def __init__(self, config: LlamaConfig):
         self.self_attn = InternLMAttention(
            hidden_size=self.hidden_size,
             num_heads=config.num_attention_heads,
+            bias=config.bias,
             rope_theta=rope_theta,
             max_position_embeddings=max_position_embeddings,
         )
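
Note: the sketch below is not part of the patch; it is a minimal stand-alone illustration of the pattern the patch introduces, namely threading a model-config `bias` flag into the attention projections instead of hard-coding `bias=True`. It uses plain `torch.nn.Linear` and a hypothetical `SimpleAttention` class as stand-ins for vLLM's `ColumnParallelLinear`, `RowParallelLinear`, and `InternLMAttention`.

# Minimal sketch (assumed names, not vLLM code): the config-level `bias`
# flag decides whether the QKV and output projections allocate bias terms.
import torch.nn as nn


class SimpleAttention(nn.Module):
    def __init__(self, hidden_size: int, num_heads: int, bias: bool):
        super().__init__()
        self.head_dim = hidden_size // num_heads
        # Before the patch the equivalent layers always used bias=True;
        # now the flag propagates from the checkpoint's config.
        self.qkv_proj = nn.Linear(hidden_size, 3 * num_heads * self.head_dim, bias=bias)
        self.o_proj = nn.Linear(num_heads * self.head_dim, hidden_size, bias=bias)


# Usage: a checkpoint whose config sets bias = False now builds bias-free
# projections, matching its weights, instead of always allocating biases.
attn = SimpleAttention(hidden_size=4096, num_heads=32, bias=False)
assert attn.qkv_proj.bias is None and attn.o_proj.bias is None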