ChatGLM Support (vllm-project#1261)

gameofdimension · Nov 7, 2023 · 1a2bbc9
1 parent e7f579e
commit 1a2bbc9
Show file tree

Hide file tree

Showing 7 changed files with 490 additions and 4 deletions.
diff --git a/vllm/config.py b/vllm/config.py
@@ -166,6 +166,10 @@ def get_num_kv_heads(self, parallel_config: "ParallelConfig") -> int:
         if getattr(self.hf_config, "num_key_value_heads", None) is not None:
             return (self.hf_config.num_key_value_heads //
                     parallel_config.tensor_parallel_size)
+        # ChatGLM-2 stores its KV head count as `multi_query_group_num`.
+        if getattr(self.hf_config, "multi_query_group_num", None) is not None:
+            return (self.hf_config.multi_query_group_num //
+                    parallel_config.tensor_parallel_size)
         total_num_attention_heads = self.hf_config.num_attention_heads
         return total_num_attention_heads // parallel_config.tensor_parallel_size
 

diff --git a/vllm/model_executor/model_loader.py b/vllm/model_executor/model_loader.py
@@ -18,6 +18,7 @@
     "BaiChuanForCausalLM": BaiChuanForCausalLM,  # baichuan-7b
     "BaichuanForCausalLM": BaichuanForCausalLM,  # baichuan-13b
     "BloomForCausalLM": BloomForCausalLM,
+    "ChatGLMModel": ChatGLMForCausalLM,
     "FalconForCausalLM": FalconForCausalLM,
     "GPT2LMHeadModel": GPT2LMHeadModel,
     "GPTBigCodeForCausalLM": GPTBigCodeForCausalLM,

diff --git a/vllm/model_executor/models/__init__.py b/vllm/model_executor/models/__init__.py
@@ -13,13 +13,15 @@
 from vllm.model_executor.models.mpt import MptForCausalLM
 from vllm.model_executor.models.opt import OPTForCausalLM
 from vllm.model_executor.models.qwen import QWenLMHeadModel
+from vllm.model_executor.models.chatglm import ChatGLMForCausalLM
 from vllm.model_executor.models.yi import YiForCausalLM
 
 __all__ = [
     "AquilaForCausalLM",
     "BaiChuanForCausalLM",
     "BaichuanForCausalLM",
     "BloomForCausalLM",
+    "ChatGLMForCausalLM",
     "FalconForCausalLM",
     "GPT2LMHeadModel",
     "GPTBigCodeForCausalLM",