FEAT: Add support for Qwen/Qwen2.5-Coder-7B-Instruct gptq format (#2408)
frostyplanet authored Oct 22, 2024
1 parent ef21a71 commit 48a07e8
Showing 2 changed files with 20 additions and 0 deletions.
9 changes: 9 additions & 0 deletions xinference/model/llm/llm_family.json
@@ -8176,6 +8176,15 @@
],
"model_id": "Qwen/Qwen2.5-Coder-7B-Instruct"
},
{
"model_format": "gptq",
"model_size_in_billions": 7,
"quantizations": [
"Int4",
"Int8"
],
"model_id": "Qwen/Qwen2.5-Coder-7B-Instruct-GPTQ-{quantization}"
},
{
"model_format": "ggufv2",
"model_size_in_billions": "1_5",
11 changes: 11 additions & 0 deletions xinference/model/llm/llm_family_modelscope.json
@@ -5880,6 +5880,17 @@
"model_revision": "master",
"model_hub": "modelscope"
},
{
"model_format": "gptq",
"model_size_in_billions": 7,
"quantizations": [
"Int4",
"Int8"
],
"model_id": "qwen/Qwen2.5-Coder-7B-Instruct-GPTQ-{quantization}",
"model_revision": "master",
"model_hub": "modelscope"
},
{
"model_format": "ggufv2",
"model_size_in_billions": "1_5",
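Both new spec entries use a `{quantization}` placeholder in `model_id`, so one entry covers every listed quantization. A minimal sketch of how such a placeholder could be resolved at load time (this is an illustration, not xinference's actual loader; the `resolve_model_id` helper is hypothetical):

```python
# One of the spec entries added by this commit (Hugging Face hub variant).
spec = {
    "model_format": "gptq",
    "model_size_in_billions": 7,
    "quantizations": ["Int4", "Int8"],
    "model_id": "Qwen/Qwen2.5-Coder-7B-Instruct-GPTQ-{quantization}",
}

def resolve_model_id(spec: dict, quantization: str) -> str:
    """Fill the {quantization} placeholder for the chosen variant.

    Hypothetical helper: rejects quantizations the spec does not list,
    then substitutes the chosen one into the model_id template.
    """
    if quantization not in spec["quantizations"]:
        raise ValueError(f"unsupported quantization: {quantization}")
    return spec["model_id"].format(quantization=quantization)

print(resolve_model_id(spec, "Int4"))
# Qwen/Qwen2.5-Coder-7B-Instruct-GPTQ-Int4
```

With this convention, adding a new quantized variant only requires appending to the `quantizations` list, provided the hub repository follows the same naming pattern.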