diff --git "a/docs/source/Instruction/\346\224\257\346\214\201\347\232\204\346\250\241\345\236\213\345\222\214\346\225\260\346\215\256\351\233\206.md" "b/docs/source/Instruction/\346\224\257\346\214\201\347\232\204\346\250\241\345\236\213\345\222\214\346\225\260\346\215\256\351\233\206.md" index 466858d7b..b0ff359f3 100644 --- "a/docs/source/Instruction/\346\224\257\346\214\201\347\232\204\346\250\241\345\236\213\345\222\214\346\225\260\346\215\256\351\233\206.md" +++ "b/docs/source/Instruction/\346\224\257\346\214\201\347\232\204\346\250\241\345\236\213\345\222\214\346\225\260\346\215\256\351\233\206.md" @@ -57,13 +57,13 @@ |qwen1half-moe-a2_7b-chat|[qwen/Qwen1.5-MoE-A2.7B-Chat](https://modelscope.cn/models/qwen/Qwen1.5-MoE-A2.7B-Chat/summary)|q_proj, k_proj, v_proj|qwen|✔|✔|✘|✘|transformers>=4.40|moe|[Qwen/Qwen1.5-MoE-A2.7B-Chat](https://huggingface.co/Qwen/Qwen1.5-MoE-A2.7B-Chat)| |codeqwen1half-7b-chat|[qwen/CodeQwen1.5-7B-Chat](https://modelscope.cn/models/qwen/CodeQwen1.5-7B-Chat/summary)|q_proj, k_proj, v_proj|qwen|✔|✔|✔|✘|transformers>=4.37|-|[Qwen/CodeQwen1.5-7B-Chat](https://huggingface.co/Qwen/CodeQwen1.5-7B-Chat)| |qwen1half-0_5b-chat-int4|[qwen/Qwen1.5-0.5B-Chat-GPTQ-Int4](https://modelscope.cn/models/qwen/Qwen1.5-0.5B-Chat-GPTQ-Int4/summary)|q_proj, k_proj, v_proj|qwen|✔|✔|✘|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen1.5-0.5B-Chat-GPTQ-Int4](https://huggingface.co/Qwen/Qwen1.5-0.5B-Chat-GPTQ-Int4)| -|qwen1half-1_8b-chat-int4|[qwen/Qwen1.5-1.8B-Chat-GPTQ-Int4](https://modelscope.cn/models/qwen/Qwen1.5-1.8B-Chat-GPTQ-Int4/summary)|q_proj, k_proj, v_proj|qwen|✔|✔|✘|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen1.5-1.8B-Chat-GPTQ-Int4](https://huggingface.co/Qwen/Qwen1.5-1.8B-Chat-GPTQ-Int4)| -|qwen1half-4b-chat-int4|[qwen/Qwen1.5-4B-Chat-GPTQ-Int4](https://modelscope.cn/models/qwen/Qwen1.5-4B-Chat-GPTQ-Int4/summary)|q_proj, k_proj, v_proj|qwen|✔|✔|✘|✘|auto_gptq>=0.5, 
transformers>=4.37|-|[Qwen/Qwen1.5-4B-Chat-GPTQ-Int4](https://huggingface.co/Qwen/Qwen1.5-4B-Chat-GPTQ-Int4)| -|qwen1half-7b-chat-int4|[qwen/Qwen1.5-7B-Chat-GPTQ-Int4](https://modelscope.cn/models/qwen/Qwen1.5-7B-Chat-GPTQ-Int4/summary)|q_proj, k_proj, v_proj|qwen|✔|✔|✘|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen1.5-7B-Chat-GPTQ-Int4](https://huggingface.co/Qwen/Qwen1.5-7B-Chat-GPTQ-Int4)| -|qwen1half-14b-chat-int4|[qwen/Qwen1.5-14B-Chat-GPTQ-Int4](https://modelscope.cn/models/qwen/Qwen1.5-14B-Chat-GPTQ-Int4/summary)|q_proj, k_proj, v_proj|qwen|✔|✔|✘|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen1.5-14B-Chat-GPTQ-Int4](https://huggingface.co/Qwen/Qwen1.5-14B-Chat-GPTQ-Int4)| -|qwen1half-32b-chat-int4|[qwen/Qwen1.5-32B-Chat-GPTQ-Int4](https://modelscope.cn/models/qwen/Qwen1.5-32B-Chat-GPTQ-Int4/summary)|q_proj, k_proj, v_proj|qwen|✔|✔|✘|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen1.5-32B-Chat-GPTQ-Int4](https://huggingface.co/Qwen/Qwen1.5-32B-Chat-GPTQ-Int4)| -|qwen1half-72b-chat-int4|[qwen/Qwen1.5-72B-Chat-GPTQ-Int4](https://modelscope.cn/models/qwen/Qwen1.5-72B-Chat-GPTQ-Int4/summary)|q_proj, k_proj, v_proj|qwen|✔|✔|✘|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen1.5-72B-Chat-GPTQ-Int4](https://huggingface.co/Qwen/Qwen1.5-72B-Chat-GPTQ-Int4)| -|qwen1half-110b-chat-int4|[qwen/Qwen1.5-110B-Chat-GPTQ-Int4](https://modelscope.cn/models/qwen/Qwen1.5-110B-Chat-GPTQ-Int4/summary)|q_proj, k_proj, v_proj|qwen|✔|✔|✘|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen1.5-110B-Chat-GPTQ-Int4](https://huggingface.co/Qwen/Qwen1.5-110B-Chat-GPTQ-Int4)| +|qwen1half-1_8b-chat-int4|[qwen/Qwen1.5-1.8B-Chat-GPTQ-Int4](https://modelscope.cn/models/qwen/Qwen1.5-1.8B-Chat-GPTQ-Int4/summary)|q_proj, k_proj, v_proj|qwen|✔|✔|✔|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen1.5-1.8B-Chat-GPTQ-Int4](https://huggingface.co/Qwen/Qwen1.5-1.8B-Chat-GPTQ-Int4)| 
+|qwen1half-4b-chat-int4|[qwen/Qwen1.5-4B-Chat-GPTQ-Int4](https://modelscope.cn/models/qwen/Qwen1.5-4B-Chat-GPTQ-Int4/summary)|q_proj, k_proj, v_proj|qwen|✔|✔|✔|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen1.5-4B-Chat-GPTQ-Int4](https://huggingface.co/Qwen/Qwen1.5-4B-Chat-GPTQ-Int4)| +|qwen1half-7b-chat-int4|[qwen/Qwen1.5-7B-Chat-GPTQ-Int4](https://modelscope.cn/models/qwen/Qwen1.5-7B-Chat-GPTQ-Int4/summary)|q_proj, k_proj, v_proj|qwen|✔|✔|✔|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen1.5-7B-Chat-GPTQ-Int4](https://huggingface.co/Qwen/Qwen1.5-7B-Chat-GPTQ-Int4)| +|qwen1half-14b-chat-int4|[qwen/Qwen1.5-14B-Chat-GPTQ-Int4](https://modelscope.cn/models/qwen/Qwen1.5-14B-Chat-GPTQ-Int4/summary)|q_proj, k_proj, v_proj|qwen|✔|✔|✔|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen1.5-14B-Chat-GPTQ-Int4](https://huggingface.co/Qwen/Qwen1.5-14B-Chat-GPTQ-Int4)| +|qwen1half-32b-chat-int4|[qwen/Qwen1.5-32B-Chat-GPTQ-Int4](https://modelscope.cn/models/qwen/Qwen1.5-32B-Chat-GPTQ-Int4/summary)|q_proj, k_proj, v_proj|qwen|✔|✔|✔|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen1.5-32B-Chat-GPTQ-Int4](https://huggingface.co/Qwen/Qwen1.5-32B-Chat-GPTQ-Int4)| +|qwen1half-72b-chat-int4|[qwen/Qwen1.5-72B-Chat-GPTQ-Int4](https://modelscope.cn/models/qwen/Qwen1.5-72B-Chat-GPTQ-Int4/summary)|q_proj, k_proj, v_proj|qwen|✔|✔|✔|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen1.5-72B-Chat-GPTQ-Int4](https://huggingface.co/Qwen/Qwen1.5-72B-Chat-GPTQ-Int4)| +|qwen1half-110b-chat-int4|[qwen/Qwen1.5-110B-Chat-GPTQ-Int4](https://modelscope.cn/models/qwen/Qwen1.5-110B-Chat-GPTQ-Int4/summary)|q_proj, k_proj, v_proj|qwen|✔|✔|✔|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen1.5-110B-Chat-GPTQ-Int4](https://huggingface.co/Qwen/Qwen1.5-110B-Chat-GPTQ-Int4)| |qwen1half-0_5b-chat-int8|[qwen/Qwen1.5-0.5B-Chat-GPTQ-Int8](https://modelscope.cn/models/qwen/Qwen1.5-0.5B-Chat-GPTQ-Int8/summary)|q_proj, k_proj, v_proj|qwen|✔|✔|✘|✘|auto_gptq>=0.5, 
transformers>=4.37|-|[Qwen/Qwen1.5-0.5B-Chat-GPTQ-Int8](https://huggingface.co/Qwen/Qwen1.5-0.5B-Chat-GPTQ-Int8)| |qwen1half-1_8b-chat-int8|[qwen/Qwen1.5-1.8B-Chat-GPTQ-Int8](https://modelscope.cn/models/qwen/Qwen1.5-1.8B-Chat-GPTQ-Int8/summary)|q_proj, k_proj, v_proj|qwen|✔|✔|✘|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen1.5-1.8B-Chat-GPTQ-Int8](https://huggingface.co/Qwen/Qwen1.5-1.8B-Chat-GPTQ-Int8)| |qwen1half-4b-chat-int8|[qwen/Qwen1.5-4B-Chat-GPTQ-Int8](https://modelscope.cn/models/qwen/Qwen1.5-4B-Chat-GPTQ-Int8/summary)|q_proj, k_proj, v_proj|qwen|✔|✔|✘|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen1.5-4B-Chat-GPTQ-Int8](https://huggingface.co/Qwen/Qwen1.5-4B-Chat-GPTQ-Int8)| @@ -87,17 +87,17 @@ |qwen2-0_5b-instruct-awq|[qwen/Qwen2-0.5B-Instruct-AWQ](https://modelscope.cn/models/qwen/Qwen2-0.5B-Instruct-AWQ/summary)|q_proj, k_proj, v_proj|qwen|✔|✔|✘|✘|transformers>=4.37, autoawq|-|[Qwen/Qwen2-0.5B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2-0.5B-Instruct-AWQ)| |qwen2-1_5b|[qwen/Qwen2-1.5B](https://modelscope.cn/models/qwen/Qwen2-1.5B/summary)|q_proj, k_proj, v_proj|default-generation|✔|✔|✔|✔|transformers>=4.37|-|[Qwen/Qwen2-1.5B](https://huggingface.co/Qwen/Qwen2-1.5B)| |qwen2-1_5b-instruct|[qwen/Qwen2-1.5B-Instruct](https://modelscope.cn/models/qwen/Qwen2-1.5B-Instruct/summary)|q_proj, k_proj, v_proj|qwen|✔|✔|✔|✔|transformers>=4.37|-|[Qwen/Qwen2-1.5B-Instruct](https://huggingface.co/Qwen/Qwen2-1.5B-Instruct)| -|qwen2-1_5b-instruct-int4|[qwen/Qwen2-1.5B-Instruct-GPTQ-Int4](https://modelscope.cn/models/qwen/Qwen2-1.5B-Instruct-GPTQ-Int4/summary)|q_proj, k_proj, v_proj|qwen|✔|✔|✘|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen2-1.5B-Instruct-GPTQ-Int4](https://huggingface.co/Qwen/Qwen2-1.5B-Instruct-GPTQ-Int4)| +|qwen2-1_5b-instruct-int4|[qwen/Qwen2-1.5B-Instruct-GPTQ-Int4](https://modelscope.cn/models/qwen/Qwen2-1.5B-Instruct-GPTQ-Int4/summary)|q_proj, k_proj, v_proj|qwen|✔|✔|✔|✘|auto_gptq>=0.5, 
transformers>=4.37|-|[Qwen/Qwen2-1.5B-Instruct-GPTQ-Int4](https://huggingface.co/Qwen/Qwen2-1.5B-Instruct-GPTQ-Int4)| |qwen2-1_5b-instruct-int8|[qwen/Qwen2-1.5B-Instruct-GPTQ-Int8](https://modelscope.cn/models/qwen/Qwen2-1.5B-Instruct-GPTQ-Int8/summary)|q_proj, k_proj, v_proj|qwen|✔|✔|✘|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen2-1_5B-Instruct-GPTQ-Int8](https://huggingface.co/Qwen/Qwen2-1_5B-Instruct-GPTQ-Int8)| |qwen2-1_5b-instruct-awq|[qwen/Qwen2-1.5B-Instruct-AWQ](https://modelscope.cn/models/qwen/Qwen2-1.5B-Instruct-AWQ/summary)|q_proj, k_proj, v_proj|qwen|✔|✔|✔|✘|transformers>=4.37, autoawq|-|[Qwen/Qwen2-1.5B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2-1.5B-Instruct-AWQ)| |qwen2-7b|[qwen/Qwen2-7B](https://modelscope.cn/models/qwen/Qwen2-7B/summary)|q_proj, k_proj, v_proj|default-generation|✔|✔|✔|✔|transformers>=4.37|-|[Qwen/Qwen2-7B](https://huggingface.co/Qwen/Qwen2-7B)| |qwen2-7b-instruct|[qwen/Qwen2-7B-Instruct](https://modelscope.cn/models/qwen/Qwen2-7B-Instruct/summary)|q_proj, k_proj, v_proj|qwen|✔|✔|✔|✔|transformers>=4.37|-|[Qwen/Qwen2-7B-Instruct](https://huggingface.co/Qwen/Qwen2-7B-Instruct)| -|qwen2-7b-instruct-int4|[qwen/Qwen2-7B-Instruct-GPTQ-Int4](https://modelscope.cn/models/qwen/Qwen2-7B-Instruct-GPTQ-Int4/summary)|q_proj, k_proj, v_proj|qwen|✔|✔|✘|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen2-7B-Instruct-GPTQ-Int4](https://huggingface.co/Qwen/Qwen2-7B-Instruct-GPTQ-Int4)| +|qwen2-7b-instruct-int4|[qwen/Qwen2-7B-Instruct-GPTQ-Int4](https://modelscope.cn/models/qwen/Qwen2-7B-Instruct-GPTQ-Int4/summary)|q_proj, k_proj, v_proj|qwen|✔|✔|✔|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen2-7B-Instruct-GPTQ-Int4](https://huggingface.co/Qwen/Qwen2-7B-Instruct-GPTQ-Int4)| |qwen2-7b-instruct-int8|[qwen/Qwen2-7B-Instruct-GPTQ-Int8](https://modelscope.cn/models/qwen/Qwen2-7B-Instruct-GPTQ-Int8/summary)|q_proj, k_proj, v_proj|qwen|✔|✔|✘|✘|auto_gptq>=0.5, 
transformers>=4.37|-|[Qwen/Qwen2-7B-Instruct-GPTQ-Int8](https://huggingface.co/Qwen/Qwen2-7B-Instruct-GPTQ-Int8)| |qwen2-7b-instruct-awq|[qwen/Qwen2-7B-Instruct-AWQ](https://modelscope.cn/models/qwen/Qwen2-7B-Instruct-AWQ/summary)|q_proj, k_proj, v_proj|qwen|✔|✔|✔|✘|transformers>=4.37, autoawq|-|[Qwen/Qwen2-7B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2-7B-Instruct-AWQ)| |qwen2-72b|[qwen/Qwen2-72B](https://modelscope.cn/models/qwen/Qwen2-72B/summary)|q_proj, k_proj, v_proj|default-generation|✔|✔|✔|✔|transformers>=4.37|-|[Qwen/Qwen2-72B](https://huggingface.co/Qwen/Qwen2-72B)| |qwen2-72b-instruct|[qwen/Qwen2-72B-Instruct](https://modelscope.cn/models/qwen/Qwen2-72B-Instruct/summary)|q_proj, k_proj, v_proj|qwen|✔|✔|✔|✔|transformers>=4.37|-|[Qwen/Qwen2-72B-Instruct](https://huggingface.co/Qwen/Qwen2-72B-Instruct)| -|qwen2-72b-instruct-int4|[qwen/Qwen2-72B-Instruct-GPTQ-Int4](https://modelscope.cn/models/qwen/Qwen2-72B-Instruct-GPTQ-Int4/summary)|q_proj, k_proj, v_proj|qwen|✔|✔|✘|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen2-72B-Instruct-GPTQ-Int4](https://huggingface.co/Qwen/Qwen2-72B-Instruct-GPTQ-Int4)| +|qwen2-72b-instruct-int4|[qwen/Qwen2-72B-Instruct-GPTQ-Int4](https://modelscope.cn/models/qwen/Qwen2-72B-Instruct-GPTQ-Int4/summary)|q_proj, k_proj, v_proj|qwen|✔|✔|✔|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen2-72B-Instruct-GPTQ-Int4](https://huggingface.co/Qwen/Qwen2-72B-Instruct-GPTQ-Int4)| |qwen2-72b-instruct-int8|[qwen/Qwen2-72B-Instruct-GPTQ-Int8](https://modelscope.cn/models/qwen/Qwen2-72B-Instruct-GPTQ-Int8/summary)|q_proj, k_proj, v_proj|qwen|✔|✔|✘|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen2-72B-Instruct-GPTQ-Int8](https://huggingface.co/Qwen/Qwen2-72B-Instruct-GPTQ-Int8)| |qwen2-72b-instruct-awq|[qwen/Qwen2-72B-Instruct-AWQ](https://modelscope.cn/models/qwen/Qwen2-72B-Instruct-AWQ/summary)|q_proj, k_proj, v_proj|qwen|✔|✔|✔|✘|transformers>=4.37, 
autoawq|-|[Qwen/Qwen2-72B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2-72B-Instruct-AWQ)| |qwen2-57b-a14b|[qwen/Qwen2-57B-A14B](https://modelscope.cn/models/qwen/Qwen2-57B-A14B/summary)|q_proj, k_proj, v_proj|default-generation|✔|✔|✘|✘|transformers>=4.40|moe|[Qwen/Qwen2-57B-A14B](https://huggingface.co/Qwen/Qwen2-57B-A14B)| @@ -124,12 +124,12 @@ |qwen2_5-32b-instruct|[qwen/Qwen2.5-32B-Instruct](https://modelscope.cn/models/qwen/Qwen2.5-32B-Instruct/summary)|q_proj, k_proj, v_proj|qwen2_5|✔|✔|✔|✘|transformers>=4.37|-|[Qwen/Qwen2.5-32B-Instruct](https://huggingface.co/Qwen/Qwen2.5-32B-Instruct)| |qwen2_5-72b-instruct|[qwen/Qwen2.5-72B-Instruct](https://modelscope.cn/models/qwen/Qwen2.5-72B-Instruct/summary)|q_proj, k_proj, v_proj|qwen2_5|✔|✔|✔|✘|transformers>=4.37|-|[Qwen/Qwen2.5-72B-Instruct](https://huggingface.co/Qwen/Qwen2.5-72B-Instruct)| |qwen2_5-0_5b-instruct-gptq-int4|[qwen/Qwen2.5-0.5B-Instruct-GPTQ-Int4](https://modelscope.cn/models/qwen/Qwen2.5-0.5B-Instruct-GPTQ-Int4/summary)|q_proj, k_proj, v_proj|qwen2_5|✔|✔|✘|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen2.5-0.5B-Instruct-GPTQ-Int4](https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct-GPTQ-Int4)| -|qwen2_5-1_5b-instruct-gptq-int4|[qwen/Qwen2.5-1.5B-Instruct-GPTQ-Int4](https://modelscope.cn/models/qwen/Qwen2.5-1.5B-Instruct-GPTQ-Int4/summary)|q_proj, k_proj, v_proj|qwen2_5|✔|✔|✘|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen2.5-1.5B-Instruct-GPTQ-Int4](https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct-GPTQ-Int4)| -|qwen2_5-3b-instruct-gptq-int4|[qwen/Qwen2.5-3B-Instruct-GPTQ-Int4](https://modelscope.cn/models/qwen/Qwen2.5-3B-Instruct-GPTQ-Int4/summary)|q_proj, k_proj, v_proj|qwen2_5|✔|✔|✘|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen2.5-3B-Instruct-GPTQ-Int4](https://huggingface.co/Qwen/Qwen2.5-3B-Instruct-GPTQ-Int4)| -|qwen2_5-7b-instruct-gptq-int4|[qwen/Qwen2.5-7B-Instruct-GPTQ-Int4](https://modelscope.cn/models/qwen/Qwen2.5-7B-Instruct-GPTQ-Int4/summary)|q_proj, k_proj, 
v_proj|qwen2_5|✔|✔|✘|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen2.5-7B-Instruct-GPTQ-Int4](https://huggingface.co/Qwen/Qwen2.5-7B-Instruct-GPTQ-Int4)| -|qwen2_5-14b-instruct-gptq-int4|[qwen/Qwen2.5-14B-Instruct-GPTQ-Int4](https://modelscope.cn/models/qwen/Qwen2.5-14B-Instruct-GPTQ-Int4/summary)|q_proj, k_proj, v_proj|qwen2_5|✔|✔|✘|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen2.5-14B-Instruct-GPTQ-Int4](https://huggingface.co/Qwen/Qwen2.5-14B-Instruct-GPTQ-Int4)| -|qwen2_5-32b-instruct-gptq-int4|[qwen/Qwen2.5-32B-Instruct-GPTQ-Int4](https://modelscope.cn/models/qwen/Qwen2.5-32B-Instruct-GPTQ-Int4/summary)|q_proj, k_proj, v_proj|qwen2_5|✔|✔|✘|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen2.5-32B-Instruct-GPTQ-Int4](https://huggingface.co/Qwen/Qwen2.5-32B-Instruct-GPTQ-Int4)| -|qwen2_5-72b-instruct-gptq-int4|[qwen/Qwen2.5-72B-Instruct-GPTQ-Int4](https://modelscope.cn/models/qwen/Qwen2.5-72B-Instruct-GPTQ-Int4/summary)|q_proj, k_proj, v_proj|qwen2_5|✔|✔|✘|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen2.5-72B-Instruct-GPTQ-Int4](https://huggingface.co/Qwen/Qwen2.5-72B-Instruct-GPTQ-Int4)| +|qwen2_5-1_5b-instruct-gptq-int4|[qwen/Qwen2.5-1.5B-Instruct-GPTQ-Int4](https://modelscope.cn/models/qwen/Qwen2.5-1.5B-Instruct-GPTQ-Int4/summary)|q_proj, k_proj, v_proj|qwen2_5|✔|✔|✔|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen2.5-1.5B-Instruct-GPTQ-Int4](https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct-GPTQ-Int4)| +|qwen2_5-3b-instruct-gptq-int4|[qwen/Qwen2.5-3B-Instruct-GPTQ-Int4](https://modelscope.cn/models/qwen/Qwen2.5-3B-Instruct-GPTQ-Int4/summary)|q_proj, k_proj, v_proj|qwen2_5|✔|✔|✔|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen2.5-3B-Instruct-GPTQ-Int4](https://huggingface.co/Qwen/Qwen2.5-3B-Instruct-GPTQ-Int4)| +|qwen2_5-7b-instruct-gptq-int4|[qwen/Qwen2.5-7B-Instruct-GPTQ-Int4](https://modelscope.cn/models/qwen/Qwen2.5-7B-Instruct-GPTQ-Int4/summary)|q_proj, k_proj, v_proj|qwen2_5|✔|✔|✔|✘|auto_gptq>=0.5, 
transformers>=4.37|-|[Qwen/Qwen2.5-7B-Instruct-GPTQ-Int4](https://huggingface.co/Qwen/Qwen2.5-7B-Instruct-GPTQ-Int4)| +|qwen2_5-14b-instruct-gptq-int4|[qwen/Qwen2.5-14B-Instruct-GPTQ-Int4](https://modelscope.cn/models/qwen/Qwen2.5-14B-Instruct-GPTQ-Int4/summary)|q_proj, k_proj, v_proj|qwen2_5|✔|✔|✔|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen2.5-14B-Instruct-GPTQ-Int4](https://huggingface.co/Qwen/Qwen2.5-14B-Instruct-GPTQ-Int4)| +|qwen2_5-32b-instruct-gptq-int4|[qwen/Qwen2.5-32B-Instruct-GPTQ-Int4](https://modelscope.cn/models/qwen/Qwen2.5-32B-Instruct-GPTQ-Int4/summary)|q_proj, k_proj, v_proj|qwen2_5|✔|✔|✔|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen2.5-32B-Instruct-GPTQ-Int4](https://huggingface.co/Qwen/Qwen2.5-32B-Instruct-GPTQ-Int4)| +|qwen2_5-72b-instruct-gptq-int4|[qwen/Qwen2.5-72B-Instruct-GPTQ-Int4](https://modelscope.cn/models/qwen/Qwen2.5-72B-Instruct-GPTQ-Int4/summary)|q_proj, k_proj, v_proj|qwen2_5|✔|✔|✔|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen2.5-72B-Instruct-GPTQ-Int4](https://huggingface.co/Qwen/Qwen2.5-72B-Instruct-GPTQ-Int4)| |qwen2_5-0_5b-instruct-gptq-int8|[qwen/Qwen2.5-0.5B-Instruct-GPTQ-Int8](https://modelscope.cn/models/qwen/Qwen2.5-0.5B-Instruct-GPTQ-Int8/summary)|q_proj, k_proj, v_proj|qwen2_5|✔|✔|✘|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen2.5-0.5B-Instruct-GPTQ-Int8](https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct-GPTQ-Int8)| |qwen2_5-1_5b-instruct-gptq-int8|[qwen/Qwen2.5-1.5B-Instruct-GPTQ-Int8](https://modelscope.cn/models/qwen/Qwen2.5-1.5B-Instruct-GPTQ-Int8/summary)|q_proj, k_proj, v_proj|qwen2_5|✔|✔|✘|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen2.5-1.5B-Instruct-GPTQ-Int8](https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct-GPTQ-Int8)| |qwen2_5-3b-instruct-gptq-int8|[qwen/Qwen2.5-3B-Instruct-GPTQ-Int8](https://modelscope.cn/models/qwen/Qwen2.5-3B-Instruct-GPTQ-Int8/summary)|q_proj, k_proj, v_proj|qwen2_5|✔|✔|✘|✘|auto_gptq>=0.5, 
transformers>=4.37|-|[Qwen/Qwen2.5-3B-Instruct-GPTQ-Int8](https://huggingface.co/Qwen/Qwen2.5-3B-Instruct-GPTQ-Int8)| @@ -138,12 +138,12 @@ |qwen2_5-32b-instruct-gptq-int8|[qwen/Qwen2.5-32B-Instruct-GPTQ-Int8](https://modelscope.cn/models/qwen/Qwen2.5-32B-Instruct-GPTQ-Int8/summary)|q_proj, k_proj, v_proj|qwen2_5|✔|✔|✘|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen2.5-32B-Instruct-GPTQ-Int8](https://huggingface.co/Qwen/Qwen2.5-32B-Instruct-GPTQ-Int8)| |qwen2_5-72b-instruct-gptq-int8|[qwen/Qwen2.5-72B-Instruct-GPTQ-Int8](https://modelscope.cn/models/qwen/Qwen2.5-72B-Instruct-GPTQ-Int8/summary)|q_proj, k_proj, v_proj|qwen2_5|✔|✔|✘|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen2.5-72B-Instruct-GPTQ-Int8](https://huggingface.co/Qwen/Qwen2.5-72B-Instruct-GPTQ-Int8)| |qwen2_5-0_5b-instruct-awq|[qwen/Qwen2.5-0.5B-Instruct-AWQ](https://modelscope.cn/models/qwen/Qwen2.5-0.5B-Instruct-AWQ/summary)|q_proj, k_proj, v_proj|qwen2_5|✔|✔|✘|✘|transformers>=4.37, autoawq|-|[Qwen/Qwen2.5-0.5B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct-AWQ)| -|qwen2_5-1_5b-instruct-awq|[qwen/Qwen2.5-1.5B-Instruct-AWQ](https://modelscope.cn/models/qwen/Qwen2.5-1.5B-Instruct-AWQ/summary)|q_proj, k_proj, v_proj|qwen2_5|✔|✔|✘|✘|transformers>=4.37, autoawq|-|[Qwen/Qwen2.5-1.5B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct-AWQ)| -|qwen2_5-3b-instruct-awq|[qwen/Qwen2.5-3B-Instruct-AWQ](https://modelscope.cn/models/qwen/Qwen2.5-3B-Instruct-AWQ/summary)|q_proj, k_proj, v_proj|qwen2_5|✔|✔|✘|✘|transformers>=4.37, autoawq|-|[Qwen/Qwen2.5-3B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2.5-3B-Instruct-AWQ)| -|qwen2_5-7b-instruct-awq|[qwen/Qwen2.5-7B-Instruct-AWQ](https://modelscope.cn/models/qwen/Qwen2.5-7B-Instruct-AWQ/summary)|q_proj, k_proj, v_proj|qwen2_5|✔|✔|✘|✘|transformers>=4.37, autoawq|-|[Qwen/Qwen2.5-7B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2.5-7B-Instruct-AWQ)| 
-|qwen2_5-14b-instruct-awq|[qwen/Qwen2.5-14B-Instruct-AWQ](https://modelscope.cn/models/qwen/Qwen2.5-14B-Instruct-AWQ/summary)|q_proj, k_proj, v_proj|qwen2_5|✔|✔|✘|✘|transformers>=4.37, autoawq|-|[Qwen/Qwen2.5-14B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2.5-14B-Instruct-AWQ)| -|qwen2_5-32b-instruct-awq|[qwen/Qwen2.5-32B-Instruct-AWQ](https://modelscope.cn/models/qwen/Qwen2.5-32B-Instruct-AWQ/summary)|q_proj, k_proj, v_proj|qwen2_5|✔|✔|✘|✘|transformers>=4.37, autoawq|-|[Qwen/Qwen2.5-32B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2.5-32B-Instruct-AWQ)| -|qwen2_5-72b-instruct-awq|[qwen/Qwen2.5-72B-Instruct-AWQ](https://modelscope.cn/models/qwen/Qwen2.5-72B-Instruct-AWQ/summary)|q_proj, k_proj, v_proj|qwen2_5|✔|✔|✘|✘|transformers>=4.37, autoawq|-|[Qwen/Qwen2.5-72B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2.5-72B-Instruct-AWQ)| +|qwen2_5-1_5b-instruct-awq|[qwen/Qwen2.5-1.5B-Instruct-AWQ](https://modelscope.cn/models/qwen/Qwen2.5-1.5B-Instruct-AWQ/summary)|q_proj, k_proj, v_proj|qwen2_5|✔|✔|✔|✘|transformers>=4.37, autoawq|-|[Qwen/Qwen2.5-1.5B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct-AWQ)| +|qwen2_5-3b-instruct-awq|[qwen/Qwen2.5-3B-Instruct-AWQ](https://modelscope.cn/models/qwen/Qwen2.5-3B-Instruct-AWQ/summary)|q_proj, k_proj, v_proj|qwen2_5|✔|✔|✔|✘|transformers>=4.37, autoawq|-|[Qwen/Qwen2.5-3B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2.5-3B-Instruct-AWQ)| +|qwen2_5-7b-instruct-awq|[qwen/Qwen2.5-7B-Instruct-AWQ](https://modelscope.cn/models/qwen/Qwen2.5-7B-Instruct-AWQ/summary)|q_proj, k_proj, v_proj|qwen2_5|✔|✔|✔|✘|transformers>=4.37, autoawq|-|[Qwen/Qwen2.5-7B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2.5-7B-Instruct-AWQ)| +|qwen2_5-14b-instruct-awq|[qwen/Qwen2.5-14B-Instruct-AWQ](https://modelscope.cn/models/qwen/Qwen2.5-14B-Instruct-AWQ/summary)|q_proj, k_proj, v_proj|qwen2_5|✔|✔|✔|✘|transformers>=4.37, autoawq|-|[Qwen/Qwen2.5-14B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2.5-14B-Instruct-AWQ)| 
+|qwen2_5-32b-instruct-awq|[qwen/Qwen2.5-32B-Instruct-AWQ](https://modelscope.cn/models/qwen/Qwen2.5-32B-Instruct-AWQ/summary)|q_proj, k_proj, v_proj|qwen2_5|✔|✔|✔|✘|transformers>=4.37, autoawq|-|[Qwen/Qwen2.5-32B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2.5-32B-Instruct-AWQ)| +|qwen2_5-72b-instruct-awq|[qwen/Qwen2.5-72B-Instruct-AWQ](https://modelscope.cn/models/qwen/Qwen2.5-72B-Instruct-AWQ/summary)|q_proj, k_proj, v_proj|qwen2_5|✔|✔|✔|✘|transformers>=4.37, autoawq|-|[Qwen/Qwen2.5-72B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2.5-72B-Instruct-AWQ)| |qwen2_5-math-1_5b|[qwen/Qwen2.5-Math-1.5B](https://modelscope.cn/models/qwen/Qwen2.5-Math-1.5B/summary)|q_proj, k_proj, v_proj|default-generation|✔|✔|✔|✘|transformers>=4.37|-|[Qwen/Qwen2.5-Math-1.5B](https://huggingface.co/Qwen/Qwen2.5-Math-1.5B)| |qwen2_5-math-7b|[qwen/Qwen2.5-Math-7B](https://modelscope.cn/models/qwen/Qwen2.5-Math-7B/summary)|q_proj, k_proj, v_proj|default-generation|✔|✔|✔|✘|transformers>=4.37|-|[Qwen/Qwen2.5-Math-7B](https://huggingface.co/Qwen/Qwen2.5-Math-7B)| |qwen2_5-math-72b|[qwen/Qwen2.5-Math-72B](https://modelscope.cn/models/qwen/Qwen2.5-Math-72B/summary)|q_proj, k_proj, v_proj|default-generation|✔|✔|✔|✘|transformers>=4.37|-|[Qwen/Qwen2.5-Math-72B](https://huggingface.co/Qwen/Qwen2.5-Math-72B)| diff --git a/docs/source_en/Instruction/Supported-models-datasets.md b/docs/source_en/Instruction/Supported-models-datasets.md index 9ac2589b8..33307d25d 100644 --- a/docs/source_en/Instruction/Supported-models-datasets.md +++ b/docs/source_en/Instruction/Supported-models-datasets.md @@ -57,13 +57,13 @@ The table below introduces all models supported by SWIFT: |qwen1half-moe-a2_7b-chat|[qwen/Qwen1.5-MoE-A2.7B-Chat](https://modelscope.cn/models/qwen/Qwen1.5-MoE-A2.7B-Chat/summary)|q_proj, k_proj, v_proj|qwen|✔|✔|✘|✘|transformers>=4.40|moe|[Qwen/Qwen1.5-MoE-A2.7B-Chat](https://huggingface.co/Qwen/Qwen1.5-MoE-A2.7B-Chat)| 
|codeqwen1half-7b-chat|[qwen/CodeQwen1.5-7B-Chat](https://modelscope.cn/models/qwen/CodeQwen1.5-7B-Chat/summary)|q_proj, k_proj, v_proj|qwen|✔|✔|✔|✘|transformers>=4.37|-|[Qwen/CodeQwen1.5-7B-Chat](https://huggingface.co/Qwen/CodeQwen1.5-7B-Chat)| |qwen1half-0_5b-chat-int4|[qwen/Qwen1.5-0.5B-Chat-GPTQ-Int4](https://modelscope.cn/models/qwen/Qwen1.5-0.5B-Chat-GPTQ-Int4/summary)|q_proj, k_proj, v_proj|qwen|✔|✔|✘|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen1.5-0.5B-Chat-GPTQ-Int4](https://huggingface.co/Qwen/Qwen1.5-0.5B-Chat-GPTQ-Int4)| -|qwen1half-1_8b-chat-int4|[qwen/Qwen1.5-1.8B-Chat-GPTQ-Int4](https://modelscope.cn/models/qwen/Qwen1.5-1.8B-Chat-GPTQ-Int4/summary)|q_proj, k_proj, v_proj|qwen|✔|✔|✘|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen1.5-1.8B-Chat-GPTQ-Int4](https://huggingface.co/Qwen/Qwen1.5-1.8B-Chat-GPTQ-Int4)| -|qwen1half-4b-chat-int4|[qwen/Qwen1.5-4B-Chat-GPTQ-Int4](https://modelscope.cn/models/qwen/Qwen1.5-4B-Chat-GPTQ-Int4/summary)|q_proj, k_proj, v_proj|qwen|✔|✔|✘|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen1.5-4B-Chat-GPTQ-Int4](https://huggingface.co/Qwen/Qwen1.5-4B-Chat-GPTQ-Int4)| -|qwen1half-7b-chat-int4|[qwen/Qwen1.5-7B-Chat-GPTQ-Int4](https://modelscope.cn/models/qwen/Qwen1.5-7B-Chat-GPTQ-Int4/summary)|q_proj, k_proj, v_proj|qwen|✔|✔|✘|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen1.5-7B-Chat-GPTQ-Int4](https://huggingface.co/Qwen/Qwen1.5-7B-Chat-GPTQ-Int4)| -|qwen1half-14b-chat-int4|[qwen/Qwen1.5-14B-Chat-GPTQ-Int4](https://modelscope.cn/models/qwen/Qwen1.5-14B-Chat-GPTQ-Int4/summary)|q_proj, k_proj, v_proj|qwen|✔|✔|✘|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen1.5-14B-Chat-GPTQ-Int4](https://huggingface.co/Qwen/Qwen1.5-14B-Chat-GPTQ-Int4)| -|qwen1half-32b-chat-int4|[qwen/Qwen1.5-32B-Chat-GPTQ-Int4](https://modelscope.cn/models/qwen/Qwen1.5-32B-Chat-GPTQ-Int4/summary)|q_proj, k_proj, v_proj|qwen|✔|✔|✘|✘|auto_gptq>=0.5, 
transformers>=4.37|-|[Qwen/Qwen1.5-32B-Chat-GPTQ-Int4](https://huggingface.co/Qwen/Qwen1.5-32B-Chat-GPTQ-Int4)| -|qwen1half-72b-chat-int4|[qwen/Qwen1.5-72B-Chat-GPTQ-Int4](https://modelscope.cn/models/qwen/Qwen1.5-72B-Chat-GPTQ-Int4/summary)|q_proj, k_proj, v_proj|qwen|✔|✔|✘|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen1.5-72B-Chat-GPTQ-Int4](https://huggingface.co/Qwen/Qwen1.5-72B-Chat-GPTQ-Int4)| -|qwen1half-110b-chat-int4|[qwen/Qwen1.5-110B-Chat-GPTQ-Int4](https://modelscope.cn/models/qwen/Qwen1.5-110B-Chat-GPTQ-Int4/summary)|q_proj, k_proj, v_proj|qwen|✔|✔|✘|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen1.5-110B-Chat-GPTQ-Int4](https://huggingface.co/Qwen/Qwen1.5-110B-Chat-GPTQ-Int4)| +|qwen1half-1_8b-chat-int4|[qwen/Qwen1.5-1.8B-Chat-GPTQ-Int4](https://modelscope.cn/models/qwen/Qwen1.5-1.8B-Chat-GPTQ-Int4/summary)|q_proj, k_proj, v_proj|qwen|✔|✔|✔|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen1.5-1.8B-Chat-GPTQ-Int4](https://huggingface.co/Qwen/Qwen1.5-1.8B-Chat-GPTQ-Int4)| +|qwen1half-4b-chat-int4|[qwen/Qwen1.5-4B-Chat-GPTQ-Int4](https://modelscope.cn/models/qwen/Qwen1.5-4B-Chat-GPTQ-Int4/summary)|q_proj, k_proj, v_proj|qwen|✔|✔|✔|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen1.5-4B-Chat-GPTQ-Int4](https://huggingface.co/Qwen/Qwen1.5-4B-Chat-GPTQ-Int4)| +|qwen1half-7b-chat-int4|[qwen/Qwen1.5-7B-Chat-GPTQ-Int4](https://modelscope.cn/models/qwen/Qwen1.5-7B-Chat-GPTQ-Int4/summary)|q_proj, k_proj, v_proj|qwen|✔|✔|✔|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen1.5-7B-Chat-GPTQ-Int4](https://huggingface.co/Qwen/Qwen1.5-7B-Chat-GPTQ-Int4)| +|qwen1half-14b-chat-int4|[qwen/Qwen1.5-14B-Chat-GPTQ-Int4](https://modelscope.cn/models/qwen/Qwen1.5-14B-Chat-GPTQ-Int4/summary)|q_proj, k_proj, v_proj|qwen|✔|✔|✔|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen1.5-14B-Chat-GPTQ-Int4](https://huggingface.co/Qwen/Qwen1.5-14B-Chat-GPTQ-Int4)| 
+|qwen1half-32b-chat-int4|[qwen/Qwen1.5-32B-Chat-GPTQ-Int4](https://modelscope.cn/models/qwen/Qwen1.5-32B-Chat-GPTQ-Int4/summary)|q_proj, k_proj, v_proj|qwen|✔|✔|✔|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen1.5-32B-Chat-GPTQ-Int4](https://huggingface.co/Qwen/Qwen1.5-32B-Chat-GPTQ-Int4)| +|qwen1half-72b-chat-int4|[qwen/Qwen1.5-72B-Chat-GPTQ-Int4](https://modelscope.cn/models/qwen/Qwen1.5-72B-Chat-GPTQ-Int4/summary)|q_proj, k_proj, v_proj|qwen|✔|✔|✔|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen1.5-72B-Chat-GPTQ-Int4](https://huggingface.co/Qwen/Qwen1.5-72B-Chat-GPTQ-Int4)| +|qwen1half-110b-chat-int4|[qwen/Qwen1.5-110B-Chat-GPTQ-Int4](https://modelscope.cn/models/qwen/Qwen1.5-110B-Chat-GPTQ-Int4/summary)|q_proj, k_proj, v_proj|qwen|✔|✔|✔|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen1.5-110B-Chat-GPTQ-Int4](https://huggingface.co/Qwen/Qwen1.5-110B-Chat-GPTQ-Int4)| |qwen1half-0_5b-chat-int8|[qwen/Qwen1.5-0.5B-Chat-GPTQ-Int8](https://modelscope.cn/models/qwen/Qwen1.5-0.5B-Chat-GPTQ-Int8/summary)|q_proj, k_proj, v_proj|qwen|✔|✔|✘|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen1.5-0.5B-Chat-GPTQ-Int8](https://huggingface.co/Qwen/Qwen1.5-0.5B-Chat-GPTQ-Int8)| |qwen1half-1_8b-chat-int8|[qwen/Qwen1.5-1.8B-Chat-GPTQ-Int8](https://modelscope.cn/models/qwen/Qwen1.5-1.8B-Chat-GPTQ-Int8/summary)|q_proj, k_proj, v_proj|qwen|✔|✔|✘|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen1.5-1.8B-Chat-GPTQ-Int8](https://huggingface.co/Qwen/Qwen1.5-1.8B-Chat-GPTQ-Int8)| |qwen1half-4b-chat-int8|[qwen/Qwen1.5-4B-Chat-GPTQ-Int8](https://modelscope.cn/models/qwen/Qwen1.5-4B-Chat-GPTQ-Int8/summary)|q_proj, k_proj, v_proj|qwen|✔|✔|✘|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen1.5-4B-Chat-GPTQ-Int8](https://huggingface.co/Qwen/Qwen1.5-4B-Chat-GPTQ-Int8)| @@ -87,17 +87,17 @@ The table below introduces all models supported by SWIFT: |qwen2-0_5b-instruct-awq|[qwen/Qwen2-0.5B-Instruct-AWQ](https://modelscope.cn/models/qwen/Qwen2-0.5B-Instruct-AWQ/summary)|q_proj, k_proj, 
v_proj|qwen|✔|✔|✘|✘|transformers>=4.37, autoawq|-|[Qwen/Qwen2-0.5B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2-0.5B-Instruct-AWQ)| |qwen2-1_5b|[qwen/Qwen2-1.5B](https://modelscope.cn/models/qwen/Qwen2-1.5B/summary)|q_proj, k_proj, v_proj|default-generation|✔|✔|✔|✔|transformers>=4.37|-|[Qwen/Qwen2-1.5B](https://huggingface.co/Qwen/Qwen2-1.5B)| |qwen2-1_5b-instruct|[qwen/Qwen2-1.5B-Instruct](https://modelscope.cn/models/qwen/Qwen2-1.5B-Instruct/summary)|q_proj, k_proj, v_proj|qwen|✔|✔|✔|✔|transformers>=4.37|-|[Qwen/Qwen2-1.5B-Instruct](https://huggingface.co/Qwen/Qwen2-1.5B-Instruct)| -|qwen2-1_5b-instruct-int4|[qwen/Qwen2-1.5B-Instruct-GPTQ-Int4](https://modelscope.cn/models/qwen/Qwen2-1.5B-Instruct-GPTQ-Int4/summary)|q_proj, k_proj, v_proj|qwen|✔|✔|✘|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen2-1.5B-Instruct-GPTQ-Int4](https://huggingface.co/Qwen/Qwen2-1.5B-Instruct-GPTQ-Int4)| +|qwen2-1_5b-instruct-int4|[qwen/Qwen2-1.5B-Instruct-GPTQ-Int4](https://modelscope.cn/models/qwen/Qwen2-1.5B-Instruct-GPTQ-Int4/summary)|q_proj, k_proj, v_proj|qwen|✔|✔|✔|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen2-1.5B-Instruct-GPTQ-Int4](https://huggingface.co/Qwen/Qwen2-1.5B-Instruct-GPTQ-Int4)| |qwen2-1_5b-instruct-int8|[qwen/Qwen2-1.5B-Instruct-GPTQ-Int8](https://modelscope.cn/models/qwen/Qwen2-1.5B-Instruct-GPTQ-Int8/summary)|q_proj, k_proj, v_proj|qwen|✔|✔|✘|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen2-1_5B-Instruct-GPTQ-Int8](https://huggingface.co/Qwen/Qwen2-1_5B-Instruct-GPTQ-Int8)| |qwen2-1_5b-instruct-awq|[qwen/Qwen2-1.5B-Instruct-AWQ](https://modelscope.cn/models/qwen/Qwen2-1.5B-Instruct-AWQ/summary)|q_proj, k_proj, v_proj|qwen|✔|✔|✔|✘|transformers>=4.37, autoawq|-|[Qwen/Qwen2-1.5B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2-1.5B-Instruct-AWQ)| |qwen2-7b|[qwen/Qwen2-7B](https://modelscope.cn/models/qwen/Qwen2-7B/summary)|q_proj, k_proj, v_proj|default-generation|✔|✔|✔|✔|transformers>=4.37|-|[Qwen/Qwen2-7B](https://huggingface.co/Qwen/Qwen2-7B)| 
|qwen2-7b-instruct|[qwen/Qwen2-7B-Instruct](https://modelscope.cn/models/qwen/Qwen2-7B-Instruct/summary)|q_proj, k_proj, v_proj|qwen|✔|✔|✔|✔|transformers>=4.37|-|[Qwen/Qwen2-7B-Instruct](https://huggingface.co/Qwen/Qwen2-7B-Instruct)| -|qwen2-7b-instruct-int4|[qwen/Qwen2-7B-Instruct-GPTQ-Int4](https://modelscope.cn/models/qwen/Qwen2-7B-Instruct-GPTQ-Int4/summary)|q_proj, k_proj, v_proj|qwen|✔|✔|✘|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen2-7B-Instruct-GPTQ-Int4](https://huggingface.co/Qwen/Qwen2-7B-Instruct-GPTQ-Int4)| +|qwen2-7b-instruct-int4|[qwen/Qwen2-7B-Instruct-GPTQ-Int4](https://modelscope.cn/models/qwen/Qwen2-7B-Instruct-GPTQ-Int4/summary)|q_proj, k_proj, v_proj|qwen|✔|✔|✔|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen2-7B-Instruct-GPTQ-Int4](https://huggingface.co/Qwen/Qwen2-7B-Instruct-GPTQ-Int4)| |qwen2-7b-instruct-int8|[qwen/Qwen2-7B-Instruct-GPTQ-Int8](https://modelscope.cn/models/qwen/Qwen2-7B-Instruct-GPTQ-Int8/summary)|q_proj, k_proj, v_proj|qwen|✔|✔|✘|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen2-7B-Instruct-GPTQ-Int8](https://huggingface.co/Qwen/Qwen2-7B-Instruct-GPTQ-Int8)| |qwen2-7b-instruct-awq|[qwen/Qwen2-7B-Instruct-AWQ](https://modelscope.cn/models/qwen/Qwen2-7B-Instruct-AWQ/summary)|q_proj, k_proj, v_proj|qwen|✔|✔|✔|✘|transformers>=4.37, autoawq|-|[Qwen/Qwen2-7B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2-7B-Instruct-AWQ)| |qwen2-72b|[qwen/Qwen2-72B](https://modelscope.cn/models/qwen/Qwen2-72B/summary)|q_proj, k_proj, v_proj|default-generation|✔|✔|✔|✔|transformers>=4.37|-|[Qwen/Qwen2-72B](https://huggingface.co/Qwen/Qwen2-72B)| |qwen2-72b-instruct|[qwen/Qwen2-72B-Instruct](https://modelscope.cn/models/qwen/Qwen2-72B-Instruct/summary)|q_proj, k_proj, v_proj|qwen|✔|✔|✔|✔|transformers>=4.37|-|[Qwen/Qwen2-72B-Instruct](https://huggingface.co/Qwen/Qwen2-72B-Instruct)| -|qwen2-72b-instruct-int4|[qwen/Qwen2-72B-Instruct-GPTQ-Int4](https://modelscope.cn/models/qwen/Qwen2-72B-Instruct-GPTQ-Int4/summary)|q_proj, k_proj, 
v_proj|qwen|✔|✔|✘|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen2-72B-Instruct-GPTQ-Int4](https://huggingface.co/Qwen/Qwen2-72B-Instruct-GPTQ-Int4)| +|qwen2-72b-instruct-int4|[qwen/Qwen2-72B-Instruct-GPTQ-Int4](https://modelscope.cn/models/qwen/Qwen2-72B-Instruct-GPTQ-Int4/summary)|q_proj, k_proj, v_proj|qwen|✔|✔|✔|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen2-72B-Instruct-GPTQ-Int4](https://huggingface.co/Qwen/Qwen2-72B-Instruct-GPTQ-Int4)| |qwen2-72b-instruct-int8|[qwen/Qwen2-72B-Instruct-GPTQ-Int8](https://modelscope.cn/models/qwen/Qwen2-72B-Instruct-GPTQ-Int8/summary)|q_proj, k_proj, v_proj|qwen|✔|✔|✘|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen2-72B-Instruct-GPTQ-Int8](https://huggingface.co/Qwen/Qwen2-72B-Instruct-GPTQ-Int8)| |qwen2-72b-instruct-awq|[qwen/Qwen2-72B-Instruct-AWQ](https://modelscope.cn/models/qwen/Qwen2-72B-Instruct-AWQ/summary)|q_proj, k_proj, v_proj|qwen|✔|✔|✔|✘|transformers>=4.37, autoawq|-|[Qwen/Qwen2-72B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2-72B-Instruct-AWQ)| |qwen2-57b-a14b|[qwen/Qwen2-57B-A14B](https://modelscope.cn/models/qwen/Qwen2-57B-A14B/summary)|q_proj, k_proj, v_proj|default-generation|✔|✔|✘|✘|transformers>=4.40|moe|[Qwen/Qwen2-57B-A14B](https://huggingface.co/Qwen/Qwen2-57B-A14B)| @@ -124,12 +124,12 @@ The table below introcudes all models supported by SWIFT: |qwen2_5-32b-instruct|[qwen/Qwen2.5-32B-Instruct](https://modelscope.cn/models/qwen/Qwen2.5-32B-Instruct/summary)|q_proj, k_proj, v_proj|qwen2_5|✔|✔|✔|✘|transformers>=4.37|-|[Qwen/Qwen2.5-32B-Instruct](https://huggingface.co/Qwen/Qwen2.5-32B-Instruct)| |qwen2_5-72b-instruct|[qwen/Qwen2.5-72B-Instruct](https://modelscope.cn/models/qwen/Qwen2.5-72B-Instruct/summary)|q_proj, k_proj, v_proj|qwen2_5|✔|✔|✔|✘|transformers>=4.37|-|[Qwen/Qwen2.5-72B-Instruct](https://huggingface.co/Qwen/Qwen2.5-72B-Instruct)| 
|qwen2_5-0_5b-instruct-gptq-int4|[qwen/Qwen2.5-0.5B-Instruct-GPTQ-Int4](https://modelscope.cn/models/qwen/Qwen2.5-0.5B-Instruct-GPTQ-Int4/summary)|q_proj, k_proj, v_proj|qwen2_5|✔|✔|✘|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen2.5-0.5B-Instruct-GPTQ-Int4](https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct-GPTQ-Int4)| -|qwen2_5-1_5b-instruct-gptq-int4|[qwen/Qwen2.5-1.5B-Instruct-GPTQ-Int4](https://modelscope.cn/models/qwen/Qwen2.5-1.5B-Instruct-GPTQ-Int4/summary)|q_proj, k_proj, v_proj|qwen2_5|✔|✔|✘|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen2.5-1.5B-Instruct-GPTQ-Int4](https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct-GPTQ-Int4)| -|qwen2_5-3b-instruct-gptq-int4|[qwen/Qwen2.5-3B-Instruct-GPTQ-Int4](https://modelscope.cn/models/qwen/Qwen2.5-3B-Instruct-GPTQ-Int4/summary)|q_proj, k_proj, v_proj|qwen2_5|✔|✔|✘|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen2.5-3B-Instruct-GPTQ-Int4](https://huggingface.co/Qwen/Qwen2.5-3B-Instruct-GPTQ-Int4)| -|qwen2_5-7b-instruct-gptq-int4|[qwen/Qwen2.5-7B-Instruct-GPTQ-Int4](https://modelscope.cn/models/qwen/Qwen2.5-7B-Instruct-GPTQ-Int4/summary)|q_proj, k_proj, v_proj|qwen2_5|✔|✔|✘|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen2.5-7B-Instruct-GPTQ-Int4](https://huggingface.co/Qwen/Qwen2.5-7B-Instruct-GPTQ-Int4)| -|qwen2_5-14b-instruct-gptq-int4|[qwen/Qwen2.5-14B-Instruct-GPTQ-Int4](https://modelscope.cn/models/qwen/Qwen2.5-14B-Instruct-GPTQ-Int4/summary)|q_proj, k_proj, v_proj|qwen2_5|✔|✔|✘|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen2.5-14B-Instruct-GPTQ-Int4](https://huggingface.co/Qwen/Qwen2.5-14B-Instruct-GPTQ-Int4)| -|qwen2_5-32b-instruct-gptq-int4|[qwen/Qwen2.5-32B-Instruct-GPTQ-Int4](https://modelscope.cn/models/qwen/Qwen2.5-32B-Instruct-GPTQ-Int4/summary)|q_proj, k_proj, v_proj|qwen2_5|✔|✔|✘|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen2.5-32B-Instruct-GPTQ-Int4](https://huggingface.co/Qwen/Qwen2.5-32B-Instruct-GPTQ-Int4)| 
-|qwen2_5-72b-instruct-gptq-int4|[qwen/Qwen2.5-72B-Instruct-GPTQ-Int4](https://modelscope.cn/models/qwen/Qwen2.5-72B-Instruct-GPTQ-Int4/summary)|q_proj, k_proj, v_proj|qwen2_5|✔|✔|✘|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen2.5-72B-Instruct-GPTQ-Int4](https://huggingface.co/Qwen/Qwen2.5-72B-Instruct-GPTQ-Int4)| +|qwen2_5-1_5b-instruct-gptq-int4|[qwen/Qwen2.5-1.5B-Instruct-GPTQ-Int4](https://modelscope.cn/models/qwen/Qwen2.5-1.5B-Instruct-GPTQ-Int4/summary)|q_proj, k_proj, v_proj|qwen2_5|✔|✔|✔|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen2.5-1.5B-Instruct-GPTQ-Int4](https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct-GPTQ-Int4)| +|qwen2_5-3b-instruct-gptq-int4|[qwen/Qwen2.5-3B-Instruct-GPTQ-Int4](https://modelscope.cn/models/qwen/Qwen2.5-3B-Instruct-GPTQ-Int4/summary)|q_proj, k_proj, v_proj|qwen2_5|✔|✔|✔|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen2.5-3B-Instruct-GPTQ-Int4](https://huggingface.co/Qwen/Qwen2.5-3B-Instruct-GPTQ-Int4)| +|qwen2_5-7b-instruct-gptq-int4|[qwen/Qwen2.5-7B-Instruct-GPTQ-Int4](https://modelscope.cn/models/qwen/Qwen2.5-7B-Instruct-GPTQ-Int4/summary)|q_proj, k_proj, v_proj|qwen2_5|✔|✔|✔|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen2.5-7B-Instruct-GPTQ-Int4](https://huggingface.co/Qwen/Qwen2.5-7B-Instruct-GPTQ-Int4)| +|qwen2_5-14b-instruct-gptq-int4|[qwen/Qwen2.5-14B-Instruct-GPTQ-Int4](https://modelscope.cn/models/qwen/Qwen2.5-14B-Instruct-GPTQ-Int4/summary)|q_proj, k_proj, v_proj|qwen2_5|✔|✔|✔|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen2.5-14B-Instruct-GPTQ-Int4](https://huggingface.co/Qwen/Qwen2.5-14B-Instruct-GPTQ-Int4)| +|qwen2_5-32b-instruct-gptq-int4|[qwen/Qwen2.5-32B-Instruct-GPTQ-Int4](https://modelscope.cn/models/qwen/Qwen2.5-32B-Instruct-GPTQ-Int4/summary)|q_proj, k_proj, v_proj|qwen2_5|✔|✔|✔|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen2.5-32B-Instruct-GPTQ-Int4](https://huggingface.co/Qwen/Qwen2.5-32B-Instruct-GPTQ-Int4)| 
+|qwen2_5-72b-instruct-gptq-int4|[qwen/Qwen2.5-72B-Instruct-GPTQ-Int4](https://modelscope.cn/models/qwen/Qwen2.5-72B-Instruct-GPTQ-Int4/summary)|q_proj, k_proj, v_proj|qwen2_5|✔|✔|✔|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen2.5-72B-Instruct-GPTQ-Int4](https://huggingface.co/Qwen/Qwen2.5-72B-Instruct-GPTQ-Int4)| |qwen2_5-0_5b-instruct-gptq-int8|[qwen/Qwen2.5-0.5B-Instruct-GPTQ-Int8](https://modelscope.cn/models/qwen/Qwen2.5-0.5B-Instruct-GPTQ-Int8/summary)|q_proj, k_proj, v_proj|qwen2_5|✔|✔|✘|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen2.5-0.5B-Instruct-GPTQ-Int8](https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct-GPTQ-Int8)| |qwen2_5-1_5b-instruct-gptq-int8|[qwen/Qwen2.5-1.5B-Instruct-GPTQ-Int8](https://modelscope.cn/models/qwen/Qwen2.5-1.5B-Instruct-GPTQ-Int8/summary)|q_proj, k_proj, v_proj|qwen2_5|✔|✔|✘|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen2.5-1.5B-Instruct-GPTQ-Int8](https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct-GPTQ-Int8)| |qwen2_5-3b-instruct-gptq-int8|[qwen/Qwen2.5-3B-Instruct-GPTQ-Int8](https://modelscope.cn/models/qwen/Qwen2.5-3B-Instruct-GPTQ-Int8/summary)|q_proj, k_proj, v_proj|qwen2_5|✔|✔|✘|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen2.5-3B-Instruct-GPTQ-Int8](https://huggingface.co/Qwen/Qwen2.5-3B-Instruct-GPTQ-Int8)| @@ -138,12 +138,12 @@ The table below introcudes all models supported by SWIFT: |qwen2_5-32b-instruct-gptq-int8|[qwen/Qwen2.5-32B-Instruct-GPTQ-Int8](https://modelscope.cn/models/qwen/Qwen2.5-32B-Instruct-GPTQ-Int8/summary)|q_proj, k_proj, v_proj|qwen2_5|✔|✔|✘|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen2.5-32B-Instruct-GPTQ-Int8](https://huggingface.co/Qwen/Qwen2.5-32B-Instruct-GPTQ-Int8)| |qwen2_5-72b-instruct-gptq-int8|[qwen/Qwen2.5-72B-Instruct-GPTQ-Int8](https://modelscope.cn/models/qwen/Qwen2.5-72B-Instruct-GPTQ-Int8/summary)|q_proj, k_proj, v_proj|qwen2_5|✔|✔|✘|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen2.5-72B-Instruct-GPTQ-Int8](https://huggingface.co/Qwen/Qwen2.5-72B-Instruct-GPTQ-Int8)| 
|qwen2_5-0_5b-instruct-awq|[qwen/Qwen2.5-0.5B-Instruct-AWQ](https://modelscope.cn/models/qwen/Qwen2.5-0.5B-Instruct-AWQ/summary)|q_proj, k_proj, v_proj|qwen2_5|✔|✔|✘|✘|transformers>=4.37, autoawq|-|[Qwen/Qwen2.5-0.5B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct-AWQ)| -|qwen2_5-1_5b-instruct-awq|[qwen/Qwen2.5-1.5B-Instruct-AWQ](https://modelscope.cn/models/qwen/Qwen2.5-1.5B-Instruct-AWQ/summary)|q_proj, k_proj, v_proj|qwen2_5|✔|✔|✘|✘|transformers>=4.37, autoawq|-|[Qwen/Qwen2.5-1.5B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct-AWQ)| -|qwen2_5-3b-instruct-awq|[qwen/Qwen2.5-3B-Instruct-AWQ](https://modelscope.cn/models/qwen/Qwen2.5-3B-Instruct-AWQ/summary)|q_proj, k_proj, v_proj|qwen2_5|✔|✔|✘|✘|transformers>=4.37, autoawq|-|[Qwen/Qwen2.5-3B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2.5-3B-Instruct-AWQ)| -|qwen2_5-7b-instruct-awq|[qwen/Qwen2.5-7B-Instruct-AWQ](https://modelscope.cn/models/qwen/Qwen2.5-7B-Instruct-AWQ/summary)|q_proj, k_proj, v_proj|qwen2_5|✔|✔|✘|✘|transformers>=4.37, autoawq|-|[Qwen/Qwen2.5-7B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2.5-7B-Instruct-AWQ)| -|qwen2_5-14b-instruct-awq|[qwen/Qwen2.5-14B-Instruct-AWQ](https://modelscope.cn/models/qwen/Qwen2.5-14B-Instruct-AWQ/summary)|q_proj, k_proj, v_proj|qwen2_5|✔|✔|✘|✘|transformers>=4.37, autoawq|-|[Qwen/Qwen2.5-14B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2.5-14B-Instruct-AWQ)| -|qwen2_5-32b-instruct-awq|[qwen/Qwen2.5-32B-Instruct-AWQ](https://modelscope.cn/models/qwen/Qwen2.5-32B-Instruct-AWQ/summary)|q_proj, k_proj, v_proj|qwen2_5|✔|✔|✘|✘|transformers>=4.37, autoawq|-|[Qwen/Qwen2.5-32B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2.5-32B-Instruct-AWQ)| -|qwen2_5-72b-instruct-awq|[qwen/Qwen2.5-72B-Instruct-AWQ](https://modelscope.cn/models/qwen/Qwen2.5-72B-Instruct-AWQ/summary)|q_proj, k_proj, v_proj|qwen2_5|✔|✔|✘|✘|transformers>=4.37, autoawq|-|[Qwen/Qwen2.5-72B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2.5-72B-Instruct-AWQ)| 
+|qwen2_5-1_5b-instruct-awq|[qwen/Qwen2.5-1.5B-Instruct-AWQ](https://modelscope.cn/models/qwen/Qwen2.5-1.5B-Instruct-AWQ/summary)|q_proj, k_proj, v_proj|qwen2_5|✔|✔|✔|✘|transformers>=4.37, autoawq|-|[Qwen/Qwen2.5-1.5B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct-AWQ)| +|qwen2_5-3b-instruct-awq|[qwen/Qwen2.5-3B-Instruct-AWQ](https://modelscope.cn/models/qwen/Qwen2.5-3B-Instruct-AWQ/summary)|q_proj, k_proj, v_proj|qwen2_5|✔|✔|✔|✘|transformers>=4.37, autoawq|-|[Qwen/Qwen2.5-3B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2.5-3B-Instruct-AWQ)| +|qwen2_5-7b-instruct-awq|[qwen/Qwen2.5-7B-Instruct-AWQ](https://modelscope.cn/models/qwen/Qwen2.5-7B-Instruct-AWQ/summary)|q_proj, k_proj, v_proj|qwen2_5|✔|✔|✔|✘|transformers>=4.37, autoawq|-|[Qwen/Qwen2.5-7B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2.5-7B-Instruct-AWQ)| +|qwen2_5-14b-instruct-awq|[qwen/Qwen2.5-14B-Instruct-AWQ](https://modelscope.cn/models/qwen/Qwen2.5-14B-Instruct-AWQ/summary)|q_proj, k_proj, v_proj|qwen2_5|✔|✔|✔|✘|transformers>=4.37, autoawq|-|[Qwen/Qwen2.5-14B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2.5-14B-Instruct-AWQ)| +|qwen2_5-32b-instruct-awq|[qwen/Qwen2.5-32B-Instruct-AWQ](https://modelscope.cn/models/qwen/Qwen2.5-32B-Instruct-AWQ/summary)|q_proj, k_proj, v_proj|qwen2_5|✔|✔|✔|✘|transformers>=4.37, autoawq|-|[Qwen/Qwen2.5-32B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2.5-32B-Instruct-AWQ)| +|qwen2_5-72b-instruct-awq|[qwen/Qwen2.5-72B-Instruct-AWQ](https://modelscope.cn/models/qwen/Qwen2.5-72B-Instruct-AWQ/summary)|q_proj, k_proj, v_proj|qwen2_5|✔|✔|✔|✘|transformers>=4.37, autoawq|-|[Qwen/Qwen2.5-72B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2.5-72B-Instruct-AWQ)| |qwen2_5-math-1_5b|[qwen/Qwen2.5-Math-1.5B](https://modelscope.cn/models/qwen/Qwen2.5-Math-1.5B/summary)|q_proj, k_proj, v_proj|default-generation|✔|✔|✔|✘|transformers>=4.37|-|[Qwen/Qwen2.5-Math-1.5B](https://huggingface.co/Qwen/Qwen2.5-Math-1.5B)| 
|qwen2_5-math-7b|[qwen/Qwen2.5-Math-7B](https://modelscope.cn/models/qwen/Qwen2.5-Math-7B/summary)|q_proj, k_proj, v_proj|default-generation|✔|✔|✔|✘|transformers>=4.37|-|[Qwen/Qwen2.5-Math-7B](https://huggingface.co/Qwen/Qwen2.5-Math-7B)| |qwen2_5-math-72b|[qwen/Qwen2.5-Math-72B](https://modelscope.cn/models/qwen/Qwen2.5-Math-72B/summary)|q_proj, k_proj, v_proj|default-generation|✔|✔|✔|✘|transformers>=4.37|-|[Qwen/Qwen2.5-Math-72B](https://huggingface.co/Qwen/Qwen2.5-Math-72B)| diff --git a/swift/llm/utils/lmdeploy_utils.py b/swift/llm/utils/lmdeploy_utils.py index 3e9447eb4..c3b6db469 100644 --- a/swift/llm/utils/lmdeploy_utils.py +++ b/swift/llm/utils/lmdeploy_utils.py @@ -198,7 +198,6 @@ def _prepare_lmdeploy_request(lmdeploy_engine: Union[AsyncEngine, VLAsyncEngine] generation_config.random_seed = get_seed() resp_list: List[Optional[Dict[str, Any]]] = [None] * len(request_list) - generators = [] is_multimodal = getattr(lmdeploy_engine, 'is_multimodal', False) max_workers = os.cpu_count() if not is_multimodal: @@ -220,17 +219,17 @@ def _prepare_inputs(request: Dict[str, Any]) -> Dict[str, Any]: inputs_list = [future.result() for future in futures] prog_bar.close() + new_inputs = [] for i, (inputs, request) in enumerate(zip(inputs_list, request_list)): truncation_strategy = kwargs.pop('truncation_strategy', 'delete') if len(inputs) == 0 and truncation_strategy == 'delete': # input_ids exceeds `max_length`. Please increase the value of `max_length`. 
resp_list[i] = {'response': '', 'history': request['history']} continue - generator = lmdeploy_engine.get_generator(False, i) - generators.append((i, inputs, generator)) + new_inputs.append((i, inputs)) - generation_info['num_samples'] = len(generators) - return resp_list, generators + generation_info['num_samples'] = len(new_inputs) + return resp_list, new_inputs @torch.inference_mode() @@ -259,7 +258,7 @@ def inference_stream_lmdeploy(lmdeploy_engine: Union[AsyncEngine, VLAsyncEngine] else: generation_info.clear() - resp_list, generators = _prepare_lmdeploy_request( + resp_list, inputs_list = _prepare_lmdeploy_request( lmdeploy_engine, template, request_list, @@ -272,30 +271,31 @@ def inference_stream_lmdeploy(lmdeploy_engine: Union[AsyncEngine, VLAsyncEngine] print_idx_list = [[0] for _ in range(len(request_list))] outputs = [None] * len(request_list) num_generated_tokens = [0] * len(request_list) - prog_bar = tqdm(total=len(generators), dynamic_ncols=True, disable=not use_tqdm) + prog_bar = tqdm(total=len(inputs_list), dynamic_ncols=True, disable=not use_tqdm) queue = Queue() - async def _inner_infer(i: int, inputs: Dict[str, Any], generator) -> None: - generator = await generator + async def _inner_infer(i: int, inputs: Dict[str, Any]) -> None: + session_id = time.time_ns() + generator = await lmdeploy_engine.get_generator(False, session_id) images = inputs.pop('images', None) or [] if len(images) > 0: inputs['images'] = await lmdeploy_engine.vl_encoder.async_infer(images) await template.prepare_lmdeploy_inputs(inputs) generation_info['num_prompt_tokens'] += len(inputs['input_ids']) - async with lmdeploy_engine.safe_run(i): + async with lmdeploy_engine.safe_run(session_id): async for output in generator.async_stream_infer( - session_id=i, **inputs, stream_output=True, gen_config=generation_config): + session_id=session_id, **inputs, stream_output=True, gen_config=generation_config): queue.put((i, output)) queue.put((i, None)) async def _batch_infer() -> None: 
- tasks = [_inner_infer(i, inputs, generator) for i, inputs, generator in generators] + tasks = [_inner_infer(i, inputs) for i, inputs in inputs_list] await asyncio.gather(*tasks) thread = Thread(target=lambda: asyncio.run(_batch_infer())) thread.start() - while n_finished < len(generators): + while n_finished < len(inputs_list): i, output = queue.get() is_finished = False if output is None: @@ -387,7 +387,7 @@ def inference_lmdeploy(lmdeploy_engine: Union[AsyncEngine, VLAsyncEngine], request_list = deepcopy(request_list) generation_config = deepcopy(generation_config) - resp_list, generators = _prepare_lmdeploy_request( + resp_list, inputs_list = _prepare_lmdeploy_request( lmdeploy_engine, template, request_list, @@ -399,18 +399,19 @@ def inference_lmdeploy(lmdeploy_engine: Union[AsyncEngine, VLAsyncEngine], tokenizer = template.tokenizer if use_tqdm: assert verbose is False - prog_bar = tqdm(total=len(generators), dynamic_ncols=True, disable=not use_tqdm) + prog_bar = tqdm(total=len(inputs_list), dynamic_ncols=True, disable=not use_tqdm) - async def _inner_infer(i: int, inputs: Dict[str, Any], generator) -> None: - generator = await generator + async def _inner_infer(i: int, inputs: Dict[str, Any]) -> None: + session_id = time.time_ns() + generator = await lmdeploy_engine.get_generator(False, session_id) images = inputs.pop('images', None) or [] if len(images) > 0: inputs['images'] = await lmdeploy_engine.vl_encoder.async_infer(images) await template.prepare_lmdeploy_inputs(inputs) generation_info['num_prompt_tokens'] += len(inputs['input_ids']) - async with lmdeploy_engine.safe_run(i): + async with lmdeploy_engine.safe_run(session_id): async for output in generator.async_stream_infer( - session_id=i, **inputs, stream_output=False, gen_config=generation_config): + session_id=session_id, **inputs, stream_output=False, gen_config=generation_config): pass request = request_list[i] input_ids = inputs['input_ids'] @@ -430,7 +431,7 @@ async def _inner_infer(i: int, 
inputs: Dict[str, Any], generator) -> None: prog_bar.update() async def _batch_infer() -> None: - tasks = [_inner_infer(i, inputs, generator) for i, inputs, generator in generators] + tasks = [_inner_infer(i, inputs) for i, inputs in inputs_list] await asyncio.gather(*tasks) asyncio.run(_batch_infer()) diff --git a/swift/llm/utils/model.py b/swift/llm/utils/model.py index 36481f1f4..07cce90b7 100644 --- a/swift/llm/utils/model.py +++ b/swift/llm/utils/model.py @@ -3396,6 +3396,7 @@ def rotary_emb(self, query_states, key_states, **kwargs): TemplateType.qwen, support_flash_attn=True, support_vllm=True, + support_lmdeploy=False, function_kwargs={'gptq_bits': 4}, torch_dtype=torch.float16, requires=['auto_gptq>=0.5', 'transformers>=4.37'], @@ -3418,6 +3419,7 @@ def rotary_emb(self, query_states, key_states, **kwargs): TemplateType.qwen, support_flash_attn=True, support_vllm=True, + support_lmdeploy=True, function_kwargs={'gptq_bits': 4}, torch_dtype=torch.float16, requires=['auto_gptq>=0.5', 'transformers>=4.37'], @@ -3440,6 +3442,7 @@ def rotary_emb(self, query_states, key_states, **kwargs): TemplateType.qwen, support_flash_attn=True, support_vllm=True, + support_lmdeploy=True, function_kwargs={'gptq_bits': 4}, torch_dtype=torch.float16, requires=['auto_gptq>=0.5', 'transformers>=4.37'], @@ -3462,6 +3465,7 @@ def rotary_emb(self, query_states, key_states, **kwargs): TemplateType.qwen, support_flash_attn=True, support_vllm=True, + support_lmdeploy=True, function_kwargs={'gptq_bits': 4}, torch_dtype=torch.float16, requires=['auto_gptq>=0.5', 'transformers>=4.37'], @@ -3827,6 +3831,7 @@ def get_model_tokenizer_qwen2_chat(model_dir: str, get_model_tokenizer_qwen2_chat, support_flash_attn=True, support_vllm=True, + support_lmdeploy=quant_bits == 4 and model_size != '0.5B', function_kwargs={'gptq_bits': quant_bits}, torch_dtype=torch.float16, requires=['auto_gptq>=0.5', 'transformers>=4.37'], @@ -3840,6 +3845,7 @@ def get_model_tokenizer_qwen2_chat(model_dir: str, 
get_model_tokenizer_qwen2_chat, support_flash_attn=True, support_vllm=True, + support_lmdeploy=model_size != '0.5B', function_kwargs={'is_awq': True}, torch_dtype=torch.float16, requires=['transformers>=4.37', 'autoawq'], @@ -4034,6 +4040,7 @@ def _read_from_stream(container: 'av.container.Container', start_offset: float, function_kwargs={'gptq_bits': 4}, support_flash_attn=True, support_vllm=True, + support_lmdeploy=False, hf_model_id='Qwen/Qwen1.5-0.5B-Chat-GPTQ-Int4') @register_model( ModelType.qwen1half_0_5b_chat_int8, @@ -4056,6 +4063,7 @@ def _read_from_stream(container: 'av.container.Container', start_offset: float, function_kwargs={'gptq_bits': 4}, support_flash_attn=True, support_vllm=True, + support_lmdeploy=True, hf_model_id='Qwen/Qwen1.5-1.8B-Chat-GPTQ-Int4') @register_model( ModelType.qwen1half_1_8b_chat_int8, @@ -4078,6 +4086,7 @@ def _read_from_stream(container: 'av.container.Container', start_offset: float, function_kwargs={'gptq_bits': 4}, support_flash_attn=True, support_vllm=True, + support_lmdeploy=True, hf_model_id='Qwen/Qwen1.5-4B-Chat-GPTQ-Int4') @register_model( ModelType.qwen1half_4b_chat_int8, @@ -4100,6 +4109,7 @@ def _read_from_stream(container: 'av.container.Container', start_offset: float, function_kwargs={'gptq_bits': 4}, support_flash_attn=True, support_vllm=True, + support_lmdeploy=True, hf_model_id='Qwen/Qwen1.5-7B-Chat-GPTQ-Int4') @register_model( ModelType.qwen1half_7b_chat_int8, @@ -4122,6 +4132,7 @@ def _read_from_stream(container: 'av.container.Container', start_offset: float, function_kwargs={'gptq_bits': 4}, support_flash_attn=True, support_vllm=True, + support_lmdeploy=True, hf_model_id='Qwen/Qwen1.5-14B-Chat-GPTQ-Int4') @register_model( ModelType.qwen1half_14b_chat_int8, @@ -4144,6 +4155,7 @@ def _read_from_stream(container: 'av.container.Container', start_offset: float, function_kwargs={'gptq_bits': 4}, support_flash_attn=True, support_vllm=True, + support_lmdeploy=True, hf_model_id='Qwen/Qwen1.5-32B-Chat-GPTQ-Int4') 
@register_model( ModelType.qwen1half_72b_chat_int4, @@ -4155,6 +4167,7 @@ def _read_from_stream(container: 'av.container.Container', start_offset: float, function_kwargs={'gptq_bits': 4}, support_flash_attn=True, support_vllm=True, + support_lmdeploy=True, hf_model_id='Qwen/Qwen1.5-72B-Chat-GPTQ-Int4') @register_model( ModelType.qwen1half_110b_chat_int4, @@ -4166,6 +4179,7 @@ def _read_from_stream(container: 'av.container.Container', start_offset: float, function_kwargs={'gptq_bits': 4}, support_flash_attn=True, support_vllm=True, + support_lmdeploy=True, hf_model_id='Qwen/Qwen1.5-110B-Chat-GPTQ-Int4') @register_model( ModelType.qwen1half_72b_chat_int8,