Add num_hidden_layers == 28 check to the qwen2 branch of optimize_llm

intel-analytics · Aug 26, 2024 · 83a2ddb · 83a2ddb
1 parent 858591e
commit 83a2ddb
Showing 1 changed file with 1 addition and 1 deletion.
diff --git a/python/llm/src/ipex_llm/transformers/npu_models/convert_mp.py b/python/llm/src/ipex_llm/transformers/npu_models/convert_mp.py
@@ -57,7 +57,7 @@ def optimize_llm(
         from transformers.models.llama.modeling_llama import LlamaForCausalLM
         from ipex_llm.transformers.npu_models.llama_mp import llama2_casullm_forward
         convert_forward(model, LlamaForCausalLM, llama2_casullm_forward)
-    elif model.config.model_type == "qwen2":
+    elif model.config.model_type == "qwen2" and model.config.num_hidden_layers == 28:
         # for qwen2-1.5B and qwen2-7B
         from ipex_llm.transformers.npu_models.qwen2_mp import gen_qwen2_fused_model_forward
         from ipex_llm.transformers.npu_models.qwen2_mp import DecodeRunner, PrefillRunner