From 509bdb4661d4c45e518698bbc85abd984a5ada23 Mon Sep 17 00:00:00 2001
From: binbin Deng <108676127+plusbang@users.noreply.github.com>
Date: Wed, 11 Dec 2024 16:49:32 +0800
Subject: [PATCH] [NPU] Fix minicpm-2B error (#12527)

---
 .../transformers/npu_pipeline_model/convert_pipeline.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/python/llm/src/ipex_llm/transformers/npu_pipeline_model/convert_pipeline.py b/python/llm/src/ipex_llm/transformers/npu_pipeline_model/convert_pipeline.py
index 3b223017dce..f429dd4ebe4 100644
--- a/python/llm/src/ipex_llm/transformers/npu_pipeline_model/convert_pipeline.py
+++ b/python/llm/src/ipex_llm/transformers/npu_pipeline_model/convert_pipeline.py
@@ -435,8 +435,10 @@ def convert_llm_for_deploy(model: torch.nn.Module,
         layernorm_const = os.environ.get("IPEX_LLM_NPU_LAYERNORM_CONST", "1") == "1"
         lm_head_low_bit = getattr(model.config, "bigdl_transformers_low_bit", "sym_int4_rtn")
-        if not isinstance(model.lm_head, SlicedLMHead):
+        if hasattr(model, "lm_head") and not isinstance(model.lm_head, SlicedLMHead):
             lm_head_low_bit = model.lm_head.qtype
+        elif hasattr(model, "lm_head_0") and not isinstance(model.lm_head_0, SlicedLMHead):
+            lm_head_low_bit = model.lm_head_0.qtype
         else:
             lm_head_low_bit = model.lm_head.lm_heads[0].qtype
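
Note: MiniCPM-2B exposes its output projection as lm_head_0 rather than lm_head, so the previous unconditional model.lm_head access raised an AttributeError for that model. Below is a minimal, self-contained sketch of the hasattr-based dispatch the patch introduces. Only the lm_head / lm_head_0 / SlicedLMHead names and the branch order come from the patch itself; the stub classes, the resolve_lm_head_low_bit helper, and the trailing default are hypothetical illustrations, not part of ipex-llm.

from types import SimpleNamespace

class SlicedLMHead:
    """Stub for the sliced LM head, which stores per-slice heads."""
    def __init__(self, qtypes):
        self.lm_heads = [PlainLMHead(q) for q in qtypes]

class PlainLMHead:
    """Stub for an ordinary quantized LM head carrying a qtype tag."""
    def __init__(self, qtype):
        self.qtype = qtype

def resolve_lm_head_low_bit(model, default="sym_int4_rtn"):
    """Mirror the patched branch order when picking the lm-head qtype."""
    if hasattr(model, "lm_head") and not isinstance(model.lm_head, SlicedLMHead):
        return model.lm_head.qtype                  # ordinary head
    elif hasattr(model, "lm_head_0") and not isinstance(model.lm_head_0, SlicedLMHead):
        return model.lm_head_0.qtype                # MiniCPM-2B style head
    elif hasattr(model, "lm_head"):
        return model.lm_head.lm_heads[0].qtype      # sliced head
    return default                                  # config-level fallback

# Quick check with throwaway namespace objects standing in for models:
assert resolve_lm_head_low_bit(SimpleNamespace(lm_head=PlainLMHead("sym_int4_rtn"))) == "sym_int4_rtn"
assert resolve_lm_head_low_bit(SimpleNamespace(lm_head_0=PlainLMHead("sym_int8_rtn"))) == "sym_int8_rtn"
assert resolve_lm_head_low_bit(SimpleNamespace(lm_head=SlicedLMHead(["sym_int4_rtn"]))) == "sym_int4_rtn"

The key design point is checking hasattr before touching the attribute: the lm_head branch is tried first, the MiniCPM-2B lm_head_0 branch second, and the sliced-head case last, so existing models keep their original behavior.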