From 7b9ff3d2c091664ec71d1c456913efc19725b2d3 Mon Sep 17 00:00:00 2001 From: gc-fu Date: Thu, 8 Aug 2024 13:50:50 +0800 Subject: [PATCH] Fix chatglm multi-reference problem --- python/llm/src/ipex_llm/transformers/convert.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/python/llm/src/ipex_llm/transformers/convert.py b/python/llm/src/ipex_llm/transformers/convert.py index 0b3babd9c42..65d394b5ab9 100644 --- a/python/llm/src/ipex_llm/transformers/convert.py +++ b/python/llm/src/ipex_llm/transformers/convert.py @@ -331,6 +331,11 @@ def _replace_with_low_bit_linear(model, qtype, modules_to_not_convert=None, if any(key in full_module_name for key in modules_to_not_convert): continue + if is_linear and getattr(model_config, "model_type", None) == "chatglm" and name == "lm_head": + # Now we re-reference it to output_layer + model._modules[name] = model._modules["transformer"]._modules["output_layer"] + continue + if is_linear and not isinstance(module, LowBitLinear): in_features, out_features, mp_group = linear_args optimize_lm_head = False