
Commit

refactor
liu-shaojun committed Jun 26, 2024
1 parent 887ac64 commit 6e6db4e
Showing 1 changed file with 4 additions and 4 deletions.
python/llm/src/ipex_llm/transformers/convert.py (8 changes: 4 additions & 4 deletions)
@@ -733,17 +733,17 @@ def _optimize_pre(model):
         model.apply(split_mlp)
     # for qwen2
     if model.config.model_type == "qwen2":
-        from ipex_llm.transformers.models.qwen2 import merge_qkv
-        # Skip merge_qkv if quant_method is 'gptq'
+        # Skip merge_qkv and padding_mlp if quant_method is 'gptq'
         should_apply_merge_qkv = (
             not hasattr(model.config, "quantization_config") or
             not hasattr(model.config.quantization_config, "quant_method") or
             model.config.quantization_config.quant_method != "gptq"
         )
         if should_apply_merge_qkv:
+            from ipex_llm.transformers.models.qwen2 import merge_qkv
             model.apply(merge_qkv)
-        from ipex_llm.transformers.models.qwen2 import padding_mlp
-        model.apply(padding_mlp)
+            from ipex_llm.transformers.models.qwen2 import padding_mlp
+            model.apply(padding_mlp)
     if model.config.model_type == "qwen2_moe":
         from ipex_llm.transformers.models.qwen2_moe import merge_qkv
         model.apply(merge_qkv)
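
Note on the change above: the guard decides whether the Qwen2 merge_qkv and padding_mlp optimizations run at all, and after this refactor both are skipped when the checkpoint is GPTQ-quantized (quant_method == "gptq"). Below is a minimal, self-contained sketch of that condition; the SimpleNamespace configs are hypothetical stand-ins used only for illustration, not the real transformers config objects.

```python
from types import SimpleNamespace


def should_apply_merge_qkv(config) -> bool:
    # Mirrors the condition in _optimize_pre: optimize unless the model
    # carries a quantization_config whose quant_method is 'gptq'.
    return (
        not hasattr(config, "quantization_config") or
        not hasattr(config.quantization_config, "quant_method") or
        config.quantization_config.quant_method != "gptq"
    )


# Plain (non-quantized) Qwen2-style config: no quantization_config at all.
plain_config = SimpleNamespace(model_type="qwen2")

# GPTQ-quantized config: quant_method is 'gptq', so after this commit
# merge_qkv and padding_mlp are both skipped for this model.
gptq_config = SimpleNamespace(
    model_type="qwen2",
    quantization_config=SimpleNamespace(quant_method="gptq"),
)

assert should_apply_merge_qkv(plain_config)
assert not should_apply_merge_qkv(gptq_config)
```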
