From 576dbc49962731d8f483d13913093e1b6d1e5cde Mon Sep 17 00:00:00 2001
From: rnwang04
Date: Fri, 30 Aug 2024 16:51:02 +0800
Subject: [PATCH 1/6] fix

---
 .../ipex_llm/transformers/npu_models/convert_mp.py | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/python/llm/src/ipex_llm/transformers/npu_models/convert_mp.py b/python/llm/src/ipex_llm/transformers/npu_models/convert_mp.py
index 1aac9e320c2..c73abee467a 100644
--- a/python/llm/src/ipex_llm/transformers/npu_models/convert_mp.py
+++ b/python/llm/src/ipex_llm/transformers/npu_models/convert_mp.py
@@ -41,19 +41,28 @@ def optimize_llm_pre(model: torch.nn.Module, qtype):
         if model.config.hidden_size in [4096, 2048]:
             from ipex_llm.transformers.models.baichuan import pre_compute_inv_freq
             model.apply(pre_compute_inv_freq)
+
+    # MiniCPM-V 2.6 and minicpm-2b must put lm_head on CPU now
+    cpu_lm_head = (model.config.model_type == "minicpmv" and model.config.hidden_size == 3584
+                   and model.config.vocab_size == 151666) or \
+        (model.config.model_type == "minicpm" and model.config.num_hidden_layers == 40) \
+        or os.environ.get("IPEX_LLM_CPU_LM_HEAD", "0") != "0"
 
     if model.config.model_type == "minicpmv" and hasattr(model, "llm"):
         # MiniCPM-V
         if model.config.hidden_size == 2304 and model.config.vocab_size == 122753:
+            # MiniCPM-V 2
             model.llm.config.model_type = "minicpm"
         elif model.config.hidden_size == 3584 and model.config.vocab_size == 151666:
+            # MiniCPM-V 2.6
             model.llm.config.model_type = "qwen2"
         elif model.config.hidden_size == 4096 and model.config.vocab_size == 128256:
+            # MiniCPM-V 2.5
             model.llm.config.model_type = "llama"
         model = model.llm
 
     # lm_head to cpu optimization
-    if os.environ.get("IPEX_LLM_CPU_LM_HEAD", "0") != "0":
+    if cpu_lm_head:
         # disable the optimization by default
         from ipex_llm.transformers.low_bit_linear import SYM_INT4, SYM_INT8
         if qtype == "sym_int4_rtn":

From fdaaf9b272f8b1848385ccbff7398b80c5051b43 Mon Sep 17 00:00:00 2001
From: rnwang04
Date: Fri, 30 Aug 2024 16:53:25 +0800
Subject: [PATCH 2/6] fix

---
 .../Multimodal/minicpm-llama3-v2.5.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/python/llm/example/NPU/HF-Transformers-AutoModels/Multimodal/minicpm-llama3-v2.5.py b/python/llm/example/NPU/HF-Transformers-AutoModels/Multimodal/minicpm-llama3-v2.5.py
index 4d223ee3479..86b417b2496 100644
--- a/python/llm/example/NPU/HF-Transformers-AutoModels/Multimodal/minicpm-llama3-v2.5.py
+++ b/python/llm/example/NPU/HF-Transformers-AutoModels/Multimodal/minicpm-llama3-v2.5.py
@@ -66,6 +66,7 @@
     intra_pp=args.intra_pp,
     inter_pp=args.inter_pp,
     transpose_value_cache=not args.disable_transpose_value_cache,
+    modules_to_not_convert=['vpm', 'resampler']
 )
 
 tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
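PATCH 2/6 keeps the vision tower ('vpm') and the resampler out of low-bit conversion when the MiniCPM-Llama3-V 2.5 example loads the model for the NPU. A minimal sketch of the resulting call site follows; only the keyword arguments visible in the diff context come from the patch, while the import path, argument names, and defaults are assumptions modeled on the surrounding IPEX-LLM NPU examples.

import argparse

# Assumed import path, modeled on IPEX-LLM's NPU examples.
from ipex_llm.transformers.npu_model import AutoModel

parser = argparse.ArgumentParser()
parser.add_argument("--repo-id-or-model-path", type=str,
                    default="openbmb/MiniCPM-Llama3-V-2_5")  # assumed default checkpoint id
parser.add_argument("--intra-pp", type=int, default=2)       # assumed default
parser.add_argument("--inter-pp", type=int, default=2)       # assumed default
parser.add_argument("--disable-transpose-value-cache", action="store_true")
args = parser.parse_args()

model = AutoModel.from_pretrained(
    args.repo_id_or_model_path,
    trust_remote_code=True,
    intra_pp=args.intra_pp,
    inter_pp=args.inter_pp,
    transpose_value_cache=not args.disable_transpose_value_cache,
    # From the patch: leave the vision tower ('vpm') and the resampler
    # unconverted; only the language model is lowered for the NPU.
    modules_to_not_convert=['vpm', 'resampler'],
)

Excluding the multimodal front-end from conversion is the usual way to protect accuracy in the vision path while still shrinking the language model.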
From a950d44c2e3b11a86c2ba060e3c17c8e7b738d6c Mon Sep 17 00:00:00 2001
From: rnwang04
Date: Fri, 30 Aug 2024 16:56:23 +0800
Subject: [PATCH 3/6] fix

---
 .../llm/src/ipex_llm/transformers/npu_models/convert_mp.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/python/llm/src/ipex_llm/transformers/npu_models/convert_mp.py b/python/llm/src/ipex_llm/transformers/npu_models/convert_mp.py
index c73abee467a..8e108c2ffd4 100644
--- a/python/llm/src/ipex_llm/transformers/npu_models/convert_mp.py
+++ b/python/llm/src/ipex_llm/transformers/npu_models/convert_mp.py
@@ -41,11 +41,11 @@ def optimize_llm_pre(model: torch.nn.Module, qtype):
         if model.config.hidden_size in [4096, 2048]:
             from ipex_llm.transformers.models.baichuan import pre_compute_inv_freq
             model.apply(pre_compute_inv_freq)
-
+
     # MiniCPM-V 2.6 and minicpm-2b must put lm_head on CPU now
     cpu_lm_head = (model.config.model_type == "minicpmv" and model.config.hidden_size == 3584
-                   and model.config.vocab_size == 151666) or \
-        (model.config.model_type == "minicpm" and model.config.num_hidden_layers == 40) \
+                   and model.config.vocab_size == 151666) \
+        or (model.config.model_type == "minicpm" and model.config.num_hidden_layers == 40) \
         or os.environ.get("IPEX_LLM_CPU_LM_HEAD", "0") != "0"
 
     if model.config.model_type == "minicpmv" and hasattr(model, "llm"):

From 819093fd0c9c1b4334a99492f1efdc04a224ce54 Mon Sep 17 00:00:00 2001
From: rnwang04
Date: Fri, 30 Aug 2024 17:01:28 +0800
Subject: [PATCH 4/6] fix style

---
 .../ipex_llm/transformers/npu_models/convert_mp.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/python/llm/src/ipex_llm/transformers/npu_models/convert_mp.py b/python/llm/src/ipex_llm/transformers/npu_models/convert_mp.py
index 8e108c2ffd4..e29d22649e2 100644
--- a/python/llm/src/ipex_llm/transformers/npu_models/convert_mp.py
+++ b/python/llm/src/ipex_llm/transformers/npu_models/convert_mp.py
@@ -43,10 +43,14 @@ def optimize_llm_pre(model: torch.nn.Module, qtype):
             model.apply(pre_compute_inv_freq)
 
     # MiniCPM-V 2.6 and minicpm-2b must put lm_head on CPU now
-    cpu_lm_head = (model.config.model_type == "minicpmv" and model.config.hidden_size == 3584
-                   and model.config.vocab_size == 151666) \
-        or (model.config.model_type == "minicpm" and model.config.num_hidden_layers == 40) \
-        or os.environ.get("IPEX_LLM_CPU_LM_HEAD", "0") != "0"
+    cpu_lm_head = (
+        (model.config.model_type == "minicpmv" and model.config.hidden_size == 3584 and model.config.vocab_size == 151666)
+        or (
+            model.config.model_type == "minicpm" and model.config.num_hidden_layers == 40
+        )
+        or (
+            os.environ.get("IPEX_LLM_CPU_LM_HEAD", "0") != "0"
+        ))
 
     if model.config.model_type == "minicpmv" and hasattr(model, "llm"):
         # MiniCPM-V

From c98080336059a0be4d673a94ca2f30964fc3a20d Mon Sep 17 00:00:00 2001
From: rnwang04
Date: Fri, 30 Aug 2024 17:06:56 +0800
Subject: [PATCH 5/6] fix style

---
 .../src/ipex_llm/transformers/npu_models/convert_mp.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/python/llm/src/ipex_llm/transformers/npu_models/convert_mp.py b/python/llm/src/ipex_llm/transformers/npu_models/convert_mp.py
index e29d22649e2..6a3745642f5 100644
--- a/python/llm/src/ipex_llm/transformers/npu_models/convert_mp.py
+++ b/python/llm/src/ipex_llm/transformers/npu_models/convert_mp.py
@@ -44,13 +44,13 @@ def optimize_llm_pre(model: torch.nn.Module, qtype):
 
     # MiniCPM-V 2.6 and minicpm-2b must put lm_head on CPU now
     cpu_lm_head = (
-        (model.config.model_type == "minicpmv" and model.config.hidden_size == 3584 and model.config.vocab_size == 151666)
+        (model.config.model_type == "minicpmv" and model.config.hidden_size == 3584 and \
+            model.config.vocab_size == 151666)
         or (
             model.config.model_type == "minicpm" and model.config.num_hidden_layers == 40
         )
-        or (
-            os.environ.get("IPEX_LLM_CPU_LM_HEAD", "0") != "0"
-        ))
+        or os.environ.get("IPEX_LLM_CPU_LM_HEAD", "0") != "0"
+    )
 
     if model.config.model_type == "minicpmv" and hasattr(model, "llm"):
         # MiniCPM-V
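Patches 3/6 through 5/6 are successive reformattings of the same predicate, and the recurring friction is the continuation style: backslash continuations interact badly with multi-line boolean operators and the linter's line-length rules. A toy illustration (the variables below are made up, not the patch's code) of why the fully parenthesized form the series converges on is the more robust style:

a, b, c = True, False, False

# Backslash continuation: any character after the backslash, even a
# trailing space, is a SyntaxError, and the expression's extent is implicit.
flag_backslash = a or \
    b or \
    c

# Parenthesized continuation: the parentheses delimit the expression,
# lines can be re-wrapped freely, and each line leads with its operator.
flag_parenthesized = (
    a
    or b
    or c
)

assert flag_backslash == flag_parenthesized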
From b6edd387b0e03baf2d4979b92e43ef286e7361bb Mon Sep 17 00:00:00 2001
From: rnwang04
Date: Fri, 30 Aug 2024 17:08:20 +0800
Subject: [PATCH 6/6] fix style

---
 python/llm/src/ipex_llm/transformers/npu_models/convert_mp.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/llm/src/ipex_llm/transformers/npu_models/convert_mp.py b/python/llm/src/ipex_llm/transformers/npu_models/convert_mp.py
index 6a3745642f5..5dac6c5a871 100644
--- a/python/llm/src/ipex_llm/transformers/npu_models/convert_mp.py
+++ b/python/llm/src/ipex_llm/transformers/npu_models/convert_mp.py
@@ -44,7 +44,7 @@ def optimize_llm_pre(model: torch.nn.Module, qtype):
 
     # MiniCPM-V 2.6 and minicpm-2b must put lm_head on CPU now
     cpu_lm_head = (
-        (model.config.model_type == "minicpmv" and model.config.hidden_size == 3584 and \
+        (model.config.model_type == "minicpmv" and model.config.hidden_size == 3584 and
             model.config.vocab_size == 151666)
         or (
             model.config.model_type == "minicpm" and model.config.num_hidden_layers == 40
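With the series applied, the predicate's net behaviour is easy to check in isolation. A minimal sketch follows, assuming a hypothetical stand-in for the Hugging Face model config: the Cfg dataclass and the sample values are invented for illustration, while the field names and thresholds are the ones the patch tests.

import os
from dataclasses import dataclass

@dataclass
class Cfg:  # hypothetical stand-in for model.config
    model_type: str
    hidden_size: int = 0
    vocab_size: int = 0
    num_hidden_layers: int = 0

def needs_cpu_lm_head(config: Cfg) -> bool:
    return (
        (config.model_type == "minicpmv" and config.hidden_size == 3584 and
            config.vocab_size == 151666)                        # MiniCPM-V 2.6
        or (config.model_type == "minicpm" and
            config.num_hidden_layers == 40)                     # minicpm-2b
        or os.environ.get("IPEX_LLM_CPU_LM_HEAD", "0") != "0"   # manual opt-in
    )

# MiniCPM-V 2.6 and minicpm-2b always take the CPU lm_head path; other
# models do so only when IPEX_LLM_CPU_LM_HEAD is set to a non-"0" value.
assert needs_cpu_lm_head(Cfg("minicpmv", hidden_size=3584, vocab_size=151666))
assert needs_cpu_lm_head(Cfg("minicpm", num_hidden_layers=40))
os.environ.pop("IPEX_LLM_CPU_LM_HEAD", None)
assert not needs_cpu_lm_head(Cfg("llama", hidden_size=4096))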