From 3778c3b0621741bbb606b90e0a8ba7a346d01b74 Mon Sep 17 00:00:00 2001 From: cyita Date: Mon, 4 Nov 2024 17:35:52 +0800 Subject: [PATCH 1/3] add env to disable compile opt --- python/llm/src/ipex_llm/transformers/npu_models/llama_mp.py | 2 +- python/llm/src/ipex_llm/transformers/npu_models/qwen2_mp.py | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/python/llm/src/ipex_llm/transformers/npu_models/llama_mp.py b/python/llm/src/ipex_llm/transformers/npu_models/llama_mp.py index 1d4df45cba2..b237f6cc6b2 100644 --- a/python/llm/src/ipex_llm/transformers/npu_models/llama_mp.py +++ b/python/llm/src/ipex_llm/transformers/npu_models/llama_mp.py @@ -196,7 +196,7 @@ def __init__( new_value_states = self.convert_to_fp16(curr_key_values[i][1]) print("start compiling") - if mode == "prefill": + if mode == "prefill" and os.environ.get("IPEX_LLM_NPU_DISABLE_COMPILE_OPT", "0") != "1": self.compile(npu_dpu_groups=6) else: self.compile() diff --git a/python/llm/src/ipex_llm/transformers/npu_models/qwen2_mp.py b/python/llm/src/ipex_llm/transformers/npu_models/qwen2_mp.py index 54d6e4e64f8..ad497bc21bb 100644 --- a/python/llm/src/ipex_llm/transformers/npu_models/qwen2_mp.py +++ b/python/llm/src/ipex_llm/transformers/npu_models/qwen2_mp.py @@ -232,7 +232,9 @@ def __init__( new_value_states = self.convert_to_fp16(curr_key_values[i][1]) print(f"{mode} start compiling") - if group_size != 0 and (mode == "prefill" or num_layers == 2): + if (group_size != 0 + and (mode == "prefill" or num_layers == 2) + and os.environ.get("IPEX_LLM_NPU_DISABLE_COMPILE_OPT", "0") != "1"): self.compile(npu_dpu_groups=6) else: self.compile() From 4f55151bf81c21e22598516a582a6a9445c4416a Mon Sep 17 00:00:00 2001 From: cyita Date: Mon, 4 Nov 2024 17:40:08 +0800 Subject: [PATCH 2/3] fix style --- python/llm/src/ipex_llm/transformers/npu_models/qwen2_mp.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/llm/src/ipex_llm/transformers/npu_models/qwen2_mp.py b/python/llm/src/ipex_llm/transformers/npu_models/qwen2_mp.py index ad497bc21bb..6ad03409992 100644 --- a/python/llm/src/ipex_llm/transformers/npu_models/qwen2_mp.py +++ b/python/llm/src/ipex_llm/transformers/npu_models/qwen2_mp.py @@ -234,7 +234,8 @@ def __init__( print(f"{mode} start compiling") if (group_size != 0 and (mode == "prefill" or num_layers == 2) - and os.environ.get("IPEX_LLM_NPU_DISABLE_COMPILE_OPT", "0") != "1"): + and os.environ.get("IPEX_LLM_NPU_DISABLE_COMPILE_OPT", "0") != "1" + ): self.compile(npu_dpu_groups=6) else: self.compile() From ba17a0356b05b916abdd2da254716395d0d581ac Mon Sep 17 00:00:00 2001 From: cyita Date: Mon, 4 Nov 2024 17:43:39 +0800 Subject: [PATCH 3/3] fix style --- python/llm/src/ipex_llm/transformers/npu_models/qwen2_mp.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/llm/src/ipex_llm/transformers/npu_models/qwen2_mp.py b/python/llm/src/ipex_llm/transformers/npu_models/qwen2_mp.py index 6ad03409992..501fb4aa87a 100644 --- a/python/llm/src/ipex_llm/transformers/npu_models/qwen2_mp.py +++ b/python/llm/src/ipex_llm/transformers/npu_models/qwen2_mp.py @@ -232,7 +232,8 @@ def __init__( new_value_states = self.convert_to_fp16(curr_key_values[i][1]) print(f"{mode} start compiling") - if (group_size != 0 + if ( + group_size != 0 and (mode == "prefill" or num_layers == 2) and os.environ.get("IPEX_LLM_NPU_DISABLE_COMPILE_OPT", "0") != "1" ):