diff --git a/python/llm/src/ipex_llm/transformers/npu_models/llama_mp.py b/python/llm/src/ipex_llm/transformers/npu_models/llama_mp.py index 1d4df45cba2..b237f6cc6b2 100644 --- a/python/llm/src/ipex_llm/transformers/npu_models/llama_mp.py +++ b/python/llm/src/ipex_llm/transformers/npu_models/llama_mp.py @@ -196,7 +196,7 @@ def __init__( new_value_states = self.convert_to_fp16(curr_key_values[i][1]) print("start compiling") - if mode == "prefill": + if mode == "prefill" and os.environ.get("IPEX_LLM_NPU_DISABLE_COMPILE_OPT", "0") != "1": self.compile(npu_dpu_groups=6) else: self.compile() diff --git a/python/llm/src/ipex_llm/transformers/npu_models/qwen2_mp.py b/python/llm/src/ipex_llm/transformers/npu_models/qwen2_mp.py index 54d6e4e64f8..501fb4aa87a 100644 --- a/python/llm/src/ipex_llm/transformers/npu_models/qwen2_mp.py +++ b/python/llm/src/ipex_llm/transformers/npu_models/qwen2_mp.py @@ -232,7 +232,11 @@ def __init__( new_value_states = self.convert_to_fp16(curr_key_values[i][1]) print(f"{mode} start compiling") - if group_size != 0 and (mode == "prefill" or num_layers == 2): + if ( + group_size != 0 + and (mode == "prefill" or num_layers == 2) + and os.environ.get("IPEX_LLM_NPU_DISABLE_COMPILE_OPT", "0") != "1" + ): self.compile(npu_dpu_groups=6) else: self.compile()