From 4cdbb990cebc9337f319716ccfa4adad27191079 Mon Sep 17 00:00:00 2001
From: rnwang04
Date: Tue, 26 Nov 2024 11:18:45 +0800
Subject: [PATCH] fix style

---
 .../llm/src/ipex_llm/transformers/npu_pipeline_model/common.py | 3 ++-
 .../transformers/npu_pipeline_model/convert_pipeline.py        | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/python/llm/src/ipex_llm/transformers/npu_pipeline_model/common.py b/python/llm/src/ipex_llm/transformers/npu_pipeline_model/common.py
index 330401fefa98..2d5afedd07a5 100644
--- a/python/llm/src/ipex_llm/transformers/npu_pipeline_model/common.py
+++ b/python/llm/src/ipex_llm/transformers/npu_pipeline_model/common.py
@@ -36,7 +36,8 @@ def update_names_of_IR_and_export_blob(model, model_name, dir, compile_blob=True
     core.set_property("NPU", {"NPU_COMPILATION_MODE_PARAMS":
                       "compute-layers-with-higher-precision=Sqrt,Power,ReduceMean,Add"})
     core.set_property("NPU", {"PERFORMANCE_HINT": "LATENCY"})
-    if npu_dpu_groups is not None and os.environ.get("IPEX_LLM_NPU_DISABLE_COMPILE_OPT", "0") != "1":
+    if (npu_dpu_groups is not None
+            and os.environ.get("IPEX_LLM_NPU_DISABLE_COMPILE_OPT", "0") != "1"):
         core.set_property("NPU", {"NPU_DPU_GROUPS": str(npu_dpu_groups)})

     model = core.read_model(xml_path)
diff --git a/python/llm/src/ipex_llm/transformers/npu_pipeline_model/convert_pipeline.py b/python/llm/src/ipex_llm/transformers/npu_pipeline_model/convert_pipeline.py
index 84b02363452a..6e8edaa2e89c 100644
--- a/python/llm/src/ipex_llm/transformers/npu_pipeline_model/convert_pipeline.py
+++ b/python/llm/src/ipex_llm/transformers/npu_pipeline_model/convert_pipeline.py
@@ -472,7 +472,7 @@ def convert_llm_for_deploy(model: torch.nn.Module,
             # llama3.2 1B & # llama3.2 3B
             embedding_post = True
             cos_sin_input = True
-            fused_layers = 2
+            fused_layers = 1
         else:
             # for Llama3-8B
             fused_layers = 2
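
Note: below is a minimal standalone sketch of the compile-option gate that the
common.py hunk re-wraps, assuming OpenVINO's Python API is installed;
configure_npu is a hypothetical helper name used here for illustration and is
not part of the patch.

    import os

    from openvino import Core

    def configure_npu(core: Core, npu_dpu_groups=None):
        # Request higher precision for the listed layers and latency-oriented
        # scheduling, as in the two unconditional set_property calls above.
        core.set_property("NPU", {"NPU_COMPILATION_MODE_PARAMS":
                                  "compute-layers-with-higher-precision="
                                  "Sqrt,Power,ReduceMean,Add"})
        core.set_property("NPU", {"PERFORMANCE_HINT": "LATENCY"})
        # Apply NPU_DPU_GROUPS only when a value is given and the opt-out
        # variable IPEX_LLM_NPU_DISABLE_COMPILE_OPT is not "1" -- the exact
        # condition the patch re-wraps to satisfy the line-length linter.
        if (npu_dpu_groups is not None
                and os.environ.get("IPEX_LLM_NPU_DISABLE_COMPILE_OPT", "0") != "1"):
            core.set_property("NPU", {"NPU_DPU_GROUPS": str(npu_dpu_groups)})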