diff --git a/python/llm/src/ipex_llm/transformers/pipeline_parallel.py b/python/llm/src/ipex_llm/transformers/pipeline_parallel.py index 2b89c3fdaf8..f292a80cf23 100644 --- a/python/llm/src/ipex_llm/transformers/pipeline_parallel.py +++ b/python/llm/src/ipex_llm/transformers/pipeline_parallel.py @@ -107,7 +107,7 @@ def init_pipeline_parallel(): dist.init_process_group('ccl') -def _check_quantize_kv_cache(model, idx, batch_size) -> bool: +def _check_quantize_kv_cache(model, idx, batch_size): # align use_quantize_kv_cache setting for different GPU in pipeline parallel pp_quantize_kv_cache = (os.environ.get("BIGDL_QUANTIZE_KV_CACHE", None) == "1") or \ (os.environ.get("IPEX_LLM_QUANTIZE_KV_CACHE", None) == "1") or \