From 145a8a1ea615b9fb55f28138f01509c8b3946a30 Mon Sep 17 00:00:00 2001 From: xiangyuT Date: Fri, 21 Jun 2024 08:58:10 +0800 Subject: [PATCH] Pass load_in_low_bit to IPEXLLMAsyncLLMEngine in the vLLM CPU OpenAI api_server --- .../src/ipex_llm/vllm/cpu/entrypoints/openai/api_server.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/python/llm/src/ipex_llm/vllm/cpu/entrypoints/openai/api_server.py b/python/llm/src/ipex_llm/vllm/cpu/entrypoints/openai/api_server.py index 31991d5028e..5065f1c1ab1 100644 --- a/python/llm/src/ipex_llm/vllm/cpu/entrypoints/openai/api_server.py +++ b/python/llm/src/ipex_llm/vllm/cpu/entrypoints/openai/api_server.py @@ -175,7 +175,9 @@ async def authentication(request: Request, call_next): served_model_names = [args.model] engine_args = AsyncEngineArgs.from_cli_args(args) engine = IPEXLLMAsyncLLMEngine.from_engine_args( - engine_args, usage_context=UsageContext.OPENAI_API_SERVER) + engine_args, usage_context=UsageContext.OPENAI_API_SERVER, + load_in_low_bit=args.load_in_low_bit, + ) openai_serving_chat = OpenAIServingChat(engine, served_model_names, args.response_role, args.lora_modules,