Update fastchat demo script #12367

Merged: 2 commits, Nov 8, 2024
docker/llm/serving/xpu/docker/README.md (4 changes: 3 additions & 1 deletion)

@@ -102,7 +102,9 @@ To set up model serving using `IPEX-LLM` as backend using FastChat, you can refe

# start controller
python -m fastchat.serve.controller &
export USE_XETLA=OFF
export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=2

export TORCH_LLM_ALLREDUCE=0
export CCL_DG2_ALLREDUCE=1
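The hunk above only shows the controller start and the added environment variables; a full FastChat serving stack also needs a model worker and an OpenAI-compatible API server. The following is a minimal sketch of that sequence, assuming the `ipex_llm.serving.fastchat.ipex_llm_worker` entry point from IPEX-LLM's FastChat integration is available in the container and using a placeholder model path; adjust paths, ports, and worker options to your setup.

```bash
# Minimal sketch of a full FastChat serving stack on an Intel GPU (XPU).
# Assumptions: the ipex_llm.serving.fastchat.ipex_llm_worker module is installed,
# and /llm/models/Llama-2-7b-chat-hf is a placeholder model path.

# start controller
python -m fastchat.serve.controller &

# runtime environment for Intel GPU (XPU) execution
export USE_XETLA=OFF
export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=2
export TORCH_LLM_ALLREDUCE=0
export CCL_DG2_ALLREDUCE=1

# start an IPEX-LLM model worker on the XPU
# (module name and flags assumed from the IPEX-LLM FastChat integration)
python -m ipex_llm.serving.fastchat.ipex_llm_worker \
  --model-path /llm/models/Llama-2-7b-chat-hf \
  --device xpu &

# expose an OpenAI-compatible REST API on port 8000
python -m fastchat.serve.openai_api_server --host 0.0.0.0 --port 8000 &
```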
docs/mddocs/DockerGuides/vllm_docker_quickstart.md (4 changes: 3 additions & 1 deletion)

@@ -831,7 +831,9 @@ We can set up model serving using `IPEX-LLM` as backend using FastChat, the foll

# start controller
python -m fastchat.serve.controller &
export USE_XETLA=OFF
export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=2

export TORCH_LLM_ALLREDUCE=0
export CCL_DG2_ALLREDUCE=1
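Once the controller, worker, and API server from the quickstart are running, the service can be smoke-tested with a plain OpenAI-style request. The snippet below is an assumed example: it presumes the API server listens on localhost:8000 and that the worker registered a model named `Llama-2-7b-chat-hf`, so substitute your own host, port, and model name.

```bash
# Quick smoke test against the OpenAI-compatible endpoint started by FastChat.
# Host, port, and model name below are assumptions; replace with your own values.
curl http://localhost:8000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
        "model": "Llama-2-7b-chat-hf",
        "messages": [{"role": "user", "content": "What is IPEX-LLM?"}],
        "max_tokens": 64
      }'
```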