From c10d3ef33dc09824d72be326cabb6f66621ffab4 Mon Sep 17 00:00:00 2001 From: ATMxsp01 Date: Thu, 14 Nov 2024 10:25:50 +0800 Subject: [PATCH] Update readme & doc for the vllm upgrade to v0.6.2 --- docker/llm/serving/xpu/docker/README.md | 2 ++ docs/mddocs/DockerGuides/vllm_docker_quickstart.md | 2 ++ 2 files changed, 4 insertions(+) diff --git a/docker/llm/serving/xpu/docker/README.md b/docker/llm/serving/xpu/docker/README.md index 9109707c3a9..6ce8df86d26 100644 --- a/docker/llm/serving/xpu/docker/README.md +++ b/docker/llm/serving/xpu/docker/README.md @@ -123,6 +123,8 @@ To set up model serving using `IPEX-LLM` as backend using FastChat, you can refe --model-path /llm/models/Yi-1.5-34B \ --device xpu \ --enforce-eager \ + --disable-async-output-proc \ + --distributed-executor-backend ray \ --dtype float16 \ --load-in-low-bit fp8 \ --tensor-parallel-size 4 \ diff --git a/docs/mddocs/DockerGuides/vllm_docker_quickstart.md b/docs/mddocs/DockerGuides/vllm_docker_quickstart.md index 5396ae242b9..eed01391803 100644 --- a/docs/mddocs/DockerGuides/vllm_docker_quickstart.md +++ b/docs/mddocs/DockerGuides/vllm_docker_quickstart.md @@ -852,6 +852,8 @@ We can set up model serving using `IPEX-LLM` as backend using FastChat, the foll --model-path /llm/models/Yi-1.5-34B \ --device xpu \ --enforce-eager \ + --disable-async-output-proc \ + --distributed-executor-backend ray \ --dtype float16 \ --load-in-low-bit fp8 \ --tensor-parallel-size 4 \