From 5d0c607a7f545ae9db42108ad48e267c0b6ddaba Mon Sep 17 00:00:00 2001
From: Michael Wyatt
Date: Thu, 18 Jan 2024 16:40:14 -0800
Subject: [PATCH] catch errors when starting vllm server

---
 benchmarks/inference/mii/run_all.sh    | 2 +-
 benchmarks/inference/mii/src/server.py | 6 +++++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/benchmarks/inference/mii/run_all.sh b/benchmarks/inference/mii/run_all.sh
index 67cf80e1f..88ff7c8cb 100644
--- a/benchmarks/inference/mii/run_all.sh
+++ b/benchmarks/inference/mii/run_all.sh
@@ -3,7 +3,7 @@

 # DeepSpeed Team

-MODELS=(meta-llama/Llama-2-7b-hf meta-llama/Llama-2-13b-hf meta-llama/Llama-2-70b-hf tiiuae/falcon-180B microsoft/phi-2 mistralai/Mixtral-8x7B-v0.1)
+MODELS=(meta-llama/Llama-2-7b-hf meta-llama/Llama-2-13b-hf meta-llama/Llama-2-70b-hf tiiuae/falcon-40B tiiuae/falcon-180B microsoft/phi-2 mistralai/Mixtral-8x7B-v0.1)

 for MODEL in ${MODELS[@]}; do
     python ./src/run_benchmark.py --model ${MODEL} --stream

diff --git a/benchmarks/inference/mii/src/server.py b/benchmarks/inference/mii/src/server.py
index 73ee3c4a6..8bfc9f441 100644
--- a/benchmarks/inference/mii/src/server.py
+++ b/benchmarks/inference/mii/src/server.py
@@ -49,12 +49,16 @@ def start_vllm_server(model: str, tp_size: int) -> None:
     )
     p = subprocess.Popen(vllm_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
     start_time = time.time()
-    timeout_after = 60 * 2  # 2 minutes
+    timeout_after = 60 * 5  # 5 minutes
     while True:
         line = p.stderr.readline().decode("utf-8")
         if "Application startup complete" in line:
             break
         time.sleep(1)
+        if "ERROR" in line:
+            p.terminate()
+            stop_vllm_server()
+            raise RuntimeError(f"Error starting VLLM server: {line}")
         if time.time() - start_time > timeout_after:
             p.terminate()
             stop_vllm_server()
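
Note on the pattern this patch adds: the startup loop launches the vLLM server with subprocess.Popen, scans its stderr line by line, breaks on the readiness message, raises RuntimeError (after terminating the process and cleaning up) when a line contains "ERROR", and raises on timeout. Below is a minimal, self-contained sketch of the same watch-the-logs pattern; the function name, markers, and demo command are illustrative assumptions, not the benchmark's actual API.

# Illustrative sketch only; wait_for_startup and its markers are assumptions,
# not names from the benchmark code.
import subprocess
import sys
import time


def wait_for_startup(cmd, ready_marker="Application startup complete",
                     error_marker="ERROR", timeout=60 * 5):
    """Launch `cmd` and scan its stderr until it reports readiness.

    Raises RuntimeError on an error line and TimeoutError if the ready
    marker never appears within `timeout` seconds.
    """
    p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    start_time = time.time()
    while True:
        # NOTE: readline() blocks until the child writes a line, so the
        # timeout below is only checked while output keeps flowing.
        line = p.stderr.readline().decode("utf-8")
        if ready_marker in line:
            return p
        if error_marker in line:
            p.terminate()
            raise RuntimeError(f"Error starting server: {line}")
        if time.time() - start_time > timeout:
            p.terminate()
            raise TimeoutError("Timed out waiting for server to start")
        time.sleep(1)


if __name__ == "__main__":
    # Stand-in for the real server command: a child that immediately
    # prints the readiness message to stderr.
    demo_cmd = [sys.executable, "-c",
                "import sys; print('Application startup complete', file=sys.stderr)"]
    proc = wait_for_startup(demo_cmd)

One design caveat worth keeping in mind: because readline() blocks, the timeout check in both the patch and this sketch only fires when the server is actively emitting log lines; a non-blocking reader (e.g. a selector or a reader thread) would enforce the deadline strictly.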