Catch errors when starting vLLM server

microsoft · Jan 19, 2024 · 5d0c607
1 parent dfbe75b
commit 5d0c607
Show file tree

Hide file tree

Showing 2 changed files with 6 additions and 2 deletions.
diff --git a/benchmarks/inference/mii/run_all.sh b/benchmarks/inference/mii/run_all.sh
@@ -3,7 +3,7 @@
 
 # DeepSpeed Team
 
-MODELS=(meta-llama/Llama-2-7b-hf meta-llama/Llama-2-13b-hf meta-llama/Llama-2-70b-hf tiiuae/falcon-180B microsoft/phi-2 mistralai/Mixtral-8x7B-v0.1)
+MODELS=(meta-llama/Llama-2-7b-hf meta-llama/Llama-2-13b-hf meta-llama/Llama-2-70b-hf tiiuae/falcon-40B tiiuae/falcon-180B microsoft/phi-2 mistralai/Mixtral-8x7B-v0.1)
 
 for MODEL in ${MODELS[@]}; do
     python ./src/run_benchmark.py --model ${MODEL} --stream

diff --git a/benchmarks/inference/mii/src/server.py b/benchmarks/inference/mii/src/server.py
@@ -49,12 +49,16 @@ def start_vllm_server(model: str, tp_size: int) -> None:
     )
     p = subprocess.Popen(vllm_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
     start_time = time.time()
-    timeout_after = 60 * 2  # 2 minutes
+    timeout_after = 60 * 5  # 5 minutes
     while True:
         line = p.stderr.readline().decode("utf-8")
         if "Application startup complete" in line:
             break
         time.sleep(1)
+        if "ERROR" in line:
+            p.terminate()
+            stop_vllm_server()
+            raise RuntimeError(f"Error starting VLLM server: {line}")
         if time.time() - start_time > timeout_after:
             p.terminate()
             stop_vllm_server()