Skip to content

Commit

Permalink
catch errors when starting vllm server
Browse files: browse the repository at this point in the history
  • Loading branch information
mrwyattii committed Jan 19, 2024
1 parent dfbe75b commit 5d0c607
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 2 deletions.
2 changes: 1 addition & 1 deletion benchmarks/inference/mii/run_all.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

# DeepSpeed Team

MODELS=(meta-llama/Llama-2-7b-hf meta-llama/Llama-2-13b-hf meta-llama/Llama-2-70b-hf tiiuae/falcon-180B microsoft/phi-2 mistralai/Mixtral-8x7B-v0.1)
MODELS=(meta-llama/Llama-2-7b-hf meta-llama/Llama-2-13b-hf meta-llama/Llama-2-70b-hf tiiuae/falcon-40B tiiuae/falcon-180B microsoft/phi-2 mistralai/Mixtral-8x7B-v0.1)

for MODEL in ${MODELS[@]}; do
python ./src/run_benchmark.py --model ${MODEL} --stream
Expand Down
6 changes: 5 additions & 1 deletion benchmarks/inference/mii/src/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,12 +49,16 @@ def start_vllm_server(model: str, tp_size: int) -> None:
)
p = subprocess.Popen(vllm_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
start_time = time.time()
timeout_after = 60 * 2 # 2 minutes
timeout_after = 60 * 5 # 5 minutes
while True:
line = p.stderr.readline().decode("utf-8")
if "Application startup complete" in line:
break
time.sleep(1)
if "ERROR" in line:
p.terminate()
stop_vllm_server()
raise RuntimeError(f"Error starting VLLM server: {line}")
if time.time() - start_time > timeout_after:
p.terminate()
stop_vllm_server()
Expand Down

0 comments on commit 5d0c607

Please sign in to comment.