From dddbca9d8cb0b2d23ae7bfc3bdebd19ff988bd05 Mon Sep 17 00:00:00 2001
From: Simon Mo
Date: Mon, 18 Mar 2024 12:33:47 -0700
Subject: [PATCH] CI: Add ROCm Docker Build (#2886)

---
 .buildkite/run-amd-test.sh  | 42 ++++++++++++++++++++++++++++++++++++++++++
 .buildkite/test-template.j2 |  5 +++++
 requirements-rocm.txt       |  1 +
 3 files changed, 48 insertions(+)
 create mode 100644 .buildkite/run-amd-test.sh

diff --git a/.buildkite/run-amd-test.sh b/.buildkite/run-amd-test.sh
new file mode 100644
index 0000000000000..83a56e25aca73
--- /dev/null
+++ b/.buildkite/run-amd-test.sh
@@ -0,0 +1,42 @@
+#!/usr/bin/env bash
+# This script builds the ROCm docker image and runs the API server inside the container.
+# It serves as a sanity check for compilation and basic model usage.
+set -ex
+
+# Print ROCm version
+rocminfo
+
+# Try building the docker image
+docker build -t rocm -f Dockerfile.rocm .
+
+# Setup cleanup
+remove_docker_container() { docker rm -f rocm || true; }
+trap remove_docker_container EXIT
+remove_docker_container
+
+# Run the image
+docker run --device /dev/kfd --device /dev/dri --network host --name rocm rocm python3 -m vllm.entrypoints.api_server &
+
+# Wait for the server to start.
+# Polls the health endpoint once per second. Returns non-zero if the server
+# is still not healthy after the timeout, so that `set -e` aborts the CI step
+# instead of running the prompt test against a server that never came up.
+wait_for_server_to_start() {
+    local timeout=300
+    local counter=0
+
+    while [ "$(curl -s -o /dev/null -w '%{http_code}' localhost:8000/health)" != "200" ]; do
+        sleep 1
+        counter=$((counter + 1))
+        if [ "$counter" -ge "$timeout" ]; then
+            echo "Timeout after $timeout seconds" >&2
+            return 1
+        fi
+    done
+}
+wait_for_server_to_start
+
+# Test a simple prompt; --fail makes curl exit non-zero on an HTTP error response
+curl --fail -X POST -H "Content-Type: application/json" \
+    localhost:8000/generate \
+    -d '{"prompt": "San Francisco is a"}'
diff --git a/.buildkite/test-template.j2 b/.buildkite/test-template.j2
index b5853a2f39383..2ff58cc2e0d3c 100644
--- a/.buildkite/test-template.j2
+++ b/.buildkite/test-template.j2
@@ -3,6 +3,11 @@
 {% set default_working_dir = "/vllm-workspace/tests" %}
 
 steps:
+  - label: "AMD Test"
+    agents:
+      queue: amd
+    command: bash .buildkite/run-amd-test.sh
+
   - label: ":docker: build image"
     commands:
       - "docker build --build-arg max_jobs=16 --tag {{ docker_image }} --target test --progress plain ."
diff --git a/requirements-rocm.txt b/requirements-rocm.txt
index 53bd11de7c9de..d5a3bd423b6b3 100644
--- a/requirements-rocm.txt
+++ b/requirements-rocm.txt
@@ -11,3 +11,4 @@ fastapi
 uvicorn[standard]
 pydantic >= 2.0 # Required for OpenAI server.
 prometheus_client >= 0.18.0
+outlines == 0.0.34
\ No newline at end of file