Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Benchmark: add H100 suite #6047

Merged
merged 15 commits into from
Jul 11, 2024
33 changes: 16 additions & 17 deletions .buildkite/nightly-benchmarks/benchmark-pipeline.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -42,21 +42,20 @@ steps:
- name: devshm
emptyDir:
medium: Memory
# - label: "H100: NVIDIA SMI"
# agents:
# queue: H100
# plugins:
# - docker#v5.11.0:
# image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
# command:
# - bash
# - .buildkite/nightly-benchmarks/run-benchmarks-suite.sh
# mount-buildkite-agent: true
# propagate-environment: true
# propagate-uid-gid: false
# ipc: host
# gpus: all
# environment:
# - VLLM_USAGE_SOURCE
# - HF_TOKEN
- label: "H100: NVIDIA SMI"
agents:
queue: H100
plugins:
- docker#v5.11.0:
image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
command:
- bash
- .buildkite/nightly-benchmarks/run-benchmarks-suite.sh
mount-buildkite-agent: true
propagate-environment: true
ipc: host
gpus: all
environment:
- VLLM_USAGE_SOURCE
- HF_TOKEN

28 changes: 23 additions & 5 deletions .buildkite/nightly-benchmarks/run-benchmarks-suite.sh
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ wait_for_server() {
# wait for vllm server to start
# return 1 if vllm server crashes
timeout 1200 bash -c '
until curl localhost:8000/v1/completions; do
until curl -X POST localhost:8000/v1/completions; do
sleep 1
done' && return 0 || return 1
}
Expand All @@ -73,8 +73,17 @@ kill_gpu_processes() {
echo "All GPU processes have been killed."
fi

# Sometimes kill with pid doesn't work properly, we can also kill all process running python or python3
# since we are in container anyway
pkill -9 -f python
pkill -9 -f python3

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tried pkill pt_main_thread, but I guess pkill -9 -f python3 also works.

# waiting for GPU processes to be fully killed
sleep 10
# loop while nvidia-smi returns any processes
while [ -n "$(nvidia-smi --query-compute-apps=pid --format=csv,noheader)" ]; do
sleep 1
echo "Waiting for GPU processes to be killed"
done

# remove vllm config file
rm -rf ~/.config/vllm
Expand All @@ -90,12 +99,19 @@ upload_to_buildkite() {
# upload the benchmarking results to buildkite

# if the agent binary is not found, skip uploading the results, exit 0
if [ ! -f /workspace/buildkite-agent ]; then
# Check if buildkite-agent is available in the PATH or at /workspace/buildkite-agent
if command -v buildkite-agent >/dev/null 2>&1; then
BUILDKITE_AGENT_COMMAND="buildkite-agent"
elif [ -f /workspace/buildkite-agent ]; then
BUILDKITE_AGENT_COMMAND="/workspace/buildkite-agent"
else
echo "buildkite-agent binary not found. Skip uploading the results."
return 0
fi
/workspace/buildkite-agent annotate --style "info" --context "benchmark-results" < $RESULTS_FOLDER/benchmark_results.md
/workspace/buildkite-agent artifact upload "$RESULTS_FOLDER/*"

# Use the determined command to annotate and upload artifacts
$BUILDKITE_AGENT_COMMAND annotate --style "info" --context "h100-benchmark-results" < $RESULTS_FOLDER/benchmark_results.md
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

--context "${gpu_type}-benchmark-results"

$BUILDKITE_AGENT_COMMAND artifact upload "$RESULTS_FOLDER/*"
}

run_latency_tests() {
Expand Down Expand Up @@ -269,6 +285,7 @@ run_serving_tests() {
echo "Running test case $test_name"
echo "Server command: $server_command"
eval "$server_command" &
server_pid=$!

# wait until the server is alive
wait_for_server
Expand Down Expand Up @@ -318,6 +335,7 @@ run_serving_tests() {
done

# clean up
kill -9 $server_pid
kill_gpu_processes
done
}
Expand Down
Loading