This repository has been archived by the owner on Oct 11, 2024. It is now read-only.
forked from vllm-project/vllm
-
Notifications
You must be signed in to change notification settings - Fork 10
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[CI/CD] add neuron docker and ci test scripts (vllm-project#3571)
- Loading branch information
1 parent
7b9df95
commit ce72466
Showing
6 changed files
with
103 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
# This script build the Neuron docker image and run the API server inside the container. | ||
# It serves a sanity check for compilation and basic model usage. | ||
set -e | ||
|
||
# Try building the docker image | ||
aws ecr get-login-password --region us-west-2 | docker login --username AWS --password-stdin 763104351884.dkr.ecr.us-west-2.amazonaws.com | ||
docker build -t neuron -f Dockerfile.neuron . | ||
|
||
# Setup cleanup | ||
remove_docker_container() { docker rm -f neuron || true; } | ||
trap remove_docker_container EXIT | ||
remove_docker_container | ||
|
||
# Run the image | ||
docker run --device=/dev/neuron0 --device=/dev/neuron1 --network host --name neuron neuron python3 -m vllm.entrypoints.api_server \ | ||
--model TinyLlama/TinyLlama-1.1B-Chat-v1.0 --max-num-seqs 8 --max-model-len 128 --block-size 128 --device neuron --tensor-parallel-size 2 & | ||
|
||
# Wait for the server to start | ||
wait_for_server_to_start() { | ||
timeout=300 | ||
counter=0 | ||
|
||
while [ "$(curl -s -o /dev/null -w ''%{http_code}'' localhost:8000/health)" != "200" ]; do | ||
sleep 1 | ||
counter=$((counter + 1)) | ||
if [ $counter -ge $timeout ]; then | ||
echo "Timeout after $timeout seconds" | ||
break | ||
fi | ||
done | ||
} | ||
wait_for_server_to_start | ||
|
||
# Test a simple prompt | ||
curl -X POST -H "Content-Type: application/json" \ | ||
localhost:8000/generate \ | ||
-d '{"prompt": "San Francisco is a"}' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
# default base image | ||
ARG BASE_IMAGE="763104351884.dkr.ecr.us-west-2.amazonaws.com/pytorch-inference-neuronx:2.1.1-neuronx-py310-sdk2.17.0-ubuntu20.04" | ||
|
||
FROM $BASE_IMAGE | ||
|
||
RUN echo "Base image is $BASE_IMAGE" | ||
|
||
# Install some basic utilities | ||
RUN apt-get update && apt-get install python3 python3-pip -y | ||
|
||
### Mount Point ### | ||
# When launching the container, mount the code directory to /app | ||
ARG APP_MOUNT=/app | ||
VOLUME [ ${APP_MOUNT} ] | ||
WORKDIR ${APP_MOUNT} | ||
|
||
RUN python3 -m pip install --upgrade pip | ||
RUN python3 -m pip install --no-cache-dir fastapi ninja tokenizers pandas | ||
RUN python3 -m pip install sentencepiece transformers==4.36.2 -U | ||
RUN python3 -m pip install transformers-neuronx --extra-index-url=https://pip.repos.neuron.amazonaws.com -U | ||
RUN python3 -m pip install --pre neuronx-cc==2.12.* --extra-index-url=https://pip.repos.neuron.amazonaws.com -U | ||
|
||
COPY ./vllm /app/vllm/vllm | ||
COPY ./setup.py /app/vllm/setup.py | ||
COPY ./requirements-common.txt /app/vllm/requirements-common.txt | ||
COPY ./requirements-neuron.txt /app/vllm/requirements-neuron.txt | ||
|
||
RUN cd /app/vllm \ | ||
&& python3 -m pip install -U -r requirements-neuron.txt | ||
|
||
ENV VLLM_BUILD_WITH_NEURON 1 | ||
RUN cd /app/vllm \ | ||
&& pip install -e . \ | ||
&& cd .. | ||
|
||
CMD ["/bin/bash"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters