
Add dockerfile #1350

Merged: 13 commits, Oct 31, 2023
64 changes: 64 additions & 0 deletions Dockerfile
@@ -0,0 +1,64 @@
FROM nvidia/cuda:11.8.0-devel-ubuntu22.04 AS dev
Collaborator comment:
Suggested change:
FROM nvidia/cuda:11.8.0-devel-ubuntu22.04 AS dev
ARG MAX_JOBS=4
FROM nvidia/cuda:11.8.0-devel-ubuntu22.04 AS dev

Forward-declare the argument and set a sensible default.
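For context on the suggestion: an ARG declared before the first FROM lives in a global scope that only FROM instructions can see, so any stage that needs the value must re-declare the ARG after its own FROM. A minimal sketch of the rule (the demo stage and echo step are purely illustrative):

    ARG MAX_JOBS=4

    FROM ubuntu:22.04 AS demo
    # Re-declare without a value to import the global default into this stage.
    ARG MAX_JOBS
    RUN echo "building with ${MAX_JOBS} jobs"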


RUN apt-get update -y \
&& apt-get install -y python3-pip python3-venv

WORKDIR /workspace
COPY requirements.txt requirements.txt
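# BuildKit cache mount below persists pip's download cache across builds (requires DOCKER_BUILDKIT=1).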
RUN --mount=type=cache,target=/root/.cache/pip \
pip install -r requirements.txt

COPY requirements-dev.txt requirements-dev.txt
RUN --mount=type=cache,target=/root/.cache/pip \
pip install -r requirements-dev.txt

FROM dev AS build_wheel

ARG max_jobs=4

COPY csrc csrc
COPY vllm vllm
COPY pyproject.toml pyproject.toml
COPY README.md README.md
COPY MANIFEST.in MANIFEST.in
Contributor comment:
Ideally we would copy only the csrc folder here and build only the C++ code. If the vllm folder is copied, any change to the Python code triggers a slow rebuild of the C++ code, which is usually unnecessary.

Also, README.md is more or less required during the C++ build, but we could copy an empty README.md while building the C++ code.
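A sketch of the layering this comment suggests, assuming setup.py can compile the extensions from csrc plus the build metadata and tolerates an empty README.md (the build_ext stage name is illustrative):

    FROM dev AS build_ext
    COPY csrc csrc
    COPY setup.py setup.py
    COPY pyproject.toml pyproject.toml
    COPY requirements.txt requirements.txt
    COPY vllm/__init__.py vllm/__init__.py
    # Stub README so edits to the real one do not invalidate the cache.
    RUN touch README.md
    RUN python3 setup.py build_ext --inplace
    # Python sources are copied only in later stages, so editing them
    # leaves the compiled-extension layers above untouched.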

Contributor Author reply:

Makes sense. Do you think it would be possible to build the extensions separately from the wheel, so that the wheel-building step only bundles everything without having to rebuild?

Contributor reply:
I'm not sure how to build the wheel with already-built extensions, but it might be possible. I don't think we need to build the wheel at all. If you like, we can add another container path that builds the wheel, in case you want this to be used to publish wheel files to pip?

Contributor reply:

OK, I just saw that you added another stage to build the wheel. I think this is fine.

Contributor Author reply:

I think there's no need to build the wheel in the Dockerfile for now; we can do that later if we decide to consolidate Docker/CI.

COPY setup.py setup.py

RUN --mount=type=cache,target=/root/.cache/pip \
MAX_JOBS=$max_jobs python3 -m build

FROM dev AS build

COPY csrc csrc
COPY setup.py setup.py
COPY README.md README.md
COPY requirements.txt requirements.txt
COPY pyproject.toml pyproject.toml
COPY vllm/__init__.py vllm/__init__.py

ENV MAX_JOBS=$max_jobs
Collaborator comment:
Suggested change:
ENV MAX_JOBS=$max_jobs
ENV MAX_JOBS=$MAX_JOBS
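Note that this substitution only resolves if the stage also re-declares the ARG after its FROM, matching the forward declaration suggested at the top of the file; a minimal sketch:

    FROM dev AS build
    ARG MAX_JOBS
    ENV MAX_JOBS=$MAX_JOBS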

RUN python3 setup.py build_ext --inplace

FROM dev AS test

COPY --from=build /workspace/vllm/*.so /workspace/vllm/
COPY tests tests
COPY vllm vllm

ENTRYPOINT ["python3", "-m", "pytest", "tests"]

FROM nvidia/cuda:11.8.0-base-ubuntu22.04 AS api_server

RUN apt-get update -y \
&& apt-get install -y python3-pip libnccl2
WORKDIR /workspace

COPY requirements.txt requirements.txt
RUN --mount=type=cache,target=/root/.cache/pip \
pip install -r requirements.txt

COPY --from=build /workspace/vllm/*.so /workspace/vllm/
COPY vllm vllm

EXPOSE 8000
ENTRYPOINT ["python3", "-m", "vllm.entrypoints.api_server"]

1 change: 1 addition & 0 deletions docs/source/index.rst
@@ -65,6 +65,7 @@ Documentation
serving/distributed_serving
serving/run_on_sky
serving/deploying_with_triton
serving/deploying_with_docker

.. toctree::
:maxdepth: 1
21 changes: 21 additions & 0 deletions docs/source/serving/deploying_with_docker.rst
@@ -0,0 +1,21 @@
.. _deploying_with_docker:

Deploying with Docker
============================

You can build and run vLLM from source via the provided Dockerfile. To build vLLM:

.. code-block:: console

$ DOCKER_BUILDKIT=1 docker build . --target prod --tag vllm --build-arg max_jobs=8
Contributor comment:
Suggested change:
$ DOCKER_BUILDKIT=1 docker build . --target prod --tag vllm --build-arg max_jobs=8
$ DOCKER_BUILDKIT=1 docker build . --target api_server --tag vllm --build-arg max_jobs=8
$ DOCKER_BUILDKIT=1 docker build . --target openai_api_server --tag vllm-openai --build-arg max_jobs=8


To run vLLM:

.. code-block:: console

$ docker run --runtime nvidia --gpus all \
-v ~/.cache/huggingface:/root/.cache/huggingface \
-p 8000:8000 \
--env "HUGGING_FACE_HUB_TOKEN=<secret>" \
vllm <args...>

3 changes: 3 additions & 0 deletions requirements-dev.txt
@@ -12,3 +12,6 @@ types-setuptools
pytest
pytest-forked
pytest-asyncio

# distribution
build