ROCm build for UBI #144

Merged 1 commit on Sep 5, 2024
291 changes: 291 additions & 0 deletions Dockerfile.rocm.ubi
@@ -0,0 +1,291 @@
## Global Args #################################################################
ARG BASE_UBI_IMAGE_TAG=9.4
ARG PYTHON_VERSION=3.11
# Default ROCm ARCHes to build vLLM for.
ARG PYTORCH_ROCM_ARCH="gfx908;gfx90a;gfx942;gfx1100"
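# gfx908 = Instinct MI100, gfx90a = MI200 series, gfx942 = MI300 series,
# gfx1100 = RDNA3 (e.g. Radeon RX 7900 XTX). Narrowing this list at build
# time shortens compilation, for example:
#   docker build --build-arg PYTORCH_ROCM_ARCH="gfx90a" -f Dockerfile.rocm.ubi .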

## Base Layer ##################################################################
FROM registry.access.redhat.com/ubi9/ubi-minimal:${BASE_UBI_IMAGE_TAG} AS rocm-base

# Max jobs for parallel build
ARG MAX_JOBS=12

ENV BUILD_TARGET='rocm'

USER root

ENV ROCM_VERSION=6.1.2

# Set up ROCm repository and install necessary packages
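# (The amdgpu repo carries the GPU driver userspace packages; the ROCm repo
# carries the HIP/ROCm toolchain. Both are pinned to ${ROCM_VERSION}.)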

RUN echo "[amdgpu]" > /etc/yum.repos.d/amdgpu.repo && \
echo "name=amdgpu" >> /etc/yum.repos.d/amdgpu.repo && \
echo "baseurl=https://repo.radeon.com/amdgpu/${ROCM_VERSION}/rhel/9.4/main/x86_64/" >> /etc/yum.repos.d/amdgpu.repo && \
echo "enabled=1" >> /etc/yum.repos.d/amdgpu.repo && \
echo "priority=50" >> /etc/yum.repos.d/amdgpu.repo && \
echo "gpgcheck=1" >> /etc/yum.repos.d/amdgpu.repo && \
echo "gpgkey=https://repo.radeon.com/rocm/rocm.gpg.key" >> /etc/yum.repos.d/amdgpu.repo && \
echo "[ROCm-${ROCM_VERSION}]" >> /etc/yum.repos.d/amdgpu.repo && \
echo "name=ROCm${ROCM_VERSION}" >> /etc/yum.repos.d/amdgpu.repo && \
echo "baseurl=https://repo.radeon.com/rocm/rhel9/${ROCM_VERSION}/main" >> /etc/yum.repos.d/amdgpu.repo && \
echo "enabled=1" >> /etc/yum.repos.d/amdgpu.repo && \
echo "priority=50" >> /etc/yum.repos.d/amdgpu.repo && \
echo "gpgcheck=1" >> /etc/yum.repos.d/amdgpu.repo && \
echo "gpgkey=https://repo.radeon.com/rocm/rocm.gpg.key" >> /etc/yum.repos.d/amdgpu.repo

RUN microdnf -y update && \
    microdnf -y install rocm hipcc git which && \
    microdnf clean all

WORKDIR /workspace

##################################################################################################

FROM rocm-base AS python-install
ARG PYTHON_VERSION

ENV VIRTUAL_ENV=/opt/vllm
ENV PATH="$VIRTUAL_ENV/bin:$PATH"
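# Note: the venv is created with --system-site-packages below so that Python
# packages installed system-wide by RPMs remain importable inside it.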
RUN microdnf install -y --setopt=install_weak_deps=0 --nodocs \
        python${PYTHON_VERSION}-devel python${PYTHON_VERSION}-pip python${PYTHON_VERSION}-wheel && \
    python${PYTHON_VERSION} -m venv $VIRTUAL_ENV --system-site-packages && \
    $VIRTUAL_ENV/bin/pip install --no-cache -U pip wheel && \
    microdnf clean all

##################################################################################################

FROM python-install AS python-rocm-base

# install common dependencies
RUN --mount=type=cache,target=/root/.cache/pip \
    --mount=type=bind,source=requirements-common.txt,target=requirements-common.txt,readonly \
    --mount=type=bind,source=requirements-rocm.txt,target=requirements-rocm.txt,readonly \
    pip install -r requirements-rocm.txt
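# The bind mounts expose the requirements files without adding a COPY layer,
# and the pip cache mount persists downloads across builds (BuildKit required).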

##################################################################################################

FROM python-rocm-base AS base

# Set the application mount point
ARG APP_MOUNT=/vllm-workspace
WORKDIR ${APP_MOUNT}

# Upgrade pip and remove sccache, which can interfere with the ccache used below
RUN python3 -m pip install --upgrade --no-cache-dir pip && \
    microdnf -y remove sccache || true && \
    python3 -m pip uninstall -y sccache || true && \
    rm -f "$(which sccache)" && \
    microdnf clean all && \
    rm -rf /var/cache/yum /var/cache/dnf

# Install torch 2.5.0 nightly wheels built for ROCm 6.1
RUN case "$(ls /opt | grep -Po 'rocm-[0-9]\.[0-9]')" in \
    *"rocm-6.1"*) \
        python3 -m pip uninstall -y torch torchvision \
        && python3 -m pip install --no-cache-dir --pre \
            torch==2.5.0.dev20240726 \
            torchvision==0.20.0.dev20240726 \
            --index-url https://download.pytorch.org/whl/nightly/rocm6.1;; \
    *) ;; esac
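# Note: the ROCm level is detected from what the rocm RPM actually installed
# under /opt rather than from the ROCM_VERSION variable.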

# Set environment variables
ENV LLVM_SYMBOLIZER_PATH=/opt/rocm/llvm/bin/llvm-symbolizer
ENV PATH=$PATH:/opt/rocm/bin:/libtorch/bin
ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/rocm/lib:/libtorch/lib
ENV CPLUS_INCLUDE_PATH=$CPLUS_INCLUDE_PATH:/libtorch/include:/libtorch/include/torch/csrc/api/include:/opt/rocm/include
ENV PYTORCH_ROCM_ARCH="gfx908;gfx90a;gfx942;gfx1100"
ENV CCACHE_DIR=/root/.cache/ccache
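# CCACHE_DIR matches the --mount=type=cache target used by the build stages
# below, so compiler caches persist across image rebuilds.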

##################################################################################################

FROM base AS build_base

RUN python3 -m pip install --upgrade --no-cache-dir ninja 'cmake>=3.26'

##################################################################################################

##################################################################################################

### AMD-SMI build stage
FROM build_base AS build_amdsmi

# Build AMD SMI wheel
RUN cd /opt/rocm/share/amd_smi && \
    python3 -m pip wheel . --wheel-dir=/install
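# /opt/rocm/share/amd_smi ships the amdsmi Python package with ROCm;
# `pip wheel .` packages it into a wheel under /install for the final stage.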

##################################################################################################

### Flash-Attention wheel build stage
FROM build_base AS build_fa

# Whether to install CK-based flash-attention
ARG BUILD_FA="1"
ARG TRY_FA_WHEEL="1"
ARG FA_WHEEL_URL="https://github.com/ROCm/flash-attention/releases/download/v2.5.9post1-cktile-vllm/flash_attn-2.5.9.post1-cp39-cp39-linux_x86_64.whl"
ARG FA_GFX_ARCHS="gfx90a;gfx942"
ARG FA_BRANCH="23a2b1c2"
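# Note: FA_GFX_ARCHS is narrower than PYTORCH_ROCM_ARCH because the CK-based
# flash-attention kernels target only the gfx90a/gfx942 (MI200/MI300) GPUs.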

# Ensure necessary tools are installed
RUN microdnf install -y wget git && microdnf clean all

# Build ROCm flash-attention wheel if `BUILD_FA` is set to 1
RUN --mount=type=cache,target=${CCACHE_DIR} \
    if [ "$BUILD_FA" = "1" ]; then \
        if [ "$TRY_FA_WHEEL" = "1" ] && python3 -m pip install "${FA_WHEEL_URL}"; then \
            # If a suitable prebuilt wheel installs cleanly, download it into
            # /install instead of building FA from source
            mkdir -p /install && wget -N "${FA_WHEEL_URL}" -P /install; \
        else \
            mkdir -p /libs && \
            cd /libs && \
            git clone https://github.com/ROCm/flash-attention.git && \
            cd flash-attention && \
            git checkout "${FA_BRANCH}" && \
            git submodule update --init && \
            GPU_ARCHS="${FA_GFX_ARCHS}" python3 setup.py bdist_wheel --dist-dir=/install; \
        fi; \
    else \
        # Otherwise create an empty /install directory, since later build
        # stages expect it to exist
        mkdir -p /install; \
    fi

##################################################################################################

### Triton wheel build stage
FROM build_base AS build_triton

# Whether to build Triton for ROCm
ARG BUILD_TRITON="1"
ARG TRITON_BRANCH="e0fc12c"
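# TRITON_BRANCH pins an upstream Triton commit; the wheel is built from the
# python/ subdirectory, where Triton's setup.py lives.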

# Build triton wheel if `BUILD_TRITON` is set to 1
RUN --mount=type=cache,target=${CCACHE_DIR} \
    if [ "$BUILD_TRITON" = "1" ]; then \
        mkdir -p /libs && cd /libs && \
        git clone https://github.com/OpenAI/triton.git && \
        cd triton && \
        git checkout "${TRITON_BRANCH}" && \
        cd python && \
        python3 setup.py bdist_wheel --dist-dir=/install; \
    else \
        # Otherwise create an empty /install directory, since later build
        # stages expect it to exist
        mkdir -p /install; \
    fi

##################################################################################################

### Final vLLM build stage
FROM build_base AS final

# Import the vLLM development directory from the build context
COPY . .

# Install wget, used below to fetch the ROCm runtime patch
RUN microdnf -y install wget && microdnf clean all

# Upgrade helper packages to avoid dependency conflicts at runtime
RUN --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install --upgrade numba scipy "huggingface-hub[cli]"

ARG PYTORCH_ROCM_ARCH="gfx908;gfx90a;gfx942;gfx1100"

# Set environment variables for runtime
ENV RAY_EXPERIMENTAL_NOSET_ROCR_VISIBLE_DEVICES=1
# Silences the HF Tokenizers warning
ENV TOKENIZERS_PARALLELISM=false

# Install dependencies from requirements file and apply ROCm specific patches
RUN --mount=type=cache,target=${CCACHE_DIR} \
    --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install -Ur requirements-rocm.txt && \
    ROCM_VERSION=$(ls /opt | grep -Po 'rocm-[0-9]+\.[0-9]+') && \
    case "$ROCM_VERSION" in \
        "rocm-6.1") \
            # Apply the patched HIP runtime for ROCm 6.1
            wget -N https://github.com/ROCm/vllm/raw/fa78403/rocm_patch/libamdhip64.so.6 -P /opt/rocm/lib && \
            # Remove the potentially conflicting HIP runtime bundled with torch
            rm -f "$(python3 -c 'import torch; print(torch.__path__[0])')"/lib/libamdhip64.so* || true ;; \
        *) \
            echo "ROCm version $ROCM_VERSION is not supported for patching." ;; \
    esac && \
    python3 setup.py clean --all && \
    python3 setup.py bdist_wheel --dist-dir=dist
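# The wheel built into dist/ here is consumed by the vllm-openai stage below
# through a bind mount (--mount=from=final,src=/vllm-workspace/dist).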

##################################################################################################

FROM base AS vllm-openai

WORKDIR /workspace

# Set up the virtual environment and update PATH
ENV VIRTUAL_ENV=/opt/vllm
ENV PATH=$VIRTUAL_ENV/bin:$PATH

# Install necessary build tools
RUN microdnf install -y --setopt=install_weak_deps=0 --nodocs gcc && \
    microdnf clean all

# Copy amdsmi wheel into final image
RUN --mount=type=bind,from=build_amdsmi,src=/install,target=/install \
    mkdir -p libs \
    && cp /install/*.whl libs \
    # Uninstall first so pip will reinstall a wheel carrying the same version
    && python3 -m pip uninstall -y amdsmi

# Copy triton wheel(s) into final image if they were built
RUN --mount=type=bind,from=build_triton,src=/install,target=/install \
    mkdir -p libs \
    && if ls /install/*.whl; then \
        cp /install/*.whl libs \
        # Uninstall first so pip will reinstall a wheel carrying the same version
        && python3 -m pip uninstall -y triton; \
    fi

# Copy flash-attn wheel(s) into final image if they were built
RUN --mount=type=bind,from=build_fa,src=/install,target=/install \
    mkdir -p libs \
    && if ls /install/*.whl; then \
        cp /install/*.whl libs \
        # Uninstall first so pip will reinstall a wheel carrying the same version
        && python3 -m pip uninstall -y flash-attn; \
    fi

# Copy vLLM wheel(s) into the final image
RUN --mount=type=bind,from=final,src=/vllm-workspace/dist,target=/dist \
    --mount=type=cache,target=/root/.cache/pip \
    cp /dist/*.whl libs \
    # Uninstall first so pip will reinstall a wheel carrying the same version
    && python3 -m pip uninstall -y vllm

# Install wheels that were built to the final image
RUN --mount=type=cache,target=/root/.cache/pip \
    if ls libs/*.whl; then \
        python3 -m pip install libs/*.whl; \
    fi
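# Installing all wheels (amdsmi, triton, flash-attn, vllm) in one pip
# invocation lets pip resolve their mutual dependencies together.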

# Environment variables for runtime configuration
ENV HF_HUB_OFFLINE=1 \
    PORT=8000 \
    HOME=/home/vllm \
    VLLM_USAGE_SOURCE=production-docker-image

# Set up a non-root user for OpenShift
RUN umask 002 && \
    useradd --uid 2000 --gid 0 vllm && \
    mkdir -p /licenses && \
    chmod g+rwx $HOME /usr/src /workspace
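# uid 2000 in gid 0 with group-writable directories follows the OpenShift
# convention of running containers as an arbitrary UID in the root group.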

COPY LICENSE /licenses/vllm.md

# Allow the requested max length to exceed what is derived from the model's
# config.json; see: https://github.com/vllm-project/vllm/pull/7080
ENV VLLM_ALLOW_LONG_MAX_MODEL_LEN=1 \
    VLLM_WORKER_MULTIPROC_METHOD=fork \
    VLLM_NO_USAGE_STATS=1

# Switch to the non-root user
USER 2000

# Set the entrypoint
ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
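A minimal sketch of building and serving with this Dockerfile (the image tag, model path, and runtime flags below are illustrative; /dev/kfd and /dev/dri are the standard ROCm device nodes, and because the image sets HF_HUB_OFFLINE=1 the model must already be on disk or that variable overridden):

    docker build -f Dockerfile.rocm.ubi -t vllm-rocm-ubi .
    docker run --rm --device=/dev/kfd --device=/dev/dri -p 8000:8000 \
        -v /path/to/model:/mnt/model vllm-rocm-ubi --model /mnt/model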