Skip to content

Commit

Permalink
Merge remote-tracking branch 'rocm/main' into navi_correctness_fix_1_…
Browse files Browse the repository at this point in the history
…to_300_count
  • Loading branch information
maleksan85 committed Nov 6, 2024
2 parents 0f7dda3 + 4868a43 commit cad2332
Show file tree
Hide file tree
Showing 285 changed files with 10,771 additions and 6,137 deletions.
4 changes: 2 additions & 2 deletions .buildkite/run-amd-test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ cleanup_docker() {
echo "Disk usage is above $threshold%. Cleaning up Docker images and volumes..."
# Remove dangling images (those that are not tagged and not used by any container)
docker image prune -f
# Remove unused volumes
docker volume prune -f
# Remove unused volumes / force the system prune for old images as well.
docker volume prune -f && docker system prune --force --filter "until=72h" --all
echo "Docker images and volumes cleanup completed."
else
echo "Disk usage is below $threshold%. No cleanup needed."
Expand Down
25 changes: 21 additions & 4 deletions .buildkite/test-pipeline.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
# label(str): the name of the test. emoji allowed.
# fast_check(bool): whether to run this on each commit on fastcheck pipeline.
# fast_check_only(bool): run this test on fastcheck pipeline only
# nightly(bool): run this test in nightly pipeline only
# optional(bool): never run this test by default (i.e. need to unblock manually)
# command(str): the single command to run for tests. incompatible with commands.
# commands(list): the list of commands to run for test. incompatbile with command.
Expand Down Expand Up @@ -229,6 +230,9 @@ steps:
- tests/compile
commands:
- pytest -v -s compile/test_basic_correctness.py
# these tests need to be separated, cannot combine
- pytest -v -s compile/piecewise/test_simple.py
- pytest -v -s compile/piecewise/test_toy_llama.py

- label: "PyTorch Fullgraph Test" # 18min
source_file_dependencies:
Expand Down Expand Up @@ -327,15 +331,28 @@ steps:
commands:
- pytest -v -s models/decoder_only/language --ignore=models/decoder_only/language/test_models.py --ignore=models/decoder_only/language/test_big_models.py

- label: Decoder-only Multi-Modal Models Test # 1h31min
- label: Decoder-only Multi-Modal Models Test (Standard)
#mirror_hardwares: [amd]
source_file_dependencies:
- vllm/
- tests/models/decoder_only/audio_language
- tests/models/decoder_only/vision_language
commands:
- pytest -v -s models/decoder_only/audio_language
- pytest -v -s models/decoder_only/vision_language
- pytest -v -s models/decoder_only/audio_language -m core_model
- pytest -v -s --ignore models/decoder_only/vision_language/test_phi3v.py models/decoder_only/vision_language -m core_model

- label: Decoder-only Multi-Modal Models Test (Extended)
nightly: true
source_file_dependencies:
- vllm/
- tests/models/decoder_only/audio_language
- tests/models/decoder_only/vision_language
commands:
- pytest -v -s models/decoder_only/audio_language -m 'not core_model'
# HACK - run phi3v tests separately to sidestep this transformers bug
# https://github.com/huggingface/transformers/issues/34307
- pytest -v -s models/decoder_only/vision_language/test_phi3v.py
- pytest -v -s --ignore models/decoder_only/vision_language/test_phi3v.py models/decoder_only/vision_language -m 'not core_model'

- label: Other Models Test # 6min
#mirror_hardwares: [amd]
Expand Down Expand Up @@ -410,7 +427,7 @@ steps:
# Avoid importing model tests that cause CUDA reinitialization error
- pytest models/encoder_decoder/language/test_bart.py -v -s -m distributed_2_gpus
- pytest models/encoder_decoder/vision_language/test_broadcast.py -v -s -m distributed_2_gpus
- pytest models/decoder_only/vision_language/test_broadcast.py -v -s -m distributed_2_gpus
- pytest models/decoder_only/vision_language/test_models.py -v -s -m distributed_2_gpus
- pytest -v -s spec_decode/e2e/test_integration_dist_tp2.py
- pip install -e ./plugins/vllm_add_dummy_model
- pytest -v -s distributed/test_distributed_oot.py
Expand Down
25 changes: 25 additions & 0 deletions .github/dependabot.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,28 @@ updates:
directory: "/"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/"
schedule:
interval: "weekly"
labels: ["dependencies"]
open-pull-requests-limit: 5
reviewers: ["khluu", "simon-mo"]
allow:
- dependency-type: "all"
ignore:
- dependency-name: "torch"
- dependency-name: "torchvision"
- dependency-name: "xformers"
- dependency-name: "lm-format-enforcer"
- dependency-name: "gguf"
- dependency-name: "compressed-tensors"
- dependency-name: "ray[adag]"
- dependency-name: "lm-eval"
groups:
patch-update:
applies-to: version-updates
update-types: ["patch"]
minor-update:
applies-to: version-updates
update-types: ["minor"]
15 changes: 8 additions & 7 deletions .github/mergify.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,14 @@ pull_request_rules:
- name: label-ci-build
description: Automatically apply ci/build label
conditions:
- files~=^\.github/
- files~=\.buildkite/
- files~=^cmake/
- files=CMakeLists.txt
- files~=^Dockerfile
- files~=^requirements.*\.txt
- files=setup.py
- or:
- files~=^\.github/
- files~=\.buildkite/
- files~=^cmake/
- files=CMakeLists.txt
- files~=^Dockerfile
- files~=^requirements.*\.txt
- files=setup.py
actions:
label:
add:
Expand Down
66 changes: 26 additions & 40 deletions .github/workflows/publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@ jobs:
release:
# Retrieve tag and create release
name: Create Release
runs-on: ubuntu-latest
runs-on: self-hosted
container:
image: rocm/pytorch:rocm6.2_ubuntu20.04_py3.9_pytorch_release_2.3.0
outputs:
upload_url: ${{ steps.create_release.outputs.upload_url }}
steps:
Expand All @@ -41,57 +43,39 @@ jobs:
wheel:
name: Build Wheel
runs-on: ${{ matrix.os }}
runs-on: self-hosted
container:
image: rocm/pytorch:rocm6.2_ubuntu20.04_py3.9_pytorch_release_2.3.0
needs: release

strategy:
fail-fast: false
matrix:
os: ['ubuntu-20.04']
python-version: ['3.8', '3.9', '3.10', '3.11', '3.12']
pytorch-version: ['2.4.0'] # Must be the most recent version that meets requirements-cuda.txt.
cuda-version: ['11.8', '12.1']

steps:
- name: Checkout
uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1

- name: Setup ccache
uses: hendrikmuhs/ccache-action@ed74d11c0b343532753ecead8a951bb09bb34bc9 # v1.2.14
with:
create-symlink: true
key: ${{ github.job }}-${{ matrix.python-version }}-${{ matrix.cuda-version }}

- name: Set up Linux Env
if: ${{ runner.os == 'Linux' }}
run: |
bash -x .github/workflows/scripts/env.sh
- name: Set up Python
uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0
with:
python-version: ${{ matrix.python-version }}

- name: Install CUDA ${{ matrix.cuda-version }}
- name: Prepare
run: |
bash -x .github/workflows/scripts/cuda-install.sh ${{ matrix.cuda-version }} ${{ matrix.os }}
pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm6.2
pip3 install -U triton
- name: Install PyTorch ${{ matrix.pytorch-version }} with CUDA ${{ matrix.cuda-version }}
run: |
bash -x .github/workflows/scripts/pytorch-install.sh ${{ matrix.python-version }} ${{ matrix.pytorch-version }} ${{ matrix.cuda-version }}
- name: Checkout
uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1

- name: Build wheel
shell: bash
env:
CMAKE_BUILD_TYPE: Release # do not compile with debug symbol to reduce wheel size
run: |
bash -x .github/workflows/scripts/build.sh ${{ matrix.python-version }} ${{ matrix.cuda-version }}
bash -x .github/workflows/scripts/build.sh
wheel_name=$(find dist -name "*whl" -print0 | xargs -0 -n 1 basename)
asset_name=${wheel_name//"linux"/"manylinux1"}
gradlib_wheel_name=$(find gradlib/dist -name "*whl" -print0 | xargs -0 -n 1 basename)
gradlib_asset_name=${gradlib_wheel_name//"linux"/"manylinux1"}
echo "wheel_name=${wheel_name}" >> "$GITHUB_ENV"
echo "asset_name=${asset_name}" >> "$GITHUB_ENV"
echo "gradlib_wheel_name=${gradlib_wheel_name}" >> "$GITHUB_ENV"
echo "gradlib_asset_name=${gradlib_asset_name}" >> "$GITHUB_ENV"
- name: Upload Release Asset
- name: Upload vllm Release Asset
uses: actions/upload-release-asset@e8f9f06c4b078e705bd2ea027f0926603fc9b4d5 # v1.0.2
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
Expand All @@ -100,11 +84,13 @@ jobs:
asset_path: ./dist/${{ env.wheel_name }}
asset_name: ${{ env.asset_name }}
asset_content_type: application/*
- name: Upload gradlib Release Asset
uses: actions/upload-release-asset@e8f9f06c4b078e705bd2ea027f0926603fc9b4d5 # v1.0.2
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
with:
upload_url: ${{ needs.release.outputs.upload_url }}
asset_path: ./gradlib/dist/${{ env.gradlib_wheel_name }}
asset_name: ${{ env.gradlib_asset_name }}
asset_content_type: application/*

# (Danielkinz): This last step will publish the .whl to pypi. Warning: untested
# - name: Publish package
# uses: pypa/gh-action-pypi-publish@release/v1.8
# with:
# repository-url: https://test.pypi.org/legacy/
# password: ${{ secrets.PYPI_API_TOKEN }}
# skip-existing: true
18 changes: 9 additions & 9 deletions .github/workflows/scripts/build.sh
Original file line number Diff line number Diff line change
@@ -1,23 +1,23 @@
#!/bin/bash
set -eux

python_executable=python$1
cuda_home=/usr/local/cuda-$2
python_executable=python3

# Update paths
PATH=${cuda_home}/bin:$PATH
LD_LIBRARY_PATH=${cuda_home}/lib64:$LD_LIBRARY_PATH

# Install requirements
$python_executable -m pip install -r requirements-build.txt -r requirements-cuda.txt
$python_executable -m pip install -r requirements-rocm.txt

# Limit the number of parallel jobs to avoid OOM
export MAX_JOBS=1
# Make sure release wheels are built for the following architectures
export TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 8.9 9.0+PTX"
export VLLM_FA_CMAKE_GPU_ARCHES="80-real;90-real"
export PYTORCH_ROCM_ARCH="gfx90a;gfx942"

rm -f $(which sccache)

bash tools/check_repo.sh
export MAX_JOBS=32

# Build
$python_executable setup.py bdist_wheel --dist-dir=dist
cd gradlib
$python_executable setup.py bdist_wheel --dist-dir=dist
cd ..
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ set(PYTHON_SUPPORTED_VERSIONS "3.8" "3.9" "3.10" "3.11" "3.12")
set(CUDA_SUPPORTED_ARCHS "7.0;7.5;8.0;8.6;8.9;9.0")

# Supported AMD GPU architectures.
set(HIP_SUPPORTED_ARCHS "gfx906;gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1030;gfx1100;gfx1101;gfx1200")
set(HIP_SUPPORTED_ARCHS "gfx906;gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1030;gfx1100;gfx1101;gfx1200;gfx1201")

#
# Supported/expected torch versions for CUDA/ROCm.
Expand Down
12 changes: 11 additions & 1 deletion CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,14 @@ We also believe in the power of community support; thus, answering queries, offe

Finally, one of the most impactful ways to support us is by raising awareness about vLLM. Talk about it in your blog posts and highlight how it's driving your incredible projects. Express your support on social media if you're using vLLM, or simply offer your appreciation by starring our repository!

## License

See [LICENSE](LICENSE).

## Developing

Depending on the kind of development you'd like to do (e.g. Python, CUDA), you can choose to build vLLM with or without compilation. Check out the [building from source](https://docs.vllm.ai/en/latest/getting_started/installation.html#build-from-source) documentation for details.


## Testing

```bash
Expand All @@ -33,6 +35,14 @@ pytest tests/

## Contribution Guidelines

### DCO and Signed-off-by

When contributing changes to this project, you must agree to the [DCO](DCO).
Commits must include a `Signed-off-by:` header which certifies agreement with
the terms of the [DCO](DCO).

Using `-s` with `git commit` will automatically add this header.

### Issues

If you encounter a bug or have a feature request, please [search existing issues](https://github.com/vllm-project/vllm/issues?q=is%3Aissue) first to see if it has already been reported. If not, please [file a new issue](https://github.com/vllm-project/vllm/issues/new/choose), providing as much relevant information as possible.
Expand Down
34 changes: 34 additions & 0 deletions DCO
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
Developer Certificate of Origin
Version 1.1

Copyright (C) 2004, 2006 The Linux Foundation and its contributors.

Everyone is permitted to copy and distribute verbatim copies of this
license document, but changing it is not allowed.


Developer's Certificate of Origin 1.1

By making a contribution to this project, I certify that:

(a) The contribution was created in whole or in part by me and I
have the right to submit it under the open source license
indicated in the file; or

(b) The contribution is based upon previous work that, to the best
of my knowledge, is covered under an appropriate open source
license and I have the right under that license to submit that
work with modifications, whether created in whole or in part
by me, under the same open source license (unless I am
permitted to submit under a different license), as indicated
in the file; or

(c) The contribution was provided directly to me by some other
person who certified (a), (b) or (c) and I have not modified
it.

(d) I understand and agree that this project and the contribution
are public and that a record of the contribution (including all
personal information I submit with it, including my sign-off) is
maintained indefinitely and may be redistributed consistent with
this project or the open source license(s) involved.
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ FROM vllm-base AS vllm-openai

# install additional dependencies for openai api server
RUN --mount=type=cache,target=/root/.cache/pip \
pip install accelerate hf_transfer 'modelscope!=1.15.0' bitsandbytes>=0.44.0 timm==0.9.10
pip install accelerate hf_transfer 'modelscope!=1.15.0' 'bitsandbytes>=0.44.0' timm==0.9.10

ENV VLLM_USAGE_SOURCE production-docker-image

Expand Down
4 changes: 2 additions & 2 deletions Dockerfile.neuron
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,11 @@ RUN --mount=type=bind,source=.git,target=.git \
if [ "$GIT_REPO_CHECK" != 0 ]; then bash tools/check_repo.sh ; fi

RUN python3 -m pip install -U \
cmake>=3.26 ninja packaging setuptools-scm>=8 wheel jinja2 \
'cmake>=3.26' ninja packaging 'setuptools-scm>=8' wheel jinja2 \
-r requirements-neuron.txt

ENV VLLM_TARGET_DEVICE neuron
RUN --mount=type=bind,source=.git,target=.git \
pip install --no-build-isolation -v -e . \
pip install --no-build-isolation -v -e .

CMD ["/bin/bash"]
2 changes: 1 addition & 1 deletion Dockerfile.ppc64le
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ RUN --mount=type=bind,source=.git,target=.git \
# These packages will be in rocketce eventually
RUN --mount=type=cache,target=/root/.cache/pip \
pip install -v --prefer-binary --extra-index-url https://repo.fury.io/mgiessing \
cmake>=3.26 ninja packaging setuptools-scm>=8 wheel jinja2 \
'cmake>=3.26' ninja packaging 'setuptools-scm>=8' wheel jinja2 \
torch==2.3.1 \
-r requirements-cpu.txt \
xformers uvloop==0.20.0
Expand Down
4 changes: 2 additions & 2 deletions Dockerfile.rocm
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
# default base image
ARG BASE_IMAGE="rocm/pytorch:rocm6.2_ubuntu20.04_py3.9_pytorch_release_2.3.0"
ARG BASE_IMAGE="rocm/pytorch:rocm6.2.2_ubuntu20.04_py3.9_pytorch_release_2.1.2"

ARG COMMON_WORKDIR=/app

# The following ARGs should be "0" or "1". If "1", the respective component will be built and installed on top of the base image
ARG BUILD_HIPBLASLT="0"
ARG BUILD_RCCL="1"
ARG BUILD_RCCL="0"
ARG BUILD_FA="1"
ARG BUILD_TRITON="1"
ARG BUILD_PYTORCH="1"
Expand Down
4 changes: 2 additions & 2 deletions Dockerfile.tpu
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
ARG NIGHTLY_DATE="20240828"
ARG NIGHTLY_DATE="20241017"
ARG BASE_IMAGE="us-central1-docker.pkg.dev/tpu-pytorch-releases/docker/xla:nightly_3.10_tpuvm_$NIGHTLY_DATE"

FROM $BASE_IMAGE
Expand All @@ -25,7 +25,7 @@ ENV VLLM_TARGET_DEVICE="tpu"
RUN --mount=type=cache,target=/root/.cache/pip \
--mount=type=bind,source=.git,target=.git \
python3 -m pip install \
cmake>=3.26 ninja packaging setuptools-scm>=8 wheel jinja2 \
'cmake>=3.26' ninja packaging 'setuptools-scm>=8' wheel jinja2 \
-r requirements-tpu.txt
RUN python3 setup.py develop

Expand Down
Loading

0 comments on commit cad2332

Please sign in to comment.