From 4c32e66c3a9c5af261728e663713a38611f3ecad Mon Sep 17 00:00:00 2001
From: Hongxia Yang <62075498+hongxiayang@users.noreply.github.com>
Date: Fri, 29 Mar 2024 17:52:36 -0400
Subject: [PATCH] [ROCm][Bugfix] Fixed several bugs related to rccl path and
 attention selector logic (#3699)

---
 Dockerfile.rocm                              | 2 +-
 requirements-rocm.txt                        | 2 +-
 vllm/attention/backends/xformers.py          | 4 ++--
 vllm/model_executor/parallel_utils/pynccl.py | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/Dockerfile.rocm b/Dockerfile.rocm
index a09de99f7a468..65a367994f960 100644
--- a/Dockerfile.rocm
+++ b/Dockerfile.rocm
@@ -90,6 +90,6 @@ RUN cd /app \
     && cd ..
 
 RUN python3 -m pip install --upgrade pip
-RUN python3 -m pip install --no-cache-dir ray[all]
+RUN python3 -m pip install --no-cache-dir ray[all]==2.9.3
 
 CMD ["/bin/bash"]
diff --git a/requirements-rocm.txt b/requirements-rocm.txt
index cea1183edc0dd..0dc2f0e664114 100644
--- a/requirements-rocm.txt
+++ b/requirements-rocm.txt
@@ -5,7 +5,7 @@ starlette
 requests
 py-cpuinfo
 psutil
-ray >= 2.9
+ray == 2.9.3
 sentencepiece # Required for LLaMA tokenizer.
 numpy
 tokenizers>=0.15.0
diff --git a/vllm/attention/backends/xformers.py b/vllm/attention/backends/xformers.py
index fcd903ddf5f51..8e510f975059e 100644
--- a/vllm/attention/backends/xformers.py
+++ b/vllm/attention/backends/xformers.py
@@ -405,8 +405,8 @@ def _check_use_naive_attention() -> bool:
     if not is_hip():
         return False
     # For ROCm, check whether flash attention is installed or not.
-    has_flash_attn = importlib.util.find_spec("flash_attn") is None
-    if not has_flash_attn:
+    use_naive_attention = importlib.util.find_spec("flash_attn") is None
+    if use_naive_attention:
         logger.warning("flash_attn is not installed. Using naive attention. "
                        "This will take significantly more GPU memory.")
         return True
diff --git a/vllm/model_executor/parallel_utils/pynccl.py b/vllm/model_executor/parallel_utils/pynccl.py
index a0c2921df2217..5d7f2fdc5118b 100644
--- a/vllm/model_executor/parallel_utils/pynccl.py
+++ b/vllm/model_executor/parallel_utils/pynccl.py
@@ -41,7 +41,7 @@
 if torch.version.cuda is not None:
     so_file = "libnccl.so.2"
 elif torch.version.hip is not None:
-    so_file = "librccl.so.2"
+    so_file = "librccl.so.1"
 else:
     raise ValueError("NCCL only supports CUDA and ROCm backends.")
 logger.debug(f"Loading nccl from library {so_file}")
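
Notes: the pynccl.py hunk works because ROCm's RCCL ships its shared
library with soname librccl.so.1, unlike NCCL's libnccl.so.2. Below is a
minimal sketch of the same backend-to-library selection, assuming a
PyTorch build with either CUDA or ROCm support; the ctypes check and the
print are illustrative additions, not vLLM code:

    # Reproduce the selection logic from the patched pynccl.py and
    # verify that the chosen soname actually resolves via ctypes.
    import ctypes

    import torch

    if torch.version.cuda is not None:
        so_file = "libnccl.so.2"   # NCCL soname on CUDA
    elif torch.version.hip is not None:
        so_file = "librccl.so.1"   # RCCL soname on ROCm, hence the fix
    else:
        raise ValueError("NCCL only supports CUDA and ROCm backends.")

    lib = ctypes.CDLL(so_file)  # raises OSError if the soname is wrong
    print(f"Loaded {so_file}")

The xformers.py hunk is a rename-for-correctness: find_spec("flash_attn")
is None evaluates to True exactly when flash_attn is NOT installed, so the
old name has_flash_attn described the opposite of the value it held and the
old "if not has_flash_attn" branch fired in the wrong case; naming the
value use_naive_attention makes the branch match its intent.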