From 4c32e66c3a9c5af261728e663713a38611f3ecad Mon Sep 17 00:00:00 2001
From: Hongxia Yang <62075498+hongxiayang@users.noreply.github.com>
Date: Fri, 29 Mar 2024 17:52:36 -0400
Subject: [PATCH] [ROCm][Bugfix] Fixed several bugs related to rccl path and
 attention selector logic (#3699)

---
 Dockerfile.rocm                              | 2 +-
 requirements-rocm.txt                        | 2 +-
 vllm/attention/backends/xformers.py          | 4 ++--
 vllm/model_executor/parallel_utils/pynccl.py | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/Dockerfile.rocm b/Dockerfile.rocm
index a09de99f7a468..65a367994f960 100644
--- a/Dockerfile.rocm
+++ b/Dockerfile.rocm
@@ -90,6 +90,6 @@ RUN cd /app \
     && cd ..
 
 RUN python3 -m pip install --upgrade pip
-RUN python3 -m pip install --no-cache-dir ray[all]
+RUN python3 -m pip install --no-cache-dir ray[all]==2.9.3
 
 CMD ["/bin/bash"]
diff --git a/requirements-rocm.txt b/requirements-rocm.txt
index cea1183edc0dd..0dc2f0e664114 100644
--- a/requirements-rocm.txt
+++ b/requirements-rocm.txt
@@ -5,7 +5,7 @@ starlette
 requests
 py-cpuinfo
 psutil
-ray >= 2.9
+ray == 2.9.3
 sentencepiece # Required for LLaMA tokenizer.
 numpy
 tokenizers>=0.15.0
diff --git a/vllm/attention/backends/xformers.py b/vllm/attention/backends/xformers.py
index fcd903ddf5f51..8e510f975059e 100644
--- a/vllm/attention/backends/xformers.py
+++ b/vllm/attention/backends/xformers.py
@@ -405,8 +405,8 @@ def _check_use_naive_attention() -> bool:
     if not is_hip():
         return False
     # For ROCm, check whether flash attention is installed or not.
-    has_flash_attn = importlib.util.find_spec("flash_attn") is None
-    if not has_flash_attn:
+    use_naive_attention = importlib.util.find_spec("flash_attn") is None
+    if use_naive_attention:
         logger.warning("flash_attn is not installed. Using naive attention. "
                        "This will take significantly more GPU memory.")
         return True
diff --git a/vllm/model_executor/parallel_utils/pynccl.py b/vllm/model_executor/parallel_utils/pynccl.py
index a0c2921df2217..5d7f2fdc5118b 100644
--- a/vllm/model_executor/parallel_utils/pynccl.py
+++ b/vllm/model_executor/parallel_utils/pynccl.py
@@ -41,7 +41,7 @@
 if torch.version.cuda is not None:
     so_file = "libnccl.so.2"
 elif torch.version.hip is not None:
-    so_file = "librccl.so.2"
+    so_file = "librccl.so.1"
 else:
     raise ValueError("NCCL only supports CUDA and ROCm backends.")
 logger.debug(f"Loading nccl from library {so_file}")
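
Notes: the pynccl.py hunk works because ROCm's RCCL ships its shared
library with soname librccl.so.1, unlike NCCL's libnccl.so.2. Below is a
minimal sketch of the same backend-to-library selection, assuming a
PyTorch build with either CUDA or ROCm support; the ctypes check and the
print are illustrative additions, not vLLM code:

    # Reproduce the selection logic from the patched pynccl.py and
    # verify that the chosen soname actually resolves via ctypes.
    import ctypes

    import torch

    if torch.version.cuda is not None:
        so_file = "libnccl.so.2"   # NCCL soname on CUDA
    elif torch.version.hip is not None:
        so_file = "librccl.so.1"   # RCCL soname on ROCm, hence the fix
    else:
        raise ValueError("NCCL only supports CUDA and ROCm backends.")

    lib = ctypes.CDLL(so_file)  # raises OSError if the soname is wrong
    print(f"Loaded {so_file}")

The xformers.py hunk is a rename-for-correctness: find_spec("flash_attn")
is None evaluates to True exactly when flash_attn is NOT installed, so the
old name has_flash_attn described the opposite of the value it held and the
old "if not has_flash_attn" branch fired in the wrong case; naming the
value use_naive_attention makes the branch match its intent.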