From c0724fc9150329d42abaf2f0f77dc8ca91d48acb Mon Sep 17 00:00:00 2001
From: alexeykondrat <143633163+alexeykondrat@users.noreply.github.com>
Date: Sat, 18 May 2024 01:09:11 -0400
Subject: [PATCH] [ROCm][Hardware][AMD] Adding Navi21 to fallback to naive
 attention if Triton is not used (#4658)

---
 vllm/attention/backends/rocm_flash_attn.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/vllm/attention/backends/rocm_flash_attn.py b/vllm/attention/backends/rocm_flash_attn.py
index bb828d6fc04fe..94f3f55636ed6 100644
--- a/vllm/attention/backends/rocm_flash_attn.py
+++ b/vllm/attention/backends/rocm_flash_attn.py
@@ -231,8 +231,9 @@ def __init__(
             self.attn_func = triton_attention
             logger.debug("Using Triton FA in ROCmBackend")
         else:
-            # if not using triton, navi3x not use flash-attn either
-            if torch.cuda.get_device_capability()[0] == 11:
+            # if not using triton, navi3x/navi21/navi10 do not use flash-attn
+            # either
+            if torch.cuda.get_device_capability()[0] != 9:
                 self.use_naive_attn = True
             else:
                 try:
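
Note (not part of the patch): a minimal, hypothetical sketch of the capability-based fallback this change introduces. On ROCm builds of PyTorch, torch.cuda.get_device_capability() reports the gfx major version, so gfx9 (MI/CDNA) GPUs report 9 while Navi10/Navi21/Navi3x (RDNA) report 10 or 11. The helper name below is illustrative and does not appear in vLLM.

import torch

def should_use_naive_attention() -> bool:
    # Hypothetical helper mirroring the patched check: only gfx9 (MI-series)
    # GPUs keep the flash-attn path; Navi10/Navi21/Navi3x and any other
    # non-gfx9 architecture fall back to naive attention.
    major, _minor = torch.cuda.get_device_capability()
    return major != 9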