flash attention forward doesn't play well with torch.compile
winglian committed Dec 9, 2024
1 parent c8c8dff commit 92df994
Showing 1 changed file with 1 addition and 0 deletions.
1 change: 1 addition & 0 deletions src/transformers/modeling_flash_attention_utils.py
@@ -228,6 +228,7 @@ def fa_peft_integration_check(
 deterministic_g = os.environ.get("FLASH_ATTENTION_DETERMINISTIC", "0") == "1"


+@torch.compiler.disable(recursive=True)
 def _flash_attention_forward(
     query_states: torch.Tensor,
     key_states: torch.Tensor,
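For context, torch.compiler.disable(recursive=True) marks a function so that torch.compile skips tracing it and runs it eagerly, inserting a graph break at the call site; recursive=True extends this to everything the function calls. Below is a minimal standalone sketch of the same pattern this commit applies to _flash_attention_forward; the function names and tensor shapes are illustrative assumptions, not the transformers code itself.

import torch
import torch.nn.functional as F

# Stand-in for _flash_attention_forward (illustrative only, not the
# transformers implementation). The decorator tells torch.compile not to
# trace this function; with recursive=True, everything it calls is also
# left uncompiled, and a graph break is inserted at the call site.
@torch.compiler.disable(recursive=True)
def attention_forward(q, k, v):
    return F.scaled_dot_product_attention(q, k, v)


@torch.compile
def model_forward(x, wq, wk, wv):
    # The projections are still captured and compiled as usual; only the
    # attention call above runs eagerly.
    q, k, v = x @ wq, x @ wk, x @ wv
    return attention_forward(q, k, v)


x = torch.randn(2, 4, 16, 64)                     # (batch, heads, seq, head_dim)
wq, wk, wv = (torch.randn(64, 64) for _ in range(3))
print(model_forward(x, wq, wk, wv).shape)         # torch.Size([2, 4, 16, 64])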
