From b9853f98b373f370abf73336b6e1a0eecdc3e2be Mon Sep 17 00:00:00 2001
From: Yishuo Wang
Date: Thu, 31 Oct 2024 17:00:05 +0800
Subject: [PATCH] fix qwen2 attention_mask slice (#12307)

---
 python/llm/src/ipex_llm/transformers/models/qwen2.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/python/llm/src/ipex_llm/transformers/models/qwen2.py b/python/llm/src/ipex_llm/transformers/models/qwen2.py
index 802c5e7ec45..28f3032af07 100644
--- a/python/llm/src/ipex_llm/transformers/models/qwen2.py
+++ b/python/llm/src/ipex_llm/transformers/models/qwen2.py
@@ -560,6 +560,9 @@ def qwen2_attention_forward(
     if past_key_value is not None:
         kv_seq_len += past_key_value.get_usable_length(kv_seq_len, self.layer_idx)
 
+    if attention_mask is not None:
+        attention_mask = attention_mask[:, :, :, :kv_seq_len]
+
     if should_use_fuse_rope(hidden_states, position_ids, self.training):
         import xe_addons
         xe_addons.rotary_half_inplaced(self.rotary_emb.inv_freq, position_ids,
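
Note: below is a minimal standalone sketch (not part of the patch) of why the
slice is needed. With a KV cache, the 4D causal mask can be preallocated longer
than the current kv_seq_len, while scaled_dot_product_attention requires the
mask's last dimension to match the key sequence length, so the mask must be
sliced down before use. All shapes and values here are illustrative
assumptions, not taken from ipex_llm.

    import torch
    import torch.nn.functional as F

    bsz, n_heads, q_len, head_dim = 1, 2, 1, 4
    kv_seq_len = 5      # cached tokens + current token (assumed value)
    max_cache_len = 8   # hypothetical preallocated mask length

    query = torch.randn(bsz, n_heads, q_len, head_dim)
    key = torch.randn(bsz, n_heads, kv_seq_len, head_dim)
    value = torch.randn(bsz, n_heads, kv_seq_len, head_dim)

    # 4D additive mask preallocated past kv_seq_len, as a cache might hand back
    attention_mask = torch.zeros(bsz, 1, q_len, max_cache_len)

    # An unsliced mask (last dim 8) cannot broadcast against key length 5;
    # slicing mirrors the patch's `attention_mask[:, :, :, :kv_seq_len]`.
    attention_mask = attention_mask[:, :, :, :kv_seq_len]

    out = F.scaled_dot_product_attention(query, key, value, attn_mask=attention_mask)
    print(out.shape)  # torch.Size([1, 2, 1, 4])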