
Commit

small improvement
MeouSker77 committed Nov 7, 2024
1 parent d880e53 commit 26fac5b
Showing 1 changed file with 2 additions and 6 deletions.
8 changes: 2 additions & 6 deletions python/llm/src/ipex_llm/transformers/models/qwen2.py
@@ -167,12 +167,8 @@ def qwen2_model_forward(
     from transformers.models.qwen2.modeling_qwen2 import _prepare_4d_causal_attention_mask_for_sdpa
     from transformers.models.qwen2.modeling_qwen2 import _prepare_4d_causal_attention_mask
 
-    # ipex-llm changes start: don't generate `attention_mask` in specific cases
-    if seq_length == 1 or batch_size == 1 and use_sdp_causal(
-        seq_length, seq_length + past_key_values_length,
-        self.config.hidden_size // self.config.num_attention_heads,
-        inputs_embeds, self.training
-    ):
+    # ipex-llm changes start: don't generate `attention_mask` in decode phase
+    if seq_length == 1:
         attention_mask = None
         # ipex-llm changes end
     elif self._attn_implementation == "flash_attention_2":
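
For context, the patched branch sits at the top of the attention-mask selection inside qwen2_model_forward. Below is a minimal sketch of that selection, reconstructed from the diff plus the mask logic of the upstream Hugging Face Qwen2Model.forward; the helper name _select_attention_mask and its argument list are illustrative assumptions, and only the seq_length == 1 branch and the two _prepare_4d_* imports are taken from the commit itself.

# Sketch only: how the simplified branch relates to the surrounding mask logic.
# The two _prepare_4d_* helpers are the ones imported in the diff context; the
# other branches mirror upstream transformers Qwen2 and are assumptions here.
from transformers.models.qwen2.modeling_qwen2 import (
    _prepare_4d_causal_attention_mask,
    _prepare_4d_causal_attention_mask_for_sdpa,
)

def _select_attention_mask(self, attention_mask, inputs_embeds,
                           batch_size, seq_length, past_key_values_length):
    # ipex-llm change from this commit: in the decode phase each step feeds a
    # single new token (seq_length == 1), so no 4D causal mask is needed.
    if seq_length == 1:
        return None
    elif self._attn_implementation == "flash_attention_2":
        # flash-attention-2 consumes the raw 2D mask, or None when there is no padding
        return attention_mask if (attention_mask is not None and 0 in attention_mask) else None
    elif self._attn_implementation == "sdpa":
        return _prepare_4d_causal_attention_mask_for_sdpa(
            attention_mask, (batch_size, seq_length),
            inputs_embeds, past_key_values_length,
        )
    else:
        return _prepare_4d_causal_attention_mask(
            attention_mask, (batch_size, seq_length),
            inputs_embeds, past_key_values_length,
        )

Before this commit, the mask was also skipped when batch_size == 1 and the use_sdp_causal check passed; after it, the mask is dropped only for single-token (decode) steps, which matches the updated comment.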
