diff --git a/python/llm/src/ipex_llm/transformers/models/llama.py b/python/llm/src/ipex_llm/transformers/models/llama.py
index 339b7c9b13a..d4000caa03f 100644
--- a/python/llm/src/ipex_llm/transformers/models/llama.py
+++ b/python/llm/src/ipex_llm/transformers/models/llama.py
@@ -163,7 +163,8 @@ def llama_attention_forward(
         cos, sin = self.rotary_emb(value_states, position_ids)
     else:
         cos, sin = position_embeddings
-    query_states, key_states = apply_rotary_pos_emb(query_states, key_states, cos, sin, position_ids)
+    query_states, key_states = apply_rotary_pos_emb(query_states, key_states,
+                                                    cos, sin, position_ids)
 
     if past_key_value is not None:
         # [CompressKV]