Skip to content

Commit

Permalink
comments
Browse files: browse the repository at this point in the history
  • Loading branch information
songhappy committed Jun 21, 2024
1 parent f789ca3 commit ee0f6c3
Showing 1 changed file with 9 additions and 4 deletions.
13 changes: 9 additions & 4 deletions python/llm/src/ipex_llm/transformers/models/llama.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -2512,10 +2512,15 @@ def llama_model_forward_4_41_internal(
if output_hidden_states:
all_hidden_states += (hidden_states,)

next_cache = next_decoder_cache if use_cache else None
if return_legacy_cache:
next_cache = next_cache.to_legacy_cache()

next_cache = None
from ipex_llm.transformers.kv import DynamicFp8Cache
if use_cache:
next_cache = (
next_decoder_cache.to_legacy_cache()
if not isinstance(next_decoder_cache, DynamicFp8Cache)
else next_decoder_cache
)

if not return_dict:
return tuple(v for v in [hidden_states, next_cache, all_hidden_states, all_self_attns]
if v is not None)
Expand Down

0 comments on commit ee0f6c3

Please sign in to comment.