From ee0f6c33c9be851b6d254620f70e30e027f527ea Mon Sep 17 00:00:00 2001
From: songhappy
Date: Thu, 20 Jun 2024 17:33:47 -0700
Subject: [PATCH] comments

---
 .../llm/src/ipex_llm/transformers/models/llama.py | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/python/llm/src/ipex_llm/transformers/models/llama.py b/python/llm/src/ipex_llm/transformers/models/llama.py
index f73b3b2b733..a53571e538c 100644
--- a/python/llm/src/ipex_llm/transformers/models/llama.py
+++ b/python/llm/src/ipex_llm/transformers/models/llama.py
@@ -2512,10 +2512,15 @@ def llama_model_forward_4_41_internal(
     if output_hidden_states:
         all_hidden_states += (hidden_states,)

-    next_cache = next_decoder_cache if use_cache else None
-    if return_legacy_cache:
-        next_cache = next_cache.to_legacy_cache()
-
+    next_cache = None
+    from ipex_llm.transformers.kv import DynamicFp8Cache
+    if use_cache:
+        next_cache = (
+            next_decoder_cache.to_legacy_cache()
+            if not isinstance(next_decoder_cache, DynamicFp8Cache)
+            else next_decoder_cache
+        )
+
     if not return_dict:
         return tuple(v for v in [hidden_states, next_cache, all_hidden_states,
                                  all_self_attns] if v is not None)
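
Note: the hunk replaces the unconditional legacy-cache conversion with a type
check, so IPEX-LLM's FP8 KV cache is never flattened into the legacy
tuple-of-tuples format. Below is a minimal sketch of that branch, assuming the
transformers DynamicCache API (>= 4.36); the select_cache() helper and the
stand-in DynamicFp8Cache subclass are illustrative only, not part of ipex-llm
or this patch.

    from transformers.cache_utils import DynamicCache


    class DynamicFp8Cache(DynamicCache):
        """Stand-in for ipex_llm.transformers.kv.DynamicFp8Cache."""


    def select_cache(next_decoder_cache, use_cache):
        # Mirror the patched branch: a plain DynamicCache is flattened to the
        # legacy tuple format via to_legacy_cache(), while an FP8 cache is
        # returned as-is, presumably because downstream IPEX-LLM code reads
        # its quantized contents through the Cache interface.
        if not use_cache:
            return None
        if isinstance(next_decoder_cache, DynamicFp8Cache):
            return next_decoder_cache
        return next_decoder_cache.to_legacy_cache()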