Skip to content

Commit

Permalink
add phi-3 model support
Browse files · Browse the repository at this point in the history
  • Loading branch information
sgwhat committed Jun 17, 2024
1 parent bca5cbd commit c9d1d8d
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 1 deletion.
3 changes: 2 additions & 1 deletion — python/llm/src/ipex_llm/transformers/models/phi3.py
Original file line number · Diff line number · Diff line change
Expand Up @@ -234,7 +234,8 @@ def model_forward(
):
# IPEX-LLM OPT: kv cache and quantize kv cache and sdp
use_cache = use_cache if use_cache is not None else self.config.use_cache
use_quantize_kv = use_quantize_kv_cache(self.layers[0].mlp.down_proj, input_ids)
input = input_ids if input_ids is not None else inputs_embeds
use_quantize_kv = use_quantize_kv_cache(self.layers[0].mlp.down_proj, input)
if use_cache:
if use_quantize_kv and not isinstance(past_key_values, DynamicFp8Cache):
past_key_values = DynamicFp8Cache.from_legacy_cache(past_key_values)
Expand Down
1 change: 1 addition & 0 deletions — python/llm/src/ipex_llm/transformers/pipeline_parallel.py
Original file line number · Diff line number · Diff line change
Expand Up @@ -48,6 +48,7 @@ def __init__(self, *args):
# to avoid AttributeError in https://github.com/intel-analytics/ipex-llm/blob/main/
# python/llm/src/ipex_llm/transformers/models/llama.py#L119
self.up_proj = DummyLayer()
self.down_proj = DummyLayer()

def forward(self, x):
return x
Expand Down

0 comments on commit c9d1d8d

Please sign in to comment.