Skip to content

Commit

Permalink
Fix glm4-9b-chat nan error on vllm 0.3.3 (#11970)
Browse files Browse the repository at this point in the history
* fix nan value

* update
  • Loading branch information
hzjane authored Aug 30, 2024
1 parent 77b04ef commit 7d10341
Showing 1 changed file with 9 additions and 1 deletion.
10 changes: 9 additions & 1 deletion python/llm/src/ipex_llm/vllm/xpu/model_convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,15 @@ def _ipex_llm_load_model(self) -> None:
parallel_config=self.parallel_config,
scheduler_config=self.scheduler_config)
from ipex_llm import optimize_model
optimize_model(self.model, low_bit=low_bit, torch_dtype=self.model_config.dtype)
import os
not_convert_last_mlp = os.getenv("IPEX_LLM_NOT_CONVERT_LAST_MLP", None)
if not_convert_last_mlp is not None:
# Only used to avoid NaN values in the forward pass of the last MLP layers when running glm4-9b-chat.
modules = ["35.mlp", "36.mlp", "37.mlp", "38.mlp", "39.mlp"]
else:
modules = None
optimize_model(self.model, low_bit=low_bit, torch_dtype=self.model_config.dtype,
modules_to_not_convert=modules)
self.model = self.model.to(device=self.device_config.device,
dtype=self.model_config.dtype)

Expand Down

0 comments on commit 7d10341

Please sign in to comment.