From b4047915d390dec593f23a6381d60dc590916ce4 Mon Sep 17 00:00:00 2001
From: rnwang04
Date: Mon, 12 Aug 2024 17:00:27 +0800
Subject: [PATCH] fix

---
 python/llm/example/GPU/HuggingFace/LLM/qwen2/generate.py | 5 ++---
 python/llm/src/ipex_llm/transformers/lookup.py           | 2 +-
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/python/llm/example/GPU/HuggingFace/LLM/qwen2/generate.py b/python/llm/example/GPU/HuggingFace/LLM/qwen2/generate.py
index 25fdaeec16a..7d0d1ed072b 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/qwen2/generate.py
+++ b/python/llm/example/GPU/HuggingFace/LLM/qwen2/generate.py
@@ -19,7 +19,6 @@
 
 import argparse
 from transformers import AutoTokenizer
-from ipex_llm import optimize_model
 import numpy as np
 
 
@@ -36,7 +35,7 @@
 
     args = parser.parse_args()
    model_path = args.repo_id_or_model_path
-    
+
     from ipex_llm.transformers import AutoModelForCausalLM
     # Load model in 4 bit,
     # which convert the relevant layers in the model into INT4 format
@@ -45,7 +44,7 @@
                                                  optimize_model=True,
                                                  trust_remote_code=True,
                                                  use_cache=True)
-    model = model.to("xpu")
+    model = model.half().to("xpu")
 
     # Load tokenizer
     tokenizer = AutoTokenizer.from_pretrained(model_path,
diff --git a/python/llm/src/ipex_llm/transformers/lookup.py b/python/llm/src/ipex_llm/transformers/lookup.py
index e5725ff76d6..c17f76afe1c 100644
--- a/python/llm/src/ipex_llm/transformers/lookup.py
+++ b/python/llm/src/ipex_llm/transformers/lookup.py
@@ -149,7 +149,7 @@ def init_look_up_table(self, input_ids: torch.LongTensor):
 
         for ngram_size in range(self.max_matching_ngram_size, 0, -1):
             # Create sliding windows of size ngram_size
-            windows = input_ids.unfold(dimension=1, size=ngram_size, step=1)
+            windows = input_ids.cpu().unfold(dimension=1, size=ngram_size, step=1)
             for idx in range(windows.size(1)):
                 window = tensor2key(windows[0, idx])
                 if window not in self.lookup_table:
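
Note on the generate.py change: the added .half() casts the layers that stay in
floating point after the INT4 conversion down to fp16 before the model is moved
to the Intel GPU. A minimal sketch of the resulting load path, assuming a
placeholder model id (the from_pretrained arguments are taken verbatim from the
patched example):

    from ipex_llm.transformers import AutoModelForCausalLM

    # Placeholder model id for illustration; the example reads it from argparse.
    model_path = "Qwen/Qwen2-7B-Instruct"

    # load_in_4bit=True converts the relevant layers to INT4 on load; layers
    # that remain in floating point are then cast to fp16 by .half() before
    # the move to the Intel GPU ("xpu") device.
    model = AutoModelForCausalLM.from_pretrained(model_path,
                                                 load_in_4bit=True,
                                                 optimize_model=True,
                                                 trust_remote_code=True,
                                                 use_cache=True)
    model = model.half().to("xpu")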
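
Note on the lookup.py change: Tensor.unfold takes sliding n-gram windows along
the sequence dimension, and each window is then turned into a hashable key for
a Python dict, which needs host-side values anyway; calling .cpu() once up
front moves input_ids off the XPU device before the window loop instead of
paying a device-to-host transfer per window. A standalone sketch of that loop,
assuming tuple(... .tolist()) as a stand-in for the repo's tensor2key helper
and storing the window index as the value (what the real table stores is
outside this hunk):

    import torch

    def build_lookup_table(input_ids: torch.LongTensor,
                           max_matching_ngram_size: int = 3) -> dict:
        lookup_table = {}
        for ngram_size in range(max_matching_ngram_size, 0, -1):
            # Move to CPU once (the fix), then take sliding windows of
            # length ngram_size along the sequence dimension.
            windows = input_ids.cpu().unfold(dimension=1, size=ngram_size, step=1)
            for idx in range(windows.size(1)):
                # Stand-in for tensor2key: dict keys must be hashable.
                window = tuple(windows[0, idx].tolist())
                if window not in lookup_table:
                    lookup_table[window] = idx
        return lookup_table

    # Usage: token ids shaped (batch=1, seq_len)
    table = build_lookup_table(torch.tensor([[1, 2, 3, 1, 2, 4]]))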