Skip to content

Commit

Permalink
update GPU example
Browse files Browse the repository at this point in the history
  • Loading branch information
liu-shaojun committed Jun 26, 2024
1 parent 6e6db4e commit d0ea969
Showing 1 changed file with 5 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
import time
import argparse

from ipex_llm.transformers import AutoModelForCausalLM
# Fix: the second name was a duplicate `AutoTokenizer`; LlamaTokenizer is
# required by the non-Qwen tokenizer branch below and must be imported here.
from transformers import AutoTokenizer, LlamaTokenizer

# you could tune the prompt based on your own model,
# here the prompt tuning refers to https://huggingface.co/georgesung/llama2_7b_chat_uncensored#prompt-style
Expand Down Expand Up @@ -48,9 +48,11 @@
torch_dtype=torch.float,
trust_remote_code=True,).to("xpu")

print(model)

# Load tokenizer.
# Qwen models do not use the Llama tokenizer, so dispatch on the model path:
# AutoTokenizer resolves Qwen's own tokenizer class (trust_remote_code lets it
# load the custom code shipped with the checkpoint); everything else in this
# example is Llama-family and uses LlamaTokenizer directly.
if "qwen" in model_path.lower():
    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
else:
    tokenizer = LlamaTokenizer.from_pretrained(model_path, trust_remote_code=True)

# Generate predicted tokens
with torch.inference_mode():
Expand Down

0 comments on commit d0ea969

Please sign in to comment.