From d20a968ce2b1a5f0faeecc83ed6bc0765e06d3f3 Mon Sep 17 00:00:00 2001
From: binbin Deng <108676127+plusbang@users.noreply.github.com>
Date: Fri, 13 Dec 2024 14:07:24 +0800
Subject: [PATCH] [NPU] Fix generate example (#12541)

---
 .../llm/example/NPU/HF-Transformers-AutoModels/LLM/generate.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/generate.py b/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/generate.py
index d3abd13a6e6..6eaee048af7 100644
--- a/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/generate.py
+++ b/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/generate.py
@@ -52,7 +52,6 @@
         attn_implementation="eager"
     )
     tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
-    tokenizer.save_pretrained(args.lowbit_path)
 else:
     model = AutoModelForCausalLM.load_low_bit(
         args.lowbit_path,
@@ -66,6 +65,7 @@
 
 if args.lowbit_path and not os.path.exists(args.lowbit_path):
     model.save_low_bit(args.lowbit_path)
+    tokenizer.save_pretrained(args.lowbit_path)
 
 with torch.inference_mode():
     input_ids = tokenizer.encode(args.prompt, return_tensors="pt")
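
For context, the hunks above relocate a single call: tokenizer.save_pretrained(args.lowbit_path)
previously ran inside the from_pretrained branch, unguarded, so it executed even when
--lowbit-path was not supplied. The fix places it under the same guard as model.save_low_bit,
so model and tokenizer are written together, and only when a low-bit path is given and does
not already exist. Below is a minimal sketch of the resulting flow. It assumes the
ipex_llm NPU import path (ipex_llm.transformers.npu_model) and the example's argument
names; the argparse defaults and any kwargs not visible in the diff are illustrative,
not taken verbatim from generate.py.

    import argparse
    import os

    import torch
    from transformers import AutoTokenizer

    # Assumed import path for the NPU-optimized AutoModel class.
    from ipex_llm.transformers.npu_model import AutoModelForCausalLM

    parser = argparse.ArgumentParser()
    # Argument names follow the example; defaults here are illustrative.
    parser.add_argument("--repo-id-or-model-path", default="meta-llama/Llama-2-7b-chat-hf")
    parser.add_argument("--lowbit-path", default="")
    parser.add_argument("--prompt", default="What is AI?")
    args = parser.parse_args()
    model_path = args.repo_id_or_model_path

    if not args.lowbit_path or not os.path.exists(args.lowbit_path):
        # First run: convert from the original checkpoint.
        model = AutoModelForCausalLM.from_pretrained(
            model_path,
            trust_remote_code=True,
            attn_implementation="eager",
        )
        tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
    else:
        # Later runs: reload the already-converted low-bit checkpoint.
        model = AutoModelForCausalLM.load_low_bit(
            args.lowbit_path,
            trust_remote_code=True,
            attn_implementation="eager",
        )
        # Sketch simplification: the tokenizer is reloaded from the original
        # model path; the actual example may load it from the saved directory.
        tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

    # The point of this patch: save model and tokenizer together, and only
    # when a destination was given and does not exist yet.
    if args.lowbit_path and not os.path.exists(args.lowbit_path):
        model.save_low_bit(args.lowbit_path)
        tokenizer.save_pretrained(args.lowbit_path)

    with torch.inference_mode():
        input_ids = tokenizer.encode(args.prompt, return_tensors="pt")
        output = model.generate(input_ids, max_new_tokens=32)
        print(tokenizer.decode(output[0], skip_special_tokens=True))

With the call saved under the guard, a later invocation with the same --lowbit-path
finds both the low-bit weights and the tokenizer files in one directory, so the
load_low_bit branch can run without touching the original checkpoint.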