diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/mx_quant/requirements.txt b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/mx_quant/requirements.txt
index bc70f987095..736d79c4d72 100644
--- a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/mx_quant/requirements.txt
+++ b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/mx_quant/requirements.txt
@@ -2,6 +2,5 @@ transformers
 torch
 sentencepiece
 neural-compressor
-intel-extension-for-transformers >= 1.4.1
 lm-eval==0.4.2
 peft
diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/mx_quant/run_clm_no_trainer.py b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/mx_quant/run_clm_no_trainer.py
index 40bf217c72e..6ad8e495db2 100644
--- a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/mx_quant/run_clm_no_trainer.py
+++ b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/mx_quant/run_clm_no_trainer.py
@@ -62,7 +62,7 @@ def get_user_model():
     user_model = convert(model=user_model)
     user_model.eval()
 
-from intel_extension_for_transformers.transformers.llm.evaluation.lm_eval import evaluate, LMEvalParser
+from neural_compressor.evaluation.lm_eval import evaluate, LMEvalParser
 eval_args = LMEvalParser(
     model="hf",
     user_model=user_model,
diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/smooth_quant/requirements.txt b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/smooth_quant/requirements.txt
index d4155dfbf75..d9f59d178e7 100644
--- a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/smooth_quant/requirements.txt
+++ b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/smooth_quant/requirements.txt
@@ -8,7 +8,6 @@ pytest
 wandb
 einops
 neural-compressor
-intel-extension-for-transformers
-lm_eval==0.4.2
+lm_eval==0.4.3
 peft
 optimum-intel
diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/smooth_quant/run_clm_no_trainer.py b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/smooth_quant/run_clm_no_trainer.py
index 694c0505ea4..a082421f15b 100644
--- a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/smooth_quant/run_clm_no_trainer.py
+++ b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/smooth_quant/run_clm_no_trainer.py
@@ -185,7 +185,7 @@ def eval_func(model):
     config = AutoConfig.from_pretrained(args.model)
     setattr(model, "config", config)
 
-    from intel_extension_for_transformers.transformers.llm.evaluation.lm_eval import evaluate, LMEvalParser
+    from neural_compressor.evaluation.lm_eval import evaluate, LMEvalParser
     eval_args = LMEvalParser(
         model="hf",
         user_model=model,
@@ -232,7 +232,7 @@ def eval_func(model):
 if args.accuracy:
     user_model.eval()
 
-    from intel_extension_for_transformers.transformers.llm.evaluation.lm_eval import evaluate, LMEvalParser
+    from neural_compressor.evaluation.lm_eval import evaluate, LMEvalParser
 
     eval_args = LMEvalParser(
         model="hf",
diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/ipex/requirements.txt b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/ipex/requirements.txt
index f0b56e558d3..5174182f312 100644
--- a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/ipex/requirements.txt
+++ b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/ipex/requirements.txt
@@ -8,6 +8,5 @@ pytest
 wandb
 einops
 neural-compressor
-intel-extension-for-transformers
-lm_eval==0.4.2
+lm_eval==0.4.3
 peft
diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/ipex/run_clm_no_trainer.py b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/ipex/run_clm_no_trainer.py
index b56c01f20f5..eb97f930d29 100644
--- a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/ipex/run_clm_no_trainer.py
+++ b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/ipex/run_clm_no_trainer.py
@@ -212,7 +212,7 @@ def run_fn(model):
 if args.accuracy:
     user_model.eval()
 
-    from intel_extension_for_transformers.transformers.llm.evaluation.lm_eval import evaluate, LMEvalParser
+    from neural_compressor.evaluation.lm_eval import evaluate, LMEvalParser
     eval_args = LMEvalParser(
         model="hf",
         user_model=user_model,
@@ -232,7 +232,7 @@ def run_fn(model):
 if args.performance:
     user_model.eval()
 
-    from intel_extension_for_transformers.transformers.llm.evaluation.lm_eval import evaluate, LMEvalParser
+    from neural_compressor.evaluation.lm_eval import evaluate, LMEvalParser
     import time
 
     samples = args.iters * args.batch_size
diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/pt2e/requirements.txt b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/pt2e/requirements.txt
index b6d9b6c55de..63959e924cb 100644
--- a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/pt2e/requirements.txt
+++ b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/pt2e/requirements.txt
@@ -2,6 +2,5 @@ transformers
 torch
 sentencepiece
 neural-compressor
-intel-extension-for-transformers >= 1.4.1
-lm-eval==0.4.2
+lm-eval==0.4.3
 peft
\ No newline at end of file
diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/pt2e/run_clm_no_trainer.py b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/pt2e/run_clm_no_trainer.py
index 395bc6f9b57..a2aa6c1302a 100644
--- a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/pt2e/run_clm_no_trainer.py
+++ b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/pt2e/run_clm_no_trainer.py
@@ -116,7 +116,7 @@ def get_example_inputs(tokenizer):
 
 
 if args.accuracy:
-    from intel_extension_for_transformers.transformers.llm.evaluation.lm_eval import evaluate, LMEvalParser
+    from neural_compressor.evaluation.lm_eval import evaluate, LMEvalParser
     eval_args = LMEvalParser(
         model="hf",
         user_model=user_model,
diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/transformers/weight_only/text-generation/llm_quantization_recipes.md b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/transformers/weight_only/text-generation/llm_quantization_recipes.md
index 2c3b14459c8..6a5e75b5023 100644
--- a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/transformers/weight_only/text-generation/llm_quantization_recipes.md
+++ b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/transformers/weight_only/text-generation/llm_quantization_recipes.md
@@ -30,12 +30,6 @@ The scripts [run_generation_sq.py](./run_generation_sq.py) and [run_generation_c
 
 ```bash
 # Installation
-git clone https://github.com/intel/intel-extension-for-transformers.git
-
-# install ITREX
-cd intel-extension-for-transformers
-pip install -r requirements.txt
-pip install -v .
 
 # install requirements
 cd examples/huggingface/pytorch/text-generation/quantization
diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/transformers/weight_only/text-generation/run_generation_cpu_woq.py b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/transformers/weight_only/text-generation/run_generation_cpu_woq.py
index 62ef4ca2f49..8329d74b9a4 100644
--- a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/transformers/weight_only/text-generation/run_generation_cpu_woq.py
+++ b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/transformers/weight_only/text-generation/run_generation_cpu_woq.py
@@ -293,7 +293,6 @@
         _commit_hash=args._commit_hash,
     )
 elif args.load_in_4bit or args.load_in_8bit:
-    # CPU device usage is provided by intel-extension-for-transformers.
     user_model = AutoModelForCausalLM.from_pretrained(
         args.model,
         load_in_4bit=args.load_in_4bit,
diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/transformers/weight_only/text-generation/run_generation_gpu_woq.py b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/transformers/weight_only/text-generation/run_generation_gpu_woq.py
index 9245d53eb50..f92a2ff6b8c 100644
--- a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/transformers/weight_only/text-generation/run_generation_gpu_woq.py
+++ b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/transformers/weight_only/text-generation/run_generation_gpu_woq.py
@@ -6,7 +6,6 @@
 from transformers import AutoConfig, AutoTokenizer
 from transformers.generation import GenerationConfig
 import intel_extension_for_pytorch as ipex
-# from intel_extension_for_transformers.transformers.llm.utils.generation import _beam_search, _greedy_search
 from neural_compressor.transformers import AutoModelForCausalLM, AutoRoundConfig, RtnConfig, GPTQConfig
 from neural_compressor.transformers.quantization.utils import convert_dtype_str2torch
 from transformers.utils import check_min_version
@@ -189,7 +188,6 @@
         torch_dtype=torch.float16,
     )
 elif args.load_in_4bit or args.load_in_8bit:
-    # CPU device usage is provided by intel-extension-for-transformers.
     user_model = AutoModelForCausalLM.from_pretrained(args.model,
                                                       device_map=args.device,
                                                       load_in_4bit=args.load_in_4bit,
diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only/requirements.txt b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only/requirements.txt
index 63c4d6e10b1..4745e2dfbd7 100644
--- a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only/requirements.txt
+++ b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only/requirements.txt
@@ -8,7 +8,6 @@ pytest
 wandb
 einops
 neural-compressor
-intel-extension-for-transformers
 lm_eval==0.4.3
 peft
 auto_round
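
All converted call sites share one pattern: build an `LMEvalParser` and hand it to `evaluate`, both now imported from `neural_compressor.evaluation.lm_eval` instead of the removed ITREX package. Below is a minimal standalone sketch of that pattern. The import path and the `model="hf"` / `user_model=...` fields come directly from the patch; the checkpoint name and the `tokenizer`, `tasks`, `batch_size`, and `device` fields are illustrative assumptions, not confirmed by this diff.

```python
# Minimal sketch of the post-patch evaluation flow.
# Assumptions: the checkpoint name is hypothetical, and the tokenizer/tasks/
# batch_size/device fields of LMEvalParser are modeled on typical lm-eval
# harness options rather than taken from this diff.
from transformers import AutoModelForCausalLM, AutoTokenizer
from neural_compressor.evaluation.lm_eval import evaluate, LMEvalParser

model_name = "facebook/opt-125m"  # hypothetical model, for illustration only
user_model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
user_model.eval()  # evaluation mode, as in the updated scripts

eval_args = LMEvalParser(
    model="hf",                # backend name, as in the patch
    user_model=user_model,     # pass the (quantized) model object directly
    tokenizer=tokenizer,       # assumed field
    tasks="lambada_openai",    # assumed task selection
    batch_size=8,              # assumed
    device="cpu",              # assumed
)
results = evaluate(eval_args)
```

The net effect of the change is that this evaluation entry point now ships with neural-compressor itself, so none of the examples need intel-extension-for-transformers any longer.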