From 385da7c7ed018a66fcba6e28658d1a5eea2e52e4 Mon Sep 17 00:00:00 2001 From: "Sun, Xuehao" Date: Fri, 9 Aug 2024 21:53:51 +0800 Subject: [PATCH] Add 3.x readme (#1971) Signed-off-by: Sun, Xuehao --- README.md | 2 +- docs/source/llm_recipes.md | 168 ++++++++--------- .../.config/model_params_tensorflow_3x.json | 25 +-- examples/3.x_api/README.md | 169 ++++++++++++++++++ 4 files changed, 259 insertions(+), 105 deletions(-) create mode 100644 examples/3.x_api/README.md diff --git a/README.md b/README.md index bcc9c2fcc96..d7f02d5aa02 100644 --- a/README.md +++ b/README.md @@ -146,7 +146,7 @@ quantized_model = fit(model=float_model, conf=static_quant_conf, calib_dataloade Workflow APIs LLMs Recipes - Examples + Examples diff --git a/docs/source/llm_recipes.md b/docs/source/llm_recipes.md index 328bba3ba09..8a9c17e7cd7 100644 --- a/docs/source/llm_recipes.md +++ b/docs/source/llm_recipes.md @@ -17,8 +17,8 @@ This document aims to publish the specific recipes we achieved for the popular L | EleutherAI/gpt-j-6b | ✔ | ✔ | ✔ | | facebook/opt-1.3b | ✔ | ✔ | ✔ | | facebook/opt-30b | ✔ | ✔ | ✔ | -| meta-llama/Llama-2-7b-hf | ✔ | ✔ | ✔ | -| meta-llama/Llama-2-13b-hf | ✔ | ✔ | ✔ | +| meta-llama/Llama-2-7b-hf | WIP | ✔ | ✔ | +| meta-llama/Llama-2-13b-hf | WIP | ✔ | ✔ | | meta-llama/Llama-2-70b-hf | ✔ | ✔ | ✔ | | tiiuae/falcon-7b | ✔ | ✔ | ✔ | | tiiuae/falcon-40b | ✔ | ✔ | ✔ | @@ -29,8 +29,8 @@ This document aims to publish the specific recipes we achieved for the popular L | databricks/dolly-v2-12b | ✖ | ✔ | ✖ | | EleutherAI/gpt-neox-20b | ✖ | ✔ | ✔ | | mistralai/Mistral-7B-v0.1 | ✖ | ✔ | ✔ | -| THUDM/chatglm2-6b | ✔ | ✔ | ✔ | -| THUDM/chatglm3-6b | WIP | ✔ | WIP | +| THUDM/chatglm2-6b | WIP | ✔ | WIP | +| THUDM/chatglm3-6b | WIP | ✔ | ✔ | **Detail recipes can be found [HERE](https://github.com/intel/intel-extension-for-transformers/blob/main/examples/huggingface/pytorch/text-generation/quantization/llm_quantization_recipes.md).** @@ -68,48 +68,48 @@ This document aims to publish 
the specific recipes we achieved for the popular L baichuan-inc/Baichuan-13B-Chat 67.57% - 69.07% - 1.0222 + 67.86% + 1.0043 67.55% 0.9997 - 68.12% - 1.0081 - 66.93% - 0.9905 + 67.46% + 0.9984 + N/A + N/A baichuan-inc/Baichuan2-13B-Chat 71.51% - 75.57% - 1.0568 + 75.51% + 1.0559 71.57% 1.0008 - 70.81% - 0.9902 - N/A - N/A + 71.45% + 0.9992 + 70.87% + 0.9911 baichuan-inc/Baichuan2-7B-Chat 67.67% - 68.06% - 1.0058 + 67.51% + 0.9976 67.61% 0.9991 - 67.90% - 1.0034 - N/A - N/A + 68.08% + 1.0061 + 67.18% + 0.9928 bigscience/bloom-1b7 46.34% - 47.99% - 1.0356 + 47.97% + 1.0352 46.21% 0.9972 - 46.90% - 1.0121 + 47.00% + 1.0142 N/A N/A @@ -128,14 +128,14 @@ This document aims to publish the specific recipes we achieved for the popular L EleutherAI/gpt-j-6b 68.31% + 68.00% + 0.9955 68.27% 0.9994 - 68.27% - 0.9994 - 68.35% - 1.0006 - 68.02% - 0.9958 + 68.23% + 0.9988 + 67.40% + 0.9867 EleutherAI/gpt-neox-20b @@ -144,68 +144,68 @@ This document aims to publish the specific recipes we achieved for the popular L N/A 72.29% 0.9994 - 71.74% - 0.9918 + 72.15% + 0.9975 N/A N/A facebook/opt-1.3b 57.89% - 57.68% - 0.9964 + 57.35% + 0.9907 58.12% 1.0040 - 58.26% - 1.0064 + 58.01% + 1.0021 N/A N/A facebook/opt-30b 71.49% - 71.78% - 1.0041 + 71.51% + 1.0003 71.53% 1.0006 - 71.59% - 1.0014 - 71.80% - 1.0043 + 71.82% + 1.0046 + 71.43% + 0.9992 meta-llama/Llama-2-13b-hf 76.77% - 76.25% - 0.9932 + N/A + N/A 76.89% 1.0016 - 77.66% - 1.0116 - 76.60% - 0.9978 + 76.96% + 1.0025 + N/A + N/A meta-llama/Llama-2-70b-hf 79.64% - 79.14% - 0.9937 + 79.53% + 0.9986 79.62% 0.9997 - 80.09% - 1.0057 - 79.68% - 1.0005 + 80.05% + 1.0051 + N/A + N/A meta-llama/Llama-2-7b-hf 73.92% - 73.45% - 0.9936 + N/A + N/A 73.90% 0.9997 - 73.84% - 0.9989 + 73.51% + 0.9945 N/A N/A @@ -216,22 +216,22 @@ This document aims to publish the specific recipes we achieved for the popular L N/A 75.80% 0.9987 - 76.25% - 1.0046 - 75.74% - 0.9979 + 75.37% + 0.9930 + 75.82% + 0.9989 THUDM/chatglm2-6b 53.23% - 52.86% - 0.9930 + N/A + 
N/A 53.00% 0.9957 - 52.90% - 0.9938 - 52.92% - 0.9942 + N/A + N/A + N/A + N/A THUDM/chatglm3-6b @@ -242,31 +242,31 @@ This document aims to publish the specific recipes we achieved for the popular L 0.9990 N/A N/A - N/A - N/A + 58.59% + 0.9915 tiiuae/falcon-40b 77.22% - 76.95% - 0.9965 + 77.26% + 1.0005 77.18% 0.9995 - 77.55% - 1.0043 - 77.82% - 1.0078 + 77.97% + 1.0097 + N/A + N/A tiiuae/falcon-7b 74.67% - 76.63% - 1.0262 + 76.17% + 1.0201 74.73% 1.0008 - 75.06% - 1.0052 - 74.00% - 0.9910 + 74.79% + 1.0016 + N/A + N/A diff --git a/examples/.config/model_params_tensorflow_3x.json b/examples/.config/model_params_tensorflow_3x.json index 74b40ea4f5d..e2a052656f8 100644 --- a/examples/.config/model_params_tensorflow_3x.json +++ b/examples/.config/model_params_tensorflow_3x.json @@ -8,20 +8,6 @@ "batch_size": 64, "fp32_model_url": "https://storage.googleapis.com/intel-optimized-tensorflow/models/v2_7_0/fp32_bert_squad.pb" }, - "distilbert_base": { - "model_src_dir": "nlp/distilbert_base/quantization/ptq", - "dataset_location": "/tf_dataset2/datasets/sst2_validation_dataset", - "input_model": "/tf_dataset2/models/tensorflow/distilbert_base/fp32/distilbert_base_fp32.pb", - "main_script": "main.py", - "batch_size": 128 - }, - "distilbert_base_sq": { - "model_src_dir": "nlp/distilbert_base/quantization/ptq", - "dataset_location": "/tf_dataset2/datasets/sst2_validation_dataset", - "input_model": "/tf_dataset2/models/tensorflow/distilbert_base/fp32/distilbert_base_fp32.pb", - "main_script": "main.py", - "batch_size": 128 - }, "opt_125m_sq": { "model_src_dir": "nlp/large_language_models/quantization/ptq/smoothquant", "dataset_location": "", @@ -97,9 +83,9 @@ "model_src_dir": "object_detection/yolo_v5/quantization/ptq", "dataset_location": "/tf_dataset2/datasets/coco_yolov5/coco", "input_model": "/tf_dataset2/models/tensorflow/yolo_v5/yolov5s.pb", - "main_script": "main.py", + "main_script": "main.py", "batch_size": 1 - }, + }, "faster_rcnn_resnet50": { "model_src_dir": 
"object_detection/faster_rcnn_resnet50/quantization/ptq", "dataset_location": "/tf_dataset/tensorflow/coco_val.record", @@ -125,14 +111,14 @@ "model_src_dir": "object_detection/ssd_mobilenet_v1/quantization/ptq", "dataset_location": "/tf_dataset/tensorflow/coco_val.record", "input_model": "/tf_dataset/pre-train-model-oob/object_detection/ssd_mobilenet_v1/frozen_inference_graph.pb", - "main_script": "main.py", + "main_script": "main.py", "batch_size": 10 }, "ssd_mobilenet_v1_ckpt": { "model_src_dir": "object_detection/ssd_mobilenet_v1/quantization/ptq", "dataset_location": "/tf_dataset/tensorflow/coco_val.record", "input_model": "/tf_dataset/pre-train-model-oob/object_detection/ssd_mobilenet_v1", - "main_script": "main.py", + "main_script": "main.py", "batch_size": 10 }, "wide_deep_large_ds": { @@ -158,5 +144,4 @@ "batch_size": 1 } } -} - +} \ No newline at end of file diff --git a/examples/3.x_api/README.md b/examples/3.x_api/README.md new file mode 100644 index 00000000000..fd79f210533 --- /dev/null +++ b/examples/3.x_api/README.md @@ -0,0 +1,169 @@ +# Examples + +Intel® Neural Compressor validated examples with multiple compression techniques, including quantization, pruning, knowledge distillation and orchestration. Part of the validated cases can be found in the example tables, and the release data is available [here](../docs/source/validated_model_list.md). + + +# PyTorch Examples + +## Quantization + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
| Model | Domain | Method | Examples |
|---|---|---|---|
| gpt_j | Natural Language Processing | Weight-Only Quantization | link |
| | | Static Quantization (IPEX) | link |
| llama2_7b | Natural Language Processing | Weight-Only Quantization | link |
| | | Static Quantization (IPEX) | link |
| opt_125m | Natural Language Processing | Static Quantization (IPEX) | link |
| | | Static Quantization (PT2E) | link |
| | | Weight-Only Quantization | link |
| resnet18 | Image Recognition | Mixed Precision | link |
| | | Static Quantization | link |
+ + +# TensorFlow Examples + +## Quantization + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
| Model | Domain | Method | Examples |
|---|---|---|---|
| bert_large_squad_model_zoo | Natural Language Processing | Post-Training Static Quantization | link |
| transformer_lt | Natural Language Processing | Post-Training Static Quantization | link |
| inception_v3 | Image Recognition | Post-Training Static Quantization | link |
| mobilenetv2 | Image Recognition | Post-Training Static Quantization | link |
| resnetv2_50 | Image Recognition | Post-Training Static Quantization | link |
| vgg16 | Image Recognition | Post-Training Static Quantization | link |
| ViT | Image Recognition | Post-Training Static Quantization | link |
| GraphSage | Graph Networks | Post-Training Static Quantization | link |
| yolo_v5 | Object Detection | Post-Training Static Quantization | link |
| faster_rcnn_resnet50 | Object Detection | Post-Training Static Quantization | link |
| mask_rcnn_inception_v2 | Object Detection | Post-Training Static Quantization | link |
| ssd_mobilenet_v1 | Object Detection | Post-Training Static Quantization | link |
| wide_deep_large_ds | Recommendation | Post-Training Static Quantization | link |
| 3dunet-mlperf | Semantic Image Segmentation | Post-Training Static Quantization | link |
+