From 385da7c7ed018a66fcba6e28658d1a5eea2e52e4 Mon Sep 17 00:00:00 2001
From: "Sun, Xuehao" <xuehao.sun@intel.com>
Date: Fri, 9 Aug 2024 21:53:51 +0800
Subject: [PATCH] Add 3.x readme (#1971)

Signed-off-by: Sun, Xuehao <xuehao.sun@intel.com>
---
 README.md                                     |   2 +-
 docs/source/llm_recipes.md                    | 168 ++++++++---------
 .../.config/model_params_tensorflow_3x.json   |  25 +--
 examples/3.x_api/README.md                    | 169 ++++++++++++++++++
 4 files changed, 259 insertions(+), 105 deletions(-)
 create mode 100644 examples/3.x_api/README.md
diff --git a/README.md b/README.md
index bcc9c2fcc96..d7f02d5aa02 100644
--- a/README.md
+++ b/README.md
@@ -146,7 +146,7 @@ quantized_model = fit(model=float_model, conf=static_quant_conf, calib_dataloade
       <td colspan="2" align="center"><a href="./docs/source/3x/design.md#workflows">Workflow</a></td>
       <td colspan="2" align="center"><a href="https://intel.github.io/neural-compressor/latest/docs/source/api-doc/apis.html">APIs</a></td>
       <td colspan="1" align="center"><a href="./docs/source/3x/llm_recipes.md">LLMs Recipes</a></td>
-      <td colspan="1" align="center">Examples</td>
+      <td colspan="1" align="center"><a href="./examples/3.x_api/README.md">Examples</a></td>
     </tr>
   </tbody>
   <thead>
diff --git a/docs/source/llm_recipes.md b/docs/source/llm_recipes.md
index 328bba3ba09..8a9c17e7cd7 100644
--- a/docs/source/llm_recipes.md
+++ b/docs/source/llm_recipes.md
@@ -17,8 +17,8 @@ This document aims to publish the specific recipes we achieved for the popular L
 |       EleutherAI/gpt-j-6b       |    ✔    |    ✔     |    ✔     |
 |        facebook/opt-1.3b        |    ✔    |    ✔     |    ✔     |
 |        facebook/opt-30b         |    ✔    |    ✔     |    ✔     |
-|    meta-llama/Llama-2-7b-hf     |    ✔    |    ✔     |    ✔     |
-|    meta-llama/Llama-2-13b-hf    |    ✔    |    ✔     |    ✔     |
+|    meta-llama/Llama-2-7b-hf     |   WIP   |    ✔     |    ✔     |
+|    meta-llama/Llama-2-13b-hf    |   WIP   |    ✔     |    ✔     |
 |    meta-llama/Llama-2-70b-hf    |    ✔    |    ✔     |    ✔     |
 |        tiiuae/falcon-7b         |    ✔    |    ✔     |    ✔     |
 |        tiiuae/falcon-40b        |    ✔    |    ✔     |    ✔     |
@@ -29,8 +29,8 @@ This document aims to publish the specific recipes we achieved for the popular L
 |     databricks/dolly-v2-12b     |    ✖    |    ✔     |    ✖     |
 |     EleutherAI/gpt-neox-20b     |    ✖    |    ✔     |    ✔     |
 |    mistralai/Mistral-7B-v0.1    |    ✖    |    ✔     |    ✔     |
-|        THUDM/chatglm2-6b        |    ✔    |    ✔     |    ✔     |
-|        THUDM/chatglm3-6b        |   WIP   |    ✔     |   WIP    |
+|        THUDM/chatglm2-6b        |   WIP   |    ✔     |   WIP    |
+|        THUDM/chatglm3-6b        |   WIP   |    ✔     |    ✔     |
 
 **Detail recipes can be found [HERE](https://github.com/intel/intel-extension-for-transformers/blob/main/examples/huggingface/pytorch/text-generation/quantization/llm_quantization_recipes.md).**
 
@@ -68,48 +68,48 @@ This document aims to publish the specific recipes we achieved for the popular L
   <tr>
     <td>baichuan-inc/Baichuan-13B-Chat</td>
     <td>67.57%</td>
-    <td>69.07%</td>
-    <td>1.0222</td>
+    <td>67.86%</td>
+    <td>1.0043</td>
     <td>67.55%</td>
     <td>0.9997</td>
-    <td>68.12%</td>
-    <td>1.0081</td>
-    <td>66.93%</td>
-    <td>0.9905</td>
+    <td>67.46%</td>
+    <td>0.9984</td>
+    <td>N/A</td>
+    <td>N/A</td>
   </tr>
   <tr>
     <td>baichuan-inc/Baichuan2-13B-Chat</td>
     <td>71.51%</td>
-    <td>75.57%</td>
-    <td>1.0568</td>
+    <td>75.51%</td>
+    <td>1.0559</td>
     <td>71.57%</td>
     <td>1.0008</td>
-    <td>70.81%</td>
-    <td>0.9902</td>
-    <td>N/A</td>
-    <td>N/A</td>
+    <td>71.45%</td>
+    <td>0.9992</td>
+    <td>70.87%</td>
+    <td>0.9911</td>
   </tr>
   <tr>
     <td>baichuan-inc/Baichuan2-7B-Chat</td>
     <td>67.67%</td>
-    <td>68.06%</td>
-    <td>1.0058</td>
+    <td>67.51%</td>
+    <td>0.9976</td>
     <td>67.61%</td>
     <td>0.9991</td>
-    <td>67.90%</td>
-    <td>1.0034</td>
-    <td>N/A</td>
-    <td>N/A</td>
+    <td>68.08%</td>
+    <td>1.0061</td>
+    <td>67.18%</td>
+    <td>0.9928</td>
   </tr>
   <tr>
     <td>bigscience/bloom-1b7</td>
     <td>46.34%</td>
-    <td>47.99%</td>
-    <td>1.0356</td>
+    <td>47.97%</td>
+    <td>1.0352</td>
     <td>46.21%</td>
     <td>0.9972</td>
-    <td>46.90%</td>
-    <td>1.0121</td>
+    <td>47.00%</td>
+    <td>1.0142</td>
     <td>N/A</td>
     <td>N/A</td>
   </tr>
@@ -128,14 +128,14 @@ This document aims to publish the specific recipes we achieved for the popular L
   <tr>
     <td>EleutherAI/gpt-j-6b</td>
     <td>68.31%</td>
+    <td>68.00%</td>
+    <td>0.9955</td>
     <td>68.27%</td>
     <td>0.9994</td>
-    <td>68.27%</td>
-    <td>0.9994</td>
-    <td>68.35%</td>
-    <td>1.0006</td>
-    <td>68.02%</td>
-    <td>0.9958</td>
+    <td>68.23%</td>
+    <td>0.9988</td>
+    <td>67.40%</td>
+    <td>0.9867</td>
   </tr>
   <tr>
     <td>EleutherAI/gpt-neox-20b</td>
@@ -144,68 +144,68 @@ This document aims to publish the specific recipes we achieved for the popular L
     <td>N/A</td>
     <td>72.29%</td>
     <td>0.9994</td>
-    <td>71.74%</td>
-    <td>0.9918</td>
+    <td>72.15%</td>
+    <td>0.9975</td>
     <td>N/A</td>
     <td>N/A</td>
   </tr>
   <tr>
     <td>facebook/opt-1.3b</td>
     <td>57.89%</td>
-    <td>57.68%</td>
-    <td>0.9964</td>
+    <td>57.35%</td>
+    <td>0.9907</td>
     <td>58.12%</td>
     <td>1.0040</td>
-    <td>58.26%</td>
-    <td>1.0064</td>
+    <td>58.01%</td>
+    <td>1.0021</td>
     <td>N/A</td>
     <td>N/A</td>
   </tr>
   <tr>
     <td>facebook/opt-30b</td>
     <td>71.49%</td>
-    <td>71.78%</td>
-    <td>1.0041</td>
+    <td>71.51%</td>
+    <td>1.0003</td>
     <td>71.53%</td>
     <td>1.0006</td>
-    <td>71.59%</td>
-    <td>1.0014</td>
-    <td>71.80%</td>
-    <td>1.0043</td>
+    <td>71.82%</td>
+    <td>1.0046</td>
+    <td>71.43%</td>
+    <td>0.9992</td>
   </tr>
   <tr>
     <td>meta-llama/Llama-2-13b-hf</td>
     <td>76.77%</td>
-    <td>76.25%</td>
-    <td>0.9932</td>
+    <td>N/A</td>
+    <td>N/A</td>
     <td>76.89%</td>
     <td>1.0016</td>
-    <td>77.66%</td>
-    <td>1.0116</td>
-    <td>76.60%</td>
-    <td>0.9978</td>
+    <td>76.96%</td>
+    <td>1.0025</td>
+    <td>N/A</td>
+    <td>N/A</td>
   </tr>
   <tr>
     <td>meta-llama/Llama-2-70b-hf</td>
     <td>79.64%</td>
-    <td>79.14%</td>
-    <td>0.9937</td>
+    <td>79.53%</td>
+    <td>0.9986</td>
     <td>79.62%</td>
     <td>0.9997</td>
-    <td>80.09%</td>
-    <td>1.0057</td>
-    <td>79.68%</td>
-    <td>1.0005</td>
+    <td>80.05%</td>
+    <td>1.0051</td>
+    <td>N/A</td>
+    <td>N/A</td>
   </tr>
   <tr>
     <td>meta-llama/Llama-2-7b-hf</td>
     <td>73.92%</td>
-    <td>73.45%</td>
-    <td>0.9936</td>
+    <td>N/A</td>
+    <td>N/A</td>
     <td>73.90%</td>
     <td>0.9997</td>
-    <td>73.84%</td>
-    <td>0.9989</td>
+    <td>73.51%</td>
+    <td>0.9945</td>
     <td>N/A</td>
     <td>N/A</td>
   </tr>
@@ -216,22 +216,22 @@ This document aims to publish the specific recipes we achieved for the popular L
     <td>N/A</td>
     <td>75.80%</td>
     <td>0.9987</td>
-    <td>76.25%</td>
-    <td>1.0046</td>
-    <td>75.74%</td>
-    <td>0.9979</td>
+    <td>75.37%</td>
+    <td>0.9930</td>
+    <td>75.82%</td>
+    <td>0.9989</td>
   </tr>
   <tr>
     <td>THUDM/chatglm2-6b</td>
     <td>53.23%</td>
-    <td>52.86%</td>
-    <td>0.9930</td>
+    <td>N/A</td>
+    <td>N/A</td>
     <td>53.00%</td>
     <td>0.9957</td>
-    <td>52.90%</td>
-    <td>0.9938</td>
-    <td>52.92%</td>
-    <td>0.9942</td>
+    <td>N/A</td>
+    <td>N/A</td>
+    <td>N/A</td>
+    <td>N/A</td>
   </tr>
   <tr>
     <td>THUDM/chatglm3-6b</td>
@@ -242,31 +242,31 @@ This document aims to publish the specific recipes we achieved for the popular L
     <td>0.9990</td>
     <td>N/A</td>
     <td>N/A</td>
-    <td>N/A</td>
-    <td>N/A</td>
+    <td>58.59%</td>
+    <td>0.9915</td>
   </tr>
   <tr>
     <td>tiiuae/falcon-40b</td>
     <td>77.22%</td>
-    <td>76.95%</td>
-    <td>0.9965</td>
+    <td>77.26%</td>
+    <td>1.0005</td>
     <td>77.18%</td>
     <td>0.9995</td>
-    <td>77.55%</td>
-    <td>1.0043</td>
-    <td>77.82%</td>
-    <td>1.0078</td>
+    <td>77.97%</td>
+    <td>1.0097</td>
+    <td>N/A</td>
+    <td>N/A</td>
   </tr>
   <tr>
     <td>tiiuae/falcon-7b</td>
     <td>74.67%</td>
-    <td>76.63%</td>
-    <td>1.0262</td>
+    <td>76.17%</td>
+    <td>1.0201</td>
     <td>74.73%</td>
     <td>1.0008</td>
-    <td>75.06%</td>
-    <td>1.0052</td>
-    <td>74.00%</td>
-    <td>0.9910</td>
+    <td>74.79%</td>
+    <td>1.0016</td>
+    <td>N/A</td>
+    <td>N/A</td>
   </tr>
 </tbody></table>
diff --git a/examples/.config/model_params_tensorflow_3x.json b/examples/.config/model_params_tensorflow_3x.json
index 74b40ea4f5d..e2a052656f8 100644
--- a/examples/.config/model_params_tensorflow_3x.json
+++ b/examples/.config/model_params_tensorflow_3x.json
@@ -8,20 +8,6 @@
       "batch_size": 64,
       "fp32_model_url": "https://storage.googleapis.com/intel-optimized-tensorflow/models/v2_7_0/fp32_bert_squad.pb"
     },
-    "distilbert_base": {
-      "model_src_dir": "nlp/distilbert_base/quantization/ptq",
-      "dataset_location": "/tf_dataset2/datasets/sst2_validation_dataset",
-      "input_model": "/tf_dataset2/models/tensorflow/distilbert_base/fp32/distilbert_base_fp32.pb",
-      "main_script": "main.py",
-      "batch_size": 128
-    },
-    "distilbert_base_sq": {
-      "model_src_dir": "nlp/distilbert_base/quantization/ptq",
-      "dataset_location": "/tf_dataset2/datasets/sst2_validation_dataset",
-      "input_model": "/tf_dataset2/models/tensorflow/distilbert_base/fp32/distilbert_base_fp32.pb",
-      "main_script": "main.py",
-      "batch_size": 128
-    },
     "opt_125m_sq": {
       "model_src_dir": "nlp/large_language_models/quantization/ptq/smoothquant",
       "dataset_location": "",
@@ -97,9 +83,9 @@
       "model_src_dir": "object_detection/yolo_v5/quantization/ptq",
       "dataset_location": "/tf_dataset2/datasets/coco_yolov5/coco",
       "input_model": "/tf_dataset2/models/tensorflow/yolo_v5/yolov5s.pb",
-      "main_script": "main.py", 
+      "main_script": "main.py",
       "batch_size": 1
-    },     
+    },
     "faster_rcnn_resnet50": {
       "model_src_dir": "object_detection/faster_rcnn_resnet50/quantization/ptq",
       "dataset_location": "/tf_dataset/tensorflow/coco_val.record",
@@ -125,14 +111,14 @@
       "model_src_dir": "object_detection/ssd_mobilenet_v1/quantization/ptq",
       "dataset_location": "/tf_dataset/tensorflow/coco_val.record",
       "input_model": "/tf_dataset/pre-train-model-oob/object_detection/ssd_mobilenet_v1/frozen_inference_graph.pb",
-      "main_script": "main.py",       
+      "main_script": "main.py",
       "batch_size": 10
     },
     "ssd_mobilenet_v1_ckpt": {
       "model_src_dir": "object_detection/ssd_mobilenet_v1/quantization/ptq",
       "dataset_location": "/tf_dataset/tensorflow/coco_val.record",
       "input_model": "/tf_dataset/pre-train-model-oob/object_detection/ssd_mobilenet_v1",
-      "main_script": "main.py",       
+      "main_script": "main.py",
       "batch_size": 10
     },
     "wide_deep_large_ds": {
@@ -158,5 +144,4 @@
       "batch_size": 1
     }
   }
-}
-
+}
\ No newline at end of file
diff --git a/examples/3.x_api/README.md b/examples/3.x_api/README.md
new file mode 100644
index 00000000000..fd79f210533
--- /dev/null
+++ b/examples/3.x_api/README.md
@@ -0,0 +1,169 @@
+# Examples
+
+Intel® Neural Compressor provides validated examples covering multiple compression techniques, including quantization, pruning, knowledge distillation and orchestration. Some of the validated cases are listed in the example tables below, and the release data is available [here](../../docs/source/validated_model_list.md).
+
+
+# PyTorch Examples
+
+## Quantization
+<table>
+<thead>
+  <tr>
+    <th>Model</th>
+    <th>Domain</th>
+    <th>Method</th>
+    <th>Examples</th>
+  </tr>
+</thead>
+<tbody>
+
+<tr>
+    <td rowspan="2">gpt_j</td>
+    <td rowspan="2">Natural Language Processing</td>
+    <td>Weight-Only Quantization</td>
+    <td><a href="./pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only">link</a></td>
+</tr>
+<tr>
+    <td>Static Quantization (IPEX)</td>
+    <td><a href="./pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/ipex">link</a></td>
+</tr>
+<tr>
+    <td rowspan="2">llama2_7b</td>
+    <td rowspan="2">Natural Language Processing</td>
+    <td>Weight-Only Quantization</td>
+    <td><a href="./pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only">link</a></td>
+</tr>
+<tr>
+    <td>Static Quantization (IPEX)</td>
+    <td><a href="./pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/ipex">link</a></td>
+</tr>
+<tr>
+    <td rowspan="3">opt_125m</td>
+    <td rowspan="3">Natural Language Processing</td>
+    <td>Static Quantization (IPEX)</td>
+    <td><a href="./pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/ipex">link</a></td>
+</tr>
+<tr>
+    <td>Static Quantization (PT2E)</td>
+    <td><a href="./pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/pt2e">link</a></td>
+</tr>
+<tr>
+    <td>Weight-Only Quantization</td>
+    <td><a href="./pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only">link</a></td>
+</tr>
+<tr>
+    <td rowspan="2">resnet18</td>
+    <td rowspan="2">Image Recognition</td>
+    <td>Mixed Precision</td>
+    <td><a href="./pytorch/cv/mixed_precision">link</a></td>
+</tr>
+<tr>
+    <td>Static Quantization</td>
+    <td><a href="./pytorch/cv/static_quant">link</a></td>
+</tr>
+</tbody>
+</table>
+
+
+# TensorFlow Examples
+
+## Quantization
+
+<table>
+<thead>
+  <tr>
+    <th>Model</th>
+    <th>Domain</th>
+    <th>Method</th>
+    <th>Examples</th>
+  </tr>
+</thead>
+<tbody>
+<tr>
+    <td>bert_large_squad_model_zoo</td>
+    <td>Natural Language Processing</td>
+    <td>Post-Training Static Quantization</td>
+    <td><a href="./tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq">link</a></td>
+</tr>
+<tr>
+    <td>transformer_lt</td>
+    <td>Natural Language Processing</td>
+    <td>Post-Training Static Quantization</td>
+    <td><a href="./tensorflow/nlp/transformer_lt/quantization/ptq">link</a></td>
+</tr>
+<tr>
+    <td>inception_v3</td>
+    <td>Image Recognition</td>
+    <td>Post-Training Static Quantization</td>
+    <td><a href="./tensorflow/image_recognition/inception_v3/quantization/ptq">link</a></td>
+</tr>
+<tr>
+    <td>mobilenetv2</td>
+    <td>Image Recognition</td>
+    <td>Post-Training Static Quantization</td>
+    <td><a href="./tensorflow/image_recognition/mobilenet_v2/quantization/ptq">link</a></td>
+</tr>
+<tr>
+    <td>resnetv2_50</td>
+    <td>Image Recognition</td>
+    <td>Post-Training Static Quantization</td>
+    <td><a href="./tensorflow/image_recognition/resnet_v2_50/quantization/ptq">link</a></td>
+</tr>
+<tr>
+    <td>vgg16</td>
+    <td>Image Recognition</td>
+    <td>Post-Training Static Quantization</td>
+    <td><a href="./tensorflow/image_recognition/vgg16/quantization/ptq">link</a></td>
+</tr>
+<tr>
+    <td>ViT</td>
+    <td>Image Recognition</td>
+    <td>Post-Training Static Quantization</td>
+    <td><a href="./tensorflow/image_recognition/vision_transformer/quantization/ptq">link</a></td>
+</tr>
+<tr>
+    <td>GraphSage</td>
+    <td>Graph Networks</td>
+    <td>Post-Training Static Quantization</td>
+    <td><a href="./tensorflow/graph_networks/graphsage/quantization/ptq">link</a></td>
+</tr>
+<tr>
+    <td>yolo_v5</td>
+    <td>Object Detection</td>
+    <td>Post-Training Static Quantization</td>
+    <td><a href="./tensorflow/object_detection/yolo_v5/quantization/ptq">link</a></td>
+</tr>
+<tr>
+    <td>faster_rcnn_resnet50</td>
+    <td>Object Detection</td>
+    <td>Post-Training Static Quantization</td>
+    <td><a href="./tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq">link</a></td>
+</tr>
+<tr>
+    <td>mask_rcnn_inception_v2</td>
+    <td>Object Detection</td>
+    <td>Post-Training Static Quantization</td>
+    <td><a href="./tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq">link</a></td>
+</tr>
+<tr>
+    <td>ssd_mobilenet_v1</td>
+    <td>Object Detection</td>
+    <td>Post-Training Static Quantization</td>
+    <td><a href="./tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq">link</a></td>
+</tr>
+<tr>
+    <td>wide_deep_large_ds</td>
+    <td>Recommendation</td>
+    <td>Post-Training Static Quantization</td>
+    <td><a href="./tensorflow/recommendation/wide_deep_large_ds/quantization/ptq">link</a></td>
+</tr>
+<tr>
+    <td>3dunet-mlperf</td>
+    <td>Semantic Image Segmentation</td>
+    <td>Post-Training Static Quantization</td>
+    <td><a href="./tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq">link</a></td>
+</tr>
+
+</tbody>
+</table>
+

Model	Domain	Method	Examples
gpt_j	Natural Language Processing	Weight-Only Quantization	link
gpt_j	Natural Language Processing	Static Quantization (IPEX)	link
llama2_7b	Natural Language Processing	Weight-Only Quantization	link
llama2_7b	Natural Language Processing	Static Quantization (IPEX)	link
opt_125m	Natural Language Processing	Static Quantization (IPEX)	link
opt_125m	Natural Language Processing	Static Quantization (PT2E)	link
opt_125m	Natural Language Processing	Weight-Only Quantization	link
resnet18	Image Recognition	Mixed Precision	link
resnet18	Image Recognition	Static Quantization	link
Model	Domain	Method	Examples
bert_large_squad_model_zoo	Natural Language Processing	Post-Training Static Quantization	link
transformer_lt	Natural Language Processing	Post-Training Static Quantization	link
inception_v3	Image Recognition	Post-Training Static Quantization	link
mobilenetv2	Image Recognition	Post-Training Static Quantization	link
resnetv2_50	Image Recognition	Post-Training Static Quantization	link
vgg16	Image Recognition	Post-Training Static Quantization	link
ViT	Image Recognition	Post-Training Static Quantization	link
GraphSage	Graph Networks	Post-Training Static Quantization	link
yolo_v5	Object Detection	Post-Training Static Quantization	link
faster_rcnn_resnet50	Object Detection	Post-Training Static Quantization	link
mask_rcnn_inception_v2	Object Detection	Post-Training Static Quantization	link
ssd_mobilenet_v1	Object Detection	Post-Training Static Quantization	link
wide_deep_large_ds	Recommendation	Post-Training Static Quantization	link
3dunet-mlperf	Semantic Image Segmentation	Post-Training Static Quantization	link