diff --git a/.azure-pipelines/scripts/models/run_pytorch_models_trigger.sh b/.azure-pipelines/scripts/models/run_pytorch_models_trigger.sh index 32bd2eb0109..896e377b755 100644 --- a/.azure-pipelines/scripts/models/run_pytorch_models_trigger.sh +++ b/.azure-pipelines/scripts/models/run_pytorch_models_trigger.sh @@ -53,15 +53,15 @@ elif [ "${model}" == "resnet18_fx" ]; then tuning_cmd="bash run_quant.sh --topology=resnet18 --dataset_location=${dataset_location} --input_model=${input_model}" benchmark_cmd="bash run_benchmark.sh --topology=resnet18 --dataset_location=${dataset_location} --mode=performance --batch_size=${batch_size} --iters=500" elif [ "${model}" == "opt_125m_woq_gptq_int4" ]; then - model_src_dir="nlp/huggingface_models/language-modeling/quantization/llm" + model_src_dir="nlp/huggingface_models/language-modeling/quantization/weight_only" inc_new_api=3x_pt tuning_cmd="bash run_quant.sh --topology=opt_125m_woq_gptq_int4" elif [ "${model}" == "opt_125m_woq_gptq_int4_dq_bnb" ]; then - model_src_dir="nlp/huggingface_models/language-modeling/quantization/llm" + model_src_dir="nlp/huggingface_models/language-modeling/quantization/weight_only" inc_new_api=3x_pt tuning_cmd="bash run_quant.sh --topology=opt_125m_woq_gptq_int4_dq_bnb" elif [ "${model}" == "opt_125m_woq_gptq_int4_dq_ggml" ]; then - model_src_dir="nlp/huggingface_models/language-modeling/quantization/llm" + model_src_dir="nlp/huggingface_models/language-modeling/quantization/weight_only" inc_new_api=3x_pt tuning_cmd="bash run_quant.sh --topology=opt_125m_woq_gptq_int4_dq_ggml" fi diff --git a/docs/3x/PT_WeightOnlyQuant.md b/docs/3x/PT_WeightOnlyQuant.md index 37cc934592a..b115b38fce3 100644 --- a/docs/3x/PT_WeightOnlyQuant.md +++ b/docs/3x/PT_WeightOnlyQuant.md @@ -258,7 +258,7 @@ loaded_model = load( ## Examples -Users can also refer to [examples](https://github.com/intel/neural-compressor/blob/master/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/llm) on how to quantize a model with WeightOnlyQuant. +Users can also refer to [examples](https://github.com/intel/neural-compressor/blob/master/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only) on how to quantize a model with WeightOnlyQuant. 
## Reference diff --git a/examples/.config/model_params_pytorch_3x.json b/examples/.config/model_params_pytorch_3x.json index dfedb7486d3..bbbab60bdbc 100644 --- a/examples/.config/model_params_pytorch_3x.json +++ b/examples/.config/model_params_pytorch_3x.json @@ -1,67 +1,151 @@ -{ - "pytorch": { - "gpt_j_ipex":{ - "model_src_dir": "nlp/huggingface_models/language-modeling/quantization/static_quant", - "dataset_location": "", - "input_model": "", - "main_script": "run_clm_no_trainer.py", - "batch_size": 1 - }, - "gpt_j_ipex_sq":{ - "model_src_dir": "nlp/huggingface_models/language-modeling/quantization/smooth_quant", - "dataset_location": "", - "input_model": "", - "main_script": "run_clm_no_trainer.py", - "batch_size": 1 - }, - "llama2_7b_ipex":{ - "model_src_dir": "nlp/huggingface_models/language-modeling/quantization/static_quant", - "dataset_location": "", - "input_model": "", - "main_script": "run_clm_no_trainer.py", - "batch_size": 1 - }, - "llama2_7b_ipex_sq":{ - "model_src_dir": "nlp/huggingface_models/language-modeling/quantization/smooth_quant", - "dataset_location": "", - "input_model": "", - "main_script": "run_clm_no_trainer.py", - "batch_size": 1 - }, - "opt_125m_ipex":{ - "model_src_dir": "nlp/huggingface_models/language-modeling/quantization/static_quant", - "dataset_location": "", - "input_model": "", - "main_script": "run_clm_no_trainer.py", - "batch_size": 8 - }, - "opt_125m_ipex_sq":{ - "model_src_dir": "nlp/huggingface_models/language-modeling/quantization/smooth_quant", - "dataset_location": "", - "input_model": "", - "main_script": "run_clm_no_trainer.py", - "batch_size": 8 - }, - "dlrm_ipex": { - "model_src_dir": "recommendation/dlrm/static_quant/ipex", - "dataset_location": "/mnt/local_disk3/dataset/dlrm/dlrm/input", - "input_model": "/mnt/local_disk3/dataset/dlrm/dlrm/dlrm_weight/tb00_40M.pt", - "main_script": "dlrm_s_pytorch.py", - "batch_size": 16384 - }, - "resnet18_pt2e_static":{ - "model_src_dir": "cv/static_quant", - "dataset_location": "/tf_dataset/pytorch/ImageNet/raw", - "input_model": "", - "main_script": "main.py", - "batch_size": 1 - }, - "opt_125m_pt2e_static":{ - "model_src_dir": "nlp/huggingface_models/language-modeling/quantization/static_quant/pt2e", - "dataset_location": "", - "input_model": "", - "main_script": "run_clm_no_trainer.py", - "batch_size": 1 - } - } -} +{ + "pytorch": { + "opt_125m_woq_gptq_int4":{ + "model_src_dir": "nlp/huggingface_models/language-modeling/quantization/weight_only", + "dataset_location": "", + "input_model": "", + "main_script": "run_clm_no_trainer.py", + "batch_size": 1 + }, + "opt_125m_woq_gptq_int4_dq_bnb":{ + "model_src_dir": "nlp/huggingface_models/language-modeling/quantization/weight_only", + "dataset_location": "", + "input_model": "", + "main_script": "run_clm_no_trainer.py", + "batch_size": 1 + }, + "opt_125m_woq_gptq_int4_dq_ggml":{ + "model_src_dir": "nlp/huggingface_models/language-modeling/quantization/weight_only", + "dataset_location": "", + "input_model": "", + "main_script": "run_clm_no_trainer.py", + "batch_size": 8 + }, + "llama2_7b_gptq_int4":{ + "model_src_dir": "nlp/huggingface_models/language-modeling/quantization/weight_only", + "dataset_location": "", + "input_model": "", + "main_script": "run_clm_no_trainer.py", + "batch_size": 8 + }, + "llama2_7b_gptq_int4_dq_bnb":{ + "model_src_dir": "nlp/huggingface_models/language-modeling/quantization/weight_only", + "dataset_location": "", + "input_model": "", + "main_script": "run_clm_no_trainer.py", + "batch_size": 8 + }, + 
"llama2_7b_gptq_int4_dq_ggml":{ + "model_src_dir": "nlp/huggingface_models/language-modeling/quantization/weight_only", + "dataset_location": "", + "input_model": "", + "main_script": "run_clm_no_trainer.py", + "batch_size": 8 + }, + "gpt_j_woq_rtn_int4":{ + "model_src_dir": "nlp/huggingface_models/language-modeling/quantization/weight_only", + "dataset_location": "", + "input_model": "", + "main_script": "run_clm_no_trainer.py", + "batch_size": 8 + }, + "gpt_j_woq_rtn_int4_dq_bnb":{ + "model_src_dir": "nlp/huggingface_models/language-modeling/quantization/weight_only", + "dataset_location": "", + "input_model": "", + "main_script": "run_clm_no_trainer.py", + "batch_size": 8 + }, + "gpt_j_woq_rtn_int4_dq_ggml":{ + "model_src_dir": "nlp/huggingface_models/language-modeling/quantization/weight_only", + "dataset_location": "", + "input_model": "", + "main_script": "run_clm_no_trainer.py", + "batch_size": 8 + }, + "gpt_j_woq_gptq_int4":{ + "model_src_dir": "nlp/huggingface_models/language-modeling/quantization/weight_only", + "dataset_location": "", + "input_model": "", + "main_script": "run_clm_no_trainer.py", + "batch_size": 8 + }, + "gpt_j_woq_gptq_int4_dq_bnb":{ + "model_src_dir": "nlp/huggingface_models/language-modeling/quantization/weight_only", + "dataset_location": "", + "input_model": "", + "main_script": "run_clm_no_trainer.py", + "batch_size": 8 + }, + "gpt_j_woq_gptq_int4_dq_ggml":{ + "model_src_dir": "nlp/huggingface_models/language-modeling/quantization/weight_only", + "dataset_location": "", + "input_model": "", + "main_script": "run_clm_no_trainer.py", + "batch_size": 8 + }, + "gpt_j_ipex":{ + "model_src_dir": "nlp/huggingface_models/language-modeling/quantization/static_quant", + "dataset_location": "", + "input_model": "", + "main_script": "run_clm_no_trainer.py", + "batch_size": 1 + }, + "gpt_j_ipex_sq":{ + "model_src_dir": "nlp/huggingface_models/language-modeling/quantization/smooth_quant", + "dataset_location": "", + "input_model": "", + "main_script": "run_clm_no_trainer.py", + "batch_size": 1 + }, + "llama2_7b_ipex":{ + "model_src_dir": "nlp/huggingface_models/language-modeling/quantization/static_quant", + "dataset_location": "", + "input_model": "", + "main_script": "run_clm_no_trainer.py", + "batch_size": 1 + }, + "llama2_7b_ipex_sq":{ + "model_src_dir": "nlp/huggingface_models/language-modeling/quantization/smooth_quant", + "dataset_location": "", + "input_model": "", + "main_script": "run_clm_no_trainer.py", + "batch_size": 1 + }, + "opt_125m_ipex":{ + "model_src_dir": "nlp/huggingface_models/language-modeling/quantization/static_quant", + "dataset_location": "", + "input_model": "", + "main_script": "run_clm_no_trainer.py", + "batch_size": 8 + }, + "opt_125m_ipex_sq":{ + "model_src_dir": "nlp/huggingface_models/language-modeling/quantization/smooth_quant", + "dataset_location": "", + "input_model": "", + "main_script": "run_clm_no_trainer.py", + "batch_size": 8 + }, + "dlrm_ipex": { + "model_src_dir": "recommendation/dlrm/static_quant/ipex", + "dataset_location": "/mnt/local_disk3/dataset/dlrm/dlrm/input", + "input_model": "/mnt/local_disk3/dataset/dlrm/dlrm/dlrm_weight/tb00_40M.pt", + "main_script": "dlrm_s_pytorch.py", + "batch_size": 16384 + }, + "resnet18_pt2e_static":{ + "model_src_dir": "cv/static_quant", + "dataset_location": "/tf_dataset/pytorch/ImageNet/raw", + "input_model": "", + "main_script": "main.py", + "batch_size": 1 + }, + "opt_125m_pt2e_static":{ + "model_src_dir": "nlp/huggingface_models/language-modeling/quantization/static_quant/pt2e", + 
"dataset_location": "", + "input_model": "", + "main_script": "run_clm_no_trainer.py", + "batch_size": 1 + } + } +} diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/README.md b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only/README.md similarity index 79% rename from examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/README.md rename to examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only/README.md index 1659ae41e75..889d7b42682 100644 --- a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/README.md +++ b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only/README.md @@ -21,27 +21,14 @@ Here is how to run the scripts: ### GPT-J-6b #### Quantization -```bash -# "--sq" is used to enable smooth quant -python run_clm_no_trainer.py \ - --model EleutherAI/gpt-j-6B \ - --quantize \ - --sq \ - --alpha 1.0 \ - --ipex \ - --output_dir "saved_results" -``` -**Notes**: Smooth quantization here is based on torch.jit. Without past key value in example_inputs, the quantized model cannot be used for text-generation. ```bash -# "--approach weight_only" is used to enable weight only quantization. # "--woq_algo GPTQ" is used to enable GPTQ algorithms # "--double_quant_type BNB_NF4" is used to enable double quant algorithms python run_clm_no_trainer.py \ --model EleutherAI/gpt-j-6B \ --dataset NeelNanda/pile-10k \ --quantize \ - --approach weight_only \ --woq_algo GPTQ \ --woq_bits 4 \ --woq_scheme asym \ @@ -57,7 +44,6 @@ python run_clm_no_trainer.py \ --model EleutherAI/gpt-j-6B \ --dataset NeelNanda/pile-10k \ --quantize \ - --approach weight_only \ --woq_algo RTN \ --woq_bits 4 \ --woq_scheme asym \ @@ -74,23 +60,12 @@ python run_clm_no_trainer.py \ #### Quantization ```bash -# "--sq" is used to enable smooth quant -python run_clm_no_trainer.py \ - --model facebook/opt-125m \ - --quantize \ - --sq \ - --alpha 0.5 \ - --ipex \ - --output_dir "saved_results" - -# "--approach weight_only" is used to enable weight only quantization. # "--woq_algo GPTQ" is used to enable GPTQ algorithms # "--double_quant_type BNB_NF4" is used to enable double quant algorithms python run_clm_no_trainer.py \ --model facebook/opt-125m \ --dataset NeelNanda/pile-10k \ --quantize \ - --approach weight_only \ --woq_algo GPTQ \ --woq_bits 4 \ --woq_scheme asym \ @@ -106,7 +81,6 @@ python run_clm_no_trainer.py \ --model facebook/opt-125m \ --dataset NeelNanda/pile-10k \ --quantize \ - --approach weight_only \ --woq_algo RTN \ --woq_bits 4 \ --woq_scheme asym \ @@ -121,23 +95,12 @@ python run_clm_no_trainer.py \ #### Quantization ```bash -# "--sq" is used to enable smooth quant -python run_clm_no_trainer.py \ - --model meta-llama/Llama-2-7b-hf \ - --quantize \ - --sq \ - --alpha 0.8 \ - --ipex \ - --output_dir "saved_results" - -# "--approach weight_only" is used to enable weight only quantization. 
# "--double_quant_type BNB_NF4" is used to enable double quant algorithms # "--woq_algo GPTQ" is used to enable GPTQ algorithms python run_clm_no_trainer.py \ --model meta-llama/Llama-2-7b-hf \ --dataset NeelNanda/pile-10k \ --quantize \ - --approach weight_only \ --woq_algo GPTQ \ --woq_bits 4 \ --woq_scheme asym \ @@ -153,7 +116,6 @@ python run_clm_no_trainer.py \ --model meta-llama/Llama-2-7b-hf \ --dataset NeelNanda/pile-10k \ --quantize \ - --approach weight_only \ --woq_algo RTN \ --woq_bits 4 \ --woq_scheme asym \ diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/requirements.txt b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only/requirements.txt similarity index 100% rename from examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/requirements.txt rename to examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only/requirements.txt diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/run_benchmark.sh b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only/run_benchmark.sh similarity index 78% rename from examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/run_benchmark.sh rename to examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only/run_benchmark.sh index 8002b61ad10..9e1d766128e 100644 --- a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/run_benchmark.sh +++ b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only/run_benchmark.sh @@ -12,7 +12,6 @@ function main { function init_params { iters=100 batch_size=16 - approach=static tuned_checkpoint=saved_results task=lambada_openai echo ${max_eval_samples} @@ -73,83 +72,52 @@ function run_benchmark { if [ "${topology}" = "opt_125m_woq_gptq_int4" ]; then model_name_or_path="facebook/opt-125m" - approach="weight_only" extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_use_max_length" elif [ "${topology}" = "opt_125m_woq_gptq_int4_dq_bnb" ]; then model_name_or_path="facebook/opt-125m" - approach="weight_only" extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_use_max_length" extra_cmd=$extra_cmd" --double_quant_type BNB_NF4" elif [ "${topology}" = "opt_125m_woq_gptq_int4_dq_ggml" ]; then model_name_or_path="facebook/opt-125m" - approach="weight_only" extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_use_max_length --gptq_percdamp 0.1 --gptq_actorder" extra_cmd=$extra_cmd" --double_quant_type GGML_TYPE_Q4_K" - elif [ "${topology}" = "opt_125m_ipex" ]; then - model_name_or_path="facebook/opt-125m" - extra_cmd=$extra_cmd" --ipex" - elif [ "${topology}" = "opt_125m_ipex_sq" ]; then - model_name_or_path="facebook/opt-125m" - extra_cmd=$extra_cmd" --ipex --sq --alpha 0.5" elif [ "${topology}" = "llama2_7b_gptq_int4" ]; then model_name_or_path="meta-llama/Llama-2-7b-hf" - approach="weight_only" extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_use_max_length" elif [ "${topology}" = "llama2_7b_gptq_int4_dq_bnb" ]; then model_name_or_path="meta-llama/Llama-2-7b-hf" - approach="weight_only" extra_cmd=$extra_cmd" 
--woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_use_max_length" extra_cmd=$extra_cmd" --double_quant_type BNB_NF4" elif [ "${topology}" = "llama2_7b_gptq_int4_dq_ggml" ]; then model_name_or_path="meta-llama/Llama-2-7b-hf" - approach="weight_only" extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_use_max_length" extra_cmd=$extra_cmd" --double_quant_type GGML_TYPE_Q4_K" - elif [ "${topology}" = "llama2_7b_ipex" ]; then - model_name_or_path="meta-llama/Llama-2-7b-hf" - extra_cmd=$extra_cmd" --ipex" - elif [ "${topology}" = "llama2_7b_ipex_sq" ]; then - model_name_or_path="meta-llama/Llama-2-7b-hf" - extra_cmd=$extra_cmd" --ipex --sq --alpha 0.8" elif [ "${topology}" = "gpt_j_woq_rtn_int4" ]; then model_name_or_path="EleutherAI/gpt-j-6b" - approach="weight_only" extra_cmd=$extra_cmd" --woq_algo RTN --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search" elif [ "${topology}" = "gpt_j_woq_rtn_int4_dq_bnb" ]; then - model_name_or_path="EleutherAI/gpt-j-6b" - approach="weight_only" + model_name_or_path="EleutherAI/gpt-j-6b" extra_cmd=$extra_cmd" --woq_algo RTN --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search" extra_cmd=$extra_cmd" --double_quant_type BNB_NF4" elif [ "${topology}" = "gpt_j_woq_rtn_int4_dq_ggml" ]; then - model_name_or_path="EleutherAI/gpt-j-6b" - approach="weight_only" + model_name_or_path="EleutherAI/gpt-j-6b" extra_cmd=$extra_cmd" --woq_algo RTN --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search" extra_cmd=$extra_cmd" --double_quant_type GGML_TYPE_Q4_K" elif [ "${topology}" = "gpt_j_woq_gptq_int4" ]; then model_name_or_path="EleutherAI/gpt-j-6b" - approach="weight_only" extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_use_max_length" elif [ "${topology}" = "gpt_j_woq_gptq_int4_dq_bnb" ]; then model_name_or_path="EleutherAI/gpt-j-6b" - approach="weight_only" extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_use_max_length" extra_cmd=$extra_cmd" --double_quant_type BNB_NF4" elif [ "${topology}" = "gpt_j_woq_gptq_int4_dq_ggml" ]; then model_name_or_path="EleutherAI/gpt-j-6b" - approach="weight_only" extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_use_max_length" extra_cmd=$extra_cmd" --double_quant_type GGML_TYPE_Q4_K" - elif [ "${topology}" = "gpt_j_ipex" ]; then - model_name_or_path="EleutherAI/gpt-j-6b" - extra_cmd=$extra_cmd" --ipex" - elif [ "${topology}" = "gpt_j_ipex_sq" ]; then - model_name_or_path="EleutherAI/gpt-j-6b" - extra_cmd=$extra_cmd" --ipex --sq --alpha 1.0" fi python -u run_clm_no_trainer.py \ --model ${model_name_or_path} \ - --approach ${approach} \ --output_dir ${tuned_checkpoint} \ --task ${task} \ --batch_size ${batch_size} \ diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/run_clm_no_trainer.py b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only/run_clm_no_trainer.py similarity index 64% rename from examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/run_clm_no_trainer.py rename to examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only/run_clm_no_trainer.py index c586f8d765e..8655c47a8da 100644 ---
a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/run_clm_no_trainer.py +++ b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only/run_clm_no_trainer.py @@ -35,10 +35,7 @@ '--seed', type=int, default=42, help='Seed for sampling the calibration data.' ) -parser.add_argument("--approach", type=str, default='static', - help="Select from ['dynamic', 'static', 'weight-only']") parser.add_argument("--int8", action="store_true") -parser.add_argument("--ipex", action="store_true", help="Use intel extension for pytorch.") parser.add_argument("--accuracy", action="store_true") parser.add_argument("--performance", action="store_true") parser.add_argument("--iters", default=100, type=int, @@ -54,9 +51,6 @@ parser.add_argument("--tasks", default="lambada_openai,hellaswag,winogrande,piqa,wikitext", type=str, help="tasks for accuracy validation") parser.add_argument("--peft_model_id", type=str, default=None, help="model_name_or_path of peft model") -# ============SmoothQuant configs============== -parser.add_argument("--sq", action="store_true") -parser.add_argument("--alpha", default="auto", help="Smooth quant parameter.") # ============WeightOnly configs=============== parser.add_argument("--woq_algo", default="RTN", choices=['RTN', 'AWQ', 'TEQ', 'GPTQ'], help="Weight-only parameter.") @@ -109,8 +103,6 @@ # ======================================= args = parser.parse_args() -if args.ipex: - import intel_extension_for_pytorch as ipex calib_size = 1 @@ -193,7 +185,7 @@ def evaluate(self, model): def get_user_model(): torchscript = False - if args.sq or args.ipex or args.woq_algo in ['AWQ', 'TEQ']: + if args.woq_algo in ['AWQ', 'TEQ']: torchscript = True user_model = AutoModelForCausalLM.from_pretrained( args.model, @@ -202,8 +194,7 @@ def get_user_model(): revision=args.revision, ) tokenizer = AutoTokenizer.from_pretrained(args.model) - if args.approach == 'weight_only': - user_model = user_model.float() + user_model = user_model.float() # Set model's seq_len when GPTQ calibration is enabled. if args.woq_algo == 'GPTQ': @@ -234,120 +225,52 @@ def get_user_model(): ) # 3.x api - if args.approach == 'weight_only': - from neural_compressor.torch.quantization import RTNConfig, GPTQConfig, prepare, convert, quantize - from neural_compressor.torch.utils import get_double_quant_config_dict - weight_sym = True if args.woq_scheme == "sym" else False + from neural_compressor.torch.quantization import RTNConfig, GPTQConfig, prepare, convert, quantize + from neural_compressor.torch.utils import get_double_quant_config_dict + weight_sym = True if args.woq_scheme == "sym" else False + if args.double_quant_type is not None: + double_quant_config_dict = get_double_quant_config_dict(args.double_quant_type) + + if args.woq_algo == "RTN": if args.double_quant_type is not None: - double_quant_config_dict = get_double_quant_config_dict(args.double_quant_type) - - if args.woq_algo == "RTN": - if args.double_quant_type is not None: - double_quant_config_dict.update( - { - # TODO: add group_dim into double quant config? 
- "use_full_range": args.woq_use_full_range, - "use_mse_search": args.woq_use_mse_search, - } - ) - quant_config = RTNConfig.from_dict(double_quant_config_dict) - else: - quant_config = RTNConfig( - dtype=args.woq_dtype, - bits=args.woq_bits, - use_sym=weight_sym, - group_size=args.woq_group_size, - group_dim=args.woq_group_dim, - use_full_range=args.woq_use_full_range, - use_mse_search=args.woq_use_mse_search, - use_double_quant=False, - double_quant_bits=args.double_quant_bits, - double_quant_dtype=args.double_quant_dtype, - double_quant_use_sym=args.double_quant_use_sym, - double_quant_group_size=args.double_quant_group_size, - ) - quant_config.set_local("lm_head", RTNConfig(dtype="fp32")) - user_model = prepare(model=user_model, quant_config=quant_config) - user_model = convert(model=user_model) - elif args.woq_algo == "GPTQ": - from utils import DataloaderPreprocessor - dataloaderPreprocessor = DataloaderPreprocessor( - dataloader_original=calib_dataloader, - use_max_length=args.gptq_use_max_length, - max_seq_length=args.gptq_max_seq_length, + double_quant_config_dict.update( + { + # TODO: add group_dim into double quant config? + "use_full_range": args.woq_use_full_range, + "use_mse_search": args.woq_use_mse_search, + } ) - dataloader_for_calibration = dataloaderPreprocessor.get_prepared_dataloader() - from neural_compressor.torch.algorithms.weight_only.utility import move_input_to_device - from tqdm import tqdm - def run_fn_for_gptq(model, dataloader_for_calibration, *args): - for batch in tqdm(dataloader_for_calibration): - batch = move_input_to_device(batch, device=None) - try: - if isinstance(batch, tuple) or isinstance(batch, list): - model(batch[0]) - elif isinstance(batch, dict): - model(**batch) - else: - model(batch) - except ValueError: - pass - return - if args.double_quant_type is not None: - double_quant_config_dict.update( - { - "use_mse_search": args.woq_use_mse_search, - "percdamp": args.gptq_percdamp, - "act_order": args.gptq_actorder, - "block_size": args.gptq_block_size, - "static_groups": args.gptq_static_groups, - } - ) - quant_config = GPTQConfig.from_dict(double_quant_config_dict) - else: - quant_config = GPTQConfig( - dtype=args.woq_dtype, - bits=args.woq_bits, - use_sym=weight_sym, - group_size=args.woq_group_size, - use_mse_search=args.woq_use_mse_search, - percdamp=args.gptq_percdamp, - act_order=args.gptq_actorder, - block_size=args.gptq_block_size, - static_groups=args.gptq_static_groups, - use_double_quant=False, - double_quant_bits=args.double_quant_bits, - double_quant_dtype=args.double_quant_dtype, - double_quant_use_sym=args.double_quant_use_sym, - double_quant_group_size=args.double_quant_group_size, - ) - quant_config.set_local("lm_head", GPTQConfig(dtype="fp32")) - user_model = prepare(model=user_model, quant_config=quant_config) - run_fn_for_gptq(user_model, dataloader_for_calibration) - user_model = convert(user_model) - else: - if args.sq: - from neural_compressor.torch.quantization import SmoothQuantConfig - - # alpha can be a float number of a list of float number. 
- args.alpha = args.alpha if args.alpha == "auto" else eval(args.alpha) - if re.search("falcon", user_model.config.model_type): - quant_config = SmoothQuantConfig(alpha=args.alpha, folding=False) - else: - quant_config = SmoothQuantConfig(alpha=args.alpha, folding=True) - - if re.search("gpt", user_model.config.model_type): - quant_config.set_local(torch.add, SmoothQuantConfig(w_dtype="fp32", act_dtype="fp32")) + quant_config = RTNConfig.from_dict(double_quant_config_dict) else: - from neural_compressor.torch.quantization import get_default_static_config, StaticQuantConfig - - quant_config = get_default_static_config() - if re.search("gpt", user_model.config.model_type): - quant_config.set_local(torch.add, StaticQuantConfig(w_dtype="fp32", act_dtype="fp32")) - - from neural_compressor.torch.algorithms.smooth_quant import move_input_to_device + quant_config = RTNConfig( + dtype=args.woq_dtype, + bits=args.woq_bits, + use_sym=weight_sym, + group_size=args.woq_group_size, + group_dim=args.woq_group_dim, + use_full_range=args.woq_use_full_range, + use_mse_search=args.woq_use_mse_search, + use_double_quant=False, + double_quant_bits=args.double_quant_bits, + double_quant_dtype=args.double_quant_dtype, + double_quant_use_sym=args.double_quant_use_sym, + double_quant_group_size=args.double_quant_group_size, + ) + quant_config.set_local("lm_head", RTNConfig(dtype="fp32")) + user_model = prepare(model=user_model, quant_config=quant_config) + user_model = convert(model=user_model) + elif args.woq_algo == "GPTQ": + from utils import DataloaderPreprocessor + dataloaderPreprocessor = DataloaderPreprocessor( + dataloader_original=calib_dataloader, + use_max_length=args.gptq_use_max_length, + max_seq_length=args.gptq_max_seq_length, + ) + dataloader_for_calibration = dataloaderPreprocessor.get_prepared_dataloader() + from neural_compressor.torch.algorithms.weight_only.utility import move_input_to_device from tqdm import tqdm - def run_fn(model): - for batch in tqdm(calib_dataloader): + def run_fn_for_gptq(model, dataloader_for_calibration, *args): + for batch in tqdm(dataloader_for_calibration): batch = move_input_to_device(batch, device=None) try: if isinstance(batch, tuple) or isinstance(batch, list): @@ -359,13 +282,37 @@ def run_fn(model): except ValueError: pass return - - from utils import get_example_inputs - example_inputs = get_example_inputs(user_model, calib_dataloader) - - from neural_compressor.torch.quantization import prepare, convert - user_model = prepare(model=user_model, quant_config=quant_config, example_inputs=example_inputs) - run_fn(user_model) + if args.double_quant_type is not None: + double_quant_config_dict.update( + { + "use_mse_search": args.woq_use_mse_search, + "percdamp": args.gptq_percdamp, + "act_order": args.gptq_actorder, + "block_size": args.gptq_block_size, + "static_groups": args.gptq_static_groups, + } + ) + quant_config = GPTQConfig.from_dict(double_quant_config_dict) + else: + quant_config = GPTQConfig( + dtype=args.woq_dtype, + bits=args.woq_bits, + use_sym=weight_sym, + group_size=args.woq_group_size, + use_mse_search=args.woq_use_mse_search, + percdamp=args.gptq_percdamp, + act_order=args.gptq_actorder, + block_size=args.gptq_block_size, + static_groups=args.gptq_static_groups, + use_double_quant=False, + double_quant_bits=args.double_quant_bits, + double_quant_dtype=args.double_quant_dtype, + double_quant_use_sym=args.double_quant_use_sym, + double_quant_group_size=args.double_quant_group_size, + ) + quant_config.set_local("lm_head", GPTQConfig(dtype="fp32")) 
+ user_model = prepare(model=user_model, quant_config=quant_config) + run_fn_for_gptq(user_model, dataloader_for_calibration) user_model = convert(user_model) user_model.save(args.output_dir) diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/run_quant.sh b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only/run_quant.sh similarity index 77% rename from examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/run_quant.sh rename to examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only/run_quant.sh index 3f95f44946e..079a1d28406 100644 --- a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/run_quant.sh +++ b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only/run_quant.sh @@ -38,91 +38,59 @@ function init_params { function run_tuning { extra_cmd='' batch_size=8 - approach='static' DATASET_NAME="NeelNanda/pile-10k" tuned_checkpoint="saved_results" if [ "${topology}" = "opt_125m_woq_gptq_int4" ]; then model_name_or_path="facebook/opt-125m" - approach="weight_only" extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_use_max_length" elif [ "${topology}" = "opt_125m_woq_gptq_int4_dq_bnb" ]; then model_name_or_path="facebook/opt-125m" - approach="weight_only" extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_use_max_length" extra_cmd=$extra_cmd" --double_quant_type BNB_NF4" elif [ "${topology}" = "opt_125m_woq_gptq_int4_dq_ggml" ]; then model_name_or_path="facebook/opt-125m" - approach="weight_only" extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_use_max_length --gptq_percdamp 0.1 --gptq_actorder" extra_cmd=$extra_cmd" --double_quant_type GGML_TYPE_Q4_K" - elif [ "${topology}" = "opt_125m_ipex" ]; then - model_name_or_path="facebook/opt-125m" - extra_cmd=$extra_cmd" --ipex" - elif [ "${topology}" = "opt_125m_ipex_sq" ]; then - model_name_or_path="facebook/opt-125m" - extra_cmd=$extra_cmd" --ipex --sq --alpha 0.5" elif [ "${topology}" = "llama2_7b_gptq_int4" ]; then model_name_or_path="meta-llama/Llama-2-7b-hf" - approach="weight_only" extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_use_max_length" elif [ "${topology}" = "llama2_7b_gptq_int4_dq_bnb" ]; then model_name_or_path="meta-llama/Llama-2-7b-hf" - approach="weight_only" extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_use_max_length" extra_cmd=$extra_cmd" --double_quant_type BNB_NF4" elif [ "${topology}" = "llama2_7b_gptq_int4_dq_ggml" ]; then model_name_or_path="meta-llama/Llama-2-7b-hf" - approach="weight_only" extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_use_max_length" extra_cmd=$extra_cmd" --double_quant_type GGML_TYPE_Q4_K" - elif [ "${topology}" = "llama2_7b_ipex" ]; then - model_name_or_path="meta-llama/Llama-2-7b-hf" - extra_cmd=$extra_cmd" --ipex" - elif [ "${topology}" = "llama2_7b_ipex_sq" ]; then - model_name_or_path="meta-llama/Llama-2-7b-hf" - extra_cmd=$extra_cmd" --ipex --sq --alpha 0.8" elif [ "${topology}" = "gpt_j_woq_rtn_int4" ]; then model_name_or_path="EleutherAI/gpt-j-6b" - 
approach="weight_only" extra_cmd=$extra_cmd" --woq_algo RTN --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search" elif [ "${topology}" = "gpt_j_woq_rtn_int4_dq_bnb" ]; then model_name_or_path="EleutherAI/gpt-j-6b" - approach="weight_only" extra_cmd=$extra_cmd" --woq_algo RTN --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search" extra_cmd=$extra_cmd" --double_quant_type BNB_NF4" elif [ "${topology}" = "gpt_j_woq_rtn_int4_dq_ggml" ]; then model_name_or_path="EleutherAI/gpt-j-6b" - approach="weight_only" extra_cmd=$extra_cmd" --woq_algo RTN --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search" extra_cmd=$extra_cmd" --double_quant_type GGML_TYPE_Q4_K" elif [ "${topology}" = "gpt_j_woq_gptq_int4" ]; then model_name_or_path="EleutherAI/gpt-j-6b" - approach="weight_only" extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_use_max_length" elif [ "${topology}" = "gpt_j_woq_gptq_int4_dq_bnb" ]; then model_name_or_path="EleutherAI/gpt-j-6b" - approach="weight_only" extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_use_max_length" extra_cmd=$extra_cmd" --double_quant_type BNB_NF4" elif [ "${topology}" = "gpt_j_woq_gptq_int4_dq_ggml" ]; then model_name_or_path="EleutherAI/gpt-j-6b" - approach="weight_only" extra_cmd=$extra_cmd" --woq_algo GPTQ --woq_bits 4 --woq_group_size 128 --woq_scheme asym --woq_use_mse_search --gptq_use_max_length" extra_cmd=$extra_cmd" --double_quant_type GGML_TYPE_Q4_K" - elif [ "${topology}" = "gpt_j_ipex" ]; then - model_name_or_path="EleutherAI/gpt-j-6b" - extra_cmd=$extra_cmd" --ipex" - elif [ "${topology}" = "gpt_j_ipex_sq" ]; then - model_name_or_path="EleutherAI/gpt-j-6b" - extra_cmd=$extra_cmd" --ipex --sq --alpha 1.0" fi python -u run_clm_no_trainer.py \ --model ${model_name_or_path} \ --dataset ${DATASET_NAME} \ --quantize \ - --approach ${approach} \ --output_dir ${tuned_checkpoint} \ --tasks "lambada_openai" \ --batch_size ${batch_size} \ diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/utils.py b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only/utils.py similarity index 100% rename from examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/utils.py rename to examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only/utils.py diff --git a/neural_compressor/torch/algorithms/weight_only/modules.py b/neural_compressor/torch/algorithms/weight_only/modules.py index 702a22b49ad..18cf6e46e55 100644 --- a/neural_compressor/torch/algorithms/weight_only/modules.py +++ b/neural_compressor/torch/algorithms/weight_only/modules.py @@ -303,7 +303,6 @@ def unpack_tensor_with_torch(self, packed_tensor): def pack_tensor_with_numpy(self, raw_tensor): raw_array = raw_tensor.cpu().numpy() target_len = np.ceil(raw_array.shape[1] / self.n_pack).astype(int) - torch.int32 target_dtype = torch.tensor(0, dtype=self.compression_dtype).numpy().dtype packed_array = np.zeros((raw_array.shape[0], target_len), dtype=target_dtype) mask = np.uint8(2**self.bits - 1) diff --git a/neural_compressor/torch/algorithms/weight_only/save_load.py b/neural_compressor/torch/algorithms/weight_only/save_load.py index 7494dac86f9..231502c32c6 100644 --- a/neural_compressor/torch/algorithms/weight_only/save_load.py +++ 
b/neural_compressor/torch/algorithms/weight_only/save_load.py @@ -41,7 +41,7 @@ def save(model, output_dir="./saved_results"): del model.save torch.save(model.state_dict(), qmodel_weight_file_path) - logger.info("Save quantized model weight to {}.".format(qmodel_weight_file_path)) + logger.info("Save quantized model to {}.".format(qmodel_weight_file_path)) logger.info("Save configuration of quantized model to {}.".format(qconfig_file_path))
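
For reviewers who want the end-to-end shape of the 3.x weight-only API that the reworked `run_clm_no_trainer.py` above now calls directly, a minimal sketch follows. It is not part of the diff; the model name, RTN settings, and output directory are illustrative, and it only uses the `neural_compressor.torch.quantization` entry points visible in the changed files.

```python
# Minimal sketch of the 3.x weight-only RTN flow used by run_clm_no_trainer.py.
# Model name, RTN settings, and output directory are illustrative choices.
from transformers import AutoModelForCausalLM
from neural_compressor.torch.quantization import RTNConfig, prepare, convert

user_model = AutoModelForCausalLM.from_pretrained("facebook/opt-125m")
user_model = user_model.float()  # the example casts weights to fp32 before weight-only quantization

# 4-bit asymmetric RTN with group size 128; keep lm_head in fp32, as the example does
quant_config = RTNConfig(bits=4, use_sym=False, group_size=128)
quant_config.set_local("lm_head", RTNConfig(dtype="fp32"))

user_model = prepare(model=user_model, quant_config=quant_config)
user_model = convert(model=user_model)

# convert() attaches a save() helper (see save_load.py); it writes the quantized
# state_dict and the quantization configuration under the output directory
user_model.save("saved_results")
```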