From 2634501690f2396865011c2f79c0b8adba36cb07 Mon Sep 17 00:00:00 2001
From: Zixuan Cheng <110808245+violetch24@users.noreply.github.com>
Date: Fri, 7 Jun 2024 15:00:30 +0800
Subject: [PATCH] fix act_observer for 3.x ipex sq and static (#1851)

Signed-off-by: Cheng, Zixuan
---
 .../torch/algorithms/static_quant/utility.py | 83 ++++++++++++++-----
 1 file changed, 60 insertions(+), 23 deletions(-)

diff --git a/neural_compressor/torch/algorithms/static_quant/utility.py b/neural_compressor/torch/algorithms/static_quant/utility.py
index a8efa77d340..f90471539fd 100644
--- a/neural_compressor/torch/algorithms/static_quant/utility.py
+++ b/neural_compressor/torch/algorithms/static_quant/utility.py
@@ -158,36 +158,73 @@ def check_cfg_and_qconfig(user_cfg, cfgs, op_infos_from_cfgs, output_tensor_ids_
 
 
 def generate_activation_observer(scheme, algorithm, smooth_quant=False, smooth_quant_enable=False):  # pragma: no cover
-    """This is a helper method to generate a dict containing activation observer info.
+    """This is a helper method to generate an activation observer.
 
     Args:
         scheme (str): Quantization scheme to be used.
         algorithm (str): What algorithm for computing the quantization parameters based on.
 
     Returns:
-        A dict containing observer info.
+        An observer.
     """
-    from intel_extension_for_pytorch.quantization._smooth_quant import SmoothQuantActivationObserver
-    from intel_extension_for_pytorch.quantization._utils import _get_observer_setting
-    from torch.quantization import HistogramObserver, MinMaxObserver
-
-    kl_activation_observer = _get_observer_setting(HistogramObserver(reduce_range=False))
-    minmax_activation_observer = _get_observer_setting(
-        MinMaxObserver(qscheme=torch.per_tensor_affine, dtype=torch.quint8)
-    )
-    smoothquant_kl_activation_observer = _get_observer_setting(
-        SmoothQuantActivationObserver(
-            reduce_range=False,
-            smooth_quant_enabled=smooth_quant_enable,
-        )
-    )
-    smoothquant_minmax_activation_observer = _get_observer_setting(
-        SmoothQuantActivationObserver(
-            reduce_range=False,
-            smooth_quant_enabled=smooth_quant_enable,
-        )
-    )
-
+    kl_activation_observer = {
+        "name": "HistogramObserver",
+        "bins": 2048,
+        "upsample_rate": 128,
+        "dtype": "torch.quint8",
+        "qscheme": "torch.per_tensor_affine",
+        "reduce_range": False,
+        "quant_min": 0,
+        "quant_max": 255,
+    }
+    minmax_activation_observer = {
+        "name": "MinMaxObserver",
+        "dtype": "torch.quint8",
+        "qscheme": "torch.per_tensor_affine",
+        "reduce_range": False,
+        "quant_min": 0,
+        "quant_max": 255,
+    }
+    smoothquant_kl_activation_observer = {
+        "name": "SmoothQuantActivationObserver",
+        "smooth_quant_enabled": smooth_quant_enable,
+        "dtype": "torch.quint8",
+        "qscheme": "torch.per_tensor_affine",
+        "reduce_range": False,
+        "quant_min": 0,
+        "quant_max": 255,
+        "alpha": 0.5,
+        "act_observer": kl_activation_observer,
+        "act_ic_observer": {
+            "name": "PerChannelMinMaxObserver",
+            "ch_axis": -1,
+            "dtype": "torch.quint8",
+            "qscheme": "torch.per_channel_affine",
+            "reduce_range": False,
+            "quant_min": 0,
+            "quant_max": 255,
+        },
+    }
+    smoothquant_minmax_activation_observer = {
+        "name": "SmoothQuantActivationObserver",
+        "smooth_quant_enabled": smooth_quant_enable,
+        "dtype": "torch.quint8",
+        "qscheme": "torch.per_tensor_affine",
+        "reduce_range": False,
+        "quant_min": 0,
+        "quant_max": 255,
+        "alpha": 0.5,
+        "act_observer": minmax_activation_observer,
+        "act_ic_observer": {
+            "name": "PerChannelMinMaxObserver",
+            "ch_axis": -1,
+            "dtype": "torch.quint8",
+            "qscheme": "torch.per_channel_affine",
+            "reduce_range": False,
+            "quant_min": 0,
+            "quant_max": 255,
+        },
+    }
     REDUCE_RANGE = False if CpuInfo().vnni else True
     if REDUCE_RANGE:
         minmax_activation_observer["reduce_range"] = REDUCE_RANGE