Skip to content

Commit

Permalink
fix act_observer for 3.x ipex sq and static (#1851)
Browse files Browse the repository at this point in the history
Signed-off-by: Cheng, Zixuan <[email protected]>
  • Loading branch information
violetch24 authored Jun 7, 2024
1 parent 9d4c88b commit 2634501
Showing 1 changed file with 60 additions and 23 deletions.
83 changes: 60 additions & 23 deletions neural_compressor/torch/algorithms/static_quant/utility.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,36 +158,73 @@ def check_cfg_and_qconfig(user_cfg, cfgs, op_infos_from_cfgs, output_tensor_ids_


def generate_activation_observer(scheme, algorithm, smooth_quant=False, smooth_quant_enable=False): # pragma: no cover
"""This is a helper method to generate a dict containing activation observer info.
"""This is a helper method to generate an activation observer.
Args:
scheme (str): Quantization scheme to be used.
algorithm (str): What algorithm for computing the quantization parameters based on.
Returns:
A dict containing observer info.
An observer.
"""
from intel_extension_for_pytorch.quantization._smooth_quant import SmoothQuantActivationObserver
from intel_extension_for_pytorch.quantization._utils import _get_observer_setting
from torch.quantization import HistogramObserver, MinMaxObserver

kl_activation_observer = _get_observer_setting(HistogramObserver(reduce_range=False))
minmax_activation_observer = _get_observer_setting(
MinMaxObserver(qscheme=torch.per_tensor_affine, dtype=torch.quint8)
)
smoothquant_kl_activation_observer = _get_observer_setting(
SmoothQuantActivationObserver(
reduce_range=False,
smooth_quant_enabled=smooth_quant_enable,
)
)
smoothquant_minmax_activation_observer = _get_observer_setting(
SmoothQuantActivationObserver(
reduce_range=False,
smooth_quant_enabled=smooth_quant_enable,
)
)

kl_activation_observer = {
"name": "HistogramObserver",
"bins": 2048,
"upsample_rate": 128,
"dtype": "torch.quint8",
"qscheme": "torch.per_tensor_affine",
"reduce_range": False,
"quant_min": 0,
"quant_max": 255,
}
minmax_activation_observer = {
"name": "MinMaxObserver",
"dtype": "torch.quint8",
"qscheme": "torch.per_tensor_affine",
"reduce_range": False,
"quant_min": 0,
"quant_max": 255,
}
smoothquant_kl_activation_observer = {
"name": "SmoothQuantActivationObserver",
"smooth_quant_enabled": smooth_quant_enable,
"dtype": "torch.quint8",
"qscheme": "torch.per_tensor_affine",
"reduce_range": False,
"quant_min": 0,
"quant_max": 255,
"alpha": 0.5,
"act_observer": kl_activation_observer,
"act_ic_observer": {
"name": "PerChannelMinMaxObserver",
"ch_axis": -1,
"dtype": "torch.quint8",
"qscheme": "torch.per_channel_affine",
"reduce_range": False,
"quant_min": 0,
"quant_max": 255,
},
}
smoothquant_minmax_activation_observer = {
"name": "SmoothQuantActivationObserver",
"smooth_quant_enabled": smooth_quant_enable,
"dtype": "torch.quint8",
"qscheme": "torch.per_tensor_affine",
"reduce_range": False,
"quant_min": 0,
"quant_max": 255,
"alpha": 0.5,
"act_observer": minmax_activation_observer,
"act_ic_observer": {
"name": "PerChannelMinMaxObserver",
"ch_axis": -1,
"dtype": "torch.quint8",
"qscheme": "torch.per_channel_affine",
"reduce_range": False,
"quant_min": 0,
"quant_max": 255,
},
}
REDUCE_RANGE = False if CpuInfo().vnni else True
if REDUCE_RANGE:
minmax_activation_observer["reduce_range"] = REDUCE_RANGE
Expand Down

0 comments on commit 2634501

Please sign in to comment.