diff --git a/.azure-pipelines/scripts/ut/3x/coverage.3x_pt b/.azure-pipelines/scripts/ut/3x/coverage.3x_pt index 34fc7f29fcf..2902c0c8f9c 100644 --- a/.azure-pipelines/scripts/ut/3x/coverage.3x_pt +++ b/.azure-pipelines/scripts/ut/3x/coverage.3x_pt @@ -5,6 +5,9 @@ branch = True include = */neural_compressor/common/* */neural_compressor/torch/* +omit = + */neural_compressor/torch/algorithms/habana_fp8/* + */neural_compressor/torch/amp/* exclude_lines = pragma: no cover raise NotImplementedError diff --git a/.azure-pipelines/scripts/ut/3x/run_3x_tf.sh b/.azure-pipelines/scripts/ut/3x/run_3x_tf.sh index d1aee3a98cb..f6e54ba2662 100644 --- a/.azure-pipelines/scripts/ut/3x/run_3x_tf.sh +++ b/.azure-pipelines/scripts/ut/3x/run_3x_tf.sh @@ -16,20 +16,36 @@ inc_path=$(python -c 'import neural_compressor; print(neural_compressor.__path__ cd /neural-compressor/test/3x || exit 1 rm -rf torch rm -rf onnxrt -rm -rf tensorflow/quantization/ptq/newapi mv tensorflow/keras ../3x_keras -mv tensorflow/quantization/itex ./3x_itex +mv tensorflow/quantization/ptq/newapi ../3x_newapi LOG_DIR=/neural-compressor/log_dir mkdir -p ${LOG_DIR} ut_log_name=${LOG_DIR}/ut_3x_tf.log + +# test for tensorflow ut pytest --cov="${inc_path}" -vs --disable-warnings --html=report_tf_quant.html --self-contained-html ./tensorflow/quantization 2>&1 | tee -a ${ut_log_name} rm -rf tensorflow/quantization pytest --cov="${inc_path}" --cov-append -vs --disable-warnings --html=report_tf.html --self-contained-html . 2>&1 | tee -a ${ut_log_name} +# test for tensorflow new api ut +pip uninstall tensorflow -y +pip install /tf_dataset/tf_binary/230928/tensorflow*.whl +pip install cmake +pip install protobuf==3.20.3 +pip install horovod==0.27.0 +pip list +rm -rf tensorflow/* +mkdir -p tensorflow/quantization/ptq +mv ../3x_newapi tensorflow/quantization/ptq/newapi +find . -name "test*.py" | sed "s,\.\/,python -m pytest --cov=${inc_path} --cov-append -vs --disable-warnings ,g" > run.sh +cat run.sh +bash run.sh 2>&1 | tee -a ${ut_log_name} + +# test for itex ut rm -rf tensorflow/* mv ../3x_keras tensorflow/keras -mv ../3x_itex tensorflow/quantization/itex +pip uninstall tensorflow -y pip install intel-extension-for-tensorflow[cpu] pytest --cov="${inc_path}" --cov-append -vs --disable-warnings --html=report_keras.html --self-contained-html ./tensorflow 2>&1 | tee -a ${ut_log_name} diff --git a/.azure-pipelines/scripts/ut/3x/run_3x_tf_new_api.sh b/.azure-pipelines/scripts/ut/3x/run_3x_tf_new_api.sh deleted file mode 100644 index 218e32a9b3a..00000000000 --- a/.azure-pipelines/scripts/ut/3x/run_3x_tf_new_api.sh +++ /dev/null @@ -1,46 +0,0 @@ -#!/bin/bash -python -c "import neural_compressor as nc" -test_case="run 3x New TF API" -echo "${test_case}" - -# install requirements -echo "set up UT env..." 
-pip install -r /neural-compressor/test/3x/tensorflow/requirements.txt -pip install pytest-html -pip install pytest-html-merger - -pip uninstall tensorflow -y -pip install /tf_dataset/tf_binary/230928/tensorflow*.whl -pip install cmake -pip install protobuf==3.20.3 -pip install horovod==0.27.0 -pip list - -cd /neural-compressor/test/3x || exit 1 -mv tensorflow/quantization/ptq/newapi ../3x_newapi -rm -rf ./* - -LOG_DIR=/neural-compressor/log_dir -mkdir -p ${LOG_DIR} -ut_log_name=${LOG_DIR}/ut_3x_new_tf.log - -mkdir -p tensorflow/quantization/ptq -mv ../3x_newapi tensorflow/quantization/ptq/newapi - -pytest -vs --disable-warnings --html=report_new_tf_quant_one_case.html --self-contained-html ./tensorflow/quantization/ptq/newapi/test_big_saved_model.py 2>&1 | tee -a ${ut_log_name} -rm -rf tensorflow/quantization/ptq/newapi/test_big_saved_model.py -pytest -vs --disable-warnings --html=report_new_tf_quant.html --self-contained-html ./tensorflow/quantization/ptq/newapi 2>&1 | tee -a ${ut_log_name} - -mkdir -p report -mv *.html report -pytest_html_merger -i ./report -o ./report.html - -cp report.html ${LOG_DIR}/ - -if [ $(grep -c '== FAILURES ==' ${ut_log_name}) != 0 ] || [ $(grep -c '== ERRORS ==' ${ut_log_name}) != 0 ] || [ $(grep -c ' passed' ${ut_log_name}) == 0 ]; then - echo "Find errors in pytest case, please check the output..." - echo "Please search for '== FAILURES ==' or '== ERRORS =='" - exit 1 -fi - -echo "UT finished successfully! " \ No newline at end of file diff --git a/.azure-pipelines/ut-3x-tf.yml b/.azure-pipelines/ut-3x-tf.yml index 0fdc0c02f26..df852e28000 100644 --- a/.azure-pipelines/ut-3x-tf.yml +++ b/.azure-pipelines/ut-3x-tf.yml @@ -41,20 +41,6 @@ stages: uploadPath: $(UPLOAD_PATH) utArtifact: "ut_3x" - - stage: NewTF - displayName: Unit Test 3x New TF API - dependsOn: [] - jobs: - - job: - displayName: Unit Test 3x New TF API - steps: - - template: template/ut-template.yml - parameters: - dockerConfigName: "commonDockerConfig" - utScriptFileName: "3x/run_3x_tf_new_api" - uploadPath: $(UPLOAD_PATH) - utArtifact: "ut_3x_tf_new_api" - - stage: TensorFlow_baseline displayName: Unit Test 3x TensorFlow baseline dependsOn: [] diff --git a/.github/checkgroup.yml b/.github/checkgroup.yml index 697e70799c4..63b228f3f7e 100644 --- a/.github/checkgroup.yml +++ b/.github/checkgroup.yml @@ -53,11 +53,7 @@ subprojects: - "Model-Test (Run ONNX Model resnet50-v1-12)" - "Model-Test (Run PyTorch Model resnet18)" - "Model-Test (Run PyTorch Model resnet18_fx)" - - "Model-Test (Run TensorFlow Model darknet19)" - - "Model-Test (Run TensorFlow Model inception_v1)" - - "Model-Test (Run TensorFlow Model resnet-101)" - "Model-Test (Run TensorFlow Model resnet50v1.5)" - - "Model-Test (Run TensorFlow Model ssd_mobilenet_v1_ckpt)" - "Model-Test (Run TensorFlow Model ssd_resnet50_v1)" - id: "Model Tests 3x workflow" diff --git a/neural_compressor/tensorflow/algorithms/static_quant/keras.py b/neural_compressor/tensorflow/algorithms/static_quant/keras.py index c4b15d847a3..004393c8c27 100644 --- a/neural_compressor/tensorflow/algorithms/static_quant/keras.py +++ b/neural_compressor/tensorflow/algorithms/static_quant/keras.py @@ -90,46 +90,13 @@ def __init__(self, framework_specific_info): os.mkdir(DEFAULT_WORKSPACE) self.tmp_dir = (DEFAULT_WORKSPACE + "tmp_model.keras") if self.keras3 else (DEFAULT_WORKSPACE + "tmp_model") - def _check_itex(self): - """Check if the IntelĀ® Extension for TensorFlow has been installed.""" - try: - import intel_extension_for_tensorflow - except: - raise ImportError( - 
"The IntelĀ® Extension for TensorFlow is not installed. " - "Please install it to run models on ITEX backend" - ) - - def convert_bf16(self): - """Execute the BF16 conversion.""" - tf.keras.mixed_precision.set_global_policy("mixed_bfloat16") - model = self.pre_optimized_model - - for layer in model.layers: - if layer.name in self.bf16_ops: - layer.dtype = "mixed_bfloat16" - - model.save(self.tmp_dir) - converted_model = tf.keras.models.load_model(self.tmp_dir) - tf.keras.mixed_precision.set_global_policy("float32") - - return converted_model - - # (TODO) choose the properly quantize mode - def _check_quantize_mode(self, model): - """Check what quantize mode to use.""" - for layer in model.layers: - if "ReLU" in layer.__class__.__name__: - return "MIN_FIRST" - return "SCALED" - def _set_weights(self, qmodel, layer_weights): """Set fp32 weights to qmodel.""" for qlayer in qmodel.layers: if qlayer.get_weights(): if qlayer.name in layer_weights: qlayer.set_weights(layer_weights[qlayer.name]) - else: + else: # pragma: no cover hit_layer = False for sub_layer in qlayer.submodules: if sub_layer.name in layer_weights: @@ -164,7 +131,7 @@ def _check_quantize_format(self, model): self.conv_format[layer.name] = "u8" break - def _fuse_bn_keras3(self, fuse_conv_bn, fp32_layers): + def _fuse_bn_keras3(self, fuse_conv_bn, fp32_layers): # pragma: no cover fuse_layers = [] fused_bn_name = "" for idx, layer in enumerate(fp32_layers): @@ -211,7 +178,7 @@ def _fuse_bn_keras3(self, fuse_conv_bn, fp32_layers): return fuse_layers - def _fuse_bn_keras2(self, fuse_conv_bn, fp32_layers): + def _fuse_bn_keras2(self, fuse_conv_bn, fp32_layers): # pragma: no cover fuse_layers = [] for idx, layer in enumerate(fp32_layers): if hasattr(layer, "_inbound_nodes"): @@ -272,7 +239,7 @@ def _fuse_bn_keras2(self, fuse_conv_bn, fp32_layers): return fuse_layers - def _fuse_bn(self, model): + def _fuse_bn(self, model): # pragma: no cover """Fusing Batch Normalization.""" model.save(self.tmp_dir) fuse_bn_model = tf.keras.models.load_model(self.tmp_dir) @@ -362,14 +329,6 @@ def quantize(self, quant_config, model, dataloader, iteration, q_func=None): tune_cfg = converter.parse_to_tune_cfg() self.tuning_cfg_to_fw(tune_cfg) - # just convert the input model to mixed_bfloat16 - if self.bf16_ops and not self.quantize_config["op_wise_config"]: - converted_model = self.convert_bf16() - return converted_model - - # if self.backend == "itex": - # self._check_itex() - logger.debug("Dump quantization configurations:") logger.debug(self.quantize_config) calib_sampling_size = tune_cfg.get("calib_sampling_size", 1) @@ -469,59 +428,6 @@ def _calibrate(self, model, dataloader, calib_interation): return quantized_model - @dump_elapsed_time(customized_msg="Model inference") - def evaluate( - self, - model, - dataloader, - postprocess=None, - metrics=None, - measurer=None, - iteration=-1, - fp32_baseline=False, - ): - """The function is used to run evaluation on validation dataset. - - Args: - model (object): The model to do calibration. - dataloader (generator): generate the data and labels. - postprocess (object, optional): process the result from the model - metric (object, optional): Depends on model category. Defaults to None. - measurer (object, optional): for precise benchmark measurement. 
- iteration(int, optional): control steps of mini-batch - fp32_baseline (boolean, optional): only for compare_label=False pipeline - """ - # use keras object - keras_model = model.model - logger.info("Start to evaluate the Keras model.") - results = [] - for idx, (inputs, labels) in enumerate(dataloader): - # use predict on batch - if measurer is not None: - measurer.start() - predictions = keras_model.predict_on_batch(inputs) - measurer.end() - else: - predictions = keras_model.predict_on_batch(inputs) - - if self.fp32_preds_as_label: - self.fp32_results.append(predictions) if fp32_baseline else results.append(predictions) - - if postprocess is not None: - predictions, labels = postprocess((predictions, labels)) - if metrics: - for metric in metrics: - if not hasattr(metric, "compare_label") or ( - hasattr(metric, "compare_label") and metric.compare_label - ): - metric.update(predictions, labels) - if idx + 1 == iteration: - break - - acc = 0 if metrics is None else [metric.result() for metric in metrics] - - return acc if not isinstance(acc, list) or len(acc) > 1 else acc[0] - def query_fw_capability(self, model): """The function is used to return framework tuning capability. @@ -621,7 +527,7 @@ def tuning_cfg_to_fw(self, tuning_cfg): for each_op_info in tuning_cfg["op"]: op_name = each_op_info[0] - if tuning_cfg["op"][each_op_info]["activation"]["dtype"] == "bf16": + if tuning_cfg["op"][each_op_info]["activation"]["dtype"] == "bf16": # pragma: no cover if each_op_info[1] in bf16_type: bf16_ops.append(op_name) continue @@ -693,31 +599,6 @@ def _get_specified_version_cfg(self, data): return default_config - def get_version(self): - """Get the current backend version information. - - Returns: - [string]: version string. - """ - return self.cur_config["version"]["name"] - - def get_precisions(self): - """Get supported precisions for current backend. - - Returns: - [string list]: the precisions' name. - """ - return self.cur_config["precisions"]["names"] - - def get_op_types(self): - """Get the supported op types by all precisions. - - Returns: - [dictionary list]: A list composed of dictionary which key is precision - and value is the op types. - """ - return self.cur_config["ops"] - def get_quantization_capability(self): """Get the supported op types' quantization capability. 
@@ -846,7 +727,7 @@ def _parse_inputs(self, BN_fused_layers=None, conv_names=None): try: model_input = self.model.input - except ValueError: + except ValueError: # pragma: no cover model_input = self.model.inputs[0] return input_layer_dict, model_input diff --git a/neural_compressor/tensorflow/algorithms/static_quant/tensorflow.py b/neural_compressor/tensorflow/algorithms/static_quant/tensorflow.py index 18f514ba306..4f279e20073 100644 --- a/neural_compressor/tensorflow/algorithms/static_quant/tensorflow.py +++ b/neural_compressor/tensorflow/algorithms/static_quant/tensorflow.py @@ -37,7 +37,6 @@ BaseDataLoader, BaseModel, CpuInfo, - Dequantize, Statistics, deep_get, dump_elapsed_time, @@ -87,8 +86,8 @@ def __init__(self, framework_specific_info): cfg_yaml_name = "{}.yaml".format(self.__class__.__name__[: -len("Adaptor")].lower()) self.itex_mode = self.backend == "itex" or cfg_yaml_name == "tensorflow_itex.yaml" - # if self.itex_mode: - # self._check_itex() + if self.itex_mode: + self._check_itex() self.query_handler = TensorflowQuery( local_config_file=os.path.join(os.path.dirname(__file__), cfg_yaml_name), @@ -119,284 +118,6 @@ def _check_itex(self): "Please install it to run models on ITEX backend" ) - def _log_histogram(self, writer, tag, values, step=0, bins=1000): - """Writes a histogram for later analysis.""" - # Convert to a numpy array - values = np.array(values) - - # Create and write Summary - # update using TF2.X API - with writer.as_default(): - tf.summary.histogram(tag, values, step) - writer.flush() - - def _pre_hook_for_hvd(self, dataloader=None): - """Pre hook for Horovod.""" - import horovod.tensorflow as hvd - - self.hvd = hvd - self.hvd.init() - - @dump_elapsed_time(customized_msg="Model inference") - def evaluate( - self, - model, - dataloader, - postprocess=None, - metrics=None, - measurer=None, - iteration=-1, - tensorboard=False, - fp32_baseline=False, - ): - """Evaluate the model for specified metric on validation dataset. - - Args: - model ([Graph, GraphDef or Path String]): The model could be the graph, - graph_def object, the frozen pb or ckpt/savedmodel folder path. - dataloader (generator): generate the data and labels. - postprocess (object, optional): process the result from the model - metrics (list, optional): Depends on model category. Defaults to None. - measurer (object, optional): for precise benchmark measurement. - iteration(int, optional): control steps of mini-batch - tensorboard (boolean, optional): for tensorboard inspect tensor. - fp32_baseline (boolean, optional): only for compare_label=False pipeline - - Returns: - [float]: evaluation result, the larger is better. - """ - from neural_compressor.tensorflow.quantization.utils.utility import iterator_sess_run - - outputs = model.output_tensor_names - - if getattr(dataloader, "distributed", False): - import horovod.tensorflow as hvd - - hvd.init() - # If metric.hvd is not None then run distributed inference - for metric in metrics: - metric.hvd = hvd - try: - len_dataloader = len(dataloader) - except: - logger.info( - "The length of the distributed evaluation dataloader is unknown." - "When the iteration of evaluation dataloader in each process is " - "inconsistent, an error may occur." 
- ) - else: - list_len_dataloader = hvd.allgather_object(len_dataloader) - if hvd.rank() == 0: - for i in range(len(list_len_dataloader) - 1): - if list_len_dataloader[i] != list_len_dataloader[i + 1]: - raise AttributeError( - "The evaluation dataloader's iteration is" - "different between processes, please reset dataloader's batch_size." - ) - logger.info( - "Rank {!s} dataloaders' data distribution balance check for evaluation have been finished.".format( - hvd.allgather_object(hvd.rank()) - ) - ) - if tensorboard: - from tensorflow.python.framework import tensor_util - - from neural_compressor.tensorflow.quantization.utils.graph_util import GraphAnalyzer - - output_postfix = "_fp32.output" - inspect_node_types = [ - "Conv2D", - "DepthwiseConv2dNative", - "MaxPool", - "AvgPool", - "ConcatV2", - "MatMul", - "FusedBatchNormV3", - "FusedBatchNorm", - "BiasAdd", - "_MklFusedInstanceNorm", - "Relu", - "Relu6", - "Dequantize", - ] - fp32_inspect_node_name = [] - int8_inspect_node_name = [] - q_node_scale = {} - if self.dump_times == 0: - temp_dir = "./runs/eval/baseline" - else: - temp_dir = "./runs/eval/tune_" + str(self.dump_times) - if os.path.isdir(temp_dir): - import shutil - - shutil.rmtree(temp_dir, ignore_errors=True) - # Create the writer using TF2.x APIs to handle eager executions - writer = tf.summary.create_file_writer(temp_dir) # pylint: disable=no-member - with writer.as_default(): - tf.summary.graph(model.graph) # pylint: disable=no-member - - cur_graph = GraphAnalyzer() - cur_graph.graph = model.graph_def - cur_graph.parse_graph() - graph_info = cur_graph.node_name_details - for node in model.graph_def.node: - if node.op in inspect_node_types: - fp32_inspect_node_name.append(node.name) - # Tensor dump supported quantized op including, - # Requantize, QuantizedConv2DAndRequantize, - # QuantizedConv2DAndReluAndRequantize, - # QuantizedConv2DWithBiasAndRequantize, - # QuantizedConv2DWithBiasAndReluAndRequantize, - # QuantizedConv2DWithBiasSignedSumAndReluAndRequantize, - # QuantizedConv2DWithBiasSumAndReluAndRequantize, - # QuantizedDepthwiseConv2DWithBiasAndReluAndRequantize, - # QuantizedMatMulWithBiasAndReluAndRequantize, - # QuantizedMatMulWithBiasAndRequantize - elif node.op.find("Requantize") != -1: - out_min = -2 - out_max = -1 - if node.op.find("Sum") != -1: - out_min = -5 - out_max = -4 - q_out_min = graph_info[node.input[out_min]].node.attr["value"].tensor.float_val[0] - q_out_max = graph_info[node.input[out_max]].node.attr["value"].tensor.float_val[0] - q_node_scale[node.name] = (node.op, q_out_min, q_out_max) - int8_inspect_node_name.append(node.name) - # Inspect weights, bias. 
Need further optimize - if node.op == "Const" and graph_info[graph_info[node.name].outputs[0]].node.op in [ - "Conv2D", - "DepthwiseConv2dNative", - "MatMul", - "FusedBatchNormV3", - "_MklFusedInstanceNorm", - "BiasAdd", - ]: - const_value = tensor_util.MakeNdarray(node.attr.get("value").tensor).astype(np.float32) - self._log_histogram(writer, node.name, const_value) - - outputs.extend(fp32_inspect_node_name) - if len(int8_inspect_node_name) > 0: - output_postfix = "_int8.output" - outputs.extend(int8_inspect_node_name) - - if metrics: - for metric in metrics: - metric.reset() - self.fp32_preds_as_label = any( - [hasattr(metric, "compare_label") and not metric.compare_label for metric in metrics] - ) - - origin_output_tensor_names = model.output_tensor_names - model.output_tensor_names = outputs - input_tensor = model.input_tensor - output_tensor = model.output_tensor if len(model.output_tensor) > 1 else model.output_tensor[0] - logger.info("Start to evaluate the TensorFlow model.") - - def eval_func(dataloader): - results = [] - for idx, (inputs, labels) in enumerate(dataloader): - # dataloader should keep the order and len of inputs same with input_tensor - if len(input_tensor) == 1: - feed_dict = {} - if isinstance(inputs, dict) or isinstance(inputs, OrderedDict) or isinstance(inputs, UserDict): - for name in inputs: - for tensor in input_tensor: - pos = tensor.name.rfind(":") - t_name = tensor.name if pos < 0 else tensor.name[:pos] - if name == t_name: - feed_dict[tensor] = inputs[name] - break - else: - feed_dict = {input_tensor[0]: inputs} # get raw tensor using index [0] - else: - assert len(input_tensor) == len(inputs), "inputs len must equal with input_tensor" - feed_dict = {} - if isinstance(inputs, dict) or isinstance(inputs, OrderedDict) or isinstance(inputs, UserDict): - for name in inputs: - for tensor in input_tensor: - pos = tensor.name.rfind(":") - t_name = tensor.name if pos < 0 else tensor.name[:pos] - if name == t_name: - feed_dict[tensor] = inputs[name] - break - else: - feed_dict = dict(zip(input_tensor, inputs)) - - if model.iter_op: - predictions = iterator_sess_run( - model.sess, model.iter_op, feed_dict, output_tensor, iteration, measurer - ) - elif measurer is not None: - measurer.start() - predictions = model.sess.run(output_tensor, feed_dict) - measurer.end() - else: - predictions = model.sess.run(output_tensor, feed_dict) - - if self.fp32_preds_as_label: - self.fp32_results.append(predictions) if fp32_baseline else results.append(predictions) - - # Inspect node output, just get 1st iteration output tensors for now - if idx == 0 and tensorboard: - for index, node_name in enumerate(outputs): - tensor = predictions[index] - if node_name in int8_inspect_node_name: - tensor = Dequantize(predictions[index], q_node_scale[node_name]) - self._log_histogram(writer, node_name + output_postfix, tensor.astype(np.float32), idx) - writer.close() - if isinstance(predictions, list): - if len(origin_output_tensor_names) == 1: - predictions = predictions[0] - elif len(origin_output_tensor_names) > 1: - predictions = predictions[: len(origin_output_tensor_names)] - if postprocess is not None: - predictions, labels = postprocess((predictions, labels)) - if metrics: - for metric in metrics: - if not hasattr(metric, "compare_label") or ( - hasattr(metric, "compare_label") and metric.compare_label - ): - metric.update(predictions, labels) - if idx + 1 == iteration: - break - return results - - if isinstance(dataloader, BaseDataLoader) and not self.benchmark: - try: - results = 
eval_func(dataloader) - except Exception: # pragma: no cover - logger.warning("Fail to forward with batch size={}, set to {} now.".format(dataloader.batch_size, 1)) - dataloader.batch(1) - results = eval_func(dataloader) - else: # pragma: no cover - results = eval_func(dataloader) - - if self.fp32_preds_as_label: - from neural_compressor.tensorflow.quantization.utils.utility import collate_tf_preds - - if fp32_baseline: - results = collate_tf_preds(self.fp32_results) - reference = results - else: - reference = collate_tf_preds(self.fp32_results) - results = collate_tf_preds(results) - for metric in metrics: - if hasattr(metric, "compare_label") and not metric.compare_label: - metric.update(results, reference) - - acc = 0 if metrics is None else [metric.result() for metric in metrics] - if tensorboard: - new_dir = temp_dir + "_acc_" + str(acc) - writer.close() - if os.path.isdir(new_dir): - import shutil - - shutil.rmtree(new_dir, ignore_errors=True) - os.rename(temp_dir, new_dir) - self.dump_times += 1 - model.output_tensor_names = origin_output_tensor_names - return acc if not isinstance(acc, list) or len(acc) > 1 else acc[0] - def _tuning_cfg_to_fw(self, tuning_cfg): """Parse the neural_compressor wrapped configuration to Tensorflow. @@ -468,21 +189,11 @@ def quantize( """ assert ( self.approach != "post_training_dynamic_quant" - ), "Dynamic quantization is not supported on TensorFlow framework now!" - - if self.approach == "quant_aware_training": # pragma: no cover - assert ( - q_func is not None - ), "quantization aware training mode \ - is not configured correctly" - - from neural_compressor.tensorflow.utils import Model - - qat_model = q_func(model) + ), "Dynamic Quantization is not supported on TensorFlow framework now!" - return self.convert(Model(qat_model), "QAT", "default") - - assert q_func is None, "post-training quantization mode is not support calibration function for Tensorflow!" + assert ( + self.approach != "quant_aware_training" + ), "Quantize Aware Training is not supported on TensorFlow framework now!" 
self.calib_sampling_size = calib_dataloader.batch_size * calib_iteration tune_cfg = self.parse_quant_config(quant_config, model, calib_iteration) @@ -612,7 +323,7 @@ def _dump_model_op_stats(self, model_graphdef): continue possible_int8_res = [name for name in int8_op_prefix_list if i.op.find(name) != -1] - if any(possible_int8_res): + if any(possible_int8_res): # pragma: no cover origin_op_type = possible_int8_res[0].split("Quantized")[-1] if origin_op_type == "FusedBatchNorm": origin_op_type = "FusedBatchNormV3" @@ -915,367 +626,6 @@ def check_match(patterns, input_pattern): return capability - def set_tensor(self, model, tensor_dict): - """Quantize the bias and weight tensors in tensor_dict.""" - from neural_compressor.tensorflow.quantization.utils.graph_util import GraphAnalyzer - - g = GraphAnalyzer() - g.graph = model.graph_def - graph_info = g.parse_graph() - - def _get_fp32_op_name(model, tensor_name): - is_weight = False - is_biasadd = False - last_node_name = None - current_node_name = None - for each_node in model.graph_def.node: - if tensor_name in each_node.input: - tensor_index = list(each_node.input).index(tensor_name) - if each_node.op.find("Quantized") != -1 and tensor_index == 2: - is_biasadd = True - last_node_name = each_node.input[0] - current_node_name = each_node.name - - if tensor_name + "_qint8_const" in each_node.input: - pass - - return is_weight, is_biasadd, current_node_name, last_node_name - - from tensorflow.core.framework import attr_value_pb2 - from tensorflow.python.framework import dtypes, tensor_util - - from neural_compressor.tensorflow.quantization.utils.graph_util import GraphRewriterHelper as Helper - - qint32_type = dtypes.qint32.as_datatype_enum - - for tensor_name, tensor_content in tensor_dict.items(): - is_weight, is_biasadd, current_node_name, last_node_name = _get_fp32_op_name(model, tensor_name) - - if is_biasadd: - is_biasadd_dtype_is_fp32 = graph_info[current_node_name].node.attr["Tbias"] == attr_value_pb2.AttrValue( - type=dtypes.float32.as_datatype_enum - ) - current_node = graph_info[current_node_name].node - bias_add_node = graph_info[current_node.input[2]].node - if is_biasadd_dtype_is_fp32: - bias_add_node.attr["value"].CopyFrom( - attr_value_pb2.AttrValue( - tensor=tensor_util.make_tensor_proto(tensor_content, dtypes.float32, tensor_content.shape) - ) - ) - else: - last_node = graph_info[last_node_name].node - min_input = graph_info[last_node.input[-2]].node.attr["value"].tensor.float_val[0] - max_input = graph_info[last_node.input[-1]].node.attr["value"].tensor.float_val[0] - channel_size = tensor_content.shape[0] - max_filter_node = graph_info[current_node.input[6]].node - min_filter_node = graph_info[current_node.input[5]].node - if max_filter_node.attr["value"].tensor.float_val: - max_filter_tensor = [] - min_filter_tensor = [] - max_filter_tensor.append((max_filter_node.attr["value"].tensor.float_val)[0]) - min_filter_tensor.append((min_filter_node.attr["value"].tensor.float_val)[0]) - else: - max_filter_tensor = tensor_util.MakeNdarray(min_filter_node.attr["value"].tensor) - min_filter_tensor = tensor_util.MakeNdarray(min_filter_node.attr["value"].tensor) - activation_range = 127.0 if current_node.attr["Tinput"].type == dtypes.qint8 else 255.0 - updated_bias = Helper.generate_int32_bias_for_conv( - tensor_content, - channel_size, - max_input, - min_input, - max_filter_tensor, - min_filter_tensor, - activation_range, - ) - - bias_add_node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(type=qint32_type)) - 
bias_add_node.attr["value"].CopyFrom( - attr_value_pb2.AttrValue( - tensor=tensor_util.make_tensor_proto(updated_bias, dtypes.int32, tensor_content.shape) - ) - ) - bias_add_node.attr["value"].tensor.dtype = qint32_type - current_node.attr["Tbias"].CopyFrom(attr_value_pb2.AttrValue(type=qint32_type)) - - if is_weight: - tmp_const_node = Helper.create_constant_node( - current_node.name + "_weights_tmp", tensor_content.transpose(2, 3, 1, 0), dtypes.float32 - ) - min_filter_node = graph_info[current_node.input[5]].node - per_channel = True if min_filter_node.attr["value"].tensor.tensor_shape else False - from neural_compressor.tensorflow.quantization.utils.quantize_graph_common import QuantizeGraphHelper - - original_fp32_op = current_node.op.split("With")[0].split("Quantized")[-1] - if original_fp32_op.find("Depthwise") != -1: - original_fp32_op = "DepthwiseConv2dNative" - qint8_const_node, min_node, max_node = QuantizeGraphHelper.generate_quantized_weight_node( - original_fp32_op, tmp_const_node, per_channel - ) - g.add_node(qint8_const_node, [], [current_node.name]) - g.add_node(min_node, [], [current_node.name]) - g.add_node(max_node, [], [current_node.name]) - g.replace_constant_graph_with_constant_node(qint8_const_node, tensor_name) - g.replace_constant_graph_with_constant_node(min_node, current_node.input[5]) - g.replace_constant_graph_with_constant_node(max_node, current_node.input[6]) - - def inspect_weight_and_bias(self, node_list, graph_def, graph_info, graph_node_name_mapping): - """Inspect the weights and biases.""" - from neural_compressor.tensorflow.quantization.utils.utility import ( - get_tensor_val_from_graph_node, - int8_node_name_reverse, - ) - from neural_compressor.tensorflow.utils import dequantize_weight - - weights_result = {} - inspect_nodes = [] - node_set = set(node_list) - for node in graph_def.node: - node_name = node.name - if "Quantized" in node.op: - node_name = int8_node_name_reverse(node) - if node_name in node_set and ("Conv" in node.op or "Mul" in node.op): - inspect_nodes.append(node) - logger.debug(f"Start to inspect weight and bias for: {[node.name for node in inspect_nodes]}.") - for node in inspect_nodes: - # inspect weights and bias - node_name = node.name - weight_node_name = node.input[1] - weight_node = graph_node_name_mapping[weight_node_name] - if weight_node.op != "Const": # skip the matmul whose two inputs are previous output - continue - weight_node_val = get_tensor_val_from_graph_node(graph_node_name_mapping, weight_node_name) - weight_node_val = weight_node_val.astype("float32") - # dequantize the weight for quantized model - if "Quantized" in node.op: - node_name = int8_node_name_reverse(node) - weight_node_name_pre = weight_node_name.split("_qint8_const")[0] - min_filter_node = weight_node_name_pre + "_min" - max_filter_node = weight_node_name_pre + "_max" - if graph_info[min_filter_node].node.attr["value"].tensor.float_val: - min_filter_val = graph_info[min_filter_node].node.attr["value"].tensor.float_val - max_filter_val = graph_info[max_filter_node].node.attr["value"].tensor.float_val - else: - min_filter_val = get_tensor_val_from_graph_node(graph_node_name_mapping, min_filter_node) - max_filter_val = get_tensor_val_from_graph_node(graph_node_name_mapping, max_filter_node) - weight_node_val = dequantize_weight(weight_node_val, min_filter_val, max_filter_val) - weights_result[node_name] = {weight_node_name: weight_node_val} - return weights_result - - def fused_node_mapping(self, node_list, pattern_mapping, graph_info, 
graph_node_name_mapping): - """Create the mapping between first node and last node in fused sequence. - - Args: - node_list: node name list - pattern_mapping: key: node name, val: node pattern mapping - graph_info: key: node name, val: node details - graph_node_name_mapping: key: node name, val: node - Returns: - fused_mapping: key: first node name in fused seq, val: last node in fused seq - fused_mapping_reverse: key: last node in fused seq, val: first node name in fused seq - """ - fused_mapping = {} - fused_mapping_reverse = {} - for node_name in node_list: - fused_seq = pattern_mapping[node_name]["sequence"].split(",") - # for the node not fused with others - if len(fused_seq) == 1: - fused_mapping[node_name] = node_name - fused_mapping_reverse[node_name] = node_name - continue - _next_node_name = node_name - for _next_node_op_type in fused_seq[1:]: - node_details = graph_info[_next_node_name] - for node_output_name in node_details.outputs: - if graph_node_name_mapping[node_output_name].op == "Cast": - cast_node = graph_node_name_mapping[node_output_name] - node_output_name = graph_info[cast_node.name].outputs[0] - if graph_node_name_mapping[node_output_name].op in [_next_node_op_type, "Cast"]: - _next_node_name = node_output_name - fused_mapping[node_name] = _next_node_name - fused_mapping_reverse[_next_node_name] = node_name - return fused_mapping, fused_mapping_reverse - - def _inspect_tensor_inference(self, inspect_node_dict, model, dataloader, iteration_list): - """Do inference for inspect activation.""" - out_tensor_lst = [] - out_tensor_lst += [{n: [n + ":" + str(i) for i in range(3)]} for n in inspect_node_dict["qreq_node"]] - out_tensor_lst += [{n: n + ":0"} for n in inspect_node_dict["qdq_node"]] - out_tensor_lst += [{n: n + ":0"} for n in inspect_node_dict["f_node"]] - out_cnt = len(out_tensor_lst) - iteration_list = set(iteration_list) - input_tensor = model.input_tensor - logger.info("Start to do inference for inspect activation.") - activation_result = [] - for idx, (inputs, labels) in enumerate(dataloader): - model_out = [] - if idx + 1 > max(iteration_list): - break - if idx + 1 not in iteration_list: - continue - if len(input_tensor) == 1: - feed_dict = {input_tensor[0]: inputs} # get raw tensor using index [0] - else: - assert len(input_tensor) == len(inputs), "inputs len must equal with input_tensor" - feed_dict = dict(zip(input_tensor, inputs)) - # TODO find an optimized method to avoid multiple runs - for i, out_t in enumerate(out_tensor_lst): - logger.debug(f"Finished inspect {i}/{out_cnt} nodes, current inspect node {out_t.keys()}.") - model_out.append(model.sess.run(out_t, feed_dict)) - activation_result.append(model_out) - return activation_result - - def inspect_activation( - self, node_list, graph_def, graph_node_name_mapping, quantization_cfg, dataloader, iteration_list, graph_info - ): - """Inspect the activation.""" - from neural_compressor.tensorflow.utils import Model - - original_graph_node_mapping = {} - for node in graph_def.node: - original_graph_node_mapping[node.name] = node - inspect_node_dict = {"qdq_node": [], "qreq_node": [], "f_node": []} - for node_name in node_list: - node = graph_node_name_mapping[node_name] - if "Quantized" in node.op and "Dequantize" in node.op: - inspect_node_dict["qdq_node"].append(node.name) - elif "Quantized" in node.op or "_Quantized" in node.op or "Requantize" in node.op: - inspect_node_dict["qreq_node"].append(node.name) - else: - inspect_node_dict["f_node"].append(node_name) - pattern_mapping = {} - node_dict = 
quantization_cfg["op"] - for node_name_and_type in node_dict.keys(): - node_name, _ = node_name_and_type - if "pattern" in node_dict[node_name_and_type]: - pattern_mapping[node_name] = node_dict[node_name_and_type]["pattern"] - else: - pattern_mapping[node_name] = {"sequence": node_name} - if inspect_node_dict["f_node"]: - fuse_map, fuse_map_reverse = self.fused_node_mapping( - inspect_node_dict["f_node"], pattern_mapping, graph_info, graph_node_name_mapping - ) - inspect_node_dict["f_node"] = [fuse_map[n] for n in inspect_node_dict["f_node"]] - # build model and do inference - model = Model(graph_def) - activation_result = self._inspect_tensor_inference(inspect_node_dict, model, dataloader, iteration_list) - final_result = [] - int8_postfix = "_eightbit" - for iter_res in activation_result: - tmp_iter_result = {} - for res in iter_res: - node_name, val = list(res.keys())[0], list(res.values())[0] - val = Dequantize(val[0], (node_name, val[1], val[2])) if len(val) == 3 else val - val = val.astype(np.float32) - index_postfix = node_name.find(int8_postfix) - if index_postfix != -1: - node_name = node_name[:index_postfix] - tmp_iter_result[node_name] = {node_name: val} - else: - tmp_iter_result[fuse_map_reverse[node_name]] = {fuse_map_reverse[node_name]: val} - final_result.append(tmp_iter_result) - return final_result - - def inspect_tensor( - self, - model, - dataloader=None, - op_list=[], - iteration_list=[], - inspect_type="activation", - save_to_disk=False, - save_path=None, - quantization_cfg=None, - ): - """Dump the weight and activation(output) to local disk. - - 1. create the correspondence between query node name and the actually output node name in graph_def - 2. get the weight and bias for the given node - 3. get the activation for the given node - 4. save the tensor to disk - Args: - model: int8/fp32 graph_def/TensorflowBaseModel - dataloader: dataloader used during inspect activation - op_list: op list to inspect - iteration_list: iteration list to inspect, start from 1 - inspect_type: activation/weight/all - save_to_disk: dump to disk or not - save_path: the dump path for inspect tensor - quantization_cfg: quantization configuration for fused fp32 model and quantized model - Returns: - Dict - { - 'weight': { - 'node0_name': {'weight0_name': numpy.array, 'bias0_name': numpy.array, ...}, - 'node1_name': {'weight1_name': numpy.array, 'bias1_name': numpy.array, ...}, - ... - }, - 'activation': [ - # iter 1: - { - 'node0_name': {'output0_name': numpy.array, 'output1_name': numpy.array, ...} - 'node1_name': {'output1_name': numpy.array, 'output1_name': numpy.array, ...} - ... - }, - # iter 2: - { - ... 
- } - ] - } - """ - from neural_compressor.tensorflow.quantization.utils.graph_util import GraphAnalyzer - from neural_compressor.tensorflow.quantization.utils.utility import int8_node_name_reverse - from neural_compressor.tensorflow.utils import TensorflowBaseModel, dump_data_to_local, load_data_from_pkl - - if isinstance(model, TensorflowBaseModel): - model = model.graph_def - if not quantization_cfg: - # TODO get config from graph if config is None - quantization_cfg = load_data_from_pkl("./nc_workspace/", "cfg.pkl") - node_list = op_list - # create the mapping between node name and node, key: node_name, val: node - graph_node_name_mapping = {} - quan_model_flag = False - for node in model.node: - node_name = int8_node_name_reverse(node) - if "Quantized" in node.op: - quan_model_flag = True - node_name = int8_node_name_reverse(node) - if node.attr["value"].tensor.dtype == tf.dtypes.bfloat16.as_datatype_enum: - quan_model_flag = True - graph_node_name_mapping[node_name] = node - if quan_model_flag: - logger.info("Dump the tensor for quantized model.") - - # create the mapping between node name and node detail - g = GraphAnalyzer() - g.graph = model - graph_info = g.parse_graph() - inspect_result = {} - - # inspect weight - if inspect_type == "weight" or inspect_type == "all": - logger.info("Start to inspect weight and bias.") - weights_result = self.inspect_weight_and_bias(node_list, model, graph_info, graph_node_name_mapping) - inspect_result["weight"] = weights_result - - # inspect activation - if inspect_type == "activation" or inspect_type == "all": - logger.info("Start to inspect activation.") - activation_result = self.inspect_activation( - node_list, model, graph_node_name_mapping, quantization_cfg, dataloader, iteration_list, graph_info - ) - inspect_result["activation"] = activation_result - - # save to disk - if save_to_disk: - if not save_path: - save_path = "./nc_workspace/tmp/" - dump_data_to_local(inspect_result, save_path, "inspect_result.pkl") - logger.info(f"Dumped the inspect tensor to {save_path}") - return inspect_result - def quantize_input(self, model): """Quantize the model to be able to take quantized input. @@ -1381,320 +731,8 @@ def get_optype_wise_ability(self): res[op[1]] = {"activation": {"dtype": ["bf16"]}, "weight": {"dtype": ["bf16"]}} return res - def _pre_hook_for_qat(self, dataloader=None): - """Pre hook for QAT.""" - self.model.model = self.qat_convert(self.model.model) - - def _post_hook_for_qat(self): - """Post hook for QAT.""" - pass - - def _pre_eval_hook(self, model): - """Pre evaluation hook.""" - return model - - # Add keyword arguments unpacking - def _post_eval_hook(self, model, **kwargs): - """Post evaluation hook.""" - pass - - def save(self, model, path): - """Save model to the path.""" - pass - - # this function is used to convert keras QAT model to pb in old QAT implementation, - # and it's not used in refactored QAT - def convert(self, model, source, destination): # pragma: no cover - """The function is used to convert a source model format to another. - - Args: - model (neural_compressor.model): base model to be converted. - source (string): The source model format. - destination (string): The destination model format. 
- """ - assert source.lower() == "qat" and destination.lower() == "default" - capability = self._query_fw_capability(model) - - quantize_config = {"op_wise_config": {}} - for each_op_info in capability["opwise"]: - is_perchannel = False - weight_bit = 7.0 - for op_cap in capability["opwise"][each_op_info]: - if "activation" in op_cap and "quant_mode" in op_cap["activation"]: - activation = op_cap["activation"] - if "weight" in op_cap: - weight = op_cap["weight"] - is_perchannel = True if weight["granularity"][0] == "per_channel" else False - algorithm = activation["algorithm"][0] - is_asymmetric = False - if "activation" in op_cap: - is_asymmetric = True if activation["scheme"][0] == "asym" else False - - quantize_config["op_wise_config"][each_op_info[0]] = ( - is_perchannel, - algorithm, - is_asymmetric, - weight_bit, - ) - from neural_compressor.tensorflow.quantization.utils.graph_converter import GraphConverter - - tmp_graphdef = copy.deepcopy(model.graph_def) - for i in tmp_graphdef.node: - if i.op == "Const" and i.input: - i.ClearField("input") - model.graph_def = tmp_graphdef - converter = GraphConverter( - model, - qt_config=quantize_config, - int8_sequences=self.op_wise_sequences, - fake_quant=True, - new_api=self.new_api, - performance_only=self.performance_only, - use_bf16=self.use_bf16, - ) - - return converter.convert() - - def qat_convert(self, model, quantize_recipe=None): - """Convert a fp32 'tf.keras' model to be a int8 one with quantization aware training implementation. - - Args: - model (tf.keras.Model): The model to be quantized, expected to be a Keras Functional or Sequential model. - quantize_recipe (dict): A dict that decide whether given layers should be quantized. - - Returns: - converted_model (tf.keras.Model): Quantized model with fake quant nodes inserted. - """ - assert isinstance(model, tf.keras.Model), ( - "The model to be converted is expected to be " - "a `tf.keras.Model` instance. You should not pass an instance of type: {input}.".format( - input=model.__class__.__name__ - ) - ) - - assert model.__class__.__name__ in [ - "Functional", - "Sequential", - ], "Only `Functional` or `Sequential` keras model is supported for QAT." - - from neural_compressor.tensorflow.quantization.utils.quantize_graph.qat.quantize_config import global_config - from neural_compressor.tensorflow.quantization.utils.quantize_graph.qat.quantize_helper import ( - init_quantize_config, - qat_clone_function, - ) - - config = init_quantize_config(model, quantize_recipe) - q_model = tf.keras.models.clone_model(model, input_tensors=None, clone_function=qat_clone_function) - global_config.clear() - - return q_model - - @dump_elapsed_time("Pass recover model") - def recover_tuned_model(self, model, q_config): - """Execute the recover process on the specified model. 
- - Args: - tune_cfg (dict): quantization configuration - model (tf.compat.v1.GraphDef): fp32 model - q_config (dict): recover configuration - - Returns: - tf.compat.v1.GraphDef: the quantized model - """ - from neural_compressor.tensorflow.quantization.utils.graph_rewriter.generic.pre_optimize import PreOptimization - - self.pre_optimizer_handle = PreOptimization(model, self.new_api, self.device) - self.pre_optimized_model = self.pre_optimizer_handle.get_optimized_model(self.itex_mode) - model.graph_def = self.pre_optimized_model.graph_def - - from neural_compressor.tensorflow.quantization.utils.graph_converter_without_calib import ( - GraphConverterWithoutCalib, - ) - - converter = GraphConverterWithoutCalib( - model, - recover_config=q_config, - new_api=self.new_api, - performance_only=self.performance_only, - use_bf16=self.use_bf16, - ) - - return converter.convert_without_calib() - - def get_output_op_names(self, qmodel): - """Get the oupur OPs's names.""" - from neural_compressor.tensorflow.quantization.utils.graph_util import GraphAnalyzer - - graph_def = GraphAnalyzer().parse_graph(qmodel.graph_def) - output_op_names = set() - - def _add_output_op_name(opname): - if opname.endswith("_dequantize"): - output_op_names.add(opname[: -len("_dequantize")]) # pylint: disable=no-member - elif opname.endswith("__dequant"): - pass - else: - output_op_names.add(opname) # pylint: disable=no-member - - for output_opname in qmodel.output_node_names: - op_count = 0 - stack = [output_opname] - while stack: - opname = stack.pop() - while True: - op_count += 1 - if opname not in graph_def: - break - op = graph_def[opname] - if op.node.op == "Dequantize": - _add_output_op_name(opname) - break - next_opnames = op.node.input - if not next_opnames: - break - elif len(next_opnames) > 1: - stack += next_opnames[1:] - - opname = next_opnames[0] - - output_op_names = list(output_op_names) - logger.debug(f"output op names: {output_op_names}") - return output_op_names - - def calculate_op_sensitivity( - self, model, dataloader, tune_cfg, output_op_names, confidence_batches, fallback=True, requantize_cfgs=None - ): - """Compute the op sensitivity. - - The sensitivity metric is the mse between the output of the last quantized op of - the quantized model and the output of its corresponding op in the fp32 model. - - 1. Backup the tune cfg - 2. Fallback each int8 op and compute its mse if use fallback (with 'fallback == True'), - or re-quantize each fp32 op(fallen back in the previous stage) and compute its MSE if not. - 3. Sorted op name list according to its MSE - - Args: - fp32_model: The fp32 model. - dataloader: the dataloader with full dataset. - tune_cfg: tuning config - fallback: denote fallback stage or re-quantize stage - requantize_cfgs: the dict of tuning configs for all re-quantizable ops - - Returns: - A list of op names, sorted by its MSE sensitivity. - """ - fp32_op_cfg = {"activation": {"dtype": "fp32", "quant_mode": "fp32"}, "weight": {"dtype": "fp32"}} - - if fallback: - ops_list = [ - op - for op, config in tune_cfg["op"].items() - if config["activation"]["quant_mode"] in ("static", "dynamic") - ] - replace_cfgs = {op: fp32_op_cfg for op in tune_cfg["op"]} - else: - ops_list = [ - op - for op, config in tune_cfg["op"].items() - if config["activation"]["quant_mode"] == "fp32" and op in requantize_cfgs - ] - replace_cfgs = requantize_cfgs - - # Step2. 
compute mse - mse_result = self._get_mse_order( - model, deepcopy(tune_cfg), replace_cfgs, ops_list, dataloader, output_op_names, confidence_batches - ) - - # Step3. sort - mse_order = [op for op, _ in sorted(mse_result.items(), key=lambda i: i[1])] - logger.debug("Dump MSE order:") - for op in mse_order: - logger.debug(f"{op}: {mse_result[op]}") - return mse_order - - def _get_mse_order( - self, fp32_model, tune_cfg, replace_cfgs, ops_lst, dataloader, output_op_names, confidence_batches - ): - """Compute MSE.""" - op_cfg = tune_cfg["op"] - mse_result = {} - partial_dataloader = self._partial_dataloader(dataloader, confidence_batches) - - fp32_output = self._inference_model_on_batches(fp32_model, tune_cfg, partial_dataloader, output_op_names) - - for op in ops_lst: - # backup and set replace tuning config - backup_cfg = op_cfg[op] - op_cfg[op] = replace_cfgs[op] - # quantize and inference the model - q_model = self.quantize(tune_cfg, fp32_model, partial_dataloader) - q_output = self._inference_model_on_batches(q_model, tune_cfg, partial_dataloader, output_op_names) - - mse_result[op] = self._calculate_mse(fp32_output, q_output) - - # recover tune_cfg - op_cfg[op] = backup_cfg - - return mse_result - - def _partial_dataset_of(self, dataloader, confidence_batches): - """Partial dataset.""" - from neural_compressor.tensorflow.utils import DummyDataset, DummyDatasetV2 - - if isinstance(dataloader.dataset, DummyDataset) or isinstance(dataloader.dataset, DummyDatasetV2): - assert isinstance(confidence_batches, int) - ds = copy.deepcopy(dataloader.dataset) - ds.dataset = ds.dataset[:confidence_batches] - return ds - else: - return dataloader.dataset.take(confidence_batches) - - def _partial_dataloader(self, dataloader, confidence_batches): - """Partial dataloader.""" - return type(dataloader)( - dataset=self._partial_dataset_of(dataloader, confidence_batches), - batch_size=dataloader.batch_size, - last_batch=dataloader.last_batch, - collate_fn=dataloader.collate_fn, - sampler=dataloader.sampler, - batch_sampler=dataloader.batch_sampler, - num_workers=dataloader.num_workers, - pin_memory=dataloader.pin_memory, - shuffle=dataloader.shuffle, - distributed=dataloader.distributed, - ) - - def _calculate_mse(self, fp32_output, q_output): - """MSE calculation.""" - result = [] - for i, j in zip(fp32_output, q_output): - result.append(np.square(i - j).mean()) - return np.array(result).mean() - - def _inference_model_on_batches(self, model, tune_cfg, dataloader, output_op_names): - """Inference model on batches.""" - from neural_compressor.tensorflow.quantization.utils.utility import generate_feed_dict - - input_tensors = model.input_tensor - output_tensors = [] - for op in output_op_names: - for tensor in model.graph.get_operation_by_name(op).outputs: - output_tensors.append(tensor) - - predictions = [] - for index, (inputs, _) in enumerate(dataloader): - feed_dict = generate_feed_dict(input_tensors, inputs) - - pred = model.sess.run(output_tensors, feed_dict) - for item in pred: - predictions.append(item) - - return predictions - - -class Tensorflow_ITEXAdaptor(TensorFlowAdaptor): +class Tensorflow_ITEXAdaptor(TensorFlowAdaptor): # pragma: no cover """Tensorflow ITEX Adaptor Class.""" def __init__(self, framework_specific_info): @@ -2253,10 +1291,10 @@ def get_fuse_patterns(self): if self.itex_mode: patterns["int8"].append("FusedBatchNormV3 + Relu") patterns["int8"].append("FusedBatchNormV3 + LeakyRelu") - elif version1_eq_version2(tf.version.VERSION, "1.15.0-up3"): + elif 
version1_eq_version2(tf.version.VERSION, "1.15.0-up3"): # pragma: no cover patterns["int8"] = tf1_15_up3_int8_pattern_list patterns["uint8"] = tf1_15_up3_uint8_pattern_list - else: + else: # pragma: no cover patterns["int8"] = old_tf_int8_pattern_list patterns["uint8"] = old_tf_uint8_pattern_list diff --git a/neural_compressor/tensorflow/keras/layers/conv2d.py b/neural_compressor/tensorflow/keras/layers/conv2d.py index 426b1777b42..002ac1c507b 100644 --- a/neural_compressor/tensorflow/keras/layers/conv2d.py +++ b/neural_compressor/tensorflow/keras/layers/conv2d.py @@ -30,7 +30,7 @@ else: from keras.layers.convolutional.base_conv import Conv # pylint: disable=E0401 -if version1_gte_version2(tf.__version__, "2.16.1"): +if version1_gte_version2(tf.__version__, "2.16.1"): # pragma: no cover class QConv2D(BaseConv): def __init__( @@ -354,40 +354,28 @@ def get_config(self): def initialize_int8_conv2d(fp32_layer, q_config): kwargs = fp32_layer.get_config() - if "name" in kwargs: - del kwargs["name"] - if "filters" in kwargs: - del kwargs["filters"] - if "kernel_size" in kwargs: - del kwargs["kernel_size"] - if "strides" in kwargs: - del kwargs["strides"] - if "padding" in kwargs: - del kwargs["padding"] - if "data_format" in kwargs: - del kwargs["data_format"] - if "dilation_rate" in kwargs: - del kwargs["dilation_rate"] - if "groups" in kwargs: - del kwargs["groups"] - if "activation" in kwargs: - del kwargs["activation"] - if "use_bias" in kwargs: - del kwargs["use_bias"] - if "kernel_initializer" in kwargs: - del kwargs["kernel_initializer"] - if "bias_initializer" in kwargs: - del kwargs["bias_initializer"] - if "kernel_regularizer" in kwargs: - del kwargs["kernel_regularizer"] - if "activity_regularizer" in kwargs: - del kwargs["activity_regularizer"] - if "bias_regularizer" in kwargs: - del kwargs["bias_regularizer"] - if "kernel_constraint" in kwargs: - del kwargs["kernel_constraint"] - if "bias_constraint" in kwargs: - del kwargs["bias_constraint"] + param_list = [ + "name", + "filters", + "kernel_size", + "strides", + "padding", + "data_format", + "dilation_rate", + "groups", + "activation", + "use_bias", + "kernel_initializer", + "bias_initializer", + "kernel_regularizer", + "activity_regularizer", + "bias_regularizer", + "kernel_constraint", + "bias_constraint", + ] + for p in param_list: # pragma: no cover + if p in kwargs: + del kwargs[p] return QConv2D( name=fp32_layer.name, diff --git a/neural_compressor/tensorflow/keras/layers/dense.py b/neural_compressor/tensorflow/keras/layers/dense.py index 4e97cbfb7a7..84c4dfabd6c 100644 --- a/neural_compressor/tensorflow/keras/layers/dense.py +++ b/neural_compressor/tensorflow/keras/layers/dense.py @@ -170,28 +170,22 @@ def get_config(self): def initialize_int8_dense(fp32_layer, q_config): kwargs = fp32_layer.get_config() - if "name" in kwargs: - del kwargs["name"] - if "units" in kwargs: - del kwargs["units"] - if "activation" in kwargs: - del kwargs["activation"] - if "use_bias" in kwargs: - del kwargs["use_bias"] - if "kernel_initializer" in kwargs: - del kwargs["kernel_initializer"] - if "bias_initializer" in kwargs: - del kwargs["bias_initializer"] - if "kernel_regularizer" in kwargs: - del kwargs["kernel_regularizer"] - if "activity_regularizer" in kwargs: - del kwargs["activity_regularizer"] - if "bias_regularizer" in kwargs: - del kwargs["bias_regularizer"] - if "kernel_constraint" in kwargs: - del kwargs["kernel_constraint"] - if "bias_constraint" in kwargs: - del kwargs["bias_constraint"] + param_list = [ + "name", + "units", + 
"activation", + "use_bias", + "kernel_initializer", + "bias_initializer", + "kernel_regularizer", + "activity_regularizer", + "bias_regularizer", + "kernel_constraint", + "bias_constraint", + ] + for p in param_list: # pragma: no cover + if p in kwargs: + del kwargs[p] q_layer = QDense( name=fp32_layer.name, diff --git a/neural_compressor/tensorflow/keras/layers/depthwise_conv2d.py b/neural_compressor/tensorflow/keras/layers/depthwise_conv2d.py index 683c774b2fe..a0d8511d058 100644 --- a/neural_compressor/tensorflow/keras/layers/depthwise_conv2d.py +++ b/neural_compressor/tensorflow/keras/layers/depthwise_conv2d.py @@ -34,12 +34,10 @@ if version1_gte_version2(tf.__version__, "2.16.1"): - class QDepthwiseConv2D(BaseDepthwiseConv): + class QDepthwiseConv2D(BaseDepthwiseConv): # pragma: no cover def __init__( self, kernel_size, - min_value, - max_value, strides=(1, 1), padding="valid", depth_multiplier=1, @@ -195,8 +193,6 @@ class QDepthwiseConv2D(DepthwiseConv): def __init__( self, kernel_size, - min_value, - max_value, strides=(1, 1), padding="valid", depth_multiplier=1, @@ -376,42 +372,27 @@ def initialize_int8_depthwise_conv2d(fp32_layer, q_config): kwargs = fp32_layer.get_config() q_name = fp32_layer.name - if "name" in kwargs: - del kwargs["name"] - if "kernel_size" in kwargs: - del kwargs["kernel_size"] - if "strides" in kwargs: - del kwargs["strides"] - if "padding" in kwargs: - del kwargs["padding"] - if "depth_multiplier" in kwargs: - del kwargs["depth_multiplier"] - if "data_format" in kwargs: - del kwargs["data_format"] - if "dilation_rate" in kwargs: - del kwargs["dilation_rate"] - if "activation" in kwargs: - del kwargs["activation"] - if "use_bias" in kwargs: - del kwargs["use_bias"] - if "depthwise_initializer" in kwargs: - del kwargs["depthwise_initializer"] - if "bias_initializer" in kwargs: - del kwargs["bias_initializer"] - if "depthwise_regularizer" in kwargs: - del kwargs["depthwise_regularizer"] - if "activity_regularizer" in kwargs: - del kwargs["activity_regularizer"] - if "bias_regularizer" in kwargs: - del kwargs["bias_regularizer"] - if "depthwise_constraint" in kwargs: - del kwargs["depthwise_constraint"] - if "bias_constraint" in kwargs: - del kwargs["bias_constraint"] - if "min_value" in kwargs: - del kwargs["min_value"] - if "max_value" in kwargs: - del kwargs["max_value"] + param_list = [ + "name", + "kernel_size", + "strides", + "padding", + "depth_multiplier", + "data_format", + "dilation_rate", + "activation", + "use_bias", + "depthwise_initializer", + "bias_initializer", + "depthwise_regularizer", + "activity_regularizer", + "bias_regularizer", + "depthwise_constraint", + "bias_constraint", + ] + for p in param_list: # pragma: no cover + if p in kwargs: + del kwargs[p] return QDepthwiseConv2D( name=q_name, diff --git a/neural_compressor/tensorflow/keras/layers/pool2d.py b/neural_compressor/tensorflow/keras/layers/pool2d.py index ce81fc2377b..1a04627f06b 100644 --- a/neural_compressor/tensorflow/keras/layers/pool2d.py +++ b/neural_compressor/tensorflow/keras/layers/pool2d.py @@ -215,16 +215,16 @@ def get_config(self): def initialize_int8_avgpool(fp32_layer, q_config): kwargs = fp32_layer.get_config() - if "name" in kwargs: - del kwargs["name"] - if "pool_size" in kwargs: - del kwargs["pool_size"] - if "strides" in kwargs: - del kwargs["strides"] - if "padding" in kwargs: - del kwargs["padding"] - if "data_format" in kwargs: - del kwargs["data_format"] + param_list = [ + "name", + "pool_size", + "strides", + "padding", + "data_format", + ] + for p in 
param_list: # pragma: no cover + if p in kwargs: + del kwargs[p] q_layer = QAvgPool2D( name=fp32_layer.name, @@ -243,16 +243,16 @@ def initialize_int8_avgpool(fp32_layer, q_config): def initialize_int8_maxpool(fp32_layer, q_config): kwargs = fp32_layer.get_config() - if "name" in kwargs: - del kwargs["name"] - if "pool_size" in kwargs: - del kwargs["pool_size"] - if "strides" in kwargs: - del kwargs["strides"] - if "padding" in kwargs: - del kwargs["padding"] - if "data_format" in kwargs: - del kwargs["data_format"] + param_list = [ + "name", + "pool_size", + "strides", + "padding", + "data_format", + ] + for p in param_list: # pragma: no cover + if p in kwargs: + del kwargs[p] q_layer = QMaxPool2D( name=fp32_layer.name, diff --git a/neural_compressor/tensorflow/keras/layers/separable_conv2d.py b/neural_compressor/tensorflow/keras/layers/separable_conv2d.py index 05ee3a62c72..b3df094fec0 100644 --- a/neural_compressor/tensorflow/keras/layers/separable_conv2d.py +++ b/neural_compressor/tensorflow/keras/layers/separable_conv2d.py @@ -32,15 +32,13 @@ from keras.layers.convolutional.base_separable_conv import SeparableConv # pylint: disable=E0401 from keras.utils import conv_utils # pylint: disable=E0401 -if version1_gte_version2(tf.__version__, "2.16.1"): +if version1_gte_version2(tf.__version__, "2.16.1"): # pragma: no cover class QSeparableConv2D(BaseSeparableConv): def __init__( self, filters, kernel_size, - min_value, - max_value, strides=(1, 1), padding="valid", data_format=None, @@ -205,8 +203,6 @@ def __init__( self, filters, kernel_size, - min_value, - max_value, strides=(1, 1), padding="valid", data_format=None, @@ -368,50 +364,31 @@ def get_config(self): def initialize_int8_separable_conv2d(fp32_layer, q_config): kwargs = fp32_layer.get_config() - if "name" in kwargs: - del kwargs["name"] - if "filters" in kwargs: - del kwargs["filters"] - if "kernel_size" in kwargs: - del kwargs["kernel_size"] - if "strides" in kwargs: - del kwargs["strides"] - if "padding" in kwargs: - del kwargs["padding"] - if "data_format" in kwargs: - del kwargs["data_format"] - if "dilation_rate" in kwargs: - del kwargs["dilation_rate"] - if "depth_multiplier" in kwargs: - del kwargs["depth_multiplier"] - if "activation" in kwargs: - del kwargs["activation"] - if "use_bias" in kwargs: - del kwargs["use_bias"] - if "depthwise_initializer" in kwargs: - del kwargs["depthwise_initializer"] - if "pointwise_initializer" in kwargs: - del kwargs["pointwise_initializer"] - if "bias_initializer" in kwargs: - del kwargs["bias_initializer"] - if "depthwise_regularizer" in kwargs: - del kwargs["depthwise_regularizer"] - if "pointwise_regularizer" in kwargs: - del kwargs["pointwise_regularizer"] - if "activity_regularizer" in kwargs: - del kwargs["activity_regularizer"] - if "bias_regularizer" in kwargs: - del kwargs["bias_regularizer"] - if "depthwise_constraint" in kwargs: - del kwargs["depthwise_constraint"] - if "pointwise_constraint" in kwargs: - del kwargs["pointwise_constraint"] - if "bias_constraint" in kwargs: - del kwargs["bias_constraint"] - if "min_value" in kwargs: - del kwargs["min_value"] - if "max_value" in kwargs: - del kwargs["max_value"] + param_list = [ + "name", + "filters", + "kernel_size", + "strides", + "padding", + "data_format", + "dilation_rate", + "depth_multiplier", + "activation", + "use_bias", + "depthwise_initializer", + "bias_initializer", + "pointwise_initializer", + "depthwise_regularizer", + "activity_regularizer", + "bias_regularizer", + "pointwise_regularizer", + 
"depthwise_constraint", + "bias_constraint", + "pointwise_constraint", + ] + for p in param_list: # pragma: no cover + if p in kwargs: + del kwargs[p] return QSeparableConv2D( name=fp32_layer.name, diff --git a/neural_compressor/tensorflow/quantization/utils/graph_converter.py b/neural_compressor/tensorflow/quantization/utils/graph_converter.py index 30295005686..a0a924ecbe7 100644 --- a/neural_compressor/tensorflow/quantization/utils/graph_converter.py +++ b/neural_compressor/tensorflow/quantization/utils/graph_converter.py @@ -117,7 +117,7 @@ version1_lte_version2, ) -TF_SUPPORTED_MAX_VERSION = "2.15.0" +TF_SUPPORTED_MAX_VERSION = "2.16.1" TF_SUPPORTED_MIN_VERSION = "1.14.0" logger = logging.getLogger("neural_compressor") @@ -231,10 +231,6 @@ def _inference(self, model): Args: model(TensorflowBaseModel): input TensorflowBaseModel """ - if self.calib_func: - self.calib_func(model.model) - return - if model.model_type == "llm_saved_model": self._inference_llm(model) return @@ -264,7 +260,9 @@ def _inference(self, model): for idx, (inputs, labels) in enumerate(self.data_loader): if len(input_tensor) == 1: feed_dict = {} - if isinstance(inputs, dict) or isinstance(inputs, OrderedDict) or isinstance(inputs, UserDict): + if ( + isinstance(inputs, dict) or isinstance(inputs, OrderedDict) or isinstance(inputs, UserDict) + ): # pragma: no cover for name in inputs: for tensor in input_tensor: pos = tensor.name.rfind(":") @@ -274,7 +272,7 @@ def _inference(self, model): break else: feed_dict = {input_tensor[0]: inputs} # get raw tensor using index [0] - else: + else: # pragma: no cover assert len(input_tensor) == len(inputs), "inputs len must equal with input_tensor" feed_dict = {} if isinstance(inputs, dict) or isinstance(inputs, OrderedDict) or isinstance(inputs, UserDict): @@ -345,7 +343,7 @@ def _inference_llm(self, model): if idx >= self.calib_iteration: break - def _check_tf_version(self): + def _check_tf_version(self): # pragma: no cover """Check if the installed tensorflow version is supported.""" is_supported_version = False is_sprbase_version = False @@ -466,7 +464,7 @@ def convert(self): else: model = self.quantize() - if self.itex_mode: + if self.itex_mode: # pragma: no cover host_const_graph_def = PostHostConstConverter(self._tmp_model.graph_def).do_transformation() host_const_graph_def.library.CopyFrom(self.model.graph_def.library) self._tmp_model.graph_def = host_const_graph_def @@ -524,7 +522,9 @@ def _get_fp32_print_node_names(self, specified_op_list): for i in target_conv_op: if specified_op_list and i not in specified_op_list: continue - if node_name_mapping[i + "_eightbit_quantized_conv"].op == "QuantizedConv2DWithBiasSumAndRelu": + if ( + node_name_mapping[i + "_eightbit_quantized_conv"].op == "QuantizedConv2DWithBiasSumAndRelu" + ): # pragma: no cover start_index = sorted_node_names.index(i) for index, value in enumerate(sorted_node_names[start_index:]): if ( @@ -553,7 +553,7 @@ def _get_fp32_print_node_names(self, specified_op_list): self._fp32_model.graph_def = fp32_graph_def return self._fp32_model - def _search_y_pattern_for_itex(self): + def _search_y_pattern_for_itex(self): # pragma: no cover """Search the Y pattern for itex and return the op name.""" g = GraphAnalyzer() g.graph = self._fp32_model.graph_def @@ -633,7 +633,7 @@ def quantize(self): self._freeze_requantization_ranges(self._kl_op_dict) self._fuse_requantize_with_fused_quantized_node() - except ValueError as e: + except ValueError as e: # pragma: no cover logger.error("Fail to quantize graph due to 
{}.".format(str(e))) self._tmp_model = None raise @@ -944,7 +944,7 @@ def _insert_qdq_pairs(self): def _convert_qdq(self): """Convert Dequantize + Op + QuantizeV2 into QuantizedOps.""" - if self.itex_mode: + if self.itex_mode: # pragma: no cover self._tmp_graph_def, quantizev2_max = FreezeValueTransformer( self._tmp_graph_def, self._calibration_data, "__max:", self.itex_mode ).do_transformation() diff --git a/neural_compressor/tensorflow/quantization/utils/graph_converter_without_calib.py b/neural_compressor/tensorflow/quantization/utils/graph_converter_without_calib.py deleted file mode 100644 index 1ec3712a39d..00000000000 --- a/neural_compressor/tensorflow/quantization/utils/graph_converter_without_calib.py +++ /dev/null @@ -1,384 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -"""Without calibration Graph Converter Class.""" - -import copy -import logging -import os - -import tensorflow as tf -from tensorflow.python.platform import gfile - -from neural_compressor.tensorflow.quantization.utils.graph_rewriter.bf16.bf16_convert import BF16Convert -from neural_compressor.tensorflow.quantization.utils.graph_rewriter.generic.fold_batch_norm import ( - FoldBatchNormNodesOptimizer, -) -from neural_compressor.tensorflow.quantization.utils.graph_rewriter.generic.fuse_pad_with_conv import ( - FusePadWithConv2DOptimizer, -) -from neural_compressor.tensorflow.quantization.utils.graph_rewriter.generic.remove_training_nodes import ( - RemoveTrainingNodesOptimizer, -) -from neural_compressor.tensorflow.quantization.utils.graph_rewriter.generic.strip_unused_nodes import ( - StripUnusedNodesOptimizer, -) -from neural_compressor.tensorflow.quantization.utils.graph_rewriter.int8.freeze_value_without_calib import ( - FreezeValueWithoutCalibTransformer, -) -from neural_compressor.tensorflow.quantization.utils.graph_rewriter.int8.fuse_conv_requantize import ( - FuseConvRequantizeTransformer, -) -from neural_compressor.tensorflow.quantization.utils.graph_rewriter.int8.fuse_matmul_requantize import ( - FuseMatMulRequantizeDequantizeTransformer, - FuseMatMulRequantizeTransformer, -) -from neural_compressor.tensorflow.quantization.utils.graph_rewriter.int8.meta_op_optimizer import ( - MetaInfoChangingMemOpOptimizer, -) -from neural_compressor.tensorflow.quantization.utils.graph_rewriter.int8.post_quantized_op_cse import PostCseOptimizer -from neural_compressor.tensorflow.quantization.utils.graph_rewriter.int8.rnn_convert import QuantizedRNNConverter -from neural_compressor.tensorflow.quantization.utils.graph_rewriter.int8.scale_propagation import ( - ScaleProPagationTransformer, -) -from neural_compressor.tensorflow.quantization.utils.graph_util import GraphAnalyzer -from neural_compressor.tensorflow.quantization.utils.quantize_graph.quantize_graph_for_intel_cpu import ( - QuantizeGraphForIntel, -) -from neural_compressor.tensorflow.quantization.utils.quantize_graph_common import QuantizeGraphHelper -from 
neural_compressor.tensorflow.quantization.utils.transform_graph.bias_correction import BiasCorrection -from neural_compressor.tensorflow.quantization.utils.transform_graph.rerange_quantized_concat import ( - RerangeQuantizedConcat, -) -from neural_compressor.tensorflow.utils import ( - SPR_BASE_VERSIONS, - Model, - deep_get, - version1_eq_version2, - version1_gt_version2, - version1_gte_version2, - version1_lt_version2, - version1_lte_version2, -) - -TF_SUPPORTED_MAX_VERSION = "2.15.0" -TF_SUPPORTED_MIN_VERSION = "1.14.0" - -logger = logging.getLogger("neural_compressor") -debug = bool(logger.level == logging.DEBUG) - - -class GraphConverterWithoutCalib: - """Graph Converter without calibration Class is used to generate the quantization graph without calibration.""" - - def __init__( - self, model, data_loader=None, recover_config=None, new_api=False, performance_only=False, use_bf16=False - ): - """Convert graph without calibration. - - :param model: input tensorflow model. - :param qt_config: quantization configs, including interaction and op-wise quant config - :param fp32_ops: fall back to fp32 dtype op list - :param bf16_ops: fall back to bf16 dtype op list - :param data_loader: for calibration phase used dataloader - :param recover_config: config for recovering tuned model - """ - # Logger initial - self.model = model - # (TODO) does it right to make the internal model format as graph_def - self.output_tensor_names = self.model.output_tensor_names - self.input_tensor_names = self.model.input_tensor_names - # quantize specific config - self.op_wise_config = recover_config["op_wise_config"] - self.advance_config = deep_get(recover_config, "advance") - self.device = recover_config["device"] if "device" in recover_config else "cpu" - self.int8_sequences = recover_config["int8_sequences"] - self.fp32_ops = recover_config["fp32_ops"] - self.bf16_ops = recover_config["bf16_ops"] - self.recipes = recover_config["recipes"] - self.quantized_node_info = [] - self._calibration_data = [] - self._fp32_print_data = [] - self.data_loader = data_loader - self.recover_config = recover_config - self._check_tf_version() - self._check_args() - self._gen_tmp_filenames() - self.new_api = new_api - self.performance_only = performance_only - self.use_bf16 = use_bf16 - self._tmp_graph_def = copy.deepcopy(self.model.graph_def) - - # pylint: disable=no-member - def _check_tf_version(self): - """Check if the installed tensorflow version is supported.""" - is_supported_version = False - is_sprbase_version = False - try: - from tensorflow import python - - if hasattr(python, "pywrap_tensorflow") and hasattr( - python.pywrap_tensorflow, "IsMklEnabled" - ): # pragma: no cover - from tensorflow.python.pywrap_tensorflow import IsMklEnabled - elif hasattr(python.util, "_pywrap_util_port"): - from tensorflow.python.util._pywrap_util_port import IsMklEnabled - else: - from tensorflow.python._pywrap_util_port import IsMklEnabled - if IsMklEnabled() and (version1_lte_version2(TF_SUPPORTED_MIN_VERSION, tf.version.VERSION)): - is_supported_version = True - - if version1_gte_version2(tf.version.VERSION, "2.6.0") and os.getenv("TF_ENABLE_ONEDNN_OPTS") == "1": - is_supported_version = True - - if version1_gte_version2(tf.version.VERSION, "2.9.0"): - is_supported_version = True - - if tf.version.VERSION == "1.15.0-up3": - is_supported_version = True - - if tf.version.VERSION in SPR_BASE_VERSIONS: - is_supported_version = True - is_sprbase_version = True - - except Exception as e: - raise ValueError(e) - finally: # pragma: no 
cover - if version1_gt_version2(tf.version.VERSION, TF_SUPPORTED_MAX_VERSION) and not is_sprbase_version: - logger.warning( - str( - "Please note the {} version of TensorFlow is not fully verified! " - "Suggest to use the versions between {} and {} if meet problem." - ).format(tf.version.VERSION, TF_SUPPORTED_MIN_VERSION, TF_SUPPORTED_MAX_VERSION) - ) - - if version1_eq_version2(tf.version.VERSION, "2.5.0") and os.getenv("TF_ENABLE_MKL_NATIVE_FORMAT") != "0": - logger.fatal( - "Please set environment variable TF_ENABLE_MKL_NATIVE_FORMAT=0 " "when TensorFlow 2.5.0 installed." - ) - - if ( - version1_gte_version2(tf.version.VERSION, "2.6.0") - and version1_lt_version2(tf.version.VERSION, "2.9.0") - and os.getenv("TF_ENABLE_ONEDNN_OPTS") != "1" - ): - logger.fatal( - "Please set environment variable TF_ENABLE_ONEDNN_OPTS=1 " - "when TensorFlow >= 2.6.0 and < 2.9.0 installed." - ) - - if not is_supported_version: - raise ValueError( - str("Please install TensorFlow within version >={} and <={}.").format( - TF_SUPPORTED_MIN_VERSION, TF_SUPPORTED_MAX_VERSION - ) - ) - - def _check_args(self): - """Check model's arguments.""" - if ( - self.model.workspace_path - and not os.path.isdir(self.model.workspace_path) - and not os.path.exists(os.path.dirname(self.model.workspace_path)) - ): - raise ValueError('"output_graph" directory does not exist.') - self._output_path = self.model.workspace_path - - def _gen_tmp_filenames(self): - """Generate the temporary file names.""" - self._int8_dynamic_range_model_path = os.path.join(self._output_path, "int8_dynamic_range_graph") - self._int8_logged_model_path = os.path.join(self._output_path, "int8_logged_graph") - self._fp32_logged_model_path = os.path.join(self._output_path, "fp32_logged_graph") - self._int8_frozen_range_model_path = os.path.join(self._output_path, "int8_frozen_range_graph") - self._bf16_mixed_precision_model_path = os.path.join(self._output_path, "int8_bf16_mixed_precision_graph") - - self.output_graph = os.path.join(self._output_path, "int8_final_fused_graph") - # to keep temp model - self._tmp_model = Model(self.model._model, **self.model.kwargs) - self._tmp_model.output_tensor_names = self.output_tensor_names - self._tmp_model.input_tensor_names = self.input_tensor_names - - def convert_without_calib(self): - """Do conversion without calibration.""" - model = self._tmp_model - - if len(self.op_wise_config) > 0: - model = self.quantize_without_calib() - - if len(self.bf16_ops) > 0: - model = self.bf16_convert() - - post_cse_graph_def = PostCseOptimizer(model.graph_def).do_transformation() - post_cse_graph_def.library.CopyFrom(self.model.graph_def.library) - model.graph_def = post_cse_graph_def - - if debug: - model.save(self.output_graph) - - return model - - def _analysis_rnn_model(self): - """Match the RNN pattern.""" - g = GraphAnalyzer() - g.graph = self._tmp_graph_def - graph_info = g.parse_graph() - rnn_pattern = [["TensorArrayV3"], ["Enter"], ["TensorArrayReadV3"], ["MatMul"], ["BiasAdd"]] - target_nodes = g.query_fusion_pattern_nodes(rnn_pattern) - res = {} - for i in target_nodes: - if i[-3] not in self.bf16_ops and i[-3] not in self.fp32_ops: - res[(i[-3], i[-2])] = graph_info[i[1]].node.attr["frame_name"].s.decode() - - return res - - def quantize_without_calib(self): - """Quantize graph only (without optimizing fp32 graph). - - Including: - 1) quantize graph, - 2) fuse RequantizeOp with fused quantized conv, and so on. 
- - :return: - """ - try: - self._quantize_graph() - self._rnn_details = self._analysis_rnn_model() - self._freeze_requantization_ranges_without_calib() - self._fuse_requantize_with_fused_quantized_node() - except Exception as e: - import traceback - - traceback.print_exc() - self._tmp_model = None - logger.error("Fail to quantize graph due to {}.".format(str(e))) - finally: - if not debug: - self._post_clean() - return self._tmp_model - - def bf16_convert(self): - """Convert fp32 nodes in bf16_node to bf16 dtype based on FP32 + INT8 mixed precision graph.""" - try: - self._tmp_model.graph_def = BF16Convert( - self._tmp_model.graph_def, self.fp32_ops, self.bf16_ops - ).do_transformation() - - except Exception as e: - self._tmp_model = None - logger.error("Fail to convert graph due to {}.".format(str(e))) - finally: - if debug: - self._tmp_model.save(self._bf16_mixed_precision_model_path) - - return self._tmp_model - - def _quantize_graph(self): - """Quantize graph.""" - non_pad_ops = list(list(set(self.fp32_ops).union(set(self.bf16_ops)))) - self._tmp_graph_def = FusePadWithConv2DOptimizer( - self._tmp_graph_def, non_pad_ops, self._tmp_model.input_node_names, self.op_wise_config, self.new_api - ).do_transformation() - - self._tmp_graph_def = QuantizeGraphHelper().get_sorted_graph( - self._tmp_graph_def, self._tmp_model.input_node_names, self._tmp_model.output_node_names - ) - - self._tmp_graph_def, self.quantized_node_info, _ = QuantizeGraphForIntel( - self._tmp_graph_def, - self._tmp_model.input_node_names, - self._tmp_model.output_node_names, - self.op_wise_config, - self.int8_sequences, - self.device, - False, - self.new_api, - self.performance_only, - ).do_transform() - - self._tmp_graph_def.library.CopyFrom(self.model.graph_def.library) - if debug: - self._tmp_model.graph_def = self._tmp_graph_def - self._tmp_model.save(self._int8_dynamic_range_model_path) - - def _freeze_requantization_ranges_without_calib(self): - """Freeze requantization ranges after doing quantization.""" - self._tmp_graph_def = FreezeValueWithoutCalibTransformer( - self._tmp_graph_def, self.recover_config, postfix="__min" - ).do_transformation_without_calib() - self._tmp_graph_def = FreezeValueWithoutCalibTransformer( - self._tmp_graph_def, self.recover_config, postfix="__max" - ).do_transformation_without_calib() - self._tmp_graph_def = FreezeValueWithoutCalibTransformer( - self._tmp_graph_def, self.recover_config, postfix="__requant_min_max", device=self.device - ).do_transformation_without_calib() - - self._tmp_graph_def = QuantizedRNNConverter( - self._tmp_graph_def, self._calibration_data, self._rnn_details - ).do_transformation() - - if "scale_propagation_max_pooling" in self.recipes and self.recipes["scale_propagation_max_pooling"]: - self._tmp_graph_def = ScaleProPagationTransformer(self._tmp_graph_def).do_transformation() - - if debug: - self._tmp_graph_def.library.CopyFrom(self.model.graph_def.library) - self._tmp_model.graph_def = self._tmp_graph_def - self._tmp_model.save(self._int8_frozen_range_model_path) - - def _fuse_requantize_with_fused_quantized_node(self): - """Fuse the Requantize/Dequantize with fused quantized Ops.""" - self._tmp_graph_def = FuseConvRequantizeTransformer( - self._tmp_graph_def, self.device, self.new_api - ).do_transformation() - - self._tmp_graph_def = FuseMatMulRequantizeTransformer(self._tmp_graph_def).do_transformation() - - self._tmp_graph_def = FuseMatMulRequantizeDequantizeTransformer(self._tmp_graph_def).do_transformation() - - self._tmp_graph_def = 
StripUnusedNodesOptimizer( - self._tmp_graph_def, self._tmp_model.input_node_names, self._tmp_model.output_node_names - ).do_transformation() - - self._tmp_graph_def = RemoveTrainingNodesOptimizer( - self._tmp_graph_def, protected_nodes=self._tmp_model.output_node_names - ).do_transformation() - - self._tmp_graph_def = FoldBatchNormNodesOptimizer(self._tmp_graph_def).do_transformation() - - if "scale_propagation_concat" in self.recipes and self.recipes["scale_propagation_concat"]: - self._tmp_graph_def = RerangeQuantizedConcat(self._tmp_graph_def, self.device).do_transformation() - - self._tmp_graph_def = MetaInfoChangingMemOpOptimizer(self._tmp_graph_def).do_transformation() - - if self.advance_config is not None and deep_get(self.advance_config, "bias_correction") is not None: - self._tmp_graph_def = BiasCorrection(self._tmp_graph_def, self.model.graph_def).do_transformation() - - self._tmp_graph_def.library.CopyFrom(self.model.graph_def.library) - - self._tmp_model.graph_def = self._tmp_graph_def - - def _post_clean(self): - """Delete the temporarily files generated during the quantization process. - - :return: None - """ - if os.path.exists(self._int8_logged_model_path) and os.path.isdir(self._int8_logged_model_path): - import shutil - - shutil.rmtree(self._int8_logged_model_path) - - elif gfile.Exists(self._int8_logged_model_path + ".pb"): - os.remove(self._int8_logged_model_path + ".pb") diff --git a/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/pre_optimize.py b/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/pre_optimize.py index 44e20f20cc3..beac92c1b8d 100644 --- a/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/pre_optimize.py +++ b/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/pre_optimize.py @@ -75,7 +75,7 @@ def __init__(self, model, new_api, device): "debug_stripper": True, "loop": True, } - else: + else: # pragma: no cover self.optimization = { "pruning": True, "shape": True, @@ -85,7 +85,7 @@ def __init__(self, model, new_api, device): } # Table initialization should disable grappler dependency and pruning pass node_names = [node.name for node in model.graph_def.node] - if "init_all_tables" in node_names: + if "init_all_tables" in node_names: # pragma: no cover self.optimization["dependency"] = False self.optimization["pruning"] = False self.new_api = new_api @@ -144,7 +144,7 @@ def get_optimized_model(self, itex_mode=False): if self.device == "cpu": cpus = tf.config.list_physical_devices("CPU") node_device = cpus[0].name.replace("physical_device:", "") - else: + else: # pragma: no cover gpus = tf.config.list_physical_devices("GPU") if len(gpus) == 0: xpus = tf.config.list_physical_devices("XPU") @@ -253,7 +253,7 @@ def get_optimized_model(self, itex_mode=False): if self.device == "cpu": cpus = list_physical_devices("CPU") node_device = cpus[0].name.replace("physical_device:", "") - else: + else: # pragma: no cover gpus = list_physical_devices("GPU") if len(gpus) == 0: xpus = list_physical_devices("XPU") @@ -272,7 +272,7 @@ def get_optimized_model(self, itex_mode=False): self._tmp_graph_def.library.CopyFrom(self.model.graph_def.library) for function_def in self.model.graph_def.library.function: - if function_def.signature.name == "swish_f32": + if function_def.signature.name == "swish_f32": # pragma: no cover self._tmp_graph_def.library.function.extend([copy.deepcopy(function_def)]) origin_model.graph_def = self._tmp_graph_def diff --git 
a/neural_compressor/tensorflow/quantization/utils/graph_rewriter/int8/freeze_value.py b/neural_compressor/tensorflow/quantization/utils/graph_rewriter/int8/freeze_value.py index 3d0cef7da9e..20325ea7f42 100644 --- a/neural_compressor/tensorflow/quantization/utils/graph_rewriter/int8/freeze_value.py +++ b/neural_compressor/tensorflow/quantization/utils/graph_rewriter/int8/freeze_value.py @@ -79,7 +79,7 @@ def _get_valid_log(self): output.append(i) elif semi_count % 2 != 0: self.logger.warning("Invalid line.") - else: + else: # pragma: no cover loop_times = int(semi_count / 2) semi_index = [index for index, value in enumerate(i) if value == ";"] for index in range(loop_times - 1): @@ -165,7 +165,7 @@ def _parse_requantization_ranges(self): res[key].append(sorted(temp_max[key])[target_max_index]) - if self.tensor_data: + if self.tensor_data: # pragma: no cover for k, v in self.tensor_data.items(): if k in res: self.logger.debug("Update node {} min to {}, max to {}.".format(k, v[2], v[3])) @@ -241,7 +241,7 @@ def generate_output_graph_ranges(self, max_name_value): if not self.graph_info.get(in_node_name) or not in_node_name.endswith("_eightbit_quantized_in"): in_node_name = None - if self.itex_mode and "BatchNorm" in node_name: + if self.itex_mode and "BatchNorm" in node_name: # pragma: no cover bn_node_name = node_name[: -len("_eightbit_requant_range")] if bn_node_name not in self.graph_info: bn_node_name = None @@ -284,7 +284,7 @@ def generate_output_graph_ranges(self, max_name_value): attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto(float(value[1]), dtypes.float32, [])) ) - if bn_node_name: + if bn_node_name: # pragma: no cover if self.itex_mode: self.cur_graph.replace_const_node( min_node, diff --git a/neural_compressor/tensorflow/quantization/utils/graph_rewriter/int8/freeze_value_without_calib.py b/neural_compressor/tensorflow/quantization/utils/graph_rewriter/int8/freeze_value_without_calib.py deleted file mode 100644 index d793d333931..00000000000 --- a/neural_compressor/tensorflow/quantization/utils/graph_rewriter/int8/freeze_value_without_calib.py +++ /dev/null @@ -1,123 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Freeze Value without calibration Graph Rewriter.""" - -from tensorflow.core.framework import attr_value_pb2, node_def_pb2 -from tensorflow.python.framework import dtypes, tensor_util - -from neural_compressor.tensorflow.quantization.utils.graph_util import GraphAnalyzer -from neural_compressor.tensorflow.quantization.utils.graph_util import GraphRewriterHelper as Helper - -from ..graph_base import GraphRewriterBase - - -class FreezeValueWithoutCalibTransformer(GraphRewriterBase): - """Freeze value without calibration.""" - - def __init__(self, model, max_min_data, postfix, th=0.95, device="gpu"): - """Free Max/Min value into QuantizeV2 op. - - Args: - model (graphdef): input model - max_min_data (string list): the string context contains max/min values. 
- postfix (string): the specified postfix to locate value. - th (float, optional): The percentage of overall data.Defaults to 0.95. - device (string, optional): The hardware device type, 'cpu' or 'gpu'. - """ - super().__init__(model) - self.data = max_min_data - if 0.0 < th <= 1.0: - self.threshold = th - else: - self.logger.warning("The threshold value for clipping is invalid, " "Reset it to 0.95 by default.") - self.threshold = 0.95 - self.postfix = postfix - self.device = device - self.cur_graph = GraphAnalyzer() - self.cur_graph.graph = self.model - - self.graph_info = self.cur_graph.parse_graph() - - def generate_output_graph(self, max_name_value): - """Generate transformed graph for freeze_max/freeze_min transformation. - - :param max_name_value: target values - :return: transformed graph - """ - for node_name, value in max_name_value.items(): - node_name = node_name.replace(":", "__port__").replace("^", "__hat__") - if node_name not in self.graph_info: - continue - new_node = node_def_pb2.NodeDef() - new_node.op = "Const" - new_node_postfix = "/frozen_{}_only".format("".join([x for x in self.postfix if x.isalpha()])) - new_node.name = node_name + new_node_postfix - new_node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) - new_node.attr["value"].CopyFrom( - attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto(float(value), dtypes.float32, [])) - ) - output_node_name = self.graph_info[node_name].outputs[0] - self.cur_graph.replace_const_node(new_node, [Helper.node_name_from_input(output_node_name)], node_name) - self.cur_graph.remove_node(node_name) - - return GraphAnalyzer().dump_graph() - - def generate_output_graph_ranges(self, max_name_value): - """Generate transformed graph for freeze_max/freeze_min transformation. 
- - :param max_name_value: target values - :return: transformed graph - """ - for node_name, value in max_name_value.items(): - if node_name not in self.graph_info: - continue - - min_node = node_def_pb2.NodeDef() - min_node.op = "Const" - min_node_postfix = "/frozen_min" - min_node.name = node_name + min_node_postfix - min_node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) - min_node.attr["value"].CopyFrom( - attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto(float(value[0]), dtypes.float32, [])) - ) - - max_node = node_def_pb2.NodeDef() - max_node.op = "Const" - max_node_postfix = "/frozen_max" - max_node.name = node_name + max_node_postfix - max_node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) - max_node.attr["value"].CopyFrom( - attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto(float(value[1]), dtypes.float32, [])) - ) - output_node_name = self.graph_info[node_name].outputs[0] - self.cur_graph.replace_const_node( - min_node, [Helper.node_name_from_input(output_node_name)], node_name + ":0" - ) - self.cur_graph.replace_const_node( - max_node, [Helper.node_name_from_input(output_node_name)], node_name + ":1" - ) - self.cur_graph.remove_node(node_name) - - return GraphAnalyzer().dump_graph() - - def do_transformation_without_calib(self): - """Apply transformation without calibration.""" - if self.postfix == "__requant_min_max": - range_data = self.data[self.postfix] - return self.generate_output_graph_ranges(range_data) - max_name_value = self.data[self.postfix] - return self.generate_output_graph(max_name_value) diff --git a/neural_compressor/tensorflow/quantization/utils/graph_rewriter/int8/rnn_convert.py b/neural_compressor/tensorflow/quantization/utils/graph_rewriter/int8/rnn_convert.py deleted file mode 100644 index 55142680b09..00000000000 --- a/neural_compressor/tensorflow/quantization/utils/graph_rewriter/int8/rnn_convert.py +++ /dev/null @@ -1,296 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
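# --- Editorial sketch (not part of this diff) ----------------------------------------------
# The QuantizedRNNConverter deleted below quantizes the RNN weight constant per tensor: it
# nudges the observed min/max range and then runs a SCALED qint8 QuantizeV2. The helper below
# is a minimal, self-contained illustration of that logic using the public TF API; the
# function name is illustrative only and does not exist in the library.
import numpy as np
import tensorflow as tf

def quantize_weight_qint8(weights):
    """Nudge the min/max range, then quantize a float tensor to qint8 in SCALED mode."""
    tensor = tf.convert_to_tensor(np.asarray(weights), dtype=tf.float32)
    min_value = float(tf.reduce_min(tensor))
    max_value = float(tf.reduce_max(tensor))
    if min_value > 0.0:  # keep zero inside the quantization range
        min_value = 0.0
    if min_value == max_value:  # avoid a degenerate, zero-width range
        if abs(min_value) < 1e-6:
            max_value = min_value + 1.0
        elif min_value > 0:
            max_value = 2 * min_value
        else:
            max_value = min_value / 2.0
    # tf.quantization.quantize is the public entry point of the QuantizeV2 op used by the
    # removed converter; it returns the quantized tensor plus the adjusted min/max range.
    out, out_min, out_max = tf.quantization.quantize(
        tensor, min_value, max_value, tf.qint8, mode="SCALED", round_mode="HALF_TO_EVEN"
    )
    return out.numpy(), float(out_min), float(out_max)
# --------------------------------------------------------------------------------------------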
-"""Quantized RNN Graph Rewriter.""" - -import numpy as np -import tensorflow as tf -from tensorflow.python.framework import dtypes, tensor_util -from tensorflow.python.ops import array_ops - -from neural_compressor.tensorflow.quantization.utils.graph_util import GraphAnalyzer -from neural_compressor.tensorflow.quantization.utils.graph_util import GraphRewriterHelper as Helper -from neural_compressor.tensorflow.utils import dump_elapsed_time - -from ..graph_base import GraphRewriterBase - - -class QuantizedRNNConverter(GraphRewriterBase): - """Quantized RNN converter.""" - - def __init__(self, model, calibration_data, rnn_details, new_api=False): - """Initialization.""" - super().__init__(model) - self.calibration_data = calibration_data - self.rnn_details = rnn_details - self.new_api = new_api - - @dump_elapsed_time("Pass QuantizedRNNConverter") - def do_transformation(self): - """Apply the RNN conversion.""" - g = GraphAnalyzer() - g.graph = self.model - graph_info = g.parse_graph() - - for i in self.rnn_details.keys(): # pragma: no cover - start_node_name = graph_info[i[0]].node.input[0] - - matmul_b_node_name = graph_info[i[0]].node.input[1] - matmul_b_node = graph_info[Helper.node_name_from_input(matmul_b_node_name)].node - if matmul_b_node.op == "Split": - enter_node_name = matmul_b_node.input[1] - elif matmul_b_node.op == "Enter": - enter_node_name = graph_info[i[0]].node.input[1] - else: - continue - - min_str = i[0] + "_eightbit_min_" + start_node_name + "__print__;__min:" - input_min_values = [] - input_max_values = [] - output_min_values = [] - output_max_values = [] - max_str = i[0] + "_eightbit_max_" + start_node_name + "__print__;__max:" - output_str = i[0] + "_eightbit_requant_range__print__;__requant_min_max:" - for j in self.calibration_data: - if j.find(min_str) != -1: - input_min_values.append(float(j.split("[")[-1].split("]")[0])) - if j.find(max_str) != -1: - input_max_values.append(float(j.split("[")[-1].split("]")[0])) - - if j.find(output_str) != -1: - output_min_values.append(float(j.split(":")[-1][1:].split("]")[0])) - output_max_values.append(float(j.split("][")[-1][:-1])) - min_input = min(input_min_values) - max_input = max(input_max_values) - min_output = min(output_min_values) - max_output = max(output_max_values) - q_max_in_node = Helper.create_constant_node(i[0] + "_quant_max", max_input, dtypes.float32) - - q_min_in_node = Helper.create_constant_node(i[0] + "_quant_min", min_input, dtypes.float32) - q_enter_min_node = Helper.create_node("Enter", q_min_in_node.name + "_enter", [q_min_in_node.name]) - Helper.set_attr_string(q_enter_min_node, "frame_name", self.rnn_details[i].encode()) - Helper.set_attr_dtype(q_enter_min_node, "T", dtypes.float32) - Helper.set_attr_bool(q_enter_min_node, "is_constant", True) - Helper.set_attr_int(q_enter_min_node, "parallel_iterations", 32) - q_enter_max_node = Helper.create_node("Enter", q_max_in_node.name + "_enter", [q_max_in_node.name]) - Helper.set_attr_dtype(q_enter_max_node, "T", dtypes.float32) - Helper.set_attr_string(q_enter_max_node, "frame_name", self.rnn_details[i].encode()) - Helper.set_attr_bool(q_enter_max_node, "is_constant", True) - Helper.set_attr_int(q_enter_max_node, "parallel_iterations", 32) - - weight_node_name = graph_info[Helper.node_name_from_input(enter_node_name)].node.input[0] - weight_node = graph_info[Helper.node_name_from_input(weight_node_name)].node - if weight_node.attr["dtype"].type == dtypes.qint8: - qint8_const_name = weight_node_name - else: - base_name = weight_node_name + "_" - 
qint8_const_name = base_name + "qint8_const" - min_name = base_name + "min" - max_name = base_name + "max" - - need_to_create_const_node = bool(qint8_const_name not in graph_info) - if need_to_create_const_node: - float_tensor = tensor_util.MakeNdarray(weight_node.attr["value"].tensor) - - min_value = np.min(float_tensor.flatten()) - max_value = np.max(float_tensor.flatten()) - # Same processing of min-max as in quantize_weight_eightbit - # function. - if min_value > 0.0: - min_value = 0.0 - if min_value == max_value: - if abs(min_value) < 0.000001: - max_value = min_value + 1.0 - elif min_value > 0: - max_value = 2 * min_value - else: - max_value = min_value / 2.0 - - sess = tf.compat.v1.Session() - with sess.as_default(): - quantize_op = array_ops.quantize_v2( - float_tensor, min_value, max_value, dtypes.qint8, mode="SCALED", round_mode="HALF_TO_EVEN" - ) - qint8_tensor = quantize_op[0].numpy() if tf.executing_eagerly() else quantize_op[0].eval() - # Updated min-max values should be passed to the next - # feeding node. - min_value = quantize_op[1].numpy() if tf.executing_eagerly() else quantize_op[1].eval() - max_value = quantize_op[2].numpy() if tf.executing_eagerly() else quantize_op[2].eval() - sess.close() - - shape = tensor_util.TensorShapeProtoToList(weight_node.attr["value"].tensor.tensor_shape) - qint8_const_node = Helper.create_constant_node( - qint8_const_name, qint8_tensor, dtypes.qint8, shape=shape - ) - - min_node = Helper.create_constant_node(min_name, min_value, dtypes.float32) - - max_node = Helper.create_constant_node(max_name, max_value, dtypes.float32) - enter_min_node = Helper.create_node("Enter", min_name + "_enter", [min_name]) - Helper.set_attr_string(enter_min_node, "frame_name", self.rnn_details[i].encode()) - Helper.set_attr_dtype(enter_min_node, "T", dtypes.float32) - Helper.set_attr_bool(enter_min_node, "is_constant", True) - Helper.set_attr_int(enter_min_node, "parallel_iterations", 32) - enter_max_node = Helper.create_node("Enter", max_name + "_enter", [max_name]) - Helper.set_attr_dtype(enter_max_node, "T", dtypes.float32) - Helper.set_attr_string(enter_max_node, "frame_name", self.rnn_details[i].encode()) - Helper.set_attr_bool(enter_max_node, "is_constant", True) - Helper.set_attr_int(enter_max_node, "parallel_iterations", 32) - else: - qint8_const_node = graph_info[qint8_const_name].node - min_node = graph_info[min_name].node - max_node = graph_info[max_name].node - quant_input = [start_node_name, q_enter_min_node.name, q_enter_max_node.name] - quantize_node = Helper.create_node("QuantizeV2", i[0] + "_quantize", quant_input) - Helper.set_attr_dtype(quantize_node, "T", dtypes.quint8) - Helper.set_attr_string(quantize_node, "mode", b"MIN_FIRST") - g.add_node(quantize_node, start_node_name, [i[0]]) - g.add_node(q_enter_max_node, None, [quantize_node.name]) - g.add_node(q_enter_min_node, None, [quantize_node.name]) - g.add_node(q_max_in_node, None, [q_enter_max_node.name]) - g.add_node(q_min_in_node, None, [q_enter_min_node.name]) - - bias_node = graph_info[graph_info[i[0]].outputs[0]].node - if graph_info[bias_node.name].outputs: - last_node_name = [graph_info[graph_info[bias_node.name].outputs[0]].node.name] - else: - last_node_name = [] - quantized_matmul_input = [ - quantize_node.name, - Helper.node_name_from_input(graph_info[i[0]].node.input[1]), - bias_node.input[1], - ] - quantized_matmul_input.append(quantize_node.name + ":1") - quantized_matmul_input.append(quantize_node.name + ":2") - - quantized_matmul_input.append(enter_min_node.name) - 
quantized_matmul_input.append(enter_max_node.name) - if self.new_api: - quantized_matmul_with_bias_node = Helper.create_node( - "_QuantizedMatMul", i[0] + "_quantized_mat_mul", quantized_matmul_input - ) - else: - quantized_matmul_with_bias_node = Helper.create_node( - "QuantizedMatMulWithBias", i[0] + "_quantized_mat_mul", quantized_matmul_input - ) - Helper.set_attr_dtype(quantized_matmul_with_bias_node, "T1", dtypes.quint8) - Helper.set_attr_dtype(quantized_matmul_with_bias_node, "T2", dtypes.qint8) - Helper.set_attr_dtype(quantized_matmul_with_bias_node, "Tbias", dtypes.float32) - if self.new_api: - Helper.set_attr_dtype(quantized_matmul_with_bias_node, "Tout", dtypes.qint32) - else: - Helper.set_attr_dtype(quantized_matmul_with_bias_node, "Toutput", dtypes.qint32) - Helper.set_attr_bool(quantized_matmul_with_bias_node, "transpose_a", False) - Helper.set_attr_bool(quantized_matmul_with_bias_node, "transpose_b", False) - if self.new_api: - Helper.set_attr_string(quantized_matmul_with_bias_node, "input_quant_mode", b"SCALED") - Helper.set_attr_string(quantized_matmul_with_bias_node, "output_quant_mode", b"SCALED") - Helper.set_attr_string_list(quantized_matmul_with_bias_node, "fused_ops", [b"BiasAdd"]) - Helper.set_attr_type_list( - quantized_matmul_with_bias_node, - "Thost_inputs", - [ - dtypes.quint8.as_datatype_enum, - dtypes.qint8.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - ], - ) - Helper.set_attr_type_list( - quantized_matmul_with_bias_node, - "Thost_outputs", - [dtypes.qint32.as_datatype_enum, dtypes.float32.as_datatype_enum, dtypes.float32.as_datatype_enum], - ) - else: - Helper.set_attr_string(quantized_matmul_with_bias_node, "input_quant_mode", b"MIN_FIRST") - - g.add_node(quantized_matmul_with_bias_node, quantize_node.name, [bias_node.name]) - - if qint8_const_node.name not in graph_info: - g.add_node(qint8_const_node, None, [enter_node_name]) - enter_node = graph_info[enter_node_name].node - if matmul_b_node.op == "Split": - Helper.set_attr_dtype(matmul_b_node, "T", dtypes.qint8) - Helper.set_attr_dtype(enter_node, "T", dtypes.qint8) - graph_info[enter_node.name].node.input[0] = qint8_const_node.name - elif qint8_const_node.name in graph_info: - pass - else: - g.add_node(qint8_const_node, None, [quantized_matmul_with_bias_node.name]) - - if need_to_create_const_node: - g.add_node(enter_min_node, None, [quantized_matmul_with_bias_node.name]) - g.add_node(enter_max_node, None, [quantized_matmul_with_bias_node.name]) - g.add_node(min_node, None, [enter_min_node.name]) - g.add_node(max_node, None, [enter_max_node.name]) - - # create requantize node - requantize_min_node = Helper.create_constant_node(i[0] + "requant_w_min", min_output, dtypes.float32) - requantize_max_node = Helper.create_constant_node(i[0] + "requant_w_max", max_output, dtypes.float32) - - enter_req_min_node = Helper.create_node( - "Enter", requantize_min_node.name + "_enter", [requantize_min_node.name] - ) - Helper.set_attr_string(enter_req_min_node, "frame_name", self.rnn_details[i].encode()) - Helper.set_attr_dtype(enter_req_min_node, "T", dtypes.float32) - Helper.set_attr_bool(enter_req_min_node, "is_constant", True) - Helper.set_attr_int(enter_req_min_node, "parallel_iterations", 32) - - enter_req_max_node = Helper.create_node( - "Enter", requantize_max_node.name + "_enter", [requantize_max_node.name] - ) - Helper.set_attr_dtype(enter_req_max_node, "T", 
dtypes.float32) - Helper.set_attr_string(enter_req_max_node, "frame_name", self.rnn_details[i].encode()) - Helper.set_attr_bool(enter_req_max_node, "is_constant", True) - Helper.set_attr_int(enter_req_max_node, "parallel_iterations", 32) - requantize_input = [ - quantized_matmul_with_bias_node.name, - quantized_matmul_with_bias_node.name + ":1", - quantized_matmul_with_bias_node.name + ":2", - enter_req_min_node.name, - enter_req_max_node.name, - ] - requantize_node = Helper.create_node("Requantize", i[0] + "_requantize", requantize_input) - Helper.set_attr_dtype(requantize_node, "out_type", dtypes.qint8) - Helper.set_attr_dtype(requantize_node, "Tinput", dtypes.qint32) - - g.add_node(requantize_node, quantized_matmul_with_bias_node.name, [bias_node.name]) - dequantize_input = [requantize_node.name, requantize_node.name + ":1", requantize_node.name + ":2"] - dequantize_node = Helper.create_node("Dequantize", i[0] + "_dequantize", dequantize_input) - Helper.set_attr_dtype(dequantize_node, "T", dtypes.qint8) - Helper.set_attr_dtype(dequantize_node, "dtype", dtypes.float32) - Helper.set_attr_string(dequantize_node, "mode", b"MIN_FIRST") - - g.add_node(enter_req_min_node, None, [requantize_node.name]) - g.add_node(enter_req_max_node, None, [requantize_node.name]) - g.add_node(requantize_min_node, None, [enter_req_min_node.name]) - g.add_node(requantize_max_node, None, [enter_req_max_node.name]) - g.add_node(dequantize_node, requantize_node.name, last_node_name) - if last_node_name: - replace_index = [ - Helper.node_name_from_input(i) for i in graph_info[last_node_name[0]].node.input - ].index(bias_node.name) - - graph_info[last_node_name[0]].node.input[replace_index] = dequantize_node.name - g.remove_node(bias_node.name) - g.remove_node(i[0]) - - # g.remove_node(weight_node_name) - - return g.dump_graph() diff --git a/neural_compressor/tensorflow/quantization/utils/quantize_graph/qat/__init__.py b/neural_compressor/tensorflow/quantization/utils/quantize_graph/qat/__init__.py deleted file mode 100644 index 2a47a09d8f1..00000000000 --- a/neural_compressor/tensorflow/quantization/utils/quantize_graph/qat/__init__.py +++ /dev/null @@ -1,17 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Copyright (c) 2022 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Tensorflow QAT Graph Quantizers.""" diff --git a/neural_compressor/tensorflow/quantization/utils/quantize_graph/qat/fake_quantize.py b/neural_compressor/tensorflow/quantization/utils/quantize_graph/qat/fake_quantize.py deleted file mode 100644 index 97f3f6ce3d4..00000000000 --- a/neural_compressor/tensorflow/quantization/utils/quantize_graph/qat/fake_quantize.py +++ /dev/null @@ -1,231 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Copyright (c) 2022 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""QAT Fake Quantize Graph Class.""" - -import abc - -import six -import tensorflow as tf - - -@six.add_metaclass(abc.ABCMeta) -class FakeQuantizeBase(object): - """ABC interface class for applying fake quantization by insert qdq.""" - - @abc.abstractmethod - def __call__(self, inputs, range, training, **kwargs): - """Apply quantization to the input tensor. - - This is the main logic of the 'FakeQuantize' which implements the core logic - to quantize the tensor. It is invoked during the `call` stage of the layer, - and allows modifying the tensors used in graph construction. - - Args: - inputs (tf.Tensor): Input tensor to be quantized. - range (dict): The min-max range of input tensor. - training (bool): Whether the graph is currently training. - **kwargs: Additional variables which may be passed to the FakeQuantize class. - - Returns: - output (tf.Tensor): The tensor to be quantized. - """ - raise NotImplementedError - - @abc.abstractmethod - def get_config(self): - """Returns the config used to serialize the 'FakeQuantize'.""" - raise NotImplementedError("FakeQuantize should implement get_config().") - - @classmethod - def from_config(cls, config): - """Instantiates a 'FakeQuantize' from its config. - - Args: - config (dict): A dict containing required information. - - Returns: - output (FakeQuantize): A 'FakeQuantize' instance. - """ - return cls(**config) - - -class FakeQuantize(FakeQuantizeBase): - """The class that applies fake quantization.""" - - def __init__(self, per_channel=False, num_bits=8, channel_axis=-1, symmetric=True, narrow_range=True): - """Initialize a FakeQuantize class. - - Args: - per_channel (bool): Whether to apply per_channel quantization. The last dimension is - used as the channel. - num_bits (int): Number of bits for quantization. - channel_axis(int): Channel axis. - symmetric (bool): If true, use symmetric quantization limits instead of training - the minimum and maximum of each quantization range separately. - narrow_range (bool): In case of 8 bits, narrow_range nudges the quantized range - to be [-127, 127] instead of [-128, 127]. This ensures symmetric range - has 0 as the centre. - """ - self.num_bits = num_bits - self.per_channel = per_channel - self.symmetric = symmetric - self.narrow_range = narrow_range - self.channel_axis = channel_axis - self.name_prefix = "FakeQuantize" - - def __call__(self, inputs, ranges, training, **kwargs): - """Applying fake quantization by insert qdq. - - The quantized tensor is calculated based on range of the last batch of values. - - Args: - inputs (tf.Tensor): Input tensor to be quantized. - range (dict): The min-max range of input tensor. - training (bool): Whether the graph is currently training. - **kwargs: Additional variables which may be passed to the FakeQuantize class. - - Returns: - output (tf.Tensor): The tensor to be quantized. 
- """ - with tf.name_scope(self.name_prefix): - input_shape = inputs.get_shape() - input_dim = len(input_shape) - if self.channel_axis == -1: - self.channel_axis += input_dim - - if not training: - return self._insert_qdq(inputs, ranges["min_var"], ranges["max_var"]) - - if self.per_channel: - if input_dim == 2: - reduce_dims = [0] - elif input_dim == 4: - reduce_dims = [i for i in range(input_dim) if i != self.channel_axis] - - if self.per_channel: - if input_dim >= 2: - batch_min = tf.math.reduce_min(inputs, axis=reduce_dims, name="BatchMin") - else: - batch_min = inputs - else: - batch_min = tf.math.reduce_min(inputs, name="BatchMin") - - if self.per_channel: - if input_dim >= 2: - batch_max = tf.math.reduce_max(inputs, axis=reduce_dims, name="BatchMax") - else: - batch_max = inputs - else: - batch_max = tf.math.reduce_max(inputs, name="BatchMax") - - if self.symmetric: - if self.narrow_range: - min_max_ratio = -1 - else: - min_max_ratio = -((1 << self.num_bits) - 2) / (1 << self.num_bits) - - range_min = tf.math.minimum(batch_min, batch_max / min_max_ratio) - range_max = tf.math.maximum(batch_max, batch_min * min_max_ratio) - else: - range_min = tf.math.minimum(batch_min, 0.0) - range_max = tf.math.maximum(batch_max, 0.0) - - assign_min = ranges["min_var"].assign(range_min, name="AssignMinLast") - assign_max = ranges["max_var"].assign(range_max, name="AssignMaxLast") - - return self._insert_qdq(inputs, assign_min, assign_max) - - def _insert_qdq(self, inputs, min_var, max_var): - """Adds a fake quantization operation. - - Depending on value of self.per_channel, this operation may do global quantization - or per channel quantization. min_var and max_var should have corresponding - shapes: [1] when per_channel == False and [d] when per_channel == True. - - Args: - inputs (tf.Tensor): A tensor containing values to be quantized. - min_var (tf.Variable): A variable containing quantization range lower end(s). - max_var (tf.Variable): A variable containing quantization range upper end(s). - - Returns: - outputs (tf.Tensor): A tensor containing quantized values. - """ - if self.per_channel: - return tf.quantization.quantize_and_dequantize_v2( - inputs, - min_var, - max_var, - num_bits=self.num_bits, - narrow_range=self.narrow_range, - axis=self.channel_axis, - range_given=True, - ) - else: - assert min_var.get_shape() == [] - assert max_var.get_shape() == [] - - return tf.quantization.quantize_and_dequantize_v2( - inputs, - min_var, - max_var, - num_bits=self.num_bits, - narrow_range=self.narrow_range, - range_given=True, - ) - - def get_config(self): - """Returns the config used to serialize the 'FakeQuantize'. - - Returns: - config (dict): A dict containing required information. - """ - return { - "num_bits": self.num_bits, - "per_channel": self.per_channel, - "symmetric": self.symmetric, - "narrow_range": self.narrow_range, - } - - def __eq__(self, other): - """Check if this instance is equal to another instance. - - Args: - other (FakeQuantize): Another instance to be checked. - - Returns: - is_equal (bool): If the two instances are equal. - """ - if not isinstance(other, FakeQuantize): - return False - - return ( - self.num_bits == other.num_bits - and self.per_channel == other.per_channel - and self.symmetric == other.symmetric - and self.narrow_range == other.narrow_range - ) - - def __ne__(self, other): - """Check if this instance is not equal to another instance. - - Args: - other (FakeQuantize): Another instance to be checked. 
- - Returns: - not_equal (bool): If the two instances are not equal. - """ - return not self.__eq__(other) diff --git a/neural_compressor/tensorflow/quantization/utils/quantize_graph/qat/quantize_config.py b/neural_compressor/tensorflow/quantization/utils/quantize_graph/qat/quantize_config.py deleted file mode 100644 index 1f50f20879e..00000000000 --- a/neural_compressor/tensorflow/quantization/utils/quantize_graph/qat/quantize_config.py +++ /dev/null @@ -1,129 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Copyright (c) 2022 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""QAT Quantize Config Class.""" - -import logging - -global_config = {} -logger = logging.getLogger("neural_compressor") - - -class QuantizeConfig: - """Class for building custom quantize config. - - There should be only one QuantizeConfig instance for global setting. - """ - - def __new__(cls): - """Created a QuantizeConfig instance and add it to the global_config dict. - - Returns: - instance (QuantizeConfig) : The created QuantizeConfig instance. - """ - instance = super().__new__(cls) - global_config["quantize_config"] = instance - return instance - - def __init__(self): - """Initialize QuantizeConfig instance.""" - self.quantize_recipe = {} - self.model_name = None - - def add_quantize_recipe(self, quantize_recipe): # pragma: no cover - """Add custom recipe for quantization to the QuantizeConfig instance. - - Args: - quantize_recipe (dict): A dict that decide whether given layers should be quantized. - A typical quantize_recipe will be a dict of layer_name and - dict as key-value pairs. In each value dict, there should be - a {'quantize': bool} key-value pair and a {'index': list} pair. - The latter one is used to decide which inputs should be quantized - in some layers with multiple inputs. - For example: - {'conv5_block3_3_conv': {'quantize': False} - 'conv5_block3_3_add' : {'quantize': True, 'index': [1, 3]} - } - """ - self.quantize_recipe.update(quantize_recipe) - - def query_layer(self, layer_name): - """Query if a specific layer is in the quantize_recipe dict. - - Args: - layer_name (string): The input layer name. - - Returns: - layer_recipe (dict): The quantize recipe for this input layer. - """ - if layer_name in self.quantize_recipe: - return self.quantize_recipe[layer_name] - return {} - - def remove_layer(self, layer_name): # pragma: no cover - """Remove a specific layer from the quantize_recipe dict. - - Args: - layer_name (string): The name of layer to be removed. - """ - if layer_name in self.quantize_recipe: - del self.quantize_recipe[layer_name] - - def remove_layers(self, layer_names): # pragma: no cover - """Remove a batch of layers from the quantize_recipe dict. - - Args: - layer_names (List): The names of layers to be removed. - """ - for layer_name in layer_names: - self.remove_layer(layer_name) - - def get_quantize_recipe(self): # pragma: no cover - """Get the current recipe dict for quantization. 
- - Returns: - quantize_recipe (dict): A dict that decide whether given layers should be quantized. - """ - return self.quantize_recipe - - def is_empty(self): # pragma: no cover - """Check if the recipe of quantization is an empty dict. - - Returns: - is_empty (bool): True if no custom recipe is updated to this class. - """ - if self.quantize_recipe: - return False - return True - - def clear_quantize_recipe(self): # pragma: no cover - """Clear recipe of quantization to be an empty dict.""" - self.quantize_recipe.clear() - - -layer_wise_config = { - "quantize_layers": { - "Conv2D", - "Dense", - "DepthwiseConv2D", - "MaxPooling2D", - "AveragePooling2D", - "GlobalAveragePooling2D", - }, - "possible_quantize_layers": {"Multiply", "Concatenate", "Add", "BatchNormalization"}, - "weighted_layers": {"Conv2D", "Dense", "DepthwiseConv2D"}, - "multiple_inputs_layers": {"Multiply", "Concatenate", "Add"}, -} diff --git a/neural_compressor/tensorflow/quantization/utils/quantize_graph/qat/quantize_helper.py b/neural_compressor/tensorflow/quantization/utils/quantize_graph/qat/quantize_helper.py deleted file mode 100644 index d28d9474f2b..00000000000 --- a/neural_compressor/tensorflow/quantization/utils/quantize_graph/qat/quantize_helper.py +++ /dev/null @@ -1,92 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Copyright (c) 2022 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""QAT Quantize Helper Class.""" - -from .quantize_config import QuantizeConfig, global_config, layer_wise_config -from .quantize_layers.optimize_layer import config_quantizable_layers -from .quantize_wrapper import QuantizeWrapper - - -def init_quantize_config(model, quantize_recipe=None): - """Initialize quantization config at the beginning of QAT process. - - Args: - model_name (string): Special pre-optimized model name. - quantize_recipe (dict): A dict that decide whether given layers should be quantized. - - Returns: - config (QuantizeConfig): QuantizeConfig instance used to decide whether a specific layer - should be quantized. - """ - assert "quantize_config" not in global_config, ( - "quantize_config has been unexpectedly " "created. Please check your QAT workflow" - ) - - config = QuantizeConfig() - config_quantizable_layers(model) - - if quantize_recipe: - config.add_quantize_recipe(quantize_recipe) - - return config - - -def _is_quantizable_layer(layer): - """Query if the input layer should be quantized. - - Args: - layer (tf.keras.layers.Layer): input Keras layer - - Returns: - capability (bool): whether the input layer is capable of quantization. 
- """ - quantizable = True - layer_class = layer.__class__.__name__ - - quantize_config = global_config["quantize_config"] - specific_layer_config = quantize_config.query_layer(layer.name) - if specific_layer_config: - # the layer is set to be unquantizable by QuantizeConfig - if not specific_layer_config["quantize"]: - return False - else: - if ( - layer_class in layer_wise_config["quantize_layers"] - or layer_class in layer_wise_config["possible_quantize_layers"] - ): - return True - - if layer_class not in layer_wise_config["quantize_layers"]: - quantizable = False - - return quantizable - - -def qat_clone_function(layer): - """Wrap or leave given layer based on quantize config object parameters. - - Args: - layer (tf.keras.layers.Layer): input Keras layer - - Returns: - wrapped_layer (QuantizeWrapper): layer wrapped by QuantizeWrapper class. - """ - wrapped_layer = layer - if _is_quantizable_layer(layer): - wrapped_layer = QuantizeWrapper(layer) - - return wrapped_layer diff --git a/neural_compressor/tensorflow/quantization/utils/quantize_graph/qat/quantize_layers/__init__.py b/neural_compressor/tensorflow/quantization/utils/quantize_graph/qat/quantize_layers/__init__.py deleted file mode 100644 index 81d1403e2b4..00000000000 --- a/neural_compressor/tensorflow/quantization/utils/quantize_graph/qat/quantize_layers/__init__.py +++ /dev/null @@ -1,17 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Copyright (c) 2022 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Tensorflow QAT Graph Quantize Layers.""" diff --git a/neural_compressor/tensorflow/quantization/utils/quantize_graph/qat/quantize_layers/optimize_layer.py b/neural_compressor/tensorflow/quantization/utils/quantize_graph/qat/quantize_layers/optimize_layer.py deleted file mode 100644 index 620942261e1..00000000000 --- a/neural_compressor/tensorflow/quantization/utils/quantize_graph/qat/quantize_layers/optimize_layer.py +++ /dev/null @@ -1,32 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Copyright (c) 2022 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
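# --- Editorial sketch (not part of this diff) ----------------------------------------------
# The QAT helpers removed just above (quantize_helper.py / quantize_config.py) were built
# around Keras model cloning: a clone_function inspects each layer and either returns it
# unchanged or returns a wrapped copy (QuantizeWrapper in the removed code). The snippet
# below is a self-contained illustration of that pattern; the allow-list and the plain
# from_config copy are stand-ins for the removed layer_wise_config and QuantizeWrapper.
import tensorflow as tf

QUANTIZABLE_CLASSES = {"Conv2D", "Dense", "DepthwiseConv2D"}  # stand-in for layer_wise_config

def clone_fn(layer):
    """Return a fresh copy for quantizable layers, the original layer otherwise."""
    if layer.__class__.__name__ in QUANTIZABLE_CLASSES:
        # The removed helpers returned QuantizeWrapper(layer) here; a config copy is used
        # only to keep this sketch runnable without the deleted classes.
        return layer.__class__.from_config(layer.get_config())
    return layer

inputs = tf.keras.Input(shape=(4,))
outputs = tf.keras.layers.Dense(2)(inputs)
base_model = tf.keras.Model(inputs, outputs)
qat_like_model = tf.keras.models.clone_model(base_model, clone_function=clone_fn)
# --------------------------------------------------------------------------------------------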
-"""Optimize layer config.""" - -from .quantize_layer_add import QuantizeLayerAdd -from .quantize_layer_bn import QuantizeLayerBatchNormalization - - -def config_quantizable_layers(model): - """Configure the quantizable layers.""" - quantize_layer_mapping = {"Add": QuantizeLayerAdd, "BatchNormalization": QuantizeLayerBatchNormalization} - - for layer_class, quantize_layer in quantize_layer_mapping.items(): - quantize_layer_mapping[layer_class] = quantize_layer() - - for layer in model.layers: - if layer.__class__.__name__ in quantize_layer_mapping: - quantize_layer_mapping[layer.__class__.__name__](layer) diff --git a/neural_compressor/tensorflow/quantization/utils/quantize_graph/qat/quantize_layers/quantize_layer_add.py b/neural_compressor/tensorflow/quantization/utils/quantize_graph/qat/quantize_layers/quantize_layer_add.py deleted file mode 100644 index ae2db7e4006..00000000000 --- a/neural_compressor/tensorflow/quantization/utils/quantize_graph/qat/quantize_layers/quantize_layer_add.py +++ /dev/null @@ -1,83 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Copyright (c) 2022 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Quantization Add Layer Class.""" - -import logging - -from .quantize_layer_base import QuantizeLayerBase - -logger = logging.getLogger("neural_compressor") - - -class QuantizeLayerAdd(QuantizeLayerBase): # pragma: no cover - """The class for quantization of Add.""" - - def __init__(self): - """Initialize QuantizeLayerAdd class.""" - self.quantize_patterns = [ - ["Conv", "BatchNorm", "Add"], - ["Conv", "BatchNorm", "Activation", "Add"], - ["Conv", "BatchNorm", "Activation", "Dropout", "Add"], - ] - - super().__init__() - - def _quantizable_add(self): - """Check if the input layer meets criteria of quantization. - - Args: - layer (tf.keras.layers.Layer): The input layer. - - Returns: - quantizable (bool): If this layer should be quantized. - """ - input_layer = self._find_input_layers(self.layer) - if len(input_layer) == 1: - logger.warning( - "The layer 'Add' should have more than one input. " - "You input a model with layer {} which has only one input".format(self.layer.name) - ) - return False - - return True - - def __call__(self, layer): - """The main logic of QuantizeLayerAdd. - - Neural Compressor will enumerate all layers of the input model to check - if there are any layer meeting the criteria. The chosen ones will be marked - as quantizable by QuantizeConfig. - - Args: - layer (tf.keras.layers.Layer): The keras layer to be estimated. 
- """ - self.layer = layer - if self._quantizable_add(): - input_layers = self._find_input_layers(self.layer) - fused_conv_index = None - for i, input_layer in enumerate(input_layers): - # Check that the input is a Conv pattern - if "Conv" in input_layer.__class__.__name__ or self._find_patterns(input_layer): - if hasattr(input_layer, "outbound_nodes") and len(getattr(input_layer, "outbound_nodes")) == 1: - fused_conv_index = i - break - - input_indexes = [i for i in range(0, len(input_layers))] - if fused_conv_index: - del input_indexes[fused_conv_index] - - self.quantize_config.add_quantize_recipe({self.layer.name: {"quantize": True, "index": input_indexes}}) diff --git a/neural_compressor/tensorflow/quantization/utils/quantize_graph/qat/quantize_layers/quantize_layer_base.py b/neural_compressor/tensorflow/quantization/utils/quantize_graph/qat/quantize_layers/quantize_layer_base.py deleted file mode 100644 index 2cf5f76c37e..00000000000 --- a/neural_compressor/tensorflow/quantization/utils/quantize_graph/qat/quantize_layers/quantize_layer_base.py +++ /dev/null @@ -1,87 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Copyright (c) 2022 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""QuantizeLayer Base Class.""" - -from ..quantize_config import global_config - - -class QuantizeLayerBase: # pragma: no cover - """QuantizeLayer Base Class.""" - - def __init__(self): - """Initialize QuantizeLayerBase class.""" - self.quantize_patterns = [] - assert "quantize_config" in global_config, "QuantizeConfig is not correctly created." - self.quantize_config = global_config["quantize_config"] - - def _find_input_layers(self, layer): - """Find all inputs of a specific layer. - - Args: - layer (tf.keras.layers.Layer): The target keras layer that this method - is to find its input layers. - - Returns: - input_layers (list): List of input layers found by this method. - """ - input_layers = [] - if isinstance(layer.input, list): - for input_tensor in layer.input: - input_layer = input_tensor._keras_history.layer - input_layers.append(input_layer) - else: - input_layer = layer.input._keras_history.layer - input_layers.append(input_layer) - return input_layers - - def _find_patterns(self, layer): - """Checks if the input layer can satisfy the patterns. - - Args: - layer (tf.keras.layers.Layer): The input keras layer that this method - is to find patterns. - - Returns: - valid_patterns (bool): If the input layer can satisfy any pattern. - """ - if not self.quantize_patterns: - return False - - for quantize_pattern in self.quantize_patterns: - index = len(quantize_pattern) - 2 - previous_layer = layer - while index >= 0: - previous_layer = self._find_input_layers(previous_layer) - if quantize_pattern[index] not in previous_layer.__class__.__name__: - break - index -= 1 - if index == -1: - return True - - return False - - def __call__(self, layer): - """The main logic of QuantizeLayerBase. 
- - Neural Compressor will enumerate all layers of the input model to check - if there are any layer meeting the criteria. The chosen ones will be marked - as quantizable by QuantizeConfig. - - Args: - layer (tf.keras.layers.Layer): The keras layer to be estimated. - """ - raise NotImplementedError() diff --git a/neural_compressor/tensorflow/quantization/utils/quantize_graph/qat/quantize_layers/quantize_layer_bn.py b/neural_compressor/tensorflow/quantization/utils/quantize_graph/qat/quantize_layers/quantize_layer_bn.py deleted file mode 100644 index c44b5da3f7c..00000000000 --- a/neural_compressor/tensorflow/quantization/utils/quantize_graph/qat/quantize_layers/quantize_layer_bn.py +++ /dev/null @@ -1,58 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Copyright (c) 2022 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Quantize Layer BatchNormalization Class.""" - -from .quantize_layer_base import QuantizeLayerBase - - -class QuantizeLayerBatchNormalization(QuantizeLayerBase): # pragma: no cover - """The class for quantization of BatchNormalization.""" - - def __init__(self): - """Initialize QuantizeLayerBatchNormalization class.""" - super().__init__() - - def _quantizable_bn(self): - """Check if the input layer meets criteria of quantization. - - Args: - layer (tf.keras.layers.Layer): The input layer. - - Returns: - quantizable (bool): If this layer should be quantized. - """ - input_layer = self._find_input_layers(self.layer) - assert len(input_layer) == 1, "BatchNormalization only has one input." - input_layer_class = input_layer.__class__.__name__ - if "Conv" not in input_layer_class: - return True - - return False - - def __call__(self, layer): - """The main logic of QuantizeLayerBatchNormalization. - - Neural Compressor will enumerate all layers of the input model to check - if there are any layer meeting the criteria. The chosen ones will be marked - as quantizable by QuantizeConfig. - - Args: - layer (tf.keras.layers.Layer): The keras layer to be estimated. - """ - self.layer = layer - if self._quantizable_bn(): - self.quantize_config.add_quantize_recipe({self.layer.name: {"quantize": True}}) diff --git a/neural_compressor/tensorflow/quantization/utils/quantize_graph/qat/quantize_wrapper.py b/neural_compressor/tensorflow/quantization/utils/quantize_graph/qat/quantize_wrapper.py deleted file mode 100644 index 2baf26c0c24..00000000000 --- a/neural_compressor/tensorflow/quantization/utils/quantize_graph/qat/quantize_wrapper.py +++ /dev/null @@ -1,284 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Copyright (c) 2022 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""QAT Quantize Wrapper Class.""" - -from abc import abstractmethod - -import tensorflow as tf -from tensorflow.python.util import tf_inspect - -from .fake_quantize import FakeQuantize -from .quantize_config import global_config, layer_wise_config - - -class QuantizeWrapperBase(tf.keras.layers.Wrapper): - """Base class for quantize wrapper.""" - - def __init__(self, layer, **kwargs): - """Create a quantize wrapper for a keras layer. - - This wrapper provides options to quantize inputs and weights of the layer. - - Args: - layer (tf.keras.layers.Layer): The keras layer to be wrapped. - **kwargs: Additional keyword arguments to be passed. - """ - assert layer is not None, "'layer' should not be None." - - assert isinstance(layer, tf.keras.layers.Layer) or isinstance(layer, tf.keras.Model), ( - "'layer' can only be a 'tf.keras.layers.Layer' instance." - " You passed an instance of type: {input}.".format(input=layer.__class__.__name__) - ) - - if "name" not in kwargs: - kwargs["name"] = self._make_layer_name(layer) - - super(QuantizeWrapperBase, self).__init__(layer, **kwargs) - - self.index = None - self._layer_class = layer.__class__.__name__ - self._track_trackable(layer, name="layer") - - @staticmethod - def _make_layer_name(layer): - """Modify the layer name to be quantized layer.""" - return "{}_{}".format("quant", layer.name) - - def build(self, input_shape): - """Creates the variables of the layer. - - Args: - input_shape (tf.TensorShape or list): shapes of input tensors - """ - super(QuantizeWrapperBase, self).build(input_shape) - - self.optimizer_step = self.add_weight( - "optimizer_step", - initializer=tf.keras.initializers.Constant(-1), - dtype=tf.dtypes.int32, - trainable=False, - ) - - def _init_min_max_variables(self, name, shape): - """Initialize the minimum and maximum values of variables to the wrapped layer. - - Args: - name (string): Name prefix of the variables. - shape (tf.TensorShape): shape of variables to be added. - - Returns: - min_variable (tf.Variable) : The initialized minimum value of given variables. - min_variable (tf.Variable) : The initialized maximum value of given variables. - """ - min_variable = self.layer.add_weight( - name + "_min", - shape=(shape), - trainable=False, - initializer=tf.keras.initializers.Constant(-6.0), - ) - max_variable = self.layer.add_weight( - name + "_max", - shape=(shape), - trainable=False, - initializer=tf.keras.initializers.Constant(6.0), - ) - - return min_variable, max_variable - - def query_input_index(self): - """Query QuantizeConfig to check if there is any designated input index for this layer.""" - quantize_config = global_config["quantize_config"] - custom_layer_config = quantize_config.query_layer(self.layer) - if custom_layer_config and "index" in custom_layer_config: - self.index = custom_layer_config["index"] - - @abstractmethod - def call(self, inputs, training=None): - """This is where the quantize wrapper's logic lives. - - Args: - inputs (tf.Tensor or dict/list/tuple): Inputs of the wrapped layer. - - Returns: - outputs (tf.Tensor or dict/list/tuple): Outputs of the wrapped layer. 
- """ - raise NotImplementedError - - @property - def trainable(self): - """Get trainable attribute for the layer and its sublayers.""" - return self.layer.trainable - - @trainable.setter - def trainable(self, value): - """Set trainable attribute for the layer and its sublayers. - - Args: - value (Boolean): The desired state for the layer's trainable attribute. - """ - self.layer.trainable = value - - @property - def trainable_weights(self): - """List of all trainable weights tracked by this layer. - - Trainable weights are updated via gradient descent during training. - - Returns: - trainable_weights (list): A list of trainable variables. - """ - return self.layer.trainable_weights + self._trainable_weights - - @property - def non_trainable_weights(self): - """List of all non-trainable weights tracked by this layer. - - Non-trainable weights are *not* updated during training. They are - expected to be updated manually in `call()`. - - Returns: - non_trainable_weights (list): A list of non-trainable variables. - """ - return self.layer.non_trainable_weights + self._non_trainable_weights - - @property - def updates(self): - """Update layer.""" - return self.layer.updates + self._updates - - @property - def losses(self): - """List of losses added using the `add_loss()` API. - - Variable regularization tensors are created when this property is - accessed, so it is eager safe: accessing `losses` under a - `tf.GradientTape` will propagate gradients back to the corresponding - variables. - - Returns: - losses (list): A list of tensors. - """ - return self.layer.losses + self._losses - - -class QuantizeWrapper(QuantizeWrapperBase): - """General QuantizeWrapper for quantizable layers. - - Weights and inputs will be quantized according to the layer type and quantize config. - """ - - def __init__(self, layer, **kwargs): - """Create a quantize wrapper for a keras layer. - - This wrapper provides options to quantize inputs and weights of the layer. - - Args: - layer (tf.keras.layers.Layer): The keras layer to be wrapped. - **kwargs: Additional keyword arguments to be passed. - """ - super().__init__(layer, **kwargs) - - self.kernel = "kernel" - self.kernel_weights = None - self.channel_axis = kwargs.get("axis", -1) - if self._layer_class == "DepthwiseConv2D": - self.kernel = "depthwise_kernel" - self.channel_axis = 2 - if self._layer_class in layer_wise_config["multiple_inputs_layers"]: - self.query_input_index() - - def build(self, input_shape): - """Creates the variables of the layer. 
- - Args: - input_shape (tf.TensorShape or list): shapes of input tensors - """ - super().build(input_shape) - - if self._layer_class in layer_wise_config["weighted_layers"]: - self.kernel_weights = getattr(self.layer, self.kernel) - - weight_min, weight_max = self._init_min_max_variables( - name=self.kernel_weights.name.split(":")[0], shape=self.kernel_weights.shape[self.channel_axis] - ) - - self.weight_range = {"min_var": weight_min, "max_var": weight_max} - self._trainable_weights.append(self.kernel_weights) - - num_input = 1 - if not isinstance(input_shape, tf.TensorShape): - num_input = len(input_shape) - self.query_input_index() - if not self.index: - self.index = [i for i in range(num_input)] - - if num_input == 1: - inputs_min, inputs_max = self._init_min_max_variables( - name=self.layer.name + "_input{}".format(0), shape=None - ) - self.inputs_range = {"min_var": inputs_min, "max_var": inputs_max} - else: - self.inputs_range = [] - for i in range(num_input): - self.inputs_range.append({}) - if i in self.index: - inputs_min, inputs_max = self._init_min_max_variables( - name=self.layer.name + "_input{}".format(i), shape=None - ) - self.inputs_range[i] = {"min_var": inputs_min, "max_var": inputs_max} - - def call(self, inputs, training=None): - """This is where the quantize wrapper's logic lives. - - Args: - inputs (tf.Tensor or dict/list/tuple): Inputs of the wrapped layer. - - Returns: - outputs (tf.Tensor or dict/list/tuple): Outputs of the wrapped layer. - """ - if training is None: - training = tf.keras.backend.learning_phase() - - # Quantize all weights, and replace them in the underlying layer. - if self._layer_class in layer_wise_config["weighted_layers"]: - weight_quantizer = FakeQuantize( - per_channel=True, - channel_axis=self.channel_axis, - ) - quantized_weight = weight_quantizer(self.kernel_weights, self.weight_range, training) - setattr(self.layer, self.kernel, quantized_weight) - - quantized_inputs = inputs - inputs_quantizer = FakeQuantize( - per_channel=False, - channel_axis=self.channel_axis, - ) - - if not isinstance(quantized_inputs, tf.Tensor): - for i in range(len(quantized_inputs)): - if i in self.index: - quantized_inputs[i] = inputs_quantizer(inputs[i], self.inputs_range[i], training) - else: - quantized_inputs = inputs_quantizer(inputs, self.inputs_range, training) - - args = tf_inspect.getfullargspec(self.layer.call).args - if "training" in args: - outputs = self.layer.call(quantized_inputs, training=training) - else: - outputs = self.layer.call(quantized_inputs) - - return outputs diff --git a/neural_compressor/tensorflow/quantization/utils/quantize_graph/qdq/fuse_qdq_in.py b/neural_compressor/tensorflow/quantization/utils/quantize_graph/qdq/fuse_qdq_in.py index e08b052204a..90f29fa3fa3 100644 --- a/neural_compressor/tensorflow/quantization/utils/quantize_graph/qdq/fuse_qdq_in.py +++ b/neural_compressor/tensorflow/quantization/utils/quantize_graph/qdq/fuse_qdq_in.py @@ -24,7 +24,7 @@ from ..quantize_graph_base import QuantizeNodeBase -class FuseNodeStartWithFusedInstanceNorm(QuantizeNodeBase): +class FuseNodeStartWithFusedInstanceNorm(QuantizeNodeBase): # pragma: no cover """Quantize FusedInstanceNorm and apply the fusion.""" def __init__(self, **kwargs): diff --git a/neural_compressor/tensorflow/quantization/utils/utility.py b/neural_compressor/tensorflow/quantization/utils/utility.py index 84ae1fb1915..5e3fa83ea90 100644 --- a/neural_compressor/tensorflow/quantization/utils/utility.py +++ b/neural_compressor/tensorflow/quantization/utils/utility.py @@ 
-308,45 +308,6 @@ def strip_unused_nodes(graph_def, input_node_names, output_node_names): return tf.compat.v1.graph_util.extract_sub_graph(cur_graph.dump_graph(), output_node_names) -def get_estimator_graph(estimator, input_fn): - """Get the graph of the estimator. - - Args: - estimator: tf estimator model - input_fn: input function - - Returns: - graph - """ - with tf.Graph().as_default() as g: - features, input_hooks = estimator._get_features_from_input_fn(input_fn, tf.estimator.ModeKeys.PREDICT) - estimator_spec = estimator._call_model_fn(features, None, tf.estimator.ModeKeys.PREDICT, estimator.config) - - outputs = ( - [tensor.name for tensor in estimator_spec.predictions.values()] - if isinstance(estimator_spec.predictions, dict) - else [estimator_spec.predictions.name] - ) - logger.info("Estimator output tensor names is {}.".format(outputs)) - with tf.compat.v1.Session(graph=g) as sess: - sess.run(tf.compat.v1.global_variables_initializer()) - # Freezing a graph requires output_node_names, which can be found in - # estimator_spec.predictions that contains prediction tensors as a - # dictionary - # When a model uses Iterator, we need to have 'MakeIterator' (default - # name used by TF) in the output_node_names as well. - output_nodes = list(set([output.split(":")[0] for output in outputs])) - if "MakeIterator" in [node.op for node in g.as_graph_def().node]: - output_nodes.append("MakeIterator") - - graph_def = tf.compat.v1.graph_util.convert_variables_to_constants(sess, g.as_graph_def(), output_nodes) - - graph = tf.Graph() - with graph.as_default(): - tf.import_graph_def(graph_def, name="") - return graph - - def strip_equivalent_nodes(graph_def, output_node_names): """Strip nodes with the same input and attr.""" stripped_graph = GraphAnalyzer() @@ -445,50 +406,6 @@ def get_model_input_shape(model): return 1 -def get_tensor_val_from_graph_node(graph_node_name_mapping, node_name): - """Get the tensor value for given node name. - - Args: - graph_node_name_mapping: key: node name, val: node - node_name: query node - - Returns: - tensor_val: numpy array - """ - from tensorflow.python.framework import tensor_util - - node = graph_node_name_mapping[node_name] - node_tensor = node.attr["value"].tensor - tensor_val = tensor_util.MakeNdarray(node_tensor) - return tensor_val - - -def int8_node_name_reverse(node): - """Reverse int8 node name.""" - int8_postfix = "_eightbit" - node_name = node.name - if "Quantized" in node.op: - index_postfix = node_name.find(int8_postfix) - if index_postfix != -1: - node_name = node_name[:index_postfix] - return node_name - - -def _parse_config(q_config, cfg, op_list): - """Parse q_config and get dequantize min max value.""" - activation_min_max = {} - if "__requant_min_max" in q_config: - for node_name, val in q_config["__requant_min_max"].items(): - node_name = node_name.split("_eightbit_requant_range")[0] - if node_name in op_list: - activation_min_max[node_name] = {"min": val[0], "max": val[1]} - updated_cfg = {"op": {}} - for op_name_and_type in cfg["op"].keys(): - if op_name_and_type[0] in op_list: - updated_cfg["op"][op_name_and_type] = cfg["op"][op_name_and_type] - return activation_min_max, updated_cfg - - def generate_feed_dict(input_tensor, inputs): """Generate feed dict helper function.""" if len(input_tensor) == 1: @@ -545,51 +462,6 @@ def check_shape(tensor, data): return feed_dict -def get_weight_from_input_tensor(model, input_tensor_names, op_types): - """Extracts weight tensors and their associated nodes from a smooth quant node's input tensor. 
- - Args: - model: A TensorFlow model containing a `graph_def` attribute. - input_tensor_names: A list of input tensor names to search for weight tensors. - op_types: A list of operation types to search for when looking for weight tensors. - - Returns: - A tuple of two dictionaries: - - sq_weight_tensors: A dictionary mapping each input tensor name - to a dict of its associated weight tensors with weight name. - - sq_weights_nodes: A dictionary mapping each input tensor name - to a dict of its associated weight nodes with weight name. - """ - g_analyzer = GraphAnalyzer() - g_analyzer.graph = model.graph_def - graph_info = g_analyzer.parse_graph() - - sq_weight_tensors = {} - sq_weights_nodes = {} - - from tensorflow.python.framework import tensor_util - - for name in input_tensor_names: - # Use dict rather than list to fix the QKV/VQK misorder issue - curr_weight_tensors = {} - curr_weights_nodes = {} - next_node_names = graph_info[name].outputs - for node_name in next_node_names: - curr_node = graph_info[node_name].node - if curr_node.op not in op_types: - continue - if len(curr_node.input) >= 2: - weight_name = curr_node.input[1] - weight_node = graph_info[weight_name].node - weight_tensor = tensor_util.MakeNdarray(weight_node.attr["value"].tensor) - curr_weight_tensors[weight_name] = weight_tensor - curr_weights_nodes[weight_name] = weight_node - # {input node -> {xxx_q_proj_matmul: value1, xxx_v_proj_matmul: value2, ...}, ...} - sq_weight_tensors[name] = curr_weight_tensors - sq_weights_nodes[name] = curr_weights_nodes - return sq_weight_tensors, sq_weights_nodes - - def apply_inlining(func): """Apply an inlining optimization to the function's graph definition. diff --git a/neural_compressor/tensorflow/utils/__init__.py b/neural_compressor/tensorflow/utils/__init__.py index 65dbabd2270..0e1535b235f 100644 --- a/neural_compressor/tensorflow/utils/__init__.py +++ b/neural_compressor/tensorflow/utils/__init__.py @@ -46,14 +46,10 @@ combine_histogram, get_all_fp32_data, get_tensor_histogram, - Dequantize, - dequantize_weight, - dump_data_to_local, - load_data_from_pkl, singleton, CpuInfo, Statistics, CaptureOutputToFile, - LazyImport, valid_keras_format, + TFSlimNetsFactory, ) diff --git a/neural_compressor/tensorflow/utils/data.py b/neural_compressor/tensorflow/utils/data.py index bdf4ce1d9bf..5854e45ad75 100644 --- a/neural_compressor/tensorflow/utils/data.py +++ b/neural_compressor/tensorflow/utils/data.py @@ -28,7 +28,7 @@ from neural_compressor.common import logger -def default_collate(batch): # pragma: no cover +def default_collate(batch): """Merge data with outer dimension batch size.""" elem = batch[0] if isinstance(elem, collections.abc.Mapping): @@ -229,7 +229,7 @@ def __len__(self): return (len(self.sampler) + self.batch_size - 1) // self.batch_size -class BaseDataLoader: # pragma: no cover +class BaseDataLoader: """Base class for TF DataLoaders. 
_generate_dataloader is needed to create a dataloader object diff --git a/neural_compressor/tensorflow/utils/model.py b/neural_compressor/tensorflow/utils/model.py index 75334446c4c..8ad020678ae 100644 --- a/neural_compressor/tensorflow/utils/model.py +++ b/neural_compressor/tensorflow/utils/model.py @@ -43,7 +43,7 @@ def reset_global_config(self): TFConfig = TensorflowGlobalConfig() -class Model(object): +class Model(object): # pragma: no cover """A wrapper to construct a Neural Compressor TF Model.""" def __new__(cls, root, **kwargs): diff --git a/neural_compressor/tensorflow/utils/model_wrappers.py b/neural_compressor/tensorflow/utils/model_wrappers.py index 2628ad1edb8..b9fc4a54a63 100644 --- a/neural_compressor/tensorflow/utils/model_wrappers.py +++ b/neural_compressor/tensorflow/utils/model_wrappers.py @@ -371,7 +371,7 @@ def _get_graph_from_saved_model_v2(saved_model_dir, input_tensor_names, output_t return load_saved_model(saved_model_dir, saved_model_tags, input_tensor_names, output_tensor_names) -def _get_graph_from_original_keras_v2(model): +def _get_graph_from_original_keras_v2(model): # pragma: no cover """The version 2 function that get graph from the original keras model. Args: @@ -424,7 +424,7 @@ def _get_graph_from_original_keras_v2(model): return graph_def, input_names, output_names -def _check_keras_format(model, saved_model_dir): +def _check_keras_format(model, saved_model_dir): # pragma: no cover """Decide which method will be used to get graph from the saved_model . Args: @@ -504,7 +504,7 @@ def _get_graph_from_saved_model_v1(model): return graph_def, inputs, outputs -def try_loading_keras(model, input_tensor_names, output_tensor_names): +def try_loading_keras(model, input_tensor_names, output_tensor_names): # pragma: no cover """Try different ways of loading keras models. Args: @@ -590,7 +590,7 @@ def slim_session(model, input_tensor_names, output_tensor_names, **kwargs): # p output_tensor_names (list of string): validated output_tensor_names. """ assert version1_lt_version2(tf.version.VERSION, "2.0.0"), "slim model only used in tensorflow 1.x" - from neural_compressor.tensorflow.utils.nets_factory import TFSlimNetsFactory + from neural_compressor.tensorflow.utils.utility import TFSlimNetsFactory factory = TFSlimNetsFactory() assert "name" in kwargs, "model name should be set in slim checkpoint...." @@ -682,7 +682,7 @@ def checkpoint_session(model, input_tensor_names, output_tensor_names, **kwargs) return sess, input_tensor_names, output_tensor_names -def estimator_session(model, input_tensor_names, output_tensor_names, **kwargs): +def estimator_session(model, input_tensor_names, output_tensor_names, **kwargs): # pragma: no cover """Build session with estimator model. Args: @@ -1113,68 +1113,6 @@ def model(self, input_model): """Set model in AutoTrackable object.""" self._auto_trackable = input_model - def compute_sparsity(self, tensor): - """Compute the sparsity. - - Args: - tensor: Tensorflow tensor - - Return: - (the original tensor size, number of zero elements, number of non-zero elements) - """ - mask = np.ones_like(tensor) - tensor_size = tensor.size - dense_mask = tensor != 0 - dense_size = dense_mask.sum() - return tensor_size, tensor_size - dense_size, dense_size - - def report_sparsity(self): - """Get sparsity of the model. - - Returns: - df (DataFrame): DataFrame of sparsity of each weight. - total_sparsity (float): total sparsity of model. 
- """ - import numpy as np - import pandas as pd - import tensorflow as tf - - df = pd.DataFrame(columns=["Name", "Shape", "NNZ (dense)", "NNZ (sparse)", "Sparsity(%)"]) - pd.set_option("display.precision", 2) - param_dims = [2, 4] - params_size = 0 - sparse_params_size = 0 - for index, layer in enumerate(tf.keras.models.load_model(self._model).layers): - if not len(layer.weights): - continue - # Extract just the actual parameter's name, which in this context we treat - # as its "type" - weights = layer.get_weights()[0] - if weights.ndim in param_dims: - param_size, sparse_param_size, dense_param_size = self.compute_sparsity(weights) - density = dense_param_size / param_size - params_size += param_size - sparse_params_size += sparse_param_size - df.loc[len(df.index)] = [ - index, - list(weights.shape), - dense_param_size, - sparse_param_size, - (1 - density) * 100, - ] - - total_sparsity = sparse_params_size / params_size * 100 - - df.loc[len(df.index)] = [ - "Total sparsity:", - "-", - params_size, - sparse_params_size, - total_sparsity, - ] - - return df, total_sparsity - def build_saved_model(self, root=None): """Build Tensorflow saved model. @@ -1411,67 +1349,6 @@ def save(self, root=None): shutil.rmtree(self.model_path, ignore_errors=True) -class TensorflowQATModel(TensorflowSavedModelModel): - """Build Tensorflow QAT model.""" - - def __init__(self, model="", **kwargs): - """Initialize a Tensorflow QAT model. - - Args: - model (string or tf.keras.Model object): model path or model object. - """ - assert isinstance(model, tf.keras.Model) or isinstance( - model, str - ), "The TensorflowQATModel should be initialized either by a string or a tf.keras.Model." - super(TensorflowQATModel, self).__init__(model) - self.keras_model = None - self.model_type = "keras" - - @property - def model(self): - """Return model itself.""" - if self.keras_model is None: - if isinstance(self._model, tf.keras.Model): - self.keras_model = self._model - else: - self.keras_model = tf.keras.models.load_model(self._model) - - return self.keras_model - - @model.setter - def model(self, q_model): - """Set model itself.""" - self.keras_model = q_model - - @property - def frozen_graph_def(self): - """Get frozen graph_def.""" - graph_def = tf.compat.v1.graph_util.convert_variables_to_constants( - self.sess, self.sess.graph_def, self.output_node_names - ) - return graph_def - - def save(self, root=None): - """Save Tensorflow QAT model.""" - if not root: - root = DEFAULT_WORKSPACE + "/saved_model" - root = os.path.abspath(os.path.expanduser(root)) - os.makedirs(os.path.dirname(root), exist_ok=True) - if root.endswith(".pb"): - saved_format = "pb file" - graph_def = self.frozen_graph_def - f = tf.io.gfile.GFile(root, "wb") - f.write(graph_def.SerializeToString()) - else: - q_aware_model = self.keras_model - q_aware_model.save(root) - saved_format = "saved_model" - if root.endswith(".h5"): - saved_format = "h5 file" - logger.info("Save quantized model to {}.".format(saved_format)) - return root - - class TensorflowCheckpointModel(TensorflowBaseModel): """Build Tensorflow checkpoint model.""" @@ -1552,93 +1429,6 @@ def save(self, root, *args, **kwargs): """Save Keras model.""" self._model_object.save(root) - @abstractmethod - def _export( - self, - save_path: str, - conf, - ): - pass - - @abstractmethod - def framework(self): - """Return framework.""" - return "keras" - - def get_all_weight_names(self): - """Get weight names of model. - - Returns: - list: weight names list. 
- """ - names = [] - for index, layer in enumerate(self.model.layers): - if len(layer.weights): - names.append(index) - return names - - def compute_sparsity(self, tensor): - """Compute the sparsity. - - Args: - tensor: Tensorflow tensor - - Return: - (the original tensor size, number of zero elements, number of non-zero elements) - """ - mask = np.ones_like(tensor) - tensor_size = tensor.size - dense_mask = tensor != 0 - dense_size = dense_mask.sum() - return tensor_size, tensor_size - dense_size, dense_size - - def report_sparsity(self): - """Get sparsity of the model. - - Returns: - df (DataFrame): DataFrame of sparsity of each weight. - total_sparsity (float): total sparsity of model. - """ - import numpy as np - import pandas as pd - import tensorflow as tf - - df = pd.DataFrame(columns=["Name", "Shape", "NNZ (dense)", "NNZ (sparse)", "Sparsity(%)"]) - pd.set_option("display.precision", 2) - param_dims = [2, 4] - params_size = 0 - sparse_params_size = 0 - for index, layer in enumerate(self.model.layers): - if not len(layer.weights): - continue - # Extract just the actual parameter's name, which in this context we treat - # as its "type" - weights = layer.get_weights()[0] - if weights.ndim in param_dims: - param_size, sparse_param_size, dense_param_size = self.compute_sparsity(weights) - density = dense_param_size / param_size - params_size += param_size - sparse_params_size += sparse_param_size - df.loc[len(df.index)] = [ - index, - list(weights.shape), - dense_param_size, - sparse_param_size, - (1 - density) * 100, - ] - - total_sparsity = sparse_params_size / params_size * 100 - - df.loc[len(df.index)] = [ - "Total sparsity:", - "-", - params_size, - sparse_params_size, - total_sparsity, - ] - - return df, total_sparsity - @property def input_node_names(self): """Return input node names.""" @@ -1673,7 +1463,6 @@ def output_node_names(self): "AutoTrackable": TensorflowSavedModelModel, "llm_saved_model": TensorflowLLMModel, "keras": KerasModel, - "keras_qat": TensorflowQATModel, } diff --git a/neural_compressor/tensorflow/utils/nets_factory.py b/neural_compressor/tensorflow/utils/nets_factory.py deleted file mode 100644 index d09ef4ba1d1..00000000000 --- a/neural_compressor/tensorflow/utils/nets_factory.py +++ /dev/null @@ -1,161 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-"""TF-Slim nets factory.""" - -from neural_compressor.tensorflow.utils.utility import singleton - - -@singleton -class TFSlimNetsFactory(object): - """TF-Slim nets factory.""" - - def __init__(self): - """Initialize a TFSlimNetsFactory.""" - # tf_slim only support specific models by default - self.default_slim_models = [ - "alexnet_v2", - "overfeat", - "vgg_a", - "vgg_16", - "vgg_19", - "inception_v1", - "inception_v2", - "inception_v3", - "resnet_v1_50", - "resnet_v1_101", - "resnet_v1_152", - "resnet_v1_200", - "resnet_v2_50", - "resnet_v2_101", - "resnet_v2_152", - "resnet_v2_200", - ] - - from tf_slim.nets import alexnet, inception, overfeat, resnet_v1, resnet_v2, vgg - - self.networks_map = { - "alexnet_v2": { - "model": alexnet.alexnet_v2, - "input_shape": [None, 224, 224, 3], - "num_classes": 1001, - "arg_scope": alexnet.alexnet_v2_arg_scope, - }, - "overfeat": { - "model": overfeat.overfeat, - "input_shape": [None, 224, 224, 3], - "num_classes": 1001, - "arg_scope": overfeat.overfeat_arg_scope, - }, - "vgg_a": { - "model": vgg.vgg_a, - "input_shape": [None, 224, 224, 3], - "num_classes": 1000, - "arg_scope": vgg.vgg_arg_scope, - }, - "vgg_16": { - "model": vgg.vgg_16, - "input_shape": [None, 224, 224, 3], - "num_classes": 1000, - "arg_scope": vgg.vgg_arg_scope, - }, - "vgg_19": { - "model": vgg.vgg_19, - "input_shape": [None, 224, 224, 3], - "num_classes": 1000, - "arg_scope": vgg.vgg_arg_scope, - }, - "inception_v1": { - "model": inception.inception_v1, - "input_shape": [None, 224, 224, 3], - "num_classes": 1001, - "arg_scope": inception.inception_v1_arg_scope, - }, - "inception_v2": { - "model": inception.inception_v2, - "input_shape": [None, 224, 224, 3], - "num_classes": 1001, - "arg_scope": inception.inception_v2_arg_scope, - }, - "inception_v3": { - "model": inception.inception_v3, - "input_shape": [None, 299, 299, 3], - "num_classes": 1001, - "arg_scope": inception.inception_v3_arg_scope, - }, - "resnet_v1_50": { - "model": resnet_v1.resnet_v1_50, - "input_shape": [None, 224, 224, 3], - "num_classes": 1000, - "arg_scope": resnet_v1.resnet_arg_scope, - }, - "resnet_v1_101": { - "model": resnet_v1.resnet_v1_101, - "input_shape": [None, 224, 224, 3], - "num_classes": 1000, - "arg_scope": resnet_v1.resnet_arg_scope, - }, - "resnet_v1_152": { - "model": resnet_v1.resnet_v1_152, - "input_shape": [None, 224, 224, 3], - "num_classes": 1000, - "arg_scope": resnet_v1.resnet_arg_scope, - }, - "resnet_v1_200": { - "model": resnet_v1.resnet_v1_200, - "input_shape": [None, 224, 224, 3], - "num_classes": 1000, - "arg_scope": resnet_v1.resnet_arg_scope, - }, - "resnet_v2_50": { - "model": resnet_v2.resnet_v2_50, - "input_shape": [None, 224, 224, 3], - "num_classes": 1001, - "arg_scope": resnet_v2.resnet_arg_scope, - }, - "resnet_v2_101": { - "model": resnet_v2.resnet_v2_101, - "input_shape": [None, 224, 224, 3], - "num_classes": 1001, - "arg_scope": resnet_v2.resnet_arg_scope, - }, - "resnet_v2_152": { - "model": resnet_v2.resnet_v2_152, - "input_shape": [None, 224, 224, 3], - "num_classes": 1001, - "arg_scope": resnet_v2.resnet_arg_scope, - }, - "resnet_v2_200": { - "model": resnet_v2.resnet_v2_200, - "input_shape": [None, 224, 224, 3], - "num_classes": 1001, - "arg_scope": resnet_v2.resnet_arg_scope, - }, - } - - def register(self, name, model_func, input_shape, arg_scope, **kwargs): - """Register a model to TFSlimNetsFactory. - - Args: - name (str): name of a model. - model_func (_type_): model that built from slim. - input_shape (_type_): input tensor shape. 
- arg_scope (_type_): slim arg scope that needed. - """ - net_info = {"model": model_func, "input_shape": input_shape, "arg_scope": arg_scope} - net = {name: {**net_info, **kwargs}} - self.networks_map.update(net) - self.default_slim_models.append(name) diff --git a/neural_compressor/tensorflow/utils/utility.py b/neural_compressor/tensorflow/utils/utility.py index 886dcffc234..a7671da1f1e 100644 --- a/neural_compressor/tensorflow/utils/utility.py +++ b/neural_compressor/tensorflow/utils/utility.py @@ -161,67 +161,6 @@ def get_tensor_histogram(tensor_data, bins=2048): return (hist, hist_edges, min_val, max_val, th) -def Dequantize(data, scale_info): - """Dequantize the data with the scale_info.""" - original_shape = data.shape - max_value = 255.0 if scale_info[0].find("Relu") != -1.0 else 127.0 - _scale = (np.array(scale_info[2]) - np.array(scale_info[1])) / max_value - de_scale = np.ones(original_shape) * _scale - de_data = np.multiply(data, de_scale).astype(np.float32) - return de_data - - -def dequantize_weight(weight_tensor, min_filter_tensor, max_filter_tensor): - """Dequantize the weight with min-max filter tensors.""" - weight_channel = weight_tensor.shape[-1] - if len(min_filter_tensor) == 1: - weight_tensor = weight_tensor * ((max_filter_tensor[0] - min_filter_tensor[0]) / 127.0) - else: - # TODO to calculate the de-quantized result in a parallel way - for i in range(weight_channel): - weight_tensor[:, :, :, i] = weight_tensor[:, :, :, i] * ( - (max_filter_tensor[i] - min_filter_tensor[i]) / 127.0 - ) - return weight_tensor - - -def dump_data_to_local(data, path, filename): - """Dump data to local as pkl file. - - Args: - data: Data used to dump - path: The directory to save data - filename: The filename to dump - - Returns: - loaded data - """ - from pathlib import Path - - if not os.path.exists(path): - Path(path).mkdir(parents=True, exist_ok=True) - file_path = os.path.join(path, filename) - with open(file_path, "wb") as fp: - pickle.dump(data, fp) - logging.getLogger("neural_compressor").info("Dumped data to %s" % file_path) - - -def load_data_from_pkl(path, filename): - """Load data from local pkl file. - - Args: - path: The directory to load data - filename: The filename to load - """ - try: - file_path = os.path.join(path, filename) - with open(file_path, "rb") as fp: - data = pickle.load(fp) - return data - except FileExistsError: - logging.getLogger("neural_compressor").info("Can not open %s." % path) - - def singleton(cls): """Not displayed in API Docs. @@ -395,33 +334,143 @@ def __exit__(self, type, value, traceback): self.tmp_file.close() -class LazyImport(object): - """Lazy import python module till use.""" +@singleton +class TFSlimNetsFactory(object): # pragma: no cover + """TF-Slim nets factory.""" - def __init__(self, module_name): - """Init LazyImport object. 
+ def __init__(self): + """Initialize a TFSlimNetsFactory.""" + # tf_slim only support specific models by default + self.default_slim_models = [ + "alexnet_v2", + "overfeat", + "vgg_a", + "vgg_16", + "vgg_19", + "inception_v1", + "inception_v2", + "inception_v3", + "resnet_v1_50", + "resnet_v1_101", + "resnet_v1_152", + "resnet_v1_200", + "resnet_v2_50", + "resnet_v2_101", + "resnet_v2_152", + "resnet_v2_200", + ] + + from tf_slim.nets import alexnet, inception, overfeat, resnet_v1, resnet_v2, vgg + + self.networks_map = { + "alexnet_v2": { + "model": alexnet.alexnet_v2, + "input_shape": [None, 224, 224, 3], + "num_classes": 1001, + "arg_scope": alexnet.alexnet_v2_arg_scope, + }, + "overfeat": { + "model": overfeat.overfeat, + "input_shape": [None, 224, 224, 3], + "num_classes": 1001, + "arg_scope": overfeat.overfeat_arg_scope, + }, + "vgg_a": { + "model": vgg.vgg_a, + "input_shape": [None, 224, 224, 3], + "num_classes": 1000, + "arg_scope": vgg.vgg_arg_scope, + }, + "vgg_16": { + "model": vgg.vgg_16, + "input_shape": [None, 224, 224, 3], + "num_classes": 1000, + "arg_scope": vgg.vgg_arg_scope, + }, + "vgg_19": { + "model": vgg.vgg_19, + "input_shape": [None, 224, 224, 3], + "num_classes": 1000, + "arg_scope": vgg.vgg_arg_scope, + }, + "inception_v1": { + "model": inception.inception_v1, + "input_shape": [None, 224, 224, 3], + "num_classes": 1001, + "arg_scope": inception.inception_v1_arg_scope, + }, + "inception_v2": { + "model": inception.inception_v2, + "input_shape": [None, 224, 224, 3], + "num_classes": 1001, + "arg_scope": inception.inception_v2_arg_scope, + }, + "inception_v3": { + "model": inception.inception_v3, + "input_shape": [None, 299, 299, 3], + "num_classes": 1001, + "arg_scope": inception.inception_v3_arg_scope, + }, + "resnet_v1_50": { + "model": resnet_v1.resnet_v1_50, + "input_shape": [None, 224, 224, 3], + "num_classes": 1000, + "arg_scope": resnet_v1.resnet_arg_scope, + }, + "resnet_v1_101": { + "model": resnet_v1.resnet_v1_101, + "input_shape": [None, 224, 224, 3], + "num_classes": 1000, + "arg_scope": resnet_v1.resnet_arg_scope, + }, + "resnet_v1_152": { + "model": resnet_v1.resnet_v1_152, + "input_shape": [None, 224, 224, 3], + "num_classes": 1000, + "arg_scope": resnet_v1.resnet_arg_scope, + }, + "resnet_v1_200": { + "model": resnet_v1.resnet_v1_200, + "input_shape": [None, 224, 224, 3], + "num_classes": 1000, + "arg_scope": resnet_v1.resnet_arg_scope, + }, + "resnet_v2_50": { + "model": resnet_v2.resnet_v2_50, + "input_shape": [None, 224, 224, 3], + "num_classes": 1001, + "arg_scope": resnet_v2.resnet_arg_scope, + }, + "resnet_v2_101": { + "model": resnet_v2.resnet_v2_101, + "input_shape": [None, 224, 224, 3], + "num_classes": 1001, + "arg_scope": resnet_v2.resnet_arg_scope, + }, + "resnet_v2_152": { + "model": resnet_v2.resnet_v2_152, + "input_shape": [None, 224, 224, 3], + "num_classes": 1001, + "arg_scope": resnet_v2.resnet_arg_scope, + }, + "resnet_v2_200": { + "model": resnet_v2.resnet_v2_200, + "input_shape": [None, 224, 224, 3], + "num_classes": 1001, + "arg_scope": resnet_v2.resnet_arg_scope, + }, + } + + def register(self, name, model_func, input_shape, arg_scope, **kwargs): + """Register a model to TFSlimNetsFactory. Args: - module_name (string): The name of module imported later + name (str): name of a model. + model_func (_type_): model that built from slim. + input_shape (_type_): input tensor shape. + arg_scope (_type_): slim arg scope that needed. 
""" - self.module_name = module_name - self.module = None - - def __getattr__(self, name): - """Get the attributes of the module by name.""" - try: - self.module = importlib.import_module(self.module_name) - mod = getattr(self.module, name) - except: - spec = importlib.util.find_spec(str(self.module_name + "." + name)) - mod = importlib.util.module_from_spec(spec) - spec.loader.exec_module(mod) - return mod - - def __call__(self, *args, **kwargs): - """Call the function in that module.""" - function_name = self.module_name.split(".")[-1] - module_name = self.module_name.split(f".{function_name}")[0] - self.module = importlib.import_module(module_name) - function = getattr(self.module, function_name) - return function(*args, **kwargs) + net_info = {"model": model_func, "input_shape": input_shape, "arg_scope": arg_scope} + net = {name: {**net_info, **kwargs}} + self.networks_map.update(net) + self.default_slim_models.append(name) diff --git a/test/3x/tensorflow/keras/test_layers.py b/test/3x/tensorflow/keras/test_layers.py new file mode 100644 index 00000000000..b43b3fd8bf7 --- /dev/null +++ b/test/3x/tensorflow/keras/test_layers.py @@ -0,0 +1,213 @@ +# -*- coding: utf-8 -*- +# +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import math +import os +import shutil +import unittest + +import keras +import numpy as np +import tensorflow as tf + +from neural_compressor.common import Logger +from neural_compressor.tensorflow.utils import version1_gte_version2 + +logger = Logger().get_logger() + + +def build_model1(): + # Load MNIST dataset + mnist = keras.datasets.mnist + + # 60000 images in train and 10000 images in test, but we don't need so much for ut + (train_images, train_labels), (test_images, test_labels) = mnist.load_data() + train_images, train_labels = train_images[:1000], train_labels[:1000] + test_images, test_labels = test_images[:200], test_labels[:200] + + # Normalize the input image so that each pixel value is between 0 to 1. + train_images = train_images / 255.0 + test_images = test_images / 255.0 + + # Define the model architecture. 
+ model = keras.Sequential( + [ + keras.layers.InputLayer(input_shape=(28, 28)), + keras.layers.Reshape(target_shape=(28, 28, 1)), + keras.layers.DepthwiseConv2D(3, 3, activation="relu", name="conv2d"), + keras.layers.MaxPooling2D(pool_size=(2, 2)), + keras.layers.Flatten(), + keras.layers.Dense(10, name="dense"), + ] + ) + # Train the digit classification model + model.compile( + optimizer="adam", loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True), metrics=["accuracy"] + ) + + model.fit( + train_images, + train_labels, + epochs=1, + validation_split=0.1, + ) + + _, baseline_model_accuracy = model.evaluate(test_images, test_labels, verbose=0) + + print("Baseline test accuracy:", baseline_model_accuracy) + if version1_gte_version2(tf.__version__, "2.16.1"): + model.save("baseline_model1.keras") + else: + model.save("baseline_model1") + + +def build_model2(): + # Load MNIST dataset + mnist = keras.datasets.mnist + + # 60000 images in train and 10000 images in test, but we don't need so much for ut + (train_images, train_labels), (test_images, test_labels) = mnist.load_data() + train_images, train_labels = train_images[:1000], train_labels[:1000] + test_images, test_labels = test_images[:200], test_labels[:200] + + # Normalize the input image so that each pixel value is between 0 to 1. + train_images = train_images / 255.0 + test_images = test_images / 255.0 + + # Define the model architecture. + model = keras.Sequential( + [ + keras.layers.InputLayer(input_shape=(28, 28)), + keras.layers.Reshape(target_shape=(28, 28, 1)), + keras.layers.SeparableConv2D(3, 3, activation="relu"), + keras.layers.AveragePooling2D(pool_size=(2, 2)), + keras.layers.Flatten(), + keras.layers.Dense(10, name="dense"), + ] + ) + # Train the digit classification model + model.compile( + optimizer="adam", loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True), metrics=["accuracy"] + ) + + model.fit( + train_images, + train_labels, + epochs=1, + validation_split=0.1, + ) + + _, baseline_model_accuracy = model.evaluate(test_images, test_labels, verbose=0) + + print("Baseline test accuracy:", baseline_model_accuracy) + if version1_gte_version2(tf.__version__, "2.16.1"): + model.save("baseline_model2.keras") + else: + model.save("baseline_model2") + + +class Dataset(object): + def __init__(self, batch_size=1): + self.batch_size = batch_size + mnist = keras.datasets.mnist + (train_images, train_labels), (test_images, test_labels) = mnist.load_data() + train_images, train_labels = train_images[:1000], train_labels[:1000] + test_images, test_labels = test_images[:200], test_labels[:200] + # Normalize the input image so that each pixel value is between 0 to 1. 
+        self.train_images = train_images / 255.0
+        self.test_images = test_images / 255.0
+        self.train_labels = train_labels
+        self.test_labels = test_labels
+
+    def __len__(self):
+        return len(self.test_images)
+
+    def __getitem__(self, idx):
+        return self.test_images[idx], self.test_labels[idx]
+
+
+class MyDataloader:
+    def __init__(self, dataset, batch_size=1):
+        self.dataset = dataset
+        self.batch_size = batch_size
+        self.length = math.ceil(len(dataset) / self.batch_size)
+
+    def __iter__(self):
+        for _, (images, labels) in enumerate(self.dataset):
+            images = np.expand_dims(images, axis=0)
+            labels = np.expand_dims(labels, axis=0)
+            yield (images, labels)
+
+    def __len__(self):
+        return self.length
+
+
+class TestTF3xNewApi(unittest.TestCase):
+    @classmethod
+    def setUpClass(self):
+        build_model1()
+        build_model2()
+        os.environ["ITEX_ONEDNN_GRAPH"] = "1"
+        self.fp32_model_path1 = (
+            "baseline_model1.keras" if version1_gte_version2(tf.__version__, "2.16.1") else "baseline_model1"
+        )
+        self.fp32_model_path2 = (
+            "baseline_model2.keras" if version1_gte_version2(tf.__version__, "2.16.1") else "baseline_model2"
+        )
+
+    @classmethod
+    def tearDownClass(self):
+        if self.fp32_model_path1.endswith(".keras"):
+            os.remove(self.fp32_model_path1)
+            os.remove(self.fp32_model_path2)
+        else:
+            shutil.rmtree(self.fp32_model_path1, ignore_errors=True)
+            shutil.rmtree(self.fp32_model_path2, ignore_errors=True)
+        os.environ["ITEX_ONEDNN_GRAPH"] = "0"
+
+    def test_depthwise_conv2d(self):
+        logger.info("test_depthwise_conv2d")
+        from neural_compressor.tensorflow import quantize_model
+        from neural_compressor.tensorflow.keras import get_default_static_quant_config
+
+        calib_dataloader = MyDataloader(dataset=Dataset())
+        fp32_model = keras.models.load_model(self.fp32_model_path1)
+        qmodel = quantize_model(fp32_model, get_default_static_quant_config(), calib_dataloader)
+        self.assertIsNotNone(qmodel)
+
+        for layer in qmodel.layers:
+            if layer.name == "conv2d":
+                self.assertEqual(layer.__class__.__name__, "QDepthwiseConv2D")
+                break
+
+    def test_separable_conv2d(self):
+        logger.info("test_separable_conv2d")
+        from neural_compressor.tensorflow import quantize_model
+        from neural_compressor.tensorflow.keras import get_default_static_quant_config
+
+        calib_dataloader = MyDataloader(dataset=Dataset())
+        fp32_model = keras.models.load_model(self.fp32_model_path2)
+        qmodel = quantize_model(fp32_model, get_default_static_quant_config(), calib_dataloader)
+        self.assertIsNotNone(qmodel)
+
+        # The SeparableConv2D layer in this model is not named "conv2d", so check for the
+        # quantized layer by its class name instead of by layer name.
+        quantized_class_names = [layer.__class__.__name__ for layer in qmodel.layers]
+        self.assertIn("QSeparableConv2D", quantized_class_names)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/test/3x/tensorflow/keras/test_model_wrappers.py b/test/3x/tensorflow/keras/test_model_wrappers.py
index b9cb3eecfd0..e4d906e27a2 100644
--- a/test/3x/tensorflow/keras/test_model_wrappers.py
+++ b/test/3x/tensorflow/keras/test_model_wrappers.py
@@ -94,30 +94,6 @@ def test_keras_model(self):
 
         self.assertEqual(os.path.isfile("./keras_model.keras"), True)
 
-    def test_tf_qat_model(self):
-        if parse_version(tf.version.VERSION) < parse_version("2.3.0"):
-            return
-
-        from neural_compressor.tensorflow.utils.model_wrappers import TensorflowQATModel
-
-        keras_model = self.model
-        model = TensorflowQATModel(keras_model)
-        self.assertEqual(isinstance(model.model, tf.keras.Model), True)
-        self.assertEqual(model.model_path, None)
-
-        keras_model.save("./simple_model.keras")
-        model = TensorflowQATModel("./simple_model.keras")
-        
self.assertEqual(isinstance(model.model, tf.keras.Model), True) - self.assertEqual(model.model_path, "./simple_model.keras") - - model.save("./keras_model.keras") - loaded_model = tf.keras.models.load_model("./keras_model.keras") - self.assertEqual(isinstance(loaded_model, tf.keras.Model), True) - - model.save("keras_model.h5") - loaded_model = tf.keras.models.load_model("keras_model.h5") - self.assertEqual(isinstance(loaded_model, tf.keras.Model), True) - if __name__ == "__main__": unittest.main() diff --git a/test/3x/tensorflow/quantization/ptq/test_get_estimator_graph.py b/test/3x/tensorflow/quantization/ptq/test_get_estimator_graph.py deleted file mode 100644 index b538c34a43d..00000000000 --- a/test/3x/tensorflow/quantization/ptq/test_get_estimator_graph.py +++ /dev/null @@ -1,52 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -import os -import platform -import unittest - -import tensorflow as tf - -from neural_compressor.tensorflow.quantization.utils.utility import get_estimator_graph -from neural_compressor.tensorflow.utils import version1_gte_version2 - - -class TestEstimatorGraphConvert(unittest.TestCase): - @classmethod - def setUpClass(self): - if version1_gte_version2(tf.version.VERSION, "2.16.1"): - return - - self.dst_path = "/tmp/.neural_compressor/train.csv" - self.titanic_file = tf.keras.utils.get_file( - self.dst_path, "https://storage.googleapis.com/tf-datasets/titanic/train.csv" - ) - - @unittest.skipIf( - version1_gte_version2(tf.version.VERSION, "2.16.1"), "The estimator APIs are deleted after TF2.16.1" - ) - def test_get_estimator_graph(self): - def train_input_fn(): - titanic = tf.data.experimental.make_csv_dataset(self.titanic_file, batch_size=32, label_name="survived") - titanic_batches = titanic.cache().repeat().shuffle(500).prefetch(tf.data.experimental.AUTOTUNE) - return titanic_batches - - age = tf.feature_column.numeric_column("age") - cls = tf.feature_column.categorical_column_with_vocabulary_list("class", ["First", "Second", "Third"]) - embark = tf.feature_column.categorical_column_with_hash_bucket("embark_town", 32) - import tempfile - - model_dir = tempfile.mkdtemp() - model = tf.estimator.LinearClassifier(model_dir=model_dir, feature_columns=[embark, cls, age], n_classes=2) - model = model.train(input_fn=train_input_fn, steps=100) - result = model.evaluate(train_input_fn, steps=10) - - graph = get_estimator_graph(model, train_input_fn) - - self.assertTrue(isinstance(graph, tf.Graph)) - graph_def = graph.as_graph_def() - self.assertGreater(len(graph_def.node), 1) - - -if __name__ == "__main__": - unittest.main() diff --git a/test/3x/tensorflow/quantization/ptq/test_set_tensor.py b/test/3x/tensorflow/quantization/ptq/test_set_tensor.py deleted file mode 100644 index 99e5f50ff7d..00000000000 --- a/test/3x/tensorflow/quantization/ptq/test_set_tensor.py +++ /dev/null @@ -1,145 +0,0 @@ -import os -import shutil -import unittest - -import numpy as np -import tensorflow as tf -import yaml -from tensorflow.compat.v1 import graph_util - -from neural_compressor.tensorflow.algorithms.static_quant.tensorflow import TensorFlowAdaptor -from neural_compressor.tensorflow.utils import disable_random - - -class TestSetTensor(unittest.TestCase): - @classmethod - def tearDownClass(self): - shutil.rmtree("./saved", ignore_errors=True) - - @disable_random() - def test_fp32bias(self): - x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") - paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) - x_pad = tf.pad(x, paddings, "CONSTANT") - conv_weights = 
tf.compat.v1.get_variable( - "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() - ) - conv = tf.nn.conv2d(x_pad, conv_weights, strides=[1, 2, 2, 1], padding="VALID") - - conv_bias = tf.compat.v1.get_variable( - "bias", [16], dtype=tf.float32, initializer=tf.compat.v1.random_normal_initializer() - ) - - conv_bias = tf.math.add(conv, conv_bias) - relu6 = tf.nn.relu6(conv_bias, name="op_to_store") - - out_name = relu6.name.split(":")[0] - with tf.compat.v1.Session() as sess: - sess.run(tf.compat.v1.global_variables_initializer()) - constant_graph = graph_util.convert_variables_to_constants( - sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] - ) - - from neural_compressor.tensorflow import StaticQuantConfig, quantize_model - from neural_compressor.tensorflow.utils import BaseDataLoader, DummyDataset - - dataset = DummyDataset(shape=(100, 56, 56, 16), label=True) - calib_dataloader = BaseDataLoader(dataset) - quant_config = StaticQuantConfig() - q_model = quantize_model(constant_graph, quant_config, calib_dataloader) - - framework_specific_info = { - "device": "cpu", - "workspace_path": "saved", - "random_seed": 1978, - "inputs": ["input"], - "outputs": ["op_to_store"], - "approach": "post_training_static_quant", - "format": "default", - "backend": "default", - } - adaptor = TensorFlowAdaptor(framework_specific_info) - adaptor.set_tensor(q_model, {"bias": np.random.random(16)}) - - from tensorflow.core.framework import attr_value_pb2 - from tensorflow.python.framework import dtypes - - for node in q_model.graph_def.node: - if node.name == "bias": - self.assertEqual(node.attr["dtype"], attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) - - @disable_random() - def test_int32bias(self): - x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") - paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) - x_pad = tf.pad(x, paddings, "CONSTANT") - conv_weights = tf.compat.v1.get_variable( - "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() - ) - conv = tf.nn.conv2d(x_pad, conv_weights, strides=[1, 2, 2, 1], padding="VALID") - - conv_bias = tf.compat.v1.get_variable("bias", [16], dtype=tf.float32) - - conv_bias = tf.math.add(conv, conv_bias) - relu6 = tf.nn.relu6(conv_bias, name="relu_0") - - conv_weights1 = tf.compat.v1.get_variable( - "weight1", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() - ) - conv1 = tf.nn.conv2d(relu6, conv_weights1, strides=[1, 2, 2, 1], padding="VALID") - - conv_bias1 = tf.compat.v1.get_variable("bias1", [16], dtype=tf.float32) - - conv_bias1 = tf.math.add(conv1, conv_bias1) - relu6 = tf.nn.relu6(conv_bias1, name="relu_1") - - conv_weights2 = tf.compat.v1.get_variable( - "weight2", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() - ) - conv2 = tf.nn.conv2d(relu6, conv_weights2, strides=[1, 2, 2, 1], padding="VALID") - - conv_bias2 = tf.compat.v1.get_variable("bias2", [16], dtype=tf.float32) - - conv_bias2 = tf.math.add(conv2, conv_bias2) - relu6 = tf.nn.relu6(conv_bias2, name="op_to_store") - out_name = relu6.name.split(":")[0] - with tf.compat.v1.Session() as sess: - sess.run(tf.compat.v1.global_variables_initializer()) - constant_graph = graph_util.convert_variables_to_constants( - sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] - ) - - for i in constant_graph.node: - if i.op.find("Add") != -1: - i.op = "Add" - - from neural_compressor.tensorflow import StaticQuantConfig, quantize_model - from 
neural_compressor.tensorflow.utils import BaseDataLoader, DummyDataset - - dataset = DummyDataset(shape=(100, 56, 56, 16), label=True) - calib_dataloader = BaseDataLoader(dataset) - quant_config = StaticQuantConfig() - q_model = quantize_model(constant_graph, quant_config, calib_dataloader) - - framework_specific_info = { - "device": "cpu", - "workspace_path": "saved", - "random_seed": 1978, - "inputs": ["input"], - "outputs": ["op_to_store"], - "approach": "post_training_static_quant", - "format": "default", - "backend": "default", - } - adaptor = TensorFlowAdaptor(framework_specific_info) - adaptor.set_tensor(q_model, {"bias1": np.random.randint(6, size=2, dtype="int32")}) - from tensorflow.core.framework import attr_value_pb2 - from tensorflow.python.framework import dtypes - - for node in q_model.graph_def.node: - if node.name == "bias2": - self.assertEqual(node.attr["dtype"], attr_value_pb2.AttrValue(type=dtypes.qint32.as_datatype_enum)) - - -if __name__ == "__main__": - unittest.main() diff --git a/test/3x/tensorflow/test_config.py b/test/3x/tensorflow/test_config.py index 579f69166d5..4a48387af55 100644 --- a/test/3x/tensorflow/test_config.py +++ b/test/3x/tensorflow/test_config.py @@ -213,6 +213,35 @@ def test_static_quant_from_dict_advance(self): self.assertEqual(conv2d_quantized, False) + def test_static_quant_from_dict_advance2(self): + logger.info("test_static_quant_from_dict_advance2") + from neural_compressor.tensorflow import quantize_model + from neural_compressor.tensorflow.utils import DummyDataset + + dataset = DummyDataset(shape=(100, 32, 32, 3), label=True) + calib_dataloader = MyDataLoader(dataset=dataset) + fp32_model = self.graph + quant_config = { + "static_quant": { + "global": { + "weight_dtype": "int8", + "weight_sym": True, + "weight_granularity": "per_channel", + "act_dtype": "int8", + "act_sym": True, + "act_granularity": "per_channel", + }, + "local": { + "conv1": { + "weight_algorithm": "kl", + "act_algorithm": "kl", + } + }, + } + } + qmodel = quantize_model(fp32_model, quant_config, calib_dataloader) + self.assertIsNotNone(qmodel) + def test_static_quant_from_class_advance(self): logger.info("test_static_quant_from_class_advance") from neural_compressor.tensorflow import StaticQuantConfig, quantize_model diff --git a/test/3x/tensorflow/test_model_wrappers.py b/test/3x/tensorflow/test_model_wrappers.py index da76526e6f9..b5cdb7be9de 100644 --- a/test/3x/tensorflow/test_model_wrappers.py +++ b/test/3x/tensorflow/test_model_wrappers.py @@ -147,8 +147,6 @@ def test_validate_graph_node(self): "Only supports tf previous to the version 2.16.1", ) def test_estimator(self): - from neural_compressor.tensorflow.quantization.utils.utility import get_estimator_graph - model_fn = build_estimator() input_fn = build_input_fn() estimator = tf.estimator.Estimator(model_fn, model_dir=None, config=None, params=None, warm_start_from=None) @@ -205,7 +203,7 @@ def test_slim(self): self.assertGreaterEqual(len(model.input_node_names), 1) self.assertEqual(model.model_path, "./slim_ckpt/inception_v1.ckpt") # test net factory - from neural_compressor.tensorflow.utils.nets_factory import TFSlimNetsFactory + from neural_compressor.tensorflow.utils.utility import TFSlimNetsFactory factory = TFSlimNetsFactory() from tf_slim.nets import inception