From 2506c455808c0ae4fca6c05fa76c3ba5d07cbe8e Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Wed, 10 Jan 2024 12:36:05 +0800 Subject: [PATCH 01/15] replace all tuning to tune Signed-off-by: yiliu30 --- neural_compressor/common/base_tune.py | 106 ++++++++++++++++++++------ neural_compressor/torch/__init__.py | 2 +- neural_compressor/torch/tune.py | 21 +++-- test/3x/torch/test_autotune.py | 34 ++++----- 4 files changed, 110 insertions(+), 53 deletions(-) diff --git a/neural_compressor/common/base_tune.py b/neural_compressor/common/base_tune.py index 69652d5856b..0b6ef47ddb1 100644 --- a/neural_compressor/common/base_tune.py +++ b/neural_compressor/common/base_tune.py @@ -13,7 +13,7 @@ # limitations under the License. from abc import abstractmethod -from typing import Any, Callable, Dict, List, Optional, Union +from typing import Any, Dict, List, Optional, Union from neural_compressor.common.base_config import BaseConfig, ComposableConfig from neural_compressor.common.logger import Logger @@ -37,7 +37,7 @@ def apply(self) -> Any: raise NotImplementedError -class TuningObjectives: +class TuneObjectives: EVAL_FN = "eval_fn" WEIGHT = "weight" FN_NAME = "name" @@ -66,7 +66,7 @@ def _update_the_objective_score(self, eval_pair, eval_result, overall_result) -> # TODO update the result according to the weight and algo_name return overall_result + eval_result * eval_pair[self.WEIGHT] - def get_number_of_tuning_objectives(self) -> int: + def get_number_of_tune_objectives(self) -> int: return len(self.eval_fn_registry) def _set_eval_fn_registry(self, user_eval_fns: List[Dict]) -> None: @@ -91,41 +91,96 @@ def set_eval_fn_registry(self, eval_fns: Optional[Union[Dict, List[Dict]]] = Non self._set_eval_fn_registry(eval_fns) -tuning_objectives = TuningObjectives() +tune_objectives = TuneObjectives() -class BaseTuningConfig: - """Base Class for Tuning Criterion. +class BaseTuneConfig: + """Base Class for Tune Criterion. Args: quant_configs: quantization configs. Default value is empty. - timeout: Tuning timeout (seconds). Default value is 0 which means early stop. + timeout: Tune timeout (seconds). Default value is 0 which means early stop. max_trials: Max tune times. Default value is 100. Combine with timeout field to decide when to exit. """ def __init__(self, quant_configs=None, timeout=0, max_trials=100) -> None: - """Init a TuningCriterion object.""" + """Init a TuneCriterion object.""" self.quant_configs = quant_configs self.timeout = timeout self.max_trials = max_trials +class Trial: + def __init__( + self, float_model, quant_config: BaseConfig, fwk_wrapper: FrameworkWrapper, tune_objectives: TuneObjectives + ): + # The unique id to refer to one trial, it's used by the tuner. + self.trial_id = None + self._trial_result = None + self.set_trail_result_cnt = 0 + self.float_model = float_model + self.quant_model = None + self.quant_config = quant_config + self.fwk_wrapper = fwk_wrapper + self.tune_objectives = tune_objectives + self._post_init() + + def _post_init(self): + """Post initialize one trial.""" + # generate the trial_id + pass + + @property + def trial_result(self): + return self._trial_result + + @trial_result.setter + def trial_result(self, result): + assert self.set_trail_result_cnt < 1, "The trial result already be set." 
+        self._trial_result = result
+        self.set_trail_result_cnt += 1
+
+    def quantize(self):
+        """Quantize the model with the given quant_config."""
+        quant_model = self.fwk_wrapper.apply(self.quant_config)
+        self.quant_model = quant_model
+        return quant_model
+
+    def get_eval_result(self) -> float:
+        """Return the evaluation result.
+
+        The evaluation is triggered lazily, only when the result is needed, and it runs only once.
+        """
+        if not self.trial_result:
+            eval_score = self.tune_objectives.evaluate(self.quant_model)
+            self.trial_result = eval_score
+        return self.trial_result
+
+    def recover_quant_model(self):
+        """Recover the quantized model before the tuning process ends.
+
+        The quantized model is destroyed after evaluation to save memory."""
+        pass
+
+    def destroy_quant_model(self) -> None:
+        """Destroy the quantized model after evaluation to save memory."""
+        pass
+
+
 class Tuner:
     def __init__(
-        self, tune_config: BaseTuningConfig, tuning_objectives: TuningObjectives, fwk_wrapper: FrameworkWrapper
+        self, float_model, tune_config: BaseTuneConfig, tune_objectives: TuneObjectives, fwk_wrapper: FrameworkWrapper
     ) -> None:
+        self.float_model = float_model
         self.tune_config = tune_config
-        self.tuning_objectives = tuning_objectives
+        self.tune_objectives = tune_objectives
         self.fwk_wrapper = fwk_wrapper
         self._post_init()
 
     def _post_init(self) -> None:
         # check the number of evaluation functions
-        num_tuning_objectives = self.tuning_objectives.get_number_of_tuning_objectives()
-        assert (
-            num_tuning_objectives > 0
-        ), "Please ensure that you register at least one evaluation metric for auto-tune."
-        logger.info(f"There are {num_tuning_objectives} tuning objectives.")
+        num_tune_objectives = self.tune_objectives.get_number_of_tune_objectives()
+        assert num_tune_objectives > 0, "Please ensure that you register at least one evaluation metric for auto-tune."
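+        # The registry is filled in ahead of time through `tune_objectives.set_eval_fn_registry(...)`;
+        # a minimal sketch, assuming a user-defined `eval_acc_fn`:
+        #     tune_objectives.set_eval_fn_registry([{"eval_fn": eval_acc_fn, "weight": 1.0}])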
+ logger.info(f"There are {num_tune_objectives} tune objectives.") @staticmethod def parse_quant_config(quant_config: BaseConfig) -> List[BaseConfig]: @@ -143,17 +198,22 @@ def parse_quant_configs(self) -> List[BaseConfig]: quant_config_list.extend(Tuner.parse_quant_config(quant_config)) return quant_config_list - def get_best_model(self, q_model, objective_score: Union[float, int]) -> Any: + def get_best_model(self) -> Any: # TODO(Yi) enable it at the next PR pass - def get_tuning_objective_score(self, model) -> float: - eval_result = self.tuning_objectives.evaluate(model) - return eval_result + def needs_stop(self): + return False + + def update_tune_history(self, trial: Trial): + pass def search(self) -> Any: for config in self.parse_quant_configs(): - logger.info(f"config {config}") - q_model = self.fwk_wrapper.apply(quant_config=config) - if self.get_best_model(q_model, self.get_tuning_objective_score(q_model)): - return q_model + logger.info(f"Config {config}") + trial = Trial(self.float_model, config, fwk_wrapper=self.fwk_wrapper, tune_objectives=self.tune_objectives) + trial.quantize() + trial.get_eval_result() + self.update_tune_history(trial) + if self.needs_stop(): + return self.get_best_model() diff --git a/neural_compressor/torch/__init__.py b/neural_compressor/torch/__init__.py index 57cfe472297..18ddc1ff9a3 100644 --- a/neural_compressor/torch/__init__.py +++ b/neural_compressor/torch/__init__.py @@ -23,4 +23,4 @@ get_default_gptq_config, ) -from neural_compressor.torch.tune import autotune, TuningConfig, get_default_tune_config +from neural_compressor.torch.tune import autotune, TuneConfig, get_default_tune_config diff --git a/neural_compressor/torch/tune.py b/neural_compressor/torch/tune.py index 656d6c5b1be..4480650d9eb 100644 --- a/neural_compressor/torch/tune.py +++ b/neural_compressor/torch/tune.py @@ -16,18 +16,13 @@ import torch -from neural_compressor.common.base_tune import BaseTuningConfig, FrameworkWrapper, Tuner, tuning_objectives +from neural_compressor.common.base_tune import BaseTuneConfig, FrameworkWrapper, Tuner, tune_objectives from neural_compressor.common.logger import Logger from neural_compressor.torch.quantization.config import GPTQConfig, RTNWeightQuantConfig logger = Logger().get_logger() -def get_default_tuning_config(): - # TODO (Yi) support it in the next PR - return None - - class TorchWrapper(FrameworkWrapper): """Concrete implementation of `FrameworkWrapper` for PyTorch models.""" @@ -47,25 +42,27 @@ def apply(self, quant_config): return q_model -class TuningConfig(BaseTuningConfig): +class TuneConfig(BaseTuneConfig): def __init__(self, quant_configs=None, timeout=0, max_trials=100): super().__init__(quant_configs, timeout, max_trials) def autotune( model: torch.nn.Module, - tune_config: TuningConfig, + tune_config: TuneConfig, eval_fns: Optional[Union[Dict, List[Dict]]] = None, run_fn=None, run_args=None, ): - tuning_objectives.set_eval_fn_registry(eval_fns) + tune_objectives.set_eval_fn_registry(eval_fns) torch_wrapper = TorchWrapper(model, run_fn, run_args) - tuner = Tuner(tune_config=tune_config, tuning_objectives=tuning_objectives, fwk_wrapper=torch_wrapper) + tuner = Tuner( + float_model=model, tune_config=tune_config, tune_objectives=tune_objectives, fwk_wrapper=torch_wrapper + ) best_qmodel = tuner.search() return best_qmodel def get_default_tune_config(): - # TODO use the registered default tuning config in the next PR - return TuningConfig(quant_configs=[GPTQConfig(weight_bits=[4, 8]), RTNWeightQuantConfig(weight_bits=[4, 8])]) + # 
TODO use the registered default tune config in the next PR + return TuneConfig(quant_configs=[GPTQConfig(weight_bits=[4, 8]), RTNWeightQuantConfig(weight_bits=[4, 8])]) diff --git a/test/3x/torch/test_autotune.py b/test/3x/torch/test_autotune.py index 876311355f1..60cfe313344 100644 --- a/test/3x/torch/test_autotune.py +++ b/test/3x/torch/test_autotune.py @@ -10,13 +10,13 @@ import torch -def reset_tuning_target(test_func): +def reset_tune_target(test_func): @wraps(test_func) def wrapper(*args, **kwargs): - # Reset tuning targets before running the test - from neural_compressor.common.base_tune import tuning_objectives + # Reset tune targets before running the test + from neural_compressor.common.base_tune import tune_objectives - tuning_objectives.eval_fn_registry = [] + tune_objectives.eval_fn_registry = [] return test_func(*args, **kwargs) return wrapper @@ -58,27 +58,27 @@ def setUp(self): # print the test name logger.info(f"Running TestAutoTune test: {self.id()}") - @reset_tuning_target + @reset_tune_target def test_autotune_api(self): logger.info("test_autotune_api") - from neural_compressor.common.base_tune import tuning_objectives - from neural_compressor.torch import RTNWeightQuantConfig, TuningConfig, autotune + from neural_compressor.common.base_tune import tune_objectives + from neural_compressor.torch import RTNWeightQuantConfig, TuneConfig, autotune def eval_acc_fn(model) -> float: return 1.0 - custom_tune_config = TuningConfig(quant_configs=[RTNWeightQuantConfig(weight_bits=[4, 6])], max_trials=2) + custom_tune_config = TuneConfig(quant_configs=[RTNWeightQuantConfig(weight_bits=[4, 6])], max_trials=2) best_model = autotune( model=build_simple_torch_model(), tune_config=custom_tune_config, eval_fns=[{"eval_fn": eval_acc_fn}] ) self.assertIsNone(best_model) - self.assertEqual(len(tuning_objectives.eval_fn_registry), 1) + self.assertEqual(len(tune_objectives.eval_fn_registry), 1) - @reset_tuning_target + @reset_tune_target def test_autotune_api_2(self): logger.info("test_autotune_api") - from neural_compressor.common.base_tune import tuning_objectives - from neural_compressor.torch import RTNWeightQuantConfig, TuningConfig, autotune + from neural_compressor.common.base_tune import tune_objectives + from neural_compressor.torch import RTNWeightQuantConfig, TuneConfig, autotune def eval_acc_fn(model) -> float: return 1.0 @@ -94,17 +94,17 @@ def eval_perf_fn(model) -> float: }, ] - custom_tune_config = TuningConfig(quant_configs=[RTNWeightQuantConfig(weight_bits=[4, 6])], max_trials=2) + custom_tune_config = TuneConfig(quant_configs=[RTNWeightQuantConfig(weight_bits=[4, 6])], max_trials=2) best_model = autotune(model=build_simple_torch_model(), tune_config=custom_tune_config, eval_fns=eval_fns) self.assertIsNone(best_model) - self.assertEqual(len(tuning_objectives.eval_fn_registry), 2) + self.assertEqual(len(tune_objectives.eval_fn_registry), 2) - @reset_tuning_target + @reset_tune_target def test_autotune_not_eval_func(self): logger.info("test_autotune_api") - from neural_compressor.torch import RTNWeightQuantConfig, TuningConfig, autotune + from neural_compressor.torch import RTNWeightQuantConfig, TuneConfig, autotune - custom_tune_config = TuningConfig(quant_configs=[RTNWeightQuantConfig(weight_bits=[4, 6])], max_trials=2) + custom_tune_config = TuneConfig(quant_configs=[RTNWeightQuantConfig(weight_bits=[4, 6])], max_trials=2) # Use assertRaises to check that an AssertionError is raised with self.assertRaises(AssertionError) as context: From 
c721f17b364a9f1cbb152d21762dff3f2f578688 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Wed, 17 Jan 2024 08:36:55 +0800 Subject: [PATCH 02/15] rename `TuneObjetives` into `Evaluator` Signed-off-by: yiliu30 --- neural_compressor/common/base_tune.py | 28 +++++++++++++-------------- neural_compressor/torch/__init__.py | 3 ++- neural_compressor/torch/tune.py | 17 +++++----------- test/3x/torch/test_autotune.py | 24 +++++++++++------------ 4 files changed, 32 insertions(+), 40 deletions(-) diff --git a/neural_compressor/common/base_tune.py b/neural_compressor/common/base_tune.py index 0b6ef47ddb1..b7bbc145451 100644 --- a/neural_compressor/common/base_tune.py +++ b/neural_compressor/common/base_tune.py @@ -37,7 +37,7 @@ def apply(self) -> Any: raise NotImplementedError -class TuneObjectives: +class Evaluator: EVAL_FN = "eval_fn" WEIGHT = "weight" FN_NAME = "name" @@ -66,7 +66,7 @@ def _update_the_objective_score(self, eval_pair, eval_result, overall_result) -> # TODO update the result according to the weight and algo_name return overall_result + eval_result * eval_pair[self.WEIGHT] - def get_number_of_tune_objectives(self) -> int: + def get_number_of_eval_funtions(self) -> int: return len(self.eval_fn_registry) def _set_eval_fn_registry(self, user_eval_fns: List[Dict]) -> None: @@ -91,10 +91,10 @@ def set_eval_fn_registry(self, eval_fns: Optional[Union[Dict, List[Dict]]] = Non self._set_eval_fn_registry(eval_fns) -tune_objectives = TuneObjectives() +evaluator = Evaluator() -class BaseTuneConfig: +class TuningConfig: """Base Class for Tune Criterion. Args: @@ -111,9 +111,7 @@ def __init__(self, quant_configs=None, timeout=0, max_trials=100) -> None: class Trial: - def __init__( - self, float_model, quant_config: BaseConfig, fwk_wrapper: FrameworkWrapper, tune_objectives: TuneObjectives - ): + def __init__(self, float_model, quant_config: BaseConfig, fwk_wrapper: FrameworkWrapper, evaluator: Evaluator): # The unique id to refer to one trial, it's used by the tuner. self.trial_id = None self._trial_result = None @@ -122,7 +120,7 @@ def __init__( self.quant_model = None self.quant_config = quant_config self.fwk_wrapper = fwk_wrapper - self.tune_objectives = tune_objectives + self.evaluator = evaluator self._post_init() def _post_init(self): @@ -152,7 +150,7 @@ def get_eval_result(self) -> float: The evaluation process is triggered by Lazy only when it is needed, and it is called only once. """ if not self.trial_result: - eval_score = self.tune_objectives.evaluate(self.quant_model) + eval_score = self.evaluator.evaluate(self.quant_model) self.trial_result = eval_score return self.trial_result @@ -168,19 +166,19 @@ def destroy_quant_model(self) -> None: class Tuner: def __init__( - self, float_model, tune_config: BaseTuneConfig, tune_objectives: TuneObjectives, fwk_wrapper: FrameworkWrapper + self, float_model, tune_config: TuningConfig, evaluator: Evaluator, fwk_wrapper: FrameworkWrapper ) -> None: self.float_model = float_model self.tune_config = tune_config - self.tune_objectives = tune_objectives + self.evaluator = evaluator self.fwk_wrapper = fwk_wrapper self._post_init() def _post_init(self) -> None: # check the number of evaluation functions - num_tune_objectives = self.tune_objectives.get_number_of_tune_objectives() - assert num_tune_objectives > 0, "Please ensure that you register at least one evaluation metric for auto-tune." 
- logger.info(f"There are {num_tune_objectives} tune objectives.") + num_evaluator = self.evaluator.get_number_of_eval_funtions() + assert num_evaluator > 0, "Please ensure that you register at least one evaluation metric for auto-tune." + logger.info(f"There are {num_evaluator} tune objectives.") @staticmethod def parse_quant_config(quant_config: BaseConfig) -> List[BaseConfig]: @@ -211,7 +209,7 @@ def update_tune_history(self, trial: Trial): def search(self) -> Any: for config in self.parse_quant_configs(): logger.info(f"Config {config}") - trial = Trial(self.float_model, config, fwk_wrapper=self.fwk_wrapper, tune_objectives=self.tune_objectives) + trial = Trial(self.float_model, config, fwk_wrapper=self.fwk_wrapper, evaluator=self.evaluator) trial.quantize() trial.get_eval_result() self.update_tune_history(trial) diff --git a/neural_compressor/torch/__init__.py b/neural_compressor/torch/__init__.py index 18ddc1ff9a3..42fb003946f 100644 --- a/neural_compressor/torch/__init__.py +++ b/neural_compressor/torch/__init__.py @@ -23,4 +23,5 @@ get_default_gptq_config, ) -from neural_compressor.torch.tune import autotune, TuneConfig, get_default_tune_config +from neural_compressor.common.base_tune import TuningConfig +from neural_compressor.torch.tune import autotune, get_default_tune_config diff --git a/neural_compressor/torch/tune.py b/neural_compressor/torch/tune.py index 4480650d9eb..bd6d0d0763f 100644 --- a/neural_compressor/torch/tune.py +++ b/neural_compressor/torch/tune.py @@ -16,7 +16,7 @@ import torch -from neural_compressor.common.base_tune import BaseTuneConfig, FrameworkWrapper, Tuner, tune_objectives +from neural_compressor.common.base_tune import FrameworkWrapper, Tuner, TuningConfig, evaluator from neural_compressor.common.logger import Logger from neural_compressor.torch.quantization.config import GPTQConfig, RTNWeightQuantConfig @@ -42,27 +42,20 @@ def apply(self, quant_config): return q_model -class TuneConfig(BaseTuneConfig): - def __init__(self, quant_configs=None, timeout=0, max_trials=100): - super().__init__(quant_configs, timeout, max_trials) - - def autotune( model: torch.nn.Module, - tune_config: TuneConfig, + tune_config: TuningConfig, eval_fns: Optional[Union[Dict, List[Dict]]] = None, run_fn=None, run_args=None, ): - tune_objectives.set_eval_fn_registry(eval_fns) + evaluator.set_eval_fn_registry(eval_fns) torch_wrapper = TorchWrapper(model, run_fn, run_args) - tuner = Tuner( - float_model=model, tune_config=tune_config, tune_objectives=tune_objectives, fwk_wrapper=torch_wrapper - ) + tuner = Tuner(float_model=model, tune_config=tune_config, evaluator=evaluator, fwk_wrapper=torch_wrapper) best_qmodel = tuner.search() return best_qmodel def get_default_tune_config(): # TODO use the registered default tune config in the next PR - return TuneConfig(quant_configs=[GPTQConfig(weight_bits=[4, 8]), RTNWeightQuantConfig(weight_bits=[4, 8])]) + return TuningConfig(quant_configs=[GPTQConfig(weight_bits=[4, 8]), RTNWeightQuantConfig(weight_bits=[4, 8])]) diff --git a/test/3x/torch/test_autotune.py b/test/3x/torch/test_autotune.py index 60cfe313344..840d88f706d 100644 --- a/test/3x/torch/test_autotune.py +++ b/test/3x/torch/test_autotune.py @@ -14,9 +14,9 @@ def reset_tune_target(test_func): @wraps(test_func) def wrapper(*args, **kwargs): # Reset tune targets before running the test - from neural_compressor.common.base_tune import tune_objectives + from neural_compressor.common.base_tune import evaluator - tune_objectives.eval_fn_registry = [] + evaluator.eval_fn_registry = [] 
         return test_func(*args, **kwargs)
 
     return wrapper
 
@@ -61,24 +61,24 @@ def setUp(self):
     @reset_tune_target
     def test_autotune_api(self):
         logger.info("test_autotune_api")
-        from neural_compressor.common.base_tune import tune_objectives
-        from neural_compressor.torch import RTNWeightQuantConfig, TuneConfig, autotune
+        from neural_compressor.common.base_tune import evaluator
+        from neural_compressor.torch import RTNWeightQuantConfig, TuningConfig, autotune
 
         def eval_acc_fn(model) -> float:
             return 1.0
 
-        custom_tune_config = TuneConfig(quant_configs=[RTNWeightQuantConfig(weight_bits=[4, 6])], max_trials=2)
+        custom_tune_config = TuningConfig(quant_configs=[RTNWeightQuantConfig(weight_bits=[4, 6])], max_trials=2)
         best_model = autotune(
             model=build_simple_torch_model(), tune_config=custom_tune_config, eval_fns=[{"eval_fn": eval_acc_fn}]
         )
         self.assertIsNone(best_model)
-        self.assertEqual(len(tune_objectives.eval_fn_registry), 1)
+        self.assertEqual(len(evaluator.eval_fn_registry), 1)
 
     @reset_tune_target
     def test_autotune_api_2(self):
         logger.info("test_autotune_api")
-        from neural_compressor.common.base_tune import tune_objectives
-        from neural_compressor.torch import RTNWeightQuantConfig, TuneConfig, autotune
+        from neural_compressor.common.base_tune import evaluator
+        from neural_compressor.torch import RTNWeightQuantConfig, TuningConfig, autotune
 
         def eval_acc_fn(model) -> float:
             return 1.0
@@ -94,17 +94,17 @@ def eval_perf_fn(model) -> float:
             },
         ]
 
-        custom_tune_config = TuneConfig(quant_configs=[RTNWeightQuantConfig(weight_bits=[4, 6])], max_trials=2)
+        custom_tune_config = TuningConfig(quant_configs=[RTNWeightQuantConfig(weight_bits=[4, 6])], max_trials=2)
         best_model = autotune(model=build_simple_torch_model(), tune_config=custom_tune_config, eval_fns=eval_fns)
         self.assertIsNone(best_model)
-        self.assertEqual(len(tune_objectives.eval_fn_registry), 2)
+        self.assertEqual(len(evaluator.eval_fn_registry), 2)
 
     @reset_tune_target
     def test_autotune_not_eval_func(self):
         logger.info("test_autotune_api")
-        from neural_compressor.torch import RTNWeightQuantConfig, TuneConfig, autotune
+        from neural_compressor.torch import RTNWeightQuantConfig, TuningConfig, autotune
 
-        custom_tune_config = TuneConfig(quant_configs=[RTNWeightQuantConfig(weight_bits=[4, 6])], max_trials=2)
+        custom_tune_config = TuningConfig(quant_configs=[RTNWeightQuantConfig(weight_bits=[4, 6])], max_trials=2)
 
         # Use assertRaises to check that an AssertionError is raised
         with self.assertRaises(AssertionError) as context:

From 249cf97f940dc8343a73c5ee184e88bc247f7dc7 Mon Sep 17 00:00:00 2001
From: yiliu30
Date: Wed, 17 Jan 2024 09:18:23 +0800
Subject: [PATCH 03/15] refactor the tuning pipeline

Signed-off-by: yiliu30
---
 neural_compressor/torch/tune.py | 120 +++++++++++++++++++++++++-------
 1 file changed, 94 insertions(+), 26 deletions(-)

diff --git a/neural_compressor/torch/tune.py b/neural_compressor/torch/tune.py
index bd6d0d0763f..c1b37037e33 100644
--- a/neural_compressor/torch/tune.py
+++ b/neural_compressor/torch/tune.py
@@ -12,34 +12,87 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
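+
+# The refactored tuning pipeline below is built from three cooperating pieces:
+#   ConfigLoader  - iterates over the candidate quantization configs,
+#   TuningMonitor - records each trial result and decides when tuning should stop,
+#   TuningLogger  - emits uniform start/end log lines for each tuning stage.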
-from typing import Callable, Dict, List, Optional, Tuple, Union +from typing import Callable, Dict, Generator, List, Optional, Tuple, Union import torch -from neural_compressor.common.base_tune import FrameworkWrapper, Tuner, TuningConfig, evaluator +from neural_compressor.common.base_config import BaseConfig +from neural_compressor.common.base_tune import TuningConfig, evaluator from neural_compressor.common.logger import Logger +from neural_compressor.torch import quantize from neural_compressor.torch.quantization.config import GPTQConfig, RTNWeightQuantConfig logger = Logger().get_logger() -class TorchWrapper(FrameworkWrapper): - """Concrete implementation of `FrameworkWrapper` for PyTorch models.""" +class Sampler: + pass - def __init__( - self, model: torch.nn.Module, run_fn: Optional[Callable] = None, run_args: Optional[Tuple] = None - ) -> None: - super().__init__(model) - self.run_fn = run_fn - self.run_args = run_args - def apply(self, quant_config): - """The entry to apply quantization algorithms on a given a model.""" - logger.info(f"apply quant_config: {quant_config}.") - from neural_compressor.torch import quantize +class ConfigLoader: + def __iter__(self) -> Generator[BaseConfig]: + yield None - q_model = quantize(model=self.model, quant_config=quant_config, run_fn=self.run_fn, run_args=self.run_args) - return q_model + +class TuningMonitor: + def __init__(self) -> None: + # TODO refine the `tuning_history` with a more appropriate data structure + self.tuning_history: list = [] + + def add_trial_result(self, trial_index: int, eval_result: Union[int, float], quant_config: BaseConfig) -> None: + self.tuning_history.append([trial_index, eval_result, quant_config]) + + def get_best_quant_config(self) -> BaseConfig: + return self.tuning_history[0][2] + + def need_stop(self) -> bool: + return True + + +class TuningLogger: + @classmethod + def tuning_start(cls) -> None: + logger.info("Tuning started.") + + @classmethod + def trial_start(cls, trial_index: int = None) -> None: + logger.info(f" {trial_index}-trail started.") + + @classmethod + def quantization_start(cls) -> None: + logger.info("Quantization started.") + + @classmethod + def quantization_end(cls) -> None: + logger.info("Quantization end.") + + @classmethod + def evaluation_start(cls) -> None: + logger.info("Evaluation started.") + + @classmethod + def evaluation_end(cls) -> None: + logger.info("Evaluation end.") + + @classmethod + def trial_end(cls, trial_index: int = None) -> None: + logger.info(f" {trial_index}-trail end.") + + @classmethod + def tuning_end(cls) -> None: + logger.info("Tuning completed.") + + +def init_tuning(tuning_config: TuningConfig) -> Tuple[ConfigLoader, TuningLogger, TuningMonitor]: + config_loader = ConfigLoader() + tuning_logger = TuningLogger() + tuning_monitor = TuningMonitor() + return config_loader, tuning_logger, tuning_monitor + + +def get_default_tune_config(): + # TODO use the registered default tune config in the next PR + return TuningConfig(quant_configs=[GPTQConfig(weight_bits=[4, 8]), RTNWeightQuantConfig(weight_bits=[4, 8])]) def autotune( @@ -48,14 +101,29 @@ def autotune( eval_fns: Optional[Union[Dict, List[Dict]]] = None, run_fn=None, run_args=None, -): - evaluator.set_eval_fn_registry(eval_fns) - torch_wrapper = TorchWrapper(model, run_fn, run_args) - tuner = Tuner(float_model=model, tune_config=tune_config, evaluator=evaluator, fwk_wrapper=torch_wrapper) - best_qmodel = tuner.search() - return best_qmodel - +) -> Optional[torch.nn.Module]: + # TODO Old Impl, remove it 
before merge
+    # evaluator.set_eval_fn_registry(eval_fns)
+    # torch_wrapper = TorchWrapper(model, run_fn, run_args)
+    # tuner = Tuner(float_model=model, tune_config=tune_config, evaluator=evaluator, fwk_wrapper=torch_wrapper)
+    # best_qmodel = tuner.search()
+
+    best_quant_model = None
+    evaluator.set_eval_fn_registry(eval_fns)
+    config_loader, tuning_logger, tuning_monitor = init_tuning(tuning_config=tune_config)
+    tuning_logger.tuning_start()
+    for trial_index, quant_config in enumerate(config_loader):
+        tuning_logger.trial_start(trial_index=trial_index)
+        tuning_logger.quantization_start()
+        q_model = quantize(model, quant_config=quant_config, run_fn=run_fn, run_args=run_args)
+        tuning_logger.quantization_end()
+        tuning_logger.evaluation_start()
+        eval_result: float = evaluator.evaluate(q_model)
+        tuning_logger.evaluation_end()
+        tuning_monitor.add_trial_result(trial_index, eval_result, quant_config)
+        if tuning_monitor.need_stop():
+            best_quant_config: BaseConfig = tuning_monitor.get_best_quant_config()
+            best_quant_model = quantize(model, quant_config=best_quant_config, run_fn=run_fn, run_args=run_args)
+        tuning_logger.trial_end()
+    tuning_logger.tuning_end()
+    return best_quant_model

From 572848cc4c1d26bb4cae46da78561e449b458f43 Mon Sep 17 00:00:00 2001
From: yiliu30
Date: Wed, 17 Jan 2024 09:36:02 +0800
Subject: [PATCH 04/15] revert tune to tuning

Signed-off-by: yiliu30
---
 neural_compressor/common/base_tune.py | 8 ++++----
 neural_compressor/torch/tune.py       | 2 +-
 test/3x/torch/test_autotune.py        | 10 +++++-----
 3 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/neural_compressor/common/base_tune.py b/neural_compressor/common/base_tune.py
index b7bbc145451..ad6e0fe526e 100644
--- a/neural_compressor/common/base_tune.py
+++ b/neural_compressor/common/base_tune.py
@@ -95,12 +95,12 @@ def set_eval_fn_registry(self, eval_fns: Optional[Union[Dict, List[Dict]]] = Non
 
 
 class TuningConfig:
-    """Base Class for Tune Criterion.
+    """Base Class for Tuning Criterion.
 
     Args:
         quant_configs: quantization configs. Default value is empty.
-        timeout: Tune timeout (seconds). Default value is 0 which means early stop.
-        max_trials: Max tune times. Default value is 100. Combine with timeout field to decide when to exit.
+        timeout: Tuning timeout (seconds). Default value is 0 which means early stop.
+        max_trials: Max tuning times. Default value is 100. Combine with timeout field to decide when to exit.
     """
 
     def __init__(self, quant_configs=None, timeout=0, max_trials=100) -> None:
@@ -178,7 +178,7 @@ def _post_init(self) -> None:
         # check the number of evaluation functions
         num_evaluator = self.evaluator.get_number_of_eval_funtions()
         assert num_evaluator > 0, "Please ensure that you register at least one evaluation metric for auto-tune."
- logger.info(f"There are {num_evaluator} tune objectives.") + logger.info(f"There are {num_evaluator} tuning objectives.") @staticmethod def parse_quant_config(quant_config: BaseConfig) -> List[BaseConfig]: diff --git a/neural_compressor/torch/tune.py b/neural_compressor/torch/tune.py index c1b37037e33..1a57bae0471 100644 --- a/neural_compressor/torch/tune.py +++ b/neural_compressor/torch/tune.py @@ -91,7 +91,7 @@ def init_tuning(tuning_config: TuningConfig) -> Tuple[ConfigLoader, TuningLogger def get_default_tune_config(): - # TODO use the registered default tune config in the next PR + # TODO use the registered default tuning config in the next PR return TuningConfig(quant_configs=[GPTQConfig(weight_bits=[4, 8]), RTNWeightQuantConfig(weight_bits=[4, 8])]) diff --git a/test/3x/torch/test_autotune.py b/test/3x/torch/test_autotune.py index 840d88f706d..cdb0067c165 100644 --- a/test/3x/torch/test_autotune.py +++ b/test/3x/torch/test_autotune.py @@ -10,10 +10,10 @@ import torch -def reset_tune_target(test_func): +def reset_tuning_target(test_func): @wraps(test_func) def wrapper(*args, **kwargs): - # Reset tune targets before running the test + # Reset tuning targets before running the test from neural_compressor.common.base_tune import evaluator evaluator.eval_fn_registry = [] @@ -58,7 +58,7 @@ def setUp(self): # print the test name logger.info(f"Running TestAutoTune test: {self.id()}") - @reset_tune_target + @reset_tuning_target def test_autotune_api(self): logger.info("test_autotune_api") from neural_compressor.common.base_tune import evaluator @@ -74,7 +74,7 @@ def eval_acc_fn(model) -> float: self.assertIsNone(best_model) self.assertEqual(len(evaluator.eval_fn_registry), 1) - @reset_tune_target + @reset_tuning_target def test_autotune_api_2(self): logger.info("test_autotune_api") from neural_compressor.common.base_tune import evaluator @@ -99,7 +99,7 @@ def eval_perf_fn(model) -> float: self.assertIsNone(best_model) self.assertEqual(len(evaluator.eval_fn_registry), 2) - @reset_tune_target + @reset_tuning_target def test_autotune_not_eval_func(self): logger.info("test_autotune_api") from neural_compressor.torch import RTNWeightQuantConfig, TuningConfig, autotune From 9afaf44f8a9dfa1510ac6a971b7cb8c1cd96544c Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Wed, 17 Jan 2024 10:00:23 +0800 Subject: [PATCH 05/15] fix UTs Signed-off-by: yiliu30 --- neural_compressor/common/base_tune.py | 195 ++++++++++++-------------- neural_compressor/torch/tune.py | 82 +---------- test/3x/torch/test_autotune.py | 4 +- 3 files changed, 99 insertions(+), 182 deletions(-) diff --git a/neural_compressor/common/base_tune.py b/neural_compressor/common/base_tune.py index ad6e0fe526e..5b8bb3fc4d9 100644 --- a/neural_compressor/common/base_tune.py +++ b/neural_compressor/common/base_tune.py @@ -12,29 +12,23 @@ # See the License for the specific language governing permissions and # limitations under the License. -from abc import abstractmethod -from typing import Any, Dict, List, Optional, Union + +from typing import Any, Dict, List, Optional, Tuple, Union from neural_compressor.common.base_config import BaseConfig, ComposableConfig from neural_compressor.common.logger import Logger logger = Logger().get_logger() - -class FrameworkWrapper: - """Abstract base class for wrap framework's APIs. - - FrameworkWrapper provides a uniform interface for encapsulating different framework's APIs. - This class is intended to be used by a `tuner` to obtain quantized models. 
- """ - - def __init__(self, model) -> None: - self.model = model - - @abstractmethod - def apply(self) -> Any: - """The entry to apply algorithms on a given model.""" - raise NotImplementedError +__all__ = [ + "Evaluator", + "TuningConfig", + "Sampler", + "ConfigLoader", + "TuningMonitor", + "TuningLogger", + "init_tuning", +] class Evaluator: @@ -90,6 +84,12 @@ def set_eval_fn_registry(self, eval_fns: Optional[Union[Dict, List[Dict]]] = Non raise NotImplementedError(f"The eval_fns should be a dict or a list of dict, but got {type(eval_fns)}.") self._set_eval_fn_registry(eval_fns) + def self_check(self) -> None: + # check the number of evaluation functions + num_evaluator = self.get_number_of_eval_funtions() + assert num_evaluator > 0, "Please ensure that you register at least one evaluation metric for auto-tune." + logger.info(f"There are {num_evaluator} tuning objectives.") + evaluator = Evaluator() @@ -103,83 +103,19 @@ class TuningConfig: max_trials: Max tuning times. Default value is 100. Combine with timeout field to decide when to exit. """ - def __init__(self, quant_configs=None, timeout=0, max_trials=100) -> None: + def __init__(self, quant_configs=None, timeout=0, max_trials=100, sampler: "Sampler" = None) -> None: """Init a TuneCriterion object.""" self.quant_configs = quant_configs self.timeout = timeout self.max_trials = max_trials + self.sampler = sampler -class Trial: - def __init__(self, float_model, quant_config: BaseConfig, fwk_wrapper: FrameworkWrapper, evaluator: Evaluator): - # The unique id to refer to one trial, it's used by the tuner. - self.trial_id = None - self._trial_result = None - self.set_trail_result_cnt = 0 - self.float_model = float_model - self.quant_model = None - self.quant_config = quant_config - self.fwk_wrapper = fwk_wrapper - self.evaluator = evaluator - self._post_init() - - def _post_init(self): - """Post initialize one trial.""" - # generate the trial_id - pass - - @property - def trial_result(self): - return self._trial_result - - @trial_result.setter - def trial_result(self, result): - assert self.set_trail_result_cnt < 1, "The trial result already be set." - self._trial_result = result - self.set_trail_result_cnt += 1 - - def quantize(self): - """Quantize the model with given quant_config.""" - quant_model = self.fwk_wrapper.apply(self.quant_config) - self.quant_model = quant_model - return quant_model - - def get_eval_result(self) -> float: - """Retune the evaluation result. - - The evaluation process is triggered by Lazy only when it is needed, and it is called only once. - """ - if not self.trial_result: - eval_score = self.evaluator.evaluate(self.quant_model) - self.trial_result = eval_score - return self.trial_result - - def recover_quant_model(self): - """The quantized model should be destroyed after evaluation to save the memory - and recovery it before end the tuning process.""" - pass - - def destroy_quant_model(self) -> None: - """""" - pass - - -class Tuner: - def __init__( - self, float_model, tune_config: TuningConfig, evaluator: Evaluator, fwk_wrapper: FrameworkWrapper - ) -> None: - self.float_model = float_model - self.tune_config = tune_config - self.evaluator = evaluator - self.fwk_wrapper = fwk_wrapper - self._post_init() - - def _post_init(self) -> None: - # check the number of evaluation functions - num_evaluator = self.evaluator.get_number_of_eval_funtions() - assert num_evaluator > 0, "Please ensure that you register at least one evaluation metric for auto-tune." 
- logger.info(f"There are {num_evaluator} tuning objectives.") +class Sampler: + pass + +class ConfigLoader: @staticmethod def parse_quant_config(quant_config: BaseConfig) -> List[BaseConfig]: if isinstance(quant_config, ComposableConfig): @@ -192,26 +128,75 @@ def parse_quant_config(quant_config: BaseConfig) -> List[BaseConfig]: def parse_quant_configs(self) -> List[BaseConfig]: quant_config_list = [] - for quant_config in self.tune_config.quant_configs: - quant_config_list.extend(Tuner.parse_quant_config(quant_config)) + for quant_config in self.quant_configs: + quant_config_list.extend(ConfigLoader.parse_quant_config(quant_config)) return quant_config_list - def get_best_model(self) -> Any: - # TODO(Yi) enable it at the next PR - pass + def __init__(self, quant_configs, sampler: Sampler): + self.quant_configs = quant_configs + self.sampler = sampler - def needs_stop(self): - return False + def __iter__(self): + for config in self.parse_quant_configs(): + yield config - def update_tune_history(self, trial: Trial): - pass - def search(self) -> Any: - for config in self.parse_quant_configs(): - logger.info(f"Config {config}") - trial = Trial(self.float_model, config, fwk_wrapper=self.fwk_wrapper, evaluator=self.evaluator) - trial.quantize() - trial.get_eval_result() - self.update_tune_history(trial) - if self.needs_stop(): - return self.get_best_model() +class TuningMonitor: + def __init__(self) -> None: + # TODO refine the `tuning_history` with a more appropriate data structure + self.tuning_history: list = [] + + def add_trial_result(self, trial_index: int, eval_result: Union[int, float], quant_config: BaseConfig) -> None: + self.tuning_history.append([trial_index, eval_result, quant_config]) + + def get_best_quant_config(self) -> BaseConfig: + return self.tuning_history[0][2] + + def need_stop(self) -> bool: + return True + + +class TuningLogger: + """A unified logger for the tuning process. + + It assists validation teams in retrieving logs. + """ + + @classmethod + def tuning_start(cls) -> None: + logger.info("Tuning started.") + + @classmethod + def trial_start(cls, trial_index: int = None) -> None: + logger.info(f" {trial_index}-trail started.") + + @classmethod + def quantization_start(cls) -> None: + logger.info("Quantization started.") + + @classmethod + def quantization_end(cls) -> None: + logger.info("Quantization end.") + + @classmethod + def evaluation_start(cls) -> None: + logger.info("Evaluation started.") + + @classmethod + def evaluation_end(cls) -> None: + logger.info("Evaluation end.") + + @classmethod + def trial_end(cls, trial_index: int = None) -> None: + logger.info(f" {trial_index}-trail end.") + + @classmethod + def tuning_end(cls) -> None: + logger.info("Tuning completed.") + + +def init_tuning(tuning_config: TuningConfig) -> Tuple[ConfigLoader, TuningLogger, TuningMonitor]: + config_loader = ConfigLoader(quant_configs=tuning_config.quant_configs, sampler=tuning_config.sampler) + tuning_logger = TuningLogger() + tuning_monitor = TuningMonitor() + return config_loader, tuning_logger, tuning_monitor diff --git a/neural_compressor/torch/tune.py b/neural_compressor/torch/tune.py index 1a57bae0471..32456d3f407 100644 --- a/neural_compressor/torch/tune.py +++ b/neural_compressor/torch/tune.py @@ -12,12 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from typing import Callable, Dict, Generator, List, Optional, Tuple, Union +from typing import Dict, List, Optional, Union import torch from neural_compressor.common.base_config import BaseConfig -from neural_compressor.common.base_tune import TuningConfig, evaluator +from neural_compressor.common.base_tune import TuningConfig, evaluator, init_tuning from neural_compressor.common.logger import Logger from neural_compressor.torch import quantize from neural_compressor.torch.quantization.config import GPTQConfig, RTNWeightQuantConfig @@ -25,72 +25,7 @@ logger = Logger().get_logger() -class Sampler: - pass - - -class ConfigLoader: - def __iter__(self) -> Generator[BaseConfig]: - yield None - - -class TuningMonitor: - def __init__(self) -> None: - # TODO refine the `tuning_history` with a more appropriate data structure - self.tuning_history: list = [] - - def add_trial_result(self, trial_index: int, eval_result: Union[int, float], quant_config: BaseConfig) -> None: - self.tuning_history.append([trial_index, eval_result, quant_config]) - - def get_best_quant_config(self) -> BaseConfig: - return self.tuning_history[0][2] - - def need_stop(self) -> bool: - return True - - -class TuningLogger: - @classmethod - def tuning_start(cls) -> None: - logger.info("Tuning started.") - - @classmethod - def trial_start(cls, trial_index: int = None) -> None: - logger.info(f" {trial_index}-trail started.") - - @classmethod - def quantization_start(cls) -> None: - logger.info("Quantization started.") - - @classmethod - def quantization_end(cls) -> None: - logger.info("Quantization end.") - - @classmethod - def evaluation_start(cls) -> None: - logger.info("Evaluation started.") - - @classmethod - def evaluation_end(cls) -> None: - logger.info("Evaluation end.") - - @classmethod - def trial_end(cls, trial_index: int = None) -> None: - logger.info(f" {trial_index}-trail end.") - - @classmethod - def tuning_end(cls) -> None: - logger.info("Tuning completed.") - - -def init_tuning(tuning_config: TuningConfig) -> Tuple[ConfigLoader, TuningLogger, TuningMonitor]: - config_loader = ConfigLoader() - tuning_logger = TuningLogger() - tuning_monitor = TuningMonitor() - return config_loader, tuning_logger, tuning_monitor - - -def get_default_tune_config(): +def get_default_tune_config() -> TuningConfig: # TODO use the registered default tuning config in the next PR return TuningConfig(quant_configs=[GPTQConfig(weight_bits=[4, 8]), RTNWeightQuantConfig(weight_bits=[4, 8])]) @@ -102,14 +37,10 @@ def autotune( run_fn=None, run_args=None, ) -> Optional[torch.nn.Module]: - # TODO Old Impl, remove it before merge - # evaluator.set_eval_fn_registry(eval_fns) - # torch_wrapper = TorchWrapper(model, run_fn, run_args) - # tuner = Tuner(float_model=model, tune_config=tune_config, evaluator=evaluator, fwk_wrapper=torch_wrapper) - # best_qmodel = tuner.search() - + """The main entry of auto-tune.""" best_quant_model = None evaluator.set_eval_fn_registry(eval_fns) + evaluator.self_check() config_loader, tuning_logger, tuning_monitor = init_tuning(tuning_config=tune_config) tuning_logger.tuning_start() for trial_index, quant_config in enumerate(config_loader): @@ -123,7 +54,8 @@ def autotune( tuning_monitor.add_trial_result(trial_index, eval_result, quant_config) if tuning_monitor.need_stop(): best_quant_config: BaseConfig = tuning_monitor.get_best_quant_config() - best_quant_model = quantize(model, quant_config=best_quant_config, run_fn=run_fn, run_args=run_args)() + quantize(model, quant_config=best_quant_config, run_fn=run_fn, 
run_args=run_args, inplace=True) + best_quant_model = model # quantize model inplace tuning_logger.trial_end() tuning_logger.tuning_end() return best_quant_model diff --git a/test/3x/torch/test_autotune.py b/test/3x/torch/test_autotune.py index cdb0067c165..85ddf73494e 100644 --- a/test/3x/torch/test_autotune.py +++ b/test/3x/torch/test_autotune.py @@ -71,7 +71,7 @@ def eval_acc_fn(model) -> float: best_model = autotune( model=build_simple_torch_model(), tune_config=custom_tune_config, eval_fns=[{"eval_fn": eval_acc_fn}] ) - self.assertIsNone(best_model) + self.assertIsNotNone(best_model) self.assertEqual(len(evaluator.eval_fn_registry), 1) @reset_tuning_target @@ -96,7 +96,7 @@ def eval_perf_fn(model) -> float: custom_tune_config = TuningConfig(quant_configs=[RTNWeightQuantConfig(weight_bits=[4, 6])], max_trials=2) best_model = autotune(model=build_simple_torch_model(), tune_config=custom_tune_config, eval_fns=eval_fns) - self.assertIsNone(best_model) + self.assertIsNotNone(best_model) self.assertEqual(len(evaluator.eval_fn_registry), 2) @reset_tuning_target From fe362f9dcd4cc8a964a19030bd0de08d509f9bca Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Wed, 17 Jan 2024 10:02:25 +0800 Subject: [PATCH 06/15] rename some files Signed-off-by: yiliu30 --- neural_compressor/common/base_tuning.py | 202 ++++++++++++++++++++++++ neural_compressor/torch/autotune.py | 61 +++++++ 2 files changed, 263 insertions(+) create mode 100644 neural_compressor/common/base_tuning.py create mode 100644 neural_compressor/torch/autotune.py diff --git a/neural_compressor/common/base_tuning.py b/neural_compressor/common/base_tuning.py new file mode 100644 index 00000000000..5b8bb3fc4d9 --- /dev/null +++ b/neural_compressor/common/base_tuning.py @@ -0,0 +1,202 @@ +# Copyright (c) 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from typing import Any, Dict, List, Optional, Tuple, Union + +from neural_compressor.common.base_config import BaseConfig, ComposableConfig +from neural_compressor.common.logger import Logger + +logger = Logger().get_logger() + +__all__ = [ + "Evaluator", + "TuningConfig", + "Sampler", + "ConfigLoader", + "TuningMonitor", + "TuningLogger", + "init_tuning", +] + + +class Evaluator: + EVAL_FN = "eval_fn" + WEIGHT = "weight" + FN_NAME = "name" + EVAL_FN_TEMPLATE: Dict[str, Any] = {EVAL_FN: None, WEIGHT: 1.0, FN_NAME: None} + + def __init__(self) -> None: + self.eval_fn_registry: List[Dict[str, Any]] = [] + + def evaluate(self, model) -> float: + """Evaluate the model using registered evaluation functions. + + Args: + model: The fp32 model or quantized model. + + Returns: + The overall result of all registered evaluation functions. 
+ """ + result = 0 + for eval_pair in self.eval_fn_registry: + eval_fn = eval_pair[self.EVAL_FN] + eval_result = eval_fn(model) + result = self._update_the_objective_score(eval_pair, eval_result, result) + return result + + def _update_the_objective_score(self, eval_pair, eval_result, overall_result) -> float: + # TODO update the result according to the weight and algo_name + return overall_result + eval_result * eval_pair[self.WEIGHT] + + def get_number_of_eval_funtions(self) -> int: + return len(self.eval_fn_registry) + + def _set_eval_fn_registry(self, user_eval_fns: List[Dict]) -> None: + self.eval_fn_registry = [ + { + self.EVAL_FN: user_eval_fn_pair[self.EVAL_FN], + self.WEIGHT: user_eval_fn_pair.get(self.WEIGHT, 1.0), + self.FN_NAME: user_eval_fn_pair.get(self.FN_NAME, user_eval_fn_pair[self.EVAL_FN].__name__), + } + for user_eval_fn_pair in user_eval_fns + ] + + def set_eval_fn_registry(self, eval_fns: Optional[Union[Dict, List[Dict]]] = None) -> None: + if eval_fns is None: + return + elif isinstance(eval_fns, Dict): + eval_fns = [eval_fns] + elif isinstance(eval_fns, List): + assert all([isinstance(eval_fn_pair, Dict) for eval_fn_pair in eval_fns]) + else: + raise NotImplementedError(f"The eval_fns should be a dict or a list of dict, but got {type(eval_fns)}.") + self._set_eval_fn_registry(eval_fns) + + def self_check(self) -> None: + # check the number of evaluation functions + num_evaluator = self.get_number_of_eval_funtions() + assert num_evaluator > 0, "Please ensure that you register at least one evaluation metric for auto-tune." + logger.info(f"There are {num_evaluator} tuning objectives.") + + +evaluator = Evaluator() + + +class TuningConfig: + """Base Class for Tuning Criterion. + + Args: + quant_configs: quantization configs. Default value is empty. + timeout: Tuning timeout (seconds). Default value is 0 which means early stop. + max_trials: Max tuning times. Default value is 100. Combine with timeout field to decide when to exit. 
+ """ + + def __init__(self, quant_configs=None, timeout=0, max_trials=100, sampler: "Sampler" = None) -> None: + """Init a TuneCriterion object.""" + self.quant_configs = quant_configs + self.timeout = timeout + self.max_trials = max_trials + self.sampler = sampler + + +class Sampler: + pass + + +class ConfigLoader: + @staticmethod + def parse_quant_config(quant_config: BaseConfig) -> List[BaseConfig]: + if isinstance(quant_config, ComposableConfig): + result = [] + for q_config in quant_config.config_list: + result += q_config.expand() + return result + else: + return quant_config.expand() + + def parse_quant_configs(self) -> List[BaseConfig]: + quant_config_list = [] + for quant_config in self.quant_configs: + quant_config_list.extend(ConfigLoader.parse_quant_config(quant_config)) + return quant_config_list + + def __init__(self, quant_configs, sampler: Sampler): + self.quant_configs = quant_configs + self.sampler = sampler + + def __iter__(self): + for config in self.parse_quant_configs(): + yield config + + +class TuningMonitor: + def __init__(self) -> None: + # TODO refine the `tuning_history` with a more appropriate data structure + self.tuning_history: list = [] + + def add_trial_result(self, trial_index: int, eval_result: Union[int, float], quant_config: BaseConfig) -> None: + self.tuning_history.append([trial_index, eval_result, quant_config]) + + def get_best_quant_config(self) -> BaseConfig: + return self.tuning_history[0][2] + + def need_stop(self) -> bool: + return True + + +class TuningLogger: + """A unified logger for the tuning process. + + It assists validation teams in retrieving logs. + """ + + @classmethod + def tuning_start(cls) -> None: + logger.info("Tuning started.") + + @classmethod + def trial_start(cls, trial_index: int = None) -> None: + logger.info(f" {trial_index}-trail started.") + + @classmethod + def quantization_start(cls) -> None: + logger.info("Quantization started.") + + @classmethod + def quantization_end(cls) -> None: + logger.info("Quantization end.") + + @classmethod + def evaluation_start(cls) -> None: + logger.info("Evaluation started.") + + @classmethod + def evaluation_end(cls) -> None: + logger.info("Evaluation end.") + + @classmethod + def trial_end(cls, trial_index: int = None) -> None: + logger.info(f" {trial_index}-trail end.") + + @classmethod + def tuning_end(cls) -> None: + logger.info("Tuning completed.") + + +def init_tuning(tuning_config: TuningConfig) -> Tuple[ConfigLoader, TuningLogger, TuningMonitor]: + config_loader = ConfigLoader(quant_configs=tuning_config.quant_configs, sampler=tuning_config.sampler) + tuning_logger = TuningLogger() + tuning_monitor = TuningMonitor() + return config_loader, tuning_logger, tuning_monitor diff --git a/neural_compressor/torch/autotune.py b/neural_compressor/torch/autotune.py new file mode 100644 index 00000000000..d734839d7f1 --- /dev/null +++ b/neural_compressor/torch/autotune.py @@ -0,0 +1,61 @@ +# Copyright (c) 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
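+
+# A minimal usage sketch for the `autotune` entry defined below, assuming a
+# user-defined fp32 `model` and accuracy function `eval_acc_fn`:
+#
+#     from neural_compressor.common.base_tuning import TuningConfig
+#     from neural_compressor.torch.autotune import autotune
+#     from neural_compressor.torch.quantization.config import RTNWeightQuantConfig
+#
+#     tune_config = TuningConfig(quant_configs=[RTNWeightQuantConfig(weight_bits=[4, 8])])
+#     best_model = autotune(model=model, tune_config=tune_config, eval_fns=[{"eval_fn": eval_acc_fn}])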
+ +from typing import Dict, List, Optional, Union + +import torch + +from neural_compressor.common.base_config import BaseConfig +from neural_compressor.common.base_tuning import TuningConfig, evaluator, init_tuning +from neural_compressor.common.logger import Logger +from neural_compressor.torch import quantize +from neural_compressor.torch.quantization.config import GPTQConfig, RTNWeightQuantConfig + +logger = Logger().get_logger() + + +def get_default_tune_config() -> TuningConfig: + # TODO use the registered default tuning config in the next PR + return TuningConfig(quant_configs=[GPTQConfig(weight_bits=[4, 8]), RTNWeightQuantConfig(weight_bits=[4, 8])]) + + +def autotune( + model: torch.nn.Module, + tune_config: TuningConfig, + eval_fns: Optional[Union[Dict, List[Dict]]] = None, + run_fn=None, + run_args=None, +) -> Optional[torch.nn.Module]: + """The main entry of auto-tune.""" + best_quant_model = None + evaluator.set_eval_fn_registry(eval_fns) + evaluator.self_check() + config_loader, tuning_logger, tuning_monitor = init_tuning(tuning_config=tune_config) + tuning_logger.tuning_start() + for trial_index, quant_config in enumerate(config_loader): + tuning_logger.trial_start(trial_index=trial_index) + tuning_logger.quantization_start() + q_model = quantize(model, quant_config=quant_config, run_fn=run_fn, run_args=run_args) + tuning_logger.quantization_end() + tuning_logger.evaluation_start() + eval_result: float = evaluator.evaluate(q_model) + tuning_logger.evaluation_end() + tuning_monitor.add_trial_result(trial_index, eval_result, quant_config) + if tuning_monitor.need_stop(): + best_quant_config: BaseConfig = tuning_monitor.get_best_quant_config() + quantize(model, quant_config=best_quant_config, run_fn=run_fn, run_args=run_args, inplace=True) + best_quant_model = model # quantize model inplace + tuning_logger.trial_end() + tuning_logger.tuning_end() + return best_quant_model From 6560c797097b29f4f11f8c61e330085af77c9808 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Wed, 17 Jan 2024 10:03:37 +0800 Subject: [PATCH 07/15] remove some files Signed-off-by: yiliu30 --- neural_compressor/common/base_tune.py | 202 -------------------------- neural_compressor/torch/tune.py | 61 -------- 2 files changed, 263 deletions(-) delete mode 100644 neural_compressor/common/base_tune.py delete mode 100644 neural_compressor/torch/tune.py diff --git a/neural_compressor/common/base_tune.py b/neural_compressor/common/base_tune.py deleted file mode 100644 index 5b8bb3fc4d9..00000000000 --- a/neural_compressor/common/base_tune.py +++ /dev/null @@ -1,202 +0,0 @@ -# Copyright (c) 2023 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -from typing import Any, Dict, List, Optional, Tuple, Union - -from neural_compressor.common.base_config import BaseConfig, ComposableConfig -from neural_compressor.common.logger import Logger - -logger = Logger().get_logger() - -__all__ = [ - "Evaluator", - "TuningConfig", - "Sampler", - "ConfigLoader", - "TuningMonitor", - "TuningLogger", - "init_tuning", -] - - -class Evaluator: - EVAL_FN = "eval_fn" - WEIGHT = "weight" - FN_NAME = "name" - EVAL_FN_TEMPLATE: Dict[str, Any] = {EVAL_FN: None, WEIGHT: 1.0, FN_NAME: None} - - def __init__(self) -> None: - self.eval_fn_registry: List[Dict[str, Any]] = [] - - def evaluate(self, model) -> float: - """Evaluate the model using registered evaluation functions. - - Args: - model: The fp32 model or quantized model. - - Returns: - The overall result of all registered evaluation functions. - """ - result = 0 - for eval_pair in self.eval_fn_registry: - eval_fn = eval_pair[self.EVAL_FN] - eval_result = eval_fn(model) - result = self._update_the_objective_score(eval_pair, eval_result, result) - return result - - def _update_the_objective_score(self, eval_pair, eval_result, overall_result) -> float: - # TODO update the result according to the weight and algo_name - return overall_result + eval_result * eval_pair[self.WEIGHT] - - def get_number_of_eval_funtions(self) -> int: - return len(self.eval_fn_registry) - - def _set_eval_fn_registry(self, user_eval_fns: List[Dict]) -> None: - self.eval_fn_registry = [ - { - self.EVAL_FN: user_eval_fn_pair[self.EVAL_FN], - self.WEIGHT: user_eval_fn_pair.get(self.WEIGHT, 1.0), - self.FN_NAME: user_eval_fn_pair.get(self.FN_NAME, user_eval_fn_pair[self.EVAL_FN].__name__), - } - for user_eval_fn_pair in user_eval_fns - ] - - def set_eval_fn_registry(self, eval_fns: Optional[Union[Dict, List[Dict]]] = None) -> None: - if eval_fns is None: - return - elif isinstance(eval_fns, Dict): - eval_fns = [eval_fns] - elif isinstance(eval_fns, List): - assert all([isinstance(eval_fn_pair, Dict) for eval_fn_pair in eval_fns]) - else: - raise NotImplementedError(f"The eval_fns should be a dict or a list of dict, but got {type(eval_fns)}.") - self._set_eval_fn_registry(eval_fns) - - def self_check(self) -> None: - # check the number of evaluation functions - num_evaluator = self.get_number_of_eval_funtions() - assert num_evaluator > 0, "Please ensure that you register at least one evaluation metric for auto-tune." - logger.info(f"There are {num_evaluator} tuning objectives.") - - -evaluator = Evaluator() - - -class TuningConfig: - """Base Class for Tuning Criterion. - - Args: - quant_configs: quantization configs. Default value is empty. - timeout: Tuning timeout (seconds). Default value is 0 which means early stop. - max_trials: Max tuning times. Default value is 100. Combine with timeout field to decide when to exit. 
- """ - - def __init__(self, quant_configs=None, timeout=0, max_trials=100, sampler: "Sampler" = None) -> None: - """Init a TuneCriterion object.""" - self.quant_configs = quant_configs - self.timeout = timeout - self.max_trials = max_trials - self.sampler = sampler - - -class Sampler: - pass - - -class ConfigLoader: - @staticmethod - def parse_quant_config(quant_config: BaseConfig) -> List[BaseConfig]: - if isinstance(quant_config, ComposableConfig): - result = [] - for q_config in quant_config.config_list: - result += q_config.expand() - return result - else: - return quant_config.expand() - - def parse_quant_configs(self) -> List[BaseConfig]: - quant_config_list = [] - for quant_config in self.quant_configs: - quant_config_list.extend(ConfigLoader.parse_quant_config(quant_config)) - return quant_config_list - - def __init__(self, quant_configs, sampler: Sampler): - self.quant_configs = quant_configs - self.sampler = sampler - - def __iter__(self): - for config in self.parse_quant_configs(): - yield config - - -class TuningMonitor: - def __init__(self) -> None: - # TODO refine the `tuning_history` with a more appropriate data structure - self.tuning_history: list = [] - - def add_trial_result(self, trial_index: int, eval_result: Union[int, float], quant_config: BaseConfig) -> None: - self.tuning_history.append([trial_index, eval_result, quant_config]) - - def get_best_quant_config(self) -> BaseConfig: - return self.tuning_history[0][2] - - def need_stop(self) -> bool: - return True - - -class TuningLogger: - """A unified logger for the tuning process. - - It assists validation teams in retrieving logs. - """ - - @classmethod - def tuning_start(cls) -> None: - logger.info("Tuning started.") - - @classmethod - def trial_start(cls, trial_index: int = None) -> None: - logger.info(f" {trial_index}-trail started.") - - @classmethod - def quantization_start(cls) -> None: - logger.info("Quantization started.") - - @classmethod - def quantization_end(cls) -> None: - logger.info("Quantization end.") - - @classmethod - def evaluation_start(cls) -> None: - logger.info("Evaluation started.") - - @classmethod - def evaluation_end(cls) -> None: - logger.info("Evaluation end.") - - @classmethod - def trial_end(cls, trial_index: int = None) -> None: - logger.info(f" {trial_index}-trail end.") - - @classmethod - def tuning_end(cls) -> None: - logger.info("Tuning completed.") - - -def init_tuning(tuning_config: TuningConfig) -> Tuple[ConfigLoader, TuningLogger, TuningMonitor]: - config_loader = ConfigLoader(quant_configs=tuning_config.quant_configs, sampler=tuning_config.sampler) - tuning_logger = TuningLogger() - tuning_monitor = TuningMonitor() - return config_loader, tuning_logger, tuning_monitor diff --git a/neural_compressor/torch/tune.py b/neural_compressor/torch/tune.py deleted file mode 100644 index 32456d3f407..00000000000 --- a/neural_compressor/torch/tune.py +++ /dev/null @@ -1,61 +0,0 @@ -# Copyright (c) 2023 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from typing import Dict, List, Optional, Union - -import torch - -from neural_compressor.common.base_config import BaseConfig -from neural_compressor.common.base_tune import TuningConfig, evaluator, init_tuning -from neural_compressor.common.logger import Logger -from neural_compressor.torch import quantize -from neural_compressor.torch.quantization.config import GPTQConfig, RTNWeightQuantConfig - -logger = Logger().get_logger() - - -def get_default_tune_config() -> TuningConfig: - # TODO use the registered default tuning config in the next PR - return TuningConfig(quant_configs=[GPTQConfig(weight_bits=[4, 8]), RTNWeightQuantConfig(weight_bits=[4, 8])]) - - -def autotune( - model: torch.nn.Module, - tune_config: TuningConfig, - eval_fns: Optional[Union[Dict, List[Dict]]] = None, - run_fn=None, - run_args=None, -) -> Optional[torch.nn.Module]: - """The main entry of auto-tune.""" - best_quant_model = None - evaluator.set_eval_fn_registry(eval_fns) - evaluator.self_check() - config_loader, tuning_logger, tuning_monitor = init_tuning(tuning_config=tune_config) - tuning_logger.tuning_start() - for trial_index, quant_config in enumerate(config_loader): - tuning_logger.trial_start(trial_index=trial_index) - tuning_logger.quantization_start() - q_model = quantize(model, quant_config=quant_config, run_fn=run_fn, run_args=run_args) - tuning_logger.quantization_end() - tuning_logger.evaluation_start() - eval_result: float = evaluator.evaluate(q_model) - tuning_logger.evaluation_end() - tuning_monitor.add_trial_result(trial_index, eval_result, quant_config) - if tuning_monitor.need_stop(): - best_quant_config: BaseConfig = tuning_monitor.get_best_quant_config() - quantize(model, quant_config=best_quant_config, run_fn=run_fn, run_args=run_args, inplace=True) - best_quant_model = model # quantize model inplace - tuning_logger.trial_end() - tuning_logger.tuning_end() - return best_quant_model From b01c8058b718b3c0043b5ed7c3d5ddc68e1c800d Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Wed, 17 Jan 2024 10:45:16 +0800 Subject: [PATCH 08/15] fixed import error Signed-off-by: yiliu30 --- neural_compressor/torch/__init__.py | 4 ++-- test/3x/torch/test_autotune.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/neural_compressor/torch/__init__.py b/neural_compressor/torch/__init__.py index 42fb003946f..c50e60103ea 100644 --- a/neural_compressor/torch/__init__.py +++ b/neural_compressor/torch/__init__.py @@ -23,5 +23,5 @@ get_default_gptq_config, ) -from neural_compressor.common.base_tune import TuningConfig -from neural_compressor.torch.tune import autotune, get_default_tune_config +from neural_compressor.common.base_tuning import TuningConfig +from neural_compressor.torch.autotune import autotune, get_default_tune_config diff --git a/test/3x/torch/test_autotune.py b/test/3x/torch/test_autotune.py index 85ddf73494e..7e67436e87c 100644 --- a/test/3x/torch/test_autotune.py +++ b/test/3x/torch/test_autotune.py @@ -14,7 +14,7 @@ def reset_tuning_target(test_func): @wraps(test_func) def wrapper(*args, **kwargs): # Reset tuning targets before running the test - from neural_compressor.common.base_tune import evaluator + from neural_compressor.common.base_tuning import evaluator evaluator.eval_fn_registry = [] return test_func(*args, **kwargs) @@ -61,7 +61,7 @@ def setUp(self): @reset_tuning_target def test_autotune_api(self): logger.info("test_autotune_api") - from neural_compressor.common.base_tune import evaluator + from neural_compressor.common.base_tuning import evaluator from 
neural_compressor.torch import RTNWeightQuantConfig, TuningConfig, autotune
 
         def eval_acc_fn(model) -> float:
@@ -77,7 +77,7 @@ def eval_acc_fn(model) -> float:
     @reset_tuning_target
     def test_autotune_api_2(self):
         logger.info("test_autotune_api")
-        from neural_compressor.common.base_tune import evaluator
+        from neural_compressor.common.base_tuning import evaluator
         from neural_compressor.torch import RTNWeightQuantConfig, TuningConfig, autotune
 
         def eval_acc_fn(model) -> float:

From ec381bc19449753c78240bcf22a663d2ed7c2524 Mon Sep 17 00:00:00 2001
From: yiliu30
Date: Wed, 17 Jan 2024 12:16:53 +0800
Subject: [PATCH 09/15] handle single eval

Signed-off-by: yiliu30
---
 neural_compressor/common/base_tuning.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/neural_compressor/common/base_tuning.py b/neural_compressor/common/base_tuning.py
index 5b8bb3fc4d9..fd95ea89056 100644
--- a/neural_compressor/common/base_tuning.py
+++ b/neural_compressor/common/base_tuning.py
@@ -13,6 +13,7 @@
 # limitations under the License.
 
 
+import copy
 from typing import Any, Dict, List, Optional, Tuple, Union
 
 from neural_compressor.common.base_config import BaseConfig, ComposableConfig
@@ -76,6 +77,12 @@ def _set_eval_fn_registry(self, user_eval_fns: List[Dict]) -> None:
     def set_eval_fn_registry(self, eval_fns: Optional[Union[Dict, List[Dict]]] = None) -> None:
         if eval_fns is None:
             return
+        elif callable(eval_fns):
+            # single eval_fn
+            eval_fn_pair = copy.deepcopy(self.EVAL_FN_TEMPLATE)
+            eval_fn_pair[self.EVAL_FN] = eval_fns
+            eval_fn_pair[self.FN_NAME] = eval_fns.__name__
+            eval_fns = [eval_fn_pair]
         elif isinstance(eval_fns, Dict):
             eval_fns = [eval_fns]
         elif isinstance(eval_fns, List):

From f278c2f850762adfcfc548afb368215add47b257 Mon Sep 17 00:00:00 2001
From: yiliu30
Date: Wed, 17 Jan 2024 12:37:54 +0800
Subject: [PATCH 10/15] refine tuning monitor

Signed-off-by: yiliu30
---
 neural_compressor/common/base_tuning.py | 100 +++++++++++++---------
 1 file changed, 63 insertions(+), 37 deletions(-)

diff --git a/neural_compressor/common/base_tuning.py b/neural_compressor/common/base_tuning.py
index fd95ea89056..d722b12f85a 100644
--- a/neural_compressor/common/base_tuning.py
+++ b/neural_compressor/common/base_tuning.py
@@ -14,6 +14,7 @@
 
 
 import copy
+import uuid
 from typing import Any, Dict, List, Optional, Tuple, Union
 
 from neural_compressor.common.base_config import BaseConfig, ComposableConfig
@@ -61,7 +62,7 @@ def _update_the_objective_score(self, eval_pair, eval_result, overall_result) ->
         # TODO update the result according to the weight and algo_name
         return overall_result + eval_result * eval_pair[self.WEIGHT]
 
-    def get_number_of_eval_funtions(self) -> int:
+    def get_number_of_eval_functions(self) -> int:
         return len(self.eval_fn_registry)
 
     def _set_eval_fn_registry(self, user_eval_fns: List[Dict]) -> None:
@@ -93,31 +94,14 @@ def set_eval_fn_registry(self, eval_fns: Optional[Union[Dict, List[Dict]]] = Non
 
     def self_check(self) -> None:
         # check the number of evaluation functions
-        num_evaluator = self.get_number_of_eval_funtions()
-        assert num_evaluator > 0, "Please ensure that you register at least one evaluation metric for auto-tune."
-        logger.info(f"There are {num_evaluator} tuning objectives.")
+        num_eval_fns = self.get_number_of_eval_functions()
+        assert num_eval_fns > 0, "Please ensure that you register at least one evaluation metric for auto-tune."
+        logger.info("There are %d evaluation functions.", num_eval_fns)
 
 
 evaluator = Evaluator()
 
 
-class TuningConfig:
-    """Base Class for Tuning Criterion.
-
-    Args:
-        quant_configs: quantization configs. Default value is empty.
-        timeout: Tuning timeout (seconds). Default value is 0 which means early stop.
-        max_trials: Max tuning times. Default value is 100. Combine with timeout field to decide when to exit.
-    """
-
-    def __init__(self, quant_configs=None, timeout=0, max_trials=100, sampler: "Sampler" = None) -> None:
-        """Init a TuneCriterion object."""
-        self.quant_configs = quant_configs
-        self.timeout = timeout
-        self.max_trials = max_trials
-        self.sampler = sampler
-
-
 class Sampler:
     pass
@@ -148,21 +132,6 @@ def __iter__(self):
             yield config
 
 
-class TuningMonitor:
-    def __init__(self) -> None:
-        # TODO refine the `tuning_history` with a more appropriate data structure
-        self.tuning_history: list = []
-
-    def add_trial_result(self, trial_index: int, eval_result: Union[int, float], quant_config: BaseConfig) -> None:
-        self.tuning_history.append([trial_index, eval_result, quant_config])
-
-    def get_best_quant_config(self) -> BaseConfig:
-        return self.tuning_history[0][2]
-
-    def need_stop(self) -> bool:
-        return True
-
-
 class TuningLogger:
     """A unified logger for the tuning process.
 
     It assists validation teams in retrieving logs.
@@ -202,8 +171,65 @@ def tuning_end(cls) -> None:
         logger.info("Tuning completed.")
 
 
+class TuningConfig:
+    """Base Class for Tuning Criterion.
+
+    Args:
+        quant_configs: quantization configs. Default value is empty.
+        timeout: Tuning timeout (seconds). Default value is 0, which means early stop.
+        max_trials: Maximum number of tuning trials. Default value is 100. Combine with timeout field to decide when to exit.
+    """
+
+    def __init__(self, quant_configs=None, timeout=0, max_trials=100, sampler: Sampler = None) -> None:
+        """Init a TuningConfig object."""
+        self.quant_configs = quant_configs
+        self.timeout = timeout
+        self.max_trials = max_trials
+        self.sampler = sampler
+
+
+class _TrialRecord:
+    @staticmethod
+    def _generate_unique_id():
+        unique_id = str(uuid.uuid4())
+        return unique_id
+
+    def __init__(self, trial_index: int, trial_result: Union[int, float], quant_config: BaseConfig):
+        # The unique id to refer to one trial
+        self.trial_id = _TrialRecord._generate_unique_id()
+        self.trial_index = trial_index
+        self.trial_result = trial_result
+        self.quant_config = quant_config
+
+
+class TuningMonitor:
+    def __init__(self, tuning_config: TuningConfig) -> None:
+        self.tuning_config = tuning_config
+        self.trial_cnt = 0
+        self.tuning_history: List[_TrialRecord] = []
+
+    def add_trial_result(self, trial_index: int, trial_result: Union[int, float], quant_config: BaseConfig) -> None:
+        self.trial_cnt += 1
+        trial_record = _TrialRecord(trial_index, trial_result, quant_config)
+        self.tuning_history.append(trial_record)
+
+    def get_number_of_trials(self):
+        return len(self.tuning_history)
+
+    def get_best_quant_config(self) -> BaseConfig:
+        assert self.get_number_of_trials() > 0, "Not trial record in tuning monitor."
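+        # Note: trial_result is the weighted sum produced by Evaluator.evaluate(),
+        # so a plain sort on it also covers the multi-objective case.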
+        # Put the record with a higher score at the beginning
+        sorted_trials_records: List[_TrialRecord] = sorted(
+            self.tuning_history, key=lambda x: x.trial_result, reverse=True
+        )
+        return sorted_trials_records[0].quant_config
+
+    def need_stop(self) -> bool:
+        return self.trial_cnt >= self.tuning_config.max_trials
+
+
 def init_tuning(tuning_config: TuningConfig) -> Tuple[ConfigLoader, TuningLogger, TuningMonitor]:
     config_loader = ConfigLoader(quant_configs=tuning_config.quant_configs, sampler=tuning_config.sampler)
     tuning_logger = TuningLogger()
-    tuning_monitor = TuningMonitor()
+    tuning_monitor = TuningMonitor(tuning_config)
     return config_loader, tuning_logger, tuning_monitor

From f7b9d7cca8975cb634aaaf300e393450d9778245 Mon Sep 17 00:00:00 2001
From: yiliu30
Date: Wed, 17 Jan 2024 12:43:42 +0800
Subject: [PATCH 11/15] refine log

Signed-off-by: yiliu30
---
 neural_compressor/common/base_tuning.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/neural_compressor/common/base_tuning.py b/neural_compressor/common/base_tuning.py
index d722b12f85a..fdc0908473c 100644
--- a/neural_compressor/common/base_tuning.py
+++ b/neural_compressor/common/base_tuning.py
@@ -144,7 +144,7 @@ def tuning_start(cls) -> None:
 
     @classmethod
     def trial_start(cls, trial_index: int = None) -> None:
-        logger.info(f" {trial_index}-trail started.")
+        logger.info(" %d-trail started.", trial_index)
 
     @classmethod
     def quantization_start(cls) -> None:
@@ -164,7 +164,7 @@ def evaluation_end(cls) -> None:
 
     @classmethod
     def trial_end(cls, trial_index: int = None) -> None:
-        logger.info(f" {trial_index}-trail end.")
+        logger.info(" %d-trail end.", trial_index)
 
     @classmethod
     def tuning_end(cls) -> None:

From 760763be2cea6ba3098521b417ec06834fc93583 Mon Sep 17 00:00:00 2001
From: yiliu30
Date: Wed, 17 Jan 2024 12:58:09 +0800
Subject: [PATCH 12/15] add more note

Signed-off-by: yiliu30
---
 neural_compressor/common/base_tuning.py | 24 ++++++++++++++++++++++--
 1 file changed, 22 insertions(+), 2 deletions(-)

diff --git a/neural_compressor/common/base_tuning.py b/neural_compressor/common/base_tuning.py
index fdc0908473c..586b5927d8c 100644
--- a/neural_compressor/common/base_tuning.py
+++ b/neural_compressor/common/base_tuning.py
@@ -15,7 +15,7 @@
 
 import copy
 import uuid
-from typing import Any, Dict, List, Optional, Tuple, Union
+from typing import Any, Callable, Dict, List, Optional, Tuple, Union
 
 from neural_compressor.common.base_config import BaseConfig, ComposableConfig
 from neural_compressor.common.logger import Logger
@@ -34,6 +34,25 @@
 
 
 class Evaluator:
+    """Evaluator is a collection of evaluation functions.
+
+    Examples:
+        def eval_acc(model):
+            ...
+
+        def eval_perf(model):
+            ...
+
+        # Usage
+        user_eval_fns1 = eval_acc
+        user_eval_fns2 = {"eval_fn": eval_acc}
+        user_eval_fns3 = {"eval_fn": eval_acc, "weight": 1.0, "name": "accuracy"}
+        user_eval_fns4 = [
+            {"eval_fn": eval_acc, "weight": .0.5},
+            {"eval_fn": eval_perf, "weight": 0.5, "name": "performance"},
+        ]
+    """
+
     EVAL_FN = "eval_fn"
     WEIGHT = "weight"
     FN_NAME = "name"
@@ -75,7 +94,8 @@ def _set_eval_fn_registry(self, user_eval_fns: List[Dict]) -> None:
             for user_eval_fn_pair in user_eval_fns
         ]
 
-    def set_eval_fn_registry(self, eval_fns: Optional[Union[Dict, List[Dict]]] = None) -> None:
+    def set_eval_fn_registry(self, eval_fns: Optional[Union[Callable, Dict, List[Dict]]] = None) -> None:
+        # About the eval_fns format, refer to the class docstring for details.
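+        # A bare callable is normalized to [{"eval_fn": fn, "weight": 1.0, "name": fn.__name__}],
+        # a single dict is wrapped into a one-element list, and a list of dicts is used as-is;
+        # e.g., set_eval_fn_registry(eval_acc) registers eval_acc with weight 1.0 under the name "eval_acc".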
if eval_fns is None: return elif callable(eval_fns): From 0450cb21bc91b25f3e0a1ac6eca89298e65a3fd2 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Wed, 17 Jan 2024 13:19:58 +0800 Subject: [PATCH 13/15] refine the tuning logger Signed-off-by: yiliu30 --- neural_compressor/common/base_tuning.py | 43 +++++++++++++++++-------- neural_compressor/torch/autotune.py | 2 +- 2 files changed, 30 insertions(+), 15 deletions(-) diff --git a/neural_compressor/common/base_tuning.py b/neural_compressor/common/base_tuning.py index 586b5927d8c..49b5148052d 100644 --- a/neural_compressor/common/base_tuning.py +++ b/neural_compressor/common/base_tuning.py @@ -14,8 +14,9 @@ import copy +import inspect import uuid -from typing import Any, Callable, Dict, List, Optional, Tuple, Union +from typing import Any, Callable, Dict, Generator, List, Optional, Tuple, Union from neural_compressor.common.base_config import BaseConfig, ComposableConfig from neural_compressor.common.logger import Logger @@ -127,6 +128,10 @@ class Sampler: class ConfigLoader: + def __init__(self, quant_configs, sampler: Sampler) -> None: + self.quant_configs = quant_configs + self.sampler = sampler + @staticmethod def parse_quant_config(quant_config: BaseConfig) -> List[BaseConfig]: if isinstance(quant_config, ComposableConfig): @@ -138,16 +143,13 @@ def parse_quant_config(quant_config: BaseConfig) -> List[BaseConfig]: return quant_config.expand() def parse_quant_configs(self) -> List[BaseConfig]: + # TODO (Yi) separate this functionality into `Sampler` in the next PR quant_config_list = [] for quant_config in self.quant_configs: quant_config_list.extend(ConfigLoader.parse_quant_config(quant_config)) return quant_config_list - def __init__(self, quant_configs, sampler: Sampler): - self.quant_configs = quant_configs - self.sampler = sampler - - def __iter__(self): + def __iter__(self) -> Generator[BaseConfig, Any, None]: for config in self.parse_quant_configs(): yield config @@ -158,37 +160,49 @@ class TuningLogger: It assists validation teams in retrieving logs. 
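+    Each hook below also records the file name and line number of its call site.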
""" + @classmethod + def _log_call_info(cls, message: str) -> str: + frame = inspect.currentframe().f_back.f_back + # Extract file name and line number + file_path = frame.f_code.co_filename + file_name = file_path.split("/")[-1] + line_number = frame.f_lineno + # Log the call position along with the message + logger.info(f"[{file_name}:{line_number}(Call position)] {message}") + @classmethod def tuning_start(cls) -> None: - logger.info("Tuning started.") + cls._log_call_info("Tuning started.") @classmethod def trial_start(cls, trial_index: int = None) -> None: - logger.info(" %d-trail started.", trial_index) + cls._log_call_info( + f" {trial_index}-trail started.", + ) @classmethod def quantization_start(cls) -> None: - logger.info("Quantization started.") + cls._log_call_info("Quantization started.") @classmethod def quantization_end(cls) -> None: - logger.info("Quantization end.") + cls._log_call_info("Quantization end.") @classmethod def evaluation_start(cls) -> None: - logger.info("Evaluation started.") + cls._log_call_info("Evaluation started.") @classmethod def evaluation_end(cls) -> None: - logger.info("Evaluation end.") + cls._log_call_info("Evaluation end.") @classmethod def trial_end(cls, trial_index: int = None) -> None: - logger.info(" %d-trail end.", trial_index) + cls._log_call_info(f" {trial_index}-trail end.") @classmethod def tuning_end(cls) -> None: - logger.info("Tuning completed.") + cls._log_call_info("Tuning completed.") class TuningConfig: @@ -245,6 +259,7 @@ def get_best_quant_config(self) -> BaseConfig: return sorted_trials_records[0].quant_config def need_stop(self) -> bool: + # TODO Support more stop criteria in the next PR, such as `reach accuracy goal`, `timeout`, and so on. return self.trial_cnt >= self.tuning_config.max_trials diff --git a/neural_compressor/torch/autotune.py b/neural_compressor/torch/autotune.py index d734839d7f1..64617abbd5c 100644 --- a/neural_compressor/torch/autotune.py +++ b/neural_compressor/torch/autotune.py @@ -56,6 +56,6 @@ def autotune( best_quant_config: BaseConfig = tuning_monitor.get_best_quant_config() quantize(model, quant_config=best_quant_config, run_fn=run_fn, run_args=run_args, inplace=True) best_quant_model = model # quantize model inplace - tuning_logger.trial_end() + tuning_logger.trial_end(trial_index) tuning_logger.tuning_end() return best_quant_model From 14c3fee2ea127a22f6f8c75de854f8230b5d7db5 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Wed, 17 Jan 2024 14:21:04 +0800 Subject: [PATCH 14/15] add __all__ for autotune Signed-off-by: yiliu30 --- neural_compressor/torch/autotune.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/neural_compressor/torch/autotune.py b/neural_compressor/torch/autotune.py index 64617abbd5c..cded26ebc48 100644 --- a/neural_compressor/torch/autotune.py +++ b/neural_compressor/torch/autotune.py @@ -25,6 +25,12 @@ logger = Logger().get_logger() +__all__ = [ + "get_default_tune_config", + "autotune", +] + + def get_default_tune_config() -> TuningConfig: # TODO use the registered default tuning config in the next PR return TuningConfig(quant_configs=[GPTQConfig(weight_bits=[4, 8]), RTNWeightQuantConfig(weight_bits=[4, 8])]) From ef8ea0384878cb0522a00fc4582776bb4121c407 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Thu, 18 Jan 2024 09:46:17 +0800 Subject: [PATCH 15/15] fixed some typos Signed-off-by: yiliu30 --- neural_compressor/common/base_tuning.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/neural_compressor/common/base_tuning.py 
index 49b5148052d..e207f9c722b 100644
--- a/neural_compressor/common/base_tuning.py
+++ b/neural_compressor/common/base_tuning.py
@@ -49,7 +49,7 @@ def eval_perf(model):
         user_eval_fns2 = {"eval_fn": eval_acc}
         user_eval_fns3 = {"eval_fn": eval_acc, "weight": 1.0, "name": "accuracy"}
         user_eval_fns4 = [
-            {"eval_fn": eval_acc, "weight": .0.5},
+            {"eval_fn": eval_acc, "weight": 0.5},
             {"eval_fn": eval_perf, "weight": 0.5, "name": "performance"},
         ]
     """
@@ -124,6 +124,7 @@ def self_check(self) -> None:
 
 
 class Sampler:
+    # TODO Separate sorting functionality of `ConfigLoader` into `Sampler` in the follow-up PR.
     pass
 
 
@@ -251,7 +252,7 @@ def get_number_of_trials(self):
         return len(self.tuning_history)
 
     def get_best_quant_config(self) -> BaseConfig:
-        assert self.get_number_of_trials() > 0, "Not trial record in tuning monitor."
+        assert self.get_number_of_trials() > 0, "No trial record in tuning monitor."
         # Put the record with a higher score at the beginning
         sorted_trials_records: List[_TrialRecord] = sorted(
             self.tuning_history, key=lambda x: x.trial_result, reverse=True