From 2506c455808c0ae4fca6c05fa76c3ba5d07cbe8e Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Wed, 10 Jan 2024 12:36:05 +0800 Subject: [PATCH 01/15] replace all tuning to tune Signed-off-by: yiliu30 --- neural_compressor/common/base_tune.py | 106 ++++++++++++++++++++------ neural_compressor/torch/__init__.py | 2 +- neural_compressor/torch/tune.py | 21 +++-- test/3x/torch/test_autotune.py | 34 ++++----- 4 files changed, 110 insertions(+), 53 deletions(-) diff --git a/neural_compressor/common/base_tune.py b/neural_compressor/common/base_tune.py index 69652d5856b..0b6ef47ddb1 100644 --- a/neural_compressor/common/base_tune.py +++ b/neural_compressor/common/base_tune.py @@ -13,7 +13,7 @@ # limitations under the License. from abc import abstractmethod -from typing import Any, Callable, Dict, List, Optional, Union +from typing import Any, Dict, List, Optional, Union from neural_compressor.common.base_config import BaseConfig, ComposableConfig from neural_compressor.common.logger import Logger @@ -37,7 +37,7 @@ def apply(self) -> Any: raise NotImplementedError -class TuningObjectives: +class TuneObjectives: EVAL_FN = "eval_fn" WEIGHT = "weight" FN_NAME = "name" @@ -66,7 +66,7 @@ def _update_the_objective_score(self, eval_pair, eval_result, overall_result) -> # TODO update the result according to the weight and algo_name return overall_result + eval_result * eval_pair[self.WEIGHT] - def get_number_of_tuning_objectives(self) -> int: + def get_number_of_tune_objectives(self) -> int: return len(self.eval_fn_registry) def _set_eval_fn_registry(self, user_eval_fns: List[Dict]) -> None: @@ -91,41 +91,96 @@ def set_eval_fn_registry(self, eval_fns: Optional[Union[Dict, List[Dict]]] = Non self._set_eval_fn_registry(eval_fns) -tuning_objectives = TuningObjectives() +tune_objectives = TuneObjectives() -class BaseTuningConfig: - """Base Class for Tuning Criterion. +class BaseTuneConfig: + """Base Class for Tune Criterion. Args: quant_configs: quantization configs. Default value is empty. - timeout: Tuning timeout (seconds). Default value is 0 which means early stop. + timeout: Tune timeout (seconds). Default value is 0 which means early stop. max_trials: Max tune times. Default value is 100. Combine with timeout field to decide when to exit. """ def __init__(self, quant_configs=None, timeout=0, max_trials=100) -> None: - """Init a TuningCriterion object.""" + """Init a TuneCriterion object.""" self.quant_configs = quant_configs self.timeout = timeout self.max_trials = max_trials +class Trial: + def __init__( + self, float_model, quant_config: BaseConfig, fwk_wrapper: FrameworkWrapper, tune_objectives: TuneObjectives + ): + # The unique id to refer to one trial, it's used by the tuner. + self.trial_id = None + self._trial_result = None + self.set_trail_result_cnt = 0 + self.float_model = float_model + self.quant_model = None + self.quant_config = quant_config + self.fwk_wrapper = fwk_wrapper + self.tune_objectives = tune_objectives + self._post_init() + + def _post_init(self): + """Post initialize one trial.""" + # generate the trial_id + pass + + @property + def trial_result(self): + return self._trial_result + + @trial_result.setter + def trial_result(self, result): + assert self.set_trail_result_cnt < 1, "The trial result already be set." 
+        self._trial_result = result
+        self.set_trail_result_cnt += 1
+
+    def quantize(self):
+        """Quantize the model with the given quant_config."""
+        quant_model = self.fwk_wrapper.apply(self.quant_config)
+        self.quant_model = quant_model
+        return quant_model
+
+    def get_eval_result(self) -> float:
+        """Return the evaluation result.
+
+        The evaluation is triggered lazily, only when the result is needed, and it runs only once.
+        """
+        if not self.trial_result:
+            eval_score = self.tune_objectives.evaluate(self.quant_model)
+            self.trial_result = eval_score
+        return self.trial_result
+
+    def recover_quant_model(self):
+        """Recover the quantized model before the tuning process ends.
+
+        The quantized model is destroyed after evaluation to save memory."""
+        pass
+
+    def destroy_quant_model(self) -> None:
+        """Destroy the quantized model after evaluation to save memory."""
+        pass
+
+
 class Tuner:
     def __init__(
-        self, tune_config: BaseTuningConfig, tuning_objectives: TuningObjectives, fwk_wrapper: FrameworkWrapper
+        self, float_model, tune_config: BaseTuneConfig, tune_objectives: TuneObjectives, fwk_wrapper: FrameworkWrapper
     ) -> None:
+        self.float_model = float_model
         self.tune_config = tune_config
-        self.tuning_objectives = tuning_objectives
+        self.tune_objectives = tune_objectives
         self.fwk_wrapper = fwk_wrapper
         self._post_init()
 
     def _post_init(self) -> None:
         # check the number of evaluation functions
-        num_tuning_objectives = self.tuning_objectives.get_number_of_tuning_objectives()
-        assert (
-            num_tuning_objectives > 0
-        ), "Please ensure that you register at least one evaluation metric for auto-tune."
-        logger.info(f"There are {num_tuning_objectives} tuning objectives.")
+        num_tune_objectives = self.tune_objectives.get_number_of_tune_objectives()
+        assert num_tune_objectives > 0, "Please ensure that you register at least one evaluation metric for auto-tune."
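+        # The registry is filled in ahead of time through `tune_objectives.set_eval_fn_registry(...)`;
+        # a minimal sketch, assuming a user-defined `eval_acc_fn`:
+        #     tune_objectives.set_eval_fn_registry([{"eval_fn": eval_acc_fn, "weight": 1.0}])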
+ logger.info(f"There are {num_tune_objectives} tune objectives.") @staticmethod def parse_quant_config(quant_config: BaseConfig) -> List[BaseConfig]: @@ -143,17 +198,22 @@ def parse_quant_configs(self) -> List[BaseConfig]: quant_config_list.extend(Tuner.parse_quant_config(quant_config)) return quant_config_list - def get_best_model(self, q_model, objective_score: Union[float, int]) -> Any: + def get_best_model(self) -> Any: # TODO(Yi) enable it at the next PR pass - def get_tuning_objective_score(self, model) -> float: - eval_result = self.tuning_objectives.evaluate(model) - return eval_result + def needs_stop(self): + return False + + def update_tune_history(self, trial: Trial): + pass def search(self) -> Any: for config in self.parse_quant_configs(): - logger.info(f"config {config}") - q_model = self.fwk_wrapper.apply(quant_config=config) - if self.get_best_model(q_model, self.get_tuning_objective_score(q_model)): - return q_model + logger.info(f"Config {config}") + trial = Trial(self.float_model, config, fwk_wrapper=self.fwk_wrapper, tune_objectives=self.tune_objectives) + trial.quantize() + trial.get_eval_result() + self.update_tune_history(trial) + if self.needs_stop(): + return self.get_best_model() diff --git a/neural_compressor/torch/__init__.py b/neural_compressor/torch/__init__.py index 57cfe472297..18ddc1ff9a3 100644 --- a/neural_compressor/torch/__init__.py +++ b/neural_compressor/torch/__init__.py @@ -23,4 +23,4 @@ get_default_gptq_config, ) -from neural_compressor.torch.tune import autotune, TuningConfig, get_default_tune_config +from neural_compressor.torch.tune import autotune, TuneConfig, get_default_tune_config diff --git a/neural_compressor/torch/tune.py b/neural_compressor/torch/tune.py index 656d6c5b1be..4480650d9eb 100644 --- a/neural_compressor/torch/tune.py +++ b/neural_compressor/torch/tune.py @@ -16,18 +16,13 @@ import torch -from neural_compressor.common.base_tune import BaseTuningConfig, FrameworkWrapper, Tuner, tuning_objectives +from neural_compressor.common.base_tune import BaseTuneConfig, FrameworkWrapper, Tuner, tune_objectives from neural_compressor.common.logger import Logger from neural_compressor.torch.quantization.config import GPTQConfig, RTNWeightQuantConfig logger = Logger().get_logger() -def get_default_tuning_config(): - # TODO (Yi) support it in the next PR - return None - - class TorchWrapper(FrameworkWrapper): """Concrete implementation of `FrameworkWrapper` for PyTorch models.""" @@ -47,25 +42,27 @@ def apply(self, quant_config): return q_model -class TuningConfig(BaseTuningConfig): +class TuneConfig(BaseTuneConfig): def __init__(self, quant_configs=None, timeout=0, max_trials=100): super().__init__(quant_configs, timeout, max_trials) def autotune( model: torch.nn.Module, - tune_config: TuningConfig, + tune_config: TuneConfig, eval_fns: Optional[Union[Dict, List[Dict]]] = None, run_fn=None, run_args=None, ): - tuning_objectives.set_eval_fn_registry(eval_fns) + tune_objectives.set_eval_fn_registry(eval_fns) torch_wrapper = TorchWrapper(model, run_fn, run_args) - tuner = Tuner(tune_config=tune_config, tuning_objectives=tuning_objectives, fwk_wrapper=torch_wrapper) + tuner = Tuner( + float_model=model, tune_config=tune_config, tune_objectives=tune_objectives, fwk_wrapper=torch_wrapper + ) best_qmodel = tuner.search() return best_qmodel def get_default_tune_config(): - # TODO use the registered default tuning config in the next PR - return TuningConfig(quant_configs=[GPTQConfig(weight_bits=[4, 8]), RTNWeightQuantConfig(weight_bits=[4, 8])]) + # 
TODO use the registered default tune config in the next PR + return TuneConfig(quant_configs=[GPTQConfig(weight_bits=[4, 8]), RTNWeightQuantConfig(weight_bits=[4, 8])]) diff --git a/test/3x/torch/test_autotune.py b/test/3x/torch/test_autotune.py index 876311355f1..60cfe313344 100644 --- a/test/3x/torch/test_autotune.py +++ b/test/3x/torch/test_autotune.py @@ -10,13 +10,13 @@ import torch -def reset_tuning_target(test_func): +def reset_tune_target(test_func): @wraps(test_func) def wrapper(*args, **kwargs): - # Reset tuning targets before running the test - from neural_compressor.common.base_tune import tuning_objectives + # Reset tune targets before running the test + from neural_compressor.common.base_tune import tune_objectives - tuning_objectives.eval_fn_registry = [] + tune_objectives.eval_fn_registry = [] return test_func(*args, **kwargs) return wrapper @@ -58,27 +58,27 @@ def setUp(self): # print the test name logger.info(f"Running TestAutoTune test: {self.id()}") - @reset_tuning_target + @reset_tune_target def test_autotune_api(self): logger.info("test_autotune_api") - from neural_compressor.common.base_tune import tuning_objectives - from neural_compressor.torch import RTNWeightQuantConfig, TuningConfig, autotune + from neural_compressor.common.base_tune import tune_objectives + from neural_compressor.torch import RTNWeightQuantConfig, TuneConfig, autotune def eval_acc_fn(model) -> float: return 1.0 - custom_tune_config = TuningConfig(quant_configs=[RTNWeightQuantConfig(weight_bits=[4, 6])], max_trials=2) + custom_tune_config = TuneConfig(quant_configs=[RTNWeightQuantConfig(weight_bits=[4, 6])], max_trials=2) best_model = autotune( model=build_simple_torch_model(), tune_config=custom_tune_config, eval_fns=[{"eval_fn": eval_acc_fn}] ) self.assertIsNone(best_model) - self.assertEqual(len(tuning_objectives.eval_fn_registry), 1) + self.assertEqual(len(tune_objectives.eval_fn_registry), 1) - @reset_tuning_target + @reset_tune_target def test_autotune_api_2(self): logger.info("test_autotune_api") - from neural_compressor.common.base_tune import tuning_objectives - from neural_compressor.torch import RTNWeightQuantConfig, TuningConfig, autotune + from neural_compressor.common.base_tune import tune_objectives + from neural_compressor.torch import RTNWeightQuantConfig, TuneConfig, autotune def eval_acc_fn(model) -> float: return 1.0 @@ -94,17 +94,17 @@ def eval_perf_fn(model) -> float: }, ] - custom_tune_config = TuningConfig(quant_configs=[RTNWeightQuantConfig(weight_bits=[4, 6])], max_trials=2) + custom_tune_config = TuneConfig(quant_configs=[RTNWeightQuantConfig(weight_bits=[4, 6])], max_trials=2) best_model = autotune(model=build_simple_torch_model(), tune_config=custom_tune_config, eval_fns=eval_fns) self.assertIsNone(best_model) - self.assertEqual(len(tuning_objectives.eval_fn_registry), 2) + self.assertEqual(len(tune_objectives.eval_fn_registry), 2) - @reset_tuning_target + @reset_tune_target def test_autotune_not_eval_func(self): logger.info("test_autotune_api") - from neural_compressor.torch import RTNWeightQuantConfig, TuningConfig, autotune + from neural_compressor.torch import RTNWeightQuantConfig, TuneConfig, autotune - custom_tune_config = TuningConfig(quant_configs=[RTNWeightQuantConfig(weight_bits=[4, 6])], max_trials=2) + custom_tune_config = TuneConfig(quant_configs=[RTNWeightQuantConfig(weight_bits=[4, 6])], max_trials=2) # Use assertRaises to check that an AssertionError is raised with self.assertRaises(AssertionError) as context: From 
c721f17b364a9f1cbb152d21762dff3f2f578688 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Wed, 17 Jan 2024 08:36:55 +0800 Subject: [PATCH 02/15] rename `TuneObjetives` into `Evaluator` Signed-off-by: yiliu30 --- neural_compressor/common/base_tune.py | 28 +++++++++++++-------------- neural_compressor/torch/__init__.py | 3 ++- neural_compressor/torch/tune.py | 17 +++++----------- test/3x/torch/test_autotune.py | 24 +++++++++++------------ 4 files changed, 32 insertions(+), 40 deletions(-) diff --git a/neural_compressor/common/base_tune.py b/neural_compressor/common/base_tune.py index 0b6ef47ddb1..b7bbc145451 100644 --- a/neural_compressor/common/base_tune.py +++ b/neural_compressor/common/base_tune.py @@ -37,7 +37,7 @@ def apply(self) -> Any: raise NotImplementedError -class TuneObjectives: +class Evaluator: EVAL_FN = "eval_fn" WEIGHT = "weight" FN_NAME = "name" @@ -66,7 +66,7 @@ def _update_the_objective_score(self, eval_pair, eval_result, overall_result) -> # TODO update the result according to the weight and algo_name return overall_result + eval_result * eval_pair[self.WEIGHT] - def get_number_of_tune_objectives(self) -> int: + def get_number_of_eval_funtions(self) -> int: return len(self.eval_fn_registry) def _set_eval_fn_registry(self, user_eval_fns: List[Dict]) -> None: @@ -91,10 +91,10 @@ def set_eval_fn_registry(self, eval_fns: Optional[Union[Dict, List[Dict]]] = Non self._set_eval_fn_registry(eval_fns) -tune_objectives = TuneObjectives() +evaluator = Evaluator() -class BaseTuneConfig: +class TuningConfig: """Base Class for Tune Criterion. Args: @@ -111,9 +111,7 @@ def __init__(self, quant_configs=None, timeout=0, max_trials=100) -> None: class Trial: - def __init__( - self, float_model, quant_config: BaseConfig, fwk_wrapper: FrameworkWrapper, tune_objectives: TuneObjectives - ): + def __init__(self, float_model, quant_config: BaseConfig, fwk_wrapper: FrameworkWrapper, evaluator: Evaluator): # The unique id to refer to one trial, it's used by the tuner. self.trial_id = None self._trial_result = None @@ -122,7 +120,7 @@ def __init__( self.quant_model = None self.quant_config = quant_config self.fwk_wrapper = fwk_wrapper - self.tune_objectives = tune_objectives + self.evaluator = evaluator self._post_init() def _post_init(self): @@ -152,7 +150,7 @@ def get_eval_result(self) -> float: The evaluation process is triggered by Lazy only when it is needed, and it is called only once. """ if not self.trial_result: - eval_score = self.tune_objectives.evaluate(self.quant_model) + eval_score = self.evaluator.evaluate(self.quant_model) self.trial_result = eval_score return self.trial_result @@ -168,19 +166,19 @@ def destroy_quant_model(self) -> None: class Tuner: def __init__( - self, float_model, tune_config: BaseTuneConfig, tune_objectives: TuneObjectives, fwk_wrapper: FrameworkWrapper + self, float_model, tune_config: TuningConfig, evaluator: Evaluator, fwk_wrapper: FrameworkWrapper ) -> None: self.float_model = float_model self.tune_config = tune_config - self.tune_objectives = tune_objectives + self.evaluator = evaluator self.fwk_wrapper = fwk_wrapper self._post_init() def _post_init(self) -> None: # check the number of evaluation functions - num_tune_objectives = self.tune_objectives.get_number_of_tune_objectives() - assert num_tune_objectives > 0, "Please ensure that you register at least one evaluation metric for auto-tune." 
- logger.info(f"There are {num_tune_objectives} tune objectives.") + num_evaluator = self.evaluator.get_number_of_eval_funtions() + assert num_evaluator > 0, "Please ensure that you register at least one evaluation metric for auto-tune." + logger.info(f"There are {num_evaluator} tune objectives.") @staticmethod def parse_quant_config(quant_config: BaseConfig) -> List[BaseConfig]: @@ -211,7 +209,7 @@ def update_tune_history(self, trial: Trial): def search(self) -> Any: for config in self.parse_quant_configs(): logger.info(f"Config {config}") - trial = Trial(self.float_model, config, fwk_wrapper=self.fwk_wrapper, tune_objectives=self.tune_objectives) + trial = Trial(self.float_model, config, fwk_wrapper=self.fwk_wrapper, evaluator=self.evaluator) trial.quantize() trial.get_eval_result() self.update_tune_history(trial) diff --git a/neural_compressor/torch/__init__.py b/neural_compressor/torch/__init__.py index 18ddc1ff9a3..42fb003946f 100644 --- a/neural_compressor/torch/__init__.py +++ b/neural_compressor/torch/__init__.py @@ -23,4 +23,5 @@ get_default_gptq_config, ) -from neural_compressor.torch.tune import autotune, TuneConfig, get_default_tune_config +from neural_compressor.common.base_tune import TuningConfig +from neural_compressor.torch.tune import autotune, get_default_tune_config diff --git a/neural_compressor/torch/tune.py b/neural_compressor/torch/tune.py index 4480650d9eb..bd6d0d0763f 100644 --- a/neural_compressor/torch/tune.py +++ b/neural_compressor/torch/tune.py @@ -16,7 +16,7 @@ import torch -from neural_compressor.common.base_tune import BaseTuneConfig, FrameworkWrapper, Tuner, tune_objectives +from neural_compressor.common.base_tune import FrameworkWrapper, Tuner, TuningConfig, evaluator from neural_compressor.common.logger import Logger from neural_compressor.torch.quantization.config import GPTQConfig, RTNWeightQuantConfig @@ -42,27 +42,20 @@ def apply(self, quant_config): return q_model -class TuneConfig(BaseTuneConfig): - def __init__(self, quant_configs=None, timeout=0, max_trials=100): - super().__init__(quant_configs, timeout, max_trials) - - def autotune( model: torch.nn.Module, - tune_config: TuneConfig, + tune_config: TuningConfig, eval_fns: Optional[Union[Dict, List[Dict]]] = None, run_fn=None, run_args=None, ): - tune_objectives.set_eval_fn_registry(eval_fns) + evaluator.set_eval_fn_registry(eval_fns) torch_wrapper = TorchWrapper(model, run_fn, run_args) - tuner = Tuner( - float_model=model, tune_config=tune_config, tune_objectives=tune_objectives, fwk_wrapper=torch_wrapper - ) + tuner = Tuner(float_model=model, tune_config=tune_config, evaluator=evaluator, fwk_wrapper=torch_wrapper) best_qmodel = tuner.search() return best_qmodel def get_default_tune_config(): # TODO use the registered default tune config in the next PR - return TuneConfig(quant_configs=[GPTQConfig(weight_bits=[4, 8]), RTNWeightQuantConfig(weight_bits=[4, 8])]) + return TuningConfig(quant_configs=[GPTQConfig(weight_bits=[4, 8]), RTNWeightQuantConfig(weight_bits=[4, 8])]) diff --git a/test/3x/torch/test_autotune.py b/test/3x/torch/test_autotune.py index 60cfe313344..840d88f706d 100644 --- a/test/3x/torch/test_autotune.py +++ b/test/3x/torch/test_autotune.py @@ -14,9 +14,9 @@ def reset_tune_target(test_func): @wraps(test_func) def wrapper(*args, **kwargs): # Reset tune targets before running the test - from neural_compressor.common.base_tune import tune_objectives + from neural_compressor.common.base_tune import evaluator - tune_objectives.eval_fn_registry = [] + evaluator.eval_fn_registry = [] 
         return test_func(*args, **kwargs)
 
     return wrapper
 
@@ -61,24 +61,24 @@ def setUp(self):
     @reset_tune_target
     def test_autotune_api(self):
         logger.info("test_autotune_api")
-        from neural_compressor.common.base_tune import tune_objectives
-        from neural_compressor.torch import RTNWeightQuantConfig, TuneConfig, autotune
+        from neural_compressor.common.base_tune import evaluator
+        from neural_compressor.torch import RTNWeightQuantConfig, TuningConfig, autotune
 
         def eval_acc_fn(model) -> float:
             return 1.0
 
-        custom_tune_config = TuneConfig(quant_configs=[RTNWeightQuantConfig(weight_bits=[4, 6])], max_trials=2)
+        custom_tune_config = TuningConfig(quant_configs=[RTNWeightQuantConfig(weight_bits=[4, 6])], max_trials=2)
         best_model = autotune(
             model=build_simple_torch_model(), tune_config=custom_tune_config, eval_fns=[{"eval_fn": eval_acc_fn}]
         )
         self.assertIsNone(best_model)
-        self.assertEqual(len(tune_objectives.eval_fn_registry), 1)
+        self.assertEqual(len(evaluator.eval_fn_registry), 1)
 
     @reset_tune_target
     def test_autotune_api_2(self):
         logger.info("test_autotune_api")
-        from neural_compressor.common.base_tune import tune_objectives
-        from neural_compressor.torch import RTNWeightQuantConfig, TuneConfig, autotune
+        from neural_compressor.common.base_tune import evaluator
+        from neural_compressor.torch import RTNWeightQuantConfig, TuningConfig, autotune
 
         def eval_acc_fn(model) -> float:
             return 1.0
@@ -94,17 +94,17 @@ def eval_perf_fn(model) -> float:
             },
         ]
 
-        custom_tune_config = TuneConfig(quant_configs=[RTNWeightQuantConfig(weight_bits=[4, 6])], max_trials=2)
+        custom_tune_config = TuningConfig(quant_configs=[RTNWeightQuantConfig(weight_bits=[4, 6])], max_trials=2)
         best_model = autotune(model=build_simple_torch_model(), tune_config=custom_tune_config, eval_fns=eval_fns)
         self.assertIsNone(best_model)
-        self.assertEqual(len(tune_objectives.eval_fn_registry), 2)
+        self.assertEqual(len(evaluator.eval_fn_registry), 2)
 
     @reset_tune_target
     def test_autotune_not_eval_func(self):
         logger.info("test_autotune_api")
-        from neural_compressor.torch import RTNWeightQuantConfig, TuneConfig, autotune
+        from neural_compressor.torch import RTNWeightQuantConfig, TuningConfig, autotune
 
-        custom_tune_config = TuneConfig(quant_configs=[RTNWeightQuantConfig(weight_bits=[4, 6])], max_trials=2)
+        custom_tune_config = TuningConfig(quant_configs=[RTNWeightQuantConfig(weight_bits=[4, 6])], max_trials=2)
 
         # Use assertRaises to check that an AssertionError is raised
         with self.assertRaises(AssertionError) as context:

From 249cf97f940dc8343a73c5ee184e88bc247f7dc7 Mon Sep 17 00:00:00 2001
From: yiliu30
Date: Wed, 17 Jan 2024 09:18:23 +0800
Subject: [PATCH 03/15] refactor the tuning pipeline

Signed-off-by: yiliu30
---
 neural_compressor/torch/tune.py | 120 +++++++++++++++++++++++++-------
 1 file changed, 94 insertions(+), 26 deletions(-)

diff --git a/neural_compressor/torch/tune.py b/neural_compressor/torch/tune.py
index bd6d0d0763f..c1b37037e33 100644
--- a/neural_compressor/torch/tune.py
+++ b/neural_compressor/torch/tune.py
@@ -12,34 +12,87 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
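+
+# The refactored tuning pipeline below is built from three cooperating pieces:
+#   ConfigLoader  - iterates over the candidate quantization configs,
+#   TuningMonitor - records each trial result and decides when tuning should stop,
+#   TuningLogger  - emits uniform start/end log lines for each tuning stage.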
-from typing import Callable, Dict, List, Optional, Tuple, Union +from typing import Callable, Dict, Generator, List, Optional, Tuple, Union import torch -from neural_compressor.common.base_tune import FrameworkWrapper, Tuner, TuningConfig, evaluator +from neural_compressor.common.base_config import BaseConfig +from neural_compressor.common.base_tune import TuningConfig, evaluator from neural_compressor.common.logger import Logger +from neural_compressor.torch import quantize from neural_compressor.torch.quantization.config import GPTQConfig, RTNWeightQuantConfig logger = Logger().get_logger() -class TorchWrapper(FrameworkWrapper): - """Concrete implementation of `FrameworkWrapper` for PyTorch models.""" +class Sampler: + pass - def __init__( - self, model: torch.nn.Module, run_fn: Optional[Callable] = None, run_args: Optional[Tuple] = None - ) -> None: - super().__init__(model) - self.run_fn = run_fn - self.run_args = run_args - def apply(self, quant_config): - """The entry to apply quantization algorithms on a given a model.""" - logger.info(f"apply quant_config: {quant_config}.") - from neural_compressor.torch import quantize +class ConfigLoader: + def __iter__(self) -> Generator[BaseConfig]: + yield None - q_model = quantize(model=self.model, quant_config=quant_config, run_fn=self.run_fn, run_args=self.run_args) - return q_model + +class TuningMonitor: + def __init__(self) -> None: + # TODO refine the `tuning_history` with a more appropriate data structure + self.tuning_history: list = [] + + def add_trial_result(self, trial_index: int, eval_result: Union[int, float], quant_config: BaseConfig) -> None: + self.tuning_history.append([trial_index, eval_result, quant_config]) + + def get_best_quant_config(self) -> BaseConfig: + return self.tuning_history[0][2] + + def need_stop(self) -> bool: + return True + + +class TuningLogger: + @classmethod + def tuning_start(cls) -> None: + logger.info("Tuning started.") + + @classmethod + def trial_start(cls, trial_index: int = None) -> None: + logger.info(f" {trial_index}-trail started.") + + @classmethod + def quantization_start(cls) -> None: + logger.info("Quantization started.") + + @classmethod + def quantization_end(cls) -> None: + logger.info("Quantization end.") + + @classmethod + def evaluation_start(cls) -> None: + logger.info("Evaluation started.") + + @classmethod + def evaluation_end(cls) -> None: + logger.info("Evaluation end.") + + @classmethod + def trial_end(cls, trial_index: int = None) -> None: + logger.info(f" {trial_index}-trail end.") + + @classmethod + def tuning_end(cls) -> None: + logger.info("Tuning completed.") + + +def init_tuning(tuning_config: TuningConfig) -> Tuple[ConfigLoader, TuningLogger, TuningMonitor]: + config_loader = ConfigLoader() + tuning_logger = TuningLogger() + tuning_monitor = TuningMonitor() + return config_loader, tuning_logger, tuning_monitor + + +def get_default_tune_config(): + # TODO use the registered default tune config in the next PR + return TuningConfig(quant_configs=[GPTQConfig(weight_bits=[4, 8]), RTNWeightQuantConfig(weight_bits=[4, 8])]) def autotune( @@ -48,14 +101,29 @@ def autotune( eval_fns: Optional[Union[Dict, List[Dict]]] = None, run_fn=None, run_args=None, -): - evaluator.set_eval_fn_registry(eval_fns) - torch_wrapper = TorchWrapper(model, run_fn, run_args) - tuner = Tuner(float_model=model, tune_config=tune_config, evaluator=evaluator, fwk_wrapper=torch_wrapper) - best_qmodel = tuner.search() - return best_qmodel - +) -> Optional[torch.nn.Module]: + # TODO Old Impl, remove it 
before merge
+    # evaluator.set_eval_fn_registry(eval_fns)
+    # torch_wrapper = TorchWrapper(model, run_fn, run_args)
+    # tuner = Tuner(float_model=model, tune_config=tune_config, evaluator=evaluator, fwk_wrapper=torch_wrapper)
+    # best_qmodel = tuner.search()
+
+    best_quant_model = None
+    evaluator.set_eval_fn_registry(eval_fns)
+    config_loader, tuning_logger, tuning_monitor = init_tuning(tuning_config=tune_config)
+    tuning_logger.tuning_start()
+    for trial_index, quant_config in enumerate(config_loader):
+        tuning_logger.trial_start(trial_index=trial_index)
+        tuning_logger.quantization_start()
+        q_model = quantize(model, quant_config=quant_config, run_fn=run_fn, run_args=run_args)
+        tuning_logger.quantization_end()
+        tuning_logger.evaluation_start()
+        eval_result: float = evaluator.evaluate(q_model)
+        tuning_logger.evaluation_end()
+        tuning_monitor.add_trial_result(trial_index, eval_result, quant_config)
+        if tuning_monitor.need_stop():
+            best_quant_config: BaseConfig = tuning_monitor.get_best_quant_config()
+            best_quant_model = quantize(model, quant_config=best_quant_config, run_fn=run_fn, run_args=run_args)
+        tuning_logger.trial_end()
+    tuning_logger.tuning_end()
+    return best_quant_model

From 572848cc4c1d26bb4cae46da78561e449b458f43 Mon Sep 17 00:00:00 2001
From: yiliu30
Date: Wed, 17 Jan 2024 09:36:02 +0800
Subject: [PATCH 04/15] revert tune to tuning

Signed-off-by: yiliu30
---
 neural_compressor/common/base_tune.py | 8 ++++----
 neural_compressor/torch/tune.py       | 2 +-
 test/3x/torch/test_autotune.py        | 10 +++++-----
 3 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/neural_compressor/common/base_tune.py b/neural_compressor/common/base_tune.py
index b7bbc145451..ad6e0fe526e 100644
--- a/neural_compressor/common/base_tune.py
+++ b/neural_compressor/common/base_tune.py
@@ -95,12 +95,12 @@ def set_eval_fn_registry(self, eval_fns: Optional[Union[Dict, List[Dict]]] = Non
 
 
 class TuningConfig:
-    """Base Class for Tune Criterion.
+    """Base Class for Tuning Criterion.
 
     Args:
         quant_configs: quantization configs. Default value is empty.
-        timeout: Tune timeout (seconds). Default value is 0 which means early stop.
-        max_trials: Max tune times. Default value is 100. Combine with timeout field to decide when to exit.
+        timeout: Tuning timeout (seconds). Default value is 0 which means early stop.
+        max_trials: Max tuning times. Default value is 100. Combine with timeout field to decide when to exit.
     """
 
     def __init__(self, quant_configs=None, timeout=0, max_trials=100) -> None:
@@ -178,7 +178,7 @@ def _post_init(self) -> None:
         # check the number of evaluation functions
         num_evaluator = self.evaluator.get_number_of_eval_funtions()
         assert num_evaluator > 0, "Please ensure that you register at least one evaluation metric for auto-tune."
- logger.info(f"There are {num_evaluator} tune objectives.") + logger.info(f"There are {num_evaluator} tuning objectives.") @staticmethod def parse_quant_config(quant_config: BaseConfig) -> List[BaseConfig]: diff --git a/neural_compressor/torch/tune.py b/neural_compressor/torch/tune.py index c1b37037e33..1a57bae0471 100644 --- a/neural_compressor/torch/tune.py +++ b/neural_compressor/torch/tune.py @@ -91,7 +91,7 @@ def init_tuning(tuning_config: TuningConfig) -> Tuple[ConfigLoader, TuningLogger def get_default_tune_config(): - # TODO use the registered default tune config in the next PR + # TODO use the registered default tuning config in the next PR return TuningConfig(quant_configs=[GPTQConfig(weight_bits=[4, 8]), RTNWeightQuantConfig(weight_bits=[4, 8])]) diff --git a/test/3x/torch/test_autotune.py b/test/3x/torch/test_autotune.py index 840d88f706d..cdb0067c165 100644 --- a/test/3x/torch/test_autotune.py +++ b/test/3x/torch/test_autotune.py @@ -10,10 +10,10 @@ import torch -def reset_tune_target(test_func): +def reset_tuning_target(test_func): @wraps(test_func) def wrapper(*args, **kwargs): - # Reset tune targets before running the test + # Reset tuning targets before running the test from neural_compressor.common.base_tune import evaluator evaluator.eval_fn_registry = [] @@ -58,7 +58,7 @@ def setUp(self): # print the test name logger.info(f"Running TestAutoTune test: {self.id()}") - @reset_tune_target + @reset_tuning_target def test_autotune_api(self): logger.info("test_autotune_api") from neural_compressor.common.base_tune import evaluator @@ -74,7 +74,7 @@ def eval_acc_fn(model) -> float: self.assertIsNone(best_model) self.assertEqual(len(evaluator.eval_fn_registry), 1) - @reset_tune_target + @reset_tuning_target def test_autotune_api_2(self): logger.info("test_autotune_api") from neural_compressor.common.base_tune import evaluator @@ -99,7 +99,7 @@ def eval_perf_fn(model) -> float: self.assertIsNone(best_model) self.assertEqual(len(evaluator.eval_fn_registry), 2) - @reset_tune_target + @reset_tuning_target def test_autotune_not_eval_func(self): logger.info("test_autotune_api") from neural_compressor.torch import RTNWeightQuantConfig, TuningConfig, autotune From 9afaf44f8a9dfa1510ac6a971b7cb8c1cd96544c Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Wed, 17 Jan 2024 10:00:23 +0800 Subject: [PATCH 05/15] fix UTs Signed-off-by: yiliu30 --- neural_compressor/common/base_tune.py | 195 ++++++++++++-------------- neural_compressor/torch/tune.py | 82 +---------- test/3x/torch/test_autotune.py | 4 +- 3 files changed, 99 insertions(+), 182 deletions(-) diff --git a/neural_compressor/common/base_tune.py b/neural_compressor/common/base_tune.py index ad6e0fe526e..5b8bb3fc4d9 100644 --- a/neural_compressor/common/base_tune.py +++ b/neural_compressor/common/base_tune.py @@ -12,29 +12,23 @@ # See the License for the specific language governing permissions and # limitations under the License. -from abc import abstractmethod -from typing import Any, Dict, List, Optional, Union + +from typing import Any, Dict, List, Optional, Tuple, Union from neural_compressor.common.base_config import BaseConfig, ComposableConfig from neural_compressor.common.logger import Logger logger = Logger().get_logger() - -class FrameworkWrapper: - """Abstract base class for wrap framework's APIs. - - FrameworkWrapper provides a uniform interface for encapsulating different framework's APIs. - This class is intended to be used by a `tuner` to obtain quantized models. 
- """ - - def __init__(self, model) -> None: - self.model = model - - @abstractmethod - def apply(self) -> Any: - """The entry to apply algorithms on a given model.""" - raise NotImplementedError +__all__ = [ + "Evaluator", + "TuningConfig", + "Sampler", + "ConfigLoader", + "TuningMonitor", + "TuningLogger", + "init_tuning", +] class Evaluator: @@ -90,6 +84,12 @@ def set_eval_fn_registry(self, eval_fns: Optional[Union[Dict, List[Dict]]] = Non raise NotImplementedError(f"The eval_fns should be a dict or a list of dict, but got {type(eval_fns)}.") self._set_eval_fn_registry(eval_fns) + def self_check(self) -> None: + # check the number of evaluation functions + num_evaluator = self.get_number_of_eval_funtions() + assert num_evaluator > 0, "Please ensure that you register at least one evaluation metric for auto-tune." + logger.info(f"There are {num_evaluator} tuning objectives.") + evaluator = Evaluator() @@ -103,83 +103,19 @@ class TuningConfig: max_trials: Max tuning times. Default value is 100. Combine with timeout field to decide when to exit. """ - def __init__(self, quant_configs=None, timeout=0, max_trials=100) -> None: + def __init__(self, quant_configs=None, timeout=0, max_trials=100, sampler: "Sampler" = None) -> None: """Init a TuneCriterion object.""" self.quant_configs = quant_configs self.timeout = timeout self.max_trials = max_trials + self.sampler = sampler -class Trial: - def __init__(self, float_model, quant_config: BaseConfig, fwk_wrapper: FrameworkWrapper, evaluator: Evaluator): - # The unique id to refer to one trial, it's used by the tuner. - self.trial_id = None - self._trial_result = None - self.set_trail_result_cnt = 0 - self.float_model = float_model - self.quant_model = None - self.quant_config = quant_config - self.fwk_wrapper = fwk_wrapper - self.evaluator = evaluator - self._post_init() - - def _post_init(self): - """Post initialize one trial.""" - # generate the trial_id - pass - - @property - def trial_result(self): - return self._trial_result - - @trial_result.setter - def trial_result(self, result): - assert self.set_trail_result_cnt < 1, "The trial result already be set." - self._trial_result = result - self.set_trail_result_cnt += 1 - - def quantize(self): - """Quantize the model with given quant_config.""" - quant_model = self.fwk_wrapper.apply(self.quant_config) - self.quant_model = quant_model - return quant_model - - def get_eval_result(self) -> float: - """Retune the evaluation result. - - The evaluation process is triggered by Lazy only when it is needed, and it is called only once. - """ - if not self.trial_result: - eval_score = self.evaluator.evaluate(self.quant_model) - self.trial_result = eval_score - return self.trial_result - - def recover_quant_model(self): - """The quantized model should be destroyed after evaluation to save the memory - and recovery it before end the tuning process.""" - pass - - def destroy_quant_model(self) -> None: - """""" - pass - - -class Tuner: - def __init__( - self, float_model, tune_config: TuningConfig, evaluator: Evaluator, fwk_wrapper: FrameworkWrapper - ) -> None: - self.float_model = float_model - self.tune_config = tune_config - self.evaluator = evaluator - self.fwk_wrapper = fwk_wrapper - self._post_init() - - def _post_init(self) -> None: - # check the number of evaluation functions - num_evaluator = self.evaluator.get_number_of_eval_funtions() - assert num_evaluator > 0, "Please ensure that you register at least one evaluation metric for auto-tune." 
- logger.info(f"There are {num_evaluator} tuning objectives.") +class Sampler: + pass + +class ConfigLoader: @staticmethod def parse_quant_config(quant_config: BaseConfig) -> List[BaseConfig]: if isinstance(quant_config, ComposableConfig): @@ -192,26 +128,75 @@ def parse_quant_config(quant_config: BaseConfig) -> List[BaseConfig]: def parse_quant_configs(self) -> List[BaseConfig]: quant_config_list = [] - for quant_config in self.tune_config.quant_configs: - quant_config_list.extend(Tuner.parse_quant_config(quant_config)) + for quant_config in self.quant_configs: + quant_config_list.extend(ConfigLoader.parse_quant_config(quant_config)) return quant_config_list - def get_best_model(self) -> Any: - # TODO(Yi) enable it at the next PR - pass + def __init__(self, quant_configs, sampler: Sampler): + self.quant_configs = quant_configs + self.sampler = sampler - def needs_stop(self): - return False + def __iter__(self): + for config in self.parse_quant_configs(): + yield config - def update_tune_history(self, trial: Trial): - pass - def search(self) -> Any: - for config in self.parse_quant_configs(): - logger.info(f"Config {config}") - trial = Trial(self.float_model, config, fwk_wrapper=self.fwk_wrapper, evaluator=self.evaluator) - trial.quantize() - trial.get_eval_result() - self.update_tune_history(trial) - if self.needs_stop(): - return self.get_best_model() +class TuningMonitor: + def __init__(self) -> None: + # TODO refine the `tuning_history` with a more appropriate data structure + self.tuning_history: list = [] + + def add_trial_result(self, trial_index: int, eval_result: Union[int, float], quant_config: BaseConfig) -> None: + self.tuning_history.append([trial_index, eval_result, quant_config]) + + def get_best_quant_config(self) -> BaseConfig: + return self.tuning_history[0][2] + + def need_stop(self) -> bool: + return True + + +class TuningLogger: + """A unified logger for the tuning process. + + It assists validation teams in retrieving logs. + """ + + @classmethod + def tuning_start(cls) -> None: + logger.info("Tuning started.") + + @classmethod + def trial_start(cls, trial_index: int = None) -> None: + logger.info(f" {trial_index}-trail started.") + + @classmethod + def quantization_start(cls) -> None: + logger.info("Quantization started.") + + @classmethod + def quantization_end(cls) -> None: + logger.info("Quantization end.") + + @classmethod + def evaluation_start(cls) -> None: + logger.info("Evaluation started.") + + @classmethod + def evaluation_end(cls) -> None: + logger.info("Evaluation end.") + + @classmethod + def trial_end(cls, trial_index: int = None) -> None: + logger.info(f" {trial_index}-trail end.") + + @classmethod + def tuning_end(cls) -> None: + logger.info("Tuning completed.") + + +def init_tuning(tuning_config: TuningConfig) -> Tuple[ConfigLoader, TuningLogger, TuningMonitor]: + config_loader = ConfigLoader(quant_configs=tuning_config.quant_configs, sampler=tuning_config.sampler) + tuning_logger = TuningLogger() + tuning_monitor = TuningMonitor() + return config_loader, tuning_logger, tuning_monitor diff --git a/neural_compressor/torch/tune.py b/neural_compressor/torch/tune.py index 1a57bae0471..32456d3f407 100644 --- a/neural_compressor/torch/tune.py +++ b/neural_compressor/torch/tune.py @@ -12,12 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from typing import Callable, Dict, Generator, List, Optional, Tuple, Union +from typing import Dict, List, Optional, Union import torch from neural_compressor.common.base_config import BaseConfig -from neural_compressor.common.base_tune import TuningConfig, evaluator +from neural_compressor.common.base_tune import TuningConfig, evaluator, init_tuning from neural_compressor.common.logger import Logger from neural_compressor.torch import quantize from neural_compressor.torch.quantization.config import GPTQConfig, RTNWeightQuantConfig @@ -25,72 +25,7 @@ logger = Logger().get_logger() -class Sampler: - pass - - -class ConfigLoader: - def __iter__(self) -> Generator[BaseConfig]: - yield None - - -class TuningMonitor: - def __init__(self) -> None: - # TODO refine the `tuning_history` with a more appropriate data structure - self.tuning_history: list = [] - - def add_trial_result(self, trial_index: int, eval_result: Union[int, float], quant_config: BaseConfig) -> None: - self.tuning_history.append([trial_index, eval_result, quant_config]) - - def get_best_quant_config(self) -> BaseConfig: - return self.tuning_history[0][2] - - def need_stop(self) -> bool: - return True - - -class TuningLogger: - @classmethod - def tuning_start(cls) -> None: - logger.info("Tuning started.") - - @classmethod - def trial_start(cls, trial_index: int = None) -> None: - logger.info(f" {trial_index}-trail started.") - - @classmethod - def quantization_start(cls) -> None: - logger.info("Quantization started.") - - @classmethod - def quantization_end(cls) -> None: - logger.info("Quantization end.") - - @classmethod - def evaluation_start(cls) -> None: - logger.info("Evaluation started.") - - @classmethod - def evaluation_end(cls) -> None: - logger.info("Evaluation end.") - - @classmethod - def trial_end(cls, trial_index: int = None) -> None: - logger.info(f" {trial_index}-trail end.") - - @classmethod - def tuning_end(cls) -> None: - logger.info("Tuning completed.") - - -def init_tuning(tuning_config: TuningConfig) -> Tuple[ConfigLoader, TuningLogger, TuningMonitor]: - config_loader = ConfigLoader() - tuning_logger = TuningLogger() - tuning_monitor = TuningMonitor() - return config_loader, tuning_logger, tuning_monitor - - -def get_default_tune_config(): +def get_default_tune_config() -> TuningConfig: # TODO use the registered default tuning config in the next PR return TuningConfig(quant_configs=[GPTQConfig(weight_bits=[4, 8]), RTNWeightQuantConfig(weight_bits=[4, 8])]) @@ -102,14 +37,10 @@ def autotune( run_fn=None, run_args=None, ) -> Optional[torch.nn.Module]: - # TODO Old Impl, remove it before merge - # evaluator.set_eval_fn_registry(eval_fns) - # torch_wrapper = TorchWrapper(model, run_fn, run_args) - # tuner = Tuner(float_model=model, tune_config=tune_config, evaluator=evaluator, fwk_wrapper=torch_wrapper) - # best_qmodel = tuner.search() - + """The main entry of auto-tune.""" best_quant_model = None evaluator.set_eval_fn_registry(eval_fns) + evaluator.self_check() config_loader, tuning_logger, tuning_monitor = init_tuning(tuning_config=tune_config) tuning_logger.tuning_start() for trial_index, quant_config in enumerate(config_loader): @@ -123,7 +54,8 @@ def autotune( tuning_monitor.add_trial_result(trial_index, eval_result, quant_config) if tuning_monitor.need_stop(): best_quant_config: BaseConfig = tuning_monitor.get_best_quant_config() - best_quant_model = quantize(model, quant_config=best_quant_config, run_fn=run_fn, run_args=run_args)() + quantize(model, quant_config=best_quant_config, run_fn=run_fn, 
run_args=run_args, inplace=True) + best_quant_model = model # quantize model inplace tuning_logger.trial_end() tuning_logger.tuning_end() return best_quant_model diff --git a/test/3x/torch/test_autotune.py b/test/3x/torch/test_autotune.py index cdb0067c165..85ddf73494e 100644 --- a/test/3x/torch/test_autotune.py +++ b/test/3x/torch/test_autotune.py @@ -71,7 +71,7 @@ def eval_acc_fn(model) -> float: best_model = autotune( model=build_simple_torch_model(), tune_config=custom_tune_config, eval_fns=[{"eval_fn": eval_acc_fn}] ) - self.assertIsNone(best_model) + self.assertIsNotNone(best_model) self.assertEqual(len(evaluator.eval_fn_registry), 1) @reset_tuning_target @@ -96,7 +96,7 @@ def eval_perf_fn(model) -> float: custom_tune_config = TuningConfig(quant_configs=[RTNWeightQuantConfig(weight_bits=[4, 6])], max_trials=2) best_model = autotune(model=build_simple_torch_model(), tune_config=custom_tune_config, eval_fns=eval_fns) - self.assertIsNone(best_model) + self.assertIsNotNone(best_model) self.assertEqual(len(evaluator.eval_fn_registry), 2) @reset_tuning_target From fe362f9dcd4cc8a964a19030bd0de08d509f9bca Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Wed, 17 Jan 2024 10:02:25 +0800 Subject: [PATCH 06/15] rename some files Signed-off-by: yiliu30 --- neural_compressor/common/base_tuning.py | 202 ++++++++++++++++++++++++ neural_compressor/torch/autotune.py | 61 +++++++ 2 files changed, 263 insertions(+) create mode 100644 neural_compressor/common/base_tuning.py create mode 100644 neural_compressor/torch/autotune.py diff --git a/neural_compressor/common/base_tuning.py b/neural_compressor/common/base_tuning.py new file mode 100644 index 00000000000..5b8bb3fc4d9 --- /dev/null +++ b/neural_compressor/common/base_tuning.py @@ -0,0 +1,202 @@ +# Copyright (c) 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from typing import Any, Dict, List, Optional, Tuple, Union + +from neural_compressor.common.base_config import BaseConfig, ComposableConfig +from neural_compressor.common.logger import Logger + +logger = Logger().get_logger() + +__all__ = [ + "Evaluator", + "TuningConfig", + "Sampler", + "ConfigLoader", + "TuningMonitor", + "TuningLogger", + "init_tuning", +] + + +class Evaluator: + EVAL_FN = "eval_fn" + WEIGHT = "weight" + FN_NAME = "name" + EVAL_FN_TEMPLATE: Dict[str, Any] = {EVAL_FN: None, WEIGHT: 1.0, FN_NAME: None} + + def __init__(self) -> None: + self.eval_fn_registry: List[Dict[str, Any]] = [] + + def evaluate(self, model) -> float: + """Evaluate the model using registered evaluation functions. + + Args: + model: The fp32 model or quantized model. + + Returns: + The overall result of all registered evaluation functions. 
+ """ + result = 0 + for eval_pair in self.eval_fn_registry: + eval_fn = eval_pair[self.EVAL_FN] + eval_result = eval_fn(model) + result = self._update_the_objective_score(eval_pair, eval_result, result) + return result + + def _update_the_objective_score(self, eval_pair, eval_result, overall_result) -> float: + # TODO update the result according to the weight and algo_name + return overall_result + eval_result * eval_pair[self.WEIGHT] + + def get_number_of_eval_funtions(self) -> int: + return len(self.eval_fn_registry) + + def _set_eval_fn_registry(self, user_eval_fns: List[Dict]) -> None: + self.eval_fn_registry = [ + { + self.EVAL_FN: user_eval_fn_pair[self.EVAL_FN], + self.WEIGHT: user_eval_fn_pair.get(self.WEIGHT, 1.0), + self.FN_NAME: user_eval_fn_pair.get(self.FN_NAME, user_eval_fn_pair[self.EVAL_FN].__name__), + } + for user_eval_fn_pair in user_eval_fns + ] + + def set_eval_fn_registry(self, eval_fns: Optional[Union[Dict, List[Dict]]] = None) -> None: + if eval_fns is None: + return + elif isinstance(eval_fns, Dict): + eval_fns = [eval_fns] + elif isinstance(eval_fns, List): + assert all([isinstance(eval_fn_pair, Dict) for eval_fn_pair in eval_fns]) + else: + raise NotImplementedError(f"The eval_fns should be a dict or a list of dict, but got {type(eval_fns)}.") + self._set_eval_fn_registry(eval_fns) + + def self_check(self) -> None: + # check the number of evaluation functions + num_evaluator = self.get_number_of_eval_funtions() + assert num_evaluator > 0, "Please ensure that you register at least one evaluation metric for auto-tune." + logger.info(f"There are {num_evaluator} tuning objectives.") + + +evaluator = Evaluator() + + +class TuningConfig: + """Base Class for Tuning Criterion. + + Args: + quant_configs: quantization configs. Default value is empty. + timeout: Tuning timeout (seconds). Default value is 0 which means early stop. + max_trials: Max tuning times. Default value is 100. Combine with timeout field to decide when to exit. 
+ """ + + def __init__(self, quant_configs=None, timeout=0, max_trials=100, sampler: "Sampler" = None) -> None: + """Init a TuneCriterion object.""" + self.quant_configs = quant_configs + self.timeout = timeout + self.max_trials = max_trials + self.sampler = sampler + + +class Sampler: + pass + + +class ConfigLoader: + @staticmethod + def parse_quant_config(quant_config: BaseConfig) -> List[BaseConfig]: + if isinstance(quant_config, ComposableConfig): + result = [] + for q_config in quant_config.config_list: + result += q_config.expand() + return result + else: + return quant_config.expand() + + def parse_quant_configs(self) -> List[BaseConfig]: + quant_config_list = [] + for quant_config in self.quant_configs: + quant_config_list.extend(ConfigLoader.parse_quant_config(quant_config)) + return quant_config_list + + def __init__(self, quant_configs, sampler: Sampler): + self.quant_configs = quant_configs + self.sampler = sampler + + def __iter__(self): + for config in self.parse_quant_configs(): + yield config + + +class TuningMonitor: + def __init__(self) -> None: + # TODO refine the `tuning_history` with a more appropriate data structure + self.tuning_history: list = [] + + def add_trial_result(self, trial_index: int, eval_result: Union[int, float], quant_config: BaseConfig) -> None: + self.tuning_history.append([trial_index, eval_result, quant_config]) + + def get_best_quant_config(self) -> BaseConfig: + return self.tuning_history[0][2] + + def need_stop(self) -> bool: + return True + + +class TuningLogger: + """A unified logger for the tuning process. + + It assists validation teams in retrieving logs. + """ + + @classmethod + def tuning_start(cls) -> None: + logger.info("Tuning started.") + + @classmethod + def trial_start(cls, trial_index: int = None) -> None: + logger.info(f" {trial_index}-trail started.") + + @classmethod + def quantization_start(cls) -> None: + logger.info("Quantization started.") + + @classmethod + def quantization_end(cls) -> None: + logger.info("Quantization end.") + + @classmethod + def evaluation_start(cls) -> None: + logger.info("Evaluation started.") + + @classmethod + def evaluation_end(cls) -> None: + logger.info("Evaluation end.") + + @classmethod + def trial_end(cls, trial_index: int = None) -> None: + logger.info(f" {trial_index}-trail end.") + + @classmethod + def tuning_end(cls) -> None: + logger.info("Tuning completed.") + + +def init_tuning(tuning_config: TuningConfig) -> Tuple[ConfigLoader, TuningLogger, TuningMonitor]: + config_loader = ConfigLoader(quant_configs=tuning_config.quant_configs, sampler=tuning_config.sampler) + tuning_logger = TuningLogger() + tuning_monitor = TuningMonitor() + return config_loader, tuning_logger, tuning_monitor diff --git a/neural_compressor/torch/autotune.py b/neural_compressor/torch/autotune.py new file mode 100644 index 00000000000..d734839d7f1 --- /dev/null +++ b/neural_compressor/torch/autotune.py @@ -0,0 +1,61 @@ +# Copyright (c) 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
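+
+# A minimal usage sketch for the `autotune` entry defined below, assuming a
+# user-defined fp32 `model` and accuracy function `eval_acc_fn`:
+#
+#     from neural_compressor.common.base_tuning import TuningConfig
+#     from neural_compressor.torch.autotune import autotune
+#     from neural_compressor.torch.quantization.config import RTNWeightQuantConfig
+#
+#     tune_config = TuningConfig(quant_configs=[RTNWeightQuantConfig(weight_bits=[4, 8])])
+#     best_model = autotune(model=model, tune_config=tune_config, eval_fns=[{"eval_fn": eval_acc_fn}])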
+ +from typing import Dict, List, Optional, Union + +import torch + +from neural_compressor.common.base_config import BaseConfig +from neural_compressor.common.base_tuning import TuningConfig, evaluator, init_tuning +from neural_compressor.common.logger import Logger +from neural_compressor.torch import quantize +from neural_compressor.torch.quantization.config import GPTQConfig, RTNWeightQuantConfig + +logger = Logger().get_logger() + + +def get_default_tune_config() -> TuningConfig: + # TODO use the registered default tuning config in the next PR + return TuningConfig(quant_configs=[GPTQConfig(weight_bits=[4, 8]), RTNWeightQuantConfig(weight_bits=[4, 8])]) + + +def autotune( + model: torch.nn.Module, + tune_config: TuningConfig, + eval_fns: Optional[Union[Dict, List[Dict]]] = None, + run_fn=None, + run_args=None, +) -> Optional[torch.nn.Module]: + """The main entry of auto-tune.""" + best_quant_model = None + evaluator.set_eval_fn_registry(eval_fns) + evaluator.self_check() + config_loader, tuning_logger, tuning_monitor = init_tuning(tuning_config=tune_config) + tuning_logger.tuning_start() + for trial_index, quant_config in enumerate(config_loader): + tuning_logger.trial_start(trial_index=trial_index) + tuning_logger.quantization_start() + q_model = quantize(model, quant_config=quant_config, run_fn=run_fn, run_args=run_args) + tuning_logger.quantization_end() + tuning_logger.evaluation_start() + eval_result: float = evaluator.evaluate(q_model) + tuning_logger.evaluation_end() + tuning_monitor.add_trial_result(trial_index, eval_result, quant_config) + if tuning_monitor.need_stop(): + best_quant_config: BaseConfig = tuning_monitor.get_best_quant_config() + quantize(model, quant_config=best_quant_config, run_fn=run_fn, run_args=run_args, inplace=True) + best_quant_model = model # quantize model inplace + tuning_logger.trial_end() + tuning_logger.tuning_end() + return best_quant_model From 6560c797097b29f4f11f8c61e330085af77c9808 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Wed, 17 Jan 2024 10:03:37 +0800 Subject: [PATCH 07/15] remove some files Signed-off-by: yiliu30 --- neural_compressor/common/base_tune.py | 202 -------------------------- neural_compressor/torch/tune.py | 61 -------- 2 files changed, 263 deletions(-) delete mode 100644 neural_compressor/common/base_tune.py delete mode 100644 neural_compressor/torch/tune.py diff --git a/neural_compressor/common/base_tune.py b/neural_compressor/common/base_tune.py deleted file mode 100644 index 5b8bb3fc4d9..00000000000 --- a/neural_compressor/common/base_tune.py +++ /dev/null @@ -1,202 +0,0 @@ -# Copyright (c) 2023 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -from typing import Any, Dict, List, Optional, Tuple, Union - -from neural_compressor.common.base_config import BaseConfig, ComposableConfig -from neural_compressor.common.logger import Logger - -logger = Logger().get_logger() - -__all__ = [ - "Evaluator", - "TuningConfig", - "Sampler", - "ConfigLoader", - "TuningMonitor", - "TuningLogger", - "init_tuning", -] - - -class Evaluator: - EVAL_FN = "eval_fn" - WEIGHT = "weight" - FN_NAME = "name" - EVAL_FN_TEMPLATE: Dict[str, Any] = {EVAL_FN: None, WEIGHT: 1.0, FN_NAME: None} - - def __init__(self) -> None: - self.eval_fn_registry: List[Dict[str, Any]] = [] - - def evaluate(self, model) -> float: - """Evaluate the model using registered evaluation functions. - - Args: - model: The fp32 model or quantized model. - - Returns: - The overall result of all registered evaluation functions. - """ - result = 0 - for eval_pair in self.eval_fn_registry: - eval_fn = eval_pair[self.EVAL_FN] - eval_result = eval_fn(model) - result = self._update_the_objective_score(eval_pair, eval_result, result) - return result - - def _update_the_objective_score(self, eval_pair, eval_result, overall_result) -> float: - # TODO update the result according to the weight and algo_name - return overall_result + eval_result * eval_pair[self.WEIGHT] - - def get_number_of_eval_funtions(self) -> int: - return len(self.eval_fn_registry) - - def _set_eval_fn_registry(self, user_eval_fns: List[Dict]) -> None: - self.eval_fn_registry = [ - { - self.EVAL_FN: user_eval_fn_pair[self.EVAL_FN], - self.WEIGHT: user_eval_fn_pair.get(self.WEIGHT, 1.0), - self.FN_NAME: user_eval_fn_pair.get(self.FN_NAME, user_eval_fn_pair[self.EVAL_FN].__name__), - } - for user_eval_fn_pair in user_eval_fns - ] - - def set_eval_fn_registry(self, eval_fns: Optional[Union[Dict, List[Dict]]] = None) -> None: - if eval_fns is None: - return - elif isinstance(eval_fns, Dict): - eval_fns = [eval_fns] - elif isinstance(eval_fns, List): - assert all([isinstance(eval_fn_pair, Dict) for eval_fn_pair in eval_fns]) - else: - raise NotImplementedError(f"The eval_fns should be a dict or a list of dict, but got {type(eval_fns)}.") - self._set_eval_fn_registry(eval_fns) - - def self_check(self) -> None: - # check the number of evaluation functions - num_evaluator = self.get_number_of_eval_funtions() - assert num_evaluator > 0, "Please ensure that you register at least one evaluation metric for auto-tune." - logger.info(f"There are {num_evaluator} tuning objectives.") - - -evaluator = Evaluator() - - -class TuningConfig: - """Base Class for Tuning Criterion. - - Args: - quant_configs: quantization configs. Default value is empty. - timeout: Tuning timeout (seconds). Default value is 0 which means early stop. - max_trials: Max tuning times. Default value is 100. Combine with timeout field to decide when to exit. 
- """ - - def __init__(self, quant_configs=None, timeout=0, max_trials=100, sampler: "Sampler" = None) -> None: - """Init a TuneCriterion object.""" - self.quant_configs = quant_configs - self.timeout = timeout - self.max_trials = max_trials - self.sampler = sampler - - -class Sampler: - pass - - -class ConfigLoader: - @staticmethod - def parse_quant_config(quant_config: BaseConfig) -> List[BaseConfig]: - if isinstance(quant_config, ComposableConfig): - result = [] - for q_config in quant_config.config_list: - result += q_config.expand() - return result - else: - return quant_config.expand() - - def parse_quant_configs(self) -> List[BaseConfig]: - quant_config_list = [] - for quant_config in self.quant_configs: - quant_config_list.extend(ConfigLoader.parse_quant_config(quant_config)) - return quant_config_list - - def __init__(self, quant_configs, sampler: Sampler): - self.quant_configs = quant_configs - self.sampler = sampler - - def __iter__(self): - for config in self.parse_quant_configs(): - yield config - - -class TuningMonitor: - def __init__(self) -> None: - # TODO refine the `tuning_history` with a more appropriate data structure - self.tuning_history: list = [] - - def add_trial_result(self, trial_index: int, eval_result: Union[int, float], quant_config: BaseConfig) -> None: - self.tuning_history.append([trial_index, eval_result, quant_config]) - - def get_best_quant_config(self) -> BaseConfig: - return self.tuning_history[0][2] - - def need_stop(self) -> bool: - return True - - -class TuningLogger: - """A unified logger for the tuning process. - - It assists validation teams in retrieving logs. - """ - - @classmethod - def tuning_start(cls) -> None: - logger.info("Tuning started.") - - @classmethod - def trial_start(cls, trial_index: int = None) -> None: - logger.info(f" {trial_index}-trail started.") - - @classmethod - def quantization_start(cls) -> None: - logger.info("Quantization started.") - - @classmethod - def quantization_end(cls) -> None: - logger.info("Quantization end.") - - @classmethod - def evaluation_start(cls) -> None: - logger.info("Evaluation started.") - - @classmethod - def evaluation_end(cls) -> None: - logger.info("Evaluation end.") - - @classmethod - def trial_end(cls, trial_index: int = None) -> None: - logger.info(f" {trial_index}-trail end.") - - @classmethod - def tuning_end(cls) -> None: - logger.info("Tuning completed.") - - -def init_tuning(tuning_config: TuningConfig) -> Tuple[ConfigLoader, TuningLogger, TuningMonitor]: - config_loader = ConfigLoader(quant_configs=tuning_config.quant_configs, sampler=tuning_config.sampler) - tuning_logger = TuningLogger() - tuning_monitor = TuningMonitor() - return config_loader, tuning_logger, tuning_monitor diff --git a/neural_compressor/torch/tune.py b/neural_compressor/torch/tune.py deleted file mode 100644 index 32456d3f407..00000000000 --- a/neural_compressor/torch/tune.py +++ /dev/null @@ -1,61 +0,0 @@ -# Copyright (c) 2023 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from typing import Dict, List, Optional, Union - -import torch - -from neural_compressor.common.base_config import BaseConfig -from neural_compressor.common.base_tune import TuningConfig, evaluator, init_tuning -from neural_compressor.common.logger import Logger -from neural_compressor.torch import quantize -from neural_compressor.torch.quantization.config import GPTQConfig, RTNWeightQuantConfig - -logger = Logger().get_logger() - - -def get_default_tune_config() -> TuningConfig: - # TODO use the registered default tuning config in the next PR - return TuningConfig(quant_configs=[GPTQConfig(weight_bits=[4, 8]), RTNWeightQuantConfig(weight_bits=[4, 8])]) - - -def autotune( - model: torch.nn.Module, - tune_config: TuningConfig, - eval_fns: Optional[Union[Dict, List[Dict]]] = None, - run_fn=None, - run_args=None, -) -> Optional[torch.nn.Module]: - """The main entry of auto-tune.""" - best_quant_model = None - evaluator.set_eval_fn_registry(eval_fns) - evaluator.self_check() - config_loader, tuning_logger, tuning_monitor = init_tuning(tuning_config=tune_config) - tuning_logger.tuning_start() - for trial_index, quant_config in enumerate(config_loader): - tuning_logger.trial_start(trial_index=trial_index) - tuning_logger.quantization_start() - q_model = quantize(model, quant_config=quant_config, run_fn=run_fn, run_args=run_args) - tuning_logger.quantization_end() - tuning_logger.evaluation_start() - eval_result: float = evaluator.evaluate(q_model) - tuning_logger.evaluation_end() - tuning_monitor.add_trial_result(trial_index, eval_result, quant_config) - if tuning_monitor.need_stop(): - best_quant_config: BaseConfig = tuning_monitor.get_best_quant_config() - quantize(model, quant_config=best_quant_config, run_fn=run_fn, run_args=run_args, inplace=True) - best_quant_model = model # quantize model inplace - tuning_logger.trial_end() - tuning_logger.tuning_end() - return best_quant_model From b01c8058b718b3c0043b5ed7c3d5ddc68e1c800d Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Wed, 17 Jan 2024 10:45:16 +0800 Subject: [PATCH 08/15] fixed import error Signed-off-by: yiliu30 --- neural_compressor/torch/__init__.py | 4 ++-- test/3x/torch/test_autotune.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/neural_compressor/torch/__init__.py b/neural_compressor/torch/__init__.py index 42fb003946f..c50e60103ea 100644 --- a/neural_compressor/torch/__init__.py +++ b/neural_compressor/torch/__init__.py @@ -23,5 +23,5 @@ get_default_gptq_config, ) -from neural_compressor.common.base_tune import TuningConfig -from neural_compressor.torch.tune import autotune, get_default_tune_config +from neural_compressor.common.base_tuning import TuningConfig +from neural_compressor.torch.autotune import autotune, get_default_tune_config diff --git a/test/3x/torch/test_autotune.py b/test/3x/torch/test_autotune.py index 85ddf73494e..7e67436e87c 100644 --- a/test/3x/torch/test_autotune.py +++ b/test/3x/torch/test_autotune.py @@ -14,7 +14,7 @@ def reset_tuning_target(test_func): @wraps(test_func) def wrapper(*args, **kwargs): # Reset tuning targets before running the test - from neural_compressor.common.base_tune import evaluator + from neural_compressor.common.base_tuning import evaluator evaluator.eval_fn_registry = [] return test_func(*args, **kwargs) @@ -61,7 +61,7 @@ def setUp(self): @reset_tuning_target def test_autotune_api(self): logger.info("test_autotune_api") - from neural_compressor.common.base_tune import evaluator + from neural_compressor.common.base_tuning import evaluator from 
neural_compressor.torch import RTNWeightQuantConfig, TuningConfig, autotune
 
         def eval_acc_fn(model) -> float:
@@ -77,7 +77,7 @@ def eval_acc_fn(model) -> float:
     @reset_tuning_target
     def test_autotune_api_2(self):
         logger.info("test_autotune_api")
-        from neural_compressor.common.base_tune import evaluator
+        from neural_compressor.common.base_tuning import evaluator
         from neural_compressor.torch import RTNWeightQuantConfig, TuningConfig, autotune
 
         def eval_acc_fn(model) -> float:

From ec381bc19449753c78240bcf22a663d2ed7c2524 Mon Sep 17 00:00:00 2001
From: yiliu30
Date: Wed, 17 Jan 2024 12:16:53 +0800
Subject: [PATCH 09/15] handle single eval

Signed-off-by: yiliu30
---
 neural_compressor/common/base_tuning.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/neural_compressor/common/base_tuning.py b/neural_compressor/common/base_tuning.py
index 5b8bb3fc4d9..fd95ea89056 100644
--- a/neural_compressor/common/base_tuning.py
+++ b/neural_compressor/common/base_tuning.py
@@ -13,6 +13,7 @@
 # limitations under the License.
 
 
+import copy
 from typing import Any, Dict, List, Optional, Tuple, Union
 
 from neural_compressor.common.base_config import BaseConfig, ComposableConfig
@@ -76,6 +77,12 @@ def _set_eval_fn_registry(self, user_eval_fns: List[Dict]) -> None:
     def set_eval_fn_registry(self, eval_fns: Optional[Union[Dict, List[Dict]]] = None) -> None:
         if eval_fns is None:
             return
+        elif callable(eval_fns):
+            # single eval_fn
+            eval_fn_pair = copy.deepcopy(self.EVAL_FN_TEMPLATE)
+            eval_fn_pair[self.EVAL_FN] = eval_fns
+            eval_fn_pair[self.FN_NAME] = eval_fns.__name__
+            eval_fns = [eval_fn_pair]
         elif isinstance(eval_fns, Dict):
             eval_fns = [eval_fns]
         elif isinstance(eval_fns, List):

From f278c2f850762adfcfc548afb368215add47b257 Mon Sep 17 00:00:00 2001
From: yiliu30
Date: Wed, 17 Jan 2024 12:37:54 +0800
Subject: [PATCH 10/15] refine tuning monitor

Signed-off-by: yiliu30
---
 neural_compressor/common/base_tuning.py | 100 +++++++++++++---------
 1 file changed, 63 insertions(+), 37 deletions(-)

diff --git a/neural_compressor/common/base_tuning.py b/neural_compressor/common/base_tuning.py
index fd95ea89056..d722b12f85a 100644
--- a/neural_compressor/common/base_tuning.py
+++ b/neural_compressor/common/base_tuning.py
@@ -14,6 +14,7 @@
 
 
 import copy
+import uuid
 from typing import Any, Dict, List, Optional, Tuple, Union
 
 from neural_compressor.common.base_config import BaseConfig, ComposableConfig
@@ -61,7 +62,7 @@ def _update_the_objective_score(self, eval_pair, eval_result, overall_result) ->
         # TODO update the result according to the weight and algo_name
         return overall_result + eval_result * eval_pair[self.WEIGHT]
 
-    def get_number_of_eval_funtions(self) -> int:
+    def get_number_of_eval_functions(self) -> int:
         return len(self.eval_fn_registry)
 
     def _set_eval_fn_registry(self, user_eval_fns: List[Dict]) -> None:
@@ -93,31 +94,14 @@ def set_eval_fn_registry(self, eval_fns: Optional[Union[Dict, List[Dict]]] = Non
 
     def self_check(self) -> None:
         # check the number of evaluation functions
-        num_evaluator = self.get_number_of_eval_funtions()
-        assert num_evaluator > 0, "Please ensure that you register at least one evaluation metric for auto-tune."
-        logger.info(f"There are {num_evaluator} tuning objectives.")
+        num_eval_fns = self.get_number_of_eval_functions()
+        assert num_eval_fns > 0, "Please ensure that you register at least one evaluation metric for auto-tune."
+        logger.info("There are %d evaluation functions.", num_eval_fns)
 
 
 evaluator = Evaluator()
 
 
-class TuningConfig:
-    """Base Class for Tuning Criterion.
-
-    Args:
-        quant_configs: quantization configs. Default value is empty.
-        timeout: Tuning timeout (seconds). Default value is 0 which means early stop.
-        max_trials: Max tuning times. Default value is 100. Combine with timeout field to decide when to exit.
-    """
-
-    def __init__(self, quant_configs=None, timeout=0, max_trials=100, sampler: "Sampler" = None) -> None:
-        """Init a TuneCriterion object."""
-        self.quant_configs = quant_configs
-        self.timeout = timeout
-        self.max_trials = max_trials
-        self.sampler = sampler
-
-
 class Sampler:
     pass
@@ -148,21 +132,6 @@ def __iter__(self):
             yield config
 
 
-class TuningMonitor:
-    def __init__(self) -> None:
-        # TODO refine the `tuning_history` with a more appropriate data structure
-        self.tuning_history: list = []
-
-    def add_trial_result(self, trial_index: int, eval_result: Union[int, float], quant_config: BaseConfig) -> None:
-        self.tuning_history.append([trial_index, eval_result, quant_config])
-
-    def get_best_quant_config(self) -> BaseConfig:
-        return self.tuning_history[0][2]
-
-    def need_stop(self) -> bool:
-        return True
-
-
 class TuningLogger:
     """A unified logger for the tuning process.
 
     It assists validation teams in retrieving logs.
@@ -202,8 +171,65 @@ def tuning_end(cls) -> None:
         logger.info("Tuning completed.")
 
 
+class TuningConfig:
+    """Base Class for Tuning Criterion.
+
+    Args:
+        quant_configs: quantization configs. Default value is empty.
+        timeout: Tuning timeout (seconds). Default value is 0, which means early stop.
+        max_trials: Maximum number of tuning trials. Default value is 100. Combine with timeout field to decide when to exit.
+    """
+
+    def __init__(self, quant_configs=None, timeout=0, max_trials=100, sampler: Sampler = None) -> None:
+        """Init a TuningConfig object."""
+        self.quant_configs = quant_configs
+        self.timeout = timeout
+        self.max_trials = max_trials
+        self.sampler = sampler
+
+
+class _TrialRecord:
+    @staticmethod
+    def _generate_unique_id():
+        unique_id = str(uuid.uuid4())
+        return unique_id
+
+    def __init__(self, trial_index: int, trial_result: Union[int, float], quant_config: BaseConfig):
+        # The unique id to refer to one trial
+        self.trial_id = _TrialRecord._generate_unique_id()
+        self.trial_index = trial_index
+        self.trial_result = trial_result
+        self.quant_config = quant_config
+
+
+class TuningMonitor:
+    def __init__(self, tuning_config: TuningConfig) -> None:
+        self.tuning_config = tuning_config
+        self.trial_cnt = 0
+        self.tuning_history: List[_TrialRecord] = []
+
+    def add_trial_result(self, trial_index: int, trial_result: Union[int, float], quant_config: BaseConfig) -> None:
+        self.trial_cnt += 1
+        trial_record = _TrialRecord(trial_index, trial_result, quant_config)
+        self.tuning_history.append(trial_record)
+
+    def get_number_of_trials(self):
+        return len(self.tuning_history)
+
+    def get_best_quant_config(self) -> BaseConfig:
+        assert self.get_number_of_trials() > 0, "Not trial record in tuning monitor."
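+        # Note: trial_result is the weighted sum produced by Evaluator.evaluate(),
+        # so a plain sort on it also covers the multi-objective case.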
+        # Put the record with a higher score at the beginning
+        sorted_trials_records: List[_TrialRecord] = sorted(
+            self.tuning_history, key=lambda x: x.trial_result, reverse=True
+        )
+        return sorted_trials_records[0].quant_config
+
+    def need_stop(self) -> bool:
+        return self.trial_cnt >= self.tuning_config.max_trials
+
+
 def init_tuning(tuning_config: TuningConfig) -> Tuple[ConfigLoader, TuningLogger, TuningMonitor]:
     config_loader = ConfigLoader(quant_configs=tuning_config.quant_configs, sampler=tuning_config.sampler)
     tuning_logger = TuningLogger()
-    tuning_monitor = TuningMonitor()
+    tuning_monitor = TuningMonitor(tuning_config)
     return config_loader, tuning_logger, tuning_monitor

From f7b9d7cca8975cb634aaaf300e393450d9778245 Mon Sep 17 00:00:00 2001
From: yiliu30
Date: Wed, 17 Jan 2024 12:43:42 +0800
Subject: [PATCH 11/15] refine log

Signed-off-by: yiliu30
---
 neural_compressor/common/base_tuning.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/neural_compressor/common/base_tuning.py b/neural_compressor/common/base_tuning.py
index d722b12f85a..fdc0908473c 100644
--- a/neural_compressor/common/base_tuning.py
+++ b/neural_compressor/common/base_tuning.py
@@ -144,7 +144,7 @@ def tuning_start(cls) -> None:
 
     @classmethod
     def trial_start(cls, trial_index: int = None) -> None:
-        logger.info(f" {trial_index}-trail started.")
+        logger.info(" %d-trail started.", trial_index)
 
     @classmethod
     def quantization_start(cls) -> None:
@@ -164,7 +164,7 @@ def evaluation_end(cls) -> None:
 
     @classmethod
     def trial_end(cls, trial_index: int = None) -> None:
-        logger.info(f" {trial_index}-trail end.")
+        logger.info(" %d-trail end.", trial_index)
 
     @classmethod
     def tuning_end(cls) -> None:

From 760763be2cea6ba3098521b417ec06834fc93583 Mon Sep 17 00:00:00 2001
From: yiliu30
Date: Wed, 17 Jan 2024 12:58:09 +0800
Subject: [PATCH 12/15] add more note

Signed-off-by: yiliu30
---
 neural_compressor/common/base_tuning.py | 24 ++++++++++++++++++++++--
 1 file changed, 22 insertions(+), 2 deletions(-)

diff --git a/neural_compressor/common/base_tuning.py b/neural_compressor/common/base_tuning.py
index fdc0908473c..586b5927d8c 100644
--- a/neural_compressor/common/base_tuning.py
+++ b/neural_compressor/common/base_tuning.py
@@ -15,7 +15,7 @@
 
 import copy
 import uuid
-from typing import Any, Dict, List, Optional, Tuple, Union
+from typing import Any, Callable, Dict, List, Optional, Tuple, Union
 
 from neural_compressor.common.base_config import BaseConfig, ComposableConfig
 from neural_compressor.common.logger import Logger
@@ -34,6 +34,25 @@
 
 
 class Evaluator:
+    """Evaluator is a collection of evaluation functions.
+
+    Examples:
+        def eval_acc(model):
+            ...
+
+        def eval_perf(model):
+            ...
+
+        # Usage
+        user_eval_fns1 = eval_acc
+        user_eval_fns2 = {"eval_fn": eval_acc}
+        user_eval_fns3 = {"eval_fn": eval_acc, "weight": 1.0, "name": "accuracy"}
+        user_eval_fns4 = [
+            {"eval_fn": eval_acc, "weight": .0.5},
+            {"eval_fn": eval_perf, "weight": 0.5, "name": "performance"},
+        ]
+    """
+
     EVAL_FN = "eval_fn"
     WEIGHT = "weight"
     FN_NAME = "name"
@@ -75,7 +94,8 @@ def _set_eval_fn_registry(self, user_eval_fns: List[Dict]) -> None:
             for user_eval_fn_pair in user_eval_fns
         ]
 
-    def set_eval_fn_registry(self, eval_fns: Optional[Union[Dict, List[Dict]]] = None) -> None:
+    def set_eval_fn_registry(self, eval_fns: Optional[Union[Callable, Dict, List[Dict]]] = None) -> None:
+        # About the eval_fns format, refer to the class docstring for details.
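+        # A bare callable is normalized to [{"eval_fn": fn, "weight": 1.0, "name": fn.__name__}],
+        # a single dict is wrapped into a one-element list, and a list of dicts is used as-is;
+        # e.g., set_eval_fn_registry(eval_acc) registers eval_acc with weight 1.0 under the name "eval_acc".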
if eval_fns is None: return elif callable(eval_fns): From 0450cb21bc91b25f3e0a1ac6eca89298e65a3fd2 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Wed, 17 Jan 2024 13:19:58 +0800 Subject: [PATCH 13/15] refine the tuning logger Signed-off-by: yiliu30 --- neural_compressor/common/base_tuning.py | 43 +++++++++++++++++-------- neural_compressor/torch/autotune.py | 2 +- 2 files changed, 30 insertions(+), 15 deletions(-) diff --git a/neural_compressor/common/base_tuning.py b/neural_compressor/common/base_tuning.py index 586b5927d8c..49b5148052d 100644 --- a/neural_compressor/common/base_tuning.py +++ b/neural_compressor/common/base_tuning.py @@ -14,8 +14,9 @@ import copy +import inspect import uuid -from typing import Any, Callable, Dict, List, Optional, Tuple, Union +from typing import Any, Callable, Dict, Generator, List, Optional, Tuple, Union from neural_compressor.common.base_config import BaseConfig, ComposableConfig from neural_compressor.common.logger import Logger @@ -127,6 +128,10 @@ class Sampler: class ConfigLoader: + def __init__(self, quant_configs, sampler: Sampler) -> None: + self.quant_configs = quant_configs + self.sampler = sampler + @staticmethod def parse_quant_config(quant_config: BaseConfig) -> List[BaseConfig]: if isinstance(quant_config, ComposableConfig): @@ -138,16 +143,13 @@ def parse_quant_config(quant_config: BaseConfig) -> List[BaseConfig]: return quant_config.expand() def parse_quant_configs(self) -> List[BaseConfig]: + # TODO (Yi) separate this functionality into `Sampler` in the next PR quant_config_list = [] for quant_config in self.quant_configs: quant_config_list.extend(ConfigLoader.parse_quant_config(quant_config)) return quant_config_list - def __init__(self, quant_configs, sampler: Sampler): - self.quant_configs = quant_configs - self.sampler = sampler - - def __iter__(self): + def __iter__(self) -> Generator[BaseConfig, Any, None]: for config in self.parse_quant_configs(): yield config @@ -158,37 +160,49 @@ class TuningLogger: It assists validation teams in retrieving logs. 
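+    Each hook below also records the file name and line number of its call site.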
""" + @classmethod + def _log_call_info(cls, message: str) -> str: + frame = inspect.currentframe().f_back.f_back + # Extract file name and line number + file_path = frame.f_code.co_filename + file_name = file_path.split("/")[-1] + line_number = frame.f_lineno + # Log the call position along with the message + logger.info(f"[{file_name}:{line_number}(Call position)] {message}") + @classmethod def tuning_start(cls) -> None: - logger.info("Tuning started.") + cls._log_call_info("Tuning started.") @classmethod def trial_start(cls, trial_index: int = None) -> None: - logger.info(" %d-trail started.", trial_index) + cls._log_call_info( + f" {trial_index}-trail started.", + ) @classmethod def quantization_start(cls) -> None: - logger.info("Quantization started.") + cls._log_call_info("Quantization started.") @classmethod def quantization_end(cls) -> None: - logger.info("Quantization end.") + cls._log_call_info("Quantization end.") @classmethod def evaluation_start(cls) -> None: - logger.info("Evaluation started.") + cls._log_call_info("Evaluation started.") @classmethod def evaluation_end(cls) -> None: - logger.info("Evaluation end.") + cls._log_call_info("Evaluation end.") @classmethod def trial_end(cls, trial_index: int = None) -> None: - logger.info(" %d-trail end.", trial_index) + cls._log_call_info(f" {trial_index}-trail end.") @classmethod def tuning_end(cls) -> None: - logger.info("Tuning completed.") + cls._log_call_info("Tuning completed.") class TuningConfig: @@ -245,6 +259,7 @@ def get_best_quant_config(self) -> BaseConfig: return sorted_trials_records[0].quant_config def need_stop(self) -> bool: + # TODO Support more stop criteria in the next PR, such as `reach accuracy goal`, `timeout`, and so on. return self.trial_cnt >= self.tuning_config.max_trials diff --git a/neural_compressor/torch/autotune.py b/neural_compressor/torch/autotune.py index d734839d7f1..64617abbd5c 100644 --- a/neural_compressor/torch/autotune.py +++ b/neural_compressor/torch/autotune.py @@ -56,6 +56,6 @@ def autotune( best_quant_config: BaseConfig = tuning_monitor.get_best_quant_config() quantize(model, quant_config=best_quant_config, run_fn=run_fn, run_args=run_args, inplace=True) best_quant_model = model # quantize model inplace - tuning_logger.trial_end() + tuning_logger.trial_end(trial_index) tuning_logger.tuning_end() return best_quant_model From 14c3fee2ea127a22f6f8c75de854f8230b5d7db5 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Wed, 17 Jan 2024 14:21:04 +0800 Subject: [PATCH 14/15] add __all__ for autotune Signed-off-by: yiliu30 --- neural_compressor/torch/autotune.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/neural_compressor/torch/autotune.py b/neural_compressor/torch/autotune.py index 64617abbd5c..cded26ebc48 100644 --- a/neural_compressor/torch/autotune.py +++ b/neural_compressor/torch/autotune.py @@ -25,6 +25,12 @@ logger = Logger().get_logger() +__all__ = [ + "get_default_tune_config", + "autotune", +] + + def get_default_tune_config() -> TuningConfig: # TODO use the registered default tuning config in the next PR return TuningConfig(quant_configs=[GPTQConfig(weight_bits=[4, 8]), RTNWeightQuantConfig(weight_bits=[4, 8])]) From ef8ea0384878cb0522a00fc4582776bb4121c407 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Thu, 18 Jan 2024 09:46:17 +0800 Subject: [PATCH 15/15] fixed some typos Signed-off-by: yiliu30 --- neural_compressor/common/base_tuning.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/neural_compressor/common/base_tuning.py 
index 49b5148052d..e207f9c722b 100644
--- a/neural_compressor/common/base_tuning.py
+++ b/neural_compressor/common/base_tuning.py
@@ -49,7 +49,7 @@ def eval_perf(model):
         user_eval_fns2 = {"eval_fn": eval_acc}
         user_eval_fns3 = {"eval_fn": eval_acc, "weight": 1.0, "name": "accuracy"}
         user_eval_fns4 = [
-            {"eval_fn": eval_acc, "weight": .0.5},
+            {"eval_fn": eval_acc, "weight": 0.5},
             {"eval_fn": eval_perf, "weight": 0.5, "name": "performance"},
         ]
     """
@@ -124,6 +124,7 @@ def self_check(self) -> None:
 
 
 class Sampler:
+    # TODO Separate sorting functionality of `ConfigLoader` into `Sampler` in the follow-up PR.
     pass
 
 
@@ -251,7 +252,7 @@ def get_number_of_trials(self):
         return len(self.tuning_history)
 
     def get_best_quant_config(self) -> BaseConfig:
-        assert self.get_number_of_trials() > 0, "Not trial record in tuning monitor."
+        assert self.get_number_of_trials() > 0, "No trial record in tuning monitor."
         # Put the record with a higher score at the beginning
         sorted_trials_records: List[_TrialRecord] = sorted(
             self.tuning_history, key=lambda x: x.trial_result, reverse=True