diff --git a/neural_compressor/common/base_config.py b/neural_compressor/common/base_config.py index 0515aa0fdfa..9fa37862cf3 100644 --- a/neural_compressor/common/base_config.py +++ b/neural_compressor/common/base_config.py @@ -25,7 +25,8 @@ from itertools import product from typing import Any, Callable, Dict, List, Optional, Tuple, Type, Union -from neural_compressor.common import Logger +from typing_extensions import Self + from neural_compressor.common.tuning_param import TuningParam from neural_compressor.common.utils import ( BASE_CONFIG, @@ -36,10 +37,9 @@ GLOBAL, LOCAL, OP_NAME_OR_MODULE_TYPE, + logger, ) -logger = Logger().get_logger() - __all__ = [ "options", "register_config", @@ -52,11 +52,18 @@ # Config registry to store all registered configs. -class ConfigRegistry: +class ConfigRegistry(object): registered_configs = {} + _config_registry = None + + def __new__(cls) -> Self: + if cls._config_registry is None: + cls._config_registry = super(ConfigRegistry, cls).__new__(cls) + + return cls._config_registry @classmethod - def register_config_impl(cls, framework_name="None", algo_name=None, priority=0): + def register_config_impl(cls, framework_name: str, algo_name: str, priority: Union[float, int] = 0): """Register config decorator. The register the configuration classes for different algorithms within specific frameworks. @@ -67,8 +74,8 @@ class ExampleAlgorithmConfig: # Configuration details for the ExampleAlgorithm Args: - framework_name: the framework name. Defaults to "None". - algo_name: the algorithm name. Defaults to None. + framework_name: the framework name. + algo_name: the algorithm name. priority: priority: the priority of the configuration. A larger number indicates a higher priority, which will be tried first at the auto-tune stage. Defaults to 0. """ @@ -116,7 +123,7 @@ def get_all_config_cls_by_fwk_name(cls, fwk_name: str) -> List[Type[BaseConfig]] config_registry = ConfigRegistry() -def register_config(framework_name="None", algo_name=None, priority=0): +def register_config(framework_name: str, algo_name: str, priority: Union[float, int] = 0): """Register config decorator. The register the configuration classes for different algorithms within specific frameworks. @@ -127,8 +134,8 @@ class ExampleAlgorithmConfig: # Configuration details for the ExampleAlgorithm Args: - framework_name: the framework name. Defaults to "None". - algo_name: the algorithm name. Defaults to None. + framework_name: the framework name. + algo_name: the algorithm name. priority: the priority of the configuration. A larger number indicates a higher priority, which will be tried first at the auto-tune stage. Defaults to 0. """ @@ -411,7 +418,7 @@ def to_config_mapping( @staticmethod def _is_op_type(name: str) -> bool: - # TODO (Yi), ort and tf need override it + # * Ort and TF may override this method. 
return not isinstance(name, str) @classmethod @@ -461,7 +468,6 @@ def to_config_mapping( ) -> OrderedDict[str, BaseConfig]: config_mapping = OrderedDict() for config in self.config_list: - global_config = config.global_config op_type_config_dict, op_name_config_dict = config._get_op_name_op_type_config() single_config_model_info = model_info.get(config.name, None) for op_name, op_type in single_config_model_info: diff --git a/neural_compressor/common/base_tuning.py b/neural_compressor/common/base_tuning.py index 352ac374ab4..54f908232ad 100644 --- a/neural_compressor/common/base_tuning.py +++ b/neural_compressor/common/base_tuning.py @@ -17,14 +17,12 @@ import uuid from typing import Any, Callable, Dict, Generator, Iterator, List, Optional, Sized, Tuple, Union -from neural_compressor.common import Logger from neural_compressor.common.base_config import BaseConfig -from neural_compressor.common.utils import TuningLogger - -logger = Logger().get_logger() +from neural_compressor.common.utils import TuningLogger, logger __all__ = [ "Evaluator", + "EvaluationFuncWrapper", "TuningConfig", "Sampler", "ConfigLoader", @@ -37,9 +35,27 @@ ] +class EvaluationFuncWrapper: + def __init__(self, eval_fn: Callable, eval_args=None): + """Evaluation function wrapper. + + Args: + eval_fn: a function for evaluating the float or quantized model + eval_args: positional arguments for `eval_fn` + """ + self.eval_fn = eval_fn + self.eval_args = eval_args + + def evaluate(self, model) -> Union[float, int]: + result = self.eval_fn(model, *self.eval_args) if self.eval_args else self.eval_fn(model) + return result + + class Evaluator: """Evaluator is a collection of evaluation functions. + Note: this class will be deprecated in the future. + Examples: def eval_acc(model): ... @@ -82,7 +98,6 @@ def evaluate(self, model) -> float: return result def _update_the_objective_score(self, eval_pair, eval_result, overall_result) -> float: - # TODO update the result according to the weight and algo_name return overall_result + eval_result * eval_pair[self.WEIGHT] def get_number_of_eval_functions(self) -> int: @@ -229,29 +244,25 @@ class TuningConfig: """Config for auto tuning pipeline. Examples: - # TODO: to refine it from neural_compressor.torch.quantization import TuningConfig tune_config = TuningConfig( config_set=[config1, config2, ...], max_trials=3, - tolerable_loss=0.01 - ) - - # Case 1: Tolerable Loss - fp32_baseline = 100 - config1_metric, config2_metric, ... = 98, 99, ... - - # Tuning result of case 1: - # The best tuning config is config2, because config2_metric >= fp32_baseline * (1 - tolerable_loss) - - # Case 2: Maximum Trials - fp32_baseline = 100 - config1_metric, config2_metric, config3_metric, ... = 98, 98, 97, ... - - # Tuning result of case 2: - # The best tuning config is config2, because of the following: - # 1. Not achieving the set goal. (config_metric < fp32_baseline * (1 - tolerable_loss)) - # 2. Reached maximum tuning times. + tolerable_loss=0.01) + + The tuning process stops when either of the following conditions is met: + 1) The number of trials reaches the maximum trials. + 2) The metric loss is within the tolerable loss. + + For condition 2), we calculate the metric loss as follows: + relative_loss = (fp32_baseline - eval_result_of_q_model) / fp32_baseline + If relative_loss <= tolerable_loss, we stop the tuning process.
+ For example: + tolerable_loss = 0.01 + fp32_baseline = 100 + eval_result_of_q_model = 99 + relative_loss = (100 - 99) / 100 = 0.01 + The metric loss is within the tolerable loss, so the tuning process is stopped. """ def __init__( @@ -321,10 +332,9 @@ def need_stop(self) -> bool: """Check if need to stop tuning. Either accuracy goal is met, max trials is reached or timeout is reached. Returns: - bool: True if need to stop, otherwise False. + stop_flag: True if the tuning process needs to stop, otherwise False. - # TODO: Support more stop criteria in the next PR, such as `timeout`, and so on. # reach max trials reach_max_trials = self.trial_cnt >= self.tuning_config.max_trials # reach accuracy goal diff --git a/neural_compressor/common/tuning_param.py b/neural_compressor/common/tuning_param.py index f7c894bb892..207811590ee 100644 --- a/neural_compressor/common/tuning_param.py +++ b/neural_compressor/common/tuning_param.py @@ -14,7 +14,7 @@ import typing from enum import Enum, auto -from typing import Any, List +from typing import Any from pydantic import BaseModel diff --git a/neural_compressor/onnxrt/quantization/autotune.py b/neural_compressor/onnxrt/quantization/autotune.py index 4f529ddd7b7..45a53737384 100644 --- a/neural_compressor/onnxrt/quantization/autotune.py +++ b/neural_compressor/onnxrt/quantization/autotune.py @@ -15,13 +15,13 @@ import os import tempfile from pathlib import Path -from typing import Callable, Dict, List, Optional, Tuple, Union +from typing import Any, Callable, List, Optional, Tuple, Union import onnx +from neural_compressor.common import logger from neural_compressor.common.base_config import BaseConfig, get_all_config_set_from_config_registry -from neural_compressor.common.base_tuning import TuningConfig, evaluator, init_tuning -from neural_compressor.common.utils import logger +from neural_compressor.common.base_tuning import EvaluationFuncWrapper, TuningConfig, init_tuning from neural_compressor.onnxrt.quantization.calibrate import CalibrationDataReader from neural_compressor.onnxrt.quantization.config import FRAMEWORK_NAME from neural_compressor.onnxrt.quantization.quantize import _quantize @@ -39,7 +39,8 @@ def get_all_config_set() -> Union[BaseConfig, List[BaseConfig]]: def autotune( model_input: Union[Path, str], tune_config: TuningConfig, - eval_fns: Union[Dict, List[Dict], Callable] = None, + eval_fn: Callable, + eval_args: Optional[Tuple[Any]] = None, calibration_data_reader: CalibrationDataReader = None, ) -> Union[None, onnx.ModelProto]: """The main entry of auto-tune. @@ -51,27 +52,22 @@ Support: Expand parameters to a list of parameters like TuningConfig(config_set=[RTNConfig(weight_bits=[4, 8])]) Pass a list of configs like TuningConfig(config_set=[RTNConfig(), GPTQConfig()]) - eval_fns (Union[Dict, List[Dict], Callable]): evaluate functions. - During evaluation, autotune will only pass model path as input into eatch function. - Support: - single eval function, - Dict like {"eval_fn": eval_acc} or {"eval_fn": eval_acc, "weight": 1.0, "name": "accuracy"}, - List of Dict, like [ - {"eval_fn": eval_acc, "weight": 0.5}, - {"eval_fn": eval_perf, "weight": 0.5, "name": "accuracy"}, - ] + eval_fn (Callable): the evaluation function. + During evaluation, autotune will only pass the model path as the input to the function. + eval_args (Optional[Tuple[Any]]): evaluation arguments. + Positional arguments for `eval_fn`. + calibration_data_reader (CalibrationDataReader): dataloader for calibration.
""" best_quant_model = None - evaluator.set_eval_fn_registry(eval_fns) - evaluator.self_check() + eval_func_wrapper = EvaluationFuncWrapper(eval_fn, eval_args) config_loader, tuning_logger, tuning_monitor = init_tuning(tuning_config=tune_config) try: - baseline: float = evaluator.evaluate(model_input) + baseline: float = eval_func_wrapper.evaluate(model_input) except Exception as e: print(e) if "'str' object has no attribute 'SerializeToString'" in str(e): - logger.warning("Please refine your eval_fns to accept model path (str) as input.") + logger.warning("Please refine your eval_fn to accept model path (str) as input.") exit(0) tuning_monitor.set_baseline(baseline) tuning_logger.tuning_start() @@ -105,7 +101,7 @@ def autotune( Path(model_input).parent.joinpath("config.json").as_posix(), Path(tmp_dir).joinpath("config.json").as_posix(), ) - eval_result: float = evaluator.evaluate(Path(tmp_dir).joinpath(Path(model_input).name).as_posix()) + eval_result: float = eval_func_wrapper.evaluate(Path(tmp_dir).joinpath(Path(model_input).name).as_posix()) tuning_logger.evaluation_end() logger.info("Evaluation result: %.4f", eval_result) tuning_monitor.add_trial_result(trial_index, eval_result, quant_config) diff --git a/neural_compressor/tensorflow/quantization/autotune.py b/neural_compressor/tensorflow/quantization/autotune.py index 4ae247a85db..e89756eece6 100644 --- a/neural_compressor/tensorflow/quantization/autotune.py +++ b/neural_compressor/tensorflow/quantization/autotune.py @@ -13,13 +13,13 @@ # limitations under the License. from copy import deepcopy -from typing import Callable, Dict, List, Optional, Union +from typing import Any, Callable, Dict, List, Optional, Tuple, Union import tensorflow as tf from neural_compressor.common import logger from neural_compressor.common.base_config import BaseConfig, get_all_config_set_from_config_registry -from neural_compressor.common.base_tuning import TuningConfig, evaluator, init_tuning +from neural_compressor.common.base_tuning import EvaluationFuncWrapper, TuningConfig, init_tuning from neural_compressor.common.utils import dump_elapsed_time from neural_compressor.tensorflow.quantization import quantize_model from neural_compressor.tensorflow.quantization.config import FRAMEWORK_NAME, StaticQuantConfig @@ -39,16 +39,16 @@ def get_all_config_set() -> Union[BaseConfig, List[BaseConfig]]: def autotune( model: Union[str, tf.keras.Model, BaseModel], tune_config: TuningConfig, - eval_fns: Optional[Union[Dict, List[Dict]]] = None, + eval_fn: Callable, + eval_args: Optional[Tuple[Any]] = None, calib_dataloader: Callable = None, calib_iteration: int = 100, ) -> Optional[BaseModel]: """The main entry of auto-tune.""" best_quant_model = None - evaluator.set_eval_fn_registry(eval_fns) - evaluator.self_check() + eval_func_wrapper = EvaluationFuncWrapper(eval_fn, eval_args) config_loader, tuning_logger, tuning_monitor = init_tuning(tuning_config=tune_config) - baseline: float = evaluator.evaluate(model) + baseline: float = eval_func_wrapper.evaluate(model) tuning_monitor.set_baseline(baseline) tuning_logger.tuning_start() for trial_index, quant_config in enumerate(config_loader): @@ -58,7 +58,7 @@ def autotune( q_model = quantize_model(model, quant_config, calib_dataloader, calib_iteration) tuning_logger.quantization_end() tuning_logger.evaluation_start() - eval_result: float = evaluator.evaluate(q_model) + eval_result: float = eval_func_wrapper.evaluate(q_model) tuning_logger.evaluation_end() tuning_monitor.add_trial_result(trial_index, eval_result, 
quant_config) tuning_logger.trial_end(trial_index) diff --git a/neural_compressor/torch/quantization/autotune.py b/neural_compressor/torch/quantization/autotune.py index 48c3bfe18d9..403d53c36cd 100644 --- a/neural_compressor/torch/quantization/autotune.py +++ b/neural_compressor/torch/quantization/autotune.py @@ -13,12 +13,12 @@ # limitations under the License. from copy import deepcopy -from typing import Dict, List, Optional, Union +from typing import Callable, List, Optional, Union import torch from neural_compressor.common.base_config import BaseConfig, get_all_config_set_from_config_registry -from neural_compressor.common.base_tuning import TuningConfig, evaluator, init_tuning +from neural_compressor.common.base_tuning import EvaluationFuncWrapper, TuningConfig, init_tuning from neural_compressor.common.utils import dump_elapsed_time from neural_compressor.torch.quantization import quantize from neural_compressor.torch.quantization.config import FRAMEWORK_NAME, RTNConfig @@ -46,16 +46,17 @@ def get_all_config_set() -> Union[BaseConfig, List[BaseConfig]]: def autotune( model: torch.nn.Module, tune_config: TuningConfig, - eval_fns: Optional[Union[Dict, List[Dict]]] = None, + eval_fn: Callable, + eval_args=None, run_fn=None, run_args=None, + example_inputs=None, ) -> Optional[torch.nn.Module]: """The main entry of auto-tune.""" best_quant_model = None - evaluator.set_eval_fn_registry(eval_fns) - evaluator.self_check() + eval_func_wrapper = EvaluationFuncWrapper(eval_fn, eval_args) config_loader, tuning_logger, tuning_monitor = init_tuning(tuning_config=tune_config) - baseline: float = evaluator.evaluate(model) + baseline: float = eval_func_wrapper.evaluate(model) tuning_monitor.set_baseline(baseline) tuning_logger.tuning_start() for trial_index, quant_config in enumerate(config_loader): @@ -63,10 +64,17 @@ def autotune( tuning_logger.quantization_start() logger.info(quant_config.to_dict()) # !!! Make sure to use deepcopy only when inplace is set to `True`. - q_model = quantize(deepcopy(model), quant_config=quant_config, run_fn=run_fn, run_args=run_args, inplace=True) + q_model = quantize( + deepcopy(model), + quant_config=quant_config, + run_fn=run_fn, + run_args=run_args, + inplace=True, + example_inputs=example_inputs, + ) tuning_logger.quantization_end() tuning_logger.evaluation_start() - eval_result: float = evaluator.evaluate(q_model) + eval_result: float = eval_func_wrapper.evaluate(q_model) tuning_logger.evaluation_end() tuning_monitor.add_trial_result(trial_index, eval_result, quant_config) tuning_logger.trial_end(trial_index) @@ -76,7 +84,12 @@ def autotune( best_quant_config: BaseConfig = tuning_monitor.get_best_quant_config() # !!! Make sure to use deepcopy only when inplace is set to `True`. 
q_model = quantize( - deepcopy(model), quant_config=best_quant_config, run_fn=run_fn, run_args=run_args, inplace=True + deepcopy(model), + quant_config=best_quant_config, + run_fn=run_fn, + run_args=run_args, + inplace=True, + example_inputs=example_inputs, ) best_quant_model = q_model # quantize model inplace break diff --git a/neural_compressor/torch/utils/utility.py b/neural_compressor/torch/utils/utility.py index fb38b8eff53..9fd2540b803 100644 --- a/neural_compressor/torch/utils/utility.py +++ b/neural_compressor/torch/utils/utility.py @@ -18,7 +18,7 @@ import torch from typing_extensions import TypeAlias -from neural_compressor.common.utils import logger +from neural_compressor.common import logger OP_NAME_AND_TYPE_TUPLE_TYPE: TypeAlias = Tuple[str, Union[torch.nn.Module, Callable]] diff --git a/test/3x/common/test_common.py b/test/3x/common/test_common.py index 338570912b2..68c25abd532 100644 --- a/test/3x/common/test_common.py +++ b/test/3x/common/test_common.py @@ -49,7 +49,7 @@ register_config, register_supported_configs_for_fwk, ) -from neural_compressor.common.base_tuning import ConfigLoader, ConfigSet, SequentialSampler +from neural_compressor.common.base_tuning import ConfigLoader, ConfigSet, Evaluator, SequentialSampler from neural_compressor.common.tuning_param import TuningParam from neural_compressor.common.utils import DEFAULT_WHITE_LIST, OP_NAME_OR_MODULE_TYPE @@ -193,6 +193,30 @@ def get_all_config_set() -> Union[BaseConfig, List[BaseConfig]]: register_supported_configs_for_fwk(fwk_name=FAKE_FRAMEWORK_NAME) +class TestEvaluator(unittest.TestCase): + def test_single_eval_fn(self): + def fake_eval_fn(model): + return 1.0 + + evaluator = Evaluator() + evaluator.set_eval_fn_registry(fake_eval_fn) + evaluator.self_check() + self.assertEqual(evaluator.get_number_of_eval_functions(), 1) + + def test_single_eval_fn_dict(self): + acc_data = iter([1.0, 0.8, 0.99, 1.0, 0.99, 0.99]) + + def eval_acc_fn(model) -> float: + return next(acc_data) + + eval_fns = {"eval_fn": eval_acc_fn, "weight": 0.5, "name": "accuracy"} + + evaluator = Evaluator() + evaluator.set_eval_fn_registry(eval_fns) + evaluator.self_check() + self.assertEqual(evaluator.get_number_of_eval_functions(), 1) + + class TestBaseConfig(unittest.TestCase): @classmethod def setUpClass(self): diff --git a/test/3x/onnxrt/test_autotune.py b/test/3x/onnxrt/test_autotune.py index a1909868352..8291d3ef344 100644 --- a/test/3x/onnxrt/test_autotune.py +++ b/test/3x/onnxrt/test_autotune.py @@ -19,6 +19,7 @@ import os import shutil import unittest +from typing import Callable, Dict, List, Optional, Union from unittest.mock import patch import numpy as np @@ -27,14 +28,19 @@ from optimum.exporters.onnx import main_export from neural_compressor.common import Logger -from neural_compressor.common.base_tuning import TuningConfig, evaluator +from neural_compressor.common.base_tuning import Evaluator, TuningConfig from neural_compressor.onnxrt import AWQConfig, CalibrationDataReader, GPTQConfig, RTNConfig, SmoohQuantConfig -from neural_compressor.onnxrt.quantization import autotune, get_all_config_set -from neural_compressor.onnxrt.quantization.quantize import _quantize +from neural_compressor.onnxrt.quantization import autotune logger = Logger().get_logger() +def _create_evaluator_for_eval_fns(eval_fns: Optional[Union[Callable, Dict, List[Dict]]] = None) -> Evaluator: + evaluator = Evaluator() + evaluator.set_eval_fn_registry(eval_fns) + return evaluator + + class DataReader(CalibrationDataReader): def __init__(self, model): model = 
onnx.load(model) @@ -89,13 +95,13 @@ def eval_acc_fn(model) -> float: best_model = autotune( model_input=self.gptj, tune_config=custom_tune_config, - eval_fns=eval_acc_fn, + eval_fn=eval_acc_fn, calibration_data_reader=self.data_reader, ) call_args_list = mock_warning.call_args_list # There may be multiple calls to warning, so we need to check all of them self.assertIn( - "Please refine your eval_fns to accept model path (str) as input.", [info[0][0] for info in call_args_list] + "Please refine your eval_fn to accept model path (str) as input.", [info[0][0] for info in call_args_list] ) def test_sq_auto_tune(self): @@ -117,21 +123,26 @@ def eval_perf_fn(model) -> float: }, ] + evaluator = _create_evaluator_for_eval_fns(eval_fns) + + def eval_fn_wrapper(model): + result = evaluator.evaluate(model) + return result + custom_tune_config = TuningConfig(config_set=[SmoohQuantConfig(alpha=0.5), SmoohQuantConfig(alpha=0.6)]) best_model = autotune( model_input=self.gptj, tune_config=custom_tune_config, - eval_fns=eval_acc_fn, + eval_fn=eval_acc_fn, calibration_data_reader=self.data_reader, ) - self.assertEqual(len(evaluator.eval_fn_registry), 1) self.assertIsNotNone(best_model) custom_tune_config = TuningConfig(config_set=[SmoohQuantConfig(alpha=[0.5, 0.6])]) best_model = autotune( model_input=self.gptj, tune_config=custom_tune_config, - eval_fns=eval_fns, + eval_fn=eval_fn_wrapper, calibration_data_reader=self.data_reader, ) self.assertEqual(len(evaluator.eval_fn_registry), 2) @@ -156,21 +167,26 @@ def eval_perf_fn(model) -> float: }, ] + evaluator = _create_evaluator_for_eval_fns(eval_fns) + + def eval_fn_wrapper(model): + result = evaluator.evaluate(model) + return result + custom_tune_config = TuningConfig(config_set=[RTNConfig(weight_group_size=32), RTNConfig(weight_group_size=64)]) best_model = autotune( model_input=self.gptj, tune_config=custom_tune_config, - eval_fns=eval_acc_fn, + eval_fn=eval_acc_fn, calibration_data_reader=self.data_reader, ) - self.assertEqual(len(evaluator.eval_fn_registry), 1) self.assertIsNone(best_model) custom_tune_config = TuningConfig(config_set=[RTNConfig(weight_group_size=[32, 64])]) best_model = autotune( model_input=self.gptj, tune_config=custom_tune_config, - eval_fns=eval_fns, + eval_fn=eval_fn_wrapper, calibration_data_reader=self.data_reader, ) self.assertEqual(len(evaluator.eval_fn_registry), 2) @@ -201,21 +217,26 @@ def eval_perf_fn(model) -> float: }, ] + evaluator = _create_evaluator_for_eval_fns(eval_fns) + + def eval_fn_wrapper(model): + result = evaluator.evaluate(model) + return result + custom_tune_config = TuningConfig(config_set=[AWQConfig(weight_group_size=32), AWQConfig(weight_group_size=64)]) best_model = autotune( model_input=self.gptj, tune_config=custom_tune_config, - eval_fns=eval_acc_fn, + eval_fn=eval_acc_fn, calibration_data_reader=self.data_reader, ) - self.assertEqual(len(evaluator.eval_fn_registry), 1) self.assertIsNone(best_model) custom_tune_config = TuningConfig(config_set=[AWQConfig(weight_group_size=[32, 64])]) best_model = autotune( model_input=self.gptj, tune_config=custom_tune_config, - eval_fns=eval_fns, + eval_fn=eval_fn_wrapper, calibration_data_reader=self.data_reader, ) self.assertEqual(len(evaluator.eval_fn_registry), 2) @@ -245,6 +266,11 @@ def eval_perf_fn(model) -> float: "weight": 0.5, }, ] + evaluator = _create_evaluator_for_eval_fns(eval_fns) + + def eval_fn_wrapper(model): + result = evaluator.evaluate(model) + return result custom_tune_config = TuningConfig( config_set=[GPTQConfig(weight_group_size=32), 
GPTQConfig(weight_group_size=64)] @@ -252,17 +278,16 @@ def eval_perf_fn(model) -> float: best_model = autotune( model_input=self.gptj, tune_config=custom_tune_config, - eval_fns=eval_acc_fn, + eval_fn=eval_acc_fn, calibration_data_reader=self.data_reader, ) - self.assertEqual(len(evaluator.eval_fn_registry), 1) self.assertIsNone(best_model) custom_tune_config = TuningConfig(config_set=[GPTQConfig(weight_group_size=[32, 64])]) best_model = autotune( model_input=self.gptj, tune_config=custom_tune_config, - eval_fns=eval_fns, + eval_fn=eval_fn_wrapper, calibration_data_reader=self.data_reader, ) self.assertEqual(len(evaluator.eval_fn_registry), 2) diff --git a/test/3x/tensorflow/test_autotune.py b/test/3x/tensorflow/test_autotune.py index aa1df2af3cd..9c89f8cd5fc 100644 --- a/test/3x/tensorflow/test_autotune.py +++ b/test/3x/tensorflow/test_autotune.py @@ -2,6 +2,7 @@ import shutil import unittest from functools import wraps +from typing import Callable, Dict, List, Optional, Union from unittest.mock import patch import numpy as np @@ -9,10 +10,16 @@ from tensorflow import keras from neural_compressor.common import logger -from neural_compressor.common.base_tuning import TuningConfig, evaluator +from neural_compressor.common.base_tuning import Evaluator, TuningConfig from neural_compressor.tensorflow.quantization import SmoothQuantConfig, StaticQuantConfig, autotune +def _create_evaluator_for_eval_fns(eval_fns: Optional[Union[Callable, Dict, List[Dict]]] = None) -> Evaluator: + evaluator = Evaluator() + evaluator.set_eval_fn_registry(eval_fns) + return evaluator + + def build_model(): # Load MNIST dataset mnist = keras.datasets.mnist @@ -125,10 +132,9 @@ def eval_perf_fn(model) -> float: best_model = autotune( model="baseline_model", tune_config=custom_tune_config, - eval_fns=eval_acc_fn, + eval_fn=eval_acc_fn, calib_dataloader=calib_dataloader, ) - self.assertEqual(len(evaluator.eval_fn_registry), 1) self.assertIsNotNone(best_model) def test_sq_auto_tune(self): @@ -150,22 +156,27 @@ def eval_perf_fn(model) -> float: }, ] + evaluator = _create_evaluator_for_eval_fns(eval_fns) + + def eval_fn_wrapper(model): + result = evaluator.evaluate(model) + return result + calib_dataloader = MyDataloader(dataset=Dataset()) custom_tune_config = TuningConfig(config_set=[SmoothQuantConfig(alpha=0.5), SmoothQuantConfig(alpha=0.6)]) best_model = autotune( model="baseline_model", tune_config=custom_tune_config, - eval_fns=eval_acc_fn, + eval_fn=eval_acc_fn, calib_dataloader=calib_dataloader, ) - self.assertEqual(len(evaluator.eval_fn_registry), 1) self.assertIsNone(best_model) custom_tune_config = TuningConfig(config_set=[SmoothQuantConfig(alpha=[0.5, 0.6])]) best_model = autotune( model="baseline_model", tune_config=custom_tune_config, - eval_fns=eval_fns, + eval_fn=eval_fn_wrapper, calib_dataloader=calib_dataloader, ) self.assertEqual(len(evaluator.eval_fn_registry), 2) diff --git a/test/3x/torch/quantization/weight_only/test_mixed_algos.py b/test/3x/torch/quantization/weight_only/test_mixed_algos.py index bc5ae94add3..d465f8cd9c3 100644 --- a/test/3x/torch/quantization/weight_only/test_mixed_algos.py +++ b/test/3x/torch/quantization/weight_only/test_mixed_algos.py @@ -5,7 +5,7 @@ import torch import transformers -from neural_compressor.common.utils import logger +from neural_compressor.common import logger from neural_compressor.torch.quantization import GPTQConfig, RTNConfig, quantize diff --git a/test/3x/torch/quantization/weight_only/test_woq_on_cuda.py 
b/test/3x/torch/quantization/weight_only/test_woq_on_cuda.py index 910b8682186..4709ece5742 100644 --- a/test/3x/torch/quantization/weight_only/test_woq_on_cuda.py +++ b/test/3x/torch/quantization/weight_only/test_woq_on_cuda.py @@ -5,7 +5,7 @@ import transformers from tqdm import tqdm -from neural_compressor.common.utils import logger +from neural_compressor.common import logger from neural_compressor.torch.algorithms.weight_only.gptq import move_input_to_device from neural_compressor.torch.quantization import ( AWQConfig, diff --git a/test/3x/torch/test_autotune.py b/test/3x/torch/test_autotune.py index 6f267ab5a96..0c82a5af051 100644 --- a/test/3x/torch/test_autotune.py +++ b/test/3x/torch/test_autotune.py @@ -1,12 +1,14 @@ import unittest from functools import wraps +from typing import Any, Callable, Dict, List, Optional, Union from unittest.mock import patch import torch import transformers +from neural_compressor.common import logger from neural_compressor.torch.quantization import RTNConfig, TuningConfig, autotune, get_all_config_set -from neural_compressor.torch.utils import constants, logger +from neural_compressor.torch.utils import constants FAKE_DOUBLE_QUANT_CONFIGS = { "BNB_NF4": { @@ -32,6 +34,14 @@ }, } +from neural_compressor.common.base_tuning import Evaluator + + +def _create_evaluator_for_eval_fns(eval_fns: Optional[Union[Callable, Dict, List[Dict]]] = None) -> Evaluator: + evaluator = Evaluator() + evaluator.set_eval_fn_registry(eval_fns) + return evaluator + def reset_tuning_target(test_func): @wraps(test_func) @@ -141,22 +151,17 @@ def setUp(self): @reset_tuning_target def test_autotune_api(self): logger.info("test_autotune_api") - from neural_compressor.common.base_tuning import evaluator def eval_acc_fn(model) -> float: return 1.0 custom_tune_config = TuningConfig(config_set=[RTNConfig(bits=[4, 6])], max_trials=2) - best_model = autotune( - model=build_simple_torch_model(), tune_config=custom_tune_config, eval_fns=[{"eval_fn": eval_acc_fn}] - ) + best_model = autotune(model=build_simple_torch_model(), tune_config=custom_tune_config, eval_fn=eval_acc_fn) self.assertIsNotNone(best_model) - self.assertEqual(len(evaluator.eval_fn_registry), 1) @reset_tuning_target def test_autotune_api_2(self): logger.info("test_autotune_api") - from neural_compressor.common.base_tuning import evaluator def eval_acc_fn(model) -> float: return 1.0 @@ -172,8 +177,14 @@ def eval_perf_fn(model) -> float: }, ] + evaluator = _create_evaluator_for_eval_fns(eval_fns) + + def eval_fn_wrapper(model): + result = evaluator.evaluate(model) + return result + custom_tune_config = TuningConfig(config_set=[RTNConfig(bits=[4, 6])], max_trials=2) - best_model = autotune(model=build_simple_torch_model(), tune_config=custom_tune_config, eval_fns=eval_fns) + best_model = autotune(model=build_simple_torch_model(), tune_config=custom_tune_config, eval_fn=eval_fn_wrapper) self.assertIsNotNone(best_model) self.assertEqual(len(evaluator.eval_fn_registry), 2) @@ -195,32 +206,25 @@ def eval_perf_fn(model) -> float: "weight": 0.5, }, ] + + evaluator = _create_evaluator_for_eval_fns(eval_fns) + + def eval_fn_wrapper(model): + result = evaluator.evaluate(model) + return result + custom_tune_config = TuningConfig(config_set=get_all_config_set(), max_trials=4) best_model = autotune( model=model, tune_config=custom_tune_config, - eval_fns=eval_fns, + eval_fn=eval_fn_wrapper, run_fn=run_fn_for_gptq, run_args=(dataloader, True), # run_args should be a tuple ) self.assertIsNotNone(best_model) - @reset_tuning_target - def 
test_autotune_not_eval_func(self): - logger.info("test_autotune_api") - - custom_tune_config = TuningConfig(config_set=[RTNConfig(bits=[4, 6])], max_trials=2) - - # Use assertRaises to check that an AssertionError is raised - with self.assertRaises(AssertionError) as context: - best_model = autotune(model=build_simple_torch_model(), tune_config=custom_tune_config) - self.assertEqual( - str(context.exception), "Please ensure that you register at least one evaluation metric for auto-tune." - ) - def test_autotune_baseline(self): logger.info("test_autotune_api") - from neural_compressor.common.base_tuning import evaluator baseline = [1.0] @@ -233,28 +237,28 @@ def eval_acc_fn(model): return res custom_tune_config = TuningConfig(config_set=[RTNConfig(bits=[4, 6, 5, 8])], max_trials=6) - best_model = autotune(model=build_simple_torch_model(), tune_config=custom_tune_config, eval_fns=eval_acc_fn) + best_model = autotune(model=build_simple_torch_model(), tune_config=custom_tune_config, eval_fn=eval_acc_fn) self.assertIsNotNone(best_model) # case 2 # Where tolerable_loss is 0.1, we expect the tuning to end with a "0-trail end" output logged. acc_res_lst = baseline + [0.9] * 2 + [0.99] + [1.01] custom_tune_config = TuningConfig(config_set=[RTNConfig(bits=[4, 6, 5, 8])], tolerable_loss=0.1) - best_model = autotune(model=build_simple_torch_model(), tune_config=custom_tune_config, eval_fns=eval_acc_fn) + best_model = autotune(model=build_simple_torch_model(), tune_config=custom_tune_config, eval_fn=eval_acc_fn) self.assertIsNotNone(best_model) # case 3 # Where tolerable_loss is -0.01, we expect the tuning to end with a "3-trail end" output logged. acc_res_lst = baseline + [0.9] * 2 + [0.99] + [1.01] custom_tune_config = TuningConfig(config_set=[RTNConfig(bits=[4, 6, 5, 8])], tolerable_loss=-0.01) - best_model = autotune(model=build_simple_torch_model(), tune_config=custom_tune_config, eval_fns=eval_acc_fn) + best_model = autotune(model=build_simple_torch_model(), tune_config=custom_tune_config, eval_fn=eval_acc_fn) self.assertIsNotNone(best_model) # case 4 # Where tolerable_loss is 0.01 and accuracy meets the goal, we expect best model is None. 
acc_res_lst = baseline + [0.9] * 2 + [0.9] + [0.9] custom_tune_config = TuningConfig(config_set=[RTNConfig(bits=[4, 6, 5, 8])], tolerable_loss=0.01) - best_model = autotune(model=build_simple_torch_model(), tune_config=custom_tune_config, eval_fns=eval_acc_fn) + best_model = autotune(model=build_simple_torch_model(), tune_config=custom_tune_config, eval_fn=eval_acc_fn) self.assertIsNone(best_model) @reset_tuning_target @@ -269,9 +273,7 @@ def eval_acc_fn(model) -> float: return 1.0 custom_tune_config = TuningConfig(config_set=get_rtn_double_quant_config_set(), max_trials=10) - best_model = autotune( - model=build_simple_torch_model(), tune_config=custom_tune_config, eval_fns=[{"eval_fn": eval_acc_fn}] - ) + best_model = autotune(model=build_simple_torch_model(), tune_config=custom_tune_config, eval_fn=eval_acc_fn) self.assertIsNotNone(best_model) @reset_tuning_target @@ -288,9 +290,7 @@ def eval_acc_fn(model) -> float: custom_tune_config = TuningConfig( config_set=get_rtn_double_quant_config_set(), max_trials=10, tolerable_loss=-1 ) - best_model = autotune( - model=build_simple_torch_model(), tune_config=custom_tune_config, eval_fns=[{"eval_fn": eval_acc_fn}] - ) + best_model = autotune(model=build_simple_torch_model(), tune_config=custom_tune_config, eval_fn=eval_acc_fn) self.assertIsNone(best_model) @patch("neural_compressor.torch.utils.constants.DOUBLE_QUANT_CONFIGS", FAKE_DOUBLE_QUANT_CONFIGS) @@ -305,9 +305,7 @@ def eval_acc_fn(model) -> float: return 1.0 custom_tune_config = TuningConfig(config_set=get_rtn_double_quant_config_set(), tolerable_loss=-1) - best_model = autotune( - model=build_simple_torch_model(), tune_config=custom_tune_config, eval_fns=[{"eval_fn": eval_acc_fn}] - ) + best_model = autotune(model=build_simple_torch_model(), tune_config=custom_tune_config, eval_fn=eval_acc_fn) self.assertIsNone(best_model)
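
A minimal usage sketch of the reworked `autotune` entry point for the PyTorch backend introduced by this diff: a single `eval_fn` plus optional positional `eval_args` replaces the old `eval_fns` registry. The toy model builder and metric below are hypothetical placeholders, not part of this patch.

# Sketch only: assumed usage of the new eval_fn/eval_args API; model and metric are placeholders.
import torch

from neural_compressor.torch.quantization import RTNConfig, TuningConfig, autotune


def build_toy_model() -> torch.nn.Module:
    # Hypothetical stand-in for a real float model.
    return torch.nn.Sequential(torch.nn.Linear(32, 32), torch.nn.ReLU(), torch.nn.Linear(32, 2))


def eval_acc_fn(model, scale=1.0) -> float:
    # Placeholder metric; a real eval_fn would score the model on a validation set.
    return 1.0 * scale


custom_tune_config = TuningConfig(config_set=[RTNConfig(bits=[4, 6])], max_trials=2, tolerable_loss=0.01)
# eval_args are forwarded positionally, i.e. EvaluationFuncWrapper calls eval_fn(model, *eval_args).
best_model = autotune(
    model=build_toy_model(),
    tune_config=custom_tune_config,
    eval_fn=eval_acc_fn,
    eval_args=(1.0,),
)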
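
If several weighted metrics still need to be combined, the updated tests wrap an `Evaluator` behind a single callable and pass that callable as `eval_fn`; the sketch below restates that pattern with placeholder metrics (the diff notes `Evaluator` will be deprecated in the future).

# Sketch of folding multiple weighted metrics into one eval_fn, mirroring the
# _create_evaluator_for_eval_fns helper used in the updated tests.
from neural_compressor.common.base_tuning import Evaluator


def eval_acc_fn(model) -> float:
    return 1.0  # placeholder accuracy metric


def eval_perf_fn(model) -> float:
    return 1.0  # placeholder performance metric


eval_fns = [
    {"eval_fn": eval_acc_fn, "weight": 0.5, "name": "accuracy"},
    {"eval_fn": eval_perf_fn, "weight": 0.5},
]

evaluator = Evaluator()
evaluator.set_eval_fn_registry(eval_fns)
evaluator.self_check()


def eval_fn_wrapper(model) -> float:
    # Single callable that autotune can consume via its eval_fn argument.
    return evaluator.evaluate(model)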