From 16a7b11508c008d4d4180a0fe0e31c75b8e5d662 Mon Sep 17 00:00:00 2001
From: Yi Liu <106061964+yiliu30@users.noreply.github.com>
Date: Thu, 11 Jul 2024 17:13:24 +0800
Subject: [PATCH] Get default config based on the auto-detected CPU type (#1904)

Signed-off-by: yiliu30
---
 neural_compressor/common/utils/constants.py |   3 +
 neural_compressor/common/utils/utility.py   | 132 +++++++++++++++++-
 .../torch/quantization/config.py            |  48 ++++---
 neural_compressor/torch/utils/utility.py    |  41 +++++-
 test/3x/common/test_utility.py              |  50 ++++++-
 test/3x/torch/test_config.py                |  54 +++++--
 6 files changed, 291 insertions(+), 37 deletions(-)

diff --git a/neural_compressor/common/utils/constants.py b/neural_compressor/common/utils/constants.py
index adf7755003b..76846682fd4 100644
--- a/neural_compressor/common/utils/constants.py
+++ b/neural_compressor/common/utils/constants.py
@@ -56,3 +56,6 @@ class Mode(Enum):
     PREPARE = "prepare"
     CONVERT = "convert"
     QUANTIZE = "quantize"
+
+
+SERVER_PROCESSOR_BRAND_KEY_WORLD_LST = ["Xeon"]
diff --git a/neural_compressor/common/utils/utility.py b/neural_compressor/common/utils/utility.py
index 8ba28d7512d..56326246d85 100644
--- a/neural_compressor/common/utils/utility.py
+++ b/neural_compressor/common/utils/utility.py
@@ -17,6 +17,7 @@
 """The utility of common module."""
 
 import collections
+import enum
 import importlib
 import subprocess
 import time
@@ -26,7 +27,7 @@
 import psutil
 from prettytable import PrettyTable
 
-from neural_compressor.common.utils import Mode, TuningLogger, logger
+from neural_compressor.common.utils import Mode, TuningLogger, constants, logger
 
 __all__ = [
     "set_workspace",
@@ -41,6 +42,9 @@
     "CpuInfo",
     "default_tuning_logger",
     "call_counter",
+    "cpu_info",
+    "ProcessorType",
+    "detect_processor_type_based_on_hw",
     "Statistics",
 ]
 
@@ -92,7 +96,7 @@ def __call__(self, *args, **kwargs):
 
 @singleton
 class CpuInfo(object):
-    """CPU info collection."""
+    """Get CPU Info."""
 
     def __init__(self):
         """Get whether the cpu numerical format is bf16, the number of sockets, cores and cores per socket."""
@@ -113,6 +117,39 @@ def __init__(self):
             b"\xB8\x07\x00\x00\x00" b"\x0f\xa2" b"\xC3",  # mov eax, 7 # cpuid # ret
         )
         self._bf16 = bool(eax & (1 << 5))
+        self._info = info
+        self._brand_raw = info.get("brand_raw", "")
+        # Detect the info below lazily, only when it is first requested.
+        self._cores = None
+        self._sockets = None
+        self._cores_per_socket = None
+
+    @property
+    def brand_raw(self):
+        """Get the brand name of the CPU."""
+        return self._brand_raw
+
+    @brand_raw.setter
+    def brand_raw(self, brand_name):
+        """Set the brand name of the CPU."""
+        self._brand_raw = brand_name
+
+    @staticmethod
+    def _detect_cores():
+        """Detect the number of physical cores via psutil."""
+        physical_cores = psutil.cpu_count(logical=False)
+        return physical_cores
+
+    @property
+    def cores(self):
+        """Get the number of physical cores on the platform."""
+        if self._cores is None:
+            self._cores = self._detect_cores()
+        return self._cores
+
+    @cores.setter
+    def cores(self, num_of_cores):
+        """Set the number of cores on the platform."""
+        self._cores = num_of_cores
 
     @property
     def bf16(self):
         """Get whether it is bf16."""
@@ -124,6 +161,60 @@ def vnni(self):
         """Get whether it is vnni."""
         return self._vnni
 
+    @property
+    def cores_per_socket(self) -> int:
+        """Get the number of cores per socket."""
+        if self._cores_per_socket is None:
+            self._cores_per_socket = self.cores // self.sockets
+        return self._cores_per_socket
+
+    @property
+    def sockets(self):
+        """Get the number of sockets on the platform."""
+        if self._sockets is None:
+            self._sockets = self._get_number_of_sockets()
+        return self._sockets
+
+    @sockets.setter
+    def sockets(self, num_of_sockets):
+        """Set the number of sockets on the platform."""
+        self._sockets = num_of_sockets
+
+    def _get_number_of_sockets(self) -> int:
+        """Get the number of sockets via an OS-specific shell command."""
+        if "arch" in self._info and "ARM" in self._info["arch"]:  # pragma: no cover
+            return 1
+
+        cmd = "cat /proc/cpuinfo | grep 'physical id' | sort -u | wc -l"
+        if psutil.WINDOWS:
+            cmd = r'wmic cpu get DeviceID | C:\Windows\System32\find.exe /C "CPU"'
+        elif psutil.MACOS:  # pragma: no cover
+            # NOTE: approximates the socket count with the core count on macOS.
+            cmd = "sysctl -n machdep.cpu.core_count"
+
+        num_sockets = None
+        try:
+            with subprocess.Popen(
+                args=cmd,
+                shell=True,
+                stdout=subprocess.PIPE,
+                stderr=subprocess.STDOUT,
+                universal_newlines=False,
+            ) as proc:
+                proc.wait()
+                if proc.stdout:
+                    for line in proc.stdout:
+                        num_sockets = int(line.decode("utf-8", errors="ignore").strip())
+        except Exception as e:
+            logger.error("Failed to get number of sockets: %s", e)
+        if isinstance(num_sockets, int) and num_sockets >= 1:
+            return num_sockets
+        else:
+            logger.warning("Failed to get number of sockets, return 1 as default.")
+            return 1
+
+
+cpu_info = CpuInfo()
+
 
 def dump_elapsed_time(customized_msg=""):
     """Get the elapsed time for decorated functions.
@@ -236,6 +327,43 @@ def wrapper(*args, **kwargs):
         return wrapper
 
+
+class ProcessorType(enum.Enum):
+    """The processor type: Client or Server."""
+
+    Client = "Client"
+    Server = "Server"
+
+
+def detect_processor_type_based_on_hw():
+    """Detect the processor type based on the hardware configuration.
+
+    Returns:
+        ProcessorType: The detected processor type (Server or Client).
+    """
+    # Detect the processor type based on the conditions below:
+    # - If there is more than one socket, it is a server.
+    # - If the brand name includes a keyword from `SERVER_PROCESSOR_BRAND_KEY_WORLD_LST`, it is a server.
+    # - If the memory size is greater than 32GB, it is a server.
+    log_msg = "Processor type detected as {processor_type} because {reason}."
+    if cpu_info.sockets > 1:
+        logger.info(log_msg.format(processor_type=ProcessorType.Server.value, reason="there is more than one socket"))
+        return ProcessorType.Server
+    elif any(brand in cpu_info.brand_raw for brand in constants.SERVER_PROCESSOR_BRAND_KEY_WORLD_LST):
+        logger.info(
+            log_msg.format(processor_type=ProcessorType.Server.value, reason=f"the brand name is {cpu_info.brand_raw}")
+        )
+        return ProcessorType.Server
+    elif psutil.virtual_memory().total / (1024**3) > 32:
+        logger.info(
+            log_msg.format(processor_type=ProcessorType.Server.value, reason="the memory size is greater than 32GB")
+        )
+        return ProcessorType.Server
+    else:
+        logger.info(
+            "Processor type detected as %s; pass `processor_type='server'` to override it if needed.",
+            ProcessorType.Client.value,
+        )
+        return ProcessorType.Client
+
+
 class Statistics:
     """The statistics printer."""
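Usage sketch (illustrative only, not part of the patch): exercising the detection helper above directly. The result depends on the host hardware; on a multi-socket Xeon machine it logs and returns Server, otherwise it falls through the brand-keyword and memory checks.

    from neural_compressor.common.utils.utility import (
        ProcessorType,
        cpu_info,
        detect_processor_type_based_on_hw,
    )

    # The singleton defers socket/core counting until first access.
    print(cpu_info.brand_raw, cpu_info.sockets, cpu_info.cores_per_socket)

    # Falls through: socket count -> brand keyword -> memory size -> Client.
    p_type = detect_processor_type_based_on_hw()
    assert p_type in (ProcessorType.Client, ProcessorType.Server)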
diff --git a/neural_compressor/torch/quantization/config.py b/neural_compressor/torch/quantization/config.py
index 71b01353d5a..9014f1576a3 100644
--- a/neural_compressor/torch/quantization/config.py
+++ b/neural_compressor/torch/quantization/config.py
@@ -23,6 +23,7 @@
 
 import torch
 
+import neural_compressor.torch.utils as torch_utils
 from neural_compressor.common.base_config import (
     BaseConfig,
     config_registry,
@@ -219,14 +220,17 @@ def get_config_set_for_tuning(cls) -> Union[None, "RTNConfig", List["RTNConfig"]
             dtype=["int4", "nf4"], use_sym=[True, False], group_size=[32, 128], use_mse_search=[False, True]
         )
 
+    @classmethod
+    def get_predefined_configs(cls) -> Dict[torch_utils.ProcessorType, "RTNConfig"]:
+        """Get the predefined configs for the client and server processor types."""
+        pre_defined_configs: Dict[torch_utils.ProcessorType, RTNConfig] = {}
+        pre_defined_configs[torch_utils.ProcessorType.Client] = cls(use_layer_wise=True)
+        pre_defined_configs[torch_utils.ProcessorType.Server] = cls()
+        return pre_defined_configs
 
 
-def get_default_rtn_config() -> RTNConfig:
-    """Generate the default rtn config.
-
-    Returns:
-        the default rtn config.
-    """
-    return RTNConfig()
+def get_default_rtn_config(processor_type: Optional[Union[str, torch_utils.ProcessorType]] = None) -> RTNConfig:
+    """Generate the default rtn config for the given or auto-detected processor type."""
+    processor_type = torch_utils.get_processor_type_from_user_config(processor_type)
+    return RTNConfig.get_predefined_configs()[processor_type]
 
 
 def get_default_double_quant_config(type="BNB_NF4"):
@@ -378,14 +382,17 @@ def get_config_set_for_tuning(cls) -> Union[None, "GPTQConfig", List["GPTQConfig
         # TODO fwk owner needs to update it.
         return GPTQConfig(act_order=[True, False], use_sym=[False, True])
 
+    @classmethod
+    def get_predefined_configs(cls) -> Dict[torch_utils.ProcessorType, "GPTQConfig"]:
+        """Get the predefined configs for the client and server processor types."""
+        pre_defined_configs: Dict[torch_utils.ProcessorType, GPTQConfig] = {}
+        pre_defined_configs[torch_utils.ProcessorType.Client] = cls(use_layer_wise=True)
+        pre_defined_configs[torch_utils.ProcessorType.Server] = cls()
+        return pre_defined_configs
 
 
-def get_default_gptq_config() -> GPTQConfig:
-    """Generate the default gptq config.
-
-    Returns:
-        the default gptq config.
- """ - return GPTQConfig() +def get_default_gptq_config(processor_type: Optional[Union[str, torch_utils.ProcessorType]] = None) -> RTNConfig: + process_type = torch_utils.get_processor_type_from_user_config(processor_type) + return GPTQConfig.get_predefined_configs()[process_type] ######################## AWQ Config ############################### @@ -725,6 +732,7 @@ def __init__( not_use_best_mse: bool = False, dynamic_max_gap: int = -1, scale_dtype: str = "fp16", + use_layer_wise: bool = False, white_list: Optional[List[OP_NAME_OR_MODULE_TYPE]] = DEFAULT_WHITE_LIST, ): """Init AUTOROUND weight-only quantization config. @@ -777,6 +785,7 @@ def __init__( self.not_use_best_mse = not_use_best_mse self.dynamic_max_gap = dynamic_max_gap self.scale_dtype = scale_dtype + self.use_layer_wise = use_layer_wise self._post_init() @classmethod @@ -803,14 +812,17 @@ def get_config_set_for_tuning(cls) -> Union[None, "AutoRoundConfig", List["AutoR # TODO fwk owner needs to update it. return AutoRoundConfig(bits=[4, 6]) + @classmethod + def get_predefined_configs(cls) -> Dict[torch_utils.ProcessorType, "AutoRoundConfig"]: + pre_defined_configs: Dict[torch_utils.ProcessorType, AutoRoundConfig] = {} + pre_defined_configs[torch_utils.ProcessorType.Client] = cls(use_layer_wise=True) + pre_defined_configs[torch_utils.ProcessorType.Server] = cls() + return pre_defined_configs -def get_default_AutoRound_config() -> AutoRoundConfig: - """Generate the default AUTOROUND config. - Returns: - the default AUTOROUND config. - """ - return AutoRoundConfig() +def get_default_AutoRound_config(processor_type: Optional[Union[str, torch_utils.ProcessorType]] = None) -> RTNConfig: + process_type = torch_utils.get_processor_type_from_user_config(processor_type) + return AutoRoundConfig.get_predefined_configs()[process_type] ######################## MX Config ############################### diff --git a/neural_compressor/torch/utils/utility.py b/neural_compressor/torch/utils/utility.py index bf1bb2a77b1..599be8578f4 100644 --- a/neural_compressor/torch/utils/utility.py +++ b/neural_compressor/torch/utils/utility.py @@ -13,12 +13,21 @@ # limitations under the License. -from typing import Callable, Dict, List, Tuple, Union +import enum +from typing import Callable, Dict, List, Optional, Tuple, Union +import psutil import torch from typing_extensions import TypeAlias -from neural_compressor.common.utils import Mode, Statistics, logger +from neural_compressor.common.utils import ( + Mode, + ProcessorType, + Statistics, + cpu_info, + detect_processor_type_based_on_hw, + logger, +) OP_NAME_AND_TYPE_TUPLE_TYPE: TypeAlias = Tuple[str, Union[torch.nn.Module, Callable]] @@ -235,3 +244,31 @@ def get_model_device(model: torch.nn.Module): """ for n, p in model.named_parameters(): return p.data.device.type # p.data.device == device(type='cpu') + + +def get_processor_type_from_user_config(user_processor_type: Optional[Union[str, ProcessorType]] = None): + """Get the processor type. + + Get the processor type based on the user configuration or automatically detect it based on the hardware. + + Args: + user_processor_type (Optional[Union[str, ProcessorType]]): The user-specified processor type. Defaults to None. + + Returns: + ProcessorType: The detected or user-specified processor type. + + Raises: + AssertionError: If the user-specified processor type is not supported. + NotImplementedError: If the processor type is not recognized. 
+ """ + if user_processor_type is None: + processor_type = detect_processor_type_based_on_hw() + elif isinstance(user_processor_type, ProcessorType): + processor_type = user_processor_type + elif isinstance(user_processor_type, str): + user_processor_type = user_processor_type.lower().capitalize() + assert user_processor_type in ProcessorType.__members__, f"Unsupported processor type: {user_processor_type}" + processor_type = ProcessorType(user_processor_type) + else: + raise NotImplementedError(f"Unsupported processor type: {user_processor_type}") + return processor_type diff --git a/test/3x/common/test_utility.py b/test/3x/common/test_utility.py index b605b3b506b..fd349ce1706 100644 --- a/test/3x/common/test_utility.py +++ b/test/3x/common/test_utility.py @@ -11,6 +11,8 @@ import unittest from unittest.mock import MagicMock, patch +import pytest + import neural_compressor.common.utils.utility as inc_utils from neural_compressor.common import options from neural_compressor.common.utils import ( @@ -41,7 +43,7 @@ def test_set_random_seed(self): set_random_seed(seed) def test_set_workspace(self): - workspace = "/path/to/workspace" + workspace = "/tmp/inc_workspace" set_workspace(workspace) self.assertEqual(options.workspace, workspace) returned_workspace = get_workspace() @@ -78,6 +80,9 @@ def test_cpu_info(self): cpu_info = CpuInfo() assert isinstance(cpu_info.bf16, bool), "bf16 should be a boolean" assert isinstance(cpu_info.vnni, bool), "avx512 should be a boolean" + assert cpu_info.cores >= 1 + assert cpu_info.sockets >= 1 + assert cpu_info.cores_per_socket >= 1 class TestLazyImport(unittest.TestCase): @@ -113,6 +118,11 @@ def test_lazy_import_access_attr(self): self.assertIsNotNone(lazy_import.module) + def test_call_method_module_not_found(self): + with self.assertRaises(ImportError): + lazy_import = LazyImport("non_existent_module") + lazy_import(3, 4) + class TestUtils(unittest.TestCase): def test_dump_elapsed_time(self): @@ -190,5 +200,39 @@ def add(a, b): self.assertEqual(inc_utils.FUNC_CALL_COUNTS["add"], 3) -if __name__ == "__main__": - unittest.main() +class TestAutoDetectProcessorType: + @pytest.fixture + def force_client(self, monkeypatch): + monkeypatch.setattr(inc_utils.cpu_info, "sockets", 1) + monkeypatch.setattr(inc_utils.cpu_info, "brand_raw", "") + + # force the ram size detected by psutil <= 64GB + class MockMemory: + def __init__(self, total): + self.total = total + + # Patch the psutil.virtual_memory() method + monkeypatch.setattr(inc_utils.psutil, "virtual_memory", lambda: MockMemory(16 * 1024**3)) + + def test_auto_detect_processor_type(self, force_client): + p_type = inc_utils.detect_processor_type_based_on_hw() + assert ( + p_type == inc_utils.ProcessorType.Client + ), f"Expect processor type to be {inc_utils.ProcessorType.Client}, got {p_type}" + + def test_detect_processor_type_based_on_hw(self): + # Test when the brand name includes a server keyword + inc_utils.cpu_info.brand_raw = "Intel Xeon Server" + assert inc_utils.detect_processor_type_based_on_hw() == inc_utils.ProcessorType.Server + + # Test when the memory size is greater than 32GB + with patch("psutil.virtual_memory") as mock_virtual_memory: + mock_virtual_memory.return_value.total = 64 * 1024**3 + assert inc_utils.detect_processor_type_based_on_hw() == inc_utils.ProcessorType.Server + + # Test when none of the conditions are met + inc_utils.cpu_info.sockets = 1 + inc_utils.cpu_info.brand_raw = "Intel Core i7" + with patch("psutil.virtual_memory") as mock_virtual_memory: + 
diff --git a/test/3x/common/test_utility.py b/test/3x/common/test_utility.py
index b605b3b506b..fd349ce1706 100644
--- a/test/3x/common/test_utility.py
+++ b/test/3x/common/test_utility.py
@@ -11,6 +11,8 @@
 import unittest
 from unittest.mock import MagicMock, patch
 
+import pytest
+
 import neural_compressor.common.utils.utility as inc_utils
 from neural_compressor.common import options
 from neural_compressor.common.utils import (
@@ -41,7 +43,7 @@ def test_set_random_seed(self):
         set_random_seed(seed)
 
     def test_set_workspace(self):
-        workspace = "/path/to/workspace"
+        workspace = "/tmp/inc_workspace"
         set_workspace(workspace)
         self.assertEqual(options.workspace, workspace)
         returned_workspace = get_workspace()
@@ -78,6 +80,9 @@ def test_cpu_info(self):
         cpu_info = CpuInfo()
         assert isinstance(cpu_info.bf16, bool), "bf16 should be a boolean"
         assert isinstance(cpu_info.vnni, bool), "avx512 should be a boolean"
+        assert cpu_info.cores >= 1
+        assert cpu_info.sockets >= 1
+        assert cpu_info.cores_per_socket >= 1
 
 
 class TestLazyImport(unittest.TestCase):
@@ -113,6 +118,11 @@ def test_lazy_import_access_attr(self):
 
         self.assertIsNotNone(lazy_import.module)
 
+    def test_call_method_module_not_found(self):
+        with self.assertRaises(ImportError):
+            lazy_import = LazyImport("non_existent_module")
+            lazy_import(3, 4)
+
 
 class TestUtils(unittest.TestCase):
     def test_dump_elapsed_time(self):
@@ -190,5 +200,39 @@ def add(a, b):
         self.assertEqual(inc_utils.FUNC_CALL_COUNTS["add"], 3)
 
 
-if __name__ == "__main__":
-    unittest.main()
+class TestAutoDetectProcessorType:
+    @pytest.fixture
+    def force_client(self, monkeypatch):
+        monkeypatch.setattr(inc_utils.cpu_info, "sockets", 1)
+        monkeypatch.setattr(inc_utils.cpu_info, "brand_raw", "")
+
+        # Force the RAM size reported by psutil below the 32GB server threshold.
+        class MockMemory:
+            def __init__(self, total):
+                self.total = total
+
+        # Patch psutil.virtual_memory() as seen by the module under test.
+        monkeypatch.setattr(inc_utils.psutil, "virtual_memory", lambda: MockMemory(16 * 1024**3))
+
+    def test_auto_detect_processor_type(self, force_client):
+        p_type = inc_utils.detect_processor_type_based_on_hw()
+        assert (
+            p_type == inc_utils.ProcessorType.Client
+        ), f"Expect processor type to be {inc_utils.ProcessorType.Client}, got {p_type}"
+
+    def test_detect_processor_type_based_on_hw(self):
+        # Pin the socket count so each condition below is exercised in isolation.
+        inc_utils.cpu_info.sockets = 1
+
+        # Test when the brand name includes a server keyword.
+        inc_utils.cpu_info.brand_raw = "Intel Xeon Server"
+        assert inc_utils.detect_processor_type_based_on_hw() == inc_utils.ProcessorType.Server
+
+        # Test when the memory size is greater than 32GB.
+        inc_utils.cpu_info.brand_raw = "Intel Core i7"
+        with patch("psutil.virtual_memory") as mock_virtual_memory:
+            mock_virtual_memory.return_value.total = 64 * 1024**3
+            assert inc_utils.detect_processor_type_based_on_hw() == inc_utils.ProcessorType.Server
+
+        # Test when none of the conditions are met.
+        with patch("psutil.virtual_memory") as mock_virtual_memory:
+            mock_virtual_memory.return_value.total = 16 * 1024**3
+            assert inc_utils.detect_processor_type_based_on_hw() == inc_utils.ProcessorType.Client
diff --git a/test/3x/torch/test_config.py b/test/3x/torch/test_config.py
index c5bdc5261cf..68e7d5975cc 100644
--- a/test/3x/torch/test_config.py
+++ b/test/3x/torch/test_config.py
@@ -1,9 +1,11 @@
 import copy
 import unittest
 
+import pytest
 import torch
 import transformers
 
+import neural_compressor.torch.utils as torch_utils
 from neural_compressor.torch.quantization import (
     AutoRoundConfig,
     AWQConfig,
@@ -13,6 +15,8 @@
     SmoothQuantConfig,
     StaticQuantConfig,
     TEQConfig,
+    get_default_AutoRound_config,
+    get_default_gptq_config,
     get_default_hqq_config,
     get_default_rtn_config,
     quantize,
@@ -331,15 +335,41 @@ def test_hqq_config(self):
         self.assertEqual(hqq_config.to_dict(), hqq_config2.to_dict())
 
 
-class TestQuantConfigForAutotune(unittest.TestCase):
-    def test_expand_config(self):
-        # test the expand functionalities, the user is not aware it
-
-        tune_config = RTNConfig(bits=[4, 6])
-        expand_config_list = RTNConfig.expand(tune_config)
-        self.assertEqual(expand_config_list[0].bits, 4)
-        self.assertEqual(expand_config_list[1].bits, 6)
-
-
-if __name__ == "__main__":
-    unittest.main()
+class TestQuantConfigBasedOnProcessorType:
+
+    @pytest.mark.parametrize("config_cls", [RTNConfig, GPTQConfig, AutoRoundConfig])
+    def test_get_config_based_on_processor_type(self, config_cls):
+        config_for_client = config_cls.get_predefined_configs()[torch_utils.ProcessorType.Client]
+        assert (
+            config_for_client.use_layer_wise
+        ), f"Expect use_layer_wise to be True, got {config_for_client.use_layer_wise}"
+
+        config_for_server = config_cls.get_predefined_configs()[torch_utils.ProcessorType.Server]
+        assert (
+            config_for_server.use_layer_wise is False
+        ), f"Expect use_layer_wise to be False, got {config_for_server.use_layer_wise}"
+
+    @pytest.fixture
+    def force_server(self, monkeypatch):
+        monkeypatch.setattr(torch_utils.utility.cpu_info, "sockets", 2)
+
+    def test_get_default_config_force_server(self, force_server):
+        rtn_config = get_default_rtn_config()
+        assert not rtn_config.use_layer_wise, f"Expect use_layer_wise to be `False`, got {rtn_config.use_layer_wise}"
+        gptq_config = get_default_gptq_config()
+        assert not gptq_config.use_layer_wise, f"Expect use_layer_wise to be `False`, got {gptq_config.use_layer_wise}"
+
+    # NOTE: for p_type=None, the assertions below assume the host is detected as a server.
+    @pytest.mark.parametrize("p_type", [None, torch_utils.ProcessorType.Client, torch_utils.ProcessorType.Server])
+    def test_get_default_config(self, p_type):
+        rtn_config = get_default_rtn_config(processor_type=p_type)
+        assert rtn_config.use_layer_wise == (
+            p_type == torch_utils.ProcessorType.Client
+        ), f"Expect use_layer_wise to be {p_type == torch_utils.ProcessorType.Client}, got {rtn_config.use_layer_wise}"
+        gptq_config = get_default_gptq_config(processor_type=p_type)
+        assert gptq_config.use_layer_wise == (
+            p_type == torch_utils.ProcessorType.Client
+        ), f"Expect use_layer_wise to be {p_type == torch_utils.ProcessorType.Client}, got {gptq_config.use_layer_wise}"
+        autoround_config = get_default_AutoRound_config(processor_type=p_type)
+        assert autoround_config.use_layer_wise == (
+            p_type == torch_utils.ProcessorType.Client
+        ), f"Expect use_layer_wise to be {p_type == torch_utils.ProcessorType.Client}, got {autoround_config.use_layer_wise}"
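Usage sketch (illustrative only, not part of the patch): because cpu_info is a module-level singleton with property setters, the detection result can also be pinned outside of pytest fixtures, mirroring the tests above.

    import neural_compressor.common.utils.utility as inc_utils

    inc_utils.cpu_info.sockets = 2  # the setter bypasses lazy hardware detection
    assert inc_utils.detect_processor_type_based_on_hw() is inc_utils.ProcessorType.Server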