From 46ad0b39aa9e93ced2a442fd9b805805f8b45f8d Mon Sep 17 00:00:00 2001 From: "Cheng, Penghui" Date: Tue, 1 Aug 2023 12:48:53 +0800 Subject: [PATCH 1/4] Support inspect tensor for fx model Signed-off-by: Cheng, Penghui --- neural_compressor/adaptor/pytorch.py | 575 ++++++++++++------ .../test_adaptor_pytorch_1.x.py | 32 + 2 files changed, 435 insertions(+), 172 deletions(-) diff --git a/neural_compressor/adaptor/pytorch.py b/neural_compressor/adaptor/pytorch.py index b1eda3d71dc..1650bb4c97c 100644 --- a/neural_compressor/adaptor/pytorch.py +++ b/neural_compressor/adaptor/pytorch.py @@ -847,6 +847,12 @@ def __init__(self, framework_specific_info): self.fp32_results = [] self.fp32_preds_as_label = False + if self.version.release >= Version("2.0").release: + static_quant_mapping = tq.quantization_mappings.get_default_static_quant_module_mappings() + self.fused_op_list = \ + [static_quant_mapping[key] for key in static_quant_mapping if "intrinsic." in str(key)] + self.fused_dict = {} + def calib_func(self, model, dataloader, tmp_iterations, conf=None): try: for idx, (input, label) in enumerate(dataloader): @@ -1229,6 +1235,400 @@ def _combine_capability(self, bf16_ops, q_capability): q_capability['optypewise'][bf16_op[1]] = [bf16_config, fp32_config] return q_capability + def get_fused_list(self, model): + """This is a helper function to get fused op list. + + Args: + model (object): input model + + Returns: + dict of op list + """ + fused_dict = {} + for op_name, child in model.named_modules(): + if type(child) in self.fused_op_list: + in_fused_loop = False + fp32_int8_ops = [op_name,] + type_name = str(child).split("(")[0] + prefix_index = op_name.rfind(".") + for fp32_op_name, module in self.pre_optimized_model.model.named_modules(): + fp32_type_name = str(module).split("(")[0] + prefix_fp32_index = fp32_op_name.rfind(".") + if op_name == fp32_op_name: + in_fused_loop = True + continue + elif in_fused_loop and \ + op_name[: prefix_index if prefix_index > -1 else 0] == \ + fp32_op_name[: prefix_fp32_index if prefix_fp32_index > -1 else 0]: + if "BatchNorm" in str(type(module)): + fp32_int8_ops.append(fp32_op_name) + continue + elif fp32_type_name in type_name.split(".")[-1][-len(fp32_type_name) - 2:]: + fp32_int8_ops.append(fp32_op_name) + in_fused_loop = False + break + else: + in_fused_loop = False + break + elif in_fused_loop: + in_fused_loop = False + break + fused_dict.update({op_name: fp32_int8_ops}) + return fused_dict + + def inspect_tensor(self, + model, + dataloader, + op_list=None, + iteration_list=None, + inspect_type='activation', + save_to_disk=False, + save_path=None, + quantization_cfg=None): + assert self.version.release >= Version("2.0").release, "Inspect_tensor only support torch 1.8 or above!" + from neural_compressor.utils.utility import dump_data_to_local + from torch import dequantize + is_quantized = model.is_quantized + op_list_ = [] + fp32_int8_map = {} + for op_name in op_list: + op_list_.append(op_name) + for key in self.fused_dict: + if op_name in self.fused_dict[key]: + op_list_.pop() + fp32_int8_map[op_name] = \ + {'activation': self.fused_dict[key][-1], 'weight': self.fused_dict[key][0]} + if not is_quantized: + op_list_.append(self.fused_dict[key][-1]) + elif self.fused_dict[key][0] not in op_list_: + op_list_.append(self.fused_dict[key][0]) + break + + assert min(iteration_list) > 0, \ + "Iteration number should great zero, 1 means first iteration." 
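# A standalone sketch of the op-list remapping performed just above, assuming a
# hypothetical fused_dict entry such as ResNet's conv1+bn1 pair; the op names
# and the fused_dict contents here are illustrative only, not taken from a real run.
def remap_op_list(op_list, fused_dict, is_quantized):
    # Requested ops that belong to a fused block are swapped for the op whose
    # observer actually sees the wanted tensor.
    op_list_, fp32_int8_map = [], {}
    for op_name in op_list:
        op_list_.append(op_name)
        for fused_name, fp32_ops in fused_dict.items():
            if op_name in fp32_ops:
                op_list_.pop()
                fp32_int8_map[op_name] = {"activation": fp32_ops[-1], "weight": fp32_ops[0]}
                if not is_quantized:
                    op_list_.append(fp32_ops[-1])   # fp32 graph: hook the last fused child
                elif fp32_ops[0] not in op_list_:
                    op_list_.append(fp32_ops[0])    # int8 graph: hook the fused module itself
                break
    return op_list_, fp32_int8_map

# remap_op_list(["layer1.0.conv1"],
#               {"layer1.0.conv1": ["layer1.0.conv1", "layer1.0.bn1"]},
#               is_quantized=False)
# -> (["layer1.0.bn1"],
#     {"layer1.0.conv1": {"activation": "layer1.0.bn1", "weight": "layer1.0.conv1"}})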
+ iterations = max(iteration_list) if iteration_list is not None else -1 + new_model = self._pre_eval_hook(model, op_list=op_list_, iteration_list=iteration_list) + self.evaluate(new_model, dataloader, iteration=iterations) + observer_dict = {} + ret = {} + if inspect_type == 'activation' or inspect_type == 'all': + from torch.quantization.quantize import _get_observer_dict as get_observer_dict + ret['activation'] = [] + get_observer_dict(new_model.model, observer_dict) + if iteration_list is None: + iteration_list = [1] + for i in iteration_list: + summary = OrderedDict() + for key in observer_dict: + if isinstance(observer_dict[key], torch.nn.modules.linear.Identity): + continue + op_name = key.replace(".activation_post_process", "") + value = observer_dict[key].get_tensor_value()[i] + if op_name in op_list: + if type(value) is list: + summary[op_name] = {} + for index in range(len(value)): + summary[op_name].update({ + op_name + ".output" + str(index): + dequantize(value[index]).numpy() + if value[index].is_quantized else value[index].numpy() + }) + else: + summary[op_name] = { + op_name + ".output0": + dequantize(value).numpy() if value.is_quantized else value.numpy() + } + else: + if bool(self.fused_dict): + if is_quantized: + for a in fp32_int8_map: + if op_name == fp32_int8_map[a]['weight']: + if type(value) is list: + summary[a] = {} + for index in range(len(value)): + summary[a].update({ + a + ".output" + str(index): + dequantize(value[index]).numpy() + if value[index].is_quantized else + value[index].numpy() + }) + else: + summary[a] = { + a + ".output0": + dequantize(value).numpy() + if value.is_quantized else value.numpy() + } + else: + for a in fp32_int8_map: # pragma: no cover + if op_name == fp32_int8_map[a]['activation']: + if type(value) is list: + summary[a] = {} + for index in range(len(value)): + summary[a].update({ + a + ".output" + str(index): + dequantize(value[index]).numpy() + if value[index].is_quantized else + value[index].numpy() + }) + else: + summary[a] = { + a + ".output0": + dequantize(value).numpy() + if value.is_quantized else value.numpy() + } + + ret['activation'].append(summary) + + if inspect_type == 'weight' or inspect_type == 'all': + ret['weight'] = {} + state_dict = new_model._model.state_dict() + + for key in state_dict: + if not isinstance(state_dict[key], torch.Tensor): + continue + if 'weight' not in key and 'bias' not in key: + continue + + op = key[:key.rfind('.')] + op = op.replace('._packed_params', '') + + if op in op_list: + if op in ret['weight']: + ret['weight'][op].update({ + key: + dequantize(state_dict[key]).numpy() + if state_dict[key].is_quantized else state_dict[key].detach().numpy() + }) + else: + ret['weight'][op] = { + key: + dequantize(state_dict[key]).numpy() + if state_dict[key].is_quantized else state_dict[key].detach().numpy() + } + else: + if bool(self.fused_dict): + if is_quantized: + for a in fp32_int8_map: + if op == fp32_int8_map[a]['weight']: + if a in ret['weight']: + ret['weight'][a].update({ + key: + dequantize(state_dict[key]).numpy() + if state_dict[key].is_quantized else + state_dict[key].detach().numpy() + }) + else: + ret['weight'][a] = \ + {key: dequantize(state_dict[key]).numpy() + if state_dict[key].is_quantized else + state_dict[key].detach().numpy()} + break + else: + ret['weight'] = None + + if save_to_disk: + if not save_path: + save_path = self.workspace_path + dump_data_to_local(ret, save_path, 'inspect_result.pkl') + + return ret + + def _pre_eval_hook(self, model, op_list=None, 
iteration_list=None): + """The function is used to do some preprocession before evaluation phase. + Here, it used to add hook for dump output tensor for quantizable ops. + + Args: + model (object): input model + + Returns: + model (object): model with hook + """ + from abc import ABCMeta + + def _with_args(cls_or_self, **kwargs): + r"""Wrapper that allows creation of class factories. + + This can be useful when there is a need to create classes with the same + constructor arguments, but different instances. + + Example:: + + >>> Foo.with_args = classmethod(_with_args) + >>> foo_builder = Foo.with_args(a=3, b=4).with_args(answer=42) + >>> foo_instance1 = foo_builder() + >>> foo_instance2 = foo_builder() + >>> id(foo_instance1) == id(foo_instance2) + False + """ + class _PartialWrapper(object): + def __init__(self, p): + self.p = p + + def __call__(self, *args, **keywords): + return self.p(*args, **keywords) + + def __repr__(self): + return self.p.__repr__() + + with_args = _with_args + + r = _PartialWrapper(partial(cls_or_self, **kwargs)) + return r + + ABC = ABCMeta(str("ABC"), (object, ), {}) # compatible with Python 2 *and* 3: + + class _RecordingObserver(ABC, torch.nn.Module): + """The module is mainly for debug and records the tensor values during runtime. + + Args: + iteration_list (list, optional): indexs of iteration which to dump tensor. + """ + def __init__(self, iteration_list=None, **kwargs): + super(_RecordingObserver, self).__init__(**kwargs) + self.output_tensors_dict = OrderedDict() + self.current_iter = 1 + self.iteration_list = iteration_list + + def forward(self, x): + if (self.iteration_list is None and self.current_iter == 1) or \ + (self.iteration_list is not None and + self.current_iter in self.iteration_list): + if type(x) is tuple or type(x) is list: + self.output_tensors_dict[self.current_iter] = \ + [i.to("cpu") if i.device != 'cpu' else i.clone() for i in x] + else: + self.output_tensors_dict[self.current_iter] = \ + x.to("cpu") if x.device != "cpu" else x.clone() + self.current_iter += 1 + return x + + @torch.jit.export + def get_tensor_value(self): + return self.output_tensors_dict + + with_args = classmethod(_with_args) + + def _observer_forward_hook(module, input, output): + """Forward hook that calls observer on the output + + Args: + module (object): input module + input (object): module input + output (object): module output + + Returns: + module output tensor (object) + """ + return module.activation_post_process(output) + + def _add_observer_(module, op_list=None, prefix=""): + """Add observer for the leaf child of the module. + + This function insert observer module to all leaf child module that + has a valid qconfig attribute. + + Args: + module (object): input module with qconfig attributes for all the leaf modules that + we want to dump tensor + op_list (list, optional): list of ops which to be dumped in module + prefix (string): name of module + + Returns: + None, module is modified inplace with added observer modules and forward_hooks + """ + for name, child in module.named_children(): + op_name = name if prefix == "" else prefix + "." 
+ name + if isinstance(child, torch.nn.quantized.FloatFunctional) and \ + (op_list is None or op_name in op_list): + if hasattr(child, 'qconfig') and child.qconfig is not None and ( + op_list is None or op_name in op_list): + child.activation_post_process = \ + child.qconfig.activation() + elif hasattr(child, 'qconfig') and child.qconfig is not None and \ + (op_list is None or op_name in op_list): + # observer and hook will be gone after we swap the module + child.add_module('activation_post_process', child.qconfig.activation()) + child.register_forward_hook(_observer_forward_hook) + else: + _add_observer_(child, op_list, op_name) + + def _propagate_qconfig_helper(module, + qconfig_dict, + white_list=None, + qconfig_parent=None, + prefix='', + fused=False): + """This is a helper function for `propagate_qconfig_` + + Args: + module (object): input module + qconfig_dict (dictionary): dictionary that maps from name of submodule to + quantization configuration + white_list (list, optional): list of quantizable modules + qconfig_parent (object, optional): config of parent module, we will fallback to + this config when there is no specified config + for current module + prefix (string, optional): corresponding prefix of the current module, + used as key in qconfig_dict + fused (bool, optional): Indicates whether the module is fused or not + + Return: + None, module is modified inplace with qconfig attached + """ + module.qconfig = qconfig_parent + if hasattr(module, '_modules'): + for name, child in module.named_children(): + module_prefix = prefix + '.' + name if prefix else name + _propagate_qconfig_helper(child, qconfig_dict, white_list, qconfig_parent, + module_prefix) + + def _prepare(model, inplace=True, op_list=[], white_list=None): + """The model will be attached with observer or fake quant modules, and qconfig + will be propagated. + + Args: + model (object): input model to be modified in-place + inplace (bool, optional): carry out model transformations in-place, + the original module is mutated + op_list (list, optional): list of ops which to be dumped in module + white_list (list, optional): list of quantizable modules + + Returns: + model (object): model with qconfig + """ + if not inplace: + model = copy.deepcopy(model) + _propagate_qconfig_helper(model, + qconfig_dict={}, + white_list=white_list, + qconfig_parent=model.qconfig) + # sanity check common API misusage + if not any(hasattr(m, 'qconfig') and m.qconfig for m in model.modules()): # pragma: no cover + logger.warn("None of the submodule got qconfig applied. 
Make sure you " + "passed correct configuration through `qconfig_dict` or " + "by assigning the `.qconfig` attribute directly on submodules") + _add_observer_(model, op_list=op_list) + return model + + # create properties + if self.version.release < Version("1.7.0").release: # pragma: no cover + white_list = self.white_list | \ + (set(torch.quantization.default_mappings.DEFAULT_MODULE_MAPPING.values()) | + set(torch.quantization.default_mappings.DEFAULT_QAT_MODULE_MAPPING.values()) | + set(torch.quantization.default_mappings.DEFAULT_DYNAMIC_MODULE_MAPPING.values())) + elif self.version.release < Version("1.8.0").release: # pragma: no cover + white_list = torch.quantization.get_compare_output_module_list() + else: + white_list = torch.quantization.get_default_compare_output_module_list() + + model = model if model.is_quantized else copy.deepcopy(model) + model._model.qconfig = torch.quantization.QConfig( + weight=torch.quantization.default_debug_observer, + activation=_RecordingObserver.with_args(iteration_list=iteration_list)) + _prepare(model._model, op_list=op_list, white_list=white_list) + + return model + def is_fused_module(self, module): """This is a helper function for `_propagate_qconfig_helper` to detecte if this module is fused. @@ -1495,7 +1895,6 @@ def __init__(self, framework_specific_info): # for tensorboard self.dump_times = 0 - self.fused_dict = {} self.optype_statistics = None @@ -1604,6 +2003,7 @@ def quantize(self, tune_cfg, model, dataloader, q_func=None): (CpuInfo().bf16 or os.getenv('FORCE_BF16') == '1'): # pragma: no cover q_model._model = torch_utils.bf16_convert.Convert(q_model._model, self.tune_cfg) + self.fused_dict = self.get_fused_list(q_model.model) q_model.q_config = copy.deepcopy(self.tune_cfg) if self.approach != 'post_training_dynamic_quant': self._get_scale_zeropoint(q_model._model, q_model.q_config) @@ -1852,7 +2252,6 @@ def _get_quantizable_ops_recursively(self, model, prefix, quantizable_ops): Returns: None """ - module_dict = dict(model.named_modules()) for op_name, child in model.named_modules(): if self.is_fused_module(child): @@ -1860,10 +2259,6 @@ def _get_quantizable_ops_recursively(self, model, prefix, quantizable_ops): module_prefix = op_name + '.' + name if module_prefix in module_dict: module_dict.pop(module_prefix) # remove sub-modules of fused modules - if op_name in self.fused_dict: - self.fused_dict[op_name] = [self.fused_dict[op_name], module_prefix] - else: - self.fused_dict[op_name] = module_prefix for op_name, child in module_dict.items(): # there is accuracy issue in quantized LayerNorm op in pytorch <1.8.1, # so remove it here @@ -2252,171 +2647,6 @@ def _post_eval_hook(self, model, **args): def save(self, model, path=None): pass - def inspect_tensor(self, - model, - dataloader, - op_list=None, - iteration_list=None, - inspect_type='activation', - save_to_disk=False): - if self.version.release >= Version("1.8.0").release: - from torch.fx import GraphModule - if type(model._model) == GraphModule: # pragma: no cover - assert False, "Inspect_tensor didn't support fx graph model now!" 
- from torch import dequantize - import numpy as np - is_quantized = model.is_quantized - op_list_ = [] - fp32_int8_map = {} - for op_name in op_list: - op_list_.append(op_name) - for key in self.fused_dict: - if op_name in self.fused_dict[key]: - fp32_int8_map[op_name] = \ - {'activation': self.fused_dict[key][-1], 'weight': key} - if is_quantized: - op_list_.append(key) - op_list_.remove(op_name) - else: - op_list_.append(self.fused_dict[key][-1]) - - new_model = model if is_quantized else copy.deepcopy(model) - - assert min(iteration_list) > 0, \ - "Iteration number should great zero, 1 means first iteration." - iterations = max(iteration_list) if iteration_list is not None else -1 - new_model = self._pre_eval_hook(new_model, op_list=op_list_, iteration_list=iteration_list) - self.evaluate(new_model, dataloader, iteration=iterations) - observer_dict = {} - ret = {} - if inspect_type == 'activation' or inspect_type == 'all': - if self.version.release >= Version("2.0.0").release: - from torch.quantization.quantize import _get_observer_dict as get_observer_dict - else: - from torch.quantization import get_observer_dict - ret['activation'] = [] - get_observer_dict(new_model._model, observer_dict) - if iteration_list is None: - iteration_list = [1] - for i in iteration_list: - summary = OrderedDict() - for key in observer_dict: - if isinstance(observer_dict[key], torch.nn.modules.linear.Identity): - continue - op_name = key.replace(".activation_post_process", "") - value = observer_dict[key].get_tensor_value()[i] - if op_name in op_list: - if type(value) is list: - summary[op_name] = {} - for index in range(len(value)): - summary[op_name].update({ - op_name + ".output" + str(index): - dequantize(value[index]).numpy() - if value[index].is_quantized else value[index].numpy() - }) - else: - summary[op_name] = { - op_name + ".output0": - dequantize(value).numpy() if value.is_quantized else value.numpy() - } - else: - if bool(self.fused_dict): - if is_quantized: - for a in fp32_int8_map: - if op_name == fp32_int8_map[a]['weight']: - if type(value) is list: - summary[a] = {} - for index in range(len(value)): - summary[a].update({ - op_name + ".output" + str(index): - dequantize(value[index]).numpy() - if value[index].is_quantized else - value[index].numpy() - }) - else: - summary[a] = { - op_name + ".output0": - dequantize(value).numpy() - if value.is_quantized else value.numpy() - } - else: - for a in fp32_int8_map: # pragma: no cover - if op_name == fp32_int8_map[a]['activation']: - if type(value) is list: - summary[a] = {} - for index in range(len(value)): - summary[a].update({ - op_name + ".output" + str(index): - dequantize(value[index]).numpy() - if value[index].is_quantized else - value[index].numpy() - }) - else: - summary[a] = { - op_name + ".output0": - dequantize(value).numpy() - if value.is_quantized else value.numpy() - } - - if save_to_disk: - dump_dir = os.path.join(self.workspace_path, 'dump_tensor') - os.makedirs(dump_dir, exist_ok=True) - np.savez(os.path.join(dump_dir, 'activation_iter{}.npz'.format(i)), **summary) - - ret['activation'].append(summary) - - if inspect_type == 'weight' or inspect_type == 'all': - ret['weight'] = {} - state_dict = new_model._model.state_dict() - - for key in state_dict: - if not isinstance(state_dict[key], torch.Tensor): - continue - if 'weight' not in key and 'bias' not in key: - continue - - op = key[:key.rfind('.')] - op = op.replace('._packed_params', '') - - if op in op_list: - if op in ret['weight']: - ret['weight'][op].update({ - key: - 
dequantize(state_dict[key]).numpy() - if state_dict[key].is_quantized else state_dict[key].detach().numpy() - }) - else: - ret['weight'][op] = { - key: - dequantize(state_dict[key]).numpy() - if state_dict[key].is_quantized else state_dict[key].detach().numpy() - } - else: - if bool(self.fused_dict): - if is_quantized: - for a in fp32_int8_map: - if op == fp32_int8_map[a]['weight']: - if a in ret['weight']: - ret['weight'][a].update({ - key: - dequantize(state_dict[key]).numpy() - if state_dict[key].is_quantized else - state_dict[key].detach().numpy() - }) - else: - ret['weight'][a] = \ - {key: dequantize(state_dict[key]).numpy() - if state_dict[key].is_quantized else - state_dict[key].detach().numpy()} - break - - if save_to_disk: - np.savez(os.path.join(dump_dir, 'weight.npz'), **ret['weight']) - else: - ret['weight'] = None - - return ret - def set_tensor(self, model, tensor_dict): state_dict = model._model.state_dict() tensor_name = None @@ -3446,6 +3676,8 @@ def quantize(self, tune_cfg, model, dataloader, q_func=None): (CpuInfo().bf16 or os.getenv('FORCE_BF16') == '1'): # pragma: no cover q_model._model = torch_utils.bf16_convert.Convert(q_model._model, self.tune_cfg) + self.fused_dict = self.get_fused_list(q_model.model) + q_model.is_quantized = True q_model.q_config = copy.deepcopy(self.tune_cfg) if self.approach != 'post_training_dynamic_quant': self._get_scale_zeropoint(q_model._model, q_model.q_config) @@ -4583,7 +4815,6 @@ def _dump_model_op_stats(self, model, tune_cfg): field_names=field_names).print_stat() self.optype_statistics = field_names, output_data - def _get_quantizable_ops_recursively(self, model, prefix, quantizable_ops): """This is a helper function for `query_fw_capability`, and it will get all quantizable ops from model. diff --git a/test/adaptor/pytorch_adaptor/test_adaptor_pytorch_1.x.py b/test/adaptor/pytorch_adaptor/test_adaptor_pytorch_1.x.py index 71e411d44cf..65dc0a58d0f 100644 --- a/test/adaptor/pytorch_adaptor/test_adaptor_pytorch_1.x.py +++ b/test/adaptor/pytorch_adaptor/test_adaptor_pytorch_1.x.py @@ -2,6 +2,7 @@ import neural_compressor.adaptor.pytorch as nc_torch import numpy as np import os +import pickle import shutil import torch import torch.nn as nn @@ -1114,5 +1115,36 @@ def test_symbolic_trace(self): traced_model_qat = symbolic_trace(model_origin, is_qat=True) self.assertTrue(isinstance(traced_model_qat.sub, torch.fx.graph_module.GraphModule)) + def test_tensor_dump(self): + model = resnet18() + model = MODELS['pytorch'](model) + quantizer = Quantization('fx_ptq_yaml.yaml') + dataset = quantizer.dataset('dummy', (100, 3, 224, 224), label=True) + dataloader = common.DataLoader(dataset) + dataloader = common._generate_common_dataloader(dataloader, 'pytorch') + quantizer.eval_dataloader = dataloader + quantizer.calib_dataloader = dataloader + quantizer.model = model.model + q_model = quantizer.fit() + quantizer.strategy.adaptor.inspect_tensor( + model, dataloader, op_list=['conv1', 'layer1.0.conv1'], + iteration_list=[1, 2], inspect_type='all', save_to_disk=True) + with open('saved/inspect_result.pkl', 'rb') as fp: + tensor_dict = pickle.load(fp) + a = tensor_dict["activation"][0] + w = tensor_dict["weight"] + self.assertTrue(w['conv1']['conv1.weight'].shape[0] == + a['conv1']['conv1.output0'].shape[1]) + quantizer.strategy.adaptor.inspect_tensor( + q_model, dataloader, op_list=['conv1', 'layer1.0.conv1.0'], + iteration_list=[1, 2], inspect_type='all', save_to_disk=True) + with open('saved/inspect_result.pkl', 'rb') as fp: + tensor_dict = 
pickle.load(fp) + a = tensor_dict["activation"][0] + w = tensor_dict["weight"] + self.assertTrue(w['conv1']['conv1.weight'].shape[0] == + a['conv1']['conv1.output0'].shape[1]) + + if __name__ == "__main__": unittest.main() From 6b92ebb9593ebe91f10f4ca73d832b932448f049 Mon Sep 17 00:00:00 2001 From: "Cheng, Penghui" Date: Tue, 1 Aug 2023 19:04:14 +0800 Subject: [PATCH 2/4] Update code Signed-off-by: Cheng, Penghui --- neural_compressor/adaptor/pytorch.py | 366 ++++-------------- .../test_adaptor_pytorch_1.x.py | 28 +- 2 files changed, 96 insertions(+), 298 deletions(-) diff --git a/neural_compressor/adaptor/pytorch.py b/neural_compressor/adaptor/pytorch.py index 1650bb4c97c..9df61747c74 100644 --- a/neural_compressor/adaptor/pytorch.py +++ b/neural_compressor/adaptor/pytorch.py @@ -1248,15 +1248,28 @@ def get_fused_list(self, model): for op_name, child in model.named_modules(): if type(child) in self.fused_op_list: in_fused_loop = False - fp32_int8_ops = [op_name,] + is_fused_module = False type_name = str(child).split("(")[0] prefix_index = op_name.rfind(".") + fp32_int8_ops = [] for fp32_op_name, module in self.pre_optimized_model.model.named_modules(): fp32_type_name = str(module).split("(")[0] prefix_fp32_index = fp32_op_name.rfind(".") - if op_name == fp32_op_name: + if not is_fused_module: + is_fused_module = self.is_fused_module(module) + if is_fused_module: + in_fused_loop = True + continue + if is_fused_module and in_fused_loop: + if op_name == fp32_op_name[: fp32_op_name.rfind(".")]: + fp32_int8_ops.append(fp32_op_name) + continue + else: + is_fused_module =False + in_fused_loop = False + elif op_name == fp32_op_name and not in_fused_loop: in_fused_loop = True - continue + fp32_int8_ops.append(fp32_op_name) elif in_fused_loop and \ op_name[: prefix_index if prefix_index > -1 else 0] == \ fp32_op_name[: prefix_fp32_index if prefix_fp32_index > -1 else 0]: @@ -1273,9 +1286,36 @@ def get_fused_list(self, model): elif in_fused_loop: in_fused_loop = False break - fused_dict.update({op_name: fp32_int8_ops}) + if len(fp32_int8_ops) > 1: + fused_dict.update({op_name: fp32_int8_ops}) return fused_dict + def diagnosis_helper(self, fp32_model, int8_model, tune_cfg=None, save_path=None): + """This is a helper function to diagnosis. + + Args: + fp32_model (object): Fp32 model (original) + int8_model (object): Quantized model + tune_cfg (dict): Quantization config + save_path (Path): The path to save min/max value of op outputs + + Returns: + Op name list for inspecting, tuning configuration + """ + exclude_list = ["QuantStub", "DeQuantStub", "BatchNorm2d", "Sequential"] + optype_list = torch.quantization.get_default_qconfig_propagation_list() + supported_optype = [] + for optype in optype_list: + op_type = str(optype).rstrip('\'>').split('.')[-1] + if "intrinsic." 
not in str(optype) and op_type not in exclude_list: + supported_optype.append(optype) + inspect_node_list = [] + for name, child in fp32_model.model.named_modules(): + op_type = type(child) + if op_type in supported_optype: + inspect_node_list.append(name) + return inspect_node_list, tune_cfg + def inspect_tensor(self, model, dataloader, @@ -1300,8 +1340,8 @@ def inspect_tensor(self, {'activation': self.fused_dict[key][-1], 'weight': self.fused_dict[key][0]} if not is_quantized: op_list_.append(self.fused_dict[key][-1]) - elif self.fused_dict[key][0] not in op_list_: - op_list_.append(self.fused_dict[key][0]) + elif key not in op_list_: + op_list_.append(key) break assert min(iteration_list) > 0, \ @@ -1342,37 +1382,39 @@ def inspect_tensor(self, if bool(self.fused_dict): if is_quantized: for a in fp32_int8_map: - if op_name == fp32_int8_map[a]['weight']: + if op_name == a: + tensor_name = fp32_int8_map[a]['weight'] if type(value) is list: - summary[a] = {} + summary[tensor_name] = {} for index in range(len(value)): - summary[a].update({ - a + ".output" + str(index): + summary[tensor_name].update({ + tensor_name + ".output" + str(index): dequantize(value[index]).numpy() if value[index].is_quantized else value[index].numpy() }) else: - summary[a] = { - a + ".output0": + summary[tensor_name] = { + tensor_name + ".output0": dequantize(value).numpy() if value.is_quantized else value.numpy() } else: for a in fp32_int8_map: # pragma: no cover if op_name == fp32_int8_map[a]['activation']: + tensor_name = fp32_int8_map[a]['weight'] if type(value) is list: - summary[a] = {} + summary[tensor_name] = {} for index in range(len(value)): - summary[a].update({ - a + ".output" + str(index): + summary[tensor_name].update({ + tensor_name + ".output" + str(index): dequantize(value[index]).numpy() if value[index].is_quantized else value[index].numpy() }) else: - summary[a] = { - a + ".output0": + summary[tensor_name] = { + tensor_name + ".output0": dequantize(value).numpy() if value.is_quantized else value.numpy() } @@ -1409,16 +1451,17 @@ def inspect_tensor(self, if bool(self.fused_dict): if is_quantized: for a in fp32_int8_map: - if op == fp32_int8_map[a]['weight']: - if a in ret['weight']: - ret['weight'][a].update({ + if op == a: + tensor_name = fp32_int8_map[a]['weight'] + if tensor_name in ret['weight']: + ret['weight'][tensor_name].update({ key: dequantize(state_dict[key]).numpy() if state_dict[key].is_quantized else state_dict[key].detach().numpy() }) else: - ret['weight'][a] = \ + ret['weight'][tensor_name] = \ {key: dequantize(state_dict[key]).numpy() if state_dict[key].is_quantized else state_dict[key].detach().numpy()} @@ -1610,22 +1653,11 @@ def _prepare(model, inplace=True, op_list=[], white_list=None): _add_observer_(model, op_list=op_list) return model - # create properties - if self.version.release < Version("1.7.0").release: # pragma: no cover - white_list = self.white_list | \ - (set(torch.quantization.default_mappings.DEFAULT_MODULE_MAPPING.values()) | - set(torch.quantization.default_mappings.DEFAULT_QAT_MODULE_MAPPING.values()) | - set(torch.quantization.default_mappings.DEFAULT_DYNAMIC_MODULE_MAPPING.values())) - elif self.version.release < Version("1.8.0").release: # pragma: no cover - white_list = torch.quantization.get_compare_output_module_list() - else: - white_list = torch.quantization.get_default_compare_output_module_list() - model = model if model.is_quantized else copy.deepcopy(model) model._model.qconfig = torch.quantization.QConfig( 
weight=torch.quantization.default_debug_observer, activation=_RecordingObserver.with_args(iteration_list=iteration_list)) - _prepare(model._model, op_list=op_list, white_list=white_list) + _prepare(model._model, op_list=op_list) return model @@ -2292,211 +2324,6 @@ def _get_scale_zeropoint(self, model, tune_cfg): if hasattr(modules[key[0]], 'zero_point'): value['activation']['zero_point'] = int(modules[key[0]].zero_point) - def _pre_eval_hook(self, model, op_list=None, iteration_list=None): - """The function is used to do some preprocession before evaluation phase. - Here, it used to add hook for dump output tensor for quantizable ops. - - Args: - model (object): input model - - Returns: - model (object): model with hook - """ - from abc import ABCMeta - - def _with_args(cls_or_self, **kwargs): - r"""Wrapper that allows creation of class factories. - - This can be useful when there is a need to create classes with the same - constructor arguments, but different instances. - - Example:: - - >>> Foo.with_args = classmethod(_with_args) - >>> foo_builder = Foo.with_args(a=3, b=4).with_args(answer=42) - >>> foo_instance1 = foo_builder() - >>> foo_instance2 = foo_builder() - >>> id(foo_instance1) == id(foo_instance2) - False - """ - class _PartialWrapper(object): - def __init__(self, p): - self.p = p - - def __call__(self, *args, **keywords): - return self.p(*args, **keywords) - - def __repr__(self): - return self.p.__repr__() - - with_args = _with_args - - r = _PartialWrapper(partial(cls_or_self, **kwargs)) - return r - - ABC = ABCMeta(str("ABC"), (object, ), {}) # compatible with Python 2 *and* 3: - - class _RecordingObserver(ABC, torch.nn.Module): - """The module is mainly for debug and records the tensor values during runtime. - - Args: - iteration_list (list, optional): indexs of iteration which to dump tensor. - """ - def __init__(self, iteration_list=None, **kwargs): - super(_RecordingObserver, self).__init__(**kwargs) - self.output_tensors_dict = OrderedDict() - self.current_iter = 1 - self.iteration_list = iteration_list - - def forward(self, x): - if (self.iteration_list is None and self.current_iter == 1) or \ - (self.iteration_list is not None and - self.current_iter in self.iteration_list): - if type(x) is tuple or type(x) is list: - self.output_tensors_dict[self.current_iter] = \ - [i.to("cpu") if i.device != 'cpu' else i.clone() for i in x] - else: - self.output_tensors_dict[self.current_iter] = \ - x.to("cpu") if x.device != "cpu" else x.clone() - self.current_iter += 1 - return x - - @torch.jit.export - def get_tensor_value(self): - return self.output_tensors_dict - - with_args = classmethod(_with_args) - - def _observer_forward_hook(module, input, output): - """Forward hook that calls observer on the output - - Args: - module (object): input module - input (object): module input - output (object): module output - - Returns: - module output tensor (object) - """ - return module.activation_post_process(output) - - def _add_observer_(module, op_list=None, prefix=""): - """Add observer for the leaf child of the module. - - This function insert observer module to all leaf child module that - has a valid qconfig attribute. 
- - Args: - module (object): input module with qconfig attributes for all the leaf modules that - we want to dump tensor - op_list (list, optional): list of ops which to be dumped in module - prefix (string): name of module - - Returns: - None, module is modified inplace with added observer modules and forward_hooks - """ - for name, child in module.named_children(): - op_name = name if prefix == "" else prefix + "." + name - if isinstance(child, torch.nn.quantized.FloatFunctional) and \ - (op_list is None or op_name in op_list): - if hasattr(child, 'qconfig') and child.qconfig is not None and ( - op_list is None or op_name in op_list): - child.activation_post_process = \ - child.qconfig.activation() - elif hasattr(child, 'qconfig') and child.qconfig is not None and \ - (op_list is None or op_name in op_list): - # observer and hook will be gone after we swap the module - child.add_module('activation_post_process', child.qconfig.activation()) - child.register_forward_hook(_observer_forward_hook) - else: - _add_observer_(child, op_list, op_name) - - def _propagate_qconfig_helper(module, - qconfig_dict, - white_list=None, - qconfig_parent=None, - prefix='', - fused=False): - """This is a helper function for `propagate_qconfig_` - - Args: - module (object): input module - qconfig_dict (dictionary): dictionary that maps from name of submodule to - quantization configuration - white_list (list, optional): list of quantizable modules - qconfig_parent (object, optional): config of parent module, we will fallback to - this config when there is no specified config - for current module - prefix (string, optional): corresponding prefix of the current module, - used as key in qconfig_dict - fused (bool, optional): Indicates whether the module is fused or not - - Return: - None, module is modified inplace with qconfig attached - """ - if white_list is None: - white_list = \ - torch.quantization.default_mappings.DEFAULT_QCONFIG_PROPAGATE_WHITE_LIST \ - if self.version.release < Version("1.7.0").release else \ - torch.quantization.quantization_mappings.get_qconfig_propagation_list() - - if type(module) in white_list and type(module) != torch.nn.Sequential: - module.qconfig = qconfig_parent - else: - module.qconfig = None - if hasattr(module, '_modules'): - for name, child in module.named_children(): - module_prefix = prefix + '.' + name if prefix else name - _propagate_qconfig_helper(child, qconfig_dict, white_list, qconfig_parent, - module_prefix) - - def _prepare(model, inplace=True, op_list=[], white_list=None): - """The model will be attached with observer or fake quant modules, and qconfig - will be propagated. - - Args: - model (object): input model to be modified in-place - inplace (bool, optional): carry out model transformations in-place, - the original module is mutated - op_list (list, optional): list of ops which to be dumped in module - white_list (list, optional): list of quantizable modules - - Returns: - model (object): model with qconfig - """ - if not inplace: - model = copy.deepcopy(model) - _propagate_qconfig_helper(model, - qconfig_dict={}, - white_list=white_list, - qconfig_parent=model.qconfig) - # sanity check common API misusage - if not any(hasattr(m, 'qconfig') and m.qconfig for m in model.modules()): # pragma: no cover - logger.warn("None of the submodule got qconfig applied. 
Make sure you " - "passed correct configuration through `qconfig_dict` or " - "by assigning the `.qconfig` attribute directly on submodules") - _add_observer_(model, op_list=op_list) - return model - - # create properties - if self.version.release < Version("1.7.0").release: # pragma: no cover - white_list = self.white_list | \ - (set(torch.quantization.default_mappings.DEFAULT_MODULE_MAPPING.values()) | - set(torch.quantization.default_mappings.DEFAULT_QAT_MODULE_MAPPING.values()) | - set(torch.quantization.default_mappings.DEFAULT_DYNAMIC_MODULE_MAPPING.values())) - elif self.version.release < Version("1.8.0").release: # pragma: no cover - white_list = torch.quantization.get_compare_output_module_list() - else: - white_list = torch.quantization.get_default_compare_output_module_list() - - model = model if model.is_quantized else copy.deepcopy(model) - model._model.qconfig = torch.quantization.QConfig( - weight=torch.quantization.default_debug_observer, - activation=_RecordingObserver.with_args(iteration_list=iteration_list)) - _prepare(model._model, op_list=op_list, white_list=white_list) - - return model - def is_fused_child(self, op_name): """This is a helper function for `_post_eval_hook` @@ -2507,43 +2334,11 @@ def is_fused_child(self, op_name): (bool): if this op is fused """ - op = op_name[:op_name.rfind('.')] - if op in self.fused_dict and op_name[op_name.rfind('.') + 1:].isdigit(): - return True - else: - return False - - def is_fused_op(self, op_name): - """This is a helper function for `_post_eval_hook` - - Args: - op_name (string): op name - - Returns: - (bool): if this op is fused - - """ - op = op_name[:op_name.rfind('.')] - if op in self.fused_dict: - return True - else: - return False - - def is_last_fused_child(self, op_name): - """This is a helper function for `_post_eval_hook` - - Args: - op_name (string): op name - - Returns: - (bool): if this op is last fused op + for key in self.fused_dict: + if op_name in self.fused_dict[key]: + return True + return False - """ - op = op_name[:op_name.rfind('.')] - if op_name in self.fused_dict[op][-1]: - return True - else: - return False def _post_eval_hook(self, model, **args): """The function is used to do some post process after complete evaluation. 
@@ -2595,20 +2390,17 @@ def _post_eval_hook(self, model, **args): for key in observer_dict: if isinstance(observer_dict[key], torch.nn.modules.linear.Identity): continue - op_name = key.strip(".activation_post_process") + op_name = key.replace(".activation_post_process", "") summary[op_name + ".output"] = observer_dict[key].get_tensor_value() for iter in summary[op_name + ".output"]: # Only collect last fused child output op = op_name - if self.is_fused_child(op_name) == True and \ - self.is_last_fused_child(op_name) == True: - op = op_name[:op_name.rfind('.')] + if op_name in self.fused_dict: + op = self.fused_dict[op_name][0] else: - if self.is_fused_child(op_name) == True and \ - self.is_last_fused_child(op_name) == False: - continue - else: - op = op_name + for key in self.fused_dict: + if op_name in self.fused_dict[key]: + op = op_name if summary[op_name + ".output"][iter].is_quantized: writer.add_histogram(op + "/Output/int8", @@ -2620,7 +2412,6 @@ def _post_eval_hook(self, model, **args): for key in state_dict: if not isinstance(state_dict[key], torch.Tensor): continue - op = key[:key.rfind('.')] if self.is_fused_child(op) is True: # fused child tensorboard tag will be merge @@ -2657,7 +2448,12 @@ def set_tensor(self, model, tensor_dict): weight_bias = key[end + 1:] for op in self.fused_dict: if op_name in self.fused_dict[op]: - state_op_name = op + if model.is_quantized: + state_op_name = op + else: + state_op_name = self.fused_dict[op][0] + # elif op_name in self.fused_dict[op]: + # state_op_name = op if state_op_name is None: state_op_name = op_name for state_dict_key in state_dict.keys(): diff --git a/test/adaptor/pytorch_adaptor/test_adaptor_pytorch_1.x.py b/test/adaptor/pytorch_adaptor/test_adaptor_pytorch_1.x.py index 65dc0a58d0f..2ef90aec963 100644 --- a/test/adaptor/pytorch_adaptor/test_adaptor_pytorch_1.x.py +++ b/test/adaptor/pytorch_adaptor/test_adaptor_pytorch_1.x.py @@ -709,16 +709,17 @@ def test_tensor_dump_and_set(self): quantizer.strategy.adaptor.inspect_tensor( model, dataloader, op_list=['conv1.0', 'layer1.0.conv1.0'], iteration_list=[1, 2], inspect_type='all', save_to_disk=True) - load_array = lambda *a, **k: np.load(*a, allow_pickle=True, **k) - a = load_array('saved/dump_tensor/activation_iter1.npz') - w = load_array('saved/dump_tensor/weight.npz') + with open('saved/inspect_result.pkl', 'rb') as fp: + tensor_dict = pickle.load(fp) + a = tensor_dict["activation"][0] + w = tensor_dict["weight"] if PT_VERSION >= Version("1.8.0").release: - self.assertTrue(w['conv1.0'].item()['conv1.0.weight'].shape[0] == - a['conv1.0'].item()['conv1.0.output0'].shape[1]) + self.assertTrue(w['conv1.0']['conv1.0.weight'].shape[0] == + a['conv1.0']['conv1.0.output0'].shape[1]) else: - self.assertTrue(w['conv1.0'].item()['conv1.0.weight'].shape[0] == - a['conv1.0'].item()['conv1.1.output0'].shape[1]) - data = np.random.random(w['conv1.0'].item()['conv1.0.weight'].shape).astype(np.float32) + self.assertTrue(w['conv1.0']['conv1.0.weight'].shape[0] == + a['conv1.0']['conv1.1.output0'].shape[1]) + data = np.random.random(w['conv1.0']['conv1.0.weight'].shape).astype(np.float32) quantizer.strategy.adaptor.set_tensor(q_model, {'conv1.0.weight': data}) changed_tensor = q_model.get_weight('conv1.weight') scales = changed_tensor.q_per_channel_scales() @@ -1126,9 +1127,10 @@ def test_tensor_dump(self): quantizer.calib_dataloader = dataloader quantizer.model = model.model q_model = quantizer.fit() + op_list, _ = quantizer.strategy.adaptor.diagnosis_helper(model, q_model, None) 
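# A minimal sketch of what the diagnosis_helper call above yields, assuming a
# plain torchvision resnet18 like the fp32 model in this test; the exclude list
# mirrors the helper added earlier in this patch and the resulting names are
# illustrative.
import torch
from torchvision.models import resnet18

exclude = {"QuantStub", "DeQuantStub", "BatchNorm2d", "Sequential"}
propagate = torch.quantization.get_default_qconfig_propagation_list()
supported = [t for t in propagate
             if "intrinsic." not in str(t)
             and str(t).rstrip("'>").split(".")[-1] not in exclude]

fp32 = resnet18()
op_list = [name for name, m in fp32.named_modules() if type(m) in supported]
# op_list now contains entries such as 'conv1', 'layer1.0.conv1', ..., 'fc'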
quantizer.strategy.adaptor.inspect_tensor( - model, dataloader, op_list=['conv1', 'layer1.0.conv1'], - iteration_list=[1, 2], inspect_type='all', save_to_disk=True) + model, dataloader, op_list=op_list, + iteration_list=[1], inspect_type='all', save_to_disk=True) with open('saved/inspect_result.pkl', 'rb') as fp: tensor_dict = pickle.load(fp) a = tensor_dict["activation"][0] @@ -1136,14 +1138,14 @@ def test_tensor_dump(self): self.assertTrue(w['conv1']['conv1.weight'].shape[0] == a['conv1']['conv1.output0'].shape[1]) quantizer.strategy.adaptor.inspect_tensor( - q_model, dataloader, op_list=['conv1', 'layer1.0.conv1.0'], + q_model, dataloader, op_list=['conv1', 'layer2.0.downsample.0'], iteration_list=[1, 2], inspect_type='all', save_to_disk=True) with open('saved/inspect_result.pkl', 'rb') as fp: tensor_dict = pickle.load(fp) a = tensor_dict["activation"][0] w = tensor_dict["weight"] - self.assertTrue(w['conv1']['conv1.weight'].shape[0] == - a['conv1']['conv1.output0'].shape[1]) + self.assertTrue(w['layer2.0.downsample.0']['layer2.0.downsample.0.weight'].shape[0] == + a['layer2.0.downsample.0']['layer2.0.downsample.0.output0'].shape[1]) if __name__ == "__main__": From 00d0413b227b4b5b6733542f9fed19a85313911c Mon Sep 17 00:00:00 2001 From: "Cheng, Penghui" Date: Mon, 14 Aug 2023 15:12:43 +0800 Subject: [PATCH 3/4] Support torch > 1.8.0 Signed-off-by: Cheng, Penghui --- neural_compressor/adaptor/pytorch.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/neural_compressor/adaptor/pytorch.py b/neural_compressor/adaptor/pytorch.py index 9df61747c74..5ac2ad3ee6c 100644 --- a/neural_compressor/adaptor/pytorch.py +++ b/neural_compressor/adaptor/pytorch.py @@ -847,7 +847,7 @@ def __init__(self, framework_specific_info): self.fp32_results = [] self.fp32_preds_as_label = False - if self.version.release >= Version("2.0").release: + if self.version.release >= Version("1.8").release: static_quant_mapping = tq.quantization_mappings.get_default_static_quant_module_mappings() self.fused_op_list = \ [static_quant_mapping[key] for key in static_quant_mapping if "intrinsic." in str(key)] @@ -1325,7 +1325,7 @@ def inspect_tensor(self, save_to_disk=False, save_path=None, quantization_cfg=None): - assert self.version.release >= Version("2.0").release, "Inspect_tensor only support torch 1.8 or above!" + assert self.version.release >= Version("1.8").release, "Inspect_tensor only support torch 1.8 or above!" 
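# A reading sketch for the inspect_result.pkl consumed by the tests above,
# assuming the default "saved/" workspace path used there; the op names are
# illustrative and depend on the op_list passed to inspect_tensor.
import pickle

with open("saved/inspect_result.pkl", "rb") as fp:
    result = pickle.load(fp)

# result["activation"]: one dict per requested iteration, keyed by op name,
# each holding "<op>.output<idx>" -> numpy array of the dumped output tensor.
first_iter = result["activation"][0]
conv_out = first_iter["conv1"]["conv1.output0"]

# result["weight"]: op name -> {"<op>.weight": ndarray, "<op>.bias": ndarray, ...}
conv_w = result["weight"]["conv1"]["conv1.weight"]
print(conv_w.shape[0] == conv_out.shape[1])   # output channels line up, as the tests assert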
from neural_compressor.utils.utility import dump_data_to_local from torch import dequantize is_quantized = model.is_quantized From 8c759a8d40604261a9ae5b209aae6b49f5165f94 Mon Sep 17 00:00:00 2001 From: "Cheng, Penghui" Date: Tue, 15 Aug 2023 13:06:51 +0800 Subject: [PATCH 4/4] Update code for pytorch<2.0 version Signed-off-by: Cheng, Penghui --- neural_compressor/adaptor/pytorch.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/neural_compressor/adaptor/pytorch.py b/neural_compressor/adaptor/pytorch.py index 5ac2ad3ee6c..bf85df15ef9 100644 --- a/neural_compressor/adaptor/pytorch.py +++ b/neural_compressor/adaptor/pytorch.py @@ -1352,7 +1352,10 @@ def inspect_tensor(self, observer_dict = {} ret = {} if inspect_type == 'activation' or inspect_type == 'all': - from torch.quantization.quantize import _get_observer_dict as get_observer_dict + if self.version.release >= Version("2.0.0").release: + from torch.quantization.quantize import _get_observer_dict as get_observer_dict + else: + from torch.quantization import get_observer_dict ret['activation'] = [] get_observer_dict(new_model.model, observer_dict) if iteration_list is None:
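Taken together, the new FX path can be exercised roughly as in the test_tensor_dump case added above. The sketch below assumes `quantizer`, `fp32_model`, `q_model` and `dataloader` were prepared the same way as in that test (these names are placeholders from the test setup, not new API); it only shows the inspect calls and the in-memory result.

adaptor = quantizer.strategy.adaptor

# Ask the helper for a default op list, then dump both weights and activations
# for the first iteration of the dataloader.
op_list, _ = adaptor.diagnosis_helper(fp32_model, q_model, tune_cfg=None)
ret = adaptor.inspect_tensor(fp32_model, dataloader,
                             op_list=op_list,
                             iteration_list=[1],
                             inspect_type='all',     # 'activation', 'weight' or 'all'
                             save_to_disk=True)      # also writes inspect_result.pkl

print(list(ret.keys()))          # ['activation', 'weight']
print(len(ret['activation']))    # one summary dict per requested iteration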