diff --git a/.azure-pipelines/scripts/ut/3x/coverage.3x_pt b/.azure-pipelines/scripts/ut/3x/coverage.3x_pt index 34fc7f29fcf..2902c0c8f9c 100644 --- a/.azure-pipelines/scripts/ut/3x/coverage.3x_pt +++ b/.azure-pipelines/scripts/ut/3x/coverage.3x_pt @@ -5,6 +5,9 @@ branch = True include = */neural_compressor/common/* */neural_compressor/torch/* +omit = + */neural_compressor/torch/algorithms/habana_fp8/* + */neural_compressor/torch/amp/* exclude_lines = pragma: no cover raise NotImplementedError diff --git a/.azure-pipelines/scripts/ut/3x/run_3x_tf.sh b/.azure-pipelines/scripts/ut/3x/run_3x_tf.sh index d1aee3a98cb..f6e54ba2662 100644 --- a/.azure-pipelines/scripts/ut/3x/run_3x_tf.sh +++ b/.azure-pipelines/scripts/ut/3x/run_3x_tf.sh @@ -16,20 +16,36 @@ inc_path=$(python -c 'import neural_compressor; print(neural_compressor.__path__ cd /neural-compressor/test/3x || exit 1 rm -rf torch rm -rf onnxrt -rm -rf tensorflow/quantization/ptq/newapi mv tensorflow/keras ../3x_keras -mv tensorflow/quantization/itex ./3x_itex +mv tensorflow/quantization/ptq/newapi ../3x_newapi LOG_DIR=/neural-compressor/log_dir mkdir -p ${LOG_DIR} ut_log_name=${LOG_DIR}/ut_3x_tf.log + +# test for tensorflow ut pytest --cov="${inc_path}" -vs --disable-warnings --html=report_tf_quant.html --self-contained-html ./tensorflow/quantization 2>&1 | tee -a ${ut_log_name} rm -rf tensorflow/quantization pytest --cov="${inc_path}" --cov-append -vs --disable-warnings --html=report_tf.html --self-contained-html . 2>&1 | tee -a ${ut_log_name} +# test for tensorflow new api ut +pip uninstall tensorflow -y +pip install /tf_dataset/tf_binary/230928/tensorflow*.whl +pip install cmake +pip install protobuf==3.20.3 +pip install horovod==0.27.0 +pip list +rm -rf tensorflow/* +mkdir -p tensorflow/quantization/ptq +mv ../3x_newapi tensorflow/quantization/ptq/newapi +find . -name "test*.py" | sed "s,\.\/,python -m pytest --cov=${inc_path} --cov-append -vs --disable-warnings ,g" > run.sh +cat run.sh +bash run.sh 2>&1 | tee -a ${ut_log_name} + +# test for itex ut rm -rf tensorflow/* mv ../3x_keras tensorflow/keras -mv ../3x_itex tensorflow/quantization/itex +pip uninstall tensorflow -y pip install intel-extension-for-tensorflow[cpu] pytest --cov="${inc_path}" --cov-append -vs --disable-warnings --html=report_keras.html --self-contained-html ./tensorflow 2>&1 | tee -a ${ut_log_name} diff --git a/.azure-pipelines/scripts/ut/3x/run_3x_tf_new_api.sh b/.azure-pipelines/scripts/ut/3x/run_3x_tf_new_api.sh deleted file mode 100644 index 218e32a9b3a..00000000000 --- a/.azure-pipelines/scripts/ut/3x/run_3x_tf_new_api.sh +++ /dev/null @@ -1,46 +0,0 @@ -#!/bin/bash -python -c "import neural_compressor as nc" -test_case="run 3x New TF API" -echo "${test_case}" - -# install requirements -echo "set up UT env..." 
-pip install -r /neural-compressor/test/3x/tensorflow/requirements.txt -pip install pytest-html -pip install pytest-html-merger - -pip uninstall tensorflow -y -pip install /tf_dataset/tf_binary/230928/tensorflow*.whl -pip install cmake -pip install protobuf==3.20.3 -pip install horovod==0.27.0 -pip list - -cd /neural-compressor/test/3x || exit 1 -mv tensorflow/quantization/ptq/newapi ../3x_newapi -rm -rf ./* - -LOG_DIR=/neural-compressor/log_dir -mkdir -p ${LOG_DIR} -ut_log_name=${LOG_DIR}/ut_3x_new_tf.log - -mkdir -p tensorflow/quantization/ptq -mv ../3x_newapi tensorflow/quantization/ptq/newapi - -pytest -vs --disable-warnings --html=report_new_tf_quant_one_case.html --self-contained-html ./tensorflow/quantization/ptq/newapi/test_big_saved_model.py 2>&1 | tee -a ${ut_log_name} -rm -rf tensorflow/quantization/ptq/newapi/test_big_saved_model.py -pytest -vs --disable-warnings --html=report_new_tf_quant.html --self-contained-html ./tensorflow/quantization/ptq/newapi 2>&1 | tee -a ${ut_log_name} - -mkdir -p report -mv *.html report -pytest_html_merger -i ./report -o ./report.html - -cp report.html ${LOG_DIR}/ - -if [ $(grep -c '== FAILURES ==' ${ut_log_name}) != 0 ] || [ $(grep -c '== ERRORS ==' ${ut_log_name}) != 0 ] || [ $(grep -c ' passed' ${ut_log_name}) == 0 ]; then - echo "Find errors in pytest case, please check the output..." - echo "Please search for '== FAILURES ==' or '== ERRORS =='" - exit 1 -fi - -echo "UT finished successfully! " \ No newline at end of file diff --git a/.azure-pipelines/ut-3x-tf.yml b/.azure-pipelines/ut-3x-tf.yml index 0fdc0c02f26..df852e28000 100644 --- a/.azure-pipelines/ut-3x-tf.yml +++ b/.azure-pipelines/ut-3x-tf.yml @@ -41,20 +41,6 @@ stages: uploadPath: $(UPLOAD_PATH) utArtifact: "ut_3x" - - stage: NewTF - displayName: Unit Test 3x New TF API - dependsOn: [] - jobs: - - job: - displayName: Unit Test 3x New TF API - steps: - - template: template/ut-template.yml - parameters: - dockerConfigName: "commonDockerConfig" - utScriptFileName: "3x/run_3x_tf_new_api" - uploadPath: $(UPLOAD_PATH) - utArtifact: "ut_3x_tf_new_api" - - stage: TensorFlow_baseline displayName: Unit Test 3x TensorFlow baseline dependsOn: [] diff --git a/.github/checkgroup.yml b/.github/checkgroup.yml index 697e70799c4..63b228f3f7e 100644 --- a/.github/checkgroup.yml +++ b/.github/checkgroup.yml @@ -53,11 +53,7 @@ subprojects: - "Model-Test (Run ONNX Model resnet50-v1-12)" - "Model-Test (Run PyTorch Model resnet18)" - "Model-Test (Run PyTorch Model resnet18_fx)" - - "Model-Test (Run TensorFlow Model darknet19)" - - "Model-Test (Run TensorFlow Model inception_v1)" - - "Model-Test (Run TensorFlow Model resnet-101)" - "Model-Test (Run TensorFlow Model resnet50v1.5)" - - "Model-Test (Run TensorFlow Model ssd_mobilenet_v1_ckpt)" - "Model-Test (Run TensorFlow Model ssd_resnet50_v1)" - id: "Model Tests 3x workflow" diff --git a/neural_compressor/tensorflow/algorithms/static_quant/keras.py b/neural_compressor/tensorflow/algorithms/static_quant/keras.py index c4b15d847a3..004393c8c27 100644 --- a/neural_compressor/tensorflow/algorithms/static_quant/keras.py +++ b/neural_compressor/tensorflow/algorithms/static_quant/keras.py @@ -90,46 +90,13 @@ def __init__(self, framework_specific_info): os.mkdir(DEFAULT_WORKSPACE) self.tmp_dir = (DEFAULT_WORKSPACE + "tmp_model.keras") if self.keras3 else (DEFAULT_WORKSPACE + "tmp_model") - def _check_itex(self): - """Check if the IntelĀ® Extension for TensorFlow has been installed.""" - try: - import intel_extension_for_tensorflow - except: - raise ImportError( - 
"The IntelĀ® Extension for TensorFlow is not installed. " - "Please install it to run models on ITEX backend" - ) - - def convert_bf16(self): - """Execute the BF16 conversion.""" - tf.keras.mixed_precision.set_global_policy("mixed_bfloat16") - model = self.pre_optimized_model - - for layer in model.layers: - if layer.name in self.bf16_ops: - layer.dtype = "mixed_bfloat16" - - model.save(self.tmp_dir) - converted_model = tf.keras.models.load_model(self.tmp_dir) - tf.keras.mixed_precision.set_global_policy("float32") - - return converted_model - - # (TODO) choose the properly quantize mode - def _check_quantize_mode(self, model): - """Check what quantize mode to use.""" - for layer in model.layers: - if "ReLU" in layer.__class__.__name__: - return "MIN_FIRST" - return "SCALED" - def _set_weights(self, qmodel, layer_weights): """Set fp32 weights to qmodel.""" for qlayer in qmodel.layers: if qlayer.get_weights(): if qlayer.name in layer_weights: qlayer.set_weights(layer_weights[qlayer.name]) - else: + else: # pragma: no cover hit_layer = False for sub_layer in qlayer.submodules: if sub_layer.name in layer_weights: @@ -164,7 +131,7 @@ def _check_quantize_format(self, model): self.conv_format[layer.name] = "u8" break - def _fuse_bn_keras3(self, fuse_conv_bn, fp32_layers): + def _fuse_bn_keras3(self, fuse_conv_bn, fp32_layers): # pragma: no cover fuse_layers = [] fused_bn_name = "" for idx, layer in enumerate(fp32_layers): @@ -211,7 +178,7 @@ def _fuse_bn_keras3(self, fuse_conv_bn, fp32_layers): return fuse_layers - def _fuse_bn_keras2(self, fuse_conv_bn, fp32_layers): + def _fuse_bn_keras2(self, fuse_conv_bn, fp32_layers): # pragma: no cover fuse_layers = [] for idx, layer in enumerate(fp32_layers): if hasattr(layer, "_inbound_nodes"): @@ -272,7 +239,7 @@ def _fuse_bn_keras2(self, fuse_conv_bn, fp32_layers): return fuse_layers - def _fuse_bn(self, model): + def _fuse_bn(self, model): # pragma: no cover """Fusing Batch Normalization.""" model.save(self.tmp_dir) fuse_bn_model = tf.keras.models.load_model(self.tmp_dir) @@ -362,14 +329,6 @@ def quantize(self, quant_config, model, dataloader, iteration, q_func=None): tune_cfg = converter.parse_to_tune_cfg() self.tuning_cfg_to_fw(tune_cfg) - # just convert the input model to mixed_bfloat16 - if self.bf16_ops and not self.quantize_config["op_wise_config"]: - converted_model = self.convert_bf16() - return converted_model - - # if self.backend == "itex": - # self._check_itex() - logger.debug("Dump quantization configurations:") logger.debug(self.quantize_config) calib_sampling_size = tune_cfg.get("calib_sampling_size", 1) @@ -469,59 +428,6 @@ def _calibrate(self, model, dataloader, calib_interation): return quantized_model - @dump_elapsed_time(customized_msg="Model inference") - def evaluate( - self, - model, - dataloader, - postprocess=None, - metrics=None, - measurer=None, - iteration=-1, - fp32_baseline=False, - ): - """The function is used to run evaluation on validation dataset. - - Args: - model (object): The model to do calibration. - dataloader (generator): generate the data and labels. - postprocess (object, optional): process the result from the model - metric (object, optional): Depends on model category. Defaults to None. - measurer (object, optional): for precise benchmark measurement. 
- iteration(int, optional): control steps of mini-batch - fp32_baseline (boolean, optional): only for compare_label=False pipeline - """ - # use keras object - keras_model = model.model - logger.info("Start to evaluate the Keras model.") - results = [] - for idx, (inputs, labels) in enumerate(dataloader): - # use predict on batch - if measurer is not None: - measurer.start() - predictions = keras_model.predict_on_batch(inputs) - measurer.end() - else: - predictions = keras_model.predict_on_batch(inputs) - - if self.fp32_preds_as_label: - self.fp32_results.append(predictions) if fp32_baseline else results.append(predictions) - - if postprocess is not None: - predictions, labels = postprocess((predictions, labels)) - if metrics: - for metric in metrics: - if not hasattr(metric, "compare_label") or ( - hasattr(metric, "compare_label") and metric.compare_label - ): - metric.update(predictions, labels) - if idx + 1 == iteration: - break - - acc = 0 if metrics is None else [metric.result() for metric in metrics] - - return acc if not isinstance(acc, list) or len(acc) > 1 else acc[0] - def query_fw_capability(self, model): """The function is used to return framework tuning capability. @@ -621,7 +527,7 @@ def tuning_cfg_to_fw(self, tuning_cfg): for each_op_info in tuning_cfg["op"]: op_name = each_op_info[0] - if tuning_cfg["op"][each_op_info]["activation"]["dtype"] == "bf16": + if tuning_cfg["op"][each_op_info]["activation"]["dtype"] == "bf16": # pragma: no cover if each_op_info[1] in bf16_type: bf16_ops.append(op_name) continue @@ -693,31 +599,6 @@ def _get_specified_version_cfg(self, data): return default_config - def get_version(self): - """Get the current backend version information. - - Returns: - [string]: version string. - """ - return self.cur_config["version"]["name"] - - def get_precisions(self): - """Get supported precisions for current backend. - - Returns: - [string list]: the precisions' name. - """ - return self.cur_config["precisions"]["names"] - - def get_op_types(self): - """Get the supported op types by all precisions. - - Returns: - [dictionary list]: A list composed of dictionary which key is precision - and value is the op types. - """ - return self.cur_config["ops"] - def get_quantization_capability(self): """Get the supported op types' quantization capability. 
@@ -846,7 +727,7 @@ def _parse_inputs(self, BN_fused_layers=None, conv_names=None): try: model_input = self.model.input - except ValueError: + except ValueError: # pragma: no cover model_input = self.model.inputs[0] return input_layer_dict, model_input diff --git a/neural_compressor/tensorflow/algorithms/static_quant/tensorflow.py b/neural_compressor/tensorflow/algorithms/static_quant/tensorflow.py index 18f514ba306..4f279e20073 100644 --- a/neural_compressor/tensorflow/algorithms/static_quant/tensorflow.py +++ b/neural_compressor/tensorflow/algorithms/static_quant/tensorflow.py @@ -37,7 +37,6 @@ BaseDataLoader, BaseModel, CpuInfo, - Dequantize, Statistics, deep_get, dump_elapsed_time, @@ -87,8 +86,8 @@ def __init__(self, framework_specific_info): cfg_yaml_name = "{}.yaml".format(self.__class__.__name__[: -len("Adaptor")].lower()) self.itex_mode = self.backend == "itex" or cfg_yaml_name == "tensorflow_itex.yaml" - # if self.itex_mode: - # self._check_itex() + if self.itex_mode: + self._check_itex() self.query_handler = TensorflowQuery( local_config_file=os.path.join(os.path.dirname(__file__), cfg_yaml_name), @@ -119,284 +118,6 @@ def _check_itex(self): "Please install it to run models on ITEX backend" ) - def _log_histogram(self, writer, tag, values, step=0, bins=1000): - """Writes a histogram for later analysis.""" - # Convert to a numpy array - values = np.array(values) - - # Create and write Summary - # update using TF2.X API - with writer.as_default(): - tf.summary.histogram(tag, values, step) - writer.flush() - - def _pre_hook_for_hvd(self, dataloader=None): - """Pre hook for Horovod.""" - import horovod.tensorflow as hvd - - self.hvd = hvd - self.hvd.init() - - @dump_elapsed_time(customized_msg="Model inference") - def evaluate( - self, - model, - dataloader, - postprocess=None, - metrics=None, - measurer=None, - iteration=-1, - tensorboard=False, - fp32_baseline=False, - ): - """Evaluate the model for specified metric on validation dataset. - - Args: - model ([Graph, GraphDef or Path String]): The model could be the graph, - graph_def object, the frozen pb or ckpt/savedmodel folder path. - dataloader (generator): generate the data and labels. - postprocess (object, optional): process the result from the model - metrics (list, optional): Depends on model category. Defaults to None. - measurer (object, optional): for precise benchmark measurement. - iteration(int, optional): control steps of mini-batch - tensorboard (boolean, optional): for tensorboard inspect tensor. - fp32_baseline (boolean, optional): only for compare_label=False pipeline - - Returns: - [float]: evaluation result, the larger is better. - """ - from neural_compressor.tensorflow.quantization.utils.utility import iterator_sess_run - - outputs = model.output_tensor_names - - if getattr(dataloader, "distributed", False): - import horovod.tensorflow as hvd - - hvd.init() - # If metric.hvd is not None then run distributed inference - for metric in metrics: - metric.hvd = hvd - try: - len_dataloader = len(dataloader) - except: - logger.info( - "The length of the distributed evaluation dataloader is unknown." - "When the iteration of evaluation dataloader in each process is " - "inconsistent, an error may occur." 
- ) - else: - list_len_dataloader = hvd.allgather_object(len_dataloader) - if hvd.rank() == 0: - for i in range(len(list_len_dataloader) - 1): - if list_len_dataloader[i] != list_len_dataloader[i + 1]: - raise AttributeError( - "The evaluation dataloader's iteration is" - "different between processes, please reset dataloader's batch_size." - ) - logger.info( - "Rank {!s} dataloaders' data distribution balance check for evaluation have been finished.".format( - hvd.allgather_object(hvd.rank()) - ) - ) - if tensorboard: - from tensorflow.python.framework import tensor_util - - from neural_compressor.tensorflow.quantization.utils.graph_util import GraphAnalyzer - - output_postfix = "_fp32.output" - inspect_node_types = [ - "Conv2D", - "DepthwiseConv2dNative", - "MaxPool", - "AvgPool", - "ConcatV2", - "MatMul", - "FusedBatchNormV3", - "FusedBatchNorm", - "BiasAdd", - "_MklFusedInstanceNorm", - "Relu", - "Relu6", - "Dequantize", - ] - fp32_inspect_node_name = [] - int8_inspect_node_name = [] - q_node_scale = {} - if self.dump_times == 0: - temp_dir = "./runs/eval/baseline" - else: - temp_dir = "./runs/eval/tune_" + str(self.dump_times) - if os.path.isdir(temp_dir): - import shutil - - shutil.rmtree(temp_dir, ignore_errors=True) - # Create the writer using TF2.x APIs to handle eager executions - writer = tf.summary.create_file_writer(temp_dir) # pylint: disable=no-member - with writer.as_default(): - tf.summary.graph(model.graph) # pylint: disable=no-member - - cur_graph = GraphAnalyzer() - cur_graph.graph = model.graph_def - cur_graph.parse_graph() - graph_info = cur_graph.node_name_details - for node in model.graph_def.node: - if node.op in inspect_node_types: - fp32_inspect_node_name.append(node.name) - # Tensor dump supported quantized op including, - # Requantize, QuantizedConv2DAndRequantize, - # QuantizedConv2DAndReluAndRequantize, - # QuantizedConv2DWithBiasAndRequantize, - # QuantizedConv2DWithBiasAndReluAndRequantize, - # QuantizedConv2DWithBiasSignedSumAndReluAndRequantize, - # QuantizedConv2DWithBiasSumAndReluAndRequantize, - # QuantizedDepthwiseConv2DWithBiasAndReluAndRequantize, - # QuantizedMatMulWithBiasAndReluAndRequantize, - # QuantizedMatMulWithBiasAndRequantize - elif node.op.find("Requantize") != -1: - out_min = -2 - out_max = -1 - if node.op.find("Sum") != -1: - out_min = -5 - out_max = -4 - q_out_min = graph_info[node.input[out_min]].node.attr["value"].tensor.float_val[0] - q_out_max = graph_info[node.input[out_max]].node.attr["value"].tensor.float_val[0] - q_node_scale[node.name] = (node.op, q_out_min, q_out_max) - int8_inspect_node_name.append(node.name) - # Inspect weights, bias. 
Need further optimize - if node.op == "Const" and graph_info[graph_info[node.name].outputs[0]].node.op in [ - "Conv2D", - "DepthwiseConv2dNative", - "MatMul", - "FusedBatchNormV3", - "_MklFusedInstanceNorm", - "BiasAdd", - ]: - const_value = tensor_util.MakeNdarray(node.attr.get("value").tensor).astype(np.float32) - self._log_histogram(writer, node.name, const_value) - - outputs.extend(fp32_inspect_node_name) - if len(int8_inspect_node_name) > 0: - output_postfix = "_int8.output" - outputs.extend(int8_inspect_node_name) - - if metrics: - for metric in metrics: - metric.reset() - self.fp32_preds_as_label = any( - [hasattr(metric, "compare_label") and not metric.compare_label for metric in metrics] - ) - - origin_output_tensor_names = model.output_tensor_names - model.output_tensor_names = outputs - input_tensor = model.input_tensor - output_tensor = model.output_tensor if len(model.output_tensor) > 1 else model.output_tensor[0] - logger.info("Start to evaluate the TensorFlow model.") - - def eval_func(dataloader): - results = [] - for idx, (inputs, labels) in enumerate(dataloader): - # dataloader should keep the order and len of inputs same with input_tensor - if len(input_tensor) == 1: - feed_dict = {} - if isinstance(inputs, dict) or isinstance(inputs, OrderedDict) or isinstance(inputs, UserDict): - for name in inputs: - for tensor in input_tensor: - pos = tensor.name.rfind(":") - t_name = tensor.name if pos < 0 else tensor.name[:pos] - if name == t_name: - feed_dict[tensor] = inputs[name] - break - else: - feed_dict = {input_tensor[0]: inputs} # get raw tensor using index [0] - else: - assert len(input_tensor) == len(inputs), "inputs len must equal with input_tensor" - feed_dict = {} - if isinstance(inputs, dict) or isinstance(inputs, OrderedDict) or isinstance(inputs, UserDict): - for name in inputs: - for tensor in input_tensor: - pos = tensor.name.rfind(":") - t_name = tensor.name if pos < 0 else tensor.name[:pos] - if name == t_name: - feed_dict[tensor] = inputs[name] - break - else: - feed_dict = dict(zip(input_tensor, inputs)) - - if model.iter_op: - predictions = iterator_sess_run( - model.sess, model.iter_op, feed_dict, output_tensor, iteration, measurer - ) - elif measurer is not None: - measurer.start() - predictions = model.sess.run(output_tensor, feed_dict) - measurer.end() - else: - predictions = model.sess.run(output_tensor, feed_dict) - - if self.fp32_preds_as_label: - self.fp32_results.append(predictions) if fp32_baseline else results.append(predictions) - - # Inspect node output, just get 1st iteration output tensors for now - if idx == 0 and tensorboard: - for index, node_name in enumerate(outputs): - tensor = predictions[index] - if node_name in int8_inspect_node_name: - tensor = Dequantize(predictions[index], q_node_scale[node_name]) - self._log_histogram(writer, node_name + output_postfix, tensor.astype(np.float32), idx) - writer.close() - if isinstance(predictions, list): - if len(origin_output_tensor_names) == 1: - predictions = predictions[0] - elif len(origin_output_tensor_names) > 1: - predictions = predictions[: len(origin_output_tensor_names)] - if postprocess is not None: - predictions, labels = postprocess((predictions, labels)) - if metrics: - for metric in metrics: - if not hasattr(metric, "compare_label") or ( - hasattr(metric, "compare_label") and metric.compare_label - ): - metric.update(predictions, labels) - if idx + 1 == iteration: - break - return results - - if isinstance(dataloader, BaseDataLoader) and not self.benchmark: - try: - results = 
eval_func(dataloader) - except Exception: # pragma: no cover - logger.warning("Fail to forward with batch size={}, set to {} now.".format(dataloader.batch_size, 1)) - dataloader.batch(1) - results = eval_func(dataloader) - else: # pragma: no cover - results = eval_func(dataloader) - - if self.fp32_preds_as_label: - from neural_compressor.tensorflow.quantization.utils.utility import collate_tf_preds - - if fp32_baseline: - results = collate_tf_preds(self.fp32_results) - reference = results - else: - reference = collate_tf_preds(self.fp32_results) - results = collate_tf_preds(results) - for metric in metrics: - if hasattr(metric, "compare_label") and not metric.compare_label: - metric.update(results, reference) - - acc = 0 if metrics is None else [metric.result() for metric in metrics] - if tensorboard: - new_dir = temp_dir + "_acc_" + str(acc) - writer.close() - if os.path.isdir(new_dir): - import shutil - - shutil.rmtree(new_dir, ignore_errors=True) - os.rename(temp_dir, new_dir) - self.dump_times += 1 - model.output_tensor_names = origin_output_tensor_names - return acc if not isinstance(acc, list) or len(acc) > 1 else acc[0] - def _tuning_cfg_to_fw(self, tuning_cfg): """Parse the neural_compressor wrapped configuration to Tensorflow. @@ -468,21 +189,11 @@ def quantize( """ assert ( self.approach != "post_training_dynamic_quant" - ), "Dynamic quantization is not supported on TensorFlow framework now!" - - if self.approach == "quant_aware_training": # pragma: no cover - assert ( - q_func is not None - ), "quantization aware training mode \ - is not configured correctly" - - from neural_compressor.tensorflow.utils import Model - - qat_model = q_func(model) + ), "Dynamic Quantization is not supported on TensorFlow framework now!" - return self.convert(Model(qat_model), "QAT", "default") - - assert q_func is None, "post-training quantization mode is not support calibration function for Tensorflow!" + assert ( + self.approach != "quant_aware_training" + ), "Quantize Aware Training is not supported on TensorFlow framework now!" 
self.calib_sampling_size = calib_dataloader.batch_size * calib_iteration tune_cfg = self.parse_quant_config(quant_config, model, calib_iteration) @@ -612,7 +323,7 @@ def _dump_model_op_stats(self, model_graphdef): continue possible_int8_res = [name for name in int8_op_prefix_list if i.op.find(name) != -1] - if any(possible_int8_res): + if any(possible_int8_res): # pragma: no cover origin_op_type = possible_int8_res[0].split("Quantized")[-1] if origin_op_type == "FusedBatchNorm": origin_op_type = "FusedBatchNormV3" @@ -915,367 +626,6 @@ def check_match(patterns, input_pattern): return capability - def set_tensor(self, model, tensor_dict): - """Quantize the bias and weight tensors in tensor_dict.""" - from neural_compressor.tensorflow.quantization.utils.graph_util import GraphAnalyzer - - g = GraphAnalyzer() - g.graph = model.graph_def - graph_info = g.parse_graph() - - def _get_fp32_op_name(model, tensor_name): - is_weight = False - is_biasadd = False - last_node_name = None - current_node_name = None - for each_node in model.graph_def.node: - if tensor_name in each_node.input: - tensor_index = list(each_node.input).index(tensor_name) - if each_node.op.find("Quantized") != -1 and tensor_index == 2: - is_biasadd = True - last_node_name = each_node.input[0] - current_node_name = each_node.name - - if tensor_name + "_qint8_const" in each_node.input: - pass - - return is_weight, is_biasadd, current_node_name, last_node_name - - from tensorflow.core.framework import attr_value_pb2 - from tensorflow.python.framework import dtypes, tensor_util - - from neural_compressor.tensorflow.quantization.utils.graph_util import GraphRewriterHelper as Helper - - qint32_type = dtypes.qint32.as_datatype_enum - - for tensor_name, tensor_content in tensor_dict.items(): - is_weight, is_biasadd, current_node_name, last_node_name = _get_fp32_op_name(model, tensor_name) - - if is_biasadd: - is_biasadd_dtype_is_fp32 = graph_info[current_node_name].node.attr["Tbias"] == attr_value_pb2.AttrValue( - type=dtypes.float32.as_datatype_enum - ) - current_node = graph_info[current_node_name].node - bias_add_node = graph_info[current_node.input[2]].node - if is_biasadd_dtype_is_fp32: - bias_add_node.attr["value"].CopyFrom( - attr_value_pb2.AttrValue( - tensor=tensor_util.make_tensor_proto(tensor_content, dtypes.float32, tensor_content.shape) - ) - ) - else: - last_node = graph_info[last_node_name].node - min_input = graph_info[last_node.input[-2]].node.attr["value"].tensor.float_val[0] - max_input = graph_info[last_node.input[-1]].node.attr["value"].tensor.float_val[0] - channel_size = tensor_content.shape[0] - max_filter_node = graph_info[current_node.input[6]].node - min_filter_node = graph_info[current_node.input[5]].node - if max_filter_node.attr["value"].tensor.float_val: - max_filter_tensor = [] - min_filter_tensor = [] - max_filter_tensor.append((max_filter_node.attr["value"].tensor.float_val)[0]) - min_filter_tensor.append((min_filter_node.attr["value"].tensor.float_val)[0]) - else: - max_filter_tensor = tensor_util.MakeNdarray(min_filter_node.attr["value"].tensor) - min_filter_tensor = tensor_util.MakeNdarray(min_filter_node.attr["value"].tensor) - activation_range = 127.0 if current_node.attr["Tinput"].type == dtypes.qint8 else 255.0 - updated_bias = Helper.generate_int32_bias_for_conv( - tensor_content, - channel_size, - max_input, - min_input, - max_filter_tensor, - min_filter_tensor, - activation_range, - ) - - bias_add_node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(type=qint32_type)) - 
bias_add_node.attr["value"].CopyFrom( - attr_value_pb2.AttrValue( - tensor=tensor_util.make_tensor_proto(updated_bias, dtypes.int32, tensor_content.shape) - ) - ) - bias_add_node.attr["value"].tensor.dtype = qint32_type - current_node.attr["Tbias"].CopyFrom(attr_value_pb2.AttrValue(type=qint32_type)) - - if is_weight: - tmp_const_node = Helper.create_constant_node( - current_node.name + "_weights_tmp", tensor_content.transpose(2, 3, 1, 0), dtypes.float32 - ) - min_filter_node = graph_info[current_node.input[5]].node - per_channel = True if min_filter_node.attr["value"].tensor.tensor_shape else False - from neural_compressor.tensorflow.quantization.utils.quantize_graph_common import QuantizeGraphHelper - - original_fp32_op = current_node.op.split("With")[0].split("Quantized")[-1] - if original_fp32_op.find("Depthwise") != -1: - original_fp32_op = "DepthwiseConv2dNative" - qint8_const_node, min_node, max_node = QuantizeGraphHelper.generate_quantized_weight_node( - original_fp32_op, tmp_const_node, per_channel - ) - g.add_node(qint8_const_node, [], [current_node.name]) - g.add_node(min_node, [], [current_node.name]) - g.add_node(max_node, [], [current_node.name]) - g.replace_constant_graph_with_constant_node(qint8_const_node, tensor_name) - g.replace_constant_graph_with_constant_node(min_node, current_node.input[5]) - g.replace_constant_graph_with_constant_node(max_node, current_node.input[6]) - - def inspect_weight_and_bias(self, node_list, graph_def, graph_info, graph_node_name_mapping): - """Inspect the weights and biases.""" - from neural_compressor.tensorflow.quantization.utils.utility import ( - get_tensor_val_from_graph_node, - int8_node_name_reverse, - ) - from neural_compressor.tensorflow.utils import dequantize_weight - - weights_result = {} - inspect_nodes = [] - node_set = set(node_list) - for node in graph_def.node: - node_name = node.name - if "Quantized" in node.op: - node_name = int8_node_name_reverse(node) - if node_name in node_set and ("Conv" in node.op or "Mul" in node.op): - inspect_nodes.append(node) - logger.debug(f"Start to inspect weight and bias for: {[node.name for node in inspect_nodes]}.") - for node in inspect_nodes: - # inspect weights and bias - node_name = node.name - weight_node_name = node.input[1] - weight_node = graph_node_name_mapping[weight_node_name] - if weight_node.op != "Const": # skip the matmul whose two inputs are previous output - continue - weight_node_val = get_tensor_val_from_graph_node(graph_node_name_mapping, weight_node_name) - weight_node_val = weight_node_val.astype("float32") - # dequantize the weight for quantized model - if "Quantized" in node.op: - node_name = int8_node_name_reverse(node) - weight_node_name_pre = weight_node_name.split("_qint8_const")[0] - min_filter_node = weight_node_name_pre + "_min" - max_filter_node = weight_node_name_pre + "_max" - if graph_info[min_filter_node].node.attr["value"].tensor.float_val: - min_filter_val = graph_info[min_filter_node].node.attr["value"].tensor.float_val - max_filter_val = graph_info[max_filter_node].node.attr["value"].tensor.float_val - else: - min_filter_val = get_tensor_val_from_graph_node(graph_node_name_mapping, min_filter_node) - max_filter_val = get_tensor_val_from_graph_node(graph_node_name_mapping, max_filter_node) - weight_node_val = dequantize_weight(weight_node_val, min_filter_val, max_filter_val) - weights_result[node_name] = {weight_node_name: weight_node_val} - return weights_result - - def fused_node_mapping(self, node_list, pattern_mapping, graph_info, 
graph_node_name_mapping): - """Create the mapping between first node and last node in fused sequence. - - Args: - node_list: node name list - pattern_mapping: key: node name, val: node pattern mapping - graph_info: key: node name, val: node details - graph_node_name_mapping: key: node name, val: node - Returns: - fused_mapping: key: first node name in fused seq, val: last node in fused seq - fused_mapping_reverse: key: last node in fused seq, val: first node name in fused seq - """ - fused_mapping = {} - fused_mapping_reverse = {} - for node_name in node_list: - fused_seq = pattern_mapping[node_name]["sequence"].split(",") - # for the node not fused with others - if len(fused_seq) == 1: - fused_mapping[node_name] = node_name - fused_mapping_reverse[node_name] = node_name - continue - _next_node_name = node_name - for _next_node_op_type in fused_seq[1:]: - node_details = graph_info[_next_node_name] - for node_output_name in node_details.outputs: - if graph_node_name_mapping[node_output_name].op == "Cast": - cast_node = graph_node_name_mapping[node_output_name] - node_output_name = graph_info[cast_node.name].outputs[0] - if graph_node_name_mapping[node_output_name].op in [_next_node_op_type, "Cast"]: - _next_node_name = node_output_name - fused_mapping[node_name] = _next_node_name - fused_mapping_reverse[_next_node_name] = node_name - return fused_mapping, fused_mapping_reverse - - def _inspect_tensor_inference(self, inspect_node_dict, model, dataloader, iteration_list): - """Do inference for inspect activation.""" - out_tensor_lst = [] - out_tensor_lst += [{n: [n + ":" + str(i) for i in range(3)]} for n in inspect_node_dict["qreq_node"]] - out_tensor_lst += [{n: n + ":0"} for n in inspect_node_dict["qdq_node"]] - out_tensor_lst += [{n: n + ":0"} for n in inspect_node_dict["f_node"]] - out_cnt = len(out_tensor_lst) - iteration_list = set(iteration_list) - input_tensor = model.input_tensor - logger.info("Start to do inference for inspect activation.") - activation_result = [] - for idx, (inputs, labels) in enumerate(dataloader): - model_out = [] - if idx + 1 > max(iteration_list): - break - if idx + 1 not in iteration_list: - continue - if len(input_tensor) == 1: - feed_dict = {input_tensor[0]: inputs} # get raw tensor using index [0] - else: - assert len(input_tensor) == len(inputs), "inputs len must equal with input_tensor" - feed_dict = dict(zip(input_tensor, inputs)) - # TODO find an optimized method to avoid multiple runs - for i, out_t in enumerate(out_tensor_lst): - logger.debug(f"Finished inspect {i}/{out_cnt} nodes, current inspect node {out_t.keys()}.") - model_out.append(model.sess.run(out_t, feed_dict)) - activation_result.append(model_out) - return activation_result - - def inspect_activation( - self, node_list, graph_def, graph_node_name_mapping, quantization_cfg, dataloader, iteration_list, graph_info - ): - """Inspect the activation.""" - from neural_compressor.tensorflow.utils import Model - - original_graph_node_mapping = {} - for node in graph_def.node: - original_graph_node_mapping[node.name] = node - inspect_node_dict = {"qdq_node": [], "qreq_node": [], "f_node": []} - for node_name in node_list: - node = graph_node_name_mapping[node_name] - if "Quantized" in node.op and "Dequantize" in node.op: - inspect_node_dict["qdq_node"].append(node.name) - elif "Quantized" in node.op or "_Quantized" in node.op or "Requantize" in node.op: - inspect_node_dict["qreq_node"].append(node.name) - else: - inspect_node_dict["f_node"].append(node_name) - pattern_mapping = {} - node_dict = 
quantization_cfg["op"] - for node_name_and_type in node_dict.keys(): - node_name, _ = node_name_and_type - if "pattern" in node_dict[node_name_and_type]: - pattern_mapping[node_name] = node_dict[node_name_and_type]["pattern"] - else: - pattern_mapping[node_name] = {"sequence": node_name} - if inspect_node_dict["f_node"]: - fuse_map, fuse_map_reverse = self.fused_node_mapping( - inspect_node_dict["f_node"], pattern_mapping, graph_info, graph_node_name_mapping - ) - inspect_node_dict["f_node"] = [fuse_map[n] for n in inspect_node_dict["f_node"]] - # build model and do inference - model = Model(graph_def) - activation_result = self._inspect_tensor_inference(inspect_node_dict, model, dataloader, iteration_list) - final_result = [] - int8_postfix = "_eightbit" - for iter_res in activation_result: - tmp_iter_result = {} - for res in iter_res: - node_name, val = list(res.keys())[0], list(res.values())[0] - val = Dequantize(val[0], (node_name, val[1], val[2])) if len(val) == 3 else val - val = val.astype(np.float32) - index_postfix = node_name.find(int8_postfix) - if index_postfix != -1: - node_name = node_name[:index_postfix] - tmp_iter_result[node_name] = {node_name: val} - else: - tmp_iter_result[fuse_map_reverse[node_name]] = {fuse_map_reverse[node_name]: val} - final_result.append(tmp_iter_result) - return final_result - - def inspect_tensor( - self, - model, - dataloader=None, - op_list=[], - iteration_list=[], - inspect_type="activation", - save_to_disk=False, - save_path=None, - quantization_cfg=None, - ): - """Dump the weight and activation(output) to local disk. - - 1. create the correspondence between query node name and the actually output node name in graph_def - 2. get the weight and bias for the given node - 3. get the activation for the given node - 4. save the tensor to disk - Args: - model: int8/fp32 graph_def/TensorflowBaseModel - dataloader: dataloader used during inspect activation - op_list: op list to inspect - iteration_list: iteration list to inspect, start from 1 - inspect_type: activation/weight/all - save_to_disk: dump to disk or not - save_path: the dump path for inspect tensor - quantization_cfg: quantization configuration for fused fp32 model and quantized model - Returns: - Dict - { - 'weight': { - 'node0_name': {'weight0_name': numpy.array, 'bias0_name': numpy.array, ...}, - 'node1_name': {'weight1_name': numpy.array, 'bias1_name': numpy.array, ...}, - ... - }, - 'activation': [ - # iter 1: - { - 'node0_name': {'output0_name': numpy.array, 'output1_name': numpy.array, ...} - 'node1_name': {'output1_name': numpy.array, 'output1_name': numpy.array, ...} - ... - }, - # iter 2: - { - ... 
- } - ] - } - """ - from neural_compressor.tensorflow.quantization.utils.graph_util import GraphAnalyzer - from neural_compressor.tensorflow.quantization.utils.utility import int8_node_name_reverse - from neural_compressor.tensorflow.utils import TensorflowBaseModel, dump_data_to_local, load_data_from_pkl - - if isinstance(model, TensorflowBaseModel): - model = model.graph_def - if not quantization_cfg: - # TODO get config from graph if config is None - quantization_cfg = load_data_from_pkl("./nc_workspace/", "cfg.pkl") - node_list = op_list - # create the mapping between node name and node, key: node_name, val: node - graph_node_name_mapping = {} - quan_model_flag = False - for node in model.node: - node_name = int8_node_name_reverse(node) - if "Quantized" in node.op: - quan_model_flag = True - node_name = int8_node_name_reverse(node) - if node.attr["value"].tensor.dtype == tf.dtypes.bfloat16.as_datatype_enum: - quan_model_flag = True - graph_node_name_mapping[node_name] = node - if quan_model_flag: - logger.info("Dump the tensor for quantized model.") - - # create the mapping between node name and node detail - g = GraphAnalyzer() - g.graph = model - graph_info = g.parse_graph() - inspect_result = {} - - # inspect weight - if inspect_type == "weight" or inspect_type == "all": - logger.info("Start to inspect weight and bias.") - weights_result = self.inspect_weight_and_bias(node_list, model, graph_info, graph_node_name_mapping) - inspect_result["weight"] = weights_result - - # inspect activation - if inspect_type == "activation" or inspect_type == "all": - logger.info("Start to inspect activation.") - activation_result = self.inspect_activation( - node_list, model, graph_node_name_mapping, quantization_cfg, dataloader, iteration_list, graph_info - ) - inspect_result["activation"] = activation_result - - # save to disk - if save_to_disk: - if not save_path: - save_path = "./nc_workspace/tmp/" - dump_data_to_local(inspect_result, save_path, "inspect_result.pkl") - logger.info(f"Dumped the inspect tensor to {save_path}") - return inspect_result - def quantize_input(self, model): """Quantize the model to be able to take quantized input. @@ -1381,320 +731,8 @@ def get_optype_wise_ability(self): res[op[1]] = {"activation": {"dtype": ["bf16"]}, "weight": {"dtype": ["bf16"]}} return res - def _pre_hook_for_qat(self, dataloader=None): - """Pre hook for QAT.""" - self.model.model = self.qat_convert(self.model.model) - - def _post_hook_for_qat(self): - """Post hook for QAT.""" - pass - - def _pre_eval_hook(self, model): - """Pre evaluation hook.""" - return model - - # Add keyword arguments unpacking - def _post_eval_hook(self, model, **kwargs): - """Post evaluation hook.""" - pass - - def save(self, model, path): - """Save model to the path.""" - pass - - # this function is used to convert keras QAT model to pb in old QAT implementation, - # and it's not used in refactored QAT - def convert(self, model, source, destination): # pragma: no cover - """The function is used to convert a source model format to another. - - Args: - model (neural_compressor.model): base model to be converted. - source (string): The source model format. - destination (string): The destination model format. 
- """ - assert source.lower() == "qat" and destination.lower() == "default" - capability = self._query_fw_capability(model) - - quantize_config = {"op_wise_config": {}} - for each_op_info in capability["opwise"]: - is_perchannel = False - weight_bit = 7.0 - for op_cap in capability["opwise"][each_op_info]: - if "activation" in op_cap and "quant_mode" in op_cap["activation"]: - activation = op_cap["activation"] - if "weight" in op_cap: - weight = op_cap["weight"] - is_perchannel = True if weight["granularity"][0] == "per_channel" else False - algorithm = activation["algorithm"][0] - is_asymmetric = False - if "activation" in op_cap: - is_asymmetric = True if activation["scheme"][0] == "asym" else False - - quantize_config["op_wise_config"][each_op_info[0]] = ( - is_perchannel, - algorithm, - is_asymmetric, - weight_bit, - ) - from neural_compressor.tensorflow.quantization.utils.graph_converter import GraphConverter - - tmp_graphdef = copy.deepcopy(model.graph_def) - for i in tmp_graphdef.node: - if i.op == "Const" and i.input: - i.ClearField("input") - model.graph_def = tmp_graphdef - converter = GraphConverter( - model, - qt_config=quantize_config, - int8_sequences=self.op_wise_sequences, - fake_quant=True, - new_api=self.new_api, - performance_only=self.performance_only, - use_bf16=self.use_bf16, - ) - - return converter.convert() - - def qat_convert(self, model, quantize_recipe=None): - """Convert a fp32 'tf.keras' model to be a int8 one with quantization aware training implementation. - - Args: - model (tf.keras.Model): The model to be quantized, expected to be a Keras Functional or Sequential model. - quantize_recipe (dict): A dict that decide whether given layers should be quantized. - - Returns: - converted_model (tf.keras.Model): Quantized model with fake quant nodes inserted. - """ - assert isinstance(model, tf.keras.Model), ( - "The model to be converted is expected to be " - "a `tf.keras.Model` instance. You should not pass an instance of type: {input}.".format( - input=model.__class__.__name__ - ) - ) - - assert model.__class__.__name__ in [ - "Functional", - "Sequential", - ], "Only `Functional` or `Sequential` keras model is supported for QAT." - - from neural_compressor.tensorflow.quantization.utils.quantize_graph.qat.quantize_config import global_config - from neural_compressor.tensorflow.quantization.utils.quantize_graph.qat.quantize_helper import ( - init_quantize_config, - qat_clone_function, - ) - - config = init_quantize_config(model, quantize_recipe) - q_model = tf.keras.models.clone_model(model, input_tensors=None, clone_function=qat_clone_function) - global_config.clear() - - return q_model - - @dump_elapsed_time("Pass recover model") - def recover_tuned_model(self, model, q_config): - """Execute the recover process on the specified model. 
- - Args: - tune_cfg (dict): quantization configuration - model (tf.compat.v1.GraphDef): fp32 model - q_config (dict): recover configuration - - Returns: - tf.compat.v1.GraphDef: the quantized model - """ - from neural_compressor.tensorflow.quantization.utils.graph_rewriter.generic.pre_optimize import PreOptimization - - self.pre_optimizer_handle = PreOptimization(model, self.new_api, self.device) - self.pre_optimized_model = self.pre_optimizer_handle.get_optimized_model(self.itex_mode) - model.graph_def = self.pre_optimized_model.graph_def - - from neural_compressor.tensorflow.quantization.utils.graph_converter_without_calib import ( - GraphConverterWithoutCalib, - ) - - converter = GraphConverterWithoutCalib( - model, - recover_config=q_config, - new_api=self.new_api, - performance_only=self.performance_only, - use_bf16=self.use_bf16, - ) - - return converter.convert_without_calib() - - def get_output_op_names(self, qmodel): - """Get the oupur OPs's names.""" - from neural_compressor.tensorflow.quantization.utils.graph_util import GraphAnalyzer - - graph_def = GraphAnalyzer().parse_graph(qmodel.graph_def) - output_op_names = set() - - def _add_output_op_name(opname): - if opname.endswith("_dequantize"): - output_op_names.add(opname[: -len("_dequantize")]) # pylint: disable=no-member - elif opname.endswith("__dequant"): - pass - else: - output_op_names.add(opname) # pylint: disable=no-member - - for output_opname in qmodel.output_node_names: - op_count = 0 - stack = [output_opname] - while stack: - opname = stack.pop() - while True: - op_count += 1 - if opname not in graph_def: - break - op = graph_def[opname] - if op.node.op == "Dequantize": - _add_output_op_name(opname) - break - next_opnames = op.node.input - if not next_opnames: - break - elif len(next_opnames) > 1: - stack += next_opnames[1:] - - opname = next_opnames[0] - - output_op_names = list(output_op_names) - logger.debug(f"output op names: {output_op_names}") - return output_op_names - - def calculate_op_sensitivity( - self, model, dataloader, tune_cfg, output_op_names, confidence_batches, fallback=True, requantize_cfgs=None - ): - """Compute the op sensitivity. - - The sensitivity metric is the mse between the output of the last quantized op of - the quantized model and the output of its corresponding op in the fp32 model. - - 1. Backup the tune cfg - 2. Fallback each int8 op and compute its mse if use fallback (with 'fallback == True'), - or re-quantize each fp32 op(fallen back in the previous stage) and compute its MSE if not. - 3. Sorted op name list according to its MSE - - Args: - fp32_model: The fp32 model. - dataloader: the dataloader with full dataset. - tune_cfg: tuning config - fallback: denote fallback stage or re-quantize stage - requantize_cfgs: the dict of tuning configs for all re-quantizable ops - - Returns: - A list of op names, sorted by its MSE sensitivity. - """ - fp32_op_cfg = {"activation": {"dtype": "fp32", "quant_mode": "fp32"}, "weight": {"dtype": "fp32"}} - - if fallback: - ops_list = [ - op - for op, config in tune_cfg["op"].items() - if config["activation"]["quant_mode"] in ("static", "dynamic") - ] - replace_cfgs = {op: fp32_op_cfg for op in tune_cfg["op"]} - else: - ops_list = [ - op - for op, config in tune_cfg["op"].items() - if config["activation"]["quant_mode"] == "fp32" and op in requantize_cfgs - ] - replace_cfgs = requantize_cfgs - - # Step2. 
compute mse - mse_result = self._get_mse_order( - model, deepcopy(tune_cfg), replace_cfgs, ops_list, dataloader, output_op_names, confidence_batches - ) - - # Step3. sort - mse_order = [op for op, _ in sorted(mse_result.items(), key=lambda i: i[1])] - logger.debug("Dump MSE order:") - for op in mse_order: - logger.debug(f"{op}: {mse_result[op]}") - return mse_order - - def _get_mse_order( - self, fp32_model, tune_cfg, replace_cfgs, ops_lst, dataloader, output_op_names, confidence_batches - ): - """Compute MSE.""" - op_cfg = tune_cfg["op"] - mse_result = {} - partial_dataloader = self._partial_dataloader(dataloader, confidence_batches) - - fp32_output = self._inference_model_on_batches(fp32_model, tune_cfg, partial_dataloader, output_op_names) - - for op in ops_lst: - # backup and set replace tuning config - backup_cfg = op_cfg[op] - op_cfg[op] = replace_cfgs[op] - # quantize and inference the model - q_model = self.quantize(tune_cfg, fp32_model, partial_dataloader) - q_output = self._inference_model_on_batches(q_model, tune_cfg, partial_dataloader, output_op_names) - - mse_result[op] = self._calculate_mse(fp32_output, q_output) - - # recover tune_cfg - op_cfg[op] = backup_cfg - - return mse_result - - def _partial_dataset_of(self, dataloader, confidence_batches): - """Partial dataset.""" - from neural_compressor.tensorflow.utils import DummyDataset, DummyDatasetV2 - - if isinstance(dataloader.dataset, DummyDataset) or isinstance(dataloader.dataset, DummyDatasetV2): - assert isinstance(confidence_batches, int) - ds = copy.deepcopy(dataloader.dataset) - ds.dataset = ds.dataset[:confidence_batches] - return ds - else: - return dataloader.dataset.take(confidence_batches) - - def _partial_dataloader(self, dataloader, confidence_batches): - """Partial dataloader.""" - return type(dataloader)( - dataset=self._partial_dataset_of(dataloader, confidence_batches), - batch_size=dataloader.batch_size, - last_batch=dataloader.last_batch, - collate_fn=dataloader.collate_fn, - sampler=dataloader.sampler, - batch_sampler=dataloader.batch_sampler, - num_workers=dataloader.num_workers, - pin_memory=dataloader.pin_memory, - shuffle=dataloader.shuffle, - distributed=dataloader.distributed, - ) - - def _calculate_mse(self, fp32_output, q_output): - """MSE calculation.""" - result = [] - for i, j in zip(fp32_output, q_output): - result.append(np.square(i - j).mean()) - return np.array(result).mean() - - def _inference_model_on_batches(self, model, tune_cfg, dataloader, output_op_names): - """Inference model on batches.""" - from neural_compressor.tensorflow.quantization.utils.utility import generate_feed_dict - - input_tensors = model.input_tensor - output_tensors = [] - for op in output_op_names: - for tensor in model.graph.get_operation_by_name(op).outputs: - output_tensors.append(tensor) - - predictions = [] - for index, (inputs, _) in enumerate(dataloader): - feed_dict = generate_feed_dict(input_tensors, inputs) - - pred = model.sess.run(output_tensors, feed_dict) - for item in pred: - predictions.append(item) - - return predictions - - -class Tensorflow_ITEXAdaptor(TensorFlowAdaptor): +class Tensorflow_ITEXAdaptor(TensorFlowAdaptor): # pragma: no cover """Tensorflow ITEX Adaptor Class.""" def __init__(self, framework_specific_info): @@ -2253,10 +1291,10 @@ def get_fuse_patterns(self): if self.itex_mode: patterns["int8"].append("FusedBatchNormV3 + Relu") patterns["int8"].append("FusedBatchNormV3 + LeakyRelu") - elif version1_eq_version2(tf.version.VERSION, "1.15.0-up3"): + elif 
version1_eq_version2(tf.version.VERSION, "1.15.0-up3"): # pragma: no cover patterns["int8"] = tf1_15_up3_int8_pattern_list patterns["uint8"] = tf1_15_up3_uint8_pattern_list - else: + else: # pragma: no cover patterns["int8"] = old_tf_int8_pattern_list patterns["uint8"] = old_tf_uint8_pattern_list diff --git a/neural_compressor/tensorflow/keras/layers/conv2d.py b/neural_compressor/tensorflow/keras/layers/conv2d.py index 426b1777b42..002ac1c507b 100644 --- a/neural_compressor/tensorflow/keras/layers/conv2d.py +++ b/neural_compressor/tensorflow/keras/layers/conv2d.py @@ -30,7 +30,7 @@ else: from keras.layers.convolutional.base_conv import Conv # pylint: disable=E0401 -if version1_gte_version2(tf.__version__, "2.16.1"): +if version1_gte_version2(tf.__version__, "2.16.1"): # pragma: no cover class QConv2D(BaseConv): def __init__( @@ -354,40 +354,28 @@ def get_config(self): def initialize_int8_conv2d(fp32_layer, q_config): kwargs = fp32_layer.get_config() - if "name" in kwargs: - del kwargs["name"] - if "filters" in kwargs: - del kwargs["filters"] - if "kernel_size" in kwargs: - del kwargs["kernel_size"] - if "strides" in kwargs: - del kwargs["strides"] - if "padding" in kwargs: - del kwargs["padding"] - if "data_format" in kwargs: - del kwargs["data_format"] - if "dilation_rate" in kwargs: - del kwargs["dilation_rate"] - if "groups" in kwargs: - del kwargs["groups"] - if "activation" in kwargs: - del kwargs["activation"] - if "use_bias" in kwargs: - del kwargs["use_bias"] - if "kernel_initializer" in kwargs: - del kwargs["kernel_initializer"] - if "bias_initializer" in kwargs: - del kwargs["bias_initializer"] - if "kernel_regularizer" in kwargs: - del kwargs["kernel_regularizer"] - if "activity_regularizer" in kwargs: - del kwargs["activity_regularizer"] - if "bias_regularizer" in kwargs: - del kwargs["bias_regularizer"] - if "kernel_constraint" in kwargs: - del kwargs["kernel_constraint"] - if "bias_constraint" in kwargs: - del kwargs["bias_constraint"] + param_list = [ + "name", + "filters", + "kernel_size", + "strides", + "padding", + "data_format", + "dilation_rate", + "groups", + "activation", + "use_bias", + "kernel_initializer", + "bias_initializer", + "kernel_regularizer", + "activity_regularizer", + "bias_regularizer", + "kernel_constraint", + "bias_constraint", + ] + for p in param_list: # pragma: no cover + if p in kwargs: + del kwargs[p] return QConv2D( name=fp32_layer.name, diff --git a/neural_compressor/tensorflow/keras/layers/dense.py b/neural_compressor/tensorflow/keras/layers/dense.py index 4e97cbfb7a7..84c4dfabd6c 100644 --- a/neural_compressor/tensorflow/keras/layers/dense.py +++ b/neural_compressor/tensorflow/keras/layers/dense.py @@ -170,28 +170,22 @@ def get_config(self): def initialize_int8_dense(fp32_layer, q_config): kwargs = fp32_layer.get_config() - if "name" in kwargs: - del kwargs["name"] - if "units" in kwargs: - del kwargs["units"] - if "activation" in kwargs: - del kwargs["activation"] - if "use_bias" in kwargs: - del kwargs["use_bias"] - if "kernel_initializer" in kwargs: - del kwargs["kernel_initializer"] - if "bias_initializer" in kwargs: - del kwargs["bias_initializer"] - if "kernel_regularizer" in kwargs: - del kwargs["kernel_regularizer"] - if "activity_regularizer" in kwargs: - del kwargs["activity_regularizer"] - if "bias_regularizer" in kwargs: - del kwargs["bias_regularizer"] - if "kernel_constraint" in kwargs: - del kwargs["kernel_constraint"] - if "bias_constraint" in kwargs: - del kwargs["bias_constraint"] + param_list = [ + "name", + "units", + 
"activation", + "use_bias", + "kernel_initializer", + "bias_initializer", + "kernel_regularizer", + "activity_regularizer", + "bias_regularizer", + "kernel_constraint", + "bias_constraint", + ] + for p in param_list: # pragma: no cover + if p in kwargs: + del kwargs[p] q_layer = QDense( name=fp32_layer.name, diff --git a/neural_compressor/tensorflow/keras/layers/depthwise_conv2d.py b/neural_compressor/tensorflow/keras/layers/depthwise_conv2d.py index 683c774b2fe..a0d8511d058 100644 --- a/neural_compressor/tensorflow/keras/layers/depthwise_conv2d.py +++ b/neural_compressor/tensorflow/keras/layers/depthwise_conv2d.py @@ -34,12 +34,10 @@ if version1_gte_version2(tf.__version__, "2.16.1"): - class QDepthwiseConv2D(BaseDepthwiseConv): + class QDepthwiseConv2D(BaseDepthwiseConv): # pragma: no cover def __init__( self, kernel_size, - min_value, - max_value, strides=(1, 1), padding="valid", depth_multiplier=1, @@ -195,8 +193,6 @@ class QDepthwiseConv2D(DepthwiseConv): def __init__( self, kernel_size, - min_value, - max_value, strides=(1, 1), padding="valid", depth_multiplier=1, @@ -376,42 +372,27 @@ def initialize_int8_depthwise_conv2d(fp32_layer, q_config): kwargs = fp32_layer.get_config() q_name = fp32_layer.name - if "name" in kwargs: - del kwargs["name"] - if "kernel_size" in kwargs: - del kwargs["kernel_size"] - if "strides" in kwargs: - del kwargs["strides"] - if "padding" in kwargs: - del kwargs["padding"] - if "depth_multiplier" in kwargs: - del kwargs["depth_multiplier"] - if "data_format" in kwargs: - del kwargs["data_format"] - if "dilation_rate" in kwargs: - del kwargs["dilation_rate"] - if "activation" in kwargs: - del kwargs["activation"] - if "use_bias" in kwargs: - del kwargs["use_bias"] - if "depthwise_initializer" in kwargs: - del kwargs["depthwise_initializer"] - if "bias_initializer" in kwargs: - del kwargs["bias_initializer"] - if "depthwise_regularizer" in kwargs: - del kwargs["depthwise_regularizer"] - if "activity_regularizer" in kwargs: - del kwargs["activity_regularizer"] - if "bias_regularizer" in kwargs: - del kwargs["bias_regularizer"] - if "depthwise_constraint" in kwargs: - del kwargs["depthwise_constraint"] - if "bias_constraint" in kwargs: - del kwargs["bias_constraint"] - if "min_value" in kwargs: - del kwargs["min_value"] - if "max_value" in kwargs: - del kwargs["max_value"] + param_list = [ + "name", + "kernel_size", + "strides", + "padding", + "depth_multiplier", + "data_format", + "dilation_rate", + "activation", + "use_bias", + "depthwise_initializer", + "bias_initializer", + "depthwise_regularizer", + "activity_regularizer", + "bias_regularizer", + "depthwise_constraint", + "bias_constraint", + ] + for p in param_list: # pragma: no cover + if p in kwargs: + del kwargs[p] return QDepthwiseConv2D( name=q_name, diff --git a/neural_compressor/tensorflow/keras/layers/pool2d.py b/neural_compressor/tensorflow/keras/layers/pool2d.py index ce81fc2377b..1a04627f06b 100644 --- a/neural_compressor/tensorflow/keras/layers/pool2d.py +++ b/neural_compressor/tensorflow/keras/layers/pool2d.py @@ -215,16 +215,16 @@ def get_config(self): def initialize_int8_avgpool(fp32_layer, q_config): kwargs = fp32_layer.get_config() - if "name" in kwargs: - del kwargs["name"] - if "pool_size" in kwargs: - del kwargs["pool_size"] - if "strides" in kwargs: - del kwargs["strides"] - if "padding" in kwargs: - del kwargs["padding"] - if "data_format" in kwargs: - del kwargs["data_format"] + param_list = [ + "name", + "pool_size", + "strides", + "padding", + "data_format", + ] + for p in 
param_list: # pragma: no cover + if p in kwargs: + del kwargs[p] q_layer = QAvgPool2D( name=fp32_layer.name, @@ -243,16 +243,16 @@ def initialize_int8_avgpool(fp32_layer, q_config): def initialize_int8_maxpool(fp32_layer, q_config): kwargs = fp32_layer.get_config() - if "name" in kwargs: - del kwargs["name"] - if "pool_size" in kwargs: - del kwargs["pool_size"] - if "strides" in kwargs: - del kwargs["strides"] - if "padding" in kwargs: - del kwargs["padding"] - if "data_format" in kwargs: - del kwargs["data_format"] + param_list = [ + "name", + "pool_size", + "strides", + "padding", + "data_format", + ] + for p in param_list: # pragma: no cover + if p in kwargs: + del kwargs[p] q_layer = QMaxPool2D( name=fp32_layer.name, diff --git a/neural_compressor/tensorflow/keras/layers/separable_conv2d.py b/neural_compressor/tensorflow/keras/layers/separable_conv2d.py index 05ee3a62c72..b3df094fec0 100644 --- a/neural_compressor/tensorflow/keras/layers/separable_conv2d.py +++ b/neural_compressor/tensorflow/keras/layers/separable_conv2d.py @@ -32,15 +32,13 @@ from keras.layers.convolutional.base_separable_conv import SeparableConv # pylint: disable=E0401 from keras.utils import conv_utils # pylint: disable=E0401 -if version1_gte_version2(tf.__version__, "2.16.1"): +if version1_gte_version2(tf.__version__, "2.16.1"): # pragma: no cover class QSeparableConv2D(BaseSeparableConv): def __init__( self, filters, kernel_size, - min_value, - max_value, strides=(1, 1), padding="valid", data_format=None, @@ -205,8 +203,6 @@ def __init__( self, filters, kernel_size, - min_value, - max_value, strides=(1, 1), padding="valid", data_format=None, @@ -368,50 +364,31 @@ def get_config(self): def initialize_int8_separable_conv2d(fp32_layer, q_config): kwargs = fp32_layer.get_config() - if "name" in kwargs: - del kwargs["name"] - if "filters" in kwargs: - del kwargs["filters"] - if "kernel_size" in kwargs: - del kwargs["kernel_size"] - if "strides" in kwargs: - del kwargs["strides"] - if "padding" in kwargs: - del kwargs["padding"] - if "data_format" in kwargs: - del kwargs["data_format"] - if "dilation_rate" in kwargs: - del kwargs["dilation_rate"] - if "depth_multiplier" in kwargs: - del kwargs["depth_multiplier"] - if "activation" in kwargs: - del kwargs["activation"] - if "use_bias" in kwargs: - del kwargs["use_bias"] - if "depthwise_initializer" in kwargs: - del kwargs["depthwise_initializer"] - if "pointwise_initializer" in kwargs: - del kwargs["pointwise_initializer"] - if "bias_initializer" in kwargs: - del kwargs["bias_initializer"] - if "depthwise_regularizer" in kwargs: - del kwargs["depthwise_regularizer"] - if "pointwise_regularizer" in kwargs: - del kwargs["pointwise_regularizer"] - if "activity_regularizer" in kwargs: - del kwargs["activity_regularizer"] - if "bias_regularizer" in kwargs: - del kwargs["bias_regularizer"] - if "depthwise_constraint" in kwargs: - del kwargs["depthwise_constraint"] - if "pointwise_constraint" in kwargs: - del kwargs["pointwise_constraint"] - if "bias_constraint" in kwargs: - del kwargs["bias_constraint"] - if "min_value" in kwargs: - del kwargs["min_value"] - if "max_value" in kwargs: - del kwargs["max_value"] + param_list = [ + "name", + "filters", + "kernel_size", + "strides", + "padding", + "data_format", + "dilation_rate", + "depth_multiplier", + "activation", + "use_bias", + "depthwise_initializer", + "bias_initializer", + "pointwise_initializer", + "depthwise_regularizer", + "activity_regularizer", + "bias_regularizer", + "pointwise_regularizer", + 
"depthwise_constraint", + "bias_constraint", + "pointwise_constraint", + ] + for p in param_list: # pragma: no cover + if p in kwargs: + del kwargs[p] return QSeparableConv2D( name=fp32_layer.name, diff --git a/neural_compressor/tensorflow/quantization/utils/graph_converter.py b/neural_compressor/tensorflow/quantization/utils/graph_converter.py index 30295005686..a0a924ecbe7 100644 --- a/neural_compressor/tensorflow/quantization/utils/graph_converter.py +++ b/neural_compressor/tensorflow/quantization/utils/graph_converter.py @@ -117,7 +117,7 @@ version1_lte_version2, ) -TF_SUPPORTED_MAX_VERSION = "2.15.0" +TF_SUPPORTED_MAX_VERSION = "2.16.1" TF_SUPPORTED_MIN_VERSION = "1.14.0" logger = logging.getLogger("neural_compressor") @@ -231,10 +231,6 @@ def _inference(self, model): Args: model(TensorflowBaseModel): input TensorflowBaseModel """ - if self.calib_func: - self.calib_func(model.model) - return - if model.model_type == "llm_saved_model": self._inference_llm(model) return @@ -264,7 +260,9 @@ def _inference(self, model): for idx, (inputs, labels) in enumerate(self.data_loader): if len(input_tensor) == 1: feed_dict = {} - if isinstance(inputs, dict) or isinstance(inputs, OrderedDict) or isinstance(inputs, UserDict): + if ( + isinstance(inputs, dict) or isinstance(inputs, OrderedDict) or isinstance(inputs, UserDict) + ): # pragma: no cover for name in inputs: for tensor in input_tensor: pos = tensor.name.rfind(":") @@ -274,7 +272,7 @@ def _inference(self, model): break else: feed_dict = {input_tensor[0]: inputs} # get raw tensor using index [0] - else: + else: # pragma: no cover assert len(input_tensor) == len(inputs), "inputs len must equal with input_tensor" feed_dict = {} if isinstance(inputs, dict) or isinstance(inputs, OrderedDict) or isinstance(inputs, UserDict): @@ -345,7 +343,7 @@ def _inference_llm(self, model): if idx >= self.calib_iteration: break - def _check_tf_version(self): + def _check_tf_version(self): # pragma: no cover """Check if the installed tensorflow version is supported.""" is_supported_version = False is_sprbase_version = False @@ -466,7 +464,7 @@ def convert(self): else: model = self.quantize() - if self.itex_mode: + if self.itex_mode: # pragma: no cover host_const_graph_def = PostHostConstConverter(self._tmp_model.graph_def).do_transformation() host_const_graph_def.library.CopyFrom(self.model.graph_def.library) self._tmp_model.graph_def = host_const_graph_def @@ -524,7 +522,9 @@ def _get_fp32_print_node_names(self, specified_op_list): for i in target_conv_op: if specified_op_list and i not in specified_op_list: continue - if node_name_mapping[i + "_eightbit_quantized_conv"].op == "QuantizedConv2DWithBiasSumAndRelu": + if ( + node_name_mapping[i + "_eightbit_quantized_conv"].op == "QuantizedConv2DWithBiasSumAndRelu" + ): # pragma: no cover start_index = sorted_node_names.index(i) for index, value in enumerate(sorted_node_names[start_index:]): if ( @@ -553,7 +553,7 @@ def _get_fp32_print_node_names(self, specified_op_list): self._fp32_model.graph_def = fp32_graph_def return self._fp32_model - def _search_y_pattern_for_itex(self): + def _search_y_pattern_for_itex(self): # pragma: no cover """Search the Y pattern for itex and return the op name.""" g = GraphAnalyzer() g.graph = self._fp32_model.graph_def @@ -633,7 +633,7 @@ def quantize(self): self._freeze_requantization_ranges(self._kl_op_dict) self._fuse_requantize_with_fused_quantized_node() - except ValueError as e: + except ValueError as e: # pragma: no cover logger.error("Fail to quantize graph due to 
{}.".format(str(e))) self._tmp_model = None raise @@ -944,7 +944,7 @@ def _insert_qdq_pairs(self): def _convert_qdq(self): """Convert Dequantize + Op + QuantizeV2 into QuantizedOps.""" - if self.itex_mode: + if self.itex_mode: # pragma: no cover self._tmp_graph_def, quantizev2_max = FreezeValueTransformer( self._tmp_graph_def, self._calibration_data, "__max:", self.itex_mode ).do_transformation() diff --git a/neural_compressor/tensorflow/quantization/utils/graph_converter_without_calib.py b/neural_compressor/tensorflow/quantization/utils/graph_converter_without_calib.py deleted file mode 100644 index 1ec3712a39d..00000000000 --- a/neural_compressor/tensorflow/quantization/utils/graph_converter_without_calib.py +++ /dev/null @@ -1,384 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -"""Without calibration Graph Converter Class.""" - -import copy -import logging -import os - -import tensorflow as tf -from tensorflow.python.platform import gfile - -from neural_compressor.tensorflow.quantization.utils.graph_rewriter.bf16.bf16_convert import BF16Convert -from neural_compressor.tensorflow.quantization.utils.graph_rewriter.generic.fold_batch_norm import ( - FoldBatchNormNodesOptimizer, -) -from neural_compressor.tensorflow.quantization.utils.graph_rewriter.generic.fuse_pad_with_conv import ( - FusePadWithConv2DOptimizer, -) -from neural_compressor.tensorflow.quantization.utils.graph_rewriter.generic.remove_training_nodes import ( - RemoveTrainingNodesOptimizer, -) -from neural_compressor.tensorflow.quantization.utils.graph_rewriter.generic.strip_unused_nodes import ( - StripUnusedNodesOptimizer, -) -from neural_compressor.tensorflow.quantization.utils.graph_rewriter.int8.freeze_value_without_calib import ( - FreezeValueWithoutCalibTransformer, -) -from neural_compressor.tensorflow.quantization.utils.graph_rewriter.int8.fuse_conv_requantize import ( - FuseConvRequantizeTransformer, -) -from neural_compressor.tensorflow.quantization.utils.graph_rewriter.int8.fuse_matmul_requantize import ( - FuseMatMulRequantizeDequantizeTransformer, - FuseMatMulRequantizeTransformer, -) -from neural_compressor.tensorflow.quantization.utils.graph_rewriter.int8.meta_op_optimizer import ( - MetaInfoChangingMemOpOptimizer, -) -from neural_compressor.tensorflow.quantization.utils.graph_rewriter.int8.post_quantized_op_cse import PostCseOptimizer -from neural_compressor.tensorflow.quantization.utils.graph_rewriter.int8.rnn_convert import QuantizedRNNConverter -from neural_compressor.tensorflow.quantization.utils.graph_rewriter.int8.scale_propagation import ( - ScaleProPagationTransformer, -) -from neural_compressor.tensorflow.quantization.utils.graph_util import GraphAnalyzer -from neural_compressor.tensorflow.quantization.utils.quantize_graph.quantize_graph_for_intel_cpu import ( - QuantizeGraphForIntel, -) -from neural_compressor.tensorflow.quantization.utils.quantize_graph_common import QuantizeGraphHelper -from 
neural_compressor.tensorflow.quantization.utils.transform_graph.bias_correction import BiasCorrection -from neural_compressor.tensorflow.quantization.utils.transform_graph.rerange_quantized_concat import ( - RerangeQuantizedConcat, -) -from neural_compressor.tensorflow.utils import ( - SPR_BASE_VERSIONS, - Model, - deep_get, - version1_eq_version2, - version1_gt_version2, - version1_gte_version2, - version1_lt_version2, - version1_lte_version2, -) - -TF_SUPPORTED_MAX_VERSION = "2.15.0" -TF_SUPPORTED_MIN_VERSION = "1.14.0" - -logger = logging.getLogger("neural_compressor") -debug = bool(logger.level == logging.DEBUG) - - -class GraphConverterWithoutCalib: - """Graph Converter without calibration Class is used to generate the quantization graph without calibration.""" - - def __init__( - self, model, data_loader=None, recover_config=None, new_api=False, performance_only=False, use_bf16=False - ): - """Convert graph without calibration. - - :param model: input tensorflow model. - :param qt_config: quantization configs, including interaction and op-wise quant config - :param fp32_ops: fall back to fp32 dtype op list - :param bf16_ops: fall back to bf16 dtype op list - :param data_loader: for calibration phase used dataloader - :param recover_config: config for recovering tuned model - """ - # Logger initial - self.model = model - # (TODO) does it right to make the internal model format as graph_def - self.output_tensor_names = self.model.output_tensor_names - self.input_tensor_names = self.model.input_tensor_names - # quantize specific config - self.op_wise_config = recover_config["op_wise_config"] - self.advance_config = deep_get(recover_config, "advance") - self.device = recover_config["device"] if "device" in recover_config else "cpu" - self.int8_sequences = recover_config["int8_sequences"] - self.fp32_ops = recover_config["fp32_ops"] - self.bf16_ops = recover_config["bf16_ops"] - self.recipes = recover_config["recipes"] - self.quantized_node_info = [] - self._calibration_data = [] - self._fp32_print_data = [] - self.data_loader = data_loader - self.recover_config = recover_config - self._check_tf_version() - self._check_args() - self._gen_tmp_filenames() - self.new_api = new_api - self.performance_only = performance_only - self.use_bf16 = use_bf16 - self._tmp_graph_def = copy.deepcopy(self.model.graph_def) - - # pylint: disable=no-member - def _check_tf_version(self): - """Check if the installed tensorflow version is supported.""" - is_supported_version = False - is_sprbase_version = False - try: - from tensorflow import python - - if hasattr(python, "pywrap_tensorflow") and hasattr( - python.pywrap_tensorflow, "IsMklEnabled" - ): # pragma: no cover - from tensorflow.python.pywrap_tensorflow import IsMklEnabled - elif hasattr(python.util, "_pywrap_util_port"): - from tensorflow.python.util._pywrap_util_port import IsMklEnabled - else: - from tensorflow.python._pywrap_util_port import IsMklEnabled - if IsMklEnabled() and (version1_lte_version2(TF_SUPPORTED_MIN_VERSION, tf.version.VERSION)): - is_supported_version = True - - if version1_gte_version2(tf.version.VERSION, "2.6.0") and os.getenv("TF_ENABLE_ONEDNN_OPTS") == "1": - is_supported_version = True - - if version1_gte_version2(tf.version.VERSION, "2.9.0"): - is_supported_version = True - - if tf.version.VERSION == "1.15.0-up3": - is_supported_version = True - - if tf.version.VERSION in SPR_BASE_VERSIONS: - is_supported_version = True - is_sprbase_version = True - - except Exception as e: - raise ValueError(e) - finally: # pragma: no 
cover - if version1_gt_version2(tf.version.VERSION, TF_SUPPORTED_MAX_VERSION) and not is_sprbase_version: - logger.warning( - str( - "Please note the {} version of TensorFlow is not fully verified! " - "Suggest to use the versions between {} and {} if meet problem." - ).format(tf.version.VERSION, TF_SUPPORTED_MIN_VERSION, TF_SUPPORTED_MAX_VERSION) - ) - - if version1_eq_version2(tf.version.VERSION, "2.5.0") and os.getenv("TF_ENABLE_MKL_NATIVE_FORMAT") != "0": - logger.fatal( - "Please set environment variable TF_ENABLE_MKL_NATIVE_FORMAT=0 " "when TensorFlow 2.5.0 installed." - ) - - if ( - version1_gte_version2(tf.version.VERSION, "2.6.0") - and version1_lt_version2(tf.version.VERSION, "2.9.0") - and os.getenv("TF_ENABLE_ONEDNN_OPTS") != "1" - ): - logger.fatal( - "Please set environment variable TF_ENABLE_ONEDNN_OPTS=1 " - "when TensorFlow >= 2.6.0 and < 2.9.0 installed." - ) - - if not is_supported_version: - raise ValueError( - str("Please install TensorFlow within version >={} and <={}.").format( - TF_SUPPORTED_MIN_VERSION, TF_SUPPORTED_MAX_VERSION - ) - ) - - def _check_args(self): - """Check model's arguments.""" - if ( - self.model.workspace_path - and not os.path.isdir(self.model.workspace_path) - and not os.path.exists(os.path.dirname(self.model.workspace_path)) - ): - raise ValueError('"output_graph" directory does not exist.') - self._output_path = self.model.workspace_path - - def _gen_tmp_filenames(self): - """Generate the temporary file names.""" - self._int8_dynamic_range_model_path = os.path.join(self._output_path, "int8_dynamic_range_graph") - self._int8_logged_model_path = os.path.join(self._output_path, "int8_logged_graph") - self._fp32_logged_model_path = os.path.join(self._output_path, "fp32_logged_graph") - self._int8_frozen_range_model_path = os.path.join(self._output_path, "int8_frozen_range_graph") - self._bf16_mixed_precision_model_path = os.path.join(self._output_path, "int8_bf16_mixed_precision_graph") - - self.output_graph = os.path.join(self._output_path, "int8_final_fused_graph") - # to keep temp model - self._tmp_model = Model(self.model._model, **self.model.kwargs) - self._tmp_model.output_tensor_names = self.output_tensor_names - self._tmp_model.input_tensor_names = self.input_tensor_names - - def convert_without_calib(self): - """Do conversion without calibration.""" - model = self._tmp_model - - if len(self.op_wise_config) > 0: - model = self.quantize_without_calib() - - if len(self.bf16_ops) > 0: - model = self.bf16_convert() - - post_cse_graph_def = PostCseOptimizer(model.graph_def).do_transformation() - post_cse_graph_def.library.CopyFrom(self.model.graph_def.library) - model.graph_def = post_cse_graph_def - - if debug: - model.save(self.output_graph) - - return model - - def _analysis_rnn_model(self): - """Match the RNN pattern.""" - g = GraphAnalyzer() - g.graph = self._tmp_graph_def - graph_info = g.parse_graph() - rnn_pattern = [["TensorArrayV3"], ["Enter"], ["TensorArrayReadV3"], ["MatMul"], ["BiasAdd"]] - target_nodes = g.query_fusion_pattern_nodes(rnn_pattern) - res = {} - for i in target_nodes: - if i[-3] not in self.bf16_ops and i[-3] not in self.fp32_ops: - res[(i[-3], i[-2])] = graph_info[i[1]].node.attr["frame_name"].s.decode() - - return res - - def quantize_without_calib(self): - """Quantize graph only (without optimizing fp32 graph). - - Including: - 1) quantize graph, - 2) fuse RequantizeOp with fused quantized conv, and so on. 
- - :return: - """ - try: - self._quantize_graph() - self._rnn_details = self._analysis_rnn_model() - self._freeze_requantization_ranges_without_calib() - self._fuse_requantize_with_fused_quantized_node() - except Exception as e: - import traceback - - traceback.print_exc() - self._tmp_model = None - logger.error("Fail to quantize graph due to {}.".format(str(e))) - finally: - if not debug: - self._post_clean() - return self._tmp_model - - def bf16_convert(self): - """Convert fp32 nodes in bf16_node to bf16 dtype based on FP32 + INT8 mixed precision graph.""" - try: - self._tmp_model.graph_def = BF16Convert( - self._tmp_model.graph_def, self.fp32_ops, self.bf16_ops - ).do_transformation() - - except Exception as e: - self._tmp_model = None - logger.error("Fail to convert graph due to {}.".format(str(e))) - finally: - if debug: - self._tmp_model.save(self._bf16_mixed_precision_model_path) - - return self._tmp_model - - def _quantize_graph(self): - """Quantize graph.""" - non_pad_ops = list(list(set(self.fp32_ops).union(set(self.bf16_ops)))) - self._tmp_graph_def = FusePadWithConv2DOptimizer( - self._tmp_graph_def, non_pad_ops, self._tmp_model.input_node_names, self.op_wise_config, self.new_api - ).do_transformation() - - self._tmp_graph_def = QuantizeGraphHelper().get_sorted_graph( - self._tmp_graph_def, self._tmp_model.input_node_names, self._tmp_model.output_node_names - ) - - self._tmp_graph_def, self.quantized_node_info, _ = QuantizeGraphForIntel( - self._tmp_graph_def, - self._tmp_model.input_node_names, - self._tmp_model.output_node_names, - self.op_wise_config, - self.int8_sequences, - self.device, - False, - self.new_api, - self.performance_only, - ).do_transform() - - self._tmp_graph_def.library.CopyFrom(self.model.graph_def.library) - if debug: - self._tmp_model.graph_def = self._tmp_graph_def - self._tmp_model.save(self._int8_dynamic_range_model_path) - - def _freeze_requantization_ranges_without_calib(self): - """Freeze requantization ranges after doing quantization.""" - self._tmp_graph_def = FreezeValueWithoutCalibTransformer( - self._tmp_graph_def, self.recover_config, postfix="__min" - ).do_transformation_without_calib() - self._tmp_graph_def = FreezeValueWithoutCalibTransformer( - self._tmp_graph_def, self.recover_config, postfix="__max" - ).do_transformation_without_calib() - self._tmp_graph_def = FreezeValueWithoutCalibTransformer( - self._tmp_graph_def, self.recover_config, postfix="__requant_min_max", device=self.device - ).do_transformation_without_calib() - - self._tmp_graph_def = QuantizedRNNConverter( - self._tmp_graph_def, self._calibration_data, self._rnn_details - ).do_transformation() - - if "scale_propagation_max_pooling" in self.recipes and self.recipes["scale_propagation_max_pooling"]: - self._tmp_graph_def = ScaleProPagationTransformer(self._tmp_graph_def).do_transformation() - - if debug: - self._tmp_graph_def.library.CopyFrom(self.model.graph_def.library) - self._tmp_model.graph_def = self._tmp_graph_def - self._tmp_model.save(self._int8_frozen_range_model_path) - - def _fuse_requantize_with_fused_quantized_node(self): - """Fuse the Requantize/Dequantize with fused quantized Ops.""" - self._tmp_graph_def = FuseConvRequantizeTransformer( - self._tmp_graph_def, self.device, self.new_api - ).do_transformation() - - self._tmp_graph_def = FuseMatMulRequantizeTransformer(self._tmp_graph_def).do_transformation() - - self._tmp_graph_def = FuseMatMulRequantizeDequantizeTransformer(self._tmp_graph_def).do_transformation() - - self._tmp_graph_def = 
StripUnusedNodesOptimizer( - self._tmp_graph_def, self._tmp_model.input_node_names, self._tmp_model.output_node_names - ).do_transformation() - - self._tmp_graph_def = RemoveTrainingNodesOptimizer( - self._tmp_graph_def, protected_nodes=self._tmp_model.output_node_names - ).do_transformation() - - self._tmp_graph_def = FoldBatchNormNodesOptimizer(self._tmp_graph_def).do_transformation() - - if "scale_propagation_concat" in self.recipes and self.recipes["scale_propagation_concat"]: - self._tmp_graph_def = RerangeQuantizedConcat(self._tmp_graph_def, self.device).do_transformation() - - self._tmp_graph_def = MetaInfoChangingMemOpOptimizer(self._tmp_graph_def).do_transformation() - - if self.advance_config is not None and deep_get(self.advance_config, "bias_correction") is not None: - self._tmp_graph_def = BiasCorrection(self._tmp_graph_def, self.model.graph_def).do_transformation() - - self._tmp_graph_def.library.CopyFrom(self.model.graph_def.library) - - self._tmp_model.graph_def = self._tmp_graph_def - - def _post_clean(self): - """Delete the temporarily files generated during the quantization process. - - :return: None - """ - if os.path.exists(self._int8_logged_model_path) and os.path.isdir(self._int8_logged_model_path): - import shutil - - shutil.rmtree(self._int8_logged_model_path) - - elif gfile.Exists(self._int8_logged_model_path + ".pb"): - os.remove(self._int8_logged_model_path + ".pb") diff --git a/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/pre_optimize.py b/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/pre_optimize.py index 44e20f20cc3..beac92c1b8d 100644 --- a/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/pre_optimize.py +++ b/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/pre_optimize.py @@ -75,7 +75,7 @@ def __init__(self, model, new_api, device): "debug_stripper": True, "loop": True, } - else: + else: # pragma: no cover self.optimization = { "pruning": True, "shape": True, @@ -85,7 +85,7 @@ def __init__(self, model, new_api, device): } # Table initialization should disable grappler dependency and pruning pass node_names = [node.name for node in model.graph_def.node] - if "init_all_tables" in node_names: + if "init_all_tables" in node_names: # pragma: no cover self.optimization["dependency"] = False self.optimization["pruning"] = False self.new_api = new_api @@ -144,7 +144,7 @@ def get_optimized_model(self, itex_mode=False): if self.device == "cpu": cpus = tf.config.list_physical_devices("CPU") node_device = cpus[0].name.replace("physical_device:", "") - else: + else: # pragma: no cover gpus = tf.config.list_physical_devices("GPU") if len(gpus) == 0: xpus = tf.config.list_physical_devices("XPU") @@ -253,7 +253,7 @@ def get_optimized_model(self, itex_mode=False): if self.device == "cpu": cpus = list_physical_devices("CPU") node_device = cpus[0].name.replace("physical_device:", "") - else: + else: # pragma: no cover gpus = list_physical_devices("GPU") if len(gpus) == 0: xpus = list_physical_devices("XPU") @@ -272,7 +272,7 @@ def get_optimized_model(self, itex_mode=False): self._tmp_graph_def.library.CopyFrom(self.model.graph_def.library) for function_def in self.model.graph_def.library.function: - if function_def.signature.name == "swish_f32": + if function_def.signature.name == "swish_f32": # pragma: no cover self._tmp_graph_def.library.function.extend([copy.deepcopy(function_def)]) origin_model.graph_def = self._tmp_graph_def diff --git 
a/neural_compressor/tensorflow/quantization/utils/graph_rewriter/int8/freeze_value.py b/neural_compressor/tensorflow/quantization/utils/graph_rewriter/int8/freeze_value.py index 3d0cef7da9e..20325ea7f42 100644 --- a/neural_compressor/tensorflow/quantization/utils/graph_rewriter/int8/freeze_value.py +++ b/neural_compressor/tensorflow/quantization/utils/graph_rewriter/int8/freeze_value.py @@ -79,7 +79,7 @@ def _get_valid_log(self): output.append(i) elif semi_count % 2 != 0: self.logger.warning("Invalid line.") - else: + else: # pragma: no cover loop_times = int(semi_count / 2) semi_index = [index for index, value in enumerate(i) if value == ";"] for index in range(loop_times - 1): @@ -165,7 +165,7 @@ def _parse_requantization_ranges(self): res[key].append(sorted(temp_max[key])[target_max_index]) - if self.tensor_data: + if self.tensor_data: # pragma: no cover for k, v in self.tensor_data.items(): if k in res: self.logger.debug("Update node {} min to {}, max to {}.".format(k, v[2], v[3])) @@ -241,7 +241,7 @@ def generate_output_graph_ranges(self, max_name_value): if not self.graph_info.get(in_node_name) or not in_node_name.endswith("_eightbit_quantized_in"): in_node_name = None - if self.itex_mode and "BatchNorm" in node_name: + if self.itex_mode and "BatchNorm" in node_name: # pragma: no cover bn_node_name = node_name[: -len("_eightbit_requant_range")] if bn_node_name not in self.graph_info: bn_node_name = None @@ -284,7 +284,7 @@ def generate_output_graph_ranges(self, max_name_value): attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto(float(value[1]), dtypes.float32, [])) ) - if bn_node_name: + if bn_node_name: # pragma: no cover if self.itex_mode: self.cur_graph.replace_const_node( min_node, diff --git a/neural_compressor/tensorflow/quantization/utils/graph_rewriter/int8/freeze_value_without_calib.py b/neural_compressor/tensorflow/quantization/utils/graph_rewriter/int8/freeze_value_without_calib.py deleted file mode 100644 index d793d333931..00000000000 --- a/neural_compressor/tensorflow/quantization/utils/graph_rewriter/int8/freeze_value_without_calib.py +++ /dev/null @@ -1,123 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Freeze Value without calibration Graph Rewriter.""" - -from tensorflow.core.framework import attr_value_pb2, node_def_pb2 -from tensorflow.python.framework import dtypes, tensor_util - -from neural_compressor.tensorflow.quantization.utils.graph_util import GraphAnalyzer -from neural_compressor.tensorflow.quantization.utils.graph_util import GraphRewriterHelper as Helper - -from ..graph_base import GraphRewriterBase - - -class FreezeValueWithoutCalibTransformer(GraphRewriterBase): - """Freeze value without calibration.""" - - def __init__(self, model, max_min_data, postfix, th=0.95, device="gpu"): - """Free Max/Min value into QuantizeV2 op. - - Args: - model (graphdef): input model - max_min_data (string list): the string context contains max/min values. 
- postfix (string): the specified postfix to locate value. - th (float, optional): The percentage of overall data.Defaults to 0.95. - device (string, optional): The hardware device type, 'cpu' or 'gpu'. - """ - super().__init__(model) - self.data = max_min_data - if 0.0 < th <= 1.0: - self.threshold = th - else: - self.logger.warning("The threshold value for clipping is invalid, " "Reset it to 0.95 by default.") - self.threshold = 0.95 - self.postfix = postfix - self.device = device - self.cur_graph = GraphAnalyzer() - self.cur_graph.graph = self.model - - self.graph_info = self.cur_graph.parse_graph() - - def generate_output_graph(self, max_name_value): - """Generate transformed graph for freeze_max/freeze_min transformation. - - :param max_name_value: target values - :return: transformed graph - """ - for node_name, value in max_name_value.items(): - node_name = node_name.replace(":", "__port__").replace("^", "__hat__") - if node_name not in self.graph_info: - continue - new_node = node_def_pb2.NodeDef() - new_node.op = "Const" - new_node_postfix = "/frozen_{}_only".format("".join([x for x in self.postfix if x.isalpha()])) - new_node.name = node_name + new_node_postfix - new_node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) - new_node.attr["value"].CopyFrom( - attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto(float(value), dtypes.float32, [])) - ) - output_node_name = self.graph_info[node_name].outputs[0] - self.cur_graph.replace_const_node(new_node, [Helper.node_name_from_input(output_node_name)], node_name) - self.cur_graph.remove_node(node_name) - - return GraphAnalyzer().dump_graph() - - def generate_output_graph_ranges(self, max_name_value): - """Generate transformed graph for freeze_max/freeze_min transformation. 
- - :param max_name_value: target values - :return: transformed graph - """ - for node_name, value in max_name_value.items(): - if node_name not in self.graph_info: - continue - - min_node = node_def_pb2.NodeDef() - min_node.op = "Const" - min_node_postfix = "/frozen_min" - min_node.name = node_name + min_node_postfix - min_node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) - min_node.attr["value"].CopyFrom( - attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto(float(value[0]), dtypes.float32, [])) - ) - - max_node = node_def_pb2.NodeDef() - max_node.op = "Const" - max_node_postfix = "/frozen_max" - max_node.name = node_name + max_node_postfix - max_node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) - max_node.attr["value"].CopyFrom( - attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto(float(value[1]), dtypes.float32, [])) - ) - output_node_name = self.graph_info[node_name].outputs[0] - self.cur_graph.replace_const_node( - min_node, [Helper.node_name_from_input(output_node_name)], node_name + ":0" - ) - self.cur_graph.replace_const_node( - max_node, [Helper.node_name_from_input(output_node_name)], node_name + ":1" - ) - self.cur_graph.remove_node(node_name) - - return GraphAnalyzer().dump_graph() - - def do_transformation_without_calib(self): - """Apply transformation without calibration.""" - if self.postfix == "__requant_min_max": - range_data = self.data[self.postfix] - return self.generate_output_graph_ranges(range_data) - max_name_value = self.data[self.postfix] - return self.generate_output_graph(max_name_value) diff --git a/neural_compressor/tensorflow/quantization/utils/graph_rewriter/int8/rnn_convert.py b/neural_compressor/tensorflow/quantization/utils/graph_rewriter/int8/rnn_convert.py deleted file mode 100644 index 55142680b09..00000000000 --- a/neural_compressor/tensorflow/quantization/utils/graph_rewriter/int8/rnn_convert.py +++ /dev/null @@ -1,296 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
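# --- Editorial sketch (not part of this diff) ----------------------------------------------
# The QuantizedRNNConverter deleted below quantizes the RNN weight constant per tensor: it
# nudges the observed min/max range and then runs a SCALED qint8 QuantizeV2. The helper below
# is a minimal, self-contained illustration of that logic using the public TF API; the
# function name is illustrative only and does not exist in the library.
import numpy as np
import tensorflow as tf

def quantize_weight_qint8(weights):
    """Nudge the min/max range, then quantize a float tensor to qint8 in SCALED mode."""
    tensor = tf.convert_to_tensor(np.asarray(weights), dtype=tf.float32)
    min_value = float(tf.reduce_min(tensor))
    max_value = float(tf.reduce_max(tensor))
    if min_value > 0.0:  # keep zero inside the quantization range
        min_value = 0.0
    if min_value == max_value:  # avoid a degenerate, zero-width range
        if abs(min_value) < 1e-6:
            max_value = min_value + 1.0
        elif min_value > 0:
            max_value = 2 * min_value
        else:
            max_value = min_value / 2.0
    # tf.quantization.quantize is the public entry point of the QuantizeV2 op used by the
    # removed converter; it returns the quantized tensor plus the adjusted min/max range.
    out, out_min, out_max = tf.quantization.quantize(
        tensor, min_value, max_value, tf.qint8, mode="SCALED", round_mode="HALF_TO_EVEN"
    )
    return out.numpy(), float(out_min), float(out_max)
# --------------------------------------------------------------------------------------------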
-"""Quantized RNN Graph Rewriter.""" - -import numpy as np -import tensorflow as tf -from tensorflow.python.framework import dtypes, tensor_util -from tensorflow.python.ops import array_ops - -from neural_compressor.tensorflow.quantization.utils.graph_util import GraphAnalyzer -from neural_compressor.tensorflow.quantization.utils.graph_util import GraphRewriterHelper as Helper -from neural_compressor.tensorflow.utils import dump_elapsed_time - -from ..graph_base import GraphRewriterBase - - -class QuantizedRNNConverter(GraphRewriterBase): - """Quantized RNN converter.""" - - def __init__(self, model, calibration_data, rnn_details, new_api=False): - """Initialization.""" - super().__init__(model) - self.calibration_data = calibration_data - self.rnn_details = rnn_details - self.new_api = new_api - - @dump_elapsed_time("Pass QuantizedRNNConverter") - def do_transformation(self): - """Apply the RNN conversion.""" - g = GraphAnalyzer() - g.graph = self.model - graph_info = g.parse_graph() - - for i in self.rnn_details.keys(): # pragma: no cover - start_node_name = graph_info[i[0]].node.input[0] - - matmul_b_node_name = graph_info[i[0]].node.input[1] - matmul_b_node = graph_info[Helper.node_name_from_input(matmul_b_node_name)].node - if matmul_b_node.op == "Split": - enter_node_name = matmul_b_node.input[1] - elif matmul_b_node.op == "Enter": - enter_node_name = graph_info[i[0]].node.input[1] - else: - continue - - min_str = i[0] + "_eightbit_min_" + start_node_name + "__print__;__min:" - input_min_values = [] - input_max_values = [] - output_min_values = [] - output_max_values = [] - max_str = i[0] + "_eightbit_max_" + start_node_name + "__print__;__max:" - output_str = i[0] + "_eightbit_requant_range__print__;__requant_min_max:" - for j in self.calibration_data: - if j.find(min_str) != -1: - input_min_values.append(float(j.split("[")[-1].split("]")[0])) - if j.find(max_str) != -1: - input_max_values.append(float(j.split("[")[-1].split("]")[0])) - - if j.find(output_str) != -1: - output_min_values.append(float(j.split(":")[-1][1:].split("]")[0])) - output_max_values.append(float(j.split("][")[-1][:-1])) - min_input = min(input_min_values) - max_input = max(input_max_values) - min_output = min(output_min_values) - max_output = max(output_max_values) - q_max_in_node = Helper.create_constant_node(i[0] + "_quant_max", max_input, dtypes.float32) - - q_min_in_node = Helper.create_constant_node(i[0] + "_quant_min", min_input, dtypes.float32) - q_enter_min_node = Helper.create_node("Enter", q_min_in_node.name + "_enter", [q_min_in_node.name]) - Helper.set_attr_string(q_enter_min_node, "frame_name", self.rnn_details[i].encode()) - Helper.set_attr_dtype(q_enter_min_node, "T", dtypes.float32) - Helper.set_attr_bool(q_enter_min_node, "is_constant", True) - Helper.set_attr_int(q_enter_min_node, "parallel_iterations", 32) - q_enter_max_node = Helper.create_node("Enter", q_max_in_node.name + "_enter", [q_max_in_node.name]) - Helper.set_attr_dtype(q_enter_max_node, "T", dtypes.float32) - Helper.set_attr_string(q_enter_max_node, "frame_name", self.rnn_details[i].encode()) - Helper.set_attr_bool(q_enter_max_node, "is_constant", True) - Helper.set_attr_int(q_enter_max_node, "parallel_iterations", 32) - - weight_node_name = graph_info[Helper.node_name_from_input(enter_node_name)].node.input[0] - weight_node = graph_info[Helper.node_name_from_input(weight_node_name)].node - if weight_node.attr["dtype"].type == dtypes.qint8: - qint8_const_name = weight_node_name - else: - base_name = weight_node_name + "_" - 
qint8_const_name = base_name + "qint8_const" - min_name = base_name + "min" - max_name = base_name + "max" - - need_to_create_const_node = bool(qint8_const_name not in graph_info) - if need_to_create_const_node: - float_tensor = tensor_util.MakeNdarray(weight_node.attr["value"].tensor) - - min_value = np.min(float_tensor.flatten()) - max_value = np.max(float_tensor.flatten()) - # Same processing of min-max as in quantize_weight_eightbit - # function. - if min_value > 0.0: - min_value = 0.0 - if min_value == max_value: - if abs(min_value) < 0.000001: - max_value = min_value + 1.0 - elif min_value > 0: - max_value = 2 * min_value - else: - max_value = min_value / 2.0 - - sess = tf.compat.v1.Session() - with sess.as_default(): - quantize_op = array_ops.quantize_v2( - float_tensor, min_value, max_value, dtypes.qint8, mode="SCALED", round_mode="HALF_TO_EVEN" - ) - qint8_tensor = quantize_op[0].numpy() if tf.executing_eagerly() else quantize_op[0].eval() - # Updated min-max values should be passed to the next - # feeding node. - min_value = quantize_op[1].numpy() if tf.executing_eagerly() else quantize_op[1].eval() - max_value = quantize_op[2].numpy() if tf.executing_eagerly() else quantize_op[2].eval() - sess.close() - - shape = tensor_util.TensorShapeProtoToList(weight_node.attr["value"].tensor.tensor_shape) - qint8_const_node = Helper.create_constant_node( - qint8_const_name, qint8_tensor, dtypes.qint8, shape=shape - ) - - min_node = Helper.create_constant_node(min_name, min_value, dtypes.float32) - - max_node = Helper.create_constant_node(max_name, max_value, dtypes.float32) - enter_min_node = Helper.create_node("Enter", min_name + "_enter", [min_name]) - Helper.set_attr_string(enter_min_node, "frame_name", self.rnn_details[i].encode()) - Helper.set_attr_dtype(enter_min_node, "T", dtypes.float32) - Helper.set_attr_bool(enter_min_node, "is_constant", True) - Helper.set_attr_int(enter_min_node, "parallel_iterations", 32) - enter_max_node = Helper.create_node("Enter", max_name + "_enter", [max_name]) - Helper.set_attr_dtype(enter_max_node, "T", dtypes.float32) - Helper.set_attr_string(enter_max_node, "frame_name", self.rnn_details[i].encode()) - Helper.set_attr_bool(enter_max_node, "is_constant", True) - Helper.set_attr_int(enter_max_node, "parallel_iterations", 32) - else: - qint8_const_node = graph_info[qint8_const_name].node - min_node = graph_info[min_name].node - max_node = graph_info[max_name].node - quant_input = [start_node_name, q_enter_min_node.name, q_enter_max_node.name] - quantize_node = Helper.create_node("QuantizeV2", i[0] + "_quantize", quant_input) - Helper.set_attr_dtype(quantize_node, "T", dtypes.quint8) - Helper.set_attr_string(quantize_node, "mode", b"MIN_FIRST") - g.add_node(quantize_node, start_node_name, [i[0]]) - g.add_node(q_enter_max_node, None, [quantize_node.name]) - g.add_node(q_enter_min_node, None, [quantize_node.name]) - g.add_node(q_max_in_node, None, [q_enter_max_node.name]) - g.add_node(q_min_in_node, None, [q_enter_min_node.name]) - - bias_node = graph_info[graph_info[i[0]].outputs[0]].node - if graph_info[bias_node.name].outputs: - last_node_name = [graph_info[graph_info[bias_node.name].outputs[0]].node.name] - else: - last_node_name = [] - quantized_matmul_input = [ - quantize_node.name, - Helper.node_name_from_input(graph_info[i[0]].node.input[1]), - bias_node.input[1], - ] - quantized_matmul_input.append(quantize_node.name + ":1") - quantized_matmul_input.append(quantize_node.name + ":2") - - quantized_matmul_input.append(enter_min_node.name) - 
quantized_matmul_input.append(enter_max_node.name) - if self.new_api: - quantized_matmul_with_bias_node = Helper.create_node( - "_QuantizedMatMul", i[0] + "_quantized_mat_mul", quantized_matmul_input - ) - else: - quantized_matmul_with_bias_node = Helper.create_node( - "QuantizedMatMulWithBias", i[0] + "_quantized_mat_mul", quantized_matmul_input - ) - Helper.set_attr_dtype(quantized_matmul_with_bias_node, "T1", dtypes.quint8) - Helper.set_attr_dtype(quantized_matmul_with_bias_node, "T2", dtypes.qint8) - Helper.set_attr_dtype(quantized_matmul_with_bias_node, "Tbias", dtypes.float32) - if self.new_api: - Helper.set_attr_dtype(quantized_matmul_with_bias_node, "Tout", dtypes.qint32) - else: - Helper.set_attr_dtype(quantized_matmul_with_bias_node, "Toutput", dtypes.qint32) - Helper.set_attr_bool(quantized_matmul_with_bias_node, "transpose_a", False) - Helper.set_attr_bool(quantized_matmul_with_bias_node, "transpose_b", False) - if self.new_api: - Helper.set_attr_string(quantized_matmul_with_bias_node, "input_quant_mode", b"SCALED") - Helper.set_attr_string(quantized_matmul_with_bias_node, "output_quant_mode", b"SCALED") - Helper.set_attr_string_list(quantized_matmul_with_bias_node, "fused_ops", [b"BiasAdd"]) - Helper.set_attr_type_list( - quantized_matmul_with_bias_node, - "Thost_inputs", - [ - dtypes.quint8.as_datatype_enum, - dtypes.qint8.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - ], - ) - Helper.set_attr_type_list( - quantized_matmul_with_bias_node, - "Thost_outputs", - [dtypes.qint32.as_datatype_enum, dtypes.float32.as_datatype_enum, dtypes.float32.as_datatype_enum], - ) - else: - Helper.set_attr_string(quantized_matmul_with_bias_node, "input_quant_mode", b"MIN_FIRST") - - g.add_node(quantized_matmul_with_bias_node, quantize_node.name, [bias_node.name]) - - if qint8_const_node.name not in graph_info: - g.add_node(qint8_const_node, None, [enter_node_name]) - enter_node = graph_info[enter_node_name].node - if matmul_b_node.op == "Split": - Helper.set_attr_dtype(matmul_b_node, "T", dtypes.qint8) - Helper.set_attr_dtype(enter_node, "T", dtypes.qint8) - graph_info[enter_node.name].node.input[0] = qint8_const_node.name - elif qint8_const_node.name in graph_info: - pass - else: - g.add_node(qint8_const_node, None, [quantized_matmul_with_bias_node.name]) - - if need_to_create_const_node: - g.add_node(enter_min_node, None, [quantized_matmul_with_bias_node.name]) - g.add_node(enter_max_node, None, [quantized_matmul_with_bias_node.name]) - g.add_node(min_node, None, [enter_min_node.name]) - g.add_node(max_node, None, [enter_max_node.name]) - - # create requantize node - requantize_min_node = Helper.create_constant_node(i[0] + "requant_w_min", min_output, dtypes.float32) - requantize_max_node = Helper.create_constant_node(i[0] + "requant_w_max", max_output, dtypes.float32) - - enter_req_min_node = Helper.create_node( - "Enter", requantize_min_node.name + "_enter", [requantize_min_node.name] - ) - Helper.set_attr_string(enter_req_min_node, "frame_name", self.rnn_details[i].encode()) - Helper.set_attr_dtype(enter_req_min_node, "T", dtypes.float32) - Helper.set_attr_bool(enter_req_min_node, "is_constant", True) - Helper.set_attr_int(enter_req_min_node, "parallel_iterations", 32) - - enter_req_max_node = Helper.create_node( - "Enter", requantize_max_node.name + "_enter", [requantize_max_node.name] - ) - Helper.set_attr_dtype(enter_req_max_node, "T", 
dtypes.float32) - Helper.set_attr_string(enter_req_max_node, "frame_name", self.rnn_details[i].encode()) - Helper.set_attr_bool(enter_req_max_node, "is_constant", True) - Helper.set_attr_int(enter_req_max_node, "parallel_iterations", 32) - requantize_input = [ - quantized_matmul_with_bias_node.name, - quantized_matmul_with_bias_node.name + ":1", - quantized_matmul_with_bias_node.name + ":2", - enter_req_min_node.name, - enter_req_max_node.name, - ] - requantize_node = Helper.create_node("Requantize", i[0] + "_requantize", requantize_input) - Helper.set_attr_dtype(requantize_node, "out_type", dtypes.qint8) - Helper.set_attr_dtype(requantize_node, "Tinput", dtypes.qint32) - - g.add_node(requantize_node, quantized_matmul_with_bias_node.name, [bias_node.name]) - dequantize_input = [requantize_node.name, requantize_node.name + ":1", requantize_node.name + ":2"] - dequantize_node = Helper.create_node("Dequantize", i[0] + "_dequantize", dequantize_input) - Helper.set_attr_dtype(dequantize_node, "T", dtypes.qint8) - Helper.set_attr_dtype(dequantize_node, "dtype", dtypes.float32) - Helper.set_attr_string(dequantize_node, "mode", b"MIN_FIRST") - - g.add_node(enter_req_min_node, None, [requantize_node.name]) - g.add_node(enter_req_max_node, None, [requantize_node.name]) - g.add_node(requantize_min_node, None, [enter_req_min_node.name]) - g.add_node(requantize_max_node, None, [enter_req_max_node.name]) - g.add_node(dequantize_node, requantize_node.name, last_node_name) - if last_node_name: - replace_index = [ - Helper.node_name_from_input(i) for i in graph_info[last_node_name[0]].node.input - ].index(bias_node.name) - - graph_info[last_node_name[0]].node.input[replace_index] = dequantize_node.name - g.remove_node(bias_node.name) - g.remove_node(i[0]) - - # g.remove_node(weight_node_name) - - return g.dump_graph() diff --git a/neural_compressor/tensorflow/quantization/utils/quantize_graph/qat/__init__.py b/neural_compressor/tensorflow/quantization/utils/quantize_graph/qat/__init__.py deleted file mode 100644 index 2a47a09d8f1..00000000000 --- a/neural_compressor/tensorflow/quantization/utils/quantize_graph/qat/__init__.py +++ /dev/null @@ -1,17 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Copyright (c) 2022 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Tensorflow QAT Graph Quantizers.""" diff --git a/neural_compressor/tensorflow/quantization/utils/quantize_graph/qat/fake_quantize.py b/neural_compressor/tensorflow/quantization/utils/quantize_graph/qat/fake_quantize.py deleted file mode 100644 index 97f3f6ce3d4..00000000000 --- a/neural_compressor/tensorflow/quantization/utils/quantize_graph/qat/fake_quantize.py +++ /dev/null @@ -1,231 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Copyright (c) 2022 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""QAT Fake Quantize Graph Class.""" - -import abc - -import six -import tensorflow as tf - - -@six.add_metaclass(abc.ABCMeta) -class FakeQuantizeBase(object): - """ABC interface class for applying fake quantization by insert qdq.""" - - @abc.abstractmethod - def __call__(self, inputs, range, training, **kwargs): - """Apply quantization to the input tensor. - - This is the main logic of the 'FakeQuantize' which implements the core logic - to quantize the tensor. It is invoked during the `call` stage of the layer, - and allows modifying the tensors used in graph construction. - - Args: - inputs (tf.Tensor): Input tensor to be quantized. - range (dict): The min-max range of input tensor. - training (bool): Whether the graph is currently training. - **kwargs: Additional variables which may be passed to the FakeQuantize class. - - Returns: - output (tf.Tensor): The tensor to be quantized. - """ - raise NotImplementedError - - @abc.abstractmethod - def get_config(self): - """Returns the config used to serialize the 'FakeQuantize'.""" - raise NotImplementedError("FakeQuantize should implement get_config().") - - @classmethod - def from_config(cls, config): - """Instantiates a 'FakeQuantize' from its config. - - Args: - config (dict): A dict containing required information. - - Returns: - output (FakeQuantize): A 'FakeQuantize' instance. - """ - return cls(**config) - - -class FakeQuantize(FakeQuantizeBase): - """The class that applies fake quantization.""" - - def __init__(self, per_channel=False, num_bits=8, channel_axis=-1, symmetric=True, narrow_range=True): - """Initialize a FakeQuantize class. - - Args: - per_channel (bool): Whether to apply per_channel quantization. The last dimension is - used as the channel. - num_bits (int): Number of bits for quantization. - channel_axis(int): Channel axis. - symmetric (bool): If true, use symmetric quantization limits instead of training - the minimum and maximum of each quantization range separately. - narrow_range (bool): In case of 8 bits, narrow_range nudges the quantized range - to be [-127, 127] instead of [-128, 127]. This ensures symmetric range - has 0 as the centre. - """ - self.num_bits = num_bits - self.per_channel = per_channel - self.symmetric = symmetric - self.narrow_range = narrow_range - self.channel_axis = channel_axis - self.name_prefix = "FakeQuantize" - - def __call__(self, inputs, ranges, training, **kwargs): - """Applying fake quantization by insert qdq. - - The quantized tensor is calculated based on range of the last batch of values. - - Args: - inputs (tf.Tensor): Input tensor to be quantized. - range (dict): The min-max range of input tensor. - training (bool): Whether the graph is currently training. - **kwargs: Additional variables which may be passed to the FakeQuantize class. - - Returns: - output (tf.Tensor): The tensor to be quantized. 
- """ - with tf.name_scope(self.name_prefix): - input_shape = inputs.get_shape() - input_dim = len(input_shape) - if self.channel_axis == -1: - self.channel_axis += input_dim - - if not training: - return self._insert_qdq(inputs, ranges["min_var"], ranges["max_var"]) - - if self.per_channel: - if input_dim == 2: - reduce_dims = [0] - elif input_dim == 4: - reduce_dims = [i for i in range(input_dim) if i != self.channel_axis] - - if self.per_channel: - if input_dim >= 2: - batch_min = tf.math.reduce_min(inputs, axis=reduce_dims, name="BatchMin") - else: - batch_min = inputs - else: - batch_min = tf.math.reduce_min(inputs, name="BatchMin") - - if self.per_channel: - if input_dim >= 2: - batch_max = tf.math.reduce_max(inputs, axis=reduce_dims, name="BatchMax") - else: - batch_max = inputs - else: - batch_max = tf.math.reduce_max(inputs, name="BatchMax") - - if self.symmetric: - if self.narrow_range: - min_max_ratio = -1 - else: - min_max_ratio = -((1 << self.num_bits) - 2) / (1 << self.num_bits) - - range_min = tf.math.minimum(batch_min, batch_max / min_max_ratio) - range_max = tf.math.maximum(batch_max, batch_min * min_max_ratio) - else: - range_min = tf.math.minimum(batch_min, 0.0) - range_max = tf.math.maximum(batch_max, 0.0) - - assign_min = ranges["min_var"].assign(range_min, name="AssignMinLast") - assign_max = ranges["max_var"].assign(range_max, name="AssignMaxLast") - - return self._insert_qdq(inputs, assign_min, assign_max) - - def _insert_qdq(self, inputs, min_var, max_var): - """Adds a fake quantization operation. - - Depending on value of self.per_channel, this operation may do global quantization - or per channel quantization. min_var and max_var should have corresponding - shapes: [1] when per_channel == False and [d] when per_channel == True. - - Args: - inputs (tf.Tensor): A tensor containing values to be quantized. - min_var (tf.Variable): A variable containing quantization range lower end(s). - max_var (tf.Variable): A variable containing quantization range upper end(s). - - Returns: - outputs (tf.Tensor): A tensor containing quantized values. - """ - if self.per_channel: - return tf.quantization.quantize_and_dequantize_v2( - inputs, - min_var, - max_var, - num_bits=self.num_bits, - narrow_range=self.narrow_range, - axis=self.channel_axis, - range_given=True, - ) - else: - assert min_var.get_shape() == [] - assert max_var.get_shape() == [] - - return tf.quantization.quantize_and_dequantize_v2( - inputs, - min_var, - max_var, - num_bits=self.num_bits, - narrow_range=self.narrow_range, - range_given=True, - ) - - def get_config(self): - """Returns the config used to serialize the 'FakeQuantize'. - - Returns: - config (dict): A dict containing required information. - """ - return { - "num_bits": self.num_bits, - "per_channel": self.per_channel, - "symmetric": self.symmetric, - "narrow_range": self.narrow_range, - } - - def __eq__(self, other): - """Check if this instance is equal to another instance. - - Args: - other (FakeQuantize): Another instance to be checked. - - Returns: - is_equal (bool): If the two instances are equal. - """ - if not isinstance(other, FakeQuantize): - return False - - return ( - self.num_bits == other.num_bits - and self.per_channel == other.per_channel - and self.symmetric == other.symmetric - and self.narrow_range == other.narrow_range - ) - - def __ne__(self, other): - """Check if this instance is not equal to another instance. - - Args: - other (FakeQuantize): Another instance to be checked. 
- - Returns: - not_equal (bool): If the two instances are not equal. - """ - return not self.__eq__(other) diff --git a/neural_compressor/tensorflow/quantization/utils/quantize_graph/qat/quantize_config.py b/neural_compressor/tensorflow/quantization/utils/quantize_graph/qat/quantize_config.py deleted file mode 100644 index 1f50f20879e..00000000000 --- a/neural_compressor/tensorflow/quantization/utils/quantize_graph/qat/quantize_config.py +++ /dev/null @@ -1,129 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Copyright (c) 2022 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""QAT Quantize Config Class.""" - -import logging - -global_config = {} -logger = logging.getLogger("neural_compressor") - - -class QuantizeConfig: - """Class for building custom quantize config. - - There should be only one QuantizeConfig instance for global setting. - """ - - def __new__(cls): - """Created a QuantizeConfig instance and add it to the global_config dict. - - Returns: - instance (QuantizeConfig) : The created QuantizeConfig instance. - """ - instance = super().__new__(cls) - global_config["quantize_config"] = instance - return instance - - def __init__(self): - """Initialize QuantizeConfig instance.""" - self.quantize_recipe = {} - self.model_name = None - - def add_quantize_recipe(self, quantize_recipe): # pragma: no cover - """Add custom recipe for quantization to the QuantizeConfig instance. - - Args: - quantize_recipe (dict): A dict that decide whether given layers should be quantized. - A typical quantize_recipe will be a dict of layer_name and - dict as key-value pairs. In each value dict, there should be - a {'quantize': bool} key-value pair and a {'index': list} pair. - The latter one is used to decide which inputs should be quantized - in some layers with multiple inputs. - For example: - {'conv5_block3_3_conv': {'quantize': False} - 'conv5_block3_3_add' : {'quantize': True, 'index': [1, 3]} - } - """ - self.quantize_recipe.update(quantize_recipe) - - def query_layer(self, layer_name): - """Query if a specific layer is in the quantize_recipe dict. - - Args: - layer_name (string): The input layer name. - - Returns: - layer_recipe (dict): The quantize recipe for this input layer. - """ - if layer_name in self.quantize_recipe: - return self.quantize_recipe[layer_name] - return {} - - def remove_layer(self, layer_name): # pragma: no cover - """Remove a specific layer from the quantize_recipe dict. - - Args: - layer_name (string): The name of layer to be removed. - """ - if layer_name in self.quantize_recipe: - del self.quantize_recipe[layer_name] - - def remove_layers(self, layer_names): # pragma: no cover - """Remove a batch of layers from the quantize_recipe dict. - - Args: - layer_names (List): The names of layers to be removed. - """ - for layer_name in layer_names: - self.remove_layer(layer_name) - - def get_quantize_recipe(self): # pragma: no cover - """Get the current recipe dict for quantization. 
- - Returns: - quantize_recipe (dict): A dict that decide whether given layers should be quantized. - """ - return self.quantize_recipe - - def is_empty(self): # pragma: no cover - """Check if the recipe of quantization is an empty dict. - - Returns: - is_empty (bool): True if no custom recipe is updated to this class. - """ - if self.quantize_recipe: - return False - return True - - def clear_quantize_recipe(self): # pragma: no cover - """Clear recipe of quantization to be an empty dict.""" - self.quantize_recipe.clear() - - -layer_wise_config = { - "quantize_layers": { - "Conv2D", - "Dense", - "DepthwiseConv2D", - "MaxPooling2D", - "AveragePooling2D", - "GlobalAveragePooling2D", - }, - "possible_quantize_layers": {"Multiply", "Concatenate", "Add", "BatchNormalization"}, - "weighted_layers": {"Conv2D", "Dense", "DepthwiseConv2D"}, - "multiple_inputs_layers": {"Multiply", "Concatenate", "Add"}, -} diff --git a/neural_compressor/tensorflow/quantization/utils/quantize_graph/qat/quantize_helper.py b/neural_compressor/tensorflow/quantization/utils/quantize_graph/qat/quantize_helper.py deleted file mode 100644 index d28d9474f2b..00000000000 --- a/neural_compressor/tensorflow/quantization/utils/quantize_graph/qat/quantize_helper.py +++ /dev/null @@ -1,92 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Copyright (c) 2022 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""QAT Quantize Helper Class.""" - -from .quantize_config import QuantizeConfig, global_config, layer_wise_config -from .quantize_layers.optimize_layer import config_quantizable_layers -from .quantize_wrapper import QuantizeWrapper - - -def init_quantize_config(model, quantize_recipe=None): - """Initialize quantization config at the beginning of QAT process. - - Args: - model_name (string): Special pre-optimized model name. - quantize_recipe (dict): A dict that decide whether given layers should be quantized. - - Returns: - config (QuantizeConfig): QuantizeConfig instance used to decide whether a specific layer - should be quantized. - """ - assert "quantize_config" not in global_config, ( - "quantize_config has been unexpectedly " "created. Please check your QAT workflow" - ) - - config = QuantizeConfig() - config_quantizable_layers(model) - - if quantize_recipe: - config.add_quantize_recipe(quantize_recipe) - - return config - - -def _is_quantizable_layer(layer): - """Query if the input layer should be quantized. - - Args: - layer (tf.keras.layers.Layer): input Keras layer - - Returns: - capability (bool): whether the input layer is capable of quantization. 
- """ - quantizable = True - layer_class = layer.__class__.__name__ - - quantize_config = global_config["quantize_config"] - specific_layer_config = quantize_config.query_layer(layer.name) - if specific_layer_config: - # the layer is set to be unquantizable by QuantizeConfig - if not specific_layer_config["quantize"]: - return False - else: - if ( - layer_class in layer_wise_config["quantize_layers"] - or layer_class in layer_wise_config["possible_quantize_layers"] - ): - return True - - if layer_class not in layer_wise_config["quantize_layers"]: - quantizable = False - - return quantizable - - -def qat_clone_function(layer): - """Wrap or leave given layer based on quantize config object parameters. - - Args: - layer (tf.keras.layers.Layer): input Keras layer - - Returns: - wrapped_layer (QuantizeWrapper): layer wrapped by QuantizeWrapper class. - """ - wrapped_layer = layer - if _is_quantizable_layer(layer): - wrapped_layer = QuantizeWrapper(layer) - - return wrapped_layer diff --git a/neural_compressor/tensorflow/quantization/utils/quantize_graph/qat/quantize_layers/__init__.py b/neural_compressor/tensorflow/quantization/utils/quantize_graph/qat/quantize_layers/__init__.py deleted file mode 100644 index 81d1403e2b4..00000000000 --- a/neural_compressor/tensorflow/quantization/utils/quantize_graph/qat/quantize_layers/__init__.py +++ /dev/null @@ -1,17 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Copyright (c) 2022 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Tensorflow QAT Graph Quantize Layers.""" diff --git a/neural_compressor/tensorflow/quantization/utils/quantize_graph/qat/quantize_layers/optimize_layer.py b/neural_compressor/tensorflow/quantization/utils/quantize_graph/qat/quantize_layers/optimize_layer.py deleted file mode 100644 index 620942261e1..00000000000 --- a/neural_compressor/tensorflow/quantization/utils/quantize_graph/qat/quantize_layers/optimize_layer.py +++ /dev/null @@ -1,32 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Copyright (c) 2022 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
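# --- Editorial sketch (not part of this diff) ----------------------------------------------
# The QAT helpers removed just above (quantize_helper.py / quantize_config.py) were built
# around Keras model cloning: a clone_function inspects each layer and either returns it
# unchanged or returns a wrapped copy (QuantizeWrapper in the removed code). The snippet
# below is a self-contained illustration of that pattern; the allow-list and the plain
# from_config copy are stand-ins for the removed layer_wise_config and QuantizeWrapper.
import tensorflow as tf

QUANTIZABLE_CLASSES = {"Conv2D", "Dense", "DepthwiseConv2D"}  # stand-in for layer_wise_config

def clone_fn(layer):
    """Return a fresh copy for quantizable layers, the original layer otherwise."""
    if layer.__class__.__name__ in QUANTIZABLE_CLASSES:
        # The removed helpers returned QuantizeWrapper(layer) here; a config copy is used
        # only to keep this sketch runnable without the deleted classes.
        return layer.__class__.from_config(layer.get_config())
    return layer

inputs = tf.keras.Input(shape=(4,))
outputs = tf.keras.layers.Dense(2)(inputs)
base_model = tf.keras.Model(inputs, outputs)
qat_like_model = tf.keras.models.clone_model(base_model, clone_function=clone_fn)
# --------------------------------------------------------------------------------------------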
-"""Optimize layer config.""" - -from .quantize_layer_add import QuantizeLayerAdd -from .quantize_layer_bn import QuantizeLayerBatchNormalization - - -def config_quantizable_layers(model): - """Configure the quantizable layers.""" - quantize_layer_mapping = {"Add": QuantizeLayerAdd, "BatchNormalization": QuantizeLayerBatchNormalization} - - for layer_class, quantize_layer in quantize_layer_mapping.items(): - quantize_layer_mapping[layer_class] = quantize_layer() - - for layer in model.layers: - if layer.__class__.__name__ in quantize_layer_mapping: - quantize_layer_mapping[layer.__class__.__name__](layer) diff --git a/neural_compressor/tensorflow/quantization/utils/quantize_graph/qat/quantize_layers/quantize_layer_add.py b/neural_compressor/tensorflow/quantization/utils/quantize_graph/qat/quantize_layers/quantize_layer_add.py deleted file mode 100644 index ae2db7e4006..00000000000 --- a/neural_compressor/tensorflow/quantization/utils/quantize_graph/qat/quantize_layers/quantize_layer_add.py +++ /dev/null @@ -1,83 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Copyright (c) 2022 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Quantization Add Layer Class.""" - -import logging - -from .quantize_layer_base import QuantizeLayerBase - -logger = logging.getLogger("neural_compressor") - - -class QuantizeLayerAdd(QuantizeLayerBase): # pragma: no cover - """The class for quantization of Add.""" - - def __init__(self): - """Initialize QuantizeLayerAdd class.""" - self.quantize_patterns = [ - ["Conv", "BatchNorm", "Add"], - ["Conv", "BatchNorm", "Activation", "Add"], - ["Conv", "BatchNorm", "Activation", "Dropout", "Add"], - ] - - super().__init__() - - def _quantizable_add(self): - """Check if the input layer meets criteria of quantization. - - Args: - layer (tf.keras.layers.Layer): The input layer. - - Returns: - quantizable (bool): If this layer should be quantized. - """ - input_layer = self._find_input_layers(self.layer) - if len(input_layer) == 1: - logger.warning( - "The layer 'Add' should have more than one input. " - "You input a model with layer {} which has only one input".format(self.layer.name) - ) - return False - - return True - - def __call__(self, layer): - """The main logic of QuantizeLayerAdd. - - Neural Compressor will enumerate all layers of the input model to check - if there are any layer meeting the criteria. The chosen ones will be marked - as quantizable by QuantizeConfig. - - Args: - layer (tf.keras.layers.Layer): The keras layer to be estimated. 
- """ - self.layer = layer - if self._quantizable_add(): - input_layers = self._find_input_layers(self.layer) - fused_conv_index = None - for i, input_layer in enumerate(input_layers): - # Check that the input is a Conv pattern - if "Conv" in input_layer.__class__.__name__ or self._find_patterns(input_layer): - if hasattr(input_layer, "outbound_nodes") and len(getattr(input_layer, "outbound_nodes")) == 1: - fused_conv_index = i - break - - input_indexes = [i for i in range(0, len(input_layers))] - if fused_conv_index: - del input_indexes[fused_conv_index] - - self.quantize_config.add_quantize_recipe({self.layer.name: {"quantize": True, "index": input_indexes}}) diff --git a/neural_compressor/tensorflow/quantization/utils/quantize_graph/qat/quantize_layers/quantize_layer_base.py b/neural_compressor/tensorflow/quantization/utils/quantize_graph/qat/quantize_layers/quantize_layer_base.py deleted file mode 100644 index 2cf5f76c37e..00000000000 --- a/neural_compressor/tensorflow/quantization/utils/quantize_graph/qat/quantize_layers/quantize_layer_base.py +++ /dev/null @@ -1,87 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Copyright (c) 2022 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""QuantizeLayer Base Class.""" - -from ..quantize_config import global_config - - -class QuantizeLayerBase: # pragma: no cover - """QuantizeLayer Base Class.""" - - def __init__(self): - """Initialize QuantizeLayerBase class.""" - self.quantize_patterns = [] - assert "quantize_config" in global_config, "QuantizeConfig is not correctly created." - self.quantize_config = global_config["quantize_config"] - - def _find_input_layers(self, layer): - """Find all inputs of a specific layer. - - Args: - layer (tf.keras.layers.Layer): The target keras layer that this method - is to find its input layers. - - Returns: - input_layers (list): List of input layers found by this method. - """ - input_layers = [] - if isinstance(layer.input, list): - for input_tensor in layer.input: - input_layer = input_tensor._keras_history.layer - input_layers.append(input_layer) - else: - input_layer = layer.input._keras_history.layer - input_layers.append(input_layer) - return input_layers - - def _find_patterns(self, layer): - """Checks if the input layer can satisfy the patterns. - - Args: - layer (tf.keras.layers.Layer): The input keras layer that this method - is to find patterns. - - Returns: - valid_patterns (bool): If the input layer can satisfy any pattern. - """ - if not self.quantize_patterns: - return False - - for quantize_pattern in self.quantize_patterns: - index = len(quantize_pattern) - 2 - previous_layer = layer - while index >= 0: - previous_layer = self._find_input_layers(previous_layer) - if quantize_pattern[index] not in previous_layer.__class__.__name__: - break - index -= 1 - if index == -1: - return True - - return False - - def __call__(self, layer): - """The main logic of QuantizeLayerBase. 
- - Neural Compressor will enumerate all layers of the input model to check - if there are any layer meeting the criteria. The chosen ones will be marked - as quantizable by QuantizeConfig. - - Args: - layer (tf.keras.layers.Layer): The keras layer to be estimated. - """ - raise NotImplementedError() diff --git a/neural_compressor/tensorflow/quantization/utils/quantize_graph/qat/quantize_layers/quantize_layer_bn.py b/neural_compressor/tensorflow/quantization/utils/quantize_graph/qat/quantize_layers/quantize_layer_bn.py deleted file mode 100644 index c44b5da3f7c..00000000000 --- a/neural_compressor/tensorflow/quantization/utils/quantize_graph/qat/quantize_layers/quantize_layer_bn.py +++ /dev/null @@ -1,58 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Copyright (c) 2022 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Quantize Layer BatchNormalization Class.""" - -from .quantize_layer_base import QuantizeLayerBase - - -class QuantizeLayerBatchNormalization(QuantizeLayerBase): # pragma: no cover - """The class for quantization of BatchNormalization.""" - - def __init__(self): - """Initialize QuantizeLayerBatchNormalization class.""" - super().__init__() - - def _quantizable_bn(self): - """Check if the input layer meets criteria of quantization. - - Args: - layer (tf.keras.layers.Layer): The input layer. - - Returns: - quantizable (bool): If this layer should be quantized. - """ - input_layer = self._find_input_layers(self.layer) - assert len(input_layer) == 1, "BatchNormalization only has one input." - input_layer_class = input_layer.__class__.__name__ - if "Conv" not in input_layer_class: - return True - - return False - - def __call__(self, layer): - """The main logic of QuantizeLayerBatchNormalization. - - Neural Compressor will enumerate all layers of the input model to check - if there are any layer meeting the criteria. The chosen ones will be marked - as quantizable by QuantizeConfig. - - Args: - layer (tf.keras.layers.Layer): The keras layer to be estimated. - """ - self.layer = layer - if self._quantizable_bn(): - self.quantize_config.add_quantize_recipe({self.layer.name: {"quantize": True}}) diff --git a/neural_compressor/tensorflow/quantization/utils/quantize_graph/qat/quantize_wrapper.py b/neural_compressor/tensorflow/quantization/utils/quantize_graph/qat/quantize_wrapper.py deleted file mode 100644 index 2baf26c0c24..00000000000 --- a/neural_compressor/tensorflow/quantization/utils/quantize_graph/qat/quantize_wrapper.py +++ /dev/null @@ -1,284 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Copyright (c) 2022 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""QAT Quantize Wrapper Class.""" - -from abc import abstractmethod - -import tensorflow as tf -from tensorflow.python.util import tf_inspect - -from .fake_quantize import FakeQuantize -from .quantize_config import global_config, layer_wise_config - - -class QuantizeWrapperBase(tf.keras.layers.Wrapper): - """Base class for quantize wrapper.""" - - def __init__(self, layer, **kwargs): - """Create a quantize wrapper for a keras layer. - - This wrapper provides options to quantize inputs and weights of the layer. - - Args: - layer (tf.keras.layers.Layer): The keras layer to be wrapped. - **kwargs: Additional keyword arguments to be passed. - """ - assert layer is not None, "'layer' should not be None." - - assert isinstance(layer, tf.keras.layers.Layer) or isinstance(layer, tf.keras.Model), ( - "'layer' can only be a 'tf.keras.layers.Layer' instance." - " You passed an instance of type: {input}.".format(input=layer.__class__.__name__) - ) - - if "name" not in kwargs: - kwargs["name"] = self._make_layer_name(layer) - - super(QuantizeWrapperBase, self).__init__(layer, **kwargs) - - self.index = None - self._layer_class = layer.__class__.__name__ - self._track_trackable(layer, name="layer") - - @staticmethod - def _make_layer_name(layer): - """Modify the layer name to be quantized layer.""" - return "{}_{}".format("quant", layer.name) - - def build(self, input_shape): - """Creates the variables of the layer. - - Args: - input_shape (tf.TensorShape or list): shapes of input tensors - """ - super(QuantizeWrapperBase, self).build(input_shape) - - self.optimizer_step = self.add_weight( - "optimizer_step", - initializer=tf.keras.initializers.Constant(-1), - dtype=tf.dtypes.int32, - trainable=False, - ) - - def _init_min_max_variables(self, name, shape): - """Initialize the minimum and maximum values of variables to the wrapped layer. - - Args: - name (string): Name prefix of the variables. - shape (tf.TensorShape): shape of variables to be added. - - Returns: - min_variable (tf.Variable) : The initialized minimum value of given variables. - min_variable (tf.Variable) : The initialized maximum value of given variables. - """ - min_variable = self.layer.add_weight( - name + "_min", - shape=(shape), - trainable=False, - initializer=tf.keras.initializers.Constant(-6.0), - ) - max_variable = self.layer.add_weight( - name + "_max", - shape=(shape), - trainable=False, - initializer=tf.keras.initializers.Constant(6.0), - ) - - return min_variable, max_variable - - def query_input_index(self): - """Query QuantizeConfig to check if there is any designated input index for this layer.""" - quantize_config = global_config["quantize_config"] - custom_layer_config = quantize_config.query_layer(self.layer) - if custom_layer_config and "index" in custom_layer_config: - self.index = custom_layer_config["index"] - - @abstractmethod - def call(self, inputs, training=None): - """This is where the quantize wrapper's logic lives. - - Args: - inputs (tf.Tensor or dict/list/tuple): Inputs of the wrapped layer. - - Returns: - outputs (tf.Tensor or dict/list/tuple): Outputs of the wrapped layer. 
- """ - raise NotImplementedError - - @property - def trainable(self): - """Get trainable attribute for the layer and its sublayers.""" - return self.layer.trainable - - @trainable.setter - def trainable(self, value): - """Set trainable attribute for the layer and its sublayers. - - Args: - value (Boolean): The desired state for the layer's trainable attribute. - """ - self.layer.trainable = value - - @property - def trainable_weights(self): - """List of all trainable weights tracked by this layer. - - Trainable weights are updated via gradient descent during training. - - Returns: - trainable_weights (list): A list of trainable variables. - """ - return self.layer.trainable_weights + self._trainable_weights - - @property - def non_trainable_weights(self): - """List of all non-trainable weights tracked by this layer. - - Non-trainable weights are *not* updated during training. They are - expected to be updated manually in `call()`. - - Returns: - non_trainable_weights (list): A list of non-trainable variables. - """ - return self.layer.non_trainable_weights + self._non_trainable_weights - - @property - def updates(self): - """Update layer.""" - return self.layer.updates + self._updates - - @property - def losses(self): - """List of losses added using the `add_loss()` API. - - Variable regularization tensors are created when this property is - accessed, so it is eager safe: accessing `losses` under a - `tf.GradientTape` will propagate gradients back to the corresponding - variables. - - Returns: - losses (list): A list of tensors. - """ - return self.layer.losses + self._losses - - -class QuantizeWrapper(QuantizeWrapperBase): - """General QuantizeWrapper for quantizable layers. - - Weights and inputs will be quantized according to the layer type and quantize config. - """ - - def __init__(self, layer, **kwargs): - """Create a quantize wrapper for a keras layer. - - This wrapper provides options to quantize inputs and weights of the layer. - - Args: - layer (tf.keras.layers.Layer): The keras layer to be wrapped. - **kwargs: Additional keyword arguments to be passed. - """ - super().__init__(layer, **kwargs) - - self.kernel = "kernel" - self.kernel_weights = None - self.channel_axis = kwargs.get("axis", -1) - if self._layer_class == "DepthwiseConv2D": - self.kernel = "depthwise_kernel" - self.channel_axis = 2 - if self._layer_class in layer_wise_config["multiple_inputs_layers"]: - self.query_input_index() - - def build(self, input_shape): - """Creates the variables of the layer. 
- - Args: - input_shape (tf.TensorShape or list): shapes of input tensors - """ - super().build(input_shape) - - if self._layer_class in layer_wise_config["weighted_layers"]: - self.kernel_weights = getattr(self.layer, self.kernel) - - weight_min, weight_max = self._init_min_max_variables( - name=self.kernel_weights.name.split(":")[0], shape=self.kernel_weights.shape[self.channel_axis] - ) - - self.weight_range = {"min_var": weight_min, "max_var": weight_max} - self._trainable_weights.append(self.kernel_weights) - - num_input = 1 - if not isinstance(input_shape, tf.TensorShape): - num_input = len(input_shape) - self.query_input_index() - if not self.index: - self.index = [i for i in range(num_input)] - - if num_input == 1: - inputs_min, inputs_max = self._init_min_max_variables( - name=self.layer.name + "_input{}".format(0), shape=None - ) - self.inputs_range = {"min_var": inputs_min, "max_var": inputs_max} - else: - self.inputs_range = [] - for i in range(num_input): - self.inputs_range.append({}) - if i in self.index: - inputs_min, inputs_max = self._init_min_max_variables( - name=self.layer.name + "_input{}".format(i), shape=None - ) - self.inputs_range[i] = {"min_var": inputs_min, "max_var": inputs_max} - - def call(self, inputs, training=None): - """This is where the quantize wrapper's logic lives. - - Args: - inputs (tf.Tensor or dict/list/tuple): Inputs of the wrapped layer. - - Returns: - outputs (tf.Tensor or dict/list/tuple): Outputs of the wrapped layer. - """ - if training is None: - training = tf.keras.backend.learning_phase() - - # Quantize all weights, and replace them in the underlying layer. - if self._layer_class in layer_wise_config["weighted_layers"]: - weight_quantizer = FakeQuantize( - per_channel=True, - channel_axis=self.channel_axis, - ) - quantized_weight = weight_quantizer(self.kernel_weights, self.weight_range, training) - setattr(self.layer, self.kernel, quantized_weight) - - quantized_inputs = inputs - inputs_quantizer = FakeQuantize( - per_channel=False, - channel_axis=self.channel_axis, - ) - - if not isinstance(quantized_inputs, tf.Tensor): - for i in range(len(quantized_inputs)): - if i in self.index: - quantized_inputs[i] = inputs_quantizer(inputs[i], self.inputs_range[i], training) - else: - quantized_inputs = inputs_quantizer(inputs, self.inputs_range, training) - - args = tf_inspect.getfullargspec(self.layer.call).args - if "training" in args: - outputs = self.layer.call(quantized_inputs, training=training) - else: - outputs = self.layer.call(quantized_inputs) - - return outputs diff --git a/neural_compressor/tensorflow/quantization/utils/quantize_graph/qdq/fuse_qdq_in.py b/neural_compressor/tensorflow/quantization/utils/quantize_graph/qdq/fuse_qdq_in.py index e08b052204a..90f29fa3fa3 100644 --- a/neural_compressor/tensorflow/quantization/utils/quantize_graph/qdq/fuse_qdq_in.py +++ b/neural_compressor/tensorflow/quantization/utils/quantize_graph/qdq/fuse_qdq_in.py @@ -24,7 +24,7 @@ from ..quantize_graph_base import QuantizeNodeBase -class FuseNodeStartWithFusedInstanceNorm(QuantizeNodeBase): +class FuseNodeStartWithFusedInstanceNorm(QuantizeNodeBase): # pragma: no cover """Quantize FusedInstanceNorm and apply the fusion.""" def __init__(self, **kwargs): diff --git a/neural_compressor/tensorflow/quantization/utils/utility.py b/neural_compressor/tensorflow/quantization/utils/utility.py index 84ae1fb1915..5e3fa83ea90 100644 --- a/neural_compressor/tensorflow/quantization/utils/utility.py +++ b/neural_compressor/tensorflow/quantization/utils/utility.py @@ 
-308,45 +308,6 @@ def strip_unused_nodes(graph_def, input_node_names, output_node_names): return tf.compat.v1.graph_util.extract_sub_graph(cur_graph.dump_graph(), output_node_names) -def get_estimator_graph(estimator, input_fn): - """Get the graph of the estimator. - - Args: - estimator: tf estimator model - input_fn: input function - - Returns: - graph - """ - with tf.Graph().as_default() as g: - features, input_hooks = estimator._get_features_from_input_fn(input_fn, tf.estimator.ModeKeys.PREDICT) - estimator_spec = estimator._call_model_fn(features, None, tf.estimator.ModeKeys.PREDICT, estimator.config) - - outputs = ( - [tensor.name for tensor in estimator_spec.predictions.values()] - if isinstance(estimator_spec.predictions, dict) - else [estimator_spec.predictions.name] - ) - logger.info("Estimator output tensor names is {}.".format(outputs)) - with tf.compat.v1.Session(graph=g) as sess: - sess.run(tf.compat.v1.global_variables_initializer()) - # Freezing a graph requires output_node_names, which can be found in - # estimator_spec.predictions that contains prediction tensors as a - # dictionary - # When a model uses Iterator, we need to have 'MakeIterator' (default - # name used by TF) in the output_node_names as well. - output_nodes = list(set([output.split(":")[0] for output in outputs])) - if "MakeIterator" in [node.op for node in g.as_graph_def().node]: - output_nodes.append("MakeIterator") - - graph_def = tf.compat.v1.graph_util.convert_variables_to_constants(sess, g.as_graph_def(), output_nodes) - - graph = tf.Graph() - with graph.as_default(): - tf.import_graph_def(graph_def, name="") - return graph - - def strip_equivalent_nodes(graph_def, output_node_names): """Strip nodes with the same input and attr.""" stripped_graph = GraphAnalyzer() @@ -445,50 +406,6 @@ def get_model_input_shape(model): return 1 -def get_tensor_val_from_graph_node(graph_node_name_mapping, node_name): - """Get the tensor value for given node name. - - Args: - graph_node_name_mapping: key: node name, val: node - node_name: query node - - Returns: - tensor_val: numpy array - """ - from tensorflow.python.framework import tensor_util - - node = graph_node_name_mapping[node_name] - node_tensor = node.attr["value"].tensor - tensor_val = tensor_util.MakeNdarray(node_tensor) - return tensor_val - - -def int8_node_name_reverse(node): - """Reverse int8 node name.""" - int8_postfix = "_eightbit" - node_name = node.name - if "Quantized" in node.op: - index_postfix = node_name.find(int8_postfix) - if index_postfix != -1: - node_name = node_name[:index_postfix] - return node_name - - -def _parse_config(q_config, cfg, op_list): - """Parse q_config and get dequantize min max value.""" - activation_min_max = {} - if "__requant_min_max" in q_config: - for node_name, val in q_config["__requant_min_max"].items(): - node_name = node_name.split("_eightbit_requant_range")[0] - if node_name in op_list: - activation_min_max[node_name] = {"min": val[0], "max": val[1]} - updated_cfg = {"op": {}} - for op_name_and_type in cfg["op"].keys(): - if op_name_and_type[0] in op_list: - updated_cfg["op"][op_name_and_type] = cfg["op"][op_name_and_type] - return activation_min_max, updated_cfg - - def generate_feed_dict(input_tensor, inputs): """Generate feed dict helper function.""" if len(input_tensor) == 1: @@ -545,51 +462,6 @@ def check_shape(tensor, data): return feed_dict -def get_weight_from_input_tensor(model, input_tensor_names, op_types): - """Extracts weight tensors and their associated nodes from a smooth quant node's input tensor. 
- - Args: - model: A TensorFlow model containing a `graph_def` attribute. - input_tensor_names: A list of input tensor names to search for weight tensors. - op_types: A list of operation types to search for when looking for weight tensors. - - Returns: - A tuple of two dictionaries: - - sq_weight_tensors: A dictionary mapping each input tensor name - to a dict of its associated weight tensors with weight name. - - sq_weights_nodes: A dictionary mapping each input tensor name - to a dict of its associated weight nodes with weight name. - """ - g_analyzer = GraphAnalyzer() - g_analyzer.graph = model.graph_def - graph_info = g_analyzer.parse_graph() - - sq_weight_tensors = {} - sq_weights_nodes = {} - - from tensorflow.python.framework import tensor_util - - for name in input_tensor_names: - # Use dict rather than list to fix the QKV/VQK misorder issue - curr_weight_tensors = {} - curr_weights_nodes = {} - next_node_names = graph_info[name].outputs - for node_name in next_node_names: - curr_node = graph_info[node_name].node - if curr_node.op not in op_types: - continue - if len(curr_node.input) >= 2: - weight_name = curr_node.input[1] - weight_node = graph_info[weight_name].node - weight_tensor = tensor_util.MakeNdarray(weight_node.attr["value"].tensor) - curr_weight_tensors[weight_name] = weight_tensor - curr_weights_nodes[weight_name] = weight_node - # {input node -> {xxx_q_proj_matmul: value1, xxx_v_proj_matmul: value2, ...}, ...} - sq_weight_tensors[name] = curr_weight_tensors - sq_weights_nodes[name] = curr_weights_nodes - return sq_weight_tensors, sq_weights_nodes - - def apply_inlining(func): """Apply an inlining optimization to the function's graph definition. diff --git a/neural_compressor/tensorflow/utils/__init__.py b/neural_compressor/tensorflow/utils/__init__.py index 65dbabd2270..0e1535b235f 100644 --- a/neural_compressor/tensorflow/utils/__init__.py +++ b/neural_compressor/tensorflow/utils/__init__.py @@ -46,14 +46,10 @@ combine_histogram, get_all_fp32_data, get_tensor_histogram, - Dequantize, - dequantize_weight, - dump_data_to_local, - load_data_from_pkl, singleton, CpuInfo, Statistics, CaptureOutputToFile, - LazyImport, valid_keras_format, + TFSlimNetsFactory, ) diff --git a/neural_compressor/tensorflow/utils/data.py b/neural_compressor/tensorflow/utils/data.py index bdf4ce1d9bf..5854e45ad75 100644 --- a/neural_compressor/tensorflow/utils/data.py +++ b/neural_compressor/tensorflow/utils/data.py @@ -28,7 +28,7 @@ from neural_compressor.common import logger -def default_collate(batch): # pragma: no cover +def default_collate(batch): """Merge data with outer dimension batch size.""" elem = batch[0] if isinstance(elem, collections.abc.Mapping): @@ -229,7 +229,7 @@ def __len__(self): return (len(self.sampler) + self.batch_size - 1) // self.batch_size -class BaseDataLoader: # pragma: no cover +class BaseDataLoader: """Base class for TF DataLoaders. 
_generate_dataloader is needed to create a dataloader object diff --git a/neural_compressor/tensorflow/utils/model.py b/neural_compressor/tensorflow/utils/model.py index 75334446c4c..8ad020678ae 100644 --- a/neural_compressor/tensorflow/utils/model.py +++ b/neural_compressor/tensorflow/utils/model.py @@ -43,7 +43,7 @@ def reset_global_config(self): TFConfig = TensorflowGlobalConfig() -class Model(object): +class Model(object): # pragma: no cover """A wrapper to construct a Neural Compressor TF Model.""" def __new__(cls, root, **kwargs): diff --git a/neural_compressor/tensorflow/utils/model_wrappers.py b/neural_compressor/tensorflow/utils/model_wrappers.py index 2628ad1edb8..b9fc4a54a63 100644 --- a/neural_compressor/tensorflow/utils/model_wrappers.py +++ b/neural_compressor/tensorflow/utils/model_wrappers.py @@ -371,7 +371,7 @@ def _get_graph_from_saved_model_v2(saved_model_dir, input_tensor_names, output_t return load_saved_model(saved_model_dir, saved_model_tags, input_tensor_names, output_tensor_names) -def _get_graph_from_original_keras_v2(model): +def _get_graph_from_original_keras_v2(model): # pragma: no cover """The version 2 function that get graph from the original keras model. Args: @@ -424,7 +424,7 @@ def _get_graph_from_original_keras_v2(model): return graph_def, input_names, output_names -def _check_keras_format(model, saved_model_dir): +def _check_keras_format(model, saved_model_dir): # pragma: no cover """Decide which method will be used to get graph from the saved_model . Args: @@ -504,7 +504,7 @@ def _get_graph_from_saved_model_v1(model): return graph_def, inputs, outputs -def try_loading_keras(model, input_tensor_names, output_tensor_names): +def try_loading_keras(model, input_tensor_names, output_tensor_names): # pragma: no cover """Try different ways of loading keras models. Args: @@ -590,7 +590,7 @@ def slim_session(model, input_tensor_names, output_tensor_names, **kwargs): # p output_tensor_names (list of string): validated output_tensor_names. """ assert version1_lt_version2(tf.version.VERSION, "2.0.0"), "slim model only used in tensorflow 1.x" - from neural_compressor.tensorflow.utils.nets_factory import TFSlimNetsFactory + from neural_compressor.tensorflow.utils.utility import TFSlimNetsFactory factory = TFSlimNetsFactory() assert "name" in kwargs, "model name should be set in slim checkpoint...." @@ -682,7 +682,7 @@ def checkpoint_session(model, input_tensor_names, output_tensor_names, **kwargs) return sess, input_tensor_names, output_tensor_names -def estimator_session(model, input_tensor_names, output_tensor_names, **kwargs): +def estimator_session(model, input_tensor_names, output_tensor_names, **kwargs): # pragma: no cover """Build session with estimator model. Args: @@ -1113,68 +1113,6 @@ def model(self, input_model): """Set model in AutoTrackable object.""" self._auto_trackable = input_model - def compute_sparsity(self, tensor): - """Compute the sparsity. - - Args: - tensor: Tensorflow tensor - - Return: - (the original tensor size, number of zero elements, number of non-zero elements) - """ - mask = np.ones_like(tensor) - tensor_size = tensor.size - dense_mask = tensor != 0 - dense_size = dense_mask.sum() - return tensor_size, tensor_size - dense_size, dense_size - - def report_sparsity(self): - """Get sparsity of the model. - - Returns: - df (DataFrame): DataFrame of sparsity of each weight. - total_sparsity (float): total sparsity of model. 
- """ - import numpy as np - import pandas as pd - import tensorflow as tf - - df = pd.DataFrame(columns=["Name", "Shape", "NNZ (dense)", "NNZ (sparse)", "Sparsity(%)"]) - pd.set_option("display.precision", 2) - param_dims = [2, 4] - params_size = 0 - sparse_params_size = 0 - for index, layer in enumerate(tf.keras.models.load_model(self._model).layers): - if not len(layer.weights): - continue - # Extract just the actual parameter's name, which in this context we treat - # as its "type" - weights = layer.get_weights()[0] - if weights.ndim in param_dims: - param_size, sparse_param_size, dense_param_size = self.compute_sparsity(weights) - density = dense_param_size / param_size - params_size += param_size - sparse_params_size += sparse_param_size - df.loc[len(df.index)] = [ - index, - list(weights.shape), - dense_param_size, - sparse_param_size, - (1 - density) * 100, - ] - - total_sparsity = sparse_params_size / params_size * 100 - - df.loc[len(df.index)] = [ - "Total sparsity:", - "-", - params_size, - sparse_params_size, - total_sparsity, - ] - - return df, total_sparsity - def build_saved_model(self, root=None): """Build Tensorflow saved model. @@ -1411,67 +1349,6 @@ def save(self, root=None): shutil.rmtree(self.model_path, ignore_errors=True) -class TensorflowQATModel(TensorflowSavedModelModel): - """Build Tensorflow QAT model.""" - - def __init__(self, model="", **kwargs): - """Initialize a Tensorflow QAT model. - - Args: - model (string or tf.keras.Model object): model path or model object. - """ - assert isinstance(model, tf.keras.Model) or isinstance( - model, str - ), "The TensorflowQATModel should be initialized either by a string or a tf.keras.Model." - super(TensorflowQATModel, self).__init__(model) - self.keras_model = None - self.model_type = "keras" - - @property - def model(self): - """Return model itself.""" - if self.keras_model is None: - if isinstance(self._model, tf.keras.Model): - self.keras_model = self._model - else: - self.keras_model = tf.keras.models.load_model(self._model) - - return self.keras_model - - @model.setter - def model(self, q_model): - """Set model itself.""" - self.keras_model = q_model - - @property - def frozen_graph_def(self): - """Get frozen graph_def.""" - graph_def = tf.compat.v1.graph_util.convert_variables_to_constants( - self.sess, self.sess.graph_def, self.output_node_names - ) - return graph_def - - def save(self, root=None): - """Save Tensorflow QAT model.""" - if not root: - root = DEFAULT_WORKSPACE + "/saved_model" - root = os.path.abspath(os.path.expanduser(root)) - os.makedirs(os.path.dirname(root), exist_ok=True) - if root.endswith(".pb"): - saved_format = "pb file" - graph_def = self.frozen_graph_def - f = tf.io.gfile.GFile(root, "wb") - f.write(graph_def.SerializeToString()) - else: - q_aware_model = self.keras_model - q_aware_model.save(root) - saved_format = "saved_model" - if root.endswith(".h5"): - saved_format = "h5 file" - logger.info("Save quantized model to {}.".format(saved_format)) - return root - - class TensorflowCheckpointModel(TensorflowBaseModel): """Build Tensorflow checkpoint model.""" @@ -1552,93 +1429,6 @@ def save(self, root, *args, **kwargs): """Save Keras model.""" self._model_object.save(root) - @abstractmethod - def _export( - self, - save_path: str, - conf, - ): - pass - - @abstractmethod - def framework(self): - """Return framework.""" - return "keras" - - def get_all_weight_names(self): - """Get weight names of model. - - Returns: - list: weight names list. 
- """ - names = [] - for index, layer in enumerate(self.model.layers): - if len(layer.weights): - names.append(index) - return names - - def compute_sparsity(self, tensor): - """Compute the sparsity. - - Args: - tensor: Tensorflow tensor - - Return: - (the original tensor size, number of zero elements, number of non-zero elements) - """ - mask = np.ones_like(tensor) - tensor_size = tensor.size - dense_mask = tensor != 0 - dense_size = dense_mask.sum() - return tensor_size, tensor_size - dense_size, dense_size - - def report_sparsity(self): - """Get sparsity of the model. - - Returns: - df (DataFrame): DataFrame of sparsity of each weight. - total_sparsity (float): total sparsity of model. - """ - import numpy as np - import pandas as pd - import tensorflow as tf - - df = pd.DataFrame(columns=["Name", "Shape", "NNZ (dense)", "NNZ (sparse)", "Sparsity(%)"]) - pd.set_option("display.precision", 2) - param_dims = [2, 4] - params_size = 0 - sparse_params_size = 0 - for index, layer in enumerate(self.model.layers): - if not len(layer.weights): - continue - # Extract just the actual parameter's name, which in this context we treat - # as its "type" - weights = layer.get_weights()[0] - if weights.ndim in param_dims: - param_size, sparse_param_size, dense_param_size = self.compute_sparsity(weights) - density = dense_param_size / param_size - params_size += param_size - sparse_params_size += sparse_param_size - df.loc[len(df.index)] = [ - index, - list(weights.shape), - dense_param_size, - sparse_param_size, - (1 - density) * 100, - ] - - total_sparsity = sparse_params_size / params_size * 100 - - df.loc[len(df.index)] = [ - "Total sparsity:", - "-", - params_size, - sparse_params_size, - total_sparsity, - ] - - return df, total_sparsity - @property def input_node_names(self): """Return input node names.""" @@ -1673,7 +1463,6 @@ def output_node_names(self): "AutoTrackable": TensorflowSavedModelModel, "llm_saved_model": TensorflowLLMModel, "keras": KerasModel, - "keras_qat": TensorflowQATModel, } diff --git a/neural_compressor/tensorflow/utils/nets_factory.py b/neural_compressor/tensorflow/utils/nets_factory.py deleted file mode 100644 index d09ef4ba1d1..00000000000 --- a/neural_compressor/tensorflow/utils/nets_factory.py +++ /dev/null @@ -1,161 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-"""TF-Slim nets factory.""" - -from neural_compressor.tensorflow.utils.utility import singleton - - -@singleton -class TFSlimNetsFactory(object): - """TF-Slim nets factory.""" - - def __init__(self): - """Initialize a TFSlimNetsFactory.""" - # tf_slim only support specific models by default - self.default_slim_models = [ - "alexnet_v2", - "overfeat", - "vgg_a", - "vgg_16", - "vgg_19", - "inception_v1", - "inception_v2", - "inception_v3", - "resnet_v1_50", - "resnet_v1_101", - "resnet_v1_152", - "resnet_v1_200", - "resnet_v2_50", - "resnet_v2_101", - "resnet_v2_152", - "resnet_v2_200", - ] - - from tf_slim.nets import alexnet, inception, overfeat, resnet_v1, resnet_v2, vgg - - self.networks_map = { - "alexnet_v2": { - "model": alexnet.alexnet_v2, - "input_shape": [None, 224, 224, 3], - "num_classes": 1001, - "arg_scope": alexnet.alexnet_v2_arg_scope, - }, - "overfeat": { - "model": overfeat.overfeat, - "input_shape": [None, 224, 224, 3], - "num_classes": 1001, - "arg_scope": overfeat.overfeat_arg_scope, - }, - "vgg_a": { - "model": vgg.vgg_a, - "input_shape": [None, 224, 224, 3], - "num_classes": 1000, - "arg_scope": vgg.vgg_arg_scope, - }, - "vgg_16": { - "model": vgg.vgg_16, - "input_shape": [None, 224, 224, 3], - "num_classes": 1000, - "arg_scope": vgg.vgg_arg_scope, - }, - "vgg_19": { - "model": vgg.vgg_19, - "input_shape": [None, 224, 224, 3], - "num_classes": 1000, - "arg_scope": vgg.vgg_arg_scope, - }, - "inception_v1": { - "model": inception.inception_v1, - "input_shape": [None, 224, 224, 3], - "num_classes": 1001, - "arg_scope": inception.inception_v1_arg_scope, - }, - "inception_v2": { - "model": inception.inception_v2, - "input_shape": [None, 224, 224, 3], - "num_classes": 1001, - "arg_scope": inception.inception_v2_arg_scope, - }, - "inception_v3": { - "model": inception.inception_v3, - "input_shape": [None, 299, 299, 3], - "num_classes": 1001, - "arg_scope": inception.inception_v3_arg_scope, - }, - "resnet_v1_50": { - "model": resnet_v1.resnet_v1_50, - "input_shape": [None, 224, 224, 3], - "num_classes": 1000, - "arg_scope": resnet_v1.resnet_arg_scope, - }, - "resnet_v1_101": { - "model": resnet_v1.resnet_v1_101, - "input_shape": [None, 224, 224, 3], - "num_classes": 1000, - "arg_scope": resnet_v1.resnet_arg_scope, - }, - "resnet_v1_152": { - "model": resnet_v1.resnet_v1_152, - "input_shape": [None, 224, 224, 3], - "num_classes": 1000, - "arg_scope": resnet_v1.resnet_arg_scope, - }, - "resnet_v1_200": { - "model": resnet_v1.resnet_v1_200, - "input_shape": [None, 224, 224, 3], - "num_classes": 1000, - "arg_scope": resnet_v1.resnet_arg_scope, - }, - "resnet_v2_50": { - "model": resnet_v2.resnet_v2_50, - "input_shape": [None, 224, 224, 3], - "num_classes": 1001, - "arg_scope": resnet_v2.resnet_arg_scope, - }, - "resnet_v2_101": { - "model": resnet_v2.resnet_v2_101, - "input_shape": [None, 224, 224, 3], - "num_classes": 1001, - "arg_scope": resnet_v2.resnet_arg_scope, - }, - "resnet_v2_152": { - "model": resnet_v2.resnet_v2_152, - "input_shape": [None, 224, 224, 3], - "num_classes": 1001, - "arg_scope": resnet_v2.resnet_arg_scope, - }, - "resnet_v2_200": { - "model": resnet_v2.resnet_v2_200, - "input_shape": [None, 224, 224, 3], - "num_classes": 1001, - "arg_scope": resnet_v2.resnet_arg_scope, - }, - } - - def register(self, name, model_func, input_shape, arg_scope, **kwargs): - """Register a model to TFSlimNetsFactory. - - Args: - name (str): name of a model. - model_func (_type_): model that built from slim. - input_shape (_type_): input tensor shape. 
- arg_scope (_type_): slim arg scope that needed. - """ - net_info = {"model": model_func, "input_shape": input_shape, "arg_scope": arg_scope} - net = {name: {**net_info, **kwargs}} - self.networks_map.update(net) - self.default_slim_models.append(name) diff --git a/neural_compressor/tensorflow/utils/utility.py b/neural_compressor/tensorflow/utils/utility.py index 886dcffc234..a7671da1f1e 100644 --- a/neural_compressor/tensorflow/utils/utility.py +++ b/neural_compressor/tensorflow/utils/utility.py @@ -161,67 +161,6 @@ def get_tensor_histogram(tensor_data, bins=2048): return (hist, hist_edges, min_val, max_val, th) -def Dequantize(data, scale_info): - """Dequantize the data with the scale_info.""" - original_shape = data.shape - max_value = 255.0 if scale_info[0].find("Relu") != -1.0 else 127.0 - _scale = (np.array(scale_info[2]) - np.array(scale_info[1])) / max_value - de_scale = np.ones(original_shape) * _scale - de_data = np.multiply(data, de_scale).astype(np.float32) - return de_data - - -def dequantize_weight(weight_tensor, min_filter_tensor, max_filter_tensor): - """Dequantize the weight with min-max filter tensors.""" - weight_channel = weight_tensor.shape[-1] - if len(min_filter_tensor) == 1: - weight_tensor = weight_tensor * ((max_filter_tensor[0] - min_filter_tensor[0]) / 127.0) - else: - # TODO to calculate the de-quantized result in a parallel way - for i in range(weight_channel): - weight_tensor[:, :, :, i] = weight_tensor[:, :, :, i] * ( - (max_filter_tensor[i] - min_filter_tensor[i]) / 127.0 - ) - return weight_tensor - - -def dump_data_to_local(data, path, filename): - """Dump data to local as pkl file. - - Args: - data: Data used to dump - path: The directory to save data - filename: The filename to dump - - Returns: - loaded data - """ - from pathlib import Path - - if not os.path.exists(path): - Path(path).mkdir(parents=True, exist_ok=True) - file_path = os.path.join(path, filename) - with open(file_path, "wb") as fp: - pickle.dump(data, fp) - logging.getLogger("neural_compressor").info("Dumped data to %s" % file_path) - - -def load_data_from_pkl(path, filename): - """Load data from local pkl file. - - Args: - path: The directory to load data - filename: The filename to load - """ - try: - file_path = os.path.join(path, filename) - with open(file_path, "rb") as fp: - data = pickle.load(fp) - return data - except FileExistsError: - logging.getLogger("neural_compressor").info("Can not open %s." % path) - - def singleton(cls): """Not displayed in API Docs. @@ -395,33 +334,143 @@ def __exit__(self, type, value, traceback): self.tmp_file.close() -class LazyImport(object): - """Lazy import python module till use.""" +@singleton +class TFSlimNetsFactory(object): # pragma: no cover + """TF-Slim nets factory.""" - def __init__(self, module_name): - """Init LazyImport object. 
+ def __init__(self): + """Initialize a TFSlimNetsFactory.""" + # tf_slim only support specific models by default + self.default_slim_models = [ + "alexnet_v2", + "overfeat", + "vgg_a", + "vgg_16", + "vgg_19", + "inception_v1", + "inception_v2", + "inception_v3", + "resnet_v1_50", + "resnet_v1_101", + "resnet_v1_152", + "resnet_v1_200", + "resnet_v2_50", + "resnet_v2_101", + "resnet_v2_152", + "resnet_v2_200", + ] + + from tf_slim.nets import alexnet, inception, overfeat, resnet_v1, resnet_v2, vgg + + self.networks_map = { + "alexnet_v2": { + "model": alexnet.alexnet_v2, + "input_shape": [None, 224, 224, 3], + "num_classes": 1001, + "arg_scope": alexnet.alexnet_v2_arg_scope, + }, + "overfeat": { + "model": overfeat.overfeat, + "input_shape": [None, 224, 224, 3], + "num_classes": 1001, + "arg_scope": overfeat.overfeat_arg_scope, + }, + "vgg_a": { + "model": vgg.vgg_a, + "input_shape": [None, 224, 224, 3], + "num_classes": 1000, + "arg_scope": vgg.vgg_arg_scope, + }, + "vgg_16": { + "model": vgg.vgg_16, + "input_shape": [None, 224, 224, 3], + "num_classes": 1000, + "arg_scope": vgg.vgg_arg_scope, + }, + "vgg_19": { + "model": vgg.vgg_19, + "input_shape": [None, 224, 224, 3], + "num_classes": 1000, + "arg_scope": vgg.vgg_arg_scope, + }, + "inception_v1": { + "model": inception.inception_v1, + "input_shape": [None, 224, 224, 3], + "num_classes": 1001, + "arg_scope": inception.inception_v1_arg_scope, + }, + "inception_v2": { + "model": inception.inception_v2, + "input_shape": [None, 224, 224, 3], + "num_classes": 1001, + "arg_scope": inception.inception_v2_arg_scope, + }, + "inception_v3": { + "model": inception.inception_v3, + "input_shape": [None, 299, 299, 3], + "num_classes": 1001, + "arg_scope": inception.inception_v3_arg_scope, + }, + "resnet_v1_50": { + "model": resnet_v1.resnet_v1_50, + "input_shape": [None, 224, 224, 3], + "num_classes": 1000, + "arg_scope": resnet_v1.resnet_arg_scope, + }, + "resnet_v1_101": { + "model": resnet_v1.resnet_v1_101, + "input_shape": [None, 224, 224, 3], + "num_classes": 1000, + "arg_scope": resnet_v1.resnet_arg_scope, + }, + "resnet_v1_152": { + "model": resnet_v1.resnet_v1_152, + "input_shape": [None, 224, 224, 3], + "num_classes": 1000, + "arg_scope": resnet_v1.resnet_arg_scope, + }, + "resnet_v1_200": { + "model": resnet_v1.resnet_v1_200, + "input_shape": [None, 224, 224, 3], + "num_classes": 1000, + "arg_scope": resnet_v1.resnet_arg_scope, + }, + "resnet_v2_50": { + "model": resnet_v2.resnet_v2_50, + "input_shape": [None, 224, 224, 3], + "num_classes": 1001, + "arg_scope": resnet_v2.resnet_arg_scope, + }, + "resnet_v2_101": { + "model": resnet_v2.resnet_v2_101, + "input_shape": [None, 224, 224, 3], + "num_classes": 1001, + "arg_scope": resnet_v2.resnet_arg_scope, + }, + "resnet_v2_152": { + "model": resnet_v2.resnet_v2_152, + "input_shape": [None, 224, 224, 3], + "num_classes": 1001, + "arg_scope": resnet_v2.resnet_arg_scope, + }, + "resnet_v2_200": { + "model": resnet_v2.resnet_v2_200, + "input_shape": [None, 224, 224, 3], + "num_classes": 1001, + "arg_scope": resnet_v2.resnet_arg_scope, + }, + } + + def register(self, name, model_func, input_shape, arg_scope, **kwargs): + """Register a model to TFSlimNetsFactory. Args: - module_name (string): The name of module imported later + name (str): name of a model. + model_func (_type_): model that built from slim. + input_shape (_type_): input tensor shape. + arg_scope (_type_): slim arg scope that needed. 
""" - self.module_name = module_name - self.module = None - - def __getattr__(self, name): - """Get the attributes of the module by name.""" - try: - self.module = importlib.import_module(self.module_name) - mod = getattr(self.module, name) - except: - spec = importlib.util.find_spec(str(self.module_name + "." + name)) - mod = importlib.util.module_from_spec(spec) - spec.loader.exec_module(mod) - return mod - - def __call__(self, *args, **kwargs): - """Call the function in that module.""" - function_name = self.module_name.split(".")[-1] - module_name = self.module_name.split(f".{function_name}")[0] - self.module = importlib.import_module(module_name) - function = getattr(self.module, function_name) - return function(*args, **kwargs) + net_info = {"model": model_func, "input_shape": input_shape, "arg_scope": arg_scope} + net = {name: {**net_info, **kwargs}} + self.networks_map.update(net) + self.default_slim_models.append(name) diff --git a/test/3x/tensorflow/keras/test_layers.py b/test/3x/tensorflow/keras/test_layers.py new file mode 100644 index 00000000000..b43b3fd8bf7 --- /dev/null +++ b/test/3x/tensorflow/keras/test_layers.py @@ -0,0 +1,213 @@ +# -*- coding: utf-8 -*- +# +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import math +import os +import shutil +import unittest + +import keras +import numpy as np +import tensorflow as tf + +from neural_compressor.common import Logger +from neural_compressor.tensorflow.utils import version1_gte_version2 + +logger = Logger().get_logger() + + +def build_model1(): + # Load MNIST dataset + mnist = keras.datasets.mnist + + # 60000 images in train and 10000 images in test, but we don't need so much for ut + (train_images, train_labels), (test_images, test_labels) = mnist.load_data() + train_images, train_labels = train_images[:1000], train_labels[:1000] + test_images, test_labels = test_images[:200], test_labels[:200] + + # Normalize the input image so that each pixel value is between 0 to 1. + train_images = train_images / 255.0 + test_images = test_images / 255.0 + + # Define the model architecture. 
+ model = keras.Sequential( + [ + keras.layers.InputLayer(input_shape=(28, 28)), + keras.layers.Reshape(target_shape=(28, 28, 1)), + keras.layers.DepthwiseConv2D(3, 3, activation="relu", name="conv2d"), + keras.layers.MaxPooling2D(pool_size=(2, 2)), + keras.layers.Flatten(), + keras.layers.Dense(10, name="dense"), + ] + ) + # Train the digit classification model + model.compile( + optimizer="adam", loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True), metrics=["accuracy"] + ) + + model.fit( + train_images, + train_labels, + epochs=1, + validation_split=0.1, + ) + + _, baseline_model_accuracy = model.evaluate(test_images, test_labels, verbose=0) + + print("Baseline test accuracy:", baseline_model_accuracy) + if version1_gte_version2(tf.__version__, "2.16.1"): + model.save("baseline_model1.keras") + else: + model.save("baseline_model1") + + +def build_model2(): + # Load MNIST dataset + mnist = keras.datasets.mnist + + # 60000 images in train and 10000 images in test, but we don't need so much for ut + (train_images, train_labels), (test_images, test_labels) = mnist.load_data() + train_images, train_labels = train_images[:1000], train_labels[:1000] + test_images, test_labels = test_images[:200], test_labels[:200] + + # Normalize the input image so that each pixel value is between 0 to 1. + train_images = train_images / 255.0 + test_images = test_images / 255.0 + + # Define the model architecture. + model = keras.Sequential( + [ + keras.layers.InputLayer(input_shape=(28, 28)), + keras.layers.Reshape(target_shape=(28, 28, 1)), + keras.layers.SeparableConv2D(3, 3, activation="relu"), + keras.layers.AveragePooling2D(pool_size=(2, 2)), + keras.layers.Flatten(), + keras.layers.Dense(10, name="dense"), + ] + ) + # Train the digit classification model + model.compile( + optimizer="adam", loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True), metrics=["accuracy"] + ) + + model.fit( + train_images, + train_labels, + epochs=1, + validation_split=0.1, + ) + + _, baseline_model_accuracy = model.evaluate(test_images, test_labels, verbose=0) + + print("Baseline test accuracy:", baseline_model_accuracy) + if version1_gte_version2(tf.__version__, "2.16.1"): + model.save("baseline_model2.keras") + else: + model.save("baseline_model2") + + +class Dataset(object): + def __init__(self, batch_size=1): + self.batch_size = batch_size + mnist = keras.datasets.mnist + (train_images, train_labels), (test_images, test_labels) = mnist.load_data() + train_images, train_labels = train_images[:1000], train_labels[:1000] + test_images, test_labels = test_images[:200], test_labels[:200] + # Normalize the input image so that each pixel value is between 0 to 1. 
+        self.train_images = train_images / 255.0
+        self.test_images = test_images / 255.0
+        self.train_labels = train_labels
+        self.test_labels = test_labels
+
+    def __len__(self):
+        return len(self.test_images)
+
+    def __getitem__(self, idx):
+        return self.test_images[idx], self.test_labels[idx]
+
+
+class MyDataloader:
+    def __init__(self, dataset, batch_size=1):
+        self.dataset = dataset
+        self.batch_size = batch_size
+        self.length = math.ceil(len(dataset) / self.batch_size)
+
+    def __iter__(self):
+        for _, (images, labels) in enumerate(self.dataset):
+            images = np.expand_dims(images, axis=0)
+            labels = np.expand_dims(labels, axis=0)
+            yield (images, labels)
+
+    def __len__(self):
+        return self.length
+
+
+class TestTF3xNewApi(unittest.TestCase):
+    @classmethod
+    def setUpClass(self):
+        build_model1()
+        build_model2()
+        os.environ["ITEX_ONEDNN_GRAPH"] = "1"
+        self.fp32_model_path1 = (
+            "baseline_model1.keras" if version1_gte_version2(tf.__version__, "2.16.1") else "baseline_model1"
+        )
+        self.fp32_model_path2 = (
+            "baseline_model2.keras" if version1_gte_version2(tf.__version__, "2.16.1") else "baseline_model2"
+        )
+
+    @classmethod
+    def tearDownClass(self):
+        if self.fp32_model_path1.endswith(".keras"):
+            os.remove(self.fp32_model_path1)
+            os.remove(self.fp32_model_path2)
+        else:
+            shutil.rmtree(self.fp32_model_path1, ignore_errors=True)
+            shutil.rmtree(self.fp32_model_path2, ignore_errors=True)
+        os.environ["ITEX_ONEDNN_GRAPH"] = "0"
+
+    def test_depthwise_conv2d(self):
+        logger.info("test_depthwise_conv2d")
+        from neural_compressor.tensorflow import quantize_model
+        from neural_compressor.tensorflow.keras import get_default_static_quant_config
+
+        calib_dataloader = MyDataloader(dataset=Dataset())
+        fp32_model = keras.models.load_model(self.fp32_model_path1)
+        qmodel = quantize_model(fp32_model, get_default_static_quant_config(), calib_dataloader)
+        self.assertIsNotNone(qmodel)
+
+        for layer in qmodel.layers:
+            if layer.name == "conv2d":
+                self.assertEqual(layer.__class__.__name__, "QDepthwiseConv2D")
+                break
+
+    def test_separable_conv2d(self):
+        logger.info("test_separable_conv2d")
+        from neural_compressor.tensorflow import quantize_model
+        from neural_compressor.tensorflow.keras import get_default_static_quant_config
+
+        calib_dataloader = MyDataloader(dataset=Dataset())
+        fp32_model = keras.models.load_model(self.fp32_model_path2)
+        qmodel = quantize_model(fp32_model, get_default_static_quant_config(), calib_dataloader)
+        self.assertIsNotNone(qmodel)
+
+        # The SeparableConv2D layer in this model is not named "conv2d", so check for the
+        # quantized layer by its class name instead of by layer name.
+        quantized_class_names = [layer.__class__.__name__ for layer in qmodel.layers]
+        self.assertIn("QSeparableConv2D", quantized_class_names)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/test/3x/tensorflow/keras/test_model_wrappers.py b/test/3x/tensorflow/keras/test_model_wrappers.py
index b9cb3eecfd0..e4d906e27a2 100644
--- a/test/3x/tensorflow/keras/test_model_wrappers.py
+++ b/test/3x/tensorflow/keras/test_model_wrappers.py
@@ -94,30 +94,6 @@ def test_keras_model(self):
 
         self.assertEqual(os.path.isfile("./keras_model.keras"), True)
 
-    def test_tf_qat_model(self):
-        if parse_version(tf.version.VERSION) < parse_version("2.3.0"):
-            return
-
-        from neural_compressor.tensorflow.utils.model_wrappers import TensorflowQATModel
-
-        keras_model = self.model
-        model = TensorflowQATModel(keras_model)
-        self.assertEqual(isinstance(model.model, tf.keras.Model), True)
-        self.assertEqual(model.model_path, None)
-
-        keras_model.save("./simple_model.keras")
-        model = TensorflowQATModel("./simple_model.keras")
-        
self.assertEqual(isinstance(model.model, tf.keras.Model), True) - self.assertEqual(model.model_path, "./simple_model.keras") - - model.save("./keras_model.keras") - loaded_model = tf.keras.models.load_model("./keras_model.keras") - self.assertEqual(isinstance(loaded_model, tf.keras.Model), True) - - model.save("keras_model.h5") - loaded_model = tf.keras.models.load_model("keras_model.h5") - self.assertEqual(isinstance(loaded_model, tf.keras.Model), True) - if __name__ == "__main__": unittest.main() diff --git a/test/3x/tensorflow/quantization/ptq/test_get_estimator_graph.py b/test/3x/tensorflow/quantization/ptq/test_get_estimator_graph.py deleted file mode 100644 index b538c34a43d..00000000000 --- a/test/3x/tensorflow/quantization/ptq/test_get_estimator_graph.py +++ /dev/null @@ -1,52 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -import os -import platform -import unittest - -import tensorflow as tf - -from neural_compressor.tensorflow.quantization.utils.utility import get_estimator_graph -from neural_compressor.tensorflow.utils import version1_gte_version2 - - -class TestEstimatorGraphConvert(unittest.TestCase): - @classmethod - def setUpClass(self): - if version1_gte_version2(tf.version.VERSION, "2.16.1"): - return - - self.dst_path = "/tmp/.neural_compressor/train.csv" - self.titanic_file = tf.keras.utils.get_file( - self.dst_path, "https://storage.googleapis.com/tf-datasets/titanic/train.csv" - ) - - @unittest.skipIf( - version1_gte_version2(tf.version.VERSION, "2.16.1"), "The estimator APIs are deleted after TF2.16.1" - ) - def test_get_estimator_graph(self): - def train_input_fn(): - titanic = tf.data.experimental.make_csv_dataset(self.titanic_file, batch_size=32, label_name="survived") - titanic_batches = titanic.cache().repeat().shuffle(500).prefetch(tf.data.experimental.AUTOTUNE) - return titanic_batches - - age = tf.feature_column.numeric_column("age") - cls = tf.feature_column.categorical_column_with_vocabulary_list("class", ["First", "Second", "Third"]) - embark = tf.feature_column.categorical_column_with_hash_bucket("embark_town", 32) - import tempfile - - model_dir = tempfile.mkdtemp() - model = tf.estimator.LinearClassifier(model_dir=model_dir, feature_columns=[embark, cls, age], n_classes=2) - model = model.train(input_fn=train_input_fn, steps=100) - result = model.evaluate(train_input_fn, steps=10) - - graph = get_estimator_graph(model, train_input_fn) - - self.assertTrue(isinstance(graph, tf.Graph)) - graph_def = graph.as_graph_def() - self.assertGreater(len(graph_def.node), 1) - - -if __name__ == "__main__": - unittest.main() diff --git a/test/3x/tensorflow/quantization/ptq/test_set_tensor.py b/test/3x/tensorflow/quantization/ptq/test_set_tensor.py deleted file mode 100644 index 99e5f50ff7d..00000000000 --- a/test/3x/tensorflow/quantization/ptq/test_set_tensor.py +++ /dev/null @@ -1,145 +0,0 @@ -import os -import shutil -import unittest - -import numpy as np -import tensorflow as tf -import yaml -from tensorflow.compat.v1 import graph_util - -from neural_compressor.tensorflow.algorithms.static_quant.tensorflow import TensorFlowAdaptor -from neural_compressor.tensorflow.utils import disable_random - - -class TestSetTensor(unittest.TestCase): - @classmethod - def tearDownClass(self): - shutil.rmtree("./saved", ignore_errors=True) - - @disable_random() - def test_fp32bias(self): - x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") - paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) - x_pad = tf.pad(x, paddings, "CONSTANT") - conv_weights = 
tf.compat.v1.get_variable( - "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() - ) - conv = tf.nn.conv2d(x_pad, conv_weights, strides=[1, 2, 2, 1], padding="VALID") - - conv_bias = tf.compat.v1.get_variable( - "bias", [16], dtype=tf.float32, initializer=tf.compat.v1.random_normal_initializer() - ) - - conv_bias = tf.math.add(conv, conv_bias) - relu6 = tf.nn.relu6(conv_bias, name="op_to_store") - - out_name = relu6.name.split(":")[0] - with tf.compat.v1.Session() as sess: - sess.run(tf.compat.v1.global_variables_initializer()) - constant_graph = graph_util.convert_variables_to_constants( - sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] - ) - - from neural_compressor.tensorflow import StaticQuantConfig, quantize_model - from neural_compressor.tensorflow.utils import BaseDataLoader, DummyDataset - - dataset = DummyDataset(shape=(100, 56, 56, 16), label=True) - calib_dataloader = BaseDataLoader(dataset) - quant_config = StaticQuantConfig() - q_model = quantize_model(constant_graph, quant_config, calib_dataloader) - - framework_specific_info = { - "device": "cpu", - "workspace_path": "saved", - "random_seed": 1978, - "inputs": ["input"], - "outputs": ["op_to_store"], - "approach": "post_training_static_quant", - "format": "default", - "backend": "default", - } - adaptor = TensorFlowAdaptor(framework_specific_info) - adaptor.set_tensor(q_model, {"bias": np.random.random(16)}) - - from tensorflow.core.framework import attr_value_pb2 - from tensorflow.python.framework import dtypes - - for node in q_model.graph_def.node: - if node.name == "bias": - self.assertEqual(node.attr["dtype"], attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) - - @disable_random() - def test_int32bias(self): - x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") - paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) - x_pad = tf.pad(x, paddings, "CONSTANT") - conv_weights = tf.compat.v1.get_variable( - "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() - ) - conv = tf.nn.conv2d(x_pad, conv_weights, strides=[1, 2, 2, 1], padding="VALID") - - conv_bias = tf.compat.v1.get_variable("bias", [16], dtype=tf.float32) - - conv_bias = tf.math.add(conv, conv_bias) - relu6 = tf.nn.relu6(conv_bias, name="relu_0") - - conv_weights1 = tf.compat.v1.get_variable( - "weight1", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() - ) - conv1 = tf.nn.conv2d(relu6, conv_weights1, strides=[1, 2, 2, 1], padding="VALID") - - conv_bias1 = tf.compat.v1.get_variable("bias1", [16], dtype=tf.float32) - - conv_bias1 = tf.math.add(conv1, conv_bias1) - relu6 = tf.nn.relu6(conv_bias1, name="relu_1") - - conv_weights2 = tf.compat.v1.get_variable( - "weight2", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() - ) - conv2 = tf.nn.conv2d(relu6, conv_weights2, strides=[1, 2, 2, 1], padding="VALID") - - conv_bias2 = tf.compat.v1.get_variable("bias2", [16], dtype=tf.float32) - - conv_bias2 = tf.math.add(conv2, conv_bias2) - relu6 = tf.nn.relu6(conv_bias2, name="op_to_store") - out_name = relu6.name.split(":")[0] - with tf.compat.v1.Session() as sess: - sess.run(tf.compat.v1.global_variables_initializer()) - constant_graph = graph_util.convert_variables_to_constants( - sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] - ) - - for i in constant_graph.node: - if i.op.find("Add") != -1: - i.op = "Add" - - from neural_compressor.tensorflow import StaticQuantConfig, quantize_model - from 
neural_compressor.tensorflow.utils import BaseDataLoader, DummyDataset - - dataset = DummyDataset(shape=(100, 56, 56, 16), label=True) - calib_dataloader = BaseDataLoader(dataset) - quant_config = StaticQuantConfig() - q_model = quantize_model(constant_graph, quant_config, calib_dataloader) - - framework_specific_info = { - "device": "cpu", - "workspace_path": "saved", - "random_seed": 1978, - "inputs": ["input"], - "outputs": ["op_to_store"], - "approach": "post_training_static_quant", - "format": "default", - "backend": "default", - } - adaptor = TensorFlowAdaptor(framework_specific_info) - adaptor.set_tensor(q_model, {"bias1": np.random.randint(6, size=2, dtype="int32")}) - from tensorflow.core.framework import attr_value_pb2 - from tensorflow.python.framework import dtypes - - for node in q_model.graph_def.node: - if node.name == "bias2": - self.assertEqual(node.attr["dtype"], attr_value_pb2.AttrValue(type=dtypes.qint32.as_datatype_enum)) - - -if __name__ == "__main__": - unittest.main() diff --git a/test/3x/tensorflow/test_config.py b/test/3x/tensorflow/test_config.py index 579f69166d5..4a48387af55 100644 --- a/test/3x/tensorflow/test_config.py +++ b/test/3x/tensorflow/test_config.py @@ -213,6 +213,35 @@ def test_static_quant_from_dict_advance(self): self.assertEqual(conv2d_quantized, False) + def test_static_quant_from_dict_advance2(self): + logger.info("test_static_quant_from_dict_advance2") + from neural_compressor.tensorflow import quantize_model + from neural_compressor.tensorflow.utils import DummyDataset + + dataset = DummyDataset(shape=(100, 32, 32, 3), label=True) + calib_dataloader = MyDataLoader(dataset=dataset) + fp32_model = self.graph + quant_config = { + "static_quant": { + "global": { + "weight_dtype": "int8", + "weight_sym": True, + "weight_granularity": "per_channel", + "act_dtype": "int8", + "act_sym": True, + "act_granularity": "per_channel", + }, + "local": { + "conv1": { + "weight_algorithm": "kl", + "act_algorithm": "kl", + } + }, + } + } + qmodel = quantize_model(fp32_model, quant_config, calib_dataloader) + self.assertIsNotNone(qmodel) + def test_static_quant_from_class_advance(self): logger.info("test_static_quant_from_class_advance") from neural_compressor.tensorflow import StaticQuantConfig, quantize_model diff --git a/test/3x/tensorflow/test_model_wrappers.py b/test/3x/tensorflow/test_model_wrappers.py index da76526e6f9..b5cdb7be9de 100644 --- a/test/3x/tensorflow/test_model_wrappers.py +++ b/test/3x/tensorflow/test_model_wrappers.py @@ -147,8 +147,6 @@ def test_validate_graph_node(self): "Only supports tf previous to the version 2.16.1", ) def test_estimator(self): - from neural_compressor.tensorflow.quantization.utils.utility import get_estimator_graph - model_fn = build_estimator() input_fn = build_input_fn() estimator = tf.estimator.Estimator(model_fn, model_dir=None, config=None, params=None, warm_start_from=None) @@ -205,7 +203,7 @@ def test_slim(self): self.assertGreaterEqual(len(model.input_node_names), 1) self.assertEqual(model.model_path, "./slim_ckpt/inception_v1.ckpt") # test net factory - from neural_compressor.tensorflow.utils.nets_factory import TFSlimNetsFactory + from neural_compressor.tensorflow.utils.utility import TFSlimNetsFactory factory = TFSlimNetsFactory() from tf_slim.nets import inception