Commit
fix bug: upgrade model opset to >= 15 for DnnlExecutionProvider bf16 and skip bf16 nodes when calculating quantization params
Signed-off-by: Mengni Wang <[email protected]>
mengniwang95 committed Jul 4, 2023
1 parent fec595c commit 381f1bd
Showing 2 changed files with 11 additions and 1 deletion.
10 changes: 10 additions & 0 deletions neural_compressor/adaptor/onnxrt.py
@@ -212,6 +212,16 @@ def quantize(self, tune_cfg, model, data_loader, q_func=None):
         return model
     if model.model.opset_import[0].version < 11:  # pragma: no cover
         logger.warning("Quantize input needs model opset 11 or newer.")
+    if self.backend == 'DnnlExecutionProvider' and \
+        any([i.domain in ['', 'ai.onnx'] and i.version < 15 for i in model.model.opset_import]):
+        from onnx import version_converter
+        try:
+            model.model = version_converter.convert_version(model.model, 15)
+        except Exception:
+            logger.warning("Failed to upgrade model opset_import to >= 15, "
+                           "please upgrade it manually to run with the bf16 data type")
+            exit(0)
+
     from neural_compressor.adaptor.ox_utils.util import QuantizationMode
     if self.format == "qlinearops":
         format = QuantizationMode.QLinearOps
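
For context, the added branch relies on onnx's version_converter to raise the default-domain opset so the DNNL backend can run bf16. Below is a minimal standalone sketch of the same check and conversion; the model path and the printed message are illustrative, not part of this commit:

import onnx
from onnx import version_converter

model = onnx.load("model.onnx")  # hypothetical input path
# Per the commit above, the DnnlExecutionProvider bf16 path expects the
# default-domain ('' or 'ai.onnx') opset to be at least 15.
if any(imp.domain in ("", "ai.onnx") and imp.version < 15 for imp in model.opset_import):
    try:
        model = version_converter.convert_version(model, 15)
    except Exception as err:
        print(f"Failed to upgrade opset to >= 15, upgrade manually for bf16: {err}")
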
2 changes: 1 addition & 1 deletion neural_compressor/adaptor/ox_utils/calibration.py
@@ -467,7 +467,7 @@ def calculate_quantization_params(self, q_config, quantization_thresholds):
     if tensor_name in output_name_to_nodes:
         parent = output_name_to_nodes[tensor_name]
         if parent and parent.name in q_config and \
-            q_config[parent.name] not in ['fp32', 'fp16']:
+            q_config[parent.name] not in ['fp32', 'fp16', 'bf16']:
             scheme = q_config[parent.name]['activation']['scheme']
             qType = q_config[parent.name]['activation']['dtype']
     elif self.backend in ['TensorrtExecutionProvider']:
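
The one-line change above extends the float passthrough list so that nodes left in bf16, like those left in fp32 or fp16, contribute no quantization scheme or dtype during calibration. A hedged sketch of the guard follows; the q_config shape and node names shown are inferred from this diff for illustration, not taken from the file:

FLOAT_DTYPES = ('fp32', 'fp16', 'bf16')

q_config = {
    "conv_1": {"activation": {"scheme": "sym", "dtype": "int8"}},  # quantized node
    "matmul_2": "bf16",  # node kept in bf16; skipped after this commit
}

for name, cfg in q_config.items():
    if cfg in FLOAT_DTYPES:
        continue  # float nodes need no scale/zero-point
    scheme = cfg["activation"]["scheme"]
    qtype = cfg["activation"]["dtype"]
    print(name, scheme, qtype)
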
