Chronos: 'forecaster.predict_with_openvino' supports quantization API (intel-analytics#5690)

* add openvino quantization

* add some comments

* fix code style
liangs6212 authored and ForJadeForest committed Sep 20, 2022
1 parent 8826093 commit 637383a
Showing 2 changed files with 35 additions and 10 deletions.
python/chronos/src/bigdl/chronos/forecaster/base_forecaster.py: 39 changes (29 additions & 10 deletions)
@@ -85,6 +85,7 @@ def model_creator_orca(config):
self.onnxruntime_fp32 = None # onnxruntime session for fp32 precision
self.openvino_fp32 = None # placeholder openvino session for fp32 precision
self.onnxruntime_int8 = None # onnxruntime session for int8 precision
self.openvino_int8 = None # placeholder openvino session for int8 precision
self.pytorch_int8 = None # pytorch model for int8 precision

def _build_automodel(self, data, validation_data=None, batch_size=32, epochs=1):
@@ -554,7 +555,7 @@ def predict_with_onnx(self, data, batch_size=32, quantize=False):
input_data=data,
batch_size=batch_size)

def predict_with_openvino(self, data, batch_size=32):
def predict_with_openvino(self, data, batch_size=32, quantize=False):
"""
Predict using a trained forecaster with openvino. The method can only be
used when the forecaster is a non-distributed version.
@@ -571,6 +572,7 @@
:param batch_size: predict batch size. The value will not affect predict
result but will affect resources cost(e.g. memory and time). Defaults
to 32. None for all-data-single-time inference.
:param quantize: whether to use the quantized openvino model to predict.
:return: A numpy array with shape (num_samples, horizon, target_dim).
"""
@@ -585,11 +587,16 @@
if not self.fitted:
invalidInputError(False,
"You must call fit or restore first before calling predict!")
if self.openvino_fp32 is None:
self.build_openvino()
return _pytorch_fashion_inference(model=self.openvino_fp32,
input_data=data,
batch_size=batch_size)
if quantize:
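# note: self.openvino_int8 is only populated by quantize(..., framework="openvino"),
# so this path has no lazy build_openvino() fallback like the fp32 branch below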
return _pytorch_fashion_inference(model=self.openvino_int8,
input_data=data,
batch_size=batch_size)
else:
if self.openvino_fp32 is None:
self.build_openvino()
return _pytorch_fashion_inference(model=self.openvino_fp32,
input_data=data,
batch_size=batch_size)
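Taken together, the two branches give predict_with_openvino an fp32 default and an opt-in int8 path. A minimal end-to-end sketch of the new API (assuming the BigDL 2.x import layout; the toy shapes and constructor arguments are illustrative, and only the quantize/predict_with_openvino calls come from this diff):

import numpy as np
from bigdl.chronos.forecaster import TCNForecaster

# illustrative toy data: x is (samples, past_seq_len, features), y is (samples, horizon, targets)
x_train = np.random.randn(100, 24, 1).astype(np.float32)
y_train = np.random.randn(100, 5, 1).astype(np.float32)
x_test = np.random.randn(16, 24, 1).astype(np.float32)

forecaster = TCNForecaster(past_seq_len=24, future_seq_len=5,
                           input_feature_num=1, output_feature_num=1)
forecaster.fit((x_train, y_train), epochs=1)

# fp32: the openvino session is built lazily on first use
yhat_fp32 = forecaster.predict_with_openvino(x_test)

# int8: no lazy build; quantize(..., framework="openvino") must run first
forecaster.quantize(calib_data=(x_train, y_train), framework="openvino")
yhat_int8 = forecaster.predict_with_openvino(x_test, quantize=True)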

def evaluate(self, data, batch_size=32, multioutput="raw_values", quantize=False):
"""
@@ -881,6 +888,7 @@ def to_local(self):
self.onnxruntime_fp32 = None # onnxruntime session for fp32 precision
self.openvino_fp32 = None # openvino session for fp32 precision
self.onnxruntime_int8 = None # onnxruntime session for int8 precision
self.openvino_int8 = None # openvino session for int8 precision
self.pytorch_int8 = None # pytorch model for int8 precision
return self

@@ -1007,10 +1015,13 @@ def quantize(self, calib_data=None,
quantization. You may choose from "mse", "mae", "rmse", "r2", "mape", "smape".
:param conf: A path to conf yaml file for quantization. Default to None,
using default config.
:param framework: string or list, [{'pytorch'|'pytorch_fx'|'pytorch_ipex'},
{'onnxrt_integerops'|'onnxrt_qlinearops'}]. Default: 'pytorch_fx'.
Consistent with Intel Neural Compressor.
:param framework: string or list.
[{'pytorch'|'pytorch_fx'|'pytorch_ipex'},
{'onnxrt_integerops'|'onnxrt_qlinearops'},
{'openvino'}] Default: 'pytorch_fx'. Consistent with Intel Neural Compressor.
:param approach: str, 'static' or 'dynamic'. Default to 'static'.
OpenVINO supports static mode only; if set to 'dynamic',
it will be replaced with 'static'.
:param tuning_strategy: str, 'bayesian', 'basic', 'mse' or 'sigopt'. Default to 'bayesian'.
:param relative_drop: Float, tolerable relative accuracy drop. Default to None,
e.g. set to 0.1 means that we accept a 10% increase in the metrics error.
@@ -1067,9 +1078,15 @@ def quantize(self, calib_data=None,
framework = [framework] if isinstance(framework, str) else framework
temp_quantized_model = None
for framework_item in framework:
accelerator, method = framework_item.split('_')
if '_' in framework_item:
accelerator, method = framework_item.split('_')
else:
accelerator = framework_item
if accelerator == 'pytorch':
accelerator = None
elif accelerator == 'openvino':
method = None
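# openvino quantization is static-only (see the approach parameter above)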
approach = "static"
else:
accelerator = 'onnxruntime'
method = method[:-3]
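To make the dispatch above easier to follow, here is the same mapping pulled out as a standalone sketch (it mirrors the diff rather than any BigDL helper; the method = None default is added defensively and is not in the original):

def parse_framework(framework_item, approach):
    # 'pytorch'/'pytorch_fx'/'pytorch_ipex' -> accelerator None (plain pytorch backend)
    # 'onnxrt_integerops'/'onnxrt_qlinearops' -> ('onnxruntime', 'integer'/'qlinear')
    # 'openvino' -> ('openvino', None), approach forced to 'static'
    method = None
    if '_' in framework_item:
        accelerator, method = framework_item.split('_')
    else:
        accelerator = framework_item
    if accelerator == 'pytorch':
        accelerator = None
    elif accelerator == 'openvino':
        approach = 'static'
    else:
        accelerator = 'onnxruntime'
        method = method[:-3]  # strip the 'ops' suffix: 'qlinearops' -> 'qlinear'
    return accelerator, method, approach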
@@ -1088,6 +1105,8 @@
onnxruntime_session_options=sess_options)
if accelerator == 'onnxruntime':
self.onnxruntime_int8 = q_model
if accelerator == 'openvino':
self.openvino_int8 = q_model
if accelerator is None:
self.pytorch_int8 = q_model
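With framework='openvino', the quantized model is cached on self.openvino_int8, which is exactly the attribute the new quantize=True branch of predict_with_openvino reads; the onnxruntime and pytorch int8 slots are unchanged.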

@@ -274,6 +274,12 @@ def test_tcn_forecaster_openvino_methods(self):
except ImportError:
pass

forecaster.quantize(calib_data=train_data,
framework="openvino")
openvino_yhat = forecaster.predict_with_openvino(test_data[0])
q_openvino_yhat = forecaster.predict_with_openvino(test_data[0], quantize=True)
assert openvino_yhat.shape == q_openvino_yhat.shape == test_data[1].shape
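If one wanted the test to also say something about accuracy rather than just shape, a follow-up could compare the fp32 and int8 errors on the same targets (the numpy usage below is an illustrative extension, not part of the committed test):

import numpy as np  # create_data already returns numpy arrays

mse_fp32 = float(np.mean((openvino_yhat - test_data[1]) ** 2))
mse_int8 = float(np.mean((q_openvino_yhat - test_data[1]) ** 2))
# int8 inference trades some accuracy for speed; the gap is model- and
# data-dependent, so a hard tolerance here would be arbitrary
assert np.isfinite(mse_fp32) and np.isfinite(mse_int8)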

def test_tcn_forecaster_quantization_dynamic(self):
train_data, val_data, test_data = create_data()
forecaster = TCNForecaster(past_seq_len=24,
