From d0f8b183785fa0009efc6b6208f683086dd01379 Mon Sep 17 00:00:00 2001 From: Ruonan Wang <105281011+rnwang04@users.noreply.github.com> Date: Wed, 14 Sep 2022 08:34:26 +0800 Subject: [PATCH] Nano : Enhancement for output format of InferenceOptimizer (#5705) * first commit * improve output of optimize * fix method_type for method lack of dependency * update openvino model for thread num * add thread num for trace and quantize * update based on comment: modify output, add progress bar and remove method type * modify latency to latency(ms) * add thread_num for Trainer.trace and Trainer.quantize * fix bug of openvino cpu_num --- .../resnet/inference_pipeline.py | 2 +- .../bigdl/nano/deps/openvino/core/model.py | 10 +- .../bigdl/nano/deps/openvino/openvino_api.py | 9 +- .../bigdl/nano/deps/openvino/pytorch/model.py | 7 +- .../bigdl/nano/pytorch/inference/optimizer.py | 157 +++++++++++++----- .../src/bigdl/nano/pytorch/trainer/Trainer.py | 10 ++ .../tests/test_inference_pipeline_ipex.py | 6 +- 7 files changed, 149 insertions(+), 52 deletions(-) diff --git a/python/nano/example/pytorch/inference_pipeline/resnet/inference_pipeline.py b/python/nano/example/pytorch/inference_pipeline/resnet/inference_pipeline.py index ca6d8534ca2..aeeb2d47638 100644 --- a/python/nano/example/pytorch/inference_pipeline/resnet/inference_pipeline.py +++ b/python/nano/example/pytorch/inference_pipeline/resnet/inference_pipeline.py @@ -45,7 +45,7 @@ def accuracy(pred, target): validation_data=datamodule.val_dataloader(limit_num_samples=160), metric=accuracy, direction="max", - cpu_num=1, + thread_num=1, latency_sample_num=30) # 4. Get the best model under specific restrictions or without restrictions diff --git a/python/nano/src/bigdl/nano/deps/openvino/core/model.py b/python/nano/src/bigdl/nano/deps/openvino/core/model.py index b583cdb9fba..76e85eaf73c 100644 --- a/python/nano/src/bigdl/nano/deps/openvino/core/model.py +++ b/python/nano/src/bigdl/nano/deps/openvino/core/model.py @@ -22,9 +22,10 @@ class OpenVINOModel: - def __init__(self, ie_network: str, device='CPU'): + def __init__(self, ie_network: str, device='CPU', thread_num=None): self._ie = Core() self._device = device + self.thread_num = thread_num self.ie_network = ie_network def forward_step(self, *inputs): @@ -47,8 +48,13 @@ def ie_network(self, model): self._ie_network = self._ie.read_model(model=str(model)) else: self._ie_network = model + if self.thread_num is not None: + config = {"CPU_THREADS_NUM": str(self.thread_num)} + else: + config = {} self._compiled_model = self._ie.compile_model(model=self.ie_network, - device_name=self._device) + device_name=self._device, + config=config) self._infer_request = self._compiled_model.create_infer_request() input_names = [t.any_name for t in self._ie_network.inputs] self._forward_args = input_names diff --git a/python/nano/src/bigdl/nano/deps/openvino/openvino_api.py b/python/nano/src/bigdl/nano/deps/openvino/openvino_api.py index 7357ad4b1f1..200fe4bcb28 100644 --- a/python/nano/src/bigdl/nano/deps/openvino/openvino_api.py +++ b/python/nano/src/bigdl/nano/deps/openvino/openvino_api.py @@ -16,20 +16,23 @@ from functools import partial -def PytorchOpenVINOModel(model, input_sample=None, logging=True, **export_kwargs): +def PytorchOpenVINOModel(model, input_sample=None, thread_num=None, + logging=True, **export_kwargs): """ Create a OpenVINO model from pytorch. :param model: Pytorch model to be converted to OpenVINO for inference or path to Openvino saved model. 
:param input_sample: A set of inputs for trace, defaults to None if you have trace before or - model is a LightningModule with any dataloader attached, defaults to None + model is a LightningModule with any dataloader attached, defaults to None. + :param thread_num: a int represents how many threads(cores) is needed for + inference. default: None. :param logging: whether to log detailed information of model conversion. default: True. :param **export_kwargs: will be passed to torch.onnx.export function. :return: PytorchOpenVINOModel model for OpenVINO inference. """ from .pytorch.model import PytorchOpenVINOModel - return PytorchOpenVINOModel(model, input_sample, logging, **export_kwargs) + return PytorchOpenVINOModel(model, input_sample, thread_num, logging, **export_kwargs) def load_openvino_model(path): diff --git a/python/nano/src/bigdl/nano/deps/openvino/pytorch/model.py b/python/nano/src/bigdl/nano/deps/openvino/pytorch/model.py index e8a5783e037..2751b42c312 100644 --- a/python/nano/src/bigdl/nano/deps/openvino/pytorch/model.py +++ b/python/nano/src/bigdl/nano/deps/openvino/pytorch/model.py @@ -26,7 +26,8 @@ class PytorchOpenVINOModel(AcceleratedLightningModule): - def __init__(self, model, input_sample=None, logging=True, **export_kwargs): + def __init__(self, model, input_sample=None, thread_num=None, + logging=True, **export_kwargs): """ Create a OpenVINO model from pytorch. @@ -35,6 +36,8 @@ def __init__(self, model, input_sample=None, logging=True, **export_kwargs): :param input_sample: A set of inputs for trace, defaults to None if you have trace before or model is a LightningModule with any dataloader attached, defaults to None. + :param thread_num: a int represents how many threads(cores) is needed for + inference. default: None. :param logging: whether to log detailed information of model conversion. default: True. :param **export_kwargs: will be passed to torch.onnx.export function. 
""" @@ -44,7 +47,7 @@ def __init__(self, model, input_sample=None, logging=True, **export_kwargs): if isinstance(model, torch.nn.Module): export(model, input_sample, str(dir / 'tmp.xml'), logging, **export_kwargs) ov_model_path = dir / 'tmp.xml' - self.ov_model = OpenVINOModel(ov_model_path) + self.ov_model = OpenVINOModel(ov_model_path, thread_num=thread_num) super().__init__(self.ov_model) def on_forward_start(self, inputs): diff --git a/python/nano/src/bigdl/nano/pytorch/inference/optimizer.py b/python/nano/src/bigdl/nano/pytorch/inference/optimizer.py index 30c2b7135ad..eabcdaa20d4 100644 --- a/python/nano/src/bigdl/nano/pytorch/inference/optimizer.py +++ b/python/nano/src/bigdl/nano/pytorch/inference/optimizer.py @@ -85,11 +85,11 @@ def get_accelerator(self): "int8": AccelerationOption(inc=True), "jit_fp32": AccelerationOption(jit=True), "jit_fp32_ipex": AccelerationOption(jit=True, ipex=True), - "jit_fp32_ipex_clast": AccelerationOption(jit=True, ipex=True, - channels_last=True), + "jit_fp32_ipex_channels_last": AccelerationOption(jit=True, ipex=True, + channels_last=True), "openvino_fp32": AccelerationOption(openvino=True), "openvino_int8": AccelerationOption(openvino=True, pot=True), - "onnxruntime_fp32": AccelerationOption(onnxtunrime=True), + "onnxruntime_fp32": AccelerationOption(onnxruntime=True), "onnxruntime_int8_qlinear": AccelerationOption(onnxruntime=True, inc=True, method="qlinear"), "onnxruntime_int8_integer": AccelerationOption(onnxruntime=True, inc=True, @@ -106,13 +106,14 @@ def __init__(self): # optimized_model_dict handles the optimized model and some metadata # in {"method_name": {"latency": ..., "accuracy": ..., "model": ...}} self.optimized_model_dict = {} + self._optimize_result = None def optimize(self, model: nn.Module, training_data: DataLoader, validation_data: DataLoader = None, metric: Callable = None, direction: str = "max", - cpu_num: int = None, + thread_num: int = None, logging: bool = False, latency_sample_num: int = 100) -> None: ''' @@ -135,7 +136,7 @@ def optimize(self, model: nn.Module, :param direction: (optional) A string that indicates the higher/lower better for the metric, "min" for the lower the better and "max" for the higher the better. Default value is "max". - :param cpu_num: (optional) a int represents how many cores is needed for + :param thread_num: (optional) a int represents how many threads(cores) is needed for inference. :param logging: whether to log detailed information of model conversion. default: False. 
@@ -160,24 +161,21 @@ def optimize(self, model: nn.Module, self._calculate_accuracy = False default_threads: int = torch.get_num_threads() - cpu_num: int = default_threads if cpu_num is None else int(cpu_num) - - # set cpu num for onnxruntime - if _onnxruntime_checker(): - import onnxruntime - sessoption = onnxruntime.SessionOptions() - sessoption.intra_op_num_threads = cpu_num - sessoption.inter_op_num_threads = cpu_num - else: - sessoption = None - # TODO: set cpu num for openvino + thread_num: int = default_threads if thread_num is None else int(thread_num) result_map: Dict[str, Dict] = {} - model.eval() # change model to eval state + model.eval() # change model to eval mode - for method, available in available_dict.items(): - if available: + print("==========================Start Optimization==========================") + start_time = time.perf_counter() + for idx, (method, available) in enumerate(available_dict.items()): + result_map[method] = {} + if available is False: + result_map[method]["status"] = "lack dependency" + else: + print(f"----------Start test {method} model " + f"({idx+1}/{len(ALL_INFERENCE_ACCELERATION_METHOD)})----------") option: AccelerationOption = ALL_INFERENCE_ACCELERATION_METHOD[method] use_ipex: bool = option.ipex use_channels_last: bool = option.channels_last @@ -203,11 +201,13 @@ def optimize(self, model: nn.Module, InferenceOptimizer.trace(model=model, accelerator=accelerator, input_sample=input_sample, - onnxruntime_session_options=sessoption, + thread_num=thread_num, # remove output of openvino logging=logging) except Exception as e: print(e) + result_map[method]["status"] = "fail to convert" + print(f"----------Failed to convert to {method}----------") continue # if precision is int8 or bf16, then we will use quantize method @@ -221,25 +221,28 @@ def optimize(self, model: nn.Module, use_ipex=use_ipex, calib_dataloader=training_data, method=ort_method, - onnxruntime_session_options=sessoption, + thread_num=thread_num, # remove output of openvino logging=logging) except Exception as e: print(e) + result_map[method]["status"] = "fail to convert" + print(f"----------Failed to convert to {method}----------") continue - result_map[method] = {} + result_map[method]["status"] = "successful" def func_test(model, input_sample): - model(*input_sample) + with torch.no_grad(): + model(*input_sample) - torch.set_num_threads(cpu_num) + torch.set_num_threads(thread_num) try: result_map[method]["latency"] =\ _throughput_calculate_helper(latency_sample_num, func_test, acce_model, input_sample) except Exception as e: - result_map.pop(method) + result_map[method]["status"] = "fail to forward" torch.set_num_threads(default_threads) continue @@ -252,19 +255,26 @@ def func_test(model, input_sample): result_map[method]["accuracy"] = None result_map[method]["model"] = acce_model - else: - pass + print(f"----------Finish test {method} model " + f"({idx+1}/{len(ALL_INFERENCE_ACCELERATION_METHOD)})----------") self.optimized_model_dict: Dict = result_map - print("==========================Optimization Results==========================") - if self._calculate_accuracy: - for key, value in self.optimized_model_dict.items(): - print("accleration option: {}, latency: {:.4f}ms, accuracy : {:.4f}" - .format(key, value["latency"], value["accuracy"])) - else: - for key, value in self.optimized_model_dict.items(): - print("accleration option: {}, latency: {:.4f}ms :" - .format(key, value["latency"])) + print("\n\n==========================Optimization Results==========================") + + 
self._optimize_result = _format_optimize_result(self.optimized_model_dict, + self._calculate_accuracy) + print(self._optimize_result) + print("Optimization cost {:.3}s at all.".format(time.perf_counter() - start_time)) + print("===========================Stop Optimization===========================") + + def summary(self): + ''' + Print format string representation for optimization result + ''' + invalidOperationError(len(self.optimized_model_dict) > 0, + "There is no optimization result. You should call .optimize() " + "before summary()") + print(self._optimize_result) def get_best_model(self, accelerator: str = None, @@ -302,7 +312,7 @@ def get_best_model(self, self.optimized_model_dict["original"]["accuracy"]) for method in self.optimized_model_dict.keys(): - if method == "original": + if method == "original" or self.optimized_model_dict[method]["status"] != "successful": continue option: AccelerationOption = ALL_INFERENCE_ACCELERATION_METHOD[method] result: Dict = self.optimized_model_dict[method] @@ -333,7 +343,7 @@ def get_best_model(self, best_model = result["model"] best_metric = CompareMetric(method, result["latency"], result["accuracy"]) - return best_model, _format_acceleration_info(best_metric.method_name) + return best_model, _format_acceleration_option(best_metric.method_name) @staticmethod def quantize(model: nn.Module, @@ -350,6 +360,7 @@ def quantize(model: nn.Module, timeout: int = None, max_trials: int = None, input_sample=None, + thread_num: int = None, onnxruntime_session_options=None, logging: bool = True, **export_kwargs): @@ -394,6 +405,9 @@ def quantize(model: nn.Module, "timeout=0, max_trials=1" means it will try quantization only once and return satisfying best model. :param input_sample: An input example to convert pytorch model into ONNX/OpenVINO. + :param thread_num: (optional) a int represents how many threads(cores) is needed for + inference, only valid for accelerator='onnxruntime' + or accelerator='openvino'. :param onnxruntime_session_options: The session option for onnxruntime, only valid when accelerator='onnxruntime', otherwise will be ignored. :param logging: whether to log detailed information of model conversion, only valid when @@ -439,10 +453,17 @@ def quantize(model: nn.Module, if input_sample is None: # input_sample can be a dataloader input_sample = calib_dataloader + if onnxruntime_session_options is None: + import onnxruntime + onnxruntime_session_options = onnxruntime.SessionOptions() + if thread_num is not None: + onnxruntime_session_options.intra_op_num_threads = thread_num + onnxruntime_session_options.inter_op_num_threads = thread_num model = InferenceOptimizer.trace( model, input_sample=input_sample, accelerator='onnxruntime', + onnxruntime_session_options=onnxruntime_session_options, **export_kwargs) """ If accelerator==None, quantized model returned should be an object of PytorchModel @@ -470,6 +491,7 @@ def quantize(model: nn.Module, model = InferenceOptimizer.trace(model, input_sample=input_sample, accelerator='openvino', + thread_num=thread_num, logging=logging, **export_kwargs) invalidInputError(type(model).__name__ == 'PytorchOpenVINOModel', @@ -508,6 +530,7 @@ def trace(model: nn.Module, input_sample=None, accelerator: str = None, use_ipex: bool = False, + thread_num: int = None, onnxruntime_session_options=None, logging: bool = True, **export_kwargs): @@ -522,6 +545,9 @@ def trace(model: nn.Module, :param accelerator: The accelerator to use, defaults to None meaning staying in Pytorch backend. 
'openvino', 'onnxruntime' and 'jit' are supported for now. :param use_ipex: whether we use ipex as accelerator for inferencing. default: False. + :param thread_num: (optional) a int represents how many threads(cores) is needed for + inference, only valid for accelerator='onnxruntime' + or accelerator='openvino'. :param onnxruntime_session_options: The session option for onnxruntime, only valid when accelerator='onnxruntime', otherwise will be ignored. :param logging: whether to log detailed information of model conversion, only valid when @@ -540,8 +566,14 @@ def trace(model: nn.Module, "but got type {}".format(type(model)) ) if accelerator == 'openvino': # openvino backend will not care about ipex usage - return PytorchOpenVINOModel(model, input_sample, logging, **export_kwargs) + return PytorchOpenVINOModel(model, input_sample, thread_num, logging, **export_kwargs) if accelerator == 'onnxruntime': # onnxruntime backend will not care about ipex usage + if onnxruntime_session_options is None: + import onnxruntime + onnxruntime_session_options = onnxruntime.SessionOptions() + if thread_num is not None: + onnxruntime_session_options.intra_op_num_threads = thread_num + onnxruntime_session_options.inter_op_num_threads = thread_num return PytorchONNXRuntimeModel(model, input_sample, onnxruntime_session_options, **export_kwargs) if accelerator == 'jit' or use_ipex: @@ -623,7 +655,8 @@ def _throughput_calculate_helper(iterrun, func, *args): time_list = [] for i in range(iterrun): st = time.perf_counter() - func(*args) + with torch.no_grad(): + func(*args) end = time.perf_counter() time_list.append(end - st) # at least need 10 iters and try to control calculation @@ -649,7 +682,7 @@ def _accuracy_calculate_helper(model, metric, data): return np.sum(metric_list) / sample_num -def _format_acceleration_info(method_name): +def _format_acceleration_option(method_name: str) -> str: ''' Get a string represation for current method's acceleration option ''' @@ -663,3 +696,45 @@ def _format_acceleration_info(method_name): if len(repr_str) > 0: repr_str = repr_str[:-2] return repr_str + + +def _format_optimize_result(optimize_result_dict: dict, + calculate_accuracy: bool) -> str: + ''' + Get a format string represation for optimization result + ''' + if calculate_accuracy is True: + horizontal_line = " {0} {1} {2} {3}\n" \ + .format("-" * 32, "-" * 22, "-" * 14, "-" * 12) + repr_str = horizontal_line + repr_str += "| {0:^30} | {1:^20} | {2:^12} | {3:^10} |\n" \ + .format("method", "status", "latency(ms)", "accuracy") + repr_str += horizontal_line + for method, result in optimize_result_dict.items(): + status = result["status"] + latency = result.get("latency", "None") + if latency != "None": + latency = round(latency, 3) + accuracy = result.get("accuracy", "None") + if accuracy != "None": + accuracy = round(accuracy, 3) + method_str = f"| {method:^30} | {status:^20} | " \ + f"{latency:^12} | {accuracy:^10} |\n" + repr_str += method_str + repr_str += horizontal_line + else: + horizontal_line = " {0} {1} {2}\n" \ + .format("-" * 32, "-" * 22, "-" * 14) + repr_str = horizontal_line + repr_str += "| {0:^30} | {1:^20} | {2:^12} |\n" \ + .format("method", "status", "latency(ms)") + repr_str += horizontal_line + for method, result in optimize_result_dict.items(): + status = result["status"] + latency = result.get("latency", "None") + if latency != "None": + latency = round(latency, 3) + method_str = f"| {method:^30} | {status:^20} | {latency:^12} |\n" + repr_str += method_str + repr_str += horizontal_line + return 
repr_str diff --git a/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py b/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py index 4ef4a6333d8..da54675124d 100644 --- a/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py +++ b/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py @@ -268,6 +268,7 @@ def trace(model: nn.Module, input_sample=None, accelerator: str = None, use_ipex: bool = False, + thread_num: int = None, onnxruntime_session_options=None, logging: bool = True, **export_kwargs): @@ -282,6 +283,9 @@ def trace(model: nn.Module, :param accelerator: The accelerator to use, defaults to None meaning staying in Pytorch backend. 'openvino', 'onnxruntime' and 'jit' are supported for now. :param use_ipex: whether we use ipex as accelerator for inferencing. default: False. + :param thread_num: (optional) a int represents how many threads(cores) is needed for + inference, only valid for accelerator='onnxruntime' + or accelerator='openvino'. :param onnxruntime_session_options: The session option for onnxruntime, only valid when accelerator='onnxruntime', otherwise will be ignored. :param logging: whether to log detailed information of model conversion, only valid when @@ -298,6 +302,7 @@ def trace(model: nn.Module, input_sample=input_sample, accelerator=accelerator, use_ipex=use_ipex, + thread_num=thread_num, onnxruntime_session_options=onnxruntime_session_options, logging=logging, **export_kwargs) @@ -317,6 +322,7 @@ def quantize(model: nn.Module, timeout: int = None, max_trials: int = None, input_sample=None, + thread_num: int = None, onnxruntime_session_options=None, logging: bool = True, **export_kwargs): @@ -361,6 +367,9 @@ def quantize(model: nn.Module, "timeout=0, max_trials=1" means it will try quantization only once and return satisfying best model. :param input_sample: An input example to convert pytorch model into ONNX/OpenVINO. + :param thread_num: (optional) a int represents how many threads(cores) is needed for + inference, only valid for accelerator='onnxruntime' + or accelerator='openvino'. :param onnxruntime_session_options: The session option for onnxruntime, only valid when accelerator='onnxruntime', otherwise will be ignored. 
:param logging: whether to log detailed information of model conversion, only valid when @@ -382,6 +391,7 @@ def quantize(model: nn.Module, timeout=timeout, max_trials=max_trials, input_sample=input_sample, + thread_num=thread_num, onnxruntime_session_options=onnxruntime_session_options, logging=logging, **export_kwargs) diff --git a/python/nano/test/pytorch/tests/test_inference_pipeline_ipex.py b/python/nano/test/pytorch/tests/test_inference_pipeline_ipex.py index cbbf0fbe769..1d939aa7541 100644 --- a/python/nano/test/pytorch/tests/test_inference_pipeline_ipex.py +++ b/python/nano/test/pytorch/tests/test_inference_pipeline_ipex.py @@ -58,7 +58,7 @@ class TestInferencePipeline(TestCase): num_workers = 0 data_dir = os.path.join(os.path.dirname(__file__), "data") metric = torchmetrics.Accuracy(num_classes=10, top_k=1) - max_epochs = 10 + max_epochs = 5 model = Net() test_loader = create_data_loader(data_dir, 1, num_workers, data_transform, subset=10, shuffle=False) @@ -85,7 +85,7 @@ def test_pipeline_with_metric(self): validation_data=self.test_loader, metric=self.metric, direction="max", - cpu_num=1) + thread_num=1) acc_model, option = inference_opt.get_best_model() acc_model, option = inference_opt.get_best_model(accelerator="onnxruntime") @@ -99,7 +99,7 @@ def test_pipeline_without_metric(self): inference_opt = InferenceOptimizer() inference_opt.optimize(model=self.model, training_data=self.train_loader, - cpu_num=1) + thread_num=1) acc_model, option = inference_opt.get_best_model() acc_model, option = inference_opt.get_best_model(accelerator="onnxruntime")
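
For reference, a minimal usage sketch of the API surface this patch touches: the cpu_num argument of InferenceOptimizer.optimize is renamed to thread_num, thread_num is also accepted by trace()/quantize() for the onnxruntime and openvino accelerators, and a new summary() method reprints the optimization result table. The model, dataloaders, metric and input sample below are placeholders (not part of the patch), and the import path is assumed from the BigDL-Nano package layout.

    from bigdl.nano.pytorch import InferenceOptimizer   # assumed import path

    optimizer = InferenceOptimizer()
    # Renamed argument: thread_num (was cpu_num) pins every benchmarked method
    # to the same number of threads/cores so latency numbers are comparable.
    optimizer.optimize(model=my_model,                  # placeholder nn.Module
                       training_data=train_loader,      # placeholder DataLoader
                       validation_data=val_loader,
                       metric=accuracy,                 # callable(pred, target) -> float
                       direction="max",
                       thread_num=1,
                       latency_sample_num=30)
    optimizer.summary()                                 # new: reprint the status/latency table
    best_model, option = optimizer.get_best_model(accelerator="onnxruntime")

    # thread_num is also forwarded by trace()/quantize(): for onnxruntime it sets
    # intra_op/inter_op threads on the SessionOptions; for openvino it is passed
    # to the compiled model as the CPU_THREADS_NUM config entry.
    ov_model = InferenceOptimizer.trace(my_model,
                                        accelerator="openvino",
                                        input_sample=sample_input,   # placeholder
                                        thread_num=1)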