From 67ff54e8142c5665c15ade715ac01065342ea0ff Mon Sep 17 00:00:00 2001
From: "ruoan1.wang"
Date: Thu, 8 Sep 2022 13:08:35 -0400
Subject: [PATCH 01/15] first commit

---
 .../bigdl/nano/pytorch/inference/optimizer.py | 24 +++++++++++++++----
 1 file changed, 20 insertions(+), 4 deletions(-)

diff --git a/python/nano/src/bigdl/nano/pytorch/inference/optimizer.py b/python/nano/src/bigdl/nano/pytorch/inference/optimizer.py
index 30c2b7135ad..6432d30e61a 100644
--- a/python/nano/src/bigdl/nano/pytorch/inference/optimizer.py
+++ b/python/nano/src/bigdl/nano/pytorch/inference/optimizer.py
@@ -176,8 +176,13 @@ def optimize(self, model: nn.Module,
 
         model.eval()  # change model to eval state
+        print("==========================Start Optimization==========================")
         for method, available in available_dict.items():
-            if available:
+            if available is False:
+                result_map[method] = {}
+                result_map[method]["status"] = "lack dependency"
+            else:
+                print(f"*** Start to try method {method}***")
                 option: AccelerationOption = ALL_INFERENCE_ACCELERATION_METHOD[method]
                 use_ipex: bool = option.ipex
                 use_channels_last: bool = option.channels_last
@@ -208,6 +213,9 @@ def optimize(self, model: nn.Module,
                                                  logging=logging)
                     except Exception as e:
                         print(e)
+                        result_map[method] = {}
+                        result_map[method]["status"] = "fail to convert"
+                        print(f"*** Failed to convert to {method}***")
                         continue
 
                 # if precision is int8 or bf16, then we will use quantize method
@@ -226,12 +234,17 @@ def optimize(self, model: nn.Module,
                                                  logging=logging)
                     except Exception as e:
                         print(e)
+                        result_map[method] = {}
+                        result_map[method]["status"] = "fail to convert"
+                        print(f"*** Failed to convert to {method}***")
                         continue
 
                 result_map[method] = {}
+                result_map[method]["status"] = "successful"
 
                 def func_test(model, input_sample):
-                    model(*input_sample)
+                    with torch.no_grad():
+                        model(*input_sample)
 
                 torch.set_num_threads(cpu_num)
                 try:
@@ -239,7 +252,7 @@ def optimize(self, model: nn.Module,
                         _throughput_calculate_helper(latency_sample_num, func_test,
                                                      acce_model, input_sample)
                 except Exception as e:
-                    result_map.pop(method)
+                    result_map[method]["status"] = "fail to forward"
                     torch.set_num_threads(default_threads)
                     continue
@@ -256,6 +269,7 @@ def func_test(model, input_sample):
                 pass
 
         self.optimized_model_dict: Dict = result_map
+        # TODO: format the results
         print("==========================Optimization Results==========================")
         if self._calculate_accuracy:
             for key, value in self.optimized_model_dict.items():
@@ -265,6 +279,7 @@ def func_test(model, input_sample):
             for key, value in self.optimized_model_dict.items():
                 print("accleration option: {}, latency: {:.4f}ms :"
                       .format(key, value["latency"]))
+        print("===========================Stop Optimization===========================")
 
     def get_best_model(self,
                        accelerator: str = None,
@@ -623,7 +638,8 @@ def _throughput_calculate_helper(iterrun, func, *args):
     time_list = []
     for i in range(iterrun):
         st = time.perf_counter()
-        func(*args)
+        with torch.no_grad():
+            func(*args)
         end = time.perf_counter()
         time_list.append(end - st)
     # at least need 10 iters and try to control calculation

From 58b23d7a1744c23ec20e22f57c6cc9b5c068d259 Mon Sep 17 00:00:00 2001
From: "ruoan1.wang"
Date: Fri, 9 Sep 2022 06:30:25 -0400
Subject: [PATCH 02/15] improve output of optimize

---
 .../bigdl/nano/pytorch/inference/optimizer.py | 99 ++++++++++++++-----
 1 file changed, 74 insertions(+), 25 deletions(-)

diff --git a/python/nano/src/bigdl/nano/pytorch/inference/optimizer.py b/python/nano/src/bigdl/nano/pytorch/inference/optimizer.py
index 6432d30e61a..cb7798089fc 100644
--- a/python/nano/src/bigdl/nano/pytorch/inference/optimizer.py
+++ b/python/nano/src/bigdl/nano/pytorch/inference/optimizer.py
@@ -85,8 +85,8 @@ def get_accelerator(self):
     "int8": AccelerationOption(inc=True),
     "jit_fp32": AccelerationOption(jit=True),
     "jit_fp32_ipex": AccelerationOption(jit=True, ipex=True),
-    "jit_fp32_ipex_clast": AccelerationOption(jit=True, ipex=True,
-                                              channels_last=True),
+    "jit_fp32_ipex_channels_last": AccelerationOption(jit=True, ipex=True,
+                                                      channels_last=True),
     "openvino_fp32": AccelerationOption(openvino=True),
     "openvino_int8": AccelerationOption(openvino=True, pot=True),
     "onnxruntime_fp32": AccelerationOption(onnxtunrime=True),
@@ -106,6 +106,7 @@ def __init__(self):
         # optimized_model_dict handles the optimized model and some metadata
         # in {"method_name": {"latency": ..., "accuracy": ..., "model": ...}}
         self.optimized_model_dict = {}
+        self._optimize_result = None
 
     def optimize(self, model: nn.Module,
                  training_data: DataLoader,
@@ -174,15 +175,16 @@ def optimize(self, model: nn.Module,
 
         result_map: Dict[str, Dict] = {}
 
-        model.eval()  # change model to eval state
+        model.eval()  # change model to eval mode
         print("==========================Start Optimization==========================")
+        start_time = time.perf_counter()
         for method, available in available_dict.items():
+            result_map[method] = {}
             if available is False:
-                result_map[method] = {}
                 result_map[method]["status"] = "lack dependency"
             else:
-                print(f"*** Start to try method {method}***")
+                print(f"********************Start test {method} model********************")
                 option: AccelerationOption = ALL_INFERENCE_ACCELERATION_METHOD[method]
                 use_ipex: bool = option.ipex
                 use_channels_last: bool = option.channels_last
@@ -194,7 +196,9 @@ def optimize(self, model: nn.Module,
                 try:
                     if accelerator is None and use_ipex is False:
                         acce_model = model
+                        result_map[method]["method_type"] = "none"
                     else:
+                        result_map[method]["method_type"] = "trace"
                         if accelerator in ("jit", None):
                             acce_model = \
                                 InferenceOptimizer.trace(model=model,
@@ -213,13 +217,13 @@ def optimize(self, model: nn.Module,
                                                  logging=logging)
                     except Exception as e:
                         print(e)
-                        result_map[method] = {}
                         result_map[method]["status"] = "fail to convert"
-                        print(f"*** Failed to convert to {method}***")
+                        print(f"********************Failed to convert to {method}********************")
                         continue
 
                 # if precision is int8 or bf16, then we will use quantize method
                 elif precision in ("int8", "bf16"):
+                    result_map[method]["method_type"] = "quantize"
                     ort_method: str = option.method
                     try:
                         acce_model = \
@@ -234,12 +238,10 @@ def optimize(self, model: nn.Module,
                                                  logging=logging)
                     except Exception as e:
                         print(e)
-                        result_map[method] = {}
                         result_map[method]["status"] = "fail to convert"
-                        print(f"*** Failed to convert to {method}***")
+                        print(f"********************Failed to convert to {method}********************")
                         continue
 
-                result_map[method] = {}
                 result_map[method]["status"] = "successful"
 
                 def func_test(model, input_sample):
@@ -265,22 +267,26 @@ def func_test(model, input_sample):
                     result_map[method]["accuracy"] = None
 
                 result_map[method]["model"] = acce_model
-            else:
-                pass
+                print(f"********************Finish test {method} model********************")
 
         self.optimized_model_dict: Dict = result_map
+        print("\n\n==========================Optimization Results==========================")
         # TODO: format the results
-        print("==========================Optimization Results==========================")
-        if self._calculate_accuracy:
-            for key, value in self.optimized_model_dict.items():
-                print("accleration option: {}, latency: {:.4f}ms, accuracy : {:.4f}"
-                      .format(key, value["latency"], value["accuracy"]))
-        else:
-            for key, value in self.optimized_model_dict.items():
-                print("accleration option: {}, latency: {:.4f}ms :"
-                      .format(key, value["latency"]))
+        self._optimize_result = _format_optimize_result(self.optimized_model_dict,
+                                                        self._calculate_accuracy)
+        print(self._optimize_result)
+        print("Optimization cost {:.3}ms at all.".format(time.perf_counter() - start_time))
         print("===========================Stop Optimization===========================")
 
+    def summary(self):
+        '''
+        Print format string represation for optimization result
+        '''
+        invalidOperationError(len(self.optimized_model_dict) > 0,
+                              "There is no optimization result. You should call .optimize() "
+                              "before summary()")
+        print(self._optimize_result)
+
     def get_best_model(self,
                        accelerator: str = None,
                        precision: str = None,
@@ -317,7 +323,7 @@ def get_best_model(self,
                                     self.optimized_model_dict["original"]["accuracy"])
 
         for method in self.optimized_model_dict.keys():
-            if method == "original":
+            if method == "original" or self.optimized_model_dict[method]["status"] != "successful":
                 continue
             option: AccelerationOption = ALL_INFERENCE_ACCELERATION_METHOD[method]
             result: Dict = self.optimized_model_dict[method]
@@ -348,7 +354,7 @@ def get_best_model(self,
                 best_model = result["model"]
                 best_metric = CompareMetric(method, result["latency"], result["accuracy"])
 
-        return best_model, _format_acceleration_info(best_metric.method_name)
+        return best_model, _format_acceleration_option(best_metric.method_name)
 
     @staticmethod
     def quantize(model: nn.Module,
@@ -598,7 +604,7 @@ def _openvino_checker():
     '''
     check if openvino-dev is installed
     '''
-    return not find_spec("openvino-dev") is None
+    return not find_spec("openvino") is None
 
 
 def _bf16_checker():
@@ -665,7 +671,7 @@ def _accuracy_calculate_helper(model, metric, data):
     return np.sum(metric_list) / sample_num
 
 
-def _format_acceleration_info(method_name):
+def _format_acceleration_option(method_name: str) -> str:
     '''
     Get a string represation for current method's acceleration option
     '''
@@ -679,3 +685,46 @@ def _format_acceleration_option(method_name: str) -> str:
     if len(repr_str) > 0:
         repr_str = repr_str[:-2]
     return repr_str
+
+
+def _format_optimize_result(optimize_result_dict: dict,
+                            calculate_accuracy: bool) -> str:
+    '''
+    Get a format string represation for optimization result
+    '''
+    if calculate_accuracy is True:
+        horizontal_line = " {0} {1} {2} {3} {4}\n" \
+            .format("-"*32, "-"*22, "-"*12, "-"*12, "-"*12)
+        repr_str = horizontal_line
+        repr_str += "| {0:^30} | {1:^20} | {2:^10} | {3:^10} | {4:^10} |\n" \
+            .format("method", "status", "type", "latency", "accuracy")
+        repr_str += horizontal_line
+        for method, result in optimize_result_dict.items():
+            status = result["status"]
+            method_type = result["method_type"]
+            latency = result.get("latency", "None")
+            if latency != "None":
+                latency = round(latency, 3)
+            accuracy = result.get("accuracy", "None")
+            if accuracy != "None":
+                accuracy = round(accuracy, 3)
+            method_str = f"| {method:^30} | {status:^20} | {method_type:^10} | {latency:^10} | {accuracy:^10} |\n"
+            repr_str += method_str
+        repr_str += horizontal_line
+    else:
+        horizontal_line = " {0} {1} {2} {3}\n" \
+            .format("-"*32, "-"*22, "-"*12, "-"*12)
+        repr_str = horizontal_line
+        repr_str += "| {0:^30} | {1:^20} | {2:^10} | {3:^10} |\n" \
+            .format("method", "status", "type", "latency")
+        repr_str += horizontal_line
+        for method, result in optimize_result_dict.items():
+            status = result["status"]
+            method_type = result["method_type"]
+            latency = result.get("latency", "None")
+            if latency != "None":
+                latency = round(latency, 3)
+            method_str = f"| {method:^30} | {status:^20} | {method_type:^10} | {latency:^10} |\n"
+            repr_str += method_str
+        repr_str += horizontal_line
+    return repr_str

From b807b8f220685b7f73a5017b02c95efd924438a3 Mon Sep 17 00:00:00 2001
From: "ruoan1.wang"
Date: Fri, 9 Sep 2022 06:50:36 -0400
Subject: [PATCH 03/15] fix typo

---
 python/nano/src/bigdl/nano/pytorch/inference/optimizer.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/python/nano/src/bigdl/nano/pytorch/inference/optimizer.py b/python/nano/src/bigdl/nano/pytorch/inference/optimizer.py
index cb7798089fc..b3d4f7be99d 100644
--- a/python/nano/src/bigdl/nano/pytorch/inference/optimizer.py
+++ b/python/nano/src/bigdl/nano/pytorch/inference/optimizer.py
@@ -89,7 +89,7 @@ def get_accelerator(self):
                                                       channels_last=True),
     "openvino_fp32": AccelerationOption(openvino=True),
     "openvino_int8": AccelerationOption(openvino=True, pot=True),
-    "onnxruntime_fp32": AccelerationOption(onnxtunrime=True),
+    "onnxruntime_fp32": AccelerationOption(onnxruntime=True),
     "onnxruntime_int8_qlinear": AccelerationOption(onnxruntime=True, inc=True,
                                                    method="qlinear"),
     "onnxruntime_int8_integer": AccelerationOption(onnxruntime=True, inc=True,
@@ -280,7 +280,7 @@ def func_test(model, input_sample):
 
     def summary(self):
         '''
-        Print format string represation for optimization result
+        Print format string representation for optimization result
         '''
         invalidOperationError(len(self.optimized_model_dict) > 0,
                               "There is no optimization result. You should call .optimize() "
@@ -604,7 +604,7 @@ def _openvino_checker():
     '''
     check if openvino-dev is installed
     '''
-    return not find_spec("openvino") is None
+    return not find_spec("openvino-dev") is None
 
 
 def _bf16_checker():

From 7d13d5471b3621fa4aa9f044e2e65db940cb4f3d Mon Sep 17 00:00:00 2001
From: "ruoan1.wang"
Date: Fri, 9 Sep 2022 06:55:39 -0400
Subject: [PATCH 04/15] fix style

---
 .../bigdl/nano/pytorch/inference/optimizer.py | 21 ++++++++++---------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/python/nano/src/bigdl/nano/pytorch/inference/optimizer.py b/python/nano/src/bigdl/nano/pytorch/inference/optimizer.py
index b3d4f7be99d..0d3f51f8818 100644
--- a/python/nano/src/bigdl/nano/pytorch/inference/optimizer.py
+++ b/python/nano/src/bigdl/nano/pytorch/inference/optimizer.py
@@ -184,7 +184,7 @@ def optimize(self, model: nn.Module,
             if available is False:
                 result_map[method]["status"] = "lack dependency"
             else:
-                print(f"********************Start test {method} model********************")
+                print(f"**********Start test {method} model**********")
                 option: AccelerationOption = ALL_INFERENCE_ACCELERATION_METHOD[method]
                 use_ipex: bool = option.ipex
                 use_channels_last: bool = option.channels_last
@@ -218,7 +218,7 @@ def optimize(self, model: nn.Module,
                     except Exception as e:
                         print(e)
                         result_map[method]["status"] = "fail to convert"
-                        print(f"********************Failed to convert to {method}********************")
+                        print(f"**********Failed to convert to {method}**********")
                         continue
 
                 # if precision is int8 or bf16, then we will use quantize method
@@ -239,7 +239,7 @@ def optimize(self, model: nn.Module,
                     except Exception as e:
                         print(e)
                         result_map[method]["status"] = "fail to convert"
-                        print(f"********************Failed to convert to {method}********************")
+                        print(f"**********Failed to convert to {method}**********")
                         continue
 
                 result_map[method]["status"] = "successful"
@@ -267,12 +267,12 @@ def func_test(model, input_sample):
                     result_map[method]["accuracy"] = None
 
                 result_map[method]["model"] = acce_model
-                print(f"********************Finish test {method} model********************")
+                print(f"**********Finish test {method} model**********")
 
         self.optimized_model_dict: Dict = result_map
         print("\n\n==========================Optimization Results==========================")
-        # TODO: format the results
-        self._optimize_result = _format_optimize_result(self.optimized_model_dict, 
+
+        self._optimize_result = _format_optimize_result(self.optimized_model_dict,
                                                         self._calculate_accuracy)
         print(self._optimize_result)
         print("Optimization cost {:.3}ms at all.".format(time.perf_counter() - start_time))
@@ -687,14 +687,14 @@ def _format_acceleration_option(method_name: str) -> str:
     return repr_str
 
 
-def _format_optimize_result(optimize_result_dict: dict, 
+def _format_optimize_result(optimize_result_dict: dict,
                             calculate_accuracy: bool) -> str:
     '''
     Get a format string represation for optimization result
     '''
     if calculate_accuracy is True:
         horizontal_line = " {0} {1} {2} {3} {4}\n" \
-            .format("-"*32, "-"*22, "-"*12, "-"*12, "-"*12)
+            .format("-" * 32, "-" * 22, "-" * 12, "-" * 12, "-" * 12)
         repr_str = horizontal_line
         repr_str += "| {0:^30} | {1:^20} | {2:^10} | {3:^10} | {4:^10} |\n" \
             .format("method", "status", "type", "latency", "accuracy")
@@ -708,12 +708,13 @@ def _format_optimize_result(optimize_result_dict: dict,
             accuracy = result.get("accuracy", "None")
             if accuracy != "None":
                 accuracy = round(accuracy, 3)
-            method_str = f"| {method:^30} | {status:^20} | {method_type:^10} | {latency:^10} | {accuracy:^10} |\n"
+            method_str = f"| {method:^30} | {status:^20} | {method_type:^10} | " \
+                         f"{latency:^10} | {accuracy:^10} |\n"
             repr_str += method_str
         repr_str += horizontal_line
     else:
         horizontal_line = " {0} {1} {2} {3}\n" \
-            .format("-"*32, "-"*22, "-"*12, "-"*12)
+            .format("-" * 32, "-" * 22, "-" * 12, "-" * 12)
         repr_str = horizontal_line
         repr_str += "| {0:^30} | {1:^20} | {2:^10} | {3:^10} |\n" \
             .format("method", "status", "type", "latency")

From 35a548fb5962ae638adaac9e4cb2c7eff5844415 Mon Sep 17 00:00:00 2001
From: "ruoan1.wang"
Date: Fri, 9 Sep 2022 09:14:53 -0400
Subject: [PATCH 05/15] fix method_type for method lack of dependency

---
 python/nano/src/bigdl/nano/pytorch/inference/optimizer.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/python/nano/src/bigdl/nano/pytorch/inference/optimizer.py b/python/nano/src/bigdl/nano/pytorch/inference/optimizer.py
index 0d3f51f8818..499daf669fb 100644
--- a/python/nano/src/bigdl/nano/pytorch/inference/optimizer.py
+++ b/python/nano/src/bigdl/nano/pytorch/inference/optimizer.py
@@ -73,6 +73,11 @@ def get_accelerator(self):
             return "jit"
         return None
 
+    def get_method_type(self):
+        if self.inc or self.pot or self.bf16:
+            return "quantize"
+        return "trace"
+
 
 # acceleration method combinations, developers may want to register some new
 # combinations here
@@ -183,6 +188,8 @@ def optimize(self, model: nn.Module,
             result_map[method] = {}
             if available is False:
                 result_map[method]["status"] = "lack dependency"
+                option: AccelerationOption = ALL_INFERENCE_ACCELERATION_METHOD[method]
+                result_map[method]["method_type"] = option.get_method_type()
             else:
                 print(f"**********Start test {method} model**********")
                 option: AccelerationOption = ALL_INFERENCE_ACCELERATION_METHOD[method]

From f128901423352ca44ae71d30f82685bf9543fce7 Mon Sep 17 00:00:00 2001
From: "ruoan1.wang"
Date: Fri, 9 Sep 2022 13:06:45 -0400
Subject: [PATCH 06/15] update openvino model for thread num

---
 python/nano/src/bigdl/nano/deps/openvino/core/model.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/python/nano/src/bigdl/nano/deps/openvino/core/model.py b/python/nano/src/bigdl/nano/deps/openvino/core/model.py
index b583cdb9fba..77ba6ef8901 100644
--- a/python/nano/src/bigdl/nano/deps/openvino/core/model.py
+++ b/python/nano/src/bigdl/nano/deps/openvino/core/model.py
@@ -22,9 +22,10 @@ class OpenVINOModel:
-    def __init__(self, ie_network: str, device='CPU'):
+    def __init__(self, ie_network: str, device='CPU', thread_num=None):
         self._ie = Core()
         self._device = device
+        self.thread_num = thread_num
         self.ie_network = ie_network
 
     def forward_step(self, *inputs):
@@ -47,8 +48,13 @@ def ie_network(self, model):
             self._ie_network = self._ie.read_model(model=str(model))
         else:
             self._ie_network = model
+        if self.thread_num is not None:
+            config = {"CPU_THREADS_NUM": "8"}
+        else:
+            config = {}
         self._compiled_model = self._ie.compile_model(model=self.ie_network,
-                                                      device_name=self._device)
+                                                      device_name=self._device,
+                                                      config=config)
         self._infer_request = self._compiled_model.create_infer_request()
         input_names = [t.any_name for t in self._ie_network.inputs]
         self._forward_args = input_names

From f99e73a56013548aa980b6bf91bed7b7c7a99fbb Mon Sep 17 00:00:00 2001
From: "ruoan1.wang"
Date: Fri, 9 Sep 2022 13:24:43 -0400
Subject: [PATCH 07/15] add thread num for trace and quantize

---
 .../bigdl/nano/deps/openvino/openvino_api.py  |  9 +++--
 .../bigdl/nano/deps/openvino/pytorch/model.py |  7 +++-
 .../bigdl/nano/pytorch/inference/optimizer.py | 40 ++++++++++++-------
 3 files changed, 36 insertions(+), 20 deletions(-)

diff --git a/python/nano/src/bigdl/nano/deps/openvino/openvino_api.py b/python/nano/src/bigdl/nano/deps/openvino/openvino_api.py
index 7357ad4b1f1..200fe4bcb28 100644
--- a/python/nano/src/bigdl/nano/deps/openvino/openvino_api.py
+++ b/python/nano/src/bigdl/nano/deps/openvino/openvino_api.py
@@ -16,20 +16,23 @@
 from functools import partial
 
 
-def PytorchOpenVINOModel(model, input_sample=None, logging=True, **export_kwargs):
+def PytorchOpenVINOModel(model, input_sample=None, thread_num=None,
+                         logging=True, **export_kwargs):
     """
     Create a OpenVINO model from pytorch.
 
     :param model: Pytorch model to be converted to OpenVINO for inference or
                   path to Openvino saved model.
     :param input_sample: A set of inputs for trace, defaults to None if you have trace before or
-                         model is a LightningModule with any dataloader attached, defaults to None
+                         model is a LightningModule with any dataloader attached, defaults to None.
+    :param thread_num: a int represents how many threads(cores) is needed for
+                       inference. default: None.
     :param logging: whether to log detailed information of model conversion. default: True.
    :param **export_kwargs: will be passed to torch.onnx.export function.
     :return: PytorchOpenVINOModel model for OpenVINO inference.
     """
     from .pytorch.model import PytorchOpenVINOModel
-    return PytorchOpenVINOModel(model, input_sample, logging, **export_kwargs)
+    return PytorchOpenVINOModel(model, input_sample, thread_num, logging, **export_kwargs)
 
 
 def load_openvino_model(path):
diff --git a/python/nano/src/bigdl/nano/deps/openvino/pytorch/model.py b/python/nano/src/bigdl/nano/deps/openvino/pytorch/model.py
index e8a5783e037..2751b42c312 100644
--- a/python/nano/src/bigdl/nano/deps/openvino/pytorch/model.py
+++ b/python/nano/src/bigdl/nano/deps/openvino/pytorch/model.py
@@ -26,7 +26,8 @@
 
 
 class PytorchOpenVINOModel(AcceleratedLightningModule):
-    def __init__(self, model, input_sample=None, logging=True, **export_kwargs):
+    def __init__(self, model, input_sample=None, thread_num=None,
+                 logging=True, **export_kwargs):
         """
         Create a OpenVINO model from pytorch.
 
@@ -35,6 +36,8 @@ def __init__(self, model, input_sample=None, logging=True, **export_kwargs):
         :param input_sample: A set of inputs for trace, defaults to None if you have trace before or
                              model is a LightningModule with any dataloader attached,
                              defaults to None.
+        :param thread_num: a int represents how many threads(cores) is needed for
+                           inference. default: None.
         :param logging: whether to log detailed information of model conversion. default: True.
         :param **export_kwargs: will be passed to torch.onnx.export function.
         """
@@ -44,7 +47,7 @@ def __init__(self, model, input_sample=None, logging=True, **export_kwargs):
         if isinstance(model, torch.nn.Module):
             export(model, input_sample, str(dir / 'tmp.xml'), logging, **export_kwargs)
             ov_model_path = dir / 'tmp.xml'
-        self.ov_model = OpenVINOModel(ov_model_path)
+        self.ov_model = OpenVINOModel(ov_model_path, thread_num=thread_num)
         super().__init__(self.ov_model)
 
     def on_forward_start(self, inputs):
diff --git a/python/nano/src/bigdl/nano/pytorch/inference/optimizer.py b/python/nano/src/bigdl/nano/pytorch/inference/optimizer.py
index 499daf669fb..8440ef1cc58 100644
--- a/python/nano/src/bigdl/nano/pytorch/inference/optimizer.py
+++ b/python/nano/src/bigdl/nano/pytorch/inference/optimizer.py
@@ -118,7 +118,7 @@ def optimize(self, model: nn.Module,
                  validation_data: DataLoader = None,
                  metric: Callable = None,
                  direction: str = "max",
-                 cpu_num: int = None,
+                 thread_num: int = None,
                  logging: bool = False,
                  latency_sample_num: int = 100) -> None:
         '''
@@ -141,7 +141,7 @@ def optimize(self, model: nn.Module,
         :param direction: (optional) A string that indicates the higher/lower
               better for the metric, "min" for the lower the better and "max" for the
               higher the better. Default value is "max".
-        :param cpu_num: (optional) a int represents how many cores is needed for
+        :param thread_num: (optional) a int represents how many threads(cores) is needed for
               inference.
         :param logging: whether to log detailed information of model conversion.
               default: False.
@@ -166,17 +166,7 @@ def optimize(self, model: nn.Module,
             self._calculate_accuracy = False
 
         default_threads: int = torch.get_num_threads()
-        cpu_num: int = default_threads if cpu_num is None else int(cpu_num)
-
-        # set cpu num for onnxruntime
-        if _onnxruntime_checker():
-            import onnxruntime
-            sessoption = onnxruntime.SessionOptions()
-            sessoption.intra_op_num_threads = cpu_num
-            sessoption.inter_op_num_threads = cpu_num
-        else:
-            sessoption = None
-        # TODO: set cpu num for openvino
+        thread_num: int = default_threads if thread_num is None else int(thread_num)
 
         result_map: Dict[str, Dict] = {}
 
@@ -219,7 +209,7 @@ def optimize(self, model: nn.Module,
                                 InferenceOptimizer.trace(model=model,
                                                          accelerator=accelerator,
                                                          input_sample=input_sample,
-                                                         onnxruntime_session_options=sessoption,
+                                                         thread_num=thread_num,
                                                          # remove output of openvino
                                                          logging=logging)
                     except Exception as e:
@@ -378,6 +368,7 @@ def quantize(model: nn.Module,
                  timeout: int = None,
                  max_trials: int = None,
                  input_sample=None,
+                 thread_num: int = None,
                  onnxruntime_session_options=None,
                  logging: bool = True,
                  **export_kwargs):
@@ -422,6 +413,9 @@ def quantize(model: nn.Module,
                             "timeout=0, max_trials=1" means it will try quantization only once and
                             return satisfying best model.
         :param input_sample: An input example to convert pytorch model into ONNX/OpenVINO.
+        :param thread_num: (optional) a int represents how many threads(cores) is needed for
+                           inference, only valid for accelerator='onnxruntime'
+                           or accelerator='openvino'.
         :param onnxruntime_session_options: The session option for onnxruntime, only valid when
                                             accelerator='onnxruntime', otherwise will be ignored.
         :param logging: whether to log detailed information of model conversion, only valid when
@@ -467,10 +461,16 @@ def quantize(model: nn.Module,
                 if input_sample is None:
                     # input_sample can be a dataloader
                     input_sample = calib_dataloader
+                if onnxruntime_session_options is None:
+                    import onnxruntime
+                    onnxruntime_session_options = onnxruntime.SessionOptions()
+                    onnxruntime_session_options.intra_op_num_threads = thread_num
+                    onnxruntime_session_options.inter_op_num_threads = thread_num
                 model = InferenceOptimizer.trace(
                     model,
                     input_sample=input_sample,
                     accelerator='onnxruntime',
+                    onnxruntime_session_options=onnxruntime_session_options,
                     **export_kwargs)
             """
             If accelerator==None, quantized model returned should be an object of PytorchModel
@@ -498,6 +498,7 @@ def quantize(model: nn.Module,
                 model = InferenceOptimizer.trace(model,
                                                  input_sample=input_sample,
                                                  accelerator='openvino',
+                                                 thread_num=thread_num,
                                                  logging=logging,
                                                  **export_kwargs)
             invalidInputError(type(model).__name__ == 'PytorchOpenVINOModel',
@@ -536,6 +537,7 @@ def trace(model: nn.Module,
               input_sample=None,
               accelerator: str = None,
               use_ipex: bool = False,
+              thread_num: int = None,
              onnxruntime_session_options=None,
               logging: bool = True,
               **export_kwargs):
@@ -550,6 +552,9 @@ def trace(model: nn.Module,
         :param accelerator: The accelerator to use, defaults to None meaning staying in Pytorch
                             backend. 'openvino', 'onnxruntime' and 'jit' are supported for now.
         :param use_ipex: whether we use ipex as accelerator for inferencing. default: False.
+        :param thread_num: (optional) a int represents how many threads(cores) is needed for
+                           inference, only valid for accelerator='onnxruntime'
+                           or accelerator='openvino'.
         :param onnxruntime_session_options: The session option for onnxruntime, only valid when
                                             accelerator='onnxruntime', otherwise will be ignored.
         :param logging: whether to log detailed information of model conversion, only valid when
@@ -568,8 +573,13 @@ def trace(model: nn.Module,
                           "but got type {}".format(type(model))
                           )
        if accelerator == 'openvino':  # openvino backend will not care about ipex usage
-            return PytorchOpenVINOModel(model, input_sample, logging, **export_kwargs)
+            return PytorchOpenVINOModel(model, input_sample, thread_num, logging, **export_kwargs)
         if accelerator == 'onnxruntime':  # onnxruntime backend will not care about ipex usage
+            if onnxruntime_session_options is None:
+                import onnxruntime
+                onnxruntime_session_options = onnxruntime.SessionOptions()
+                onnxruntime_session_options.intra_op_num_threads = thread_num
+                onnxruntime_session_options.inter_op_num_threads = thread_num
             return PytorchONNXRuntimeModel(model, input_sample,
                                            onnxruntime_session_options, **export_kwargs)
         if accelerator == 'jit' or use_ipex:

From a377661681c9085cc27dd9f1d41d145e3b62372e Mon Sep 17 00:00:00 2001
From: "ruoan1.wang"
Date: Fri, 9 Sep 2022 13:29:41 -0400
Subject: [PATCH 08/15] fix ut

---
 .../nano/test/pytorch/tests/test_inference_pipeline_ipex.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/python/nano/test/pytorch/tests/test_inference_pipeline_ipex.py b/python/nano/test/pytorch/tests/test_inference_pipeline_ipex.py
index cbbf0fbe769..1d939aa7541 100644
--- a/python/nano/test/pytorch/tests/test_inference_pipeline_ipex.py
+++ b/python/nano/test/pytorch/tests/test_inference_pipeline_ipex.py
@@ -58,7 +58,7 @@ class TestInferencePipeline(TestCase):
     num_workers = 0
     data_dir = os.path.join(os.path.dirname(__file__), "data")
     metric = torchmetrics.Accuracy(num_classes=10, top_k=1)
-    max_epochs = 10
+    max_epochs = 5
     model = Net()
     test_loader = create_data_loader(data_dir, 1, num_workers, data_transform,
                                      subset=10, shuffle=False)
@@ -85,7 +85,7 @@ def test_pipeline_with_metric(self):
                                validation_data=self.test_loader,
                                metric=self.metric,
                                direction="max",
-                               cpu_num=1)
+                               thread_num=1)
 
         acc_model, option = inference_opt.get_best_model()
         acc_model, option = inference_opt.get_best_model(accelerator="onnxruntime")
@@ -99,7 +99,7 @@ def test_pipeline_without_metric(self):
         inference_opt = InferenceOptimizer()
         inference_opt.optimize(model=self.model,
                                training_data=self.train_loader,
-                               cpu_num=1)
+                               thread_num=1)
 
         acc_model, option = inference_opt.get_best_model()
         acc_model, option = inference_opt.get_best_model(accelerator="onnxruntime")

From b0216ee140b3821390f7ffcf4070b2a19acc792d Mon Sep 17 00:00:00 2001
From: "ruoan1.wang"
Date: Fri, 9 Sep 2022 13:33:29 -0400
Subject: [PATCH 09/15] fix parameter

---
 .../pytorch/inference_pipeline/resnet/inference_pipeline.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/nano/example/pytorch/inference_pipeline/resnet/inference_pipeline.py b/python/nano/example/pytorch/inference_pipeline/resnet/inference_pipeline.py
index ca6d8534ca2..aeeb2d47638 100644
--- a/python/nano/example/pytorch/inference_pipeline/resnet/inference_pipeline.py
+++ b/python/nano/example/pytorch/inference_pipeline/resnet/inference_pipeline.py
@@ -45,7 +45,7 @@ def accuracy(pred, target):
                            validation_data=datamodule.val_dataloader(limit_num_samples=160),
                            metric=accuracy,
                            direction="max",
-                           cpu_num=1,
+                           thread_num=1,
                            latency_sample_num=30)
 
 # 4. Get the best model under specific restrictions or without restrictions
From b3cf48f6c15144e54a7deaae180aca717fe41000 Mon Sep 17 00:00:00 2001
From: "ruoan1.wang"
Date: Tue, 13 Sep 2022 05:28:03 -0400
Subject: [PATCH 10/15] update based on comment: modify output, add progress
 bar and remove method type

---
 .../bigdl/nano/pytorch/inference/optimizer.py | 57 ++++++++-----------
 1 file changed, 24 insertions(+), 33 deletions(-)

diff --git a/python/nano/src/bigdl/nano/pytorch/inference/optimizer.py b/python/nano/src/bigdl/nano/pytorch/inference/optimizer.py
index 8440ef1cc58..4f07d2c9bde 100644
--- a/python/nano/src/bigdl/nano/pytorch/inference/optimizer.py
+++ b/python/nano/src/bigdl/nano/pytorch/inference/optimizer.py
@@ -73,11 +73,6 @@ def get_accelerator(self):
             return "jit"
         return None
 
-    def get_method_type(self):
-        if self.inc or self.pot or self.bf16:
-            return "quantize"
-        return "trace"
-
 
 # acceleration method combinations, developers may want to register some new
 # combinations here
@@ -174,14 +169,13 @@ def optimize(self, model: nn.Module,
         print("==========================Start Optimization==========================")
         start_time = time.perf_counter()
-        for method, available in available_dict.items():
+        for idx, (method, available) in enumerate(available_dict.items()):
             result_map[method] = {}
             if available is False:
                 result_map[method]["status"] = "lack dependency"
-                option: AccelerationOption = ALL_INFERENCE_ACCELERATION_METHOD[method]
-                result_map[method]["method_type"] = option.get_method_type()
             else:
-                print(f"**********Start test {method} model**********")
+                print(f"----------Start test {method} model "
+                      f"({idx+1}/{len(ALL_INFERENCE_ACCELERATION_METHOD)})----------")
                 option: AccelerationOption = ALL_INFERENCE_ACCELERATION_METHOD[method]
                 use_ipex: bool = option.ipex
                 use_channels_last: bool = option.channels_last
@@ -193,9 +187,7 @@ def optimize(self, model: nn.Module,
                 try:
                     if accelerator is None and use_ipex is False:
                         acce_model = model
-                        result_map[method]["method_type"] = "none"
                     else:
-                        result_map[method]["method_type"] = "trace"
                         if accelerator in ("jit", None):
                             acce_model = \
                                 InferenceOptimizer.trace(model=model,
@@ -215,12 +207,11 @@ def optimize(self, model: nn.Module,
                     except Exception as e:
                         print(e)
                         result_map[method]["status"] = "fail to convert"
-                        print(f"**********Failed to convert to {method}**********")
+                        print(f"----------Failed to convert to {method}----------")
                         continue
 
                 # if precision is int8 or bf16, then we will use quantize method
                 elif precision in ("int8", "bf16"):
-                    result_map[method]["method_type"] = "quantize"
                     ort_method: str = option.method
                     try:
                         acce_model = \
@@ -230,13 +221,13 @@ def optimize(self, model: nn.Module,
                                                     use_ipex=use_ipex,
                                                     calib_dataloader=training_data,
                                                     method=ort_method,
-                                                    onnxruntime_session_options=sessoption,
+                                                    thread_num=thread_num,
                                                     # remove output of openvino
                                                     logging=logging)
                     except Exception as e:
                         print(e)
                         result_map[method]["status"] = "fail to convert"
-                        print(f"**********Failed to convert to {method}**********")
+                        print(f"----------Failed to convert to {method}----------")
                         continue
 
                 result_map[method]["status"] = "successful"
@@ -245,7 +236,7 @@ def func_test(model, input_sample):
                     with torch.no_grad():
                         model(*input_sample)
 
-                torch.set_num_threads(cpu_num)
+                torch.set_num_threads(thread_num)
                 try:
                     result_map[method]["latency"] =\
                         _throughput_calculate_helper(latency_sample_num, func_test,
@@ -264,7 +255,7 @@ def func_test(model, input_sample):
                     result_map[method]["accuracy"] = None
 
                 result_map[method]["model"] = acce_model
-                print(f"**********Finish test {method} model**********")
+                print(f"----------Finish test {method} model----------")
 
         self.optimized_model_dict: Dict = result_map
         print("\n\n==========================Optimization Results==========================")
@@ -464,8 +455,9 @@ def quantize(model: nn.Module,
                 if onnxruntime_session_options is None:
                     import onnxruntime
                     onnxruntime_session_options = onnxruntime.SessionOptions()
-                    onnxruntime_session_options.intra_op_num_threads = thread_num
-                    onnxruntime_session_options.inter_op_num_threads = thread_num
+                    if thread_num is not None:
+                        onnxruntime_session_options.intra_op_num_threads = thread_num
+                        onnxruntime_session_options.inter_op_num_threads = thread_num
                 model = InferenceOptimizer.trace(
                     model,
                     input_sample=input_sample,
@@ -578,8 +570,9 @@ def trace(model: nn.Module,
             if onnxruntime_session_options is None:
                 import onnxruntime
                 onnxruntime_session_options = onnxruntime.SessionOptions()
-                onnxruntime_session_options.intra_op_num_threads = thread_num
-                onnxruntime_session_options.inter_op_num_threads = thread_num
+                if thread_num is not None:
+                    onnxruntime_session_options.intra_op_num_threads = thread_num
+                    onnxruntime_session_options.inter_op_num_threads = thread_num
             return PytorchONNXRuntimeModel(model, input_sample,
                                            onnxruntime_session_options, **export_kwargs)
         if accelerator == 'jit' or use_ipex:
@@ -710,39 +703,37 @@ def _format_optimize_result(optimize_result_dict: dict,
     Get a format string represation for optimization result
     '''
     if calculate_accuracy is True:
-        horizontal_line = " {0} {1} {2} {3} {4}\n" \
-            .format("-" * 32, "-" * 22, "-" * 12, "-" * 12, "-" * 12)
+        horizontal_line = " {0} {1} {2} {3}\n" \
+            .format("-" * 32, "-" * 22, "-" * 12, "-" * 12)
         repr_str = horizontal_line
-        repr_str += "| {0:^30} | {1:^20} | {2:^10} | {3:^10} | {4:^10} |\n" \
-            .format("method", "status", "type", "latency", "accuracy")
+        repr_str += "| {0:^30} | {1:^20} | {2:^10} | {3:^10} |\n" \
+            .format("method", "status", "latency", "accuracy")
         repr_str += horizontal_line
         for method, result in optimize_result_dict.items():
             status = result["status"]
-            method_type = result["method_type"]
             latency = result.get("latency", "None")
             if latency != "None":
                 latency = round(latency, 3)
             accuracy = result.get("accuracy", "None")
             if accuracy != "None":
                 accuracy = round(accuracy, 3)
-            method_str = f"| {method:^30} | {status:^20} | {method_type:^10} | " \
-                         f"{latency:^10} | {accuracy:^10} |\n"
+            method_str = f"| {method:^30} | {status:^20} | " \
+                         f"{latency:^10} | {accuracy:^10} |\n"
             repr_str += method_str
         repr_str += horizontal_line
     else:
-        horizontal_line = " {0} {1} {2} {3}\n" \
-            .format("-" * 32, "-" * 22, "-" * 12, "-" * 12)
+        horizontal_line = " {0} {1} {2}\n" \
+            .format("-" * 32, "-" * 22, "-" * 12)
         repr_str = horizontal_line
-        repr_str += "| {0:^30} | {1:^20} | {2:^10} | {3:^10} |\n" \
-            .format("method", "status", "type", "latency")
+        repr_str += "| {0:^30} | {1:^20} | {2:^10} |\n" \
+            .format("method", "status", "latency")
         repr_str += horizontal_line
         for method, result in optimize_result_dict.items():
             status = result["status"]
-            method_type = result["method_type"]
             latency = result.get("latency", "None")
             if latency != "None":
                 latency = round(latency, 3)
-            method_str = f"| {method:^30} | {status:^20} | {method_type:^10} | {latency:^10} |\n"
+            method_str = f"| {method:^30} | {status:^20} | {latency:^10} |\n"
             repr_str += method_str
         repr_str += horizontal_line
     return repr_str

From ac5a139a32e6901b3a62d6c53b0db70ec7ef3dfc Mon Sep 17 00:00:00 2001
From: "ruoan1.wang"
Date: Tue, 13 Sep 2022 05:33:48 -0400
Subject: [PATCH 11/15] fix
---
 python/nano/src/bigdl/nano/pytorch/inference/optimizer.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/python/nano/src/bigdl/nano/pytorch/inference/optimizer.py b/python/nano/src/bigdl/nano/pytorch/inference/optimizer.py
index 4f07d2c9bde..6d36aecbf26 100644
--- a/python/nano/src/bigdl/nano/pytorch/inference/optimizer.py
+++ b/python/nano/src/bigdl/nano/pytorch/inference/optimizer.py
@@ -255,7 +255,8 @@ def func_test(model, input_sample):
                     result_map[method]["accuracy"] = None
 
                 result_map[method]["model"] = acce_model
-                print(f"----------Finish test {method} model----------")
+                print(f"----------Finish test {method} model "
+                      f"({idx+1}/{len(ALL_INFERENCE_ACCELERATION_METHOD)})----------")
 
         self.optimized_model_dict: Dict = result_map
         print("\n\n==========================Optimization Results==========================")

From 6eeb8bc04e591fc84df0bcb950fd9a8fe5eb37a4 Mon Sep 17 00:00:00 2001
From: "ruoan1.wang"
Date: Tue, 13 Sep 2022 05:51:19 -0400
Subject: [PATCH 12/15] modify latency to latency(ms)

---
 .../bigdl/nano/pytorch/inference/optimizer.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/python/nano/src/bigdl/nano/pytorch/inference/optimizer.py b/python/nano/src/bigdl/nano/pytorch/inference/optimizer.py
index 6d36aecbf26..e6ca5798241 100644
--- a/python/nano/src/bigdl/nano/pytorch/inference/optimizer.py
+++ b/python/nano/src/bigdl/nano/pytorch/inference/optimizer.py
@@ -705,10 +705,10 @@ def _format_optimize_result(optimize_result_dict: dict,
     '''
     if calculate_accuracy is True:
         horizontal_line = " {0} {1} {2} {3}\n" \
-            .format("-" * 32, "-" * 22, "-" * 12, "-" * 12)
+            .format("-" * 32, "-" * 22, "-" * 14, "-" * 12)
         repr_str = horizontal_line
-        repr_str += "| {0:^30} | {1:^20} | {2:^10} | {3:^10} |\n" \
-            .format("method", "status", "latency", "accuracy")
+        repr_str += "| {0:^30} | {1:^20} | {2:^12} | {3:^10} |\n" \
+            .format("method", "status", "latency(ms)", "accuracy")
         repr_str += horizontal_line
         for method, result in optimize_result_dict.items():
             status = result["status"]
@@ -719,22 +719,22 @@ def _format_optimize_result(optimize_result_dict: dict,
             accuracy = result.get("accuracy", "None")
             if accuracy != "None":
                 accuracy = round(accuracy, 3)
             method_str = f"| {method:^30} | {status:^20} | " \
-                         f"{latency:^10} | {accuracy:^10} |\n"
+                         f"{latency:^12} | {accuracy:^10} |\n"
             repr_str += method_str
         repr_str += horizontal_line
     else:
         horizontal_line = " {0} {1} {2}\n" \
-            .format("-" * 32, "-" * 22, "-" * 12)
+            .format("-" * 32, "-" * 22, "-" * 14)
         repr_str = horizontal_line
-        repr_str += "| {0:^30} | {1:^20} | {2:^10} |\n" \
-            .format("method", "status", "latency")
+        repr_str += "| {0:^30} | {1:^20} | {2:^12} |\n" \
+            .format("method", "status", "latency(ms)")
         repr_str += horizontal_line
         for method, result in optimize_result_dict.items():
             status = result["status"]
             latency = result.get("latency", "None")
             if latency != "None":
                 latency = round(latency, 3)
-            method_str = f"| {method:^30} | {status:^20} | {latency:^10} |\n"
+            method_str = f"| {method:^30} | {status:^20} | {latency:^12} |\n"
             repr_str += method_str
         repr_str += horizontal_line
     return repr_str

From da9326af2323365d0b40491a6d95f5ec81109e2a Mon Sep 17 00:00:00 2001
From: "ruoan1.wang"
Date: Tue, 13 Sep 2022 06:46:25 -0400
Subject: [PATCH 13/15] add thread_num for Trainer.trace and Trainer.quantize

---
 python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py b/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py
index 4ef4a6333d8..da54675124d 100644
--- a/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py
+++ b/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py
@@ -268,6 +268,7 @@ def trace(model: nn.Module,
               input_sample=None,
               accelerator: str = None,
               use_ipex: bool = False,
+              thread_num: int = None,
               onnxruntime_session_options=None,
               logging: bool = True,
               **export_kwargs):
@@ -282,6 +283,9 @@ def trace(model: nn.Module,
         :param accelerator: The accelerator to use, defaults to None meaning staying in Pytorch
                             backend. 'openvino', 'onnxruntime' and 'jit' are supported for now.
         :param use_ipex: whether we use ipex as accelerator for inferencing. default: False.
+        :param thread_num: (optional) a int represents how many threads(cores) is needed for
+                           inference, only valid for accelerator='onnxruntime'
+                           or accelerator='openvino'.
         :param onnxruntime_session_options: The session option for onnxruntime, only valid when
                                             accelerator='onnxruntime', otherwise will be ignored.
         :param logging: whether to log detailed information of model conversion, only valid when
@@ -298,6 +302,7 @@ def trace(model: nn.Module,
                                          input_sample=input_sample,
                                          accelerator=accelerator,
                                          use_ipex=use_ipex,
+                                         thread_num=thread_num,
                                          onnxruntime_session_options=onnxruntime_session_options,
                                          logging=logging,
                                          **export_kwargs)
@@ -317,6 +322,7 @@ def quantize(model: nn.Module,
                  timeout: int = None,
                  max_trials: int = None,
                  input_sample=None,
+                 thread_num: int = None,
                  onnxruntime_session_options=None,
                  logging: bool = True,
                  **export_kwargs):
@@ -361,6 +367,9 @@ def quantize(model: nn.Module,
                             "timeout=0, max_trials=1" means it will try quantization only once and
                             return satisfying best model.
         :param input_sample: An input example to convert pytorch model into ONNX/OpenVINO.
+        :param thread_num: (optional) a int represents how many threads(cores) is needed for
+                           inference, only valid for accelerator='onnxruntime'
+                           or accelerator='openvino'.
         :param onnxruntime_session_options: The session option for onnxruntime, only valid when
                                             accelerator='onnxruntime', otherwise will be ignored.
         :param logging: whether to log detailed information of model conversion, only valid when
@@ -382,6 +391,7 @@ def quantize(model: nn.Module,
                                             timeout=timeout,
                                             max_trials=max_trials,
                                             input_sample=input_sample,
+                                            thread_num=thread_num,
                                             onnxruntime_session_options=onnxruntime_session_options,
                                             logging=logging,
                                             **export_kwargs)

From c4ebf8ee017e6566c7cf1ea9e197e1ebd0991d99 Mon Sep 17 00:00:00 2001
From: "ruoan1.wang"
Date: Tue, 13 Sep 2022 09:49:20 -0400
Subject: [PATCH 14/15] fix bug of openvino cpu_num

---
 python/nano/src/bigdl/nano/deps/openvino/core/model.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/nano/src/bigdl/nano/deps/openvino/core/model.py b/python/nano/src/bigdl/nano/deps/openvino/core/model.py
index 77ba6ef8901..76e85eaf73c 100644
--- a/python/nano/src/bigdl/nano/deps/openvino/core/model.py
+++ b/python/nano/src/bigdl/nano/deps/openvino/core/model.py
@@ -49,7 +49,7 @@ def ie_network(self, model):
         else:
             self._ie_network = model
         if self.thread_num is not None:
-            config = {"CPU_THREADS_NUM": "8"}
+            config = {"CPU_THREADS_NUM": str(self.thread_num)}
         else:
             config = {}
         self._compiled_model = self._ie.compile_model(model=self.ie_network,

From b3eef83d1c352aae8a88dd348a4628c3db891ac6 Mon Sep 17 00:00:00 2001
From: "ruoan1.wang"
Date: Tue, 13 Sep 2022 11:41:50 -0400
Subject: [PATCH 15/15] modify ms->s

---
 python/nano/src/bigdl/nano/pytorch/inference/optimizer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/nano/src/bigdl/nano/pytorch/inference/optimizer.py b/python/nano/src/bigdl/nano/pytorch/inference/optimizer.py
index e6ca5798241..eabcdaa20d4 100644
--- a/python/nano/src/bigdl/nano/pytorch/inference/optimizer.py
+++ b/python/nano/src/bigdl/nano/pytorch/inference/optimizer.py
@@ -264,7 +264,7 @@ def func_test(model, input_sample):
         self._optimize_result = _format_optimize_result(self.optimized_model_dict,
                                                         self._calculate_accuracy)
         print(self._optimize_result)
-        print("Optimization cost {:.3}ms at all.".format(time.perf_counter() - start_time))
+        print("Optimization cost {:.3}s at all.".format(time.perf_counter() - start_time))
         print("===========================Stop Optimization===========================")
 
     def summary(self):
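
Taken together, the series leaves InferenceOptimizer with the workflow below. This is a minimal sketch, not part of the series itself: the import path is inferred from the file layout shown in the diffs (python/nano/src/bigdl/nano/pytorch/inference/optimizer.py), and the toy model and dataset are placeholders for real ones.

import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset

# Import path assumed from the repository layout in these patches.
from bigdl.nano.pytorch.inference.optimizer import InferenceOptimizer

# Placeholder model and calibration data; any nn.Module and DataLoader
# yielding (input, target) batches work here.
model = nn.Sequential(nn.Flatten(), nn.Linear(3 * 32 * 32, 10))
dataset = TensorDataset(torch.randn(16, 3, 32, 32), torch.randint(0, 10, (16,)))
loader = DataLoader(dataset, batch_size=4)

opt = InferenceOptimizer()
# thread_num (patch 07) replaces the earlier cpu_num argument: it pins
# torch.set_num_threads() during benchmarking and is forwarded to
# trace()/quantize(), so the onnxruntime session options and the OpenVINO
# CPU_THREADS_NUM config (patches 06/14) are set consistently per candidate.
opt.optimize(model=model,
             training_data=loader,
             thread_num=1,
             latency_sample_num=30)
opt.summary()  # re-prints the table built by _format_optimize_result (patch 02)

# Pick the fastest converted model, optionally restricted to one backend.
best_model, option = opt.get_best_model(accelerator="onnxruntime")
with torch.no_grad():
    best_model(torch.randn(4, 3, 32, 32))

Note that after patch 02, get_best_model() skips every entry whose status is not "successful", so methods that failed to convert, failed to forward, or lacked a dependency are reported in the table but never returned.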