From e92aa1418b392faaeba5e9f3c236f6f83dc27335 Mon Sep 17 00:00:00 2001 From: "ruoan1.wang" Date: Wed, 14 Sep 2022 05:21:41 -0400 Subject: [PATCH 01/13] update readme and reduce time cost for calculating accuracy --- .../inference_pipeline/resnet/README.md | 36 +++++++++++-------- .../bigdl/nano/pytorch/inference/optimizer.py | 19 ++++++---- 2 files changed, 34 insertions(+), 21 deletions(-) diff --git a/python/nano/example/pytorch/inference_pipeline/resnet/README.md b/python/nano/example/pytorch/inference_pipeline/resnet/README.md index f4e2646f89b..005e5dafcc6 100644 --- a/python/nano/example/pytorch/inference_pipeline/resnet/README.md +++ b/python/nano/example/pytorch/inference_pipeline/resnet/README.md @@ -56,23 +56,29 @@ python inference_pipeline.py ``` ## Results - -It will take about 2 minutes to run inference optimization. Then you may find the result for inference as follows: +It will take about 1 minute to run inference optimization. Then you may find the result for inference as follows: ``` ==========================Optimization Results========================== -accleration option: original, latency: 54.2669ms, accuracy: 0.9937 -accleration option: fp32_ipex, latency: 40.3075ms, accuracy: 0.9937 -accleration option: bf16_ipex, latency: 115.6182ms, accuracy: 0.9937 -accleration option: int8, latency: 14.4857ms, accuracy: 0.4750 -accleration option: jit_fp32, latency: 39.3361ms, accuracy: 0.9937 -accleration option: jit_fp32_ipex, latency: 39.2949ms, accuracy: 0.9937 -accleration option: jit_fp32_ipex_clast, latency: 24.5715ms, accuracy: 0.9937 -accleration option: openvino_fp32, latency: 14.5771ms, accuracy: 0.9937 -accleration option: openvino_int8, latency: 7.2186ms, accuracy: 0.9937 -accleration option: onnxruntime_fp32, latency: 44.3872ms, accuracy: 0.9937 -accleration option: onnxruntime_int8_qlinear, latency: 10.1866ms, accuracy: 0.9937 -accleration option: onnxruntime_int8_integer, latency: 18.8731ms, accuracy: 0.9875 + -------------------------------- ---------------------- -------------- ------------ +| method | status | latency(ms) | accuracy | + -------------------------------- ---------------------- -------------- ------------ +| original | successful | 43.52 | 1.0 | +| fp32_ipex | successful | 33.316 | 1.0 | +| bf16 | fail to forward | None | None | +| bf16_ipex | pruned | 206.862 | None | +| int8 | successful | 10.815 | 1.0 | +| jit_fp32 | successful | 33.066 | 1.0 | +| jit_fp32_ipex | successful | 34.361 | 1.0 | +| jit_fp32_ipex_channels_last | successful | 19.313 | 1.0 | +| openvino_fp32 | successful | 11.65 | 1.0 | +| openvino_int8 | successful | 7.931 | 0.994 | +| onnxruntime_fp32 | successful | 20.652 | 1.0 | +| onnxruntime_int8_qlinear | successful | 8.504 | 0.988 | +| onnxruntime_int8_integer | fail to convert | None | None | + -------------------------------- ---------------------- -------------- ------------ +Optimization cost 67.1s at all. 
+===========================Stop Optimization=========================== When accelerator is onnxruntime, the model with minimal latency is: inc + onnxruntime + qlinear When accuracy drop less than 5%, the model with minimal latency is: openvino + pot The model with minimal latency is: openvino + pot -``` +``` \ No newline at end of file diff --git a/python/nano/src/bigdl/nano/pytorch/inference/optimizer.py b/python/nano/src/bigdl/nano/pytorch/inference/optimizer.py index eabcdaa20d4..f7be3945230 100644 --- a/python/nano/src/bigdl/nano/pytorch/inference/optimizer.py +++ b/python/nano/src/bigdl/nano/pytorch/inference/optimizer.py @@ -248,9 +248,14 @@ def func_test(model, input_sample): torch.set_num_threads(default_threads) if self._calculate_accuracy: - result_map[method]["accuracy"] =\ - _accuracy_calculate_helper(acce_model, - metric, validation_data) + # TODO: here we suppose trace don't change accuracy, + # so we jump it to reduce time cost of optimize + if precision == "fp32" and method != "original": + result_map[method]["accuracy"] = result_map["original"]["accuracy"] + else: + result_map[method]["accuracy"] =\ + _accuracy_calculate_helper(acce_model, + metric, validation_data) else: result_map[method]["accuracy"] = None @@ -676,9 +681,11 @@ def _accuracy_calculate_helper(model, metric, data): ''' metric_list = [] sample_num = 0 - for i, (data_input, target) in enumerate(data): - metric_list.append(metric(model(data_input), target).numpy() * data_input.shape[0]) - sample_num += data_input.shape[0] + with torch.no_grad(): + for i, (data_input, target) in enumerate(data): + metric_list.append(metric(model(data_input), target).numpy() * + data_input.shape[0]) + sample_num += data_input.shape[0] return np.sum(metric_list) / sample_num From 683d36aa5eade113ff0c40df335de2f4b7c129df Mon Sep 17 00:00:00 2001 From: "ruoan1.wang" Date: Wed, 14 Sep 2022 06:23:32 -0400 Subject: [PATCH 02/13] add prune type --- .../bigdl/nano/pytorch/inference/optimizer.py | 25 ++++++++++++++----- 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/python/nano/src/bigdl/nano/pytorch/inference/optimizer.py b/python/nano/src/bigdl/nano/pytorch/inference/optimizer.py index f7be3945230..74a321c02cd 100644 --- a/python/nano/src/bigdl/nano/pytorch/inference/optimizer.py +++ b/python/nano/src/bigdl/nano/pytorch/inference/optimizer.py @@ -167,6 +167,12 @@ def optimize(self, model: nn.Module, model.eval() # change model to eval mode + input_sample = tuple(next(iter(training_data))[:-1]) + st = time.perf_counter() + with torch.no_grad(): + model(*input_sample) + baseline_time = time.perf_counter() - st + print("==========================Start Optimization==========================") start_time = time.perf_counter() for idx, (method, available) in enumerate(available_dict.items()): @@ -183,7 +189,6 @@ def optimize(self, model: nn.Module, precision: str = option.get_precision() # if precision is fp32, then we will use trace method if precision == "fp32": - input_sample = tuple(next(iter(training_data))[:-1]) try: if accelerator is None and use_ipex is False: acce_model = model @@ -238,9 +243,14 @@ def func_test(model, input_sample): torch.set_num_threads(thread_num) try: - result_map[method]["latency"] =\ - _throughput_calculate_helper(latency_sample_num, func_test, - acce_model, input_sample) + result_map[method]["latency"], status =\ + _throughput_calculate_helper(latency_sample_num, baseline_time, + func_test, + acce_model, input_sample) + if status is False: + result_map[method]["status"] = "pruned" + 
torch.set_num_threads(default_threads) + continue except Exception as e: result_map[method]["status"] = "fail to forward" torch.set_num_threads(default_threads) @@ -652,7 +662,7 @@ def _available_acceleration_combination(): return available_dict -def _throughput_calculate_helper(iterrun, func, *args): +def _throughput_calculate_helper(iterrun, baseline_time, func, *args): ''' A simple helper to calculate average latency ''' @@ -664,6 +674,9 @@ def _throughput_calculate_helper(iterrun, func, *args): func(*args) end = time.perf_counter() time_list.append(end - st) + # if inference is too slow, prune it + if i == 2 and end - start_time > 12 * baseline_time: + return np.mean(time_list) * 1000, False # at least need 10 iters and try to control calculation # time less than 2 min if i + 1 >= min(iterrun, 10) and (end - start_time) > 2: @@ -672,7 +685,7 @@ def _throughput_calculate_helper(iterrun, func, *args): time_list.sort() # remove top and least 10% data time_list = time_list[int(0.1 * iterrun): int(0.9 * iterrun)] - return np.mean(time_list) * 1000 + return np.mean(time_list) * 1000, True def _accuracy_calculate_helper(model, metric, data): From 2cb9c093edfd2574caf2f7ef0bcbc62e7fc30c6f Mon Sep 17 00:00:00 2001 From: "ruoan1.wang" Date: Wed, 14 Sep 2022 07:35:20 -0400 Subject: [PATCH 03/13] filter warnings --- .../inference_pipeline/resnet/README.md | 34 ++++++++++--------- .../bigdl/nano/pytorch/inference/optimizer.py | 23 ++++++++----- 2 files changed, 32 insertions(+), 25 deletions(-) diff --git a/python/nano/example/pytorch/inference_pipeline/resnet/README.md b/python/nano/example/pytorch/inference_pipeline/resnet/README.md index 005e5dafcc6..29aaf31a5a7 100644 --- a/python/nano/example/pytorch/inference_pipeline/resnet/README.md +++ b/python/nano/example/pytorch/inference_pipeline/resnet/README.md @@ -1,7 +1,7 @@ # Bigdl-nano InferenceOptimizer example on Cat vs. Dog dataset This example illustrates how to apply InferenceOptimizer to quickly find acceleration method with the minimum inference latency under specific restrictions or without restrictions for a trained model. -For the sake of this example, we first train the proposed network(by default, a ResNet18 is used) on the [cats and dogs dataset](https://storage.googleapis.com/mledu-datasets/cats_and_dogs_filtered.zip), which consists both [frozen and unfrozen stages](https://github.com/PyTorchLightning/pytorch-lightning/blob/495812878dfe2e31ec2143c071127990afbb082b/pl_examples/domain_templates/computer_vision_fine_tuning.py#L21-L35). Then, by calling `optimize()`, we can obtain all available accelaration combinations provided by BigDL-Nano for inference. By calling `get_best_mdoel()` , we could get an accelerated model whose inference is 7.5x times faster. +For the sake of this example, we first train the proposed network(by default, a ResNet18 is used) on the [cats and dogs dataset](https://storage.googleapis.com/mledu-datasets/cats_and_dogs_filtered.zip), which consists both [frozen and unfrozen stages](https://github.com/PyTorchLightning/pytorch-lightning/blob/495812878dfe2e31ec2143c071127990afbb082b/pl_examples/domain_templates/computer_vision_fine_tuning.py#L21-L35). Then, by calling `optimize()`, we can obtain all available accelaration combinations provided by BigDL-Nano for inference. By calling `get_best_mdoel()` , we could get an accelerated model whose inference is 5.5x times faster. 
## Prepare the environment @@ -25,6 +25,7 @@ pip install --upgrade numpy==1.21.6 Initialize environment variables with script `bigdl-nano-init` installed with bigdl-nano. ``` source bigdl-nano-init +unset KMP_AFFINITY ``` You may find environment variables set like follows: ``` @@ -59,24 +60,25 @@ python inference_pipeline.py It will take about 1 minute to run inference optimization. Then you may find the result for inference as follows: ``` ==========================Optimization Results========================== - -------------------------------- ---------------------- -------------- ------------ + -------------------------------- ---------------------- -------------- ------------ | method | status | latency(ms) | accuracy | - -------------------------------- ---------------------- -------------- ------------ -| original | successful | 43.52 | 1.0 | -| fp32_ipex | successful | 33.316 | 1.0 | + -------------------------------- ---------------------- -------------- ------------ +| original | successful | 43.447 | 0.994 | +| fp32_ipex | successful | 32.827 | 0.994 | | bf16 | fail to forward | None | None | -| bf16_ipex | pruned | 206.862 | None | -| int8 | successful | 10.815 | 1.0 | -| jit_fp32 | successful | 33.066 | 1.0 | -| jit_fp32_ipex | successful | 34.361 | 1.0 | -| jit_fp32_ipex_channels_last | successful | 19.313 | 1.0 | -| openvino_fp32 | successful | 11.65 | 1.0 | -| openvino_int8 | successful | 7.931 | 0.994 | -| onnxruntime_fp32 | successful | 20.652 | 1.0 | -| onnxruntime_int8_qlinear | successful | 8.504 | 0.988 | +| bf16_ipex | pruned | 201.702 | None | +| int8 | successful | 10.992 | 0.994 | +| jit_fp32 | successful | 36.741 | 0.994 | +| jit_fp32_ipex | successful | 33.293 | 0.994 | +| jit_fp32_ipex_channels_last | successful | 19.523 | 0.994 | +| openvino_fp32 | successful | 10.51 | 0.994 | +| openvino_int8 | successful | 6.637 | 0.994 | +| onnxruntime_fp32 | successful | 20.55 | 0.994 | +| onnxruntime_int8_qlinear | successful | 8.15 | 0.994 | | onnxruntime_int8_integer | fail to convert | None | None | - -------------------------------- ---------------------- -------------- ------------ -Optimization cost 67.1s at all. + -------------------------------- ---------------------- -------------- ------------ + +Optimization cost 64.3s at all. 
===========================Stop Optimization=========================== When accelerator is onnxruntime, the model with minimal latency is: inc + onnxruntime + qlinear When accuracy drop less than 5%, the model with minimal latency is: openvino + pot diff --git a/python/nano/src/bigdl/nano/pytorch/inference/optimizer.py b/python/nano/src/bigdl/nano/pytorch/inference/optimizer.py index 74a321c02cd..24978bf8bd4 100644 --- a/python/nano/src/bigdl/nano/pytorch/inference/optimizer.py +++ b/python/nano/src/bigdl/nano/pytorch/inference/optimizer.py @@ -35,6 +35,12 @@ from bigdl.nano.deps.neural_compressor.inc_api import load_inc_model, quantize as inc_quantize from bigdl.nano.utils.inference.pytorch.model import AcceleratedLightningModule from bigdl.nano.pytorch.utils import TORCH_VERSION_LESS_1_10 +import warnings +# Filter out useless Userwarnings +warnings.filterwarnings('ignore', category=UserWarning, module='pytorch_lightning') +warnings.filterwarnings('ignore', category=DeprecationWarning, module='pytorch_lightning') +warnings.filterwarnings('ignore', category=UserWarning, module='torch') +warnings.filterwarnings('ignore', category=DeprecationWarning, module='torch') import os os.environ['LOGLEVEL'] = 'ERROR' # remove parital output of inc @@ -244,9 +250,9 @@ def func_test(model, input_sample): torch.set_num_threads(thread_num) try: result_map[method]["latency"], status =\ - _throughput_calculate_helper(latency_sample_num, baseline_time, - func_test, - acce_model, input_sample) + _throughput_calculate_helper(latency_sample_num, baseline_time, + func_test, + acce_model, input_sample) if status is False: result_map[method]["status"] = "pruned" torch.set_num_threads(default_threads) @@ -258,14 +264,14 @@ def func_test(model, input_sample): torch.set_num_threads(default_threads) if self._calculate_accuracy: - # TODO: here we suppose trace don't change accuracy, + # TODO: here we suppose trace don't change accuracy, # so we jump it to reduce time cost of optimize if precision == "fp32" and method != "original": result_map[method]["accuracy"] = result_map["original"]["accuracy"] else: result_map[method]["accuracy"] =\ _accuracy_calculate_helper(acce_model, - metric, validation_data) + metric, validation_data) else: result_map[method]["accuracy"] = None @@ -630,7 +636,7 @@ def _openvino_checker(): ''' check if openvino-dev is installed ''' - return not find_spec("openvino-dev") is None + return not find_spec("openvino") is None def _bf16_checker(): @@ -674,7 +680,7 @@ def _throughput_calculate_helper(iterrun, baseline_time, func, *args): func(*args) end = time.perf_counter() time_list.append(end - st) - # if inference is too slow, prune it + # if three samples cost more than 4x time than baseline model, prune it if i == 2 and end - start_time > 12 * baseline_time: return np.mean(time_list) * 1000, False # at least need 10 iters and try to control calculation @@ -696,8 +702,7 @@ def _accuracy_calculate_helper(model, metric, data): sample_num = 0 with torch.no_grad(): for i, (data_input, target) in enumerate(data): - metric_list.append(metric(model(data_input), target).numpy() * - data_input.shape[0]) + metric_list.append(metric(model(data_input), target).numpy() * data_input.shape[0]) sample_num += data_input.shape[0] return np.sum(metric_list) / sample_num From 275502efe238ccdca8aad488102d156d0aaefe51 Mon Sep 17 00:00:00 2001 From: "ruoan1.wang" Date: Wed, 14 Sep 2022 07:37:49 -0400 Subject: [PATCH 04/13] fix style --- python/nano/src/bigdl/nano/pytorch/inference/optimizer.py | 3 +-- 1 file 
changed, 1 insertion(+), 2 deletions(-) diff --git a/python/nano/src/bigdl/nano/pytorch/inference/optimizer.py b/python/nano/src/bigdl/nano/pytorch/inference/optimizer.py index 24978bf8bd4..99a77de2ca7 100644 --- a/python/nano/src/bigdl/nano/pytorch/inference/optimizer.py +++ b/python/nano/src/bigdl/nano/pytorch/inference/optimizer.py @@ -251,8 +251,7 @@ def func_test(model, input_sample): try: result_map[method]["latency"], status =\ _throughput_calculate_helper(latency_sample_num, baseline_time, - func_test, - acce_model, input_sample) + func_test, acce_model, input_sample) if status is False: result_map[method]["status"] = "pruned" torch.set_num_threads(default_threads) From d9fd8dac18fbe68442bbe8afa36ddc0cb105daba Mon Sep 17 00:00:00 2001 From: "ruoan1.wang" Date: Wed, 14 Sep 2022 07:40:34 -0400 Subject: [PATCH 05/13] add check for input_sample --- .../nano/src/bigdl/nano/pytorch/inference/optimizer.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/python/nano/src/bigdl/nano/pytorch/inference/optimizer.py b/python/nano/src/bigdl/nano/pytorch/inference/optimizer.py index 99a77de2ca7..d34d8c9a137 100644 --- a/python/nano/src/bigdl/nano/pytorch/inference/optimizer.py +++ b/python/nano/src/bigdl/nano/pytorch/inference/optimizer.py @@ -175,8 +175,13 @@ def optimize(self, model: nn.Module, input_sample = tuple(next(iter(training_data))[:-1]) st = time.perf_counter() - with torch.no_grad(): - model(*input_sample) + try: + with torch.no_grad(): + model(*input_sample) + except: + invalidInputError(False, + "training_data is incompatible with your model input.") + exit(1) baseline_time = time.perf_counter() - st print("==========================Start Optimization==========================") From a2ce597f246bafde0847dcaea32bb014e679cd4f Mon Sep 17 00:00:00 2001 From: "ruoan1.wang" Date: Wed, 14 Sep 2022 07:42:40 -0400 Subject: [PATCH 06/13] fix except --- python/nano/src/bigdl/nano/pytorch/inference/optimizer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/nano/src/bigdl/nano/pytorch/inference/optimizer.py b/python/nano/src/bigdl/nano/pytorch/inference/optimizer.py index d34d8c9a137..42265f3fa63 100644 --- a/python/nano/src/bigdl/nano/pytorch/inference/optimizer.py +++ b/python/nano/src/bigdl/nano/pytorch/inference/optimizer.py @@ -178,7 +178,7 @@ def optimize(self, model: nn.Module, try: with torch.no_grad(): model(*input_sample) - except: + except Exception: invalidInputError(False, "training_data is incompatible with your model input.") exit(1) From 2294dee380fcc69779d70cd2e6b7090957326234 Mon Sep 17 00:00:00 2001 From: "ruoan1.wang" Date: Wed, 14 Sep 2022 07:43:45 -0400 Subject: [PATCH 07/13] fix --- python/nano/example/pytorch/inference_pipeline/resnet/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/nano/example/pytorch/inference_pipeline/resnet/README.md b/python/nano/example/pytorch/inference_pipeline/resnet/README.md index 29aaf31a5a7..88a228acf2b 100644 --- a/python/nano/example/pytorch/inference_pipeline/resnet/README.md +++ b/python/nano/example/pytorch/inference_pipeline/resnet/README.md @@ -1,7 +1,7 @@ # Bigdl-nano InferenceOptimizer example on Cat vs. Dog dataset This example illustrates how to apply InferenceOptimizer to quickly find acceleration method with the minimum inference latency under specific restrictions or without restrictions for a trained model. 
-For the sake of this example, we first train the proposed network(by default, a ResNet18 is used) on the [cats and dogs dataset](https://storage.googleapis.com/mledu-datasets/cats_and_dogs_filtered.zip), which consists both [frozen and unfrozen stages](https://github.com/PyTorchLightning/pytorch-lightning/blob/495812878dfe2e31ec2143c071127990afbb082b/pl_examples/domain_templates/computer_vision_fine_tuning.py#L21-L35). Then, by calling `optimize()`, we can obtain all available accelaration combinations provided by BigDL-Nano for inference. By calling `get_best_mdoel()` , we could get an accelerated model whose inference is 5.5x times faster. +For the sake of this example, we first train the proposed network(by default, a ResNet18 is used) on the [cats and dogs dataset](https://storage.googleapis.com/mledu-datasets/cats_and_dogs_filtered.zip), which consists both [frozen and unfrozen stages](https://github.com/PyTorchLightning/pytorch-lightning/blob/495812878dfe2e31ec2143c071127990afbb082b/pl_examples/domain_templates/computer_vision_fine_tuning.py#L21-L35). Then, by calling `optimize()`, we can obtain all available accelaration combinations provided by BigDL-Nano for inference. By calling `get_best_mdoel()` , we could get an accelerated model whose inference is 6.5x times faster. ## Prepare the environment From 7d9b5df1b49d45c94a180b0d4d3c2e61f8ef34fd Mon Sep 17 00:00:00 2001 From: "ruoan1.wang" Date: Wed, 14 Sep 2022 12:11:14 -0400 Subject: [PATCH 08/13] update based on comment --- .../inference_pipeline/resnet/README.md | 49 ++++++++++--------- .../resnet/inference_pipeline.py | 6 --- .../bigdl/nano/pytorch/inference/optimizer.py | 33 ++++++++----- 3 files changed, 47 insertions(+), 41 deletions(-) diff --git a/python/nano/example/pytorch/inference_pipeline/resnet/README.md b/python/nano/example/pytorch/inference_pipeline/resnet/README.md index 88a228acf2b..aca8c3c6be1 100644 --- a/python/nano/example/pytorch/inference_pipeline/resnet/README.md +++ b/python/nano/example/pytorch/inference_pipeline/resnet/README.md @@ -29,18 +29,23 @@ unset KMP_AFFINITY ``` You may find environment variables set like follows: ``` +OpenMP library found... Setting OMP_NUM_THREADS... Setting OMP_NUM_THREADS specified for pytorch... Setting KMP_AFFINITY... Setting KMP_BLOCKTIME... Setting MALLOC_CONF... +Setting LD_PRELOAD... +nano_vars.sh already exists +++++ Env Variables +++++ -LD_PRELOAD=./../lib/libjemalloc.so -MALLOC_CONF=oversize_threshold:1,background_thread:true,metadata_thp:auto,dirty_decay_ms:-1,muzzy_decay_ms:-1 +LD_PRELOAD=/opt/anaconda3/envs/nano/bin/../lib/libiomp5.so /opt/anaconda3/envs/nano/lib/python3.7/site-packages/bigdl/nano//libs/libtcmalloc.so +MALLOC_CONF= OMP_NUM_THREADS=112 KMP_AFFINITY=granularity=fine,compact,1,0 KMP_BLOCKTIME=1 -TF_ENABLE_ONEDNN_OPTS= +TF_ENABLE_ONEDNN_OPTS=1 +ENABLE_TF_OPTS=1 +NANO_TF_INTER_OP=1 +++++++++++++++++++++++++ Complete. ``` @@ -60,27 +65,25 @@ python inference_pipeline.py It will take about 1 minute to run inference optimization. 
Then you may find the result for inference as follows: ``` ==========================Optimization Results========================== - -------------------------------- ---------------------- -------------- ------------ -| method | status | latency(ms) | accuracy | - -------------------------------- ---------------------- -------------- ------------ -| original | successful | 43.447 | 0.994 | -| fp32_ipex | successful | 32.827 | 0.994 | -| bf16 | fail to forward | None | None | -| bf16_ipex | pruned | 201.702 | None | -| int8 | successful | 10.992 | 0.994 | -| jit_fp32 | successful | 36.741 | 0.994 | -| jit_fp32_ipex | successful | 33.293 | 0.994 | -| jit_fp32_ipex_channels_last | successful | 19.523 | 0.994 | -| openvino_fp32 | successful | 10.51 | 0.994 | -| openvino_int8 | successful | 6.637 | 0.994 | -| onnxruntime_fp32 | successful | 20.55 | 0.994 | -| onnxruntime_int8_qlinear | successful | 8.15 | 0.994 | -| onnxruntime_int8_integer | fail to convert | None | None | - -------------------------------- ---------------------- -------------- ------------ + -------------------------------- ---------------------- -------------- ---------------------- +| method | status | latency(ms) | accuracy | + -------------------------------- ---------------------- -------------- ---------------------- +| original | successful | 43.688 | 0.969 | +| fp32_ipex | successful | 33.383 | not recomputed | +| bf16 | fail to forward | None | None | +| bf16_ipex | early stopped | 203.897 | None | +| int8 | successful | 10.74 | 0.969 | +| jit_fp32 | successful | 38.732 | not recomputed | +| jit_fp32_ipex | successful | 35.205 | not recomputed | +| jit_fp32_ipex_channels_last | successful | 19.327 | not recomputed | +| openvino_fp32 | successful | 10.215 | not recomputed | +| openvino_int8 | successful | 8.192 | 0.969 | +| onnxruntime_fp32 | successful | 20.931 | not recomputed | +| onnxruntime_int8_qlinear | successful | 8.274 | 0.969 | +| onnxruntime_int8_integer | fail to convert | None | None | + -------------------------------- ---------------------- -------------- ---------------------- Optimization cost 64.3s at all. ===========================Stop Optimization=========================== -When accelerator is onnxruntime, the model with minimal latency is: inc + onnxruntime + qlinear -When accuracy drop less than 5%, the model with minimal latency is: openvino + pot -The model with minimal latency is: openvino + pot +When accuracy drop less than 5%, the model with minimal latency is: openvino + int8 ``` \ No newline at end of file diff --git a/python/nano/example/pytorch/inference_pipeline/resnet/inference_pipeline.py b/python/nano/example/pytorch/inference_pipeline/resnet/inference_pipeline.py index aeeb2d47638..372f68dc534 100644 --- a/python/nano/example/pytorch/inference_pipeline/resnet/inference_pipeline.py +++ b/python/nano/example/pytorch/inference_pipeline/resnet/inference_pipeline.py @@ -49,15 +49,9 @@ def accuracy(pred, target): latency_sample_num=30) # 4. Get the best model under specific restrictions or without restrictions - acc_model, option = optimizer.get_best_model(accelerator="onnxruntime") - print("When accelerator is onnxruntime, the model with minimal latency is: ", option) - acc_model, option = optimizer.get_best_model(accuracy_criterion=0.05) print("When accuracy drop less than 5%, the model with minimal latency is: ", option) - acc_model, option = optimizer.get_best_model() - print("The model with minimal latency is: ", option) - # 5. 
Inference with accelerated model x_input = next(iter(datamodule.train_dataloader(batch_size=1)))[0] output = acc_model(x_input) diff --git a/python/nano/src/bigdl/nano/pytorch/inference/optimizer.py b/python/nano/src/bigdl/nano/pytorch/inference/optimizer.py index 42265f3fa63..b9fdbe6138a 100644 --- a/python/nano/src/bigdl/nano/pytorch/inference/optimizer.py +++ b/python/nano/src/bigdl/nano/pytorch/inference/optimizer.py @@ -258,7 +258,7 @@ def func_test(model, input_sample): _throughput_calculate_helper(latency_sample_num, baseline_time, func_test, acce_model, input_sample) if status is False: - result_map[method]["status"] = "pruned" + result_map[method]["status"] = "early stopped" torch.set_num_threads(default_threads) continue except Exception as e: @@ -268,10 +268,10 @@ def func_test(model, input_sample): torch.set_num_threads(default_threads) if self._calculate_accuracy: - # TODO: here we suppose trace don't change accuracy, + # here we suppose trace don't change accuracy, # so we jump it to reduce time cost of optimize if precision == "fp32" and method != "original": - result_map[method]["accuracy"] = result_map["original"]["accuracy"] + result_map[method]["accuracy"] = "not recomputed" else: result_map[method]["accuracy"] =\ _accuracy_calculate_helper(acce_model, @@ -354,9 +354,11 @@ def get_best_model(self, continue if accuracy_criterion is not None: - accuracy: float = result["accuracy"] + accuracy = result["accuracy"] compare_acc: float = best_metric.accuracy - if self._direction == "min": + if accuracy == "not recomputed": + pass + elif self._direction == "min": if (accuracy - compare_acc) / compare_acc > accuracy_criterion: continue else: @@ -366,7 +368,11 @@ def get_best_model(self, # After the above conditions are met, the latency comparison is performed if result["latency"] < best_metric.latency: best_model = result["model"] - best_metric = CompareMetric(method, result["latency"], result["accuracy"]) + if result["accuracy"] != "not recomputed": + accuracy = result["accuracy"] + else: + accuracy = self.optimized_model_dict["original"]["accuracy"] + best_metric = CompareMetric(method, result["latency"], accuracy) return best_model, _format_acceleration_option(best_metric.method_name) @@ -640,7 +646,7 @@ def _openvino_checker(): ''' check if openvino-dev is installed ''' - return not find_spec("openvino") is None + return not find_spec("openvino-dev") is None def _bf16_checker(): @@ -719,7 +725,10 @@ def _format_acceleration_option(method_name: str) -> str: repr_str = "" for key, value in option.__dict__.items(): if value is True: - repr_str = repr_str + key + " + " + if key == "pot": + repr_str = repr_str + "int8" + " + " + else: + repr_str = repr_str + key + " + " elif isinstance(value, str): repr_str = repr_str + value + " + " if len(repr_str) > 0: @@ -734,9 +743,9 @@ def _format_optimize_result(optimize_result_dict: dict, ''' if calculate_accuracy is True: horizontal_line = " {0} {1} {2} {3}\n" \ - .format("-" * 32, "-" * 22, "-" * 14, "-" * 12) + .format("-" * 32, "-" * 22, "-" * 14, "-" * 22) repr_str = horizontal_line - repr_str += "| {0:^30} | {1:^20} | {2:^12} | {3:^10} |\n" \ + repr_str += "| {0:^30} | {1:^20} | {2:^12} | {3:^20} |\n" \ .format("method", "status", "latency(ms)", "accuracy") repr_str += horizontal_line for method, result in optimize_result_dict.items(): @@ -745,10 +754,10 @@ def _format_optimize_result(optimize_result_dict: dict, if latency != "None": latency = round(latency, 3) accuracy = result.get("accuracy", "None") - if accuracy != "None": + if 
accuracy != "None" and isinstance(accuracy, float): accuracy = round(accuracy, 3) method_str = f"| {method:^30} | {status:^20} | " \ - f"{latency:^12} | {accuracy:^10} |\n" + f"{latency:^12} | {accuracy:^20} |\n" repr_str += method_str repr_str += horizontal_line else: From 5e938361ff1891c57e02c21aa72f367e58d29976 Mon Sep 17 00:00:00 2001 From: "ruoan1.wang" Date: Wed, 14 Sep 2022 16:47:36 -0400 Subject: [PATCH 09/13] update readme and add ut --- .../inference_pipeline/resnet/README.md | 7 ++--- .../tests/test_inference_pipeline_ipex.py | 30 +++++++++++++++++++ 2 files changed, 33 insertions(+), 4 deletions(-) diff --git a/python/nano/example/pytorch/inference_pipeline/resnet/README.md b/python/nano/example/pytorch/inference_pipeline/resnet/README.md index aca8c3c6be1..537200fad00 100644 --- a/python/nano/example/pytorch/inference_pipeline/resnet/README.md +++ b/python/nano/example/pytorch/inference_pipeline/resnet/README.md @@ -1,7 +1,7 @@ # Bigdl-nano InferenceOptimizer example on Cat vs. Dog dataset This example illustrates how to apply InferenceOptimizer to quickly find acceleration method with the minimum inference latency under specific restrictions or without restrictions for a trained model. -For the sake of this example, we first train the proposed network(by default, a ResNet18 is used) on the [cats and dogs dataset](https://storage.googleapis.com/mledu-datasets/cats_and_dogs_filtered.zip), which consists both [frozen and unfrozen stages](https://github.com/PyTorchLightning/pytorch-lightning/blob/495812878dfe2e31ec2143c071127990afbb082b/pl_examples/domain_templates/computer_vision_fine_tuning.py#L21-L35). Then, by calling `optimize()`, we can obtain all available accelaration combinations provided by BigDL-Nano for inference. By calling `get_best_mdoel()` , we could get an accelerated model whose inference is 6.5x times faster. +For the sake of this example, we first train the proposed network(by default, a ResNet18 is used) on the [cats and dogs dataset](https://storage.googleapis.com/mledu-datasets/cats_and_dogs_filtered.zip), which consists both [frozen and unfrozen stages](https://github.com/PyTorchLightning/pytorch-lightning/blob/495812878dfe2e31ec2143c071127990afbb082b/pl_examples/domain_templates/computer_vision_fine_tuning.py#L21-L35). Then, by calling `optimize()`, we can obtain all available accelaration combinations provided by BigDL-Nano for inference. By calling `get_best_mdoel()` , we could get an accelerated model whose inference is 5x times faster. ## Prepare the environment @@ -25,7 +25,6 @@ pip install --upgrade numpy==1.21.6 Initialize environment variables with script `bigdl-nano-init` installed with bigdl-nano. ``` source bigdl-nano-init -unset KMP_AFFINITY ``` You may find environment variables set like follows: ``` @@ -41,7 +40,7 @@ nano_vars.sh already exists LD_PRELOAD=/opt/anaconda3/envs/nano/bin/../lib/libiomp5.so /opt/anaconda3/envs/nano/lib/python3.7/site-packages/bigdl/nano//libs/libtcmalloc.so MALLOC_CONF= OMP_NUM_THREADS=112 -KMP_AFFINITY=granularity=fine,compact,1,0 +KMP_AFFINITY=granularity=fine KMP_BLOCKTIME=1 TF_ENABLE_ONEDNN_OPTS=1 ENABLE_TF_OPTS=1 @@ -85,5 +84,5 @@ It will take about 1 minute to run inference optimization. Then you may find the Optimization cost 64.3s at all. 
===========================Stop Optimization=========================== -When accuracy drop less than 5%, the model with minimal latency is: openvino + int8 +When accuracy drop less than 5%, the model with minimal latency is: openvino + int8 ``` \ No newline at end of file diff --git a/python/nano/test/pytorch/tests/test_inference_pipeline_ipex.py b/python/nano/test/pytorch/tests/test_inference_pipeline_ipex.py index 1d939aa7541..a48edc54e5e 100644 --- a/python/nano/test/pytorch/tests/test_inference_pipeline_ipex.py +++ b/python/nano/test/pytorch/tests/test_inference_pipeline_ipex.py @@ -63,6 +63,8 @@ class TestInferencePipeline(TestCase): model = Net() test_loader = create_data_loader(data_dir, 1, num_workers, data_transform, subset=10, shuffle=False) train_loader = create_data_loader(data_dir, 32, num_workers, data_transform, subset=10, shuffle=True) + input_sample = next(iter(test_loader))[0] + print(input_sample.shape) loss = nn.CrossEntropyLoss() optimizer = torch.optim.Adam(model.parameters(), lr=0.001) @@ -111,3 +113,31 @@ def test_pipeline_without_metric(self): error_msg = e.value.args[0] assert error_msg == "If you want to specify accuracy_criterion, you need "\ "to set metric and validation_data when call 'optimize'." + + def test_summary(self): + inference_opt = InferenceOptimizer() + with pytest.raises(RuntimeError) as e: + inference_opt.summary() + error_msg = e.value.args[0] + assert error_msg == "There is no optimization result. You should call .optimize() "\ + "before summary()" + inference_opt.optimize(model=self.model, + training_data=self.train_loader, + thread_num=1) + inference_opt.summary() + + def test_wrong_data_loader(self): + fake_transform = transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)), + transforms.Resize(64), + ]) + fake_train_loader = create_data_loader(self.data_dir, 32, self.num_workers, + fake_transform, subset=10, shuffle=True) + inference_opt = InferenceOptimizer() + with pytest.raises(RuntimeError) as e: + inference_opt.optimize(model=self.model, + training_data=fake_train_loader, + thread_num=1) + error_msg = e.value.args[0] + assert error_msg == "training_data is incompatible with your model input." 
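The "pruned" / "early stopped" status that appears in the result tables above comes from the latency helper: each candidate method is timed for a few forward passes, and if the first three runs together exceed roughly 12x the eager-mode baseline (measured once on a single training batch before optimization starts), the method is dropped without finishing the full measurement. Below is a minimal standalone sketch of that idea, simplified from `_throughput_calculate_helper` as modified in these patches; the wrapper name `measure_latency` and its defaults are illustrative rather than the exact BigDL-Nano API.

```python
import time
import numpy as np
import torch


def measure_latency(func, args, baseline_time, iterrun=100):
    """Time ``func(*args)`` repeatedly; return (mean latency in ms, keep_flag).

    keep_flag is False when the first three runs already cost more than
    12x the eager-mode baseline (~4x per run on average), which is what
    the "early stopped" status in the summary table reports.
    """
    time_list = []
    start_time = time.perf_counter()
    with torch.no_grad():
        for i in range(iterrun):
            st = time.perf_counter()
            func(*args)
            end = time.perf_counter()
            time_list.append(end - st)
            # prune methods that are hopelessly slow after three runs
            if i == 2 and end - start_time > 12 * baseline_time:
                return np.mean(time_list) * 1000, False
            # run at least 10 iterations, but cap the total time spent here
            if i + 1 >= min(iterrun, 10) and (end - start_time) > 2:
                iterrun = i + 1
                break
    # drop the fastest and slowest 10% of samples before averaging
    time_list.sort()
    time_list = time_list[int(0.1 * iterrun): int(0.9 * iterrun)]
    return np.mean(time_list) * 1000, True
```
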
From e1ec2b5144bccc96db07f960d170eccfe7478137 Mon Sep 17 00:00:00 2001 From: "ruoan1.wang" Date: Thu, 15 Sep 2022 04:25:33 -0400 Subject: [PATCH 10/13] update input_sample --- .../src/bigdl/nano/pytorch/inference/optimizer.py | 6 +++--- .../nano/utils/inference/pytorch/model_utils.py | 14 +++++++++----- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/python/nano/src/bigdl/nano/pytorch/inference/optimizer.py b/python/nano/src/bigdl/nano/pytorch/inference/optimizer.py index b9fdbe6138a..d4f5d85db50 100644 --- a/python/nano/src/bigdl/nano/pytorch/inference/optimizer.py +++ b/python/nano/src/bigdl/nano/pytorch/inference/optimizer.py @@ -34,6 +34,7 @@ load_onnxruntime_model from bigdl.nano.deps.neural_compressor.inc_api import load_inc_model, quantize as inc_quantize from bigdl.nano.utils.inference.pytorch.model import AcceleratedLightningModule +from bigdl.nano.utils.inference.pytorch.model_utils import get_input_example from bigdl.nano.pytorch.utils import TORCH_VERSION_LESS_1_10 import warnings # Filter out useless Userwarnings @@ -172,8 +173,8 @@ def optimize(self, model: nn.Module, result_map: Dict[str, Dict] = {} model.eval() # change model to eval mode - - input_sample = tuple(next(iter(training_data))[:-1]) + # TODO: inspect to get model args + input_sample = get_input_example(model, training_data) st = time.perf_counter() try: with torch.no_grad(): @@ -181,7 +182,6 @@ def optimize(self, model: nn.Module, except Exception: invalidInputError(False, "training_data is incompatible with your model input.") - exit(1) baseline_time = time.perf_counter() - st print("==========================Start Optimization==========================") diff --git a/python/nano/src/bigdl/nano/utils/inference/pytorch/model_utils.py b/python/nano/src/bigdl/nano/utils/inference/pytorch/model_utils.py index 9003a752c62..fd1c1f30ee4 100644 --- a/python/nano/src/bigdl/nano/utils/inference/pytorch/model_utils.py +++ b/python/nano/src/bigdl/nano/utils/inference/pytorch/model_utils.py @@ -29,10 +29,12 @@ def get_forward_args(model): return forward_args -def get_input_example(model, input_sample): +def get_input_example(model, input_sample, forward_args): if isinstance(input_sample, DataLoader): # TODO: This assumpe the last output is y - input_sample = tuple(next(iter(input_sample))[:-1]) + input_sample = next(iter(input_sample)) + if isinstance(input_sample, list): + input_sample = input_sample[:len(forward_args)] elif input_sample is None: if getattr(model, "example_input_array", None) is not None: input_sample = model.example_input_array @@ -44,7 +46,9 @@ def get_input_example(model, input_sample): try: dataloader = dataloader_fn() # TODO: This assumpe the last output is y - input_sample = tuple(next(iter(dataloader)))[:-1] + input_sample = next(iter(input_sample)) + if isinstance(input_sample, list): + input_sample = input_sample[:len(forward_args)] break except Exception as _e: pass @@ -73,13 +77,13 @@ def export_to_onnx(model, input_sample=None, onnx_path="model.onnx", dynamic_axe :param dynamic_axes: If we set the first dim of each input as a dynamic batch_size :param **kwargs: will be passed to torch.onnx.export function. 
''' - input_sample = get_input_example(model, input_sample) + forward_args = get_forward_args(model) + input_sample = get_input_example(model, input_sample, forward_args) invalidInputError(input_sample is not None, 'You should implement at least one of model.test_dataloader, ' 'model.train_dataloader, model.val_dataloader and ' 'model.predict_dataloader, ' 'or set one of input_sample and model.example_input_array') - forward_args = get_forward_args(model) if dynamic_axes: dynamic_axes = {} for arg in forward_args: From 0ac8b552da22c3c11c5435bbd8ccdf8a37e9e032 Mon Sep 17 00:00:00 2001 From: "ruoan1.wang" Date: Thu, 15 Sep 2022 04:29:49 -0400 Subject: [PATCH 11/13] fix --- .../nano/utils/inference/pytorch/model_utils.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/python/nano/src/bigdl/nano/utils/inference/pytorch/model_utils.py b/python/nano/src/bigdl/nano/utils/inference/pytorch/model_utils.py index fd1c1f30ee4..d2e5ec2e616 100644 --- a/python/nano/src/bigdl/nano/utils/inference/pytorch/model_utils.py +++ b/python/nano/src/bigdl/nano/utils/inference/pytorch/model_utils.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # -from typing import Any +from typing import Any, Sequence from bigdl.nano.pytorch.lightning import LightningModule import inspect from torch.utils.data import DataLoader @@ -31,10 +31,9 @@ def get_forward_args(model): def get_input_example(model, input_sample, forward_args): if isinstance(input_sample, DataLoader): - # TODO: This assumpe the last output is y input_sample = next(iter(input_sample)) - if isinstance(input_sample, list): - input_sample = input_sample[:len(forward_args)] + if isinstance(input_sample, Sequence): + input_sample = tuple(list(input_sample)[:len(forward_args)]) elif input_sample is None: if getattr(model, "example_input_array", None) is not None: input_sample = model.example_input_array @@ -45,10 +44,9 @@ def get_input_example(model, input_sample, forward_args): model.val_dataloader]: try: dataloader = dataloader_fn() - # TODO: This assumpe the last output is y input_sample = next(iter(input_sample)) - if isinstance(input_sample, list): - input_sample = input_sample[:len(forward_args)] + if isinstance(input_sample, Sequence): + input_sample = tuple(list(input_sample)[:len(forward_args)]) break except Exception as _e: pass From 39497d80889e467ca983a6aa2a5d40b54598f09b Mon Sep 17 00:00:00 2001 From: "ruoan1.wang" Date: Thu, 15 Sep 2022 04:34:54 -0400 Subject: [PATCH 12/13] delete redundant lines --- python/nano/test/pytorch/tests/test_inference_pipeline_ipex.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/python/nano/test/pytorch/tests/test_inference_pipeline_ipex.py b/python/nano/test/pytorch/tests/test_inference_pipeline_ipex.py index a48edc54e5e..e2845f49d31 100644 --- a/python/nano/test/pytorch/tests/test_inference_pipeline_ipex.py +++ b/python/nano/test/pytorch/tests/test_inference_pipeline_ipex.py @@ -63,8 +63,6 @@ class TestInferencePipeline(TestCase): model = Net() test_loader = create_data_loader(data_dir, 1, num_workers, data_transform, subset=10, shuffle=False) train_loader = create_data_loader(data_dir, 32, num_workers, data_transform, subset=10, shuffle=True) - input_sample = next(iter(test_loader))[0] - print(input_sample.shape) loss = nn.CrossEntropyLoss() optimizer = torch.optim.Adam(model.parameters(), lr=0.001) From 5530c7e99cb06fefdefd6e9c85cf3499d4c59139 Mon Sep 17 00:00:00 2001 From: "ruoan1.wang" Date: Thu, 15 Sep 2022 
05:09:29 -0400 Subject: [PATCH 13/13] add forward_args --- python/nano/src/bigdl/nano/pytorch/inference/optimizer.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/python/nano/src/bigdl/nano/pytorch/inference/optimizer.py b/python/nano/src/bigdl/nano/pytorch/inference/optimizer.py index d4f5d85db50..09a370d59bc 100644 --- a/python/nano/src/bigdl/nano/pytorch/inference/optimizer.py +++ b/python/nano/src/bigdl/nano/pytorch/inference/optimizer.py @@ -34,7 +34,7 @@ load_onnxruntime_model from bigdl.nano.deps.neural_compressor.inc_api import load_inc_model, quantize as inc_quantize from bigdl.nano.utils.inference.pytorch.model import AcceleratedLightningModule -from bigdl.nano.utils.inference.pytorch.model_utils import get_input_example +from bigdl.nano.utils.inference.pytorch.model_utils import get_forward_args, get_input_example from bigdl.nano.pytorch.utils import TORCH_VERSION_LESS_1_10 import warnings # Filter out useless Userwarnings @@ -173,8 +173,9 @@ def optimize(self, model: nn.Module, result_map: Dict[str, Dict] = {} model.eval() # change model to eval mode - # TODO: inspect to get model args - input_sample = get_input_example(model, training_data) + + forward_args = get_forward_args(model) + input_sample = get_input_example(model, training_data, forward_args) st = time.perf_counter() try: with torch.no_grad():