Add enable_profiling for perf tuning (microsoft#850)
## Describe your changes
Add enable_profiling for perf tuning
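For context, a minimal sketch of how a user would switch the new option on in a perf-tuning pass config. The `"type"`/`"config"` layout and the `OrtPerfTuning` pass name are assumptions based on common Olive workflow configs; only `enable_profiling` itself comes from this change:

```python
# Hedged sketch of an Olive pass entry enabling the new option. Apart from
# "enable_profiling" (added by this change, default False), the key names
# here are illustrative assumptions, not taken from this PR.
perf_tuning_pass = {
    "type": "OrtPerfTuning",
    "config": {
        "enable_profiling": True,  # emit an ONNX Runtime profile during tuning runs
    },
}
```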
## Checklist before requesting a review
- [ ] Add unit tests for this change.
- [ ] Make sure all tests can pass.
- [ ] Update documents if necessary.
- [ ] Lint and apply fixes to your code by running `lintrunner -a`
- [ ] Is this a user-facing change? If yes, give a description of this change to be included in the release notes.

## (Optional) Issue link
guotuofeng authored Jan 2, 2024
1 parent 4018e27 commit cde3c73
Showing 3 changed files with 15 additions and 3 deletions.
3 changes: 2 additions & 1 deletion docs/source/features/passes/onnx.md
@@ -336,7 +336,8 @@ improve performance.
 },
 ],
 "CPUExecutionProvider",
-]
+],
+"enable_profiling": false,
 }
 }
 ```
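With `enable_profiling` switched on, ONNX Runtime emits a chrome-trace-style JSON profile for the session. A hedged sketch of post-processing such a file; the filename below is hypothetical (real ones are timestamped, e.g. `onnxruntime_profile_<timestamp>.json`):

```python
import json

# Load a profile written by ONNX Runtime; the filename is a placeholder
# for the timestamped file the runtime actually produces.
with open("onnxruntime_profile_example.json") as f:
    events = json.load(f)

# Per-operator timings are trace events with category "Node"; durations
# are reported in microseconds. Print the ten slowest operators.
nodes = [e for e in events if e.get("cat") == "Node"]
for e in sorted(nodes, key=lambda e: e.get("dur", 0), reverse=True)[:10]:
    print(f'{e["name"]}: {e.get("dur", 0)} us')
```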
5 changes: 3 additions & 2 deletions olive/passes/onnx/inc_quantization.py
@@ -470,8 +470,9 @@ def _set_woq_config(self, run_config):
     def _run_for_config(
         self, model: ONNXModelHandler, data_root: str, config: Dict[str, Any], output_model_path: str
     ) -> ONNXModelHandler:
-        # set the log level for neural-compressor
-        os.environ["LOGLEVEL"] = logging.getLevelName(logger.getEffectiveLevel())
+        if "LOGLEVEL" not in os.environ:
+            # set the log level for neural-compressor
+            os.environ["LOGLEVEL"] = logging.getLevelName(logger.getEffectiveLevel())

         try:
             from neural_compressor import quantization
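The guard above keeps a user-supplied `LOGLEVEL` intact instead of overwriting it. As a side note, the same behavior can be written with `os.environ.setdefault`; a minimal sketch:

```python
import logging
import os

logger = logging.getLogger(__name__)

# setdefault writes LOGLEVEL only when the variable is not already set, so
# an explicit `LOGLEVEL=DEBUG ...` exported by the user keeps precedence.
os.environ.setdefault("LOGLEVEL", logging.getLevelName(logger.getEffectiveLevel()))
```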
10 changes: 10 additions & 0 deletions olive/passes/onnx/perf_tuning.py
@@ -368,6 +368,11 @@ def get_benchmark(model, data_root, latency_metric, config, test_params=None, io
     inference_settings["execution_provider"] = execution_providers
     inference_settings["provider_options"] = provider_options

+    if config.enable_profiling:
+        if "session_options" not in inference_settings:
+            inference_settings["session_options"] = {}
+        inference_settings["session_options"]["enable_profiling"] = True
+
     # set the session_options for metrics so that the evaluator will use them by default
     latency_metric.user_config.io_bind = io_bind
     latency_metric.user_config.inference_settings = {"onnx": inference_settings}
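For reference, a minimal sketch of what this session option means in raw ONNX Runtime, independent of Olive (the model path is a placeholder):

```python
import onnxruntime as ort

# enable_profiling is a plain SessionOptions flag in ONNX Runtime.
sess_options = ort.SessionOptions()
sess_options.enable_profiling = True

session = ort.InferenceSession("model.onnx", sess_options)  # placeholder path
# ... session.run(...) as usual ...
profile_file = session.end_profiling()  # returns the name of the JSON trace
print(profile_file)
```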
@@ -495,6 +500,11 @@ def _default_config(accelerator_spec: AcceleratorSpec) -> Dict[str, PassConfigParam
                     " which are different from the associated execution provider."
                 ),
             ),
+            "enable_profiling": PassConfigParam(
+                type_=bool,
+                default_value=False,
+                description="Whether to enable profiling for ONNX Runtime inference.",
+            ),
         }

     def _run_for_config(
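For context on how a declared parameter like this reaches `config.enable_profiling` in `_run_for_config`: the user's pass config is validated against these `PassConfigParam` entries, with `default_value` filling in omitted keys. A simplified, hypothetical stand-in (not Olive's actual resolution code):

```python
from dataclasses import dataclass
from typing import Any, Dict

@dataclass
class Param:  # hypothetical stand-in for PassConfigParam
    type_: type
    default_value: Any
    description: str

SCHEMA: Dict[str, Param] = {
    "enable_profiling": Param(bool, False, "Whether to enable profiling."),
}

def resolve(user_config: Dict[str, Any]) -> Dict[str, Any]:
    # Fall back to each parameter's default when the user omits the key.
    return {k: user_config.get(k, p.default_value) for k, p in SCHEMA.items()}

print(resolve({}))                          # {'enable_profiling': False}
print(resolve({"enable_profiling": True}))  # {'enable_profiling': True}
```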
