Support DirectML EP (#1144)
Signed-off-by: Mengni Wang <[email protected]>
mengniwang95 authored Aug 23, 2023
1 parent 3018319 commit 750bb9b
Showing 6 changed files with 89 additions and 13 deletions.
2 changes: 2 additions & 0 deletions neural_compressor/adaptor/onnxrt.py
@@ -95,6 +95,8 @@ def __init__(self, framework_specific_info):
config_file = 'onnxrt_cuda.yaml'
elif self.backend == 'DnnlExecutionProvider':
config_file = 'onnxrt_dnnl.yaml'
elif self.backend == 'DmlExecutionProvider':
config_file = 'onnxrt_dml.yaml'
else: # pragma: no cover
assert False, "{} provider is not supported in current environment, " \
"supported providers: {}".format(self.backend,
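
With this dispatch in place, selecting the DirectML backend routes the ONNX Runtime adaptor to the new onnxrt_dml.yaml capability file shown below. A minimal usage sketch, assuming the onnxruntime-directml package is installed; 'model.onnx' and calib_loader are hypothetical placeholders:

    # a sketch, not part of this commit: quantize via the DirectML backend
    from neural_compressor import PostTrainingQuantConfig
    from neural_compressor.quantization import fit

    conf = PostTrainingQuantConfig(backend='onnxrt_dml_ep')  # selects DmlExecutionProvider
    q_model = fit(model='model.onnx', conf=conf, calib_dataloader=calib_loader)
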
67 changes: 67 additions & 0 deletions neural_compressor/adaptor/onnxrt_dml.yaml
@@ -0,0 +1,67 @@
## Copyright (c) 2021 Intel Corporation
##
## Licensed under the Apache License, Version 2.0 (the "License");
## you may not use this file except in compliance with the License.
## You may obtain a copy of the License at
##
## http://www.apache.org/licenses/LICENSE-2.0
##
## Unless required by applicable law or agreed to in writing, software
## distributed under the License is distributed on an "AS IS" BASIS,
## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
## See the License for the specific language governing permissions and
## limitations under the License.
##
#

-
version:
name: '1.13.1'
int8: &ref_1_13 {
'static': &ref_1_13_static {
'Conv': &default_static_qlinear_qdq {
'weight': &int8_sym_pertensor_minmax {
'dtype': ['int8'],
'scheme': ['sym'],
'granularity': ['per_tensor'],
'algorithm': ['minmax']
},
'activation': &uint8_asym_pertensor_minmax {
'dtype': ['uint8'],
'scheme': ['asym'],
'granularity': ['per_tensor'],
'algorithm': ['minmax']
},
'mode': ['QDQ', 'QLinear']
},
'MatMul': {
'weight': *int8_sym_pertensor_minmax,
'activation': *uint8_asym_pertensor_minmax,
'mode': ['QDQ', 'QLinear']
},
'Mul': &default_static_qlinear {
'weight': *int8_sym_pertensor_minmax,
'activation': *uint8_asym_pertensor_minmax,
'mode': ['QLinear']
},
'Relu': *default_static_qlinear_qdq,
'Clip': *default_static_qlinear_qdq,
'MaxPool': *default_static_qlinear_qdq,
'Add': *default_static_qlinear,
},
}
fp16: &common_fp16 ['Add', 'GlobalAveragePool', 'AveragePool', 'SpaceToDepth', 'Sigmoid', 'Mul',
'Softmax', 'Gemm', 'MatMul', 'Conv', 'Concat', 'Upsample', 'Pow', 'Sqrt', 'DepthToSpace',
'Clip', 'BatchNormalization', 'Transpose', 'Squeeze', 'MaxPool', 'Relu']

recipes: &default_optimization
graph_optimization: # from onnxruntime graph_optimization_level
level: ['DISABLE_ALL', 'ENABLE_BASIC', 'ENABLE_EXTENDED', 'ENABLE_ALL']

-
version:
name: 'default'
int8: *ref_1_13
recipes:
<<: *default_optimization
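
The &anchor/*alias/<<: merge syntax keeps the file compact: the 'default' version entry reuses the 1.13.1 int8 table by reference instead of repeating it. How the aliases expand can be checked directly; a sketch, assuming PyYAML is installed:

    # a sketch: inspect the resolved capability table
    import yaml

    with open('neural_compressor/adaptor/onnxrt_dml.yaml') as f:
        v113, vdefault = yaml.safe_load(f)  # top level is a two-item list

    assert vdefault['int8'] is v113['int8']  # *ref_1_13 resolves to the same object
    print(vdefault['recipes']['graph_optimization']['level'])  # merged in via '<<'

The graph_optimization levels mirror onnxruntime's GraphOptimizationLevel enum, e.g. 'ENABLE_BASIC' corresponds to ort.GraphOptimizationLevel.ORT_ENABLE_BASIC on a SessionOptions object.
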
4 changes: 3 additions & 1 deletion neural_compressor/adaptor/ox_utils/operators/direct_q8.py
@@ -18,7 +18,9 @@

from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator, qop_registry, QOperator

@op_registry(op_types="Reshape, Transpose, Squeeze, Unsqueeze, Flatten, Expand, Slice")

@op_registry(op_types="Reshape, Transpose, Squeeze, Unsqueeze, Flatten, Expand, Slice, " \
"SpaceToDepth, DepthToSpace, Upsample")
class Direct8BitOperator(Operator):
"""Direct8Bit Operator."""

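
op_registry is a class decorator that records which Operator subclass handles each op type in the comma-separated string, so extending the string is all it takes to route SpaceToDepth, DepthToSpace, and Upsample through Direct8BitOperator. A simplified sketch of the pattern, not the exact neural_compressor implementation:

    # a sketch of the registry pattern behind @op_registry
    OPERATORS = {}

    def op_registry(op_types):
        def decorator(cls):
            for op_type in [t.strip() for t in op_types.split(',')]:
                OPERATORS[op_type] = cls  # map each op type to its handler class
            return cls
        return decorator

    @op_registry(op_types="SpaceToDepth, DepthToSpace, Upsample")
    class Direct8BitOperator:
        """Pass-through handler for 8-bit ops that reuse the input scale/zero point."""

    assert OPERATORS["Upsample"] is Direct8BitOperator
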
6 changes: 4 additions & 2 deletions neural_compressor/adaptor/ox_utils/util.py
@@ -72,13 +72,15 @@
'onnxrt_trt_ep': 'TensorrtExecutionProvider',
'onnxrt_dnnl_ep': 'DnnlExecutionProvider',
'onnxrt_cuda_ep': 'CUDAExecutionProvider',
'onnxrt_dml_ep': 'DmlExecutionProvider'
}

ONNXRT_BACKENDS = {
'CPUExecutionProvider': 'default',
'TensorrtExecutionProvider': 'onnxrt_trt_ep',
'CUDAExecutionProvider': 'onnxrt_cuda_ep',
'DnnlExecutionProvider': 'onnxrt_dnnl_ep'
'DnnlExecutionProvider': 'onnxrt_dnnl_ep',
'DmlExecutionProvider': 'onnxrt_dml_ep'
}

def dtype_to_name(dtype_mapping, dtype):
@@ -536,4 +538,4 @@ def to_numpy(data):
assert False, "The input data for onnx model is {}, which is not supported " \
"to convert to numpy ndarrays.".format(type(data))
else:
return data
return data
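
PROVIDERS maps neural_compressor backend names to ONNX Runtime execution providers, and ONNXRT_BACKENDS is its inverse, so the patch adds the DirectML entry to both tables to keep them in sync. A runtime sanity check; a sketch assuming the onnxruntime-directml build is installed and 'model.onnx' is a placeholder path:

    # a sketch: confirm the new backend's provider is available before use
    import onnxruntime as ort

    ep = 'DmlExecutionProvider'  # PROVIDERS['onnxrt_dml_ep']
    if ep in ort.get_available_providers():
        sess = ort.InferenceSession('model.onnx', providers=[ep])
    else:
        print(ep + ' not available; install the onnxruntime-directml package')
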
21 changes: 12 additions & 9 deletions neural_compressor/config.py
@@ -259,7 +259,7 @@ class BenchmarkConfig:
inputs (list, optional): A list of strings containing the inputs of model. Default is an empty list.
outputs (list, optional): A list of strings containing the outputs of model. Default is an empty list.
backend (str, optional): Backend name for model execution. Supported values include: 'default', 'itex',
'ipex', 'onnxrt_trt_ep', 'onnxrt_cuda_ep', 'onnxrt_dnnl_ep'.
'ipex', 'onnxrt_trt_ep', 'onnxrt_cuda_ep', 'onnxrt_dnnl_ep', 'onnxrt_dml_ep'.
Default value is 'default'.
warmup (int, optional): The number of iterations to perform warmup before running performance tests.
Default value is 5.
@@ -328,7 +328,7 @@ def backend(self):
def backend(self, backend):
"""Set backend."""
if _check_value('backend', backend, str, [
'default', 'itex', 'ipex', 'onnxrt_trt_ep', 'onnxrt_cuda_ep', 'onnxrt_dnnl_ep']):
'default', 'itex', 'ipex', 'onnxrt_trt_ep', 'onnxrt_cuda_ep', 'onnxrt_dnnl_ep', 'onnxrt_dml_ep']):
self._backend = backend

@property
@@ -694,7 +694,8 @@ class _BaseQuantizationConfig:
inputs: Inputs of model, only required in tensorflow.
outputs: Outputs of model, only required in tensorflow.
backend: Backend for model execution.
Support 'default', 'itex', 'ipex', 'onnxrt_trt_ep', 'onnxrt_cuda_ep', 'onnxrt_dnnl_ep'
Support 'default', 'itex', 'ipex', 'onnxrt_trt_ep', 'onnxrt_cuda_ep', 'onnxrt_dnnl_ep',
'onnxrt_dml_ep'
domain: Model domain. Support 'auto', 'cv', 'object_detection', 'nlp' and 'recommendation_system'.
Adaptor will use specific quantization settings for different domains automatically, and
explicitly specified quantization settings will override the automatic setting.
@@ -1102,7 +1103,7 @@ def backend(self):
@backend.setter
def backend(self, backend):
if _check_value('backend', backend, str, [
'default', 'itex', 'ipex', 'onnxrt_trt_ep', 'onnxrt_cuda_ep', 'onnxrt_dnnl_ep']):
'default', 'itex', 'ipex', 'onnxrt_trt_ep', 'onnxrt_cuda_ep', 'onnxrt_dnnl_ep', 'onnxrt_dml_ep']):
self._backend = backend

@property
@@ -1148,7 +1149,8 @@ class PostTrainingQuantConfig(_BaseQuantizationConfig):
Args:
device: Support 'cpu' and 'gpu'.
backend: Backend for model execution.
Support 'default', 'itex', 'ipex', 'onnxrt_trt_ep', 'onnxrt_cuda_ep', 'onnxrt_dnnl_ep'
Support 'default', 'itex', 'ipex', 'onnxrt_trt_ep', 'onnxrt_cuda_ep', 'onnxrt_dnnl_ep',
'onnxrt_dml_ep'
domain: Model domain. Support 'auto', 'cv', 'object_detection', 'nlp' and 'recommendation_system'.
Adaptor will use specific quantization settings for different domains automatically, and
explicitly specified quantization settings will override the automatic setting.
@@ -1309,7 +1311,8 @@ class QuantizationAwareTrainingConfig(_BaseQuantizationConfig):
Args:
device: Support 'cpu' and 'gpu'.
backend: Backend for model execution.
Support 'default', 'itex', 'ipex', 'onnxrt_trt_ep', 'onnxrt_cuda_ep', 'onnxrt_dnnl_ep'
Support 'default', 'itex', 'ipex', 'onnxrt_trt_ep', 'onnxrt_cuda_ep', 'onnxrt_dnnl_ep',
'onnxrt_dml_ep'
inputs: Inputs of model, only required in tensorflow.
outputs: Outputs of model, only required in tensorflow.
op_type_dict: Tuning constraints on optype-wise for advance user to reduce tuning space.
Expand Down Expand Up @@ -1779,8 +1782,8 @@ class MixedPrecisionConfig(object):
device (str, optional): Device for execution.
Support 'cpu' and 'gpu', default is 'cpu'.
backend (str, optional): Backend for model execution.
Support 'default', 'itex', 'ipex', 'onnxrt_trt_ep', 'onnxrt_cuda_ep', 'onnxrt_dnnl_ep'
default is 'default'.
Support 'default', 'itex', 'ipex', 'onnxrt_trt_ep', 'onnxrt_cuda_ep', 'onnxrt_dnnl_ep',
'onnxrt_dml_ep'. Default is 'default'.
precisions ([str, list], optional): Target precision for mix precision conversion.
Support 'bf16' and 'fp16', default is 'bf16'.
model_name (str, optional): The name of the model. Default value is empty.
@@ -1939,7 +1942,7 @@ def backend(self):
def backend(self, backend):
"""Set backend."""
if _check_value('backend', backend, str, [
'default', 'itex', 'ipex', 'onnxrt_trt_ep', 'onnxrt_cuda_ep', 'onnxrt_dnnl_ep']):
'default', 'itex', 'ipex', 'onnxrt_trt_ep', 'onnxrt_cuda_ep', 'onnxrt_dnnl_ep', 'onnxrt_dml_ep']):
self._backend = backend

@property
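
Every config class above routes its backend string through the same _check_value validation, so adding 'onnxrt_dml_ep' to each setter and docstring makes the value usable across benchmarking, post-training quantization, quantization-aware training, and mixed precision. A sketch of the effect; pairing the backend with fp16 mixed precision matches the fp16 op list in onnxrt_dml.yaml:

    # a sketch: the new backend value now passes validation everywhere
    from neural_compressor.config import BenchmarkConfig, MixedPrecisionConfig

    bench = BenchmarkConfig(backend='onnxrt_dml_ep', warmup=5, iteration=100)
    mp = MixedPrecisionConfig(backend='onnxrt_dml_ep', precisions='fp16')

    try:
        BenchmarkConfig(backend='onnxrt_openvino_ep')  # hypothetical unsupported value
    except AssertionError:
        print('rejected by _check_value, as expected')
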
2 changes: 1 addition & 1 deletion test/adaptor/onnxrt_adaptor/test_adaptor_onnxrt.py
@@ -1482,7 +1482,7 @@ def test_backend(self, mock_warning):
with self.assertRaises(AssertionError) as context:
adaptor = FRAMEWORKS[framework](framework_specific_info)
self.assertEqual(str(context.exception), "'test_backend' backend is not supported, "\
"supported backends include ['default', 'onnxrt_trt_ep', 'onnxrt_dnnl_ep', 'onnxrt_cuda_ep']")
"supported backends include ['default', 'onnxrt_trt_ep', 'onnxrt_dnnl_ep', 'onnxrt_cuda_ep', 'onnxrt_dml_ep']")

framework_specific_info = {"device": "cpu",
"backend": "onnxrt_trt_ep",
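
The updated expectation pins the full supported-backend list, now including 'onnxrt_dml_ep'. The failure path it exercises looks roughly like this; a sketch with framework_specific_info trimmed to the keys shown above, and the registry key 'onnxrt_qlinearops' assumed as one of the registered ONNX Runtime adaptors:

    # a sketch of the assertion path the test checks
    from neural_compressor.adaptor import FRAMEWORKS

    framework_specific_info = {"device": "cpu", "backend": "test_backend"}  # trimmed
    try:
        adaptor = FRAMEWORKS["onnxrt_qlinearops"](framework_specific_info)
    except AssertionError as e:
        print(e)  # lists the supported backends, ending with 'onnxrt_dml_ep'
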
