Support DirectML EP (#1144)
Signed-off-by: Mengni Wang <[email protected]>
mengniwang95 authored Aug 23, 2023
1 parent 3018319 commit 750bb9b
Showing 6 changed files with 89 additions and 13 deletions.
2 changes: 2 additions & 0 deletions neural_compressor/adaptor/onnxrt.py
@@ -95,6 +95,8 @@ def __init__(self, framework_specific_info):
config_file = 'onnxrt_cuda.yaml'
elif self.backend == 'DnnlExecutionProvider':
config_file = 'onnxrt_dnnl.yaml'
elif self.backend == 'DmlExecutionProvider':
config_file = 'onnxrt_dml.yaml'
else: # pragma: no cover
assert False, "{} provider is not supported in current environment, " \
"supported providers: {}".format(self.backend,
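
With this dispatch in place, selecting the DirectML backend routes the ONNX Runtime adaptor to the new onnxrt_dml.yaml capability file shown below. A minimal usage sketch, assuming the onnxruntime-directml package is installed; 'model.onnx' and calib_loader are hypothetical placeholders:

    # a sketch, not part of this commit: quantize via the DirectML backend
    from neural_compressor import PostTrainingQuantConfig
    from neural_compressor.quantization import fit

    conf = PostTrainingQuantConfig(backend='onnxrt_dml_ep')  # selects DmlExecutionProvider
    q_model = fit(model='model.onnx', conf=conf, calib_dataloader=calib_loader)
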
67 changes: 67 additions & 0 deletions neural_compressor/adaptor/onnxrt_dml.yaml
@@ -0,0 +1,67 @@
## Copyright (c) 2021 Intel Corporation
##
## Licensed under the Apache License, Version 2.0 (the "License");
## you may not use this file except in compliance with the License.
## You may obtain a copy of the License at
##
## http://www.apache.org/licenses/LICENSE-2.0
##
## Unless required by applicable law or agreed to in writing, software
## distributed under the License is distributed on an "AS IS" BASIS,
## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
## See the License for the specific language governing permissions and
## limitations under the License.
##
#

-
version:
name: '1.13.1'
int8: &ref_1_13 {
'static': &ref_1_13_static {
'Conv': &default_static_qlinear_qdq {
'weight': &int8_sym_pertensor_minmax {
'dtype': ['int8'],
'scheme': ['sym'],
'granularity': ['per_tensor'],
'algorithm': ['minmax']
},
'activation': &uint8_asym_pertensor_minmax {
'dtype': ['uint8'],
'scheme': ['asym'],
'granularity': ['per_tensor'],
'algorithm': ['minmax']
},
'mode': ['QDQ', 'QLinear']
},
'MatMul': {
'weight': *int8_sym_pertensor_minmax,
'activation': *uint8_asym_pertensor_minmax,
'mode': ['QDQ', 'QLinear']
},
'Mul': &default_static_qlinear {
'weight': *int8_sym_pertensor_minmax,
'activation': *uint8_asym_pertensor_minmax,
'mode': ['QLinear']
},
'Relu': *default_static_qlinear_qdq,
'Clip': *default_static_qlinear_qdq,
'MaxPool': *default_static_qlinear_qdq,
'Add': *default_static_qlinear,
},
}
fp16: &common_fp16 ['Add', 'GlobalAveragePool', 'AveragePool', 'SpaceToDepth', 'Sigmoid', 'Mul',
'Softmax', 'Gemm', 'MatMul', 'Conv', 'Concat', 'Upsample', 'Pow', 'Sqrt', 'DepthToSpace',
'Clip', 'BatchNormalization', 'Transpose', 'Squeeze', 'MaxPool', 'Relu']

recipes: &default_optimization
graph_optimization: # from onnxruntime graph_optimization_level
level: ['DISABLE_ALL', 'ENABLE_BASIC', 'ENABLE_EXTENDED', 'ENABLE_ALL']

-
version:
name: 'default'
int8: *ref_1_13
recipes:
<<: *default_optimization
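
The &anchor/*alias/<<: merge syntax keeps the file compact: the 'default' version entry reuses the 1.13.1 int8 table by reference instead of repeating it. How the aliases expand can be checked directly; a sketch, assuming PyYAML is installed:

    # a sketch: inspect the resolved capability table
    import yaml

    with open('neural_compressor/adaptor/onnxrt_dml.yaml') as f:
        v113, vdefault = yaml.safe_load(f)  # top level is a two-item list

    assert vdefault['int8'] is v113['int8']  # *ref_1_13 resolves to the same object
    print(vdefault['recipes']['graph_optimization']['level'])  # merged in via '<<'

The graph_optimization levels mirror onnxruntime's GraphOptimizationLevel enum, e.g. 'ENABLE_BASIC' corresponds to ort.GraphOptimizationLevel.ORT_ENABLE_BASIC on a SessionOptions object.
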
4 changes: 3 additions & 1 deletion neural_compressor/adaptor/ox_utils/operators/direct_q8.py
@@ -18,7 +18,9 @@

from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator, qop_registry, QOperator

@op_registry(op_types="Reshape, Transpose, Squeeze, Unsqueeze, Flatten, Expand, Slice")

@op_registry(op_types="Reshape, Transpose, Squeeze, Unsqueeze, Flatten, Expand, Slice, " \
"SpaceToDepth, DepthToSpace, Upsample")
class Direct8BitOperator(Operator):
"""Direct8Bit Operator."""

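
op_registry is a class decorator that records which Operator subclass handles each op type in the comma-separated string, so extending the string is all it takes to route SpaceToDepth, DepthToSpace, and Upsample through Direct8BitOperator. A simplified sketch of the pattern, not the exact neural_compressor implementation:

    # a sketch of the registry pattern behind @op_registry
    OPERATORS = {}

    def op_registry(op_types):
        def decorator(cls):
            for op_type in [t.strip() for t in op_types.split(',')]:
                OPERATORS[op_type] = cls  # map each op type to its handler class
            return cls
        return decorator

    @op_registry(op_types="SpaceToDepth, DepthToSpace, Upsample")
    class Direct8BitOperator:
        """Pass-through handler for 8-bit ops that reuse the input scale/zero point."""

    assert OPERATORS["Upsample"] is Direct8BitOperator
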
6 changes: 4 additions & 2 deletions neural_compressor/adaptor/ox_utils/util.py
@@ -72,13 +72,15 @@
'onnxrt_trt_ep': 'TensorrtExecutionProvider',
'onnxrt_dnnl_ep': 'DnnlExecutionProvider',
'onnxrt_cuda_ep': 'CUDAExecutionProvider',
'onnxrt_dml_ep': 'DmlExecutionProvider'
}

ONNXRT_BACKENDS = {
'CPUExecutionProvider': 'default',
'TensorrtExecutionProvider': 'onnxrt_trt_ep',
'CUDAExecutionProvider': 'onnxrt_cuda_ep',
'DnnlExecutionProvider': 'onnxrt_dnnl_ep'
'DnnlExecutionProvider': 'onnxrt_dnnl_ep',
'DmlExecutionProvider': 'onnxrt_dml_ep'
}

def dtype_to_name(dtype_mapping, dtype):
@@ -536,4 +538,4 @@ def to_numpy(data):
assert False, "The input data for onnx model is {}, which is not supported " \
"to convert to numpy ndarrays.".format(type(data))
else:
return data
return data
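
PROVIDERS maps neural_compressor backend names to ONNX Runtime execution providers, and ONNXRT_BACKENDS is its inverse, so the patch adds the DirectML entry to both tables to keep them in sync. A runtime sanity check; a sketch assuming the onnxruntime-directml build is installed and 'model.onnx' is a placeholder path:

    # a sketch: confirm the new backend's provider is available before use
    import onnxruntime as ort

    ep = 'DmlExecutionProvider'  # PROVIDERS['onnxrt_dml_ep']
    if ep in ort.get_available_providers():
        sess = ort.InferenceSession('model.onnx', providers=[ep])
    else:
        print(ep + ' not available; install the onnxruntime-directml package')
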
21 changes: 12 additions & 9 deletions neural_compressor/config.py
@@ -259,7 +259,7 @@ class BenchmarkConfig:
inputs (list, optional): A list of strings containing the inputs of model. Default is an empty list.
outputs (list, optional): A list of strings containing the outputs of model. Default is an empty list.
backend (str, optional): Backend name for model execution. Supported values include: 'default', 'itex',
'ipex', 'onnxrt_trt_ep', 'onnxrt_cuda_ep', 'onnxrt_dnnl_ep'.
'ipex', 'onnxrt_trt_ep', 'onnxrt_cuda_ep', 'onnxrt_dnnl_ep', 'onnxrt_dml_ep'.
Default value is 'default'.
warmup (int, optional): The number of iterations to perform warmup before running performance tests.
Default value is 5.
@@ -328,7 +328,7 @@ def backend(self):
def backend(self, backend):
"""Set backend."""
if _check_value('backend', backend, str, [
'default', 'itex', 'ipex', 'onnxrt_trt_ep', 'onnxrt_cuda_ep', 'onnxrt_dnnl_ep']):
'default', 'itex', 'ipex', 'onnxrt_trt_ep', 'onnxrt_cuda_ep', 'onnxrt_dnnl_ep', 'onnxrt_dml_ep']):
self._backend = backend

@property
@@ -694,7 +694,8 @@ class _BaseQuantizationConfig:
inputs: Inputs of model, only required in tensorflow.
outputs: Outputs of model, only required in tensorflow.
backend: Backend for model execution.
Support 'default', 'itex', 'ipex', 'onnxrt_trt_ep', 'onnxrt_cuda_ep', 'onnxrt_dnnl_ep'
Support 'default', 'itex', 'ipex', 'onnxrt_trt_ep', 'onnxrt_cuda_ep', 'onnxrt_dnnl_ep',
'onnxrt_dml_ep'
domain: Model domain. Support 'auto', 'cv', 'object_detection', 'nlp' and 'recommendation_system'.
Adaptor will use specific quantization settings for different domains automatically, and
explicitly specified quantization settings will override the automatic setting.
@@ -1102,7 +1103,7 @@ def backend(self):
@backend.setter
def backend(self, backend):
if _check_value('backend', backend, str, [
'default', 'itex', 'ipex', 'onnxrt_trt_ep', 'onnxrt_cuda_ep', 'onnxrt_dnnl_ep']):
'default', 'itex', 'ipex', 'onnxrt_trt_ep', 'onnxrt_cuda_ep', 'onnxrt_dnnl_ep', 'onnxrt_dml_ep']):
self._backend = backend

@property
@@ -1148,7 +1149,8 @@ class PostTrainingQuantConfig(_BaseQuantizationConfig):
Args:
device: Support 'cpu' and 'gpu'.
backend: Backend for model execution.
Support 'default', 'itex', 'ipex', 'onnxrt_trt_ep', 'onnxrt_cuda_ep', 'onnxrt_dnnl_ep'
Support 'default', 'itex', 'ipex', 'onnxrt_trt_ep', 'onnxrt_cuda_ep', 'onnxrt_dnnl_ep',
'onnxrt_dml_ep'
domain: Model domain. Support 'auto', 'cv', 'object_detection', 'nlp' and 'recommendation_system'.
Adaptor will use specific quantization settings for different domains automatically, and
explicitly specified quantization settings will override the automatic setting.
@@ -1309,7 +1311,8 @@ class QuantizationAwareTrainingConfig(_BaseQuantizationConfig):
Args:
device: Support 'cpu' and 'gpu'.
backend: Backend for model execution.
Support 'default', 'itex', 'ipex', 'onnxrt_trt_ep', 'onnxrt_cuda_ep', 'onnxrt_dnnl_ep'
Support 'default', 'itex', 'ipex', 'onnxrt_trt_ep', 'onnxrt_cuda_ep', 'onnxrt_dnnl_ep',
'onnxrt_dml_ep'
inputs: Inputs of model, only required in tensorflow.
outputs: Outputs of model, only required in tensorflow.
op_type_dict: Tuning constraints on optype-wise for advance user to reduce tuning space.
Expand Down Expand Up @@ -1779,8 +1782,8 @@ class MixedPrecisionConfig(object):
device (str, optional): Device for execution.
Support 'cpu' and 'gpu', default is 'cpu'.
backend (str, optional): Backend for model execution.
Support 'default', 'itex', 'ipex', 'onnxrt_trt_ep', 'onnxrt_cuda_ep', 'onnxrt_dnnl_ep'
default is 'default'.
Support 'default', 'itex', 'ipex', 'onnxrt_trt_ep', 'onnxrt_cuda_ep', 'onnxrt_dnnl_ep',
'onnxrt_dml_ep'. Default is 'default'.
precisions ([str, list], optional): Target precision for mix precision conversion.
Support 'bf16' and 'fp16', default is 'bf16'.
model_name (str, optional): The name of the model. Default value is empty.
@@ -1939,7 +1942,7 @@ def backend(self):
def backend(self, backend):
"""Set backend."""
if _check_value('backend', backend, str, [
'default', 'itex', 'ipex', 'onnxrt_trt_ep', 'onnxrt_cuda_ep', 'onnxrt_dnnl_ep']):
'default', 'itex', 'ipex', 'onnxrt_trt_ep', 'onnxrt_cuda_ep', 'onnxrt_dnnl_ep', 'onnxrt_dml_ep']):
self._backend = backend

@property
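
Every config class above routes its backend string through the same _check_value validation, so adding 'onnxrt_dml_ep' to each setter and docstring makes the value usable across benchmarking, post-training quantization, quantization-aware training, and mixed precision. A sketch of the effect; pairing the backend with fp16 mixed precision matches the fp16 op list in onnxrt_dml.yaml:

    # a sketch: the new backend value now passes validation everywhere
    from neural_compressor.config import BenchmarkConfig, MixedPrecisionConfig

    bench = BenchmarkConfig(backend='onnxrt_dml_ep', warmup=5, iteration=100)
    mp = MixedPrecisionConfig(backend='onnxrt_dml_ep', precisions='fp16')

    try:
        BenchmarkConfig(backend='onnxrt_openvino_ep')  # hypothetical unsupported value
    except AssertionError:
        print('rejected by _check_value, as expected')
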
2 changes: 1 addition & 1 deletion test/adaptor/onnxrt_adaptor/test_adaptor_onnxrt.py
@@ -1482,7 +1482,7 @@ def test_backend(self, mock_warning):
with self.assertRaises(AssertionError) as context:
adaptor = FRAMEWORKS[framework](framework_specific_info)
self.assertEqual(str(context.exception), "'test_backend' backend is not supported, "\
"supported backends include ['default', 'onnxrt_trt_ep', 'onnxrt_dnnl_ep', 'onnxrt_cuda_ep']")
"supported backends include ['default', 'onnxrt_trt_ep', 'onnxrt_dnnl_ep', 'onnxrt_cuda_ep', 'onnxrt_dml_ep']")

framework_specific_info = {"device": "cpu",
"backend": "onnxrt_trt_ep",
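
The updated expectation pins the full supported-backend list, now including 'onnxrt_dml_ep'. The failure path it exercises looks roughly like this; a sketch with framework_specific_info trimmed to the keys shown above, and the registry key 'onnxrt_qlinearops' assumed as one of the registered ONNX Runtime adaptors:

    # a sketch of the assertion path the test checks
    from neural_compressor.adaptor import FRAMEWORKS

    framework_specific_info = {"device": "cpu", "backend": "test_backend"}  # trimmed
    try:
        adaptor = FRAMEWORKS["onnxrt_qlinearops"](framework_specific_info)
    except AssertionError as e:
        print(e)  # lists the supported backends, ending with 'onnxrt_dml_ep'
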
