From 868b7833783851ac68deb31bccde57c3c25850d1 Mon Sep 17 00:00:00 2001 From: Tomasz Socha Date: Thu, 2 Sep 2021 14:46:52 +0200 Subject: [PATCH] [ONNX] QLinearConvolution (#7210) --- .../core/include/ngraph/op/fake_quantize.hpp | 6 + ngraph/core/src/op/fake_quantize.cpp | 79 +++++++++ ngraph/frontend/onnx/frontend/src/op/conv.cpp | 21 +-- ngraph/frontend/onnx/frontend/src/op/conv.hpp | 3 + .../frontend/src/op/dequantize_linear.cpp | 43 ++--- .../frontend/src/op/dequantize_linear.hpp | 13 +- .../onnx/frontend/src/op/qlinear_conv.cpp | 68 ++++++++ .../onnx/frontend/src/op/qlinear_conv.hpp | 31 ++++ .../onnx/frontend/src/op/quantize_linear.cpp | 31 +++- .../onnx/frontend/src/op/quantize_linear.hpp | 6 + .../frontend/onnx/frontend/src/ops_bridge.cpp | 10 +- ngraph/test/files/onnx/qlinearconv3d/x.bin | Bin 64 -> 0 bytes ngraph/test/files/onnx/qlinearconv3d/y.bin | Bin 64 -> 0 bytes .../quant_conv_linear_onnx_example.prototxt | 152 ++++++++++++++++++ ngraph/test/onnx/onnx_import_quant.in.cpp | 116 +++++++++++-- ngraph/test/runtime/ie/unit_test.manifest | 9 +- .../runtime/interpreter/evaluates_map.cpp | 23 --- .../runtime/interpreter/unit_test.manifest | 3 - runtime/bindings/python/tests/__init__.py | 6 - .../tests/test_ngraph/test_ops_fused.py | 3 +- .../python/tests/test_onnx/test_backend.py | 25 +-- 21 files changed, 533 insertions(+), 115 deletions(-) create mode 100644 ngraph/frontend/onnx/frontend/src/op/qlinear_conv.cpp create mode 100644 ngraph/frontend/onnx/frontend/src/op/qlinear_conv.hpp delete mode 100644 ngraph/test/files/onnx/qlinearconv3d/x.bin delete mode 100644 ngraph/test/files/onnx/qlinearconv3d/y.bin create mode 100644 ngraph/test/models/onnx/quantization/quant_conv_linear_onnx_example.prototxt diff --git a/ngraph/core/include/ngraph/op/fake_quantize.hpp b/ngraph/core/include/ngraph/op/fake_quantize.hpp index 08a53dca2fdcb9..00c970bf8fd415 100644 --- a/ngraph/core/include/ngraph/op/fake_quantize.hpp +++ b/ngraph/core/include/ngraph/op/fake_quantize.hpp 
@@ -67,6 +67,12 @@ class NGRAPH_API FakeQuantize : public ngraph::op::Op { m_auto_broadcast = auto_broadcast; } + bool evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const override; + bool has_evaluate() const override; + bool constant_fold(OutputVector& output_values, const OutputVector& inputs_values) override { + return false; + } + private: std::size_t m_levels; AutoBroadcastSpec m_auto_broadcast = op::AutoBroadcastType::NUMPY; diff --git a/ngraph/core/src/op/fake_quantize.cpp b/ngraph/core/src/op/fake_quantize.cpp index 645fcc92b97a18..324b87b651fd3f 100644 --- a/ngraph/core/src/op/fake_quantize.cpp +++ b/ngraph/core/src/op/fake_quantize.cpp @@ -11,7 +11,9 @@ #include "ngraph/op/constant.hpp" #include "ngraph/op/convert.hpp" #include "ngraph/op/select.hpp" +#include "ngraph/runtime/reference/fake_quantize.hpp" #include "ngraph/shape.hpp" +#include "ngraph/type/element_type.hpp" using namespace std; using namespace ngraph; @@ -73,3 +75,80 @@ shared_ptr op::FakeQuantize::clone_with_new_inputs(const OutputVector& new m_levels, m_auto_broadcast); } + +namespace fakequantizeop { +template +bool evaluate(const HostTensorPtr& arg0, + const HostTensorPtr& arg1, + const HostTensorPtr& arg2, + const HostTensorPtr& arg3, + const HostTensorPtr& arg4, + const HostTensorPtr& out, + const ngraph::op::FakeQuantize* parent) { + NGRAPH_OP_SCOPE(v0_FakeQuantize_evaluate); + using T = typename element_type_traits::value_type; + runtime::reference::fake_quantize(arg0->get_data_ptr(), + arg1->get_data_ptr(), + arg2->get_data_ptr(), + arg3->get_data_ptr(), + arg4->get_data_ptr(), + out->get_data_ptr(), + parent->get_input_shape(0), + parent->get_input_shape(1), + parent->get_input_shape(2), + parent->get_input_shape(3), + parent->get_input_shape(4), + parent->get_levels(), + parent->get_auto_broadcast()); + return true; +} + +bool evaluate_fakequantize(const HostTensorPtr& arg0, + const HostTensorPtr& arg1, + const HostTensorPtr& arg2, + const 
HostTensorPtr& arg3, + const HostTensorPtr& arg4, + const HostTensorPtr& out, + const ngraph::op::FakeQuantize* parent) { + bool rc = true; + switch (arg0->get_element_type()) { + NGRAPH_TYPE_CASE(evaluate_fakequantize, i32, arg0, arg1, arg2, arg3, arg4, out, parent); + NGRAPH_TYPE_CASE(evaluate_fakequantize, i64, arg0, arg1, arg2, arg3, arg4, out, parent); + NGRAPH_TYPE_CASE(evaluate_fakequantize, u32, arg0, arg1, arg2, arg3, arg4, out, parent); + NGRAPH_TYPE_CASE(evaluate_fakequantize, u64, arg0, arg1, arg2, arg3, arg4, out, parent); + NGRAPH_TYPE_CASE(evaluate_fakequantize, f16, arg0, arg1, arg2, arg3, arg4, out, parent); + NGRAPH_TYPE_CASE(evaluate_fakequantize, f32, arg0, arg1, arg2, arg3, arg4, out, parent); + default: + rc = false; + break; + } + return rc; +} +} // namespace fakequantizeop + +bool ngraph::op::FakeQuantize::evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const { + NGRAPH_OP_SCOPE(v0_FakeQuantize_evaluate); + return fakequantizeop::evaluate_fakequantize(inputs[0], + inputs[1], + inputs[2], + inputs[3], + inputs[4], + outputs[0], + this); +} + +bool ngraph::op::FakeQuantize::has_evaluate() const { + NGRAPH_OP_SCOPE(v0_FakeQuantize_has_evaluate); + switch (get_input_element_type(0)) { + case ngraph::element::i32: + case ngraph::element::i64: + case ngraph::element::u32: + case ngraph::element::u64: + case ngraph::element::f16: + case ngraph::element::f32: + return true; + default: + break; + } + return false; +} diff --git a/ngraph/frontend/onnx/frontend/src/op/conv.cpp b/ngraph/frontend/onnx/frontend/src/op/conv.cpp index 5c280c3769acf1..230ae916bc78b5 100644 --- a/ngraph/frontend/onnx/frontend/src/op/conv.cpp +++ b/ngraph/frontend/onnx/frontend/src/op/conv.cpp @@ -13,6 +13,7 @@ #include "ngraph/builder/reshape.hpp" #include "ngraph/op/group_conv.hpp" #include "ngraph/op/util/attr_types.hpp" +#include "onnx_import/core/null_node.hpp" #include "utils/convpool.hpp" #include "utils/reshape.hpp" @@ -20,7 +21,7 @@ 
namespace ngraph { namespace onnx_import { namespace op { namespace set_1 { -namespace { +namespace detail { std::shared_ptr make_ng_convolution(const Output& data, const Output& filters, const ngraph::Strides& strides, @@ -57,14 +58,13 @@ std::shared_ptr add_bias(const Output& ng_conv, cons return { std::make_shared(ng_conv, reshape::reshape_channel_shaped_node_to_nchw(bias, conv_rank))}; } -} // namespace -OutputVector conv(const Node& node) { +OutputVector conv(const Node& node, + Output data, + Output filters, + Output bias) { // in the current implementation we assume that the data input rank is static // and only the 'batch' dimension can be dynamic - const OutputVector& inputs = node.get_ng_inputs(); - const auto data = inputs.at(0); - const auto filters = inputs.at(1); const auto groups = node.get_attribute_value("group", 1); NGRAPH_CHECK(data.get_partial_shape().rank().is_static(), "The input data tensor's rank has to be known (static)"); @@ -80,10 +80,9 @@ OutputVector conv(const Node& node) { make_ng_convolution(data, filters, strides, dilations, padding_below, padding_above, groups, auto_pad_type); // no bias param - if (inputs.size() < 3) { + if (ngraph::op::is_null(bias)) { return {conv_node}; } else { - const auto& bias = inputs.at(2); const auto& bias_ps = bias.get_partial_shape(); NGRAPH_CHECK(bias_ps.rank().is_static() && bias_ps.rank().get_length() == 1, @@ -92,7 +91,11 @@ OutputVector conv(const Node& node) { return {add_bias(conv_node, bias)}; } } - +} // namespace detail +OutputVector conv(const Node& node) { + const OutputVector& inputs = node.get_ng_inputs(); + return detail::conv(node, inputs[0], inputs[1], inputs.size() < 3 ? 
std::make_shared() : inputs[2]); +} } // namespace set_1 } // namespace op diff --git a/ngraph/frontend/onnx/frontend/src/op/conv.hpp b/ngraph/frontend/onnx/frontend/src/op/conv.hpp index 065902af22a338..759a75110cb12f 100644 --- a/ngraph/frontend/onnx/frontend/src/op/conv.hpp +++ b/ngraph/frontend/onnx/frontend/src/op/conv.hpp @@ -11,6 +11,9 @@ namespace ngraph { namespace onnx_import { namespace op { namespace set_1 { +namespace detail { +OutputVector conv(const Node& node, Output data, Output filters, Output bias); +} /// \brief Performs ONNX Conv operation. /// /// \param node The ONNX node object representing this operation. diff --git a/ngraph/frontend/onnx/frontend/src/op/dequantize_linear.cpp b/ngraph/frontend/onnx/frontend/src/op/dequantize_linear.cpp index bfa26cc64ada7d..ecdd5261503556 100644 --- a/ngraph/frontend/onnx/frontend/src/op/dequantize_linear.cpp +++ b/ngraph/frontend/onnx/frontend/src/op/dequantize_linear.cpp @@ -19,7 +19,7 @@ namespace ngraph { namespace onnx_import { namespace op { -namespace { +namespace detail { Output get_zero_point(const OutputVector& inputs) { if (inputs.size() == 3 && !ngraph::op::is_null(inputs[2])) { auto zero_point = inputs[2]; @@ -33,7 +33,7 @@ Output get_zero_point(const OutputVector& inputs) { return default_opset::Constant::create(element::f32, Shape{}, {0}); } } -} // namespace +} // namespace detail namespace set_1 { OutputVector dequantize_linear(const Node& node) { const OutputVector inputs{node.get_ng_inputs()}; @@ -44,7 +44,7 @@ OutputVector dequantize_linear(const Node& node) { const auto x = inputs[0]; const auto scale = inputs[1]; - const auto zero_point = get_zero_point(inputs); + const auto zero_point = detail::get_zero_point(inputs); common::validate_scalar_input("Dequantization scale", scale.get_node_shared_ptr(), {element::f32}); common::validate_scalar_input("Zero point", zero_point.get_node_shared_ptr()); @@ -58,7 +58,7 @@ OutputVector dequantize_linear(const Node& node) { } // namespace set_1 
namespace set_13 { -namespace { +namespace detail { void validate_scale(const Output scale, const Output x, const int64_t axis) { const auto& scale_shape = scale.get_partial_shape(); NGRAPH_CHECK(scale_shape.rank().get_length() == 0 || scale_shape.rank().get_length() == 1, @@ -129,25 +129,16 @@ std::shared_ptr reshape_input(const Output input, return std::make_shared(input, target_shape, true); } -} // namespace - -OutputVector dequantize_linear(const Node& node) { - const OutputVector inputs{node.get_ng_inputs()}; - - NGRAPH_CHECK(2 <= inputs.size() && inputs.size() <= 3, - "The DequantizeLinear op expects 2 required and one optional " - "input. Got: ", - inputs.size()); - - const auto x = inputs[0]; - auto scale = inputs[1]; - auto zero_point = get_zero_point(inputs); +OutputVector dequantize_linear(Output x, + Output scale, + Output zero_point, + int64_t axis, + Node node) { const auto x_shape = x.get_partial_shape(); NGRAPH_CHECK(x_shape.rank().is_static(), "Rank of the input data tensor has to be known (static)."); - int64_t axis{node.get_attribute_value("axis", 1)}; axis = ngraph::normalize_axis(node.get_description(), axis, x_shape.rank()); validate_scale(scale, x, axis); @@ -163,6 +154,22 @@ OutputVector dequantize_linear(const Node& node) { std::make_shared(std::make_shared(converted_x, zero_point), scale)}; } +} // namespace detail + +OutputVector dequantize_linear(const Node& node) { + const OutputVector inputs{node.get_ng_inputs()}; + + NGRAPH_CHECK(2 <= inputs.size() && inputs.size() <= 3, + "The DequantizeLinear op expects 2 required and one optional " + "input. 
Got: ", + inputs.size()); + const auto x = inputs[0]; + auto scale = inputs[1]; + auto zero_point = op::detail::get_zero_point(inputs); + + // these reshapes make sure that dequantization happens over the specified axis + return detail::dequantize_linear(x, scale, zero_point, node.get_attribute_value("axis", 1), node); +} } // namespace set_13 } // namespace op } // namespace onnx_import diff --git a/ngraph/frontend/onnx/frontend/src/op/dequantize_linear.hpp b/ngraph/frontend/onnx/frontend/src/op/dequantize_linear.hpp index d1329a247f8ceb..be36d796a506c1 100644 --- a/ngraph/frontend/onnx/frontend/src/op/dequantize_linear.hpp +++ b/ngraph/frontend/onnx/frontend/src/op/dequantize_linear.hpp @@ -10,14 +10,25 @@ namespace ngraph { namespace onnx_import { namespace op { +namespace detail { +Output get_zero_point(const OutputVector& inputs); +} + namespace set_1 { OutputVector dequantize_linear(const Node& node); } // namespace set_1 namespace set_13 { -OutputVector dequantize_linear(const Node& node); +namespace detail { +OutputVector dequantize_linear(Output x, + Output scale, + Output zero_point, + int64_t axis, + Node node); } +OutputVector dequantize_linear(const Node& node); +} // namespace set_13 } // namespace op diff --git a/ngraph/frontend/onnx/frontend/src/op/qlinear_conv.cpp b/ngraph/frontend/onnx/frontend/src/op/qlinear_conv.cpp new file mode 100644 index 00000000000000..3ee14d345e9e94 --- /dev/null +++ b/ngraph/frontend/onnx/frontend/src/op/qlinear_conv.cpp @@ -0,0 +1,68 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +// Disabled in CMakeList +// Update to higher opset required + +#include "op/qlinear_conv.hpp" + +#include +#include +#include + +#include "conv.hpp" +#include "dequantize_linear.hpp" +#include "exceptions.hpp" +#include "ngraph/opsets/opset6.hpp" +#include "onnx_import/core/null_node.hpp" +#include "quantize_linear.hpp" + +namespace ngraph { +namespace onnx_import { +namespace op { +namespace 
set_1 { +OutputVector qlinear_conv(const Node& node) { + const OutputVector& inputs = node.get_ng_inputs(); + + auto x = inputs.at(0); + auto x_scale = inputs.at(1); + auto x_zero_point = inputs.at(2); + auto w = inputs.at(3); + auto w_scale = inputs.at(4); + auto w_zero_point = inputs.at(5); + auto y_scale = inputs.at(6); + auto y_zero_point = inputs.at(7); + Output B = inputs.size() > 8 ? inputs.at(8) : std::make_shared()->output(0); + + x = set_13::detail::dequantize_linear(x, + x_scale, + std::make_shared(x_zero_point, element::f32), + 1, + node)[0]; + w = set_13::detail::dequantize_linear(w, + w_scale, + std::make_shared(w_zero_point, element::f32), + 1, + node)[0]; + + if (!ngraph::op::is_null(B)) { + B = std::make_shared(std::make_shared(B, x_scale.get_element_type()), + std::make_shared(x_scale, w_scale)) + ->output(0); + } + + auto result = detail::conv(node, x, w, B)[0]; + + result = op::detail::make_fake_quantize(y_scale, y_zero_point, result); + + return {result}; +} + +} // namespace set_1 + +} // namespace op + +} // namespace onnx_import + +} // namespace ngraph diff --git a/ngraph/frontend/onnx/frontend/src/op/qlinear_conv.hpp b/ngraph/frontend/onnx/frontend/src/op/qlinear_conv.hpp new file mode 100644 index 00000000000000..a25cf374174f66 --- /dev/null +++ b/ngraph/frontend/onnx/frontend/src/op/qlinear_conv.hpp @@ -0,0 +1,31 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +// Disabled in CMakeList +// Update to higher opset required + +#pragma once + +#include "ngraph/node.hpp" +#include "onnx_import/core/node.hpp" + +namespace ngraph { +namespace onnx_import { +namespace op { +namespace set_1 { +/// \brief Performs ONNX QLinearConv operation. +/// +/// \param node The ONNX node object representing this operation. +/// +/// \return The vector containing Ngraph nodes producing output of ONNX quantized +/// convolution operation. 
+OutputVector qlinear_conv(const Node& node); + +} // namespace set_1 + +} // namespace op + +} // namespace onnx_import + +} // namespace ngraph diff --git a/ngraph/frontend/onnx/frontend/src/op/quantize_linear.cpp b/ngraph/frontend/onnx/frontend/src/op/quantize_linear.cpp index b8466c59a2584e..b83e43c77941d4 100644 --- a/ngraph/frontend/onnx/frontend/src/op/quantize_linear.cpp +++ b/ngraph/frontend/onnx/frontend/src/op/quantize_linear.cpp @@ -94,7 +94,7 @@ std::tuple, std::shared_ptr> get_inp return std::make_tuple(input_low, input_high); } - +} // namespace std::shared_ptr make_fake_quantize(const Output& y_scale, const Output& y_zero_point, const Output& data) { @@ -116,7 +116,6 @@ std::shared_ptr make_fake_quantize(const Output& y_s std::make_shared(data, input_low, input_high, output_low, output_high, levels), destination_type); } -} // namespace } // namespace detail namespace set_1 { @@ -135,11 +134,13 @@ OutputVector quantize_linear(const Node& node) { } // namespace set_1 namespace set_13 { -OutputVector quantize_linear(const Node& node) { - OutputVector inputs{node.get_ng_inputs()}; - auto x = inputs.at(0); - auto y_scale = inputs.at(1); - auto y_zero_point = detail::get_zero_point(inputs); +namespace detail { +OutputVector quantize_linear(Output x, + Output y_scale, + Output y_zero_point, + int64_t axis, + Node node) { + namespace detail = ngraph::onnx_import::op::detail; x = detail::validate_data(node, x); detail::validate_zero_point_type(node, y_zero_point); @@ -147,7 +148,6 @@ OutputVector quantize_linear(const Node& node) { const auto& x_shape = x.get_partial_shape(); - int64_t axis{node.get_attribute_value("axis", 1)}; axis = normalize_axis(node.get_description(), axis, x_shape.rank()); const auto& y_scale_shape = y_scale.get_partial_shape(); @@ -185,7 +185,22 @@ OutputVector quantize_linear(const Node& node) { return {detail::make_fake_quantize(y_scale, y_zero_point, x)}; } +} // namespace detail + +OutputVector quantize_linear(const Node& node) { 
+ const OutputVector inputs{node.get_ng_inputs()}; + NGRAPH_CHECK(2 <= inputs.size() && inputs.size() <= 3, + "The QuantizeLinear op expects 2 required and one optional " + "input. Got: ", + inputs.size()); + + const auto x = inputs[0]; + auto scale = inputs[1]; + auto zero_point = op::detail::get_zero_point(inputs); + + return detail::quantize_linear(x, scale, zero_point, node.get_attribute_value("axis", 1), node); +} } // namespace set_13 } // namespace op diff --git a/ngraph/frontend/onnx/frontend/src/op/quantize_linear.hpp b/ngraph/frontend/onnx/frontend/src/op/quantize_linear.hpp index bc049d55d4a029..531d9ac494d656 100644 --- a/ngraph/frontend/onnx/frontend/src/op/quantize_linear.hpp +++ b/ngraph/frontend/onnx/frontend/src/op/quantize_linear.hpp @@ -10,12 +10,18 @@ namespace ngraph { namespace onnx_import { namespace op { +namespace detail { +std::shared_ptr make_fake_quantize(const Output& y_scale, + const Output& y_zero_point, + const Output& data); +} namespace set_1 { OutputVector quantize_linear(const Node& node); } // namespace set_1 namespace set_13 { + OutputVector quantize_linear(const Node& node); } // namespace set_13 diff --git a/ngraph/frontend/onnx/frontend/src/ops_bridge.cpp b/ngraph/frontend/onnx/frontend/src/ops_bridge.cpp index ed759c1b5eaf63..f4110b1c5d932b 100644 --- a/ngraph/frontend/onnx/frontend/src/ops_bridge.cpp +++ b/ngraph/frontend/onnx/frontend/src/ops_bridge.cpp @@ -91,10 +91,6 @@ #include "op/not.hpp" #include "op/onehot.hpp" #include "op/or.hpp" -#include "op/pad.hpp" -#include "op/pow.hpp" -#include "op/prelu.hpp" -// #include "op/quant_conv.hpp" #include "op/org.openvinotoolkit/deformable_conv_2d.hpp" #include "op/org.openvinotoolkit/detection_output.hpp" #include "op/org.openvinotoolkit/experimental_detectron/detection_output.hpp" @@ -107,6 +103,10 @@ #include "op/org.openvinotoolkit/normalize.hpp" #include "op/org.openvinotoolkit/prior_box.hpp" #include "op/org.openvinotoolkit/swish.hpp" +#include "op/pad.hpp" +#include 
"op/pow.hpp" +#include "op/prelu.hpp" +#include "op/qlinear_conv.hpp" #include "op/quantize_linear.hpp" #include "op/random_uniform.hpp" #include "op/random_uniform_like.hpp" @@ -368,7 +368,7 @@ OperatorsBridge::OperatorsBridge() { REGISTER_OPERATOR("Pad", 11, pad); REGISTER_OPERATOR("Pow", 1, pow); REGISTER_OPERATOR("PRelu", 1, prelu); - // REGISTER_OPERATOR("QLinearConv", 1, quant_conv); + REGISTER_OPERATOR("QLinearConv", 1, qlinear_conv); REGISTER_OPERATOR("QuantizeLinear", 1, quantize_linear); REGISTER_OPERATOR("QuantizeLinear", 13, quantize_linear); REGISTER_OPERATOR("Range", 1, range); diff --git a/ngraph/test/files/onnx/qlinearconv3d/x.bin b/ngraph/test/files/onnx/qlinearconv3d/x.bin deleted file mode 100644 index 2cfb4e9b24ffb3c4dc88dbfe8792102247ae792d..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 64 zcmV-G0Kfl&4)itjckE9Iid{ZJo>8#jcA$0kMXWLxDvJP;o%s76aAS(T7LU5el)jW% W5Qf~pMIr^9|HOtd>D+;9+^l>8$ROwd diff --git a/ngraph/test/files/onnx/qlinearconv3d/y.bin b/ngraph/test/files/onnx/qlinearconv3d/y.bin deleted file mode 100644 index 4ac0510ba7f864c0322054e29453aec3ae84b29a..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 64 ccmZo@zyg~8H#9IrBP7I=8XC?WLGY1S01J>G=>Px# diff --git a/ngraph/test/models/onnx/quantization/quant_conv_linear_onnx_example.prototxt b/ngraph/test/models/onnx/quantization/quant_conv_linear_onnx_example.prototxt new file mode 100644 index 00000000000000..94cf76c2b95775 --- /dev/null +++ b/ngraph/test/models/onnx/quantization/quant_conv_linear_onnx_example.prototxt @@ -0,0 +1,152 @@ +ir_version: 5 +producer_name: "onnx-examples" +graph { + node { + input: "x" + input: "x_scale" + input: "x_zero_point" + input: "w" + input: "w_scale" + input: "w_zero_point" + input: "y_scale" + input: "y_zero_point" + output: "y" + op_type: "QLinearConv" + } + name: "test_qlinearconv" + input { + name: "x" + type { + tensor_type { + elem_type: 2 + shape { + dim { + dim_value: 1 + 
} + dim { + dim_value: 1 + } + dim { + dim_value: 7 + } + dim { + dim_value: 7 + } + } + } + } + } + input { + name: "x_scale" + type { + tensor_type { + elem_type: 1 + shape { + } + } + } + } + input { + name: "x_zero_point" + type { + tensor_type { + elem_type: 2 + shape { + } + } + } + } + input { + name: "w" + type { + tensor_type { + elem_type: 2 + shape { + dim { + dim_value: 1 + } + dim { + dim_value: 1 + } + dim { + dim_value: 1 + } + dim { + dim_value: 1 + } + } + } + } + } + input { + name: "w_scale" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 1 + } + } + } + } + } + input { + name: "w_zero_point" + type { + tensor_type { + elem_type: 2 + shape { + dim { + dim_value: 1 + } + } + } + } + } + input { + name: "y_scale" + type { + tensor_type { + elem_type: 1 + shape { + } + } + } + } + input { + name: "y_zero_point" + type { + tensor_type { + elem_type: 2 + shape { + } + } + } + } + output { + name: "y" + type { + tensor_type { + elem_type: 2 + shape { + dim { + dim_value: 1 + } + dim { + dim_value: 1 + } + dim { + dim_value: 7 + } + dim { + dim_value: 7 + } + } + } + } + } +} +opset_import { + version: 11 +} diff --git a/ngraph/test/onnx/onnx_import_quant.in.cpp b/ngraph/test/onnx/onnx_import_quant.in.cpp index 96faa74393439e..cfb3a918785797 100644 --- a/ngraph/test/onnx/onnx_import_quant.in.cpp +++ b/ngraph/test/onnx/onnx_import_quant.in.cpp @@ -264,20 +264,32 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_dequantize_linear_1d_zero_scale_uint8_ne NGRAPH_TEST(${BACKEND_NAME}, onnx_model_quant_conv_linear) { auto function = onnx_import::import_onnx_model(file_util::path_join(SERIALIZED_ZOO, "onnx/quant_conv_lin.onnx")); + auto test_case = test::TestCase(function); + + // don't change style for better readability + // clang-format off std::vector> inputs; - inputs.emplace_back(std::vector{ - 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, - 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 
39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, - 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81}); - - std::vector> expected_output{std::vector{ - 2, 3, 3, 3, 4, 4, 4, 5, 2, 4, 6, 7, 8, 8, 9, 9, 10, 3, 8, 11, 12, 13, 13, 14, 14, 15, 5, - 11, 16, 17, 18, 18, 19, 19, 20, 7, 14, 22, 22, 23, 23, 24, 24, 25, 8, 18, 27, 27, 28, 28, 29, 29, 30, 10, - 21, 32, 32, 33, 33, 34, 34, 35, 12, 24, 37, 37, 38, 38, 39, 40, 40, 13, 17, 26, 27, 27, 27, 28, 28, 28, 9}}; - - std::vector> outputs{ - execute(function, inputs, "${BACKEND_NAME}")}; - EXPECT_TRUE(test::all_close(expected_output.front(), outputs.front())); + test_case.add_input(std::vector{ 1, 2, 3, 4, 5, 6, 7, 8, 9, + 10, 11, 12, 13, 14, 15, 16, 17, 18, + 19, 20, 21, 22, 23, 24, 25, 26, 27, + 28, 29, 30, 31, 32, 33, 34, 35, 36, + 37, 38, 39, 40, 41, 42, 43, 44, 45, + 46, 47, 48, 49, 50, 51, 52, 53, 54, + 55, 56, 57, 58, 59, 60, 61, 62, 63, + 64, 65, 66, 67, 68, 69, 70, 71, 72, + 73, 74, 75, 76, 77, 78, 79, 80, 81}); + + test_case.add_expected_output({1, 1, 9, 9}, std::vector{ 2, 3, 3, 3, 4, 4, 4, 5, 2, + 4, 6, 7, 8, 8, 9, 9, 10, 3, + 8, 11, 12, 13, 13, 14, 14, 15, 5, + 11, 16, 17, 18, 18, 19, 19, 20, 7, + 14, 22, 22, 23, 23, 24, 24, 25, 8, + 18, 27, 27, 28, 28, 29, 29, 30, 10, + 21, 32, 32, 33, 33, 34, 34, 35, 12, + 24, 37, 37, 38, 38, 39, 40, 40, 13, + 17, 26, 27, 27, 27, 28, 28, 28, 9}); + // clang-format on + test_case.run(); } NGRAPH_TEST(${BACKEND_NAME}, onnx_model_quant_conv_linear_2d) { @@ -303,7 +315,27 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_quant_conv_linear_3d) { auto test_case = test::TestCase(function); - test_case.add_input_from_file(TEST_FILES, "onnx/qlinearconv3d/x.bin"); + // don't change style for better readability + // clang-format off + test_case.add_input(std::vector{130, 14, 244, 53, + 244, 119, 236, 79, + 9, 138, 93, 62, + 66, 158, 81, 176, + + 225, 118, 160, 117, + 246, 69, 172, 50, + 23, 42, 139, 0, + 146, 157, 248, 251, 
+ + 30, 112, 99, 138, + 190, 22, 143, 186, + 199, 148, 190, 148, + 89, 16, 134, 220, + + 191, 69, 34, 5, + 156, 255, 196, 134, + 49, 233, 220, 129, + 107, 220, 172, 124}); // x test_case.add_input(std::vector{0.00389225385151803f}); // x_scale test_case.add_input(std::vector{127}); // x_zero_point test_case.add_input(std::vector{255}); // w @@ -312,7 +344,61 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_quant_conv_linear_3d) { test_case.add_input(std::vector{0.0011764180380851f}); // y_scale test_case.add_input(std::vector{128}); // y_zero_point - test_case.add_expected_output_from_file({1, 1, 4, 4, 4}, TEST_FILES, "onnx/qlinearconv3d/y.bin"); + test_case.add_expected_output({1, 1, 4, 4, 4}, + {128, 128, 128, 128, + 128, 128, 128, 128, + 128, 128, 128, 128, + 128, 128, 128, 128, + + 128, 128, 128, 128, + 128, 131, 255, 128, + 128, 0, 91, 128, + 128, 128, 128, 128, + + 128, 128, 128, 128, + 128, 23, 98, 128, + 128, 206, 196, 128, + 128, 128, 128, 128, + + 128, 128, 128, 128, + 128, 128, 128, 128, + 128, 128, 128, 128, + 128, 128, 128, 128}); + // clang-format on + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, onnx_model_quant_conv_linear_onnx_example) { + auto function = onnx_import::import_onnx_model( + file_util::path_join(SERIALIZED_ZOO, "onnx/quantization/quant_conv_linear_onnx_example.onnx")); + + auto test_case = test::TestCase(function); + + // don't change style for better readability + // clang-format off + test_case.add_input(std::vector{255, 174, 162, 25, 203, 168, 58, + 15, 59, 237, 95, 129, 0, 64, + 56, 242, 153, 221, 168, 12, 166, + 232, 178, 186, 195, 237, 162, 237, + 188, 39, 124, 77, 80, 102, 43, + 127, 230, 21, 83, 41, 40, 134, + 255, 154, 92, 141, 42, 148, 247}); // x + test_case.add_input(std::vector{0.00369204697f}); // x_scale + test_case.add_input(std::vector{132}); // x_zero_point + test_case.add_input(std::vector{0}); // w + test_case.add_input(std::vector{0.00172794575f}); // w_scale + test_case.add_input(std::vector{255}); // 
w_zero_point + test_case.add_input(std::vector{0.00162681262f}); // y_scale + test_case.add_input(std::vector{123}); // y_zero_point + + test_case.add_expected_output({1, 1, 7, 7}, std::vector{ 0, 81, 93, 230, 52, 87, 197, + 240, 196, 18, 160, 126, 255, 191, + 199, 13, 102, 34, 87, 243, 89, + 23, 77, 69, 60, 18, 93, 18, + 67, 216, 131, 178, 175, 153, 212, + 128, 25, 234, 172, 214, 215, 121, + 0, 101, 163, 114, 213, 107, 8}); + // clang-format on test_case.run(); } diff --git a/ngraph/test/runtime/ie/unit_test.manifest b/ngraph/test/runtime/ie/unit_test.manifest index c32732c3c2639c..8c7fad49569dda 100644 --- a/ngraph/test/runtime/ie/unit_test.manifest +++ b/ngraph/test/runtime/ie/unit_test.manifest @@ -130,7 +130,6 @@ onnx_model_scatterND_param_i64_indices IE_CPU.onnx_constant_sparse_tensor_int64_3x4 IE_CPU.onnx_constant_sparse_tensor_uint64_3x4 - # TopK Incorrect input data/index values precision onnx_model_argmax_int32 onnx_model_argmin_int32 @@ -241,6 +240,9 @@ onnx_size_op_single onnx_size_op_graph_end onnx_size_op_graph_middle +# /openvino/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp:747 +# Output blob byte size is not equal network output byte size (64!=216)." thrown in the test body. +onnx_model_quant_conv_linear_3d #------------------------------------------------------------------------------- # @@ -643,11 +645,6 @@ gemm_broadcast_axes_1_input_C scale_shift_no_broadcast scale_shift -# Detected op not belonging to opset1! -onnx_model_quant_conv_linear -onnx_model_quant_conv_linear_2d -onnx_model_quant_conv_linear_3d - # Cannot cast ngraph node Dot to CNNLayer! 
dot_4d_5d_multi_axis dot_4d_5d_multi_axis_more diff --git a/ngraph/test/runtime/interpreter/evaluates_map.cpp b/ngraph/test/runtime/interpreter/evaluates_map.cpp index 90adbe1245783f..781c252fa2a5af 100644 --- a/ngraph/test/runtime/interpreter/evaluates_map.cpp +++ b/ngraph/test/runtime/interpreter/evaluates_map.cpp @@ -35,7 +35,6 @@ #include #include #include -#include #include #include #include @@ -2435,28 +2434,6 @@ namespace return true; } - template - bool evaluate(const shared_ptr& op, - const HostTensorVector& outputs, - const HostTensorVector& inputs) - { - using T = typename element_type_traits::value_type; - runtime::reference::fake_quantize(inputs[0]->get_data_ptr(), - inputs[1]->get_data_ptr(), - inputs[2]->get_data_ptr(), - inputs[3]->get_data_ptr(), - inputs[4]->get_data_ptr(), - outputs[0]->get_data_ptr(), - op->get_input_shape(0), - op->get_input_shape(1), - op->get_input_shape(2), - op->get_input_shape(3), - op->get_input_shape(4), - op->get_levels(), - op->get_auto_broadcast()); - return true; - } - template bool evaluate(const shared_ptr& op, const HostTensorVector& outputs, diff --git a/ngraph/test/runtime/interpreter/unit_test.manifest b/ngraph/test/runtime/interpreter/unit_test.manifest index ec77901f320283..880872f4491cf2 100644 --- a/ngraph/test/runtime/interpreter/unit_test.manifest +++ b/ngraph/test/runtime/interpreter/unit_test.manifest @@ -1,4 +1,3 @@ -INTERPRETER.onnx_model_quant_conv_linear INTERPRETER.onnx_top_k_opset_10 # Temporarily disabled: @@ -80,8 +79,6 @@ INTERPRETER.onnx_model_dequantize_linear_1d_zero_scale_uint8 INTERPRETER.onnx_model_dequantize_linear_1d_zero_scale_int8 INTERPRETER.onnx_model_dequantize_linear_1d_zero_scale_int8_4d INTERPRETER.onnx_model_dequantize_linear_1d_zero_scale_uint8_negative_axis -INTERPRETER.onnx_model_quant_conv_linear_2d -INTERPRETER.onnx_model_quant_conv_linear_3d INTERPRETER.onnx_model_conv_integer INTERPRETER.onnx_model_conv_integer_zero_point_zero 
INTERPRETER.onnx_model_conv_integer_no_zero_point diff --git a/runtime/bindings/python/tests/__init__.py b/runtime/bindings/python/tests/__init__.py index 929eb41658b8da..4f07f874900229 100644 --- a/runtime/bindings/python/tests/__init__.py +++ b/runtime/bindings/python/tests/__init__.py @@ -25,8 +25,6 @@ def xfail_test(reason="Mark the test as expected to fail", strict=True): skip_segfault = pytest.mark.skip(reason="Segmentation fault error") xfail_issue_33488 = xfail_test(reason="RuntimeError: nGraph does not support the following ONNX operations:" "MaxUnpool") -xfail_issue_33535 = xfail_test(reason="nGraph does not support the following ONNX operations:" - "DynamicQuantizeLinear") xfail_issue_33538 = xfail_test(reason="RuntimeError: nGraph does not support the following ONNX operations:" "Scan") skip_issue_38084 = pytest.mark.skip(reason="Aborted (core dumped) Assertion " @@ -72,8 +70,6 @@ def xfail_test(reason="Mark the test as expected to fail", strict=True): xfail_issue_38722 = xfail_test(reason="RuntimeError: While validating ONNX nodes MatMulInteger" "and QLinearMatMul" "Input0 scale and input0 zero point shape must be same and 1") -xfail_issue_38723 = xfail_test(reason="RuntimeError: nGraph does not support the following ONNX operations:" - "QLinearConv") xfail_issue_38724 = xfail_test(reason="RuntimeError: While validating ONNX node '':" "tf_crop_and_resize - this type of coordinate transformation mode" "is not supported. 
Choose one of the following modes:" @@ -100,8 +96,6 @@ def xfail_test(reason="Mark the test as expected to fail", strict=True): xfail_issue_44958 = xfail_test(reason="Expected: Unsupported dynamic op: Interpolate") xfail_issue_44965 = xfail_test(reason="Expected: RuntimeError: value info has no element") xfail_issue_44968 = xfail_test(reason="Expected: Unsupported dynamic op: Squeeze") -xfail_issue_44976 = xfail_test(reason="Expected: RuntimeError: Quantize layer with name:" - "FakeQuantize_xxx has non const input on 1 port") xfail_issue_46762 = xfail_test(reason="Incorrect result of Minimum op if uint data type is used") xfail_issue_47323 = xfail_test(reason="RuntimeError: The plugin does not support FP64") xfail_issue_47337 = xfail_test(reason="RuntimeError: Unsupported dynamic ops: v1::OneHot") diff --git a/runtime/bindings/python/tests/test_ngraph/test_ops_fused.py b/runtime/bindings/python/tests/test_ngraph/test_ops_fused.py index d96f870f604bc0..6db4a5f29c4e77 100644 --- a/runtime/bindings/python/tests/test_ngraph/test_ops_fused.py +++ b/runtime/bindings/python/tests/test_ngraph/test_ops_fused.py @@ -6,7 +6,7 @@ import ngraph as ng from tests.runtime import get_runtime -from tests import (xfail_issue_36486, xfail_issue_44976) +from tests import xfail_issue_36486 def test_elu_operator_with_scalar_and_array(): @@ -40,7 +40,6 @@ def test_elu_operator_with_scalar(): assert np.allclose(result, expected) -@xfail_issue_44976 def test_fake_quantize(): runtime = get_runtime() diff --git a/runtime/bindings/python/tests/test_onnx/test_backend.py b/runtime/bindings/python/tests/test_onnx/test_backend.py index 367e9a04dd55f3..fb5ca82b46e83e 100644 --- a/runtime/bindings/python/tests/test_onnx/test_backend.py +++ b/runtime/bindings/python/tests/test_onnx/test_backend.py @@ -8,7 +8,6 @@ BACKEND_NAME, skip_rng_tests, xfail_issue_33488, - xfail_issue_33535, xfail_issue_33538, xfail_issue_33581, xfail_issue_33589, @@ -26,7 +25,6 @@ xfail_issue_38710, xfail_issue_38713, 
xfail_issue_38722, - xfail_issue_38723, xfail_issue_38724, xfail_issue_38732, xfail_issue_38734, @@ -45,7 +43,6 @@ xfail_issue_44958, xfail_issue_44965, xfail_issue_44968, - xfail_issue_44976, xfail_issue_45180, xfail_issue_45344, xfail_issue_46762, @@ -200,7 +197,12 @@ def expect_fail(test_case_path, xfail): # type: (str) -> None "OnnxBackendNodeModelTest.test_argmax_negative_axis_keepdims_random_select_last_index_cpu", "OnnxBackendNodeModelTest.test_argmin_negative_axis_keepdims_random_select_last_index_cpu", ), - (xfail_issue_38091, "OnnxBackendNodeModelTest.test_gather_negative_indices_cpu"), + ( + xfail_issue_38091, + "OnnxBackendNodeModelTest.test_gather_negative_indices_cpu", + "OnnxBackendNodeModelTest.test_dynamicquantizelinear_cpu", + "OnnxBackendNodeModelTest.test_dynamicquantizelinear_expanded_cpu", + ), ( xfail_issue_52463, "OnnxBackendPyTorchOperatorModelTest.test_operator_add_size1_singleton_broadcast_cpu", @@ -337,19 +339,12 @@ def expect_fail(test_case_path, xfail): # type: (str) -> None "OnnxBackendNodeModelTest.test_isinf_negative_cpu", "OnnxBackendNodeModelTest.test_isinf_cpu", ), - ( - xfail_issue_33535, - "OnnxBackendNodeModelTest.test_dynamicquantizelinear_min_adjusted_cpu", - "OnnxBackendNodeModelTest.test_dynamicquantizelinear_cpu", - "OnnxBackendNodeModelTest.test_dynamicquantizelinear_max_adjusted_cpu", - ), ( xfail_issue_38722, "OnnxBackendNodeModelTest.test_matmulinteger_cpu", "OnnxBackendNodeModelTest.test_qlinearmatmul_2D_cpu", "OnnxBackendNodeModelTest.test_qlinearmatmul_3D_cpu", ), - (xfail_issue_38723, "OnnxBackendNodeModelTest.test_qlinearconv_cpu"), (xfail_issue_38724, "OnnxBackendNodeModelTest.test_resize_tf_crop_and_resize_cpu"), ( xfail_issue_33606, @@ -450,14 +445,6 @@ def expect_fail(test_case_path, xfail): # type: (str) -> None "OnnxBackendNodeModelTest.test_squeeze_cpu", "OnnxBackendNodeModelTest.test_squeeze_negative_axes_cpu", ), - ( - xfail_issue_44976, - "OnnxBackendNodeModelTest.test_quantizelinear_axis_cpu", - 
"OnnxBackendNodeModelTest.test_dynamicquantizelinear_min_adjusted_expanded_cpu", - "OnnxBackendNodeModelTest.test_dynamicquantizelinear_expanded_cpu", - "OnnxBackendNodeModelTest.test_dynamicquantizelinear_max_adjusted_expanded_cpu", - "OnnxBackendNodeModelTest.test_quantizelinear_cpu", - ), ( xfail_issue_33593, "OnnxBackendNodeModelTest.test_maxpool_with_argmax_2d_precomputed_strides_cpu",