Skip to content

Commit

Permalink
[ONNX] QLinearConvolution (openvinotoolkit#7210)
Browse files Browse the repository at this point in the history
  • Loading branch information
tsocha authored and akuporos committed Sep 6, 2021
1 parent d384efe commit 868b783
Show file tree
Hide file tree
Showing 21 changed files with 533 additions and 115 deletions.
6 changes: 6 additions & 0 deletions ngraph/core/include/ngraph/op/fake_quantize.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,12 @@ class NGRAPH_API FakeQuantize : public ngraph::op::Op {
m_auto_broadcast = auto_broadcast;
}

/// \brief Evaluates FakeQuantize on host tensors using the reference implementation.
bool evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const override;
/// \brief Reports whether evaluate() supports this op's input element type.
bool has_evaluate() const override;
/// \brief Constant folding is deliberately disabled for FakeQuantize: always
/// reports "not folded" so the op stays in the graph.
bool constant_fold(OutputVector& output_values, const OutputVector& inputs_values) override {
return false;
}

private:
std::size_t m_levels;
AutoBroadcastSpec m_auto_broadcast = op::AutoBroadcastType::NUMPY;
Expand Down
79 changes: 79 additions & 0 deletions ngraph/core/src/op/fake_quantize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@
#include "ngraph/op/constant.hpp"
#include "ngraph/op/convert.hpp"
#include "ngraph/op/select.hpp"
#include "ngraph/runtime/reference/fake_quantize.hpp"
#include "ngraph/shape.hpp"
#include "ngraph/type/element_type.hpp"

using namespace std;
using namespace ngraph;
Expand Down Expand Up @@ -73,3 +75,80 @@ shared_ptr<Node> op::FakeQuantize::clone_with_new_inputs(const OutputVector& new
m_levels,
m_auto_broadcast);
}

namespace fakequantizeop {
// Typed kernel: runs the reference FakeQuantize implementation for a single
// element type ET. arg0..arg4 are the op's five inputs (data, input_low,
// input_high, output_low, output_high); `parent` supplies the static input
// shapes, the number of quantization levels and the broadcast spec.
// Always returns true (the dispatcher below reports unsupported types).
template <element::Type_t ET>
bool evaluate(const HostTensorPtr& arg0,
const HostTensorPtr& arg1,
const HostTensorPtr& arg2,
const HostTensorPtr& arg3,
const HostTensorPtr& arg4,
const HostTensorPtr& out,
const ngraph::op::FakeQuantize* parent) {
NGRAPH_OP_SCOPE(v0_FakeQuantize_evaluate);
// Map the compile-time element type to its C++ value type.
using T = typename element_type_traits<ET>::value_type;
runtime::reference::fake_quantize<T>(arg0->get_data_ptr<const T>(),
arg1->get_data_ptr<const T>(),
arg2->get_data_ptr<const T>(),
arg3->get_data_ptr<const T>(),
arg4->get_data_ptr<const T>(),
out->get_data_ptr<T>(),
parent->get_input_shape(0),
parent->get_input_shape(1),
parent->get_input_shape(2),
parent->get_input_shape(3),
parent->get_input_shape(4),
parent->get_levels(),
parent->get_auto_broadcast());
return true;
}

// Runtime dispatcher: selects the typed kernel above based on the element
// type of the data input (arg0). Returns false for unsupported types.
// NOTE: the supported-type list must stay in sync with
// FakeQuantize::has_evaluate() below.
bool evaluate_fakequantize(const HostTensorPtr& arg0,
const HostTensorPtr& arg1,
const HostTensorPtr& arg2,
const HostTensorPtr& arg3,
const HostTensorPtr& arg4,
const HostTensorPtr& out,
const ngraph::op::FakeQuantize* parent) {
bool rc = true;
switch (arg0->get_element_type()) {
NGRAPH_TYPE_CASE(evaluate_fakequantize, i32, arg0, arg1, arg2, arg3, arg4, out, parent);
NGRAPH_TYPE_CASE(evaluate_fakequantize, i64, arg0, arg1, arg2, arg3, arg4, out, parent);
NGRAPH_TYPE_CASE(evaluate_fakequantize, u32, arg0, arg1, arg2, arg3, arg4, out, parent);
NGRAPH_TYPE_CASE(evaluate_fakequantize, u64, arg0, arg1, arg2, arg3, arg4, out, parent);
NGRAPH_TYPE_CASE(evaluate_fakequantize, f16, arg0, arg1, arg2, arg3, arg4, out, parent);
NGRAPH_TYPE_CASE(evaluate_fakequantize, f32, arg0, arg1, arg2, arg3, arg4, out, parent);
default:
rc = false;
break;
}
return rc;
}
} // namespace fakequantizeop

// Runs the reference FakeQuantize computation on host tensors by forwarding
// the five inputs, the single output and this op's attributes to the
// type-dispatching helper; returns false for unsupported element types.
bool ngraph::op::FakeQuantize::evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const {
    NGRAPH_OP_SCOPE(v0_FakeQuantize_evaluate);
    const auto& data = inputs[0];
    const auto& input_low = inputs[1];
    const auto& input_high = inputs[2];
    const auto& output_low = inputs[3];
    const auto& output_high = inputs[4];
    return fakequantizeop::evaluate_fakequantize(data,
                                                 input_low,
                                                 input_high,
                                                 output_low,
                                                 output_high,
                                                 outputs[0],
                                                 this);
}

// Reports whether evaluate() can handle the current input element type.
// The accepted set mirrors the dispatch in evaluate_fakequantize.
bool ngraph::op::FakeQuantize::has_evaluate() const {
    NGRAPH_OP_SCOPE(v0_FakeQuantize_has_evaluate);
    const auto& input_et = get_input_element_type(0);
    return input_et == ngraph::element::i32 || input_et == ngraph::element::i64 ||
           input_et == ngraph::element::u32 || input_et == ngraph::element::u64 ||
           input_et == ngraph::element::f16 || input_et == ngraph::element::f32;
}
21 changes: 12 additions & 9 deletions ngraph/frontend/onnx/frontend/src/op/conv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,15 @@
#include "ngraph/builder/reshape.hpp"
#include "ngraph/op/group_conv.hpp"
#include "ngraph/op/util/attr_types.hpp"
#include "onnx_import/core/null_node.hpp"
#include "utils/convpool.hpp"
#include "utils/reshape.hpp"

namespace ngraph {
namespace onnx_import {
namespace op {
namespace set_1 {
namespace {
namespace detail {
std::shared_ptr<ngraph::op::Op> make_ng_convolution(const Output<ngraph::Node>& data,
const Output<ngraph::Node>& filters,
const ngraph::Strides& strides,
Expand Down Expand Up @@ -57,14 +58,13 @@ std::shared_ptr<ngraph::Node> add_bias(const Output<ngraph::Node>& ng_conv, cons
return {
std::make_shared<default_opset::Add>(ng_conv, reshape::reshape_channel_shaped_node_to_nchw(bias, conv_rank))};
}
} // namespace

OutputVector conv(const Node& node) {
OutputVector conv(const Node& node,
Output<ngraph::Node> data,
Output<ngraph::Node> filters,
Output<ngraph::Node> bias) {
// in the current implementation we assume that the data input rank is static
// and only the 'batch' dimension can be dynamic
const OutputVector& inputs = node.get_ng_inputs();
const auto data = inputs.at(0);
const auto filters = inputs.at(1);
const auto groups = node.get_attribute_value<int64_t>("group", 1);

NGRAPH_CHECK(data.get_partial_shape().rank().is_static(), "The input data tensor's rank has to be known (static)");
Expand All @@ -80,10 +80,9 @@ OutputVector conv(const Node& node) {
make_ng_convolution(data, filters, strides, dilations, padding_below, padding_above, groups, auto_pad_type);

// no bias param
if (inputs.size() < 3) {
if (ngraph::op::is_null(bias)) {
return {conv_node};
} else {
const auto& bias = inputs.at(2);
const auto& bias_ps = bias.get_partial_shape();

NGRAPH_CHECK(bias_ps.rank().is_static() && bias_ps.rank().get_length() == 1,
Expand All @@ -92,7 +91,11 @@ OutputVector conv(const Node& node) {
return {add_bias(conv_node, bias)};
}
}

} // namespace detail
// Entry point for ONNX Conv: unpacks the node's inputs and delegates to
// detail::conv. The optional bias (third input) is replaced by a NullNode
// placeholder when absent, so the detail implementation has a uniform
// four-argument signature shared with QLinearConv.
OutputVector conv(const Node& node) {
    const OutputVector& inputs = node.get_ng_inputs();
    // Guard against malformed models: operator[] on a too-short vector is UB,
    // so validate the required input count and use bounds-checked access.
    NGRAPH_CHECK(inputs.size() >= 2, "The ONNX Conv operation requires at least 2 inputs. Got: ", inputs.size());
    const Output<ngraph::Node> bias =
        inputs.size() < 3 ? std::make_shared<NullNode>()->output(0) : inputs.at(2);
    return detail::conv(node, inputs.at(0), inputs.at(1), bias);
}
} // namespace set_1

} // namespace op
Expand Down
3 changes: 3 additions & 0 deletions ngraph/frontend/onnx/frontend/src/op/conv.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ namespace ngraph {
namespace onnx_import {
namespace op {
namespace set_1 {
namespace detail {
OutputVector conv(const Node& node, Output<ngraph::Node> data, Output<ngraph::Node> filters, Output<ngraph::Node> bias);
}
/// \brief Performs ONNX Conv operation.
///
/// \param node The ONNX node object representing this operation.
Expand Down
43 changes: 25 additions & 18 deletions ngraph/frontend/onnx/frontend/src/op/dequantize_linear.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
namespace ngraph {
namespace onnx_import {
namespace op {
namespace {
namespace detail {
Output<ngraph::Node> get_zero_point(const OutputVector& inputs) {
if (inputs.size() == 3 && !ngraph::op::is_null(inputs[2])) {
auto zero_point = inputs[2];
Expand All @@ -33,7 +33,7 @@ Output<ngraph::Node> get_zero_point(const OutputVector& inputs) {
return default_opset::Constant::create(element::f32, Shape{}, {0});
}
}
} // namespace
} // namespace detail
namespace set_1 {
OutputVector dequantize_linear(const Node& node) {
const OutputVector inputs{node.get_ng_inputs()};
Expand All @@ -44,7 +44,7 @@ OutputVector dequantize_linear(const Node& node) {

const auto x = inputs[0];
const auto scale = inputs[1];
const auto zero_point = get_zero_point(inputs);
const auto zero_point = detail::get_zero_point(inputs);

common::validate_scalar_input("Dequantization scale", scale.get_node_shared_ptr(), {element::f32});
common::validate_scalar_input("Zero point", zero_point.get_node_shared_ptr());
Expand All @@ -58,7 +58,7 @@ OutputVector dequantize_linear(const Node& node) {
} // namespace set_1

namespace set_13 {
namespace {
namespace detail {
void validate_scale(const Output<ngraph::Node> scale, const Output<ngraph::Node> x, const int64_t axis) {
const auto& scale_shape = scale.get_partial_shape();
NGRAPH_CHECK(scale_shape.rank().get_length() == 0 || scale_shape.rank().get_length() == 1,
Expand Down Expand Up @@ -129,25 +129,16 @@ std::shared_ptr<ngraph::Node> reshape_input(const Output<ngraph::Node> input,

return std::make_shared<default_opset::Reshape>(input, target_shape, true);
}
} // namespace

OutputVector dequantize_linear(const Node& node) {
const OutputVector inputs{node.get_ng_inputs()};

NGRAPH_CHECK(2 <= inputs.size() && inputs.size() <= 3,
"The DequantizeLinear op expects 2 required and one optional "
"input. Got: ",
inputs.size());

const auto x = inputs[0];
auto scale = inputs[1];
auto zero_point = get_zero_point(inputs);

OutputVector dequantize_linear(Output<ngraph::Node> x,
Output<ngraph::Node> scale,
Output<ngraph::Node> zero_point,
int64_t axis,
Node node) {
const auto x_shape = x.get_partial_shape();

NGRAPH_CHECK(x_shape.rank().is_static(), "Rank of the input data tensor has to be known (static).");

int64_t axis{node.get_attribute_value<int64_t>("axis", 1)};
axis = ngraph::normalize_axis(node.get_description(), axis, x_shape.rank());

validate_scale(scale, x, axis);
Expand All @@ -163,6 +154,22 @@ OutputVector dequantize_linear(const Node& node) {
std::make_shared<default_opset::Multiply>(std::make_shared<default_opset::Subtract>(converted_x, zero_point),
scale)};
}
} // namespace detail

// ONNX DequantizeLinear (opset 13): validates the input count, resolves the
// optional zero point, and forwards everything together with the "axis"
// attribute (default 1) to the reshaping detail implementation.
OutputVector dequantize_linear(const Node& node) {
    const OutputVector inputs{node.get_ng_inputs()};

    const auto input_count = inputs.size();
    NGRAPH_CHECK(2 <= input_count && input_count <= 3,
                 "The DequantizeLinear op expects 2 required and one optional "
                 "input. Got: ",
                 input_count);

    const auto axis = node.get_attribute_value<int64_t>("axis", 1);
    // these reshapes make sure that dequantization happens over the specified axis
    return detail::dequantize_linear(inputs[0], inputs[1], op::detail::get_zero_point(inputs), axis, node);
}
} // namespace set_13
} // namespace op
} // namespace onnx_import
Expand Down
13 changes: 12 additions & 1 deletion ngraph/frontend/onnx/frontend/src/op/dequantize_linear.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,25 @@
namespace ngraph {
namespace onnx_import {
namespace op {
namespace detail {
Output<ngraph::Node> get_zero_point(const OutputVector& inputs);
}

namespace set_1 {
OutputVector dequantize_linear(const Node& node);

} // namespace set_1

namespace set_13 {
OutputVector dequantize_linear(const Node& node);
namespace detail {
OutputVector dequantize_linear(Output<ngraph::Node> x,
Output<ngraph::Node> scale,
Output<ngraph::Node> zero_point,
int64_t axis,
Node node);
}
OutputVector dequantize_linear(const Node& node);
} // namespace set_13

} // namespace op

Expand Down
68 changes: 68 additions & 0 deletions ngraph/frontend/onnx/frontend/src/op/qlinear_conv.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

// Disabled in CMakeList
// Update to higher opset required

#include "op/qlinear_conv.hpp"

#include <cstddef>
#include <memory>
#include <vector>

#include "conv.hpp"
#include "dequantize_linear.hpp"
#include "exceptions.hpp"
#include "ngraph/opsets/opset6.hpp"
#include "onnx_import/core/null_node.hpp"
#include "quantize_linear.hpp"

namespace ngraph {
namespace onnx_import {
namespace op {
namespace set_1 {
// Implements ONNX QLinearConv by dequantizing the inputs to float, running a
// regular convolution, and re-quantizing the result:
//   y = quantize(conv(dequantize(x), dequantize(w)) + B', y_scale, y_zero_point)
OutputVector qlinear_conv(const Node& node) {
const OutputVector& inputs = node.get_ng_inputs();

// Inputs follow the ONNX QLinearConv order: quantized data and weights with
// their scales and zero points, plus the requested output quantization.
auto x = inputs.at(0);
auto x_scale = inputs.at(1);
auto x_zero_point = inputs.at(2);
auto w = inputs.at(3);
auto w_scale = inputs.at(4);
auto w_zero_point = inputs.at(5);
auto y_scale = inputs.at(6);
auto y_zero_point = inputs.at(7);
// Bias is the optional 9th input; a NullNode output marks it as absent.
Output<ngraph::Node> B = inputs.size() > 8 ? inputs.at(8) : std::make_shared<NullNode>()->output(0);

// Dequantize data and weights to float. Zero points are converted to f32
// so the subtraction in dequantize_linear happens in floating point.
// Axis 1 is passed for the per-axis validation path.
x = set_13::detail::dequantize_linear(x,
                                      x_scale,
                                      std::make_shared<opset6::Convert>(x_zero_point, element::f32),
                                      1,
                                      node)[0];
w = set_13::detail::dequantize_linear(w,
                                      w_scale,
                                      std::make_shared<opset6::Convert>(w_zero_point, element::f32),
                                      1,
                                      node)[0];

if (!ngraph::op::is_null(B)) {
    // Per the ONNX QLinearConv spec the bias is quantized with scale
    // x_scale * w_scale and zero point 0, so converting it and multiplying
    // by both scales dequantizes it to float — TODO confirm against spec.
    B = std::make_shared<opset6::Multiply>(std::make_shared<opset6::Convert>(B, x_scale.get_element_type()),
                                           std::make_shared<opset6::Multiply>(x_scale, w_scale))
            ->output(0);
}

// Float convolution on the dequantized operands (shared with regular Conv).
auto result = detail::conv(node, x, w, B)[0];

// Re-quantize the float result to the requested output scale/zero point.
result = op::detail::make_fake_quantize(y_scale, y_zero_point, result);

return {result};
}

} // namespace set_1

} // namespace op

} // namespace onnx_import

} // namespace ngraph
31 changes: 31 additions & 0 deletions ngraph/frontend/onnx/frontend/src/op/qlinear_conv.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

// Disabled in CMakeList
// Update to higher opset required

#pragma once

#include "ngraph/node.hpp"
#include "onnx_import/core/node.hpp"

namespace ngraph {
namespace onnx_import {
namespace op {
namespace set_1 {
/// \brief Performs ONNX QLinearConv operation.
///
/// \param node The ONNX node object representing this operation.
///
/// \return The vector containing Ngraph nodes producing output of ONNX quantized
/// convolution operation.
OutputVector qlinear_conv(const Node& node);

} // namespace set_1

} // namespace op

} // namespace onnx_import

} // namespace ngraph
Loading

0 comments on commit 868b783

Please sign in to comment.