[GNA] Add POT/FakeQuantize support

openvinotoolkit · Sep 2, 2021 · 9e30085 · 9e30085
1 parent 2d6cdaa
commit 9e30085
Show file tree

Hide file tree

Showing 8 changed files with 164 additions and 54 deletions.
diff --git a/inference-engine/src/gna_plugin/frontend/scale_factor_calc.hpp b/inference-engine/src/gna_plugin/frontend/scale_factor_calc.hpp
@@ -1300,6 +1300,7 @@ class ScaleFactorCalculator {
         if (!frontend::ScaleFactorPerLayer<T>()(ptr, weightsBytesSize, inputsBytesSize, result, isFakeQuantize, infiniteLoopCount)) {
             return false;
         }
+
         if (result) {
             idx++;
             return true;
@@ -1309,7 +1310,6 @@ class ScaleFactorCalculator {
             if (!result) {
                 return result.restartLayer == cnnLayer.get();
             }
-            return ptr == cnnLayer.get();
         });
         if (idx != net.end()) {
             idx++;

diff --git a/inference-engine/src/gna_plugin/gna_plugin.cpp b/inference-engine/src/gna_plugin/gna_plugin.cpp
@@ -724,6 +724,7 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {
         manager.register_pass<ngraph::pass::LSTMCellDecomposition>();
         manager.register_pass<ConvertDWSCBiasToScaleShifts>();
         manager.register_pass<ConvertDWSCToScaleShifts>();
+        manager.register_pass<ConvertDWSCWithFqToScaleShifts>();
         manager.register_pass<ConvertPaddedToValidConv>();
         if (config.gnaCompileTarget == InferenceEngine::GNAConfigParams::GNA_TARGET_2_0) {
             manager.register_pass<Decompose2DConvTransposedWithBiasAF>();

diff --git a/inference-engine/src/gna_plugin/transformations/convert_dwsc_to_scaleshifts.cpp b/inference-engine/src/gna_plugin/transformations/convert_dwsc_to_scaleshifts.cpp
@@ -12,13 +12,13 @@
 #include <ngraph/rt_info.hpp>
 #include <ie_common.h>
 #include "utils/transformation_helper.hpp"
-//#include "backend/gna_limitations.hpp"
 
 
 using namespace GNAPluginNS;
 
 NGRAPH_RTTI_DEFINITION(ConvertDWSCToScaleShifts, "ConvertDWSCToScaleShifts", 0);
 NGRAPH_RTTI_DEFINITION(ConvertDWSCBiasToScaleShifts, "ConvertDWSCBiasToScaleShifts", 0);
+NGRAPH_RTTI_DEFINITION(ConvertDWSCWithFqToScaleShifts, "ConvertDWSCWithFqToScaleShifts", 0);
 
 static bool VerifyDWSC(std::shared_ptr<ngraph::opset7::GroupConvolution> dwsc) {
     // Verify it's a 1D convolution
@@ -32,7 +32,8 @@ static bool VerifyDWSC(std::shared_ptr<ngraph::opset7::GroupConvolution> dwsc) {
     return true;
 }
 
-static std::shared_ptr<ngraph::Node> DecomposeDWSC(std::shared_ptr<ngraph::opset7::GroupConvolution> dwsc, std::shared_ptr<ngraph::opset7::Constant> bias_const,
+static std::shared_ptr<ngraph::Node> DecomposeDWSC(std::shared_ptr<ngraph::opset7::GroupConvolution> dwsc,
+    std::shared_ptr<ngraph::opset7::Constant> bias_const, std::shared_ptr<ngraph::opset7::FakeQuantize> fq_bias,
     std::shared_ptr<ngraph::opset7::Reshape> flat_input_plane, std::shared_ptr<ngraph::Node> flat_filters_plane) {
     std::shared_ptr<ngraph::opset7::Constant> const_zero_padding;
     std::shared_ptr<ngraph::Node> reshaped_bias;
@@ -78,6 +79,7 @@ static std::shared_ptr<ngraph::Node> DecomposeDWSC(std::shared_ptr<ngraph::opset
                     if (bias_const) {
                         previous_layer_output = std::make_shared<ngraph::opset7::Add>(previous_layer_output, reshaped_bias);
                         copy_runtime_info(dwsc, previous_layer_output);
+                        previous_layer_output = InsertFQLayer(fq_bias, previous_layer_output);
                     }
                     last_layer_output = previous_layer_output;
                 } else {
@@ -111,11 +113,15 @@ static std::shared_ptr<ngraph::Node> DecomposeDWSC(std::shared_ptr<ngraph::opset
 }
 
 static bool Convert(std::shared_ptr<ngraph::Node> dwsc_node,
+    std::shared_ptr<ngraph::Node> reshape_filters_const_node,
     std::shared_ptr<ngraph::Node> bias_node,
-    std::shared_ptr<ngraph::Node> bias_const_node) {
+    std::shared_ptr<ngraph::Node> bias_const_node,
+    std::shared_ptr<ngraph::Node> fq_bias_node) {
     auto dwsc = std::dynamic_pointer_cast<ngraph::opset7::GroupConvolution>(dwsc_node);
+    auto reshape_filters_const = std::dynamic_pointer_cast<ngraph::opset7::Reshape>(reshape_filters_const_node);
     auto bias = std::dynamic_pointer_cast<ngraph::opset7::Add>(bias_node);
     auto bias_const = std::dynamic_pointer_cast<ngraph::opset7::Constant>(bias_const_node);
+    auto fq_bias = std::dynamic_pointer_cast<ngraph::opset7::FakeQuantize>(fq_bias_node);
 
     if (!VerifyDWSC(dwsc))
         return false;
@@ -124,7 +130,7 @@ static bool Convert(std::shared_ptr<ngraph::Node> dwsc_node,
     auto input_width = dwsc->get_input_shape(0)[3];
     auto output_channel_count = dwsc->get_output_shape(0)[1];
     auto output_width = dwsc->get_output_shape(0)[3];
-    auto original_last_node = (bias_const ? bias_node : dwsc_node);
+    auto original_last_node = (fq_bias ? fq_bias_node : (bias_const ? bias_node : dwsc_node));
 
     // Prepare flat input data
     auto reshaped_input_plane = std::make_shared<ngraph::opset7::Reshape>(dwsc->input_value(0),
@@ -139,7 +145,7 @@ static bool Convert(std::shared_ptr<ngraph::Node> dwsc_node,
             ngraph::Shape{1, shape_size(dwsc->input_value(0).get_shape())}), false);
 
     // Prepare flat filter data
-    auto filters_const = std::dynamic_pointer_cast<ngraph::opset7::Constant>(dwsc->input_value(1).get_node_shared_ptr());
+    auto filters_const = std::dynamic_pointer_cast<ngraph::Node>(dwsc->get_input_node_shared_ptr(1));
     auto filters_size = shape_size(filters_const->get_shape());
 
     auto transposed_filters_const = ngraph::op::util::make_try_fold<ngraph::opset7::Transpose>(filters_const,
@@ -151,7 +157,7 @@ static bool Convert(std::shared_ptr<ngraph::Node> dwsc_node,
     copy_runtime_info(dwsc, {reshaped_input_plane, transposed_input_plane, flat_input_plane, transposed_filters_const, flat_filters_plane});
 
     // Convert DWSC to a set of diagonal layers
-    auto output_plane = DecomposeDWSC(dwsc, bias_const, flat_input_plane, flat_filters_plane);
+    auto output_plane = DecomposeDWSC(dwsc, bias_const, fq_bias, flat_input_plane, flat_filters_plane);
 
     // Restore the original output shape
     auto result = std::make_shared<ngraph::opset7::Reshape>(output_plane,
@@ -176,7 +182,7 @@ ConvertDWSCToScaleShifts::ConvertDWSCToScaleShifts() {
 
     ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) {
         const auto& pattern_map = m.get_pattern_value_map();
-        return Convert(pattern_map.at(dwsc).get_node_shared_ptr(), nullptr, nullptr);
+        return Convert(pattern_map.at(dwsc).get_node_shared_ptr(), nullptr, nullptr, nullptr, nullptr);
     };
 
     auto m = std::make_shared<ngraph::pattern::Matcher>(dwsc, matcher_name);
@@ -201,9 +207,40 @@ ConvertDWSCBiasToScaleShifts::ConvertDWSCBiasToScaleShifts() {
         if (bias_node && (bias_const = VerifyBiasGetConst(pattern_map.at(dwsc).get_node_shared_ptr(), bias_node)) == nullptr)
             return false;
 
-        return Convert(pattern_map.at(dwsc).get_node_shared_ptr(), bias_node, bias_const);
+        return Convert(pattern_map.at(dwsc).get_node_shared_ptr(), nullptr, bias_node, bias_const, nullptr);
     };
 
     auto m = std::make_shared<ngraph::pattern::Matcher>(bias, matcher_name);
     this->register_matcher(m, callback);
 }
+
+// Registers a matcher for the POT-quantized DWSC chain
+//   Constant (rank 4) -> FakeQuantize -> Reshape -> GroupConvolution -> Add (Constant) -> FakeQuantize
+// and forwards the matched nodes to Convert().
+ConvertDWSCWithFqToScaleShifts::ConvertDWSCWithFqToScaleShifts() {
+    MATCHER_SCOPE(ConvertDWSCWithFqToScaleShifts);
+
+    // Generic constant input, reused for every constant operand in the pattern
+    auto any_const = ngraph::pattern::wrap_type<ngraph::opset7::Constant>();
+
+    // Filters branch: Constant -> FakeQuantize -> Reshape
+    auto filters = ngraph::pattern::wrap_type<ngraph::opset7::Constant>(ngraph::pattern::rank_equals(4));
+    auto filters_fq = ngraph::pattern::wrap_type<ngraph::opset7::FakeQuantize>(
+        {filters, any_const, any_const, any_const, any_const},
+        consumers_and_rank(1, 4));
+    auto filters_reshape = ngraph::pattern::wrap_type<ngraph::opset7::Reshape>({filters_fq, any_const});
+
+    // Data branch: GroupConvolution -> Add -> FakeQuantize (pattern root)
+    auto conv = ngraph::pattern::wrap_type<ngraph::opset7::GroupConvolution>(
+        {ngraph::pattern::any_input(), filters_reshape}, consumers_and_rank(1, 4));
+    auto add_bias = ngraph::pattern::wrap_type<ngraph::opset7::Add>({conv, any_const});
+    auto output_fq = ngraph::pattern::wrap_type<ngraph::opset7::FakeQuantize>(
+        {add_bias, any_const, any_const, any_const, any_const},
+        consumers_and_rank(1, 4));
+
+    ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) {
+        const auto& matched = m.get_pattern_value_map();
+
+        std::shared_ptr<ngraph::Node> add_node = nullptr;
+        const auto add_it = matched.find(add_bias);
+        if (add_it != matched.end())
+            add_node = add_it->second.get_node_shared_ptr();
+
+        // When an Add was matched it must be a valid bias for this convolution
+        std::shared_ptr<ngraph::Node> add_const = nullptr;
+        if (add_node) {
+            add_const = VerifyBiasGetConst(matched.at(conv).get_node_shared_ptr(), add_node);
+            if (add_const == nullptr)
+                return false;
+        }
+
+        return Convert(matched.at(conv).get_node_shared_ptr(), matched.at(filters_reshape).get_node_shared_ptr(),
+            add_node, add_const, matched.at(output_fq).get_node_shared_ptr());
+    };
+
+    const auto matcher = std::make_shared<ngraph::pattern::Matcher>(output_fq, matcher_name);
+    this->register_matcher(matcher, callback);
+}
diff --git a/inference-engine/src/gna_plugin/transformations/convert_dwsc_to_scaleshifts.hpp b/inference-engine/src/gna_plugin/transformations/convert_dwsc_to_scaleshifts.hpp
@@ -26,4 +26,14 @@ class ConvertDWSCBiasToScaleShifts : public ngraph::pass::MatcherPass {
     ConvertDWSCBiasToScaleShifts();
 };
 
+/**
+ * @brief Convert a depthwise separable convolution with bias (represented by a GroupConvolution + Add), processed by POT,
+ * to a set of ScaleShift layers (MatMul + Add)
+ *
+ * The matcher pattern is:
+ *   Constant (rank 4) -> FakeQuantize -> Reshape -> GroupConvolution -> Add (Constant) -> FakeQuantize
+ * i.e. both the filters and the biased convolution output carry a FakeQuantize layer, and the Add is
+ * a required part of the pattern.
+ */
+class ConvertDWSCWithFqToScaleShifts : public ngraph::pass::MatcherPass {
+public:
+    NGRAPH_RTTI_DECLARATION;
+    ConvertDWSCWithFqToScaleShifts();
+};
+
 } // namespace GNAPluginNS
diff --git a/inference-engine/src/gna_plugin/transformations/decompose_2d_convolution.cpp b/inference-engine/src/gna_plugin/transformations/decompose_2d_convolution.cpp
@@ -262,16 +262,6 @@ static void TransformInput(const GraphData& graph_data, const ConvData& conv_dat
     split_input_plane = flattened_dilated_transposed_input;
 }
 
-static void InsertFQLayer(const std::shared_ptr<ngraph::opset7::FakeQuantize> fqLayer,
-    std::shared_ptr<ngraph::Node> lastNode) {
-    if (fqLayer != nullptr) {
-        lastNode = fqLayer->clone_with_new_inputs({lastNode,
-            fqLayer->input_value(1), fqLayer->input_value(2),
-            fqLayer->input_value(3), fqLayer->input_value(4)});
-        ngraph::copy_runtime_info(fqLayer, lastNode);
-    }
-}
-
 // Valid 1D (decomposed 2D) convolution wrapped with transposes NHWC => NCHW => conv => NCHW => NHWC
 static std::shared_ptr<ngraph::Node> Create1DConv(const GraphData& graph_data, const ConvData& conv_data, const ngraph::Output<ngraph::Node>& input,
     std::shared_ptr<ngraph::Node> filters, const size_t conv_index, const size_t h_index) {
@@ -280,7 +270,7 @@ static std::shared_ptr<ngraph::Node> Create1DConv(const GraphData& graph_data, c
             ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {0, 3, 1, 2})->output(0));
 
         // Fake quantize
-        InsertFQLayer(graph_data.fq_conv, filters);
+        filters = InsertFQLayer(graph_data.fq_conv, filters);
 
         // 1D Convolution
         auto conv = std::make_shared<ngraph::opset7::Convolution>(nchw_input, filters,
@@ -297,7 +287,7 @@ static std::shared_ptr<ngraph::Node> Create1DConv(const GraphData& graph_data, c
                 ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{4}, ngraph::Shape{1, bias_size, 1, 1}), false);
             last_conv_block_op = std::make_shared<ngraph::opset7::Add>(conv, reshaped_bias_const);
             copy_runtime_info(graph_data.conv, last_conv_block_op);
-            InsertFQLayer(graph_data.fq_bias, last_conv_block_op);
+            last_conv_block_op = InsertFQLayer(graph_data.fq_bias, last_conv_block_op);
         }
 
         // Max pooling
@@ -311,7 +301,7 @@ static std::shared_ptr<ngraph::Node> Create1DConv(const GraphData& graph_data, c
         if (graph_data.af && graph_data.conv_count == 1) {
             last_conv_block_op = graph_data.af->copy_with_new_inputs({last_conv_block_op});
             copy_runtime_info(conv, last_conv_block_op);
-            InsertFQLayer(graph_data.fq_af, last_conv_block_op);
+            last_conv_block_op = InsertFQLayer(graph_data.fq_af, last_conv_block_op);
         }
 
         // Transpose NCHW => NHWC

diff --git a/inference-engine/src/gna_plugin/transformations/utils/transformation_helper.cpp b/inference-engine/src/gna_plugin/transformations/utils/transformation_helper.cpp
@@ -5,6 +5,7 @@
 
 #include <ngraph/opsets/opset7.hpp>
 #include <ngraph/pattern/op/wrap_type.hpp>
+#include <ngraph/rt_info.hpp>
 #include "transformation_helper.hpp"
 
 
@@ -90,4 +91,16 @@ std::shared_ptr<ngraph::Node> VerifyBiasGetConst(std::shared_ptr<ngraph::Node> c
     return nullptr;
 }
 
+// Clones fq_layer onto the output of last_node, reusing the original layer's inputs 1..4.
+// Returns the clone, or last_node unchanged when fq_layer is null, so callers can always write
+// `node = InsertFQLayer(fq, node);`.
+std::shared_ptr<ngraph::Node> InsertFQLayer(const std::shared_ptr<ngraph::opset7::FakeQuantize> fq_layer,
+    std::shared_ptr<ngraph::Node> last_node) {
+    if (fq_layer != nullptr) {
+        auto new_fq = fq_layer->clone_with_new_inputs({last_node,
+            fq_layer->input_value(1), fq_layer->input_value(2),
+            fq_layer->input_value(3), fq_layer->input_value(4)});
+        // copy_runtime_info takes (from, to): propagate rt_info from the original FQ to its clone
+        ngraph::copy_runtime_info(fq_layer, new_fq);
+        return new_fq;
+    }
+    return last_node;
+}
+
 } // namespace GNAPluginNS
diff --git a/inference-engine/src/gna_plugin/transformations/utils/transformation_helper.hpp b/inference-engine/src/gna_plugin/transformations/utils/transformation_helper.hpp
@@ -70,4 +70,12 @@ std::shared_ptr<ngraph::opset7::StridedSlice> FlatCrop(ngraph::Output<ngraph::No
  */
 std::shared_ptr<ngraph::Node> VerifyBiasGetConst(std::shared_ptr<ngraph::Node> conv, std::shared_ptr<ngraph::Node> bias);
 
+/**
+ * @brief inserts a new fake quantize layer (if it exists) copied from an existing fake quantize layer and connects it to the output of a given layer
+ * @param fq_layer existing fake quantize layer to be copied; may be null, in which case nothing is inserted
+ * @param last_node the node to which output the new fake quantize layer will be connected
+ * @return new fake quantize layer, or last_node unchanged when fq_layer is null
+ */
+std::shared_ptr<ngraph::Node> InsertFQLayer(const std::shared_ptr<ngraph::opset7::FakeQuantize> fq_layer, std::shared_ptr<ngraph::Node> last_node);
+
 } // namespace GNAPluginNS