Commit d69e88c: initial
evkotov committed Mar 6, 2023
1 parent 82e3f0e

Showing 14 changed files with 1,421 additions and 7 deletions.
@@ -614,6 +614,27 @@ CNNLayerCreator::CNNLayerCreator(const std::shared_ptr<::ngraph::Node>& node) :
}
return res;
});

addSpecificCreator({"GNAMaxPool"}, [](const std::shared_ptr<::ngraph::Node>& node,
const std::map<std::string, std::string>& params) -> CNNLayerPtr {
LayerParams attrs = {node->get_friendly_name(), "Pooling",
details::convertPrecision(node->get_output_element_type(0))};
auto res = std::make_shared<PoolingLayer>(attrs);
res->params = params;
if (res->params.find("auto_pad") != res->params.end() &&
details::CaselessEq<std::string>()(res->params["auto_pad"], "EXPLICIT"))
res->params.erase("auto_pad");

if (res->params.find("exclude_pad") != res->params.end()) {
res->params["exclude-pad"] = res->params["exclude_pad"];
res->params.erase("exclude_pad");
}

res->params["pool-method"] = "max";

return res;
});
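
The creator above maps GNAMaxPool attributes onto the legacy PoolingLayer parameter names. For orientation, a minimal standalone sketch of that translation follows; the attribute values are hypothetical, and the real code matches "EXPLICIT" case-insensitively via details::CaselessEq:

#include <cassert>
#include <map>
#include <string>

// Standalone sketch (not plugin code) of the parameter translation performed
// by the GNAMaxPool creator above. Sample attribute values are hypothetical.
int main() {
    std::map<std::string, std::string> params{{"auto_pad", "EXPLICIT"},
                                              {"exclude_pad", "true"},
                                              {"kernel", "1,3"}};
    if (params.count("auto_pad") && params["auto_pad"] == "EXPLICIT")
        params.erase("auto_pad");  // explicit padding adds no information, so drop it
    if (params.count("exclude_pad")) {
        params["exclude-pad"] = params["exclude_pad"];  // IR spelling uses a dash
        params.erase("exclude_pad");
    }
    params["pool-method"] = "max";  // GNAMaxPool always becomes max pooling
    assert(params.count("auto_pad") == 0 && params.at("pool-method") == "max");
    return 0;
}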

addSpecificCreator({"Select"},
[](const std::shared_ptr<::ngraph::Node>& node,
const std::map<std::string, std::string>& params) -> CNNLayerPtr {
@@ -1710,6 +1731,41 @@ CNNLayerCreator::CNNLayerCreator(const std::shared_ptr<::ngraph::Node>& node) :
return res;
});

addSpecificCreator({"GNAConvolution"}, [](const std::shared_ptr<::ngraph::Node>& node,
const std::map<std::string, std::string>& params) -> CNNLayerPtr {
LayerParams attrs = {node->get_friendly_name(), "Convolution", details::convertPrecision(node->get_output_element_type(0))};
auto res = std::make_shared<InferenceEngine::ConvolutionLayer>(attrs);
res->params = params;

auto && rt_info = node->get_rt_info();
bool keep_constants = rt_info["keep_constants"].as<bool>();

// Restore output and kernel size
auto shape = node->get_input_shape(1);
//shape.erase(shape.begin(), shape.begin() + 2); - NCHW needs to have HW, for NHWC we need second and third
// what about NC or N ?
shape.erase(shape.begin());
shape.erase(shape.end() - 1);

res->params["kernel"] = Builder::asString(static_cast<std::vector<size_t>&>(shape));
res->params["output"] = Builder::asString(*(node->get_shape().rbegin())); // instead of ->get_shape()[1]

// forward auto_pad only when its value is different than explicit
if (params.at("auto_pad") == "explicit") {
res->params.erase("auto_pad");
}

const auto weightsNode = node->input_value(1).get_node_shared_ptr();
if (!keep_constants && InferenceEngine::details::addBlob(weightsNode, res, InferenceEngine::details::weights)) {
if (node->inputs().size() == 3) {
const auto biasNode = node->input_value(2).get_node_shared_ptr();
InferenceEngine::details::addBlob(biasNode, res, InferenceEngine::details::biases);
}
}

return res;
});
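
The shape handling above recovers the spatial kernel from the weights input by dropping its first and last dimensions, and takes the output channel count from the last dimension of the node's output shape. A small sketch with hypothetical shapes, assuming GNAConvolution keeps its weights in an NHWC-style [O, H, W, I] order:

#include <cstddef>
#include <iostream>
#include <vector>

// Sketch of the kernel-shape recovery above; shapes and the [O, H, W, I]
// weight layout are assumptions used for illustration only.
int main() {
    std::vector<size_t> weights_shape{64, 3, 5, 8};  // O, H, W, I
    weights_shape.erase(weights_shape.begin());      // drop output channels (O)
    weights_shape.erase(weights_shape.end() - 1);    // drop input channels (I)
    // weights_shape now holds the spatial kernel {3, 5} -> params["kernel"] = "3,5"

    std::vector<size_t> output_shape{1, 112, 112, 64};  // hypothetical NHWC output
    // params["output"] is taken from the last dimension: 64 output channels
    std::cout << "kernel: " << weights_shape[0] << "," << weights_shape[1]
              << " output: " << output_shape.back() << std::endl;
    return 0;
}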

addSpecificCreator({"DeformableConvolution"},
[](const std::shared_ptr<::ngraph::Node>& node,
const std::map<std::string, std::string>& params) -> CNNLayerPtr {
@@ -2013,9 +2069,19 @@ void convertFunctionToICNNNetwork(const std::shared_ptr<const ::ngraph::Function
const auto isInternalConstLayer = [](const std::shared_ptr<::ngraph::op::Constant>& constLayer,
const std::shared_ptr<::ngraph::Node>& consumerLayer,
bool keep_constants) -> bool {

const auto isGNAConvolution = [](const std::shared_ptr<::ngraph::Node> &node) -> bool {
return (node->get_friendly_name().find("gna_convolution") != std::string::npos);
};
const auto isGNAMaxPool = [](const std::shared_ptr<::ngraph::Node> &node) -> bool {
return (node->get_friendly_name().find("gna_max_pool") != std::string::npos);
};

if (((::ngraph::as_type_ptr<::ngraph::op::ConvolutionIE>(consumerLayer) ||
::ngraph::as_type_ptr<::ngraph::op::FullyConnected>(consumerLayer)) &&
!keep_constants) ||
isGNAConvolution(consumerLayer) ||
isGNAMaxPool(consumerLayer) ||
::ngraph::as_type_ptr<::ngraph::op::v1::BinaryConvolution>(consumerLayer) ||
::ngraph::as_type_ptr<::ngraph::op::DeconvolutionIE>(consumerLayer) ||
::ngraph::as_type_ptr<::ngraph::op::v1::DeformableConvolution>(consumerLayer) ||
18 changes: 18 additions & 0 deletions src/plugins/intel_gna/src/debug_new_pass.hpp
@@ -0,0 +1,18 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <iostream>

#undef DEBUG_USE_NEW_PASS
#define DEBUG_USE_NEW_PASS 1

#undef DEBUG_VISUALIZE
//#define DEBUG_VISUALIZE 1

#define EMUTEX_DEBUG_CHECKPOINT std::cout << "[EMUTEX DEBUG] CHECKPOINT " << __FILE__ << ":" << __LINE__ << std::endl;
#define EMUTEX_DEBUG_CHECKPOINT_MESSAGE(message) std::cout << "[EMUTEX DEBUG] CHECKPOINT " << __FILE__ << ":" << __LINE__ << \
" " << message << std::endl;
#define EMUTEX_DEBUG_VALUE(value) std::cout << "[EMUTEX DEBUG] " << __FILE__ << ":" << __LINE__ << " " << #value << " = " << (value) << std::endl;
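
This header toggles the new NHWC-oriented code paths via DEBUG_USE_NEW_PASS and provides checkpoint/value tracing macros. A minimal usage sketch, assuming the header is on the include path; the file/line values in the comments are illustrative:

// Minimal usage sketch for the macros above (illustration only).
#include "debug_new_pass.hpp"

int main() {
    int in_channels = 8;                      // hypothetical value to trace
    EMUTEX_DEBUG_CHECKPOINT                   // [EMUTEX DEBUG] CHECKPOINT main.cpp:6
    EMUTEX_DEBUG_VALUE(in_channels);          // [EMUTEX DEBUG] main.cpp:7 in_channels = 8
    EMUTEX_DEBUG_CHECKPOINT_MESSAGE("done");  // [EMUTEX DEBUG] CHECKPOINT main.cpp:8 done
    return 0;
}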
5 changes: 5 additions & 0 deletions src/plugins/intel_gna/src/frontend/layer_quantizer.cpp
@@ -8,6 +8,7 @@
#include "common/gna_target.hpp"
#include "gna_graph_tools.hpp"
#include "weights_converter.hpp"
#include "debug_new_pass.hpp" // DEBUG

namespace ov {
namespace intel_gna {
@@ -89,7 +90,11 @@ size_t LayerQuantizer::GetBiasSizeForLayer(InferenceEngine::WeightableLayer& wl)
return wl._biases->size();
} else if (LayerInfo(wl).isConvolution()) {
// Calculating biases len using outdata dims: biases number should be equal to output channels number
#ifndef DEBUG_USE_NEW_PASS
return InferenceEngine::GetDataDimByName(wl.outData.front(), InferenceEngine::DataDimName::C);
#else
return InferenceEngine::GetDataDimSizeNHWC(wl.outData.front(), InferenceEngine::DataDimName::C);
#endif
} else {
// Calculating biases size using outData dimensions
return wl.outData.front()->getDims().back();
5 changes: 5 additions & 0 deletions src/plugins/intel_gna/src/frontend/scale_factor_calc.cpp
@@ -11,6 +11,7 @@
#include "layers/gna_convolution_layer.hpp"
#include "log/debug.hpp"
#include "weights_converter.hpp"
#include "debug_new_pass.hpp" // DEBUG

namespace ov {
namespace intel_gna {
@@ -1262,7 +1263,11 @@ bool ScaleFactorCalculator::ScaleFactorPerLayerWeightable(InferenceEngine::Weigh
double weights_reducer = 1.0;
auto conv = dynamic_cast<InferenceEngine::ConvolutionLayer*>(wl);
if (conv && !LayerInfo(conv).isConvolutionFilter()) {
#ifndef DEBUG_USE_NEW_PASS
const auto inDepth = GetDataDimByName(conv->insData.front().lock(), InferenceEngine::DataDimName::C);
#else
const auto inDepth = GetDataDimSizeNHWC(conv->insData.front().lock(), InferenceEngine::DataDimName::C);
#endif
weights_reducer = gna_convolution_layer::getWeightsReducer(*conv);
weights_reducer *= MAX_VAL_2B_FEAT * scaleRange * inDepth / std::numeric_limits<int32_t>::max();
weights_reducer = std::max(1.0, weights_reducer);
105 changes: 99 additions & 6 deletions src/plugins/intel_gna/src/gna_graph_compiler.cpp
@@ -41,6 +41,8 @@
#include "ops/pwl.hpp"
#include "runtime/pwl.h"

#include "debug_new_pass.hpp"

using namespace InferenceEngine;
using namespace std;

@@ -301,12 +303,28 @@ void GNAGraphCompiler::ConstPrimitive(InferenceEngine::CNNLayerPtr constLayer) {

void GNAGraphCompiler::assertConvolutionLayoutProper(const InferenceEngine::DataPtr& data) {
if (data->getLayout() != InferenceEngine::Layout::NHWC && data->getLayout() != InferenceEngine::Layout::NCHW &&
data->getLayout() != InferenceEngine::Layout::NC) {
data->getLayout() != InferenceEngine::Layout::NC && data->getLayout() != InferenceEngine::Layout::CHW) {
THROW_GNA_EXCEPTION << "layer: \"Convolution\" with layout " << data->getLayout()
<< " isn't currently supported on GNA";
}
}

#ifdef DEBUG_USE_NEW_PASS
namespace {

template <typename T>
PropertyVector<T> PropertyVectorAppend(PropertyVector<T> properties, T value) {
std::vector<T> new_values;
for (size_t i = 0; i < properties.size(); ++i)
new_values.push_back(properties[i]);
new_values.push_back(value);

return PropertyVector<T>(new_values);
}

} // namespace
#endif
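
PropertyVectorAppend rebuilds a PropertyVector with one extra element; it is used below to widen padding, kernel and stride properties when a 3D (CHW) tensor is handled as if it were 4D. A behavioral sketch on a plain std::vector, since only size(), operator[] and a vector constructor are relied on here:

#include <cassert>
#include <vector>

// Behavioral sketch of PropertyVectorAppend using std::vector (illustration only).
template <typename T>
std::vector<T> property_vector_append(std::vector<T> properties, T value) {
    properties.push_back(value);  // copy the existing entries and append one more
    return properties;
}

int main() {
    std::vector<unsigned int> padding{2};               // hypothetical pad_x = 2
    auto padded = property_vector_append(padding, 0u);  // append pad_y = 0
    assert(padded.size() == 2 && padded[0] == 2 && padded[1] == 0);
    return 0;
}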

/**
* Create AMIntelDNN Convolutional1DComponent from ConvolutionLayer
*
@@ -332,6 +350,27 @@ void GNAGraphCompiler::ConvolutionPrimitive(InferenceEngine::CNNLayerPtr layer)
const auto outputs = layer->outData.front();
assertConvolutionLayoutProper(inputs);

#ifdef DEBUG_USE_NEW_PASS
const auto in_batch = GetDataDimSizeNHWC(inputs, InferenceEngine::DataDimName::N);
const auto in_channels = GetDataDimSizeNHWC(inputs, InferenceEngine::DataDimName::C);
auto in_height = GetDataDimSizeNHWC(inputs, InferenceEngine::DataDimName::H);
auto in_width = GetDataDimSizeNHWC(inputs, InferenceEngine::DataDimName::W);
const auto out_batch = GetDataDimSizeNHWC(outputs, InferenceEngine::DataDimName::N);
const auto out_channels = GetDataDimSizeNHWC(outputs, InferenceEngine::DataDimName::C);
auto out_height = GetDataDimSizeNHWC(outputs, InferenceEngine::DataDimName::H);
auto out_width = GetDataDimSizeNHWC(outputs, InferenceEngine::DataDimName::W);

if (inputs->getLayout() == InferenceEngine::Layout::CHW) {
// convolution is ngraph-3D here. Make some fixes to work with it as it's ngraph-4D
convolution._kernel_y = 1;
convolution._dilation_y = 1;
convolution._stride_y = 1;

convolution._padding = PropertyVectorAppend<unsigned int>(convolution._padding, 0);
convolution._pads_end = PropertyVectorAppend<unsigned int>(convolution._pads_end, 0);
}

#else
const auto in_batch = InferenceEngine::GetDataDimByName(inputs, InferenceEngine::DataDimName::N);
const auto in_channels = InferenceEngine::GetDataDimByName(inputs, InferenceEngine::DataDimName::C);
auto in_height = InferenceEngine::GetDataDimByName(inputs, InferenceEngine::DataDimName::H);
@@ -341,6 +380,7 @@
const auto out_channels = InferenceEngine::GetDataDimByName(outputs, InferenceEngine::DataDimName::C);
auto out_height = InferenceEngine::GetDataDimByName(outputs, InferenceEngine::DataDimName::H);
auto out_width = InferenceEngine::GetDataDimByName(outputs, InferenceEngine::DataDimName::W);
#endif

if (in_height > 1 && in_width == 1) {
std::swap(in_height, in_width);
@@ -355,7 +395,20 @@ void GNAGraphCompiler::ConvolutionPrimitive(InferenceEngine::CNNLayerPtr layer)
auto in_kernel_w = convolution._kernel_x;
auto in_kernel_h = convolution._kernel_y;
bool transpose_h_w = false;

/*
EMUTEX_DEBUG_VALUE(layer->name);
EMUTEX_DEBUG_VALUE(in_batch);
EMUTEX_DEBUG_VALUE(in_channels);
EMUTEX_DEBUG_VALUE(in_height);
EMUTEX_DEBUG_VALUE(in_width);
EMUTEX_DEBUG_VALUE(out_batch);
EMUTEX_DEBUG_VALUE(out_channels);
EMUTEX_DEBUG_VALUE(out_height);
EMUTEX_DEBUG_VALUE(out_width);
EMUTEX_DEBUG_VALUE(in_kernel_w);
EMUTEX_DEBUG_VALUE(in_kernel_h);
EMUTEX_DEBUG_VALUE(transpose_h_w);
*/
// Map 2d convolution to 1d if it's possible.
if (!ShouldUseOnlyConv2DGnaIface() && gna_convolution_layer::isMappableFrom2DTo1D(in_height,
in_width,
@@ -583,6 +636,7 @@ void GNAGraphCompiler::finalizeConvolution1DPrimitive(InferenceEngine::CNNLayerP
});
}

#ifndef DEBUG_USE_NEW_PASS
// TODO: convolution might be not the first layer in sorted order but connected via split for example - dont know
// how kaldi will handle that
if (!dnn->do_rotate_input) {
Expand All @@ -596,7 +650,7 @@ void GNAGraphCompiler::finalizeConvolution1DPrimitive(InferenceEngine::CNNLayerP
dnn->do_rotate_input = false;
}
}

#endif
connectOutput(layer, ptr_outputs, num_data_bytes_out);

// Transpose H with W or C with HW
Expand All @@ -607,7 +661,11 @@ void GNAGraphCompiler::finalizeConvolution1DPrimitive(InferenceEngine::CNNLayerP
for (uint32_t k = 0; k < num_filters; k++) {
uint8_t* ptr_filt_current =
convolution._weights->cbuffer().as<uint8_t*>() + k * A * B * convolution.precision.size();
auto transposedPart = transposeMatrix(ptr_filt_current, convolution.precision.size(), A, B);
#ifdef DEBUG_USE_NEW_PASS
auto transposedPart = copyMatrix(ptr_filt_current, convolution.precision.size(), A, B);
#else
auto transposedPart = transposeMatrix(ptr_filt_current, convolution.precision.size(), A, B);
#endif
transposedWeights.insert(transposedWeights.end(), transposedPart.begin(), transposedPart.end());
}
if (transposedWeights.size() != convolution._weights->byteSize()) {
@@ -682,7 +740,15 @@ void GNAGraphCompiler::finalizeConvolution2DPrimitive(InferenceEngine::CNNLayerP
if (!cnn2dValidator) {
THROW_GNA_EXCEPTION << "No Cnn2D validator found for layer " << convolution.name;
}

/*
EMUTEX_DEBUG_VALUE(convolution.name);
EMUTEX_DEBUG_VALUE(convolution._padding_y);
EMUTEX_DEBUG_VALUE(convolution._pads_end_y);
EMUTEX_DEBUG_VALUE(convolution._padding_x);
EMUTEX_DEBUG_VALUE(convolution._pads_end_x);
EMUTEX_DEBUG_VALUE(convolution._kernel_y);
EMUTEX_DEBUG_VALUE(convolution._kernel_x);
*/
cnn2dValidator->ValidateInputPadding(convolution.name,
convolution._padding_y,
convolution._pads_end_y,
@@ -798,7 +864,11 @@ void GNAGraphCompiler::finalizeConvolution2DPrimitive(InferenceEngine::CNNLayerP
const auto kernelPad = Gna2RoundUp(singleKernelSize, 16) - singleKernelSize;
for (uint32_t k = 0; k < convolution._out_depth; k++) {
uint8_t* ptr_filt_current = convolution._weights->cbuffer().as<uint8_t*>() + k * singleKernelSize;
#ifdef DEBUG_USE_NEW_PASS
auto transposedPart = copyMatrix(ptr_filt_current, convolution.precision.size(), in_channels, kernelHW);
#else
auto transposedPart = transposeMatrix(ptr_filt_current, convolution.precision.size(), in_channels, kernelHW);
#endif
transposedWeights.insert(transposedWeights.end(), transposedPart.begin(), transposedPart.end());
transposedWeights.resize(transposedWeights.size() + kernelPad);
}
@@ -970,14 +1040,29 @@ void GNAGraphCompiler::PoolingPrimitive(InferenceEngine::CNNLayerPtr layer) {
auto inputs = layer->insData.begin()->lock();
auto outputs = *layer->outData.begin();

#ifdef DEBUG_USE_NEW_PASS
uint32_t w_dim_in = GetDataDimSizeNHWC(inputs, InferenceEngine::DataDimName::W);
uint32_t h_dim_in = GetDataDimSizeNHWC(inputs, InferenceEngine::DataDimName::H);
const uint32_t c_dim_in = GetDataDimSizeNHWC(inputs, InferenceEngine::DataDimName::C);

uint32_t w_dim_out = GetDataDimSizeNHWC(outputs, InferenceEngine::DataDimName::W);
uint32_t h_dim_out = GetDataDimSizeNHWC(outputs, InferenceEngine::DataDimName::H);
const uint32_t c_dim_out = GetDataDimSizeNHWC(outputs, InferenceEngine::DataDimName::C);

if (inputs->getLayout() == InferenceEngine::Layout::CHW) {
// Pooling is ngraph-3D here. Make some fixes to work with it as it's ngraph-4D
pooling._kernel = PropertyVectorAppend<unsigned int>(pooling._kernel, 1);
pooling._stride = PropertyVectorAppend<unsigned int>(pooling._stride, 1);
}
#else
uint32_t w_dim_in = InferenceEngine::GetDataDimByName(inputs, InferenceEngine::DataDimName::W);
uint32_t h_dim_in = InferenceEngine::GetDataDimByName(inputs, InferenceEngine::DataDimName::H);
const uint32_t c_dim_in = InferenceEngine::GetDataDimByName(inputs, InferenceEngine::DataDimName::C);

uint32_t w_dim_out = InferenceEngine::GetDataDimByName(outputs, InferenceEngine::DataDimName::W);
uint32_t h_dim_out = InferenceEngine::GetDataDimByName(outputs, InferenceEngine::DataDimName::H);
const uint32_t c_dim_out = InferenceEngine::GetDataDimByName(outputs, InferenceEngine::DataDimName::C);

#endif
if (w_dim_in == 1) { // swap dimensions if needed to support swapped 1D case
std::swap(h_dim_in, w_dim_in);
std::swap(h_dim_out, w_dim_out);
@@ -2833,5 +2918,13 @@ std::vector<uint8_t> GNAGraphCompiler::transposeMatrix(uint8_t* ptr_matrix,
return temp_buffer;
}

std::vector<uint8_t>
GNAGraphCompiler::copyMatrix(uint8_t* ptr_matrix, size_t element_size, uint32_t num_rows, uint32_t num_cols) {
const size_t dest_size = num_rows * num_cols * element_size;
std::vector<uint8_t> temp_buffer(dest_size);
::memcpy(temp_buffer.data(), ptr_matrix, dest_size);
return temp_buffer;
}

} // namespace intel_gna
} // namespace ov
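
copyMatrix mirrors transposeMatrix's signature but keeps the kernel bytes in their original order, so under the new pass only a defensive copy is made where the old path transposed. A toy contrast of the two helpers, assuming transposeMatrix swaps rows and columns as its name suggests:

#include <cstdint>
#include <cstring>
#include <iostream>
#include <vector>

// Toy illustration (not plugin code); element_size is 1 byte here.
std::vector<uint8_t> copy_matrix(const uint8_t* src, size_t element_size,
                                 uint32_t rows, uint32_t cols) {
    std::vector<uint8_t> out(rows * cols * element_size);
    std::memcpy(out.data(), src, out.size());  // layout preserved, only a copy
    return out;
}

std::vector<uint8_t> transpose_matrix(const uint8_t* src, size_t element_size,
                                      uint32_t rows, uint32_t cols) {
    std::vector<uint8_t> out(rows * cols * element_size);
    for (uint32_t r = 0; r < rows; ++r)
        for (uint32_t c = 0; c < cols; ++c)
            std::memcpy(&out[(c * rows + r) * element_size],
                        &src[(r * cols + c) * element_size], element_size);
    return out;
}

int main() {
    const uint8_t m[6] = {'a', 'b', 'c', 'd', 'e', 'f'};        // 2x3 matrix: {abc},{def}
    for (auto v : copy_matrix(m, 1, 2, 3)) std::cout << v;       // prints abcdef
    std::cout << '\n';
    for (auto v : transpose_matrix(m, 1, 2, 3)) std::cout << v;  // prints adbecf
    std::cout << '\n';
    return 0;
}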
4 changes: 4 additions & 0 deletions src/plugins/intel_gna/src/gna_graph_compiler.hpp
@@ -53,6 +53,10 @@ class GNAGraphCompiler {
size_t element_size,
uint32_t num_rows,
uint32_t num_cols);
std::vector<uint8_t> static copyMatrix(uint8_t* ptr_matrix,
size_t element_size,
uint32_t num_rows,
uint32_t num_cols);

std::unique_ptr<const limitations::cnn2d::AbstractValidator> cnn2dValidator;

34 changes: 34 additions & 0 deletions src/plugins/intel_gna/src/gna_graph_tools.hpp
@@ -925,4 +925,38 @@ inline uint32_t GetDataDimByName(InferenceEngine::DataPtr data, DataDimName dimN
return GetDimFromBack(dims, backOffsets[dimIxInNCHW]);
}

/**
* @brief returns a size of a specified data dimension depending on the layout
* NHWC specialization
* @param data a pointer to the data
* @param dimName dimension name
*/
inline uint32_t GetDataDimSizeNHWC(InferenceEngine::DataPtr data, DataDimName dimName) {
uint32_t dimIxInNCHW = static_cast<uint32_t>(dimName);
IE_ASSERT(dimIxInNCHW <= 3);

std::vector<uint32_t> backOffsets;
switch (data->getLayout()) {
case Layout::C:
case Layout::NC:
// 1 will be returned for offsets > 2
backOffsets = std::vector<uint32_t>{2, 1, 3, 4};
break;
case Layout::HWC:
// 1 will be returned for offset 4
case Layout::NHWC:
backOffsets = std::vector<uint32_t>{4, 3, 2, 1};
break;
case Layout::CHW:
// 1 will be returned for offset 4
case Layout::NCHW:
backOffsets = std::vector<uint32_t>{4, 1, 3, 2};
break;
default:
THROW_GNA_EXCEPTION << data->getName() << " Unexpected layout " << data->getLayout();
}
auto dims = data->getDims();
return GetDimFromBack(dims, backOffsets[dimIxInNCHW]);
}

} // namespace InferenceEngine
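
The offset tables in GetDataDimSizeNHWC are easiest to read against a concrete case. Below is a mechanical re-implementation of the lookup, under two assumptions: DataDimName follows NCHW order (N=0, C=1, H=2, W=3) and GetDimFromBack(dims, k) returns the k-th dimension counted from the end, or 1 when k exceeds the rank:

#include <cstdint>
#include <iostream>
#include <vector>

// Illustration only: replicate the backOffsets lookup for a CHW-tagged blob.
uint32_t dim_from_back(const std::vector<uint32_t>& dims, uint32_t k) {
    return (k > dims.size()) ? 1u : dims[dims.size() - k];
}

int main() {
    std::vector<uint32_t> dims{8, 1, 64};           // hypothetical CHW dims
    std::vector<uint32_t> backOffsets{4, 1, 3, 2};  // table used for CHW/NCHW above
    const char* names[] = {"N", "C", "H", "W"};
    for (uint32_t i = 0; i < 4; ++i)
        std::cout << names[i] << " = " << dim_from_back(dims, backOffsets[i]) << "\n";
    // Prints: N = 1, C = 64, H = 8, W = 1
    return 0;
}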