diff --git a/src/plugins/intel_gna/legacy/src/convert_function_to_cnn_network.cpp b/src/plugins/intel_gna/legacy/src/convert_function_to_cnn_network.cpp index 94258ddd25495f..3f6bc60c1d5574 100644 --- a/src/plugins/intel_gna/legacy/src/convert_function_to_cnn_network.cpp +++ b/src/plugins/intel_gna/legacy/src/convert_function_to_cnn_network.cpp @@ -614,6 +614,27 @@ CNNLayerCreator::CNNLayerCreator(const std::shared_ptr<::ngraph::Node>& node) : } return res; }); + + addSpecificCreator({"GNAMaxPool"}, [](const std::shared_ptr<::ngraph::Node>& node, + const std::map& params) -> CNNLayerPtr { + LayerParams attrs = {node->get_friendly_name(), "Pooling", + details::convertPrecision(node->get_output_element_type(0))}; + auto res = std::make_shared(attrs); + res->params = params; + if (res->params.find("auto_pad") != res->params.end() && + details::CaselessEq()(res->params["auto_pad"], "EXPLICIT")) + res->params.erase("auto_pad"); + + if (res->params.find("exclude_pad") != res->params.end()) { + res->params["exclude-pad"] = res->params["exclude_pad"]; + res->params.erase("exclude_pad"); + } + + res->params["pool-method"] = "max"; + + return res; + }); + addSpecificCreator({"Select"}, [](const std::shared_ptr<::ngraph::Node>& node, const std::map& params) -> CNNLayerPtr { @@ -1710,6 +1731,41 @@ CNNLayerCreator::CNNLayerCreator(const std::shared_ptr<::ngraph::Node>& node) : return res; }); + addSpecificCreator({"GNAConvolution"}, [](const std::shared_ptr<::ngraph::Node>& node, + const std::map& params) -> CNNLayerPtr { + LayerParams attrs = {node->get_friendly_name(), "Convolution", details::convertPrecision(node->get_output_element_type(0))}; + auto res = std::make_shared(attrs); + res->params = params; + + auto && rt_info = node->get_rt_info(); + bool keep_constants = rt_info["keep_constants"].as(); + + // Restore output and kernel size + auto shape = node->get_input_shape(1); + //shape.erase(shape.begin(), shape.begin() + 2); - NCHW needs to have HW, for NHWC we need 
second and third + // what about NC or N ? + shape.erase(shape.begin()); + shape.erase(shape.end() - 1); + + res->params["kernel"] = Builder::asString(static_cast&>(shape)); + res->params["output"] = Builder::asString(*(node->get_shape().rbegin())); // instead of ->get_shape()[1] + + // forward auto_pad only when its value is different than explicit + if (params.at("auto_pad") == "explicit") { + res->params.erase("auto_pad"); + } + + const auto weightsNode = node->input_value(1).get_node_shared_ptr(); + if (!keep_constants && InferenceEngine::details::addBlob(weightsNode, res, InferenceEngine::details::weights)) { + if (node->inputs().size() == 3) { + const auto biasNode = node->input_value(2).get_node_shared_ptr(); + InferenceEngine::details::addBlob(biasNode, res, InferenceEngine::details::biases); + } + } + + return res; + }); + addSpecificCreator({"DeformableConvolution"}, [](const std::shared_ptr<::ngraph::Node>& node, const std::map& params) -> CNNLayerPtr { @@ -2013,9 +2069,19 @@ void convertFunctionToICNNNetwork(const std::shared_ptr& constLayer, const std::shared_ptr<::ngraph::Node>& consumerLayer, bool keep_constants) -> bool { + + const auto isGNAConvolution = [](const std::shared_ptr<::ngraph::Node> &node) -> bool { + return (node->get_friendly_name().find("gna_convolution") != std::string::npos); + }; + const auto isGNAMaxPool = [](const std::shared_ptr<::ngraph::Node> &node) -> bool { + return (node->get_friendly_name().find("gna_max_pool") != std::string::npos); + }; + if (((::ngraph::as_type_ptr<::ngraph::op::ConvolutionIE>(consumerLayer) || ::ngraph::as_type_ptr<::ngraph::op::FullyConnected>(consumerLayer)) && !keep_constants) || + isGNAConvolution(consumerLayer) || + isGNAMaxPool(consumerLayer) || ::ngraph::as_type_ptr<::ngraph::op::v1::BinaryConvolution>(consumerLayer) || ::ngraph::as_type_ptr<::ngraph::op::DeconvolutionIE>(consumerLayer) || ::ngraph::as_type_ptr<::ngraph::op::v1::DeformableConvolution>(consumerLayer) || diff --git 
a/src/plugins/intel_gna/src/debug_new_pass.hpp b/src/plugins/intel_gna/src/debug_new_pass.hpp new file mode 100644 index 00000000000000..1b6728f51b0fe6 --- /dev/null +++ b/src/plugins/intel_gna/src/debug_new_pass.hpp @@ -0,0 +1,18 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include <iostream> + +#undef DEBUG_USE_NEW_PASS +#define DEBUG_USE_NEW_PASS 1 + +#undef DEBUG_VISUALIZE +//#define DEBUG_VISUALIZE 1 + +#define EMUTEX_DEBUG_CHECKPOINT std::cout << "[EMUTEX DEBUG] CHECKPOINT " << __FILE__ << ":" << __LINE__ << std::endl; +#define EMUTEX_DEBUG_CHECKPOINT_MESSAGE(message) std::cout << "[EMUTEX DEBUG] CHECKPOINT " << __FILE__ << ":" << __LINE__ << \ + " " << message << std::endl; +#define EMUTEX_DEBUG_VALUE(value) std::cout << "[EMUTEX DEBUG] " << __FILE__ << ":" << __LINE__ << " " << #value << " = " << (value) << std::endl; diff --git a/src/plugins/intel_gna/src/frontend/layer_quantizer.cpp b/src/plugins/intel_gna/src/frontend/layer_quantizer.cpp index b63db46d973eee..d95714d78b1320 100644 --- a/src/plugins/intel_gna/src/frontend/layer_quantizer.cpp +++ b/src/plugins/intel_gna/src/frontend/layer_quantizer.cpp @@ -8,6 +8,7 @@ #include "common/gna_target.hpp" #include "gna_graph_tools.hpp" #include "weights_converter.hpp" +#include "debug_new_pass.hpp" // DEBUG namespace ov { namespace intel_gna { @@ -89,7 +90,11 @@ size_t LayerQuantizer::GetBiasSizeForLayer(InferenceEngine::WeightableLayer& wl) return wl._biases->size(); } else if (LayerInfo(wl).isConvolution()) { // Calculating biases len using outdata dims: biases number should be equal to output channels number +#ifndef DEBUG_USE_NEW_PASS return InferenceEngine::GetDataDimByName(wl.outData.front(), InferenceEngine::DataDimName::C); +#else + return InferenceEngine::GetDataDimSizeNHWC(wl.outData.front(), InferenceEngine::DataDimName::C); +#endif } else { // Calculating biases size using outData dimensions return wl.outData.front()->getDims().back(); diff --git 
a/src/plugins/intel_gna/src/frontend/scale_factor_calc.cpp b/src/plugins/intel_gna/src/frontend/scale_factor_calc.cpp index 7291b586c7cdef..d129a7b5cd76c3 100644 --- a/src/plugins/intel_gna/src/frontend/scale_factor_calc.cpp +++ b/src/plugins/intel_gna/src/frontend/scale_factor_calc.cpp @@ -11,6 +11,7 @@ #include "layers/gna_convolution_layer.hpp" #include "log/debug.hpp" #include "weights_converter.hpp" +#include "debug_new_pass.hpp" // DEBUG namespace ov { namespace intel_gna { @@ -1262,7 +1263,11 @@ bool ScaleFactorCalculator::ScaleFactorPerLayerWeightable(InferenceEngine::Weigh double weights_reducer = 1.0; auto conv = dynamic_cast(wl); if (conv && !LayerInfo(conv).isConvolutionFilter()) { +#ifndef DEBUG_USE_NEW_PASS const auto inDepth = GetDataDimByName(conv->insData.front().lock(), InferenceEngine::DataDimName::C); +#else + const auto inDepth = GetDataDimSizeNHWC(conv->insData.front().lock(), InferenceEngine::DataDimName::C); +#endif weights_reducer = gna_convolution_layer::getWeightsReducer(*conv); weights_reducer *= MAX_VAL_2B_FEAT * scaleRange * inDepth / std::numeric_limits::max(); weights_reducer = std::max(1.0, weights_reducer); diff --git a/src/plugins/intel_gna/src/gna_graph_compiler.cpp b/src/plugins/intel_gna/src/gna_graph_compiler.cpp index 32b2e6be8be1c0..20a206b91392b4 100644 --- a/src/plugins/intel_gna/src/gna_graph_compiler.cpp +++ b/src/plugins/intel_gna/src/gna_graph_compiler.cpp @@ -41,6 +41,8 @@ #include "ops/pwl.hpp" #include "runtime/pwl.h" +#include "debug_new_pass.hpp" + using namespace InferenceEngine; using namespace std; @@ -301,12 +303,28 @@ void GNAGraphCompiler::ConstPrimitive(InferenceEngine::CNNLayerPtr constLayer) { void GNAGraphCompiler::assertConvolutionLayoutProper(const InferenceEngine::DataPtr& data) { if (data->getLayout() != InferenceEngine::Layout::NHWC && data->getLayout() != InferenceEngine::Layout::NCHW && - data->getLayout() != InferenceEngine::Layout::NC) { + data->getLayout() != InferenceEngine::Layout::NC && 
data->getLayout() != InferenceEngine::Layout::CHW) { THROW_GNA_EXCEPTION << "layer: \"Convolution\" with layout " << data->getLayout() << " isn't currently supported on GNA"; } } +#ifdef DEBUG_USE_NEW_PASS +namespace { + +template +PropertyVector PropertyVectorAppend(PropertyVector properties, T value) { + std::vector new_values; + for (size_t i = 0; i < properties.size(); ++i) + new_values.push_back(properties[i]); + new_values.push_back(value); + + return PropertyVector(new_values); +} + +} // namespace +#endif + /** * Create AMIntelDNN Convolutional1DComponent from ConvolutionLayer * @@ -332,6 +350,27 @@ void GNAGraphCompiler::ConvolutionPrimitive(InferenceEngine::CNNLayerPtr layer) const auto outputs = layer->outData.front(); assertConvolutionLayoutProper(inputs); +#ifdef DEBUG_USE_NEW_PASS + const auto in_batch = GetDataDimSizeNHWC(inputs, InferenceEngine::DataDimName::N); + const auto in_channels = GetDataDimSizeNHWC(inputs, InferenceEngine::DataDimName::C); + auto in_height = GetDataDimSizeNHWC(inputs, InferenceEngine::DataDimName::H); + auto in_width = GetDataDimSizeNHWC(inputs, InferenceEngine::DataDimName::W); + const auto out_batch = GetDataDimSizeNHWC(outputs, InferenceEngine::DataDimName::N); + const auto out_channels = GetDataDimSizeNHWC(outputs, InferenceEngine::DataDimName::C); + auto out_height = GetDataDimSizeNHWC(outputs, InferenceEngine::DataDimName::H); + auto out_width = GetDataDimSizeNHWC(outputs, InferenceEngine::DataDimName::W); + + if (inputs->getLayout() == InferenceEngine::Layout::CHW) { + // convolution is ngraph-3D here. 
Make some fixes to work with it as it's ngraph-4D + convolution._kernel_y = 1; + convolution._dilation_y = 1; + convolution._stride_y = 1; + + convolution._padding = PropertyVectorAppend(convolution._padding, 0); + convolution._pads_end = PropertyVectorAppend(convolution._pads_end, 0); + } + +#else const auto in_batch = InferenceEngine::GetDataDimByName(inputs, InferenceEngine::DataDimName::N); const auto in_channels = InferenceEngine::GetDataDimByName(inputs, InferenceEngine::DataDimName::C); auto in_height = InferenceEngine::GetDataDimByName(inputs, InferenceEngine::DataDimName::H); @@ -341,6 +380,7 @@ void GNAGraphCompiler::ConvolutionPrimitive(InferenceEngine::CNNLayerPtr layer) const auto out_channels = InferenceEngine::GetDataDimByName(outputs, InferenceEngine::DataDimName::C); auto out_height = InferenceEngine::GetDataDimByName(outputs, InferenceEngine::DataDimName::H); auto out_width = InferenceEngine::GetDataDimByName(outputs, InferenceEngine::DataDimName::W); +#endif if (in_height > 1 && in_width == 1) { std::swap(in_height, in_width); @@ -355,7 +395,20 @@ void GNAGraphCompiler::ConvolutionPrimitive(InferenceEngine::CNNLayerPtr layer) auto in_kernel_w = convolution._kernel_x; auto in_kernel_h = convolution._kernel_y; bool transpose_h_w = false; - +/* + EMUTEX_DEBUG_VALUE(layer->name); + EMUTEX_DEBUG_VALUE(in_batch); + EMUTEX_DEBUG_VALUE(in_channels); + EMUTEX_DEBUG_VALUE(in_height); + EMUTEX_DEBUG_VALUE(in_width); + EMUTEX_DEBUG_VALUE(out_batch); + EMUTEX_DEBUG_VALUE(out_channels); + EMUTEX_DEBUG_VALUE(out_height); + EMUTEX_DEBUG_VALUE(out_width); + EMUTEX_DEBUG_VALUE(in_kernel_w); + EMUTEX_DEBUG_VALUE(in_kernel_h); + EMUTEX_DEBUG_VALUE(transpose_h_w); +*/ // Map 2d convolution to 1d if it's possible. 
if (!ShouldUseOnlyConv2DGnaIface() && gna_convolution_layer::isMappableFrom2DTo1D(in_height, in_width, @@ -583,6 +636,7 @@ void GNAGraphCompiler::finalizeConvolution1DPrimitive(InferenceEngine::CNNLayerP }); } +#ifndef DEBUG_USE_NEW_PASS // TODO: convolution might be not the first layer in sorted order but connected via split for example - dont know // how kaldi will handle that if (!dnn->do_rotate_input) { @@ -596,7 +650,7 @@ void GNAGraphCompiler::finalizeConvolution1DPrimitive(InferenceEngine::CNNLayerP dnn->do_rotate_input = false; } } - +#endif connectOutput(layer, ptr_outputs, num_data_bytes_out); // Transpose H with W or C with HW @@ -607,7 +661,11 @@ void GNAGraphCompiler::finalizeConvolution1DPrimitive(InferenceEngine::CNNLayerP for (uint32_t k = 0; k < num_filters; k++) { uint8_t* ptr_filt_current = convolution._weights->cbuffer().as() + k * A * B * convolution.precision.size(); - auto transposedPart = transposeMatrix(ptr_filt_current, convolution.precision.size(), A, B); +#ifdef DEBUG_USE_NEW_PASS + auto transposedPart = copyMatrix(ptr_filt_current, convolution.precision.size(), A, B); +#else + auto transposedPart = transposeMatrix(ptr_filt_current, convolution.precision.size(), A, B); +#endif transposedWeights.insert(transposedWeights.end(), transposedPart.begin(), transposedPart.end()); } if (transposedWeights.size() != convolution._weights->byteSize()) { @@ -682,7 +740,15 @@ void GNAGraphCompiler::finalizeConvolution2DPrimitive(InferenceEngine::CNNLayerP if (!cnn2dValidator) { THROW_GNA_EXCEPTION << "No Cnn2D validator found for layer " << convolution.name; } - +/* + EMUTEX_DEBUG_VALUE(convolution.name); + EMUTEX_DEBUG_VALUE(convolution._padding_y); + EMUTEX_DEBUG_VALUE(convolution._pads_end_y); + EMUTEX_DEBUG_VALUE(convolution._padding_x); + EMUTEX_DEBUG_VALUE(convolution._pads_end_x); + EMUTEX_DEBUG_VALUE(convolution._kernel_y); + EMUTEX_DEBUG_VALUE(convolution._kernel_x); +*/ cnn2dValidator->ValidateInputPadding(convolution.name, 
convolution._padding_y, convolution._pads_end_y, @@ -798,7 +864,11 @@ void GNAGraphCompiler::finalizeConvolution2DPrimitive(InferenceEngine::CNNLayerP const auto kernelPad = Gna2RoundUp(singleKernelSize, 16) - singleKernelSize; for (uint32_t k = 0; k < convolution._out_depth; k++) { uint8_t* ptr_filt_current = convolution._weights->cbuffer().as() + k * singleKernelSize; +#ifdef DEBUG_USE_NEW_PASS + auto transposedPart = copyMatrix(ptr_filt_current, convolution.precision.size(), in_channels, kernelHW); +#else auto transposedPart = transposeMatrix(ptr_filt_current, convolution.precision.size(), in_channels, kernelHW); +#endif transposedWeights.insert(transposedWeights.end(), transposedPart.begin(), transposedPart.end()); transposedWeights.resize(transposedWeights.size() + kernelPad); } @@ -970,6 +1040,21 @@ void GNAGraphCompiler::PoolingPrimitive(InferenceEngine::CNNLayerPtr layer) { auto inputs = layer->insData.begin()->lock(); auto outputs = *layer->outData.begin(); +#ifdef DEBUG_USE_NEW_PASS + uint32_t w_dim_in = GetDataDimSizeNHWC(inputs, InferenceEngine::DataDimName::W); + uint32_t h_dim_in = GetDataDimSizeNHWC(inputs, InferenceEngine::DataDimName::H); + const uint32_t c_dim_in = GetDataDimSizeNHWC(inputs, InferenceEngine::DataDimName::C); + + uint32_t w_dim_out = GetDataDimSizeNHWC(outputs, InferenceEngine::DataDimName::W); + uint32_t h_dim_out = GetDataDimSizeNHWC(outputs, InferenceEngine::DataDimName::H); + const uint32_t c_dim_out = GetDataDimSizeNHWC(outputs, InferenceEngine::DataDimName::C); + + if (inputs->getLayout() == InferenceEngine::Layout::CHW) { + // Pooling is ngraph-3D here. 
Make some fixes to work with it as it's ngraph-4D + pooling._kernel = PropertyVectorAppend(pooling._kernel, 1); + pooling._stride = PropertyVectorAppend(pooling._stride, 1); + } +#else uint32_t w_dim_in = InferenceEngine::GetDataDimByName(inputs, InferenceEngine::DataDimName::W); uint32_t h_dim_in = InferenceEngine::GetDataDimByName(inputs, InferenceEngine::DataDimName::H); const uint32_t c_dim_in = InferenceEngine::GetDataDimByName(inputs, InferenceEngine::DataDimName::C); @@ -977,7 +1062,7 @@ void GNAGraphCompiler::PoolingPrimitive(InferenceEngine::CNNLayerPtr layer) { uint32_t w_dim_out = InferenceEngine::GetDataDimByName(outputs, InferenceEngine::DataDimName::W); uint32_t h_dim_out = InferenceEngine::GetDataDimByName(outputs, InferenceEngine::DataDimName::H); const uint32_t c_dim_out = InferenceEngine::GetDataDimByName(outputs, InferenceEngine::DataDimName::C); - +#endif if (w_dim_in == 1) { // swap dimensions if needed to support swapped 1D case std::swap(h_dim_in, w_dim_in); std::swap(h_dim_out, w_dim_out); @@ -2833,5 +2918,13 @@ std::vector GNAGraphCompiler::transposeMatrix(uint8_t* ptr_matrix, return temp_buffer; } +std::vector +GNAGraphCompiler::copyMatrix(uint8_t* ptr_matrix, size_t element_size, uint32_t num_rows, uint32_t num_cols) { + const size_t dest_size = num_rows * num_cols * element_size; + std::vector temp_buffer(dest_size); + ::memcpy(temp_buffer.data(), ptr_matrix, dest_size); + return temp_buffer; +} + } // namespace intel_gna } // namespace ov diff --git a/src/plugins/intel_gna/src/gna_graph_compiler.hpp b/src/plugins/intel_gna/src/gna_graph_compiler.hpp index f58c4ff80317a3..78b6116ca30fda 100644 --- a/src/plugins/intel_gna/src/gna_graph_compiler.hpp +++ b/src/plugins/intel_gna/src/gna_graph_compiler.hpp @@ -53,6 +53,10 @@ class GNAGraphCompiler { size_t element_size, uint32_t num_rows, uint32_t num_cols); + std::vector static copyMatrix(uint8_t* ptr_matrix, + size_t element_size, + uint32_t num_rows, + uint32_t num_cols); std::unique_ptr 
cnn2dValidator; diff --git a/src/plugins/intel_gna/src/gna_graph_tools.hpp b/src/plugins/intel_gna/src/gna_graph_tools.hpp index b4422f5c0265b6..f29466a2673cc6 100644 --- a/src/plugins/intel_gna/src/gna_graph_tools.hpp +++ b/src/plugins/intel_gna/src/gna_graph_tools.hpp @@ -925,4 +925,38 @@ inline uint32_t GetDataDimByName(InferenceEngine::DataPtr data, DataDimName dimN return GetDimFromBack(dims, backOffsets[dimIxInNCHW]); } +/** + * @brief returns a size of a specified data dimension depending on the layout + * NHWC specialization + * @param data a pointer to the data + * @param dimName dimension name + */ +inline uint32_t GetDataDimSizeNHWC(InferenceEngine::DataPtr data, DataDimName dimName) { + uint32_t dimIxInNCHW = static_cast(dimName); + IE_ASSERT(dimIxInNCHW <= 3); + + std::vector backOffsets; + switch (data->getLayout()) { + case Layout::C: + case Layout::NC: + // 1 will be returned for offsets > 2 + backOffsets = std::vector{2, 1, 3, 4}; + break; + case Layout::HWC: + // 1 will be returned for offset 4 + case Layout::NHWC: + backOffsets = std::vector{4, 3, 2, 1}; + break; + case Layout::CHW: + // 1 will be returned for offset 4 + case Layout::NCHW: + backOffsets = std::vector{4, 1, 3, 2}; + break; + default: + THROW_GNA_EXCEPTION << data->getName() << " Unexpected layout " << data->getLayout(); + } + auto dims = data->getDims(); + return GetDimFromBack(dims, backOffsets[dimIxInNCHW]); +} + } // namespace InferenceEngine diff --git a/src/plugins/intel_gna/src/layers/gna_layer_type.hpp b/src/plugins/intel_gna/src/layers/gna_layer_type.hpp index 532da1ed7e0317..34712069673f3d 100644 --- a/src/plugins/intel_gna/src/layers/gna_layer_type.hpp +++ b/src/plugins/intel_gna/src/layers/gna_layer_type.hpp @@ -52,6 +52,8 @@ enum class LayerType { Gemm, Pwl, Identity, + GNAConvolution, + GNAMaxPool, NO_TYPE }; @@ -93,7 +95,9 @@ static const InferenceEngine::details::caseless_map Laye {"Pwl", LayerType::Pwl}, {"Identity", LayerType::Identity}, {"Gemm", LayerType::Gemm}, 
-}; + {"GNAConvolution", LayerType::GNAConvolution }, + {"GNAMaxPool", LayerType::GNAMaxPool }, + }; LayerType LayerTypeFromStr(const std::string& str); diff --git a/src/plugins/intel_gna/src/ops/gna_convolution.cpp b/src/plugins/intel_gna/src/ops/gna_convolution.cpp new file mode 100644 index 00000000000000..9f848fe8ffa503 --- /dev/null +++ b/src/plugins/intel_gna/src/ops/gna_convolution.cpp @@ -0,0 +1,364 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "gna_convolution.hpp" + +#include + +#include "ngraph/attribute_visitor.hpp" +#include "ngraph/runtime/host_tensor.hpp" + +#include +#include + +NGRAPH_RTTI_DEFINITION(ov::intel_gna::op::GNAConvolution, "GNAConvolution", 0); + +namespace ov { +namespace intel_gna { +namespace op { +namespace internal { + +// code is based on ngraph/core/shape_inference/src/convolution_shape_inference.cpp +// differs only *op type +// TODO: think how can we avoid DRY +int64_t calculate_num_spatial(const GNAConvolution* op, + const ngraph::PartialShape& input_shape, + const ngraph::PartialShape& filters_shape, + const int64_t& num_non_spatial_data_dims, + const int64_t& num_non_spatial_filter_dims) { + int64_t num_spatial = op->m_num_spatial; + if (num_spatial == -1) { + const auto &input_rank = input_shape.rank(); + const auto &filters_rank = filters_shape.rank(); + + if (const auto &size = op->m_dilations.size()) + num_spatial = static_cast(size); + if (const auto &size = op->m_strides.size()) + num_spatial = static_cast(size); + if (const auto &size = op->m_pads_begin.size()) + num_spatial = static_cast(size); + if (const auto &size = op->m_pads_end.size()) + num_spatial = static_cast(size); + if (input_rank.is_static()) + num_spatial = input_rank.get_length() - num_non_spatial_data_dims; + if (filters_rank.is_static()) + num_spatial = filters_rank.get_length() - num_non_spatial_filter_dims; + } + return num_spatial; +} + +void update_and_validate_attributes(GNAConvolution* op) { 
+ const auto& num_spatial = op->m_num_spatial; + if (num_spatial != -1) { + auto& strides = op->m_strides; + auto& dilations = op->m_dilations; + auto& pad_begin = op->m_pads_begin; + auto& pad_end = op->m_pads_end; + auto& auto_pad = op->m_auto_pad; + + if (strides.empty()) + strides = ngraph::Strides(num_spatial, 1); + if (dilations.empty()) + dilations = ngraph::Strides(num_spatial, 1); + if (pad_begin.empty() || auto_pad == ov::op::PadType::VALID) + pad_begin = ngraph::CoordinateDiff(num_spatial, 0); + if (pad_end.empty() || auto_pad == ov::op::PadType::VALID) + pad_end = ngraph::CoordinateDiff(num_spatial, 0); + + NODE_VALIDATION_CHECK(op, + static_cast(strides.size()) == num_spatial, + "Strides should be defined for all and only spatial features."); + NODE_VALIDATION_CHECK(op, + static_cast(dilations.size()) == num_spatial, + "Dilations should be defined for all and only spatial features."); + NODE_VALIDATION_CHECK(op, + static_cast(pad_begin.size()) == num_spatial && + static_cast(pad_end.size()) == num_spatial, + "Pads should be defined for all and only spatial features."); + NODE_VALIDATION_CHECK(op, + std::all_of(dilations.begin(), + dilations.end(), + [](const size_t &i) { + return i > 0; + }), + "Filter dilation (", + dilations, + ") has zero dimension."); + NODE_VALIDATION_CHECK(op, + std::all_of(strides.begin(), + strides.end(), + [](const size_t &i) { + return i > 0; + }), + "Filter strides (", + strides, + ") has zero dimension."); + } +} + +// code is based on ngraph/core/shape_inference/include/convolution_shape_inference.hpp +// but instead of NCHW uses NHWC layout + +template +inline bool dynamic_check(const int64_t& num_spatial) { + OPENVINO_ASSERT(num_spatial != -1, + "Convolution shape inference doesn't have enough information for static shape calculation"); + return true; +} + +// FIXME: do we need that function as a template ? 
+template<> +inline bool dynamic_check(const int64_t& num_spatial) { + return num_spatial != -1; +} + +// FIXME: do we need that function as a template ? +// TODO: search where that function is used in openvino +template +bool resolve_auto_pad_for_shape(const GNAConvolution* op, + ngraph::CoordinateDiff& pads_begin, + ngraph::CoordinateDiff& pads_end, + const std::vector &input_shapes, + const int64_t& num_non_spatial_data_dims, + const int64_t& num_non_spatial_filter_dims) { + const auto& auto_pad = op->get_auto_pad(); + if (auto_pad != ov::op::PadType::SAME_UPPER && auto_pad != ov::op::PadType::SAME_LOWER) { + pads_begin = op->m_pads_begin; + pads_end = op->m_pads_end; + return true; + } + + auto& num_spatial = op->m_num_spatial; + if (!dynamic_check(num_spatial)) + return false; + + auto input_shape = input_shapes[0]; + auto filters_shape = input_shapes[1]; + + if (input_shape.rank().is_dynamic()) + input_shape.resize(num_spatial + num_non_spatial_data_dims); + if (filters_shape.rank().is_dynamic()) + filters_shape.resize(num_spatial + num_non_spatial_filter_dims); + + const auto& strides = op->m_strides; + const auto& dilations = op->m_dilations; + pads_begin.resize(num_spatial); + pads_end.resize(num_spatial); + + bool status = true; + for (int64_t i = 0; i < num_spatial; ++i) { + const auto& input_dim = input_shape[i + 1]; + const auto& filters_dim = filters_shape[i + 1]; + if (input_dim.is_static() && filters_dim.is_static()) { + const int64_t& window_dilated_dim = (filters_dim.get_length() - 1) * dilations[i] + 1; + NODE_VALIDATION_CHECK(op, + window_dilated_dim > 0, + "Window after dilation has dimension less than 1 (dim: ", + window_dilated_dim, + ") at axis ", + i, + "."); + + const int64_t& image_size = input_dim.get_length(); + const int64_t& filter_stride = strides[i]; + const int64_t& output_size = (image_size + filter_stride - 1) / filter_stride; + + const int64_t& tmp = (output_size - 1) * filter_stride + window_dilated_dim; + const int64_t& 
padding_needed = tmp > image_size ? tmp - image_size : 0; + + const size_t& padding_lhs = static_cast(padding_needed / 2); + const size_t& padding_rhs = static_cast(padding_needed - padding_lhs); + + pads_begin[i] = auto_pad == ov::op::PadType::SAME_UPPER ? padding_lhs : padding_rhs; + pads_end[i] = auto_pad == ov::op::PadType::SAME_UPPER ? padding_rhs : padding_lhs; + } else { + status = false; + } + } + return status; +} + +// FIXME: do we need that function as a template ? +// TODO: search where that function is used in openvino +template +void shape_infer(const GNAConvolution* op, + const ngraph::CoordinateDiff& pads_begin, + const ngraph::CoordinateDiff& pads_end, + const std::vector &input_shapes, + std::vector &output_shapes) { + NODE_VALIDATION_CHECK(op, input_shapes.size() == 2 && output_shapes.size() == 1); + auto input_shape = input_shapes[0], filters_shape = input_shapes[1]; + + const auto& num_spatial = op->m_num_spatial; + NODE_VALIDATION_CHECK(op, num_spatial != -1, + "Convolution shape_infer should be provided with correct num_spatial attribute"); + + if (input_shape.rank().is_dynamic()) + input_shape.resize(num_spatial + 2); + if (filters_shape.rank().is_dynamic()) + filters_shape.resize(num_spatial + 2); + + NODE_VALIDATION_CHECK(op, + (static_cast(input_shape.size()) == (num_spatial + 2)) && + (static_cast(filters_shape.size()) == (num_spatial + 2)), + "Data batch and filters rank do not match (data batch shape: ", + input_shape, + ", filters shape: ", + filters_shape, + ")."); + + // ranks are originally static or aligned with num_spatial, attributes assumed to be valid + auto& output_shape = output_shapes[0]; + output_shape.resize(num_spatial + 2); + output_shape[0] = input_shape[0]; + // Channel is the last in NHWC layout + *(output_shape.rbegin()) = filters_shape[0]; // NHWC C is last instead of filters_shape[0] for NCHW layout + + const auto n_data_channel = *(input_shape.rbegin()); + const auto n_filter_channel = *(filters_shape.rbegin()); 
+ + NODE_VALIDATION_CHECK( + op, + n_data_channel.compatible(n_filter_channel), // instead of input_shape[1].compatible(filters_shape[1]), + "Data batch channel count (", + n_data_channel, // instead of input_shape[1], + ") does not match filter input ", + "channel count (", + n_filter_channel, // instead of filters_shape[1], + ")."); + + const auto& dilations = op->m_dilations; + const auto& strides = op->m_strides; + + for (int64_t i = 0; i < num_spatial; ++i) { + const auto& input_dim = input_shape[i + 1]; + const auto& filters_dim = filters_shape[i + 1]; + if (input_dim.is_static() && filters_dim.is_static()) { + const int64_t& window_dilated_dim = (filters_dim.get_length() - 1) * dilations[i] + 1; + NODE_VALIDATION_CHECK(op, + window_dilated_dim > 0, + "Window after dilation has dimension less than 1 (dim: ", + window_dilated_dim, + ") at axis ", + i, + "."); + + const int64_t& data_padded_dilated_dim = input_dim.get_length() + pads_begin[i] + pads_end[i]; + NODE_VALIDATION_CHECK(op, + window_dilated_dim <= data_padded_dilated_dim, + "Window after dilation has dimension (dim: ", + window_dilated_dim, + ") larger than the data shape after padding (dim: ", + data_padded_dilated_dim, + ") at axis ", + i, + "."); + output_shape[i + 1] = (data_padded_dilated_dim - window_dilated_dim) / strides[i] + 1; + } + } +} + +} // namespace internal + +GNAConvolution::GNAConvolution(const ngraph::Output& data_batch, + const ngraph::Output& filters, + const ngraph::Output& bias, + const ngraph::Strides& strides, + const ngraph::CoordinateDiff& pads_begin, + const ngraph::CoordinateDiff& pads_end, + const ngraph::Strides& dilations, + const ov::op::PadType& auto_pad) + : ov::op::Op({data_batch, filters, bias}), + m_strides(strides), + m_dilations(dilations), + m_pads_begin(pads_begin), + m_pads_end(pads_end), + m_auto_pad(auto_pad) { + constructor_validate_and_infer_types(); +} + +GNAConvolution::GNAConvolution(const ngraph::Output& data_batch, + const ngraph::Output& filters, 
+ const ngraph::Strides& strides, + const ngraph::CoordinateDiff& pads_begin, + const ngraph::CoordinateDiff& pads_end, + const ngraph::Strides& dilations, + const ov::op::PadType& auto_pad) + : ov::op::Op({data_batch, filters}), + m_strides(strides), + m_dilations(dilations), + m_pads_begin(pads_begin), + m_pads_end(pads_end), + m_auto_pad(auto_pad) { + constructor_validate_and_infer_types(); +} + +bool GNAConvolution::visit_attributes(ov::AttributeVisitor& visitor) { + visitor.on_attribute("strides", m_strides); + visitor.on_attribute("dilations", m_dilations); + visitor.on_attribute("pads_begin", m_pads_begin); + visitor.on_attribute("pads_end", m_pads_end); + visitor.on_attribute("auto_pad", m_auto_pad); + return true; +} + +void GNAConvolution::validate_and_infer_types() { + ngraph::element::Type data_batch_et = get_input_element_type(0); + ngraph::element::Type filters_et = get_input_element_type(1); + + ngraph::element::Type result_et; + NODE_VALIDATION_CHECK(this, + ngraph::element::Type::merge(result_et, data_batch_et, filters_et), + "Element types for data batch and filters do not match (data batch element type: ", + data_batch_et, + ", filters element type: ", + filters_et, + ")."); + + NODE_VALIDATION_CHECK(this, + result_et.is_real() || result_et.is_integral_number(), + "Element types must be numeric. 
Got: ", + result_et); + auto& data_shape = get_input_partial_shape(0); + auto& filter_shape = get_input_partial_shape(1); + + m_num_spatial = internal::calculate_num_spatial(this, data_shape, filter_shape, 2, 2); + internal::update_and_validate_attributes(this); + + std::vector input_shapes = {data_shape, filter_shape}; + std::vector output_shapes = {ov::PartialShape{}}; + + if (m_num_spatial != -1) { + internal::resolve_auto_pad_for_shape(this, m_pads_begin, m_pads_end, input_shapes, 2, 2); + internal::shape_infer(this, m_pads_begin, m_pads_end, input_shapes, output_shapes); + } + + set_output_type(0, result_et, output_shapes[0]); +} + +std::shared_ptr GNAConvolution::clone_with_new_inputs(const ngraph::OutputVector& new_args) const { + if (new_args.size() == 2) { + return std::make_shared(new_args.at(0), + new_args.at(1), + m_strides, + m_pads_begin, + m_pads_end, + m_dilations, + m_auto_pad); + } else if (new_args.size() == 3) { + return std::make_shared(new_args.at(0), + new_args.at(1), + new_args.at(2), + m_strides, + m_pads_begin, + m_pads_end, + m_dilations, + m_auto_pad); + } + + throw ngraph::ngraph_error("Unsupported number of arguments for GNAConvolution operation"); +} +} // namespace op +} // namespace intel_gna +} // namespace ov \ No newline at end of file diff --git a/src/plugins/intel_gna/src/ops/gna_convolution.hpp b/src/plugins/intel_gna/src/ops/gna_convolution.hpp new file mode 100644 index 00000000000000..43e1fff2d7f810 --- /dev/null +++ b/src/plugins/intel_gna/src/ops/gna_convolution.hpp @@ -0,0 +1,172 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/op/op.hpp" +#include "ngraph/node.hpp" +#include + +#include "ngraph/coordinate_diff.hpp" +#include "ngraph/op/op.hpp" + +namespace ov { +namespace intel_gna { +namespace op { + +class GNAConvolution; + +namespace internal { + +int64_t calculate_num_spatial(const ov::intel_gna::op::GNAConvolution * op, + const 
ngraph::PartialShape& input_shape, + const ngraph::PartialShape& filters_shape, + const int64_t& num_non_spatial_data_dims, + const int64_t& num_non_spatial_filter_dims); + +void update_and_validate_attributes(ov::intel_gna::op::GNAConvolution* op); + +template +bool resolve_auto_pad_for_shape(const ov::intel_gna::op::GNAConvolution* op, + ngraph::CoordinateDiff& pads_begin, + ngraph::CoordinateDiff& pads_end, + const std::vector& input_shapes, + const int64_t& num_non_spatial_data_dims, + const int64_t& num_non_spatial_filter_dims); +template +void shape_infer(const ov::intel_gna::op::GNAConvolution* op, + const ngraph::CoordinateDiff& pads_begin, + const ngraph::CoordinateDiff& pads_end, + const std::vector& input_shapes, + std::vector& output_shapes); + +} // namespace internal + +/// \brief Convolution with NHWC layout +/// +class GNAConvolution : public ov::op::Op { +public: + NGRAPH_RTTI_DECLARATION; + + /// \brief Constructs a batched convolution operation. + GNAConvolution() = default; + /// \brief Constructs a batched convolution operation. + /// + /// \param data_batch The node producing the input data batch tensor.
+ /// `[N, C_IN, D1, ... Df]` + /// \param filters The node producing the filters tensor.
+ /// `[C_OUT, C_IN, F1, ... Ff]` + /// \param strides The strides.
+ /// `[f]` + /// \param dilations The dilations.
+ /// `[f]` + /// \param pads_begin The beginning of padding shape.
+ /// `[f]` + /// \param pads_end The end of padding shape.
+ /// `[f]` + /// \param auto_pad The pad type for automatically computing padding sizes.
+ /// `[f]` + /// + /// Output `[N, C_OUT, R1, ... Rf]` + /// + GNAConvolution(const ngraph::Output& data_batch, + const ngraph::Output& filters, + const ngraph::Output& bias, + const ngraph::Strides& strides, + const ngraph::CoordinateDiff& pads_begin, + const ngraph::CoordinateDiff& pads_end, + const ngraph::Strides& dilations, + const ov::op::PadType& auto_pad = ov::op::PadType::EXPLICIT); + + GNAConvolution(const ngraph::Output& data_batch, + const ngraph::Output& filters, + const ngraph::Strides& strides, + const ngraph::CoordinateDiff& pads_begin, + const ngraph::CoordinateDiff& pads_end, + const ngraph::Strides& dilations, + const ov::op::PadType& auto_pad = ov::op::PadType::EXPLICIT); + + void validate_and_infer_types() override; + bool visit_attributes(ov::AttributeVisitor& visitor) override; + + std::shared_ptr clone_with_new_inputs(const ngraph::OutputVector& new_args) const override; + + /// \return The strides. + const ngraph::Strides& get_strides() const { + return m_strides; + } + void set_strides(const ngraph::Strides& strides) { + m_strides = strides; + } + /// \return The dilations. + const ngraph::Strides& get_dilations() const { + return m_dilations; + } + void set_dilations(const ngraph::Strides& dilations) { + m_dilations = dilations; + } + /// \return The padding-below sizes (possibly negative). + const ngraph::CoordinateDiff& get_pads_begin() const { + return m_pads_begin; + } + void set_pads_begin(const ngraph::CoordinateDiff& pads_begin) { + m_pads_begin = pads_begin; + } + /// \return The padding-above sizes (possibly negative). + const ngraph::CoordinateDiff& get_pads_end() const { + return m_pads_end; + } + void set_adding_above(const ngraph::CoordinateDiff& pads_end) { + m_pads_end = pads_end; + } + /// \return The pad type for convolution. 
+ const ov::op::PadType& get_auto_pad() const { + return m_auto_pad; + } + void set_auto_pad(const ov::op::PadType& auto_pad) { + m_auto_pad = auto_pad; + } + + /* + * TODO: for unit tests + bool evaluate(ov::runtime::TensorVector& output_values, + const ov::runtime::TensorVector& input_values, + const ov::EvaluationContext & evaluation_context) const override; + bool has_evaluate() const override; + */ + +protected: + ngraph::Strides m_strides; + ngraph::Strides m_dilations; + ngraph::CoordinateDiff m_pads_begin; + ngraph::CoordinateDiff m_pads_end; + ov::op::PadType m_auto_pad; + int64_t m_num_spatial = -1; + +private: + friend int64_t internal::calculate_num_spatial(const ov::intel_gna::op::GNAConvolution* op, + const ngraph::PartialShape& input_shape, + const ngraph::PartialShape& filters_shape, + const int64_t& num_non_spatial_data_dims, + const int64_t& num_non_spatial_filter_dims); + + friend void internal::update_and_validate_attributes(ov::intel_gna::op::GNAConvolution* op); + + template + friend bool internal::resolve_auto_pad_for_shape(const ov::intel_gna::op::GNAConvolution* op, + ngraph::CoordinateDiff& pads_begin, + ngraph::CoordinateDiff& pads_end, + const std::vector& input_shapes, + const int64_t& num_non_spatial_data_dims, + const int64_t& num_non_spatial_filter_dims); + template + friend void internal::shape_infer(const ov::intel_gna::op::GNAConvolution* op, + const ngraph::CoordinateDiff& pads_begin, + const ngraph::CoordinateDiff& pads_end, + const std::vector& input_shapes, + std::vector& output_shapes); +}; +} // namespace op +} // namespace intel_gna +} // namespace ov \ No newline at end of file diff --git a/src/plugins/intel_gna/src/ops/gna_max_pool.cpp b/src/plugins/intel_gna/src/ops/gna_max_pool.cpp new file mode 100644 index 00000000000000..f64dc866dbd731 --- /dev/null +++ b/src/plugins/intel_gna/src/ops/gna_max_pool.cpp @@ -0,0 +1,275 @@ +// Copyright (C) 2018-2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + 
+#include "gna_max_pool.hpp" +#include + +#include "ngraph/attribute_visitor.hpp" +#include "ngraph/op/constant.hpp" +#include "ngraph/runtime/host_tensor.hpp" +#include "ngraph/validation_util.hpp" +#include "ngraph/node.hpp" +#include "ngraph/validation_util.hpp" + +NGRAPH_RTTI_DEFINITION(ov::intel_gna::op::GNAMaxPool, "GNAMaxPool", 0); + +namespace ov { +namespace intel_gna { +namespace op { +// +// Infers the output batch shape and element type for batched pooling fprop. +// +ov::PartialShape infer_batched_pooling_forward(const ngraph::Node* node, + const ov::PartialShape& data_batch_shape, + const ov::CoordinateDiff& data_padding_below, + const ov::CoordinateDiff& data_padding_above, + const ov::PartialShape& window_shape, + const ngraph::Strides& window_strides, + bool is_window_all_in_padding_allowed, + bool ceil_mode, + const ngraph::Strides& window_dilation); + +// +// Infers the output batch shape and element type for batched pooling fprop. +// +ov::PartialShape infer_batched_pooling_forward(const ngraph::Node* node, + const ov::PartialShape& data_batch_shape, + const ov::CoordinateDiff& data_padding_below, + const ov::CoordinateDiff& data_padding_above, + const ov::PartialShape& window_shape, + const ngraph::Strides& window_strides, + bool is_window_all_in_padding_allowed, + bool ceil_mode, + const ngraph::Strides& window_dilation) { + NODE_VALIDATION_CHECK(node, + data_batch_shape.rank().is_dynamic() || + (data_batch_shape.rank().get_length() >= 3 && data_batch_shape.rank().get_length() <= 5), + "Data batch must have rank of at least 4 or 5 (one batch axis, ", + "one input-channel axis, and two or three spatial dimension) ", + "(data batch shape: ", + data_batch_shape, + ")."); + + ov::PartialShape data_spatial_shape{ov::PartialShape::dynamic()}; + + NODE_VALIDATION_CHECK(node, + data_spatial_shape.merge_rank(data_batch_shape.rank() - 2) && + data_spatial_shape.merge_rank(data_padding_below.size()) && + 
data_spatial_shape.merge_rank(data_padding_above.size()) && + data_spatial_shape.merge_rank(window_shape.rank()) && + data_spatial_shape.merge_rank(window_strides.size()), + "Ranks for data item shape (data batch has shape ", + data_batch_shape, + ", so data item rank is ", + (data_batch_shape.rank() - 2), + "), padding below (", + data_padding_below, + "), padding above (", + data_padding_above, + "), window shape (", + window_shape, + "), and window strides (", + window_strides, + ") do not match."); + + ov::Dimension batch_size{ov::Dimension::dynamic()}; + ov::Dimension channel_count{ov::Dimension::dynamic()}; + ov::PartialShape data_output_spatial_shape{ov::PartialShape::dynamic(data_spatial_shape.rank())}; + + if (data_batch_shape.rank().is_static()) { + batch_size = data_batch_shape[0]; + channel_count = *(data_batch_shape.end() - 1); // EMUTEX fix NCHW -> NHWC from data_batch_shape[1] + + for (int64_t i = 0; i < data_spatial_shape.rank().get_length(); i++) { + data_spatial_shape[i] = data_batch_shape[i + 1]; // EMUTEX fix NCHW -> NHWC from data_spatial_shape[i] = data_batch_shape[i + 2] + } + + NODE_VALIDATION_CHECK(node, batch_size.is_dynamic() || batch_size.get_length() > 0, "Batch size is zero."); + + NODE_VALIDATION_CHECK(node, + channel_count.is_dynamic() || channel_count.get_length() > 0, + "Channel count is zero."); + + // For pooling ops we don't need dilation, so we fill in the identity value (all 1). 
+ ngraph::Strides data_dilation(data_spatial_shape.rank().get_length(), 1); + ngraph::Strides dilations = window_dilation; + // if the window_dilation was not specified, generate the default value (no dilations) + if (window_dilation.empty()) { + // dilations equal to 1 for each spatial axis mean that the window is not dilated + dilations = ngraph::Strides(data_spatial_shape.rank().get_length(), 1); + } + + data_output_spatial_shape = ngraph::infer_windowed_reduction_output_shape(node, + data_spatial_shape, + data_dilation, + data_padding_below, + data_padding_above, + window_shape, + window_strides, + dilations, + is_window_all_in_padding_allowed, + ceil_mode); + } + + ov::PartialShape data_batch_output_shape{ov::PartialShape::dynamic(data_output_spatial_shape.rank() + 2)}; + data_batch_output_shape[0] = batch_size; + *(data_batch_output_shape.end() - 1) = channel_count;// EMUTEX fix NCHW -> NHWC data_batch_output_shape[1] = channel_count; + + for (int64_t i = 0; i < data_spatial_shape.rank().get_length(); i++) { + data_batch_output_shape[i + 1] = data_output_spatial_shape[i]; // EMUTEX fix NCHW -> NHWC data_batch_output_shape[i + 2] = data_output_spatial_shape[i]; + } + + return data_batch_output_shape; +} + + +GNAMaxPool::GNAMaxPool(const ngraph::Output& arg, + const ngraph::Strides& strides, + const ov::Shape& pads_begin, + const ov::Shape& pads_end, + const ov::Shape& kernel, + const ov::op::RoundingType rounding_type, + const ov::op::PadType auto_pad) + : Op({arg}), + m_kernel(kernel), + m_strides(strides), + m_pads_begin(pads_begin), + m_pads_end(pads_end), + m_auto_pad(auto_pad), + m_rounding_type(rounding_type) { + constructor_validate_and_infer_types(); +} + +bool GNAMaxPool::visit_attributes(ov::AttributeVisitor& visitor) { + visitor.on_attribute("strides", m_strides); + visitor.on_attribute("pads_begin", m_pads_begin); + visitor.on_attribute("pads_end", m_pads_end); + visitor.on_attribute("kernel", m_kernel); + visitor.on_attribute("rounding_type", 
m_rounding_type); + visitor.on_attribute("auto_pad", m_auto_pad); + return true; +} + +void GNAMaxPool::validate_and_infer_types() { + if (0 == m_strides.size()) { + m_strides = ngraph::Strides(m_kernel.size(), 1); + } + + if (0 == m_pads_begin.size()) { + m_pads_begin = ov::Shape(m_kernel.size(), 0); + } + + if (0 == m_pads_end.size()) { + m_pads_end = ov::Shape(m_kernel.size(), 0); + } + + const ov::PartialShape& arg_shape = get_input_partial_shape(0); + + NODE_VALIDATION_CHECK( + this, + arg_shape.rank().compatible(3) || arg_shape.rank().compatible(4) || arg_shape.rank().compatible(5), + "Expected a 3D, 4D or 5D tensor for the input. Got: ", + arg_shape); + + if (arg_shape.rank().is_static()) { + NODE_VALIDATION_CHECK(this, + static_cast(m_pads_end.size()) == arg_shape.rank().get_max_length() - 2, + "Expected pads_end size to be equal to input size - 2. Got: ", + m_pads_end.size()); + + NODE_VALIDATION_CHECK(this, + static_cast(m_pads_begin.size()) == arg_shape.rank().get_max_length() - 2, + "Expected pads_begin size to be equal to input size - 2. Got: ", + m_pads_begin.size()); + NODE_VALIDATION_CHECK(this, + static_cast(m_kernel.size()) == arg_shape.rank().get_max_length() - 2, + "Expected kernel size to be equal to input size - 2. Got: ", + m_kernel.size()); + NODE_VALIDATION_CHECK(this, + static_cast(m_strides.size()) == arg_shape.rank().get_max_length() - 2, + "Expected strides size to be equal to input size - 2. Got: ", + m_strides.size()); + } + + const ov::PartialShape output_shape = infer_output_shape(ngraph::Strides{}); // no dilations of the filter window + + set_output_type(0, get_input_element_type(0), output_shape); +} + +ov::PartialShape GNAMaxPool::infer_output_shape(const ngraph::Strides& dilations) { + const auto& arg_shape = get_input_partial_shape(0); + + bool update_auto_padding_succeed = true; + + if (m_auto_pad == ov::op::PadType::SAME_UPPER || m_auto_pad == ov::op::PadType::SAME_LOWER) { + const auto filter_dilations = dilations.empty() ? 
ngraph::Strides(m_kernel.size(), 1) : dilations; + update_auto_padding_succeed = update_auto_padding(arg_shape, filter_dilations, m_pads_end, m_pads_begin); + } + if (m_auto_pad == ov::op::PadType::VALID) { + m_pads_end = ov::Shape(m_pads_end.size(), 0); + m_pads_begin = ov::Shape(m_pads_begin.size(), 0); + } + + auto output_shape = ov::PartialShape::dynamic(); + if (update_auto_padding_succeed) { + ov::CoordinateDiff pads_begin(m_pads_begin.begin(), m_pads_begin.end()); + ov::CoordinateDiff pads_end(m_pads_end.begin(), m_pads_end.end()); + output_shape = ov::intel_gna::op::infer_batched_pooling_forward(this, + get_input_partial_shape(0), + pads_begin, + pads_end, + m_kernel, + m_strides, + true, + m_rounding_type == ov::op::RoundingType::CEIL, + dilations); + } else { + if (arg_shape.rank().is_static()) { + output_shape = std::vector(arg_shape.rank().get_max_length(), ov::Dimension::dynamic()); + if (arg_shape[0].is_static()) { + output_shape[0] = arg_shape[0]; // batch size + } + if ((arg_shape.end() - 1)->is_static()) { // EMUTEX FIXED: from [1] to end() - 1 NCHW -> NHWC + *(output_shape.end() - 1) = *(arg_shape.end() - 1); // channel size + } + } + } + + return output_shape; +} + +bool GNAMaxPool::update_auto_padding(const ov::PartialShape& in_shape, + const ngraph::Strides& filter_dilations, + ov::Shape& new_pads_end, + ov::Shape& new_pads_begin) const { + bool update_auto_padding_succeed = true; + if (m_auto_pad == ov::op::PadType::SAME_UPPER || m_auto_pad == ov::op::PadType::SAME_LOWER) { + ov::CoordinateDiff pads_end, pads_begin; + update_auto_padding_succeed = ngraph::try_apply_auto_padding(in_shape, + m_kernel, + m_strides, + filter_dilations, + m_auto_pad, + pads_end, + pads_begin); + new_pads_end = ov::Shape(pads_end.begin(), pads_end.end()); + new_pads_begin = ov::Shape(pads_begin.begin(), pads_begin.end()); + } + return update_auto_padding_succeed; +} + +std::shared_ptr GNAMaxPool::clone_with_new_inputs(const ov::OutputVector& new_args) const { + 
check_new_args_count(this, new_args); + return std::make_shared(new_args.at(0), + m_strides, + m_pads_begin, + m_pads_end, + m_kernel, + m_rounding_type, + m_auto_pad); +} + +} // namespace op +} // namespace intel_gna +} // namespace ov \ No newline at end of file diff --git a/src/plugins/intel_gna/src/ops/gna_max_pool.hpp b/src/plugins/intel_gna/src/ops/gna_max_pool.hpp new file mode 100644 index 00000000000000..3c6d8c4cbb55a9 --- /dev/null +++ b/src/plugins/intel_gna/src/ops/gna_max_pool.hpp @@ -0,0 +1,108 @@ +// Copyright (C) 2018-2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include "openvino/op/op.hpp" +#include "openvino/op/util/attr_types.hpp" +#include "ngraph/node.hpp" +#include "openvino/op/util/max_pool_base.hpp" + +namespace ov { +namespace intel_gna { +namespace op { +/// \brief Batched max pooling operation. +class GNAMaxPool : public ov::op::Op { +public: + NGRAPH_RTTI_DECLARATION; + + /// \brief Constructs a batched max pooling operation. + GNAMaxPool() = default; + + /// \brief Constructs a batched max pooling operation. + /// + /// \param arg The node producing the input data batch tensor. + /// \param strides The strides. + /// \param pads_begin The beginning of padding shape. + /// \param pads_end The end of padding shape. + /// \param kernel The kernel shape. + /// \param rounding_type Whether to use ceiling or floor rounding type while + /// computing output shape. + /// \param auto_pad The pad type for automatically computing padding sizes. + GNAMaxPool(const ngraph::Output& arg, + const ngraph::Strides& strides, + const ngraph::Shape& pads_begin, + const ngraph::Shape& pads_end, + const ngraph::Shape& kernel, + const ov::op::RoundingType rounding_type = ov::op::RoundingType::FLOOR, + const ov::op::PadType auto_pad = ov::op::PadType::EXPLICIT); + + void validate_and_infer_types() override; + bool visit_attributes(ov::AttributeVisitor& visitor) override; + + /// \return The kernel shape. 
+ const ngraph::Shape& get_kernel() const { + return m_kernel; + } + void set_kernel(const ngraph::Shape& kernel) { + m_kernel = kernel; + } + /// \return The strides. + const ngraph::Strides& get_strides() const { + return m_strides; + } + void set_strides(const ngraph::Strides& strides) { + m_strides = strides; + } + /// \return The beginning of padding shape. + const ngraph::Shape& get_pads_begin() const { + return m_pads_begin; + } + void set_pads_begin(const ngraph::Shape& pads_begin) { + m_pads_begin = pads_begin; + } + /// \return The end of padding shape. + const ngraph::Shape& get_pads_end() const { + return m_pads_end; + } + void set_adding_above(const ngraph::Shape& pads_end) { + m_pads_end = pads_end; + } + /// \return The pad type for pooling. + ov::op::PadType get_auto_pad() const { + return m_auto_pad; + } + void set_auto_pad(const ov::op::PadType auto_pad) { + m_auto_pad = auto_pad; + } + /// \return The ceiling mode being used for output shape computations + ov::op::RoundingType get_rounding_type() const { + return m_rounding_type; + } + void set_rounding_type(ov::op::RoundingType rounding_type) { + m_rounding_type = rounding_type; + } + + std::shared_ptr clone_with_new_inputs(const ov::OutputVector& new_args) const override; + +protected: + bool update_auto_padding(const ov::PartialShape& in_shape, + const ngraph::Strides& filter_dilations, + ngraph::Shape& new_pads_end, + ngraph::Shape& new_pads_begin) const; + + ov::PartialShape infer_output_shape(const ngraph::Strides& dilations); + + ngraph::Shape m_kernel; + ngraph::Strides m_strides; + ngraph::Shape m_pads_begin; + ngraph::Shape m_pads_end; + ov::op::PadType m_auto_pad; + ov::op::RoundingType m_rounding_type; +}; +} // namespace op +} // namespace intel_gna +} // namespace ov \ No newline at end of file diff --git a/src/plugins/intel_gna/src/transformations/transpose_nchw.cpp b/src/plugins/intel_gna/src/transformations/transpose_nchw.cpp new file mode 100644 index 
00000000000000..fd0d03b36f44c3 --- /dev/null +++ b/src/plugins/intel_gna/src/transformations/transpose_nchw.cpp @@ -0,0 +1,230 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "transformations/transpose_nchw.hpp" + +#include "transformations/utils/transformation_helper.hpp" + +#include +#include +#include +#include +#include +#include + +#include + +NGRAPH_RTTI_DEFINITION(ov::intel_gna::pass::TransposeNCHW, "TransposeNCHW", 0); +NGRAPH_RTTI_DEFINITION(ov::intel_gna::pass::SubstituteGNAConvolution, "SubstituteGNAConvolution", 0); +NGRAPH_RTTI_DEFINITION(ov::intel_gna::pass::SubstituteGNAMaxPool, "SubstituteGNAMaxPool", 0); + +using Node = std::shared_ptr; + +namespace { +ngraph::Shape MakeTransposeOrderNCHW2NHWC(size_t shape_size); +ngraph::Shape MakeTransposeOrderNHWC2NCHW(size_t shape_size); + +/* transpose orders + before convolution convert NCHW -> NHWC + 3D: NCX {0, 1, 2} -> NXC {0, 2, 1} + 4D: NCHW {0, 1, 2, 3} -> NHWC {0, 2, 3, 1} + 5D: NCZYX {0, 1, 2, 3, 4} -> NZYXC {0, 2, 3, 4, 1} + + after convolution convert NHWC -> NCHW + 3D: NXC {0, 1, 2} -> NCX {0, 2, 1} + 4D: NHWC {0, 1, 2, 3} -> NCHW {0, 3, 1, 2} + 5D: NZYXC {0, 1, 2, 3} -> NCZYX {0, 4, 1, 2, 3} + so just + 1) temp = A[N - 1] + 2) move A[j] -> A[j + 1] for 1 <= j <= N - 2 + 3) A[1] = temp +*/ + +ngraph::Shape MakeTransposeOrderNCHW2NHWC(size_t shape_size) { + ngraph::Shape shape(shape_size); + std::iota(shape.begin(), shape.end(), 0); + + for (int i = 1; i < shape.size() - 1; ++i) + shape[i] = shape[i + 1]; + + *(shape.end() - 1) = 1; + + return shape; +} + +ngraph::Shape MakeTransposeOrderNHWC2NCHW(size_t shape_size) { + ngraph::Shape shape(shape_size); + std::iota(shape.begin(), shape.end(), 0); + + const size_t channels_position = *(shape.end() - 1); + + for (int i = shape.size() - 1; i > 0; --i) + shape[i] = shape[i - 1]; + + shape[1] = channels_position; + + return shape; +} + +} // namespace + +namespace SubstituteGNAConvolutionNS { 
+ +bool DoTransformation(Node convolution); + +bool DoTransformation(Node convolution) { + auto convolution_node = std::dynamic_pointer_cast(convolution); + auto convolution_input_data_node = convolution_node->input_value(0); + auto convolution_input_const_node = convolution_node->input_value(1); + const ngraph::Shape convolution_input_shape = convolution_node->get_input_shape(0); + + // TODO: check input_data_node is not Reshape since that pattern should be matched in another transformation + + if (convolution_input_shape.size() != 3 && convolution_input_shape.size() != 4) { + std::cout << "TransposeNCHW: unsupported convolution size " << convolution_input_shape.size() << std::endl; + return false; + } + + const ngraph::Shape transpose_before_order = MakeTransposeOrderNCHW2NHWC(convolution_input_shape.size()); + + auto transpose_const = ngraph::opset8::Constant::create(ngraph::element::i64, + ngraph::Shape{transpose_before_order.size()}, + transpose_before_order); + + auto transpose_before = std::make_shared(convolution_input_data_node, + transpose_const); + + auto transpose_conv_constant = std::make_shared(convolution_input_const_node, + transpose_const); + auto conv_new = std::make_shared(transpose_before, + transpose_conv_constant, + convolution_node->get_strides(), + convolution_node->get_pads_begin(), + convolution_node->get_pads_end(), + convolution_node->get_dilations(), + convolution_node->get_auto_pad()); + + const ngraph::Shape transpose_after_order = MakeTransposeOrderNHWC2NCHW(conv_new->get_output_shape(0).size()); + + auto transpose_after = std::make_shared(conv_new, + ngraph::opset8::Constant::create(ngraph::element::i64, + ngraph::Shape{transpose_after_order.size()}, + transpose_after_order)); + + ov::copy_runtime_info(convolution_node, transpose_before); + transpose_before->set_friendly_name(convolution_node->get_friendly_name() + "/gna_conv_transpose_before"); + + ov::copy_runtime_info(convolution_node, transpose_const); + 
transpose_const->set_friendly_name(convolution_node->get_friendly_name() + "/gna_conv_transpose_const"); + + ov::copy_runtime_info(convolution_node, conv_new); + conv_new->set_friendly_name(convolution_node->get_friendly_name() + "/gna_convolution"); + + ov::copy_runtime_info(convolution_node, transpose_after); + transpose_after->set_friendly_name(convolution_node->get_friendly_name() + "/gna_conv_transpose_after"); + + convolution->output(0).replace(transpose_after->output(0)); + return true; +} + +} // namespace SubstituteGNAConvolutionNS + +namespace SubstituteGNAMaxPoolNS { + +bool DoTransformation(Node convolution); + +bool DoTransformation(Node max_pool) { + auto max_pool_node = std::dynamic_pointer_cast(max_pool); + auto max_pool_input_data_node = max_pool_node->input_value(0); + const ngraph::Shape max_pool_input_shape = max_pool_node->get_input_shape(0); + + const ngraph::Shape transpose_before_order = MakeTransposeOrderNCHW2NHWC(max_pool_input_shape.size()); + + auto transpose_const = ngraph::opset8::Constant::create(ngraph::element::i64, + ngraph::Shape{transpose_before_order.size()}, + transpose_before_order); + + auto transpose_before = std::make_shared(max_pool_input_data_node, + transpose_const); + + auto max_pool_new = std::make_shared(transpose_before, + max_pool_node->get_strides(), + max_pool_node->get_pads_begin(), + max_pool_node->get_pads_end(), + max_pool_node->get_kernel(), + max_pool_node->get_rounding_type(), + max_pool_node->get_auto_pad()); + + const ngraph::Shape transpose_after_order = MakeTransposeOrderNHWC2NCHW(max_pool_new->get_output_shape(0).size()); + + auto transpose_after = std::make_shared(max_pool_new, + ngraph::opset8::Constant::create(ngraph::element::i64, + ngraph::Shape{transpose_after_order.size()}, + transpose_after_order)); + + ov::copy_runtime_info(max_pool_node, transpose_before); + transpose_before->set_friendly_name(max_pool_node->get_friendly_name() + "/gna_max_pool_transpose_before"); + + 
ov::copy_runtime_info(max_pool_node, max_pool_new); + max_pool_new->set_friendly_name(max_pool_node->get_friendly_name() + "/gna_max_pool"); + + ov::copy_runtime_info(max_pool_node, transpose_after); + transpose_after->set_friendly_name(max_pool_node->get_friendly_name() + "/gna_max_pool_transpose_after"); + + max_pool->output(0).replace(transpose_after->output(0)); + return true; +} + +} // namespace SubstituteGNAMaxPoolNS + +// ---------------------------------------------------------------------------- + +ov::intel_gna::pass::SubstituteGNAConvolution::SubstituteGNAConvolution() { + MATCHER_SCOPE(SubstituteGNAConvolution); + + auto convolution = ngraph::pattern::wrap_type(); + + ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) { + auto convolution_node = std::dynamic_pointer_cast(m.get_match_root()); + if (!convolution_node) { + return false; + } + + return SubstituteGNAConvolutionNS::DoTransformation(convolution_node); + }; + + auto m = std::make_shared(convolution, matcher_name); + this->register_matcher(m, callback); +} + +ov::intel_gna::pass::SubstituteGNAMaxPool::SubstituteGNAMaxPool() { + MATCHER_SCOPE(SubstituteGNAMaxPool); + + auto max_pool = ngraph::pattern::wrap_type(); + + ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) { + auto max_pool_node = std::dynamic_pointer_cast(m.get_match_root()); + if (!max_pool_node) { + return false; + } + + return SubstituteGNAMaxPoolNS::DoTransformation(max_pool_node); + }; + + auto m = std::make_shared(max_pool, matcher_name); + this->register_matcher(m, callback); +} + +bool ov::intel_gna::pass::TransposeNCHW::run_on_model(const std::shared_ptr& function) { + RUN_ON_FUNCTION_SCOPE(TransposeNCHW); + + ngraph::pass::Manager manager(get_pass_config()); + manager.register_pass(); + manager.register_pass(); + manager.run_passes(function); + + return false; // FIXME: should we return true here? 
+} diff --git a/src/plugins/intel_gna/src/transformations/transpose_nchw.hpp b/src/plugins/intel_gna/src/transformations/transpose_nchw.hpp new file mode 100644 index 00000000000000..20ab0b8c515f9c --- /dev/null +++ b/src/plugins/intel_gna/src/transformations/transpose_nchw.hpp @@ -0,0 +1,36 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +namespace ov { +namespace intel_gna { +namespace pass { + +/** + * @brief TODO + */ +class SubstituteGNAConvolution : public ngraph::pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + SubstituteGNAConvolution(); +}; + +class SubstituteGNAMaxPool : public ngraph::pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + SubstituteGNAMaxPool(); +}; + +class TransposeNCHW : public ngraph::pass::FunctionPass { +public: + NGRAPH_RTTI_DECLARATION; + bool run_on_model(const std::shared_ptr& f) override; +}; + +} // namespace pass +} // namespace intel_gna +} // namespace ov