diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_pooling_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_pooling_node.cpp index 169586819b7fc0..77db762169254e 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_pooling_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_pooling_node.cpp @@ -5,14 +5,23 @@ #include "mkldnn_roi_pooling_node.h" #include -#include -#include -#include #include -#include -#include "ie_parallel.hpp" +#include + #include +#include "ie_parallel.hpp" +#include "utils/bfloat16.hpp" +#include "emitters/jit_load_store_emitters.hpp" + +#include + +#include +#include +#include +#include +#include + using namespace MKLDNNPlugin; using namespace InferenceEngine; using namespace mkldnn; @@ -25,7 +34,7 @@ using namespace Xbyak; template struct jit_uni_roi_pooling_kernel_f32 : public jit_uni_roi_pooling_kernel, public jit_generator { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_uni_roi_pooling_kernel_f32) + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_uni_roi_pooling_kernel_f32); explicit jit_uni_roi_pooling_kernel_f32(jit_roi_pooling_params jcp) : jit_uni_roi_pooling_kernel(jcp), jit_generator() {} @@ -35,6 +44,9 @@ struct jit_uni_roi_pooling_kernel_f32 : public jit_uni_roi_pooling_kernel, publi }; void generate() override { + load_emitter.reset(new jit_load_emitter(this, isa, nullptr)); + store_emitter.reset(new jit_store_emitter(this, isa, nullptr)); + this->preamble(); Label exit_label; @@ -42,7 +54,6 @@ struct jit_uni_roi_pooling_kernel_f32 : public jit_uni_roi_pooling_kernel, publi mov(reg_input, ptr[this->param1 + GET_OFF(src)]); mov(reg_output, ptr[this->param1 + GET_OFF(dst)]); - mov(reg_bin_area, ptr[this->param1 + GET_OFF(bin_area)]); mov(reg_c_blocks, ptr[this->param1 + GET_OFF(c_blocks)]); @@ -56,6 +67,10 @@ struct jit_uni_roi_pooling_kernel_f32 : public jit_uni_roi_pooling_kernel, publi mov(reg_xoff, ptr[this->param1 + GET_OFF(xoff)]); } + load_pool_gpr_idxs = {static_cast(reg_load_store_mask.getIdx()), static_cast(reg_load_table.getIdx())}; + store_pool_gpr_idxs = {static_cast(reg_load_store_mask.getIdx())}; + store_pool_vec_idxs = {static_cast(vmm_zero.getIdx())}; + int nb_c_tail = jpp_.nb_c % jpp_.nb_c_blocking; cmp(reg_c_blocks, jpp_.nb_c_blocking); jne(nb_c_tail ? tail_label : exit_label, T_NEAR); @@ -71,6 +86,10 @@ struct jit_uni_roi_pooling_kernel_f32 : public jit_uni_roi_pooling_kernel, publi L(exit_label); this->postamble(); + + load_emitter->emit_data(); + if (!mayiuse(avx512_core_bf16) && mayiuse(avx512_core) && store_emitter != nullptr && store_emitter->get_emu_vcvtneps2bf16() != nullptr) + store_emitter->get_emu_vcvtneps2bf16()->emit_data(); } private: @@ -78,6 +97,7 @@ struct jit_uni_roi_pooling_kernel_f32 : public jit_uni_roi_pooling_kernel, publi Xbyak::Ymm, Xbyak::Zmm>::type; const int vlen = cpu_isa_traits::vlen; + const int step = vlen / sizeof(float); Vmm vmm_mask = Vmm(0); Vmm vmm_zero = Vmm(0); @@ -87,6 +107,13 @@ struct jit_uni_roi_pooling_kernel_f32 : public jit_uni_roi_pooling_kernel, publi Xmm xmm_xf = Xmm(1); Vmm vmm_xf = Vmm(1); + std::unique_ptr load_emitter = nullptr; + std::vector load_pool_gpr_idxs; + + std::unique_ptr store_emitter = nullptr; + std::vector store_pool_gpr_idxs; + std::vector store_pool_vec_idxs; + Vmm get_acc_reg(int idx) { return Vmm(2*idx + 1); } Vmm get_src_reg(int idx) { return Vmm(2*idx + 2); } @@ -102,8 +129,8 @@ struct jit_uni_roi_pooling_kernel_f32 : public jit_uni_roi_pooling_kernel, publi reg64_t reg_kh = r10; reg64_t reg_kw = r11; - reg64_t h_iter = r14; - reg64_t w_iter = r15; + reg64_t h_iter = r13; + reg64_t w_iter = r14; reg64_t reg_c_blocks = rbx; reg64_t reg_bin_area = rdx; @@ -114,15 +141,22 @@ struct jit_uni_roi_pooling_kernel_f32 : public jit_uni_roi_pooling_kernel, publi reg64_t reg_yoff = h_iter; reg64_t reg_xoff = r12; + Xbyak::Reg64 reg_load_table = r15; + Xbyak::Reg64 reg_load_store_mask = abi_param1; + void roi_pool_max(int c_blocks) { Label h_loop_label; Label w_loop_label; mov(aux_reg_input, reg_input); + const int src_c_off = jpp_.ih * jpp_.iw * jpp_.c_block * jpp_.src_data_size; for (int i = 0; i < c_blocks; i++) { Vmm vmm_max = get_acc_reg(i); - uni_vmovups(vmm_max, ptr[reg_input + i * jpp_.ih * jpp_.iw * jpp_.c_block * sizeof(float)]); + + load_emitter->emit_code({static_cast(reg_input.getIdx())}, {static_cast(vmm_max.getIdx())}, + std::make_shared(jpp_.src_prc, Precision::FP32, step, false, "zero", i * src_c_off), + {}, load_pool_gpr_idxs); } xor_(h_iter, h_iter); @@ -134,7 +168,10 @@ struct jit_uni_roi_pooling_kernel_f32 : public jit_uni_roi_pooling_kernel, publi Vmm vmm_max = get_acc_reg(i); Vmm vmm_src = get_src_reg(i); - uni_vmovups(vmm_src, ptr[aux_reg_input1 + i * jpp_.ih * jpp_.iw * jpp_.c_block * sizeof(float)]); + load_emitter->emit_code({static_cast(aux_reg_input1.getIdx())}, {static_cast(vmm_src.getIdx())}, + std::make_shared(jpp_.src_prc, Precision::FP32, step, false, "zero", i * src_c_off), + {}, load_pool_gpr_idxs); + if (isa == cpu::x64::sse41) { movups(vmm_mask, vmm_max); cmpps(vmm_mask, vmm_src, _cmp_lt_os); @@ -148,23 +185,27 @@ struct jit_uni_roi_pooling_kernel_f32 : public jit_uni_roi_pooling_kernel, publi } } - add(aux_reg_input1, jpp_.c_block * sizeof(float)); + add(aux_reg_input1, jpp_.c_block * jpp_.src_data_size); inc(w_iter); cmp(w_iter, reg_kw); jl(w_loop_label, T_NEAR); } - add(aux_reg_input, jpp_.iw * jpp_.c_block * sizeof(float)); + add(aux_reg_input, jpp_.iw * jpp_.c_block * jpp_.src_data_size); inc(h_iter); cmp(h_iter, reg_kh); jl(h_loop_label, T_NEAR); } + const int dst_c_off = jpp_.oh * jpp_.ow * jpp_.c_block * jpp_.dst_data_size; for (int i = 0; i < c_blocks; i++) { Vmm vmm_dst = get_acc_reg(i); - uni_vmovups(ptr[reg_output + i * jpp_.oh * jpp_.ow * jpp_.c_block * sizeof(float)], vmm_dst); + + store_emitter->emit_code({static_cast(vmm_dst.getIdx())}, {static_cast(reg_output.getIdx())}, + std::make_shared(Precision::FP32, jpp_.dst_prc, step, i * dst_c_off), + store_pool_vec_idxs, store_pool_gpr_idxs); } } @@ -180,17 +221,29 @@ struct jit_uni_roi_pooling_kernel_f32 : public jit_uni_roi_pooling_kernel, publi Vmm vmm_src11 = get_src_reg(3); for (int i = 0; i < c_blocks; i++) { - int src_c_off = i * jpp_.ih * jpp_.iw * jpp_.c_block * sizeof(float); + const int src_c_off = i * jpp_.ih * jpp_.iw * jpp_.c_block * jpp_.src_data_size; + const auto load_context = std::make_shared(jpp_.src_prc, Precision::FP32, step, false, "zero", src_c_off); mov(aux_reg_input, reg_input); - uni_vmovups(vmm_src00, ptr[aux_reg_input + src_c_off]); + + load_emitter->emit_code({static_cast(aux_reg_input.getIdx())}, {static_cast(vmm_src00.getIdx())}, + load_context, + {}, load_pool_gpr_idxs); add(aux_reg_input, reg_xoff); - uni_vmovups(vmm_src01, ptr[aux_reg_input + src_c_off]); + + load_emitter->emit_code({static_cast(aux_reg_input.getIdx())}, {static_cast(vmm_src01.getIdx())}, + load_context, + {}, load_pool_gpr_idxs); add(aux_reg_input, reg_yoff); - uni_vmovups(vmm_src11, ptr[aux_reg_input + src_c_off]); + load_emitter->emit_code({static_cast(aux_reg_input.getIdx())}, {static_cast(vmm_src11.getIdx())}, + load_context, + {}, load_pool_gpr_idxs); sub(aux_reg_input, reg_xoff); - uni_vmovups(vmm_src10, ptr[aux_reg_input + src_c_off]); + + load_emitter->emit_code({static_cast(aux_reg_input.getIdx())}, {static_cast(vmm_src10.getIdx())}, + load_context, + {}, load_pool_gpr_idxs); uni_vsubps(vmm_src01, vmm_src01, vmm_src00); uni_vfmadd213ps(vmm_src01, vmm_xf, vmm_src00); @@ -201,15 +254,22 @@ struct jit_uni_roi_pooling_kernel_f32 : public jit_uni_roi_pooling_kernel, publi uni_vsubps(vmm_src11, vmm_src11, vmm_src01); uni_vfmadd213ps(vmm_src11, vmm_yf, vmm_src01); - int dst_c_off = i * jpp_.oh * jpp_.ow * jpp_.c_block * sizeof(float); - uni_vmovups(ptr[reg_output + dst_c_off], vmm_src11); + const int dst_c_off = i * jpp_.oh * jpp_.ow * jpp_.c_block * jpp_.dst_data_size; + + store_emitter->emit_code({static_cast(vmm_src11.getIdx())}, {static_cast(reg_output.getIdx())}, + std::make_shared(Precision::FP32, jpp_.dst_prc, step, dst_c_off), + store_pool_vec_idxs, store_pool_gpr_idxs); } } void empty_roi(int c_blocks) { uni_vpxor(vmm_zero, vmm_zero, vmm_zero); + + const int dst_c_off = jpp_.oh * jpp_.ow * jpp_.c_block * jpp_.dst_data_size; for (int i = 0; i < c_blocks; i++) { - uni_vmovups(ptr[reg_output + i * jpp_.oh * jpp_.ow * jpp_.c_block * sizeof(float)], vmm_zero); + store_emitter->emit_code({static_cast(vmm_zero.getIdx())}, {static_cast(reg_output.getIdx())}, + std::make_shared(jpp_.src_prc, jpp_.dst_prc, step, i * dst_c_off), + store_pool_vec_idxs, store_pool_gpr_idxs); } } @@ -226,8 +286,8 @@ struct jit_uni_roi_pooling_kernel_f32 : public jit_uni_roi_pooling_kernel, publi roi_pool_bilinear(c_blocks); if (isa == cpu::x64::sse41) { - add(reg_input, 4 * sizeof(float)); - add(reg_output, 4 * sizeof(float)); + add(reg_input, 4 * jpp_.src_data_size); + add(reg_output, 4 * jpp_.dst_data_size); if (jpp_.alg == Algorithm::ROIPoolingMax) roi_pool_max(c_blocks); @@ -239,7 +299,7 @@ struct jit_uni_roi_pooling_kernel_f32 : public jit_uni_roi_pooling_kernel, publi L(empty_roi_label); empty_roi(c_blocks); if (isa == cpu::x64::sse41) { - add(reg_output, 4 * sizeof(float)); + add(reg_output, 4 * jpp_.dst_data_size); empty_roi(c_blocks); } @@ -317,6 +377,18 @@ void MKLDNNROIPoolingNode::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; + runtimePrecision = getOriginalInputPrecisionAtPort(0); + + if (!mayiuse(avx512_core)) { + if (runtimePrecision == Precision::BF16) + runtimePrecision = Precision::FP32; + } + + auto dataType = MKLDNNExtensionUtils::IEPrecisionToDataType(runtimePrecision); + + src_data_size = MKLDNNExtensionUtils::sizeOfDataType(dataType); + dst_data_size = MKLDNNExtensionUtils::sizeOfDataType(dataType); + InferenceEngine::LayerConfig config; config.dynBatchSupport = false; config.inConfs.resize(2); @@ -342,9 +414,9 @@ void MKLDNNROIPoolingNode::initSupportedPrimitiveDescriptors() { impl_type = impl_desc_type::ref; } - config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), memory::data_type::f32, format); - config.inConfs[1].desc = MKLDNNMemoryDesc(getParentEdgeAt(1)->getDims(), memory::data_type::f32, memory::format_tag::nc); - config.outConfs[0].desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), memory::data_type::f32, format); + config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), dataType, format); + config.inConfs[1].desc = MKLDNNMemoryDesc(getParentEdgeAt(1)->getDims(), dataType, memory::format_tag::nc); + config.outConfs[0].desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), dataType, format); supportedPrimitiveDescriptors.push_back({config, impl_type, format}); } @@ -375,6 +447,12 @@ void MKLDNNROIPoolingNode::createPrimitive() { jpp.nb_c_blocking = mayiuse(cpu::x64::avx512_common) ? 15 : 7; + auto selectedPD = getSelectedPrimitiveDescriptor(); + jpp.src_prc = selectedPD->getConfig().inConfs[0].desc.getPrecision(); + jpp.dst_prc = selectedPD->getConfig().outConfs[0].desc.getPrecision(); + jpp.src_data_size = jpp.src_prc.size(); + jpp.dst_data_size = jpp.dst_prc.size(); + jpp.alg = getAlgorithm(); if (mayiuse(cpu::x64::avx512_common)) { @@ -389,14 +467,15 @@ void MKLDNNROIPoolingNode::createPrimitive() { roi_pooling_kernel->create_ker(); } -void MKLDNNROIPoolingNode::execute(mkldnn::stream strm) { +template +void MKLDNNROIPoolingNode::execute() { auto &srcMemory0 = getParentEdgeAt(0)->getMemory(); auto &srcMemory1 = getParentEdgeAt(1)->getMemory(); - auto &dstMemory = getChildEdgeAt(0)->getMemory(); + auto &dstMemory = getChildEdgeAt(0)->getMemory(); - const auto *src_data = reinterpret_cast(srcMemory0.GetPtr()); - const auto *src_roi = reinterpret_cast(srcMemory1.GetPtr()); - float *dst = reinterpret_cast(dstMemory.GetPtr()); + const auto *src_data = reinterpret_cast(srcMemory0.GetPtr()); + const auto *src_roi = reinterpret_cast(srcMemory1.GetPtr()); + auto *dst = reinterpret_cast(dstMemory.GetPtr()); auto selectedPrimitiveDescriptor = getSelectedPrimitiveDescriptor(); if (!selectedPrimitiveDescriptor) @@ -405,16 +484,16 @@ void MKLDNNROIPoolingNode::execute(mkldnn::stream strm) { auto src_strides = config.inConfs[0].desc.getBlockingDesc().getStrides(); auto dst_strides = config.outConfs[0].desc.getBlockingDesc().getStrides(); + size_t src_roi_step = config.inConfs[1].desc.getBlockingDesc().getStrides()[0]; int cb_work = impl::utils::div_up(jpp.nb_c, jpp.nb_c_blocking); int MB = jpp.mb; - size_t src_roi_step = config.inConfs[1].desc.getBlockingDesc().getStrides()[0]; int real_rois = 0; for (; real_rois < MB; real_rois++) { size_t roi_off = real_rois * src_roi_step; - const float *src_roi_ptr = &src_roi[roi_off]; + const auto *src_roi_ptr = &src_roi[roi_off]; int roi_batch_ind = static_cast(src_roi_ptr[0]); if (roi_batch_ind == -1) { break; @@ -443,7 +522,7 @@ void MKLDNNROIPoolingNode::execute(mkldnn::stream strm) { (*roi_pooling_kernel)(&arg); } else { size_t roi_off = n * src_roi_step; - const float* src_roi_ptr = &src_roi[roi_off]; + const auto *src_roi_ptr = &src_roi[roi_off]; int roi_batch_ind = static_cast(src_roi_ptr[0]); @@ -549,11 +628,12 @@ void MKLDNNROIPoolingNode::execute(mkldnn::stream strm) { arg.xf = in_x - left_x_index; arg.yf = in_y - top_y_index; - arg.xoff = sizeof(float) * (right_x_index - left_x_index) * jpp.c_block; - arg.yoff = sizeof(float) * (bottom_y_index - top_y_index) * jpp.iw * jpp.c_block; + arg.xoff = sizeof(T) * (right_x_index - left_x_index) * jpp.c_block; + arg.yoff = sizeof(T) * (bottom_y_index - top_y_index) * jpp.iw * jpp.c_block; arg.src = &src_data[roi_batch_ind * src_strides[0] + cb * src_strides[1] + top_y_index * src_strides[2] + left_x_index * src_strides[3]]; + arg.bin_area = 1; } else { for (int c = 0; c < 1; c++) { @@ -583,6 +663,28 @@ void MKLDNNROIPoolingNode::execute(mkldnn::stream strm) { }); } +namespace { +struct ROIPoolingContext { + MKLDNNROIPoolingNode &node; +}; +} + +template +struct MKLDNNROIPoolingNode::ROIPoolingExecute { + void operator()(ROIPoolingContext & ctx) { + ctx.node.execute(); + } +}; + +void MKLDNNROIPoolingNode::execute(mkldnn::stream strm) { + ROIPoolingContext ctx = { + *this + }; + // enable conditional compilation + OV_SWITCH(MKLDNNPlugin, ROIPoolingExecute, ctx, runtimePrecision, + OV_CASE(Precision::FP32, float), + OV_CASE(Precision::BF16, bfloat16_t)) +} bool MKLDNNROIPoolingNode::created() const { return getType() == ROIPooling; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_pooling_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_pooling_node.h index d796fd47a78a66..512616c60e486c 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_pooling_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_pooling_node.h @@ -4,8 +4,9 @@ #pragma once -#include #include +#include + #include #include #include @@ -22,12 +23,17 @@ struct jit_roi_pooling_params { int pooled_h; int pooled_w; + InferenceEngine::Precision src_prc; + InferenceEngine::Precision dst_prc; + int src_data_size; + int dst_data_size; + Algorithm alg; }; struct jit_roi_pooling_call_args { - const float *src; - float *dst; + const void *src; + void *dst; size_t kh; size_t kw; @@ -68,15 +74,22 @@ class MKLDNNROIPoolingNode : public MKLDNNNode { void execute(mkldnn::stream strm) override; bool created() const override; +private: static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; -private: + template void execute(); + template struct ROIPoolingExecute; + + InferenceEngine::Precision runtimePrecision; + + size_t src_data_size; + size_t dst_data_size; + int pooled_h = 0; int pooled_w = 0; float spatial_scale = 0; jit_roi_pooling_params jpp = {}; - std::shared_ptr roi_pooling_kernel = nullptr; std::string errorPrefix; diff --git a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/roi_pooling.cpp b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/roi_pooling.cpp new file mode 100644 index 00000000000000..7422befc949160 --- /dev/null +++ b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/roi_pooling.cpp @@ -0,0 +1,203 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// +#include +#include "ie_common.h" +#include "test_utils/cpu_test_utils.hpp" +#include "utils/bfloat16.hpp" + +using namespace InferenceEngine; +using namespace CPUTestUtils; + +namespace CPULayerTestsDefinitions { + +using ROIPoolingCPUTestParamsSet = std::tuple>; + +class ROIPoolingCPULayerTest : public testing::WithParamInterface, + virtual public LayerTestsUtils::LayerTestsCommon, + public CPUTestsBase { +public: + static std::string getTestCaseName(testing::TestParamInfo obj) { + LayerTestsDefinitions::roiPoolingParamsTuple basicParamsSet; + CPUSpecificParams cpuParams; + std::map additionalConfig; + + std::tie(basicParamsSet, cpuParams, additionalConfig) = obj.param; + std::ostringstream result; + + result << LayerTestsDefinitions::ROIPoolingLayerTest::getTestCaseName( + testing::TestParamInfo(basicParamsSet, 0)); + result << CPUTestsBase::getTestCaseName(cpuParams); + + if (!additionalConfig.empty()) { + result << "_PluginConf"; + for (auto &item : additionalConfig) { + if (item.second == PluginConfigParams::YES) + result << "_" << item.first << "=" << item.second; + } + } + + return result.str(); + } + +protected: + void GenerateInputs() override { + auto feat_map_shape = cnnNetwork.getInputShapes().begin()->second; + + const auto is_roi_max_mode = (pool_method == ngraph::helpers::ROIPoolingTypes::ROI_MAX); + + const int height = is_roi_max_mode ? feat_map_shape[2] / spatial_scale : 1; + const int width = is_roi_max_mode ? feat_map_shape[3] / spatial_scale : 1; + + size_t it = 0; + for (const auto &input : cnnNetwork.getInputsInfo()) { + const auto &info = input.second; + InferenceEngine::Blob::Ptr blob; + + if (it == 1) { + blob = make_blob_with_precision(info->getTensorDesc()); + blob->allocate(); + switch (inPrc) { + case Precision::FP32: { + CommonTestUtils::fill_data_roi + (blob, feat_map_shape[0] - 1, height, width, 1.0f, is_roi_max_mode); + break; + } + case Precision::BF16: { + CommonTestUtils::fill_data_roi + (blob, feat_map_shape[0] - 1, height, width, 1.0f, is_roi_max_mode); + break; + } + default: + IE_THROW() << "roi_pooling. Unsupported precision"; + break; + } + } else { + blob = GenerateInput(*info); + } + inputs.push_back(blob); + it++; + } + } + + void SetUp() override { + LayerTestsDefinitions::roiPoolingParamsTuple basicParamsSet; + CPUSpecificParams cpuParams; + std::map additionalConfig; + + InferenceEngine::SizeVector inputShape; + InferenceEngine::SizeVector coordsShape; + InferenceEngine::SizeVector poolShape; + InferenceEngine::Precision netPrecision; + + std::tie(basicParamsSet, cpuParams, additionalConfig) = this->GetParam(); + std::tie(inFmts, outFmts, priority, selectedType) = cpuParams; + std::tie(inputShape, coordsShape, poolShape, spatial_scale, pool_method, netPrecision, targetDevice) = basicParamsSet; + + if (additionalConfig[PluginConfigParams::KEY_ENFORCE_BF16] == PluginConfigParams::YES) + inPrc = outPrc = netPrecision = Precision::BF16; + else + inPrc = outPrc = netPrecision; + configuration.insert(additionalConfig.begin(), additionalConfig.end()); + + auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); + auto params = ngraph::builder::makeParams(ngPrc, {inputShape, coordsShape}); + auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); + + std::shared_ptr roi_pooling = ngraph::builder::makeROIPooling(paramOuts[0], paramOuts[1], poolShape, spatial_scale, pool_method); + ngraph::ResultVector results{std::make_shared(roi_pooling)}; + + function = makeNgraphFunction(ngPrc, params, roi_pooling, "roi_pooling"); + + selectedType += "_"; + selectedType += netPrecision.name(); + } + +private: + ngraph::helpers::ROIPoolingTypes pool_method; + float spatial_scale; +}; + +TEST_P(ROIPoolingCPULayerTest, CompareWithRefs) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + Run(); + CheckPluginRelatedResults(executableNetwork, "ROIPooling"); +} + +namespace { + +std::vector> additionalConfig + = {{{PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::NO}}, + {{PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::YES}}}; + +/* have to select particular implementation type, since currently + * nodes always choose the best one */ +std::vector selectCPUInfoForDevice() { + std::vector resCPUParams; + if (with_cpu_x86_avx512f()) { + resCPUParams.push_back(CPUSpecificParams{{nChw16c, nc}, {nChw16c}, {"jit_avx512"}, "jit_avx512"}); + } else if (with_cpu_x86_avx2()) { + resCPUParams.push_back(CPUSpecificParams{{nChw8c, nc}, {nChw8c}, {"jit_avx2"}, "jit_avx2"}); + } else if (with_cpu_x86_sse42()) { + resCPUParams.push_back(CPUSpecificParams{{nChw8c, nc}, {nChw8c}, {"jit_sse42"}, "jit_sse42"}); + } else { + resCPUParams.push_back(CPUSpecificParams{{nChw8c, nc}, {nChw8c}, {"ref"}, "ref"}); + } + + return resCPUParams; +} + +const std::vector> inShapes = {{1, 3, 8, 8}, + {3, 4, 50, 50}}; + +const std::vector> pooledShapes_max = {{1, 1}, + {2, 2}, + {3, 3}, + {6, 6}}; + +const std::vector> pooledShapes_bilinear = {{1, 1}, + {2, 2}, + {3, 3}, + {6, 6}}; + +const std::vector> coordShapes = {{1, 5}, + {3, 5}, + {5, 5}}; + +const std::vector netPRCs = {InferenceEngine::Precision::FP32, InferenceEngine::Precision::BF16}; + +const std::vector spatial_scales = {0.625f, 1.f}; + +const auto test_ROIPooling_max = ::testing::Combine(::testing::ValuesIn(inShapes), + ::testing::ValuesIn(coordShapes), + ::testing::ValuesIn(pooledShapes_max), + ::testing::ValuesIn(spatial_scales), + ::testing::Values(ngraph::helpers::ROIPoolingTypes::ROI_MAX), + ::testing::ValuesIn(netPRCs), + ::testing::Values(CommonTestUtils::DEVICE_CPU)); + +const auto test_ROIPooling_bilinear = ::testing::Combine(::testing::ValuesIn(inShapes), + ::testing::ValuesIn(coordShapes), + ::testing::ValuesIn(pooledShapes_bilinear), + ::testing::Values(spatial_scales[1]), + ::testing::Values(ngraph::helpers::ROIPoolingTypes::ROI_BILINEAR), + ::testing::ValuesIn(netPRCs), + ::testing::Values(CommonTestUtils::DEVICE_CPU)); + +INSTANTIATE_TEST_CASE_P(smoke_ROIPoolingCPU_max, + ROIPoolingCPULayerTest, + ::testing::Combine(test_ROIPooling_max, + ::testing::ValuesIn(selectCPUInfoForDevice()), + ::testing::ValuesIn(additionalConfig)), + ROIPoolingCPULayerTest::getTestCaseName); + +INSTANTIATE_TEST_CASE_P(smoke_ROIPoolingCPU_bilinear, + ROIPoolingCPULayerTest, + ::testing::Combine(test_ROIPooling_bilinear, + ::testing::ValuesIn(selectCPUInfoForDevice()), + ::testing::ValuesIn(additionalConfig)), + ROIPoolingCPULayerTest::getTestCaseName); +} // namespace +} // namespace CPULayerTestsDefinitions diff --git a/inference-engine/tests/functional/shared_test_classes/src/read_ir/generate_inputs.cpp b/inference-engine/tests/functional/shared_test_classes/src/read_ir/generate_inputs.cpp index 0d52ca6dedc886..1b8432c5bbe4db 100644 --- a/inference-engine/tests/functional/shared_test_classes/src/read_ir/generate_inputs.cpp +++ b/inference-engine/tests/functional/shared_test_classes/src/read_ir/generate_inputs.cpp @@ -581,4 +581,4 @@ InputsMap getInputMap() { return inputsMap; } -} // namespace LayerTestsDefinitions \ No newline at end of file +} // namespace LayerTestsDefinitions