From 18806ecd761b5d967f3f870e5e325b7c08b42295 Mon Sep 17 00:00:00 2001 From: Luo Cheng Date: Fri, 30 Jul 2021 20:45:44 +0800 Subject: [PATCH] MulticlassNms/MatrixNms: transformations and CPU implementation (#6653) * init version, needs revision: opset7 * add convert testcase * multiclass_nms support spec * init version * matrixnms support spec * init support for matrix_nms * impl matrix_nms * implemented multiclass_nms reference. TODO: more test cases. * support dynamic shape in test * update to spec 0611 * update to spec 0611 * fixes. * fix: sort by class_id and score now works. * fix clang check error * more test cases verified. * fixes in ref impl. * attribute nms_eta works * test cross_batch and output_type i32. * enable multiclass-nms cpu plugin fallback ngraph * keep topk typo * enable matrix-nms cpu plugin fallback ngraph * support sort_result_across_batch * Add matrix_nms unit test * Add cross batch test cases * fix typo * move multiclass to opset8 * move matrixnms to opset8 * Reference implementations for MulticlassNms and MatrixNms ops * fix name conflict * remove unused var; sort_result_across_batch defaults to false * avoid float overflow * fix clang check error * info for mac fail * change testcase due to unstable sort * nms add 'normalized' attribute * multiclass cpu test support 'normalized' * nms add 'normalized' attribute * fixes: 1. normalized support. 2. sort by score before keep_top_k inside a batch. * fixes: 1. normalized support. 2. sort by score before keep_top_k inside a batch. * fix sort order in matrix_nms * fix review comments * add matrix_nms MKLDNN extension layer * parallel in matrix nms * separate filtered_box * separate class_nms result * parallel in class * parallel in batch * partial new nms * partially remove useless functions * debug & fix * debug in indexing * fix test cases * remove logging * fix code-style * fix typo * add matrix_nms extension * nms python api * remove unused testcases * refactor transformation * transform dynamic shape to static shape * Update inference-engine/src/transformations/include/ngraph_ops/nms_static_shape_ie.hpp Co-authored-by: Ilya Churaev * remove register_pass call * [MKLDNN] migrate matrix_nms to MKLDNNNode * bug fix in matrix_nms * padding on matrix_nms * remove logging * test case refine * merged transform_matrix_nms branch * refine matrixnms testcase * multiclass nms cpu plugin implementation for static shape, rebased on Reference implementations PR * rebase to new multi-class transform provided by lc * Name style aligned with matrix-nms * static shape padding style to batch inside, new unit test method, real classnum shape * fix format * fix ci error * multi-class NMS modification based on PR reviewer opinion: code format, copyright, delete unused includes and functions * explicit template instantiation due to mac ci fail * Yi3/fix review (#16) * fix coding style * use parallel_for2d * fix ci fail * unify 'copyright 2021' * mkldnn_multiclass_nms node update based on PR review (#17) * [MKLDNN] apply suggestion for matrix_nms (#18) * fix bug * apply review comments * apply review comments * apply review comments * apply review comments * skip only Nms test, not MatrixNms MulticlassNms test Co-authored-by: Zhang Yi3 Co-authored-by: jialipen Co-authored-by: mangguo Co-authored-by: Ilya Churaev Co-authored-by: liubo-intel --- .../cnn_network_ngraph_impl.cpp | 5 + .../src/mkldnn_plugin/cpu_types.h | 4 +- .../src/mkldnn_plugin/mkldnn_node.cpp | 4 +- .../src/mkldnn_plugin/mkldnn_node.h | 4 + .../src/mkldnn_plugin/mkldnn_plugin.cpp | 4
+ .../nodes/mkldnn_matrix_nms_node.cpp | 382 ++++++++++++++++ .../nodes/mkldnn_matrix_nms_node.h | 100 +++++ .../nodes/mkldnn_multiclass_nms.cpp | 414 ++++++++++++++++++ .../nodes/mkldnn_multiclass_nms.hpp | 93 ++++ .../ngraph_ops/nms_static_shape_ie.hpp | 114 +++++ .../convert_matrix_nms_to_matrix_nms_ie.hpp | 26 ++ ...rt_multiclass_nms_to_multiclass_nms_ie.hpp | 26 ++ .../src/ngraph_ops/nms_static_shape_ie.cpp | 19 + .../convert_matrix_nms_to_matrix_nms_ie.cpp | 66 +++ ...rt_multiclass_nms_to_multiclass_nms_ie.cpp | 67 +++ .../serialization/single_layer/matrix_nms.cpp | 60 +++ .../single_layer/multiclass_nms.cpp | 60 +++ .../inference_engine/skip_tests_config.cpp | 2 +- ...t_matrix_nms_to_matrix_nms_ie_internal.cpp | 58 +++ ...lass_nms_to_multiclass_nms_ie_internal.cpp | 58 +++ .../single_layer_tests/matrix_nms.cpp | 54 +++ .../single_layer_tests/multiclass_nms.cpp | 37 ++ .../skip_tests_config.cpp | 2 +- .../include/single_layer_tests/matrix_nms.hpp | 15 + .../single_layer_tests/multiclass_nms.hpp | 15 + .../single_layer/matrix_nms.hpp | 58 +++ .../single_layer/multiclass_nms.hpp | 59 +++ .../src/single_layer/matrix_nms.cpp | 250 +++++++++++ .../src/single_layer/multiclass_nms.cpp | 270 ++++++++++++ ngraph/core/src/op/matrix_nms.cpp | 3 +- ngraph/core/src/op/util/nms_base.cpp | 2 +- 31 files changed, 2325 insertions(+), 6 deletions(-) create mode 100644 inference-engine/src/mkldnn_plugin/nodes/mkldnn_matrix_nms_node.cpp create mode 100644 inference-engine/src/mkldnn_plugin/nodes/mkldnn_matrix_nms_node.h create mode 100644 inference-engine/src/mkldnn_plugin/nodes/mkldnn_multiclass_nms.cpp create mode 100644 inference-engine/src/mkldnn_plugin/nodes/mkldnn_multiclass_nms.hpp create mode 100644 inference-engine/src/transformations/include/ngraph_ops/nms_static_shape_ie.hpp create mode 100644 inference-engine/src/transformations/include/transformations/op_conversions/convert_matrix_nms_to_matrix_nms_ie.hpp create mode 100644 inference-engine/src/transformations/include/transformations/op_conversions/convert_multiclass_nms_to_multiclass_nms_ie.hpp create mode 100644 inference-engine/src/transformations/src/ngraph_ops/nms_static_shape_ie.cpp create mode 100644 inference-engine/src/transformations/src/transformations/op_conversions/convert_matrix_nms_to_matrix_nms_ie.cpp create mode 100644 inference-engine/src/transformations/src/transformations/op_conversions/convert_multiclass_nms_to_multiclass_nms_ie.cpp create mode 100644 inference-engine/tests/functional/inference_engine/serialization/single_layer/matrix_nms.cpp create mode 100644 inference-engine/tests/functional/inference_engine/serialization/single_layer/multiclass_nms.cpp create mode 100644 inference-engine/tests/functional/inference_engine/transformations/convert_matrix_nms_to_matrix_nms_ie_internal.cpp create mode 100644 inference-engine/tests/functional/inference_engine/transformations/convert_multiclass_nms_to_multiclass_nms_ie_internal.cpp create mode 100644 inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/matrix_nms.cpp create mode 100644 inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/multiclass_nms.cpp create mode 100644 inference-engine/tests/functional/plugin/shared/include/single_layer_tests/matrix_nms.hpp create mode 100644 inference-engine/tests/functional/plugin/shared/include/single_layer_tests/multiclass_nms.hpp create mode 100644 inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/matrix_nms.hpp 
create mode 100644 inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/multiclass_nms.hpp create mode 100644 inference-engine/tests/functional/shared_test_classes/src/single_layer/matrix_nms.cpp create mode 100644 inference-engine/tests/functional/shared_test_classes/src/single_layer/multiclass_nms.cpp diff --git a/inference-engine/src/inference_engine/cnn_network_ngraph_impl.cpp b/inference-engine/src/inference_engine/cnn_network_ngraph_impl.cpp index c60c515edda59a..5343fd108c1a67 100644 --- a/inference-engine/src/inference_engine/cnn_network_ngraph_impl.cpp +++ b/inference-engine/src/inference_engine/cnn_network_ngraph_impl.cpp @@ -35,6 +35,9 @@ #include +#include +#include + #include "ie_ngraph_utils.hpp" #include "exec_graph_info.hpp" #include "ie_itt.hpp" @@ -389,6 +392,8 @@ CNNNetworkNGraphImpl::reshape(const std::map& ::ngraph::pass::Manager manager; // resolves dynamism by replacing dynamic operation with static version manager.register_pass<::ngraph::pass::ConvertNMS5ToLegacyMatcher>(false); + manager.register_pass<::ngraph::pass::ConvertMulticlassNmsToMulticlassNmsIE>(); + manager.register_pass<::ngraph::pass::ConvertMatrixNmsToMatrixNmsIE>(); manager.register_pass<::ngraph::pass::DisableConvertConstantFoldingOnConstPath>(); manager.register_pass<::ngraph::pass::ConstantFolding>(); // OneHotToLegacy changes output precision diff --git a/inference-engine/src/mkldnn_plugin/cpu_types.h b/inference-engine/src/mkldnn_plugin/cpu_types.h index eb54b431cb6658..7c820c4db50ccf 100644 --- a/inference-engine/src/mkldnn_plugin/cpu_types.h +++ b/inference-engine/src/mkldnn_plugin/cpu_types.h @@ -86,7 +86,9 @@ enum Type { ExperimentalDetectronPriorGridGenerator, ExperimentalDetectronGenerateProposalsSingleImage, ExtractImagePatches, - NonMaxSuppression + NonMaxSuppression, + MatrixNms, + MulticlassNms }; enum Algorithm { diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp index 49e8e7ca10b972..8f4b204ca8e727 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp @@ -225,7 +225,9 @@ static const InferenceEngine::details::caseless_unordered_map { "ExperimentalDetectronPriorGridGenerator", ExperimentalDetectronPriorGridGenerator}, { "ExperimentalDetectronGenerateProposalsSingleImage", ExperimentalDetectronGenerateProposalsSingleImage}, { "ExtractImagePatches", ExtractImagePatches}, - { "NonMaxSuppressionIEInternal", NonMaxSuppression} + { "NonMaxSuppressionIEInternal", NonMaxSuppression}, + { "MatrixNms", MatrixNms}, + { "MulticlassNms", MulticlassNms} }; Type TypeFromName(const std::string type) { diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_node.h b/inference-engine/src/mkldnn_plugin/mkldnn_node.h index 436ecc3ac3998e..35993f96c79c99 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_node.h +++ b/inference-engine/src/mkldnn_plugin/mkldnn_node.h @@ -194,6 +194,10 @@ static std::string NameFromType(Type type) { return "ExtractImagePatches"; case NonMaxSuppression: return "NonMaxSuppression"; + case MatrixNms: + return "MatrixNms"; + case MulticlassNms: + return "MulticlassNms"; default: return "Unknown"; } diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp index 2d7299aed9201a..c7907aa55692b2 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp @@ -57,6 +57,8 @@ #include 
#include #include +#include +#include #include #include #include @@ -168,6 +170,8 @@ static void Transformation(CNNNetwork& clonedNetwork, const Config& conf) { manager.register_pass(); manager.register_pass(); manager.register_pass(); + manager.register_pass(); + manager.register_pass(); manager.register_pass(); manager.register_pass(); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matrix_nms_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matrix_nms_node.cpp new file mode 100644 index 00000000000000..5bd27d079cef20 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matrix_nms_node.cpp @@ -0,0 +1,382 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "mkldnn_matrix_nms_node.h" + +#include +#include +#include +#include +#include + +#include "base.hpp" +#include "ie_parallel.hpp" +#include "ngraph/opsets/opset8.hpp" +#include "ngraph_ops/nms_static_shape_ie.hpp" +#include "utils/general_utils.h" + +using namespace MKLDNNPlugin; +using namespace InferenceEngine; +using MatrixNmsIEInternal = ngraph::op::internal::NmsStaticShapeIE; + +using ngNmsSortResultType = ngraph::op::util::NmsBase::SortResultType; +using ngNmsDecayFunction = ngraph::op::v8::MatrixNms::DecayFunction; + +bool MKLDNNMatrixNmsNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { + try { + const auto nms = std::dynamic_pointer_cast(op); + if (!nms) { + errorMessage = "Only internal MatrixNms operation is supported"; + return false; + } + const auto& attrs = nms->get_attrs(); + const auto& sortType = attrs.sort_result_type; + if (!one_of(sortType, ngNmsSortResultType::NONE, ngNmsSortResultType::SCORE, ngNmsSortResultType::CLASSID)) { + errorMessage = "Does not support SortResultType mode: " + ngraph::as_string(sortType); + return false; + } + const auto& decayType = attrs.decay_function; + if (!one_of(decayType, ngNmsDecayFunction::LINEAR, ngNmsDecayFunction::GAUSSIAN)) { + errorMessage = "Does not support DecayFunction " + ngraph::as_string(decayType); + return false; + } + } catch (...)
{ + return false; + } + return true; +} + +MKLDNNMatrixNmsNode::MKLDNNMatrixNmsNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr& cache) + : MKLDNNNode(op, eng, cache) { + std::string errorMessage; + if (!isSupportedOperation(op, errorMessage)) { + IE_THROW(NotImplemented) << errorMessage; + } + + errorPrefix = "MatrixNMS layer with name '" + getName() + "' "; + const auto matrix_nms = std::dynamic_pointer_cast(op); + + if (getOriginalInputsNumber() != 2) + IE_THROW() << errorPrefix << "has incorrect number of input edges: " << getOriginalInputsNumber(); + + if (getOriginalOutputsNumber() != 3) + IE_THROW() << errorPrefix << "has incorrect number of output edges: " << getOriginalOutputsNumber(); + + const SizeVector& boxes_dims = op->get_input_shape(NMS_BOXES); + if (boxes_dims.size() != 3) + IE_THROW() << errorPrefix << "has unsupported 'boxes' input rank: " << boxes_dims.size(); + if (boxes_dims[2] != 4) + IE_THROW() << errorPrefix << "has unsupported 'boxes' input 3rd dimension size: " << boxes_dims[2]; + const SizeVector& scores_dims = op->get_input_shape(NMS_SCORES); + if (scores_dims.size() != 3) + IE_THROW() << errorPrefix << "has unsupported 'scores' input rank: " << scores_dims.size(); + // validate ranks before indexing into the input shapes + if (!(inDims[NMS_BOXES][0] == inDims[NMS_SCORES][0] && inDims[NMS_BOXES][1] == inDims[NMS_SCORES][2])) { + IE_THROW() << errorPrefix << "has incompatible 'boxes' and 'scores' input dimensions"; + } + m_numBatches = boxes_dims[0]; + m_numBoxes = boxes_dims[1]; + m_numClasses = scores_dims[1]; + + if (m_numBatches != scores_dims[0]) + IE_THROW() << errorPrefix << " num_batches is different in 'boxes' and 'scores' inputs"; + if (m_numBoxes != scores_dims[2]) + IE_THROW() << errorPrefix << " num_boxes is different in 'boxes' and 'scores' inputs"; + auto& attrs = matrix_nms->get_attrs(); + if (attrs.sort_result_type == ngraph::op::util::NmsBase::SortResultType::CLASSID) + m_sortResultType = MatrixNmsSortResultType::CLASSID; + else if (attrs.sort_result_type == ngraph::op::util::NmsBase::SortResultType::SCORE) + m_sortResultType = MatrixNmsSortResultType::SCORE; + else if (attrs.sort_result_type == ngraph::op::util::NmsBase::SortResultType::NONE) + m_sortResultType = MatrixNmsSortResultType::NONE; + + if (attrs.decay_function == ngraph::op::v8::MatrixNms::DecayFunction::GAUSSIAN) + m_decayFunction = GAUSSIAN; + else if (attrs.decay_function == ngraph::op::v8::MatrixNms::DecayFunction::LINEAR) + m_decayFunction = LINEAR; + + m_sortResultAcrossBatch = attrs.sort_result_across_batch; + m_scoreThreshold = attrs.score_threshold; + m_nmsTopk = attrs.nms_top_k; + m_keepTopk = attrs.keep_top_k; + m_backgroundClass = attrs.background_class; + + m_gaussianSigma = attrs.gaussian_sigma; + m_postThreshold = attrs.post_threshold; + m_normalized = attrs.normalized; + int64_t max_output_boxes_per_class = 0; + size_t real_num_classes = m_backgroundClass == -1 ? m_numClasses : m_numClasses - 1; + if (m_nmsTopk >= 0) + max_output_boxes_per_class = std::min(m_numBoxes, static_cast(m_nmsTopk)); + else + max_output_boxes_per_class = m_numBoxes; + + // worst case per batch: nms_top_k boxes for every non-background class, then capped by keep_top_k + m_maxBoxesPerBatch = max_output_boxes_per_class * real_num_classes; + if (m_keepTopk >= 0) + m_maxBoxesPerBatch = std::min(m_maxBoxesPerBatch, static_cast(m_keepTopk)); +} + +void MKLDNNMatrixNmsNode::initSupportedPrimitiveDescriptors() { + if (!supportedPrimitiveDescriptors.empty()) + return; + + m_realNumClasses = m_backgroundClass == -1 ? m_numClasses : m_numClasses - 1; + m_realNumBoxes = m_nmsTopk == -1 ?
m_numBoxes : std::min(m_nmsTopk, static_cast(m_numBoxes)); + m_numPerBatch.resize(m_numBatches); + m_filteredBoxes.resize(m_numBatches * m_realNumClasses * m_realNumBoxes); + m_numPerBatchClass.resize(m_numBatches, std::vector(m_numClasses, 0)); + m_classOffset.resize(m_numClasses, 0); + + for (size_t i = 0, count = 0; i < m_numClasses; i++) { + if (i == m_backgroundClass) + continue; + m_classOffset[i] = (count++) * m_realNumBoxes; + } + + if (m_decayFunction == MatrixNmsDecayFunction::LINEAR) { + m_decay_fn = [](float iou, float max_iou, float sigma) -> float { + return (1. - iou) / (1. - max_iou + 1e-10f); + }; + } else { + m_decay_fn = [](float iou, float max_iou, float sigma) -> float { + return std::exp((max_iou * max_iou - iou * iou) * sigma); + }; + } + + const std::vector supportedFloatPrecision = {Precision::FP32}; + const std::vector supportedIntOutputPrecision = {Precision::I32, Precision::I64}; + + checkPrecision(getOriginalInputPrecisionAtPort(NMS_BOXES), supportedFloatPrecision, "boxes", inType); + + checkPrecision(getOriginalInputPrecisionAtPort(NMS_SCORES), supportedFloatPrecision, "scores", inType); + + checkPrecision(getOriginalOutputPrecisionAtPort(NMS_SELECTED_INDICES), supportedIntOutputPrecision, "selected_indices", outType); + checkPrecision(getOriginalOutputPrecisionAtPort(NMS_SELECTED_OUTPUTS), supportedFloatPrecision, "selected_outputs", outType); + checkPrecision(getOriginalOutputPrecisionAtPort(NMS_VALID_OUTPUTS), supportedIntOutputPrecision, "valid_outputs", outType); + + addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, Precision::FP32}, + {TensorDescCreatorTypes::ncsp, Precision::FP32}}, + {{TensorDescCreatorTypes::ncsp, Precision::FP32}, + {TensorDescCreatorTypes::ncsp, Precision::I32}, + {TensorDescCreatorTypes::ncsp, Precision::I32}}, + impl_desc_type::ref_any); +} + +bool MKLDNNMatrixNmsNode::created() const { + return getType() == MatrixNms; +} + +namespace { + +static inline float boxArea(const float* bbox, const bool normalized) { + if (bbox[2] < bbox[0] || bbox[3] < bbox[1]) { + return static_cast(0.); + } else { + const float width = bbox[2] - bbox[0]; + const float height = bbox[3] - bbox[1]; + if (normalized) { + return width * height; + } else { + return (width + 1) * (height + 1); + } + } +} + +static inline float intersectionOverUnion(const float* bbox1, const float* bbox2, const bool normalized) { + if (bbox2[0] > bbox1[2] || bbox2[2] < bbox1[0] || bbox2[1] > bbox1[3] || bbox2[3] < bbox1[1]) { + return static_cast(0.); + } else { + const float xMin = std::max(bbox1[0], bbox2[0]); + const float yMin = std::max(bbox1[1], bbox2[1]); + const float xMax = std::min(bbox1[2], bbox2[2]); + const float yMax = std::min(bbox1[3], bbox2[3]); + float norm = normalized ? static_cast(0.) 
: static_cast(1.); + float width = xMax - xMin + norm; + float height = yMax - yMin + norm; + const float interArea = width * height; + const float bbox1Area = boxArea(bbox1, normalized); + const float bbox2Area = boxArea(bbox2, normalized); + return interArea / (bbox1Area + bbox2Area - interArea); + } +} +} // namespace + +size_t MKLDNNMatrixNmsNode::nmsMatrix(const float* boxesData, const float* scoresData, BoxInfo* filterBoxes, const int64_t batchIdx, const int64_t classIdx) { + std::vector candidateIndex(m_numBoxes); + std::iota(candidateIndex.begin(), candidateIndex.end(), 0); + auto end = std::remove_if(candidateIndex.begin(), candidateIndex.end(), [&scoresData, this](int32_t idx) { + return scoresData[idx] <= m_scoreThreshold; + }); + int64_t numDet = 0; + int64_t originalSize = std::distance(candidateIndex.begin(), end); + if (originalSize <= 0) { + return 0; + } + if (m_nmsTopk > -1 && originalSize > m_nmsTopk) { + originalSize = m_nmsTopk; + } + + std::partial_sort(candidateIndex.begin(), candidateIndex.begin() + originalSize, end, [&scoresData](int32_t a, int32_t b) { + return scoresData[a] > scoresData[b]; + }); + + std::vector iouMatrix((originalSize * (originalSize - 1)) >> 1); + std::vector iouMax(originalSize); + + iouMax[0] = 0.; + InferenceEngine::parallel_for(originalSize - 1, [&](size_t i) { + float max_iou = 0.; + size_t actual_index = i + 1; + auto idx_a = candidateIndex[actual_index]; + for (int64_t j = 0; j < actual_index; j++) { + auto idx_b = candidateIndex[j]; + auto iou = intersectionOverUnion(boxesData + idx_a * 4, boxesData + idx_b * 4, m_normalized); + max_iou = std::max(max_iou, iou); + iouMatrix[actual_index * (actual_index - 1) / 2 + j] = iou; + } + iouMax[actual_index] = max_iou; + }); + + if (scoresData[candidateIndex[0]] > m_postThreshold) { + auto box_index = candidateIndex[0]; + auto box = boxesData + box_index * 4; + filterBoxes[0].box.x1 = box[0]; + filterBoxes[0].box.y1 = box[1]; + filterBoxes[0].box.x2 = box[2]; + filterBoxes[0].box.y2 = box[3]; + filterBoxes[0].index = batchIdx * m_numBoxes + box_index; + filterBoxes[0].score = scoresData[candidateIndex[0]]; + filterBoxes[0].batchIndex = batchIdx; + filterBoxes[0].classIndex = classIdx; + numDet++; + } + + for (int64_t i = 1; i < originalSize; i++) { + float minDecay = 1.; + for (int64_t j = 0; j < i; j++) { + auto maxIou = iouMax[j]; + auto iou = iouMatrix[i * (i - 1) / 2 + j]; + auto decay = m_decay_fn(iou, maxIou, m_gaussianSigma); + minDecay = std::min(minDecay, decay); + } + auto ds = minDecay * scoresData[candidateIndex[i]]; + if (ds <= m_postThreshold) + continue; + auto boxIndex = candidateIndex[i]; + auto box = boxesData + boxIndex * 4; + filterBoxes[numDet].box.x1 = box[0]; + filterBoxes[numDet].box.y1 = box[1]; + filterBoxes[numDet].box.x2 = box[2]; + filterBoxes[numDet].box.y2 = box[3]; + filterBoxes[numDet].index = batchIdx * m_numBoxes + boxIndex; + filterBoxes[numDet].score = ds; + filterBoxes[numDet].batchIndex = batchIdx; + filterBoxes[numDet].classIndex = classIdx; + numDet++; + } + return numDet; +} + +void MKLDNNMatrixNmsNode::execute(mkldnn::stream strm) { + const float* boxes = reinterpret_cast(getParentEdgeAt(NMS_BOXES)->getMemoryPtr()->GetPtr()); + const float* scores = reinterpret_cast(getParentEdgeAt(NMS_SCORES)->getMemoryPtr()->GetPtr()); + + InferenceEngine::parallel_for2d(m_numBatches, m_numClasses, [&](size_t batchIdx, size_t classIdx) { + if (classIdx == m_backgroundClass) { + m_numPerBatchClass[batchIdx][classIdx] = 0; + return; + } + const float* boxesPtr = boxes 
+ batchIdx * m_numBoxes * 4; + const float* scoresPtr = scores + batchIdx * (m_numClasses * m_numBoxes) + classIdx * m_numBoxes; + size_t classNumDet = 0; + size_t batchOffset = batchIdx * m_realNumClasses * m_realNumBoxes; + classNumDet = nmsMatrix(boxesPtr, scoresPtr, m_filteredBoxes.data() + batchOffset + m_classOffset[classIdx], batchIdx, classIdx); + m_numPerBatchClass[batchIdx][classIdx] = classNumDet; + }); + + InferenceEngine::parallel_for(m_numBatches, [&](size_t batchIdx) { + size_t batchOffset = batchIdx * m_realNumClasses * m_realNumBoxes; + BoxInfo* batchFilteredBox = m_filteredBoxes.data() + batchOffset; + auto& numPerClass = m_numPerBatchClass[batchIdx]; + auto numDet = std::accumulate(numPerClass.begin(), numPerClass.end(), 0); + auto start_offset = numPerClass[0]; + + for (size_t i = 1; i < numPerClass.size(); i++) { + auto offset_class = m_classOffset[i]; + for (size_t j = 0; j < numPerClass[i]; j++) { + batchFilteredBox[start_offset + j] = batchFilteredBox[offset_class + j]; + } + start_offset += numPerClass[i]; + } + auto keepNum = numDet; + if (m_keepTopk > -1) { + auto k = static_cast(m_keepTopk); + if (keepNum > k) + keepNum = k; + } + + std::partial_sort(batchFilteredBox, batchFilteredBox + keepNum, batchFilteredBox + numDet, [](const BoxInfo& lhs, const BoxInfo& rhs) { + return lhs.score > rhs.score || (lhs.score == rhs.score && lhs.classIndex < rhs.classIndex) || + (lhs.score == rhs.score && lhs.classIndex == rhs.classIndex && lhs.index < rhs.index); + }); + m_numPerBatch[batchIdx] = keepNum; + }); + + auto startOffset = m_numPerBatch[0]; + for (size_t i = 1; i < m_numPerBatch.size(); i++) { + auto offset_batch = i * m_realNumClasses * m_realNumBoxes; + for (size_t j = 0; j < m_numPerBatch[i]; j++) { + m_filteredBoxes[startOffset + j] = m_filteredBoxes[offset_batch + j]; + } + startOffset += m_numPerBatch[i]; + } + + if (m_sortResultAcrossBatch) { /* sort across batch */ + if (m_sortResultType == MatrixNmsSortResultType::SCORE) { + parallel_sort(m_filteredBoxes.begin(), m_filteredBoxes.begin() + startOffset, [](const BoxInfo& l, const BoxInfo& r) { + return (l.score > r.score) || (l.score == r.score && l.batchIndex < r.batchIndex) || + (l.score == r.score && l.batchIndex == r.batchIndex && l.classIndex < r.classIndex) || + (l.score == r.score && l.batchIndex == r.batchIndex && l.classIndex == r.classIndex && l.index < r.index); + }); + } else if (m_sortResultType == MatrixNmsSortResultType::CLASSID) { + parallel_sort(m_filteredBoxes.begin(), m_filteredBoxes.begin() + startOffset, [](const BoxInfo& l, const BoxInfo& r) { + return (l.classIndex < r.classIndex) || (l.classIndex == r.classIndex && l.batchIndex < r.batchIndex) || + (l.classIndex == r.classIndex && l.batchIndex == r.batchIndex && l.score > r.score) || + (l.classIndex == r.classIndex && l.batchIndex == r.batchIndex && l.score == r.score && l.index < r.index); + }); + } + } + + float* selectedOutputs = reinterpret_cast(getChildEdgesAtPort(NMS_SELECTED_OUTPUTS)[0]->getMemoryPtr()->GetPtr()); + int* selectedIndices = reinterpret_cast(getChildEdgesAtPort(NMS_SELECTED_INDICES)[0]->getMemoryPtr()->GetPtr()); + int* validOutputs = reinterpret_cast(getChildEdgesAtPort(NMS_VALID_OUTPUTS)[0]->getMemoryPtr()->GetPtr()); + std::copy(m_numPerBatch.begin(), m_numPerBatch.end(), validOutputs); + + int64_t outputOffset = 0; + int64_t originalOffset = 0; + for (size_t i = 0; i < m_numBatches; i++) { + auto real_boxes = m_numPerBatch[i]; + for (size_t j = 0; j < real_boxes; j++) { + auto originalIndex = originalOffset + j;
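+ // each 'selected_outputs' row packs 6 floats per kept box: + // [class_id, box_score, xmin, ymin, xmax, ymax] (the layout documented for NmsStaticShapeIE), + // while 'selected_indices' stores the flattened box index (batchIdx * m_numBoxes + boxIndex) + // that nmsMatrix() recorded in BoxInfo::index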
+ selectedIndices[j + outputOffset] = static_cast(m_filteredBoxes[originalIndex].index); + auto selectedBase = selectedOutputs + (outputOffset + j) * 6; + selectedBase[0] = m_filteredBoxes[originalIndex].classIndex; + selectedBase[1] = m_filteredBoxes[originalIndex].score; + selectedBase[2] = m_filteredBoxes[originalIndex].box.x1; + selectedBase[3] = m_filteredBoxes[originalIndex].box.y1; + selectedBase[4] = m_filteredBoxes[originalIndex].box.x2; + selectedBase[5] = m_filteredBoxes[originalIndex].box.y2; + } + std::fill_n(selectedOutputs + (outputOffset + real_boxes) * 6, (m_maxBoxesPerBatch - real_boxes) * 6, -1); + std::fill_n(selectedIndices + (outputOffset + real_boxes), m_maxBoxesPerBatch - real_boxes, -1); + outputOffset += m_maxBoxesPerBatch; + originalOffset += real_boxes; + } +} + +void MKLDNNMatrixNmsNode::checkPrecision(const Precision prec, const std::vector precList, const std::string name, const std::string type) { + if (std::find(precList.begin(), precList.end(), prec) == precList.end()) + IE_THROW() << errorPrefix << "has unsupported '" << name << "' " << type << " precision: " << prec; +} + +REG_MKLDNN_PRIM_FOR(MKLDNNMatrixNmsNode, MatrixNms); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matrix_nms_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matrix_nms_node.h new file mode 100644 index 00000000000000..5d85a3669529d3 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matrix_nms_node.h @@ -0,0 +1,100 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +#include +#include +#include + +namespace MKLDNNPlugin { + +enum MatrixNmsSortResultType { + CLASSID, // sort selected boxes by class id (ascending) in each batch element + SCORE, // sort selected boxes by score (descending) in each batch element + NONE // do not guarantee the order in each batch element +}; + +enum MatrixNmsDecayFunction { GAUSSIAN, LINEAR }; + +class MKLDNNMatrixNmsNode : public MKLDNNNode { +public: + MKLDNNMatrixNmsNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr& cache); + + void getSupportedDescriptors() override {}; + void initSupportedPrimitiveDescriptors() override; + void createPrimitive() override {}; + void execute(mkldnn::stream strm) override; + bool created() const override; + + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + +private: + // input + static const size_t NMS_BOXES = 0; + static const size_t NMS_SCORES = 1; + + // output + static const size_t NMS_SELECTED_OUTPUTS = 0; + static const size_t NMS_SELECTED_INDICES = 1; + static const size_t NMS_VALID_OUTPUTS = 2; + + size_t m_numBatches; + size_t m_numBoxes; + size_t m_numClasses; + size_t m_maxBoxesPerBatch; + + MatrixNmsSortResultType m_sortResultType; + bool m_sortResultAcrossBatch; + float m_scoreThreshold; + int m_nmsTopk; + int m_keepTopk; + int m_backgroundClass; + MatrixNmsDecayFunction m_decayFunction; + float m_gaussianSigma; + float m_postThreshold; + bool m_normalized; + + struct Rectangle { + Rectangle(float x_left, float y_left, float x_right, float y_right) : x1 {x_left}, y1 {y_left}, x2 {x_right}, y2 {y_right} {} + + Rectangle() = default; + + float x1 = 0.0f; + float y1 = 0.0f; + float x2 = 0.0f; + float y2 = 0.0f; + }; + + struct BoxInfo { + BoxInfo(const Rectangle& r, int64_t idx, float sc, int64_t batch_idx, int64_t class_idx) + : box {r}, index {idx}, batchIndex {batch_idx}, classIndex {class_idx}, score {sc} 
{} + + BoxInfo() = default; + + Rectangle box; + int64_t index = -1; + int64_t batchIndex = -1; + int64_t classIndex = -1; + float score = 0.0f; + }; + std::string errorPrefix; + const std::string inType = "input", outType = "output"; + std::vector m_numPerBatch; + std::vector> m_numPerBatchClass; + std::vector m_filteredBoxes; + std::vector m_classOffset; + size_t m_realNumClasses; + size_t m_realNumBoxes; + float (*m_decay_fn)(float, float, float); + void checkPrecision(const InferenceEngine::Precision prec, const std::vector precList, const std::string name, + const std::string type); + + size_t nmsMatrix(const float* boxesData, const float* scoresData, BoxInfo* filterBoxes, const int64_t batchIdx, const int64_t classIdx); +}; + +} // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_multiclass_nms.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_multiclass_nms.cpp new file mode 100644 index 00000000000000..1ea109f5fdb1e5 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_multiclass_nms.cpp @@ -0,0 +1,414 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "mkldnn_multiclass_nms.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "base.hpp" +#include "ie_parallel.hpp" +#include "utils/general_utils.h" + +using namespace MKLDNNPlugin; +using namespace InferenceEngine; + +using ngNmsSortResultType = ngraph::op::util::NmsBase::SortResultType; +using MulticlassNmsIEInternal = ngraph::op::internal::NmsStaticShapeIE; + +bool MKLDNNMultiClassNmsNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { + try { + const auto nms = std::dynamic_pointer_cast(op); + if (!nms) { + errorMessage = "Only internal MultiClassNonMaxSuppression operation is supported"; + return false; + } + const auto& attrs = nms->get_attrs(); + const auto& sortType = attrs.sort_result_type; + if (!one_of(sortType, ngNmsSortResultType::NONE, ngNmsSortResultType::SCORE, ngNmsSortResultType::CLASSID)) { + errorMessage = "Does not support SortResultType mode: " + ngraph::as_string(sortType); + return false; + } + } catch (...)
{ + return false; + } + return true; +} + +MKLDNNMultiClassNmsNode::MKLDNNMultiClassNmsNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr& cache) + : MKLDNNNode(op, eng, cache) { + std::string errorMessage; + if (!isSupportedOperation(op, errorMessage)) { + IE_THROW(NotImplemented) << errorMessage; + } + errorPrefix = "MultiClassNms layer with name '" + getName() + "' "; + const auto nms = std::dynamic_pointer_cast(op); + + if (getOriginalInputsNumber() != 2) + IE_THROW() << errorPrefix << "has incorrect number of input edges: " << getOriginalInputsNumber(); + + if (getOriginalOutputsNumber() != 3) + IE_THROW() << errorPrefix << "has incorrect number of output edges: " << getOriginalOutputsNumber(); + + auto& attrs = nms->get_attrs(); + sort_result_across_batch = attrs.sort_result_across_batch; + max_output_boxes_per_class = attrs.nms_top_k; + iou_threshold = attrs.iou_threshold; + score_threshold = attrs.score_threshold; + background_class = attrs.background_class; + keep_top_k = attrs.keep_top_k; + if (attrs.sort_result_type == ngNmsSortResultType::CLASSID) + sort_result_type = MulticlassNmsSortResultType::CLASSID; + else if (attrs.sort_result_type == ngNmsSortResultType::SCORE) + sort_result_type = MulticlassNmsSortResultType::SCORE; + else if (attrs.sort_result_type == ngNmsSortResultType::NONE) + sort_result_type = MulticlassNmsSortResultType::NONE; + nms_eta = attrs.nms_eta; + normalized = attrs.normalized; + + const SizeVector& boxes_dims = inDims[NMS_BOXES].ToSizeVector(); + if (boxes_dims.size() != 3) + IE_THROW() << errorPrefix << "has unsupported 'boxes' input rank: " << boxes_dims.size(); + if (boxes_dims[2] != 4) + IE_THROW() << errorPrefix << "has unsupported 'boxes' input 3rd dimension size: " << boxes_dims[2]; + + const SizeVector& scores_dims = inDims[NMS_SCORES].ToSizeVector(); + if (scores_dims.size() != 3) + IE_THROW() << errorPrefix << "has unsupported 'scores' input rank: " << scores_dims.size(); + + if (boxes_dims[0] != scores_dims[0]) + IE_THROW() << errorPrefix << " num_batches is different in 'boxes' and 'scores' inputs"; + if (boxes_dims[1] != scores_dims[2]) + IE_THROW() << errorPrefix << " num_boxes is different in 'boxes' and 'scores' inputs"; + + const SizeVector& valid_outputs_dims = outDims[NMS_SELECTEDNUM].ToSizeVector(); + if (valid_outputs_dims.size() != 1) + IE_THROW() << errorPrefix << "has unsupported 'valid_outputs' output rank: " << valid_outputs_dims.size(); + if (valid_outputs_dims[0] != boxes_dims[0]) // valid_outputs_dims[0] != num_batches + IE_THROW() << errorPrefix << "has unsupported 'valid_outputs' output 1st dimension size: " << valid_outputs_dims[0]; +} + +void MKLDNNMultiClassNmsNode::initSupportedPrimitiveDescriptors() { + if (!supportedPrimitiveDescriptors.empty()) + return; + const SizeVector& boxes_dims = inDims[NMS_BOXES].ToSizeVector(); + num_batches = boxes_dims[0]; + num_boxes = boxes_dims[1]; + const SizeVector& scores_dims = inDims[NMS_SCORES].ToSizeVector(); + num_classes = scores_dims[1]; + numFiltBox.resize(num_batches, std::vector(num_classes)); // batches + numBoxOffset.resize(num_batches); + + if (max_output_boxes_per_class) { + max_output_boxes_per_class = (max_output_boxes_per_class == -1) ?
num_boxes : max_output_boxes_per_class; + filtBoxes.resize(max_output_boxes_per_class * num_batches * num_classes); + } + + const std::vector supportedFloatPrecision = {Precision::FP32, Precision::BF16}; + const std::vector supportedIntOutputPrecision = {Precision::I32, Precision::I64}; + + checkPrecision(getOriginalInputPrecisionAtPort(NMS_BOXES), supportedFloatPrecision, "boxes", inType); + + checkPrecision(getOriginalInputPrecisionAtPort(NMS_SCORES), supportedFloatPrecision, "scores", inType); + + checkPrecision(getOriginalOutputPrecisionAtPort(NMS_SELECTEDINDICES), supportedIntOutputPrecision, "selected_indices", outType); + checkPrecision(getOriginalOutputPrecisionAtPort(NMS_SELECTEDOUTPUTS), supportedFloatPrecision, "selected_outputs", outType); + checkPrecision(getOriginalOutputPrecisionAtPort(NMS_SELECTEDNUM), supportedIntOutputPrecision, "selected_num", outType); + + addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, Precision::FP32}, + {TensorDescCreatorTypes::ncsp, Precision::FP32}}, + {{TensorDescCreatorTypes::ncsp, Precision::FP32}, + {TensorDescCreatorTypes::ncsp, Precision::I32}, + {TensorDescCreatorTypes::ncsp, Precision::I32}}, + impl_desc_type::ref_any); +} + +void MKLDNNMultiClassNmsNode::execute(mkldnn::stream strm) { + const float* boxes = reinterpret_cast(getParentEdgeAt(NMS_BOXES)->getMemoryPtr()->GetPtr()); + const float* scores = reinterpret_cast(getParentEdgeAt(NMS_SCORES)->getMemoryPtr()->GetPtr()); + + auto dims_boxes = getParentEdgeAt(NMS_BOXES)->getDesc().getDims(); + + if (max_output_boxes_per_class == 0) + return; + + int* selected_indices = reinterpret_cast(getChildEdgesAtPort(NMS_SELECTEDINDICES)[0]->getMemoryPtr()->GetPtr()); + + float* selected_outputs = reinterpret_cast(getChildEdgesAtPort(NMS_SELECTEDOUTPUTS)[0]->getMemoryPtr()->GetPtr()); + + int* selected_num = reinterpret_cast(getChildEdgesAtPort(NMS_SELECTEDNUM)[0]->getMemoryPtr()->GetPtr()); + + auto boxesStrides = getParentEdgeAt(NMS_BOXES)->getDesc().getBlockingDesc().getStrides(); + auto scoresStrides = getParentEdgeAt(NMS_SCORES)->getDesc().getBlockingDesc().getStrides(); + + if ((nms_eta >= 0) && (nms_eta < 1)) { + nmsWithEta(boxes, scores, boxesStrides, scoresStrides); + } else { + nmsWithoutEta(boxes, scores, boxesStrides, scoresStrides); + } + + size_t startOffset = numFiltBox[0][0]; + numBoxOffset[0] = 0; + for (size_t b = 0; b < numFiltBox.size(); b++) { + size_t batchOffsetNew = 0; + size_t batchOffset = b * num_classes * max_output_boxes_per_class; + for (size_t c = (b == 0 ?
1 : 0); c < numFiltBox[b].size(); c++) { + size_t offset = batchOffset + c * max_output_boxes_per_class; + for (size_t i = 0; i < numFiltBox[b][c]; i++) { + filtBoxes[startOffset + i] = filtBoxes[offset + i]; + } + startOffset += numFiltBox[b][c]; + batchOffsetNew += numFiltBox[b][c]; + } + numBoxOffset[b] = batchOffsetNew; + if (b == 0) + numBoxOffset[b] += numFiltBox[0][0]; + } + // sort elements before applying keep_top_k + parallel_sort(filtBoxes.begin(), filtBoxes.begin() + startOffset, [](const filteredBoxes& l, const filteredBoxes& r) { + return ((l.batch_index < r.batch_index) || + ((l.batch_index == r.batch_index) && ((l.score > r.score) || ((std::fabs(l.score - r.score) < 1e-6) && l.class_index < r.class_index) || + ((std::fabs(l.score - r.score) < 1e-6) && l.class_index == r.class_index && l.box_index < r.box_index)))); + }); + + if (keep_top_k > -1) { + startOffset = 0; + size_t offset = 0; + for (size_t b = 0; b < numFiltBox.size(); b++) { + if (numBoxOffset[b] > keep_top_k) { + if (startOffset == offset) { + startOffset += keep_top_k; + offset += numBoxOffset[b]; + } else { + for (size_t i = 0; i < keep_top_k; i++) { + filtBoxes[startOffset + i] = filtBoxes[offset + i]; + } + startOffset += keep_top_k; + offset += numBoxOffset[b]; + } + } else { + if (startOffset == offset) { + startOffset += numBoxOffset[b]; + offset += numBoxOffset[b]; + } else { + for (size_t i = 0; i < numBoxOffset[b]; i++) { + filtBoxes[startOffset + i] = filtBoxes[offset + i]; + } + startOffset += numBoxOffset[b]; + offset += numBoxOffset[b]; + } + } + } + } + + if (sort_result_across_batch) { + if (sort_result_type == SCORE) { + parallel_sort(filtBoxes.begin(), filtBoxes.begin() + startOffset, [](const filteredBoxes& l, const filteredBoxes& r) { + return (l.score > r.score) || (l.score == r.score && l.batch_index < r.batch_index) || + (l.score == r.score && l.batch_index == r.batch_index && l.class_index < r.class_index) || + (l.score == r.score && l.batch_index == r.batch_index && l.class_index == r.class_index && l.box_index < r.box_index); + }); + } else if (sort_result_type == CLASSID) { + parallel_sort(filtBoxes.begin(), filtBoxes.begin() + startOffset, [](const filteredBoxes& l, const filteredBoxes& r) { + return (l.class_index < r.class_index) || (l.class_index == r.class_index && l.batch_index < r.batch_index) || + (l.class_index == r.class_index && l.batch_index == r.batch_index && l.score > r.score) || + (l.class_index == r.class_index && l.batch_index == r.batch_index && l.score == r.score && l.box_index < r.box_index); + }); + } + } else if (sort_result_type == CLASSID) { + parallel_sort(filtBoxes.begin(), filtBoxes.begin() + startOffset, [](const filteredBoxes& l, const filteredBoxes& r) { + return ((l.batch_index < r.batch_index) || + ((l.batch_index == r.batch_index) && + ((l.class_index < r.class_index) || ((l.class_index == r.class_index) && l.score > r.score) || + ((std::fabs(l.score - r.score) <= 1e-6) && l.class_index == r.class_index && l.box_index < r.box_index)))); + }); + } + + const size_t selectedBoxesNum = getChildEdgeAt(NMS_SELECTEDINDICES)->getDesc().getDims()[0]; + const size_t validOutputs = std::min(startOffset, selectedBoxesNum); + + std::vector m_selected_num; + m_selected_num.resize(dims_boxes[0]); + + const size_t selectedBoxesNum_perBatch = selectedBoxesNum / dims_boxes[0]; + + for (size_t idx = 0lu; idx < validOutputs; idx++) { + m_selected_num[filtBoxes[idx].batch_index]++; + } + + int64_t output_offset = 0; + int64_t original_offset = 0; + for (size_t i = 0; i <
dims_boxes[0]; i++) { + auto real_boxes = m_selected_num[i]; + selected_num[i] = static_cast(real_boxes); + + for (size_t j = 0; j < real_boxes; j++) { + auto original_index = original_offset + j; + selected_indices[j + output_offset] = filtBoxes[original_index].batch_index * dims_boxes[1] + filtBoxes[original_index].box_index; + auto selected_base = selected_outputs + (output_offset + j) * 6; + selected_base[0] = filtBoxes[original_index].class_index; + selected_base[1] = filtBoxes[original_index].score; + selected_base[2] = boxes[selected_indices[j + output_offset] * 4]; + selected_base[3] = boxes[selected_indices[j + output_offset] * 4 + 1]; + selected_base[4] = boxes[selected_indices[j + output_offset] * 4 + 2]; + selected_base[5] = boxes[selected_indices[j + output_offset] * 4 + 3]; + } + std::fill_n(selected_outputs + (output_offset + real_boxes) * 6, (selectedBoxesNum_perBatch - real_boxes) * 6, -1); + std::fill_n(selected_indices + (output_offset + real_boxes), selectedBoxesNum_perBatch - real_boxes, -1); + output_offset += selectedBoxesNum_perBatch; + original_offset += real_boxes; + } +} + +bool MKLDNNMultiClassNmsNode::created() const { + return getType() == MulticlassNms; +} + +float MKLDNNMultiClassNmsNode::intersectionOverUnion(const float* boxesI, const float* boxesJ, const bool normalized) { + float yminI, xminI, ymaxI, xmaxI, yminJ, xminJ, ymaxJ, xmaxJ; + const float norm = static_cast(normalized == false); + + // to align with reference + yminI = boxesI[0]; + xminI = boxesI[1]; + ymaxI = boxesI[2]; + xmaxI = boxesI[3]; + yminJ = boxesJ[0]; + xminJ = boxesJ[1]; + ymaxJ = boxesJ[2]; + xmaxJ = boxesJ[3]; + + float areaI = (ymaxI - yminI + norm) * (xmaxI - xminI + norm); + float areaJ = (ymaxJ - yminJ + norm) * (xmaxJ - xminJ + norm); + if (areaI <= 0.f || areaJ <= 0.f) + return 0.f; + + float intersection_area = (std::max)((std::min)(ymaxI, ymaxJ) - (std::max)(yminI, yminJ) + norm, 0.f) * + (std::max)((std::min)(xmaxI, xmaxJ) - (std::max)(xminI, xminJ) + norm, 0.f); + return intersection_area / (areaI + areaJ - intersection_area); +} + +void MKLDNNMultiClassNmsNode::nmsWithEta(const float* boxes, const float* scores, const SizeVector& boxesStrides, const SizeVector& scoresStrides) { + auto less = [](const boxInfo& l, const boxInfo& r) { + return l.score < r.score || ((l.score == r.score) && (l.idx > r.idx)); + }; + + auto func = [](float iou, float adaptive_threshold) { + return iou <= adaptive_threshold ? 1.0f : 0.0f; + }; + + parallel_for2d(num_batches, num_classes, [&](int batch_idx, int class_idx) { + if (class_idx != background_class) { + std::vector fb; + const float* boxesPtr = boxes + batch_idx * boxesStrides[0]; + const float* scoresPtr = scores + batch_idx * scoresStrides[0] + class_idx * scoresStrides[1]; + + std::priority_queue, decltype(less)> sorted_boxes(less); + for (int box_idx = 0; box_idx < num_boxes; box_idx++) { + if (scoresPtr[box_idx] >= score_threshold) // align with ref + sorted_boxes.emplace(boxInfo({scoresPtr[box_idx], box_idx, 0})); + } + fb.reserve(sorted_boxes.size()); + if (sorted_boxes.size() > 0) { + auto adaptive_threshold = iou_threshold; + int max_out_box = (max_output_boxes_per_class > sorted_boxes.size()) ?
sorted_boxes.size() : max_output_boxes_per_class; + while (max_out_box && !sorted_boxes.empty()) { + boxInfo currBox = sorted_boxes.top(); + float origScore = currBox.score; + sorted_boxes.pop(); + max_out_box--; + + bool box_is_selected = true; + for (int idx = static_cast(fb.size()) - 1; idx >= currBox.suppress_begin_index; idx--) { + float iou = intersectionOverUnion(&boxesPtr[currBox.idx * 4], &boxesPtr[fb[idx].box_index * 4], normalized); + currBox.score *= func(iou, adaptive_threshold); + if (iou >= adaptive_threshold) { + box_is_selected = false; + break; + } + if (currBox.score <= score_threshold) + break; + } + + currBox.suppress_begin_index = fb.size(); + if (box_is_selected) { + if (nms_eta < 1 && adaptive_threshold > 0.5) { + adaptive_threshold *= nms_eta; + } + if (currBox.score == origScore) { + fb.push_back({currBox.score, batch_idx, class_idx, currBox.idx}); + continue; + } + if (currBox.score > score_threshold) { + sorted_boxes.push(currBox); + } + } + } + } + numFiltBox[batch_idx][class_idx] = fb.size(); + size_t offset = batch_idx * num_classes * max_output_boxes_per_class + class_idx * max_output_boxes_per_class; + for (size_t i = 0; i < fb.size(); i++) { + filtBoxes[offset + i] = fb[i]; + } + } + }); +} + +void MKLDNNMultiClassNmsNode::nmsWithoutEta(const float* boxes, const float* scores, const SizeVector& boxesStrides, const SizeVector& scoresStrides) { + parallel_for2d(num_batches, num_classes, [&](int batch_idx, int class_idx) { + if (class_idx != background_class) { + const float* boxesPtr = boxes + batch_idx * boxesStrides[0]; + const float* scoresPtr = scores + batch_idx * scoresStrides[0] + class_idx * scoresStrides[1]; + + std::vector> sorted_boxes; + for (int box_idx = 0; box_idx < num_boxes; box_idx++) { + if (scoresPtr[box_idx] >= score_threshold) // align with ref + sorted_boxes.emplace_back(std::make_pair(scoresPtr[box_idx], box_idx)); + } + + int io_selection_size = 0; + if (sorted_boxes.size() > 0) { + parallel_sort(sorted_boxes.begin(), sorted_boxes.end(), [](const std::pair& l, const std::pair& r) { + return (l.first > r.first || ((l.first == r.first) && (l.second < r.second))); + }); + int offset = batch_idx * num_classes * max_output_boxes_per_class + class_idx * max_output_boxes_per_class; + filtBoxes[offset + 0] = filteredBoxes(sorted_boxes[0].first, batch_idx, class_idx, sorted_boxes[0].second); + io_selection_size++; + int max_out_box = (max_output_boxes_per_class > sorted_boxes.size()) ?
sorted_boxes.size() : max_output_boxes_per_class; + for (size_t box_idx = 1; box_idx < max_out_box; box_idx++) { + bool box_is_selected = true; + for (int idx = io_selection_size - 1; idx >= 0; idx--) { + float iou = + intersectionOverUnion(&boxesPtr[sorted_boxes[box_idx].second * 4], &boxesPtr[filtBoxes[offset + idx].box_index * 4], normalized); + if (iou >= iou_threshold) { + box_is_selected = false; + break; + } + } + + if (box_is_selected) { + filtBoxes[offset + io_selection_size] = filteredBoxes(sorted_boxes[box_idx].first, batch_idx, class_idx, sorted_boxes[box_idx].second); + io_selection_size++; + } + } + } + numFiltBox[batch_idx][class_idx] = io_selection_size; + } + }); +} + +void MKLDNNMultiClassNmsNode::checkPrecision(const Precision prec, const std::vector precList, const std::string name, const std::string type) { + if (std::find(precList.begin(), precList.end(), prec) == precList.end()) + IE_THROW() << errorPrefix << "has unsupported '" << name << "' " << type << " precision: " << prec; +} + +REG_MKLDNN_PRIM_FOR(MKLDNNMultiClassNmsNode, MulticlassNms) \ No newline at end of file diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_multiclass_nms.hpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_multiclass_nms.hpp new file mode 100644 index 00000000000000..0627f72cea0df8 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_multiclass_nms.hpp @@ -0,0 +1,93 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +#include + +namespace MKLDNNPlugin { + +enum MulticlassNmsSortResultType { + CLASSID, // sort selected boxes by class id (ascending) in each batch element + SCORE, // sort selected boxes by score (descending) in each batch element + NONE // do not guarantee the order in each batch element +}; + +class MKLDNNMultiClassNmsNode : public MKLDNNNode { +public: + MKLDNNMultiClassNmsNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr& cache); + + void getSupportedDescriptors() override {}; + void initSupportedPrimitiveDescriptors() override; + void createPrimitive() override {}; + void execute(mkldnn::stream strm) override; + bool created() const override; + + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + +private: + // input (port Num) + const size_t NMS_BOXES = 0; + const size_t NMS_SCORES = 1; + + // output (port Num) + const size_t NMS_SELECTEDOUTPUTS = 0; + const size_t NMS_SELECTEDINDICES = 1; + const size_t NMS_SELECTEDNUM = 2; + + bool sort_result_across_batch = false; + MulticlassNmsSortResultType sort_result_type = NONE; + + size_t num_batches; + size_t num_boxes; + size_t num_classes; + + int max_output_boxes_per_class = 0; + float iou_threshold = 0.0f; + float score_threshold = 0.0f; + + int32_t background_class = 0; + int32_t keep_top_k = 0; + float nms_eta = 0.0f; + bool normalized = true; + + std::string errorPrefix; + + std::vector> numFiltBox; + std::vector numBoxOffset; + const std::string inType = "input", outType = "output"; + + struct filteredBoxes { + float score; + int batch_index; + int class_index; + int box_index; + filteredBoxes() = default; + filteredBoxes(float _score, int _batch_index, int _class_index, int _box_index) + : score(_score), batch_index(_batch_index), class_index(_class_index), box_index(_box_index) {} + }; + + struct boxInfo { + float score; + int idx; + int suppress_begin_index; + }; + + std::vector filtBoxes; + + void checkPrecision(const 
InferenceEngine::Precision prec, const std::vector precList, const std::string name, + const std::string type); + + float intersectionOverUnion(const float* boxesI, const float* boxesJ, const bool normalized); + + void nmsWithEta(const float* boxes, const float* scores, const InferenceEngine::SizeVector& boxesStrides, const InferenceEngine::SizeVector& scoresStrides); + + void nmsWithoutEta(const float* boxes, const float* scores, const InferenceEngine::SizeVector& boxesStrides, + const InferenceEngine::SizeVector& scoresStrides); +}; + +} // namespace MKLDNNPlugin diff --git a/inference-engine/src/transformations/include/ngraph_ops/nms_static_shape_ie.hpp b/inference-engine/src/transformations/include/ngraph_ops/nms_static_shape_ie.hpp new file mode 100644 index 00000000000000..3bed4a37e6adb7 --- /dev/null +++ b/inference-engine/src/transformations/include/ngraph_ops/nms_static_shape_ie.hpp @@ -0,0 +1,114 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include +#include + +#include + +#include "ngraph/op/op.hpp" + +namespace ngraph { +namespace op { +namespace internal { + +template +class NmsStaticShapeIE : public BaseNmsOp { +public: + NGRAPH_RTTI_DECLARATION; + + using Attributes = typename BaseNmsOp::Attributes; + + /// \brief Constructs a NmsStaticShapeIE operation + /// + /// \param boxes Node producing the box coordinates + /// \param scores Node producing the box scores + /// \param attrs Attributes of the operation + NmsStaticShapeIE(const Output& boxes, + const Output& scores, + const Attributes& attrs) : BaseNmsOp(boxes, scores, attrs) { + this->constructor_validate_and_infer_types(); + } + void validate_and_infer_types() override; + std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override { + return std::make_shared(new_args.at(0), new_args.at(1), this->m_attrs); + } +}; + +template +void NmsStaticShapeIE::validate_and_infer_types() { + const auto boxes_ps = this->get_input_partial_shape(0); + const auto scores_ps = this->get_input_partial_shape(1); + + auto first_dim_shape = Dimension::dynamic(); + + if (boxes_ps.rank().is_static() && scores_ps.rank().is_static()) { + const auto num_boxes_boxes = boxes_ps[1]; + if (num_boxes_boxes.is_static() && scores_ps[0].is_static() && scores_ps[1].is_static()) { + const auto num_boxes = num_boxes_boxes.get_length(); + auto num_classes = scores_ps[1].get_length(); + if (this->m_attrs.background_class >= 0 && this->m_attrs.background_class <= num_classes) { + num_classes = num_classes - 1; + } + int64_t max_output_boxes_per_class = 0; + if (this->m_attrs.nms_top_k >= 0) + max_output_boxes_per_class = std::min(num_boxes, static_cast(this->m_attrs.nms_top_k)); + else + max_output_boxes_per_class = num_boxes; + + auto max_output_boxes_per_batch = max_output_boxes_per_class * num_classes; + if (this->m_keep_top_k >= 0) + max_output_boxes_per_batch = + std::min(max_output_boxes_per_batch, static_cast(this->m_attrs.keep_top_k)); + + first_dim_shape = max_output_boxes_per_batch * scores_ps[0].get_length(); + } + } + + // 'selected_outputs' has the following format: + // [number of selected boxes, [class_id, box_score, xmin, ymin, xmax, ymax]] + this->set_output_type(0, element::f32, {first_dim_shape, 6}); + // 'selected_indices' has the following format: + // [number of selected boxes, 1] + this->set_output_type(1, this->m_attrs.output_type, {first_dim_shape, 1}); + // 'selected_num' has the following format: + // [num_batches, ] + if
(boxes_ps.rank().is_static() && boxes_ps.rank().get_length() > 0) { + this->set_output_type(2, this->m_attrs.output_type, {boxes_ps[0]}); + } else { + this->set_output_type(2, this->m_attrs.output_type, {Dimension::dynamic()}); + } +} + +template +const ::ngraph::Node::type_info_t& NmsStaticShapeIE::get_type_info() const { return get_type_info_static(); } + +template +const ::ngraph::Node::type_info_t& NmsStaticShapeIE::get_type_info_static() { + auto BaseNmsOpTypeInfoPtr = &BaseNmsOp::get_type_info_static(); + + // TODO: it should be static const std::string name = std::string("NmsStaticShapeIE_") + BaseNmsOpTypeInfoPtr->name; + // but currently it will not pass conversion to Legacy Opset correctly + static const std::string name = BaseNmsOpTypeInfoPtr->name; + + static const ::ngraph::Node::type_info_t type_info_static{ + name.c_str(), BaseNmsOpTypeInfoPtr->version, BaseNmsOpTypeInfoPtr}; + return type_info_static; +} + +template +const ::ngraph::Node::type_info_t NmsStaticShapeIE::type_info = NmsStaticShapeIE::get_type_info_static(); + +#ifdef __clang__ +extern template class TRANSFORMATIONS_API op::internal::NmsStaticShapeIE; +extern template class TRANSFORMATIONS_API op::internal::NmsStaticShapeIE; +#endif // __clang__ + +} // namespace internal +} // namespace op +} // namespace ngraph diff --git a/inference-engine/src/transformations/include/transformations/op_conversions/convert_matrix_nms_to_matrix_nms_ie.hpp b/inference-engine/src/transformations/include/transformations/op_conversions/convert_matrix_nms_to_matrix_nms_ie.hpp new file mode 100644 index 00000000000000..080a08683222d9 --- /dev/null +++ b/inference-engine/src/transformations/include/transformations/op_conversions/convert_matrix_nms_to_matrix_nms_ie.hpp @@ -0,0 +1,26 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include + +#include +#include + +namespace ngraph { +namespace pass { + +class TRANSFORMATIONS_API ConvertMatrixNmsToMatrixNmsIE; + +} // namespace pass +} // namespace ngraph + +class ngraph::pass::ConvertMatrixNmsToMatrixNmsIE: public ngraph::pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + ConvertMatrixNmsToMatrixNmsIE(); +}; diff --git a/inference-engine/src/transformations/include/transformations/op_conversions/convert_multiclass_nms_to_multiclass_nms_ie.hpp b/inference-engine/src/transformations/include/transformations/op_conversions/convert_multiclass_nms_to_multiclass_nms_ie.hpp new file mode 100644 index 00000000000000..b639364b24e978 --- /dev/null +++ b/inference-engine/src/transformations/include/transformations/op_conversions/convert_multiclass_nms_to_multiclass_nms_ie.hpp @@ -0,0 +1,26 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include + +#include +#include + +namespace ngraph { +namespace pass { + +class TRANSFORMATIONS_API ConvertMulticlassNmsToMulticlassNmsIE; + +} // namespace pass +} // namespace ngraph + +class ngraph::pass::ConvertMulticlassNmsToMulticlassNmsIE: public ngraph::pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + ConvertMulticlassNmsToMulticlassNmsIE(); +}; diff --git a/inference-engine/src/transformations/src/ngraph_ops/nms_static_shape_ie.cpp b/inference-engine/src/transformations/src/ngraph_ops/nms_static_shape_ie.cpp new file mode 100644 index 00000000000000..8f173eafcae271 --- /dev/null +++ b/inference-engine/src/transformations/src/ngraph_ops/nms_static_shape_ie.cpp @@ -0,0 +1,19 @@ +//
Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "ngraph/ops.hpp" +#include "ngraph_ops/nms_static_shape_ie.hpp" + +namespace ngraph { +namespace op { +namespace internal { + +template class TRANSFORMATIONS_API op::internal::NmsStaticShapeIE; +template class TRANSFORMATIONS_API op::internal::NmsStaticShapeIE; + +} // namespace internal +} // namespace op +} // namespace ngraph diff --git a/inference-engine/src/transformations/src/transformations/op_conversions/convert_matrix_nms_to_matrix_nms_ie.cpp b/inference-engine/src/transformations/src/transformations/op_conversions/convert_matrix_nms_to_matrix_nms_ie.cpp new file mode 100644 index 00000000000000..34163fc48601d7 --- /dev/null +++ b/inference-engine/src/transformations/src/transformations/op_conversions/convert_matrix_nms_to_matrix_nms_ie.cpp @@ -0,0 +1,66 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "itt.hpp" +#include +#include + +#include +#include +#include + +#include +#include + +#include "ngraph_ops/nms_static_shape_ie.hpp" +#include "transformations/op_conversions/convert_matrix_nms_to_matrix_nms_ie.hpp" + +NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvertMatrixNmsToMatrixNmsIE, "ConvertMatrixNmsToMatrixNmsIE", 0); + +ngraph::pass::ConvertMatrixNmsToMatrixNmsIE::ConvertMatrixNmsToMatrixNmsIE() { + MATCHER_SCOPE(ConvertMatrixNmsToMatrixNmsIE); + auto nms = ngraph::pattern::wrap_type(); + + ngraph::matcher_pass_callback callback = [](pattern::Matcher &m) { + auto nms = std::dynamic_pointer_cast(m.get_match_root()); + if (!nms) { + return false; + } + + const auto new_args = nms->input_values(); + // vector of new nGraph operations + NodeVector new_ops; + auto attrs = nms->get_attrs(); + attrs.output_type = element::i32; + auto nms_new = std::make_shared>( + new_args.at(0), + new_args.at(1), + attrs); + new_ops.emplace_back(nms_new); + + Output output_0 = nms_new->output(0); + Output output_1 = nms_new->output(1); + Output output_2 = nms_new->output(2); + + if (nms->output(1).get_element_type() != output_1.get_element_type()) { + output_1 = std::make_shared(output_1, nms->output(1).get_element_type()); + output_1.get_node_shared_ptr()->set_friendly_name(nms->get_friendly_name() + "/convert.1"); + new_ops.emplace_back(output_1.get_node_shared_ptr()); + } + + if (nms->output(2).get_element_type() != output_2.get_element_type()) { + output_2 = std::make_shared(output_2, nms->output(2).get_element_type()); + output_2.get_node_shared_ptr()->set_friendly_name(nms->get_friendly_name() + "/convert.2"); + new_ops.emplace_back(output_2.get_node_shared_ptr()); + } + + nms_new->set_friendly_name(nms->get_friendly_name()); + ngraph::copy_runtime_info(nms, new_ops); + ngraph::replace_node(nms, {output_0, output_1, output_2}); + return true; + }; + + auto m = std::make_shared(nms, matcher_name); + this->register_matcher(m, callback); +} diff --git a/inference-engine/src/transformations/src/transformations/op_conversions/convert_multiclass_nms_to_multiclass_nms_ie.cpp b/inference-engine/src/transformations/src/transformations/op_conversions/convert_multiclass_nms_to_multiclass_nms_ie.cpp new file mode 100644 index 00000000000000..1f236610e53ed7 --- /dev/null +++ b/inference-engine/src/transformations/src/transformations/op_conversions/convert_multiclass_nms_to_multiclass_nms_ie.cpp @@ -0,0 +1,67 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "itt.hpp" +#include +#include + +#include 
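+// Like the MatrixNms pass above, this matcher replaces opset8::MulticlassNms
+// with the internal NmsStaticShapeIE wrapper so output shapes become static;
+// indices are produced as i32 and converted back when the original op used i64.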
+#include +#include + +#include +#include + +#include "ngraph_ops/nms_static_shape_ie.hpp" +#include "transformations/op_conversions/convert_multiclass_nms_to_multiclass_nms_ie.hpp" + +NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvertMulticlassNmsToMulticlassNmsIE, "ConvertMulticlassNmsToMulticlassNmsIE", 0); + +ngraph::pass::ConvertMulticlassNmsToMulticlassNmsIE::ConvertMulticlassNmsToMulticlassNmsIE() { + MATCHER_SCOPE(ConvertMulticlassNmsToMulticlassNmsIE); + auto nms = ngraph::pattern::wrap_type(); + + ngraph::matcher_pass_callback callback = [](pattern::Matcher &m) { + auto nms = std::dynamic_pointer_cast(m.get_match_root()); + if (!nms) { + return false; + } + + const auto new_args = nms->input_values(); + // vector of new nGraph operations + NodeVector new_ops; + auto attrs = nms->get_attrs(); + attrs.output_type = element::i32; + + auto nms_new = std::make_shared>( + new_args.at(0), + new_args.at(1), + attrs); + new_ops.emplace_back(nms_new); + + Output output_0 = nms_new->output(0); + Output output_1 = nms_new->output(1); + Output output_2 = nms_new->output(2); + + if (nms->output(1).get_element_type() != output_1.get_element_type()) { + output_1 = std::make_shared(output_1, nms->output(1).get_element_type()); + output_1.get_node_shared_ptr()->set_friendly_name(nms->get_friendly_name() + "/convert.1"); + new_ops.emplace_back(output_1.get_node_shared_ptr()); + } + + if (nms->output(2).get_element_type() != output_2.get_element_type()) { + output_2 = std::make_shared(output_2, nms->output(2).get_element_type()); + output_2.get_node_shared_ptr()->set_friendly_name(nms->get_friendly_name() + "/convert.2"); + new_ops.emplace_back(output_2.get_node_shared_ptr()); + } + + nms_new->set_friendly_name(nms->get_friendly_name()); + ngraph::copy_runtime_info(nms, new_ops); + ngraph::replace_node(nms, {output_0, output_1, output_2}); + return true; + }; + + auto m = std::make_shared(nms, matcher_name); + this->register_matcher(m, callback); +} diff --git a/inference-engine/tests/functional/inference_engine/serialization/single_layer/matrix_nms.cpp b/inference-engine/tests/functional/inference_engine/serialization/single_layer/matrix_nms.cpp new file mode 100644 index 00000000000000..750b483bd29414 --- /dev/null +++ b/inference-engine/tests/functional/inference_engine/serialization/single_layer/matrix_nms.cpp @@ -0,0 +1,60 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "shared_test_classes/single_layer/matrix_nms.hpp" + +using namespace ngraph; +using namespace LayerTestsDefinitions; + +namespace { + TEST_P(MatrixNmsLayerTest, Serialize) { + Serialize(); + } + + const std::vector netPrecisions = { + InferenceEngine::Precision::FP32, + InferenceEngine::Precision::FP16 + }; + + const std::vector inShapeParams = { + InputShapeParams{3, 100, 5}, + InputShapeParams{1, 10, 50}, + InputShapeParams{2, 50, 50} + }; + + const std::vector sortResultType = {op::v8::MatrixNms::SortResultType::CLASSID, + op::v8::MatrixNms::SortResultType::SCORE, + op::v8::MatrixNms::SortResultType::NONE}; + const std::vector outType = {element::i32, element::i64}; + const std::vector topKParams = { + TopKParams{-1, 5}, + TopKParams{100, -1} + }; + const std::vector thresholdParams = { + ThresholdParams{0.0f, 2.0f, 0.0f}, + ThresholdParams{0.1f, 1.5f, 0.2f} + }; + const std::vector nmsTopK = {-1, 100}; + const std::vector keepTopK = {-1, 5}; + const std::vector backgroundClass = {-1, 0}; + const std::vector normalized = {true, false}; + const std::vector decayFunction
= {op::v8::MatrixNms::DecayFunction::GAUSSIAN, + op::v8::MatrixNms::DecayFunction::LINEAR}; + const auto nmsParams = ::testing::Combine(::testing::ValuesIn(inShapeParams), + ::testing::Combine(::testing::Values(InferenceEngine::Precision::FP32), + ::testing::Values(InferenceEngine::Precision::I32), + ::testing::Values(InferenceEngine::Precision::FP32)), + ::testing::ValuesIn(sortResultType), + ::testing::ValuesIn(outType), + ::testing::ValuesIn(topKParams), + ::testing::ValuesIn(thresholdParams), + ::testing::ValuesIn(backgroundClass), + ::testing::ValuesIn(normalized), + ::testing::ValuesIn(decayFunction), + ::testing::Values(CommonTestUtils::DEVICE_CPU)); + + INSTANTIATE_TEST_CASE_P(smoke_MatrixNmsLayerTest, MatrixNmsLayerTest, nmsParams, MatrixNmsLayerTest::getTestCaseName); } // namespace diff --git a/inference-engine/tests/functional/inference_engine/serialization/single_layer/multiclass_nms.cpp b/inference-engine/tests/functional/inference_engine/serialization/single_layer/multiclass_nms.cpp new file mode 100644 index 00000000000000..203b20c4ab4cf9 --- /dev/null +++ b/inference-engine/tests/functional/inference_engine/serialization/single_layer/multiclass_nms.cpp @@ -0,0 +1,60 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "shared_test_classes/single_layer/multiclass_nms.hpp" + +using namespace ngraph; +using namespace LayerTestsDefinitions; + +namespace { +TEST_P(MulticlassNmsLayerTest, Serialize) { + Serialize(); +} + +const std::vector netPrecisions = { + InferenceEngine::Precision::FP32, InferenceEngine::Precision::FP16}; + +const std::vector inShapeParams = { + InputShapeParams{3, 100, 5}, InputShapeParams{1, 10, 50}, + InputShapeParams{2, 50, 50}}; + +const std::vector nmsTopK = {-1, 20}; +const std::vector iouThreshold = {0.7f}; +const std::vector scoreThreshold = {0.7f}; +const std::vector backgroundClass = {-1, 0}; +const std::vector keepTopK = {-1, 30}; +const std::vector outType = {element::i32, element::i64}; + +const std::vector sortResultType = { + op::v8::MulticlassNms::SortResultType::SCORE, + op::v8::MulticlassNms::SortResultType::CLASSID, + op::v8::MulticlassNms::SortResultType::NONE}; +const std::vector sortResDesc = {true, false}; +const std::vector nmsEta = {0.6f, 1.0f}; +const std::vector normalized = {true, false}; + +const auto nmsParams = ::testing::Combine( + ::testing::ValuesIn(inShapeParams), + ::testing::Combine(::testing::Values(InferenceEngine::Precision::FP32), + ::testing::Values(InferenceEngine::Precision::I32), + ::testing::Values(InferenceEngine::Precision::FP32)), + ::testing::ValuesIn(nmsTopK), + ::testing::Combine(::testing::ValuesIn(iouThreshold), + ::testing::ValuesIn(scoreThreshold), + ::testing::ValuesIn(nmsEta)), + ::testing::ValuesIn(backgroundClass), + ::testing::ValuesIn(keepTopK), + ::testing::ValuesIn(outType), + ::testing::ValuesIn(sortResultType), + ::testing::Combine(::testing::ValuesIn(sortResDesc), + ::testing::ValuesIn(normalized)), + ::testing::Values(CommonTestUtils::DEVICE_CPU)); + +INSTANTIATE_TEST_CASE_P(smoke_MulticlassNmsLayerTest, + MulticlassNmsLayerTest, + nmsParams, + MulticlassNmsLayerTest::getTestCaseName); } // namespace diff --git a/inference-engine/tests/functional/inference_engine/skip_tests_config.cpp b/inference-engine/tests/functional/inference_engine/skip_tests_config.cpp index 75fb7d791899ed..aff04cee6e5eab 100644 --- a/inference-engine/tests/functional/inference_engine/skip_tests_config.cpp +++ 
b/inference-engine/tests/functional/inference_engine/skip_tests_config.cpp @@ -15,6 +15,6 @@ std::vector disabledTestPatterns() { // TODO: task 32568, enable after supporting constants outputs in plugins ".*TransformationTests\\.ConstFoldingPriorBox.*", // azure is failing after #6199 - ".*NmsLayerTest.*", + ".*/NmsLayerTest.*", }; } diff --git a/inference-engine/tests/functional/inference_engine/transformations/convert_matrix_nms_to_matrix_nms_ie_internal.cpp b/inference-engine/tests/functional/inference_engine/transformations/convert_matrix_nms_to_matrix_nms_ie_internal.cpp new file mode 100644 index 00000000000000..afd4cd26a5b348 --- /dev/null +++ b/inference-engine/tests/functional/inference_engine/transformations/convert_matrix_nms_to_matrix_nms_ie_internal.cpp @@ -0,0 +1,58 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "common_test_utils/ngraph_test_utils.hpp" + +using namespace testing; +using namespace ngraph; + +TEST(TransformationTests, ConvertMatrixNmsToMatrixNmsIE) { + std::shared_ptr f(nullptr), f_ref(nullptr); + { + auto boxes = std::make_shared(element::f32, Shape{1, 1000, 4}); + auto scores = std::make_shared(element::f32, Shape{1, 1, 1000}); + + auto nms = std::make_shared(boxes, scores, opset8::MatrixNms::Attributes()); + + f = std::make_shared(NodeVector{nms}, ParameterVector{boxes, scores}); + + ngraph::pass::Manager manager; + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.run_passes(f); + ASSERT_NO_THROW(check_rt_info(f)); + ASSERT_TRUE(f->get_output_partial_shape(0).is_static()) << "Shape " << f->get_output_partial_shape(0) << " should be static"; + } + + { + auto boxes = std::make_shared(element::f32, Shape{1, 1000, 4}); + auto scores = std::make_shared(element::f32, Shape{1, 1, 1000}); + auto nms = std::make_shared>(boxes, scores, opset8::MatrixNms::Attributes()); + + f_ref = std::make_shared(NodeVector{nms}, ParameterVector{boxes, scores}); + ASSERT_TRUE(f_ref->get_output_partial_shape(0).is_static()) << "Shape " << f_ref->get_output_partial_shape(0) << " should be static"; + } + + auto res = compare_functions(f, f_ref); + ASSERT_TRUE(res.first) << res.second; +} diff --git a/inference-engine/tests/functional/inference_engine/transformations/convert_multiclass_nms_to_multiclass_nms_ie_internal.cpp b/inference-engine/tests/functional/inference_engine/transformations/convert_multiclass_nms_to_multiclass_nms_ie_internal.cpp new file mode 100644 index 00000000000000..1f0f6f856f76ba --- /dev/null +++ b/inference-engine/tests/functional/inference_engine/transformations/convert_multiclass_nms_to_multiclass_nms_ie_internal.cpp @@ -0,0 +1,58 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "common_test_utils/ngraph_test_utils.hpp" + +using namespace testing; +using namespace ngraph; + +TEST(TransformationTests, ConvertMulticlassNmsToMulticlassNmsIE) { + std::shared_ptr f(nullptr), f_ref(nullptr); + { + auto boxes = std::make_shared(element::f32, Shape{1, 1000, 4}); + auto scores = std::make_shared(element::f32, Shape{1, 1, 1000}); + + auto nms = std::make_shared(boxes, scores, opset8::MulticlassNms::Attributes()); + + f = 
std::make_shared(NodeVector{nms}, ParameterVector{boxes, scores}); + + ngraph::pass::Manager manager; + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.run_passes(f); + ASSERT_NO_THROW(check_rt_info(f)); + ASSERT_TRUE(f->get_output_partial_shape(0).is_static()) << "Shape " << f->get_output_partial_shape(0) << " should be static"; + } + + { + auto boxes = std::make_shared(element::f32, Shape{1, 1000, 4}); + auto scores = std::make_shared(element::f32, Shape{1, 1, 1000}); + auto nms = std::make_shared>(boxes, scores, opset8::MulticlassNms::Attributes()); + + f_ref = std::make_shared(NodeVector{nms}, ParameterVector{boxes, scores}); + ASSERT_TRUE(f_ref->get_output_partial_shape(0).is_static()) << "Shape " << f_ref->get_output_partial_shape(0) << " should be static"; + } + + auto res = compare_functions(f, f_ref); + ASSERT_TRUE(res.first) << res.second; +} diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/matrix_nms.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/matrix_nms.cpp new file mode 100644 index 00000000000000..25766a89fc8fbc --- /dev/null +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/matrix_nms.cpp @@ -0,0 +1,54 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include + +#include "single_layer_tests/matrix_nms.hpp" +#include "common_test_utils/test_constants.hpp" + +using namespace LayerTestsDefinitions; +using namespace InferenceEngine; +using namespace ngraph; + +const std::vector inShapeParams = { + InputShapeParams{3, 100, 5}, + InputShapeParams{1, 10, 50}, + InputShapeParams{2, 50, 50} +}; + +const std::vector sortResultType = {op::v8::MatrixNms::SortResultType::CLASSID, + op::v8::MatrixNms::SortResultType::SCORE, + op::v8::MatrixNms::SortResultType::NONE}; +const std::vector outType = {element::i32, element::i64}; +const std::vector topKParams = { + TopKParams{-1, 5}, + TopKParams{100, -1} +}; +const std::vector thresholdParams = { + ThresholdParams{0.0f, 2.0f, 0.0f}, + ThresholdParams{0.1f, 1.5f, 0.2f} +}; +const std::vector nmsTopK = {-1, 100}; +const std::vector keepTopK = {-1, 5}; +const std::vector backgroundClass = {-1, 0}; +const std::vector normalized = {true, false}; +const std::vector decayFunction = {op::v8::MatrixNms::DecayFunction::GAUSSIAN, + op::v8::MatrixNms::DecayFunction::LINEAR}; + +const auto nmsParams = ::testing::Combine(::testing::ValuesIn(inShapeParams), + ::testing::Combine(::testing::Values(Precision::FP32), + ::testing::Values(Precision::I32), + ::testing::Values(Precision::FP32)), + ::testing::ValuesIn(sortResultType), + ::testing::ValuesIn(outType), + ::testing::ValuesIn(topKParams), + ::testing::ValuesIn(thresholdParams), + ::testing::ValuesIn(backgroundClass), + ::testing::ValuesIn(normalized), + ::testing::ValuesIn(decayFunction), + ::testing::Values(CommonTestUtils::DEVICE_CPU) +); + +INSTANTIATE_TEST_CASE_P(smoke_MatrixNmsLayerTest, MatrixNmsLayerTest, nmsParams, MatrixNmsLayerTest::getTestCaseName); diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/multiclass_nms.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/multiclass_nms.cpp new file mode 100644 index 00000000000000..6622a24ce3d2a9 --- /dev/null +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/multiclass_nms.cpp @@ -0,0 
+1,37 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "single_layer_tests/multiclass_nms.hpp" + +#include + +#include "common_test_utils/test_constants.hpp" + +using namespace LayerTestsDefinitions; +using namespace InferenceEngine; +using namespace ngraph; + +const std::vector inShapeParams = {InputShapeParams {3, 100, 5}, InputShapeParams {1, 10, 50}, InputShapeParams {2, 50, 50}}; + +const std::vector nmsTopK = {-1, 20}; +const std::vector iouThreshold = {0.7f}; +const std::vector scoreThreshold = {0.7f}; +const std::vector backgroundClass = {-1, 0}; +const std::vector keepTopK = {-1, 30}; +const std::vector outType = {element::i32, element::i64}; + +const std::vector sortResultType = { + op::v8::MulticlassNms::SortResultType::SCORE, op::v8::MulticlassNms::SortResultType::CLASSID, op::v8::MulticlassNms::SortResultType::NONE}; +const std::vector sortResDesc = {true, false}; +const std::vector nmsEta = {0.6f, 1.0f}; +const std::vector normalized = {true, false}; + +const auto nmsParams = ::testing::Combine( + ::testing::ValuesIn(inShapeParams), + ::testing::Combine(::testing::Values(Precision::FP32), ::testing::Values(Precision::I32), ::testing::Values(Precision::FP32)), ::testing::ValuesIn(nmsTopK), + ::testing::Combine(::testing::ValuesIn(iouThreshold), ::testing::ValuesIn(scoreThreshold), ::testing::ValuesIn(nmsEta)), + ::testing::ValuesIn(backgroundClass), ::testing::ValuesIn(keepTopK), ::testing::ValuesIn(outType), ::testing::ValuesIn(sortResultType), + ::testing::Combine(::testing::ValuesIn(sortResDesc), ::testing::ValuesIn(normalized)), ::testing::Values(CommonTestUtils::DEVICE_CPU)); + +INSTANTIATE_TEST_CASE_P(smoke_MulticlassNmsLayerTest, MulticlassNmsLayerTest, nmsParams, MulticlassNmsLayerTest::getTestCaseName); diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp index 14d25be9a17f6a..8019fc072a8fdb 100644 --- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp @@ -77,7 +77,7 @@ std::vector disabledTestPatterns() { // need to implement Export / Import R"(.*IEClassImportExportTestP.*)", // azure is failing after #6199 - R"(.*NmsLayerTest.*)" + R"(.*/NmsLayerTest.*)" }; #ifdef __APPLE__ // TODO: Issue 55717 diff --git a/inference-engine/tests/functional/plugin/shared/include/single_layer_tests/matrix_nms.hpp b/inference-engine/tests/functional/plugin/shared/include/single_layer_tests/matrix_nms.hpp new file mode 100644 index 00000000000000..21e89bf0474455 --- /dev/null +++ b/inference-engine/tests/functional/plugin/shared/include/single_layer_tests/matrix_nms.hpp @@ -0,0 +1,15 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "shared_test_classes/single_layer/matrix_nms.hpp" + +namespace LayerTestsDefinitions { + +TEST_P(MatrixNmsLayerTest, CompareWithRefs) { + Run(); +}; + +} // namespace LayerTestsDefinitions diff --git a/inference-engine/tests/functional/plugin/shared/include/single_layer_tests/multiclass_nms.hpp b/inference-engine/tests/functional/plugin/shared/include/single_layer_tests/multiclass_nms.hpp new file mode 100644 index 00000000000000..e89ba2d126c3cb --- /dev/null +++ b/inference-engine/tests/functional/plugin/shared/include/single_layer_tests/multiclass_nms.hpp @@ -0,0 
+1,15 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "shared_test_classes/single_layer/multiclass_nms.hpp" + +namespace LayerTestsDefinitions { + +TEST_P(MulticlassNmsLayerTest, CompareWithRefs) { + Run(); +}; + +} // namespace LayerTestsDefinitions diff --git a/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/matrix_nms.hpp b/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/matrix_nms.hpp new file mode 100644 index 00000000000000..9be3b082c3b808 --- /dev/null +++ b/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/matrix_nms.hpp @@ -0,0 +1,58 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +#include "shared_test_classes/base/layer_test_utils.hpp" +#include "ngraph_functions/builders.hpp" + +namespace LayerTestsDefinitions { + +using InputShapeParams = std::tuple; // Number of classes + +using InputPrecisions = std::tuple; // iou_threshold, score_threshold, soft_nms_sigma precisions + +using TopKParams = std::tuple; // Maximum number of boxes to be selected per batch element + +using ThresholdParams = std::tuple; // filter out boxes with low confidence score after decaying + +using NmsParams = std::tuple; // Device name + +class MatrixNmsLayerTest : public testing::WithParamInterface, virtual public LayerTestsUtils::LayerTestsCommon { +public: + static std::string getTestCaseName(testing::TestParamInfo obj); + void GenerateInputs() override; + void Compare(const std::vector>> &expectedOutputs, + const std::vector &actualOutputs) + override; + +protected: + void SetUp() override; + +private: + size_t numBatches, numBoxes, numClasses; + size_t maxOutputBoxesPerClass; + size_t maxOutputBoxesPerBatch; +}; + +} // namespace LayerTestsDefinitions diff --git a/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/multiclass_nms.hpp b/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/multiclass_nms.hpp new file mode 100644 index 00000000000000..4add46d8ce13f2 --- /dev/null +++ b/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/multiclass_nms.hpp @@ -0,0 +1,59 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +#include "ngraph_functions/builders.hpp" +#include "shared_test_classes/base/layer_test_utils.hpp" + +namespace LayerTestsDefinitions { + +using InputShapeParams = std::tuple; // Number of classes + +using InputPrecisions = std::tuple; // iou_threshold, score_threshold, + // soft_nms_sigma precisions + +using InputfloatVar = std::tuple; // nmsEta + +using InputboolVar = std::tuple; // normalized + +using MulticlassNmsParams = std::tuple; + +class MulticlassNmsLayerTest : public testing::WithParamInterface, virtual public LayerTestsUtils::LayerTestsCommon { +public: + static std::string getTestCaseName(testing::TestParamInfo obj); + void GenerateInputs() override; + void Compare(const std::vector>>& expectedOutputs, + const std::vector& actualOutputs) override; + +protected: + void SetUp() override; + +private: + size_t numBatches, numBoxes, numClasses; + size_t maxOutputBoxesPerClass; + size_t maxOutputBoxesPerBatch; +}; + +} // namespace LayerTestsDefinitions diff --git 
a/inference-engine/tests/functional/shared_test_classes/src/single_layer/matrix_nms.cpp b/inference-engine/tests/functional/shared_test_classes/src/single_layer/matrix_nms.cpp new file mode 100644 index 00000000000000..2b33a25ae1e764 --- /dev/null +++ b/inference-engine/tests/functional/shared_test_classes/src/single_layer/matrix_nms.cpp @@ -0,0 +1,250 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "shared_test_classes/single_layer/matrix_nms.hpp" + +namespace LayerTestsDefinitions { + +using namespace ngraph; +using namespace InferenceEngine; +using namespace FuncTestUtils::PrecisionUtils; + +std::string MatrixNmsLayerTest::getTestCaseName(testing::TestParamInfo obj) { + InputShapeParams inShapeParams; + InputPrecisions inPrecisions; + op::v8::MatrixNms::SortResultType sortResultType; + element::Type outType; + int backgroundClass; + op::v8::MatrixNms::DecayFunction decayFunction; + TopKParams topKParams; + ThresholdParams thresholdParams; + bool normalized; + std::string targetDevice; + std::tie(inShapeParams, inPrecisions, sortResultType, outType, topKParams, thresholdParams, + backgroundClass, normalized, decayFunction, targetDevice) = obj.param; + + size_t numBatches, numBoxes, numClasses; + std::tie(numBatches, numBoxes, numClasses) = inShapeParams; + + Precision paramsPrec, maxBoxPrec, thrPrec; + std::tie(paramsPrec, maxBoxPrec, thrPrec) = inPrecisions; + + int nmsTopK, keepTopK; + std::tie(nmsTopK, keepTopK) = topKParams; + + float score_threshold, gaussian_sigma, post_threshold; + std::tie(score_threshold, gaussian_sigma, post_threshold) = thresholdParams; + + std::ostringstream result; + result << "numBatches=" << numBatches << "_numBoxes=" << numBoxes << "_numClasses=" << numClasses << "_"; + result << "paramsPrec=" << paramsPrec << "_maxBoxPrec=" << maxBoxPrec << "_thrPrec=" << thrPrec << "_"; + result << "sortResultType=" << sortResultType << "_normalized=" << normalized << "_"; + result << "outType=" << outType << "_nmsTopK=" << nmsTopK << "_keepTopK=" << keepTopK << "_"; + result << "backgroundClass=" << backgroundClass << "_decayFunction=" << decayFunction << "_"; + result << "score_threshold=" << score_threshold << "_gaussian_sigma=" << gaussian_sigma << "_"; + result << "post_threshold=" << post_threshold << "_TargetDevice=" << targetDevice; + return result.str(); +} + +void MatrixNmsLayerTest::GenerateInputs() { + size_t it = 0; + for (const auto &input : cnnNetwork.getInputsInfo()) { + const auto &info = input.second; + Blob::Ptr blob; + + if (it == 1) { + blob = make_blob_with_precision(info->getTensorDesc()); + blob->allocate(); + CommonTestUtils::fill_data_random_float(blob, 1, 0, 100000); + } else { + blob = GenerateInput(*info); + } + inputs.push_back(blob); + it++; + } +} + +void MatrixNmsLayerTest::Compare(const std::vector>> &expectedOutputs, + const std::vector &actualOutputs) { + auto batchIndex = -1; + std::vector numPerBatch(numBatches); + for (int outputIndex = static_cast(expectedOutputs.size()) - 1; outputIndex >= 0; outputIndex--) { + const auto& actual = actualOutputs[outputIndex]; + const auto _dims = actual->getTensorDesc().getDims(); + if (_dims.size() == 1 && _dims[0] == numBatches) { + batchIndex = outputIndex; + auto memory = InferenceEngine::as(actual); + IE_ASSERT(memory); + const auto lockedMemory = memory->wmap(); + const auto actualBuffer = lockedMemory.as(); + auto buffer = reinterpret_cast(actualBuffer); + std::copy_n(buffer, numBatches, numPerBatch.begin()); + } + } + + for (int
outputIndex = static_cast(expectedOutputs.size()) - 1; outputIndex >= 0; outputIndex--) { + const auto& expected = expectedOutputs[outputIndex]; + const auto& actual = actualOutputs[outputIndex]; + + // Compare Selected Outputs & Selected Indices + if (outputIndex != batchIndex) { + const auto &expectedBuffer = expected.second.data(); + auto memory = InferenceEngine::as(actual); + IE_ASSERT(memory); + const auto lockedMemory = memory->wmap(); + const auto actualBuffer = lockedMemory.as(); + + auto k = static_cast(expected.first.size()) / actual->getTensorDesc().getPrecision().size(); + // W/A for int4, uint4 + if (expected.first == ngraph::element::Type_t::u4 || expected.first == ngraph::element::Type_t::i4) { + k /= 2; + } + if (outputIndex == 2) { + if (expected.second.size() != k * actual->byteSize()) + throw std::runtime_error("Expected and actual sizes of the 3rd output are different"); + } + + const auto &precision = actual->getTensorDesc().getPrecision(); + auto expected_offset = 0; + auto actual_offset = 0; + for (size_t i = 0; i < numPerBatch.size(); i++) { + auto validNums = numPerBatch[i]; + switch (precision) { + case InferenceEngine::Precision::FP32: { + switch (expected.first) { + case ngraph::element::Type_t::f32: + LayerTestsUtils::LayerTestsCommon::Compare( + reinterpret_cast(expectedBuffer) + expected_offset * 6, + reinterpret_cast(actualBuffer) + actual_offset * 6, validNums * 6, 1e-5f); + break; + case ngraph::element::Type_t::f64: + LayerTestsUtils::LayerTestsCommon::Compare( + reinterpret_cast(expectedBuffer) + expected_offset * 6, + reinterpret_cast(actualBuffer) + actual_offset * 6, validNums * 6, 1e-5f); + break; + default: + break; + } + + const auto fBuffer = lockedMemory.as(); + for (size_t tailing = validNums * 6; tailing < maxOutputBoxesPerBatch * 6; tailing++) { + ASSERT_TRUE(std::abs(fBuffer[(actual_offset * 6 + tailing)] - -1.f) < 1e-5) + << "Invalid default value: " << fBuffer[actual_offset * 6 + tailing] << " at index: " << (actual_offset * 6 + tailing); + } + break; + } + case InferenceEngine::Precision::I32: { + switch (expected.first) { + case ngraph::element::Type_t::i32: + LayerTestsUtils::LayerTestsCommon::Compare( + reinterpret_cast(expectedBuffer) + expected_offset, + reinterpret_cast(actualBuffer) + actual_offset, validNums, 0); + break; + case ngraph::element::Type_t::i64: + LayerTestsUtils::LayerTestsCommon::Compare( + reinterpret_cast(expectedBuffer) + expected_offset, + reinterpret_cast(actualBuffer) + actual_offset, validNums, 0); + break; + default: + break; + } + const auto iBuffer = lockedMemory.as(); + for (size_t tailing = validNums; tailing < maxOutputBoxesPerBatch; tailing++) { + ASSERT_TRUE(iBuffer[actual_offset + tailing] == -1) << "Invalid default value: " << iBuffer[actual_offset + tailing] << " at index: " << (actual_offset + tailing); + } + break; + } + default: + FAIL() << "Comparator for " << precision << " precision isn't supported"; + } + expected_offset += validNums; + actual_offset += maxOutputBoxesPerBatch; + } + } else { + const auto &expectedBuffer = expected.second.data(); + auto memory = InferenceEngine::as(actual); + IE_ASSERT(memory); + const auto lockedMemory = memory->wmap(); + const auto actualBuffer = lockedMemory.as(); + + auto k = static_cast(expected.first.size()) / actual->getTensorDesc().getPrecision().size(); + // W/A for int4, uint4 + if (expected.first == ngraph::element::Type_t::u4 || expected.first == ngraph::element::Type_t::i4) { + k /= 2; + } + if (outputIndex == 2) { + if (expected.second.size() != k * actual->byteSize()) + throw std::runtime_error("Expected and actual sizes of the 3rd output are different");
+ } + + const auto &precision = actual->getTensorDesc().getPrecision(); + size_t size = expected.second.size() / (k * actual->getTensorDesc().getPrecision().size()); + switch (precision) { + case InferenceEngine::Precision::I32: { + switch (expected.first) { + case ngraph::element::Type_t::i32: + LayerTestsUtils::LayerTestsCommon::Compare( + reinterpret_cast(expectedBuffer), + reinterpret_cast(actualBuffer), size, 0); + break; + case ngraph::element::Type_t::i64: + LayerTestsUtils::LayerTestsCommon::Compare( + reinterpret_cast(expectedBuffer), + reinterpret_cast(actualBuffer), size, 0); + break; + default: + break; + } + break; + } + default: + FAIL() << "Comparator for " << precision << " precision isn't supported"; + } + } + } +} + +void MatrixNmsLayerTest::SetUp() { + InputShapeParams inShapeParams; + InputPrecisions inPrecisions; + op::v8::MatrixNms::Attributes attrs; + TopKParams topKParams; + ThresholdParams thresholdParams; + + std::tie(inShapeParams, inPrecisions, attrs.sort_result_type, attrs.output_type, topKParams, thresholdParams, + attrs.background_class, attrs.normalized, attrs.decay_function, targetDevice) = this->GetParam(); + + std::tie(attrs.nms_top_k, attrs.keep_top_k) = topKParams; + std::tie(attrs.score_threshold, attrs.gaussian_sigma, attrs.post_threshold) = thresholdParams; + std::tie(numBatches, numBoxes, numClasses) = inShapeParams; + auto realClasses = numClasses; + if (attrs.background_class >= 0 && attrs.background_class <= numClasses) { + realClasses = realClasses - 1; + } + + maxOutputBoxesPerClass = 0; + if (attrs.nms_top_k >= 0) + maxOutputBoxesPerClass = std::min(numBoxes, static_cast(attrs.nms_top_k)); + else + maxOutputBoxesPerClass = numBoxes; + + maxOutputBoxesPerBatch = maxOutputBoxesPerClass * realClasses; + if (attrs.keep_top_k >= 0) + maxOutputBoxesPerBatch = + std::min(maxOutputBoxesPerBatch, static_cast(attrs.keep_top_k)); + Precision paramsPrec, maxBoxPrec, thrPrec; + std::tie(paramsPrec, maxBoxPrec, thrPrec) = inPrecisions; + + const std::vector boxesShape{numBatches, numBoxes, 4}, scoresShape{numBatches, numClasses, numBoxes}; + auto ngPrc = convertIE2nGraphPrc(paramsPrec); + auto params = builder::makeParams(ngPrc, {boxesShape, scoresShape}); + auto paramOuts = helpers::convert2OutputVector(helpers::castOps2Nodes(params)); + auto nms = std::make_shared(paramOuts[0], paramOuts[1], attrs); + auto nms_0_identity = std::make_shared(nms->output(0), opset5::Constant::create(element::f32, Shape{1}, {1})); + auto nms_1_identity = std::make_shared(nms->output(1), opset5::Constant::create(attrs.output_type, Shape{1}, {1})); + auto nms_2_identity = std::make_shared(nms->output(2), opset5::Constant::create(attrs.output_type, Shape{1}, {1})); + function = std::make_shared(OutputVector{nms_0_identity, nms_1_identity, nms_2_identity}, params, "NMS"); +} + +} // namespace LayerTestsDefinitions diff --git a/inference-engine/tests/functional/shared_test_classes/src/single_layer/multiclass_nms.cpp b/inference-engine/tests/functional/shared_test_classes/src/single_layer/multiclass_nms.cpp new file mode 100644 index 00000000000000..e8532bad22706f --- /dev/null +++ b/inference-engine/tests/functional/shared_test_classes/src/single_layer/multiclass_nms.cpp @@ -0,0 +1,270 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "shared_test_classes/single_layer/multiclass_nms.hpp" + +namespace LayerTestsDefinitions { + +using namespace ngraph; +using namespace InferenceEngine; +using namespace 
FuncTestUtils::PrecisionUtils; + +std::string MulticlassNmsLayerTest::getTestCaseName(testing::TestParamInfo obj) { + InputShapeParams inShapeParams; + InputPrecisions inPrecisions; + int32_t nmsTopK, backgroundClass, keepTopK; + element::Type outType; + + op::util::NmsBase::SortResultType sortResultType; + + InputfloatVar inFloatVar; + InputboolVar inboolVar; + + std::string targetDevice; + + std::tie(inShapeParams, inPrecisions, nmsTopK, inFloatVar, backgroundClass, keepTopK, outType, sortResultType, inboolVar, targetDevice) = obj.param; + + size_t numBatches, numBoxes, numClasses; + std::tie(numBatches, numBoxes, numClasses) = inShapeParams; + + Precision paramsPrec, maxBoxPrec, thrPrec; + std::tie(paramsPrec, maxBoxPrec, thrPrec) = inPrecisions; + + float iouThr, scoreThr, nmsEta; + std::tie(iouThr, scoreThr, nmsEta) = inFloatVar; + + bool sortResCB, normalized; + std::tie(sortResCB, normalized) = inboolVar; + + std::ostringstream result; + result << "numBatches=" << numBatches << "_numBoxes=" << numBoxes << "_numClasses=" << numClasses << "_"; + result << "paramsPrec=" << paramsPrec << "_maxBoxPrec=" << maxBoxPrec << "_thrPrec=" << thrPrec << "_"; + result << "nmsTopK=" << nmsTopK << "_"; + result << "iouThr=" << iouThr << "_scoreThr=" << scoreThr << "_backgroundClass=" << backgroundClass << "_"; + result << "keepTopK=" << keepTopK << "_outType=" << outType << "_"; + result << "sortResultType=" << sortResultType << "_sortResCrossBatch=" << sortResCB << "_nmsEta=" << nmsEta << "_normalized=" << normalized << "_"; + result << "TargetDevice=" << targetDevice; + return result.str(); +} + +void MulticlassNmsLayerTest::GenerateInputs() { + size_t it = 0; + for (const auto& input : cnnNetwork.getInputsInfo()) { + const auto& info = input.second; + Blob::Ptr blob; + + if (it == 1) { + blob = make_blob_with_precision(info->getTensorDesc()); + blob->allocate(); + CommonTestUtils::fill_data_random_float(blob, 1, 0, 1000); + } else { + blob = GenerateInput(*info); + } + inputs.push_back(blob); + it++; + } +} + +void MulticlassNmsLayerTest::Compare(const std::vector>>& expectedOutputs, + const std::vector& actualOutputs) { + auto batchIndex = -1; + std::vector numPerBatch(numBatches); + for (int outputIndex = static_cast(expectedOutputs.size()) - 1; outputIndex >= 0; outputIndex--) { + const auto& actual = actualOutputs[outputIndex]; + const auto _dims = actual->getTensorDesc().getDims(); + if (_dims.size() == 1 && _dims[0] == numBatches) { + batchIndex = outputIndex; + auto memory = InferenceEngine::as(actual); + IE_ASSERT(memory); + const auto lockedMemory = memory->wmap(); + const auto actualBuffer = lockedMemory.as(); + auto buffer = reinterpret_cast(actualBuffer); + std::copy_n(buffer, numBatches, numPerBatch.begin()); + } + } + + for (int outputIndex = static_cast(expectedOutputs.size()) - 1; outputIndex >= 0; outputIndex--) { + const auto& expected = expectedOutputs[outputIndex]; + const auto& actual = actualOutputs[outputIndex]; + + // Compare Selected Outputs & Selected Indices + if (outputIndex != batchIndex) { + const auto& expectedBuffer = expected.second.data(); + auto memory = InferenceEngine::as(actual); + IE_ASSERT(memory); + const auto lockedMemory = memory->wmap(); + const auto actualBuffer = lockedMemory.as(); + + auto k = static_cast(expected.first.size()) / actual->getTensorDesc().getPrecision().size(); + // W/A for int4, uint4 + if (expected.first == ngraph::element::Type_t::u4 || expected.first == ngraph::element::Type_t::i4) { + k /= 2; + } + if (outputIndex == 2) { + if 
(expected.second.size() != k * actual->byteSize()) + throw std::runtime_error("Expected and actual sizes of the " + "3rd output are different"); + } + + const auto& precision = actual->getTensorDesc().getPrecision(); + auto expected_offset = 0; + auto actual_offset = 0; + for (size_t i = 0; i < numPerBatch.size(); i++) { + auto validNums = numPerBatch[i]; + switch (precision) { + case InferenceEngine::Precision::FP32: { + switch (expected.first) { + case ngraph::element::Type_t::f32: + LayerTestsUtils::LayerTestsCommon::Compare(reinterpret_cast(expectedBuffer) + expected_offset * 6, + reinterpret_cast(actualBuffer) + actual_offset * 6, validNums * 6, 1e-5f); + break; + case ngraph::element::Type_t::f64: + LayerTestsUtils::LayerTestsCommon::Compare(reinterpret_cast(expectedBuffer) + expected_offset * 6, + reinterpret_cast(actualBuffer) + actual_offset * 6, validNums * 6, 1e-5f); + break; + default: + break; + } + + const auto fBuffer = lockedMemory.as(); + for (size_t tailing = validNums * 6; tailing < maxOutputBoxesPerBatch * 6; tailing++) { + ASSERT_TRUE(std::abs(fBuffer[(actual_offset * 6 + tailing)] - -1.f) < 1e-5) + << "Invalid default value: " << fBuffer[actual_offset * 6 + tailing] << " at index: " << (actual_offset * 6 + tailing); + } + break; + } + case InferenceEngine::Precision::I32: { + switch (expected.first) { + case ngraph::element::Type_t::i32: + LayerTestsUtils::LayerTestsCommon::Compare(reinterpret_cast(expectedBuffer) + expected_offset, + reinterpret_cast(actualBuffer) + actual_offset, validNums, 0); + break; + case ngraph::element::Type_t::i64: + LayerTestsUtils::LayerTestsCommon::Compare(reinterpret_cast(expectedBuffer) + expected_offset, + reinterpret_cast(actualBuffer) + actual_offset, validNums, 0); + break; + default: + break; + } + const auto iBuffer = lockedMemory.as(); + for (size_t tailing = validNums; tailing < maxOutputBoxesPerBatch; tailing++) { + ASSERT_TRUE(iBuffer[actual_offset + tailing] == -1) << "Invalid default value: " << iBuffer[actual_offset + tailing] << " at index: " << (actual_offset + tailing); + } + break; + } + default: + FAIL() << "Comparator for " << precision << " precision isn't supported"; + } + expected_offset += validNums; + actual_offset += maxOutputBoxesPerBatch; + } + } else { + const auto& expectedBuffer = expected.second.data(); + auto memory = InferenceEngine::as(actual); + IE_ASSERT(memory); + const auto lockedMemory = memory->wmap(); + const auto actualBuffer = lockedMemory.as(); + + auto k = static_cast(expected.first.size()) / actual->getTensorDesc().getPrecision().size(); + // W/A for int4, uint4 + if (expected.first == ngraph::element::Type_t::u4 || expected.first == ngraph::element::Type_t::i4) { + k /= 2; + } + if (outputIndex == 2) { + if (expected.second.size() != k * actual->byteSize()) + throw std::runtime_error("Expected and actual sizes of the " + "3rd output are different"); + } + + const auto& precision = actual->getTensorDesc().getPrecision(); + size_t size = expected.second.size() / (k * actual->getTensorDesc().getPrecision().size()); + switch (precision) { + case InferenceEngine::Precision::I32: { + switch (expected.first) { + case ngraph::element::Type_t::i32: + LayerTestsUtils::LayerTestsCommon::Compare(reinterpret_cast(expectedBuffer), reinterpret_cast(actualBuffer), + size, 0); + break; + case ngraph::element::Type_t::i64: + LayerTestsUtils::LayerTestsCommon::Compare(reinterpret_cast(expectedBuffer), reinterpret_cast(actualBuffer), + size, 0); + break; + default: + break; + } + break; + } + default: + FAIL() << "Comparator for " << precision << " precision isn't supported"; + } + } + } +} + +void 
MulticlassNmsLayerTest::SetUp() { + InputShapeParams inShapeParams; + InputPrecisions inPrecisions; + op::v8::MulticlassNms::Attributes attrs; + int32_t maxOutBoxesPerClass, backgroundClass, keepTopK; // signed: -1 means "no limit" + element::Type outType; + + op::util::NmsBase::SortResultType sortResultType; + + InputfloatVar inFloatVar; + InputboolVar inboolVar; + + std::tie(inShapeParams, inPrecisions, maxOutBoxesPerClass, inFloatVar, backgroundClass, keepTopK, outType, sortResultType, inboolVar, targetDevice) = + this->GetParam(); + + // size_t numBatches, numBoxes, numClasses; + std::tie(numBatches, numBoxes, numClasses) = inShapeParams; + auto realClasses = numClasses; + if (backgroundClass >= 0 && backgroundClass <= numClasses) { + realClasses = realClasses - 1; + } + + maxOutputBoxesPerClass = 0; + if (maxOutBoxesPerClass >= 0) + maxOutputBoxesPerClass = std::min(numBoxes, static_cast(maxOutBoxesPerClass)); + else + maxOutputBoxesPerClass = numBoxes; + + maxOutputBoxesPerBatch = maxOutputBoxesPerClass * realClasses; + if (keepTopK >= 0) + maxOutputBoxesPerBatch = std::min(maxOutputBoxesPerBatch, static_cast(keepTopK)); + + Precision paramsPrec, maxBoxPrec, thrPrec; + std::tie(paramsPrec, maxBoxPrec, thrPrec) = inPrecisions; + + float iouThr, scoreThr, nmsEta; + std::tie(iouThr, scoreThr, nmsEta) = inFloatVar; + + bool sortResCB, normalized; + std::tie(sortResCB, normalized) = inboolVar; + + const std::vector boxesShape {numBatches, numBoxes, 4}, scoresShape {numBatches, numClasses, numBoxes}; + auto ngPrc = convertIE2nGraphPrc(paramsPrec); + auto params = builder::makeParams(ngPrc, {boxesShape, scoresShape}); + auto paramOuts = helpers::convert2OutputVector(helpers::castOps2Nodes(params)); + + attrs.iou_threshold = iouThr; + attrs.score_threshold = scoreThr; + attrs.nms_eta = nmsEta; + attrs.sort_result_type = sortResultType; + attrs.sort_result_across_batch = sortResCB; + attrs.output_type = outType; + attrs.nms_top_k = maxOutBoxesPerClass; + attrs.keep_top_k = keepTopK; + attrs.background_class = backgroundClass; + attrs.normalized = normalized; + + auto nms = std::make_shared(paramOuts[0], paramOuts[1], attrs); + + auto nms_0_identity = std::make_shared(nms->output(0), opset5::Constant::create(ngPrc, Shape {1}, {1})); + auto nms_1_identity = std::make_shared(nms->output(1), opset5::Constant::create(outType, Shape {1}, {1})); + auto nms_2_identity = std::make_shared(nms->output(2), opset5::Constant::create(outType, Shape {1}, {1})); + function = std::make_shared(OutputVector {nms_0_identity, nms_1_identity, nms_2_identity}, params, "MulticlassNMS"); +} + +} // namespace LayerTestsDefinitions diff --git a/ngraph/core/src/op/matrix_nms.cpp b/ngraph/core/src/op/matrix_nms.cpp index 7d3731f3b114de..3cac8707883edd 100644 --- a/ngraph/core/src/op/matrix_nms.cpp +++ b/ngraph/core/src/op/matrix_nms.cpp @@ -74,7 +74,8 @@ bool ngraph::op::v8::MatrixNms::visit_attributes(AttributeVisitor& visitor) namespace ngraph { template <> - EnumNames& EnumNames::get() + NGRAPH_API EnumNames& + EnumNames::get() { static auto enum_names = EnumNames( "op::v8::MatrixNms::DecayFunction", diff --git a/ngraph/core/src/op/util/nms_base.cpp b/ngraph/core/src/op/util/nms_base.cpp index 4fce4c46fc49f9..7a9b4f3d35cf10 100644 --- a/ngraph/core/src/op/util/nms_base.cpp +++ b/ngraph/core/src/op/util/nms_base.cpp @@ -163,7 +163,7 @@ void op::util::NmsBase::validate_and_infer_types() namespace ngraph { template <> - EnumNames& + NGRAPH_API EnumNames& EnumNames::get() { static auto enum_names = EnumNames(