diff --git a/ngraph/core/reference/include/ngraph/runtime/reference/multiclass_nms.hpp b/ngraph/core/reference/include/ngraph/runtime/reference/multiclass_nms.hpp index 10392676d4920e..cc9a1d18b6af82 100644 --- a/ngraph/core/reference/include/ngraph/runtime/reference/multiclass_nms.hpp +++ b/ngraph/core/reference/include/ngraph/runtime/reference/multiclass_nms.hpp @@ -46,7 +46,8 @@ namespace ngraph const ngraph::element::Type output_type, const std::vector& selected_outputs, const std::vector& selected_indices, - int64_t valid_outputs); + const std::vector& valid_outputs, + const ngraph::element::Type selected_scores_type); } // namespace reference } // namespace runtime } // namespace ngraph diff --git a/ngraph/core/reference/src/runtime/reference/multiclass_nms.cpp b/ngraph/core/reference/src/runtime/reference/multiclass_nms.cpp index 324b39afb14253..f41739f765e219 100644 --- a/ngraph/core/reference/src/runtime/reference/multiclass_nms.cpp +++ b/ngraph/core/reference/src/runtime/reference/multiclass_nms.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include "ngraph/runtime/reference/multiclass_nms.hpp" #include "ngraph/shape.hpp" @@ -23,20 +24,20 @@ namespace ngraph { struct Rectangle { - Rectangle(float y_left, float x_left, float y_right, float x_right) - : y1{y_left} - , x1{x_left} - , y2{y_right} + Rectangle(float x_left, float y_left, float x_right, float y_right) + : x1{x_left} + , y1{y_left} , x2{x_right} + , y2{y_right} { } Rectangle() = default; - float y1 = 0.0f; float x1 = 0.0f; - float y2 = 0.f; + float y1 = 0.0f; float x2 = 0.0f; + float y2 = 0.0f; }; static float intersectionOverUnion(const Rectangle& boxI, const Rectangle& boxJ) @@ -63,34 +64,33 @@ namespace ngraph struct SelectedIndex { - SelectedIndex(int64_t batch_idx, int64_t class_idx, int64_t box_idx) - : batch_index(batch_idx) - , class_index(class_idx) - , box_index(box_idx) + SelectedIndex(int64_t batch_idx, int64_t box_idx, int64_t num_box) + : flattened_index(batch_idx * num_box + box_idx) { } SelectedIndex() = default; - int64_t batch_index = 0; - int64_t class_index = 0; - int64_t box_index = 0; + int64_t flattened_index = 0; }; - struct SelectedScore + struct SelectedOutput { - SelectedScore(float batch_idx, float class_idx, float score) - : batch_index{batch_idx} - , class_index{class_idx} + SelectedOutput(float class_idx, float score, float x1, float y1, float x2, float y2) + : class_index{class_idx} , box_score{score} + , xmin{x1} + , ymin{y1} + , xmax{x2} + , ymax{y2} { } - SelectedScore() = default; + SelectedOutput() = default; - float batch_index = 0.0f; float class_index = 0.0f; float box_score = 0.0f; + float xmin, ymin, xmax, ymax; }; struct BoxInfo @@ -142,45 +142,252 @@ namespace ngraph const Shape& selected_indices_shape, int64_t* valid_outputs) { - BoxInfo info; - intersectionOverUnion(Rectangle{}, Rectangle{}); - *valid_outputs = 0; + auto func = [iou_threshold](float iou) { + return iou <= iou_threshold ? 1.0f : 0.0f; + }; + + // boxes shape: {num_batches, num_boxes, 4} + // scores shape: {num_batches, num_classes, num_boxes} + int64_t num_batches = static_cast(scores_data_shape[0]); + int64_t num_classes = static_cast(scores_data_shape[1]); + int64_t num_boxes = static_cast(boxes_data_shape[1]); + + SelectedIndex* selected_indices_ptr = + reinterpret_cast(selected_indices); + SelectedOutput* selected_scores_ptr = + reinterpret_cast(selected_outputs); + + size_t boxes_per_class = static_cast(nms_top_k); + + std::vector filteredBoxes; + + for (int64_t batch = 0; batch < num_batches; batch++) + { + const float* boxesPtr = boxes_data + batch * num_boxes * 4; + Rectangle* r = reinterpret_cast(const_cast(boxesPtr)); + + int64_t num_dets = 0; + + for (int64_t class_idx = 0; class_idx < num_classes; class_idx++) + { + const float* scoresPtr = + scores_data + batch * (num_classes * num_boxes) + class_idx * num_boxes; + + std::vector candidate_boxes; + candidate_boxes.reserve(num_boxes); + + for (int64_t box_idx = 0; box_idx < num_boxes; box_idx++) + { + if (scoresPtr[box_idx] > score_threshold) + { + candidate_boxes.emplace_back( + r[box_idx], box_idx, scoresPtr[box_idx], 0, batch, class_idx); + } + } + + std::priority_queue sorted_boxes(std::less(), + std::move(candidate_boxes)); + + std::vector selected; + // Get the next box with top score, filter by iou_threshold + + BoxInfo next_candidate; + float original_score; + + while (!sorted_boxes.empty() && selected.size() < boxes_per_class) + { + next_candidate = sorted_boxes.top(); + original_score = next_candidate.score; + sorted_boxes.pop(); + + bool should_hard_suppress = false; + for (int64_t j = static_cast(selected.size()) - 1; + j >= next_candidate.suppress_begin_index; + --j) + { + float iou = + intersectionOverUnion(next_candidate.box, selected[j].box); + next_candidate.score *= func(iou); + + if (iou >= iou_threshold) + { + should_hard_suppress = true; + break; + } + + if (next_candidate.score <= score_threshold) + { + break; + } + } + + next_candidate.suppress_begin_index = selected.size(); + + if (!should_hard_suppress) + { + if (next_candidate.score == original_score) + { + selected.push_back(next_candidate); + continue; + } + if (next_candidate.score > score_threshold) + { + sorted_boxes.push(next_candidate); + } + } + } + + for (const auto& box_info : selected) + { + filteredBoxes.push_back(box_info); + } + num_dets += filteredBoxes.size(); + } + + *valid_outputs++ = num_dets; + } + + bool sort_result_across_batch = false; // TODO + + if (sort_result_across_batch) + { + std::sort(filteredBoxes.begin(), + filteredBoxes.end(), + [](const BoxInfo& l, const BoxInfo& r) { + return (l.score > r.score) || + (l.score == r.score && l.batch_index < r.batch_index) || + (l.score == r.score && l.batch_index == r.batch_index && + l.class_index < r.class_index) || + (l.score == r.score && l.batch_index == r.batch_index && + l.class_index == r.class_index && l.index < r.index); + }); + } + + size_t max_num_of_selected_indices = selected_indices_shape[0]; + size_t output_size = std::min(filteredBoxes.size(), max_num_of_selected_indices); + + size_t idx; + for (idx = 0; idx < output_size; idx++) + { + const auto& box_info = filteredBoxes[idx]; + SelectedIndex selected_index{ + box_info.batch_index, box_info.index, num_boxes}; + SelectedOutput selected_score{static_cast(box_info.class_index), + box_info.score, + box_info.box.x1, box_info.box.y1, + box_info.box.x2, box_info.box.y2}; + + selected_indices_ptr[idx] = selected_index; + selected_scores_ptr[idx] = selected_score; + } + + SelectedIndex selected_index_filler{0, 0, 0}; + SelectedOutput selected_score_filler{0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}; + for (; idx < max_num_of_selected_indices; idx++) + { + selected_indices_ptr[idx] = selected_index_filler; + selected_scores_ptr[idx] = selected_score_filler; + } } void multiclass_nms_postprocessing(const HostTensorVector& outputs, const ngraph::element::Type output_type, const std::vector& selected_outputs, const std::vector& selected_indices, - int64_t valid_outputs) - { - outputs[0]->set_shape(Shape{static_cast(valid_outputs), 6}); - float* ptr = outputs[0]->get_data_ptr(); - memcpy(ptr, selected_outputs.data(), valid_outputs * sizeof(float) * 6); + const std::vector& valid_outputs, + const ngraph::element::Type selected_scores_type) + { + auto num_selected = std::accumulate(valid_outputs.begin(), valid_outputs.end(), 0); + + /* shape & type */ + + outputs[0]->set_element_type(selected_scores_type); // "selected_outputs" + outputs[0]->set_shape(Shape{static_cast(num_selected), 6}); + + size_t num_of_outputs = outputs.size(); + + if (num_of_outputs >= 2) + { + outputs[1]->set_element_type(output_type); // "selected_indices" + outputs[1]->set_shape(Shape{static_cast(num_selected)}); + } + + if (num_of_outputs >= 3) + { + outputs[2]->set_element_type(output_type); // "selected_num" + outputs[2]->set_shape(Shape{valid_outputs.size()}); + } + + /* data */ + size_t selected_outputs_size = num_selected * 6; + + switch (selected_scores_type) + { + case element::Type_t::bf16: + { + bfloat16* scores_ptr = outputs[0]->get_data_ptr(); + for (size_t i = 0; i < selected_outputs_size; ++i) + { + scores_ptr[i] = bfloat16(selected_outputs[i]); + } + } + break; + case element::Type_t::f16: + { + float16* scores_ptr = outputs[0]->get_data_ptr(); + for (size_t i = 0; i < selected_outputs_size; ++i) + { + scores_ptr[i] = float16(selected_outputs[i]); + } + } + break; + case element::Type_t::f32: + { + float* scores_ptr = outputs[0]->get_data_ptr(); + memcpy(scores_ptr, selected_outputs.data(), selected_outputs_size * sizeof(float)); + } + break; + default:; + } + + if (num_of_outputs < 2) + { + return; + } + + size_t selected_indices_size = num_selected * 1; - outputs[1]->set_shape(Shape{static_cast(valid_outputs), 1}); if (output_type == ngraph::element::i64) { int64_t* indices_ptr = outputs[1]->get_data_ptr(); - memcpy(indices_ptr, selected_indices.data(), valid_outputs * sizeof(int64_t)); + memcpy(indices_ptr, selected_indices.data(), selected_indices_size * sizeof(int64_t)); } else { int32_t* indices_ptr = outputs[1]->get_data_ptr(); - for (size_t i = 0; i < (size_t)valid_outputs; ++i) + for (size_t i = 0; i < selected_indices_size; ++i) { indices_ptr[i] = static_cast(selected_indices[i]); } } + if (num_of_outputs < 3) + { + return; + } + if (output_type == ngraph::element::i64) { int64_t* valid_outputs_ptr = outputs[2]->get_data_ptr(); - *valid_outputs_ptr = valid_outputs; + memcpy(valid_outputs_ptr, valid_outputs.data(), valid_outputs.size() * sizeof(int64_t)); } else { int32_t* valid_outputs_ptr = outputs[2]->get_data_ptr(); - *valid_outputs_ptr = static_cast(valid_outputs); + for (size_t i = 0; i < valid_outputs.size(); ++i) + { + valid_outputs_ptr[i] = static_cast(valid_outputs[i]); + } } } } // namespace reference diff --git a/ngraph/test/backend/multiclass_nms.in.cpp b/ngraph/test/backend/multiclass_nms.in.cpp index e6b2e105f7de6f..5162d036ac0e01 100644 --- a/ngraph/test/backend/multiclass_nms.in.cpp +++ b/ngraph/test/backend/multiclass_nms.in.cpp @@ -32,37 +32,43 @@ static string s_manifest = "${MANIFEST}"; NGRAPH_TEST(${BACKEND_NAME}, multiclass_nms_SCORE_point_box_format) { - std::vector boxes_data = {0.5, 0.5, 1.0, 1.0, 0.5, 0.6, 1.0, 1.0, - 0.5, 0.4, 1.0, 1.0, 0.5, 10.5, 1.0, 1.0, - 0.5, 10.6, 1.0, 1.0, 0.5, 100.5, 1.0, 1.0}; + std::vector boxes_data = {0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, + 0.0, -0.1, 1.0, 0.9, 0.0, 10.0, 1.0, 11.0, + 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0}; std::vector scores_data = {0.9, 0.75, 0.6, 0.95, 0.5, 0.3}; - const int64_t max_output_boxes_per_class_data = 3; - const float iou_threshold_data = 0.5f; - const float score_threshold_data = 0.0f; + const int64_t nms_top_k = 3; + const float iou_threshold = 0.5f; + const float score_threshold = 0.0f; const auto sort_result_type = op::v8::MulticlassNms::SortResultType::SCORE; + const auto keep_top_k = -1; + const auto background_class = -1; + const auto nms_eta = 1.0f; + const auto boxes_shape = Shape{1, 6, 4}; const auto scores_shape = Shape{1, 1, 6}; const auto boxes = make_shared(element::f32, boxes_shape); const auto scores = make_shared(element::f32, scores_shape); - auto max_output_boxes_per_class = - op::Constant::create(element::i64, Shape{}, {max_output_boxes_per_class_data}); - auto iou_threshold = op::Constant::create(element::f32, Shape{}, {iou_threshold_data}); - auto score_threshold = - op::Constant::create(element::f32, Shape{}, {score_threshold_data}); - auto soft_nms_sigma = op::Constant::create(element::f32, Shape{}, {0.0f}); + auto nms = make_shared(boxes, scores, - sort_result_type); + sort_result_type, + element::i64, + iou_threshold, + score_threshold, + nms_top_k, + keep_top_k, + background_class, + nms_eta); auto f = make_shared(nms, ParameterVector{boxes, scores}); auto backend = runtime::Backend::create("${BACKEND_NAME}"); - auto selected_indeces = backend->create_tensor(element::i64, Shape{3, 3}); - auto selected_scores = backend->create_tensor(element::f32, Shape{3, 3}); + auto selected_outputs = backend->create_tensor(element::f32, Shape{3, 6}); // TODO: dynamic shape + auto selected_indeces = backend->create_tensor(element::i64, Shape{3}); // TODO auto valid_outputs = backend->create_tensor(element::i64, Shape{1}); auto backend_boxes = backend->create_tensor(element::f32, boxes_shape); @@ -72,15 +78,17 @@ NGRAPH_TEST(${BACKEND_NAME}, multiclass_nms_SCORE_point_box_format) auto handle = backend->compile(f); - handle->call({selected_indeces, selected_scores, valid_outputs}, + handle->call({selected_outputs, selected_indeces, valid_outputs}, {backend_boxes, backend_scores}); + auto selected_scores_value = read_vector(selected_outputs); auto selected_indeces_value = read_vector(selected_indeces); - auto selected_scores_value = read_vector(selected_scores); auto valid_outputs_value = read_vector(valid_outputs); - std::vector expected_selected_indices = {0, 0, 3, 0, 0, 0, 0, 0, 5}; - std::vector expected_selected_scores = {0.0, 0.0, 0.95, 0.0, 0.0, 0.9, 0.0, 0.0, 0.3}; + std::vector expected_selected_indices = {3, 0, 5}; + std::vector expected_selected_scores = {0.0, 0.95, 0.0, 10.0, 1.0, 11.0, + 0.0, 0.9, 0.0, 0.0, 1.0, 1.0, + 0.0, 0.3, 0.0, 100.0, 1.0, 101.0}; std::vector expected_valid_outputs = {3}; EXPECT_EQ(expected_selected_indices, selected_indeces_value); @@ -676,4 +684,4 @@ NGRAPH_TEST(${BACKEND_NAME}, multiclass_nms_by_IOU_and_scores_without_constants) EXPECT_EQ(expected_selected_scores, selected_scores_value); EXPECT_EQ(expected_valid_outputs, valid_outputs_value); } -*/ \ No newline at end of file +*/ diff --git a/ngraph/test/runtime/interpreter/evaluates_map.cpp b/ngraph/test/runtime/interpreter/evaluates_map.cpp index f0e55154042503..38583072198ce1 100644 --- a/ngraph/test/runtime/interpreter/evaluates_map.cpp +++ b/ngraph/test/runtime/interpreter/evaluates_map.cpp @@ -905,8 +905,7 @@ namespace &valid_outputs, info.sort_result_descending); - auto selected_scores_type = - (inputs.size() < 4) ? element::f32 : inputs[3]->get_element_type(); + auto selected_scores_type = element::f32; // FIXME runtime::reference::nms5_postprocessing(outputs, info.output_type, @@ -962,7 +961,7 @@ namespace max_output_boxes_per_batch = std::min(max_output_boxes_per_batch, (int64_t)keep_top_k); - result[0] = Dimension(0, max_output_boxes_per_batch * scores_ps[0].get_length()); + result[0] = max_output_boxes_per_batch * scores_ps[0].get_length(); } } @@ -1015,7 +1014,7 @@ namespace std::vector selected_outputs(info.selected_outputs_shape_size); std::vector selected_indices(info.selected_indices_shape_size); - int64_t valid_outputs = 0; + std::vector valid_outputs(inputs[0]->get_shape()[0]); runtime::reference::multiclass_nms(info.boxes_data.data(), info.boxes_shape, @@ -1032,13 +1031,17 @@ namespace info.selected_outputs_shape, selected_indices.data(), info.selected_indices_shape, - &valid_outputs); + valid_outputs.data()); + + auto selected_scores_type = element::f32; // FIXME runtime::reference::multiclass_nms_postprocessing(outputs, op->get_output_type(), selected_outputs, selected_indices, - valid_outputs); + valid_outputs, + selected_scores_type); + return true; } @@ -2608,10 +2611,6 @@ namespace { continue; } - if (element_type != node->get_output_element_type(i)) - { - throw std::logic_error("Output node element types is not equal"); - } } switch (element_type) {