diff --git a/inference-engine/src/cldnn_engine/cldnn_program.cpp b/inference-engine/src/cldnn_engine/cldnn_program.cpp
index 22a55d743bd153..1841631937b171 100644
--- a/inference-engine/src/cldnn_engine/cldnn_program.cpp
+++ b/inference-engine/src/cldnn_engine/cldnn_program.cpp
@@ -6,6 +6,7 @@
 #include "ngraph/ops.hpp"
 #include "ngraph_ops/nms_ie_internal.hpp"
 #include "cldnn_itt.h"
+#include "cldnn/runtime/debug_configuration.hpp"

 using namespace InferenceEngine;
 using namespace InferenceEngine::details;
@@ -231,6 +232,12 @@ void Program::CreateSingleLayerPrimitive(cldnn::topology& topology, const std::s
     OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "Program::CreateSingleLayerPrimitive");
     InitProfileInfo(op->get_friendly_name(), op->get_type_name());

+    GPU_DEBUG_GET_INSTANCE(debug_config);
+    GPU_DEBUG_IF(debug_config->verbose >= 2) {
+        GPU_DEBUG_COUT << "Process " << "op::v" << op->get_type_info().version << "::" << op->get_type_name() << " operation "
+                       << "(friendly_name=" << op->get_friendly_name() << ")" << std::endl;
+    }
+
     bool is_created = false;
     const ngraph::NodeTypeInfo* op_type_info = &op->get_type_info();
     while (op_type_info != nullptr) {
@@ -251,8 +258,8 @@ void Program::CreateSingleLayerPrimitive(cldnn::topology& topology, const std::s

     if (!is_created) {
         IE_THROW() << "Operation: " << op->get_friendly_name()
-                   << " of type " << op->get_type_name()
-                   << "(op::v" << op->get_type_info().version << ") is not supported";
+                   << " of type " << op->get_type_name()
+                   << "(op::v" << op->get_type_info().version << ") is not supported";
     }
 }

diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_ref.cpp
index 2bed9a76962a74..bed25eb288a72b 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_ref.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_ref.cpp
@@ -23,12 +23,16 @@ ParamsKey BorderKernelRef::GetSupportedKey() const {
     k.EnableInputLayout(DataLayout::byxf);
     k.EnableInputLayout(DataLayout::bfzyx);
     k.EnableInputLayout(DataLayout::bfwzyx);
+    k.EnableInputLayout(DataLayout::b_fs_yx_fsv16);
+    k.EnableInputLayout(DataLayout::b_fs_zyx_fsv16);

     k.EnableOutputLayout(DataLayout::bfyx);
     k.EnableOutputLayout(DataLayout::yxfb);
     k.EnableOutputLayout(DataLayout::byxf);
     k.EnableOutputLayout(DataLayout::bfzyx);
     k.EnableOutputLayout(DataLayout::bfwzyx);
+    k.EnableOutputLayout(DataLayout::b_fs_yx_fsv16);
+    k.EnableOutputLayout(DataLayout::b_fs_zyx_fsv16);

     k.EnableTensorOffset();
     k.EnableTensorPitches();
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/border_gpu_ref.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/border_gpu_ref.cl
index 38234b49b5fbd4..3bc1f018f1fb41 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/border_gpu_ref.cl
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/border_gpu_ref.cl
@@ -5,10 +5,35 @@
 #include "include/batch_headers/data_types.cl"
 #include "include/batch_headers/fetch_data.cl"

+inline uint FUNC(get_input_index)(uint b, uint f, uint w, uint z, uint y, uint x)
+{
+#if INPUT0_DIMS < 5
+    return INPUT0_GET_INDEX(b, f, y, x);
+#elif INPUT0_DIMS == 5
+    return INPUT0_GET_INDEX(b, f, z, y, x);
+#elif INPUT0_DIMS == 6
+    return INPUT0_GET_INDEX(b, f, w, z, y, x);
+#else
+#error [clDNN border_gpu_ref.cl]: input format - not supported
+#endif
+}
+
+inline uint FUNC(get_output_index)(uint b, uint f, uint w, uint z, uint y, uint x)
+{
+#if OUTPUT_DIMS < 5
+    return OUTPUT_GET_INDEX(b, f, y, x);
+#elif OUTPUT_DIMS == 5
+    return OUTPUT_GET_INDEX(b, f, z, y, x);
+#elif OUTPUT_DIMS == 6
+    return OUTPUT_GET_INDEX(b, f, w, z, y, x);
+#else
+#error [clDNN border_gpu_ref.cl]: output format - not supported
+#endif
+}
 KERNEL(border_gpu_ref)(
-    const __global UNIT_TYPE* input,
-    __global UNIT_TYPE* output)
+    const __global INPUT0_TYPE* input,
+    __global OUTPUT_TYPE* output)
 {
     // [CONSTEXPR]
     // Border sizes (left-top set and right-bottom set):

@@ -72,7 +97,7 @@ KERNEL(border_gpu_ref)(
     const uint out_w = out_yw / OUTPUT_SIZE_Y;

 #ifdef BORDER_TYPE_CONSTANT
-    UNIT_TYPE in_val = TO_UNIT_TYPE(BORDER_VALUE);
+    INPUT0_TYPE in_val = TO_INPUT0_TYPE(BORDER_VALUE);

     if (out_x >= blt_sx & out_x < in_lx &
         out_y >= blt_sy & out_y < in_ly &
@@ -88,7 +113,7 @@ KERNEL(border_gpu_ref)(
         const uint in_f = out_f - blt_sf;
         const uint in_b = out_b - blt_sb;

-        const uint in_pos = GET_DATA_INDEX_6D(INPUT0, in_b, in_f, in_w, in_z, in_y, in_x);
+        const uint in_pos = FUNC_CALL(get_input_index)(in_b, in_f, in_w, in_z, in_y, in_x);
         in_val = input[in_pos];
     }
 #elif defined BORDER_TYPE_EDGE
@@ -99,8 +124,8 @@ KERNEL(border_gpu_ref)(
     const uint in_f = (out_f >= blt_sf & out_f < in_lf) ? out_f - blt_sf : (out_f < blt_sf ? 0 : in_sf - 1);
     const uint in_b = (out_b >= blt_sb & out_b < in_lb) ? out_b - blt_sb : (out_b < blt_sb ? 0 : in_sb - 1);

-    const uint in_pos = GET_DATA_INDEX_6D(INPUT0, in_b, in_f, in_w, in_z, in_y, in_x);
-    UNIT_TYPE in_val = input[in_pos];
+    const uint in_pos = FUNC_CALL(get_input_index)(in_b, in_f, in_w, in_z, in_y, in_x);
+    INPUT0_TYPE in_val = input[in_pos];
 #elif defined BORDER_TYPE_MIRROR
     const uint in_x = (out_x >= blt_sx & out_x < in_lx) ? out_x - blt_sx : (out_x < blt_sx ? blt_sx - 1 - out_x : in_sx + in_lx - 1 - out_x);
     const uint in_y = (out_y >= blt_sy & out_y < in_ly) ? out_y - blt_sy : (out_y < blt_sy ? blt_sy - 1 - out_y : in_sy + in_ly - 1 - out_y);
@@ -109,8 +134,8 @@ KERNEL(border_gpu_ref)(
     const uint in_f = (out_f >= blt_sf & out_f < in_lf) ? out_f - blt_sf : (out_f < blt_sf ? blt_sf - 1 - out_f : in_sf + in_lf - 1 - out_f);
     const uint in_b = (out_b >= blt_sb & out_b < in_lb) ? out_b - blt_sb : (out_b < blt_sb ? blt_sb - 1 - out_b : in_sb + in_lb - 1 - out_b);

-    const uint in_pos = GET_DATA_INDEX_6D(INPUT0, in_b, in_f, in_w, in_z, in_y, in_x);
-    UNIT_TYPE in_val = input[in_pos];
+    const uint in_pos = FUNC_CALL(get_input_index)(in_b, in_f, in_w, in_z, in_y, in_x);
+    INPUT0_TYPE in_val = input[in_pos];
 #elif defined BORDER_TYPE_MIRROR_101
     const uint in_x = (out_x >= blt_sx & out_x < in_lx) ? out_x - blt_sx : (out_x < blt_sx ? blt_sx - out_x : in_sx + in_lx - 2 - out_x);
     const uint in_y = (out_y >= blt_sy & out_y < in_ly) ? out_y - blt_sy : (out_y < blt_sy ? blt_sy - out_y : in_sy + in_ly - 2 - out_y);
@@ -119,12 +144,12 @@ KERNEL(border_gpu_ref)(
     const uint in_f = (out_f >= blt_sf & out_f < in_lf) ? out_f - blt_sf : (out_f < blt_sf ? blt_sf - out_f : in_sf + in_lf - 2 - out_f);
     const uint in_b = (out_b >= blt_sb & out_b < in_lb) ? out_b - blt_sb : (out_b < blt_sb ? blt_sb - out_b : in_sb + in_lb - 2 - out_b);

-    const uint in_pos = GET_DATA_INDEX_6D(INPUT0, in_b, in_f, in_w, in_z, in_y, in_x);
-    UNIT_TYPE in_val = input[in_pos];
+    const uint in_pos = FUNC_CALL(get_input_index)(in_b, in_f, in_w, in_z, in_y, in_x);
+    INPUT0_TYPE in_val = input[in_pos];
 #else
 #error Unsupported border type.
 #endif

-    const uint out_pos = GET_DATA_INDEX_6D(OUTPUT, out_b, out_f, out_w, out_z, out_y, out_x);
+    const uint out_pos = FUNC_CALL(get_output_index)(out_b, out_f, out_w, out_z, out_y, out_x);
     output[out_pos] = in_val;
 }
diff --git a/inference-engine/thirdparty/clDNN/src/border.cpp b/inference-engine/thirdparty/clDNN/src/border.cpp
index 886275d6719562..f68fe34c517679 100644
--- a/inference-engine/thirdparty/clDNN/src/border.cpp
+++ b/inference-engine/thirdparty/clDNN/src/border.cpp
@@ -94,16 +94,6 @@ border_inst::typed_primitive_inst(network& network, border_node const& node) : p
     auto rb_sizes = argument.right_bottom_sizes.sub(tensor(0));
     auto b_type = argument.type;

-    CLDNN_ERROR_NOT_PROPER_FORMAT(node.id(),
-                                  "Input format",
-                                  input_format.value,
-                                  "supported border primitive input formats",
-                                  format::bfyx,
-                                  format::yxfb,
-                                  format::byxf,
-                                  format::bfzyx,
-                                  format::bfwzyx);
-
     tensor null_tensor = tensor(0);

     // Check if sizes of border are in proper range.
diff --git a/inference-engine/thirdparty/clDNN/src/impls/ocl/border.cpp b/inference-engine/thirdparty/clDNN/src/impls/ocl/border.cpp
index 83ef5ef102e9f0..c95189ea099f9c 100644
--- a/inference-engine/thirdparty/clDNN/src/impls/ocl/border.cpp
+++ b/inference-engine/thirdparty/clDNN/src/impls/ocl/border.cpp
@@ -72,22 +72,36 @@ attach_border_impl::attach_border_impl() {
         std::make_tuple(data_types::f16, format::yxfb),
         std::make_tuple(data_types::i8, format::yxfb),
         std::make_tuple(data_types::u8, format::yxfb),

+        std::make_tuple(data_types::f32, format::bfyx),
         std::make_tuple(data_types::f16, format::bfyx),
         std::make_tuple(data_types::i8, format::bfyx),
         std::make_tuple(data_types::u8, format::bfyx),

+        std::make_tuple(data_types::f32, format::byxf),
         std::make_tuple(data_types::f16, format::byxf),
         std::make_tuple(data_types::i8, format::byxf),
         std::make_tuple(data_types::u8, format::byxf),

+        std::make_tuple(data_types::f32, format::bfzyx),
         std::make_tuple(data_types::f16, format::bfzyx),
         std::make_tuple(data_types::i8, format::bfzyx),
         std::make_tuple(data_types::u8, format::bfzyx),

+        std::make_tuple(data_types::f32, format::bfwzyx),
         std::make_tuple(data_types::f16, format::bfwzyx),
         std::make_tuple(data_types::i8, format::bfwzyx),
         std::make_tuple(data_types::u8, format::bfwzyx),
+
+        std::make_tuple(data_types::f32, format::b_fs_yx_fsv16),
+        std::make_tuple(data_types::f16, format::b_fs_yx_fsv16),
+        std::make_tuple(data_types::i8, format::b_fs_yx_fsv16),
+        std::make_tuple(data_types::u8, format::b_fs_yx_fsv16),
+
+        std::make_tuple(data_types::f32, format::b_fs_zyx_fsv16),
+        std::make_tuple(data_types::f16, format::b_fs_zyx_fsv16),
+        std::make_tuple(data_types::i8, format::b_fs_zyx_fsv16),
+        std::make_tuple(data_types::u8, format::b_fs_zyx_fsv16),
     });
 }

diff --git a/inference-engine/thirdparty/clDNN/src/program.cpp b/inference-engine/thirdparty/clDNN/src/program.cpp
index a6f60ee0f3e56b..74f07313392fe6 100644
--- a/inference-engine/thirdparty/clDNN/src/program.cpp
+++ b/inference-engine/thirdparty/clDNN/src/program.cpp
@@ -47,6 +47,7 @@
 #include "lstm_gemm_inst.h"
 #include "mutable_data_inst.h"
 #include "pooling_inst.h"
+#include "border_inst.h"
 #include "primitive_inst.h"
 #include "prior_box_inst.h"
 #include "proposal_inst.h"
@@ -1295,6 +1296,7 @@ void program::set_layout_optimizer_attributes(layout_optimizer& lo) {
             prim.type() != cldnn::input_layout::type_id() &&
             prim.type() != cldnn::softmax::type_id() &&
             prim.type() != cldnn::prior_box::type_id() &&
+            prim.type() != cldnn::border::type_id() &&
             prim.type() != cldnn::resample::type_id() &&
             prim.type() != cldnn::crop::type_id() &&
             prim.type() != cldnn::scale::type_id() &&
diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/border_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/border_gpu_test.cpp
index df8a8d47c203a3..d2b3daf18685a7 100644
--- a/inference-engine/thirdparty/clDNN/tests/test_cases/border_gpu_test.cpp
+++ b/inference-engine/thirdparty/clDNN/tests/test_cases/border_gpu_test.cpp
@@ -106,6 +106,84 @@ TEST(border_gpu, basic_yxfb_0x0x1x2_0x0x3x4_border_constant) {
     }
 }

+TEST(border_gpu, basic_fsv16_0x0x1x2_0x0x3x4_border_constant) {
+    // Input (XY) : 4x3
+    // Output (XY): 10x7
+
+    constexpr auto in_size_b = 1;
+    constexpr auto in_size_f = 1;
+    constexpr auto in_size_y = 3;
+    constexpr auto in_size_x = 4;
+
+    constexpr auto blt_size_b = 0;
+    constexpr auto blt_size_f = 0;
+    constexpr auto blt_size_y = 1;
+    constexpr auto blt_size_x = 2;
+
+    constexpr auto brb_size_b = 0;
+    constexpr auto brb_size_f = 0;
+    constexpr auto brb_size_y = 3;
+    constexpr auto brb_size_x = 4;
+
+    constexpr auto out_size_b = in_size_b + blt_size_b + brb_size_b;
+    constexpr auto out_size_f = in_size_f + blt_size_f + brb_size_f;
+    constexpr auto out_size_y = in_size_y + blt_size_y + brb_size_y;
+    constexpr auto out_size_x = in_size_x + blt_size_x + brb_size_x;
+
+    auto& engine = get_test_engine();
+    auto input = engine.allocate_memory({data_types::f32, format::yxfb, {in_size_b, in_size_f, in_size_x, in_size_y}});
+
+    topology topology;
+    topology.add(
+        input_layout("input", input->get_layout())
+    );
+    topology.add(
+        reorder("border_input", "input", cldnn::format::b_fs_yx_fsv16, cldnn::data_types::f32),
+        border("border", "border_input",
+               {blt_size_b, blt_size_f, blt_size_x, blt_size_y},
+               {brb_size_b, brb_size_f, brb_size_x, brb_size_y},
+               border_type::constant, 0.0f),
+        reorder("output", "border", cldnn::format::yxfb, cldnn::data_types::f32)
+    );
+
+    std::vector<float> input_data = {
+        1, -2, 3, -4,
+        5, 6, 7, 8,
+        -10, 12, 13, -13,
+    };
+    std::vector<float> out_data = {
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 1, -2, 3, -4, 0, 0, 0, 0,
+        0, 0, 5, 6, 7, 8, 0, 0, 0, 0,
+        0, 0, -10, 12, 13, -13, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    };
+    set_values(input, input_data);
+
+    cldnn::network network(engine, topology);
+    network.set_input_data("input", input);
+    auto outputs = network.execute();
+
+    auto output = outputs.at("output").get_memory();
+    cldnn::mem_lock<float> output_ptr(output, get_test_stream());
+
+    ASSERT_EQ(out_data.size(), static_cast<std::size_t>(out_size_b * out_size_f * out_size_y * out_size_x));
+
+    for (auto b = 0; b < out_size_b; ++b) {             // B
+        for (auto f = 0; f < out_size_f; ++f) {         // F
+            for (auto y = 0; y < out_size_y; ++y) {     // Y
+                for (auto x = 0; x < out_size_x; ++x) { // X
+                    auto output_off = ((y * out_size_x + x) * out_size_f + f) * out_size_b + b; // YXFB

+                    EXPECT_EQ(output_ptr[output_off], out_data[output_off]);
+                }
+            }
+        }
+    }
+}
+
 TEST(border_gpu, basic_bfzyx_0x0x1x01_0x0x0x0x3_border_constant) {
     constexpr auto in_size_b = 1;