From b731ce13d85fffd850e786333ac6d743178c6a6a Mon Sep 17 00:00:00 2001
From: Gleb Kazantaev
Date: Thu, 28 May 2020 16:45:48 +0300
Subject: [PATCH 01/24] Fixed NMSIE shape infer function (#648)

---
 .../transformations/src/ngraph_ops/nms_ie.cpp | 13 ++++++++---
 .../convert_nms_to_nms_ie.cpp                 | 15 +++++++++----
 .../convert_gather_to_gather_ie.cpp           |  2 ++
 .../convert_nms_to_nms_ie_test.cpp            | 22 +++++++++----------
 .../transformations/convert_topk_test.cpp     |  1 +
 5 files changed, 35 insertions(+), 18 deletions(-)

diff --git a/inference-engine/src/transformations/src/ngraph_ops/nms_ie.cpp b/inference-engine/src/transformations/src/ngraph_ops/nms_ie.cpp
index 9b7b7caf34215f..c7f696c075aa4e 100644
--- a/inference-engine/src/transformations/src/ngraph_ops/nms_ie.cpp
+++ b/inference-engine/src/transformations/src/ngraph_ops/nms_ie.cpp
@@ -34,11 +34,18 @@ std::shared_ptr<Node> op::NonMaxSuppressionIE::clone_with_new_inputs(const ngrap
 }
 
 void op::NonMaxSuppressionIE::validate_and_infer_types() {
+    auto squeeze_input = [](const Output<Node> & input) -> std::shared_ptr<Node> {
+        return std::make_shared<opset1::Squeeze>(input, opset1::Constant::create(element::i64, Shape{1}, {0}));
+    };
+
     // Calculate output shape using opset1::NonMaxSuppression
+    auto max_output_boxes_per_class = std::dynamic_pointer_cast<opset1::Constant>(input_value(2).get_node_shared_ptr());
     auto nms = std::make_shared<opset1::NonMaxSuppression>(input_value(0), input_value(1),
-            std::make_shared<opset1::Squeeze>(input_value(2), opset1::Constant::create(element::i64, Shape{1}, {0})),
-            std::make_shared<opset1::Squeeze>(input_value(3), opset1::Constant::create(element::i64, Shape{1}, {0})),
-            std::make_shared<opset1::Squeeze>(input_value(4), opset1::Constant::create(element::i64, Shape{1}, {0})));
+            /* the second input is used for output shape calculation; the output shape stays static only if this input is a Constant */
+            max_output_boxes_per_class ? opset1::Constant::create(element::i64, Shape{}, max_output_boxes_per_class->cast_vector<int64_t>()) :
+                                         squeeze_input(input_value(2)),
+            squeeze_input(input_value(3)),
+            squeeze_input(input_value(4)));
 
     set_output_type(0, nms->output(0).get_element_type(), nms->output(0).get_partial_shape());
 }
diff --git a/inference-engine/src/transformations/src/transformations/convert_opset1_to_legacy/convert_nms_to_nms_ie.cpp b/inference-engine/src/transformations/src/transformations/convert_opset1_to_legacy/convert_nms_to_nms_ie.cpp
index c50a393f098ef1..5b2e755249a556 100644
--- a/inference-engine/src/transformations/src/transformations/convert_opset1_to_legacy/convert_nms_to_nms_ie.cpp
+++ b/inference-engine/src/transformations/src/transformations/convert_opset1_to_legacy/convert_nms_to_nms_ie.cpp
@@ -42,10 +42,17 @@ void ngraph::pass::ConvertNMSToNMSIE::convert_nms_to_nms_ie() {
 
         auto new_max_per_class = nms->input_value(2);
         if (max_output_boxes_per_class_rank.get_length() == 0) {
-            new_max_per_class = std::make_shared<opset1::Unsqueeze>(
-                    nms->input_value(2),
-                    opset1::Constant::create(element::i64, Shape{1}, {0}));
-            new_ops.push_back(new_max_per_class.get_node_shared_ptr());
+            // WA: the Constant has to be created manually here because it is required by the NMS shape
+            // inference; otherwise the output shape stays dynamic until the first CF pass is executed.
+            // This workaround can be removed once CF is executed right after the transformation and
+            // before the Validate pass.
+            if (auto new_max_per_class_const = std::dynamic_pointer_cast<opset1::Constant>(new_max_per_class.get_node_shared_ptr())) {
+                new_max_per_class = opset1::Constant::create(element::i64, Shape{1}, new_max_per_class_const->cast_vector<int64_t>());
+            } else {
+                new_max_per_class = std::make_shared<opset1::Unsqueeze>(
+                        nms->input_value(2),
+                        opset1::Constant::create(element::i64, Shape{1}, {0}));
+                new_ops.push_back(new_max_per_class.get_node_shared_ptr());
+            }
         }
         auto new_iou_threshold = nms->input_value(3);
         if (iou_threshold_rank.get_length() == 0) {
diff --git a/inference-engine/tests/functional/inference_engine/transformations/convert_gather_to_gather_ie.cpp b/inference-engine/tests/functional/inference_engine/transformations/convert_gather_to_gather_ie.cpp
index e284ebf717e592..afd0ded67e84c8 100644
--- a/inference-engine/tests/functional/inference_engine/transformations/convert_gather_to_gather_ie.cpp
+++ b/inference-engine/tests/functional/inference_engine/transformations/convert_gather_to_gather_ie.cpp
@@ -33,6 +33,7 @@ TEST(TransformationTests, ConvertGatherToGatherIEStatic1) {
         pass::InitNodeInfo().run_on_function(f);
         pass::ConvertGatherToGatherIE().run_on_function(f);
         ASSERT_NO_THROW(check_rt_info(f));
+        ASSERT_TRUE(f->get_output_partial_shape(0).is_static()) << "Shape " << f->get_output_partial_shape(0) << " should be static";
     }
 
     {
@@ -60,6 +61,7 @@ TEST(TransformationTests, ConvertGatherToGatherIEStatic2) {
         pass::InitNodeInfo().run_on_function(f);
         pass::ConvertGatherToGatherIE().run_on_function(f);
         ASSERT_NO_THROW(check_rt_info(f));
+        ASSERT_TRUE(f->get_output_partial_shape(0).is_static()) << "Shape " << f->get_output_partial_shape(0) << " should be static";
     }
 
     {
diff --git a/inference-engine/tests/functional/inference_engine/transformations/convert_nms_to_nms_ie_test.cpp b/inference-engine/tests/functional/inference_engine/transformations/convert_nms_to_nms_ie_test.cpp
index 860e916e8bb29e..bd19243e73c740 100644
--- a/inference-engine/tests/functional/inference_engine/transformations/convert_nms_to_nms_ie_test.cpp
+++ b/inference-engine/tests/functional/inference_engine/transformations/convert_nms_to_nms_ie_test.cpp
@@ -14,6 +14,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "ngraph_test_utils.hpp"
@@ -33,25 +34,26 @@ TEST(TransformationTests, ConvertNMSToNMSIEStatic) {
 
         f = std::make_shared<Function>(NodeVector{nms}, ParameterVector{boxes, scores});
 
+        const auto & orig_shape = f->get_output_partial_shape(0);
         pass::InitNodeInfo().run_on_function(f);
         pass::ConvertNMSToNMSIE().run_on_function(f);
-        f->validate_nodes_and_infer_types();
         ASSERT_NO_THROW(check_rt_info(f));
+        ASSERT_TRUE(f->get_output_partial_shape(0).is_static()) << "Shape " << f->get_output_partial_shape(0) << " should be static";
     }
 
     {
         auto boxes = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 1000, 4});
         auto scores = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 1, 1000});
-        auto max_output_boxes_per_class = opset1::Constant::create(element::i64, Shape{}, {10});
+        auto max_output_boxes_per_class = opset1::Constant::create(element::i64, Shape{1}, {10});
         auto iou_threshold = opset1::Constant::create(element::f32, Shape{}, {0.75});
         auto score_threshold = opset1::Constant::create(element::f32, Shape{}, {0.7});
-        auto nms = std::make_shared<op::NonMaxSuppressionIE>(boxes, scores,
-                std::make_shared<opset1::Unsqueeze>(max_output_boxes_per_class, opset1::Constant::create(element::i64, Shape{1}, {0})),
+        auto nms = std::make_shared<op::NonMaxSuppressionIE>(boxes, scores, max_output_boxes_per_class,
                 std::make_shared<opset1::Unsqueeze>(iou_threshold, opset1::Constant::create(element::i64, Shape{1}, {0})),
                 std::make_shared<opset1::Unsqueeze>(score_threshold, opset1::Constant::create(element::i64, Shape{1}, {0})),
                 0, true);
 
         f_ref = std::make_shared<Function>(NodeVector{nms}, ParameterVector{boxes, scores});
+        ASSERT_TRUE(f_ref->get_output_partial_shape(0).is_static()) << "Shape " << f_ref->get_output_partial_shape(0) << " should be static";
     }
 
     auto res = compare_functions(f, f_ref);
@@ -80,11 +82,10 @@ TEST(TransformationTests, ConvertNMSToNMSIEDynamic1) {
     {
         auto boxes = std::make_shared<opset1::Parameter>(element::f32, PartialShape::dynamic());
         auto scores = std::make_shared<opset1::Parameter>(element::f32, PartialShape::dynamic());
-        auto max_output_boxes_per_class = opset1::Constant::create(element::i64, Shape{}, {10});
+        auto max_output_boxes_per_class = opset1::Constant::create(element::i64, Shape{1}, {10});
         auto iou_threshold = opset1::Constant::create(element::f32, Shape{}, {0.75});
         auto score_threshold = opset1::Constant::create(element::f32, Shape{}, {0.7});
-        auto nms = std::make_shared<op::NonMaxSuppressionIE>(boxes, scores,
-                std::make_shared<opset1::Unsqueeze>(max_output_boxes_per_class, opset1::Constant::create(element::i64, Shape{1}, {0})),
+        auto nms = std::make_shared<op::NonMaxSuppressionIE>(boxes, scores, max_output_boxes_per_class,
                 std::make_shared<opset1::Unsqueeze>(iou_threshold, opset1::Constant::create(element::i64, Shape{1}, {0})),
                 std::make_shared<opset1::Unsqueeze>(score_threshold, opset1::Constant::create(element::i64, Shape{1}, {0})),
                 0, true);
@@ -118,11 +119,10 @@ TEST(TransformationTests, ConvertNMSToNMSIEDynamic2) {
     {
         auto boxes = std::make_shared<opset1::Parameter>(element::f32, PartialShape{DYN, 1000, 4});
         auto scores = std::make_shared<opset1::Parameter>(element::f32, PartialShape{DYN, 1, 1000});
-        auto max_output_boxes_per_class = opset1::Constant::create(element::i64, Shape{}, {10});
+        auto max_output_boxes_per_class = opset1::Constant::create(element::i64, Shape{1}, {10});
         auto iou_threshold = opset1::Constant::create(element::f32, Shape{}, {0.75});
         auto score_threshold = opset1::Constant::create(element::f32, Shape{}, {0.7});
-        auto nms = std::make_shared<op::NonMaxSuppressionIE>(boxes, scores,
-                std::make_shared<opset1::Unsqueeze>(max_output_boxes_per_class, opset1::Constant::create(element::i64, Shape{1}, {0})),
+        auto nms = std::make_shared<op::NonMaxSuppressionIE>(boxes, scores, max_output_boxes_per_class,
                 std::make_shared<opset1::Unsqueeze>(iou_threshold, opset1::Constant::create(element::i64, Shape{1}, {0})),
                 std::make_shared<opset1::Unsqueeze>(score_threshold, opset1::Constant::create(element::i64, Shape{1}, {0})),
                 0, true);
 
     auto res = compare_functions(f, f_ref);
     ASSERT_TRUE(res.first) << res.second;
-}
\ No newline at end of file
+}
diff --git a/inference-engine/tests/functional/inference_engine/transformations/convert_topk_test.cpp b/inference-engine/tests/functional/inference_engine/transformations/convert_topk_test.cpp
index c66d71929c443a..197661f2dc3f25 100644
--- a/inference-engine/tests/functional/inference_engine/transformations/convert_topk_test.cpp
+++ b/inference-engine/tests/functional/inference_engine/transformations/convert_topk_test.cpp
@@ -33,6 +33,7 @@ TEST(TransformationTests, ConvertTopKToTopKIEStatic) {
     ngraph::pass::ConvertTopKToTopKIE().run_on_function(f);
     ASSERT_NO_THROW(check_rt_info(f));
     ngraph::pass::ConstantFolding().run_on_function(f);
+    ASSERT_TRUE(f->get_output_partial_shape(0).is_static()) << "Shape " << f->get_output_partial_shape(0) << " should be static";
 }
 
 {

From 23f41213bbe94c4135271ea39d6b2fe4c1a45efe Mon Sep 17 00:00:00 2001
From: Irina Efode
Date: Thu, 28 May 2020 17:22:19 +0300
Subject: [PATCH 02/24] [IE TESTS] MOVE plugin tests (#659)

---
 .../unit_test_utils/CMakeLists.txt            |  2 ++
 .../mocks}/mock_engine/CMakeLists.txt         |  0
 .../mocks}/mock_engine/dllmain.cpp            |  0
 .../mocks}/mock_engine/mock_plugin.cpp        |  0
.../mocks}/mock_engine/mock_plugin.hpp | 0 .../unit/inference_engine/CMakeLists.txt | 4 ++- .../cpp_interfaces/ie_plugin_test.cpp | 2 +- .../unit/inference_engine/ie_plugin_ptr.cpp} | 27 +++++++++++-------- .../tests_deprecated/CMakeLists.txt | 2 -- 9 files changed, 22 insertions(+), 15 deletions(-) rename inference-engine/{tests_deprecated => tests/ie_test_utils/unit_test_utils/mocks}/mock_engine/CMakeLists.txt (100%) rename inference-engine/{tests_deprecated => tests/ie_test_utils/unit_test_utils/mocks}/mock_engine/dllmain.cpp (100%) rename inference-engine/{tests_deprecated => tests/ie_test_utils/unit_test_utils/mocks}/mock_engine/mock_plugin.cpp (100%) rename inference-engine/{tests_deprecated => tests/ie_test_utils/unit_test_utils/mocks}/mock_engine/mock_plugin.hpp (100%) rename inference-engine/{tests_deprecated/unit/inference_engine_tests/inference_engine_plugin_test.cpp => tests/unit/inference_engine/ie_plugin_ptr.cpp} (78%) diff --git a/inference-engine/tests/ie_test_utils/unit_test_utils/CMakeLists.txt b/inference-engine/tests/ie_test_utils/unit_test_utils/CMakeLists.txt index 91076ff09ff43a..df611c700cc312 100644 --- a/inference-engine/tests/ie_test_utils/unit_test_utils/CMakeLists.txt +++ b/inference-engine/tests/ie_test_utils/unit_test_utils/CMakeLists.txt @@ -4,6 +4,8 @@ set(TARGET_NAME unitTestUtils) +add_subdirectory(mocks/mock_engine) + list(APPEND EXPORT_DEPENDENCIES commonTestUtils_s inference_engine_s diff --git a/inference-engine/tests_deprecated/mock_engine/CMakeLists.txt b/inference-engine/tests/ie_test_utils/unit_test_utils/mocks/mock_engine/CMakeLists.txt similarity index 100% rename from inference-engine/tests_deprecated/mock_engine/CMakeLists.txt rename to inference-engine/tests/ie_test_utils/unit_test_utils/mocks/mock_engine/CMakeLists.txt diff --git a/inference-engine/tests_deprecated/mock_engine/dllmain.cpp b/inference-engine/tests/ie_test_utils/unit_test_utils/mocks/mock_engine/dllmain.cpp similarity index 100% rename from inference-engine/tests_deprecated/mock_engine/dllmain.cpp rename to inference-engine/tests/ie_test_utils/unit_test_utils/mocks/mock_engine/dllmain.cpp diff --git a/inference-engine/tests_deprecated/mock_engine/mock_plugin.cpp b/inference-engine/tests/ie_test_utils/unit_test_utils/mocks/mock_engine/mock_plugin.cpp similarity index 100% rename from inference-engine/tests_deprecated/mock_engine/mock_plugin.cpp rename to inference-engine/tests/ie_test_utils/unit_test_utils/mocks/mock_engine/mock_plugin.cpp diff --git a/inference-engine/tests_deprecated/mock_engine/mock_plugin.hpp b/inference-engine/tests/ie_test_utils/unit_test_utils/mocks/mock_engine/mock_plugin.hpp similarity index 100% rename from inference-engine/tests_deprecated/mock_engine/mock_plugin.hpp rename to inference-engine/tests/ie_test_utils/unit_test_utils/mocks/mock_engine/mock_plugin.hpp diff --git a/inference-engine/tests/unit/inference_engine/CMakeLists.txt b/inference-engine/tests/unit/inference_engine/CMakeLists.txt index 629a2cecfbb901..3eb21a4349fb6f 100644 --- a/inference-engine/tests/unit/inference_engine/CMakeLists.txt +++ b/inference-engine/tests/unit/inference_engine/CMakeLists.txt @@ -10,6 +10,8 @@ addIeTargetTest( LINK_LIBRARIES unitTestUtils ADD_CPPLINT + DEPENDENCIES + mock_engine LABELS IE -) \ No newline at end of file +) diff --git a/inference-engine/tests/unit/inference_engine/cpp_interfaces/ie_plugin_test.cpp b/inference-engine/tests/unit/inference_engine/cpp_interfaces/ie_plugin_test.cpp index 5e6224ac3e85d0..de0097b32cf4ea 100644 --- 
a/inference-engine/tests/unit/inference_engine/cpp_interfaces/ie_plugin_test.cpp
+++ b/inference-engine/tests/unit/inference_engine/cpp_interfaces/ie_plugin_test.cpp
@@ -13,7 +13,6 @@
 #include "unit_test_utils/mocks/cpp_interfaces/impl/mock_inference_plugin_internal.hpp"
 #include "unit_test_utils/mocks/cpp_interfaces/impl/mock_executable_thread_safe_default.hpp"
 #include "unit_test_utils/mocks/cpp_interfaces/interface/mock_iinfer_request_internal.hpp"
-#include "unit_test_utils/mocks/mock_iinfer_request.hpp"
 
 using namespace ::testing;
 using namespace std;
@@ -163,3 +162,4 @@ TEST_F(InferenceEnginePluginInternalTest, pluginInternalEraseMagicAndNameWhenImp
     ASSERT_EQ(mockExeNetworkInternal->exportString, mock_plugin_impl->importedString);
     mock_plugin_impl->importedString = {};
 }
+
diff --git a/inference-engine/tests_deprecated/unit/inference_engine_tests/inference_engine_plugin_test.cpp b/inference-engine/tests/unit/inference_engine/ie_plugin_ptr.cpp
similarity index 78%
rename from inference-engine/tests_deprecated/unit/inference_engine_tests/inference_engine_plugin_test.cpp
rename to inference-engine/tests/unit/inference_engine/ie_plugin_ptr.cpp
index 2920d4b3f02180..a885def9322e85 100644
--- a/inference-engine/tests_deprecated/unit/inference_engine_tests/inference_engine_plugin_test.cpp
+++ b/inference-engine/tests/unit/inference_engine/ie_plugin_ptr.cpp
@@ -2,12 +2,11 @@
 // SPDX-License-Identifier: Apache-2.0
 //
 
-#include "tests_common.hpp"
-
 #include 
+#include 
 
 #include "details/ie_so_loader.h"
-#include "../tests_deprecated/mock_engine/mock_plugin.hpp"
+#include "unit_test_utils/mocks/mock_engine/mock_plugin.hpp"
 #include "unit_test_utils/mocks/mock_error_listener.hpp"
 #include "unit_test_utils/mocks/mock_iinference_plugin.hpp"
 
@@ -19,20 +18,25 @@ using namespace InferenceEngine::details;
 
 IE_SUPPRESS_DEPRECATED_START
 
-class PluginTest: public TestsCommon {
+class PluginTest: public ::testing::Test {
 protected:
     unique_ptr<SharedObjectLoader> sharedObjectLoader;
     std::function<IInferencePlugin*(IInferencePlugin*)> createPluginEngineProxy;
-    InferenceEnginePluginPtr getPtr() ;
-    virtual void SetUp() {
+    InferenceEnginePluginPtr getPtr();
+
+    std::string get_mock_engine_name() {
+        std::string mockEngineName("mock_engine");
+        return CommonTestUtils::pre + mockEngineName + IE_BUILD_POSTFIX + CommonTestUtils::ext;
+    }
 
+    virtual void SetUp() {
         std::string libraryName = get_mock_engine_name();
         sharedObjectLoader.reset(new SharedObjectLoader(libraryName.c_str()));
         createPluginEngineProxy = make_std_function<IInferencePlugin*(IInferencePlugin*)>("CreatePluginEngineProxy");
     }
     template <class T>
     std::function<T> make_std_function(const std::string& functionName) {
-        std::function<T> ptr (reinterpret_cast<T*>(sharedObjectLoader->get_symbol(functionName.c_str())));
+        std::function<T> ptr(reinterpret_cast<T*>(sharedObjectLoader->get_symbol(functionName.c_str())));
         return ptr;
     }
 
@@ -43,7 +47,7 @@ class PluginTest: public TestsCommon {
 TEST_F(PluginTest, canCreatePlugin) {
     auto ptr = make_std_function<IInferencePlugin*(IInferencePlugin*)>("CreatePluginEngineProxy");
 
-    unique_ptr<IInferencePlugin, std::function<void (IInferencePlugin*)>> smart_ptr(ptr(nullptr), [](IInferencePlugin *p) {
+    unique_ptr<IInferencePlugin, std::function<void(IInferencePlugin*)>> smart_ptr(ptr(nullptr), [](IInferencePlugin *p) {
         p->Release();
     });
 
@@ -62,8 +66,7 @@ TEST_F(PluginTest, shouldThrowExceptionIfPluginNotExist) {
 
 ACTION_TEMPLATE(CallListenerWithErrorMessage,
                 HAS_1_TEMPLATE_PARAMS(int, k),
-                AND_1_VALUE_PARAMS(pointer))
-{
+                AND_1_VALUE_PARAMS(pointer)) {
     InferenceEngine::IErrorListener & data = ::std::get<k>(args);
     data.onError(pointer);
 }
 
@@ -71,7 +74,7 @@ ACTION_TEMPLATE(CallListenerWithErrorMessage,
 
 InferenceEnginePluginPtr PluginTest::getPtr() {
     InferenceEnginePluginPtr smart_ptr(get_mock_engine_name());
     return smart_ptr;
-};
+}
 
 TEST_F(PluginTest, canSetConfiguration) {
     InferenceEnginePluginPtr ptr = getPtr();
@@ -86,3 +89,5 @@ TEST_F(PluginTest, canSetConfiguration) {
 
     ASSERT_STREQ(reinterpret_cast<MockPlugin*>(*ptr)->config["key"].c_str(), "value");
 }
+
+IE_SUPPRESS_DEPRECATED_END
\ No newline at end of file
diff --git a/inference-engine/tests_deprecated/CMakeLists.txt b/inference-engine/tests_deprecated/CMakeLists.txt
index 5b3563e4ee0e78..66ad5012a317a2 100644
--- a/inference-engine/tests_deprecated/CMakeLists.txt
+++ b/inference-engine/tests_deprecated/CMakeLists.txt
@@ -8,8 +8,6 @@
 
 enable_testing()
 
-add_subdirectory(mock_engine)
-
 add_subdirectory(helpers)
 
 if (ENABLE_GAPI_TESTS)

From 77162bf8ee95f263e81907303e9e54cef6d4605b Mon Sep 17 00:00:00 2001
From: Andrew Bakalin
Date: Thu, 28 May 2020 18:01:56 +0300
Subject: [PATCH 03/24] [VPU][Tests] Fix sanitizer issue in unit tests (#630)

---
 .../tests/unit/vpu/base/graph_transformer_tests.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/inference-engine/tests/unit/vpu/base/graph_transformer_tests.cpp b/inference-engine/tests/unit/vpu/base/graph_transformer_tests.cpp
index 80a3f6f8fd30f4..21283429ee01bc 100644
--- a/inference-engine/tests/unit/vpu/base/graph_transformer_tests.cpp
+++ b/inference-engine/tests/unit/vpu/base/graph_transformer_tests.cpp
@@ -243,6 +243,10 @@ bool checkExecutionOrder(const Model& model, const std::vector<int>& execOrder)
     auto it = execOrder.begin();
 
     for (const auto& stage : model->getStages()) {
+        if (it == execOrder.end()) {
+            return true;
+        }
+
         if (stage->id() == *it) {
             ++it;
         }

From 33aca7d2c48b91b19728305fffafa75ddf62bc38 Mon Sep 17 00:00:00 2001
From: Vladimir Gavrilov
Date: Thu, 28 May 2020 18:08:24 +0300
Subject: [PATCH 04/24] SplitConcatPairToInterpolate inserts Interpolate when
 input is 2D (#596)

* SplitConcatPairToInterpolate transformation was moved to the middle stage and is applied only to 4D and 5D inputs.
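The pattern this transformation matches is a Split whose output ports all feed a single Concat, each port consumed the same number of times and in order. A standalone NumPy sketch (illustrative only, not Model Optimizer code; the helper name is made up) shows why such a pair behaves like a nearest-neighbor Interpolate, assuming the Split cuts the axis into single-element slices:

import numpy as np

def split_concat(data, axis, num_splits, repeats):
    # Split along 'axis' into 'num_splits' chunks, then feed every chunk
    # to the Concat 'repeats' times, in order.
    chunks = np.split(data, num_splits, axis=axis)
    return np.concatenate([c for chunk in chunks for c in [chunk] * repeats], axis=axis)

# 5D input; axis 4 is split into single-element slices, each duplicated twice.
x = np.random.rand(1, 3, 100, 120, 150)
out = split_concat(x, axis=4, num_splits=150, repeats=2)

# Identical to nearest-neighbor upsampling with scale 2 along axis 4.
assert np.array_equal(out, np.repeat(x, 2, axis=4))
print(out.shape)  # (1, 3, 100, 120, 300)

Restricting the match to 4D/5D inputs (the change below) keeps the replacement within the spatial-upsampling cases that Interpolate supports.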
--- model-optimizer/automation/package_BOM.txt | 2 +- .../tf/SplitConcatPairToInterpolate_test.py | 412 ----------------- .../SplitConcatPairToInterpolate.py | 39 +- .../SplitConcatPairToInterpolate_test.py | 427 ++++++++++++++++++ 4 files changed, 452 insertions(+), 428 deletions(-) delete mode 100644 model-optimizer/extensions/front/tf/SplitConcatPairToInterpolate_test.py rename model-optimizer/extensions/{front/tf => middle}/SplitConcatPairToInterpolate.py (81%) create mode 100644 model-optimizer/extensions/middle/SplitConcatPairToInterpolate_test.py diff --git a/model-optimizer/automation/package_BOM.txt b/model-optimizer/automation/package_BOM.txt index 5b5143859094e3..e4da215ad3cda9 100644 --- a/model-optimizer/automation/package_BOM.txt +++ b/model-optimizer/automation/package_BOM.txt @@ -436,7 +436,6 @@ extensions/front/tf/sparse_segment_sum_ext.py extensions/front/tf/sparse_to_dense_ext.py extensions/front/tf/sparse_weighted_sum.py extensions/front/tf/split_ext.py -extensions/front/tf/SplitConcatPairToInterpolate.py extensions/front/tf/ssd_support.json extensions/front/tf/ssd_support_api_v1.14.json extensions/front/tf/ssd_support_api_v1.15.json @@ -568,6 +567,7 @@ extensions/middle/SliceConverter.py extensions/middle/SliceLikeToStridedSlice.py extensions/middle/space_to_depth.py extensions/middle/sparse_reshape.py +extensions/middle/SplitConcatPairToInterpolate.py extensions/middle/ssd_anchors_to_const.py extensions/middle/SwapAxesMiddleReplacer.py extensions/middle/TensorIterator_utils.py diff --git a/model-optimizer/extensions/front/tf/SplitConcatPairToInterpolate_test.py b/model-optimizer/extensions/front/tf/SplitConcatPairToInterpolate_test.py deleted file mode 100644 index 6eb9e5fee41b82..00000000000000 --- a/model-optimizer/extensions/front/tf/SplitConcatPairToInterpolate_test.py +++ /dev/null @@ -1,412 +0,0 @@ -""" - Copyright (c) 2020 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-""" - - -import unittest - -import numpy as np - -from extensions.front.tf.SplitConcatPairToInterpolate import SplitConcatPairToInterpolate -from mo.front.common.partial_infer.utils import int64_array -from mo.utils.ir_engine.compare_graphs import compare_graphs -from mo.utils.unittest.graph import build_graph - -graph_node_attrs_for_2d_spatial_case = { - 'placeholder': {'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'}, - 'placeholder_data': { - 'value': None, - 'shape': int64_array([1, 100, 120, 150]), - 'kind': 'data', - 'data_type': None - }, - 'split': {'type': 'Split', 'kind': 'op', 'op': 'Split', 'num_splits': 3}, - 'split_axis_const': { - 'kind': 'op', - 'value': np.array(3, dtype=np.int64), - 'op': 'Const', - 'type': 'Const' - }, - 'split_axis_const_data': {'value': None, 'shape': np.array(3, dtype=np.int64).shape, 'kind': 'data'}, - 'concat': {'type': 'Concat', 'kind': 'op', 'axis': 3}, - 'split_data_0': {'value': None, 'shape': int64_array([1, 100, 120, 50]), 'kind': 'data'}, - 'split_data_1': {'value': None, 'shape': int64_array([1, 100, 120, 50]), 'kind': 'data'}, - 'split_data_2': {'value': None, 'shape': int64_array([1, 100, 120, 50]), 'kind': 'data'}, - 'concat_data': {'value': None, 'shape': int64_array([1, 100, 120, 300]), 'kind': 'data'}, - 'abs': {'type': 'Abs', 'kind': 'op', 'op': 'Abs'}, - 'abs_data': {'value': None, 'shape': int64_array([1, 100, 120, 300]), 'kind': 'data'}, - 'output': {'kind': 'op', 'op': 'Result'}, - } - - -graph_node_attrs_for_3d_spatial_case = { - 'placeholder': {'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'}, - 'placeholder_data': { - 'value': None, - 'shape': int64_array([1, 3, 100, 120, 150]), - 'kind': 'data', - 'data_type': None - }, - 'split': {'type': 'Split', 'kind': 'op', 'op': 'Split', 'num_splits': 3}, - 'split_axis_const': { - 'kind': 'op', - 'value': np.array(4, dtype=np.int64), - 'op': 'Const', - 'type': 'Const' - }, - 'split_axis_const_data': {'value': None, 'shape': np.array(4, dtype=np.int64).shape, 'kind': 'data'}, - 'concat': {'type': 'Concat', 'kind': 'op', 'axis': 4}, - 'split_data_0': {'value': None, 'shape': int64_array([1, 3, 100, 120, 50]), 'kind': 'data'}, - 'split_data_1': {'value': None, 'shape': int64_array([1, 3, 100, 120, 50]), 'kind': 'data'}, - 'split_data_2': {'value': None, 'shape': int64_array([1, 3, 100, 120, 50]), 'kind': 'data'}, - 'concat_data': {'value': None, 'shape': int64_array([1, 3, 100, 120, 300]), 'kind': 'data'}, - 'abs': {'type': 'Abs', 'kind': 'op', 'op': 'Abs'}, - 'abs_data': {'value': None, 'shape': int64_array([1, 3, 100, 120, 300]), 'kind': 'data'}, - 'output': {'kind': 'op', 'op': 'Result'}, - } - - -graph_edges = [ - ('placeholder', 'placeholder_data'), - ('placeholder_data', 'split', {'in': 0}), - ('split_axis_const', 'split_axis_const_data'), - ('split_axis_const_data', 'split', {'in': 1}), - ('split', 'split_data_0', {'out': 0}), - ('split', 'split_data_1', {'out': 1}), - ('split', 'split_data_2', {'out': 2}), - ('split_data_0', 'concat', {'in': 0}), - ('split_data_0', 'concat', {'in': 1}), - ('split_data_1', 'concat', {'in': 2}), - ('split_data_1', 'concat', {'in': 3}), - ('split_data_2', 'concat', {'in': 4}), - ('split_data_2', 'concat', {'in': 5}), - ('concat', 'concat_data'), - ('concat_data', 'abs'), - ('abs', 'abs_data'), - ('abs_data', 'output') - ] - - -ref_graph_edges = [ - ('placeholder', 'placeholder_data'), - ('placeholder_data', 'interpolate', {'in': 0}), - ('placeholder_data', 'shape'), - ('shape', 'sslice', {'in': 0}), - ('slice_begin', 'sslice', {'in': 1}), - 
('slice_end', 'sslice', {'in': 2}), - ('sslice', 'sslice_data'), - ('scales', 'scales_data'), - ('sslice_data', 'mul', {'in': 0}), - ('scales_data', 'mul', {'in': 1}), - ('mul', 'mul_data'), - ('mul_data', 'interpolate', {'in': 1}), - ('interpolate', 'interpolate_data'), - ('interpolate_data', 'abs'), - ('abs', 'abs_data'), - ('abs_data', 'output'), - ] - - -ref_graph_node_attrs_for_2d_spatial_case_1 = { - 'placeholder': {'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'}, - 'placeholder_data': { - 'value': None, - 'shape': int64_array([1, 100, 120, 150]), - 'kind': 'data', - 'data_type': None - }, - 'interpolate': { - 'type': 'Interpolate', - 'kind': 'op', - 'op': 'Interpolate', - 'axes': int64_array([3]), - 'mode': 'nearest' - }, - 'shape': {'type': 'ShapeOf', 'kind': 'op', 'op': 'ShapeOf'}, - 'slice_begin': { - 'type': 'Const', - 'op': 'Const', - 'kind': 'op', - 'value': int64_array([3]), - 'shape': int64_array([1]) - }, - 'slice_end': {'type': 'Const', 'op': 'Const', 'kind': 'op', 'value': int64_array([4])}, - 'sslice': { - 'kind': 'op', - 'type': 'StridedSlice', - 'op': 'StridedSlice', - 'begin_mask': int64_array([1]), - 'end_mask': int64_array([1]), - 'new_axis_mask': int64_array([0]), - 'shrink_axis_mask': int64_array([0]), - 'ellipsis_mask': int64_array([0]), - }, - 'sslice_data': {'kind': 'data', 'shape': None}, - 'scales': { - 'type': 'Const', - 'op': 'Const', - 'kind': 'op', - 'value': int64_array([2]), - 'shape': int64_array([1]) - }, - 'scales_data': {'kind': 'data', 'shape': None}, - 'mul': {'kind': 'op', 'op': 'Mul', 'type': 'Multiply'}, - 'mul_data': {'kind': 'data', 'shape': None}, - 'interpolate_data': {'value': None, 'shape': int64_array([1, 100, 120, 300]), 'kind': 'data'}, - 'abs': {'type': 'Abs', 'kind': 'op', 'op': 'Abs'}, - 'abs_data': {'value': None, 'shape': int64_array([1, 100, 120, 300]), 'kind': 'data'}, - 'output': {'kind': 'op', 'op': 'Result'}, - } - -ref_graph_node_attrs_for_2d_spatial_case_2 = { - 'placeholder': {'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'}, - 'placeholder_data': { - 'value': None, - 'shape': int64_array([1, 100, 120, 150]), - 'kind': 'data', - 'data_type': None - }, - 'interpolate': { - 'type': 'Interpolate', - 'kind': 'op', - 'op': 'Interpolate', - 'axes': int64_array([2]), - 'mode': 'nearest' - }, - 'shape': {'type': 'ShapeOf', 'kind': 'op', 'op': 'ShapeOf'}, - 'slice_begin': { - 'type': 'Const', - 'op': 'Const', - 'kind': 'op', - 'value': int64_array([2]), - 'shape': int64_array([1]) - }, - 'slice_end': {'type': 'Const', 'op': 'Const', 'kind': 'op', 'value': int64_array([3])}, - 'sslice': { - 'kind': 'op', - 'type': 'StridedSlice', - 'op': 'StridedSlice', - 'begin_mask': int64_array([1]), - 'end_mask': int64_array([1]), - 'new_axis_mask': int64_array([0]), - 'shrink_axis_mask': int64_array([0]), - 'ellipsis_mask': int64_array([0]), - }, - 'sslice_data': {'kind': 'data', 'shape': None}, - 'scales': { - 'type': 'Const', - 'op': 'Const', - 'kind': 'op', - 'value': int64_array([2]), - 'shape': int64_array([1]) - }, - 'scales_data': {'kind': 'data', 'shape': None}, - 'mul': {'kind': 'op', 'op': 'Mul', 'type': 'Multiply'}, - 'mul_data': {'kind': 'data', 'shape': None}, - 'interpolate_data': {'value': None, 'shape': int64_array([1, 100, 240, 150]), 'kind': 'data'}, - 'abs': {'type': 'Abs', 'kind': 'op', 'op': 'Abs'}, - 'abs_data': {'value': None, 'shape': int64_array([1, 100, 240, 150]), 'kind': 'data'}, - 'output': {'kind': 'op', 'op': 'Result'}, - } - - -ref_graph_node_attrs_for_3d_spatial_case_1 = { - 'placeholder': {'type': 
'Parameter', 'kind': 'op', 'op': 'Parameter'}, - 'placeholder_data': { - 'value': None, - 'shape': int64_array([1, 3, 100, 120, 150]), - 'kind': 'data', - 'data_type': None - }, - 'interpolate': { - 'type': 'Interpolate', - 'kind': 'op', - 'op': 'Interpolate', - 'axes': int64_array([4]), - 'mode': 'nearest' - }, - 'shape': {'type': 'ShapeOf', 'kind': 'op', 'op': 'ShapeOf'}, - 'slice_begin': { - 'type': 'Const', - 'op': 'Const', - 'kind': 'op', - 'value': int64_array([4]), - 'shape': int64_array([1]) - }, - 'slice_end': {'type': 'Const', 'op': 'Const', 'kind': 'op', 'value': int64_array([5])}, - 'sslice': { - 'kind': 'op', - 'type': 'StridedSlice', - 'op': 'StridedSlice', - 'begin_mask': int64_array([1]), - 'end_mask': int64_array([1]), - 'new_axis_mask': int64_array([0]), - 'shrink_axis_mask': int64_array([0]), - 'ellipsis_mask': int64_array([0]), - }, - 'sslice_data': {'kind': 'data', 'shape': None}, - 'scales': { - 'type': 'Const', - 'op': 'Const', - 'kind': 'op', - 'value': int64_array([2]), - 'shape': int64_array([1]) - }, - 'scales_data': {'kind': 'data', 'shape': None}, - 'mul': {'kind': 'op', 'op': 'Mul', 'type': 'Multiply'}, - 'mul_data': {'kind': 'data', 'shape': None}, - 'interpolate_data': {'value': None, 'shape': int64_array([1, 3, 100, 120, 300]), 'kind': 'data'}, - 'abs': {'type': 'Abs', 'kind': 'op', 'op': 'Abs'}, - 'abs_data': {'value': None, 'shape': int64_array([1, 3, 100, 120, 300]), 'kind': 'data'}, - 'output': {'kind': 'op', 'op': 'Result'}, - } - - -ref_graph_node_attrs_for_3d_spatial_case_2 = { - 'placeholder': {'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'}, - 'placeholder_data': { - 'value': None, - 'shape': int64_array([1, 3, 100, 120, 150]), - 'kind': 'data', - 'data_type': None - }, - 'interpolate': { - 'type': 'Interpolate', - 'kind': 'op', - 'op': 'Interpolate', - 'axes': int64_array([3]), - 'mode': 'nearest' - }, - 'shape': {'type': 'ShapeOf', 'kind': 'op', 'op': 'ShapeOf'}, - 'slice_begin': { - 'type': 'Const', - 'op': 'Const', - 'kind': 'op', - 'value': int64_array([4]), - 'shape': int64_array([1]) - }, - 'slice_end': {'type': 'Const', 'op': 'Const', 'kind': 'op', 'value': int64_array([5])}, - 'sslice': { - 'kind': 'op', - 'type': 'StridedSlice', - 'op': 'StridedSlice', - 'begin_mask': int64_array([1]), - 'end_mask': int64_array([1]), - 'new_axis_mask': int64_array([0]), - 'shrink_axis_mask': int64_array([0]), - 'ellipsis_mask': int64_array([0]), - }, - 'sslice_data': {'kind': 'data', 'shape': None}, - 'scales': { - 'type': 'Const', - 'op': 'Const', - 'kind': 'op', - 'value': int64_array([2]), - 'shape': int64_array([1]) - }, - 'scales_data': {'kind': 'data', 'shape': None}, - 'mul': {'kind': 'op', 'op': 'Mul', 'type': 'Multiply'}, - 'mul_data': {'kind': 'data', 'shape': None}, - 'interpolate_data': {'value': None, 'shape': int64_array([1, 3, 100, 240, 150]), 'kind': 'data'}, - 'abs': {'type': 'Abs', 'kind': 'op', 'op': 'Abs'}, - 'abs_data': {'value': None, 'shape': int64_array([1, 3, 100, 240, 150]), 'kind': 'data'}, - 'output': {'kind': 'op', 'op': 'Result'}, - } - - -class SplitConcatPairToInterpolateTest(unittest.TestCase): - def test_spatial_2d_split_concat_1(self): - graph = build_graph( - nodes_attrs=graph_node_attrs_for_2d_spatial_case, - edges=graph_edges - ) - ref_graph = build_graph( - nodes_attrs=ref_graph_node_attrs_for_2d_spatial_case_1, - edges=ref_graph_edges - ) - SplitConcatPairToInterpolate().find_and_replace_pattern(graph) - (flag, resp) = compare_graphs(graph, ref_graph, 'output') - self.assertTrue(flag, resp) - - def 
test_spatial_2d_split_concat_2(self): - graph = build_graph( - nodes_attrs=graph_node_attrs_for_2d_spatial_case, - edges=graph_edges, - update_attributes={ - 'split': {'type': 'Split', 'kind': 'op', 'op': 'Split', 'num_splits': 3}, - 'split_axis_const': { - 'kind': 'op', - 'value': np.array(2, dtype=np.int64), - 'op': 'Const', - 'type': 'Const' - }, - 'split_axis_const_data': {'value': None, 'shape': np.array(2, dtype=np.int64).shape, 'kind': 'data'}, - 'concat': {'type': 'Concat', 'kind': 'op', 'axis': 2}, - 'split_data_0': {'value': None, 'shape': int64_array([1, 100, 40, 150]), 'kind': 'data'}, - 'split_data_1': {'value': None, 'shape': int64_array([1, 100, 40, 150]), 'kind': 'data'}, - 'split_data_2': {'value': None, 'shape': int64_array([1, 100, 40, 150]), 'kind': 'data'}, - 'concat_data': {'value': None, 'shape': int64_array([1, 100, 240, 150]), 'kind': 'data'}, - 'abs_data': {'value': None, 'shape': int64_array([1, 100, 240, 150]), 'kind': 'data'}, - } - ) - ref_graph = build_graph( - nodes_attrs=ref_graph_node_attrs_for_2d_spatial_case_2, - edges=ref_graph_edges - ) - SplitConcatPairToInterpolate().find_and_replace_pattern(graph) - (flag, resp) = compare_graphs(graph, ref_graph, 'output') - self.assertTrue(flag, resp) - - def test_spatial_3d_split_concat_1(self): - graph = build_graph( - nodes_attrs=graph_node_attrs_for_3d_spatial_case, - edges=graph_edges - ) - ref_graph = build_graph( - nodes_attrs=ref_graph_node_attrs_for_3d_spatial_case_1, - edges=ref_graph_edges - ) - SplitConcatPairToInterpolate().find_and_replace_pattern(graph) - (flag, resp) = compare_graphs(graph, ref_graph, 'output') - self.assertTrue(flag, resp) - - def test_spatial_3d_split_concat_2(self): - graph = build_graph( - nodes_attrs=graph_node_attrs_for_3d_spatial_case, - edges=graph_edges, - update_attributes={ - 'split': {'type': 'Split', 'kind': 'op', 'op': 'Split', 'num_splits': 3}, - 'split_axis_const': { - 'kind': 'op', - 'value': np.array(3, dtype=np.int64), - 'op': 'Const', - 'type': 'Const' - }, - 'split_axis_const_data': {'value': None, 'shape': np.array(3, dtype=np.int64).shape, 'kind': 'data'}, - 'concat': {'type': 'Concat', 'kind': 'op', 'axis': 3}, - 'split_data_0': {'value': None, 'shape': int64_array([1, 3, 100, 40, 150]), 'kind': 'data'}, - 'split_data_1': {'value': None, 'shape': int64_array([1, 3, 100, 40, 150]), 'kind': 'data'}, - 'split_data_2': {'value': None, 'shape': int64_array([1, 3, 100, 40, 150]), 'kind': 'data'}, - 'concat_data': {'value': None, 'shape': int64_array([1, 3, 100, 240, 150]), 'kind': 'data'}, - 'abs_data': {'value': None, 'shape': int64_array([1, 3, 100, 240, 150]), 'kind': 'data'}, - } - ) - ref_graph = build_graph( - nodes_attrs=ref_graph_node_attrs_for_3d_spatial_case_2, - edges=ref_graph_edges - ) - SplitConcatPairToInterpolate().find_and_replace_pattern(graph) - (flag, resp) = compare_graphs(graph, ref_graph, 'output') - self.assertTrue(flag, resp) diff --git a/model-optimizer/extensions/front/tf/SplitConcatPairToInterpolate.py b/model-optimizer/extensions/middle/SplitConcatPairToInterpolate.py similarity index 81% rename from model-optimizer/extensions/front/tf/SplitConcatPairToInterpolate.py rename to model-optimizer/extensions/middle/SplitConcatPairToInterpolate.py index f46bb9255b7d5c..b55e2f106c16f2 100644 --- a/model-optimizer/extensions/front/tf/SplitConcatPairToInterpolate.py +++ b/model-optimizer/extensions/middle/SplitConcatPairToInterpolate.py @@ -20,8 +20,9 @@ from extensions.ops.elementwise import Mul from extensions.ops.interpolate import 
Interpolate from mo.front.common.partial_infer.utils import int64_array -from mo.front.common.replacement import FrontReplacementSubgraph +from mo.front.tf.graph_utils import create_op_with_const_inputs from mo.graph.graph import Graph, Node +from mo.middle.replacement import MiddleReplacementPattern from mo.ops.const import Const from mo.ops.shape import Shape from mo.ops.strided_slice import StridedSlice @@ -53,6 +54,9 @@ def get_concat_after_split(split: Node) -> Optional[Node]: def get_interpolate_pattern(split: Node) -> dict: + split_shape = split.in_port(0).data.get_shape() + if len(split_shape) not in {4, 5}: + return {} concat = get_concat_after_split(split) if concat is None: return {} @@ -79,19 +83,19 @@ def replace_interpolate_pattern(graph: Graph, match: dict): mul_node = Mul(graph, dict(name=split_node_name + '/Mul_')).create_node() scales_node.out_port(0).connect(mul_node.in_port(1)) - slice_begin = Const(graph, dict(name=split_node_name + '/slice_begin_', - value=int64_array([axis]))).create_node() - slice_end = Const(graph, dict(name=split_node_name + '/slice_end_', - value=int64_array([axis + 1]))).create_node() - - strided_slice_node = StridedSlice(graph, - {'name': split_node_name + '/StridedSlice_', - 'begin_mask': int64_array([1]), - 'end_mask': int64_array([1]), - 'new_axis_mask': int64_array([0]), - 'shrink_axis_mask': int64_array([0]), - 'ellipsis_mask': int64_array([0]), - }).create_node([shape_node, slice_begin, slice_end]) + strided_slice_node = create_op_with_const_inputs(graph, + StridedSlice, + {1: int64_array([axis]), 2: int64_array([axis + 1])}, + { + 'name': split_node_name + '/StridedSlice_', + 'begin_mask': int64_array([1]), + 'end_mask': int64_array([1]), + 'new_axis_mask': int64_array([0]), + 'shrink_axis_mask': int64_array([0]), + 'ellipsis_mask': int64_array([0]) + }) + shape_node.out_port(0).connect(strided_slice_node.in_port(0)) + strided_slice_node.out_port(0).connect(mul_node.in_port(0)) interp_node = Interpolate(graph, dict(name=split_node_name + '/Interpolate_', @@ -106,7 +110,7 @@ def replace_interpolate_pattern(graph: Graph, match: dict): split_connection.get_source().connect(shape_node.in_port(0)) -class SplitConcatPairToInterpolate(FrontReplacementSubgraph): +class SplitConcatPairToInterpolate(MiddleReplacementPattern): """ This transformation looks for Interpolation layer implemented using simple operations, i.e. Split and Concat, and replaces found pattern with a sequence of Shape, StridedSlice, Const, Mul, Interpolate. @@ -146,6 +150,11 @@ class SplitConcatPairToInterpolate(FrontReplacementSubgraph): by number of output ports of 'split'. """ enabled = True + force_clean_up = True + + def run_before(self): + from extensions.middle.InterpolateSequenceToInterpolate import InterpolateSequenceToInterpolate + return [InterpolateSequenceToInterpolate] def find_and_replace_pattern(self, graph: Graph): log.debug('Enabled replacement of a pair of Split and Concat with Interpolate.') diff --git a/model-optimizer/extensions/middle/SplitConcatPairToInterpolate_test.py b/model-optimizer/extensions/middle/SplitConcatPairToInterpolate_test.py new file mode 100644 index 00000000000000..b7f4fac23b5f66 --- /dev/null +++ b/model-optimizer/extensions/middle/SplitConcatPairToInterpolate_test.py @@ -0,0 +1,427 @@ +""" + Copyright (c) 2020 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +""" + + +import unittest + +import numpy as np + +from extensions.middle.SplitConcatPairToInterpolate import SplitConcatPairToInterpolate +from mo.front.common.partial_infer.utils import int64_array +from mo.utils.ir_engine.compare_graphs import compare_graphs +from mo.utils.unittest.graph import build_graph + +graph_node_attrs_for_2d_spatial_case = { + 'placeholder': {'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'}, + 'placeholder_data': { + 'value': None, + 'shape': int64_array([1, 100, 120, 150]), + 'kind': 'data', + 'data_type': None + }, + 'split': {'type': 'Split', 'kind': 'op', 'op': 'Split', 'num_splits': 3}, + 'split_axis_const': { + 'kind': 'op', + 'value': np.array(3, dtype=np.int64), + 'op': 'Const', + 'type': 'Const' + }, + 'split_axis_const_data': {'value': None, 'shape': np.array(3, dtype=np.int64).shape, 'kind': 'data'}, + 'concat': {'type': 'Concat', 'kind': 'op', 'axis': 3}, + 'split_data_0': {'value': None, 'shape': int64_array([1, 100, 120, 50]), 'kind': 'data'}, + 'split_data_1': {'value': None, 'shape': int64_array([1, 100, 120, 50]), 'kind': 'data'}, + 'split_data_2': {'value': None, 'shape': int64_array([1, 100, 120, 50]), 'kind': 'data'}, + 'concat_data': {'value': None, 'shape': int64_array([1, 100, 120, 300]), 'kind': 'data'}, + 'abs': {'type': 'Abs', 'kind': 'op', 'op': 'Abs'}, + 'abs_data': {'value': None, 'shape': int64_array([1, 100, 120, 300]), 'kind': 'data'}, + 'output': {'kind': 'op', 'op': 'Result'}, +} + + +graph_node_attrs_for_3d_spatial_case = { + 'placeholder': {'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'}, + 'placeholder_data': { + 'value': None, + 'shape': int64_array([1, 3, 100, 120, 150]), + 'kind': 'data', + 'data_type': None + }, + 'split': {'type': 'Split', 'kind': 'op', 'op': 'Split', 'num_splits': 3}, + 'split_axis_const': { + 'kind': 'op', + 'value': np.array(4, dtype=np.int64), + 'op': 'Const', + 'type': 'Const' + }, + 'split_axis_const_data': {'value': None, 'shape': np.array(4, dtype=np.int64).shape, 'kind': 'data'}, + 'concat': {'type': 'Concat', 'kind': 'op', 'axis': 4}, + 'split_data_0': {'value': None, 'shape': int64_array([1, 3, 100, 120, 50]), 'kind': 'data'}, + 'split_data_1': {'value': None, 'shape': int64_array([1, 3, 100, 120, 50]), 'kind': 'data'}, + 'split_data_2': {'value': None, 'shape': int64_array([1, 3, 100, 120, 50]), 'kind': 'data'}, + 'concat_data': {'value': None, 'shape': int64_array([1, 3, 100, 120, 300]), 'kind': 'data'}, + 'abs': {'type': 'Abs', 'kind': 'op', 'op': 'Abs'}, + 'abs_data': {'value': None, 'shape': int64_array([1, 3, 100, 120, 300]), 'kind': 'data'}, + 'output': {'kind': 'op', 'op': 'Result'}, + } + + +graph_edges = [ + ('placeholder', 'placeholder_data'), + ('placeholder_data', 'split', {'in': 0}), + ('split_axis_const', 'split_axis_const_data'), + ('split_axis_const_data', 'split', {'in': 1}), + ('split', 'split_data_0', {'out': 0}), + ('split', 'split_data_1', {'out': 1}), + ('split', 'split_data_2', {'out': 2}), + ('split_data_0', 'concat', {'in': 0}), + ('split_data_0', 'concat', {'in': 1}), + ('split_data_1', 'concat', {'in': 2}), + ('split_data_1', 'concat', {'in': 3}), + 
('split_data_2', 'concat', {'in': 4}), + ('split_data_2', 'concat', {'in': 5}), + ('concat', 'concat_data'), + ('concat_data', 'abs'), + ('abs', 'abs_data'), + ('abs_data', 'output') +] + + +ref_graph_edges = [ + ('placeholder', 'placeholder_data'), + ('placeholder_data', 'interpolate', {'in': 0}), + ('placeholder_data', 'shape'), + ('shape', 'shape_data'), + ('shape_data', 'sslice', {'in': 0}), + ('slice_begin', 'slice_begin_data'), + ('slice_begin_data', 'sslice', {'in': 1}), + ('slice_end', 'slice_end_data'), + ('slice_end_data', 'sslice', {'in': 2}), + ('sslice', 'sslice_data'), + ('scales', 'scales_data'), + ('sslice_data', 'mul', {'in': 0}), + ('scales_data', 'mul', {'in': 1}), + ('mul', 'mul_data'), + ('mul_data', 'interpolate', {'in': 1}), + ('interpolate', 'interpolate_data'), + ('interpolate_data', 'abs'), + ('abs', 'abs_data'), + ('abs_data', 'output'), + ] + + +ref_graph_node_attrs_for_2d_spatial_case_1 = { + 'placeholder': {'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'}, + 'placeholder_data': { + 'value': None, + 'shape': int64_array([1, 100, 120, 150]), + 'kind': 'data', + 'data_type': None + }, + 'interpolate': { + 'type': 'Interpolate', + 'kind': 'op', + 'op': 'Interpolate', + 'axes': int64_array([3]), + 'mode': 'nearest' + }, + 'shape': {'type': 'ShapeOf', 'kind': 'op', 'op': 'ShapeOf'}, + 'shape_data': {'kind': 'data', 'shape': None, 'value': None}, + 'slice_begin': { + 'type': 'Const', + 'op': 'Const', + 'kind': 'op', + 'value': int64_array([3]), + 'shape': int64_array([1]) + }, + 'slice_begin_data': {'kind': 'data', 'shape': None, 'value': None}, + 'slice_end': {'type': 'Const', 'op': 'Const', 'kind': 'op', 'value': int64_array([4])}, + 'slice_end_data': {'kind': 'data', 'shape': None, 'value': None}, + 'sslice': { + 'kind': 'op', + 'type': 'StridedSlice', + 'op': 'StridedSlice', + 'begin_mask': int64_array([1]), + 'end_mask': int64_array([1]), + 'new_axis_mask': int64_array([0]), + 'shrink_axis_mask': int64_array([0]), + 'ellipsis_mask': int64_array([0]), + }, + 'sslice_data': {'kind': 'data', 'shape': None}, + 'scales': { + 'type': 'Const', + 'op': 'Const', + 'kind': 'op', + 'value': int64_array([2]), + 'shape': int64_array([1]) + }, + 'scales_data': {'kind': 'data', 'shape': None}, + 'mul': {'kind': 'op', 'op': 'Mul', 'type': 'Multiply'}, + 'mul_data': {'kind': 'data', 'shape': None}, + 'interpolate_data': {'value': None, 'shape': int64_array([1, 100, 120, 300]), 'kind': 'data'}, + 'abs': {'type': 'Abs', 'kind': 'op', 'op': 'Abs'}, + 'abs_data': {'value': None, 'shape': int64_array([1, 100, 120, 300]), 'kind': 'data'}, + 'output': {'kind': 'op', 'op': 'Result'}, +} + +ref_graph_node_attrs_for_2d_spatial_case_2 = { + 'placeholder': {'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'}, + 'placeholder_data': { + 'value': None, + 'shape': int64_array([1, 100, 120, 150]), + 'kind': 'data', + 'data_type': None + }, + 'interpolate': { + 'type': 'Interpolate', + 'kind': 'op', + 'op': 'Interpolate', + 'axes': int64_array([2]), + 'mode': 'nearest' + }, + 'shape': {'type': 'ShapeOf', 'kind': 'op', 'op': 'ShapeOf'}, + 'shape_data': {'kind': 'data', 'shape': None, 'value': None}, + 'slice_begin': { + 'type': 'Const', + 'op': 'Const', + 'kind': 'op', + 'value': int64_array([2]), + 'shape': int64_array([1]) + }, + 'slice_begin_data': {'kind': 'data', 'shape': None, 'value': None}, + 'slice_end': {'type': 'Const', 'op': 'Const', 'kind': 'op', 'value': int64_array([3])}, + 'slice_end_data': {'kind': 'data', 'shape': None, 'value': None}, + 'sslice': { + 'kind': 'op', + 
'type': 'StridedSlice', + 'op': 'StridedSlice', + 'begin_mask': int64_array([1]), + 'end_mask': int64_array([1]), + 'new_axis_mask': int64_array([0]), + 'shrink_axis_mask': int64_array([0]), + 'ellipsis_mask': int64_array([0]), + }, + 'sslice_data': {'kind': 'data', 'shape': None}, + 'scales': { + 'type': 'Const', + 'op': 'Const', + 'kind': 'op', + 'value': int64_array([2]), + 'shape': int64_array([1]) + }, + 'scales_data': {'kind': 'data', 'shape': None}, + 'mul': {'kind': 'op', 'op': 'Mul', 'type': 'Multiply'}, + 'mul_data': {'kind': 'data', 'shape': None}, + 'interpolate_data': {'value': None, 'shape': int64_array([1, 100, 240, 150]), 'kind': 'data'}, + 'abs': {'type': 'Abs', 'kind': 'op', 'op': 'Abs'}, + 'abs_data': {'value': None, 'shape': int64_array([1, 100, 240, 150]), 'kind': 'data'}, + 'output': {'kind': 'op', 'op': 'Result'}, +} + + +ref_graph_node_attrs_for_3d_spatial_case_1 = { + 'placeholder': {'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'}, + 'placeholder_data': { + 'value': None, + 'shape': int64_array([1, 3, 100, 120, 150]), + 'kind': 'data', + 'data_type': None + }, + 'interpolate': { + 'type': 'Interpolate', + 'kind': 'op', + 'op': 'Interpolate', + 'axes': int64_array([4]), + 'mode': 'nearest' + }, + 'shape': {'type': 'ShapeOf', 'kind': 'op', 'op': 'ShapeOf'}, + 'shape_data': {'kind': 'data', 'shape': None, 'value': None}, + 'slice_begin': { + 'type': 'Const', + 'op': 'Const', + 'kind': 'op', + 'value': int64_array([4]), + 'shape': int64_array([1]) + }, + 'slice_begin_data': {'kind': 'data', 'shape': None, 'value': None}, + 'slice_end': {'type': 'Const', 'op': 'Const', 'kind': 'op', 'value': int64_array([5])}, + 'slice_end_data': {'kind': 'data', 'shape': None, 'value': None}, + 'sslice': { + 'kind': 'op', + 'type': 'StridedSlice', + 'op': 'StridedSlice', + 'begin_mask': int64_array([1]), + 'end_mask': int64_array([1]), + 'new_axis_mask': int64_array([0]), + 'shrink_axis_mask': int64_array([0]), + 'ellipsis_mask': int64_array([0]), + }, + 'sslice_data': {'kind': 'data', 'shape': None}, + 'scales': { + 'type': 'Const', + 'op': 'Const', + 'kind': 'op', + 'value': int64_array([2]), + 'shape': int64_array([1]) + }, + 'scales_data': {'kind': 'data', 'shape': None}, + 'mul': {'kind': 'op', 'op': 'Mul', 'type': 'Multiply'}, + 'mul_data': {'kind': 'data', 'shape': None}, + 'interpolate_data': {'value': None, 'shape': int64_array([1, 3, 100, 120, 300]), 'kind': 'data'}, + 'abs': {'type': 'Abs', 'kind': 'op', 'op': 'Abs'}, + 'abs_data': {'value': None, 'shape': int64_array([1, 3, 100, 120, 300]), 'kind': 'data'}, + 'output': {'kind': 'op', 'op': 'Result'}, +} + + +ref_graph_node_attrs_for_3d_spatial_case_2 = { + 'placeholder': {'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'}, + 'placeholder_data': { + 'value': None, + 'shape': int64_array([1, 3, 100, 120, 150]), + 'kind': 'data', + 'data_type': None + }, + 'interpolate': { + 'type': 'Interpolate', + 'kind': 'op', + 'op': 'Interpolate', + 'axes': int64_array([3]), + 'mode': 'nearest' + }, + 'shape': {'type': 'ShapeOf', 'kind': 'op', 'op': 'ShapeOf'}, + 'shape_data': {'kind': 'data', 'shape': None, 'value': None}, + 'slice_begin': { + 'type': 'Const', + 'op': 'Const', + 'kind': 'op', + 'value': int64_array([4]), + 'shape': int64_array([1]) + }, + 'slice_begin_data': {'kind': 'data', 'shape': None, 'value': None}, + 'slice_end': {'type': 'Const', 'op': 'Const', 'kind': 'op', 'value': int64_array([5])}, + 'slice_end_data': {'kind': 'data', 'shape': None, 'value': None}, + 'sslice': { + 'kind': 'op', + 'type': 
'StridedSlice', + 'op': 'StridedSlice', + 'begin_mask': int64_array([1]), + 'end_mask': int64_array([1]), + 'new_axis_mask': int64_array([0]), + 'shrink_axis_mask': int64_array([0]), + 'ellipsis_mask': int64_array([0]), + }, + 'sslice_data': {'kind': 'data', 'shape': None}, + 'scales': { + 'type': 'Const', + 'op': 'Const', + 'kind': 'op', + 'value': int64_array([2]), + 'shape': int64_array([1]) + }, + 'scales_data': {'kind': 'data', 'shape': None}, + 'mul': {'kind': 'op', 'op': 'Mul', 'type': 'Multiply'}, + 'mul_data': {'kind': 'data', 'shape': None}, + 'interpolate_data': {'value': None, 'shape': int64_array([1, 3, 100, 240, 150]), 'kind': 'data'}, + 'abs': {'type': 'Abs', 'kind': 'op', 'op': 'Abs'}, + 'abs_data': {'value': None, 'shape': int64_array([1, 3, 100, 240, 150]), 'kind': 'data'}, + 'output': {'kind': 'op', 'op': 'Result'}, +} + + +class SplitConcatPairToInterpolateTest(unittest.TestCase): + def test_spatial_2d_split_concat_1(self): + graph = build_graph( + nodes_attrs=graph_node_attrs_for_2d_spatial_case, + edges=graph_edges + ) + ref_graph = build_graph( + nodes_attrs=ref_graph_node_attrs_for_2d_spatial_case_1, + edges=ref_graph_edges + ) + SplitConcatPairToInterpolate().find_and_replace_pattern(graph) + (flag, resp) = compare_graphs(graph, ref_graph, 'output') + self.assertTrue(flag, resp) + + def test_spatial_2d_split_concat_2(self): + graph = build_graph( + nodes_attrs=graph_node_attrs_for_2d_spatial_case, + edges=graph_edges, + update_attributes={ + 'split': {'type': 'Split', 'kind': 'op', 'op': 'Split', 'num_splits': 3}, + 'split_axis_const': { + 'kind': 'op', + 'value': np.array(2, dtype=np.int64), + 'op': 'Const', + 'type': 'Const' + }, + 'split_axis_const_data': {'value': None, 'shape': np.array(2, dtype=np.int64).shape, 'kind': 'data'}, + 'concat': {'type': 'Concat', 'kind': 'op', 'axis': 2}, + 'split_data_0': {'value': None, 'shape': int64_array([1, 100, 40, 150]), 'kind': 'data'}, + 'split_data_1': {'value': None, 'shape': int64_array([1, 100, 40, 150]), 'kind': 'data'}, + 'split_data_2': {'value': None, 'shape': int64_array([1, 100, 40, 150]), 'kind': 'data'}, + 'concat_data': {'value': None, 'shape': int64_array([1, 100, 240, 150]), 'kind': 'data'}, + 'abs_data': {'value': None, 'shape': int64_array([1, 100, 240, 150]), 'kind': 'data'}, + } + ) + ref_graph = build_graph( + nodes_attrs=ref_graph_node_attrs_for_2d_spatial_case_2, + edges=ref_graph_edges + ) + SplitConcatPairToInterpolate().find_and_replace_pattern(graph) + (flag, resp) = compare_graphs(graph, ref_graph, 'output') + self.assertTrue(flag, resp) + + def test_spatial_3d_split_concat_1(self): + graph = build_graph( + nodes_attrs=graph_node_attrs_for_3d_spatial_case, + edges=graph_edges + ) + ref_graph = build_graph( + nodes_attrs=ref_graph_node_attrs_for_3d_spatial_case_1, + edges=ref_graph_edges + ) + SplitConcatPairToInterpolate().find_and_replace_pattern(graph) + (flag, resp) = compare_graphs(graph, ref_graph, 'output') + self.assertTrue(flag, resp) + + def test_spatial_3d_split_concat_2(self): + graph = build_graph( + nodes_attrs=graph_node_attrs_for_3d_spatial_case, + edges=graph_edges, + update_attributes={ + 'split': {'type': 'Split', 'kind': 'op', 'op': 'Split', 'num_splits': 3}, + 'split_axis_const': { + 'kind': 'op', + 'value': np.array(3, dtype=np.int64), + 'op': 'Const', + 'type': 'Const' + }, + 'split_axis_const_data': {'value': None, 'shape': np.array(3, dtype=np.int64).shape, 'kind': 'data'}, + 'concat': {'type': 'Concat', 'kind': 'op', 'axis': 3}, + 'split_data_0': {'value': None, 
'shape': int64_array([1, 3, 100, 40, 150]), 'kind': 'data'}, + 'split_data_1': {'value': None, 'shape': int64_array([1, 3, 100, 40, 150]), 'kind': 'data'}, + 'split_data_2': {'value': None, 'shape': int64_array([1, 3, 100, 40, 150]), 'kind': 'data'}, + 'concat_data': {'value': None, 'shape': int64_array([1, 3, 100, 240, 150]), 'kind': 'data'}, + 'abs_data': {'value': None, 'shape': int64_array([1, 3, 100, 240, 150]), 'kind': 'data'}, + } + ) + ref_graph = build_graph( + nodes_attrs=ref_graph_node_attrs_for_3d_spatial_case_2, + edges=ref_graph_edges + ) + SplitConcatPairToInterpolate().find_and_replace_pattern(graph) + (flag, resp) = compare_graphs(graph, ref_graph, 'output') + self.assertTrue(flag, resp) From bb41994f565eb74d4222ec8656b38e8ca2cccbdd Mon Sep 17 00:00:00 2001 From: Gleb Kazantaev Date: Thu, 28 May 2020 18:27:54 +0300 Subject: [PATCH 05/24] Removed StridedSlice to StridedSliceIE transformation (#661) --- .../src/convert_function_to_cnn_network.cpp | 2 - .../src/ie_cnn_layer_builder_ngraph.cpp | 10 +- .../include/ngraph_ops/strided_slice_ie.hpp | 51 ---------- .../convert_opset1_to_legacy_tbl.hpp | 1 - ...vert_strided_slice_to_strided_slice_ie.hpp | 37 ------- .../src/ngraph_ops/strided_slice_ie.cpp | 62 ------------ .../convert_opset1_to_legacy.cpp | 1 - ...vert_strided_slice_to_strided_slice_ie.cpp | 55 ----------- ...t_stridedslice_to_stridedslice_ie_test.cpp | 97 ------------------- 9 files changed, 1 insertion(+), 315 deletions(-) delete mode 100644 inference-engine/src/transformations/include/ngraph_ops/strided_slice_ie.hpp delete mode 100644 inference-engine/src/transformations/include/transformations/convert_opset1_to_legacy/convert_strided_slice_to_strided_slice_ie.hpp delete mode 100644 inference-engine/src/transformations/src/ngraph_ops/strided_slice_ie.cpp delete mode 100644 inference-engine/src/transformations/src/transformations/convert_opset1_to_legacy/convert_strided_slice_to_strided_slice_ie.cpp delete mode 100644 inference-engine/tests/functional/inference_engine/transformations/convert_stridedslice_to_stridedslice_ie_test.cpp diff --git a/inference-engine/src/legacy_api/src/convert_function_to_cnn_network.cpp b/inference-engine/src/legacy_api/src/convert_function_to_cnn_network.cpp index 5b2b30153912cc..22db9348541fb0 100644 --- a/inference-engine/src/legacy_api/src/convert_function_to_cnn_network.cpp +++ b/inference-engine/src/legacy_api/src/convert_function_to_cnn_network.cpp @@ -34,7 +34,6 @@ #include "ngraph_ops/nms_ie.hpp" #include "ngraph_ops/crop_ie.hpp" #include "ngraph_ops/selu_ie.hpp" -#include "ngraph_ops/strided_slice_ie.hpp" #include "ngraph_ops/rnn_cell_ie.hpp" #include "ngraph_ops/topk_ie.hpp" #include "generic_ie.hpp" @@ -555,7 +554,6 @@ std::shared_ptr convertFunctionToICNNNetwork(const std::shared_p std::make_shared>(), std::make_shared>(), std::make_shared>(), - std::make_shared>(), std::make_shared>(), std::make_shared>(), std::make_shared>(), diff --git a/inference-engine/src/legacy_api/src/ie_cnn_layer_builder_ngraph.cpp b/inference-engine/src/legacy_api/src/ie_cnn_layer_builder_ngraph.cpp index 67ab2ba8dd2e44..07e6adf3e08180 100644 --- a/inference-engine/src/legacy_api/src/ie_cnn_layer_builder_ngraph.cpp +++ b/inference-engine/src/legacy_api/src/ie_cnn_layer_builder_ngraph.cpp @@ -42,7 +42,6 @@ #include "ngraph_ops/scaleshift.hpp" #include "ngraph_ops/tile_ie.hpp" #include "ngraph_ops/topk_ie.hpp" -#include "ngraph_ops/strided_slice_ie.hpp" #include "ngraph_ops/rnn_cell_ie.hpp" #include "ngraph_ops/hard_sigmoid_ie.hpp" #include 
"generic_ie.hpp" @@ -2114,17 +2113,10 @@ CNNLayer::Ptr NodeConverter::createLayer(const std::shared_ptr template <> CNNLayer::Ptr NodeConverter::createLayer( const std::shared_ptr& layer) const { - THROW_IE_EXCEPTION << "StridedSlice operation has a form that is not supported." << layer->get_friendly_name() - << " should be converted to StridedSliceIE operation"; -} - -template <> -CNNLayer::Ptr NodeConverter::createLayer( - const std::shared_ptr& layer) const { LayerParams params = {layer->get_friendly_name(), "StridedSlice", details::convertPrecision(layer->get_output_element_type(0))}; auto res = std::make_shared(params); - auto castedLayer = std::dynamic_pointer_cast(layer); + auto castedLayer = std::dynamic_pointer_cast(layer); if (castedLayer == nullptr) THROW_IE_EXCEPTION << "Cannot get " << params.type << " layer " << params.name; std::string value; diff --git a/inference-engine/src/transformations/include/ngraph_ops/strided_slice_ie.hpp b/inference-engine/src/transformations/include/ngraph_ops/strided_slice_ie.hpp deleted file mode 100644 index cb83c4d5223deb..00000000000000 --- a/inference-engine/src/transformations/include/ngraph_ops/strided_slice_ie.hpp +++ /dev/null @@ -1,51 +0,0 @@ -// Copyright (C) 2018-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include -#include - -#include - -#include "ngraph/op/op.hpp" - -namespace ngraph { -namespace op { - -class INFERENCE_ENGINE_API_CLASS(StridedSliceIE) : public Op { -public: - static constexpr NodeTypeInfo type_info{"StridedSliceIE", 1}; - const NodeTypeInfo& get_type_info() const override { return type_info; } - - StridedSliceIE(const Output& data, - const Output& begin, - const Output& end, - const Output& strides, - const std::vector& begin_mask, - const std::vector& end_mask, - const std::vector& new_axis_mask, - const std::vector& shrink_axis_mask, - const std::vector& ellipsis_mask); - - void validate_and_infer_types() override; - - std::shared_ptr clone_with_new_inputs(const OutputVector & new_args) const override; - - const std::vector& get_begin_mask() const { return m_begin_mask; } - const std::vector& get_end_mask() const { return m_end_mask; } - const std::vector& get_new_axis_mask() const { return m_new_axis_mask; } - const std::vector& get_shrink_axis_mask() const { return m_shrink_axis_mask; } - const std::vector& get_ellipsis_mask() const { return m_ellipsis_mask; } - -protected: - const std::vector m_begin_mask; - const std::vector m_end_mask; - const std::vector m_new_axis_mask; - const std::vector m_shrink_axis_mask; - const std::vector m_ellipsis_mask; -}; - -} // namespace op -} // namespace ngraph diff --git a/inference-engine/src/transformations/include/transformations/convert_opset1_to_legacy/convert_opset1_to_legacy_tbl.hpp b/inference-engine/src/transformations/include/transformations/convert_opset1_to_legacy/convert_opset1_to_legacy_tbl.hpp index a0b619e81ba9f7..6b06d0dd7b3195 100644 --- a/inference-engine/src/transformations/include/transformations/convert_opset1_to_legacy/convert_opset1_to_legacy_tbl.hpp +++ b/inference-engine/src/transformations/include/transformations/convert_opset1_to_legacy/convert_opset1_to_legacy_tbl.hpp @@ -53,7 +53,6 @@ NGRAPH_PASS(ConvertHardSigmoidToHardSigmoidIE, ::ngraph::pass) NGRAPH_PASS(ConvertCellsToCellsIE, ::ngraph::pass) NGRAPH_PASS(ConvertInterpolateToInterpOrResample, ::ngraph::pass) NGRAPH_PASS(ConvertStridedSliceToCrop, ::ngraph::pass) -NGRAPH_PASS(ConvertStridedSliceToStridedSliceIE, ::ngraph::pass) 
NGRAPH_PASS(ConvertPowerToPowerIE, ::ngraph::pass) NGRAPH_PASS(ConvertSqrtToPowerIE, ::ngraph::pass) NGRAPH_PASS(ConvertPReLUToReLUIE, ::ngraph::pass) diff --git a/inference-engine/src/transformations/include/transformations/convert_opset1_to_legacy/convert_strided_slice_to_strided_slice_ie.hpp b/inference-engine/src/transformations/include/transformations/convert_opset1_to_legacy/convert_strided_slice_to_strided_slice_ie.hpp deleted file mode 100644 index df2fd645696d89..00000000000000 --- a/inference-engine/src/transformations/include/transformations/convert_opset1_to_legacy/convert_strided_slice_to_strided_slice_ie.hpp +++ /dev/null @@ -1,37 +0,0 @@ -// Copyright (C) 2018-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include -#include - -#include - -#include - -namespace ngraph { -namespace pass { - -class INFERENCE_ENGINE_API_CLASS(ConvertStridedSliceToStridedSliceIE); - -} // namespace pass -} // namespace ngraph - -/* - * Description: - * This transformation converts opset1::StridedSlice to legacy StridedSliceIE - * StridedSliceIE takes begin, end and strides inputs ony in i32 precision. - * Inputs with precision != i32 are converted with Convert operation. - */ - -class ngraph::pass::ConvertStridedSliceToStridedSliceIE: public ngraph::pass::GraphRewrite { -public: - ConvertStridedSliceToStridedSliceIE() : GraphRewrite() { - convert_strided_slice_to_strided_slice_ie(); - } - -private: - void convert_strided_slice_to_strided_slice_ie(); -}; diff --git a/inference-engine/src/transformations/src/ngraph_ops/strided_slice_ie.cpp b/inference-engine/src/transformations/src/ngraph_ops/strided_slice_ie.cpp deleted file mode 100644 index 085d96513931c8..00000000000000 --- a/inference-engine/src/transformations/src/ngraph_ops/strided_slice_ie.cpp +++ /dev/null @@ -1,62 +0,0 @@ -// Copyright (C) 2018-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "ngraph_ops/strided_slice_ie.hpp" - -#include -#include -#include -#include -#include - -using namespace std; -using namespace ngraph; - -constexpr NodeTypeInfo op::StridedSliceIE::type_info; - -op::StridedSliceIE::StridedSliceIE(const Output &data, const Output &begin, const Output &end, - const Output &strides, const std::vector &begin_mask, - const std::vector &end_mask, const std::vector &new_axis_mask, - const std::vector &shrink_axis_mask, - const std::vector &ellipsis_mask) - : Op({data, begin, end, strides}) - , m_begin_mask(begin_mask) - , m_end_mask(end_mask) - , m_new_axis_mask(new_axis_mask) - , m_shrink_axis_mask(shrink_axis_mask) - , m_ellipsis_mask(ellipsis_mask) { - constructor_validate_and_infer_types(); -} - -std::shared_ptr op::StridedSliceIE::clone_with_new_inputs(const ngraph::OutputVector &new_args) const { - check_new_args_count(this, new_args); - return std::make_shared(new_args.at(0), new_args.at(1), new_args.at(2), new_args.at(3), m_begin_mask, - m_end_mask, m_new_axis_mask, m_shrink_axis_mask, m_ellipsis_mask); -} - -void op::StridedSliceIE::validate_and_infer_types() { - const auto& begin_mask_et = input_value(1).get_element_type(); - const auto& end_mask_et = input_value(2).get_element_type(); - const auto& strides_et = input_value(3).get_element_type(); - - NODE_VALIDATION_CHECK(this, - begin_mask_et.is_integral_number(), - "Begin mask must have i32 type, but its: ", - begin_mask_et); - - NODE_VALIDATION_CHECK(this, - end_mask_et == element::i32, - "End mask must have i32 type, but its: ", - end_mask_et); - - NODE_VALIDATION_CHECK(this, - 
strides_et.is_integral_number(), - "Strides must have i32 type, but its: ", - strides_et); - - // Calculate output shape via opset1::StridedSlice operation - auto slice = std::make_shared(input_value(0), input_value(1), input_value(2), input_value(3), - m_begin_mask, m_end_mask, m_new_axis_mask, m_shrink_axis_mask, m_ellipsis_mask); - set_output_type(0, slice->output(0).get_element_type(), slice->output(0).get_partial_shape()); -} diff --git a/inference-engine/src/transformations/src/transformations/convert_opset1_to_legacy/convert_opset1_to_legacy.cpp b/inference-engine/src/transformations/src/transformations/convert_opset1_to_legacy/convert_opset1_to_legacy.cpp index ac8a2c6fa888fd..2a23c392bb8b14 100644 --- a/inference-engine/src/transformations/src/transformations/convert_opset1_to_legacy/convert_opset1_to_legacy.cpp +++ b/inference-engine/src/transformations/src/transformations/convert_opset1_to_legacy/convert_opset1_to_legacy.cpp @@ -44,7 +44,6 @@ #include #include #include -#include #include #include diff --git a/inference-engine/src/transformations/src/transformations/convert_opset1_to_legacy/convert_strided_slice_to_strided_slice_ie.cpp b/inference-engine/src/transformations/src/transformations/convert_opset1_to_legacy/convert_strided_slice_to_strided_slice_ie.cpp deleted file mode 100644 index 48f0a9d287d803..00000000000000 --- a/inference-engine/src/transformations/src/transformations/convert_opset1_to_legacy/convert_strided_slice_to_strided_slice_ie.cpp +++ /dev/null @@ -1,55 +0,0 @@ -// Copyright (C) 2018-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "transformations/convert_opset1_to_legacy/convert_strided_slice_to_strided_slice_ie.hpp" - -#include -#include - -#include - -#include -#include - -void ngraph::pass::ConvertStridedSliceToStridedSliceIE::convert_strided_slice_to_strided_slice_ie() { - auto slice = std::make_shared(element::f32, Shape{}, pattern::has_class()); - - ngraph::graph_rewrite_callback callback = [](pattern::Matcher& m) { - auto slice = std::dynamic_pointer_cast (m.get_match_root()); - if (!slice) { - return false; - } - - auto data_node = slice->input_value(0); - auto begin_node = std::dynamic_pointer_cast(slice->input_value(1).get_node_shared_ptr()); - auto end_node = std::dynamic_pointer_cast(slice->input_value(2).get_node_shared_ptr()); - auto stride_node = std::dynamic_pointer_cast(slice->input_value(3).get_node_shared_ptr()); - - if (!begin_node || !end_node || !stride_node) { - return false; - } - - auto converted_begin = std::make_shared(begin_node, element::i32); - auto converted_end = std::make_shared(end_node, element::i32); - auto converted_stride = std::make_shared(stride_node, element::i32); - - auto slice_ie = std::make_shared(data_node, - converted_begin, - converted_end, - converted_stride, - slice->get_begin_mask(), - slice->get_end_mask(), - slice->get_new_axis_mask(), - slice->get_shrink_axis_mask(), - slice->get_ellipsis_mask()); - slice_ie->set_friendly_name(slice->get_friendly_name()); - - ngraph::copy_runtime_info(slice, {converted_begin, converted_end, converted_stride, slice_ie}); - ngraph::replace_node(slice, slice_ie); - return true; - }; - - auto m = std::make_shared(slice, "ConvertStridedSliceToStridedSliceIE"); - this->add_matcher(m, callback, PassProperty::CHANGE_DYNAMIC_STATE); -} \ No newline at end of file diff --git a/inference-engine/tests/functional/inference_engine/transformations/convert_stridedslice_to_stridedslice_ie_test.cpp 
b/inference-engine/tests/functional/inference_engine/transformations/convert_stridedslice_to_stridedslice_ie_test.cpp deleted file mode 100644 index 41b58340f80c00..00000000000000 --- a/inference-engine/tests/functional/inference_engine/transformations/convert_stridedslice_to_stridedslice_ie_test.cpp +++ /dev/null @@ -1,97 +0,0 @@ -// Copyright (C) 2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include "ngraph_test_utils.hpp" - -using namespace testing; - -TEST(TransformationTests, ConvertStridedSliceToStridedSliceIEStatic) { - std::shared_ptr f(nullptr), f_ref(nullptr); - { - auto data = std::make_shared(ngraph::element::f32, ngraph::Shape{1, 2, 3, 4}); - auto begin = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {0, 0, 0, 0}); - auto end = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {-1, -1, -1, -1}); - auto stride = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {1}); - - std::vector begin_mask{0, 0, 0, 0}; - std::vector end_mask{1, 1, 1, 1}; - - auto ss = std::make_shared(data, begin, end, stride, begin_mask, end_mask); - - f = std::make_shared(ngraph::NodeVector{ss}, ngraph::ParameterVector{data}); - ngraph::pass::InitNodeInfo().run_on_function(f); - ngraph::pass::ConvertStridedSliceToStridedSliceIE().run_on_function(f); - ASSERT_NO_THROW(check_rt_info(f)); - ngraph::pass::ConstantFolding().run_on_function(f); - } - - { - auto data = std::make_shared(ngraph::element::f32, ngraph::Shape{1, 2, 3, 4}); - auto begin = ngraph::opset1::Constant::create(ngraph::element::i32, ngraph::Shape{4}, {0, 0, 0, 0}); - auto end = ngraph::opset1::Constant::create(ngraph::element::i32, ngraph::Shape{4}, {-1, -1, -1, -1}); - auto stride = ngraph::opset1::Constant::create(ngraph::element::i32, ngraph::Shape{4}, {1}); - - std::vector begin_mask{0, 0, 0, 0}, end_mask{1, 1, 1, 1}, new_axis_mask{}, shrink_axis_mask{}, ellipsis_mask{}; - - auto ss = std::make_shared(data, begin, end, stride, - begin_mask, end_mask, new_axis_mask, shrink_axis_mask, ellipsis_mask); - - f_ref = std::make_shared(ngraph::NodeVector{ss}, ngraph::ParameterVector{data}); - } - - auto res = compare_functions(f, f_ref); - ASSERT_TRUE(res.first) << res.second; -} - -TEST(TransformationTests, ConvertStridedSliceToStridedSliceIEDynamic) { - std::shared_ptr f(nullptr), f_ref(nullptr); - { - auto data = std::make_shared(ngraph::element::f32, ngraph::PartialShape::dynamic(4)); - auto begin = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {0, 0, 0, 0}); - auto end = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {-1, -1, -1, -1}); - auto stride = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {1}); - - std::vector begin_mask{0, 0, 0, 0}; - std::vector end_mask{1, 1, 1, 1}; - - auto ss = std::make_shared(data, begin, end, stride, begin_mask, end_mask); - - f = std::make_shared(ngraph::NodeVector{ss}, ngraph::ParameterVector{data}); - ngraph::pass::InitNodeInfo().run_on_function(f); - ngraph::pass::ConvertStridedSliceToStridedSliceIE().run_on_function(f); - ASSERT_NO_THROW(check_rt_info(f)); - ngraph::pass::ConstantFolding().run_on_function(f); - } - - { - auto data = std::make_shared(ngraph::element::f32, ngraph::PartialShape::dynamic(4)); - auto begin = ngraph::opset1::Constant::create(ngraph::element::i32, ngraph::Shape{4}, {0, 0, 0, 0}); - auto 
end = ngraph::opset1::Constant::create(ngraph::element::i32, ngraph::Shape{4}, {-1, -1, -1, -1});
-        auto stride = ngraph::opset1::Constant::create(ngraph::element::i32, ngraph::Shape{4}, {1});
-
-        std::vector begin_mask{0, 0, 0, 0}, end_mask{1, 1, 1, 1}, new_axis_mask{}, shrink_axis_mask{}, ellipsis_mask{};
-
-        auto ss = std::make_shared(data, begin, end, stride,
-                                   begin_mask, end_mask, new_axis_mask, shrink_axis_mask, ellipsis_mask);
-
-        f_ref = std::make_shared(ngraph::NodeVector{ss}, ngraph::ParameterVector{data});
-    }
-
-    auto res = compare_functions(f, f_ref);
-    ASSERT_TRUE(res.first) << res.second;
-}

From 5f6999ed7e2eb505f358c45e495f9a897e2490df Mon Sep 17 00:00:00 2001
From: Andrey Somsikov
Date: Thu, 28 May 2020 18:31:10 +0300
Subject: [PATCH 06/24] Remove Safety dependency (#627)

The Safety tool should be isolated from the environment it is validating:
https://github.com/pyupio/safety/security/advisories/GHSA-7q25-qrjw-6fg2
This change suggests a Docker-based solution by default.

---
 model-optimizer/README.md            | 2 +-
 model-optimizer/requirements_dev.txt | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/model-optimizer/README.md b/model-optimizer/README.md
index f48d7f843f09b0..260149bee92039 100644
--- a/model-optimizer/README.md
+++ b/model-optimizer/README.md
@@ -192,7 +192,7 @@ of the tool and can not be applied to the current version of Model Optimizer.
 1. Run the following command:
-    safety check -r requirements_file
+    cat requirements_file | docker run -i --rm pyupio/safety safety check --stdin
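
   The invocation above streams the requirements file into a throwaway container, so Safety and its own
   dependencies stay isolated from the environment whose packages are being validated. For instance,
   auditing the TensorFlow requirements would look like this (an illustrative run of the same command,
   assuming the `pyupio/safety` image can be pulled from Docker Hub):

       cat requirements_tf.txt | docker run -i --rm pyupio/safety safety check --stdin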
 
> **NOTE**: here requirements_file is one of the following: requirements.txt, requirements_caffe.txt, requirements_tf.txt, requirements_mxnet.txt, requirements_dev.txt. diff --git a/model-optimizer/requirements_dev.txt b/model-optimizer/requirements_dev.txt index 38ecd7cfb0e71c..2123de2a031fd5 100644 --- a/model-optimizer/requirements_dev.txt +++ b/model-optimizer/requirements_dev.txt @@ -4,6 +4,5 @@ pyenchant==1.6.11 astroid==2.1.0 pylint==2.1.1 Sphinx==1.6.5 -safety==1.8.5 test-generator==0.1.1 defusedxml>=0.5.0 From e51e1682ca4d7ddb05844087e96ddd393f687829 Mon Sep 17 00:00:00 2001 From: Ilya Churaev Date: Thu, 28 May 2020 22:40:20 +0300 Subject: [PATCH 07/24] Enabled Unit tests and remove IReaderPtr (#653) * Enabled Unit tests and remove IReaderPtr * Fixed unicode tests for Windows * Fixed typo --- .../src/inference_engine/ie_core.cpp | 130 +----------- .../inference_engine/ie_network_reader.cpp | 193 ++++++++++++++++++ .../inference_engine/ie_network_reader.hpp | 33 +++ .../src/readers/reader_api/ie_reader_ptr.hpp | 36 ---- .../inference_engine/net_reader_test.cpp | 2 +- 5 files changed, 230 insertions(+), 164 deletions(-) create mode 100644 inference-engine/src/inference_engine/ie_network_reader.cpp create mode 100644 inference-engine/src/inference_engine/ie_network_reader.hpp delete mode 100644 inference-engine/src/readers/reader_api/ie_reader_ptr.hpp diff --git a/inference-engine/src/inference_engine/ie_core.cpp b/inference-engine/src/inference_engine/ie_core.cpp index 0aab9219911f27..754e530359c4d7 100644 --- a/inference-engine/src/inference_engine/ie_core.cpp +++ b/inference-engine/src/inference_engine/ie_core.cpp @@ -5,33 +5,28 @@ #include "ie_core.hpp" #include -#include #include #include #include #include -#include -#include #include #include #include #include #include -#include "ie_blob_stream.hpp" -#include #include #include "cpp/ie_cnn_net_reader.h" #include "cpp/ie_plugin_cpp.hpp" #include "cpp_interfaces/base/ie_plugin_base.hpp" #include "details/ie_exception_conversion.hpp" #include "details/ie_so_pointer.hpp" -#include "file_utils.h" #include "ie_icore.hpp" #include "ie_plugin.hpp" #include "ie_plugin_config.hpp" #include "ie_profiling.hpp" #include "ie_util_internal.hpp" +#include "ie_network_reader.hpp" #include "multi-device/multi_device_config.hpp" #include "xml_parse_utils.h" @@ -133,79 +128,6 @@ Parameter copyParameterValue(const Parameter & value) { } // namespace -class Reader: public IReader { -private: - InferenceEngine::IReaderPtr ptr; - std::once_flag readFlag; - std::string name; - std::string location; - - InferenceEngine::IReaderPtr getReaderPtr() { - std::call_once(readFlag, [&] () { - FileUtils::FilePath libraryName = FileUtils::toFilePath(location); - FileUtils::FilePath readersLibraryPath = FileUtils::makeSharedLibraryName(getInferenceEngineLibraryPath(), libraryName); - - if (!FileUtils::fileExist(readersLibraryPath)) { - THROW_IE_EXCEPTION << "Please, make sure that Inference Engine ONNX reader library " - << FileUtils::fromFilePath(::FileUtils::makeSharedLibraryName({}, libraryName)) << " is in " - << getIELibraryPath(); - } - ptr = IReaderPtr(readersLibraryPath); - }); - - return ptr; - } - - InferenceEngine::IReaderPtr getReaderPtr() const { - return const_cast(this)->getReaderPtr(); - } - - void Release() noexcept override { - delete this; - } - -public: - using Ptr = std::shared_ptr; - Reader(const std::string& name, const std::string location): name(name), location(location) {} - bool supportModel(std::istream& model) const override { - auto 
reader = getReaderPtr(); - return reader->supportModel(model); - } - CNNNetwork read(std::istream& model, const std::vector& exts) const override { - auto reader = getReaderPtr(); - return reader->read(model, exts); - } - CNNNetwork read(std::istream& model, std::istream& weights, const std::vector& exts) const override { - auto reader = getReaderPtr(); - return reader->read(model, weights, exts); - } - std::vector getDataFileExtensions() const override { - auto reader = getReaderPtr(); - return reader->getDataFileExtensions(); - } - std::string getName() const { - return name; - } -}; - -namespace { - -// Extension to plugins creator -std::multimap readers; - -void registerReaders() { - static std::mutex readerMutex; - std::lock_guard lock(readerMutex); - // TODO: Read readers info from XML - auto onnxReader = std::make_shared("ONNX", std::string("inference_engine_onnx_reader") + std::string(IE_BUILD_POSTFIX)); - readers.emplace("onnx", onnxReader); - readers.emplace("prototxt", onnxReader); - auto irReader = std::make_shared("IR", std::string("inference_engine_ir_reader") + std::string(IE_BUILD_POSTFIX)); - readers.emplace("xml", irReader); -} - -} // namespace - CNNNetReaderPtr CreateCNNNetReaderPtr() noexcept { auto loader = createCnnReaderLoader(); return CNNNetReaderPtr(loader); @@ -374,57 +296,12 @@ class Core::Impl : public ICore { CNNNetwork ReadNetwork(const std::string& modelPath, const std::string& binPath) const override { IE_PROFILING_AUTO_SCOPE(Core::ReadNetwork) - - std::ifstream modelStream(modelPath, std::ios::binary); - if (!modelStream.is_open()) - THROW_IE_EXCEPTION << "Model file " << modelPath << " cannot be opened!"; - - auto fileExt = modelPath.substr(modelPath.find_last_of(".") + 1); - for (auto it = readers.lower_bound(fileExt); it != readers.upper_bound(fileExt); it++) { - auto reader = it->second; - if (reader->supportModel(modelStream)) { - // Find weights - std::string bPath = binPath; - if (bPath.empty()) { - auto pathWoExt = modelPath; - auto pos = modelPath.rfind('.'); - if (pos != std::string::npos) pathWoExt = modelPath.substr(0, pos); - for (const auto& ext : reader->getDataFileExtensions()) { - bPath = pathWoExt + "." + ext; - if (!FileUtils::fileExist(bPath)) { - bPath.clear(); - } else { - break; - } - } - } - if (!bPath.empty()) { - std::ifstream binStream; - binStream.open(bPath, std::ios::binary); - if (!binStream.is_open()) - THROW_IE_EXCEPTION << "Weights file " << bPath << " cannot be opened!"; - return reader->read(modelStream, binStream, extensions); - } - return reader->read(modelStream, extensions); - } - } - THROW_IE_EXCEPTION << "Unknown model format! Cannot read the model: " << modelPath; + return details::ReadNetwork(modelPath, binPath, extensions); } CNNNetwork ReadNetwork(const std::string& model, const Blob::CPtr& weights) const override { IE_PROFILING_AUTO_SCOPE(Core::ReadNetwork) - std::istringstream modelStream(model); - details::BlobStream binStream(weights); - - for (auto it = readers.begin(); it != readers.end(); it++) { - auto reader = it->second; - if (reader->supportModel(modelStream)) { - if (weights) - return reader->read(modelStream, binStream, extensions); - return reader->read(modelStream, extensions); - } - } - THROW_IE_EXCEPTION << "Unknown model format! 
Cannot read the model from string!"; + return details::ReadNetwork(model, weights, extensions); } ExecutableNetwork LoadNetwork(const CNNNetwork& network, const std::string& deviceName, @@ -704,7 +581,6 @@ Core::Impl::Impl() { opsetNames.insert("opset1"); opsetNames.insert("opset2"); opsetNames.insert("opset3"); - registerReaders(); } Core::Impl::~Impl() {} diff --git a/inference-engine/src/inference_engine/ie_network_reader.cpp b/inference-engine/src/inference_engine/ie_network_reader.cpp new file mode 100644 index 00000000000000..9d739b6afb3c46 --- /dev/null +++ b/inference-engine/src/inference_engine/ie_network_reader.cpp @@ -0,0 +1,193 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "ie_network_reader.hpp" + +#include
+#include +#include +#include +#include + +#include +#include +#include +#include + +namespace InferenceEngine { + +namespace details { + +/** + * @brief This class defines the name of the fabric for creating an IReader object in DLL + */ +template <> +class SOCreatorTrait { +public: + /** + * @brief A name of the fabric for creating IReader object in DLL + */ + static constexpr auto name = "CreateReader"; +}; + +} // namespace details + +/** + * @brief This class is a wrapper for reader interfaces + */ +class Reader: public IReader { +private: + InferenceEngine::details::SOPointer ptr; + std::once_flag readFlag; + std::string name; + std::string location; + + InferenceEngine::details::SOPointer getReaderPtr() { + std::call_once(readFlag, [&] () { + FileUtils::FilePath libraryName = FileUtils::toFilePath(location); + FileUtils::FilePath readersLibraryPath = FileUtils::makeSharedLibraryName(getInferenceEngineLibraryPath(), libraryName); + + if (!FileUtils::fileExist(readersLibraryPath)) { + THROW_IE_EXCEPTION << "Please, make sure that Inference Engine ONNX reader library " + << FileUtils::fromFilePath(::FileUtils::makeSharedLibraryName({}, libraryName)) << " is in " + << getIELibraryPath(); + } + ptr = InferenceEngine::details::SOPointer(readersLibraryPath); + }); + + return ptr; + } + + InferenceEngine::details::SOPointer getReaderPtr() const { + return const_cast(this)->getReaderPtr(); + } + + void Release() noexcept override { + delete this; + } + +public: + using Ptr = std::shared_ptr; + Reader(const std::string& name, const std::string location): name(name), location(location) {} + bool supportModel(std::istream& model) const override { + auto reader = getReaderPtr(); + return reader->supportModel(model); + } + CNNNetwork read(std::istream& model, const std::vector& exts) const override { + auto reader = getReaderPtr(); + return reader->read(model, exts); + } + CNNNetwork read(std::istream& model, std::istream& weights, const std::vector& exts) const override { + auto reader = getReaderPtr(); + return reader->read(model, weights, exts); + } + std::vector getDataFileExtensions() const override { + auto reader = getReaderPtr(); + return reader->getDataFileExtensions(); + } + std::string getName() const { + return name; + } +}; + +namespace { + +// Extension to plugins creator +std::multimap readers; + +void registerReaders() { + IE_PROFILING_AUTO_SCOPE(details::registerReaders) + static bool initialized = false; + static std::mutex readerMutex; + std::lock_guard lock(readerMutex); + if (initialized) return; + // TODO: Read readers info from XML + auto onnxReader = std::make_shared("ONNX", std::string("inference_engine_onnx_reader") + std::string(IE_BUILD_POSTFIX)); + readers.emplace("onnx", onnxReader); + readers.emplace("prototxt", onnxReader); + auto irReader = std::make_shared("IR", std::string("inference_engine_ir_reader") + std::string(IE_BUILD_POSTFIX)); + readers.emplace("xml", irReader); + initialized = true; +} + +} // namespace + +CNNNetwork details::ReadNetwork(const std::string& modelPath, const std::string& binPath, const std::vector& exts) { + IE_PROFILING_AUTO_SCOPE(details::ReadNetwork) + // Register readers if it is needed + registerReaders(); + + // Fix unicode name +#if defined(ENABLE_UNICODE_PATH_SUPPORT) && defined(_WIN32) + std::wstring model_path = InferenceEngine::details::multiByteCharToWString(modelPath.c_str()); +#else + std::string model_path = modelPath; +#endif + // Try to open model file + std::ifstream modelStream(model_path, std::ios::binary); + if 
(!modelStream.is_open()) + THROW_IE_EXCEPTION << "Model file " << modelPath << " cannot be opened!"; + + // Find reader for model extension + auto fileExt = modelPath.substr(modelPath.find_last_of(".") + 1); + for (auto it = readers.lower_bound(fileExt); it != readers.upper_bound(fileExt); it++) { + auto reader = it->second; + // Check that reader supports the model + if (reader->supportModel(modelStream)) { + // Find weights + std::string bPath = binPath; + if (bPath.empty()) { + auto pathWoExt = modelPath; + auto pos = modelPath.rfind('.'); + if (pos != std::string::npos) pathWoExt = modelPath.substr(0, pos); + for (const auto& ext : reader->getDataFileExtensions()) { + bPath = pathWoExt + "." + ext; + if (!FileUtils::fileExist(bPath)) { + bPath.clear(); + } else { + break; + } + } + } + if (!bPath.empty()) { + // Open weights file +#if defined(ENABLE_UNICODE_PATH_SUPPORT) && defined(_WIN32) + std::wstring weights_path = InferenceEngine::details::multiByteCharToWString(bPath.c_str()); +#else + std::string weights_path = bPath; +#endif + std::ifstream binStream; + binStream.open(weights_path, std::ios::binary); + if (!binStream.is_open()) + THROW_IE_EXCEPTION << "Weights file " << bPath << " cannot be opened!"; + + // read model with weights + return reader->read(modelStream, binStream, exts); + } + // read model without weights + return reader->read(modelStream, exts); + } + } + THROW_IE_EXCEPTION << "Unknown model format! Cannot read the model: " << modelPath; +} + +CNNNetwork details::ReadNetwork(const std::string& model, const Blob::CPtr& weights, const std::vector& exts) { + IE_PROFILING_AUTO_SCOPE(details::ReadNetwork) + // Register readers if it is needed + registerReaders(); + std::istringstream modelStream(model); + details::BlobStream binStream(weights); + + for (auto it = readers.begin(); it != readers.end(); it++) { + auto reader = it->second; + if (reader->supportModel(modelStream)) { + if (weights) + return reader->read(modelStream, binStream, exts); + return reader->read(modelStream, exts); + } + } + THROW_IE_EXCEPTION << "Unknown model format! Cannot read the model from string!"; +} + +} // namespace InferenceEngine diff --git a/inference-engine/src/inference_engine/ie_network_reader.hpp b/inference-engine/src/inference_engine/ie_network_reader.hpp new file mode 100644 index 00000000000000..2d8ea6338fc4da --- /dev/null +++ b/inference-engine/src/inference_engine/ie_network_reader.hpp @@ -0,0 +1,33 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include + +namespace InferenceEngine { +namespace details { + +/** + * @brief Reads IR xml and bin files + * @param modelPath path to IR file + * @param binPath path to bin file, if path is empty, will try to read bin file with the same name as xml and + * if bin file with the same name was not found, will load IR without weights. 
+ * @param exts vector with extensions + * @return CNNNetwork + */ +CNNNetwork ReadNetwork(const std::string& modelPath, const std::string& binPath, const std::vector& exts); +/** + * @brief Reads IR xml and bin (with the same name) files + * @param model string with IR + * @param weights shared pointer to constant blob with weights + * @param exts vector with extensions + * @return CNNNetwork + */ +CNNNetwork ReadNetwork(const std::string& model, const Blob::CPtr& weights, const std::vector& exts); + +} // namespace details +} // namespace InferenceEngine diff --git a/inference-engine/src/readers/reader_api/ie_reader_ptr.hpp b/inference-engine/src/readers/reader_api/ie_reader_ptr.hpp deleted file mode 100644 index 9c3aee3ac51249..00000000000000 --- a/inference-engine/src/readers/reader_api/ie_reader_ptr.hpp +++ /dev/null @@ -1,36 +0,0 @@ -// Copyright (C) 2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include - -#include
-#include "ie_reader.hpp"
-
-namespace InferenceEngine {
-namespace details {
-
-/**
- * @brief This class defines the name of the fabric for creating an IReader object in DLL
- */
-template <>
-class SOCreatorTrait {
-public:
-    /**
-     * @brief A name of the fabric for creating IReader object in DLL
-     */
-    static constexpr auto name = "CreateReader";
-};
-
-}  // namespace details
-
-/**
- * @brief A C++ helper to work with objects created by the plugin.
- *
- * Implements different interfaces.
- */
-using IReaderPtr = InferenceEngine::details::SOPointer;
-
-}  // namespace InferenceEngine
diff --git a/inference-engine/tests/functional/inference_engine/net_reader_test.cpp b/inference-engine/tests/functional/inference_engine/net_reader_test.cpp
index 4df6db84f56944..2d008bbd905049 100644
--- a/inference-engine/tests/functional/inference_engine/net_reader_test.cpp
+++ b/inference-engine/tests/functional/inference_engine/net_reader_test.cpp
@@ -107,7 +107,7 @@ TEST_P(NetReaderTest, ReadNetworkTwiceSeparately) {
 
 #ifdef ENABLE_UNICODE_PATH_SUPPORT
 
-TEST_P(NetReaderTest, DISABLED_ReadCorrectModelWithWeightsUnicodePath) {
+TEST_P(NetReaderTest, ReadCorrectModelWithWeightsUnicodePath) {
     GTEST_COUT << "params.modelPath: '" << _modelPath << "'" << std::endl;
     GTEST_COUT << "params.weightsPath: '" << _weightsPath << "'" << std::endl;
     GTEST_COUT << "params.netPrc: '" << _netPrc.name() << "'" << std::endl;

From 5cc8114322d7fcd8057a80a3229a9bb16276fa70 Mon Sep 17 00:00:00 2001
From: Evgenya Stepyreva
Date: Fri, 29 May 2020 09:11:22 +0300
Subject: [PATCH 08/24] [ MO: CVS-32286 ] IdentityN fix (#668)

---
 .../front/tf/identityN_to_identity.py      | 15 ++++++++++++++-
 .../front/tf/identityN_to_identity_test.py | 17 +++++++++++++++++
 2 files changed, 31 insertions(+), 1 deletion(-)

diff --git a/model-optimizer/extensions/front/tf/identityN_to_identity.py b/model-optimizer/extensions/front/tf/identityN_to_identity.py
index 4e3d38ff0bf089..7578ef97d8d8da 100644
--- a/model-optimizer/extensions/front/tf/identityN_to_identity.py
+++ b/model-optimizer/extensions/front/tf/identityN_to_identity.py
@@ -29,6 +29,11 @@ class IdentityN_to_Identity(FrontReplacementPattern):
         IdentityN            Identity  Identity
         /      \                |         |
 output_0        output_1    output_0  output_1
+
+    ATTENTION: not all in/outputs of the IdentityN may survive the Model Optimizer pipeline,
+    and this breaks the original operation semantics.
+    For example, output_1 may not be used during network output computations.
+    To handle such unused in/output ports we disconnect the corresponding out/input port.
""" enabled = True @@ -41,12 +46,20 @@ def replace_identityN(node: Node): dtypes = node.data_types for idx, port in node.in_ports().items(): - assert node.is_out_port_connected(idx), 'IdentityN {} has inconsistent input and output ports'.format(name) + if not node.is_in_port_connected(idx) or not node.is_out_port_connected(idx): + # ATTENTION section in the description above + continue assert idx < len(dtypes), 'IdentityN {} has inconsistent `data_types` attribute {}'.format(name, dtypes) identity = Identity(graph, {'name': '{}/{}_port'.format(name, idx), 'data_type': dtypes[idx]}).create_node() port.get_connection().set_destination(identity.in_port(0)) node.out_port(idx).get_connection().set_source(identity.out_port(0)) + # ATTENTION section in the description above + for in_port in node.in_ports().values(): + in_port.disconnect() + for out_port in node.out_ports().values(): + out_port.disconnect() + def find_and_replace_pattern(self, graph: Graph): for identityN in graph.get_op_nodes(op='IdentityN'): self.replace_identityN(identityN) diff --git a/model-optimizer/extensions/front/tf/identityN_to_identity_test.py b/model-optimizer/extensions/front/tf/identityN_to_identity_test.py index f6422ce8ff8bc2..71571d7783a8cf 100644 --- a/model-optimizer/extensions/front/tf/identityN_to_identity_test.py +++ b/model-optimizer/extensions/front/tf/identityN_to_identity_test.py @@ -61,3 +61,20 @@ def test_identityN(self): (flag, resp) = compare_graphs(graph, graph_ref, 'output0', check_op_attrs=True) self.assertTrue(flag, resp) + + def test_identityN_unused_ports(self): + graph = build_graph(nodes, [ + *connect('placeholder_0', '0:identityN'), + *connect('placeholder_1', '1:identityN'), + *connect('identityN:0', 'output0'), + ], nodes_with_edges_only=True) + + IdentityN_to_Identity().find_and_replace_pattern(graph) + + graph_ref = build_graph(nodes, [ + *connect('placeholder_0', 'identity0'), + *connect('identity0', 'output0'), + ], nodes_with_edges_only=True) + + (flag, resp) = compare_graphs(graph, graph_ref, 'output0', check_op_attrs=True) + self.assertTrue(flag, resp) From e290b14ab147596f6b5607786ce383283a35b7b8 Mon Sep 17 00:00:00 2001 From: Evgenya Stepyreva Date: Fri, 29 May 2020 09:15:47 +0300 Subject: [PATCH 09/24] [ MO Interpolate ] Fixing broken model reshape-ability (#619) --- model-optimizer/automation/package_BOM.txt | 1 + .../extensions/back/InterpolateReshape.py | 154 ++++++++++++++++++ .../back/InterpolateReshape_test.py | 97 +++++++++++ 3 files changed, 252 insertions(+) create mode 100644 model-optimizer/extensions/back/InterpolateReshape.py create mode 100644 model-optimizer/extensions/back/InterpolateReshape_test.py diff --git a/model-optimizer/automation/package_BOM.txt b/model-optimizer/automation/package_BOM.txt index e4da215ad3cda9..2d19e07fbcb815 100644 --- a/model-optimizer/automation/package_BOM.txt +++ b/model-optimizer/automation/package_BOM.txt @@ -29,6 +29,7 @@ extensions/back/GatherNormalizer.py extensions/back/GroupedConvWeightsNormalize.py extensions/back/I64ToI32.py extensions/back/insert_compatibility_l2normalization.py +extensions/back/InterpolateReshape.py extensions/back/InterpolateToInterpOrResample.py extensions/back/kaldi_remove_memory_output.py extensions/back/LeakyReLUMutation.py diff --git a/model-optimizer/extensions/back/InterpolateReshape.py b/model-optimizer/extensions/back/InterpolateReshape.py new file mode 100644 index 00000000000000..e1ecbebbcd8a9b --- /dev/null +++ b/model-optimizer/extensions/back/InterpolateReshape.py @@ -0,0 +1,154 @@ +""" + 
Copyright (C) 2018-2020 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +""" +import numpy as np + +from extensions.ops.elementwise import Mul +from extensions.ops.gather import Gather +from mo.back.replacement import BackReplacementPattern +from mo.front.caffe.extractors.utils import get_canonical_axis_index +from mo.front.common.partial_infer.utils import int64_array +from mo.front.tf.graph_utils import create_op_with_const_inputs, create_op_node_with_second_input +from mo.graph.graph import Graph +from mo.ops.shape import Shape + + +class InterpolateConcat(BackReplacementPattern): + """ + Replaces hard-coded 1-port input of Interpolate with reshape-able sub-graph using the following Concat inputs + + BEFORE: + input Const + shape=[1, 3, 30, 40] value=[60, 160] + \ / + Interpolate(axes=(2, 3)) input_1 + shape=[1, 3, 60, 160] shape=[1, 4, 60, 160] + \ / + Concat(axis=1) + shape=[1, 7, 60, 160] + AFTER: + input + shape=[1, 3, 30, 40] input_1 + | shape=[1, 4, 60, 160] + | / | + | ShapeOf | + | | | + | Gather | + | indices=(2, 3); axis=0 | + \ | | + Interpolate(axes=(2, 3)) | + shape=[1, 3, 60, 160] | + \ / + Concat(axis=1) + shape=[1, 7, 60, 160] + + """ + enabled = True + graph_condition = [lambda graph: graph.graph['cmd_params'].keep_shape_ops] + force_shape_inference = True + id = 'reshape_interpolate_through_concat' + + @staticmethod + def make_interpolate_reshapeable(interpolate, concat): + assert interpolate.soft_get('type') == 'Interpolate' + assert concat.soft_get('type') == 'Concat' + + output_shape = interpolate.out_port(0).data.get_shape() + + interp_axes = [get_canonical_axis_index(output_shape, axis) for axis in interpolate.axes] + concat_axis = get_canonical_axis_index(output_shape, concat.axis) + if concat_axis in interp_axes: + return + + concat_srcs = [port.get_source() for port in concat.in_ports().values()] + non_interp_concat_srcs = [src for src in concat_srcs if src.node.soft_get('type') != 'Interpolate'] + if len(non_interp_concat_srcs) == 0: + return + + graph = interpolate.graph + src = non_interp_concat_srcs[0] + + shape = Shape(graph, {'name': src.node.soft_get('name', src.node.id) + '/Shape'}).create_node() + shape.in_port(0).connect(src) + gather = create_op_with_const_inputs(graph, Gather, {1: np.array(interpolate.axes, dtype=np.int32), 2: int64_array(0)}, + {'name': shape.name + '/Gathered'}, shape) + interpolate.in_port(1).get_connection().set_source(gather.out_port(0)) + + def find_and_replace_pattern(self, graph: Graph): + for interpolate in graph.get_op_nodes(type='Interpolate'): + if interpolate.in_port(1).get_source().node.soft_get('type') != 'Const': + continue + dsts = interpolate.out_port(0).get_destinations() + if len(dsts) == 1 and dsts[0].node.soft_get('type') == 'Concat': + self.make_interpolate_reshapeable(interpolate, dsts[0].node) + + +class InterpolateReshapeWA(BackReplacementPattern): + """ + Replaces hard-coded 1-port input of Interpolate with reshape-able sub-graph. 
+ WARNING: Could cause troubles if model has hard-coded Interpolate intentionally -- rare situation + + BEFORE: + input Const + shape=[1, 3, 30, 40] value=[60, 160] + \ / + Interpolate(axes=(2, 3)) + shape=[1, 3, 60, 160] + + AFTER: + input + shape=[1, 3, 30, 40] + | \ + | ShapeOf + | | + | Gather Const + | indices=(2, 3); axis=0 value=[2, 4] + | \ / + | Multiply + | / + Interpolate(axes=(2, 3)) + shape=[1, 3, 60, 160] + """ + enabled = False + graph_condition = [lambda graph: graph.graph['cmd_params'].keep_shape_ops] + force_shape_inference = True + id = 'reshape_interpolate_wa' + + def run_after(self): + return [InterpolateConcat] + + @staticmethod + def make_interpolate_reshapeable(interpolate): + assert interpolate.soft_get('type') == 'Interpolate' + axes = interpolate.axes + input_shape = interpolate.in_port(0).data.get_shape() + output_shape = interpolate.out_port(0).data.get_shape() + if not np.all(np.remainder(output_shape, input_shape) == 0) and \ + not np.all(np.remainder(input_shape, output_shape) == 0): + return + graph = interpolate.graph + name = interpolate.soft_get('name', interpolate.id) + shape = Shape(graph, {'name': name + '/ShapeOf'}).create_node() + shape.in_port(0).connect(interpolate.in_port(0).get_source()) + gather = create_op_with_const_inputs(graph, Gather, {1: np.array(axes, dtype=np.int32), 2: int64_array(0)}, + {'name': shape.name + '/Gathered'}, shape) + multipliers = output_shape[axes] / input_shape[axes] + mul = create_op_node_with_second_input(graph, Mul, multipliers, {'name': gather.name + '/Multiplied'}, gather) + interpolate.in_port(1).get_connection().set_source(mul.out_port(0)) + + def find_and_replace_pattern(self, graph: Graph): + for interpolate in graph.get_op_nodes(type='Interpolate'): + if interpolate.in_port(1).get_source().node.soft_get('type') == 'Const': + self.make_interpolate_reshapeable(interpolate) diff --git a/model-optimizer/extensions/back/InterpolateReshape_test.py b/model-optimizer/extensions/back/InterpolateReshape_test.py new file mode 100644 index 00000000000000..f793a4b592fceb --- /dev/null +++ b/model-optimizer/extensions/back/InterpolateReshape_test.py @@ -0,0 +1,97 @@ +""" + Copyright (C) 2018-2020 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+""" + +import unittest +from argparse import Namespace + +import numpy as np + +from extensions.back.InterpolateReshape import InterpolateReshapeWA, InterpolateConcat +from mo.utils.ir_engine.compare_graphs import compare_graphs +from mo.utils.unittest.graph import build_graph, result, regular_op_with_shaped_data, valued_const_with_data, connect, \ + connect_data + +nodes = { + **regular_op_with_shaped_data('placeholder', [1, 3, 30, 40], {'type': 'Parameter'}), + **valued_const_with_data('out_shape', np.array([60, 160])), + + **regular_op_with_shaped_data('interpolate', [1, 3, 60, 160], {'type': 'Interpolate', 'axes': [2, 3]}), + + **regular_op_with_shaped_data('shape', [4], {'type': 'ShapeOf'}), + **valued_const_with_data('indices', np.array([2, 3])), + **valued_const_with_data('axis', np.array(0)), + **regular_op_with_shaped_data('gather', [2], {'type': 'Gather'}), + + **valued_const_with_data('multiplier', np.array([2, 4])), + **regular_op_with_shaped_data('mul', [2], {'type': 'Multiply'}), + + **regular_op_with_shaped_data('placeholder_1', [1, 3, 60, 160], {'type': 'Parameter'}), + **regular_op_with_shaped_data('concat', [1, 7, 60, 160], {'type': 'Concat', 'axis': 1}), + + **result(), +} + + +class TestInterpolateReshapeWA(unittest.TestCase): + def test_interpolate_reshape_graph_comparison(self): + graph = build_graph(nodes, [ + *connect('placeholder', '0:interpolate'), + *connect('out_shape', '1:interpolate'), + *connect('interpolate', 'output'), + ], nodes_with_edges_only=True) + InterpolateReshapeWA().find_and_replace_pattern(graph) + graph.graph['cmd_params'] = Namespace(keep_shape_ops=True) + graph.clean_up() + graph_ref = build_graph(nodes, [ + *connect('placeholder', '0:interpolate'), + *connect_data('placeholder', 'shape'), + *connect('shape', '0:gather'), + *connect('indices', '1:gather'), + *connect('axis', '2:gather'), + *connect('gather', '0:mul'), + *connect('multiplier', '1:mul'), + *connect('mul', '1:interpolate'), + *connect('interpolate', 'output'), + ], nodes_with_edges_only=True) + (flag, resp) = compare_graphs(graph, graph_ref, 'output', check_op_attrs=True) + self.assertTrue(flag, resp) + + +class TestInterpolateConcat(unittest.TestCase): + def test_interpolate_concat_reshape_graph_comparison(self): + graph = build_graph(nodes, [ + *connect('placeholder', '0:interpolate'), + *connect('out_shape', '1:interpolate'), + *connect('interpolate', '0:concat'), + *connect('placeholder_1', '1:concat'), + *connect('concat', 'output'), + ], nodes_with_edges_only=True) + InterpolateConcat().find_and_replace_pattern(graph) + graph.graph['cmd_params'] = Namespace(keep_shape_ops=True) + graph.clean_up() + graph_ref = build_graph(nodes, [ + *connect('placeholder', '0:interpolate'), + *connect('placeholder_1', 'shape'), + *connect('shape', '0:gather'), + *connect('indices', '1:gather'), + *connect('axis', '2:gather'), + *connect('gather', '1:interpolate'), + *connect('interpolate', '0:concat'), + *connect_data('placeholder_1', '1:concat'), + *connect('concat', 'output'), + ], nodes_with_edges_only=True) + (flag, resp) = compare_graphs(graph, graph_ref, 'output', check_op_attrs=True) + self.assertTrue(flag, resp) From 67d733d5a8db85efffbfd0d713746f8e1f099efc Mon Sep 17 00:00:00 2001 From: "Gladilov, Gleb" Date: Fri, 29 May 2020 09:32:10 +0300 Subject: [PATCH 10/24] Enables VPU maintainers notification in case of PR to VPU related folders and files (#667) --- CODEOWNERS | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/CODEOWNERS b/CODEOWNERS index 
b3bd89148d9ce9..9d19cd384ebfa5 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -33,6 +33,14 @@ azure-pipelines.yml @openvinotoolkit/openvino-admins /inference-engine/src/vpu/ @openvinotoolkit/openvino-ie-vpu-maintainers /inference-engine/include/vpu/ @openvinotoolkit/openvino-ie-vpu-maintainers /inference-engine/thirdparty/movidius/ @openvinotoolkit/openvino-ie-vpu-maintainers +/inference-engine/tests_deprecated/unit/engines/vpu/ @openvinotoolkit/openvino-ie-vpu-maintainers @openvinotoolkit/openvino-ie-tests-maintainers +/inference-engine/tests_deprecated/functional/vpu/ @openvinotoolkit/openvino-ie-vpu-maintainers @openvinotoolkit/openvino-ie-tests-maintainers +/inference-engine/tests_deprecated/behavior/vpu/ @openvinotoolkit/openvino-ie-vpu-maintainers @openvinotoolkit/openvino-ie-tests-maintainers +/inference-engine/tests/functional/plugin/myriad/ @openvinotoolkit/openvino-ie-vpu-maintainers @openvinotoolkit/openvino-ie-tests-maintainers +/inference-engine/tests/unit/vpu/ @openvinotoolkit/openvino-ie-vpu-maintainers @openvinotoolkit/openvino-ie-tests-maintainers +/inference-engine/tests/unit/engines/vpu/ @openvinotoolkit/openvino-ie-vpu-maintainers @openvinotoolkit/openvino-ie-tests-maintainers +/inference-engine/tools/vpu/ @openvinotoolkit/openvino-ie-vpu-maintainers +/inference-engine/scripts/run_tests_myriad_multistick.sh @openvinotoolkit/openvino-ie-vpu-maintainers # IE GNA: /inference-engine/src/gna_plugin/ @openvinotoolkit/openvino-ie-gna-maintainers From 09192b804e071c0205f504f761a087b184c60fdb Mon Sep 17 00:00:00 2001 From: Artyom Anokhov Date: Fri, 29 May 2020 13:50:17 +0300 Subject: [PATCH 11/24] [OpenVINO scripts] Fixed *.sh files index from 644 to 755 (#664) * Fixed *.sh files index from 644 to 755 * Added convert.py executable permission --- inference-engine/samples/build_samples.sh | 0 inference-engine/scripts/dependencies.sh | 0 inference-engine/scripts/run_tests_myriad_multistick.sh | 0 inference-engine/thirdparty/clDNN/create_unixmake_gcc.sh | 0 inference-engine/thirdparty/fluid/check.sh | 0 inference-engine/thirdparty/fluid/checksum.sh | 0 .../thirdparty/fluid/modules/gapi/doc/slides/get_sty.sh | 0 inference-engine/thirdparty/fluid/update.sh | 0 install_dependencies.sh | 0 model-optimizer/mo/utils/convert.py | 0 ngraph/maint/apply-code-format.sh | 0 ngraph/maint/bash_lib.sh | 0 ngraph/maint/check-code-format.sh | 0 ngraph/maint/clang_format_lib.sh | 0 ngraph/test/update_convolution_reference.sh | 0 ngraph/test/update_dyn_replace_slice_reference.sh | 0 ngraph/test/update_dyn_slice_reference.sh | 0 scripts/demo/demo_benchmark_app.sh | 0 scripts/demo/demo_security_barrier_camera.sh | 0 scripts/demo/demo_squeezenet_download_convert_run.sh | 0 scripts/demo/utils.sh | 0 scripts/install_dependencies/install_4_14_kernel.sh | 0 scripts/install_dependencies/install_GST_dependencies.sh | 0 scripts/install_dependencies/install_NCS_udev_rules.sh | 0 scripts/install_dependencies/install_NEO_OCL_driver.sh | 0 scripts/install_dependencies/install_openvino_dependencies.sh | 0 scripts/setupvars/setupvars.sh | 0 27 files changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 inference-engine/samples/build_samples.sh mode change 100644 => 100755 inference-engine/scripts/dependencies.sh mode change 100644 => 100755 inference-engine/scripts/run_tests_myriad_multistick.sh mode change 100644 => 100755 inference-engine/thirdparty/clDNN/create_unixmake_gcc.sh mode change 100644 => 100755 inference-engine/thirdparty/fluid/check.sh mode change 100644 => 100755 
inference-engine/thirdparty/fluid/checksum.sh mode change 100644 => 100755 inference-engine/thirdparty/fluid/modules/gapi/doc/slides/get_sty.sh mode change 100644 => 100755 inference-engine/thirdparty/fluid/update.sh mode change 100644 => 100755 install_dependencies.sh mode change 100644 => 100755 model-optimizer/mo/utils/convert.py mode change 100644 => 100755 ngraph/maint/apply-code-format.sh mode change 100644 => 100755 ngraph/maint/bash_lib.sh mode change 100644 => 100755 ngraph/maint/check-code-format.sh mode change 100644 => 100755 ngraph/maint/clang_format_lib.sh mode change 100644 => 100755 ngraph/test/update_convolution_reference.sh mode change 100644 => 100755 ngraph/test/update_dyn_replace_slice_reference.sh mode change 100644 => 100755 ngraph/test/update_dyn_slice_reference.sh mode change 100644 => 100755 scripts/demo/demo_benchmark_app.sh mode change 100644 => 100755 scripts/demo/demo_security_barrier_camera.sh mode change 100644 => 100755 scripts/demo/demo_squeezenet_download_convert_run.sh mode change 100644 => 100755 scripts/demo/utils.sh mode change 100644 => 100755 scripts/install_dependencies/install_4_14_kernel.sh mode change 100644 => 100755 scripts/install_dependencies/install_GST_dependencies.sh mode change 100644 => 100755 scripts/install_dependencies/install_NCS_udev_rules.sh mode change 100644 => 100755 scripts/install_dependencies/install_NEO_OCL_driver.sh mode change 100644 => 100755 scripts/install_dependencies/install_openvino_dependencies.sh mode change 100644 => 100755 scripts/setupvars/setupvars.sh diff --git a/inference-engine/samples/build_samples.sh b/inference-engine/samples/build_samples.sh old mode 100644 new mode 100755 diff --git a/inference-engine/scripts/dependencies.sh b/inference-engine/scripts/dependencies.sh old mode 100644 new mode 100755 diff --git a/inference-engine/scripts/run_tests_myriad_multistick.sh b/inference-engine/scripts/run_tests_myriad_multistick.sh old mode 100644 new mode 100755 diff --git a/inference-engine/thirdparty/clDNN/create_unixmake_gcc.sh b/inference-engine/thirdparty/clDNN/create_unixmake_gcc.sh old mode 100644 new mode 100755 diff --git a/inference-engine/thirdparty/fluid/check.sh b/inference-engine/thirdparty/fluid/check.sh old mode 100644 new mode 100755 diff --git a/inference-engine/thirdparty/fluid/checksum.sh b/inference-engine/thirdparty/fluid/checksum.sh old mode 100644 new mode 100755 diff --git a/inference-engine/thirdparty/fluid/modules/gapi/doc/slides/get_sty.sh b/inference-engine/thirdparty/fluid/modules/gapi/doc/slides/get_sty.sh old mode 100644 new mode 100755 diff --git a/inference-engine/thirdparty/fluid/update.sh b/inference-engine/thirdparty/fluid/update.sh old mode 100644 new mode 100755 diff --git a/install_dependencies.sh b/install_dependencies.sh old mode 100644 new mode 100755 diff --git a/model-optimizer/mo/utils/convert.py b/model-optimizer/mo/utils/convert.py old mode 100644 new mode 100755 diff --git a/ngraph/maint/apply-code-format.sh b/ngraph/maint/apply-code-format.sh old mode 100644 new mode 100755 diff --git a/ngraph/maint/bash_lib.sh b/ngraph/maint/bash_lib.sh old mode 100644 new mode 100755 diff --git a/ngraph/maint/check-code-format.sh b/ngraph/maint/check-code-format.sh old mode 100644 new mode 100755 diff --git a/ngraph/maint/clang_format_lib.sh b/ngraph/maint/clang_format_lib.sh old mode 100644 new mode 100755 diff --git a/ngraph/test/update_convolution_reference.sh b/ngraph/test/update_convolution_reference.sh old mode 100644 new mode 100755 diff --git 
a/ngraph/test/update_dyn_replace_slice_reference.sh b/ngraph/test/update_dyn_replace_slice_reference.sh old mode 100644 new mode 100755 diff --git a/ngraph/test/update_dyn_slice_reference.sh b/ngraph/test/update_dyn_slice_reference.sh old mode 100644 new mode 100755 diff --git a/scripts/demo/demo_benchmark_app.sh b/scripts/demo/demo_benchmark_app.sh old mode 100644 new mode 100755 diff --git a/scripts/demo/demo_security_barrier_camera.sh b/scripts/demo/demo_security_barrier_camera.sh old mode 100644 new mode 100755 diff --git a/scripts/demo/demo_squeezenet_download_convert_run.sh b/scripts/demo/demo_squeezenet_download_convert_run.sh old mode 100644 new mode 100755 diff --git a/scripts/demo/utils.sh b/scripts/demo/utils.sh old mode 100644 new mode 100755 diff --git a/scripts/install_dependencies/install_4_14_kernel.sh b/scripts/install_dependencies/install_4_14_kernel.sh old mode 100644 new mode 100755 diff --git a/scripts/install_dependencies/install_GST_dependencies.sh b/scripts/install_dependencies/install_GST_dependencies.sh old mode 100644 new mode 100755 diff --git a/scripts/install_dependencies/install_NCS_udev_rules.sh b/scripts/install_dependencies/install_NCS_udev_rules.sh old mode 100644 new mode 100755 diff --git a/scripts/install_dependencies/install_NEO_OCL_driver.sh b/scripts/install_dependencies/install_NEO_OCL_driver.sh old mode 100644 new mode 100755 diff --git a/scripts/install_dependencies/install_openvino_dependencies.sh b/scripts/install_dependencies/install_openvino_dependencies.sh old mode 100644 new mode 100755 diff --git a/scripts/setupvars/setupvars.sh b/scripts/setupvars/setupvars.sh old mode 100644 new mode 100755 From a4f13ae9fe0f65f5eed67689e1a5ba461df9dfa7 Mon Sep 17 00:00:00 2001 From: Ivan Tikhonov Date: Fri, 29 May 2020 14:09:20 +0300 Subject: [PATCH 12/24] fix constant folding of Concat op (#675) --- ngraph/src/ngraph/op/concat.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ngraph/src/ngraph/op/concat.cpp b/ngraph/src/ngraph/op/concat.cpp index a4ebe34c028081..5a54eac7717bb5 100644 --- a/ngraph/src/ngraph/op/concat.cpp +++ b/ngraph/src/ngraph/op/concat.cpp @@ -191,6 +191,8 @@ namespace break; TYPE_CASE(u64)(args, out, concatenation_axis); break; + TYPE_CASE(f16)(args, out, concatenation_axis); + break; TYPE_CASE(f32)(args, out, concatenation_axis); break; TYPE_CASE(f64)(args, out, concatenation_axis); From 5f8f9ec108abd958ee954cc0dd04c433d4d96182 Mon Sep 17 00:00:00 2001 From: Katarzyna Mitrus Date: Fri, 29 May 2020 13:29:18 +0200 Subject: [PATCH 13/24] [nGraph] Reorder nGraph LSTMSequence inputs and outputs dimensions (#560) * Reorder nGraph LSTMSequence input/outpt dimensions * Update nGraph pythonAPI for LSTMSequence * Reorder axes in ONNX importer LSTM * Tests update * Fix clang warning * Use opset3 namespace * Style apply * Tests update * Use opset1 namespace * Remove usage of GetOutputElement in ONNX importer LSTM * Remove opset0 header * Use Node::output() --- ngraph/python/src/ngraph/ops.py | 6 +- ngraph/python/test/ngraph/test_create_op.py | 18 +- .../ngraph/frontend/onnx_import/op/lstm.cpp | 34 ++-- ngraph/src/ngraph/op/fused/lstm_sequence.cpp | 184 +++++++++--------- ngraph/src/ngraph/op/fused/lstm_sequence.hpp | 4 +- ngraph/test/attributes.cpp | 29 ++- ngraph/test/onnx/onnx_import_rnn.in.cpp | 1 + ngraph/test/type_prop/lstm_sequence.cpp | 98 ++++++++-- 8 files changed, 231 insertions(+), 143 deletions(-) diff --git a/ngraph/python/src/ngraph/ops.py b/ngraph/python/src/ngraph/ops.py index 58c0f8970bfb08..51a299ef8f97c8 100644 --- 
a/ngraph/python/src/ngraph/ops.py +++ b/ngraph/python/src/ngraph/ops.py @@ -472,11 +472,11 @@ def lstm_sequence( ) -> Node: """Return a node which performs LSTMSequence operation. - :param X: The input tensor. Shape: [seq_length, batch_size, input_size]. + :param X: The input tensor. Shape: [batch_size, seq_length, input_size]. :param initial_hidden_state: The hidden state tensor. - Shape: [num_directions, batch_size, hidden_size]. + Shape: [batch_size, num_directions, hidden_size]. :param initial_cell_state: The cell state tensor. - Shape: [num_directions, batch_size, hidden_size]. + Shape: [batch_size, num_directions, hidden_size]. :param sequence_lengths: Specifies real sequence lengths for each batch element. Shape: [batch_size]. Integer type. :param W: Tensor with weights for matrix multiplication operation with input portion of data. diff --git a/ngraph/python/test/ngraph/test_create_op.py b/ngraph/python/test/ngraph/test_create_op.py index 662dc6a047f7be..abb50adce9e26d 100644 --- a/ngraph/python/test/ngraph/test_create_op.py +++ b/ngraph/python/test/ngraph/test_create_op.py @@ -258,9 +258,9 @@ def test_lstm_sequence_operator_bidirectional(dtype): num_directions = 2 seq_length = 2 - X_shape = [seq_length, batch_size, input_size] - H_t_shape = [num_directions, batch_size, hidden_size] - C_t_shape = [num_directions, batch_size, hidden_size] + X_shape = [batch_size, seq_length, input_size] + H_t_shape = [batch_size, num_directions, hidden_size] + C_t_shape = [batch_size, num_directions, hidden_size] seq_len_shape = [batch_size] W_shape = [num_directions, 4 * hidden_size, input_size] R_shape = [num_directions, 4 * hidden_size, hidden_size] @@ -323,9 +323,9 @@ def test_lstm_sequence_operator_reverse(dtype): num_directions = 1 seq_length = 2 - X_shape = [seq_length, batch_size, input_size] - H_t_shape = [num_directions, batch_size, hidden_size] - C_t_shape = [num_directions, batch_size, hidden_size] + X_shape = [batch_size, seq_length, input_size] + H_t_shape = [batch_size, num_directions, hidden_size] + C_t_shape = [batch_size, num_directions, hidden_size] seq_len_shape = [batch_size] W_shape = [num_directions, 4 * hidden_size, input_size] R_shape = [num_directions, 4 * hidden_size, hidden_size] @@ -389,9 +389,9 @@ def test_lstm_sequence_operator_forward(dtype): num_directions = 1 seq_length = 2 - X_shape = [seq_length, batch_size, input_size] - H_t_shape = [num_directions, batch_size, hidden_size] - C_t_shape = [num_directions, batch_size, hidden_size] + X_shape = [batch_size, seq_length, input_size] + H_t_shape = [batch_size, num_directions, hidden_size] + C_t_shape = [batch_size, num_directions, hidden_size] seq_len_shape = [batch_size] W_shape = [num_directions, 4 * hidden_size, input_size] R_shape = [num_directions, 4 * hidden_size, hidden_size] diff --git a/ngraph/src/ngraph/frontend/onnx_import/op/lstm.cpp b/ngraph/src/ngraph/frontend/onnx_import/op/lstm.cpp index 08e472e9eef942..16f0c45b49268d 100644 --- a/ngraph/src/ngraph/frontend/onnx_import/op/lstm.cpp +++ b/ngraph/src/ngraph/frontend/onnx_import/op/lstm.cpp @@ -24,13 +24,13 @@ #include "default_opset.hpp" #include "exceptions.hpp" #include "lstm.hpp" +#include "ngraph/builder/reshape.hpp" #include "ngraph/builder/split.hpp" #include "ngraph/frontend/onnx_import/op/lstm.hpp" #include "ngraph/op/add.hpp" #include "ngraph/op/constant.hpp" #include "ngraph/op/fused/lstm_sequence.hpp" #include "ngraph/op/get_output_element.hpp" -#include "ngraph/opsets/opset0.hpp" #include "ngraph/shape.hpp" #include 
"ngraph/type/element_type.hpp" @@ -71,7 +71,8 @@ namespace ngraph // ----- Mandatory inputs ------ // Packed input sequences. Shape: [seq_length, batch_size, input_size] - m_map[LSTMInput::LSTM_INPUT_X] = ng_inputs.at(0); + m_map[LSTMInput::LSTM_INPUT_X] = + builder::opset1::reorder_axes(ng_inputs.at(0), {1, 0, 2}); // Weight tensor for the gates. // Shape: [num_directions, 4*hidden_size, input_size] m_map[LSTMInput::LSTM_INPUT_W] = ng_inputs.at(1); @@ -82,7 +83,7 @@ namespace ngraph const std::size_t hidden_size = m_map[LSTMInput::LSTM_INPUT_R]->get_shape().back(); const std::size_t batch_size = - m_map[LSTMInput::LSTM_INPUT_X]->get_shape().at(1); + m_map[LSTMInput::LSTM_INPUT_X]->get_shape().at(0); const std::size_t num_directions = m_map[LSTMInput::LSTM_INPUT_W]->get_shape().front(); @@ -115,33 +116,35 @@ namespace ngraph Shape{batch_size}, std::vector( batch_size, - m_map[LSTMInput::LSTM_INPUT_X]->get_shape().at(0))); + m_map[LSTMInput::LSTM_INPUT_X]->get_shape().at(1))); } // The initial value of the hidden. // Shape [num_directions, batch_size, hidden_size] if (ng_inputs.size() > 5 && !ng_inputs.at(5)->is_null()) { - m_map[LSTMInput::LSTM_INPUT_INIT_H] = ng_inputs.at(5); + m_map[LSTMInput::LSTM_INPUT_INIT_H] = + builder::opset1::reorder_axes(ng_inputs.at(5), {1, 0, 2}); } else { m_map[LSTMInput::LSTM_INPUT_INIT_H] = default_opset::Constant::create( element::f32, - Shape{num_directions, batch_size, hidden_size}, - std::vector(num_directions * batch_size * hidden_size, 0.f)); + Shape{batch_size, num_directions, hidden_size}, + std::vector(batch_size * num_directions * hidden_size, 0.f)); } // The initial value of the cell. // Shape [num_directions, batch_size, hidden_size] if (ng_inputs.size() > 6 && !ng_inputs.at(6)->is_null()) { - m_map[LSTMInput::LSTM_INPUT_INIT_C] = ng_inputs.at(6); + m_map[LSTMInput::LSTM_INPUT_INIT_C] = + builder::opset1::reorder_axes(ng_inputs.at(6), {1, 0, 2}); } else { m_map[LSTMInput::LSTM_INPUT_INIT_C] = default_opset::Constant::create( element::f32, - Shape{num_directions, batch_size, hidden_size}, - std::vector(num_directions * batch_size * hidden_size, 0.f)); + Shape{batch_size, num_directions, hidden_size}, + std::vector(batch_size * num_directions * hidden_size, 0.f)); } // The weight tensor for peepholes. 
Shape [num_directions, 3*hidden_size] if (ng_inputs.size() > 7 && !ng_inputs.at(7)->is_null()) @@ -239,9 +242,14 @@ namespace ngraph attributes.m_activations, attributes.m_clip_threshold, attributes.m_input_forget); - return {std::make_shared(lstmSequence, 0), - std::make_shared(lstmSequence, 1), - std::make_shared(lstmSequence, 2)}; + + const auto Y = lstmSequence->output(0); + const auto Y_h = lstmSequence->output(1); + const auto Y_c = lstmSequence->output(2); + + return {builder::opset1::reorder_axes(Y, {2, 1, 0, 3}), + builder::opset1::reorder_axes(Y_h, {1, 0, 2}), + builder::opset1::reorder_axes(Y_c, {1, 0, 2})}; } } // namespace set_1 diff --git a/ngraph/src/ngraph/op/fused/lstm_sequence.cpp b/ngraph/src/ngraph/op/fused/lstm_sequence.cpp index 2e411d7fc6603f..a8b758446c84a0 100644 --- a/ngraph/src/ngraph/op/fused/lstm_sequence.cpp +++ b/ngraph/src/ngraph/op/fused/lstm_sequence.cpp @@ -20,19 +20,13 @@ #include "ngraph/builder/autobroadcast.hpp" #include "ngraph/builder/reshape.hpp" #include "ngraph/builder/split.hpp" -#include "ngraph/frontend/onnx_import/utils/reshape.hpp" -#include "ngraph/op/concat.hpp" -#include "ngraph/op/constant.hpp" -#include "ngraph/op/fused/lstm_cell.hpp" -#include "ngraph/op/get_output_element.hpp" -#include "ngraph/op/greater.hpp" -#include "ngraph/op/reverse_sequence.hpp" -#include "ngraph/op/select.hpp" + +#include "ngraph/opsets/opset1.hpp" using namespace ngraph; using namespace std; -constexpr NodeTypeInfo op::LSTMSequence::type_info; +constexpr NodeTypeInfo op::v0::LSTMSequence::type_info; bool ngraph::op::v0::LSTMSequence::visit_attributes(AttributeVisitor& visitor) { visitor.on_attribute("hidden_size", m_hidden_size); @@ -46,7 +40,7 @@ bool ngraph::op::v0::LSTMSequence::visit_attributes(AttributeVisitor& visitor) visitor.on_attribute("weights_format", m_weights_format); return true; } -NodeVector op::LSTMSequence::decompose_op() const +NodeVector op::v0::LSTMSequence::decompose_op() const { NodeVector results; if (m_direction == direction::FORWARD || m_direction == direction::REVERSE) @@ -60,55 +54,55 @@ NodeVector op::LSTMSequence::decompose_op() const // Stack together respective outputs from both forward and reverse passes.
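[Editor's note: with the batch-first layout introduced by this patch, the stacking described above happens along axis 1. A minimal, self-contained sketch (assuming the ngraph opset1 API; the helper name and shapes are illustrative, not part of the patch):

    #include <memory>
    #include "ngraph/opsets/opset1.hpp"
    using namespace ngraph;
    // Each direction yields Y_h of shape [batch_size, 1, hidden_size] after
    // expand_dims; concatenating along axis 1 (the num_directions axis)
    // produces [batch_size, 2, hidden_size].
    std::shared_ptr<Node> stack_directions(const Output<Node>& fwd,
                                           const Output<Node>& rev)
    {
        return std::make_shared<opset1::Concat>(OutputVector{fwd, rev}, 1);
    }
]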
shared_ptr Y{ - make_shared(NodeVector{fwd_results.at(0), rev_results.at(0)}, 1)}; + make_shared(NodeVector{fwd_results.at(0), rev_results.at(0)}, 1)}; shared_ptr Y_h{ - make_shared(NodeVector{fwd_results.at(1), rev_results.at(1)}, 0)}; + make_shared(NodeVector{fwd_results.at(1), rev_results.at(1)}, 1)}; shared_ptr Y_c{ - make_shared(NodeVector{fwd_results.at(2), rev_results.at(2)}, 0)}; + make_shared(NodeVector{fwd_results.at(2), rev_results.at(2)}, 1)}; results = NodeVector{Y, Y_h, Y_c}; } return results; } -shared_ptr op::LSTMSequence::clone_with_new_inputs(const OutputVector& new_args) const +shared_ptr op::v0::LSTMSequence::clone_with_new_inputs(const OutputVector& new_args) const { check_new_args_count(this, new_args); if (new_args.size() == 8) { - return make_shared(new_args.at(0), // X - new_args.at(1), // initial_hidden_state - new_args.at(2), // initial_cell_state - new_args.at(3), // sequence_lengths - new_args.at(4), // W - new_args.at(5), // R - new_args.at(6), // B - new_args.at(7), // P - m_hidden_size, - m_direction, - m_weights_format, - m_activations_alpha, - m_activations_beta, - m_activations, - m_clip_threshold, - m_input_forget); + return make_shared(new_args.at(0), // X + new_args.at(1), // initial_hidden_state + new_args.at(2), // initial_cell_state + new_args.at(3), // sequence_lengths + new_args.at(4), // W + new_args.at(5), // R + new_args.at(6), // B + new_args.at(7), // P + m_hidden_size, + m_direction, + m_weights_format, + m_activations_alpha, + m_activations_beta, + m_activations, + m_clip_threshold, + m_input_forget); } else if (new_args.size() == 7) { - return make_shared(new_args.at(0), // X - new_args.at(1), // initial_hidden_state - new_args.at(2), // initial_cell_state - new_args.at(3), // sequence_lengths - new_args.at(4), // W - new_args.at(5), // R - new_args.at(6), // B - m_hidden_size, - m_direction, - m_weights_format, - m_activations_alpha, - m_activations_beta, - m_activations, - m_clip_threshold, - m_input_forget); + return make_shared(new_args.at(0), // X + new_args.at(1), // initial_hidden_state + new_args.at(2), // initial_cell_state + new_args.at(3), // sequence_lengths + new_args.at(4), // W + new_args.at(5), // R + new_args.at(6), // B + m_hidden_size, + m_direction, + m_weights_format, + m_activations_alpha, + m_activations_beta, + m_activations, + m_clip_threshold, + m_input_forget); } else { @@ -116,46 +110,44 @@ shared_ptr op::LSTMSequence::clone_with_new_inputs(const OutputVector& new } } -shared_ptr op::LSTMSequence::get_masked_node(const Output& data, - int32_t time_step, - size_t batch_axis, - const Output& default_value) const +shared_ptr op::v0::LSTMSequence::get_masked_node(const Output& data, + int32_t time_step, + size_t batch_axis, + const Output& default_value) const { Output mask_value = default_value; // Create zero mask value node. if (!mask_value.get_node_shared_ptr()) { - mask_value = op::Constant::create(data.get_element_type(), - data.get_shape(), - vector(shape_size(data.get_shape()), 0.f)); + mask_value = opset1::Constant::create(data.get_element_type(), + data.get_shape(), + vector(shape_size(data.get_shape()), 0.f)); } // Create predicate nodes. The condition is whether current time step value // is greater than sequence length for respective batch inputs. 
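[Editor's note: an illustrative sketch of the Greater + Select masking that the diff below builds, assuming opset1; the helper name is hypothetical:

    #include <memory>
    #include "ngraph/opsets/opset1.hpp"
    using namespace ngraph;
    // Batches whose sequence length is already exceeded at this time step
    // receive the mask value (zeros) instead of the computed data.
    std::shared_ptr<Node> mask_past_end(const Output<Node>& time_step_node,
                                        const Output<Node>& seq_lengths,
                                        const Output<Node>& mask_value,
                                        const Output<Node>& data)
    {
        auto past_end = std::make_shared<opset1::Greater>(time_step_node, seq_lengths);
        return std::make_shared<opset1::Select>(past_end, mask_value, data);
    }
]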
- shared_ptr curr_time_step_node = op::Constant::create( + shared_ptr curr_time_step_node = opset1::Constant::create( element::i32, data.get_shape(), vector(shape_size(data.get_shape()), time_step)); - Output batch_seq_length = - builder::legacy_broadcast_for_binary_operation( - curr_time_step_node, input_value(3).get_node_shared_ptr(), batch_axis) - .at(1); + Output batch_seq_length = builder::opset1::legacy_broadcast_for_binary_operation( + curr_time_step_node, input_value(3).get_node_shared_ptr(), batch_axis); // Create mask node deciding whether or not to mask batch data. shared_ptr mask_condition = - make_shared(curr_time_step_node, batch_seq_length); + make_shared(curr_time_step_node, batch_seq_length); // Select values depending on mask_condition. // Select(<condition>, <true_value>, <false_value>) - return make_shared(mask_condition, mask_value, data); + return make_shared(mask_condition, mask_value, data); } -NodeVector op::LSTMSequence::lstm_pass(bool is_reverse) const +NodeVector op::v0::LSTMSequence::lstm_pass(bool is_reverse) const { // ------ VARIABLE'S NAMES AND ACRONYM DEFINITIONS ------ // The names used below are analogous to the ones used in ONNX documentation. // // ------ INPUTS ------ - // X - The input tensor. [seq_length, batch_size, input_size] + // X - The input tensor. [batch_size, seq_length, input_size] // W - The weight tensor. [num_directions, 4*hidden_size, input_size] // R - The recurrence weight tensor. [num_directions, 4*hidden_size, hidden_size] // B - The bias tensor for input gate. [num_directions, 8*hidden_size] @@ -167,14 +159,14 @@ NodeVector op::LSTMSequence::lstm_pass(bool is_reverse) const // c - cell gate // t - time step (t-1 means previous time step) // ------ VARIABLE NAMES ------ - // H_t - Hidden state vector at current time step. - // C_t - Cell state vector at current time step. + // H_t - Hidden state vector at current time step. [batch_size, num_directions, hidden_size] + // C_t - Cell state vector at current time step. [batch_size, num_directions, hidden_size] // h_list - The list of hidden states at all processed time steps. NodeVector h_list; shared_ptr X = input_value(0).get_node_shared_ptr(); - shared_ptr H_t = prepare_input(input_value(1), is_reverse); - shared_ptr C_t = prepare_input(input_value(2), is_reverse); + shared_ptr H_t = prepare_input(input_value(1), is_reverse, 1); + shared_ptr C_t = prepare_input(input_value(2), is_reverse, 1); shared_ptr seq_lengths = input_value(3).get_node_shared_ptr(); shared_ptr W = prepare_input(input_value(4), is_reverse); shared_ptr R = prepare_input(input_value(5), is_reverse); @@ -183,34 +175,34 @@ NodeVector op::LSTMSequence::lstm_pass(bool is_reverse) const if (is_reverse) { - X = make_shared(X, seq_lengths, 1 /*batch_axis*/, 0 /*seq_axis*/); + X = make_shared(X, seq_lengths, 0 /*batch_axis*/, 1 /*seq_axis*/); } - NodeVector in_seqs = builder::split(X, X->get_shape().at(0)); + NodeVector in_seqs = builder::opset1::split(X, X->get_shape().at(1), 1); for (auto& in_x : in_seqs) { - // remove first empty dim, after above split. - in_x = builder::squeeze(in_x); + // Remove empty dim, after above split.
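[Editor's note: shape bookkeeping for the loop below, as a hypothetical standalone sketch using the same builder helpers the patch switches to:

    #include "ngraph/builder/reshape.hpp"
    #include "ngraph/builder/split.hpp"
    using namespace ngraph;
    // Batch-first X is [batch_size, seq_length, input_size]; splitting along
    // axis 1 yields seq_length slices of [batch_size, 1, input_size], and
    // squeezing axis 1 gives the [batch_size, input_size] input of one step.
    NodeVector per_step_slices(const std::shared_ptr<Node>& X, size_t seq_length)
    {
        NodeVector slices = builder::opset1::split(X, seq_length, 1);
        for (auto& s : slices)
            s = builder::opset1::squeeze(s, {1});
        return slices;
    }
]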
+ in_x = builder::opset1::squeeze(in_x, {1}); } int32_t time_step{1}; for (const auto& in_x : in_seqs) { - shared_ptr lstm_cell = make_shared(in_x, - H_t, - C_t, - W, - R, - B, - P, - m_hidden_size, - m_weights_format, - m_activations, - m_activations_alpha, - m_activations_beta, - m_clip_threshold, - m_input_forget); + shared_ptr lstm_cell = make_shared(in_x, + H_t, + C_t, + W, + R, + B, + P, + m_hidden_size, + m_weights_format, + m_activations, + m_activations_alpha, + m_activations_beta, + m_clip_threshold, + m_input_forget); Output H = lstm_cell->output(0); Output C = lstm_cell->output(1); @@ -220,7 +212,7 @@ NodeVector op::LSTMSequence::lstm_pass(bool is_reverse) const // Mask hidden state tensor in order to handle mixed sequence lengths. // This results in zeroing out values in batches with sequence shorter // than current time_step. - h_list.push_back(get_masked_node(builder::expand_dims(H), time_step, 1)); + h_list.push_back(get_masked_node(builder::opset1::expand_dims(H, 1), time_step, 0)); // Reference implementation in ONNX Runtime doesn't mask values of Y_h // and Y_c outputs, thus here we make sure that only appropriate batches // (with respect to their sequence lengths) are updated. Those batches which @@ -230,36 +222,38 @@ NodeVector op::LSTMSequence::lstm_pass(bool is_reverse) const time_step++; } // The tensor that concats all the intermediate output values of the hidden. - // It has shape [seq_length, batch_size, hidden_size] - shared_ptr Y{make_shared(h_list, 0)}; + // It has shape [batch_size, seq_length, hidden_size] + shared_ptr Y{make_shared(h_list, 1)}; // Get back the original order of the output data. if (is_reverse) { - Y = make_shared(Y, seq_lengths, 1 /*batch_axis*/, 0 /*seq_axis*/); + Y = make_shared(Y, seq_lengths, 0 /*batch_axis*/, 1 /*seq_axis*/); } // Expand Y so that it has the expected shape: - // [seq_length, num_directions, batch_size, hidden_size] - Y = builder::expand_dims(Y, 1); + // [batch_size, num_directions, seq_length, hidden_size] + Y = builder::opset1::expand_dims(Y, 1); // expand H_t and C_t so that they have the expected shape: - // [num_directions, batch_size, hidden_size] - auto Y_h = builder::expand_dims(H_t); - auto Y_c = builder::expand_dims(C_t); + // [batch_size, num_directions, hidden_size] + auto Y_h = builder::opset1::expand_dims(H_t, 1); + auto Y_c = builder::opset1::expand_dims(C_t, 1); return {Y, Y_h, Y_c}; } -shared_ptr op::LSTMSequence::prepare_input(Output node, bool is_reverse) const +shared_ptr op::v0::LSTMSequence::prepare_input(Output node, + bool is_reverse, + size_t num_direction_axis) const { // In bidirectional mode inputs are stacked together, so we must split them. shared_ptr tmp = node.get_node_shared_ptr(); if (m_direction == direction::BIDIRECTIONAL) { - tmp = builder::split(node, 2).at(is_reverse ? 1 : 0); + tmp = builder::opset1::split(node, 2, num_direction_axis).at(is_reverse ? 1 : 0); } // Since we have forward LSTM we can squeeze `num_directions` axis from inputs. - return builder::squeeze(tmp); + return builder::opset1::squeeze(tmp, {num_direction_axis}); } namespace ngraph diff --git a/ngraph/src/ngraph/op/fused/lstm_sequence.hpp b/ngraph/src/ngraph/op/fused/lstm_sequence.hpp index a583b4f3cd69d5..dc7b4209795cd8 100644 --- a/ngraph/src/ngraph/op/fused/lstm_sequence.hpp +++ b/ngraph/src/ngraph/op/fused/lstm_sequence.hpp @@ -173,7 +173,9 @@ namespace ngraph NodeVector lstm_pass(bool is_reverse = false) const; // Split(bi-directional) and squeeze input data to remove 'num_direction' dimension.
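[Editor's note: the num_direction_axis parameter added below defaults to 0 because the weight tensors (W/R/B) still carry num_directions in front, while the now batch-first state tensors carry it on axis 1. The call sites in lstm_pass() above reflect this design choice:

    auto H_t = prepare_input(input_value(1), is_reverse, 1);  // states:  [batch_size, num_directions, hidden_size]
    auto W   = prepare_input(input_value(4), is_reverse);     // weights: [num_directions, 4*hidden_size, input_size]
]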
- std::shared_ptr prepare_input(Output node, bool is_reverse) const; + std::shared_ptr prepare_input(Output node, + bool is_reverse, + size_t num_direction_axis = 0) const; std::vector m_activations_alpha; std::vector m_activations_beta; diff --git a/ngraph/test/attributes.cpp b/ngraph/test/attributes.cpp index 709b7b5fcc7905..264dbbfb4ccb2e 100644 --- a/ngraph/test/attributes.cpp +++ b/ngraph/test/attributes.cpp @@ -1104,16 +1104,27 @@ TEST(attributes, lstm_cell_op) TEST(attributes, lstm_sequence_op) { FactoryRegistry::get().register_factory(); - const auto X = make_shared(element::f32, Shape{1, 2, 4}); - const auto initial_hidden_state = make_shared(element::f32, Shape{1, 2, 3}); - const auto initial_cell_state = make_shared(element::f32, Shape{1, 2, 3}); - const auto sequence_lengths = make_shared(element::i32, Shape{2}); - const auto W = make_shared(element::f32, Shape{1, 12, 4}); - const auto R = make_shared(element::f32, Shape{1, 12, 3}); - const auto B = make_shared(element::f32, Shape{1, 12}); - const auto hidden_size = 3; - const auto lstm_direction = op::LSTMSequence::direction::FORWARD; + const auto batch_size = 4; + const auto num_directions = 2; + const auto seq_length = 8; + const auto input_size = 16; + const auto hidden_size = 64; + + const auto X = + make_shared(element::f32, Shape{batch_size, seq_length, input_size}); + const auto initial_hidden_state = + make_shared(element::f32, Shape{batch_size, num_directions, hidden_size}); + const auto initial_cell_state = + make_shared(element::f32, Shape{batch_size, num_directions, hidden_size}); + const auto sequence_lengths = make_shared(element::i32, Shape{batch_size}); + const auto W = make_shared(element::f32, + Shape{num_directions, 4 * hidden_size, input_size}); + const auto R = make_shared(element::f32, + Shape{num_directions, 4 * hidden_size, hidden_size}); + const auto B = make_shared(element::f32, Shape{num_directions, 4 * hidden_size}); + + const auto lstm_direction = op::LSTMSequence::direction::BIDIRECTIONAL; const auto weights_format = op::LSTMWeightsFormat::ICOF; const std::vector activations_alpha = {1, 2, 3}; const std::vector activations_beta = {4, 5, 6}; diff --git a/ngraph/test/onnx/onnx_import_rnn.in.cpp b/ngraph/test/onnx/onnx_import_rnn.in.cpp index 626f65db0f9d9b..091afda954fc46 100644 --- a/ngraph/test/onnx/onnx_import_rnn.in.cpp +++ b/ngraph/test/onnx/onnx_import_rnn.in.cpp @@ -39,6 +39,7 @@ using namespace ngraph; static std::string s_manifest = "${MANIFEST}"; +// ONNX LSTM tests (implemented by nGraph LSTMCell and LSTMSequence) NGRAPH_TEST(${BACKEND_NAME}, onnx_model_lstm_fwd_with_clip) { auto function = onnx_import::import_onnx_model( diff --git a/ngraph/test/type_prop/lstm_sequence.cpp b/ngraph/test/type_prop/lstm_sequence.cpp index 0b5ff51a83b800..f3d814b92efd43 100644 --- a/ngraph/test/type_prop/lstm_sequence.cpp +++ b/ngraph/test/type_prop/lstm_sequence.cpp @@ -21,16 +21,28 @@ using namespace std; using namespace ngraph; -TEST(type_prop, lstm_sequence) +TEST(type_prop, lstm_sequence_forward) { - const auto X = make_shared(element::f32, Shape{1, 2, 4}); - const auto W = make_shared(element::f32, Shape{1, 12, 4}); - const auto R = make_shared(element::f32, Shape{1, 12, 3}); - const auto initial_hidden_state = make_shared(element::f32, Shape{1, 2, 3}); - const auto initial_cell_state = make_shared(element::f32, Shape{1, 2, 3}); - const auto B = make_shared(element::f32, Shape{1, 12}); - const auto sequence_lengths = make_shared(element::i32, Shape{2}); - const auto hidden_size = 3; + const auto 
batch_size = 8; + const auto num_directions = 1; + const auto seq_length = 6; + const auto input_size = 4; + const auto hidden_size = 128; + + const auto X = + make_shared(element::f32, Shape{batch_size, seq_length, input_size}); + const auto initial_hidden_state = + make_shared(element::f32, Shape{batch_size, num_directions, hidden_size}); + const auto initial_cell_state = + make_shared(element::f32, Shape{batch_size, num_directions, hidden_size}); + const auto sequence_lengths = make_shared(element::i32, Shape{batch_size}); + const auto W = make_shared(element::f32, + Shape{num_directions, 4 * hidden_size, input_size}); + const auto R = make_shared(element::f32, + Shape{num_directions, 4 * hidden_size, hidden_size}); + const auto B = make_shared(element::f32, Shape{num_directions, 4 * hidden_size}); + + const auto lstm_direction = op::LSTMSequence::direction::FORWARD; const auto lstm_sequence = make_shared(X, initial_hidden_state, @@ -40,7 +52,7 @@ TEST(type_prop, lstm_sequence) R, B, hidden_size, - op::LSTMSequence::direction::FORWARD); + lstm_direction); EXPECT_EQ(lstm_sequence->get_hidden_size(), hidden_size); EXPECT_EQ(lstm_sequence->get_direction(), op::LSTMSequence::direction::FORWARD); EXPECT_EQ(lstm_sequence->get_weights_format(), op::LSTMWeightsFormat::IFCO); @@ -52,9 +64,69 @@ TEST(type_prop, lstm_sequence) EXPECT_EQ(lstm_sequence->get_clip_threshold(), 0.f); EXPECT_FALSE(lstm_sequence->get_input_forget()); EXPECT_EQ(lstm_sequence->get_output_element_type(0), element::f32); - EXPECT_EQ(lstm_sequence->get_output_shape(0), (Shape{1, 1, 2, 3})); + EXPECT_EQ(lstm_sequence->get_output_shape(0), + (Shape{batch_size, num_directions, seq_length, hidden_size})); + EXPECT_EQ(lstm_sequence->get_output_element_type(1), element::f32); + EXPECT_EQ(lstm_sequence->get_output_shape(1), (Shape{batch_size, num_directions, hidden_size})); + EXPECT_EQ(lstm_sequence->get_output_element_type(2), element::f32); + EXPECT_EQ(lstm_sequence->get_output_shape(2), (Shape{batch_size, num_directions, hidden_size})); +} + +TEST(type_prop, lstm_sequence_bidirectional) +{ + const auto batch_size = 24; + const auto num_directions = 2; + const auto seq_length = 12; + const auto input_size = 8; + const auto hidden_size = 256; + + const auto X = + make_shared(element::f32, Shape{batch_size, seq_length, input_size}); + const auto initial_hidden_state = + make_shared(element::f32, Shape{batch_size, num_directions, hidden_size}); + const auto initial_cell_state = + make_shared(element::f32, Shape{batch_size, num_directions, hidden_size}); + const auto sequence_lengths = make_shared(element::i32, Shape{batch_size}); + const auto W = make_shared(element::f32, + Shape{num_directions, 4 * hidden_size, input_size}); + const auto R = make_shared(element::f32, + Shape{num_directions, 4 * hidden_size, hidden_size}); + const auto B = make_shared(element::f32, Shape{num_directions, 4 * hidden_size}); + + const auto weights_format = op::LSTMWeightsFormat::FICO; + const auto lstm_direction = op::LSTMSequence::direction::BIDIRECTIONAL; + const std::vector activations_alpha = {2.7, 7.0, 32.367}; + const std::vector activations_beta = {0.0, 5.49, 6.0}; + const std::vector activations = {"tanh", "sigmoid", "sigmoid"}; + + const auto lstm_sequence = make_shared(X, + initial_hidden_state, + initial_cell_state, + sequence_lengths, + W, + R, + B, + hidden_size, + lstm_direction, + weights_format, + activations_alpha, + activations_beta, + activations); + EXPECT_EQ(lstm_sequence->get_hidden_size(), hidden_size); + 
EXPECT_EQ(lstm_sequence->get_direction(), op::LSTMSequence::direction::BIDIRECTIONAL); + EXPECT_EQ(lstm_sequence->get_weights_format(), op::LSTMWeightsFormat::FICO); + EXPECT_EQ(lstm_sequence->get_activations_alpha(), activations_alpha); + EXPECT_EQ(lstm_sequence->get_activations_beta(), activations_beta); + EXPECT_EQ(lstm_sequence->get_activations()[0], "tanh"); + EXPECT_EQ(lstm_sequence->get_activations()[1], "sigmoid"); + EXPECT_EQ(lstm_sequence->get_activations()[2], "sigmoid"); + EXPECT_EQ(lstm_sequence->get_clip_threshold(), 0.f); + EXPECT_FALSE(lstm_sequence->get_input_forget()); + EXPECT_EQ(lstm_sequence->get_output_element_type(0), element::f32); + EXPECT_EQ(lstm_sequence->get_output_shape(0), + (Shape{batch_size, num_directions, seq_length, hidden_size})); EXPECT_EQ(lstm_sequence->get_output_element_type(1), element::f32); - EXPECT_EQ(lstm_sequence->get_output_shape(1), (Shape{1, 2, 3})); + EXPECT_EQ(lstm_sequence->get_output_shape(1), (Shape{batch_size, num_directions, hidden_size})); EXPECT_EQ(lstm_sequence->get_output_element_type(2), element::f32); - EXPECT_EQ(lstm_sequence->get_output_shape(2), (Shape{1, 2, 3})); + EXPECT_EQ(lstm_sequence->get_output_shape(2), (Shape{batch_size, num_directions, hidden_size})); } From 011128cb543fbd5f8a0cf783547d3ff255df365d Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Fri, 29 May 2020 14:45:59 +0300 Subject: [PATCH 14/24] Python: Fixed installation rules to install additional .so files generated from .pyx (#676) --- .../python/src/openvino/inference_engine/CMakeLists.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/inference-engine/ie_bridges/python/src/openvino/inference_engine/CMakeLists.txt b/inference-engine/ie_bridges/python/src/openvino/inference_engine/CMakeLists.txt index aa2a30c0555ce0..9ce70b546629d3 100644 --- a/inference-engine/ie_bridges/python/src/openvino/inference_engine/CMakeLists.txt +++ b/inference-engine/ie_bridges/python/src/openvino/inference_engine/CMakeLists.txt @@ -14,6 +14,7 @@ set_source_files_properties(${SOURCE} PROPERTIES CYTHON_IS_CXX ON) # create target cython_add_module(${TARGET_NAME} ${SOURCE}) +set(INSTALLED_TARGETS ${TARGET_NAME}) file(GLOB OTHER_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/*.pyx) @@ -26,6 +27,7 @@ foreach(PYX_FILE ${OTHER_SOURCES}) add_dependencies(${TARGET_NAME} ${PYX_NAME}) target_include_directories(${PYX_NAME} PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}") target_link_libraries(${PYX_NAME} PRIVATE ${InferenceEngine_LIBRARIES}) + list(APPEND INSTALLED_TARGETS ${PYX_NAME}) endforeach() function(python_disable_deprecated_warnings) @@ -64,7 +66,7 @@ endif() # install -install(TARGETS ${TARGET_NAME} +install(TARGETS ${INSTALLED_TARGETS} RUNTIME DESTINATION python/${PYTHON_VERSION}/openvino/inference_engine COMPONENT ${PYTHON_VERSION} ARCHIVE DESTINATION python/${PYTHON_VERSION}/openvino/inference_engine COMPONENT ${PYTHON_VERSION} LIBRARY DESTINATION python/${PYTHON_VERSION}/openvino/inference_engine COMPONENT ${PYTHON_VERSION}) From be3b711972442435479136419a0f58b3713d4bc2 Mon Sep 17 00:00:00 2001 From: Anna Khakimova Date: Fri, 29 May 2020 15:44:12 +0300 Subject: [PATCH 15/24] Pre-processing(GAPI): AVX2/AVX512 implementation of 3C/4C Resize via universal intrinsics. 
(#612) --- .../ie_bridges/c/src/CMakeLists.txt | 6 + .../src/preprocessing/CMakeLists.txt | 6 + .../ie_preprocess_gapi_kernels_avx2.cpp | 234 +++++++++++++- .../ie_preprocess_gapi_kernels_avx512.cpp | 289 +++++++++++++++++- .../ie_preprocess_gapi_kernels_sse42.cpp | 222 ++++++++++++-- .../ie_preprocess_gapi_kernels.cpp | 58 +++- .../fluid_test_computations/CMakeLists.txt | 6 + .../thirdparty/ocv/opencv_hal_avx.hpp | 87 +++++- .../thirdparty/ocv/opencv_hal_avx512.hpp | 202 +++++++++++- .../thirdparty/ocv/opencv_hal_sse.hpp | 65 +++- 10 files changed, 1093 insertions(+), 82 deletions(-) diff --git a/inference-engine/ie_bridges/c/src/CMakeLists.txt b/inference-engine/ie_bridges/c/src/CMakeLists.txt index ef8527adf1cf9e..ab981fd7939366 100644 --- a/inference-engine/ie_bridges/c/src/CMakeLists.txt +++ b/inference-engine/ie_bridges/c/src/CMakeLists.txt @@ -21,6 +21,12 @@ target_include_directories(${TARGET_NAME} PUBLIC "${InferenceEngine_C_API_SOURCE add_cpplint_target(${TARGET_NAME}_cpplint FOR_TARGETS ${TARGET_NAME}) +# Workaround to avoid warnings caused by a bug in avx512intrin.h of GCC 5 +if((CMAKE_CXX_COMPILER_ID STREQUAL "GNU") AND + (CMAKE_CXX_COMPILER_VERSION VERSION_LESS_EQUAL 5.5)) + set_target_properties(${TARGET_NAME} PROPERTIES LINK_FLAGS_RELEASE "-Wno-error=maybe-uninitialized -Wno-maybe-uninitialized") +endif() + # export export(TARGETS ${TARGET_NAME} NAMESPACE IE:: APPEND FILE "${CMAKE_BINARY_DIR}/targets.cmake") diff --git a/inference-engine/src/preprocessing/CMakeLists.txt b/inference-engine/src/preprocessing/CMakeLists.txt index 9201a6ed53ffa0..adc52f065f1890 100644 --- a/inference-engine/src/preprocessing/CMakeLists.txt +++ b/inference-engine/src/preprocessing/CMakeLists.txt @@ -168,6 +168,12 @@ target_link_libraries(${TARGET_NAME} PRIVATE fluid PUBLIC inference_engine ${INT target_include_directories(${TARGET_NAME} INTERFACE "${CMAKE_CURRENT_SOURCE_DIR}") +# Workaround to avoid warnings caused by a bug in avx512intrin.h of GCC 5 +if((CMAKE_CXX_COMPILER_ID STREQUAL "GNU") AND + (CMAKE_CXX_COMPILER_VERSION VERSION_LESS_EQUAL 5.5)) + set_target_properties(${TARGET_NAME} PROPERTIES LINK_FLAGS_RELEASE "-Wno-error=maybe-uninitialized -Wno-maybe-uninitialized") +endif() + if(WIN32) set_target_properties(${TARGET_NAME} PROPERTIES COMPILE_PDB_NAME ${TARGET_NAME}) endif() diff --git a/inference-engine/src/preprocessing/cpu_x86_avx2/ie_preprocess_gapi_kernels_avx2.cpp b/inference-engine/src/preprocessing/cpu_x86_avx2/ie_preprocess_gapi_kernels_avx2.cpp index da16de2cc51c21..71c23ced0b07d9 100644 ---
out1[], splitRow_32FC4_Impl(in, out0, out1, out2, out3, length); } - - void calculate_nv12_to_rgb(const uchar **srcY, const uchar *srcUV, uchar **dstRGBx, @@ -145,6 +131,226 @@ void calcRowArea_32F(float dst[], const float *src[], const Size& inSz, calcRowArea_impl(dst, src, inSz, outSz, yalpha, ymap, xmaxdf, xindex, xalpha, vbuf); } +template +void calcRowLinear_8UC_Impl(std::array, chanNum> &dst, + const uint8_t *src0[], + const uint8_t *src1[], + const short alpha[], + const short clone[], // 4 clones of alpha + const short mapsx[], + const short beta[], + uint8_t tmp[], + const Size &inSz, + const Size &outSz, + int lpi) { + constexpr int half_nlanes = (v_uint8::nlanes / 2); + const int shift = (half_nlanes / 4); + + if (4 == lpi) { + GAPI_DbgAssert(inSz.width >= half_nlanes); + + v_uint8 shuf_mask1 = v_setr_s8(0, 8, 4, 12, 1, 9, 5, 13, + 2, 10, 6, 14, 3, 11, 7, 15, + 0, 8, 4, 12, 1, 9, 5, 13, + 2, 10, 6, 14, 3, 11, 7, 15); + + v_uint8 shuf_mask2 = v_setr_s8(0, 4, 8, 12, 2, 6, 10, 14, + 1, 5, 9, 13, 3, 7, 11, 15, + 0, 4, 8, 12, 2, 6, 10, 14, + 1, 5, 9, 13, 3, 7, 11, 15); + + v_uint8 shuf_mask3 = v_setr_s8(0, 1, 8, 9, 2, 3, 10, 11, + 4, 5, 12, 13, 6, 7, 14, 15, + 0, 1, 8, 9, 2, 3, 10, 11, + 4, 5, 12, 13, 6, 7, 14, 15); + + // vertical pass + v_int16 b0 = vx_setall_s16(beta[0]); + v_int16 b1 = vx_setall_s16(beta[1]); + v_int16 b2 = vx_setall_s16(beta[2]); + v_int16 b3 = vx_setall_s16(beta[3]); + + for (int w = 0; w < inSz.width*chanNum; ) { + for (; w <= inSz.width*chanNum - half_nlanes && w >= 0; w += half_nlanes) { + v_int16 val0_0 = v_load_ccache_expand(&src0[0][w]); + v_int16 val0_1 = v_load_ccache_expand(&src0[1][w]); + v_int16 val0_2 = v_load_ccache_expand(&src0[2][w]); + v_int16 val0_3 = v_load_ccache_expand(&src0[3][w]); + + v_int16 val1_0 = v_load_ccache_expand(&src1[0][w]); + v_int16 val1_1 = v_load_ccache_expand(&src1[1][w]); + v_int16 val1_2 = v_load_ccache_expand(&src1[2][w]); + v_int16 val1_3 = v_load_ccache_expand(&src1[3][w]); + + v_int16 t0 = v_mulhrs(v_sub_wrap(val0_0, val1_0), b0); + v_int16 t1 = v_mulhrs(v_sub_wrap(val0_1, val1_1), b1); + v_int16 t2 = v_mulhrs(v_sub_wrap(val0_2, val1_2), b2); + v_int16 t3 = v_mulhrs(v_sub_wrap(val0_3, val1_3), b3); + + v_int16 r0 = v_add_wrap(val1_0, t0); + v_int16 r1 = v_add_wrap(val1_1, t1); + v_int16 r2 = v_add_wrap(val1_2, t2); + v_int16 r3 = v_add_wrap(val1_3, t3); + + v_uint8 q0 = v_packus(r0, r1); + v_uint8 q1 = v_packus(r2, r3); + + v_uint8 q2 = v_blend_shiftleft<0xCC /*0b11001100*/, 4>(q0, q1); + v_uint8 q3 = v_blend_shiftright<0xCC /*0b11001100*/, 4>(q0, q1); + + v_uint8 q4 = v_shuffle_s8(q2, shuf_mask1); + v_uint8 q5 = v_shuffle_s8(q3, shuf_mask1); + + v_uint8 q6 = v256_permute2x128<0x20>(q4, q5); + v_uint8 q7 = v256_permute2x128<0x31>(q4, q5); + + vx_store(&tmp[4 * w + 0], q6); + vx_store(&tmp[4 * w + 2 * half_nlanes], q7); + } + + if (w < inSz.width*chanNum) { + w = inSz.width*chanNum - half_nlanes; + } + } + + // horizontal pass + v_uint8 val_0, val_1, val_2, val_3; + GAPI_DbgAssert(outSz.width >= half_nlanes); + for (int x = 0; x < outSz.width; ) { + for (; x <= outSz.width - half_nlanes && x >= 0; x += half_nlanes) { + v_int16 a10 = vx_load(&clone[4 * x]); + v_int16 a32 = vx_load(&clone[4 * (x + 4)]); + v_int16 a54 = vx_load(&clone[4 * (x + 8)]); + v_int16 a76 = vx_load(&clone[4 * (x + 12)]); + + for (int c = 0; c < chanNum; ++c) { + v_gather_channel(val_0, tmp, mapsx, chanNum, c, x, 0); + v_gather_channel(val_1, tmp, mapsx, chanNum, c, x, shift); + v_gather_channel(val_2, tmp, mapsx, chanNum, c, x, shift * 2); + 
v_gather_channel(val_3, tmp, mapsx, chanNum, c, x, shift * 3); + + v_int16 val0_0 = v_reinterpret_as_s16(v_expand_low(val_0)); + v_int16 val0_1 = v_reinterpret_as_s16(v_expand_low(val_1)); + v_int16 val0_2 = v_reinterpret_as_s16(v_expand_low(val_2)); + v_int16 val0_3 = v_reinterpret_as_s16(v_expand_low(val_3)); + + v_int16 val1_0 = v_reinterpret_as_s16(v_expand_high(val_0)); + v_int16 val1_1 = v_reinterpret_as_s16(v_expand_high(val_1)); + v_int16 val1_2 = v_reinterpret_as_s16(v_expand_high(val_2)); + v_int16 val1_3 = v_reinterpret_as_s16(v_expand_high(val_3)); + + v_int16 t0 = v_mulhrs(v_sub_wrap(val0_0, val1_0), a10); + v_int16 t1 = v_mulhrs(v_sub_wrap(val0_1, val1_1), a32); + v_int16 t2 = v_mulhrs(v_sub_wrap(val0_2, val1_2), a54); + v_int16 t3 = v_mulhrs(v_sub_wrap(val0_3, val1_3), a76); + + v_int16 r0 = v_add_wrap(val1_0, t0); + v_int16 r1 = v_add_wrap(val1_1, t1); + v_int16 r2 = v_add_wrap(val1_2, t2); + v_int16 r3 = v_add_wrap(val1_3, t3); + + v_uint8 q0 = v_packus(r0, r1); + v_uint8 q1 = v_packus(r2, r3); + + v_uint8 q2 = v_shuffle_s8(q0, shuf_mask2); + v_uint8 q3 = v_shuffle_s8(q1, shuf_mask2); + + v_uint8 q4 = v_blend_shiftleft<0xCC /*0b11001100*/, 4>(q2, q3); + v_uint8 q5 = v_blend_shiftright<0xCC /*0b11001100*/, 4>(q2, q3); + + v_uint8 q6 = v256_permute4x64<0xD8>(q4); + v_uint8 q7 = v256_permute4x64<0xD8>(q5); + + v_uint8 q8 = v_shuffle_s8(q6, shuf_mask3); + v_uint8 q9 = v_shuffle_s8(q7, shuf_mask3); + + v_store_low(&dst[c][0][x], q8); + v_store_high(&dst[c][1][x], q8); + v_store_low(&dst[c][2][x], q9); + v_store_high(&dst[c][3][x], q9); + } + } + + if (x < outSz.width) { + x = outSz.width - half_nlanes; + } + } + } else { // if any lpi + for (int l = 0; l < lpi; ++l) { + short beta0 = beta[l]; + + // vertical pass + GAPI_DbgAssert(inSz.width*chanNum >= half_nlanes); + for (int w = 0; w < inSz.width*chanNum; ) { + for (; w <= inSz.width*chanNum - half_nlanes; w += half_nlanes) { + v_int16 s0 = v_reinterpret_as_s16(vx_load_expand(&src0[l][w])); + v_int16 s1 = v_reinterpret_as_s16(vx_load_expand(&src1[l][w])); + v_int16 t = v_mulhrs(s0 - s1, beta0) + s1; + v_pack_u_store(tmp + w, t); + } + + if (w < inSz.width*chanNum) { + w = inSz.width*chanNum - half_nlanes; + } + } + + // horizontal pass + GAPI_DbgAssert(outSz.width >= half_nlanes); + + for (int x = 0; x < outSz.width; ) { + for (; x <= outSz.width - half_nlanes && x >= 0; x += half_nlanes) { + for (int c = 0; c < chanNum; ++c) { + v_int16 a0 = vx_load(&alpha[x]); // as signed Q1.1.14 + v_int16 sx = vx_load(&mapsx[x]); // as integer (int16) + v_int16 t0 = v_gather_chan(tmp, sx, c, 0); + v_int16 t1 = v_gather_chan(tmp, sx, c, 1); + v_int16 d = v_mulhrs(t0 - t1, a0) + t1; + v_pack_u_store(&dst[c][l][x], d); + } + } + + if (x < outSz.width) { + x = outSz.width - half_nlanes; + } + } + } + } +} + +// Resize (bi-linear, 8UC3) +void calcRowLinear_8U(C3, std::array, 3> &dst, + const uint8_t *src0[], + const uint8_t *src1[], + const short alpha[], + const short clone[], // 4 clones of alpha + const short mapsx[], + const short beta[], + uint8_t tmp[], + const Size &inSz, + const Size &outSz, + int lpi) { + constexpr const int chanNum = 3; + + calcRowLinear_8UC_Impl(dst, src0, src1, alpha, clone, mapsx, beta, tmp, inSz, outSz, lpi); +} + +// Resize (bi-linear, 8UC4) +void calcRowLinear_8U(C4, std::array, 4> &dst, + const uint8_t *src0[], + const uint8_t *src1[], + const short alpha[], + const short clone[], // 4 clones of alpha + const short mapsx[], + const short beta[], + uint8_t tmp[], + const Size &inSz, + const Size &outSz, + int 
lpi) { + constexpr const int chanNum = 4; + + calcRowLinear_8UC_Impl(dst, src0, src1, alpha, clone, mapsx, beta, tmp, inSz, outSz, lpi); +} + void copyRow_8U(const uint8_t in[], uint8_t out[], int length) { copyRow_8U_impl(in, out, length); } diff --git a/inference-engine/src/preprocessing/cpu_x86_avx512/ie_preprocess_gapi_kernels_avx512.cpp b/inference-engine/src/preprocessing/cpu_x86_avx512/ie_preprocess_gapi_kernels_avx512.cpp index 6b6e4cfd635b1d..5b900d52c5455f 100644 --- a/inference-engine/src/preprocessing/cpu_x86_avx512/ie_preprocess_gapi_kernels_avx512.cpp +++ b/inference-engine/src/preprocessing/cpu_x86_avx512/ie_preprocess_gapi_kernels_avx512.cpp @@ -4,10 +4,7 @@ #include #include -#include -#include "ie_preprocess_gapi_kernels.hpp" -#include "ie_preprocess_gapi_kernels_impl.hpp" #include "ie_preprocess_gapi_kernels_avx512.hpp" #include @@ -38,17 +35,6 @@ namespace gapi { namespace kernels { namespace avx512 { -//---------------------------------------------------------------------- - -static inline v_uint16x32 v_expand_low(const v_uint8x64& a) { - return v_uint16x32(_mm512_unpacklo_epi8(a.val, _mm512_setzero_si512())); -} - -static inline v_uint16x32 v_expand_high(const v_uint8x64& a) { - return v_uint16x32(_mm512_unpackhi_epi8(a.val, _mm512_setzero_si512())); -} - -//------------------------------------------------------------------------------ void mergeRow_8UC2(const uint8_t in0[], const uint8_t in1[], uint8_t out[], int length) { @@ -110,8 +96,6 @@ void splitRow_32FC4(const float in[], float out0[], float out1[], splitRow_32FC4_Impl(in, out0, out1, out2, out3, length); } - - void calculate_nv12_to_rgb(const uchar **srcY, const uchar *srcUV, uchar **dstRGBx, @@ -141,6 +125,278 @@ void calcRowArea_32F(float dst[], const float *src[], const Size& inSz, calcRowArea_impl(dst, src, inSz, outSz, yalpha, ymap, xmaxdf, xindex, xalpha, vbuf); } +// Resize (bi-linear, 8U, generic number of channels) +template +void calcRowLinear_8UC_Impl(std::array, chanNum> &dst, + const uint8_t *src0[], + const uint8_t *src1[], + const short alpha[], + const short clone[], // 4 clones of alpha + const short mapsx[], + const short beta[], + uint8_t tmp[], + const Size &inSz, + const Size &outSz, + int lpi) { + constexpr int half_nlanes = (v_uint8::nlanes / 2); + const int shift = (half_nlanes / 4); + + if (4 == lpi) { + GAPI_DbgAssert(inSz.width >= half_nlanes); + + + v_uint8 shuf_mask1 = v_setr_s8(0, 4, 8, 12, 1, 5, 9, 13, + 2, 6, 10, 14, 3, 7, 11, 15, + 0, 4, 8, 12, 1, 5, 9, 13, + 2, 6, 10, 14, 3, 7, 11, 15, + 0, 4, 8, 12, 1, 5, 9, 13, + 2, 6, 10, 14, 3, 7, 11, 15, + 0, 4, 8, 12, 1, 5, 9, 13, + 2, 6, 10, 14, 3, 7, 11, 15); + + v_uint8 shuf_mask2 = v_setr_s8(0, 1, 4, 5, 8, 9, 12, 13, + 2, 3, 6, 7, 10, 11, 14, 15, + 0, 1, 4, 5, 8, 9, 12, 13, + 2, 3, 6, 7, 10, 11, 14, 15, + 0, 1, 4, 5, 8, 9, 12, 13, + 2, 3, 6, 7, 10, 11, 14, 15, + 0, 1, 4, 5, 8, 9, 12, 13, + 2, 3, 6, 7, 10, 11, 14, 15); + + v_uint32 idx1 = v_set_s32(23, 21, 7, 5, 22, 20, 6, 4, 19, 17, 3, 1, 18, 16, 2, 0); + v_uint32 idx2 = v_set_s32(31, 29, 15, 13, 30, 28, 14, 12, 27, 25, 11, 9, 26, 24, 10, 8); + v_uint32 idx3 = v_set_s32(29, 25, 21, 17, 13, 9, 5, 1, 28, 24, 20, 16, 12, 8, 4, 0); + v_uint32 idx4 = v_set_s32(31, 27, 23, 19, 15, 11, 7, 3, 30, 26, 22, 18, 14, 10, 6, 2); + + // vertical pass + v_int16 b0 = vx_setall_s16(beta[0]); + v_int16 b1 = vx_setall_s16(beta[1]); + v_int16 b2 = vx_setall_s16(beta[2]); + v_int16 b3 = vx_setall_s16(beta[3]); + + for (int w = 0; w < inSz.width*chanNum; ) { + for (; w <= inSz.width*chanNum - half_nlanes 
&& w >= 0; w += half_nlanes) { + v_int16 val0_0 = v_load_ccache_expand(&src0[0][w]); + v_int16 val0_1 = v_load_ccache_expand(&src0[1][w]); + v_int16 val0_2 = v_load_ccache_expand(&src0[2][w]); + v_int16 val0_3 = v_load_ccache_expand(&src0[3][w]); + + v_int16 val1_0 = v_load_ccache_expand(&src1[0][w]); + v_int16 val1_1 = v_load_ccache_expand(&src1[1][w]); + v_int16 val1_2 = v_load_ccache_expand(&src1[2][w]); + v_int16 val1_3 = v_load_ccache_expand(&src1[3][w]); + + v_int16 t0 = v_mulhrs(v_sub_wrap(val0_0, val1_0), b0); + v_int16 t1 = v_mulhrs(v_sub_wrap(val0_1, val1_1), b1); + v_int16 t2 = v_mulhrs(v_sub_wrap(val0_2, val1_2), b2); + v_int16 t3 = v_mulhrs(v_sub_wrap(val0_3, val1_3), b3); + + v_int16 r0 = v_add_wrap(val1_0, t0); + v_int16 r1 = v_add_wrap(val1_1, t1); + v_int16 r2 = v_add_wrap(val1_2, t2); + v_int16 r3 = v_add_wrap(val1_3, t3); + + v_uint8 q0 = v_packus(r0, r1); + v_uint8 q1 = v_packus(r2, r3); +#if 1 + v_uint8 q2 = v_permutex2_s32(q0, q1, idx1); + v_uint8 q3 = v_permutex2_s32(q0, q1, idx2); + + v_uint8 q4 = v_shuffle_s8(q2, shuf_mask1); + v_uint8 q5 = v_shuffle_s8(q3, shuf_mask1); + + // Second variant of decompose. It may be useful in the future. +#else + v_uint8 q2 = v_mblend_shiftleft(q0, q1); + v_uint8 q3 = v_mblend_shiftright(q0, q1); + + v_uint8 mask1 = v_setr_s8(0, 8, 4, 12, 1, 9, 5, 13, + 2, 10, 6, 14, 3, 11, 7, 15, + 0, 8, 4, 12, 1, 9, 5, 13, + 2, 10, 6, 14, 3, 11, 7, 15, + 0, 8, 4, 12, 1, 9, 5, 13, + 2, 10, 6, 14, 3, 11, 7, 15, + 0, 8, 4, 12, 1, 9, 5, 13, + 2, 10, 6, 14, 3, 11, 7, 15); + + v_uint8 q4 = v_shuffle_s8(q2, mask1); + v_uint8 q5 = v_shuffle_s8(q3, mask1); + + v_uint64 idx1 = v_set_s64(11, 10, 3, 2, 9, 8, 1, 0); + v_uint64 idx2 = v_set_s64(15, 14, 7, 6, 13, 12, 5, 4); + + v_uint8 q6 = v_permutex2_s64(q4, q5, idx1); + v_uint8 q7 = v_permutex2_s64(q4, q5, idx2); +#endif + + vx_store(&tmp[4 * w + 0], q4); + vx_store(&tmp[4 * w + 2 * half_nlanes], q5); + } + + if (w < inSz.width*chanNum) { + w = inSz.width*chanNum - half_nlanes; + } + } + + // horizontal pass + v_uint8 val_0, val_1, val_2, val_3; + + GAPI_DbgAssert(outSz.width >= half_nlanes); + for (int x = 0; x < outSz.width; ) { + for (; x <= outSz.width - half_nlanes && x >= 0; x += half_nlanes) { + v_int16 a10 = vx_load(&clone[4 * x]); + v_int16 a32 = vx_load(&clone[4 * (x + 8)]); + v_int16 a54 = vx_load(&clone[4 * (x + 16)]); + v_int16 a76 = vx_load(&clone[4 * (x + 24)]); + + for (int c = 0; c < chanNum; ++c) { + v_gather_channel(val_0, tmp, mapsx, chanNum, c, x, 0); + v_gather_channel(val_1, tmp, mapsx, chanNum, c, x, shift); + v_gather_channel(val_2, tmp, mapsx, chanNum, c, x, shift * 2); + v_gather_channel(val_3, tmp, mapsx, chanNum, c, x, shift * 3); + + v_int16 val0_0 = v_reinterpret_as_s16(v_expand_low(val_0)); + v_int16 val0_1 = v_reinterpret_as_s16(v_expand_low(val_1)); + v_int16 val0_2 = v_reinterpret_as_s16(v_expand_low(val_2)); + v_int16 val0_3 = v_reinterpret_as_s16(v_expand_low(val_3)); + + v_int16 val1_0 = v_reinterpret_as_s16(v_expand_high(val_0)); + v_int16 val1_1 = v_reinterpret_as_s16(v_expand_high(val_1)); + v_int16 val1_2 = v_reinterpret_as_s16(v_expand_high(val_2)); + v_int16 val1_3 = v_reinterpret_as_s16(v_expand_high(val_3)); + + v_int16 t0 = v_mulhrs(v_sub_wrap(val0_0, val1_0), a10); + v_int16 t1 = v_mulhrs(v_sub_wrap(val0_1, val1_1), a32); + v_int16 t2 = v_mulhrs(v_sub_wrap(val0_2, val1_2), a54); + v_int16 t3 = v_mulhrs(v_sub_wrap(val0_3, val1_3), a76); + + v_int16 r0 = v_add_wrap(val1_0, t0); + v_int16 r1 = v_add_wrap(val1_1, t1); + v_int16 r2 = v_add_wrap(val1_2, t2); + v_int16
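// [Editor's note, illustrative aside: v_permutex2_s32(a, b, idx) is assumed
// to wrap the AVX-512 two-source 32-bit permute (vpermt2d-style), where each
// output lane i takes element idx[i] from the concatenation of a and b,
// indices 0..15 selecting from a and 16..31 from b. The idx tables defined
// earlier use this to reorder the four packed rows into the layout that the
// shuffles and stores below expect.]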
r3 = v_add_wrap(val1_3, t3); + + v_uint8 q0 = v_packus(r0, r1); + v_uint8 q1 = v_packus(r2, r3); + + v_uint8 q2 = v_shuffle_s8(q0, shuf_mask1); + v_uint8 q3 = v_shuffle_s8(q1, shuf_mask1); +#if 1 + v_uint8 q4 = v_permutex2_s32(q2, q3, idx3); + v_uint8 q5 = v_permutex2_s32(q2, q3, idx4); + + v_uint8 q6 = v_shuffle_s8(q4, shuf_mask2); + v_uint8 q7 = v_shuffle_s8(q5, shuf_mask2); + + + // Second variant of decompose. It may be useful in the future. +#else + v_uint8 q4 = v_mask_blend_shiftleft<0xCCCCCCCC /*0b11001100110011001100110011001100*/, 4>(q2, q3); + v_uint8 q5 = v_mask_blend_shiftright<0xCCCCCCCC /*0b11001100110011001100110011001100*/, 4>(q2, q3); + + v_int32 idx = v_set_s32(15, 11, 7, 3, 14, 10, 6, 2, 13, 9, 5, 1, 12, 8, 4, 0); + + v_uint8 q6 = v_permutex_s32(idx, q4); + v_uint8 q7 = v_permutex_s32(idx, q5); + + v_uint8 mask2 = v_setr_s8(0, 1, 4, 5, 8, 9, 12, 13, + 2, 3, 6, 7, 10, 11, 14, 15, + 0, 1, 4, 5, 8, 9, 12, 13, + 2, 3, 6, 7, 10, 11, 14, 15, + 0, 1, 4, 5, 8, 9, 12, 13, + 2, 3, 6, 7, 10, 11, 14, 15, + 0, 1, 4, 5, 8, 9, 12, 13, + 2, 3, 6, 7, 10, 11, 14, 15); + + v_uint8 q8 = v_shuffle_s8(q6, mask2); + v_uint8 q9 = v_shuffle_s8(q7, mask2); +#endif + v_store_low(&dst[c][0][x], q6); + v_store_high(&dst[c][1][x], q6); + v_store_low(&dst[c][2][x], q7); + v_store_high(&dst[c][3][x], q7); + } + } + + if (x < outSz.width) { + x = outSz.width - half_nlanes; + } + } + } else { // if any lpi + for (int l = 0; l < lpi; ++l) { + short beta0 = beta[l]; + + // vertical pass + GAPI_DbgAssert(inSz.width*chanNum >= half_nlanes); + for (int w = 0; w < inSz.width*chanNum; ) { + for (; w <= inSz.width*chanNum - half_nlanes; w += half_nlanes) { + v_int16 s0 = v_reinterpret_as_s16(vx_load_expand(&src0[l][w])); + v_int16 s1 = v_reinterpret_as_s16(vx_load_expand(&src1[l][w])); + v_int16 t = v_mulhrs(s0 - s1, beta0) + s1; + v_pack_u_store(tmp + w, t); + } + + if (w < inSz.width*chanNum) { + w = inSz.width*chanNum - half_nlanes; + } + } + + // horizontal pass + GAPI_DbgAssert(outSz.width >= half_nlanes); + + for (int x = 0; x < outSz.width; ) { + for (; x <= outSz.width - half_nlanes && x >= 0; x += half_nlanes) { + for (int c = 0; c < chanNum; ++c) { + v_int16 a0 = vx_load(&alpha[x]); // as signed Q1.1.14 + v_int16 sx = vx_load(&mapsx[x]); // as integer (int16) + v_int16 t0 = v_gather_chan(tmp, sx, c, 0); + v_int16 t1 = v_gather_chan(tmp, sx, c, 1); + v_int16 d = v_mulhrs(t0 - t1, a0) + t1; + v_pack_u_store(&dst[c][l][x], d); + } + } + + if (x < outSz.width) { + x = outSz.width - half_nlanes; + } + } + } + } +} + +// Resize (bi-linear, 8UC3) +void calcRowLinear_8U(C3, std::array, 3> &dst, + const uint8_t *src0[], + const uint8_t *src1[], + const short alpha[], + const short clone[], // 4 clones of alpha + const short mapsx[], + const short beta[], + uint8_t tmp[], + const Size &inSz, + const Size &outSz, + int lpi) { + constexpr const int chanNum = 3; + + calcRowLinear_8UC_Impl(dst, src0, src1, alpha, clone, mapsx, beta, tmp, inSz, outSz, lpi); +} + +// Resize (bi-linear, 8UC4) +void calcRowLinear_8U(C4, std::array, 4> &dst, + const uint8_t *src0[], + const uint8_t *src1[], + const short alpha[], + const short clone[], // 4 clones of alpha + const short mapsx[], + const short beta[], + uint8_t tmp[], + const Size &inSz, + const Size &outSz, + int lpi) { + constexpr const int chanNum = 4; + + calcRowLinear_8UC_Impl(dst, src0, src1, alpha, clone, mapsx, beta, tmp, inSz, outSz, lpi); +} + void copyRow_8U(const uint8_t in[], uint8_t out[], int length) { copyRow_8U_impl(in, out, length); } @@ -153,3 +409,4 @@
void copyRow_32F(const float in[], float out[], int length) { } // namespace kernels } // namespace gapi } // namespace InferenceEngine + diff --git a/inference-engine/src/preprocessing/cpu_x86_sse42/ie_preprocess_gapi_kernels_sse42.cpp b/inference-engine/src/preprocessing/cpu_x86_sse42/ie_preprocess_gapi_kernels_sse42.cpp index cf121f4296e8f2..8b994d82b8e641 100644 --- a/inference-engine/src/preprocessing/cpu_x86_sse42/ie_preprocess_gapi_kernels_sse42.cpp +++ b/inference-engine/src/preprocessing/cpu_x86_sse42/ie_preprocess_gapi_kernels_sse42.cpp @@ -50,18 +50,6 @@ namespace InferenceEngine { namespace gapi { namespace kernels { -//---------------------------------------------------------------------- - -static inline v_uint16x8 v_expand_low(const v_uint8x16& a) { - return v_uint16x8(_mm_unpacklo_epi8(a.val, _mm_setzero_si128())); } - -static inline v_uint16x8 v_expand_high(const v_uint8x16& a) { - return v_uint16x8(_mm_unpackhi_epi8(a.val, _mm_setzero_si128())); } - -//------------------------------------------------------------------------------ - // Resize (bi-linear, 8U) void calcRowLinear_8U(uint8_t *dst[], const uint8_t *src0[], @@ -485,9 +473,12 @@ void calcRowLinear_8U(uint8_t *dst[], } } +// The universal-intrinsic 3C/4C resize implementation for SSE42 is sometimes a bit slower than the original one. +// Keep the original implementation until the cause is found, then remove it. +#if 1 // Resize (bi-linear, 8U, generic number of channels) template -void calcRowLinear_8UC_Impl(std::array, chanNum> &dst, +void calcRowLinear_8UC_Impl_(std::array, chanNum> &dst, const uint8_t *src0[], const uint8_t *src1[], const short alpha[], @@ -498,9 +489,11 @@ void calcRowLinear_8UC_Impl(std::array, chanNum> &dst, const Size &inSz, const Size &outSz, int lpi) { + const int half_nlanes = (v_uint8::nlanes / 2); + if (4 == lpi) { // vertical pass - GAPI_DbgAssert(inSz.width >= 8); + GAPI_DbgAssert(inSz.width >= half_nlanes); __m128i b0 = _mm_set1_epi16(beta[0]); __m128i b1 = _mm_set1_epi16(beta[1]); @@ -508,7 +501,7 @@ void calcRowLinear_8UC_Impl(std::array, chanNum> &dst, __m128i b3 = _mm_set1_epi16(beta[3]); for (int w = 0; w < inSz.width*chanNum; ) { - for (; w <= inSz.width*chanNum - 8 && w >= 0; w += 8) { + for (; w <= inSz.width*chanNum - half_nlanes && w >= 0; w += half_nlanes) { //-------------------------------------------- // reworked from: ie_preprocess_data_sse42.cpp // function: resize_bilinear_u8 @@ -558,14 +551,14 @@ void calcRowLinear_8UC_Impl(std::array, chanNum> &dst, } if (w < inSz.width*chanNum) { - w = inSz.width*chanNum - 8; + w = inSz.width*chanNum - half_nlanes; } } // horizontal pass - GAPI_DbgAssert(outSz.width >= 8); + GAPI_DbgAssert(outSz.width >= half_nlanes); for (int x = 0; x < outSz.width; ) { - for (; x <= outSz.width - 8 && x >= 0; x += 8) { + for (; x <= outSz.width - half_nlanes && x >= 0; x += half_nlanes) { //-------------------------------------------- // reworked from: ie_preprocess_data_sse42.cpp // function: resize_bilinear_u8 @@ -645,17 +638,18 @@ void calcRowLinear_8UC_Impl(std::array, chanNum> &dst, } if (x < outSz.width) { - x = outSz.width - 8; + x = outSz.width - half_nlanes; } } + } else { // if any lpi for (int l = 0; l < lpi; l++) { short beta0 = beta[l]; // vertical pass - GAPI_DbgAssert(inSz.width*chanNum >= 8); + GAPI_DbgAssert(inSz.width*chanNum >= half_nlanes); for (int w = 0; w < inSz.width*chanNum; ) { - for (; w <= inSz.width*chanNum - 8; w += 8) { + for (; w <= inSz.width*chanNum - half_nlanes; w += half_nlanes) { v_int16x8 s0 =
v_reinterpret_as_s16(v_load_expand(&src0[l][w])); v_int16x8 s1 = v_reinterpret_as_s16(v_load_expand(&src1[l][w])); v_int16x8 t = v_mulhrs(s0 - s1, beta0) + s1; @@ -663,14 +657,14 @@ void calcRowLinear_8UC_Impl(std::array, chanNum> &dst, } if (w < inSz.width*chanNum) { - w = inSz.width*chanNum - 8; + w = inSz.width*chanNum - half_nlanes; } } // horizontal pass - GAPI_DbgAssert(outSz.width >= 8); + GAPI_DbgAssert(outSz.width >= half_nlanes); for (int x = 0; x < outSz.width; ) { - for (; x <= outSz.width - 8 && x >= 0; x += 8) { + for (; x <= outSz.width - half_nlanes && x >= 0; x += half_nlanes) { for (int c = 0; c < chanNum; c++) { v_int16x8 a0 = v_load(&alpha[x]); // as signed Q1.1.14 v_int16x8 sx = v_load(&mapsx[x]); // as integer (int16) @@ -682,12 +676,186 @@ void calcRowLinear_8UC_Impl(std::array, chanNum> &dst, } if (x < outSz.width) { - x = outSz.width - 8; + x = outSz.width - half_nlanes; + } + } + } + } +} +#else +// The universal-intrinsic 3C/4C resize implementation for SSE42 is sometimes a bit slower. +// Enable this variant once the cause is found. +template +void calcRowLinear_8UC_Impl_(std::array, chanNum> &dst, + const uint8_t *src0[], + const uint8_t *src1[], + const short alpha[], + const short clone[], // 4 clones of alpha + const short mapsx[], + const short beta[], + uint8_t tmp[], + const Size &inSz, + const Size &outSz, + int lpi) { + const int half_nlanes = (v_uint8::nlanes / 2); + + if (4 == lpi) { + // vertical pass + GAPI_DbgAssert(inSz.width >= half_nlanes); + + v_int16 b0 = vx_setall_s16(beta[0]); + v_int16 b1 = vx_setall_s16(beta[1]); + v_int16 b2 = vx_setall_s16(beta[2]); + v_int16 b3 = vx_setall_s16(beta[3]); + + for (int w = 0; w < inSz.width*chanNum; ) { + for (; w <= inSz.width*chanNum - half_nlanes && w >= 0; w += half_nlanes) { + v_int16 val0_0 = v_reinterpret_as_s16(vx_load_expand(&src0[0][w])); + v_int16 val0_1 = v_reinterpret_as_s16(vx_load_expand(&src0[1][w])); + v_int16 val0_2 = v_reinterpret_as_s16(vx_load_expand(&src0[2][w])); + v_int16 val0_3 = v_reinterpret_as_s16(vx_load_expand(&src0[3][w])); + + v_int16 val1_0 = v_reinterpret_as_s16(vx_load_expand(&src1[0][w])); + v_int16 val1_1 = v_reinterpret_as_s16(vx_load_expand(&src1[1][w])); + v_int16 val1_2 = v_reinterpret_as_s16(vx_load_expand(&src1[2][w])); + v_int16 val1_3 = v_reinterpret_as_s16(vx_load_expand(&src1[3][w])); + + v_int16 t0 = v_mulhrs(v_sub_wrap(val0_0, val1_0), b0); + v_int16 t1 = v_mulhrs(v_sub_wrap(val0_1, val1_1), b1); + v_int16 t2 = v_mulhrs(v_sub_wrap(val0_2, val1_2), b2); + v_int16 t3 = v_mulhrs(v_sub_wrap(val0_3, val1_3), b3); + + v_int16 r0 = v_add_wrap(val1_0, t0); + v_int16 r1 = v_add_wrap(val1_1, t1); + v_int16 r2 = v_add_wrap(val1_2, t2); + v_int16 r3 = v_add_wrap(val1_3, t3); + + v_uint8 q0 = v_packus(r0, r1); + v_uint8 q1 = v_packus(r2, r3); + + v_uint8 q2 = v_blend_shiftleft<0xCC /*0b11001100*/, 4>(q0, q1); + v_uint8 q3 = v_blend_shiftright<0xCC /*0b11001100*/, 4>(q0, q1); + + v_uint8 mask = v_setr_s8(0, 8, 4, 12, 1, 9, 5, 13, 2, 10, 6, 14, 3, 11, 7, 15); + + v_uint8 q4 = v_shuffle_s8(q2, mask); + v_uint8 q5 = v_shuffle_s8(q3, mask); + + vx_store(&tmp[4 * w + 0], q4); + vx_store(&tmp[4 * w + 2 * half_nlanes], q5); + } + + if (w < inSz.width*chanNum) { + w = inSz.width*chanNum - half_nlanes; + } + } + + // horizontal pass + GAPI_DbgAssert(outSz.width >= half_nlanes); + for (int x = 0; x < outSz.width; ) { + for (; x <= outSz.width - half_nlanes && x >= 0; x += half_nlanes) { + v_int16 a10 = vx_load(&clone[4 * x]); + v_int16 a32 = vx_load(&clone[4 * (x + 2)]); + v_int16 a54
= vx_load(&clone[4 * (x + 4)]); + v_int16 a76 = vx_load(&clone[4 * (x + 6)]); + + v_uint8 val_0 = vx_setzero_u8(); + v_uint8 val_1 = vx_setzero_u8(); + v_uint8 val_2 = vx_setzero_u8(); + v_uint8 val_3 = vx_setzero_u8(); + + for (int c = 0; c < chanNum; ++c) { + int shift = (half_nlanes / 4); + + v_gather_channel(val_0, tmp, mapsx, chanNum, c, x, 0); + v_gather_channel(val_1, tmp, mapsx, chanNum, c, x, shift); + v_gather_channel(val_2, tmp, mapsx, chanNum, c, x, shift * 2); + v_gather_channel(val_3, tmp, mapsx, chanNum, c, x, shift * 3); + + v_int16 val0_0 = v_reinterpret_as_s16(v_expand_low(val_0)); + v_int16 val0_1 = v_reinterpret_as_s16(v_expand_low(val_1)); + v_int16 val0_2 = v_reinterpret_as_s16(v_expand_low(val_2)); + v_int16 val0_3 = v_reinterpret_as_s16(v_expand_low(val_3)); + + v_int16 val1_0 = v_reinterpret_as_s16(v_expand_high(val_0)); + v_int16 val1_1 = v_reinterpret_as_s16(v_expand_high(val_1)); + v_int16 val1_2 = v_reinterpret_as_s16(v_expand_high(val_2)); + v_int16 val1_3 = v_reinterpret_as_s16(v_expand_high(val_3)); + + v_int16 t0 = v_mulhrs(v_sub_wrap(val0_0, val1_0), a10); + v_int16 t1 = v_mulhrs(v_sub_wrap(val0_1, val1_1), a32); + v_int16 t2 = v_mulhrs(v_sub_wrap(val0_2, val1_2), a54); + v_int16 t3 = v_mulhrs(v_sub_wrap(val0_3, val1_3), a76); + + v_int16 r0 = v_add_wrap(val1_0, t0); + v_int16 r1 = v_add_wrap(val1_1, t1); + v_int16 r2 = v_add_wrap(val1_2, t2); + v_int16 r3 = v_add_wrap(val1_3, t3); + + v_uint8 q0 = v_packus(r0, r1); + v_uint8 q1 = v_packus(r2, r3); + + v_uint8 mask = v_setr_s8(0, 4, 8, 12, 2, 6, 10, 14, 1, 5, 9, 13, 3, 7, 11, 15); + + v_uint8 q2 = v_shuffle_s8(q0, mask); + v_uint8 q3 = v_shuffle_s8(q1, mask); + + v_uint8 q4 = v_blend_shiftleft<0xCC /*0b11001100*/, 4>(q2, q3); + v_uint8 q5 = v_blend_shiftright<0xCC /*0b11001100*/, 4>(q2, q3); + + v_store_low(&dst[c][0][x], q4); + v_store_high(&dst[c][1][x], q4); + v_store_low(&dst[c][2][x], q5); + v_store_high(&dst[c][3][x], q5); + } + } + + if (x < outSz.width) { + x = outSz.width - half_nlanes; + } + } + + } else { // if any lpi + for (int l = 0; l < lpi; ++l) { + short beta0 = beta[l]; + + // vertical pass + GAPI_DbgAssert(inSz.width*chanNum >= half_nlanes); + for (int w = 0; w < inSz.width*chanNum; ) { + for (; w <= inSz.width*chanNum - half_nlanes; w += half_nlanes) { + v_int16 s0 = v_reinterpret_as_s16(vx_load_expand(&src0[l][w])); + v_int16 s1 = v_reinterpret_as_s16(vx_load_expand(&src1[l][w])); + v_int16 t = v_mulhrs(s0 - s1, beta0) + s1; + v_pack_u_store(tmp + w, t); + } + + if (w < inSz.width*chanNum) { + w = inSz.width*chanNum - half_nlanes; + } + } + + // horizontal pass + GAPI_DbgAssert(outSz.width >= half_nlanes); + for (int x = 0; x < outSz.width; ) { + for (; x <= outSz.width - half_nlanes && x >= 0; x += half_nlanes) { + for (int c = 0; c < chanNum; ++c) { + v_int16 a0 = vx_load(&alpha[x]); // as signed Q1.1.14 + v_int16 sx = vx_load(&mapsx[x]); // as integer (int16) + v_int16 t0 = v_gather_chan(tmp, sx, c, 0); + v_int16 t1 = v_gather_chan(tmp, sx, c, 1); + v_int16 d = v_mulhrs(t0 - t1, a0) + t1; + v_pack_u_store(&dst[c][l][x], d); + } + } + + if (x < outSz.width) { + x = outSz.width - half_nlanes; } } } } } +#endif // Resize (bi-linear, 8UC3) void calcRowLinear_8U(C3, std::array, 3> &dst, @@ -703,7 +871,7 @@ void calcRowLinear_8U(C3, std::array, 3> &dst, int lpi) { constexpr const int chanNum = 3; - calcRowLinear_8UC_Impl(dst, src0, src1, alpha, clone, mapsx, beta, tmp, inSz, outSz, lpi); + calcRowLinear_8UC_Impl_(dst, src0, src1, alpha, clone, mapsx, beta, tmp, inSz, outSz, lpi); } // 
Resize (bi-linear, 8UC4) @@ -719,7 +887,7 @@ void calcRowLinear_8U(C4, std::array, 4> &dst, const Size &outSz, int lpi) { constexpr const int chanNum = 4; - calcRowLinear_8UC_Impl(dst, src0, src1, alpha, clone, mapsx, beta, tmp, inSz, outSz, lpi); + calcRowLinear_8UC_Impl_(dst, src0, src1, alpha, clone, mapsx, beta, tmp, inSz, outSz, lpi); } // Resize (bi-linear, 32F) diff --git a/inference-engine/src/preprocessing/ie_preprocess_gapi_kernels.cpp b/inference-engine/src/preprocessing/ie_preprocess_gapi_kernels.cpp index 667e9d6be18410..2272ba5970645f 100644 --- a/inference-engine/src/preprocessing/ie_preprocess_gapi_kernels.cpp +++ b/inference-engine/src/preprocessing/ie_preprocess_gapi_kernels.cpp @@ -894,24 +894,62 @@ static void calcRowLinearC(const cv::gapi::fluid::View & in, } } - #ifdef HAVE_SSE +#ifdef HAVE_AVX512 + if (with_cpu_x86_avx512_core()) { + if (std::is_same::value) { + if (inSz.width >= 64 && outSz.width >= 32) { + avx512::calcRowLinear_8UC(dst, + reinterpret_cast(src0), + reinterpret_cast(src1), + reinterpret_cast(alpha), + reinterpret_cast(clone), + reinterpret_cast(mapsx), + reinterpret_cast(beta), + reinterpret_cast(tmp), + inSz, outSz, lpi); + return; + } + } + } +#endif + +#ifdef HAVE_AVX2 + if (with_cpu_x86_avx2()) { + if (std::is_same::value) { + if (inSz.width >= 32 && outSz.width >= 16) { + avx::calcRowLinear_8UC(dst, + reinterpret_cast(src0), + reinterpret_cast(src1), + reinterpret_cast(alpha), + reinterpret_cast(clone), + reinterpret_cast(mapsx), + reinterpret_cast(beta), + reinterpret_cast(tmp), + inSz, outSz, lpi); + return; + } + } + } +#endif + +#ifdef HAVE_SSE if (with_cpu_x86_sse42()) { if (std::is_same::value) { if (inSz.width >= 16 && outSz.width >= 8) { calcRowLinear_8UC(dst, - reinterpret_cast(src0), - reinterpret_cast(src1), - reinterpret_cast(alpha), - reinterpret_cast(clone), - reinterpret_cast(mapsx), - reinterpret_cast(beta), - reinterpret_cast(tmp), - inSz, outSz, lpi); + reinterpret_cast(src0), + reinterpret_cast(src1), + reinterpret_cast(alpha), + reinterpret_cast(clone), + reinterpret_cast(mapsx), + reinterpret_cast(beta), + reinterpret_cast(tmp), + inSz, outSz, lpi); return; } } } - #endif // HAVE_SSE +#endif // HAVE_SSE auto length = out[0].get().length(); diff --git a/inference-engine/tests_deprecated/fluid_preproc/fluid_test_computations/CMakeLists.txt b/inference-engine/tests_deprecated/fluid_preproc/fluid_test_computations/CMakeLists.txt index 94b935f6918391..36b3d9a089ad08 100644 --- a/inference-engine/tests_deprecated/fluid_preproc/fluid_test_computations/CMakeLists.txt +++ b/inference-engine/tests_deprecated/fluid_preproc/fluid_test_computations/CMakeLists.txt @@ -7,6 +7,12 @@ file(GLOB HDR *.hpp) add_library(fluid_test_computations SHARED ${SRC} ${HDR}) +# Workaround to avoid warnings caused with bug in the avx512intrin.h of GCC5 +if((CMAKE_CXX_COMPILER_ID STREQUAL "GNU") AND + (CMAKE_CXX_COMPILER_VERSION VERSION_LESS_EQUAL 5.5)) + set_target_properties(fluid_test_computations PROPERTIES LINK_FLAGS_RELEASE "-Wno-error=maybe-uninitialized -Wno-maybe-uninitialized") +endif() + target_include_directories(fluid_test_computations PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}") target_link_libraries(fluid_test_computations PRIVATE inference_engine_preproc_s inference_engine fluid) diff --git a/inference-engine/thirdparty/ocv/opencv_hal_avx.hpp b/inference-engine/thirdparty/ocv/opencv_hal_avx.hpp index eb592b1212d049..046f604d57654a 100644 --- a/inference-engine/thirdparty/ocv/opencv_hal_avx.hpp +++ 
b/inference-engine/thirdparty/ocv/opencv_hal_avx.hpp @@ -48,7 +48,7 @@ inline __m256d _v256_permute2x128(const __m256d& a, const __m256d& b) { return _mm256_permute2f128_pd(a, b, imm); } template -inline _Tpvec v256_permute2x128(const _Tpvec& a, const _Tpvec& b) +static inline _Tpvec v256_permute2x128(const _Tpvec& a, const _Tpvec& b) { return _Tpvec(_v256_permute2x128(a.val, b.val)); } template @@ -60,7 +60,7 @@ inline __m256d _v256_permute4x64(const __m256d& a) { return _mm256_permute4x64_pd(a, imm); } template -inline _Tpvec v256_permute4x64(const _Tpvec& a) +static inline _Tpvec v256_permute4x64(const _Tpvec& a) { return _Tpvec(_v256_permute4x64(a.val)); } inline __m128i _v256_extract_high(const __m256i& v) @@ -730,6 +730,11 @@ OPENCV_HAL_IMPL_AVX_EXPAND(v_int16x16, v_int32x8, short, _mm256_cvtepi16_e OPENCV_HAL_IMPL_AVX_EXPAND(v_uint32x8, v_uint64x4, unsigned, _mm256_cvtepu32_epi64) OPENCV_HAL_IMPL_AVX_EXPAND(v_int32x8, v_int64x4, int, _mm256_cvtepi32_epi64) +static inline v_int16x16 v_load_ccache_expand(const uchar* ptr) +{ + return v_int16x16(_mm256_cvtepu8_epi16(_mm_lddqu_si128((const __m128i*)ptr))); +} + inline v_uint8x32 v_mul_wrap(const v_uint8x32& a, const v_uint8x32& b) { __m256i ad = _mm256_srai_epi16(a.val, 8); @@ -1925,6 +1930,37 @@ inline v_uint8x32 v_pack_u(const v_int16x16& a, const v_int16x16& b) return v_uint8x32(_v256_shuffle_odd_64(_mm256_packus_epi16(a.val, b.val))); } +static inline v_uint8x32 v_packus(const v_int16x16& a, const v_int16x16& b) +{ + return v_uint8x32(_mm256_packus_epi16(a.val, b.val)); +} + +template +static inline v_uint8x32 v_blend_shiftleft(const v_uint8x32& a, const v_uint8x32& b) +{ + return v_uint8x32(_mm256_blend_epi16(a.val, _mm256_slli_si256(b.val, shift), mask)); +} + +template +static inline v_uint8x32 v_blend_shiftright(const v_uint8x32& a, const v_uint8x32& b) +{ + return v_uint8x32(_mm256_blend_epi16(_mm256_srli_si256(a.val, shift), b.val, mask)); +} + +static inline v_uint8x32 v_setr_s8(char b0, char b1, char b2, char b3, char b4, + char b5, char b6, char b7, char b8, char b9, + char b10, char b11, char b12, char b13, char b14, + char b15, char b16, char b17, char b18, char b19, + char b20, char b21, char b22, char b23, char b24, + char b25, char b26, char b27, char b28, char b29, + char b30, char b31) +{ + return v_uint8x32(_mm256_setr_epi8(b0, b1, b2, b3, b4, b5, b6, b7, + b8, b9, b10, b11, b12, b13, b14, b15, + b16, b17, b18, b19, b20, b21, b22, b23, + b24, b25, b26, b27, b28, b29, b30, b31)); +} + inline void v_pack_store(schar* ptr, const v_int16x16& a) { v_store_low(ptr, v_pack(a, a)); } @@ -3075,9 +3111,7 @@ static inline v_uint16x16 v_mulhi(const v_uint16x16& a, uint16_t b) static inline v_int16x16 v_mulhrs(const v_int16x16& a, const v_int16x16& b) { - v_int16x16 r; - r.val = _mm256_mulhrs_epi16(a.val, b.val); - return r; + return v_int16x16(_mm256_mulhrs_epi16(a.val, b.val)); } static inline v_int16x16 v_mulhrs(const v_int16x16& a, short b) @@ -3110,6 +3144,49 @@ static inline v_float32x8 operator* (const v_float32x8& a, float b) return a * v256_setall_f32(b); } +static inline v_uint8x32 v_shuffle_s8(const v_uint8x32& a, const v_uint8x32& mask) +{ + return v_uint8x32(_mm256_shuffle_epi8(a.val, mask.val)); +} + +static inline void v_gather_channel(v_uint8x32& vec, const uint8_t tmp[], const short mapsx[], + int chanNum, int c, int x, int shift) +{ + vec.val = _mm256_insert_epi32(vec.val, *reinterpret_cast(&tmp[4 * (chanNum * mapsx[x + shift + 0] + c)]), 0); + vec.val = _mm256_insert_epi32(vec.val, *reinterpret_cast(&tmp[4 * 
(chanNum * mapsx[x + shift + 1] + c)]), 1); + vec.val = _mm256_insert_epi32(vec.val, *reinterpret_cast(&tmp[4 * (chanNum * mapsx[x + shift + 2] + c)]), 2); + vec.val = _mm256_insert_epi32(vec.val, *reinterpret_cast(&tmp[4 * (chanNum * mapsx[x + shift + 3] + c)]), 3); + + vec.val = _mm256_insert_epi32(vec.val, *reinterpret_cast(&tmp[4 * (chanNum * (mapsx[x + shift + 0] + 1) + c)]), 4); + vec.val = _mm256_insert_epi32(vec.val, *reinterpret_cast(&tmp[4 * (chanNum * (mapsx[x + shift + 1] + 1) + c)]), 5); + vec.val = _mm256_insert_epi32(vec.val, *reinterpret_cast(&tmp[4 * (chanNum * (mapsx[x + shift + 2] + 1) + c)]), 6); + vec.val = _mm256_insert_epi32(vec.val, *reinterpret_cast(&tmp[4 * (chanNum * (mapsx[x + shift + 3] + 1) + c)]), 7); +} + +namespace { + template + static inline v_int16x16 v_gather_chan(const uchar src[], const v_int16x16& index, int channel, int pos) { + v_int16x16 r; + r.val = _mm256_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(_mm256_extract_epi16(index.val, 0) + pos) + channel]), 0); + r.val = _mm256_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(_mm256_extract_epi16(index.val, 1) + pos) + channel]), 1); + r.val = _mm256_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(_mm256_extract_epi16(index.val, 2) + pos) + channel]), 2); + r.val = _mm256_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(_mm256_extract_epi16(index.val, 3) + pos) + channel]), 3); + r.val = _mm256_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(_mm256_extract_epi16(index.val, 4) + pos) + channel]), 4); + r.val = _mm256_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(_mm256_extract_epi16(index.val, 5) + pos) + channel]), 5); + r.val = _mm256_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(_mm256_extract_epi16(index.val, 6) + pos) + channel]), 6); + r.val = _mm256_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(_mm256_extract_epi16(index.val, 7) + pos) + channel]), 7); + r.val = _mm256_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(_mm256_extract_epi16(index.val, 8) + pos) + channel]), 8); + r.val = _mm256_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(_mm256_extract_epi16(index.val, 9) + pos) + channel]), 9); + r.val = _mm256_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(_mm256_extract_epi16(index.val, 10) + pos) + channel]), 10); + r.val = _mm256_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(_mm256_extract_epi16(index.val, 11) + pos) + channel]), 11); + r.val = _mm256_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(_mm256_extract_epi16(index.val, 12) + pos) + channel]), 12); + r.val = _mm256_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(_mm256_extract_epi16(index.val, 13) + pos) + channel]), 13); + r.val = _mm256_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(_mm256_extract_epi16(index.val, 14) + pos) + channel]), 14); + r.val = _mm256_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(_mm256_extract_epi16(index.val, 15) + pos) + channel]), 15); + return r; + } +} // namespace + CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END //! 
@endcond diff --git a/inference-engine/thirdparty/ocv/opencv_hal_avx512.hpp b/inference-engine/thirdparty/ocv/opencv_hal_avx512.hpp index 2f88c191651259..1f786b7bebb2ea 100644 --- a/inference-engine/thirdparty/ocv/opencv_hal_avx512.hpp +++ b/inference-engine/thirdparty/ocv/opencv_hal_avx512.hpp @@ -89,7 +89,7 @@ inline __m256 _v512_extract_high(const __m512& v) { return _mm512_extractf32x8_ps(v, 1); } inline __m256d _v512_extract_high(const __m512d& v) -{ return _mm512_extractf64x4_pd(v, 1); } +{ return _mm512_mask_extractf64x4_pd(_mm256_setzero_pd(), (__mmask8) -1, v, 1); } inline __m256i _v512_extract_low(const __m512i& v) { return _mm512_castsi512_si256(v); } @@ -1936,7 +1936,7 @@ OPENCV_HAL_IMPL_AVX512_EXPAND_Q(v_int32x16, schar, _mm512_cvtepi8_epi32) /* pack */ // 16 inline v_int8x64 v_pack(const v_int16x32& a, const v_int16x32& b) -{ return v_int8x64(_mm512_permutexvar_epi64(_v512_set_epu64(7, 5, 3, 1, 6, 4, 2, 0), _mm512_packs_epi16(a.val, b.val))); } +{ return v_int8x64(_mm512_mask_permutexvar_epi64(_mm512_setzero_si512(), (__mmask8)-1, _v512_set_epu64(7, 5, 3, 1, 6, 4, 2, 0), _mm512_packs_epi16(a.val, b.val))); } inline v_uint8x64 v_pack(const v_uint16x32& a, const v_uint16x32& b) { @@ -1946,7 +1946,7 @@ inline v_uint8x64 v_pack(const v_uint16x32& a, const v_uint16x32& b) inline v_uint8x64 v_pack_u(const v_int16x32& a, const v_int16x32& b) { - return v_uint8x64(_mm512_permutexvar_epi64(_v512_set_epu64(7, 5, 3, 1, 6, 4, 2, 0), _mm512_packus_epi16(a.val, b.val))); + return v_uint8x64(_mm512_mask_permutexvar_epi64(_mm512_setzero_si512(), (__mmask8)-1, _v512_set_epu64(7, 5, 3, 1, 6, 4, 2, 0), _mm512_packus_epi16(a.val, b.val))); } inline void v_pack_store(schar* ptr, const v_int16x32& a) @@ -2007,7 +2007,9 @@ void v_rshr_pack_store(schar* ptr, const v_int16x32& a) // 32 inline v_int16x32 v_pack(const v_int32x16& a, const v_int32x16& b) -{ return v_int16x32(_mm512_permutexvar_epi64(_v512_set_epu64(7, 5, 3, 1, 6, 4, 2, 0), _mm512_packs_epi32(a.val, b.val))); } +{ return v_int16x32(_mm512_mask_permutexvar_epi64(_mm512_setzero_si512(), (__mmask8) -1, + _v512_set_epu64(7, 5, 3, 1, 6, 4, 2, 0), + _mm512_packs_epi32(a.val, b.val))); } inline v_uint16x32 v_pack(const v_uint32x16& a, const v_uint32x16& b) { @@ -2016,7 +2018,9 @@ inline v_uint16x32 v_pack(const v_uint32x16& a, const v_uint32x16& b) } inline v_uint16x32 v_pack_u(const v_int32x16& a, const v_int32x16& b) -{ return v_uint16x32(_mm512_permutexvar_epi64(_v512_set_epu64(7, 5, 3, 1, 6, 4, 2, 0), _mm512_packus_epi32(a.val, b.val))); } +{ return v_uint16x32(_mm512_mask_permutexvar_epi64(_mm512_setzero_si512(), (__mmask8) -1, + _v512_set_epu64(7, 5, 3, 1, 6, 4, 2, 0), + _mm512_packus_epi32(a.val, b.val))); } inline void v_pack_store(short* ptr, const v_int32x16& a) { v_store_low(ptr, v_pack(a, a)); } @@ -2118,7 +2122,7 @@ void v_rshr_pack_store(int* ptr, const v_int64x8& a) // pack boolean inline v_uint8x64 v_pack_b(const v_uint16x32& a, const v_uint16x32& b) -{ return v_uint8x64(_mm512_permutexvar_epi64(_v512_set_epu64(7, 5, 3, 1, 6, 4, 2, 0), _mm512_packs_epi16(a.val, b.val))); } +{ return v_uint8x64(_mm512_mask_permutexvar_epi64(_mm512_setzero_si512(), (__mmask8) -1, _v512_set_epu64(7, 5, 3, 1, 6, 4, 2, 0), _mm512_packs_epi16(a.val, b.val))); } inline v_uint8x64 v_pack_b(const v_uint32x16& a, const v_uint32x16& b, const v_uint32x16& c, const v_uint32x16& d) @@ -3069,9 +3073,7 @@ static inline v_uint16x32 v_mulhi(const v_uint16x32& a, uint16_t b) static inline v_int16x32 v_mulhrs(const v_int16x32& a, const v_int16x32& b) { - v_int16x32 
r; - r.val = _mm512_mulhrs_epi16(a.val, b.val); - return r; + return v_int16x32(_mm512_mulhrs_epi16(a.val, b.val)); } static inline v_int16x32 v_mulhrs(const v_int16x32& a, short b) @@ -3104,6 +3106,188 @@ static inline v_float32x16 operator* (const v_float32x16& a, float b) return a * v512_setall_f32(b); } +template +static inline v_uint8x64 v_mask_blend_shiftleft(const v_uint8x64& a, const v_uint8x64& b) +{ + return v_uint8x64(_mm512_mask_blend_epi16(mask, + a.val, _mm512_bslli_epi128(b.val, shift))); +} + +template +static inline v_uint8x64 v_mask_blend_shiftright(const v_uint8x64& a, const v_uint8x64& b) +{ + return v_uint8x64(_mm512_mask_blend_epi16(mask, + _mm512_bsrli_epi128(a.val, shift), b.val)); +} + +static inline v_uint8x64 v_packus(const v_int16x32& a, const v_int16x32& b) +{ + return v_uint8x64(_mm512_packus_epi16(a.val, b.val)); +} + + +#define word(b0, b1, b2, b3) \ + (((uint32_t)((uint8_t)(b0)) << 0*8) \ + | ((uint32_t)((uint8_t)(b1)) << 1*8) \ + | ((uint32_t)((uint8_t)(b2)) << 2*8) \ + | ((uint32_t)((uint8_t)(b3)) << 3*8)) + +static inline v_uint8x64 v_setr_s8(char b0, char b1, char b2, char b3, char b4, + char b5, char b6, char b7, char b8, char b9, + char b10, char b11, char b12, char b13, char b14, + char b15, char b16, char b17, char b18, char b19, + char b20, char b21, char b22, char b23, char b24, + char b25, char b26, char b27, char b28, char b29, + char b30, char b31, char b32, char b33, char b34, + char b35, char b36, char b37, char b38, char b39, + char b40, char b41, char b42, char b43, char b44, + char b45, char b46, char b47, char b48, char b49, + char b50, char b51, char b52, char b53, char b54, + char b55, char b56, char b57, char b58, char b59, + char b60, char b61, char b62, char b63) +{ + return v_uint8x64(_mm512_setr_epi32(word(b0, b1, b2, b3), word(b4, b5, b6, b7), word(b8, b9, b10, b11), + word(b12, b13, b14, b15), word(b16, b17, b18, b19), word(b20, b21, b22, b23), + word(b24, b25, b26, b27), word(b28, b29, b30, b31), word(b32, b33, b34, b35), + word(b36, b37, b38, b39), word(b40, b41, b42, b43), word(b44, b45, b46, b47), + word(b48, b49, b50, b51), word(b52, b53, b54, b55), word(b56, b57, b58, b59), + word(b60, b61, b62, b63))); +} + +static inline v_uint64x8 v_set_s64(int b7, int b6, int b5, int b4, int b3, int b2, int b1, int b0) +{ + return v_uint64x8(_mm512_set_epi64(b7, b6, b5, b4, b3, b2, b1, b0)); +} + +static inline v_uint32x16 v_set_s32(int b15, int b14, int b13, int b12, int b11, int b10, int b9, int b8, + int b7, int b6, int b5, int b4, int b3, int b2, int b1, int b0) +{ + return v_uint32x16(_mm512_set_epi32(b15, b14, b13, b12, b11, b10, b9, b8, b7, b6, b5, b4, b3, b2, b1, b0)); +} + +static inline v_uint8x64 v_shuffle_s8(const v_uint8x64& a, const v_uint8x64& mask) +{ + return v_uint8x64(_mm512_shuffle_epi8(a.val, mask.val)); +} +static inline v_int16x32 v_load_ccache_expand(const uchar* ptr) +{ + return v_int16x32(_mm512_cvtepu8_epi16(_mm256_lddqu_si256((const __m256i*)ptr))); \ +} +static inline __m512i v512_insert_epi16(__m512i target, const uchar x, const int index) +{ + return _mm512_mask_set1_epi16(target, 1UL << index, x); +} +static inline __m512i v512_insert_epi32(__m512i target, const int32_t x, const int index) +{ + return _mm512_mask_set1_epi32(target, 1UL << index, x); +} + +static inline void v_gather_channel(v_uint8x64& vec, const uint8_t tmp[], const short mapsx[], + int chanNum, int c, int x, int shift) +{ + __m256i vec1 = _mm256_setzero_si256(); + __m256i vec2 = _mm256_setzero_si256(); + + vec1 = 
_mm256_insert_epi32(vec1, *reinterpret_cast(&tmp[4 * (chanNum * mapsx[x + shift + 0] + c)]), 0); + vec1 = _mm256_insert_epi32(vec1, *reinterpret_cast(&tmp[4 * (chanNum * mapsx[x + shift + 1] + c)]), 1); + vec1 = _mm256_insert_epi32(vec1, *reinterpret_cast(&tmp[4 * (chanNum * mapsx[x + shift + 2] + c)]), 2); + vec1 = _mm256_insert_epi32(vec1, *reinterpret_cast(&tmp[4 * (chanNum * mapsx[x + shift + 3] + c)]), 3); + vec1 = _mm256_insert_epi32(vec1, *reinterpret_cast(&tmp[4 * (chanNum * mapsx[x + shift + 4] + c)]), 4); + vec1 = _mm256_insert_epi32(vec1, *reinterpret_cast(&tmp[4 * (chanNum * mapsx[x + shift + 5] + c)]), 5); + vec1 = _mm256_insert_epi32(vec1, *reinterpret_cast(&tmp[4 * (chanNum * mapsx[x + shift + 6] + c)]), 6); + vec1 = _mm256_insert_epi32(vec1, *reinterpret_cast(&tmp[4 * (chanNum * mapsx[x + shift + 7] + c)]), 7); + + vec2 = _mm256_insert_epi32(vec2, *reinterpret_cast(&tmp[4 * (chanNum * (mapsx[x + shift + 0] + 1) + c)]), 0); + vec2 = _mm256_insert_epi32(vec2, *reinterpret_cast(&tmp[4 * (chanNum * (mapsx[x + shift + 1] + 1) + c)]), 1); + vec2 = _mm256_insert_epi32(vec2, *reinterpret_cast(&tmp[4 * (chanNum * (mapsx[x + shift + 2] + 1) + c)]), 2); + vec2 = _mm256_insert_epi32(vec2, *reinterpret_cast(&tmp[4 * (chanNum * (mapsx[x + shift + 3] + 1) + c)]), 3); + vec2 = _mm256_insert_epi32(vec2, *reinterpret_cast(&tmp[4 * (chanNum * (mapsx[x + shift + 4] + 1) + c)]), 4); + vec2 = _mm256_insert_epi32(vec2, *reinterpret_cast(&tmp[4 * (chanNum * (mapsx[x + shift + 5] + 1) + c)]), 5); + vec2 = _mm256_insert_epi32(vec2, *reinterpret_cast(&tmp[4 * (chanNum * (mapsx[x + shift + 6] + 1) + c)]), 6); + vec2 = _mm256_insert_epi32(vec2, *reinterpret_cast(&tmp[4 * (chanNum * (mapsx[x + shift + 7] + 1) + c)]), 7); + + vec.val = _mm512_inserti32x8(_mm512_castsi256_si512(vec1), vec2, 1); +} + +static inline v_uint8x64 v_permutex2_s64(const v_uint8x64& a, const v_uint8x64& b, const v_uint64x8& idxs) +{ + return v_uint8x64(_mm512_permutex2var_epi64(a.val, idxs.val, b.val)); +} + +static inline v_uint8x64 v_permutex_s32(const v_uint8x64& a, const v_uint64x8 idxs) +{ + return v_uint8x64(_mm512_permutexvar_epi32(idxs.val, a.val)); +} + +static inline v_uint8x64 v_permutex2_s32(const v_uint8x64& a, const v_uint8x64& b, const v_uint32x16 idxs) +{ + return v_uint8x64(_mm512_permutex2var_epi32(a.val, idxs.val, b.val)); +} + +#if defined(__GNUC__) + +int _mm512_cvtsi512_si32(__m512i a) +{ + __v16si b = (__v16si)a; + return b[0]; +} + +#endif + +template +static inline int v512_extract_epi32(__m512i target) +{ + return _mm512_cvtsi512_si32(_mm512_mask_alignr_epi32(_mm512_setzero_si512(), (__mmask16)-1, target, target, index)); +} + +template +static inline int v512_extract_epi16(__m512i target) +{ + return (v512_extract_epi32(target) >> (index % 2 ? 
16 : 0)) & 0xFFFF; +} + +namespace { + template + static inline v_int16x32 v_gather_chan(const uchar src[], const v_int16x32& index, int channel, int pos) { + v_int16x32 r; + + r.val = v512_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(v512_extract_epi16<0>(index.val) + pos) + channel]), 0); + r.val = v512_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(v512_extract_epi16<1>(index.val) + pos) + channel]), 1); + r.val = v512_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(v512_extract_epi16<2>(index.val) + pos) + channel]), 2); + r.val = v512_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(v512_extract_epi16<3>(index.val) + pos) + channel]), 3); + r.val = v512_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(v512_extract_epi16<4>(index.val) + pos) + channel]), 4); + r.val = v512_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(v512_extract_epi16<5>(index.val) + pos) + channel]), 5); + r.val = v512_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(v512_extract_epi16<6>(index.val) + pos) + channel]), 6); + r.val = v512_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(v512_extract_epi16<7>(index.val) + pos) + channel]), 7); + r.val = v512_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(v512_extract_epi16<8>(index.val) + pos) + channel]), 8); + r.val = v512_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(v512_extract_epi16<9>(index.val) + pos) + channel]), 9); + r.val = v512_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(v512_extract_epi16<10>(index.val) + pos) + channel]), 10); + r.val = v512_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(v512_extract_epi16<11>(index.val) + pos) + channel]), 11); + r.val = v512_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(v512_extract_epi16<12>(index.val) + pos) + channel]), 12); + r.val = v512_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(v512_extract_epi16<13>(index.val) + pos) + channel]), 13); + r.val = v512_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(v512_extract_epi16<14>(index.val) + pos) + channel]), 14); + r.val = v512_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(v512_extract_epi16<15>(index.val) + pos) + channel]), 15); + r.val = v512_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(v512_extract_epi16<16>(index.val) + pos) + channel]), 16); + r.val = v512_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(v512_extract_epi16<17>(index.val) + pos) + channel]), 17); + r.val = v512_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(v512_extract_epi16<18>(index.val) + pos) + channel]), 18); + r.val = v512_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(v512_extract_epi16<19>(index.val) + pos) + channel]), 19); + r.val = v512_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(v512_extract_epi16<20>(index.val) + pos) + channel]), 20); + r.val = v512_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(v512_extract_epi16<21>(index.val) + pos) + channel]), 21); + r.val = v512_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(v512_extract_epi16<22>(index.val) + pos) + channel]), 22); + r.val = v512_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(v512_extract_epi16<23>(index.val) + pos) + channel]), 23); + r.val = v512_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(v512_extract_epi16<24>(index.val) + pos) + channel]), 24); + r.val = v512_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(v512_extract_epi16<25>(index.val) + pos) + channel]), 25); + r.val = v512_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(v512_extract_epi16<26>(index.val) + pos) + channel]), 
26); + r.val = v512_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(v512_extract_epi16<27>(index.val) + pos) + channel]), 27); + r.val = v512_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(v512_extract_epi16<28>(index.val) + pos) + channel]), 28); + r.val = v512_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(v512_extract_epi16<29>(index.val) + pos) + channel]), 29); + r.val = v512_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(v512_extract_epi16<30>(index.val) + pos) + channel]), 30); + r.val = v512_insert_epi16(r.val, *reinterpret_cast(&src[chanNum*(v512_extract_epi16<31>(index.val) + pos) + channel]), 31); + + return r; + } +} // namespace + CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END //! @endcond diff --git a/inference-engine/thirdparty/ocv/opencv_hal_sse.hpp b/inference-engine/thirdparty/ocv/opencv_hal_sse.hpp index cfeb2962d35f76..1e75ee715204a8 100644 --- a/inference-engine/thirdparty/ocv/opencv_hal_sse.hpp +++ b/inference-engine/thirdparty/ocv/opencv_hal_sse.hpp @@ -371,6 +371,12 @@ inline v_float32x4 v_reinterpret_as_f32(const v_float64x2& a) {return v_float32x inline v_float64x2 v_reinterpret_as_f64(const v_float32x4& a) {return v_float64x2(_mm_castps_pd(a.val)); } //////////////// PACK /////////////// +static inline v_uint8x16 v_packus(const v_int16x8& a, const v_int16x8& b) { + v_uint8x16 res; + res.val = _mm_packus_epi16(a.val, b.val); + return res; +} + inline v_uint8x16 v_pack(const v_uint16x8& a, const v_uint16x8& b) { __m128i delta = _mm_set1_epi16(255); @@ -1526,7 +1532,17 @@ inline _Tpwsvec v_load_expand(const _Tps* ptr) \ { \ __m128i a = _mm_loadl_epi64((const __m128i*)ptr); \ return _Tpwsvec(_mm_srai_##wsuffix(_mm_unpacklo_##suffix(a, a), shift)); \ -} +}\ +inline _Tpwuvec v_expand_low(const _Tpuvec& a) { \ + _Tpwuvec res; \ + res.val = _mm_cvtepu8_epi16(a.val); \ + return res; \ +} \ +inline _Tpwuvec v_expand_high(const _Tpuvec& a) { \ + _Tpwuvec res; \ + res.val = _mm_unpackhi_epi8(a.val, _mm_setzero_si128()); \ + return res; \ +} \ OPENCV_HAL_IMPL_SSE_EXPAND(v_uint8x16, v_uint16x8, uchar, v_int8x16, v_int16x8, schar, epi8, epi16, 8) OPENCV_HAL_IMPL_SSE_EXPAND(v_uint16x8, v_uint32x4, ushort, v_int16x8, v_int32x4, short, epi16, epi32, 16) @@ -2921,6 +2937,12 @@ static inline v_int16x8 v_saturate_s16(const v_int32x4& a) { return r; } +static inline v_uint8x16 v_packus_s16(const v_int16x8& a, const v_int16x8& b) { + v_uint8x16 r; + r.val = _mm_packus_epi16(a.val, b.val); + return r; +} + // for each j=index[k], load two chars src[j] and src[j+1] static inline v_uint8x16 v_gather_pairs(const uchar src[], const v_int16x8& index) { v_uint8x16 r; @@ -3030,6 +3052,47 @@ static inline v_float32x4 operator* (const v_float32x4& a, float b) { return a * v_setall_f32(b); } +template +static inline v_uint8x16 v_blend_shiftleft(const v_uint8x16& a, const v_uint8x16& b) { + v_uint8x16 res; + res.val = _mm_blend_epi16(a.val, _mm_slli_si128(b.val, shift), mask /*0xCC 0b11001100*/); + return res; +} + +template +static inline v_uint8x16 v_blend_shiftright(const v_uint8x16& a, const v_uint8x16& b) { + v_uint8x16 res; + res.val = _mm_blend_epi16(_mm_srli_si128(a.val, shift), b.val, mask /*0xCC 0b11001100*/); + return res; +} + +static inline v_uint8x16 v_setr_s8(char b0, char b1, char b2, char b3, char b4, + char b5, char b6, char b7, char b8, char b9, + char b10, char b11, char b12, char b13, char b14, + char b15) { + v_uint8x16 res; + res.val = _mm_setr_epi8(b0, b1, b2, b3, b4, b5, b6, b7, b8, + b9, b10, b11, b12, b13, b14, b15); + return res; +} + + +static inline 
v_uint8x16 v_shuffle_s8(const v_uint8x16& a, const v_uint8x16& mask) {
+    v_uint8x16 res;
+    res.val = _mm_shuffle_epi8(a.val, mask.val);
+    return res;
+}
+
+static inline void v_gather_channel(v_uint8x16& vec, const uint8_t tmp[], const short mapsx[],
+                                    int chanNum, int c, int x, int shift)
+{
+    vec.val = _mm_insert_epi32(vec.val, *reinterpret_cast<const int*>(&tmp[4 * (chanNum * mapsx[x + shift + 0] + c)]), 0);
+    vec.val = _mm_insert_epi32(vec.val, *reinterpret_cast<const int*>(&tmp[4 * (chanNum * mapsx[x + shift + 1] + c)]), 1);
+
+    vec.val = _mm_insert_epi32(vec.val, *reinterpret_cast<const int*>(&tmp[4 * (chanNum * (mapsx[x + shift + 0] + 1) + c)]), 2);
+    vec.val = _mm_insert_epi32(vec.val, *reinterpret_cast<const int*>(&tmp[4 * (chanNum * (mapsx[x + shift + 1] + 1) + c)]), 3);
+}
+
 //! @}

 CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END

From 11bd4f8a4274234936d493f42cce9fd595c73939 Mon Sep 17 00:00:00 2001
From: Ilya Churaev
Date: Fri, 29 May 2020 17:46:40 +0300
Subject: [PATCH 16/24] Do not use ONNX reader if ONNX importer was disabled
 (#683)

---
 inference-engine/src/inference_engine/CMakeLists.txt        | 5 +++++
 inference-engine/src/inference_engine/ie_network_reader.cpp | 2 ++
 2 files changed, 7 insertions(+)

diff --git a/inference-engine/src/inference_engine/CMakeLists.txt b/inference-engine/src/inference_engine/CMakeLists.txt
index ce94bb3567fea8..4ae0d560700aa1 100644
--- a/inference-engine/src/inference_engine/CMakeLists.txt
+++ b/inference-engine/src/inference_engine/CMakeLists.txt
@@ -119,6 +119,11 @@ add_library(${TARGET_NAME}_obj OBJECT

 target_compile_definitions(${TARGET_NAME}_obj PRIVATE IMPLEMENT_INFERENCE_ENGINE_API)

+# TODO: Remove these definitions once readers are loaded from XML
+if(NGRAPH_ONNX_IMPORT_ENABLE)
+    target_compile_definitions(${TARGET_NAME}_obj PRIVATE ONNX_IMPORT_ENABLE)
+endif()
+
 target_include_directories(${TARGET_NAME}_obj SYSTEM PRIVATE $ $)

diff --git a/inference-engine/src/inference_engine/ie_network_reader.cpp b/inference-engine/src/inference_engine/ie_network_reader.cpp
index 9d739b6afb3c46..eabfb3dbbbc6b0 100644
--- a/inference-engine/src/inference_engine/ie_network_reader.cpp
+++ b/inference-engine/src/inference_engine/ie_network_reader.cpp
@@ -103,9 +103,11 @@ void registerReaders() {
     std::lock_guard<std::mutex> lock(readerMutex);
     if (initialized) return;
     // TODO: Read readers info from XML
+#ifdef ONNX_IMPORT_ENABLE
     auto onnxReader = std::make_shared<Reader>("ONNX", std::string("inference_engine_onnx_reader") + std::string(IE_BUILD_POSTFIX));
     readers.emplace("onnx", onnxReader);
     readers.emplace("prototxt", onnxReader);
+#endif
     auto irReader = std::make_shared<Reader>("IR", std::string("inference_engine_ir_reader") + std::string(IE_BUILD_POSTFIX));
     readers.emplace("xml", irReader);
     initialized = true;

From 6cfa77223e5cdca39ce04acca4635f2d7d7d1cd0 Mon Sep 17 00:00:00 2001
From: Evgenya Stepyreva
Date: Fri, 29 May 2020 19:09:01 +0300
Subject: [PATCH 17/24] [ nG ] Added F16 folding support (#686)

---
 ngraph/src/ngraph/op/transpose.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/ngraph/src/ngraph/op/transpose.cpp b/ngraph/src/ngraph/op/transpose.cpp
index 758bea5fbfaa5f..d95d2dad4cb994 100644
--- a/ngraph/src/ngraph/op/transpose.cpp
+++ b/ngraph/src/ngraph/op/transpose.cpp
@@ -181,6 +181,8 @@ namespace
             break;
             TYPE_CASE(bf16)(arg1, arg2, out);
             break;
+            TYPE_CASE(f16)(arg1, arg2, out);
+            break;
             TYPE_CASE(f32)(arg1, arg2, out);
             break;
             TYPE_CASE(f64)(arg1, arg2, out);

From f7052a107d24c2e72ef8fe2d25d8d1f56fe72e71 Mon Sep 17 00:00:00 2001
From: Vladimir Paramuzov
Date: Fri, 29 May 2020 20:10:30 +0300
Subject: [PATCH 18/24] [IE 
CLDNN] Optimized FQ kernel in fsv16 layout (#573) - Optimized FQ kernel in fsv16 layout. Enabled scaleshift transform for FP16 precision - Disabled activation_opt kernel with fused ops in some cases --- .../activation/activation_kernel_opt.cpp | 6 +- .../quantize/quantize_kernel_base.cpp | 8 ++- .../quantize/quantize_kernel_base.h | 6 +- .../quantize/quantize_kernel_params.h | 39 +++++++++++- .../quantize/quantize_kernel_ref.cpp | 31 +++++++--- .../quantize/quantize_kernel_ref.h | 2 +- .../quantize_kernel_scale_shift_opt.cpp | 61 +++++++++++++++---- .../quantize_kernel_scale_shift_opt.h | 2 +- .../core/cl_kernels/quantize_gpu_ref.cl | 12 +++- .../quantize_gpu_scale_shift_opt.cl | 58 ++++++++++++++++-- .../thirdparty/clDNN/src/gpu/quantize_gpu.cpp | 21 ++++++- .../graph_optimizer/prepare_quantization.cpp | 3 - .../clDNN/src/include/quantize_inst.h | 19 +++++- .../tests/test_cases/fusings_gpu_test.cpp | 61 ++++++++++++------- 14 files changed, 263 insertions(+), 66 deletions(-) diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/activation/activation_kernel_opt.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/activation/activation_kernel_opt.cpp index f48b0e275fe3e1..2830d4066e151f 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/activation/activation_kernel_opt.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/activation/activation_kernel_opt.cpp @@ -76,11 +76,13 @@ bool ActivationKernelOpt::Validate(const Params& p, const optional_params& o) co return false; } + if (params.output.GetLayout() != params.inputs[0].GetLayout()) return false; - if (!params.fused_ops.empty() && params.output.GetLayout() != DataLayout::bfyx && - params.output.GetLayout() != DataLayout::bfzyx) + if (!params.fused_ops.empty() && + ((params.output.GetLayout() != DataLayout::bfyx && params.output.GetLayout() != DataLayout::bfzyx) || + ((params.output.X().v * params.output.Y().v) % 4 != 0))) return false; return true; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_base.cpp index 92ce82357130d8..e9c1b39c8dfe44 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_base.cpp @@ -33,7 +33,7 @@ bool QuantizeKernelBase::Validate(const Params& p, const optional_params&) const return true; } -JitConstants QuantizeKernelBase::GetJitConstants(const quantize_params& params) const { +JitConstants QuantizeKernelBase::GetJitConstants(const quantize_params& params, const CommonDispatchData& runInfo) const { JitConstants jit = MakeBaseParamsJitConstants(params); if (params.packed_binary_output) { @@ -55,6 +55,10 @@ JitConstants QuantizeKernelBase::GetJitConstants(const quantize_params& params) jit.AddConstant(MakeJitConstant("LEVELS", static_cast(params.levels))); + jit.AddConstant(MakeJitConstant("LWS_0", runInfo.lws0)); + jit.AddConstant(MakeJitConstant("LWS_1", runInfo.lws1)); + jit.AddConstant(MakeJitConstant("LWS_2", runInfo.lws2)); + return jit; } @@ -70,7 +74,7 @@ KernelsData QuantizeKernelBase::GetKernelsData(const Params& params, const optio auto runInfo = SetDefault(newParams, options); auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options); - auto cldnn_jit = 
GetJitConstants(newParams); + auto cldnn_jit = GetJitConstants(newParams, runInfo); std::string jit = CreateJit(kernelName, cldnn_jit, entry_point); auto& kernel = kd.kernels[0]; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_base.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_base.h index 960fd4a7b009df..480e786ab847bf 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_base.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_base.h @@ -25,9 +25,11 @@ class QuantizeKernelBase : public common_kernel_base { using common_kernel_base::common_kernel_base; virtual ~QuantizeKernelBase() {} - virtual JitConstants GetJitConstants(const quantize_params& params) const; - virtual CommonDispatchData SetDefault(const quantize_params& params, const optional_params&) const = 0; bool Validate(const Params& p, const optional_params& o) const override; KernelsData GetKernelsData(const Params& params, const optional_params& options) const override; + +protected: + virtual JitConstants GetJitConstants(const quantize_params& params, const CommonDispatchData& runInfo) const; + virtual CommonDispatchData SetDefault(const quantize_params& params, const optional_params&) const = 0; }; } // namespace kernel_selector diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_params.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_params.h index 388c0d6fb56b58..58ae821da63325 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_params.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_params.h @@ -22,12 +22,47 @@ namespace kernel_selector { // quantize_params //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// struct quantize_params : public base_params { - quantize_params() : base_params(KernelType::QUANTIZE), - levels(0), packed_binary_output(false), scale_shift_opt(false) {} + quantize_params() + : base_params(KernelType::QUANTIZE) + , levels(0) + , packed_binary_output(false) + , scale_shift_opt(false) + , has_post_scale(true) + , has_post_shift(true) + , has_pre_shift(true) + , has_clamp(true) + , per_tensor_input_range(false) + , per_tensor_input_scale(false) + , per_tensor_input_shift(false) + , per_tensor_output_scale(false) + , per_tensor_output_shift(false) + , in_lo(0.0f) + , in_hi(0.0f) + , in_scale(0.0f) + , in_shift(0.0f) + , out_scale(0.0f) + , out_shift(0.0f) { } int levels; bool packed_binary_output; bool scale_shift_opt; + bool has_post_scale; + bool has_post_shift; + bool has_pre_shift; + bool has_clamp; + + bool per_tensor_input_range; + bool per_tensor_input_scale; + bool per_tensor_input_shift; + bool per_tensor_output_scale; + bool per_tensor_output_shift; + + float in_lo; + float in_hi; + float in_scale; + float in_shift; + float out_scale; + float out_shift; virtual ParamsKey GetParamsKey() const { auto k = base_params::GetParamsKey(); diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_ref.cpp index e1189153192a11..27fe85f5af4b4d 100644 --- 
a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_ref.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_ref.cpp @@ -18,6 +18,8 @@ #include "kernel_selector_utils.h" #include +static const size_t sub_group_size = 32; + namespace kernel_selector { ParamsKey QuantizeKernelRef::GetSupportedKey() const { ParamsKey k; @@ -43,21 +45,34 @@ CommonDispatchData QuantizeKernelRef::SetDefault(const quantize_params& params, auto output = params.output; - runInfo.gws0 = output.Batch().v; - runInfo.gws1 = params.packed_binary_output ? CeilDiv(output.Feature().v, 32) : output.Feature().v; - runInfo.gws2 = Align(output.X().v * output.Y().v * output.Z().v, 16); + if (output.GetLayout() == DataLayout::b_fs_yx_fsv16 && !params.packed_binary_output) { + runInfo.gws0 = output.Batch().v; + runInfo.gws1 = Align(output.Feature().v, sub_group_size); + runInfo.gws2 = output.Y().v * output.X().v * output.Z().v; + + runInfo.lws0 = 1; + runInfo.lws1 = sub_group_size; + runInfo.lws2 = 1; + } else { + runInfo.gws0 = output.Batch().v; + runInfo.gws1 = params.packed_binary_output ? CeilDiv(output.Feature().v, 32) : output.Feature().v; + runInfo.gws2 = Align(output.X().v * output.Y().v * output.Z().v, 16); - runInfo.lws0 = 1; - runInfo.lws1 = 1; - runInfo.lws2 = 16; + runInfo.lws0 = 1; + runInfo.lws1 = 1; + runInfo.lws2 = 16; + } runInfo.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16; return runInfo; } -JitConstants QuantizeKernelRef::GetJitConstants(const quantize_params& params) const { - JitConstants jit = Parent::GetJitConstants(params); +JitConstants QuantizeKernelRef::GetJitConstants(const quantize_params& params, const CommonDispatchData& runInfo) const { + JitConstants jit = Parent::GetJitConstants(params, runInfo); + if (params.output.GetLayout() == DataLayout::b_fs_yx_fsv16 && !params.packed_binary_output) { + jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", sub_group_size)); + } return jit; } diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_ref.h index ca4287b3b19fae..f0263b231cb6ba 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_ref.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_ref.h @@ -26,7 +26,7 @@ class QuantizeKernelRef : public QuantizeKernelBase { QuantizeKernelRef() : QuantizeKernelBase("quantize_gpu_ref") {} virtual ~QuantizeKernelRef() {} - JitConstants GetJitConstants(const quantize_params& params) const override; + JitConstants GetJitConstants(const quantize_params& params, const CommonDispatchData& runInfo) const override; CommonDispatchData SetDefault(const quantize_params& params, const optional_params&) const override; bool Validate(const Params& p, const optional_params& o) const override; ParamsKey GetSupportedKey() const override; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_scale_shift_opt.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_scale_shift_opt.cpp index 49f87576ecbd0a..679d663ecedff0 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_scale_shift_opt.cpp +++ 
b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_scale_shift_opt.cpp @@ -18,6 +18,8 @@ #include "kernel_selector_utils.h" #include +static const size_t sub_group_size = 32; + namespace kernel_selector { ParamsKey QuantizeKernelScaleShift::GetSupportedKey() const { ParamsKey k; @@ -60,27 +62,60 @@ CommonDispatchData QuantizeKernelScaleShift::SetDefault(const quantize_params& p auto output = params.output; - auto global = GetTensorFriendlyWorkGroups(output); - auto local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo); + if (output.GetLayout() == DataLayout::b_fs_yx_fsv16) { + runInfo.gws0 = output.Y().v * output.X().v; + runInfo.gws1 = Align(output.Feature().v, sub_group_size); + runInfo.gws2 = output.Batch().v; + + runInfo.lws0 = 1; + runInfo.lws1 = sub_group_size; + runInfo.lws2 = 1; + } else { + auto global = GetTensorFriendlyWorkGroups(output); + auto local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo); - runInfo.gws0 = global[0]; - runInfo.gws1 = global[1]; - runInfo.gws2 = global[2]; + runInfo.gws0 = global[0]; + runInfo.gws1 = global[1]; + runInfo.gws2 = global[2]; - runInfo.lws0 = local[0]; - runInfo.lws1 = local[1]; - runInfo.lws2 = local[2]; + runInfo.lws0 = local[0]; + runInfo.lws1 = local[1]; + runInfo.lws2 = local[2]; + } runInfo.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16; return runInfo; } -JitConstants QuantizeKernelScaleShift::GetJitConstants(const quantize_params& params) const { - JitConstants jit = Parent::GetJitConstants(params); - - auto tensor_jits = GetTensorFriendlyWorkGroupsJit(params.output); - jit.Merge(tensor_jits); +JitConstants QuantizeKernelScaleShift::GetJitConstants(const quantize_params& params, const CommonDispatchData& runInfo) const { + JitConstants jit = Parent::GetJitConstants(params, runInfo); + + if (params.output.GetLayout() == DataLayout::b_fs_yx_fsv16) { + jit.AddConstant(MakeJitConstant("GWS_BATCH", 2)); + jit.AddConstant(MakeJitConstant("GWS_FEATURE", 1)); + jit.AddConstant(MakeJitConstant("GWS_YX", 0)); + jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", sub_group_size)); + } else { + auto tensor_jits = GetTensorFriendlyWorkGroupsJit(params.output); + jit.Merge(tensor_jits); + } + + jit.AddConstant(MakeJitConstant("HAS_POST_SCALE", params.has_post_scale)); + jit.AddConstant(MakeJitConstant("HAS_POST_SHIFT", params.has_post_shift)); + jit.AddConstant(MakeJitConstant("HAS_PRE_SHIFT", params.has_pre_shift)); + jit.AddConstant(MakeJitConstant("HAS_CLAMP", params.has_clamp)); + jit.AddConstant(MakeJitConstant("PER_TENSOR_INPUT_RANGE", params.per_tensor_input_range)); + jit.AddConstant(MakeJitConstant("PER_TENSOR_INPUT_SCALE", params.per_tensor_input_scale)); + jit.AddConstant(MakeJitConstant("PER_TENSOR_INPUT_SHIFT", params.per_tensor_input_shift)); + jit.AddConstant(MakeJitConstant("PER_TENSOR_OUTPUT_SCALE", params.per_tensor_output_scale)); + jit.AddConstant(MakeJitConstant("PER_TENSOR_OUTPUT_SHIFT", params.per_tensor_output_shift)); + jit.AddConstant(MakeJitConstant("IN_LO_VAL", params.in_lo)); + jit.AddConstant(MakeJitConstant("IN_HI_VAL", params.in_hi)); + jit.AddConstant(MakeJitConstant("IN_SCALE_VAL", params.in_scale)); + jit.AddConstant(MakeJitConstant("IN_SHIFT_VAL", params.in_shift)); + jit.AddConstant(MakeJitConstant("OUT_SCALE_VAL", params.out_scale)); + jit.AddConstant(MakeJitConstant("OUT_SHIFT_VAL", params.out_shift)); return jit; } diff --git 
a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_scale_shift_opt.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_scale_shift_opt.h index d1c3fc8d32e041..d88dfb32f66544 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_scale_shift_opt.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_scale_shift_opt.h @@ -26,7 +26,7 @@ class QuantizeKernelScaleShift : public QuantizeKernelBase { QuantizeKernelScaleShift() : QuantizeKernelBase("quantize_gpu_scale_shift_opt") {} virtual ~QuantizeKernelScaleShift() {} - JitConstants GetJitConstants(const quantize_params& params) const override; + JitConstants GetJitConstants(const quantize_params& params, const CommonDispatchData& runInfo) const override; CommonDispatchData SetDefault(const quantize_params& params, const optional_params&) const override; bool Validate(const Params& p, const optional_params& o) const override; ParamsKey GetSupportedKey() const override; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/quantize_gpu_ref.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/quantize_gpu_ref.cl index 6599e07ac9f445..f0fe9c908b470c 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/quantize_gpu_ref.cl +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/quantize_gpu_ref.cl @@ -16,7 +16,10 @@ #include "include/data_types.cl" #include "include/fetch.cl" -__attribute__((intel_reqd_sub_group_size(16))) +#ifdef SUB_GROUP_SIZE +__attribute__((intel_reqd_sub_group_size(SUB_GROUP_SIZE))) +#endif +__attribute__((reqd_work_group_size(LWS_0, LWS_1, LWS_2))) KERNEL(quantize_ref)(const __global INPUT0_TYPE* input, const __global INPUT1_TYPE* input_low, const __global INPUT2_TYPE* input_high, @@ -110,10 +113,15 @@ KERNEL(quantize_ref)(const __global INPUT0_TYPE* input, const int output_high_offset = INPUT4_GET_INDEX_SAFE(b, of, y, x); #endif - INPUT0_TYPE val = input[input_offset]; + +#if OUTPUT_LAYOUT_B_FS_YX_FSV16 + if (of >= OUTPUT_FEATURE_NUM) + return; +#else if (x >= OUTPUT_SIZE_X || y >= OUTPUT_SIZE_Y || z >= OUTPUT_SIZE_Z) return; +#endif INPUT0_TYPE input_low_val = input_low[input_low_offset]; INPUT0_TYPE input_high_val = input_high[input_high_offset]; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/quantize_gpu_scale_shift_opt.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/quantize_gpu_scale_shift_opt.cl index 4c41a6f332d8a0..fecc649a8b4214 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/quantize_gpu_scale_shift_opt.cl +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/quantize_gpu_scale_shift_opt.cl @@ -16,6 +16,10 @@ #include "include/data_types.cl" #include "include/fetch.cl" +#ifdef SUB_GROUP_SIZE +__attribute__((intel_reqd_sub_group_size(SUB_GROUP_SIZE))) +#endif +__attribute__((reqd_work_group_size(LWS_0, LWS_1, LWS_2))) KERNEL(quantize_gpu_scale_shift_opt)(const __global INPUT0_TYPE* input, const __global INPUT1_TYPE* input_low, const __global INPUT2_TYPE* input_high, @@ -52,11 +56,13 @@ KERNEL(quantize_gpu_scale_shift_opt)(const __global INPUT0_TYPE* input, const int output_offset = OUTPUT_GET_INDEX(b, of, y, x); #endif +#if HAS_CLAMP && !PER_TENSOR_INPUT_RANGE #if INPUT1_DIMS == 4 const int in_range_offset = INPUT1_GET_INDEX_SAFE(b, of, y, 
x); #elif INPUT1_DIMS == 5 const int in_range_offset = INPUT1_GET_INDEX_SAFE(b, of, z, y, x); #endif +#endif #if INPUT7_DIMS == 4 const int scales_offset = INPUT7_GET_INDEX_SAFE(b, of, y, x); @@ -64,17 +70,61 @@ KERNEL(quantize_gpu_scale_shift_opt)(const __global INPUT0_TYPE* input, const int scales_offset = INPUT7_GET_INDEX_SAFE(b, of, z, y, x); #endif +#if PER_TENSOR_INPUT_SCALE + INPUT1_TYPE input_scale_val = IN_SCALE_VAL; +#else INPUT1_TYPE input_scale_val = input_scale[scales_offset]; +#endif +#if PER_TENSOR_INPUT_SHIFT + INPUT1_TYPE input_shift_val = IN_SHIFT_VAL; +#else INPUT1_TYPE input_shift_val = input_shift[scales_offset]; +#endif + +#if PER_TENSOR_OUTPUT_SCALE + INPUT1_TYPE output_scale_val = OUT_SCALE_VAL; +#else INPUT1_TYPE output_scale_val = output_scale[scales_offset]; +#endif + +#if PER_TENSOR_OUTPUT_SHIFT + INPUT1_TYPE output_shift_val = OUT_SHIFT_VAL; +#else INPUT1_TYPE output_shift_val = output_shift[scales_offset]; +#endif + +#if PER_TENSOR_INPUT_RANGE && HAS_CLAMP + INPUT1_TYPE input_low_val = IN_LO_VAL; + INPUT1_TYPE input_high_val = IN_HI_VAL; +#elif HAS_CLAMP INPUT1_TYPE input_low_val = input_low[in_range_offset]; INPUT1_TYPE input_high_val = input_high[in_range_offset]; - INPUT1_TYPE val = min(max(TO_INPUT1_TYPE(input[input_offset]),input_low_val), input_high_val); +#endif + +#if HAS_CLAMP + INPUT1_TYPE val = min(max(TO_INPUT1_TYPE(input[input_offset]), input_low_val), input_high_val); +#else + INPUT1_TYPE val = TO_INPUT1_TYPE(input[input_offset]); +#endif +#if HAS_PRE_SHIFT + val = round(val * input_scale_val + input_shift_val); +#else + val = round(val * input_scale_val); +#endif + +#if HAS_POST_SCALE + val = val*output_scale_val; +#endif +#if HAS_POST_SHIFT + val += output_shift_val; +#endif + +#if OUTPUT_LAYOUT_B_FS_YX_FSV16 + if (of < OUTPUT_FEATURE_NUM) +#endif #if OUTPUT_IS_FP - output[output_offset] = TO_OUTPUT_TYPE_SAT(round(val * input_scale_val + input_shift_val) * output_scale_val + output_shift_val); + output[output_offset] = TO_OUTPUT_TYPE_SAT(val); #else - // TODO: the outer round should be deleted once output range is correct - output[output_offset] = TO_OUTPUT_TYPE_SAT(round(round(val * input_scale_val + input_shift_val) * output_scale_val + output_shift_val)); + output[output_offset] = TO_OUTPUT_TYPE_SAT(round(val)); #endif } diff --git a/inference-engine/thirdparty/clDNN/src/gpu/quantize_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/quantize_gpu.cpp index a869aa307b6c28..34db0348382a31 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/quantize_gpu.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/quantize_gpu.cpp @@ -57,9 +57,26 @@ struct quantize_gpu : typed_primitive_gpu_impl { auto quantize_optional_params = get_default_optional_params(arg.get_program()); - quantize_params.levels = arg.get_primitive()->levels; - quantize_params.packed_binary_output = arg.get_output_layout().data_type == data_types::bin; + quantize_params.levels = arg.get_levels(); + quantize_params.packed_binary_output = arg.get_packed_binary_output(); quantize_params.scale_shift_opt = arg.get_scale_shift_opt(); + quantize_params.has_post_scale = arg.get_need_post_scale(); + quantize_params.has_post_shift = arg.get_need_post_shift(); + quantize_params.has_pre_shift = arg.get_need_pre_shift(); + quantize_params.has_clamp = arg.get_need_clamp(); + + quantize_params.per_tensor_input_range = arg.get_per_tensor_input_range(); + quantize_params.per_tensor_input_scale = arg.get_per_tensor_input_scale(); + quantize_params.per_tensor_input_shift = 
arg.get_per_tensor_input_shift(); + quantize_params.per_tensor_output_scale = arg.get_per_tensor_output_scale(); + quantize_params.per_tensor_output_shift = arg.get_per_tensor_output_shift(); + + quantize_params.in_lo = arg.get_input_lo_val(); + quantize_params.in_hi = arg.get_input_hi_val(); + quantize_params.in_scale = arg.get_input_scale_val(); + quantize_params.in_shift = arg.get_input_shift_val(); + quantize_params.out_scale = arg.get_output_scale_val(); + quantize_params.out_shift = arg.get_output_shift_val(); for (size_t i = 1; i < arg.inputs_count(); i++) { quantize_params.inputs.push_back(convert_data_tensor(arg.input(i).get_output_layout())); diff --git a/inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_quantization.cpp b/inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_quantization.cpp index 5eb670fd2942d8..885275677388bf 100644 --- a/inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_quantization.cpp +++ b/inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_quantization.cpp @@ -111,9 +111,6 @@ void prepare_quantization::prepare_scale_shift_opt(program_impl &p) { if (levels == 2 || levels > 256 || quantize_node.get_scale_shift_opt() || quantize_node.is_constant()) return; - if (quantize_node.input().get_output_layout().data_type == data_types::f16) - return; - auto &input_low = quantize_node.get_dependency(1).template as(); auto &input_high = quantize_node.get_dependency(2).template as(); auto &output_low = quantize_node.get_dependency(3).template as(); diff --git a/inference-engine/thirdparty/clDNN/src/include/quantize_inst.h b/inference-engine/thirdparty/clDNN/src/include/quantize_inst.h index 6e05a4fa3e1e5f..d7003d1c6d6b47 100644 --- a/inference-engine/thirdparty/clDNN/src/include/quantize_inst.h +++ b/inference-engine/thirdparty/clDNN/src/include/quantize_inst.h @@ -34,8 +34,25 @@ struct typed_program_node : public typed_program_node_base { program_node& input(size_t index = 0) const { return get_dependency(index); } size_t inputs_count() const { return get_dependencies().size(); } + int get_levels() const { return get_primitive()->levels; } + bool get_packed_binary_output() const { return get_output_layout().data_type == data_types::bin; } bool get_scale_shift_opt() const { return scale_shift_opt; } - bool get_need_pre_shift() { return need_pre_shift; } + bool get_need_pre_shift() const { return need_pre_shift; } + bool get_need_post_scale() const { return need_post_scale; } + bool get_need_post_shift() const { return need_post_shift; } + bool get_need_clamp() const { return need_clamp; } + bool get_per_tensor_input_scale() const { return per_tensor_input_scale; } + bool get_per_tensor_input_shift() const { return per_tensor_input_shift; } + bool get_per_tensor_input_range() const { return per_tensor_input_range; } + bool get_per_tensor_output_scale() const { return per_tensor_output_scale; } + bool get_per_tensor_output_shift() const { return per_tensor_output_shift; } + float get_input_scale_val() const { return in_scale; } + float get_input_shift_val() const { return in_shift; } + float get_input_lo_val() const { return in_lo; } + float get_input_hi_val() const { return in_hi; } + float get_output_scale_val() const { return out_scale; } + float get_output_shift_val() const { return out_shift; } + void set_scale_shift_opt() { scale_shift_opt = true; } void set_need_post_scale() { need_post_scale = true; } void set_need_post_shift() { need_post_shift = true; } diff --git 
a/inference-engine/thirdparty/clDNN/tests/test_cases/fusings_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/fusings_gpu_test.cpp index 31dc29d04b3db1..c2af2edfbb2bdb 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/fusings_gpu_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/fusings_gpu_test.cpp @@ -692,7 +692,7 @@ TEST_P(conv_fp32_quantize_u8, basic) { reorder("reorder_bfyx", "quantize", p.default_format, data_types::f32) ); - tolerance = 1e-5f; + tolerance = 1.0f; execute(p); } @@ -701,6 +701,9 @@ INSTANTIATE_TEST_CASE_P(fusings_gpu, conv_fp32_quantize_u8, // For now only b_fs_yx_fsv16 supports this case bc_test_params{CASE_CONV_FP32_2, 2, 3}, bc_test_params{CASE_CONV_FP32_3, 2, 3}, + + bc_test_params{CASE_CONV_FP16_2, 2, 3}, + bc_test_params{CASE_CONV_FP16_3, 2, 3}, }), ); class conv_fp32_scale_quantize_i8 : public ConvFusingTest {}; @@ -731,6 +734,9 @@ INSTANTIATE_TEST_CASE_P(fusings_gpu, conv_fp32_scale_quantize_i8, // For now only b_fs_yx_fsv16 supports this case bc_test_params{CASE_CONV_FP32_2, 2, 4}, bc_test_params{CASE_CONV_FP32_3, 2, 4}, + + bc_test_params{CASE_CONV_FP16_2, 2, 4}, + bc_test_params{CASE_CONV_FP16_3, 2, 4}, }), ); class conv_fp32_scale_activation_quantize_i8 : public ConvFusingTest {}; @@ -751,7 +757,7 @@ TEST_P(conv_fp32_scale_activation_quantize_i8, basic) { reorder("reorder_bfyx", "quantize", p.default_format, data_types::f32) ); - tolerance = 1e-2f; + tolerance = 1.0f; execute(p); } @@ -760,36 +766,42 @@ INSTANTIATE_TEST_CASE_P(fusings_gpu, conv_fp32_scale_activation_quantize_i8, // For now only b_fs_yx_fsv16 supports this case bc_test_params{CASE_CONV_FP32_2, 2, 5}, bc_test_params{CASE_CONV_FP32_3, 2, 5}, + + bc_test_params{CASE_CONV_FP16_2, 2, 5}, + bc_test_params{CASE_CONV_FP16_3, 2, 5}, }), ); -class conv_fp32_scale_activation_quantize_i8_eltwise_fp32 : public ConvFusingTest {}; -TEST_P(conv_fp32_scale_activation_quantize_i8_eltwise_fp32, basic) { +class conv_fp32_scale_activation_quantize_u8_eltwise_fp32 : public ConvFusingTest {}; +TEST_P(conv_fp32_scale_activation_quantize_u8_eltwise_fp32, basic) { auto p = GetParam(); create_topologies(input_layout("input", get_input_layout(p)), data("weights", get_mem(get_weights_layout(p))), data("bias", get_mem(get_bias_layout(p))), - data("in_lo", get_mem(get_per_channel_layout(p), min_random, 0)), + data("in_lo", get_mem(get_per_channel_layout(p), 0)), data("in_hi", get_mem(get_per_channel_layout(p), 1, max_random)), - data("out_lo", get_mem(get_single_element_layout(p), -127)), - data("out_hi", get_mem(get_single_element_layout(p), 127)), + data("out_lo", get_mem(get_single_element_layout(p), 0)), + data("out_hi", get_mem(get_single_element_layout(p), 255)), data("scale_data", get_mem(get_per_channel_layout(p), 1.0f/p.kernel.count()/255)), data("eltwise_data", get_mem(get_output_layout(p))), convolution("conv_prim", "input", {"weights"}, {"bias"}, p.groups, p.stride, p.pad, p.dilation), scale("scale", "conv_prim", "scale_data"), activation("activation_scale", "scale", activation_func::exp), - quantize("quantize", "activation_scale", "in_lo", "in_hi", "out_lo", "out_hi", 255, data_types::i8), - eltwise("sum", { "quantize", "eltwise_data"}, eltwise_mode::sum, data_types::f32), + quantize("quantize", "activation_scale", "in_lo", "in_hi", "out_lo", "out_hi", 256, data_types::u8), + eltwise("sum", { "quantize", "eltwise_data"}, eltwise_mode::sum, p.default_type), reorder("reorder_bfyx", "sum", p.default_format, data_types::f32) ); - tolerance = 1e-2f; + tolerance = 
1.0f; execute(p); } -INSTANTIATE_TEST_CASE_P(fusings_gpu, conv_fp32_scale_activation_quantize_i8_eltwise_fp32, +INSTANTIATE_TEST_CASE_P(fusings_gpu, conv_fp32_scale_activation_quantize_u8_eltwise_fp32, ::testing::ValuesIn(std::vector{ // For now only b_fs_yx_fsv16 supports this case bc_test_params{CASE_CONV_FP32_2, 2, 6}, bc_test_params{CASE_CONV_FP32_3, 2, 6}, + + bc_test_params{CASE_CONV_FP16_2, 2, 6}, + bc_test_params{CASE_CONV_FP16_3, 2, 6}, }), ); class conv_fp32_scale_activation_quantize_i8_activation : public ConvFusingTest {}; @@ -811,7 +823,7 @@ TEST_P(conv_fp32_scale_activation_quantize_i8_activation, basic) { activation("activation_quantize", "quantize", activation_func::relu), reorder("reorder_bfyx", "activation_quantize", p.default_format, data_types::f32) ); - tolerance = 1e-2f; + tolerance = 1.0f; execute(p); } @@ -819,6 +831,9 @@ INSTANTIATE_TEST_CASE_P(fusings_gpu, conv_fp32_scale_activation_quantize_i8_acti ::testing::ValuesIn(std::vector{ bc_test_params{CASE_CONV_FP32_2, 2, 6}, bc_test_params{CASE_CONV_FP32_3, 2, 6}, + + bc_test_params{CASE_CONV_FP16_2, 2, 6}, + bc_test_params{CASE_CONV_FP16_3, 2, 6}, }), ); @@ -2486,8 +2501,8 @@ INSTANTIATE_TEST_CASE_P(fusings_gpu, mvn_scale_quantize_i8, mvn_test_params{ CASE_MVN_3D_U8_2, 2, 4 }, }), ); -class mvn_scale_activation_quantize_i8_eltwise_fp32_quantize_i8 : public MVNFusingTest {}; -TEST_P(mvn_scale_activation_quantize_i8_eltwise_fp32_quantize_i8, basic) { +class mvn_scale_activation_quantize_u8_eltwise_fp32_quantize_i8 : public MVNFusingTest {}; +TEST_P(mvn_scale_activation_quantize_u8_eltwise_fp32_quantize_i8, basic) { auto p = GetParam(); create_topologies( input_layout("input", get_input_layout(p)), @@ -2495,18 +2510,18 @@ TEST_P(mvn_scale_activation_quantize_i8_eltwise_fp32_quantize_i8, basic) { data("scale_data", get_mem(get_per_channel_layout(p))), scale("scale", "mvn", "scale_data"), activation("act", "scale", activation_func::hyperbolic_tan), - data("in_low", get_mem(get_per_channel_layout(p), min_random, 0)), + data("in_low", get_mem(get_per_channel_layout(p), 0)), data("in_high", get_mem(get_per_channel_layout(p), 1, max_random)), - data("out_low", get_mem(get_single_element_layout(p), -127, 127)), - data("out_high", get_mem(get_single_element_layout(p), -127, 127)), - quantize("quant", "act", "in_low", "in_high", "out_low", "out_high", 255, data_types::i8), + data("out_low", get_mem(get_single_element_layout(p), 0)), + data("out_high", get_mem(get_single_element_layout(p), 255)), + quantize("quant", "act", "in_low", "in_high", "out_low", "out_high", 256, data_types::u8), data("eltw_data", get_mem(layout{ data_types::i8, p.input_format, p.input_size })), eltwise("eltw", {"quant", "eltw_data"}, eltwise_mode::sum, data_types::f32), data("in_low2", get_mem(get_per_channel_layout(p), min_random, 0)), data("in_high2", get_mem(get_per_channel_layout(p), 1, max_random)), - data("out_low2", get_mem(get_single_element_layout(p), -127, 127)), - data("out_high2", get_mem(get_single_element_layout(p), -127, 127)), - quantize("quant2", "eltw", "in_low2", "in_high2", "out_low2", "out_high2", 255, data_types::i8), + data("out_low2", get_mem(get_single_element_layout(p), -128)), + data("out_high2", get_mem(get_single_element_layout(p), 127)), + quantize("quant2", "eltw", "in_low2", "in_high2", "out_low2", "out_high2", 256, data_types::i8), reorder("reorder_bfyx", "quant2", format::bfyx, data_types::f32) ); @@ -2514,7 +2529,7 @@ TEST_P(mvn_scale_activation_quantize_i8_eltwise_fp32_quantize_i8, basic) { execute(p); } 
-INSTANTIATE_TEST_CASE_P(fusings_gpu, mvn_scale_activation_quantize_i8_eltwise_fp32_quantize_i8, +INSTANTIATE_TEST_CASE_P(fusings_gpu, mvn_scale_activation_quantize_u8_eltwise_fp32_quantize_i8, ::testing::ValuesIn(std::vector{ // Full using for fp input not supported yet, it may lead to output padding and non-optimal kernel // mvn_test_params{ CASE_MVN_F32_1, 2, 7 }, @@ -3461,7 +3476,7 @@ TEST_P(deconv_scale_actv_quant_u8_eltw_scale_actv_quant_i8, basic) { input_layout("input", get_input_layout(p)), data("weights", get_mem(get_weights_layout(p))), data("scale1_data", get_mem(get_per_channel_layout(p), 1.f / p.kernel.count())), - data("in1_lo", get_mem(get_per_channel_layout(p), min_random, 0)), + data("in1_lo", get_mem(get_per_channel_layout(p), 0)), data("in1_hi", get_mem(get_per_channel_layout(p), 1, max_random)), data("out1_lo", get_mem(get_single_element_layout(p), 0)), data("out1_hi", get_mem(get_single_element_layout(p), 255)), From 963f55a1894ede3cb9336ce14a5319d3adae26ec Mon Sep 17 00:00:00 2001 From: Ilya Churaev Date: Fri, 29 May 2020 20:57:32 +0300 Subject: [PATCH 19/24] Fixed CODEOWNERS paths (#684) --- CODEOWNERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CODEOWNERS b/CODEOWNERS index 9d19cd384ebfa5..4905affcc7dccd 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -16,7 +16,7 @@ azure-pipelines.yml @openvinotoolkit/openvino-admins /inference-engine/ @openvinotoolkit/openvino-ie-maintainers /inference-engine/src/transformations/ @GlebKazantaev @ichuraev /inference-engine/src/legacy_api/ @openvinotoolkit/openvino-ngraph-maintainers -/inference-engine/src/ir_readers/ @openvinotoolkit/openvino-ngraph-maintainers +/inference-engine/src/readers/ @openvinotoolkit/openvino-ngraph-maintainers # IE CPU: /inference-engine/src/mkldnn_plugin/ @openvinotoolkit/openvino-ie-cpu-maintainers @openvinotoolkit/openvino-ie-cpu-developers From 3a24eb6a6233646f660f0522fe0209fc17da4d93 Mon Sep 17 00:00:00 2001 From: Vladimir Gavrilov Date: Fri, 29 May 2020 21:01:09 +0300 Subject: [PATCH 20/24] MO fails generating IR from XLNET model due to a bug in the transformation ConvertGroupedStridedSlice (#625) * Small fix in the transformation ConvertGroupedStridedSlice: VariadicSplit is now generated only when the node has at least two output nodes. * Added unit tests for the case when there is only one StridedSlice.
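For illustration, a minimal standalone sketch of the tightened consumer check; Node and should_replace_with_variadic_split are hypothetical stand-ins (the real transformation compares consumer names via node.in_node(0).name rather than object identity):

class Node:
    def __init__(self, op, name, inputs=()):
        self.op, self.name, self.inputs = op, name, list(inputs)

def should_replace_with_variadic_split(input_data, consumers):
    # Collect the StridedSlice nodes that consume input_data on input port 0.
    out_nodes = [n for n in consumers
                 if n.op == 'StridedSlice' and n.inputs and n.inputs[0] is input_data]
    # The old guard (len(out_nodes) < 1) let a lone StridedSlice through and
    # produced a single-output VariadicSplit; the fix requires at least two.
    return len(out_nodes) > 1

data = Node('Parameter', 'input_data')
single = [Node('StridedSlice', 'ss_0', [data])]
assert not should_replace_with_variadic_split(data, single)
assert should_replace_with_variadic_split(data, single + [Node('StridedSlice', 'ss_1', [data])])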
--- .../middle/ConvertGroupedStridedSlice.py | 2 +- .../middle/ConvertGroupedStridedSlice_test.py | 49 +++++++++++++++++++ 2 files changed, 50 insertions(+), 1 deletion(-) diff --git a/model-optimizer/extensions/middle/ConvertGroupedStridedSlice.py b/model-optimizer/extensions/middle/ConvertGroupedStridedSlice.py index 3e1d83178a5034..e948f0adea8cff 100644 --- a/model-optimizer/extensions/middle/ConvertGroupedStridedSlice.py +++ b/model-optimizer/extensions/middle/ConvertGroupedStridedSlice.py @@ -70,7 +70,7 @@ def find_and_replace_pattern(self, graph: Graph): # Get all StridedSlice consumers out_nodes = [node for node in input_data.out_nodes() if node.op == 'StridedSlice' and node.in_node(0).name == input_data.name] - if len(out_nodes) < 1: + if len(out_nodes) <= 1: continue valid_for_replacement = True diff --git a/model-optimizer/extensions/middle/ConvertGroupedStridedSlice_test.py b/model-optimizer/extensions/middle/ConvertGroupedStridedSlice_test.py index 6eaf6d7c28a481..0dd50b9ed9ef53 100644 --- a/model-optimizer/extensions/middle/ConvertGroupedStridedSlice_test.py +++ b/model-optimizer/extensions/middle/ConvertGroupedStridedSlice_test.py @@ -17,6 +17,7 @@ import unittest import numpy as np +from generator import generator, generate from extensions.middle.ConvertGroupedStridedSlice import ConvertGroupedStridedSlice from mo.front.common.partial_infer.utils import int64_array @@ -82,7 +83,24 @@ 'sslice_2/unsqueeze_const_data': {'kind': 'data', 'value': None, 'shape': None}, } +one_strided_slice_case_node_attributes = { + 'placeholder': {'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'}, + 'placeholder_data': {'value': None, 'shape': None, 'kind': 'data', 'data_type': None}, + 'sslice': {'type': None, 'kind': 'op', 'op': 'StridedSlice', 'slices': None, + 'shrink_axis_mask': np.array([0, 0, 0, 0])}, + 'sslice_data': {'value': None, 'shape': None, 'kind': 'data'}, + 'op_output': {'kind': 'op', 'op': 'Result'}, +} + +one_strided_slice_case_edges = [ + ('placeholder', 'placeholder_data'), + ('placeholder_data', 'sslice'), + ('sslice', 'sslice_data'), + ('sslice_data', 'op_output'), +] + +@generator class ConvertGroupedStridedSliceTests(unittest.TestCase): def test_1(self): graph = build_graph(nodes_attributes, @@ -604,6 +622,37 @@ def test_8(self): (flag, resp) = compare_graphs(graph, graph_ref, 'concat_1_data', check_op_attrs=True) self.assertTrue(flag, resp) + # Test for the case when there is only 1 StridedSlice. 
+ @generate(*[(np.array([1, 227, 227, 54]), + np.array([slice(0, 1, 1), slice(0, 227, 1), slice(0, 227, 1), slice(0, 18, 1)]), + np.array([1, 227, 227, 18])), + (np.array([57, 16, 100, 23]), + np.array([slice(3, 16, 1), slice(0, 16, 1), slice(0, 100, 1), slice(0, 23, 1)]), + np.array([13, 16, 100, 23])), + (np.array([16, 800, 1024, 17]), + np.array([slice(0, 16, 1), slice(0, 800, 1), slice(13, 817, 1), slice(0, 17, 1)]), + np.array([16, 800, 804, 17]))]) + def test_9(self, input_shape, slices, output_shape): + graph = build_graph(nodes_attrs=one_strided_slice_case_node_attributes, + edges=one_strided_slice_case_edges, + update_attributes={ + 'placeholder_data': {'shape': input_shape}, + 'sslice': {'slices': slices}, + 'sslice_data': {'shape': output_shape}, + }) + graph.graph['layout'] = 'NHWC' + graph_ref = build_graph(nodes_attrs=one_strided_slice_case_node_attributes, + edges=one_strided_slice_case_edges, + update_attributes={ + 'placeholder_data': {'shape': input_shape}, + 'sslice': {'slices': slices}, + 'sslice_data': {'shape': output_shape}, + }) + pattern = ConvertGroupedStridedSlice() + pattern.find_and_replace_pattern(graph) + (flag, resp) = compare_graphs(graph, graph_ref, 'op_output', check_op_attrs=True) + self.assertTrue(flag, resp) + class AddReshapeAfterStridedSliceTests(unittest.TestCase): def test_ss_1_shrink_last(self): From cbad43f3a55f923510b7611a7bea842c52f20881 Mon Sep 17 00:00:00 2001 From: Anastasia Kuporosova Date: Fri, 29 May 2020 21:20:16 +0300 Subject: [PATCH 21/24] [Python API] Fix PreProcessInfo tests (#690) --- .../python/tests/test_PreProcessInfo.py | 32 ++++++++++++------- 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/inference-engine/ie_bridges/python/tests/test_PreProcessInfo.py b/inference-engine/ie_bridges/python/tests/test_PreProcessInfo.py index 499fdb3c83493b..5cfc99631d07a0 100644 --- a/inference-engine/ie_bridges/python/tests/test_PreProcessInfo.py +++ b/inference-engine/ie_bridges/python/tests/test_PreProcessInfo.py @@ -8,45 +8,53 @@ test_net_xml, test_net_bin = model_path() -def get_preprocess_info(): +def test_preprocess_info(): ie_core = IECore() net = ie_core.read_network(model=test_net_xml, weights=test_net_bin) - return net.input_info["data"].preprocess_info - - -def test_preprocess_info(): - assert isinstance(get_preprocess_info(), PreProcessInfo) + assert isinstance(net.input_info["data"].preprocess_info, PreProcessInfo) def test_color_format(): - preprocess_info = get_preprocess_info() + ie_core = IECore() + net = ie_core.read_network(model=test_net_xml, weights=test_net_bin) + preprocess_info = net.input_info["data"].preprocess_info assert preprocess_info.color_format == ColorFormat.RAW def test_color_format_setter(): - preprocess_info = get_preprocess_info() + ie_core = IECore() + net = ie_core.read_network(model=test_net_xml, weights=test_net_bin) + preprocess_info = net.input_info["data"].preprocess_info preprocess_info.color_format = ColorFormat.BGR assert preprocess_info.color_format == ColorFormat.BGR def test_resize_algorithm(): - preprocess_info = get_preprocess_info() + ie_core = IECore() + net = ie_core.read_network(model=test_net_xml, weights=test_net_bin) + preprocess_info = net.input_info["data"].preprocess_info assert preprocess_info.resize_algorithm == ResizeAlgorithm.NO_RESIZE def test_resize_algorithm_setter(): - preprocess_info = get_preprocess_info() + ie_core = IECore() + net = ie_core.read_network(model=test_net_xml, weights=test_net_bin) + preprocess_info = net.input_info["data"].preprocess_info 
preprocess_info.resize_algorithm = ResizeAlgorithm.RESIZE_BILINEAR assert preprocess_info.resize_algorithm == ResizeAlgorithm.RESIZE_BILINEAR def test_mean_variant(): - preprocess_info = get_preprocess_info() + ie_core = IECore() + net = ie_core.read_network(model=test_net_xml, weights=test_net_bin) + preprocess_info = net.input_info["data"].preprocess_info assert preprocess_info.mean_variant == MeanVariant.NONE def test_mean_variant_setter(): - preprocess_info = get_preprocess_info() + ie_core = IECore() + net = ie_core.read_network(model=test_net_xml, weights=test_net_bin) + preprocess_info = net.input_info["data"].preprocess_info preprocess_info.mean_variant = MeanVariant.MEAN_IMAGE assert preprocess_info.mean_variant == MeanVariant.MEAN_IMAGE From 3ef1a26174522d995ec1027cd410ff89e27f5a1e Mon Sep 17 00:00:00 2001 From: Anastasia Kuporosova Date: Fri, 29 May 2020 21:28:17 +0300 Subject: [PATCH 22/24] [IE TOOLS] Use input_info in python benchmark app (#660) --- tools/benchmark/benchmark.py | 2 +- tools/benchmark/main.py | 8 ++++---- tools/benchmark/utils/inputs_filling.py | 24 ++++++++++++------------ tools/benchmark/utils/utils.py | 6 +++--- 4 files changed, 20 insertions(+), 20 deletions(-) diff --git a/tools/benchmark/benchmark.py b/tools/benchmark/benchmark.py index 95965c9e653933..0764bbf9958f1b 100644 --- a/tools/benchmark/benchmark.py +++ b/tools/benchmark/benchmark.py @@ -66,7 +66,7 @@ def read_network(self, path_to_model: str): ie_network = self.ie.read_network(xml_filename, bin_filename) - input_info = ie_network.inputs + input_info = ie_network.input_info if not input_info: raise AttributeError('No inputs info is provided') diff --git a/tools/benchmark/main.py b/tools/benchmark/main.py index 8945cc3ee66d9f..df40950c61ffe2 100644 --- a/tools/benchmark/main.py +++ b/tools/benchmark/main.py @@ -175,12 +175,12 @@ def set_throughput_streams(): # --------------------- 5. 
Resizing network to match image sizes and given batch --------------------------- next_step() - shapes = {k: v.shape.copy() for k, v in ie_network.inputs.items()} + shapes = {k: v.input_data.shape.copy() for k, v in ie_network.input_info.items()} reshape = False if args.shape: - reshape |= update_shapes(shapes, args.shape, ie_network.inputs) + reshape |= update_shapes(shapes, args.shape, ie_network.input_info) if args.batch_size and args.batch_size != ie_network.batch_size: - reshape |= adjust_shapes_batch(shapes, args.batch_size, ie_network.inputs) + reshape |= adjust_shapes_batch(shapes, args.batch_size, ie_network.input_info) if reshape: start_time = datetime.utcnow() @@ -259,7 +259,7 @@ def set_throughput_streams(): if args.paths_to_input: for path in args.paths_to_input: paths_to_input.append(os.path.abspath(*path) if args.paths_to_input else None) - set_inputs(paths_to_input, batch_size, exe_network.inputs, infer_requests) + set_inputs(paths_to_input, batch_size, exe_network.input_info, infer_requests) if statistics: statistics.add_parameters(StatisticsReport.Category.RUNTIME_CONFIG, diff --git a/tools/benchmark/utils/inputs_filling.py b/tools/benchmark/utils/inputs_filling.py index b27f6ebcf569fb..ae7fa64a089010 100644 --- a/tools/benchmark/utils/inputs_filling.py +++ b/tools/benchmark/utils/inputs_filling.py @@ -47,13 +47,13 @@ def set_inputs(paths_to_input, batch_size, input_info, requests): def get_inputs(paths_to_input, batch_size, input_info, requests): input_image_sizes = {} for key in sorted(input_info.keys()): - if is_image(input_info[key]): - input_image_sizes[key] = (input_info[key].shape[2], input_info[key].shape[3]) + if is_image(input_info[key].input_data): + input_image_sizes[key] = (input_info[key].input_data.shape[2], input_info[key].input_data.shape[3]) logger.info("Network input '{}' precision {}, dimensions ({}): {}".format(key, - input_info[key].precision, - input_info[key].layout, + input_info[key].input_data.precision, + input_info[key].input_data.layout, " ".join(str(x) for x in - input_info[key].shape))) + input_info[key].input_data.shape))) images_count = len(input_image_sizes.keys()) binaries_count = len(input_info) - images_count @@ -102,31 +102,31 @@ def get_inputs(paths_to_input, batch_size, input_info, requests): input_data = {} keys = list(sorted(input_info.keys())) for key in keys: - if is_image(input_info[key]): + if is_image(input_info[key].input_data): # input is image if len(image_files) > 0: input_data[key] = fill_blob_with_image(image_files, request_id, batch_size, keys.index(key), - len(keys), input_info[key]) + len(keys), input_info[key].input_data) continue # input is binary if len(binary_files): input_data[key] = fill_blob_with_binary(binary_files, request_id, batch_size, keys.index(key), - len(keys), input_info[key]) + len(keys), input_info[key].input_data) continue # most likely input is image info - if is_image_info(input_info[key]) and len(input_image_sizes) == 1: + if is_image_info(input_info[key].input_data) and len(input_image_sizes) == 1: image_size = input_image_sizes[list(input_image_sizes.keys()).pop()] logger.info("Fill input '" + key + "' with image size " + str(image_size[0]) + "x" + str(image_size[1])) - input_data[key] = fill_blob_with_image_info(image_size, input_info[key]) + input_data[key] = fill_blob_with_image_info(image_size, input_info[key].input_data) continue # fill with random data logger.info("Fill input '{}' with random values ({} is expected)".format(key, "image" if is_image( - input_info[key]) else "some binary 
data")) - input_data[key] = fill_blob_with_random(input_info[key]) + input_info[key].input_data) else "some binary data")) + input_data[key] = fill_blob_with_random(input_info[key].input_data) requests_input_data.append(input_data) diff --git a/tools/benchmark/utils/utils.py b/tools/benchmark/utils/utils.py index 2cef9f9de5a114..16c28bb3828642 100644 --- a/tools/benchmark/utils/utils.py +++ b/tools/benchmark/utils/utils.py @@ -62,10 +62,10 @@ def next_step(additional_info='', step_id=0): def config_network_inputs(ie_network: IENetwork): - input_info = ie_network.inputs + input_info = ie_network.input_info for key in input_info.keys(): - if is_image(input_info[key]): + if is_image(input_info[key].input_data): # Set the precision of input data provided by the user # Should be called before load of the network to the plugin input_info[key].precision = 'U8' @@ -261,7 +261,7 @@ def update_shapes(shapes, shapes_string: str, inputs_info): def adjust_shapes_batch(shapes, batch_size: int, inputs_info): updated = False for name, data in inputs_info.items(): - layout = data.layout + layout = data.input_data.layout batch_index = layout.index('N') if 'N' in layout else -1 if batch_index != -1 and shapes[name][batch_index] != batch_size: shapes[name][batch_index] = batch_size From e2729b87f3a09bb5f0730bc92d1d9a27b09f4884 Mon Sep 17 00:00:00 2001 From: Edward Shogulin Date: Fri, 29 May 2020 22:56:58 +0300 Subject: [PATCH 23/24] [LPT] Convolution regression tests (#543) * [LPT] Base test infrastructure extending & Convolution test * [LPT] LPT test infrastructure refactoring --- ...oncat_neighboring_graph_transformation.cpp | 6 +- .../concat_transformation.cpp | 6 +- ...oncat_with_intermediate_transformation.cpp | 6 +- .../convolution_transformation.cpp | 39 ++++++ .../layer_transformation.cpp | 49 ++++++++ ...oncat_neighboring_graph_transformation.cpp | 2 +- .../concat_transformation.cpp | 2 +- ...oncat_with_intermediate_transformation.cpp | 2 +- .../convolution_transformation.cpp | 39 ++++++ .../layer_transformation.cpp | 43 +++++++ ...oncat_neighboring_graph_transformation.hpp | 4 +- .../concat_transformation.hpp | 4 +- ...oncat_with_intermediate_transformation.hpp | 4 +- .../convolution_transformation.hpp | 36 ++++++ ...oncat_neighboring_graph_transformation.cpp | 2 + .../concat_transformation.cpp | 2 + ...oncat_with_intermediate_transformation.cpp | 2 + .../convolution_transformation.cpp | 114 ++++++++++++++++++ .../layer_transformation.cpp | 74 ++++++++---- .../layer_transformation.hpp | 67 ++-------- 20 files changed, 409 insertions(+), 94 deletions(-) create mode 100644 inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/convolution_transformation.cpp create mode 100644 inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/layer_transformation.cpp create mode 100644 inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/convolution_transformation.cpp create mode 100644 inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/layer_transformation.cpp create mode 100644 inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/convolution_transformation.hpp create mode 100644 inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/convolution_transformation.cpp diff --git 
a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/concat_neighboring_graph_transformation.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/concat_neighboring_graph_transformation.cpp index 23a38b7dd3b2bb..5116acce4d7311 100644 --- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/concat_neighboring_graph_transformation.cpp +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/concat_neighboring_graph_transformation.cpp @@ -17,9 +17,9 @@ const std::vector netPrecisions = { }; const std::vector trasformationParamValues = { - LayerTestsUtils::LayerTransformationParamsFactory::createParamCpu(), - LayerTestsUtils::LayerTransformationParamsFactory::createParamI8I8(), - LayerTestsUtils::LayerTransformationParamsFactory::createParamU8I8() + LayerTestsUtils::LayerTransformationParamsFactory::createParams(), + LayerTestsUtils::LayerTransformationParamsFactory::createParamsI8I8(), + LayerTestsUtils::LayerTransformationParamsFactory::createParamsU8I8() }; INSTANTIATE_TEST_CASE_P(LPT, ConcatNeighboringGraphTransformation, diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/concat_transformation.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/concat_transformation.cpp index f77cedbbf52b9e..bbc88cfaaabb6a 100644 --- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/concat_transformation.cpp +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/concat_transformation.cpp @@ -17,9 +17,9 @@ const std::vector netPrecisions = { }; const std::vector trasformationParamValues = { - LayerTestsUtils::LayerTransformationParamsFactory::createParamCpu(), - LayerTestsUtils::LayerTransformationParamsFactory::createParamI8I8(), - LayerTestsUtils::LayerTransformationParamsFactory::createParamU8I8() + LayerTestsUtils::LayerTransformationParamsFactory::createParams(), + LayerTestsUtils::LayerTransformationParamsFactory::createParamsI8I8(), + LayerTestsUtils::LayerTransformationParamsFactory::createParamsU8I8() }; INSTANTIATE_TEST_CASE_P(LPT, ConcatTransformation, diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/concat_with_intermediate_transformation.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/concat_with_intermediate_transformation.cpp index 3e739a7910dae5..32cf7fbd700376 100644 --- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/concat_with_intermediate_transformation.cpp +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/concat_with_intermediate_transformation.cpp @@ -17,9 +17,9 @@ const std::vector netPrecisions = { }; const std::vector trasformationParamValues = { - LayerTestsUtils::LayerTransformationParamsFactory::createParamCpu(), - LayerTestsUtils::LayerTransformationParamsFactory::createParamI8I8(), - LayerTestsUtils::LayerTransformationParamsFactory::createParamU8I8() + LayerTestsUtils::LayerTransformationParamsFactory::createParams(), + LayerTestsUtils::LayerTransformationParamsFactory::createParamsI8I8(), + LayerTestsUtils::LayerTransformationParamsFactory::createParamsU8I8() 
}; const std::vector transparentIntermediateValues = { true, false }; diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/convolution_transformation.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/convolution_transformation.cpp new file mode 100644 index 00000000000000..e97ebad56132fe --- /dev/null +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/convolution_transformation.cpp @@ -0,0 +1,39 @@ +// Copyright (C) 2019 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "low_precision_transformations/convolution_transformation.hpp" +#include "common_test_utils/test_constants.hpp" + +using namespace LayerTestsDefinitions; + +namespace { +const std::vector netPrecisions = { + InferenceEngine::Precision::FP32, + InferenceEngine::Precision::FP16 +}; + +const std::vector trasformationParamValues = { + LayerTestsUtils::LayerTransformationParamsFactory::createParams() +}; + +const std::vector fqOnActivationsValues = { true, false }; + +const std::vector fqOnWeightsValues = { true, false }; + +INSTANTIATE_TEST_CASE_P(LPT, ConvolutionTransformation, + ::testing::Combine( + ::testing::ValuesIn(netPrecisions), + ::testing::Values(InferenceEngine::SizeVector({ 1, 3, 16, 16 })), + ::testing::Values(CommonTestUtils::DEVICE_CPU), + ::testing::ValuesIn(trasformationParamValues), + ::testing::ValuesIn(fqOnActivationsValues), + ::testing::ValuesIn(fqOnWeightsValues)), + ConvolutionTransformation::getTestCaseName); +} // namespace + + + + diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/layer_transformation.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/layer_transformation.cpp new file mode 100644 index 00000000000000..4416509d2466f7 --- /dev/null +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/layer_transformation.cpp @@ -0,0 +1,49 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include +#include +#include + +#include + +#include "common_test_utils/common_utils.hpp" +#include "functional_test_utils/plugin_cache.hpp" +#include "functional_test_utils/layer_test_utils.hpp" +#include "functional_test_utils/blob_utils.hpp" + +#include "ngraph_functions/pass/convert_prc.hpp" + +#include "ie_util_internal.hpp" +#include "functional_test_utils/low_precision_transformations/layer_transformation.hpp" +#include "low_precision_transformations/convolution.hpp" +#include "low_precision_transformations/scaleshift_to_convolution.hpp" + + +namespace LayerTestsUtils { + +InferenceEngine::details::LowPrecisionTransformations LayerTransformation::getLowPrecisionTransformations( + const InferenceEngine::details::LayerTransformation::Params& params) const { + return InferenceEngine::details::LowPrecisionTransformer::getAllTransformations(params). + add(InferenceEngine::details::LayerTransformation::Params(params). + setPrecisionsOnActivations({ InferenceEngine::Precision::U8 }), "Convolution"). 
+ addCleanup( + InferenceEngine::details::LayerTransformation::Params(params).setPrecisionsOnActivations({ InferenceEngine::Precision::U8 }), + "ScaleShift"); +} + +InferenceEngine::details::LayerTransformation::Params LayerTransformationParamsFactory::createParams() { + return InferenceEngine::details::LayerTransformation::Params( + true, + true, + true, + InferenceEngine::details::LayerTransformation::QuantizedTensorAlignment::UpdateLevel, + InferenceEngine::details::LayerTransformation::QuantizedTensorAlignment::None, + true, + true, + true); +} + +} // namespace LayerTestsUtils diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/concat_neighboring_graph_transformation.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/concat_neighboring_graph_transformation.cpp index b7d0c6b304d659..c2800a72271d8a 100644 --- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/concat_neighboring_graph_transformation.cpp +++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/concat_neighboring_graph_transformation.cpp @@ -16,7 +16,7 @@ const std::vector netPrecisions = { }; const std::vector trasformationParamValues = { - LayerTestsUtils::LayerTransformationParamsFactory::createParamGpu() + LayerTestsUtils::LayerTransformationParamsFactory::createParams() }; INSTANTIATE_TEST_CASE_P(LPT, ConcatNeighboringGraphTransformation, diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/concat_transformation.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/concat_transformation.cpp index fbfd9160129440..5f768d1458afae 100644 --- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/concat_transformation.cpp +++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/concat_transformation.cpp @@ -16,7 +16,7 @@ const std::vector netPrecisions = { }; const std::vector trasformationParamValues = { - LayerTestsUtils::LayerTransformationParamsFactory::createParamGpu() + LayerTestsUtils::LayerTransformationParamsFactory::createParams() }; INSTANTIATE_TEST_CASE_P(LPT, ConcatTransformation, diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/concat_with_intermediate_transformation.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/concat_with_intermediate_transformation.cpp index c0a9751247476f..e5c330f16409fb 100644 --- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/concat_with_intermediate_transformation.cpp +++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/concat_with_intermediate_transformation.cpp @@ -16,7 +16,7 @@ const std::vector netPrecisions = { }; const std::vector trasformationParamValues = { - LayerTestsUtils::LayerTransformationParamsFactory::createParamGpu() + LayerTestsUtils::LayerTransformationParamsFactory::createParams() }; const std::vector transparentIntermediates = { true, false }; diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/convolution_transformation.cpp 
b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/convolution_transformation.cpp new file mode 100644 index 00000000000000..b99db86cf74503 --- /dev/null +++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/convolution_transformation.cpp @@ -0,0 +1,39 @@ +// Copyright (C) 2019 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "low_precision_transformations/convolution_transformation.hpp" +#include "common_test_utils/test_constants.hpp" + +using namespace LayerTestsDefinitions; + +namespace { +const std::vector netPrecisions = { + InferenceEngine::Precision::FP32, + InferenceEngine::Precision::FP16 +}; + +const std::vector trasformationParamValues = { + LayerTestsUtils::LayerTransformationParamsFactory::createParams() +}; + +const std::vector fqOnActivationsValues = { true, false }; + +const std::vector fqOnWeightsValues = { true, false }; + +INSTANTIATE_TEST_CASE_P(LPT, ConvolutionTransformation, + ::testing::Combine( + ::testing::ValuesIn(netPrecisions), + ::testing::Values(InferenceEngine::SizeVector({ 1, 3, 16, 16 })), + ::testing::Values(CommonTestUtils::DEVICE_GPU), + ::testing::ValuesIn(trasformationParamValues), + ::testing::ValuesIn(fqOnActivationsValues), + ::testing::ValuesIn(fqOnWeightsValues)), + ConvolutionTransformation::getTestCaseName); +} // namespace + + + + diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/layer_transformation.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/layer_transformation.cpp new file mode 100644 index 00000000000000..3a686050cbdf56 --- /dev/null +++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/layer_transformation.cpp @@ -0,0 +1,43 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include +#include +#include + +#include + +#include "common_test_utils/common_utils.hpp" +#include "functional_test_utils/plugin_cache.hpp" +#include "functional_test_utils/layer_test_utils.hpp" +#include "functional_test_utils/blob_utils.hpp" + +#include "ngraph_functions/pass/convert_prc.hpp" + +#include "ie_util_internal.hpp" +#include "functional_test_utils/low_precision_transformations/layer_transformation.hpp" +#include "low_precision_transformations/convolution.hpp" +#include "low_precision_transformations/scaleshift_to_convolution.hpp" + + +namespace LayerTestsUtils { + +InferenceEngine::details::LowPrecisionTransformations LayerTransformation::getLowPrecisionTransformations( + const InferenceEngine::details::LayerTransformation::Params& params) const { + return InferenceEngine::details::LowPrecisionTransformer::getAllTransformations(params); +} + +InferenceEngine::details::LayerTransformation::Params LayerTransformationParamsFactory::createParams() { + return InferenceEngine::details::LayerTransformation::Params( + true, + true, + true, + InferenceEngine::details::LayerTransformation::QuantizedTensorAlignment::UpdateLevel, + InferenceEngine::details::LayerTransformation::QuantizedTensorAlignment::None, + true, + true, + true); +} +} // namespace LayerTestsUtils diff --git a/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/concat_neighboring_graph_transformation.hpp 
b/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/concat_neighboring_graph_transformation.hpp index 84e1e35b472711..eee5532dc44fc0 100644 --- a/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/concat_neighboring_graph_transformation.hpp +++ b/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/concat_neighboring_graph_transformation.hpp @@ -11,7 +11,9 @@ namespace LayerTestsDefinitions { -class ConcatNeighboringGraphTransformation : public LayerTestsUtils::LayerTransformation { +class ConcatNeighboringGraphTransformation : + public testing::WithParamInterface, + public LayerTestsUtils::LayerTransformation { public: static std::string getTestCaseName(testing::TestParamInfo obj); InferenceEngine::Blob::Ptr GenerateInput(const InferenceEngine::InputInfo &info) const override; diff --git a/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/concat_transformation.hpp b/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/concat_transformation.hpp index 1ccb9789004ec2..cf9aa3c605cb77 100644 --- a/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/concat_transformation.hpp +++ b/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/concat_transformation.hpp @@ -11,7 +11,9 @@ namespace LayerTestsDefinitions { -class ConcatTransformation : public LayerTestsUtils::LayerTransformation { +class ConcatTransformation : + public testing::WithParamInterface, + public LayerTestsUtils::LayerTransformation { public: static std::string getTestCaseName(testing::TestParamInfo obj); diff --git a/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/concat_with_intermediate_transformation.hpp b/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/concat_with_intermediate_transformation.hpp index 30224a72f9c3d7..d1d21f33c56164 100644 --- a/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/concat_with_intermediate_transformation.hpp +++ b/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/concat_with_intermediate_transformation.hpp @@ -20,7 +20,9 @@ typedef std::tuple< // multichannel bool> ConcatWithIntermediateTransformationParams; -class ConcatWithIntermediateTransformation : public LayerTestsUtils::LayerTransformation { +class ConcatWithIntermediateTransformation : + public testing::WithParamInterface, + public LayerTestsUtils::LayerTransformation { public: static std::string getTestCaseName(testing::TestParamInfo obj); diff --git a/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/convolution_transformation.hpp b/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/convolution_transformation.hpp new file mode 100644 index 00000000000000..13f7f6ee9361fd --- /dev/null +++ b/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/convolution_transformation.hpp @@ -0,0 +1,36 @@ +// Copyright (C) 2019 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +#include "functional_test_utils/low_precision_transformations/layer_transformation.hpp" + +namespace LayerTestsDefinitions { + +typedef std::tuple< + InferenceEngine::Precision, + InferenceEngine::SizeVector, + std::string, + 
InferenceEngine::details::LayerTransformation::Params, + bool, // fqOnActivations + bool // fqOnWeights +> ConvolutionTransformationParams; + +class ConvolutionTransformation : + public testing::WithParamInterface, + public LayerTestsUtils::LayerTransformation { +public: + static std::string getTestCaseName(testing::TestParamInfo obj); + +protected: + void SetUp() override; + +private: + void validate(); +}; + +} // namespace LayerTestsDefinitions diff --git a/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/concat_neighboring_graph_transformation.cpp b/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/concat_neighboring_graph_transformation.cpp index 46053e77c56955..8af28552ee59d2 100644 --- a/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/concat_neighboring_graph_transformation.cpp +++ b/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/concat_neighboring_graph_transformation.cpp @@ -132,6 +132,8 @@ void ConcatNeighboringGraphTransformation::validate() { const InferenceEngine::CNNLayerPtr outputLayer = it.second->getCreatorLayer().lock(); EXPECT_TRUE(outputLayer != nullptr); EXPECT_EQ("ScaleShift", outputLayer->type); + + checkParentPrecision(outputLayer, params.updatePrecisions); } // check quantized FQ layers map: should includes all FQ diff --git a/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/concat_transformation.cpp b/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/concat_transformation.cpp index 022473e6424f68..2a594774a54402 100644 --- a/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/concat_transformation.cpp +++ b/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/concat_transformation.cpp @@ -89,6 +89,8 @@ void ConcatTransformation::validate() { EXPECT_TRUE(outputLayer != nullptr); EXPECT_EQ("ScaleShift", outputLayer->type); + checkParentPrecision(outputLayer, params.updatePrecisions); + IE_SUPPRESS_DEPRECATED_END } diff --git a/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/concat_with_intermediate_transformation.cpp b/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/concat_with_intermediate_transformation.cpp index ec274f03342ab2..aca75dfee2ee9e 100644 --- a/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/concat_with_intermediate_transformation.cpp +++ b/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/concat_with_intermediate_transformation.cpp @@ -172,6 +172,8 @@ void ConcatWithIntermediateTransformation::validate() { children = CNNNetworkHelper::getChildren(*concat); EXPECT_EQ(1ul, children.size()); EXPECT_EQ("ScaleShift", children[0]->type); + + checkParentPrecision(children[0], params.updatePrecisions); } else { std::vector children = CNNNetworkHelper::getChildren(*intermediate); EXPECT_EQ(2ul, children.size()); diff --git a/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/convolution_transformation.cpp b/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/convolution_transformation.cpp new file mode 100644 index 00000000000000..c0f87d40c1ac80 --- /dev/null +++ b/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/convolution_transformation.cpp @@ -0,0 +1,114 @@ +// Copyright (C) 2019 
Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "low_precision_transformations/convolution_transformation.hpp" + +#include +#include +#include +#include + +#include + +#include "common_test_utils/common_utils.hpp" +#include "functional_test_utils/plugin_cache.hpp" +#include "functional_test_utils/layer_test_utils.hpp" +#include "functional_test_utils/blob_utils.hpp" +#include "ngraph_functions/pass/convert_prc.hpp" +#include "ngraph_functions/builders.hpp" + + +namespace LayerTestsDefinitions { + +std::string ConvolutionTransformation::getTestCaseName(testing::TestParamInfo obj) { + InferenceEngine::Precision netPrecision; + InferenceEngine::SizeVector inputShapes; + std::string targetDevice; + InferenceEngine::details::LayerTransformation::Params params; + bool fqOnActivations; + bool fqOnWeights; + std::tie(netPrecision, inputShapes, targetDevice, params, fqOnActivations, fqOnWeights) = obj.param; + + std::ostringstream result; + result << netPrecision.name() << "_" << targetDevice << "_" << toString(params) << + (fqOnActivations ? "" : "_noFqOnActivations") << + (fqOnWeights ? "" : "_noFqOnWeights"); + return result.str(); +} + +void ConvolutionTransformation::SetUp() { + threshold = 0.1f; + + InferenceEngine::SizeVector inputShape; + InferenceEngine::Precision netPrecision; + InferenceEngine::details::LayerTransformation::Params params; + bool fqOnActivations; + bool fqOnWeights; + std::tie(netPrecision, inputShape, targetDevice, params, fqOnActivations, fqOnWeights) = this->GetParam(); + auto precision = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); + + const float k = 50.f; + + const auto input = std::make_shared(precision, ngraph::Shape(inputShape)); + const auto fakeQuantizeOnActivations = fqOnActivations ? + ngraph::builder::makeFakeQuantize( + input, precision, 256ul, { 1ul }, + { 0.f }, { 255.f / k }, { 0.f }, { 255.f / k }) : + nullptr; + + auto weights = ngraph::opset1::Constant::create( + precision, + ngraph::Shape{ inputShape[1], inputShape[1], 1, 1 }, + std::vector(inputShape[1] * inputShape[1], 1)); + + const auto convolution = std::make_shared( + fakeQuantizeOnActivations == nullptr ? input : fakeQuantizeOnActivations, + fqOnWeights ? + ngraph::builder::makeFakeQuantize( + weights, precision, 255ul, { 1ul }, + { -128.f / k }, { 127.f / k }, { -128.f / k }, { 127.f / k }) : + weights->output(0), + ngraph::Strides{ 1, 1 }, + ngraph::CoordinateDiff{ 0, 0 }, + ngraph::CoordinateDiff{ 0, 0 }, + ngraph::Strides{ 1, 1 }); + + ngraph::ResultVector results {std::make_shared(convolution)}; + function = std::make_shared(results, ngraph::ParameterVector { input }, "ConvolutionTransformation"); + + validate(); +} + +void ConvolutionTransformation::validate() { + InferenceEngine::SizeVector inputShape; + InferenceEngine::Precision netPrecision; + InferenceEngine::details::LayerTransformation::Params params; + bool fqOnActivations; + bool fqOnWeights; + std::tie(netPrecision, inputShape, targetDevice, params, fqOnActivations, fqOnWeights) = this->GetParam(); + + const InferenceEngine::CNNNetwork network = transform(params); + + IE_SUPPRESS_DEPRECATED_START + + InferenceEngine::OutputsDataMap outputs = network.getOutputsInfo(); + EXPECT_EQ(1, outputs.size()); + + std::map::iterator it = outputs.begin(); + const InferenceEngine::CNNLayerPtr outputLayer = it->second->getCreatorLayer().lock(); + EXPECT_TRUE(outputLayer != nullptr); + EXPECT_EQ(fqOnActivations & fqOnWeights ? 
"ScaleShift" : "Convolution", outputLayer->type); + + IE_SUPPRESS_DEPRECATED_END +} + +TEST_P(ConvolutionTransformation, CompareWithRefImpl) { + Run(); + + if (targetDevice == std::string{CommonTestUtils::DEVICE_GPU}) { + PluginCache::get().reset(); + } +}; + +} // namespace LayerTestsDefinitions diff --git a/inference-engine/tests/ie_test_utils/functional_test_utils/low_precision_transformations/layer_transformation.cpp b/inference-engine/tests/ie_test_utils/functional_test_utils/low_precision_transformations/layer_transformation.cpp index 0d0ece7098c9bf..db998a6b626826 100644 --- a/inference-engine/tests/ie_test_utils/functional_test_utils/low_precision_transformations/layer_transformation.cpp +++ b/inference-engine/tests/ie_test_utils/functional_test_utils/low_precision_transformations/layer_transformation.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include @@ -23,7 +24,8 @@ namespace LayerTestsUtils { -InferenceEngine::details::LayerTransformation::Params LayerTransformationParamsFactory::createParamU8I8() { + +InferenceEngine::details::LayerTransformation::Params LayerTransformationParamsFactory::createParamsU8I8() { return InferenceEngine::details::LayerTransformation::Params( false, true, @@ -37,7 +39,7 @@ InferenceEngine::details::LayerTransformation::Params LayerTransformationParamsF { InferenceEngine::Precision::I8 }); } -InferenceEngine::details::LayerTransformation::Params LayerTransformationParamsFactory::createParamU8U8() { +InferenceEngine::details::LayerTransformation::Params LayerTransformationParamsFactory::createParamsU8U8() { return InferenceEngine::details::LayerTransformation::Params( false, true, @@ -51,7 +53,7 @@ InferenceEngine::details::LayerTransformation::Params LayerTransformationParamsF { InferenceEngine::Precision::U8 }); } -InferenceEngine::details::LayerTransformation::Params LayerTransformationParamsFactory::createParamI8I8() { +InferenceEngine::details::LayerTransformation::Params LayerTransformationParamsFactory::createParamsI8I8() { return InferenceEngine::details::LayerTransformation::Params( false, true, @@ -65,28 +67,52 @@ InferenceEngine::details::LayerTransformation::Params LayerTransformationParamsF { InferenceEngine::Precision::I8 }); } -InferenceEngine::details::LayerTransformation::Params LayerTransformationParamsFactory::createParamCpu() { - return InferenceEngine::details::LayerTransformation::Params( - true, - true, - true, - InferenceEngine::details::LayerTransformation::QuantizedTensorAlignment::UpdateLevel, - InferenceEngine::details::LayerTransformation::QuantizedTensorAlignment::None, - true, - true, - true); +InferenceEngine::details::LowPrecisionTransformer LayerTransformation::getLowPrecisionTransformer( + const InferenceEngine::details::LayerTransformation::Params& params) const { + InferenceEngine::details::LowPrecisionTransformer transformer(getLowPrecisionTransformations(params)); + return transformer; } -InferenceEngine::details::LayerTransformation::Params LayerTransformationParamsFactory::createParamGpu() { - // not completed - return InferenceEngine::details::LayerTransformation::Params( - true, - true, - true, - InferenceEngine::details::LayerTransformation::QuantizedTensorAlignment::UpdateLevel, - InferenceEngine::details::LayerTransformation::QuantizedTensorAlignment::None, - true, - true, - true); +InferenceEngine::CNNNetwork LayerTransformation::transform(InferenceEngine::details::LayerTransformation::Params& params) { + InferenceEngine::details::CNNNetworkImplPtr cnnNetworkImp = 
cloneNet(InferenceEngine::CNNNetwork(function)); + + auto transformer = getLowPrecisionTransformer(params); + transformer.transform(*cnnNetworkImp); + + return InferenceEngine::CNNNetwork(cnnNetworkImp); +} + +InferenceEngine::CNNNetwork LayerTransformation::transform(const InferenceEngine::details::LowPrecisionTransformations& transformations) { + InferenceEngine::details::CNNNetworkImplPtr cnnNetworkImp = cloneNet(InferenceEngine::CNNNetwork(function)); + + InferenceEngine::details::LowPrecisionTransformer transformer(transformations); + transformer.transform(*cnnNetworkImp); + + return InferenceEngine::CNNNetwork(cnnNetworkImp); } + +void LayerTransformation::checkParentPrecision(const InferenceEngine::CNNLayerPtr& layer, const bool lowPrecision) { + EXPECT_EQ(1ul, layer->insData.size()) << "insData count is not as expected: " << layer->insData.size(); + const InferenceEngine::DataPtr insData = layer->insData[0].lock(); + EXPECT_TRUE(insData != nullptr) << "insData is null"; + const InferenceEngine::Precision precision = insData->getTensorDesc().getPrecision(); + + const std::unordered_set expectedPrecisions = lowPrecision ? + std::unordered_set({ InferenceEngine::Precision::U8, InferenceEngine::Precision::I8 }) : + std::unordered_set({ InferenceEngine::Precision::FP16, InferenceEngine::Precision::FP32 }); + EXPECT_TRUE((expectedPrecisions.find(precision) != expectedPrecisions.end())) << + "actual precision is " << precision; +} + +std::string LayerTransformation::toString(const InferenceEngine::details::LayerTransformation::Params& params) { + std::ostringstream result; + result << + (params.supportAsymmetricQuantization ? "asymmetric" : "symmetric") << "_" << + params.precisionsOnActivations << "_" << + params.precisionsOnWeights << "_" << + params.quantizedTensorAlignmentOnActivations; + + return result.str(); +} + } // namespace LayerTestsUtils diff --git a/inference-engine/tests/ie_test_utils/functional_test_utils/low_precision_transformations/layer_transformation.hpp b/inference-engine/tests/ie_test_utils/functional_test_utils/low_precision_transformations/layer_transformation.hpp index 81a3a82fb9ba71..13fdb67581dafc 100644 --- a/inference-engine/tests/ie_test_utils/functional_test_utils/low_precision_transformations/layer_transformation.hpp +++ b/inference-engine/tests/ie_test_utils/functional_test_utils/low_precision_transformations/layer_transformation.hpp @@ -21,70 +21,27 @@ typedef std::tuple< class LayerTransformationParamsFactory { public: - static InferenceEngine::details::LayerTransformation::Params createParamU8I8(); - static InferenceEngine::details::LayerTransformation::Params createParamU8U8(); - static InferenceEngine::details::LayerTransformation::Params createParamI8I8(); - static InferenceEngine::details::LayerTransformation::Params createParamCpu(); - static InferenceEngine::details::LayerTransformation::Params createParamGpu(); + static InferenceEngine::details::LayerTransformation::Params createParamsU8I8(); + static InferenceEngine::details::LayerTransformation::Params createParamsU8U8(); + static InferenceEngine::details::LayerTransformation::Params createParamsI8I8(); + static InferenceEngine::details::LayerTransformation::Params createParams(); }; -template -class LayerTransformation : public testing::WithParamInterface, public LayerTestsUtils::LayerTestsCommon { -public: +class LayerTransformation : public LayerTestsUtils::LayerTestsCommon { +protected: InferenceEngine::details::LowPrecisionTransformations getLowPrecisionTransformations( - const 
InferenceEngine::details::LayerTransformation::Params& params) const { - if (targetDevice == "CPU") { - return InferenceEngine::details::LowPrecisionTransformer::getAllTransformations(params). - add(InferenceEngine::details::LayerTransformation::Params(params). - setPrecisionsOnActivations({ InferenceEngine::Precision::U8 }), "Convolution"). - addCleanup( - InferenceEngine::details::LayerTransformation::Params(params).setPrecisionsOnActivations({ InferenceEngine::Precision::U8 }), - "ScaleShift"); - } else if (targetDevice == "GPU") { - return InferenceEngine::details::LowPrecisionTransformer::getAllTransformations(params); - } else { - THROW_IE_EXCEPTION << "unknown target device " << targetDevice; - } - } + const InferenceEngine::details::LayerTransformation::Params& params) const; InferenceEngine::details::LowPrecisionTransformer getLowPrecisionTransformer( - const InferenceEngine::details::LayerTransformation::Params& params) const { - InferenceEngine::details::LowPrecisionTransformer transformer(getLowPrecisionTransformations(params)); - return transformer; - } - - InferenceEngine::CNNNetwork transform() { - return transform(LayerTransformationParamsFactory::createParamCpu()); - } - - InferenceEngine::CNNNetwork transform(InferenceEngine::details::LayerTransformation::Params& params) { - InferenceEngine::details::CNNNetworkImplPtr cnnNetworkImp = cloneNet(InferenceEngine::CNNNetwork(function)); - - auto transformer = getLowPrecisionTransformer(params); - transformer.transform(*cnnNetworkImp); - - return InferenceEngine::CNNNetwork(cnnNetworkImp); - } - - InferenceEngine::CNNNetwork transform(const InferenceEngine::details::LowPrecisionTransformations& transformations) { - InferenceEngine::details::CNNNetworkImplPtr cnnNetworkImp = cloneNet(InferenceEngine::CNNNetwork(function)); + const InferenceEngine::details::LayerTransformation::Params& params) const; - InferenceEngine::details::LowPrecisionTransformer transformer(transformations); - transformer.transform(*cnnNetworkImp); + InferenceEngine::CNNNetwork transform(InferenceEngine::details::LayerTransformation::Params& params); - return InferenceEngine::CNNNetwork(cnnNetworkImp); - } + InferenceEngine::CNNNetwork transform(const InferenceEngine::details::LowPrecisionTransformations& transformations); - static std::string toString(const InferenceEngine::details::LayerTransformation::Params& params) { - std::ostringstream result; - result << - (params.supportAsymmetricQuantization ? 
"asymmetric" : "symmetric") << "_" << - params.precisionsOnActivations << "_" << - params.precisionsOnWeights << "_" << - params.quantizedTensorAlignmentOnActivations; + static void checkParentPrecision(const InferenceEngine::CNNLayerPtr& layer, const bool lowPrecision); - return result.str(); - } + static std::string toString(const InferenceEngine::details::LayerTransformation::Params& params); }; } // namespace LayerTestsUtils From 9af51a165faa58d6f3f7c2eb6891f8cabf19bdb4 Mon Sep 17 00:00:00 2001 From: Denis Orlov Date: Sat, 30 May 2020 00:43:42 +0300 Subject: [PATCH 24/24] [GNA] Workaround support for callbacks (#591) --- inference-engine/src/gna_plugin/gna_infer_request.hpp | 7 +++++++ .../gna/shared_tests_instances/skip_tests_config.cpp | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/inference-engine/src/gna_plugin/gna_infer_request.hpp b/inference-engine/src/gna_plugin/gna_infer_request.hpp index 17688f4688a5ce..9641c17011689c 100644 --- a/inference-engine/src/gna_plugin/gna_infer_request.hpp +++ b/inference-engine/src/gna_plugin/gna_infer_request.hpp @@ -69,6 +69,13 @@ class GNAInferRequest : public InferenceEngine::AsyncInferRequestInternal { // execute input pre-processing. execDataPreprocessing(_inputs); inferRequestIdx = plg->QueueInference(_inputs, _outputs); + // workaround to unblock callback-based flows + if (_callback) { + auto infer_request = _publicInterface.lock(); + IE_ASSERT(infer_request != nullptr); + auto res = Wait(0); + _callback(infer_request, res); + } } InferenceEngine::StatusCode Wait(int64_t millis_timeout) override { diff --git a/inference-engine/tests/functional/plugin/gna/shared_tests_instances/skip_tests_config.cpp b/inference-engine/tests/functional/plugin/gna/shared_tests_instances/skip_tests_config.cpp index c17bf7445ceb9d..5348638b62d610 100644 --- a/inference-engine/tests/functional/plugin/gna/shared_tests_instances/skip_tests_config.cpp +++ b/inference-engine/tests/functional/plugin/gna/shared_tests_instances/skip_tests_config.cpp @@ -10,6 +10,6 @@ std::vector disabledTestPatterns() { return { // TODO: FIX BUG 31661 - ".*Behavior.*Callback.*" + ".*Behavior.*CallbackThrowException.*" }; }