From 2b03e32f34e89656e20179a69f09166cf4a37bad Mon Sep 17 00:00:00 2001 From: eshoguli Date: Fri, 19 Jan 2024 11:37:31 +0100 Subject: [PATCH] Convert: fuse refactoring + tests --- src/plugins/intel_cpu/src/graph_optimizer.cpp | 33 +++--- src/plugins/intel_cpu/src/nodes/eltwise.cpp | 105 ++++++++++++++++-- src/plugins/intel_cpu/src/nodes/eltwise.h | 1 + .../nodes/executors/aarch64/jit_eltwise.cpp | 48 +------- .../nodes/executors/aarch64/jit_eltwise.hpp | 2 +- .../intel_cpu/tests/functional/CMakeLists.txt | 2 + .../skip_tests_config.cpp | 4 + .../single_layer_tests/classes/eltwise.cpp | 1 - .../src/{ => common}/eltwise_chain.cpp | 36 +++++- .../include/common_test_utils/test_enums.hpp | 3 +- .../src/node_builders/eltwise.cpp | 3 + .../common_test_utils/src/test_enums.cpp | 3 + 12 files changed, 166 insertions(+), 75 deletions(-) rename src/plugins/intel_cpu/tests/functional/subgraph_tests/src/{ => common}/eltwise_chain.cpp (89%) diff --git a/src/plugins/intel_cpu/src/graph_optimizer.cpp b/src/plugins/intel_cpu/src/graph_optimizer.cpp index cf40705d92ddce..4b9a2b7f1897b5 100644 --- a/src/plugins/intel_cpu/src/graph_optimizer.cpp +++ b/src/plugins/intel_cpu/src/graph_optimizer.cpp @@ -841,36 +841,35 @@ void GraphOptimizer::FuseMultiplyAndAdd(Graph &graph) { void GraphOptimizer::MergeConvertAndScaleShift(Graph& graph) { auto& graphNodes = graph.GetNodes(); - auto isSuitableParentNode = [](NodePtr parentNode) { - return parentNode->getType() == Type::Convert && parentNode->getChildEdges().size() == 1 && - (parentNode->getOriginalInputPrecisionAtPort(0) == ov::element::u8 || - parentNode->getOriginalInputPrecisionAtPort(0) == ov::element::i8) && - parentNode->getOriginalOutputPrecisionAtPort(0) == ov::element::f32; - }; - - auto isSuitableChildNode = [](NodePtr childNode) { - return childNode->getType() == Type::Eltwise && childNode->getParentEdges().size() != 2; - }; - auto parent = graphNodes.begin(); while (parent != graphNodes.end()) { + CPU_GRAPH_OPTIMIZER_SCOPE(MergeConvertAndScaleShift); auto parentNode = *parent; - if (!isSuitableParentNode(parentNode)) { + if (parentNode->getType() != Type::Convert) { parent++; continue; } - CPU_GRAPH_OPTIMIZER_SCOPE(MergeConvertAndScaleShift_ParentNode); + const auto& childEdges = parentNode->getChildEdges(); + if (childEdges.size() != 1) { + parent++; + continue; + } - auto childNode = parentNode->getChildEdgeAt(0)->getChild(); - if (!isSuitableChildNode(childNode)) { + const auto edge = childEdges[0].lock(); + auto childNode = edge->getChild(); + if (childNode->getType() != Type::Eltwise) { parent++; continue; } - CPU_GRAPH_OPTIMIZER_SCOPE(MergeConvertAndScaleShift_ChildNode); + const auto eltwise = dynamic_cast(childNode.get()); + if (!eltwise->canFuseParent(parentNode)) { + parent++; + continue; + } - auto parents = parentNode->parentEdges; + const auto parents = parentNode->parentEdges; for (size_t i = 0; i < parents.size(); i++) { auto p_edge = parents[i].lock(); if (!p_edge) continue; diff --git a/src/plugins/intel_cpu/src/nodes/eltwise.cpp b/src/plugins/intel_cpu/src/nodes/eltwise.cpp index cf57532ca1f1b9..f7a180c692c595 100644 --- a/src/plugins/intel_cpu/src/nodes/eltwise.cpp +++ b/src/plugins/intel_cpu/src/nodes/eltwise.cpp @@ -38,7 +38,6 @@ #include "utils/cpu_utils.hpp" #include -#include #include #include #include @@ -83,6 +82,68 @@ namespace ov { namespace intel_cpu { namespace node { +#if defined(OPENVINO_ARCH_ARM64) +namespace { +bool jitIsSupported(const Node* node, + const float alpha, + const float beta, + const float gamma, + const std::vector& input_precisions = {}) { + const Algorithm& algorithm = node->getAlgorithm(); + const auto is_supported = one_of(algorithm, + Algorithm::EltwiseAdd, + Algorithm::EltwiseMultiply, + Algorithm::EltwiseMulAdd, + Algorithm::EltwisePowerStatic, + Algorithm::EltwiseRelu); + if (!is_supported) { + return false; + } + + const auto check_precisions = [&node]( + const std::vector& precisions, + const std::set& supported_precisions) { + const auto& input_precisions = precisions.size() == 0 ? node->getOriginalInputPrecisions() : precisions; + if (std::any_of(input_precisions.begin(), + input_precisions.end(), + [&supported_precisions](const ov::element::Type& precision) { + return supported_precisions.find(precision) == supported_precisions.end(); + })) { + return false; + } + + const auto& output_precisions = node->getOriginalOutputPrecisions(); + if (std::any_of(output_precisions.begin(), + output_precisions.end(), + [&supported_precisions](const ov::element::Type& precision) { + return supported_precisions.find(precision) == supported_precisions.end(); + })) { + return false; + } + + return true; + }; + + const std::set supported_precisions = { + ov::element::f16, + ov::element::f32, + ov::element::i32, + ov::element::u32 + }; + + if (!check_precisions(input_precisions, supported_precisions)) { + return false; + } + + if ((algorithm == Algorithm::EltwiseRelu) && ((alpha != 0.f) || (beta != 0.f) || (gamma != 0.f))) { + return false; + } + + return true; +} +} // namespace +#endif + #if defined(OPENVINO_ARCH_X86_64) template @@ -2253,7 +2314,8 @@ void Eltwise::initSupportedPrimitiveDescriptors() { #endif #elif defined(OPENVINO_ARCH_ARM64) const bool useJit = canUseOptimizedImpl && - executors::aarch64::JitEltwiseExecutor::isSupported(this, getAlpha(), getBeta(), getGamma()); + jitIsSupported(this, getAlpha(), getBeta(), getGamma()) && + executors::aarch64::JitEltwiseExecutor::isSupported(getAlgorithm(), getAlpha(), getBeta(), getGamma()); if (!useJit) { canUseOptimizedImpl = false; } @@ -2980,6 +3042,35 @@ bool Eltwise::appendAttrPostOps(DnnlPostOpsComposer& dnnlpoc, bool isLastPostOp, return true; } +bool Eltwise::canFuseParent(const NodePtr& parentNode) const { +#if defined(OPENVINO_ARCH_ARM64) + if (parentNode->getType() != Type::Convert) { + return false; + } + const auto& input_precisions = parentNode->getOriginalInputPrecisions(); + if (!jitIsSupported(this, getAlpha(), getBeta(), getGamma(), input_precisions)) { + return false; + } +#else + const auto isSuitableParentNode = [](const Node* parentNode) { + return parentNode->getType() == Type::Convert && + (parentNode->getOriginalInputPrecisionAtPort(0) == ov::element::u8 || + parentNode->getOriginalInputPrecisionAtPort(0) == ov::element::i8) && + parentNode->getOriginalOutputPrecisionAtPort(0) == ov::element::f32; + }; + + auto isSuitableChildNode = [](const Node* childNode) { + return childNode->getParentEdges().size() != 2; + }; + + if (!isSuitableParentNode(parentNode.get()) || !isSuitableChildNode(this)) { + return false; + } +#endif + + return true; +} + bool Eltwise::canFuse(const NodePtr& node) const { auto isIntegerComputeSupported = [](const Node* node) { if (!one_of(node->getAlgorithm(), Algorithm::EltwiseAdd, @@ -3004,14 +3095,14 @@ bool Eltwise::canFuse(const NodePtr& node) const { if (!mayiuse(dnnl::impl::cpu::aarch64::asimd) || (getInputShapeAtPort(0).getRank() > MAX_ELTWISE_DIM_RANK)) return false; - if (!executors::aarch64::JitEltwiseExecutor::isSupported(this, getAlpha(), getBeta(), getGamma())) { + if (!jitIsSupported(this, getAlpha(), getBeta(), getGamma())) { return false; } const auto eltwise = dynamic_cast(node.get()); - if ((eltwise == nullptr) || (!executors::aarch64::JitEltwiseExecutor::isSupported(eltwise, - eltwise->getAlpha(), - eltwise->getBeta(), - eltwise->getGamma()))) { + if ((eltwise == nullptr) || (!jitIsSupported(eltwise, + eltwise->getAlpha(), + eltwise->getBeta(), + eltwise->getGamma()))) { return false; } #else diff --git a/src/plugins/intel_cpu/src/nodes/eltwise.h b/src/plugins/intel_cpu/src/nodes/eltwise.h index a9f64bdcc83367..97c0d2430b4fa6 100644 --- a/src/plugins/intel_cpu/src/nodes/eltwise.h +++ b/src/plugins/intel_cpu/src/nodes/eltwise.h @@ -108,6 +108,7 @@ class Eltwise : public Node { void execute(dnnl::stream strm) override; bool created() const override; bool canBeInPlace() const override; + bool canFuseParent(const NodePtr& parentNode) const; bool canFuse(const NodePtr& node) const override; void appendPostOps(dnnl::post_ops& ops, const VectorDims &postOpDims, std::unordered_map& postOpsMem, const int channelAxis = 1) override; void appendPostOps(dnnl::post_ops& ops, const VectorDims &postOpDims, std::vector& postOpsMem, const int channelAxis = 1) override; diff --git a/src/plugins/intel_cpu/src/nodes/executors/aarch64/jit_eltwise.cpp b/src/plugins/intel_cpu/src/nodes/executors/aarch64/jit_eltwise.cpp index 9e3e32e86bcc39..970f6c6ffdb0ec 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/aarch64/jit_eltwise.cpp +++ b/src/plugins/intel_cpu/src/nodes/executors/aarch64/jit_eltwise.cpp @@ -11,11 +11,10 @@ namespace executors { namespace aarch64 { bool JitEltwiseExecutor::isSupported( - const Node* node, + const Algorithm& algorithm, const float alpha, const float beta, const float gamma) { - const Algorithm& algorithm = node->getAlgorithm(); const auto is_supported = one_of(algorithm, Algorithm::EltwiseAdd, Algorithm::EltwiseMultiply, @@ -26,51 +25,6 @@ bool JitEltwiseExecutor::isSupported( return false; } - const auto check_precisions = [&node](const std::set& precisions) { - const auto& input_precisions = node->getOriginalInputPrecisions(); - if (std::any_of(input_precisions.begin(), - input_precisions.end(), - [&precisions](const ov::element::Type& precision) { - return precisions.find(precision) == precisions.end(); - })) { - return false; - } - - const auto& output_precisions = node->getOriginalOutputPrecisions(); - if (std::any_of(output_precisions.begin(), - output_precisions.end(), - [&precisions](const ov::element::Type& precision) { - return precisions.find(precision) == precisions.end(); - })) { - return false; - } - - return true; - }; - - const std::set supported_precisions = { - ov::element::f16, - ov::element::f32, - ov::element::i32, - ov::element::u32 - }; - - const auto parent = node->getParentEdgeAt(0)->getParent(); - if (parent->getType() == ov::intel_cpu::Type::Convert) { - const auto& input_precisions = parent->getOriginalInputPrecisions(); - if (input_precisions.size() != 1ull) { - return false; - } - // input precision will be changed after fuse - if (supported_precisions.find(input_precisions[0]) == supported_precisions.end()) { - return false; - } - } - - if (!check_precisions(supported_precisions)) { - return false; - } - if ((algorithm == Algorithm::EltwiseRelu) && ((alpha != 0.f) || (beta != 0.f) || (gamma != 0.f))) { return false; } diff --git a/src/plugins/intel_cpu/src/nodes/executors/aarch64/jit_eltwise.hpp b/src/plugins/intel_cpu/src/nodes/executors/aarch64/jit_eltwise.hpp index 673e96459c99cb..a902dedf138982 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/aarch64/jit_eltwise.hpp +++ b/src/plugins/intel_cpu/src/nodes/executors/aarch64/jit_eltwise.hpp @@ -21,7 +21,7 @@ class JitEltwiseExecutor : public EltwiseExecutor { explicit JitEltwiseExecutor(const ExecutorContext::CPtr context); static bool isSupported( - const Node* node, + const Algorithm& algorithm, const float alpha, const float beta, const float gamma); diff --git a/src/plugins/intel_cpu/tests/functional/CMakeLists.txt b/src/plugins/intel_cpu/tests/functional/CMakeLists.txt index b753edd33bb3f8..412b4f069880e6 100644 --- a/src/plugins/intel_cpu/tests/functional/CMakeLists.txt +++ b/src/plugins/intel_cpu/tests/functional/CMakeLists.txt @@ -38,6 +38,8 @@ else() file(GLOB_RECURSE TMP_LIST_OF_COMMON_TEST_INSTANCES ${CMAKE_CURRENT_SOURCE_DIR}/single_layer_tests/instances/common/*.cpp) file(GLOB_RECURSE TMP_LIST_OF_ARM_TEST_INSTANCES ${CMAKE_CURRENT_SOURCE_DIR}/single_layer_tests/instances/arm/*.cpp) file(GLOB_RECURSE TMP_LIST_OF_ARM_SUBGRAPH_TESTS ${CMAKE_CURRENT_SOURCE_DIR}/subgraph_tests/arm/*.cpp) + # will be done in PR 19856 + file(GLOB_RECURSE TMP_LIST_OF_ARM_SUBGRAPH_TESTS ${CMAKE_CURRENT_SOURCE_DIR}/subgraph_tests/src/common/*.cpp) list(APPEND TMP_LIST_OF_EXPLICITLY_ENABLED_TESTS ${TMP_LIST_OF_TEST_CLASSES} ${TMP_LIST_OF_COMMON_TEST_INSTANCES} ${TMP_LIST_OF_ARM_TEST_INSTANCES} ${TMP_LIST_OF_ARM_SUBGRAPH_TESTS}) set(TMP_EXPLICITLY_ENABLED_TESTS "${TMP_LIST_OF_EXPLICITLY_ENABLED_TESTS}") diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp index ddff2c7d345de3..dcd290b5f8652b 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp @@ -295,6 +295,10 @@ std::vector disabledTestPatterns() { #if defined(OPENVINO_ARCH_ARM) // TODO: rounding errors retVector.emplace_back(R"(.*iv_secondaryInputType=PARAMETER_opType=VECTOR_NetType=i32.*)"); + // not supported + retVector.emplace_back(R"(.*fma.*EltwiseLayerCPUTest.*)"); + retVector.emplace_back(R"(.*int_jit.*EltwiseLayerCPUTest.*)"); + retVector.emplace_back(R"(.*dyn.*EltwiseChainTest.*)"); #endif #if !defined(OPENVINO_ARCH_X86_64) diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/eltwise.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/eltwise.cpp index 542fb1186da6c3..f002b78e881370 100644 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/eltwise.cpp +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/eltwise.cpp @@ -273,7 +273,6 @@ const std::vector& netType() { const std::vector& netTypeJit() { static const std::vector netType = { ElementType::i32, - ElementType::u32, ElementType::f32}; return netType; } diff --git a/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/eltwise_chain.cpp b/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/common/eltwise_chain.cpp similarity index 89% rename from src/plugins/intel_cpu/tests/functional/subgraph_tests/src/eltwise_chain.cpp rename to src/plugins/intel_cpu/tests/functional/subgraph_tests/src/common/eltwise_chain.cpp index e49e40f9c512b4..a1b9c67bbffab3 100644 --- a/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/eltwise_chain.cpp +++ b/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/common/eltwise_chain.cpp @@ -165,7 +165,7 @@ std::vector> inputPrecisions = { std::vector> eltwiseOps = { { EltwiseTypes::ADD, EltwiseTypes::MULTIPLY, EltwiseTypes::SUBTRACT }, - { EltwiseTypes::DIVIDE, EltwiseTypes::SQUARED_DIFF, EltwiseTypes::ADD }, + { EltwiseTypes::DIVIDE, EltwiseTypes::SQUARED_DIFF, EltwiseTypes::ADD } }; INSTANTIATE_TEST_SUITE_P(smoke_EltwiseChain, EltwiseChainTest, @@ -178,6 +178,40 @@ INSTANTIATE_TEST_SUITE_P(smoke_EltwiseChain, EltwiseChainTest, ::testing::Values(ov::test::utils::DEVICE_CPU)), EltwiseChainTest::getTestCaseName); + + std::vector> inputShapesConvert = { + {{1, 1, 2, 3}, {1, 1, 2, 3}, {1, 1, 2, 3}} + }; + + std::vector> inputPrecisionsConvert = { + { ElementType::i8, ElementType::f32, ElementType::f32 }, + { ElementType::u8, ElementType::f32, ElementType::f32 }, + { ElementType::i16, ElementType::f32, ElementType::f32 }, + { ElementType::u16, ElementType::f32, ElementType::f32 }, + { ElementType::i32, ElementType::f32, ElementType::f32 }, + // { ElementType::u32, ElementType::f32, ElementType::f32 }, // plugin doesn't support + { ElementType::f16, ElementType::f32, ElementType::f32 }, + { ElementType::f32, ElementType::f32, ElementType::f32 }, + }; + + std::vector> eltwiseOpsConvert = { + { EltwiseTypes::CONVERT_OP, EltwiseTypes::MULTIPLY }, + { EltwiseTypes::CONVERT_OP, EltwiseTypes::ADD }, + { EltwiseTypes::CONVERT_OP, EltwiseTypes::DIVIDE }, + { EltwiseTypes::CONVERT_OP, EltwiseTypes::SUBTRACT }, + { EltwiseTypes::CONVERT_OP, EltwiseTypes::POWER }, + }; + + INSTANTIATE_TEST_SUITE_P(smoke_EltwiseChain_MergeConvert, EltwiseChainTest, + ::testing::Combine( + ::testing::ValuesIn(static_shapes_to_test_representation(inputShapesConvert)), + ::testing::Values(InputLayerType::CONSTANT), + ::testing::ValuesIn(inputPrecisionsConvert), + ::testing::ValuesIn(eltwiseOpsConvert), + ::testing::Values(false), + ::testing::Values(ov::test::utils::DEVICE_CPU)), + EltwiseChainTest::getTestCaseName); + std::vector> inputShapesFQ = { {{1, 2, 2, 3}, {1, 2, 2, 3}, {1, 2, 2, 3}, {1, 2, 2, 3}}, {{2, 33, 5, 5}, {2, 33, 5, 5}, {2, 33, 1, 5}, {2, 33, 5, 5}}, diff --git a/src/tests/test_utils/common_test_utils/include/common_test_utils/test_enums.hpp b/src/tests/test_utils/common_test_utils/include/common_test_utils/test_enums.hpp index 56fb77004dab17..bf312b4bbc6a56 100644 --- a/src/tests/test_utils/common_test_utils/include/common_test_utils/test_enums.hpp +++ b/src/tests/test_utils/common_test_utils/include/common_test_utils/test_enums.hpp @@ -60,7 +60,8 @@ enum EltwiseTypes { BITWISE_AND, BITWISE_NOT, BITWISE_OR, - BITWISE_XOR + BITWISE_XOR, + CONVERT_OP }; enum SqueezeOpType { diff --git a/src/tests/test_utils/common_test_utils/src/node_builders/eltwise.cpp b/src/tests/test_utils/common_test_utils/src/node_builders/eltwise.cpp index 502594602ea522..7b5a0c410e69c1 100644 --- a/src/tests/test_utils/common_test_utils/src/node_builders/eltwise.cpp +++ b/src/tests/test_utils/common_test_utils/src/node_builders/eltwise.cpp @@ -9,6 +9,7 @@ #include "openvino/op/bitwise_not.hpp" #include "openvino/op/bitwise_or.hpp" #include "openvino/op/bitwise_xor.hpp" +#include "openvino/op/convert.hpp" #include "openvino/op/divide.hpp" #include "openvino/op/erf.hpp" #include "openvino/op/floor_mod.hpp" @@ -51,6 +52,8 @@ std::shared_ptr make_eltwise(const ov::Output& in0, return std::make_shared(in0, in1); case ov::test::utils::EltwiseTypes::BITWISE_XOR: return std::make_shared(in0, in1); + case ov::test::utils::EltwiseTypes::CONVERT_OP: + return std::make_shared(in0, ov::element::f32); default: { OPENVINO_THROW("Incorrect type of Eltwise operation"); } diff --git a/src/tests/test_utils/common_test_utils/src/test_enums.cpp b/src/tests/test_utils/common_test_utils/src/test_enums.cpp index f1050f47a28629..10029b79503ab8 100644 --- a/src/tests/test_utils/common_test_utils/src/test_enums.cpp +++ b/src/tests/test_utils/common_test_utils/src/test_enums.cpp @@ -82,6 +82,9 @@ std::ostream& operator<<(std::ostream& os, const ov::test::utils::EltwiseTypes t case ov::test::utils::EltwiseTypes::BITWISE_XOR: os << "BitwiseXor"; break; + case ov::test::utils::EltwiseTypes::CONVERT_OP: + os << "Convert"; + break; default: throw std::runtime_error("NOT_SUPPORTED_OP_TYPE"); }