Skip to content

Commit

Permalink
Convert: fuse refactoring + tests
Browse files Browse the repository at this point in the history
  • Loading branch information
eshoguli committed Jan 28, 2024
1 parent ac50235 commit 2b03e32
Show file tree
Hide file tree
Showing 12 changed files with 166 additions and 75 deletions.
33 changes: 16 additions & 17 deletions src/plugins/intel_cpu/src/graph_optimizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -841,36 +841,35 @@ void GraphOptimizer::FuseMultiplyAndAdd(Graph &graph) {
void GraphOptimizer::MergeConvertAndScaleShift(Graph& graph) {
auto& graphNodes = graph.GetNodes();

auto isSuitableParentNode = [](NodePtr parentNode) {
return parentNode->getType() == Type::Convert && parentNode->getChildEdges().size() == 1 &&
(parentNode->getOriginalInputPrecisionAtPort(0) == ov::element::u8 ||
parentNode->getOriginalInputPrecisionAtPort(0) == ov::element::i8) &&
parentNode->getOriginalOutputPrecisionAtPort(0) == ov::element::f32;
};

auto isSuitableChildNode = [](NodePtr childNode) {
return childNode->getType() == Type::Eltwise && childNode->getParentEdges().size() != 2;
};

auto parent = graphNodes.begin();
while (parent != graphNodes.end()) {
CPU_GRAPH_OPTIMIZER_SCOPE(MergeConvertAndScaleShift);
auto parentNode = *parent;
if (!isSuitableParentNode(parentNode)) {
if (parentNode->getType() != Type::Convert) {
parent++;
continue;
}

CPU_GRAPH_OPTIMIZER_SCOPE(MergeConvertAndScaleShift_ParentNode);
const auto& childEdges = parentNode->getChildEdges();
if (childEdges.size() != 1) {
parent++;
continue;
}

auto childNode = parentNode->getChildEdgeAt(0)->getChild();
if (!isSuitableChildNode(childNode)) {
const auto edge = childEdges[0].lock();
auto childNode = edge->getChild();
if (childNode->getType() != Type::Eltwise) {
parent++;
continue;
}

CPU_GRAPH_OPTIMIZER_SCOPE(MergeConvertAndScaleShift_ChildNode);
const auto eltwise = dynamic_cast<ov::intel_cpu::node::Eltwise*>(childNode.get());
if (!eltwise->canFuseParent(parentNode)) {
parent++;
continue;
}

auto parents = parentNode->parentEdges;
const auto parents = parentNode->parentEdges;
for (size_t i = 0; i < parents.size(); i++) {
auto p_edge = parents[i].lock();
if (!p_edge) continue;
Expand Down
105 changes: 98 additions & 7 deletions src/plugins/intel_cpu/src/nodes/eltwise.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@
#include "utils/cpu_utils.hpp"
#include <common/primitive_hashing_utils.hpp>

#include <ngraph/opsets/opset1.hpp>
#include <openvino/op/bitwise_and.hpp>
#include <openvino/op/bitwise_not.hpp>
#include <openvino/op/bitwise_or.hpp>
Expand Down Expand Up @@ -83,6 +82,68 @@ namespace ov {
namespace intel_cpu {
namespace node {

#if defined(OPENVINO_ARCH_ARM64)
namespace {
bool jitIsSupported(const Node* node,
const float alpha,
const float beta,
const float gamma,
const std::vector<ov::element::Type>& input_precisions = {}) {
const Algorithm& algorithm = node->getAlgorithm();
const auto is_supported = one_of(algorithm,
Algorithm::EltwiseAdd,
Algorithm::EltwiseMultiply,
Algorithm::EltwiseMulAdd,
Algorithm::EltwisePowerStatic,
Algorithm::EltwiseRelu);
if (!is_supported) {
return false;
}

const auto check_precisions = [&node](
const std::vector<ov::element::Type>& precisions,
const std::set<ov::element::Type>& supported_precisions) {
const auto& input_precisions = precisions.size() == 0 ? node->getOriginalInputPrecisions() : precisions;
if (std::any_of(input_precisions.begin(),
input_precisions.end(),
[&supported_precisions](const ov::element::Type& precision) {
return supported_precisions.find(precision) == supported_precisions.end();
})) {
return false;
}

const auto& output_precisions = node->getOriginalOutputPrecisions();
if (std::any_of(output_precisions.begin(),
output_precisions.end(),
[&supported_precisions](const ov::element::Type& precision) {
return supported_precisions.find(precision) == supported_precisions.end();
})) {
return false;
}

return true;
};

const std::set<ov::element::Type> supported_precisions = {
ov::element::f16,
ov::element::f32,
ov::element::i32,
ov::element::u32
};

if (!check_precisions(input_precisions, supported_precisions)) {
return false;
}

if ((algorithm == Algorithm::EltwiseRelu) && ((alpha != 0.f) || (beta != 0.f) || (gamma != 0.f))) {
return false;
}

return true;
}
} // namespace
#endif

#if defined(OPENVINO_ARCH_X86_64)

template<typename T>
Expand Down Expand Up @@ -2253,7 +2314,8 @@ void Eltwise::initSupportedPrimitiveDescriptors() {
#endif
#elif defined(OPENVINO_ARCH_ARM64)
const bool useJit = canUseOptimizedImpl &&
executors::aarch64::JitEltwiseExecutor::isSupported(this, getAlpha(), getBeta(), getGamma());
jitIsSupported(this, getAlpha(), getBeta(), getGamma()) &&
executors::aarch64::JitEltwiseExecutor::isSupported(getAlgorithm(), getAlpha(), getBeta(), getGamma());
if (!useJit) {
canUseOptimizedImpl = false;
}
Expand Down Expand Up @@ -2980,6 +3042,35 @@ bool Eltwise::appendAttrPostOps(DnnlPostOpsComposer& dnnlpoc, bool isLastPostOp,
return true;
}

bool Eltwise::canFuseParent(const NodePtr& parentNode) const {
#if defined(OPENVINO_ARCH_ARM64)
if (parentNode->getType() != Type::Convert) {
return false;
}
const auto& input_precisions = parentNode->getOriginalInputPrecisions();
if (!jitIsSupported(this, getAlpha(), getBeta(), getGamma(), input_precisions)) {
return false;
}
#else
const auto isSuitableParentNode = [](const Node* parentNode) {
return parentNode->getType() == Type::Convert &&
(parentNode->getOriginalInputPrecisionAtPort(0) == ov::element::u8 ||
parentNode->getOriginalInputPrecisionAtPort(0) == ov::element::i8) &&
parentNode->getOriginalOutputPrecisionAtPort(0) == ov::element::f32;
};

auto isSuitableChildNode = [](const Node* childNode) {
return childNode->getParentEdges().size() != 2;
};

if (!isSuitableParentNode(parentNode.get()) || !isSuitableChildNode(this)) {
return false;
}
#endif

return true;
}

bool Eltwise::canFuse(const NodePtr& node) const {
auto isIntegerComputeSupported = [](const Node* node) {
if (!one_of(node->getAlgorithm(), Algorithm::EltwiseAdd,
Expand All @@ -3004,14 +3095,14 @@ bool Eltwise::canFuse(const NodePtr& node) const {
if (!mayiuse(dnnl::impl::cpu::aarch64::asimd) || (getInputShapeAtPort(0).getRank() > MAX_ELTWISE_DIM_RANK))
return false;

if (!executors::aarch64::JitEltwiseExecutor::isSupported(this, getAlpha(), getBeta(), getGamma())) {
if (!jitIsSupported(this, getAlpha(), getBeta(), getGamma())) {
return false;
}
const auto eltwise = dynamic_cast<const Eltwise*>(node.get());
if ((eltwise == nullptr) || (!executors::aarch64::JitEltwiseExecutor::isSupported(eltwise,
eltwise->getAlpha(),
eltwise->getBeta(),
eltwise->getGamma()))) {
if ((eltwise == nullptr) || (!jitIsSupported(eltwise,
eltwise->getAlpha(),
eltwise->getBeta(),
eltwise->getGamma()))) {
return false;
}
#else
Expand Down
1 change: 1 addition & 0 deletions src/plugins/intel_cpu/src/nodes/eltwise.h
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ class Eltwise : public Node {
void execute(dnnl::stream strm) override;
bool created() const override;
bool canBeInPlace() const override;
bool canFuseParent(const NodePtr& parentNode) const;
bool canFuse(const NodePtr& node) const override;
void appendPostOps(dnnl::post_ops& ops, const VectorDims &postOpDims, std::unordered_map<int, MemoryPtr>& postOpsMem, const int channelAxis = 1) override;
void appendPostOps(dnnl::post_ops& ops, const VectorDims &postOpDims, std::vector<const void*>& postOpsMem, const int channelAxis = 1) override;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,10 @@ namespace executors {
namespace aarch64 {

bool JitEltwiseExecutor::isSupported(
const Node* node,
const Algorithm& algorithm,
const float alpha,
const float beta,
const float gamma) {
const Algorithm& algorithm = node->getAlgorithm();
const auto is_supported = one_of(algorithm,
Algorithm::EltwiseAdd,
Algorithm::EltwiseMultiply,
Expand All @@ -26,51 +25,6 @@ bool JitEltwiseExecutor::isSupported(
return false;
}

const auto check_precisions = [&node](const std::set<ov::element::Type>& precisions) {
const auto& input_precisions = node->getOriginalInputPrecisions();
if (std::any_of(input_precisions.begin(),
input_precisions.end(),
[&precisions](const ov::element::Type& precision) {
return precisions.find(precision) == precisions.end();
})) {
return false;
}

const auto& output_precisions = node->getOriginalOutputPrecisions();
if (std::any_of(output_precisions.begin(),
output_precisions.end(),
[&precisions](const ov::element::Type& precision) {
return precisions.find(precision) == precisions.end();
})) {
return false;
}

return true;
};

const std::set<ov::element::Type> supported_precisions = {
ov::element::f16,
ov::element::f32,
ov::element::i32,
ov::element::u32
};

const auto parent = node->getParentEdgeAt(0)->getParent();
if (parent->getType() == ov::intel_cpu::Type::Convert) {
const auto& input_precisions = parent->getOriginalInputPrecisions();
if (input_precisions.size() != 1ull) {
return false;
}
// input precision will be changed after fuse
if (supported_precisions.find(input_precisions[0]) == supported_precisions.end()) {
return false;
}
}

if (!check_precisions(supported_precisions)) {
return false;
}

if ((algorithm == Algorithm::EltwiseRelu) && ((alpha != 0.f) || (beta != 0.f) || (gamma != 0.f))) {
return false;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ class JitEltwiseExecutor : public EltwiseExecutor {
explicit JitEltwiseExecutor(const ExecutorContext::CPtr context);

static bool isSupported(
const Node* node,
const Algorithm& algorithm,
const float alpha,
const float beta,
const float gamma);
Expand Down
2 changes: 2 additions & 0 deletions src/plugins/intel_cpu/tests/functional/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ else()
file(GLOB_RECURSE TMP_LIST_OF_COMMON_TEST_INSTANCES ${CMAKE_CURRENT_SOURCE_DIR}/single_layer_tests/instances/common/*.cpp)
file(GLOB_RECURSE TMP_LIST_OF_ARM_TEST_INSTANCES ${CMAKE_CURRENT_SOURCE_DIR}/single_layer_tests/instances/arm/*.cpp)
# Both patterns are globbed in a single call: a second file(GLOB_RECURSE) into the same variable
# would replace the first result and silently drop the subgraph_tests/arm sources.
# Common subgraph tests: will be done in PR 19856
file(GLOB_RECURSE TMP_LIST_OF_ARM_SUBGRAPH_TESTS
     ${CMAKE_CURRENT_SOURCE_DIR}/subgraph_tests/arm/*.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/subgraph_tests/src/common/*.cpp)
list(APPEND TMP_LIST_OF_EXPLICITLY_ENABLED_TESTS
${TMP_LIST_OF_TEST_CLASSES} ${TMP_LIST_OF_COMMON_TEST_INSTANCES} ${TMP_LIST_OF_ARM_TEST_INSTANCES} ${TMP_LIST_OF_ARM_SUBGRAPH_TESTS})
set(TMP_EXPLICITLY_ENABLED_TESTS "${TMP_LIST_OF_EXPLICITLY_ENABLED_TESTS}")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,10 @@ std::vector<std::string> disabledTestPatterns() {
#if defined(OPENVINO_ARCH_ARM)
// TODO: rounding errors
retVector.emplace_back(R"(.*iv_secondaryInputType=PARAMETER_opType=VECTOR_NetType=i32.*)");
// not supported
retVector.emplace_back(R"(.*fma.*EltwiseLayerCPUTest.*)");
retVector.emplace_back(R"(.*int_jit.*EltwiseLayerCPUTest.*)");
retVector.emplace_back(R"(.*dyn.*EltwiseChainTest.*)");
#endif

#if !defined(OPENVINO_ARCH_X86_64)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,6 @@ const std::vector<ElementType>& netType() {
const std::vector<ElementType>& netTypeJit() {
static const std::vector<ElementType> netType = {
ElementType::i32,
ElementType::u32,
ElementType::f32};
return netType;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ std::vector<std::vector<ElementType>> inputPrecisions = {

std::vector<std::vector<EltwiseTypes>> eltwiseOps = {
{ EltwiseTypes::ADD, EltwiseTypes::MULTIPLY, EltwiseTypes::SUBTRACT },
{ EltwiseTypes::DIVIDE, EltwiseTypes::SQUARED_DIFF, EltwiseTypes::ADD },
{ EltwiseTypes::DIVIDE, EltwiseTypes::SQUARED_DIFF, EltwiseTypes::ADD }
};

INSTANTIATE_TEST_SUITE_P(smoke_EltwiseChain, EltwiseChainTest,
Expand All @@ -178,6 +178,40 @@ INSTANTIATE_TEST_SUITE_P(smoke_EltwiseChain, EltwiseChainTest,
::testing::Values(ov::test::utils::DEVICE_CPU)),
EltwiseChainTest::getTestCaseName);


// Static shapes for the smoke_EltwiseChain_MergeConvert suite: one case with three identical
// {1, 1, 2, 3} shapes.
std::vector<std::vector<ov::Shape>> inputShapesConvert = {
{{1, 1, 2, 3}, {1, 1, 2, 3}, {1, 1, 2, 3}}
};

// Precision triples for the suite: the first entry varies (i8, u8, i16, u16, i32, f16, f32) while
// the other two are always f32.
// NOTE(review): presumably the first entry is the precision fed into the Convert - confirm against
// EltwiseChainTest.
std::vector<std::vector<ElementType>> inputPrecisionsConvert = {
{ ElementType::i8, ElementType::f32, ElementType::f32 },
{ ElementType::u8, ElementType::f32, ElementType::f32 },
{ ElementType::i16, ElementType::f32, ElementType::f32 },
{ ElementType::u16, ElementType::f32, ElementType::f32 },
{ ElementType::i32, ElementType::f32, ElementType::f32 },
// { ElementType::u32, ElementType::f32, ElementType::f32 }, // plugin doesn't support
{ ElementType::f16, ElementType::f32, ElementType::f32 },
{ ElementType::f32, ElementType::f32, ElementType::f32 },
};

// Two-operation chains: CONVERT_OP followed by a single arithmetic eltwise operation.
std::vector<std::vector<EltwiseTypes>> eltwiseOpsConvert = {
{ EltwiseTypes::CONVERT_OP, EltwiseTypes::MULTIPLY },
{ EltwiseTypes::CONVERT_OP, EltwiseTypes::ADD },
{ EltwiseTypes::CONVERT_OP, EltwiseTypes::DIVIDE },
{ EltwiseTypes::CONVERT_OP, EltwiseTypes::SUBTRACT },
{ EltwiseTypes::CONVERT_OP, EltwiseTypes::POWER },
};

// Instantiates EltwiseChainTest over the Convert tables above, with InputLayerType::CONSTANT and
// the CPU device. The meaning of the `false` value is defined by EltwiseChainTest's parameter
// tuple, which is not visible in this hunk.
INSTANTIATE_TEST_SUITE_P(smoke_EltwiseChain_MergeConvert, EltwiseChainTest,
::testing::Combine(
::testing::ValuesIn(static_shapes_to_test_representation(inputShapesConvert)),
::testing::Values(InputLayerType::CONSTANT),
::testing::ValuesIn(inputPrecisionsConvert),
::testing::ValuesIn(eltwiseOpsConvert),
::testing::Values(false),
::testing::Values(ov::test::utils::DEVICE_CPU)),
EltwiseChainTest::getTestCaseName);

std::vector<std::vector<ov::Shape>> inputShapesFQ = {
{{1, 2, 2, 3}, {1, 2, 2, 3}, {1, 2, 2, 3}, {1, 2, 2, 3}},
{{2, 33, 5, 5}, {2, 33, 5, 5}, {2, 33, 1, 5}, {2, 33, 5, 5}},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,8 @@ enum EltwiseTypes {
BITWISE_AND,
BITWISE_NOT,
BITWISE_OR,
BITWISE_XOR
BITWISE_XOR,
CONVERT_OP
};

enum SqueezeOpType {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include "openvino/op/bitwise_not.hpp"
#include "openvino/op/bitwise_or.hpp"
#include "openvino/op/bitwise_xor.hpp"
#include "openvino/op/convert.hpp"
#include "openvino/op/divide.hpp"
#include "openvino/op/erf.hpp"
#include "openvino/op/floor_mod.hpp"
Expand Down Expand Up @@ -51,6 +52,8 @@ std::shared_ptr<ov::Node> make_eltwise(const ov::Output<Node>& in0,
return std::make_shared<ov::op::v13::BitwiseOr>(in0, in1);
case ov::test::utils::EltwiseTypes::BITWISE_XOR:
return std::make_shared<ov::op::v13::BitwiseXor>(in0, in1);
case ov::test::utils::EltwiseTypes::CONVERT_OP:
return std::make_shared<ov::op::v0::Convert>(in0, ov::element::f32);
default: {
OPENVINO_THROW("Incorrect type of Eltwise operation");
}
Expand Down
3 changes: 3 additions & 0 deletions src/tests/test_utils/common_test_utils/src/test_enums.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,9 @@ std::ostream& operator<<(std::ostream& os, const ov::test::utils::EltwiseTypes t
case ov::test::utils::EltwiseTypes::BITWISE_XOR:
os << "BitwiseXor";
break;
case ov::test::utils::EltwiseTypes::CONVERT_OP:
os << "Convert";
break;
default:
throw std::runtime_error("NOT_SUPPORTED_OP_TYPE");
}
Expand Down

0 comments on commit 2b03e32

Please sign in to comment.