From 286e3386608889225657b6929ef64df8972bbb85 Mon Sep 17 00:00:00 2001
From: Zhiyuan Tan <66934674+BHbean@users.noreply.github.com>
Date: Thu, 24 Oct 2024 15:57:41 +0800
Subject: [PATCH] [RISCV64] add nhwc layout support for eltwise executor (#26531)

### Details:
 - *Add `nhwc` layout support for the SHL eltwise executor*
 - *Enable some tests with the `nhwc` layout*

### Tickets:
 - *N/A*
---
 src/plugins/intel_cpu/src/nodes/eltwise.cpp   |  2 +
 .../src/nodes/executors/shl/shl_eltwise.cpp   | 39 +++++++++++--------
 .../src/common/concat_conv_sum_inplace.cpp    |  2 -
 .../utils/riscv64/filter_cpu_info.cpp         |  9 +++--
 4 files changed, 31 insertions(+), 21 deletions(-)

diff --git a/src/plugins/intel_cpu/src/nodes/eltwise.cpp b/src/plugins/intel_cpu/src/nodes/eltwise.cpp
index ed4d936fa49ae6..5c3a358dff9d38 100644
--- a/src/plugins/intel_cpu/src/nodes/eltwise.cpp
+++ b/src/plugins/intel_cpu/src/nodes/eltwise.cpp
@@ -2583,6 +2583,8 @@ void Eltwise::initSupportedPrimitiveDescriptors() {
         supportedPrimitiveDescriptors.emplace_back(nodeDesc);
     };
 
+    if (isChannelsFirstApplicable)
+        addDesc(supportedPrimitiveDescriptors, ChannelsFirst);
     addDesc(supportedPrimitiveDescriptors, Planar);
 
     canUseEltwiseExecPtr = !supportedPrimitiveDescriptors.empty();
diff --git a/src/plugins/intel_cpu/src/nodes/executors/shl/shl_eltwise.cpp b/src/plugins/intel_cpu/src/nodes/executors/shl/shl_eltwise.cpp
index 9506fa74505636..54f00ba20538b3 100644
--- a/src/plugins/intel_cpu/src/nodes/executors/shl/shl_eltwise.cpp
+++ b/src/plugins/intel_cpu/src/nodes/executors/shl/shl_eltwise.cpp
@@ -6,21 +6,11 @@
 #include "shl_utils.hpp"
 #include "csinn/csi_nn.h"
 #include "utils/debug_capabilities.h"
+#include "memory_desc/cpu_blocked_memory_desc.h"
 
 namespace ov {
 namespace intel_cpu {
 
-inline void log_unsupported_prec(const std::vector<MemoryDescPtr>& srcDescs,
-                                 const std::vector<MemoryDescPtr>& dstDescs,
-                                 const Algorithm eltwiseAlgorithm) {
-    std::string srcPrec;
-    for (size_t i = 0; i < srcDescs.size(); i++) {
-        srcPrec += srcDescs[i]->getPrecision().to_string() + " ";
-    }
-    DEBUG_LOG(algToString(eltwiseAlgorithm), ": provided combination of src precisions: [", srcPrec,
-              "] and dst precision: ", dstDescs[0]->getPrecision().to_string(), " is not supported");
-}
-
 bool ShlEltwiseExecutor::isEltwiseAlgorithmSupported(Algorithm algorithm) {
     if (one_of(algorithm, Algorithm::EltwiseAdd,
                Algorithm::EltwiseSubtract,
@@ -53,6 +43,26 @@ bool ShlEltwiseExecutorBuilder::isSupported(const EltwiseAttrs& eltwiseAttrs,
         return false;
     }
 
+    // check whether input and output layouts are equal
+    if (srcDescs.front()->hasLayoutType(LayoutType::nCsp16c) || srcDescs.front()->hasLayoutType(LayoutType::nCsp8c)) {
+        DEBUG_LOG("ShlEltwise does not support 'nCsp16c' or 'nCsp8c' layouts");
+        return false;
+    }
+    const auto unifiedLayout = srcDescs.front()->hasLayoutType(LayoutType::ncsp) ? LayoutType::ncsp : LayoutType::nspc;
+    const auto unifiedRank = srcDescs.front()->as<BlockedMemoryDesc>()->getBlockDims().size();
+    auto has_unified_layout = [unifiedLayout, unifiedRank](const MemoryDescPtr& desc) {
+        if (desc->hasLayoutType(LayoutType::nspc)) { // ensure the same rank
+            if (desc->as<BlockedMemoryDesc>()->getBlockDims().size() != unifiedRank)
+                return false;
+        }
+        return desc->hasLayoutType(unifiedLayout);
+    };
+    if (!(std::all_of(srcDescs.cbegin(), srcDescs.cend(), has_unified_layout) &&
+          std::all_of(dstDescs.cbegin(), dstDescs.cend(), has_unified_layout))) {
+        DEBUG_LOG("ShlEltwise needs to ensure all inputs and outputs are in the same 'ncsp' or 'nspc' layouts");
+        return false;
+    }
+
     for (const auto& srcDesc : srcDescs) {
         csinn_layout_enum supportedLayout = getShlDataLayoutByMemoryDesc(srcDesc);
         switch (eltwiseAttrs.algorithm) {
@@ -93,14 +103,11 @@ bool ShlEltwiseExecutor::init(const EltwiseAttrs &eltwiseAttrs,
     srcTensors = std::vector<ShlTensor>(srcDescs.size());
     dstTensors = std::vector<ShlTensor>(dstDescs.size());
 
-    // Allocate Shl session
-    sess = ShlSession();
-
     for (size_t i = 0; i < srcDescs.size(); i++) {
-        srcTensors[i] = ShlTensor(sess, precisionToShlDataType(srcDescs[i]->getPrecision()), getShlDataLayoutByMemoryDesc(srcDescs[i]), srcDescs[i]->getShape().getStaticDims());
+        srcTensors[i] = ShlTensor(sess, precisionToShlDataType(srcDescs[i]->getPrecision()), getShlDataLayoutByMemoryDesc(srcDescs[i]), srcDescs[i]->as<BlockedMemoryDesc>()->getBlockDims());
     }
     for (size_t i = 0; i < dstDescs.size(); i++) {
-        dstTensors[i] = ShlTensor(sess, precisionToShlDataType(dstDescs[i]->getPrecision()), getShlDataLayoutByMemoryDesc(dstDescs[i]), dstDescs[i]->getShape().getStaticDims());
+        dstTensors[i] = ShlTensor(sess, precisionToShlDataType(dstDescs[i]->getPrecision()), getShlDataLayoutByMemoryDesc(dstDescs[i]), dstDescs[i]->as<BlockedMemoryDesc>()->getBlockDims());
     }
 
     std::function<int()> initFunc = nullptr;
diff --git a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/concat_conv_sum_inplace.cpp b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/concat_conv_sum_inplace.cpp
index 7000812e6f672e..ffd87f159cc38e 100644
--- a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/concat_conv_sum_inplace.cpp
+++ b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/concat_conv_sum_inplace.cpp
@@ -48,8 +48,6 @@ class ReLuConcatConvSumInPlaceTest : virtual public SubgraphBaseStaticTest {
     const size_t convOutChannels = 64;
 #if defined(OPENVINO_ARCH_X86) || defined(OPENVINO_ARCH_X86_64)
     const auto targetFormat = with_cpu_x86_avx512_core() ? nChw16c : nChw8c;
-#elif defined(OV_CPU_WITH_SHL)
-    const auto targetFormat = nchw;
 #else
     const auto targetFormat = nhwc;
 #endif
diff --git a/src/plugins/intel_cpu/tests/functional/utils/riscv64/filter_cpu_info.cpp b/src/plugins/intel_cpu/tests/functional/utils/riscv64/filter_cpu_info.cpp
index 72a3d07f2640f4..71360dca2c92e0 100644
--- a/src/plugins/intel_cpu/tests/functional/utils/riscv64/filter_cpu_info.cpp
+++ b/src/plugins/intel_cpu/tests/functional/utils/riscv64/filter_cpu_info.cpp
@@ -64,9 +64,12 @@ std::vector<CPUSpecificParams> filterCPUInfoForDeviceWithFP16(const std::vector<CPUSpecificParams>& allParams) {
 }
 
 std::vector<CPUSpecificParams> filterCPUSpecificParams(const std::vector<CPUSpecificParams>& paramsVector) {
-    static const std::vector<CPUTestUtils::cpu_memory_format_t> supported_f = {CPUTestUtils::cpu_memory_format_t::ncw,
-                                                                               CPUTestUtils::cpu_memory_format_t::nchw,
-                                                                               CPUTestUtils::cpu_memory_format_t::ncdhw};
+    static const std::vector<CPUTestUtils::cpu_memory_format_t> supported_f = {CPUTestUtils::cpu_memory_format_t::nwc,
+                                                                               CPUTestUtils::cpu_memory_format_t::ncw,
+                                                                               CPUTestUtils::cpu_memory_format_t::nchw,
+                                                                               CPUTestUtils::cpu_memory_format_t::nhwc,
+                                                                               CPUTestUtils::cpu_memory_format_t::ndhwc,
+                                                                               CPUTestUtils::cpu_memory_format_t::ncdhw};
     std::vector<CPUSpecificParams> filteredParamsVector = paramsVector;
    filteredParamsVector.erase(std::remove_if(filteredParamsVector.begin(),
                                              filteredParamsVector.end(),
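
For context on the layout gate this patch adds to `ShlEltwiseExecutorBuilder::isSupported`, the sketch below restates the same unified-layout rule in a standalone, compilable form. It is an illustration only, not plugin code: `LayoutTag`, `DescView`, and `layouts_supported` are hypothetical stand-ins for the plugin's `LayoutType` tags and `MemoryDesc` objects.

```cpp
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <vector>

// Hypothetical stand-ins for the plugin's LayoutType tags and memory descriptors.
enum class LayoutTag { ncsp, nspc, nCsp8c, nCsp16c };

struct DescView {
    LayoutTag layout;
    std::size_t rank;  // number of blocked dims, e.g. 4 for nhwc
};

// Mirrors the rule added to ShlEltwiseExecutorBuilder::isSupported: reject
// blocked channel layouts, then require every src/dst to share one unified
// layout and, for nspc, the same rank as the first source.
bool layouts_supported(const std::vector<DescView>& srcs,
                       const std::vector<DescView>& dsts) {
    const DescView& first = srcs.front();
    if (first.layout == LayoutTag::nCsp8c || first.layout == LayoutTag::nCsp16c)
        return false;  // no SHL kernels for blocked layouts
    const LayoutTag unified =
        (first.layout == LayoutTag::ncsp) ? LayoutTag::ncsp : LayoutTag::nspc;
    const std::size_t unifiedRank = first.rank;
    auto has_unified_layout = [&](const DescView& d) {
        if (d.layout == LayoutTag::nspc && d.rank != unifiedRank)
            return false;  // ensure the same rank
        return d.layout == unified;
    };
    return std::all_of(srcs.cbegin(), srcs.cend(), has_unified_layout) &&
           std::all_of(dsts.cbegin(), dsts.cend(), has_unified_layout);
}

int main() {
    // All operands nhwc (nspc, rank 4): accepted.
    assert(layouts_supported({{LayoutTag::nspc, 4}, {LayoutTag::nspc, 4}},
                             {{LayoutTag::nspc, 4}}));
    // Mixed nchw/nhwc operands: rejected, one unified layout is required.
    assert(!layouts_supported({{LayoutTag::ncsp, 4}, {LayoutTag::nspc, 4}},
                              {{LayoutTag::nspc, 4}}));
    // Blocked first input (nChw16c): rejected outright.
    assert(!layouts_supported({{LayoutTag::nCsp16c, 4}}, {{LayoutTag::ncsp, 4}}));
    return 0;
}
```

The rank guard matters because descriptors of different ranks (for example a 3D `nwc` and a 4D `nhwc`) can both report the channels-last `nspc` layout; pinning the rank of the first source keeps a single SHL call from mixing them.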