diff --git a/src/plugins/intel_cpu/src/nodes/bin_conv.cpp b/src/plugins/intel_cpu/src/nodes/bin_conv.cpp
index f2b5a133307b71..c9d9a0563aa128 100644
--- a/src/plugins/intel_cpu/src/nodes/bin_conv.cpp
+++ b/src/plugins/intel_cpu/src/nodes/bin_conv.cpp
@@ -42,7 +42,7 @@ using namespace Xbyak;
 namespace ov {
 namespace intel_cpu {
 namespace node {
-
+#if defined(OPENVINO_ARCH_X86_64)
 #define GET_OFF(field) offsetof(jit_bin_conv_call_args, field)
 
 template <cpu_isa_t isa>
@@ -874,7 +874,7 @@ struct jit_uni_bin_conv_kernel_f32 : public jit_uni_bin_conv_kernel, public jit_
         }
     }
 };
-
+#endif
 bool BinaryConvolution::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
     try {
         if (isDynamicNgraphNode(op)) {
@@ -913,7 +913,7 @@ BinaryConvolution::BinaryConvolution(const std::shared_ptr<ngraph::Node>& op, co
         }
         paddingL = binConv->get_pads_begin();
         paddingR = binConv->get_pads_end();
-
+#if defined(OPENVINO_ARCH_X86_64)
         if (mayiuse(x64::avx512_core)) {
             implType = impl_desc_type::jit_avx512;
         } else if (mayiuse(x64::avx2)) {
@@ -923,6 +923,9 @@ BinaryConvolution::BinaryConvolution(const std::shared_ptr<ngraph::Node>& op, co
         } else {
             implType = impl_desc_type::ref;
         }
+#else
+        implType = impl_desc_type::ref;
+#endif
     } else {
         IE_THROW(NotImplemented) << errorMessage;
     }
@@ -1092,7 +1095,7 @@ void BinaryConvolution::createPrimitive() {
                 IMPLICATION(jcp.kw > 7, (jcp.t_pad == 0 && jcp.l_pad == 0) || (jcp.stride_w == 1 && jcp.stride_h == 1));
     if (!args_ok)
         IE_THROW() << "BinaryConvolution with name '" << getName() << "' has unsupported parameters";
-
+#if defined(OPENVINO_ARCH_X86_64)
     if (implType == impl_desc_type::jit_avx512) {
         bin_conv_kernel.reset(new jit_uni_bin_conv_kernel_f32<x64::avx512_core>(jcp, jcp_dw_conv, *attr.get()));
    } else if (implType == impl_desc_type::jit_avx2) {
@@ -1102,6 +1105,7 @@ void BinaryConvolution::createPrimitive() {
 
     if (bin_conv_kernel)
         bin_conv_kernel->create_ker();
+#endif
 }
 
 bool BinaryConvolution::canFuse(const NodePtr& node) const {
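
The hunks above establish the pattern repeated across every file in this patch: the JIT kernel definitions and the runtime ISA dispatch are compiled only for x86-64, and every other target falls back to the reference implementation at build time. A minimal self-contained sketch of that shape, assuming simplified stand-ins for the dnnl and plugin types used in the real code (the enum values and the mayiuse stub are illustrative, not the actual APIs):

    enum class impl_desc_type { ref, jit_sse42, jit_avx2, jit_avx512 };

    #if defined(OPENVINO_ARCH_X86_64)
    enum class isa { sse41, avx2, avx512_core };
    // Stand-in for dnnl::impl::cpu::x64::mayiuse(), which queries CPUID at runtime.
    static bool mayiuse(isa) { return true; }
    #endif

    impl_desc_type selectImplType() {
    #if defined(OPENVINO_ARCH_X86_64)
        if (mayiuse(isa::avx512_core)) return impl_desc_type::jit_avx512;
        if (mayiuse(isa::avx2))        return impl_desc_type::jit_avx2;
        if (mayiuse(isa::sse41))       return impl_desc_type::jit_sse42;
        return impl_desc_type::ref;
    #else
        // Non-x86 builds never reference the JIT kernels at all, so the kernel
        // structs themselves can be compiled out, as the #if around them does.
        return impl_desc_type::ref;
    #endif
    }
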
diff --git a/src/plugins/intel_cpu/src/nodes/def_conv.cpp b/src/plugins/intel_cpu/src/nodes/def_conv.cpp
index 92c1cf7450200c..4cd3e4181a69a9 100644
--- a/src/plugins/intel_cpu/src/nodes/def_conv.cpp
+++ b/src/plugins/intel_cpu/src/nodes/def_conv.cpp
@@ -23,7 +23,7 @@ using namespace Xbyak;
 namespace ov {
 namespace intel_cpu {
 namespace node {
-
+#if defined(OPENVINO_ARCH_X86_64)
 #define GET_OFF(field) offsetof(jit_def_conv_call_args, field)
 
 template <cpu_isa_t isa>
@@ -667,7 +667,7 @@ struct jit_uni_def_conv_kernel_f32 : public jit_uni_def_conv_kernel, public jit_
         pop(reg_sampled_offs);
     }
 };
-
+#endif
 bool DeformableConvolution::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
     try {
         if (!one_of(op->get_type_info(),
@@ -820,6 +820,7 @@ void DeformableConvolution::initSupportedPrimitiveDescriptors() {
     config.outConfs[0].inPlace(-1);
 
     impl_desc_type impl_type;
+#if defined(OPENVINO_ARCH_X86_64)
     const int simd_w = mayiuse(cpu::x64::avx512_core) ? 16 : 8;
     auto &weiDims = getInputShapeAtPort(WEI_ID).getDims();
@@ -842,7 +843,10 @@ void DeformableConvolution::initSupportedPrimitiveDescriptors() {
     } else {
         impl_type = impl_desc_type::ref;
     }
-
+#else
+    impl_type = impl_desc_type::ref;
+#endif
+#if defined(OPENVINO_ARCH_X86_64)
     if (!enforceRef && mayiuse(cpu::x64::sse41)) {
         // optimized implementation
         auto dataFormat = memory::format_tag::nhwc;
@@ -864,6 +868,7 @@ void DeformableConvolution::initSupportedPrimitiveDescriptors() {
                                                                       memory::data_type::f32, dataFormat));
         supportedPrimitiveDescriptors.push_back({config, impl_type});
     } else {
+#endif
         // reference implementation
         config.inConfs[DATA_ID].setMemDesc(std::make_shared<DnnlBlockedMemoryDesc>(getInputShapeAtPort(DATA_ID),
                                                                                    memory::data_type::f32, memory::format_tag::nchw));
@@ -878,7 +883,9 @@ void DeformableConvolution::initSupportedPrimitiveDescriptors() {
         config.outConfs[0].setMemDesc(std::make_shared<DnnlBlockedMemoryDesc>(getOutputShapeAtPort(DATA_ID),
                                                                               memory::data_type::f32, memory::format_tag::nchw));
         supportedPrimitiveDescriptors.push_back({config, impl_type});
+#if defined(OPENVINO_ARCH_X86_64)
     }
+#endif
 }
 
 void DeformableConvolution::DefConvExecutor::prepareSamplingWeights(
@@ -1029,7 +1036,7 @@ DeformableConvolution::DefConvExecutor::DefConvExecutor(const DefConvAttr &defCo
     if (withModulation) {
         modStrides = descVector[MOD_ID]->getStrides();
     }
-
+#if defined(OPENVINO_ARCH_X86_64)
     const VectorDims srcDims = descVector[DATA_ID]->getShape().getStaticDims();
     const VectorDims weiDims = descVector[WEI_ID]->getShape().getStaticDims();
     const VectorDims dstDims = descVector[descVector.size() - 1]->getShape().getStaticDims();
@@ -1080,11 +1087,13 @@ DeformableConvolution::DefConvExecutor::DefConvExecutor(const DefConvAttr &defCo
     jcp.nb_oc_blocking = !mayiuse(cpu::x64::avx2) ? 2 : 4;
 
     jcp.nthr = dnnl_get_max_threads();
+#endif
 }
 
 DeformableConvolution::DefConvJitExecutor::DefConvJitExecutor(const DefConvAttr &defConvAttr,
                             const std::vector<std::shared_ptr<BlockedMemoryDesc>> &descVector) :
     DefConvExecutor(defConvAttr, descVector) {
+#if defined(OPENVINO_ARCH_X86_64)
     if (mayiuse(cpu::x64::avx512_core)) {
         def_conv_kernel.reset(new jit_uni_def_conv_kernel_f32<cpu::x64::avx512_core>(jcp));
     } else if (mayiuse(cpu::x64::avx2)) {
@@ -1099,6 +1108,7 @@ DeformableConvolution::DefConvJitExecutor::DefConvJitExecutor(const DefConvAttr
     } else {
         IE_THROW() << "Can't compile DefConvJitExecutor";
     }
+#endif
 }
 
 void DeformableConvolution::DefConvRefExecutor::exec(const float* src, const float* offsets,
diff --git a/src/plugins/intel_cpu/src/nodes/dft.cpp b/src/plugins/intel_cpu/src/nodes/dft.cpp
index 97a62297cac0db..b5433b4003298a 100644
--- a/src/plugins/intel_cpu/src/nodes/dft.cpp
+++ b/src/plugins/intel_cpu/src/nodes/dft.cpp
@@ -535,10 +535,11 @@ void DFT::prepareParams() {
             hasFFT = true;
         }
     }
-
+#if defined(OPENVINO_ARCH_X86_64)
     if (mayiuse(cpu::x64::sse41)) {
         createJITKernels(hasDFT, hasFFT);
     }
+#endif
 }
 
 std::vector<int32_t> DFT::getAxes() const {
@@ -553,7 +554,7 @@ std::vector<int32_t> DFT::getAxes() const {
     std::sort(axes.begin(), axes.end());
     return axes;
 }
-
+#if defined(OPENVINO_ARCH_X86_64)
 void DFT::createJITKernels(bool hasDFT, bool hasFFT) {
     if (hasDFT && dftKernel == nullptr) {
         if (mayiuse(cpu::x64::avx512_core)) {
@@ -585,7 +586,7 @@ void DFT::createJITKernels(bool hasDFT, bool hasFFT) {
             fftKernel->create_ker();
         }
     }
-
+#endif
 }  // namespace node
 }  // namespace intel_cpu
 }  // namespace ov
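
dft.cpp shows the three places that have to stay in sync when a member function is compiled out: the guarded call site in prepareParams, the guarded definition of createJITKernels, and (in dft.h below) the guarded declaration. A stripped-down sketch of that shape, with hypothetical names standing in for the node's real members:

    #include <memory>

    struct KernelBase { virtual ~KernelBase() = default; };

    class DftLikeNode {
    public:
        void prepareParams();
    private:
    #if defined(OPENVINO_ARCH_X86_64)
        void createJITKernels(bool hasDFT, bool hasFFT);  // declared only on x86-64
        std::unique_ptr<KernelBase> dftKernel;
    #endif
    };

    void DftLikeNode::prepareParams() {
        bool hasDFT = true, hasFFT = false;
    #if defined(OPENVINO_ARCH_X86_64)
        createJITKernels(hasDFT, hasFFT);  // guarded call site
    #else
        (void)hasDFT; (void)hasFFT;        // reference path consumes these elsewhere
    #endif
    }

    #if defined(OPENVINO_ARCH_X86_64)
    void DftLikeNode::createJITKernels(bool, bool) { /* build the x86 JIT kernels */ }
    #endif

If any one of the three guards is missing, the build breaks either with an undefined symbol (declaration without definition) or an undeclared identifier (call site without declaration).
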
diff --git a/src/plugins/intel_cpu/src/nodes/dft.h b/src/plugins/intel_cpu/src/nodes/dft.h
index 144ebde8989c78..ccaae57f137a12 100644
--- a/src/plugins/intel_cpu/src/nodes/dft.h
+++ b/src/plugins/intel_cpu/src/nodes/dft.h
@@ -30,8 +30,9 @@ class DFT : public Node {
 
 private:
     std::vector<int32_t> getAxes() const;
+#if defined(OPENVINO_ARCH_X86_64)
     void createJITKernels(bool hasDFT, bool hasFFT);
-
+#endif
     void dftNd(float* output,
                const VectorDims& outputShape,
                const VectorDims& outputStrides,
diff --git a/src/plugins/intel_cpu/src/nodes/extract_image_patches.cpp b/src/plugins/intel_cpu/src/nodes/extract_image_patches.cpp
index 27732dea2d31ac..3fe4800d4e7348 100644
--- a/src/plugins/intel_cpu/src/nodes/extract_image_patches.cpp
+++ b/src/plugins/intel_cpu/src/nodes/extract_image_patches.cpp
@@ -25,7 +25,7 @@ using namespace Xbyak;
 namespace ov {
 namespace intel_cpu {
 namespace node {
-
+#if defined(OPENVINO_ARCH_X86_64)
 #define GET_OFF(field) offsetof(jit_extract_image_patches_args, field)
 
 template <cpu_isa_t isa>
@@ -270,7 +270,7 @@ struct jit_extract_image_patches_kernel : public jit_uni_extract_image_patches_k
             dd(i * jpp.SW * jpp.dtype_size);
     }
 };
-
+#endif
 bool ExtractImagePatches::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
     try {
         auto extImgPatcher = ngraph::as_type_ptr<const ngraph::opset3::ExtractImagePatches>(op);
@@ -378,8 +378,13 @@ void ExtractImagePatches::prepareParams() {
     const auto& out_dims = getChildEdgesAtPort(0)[0]->getMemory().getStaticDims();
     const auto prcSize = getOriginalInputPrecisionAtPort(0).size();
     ExtractImagePatchesKey key = {in_dims, out_dims, _ksizes, _strides, _rates, _auto_pad, prcSize};
+#if defined(OPENVINO_ARCH_X86_64)
     const auto isJit = mayiuse(x64::sse41);
+#else
+    const auto isJit = false;
+#endif
     auto buildExecutor = [&isJit](const ExtractImagePatchesKey& key) -> executorPtr {
+#if defined(OPENVINO_ARCH_X86_64)
         if (isJit) {
             return std::make_shared<ExtractImagePatchesJitExecutor>(key.inDims,
                                                                     key.outDims,
@@ -389,6 +394,7 @@ void ExtractImagePatches::prepareParams() {
                                                                     key.padType,
                                                                     key.prcSize);
         } else {
+#endif
             return std::make_shared<ExtractImagePatchesRefExecutor>(key.inDims,
                                                                     key.outDims,
                                                                     key.kSizes,
@@ -396,7 +402,9 @@ void ExtractImagePatches::prepareParams() {
                                                                     key.rates,
                                                                     key.padType,
                                                                     key.prcSize);
+#if defined(OPENVINO_ARCH_X86_64)
         }
+#endif
     };
     auto cache = context->getParamsCache();
     auto result = cache->getOrCreate(key, buildExecutor);
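
Note the bracketing in the buildExecutor hunks: the guard inserted after `} else {` must be `#endif`, not `#else`, so that both preprocessed variants stay balanced. On x86-64 the full if/else survives; elsewhere the two brace fragments drop out together and only the reference return remains. In miniature, with JitExec/RefExec standing in for the two executors:

    #include <memory>

    struct Exec { virtual ~Exec() = default; };
    struct JitExec : Exec {};
    struct RefExec : Exec {};

    std::shared_ptr<Exec> build([[maybe_unused]] bool isJit) {
    #if defined(OPENVINO_ARCH_X86_64)
        if (isJit) {
            return std::make_shared<JitExec>();
        } else {
    #endif
            return std::make_shared<RefExec>();
    #if defined(OPENVINO_ARCH_X86_64)
        }
    #endif
    }

Writing `#else` in place of that first `#endif` would compile the reference return only on non-x86 builds, leaving the x86 else-branch empty and the lambda without a return value when the JIT path is unavailable.
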
@@ -478,7 +487,7 @@ void ExtractImagePatches::ExtractImagePatchesRefExecutor::executeReference(
         memset(my_dst_ptr, 0, num_bytes_to_set);
     });
 }
-
+#if defined(OPENVINO_ARCH_X86_64)
 void ExtractImagePatches::ExtractImagePatchesJitExecutor::executeOptimizedGeneric(
     void* src, void* dst, const VectorDims& istrides, const VectorDims& ostrides) const {
     const char* src_data = reinterpret_cast<const char*>(src);
@@ -508,7 +517,7 @@ void ExtractImagePatches::ExtractImagePatchesJitExecutor::executeOptimizedGeneri
         (*pKernel)(&args);
     });
 }
-
+#endif
 jit_extract_image_patches_params ExtractImagePatches::ExtractImagePatchesExecutor::fillJpp(
     const VectorDims& inDims,
     const VectorDims& outDims,
@@ -564,6 +573,7 @@ jit_extract_image_patches_params ExtractImagePatches::ExtractImagePatchesExecuto
     }
 
     jpp.dtype_size = prcSize;
+#if defined(OPENVINO_ARCH_X86_64)
     if (mayiuse(x64::avx512_core)) {
         jpp.block_size = cpu_isa_traits<x64::avx512_core>::vlen / prcSize;
     } else if (mayiuse(x64::avx2)) {
@@ -573,10 +583,12 @@ jit_extract_image_patches_params ExtractImagePatches::ExtractImagePatchesExecuto
     } else {
         jpp.block_size = 1;
     }
-
+#else
+    jpp.block_size = 1;
+#endif
     return jpp;
 }
-
+#if defined(OPENVINO_ARCH_X86_64)
 ExtractImagePatches::ExtractImagePatchesJitExecutor::ExtractImagePatchesJitExecutor(
     const VectorDims& inDims,
     const VectorDims& outDims,
@@ -606,7 +617,7 @@ void ExtractImagePatches::ExtractImagePatchesJitExecutor::exec(
         IE_THROW() << "Can't execute, kernel for extract image patches node is not compiled";
     executeOptimizedGeneric(src, dst, istrides, ostrides);
 }
-
+#endif
 ExtractImagePatches::ExtractImagePatchesRefExecutor::ExtractImagePatchesRefExecutor(
     const VectorDims& inDims,
     const VectorDims& outDims,
diff --git a/src/plugins/intel_cpu/src/nodes/extract_image_patches.h b/src/plugins/intel_cpu/src/nodes/extract_image_patches.h
index 9e9b708e348e7e..188d310ab254e3 100644
--- a/src/plugins/intel_cpu/src/nodes/extract_image_patches.h
+++ b/src/plugins/intel_cpu/src/nodes/extract_image_patches.h
@@ -95,7 +95,7 @@ class ExtractImagePatches : public Node {
 
     using executorPtr = std::shared_ptr<ExtractImagePatchesExecutor>;
     executorPtr execPtr = nullptr;
-
+#if defined(OPENVINO_ARCH_X86_64)
     struct ExtractImagePatchesJitExecutor : public ExtractImagePatchesExecutor {
         ExtractImagePatchesJitExecutor(
             const VectorDims& inDims,
@@ -111,7 +111,7 @@ class ExtractImagePatches : public Node {
     private:
         std::unique_ptr<jit_uni_extract_image_patches_kernel> pKernel;
     };
-
+#endif
    struct ExtractImagePatchesRefExecutor : public ExtractImagePatchesExecutor {
        ExtractImagePatchesRefExecutor(
            const VectorDims& inDims,
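
fillJpp gets an explicit `#else` branch so that non-x86 builds produce a scalar configuration rather than overwriting the ISA-derived block size after the guard closes. A condensed sketch of the block-size choice, assuming the vlen constants stand in for cpu_isa_traits<isa>::vlen (vector register width in bytes) and omitting the final scalar fallback the real x86 chain also carries:

    #include <cstddef>

    #if defined(OPENVINO_ARCH_X86_64)
    // Stand-ins for cpu_isa_traits<isa>::vlen and the mayiuse() runtime checks.
    static constexpr size_t vlen_avx512 = 64, vlen_avx2 = 32, vlen_sse41 = 16;
    static bool mayiuse_avx512() { return true; }
    static bool mayiuse_avx2()   { return true; }
    #endif

    size_t pickBlockSize(size_t prcSize) {
    #if defined(OPENVINO_ARCH_X86_64)
        if (mayiuse_avx512()) return vlen_avx512 / prcSize;  // e.g. 16 floats
        if (mayiuse_avx2())   return vlen_avx2 / prcSize;    // e.g. 8 floats
        return vlen_sse41 / prcSize;                         // e.g. 4 floats
    #else
        (void)prcSize;
        return 1;  // the scalar reference path handles one element at a time
    #endif
    }
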
diff --git a/src/plugins/intel_cpu/src/nodes/fake_quantize.cpp b/src/plugins/intel_cpu/src/nodes/fake_quantize.cpp
index 8a47a08be4a1cc..6b181b36b340a8 100644
--- a/src/plugins/intel_cpu/src/nodes/fake_quantize.cpp
+++ b/src/plugins/intel_cpu/src/nodes/fake_quantize.cpp
@@ -42,7 +42,7 @@ using namespace Xbyak;
 namespace ov {
 namespace intel_cpu {
 namespace node {
-
+#if defined(OPENVINO_ARCH_X86_64)
 #define GET_OFF(field) offsetof(jit_quantize_call_args, field)
 
 template <cpu_isa_t isa>
@@ -827,7 +827,7 @@ struct jit_uni_quantization_kernel : public jit_uni_quantize_kernel, public jit_
         }
     }
 };
-
+#endif
 bool FakeQuantize::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
     try {
         const auto fq = std::dynamic_pointer_cast<const ngraph::opset1::FakeQuantize>(op);
@@ -1263,6 +1263,7 @@ void FakeQuantize::initSupportedPrimitiveDescriptors() {
         return;
 
     impl_desc_type impl_type;
+#if defined(OPENVINO_ARCH_X86_64)
     if (mayiuse(cpu::x64::avx512_core)) {
         impl_type = impl_desc_type::jit_avx512;
     } else if (mayiuse(cpu::x64::avx2)) {
@@ -1272,8 +1273,14 @@ void FakeQuantize::initSupportedPrimitiveDescriptors() {
     } else {
         impl_type = impl_desc_type::ref;
     }
-
+#else
+    impl_type = impl_desc_type::ref;
+#endif
+#if defined(OPENVINO_ARCH_X86_64)
     if (!mayiuse(cpu::x64::sse41) || getAxis() != 1) {
+#else
+    if (getAxis() != 1) {
+#endif
         impl_type = impl_desc_type::ref;
 
         if (!isBinarization()) {
@@ -1414,6 +1421,7 @@ void FakeQuantize::prepareParams() {
     auto selectedPrimitiveDescriptor = getSelectedPrimitiveDescriptor();
     if (!selectedPrimitiveDescriptor)
         IE_THROW() << "CPU quantize node with name '" << getName() << "' doesn't have primitive descriptors.";
+#if defined(OPENVINO_ARCH_X86_64)
     if (selectedPrimitiveDescriptor->getImplementationType() != impl_desc_type::ref) {
         const auto& config = getSelectedPrimitiveDescriptor()->getConfig();
         const auto& inDims = getParentEdgesAtPort(0)[0]->getMemory().getStaticDims();
@@ -1439,6 +1447,7 @@ void FakeQuantize::prepareParams() {
         auto result = cache->getOrCreate(key, buildExecutor);
         execPtr = result.first;
     }
+#endif
 }
 
 void FakeQuantize::executeReference() {
@@ -1557,7 +1566,7 @@ void FakeQuantize::executeReference() {
         });
     }
 }
-
+#if defined(OPENVINO_ARCH_X86_64)
 void FakeQuantize::executeBinarization(const std::unique_ptr<jit_uni_quantize_kernel> &pKernel) const {
     auto &srcMemory = getParentEdgeAt(0)->getMemoryPtr();
     auto &dstMemory = getChildEdgeAt(0)->getMemoryPtr();
@@ -1728,7 +1737,7 @@ void FakeQuantize::executeQuantization(const std::unique_ptr
diff --git a/src/plugins/intel_cpu/src/nodes/fake_quantize.h b/src/plugins/intel_cpu/src/nodes/fake_quantize.h
--- a/src/plugins/intel_cpu/src/nodes/fake_quantize.h
+++ b/src/plugins/intel_cpu/src/nodes/fake_quantize.h
     using executorPtr = std::shared_ptr<FakeQuantizeExecutor>;
     executorPtr execPtr = nullptr;
-
+#if defined(OPENVINO_ARCH_X86_64)
     struct FakeQuantizeJitExecutor : public FakeQuantizeExecutor {
         FakeQuantizeJitExecutor(const jit_quantize_params &_jqp);
         void exec(const FakeQuantize& node) override;
         std::unique_ptr<jit_uni_quantize_kernel> pKernel;
     };
-
+#endif
     void init() override;
     std::vector<LayoutType> getDataFormats() const;
     void initializePostOpData(const VectorDims &postOpDims, const size_t bufferAlignment, bool doRounding);
     void initializePostOpDataLegacy(const VectorDims &dims, const size_t bufferAlignment);
     void executeReference();
+#if defined(OPENVINO_ARCH_X86_64)
     void executeBinarization(const std::unique_ptr<jit_uni_quantize_kernel> &pKernel) const;
     void executeQuantization(const std::unique_ptr<jit_uni_quantize_kernel> &pKernel) const;
+#endif
     void appendMemory(const size_t dataSize, const void *data, MemoryPtr &memPtr, std::vector<MemoryPtr>& postOpsMem);
     void appendMemory(const size_t dataSize, const void *data, MemoryPtr &memPtr, std::vector<const void*>& postOpsMem);
diff --git a/src/plugins/intel_cpu/src/nodes/normalize.cpp b/src/plugins/intel_cpu/src/nodes/normalize.cpp
index 768e5771111fd4..0c4451c2736c6b 100644
--- a/src/plugins/intel_cpu/src/nodes/normalize.cpp
+++ b/src/plugins/intel_cpu/src/nodes/normalize.cpp
@@ -32,8 +32,9 @@ using namespace dnnl::impl::cpu::x64;
 using namespace dnnl::impl::utils;
 using namespace Xbyak;
 
+#if defined(OPENVINO_ARCH_X86_64)
 #define GET_OFF(field) offsetof(jit_normalize_call_args, field)
-
+#endif
 #define THROW_ERROR IE_THROW() << "NormalizeL2 layer with name '" << getName() << "' "
 
 namespace ov {
@@ -81,7 +82,7 @@ bool NormalizeKey::operator==(const NormalizeKey& rhs) const {
 static inline bool isFloatCompatible(memory::data_type type) {
     return memory::data_type::f32 == type || memory::data_type::bf16 == type;
 }
-
+#if defined(OPENVINO_ARCH_X86_64)
 template <cpu_isa_t isa>
 struct jit_uni_normalize_modulo_kernel_f32 : public jit_uni_normalize_modulo_kernel, public jit_generator {
     DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_uni_normalize_modulo_kernel_f32)
@@ -693,7 +694,7 @@ struct jit_uni_normalize_kernel_f32 : public jit_uni_normalize_kernel, public ji
         }
     }
 };
-
+#endif
 bool NormalizeL2::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
     try {
         auto norm = ov::as_type_ptr<const ngraph::op::v0::NormalizeL2>(op);
@@ -966,7 +967,7 @@ class NormalizeL2::NormalizeL2CornerCaseExecutor : public NormalizeL2::Normalize
 // *=================* *======* *=================*
 
 // *=================* JIT case *=================*
-
+#if defined(OPENVINO_ARCH_X86_64)
 template <typename in_data_t, typename out_data_t>
 class NormalizeL2::NormalizeL2JitExecutor : public NormalizeL2::NormalizeL2Executor {
 public:
@@ -1296,7 +1297,7 @@ class NormalizeL2::NormalizeL2JitExecutor : public NormalizeL2::NormalizeL2Execu
     std::shared_ptr<jit_uni_normalize_modulo_kernel> normalize_modulo_kernel;
     std::shared_ptr<jit_uni_normalize_kernel> normalize_kernel;
 };
-
+#endif
 // *=================* *======* *=================*
 
 // *=============* Reference case *===============*
@@ -1492,8 +1493,10 @@ std::shared_ptr<NormalizeL2::NormalizeL2Executor> NormalizeL2::NormalizeL2Execut
         const NormalizeL2Attrs& attrs, const dnnl::primitive_attr& kernel_attrs, const VectorDims& dims) {
     if (attrs.cornerCase)
         return std::make_shared<NormalizeL2CornerCaseExecutor<in_data_t>>(dims);
+#if defined(OPENVINO_ARCH_X86_64)
     else if (mayiuse(cpu::x64::sse41))
         return std::make_shared<NormalizeL2JitExecutor<in_data_t, out_data_t>>(attrs, kernel_attrs, dims);
+#endif
     else if (attrs.layout == LayoutType::ncsp)
         return std::make_shared<NormalizeL2RefExecutor<in_data_t, out_data_t>>(attrs, kernel_attrs, dims);
     else
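
NormalizeL2's executor factory keeps one dispatch order on every target: the corner-case executor first, the JIT executor only where it exists, then the layout-specific reference executors. A condensed, hypothetical version of that chain (class names and the mayiuse stub are stand-ins):

    #include <memory>

    struct Executor { virtual ~Executor() = default; };
    struct CornerCaseExec : Executor {};
    struct RefExec : Executor {};
    #if defined(OPENVINO_ARCH_X86_64)
    struct JitExec : Executor {};
    static bool mayiuse_sse41() { return true; }  // stand-in for mayiuse(cpu::x64::sse41)
    #endif

    std::shared_ptr<Executor> getExecutor(bool cornerCase) {
        if (cornerCase)
            return std::make_shared<CornerCaseExec>();
    #if defined(OPENVINO_ARCH_X86_64)
        if (mayiuse_sse41())
            return std::make_shared<JitExec>();
    #endif
        return std::make_shared<RefExec>();  // ncsp/nspc reference executors
    }

Because the JIT branch sits between the corner case and the reference branches, removing it at compile time changes only which executor wins, never the semantics of the checks around it.
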
diff --git a/src/plugins/intel_cpu/src/nodes/normalize.h b/src/plugins/intel_cpu/src/nodes/normalize.h
index c5e5f0106e3047..3b6c99bde4272c 100644
--- a/src/plugins/intel_cpu/src/nodes/normalize.h
+++ b/src/plugins/intel_cpu/src/nodes/normalize.h
@@ -17,7 +17,7 @@
 namespace ov {
 namespace intel_cpu {
 namespace node {
-
+#if defined(OPENVINO_ARCH_X86_64)
 struct jit_normalize_config_params {
     bool is_nchw;
     bool is_nhwc;
@@ -75,7 +75,7 @@ struct jit_uni_normalize_kernel {
     jit_normalize_config_params jcp_;
     const dnnl_primitive_attr &attr_;
 };
-
+#endif
 class NormalizeL2 : public Node {
 public:
     NormalizeL2(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr context);
diff --git a/src/plugins/intel_cpu/src/nodes/rdft.cpp b/src/plugins/intel_cpu/src/nodes/rdft.cpp
index 60118d7087709c..4b563bbef4372e 100644
--- a/src/plugins/intel_cpu/src/nodes/rdft.cpp
+++ b/src/plugins/intel_cpu/src/nodes/rdft.cpp
@@ -652,7 +652,7 @@ std::vector<std::vector<float>> RDFTExecutor::generateTwiddles(const std::vector
     }
     return twiddles;
 }
-
+#if defined(OPENVINO_ARCH_X86_64)
 struct RDFTJitExecutor : public RDFTExecutor {
     RDFTJitExecutor(bool inverse, NodeDesc* primDesc) : RDFTExecutor(inverse) {
         enum dft_type rdftType = isInverse ? complex_to_real : real_to_complex;
@@ -760,7 +760,7 @@ struct RDFTJitExecutor : public RDFTExecutor {
 
     int vlen;
 };
-
+#endif
 struct RDFTRefExecutor : public RDFTExecutor {
     RDFTRefExecutor(bool inverse) : RDFTExecutor(inverse) {}
 
@@ -906,12 +906,16 @@ void RDFT::prepareParams() {
     auto buildExecutor = [&] (const RDFTKey& key) -> std::shared_ptr<RDFTExecutor> {
         std::shared_ptr<RDFTExecutor> executor;
         NodeDesc* primDesc = getSelectedPrimitiveDescriptor();
+#if defined(OPENVINO_ARCH_X86_64)
         if (mayiuse(cpu::x64::sse41)) {
             executor = std::make_shared<RDFTJitExecutor>(key.isInverse, primDesc);
         } else {
+#endif
             executor = std::make_shared<RDFTRefExecutor>(key.isInverse);
             primDesc->setImplementationType(ref_any);
+#if defined(OPENVINO_ARCH_X86_64)
         }
+#endif
         return executor;
     };
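
The RDFT hunk uses the same `#endif`-after-`} else {` bracketing as extract_image_patches, and adds one detail worth noting: the primitive descriptor is selected before the executor is built, so when the reference executor is chosen the code also rewrites the descriptor via setImplementationType(ref_any) to keep the reported implementation honest. Sketched with stand-in types:

    #include <memory>

    enum impl_type_t { jit_impl, ref_any };
    struct PrimDesc {  // stand-in for NodeDesc
        impl_type_t implType = jit_impl;
        void setImplementationType(impl_type_t t) { implType = t; }
    };
    struct Executor { virtual ~Executor() = default; };
    struct RefExecutor : Executor {};

    std::shared_ptr<Executor> buildRef(PrimDesc* primDesc) {
        auto executor = std::make_shared<RefExecutor>();
        // The descriptor may have been created assuming a JIT implementation
        // that this build (or this CPU) cannot provide; correct it here.
        primDesc->setImplementationType(ref_any);
        return executor;
    }
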
diff --git a/src/plugins/intel_cpu/src/nodes/region_yolo.cpp b/src/plugins/intel_cpu/src/nodes/region_yolo.cpp
index 917982d0252433..fff1cd67341402 100644
--- a/src/plugins/intel_cpu/src/nodes/region_yolo.cpp
+++ b/src/plugins/intel_cpu/src/nodes/region_yolo.cpp
@@ -21,12 +21,14 @@ using namespace dnnl::impl::cpu;
 using namespace dnnl::impl::cpu::x64;
 using namespace dnnl::impl::utils;
 
+#if defined(OPENVINO_ARCH_X86_64)
 #define GET_OFF(field) offsetof(jit_args_logistic, field)
+#endif
 
 namespace ov {
 namespace intel_cpu {
 namespace node {
-
+#if defined(OPENVINO_ARCH_X86_64)
 template <cpu_isa_t isa>
 struct jit_uni_logistic_kernel_f32 : public jit_uni_logistic_kernel, public jit_generator {
     DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_uni_logistic_kernel_f32)
@@ -226,6 +228,7 @@ struct jit_uni_logistic_kernel_f32 : public jit_uni_logistic_kernel, public jit_
         }
     }
 };
+#endif
 
 bool RegionYolo::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
     try {
@@ -279,13 +282,16 @@ void RegionYolo::initSupportedPrimitiveDescriptors() {
         output_prec = Precision::FP32;
     }
 
+#if defined(OPENVINO_ARCH_X86_64)
     if (Precision::BF16 == output_prec) {
         if (!mayiuse(avx512_core)) {
             output_prec = Precision::FP32;
         }
     }
+#endif
 
     impl_desc_type impl_type;
+#if defined(OPENVINO_ARCH_X86_64)
     if (mayiuse(x64::avx512_core)) {
         impl_type = impl_desc_type::jit_avx512;
     } else if (mayiuse(x64::avx2)) {
@@ -295,6 +301,9 @@ void RegionYolo::initSupportedPrimitiveDescriptors() {
     } else {
         impl_type = impl_desc_type::ref;
     }
+#else
+    impl_type = impl_desc_type::ref;
+#endif
 
     addSupportedPrimDesc({{LayoutType::ncsp, input_prec}},
                          {{LayoutType::ncsp, output_prec}},
@@ -306,6 +315,7 @@ void RegionYolo::createPrimitive() {
         updateLastInputDims();
     }
 
+#if defined(OPENVINO_ARCH_X86_64)
     jit_logistic_config_params jcp;
     jcp.src_dt = jcp.dst_dt = output_prec;
     jcp.src_data_size = jcp.dst_data_size = output_prec.size();
@@ -326,6 +336,7 @@ void RegionYolo::createPrimitive() {
 
     if (logistic_kernel)
         logistic_kernel->create_ker();
+#endif
 }
 
 inline float RegionYolo::logistic_scalar(float src) {
diff --git a/src/plugins/intel_cpu/src/nodes/region_yolo.h b/src/plugins/intel_cpu/src/nodes/region_yolo.h
index 660ec6d981b62f..da1a252eddaef0 100644
--- a/src/plugins/intel_cpu/src/nodes/region_yolo.h
+++ b/src/plugins/intel_cpu/src/nodes/region_yolo.h
@@ -66,7 +66,7 @@ class RegionYolo : public Node {
     std::string errorPrefix;
 
     int block_size;
-    std::shared_ptr<jit_uni_logistic_kernel> logistic_kernel;
+    std::shared_ptr<jit_uni_logistic_kernel> logistic_kernel = nullptr;
     std::shared_ptr<SoftmaxGeneric> softmax_kernel;
 
     union U {
diff --git a/src/plugins/intel_cpu/src/nodes/roi_align.cpp b/src/plugins/intel_cpu/src/nodes/roi_align.cpp
index f7528365f6515a..5261949264fabb 100644
--- a/src/plugins/intel_cpu/src/nodes/roi_align.cpp
+++ b/src/plugins/intel_cpu/src/nodes/roi_align.cpp
@@ -31,7 +31,7 @@ namespace node {
 
 using ngPoolingMode = ngraph::opset9::ROIAlign::PoolingMode;
 using ngAlignedMode = ngraph::opset9::ROIAlign::AlignedMode;
-
+#if defined(OPENVINO_ARCH_X86_64)
 #define GET_OFF(field) offsetof(jit_roi_align_call_args, field)
 
 template <cpu_isa_t isa>
@@ -648,7 +648,7 @@ struct jit_uni_roi_align_kernel_f32 : public jit_uni_roi_align_kernel, public ji
         }
     }
 };
-
+#endif
 bool ROIAlign::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
     try {
         auto roiAlign = ngraph::as_type_ptr<const ngraph::opset9::ROIAlign>(op);
@@ -749,7 +749,7 @@ void ROIAlign::createJitKernel(const InferenceEngine::Precision& dataPrec, const
     jcp.layout = selectLayout;
     jcp.pooled_h = pooledH;
     jcp.pooled_w = pooledW;
-
+#if defined(OPENVINO_ARCH_X86_64)
     if (mayiuse(cpu::x64::avx512_core)) {
         roi_align_kernel.reset(new jit_uni_roi_align_kernel_f32<cpu::x64::avx512_core>(jcp));
     } else if (mayiuse(cpu::x64::avx2)) {
@@ -757,7 +757,7 @@ void ROIAlign::createJitKernel(const InferenceEngine::Precision& dataPrec, const
     } else if (mayiuse(cpu::x64::sse41)) {
         roi_align_kernel.reset(new jit_uni_roi_align_kernel_f32<cpu::x64::sse41>(jcp));
     }
-
+#endif
     if (roi_align_kernel)
         roi_align_kernel->create_ker();
 }
@@ -783,6 +783,7 @@ void ROIAlign::initSupportedPrimitiveDescriptors() {
     config.outConfs.resize(1);
 
     impl_desc_type impl_type;
+#if defined(OPENVINO_ARCH_X86_64)
     if (mayiuse(cpu::x64::avx512_core)) {
         impl_type = impl_desc_type::jit_avx512;
     } else if (mayiuse(cpu::x64::avx2)) {
@@ -792,11 +793,13 @@ void ROIAlign::initSupportedPrimitiveDescriptors() {
     } else {
         impl_type = impl_desc_type::ref;
     }
-
+#else
+    impl_type = impl_desc_type::ref;
+#endif
     std::vector<std::pair<LayoutType, LayoutType>> supportedFormats {
         {LayoutType::ncsp, LayoutType::ncsp}
     };
-
+#if defined(OPENVINO_ARCH_X86_64)
     if (mayiuse(cpu::x64::sse41)) {
         supportedFormats.push_back(std::make_pair(LayoutType::nspc, LayoutType::nspc));
         if (impl_desc_type::jit_avx512 == impl_type) {
@@ -805,7 +808,7 @@ void ROIAlign::initSupportedPrimitiveDescriptors() {
             supportedFormats.push_back(std::make_pair(LayoutType::nCsp8c, LayoutType::nCsp8c));
         }
     }
-
+#endif
     for (auto fmts : supportedFormats) {
         addSupportedPrimDesc({{fmts.first, inputPrec0},
                               {LayoutType::ncsp, Precision::FP32},
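
In ROIAlign the list of supported memory formats also depends on the guard: the planar ncsp layout is always offered, while the channels-last and blocked layouts are added only where the JIT kernels that consume them can exist. Condensed, with Layout standing in for LayoutType:

    #include <utility>
    #include <vector>

    enum class Layout { ncsp, nspc, nCsp8c, nCsp16c };

    std::vector<std::pair<Layout, Layout>> supportedFormats([[maybe_unused]] bool avx512) {
        // Planar layout backs the reference executor on every architecture.
        std::vector<std::pair<Layout, Layout>> formats{{Layout::ncsp, Layout::ncsp}};
    #if defined(OPENVINO_ARCH_X86_64)
        formats.emplace_back(Layout::nspc, Layout::nspc);  // JIT-only layouts
        formats.emplace_back(avx512 ? Layout::nCsp16c : Layout::nCsp8c,
                             avx512 ? Layout::nCsp16c : Layout::nCsp8c);
    #endif
        return formats;
    }
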
diff --git a/src/plugins/intel_cpu/src/nodes/shuffle_channels.cpp b/src/plugins/intel_cpu/src/nodes/shuffle_channels.cpp
index 38f02f20ed2925..541728c32827ba 100644
--- a/src/plugins/intel_cpu/src/nodes/shuffle_channels.cpp
+++ b/src/plugins/intel_cpu/src/nodes/shuffle_channels.cpp
@@ -95,6 +95,7 @@ void ShuffleChannels::initSupportedPrimitiveDescriptors() {
         THROW_SHCH_ERROR << "has unsupported precision: " << precision.name();
 
     impl_desc_type impl_type;
+#if defined(OPENVINO_ARCH_X86_64)
     if (mayiuse(cpu::x64::avx512_core)) {
         impl_type = impl_desc_type::jit_avx512;
     } else if (mayiuse(cpu::x64::avx2)) {
@@ -104,6 +105,9 @@ void ShuffleChannels::initSupportedPrimitiveDescriptors() {
     } else {
         impl_type = impl_desc_type::ref;
     }
+#else
+    impl_type = impl_desc_type::ref;
+#endif
 
     // use ncsp as default for non-quantized networks and nspc for quantized
     auto firstCreatorType = context->isGraphQuantized() ? LayoutType::nspc : LayoutType::ncsp;
diff --git a/src/plugins/intel_cpu/src/nodes/space_to_depth.cpp b/src/plugins/intel_cpu/src/nodes/space_to_depth.cpp
index 0826571b065795..c7ff5f2bb0e055 100644
--- a/src/plugins/intel_cpu/src/nodes/space_to_depth.cpp
+++ b/src/plugins/intel_cpu/src/nodes/space_to_depth.cpp
@@ -119,6 +119,7 @@ void SpaceToDepth::initSupportedPrimitiveDescriptors() {
     InferenceEngine::Precision precision = getOriginalInputPrecisionAtPort(0);
 
     impl_desc_type impl_type = impl_desc_type::ref;
+#if defined(OPENVINO_ARCH_X86_64)
     if (cpu::x64::mayiuse(impl::cpu::x64::avx512_core)) {
         impl_type = impl_desc_type::jit_avx512;
     } else if (cpu::x64::mayiuse(cpu::x64::avx2)) {
@@ -126,6 +127,7 @@ void SpaceToDepth::initSupportedPrimitiveDescriptors() {
     } else if (cpu::x64::mayiuse(cpu::x64::sse41)) {
         impl_type = impl_desc_type::jit_sse42;
     }
+#endif
 
     NodeConfig config;
     config.dynBatchSupport = true;
diff --git a/src/plugins/intel_cpu/src/nodes_factory.cpp b/src/plugins/intel_cpu/src/nodes_factory.cpp
index c6458f63eb4696..95c500f3543497 100644
--- a/src/plugins/intel_cpu/src/nodes_factory.cpp
+++ b/src/plugins/intel_cpu/src/nodes_factory.cpp
@@ -105,9 +105,12 @@ Node::NodesFactory::NodesFactory()
     INTEL_CPU_NODE(Generic, Type::Generic);
     INTEL_CPU_NODE(CumSum, Type::CumSum);
     INTEL_CPU_NODE(Convolution, Type::Convolution);
+    INTEL_CPU_NODE(BinaryConvolution, Type::BinaryConvolution);
     INTEL_CPU_NODE(SpaceToBatch, Type::SpaceToBatch);
     INTEL_CPU_NODE(Lrn, Type::Lrn);
     INTEL_CPU_NODE(BatchToSpace, Type::BatchToSpace);
+    INTEL_CPU_NODE(DepthToSpace, Type::DepthToSpace);
+    INTEL_CPU_NODE(SpaceToDepth, Type::SpaceToDepth);
     INTEL_CPU_NODE(If, Type::If);
     INTEL_CPU_NODE(Broadcast, Type::Broadcast);
     INTEL_CPU_NODE(ExperimentalDetectronTopKROIs, Type::ExperimentalDetectronTopKROIs);
@@ -125,6 +128,7 @@ Node::NodesFactory::NodesFactory()
     INTEL_CPU_NODE(Tile, Type::Tile);
     INTEL_CPU_NODE(GatherTree, Type::GatherTree);
     INTEL_CPU_NODE(FullyConnected, Type::FullyConnected);
+    INTEL_CPU_NODE(FakeQuantize, Type::FakeQuantize);
     INTEL_CPU_NODE(CTCGreedyDecoder, Type::CTCGreedyDecoder);
     INTEL_CPU_NODE(Transpose, Type::Transpose);
     INTEL_CPU_NODE(ReorgYolo, Type::ReorgYolo);
@@ -150,6 +154,7 @@ Node::NodesFactory::NodesFactory()
     INTEL_CPU_NODE(Math, Type::Math);
     INTEL_CPU_NODE(MultiClassNms, Type::MulticlassNms);
     INTEL_CPU_NODE(Convert, Type::Convert);
+    INTEL_CPU_NODE(ColorConvert, Type::ColorConvert);
     INTEL_CPU_NODE(EmbeddingBagOffsetSum, Type::EmbeddingBagOffsetsSum);
     INTEL_CPU_NODE(Roll, Type::Roll);
     INTEL_CPU_NODE(Pad, Type::Pad);
@@ -159,15 +164,18 @@ Node::NodesFactory::NodesFactory()
     INTEL_CPU_NODE(ScatterUpdate, Type::ScatterUpdate);
     INTEL_CPU_NODE(ScatterUpdate, Type::ScatterElementsUpdate);
     INTEL_CPU_NODE(ScatterUpdate, Type::ScatterNDUpdate);
+    INTEL_CPU_NODE(ShuffleChannels, Type::ShuffleChannels);
     INTEL_CPU_NODE(TensorIterator, Type::TensorIterator);
     INTEL_CPU_NODE(Concat, Type::Concatenation);
     INTEL_CPU_NODE(OneHot, Type::OneHot);
     INTEL_CPU_NODE(ExperimentalDetectronDetectionOutput, Type::ExperimentalDetectronDetectionOutput);
     INTEL_CPU_NODE(Deconvolution, Type::Deconvolution);
+    INTEL_CPU_NODE(DeformableConvolution, Type::DeformableConvolution);
     INTEL_CPU_NODE(Range, Type::Range);
     INTEL_CPU_NODE(StridedSlice, Type::StridedSlice);
     INTEL_CPU_NODE(GRN, Type::GRN);
     INTEL_CPU_NODE(NonZero, Type::NonZero);
+    INTEL_CPU_NODE(NormalizeL2, Type::NormalizeL2);
     INTEL_CPU_NODE(PriorBox, Type::PriorBox);
     INTEL_CPU_NODE(PriorBoxClustered, Type::PriorBoxClustered);
     INTEL_CPU_NODE(Eye, Type::Eye);
@@ -177,25 +185,17 @@ Node::NodesFactory::NodesFactory()
     INTEL_CPU_NODE(Gather, Type::Gather);
     INTEL_CPU_NODE(NonMaxSuppression, Type::NonMaxSuppression);
     INTEL_CPU_NODE(ROIPooling, Type::ROIPooling);
+    INTEL_CPU_NODE(ROIAlign, Type::ROIAlign);
     INTEL_CPU_NODE(TopK, Type::TopK);
     INTEL_CPU_NODE(Proposal, Type::Proposal);
-#if defined(OPENVINO_ARCH_X86_64)
-    INTEL_CPU_NODE(GridSample, Type::GridSample);
-    INTEL_CPU_NODE(DeformableConvolution, Type::DeformableConvolution);
-    INTEL_CPU_NODE(DepthToSpace, Type::DepthToSpace);
+    INTEL_CPU_NODE(RegionYolo, Type::RegionYolo);
     INTEL_CPU_NODE(DFT, Type::DFT);
     INTEL_CPU_NODE(RDFT, Type::RDFT);
-    INTEL_CPU_NODE(ColorConvert, Type::ColorConvert);
-    INTEL_CPU_NODE(NormalizeL2, Type::NormalizeL2);
-    INTEL_CPU_NODE(BinaryConvolution, Type::BinaryConvolution);
-    INTEL_CPU_NODE(ROIAlign, Type::ROIAlign);
-    INTEL_CPU_NODE(RegionYolo, Type::RegionYolo);
+    INTEL_CPU_NODE(ExtractImagePatches, Type::ExtractImagePatches);
+#if defined(OPENVINO_ARCH_X86_64)
+    INTEL_CPU_NODE(GridSample, Type::GridSample);
     INTEL_CPU_NODE(Interaction, Type::Interaction);
     INTEL_CPU_NODE(MHA, Type::MHA);
-    INTEL_CPU_NODE(ExtractImagePatches, Type::ExtractImagePatches);
-    INTEL_CPU_NODE(FakeQuantize, Type::FakeQuantize);
-    INTEL_CPU_NODE(ShuffleChannels, Type::ShuffleChannels);
-    INTEL_CPU_NODE(SpaceToDepth, Type::SpaceToDepth);
     INTEL_CPU_NODE(Snippet, Type::Subgraph);
 #endif
 }
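
The factory change is the visible payoff of all the per-node guards: every node that now has a compile-time reference fallback registers unconditionally, and only the genuinely JIT-only nodes (GridSample, Interaction, MHA, Snippet) remain inside the arch guard. A hypothetical miniature of that registration split — the INTEL_CPU_NODE macro's real definition is not shown in this diff, so a plain map is used instead:

    #include <functional>
    #include <map>
    #include <memory>
    #include <string>

    struct Node {};
    using Builder = std::function<std::unique_ptr<Node>()>;

    std::map<std::string, Builder> makeRegistry() {
        std::map<std::string, Builder> reg;
        // Nodes with reference fallbacks register on every architecture.
        reg["NormalizeL2"] = [] { return std::make_unique<Node>(); };
        reg["RegionYolo"]  = [] { return std::make_unique<Node>(); };
    #if defined(OPENVINO_ARCH_X86_64)
        // JIT-only nodes exist solely in x86-64 builds.
        reg["MHA"] = [] { return std::make_unique<Node>(); };
    #endif
        return reg;
    }
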
diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/fake_quantize.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/fake_quantize.cpp
index f086674954cf6e..8e0328a442cc7e 100644
--- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/fake_quantize.cpp
+++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/fake_quantize.cpp
@@ -199,7 +199,7 @@ std::vector rangesShapes4D_jit = {
         {{1, 16, 1, 1}, {1, 16, 1, 1}, {1, 16, 1, 1}, {1, 16, 1, 1}}
     },
 };
-
+#if defined(OPENVINO_ARCH_X86_64)
 const auto testParams4D_jit = ::testing::Combine(specificParams,
                                                  ::testing::ValuesIn(rangesShapes4D_jit),
                                                  ::testing::Values(Precision::FP32),
@@ -207,7 +207,7 @@ const auto testParams4D_jit = ::testing::Combine(specificParams,
                                                  ::testing::Values(false),
                                                  ::testing::ValuesIn(filterCPUSpecificParams(memForm4D_jit)));
 
 INSTANTIATE_TEST_SUITE_P(smoke_FakeQuantizeLayerCPUTest_4D_jit, FakeQuantizeLayerCPUTest, testParams4D_jit, FakeQuantizeLayerCPUTest::getTestCaseName);
-
+#endif
 std::vector<CPUSpecificParams> memForm4D_ref = {
     CPUSpecificParams({nchw}, {nchw}, {"ref_FP32"}, {"ref_FP32"})
 };
@@ -232,7 +232,7 @@ const auto testParams4D_ref = ::testing::Combine(specificParams,
                                                  ::testing::ValuesIn(memForm4D_ref));
 
 INSTANTIATE_TEST_SUITE_P(smoke_FakeQuantizeLayerCPUTest_4D_ref, FakeQuantizeLayerCPUTest, testParams4D_ref, FakeQuantizeLayerCPUTest::getTestCaseName);
-
+#if defined(OPENVINO_ARCH_X86_64)
 std::vector<CPUSpecificParams> memForm5D_jit = {
     CPUSpecificParams({ncdhw}, {ncdhw}, {}, {}),
     CPUSpecificParams({ndhwc}, {ndhwc}, {}, {}),
@@ -266,7 +266,7 @@ const auto testParams5D_jit = ::testing::Combine(specificParams,
                                                  ::testing::ValuesIn(filterCPUSpecificParams(memForm5D_jit)));
 
 INSTANTIATE_TEST_SUITE_P(smoke_FakeQuantizeLayerCPUTest_5D_jit, FakeQuantizeLayerCPUTest, testParams5D_jit, FakeQuantizeLayerCPUTest::getTestCaseName);
-
+#endif
 std::vector<CPUSpecificParams> memForm5D_ref = {
     CPUSpecificParams({ncdhw}, {ncdhw}, {"ref_FP32"}, {"ref_FP32"})
 };
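
The functional tests mirror the plugin split: suites that pin a jit_* implementation are compiled only where that implementation can be selected, while the ref suites stay unconditional. In miniature, with a hypothetical suite name:

    #include <gtest/gtest.h>

    class FakeQuantizeLikeTest : public ::testing::TestWithParam<int> {};
    TEST_P(FakeQuantizeLikeTest, smoke) { SUCCEED(); }

    #if defined(OPENVINO_ARCH_X86_64)
    // JIT variants: only meaningful where the jit_* descriptors can be chosen.
    INSTANTIATE_TEST_SUITE_P(smoke_jit, FakeQuantizeLikeTest, ::testing::Values(1, 2));
    #endif
    // Reference variants run on every architecture.
    INSTANTIATE_TEST_SUITE_P(smoke_ref, FakeQuantizeLikeTest, ::testing::Values(0));
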
diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/normalize.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/normalize.cpp
index 7b50b243100753..2307d98ec63e73 100755
--- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/normalize.cpp
+++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/normalize.cpp
@@ -107,21 +107,27 @@ namespace {
 /* ============= Common params ============= */
 std::vector<fusingSpecificParams> fusingParamsSet {
     emptyFusingSpec,
+#if defined(OPENVINO_ARCH_X86_64)
     fusingMultiplyPerTensor,
     fusingRelu,
     fusingPReluPerChannel
+#endif
 };
 
 std::vector<fusingSpecificParams> fusingParamsSetDynamic {
     emptyFusingSpec,
+#if defined(OPENVINO_ARCH_X86_64)
     fusingMultiplyPerTensor,
     fusingRelu,
     fusingFakeQuantizePerTensor
+#endif
 };
 
 std::vector<fusingSpecificParams> fusingParamsSetPerChannel {
+#if defined(OPENVINO_ARCH_X86_64)
     fusingPReluPerChannel,
     fusingFakeQuantizePerChannel
+#endif
 };
 
 const float epsilon = 1e-4f;
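
The fusing lists shrink the same way: post-op fusing is realized inside the x86-64 JIT kernels, so non-x86 targets keep only the empty fusing spec. A hypothetical condensed equivalent of these lists:

    #include <string>
    #include <vector>

    std::vector<std::string> fusingParams() {
        return {
            "empty",  // always testable: no post-ops fused
    #if defined(OPENVINO_ARCH_X86_64)
            "multiplyPerTensor",  // fused post-ops need the JIT kernels
            "relu",
            "preluPerChannel",
    #endif
        };
    }

Note that fusingParamsSetPerChannel becomes empty on non-x86 builds, which gtest tolerates as an instantiation with zero parameter combinations.
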