Skip to content

Commit

Permalink
Apply suggestions from code review: remove 'ov::with_cpu_x86_avx2_vnn…
Browse files Browse the repository at this point in the history
…i_2' api, fix store_num and OPENVINO_THROW issue, keep brgconv related types only for new platform priorities list, update WeightsDecompressionImpl of fullyconnected Node, replace mayiuse() by hasHardwareSupport() for Precision check
  • Loading branch information
liubo-intel committed Jan 4, 2024
1 parent d6400d1 commit 6a94fe2
Show file tree
Hide file tree
Showing 9 changed files with 18 additions and 36 deletions.
7 changes: 0 additions & 7 deletions src/inference/dev_api/ie_system_conf.h
Original file line number Diff line number Diff line change
Expand Up @@ -109,13 +109,6 @@ using ov::with_cpu_x86_avx2;
*/
using ov::with_cpu_x86_avx2_vnni;

/**
* @brief Checks whether CPU supports AVX2_VNNI_2 capability
* @ingroup ie_dev_api_system_conf
* @return `True` if AVX2_VNNI_2 instructions are available, `false` otherwise
*/
using ov::with_cpu_x86_avx2_vnni_2;

/**
* @brief Checks whether CPU supports AVX 512 capability
* @ingroup ie_dev_api_system_conf
Expand Down
7 changes: 0 additions & 7 deletions src/inference/dev_api/openvino/runtime/system_conf.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,13 +97,6 @@ OPENVINO_RUNTIME_API bool with_cpu_x86_avx2();
*/
OPENVINO_RUNTIME_API bool with_cpu_x86_avx2_vnni();

/**
* @brief Checks whether CPU supports AVX2_VNNI_2 capability
* @ingroup ov_dev_api_system_conf
* @return `True` if AVX2_VNNI_2 instructions are available, `false` otherwise
*/
OPENVINO_RUNTIME_API bool with_cpu_x86_avx2_vnni_2();

/**
* @brief Checks whether CPU supports AVX 512 capability
* @ingroup ov_dev_api_system_conf
Expand Down
5 changes: 0 additions & 5 deletions src/inference/src/system_conf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,11 +60,6 @@ bool with_cpu_x86_avx2_vnni() {
return get_cpu_info().has(Xbyak::util::Cpu::tAVX2 | Xbyak::util::Cpu::tAVX_VNNI);
}

bool with_cpu_x86_avx2_vnni_2() {
return with_cpu_x86_avx2_vnni() &&
get_cpu_info().has(Xbyak::util::Cpu::tAVX_VNNI_INT8 | Xbyak::util::Cpu::tAVX_NE_CONVERT);
}

bool with_cpu_x86_avx512f() {
return get_cpu_info().has(Xbyak::util::Cpu::tAVX512F);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -474,8 +474,8 @@ void jit_load_emitter::load_words_to_dword_extension(const Vmm &vmm, const Xbyak
bool is_f16 = (prc == ov::element::f16);
bool is_signed = prc.is_signed();

if (is_f16 && !mayiuse(cpu::x64::avx512_core) && !mayiuse(cpu::x64::avx2))
OPENVINO_THROW("Load emitter in ", name_, " only support fp16 on platform with avx512_core or avx2.");
if (is_f16 && !mayiuse(cpu::x64::avx2))
OPENVINO_THROW("Load emitter in ", name_, " only support fp16 on platform with avx2 or above.");

// Ensure extended double words fit inside Zmm (32/2(num) * 32 <= 512)
// For Ymm register, load capacity is halved (16/2(num) * 32 <= 128)
Expand Down Expand Up @@ -1208,14 +1208,14 @@ void jit_store_emitter::store_dword_to_word_extension(const Xbyak::Reg64 &reg,
xmm = Xmm(aux_vec_idxs[0]);
}
h->vcvtps2ph(xmm, ymm, 0x4);
if (store_num == 16) {
if (store_num == 8) {
h->uni_vmovdqu(ptr[reg + offset], xmm);
} else {
data_idx = static_cast<int>(xmm.getIdx());
store_bytes<Vmm>(reg, offset, store_num * 2);
}
} else {
IE_THROW() << "Store emitter in " << name_ << " only support fp16 on platform with avx512_core or avx2.";
OPENVINO_THROW("Store emitter in ", name_, " only support fp16 on platform with avx512_core or avx2.");
}
} else {
switch (store_num) {
Expand Down
3 changes: 0 additions & 3 deletions src/plugins/intel_cpu/src/node.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1019,9 +1019,6 @@ const std::vector<impl_desc_type>& Node::getDefaultImplPriority() {
impl_desc_type::jit_avx512_dw,
impl_desc_type::jit_avx512_1x1,
impl_desc_type::jit_avx512,
// [WA]default support after fully evaluate
// impl_desc_type::brgconv_avx2_1x1,
// impl_desc_type::brgconv_avx2,
impl_desc_type::jit_avx2_dw,
impl_desc_type::jit_avx2_1x1,
impl_desc_type::jit_avx2,
Expand Down
10 changes: 9 additions & 1 deletion src/plugins/intel_cpu/src/nodes/conv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -330,7 +330,7 @@ ov::element::Type Convolution::fusedEltwisePrecision(const NodePtr& fusingNode)
}

const std::vector<impl_desc_type>& Convolution::getDefaultImplPriority() {
static const std::vector<impl_desc_type> priorities = {
static std::vector<impl_desc_type> priorities = {
impl_desc_type::unknown,
impl_desc_type::dw_acl,
impl_desc_type::winograd_acl,
Expand Down Expand Up @@ -371,6 +371,14 @@ const std::vector<impl_desc_type>& Convolution::getDefaultImplPriority() {
impl_desc_type::ref,
};

priorities.erase(std::remove_if(priorities.begin(),
priorities.end(),
[](impl_desc_type type) {
return !isBrgConvAvailable() && (type == impl_desc_type::brgconv_avx2_1x1 ||
type == impl_desc_type::brgconv_avx2);
}),
priorities.end());

return priorities;
}

Expand Down
5 changes: 2 additions & 3 deletions src/plugins/intel_cpu/src/nodes/fullyconnected.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -206,9 +206,8 @@ void FullyConnected::getSupportedDescriptors() {
if (one_of(outputDataType , memory::data_type::u8, memory::data_type::s8)) {
outputDataType = memory::data_type::bf16;
}
// W.A. WeightsDecompression not supported on avx2_vnni_2
if (dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx2_vnni_2) &&
weightsDataType == memory::data_type::u8) {
// TODO: Ticket CVS-122347 - support WeightsDecompression with bf16 inputDataType on avx2_vnni_2
if (useWeightsDecompressionImpl && !dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_bf16)) {
inputDataType = outputDataType = memory::data_type::f32;
}
} else if (inputDataType == memory::data_type::f16) {
Expand Down
3 changes: 1 addition & 2 deletions src/plugins/intel_cpu/src/nodes/interpolate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2024,9 +2024,8 @@ void Interpolate::initSupportedPrimitiveDescriptors() {
inputPrecision = ov::element::f32;
}

if ((inputPrecision == ov::element::bf16) && !mayiuse(avx512_core)) {
if (!hasHardwareSupport(inputPrecision))
inputPrecision = ov::element::f32;
}

// support input with rank<=3 only with float precision and planar layout.
// Jit for avx2(gather is available) and ref for no-avx2 machine.
Expand Down
6 changes: 2 additions & 4 deletions src/plugins/intel_cpu/src/nodes/mvn.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1829,10 +1829,8 @@ void MVN::initSupportedPrimitiveDescriptors() {

ov::element::Type inputPrecision = getOriginalInputPrecisionAtPort(0);
ov::element::Type outputPrecision = getOriginalOutputPrecisionAtPort(0);
if (!mayiuse(avx512_core) && !mayiuse(avx2_vnni_2)) {
if (outputPrecision == ov::element::bf16)
outputPrecision = ov::element::f32;
}
if (!hasHardwareSupport(outputPrecision))
outputPrecision = ov::element::f32;

if (!fusedWith.empty()) {
outputPrecision = fusedWith[fusedWith.size() - 1]->getOriginalOutputPrecisionAtPort(0);
Expand Down

0 comments on commit 6a94fe2

Please sign in to comment.