From d702611e4a0439fae3511dad78901a9ff5de88b9 Mon Sep 17 00:00:00 2001 From: Changming Sun Date: Mon, 26 Feb 2024 23:46:18 +0000 Subject: [PATCH 01/11] update --- .../external/onnxruntime_external_deps.cmake | 8 +-- cmake/onnxruntime_common.cmake | 5 -- onnxruntime/core/common/cpuid_info.cc | 63 +++++++------------ onnxruntime/core/common/cpuid_info.h | 5 +- 4 files changed, 24 insertions(+), 57 deletions(-) diff --git a/cmake/external/onnxruntime_external_deps.cmake b/cmake/external/onnxruntime_external_deps.cmake index 22d12b128dc1f..be90c19960181 100644 --- a/cmake/external/onnxruntime_external_deps.cmake +++ b/cmake/external/onnxruntime_external_deps.cmake @@ -257,13 +257,7 @@ if (onnxruntime_ENABLE_CPUINFO) endif() if (WIN32) # Exclude Windows ARM build and Windows Store - if (${onnxruntime_target_platform} MATCHES "^(ARM.*|arm.*)$" ) - message(WARNING "Cpuinfo not included for compilation problems with Windows ARM.") - set(CPUINFO_SUPPORTED FALSE) - elseif (WIN32 AND NOT CMAKE_CXX_STANDARD_LIBRARIES MATCHES kernel32.lib) - message(WARNING "Cpuinfo not included non-Desktop builds") - set(CPUINFO_SUPPORTED FALSE) - endif() + set(CPUINFO_SUPPORTED TRUE) elseif (NOT ${onnxruntime_target_platform} MATCHES "^(i[3-6]86|AMD64|x86(_64)?|armv[5-8].*|aarch64|arm64)$") message(WARNING "Target processor architecture \"${onnxruntime_target_platform}\" is not supported in cpuinfo. " diff --git a/cmake/onnxruntime_common.cmake b/cmake/onnxruntime_common.cmake index 6b8c2560b1714..fb56e3f3445d4 100644 --- a/cmake/onnxruntime_common.cmake +++ b/cmake/onnxruntime_common.cmake @@ -201,10 +201,6 @@ endif() if (RISCV64 OR ARM64 OR ARM OR X86 OR X64 OR X86_64) - if((WIN32 AND NOT CMAKE_CXX_STANDARD_LIBRARIES MATCHES kernel32.lib) OR ((ARM64 OR ARM) AND MSVC)) - # msvc compiler report syntax error with cpuinfo arm source files - # and cpuinfo does not have code for getting arm uarch info under windows - else() # Link cpuinfo if supported # Using it mainly in ARM with Android. 
# Its functionality in detecting x86 cpu features are lacking, so is support for Windows. @@ -212,7 +208,6 @@ if (RISCV64 OR ARM64 OR ARM OR X86 OR X64 OR X86_64) onnxruntime_add_include_to_target(onnxruntime_common cpuinfo::cpuinfo) list(APPEND onnxruntime_EXTERNAL_LIBRARIES cpuinfo::cpuinfo ${ONNXRUNTIME_CLOG_TARGET_NAME}) endif() - endif() endif() if (NOT onnxruntime_BUILD_SHARED_LIB) diff --git a/onnxruntime/core/common/cpuid_info.cc b/onnxruntime/core/common/cpuid_info.cc index 711fd595e90fd..8530afabb7207 100644 --- a/onnxruntime/core/common/cpuid_info.cc +++ b/onnxruntime/core/common/cpuid_info.cc @@ -52,9 +52,9 @@ #if defined(CPUINFO_SUPPORTED) #include -#else -#include "core/common/cpuid_uarch.h" #endif // CPUINFO_SUPPORTED +#include "core/common/cpuid_uarch.h" + namespace onnxruntime { @@ -189,6 +189,12 @@ void CPUIDInfo::ArmLinuxInit() { #elif defined(_WIN32) void CPUIDInfo::ArmWindowsInit() { + pytorch_cpuinfo_init_ = cpuinfo_initialize(); + if (!pytorch_cpuinfo_init_) { + LOGS_DEFAULT(WARNING) << "Failed to init pytorch cpuinfo library, may cause CPU EP performance degradation due to undetected CPU features."; + return; + } + // ARM32 certainly doesn't have fp16, so we will skip the logic to avoid using RegGetValueA Windows API #ifndef _M_ARM #pragma region Application Family or OneCore Family @@ -239,53 +245,26 @@ void CPUIDInfo::ArmWindowsInit() { lastUarch = uarch; } } - - switch (lastUarch) { - case cpuinfo_uarch_cortex_a55: - case cpuinfo_uarch_cortex_a55r0: - case cpuinfo_uarch_cortex_a76: - case cpuinfo_uarch_neoverse_n1: - case cpuinfo_uarch_cortex_a77: - case cpuinfo_uarch_exynos_m4: - case cpuinfo_uarch_exynos_m5: - has_fp16_ = true; - break; - default: - break; - } - if (!has_fp16_) { - /* - * Detecting fp16 support. Different cores should have the same instruction set. 
- * So we just check the first ID_AA64PFR0_EL1 - * Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0100), Op2(0b000), - */ - uint64_t ID_AA64PFR0_EL1; - unsigned long valsize = sizeof(uint64_t); - auto retCode = ::RegGetValueA( - HKEY_LOCAL_MACHINE, - "HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0", - "CP 4020", RRF_RT_REG_QWORD, nullptr, - &ID_AA64PFR0_EL1, &valsize); - if (retCode == ERROR_SUCCESS) { - // AdvSIMD, bits [23:20] - auto advSimd = ID_AA64PFR0_EL1 >> 20; - if ((advSimd & 0xfULL) == 1) { - has_fp16_ = true; - } - } - } #endif /* Application Family or OneCore Family */ has_arm_neon_dot_ = (IsProcessorFeaturePresent(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE) != 0); #else has_arm_neon_dot_ = false; #endif - has_fp16_ |= has_arm_neon_dot_; - /* TODO: implement them when hw+sw is available for testing these features */ - has_arm_neon_i8mm_ = false; - has_arm_sve_i8mm_ = false; - has_arm_neon_bf16_ = false; + + if (pytorch_cpuinfo_init_) { + has_fp16_ = cpuinfo_has_arm_neon_fp16_arith() || has_arm_neon_dot_; + has_arm_neon_i8mm_ = cpuinfo_has_arm_i8mm(); + has_arm_sve_i8mm_ = cpuinfo_has_arm_sve() && cpuinfo_has_arm_i8mm(); + has_arm_neon_bf16_ = cpuinfo_has_arm_neon_bf16(); + } else { + has_fp16_ = false; + has_arm_neon_i8mm_ = false; + has_arm_sve_i8mm_ = false; + has_arm_neon_bf16_ = false; + } } + #endif /* (arm or arm64) and windows */ #endif /* arm or arm64*/ diff --git a/onnxruntime/core/common/cpuid_info.h b/onnxruntime/core/common/cpuid_info.h index 2f8041e39f680..d9659943c681e 100644 --- a/onnxruntime/core/common/cpuid_info.h +++ b/onnxruntime/core/common/cpuid_info.h @@ -127,15 +127,15 @@ class CPUIDInfo { bool has_arm_neon_i8mm_{false}; bool has_arm_sve_i8mm_{false}; bool has_arm_neon_bf16_{false}; + bool pytorch_cpuinfo_init_{false}; #ifdef CPUIDINFO_ARCH_X86 void X86Init(); +#endif -#elif defined(CPUIDINFO_ARCH_ARM) #ifdef __linux__ - bool pytorch_cpuinfo_init_{false}; void ArmLinuxInit(); #elif defined(_WIN32) @@ -143,7 +143,6 @@ class CPUIDInfo { 
void ArmWindowsInit(); #endif /* (arm or arm64) and windows */ -#endif }; } // namespace onnxruntime From 8da9ebf20dfea2c08dcbe93083c889f762fe4483 Mon Sep 17 00:00:00 2001 From: Changming Sun Date: Tue, 27 Feb 2024 00:24:43 +0000 Subject: [PATCH 02/11] update --- onnxruntime/core/common/cpuid_info.cc | 26 +++++++++++++++----------- onnxruntime/core/common/cpuid_info.h | 12 +----------- 2 files changed, 16 insertions(+), 22 deletions(-) diff --git a/onnxruntime/core/common/cpuid_info.cc b/onnxruntime/core/common/cpuid_info.cc index 8530afabb7207..0621ad230a586 100644 --- a/onnxruntime/core/common/cpuid_info.cc +++ b/onnxruntime/core/common/cpuid_info.cc @@ -142,11 +142,6 @@ void CPUIDInfo::ArmLinuxInit() { // Pytorch CPUINFO only works on ARM linux or android // Assuming no hyper-threading, no NUMA groups #ifdef CPUINFO_SUPPORTED - pytorch_cpuinfo_init_ = cpuinfo_initialize(); - if (!pytorch_cpuinfo_init_) { - LOGS_DEFAULT(WARNING) << "Failed to init pytorch cpuinfo library, may cause CPU EP performance degradation due to undetected CPU features."; - return; - } is_hybrid_ = cpuinfo_get_uarchs_count() > 1; has_arm_neon_dot_ = cpuinfo_has_arm_neon_dot(); has_fp16_ = cpuinfo_has_arm_neon_fp16_arith(); @@ -189,11 +184,6 @@ void CPUIDInfo::ArmLinuxInit() { #elif defined(_WIN32) void CPUIDInfo::ArmWindowsInit() { - pytorch_cpuinfo_init_ = cpuinfo_initialize(); - if (!pytorch_cpuinfo_init_) { - LOGS_DEFAULT(WARNING) << "Failed to init pytorch cpuinfo library, may cause CPU EP performance degradation due to undetected CPU features."; - return; - } // ARM32 certainly doesn't have fp16, so we will skip the logic to avoid using RegGetValueA Windows API #ifndef _M_ARM @@ -283,5 +273,19 @@ uint32_t CPUIDInfo::GetCurrentCoreIdx() const { return 0xFFFFFFFF; // don't know how to get core index #endif } - +CPUIDInfo::CPUIDInfo() { +#ifdef CPUIDINFO_ARCH_X86 + X86Init(); +#elif defined(CPUIDINFO_ARCH_ARM) + pytorch_cpuinfo_init_ = cpuinfo_initialize(); + if (!pytorch_cpuinfo_init_) 
{ + LOGS_DEFAULT(WARNING) << "Failed to init pytorch cpuinfo library, may cause CPU EP performance degradation due to undetected CPU features."; + } +#ifdef __linux__ + ArmLinuxInit(); +#elif defined(_WIN32) + ArmWindowsInit(); +#endif /* (arm or arm64) and windows */ +#endif +} } // namespace onnxruntime diff --git a/onnxruntime/core/common/cpuid_info.h b/onnxruntime/core/common/cpuid_info.h index d9659943c681e..ac0c3b4040aaf 100644 --- a/onnxruntime/core/common/cpuid_info.h +++ b/onnxruntime/core/common/cpuid_info.h @@ -93,17 +93,7 @@ class CPUIDInfo { } private: - CPUIDInfo() { -#ifdef CPUIDINFO_ARCH_X86 - X86Init(); -#elif defined(CPUIDINFO_ARCH_ARM) -#ifdef __linux__ - ArmLinuxInit(); -#elif defined(_WIN32) - ArmWindowsInit(); -#endif /* (arm or arm64) and windows */ -#endif - } + CPUIDInfo(); bool has_amx_bf16_{false}; bool has_avx_{false}; bool has_avx2_{false}; From 684c7033c8fd152fd10ac84e6dcccfcacb36d73d Mon Sep 17 00:00:00 2001 From: Changming Sun Date: Tue, 27 Feb 2024 00:45:54 +0000 Subject: [PATCH 03/11] update --- onnxruntime/core/common/cpuid_info.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/onnxruntime/core/common/cpuid_info.cc b/onnxruntime/core/common/cpuid_info.cc index 0621ad230a586..e2ae61a6fb2bd 100644 --- a/onnxruntime/core/common/cpuid_info.cc +++ b/onnxruntime/core/common/cpuid_info.cc @@ -277,10 +277,12 @@ CPUIDInfo::CPUIDInfo() { #ifdef CPUIDINFO_ARCH_X86 X86Init(); #elif defined(CPUIDINFO_ARCH_ARM) +#if CPUINFO_SUPPORTED pytorch_cpuinfo_init_ = cpuinfo_initialize(); if (!pytorch_cpuinfo_init_) { LOGS_DEFAULT(WARNING) << "Failed to init pytorch cpuinfo library, may cause CPU EP performance degradation due to undetected CPU features."; } +#endif #ifdef __linux__ ArmLinuxInit(); #elif defined(_WIN32) From 70cf5872134dcdfab183962ee35b355dd307546a Mon Sep 17 00:00:00 2001 From: Changming Sun Date: Mon, 26 Feb 2024 16:49:39 -0800 Subject: [PATCH 04/11] format code --- onnxruntime/core/common/cpuid_info.cc | 5 +---- 1 file changed, 1 
insertion(+), 4 deletions(-) diff --git a/onnxruntime/core/common/cpuid_info.cc b/onnxruntime/core/common/cpuid_info.cc index e2ae61a6fb2bd..bea7eb98fb00a 100644 --- a/onnxruntime/core/common/cpuid_info.cc +++ b/onnxruntime/core/common/cpuid_info.cc @@ -55,7 +55,6 @@ #endif // CPUINFO_SUPPORTED #include "core/common/cpuid_uarch.h" - namespace onnxruntime { #ifdef CPUIDINFO_ARCH_X86 @@ -184,7 +183,6 @@ void CPUIDInfo::ArmLinuxInit() { #elif defined(_WIN32) void CPUIDInfo::ArmWindowsInit() { - // ARM32 certainly doesn't have fp16, so we will skip the logic to avoid using RegGetValueA Windows API #ifndef _M_ARM #pragma region Application Family or OneCore Family @@ -241,7 +239,7 @@ void CPUIDInfo::ArmWindowsInit() { #else has_arm_neon_dot_ = false; #endif - + if (pytorch_cpuinfo_init_) { has_fp16_ = cpuinfo_has_arm_neon_fp16_arith() || has_arm_neon_dot_; has_arm_neon_i8mm_ = cpuinfo_has_arm_i8mm(); @@ -254,7 +252,6 @@ void CPUIDInfo::ArmWindowsInit() { has_arm_neon_bf16_ = false; } } - #endif /* (arm or arm64) and windows */ #endif /* arm or arm64*/ From 5cb01e0bd2063cfe3c2f8aa13e9f841f0a0fa42b Mon Sep 17 00:00:00 2001 From: Changming Sun Date: Tue, 27 Feb 2024 01:03:50 +0000 Subject: [PATCH 05/11] update --- onnxruntime/core/common/cpuid_info.cc | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/onnxruntime/core/common/cpuid_info.cc b/onnxruntime/core/common/cpuid_info.cc index bea7eb98fb00a..fbf14e0479044 100644 --- a/onnxruntime/core/common/cpuid_info.cc +++ b/onnxruntime/core/common/cpuid_info.cc @@ -52,8 +52,14 @@ #if defined(CPUINFO_SUPPORTED) #include -#endif // CPUINFO_SUPPORTED +#if defined(CPUIDINFO_ARCH_ARM) +//The following function is declared in "core/common/cpuid_uarch.h" but we cannot include the whole header file because +// some of its symbols are conflict with +void decodeMIDR(uint32_t midr, uint32_t uarch[1]); +#endif +#else #include "core/common/cpuid_uarch.h" +#endif // CPUINFO_SUPPORTED namespace onnxruntime { From 
1555df06c76eec543eb270227ab8ada4fc84dcf7 Mon Sep 17 00:00:00 2001 From: Changming Sun Date: Tue, 27 Feb 2024 01:35:46 +0000 Subject: [PATCH 06/11] update --- onnxruntime/core/common/cpuid_info.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/onnxruntime/core/common/cpuid_info.h b/onnxruntime/core/common/cpuid_info.h index ac0c3b4040aaf..a40768906d2b6 100644 --- a/onnxruntime/core/common/cpuid_info.h +++ b/onnxruntime/core/common/cpuid_info.h @@ -117,7 +117,8 @@ class CPUIDInfo { bool has_arm_neon_i8mm_{false}; bool has_arm_sve_i8mm_{false}; bool has_arm_neon_bf16_{false}; - bool pytorch_cpuinfo_init_{false}; + // Now the following var is only used in ARM build, but later one we may expand the usage. + [[maybe_unused]] bool pytorch_cpuinfo_init_{false}; #ifdef CPUIDINFO_ARCH_X86 From 4c4eb28a4058a7b4d08663f941aade948d69199d Mon Sep 17 00:00:00 2001 From: Changming Sun Date: Tue, 27 Feb 2024 02:11:12 +0000 Subject: [PATCH 07/11] update --- onnxruntime/core/common/cpuid_info.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/onnxruntime/core/common/cpuid_info.h b/onnxruntime/core/common/cpuid_info.h index a40768906d2b6..c8c832448a9bb 100644 --- a/onnxruntime/core/common/cpuid_info.h +++ b/onnxruntime/core/common/cpuid_info.h @@ -117,12 +117,14 @@ class CPUIDInfo { bool has_arm_neon_i8mm_{false}; bool has_arm_sve_i8mm_{false}; bool has_arm_neon_bf16_{false}; - // Now the following var is only used in ARM build, but later one we may expand the usage. - [[maybe_unused]] bool pytorch_cpuinfo_init_{false}; + #ifdef CPUIDINFO_ARCH_X86 void X86Init(); +#elif defined(CPUIDINFO_ARCH_ARM) + // Now the following var is only used in ARM build, but later on we may expand the usage.
+ bool pytorch_cpuinfo_init_{false}; #endif #ifdef __linux__ From e0e5b678121367a522d4246003809087bd3db9b8 Mon Sep 17 00:00:00 2001 From: Changming Sun Date: Mon, 26 Feb 2024 18:29:25 -0800 Subject: [PATCH 08/11] update --- onnxruntime/core/common/cpuid_info.h | 1 - 1 file changed, 1 deletion(-) diff --git a/onnxruntime/core/common/cpuid_info.h b/onnxruntime/core/common/cpuid_info.h index c8c832448a9bb..a3936b4bd11a6 100644 --- a/onnxruntime/core/common/cpuid_info.h +++ b/onnxruntime/core/common/cpuid_info.h @@ -118,7 +118,6 @@ class CPUIDInfo { bool has_arm_sve_i8mm_{false}; bool has_arm_neon_bf16_{false}; - #ifdef CPUIDINFO_ARCH_X86 void X86Init(); From f0e62295d67bc225ae1a817986b87efc47a433dc Mon Sep 17 00:00:00 2001 From: Changming Sun Date: Tue, 27 Feb 2024 17:22:33 +0000 Subject: [PATCH 09/11] update --- onnxruntime/core/common/cpuid_info.cc | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/onnxruntime/core/common/cpuid_info.cc b/onnxruntime/core/common/cpuid_info.cc index fbf14e0479044..6690da4a4f5e7 100644 --- a/onnxruntime/core/common/cpuid_info.cc +++ b/onnxruntime/core/common/cpuid_info.cc @@ -53,9 +53,11 @@ #if defined(CPUINFO_SUPPORTED) #include <cpuinfo.h> #if defined(CPUIDINFO_ARCH_ARM) -//The following function is declared in "core/common/cpuid_uarch.h" but we cannot include the whole header file because -// some of its symbols are conflict with +namespace onnxruntime { +// The following function is declared in "core/common/cpuid_uarch.h" but we cannot include the whole header file because +// some of its symbols are conflict with <cpuinfo.h> void decodeMIDR(uint32_t midr, uint32_t uarch[1]); +} // namespace onnxruntime #endif #else #include "core/common/cpuid_uarch.h" From 7d481a2220f1fbb83c38b730800cf3b0193f3545 Mon Sep 17 00:00:00 2001 From: Changming Sun Date: Thu, 29 Feb 2024 12:44:46 -0800 Subject: [PATCH 10/11] update comment --- cmake/external/onnxruntime_external_deps.cmake | 1 - 1 file changed, 1 deletion(-) diff --git
a/cmake/external/onnxruntime_external_deps.cmake b/cmake/external/onnxruntime_external_deps.cmake index d9c8962ea5157..cb75b0b8751bb 100644 --- a/cmake/external/onnxruntime_external_deps.cmake +++ b/cmake/external/onnxruntime_external_deps.cmake @@ -256,7 +256,6 @@ if (onnxruntime_ENABLE_CPUINFO) set(CPUINFO_SUPPORTED TRUE) endif() if (WIN32) - # Exclude Windows ARM build and Windows Store set(CPUINFO_SUPPORTED TRUE) elseif (NOT ${onnxruntime_target_platform} MATCHES "^(i[3-6]86|AMD64|x86(_64)?|armv[5-8].*|aarch64|arm64)$") message(WARNING From 3d3605120a1ebe81934b39b77fcde1c58aa2478a Mon Sep 17 00:00:00 2001 From: Changming Sun Date: Fri, 1 Mar 2024 10:25:48 -0800 Subject: [PATCH 11/11] update --- onnxruntime/core/common/cpuid_info.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onnxruntime/core/common/cpuid_info.cc b/onnxruntime/core/common/cpuid_info.cc index 6690da4a4f5e7..be881f6bc4bc2 100644 --- a/onnxruntime/core/common/cpuid_info.cc +++ b/onnxruntime/core/common/cpuid_info.cc @@ -249,7 +249,7 @@ void CPUIDInfo::ArmWindowsInit() { #endif if (pytorch_cpuinfo_init_) { - has_fp16_ = cpuinfo_has_arm_neon_fp16_arith() || has_arm_neon_dot_; + has_fp16_ = cpuinfo_has_arm_neon_fp16_arith(); has_arm_neon_i8mm_ = cpuinfo_has_arm_i8mm(); has_arm_sve_i8mm_ = cpuinfo_has_arm_sve() && cpuinfo_has_arm_i8mm(); has_arm_neon_bf16_ = cpuinfo_has_arm_neon_bf16();