Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable CPUINFO for all Windows build #19655

Merged
merged 15 commits into from
Mar 2, 2024
9 changes: 1 addition & 8 deletions cmake/external/onnxruntime_external_deps.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -256,14 +256,7 @@ if (onnxruntime_ENABLE_CPUINFO)
set(CPUINFO_SUPPORTED TRUE)
endif()
if (WIN32)
# Exclude Windows ARM build and Windows Store
if (${onnxruntime_target_platform} MATCHES "^(ARM.*|arm.*)$" )
message(WARNING "Cpuinfo not included for compilation problems with Windows ARM.")
set(CPUINFO_SUPPORTED FALSE)
elseif (WIN32 AND NOT CMAKE_CXX_STANDARD_LIBRARIES MATCHES kernel32.lib)
message(WARNING "Cpuinfo not included non-Desktop builds")
set(CPUINFO_SUPPORTED FALSE)
endif()
set(CPUINFO_SUPPORTED TRUE)
elseif (NOT ${onnxruntime_target_platform} MATCHES "^(i[3-6]86|AMD64|x86(_64)?|armv[5-8].*|aarch64|arm64)$")
message(WARNING
"Target processor architecture \"${onnxruntime_target_platform}\" is not supported in cpuinfo. "
Expand Down
5 changes: 0 additions & 5 deletions cmake/onnxruntime_common.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -201,18 +201,13 @@ endif()


if (RISCV64 OR ARM64 OR ARM OR X86 OR X64 OR X86_64)
if((WIN32 AND NOT CMAKE_CXX_STANDARD_LIBRARIES MATCHES kernel32.lib) OR ((ARM64 OR ARM) AND MSVC))
# msvc compiler report syntax error with cpuinfo arm source files
# and cpuinfo does not have code for getting arm uarch info under windows
else()
# Link cpuinfo if supported
# Using it mainly in ARM with Android.
# Its functionality in detecting x86 cpu features are lacking, so is support for Windows.
if (CPUINFO_SUPPORTED)
onnxruntime_add_include_to_target(onnxruntime_common cpuinfo::cpuinfo)
list(APPEND onnxruntime_EXTERNAL_LIBRARIES cpuinfo::cpuinfo ${ONNXRUNTIME_CLOG_TARGET_NAME})
endif()
endif()
endif()

if (NOT onnxruntime_BUILD_SHARED_LIB)
Expand Down
82 changes: 36 additions & 46 deletions onnxruntime/core/common/cpuid_info.cc
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,13 @@

#if defined(CPUINFO_SUPPORTED)
#include <cpuinfo.h>
#if defined(CPUIDINFO_ARCH_ARM)
namespace onnxruntime {
// The following function is declared in "core/common/cpuid_uarch.h" but we cannot include the whole header file because
// some of its symbols are conflict with <cpuinfo.h>
void decodeMIDR(uint32_t midr, uint32_t uarch[1]);
} // namespace onnxruntime
#endif
#else
#include "core/common/cpuid_uarch.h"
#endif // CPUINFO_SUPPORTED
Expand Down Expand Up @@ -142,11 +149,6 @@
// Pytorch CPUINFO only works on ARM linux or android
// Assuming no hyper-threading, no NUMA groups
#ifdef CPUINFO_SUPPORTED
pytorch_cpuinfo_init_ = cpuinfo_initialize();
if (!pytorch_cpuinfo_init_) {
LOGS_DEFAULT(WARNING) << "Failed to init pytorch cpuinfo library, may cause CPU EP performance degradation due to undetected CPU features.";
return;
}
is_hybrid_ = cpuinfo_get_uarchs_count() > 1;
has_arm_neon_dot_ = cpuinfo_has_arm_neon_dot();
has_fp16_ = cpuinfo_has_arm_neon_fp16_arith();
Expand Down Expand Up @@ -239,52 +241,24 @@
lastUarch = uarch;
}
}

switch (lastUarch) {
case cpuinfo_uarch_cortex_a55:
case cpuinfo_uarch_cortex_a55r0:
case cpuinfo_uarch_cortex_a76:
case cpuinfo_uarch_neoverse_n1:
case cpuinfo_uarch_cortex_a77:
case cpuinfo_uarch_exynos_m4:
case cpuinfo_uarch_exynos_m5:
has_fp16_ = true;
break;
default:
break;
}
if (!has_fp16_) {
/*
* Detecting fp16 support. Different cores should have the same instruction set.
* So we just check the first ID_AA64PFR0_EL1
* Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0100), Op2(0b000),
*/
uint64_t ID_AA64PFR0_EL1;
unsigned long valsize = sizeof(uint64_t);
auto retCode = ::RegGetValueA(
HKEY_LOCAL_MACHINE,
"HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0",
"CP 4020", RRF_RT_REG_QWORD, nullptr,
&ID_AA64PFR0_EL1, &valsize);
if (retCode == ERROR_SUCCESS) {
// AdvSIMD, bits [23:20]
auto advSimd = ID_AA64PFR0_EL1 >> 20;
if ((advSimd & 0xfULL) == 1) {
has_fp16_ = true;
}
}
}
#endif /* Application Family or OneCore Family */

has_arm_neon_dot_ = (IsProcessorFeaturePresent(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE) != 0);
#else
has_arm_neon_dot_ = false;
#endif
has_fp16_ |= has_arm_neon_dot_;
/* TODO: implement them when hw+sw is available for testing these features */
has_arm_neon_i8mm_ = false;
has_arm_sve_i8mm_ = false;
has_arm_neon_bf16_ = false;

if (pytorch_cpuinfo_init_) {
has_fp16_ = cpuinfo_has_arm_neon_fp16_arith();
has_arm_neon_i8mm_ = cpuinfo_has_arm_i8mm();
has_arm_sve_i8mm_ = cpuinfo_has_arm_sve() && cpuinfo_has_arm_i8mm();
has_arm_neon_bf16_ = cpuinfo_has_arm_neon_bf16();
} else {
has_fp16_ = false;
has_arm_neon_i8mm_ = false;
has_arm_sve_i8mm_ = false;
has_arm_neon_bf16_ = false;
}
}

#endif /* (arm or arm64) and windows */
Expand All @@ -304,5 +278,21 @@
return 0xFFFFFFFF; // don't know how to get core index
#endif
}

CPUIDInfo::CPUIDInfo() {
#ifdef CPUIDINFO_ARCH_X86
X86Init();
#elif defined(CPUIDINFO_ARCH_ARM)
#if CPUINFO_SUPPORTED
pytorch_cpuinfo_init_ = cpuinfo_initialize();
if (!pytorch_cpuinfo_init_) {
LOGS_DEFAULT(WARNING) << "Failed to init pytorch cpuinfo library, may cause CPU EP performance degradation due to undetected CPU features.";

Check warning on line 288 in onnxruntime/core/common/cpuid_info.cc

View workflow job for this annotation

GitHub Actions / Lint C++

[cpplint] reported by reviewdog 🐶 Lines should be <= 120 characters long [whitespace/line_length] [2] Raw Output: onnxruntime/core/common/cpuid_info.cc:288: Lines should be <= 120 characters long [whitespace/line_length] [2]
}
#endif
#ifdef __linux__
ArmLinuxInit();
#elif defined(_WIN32)
ArmWindowsInit();
#endif /* (arm or arm64) and windows */
#endif
}
} // namespace onnxruntime
19 changes: 5 additions & 14 deletions onnxruntime/core/common/cpuid_info.h
Original file line number Diff line number Diff line change
Expand Up @@ -93,17 +93,7 @@ class CPUIDInfo {
}

private:
CPUIDInfo() {
#ifdef CPUIDINFO_ARCH_X86
X86Init();
#elif defined(CPUIDINFO_ARCH_ARM)
#ifdef __linux__
ArmLinuxInit();
#elif defined(_WIN32)
ArmWindowsInit();
#endif /* (arm or arm64) and windows */
#endif
}
CPUIDInfo();
bool has_amx_bf16_{false};
bool has_avx_{false};
bool has_avx2_{false};
Expand Down Expand Up @@ -131,19 +121,20 @@ class CPUIDInfo {
#ifdef CPUIDINFO_ARCH_X86

void X86Init();

#elif defined(CPUIDINFO_ARCH_ARM)
// Now the following var is only used in ARM build, but later one we may expand the usage.
bool pytorch_cpuinfo_init_{false};
#endif

#ifdef __linux__

bool pytorch_cpuinfo_init_{false};
void ArmLinuxInit();

#elif defined(_WIN32)

void ArmWindowsInit();

#endif /* (arm or arm64) and windows */
#endif
};

} // namespace onnxruntime
Loading