From 97b84b351cc3f6245659b0850bccd450a5dece71 Mon Sep 17 00:00:00 2001 From: Andrew Hopkins Date: Mon, 15 Jul 2024 10:38:46 -0700 Subject: [PATCH] Add support to detect Neoverse V2 cores at runtime or at build time and pick the optimal implementation --- crypto/fipsmodule/cpucap/cpu_aarch64_linux.c | 5 ++++- crypto/fipsmodule/cpucap/cpucap.c | 3 +++ crypto/fipsmodule/cpucap/internal.h | 2 ++ include/openssl/arm_arch.h | 4 +++- 4 files changed, 12 insertions(+), 2 deletions(-) diff --git a/crypto/fipsmodule/cpucap/cpu_aarch64_linux.c b/crypto/fipsmodule/cpucap/cpu_aarch64_linux.c index bdb96c70bd..bdc1752c83 100644 --- a/crypto/fipsmodule/cpucap/cpu_aarch64_linux.c +++ b/crypto/fipsmodule/cpucap/cpu_aarch64_linux.c @@ -80,12 +80,15 @@ void OPENSSL_cpuid_setup(void) { // is supported. As of Valgrind 3.21 trying to read from that register will // cause Valgrind to crash. if (hwcap & kCPUID) { - // Check if the CPU model is Neoverse V1, + // Check if the CPU model is Neoverse V1 or V2, // which has a wide crypto/SIMD pipeline. 
OPENSSL_arm_midr = armv8_cpuid_probe(); if (MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_ARM, ARM_CPU_PART_V1)) { OPENSSL_armcap_P |= ARMV8_NEOVERSE_V1; } + if (MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_ARM, ARM_CPU_PART_V2)) { + OPENSSL_armcap_P |= ARMV8_NEOVERSE_V2; + } } // OPENSSL_armcap is a 32-bit, unsigned value which may start with "0x" to diff --git a/crypto/fipsmodule/cpucap/cpucap.c b/crypto/fipsmodule/cpucap/cpucap.c index 482856352c..8082b081fe 100644 --- a/crypto/fipsmodule/cpucap/cpucap.c +++ b/crypto/fipsmodule/cpucap/cpucap.c @@ -74,6 +74,9 @@ HIDDEN uint32_t OPENSSL_armcap_P = #endif #if defined(OPENSSL_STATIC_ARMCAP_NEOVERSE_V1) || defined(__ARM_FEATURE_NEOVERSE_V1) ARMV8_NEOVERSE_V1 | +#endif +#if defined(OPENSSL_STATIC_ARMCAP_NEOVERSE_V2) || defined(__ARM_FEATURE_NEOVERSE_V2) + ARMV8_NEOVERSE_V2 | #endif 0; diff --git a/crypto/fipsmodule/cpucap/internal.h b/crypto/fipsmodule/cpucap/internal.h index 5d29a38040..530e4e299e 100644 --- a/crypto/fipsmodule/cpucap/internal.h +++ b/crypto/fipsmodule/cpucap/internal.h @@ -186,11 +186,13 @@ OPENSSL_INLINE int CRYPTO_is_ARMv8_PMULL_capable(void) { OPENSSL_INLINE int CRYPTO_is_ARMv8_GCM_8x_capable(void) { return ((OPENSSL_armcap_P & ARMV8_SHA3) != 0 && ((OPENSSL_armcap_P & ARMV8_NEOVERSE_V1) != 0 || + (OPENSSL_armcap_P & ARMV8_NEOVERSE_V2) != 0 || (OPENSSL_armcap_P & ARMV8_APPLE_M1) != 0)); } OPENSSL_INLINE int CRYPTO_is_ARMv8_wide_multiplier_capable(void) { return (OPENSSL_armcap_P & ARMV8_NEOVERSE_V1) != 0 || + (OPENSSL_armcap_P & ARMV8_NEOVERSE_V2) != 0 || (OPENSSL_armcap_P & ARMV8_APPLE_M1) != 0; } diff --git a/include/openssl/arm_arch.h b/include/openssl/arm_arch.h index fa7548809b..d2495bad66 100644 --- a/include/openssl/arm_arch.h +++ b/include/openssl/arm_arch.h @@ -82,11 +82,12 @@ // ARMV8_SHA3 indicates support for hardware SHA-3 instructions including EOR3. 
#define ARMV8_SHA3 (1 << 11) -// The Neoverse V1 and Apple M1 micro-architectures are detected to enable +// The Neoverse V1, V2, and Apple M1 micro-architectures are detected to enable // high unrolling factor of AES-GCM and other algorithms that leverage a // wide crypto pipeline and fast multiplier. #define ARMV8_NEOVERSE_V1 (1 << 12) #define ARMV8_APPLE_M1 (1 << 13) +#define ARMV8_NEOVERSE_V2 (1 << 14) // // MIDR_EL1 system register @@ -102,6 +103,7 @@ # define ARM_CPU_PART_CORTEX_A72 0xD08 # define ARM_CPU_PART_N1 0xD0C # define ARM_CPU_PART_V1 0xD40 +# define ARM_CPU_PART_V2 0xD4F # define MIDR_PARTNUM_SHIFT 4 # define MIDR_PARTNUM_MASK (0xfffUL << MIDR_PARTNUM_SHIFT)