Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Aarch64 paged attention enablement #27841

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
91 changes: 91 additions & 0 deletions cmake/developer_package/compile_flags/os_flags.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

include(ProcessorCount)
include(CheckCXXCompilerFlag)
include(CheckCSourceCompiles)
include(CheckCXXSourceCompiles)

#
# ov_disable_deprecated_warnings()
Expand Down Expand Up @@ -91,6 +93,52 @@ macro(ov_dev_package_no_errors)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${ov_c_cxx_dev_no_errors}")
endmacro()

#
# ov_check_compiler_supports_sve(lang flags)
#
# Checks whether compiler for passed language supports SVE code compilation
#

macro(ov_check_compiler_supports_sve flags)
# Code to compile
set(SVE_CODE "
#include <arm_sve.h>
int main() {
svfloat64_t a;
a = svdup_n_f64(0);
(void)a; // to avoid warnings
return 0;
}")

# Save the current state of required flags
set(CMAKE_REQUIRED_FLAGS_SAVE ${CMAKE_REQUIRED_FLAGS})

# Set the flags necessary for compiling the test code with SVE support
set(CMAKE_REQUIRED_FLAGS "${CMAKE_CXX_FLAGS_INIT} ${flags}")

# Check if the source code compiles with the given flags for the specified language (C or C++)
CHECK_CXX_SOURCE_COMPILES("${SVE_CODE}" CXX_HAS_SVE)

# If the compilation test is successful, set appropriate variables indicating support
if(CXX_HAS_SVE)
set(CXX_SVE_FOUND TRUE CACHE BOOL "SVE available on host")
set(CXX_SVE_FOUND TRUE CACHE BOOL "CXX SVE support")
set(CXX_SVE_FLAGS "${flags}" CACHE STRING "CXX SVE flags")
endif()

# Restore the original state of required flags
set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_SAVE})

# If the compilation test fails, indicate that the support is not found
if(NOT CXX_SVE_FOUND)
set(CXX_SVE_FOUND FALSE CACHE BOOL "CXX SVE support")
set(CXX_SVE_FLAGS "" CACHE STRING "CXX SVE flags")
endif()

# Mark the variables as advanced to hide them in the default CMake GUI
mark_as_advanced(CXX_SVE_FOUND CXX_SVE_FLAGS)
endmacro()

#
# ov_sse42_optimization_flags(<output flags>)
#
Expand Down Expand Up @@ -208,6 +256,49 @@ macro(ov_arm_neon_fp16_optimization_flags flags)
endif()
endmacro()

#
# ov_arm_sve_optimization_flags(<output flags>)
#
macro(ov_arm_sve_optimization_flags flags)
# Check for compiler SVE support
ov_check_compiler_supports_sve("-march=armv8-a+sve")

if(OV_COMPILER_IS_INTEL_LLVM)
message(WARNING "Unsupported CXX compiler ${CMAKE_CXX_COMPILER_ID}")
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
# nothing should be required here
elseif(ANDROID)
if(ANDROID_ABI STREQUAL "arm64-v8a")
set(${flags} -Wno-unused-command-line-argument)
if(CXX_SVE_FOUND)
list(APPEND ${flags} -march=armv8-a+sve)
else()
message(WARNING "SVE is not supported on this Android ABI: ${ANDROID_ABI}")
endif()
else()
message(WARNING "SVE is not supported on this Android ABI: ${ANDROID_ABI}")
endif()
else()
if(AARCH64)
set(${flags} -O2)

# Add flag for SVE if supported
if(CXX_SVE_FOUND)
list(APPEND ${flags} -march=armv8-a+sve)
endif()
if(NOT CMAKE_CL_64)
list(APPEND ${flags} -ftree-vectorize)
endif()

set(${flags} ${${flags}})
elseif(ARM)
message(WARNING "SVE is not supported on 32-bit ARM architectures.")
else()
message(WARNING "SVE is not supported by architecture ${CMAKE_SYSTEM_PROCESSOR}")
endif()
endif()
endmacro()

#
# ov_disable_all_warnings(<target1 [target2 target3 ...]>)
#
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ set(_CPU_CHECK_ANY "true")
set(_CPU_CHECK_SSE42 "with_cpu_x86_sse42()")
set(_CPU_CHECK_AVX "with_cpu_x86_avx()")
set(_CPU_CHECK_NEON_FP16 "with_cpu_neon_fp16()")
set(_CPU_CHECK_SVE "with_cpu_sve()")
set(_CPU_CHECK_AVX2 "with_cpu_x86_avx2()")
set(_CPU_CHECK_AVX512F "with_cpu_x86_avx512f()")

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,15 @@
#

## list of available instruction sets
set(_ARCH_LIST ANY SSE42 AVX AVX2 AVX512F NEON_FP16)
set(_ARCH_LIST ANY SSE42 AVX AVX2 AVX512F NEON_FP16 SVE)

set(_ACCEPTED_ARCHS_ANY "^(ANY)$")
set(_ACCEPTED_ARCHS_SSE42 "^(ANY|SSE42)$")
set(_ACCEPTED_ARCHS_AVX "^(ANY|SSE42|AVX)$")
set(_ACCEPTED_ARCHS_AVX2 "^(ANY|SSE42|AVX|AVX2)$")
set(_ACCEPTED_ARCHS_AVX512F "^(ANY|SSE42|AVX|AVX2|AVX512F)$")
set(_ACCEPTED_ARCHS_NEON_FP16 "^(ANY|NEON_FP16)$")
set(_ACCEPTED_ARCHS_SVE "^(ANY|SVE)$")

## Arch specific definitions
set(_DEFINE_ANY "")
Expand All @@ -19,12 +20,14 @@ set(_DEFINE_AVX "HAVE_AVX" ${_DEFINE_SSE42})
set(_DEFINE_AVX2 "HAVE_AVX2" ${_DEFINE_AVX})
set(_DEFINE_AVX512F "HAVE_AVX512F" ${_DEFINE_AVX2})
set(_DEFINE_NEON_FP16 "HAVE_NEON_FP16" ${_DEFINE_ANY})
set(_DEFINE_SVE "HAVE_SVE" ${_DEFINE_ANY})

## Arch specific compile options
ov_avx512_optimization_flags(_FLAGS_AVX512F)
ov_avx2_optimization_flags (_FLAGS_AVX2)
ov_sse42_optimization_flags (_FLAGS_SSE42)
ov_arm_neon_fp16_optimization_flags(_FLAGS_NEON_FP16)
ov_arm_sve_optimization_flags(_FLAGS_SVE)
set(_FLAGS_AVX "") ## TBD is not defined for OV project yet
set(_FLAGS_ANY "") ##

Expand Down Expand Up @@ -183,7 +186,9 @@ endfunction()
# Return currently requested ARCH id
#
function(_currently_requested_top_arch VAR)
if(ENABLE_NEON_FP16)
if(ENABLE_SVE)
set(RES SVE)
elseif(ENABLE_NEON_FP16)
set(RES NEON_FP16)
elseif(ENABLE_AVX512F)
set(RES AVX512F)
Expand Down
2 changes: 2 additions & 0 deletions cmake/developer_package/features.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ ov_dependent_option (ENABLE_AVX512F "Enable AVX512 optimizations" ON "X86_64 OR

ov_dependent_option(ENABLE_NEON_FP16 "Enable ARM FP16 optimizations" ON "AARCH64" OFF)

ov_dependent_option(ENABLE_SVE "Enable SVE optimizations" ON "AARCH64" OFF)

# Type of build, we add this as an explicit option to default it to ON
get_property(BUILD_SHARED_LIBS_DEFAULT GLOBAL PROPERTY TARGET_SUPPORTS_SHARED_LIBS)
ov_option (BUILD_SHARED_LIBS "Build as a shared library" ${BUILD_SHARED_LIBS_DEFAULT})
Expand Down
7 changes: 7 additions & 0 deletions src/inference/dev_api/openvino/runtime/system_conf.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,13 @@ OPENVINO_RUNTIME_API bool with_cpu_x86_sse42();
*/
OPENVINO_RUNTIME_API bool with_cpu_neon_fp16();

/**
* @brief Checks whether CPU supports ARM SVE capability
* @ingroup ov_dev_api_system_conf
* @return `True` if ARM SVE instructions are available, `false` otherwise
*/
OPENVINO_RUNTIME_API bool with_cpu_sve();

/**
* @brief Checks whether CPU supports AVX capability
* @ingroup ov_dev_api_system_conf
Expand Down
19 changes: 19 additions & 0 deletions src/inference/src/system_conf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
# include <sys/auxv.h>
# define ARM_COMPUTE_CPU_FEATURE_HWCAP_FPHP (1 << 9)
# define ARM_COMPUTE_CPU_FEATURE_HWCAP_ASIMDHP (1 << 10)
# define ARM_COMPUTE_CPU_FEATURE_HWCAP_SVE (1 << 24)
#elif defined(__APPLE__) && defined(__aarch64__)
# include <sys/sysctl.h>
# include <sys/types.h>
Expand Down Expand Up @@ -114,6 +115,10 @@ bool with_cpu_neon_fp16() {
return false;
}

bool with_cpu_sve() {
return false;
}

#else // OPENVINO_ARCH_X86 || OPENVINO_ARCH_X86_64

bool with_cpu_x86_sse42() {
Expand Down Expand Up @@ -173,6 +178,20 @@ bool with_cpu_neon_fp16() {
return false;
# endif
}
bool with_cpu_sve() {
# if !defined(_WIN64) && !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && \
!defined(__arm__) && defined(__aarch64__)
const uint32_t hwcaps = getauxval(AT_HWCAP);
return hwcaps & ARM_COMPUTE_CPU_FEATURE_HWCAP_SVE;
# elif !defined(_WIN64) && !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && \
!defined(__aarch64__) && defined(__arm__)
return false;
# elif defined(__aarch64__) && defined(__APPLE__)
return false;
# else
return false;
# endif
}
#endif // OPENVINO_ARCH_X86 || OPENVINO_ARCH_X86_64

bool check_open_mp_env_vars(bool include_omp_num_threads) {
Expand Down
6 changes: 3 additions & 3 deletions src/plugins/intel_cpu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -302,21 +302,21 @@ cross_compiled_file(${TARGET_NAME}
NAMESPACE ov::Extensions::Cpu::XARCH
)
cross_compiled_file(${TARGET_NAME}
ARCH AVX512F AVX2 ANY
ARCH AVX512F AVX2 SVE ANY
src/nodes/kernels/scaled_attn/executor_pa.cpp
API src/nodes/kernels/scaled_attn/executor_pa.hpp
NAME make_pa_executor
NAMESPACE ov::Extensions::Cpu::XARCH
)
cross_compiled_file(${TARGET_NAME}
ARCH AVX512F AVX2 ANY
ARCH AVX512F AVX2 SVE ANY
src/nodes/kernels/scaled_attn/attn_memcpy.cpp
API src/nodes/kernels/scaled_attn/attn_memcpy.hpp
NAME attn_memcpy paged_attn_memcpy attn_memcpy2d_kernel
NAMESPACE ov::Extensions::Cpu::XARCH
)
cross_compiled_file(${TARGET_NAME}
ARCH AVX512F AVX2 ANY
ARCH AVX512F AVX2 SVE ANY
src/nodes/kernels/scaled_attn/attn_quant.cpp
API src/nodes/kernels/scaled_attn/attn_quant.hpp
NAME attn_quantkv paged_attn_quantkv attn_quant_u8 attn_dequant_u8
Expand Down
Loading
Loading