diff --git a/CHANGELOG.md b/CHANGELOG.md index bfdfe5107..9182057c4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ## [Unreleased] ### Added +- Allow for fine-grained CPU intrinsics overrides when BUILD_ARCH != native, e.g. -DBUILD_ARCH=x86-64 -DCOMPILE_AVX512=off - Adds custom bias epilogue kernel. - Adds support for fusing relu and bias addition into gemms when using cuda 11. - Better suppression of unwanted output symbols, specifically "\n" from SentencePiece with byte-fallback. Can be deactivated with --allow-special @@ -36,6 +37,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. - Broken links to MNIST data sets ### Changed +- For BUILD_ARCH != native, enable all intrinsics types by default; individual types can be disabled like this: -DCOMPILE_AVX512=off - Moved FBGEMM pointer to commit c258054 for gcc 9.3+ fix - Change compile options a la -DCOMPILE_CUDA_SM35 to -DCOMPILE_KEPLER, -DCOMPILE_MAXWELL, -DCOMPILE_PASCAL, -DCOMPILE_VOLTA, -DCOMPILE_TURING and -DCOMPILE_AMPERE diff --git a/CMakeLists.txt b/CMakeLists.txt index 7c50681f2..79c8585e9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -124,50 +124,81 @@ else(MSVC) # Detect support CPU instrinsics for the current platform. This will # only by used with BUILD_ARCH=native. For overridden BUILD_ARCH we - # minimally use -msse4.1. This seems to work with MKL. + # force intrinsics as set in the options.
set(INTRINSICS "") list(APPEND INTRINSICS_NVCC) + option(COMPILE_SSE2 "Compile CPU code with SSE2 support" ON) + option(COMPILE_SSE3 "Compile CPU code with SSE3 support" ON) + option(COMPILE_SSE4_1 "Compile CPU code with SSE4.1 support" ON) + option(COMPILE_SSE4_2 "Compile CPU code with SSE4.2 support" ON) + option(COMPILE_AVX "Compile CPU code with AVX support" ON) + option(COMPILE_AVX2 "Compile CPU code with AVX2 support" ON) + option(COMPILE_AVX512 "Compile CPU code with AVX512 support" ON) + if(BUILD_ARCH STREQUAL "native") + message(STATUS "Building with -march=native and intrinsics will be chosen automatically by the compiler to match the current machine.") message(STATUS "Checking support for CPU intrinsics") include(FindSSE) - if(SSE2_FOUND) - message(STATUS "SSE2 support found") + if(SSE2_FOUND AND NOT COMPILE_SSE2) + message(WARNING "SSE2 enabled due to -march=native and -DCOMPILE_SSE2=${COMPILE_SSE2} is ignored.") + endif(SSE2_FOUND AND NOT COMPILE_SSE2) + if(SSE3_FOUND AND NOT COMPILE_SSE3) + message(WARNING "SSE3 enabled due to -march=native and -DCOMPILE_SSE3=${COMPILE_SSE3} is ignored.") + endif(SSE3_FOUND AND NOT COMPILE_SSE3) + if(SSE4_1_FOUND AND NOT COMPILE_SSE4_1) + message(WARNING "SSE4.1 enabled due to -march=native and -DCOMPILE_SSE4_1=${COMPILE_SSE4_1} is ignored.") + endif(SSE4_1_FOUND AND NOT COMPILE_SSE4_1) + if(SSE4_2_FOUND AND NOT COMPILE_SSE4_2) + message(WARNING "SSE4.2 enabled due to -march=native and -DCOMPILE_SSE4_2=${COMPILE_SSE4_2} is ignored.") + endif(SSE4_2_FOUND AND NOT COMPILE_SSE4_2) + if(AVX_FOUND AND NOT COMPILE_AVX) + message(WARNING "AVX enabled due to -march=native and -DCOMPILE_AVX=${COMPILE_AVX} is ignored.") + endif(AVX_FOUND AND NOT COMPILE_AVX) + if(AVX2_FOUND AND NOT COMPILE_AVX2) + message(WARNING "AVX2 enabled due to -march=native and -DCOMPILE_AVX2=${COMPILE_AVX2} is ignored.") + endif(AVX2_FOUND AND NOT COMPILE_AVX2) + if(AVX512_FOUND AND NOT COMPILE_AVX512) + message(WARNING "AVX512 enabled due to 
-march=native and -DCOMPILE_AVX512=${COMPILE_AVX512} is ignored.") + endif(AVX512_FOUND AND NOT COMPILE_AVX512) + else() + # force building with the requested intrinsics (each enabled COMPILE_* option appends its -m flag to INTRINSICS and INTRINSICS_NVCC); requires compiler support + message(STATUS "Building with -march=${BUILD_ARCH} and forcing intrisics as requested") + if(COMPILE_SSE2) + message(STATUS "SSE2 support requested") set(INTRINSICS "${INTRINSICS} -msse2") list(APPEND INTRINSICS_NVCC -Xcompiler\ -msse2) - endif(SSE2_FOUND) - if(SSE3_FOUND) - message(STATUS "SSE3 support found") + endif(COMPILE_SSE2) + if(COMPILE_SSE3) + message(STATUS "SSE3 support requested") set(INTRINSICS "${INTRINSICS} -msse3") list(APPEND INTRINSICS_NVCC -Xcompiler\ -msse3) - endif(SSE3_FOUND) - if(SSE4_1_FOUND) - message(STATUS "SSE4.1 support found") + endif(COMPILE_SSE3) + if(COMPILE_SSE4_1) + message(STATUS "SSE4.1 support requested") set(INTRINSICS "${INTRINSICS} -msse4.1") list(APPEND INTRINSICS_NVCC -Xcompiler\ -msse4.1) - endif(SSE4_1_FOUND) - if(SSE4_2_FOUND) - message(STATUS "SSE4.2 support found") + endif(COMPILE_SSE4_1) + if(COMPILE_SSE4_2) + message(STATUS "SSE4.2 support requested") set(INTRINSICS "${INTRINSICS} -msse4.2") list(APPEND INTRINSICS_NVCC -Xcompiler\ -msse4.2) - endif(SSE4_2_FOUND) - if(AVX_FOUND) - message(STATUS "AVX support found") + endif(COMPILE_SSE4_2) + if(COMPILE_AVX) + message(STATUS "AVX support requested") set(INTRINSICS "${INTRINSICS} -mavx") list(APPEND INTRINSICS_NVCC -Xcompiler\ -mavx) - endif(AVX_FOUND) - if(AVX2_FOUND) - message(STATUS "AVX2 support found") + endif(COMPILE_AVX) + if(COMPILE_AVX2) + message(STATUS "AVX2 support requested") set(INTRINSICS "${INTRINSICS} -mavx2") list(APPEND INTRINSICS_NVCC -Xcompiler\ -mavx2) - endif(AVX2_FOUND) - if(AVX512_FOUND) - message(STATUS "AVX512 support found") + endif(COMPILE_AVX2) + if(COMPILE_AVX512) + message(STATUS "AVX512 support requested") set(INTRINSICS "${INTRINSICS} -mavx512f") list(APPEND INTRINSICS_NVCC -Xcompiler\ -mavx512f) - endif(AVX512_FOUND) - else() - 
set(INTRINSICS "-msse4.1") + endif(COMPILE_AVX512) endif() if(USE_FBGEMM)