From e81b9e9c3b82574fe37c6a26ecc7f8d0269a0cfc Mon Sep 17 00:00:00 2001 From: Max Bachmann Date: Wed, 2 Aug 2023 15:15:25 +0200 Subject: [PATCH] improve x86 support (fixes #342) - build x86 with sse2/avx2 runtime detection - test for actual atomics used by taskflow --- CHANGELOG.rst | 6 ++ CMakeLists.txt | 1 + src/rapidfuzz/CMakeLists.txt | 83 +++++++++++++++------ src/rapidfuzz/_utils.py | 6 +- src/rapidfuzz/distance/CMakeLists.txt | 22 +++++- src/rapidfuzz/distance/metrics_cpp_sse2.pyx | 5 ++ src/rapidfuzz/fuzz_cpp_sse2.pyx | 5 ++ src/rapidfuzz/generate.sh | 2 + 8 files changed, 104 insertions(+), 26 deletions(-) create mode 100644 src/rapidfuzz/distance/metrics_cpp_sse2.pyx create mode 100644 src/rapidfuzz/fuzz_cpp_sse2.pyx diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 69f8ec57..5e5df3c0 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,6 +1,12 @@ Changelog --------- +[3.2.0] - +^^^^^^^^^^^^^^^^^^^^ +Changed +~~~~~~~ +- build x86 with sse2/avx2 runtime detection + [3.1.2] - 2023-07-19 ^^^^^^^^^^^^^^^^^^^^ Changed diff --git a/CMakeLists.txt b/CMakeLists.txt index ecdaee99..10708a45 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -69,6 +69,7 @@ include(FetchContent) include(CheckCPUArch) check_cpu_arch_x64(RAPIDFUZZ_ARCH_X64) +check_cpu_arch_x86(RAPIDFUZZ_ARCH_X86) set(RF_BASE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/src) diff --git a/src/rapidfuzz/CMakeLists.txt b/src/rapidfuzz/CMakeLists.txt index bea47888..6033b3cb 100644 --- a/src/rapidfuzz/CMakeLists.txt +++ b/src/rapidfuzz/CMakeLists.txt @@ -31,7 +31,7 @@ function(rf_add_library name) endif() endfunction(rf_add_library) -if(RAPIDFUZZ_ARCH_X64) +if(RAPIDFUZZ_ARCH_X64 OR RAPIDFUZZ_ARCH_X86) create_cython_target(_feature_detector_cpp) rf_add_library(_feature_detector_cpp ${_feature_detector_cpp} ${CMAKE_CURRENT_LIST_DIR}/FeatureDetector/CpuInfo.cpp) @@ -56,7 +56,7 @@ target_include_directories(fuzz_cpp PRIVATE ${RF_BASE_DIR}/rapidfuzz) target_link_libraries(fuzz_cpp PRIVATE rapidfuzz::rapidfuzz) 
install(TARGETS fuzz_cpp LIBRARY DESTINATION src/rapidfuzz) -if(RAPIDFUZZ_ARCH_X64) +if(RAPIDFUZZ_ARCH_X64 OR RAPIDFUZZ_ARCH_X86) create_cython_target(fuzz_cpp_avx2) rf_add_library(fuzz_cpp_avx2 ${fuzz_cpp_avx2}) target_compile_features(fuzz_cpp_avx2 PUBLIC cxx_std_17) @@ -72,6 +72,23 @@ if(RAPIDFUZZ_ARCH_X64) install(TARGETS fuzz_cpp_avx2 LIBRARY DESTINATION src/rapidfuzz) endif() +if(RAPIDFUZZ_ARCH_X86) + create_cython_target(fuzz_cpp_sse2) + rf_add_library(fuzz_cpp_sse2 ${fuzz_cpp_sse2}) + target_compile_features(fuzz_cpp_sse2 PUBLIC cxx_std_17) + + if(MSVC) + set_target_properties(fuzz_cpp_sse2 PROPERTIES COMPILE_FLAGS "/arch:SSE2") + else() + set_target_properties(fuzz_cpp_sse2 PROPERTIES COMPILE_FLAGS "-msse2") + endif() + + target_include_directories(fuzz_cpp_sse2 PRIVATE ${RF_BASE_DIR}/rapidfuzz) + target_link_libraries(fuzz_cpp_sse2 PRIVATE rapidfuzz::rapidfuzz) + install(TARGETS fuzz_cpp_sse2 LIBRARY DESTINATION src/rapidfuzz) +endif() + + create_cython_target(process_cpp_impl) rf_add_library(process_cpp_impl ${process_cpp_impl}) target_compile_features(process_cpp_impl PUBLIC cxx_std_17) @@ -88,41 +105,59 @@ if(NOT Windows) [=[ #include <atomic> #include <cstdint> + #include <cstddef> std::atomic<int> x{0}; int main() { - x.fetch_add(1, std::memory_order_relaxed); + auto c = x.load(std::memory_order_relaxed); return 0; } ]=]) - string(REPLACE "std::atomic<int>" "std::atomic<std::int8_t>" - ATOMICS8_TEST_SOURCE "${ATOMICS_TEST_SOURCE}") - string(REPLACE "std::atomic<int>" "std::atomic<std::int64_t>" - ATOMICS64_TEST_SOURCE "${ATOMICS_TEST_SOURCE}") + string(REPLACE "std::atomic<int>" "std::atomic<std::size_t>" + ATOMICS_SIZE_T_TEST_SOURCE "${ATOMICS_TEST_SOURCE}") + string(REPLACE "std::atomic<int>" "std::atomic<void*>" + ATOMICS_VOID_PTR_TEST_SOURCE "${ATOMICS_TEST_SOURCE}") + string(REPLACE "std::atomic<int>" "std::atomic<unsigned>" + ATOMICS_UNSIGNED_TEST_SOURCE "${ATOMICS_TEST_SOURCE}") if(APPLE) set(CMAKE_REQUIRED_FLAGS "-std=c++11") endif() check_cxx_source_compiles("${ATOMICS_TEST_SOURCE}" - HAVE_CXX_ATOMICS_WITHOUT_LIB) - 
check_cxx_source_compiles("${ATOMICS8_TEST_SOURCE}" - HAVE_CXX_ATOMICS8_WITHOUT_LIB) - check_cxx_source_compiles("${ATOMICS64_TEST_SOURCE}" - HAVE_CXX_ATOMICS64_WITHOUT_LIB) - if((NOT HAVE_CXX_ATOMICS_WITHOUT_LIB) - OR (NOT HAVE_CXX_ATOMICS8_WITHOUT_LIB) - OR (NOT HAVE_CXX_ATOMICS64_WITHOUT_LIB)) + HAVE_CXX_ATOMICS_INT_WITHOUT_LIB) + check_cxx_source_compiles("${ATOMICS_SIZE_T_TEST_SOURCE}" + HAVE_CXX_ATOMICS_SIZE_T_WITHOUT_LIB) + check_cxx_source_compiles("${ATOMICS_VOID_PTR_TEST_SOURCE}" + HAVE_CXX_ATOMICS_VOID_PTR_WITHOUT_LIB) + check_cxx_source_compiles("${ATOMICS_UNSIGNED_TEST_SOURCE}" + HAVE_CXX_ATOMICS_UNSIGNED_WITHOUT_LIB) + if((NOT HAVE_CXX_ATOMICS_INT_WITHOUT_LIB) + OR (NOT HAVE_CXX_ATOMICS_SIZE_T_WITHOUT_LIB) + OR (NOT HAVE_CXX_ATOMICS_VOID_PTR_WITHOUT_LIB) + OR (NOT HAVE_CXX_ATOMICS_UNSIGNED_WITHOUT_LIB)) set(CMAKE_REQUIRED_LIBRARIES "atomic") check_cxx_source_compiles("${ATOMICS_TEST_SOURCE}" - HAVE_CXX_ATOMICS_WITH_LIB) - check_cxx_source_compiles("${ATOMICS8_TEST_SOURCE}" - HAVE_CXX_ATOMICS8_WITH_LIB) - check_cxx_source_compiles("${ATOMICS64_TEST_SOURCE}" - HAVE_CXX_ATOMICS64_WITH_LIB) - if((NOT HAVE_CXX_ATOMICS_WITH_LIB) - OR (NOT HAVE_CXX_ATOMICS8_WITH_LIB) - OR (NOT HAVE_CXX_ATOMICS64_WITH_LIB)) + HAVE_CXX_ATOMICS_INT_WITH_LIB) + check_cxx_source_compiles("${ATOMICS_SIZE_T_TEST_SOURCE}" + HAVE_CXX_ATOMICS_SIZE_T_WITH_LIB) + check_cxx_source_compiles("${ATOMICS_VOID_PTR_TEST_SOURCE}" + HAVE_CXX_ATOMICS_VOID_PTR_WITH_LIB) + check_cxx_source_compiles("${ATOMICS_UNSIGNED_TEST_SOURCE}" + HAVE_CXX_ATOMICS_UNSIGNED_WITH_LIB) + if(NOT HAVE_CXX_ATOMICS_INT_WITH_LIB) + message( + FATAL_ERROR "No native support for std::atomic<int>, or libatomic not found!" + ) + elseif(NOT HAVE_CXX_ATOMICS_SIZE_T_WITH_LIB) + message( + FATAL_ERROR "No native support for std::atomic<std::size_t>, or libatomic not found!" + ) + elseif(NOT HAVE_CXX_ATOMICS_VOID_PTR_WITH_LIB) + message( + FATAL_ERROR "No native support for std::atomic<void*>, or libatomic not found!" 
+ ) + elseif(NOT HAVE_CXX_ATOMICS_UNSIGNED_WITH_LIB) message( - FATAL_ERROR "No native support for std::atomic, or libatomic not found!" + FATAL_ERROR "No native support for std::atomic<unsigned>, or libatomic not found!" ) else() message(STATUS "Linking with libatomic for atomics support") diff --git a/src/rapidfuzz/_utils.py b/src/rapidfuzz/_utils.py index 624cf4a2..04736d87 100644 --- a/src/rapidfuzz/_utils.py +++ b/src/rapidfuzz/_utils.py @@ -9,7 +9,7 @@ from math import isnan from typing import Any, Callable -from rapidfuzz._feature_detector import AVX2, supports +from rapidfuzz._feature_detector import AVX2, SSE2, supports class ScorerFlag(IntFlag): @@ -84,6 +84,10 @@ def vectorized_import(name: str) -> tuple[Any, list[Any]]: module = optional_import_module(name + "_avx2") if module is not None: return module + if supports(SSE2): + module = optional_import_module(name + "_sse2") + if module is not None: + return module return importlib.import_module(name) diff --git a/src/rapidfuzz/distance/CMakeLists.txt b/src/rapidfuzz/distance/CMakeLists.txt index 424bdd03..de2bc020 100644 --- a/src/rapidfuzz/distance/CMakeLists.txt +++ b/src/rapidfuzz/distance/CMakeLists.txt @@ -50,7 +50,7 @@ target_include_directories( target_link_libraries(metrics_cpp PRIVATE rapidfuzz::rapidfuzz) install(TARGETS metrics_cpp LIBRARY DESTINATION src/rapidfuzz/distance) -if(RAPIDFUZZ_ARCH_X64) +if(RAPIDFUZZ_ARCH_X64 OR RAPIDFUZZ_ARCH_X86) create_cython_target(metrics_cpp_avx2) rf_add_library(metrics_cpp_avx2 ${metrics_cpp_avx2}) target_compile_features(metrics_cpp_avx2 PUBLIC cxx_std_17) @@ -68,3 +68,23 @@ if(RAPIDFUZZ_ARCH_X64) target_link_libraries(metrics_cpp_avx2 PRIVATE rapidfuzz::rapidfuzz) install(TARGETS metrics_cpp_avx2 LIBRARY DESTINATION src/rapidfuzz/distance) endif() + +if(RAPIDFUZZ_ARCH_X86) + create_cython_target(metrics_cpp_sse2) + rf_add_library(metrics_cpp_sse2 ${metrics_cpp_sse2}) + target_compile_features(metrics_cpp_sse2 PUBLIC cxx_std_17) + + if(MSVC) + 
set_target_properties(metrics_cpp_sse2 PROPERTIES COMPILE_FLAGS + "/arch:SSE2") + else() + set_target_properties(metrics_cpp_sse2 PROPERTIES COMPILE_FLAGS "-msse2") + endif() + + target_include_directories( + metrics_cpp_sse2 PRIVATE ${RF_BASE_DIR}/rapidfuzz + ${RF_BASE_DIR}/rapidfuzz/distance) + target_link_libraries(metrics_cpp_sse2 PRIVATE rapidfuzz::rapidfuzz) + install(TARGETS metrics_cpp_sse2 LIBRARY DESTINATION src/rapidfuzz/distance) +endif() + diff --git a/src/rapidfuzz/distance/metrics_cpp_sse2.pyx b/src/rapidfuzz/distance/metrics_cpp_sse2.pyx new file mode 100644 index 00000000..2be2e2fc --- /dev/null +++ b/src/rapidfuzz/distance/metrics_cpp_sse2.pyx @@ -0,0 +1,5 @@ +# distutils: language=c++ +# cython: language_level=3, binding=True, linetrace=True + +# this is a hack since I could not get this to build in cmake without it +include "metrics_cpp.pyx" diff --git a/src/rapidfuzz/fuzz_cpp_sse2.pyx b/src/rapidfuzz/fuzz_cpp_sse2.pyx new file mode 100644 index 00000000..cfe3ffb1 --- /dev/null +++ b/src/rapidfuzz/fuzz_cpp_sse2.pyx @@ -0,0 +1,5 @@ +# distutils: language=c++ +# cython: language_level=3, binding=True, linetrace=True + +# this is a hack since I could not get this to build in cmake without it +include "fuzz_cpp.pyx" diff --git a/src/rapidfuzz/generate.sh b/src/rapidfuzz/generate.sh index afd00de6..43a9bbaa 100755 --- a/src/rapidfuzz/generate.sh +++ b/src/rapidfuzz/generate.sh @@ -9,6 +9,7 @@ generate_cython() generate_cython fuzz_cpp generate_cython fuzz_cpp_avx2 +generate_cython fuzz_cpp_sse2 generate_cython process_cpp_impl generate_cython utils_cpp generate_cython _feature_detector_cpp @@ -16,3 +17,4 @@ generate_cython _feature_detector_cpp generate_cython distance/_initialize_cpp generate_cython distance/metrics_cpp generate_cython distance/metrics_cpp_avx2 +generate_cython distance/metrics_cpp_sse2