diff --git a/CMakeLists.txt b/CMakeLists.txt index 800dbf4f..5fa7a1df 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,6 +6,7 @@ option(BUILD_DFT "libsleefdft will be built." ON) option(BUILD_QUAD "libsleefquad will be built." OFF) option(BUILD_GNUABI_LIBS "libsleefgnuabi will be built." ON) option(BUILD_TESTS "Tests will be built." ON) +option(BUILD_INLINE_HEADERS "Build header for inlining whole SLEEF functions" OFF) option(SLEEF_TEST_ALL_IUT "Perform tests on implementations with all vector extensions" OFF) option(SLEEF_SHOW_CONFIG "Show SLEEF configuration status messages." ON) @@ -83,6 +84,8 @@ set(TARGET_LIBSLEEFGNUABI "sleefgnuabi") # Generates the sleef.h headers and all the rename headers # Defined in src/libm/CMakeLists.txt via custom commands and a custom target set(TARGET_HEADERS "headers") +set(TARGET_INLINE_HEADERS "inline_headers") +set(TARGET_LIBINLINE "sleefinline") # Generates executable files for running the test suite # Defined in src/libm-tester/CMakeLists.txt via command add_executable set(TARGET_TESTER "tester") @@ -148,6 +151,9 @@ if(SLEEF_SHOW_CONFIG) message(STATUS "FFTW3 : " ${LIBFFTW3}) message(STATUS "OPENSSL : " ${OPENSSL_VERSION}) message(STATUS "SDE : " ${SDE_COMMAND}) + if (BUILD_INLINE_HEADERS) + message(STATUS "SED : " ${SED_COMMAND}) + endif() message(STATUS "RUNNING_ON_TRAVIS : " ${RUNNING_ON_TRAVIS}) message(STATUS "COMPILER_SUPPORTS_OPENMP : " ${COMPILER_SUPPORTS_OPENMP}) if(ENABLE_GNUABI) diff --git a/Configure.cmake b/Configure.cmake index 430556f3..65f2df66 100644 --- a/Configure.cmake +++ b/Configure.cmake @@ -317,6 +317,12 @@ if(CMAKE_C_COMPILER_ID MATCHES "(GNU|Clang)") set(FLAGS_ENABLE_NEON32 "-mfpu=neon") endif(CMAKE_C_COMPILER_ID MATCHES "GNU") + # Flags for generating inline headers + set(FLAG_PREPROCESS "-E") + set(FLAG_PRESERVE_COMMENTS "-C") + set(FLAG_INCLUDE "-I") + set(FLAG_DEFINE "-D") + if (SLEEF_CLANG_ON_WINDOWS) # The following line is required to prevent clang from displaying # many warnings. 
Clang on Windows references MSVC header files, @@ -343,6 +349,11 @@ elseif(MSVC) set(FLAGS_ENABLE_PURECFMA_SCALAR /D__SSE2__ /D__SSE3__ /D__SSE4_1__ /D__AVX__ /D__AVX2__ /arch:AVX2) set(FLAGS_WALL "/D_CRT_SECURE_NO_WARNINGS") set(FLAGS_NO_ERRNO "") + + set(FLAG_PREPROCESS "/E") + set(FLAG_PRESERVE_COMMENTS "/C") + set(FLAG_INCLUDE "/I") + set(FLAG_DEFINE "/D") elseif(CMAKE_C_COMPILER_ID MATCHES "Intel") set(FLAGS_ENABLE_SSE2 "-msse2") set(FLAGS_ENABLE_SSE4 "-msse4.1") @@ -356,6 +367,11 @@ elseif(CMAKE_C_COMPILER_ID MATCHES "Intel") set(FLAGS_FASTMATH "-fp-model fast=2 -Qoption,cpp,--extended_float_type") set(FLAGS_WALL "-fmax-errors=3 -Wall -Wno-unused -Wno-attributes") set(FLAGS_NO_ERRNO "") + + set(FLAG_PREPROCESS "-E") + set(FLAG_PRESERVE_COMMENTS "-C") + set(FLAG_INCLUDE "-I") + set(FLAG_DEFINE "-D") endif() set(SLEEF_C_FLAGS "${FLAGS_WALL} ${FLAGS_STRICTMATH} ${FLAGS_NO_ERRNO}") @@ -723,6 +739,10 @@ if (NOT SVE_VECTOR_BITS) set(SVE_VECTOR_BITS 128) endif() +# + +find_program(SED_COMMAND sed) + ## if(SLEEF_SHOW_ERROR_LOG) diff --git a/appveyor.yml b/appveyor.yml index 3688adbf..917c7bde 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -20,9 +20,9 @@ build_script: - if "%DO_TEST%" == "TRUE" echo PATH c:\Cygwin64\bin;c:\Cygwin64\usr\bin;%CD%\build-cygwin\bin;%PATH% > q.bat - if "%DO_TEST%" == "TRUE" powershell -Command "(gc q.bat) -replace ' ;', ';' | Out-File -encoding ASCII p.bat" - if "%DO_TEST%" == "TRUE" call p.bat - - if "%DO_TEST%" == "TRUE" "C:\\Cygwin64\\bin\\bash" -c 'mkdir build-mingw;cd build-mingw;CC=x86_64-w64-mingw32-gcc cmake -G Ninja .. -DRUNNING_ON_APPVEYOR=TRUE -DBUILD_SHARED_LIBS=FALSE -DBUILD_QUAD=TRUE;ninja' + - if "%DO_TEST%" == "TRUE" "C:\\Cygwin64\\bin\\bash" -c 'mkdir build-mingw;cd build-mingw;CC=x86_64-w64-mingw32-gcc cmake -G Ninja .. 
-DRUNNING_ON_APPVEYOR=TRUE -DBUILD_SHARED_LIBS=FALSE -DBUILD_QUAD=TRUE -DBUILD_INLINE_HEADERS=TRUE;ninja' - if "%DO_TEST%" == "TRUE" cd "%BUILDFOLDER%" - - if "%DO_TEST%" == "TRUE" "C:\\Cygwin64\\bin\\bash" -c 'mkdir build-cygwin;cd build-cygwin;cmake -G Ninja -DRUNNING_ON_APPVEYOR=TRUE -DBUILD_QUAD=TRUE ..;ninja' + - if "%DO_TEST%" == "TRUE" "C:\\Cygwin64\\bin\\bash" -c 'mkdir build-cygwin;cd build-cygwin;cmake -G Ninja -DRUNNING_ON_APPVEYOR=TRUE -DBUILD_QUAD=TRUE -DBUILD_INLINE_HEADERS=TRUE ..;ninja' - if "%DO_TEST%" == "TRUE" cd "%BUILDFOLDER%" - if "%DO_TEST%" == "TRUE" del /Q /F build-cygwin\bin\iut* - if "%DO_TEST%" == "TRUE" echo PATH %ORGPATH%;c:\Cygwin64\bin;c:\Cygwin64\usr\bin;%CD%\build-cygwin\bin;%CD%\build\bin > q.bat @@ -30,7 +30,7 @@ build_script: - if "%DO_TEST%" == "TRUE" call p.bat - mkdir build - cd build - - cmake -G"Visual Studio 16 2019" .. -DRUNNING_ON_APPVEYOR=TRUE -DCMAKE_INSTALL_PREFIX=install -DSLEEF_SHOW_CONFIG=1 -DSLEEF_SHOW_ERROR_LOG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE %ENV_BUILD_STATIC% + - cmake -G"Visual Studio 16 2019" .. -DRUNNING_ON_APPVEYOR=TRUE -DCMAKE_INSTALL_PREFIX=install -DSLEEF_SHOW_CONFIG=1 -DSLEEF_SHOW_ERROR_LOG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_INLINE_HEADERS=TRUE %ENV_BUILD_STATIC% - cmake --build . --target install --config Release - if "%DO_TEST%" == "TRUE" (ctest --output-on-failure -j 4 -C Release) - cd "%BUILDFOLDER%" diff --git a/src/arch/helperadvsimd.h b/src/arch/helperadvsimd.h index 56ebf063..63fc4355 100644 --- a/src/arch/helperadvsimd.h +++ b/src/arch/helperadvsimd.h @@ -9,27 +9,38 @@ #error Please specify advsimd flags. 
#endif +#if !defined(SLEEF_GENHEADER) #include #include #include "misc.h" +#endif // #if !defined(SLEEF_GENHEADER) #define ENABLE_DP +//@#define ENABLE_DP #define LOG2VECTLENDP 1 +//@#define LOG2VECTLENDP 1 #define VECTLENDP (1 << LOG2VECTLENDP) +//@#define VECTLENDP (1 << LOG2VECTLENDP) #define ENABLE_SP +//@#define ENABLE_SP #define LOG2VECTLENSP 2 +//@#define LOG2VECTLENSP 2 #define VECTLENSP (1 << LOG2VECTLENSP) +//@#define VECTLENSP (1 << LOG2VECTLENSP) #if CONFIG == 1 #define ENABLE_FMA_DP +//@#define ENABLE_FMA_DP #define ENABLE_FMA_SP -//#define SPLIT_KERNEL // Benchmark comparison is needed to determine whether this option should be enabled. +//@#define ENABLE_FMA_SP #endif #define FULL_FP_ROUNDING +//@#define FULL_FP_ROUNDING #define ACCURATE_SQRT +//@#define ACCURATE_SQRT #define ISANAME "AArch64 AdvSIMD" @@ -296,14 +307,20 @@ static INLINE VECTOR_CC vint2 vxor_vi2_vi2_vi2(vint2 x, vint2 y) { // Shifts #define vsll_vi2_vi2_i(x, c) vshlq_n_s32(x, c) +//@#define vsll_vi2_vi2_i(x, c) vshlq_n_s32(x, c) #define vsrl_vi2_vi2_i(x, c) \ vreinterpretq_s32_u32(vshrq_n_u32(vreinterpretq_u32_s32(x), c)) +//@#define vsrl_vi2_vi2_i(x, c) vreinterpretq_s32_u32(vshrq_n_u32(vreinterpretq_u32_s32(x), c)) #define vsra_vi2_vi2_i(x, c) vshrq_n_s32(x, c) +//@#define vsra_vi2_vi2_i(x, c) vshrq_n_s32(x, c) #define vsra_vi_vi_i(x, c) vshr_n_s32(x, c) +//@#define vsra_vi_vi_i(x, c) vshr_n_s32(x, c) #define vsll_vi_vi_i(x, c) vshl_n_s32(x, c) +//@#define vsll_vi_vi_i(x, c) vshl_n_s32(x, c) #define vsrl_vi_vi_i(x, c) \ vreinterpret_s32_u32(vshr_n_u32(vreinterpret_u32_s32(x), c)) +//@#define vsrl_vi_vi_i(x, c) vreinterpret_s32_u32(vshr_n_u32(vreinterpret_u32_s32(x), c)) // Comparison returning masks static INLINE VECTOR_CC vmask veq_vm_vi2_vi2(vint2 x, vint2 y) { return vceqq_s32(x, y); } @@ -757,8 +774,6 @@ static INLINE VECTOR_CC void vsscatter2_v_p_i_i_vf(float *ptr, int offset, int s // -typedef Sleef_quad2 vargquad; - static INLINE vmask2 vinterleave_vm2_vm2(vmask2 v) { return 
(vmask2) { vreinterpretq_u32_u64(vtrn1q_u64(vreinterpretq_u64_u32(v.x), vreinterpretq_u64_u32(v.y))), @@ -783,6 +798,9 @@ static vmask2 vloadu_vm2_p(void *p) { return vm2; } +#if !defined(SLEEF_GENHEADER) +typedef Sleef_quad2 vargquad; + static INLINE vmask2 vcast_vm2_aq(vargquad aq) { return vinterleave_vm2_vm2(vloadu_vm2_p(&aq)); } @@ -793,6 +811,7 @@ static INLINE vargquad vcast_aq_vm2(vmask2 vm2) { memcpy(&aq, &vm2, VECTLENDP * 16); return aq; } +#endif // #if !defined(SLEEF_GENHEADER) static INLINE int vtestallzeros_i_vo64(vopmask g) { uint32x2_t x0 = vorr_u32(vget_low_u32(g), vget_high_u32(g)); @@ -815,7 +834,9 @@ static INLINE vopmask vgt64_vo_vm_vm(vmask x, vmask y) { } #define vsll64_vm_vm_i(x, c) vreinterpretq_u32_u64(vshlq_n_u64(vreinterpretq_u64_u32(x), c)) +//@#define vsll64_vm_vm_i(x, c) vreinterpretq_u32_u64(vshlq_n_u64(vreinterpretq_u64_u32(x), c)) #define vsrl64_vm_vm_i(x, c) vreinterpretq_u32_u64(vshrq_n_u64(vreinterpretq_u64_u32(x), c)) +//@#define vsrl64_vm_vm_i(x, c) vreinterpretq_u32_u64(vshrq_n_u64(vreinterpretq_u64_u32(x), c)) static INLINE vmask vcast_vm_vi(vint vi) { vmask m = vreinterpretq_u32_u64(vmovl_u32(vreinterpret_u32_s32(vi))); diff --git a/src/arch/helperavx.h b/src/arch/helperavx.h index 86167ac1..a0280c3f 100644 --- a/src/arch/helperavx.h +++ b/src/arch/helperavx.h @@ -5,13 +5,13 @@ #if CONFIG == 1 -#if !defined(__AVX__) +#if !defined(__AVX__) && !defined(SLEEF_GENHEADER) #error Please specify -mavx. #endif #elif CONFIG == 4 -#if !defined(__AVX__) || !defined(__FMA4__) +#if (!defined(__AVX__) || !defined(__FMA4__)) && !defined(SLEEF_GENHEADER) #error Please specify -mavx and -mfma4. 
#endif @@ -20,16 +20,25 @@ #endif #define ENABLE_DP +//@#define ENABLE_DP #define LOG2VECTLENDP 2 +//@#define LOG2VECTLENDP 2 #define VECTLENDP (1 << LOG2VECTLENDP) +//@#define VECTLENDP (1 << LOG2VECTLENDP) #define ENABLE_SP +//@#define ENABLE_SP #define LOG2VECTLENSP (LOG2VECTLENDP+1) +//@#define LOG2VECTLENSP (LOG2VECTLENDP+1) #define VECTLENSP (1 << LOG2VECTLENSP) +//@#define VECTLENSP (1 << LOG2VECTLENSP) #define FULL_FP_ROUNDING +//@#define FULL_FP_ROUNDING #define ACCURATE_SQRT +//@#define ACCURATE_SQRT +#if !defined(SLEEF_GENHEADER) #if defined(_MSC_VER) #include #else @@ -38,6 +47,7 @@ #include #include "misc.h" +#endif // #if !defined(SLEEF_GENHEADER) typedef __m256i vmask; typedef __m256i vopmask; @@ -54,6 +64,8 @@ typedef struct { // +#if !defined(SLEEF_GENHEADER) + #ifndef __SLEEF_H__ void Sleef_x86CpuID(int32_t out[4], uint32_t eax, uint32_t ecx); #endif @@ -95,6 +107,8 @@ static INLINE int vavailability_i(int name) { #define DFTPRIORITY 20 #endif +#endif // #if !defined(SLEEF_GENHEADER) + static INLINE void vprefetch_v_p(const void *ptr) { _mm_prefetch(ptr, _MM_HINT_T0); } static INLINE int vtestallones_i_vo32(vopmask g) { @@ -563,8 +577,6 @@ static INLINE void vsscatter2_v_p_i_i_vf(float *ptr, int offset, int step, vfloa // -typedef Sleef_quad4 vargquad; - static INLINE vmask2 vinterleave_vm2_vm2(vmask2 v) { return (vmask2) { vreinterpret_vm_vd(_mm256_unpacklo_pd(vreinterpret_vd_vm(v.x), vreinterpret_vd_vm(v.y))), @@ -615,6 +627,9 @@ static vmask2 vloadu_vm2_p(void *p) { return vm2; } +#if !defined(SLEEF_GENHEADER) +typedef Sleef_quad4 vargquad; + static INLINE vmask2 vcast_vm2_aq(vargquad aq) { return vinterleave_vm2_vm2(vloadu_vm2_p(&aq)); } @@ -625,6 +640,7 @@ static INLINE vargquad vcast_aq_vm2(vmask2 vm2) { memcpy(&aq, &vm2, VECTLENDP * 16); return aq; } +#endif // #if !defined(SLEEF_GENHEADER) static INLINE int vtestallzeros_i_vo64(vopmask g) { return _mm_movemask_epi8(_mm_or_si128(_mm256_extractf128_si256(g, 0), _mm256_extractf128_si256(g, 
1))) == 0; @@ -656,6 +672,9 @@ static INLINE vopmask vgt64_vo_vm_vm(vmask x, vmask y) { _mm256_insertf128_si256(_mm256_castsi128_si256(_mm_srli_epi64(_mm256_extractf128_si256(x, 0), c)), \ _mm_srli_epi64(_mm256_extractf128_si256(x, 1), c), 1) +//@#define vsll64_vm_vm_i(x, c) _mm256_insertf128_si256(_mm256_castsi128_si256(_mm_slli_epi64(_mm256_extractf128_si256(x, 0), c)), _mm_slli_epi64(_mm256_extractf128_si256(x, 1), c), 1) +//@#define vsrl64_vm_vm_i(x, c) _mm256_insertf128_si256(_mm256_castsi128_si256(_mm_srli_epi64(_mm256_extractf128_si256(x, 0), c)), _mm_srli_epi64(_mm256_extractf128_si256(x, 1), c), 1) + static INLINE vmask vcast_vm_vi(vint vi) { vint vi0 = _mm_and_si128(_mm_shuffle_epi32(vi, (1 << 4) | (1 << 6)), _mm_set_epi32(0, -1, 0, -1)); vint vi1 = _mm_and_si128(_mm_shuffle_epi32(vi, (2 << 0) | (2 << 2) | (3 << 4) | (3 << 6)), _mm_set_epi32(0, -1, 0, -1)); diff --git a/src/arch/helperavx2.h b/src/arch/helperavx2.h index 56be9ada..6587f488 100644 --- a/src/arch/helperavx2.h +++ b/src/arch/helperavx2.h @@ -5,7 +5,7 @@ #if CONFIG == 1 -#ifndef __AVX2__ +#if !defined(__AVX2__) && !defined(SLEEF_GENHEADER) #error Please specify -mavx2. 
#endif @@ -14,19 +14,29 @@ #endif #define ENABLE_DP +//@#define ENABLE_DP #define LOG2VECTLENDP 2 +//@#define LOG2VECTLENDP 2 #define VECTLENDP (1 << LOG2VECTLENDP) +//@#define VECTLENDP (1 << LOG2VECTLENDP) #define ENABLE_FMA_DP +//@#define ENABLE_FMA_DP #define ENABLE_SP +//@#define ENABLE_SP #define LOG2VECTLENSP (LOG2VECTLENDP+1) +//@#define LOG2VECTLENSP (LOG2VECTLENDP+1) #define VECTLENSP (1 << LOG2VECTLENSP) +//@#define VECTLENSP (1 << LOG2VECTLENSP) #define ENABLE_FMA_SP +//@#define ENABLE_FMA_SP #define FULL_FP_ROUNDING -#define SPLIT_KERNEL +//@#define FULL_FP_ROUNDING #define ACCURATE_SQRT +//@#define ACCURATE_SQRT +#if !defined(SLEEF_GENHEADER) #if defined(_MSC_VER) #include #else @@ -35,6 +45,7 @@ #include #include "misc.h" +#endif // #if !defined(SLEEF_GENHEADER) typedef __m256i vmask; typedef __m256i vopmask; @@ -51,6 +62,8 @@ typedef struct { // +#if !defined(SLEEF_GENHEADER) + #ifndef __SLEEF_H__ void Sleef_x86CpuID(int32_t out[4], uint32_t eax, uint32_t ecx); #endif @@ -76,6 +89,8 @@ static INLINE int vavailability_i(int name) { #define DFTPRIORITY 25 #endif +#endif // #if !defined(SLEEF_GENHEADER) + static INLINE void vprefetch_v_p(const void *ptr) { _mm_prefetch(ptr, _MM_HINT_T0); } static INLINE int vtestallones_i_vo32(vopmask g) { @@ -418,8 +433,6 @@ static INLINE void vsscatter2_v_p_i_i_vf(float *ptr, int offset, int step, vfloa // -typedef Sleef_quad4 vargquad; - static INLINE vmask2 vinterleave_vm2_vm2(vmask2 v) { return (vmask2) { _mm256_unpacklo_epi64(v.x, v.y), _mm256_unpackhi_epi64(v.x, v.y) }; } @@ -454,6 +467,9 @@ static vmask2 vloadu_vm2_p(void *p) { return vm2; } +#if !defined(SLEEF_GENHEADER) +typedef Sleef_quad4 vargquad; + static INLINE vmask2 vcast_vm2_aq(vargquad aq) { return vinterleave_vm2_vm2(vloadu_vm2_p(&aq)); } @@ -464,6 +480,7 @@ static INLINE vargquad vcast_aq_vm2(vmask2 vm2) { memcpy(&aq, &vm2, VECTLENDP * 16); return aq; } +#endif // #if !defined(SLEEF_GENHEADER) static INLINE int vtestallzeros_i_vo64(vopmask g) { 
return _mm_movemask_epi8(_mm_or_si128(_mm256_extractf128_si256(g, 0), _mm256_extractf128_si256(g, 1))) == 0; @@ -477,6 +494,8 @@ static INLINE vopmask vgt64_vo_vm_vm(vmask x, vmask y) { return _mm256_cmpgt_epi #define vsll64_vm_vm_i(x, c) _mm256_slli_epi64(x, c) #define vsrl64_vm_vm_i(x, c) _mm256_srli_epi64(x, c) +//@#define vsll64_vm_vm_i(x, c) _mm256_slli_epi64(x, c) +//@#define vsrl64_vm_vm_i(x, c) _mm256_srli_epi64(x, c) static INLINE vmask vcast_vm_vi(vint vi) { return _mm256_cvtepi32_epi64(vi); } static INLINE vint vcast_vi_vm(vmask vm) { diff --git a/src/arch/helperavx2_128.h b/src/arch/helperavx2_128.h index 761da100..06ae2ae6 100644 --- a/src/arch/helperavx2_128.h +++ b/src/arch/helperavx2_128.h @@ -5,7 +5,7 @@ #if CONFIG == 1 -#ifndef __AVX2__ +#if !defined(__AVX2__) && !defined(SLEEF_GENHEADER) #error Please specify -mavx2. #endif @@ -14,19 +14,29 @@ #endif #define ENABLE_DP +//@#define ENABLE_DP #define LOG2VECTLENDP 1 +//@#define LOG2VECTLENDP 1 #define VECTLENDP (1 << LOG2VECTLENDP) +//@#define VECTLENDP (1 << LOG2VECTLENDP) #define ENABLE_FMA_DP +//@#define ENABLE_FMA_DP #define ENABLE_SP +//@#define ENABLE_SP #define LOG2VECTLENSP (LOG2VECTLENDP+1) +//@#define LOG2VECTLENSP (LOG2VECTLENDP+1) #define VECTLENSP (1 << LOG2VECTLENSP) +//@#define VECTLENSP (1 << LOG2VECTLENSP) #define ENABLE_FMA_SP +//@#define ENABLE_FMA_SP #define FULL_FP_ROUNDING -#define SPLIT_KERNEL +//@#define FULL_FP_ROUNDING #define ACCURATE_SQRT +//@#define ACCURATE_SQRT +#if !defined(SLEEF_GENHEADER) #if defined(_MSC_VER) #include #else @@ -35,6 +45,7 @@ #include #include "misc.h" +#endif // #if !defined(SLEEF_GENHEADER) typedef __m128i vmask; typedef __m128i vopmask; @@ -51,6 +62,8 @@ typedef struct { // +#if !defined(SLEEF_GENHEADER) + #ifndef __SLEEF_H__ void Sleef_x86CpuID(int32_t out[4], uint32_t eax, uint32_t ecx); #endif @@ -76,6 +89,8 @@ static INLINE int vavailability_i(int name) { #define DFTPRIORITY 25 #endif +#endif // #if !defined(SLEEF_GENHEADER) + static INLINE 
void vprefetch_v_p(const void *ptr) { _mm_prefetch(ptr, _MM_HINT_T0); } static INLINE int vtestallones_i_vo32(vopmask g) { return _mm_movemask_epi8(g) == 0xFFFF; } @@ -392,8 +407,6 @@ static INLINE void vsscatter2_v_p_i_i_vf(float *ptr, int offset, int step, vfloa // -typedef Sleef_quad2 vargquad; - static INLINE vmask2 vinterleave_vm2_vm2(vmask2 v) { return (vmask2) { _mm_unpacklo_epi64(v.x, v.y), _mm_unpackhi_epi64(v.x, v.y) }; } @@ -415,6 +428,9 @@ static void vstoreu_v_p_vm2(void *p, vmask2 vm2) { vstoreu_v_p_vi2((int32_t *)((uint8_t *)p + sizeof(vmask)), vcast_vi2_vm(vm2.y)); } +#if !defined(SLEEF_GENHEADER) +typedef Sleef_quad2 vargquad; + static INLINE vmask2 vcast_vm2_aq(vargquad aq) { return vinterleave_vm2_vm2(vloadu_vm2_p(&aq)); } @@ -424,6 +440,7 @@ static INLINE vargquad vcast_aq_vm2(vmask2 vm2) { vstoreu_v_p_vm2(&a, vuninterleave_vm2_vm2(vm2)); return a; } +#endif // #if !defined(SLEEF_GENHEADER) static INLINE int vtestallzeros_i_vo64(vopmask g) { return _mm_movemask_epi8(g) == 0; } @@ -435,3 +452,5 @@ static INLINE vopmask vgt64_vo_vm_vm(vmask x, vmask y) { return _mm_cmpgt_epi64( #define vsll64_vm_vm_i(x, c) _mm_slli_epi64(x, c) #define vsrl64_vm_vm_i(x, c) _mm_srli_epi64(x, c) +//@#define vsll64_vm_vm_i(x, c) _mm_slli_epi64(x, c) +//@#define vsrl64_vm_vm_i(x, c) _mm_srli_epi64(x, c) diff --git a/src/arch/helperavx512f.h b/src/arch/helperavx512f.h index eefc36cc..d9b5855e 100644 --- a/src/arch/helperavx512f.h +++ b/src/arch/helperavx512f.h @@ -5,7 +5,7 @@ #if CONFIG == 1 || CONFIG == 2 -#ifndef __AVX512F__ +#if !defined(__AVX512F__) && !defined(SLEEF_GENHEADER) #error Please specify -mavx512f. 
#endif @@ -14,22 +14,32 @@ #endif #define ENABLE_DP +//@#define ENABLE_DP #define LOG2VECTLENDP 3 +//@#define LOG2VECTLENDP 3 #define VECTLENDP (1 << LOG2VECTLENDP) +//@#define VECTLENDP (1 << LOG2VECTLENDP) #define ENABLE_SP +//@#define ENABLE_SP #define LOG2VECTLENSP (LOG2VECTLENDP+1) +//@#define LOG2VECTLENSP (LOG2VECTLENDP+1) #define VECTLENSP (1 << LOG2VECTLENSP) +//@#define VECTLENSP (1 << LOG2VECTLENSP) #if CONFIG == 1 #define ENABLE_FMA_DP +//@#define ENABLE_FMA_DP #define ENABLE_FMA_SP -#define SPLIT_KERNEL +//@#define ENABLE_FMA_SP #endif #define FULL_FP_ROUNDING +//@#define FULL_FP_ROUNDING #define ACCURATE_SQRT +//@#define ACCURATE_SQRT +#if !defined(SLEEF_GENHEADER) #if defined(_MSC_VER) #include #else @@ -38,6 +48,7 @@ #include #include "misc.h" +#endif // #if !defined(SLEEF_GENHEADER) typedef __m512i vmask; typedef __mmask16 vopmask; @@ -54,6 +65,8 @@ typedef struct { // +#if !defined(SLEEF_GENHEADER) + #ifndef __SLEEF_H__ void Sleef_x86CpuID(int32_t out[4], uint32_t eax, uint32_t ecx); #endif @@ -82,6 +95,8 @@ static INLINE int vavailability_i(int name) { #define DFTPRIORITY 0 #endif +#endif // #if !defined(SLEEF_GENHEADER) + static INLINE void vprefetch_v_p(const void *ptr) { _mm_prefetch(ptr, _MM_HINT_T0); } #ifdef __INTEL_COMPILER @@ -218,6 +233,9 @@ static INLINE vint vxor_vi_vi_vi(vint x, vint y) { return _mm256_xor_si256(x, y) #define vsll_vi_vi_i(x, c) _mm256_slli_epi32(x, c) #define vsrl_vi_vi_i(x, c) _mm256_srli_epi32(x, c) #define vsra_vi_vi_i(x, c) _mm256_srai_epi32(x, c) +//@#define vsll_vi_vi_i(x, c) _mm256_slli_epi32(x, c) +//@#define vsrl_vi_vi_i(x, c) _mm256_srli_epi32(x, c) +//@#define vsra_vi_vi_i(x, c) _mm256_srai_epi32(x, c) static INLINE vint veq_vi_vi_vi(vint x, vint y) { return _mm256_cmpeq_epi32(x, y); } static INLINE vint vgt_vi_vi_vi(vint x, vint y) { return _mm256_cmpgt_epi32(x, y); } @@ -290,6 +308,8 @@ static INLINE vfloat vgetmant_vf_vf(vfloat d) { return _mm512_getmant_ps(d, _MM_ #define vfixup_vd_vd_vd_vi2_i(a, b, c, 
imm) _mm512_fixupimm_pd((a), (b), (c), (imm)) #define vfixup_vf_vf_vf_vi2_i(a, b, c, imm) _mm512_fixupimm_ps((a), (b), (c), (imm)) +//@#define vfixup_vd_vd_vd_vi2_i(a, b, c, imm) _mm512_fixupimm_pd((a), (b), (c), (imm)) +//@#define vfixup_vf_vf_vf_vi2_i(a, b, c, imm) _mm512_fixupimm_ps((a), (b), (c), (imm)) #if defined(_MSC_VER) // This function is needed when debugging on MSVC. @@ -394,6 +414,9 @@ static INLINE vint2 vandnot_vi2_vo_vi2(vopmask o, vint2 m) { #define vsll_vi2_vi2_i(x, c) _mm512_slli_epi32(x, c) #define vsrl_vi2_vi2_i(x, c) _mm512_srli_epi32(x, c) #define vsra_vi2_vi2_i(x, c) _mm512_srai_epi32(x, c) +//@#define vsll_vi2_vi2_i(x, c) _mm512_slli_epi32(x, c) +//@#define vsrl_vi2_vi2_i(x, c) _mm512_srli_epi32(x, c) +//@#define vsra_vi2_vi2_i(x, c) _mm512_srai_epi32(x, c) static INLINE vopmask veq_vo_vi2_vi2(vint2 x, vint2 y) { return _mm512_cmpeq_epi32_mask(x, y); } static INLINE vopmask vgt_vo_vi2_vi2(vint2 x, vint2 y) { return _mm512_cmpgt_epi32_mask(x, y); } @@ -522,8 +545,6 @@ static INLINE void vsscatter2_v_p_i_i_vf(float *ptr, int offset, int step, vfloa // -typedef Sleef_quad8 vargquad; - static INLINE vmask2 vinterleave_vm2_vm2(vmask2 v) { return (vmask2) { _mm512_unpacklo_epi64(v.x, v.y), _mm512_unpackhi_epi64(v.x, v.y) }; } @@ -558,6 +579,9 @@ static vmask2 vloadu_vm2_p(void *p) { return vm2; } +#if !defined(SLEEF_GENHEADER) +typedef Sleef_quad8 vargquad; + static INLINE vmask2 vcast_vm2_aq(vargquad aq) { return vinterleave_vm2_vm2(vloadu_vm2_p(&aq)); } @@ -568,6 +592,7 @@ static INLINE vargquad vcast_aq_vm2(vmask2 vm2) { memcpy(&aq, &vm2, VECTLENDP * 16); return aq; } +#endif // #if !defined(SLEEF_GENHEADER) #ifdef __INTEL_COMPILER static INLINE int vtestallzeros_i_vo64(vopmask g) { return _mm512_mask2int(g) == 0; } @@ -583,6 +608,8 @@ static INLINE vopmask vgt64_vo_vm_vm(vmask x, vmask y) { return _mm512_cmp_epi64 #define vsll64_vm_vm_i(x, c) _mm512_slli_epi64(x, c) #define vsrl64_vm_vm_i(x, c) _mm512_srli_epi64(x, c) +//@#define 
vsll64_vm_vm_i(x, c) _mm512_slli_epi64(x, c) +//@#define vsrl64_vm_vm_i(x, c) _mm512_srli_epi64(x, c) static INLINE vmask vcast_vm_vi(vint vi) { return _mm512_cvtepi32_epi64(vi); diff --git a/src/arch/helperneon32.h b/src/arch/helperneon32.h index 0eb8a74c..ccbafd74 100644 --- a/src/arch/helperneon32.h +++ b/src/arch/helperneon32.h @@ -12,18 +12,23 @@ #endif #define ENABLE_SP +//@#define ENABLE_SP #define LOG2VECTLENSP 2 +//@#define LOG2VECTLENSP 2 #define VECTLENSP (1 << LOG2VECTLENSP) +//@#define VECTLENSP (1 << LOG2VECTLENSP) #if CONFIG == 4 #define ISANAME "AARCH32 NEON-VFPV4" #define ENABLE_FMA_SP +//@#define ENABLE_FMA_SP #else #define ISANAME "AARCH32 NEON" #endif #define DFTPRIORITY 10 #define ENABLE_RECSQRT_SP +//@#define ENABLE_RECSQRT_SP #include #include @@ -207,6 +212,9 @@ static INLINE vint2 vandnot_vi2_vo_vi2(vopmask x, vint2 y) { return (vint2)vbicq #define vsll_vi2_vi2_i(x, c) vshlq_n_s32(x, c) #define vsrl_vi2_vi2_i(x, c) vreinterpretq_s32_u32(vshrq_n_u32(vreinterpretq_u32_s32(x), c)) #define vsra_vi2_vi2_i(x, c) vshrq_n_s32(x, c) +//@#define vsll_vi2_vi2_i(x, c) vshlq_n_s32(x, c) +//@#define vsrl_vi2_vi2_i(x, c) vreinterpretq_s32_u32(vshrq_n_u32(vreinterpretq_u32_s32(x), c)) +//@#define vsra_vi2_vi2_i(x, c) vshrq_n_s32(x, c) static INLINE vopmask veq_vo_vi2_vi2(vint2 x, vint2 y) { return vceqq_s32(x, y); } static INLINE vopmask vgt_vo_vi2_vi2(vint2 x, vint2 y) { return vcgtq_s32(x, y); } diff --git a/src/arch/helperpower_128.h b/src/arch/helperpower_128.h index 8d26cbf5..b5470b41 100644 --- a/src/arch/helperpower_128.h +++ b/src/arch/helperpower_128.h @@ -14,22 +14,32 @@ #endif #define ENABLE_DP +//@#define ENABLE_DP #define LOG2VECTLENDP 1 -#define VECTLENDP 2 +//@#define LOG2VECTLENDP 1 +#define VECTLENDP (1 << LOG2VECTLENDP) +//@#define VECTLENDP (1 << LOG2VECTLENDP) #define ENABLE_SP -#define LOG2VECTLENSP 2 -#define VECTLENSP 4 +//@#define ENABLE_SP +#define LOG2VECTLENSP (LOG2VECTLENDP+1) +//@#define LOG2VECTLENSP (LOG2VECTLENDP+1) +#define 
VECTLENSP (1 << LOG2VECTLENSP) +//@#define VECTLENSP (1 << LOG2VECTLENSP) #if CONFIG == 1 #define ENABLE_FMA_DP +//@#define ENABLE_FMA_DP #define ENABLE_FMA_SP -//#define SPLIT_KERNEL // Benchmark comparison is needed to determine whether this option should be enabled. +//@#define ENABLE_FMA_SP #endif #define ACCURATE_SQRT +//@#define ACCURATE_SQRT #define FULL_FP_ROUNDING +//@#define FULL_FP_ROUNDING +#if !defined(SLEEF_GENHEADER) #include // undef altivec types since CPP and C99 use them as compiler tokens // use __vector and __bool instead @@ -38,6 +48,7 @@ #include #include "misc.h" +#endif // #if !defined(SLEEF_GENHEADER) #define ISANAME "VSX" #define DFTPRIORITY 25 @@ -51,6 +62,7 @@ static INLINE void vprefetch_v_p(const void *ptr) { } typedef __vector unsigned int vmask; // using __bool with typedef may cause ambiguous errors #define vopmask __vector __bool int +//@#define vopmask __vector __bool int typedef __vector signed int vint; typedef __vector signed int vint2; typedef __vector float vfloat; diff --git a/src/arch/helperpurec.h b/src/arch/helperpurec.h index daf63595..509928a2 100644 --- a/src/arch/helperpurec.h +++ b/src/arch/helperpurec.h @@ -12,14 +12,21 @@ #endif #define ENABLE_DP +//@#define ENABLE_DP #define ENABLE_SP +//@#define ENABLE_SP #define LOG2VECTLENDP CONFIG +//@#define LOG2VECTLENDP CONFIG #define VECTLENDP (1 << LOG2VECTLENDP) +//@#define VECTLENDP (1 << LOG2VECTLENDP) #define LOG2VECTLENSP (LOG2VECTLENDP+1) +//@#define LOG2VECTLENSP (LOG2VECTLENDP+1) #define VECTLENSP (1 << LOG2VECTLENSP) +//@#define VECTLENSP (1 << LOG2VECTLENSP) #define ACCURATE_SQRT +//@#define ACCURATE_SQRT #define DFTPRIORITY LOG2VECTLENDP #define ISANAME "Pure C Array" diff --git a/src/arch/helperpurec_scalar.h b/src/arch/helperpurec_scalar.h index 1e70f6ee..240cbcd4 100644 --- a/src/arch/helperpurec_scalar.h +++ b/src/arch/helperpurec_scalar.h @@ -3,10 +3,15 @@ // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) +#if 
!defined(SLEEF_GENHEADER) #include +#endif #ifndef ENABLE_BUILTIN_MATH + +#if !defined(SLEEF_GENHEADER) #include +#endif #define SQRT sqrt #define SQRTF sqrtf @@ -30,29 +35,37 @@ #endif +#if !defined(SLEEF_GENHEADER) #include "misc.h" +#endif #ifndef CONFIG #error CONFIG macro not defined #endif #define ENABLE_DP +//@#define ENABLE_DP #define ENABLE_SP +//@#define ENABLE_SP #if CONFIG == 2 #define ENABLE_FMA_DP +//@#define ENABLE_FMA_DP #define ENABLE_FMA_SP +//@#define ENABLE_FMA_SP #if defined(__AVX2__) || defined(__aarch64__) || defined(__arm__) || defined(__powerpc64__) #ifndef FP_FAST_FMA #define FP_FAST_FMA +//@#define FP_FAST_FMA #endif #ifndef FP_FAST_FMAF #define FP_FAST_FMAF +//@#define FP_FAST_FMAF #endif #endif -#if !defined(FP_FAST_FMA) || !defined(FP_FAST_FMAF) +#if (!defined(FP_FAST_FMA) || !defined(FP_FAST_FMAF)) && !defined(SLEEF_GENHEADER) #error FP_FAST_FMA or FP_FAST_FMAF not defined #endif #define ISANAME "Pure C scalar with FMA" @@ -62,14 +75,20 @@ #endif // #if CONFIG == 2 #define LOG2VECTLENDP 0 +//@#define LOG2VECTLENDP 0 #define VECTLENDP (1 << LOG2VECTLENDP) +//@#define VECTLENDP (1 << LOG2VECTLENDP) #define LOG2VECTLENSP 0 +//@#define LOG2VECTLENSP 0 #define VECTLENSP (1 << LOG2VECTLENSP) +//@#define VECTLENSP (1 << LOG2VECTLENSP) #define ACCURATE_SQRT +//@#define ACCURATE_SQRT #if defined(__SSE4_1__) || defined(__aarch64__) #define FULL_FP_ROUNDING +//@#define FULL_FP_ROUNDING #endif #define DFTPRIORITY LOG2VECTLENDP @@ -376,8 +395,6 @@ static INLINE void vstream_v_p_vf(float *ptr, vfloat v) { *ptr = v; } // -typedef Sleef_quad1 vargquad; - static INLINE vmask2 vinterleave_vm2_vm2(vmask2 v) { return v; } static INLINE vmask2 vuninterleave_vm2_vm2(vmask2 v) { return v; } static INLINE vint vuninterleave_vi_vi(vint v) { return v; } @@ -386,6 +403,9 @@ static INLINE vdouble vuninterleave_vd_vd(vdouble vd) { return vd; } static INLINE vmask vinterleave_vm_vm(vmask vm) { return vm; } static INLINE vmask vuninterleave_vm_vm(vmask vm) { return 
vm; } +#if !defined(SLEEF_GENHEADER) +typedef Sleef_quad1 vargquad; + static vmask2 vloadu_vm2_p(void *p) { vmask2 vm2; memcpy(&vm2, p, VECTLENDP * 16); @@ -402,6 +422,7 @@ static INLINE vargquad vcast_aq_vm2(vmask2 vm2) { memcpy(&aq, &vm2, VECTLENDP * 16); return aq; } +#endif // #if !defined(SLEEF_GENHEADER) static INLINE int vtestallzeros_i_vo64(vopmask g) { return !g ? ~(uint32_t)0 : 0; } static INLINE vmask vsel_vm_vo64_vm_vm(vopmask o, vmask x, vmask y) { return o ? x : y; } @@ -411,6 +432,8 @@ static INLINE vmask vneg64_vm_vm(vmask x) { return -(int64_t)x; } #define vsll64_vm_vm_i(x, c) ((uint64_t)(x) << (c)) #define vsrl64_vm_vm_i(x, c) ((uint64_t)(x) >> (c)) +//@#define vsll64_vm_vm_i(x, c) ((uint64_t)(x) << (c)) +//@#define vsrl64_vm_vm_i(x, c) ((uint64_t)(x) >> (c)) static INLINE vopmask vgt64_vo_vm_vm(vmask x, vmask y) { return (int64_t)x > (int64_t)y ? ~(uint32_t)0 : 0; } diff --git a/src/arch/helpersse2.h b/src/arch/helpersse2.h index 0113f3c6..880c02e1 100644 --- a/src/arch/helpersse2.h +++ b/src/arch/helpersse2.h @@ -5,19 +5,19 @@ #if CONFIG == 2 -#if !defined(__SSE2__) +#if !defined(__SSE2__) && !defined(SLEEF_GENHEADER) #error Please specify -msse2. 
#endif #elif CONFIG == 3 -#if !defined(__SSE2__) || !defined(__SSE3__) +#if (!defined(__SSE2__) || !defined(__SSE3__)) && !defined(SLEEF_GENHEADER) #error Please specify -msse2 and -msse3 #endif #elif CONFIG == 4 -#if !defined(__SSE2__) || !defined(__SSE3__) || !defined(__SSE4_1__) +#if (!defined(__SSE2__) || !defined(__SSE3__) || !defined(__SSE4_1__)) && !defined(SLEEF_GENHEADER) #error Please specify -msse2, -msse3 and -msse4.1 #endif @@ -26,15 +26,23 @@ #endif #define ENABLE_DP +//@#define ENABLE_DP #define LOG2VECTLENDP 1 +//@#define LOG2VECTLENDP 1 #define VECTLENDP (1 << LOG2VECTLENDP) +//@#define VECTLENDP (1 << LOG2VECTLENDP) #define ENABLE_SP +//@#define ENABLE_SP #define LOG2VECTLENSP (LOG2VECTLENDP+1) +//@#define LOG2VECTLENSP (LOG2VECTLENDP+1) #define VECTLENSP (1 << LOG2VECTLENSP) +//@#define VECTLENSP (1 << LOG2VECTLENSP) #define ACCURATE_SQRT +//@#define ACCURATE_SQRT +#if !defined(SLEEF_GENHEADER) #if defined(_MSC_VER) #include #else @@ -43,6 +51,7 @@ #include #include "misc.h" +#endif // #if !defined(SLEEF_GENHEADER) typedef __m128i vmask; typedef __m128i vopmask; @@ -59,6 +68,8 @@ typedef struct { // +#if !defined(SLEEF_GENHEADER) + #ifndef __SLEEF_H__ void Sleef_x86CpuID(int32_t out[4], uint32_t eax, uint32_t ecx); #endif @@ -106,6 +117,8 @@ static INLINE int vavailability_i(int name) { #define DFTPRIORITY 10 #endif +#endif // #if !defined(SLEEF_GENHEADER) + static INLINE void vprefetch_v_p(const void *ptr) { _mm_prefetch(ptr, _MM_HINT_T0); } static INLINE int vtestallones_i_vo32(vopmask g) { return _mm_movemask_epi8(g) == 0xFFFF; } @@ -153,13 +166,14 @@ static INLINE vint vcast_vi_i(int i) { return _mm_set_epi32(0, 0, i, i); } static INLINE vint2 vcastu_vi2_vi(vint vi) { return _mm_and_si128(_mm_shuffle_epi32(vi, 0x73), _mm_set_epi32(-1, 0, -1, 0)); } static INLINE vint vcastu_vi_vi2(vint2 vi) { return _mm_shuffle_epi32(vi, 0x0d); } -#ifdef __SSE4_1__ +#if CONFIG == 4 static INLINE vdouble vtruncate_vd_vd(vdouble vd) { return _mm_round_pd(vd, 
_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC); } static INLINE vdouble vrint_vd_vd(vdouble vd) { return _mm_round_pd(vd, _MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC); } static INLINE vfloat vtruncate_vf_vf(vfloat vf) { return _mm_round_ps(vf, _MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC); } static INLINE vfloat vrint_vf_vf(vfloat vd) { return _mm_round_ps(vd, _MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC); } static INLINE vopmask veq64_vo_vm_vm(vmask x, vmask y) { return _mm_cmpeq_epi64(x, y); } #define FULL_FP_ROUNDING +//@#define FULL_FP_ROUNDING #else static INLINE vdouble vtruncate_vd_vd(vdouble vd) { return vcast_vd_vi(vtruncate_vi_vd(vd)); } static INLINE vdouble vrint_vd_vd(vdouble vd) { return vcast_vd_vi(vrint_vi_vd(vd)); } @@ -224,7 +238,7 @@ static INLINE vint vgt_vi_vi_vi(vint x, vint y) { return _mm_cmpgt_epi32(x, y); static INLINE vopmask veq_vo_vi_vi(vint x, vint y) { return _mm_cmpeq_epi32(x, y); } static INLINE vopmask vgt_vo_vi_vi(vint x, vint y) { return _mm_cmpgt_epi32(x, y); } -#ifdef __SSE4_1__ +#if CONFIG == 4 static INLINE vint vsel_vi_vo_vi_vi(vopmask m, vint x, vint y) { return _mm_blendv_epi8(y, x, m); } static INLINE vdouble vsel_vd_vo_vd_vd(vopmask m, vdouble x, vdouble y) { return _mm_blendv_pd(y, x, _mm_castsi128_pd(m)); } @@ -299,7 +313,7 @@ static INLINE vfloat vreinterpret_vf_vm(vmask vm) { return _mm_castsi128_ps(vm); static INLINE vfloat vreinterpret_vf_vi2(vint2 vm) { return _mm_castsi128_ps(vm); } static INLINE vint2 vreinterpret_vi2_vf(vfloat vf) { return _mm_castps_si128(vf); } -#ifndef __SSE4_1__ +#if CONFIG != 4 static INLINE vfloat vtruncate_vf_vf(vfloat vd) { return vcast_vf_vi2(vtruncate_vi2_vf(vd)); } static INLINE vfloat vrint_vf_vf(vfloat vf) { return vcast_vf_vi2(vrint_vi2_vf(vf)); } #endif @@ -346,7 +360,7 @@ static INLINE vopmask vgt_vo_vi2_vi2(vint2 x, vint2 y) { return _mm_cmpgt_epi32( static INLINE vint2 veq_vi2_vi2_vi2(vint2 x, vint2 y) { return _mm_cmpeq_epi32(x, y); } static INLINE vint2 vgt_vi2_vi2_vi2(vint2 x, vint2 y) { 
return _mm_cmpgt_epi32(x, y); } -#ifdef __SSE4_1__ +#if CONFIG == 4 static INLINE vint2 vsel_vi2_vo_vi2_vi2(vopmask m, vint2 x, vint2 y) { return _mm_blendv_epi8(y, x, m); } static INLINE vfloat vsel_vf_vo_vf_vf(vopmask m, vfloat x, vfloat y) { return _mm_blendv_ps(y, x, _mm_castsi128_ps(m)); } @@ -408,7 +422,7 @@ static INLINE vdouble vnegpos_vd_vd(vdouble d) { return vreinterpret_vd_vm(vxor_ static INLINE vfloat vposneg_vf_vf(vfloat d) { return vreinterpret_vf_vm(vxor_vm_vm_vm(vreinterpret_vm_vf(d), vreinterpret_vm_vf(PNMASKf))); } static INLINE vfloat vnegpos_vf_vf(vfloat d) { return vreinterpret_vf_vm(vxor_vm_vm_vm(vreinterpret_vm_vf(d), vreinterpret_vm_vf(NPMASKf))); } -#ifdef __SSE3__ +#if CONFIG >= 3 static INLINE vdouble vsubadd_vd_vd_vd(vdouble x, vdouble y) { return _mm_addsub_pd(x, y); } static INLINE vfloat vsubadd_vf_vf_vf(vfloat x, vfloat y) { return _mm_addsub_ps(x, y); } #else @@ -445,8 +459,6 @@ static INLINE void vsscatter2_v_p_i_i_vf(float *ptr, int offset, int step, vfloa // -typedef Sleef_quad2 vargquad; - static INLINE vmask2 vinterleave_vm2_vm2(vmask2 v) { return (vmask2) { _mm_unpacklo_epi64(v.x, v.y), _mm_unpackhi_epi64(v.x, v.y) }; } @@ -467,6 +479,9 @@ static vmask2 vloadu_vm2_p(void *p) { return vm2; } +#if !defined(SLEEF_GENHEADER) +typedef Sleef_quad2 vargquad; + static INLINE vmask2 vcast_vm2_aq(vargquad aq) { return vinterleave_vm2_vm2(vloadu_vm2_p(&aq)); } @@ -477,6 +492,7 @@ static INLINE vargquad vcast_aq_vm2(vmask2 vm2) { memcpy(&aq, &vm2, VECTLENDP * 16); return aq; } +#endif // #if !defined(SLEEF_GENHEADER) static INLINE int vtestallzeros_i_vo64(vopmask g) { return _mm_movemask_epi8(g) == 0; } @@ -489,6 +505,8 @@ static INLINE vmask vneg64_vm_vm(vmask x) { return _mm_sub_epi64(vcast_vm_i_i(0, #define vsll64_vm_vm_i(x, c) _mm_slli_epi64(x, c) #define vsrl64_vm_vm_i(x, c) _mm_srli_epi64(x, c) +//@#define vsll64_vm_vm_i(x, c) _mm_slli_epi64(x, c) +//@#define vsrl64_vm_vm_i(x, c) _mm_srli_epi64(x, c) static INLINE vopmask 
vgt64_vo_vm_vm(vmask x, vmask y) { int64_t ax[2], ay[2]; diff --git a/src/arch/helpersve.h b/src/arch/helpersve.h index 20ea9e72..3b0de30f 100644 --- a/src/arch/helpersve.h +++ b/src/arch/helpersve.h @@ -5,14 +5,16 @@ /* http://www.boost.org/LICENSE_1_0.txt) */ /*********************************************************************/ -#ifndef __ARM_FEATURE_SVE +#if !defined(__ARM_FEATURE_SVE) && !defined(SLEEF_GENHEADER) #error Please specify SVE flags. #endif +#if !defined(SLEEF_GENHEADER) #include #include #include "misc.h" +#endif // #if !defined(SLEEF_GENHEADER) #if defined(VECTLENDP) || defined(VECTLENSP) #error VECTLENDP or VECTLENSP already defined @@ -21,9 +23,12 @@ #if CONFIG == 1 || CONFIG == 2 // Vector length agnostic #define VECTLENSP (svcntw()) +//@#define VECTLENSP (svcntw()) #define VECTLENDP (svcntd()) +//@#define VECTLENDP (svcntd()) #define ISANAME "AArch64 SVE" #define ptrue svptrue_b8() +//@#define ptrue svptrue_b8() #elif CONFIG == 8 // 256-bit vector length #define ISANAME "AArch64 SVE 256-bit" @@ -67,16 +72,22 @@ static INLINE int vavailability_i(int name) { return 3; } #endif #define ENABLE_SP +//@#define ENABLE_SP #define ENABLE_DP +//@#define ENABLE_DP #if CONFIG != 2 #define ENABLE_FMA_SP +//@#define ENABLE_FMA_SP #define ENABLE_FMA_DP +//@#define ENABLE_FMA_DP //#define SPLIT_KERNEL // Benchmark comparison is needed to determine whether this option should be enabled. 
#endif #define FULL_FP_ROUNDING +//@#define FULL_FP_ROUNDING #define ACCURATE_SQRT +//@#define ACCURATE_SQRT // Type definitions @@ -265,6 +276,8 @@ static INLINE tdi_t tdisettd_tdi_tdi_vd3(tdi_t tdi, vdouble3 v) { // masking predicates #define ALL_TRUE_MASK svdup_n_s32(0xffffffff) #define ALL_FALSE_MASK svdup_n_s32(0x0) +//@#define ALL_TRUE_MASK svdup_n_s32(0xffffffff) +//@#define ALL_FALSE_MASK svdup_n_s32(0x0) static INLINE void vprefetch_v_p(const void *ptr) {} @@ -552,10 +565,12 @@ static INLINE vint2 vxor_vi2_vi2_vi2(vint2 x, vint2 y) { // Shifts #define vsll_vi2_vi2_i(x, c) svlsl_n_s32_x(ptrue, x, c) +//@#define vsll_vi2_vi2_i(x, c) svlsl_n_s32_x(ptrue, x, c) #define vsrl_vi2_vi2_i(x, c) \ svreinterpret_s32_u32(svlsr_n_u32_x(ptrue, svreinterpret_u32_s32(x), c)) - +//@#define vsrl_vi2_vi2_i(x, c) svreinterpret_s32_u32(svlsr_n_u32_x(ptrue, svreinterpret_u32_s32(x), c)) #define vsra_vi2_vi2_i(x, c) svasr_n_s32_x(ptrue, x, c) +//@#define vsra_vi2_vi2_i(x, c) svasr_n_s32_x(ptrue, x, c) // Comparison returning integers static INLINE vint2 vgt_vi2_vi2_vi2(vint2 x, vint2 y) { @@ -892,7 +907,10 @@ static INLINE vint vandnot_vi_vo_vi(vopmask x, vint y) { return svsel_s32(x, ALL_FALSE_MASK, y); } #define vsra_vi_vi_i(x, c) svasr_n_s32_x(ptrue, x, c) +//@#define vsra_vi_vi_i(x, c) svasr_n_s32_x(ptrue, x, c) #define vsll_vi_vi_i(x, c) svlsl_n_s32_x(ptrue, x, c) +//@#define vsll_vi_vi_i(x, c) svlsl_n_s32_x(ptrue, x, c) + static INLINE vint vsrl_vi_vi_i(vint x, int c) { return svreinterpret_s32_u32(svlsr_n_u32_x(ptrue, svreinterpret_u32_s32(x), c)); } @@ -1048,8 +1066,6 @@ static int vcast_i_vi2(vint2 v) { // -typedef Sleef_quadx vargquad; - static INLINE vmask2 vinterleave_vm2_vm2(vmask2 v) { return vm2setxy_vm2_vm_vm(svreinterpret_s32_u64(svtrn1_u64(svreinterpret_u64_s32(vm2getx_vm_vm2(v)), svreinterpret_u64_s32(vm2gety_vm_vm2(v)))), svreinterpret_s32_u64(svtrn2_u64(svreinterpret_u64_s32(vm2getx_vm_vm2(v)), svreinterpret_u64_s32(vm2gety_vm_vm2(v))))); @@ -1088,6 +1104,9 
@@ static vmask2 vloadu_vm2_p(void *p) { return vm2; } +#if !defined(SLEEF_GENHEADER) +typedef Sleef_quadx vargquad; + static INLINE vmask2 vcast_vm2_aq(vargquad aq) { return vinterleave_vm2_vm2(vloadu_vm2_p(&aq)); } @@ -1098,6 +1117,7 @@ static INLINE vargquad vcast_aq_vm2(vmask2 vm2) { memcpy(&aq, &vm2, VECTLENDP * 16); return aq; } +#endif // #if !defined(SLEEF_GENHEADER) static INLINE int vtestallzeros_i_vo64(vopmask g) { return svcntp_b64(svptrue_b64(), g) == 0; @@ -1122,7 +1142,9 @@ static INLINE vopmask vgt64_vo_vm_vm(vmask x, vmask y) { } #define vsll64_vm_vm_i(x, c) svreinterpret_s32_u64(svlsl_n_u64_x(ptrue, svreinterpret_u64_s32(x), c)) +//@#define vsll64_vm_vm_i(x, c) svreinterpret_s32_u64(svlsl_n_u64_x(ptrue, svreinterpret_u64_s32(x), c)) #define vsrl64_vm_vm_i(x, c) svreinterpret_s32_u64(svlsr_n_u64_x(ptrue, svreinterpret_u64_s32(x), c)) +//@#define vsrl64_vm_vm_i(x, c) svreinterpret_s32_u64(svlsr_n_u64_x(ptrue, svreinterpret_u64_s32(x), c)) static INLINE vmask vcast_vm_vi(vint vi) { return svreinterpret_s32_s64(svextw_s64_z(ptrue, svreinterpret_s64_s32(vi))); } static INLINE vint vcast_vi_vm(vmask vm) { return vand_vm_vm_vm(vm, vcast_vm_i_i(0, 0xffffffff)); } diff --git a/src/common/misc.h b/src/common/misc.h index 41f6829f..6de841bc 100644 --- a/src/common/misc.h +++ b/src/common/misc.h @@ -8,8 +8,10 @@ #ifndef __MISC_H__ #define __MISC_H__ +#if !defined(SLEEF_GENHEADER) #include #include +#endif #ifndef M_PI #define M_PI 3.141592653589793238462643383279502884 @@ -145,30 +147,32 @@ #define stringify(s) stringify_(s) #define stringify_(s) #s +#if !defined(SLEEF_GENHEADER) typedef long double longdouble; +#endif -#ifndef Sleef_double2_DEFINED +#if !defined(Sleef_double2_DEFINED) && !defined(SLEEF_GENHEADER) #define Sleef_double2_DEFINED typedef struct { double x, y; } Sleef_double2; #endif -#ifndef Sleef_float2_DEFINED +#if !defined(Sleef_float2_DEFINED) && !defined(SLEEF_GENHEADER) #define Sleef_float2_DEFINED typedef struct { float x, y; } 
Sleef_float2; #endif -#ifndef Sleef_longdouble2_DEFINED +#if !defined(Sleef_longdouble2_DEFINED) && !defined(SLEEF_GENHEADER) #define Sleef_longdouble2_DEFINED typedef struct { long double x, y; } Sleef_longdouble2; #endif -#if !defined(Sleef_quad_DEFINED) +#if !defined(Sleef_quad_DEFINED) && !defined(SLEEF_GENHEADER) #define Sleef_quad_DEFINED #if defined(ENABLEFLOAT128) typedef __float128 Sleef_quad; @@ -177,7 +181,7 @@ typedef struct { double x, y; } Sleef_quad; #endif #endif -#if !defined(Sleef_quad1_DEFINED) +#if !defined(Sleef_quad1_DEFINED) && !defined(SLEEF_GENHEADER) #define Sleef_quad1_DEFINED typedef union { struct { @@ -187,7 +191,7 @@ typedef union { } Sleef_quad1; #endif -#if !defined(Sleef_quad2_DEFINED) +#if !defined(Sleef_quad2_DEFINED) && !defined(SLEEF_GENHEADER) #define Sleef_quad2_DEFINED typedef union { struct { @@ -197,7 +201,7 @@ typedef union { } Sleef_quad2; #endif -#if !defined(Sleef_quad4_DEFINED) +#if !defined(Sleef_quad4_DEFINED) && !defined(SLEEF_GENHEADER) #define Sleef_quad4_DEFINED typedef union { struct { @@ -207,14 +211,14 @@ typedef union { } Sleef_quad4; #endif -#if !defined(Sleef_quad8_DEFINED) +#if !defined(Sleef_quad8_DEFINED) && !defined(SLEEF_GENHEADER) #define Sleef_quad8_DEFINED typedef union { Sleef_quad s[8]; } Sleef_quad8; #endif -#if defined(__ARM_FEATURE_SVE) && !defined(Sleef_quadx_DEFINED) +#if defined(__ARM_FEATURE_SVE) && !defined(Sleef_quadx_DEFINED) && !defined(SLEEF_GENHEADER) #define Sleef_quadx_DEFINED typedef union { Sleef_quad s[32]; @@ -229,19 +233,27 @@ typedef union { #define UNLIKELY(condition) __builtin_expect(!!(condition), 0) #define RESTRICT __restrict__ -#define INLINE __attribute__((always_inline)) - #ifndef __arm__ #define ALIGNED(x) __attribute__((aligned(x))) #else #define ALIGNED(x) #endif +#if defined(SLEEF_GENHEADER) + +#define INLINE SLEEF_ALWAYS_INLINE +#define EXPORT SLEEF_INLINE +#define CONST SLEEF_CONST +#define NOEXPORT + +#else // #if defined(SLEEF_GENHEADER) + #ifndef 
__INTEL_COMPILER #define CONST const #else #define CONST #endif +#define INLINE __attribute__((always_inline)) #if defined(__MINGW32__) || defined(__MINGW64__) || defined(__CYGWIN__) #ifndef SLEEF_STATIC_LIBS @@ -256,6 +268,8 @@ typedef union { #define NOEXPORT __attribute__ ((visibility ("hidden"))) #endif // #if defined(__MINGW32__) || defined(__MINGW64__) || defined(__CYGWIN__) +#endif // #if defined(SLEEF_GENHEADER) + #define SLEEF_NAN __builtin_nan("") #define SLEEF_NANf __builtin_nanf("") #define SLEEF_NANl __builtin_nanl("") @@ -288,7 +302,7 @@ typedef union { #define NOEXPORT #endif -#if (defined(__GNUC__) || defined(__CLANG__)) && (defined(__i386__) || defined(__x86_64__)) +#if (defined(__GNUC__) || defined(__CLANG__)) && (defined(__i386__) || defined(__x86_64__)) && !defined(SLEEF_GENHEADER) #include #endif diff --git a/src/libm-tester/CMakeLists.txt b/src/libm-tester/CMakeLists.txt index f80cc051..952ef7c6 100644 --- a/src/libm-tester/CMakeLists.txt +++ b/src/libm-tester/CMakeLists.txt @@ -82,7 +82,7 @@ set_target_properties(${TARGET_IUT} PROPERTIES C_STANDARD 99) add_test_iut(${TARGET_IUT}) set(IUT_LIST ${TARGET_IUT}) -set(IUT_SRC iutsimd.c iutsimdmain.c testerutil) +set(IUT_SRC iutsimd.c iutsimdmain.c testerutil.c) # Add vector extension `iut`s macro(test_extension SIMD) @@ -107,11 +107,11 @@ macro(test_extension SIMD) add_test_iut(${TARGET_IUT${SIMD}}) list(APPEND IUT_LIST ${TARGET_IUT${SIMD}}) -# The iut programs whose name begins with "iuty" are the iut for the -# deterministic version of functions. By checking the result of -# testing with iutysse2, for example, it can be checked that the -# corresponding deterministic functions passes the accuracy and -# nonnumber tests. + # The iut programs whose names begin with "iuty" are the iut for the + # deterministic version of functions. 
By checking the result of + # testing with iutysse2, for example, it can be checked that the + # corresponding deterministic functions passes the accuracy and + # nonnumber tests. string(CONCAT IUTYNAME "iuty" ${LCSIMD}) add_executable(${IUTYNAME} ${IUT_SRC}) @@ -127,6 +127,26 @@ macro(test_extension SIMD) add_test_iut(${IUTYNAME}) list(APPEND IUT_LIST ${IUTYNAME}) + # The iut programs whose names begin with "iuti" are the iut for the + # inline version of functions. + + if (BUILD_INLINE_HEADERS AND SED_COMMAND) + string(CONCAT IUTINAME "iuti" ${LCSIMD}) + add_executable(${IUTINAME} ${IUT_SRC}) + target_compile_options(${IUTINAME} PRIVATE ${FLAGS_ENABLE_${SIMD}}) + target_compile_definitions(${IUTINAME} + PRIVATE ENABLE_${SIMD}=1 ${COMMON_TARGET_DEFINITIONS} + USE_INLINE_HEADER="sleefinline_${LCSIMD}.h" + MACRO_ONLY_HEADER="macroonly${SIMD}.h" + ) + target_include_directories(${IUTINAME} PRIVATE ${PROJECT_BINARY_DIR}/inline) + target_link_libraries(${IUTINAME} ${LIBM} ${LIBRT} ${TARGET_LIBINLINE}) + add_dependencies(${IUTINAME} ${TARGET_INLINE_HEADERS}) + set_target_properties(${IUTINAME} PROPERTIES C_STANDARD 99) + add_test_iut(${IUTINAME}) + list(APPEND IUT_LIST ${IUTINAME}) + endif(BUILD_INLINE_HEADERS AND SED_COMMAND) + if(LIB_MPFR AND NOT ${SIMD} STREQUAL NEON32 AND NOT ${SIMD} STREQUAL NEON32VFPV4 AND NOT MINGW) # Build tester2 SIMD string(TOLOWER ${SIMD} SCSIMD) diff --git a/src/libm-tester/iutsimd.c b/src/libm-tester/iutsimd.c index 7fde87fe..80f1b885 100644 --- a/src/libm-tester/iutsimd.c +++ b/src/libm-tester/iutsimd.c @@ -22,74 +22,136 @@ #endif #include "misc.h" + +#if !defined(USE_INLINE_HEADER) #include "sleef.h" +#else // #if !defined(USE_INLINE_HEADER) +#include +#include +#include +#include + +#if defined(__AVX2__) || defined(__aarch64__) || defined(__arm__) || defined(__powerpc64__) +#ifndef FP_FAST_FMA +#define FP_FAST_FMA +#endif +#endif + +#if defined(_MSC_VER) && !defined(__STDC__) +#define __STDC__ 1 +#endif + +#if (defined(__GNUC__) || 
defined(__CLANG__)) && (defined(__i386__) || defined(__x86_64__)) +#include +#endif + +#if (defined(_MSC_VER)) +#include +#endif + +#if defined(__ARM_NEON__) || defined(__ARM_NEON) +#include +#endif + +#if defined(__ARM_FEATURE_SVE) +#include +#endif + +#if defined(__VSX__) +#include +#endif + +#define SLEEF_ALWAYS_INLINE inline +#define SLEEF_INLINE +#define SLEEF_CONST +#include USE_INLINE_HEADER +#include MACRO_ONLY_HEADER + +#endif // #if !defined(USE_INLINE_HEADER) + #include "testerutil.h" #define DORENAME #ifdef ENABLE_SSE2 +#include "renamesse2.h" +#if !defined(USE_INLINE_HEADER) #define CONFIG 2 #include "helpersse2.h" -#include "renamesse2.h" typedef Sleef___m128d_2 vdouble2; typedef Sleef___m128_2 vfloat2; #endif +#endif #ifdef ENABLE_SSE4 +#include "renamesse4.h" +#if !defined(USE_INLINE_HEADER) #define CONFIG 4 #include "helpersse2.h" -#include "renamesse4.h" typedef Sleef___m128d_2 vdouble2; typedef Sleef___m128_2 vfloat2; #endif +#endif #ifdef ENABLE_AVX +#include "renameavx.h" +#if !defined(USE_INLINE_HEADER) #define CONFIG 1 #include "helperavx.h" -#include "renameavx.h" typedef Sleef___m256d_2 vdouble2; typedef Sleef___m256_2 vfloat2; #endif +#endif #ifdef ENABLE_FMA4 +#include "renamefma4.h" +#if !defined(USE_INLINE_HEADER) #define CONFIG 4 #include "helperavx.h" -#include "renamefma4.h" typedef Sleef___m256d_2 vdouble2; typedef Sleef___m256_2 vfloat2; #endif +#endif #ifdef ENABLE_AVX2 +#include "renameavx2.h" +#if !defined(USE_INLINE_HEADER) #define CONFIG 1 #include "helperavx2.h" -#include "renameavx2.h" typedef Sleef___m256d_2 vdouble2; typedef Sleef___m256_2 vfloat2; #endif +#endif #ifdef ENABLE_AVX2128 +#include "renameavx2128.h" +#if !defined(USE_INLINE_HEADER) #define CONFIG 1 #include "helperavx2_128.h" -#include "renameavx2128.h" typedef Sleef___m128d_2 vdouble2; typedef Sleef___m128_2 vfloat2; #endif +#endif #ifdef ENABLE_AVX512F +#include "renameavx512f.h" +#if !defined(USE_INLINE_HEADER) #define CONFIG 1 #include "helperavx512f.h" 
-#include "renameavx512f.h" typedef Sleef___m512d_2 vdouble2; typedef Sleef___m512_2 vfloat2; #endif +#endif #ifdef ENABLE_AVX512FNOFMA +#include "renameavx512fnofma.h" +#if !defined(USE_INLINE_HEADER) #define CONFIG 2 #include "helperavx512f.h" -#include "renameavx512fnofma.h" typedef Sleef___m512d_2 vdouble2; typedef Sleef___m512_2 vfloat2; #endif +#endif #ifdef ENABLE_VECEXT #define CONFIG 1 @@ -104,34 +166,42 @@ typedef Sleef___m512_2 vfloat2; #endif #ifdef ENABLE_NEON32 +#include "renameneon32.h" +#if !defined(USE_INLINE_HEADER) #define CONFIG 1 #include "helperneon32.h" -#include "renameneon32.h" typedef Sleef_float32x4_t_2 vfloat2; #endif +#endif #ifdef ENABLE_NEON32VFPV4 +#include "renameneon32vfpv4.h" +#if !defined(USE_INLINE_HEADER) #define CONFIG 4 #include "helperneon32.h" -#include "renameneon32vfpv4.h" typedef Sleef_float32x4_t_2 vfloat2; #endif +#endif #ifdef ENABLE_ADVSIMD +#include "renameadvsimd.h" +#if !defined(USE_INLINE_HEADER) #define CONFIG 1 #include "helperadvsimd.h" -#include "renameadvsimd.h" typedef Sleef_float64x2_t_2 vdouble2; typedef Sleef_float32x4_t_2 vfloat2; #endif +#endif #ifdef ENABLE_ADVSIMDNOFMA +#include "renameadvsimdnofma.h" +#if !defined(USE_INLINE_HEADER) #define CONFIG 2 #include "helperadvsimd.h" -#include "renameadvsimdnofma.h" typedef Sleef_float64x2_t_2 vdouble2; typedef Sleef_float32x4_t_2 vfloat2; #endif +#endif #ifdef ENABLE_DSP128 #define CONFIG 2 @@ -142,17 +212,23 @@ typedef Sleef___m128_2 vfloat2; #endif #ifdef ENABLE_SVE +#include "renamesve.h" +#if !defined(USE_INLINE_HEADER) #define CONFIG 1 #include "helpersve.h" -#ifdef DORENAME -#include "renamesve.h" +typedef Sleef_svfloat64_t_2 vdouble2; +typedef Sleef_svfloat32_t_2 vfloat2; #endif #endif #ifdef ENABLE_SVENOFMA +#include "renamesvenofma.h" +#if !defined(USE_INLINE_HEADER) #define CONFIG 2 #include "helpersve.h" -#include "renamesvenofma.h" +typedef Sleef_svfloat64_t_2 vdouble2; +typedef Sleef_svfloat32_t_2 vfloat2; +#endif #endif #ifdef ENABLE_DSP256 
@@ -164,54 +240,63 @@ typedef Sleef___m256_2 vfloat2; #endif #ifdef ENABLE_VSX +#include "renamevsx.h" +#if !defined(USE_INLINE_HEADER) #define CONFIG 1 #include "helperpower_128.h" #include "renamevsx.h" typedef Sleef___vector_double_2 vdouble2; typedef Sleef___vector_float_2 vfloat2; #endif +#endif #ifdef ENABLE_VSXNOFMA +#include "renamevsxnofma.h" +#if !defined(USE_INLINE_HEADER) #define CONFIG 2 #include "helperpower_128.h" #include "renamevsxnofma.h" typedef Sleef___vector_double_2 vdouble2; typedef Sleef___vector_float_2 vfloat2; #endif +#endif #ifdef ENABLE_PUREC_SCALAR +#include "renamepurec_scalar.h" +#if !defined(USE_INLINE_HEADER) #define CONFIG 1 #include "helperpurec_scalar.h" -#include "renamepurec_scalar.h" typedef Sleef_double_2 vdouble2; typedef Sleef_float_2 vfloat2; #endif +#endif #ifdef ENABLE_PURECFMA_SCALAR +#include "renamepurecfma_scalar.h" +#if !defined(USE_INLINE_HEADER) #define CONFIG 2 #include "helperpurec_scalar.h" -#include "renamepurecfma_scalar.h" typedef Sleef_double_2 vdouble2; typedef Sleef_float_2 vfloat2; #endif +#endif // #ifdef ENABLE_DP -int check_featureDP() { - if (vavailability_i(1) == 0) return 0; +int check_featureDP(double d) { double s[VECTLENDP]; int i; for(i=0;i ${CMAKE_CURRENT_BINARY_DIR}/sleef${SIMD}.h.tmp1 # /sleef/libm/sleefsimddp.c > /build/libm/sleefSSE2.h.tmp1 + + # Remove all lines except those begin with "//@" + COMMAND ${SED_COMMAND} -n -e "/^\\/\\/@#.*$/p" ${CMAKE_CURRENT_BINARY_DIR}/sleef${SIMD}.h.tmp1 # sed -n -e "/^\\/\\/@#.*$/p" /build/src/libm/sleefSSE2.h.tmp1 + > ${CMAKE_CURRENT_BINARY_DIR}/sleef${SIMD}.h.tmp2 # > /build/src/libm/sleefSSE2.h.tmp2 + + # Remove "//@" + COMMAND ${SED_COMMAND} -e "s/^\\/\\/@#/#/g" ${CMAKE_CURRENT_BINARY_DIR}/sleef${SIMD}.h.tmp2 # sed -e "s/^\\/\\/@#/#/g" /build/src/libm/sleefSSE2.h.tmp2 + > ${CMAKE_CURRENT_BINARY_DIR}/include/macroonly${SIMD}.h # > /build/src/libm/include/macroonlySSE2.h + + # Preprocess sleefsimdsp.c with SLEEF_GENHEADER defined. 
Include macroonly*.h instead of helper*.h. + COMMAND "${CMAKE_C_COMPILER}" ${FLAG_PREPROCESS} ${FLAG_PRESERVE_COMMENTS} # gcc -E -C + ${FLAG_INCLUDE}${PROJECT_SOURCE_DIR}/src/common ${FLAG_INCLUDE}${PROJECT_SOURCE_DIR}/src/arch # -I/sleef/src/common -I/sleef/src/arch + ${FLAG_INCLUDE}${CMAKE_CURRENT_BINARY_DIR}/include/ # -I/build/src/libm/include + ${FLAG_DEFINE}SLEEF_GENHEADER ${FLAG_DEFINE}ENABLE_${SIMD} ${FLAG_DEFINE}DORENAME # -DSLEEF_GENHEADER -DENABLE_SSE2 -DDORENAME + ${CMAKE_CURRENT_SOURCE_DIR}/sleefsimdsp.c >> ${CMAKE_CURRENT_BINARY_DIR}/sleef${SIMD}.h.tmp1 # /sleef/libm/sleefsimdsp.c >> /build/libm/sleefSSE2.h.tmp1 + + # Remove lines beginning with "#" so that the resulting file can be preprocessed again. + COMMAND ${SED_COMMAND} -e "s/^#.*//g" ${CMAKE_CURRENT_BINARY_DIR}/sleef${SIMD}.h.tmp1 # sed -e "s/^#.*//g" /build/src/libm/sleefSSE2.h.tmp1 + > ${CMAKE_CURRENT_BINARY_DIR}/sleef${SIMD}.h.c # > /build/src/libm/sleefSSE2.h.c + + # Preprocess the intermediate file again to remove comments + COMMAND "${CMAKE_C_COMPILER}" ${FLAG_PREPROCESS} # gcc -E + ${CMAKE_CURRENT_BINARY_DIR}/sleef${SIMD}.h.c # /build/src/libm/sleefSSE2.h.c + > ${CMAKE_CURRENT_BINARY_DIR}/sleef${SIMD}.h.tmp3 # > /build/src/libm/sleefSSE2.h.tmp3 + + # Embed version number into the header + COMMAND ${SED_COMMAND} -e # sed -e + "s/%VERSION%/${SLEEF_VERSION_MAJOR}.${SLEEF_VERSION_MINOR}.${SLEEF_VERSION_PATCHLEVEL}/g" # "s/%VERSION%/3.5.0/g" + ${CMAKE_CURRENT_SOURCE_DIR}/sleefinline_header.h.org # /sleef/libm/sleefinline_header.h.org + > ${PROJECT_BINARY_DIR}/include/sleefinline_${SIMDLC}.h # > /build/include/sleefinline_sse2.h + + # Remove lines beginning with "#" + COMMAND ${SED_COMMAND} -e "s/^#.*//g" ${CMAKE_CURRENT_BINARY_DIR}/sleef${SIMD}.h.tmp3 # sed -e "s/^#.*//g" /build/src/libm/sleefSSE2.h.tmp3 + >> ${PROJECT_BINARY_DIR}/include/sleefinline_${SIMDLC}.h # >> /build/include/sleefinline_sse2.h + + MAIN_DEPENDENCY ${CMAKE_CURRENT_SOURCE_DIR}/sleefsimddp.c
${CMAKE_CURRENT_SOURCE_DIR}/sleefsimdsp.c ${HEADER_${SIMD}} + DEPENDS ${HEADER_${SIMD}} + VERBATIM + ) + + list(APPEND INLINE_HEADER_FILES_GENERATED ${INLINE_HEADER_FILE}) + endif() + endforeach() + + add_custom_target(${TARGET_INLINE_HEADERS} ALL + DEPENDS + ${INLINE_HEADER_FILES_GENERATED} + ) + install(FILES ${INLINE_HEADER_FILES_GENERATED} DESTINATION include) + endif(SED_COMMAND) + + add_library(${TARGET_LIBINLINE} STATIC rempitab.c) + install(TARGETS ${TARGET_LIBINLINE} DESTINATION lib) +endif(BUILD_INLINE_HEADERS) + # On some systems we need to explicitly link libsleef against libm to # use some of the math functions used in the scalar code (for example # sqrt). diff --git a/src/libm/rempitab.c b/src/libm/rempitab.c index 7c7b3908..9b7b950a 100644 --- a/src/libm/rempitab.c +++ b/src/libm/rempitab.c @@ -5,7 +5,13 @@ #include "misc.h" -NOEXPORT ALIGNED(64) const double rempitabdp[] = { +#if !defined(SLEEF_GENHEADER) +#define FUNCATR NOEXPORT ALIGNED(64) +#else +#define FUNCATR EXPORT ALIGNED(64) +#endif + +FUNCATR const double rempitabdp[] = { 0.15915494309189531785, 1.7916237278037667488e-17, 2.5454160968749269937e-33, 2.1132476107887107169e-49, 0.03415494309189533173, 4.0384494702232122736e-18, 1.0046721413651383112e-33, 2.1132476107887107169e-49, 0.03415494309189533173, 4.0384494702232122736e-18, 1.0046721413651383112e-33, 2.1132476107887107169e-49, diff --git a/src/libm/sleefinline_header.h.org b/src/libm/sleefinline_header.h.org new file mode 100644 index 00000000..f0645764 --- /dev/null +++ b/src/libm/sleefinline_header.h.org @@ -0,0 +1,9 @@ +// Copyright Naoki Shibata and contributors 2010 - 2020. +// Distributed under the Boost Software License, Version 1.0. 
+// (See http://www.boost.org/LICENSE_1_0.txt) + +// This file is generated by SLEEF %VERSION% + +#if (defined(_MSC_VER)) +#pragma fp_contract (off) +#endif diff --git a/src/libm/sleeflibm_header.h.org b/src/libm/sleeflibm_header.h.org index 51643193..cf7afb68 100644 --- a/src/libm/sleeflibm_header.h.org +++ b/src/libm/sleeflibm_header.h.org @@ -58,6 +58,10 @@ #include #endif +#if defined(__VSX__) +#include +#endif + // #ifndef SLEEF_FP_ILOGB0 diff --git a/src/libm/sleefsimddp.c b/src/libm/sleefsimddp.c index 0afa9803..17e49c15 100644 --- a/src/libm/sleefsimddp.c +++ b/src/libm/sleefsimddp.c @@ -5,10 +5,12 @@ // Always use -ffp-contract=off option to compile SLEEF. +#if !defined(SLEEF_GENHEADER) #include #include #include #include +#endif #include "misc.h" @@ -3653,7 +3655,7 @@ EXPORT CONST VECTOR_CC vdouble xerfc_u15(vdouble a) { } #endif // #if !defined(DETERMINISTIC) -#if !defined(DETERMINISTIC) && !defined(ENABLE_GNUABI) +#if !defined(DETERMINISTIC) && !defined(ENABLE_GNUABI) && !defined(SLEEF_GENHEADER) // The normal and deterministic versions of implementations are common // for the functions like sincospi_u05. Aliases are defined by // DALIAS_* macros for such functions. The defined aliases @@ -3720,9 +3722,9 @@ DALIAS_vd_vd(tgamma_u1) DALIAS_vd_vd(lgamma_u1) DALIAS_vd_vd(erf_u1) DALIAS_vd_vd(erfc_u15) -#endif // #if !defined(DETERMINISTIC) && !defined(ENABLE_GNUABI) +#endif // #if !defined(DETERMINISTIC) && !defined(ENABLE_GNUABI) && !defined(SLEEF_GENHEADER) -#ifndef ENABLE_GNUABI +#if !defined(ENABLE_GNUABI) && !defined(SLEEF_GENHEADER) EXPORT CONST int xgetInt(int name) { if (1 <= name && name <= 10) return vavailability_i(name); return 0; diff --git a/src/libm/sleefsimdsp.c b/src/libm/sleefsimdsp.c index a0f76716..2222a82a 100644 --- a/src/libm/sleefsimdsp.c +++ b/src/libm/sleefsimdsp.c @@ -5,10 +5,12 @@ // Always use -ffp-contract=off option to compile SLEEF. 
+#if !defined(SLEEF_GENHEADER) #include #include #include #include +#endif #include "misc.h" @@ -22,7 +24,11 @@ extern const float rempitabsp[]; #ifdef ENABLE_SSE2 #define CONFIG 2 +#if !defined(SLEEF_GENHEADER) #include "helpersse2.h" +#else +#include "macroonlySSE2.h" +#endif #ifdef DORENAME #ifdef ENABLE_GNUABI #include "renamesse2_gnuabi.h" @@ -34,7 +40,11 @@ extern const float rempitabsp[]; #ifdef ENABLE_SSE4 #define CONFIG 4 +#if !defined(SLEEF_GENHEADER) #include "helpersse2.h" +#else +#include "macroonlySSE4.h" +#endif #ifdef DORENAME #include "renamesse4.h" #endif @@ -42,7 +52,11 @@ extern const float rempitabsp[]; #ifdef ENABLE_AVX #define CONFIG 1 +#if !defined(SLEEF_GENHEADER) #include "helperavx.h" +#else +#include "macroonlyAVX.h" +#endif #ifdef DORENAME #ifdef ENABLE_GNUABI #include "renameavx_gnuabi.h" @@ -54,7 +68,11 @@ extern const float rempitabsp[]; #ifdef ENABLE_FMA4 #define CONFIG 4 +#if !defined(SLEEF_GENHEADER) #include "helperavx.h" +#else +#include "macroonlyFMA4.h" +#endif #ifdef DORENAME #ifdef ENABLE_GNUABI #include "renamefma4_gnuabi.h" @@ -66,7 +84,11 @@ extern const float rempitabsp[]; #ifdef ENABLE_AVX2 #define CONFIG 1 +#if !defined(SLEEF_GENHEADER) #include "helperavx2.h" +#else +#include "macroonlyAVX2.h" +#endif #ifdef DORENAME #ifdef ENABLE_GNUABI #include "renameavx2_gnuabi.h" @@ -78,7 +100,11 @@ extern const float rempitabsp[]; #ifdef ENABLE_AVX2128 #define CONFIG 1 +#if !defined(SLEEF_GENHEADER) #include "helperavx2_128.h" +#else +#include "macroonlyAVX2128.h" +#endif #ifdef DORENAME #include "renameavx2128.h" #endif @@ -86,7 +112,11 @@ extern const float rempitabsp[]; #ifdef ENABLE_AVX512F #define CONFIG 1 +#if !defined(SLEEF_GENHEADER) #include "helperavx512f.h" +#else +#include "macroonlyAVX512F.h" +#endif #ifdef DORENAME #ifdef ENABLE_GNUABI #include "renameavx512f_gnuabi.h" @@ -98,7 +128,11 @@ extern const float rempitabsp[]; #ifdef ENABLE_AVX512FNOFMA #define CONFIG 2 +#if !defined(SLEEF_GENHEADER) #include 
"helperavx512f.h" +#else +#include "macroonlyAVX512FNOFMA.h" +#endif #ifdef DORENAME #include "renameavx512fnofma.h" #endif @@ -106,7 +140,11 @@ extern const float rempitabsp[]; #ifdef ENABLE_ADVSIMD #define CONFIG 1 +#if !defined(SLEEF_GENHEADER) #include "helperadvsimd.h" +#else +#include "macroonlyADVSIMD.h" +#endif #ifdef DORENAME #ifdef ENABLE_GNUABI #include "renameadvsimd_gnuabi.h" @@ -118,7 +156,11 @@ extern const float rempitabsp[]; #ifdef ENABLE_ADVSIMDNOFMA #define CONFIG 2 +#if !defined(SLEEF_GENHEADER) #include "helperadvsimd.h" +#else +#include "macroonlyADVSIMDNOFMA.h" +#endif #ifdef DORENAME #include "renameadvsimdnofma.h" #endif @@ -126,7 +168,9 @@ extern const float rempitabsp[]; #ifdef ENABLE_NEON32 #define CONFIG 1 +#if !defined(SLEEF_GENHEADER) #include "helperneon32.h" +#endif #ifdef DORENAME #include "renameneon32.h" #endif @@ -134,7 +178,9 @@ extern const float rempitabsp[]; #ifdef ENABLE_NEON32VFPV4 #define CONFIG 4 +#if !defined(SLEEF_GENHEADER) #include "helperneon32.h" +#endif #ifdef DORENAME #include "renameneon32vfpv4.h" #endif @@ -142,7 +188,11 @@ extern const float rempitabsp[]; #ifdef ENABLE_VSX #define CONFIG 1 +#if !defined(SLEEF_GENHEADER) #include "helperpower_128.h" +#else +#include "macroonlyVSX.h" +#endif #ifdef DORENAME #include "renamevsx.h" #endif @@ -150,7 +200,11 @@ extern const float rempitabsp[]; #ifdef ENABLE_VSXNOFMA #define CONFIG 2 +#if !defined(SLEEF_GENHEADER) #include "helperpower_128.h" +#else +#include "macroonlyVSXNOFMA.h" +#endif #ifdef DORENAME #include "renamevsxnofma.h" #endif @@ -160,7 +214,9 @@ extern const float rempitabsp[]; #ifdef ENABLE_VECEXT #define CONFIG 1 +#if !defined(SLEEF_GENHEADER) #include "helpervecext.h" +#endif #ifdef DORENAME #include "renamevecext.h" #endif @@ -168,7 +224,9 @@ extern const float rempitabsp[]; #ifdef ENABLE_PUREC #define CONFIG 1 +#if !defined(SLEEF_GENHEADER) #include "helperpurec.h" +#endif #ifdef DORENAME #include "renamepurec.h" #endif @@ -176,7 +234,11 @@ extern 
const float rempitabsp[]; #ifdef ENABLE_PUREC_SCALAR #define CONFIG 1 +#if !defined(SLEEF_GENHEADER) #include "helperpurec_scalar.h" +#else +#include "macroonlyPUREC_SCALAR.h" +#endif #ifdef DORENAME #include "renamepurec_scalar.h" #endif @@ -184,7 +246,11 @@ extern const float rempitabsp[]; #ifdef ENABLE_PURECFMA_SCALAR #define CONFIG 2 +#if !defined(SLEEF_GENHEADER) #include "helperpurec_scalar.h" +#else +#include "macroonlyPURECFMA_SCALAR.h" +#endif #ifdef DORENAME #include "renamepurecfma_scalar.h" #endif @@ -194,7 +260,11 @@ extern const float rempitabsp[]; #ifdef ENABLE_SVE #define CONFIG 1 +#if !defined(SLEEF_GENHEADER) #include "helpersve.h" +#else +#include "macroonlySVE.h" +#endif #ifdef DORENAME #ifdef ENABLE_GNUABI #include "renamesve_gnuabi.h" @@ -206,7 +276,11 @@ extern const float rempitabsp[]; #ifdef ENABLE_SVENOFMA #define CONFIG 2 +#if !defined(SLEEF_GENHEADER) #include "helpersve.h" +#else +#include "macroonlySVENOFMA.h" +#endif #ifdef DORENAME #include "renamesvenofma.h" #endif /* DORENAME */ @@ -2766,7 +2840,9 @@ EXPORT CONST VECTOR_CC vfloat xfmaf(vfloat x, vfloat y, vfloat z) { } #endif // #if !defined(DETERMINISTIC) +#if !defined(SLEEF_GENHEADER) static INLINE CONST VECTOR_CC vint2 vcast_vi2_i_i(int i0, int i1) { return vcast_vi2_vm(vcast_vm_i_i(i0, i1)); } +#endif SQRTFU05_FUNCATR VECTOR_CC vfloat xsqrtf_u05(vfloat d) { #if defined(ENABLE_FMA_SP) @@ -3303,7 +3379,7 @@ EXPORT CONST VECTOR_CC vfloat xerfcf_u15(vfloat a) { } #endif // #if !defined(DETERMINISTIC) -#if !defined(DETERMINISTIC) && !defined(ENABLE_GNUABI) +#if !defined(DETERMINISTIC) && !defined(ENABLE_GNUABI) && !defined(SLEEF_GENHEADER) // See sleefsimddp.c for explanation of these macros #ifdef ENABLE_ALIAS @@ -3377,9 +3453,9 @@ DALIAS_vf_vf(lgammaf_u1) DALIAS_vf_vf(erff_u1) DALIAS_vf_vf(erfcf_u15) DALIAS_vf_vf_vf(fastpowf_u3500) -#endif // #if !defined(DETERMINISTIC) && !defined(ENABLE_GNUABI) +#endif // #if !defined(DETERMINISTIC) && !defined(ENABLE_GNUABI) && 
!defined(SLEEF_GENHEADER) -#ifndef ENABLE_GNUABI +#if !defined(ENABLE_GNUABI) && !defined(SLEEF_GENHEADER) EXPORT CONST int xgetIntf(int name) { if (1 <= name && name <= 10) return vavailability_i(name); return 0; diff --git a/travis/before_script.aarch64-gcc.sh b/travis/before_script.aarch64-gcc.sh index d3d160b8..756ee4ba 100644 --- a/travis/before_script.aarch64-gcc.sh +++ b/travis/before_script.aarch64-gcc.sh @@ -8,4 +8,5 @@ ninja all cd /build mkdir build-cross cd build-cross -cmake -G Ninja -DRUNNING_ON_TRAVIS=TRUE -DCMAKE_TOOLCHAIN_FILE=../travis/toolchain-aarch64.cmake -DNATIVE_BUILD_DIR=`pwd`/../build-native -DEMULATOR=qemu-aarch64-static -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE .. +cmake -G Ninja -DRUNNING_ON_TRAVIS=TRUE -DCMAKE_TOOLCHAIN_FILE=../travis/toolchain-aarch64.cmake -DNATIVE_BUILD_DIR=`pwd`/../build-native -DEMULATOR=qemu-aarch64-static -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_INLINE_HEADERS=TRUE .. + diff --git a/travis/before_script.arm64-gcc.sh b/travis/before_script.arm64-gcc.sh index 7d124449..a96211f7 100644 --- a/travis/before_script.arm64-gcc.sh +++ b/travis/before_script.arm64-gcc.sh @@ -3,4 +3,4 @@ set -ev mkdir sleef.build cd sleef.build export CC=gcc-8 -cmake -G Ninja -DRUNNING_ON_TRAVIS=TRUE -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE .. +cmake -G Ninja -DRUNNING_ON_TRAVIS=TRUE -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_INLINE_HEADERS=TRUE .. 
diff --git a/travis/before_script.common.sh b/travis/before_script.common.sh index 4b228a04..c590ed52 100644 --- a/travis/before_script.common.sh +++ b/travis/before_script.common.sh @@ -1,5 +1,6 @@ #!/bin/bash set -ev mkdir build && cd build + cmake -G Ninja -DRUNNING_ON_TRAVIS=TRUE -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install \ - -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE .. + -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_INLINE_HEADERS=TRUE .. diff --git a/travis/before_script.osx-clang.sh b/travis/before_script.osx-clang.sh index 843b4b66..0962498f 100644 --- a/travis/before_script.osx-clang.sh +++ b/travis/before_script.osx-clang.sh @@ -2,4 +2,4 @@ set -ev mkdir sleef.build cd sleef.build -cmake -G Ninja -DRUNNING_ON_TRAVIS=TRUE -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DOPENSSL_ROOT_DIR=/usr/local/opt/openssl -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE .. +cmake -G Ninja -DRUNNING_ON_TRAVIS=TRUE -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DOPENSSL_ROOT_DIR=/usr/local/opt/openssl -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_INLINE_HEADERS=TRUE .. diff --git a/travis/before_script.osx-gcc.sh b/travis/before_script.osx-gcc.sh index ccf76375..b748887a 100644 --- a/travis/before_script.osx-gcc.sh +++ b/travis/before_script.osx-gcc.sh @@ -3,4 +3,4 @@ set -ev mkdir sleef.build cd sleef.build export CC=gcc-6 -cmake -G Ninja -DRUNNING_ON_TRAVIS=TRUE -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DOPENSSL_ROOT_DIR=/usr/local/opt/openssl -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE .. +cmake -G Ninja -DRUNNING_ON_TRAVIS=TRUE -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DOPENSSL_ROOT_DIR=/usr/local/opt/openssl -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_INLINE_HEADERS=TRUE .. 
diff --git a/travis/before_script.x86_64-clang.sh b/travis/before_script.x86_64-clang.sh index 9f11cb10..dfcbf9f6 100644 --- a/travis/before_script.x86_64-clang.sh +++ b/travis/before_script.x86_64-clang.sh @@ -3,4 +3,4 @@ set -ev mkdir sleef.build cd sleef.build export CC=clang-7 -cmake -G Ninja -DRUNNING_ON_TRAVIS=TRUE -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE .. +cmake -G Ninja -DRUNNING_ON_TRAVIS=TRUE -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_INLINE_HEADERS=TRUE .. diff --git a/travis/before_script.x86_64-gcc.sh b/travis/before_script.x86_64-gcc.sh index 50af0d42..860a8e68 100644 --- a/travis/before_script.x86_64-gcc.sh +++ b/travis/before_script.x86_64-gcc.sh @@ -4,4 +4,4 @@ mkdir sleef.build cd sleef.build export CC=gcc-7 export CXX=g++-7 -cmake -G Ninja -DCMAKE_BUILD_TYPE=Release -DRUNNING_ON_TRAVIS=TRUE -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE .. +cmake -G Ninja -DCMAKE_BUILD_TYPE=Release -DRUNNING_ON_TRAVIS=TRUE -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_INLINE_HEADERS=TRUE .. diff --git a/travis/toolchain-ppc64el.cmake b/travis/toolchain-ppc64el.cmake index 593a8cb1..3f99c2fe 100644 --- a/travis/toolchain-ppc64el.cmake +++ b/travis/toolchain-ppc64el.cmake @@ -6,6 +6,8 @@ SET(CMAKE_FIND_ROOT_PATH /usr/powerpc64le-linux-gnu /usr/include/powerpc64le-li find_program(CMAKE_C_COMPILER ppc64el-cc) +SET(CMAKE_AR /usr/powerpc64le-linux-gnu/bin/ar) + SET(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) SET(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH) SET(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)