From 739f049b5386c9f0518e342688f4a25b1501aae4 Mon Sep 17 00:00:00 2001 From: shibatch Date: Fri, 21 Feb 2020 09:33:52 +0900 Subject: [PATCH 01/18] no message --- CMakeLists.txt | 8 +- Configure.cmake | 20 +++++ src/arch/helperadvsimd.h | 27 ++++++- src/arch/helperavx.h | 27 ++++++- src/arch/helperavx2.h | 27 ++++++- src/arch/helperavx2_128.h | 27 ++++++- src/arch/helperavx512f.h | 35 ++++++++- src/arch/helperneon32.h | 8 ++ src/arch/helperpower_128.h | 13 +++- src/arch/helperpurec.h | 7 ++ src/arch/helperpurec_scalar.h | 29 ++++++- src/arch/helpersse2.h | 38 ++++++--- src/arch/helpersve.h | 30 +++++++- src/common/misc.h | 38 ++++++--- src/libm-tester/CMakeLists.txt | 32 ++++++-- src/libm-tester/iutsimd.c | 124 +++++++++++++++++++++++------- src/libm-tester/iutsimdmain.c | 8 +- src/libm/CMakeLists.txt | 47 +++++++++++ src/libm/rempitab.c | 10 ++- src/libm/sleefinline_header.h.org | 6 ++ src/libm/sleeflibm_header.h.org | 4 + src/libm/sleefsimddp.c | 8 +- src/libm/sleefsimdsp.c | 82 +++++++++++++++++++- travis/toolchain-ppc64el.cmake | 2 + 24 files changed, 562 insertions(+), 95 deletions(-) create mode 100644 src/libm/sleefinline_header.h.org diff --git a/CMakeLists.txt b/CMakeLists.txt index d16b526d..1e657253 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,6 +6,7 @@ option(BUILD_DFT "libsleefdft will be built." ON) option(BUILD_QUAD "libsleefquad will be built." OFF) option(BUILD_GNUABI_LIBS "libsleefgnuabi will be built." ON) option(BUILD_TESTS "Tests will be built." ON) +option(BUILD_INLINE_HEADERS "Build header for inlining whole SLEEF functions" OFF) option(SLEEF_TEST_ALL_IUT "Perform tests on implementations with all vector extensions" OFF) option(SLEEF_SHOW_CONFIG "Show SLEEF configuration status messages." 
ON) @@ -24,7 +25,7 @@ enable_testing() set(SLEEF_VERSION_MAJOR 3) set(SLEEF_VERSION_MINOR 4) -set(SLEEF_VERSION_PATCHLEVEL 0) +set(SLEEF_VERSION_PATCHLEVEL 1) set(SLEEF_VERSION ${SLEEF_VERSION_MAJOR}.${SLEEF_VERSION_MINOR}.${SLEEF_VERSION_PATCHLEVEL}) set(SLEEF_SOVERSION ${SLEEF_VERSION_MAJOR}) @@ -74,6 +75,8 @@ set(TARGET_LIBSLEEFGNUABI "sleefgnuabi") # Generates the sleef.h headers and all the rename headers # Defined in src/libm/CMakeLists.txt via custom commands and a custom target set(TARGET_HEADERS "headers") +set(TARGET_INLINE_HEADERS "inline_headers") +set(TARGET_LIBINLINE "sleefinline") # Generates executable files for running the test suite # Defined in src/libm-tester/CMakeLists.txt via command add_executable set(TARGET_TESTER "tester") @@ -139,6 +142,9 @@ if(SLEEF_SHOW_CONFIG) message(STATUS "FFTW3 : " ${LIBFFTW3}) message(STATUS "OPENSSL : " ${OPENSSL_VERSION}) message(STATUS "SDE : " ${SDE_COMMAND}) + if (BUILD_INLINE_HEADERS) + message(STATUS "SED : " ${SED_COMMAND}) + endif() message(STATUS "RUNNING_ON_TRAVIS : " ${RUNNING_ON_TRAVIS}) message(STATUS "COMPILER_SUPPORTS_OPENMP : " ${COMPILER_SUPPORTS_OPENMP}) if(ENABLE_GNUABI) diff --git a/Configure.cmake b/Configure.cmake index f175221e..5405f310 100644 --- a/Configure.cmake +++ b/Configure.cmake @@ -306,6 +306,12 @@ if(CMAKE_C_COMPILER_ID MATCHES "(GNU|Clang)") string(CONCAT FLAGS_WALL ${FLAGS_WALL} " -Wno-psabi") set(FLAGS_ENABLE_NEON32 "-mfpu=neon") endif(CMAKE_C_COMPILER_ID MATCHES "GNU") + + # Flags for generating inline headers + set(FLAG_PREPROCESS "-E") + set(FLAG_PRESERVE_COMMENTS "-C") + set(FLAG_INCLUDE "-I") + set(FLAG_DEFINE "-D") elseif(MSVC) # Intel vector extensions. 
if (CMAKE_CL_64) @@ -324,6 +330,11 @@ elseif(MSVC) set(FLAGS_ENABLE_PURECFMA_SCALAR /D__SSE2__ /D__SSE3__ /D__SSE4_1__ /D__AVX__ /D__AVX2__ /arch:AVX2) set(FLAGS_WALL "/D_CRT_SECURE_NO_WARNINGS") set(FLAGS_NO_ERRNO "") + + set(FLAG_PREPROCESS "/E") + set(FLAG_PRESERVE_COMMENTS "/C") + set(FLAG_INCLUDE "/I") + set(FLAG_DEFINE "/D") elseif(CMAKE_C_COMPILER_ID MATCHES "Intel") set(FLAGS_ENABLE_SSE2 "-msse2") set(FLAGS_ENABLE_SSE4 "-msse4.1") @@ -337,6 +348,11 @@ elseif(CMAKE_C_COMPILER_ID MATCHES "Intel") set(FLAGS_FASTMATH "-fp-model fast=2 -Qoption,cpp,--extended_float_type") set(FLAGS_WALL "-fmax-errors=3 -Wall -Wno-unused -Wno-attributes") set(FLAGS_NO_ERRNO "") + + set(FLAG_PREPROCESS "-E") + set(FLAG_PRESERVE_COMMENTS "-C") + set(FLAG_INCLUDE "-I") + set(FLAG_DEFINE "-D") endif() set(SLEEF_C_FLAGS "${FLAGS_WALL} ${FLAGS_STRICTMATH} ${FLAGS_NO_ERRNO}") @@ -692,6 +708,10 @@ if (NOT SVE_VECTOR_BITS) set(SVE_VECTOR_BITS 128) endif() +# + +find_program(SED_COMMAND sed) + ## if(SLEEF_SHOW_ERROR_LOG) diff --git a/src/arch/helperadvsimd.h b/src/arch/helperadvsimd.h index ae3f3ef9..69b5394b 100644 --- a/src/arch/helperadvsimd.h +++ b/src/arch/helperadvsimd.h @@ -9,27 +9,38 @@ #error Please specify advsimd flags. #endif +#if !defined(SLEEF_GENHEADER) #include <arm_neon.h> #include <stdint.h> #include "misc.h" +#endif // #if !defined(SLEEF_GENHEADER) #define ENABLE_DP +//@#define ENABLE_DP #define LOG2VECTLENDP 1 +//@#define LOG2VECTLENDP 1 #define VECTLENDP (1 << LOG2VECTLENDP) +//@#define VECTLENDP (1 << LOG2VECTLENDP) #define ENABLE_SP +//@#define ENABLE_SP #define LOG2VECTLENSP 2 +//@#define LOG2VECTLENSP 2 #define VECTLENSP (1 << LOG2VECTLENSP) +//@#define VECTLENSP (1 << LOG2VECTLENSP) #if CONFIG == 1 #define ENABLE_FMA_DP +//@#define ENABLE_FMA_DP #define ENABLE_FMA_SP -//#define SPLIT_KERNEL // Benchmark comparison is needed to determine whether this option should be enabled. 
+//@#define ENABLE_FMA_SP #endif #define FULL_FP_ROUNDING +//@#define FULL_FP_ROUNDING #define ACCURATE_SQRT +//@#define ACCURATE_SQRT #define ISANAME "AArch64 AdvSIMD" @@ -235,14 +246,20 @@ static INLINE VECTOR_CC vint2 vxor_vi2_vi2_vi2(vint2 x, vint2 y) { // Shifts #define vsll_vi2_vi2_i(x, c) vshlq_n_s32(x, c) +//@#define vsll_vi2_vi2_i(x, c) vshlq_n_s32(x, c) #define vsrl_vi2_vi2_i(x, c) \ vreinterpretq_s32_u32(vshrq_n_u32(vreinterpretq_u32_s32(x), c)) +//@#define vsrl_vi2_vi2_i(x, c) vreinterpretq_s32_u32(vshrq_n_u32(vreinterpretq_u32_s32(x), c)) #define vsra_vi2_vi2_i(x, c) vshrq_n_s32(x, c) +//@#define vsra_vi2_vi2_i(x, c) vshrq_n_s32(x, c) #define vsra_vi_vi_i(x, c) vshr_n_s32(x, c) +//@#define vsra_vi_vi_i(x, c) vshr_n_s32(x, c) #define vsll_vi_vi_i(x, c) vshl_n_s32(x, c) +//@#define vsll_vi_vi_i(x, c) vshl_n_s32(x, c) #define vsrl_vi_vi_i(x, c) \ vreinterpret_s32_u32(vshr_n_u32(vreinterpret_u32_s32(x), c)) +//@#define vsrl_vi_vi_i(x, c) vreinterpret_s32_u32(vshr_n_u32(vreinterpret_u32_s32(x), c)) // Comparison returning masks static INLINE VECTOR_CC vmask veq_vm_vi2_vi2(vint2 x, vint2 y) { return vceqq_s32(x, y); } @@ -656,8 +673,6 @@ static INLINE VECTOR_CC void vsscatter2_v_p_i_i_vf(float *ptr, int offset, int s // -typedef Sleef_quad2 vargquad; - static INLINE vmask2 vinterleave_vm2_vm2(vmask2 v) { return (vmask2) { vreinterpretq_u32_u64(vtrn1q_u64(vreinterpretq_u64_u32(v.x), vreinterpretq_u64_u32(v.y))), @@ -684,6 +699,9 @@ static vmask2 vloadu_vm2_p(void *p) { return vm2; } +#if !defined(SLEEF_GENHEADER) +typedef Sleef_quad2 vargquad; + static INLINE vmask2 vcast_vm2_aq(vargquad aq) { union { vargquad aq; @@ -701,6 +719,7 @@ static INLINE vargquad vcast_aq_vm2(vmask2 vm2) { c.vm2 = vuninterleave_vm2_vm2(vm2); return c.aq; } +#endif static INLINE int vtestallzeros_i_vo64(vopmask g) { uint32x2_t x0 = vorr_u32(vget_low_u32(g), vget_high_u32(g)); @@ -723,7 +742,9 @@ static INLINE vopmask vgt64_vo_vm_vm(vmask x, vmask y) { } #define vsll64_vm_vm_i(x, c) 
vreinterpretq_u32_u64(vshlq_n_u64(vreinterpretq_u64_u32(x), c)) +//@#define vsll64_vm_vm_i(x, c) vreinterpretq_u32_u64(vshlq_n_u64(vreinterpretq_u64_u32(x), c)) #define vsrl64_vm_vm_i(x, c) vreinterpretq_u32_u64(vshrq_n_u64(vreinterpretq_u64_u32(x), c)) +//@#define vsrl64_vm_vm_i(x, c) vreinterpretq_u32_u64(vshrq_n_u64(vreinterpretq_u64_u32(x), c)) static INLINE vmask vcast_vm_vi(vint vi) { vmask m = vreinterpretq_u32_u64(vmovl_u32(vreinterpret_u32_s32(vi))); diff --git a/src/arch/helperavx.h b/src/arch/helperavx.h index 50adf688..076d4516 100644 --- a/src/arch/helperavx.h +++ b/src/arch/helperavx.h @@ -5,13 +5,13 @@ #if CONFIG == 1 -#if !defined(__AVX__) +#if !defined(__AVX__) && !defined(SLEEF_GENHEADER) #error Please specify -mavx. #endif #elif CONFIG == 4 -#if !defined(__AVX__) || !defined(__FMA4__) +#if (!defined(__AVX__) || !defined(__FMA4__)) && !defined(SLEEF_GENHEADER) #error Please specify -mavx and -mfma4. #endif @@ -20,16 +20,25 @@ #endif #define ENABLE_DP +//@#define ENABLE_DP #define LOG2VECTLENDP 2 +//@#define LOG2VECTLENDP 2 #define VECTLENDP (1 << LOG2VECTLENDP) +//@#define VECTLENDP (1 << LOG2VECTLENDP) #define ENABLE_SP +//@#define ENABLE_SP #define LOG2VECTLENSP (LOG2VECTLENDP+1) +//@#define LOG2VECTLENSP (LOG2VECTLENDP+1) #define VECTLENSP (1 << LOG2VECTLENSP) +//@#define VECTLENSP (1 << LOG2VECTLENSP) #define FULL_FP_ROUNDING +//@#define FULL_FP_ROUNDING #define ACCURATE_SQRT +//@#define ACCURATE_SQRT +#if !defined(SLEEF_GENHEADER) #if defined(_MSC_VER) #include <intrin.h> #else @@ -38,6 +47,7 @@ #include <stdint.h> #include "misc.h" +#endif // #if !defined(SLEEF_GENHEADER) typedef __m256i vmask; typedef __m256i vopmask; @@ -54,6 +64,8 @@ typedef struct { // +#if !defined(SLEEF_GENHEADER) + #ifndef __SLEEF_H__ void Sleef_x86CpuID(int32_t out[4], uint32_t eax, uint32_t ecx); #endif @@ -95,6 +107,8 @@ static INLINE int vavailability_i(int name) { #define DFTPRIORITY 20 #endif +#endif // #if !defined(SLEEF_GENHEADER) + static INLINE void vprefetch_v_p(const void *ptr) 
{ _mm_prefetch(ptr, _MM_HINT_T0); } static INLINE int vtestallones_i_vo32(vopmask g) { @@ -563,8 +577,6 @@ static INLINE void vsscatter2_v_p_i_i_vf(float *ptr, int offset, int step, vfloa // -typedef Sleef_quad4 vargquad; - static INLINE vmask2 vinterleave_vm2_vm2(vmask2 v) { return (vmask2) { vreinterpret_vm_vd(_mm256_unpacklo_pd(vreinterpret_vd_vm(v.x), vreinterpret_vd_vm(v.y))), @@ -622,6 +634,9 @@ static void vstoreu_v_p_vm2(void *p, vmask2 vm2) { vstoreu_v_p_vi2((int32_t *)((uint8_t *)p + sizeof(vmask)), vcast_vi2_vm(vm2.y)); } +#if !defined(SLEEF_GENHEADER) +typedef Sleef_quad4 vargquad; + static INLINE vmask2 vcast_vm2_aq(vargquad aq) { #if !defined(_MSC_VER) union { @@ -649,6 +664,7 @@ static INLINE vargquad vcast_aq_vm2(vmask2 vm2) { return a; #endif } +#endif static INLINE int vtestallzeros_i_vo64(vopmask g) { return _mm_movemask_epi8(_mm_or_si128(_mm256_extractf128_si256(g, 0), _mm256_extractf128_si256(g, 1))) == 0; @@ -680,6 +696,9 @@ static INLINE vopmask vgt64_vo_vm_vm(vmask x, vmask y) { _mm256_insertf128_si256(_mm256_castsi128_si256(_mm_srli_epi64(_mm256_extractf128_si256(x, 0), c)), \ _mm_srli_epi64(_mm256_extractf128_si256(x, 1), c), 1) +//@#define vsll64_vm_vm_i(x, c) _mm256_insertf128_si256(_mm256_castsi128_si256(_mm_slli_epi64(_mm256_extractf128_si256(x, 0), c)), _mm_slli_epi64(_mm256_extractf128_si256(x, 1), c), 1) +//@#define vsrl64_vm_vm_i(x, c) _mm256_insertf128_si256(_mm256_castsi128_si256(_mm_srli_epi64(_mm256_extractf128_si256(x, 0), c)), _mm_srli_epi64(_mm256_extractf128_si256(x, 1), c), 1) + static INLINE vmask vcast_vm_vi(vint vi) { vint vi0 = _mm_and_si128(_mm_shuffle_epi32(vi, (1 << 4) | (1 << 6)), _mm_set_epi32(0, -1, 0, -1)); vint vi1 = _mm_and_si128(_mm_shuffle_epi32(vi, (2 << 0) | (2 << 2) | (3 << 4) | (3 << 6)), _mm_set_epi32(0, -1, 0, -1)); diff --git a/src/arch/helperavx2.h b/src/arch/helperavx2.h index 3df1c82c..a46e2660 100644 --- a/src/arch/helperavx2.h +++ b/src/arch/helperavx2.h @@ -5,7 +5,7 @@ #if CONFIG == 1 -#ifndef 
__AVX2__ +#if !defined(__AVX2__) && !defined(SLEEF_GENHEADER) #error Please specify -mavx2. #endif @@ -14,19 +14,29 @@ #endif #define ENABLE_DP +//@#define ENABLE_DP #define LOG2VECTLENDP 2 +//@#define LOG2VECTLENDP 2 #define VECTLENDP (1 << LOG2VECTLENDP) +//@#define VECTLENDP (1 << LOG2VECTLENDP) #define ENABLE_FMA_DP +//@#define ENABLE_FMA_DP #define ENABLE_SP +//@#define ENABLE_SP #define LOG2VECTLENSP (LOG2VECTLENDP+1) +//@#define LOG2VECTLENSP (LOG2VECTLENDP+1) #define VECTLENSP (1 << LOG2VECTLENSP) +//@#define VECTLENSP (1 << LOG2VECTLENSP) #define ENABLE_FMA_SP +//@#define ENABLE_FMA_SP #define FULL_FP_ROUNDING -#define SPLIT_KERNEL +//@#define FULL_FP_ROUNDING #define ACCURATE_SQRT +//@#define ACCURATE_SQRT +#if !defined(SLEEF_GENHEADER) #if defined(_MSC_VER) #include <intrin.h> #else @@ -35,6 +45,7 @@ #include <stdint.h> #include "misc.h" +#endif // #if !defined(SLEEF_GENHEADER) typedef __m256i vmask; typedef __m256i vopmask; @@ -51,6 +62,8 @@ typedef struct { // +#if !defined(SLEEF_GENHEADER) + #ifndef __SLEEF_H__ void Sleef_x86CpuID(int32_t out[4], uint32_t eax, uint32_t ecx); #endif @@ -76,6 +89,8 @@ static INLINE int vavailability_i(int name) { #define DFTPRIORITY 25 #endif +#endif // #if !defined(SLEEF_GENHEADER) + static INLINE void vprefetch_v_p(const void *ptr) { _mm_prefetch(ptr, _MM_HINT_T0); } static INLINE int vtestallones_i_vo32(vopmask g) { @@ -418,8 +433,6 @@ static INLINE void vsscatter2_v_p_i_i_vf(float *ptr, int offset, int step, vfloa // -typedef Sleef_quad4 vargquad; - static INLINE vmask2 vinterleave_vm2_vm2(vmask2 v) { return (vmask2) { _mm256_unpacklo_epi64(v.x, v.y), _mm256_unpackhi_epi64(v.x, v.y) }; } @@ -461,6 +474,9 @@ static void vstoreu_v_p_vm2(void *p, vmask2 vm2) { vstoreu_v_p_vi2((int32_t *)((uint8_t *)p + sizeof(vmask)), vcast_vi2_vm(vm2.y)); } +#if !defined(SLEEF_GENHEADER) +typedef Sleef_quad4 vargquad; + static INLINE vmask2 vcast_vm2_aq(vargquad aq) { #if !defined(_MSC_VER) union { @@ -488,6 +504,7 @@ static INLINE vargquad 
vcast_aq_vm2(vmask2 vm2) { return a; #endif } +#endif static INLINE int vtestallzeros_i_vo64(vopmask g) { return _mm_movemask_epi8(_mm_or_si128(_mm256_extractf128_si256(g, 0), _mm256_extractf128_si256(g, 1))) == 0; @@ -501,6 +518,8 @@ static INLINE vopmask vgt64_vo_vm_vm(vmask x, vmask y) { return _mm256_cmpgt_epi #define vsll64_vm_vm_i(x, c) _mm256_slli_epi64(x, c) #define vsrl64_vm_vm_i(x, c) _mm256_srli_epi64(x, c) +//@#define vsll64_vm_vm_i(x, c) _mm256_slli_epi64(x, c) +//@#define vsrl64_vm_vm_i(x, c) _mm256_srli_epi64(x, c) static INLINE vmask vcast_vm_vi(vint vi) { return _mm256_cvtepi32_epi64(vi); } static INLINE vint vcast_vi_vm(vmask vm) { diff --git a/src/arch/helperavx2_128.h b/src/arch/helperavx2_128.h index 409b0ae6..d00aa5ee 100644 --- a/src/arch/helperavx2_128.h +++ b/src/arch/helperavx2_128.h @@ -5,7 +5,7 @@ #if CONFIG == 1 -#ifndef __AVX2__ +#if !defined(__AVX2__) && !defined(SLEEF_GENHEADER) #error Please specify -mavx2. #endif @@ -14,19 +14,29 @@ #endif #define ENABLE_DP +//@#define ENABLE_DP #define LOG2VECTLENDP 1 +//@#define LOG2VECTLENDP 1 #define VECTLENDP (1 << LOG2VECTLENDP) +//@#define VECTLENDP (1 << LOG2VECTLENDP) #define ENABLE_FMA_DP +//@#define ENABLE_FMA_DP #define ENABLE_SP +//@#define ENABLE_SP #define LOG2VECTLENSP (LOG2VECTLENDP+1) +//@#define LOG2VECTLENSP (LOG2VECTLENDP+1) #define VECTLENSP (1 << LOG2VECTLENSP) +//@#define VECTLENSP (1 << LOG2VECTLENSP) #define ENABLE_FMA_SP +//@#define ENABLE_FMA_SP #define FULL_FP_ROUNDING -#define SPLIT_KERNEL +//@#define FULL_FP_ROUNDING #define ACCURATE_SQRT +//@#define ACCURATE_SQRT +#if !defined(SLEEF_GENHEADER) #if defined(_MSC_VER) #include <intrin.h> #else @@ -35,6 +45,7 @@ #include <stdint.h> #include "misc.h" +#endif // #if !defined(SLEEF_GENHEADER) typedef __m128i vmask; typedef __m128i vopmask; @@ -51,6 +62,8 @@ typedef struct { // +#if !defined(SLEEF_GENHEADER) + #ifndef __SLEEF_H__ void Sleef_x86CpuID(int32_t out[4], uint32_t eax, uint32_t ecx); #endif @@ -76,6 +89,8 @@ static INLINE int 
vavailability_i(int name) { #define DFTPRIORITY 25 #endif +#endif // #if !defined(SLEEF_GENHEADER) + static INLINE void vprefetch_v_p(const void *ptr) { _mm_prefetch(ptr, _MM_HINT_T0); } static INLINE int vtestallones_i_vo32(vopmask g) { return _mm_movemask_epi8(g) == 0xFFFF; } @@ -392,8 +407,6 @@ static INLINE void vsscatter2_v_p_i_i_vf(float *ptr, int offset, int step, vfloa // -typedef Sleef_quad2 vargquad; - static INLINE vmask2 vinterleave_vm2_vm2(vmask2 v) { return (vmask2) { _mm_unpacklo_epi64(v.x, v.y), _mm_unpackhi_epi64(v.x, v.y) }; } @@ -415,6 +428,9 @@ static void vstoreu_v_p_vm2(void *p, vmask2 vm2) { vstoreu_v_p_vi2((int32_t *)((uint8_t *)p + sizeof(vmask)), vcast_vi2_vm(vm2.y)); } +#if !defined(SLEEF_GENHEADER) +typedef Sleef_quad2 vargquad; + static INLINE vmask2 vcast_vm2_aq(vargquad aq) { #if !defined(_MSC_VER) union { @@ -442,6 +458,7 @@ static INLINE vargquad vcast_aq_vm2(vmask2 vm2) { return a; #endif } +#endif static INLINE int vtestallzeros_i_vo64(vopmask g) { return _mm_movemask_epi8(g) == 0; } @@ -453,3 +470,5 @@ static INLINE vopmask vgt64_vo_vm_vm(vmask x, vmask y) { return _mm_cmpgt_epi64( #define vsll64_vm_vm_i(x, c) _mm_slli_epi64(x, c) #define vsrl64_vm_vm_i(x, c) _mm_srli_epi64(x, c) +//@#define vsll64_vm_vm_i(x, c) _mm_slli_epi64(x, c) +//@#define vsrl64_vm_vm_i(x, c) _mm_srli_epi64(x, c) diff --git a/src/arch/helperavx512f.h b/src/arch/helperavx512f.h index 04b1fd71..bf76aa6f 100644 --- a/src/arch/helperavx512f.h +++ b/src/arch/helperavx512f.h @@ -5,7 +5,7 @@ #if CONFIG == 1 || CONFIG == 2 -#ifndef __AVX512F__ +#if !defined(__AVX512F__) && !defined(SLEEF_GENHEADER) #error Please specify -mavx512f. 
#endif @@ -14,22 +14,32 @@ #endif #define ENABLE_DP +//@#define ENABLE_DP #define LOG2VECTLENDP 3 +//@#define LOG2VECTLENDP 3 #define VECTLENDP (1 << LOG2VECTLENDP) +//@#define VECTLENDP (1 << LOG2VECTLENDP) #define ENABLE_SP +//@#define ENABLE_SP #define LOG2VECTLENSP (LOG2VECTLENDP+1) +//@#define LOG2VECTLENSP (LOG2VECTLENDP+1) #define VECTLENSP (1 << LOG2VECTLENSP) +//@#define VECTLENSP (1 << LOG2VECTLENSP) #if CONFIG == 1 #define ENABLE_FMA_DP +//@#define ENABLE_FMA_DP #define ENABLE_FMA_SP -#define SPLIT_KERNEL +//@#define ENABLE_FMA_SP #endif #define FULL_FP_ROUNDING +//@#define FULL_FP_ROUNDING #define ACCURATE_SQRT +//@#define ACCURATE_SQRT +#if !defined(SLEEF_GENHEADER) #if defined(_MSC_VER) #include <intrin.h> #else @@ -38,6 +48,7 @@ #include <stdint.h> #include "misc.h" +#endif // #if !defined(SLEEF_GENHEADER) typedef __m512i vmask; typedef __mmask16 vopmask; @@ -54,6 +65,8 @@ typedef struct { // +#if !defined(SLEEF_GENHEADER) + #ifndef __SLEEF_H__ void Sleef_x86CpuID(int32_t out[4], uint32_t eax, uint32_t ecx); #endif @@ -82,6 +95,8 @@ static INLINE int vavailability_i(int name) { #define DFTPRIORITY 0 #endif +#endif // #if !defined(SLEEF_GENHEADER) + static INLINE void vprefetch_v_p(const void *ptr) { _mm_prefetch(ptr, _MM_HINT_T0); } #ifdef __INTEL_COMPILER @@ -224,6 +239,9 @@ static INLINE vint vxor_vi_vi_vi(vint x, vint y) { return _mm256_xor_si256(x, y) #define vsll_vi_vi_i(x, c) _mm256_slli_epi32(x, c) #define vsrl_vi_vi_i(x, c) _mm256_srli_epi32(x, c) #define vsra_vi_vi_i(x, c) _mm256_srai_epi32(x, c) +//@#define vsll_vi_vi_i(x, c) _mm256_slli_epi32(x, c) +//@#define vsrl_vi_vi_i(x, c) _mm256_srli_epi32(x, c) +//@#define vsra_vi_vi_i(x, c) _mm256_srai_epi32(x, c) static INLINE vint veq_vi_vi_vi(vint x, vint y) { return _mm256_cmpeq_epi32(x, y); } static INLINE vint vgt_vi_vi_vi(vint x, vint y) { return _mm256_cmpgt_epi32(x, y); } @@ -296,6 +314,8 @@ static INLINE vfloat vgetmant_vf_vf(vfloat d) { return _mm512_getmant_ps(d, _MM_ #define vfixup_vd_vd_vd_vi2_i(a, b, c, 
imm) _mm512_fixupimm_pd((a), (b), (c), (imm)) #define vfixup_vf_vf_vf_vi2_i(a, b, c, imm) _mm512_fixupimm_ps((a), (b), (c), (imm)) +//@#define vfixup_vd_vd_vd_vi2_i(a, b, c, imm) _mm512_fixupimm_pd((a), (b), (c), (imm)) +//@#define vfixup_vf_vf_vf_vi2_i(a, b, c, imm) _mm512_fixupimm_ps((a), (b), (c), (imm)) #if defined(_MSC_VER) // This function is needed when debugging on MSVC. @@ -408,6 +428,9 @@ static INLINE vint2 vandnot_vi2_vo_vi2(vopmask o, vint2 m) { #define vsll_vi2_vi2_i(x, c) _mm512_slli_epi32(x, c) #define vsrl_vi2_vi2_i(x, c) _mm512_srli_epi32(x, c) #define vsra_vi2_vi2_i(x, c) _mm512_srai_epi32(x, c) +//@#define vsll_vi2_vi2_i(x, c) _mm512_slli_epi32(x, c) +//@#define vsrl_vi2_vi2_i(x, c) _mm512_srli_epi32(x, c) +//@#define vsra_vi2_vi2_i(x, c) _mm512_srai_epi32(x, c) static INLINE vopmask veq_vo_vi2_vi2(vint2 x, vint2 y) { return _mm512_cmpeq_epi32_mask(x, y); } static INLINE vopmask vgt_vo_vi2_vi2(vint2 x, vint2 y) { return _mm512_cmpgt_epi32_mask(x, y); } @@ -536,8 +559,6 @@ static INLINE void vsscatter2_v_p_i_i_vf(float *ptr, int offset, int step, vfloa // -typedef Sleef_quad8 vargquad; - static INLINE vmask2 vinterleave_vm2_vm2(vmask2 v) { return (vmask2) { _mm512_unpacklo_epi64(v.x, v.y), _mm512_unpackhi_epi64(v.x, v.y) }; } @@ -579,6 +600,9 @@ static void vstoreu_v_p_vm2(void *p, vmask2 vm2) { vstoreu_v_p_vi2((int32_t *)((uint8_t *)p + sizeof(vmask)), vcast_vi2_vm(vm2.y)); } +#if !defined(SLEEF_GENHEADER) +typedef Sleef_quad8 vargquad; + static INLINE vmask2 vcast_vm2_aq(vargquad aq) { #if !defined(_MSC_VER) union { @@ -606,6 +630,7 @@ static INLINE vargquad vcast_aq_vm2(vmask2 vm2) { return a; #endif } +#endif #ifdef __INTEL_COMPILER static INLINE int vtestallzeros_i_vo64(vopmask g) { return _mm512_mask2int(g) == 0; } @@ -621,6 +646,8 @@ static INLINE vopmask vgt64_vo_vm_vm(vmask x, vmask y) { return _mm512_cmp_epi64 #define vsll64_vm_vm_i(x, c) _mm512_slli_epi64(x, c) #define vsrl64_vm_vm_i(x, c) _mm512_srli_epi64(x, c) +//@#define 
vsll64_vm_vm_i(x, c) _mm512_slli_epi64(x, c) +//@#define vsrl64_vm_vm_i(x, c) _mm512_srli_epi64(x, c) static INLINE vmask vcast_vm_vi(vint vi) { return _mm512_cvtepi32_epi64(vi); diff --git a/src/arch/helperneon32.h b/src/arch/helperneon32.h index ad7105e9..06832490 100644 --- a/src/arch/helperneon32.h +++ b/src/arch/helperneon32.h @@ -12,18 +12,23 @@ #endif #define ENABLE_SP +//@#define ENABLE_SP #define LOG2VECTLENSP 2 +//@#define LOG2VECTLENSP 2 #define VECTLENSP (1 << LOG2VECTLENSP) +//@#define VECTLENSP (1 << LOG2VECTLENSP) #if CONFIG == 4 #define ISANAME "AARCH32 NEON-VFPV4" #define ENABLE_FMA_SP +//@#define ENABLE_FMA_SP #else #define ISANAME "AARCH32 NEON" #endif #define DFTPRIORITY 10 #define ENABLE_RECSQRT_SP +//@#define ENABLE_RECSQRT_SP #include <arm_neon.h> #include <stdint.h> @@ -207,6 +212,9 @@ static INLINE vint2 vandnot_vi2_vo_vi2(vopmask x, vint2 y) { return (vint2)vbicq #define vsll_vi2_vi2_i(x, c) vshlq_n_s32(x, c) #define vsrl_vi2_vi2_i(x, c) vreinterpretq_s32_u32(vshrq_n_u32(vreinterpretq_u32_s32(x), c)) #define vsra_vi2_vi2_i(x, c) vshrq_n_s32(x, c) +//@#define vsll_vi2_vi2_i(x, c) vshlq_n_s32(x, c) +//@#define vsrl_vi2_vi2_i(x, c) vreinterpretq_s32_u32(vshrq_n_u32(vreinterpretq_u32_s32(x), c)) +//@#define vsra_vi2_vi2_i(x, c) vshrq_n_s32(x, c) static INLINE vopmask veq_vo_vi2_vi2(vint2 x, vint2 y) { return vceqq_s32(x, y); } static INLINE vopmask vgt_vo_vi2_vi2(vint2 x, vint2 y) { return vcgtq_s32(x, y); } diff --git a/src/arch/helperpower_128.h b/src/arch/helperpower_128.h index 220699ed..46f264fd 100644 --- a/src/arch/helperpower_128.h +++ b/src/arch/helperpower_128.h @@ -14,26 +14,37 @@ #endif #define ENABLE_DP +//@#define ENABLE_DP #define LOG2VECTLENDP 1 +//@#define LOG2VECTLENDP 1 #define VECTLENDP (1 << LOG2VECTLENDP) +//@#define VECTLENDP (1 << LOG2VECTLENDP) #define ENABLE_SP +//@#define ENABLE_SP #define LOG2VECTLENSP (LOG2VECTLENDP+1) +//@#define LOG2VECTLENSP (LOG2VECTLENDP+1) #define VECTLENSP (1 << LOG2VECTLENSP) +//@#define VECTLENSP (1 << 
LOG2VECTLENSP) #if CONFIG == 1 #define ENABLE_FMA_DP +//@#define ENABLE_FMA_DP #define ENABLE_FMA_SP -//#define SPLIT_KERNEL // Benchmark comparison is needed to determine whether this option should be enabled. +//@#define ENABLE_FMA_SP #endif #define ACCURATE_SQRT +//@#define ACCURATE_SQRT #define FULL_FP_ROUNDING +//@#define FULL_FP_ROUNDING +#if !defined(SLEEF_GENHEADER) #include <altivec.h> #include <stdint.h> #include "misc.h" +#endif // #if !defined(SLEEF_GENHEADER) typedef vector unsigned int vmask; typedef vector unsigned int vopmask; diff --git a/src/arch/helperpurec.h b/src/arch/helperpurec.h index 11e063fd..2aa926ea 100644 --- a/src/arch/helperpurec.h +++ b/src/arch/helperpurec.h @@ -12,14 +12,21 @@ #endif #define ENABLE_DP +//@#define ENABLE_DP #define ENABLE_SP +//@#define ENABLE_SP #define LOG2VECTLENDP CONFIG +//@#define LOG2VECTLENDP CONFIG #define VECTLENDP (1 << LOG2VECTLENDP) +//@#define VECTLENDP (1 << LOG2VECTLENDP) #define LOG2VECTLENSP (LOG2VECTLENDP+1) +//@#define LOG2VECTLENSP (LOG2VECTLENDP+1) #define VECTLENSP (1 << LOG2VECTLENSP) +//@#define VECTLENSP (1 << LOG2VECTLENSP) #define ACCURATE_SQRT +//@#define ACCURATE_SQRT #define DFTPRIORITY LOG2VECTLENDP #define ISANAME "Pure C Array" diff --git a/src/arch/helperpurec_scalar.h b/src/arch/helperpurec_scalar.h index 9c11ace4..65089c50 100644 --- a/src/arch/helperpurec_scalar.h +++ b/src/arch/helperpurec_scalar.h @@ -3,10 +3,15 @@ // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) +#if !defined(SLEEF_GENHEADER) #include <stdint.h> +#endif #ifndef ENABLE_BUILTIN_MATH + +#if !defined(SLEEF_GENHEADER) #include <math.h> +#endif #define SQRT sqrt #define SQRTF sqrtf @@ -30,29 +35,37 @@ #endif +#if !defined(SLEEF_GENHEADER) #include "misc.h" +#endif #ifndef CONFIG #error CONFIG macro not defined #endif #define ENABLE_DP +//@#define ENABLE_DP #define ENABLE_SP +//@#define ENABLE_SP #if CONFIG == 2 #define ENABLE_FMA_DP +//@#define ENABLE_FMA_DP #define ENABLE_FMA_SP +//@#define ENABLE_FMA_SP #if 
defined(__AVX2__) || defined(__aarch64__) || defined(__arm__) || defined(__powerpc64__) #ifndef FP_FAST_FMA #define FP_FAST_FMA +//@#define FP_FAST_FMA #endif #ifndef FP_FAST_FMAF #define FP_FAST_FMAF +//@#define FP_FAST_FMAF #endif #endif -#if !defined(FP_FAST_FMA) || !defined(FP_FAST_FMAF) +#if (!defined(FP_FAST_FMA) || !defined(FP_FAST_FMAF)) && !defined(SLEEF_GENHEADER) #error FP_FAST_FMA or FP_FAST_FMAF not defined #endif #define ISANAME "Pure C scalar with FMA" @@ -62,14 +75,20 @@ #endif // #if CONFIG == 2 #define LOG2VECTLENDP 0 +//@#define LOG2VECTLENDP 0 #define VECTLENDP (1 << LOG2VECTLENDP) +//@#define VECTLENDP (1 << LOG2VECTLENDP) #define LOG2VECTLENSP 0 +//@#define LOG2VECTLENSP 0 #define VECTLENSP (1 << LOG2VECTLENSP) +//@#define VECTLENSP (1 << LOG2VECTLENSP) #define ACCURATE_SQRT +//@#define ACCURATE_SQRT #if defined(__SSE4_1__) || defined(__aarch64__) #define FULL_FP_ROUNDING +//@#define FULL_FP_ROUNDING #endif #define DFTPRIORITY LOG2VECTLENDP @@ -376,8 +395,6 @@ static INLINE void vstream_v_p_vf(float *ptr, vfloat v) { *ptr = v; } // -typedef Sleef_quad1 vargquad; - static INLINE vmask2 vinterleave_vm2_vm2(vmask2 v) { return v; } static INLINE vmask2 vuninterleave_vm2_vm2(vmask2 v) { return v; } static INLINE vint vuninterleave_vi_vi(vint v) { return v; } @@ -386,6 +403,9 @@ static INLINE vdouble vuninterleave_vd_vd(vdouble vd) { return vd; } static INLINE vmask vinterleave_vm_vm(vmask vm) { return vm; } static INLINE vmask vuninterleave_vm_vm(vmask vm) { return vm; } +#if !defined(SLEEF_GENHEADER) +typedef Sleef_quad1 vargquad; + static INLINE vmask2 vcast_vm2_aq(vargquad aq) { union { vargquad aq; @@ -403,6 +423,7 @@ static INLINE vargquad vcast_aq_vm2(vmask2 vm2) { c.vm2 = vm2; return c.aq; } +#endif static INLINE int vtestallzeros_i_vo64(vopmask g) { return !g ? ~(uint32_t)0 : 0; } static INLINE vmask vsel_vm_vo64_vm_vm(vopmask o, vmask x, vmask y) { return o ? 
x : y; } @@ -412,6 +433,8 @@ static INLINE vmask vneg64_vm_vm(vmask x) { return -(int64_t)x; } #define vsll64_vm_vm_i(x, c) ((uint64_t)(x) << (c)) #define vsrl64_vm_vm_i(x, c) ((uint64_t)(x) >> (c)) +//@#define vsll64_vm_vm_i(x, c) ((uint64_t)(x) << (c)) +//@#define vsrl64_vm_vm_i(x, c) ((uint64_t)(x) >> (c)) static INLINE vopmask vgt64_vo_vm_vm(vmask x, vmask y) { return (int64_t)x > (int64_t)y ? ~(uint32_t)0 : 0; } diff --git a/src/arch/helpersse2.h b/src/arch/helpersse2.h index c0875781..4704a294 100644 --- a/src/arch/helpersse2.h +++ b/src/arch/helpersse2.h @@ -5,19 +5,19 @@ #if CONFIG == 2 -#if !defined(__SSE2__) +#if !defined(__SSE2__) && !defined(SLEEF_GENHEADER) #error Please specify -msse2. #endif #elif CONFIG == 3 -#if !defined(__SSE2__) || !defined(__SSE3__) +#if (!defined(__SSE2__) || !defined(__SSE3__)) && !defined(SLEEF_GENHEADER) #error Please specify -msse2 and -msse3 #endif #elif CONFIG == 4 -#if !defined(__SSE2__) || !defined(__SSE3__) || !defined(__SSE4_1__) +#if (!defined(__SSE2__) || !defined(__SSE3__) || !defined(__SSE4_1__)) && !defined(SLEEF_GENHEADER) #error Please specify -msse2, -msse3 and -msse4.1 #endif @@ -26,15 +26,23 @@ #endif #define ENABLE_DP +//@#define ENABLE_DP #define LOG2VECTLENDP 1 +//@#define LOG2VECTLENDP 1 #define VECTLENDP (1 << LOG2VECTLENDP) +//@#define VECTLENDP (1 << LOG2VECTLENDP) #define ENABLE_SP +//@#define ENABLE_SP #define LOG2VECTLENSP (LOG2VECTLENDP+1) +//@#define LOG2VECTLENSP (LOG2VECTLENDP+1) #define VECTLENSP (1 << LOG2VECTLENSP) +//@#define VECTLENSP (1 << LOG2VECTLENSP) #define ACCURATE_SQRT +//@#define ACCURATE_SQRT +#if !defined(SLEEF_GENHEADER) #if defined(_MSC_VER) #include <intrin.h> #else @@ -43,6 +51,7 @@ #include <stdint.h> #include "misc.h" +#endif // #if !defined(SLEEF_GENHEADER) typedef __m128i vmask; typedef __m128i vopmask; @@ -59,6 +68,8 @@ typedef struct { // +#if !defined(SLEEF_GENHEADER) + #ifndef __SLEEF_H__ void Sleef_x86CpuID(int32_t out[4], uint32_t eax, uint32_t ecx); #endif @@ -106,6 +117,8 @@ static 
INLINE int vavailability_i(int name) { #define DFTPRIORITY 10 #endif +#endif // #if !defined(SLEEF_GENHEADER) + static INLINE void vprefetch_v_p(const void *ptr) { _mm_prefetch(ptr, _MM_HINT_T0); } static INLINE int vtestallones_i_vo32(vopmask g) { return _mm_movemask_epi8(g) == 0xFFFF; } @@ -153,13 +166,14 @@ static INLINE vint vcast_vi_i(int i) { return _mm_set_epi32(0, 0, i, i); } static INLINE vint2 vcastu_vi2_vi(vint vi) { return _mm_and_si128(_mm_shuffle_epi32(vi, 0x73), _mm_set_epi32(-1, 0, -1, 0)); } static INLINE vint vcastu_vi_vi2(vint2 vi) { return _mm_shuffle_epi32(vi, 0x0d); } -#ifdef __SSE4_1__ +#if CONFIG == 4 static INLINE vdouble vtruncate_vd_vd(vdouble vd) { return _mm_round_pd(vd, _MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC); } static INLINE vdouble vrint_vd_vd(vdouble vd) { return _mm_round_pd(vd, _MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC); } static INLINE vfloat vtruncate_vf_vf(vfloat vf) { return _mm_round_ps(vf, _MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC); } static INLINE vfloat vrint_vf_vf(vfloat vd) { return _mm_round_ps(vd, _MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC); } static INLINE vopmask veq64_vo_vm_vm(vmask x, vmask y) { return _mm_cmpeq_epi64(x, y); } #define FULL_FP_ROUNDING +//@#define FULL_FP_ROUNDING #else static INLINE vdouble vtruncate_vd_vd(vdouble vd) { return vcast_vd_vi(vtruncate_vi_vd(vd)); } static INLINE vdouble vrint_vd_vd(vdouble vd) { return vcast_vd_vi(vrint_vi_vd(vd)); } @@ -224,7 +238,7 @@ static INLINE vint vgt_vi_vi_vi(vint x, vint y) { return _mm_cmpgt_epi32(x, y); static INLINE vopmask veq_vo_vi_vi(vint x, vint y) { return _mm_cmpeq_epi32(x, y); } static INLINE vopmask vgt_vo_vi_vi(vint x, vint y) { return _mm_cmpgt_epi32(x, y); } -#ifdef __SSE4_1__ +#if CONFIG == 4 static INLINE vint vsel_vi_vo_vi_vi(vopmask m, vint x, vint y) { return _mm_blendv_epi8(y, x, m); } static INLINE vdouble vsel_vd_vo_vd_vd(vopmask m, vdouble x, vdouble y) { return _mm_blendv_pd(y, x, _mm_castsi128_pd(m)); } @@ -299,7 +313,7 @@ static 
INLINE vfloat vreinterpret_vf_vm(vmask vm) { return _mm_castsi128_ps(vm); static INLINE vfloat vreinterpret_vf_vi2(vint2 vm) { return _mm_castsi128_ps(vm); } static INLINE vint2 vreinterpret_vi2_vf(vfloat vf) { return _mm_castps_si128(vf); } -#ifndef __SSE4_1__ +#if CONFIG != 4 static INLINE vfloat vtruncate_vf_vf(vfloat vd) { return vcast_vf_vi2(vtruncate_vi2_vf(vd)); } static INLINE vfloat vrint_vf_vf(vfloat vf) { return vcast_vf_vi2(vrint_vi2_vf(vf)); } #endif @@ -346,7 +360,7 @@ static INLINE vopmask vgt_vo_vi2_vi2(vint2 x, vint2 y) { return _mm_cmpgt_epi32( static INLINE vint2 veq_vi2_vi2_vi2(vint2 x, vint2 y) { return _mm_cmpeq_epi32(x, y); } static INLINE vint2 vgt_vi2_vi2_vi2(vint2 x, vint2 y) { return _mm_cmpgt_epi32(x, y); } -#ifdef __SSE4_1__ +#if CONFIG == 4 static INLINE vint2 vsel_vi2_vo_vi2_vi2(vopmask m, vint2 x, vint2 y) { return _mm_blendv_epi8(y, x, m); } static INLINE vfloat vsel_vf_vo_vf_vf(vopmask m, vfloat x, vfloat y) { return _mm_blendv_ps(y, x, _mm_castsi128_ps(m)); } @@ -408,7 +422,7 @@ static INLINE vdouble vnegpos_vd_vd(vdouble d) { return vreinterpret_vd_vm(vxor_ static INLINE vfloat vposneg_vf_vf(vfloat d) { return vreinterpret_vf_vm(vxor_vm_vm_vm(vreinterpret_vm_vf(d), vreinterpret_vm_vf(PNMASKf))); } static INLINE vfloat vnegpos_vf_vf(vfloat d) { return vreinterpret_vf_vm(vxor_vm_vm_vm(vreinterpret_vm_vf(d), vreinterpret_vm_vf(NPMASKf))); } -#ifdef __SSE3__ +#if CONFIG >= 3 static INLINE vdouble vsubadd_vd_vd_vd(vdouble x, vdouble y) { return _mm_addsub_pd(x, y); } static INLINE vfloat vsubadd_vf_vf_vf(vfloat x, vfloat y) { return _mm_addsub_ps(x, y); } #else @@ -445,8 +459,6 @@ static INLINE void vsscatter2_v_p_i_i_vf(float *ptr, int offset, int step, vfloa // -typedef Sleef_quad2 vargquad; - static INLINE vmask2 vinterleave_vm2_vm2(vmask2 v) { return (vmask2) { _mm_unpacklo_epi64(v.x, v.y), _mm_unpackhi_epi64(v.x, v.y) }; } @@ -474,6 +486,9 @@ static void vstoreu_v_p_vm2(void *p, vmask2 vm2) { vstoreu_v_p_vi2((int32_t *)((uint8_t 
*)p + sizeof(vmask)), vcast_vi2_vm(vm2.y)); } +#if !defined(SLEEF_GENHEADER) +typedef Sleef_quad2 vargquad; + static INLINE vmask2 vcast_vm2_aq(vargquad aq) { #if !defined(_MSC_VER) union { @@ -501,6 +516,7 @@ static INLINE vargquad vcast_aq_vm2(vmask2 vm2) { return a; #endif } +#endif static INLINE int vtestallzeros_i_vo64(vopmask g) { return _mm_movemask_epi8(g) == 0; } @@ -513,6 +529,8 @@ static INLINE vmask vneg64_vm_vm(vmask x) { return _mm_sub_epi64(vcast_vm_i_i(0, #define vsll64_vm_vm_i(x, c) _mm_slli_epi64(x, c) #define vsrl64_vm_vm_i(x, c) _mm_srli_epi64(x, c) +//@#define vsll64_vm_vm_i(x, c) _mm_slli_epi64(x, c) +//@#define vsrl64_vm_vm_i(x, c) _mm_srli_epi64(x, c) static INLINE vopmask vgt64_vo_vm_vm(vmask x, vmask y) { int64_t ax[2], ay[2]; diff --git a/src/arch/helpersve.h b/src/arch/helpersve.h index 710d5f4d..affe7910 100644 --- a/src/arch/helpersve.h +++ b/src/arch/helpersve.h @@ -5,14 +5,16 @@ /* http://www.boost.org/LICENSE_1_0.txt) */ /*********************************************************************/ -#ifndef __ARM_FEATURE_SVE +#if !defined(__ARM_FEATURE_SVE) && !defined(SLEEF_GENHEADER) #error Please specify SVE flags. 
#endif +#if !defined(SLEEF_GENHEADER) #include #include #include "misc.h" +#endif // #if !defined(SLEEF_GENHEADER) #if defined(VECTLENDP) || defined(VECTLENSP) #error VECTLENDP or VECTLENSP already defined @@ -21,9 +23,12 @@ #if CONFIG == 1 || CONFIG == 2 // Vector length agnostic #define VECTLENSP (svcntw()) +//@#define VECTLENSP (svcntw()) #define VECTLENDP (svcntd()) +//@#define VECTLENDP (svcntd()) #define ISANAME "AArch64 SVE" #define ptrue svptrue_b8() +//@#define ptrue svptrue_b8() #elif CONFIG == 8 // 256-bit vector length #define ISANAME "AArch64 SVE 256-bit" @@ -67,16 +72,22 @@ static INLINE int vavailability_i(int name) { return 3; } #endif #define ENABLE_SP +//@#define ENABLE_SP #define ENABLE_DP +//@#define ENABLE_DP #if CONFIG != 2 #define ENABLE_FMA_SP +//@#define ENABLE_FMA_SP #define ENABLE_FMA_DP +//@#define ENABLE_FMA_DP //#define SPLIT_KERNEL // Benchmark comparison is needed to determine whether this option should be enabled. #endif #define FULL_FP_ROUNDING +//@#define FULL_FP_ROUNDING #define ACCURATE_SQRT +//@#define ACCURATE_SQRT // Mask definition typedef svint32_t vmask; @@ -98,6 +109,8 @@ typedef __sizeless_struct { // masking predicates #define ALL_TRUE_MASK svdup_n_s32(0xffffffff) #define ALL_FALSE_MASK svdup_n_s32(0x0) +//@#define ALL_TRUE_MASK svdup_n_s32(0xffffffff) +//@#define ALL_FALSE_MASK svdup_n_s32(0x0) static INLINE void vprefetch_v_p(const void *ptr) {} @@ -338,10 +351,12 @@ static INLINE vint2 vxor_vi2_vi2_vi2(vint2 x, vint2 y) { // Shifts #define vsll_vi2_vi2_i(x, c) svlsl_n_s32_x(ptrue, x, c) +//@#define vsll_vi2_vi2_i(x, c) svlsl_n_s32_x(ptrue, x, c) #define vsrl_vi2_vi2_i(x, c) \ svreinterpret_s32_u32(svlsr_n_u32_x(ptrue, svreinterpret_u32_s32(x), c)) - +//@#define vsrl_vi2_vi2_i(x, c) svreinterpret_s32_u32(svlsr_n_u32_x(ptrue, svreinterpret_u32_s32(x), c)) #define vsra_vi2_vi2_i(x, c) svasr_n_s32_x(ptrue, x, c) +//@#define vsra_vi2_vi2_i(x, c) svasr_n_s32_x(ptrue, x, c) // Comparison returning integers static INLINE 
vint2 vgt_vi2_vi2_vi2(vint2 x, vint2 y) { @@ -627,7 +642,10 @@ static INLINE vint vandnot_vi_vo_vi(vopmask x, vint y) { return svsel_s32(x, ALL_FALSE_MASK, y); } #define vsra_vi_vi_i(x, c) svasr_n_s32_x(ptrue, x, c) +//@#define vsra_vi_vi_i(x, c) svasr_n_s32_x(ptrue, x, c) #define vsll_vi_vi_i(x, c) svlsl_n_s32_x(ptrue, x, c) +//@#define vsll_vi_vi_i(x, c) svlsl_n_s32_x(ptrue, x, c) + static INLINE vint vsrl_vi_vi_i(vint x, int c) { return svreinterpret_s32_u32(svlsr_n_u32_x(ptrue, svreinterpret_u32_s32(x), c)); } @@ -783,8 +801,6 @@ static int vcast_i_vi2(vint2 v) { // -typedef Sleef_quadx vargquad; - static INLINE vmask2 vinterleave_vm2_vm2(vmask2 v) { return (vmask2) { svreinterpret_s32_u64(svtrn1_u64(svreinterpret_u64_s32(v.x), svreinterpret_u64_s32(v.y))), @@ -827,6 +843,9 @@ static vmask2 vloadu_vm2_p(void *p) { return vm2; } +#if !defined(SLEEF_GENHEADER) +typedef Sleef_quadx vargquad; + static INLINE vmask2 vcast_vm2_aq(vargquad aq) { return vinterleave_vm2_vm2((vmask2) { svld1_s32(ptrue, (int32_t *)&aq), svld1_s32(ptrue, (int32_t *)&(aq.s[svcntd()/2])) }); } @@ -838,6 +857,7 @@ static INLINE vargquad vcast_aq_vm2(vmask2 vm2) { svst1_s32(ptrue, (int32_t *)&(aq.s[svcntd()/2]), vm2.y); return aq; } +#endif static INLINE int vtestallzeros_i_vo64(vopmask g) { return svcntp_b64(svptrue_b64(), g) == 0; @@ -862,7 +882,9 @@ static INLINE vopmask vgt64_vo_vm_vm(vmask x, vmask y) { } #define vsll64_vm_vm_i(x, c) svreinterpret_s32_u64(svlsl_n_u64_x(ptrue, svreinterpret_u64_s32(x), c)) +//@#define vsll64_vm_vm_i(x, c) svreinterpret_s32_u64(svlsl_n_u64_x(ptrue, svreinterpret_u64_s32(x), c)) #define vsrl64_vm_vm_i(x, c) svreinterpret_s32_u64(svlsr_n_u64_x(ptrue, svreinterpret_u64_s32(x), c)) +//@#define vsrl64_vm_vm_i(x, c) svreinterpret_s32_u64(svlsr_n_u64_x(ptrue, svreinterpret_u64_s32(x), c)) static INLINE vmask vcast_vm_vi(vint vi) { return svreinterpret_s32_s64(svextw_s64_z(ptrue, svreinterpret_s64_s32(vi))); } static INLINE vint vcast_vi_vm(vmask vm) { return 
vand_vm_vm_vm(vm, vcast_vm_i_i(0, 0xffffffff)); } diff --git a/src/common/misc.h b/src/common/misc.h index 1c326a3b..8e102de8 100644 --- a/src/common/misc.h +++ b/src/common/misc.h @@ -8,7 +8,9 @@ #ifndef __MISC_H__ #define __MISC_H__ +#if !defined(SLEEF_GENHEADER) #include +#endif #ifndef M_PI #define M_PI 3.141592653589793238462643383279502884 @@ -144,30 +146,32 @@ #define stringify(s) stringify_(s) #define stringify_(s) #s +#if !defined(SLEEF_GENHEADER) typedef long double longdouble; +#endif -#ifndef Sleef_double2_DEFINED +#if !defined(Sleef_double2_DEFINED) && !defined(SLEEF_GENHEADER) #define Sleef_double2_DEFINED typedef struct { double x, y; } Sleef_double2; #endif -#ifndef Sleef_float2_DEFINED +#if !defined(Sleef_float2_DEFINED) && !defined(SLEEF_GENHEADER) #define Sleef_float2_DEFINED typedef struct { float x, y; } Sleef_float2; #endif -#ifndef Sleef_longdouble2_DEFINED +#if !defined(Sleef_longdouble2_DEFINED) && !defined(SLEEF_GENHEADER) #define Sleef_longdouble2_DEFINED typedef struct { long double x, y; } Sleef_longdouble2; #endif -#if !defined(Sleef_quad_DEFINED) +#if !defined(Sleef_quad_DEFINED) && !defined(SLEEF_GENHEADER) #define Sleef_quad_DEFINED #if defined(ENABLEFLOAT128) typedef __float128 Sleef_quad; @@ -176,7 +180,7 @@ typedef struct { double x, y; } Sleef_quad; #endif #endif -#if !defined(Sleef_quad1_DEFINED) +#if !defined(Sleef_quad1_DEFINED) && !defined(SLEEF_GENHEADER) #define Sleef_quad1_DEFINED typedef union { struct { @@ -186,7 +190,7 @@ typedef union { } Sleef_quad1; #endif -#if !defined(Sleef_quad2_DEFINED) +#if !defined(Sleef_quad2_DEFINED) && !defined(SLEEF_GENHEADER) #define Sleef_quad2_DEFINED typedef union { struct { @@ -196,7 +200,7 @@ typedef union { } Sleef_quad2; #endif -#if !defined(Sleef_quad4_DEFINED) +#if !defined(Sleef_quad4_DEFINED) && !defined(SLEEF_GENHEADER) #define Sleef_quad4_DEFINED typedef union { struct { @@ -206,14 +210,14 @@ typedef union { } Sleef_quad4; #endif -#if !defined(Sleef_quad8_DEFINED) +#if 
!defined(Sleef_quad8_DEFINED) && !defined(SLEEF_GENHEADER) #define Sleef_quad8_DEFINED typedef union { Sleef_quad s[8]; } Sleef_quad8; #endif -#if defined(__ARM_FEATURE_SVE) && !defined(Sleef_quadx_DEFINED) +#if defined(__ARM_FEATURE_SVE) && !defined(Sleef_quadx_DEFINED) && !defined(SLEEF_GENHEADER) #define Sleef_quadx_DEFINED typedef union { Sleef_quad s[32]; @@ -228,19 +232,27 @@ typedef union { #define UNLIKELY(condition) __builtin_expect(!!(condition), 0) #define RESTRICT __restrict__ -#define INLINE __attribute__((always_inline)) - #ifndef __arm__ #define ALIGNED(x) __attribute__((aligned(x))) #else #define ALIGNED(x) #endif +#if defined(SLEEF_GENHEADER) + +#define INLINE SLEEF_ALWAYS_INLINE +#define EXPORT SLEEF_INLINE +#define CONST SLEEF_CONST +#define NOEXPORT + +#else // #if defined(SLEEF_GENHEADER) + #ifndef __INTEL_COMPILER #define CONST const #else #define CONST #endif +#define INLINE __attribute__((always_inline)) #if defined(__MINGW32__) || defined(__MINGW64__) || defined(__CYGWIN__) #ifndef SLEEF_STATIC_LIBS @@ -255,6 +267,8 @@ typedef union { #define NOEXPORT __attribute__ ((visibility ("hidden"))) #endif // #if defined(__MINGW32__) || defined(__MINGW64__) || defined(__CYGWIN__) +#endif // #if defined(SLEEF_GENHEADER) + #define SLEEF_NAN __builtin_nan("") #define SLEEF_NANf __builtin_nanf("") #define SLEEF_NANl __builtin_nanl("") @@ -287,7 +301,7 @@ typedef union { #define NOEXPORT #endif -#if (defined(__GNUC__) || defined(__CLANG__)) && (defined(__i386__) || defined(__x86_64__)) +#if (defined(__GNUC__) || defined(__CLANG__)) && (defined(__i386__) || defined(__x86_64__)) && !defined(SLEEF_GENHEADER) #include #endif diff --git a/src/libm-tester/CMakeLists.txt b/src/libm-tester/CMakeLists.txt index 013b6c1f..ce49af98 100644 --- a/src/libm-tester/CMakeLists.txt +++ b/src/libm-tester/CMakeLists.txt @@ -79,7 +79,7 @@ set_target_properties(${TARGET_IUT} PROPERTIES C_STANDARD 99) add_test_iut(${TARGET_IUT}) set(IUT_LIST ${TARGET_IUT}) -set(IUT_SRC 
iutsimd.c iutsimdmain.c testerutil) +set(IUT_SRC iutsimd.c iutsimdmain.c testerutil.c) # Add vector extension `iut`s macro(test_extension SIMD) @@ -104,11 +104,11 @@ macro(test_extension SIMD) add_test_iut(${TARGET_IUT${SIMD}}) list(APPEND IUT_LIST ${TARGET_IUT${SIMD}}) -# The iut programs whose name begins with "iuty" are the iut for the -# deterministic version of functions. By checking the result of -# testing with iutysse2, for example, it can be checked that the -# corresponding deterministic functions passes the accuracy and -# nonnumber tests. + # The iut programs whose names begin with "iuty" are the iut for the + # deterministic version of functions. By checking the result of + # testing with iutysse2, for example, it can be checked that the + # corresponding deterministic functions passes the accuracy and + # nonnumber tests. string(CONCAT IUTYNAME "iuty" ${LCSIMD}) add_executable(${IUTYNAME} ${IUT_SRC}) @@ -124,6 +124,26 @@ macro(test_extension SIMD) add_test_iut(${IUTYNAME}) list(APPEND IUT_LIST ${IUTYNAME}) + # The iut programs whose names begin with "iuti" are the iut for the + # inline version of functions. 
+ + if (BUILD_INLINE_HEADERS AND SED_COMMAND) + string(CONCAT IUTINAME "iuti" ${LCSIMD}) + add_executable(${IUTINAME} ${IUT_SRC}) + target_compile_options(${IUTINAME} PRIVATE ${FLAGS_ENABLE_${SIMD}}) + target_compile_definitions(${IUTINAME} + PRIVATE ENABLE_${SIMD}=1 ${COMMON_TARGET_DEFINITIONS} + USE_INLINE_HEADER="sleefinline_${LCSIMD}.h" + MACRO_ONLY_HEADER="macroonly${SIMD}.h" + ) + target_include_directories(${IUTINAME} PRIVATE ${PROJECT_BINARY_DIR}/inline) + target_link_libraries(${IUTINAME} ${LIBM} ${LIBRT} ${TARGET_LIBINLINE}) + add_dependencies(${IUTINAME} ${TARGET_INLINE_HEADERS}) + set_target_properties(${IUTINAME} PROPERTIES C_STANDARD 99) + add_test_iut(${IUTINAME}) + list(APPEND IUT_LIST ${IUTINAME}) + endif(BUILD_INLINE_HEADERS AND SED_COMMAND) + if(LIB_MPFR AND NOT ${SIMD} STREQUAL NEON32 AND NOT ${SIMD} STREQUAL NEON32VFPV4 AND NOT MINGW) # Build tester2 SIMD string(TOLOWER ${SIMD} SCSIMD) diff --git a/src/libm-tester/iutsimd.c b/src/libm-tester/iutsimd.c index 4fa7c791..38845be5 100644 --- a/src/libm-tester/iutsimd.c +++ b/src/libm-tester/iutsimd.c @@ -30,74 +30,126 @@ #endif #include "misc.h" + +#if !defined(USE_INLINE_HEADER) #include "sleef.h" +#else // #if !defined(USE_INLINE_HEADER) +#include +#include +#include +#include + +#if (defined(__GNUC__) || defined(__CLANG__)) && (defined(__i386__) || defined(__x86_64__)) +#include +#endif + +#if (defined(_MSC_VER)) +#include +#endif + +#if defined(__ARM_NEON__) || defined(__ARM_NEON) +#include +#endif + +#if defined(__ARM_FEATURE_SVE) +#include +#endif + +#if defined(__VSX__) +#include +#endif + +#define SLEEF_ALWAYS_INLINE inline +#define SLEEF_INLINE +#define SLEEF_CONST +#include USE_INLINE_HEADER +#include MACRO_ONLY_HEADER + +#endif // #if !defined(USE_INLINE_HEADER) + #include "testerutil.h" #define DORENAME #ifdef ENABLE_SSE2 +#include "renamesse2.h" +#if !defined(USE_INLINE_HEADER) #define CONFIG 2 #include "helpersse2.h" -#include "renamesse2.h" typedef Sleef___m128d_2 vdouble2; typedef 
Sleef___m128_2 vfloat2; #endif +#endif #ifdef ENABLE_SSE4 +#include "renamesse4.h" +#if !defined(USE_INLINE_HEADER) #define CONFIG 4 #include "helpersse2.h" -#include "renamesse4.h" typedef Sleef___m128d_2 vdouble2; typedef Sleef___m128_2 vfloat2; #endif +#endif #ifdef ENABLE_AVX +#include "renameavx.h" +#if !defined(USE_INLINE_HEADER) #define CONFIG 1 #include "helperavx.h" -#include "renameavx.h" typedef Sleef___m256d_2 vdouble2; typedef Sleef___m256_2 vfloat2; #endif +#endif #ifdef ENABLE_FMA4 +#include "renamefma4.h" +#if !defined(USE_INLINE_HEADER) #define CONFIG 4 #include "helperavx.h" -#include "renamefma4.h" typedef Sleef___m256d_2 vdouble2; typedef Sleef___m256_2 vfloat2; #endif +#endif #ifdef ENABLE_AVX2 +#include "renameavx2.h" +#if !defined(USE_INLINE_HEADER) #define CONFIG 1 #include "helperavx2.h" -#include "renameavx2.h" typedef Sleef___m256d_2 vdouble2; typedef Sleef___m256_2 vfloat2; #endif +#endif #ifdef ENABLE_AVX2128 +#include "renameavx2128.h" +#if !defined(USE_INLINE_HEADER) #define CONFIG 1 #include "helperavx2_128.h" -#include "renameavx2128.h" typedef Sleef___m128d_2 vdouble2; typedef Sleef___m128_2 vfloat2; #endif +#endif #ifdef ENABLE_AVX512F +#include "renameavx512f.h" +#if !defined(USE_INLINE_HEADER) #define CONFIG 1 #include "helperavx512f.h" -#include "renameavx512f.h" typedef Sleef___m512d_2 vdouble2; typedef Sleef___m512_2 vfloat2; #endif +#endif #ifdef ENABLE_AVX512FNOFMA +#include "renameavx512fnofma.h" +#if !defined(USE_INLINE_HEADER) #define CONFIG 2 #include "helperavx512f.h" -#include "renameavx512fnofma.h" typedef Sleef___m512d_2 vdouble2; typedef Sleef___m512_2 vfloat2; #endif +#endif #ifdef ENABLE_VECEXT #define CONFIG 1 @@ -112,34 +164,42 @@ typedef Sleef___m512_2 vfloat2; #endif #ifdef ENABLE_NEON32 +#include "renameneon32.h" +#if !defined(USE_INLINE_HEADER) #define CONFIG 1 #include "helperneon32.h" -#include "renameneon32.h" typedef Sleef_float32x4_t_2 vfloat2; #endif +#endif #ifdef ENABLE_NEON32VFPV4 +#include 
"renameneon32vfpv4.h" +#if !defined(USE_INLINE_HEADER) #define CONFIG 4 #include "helperneon32.h" -#include "renameneon32vfpv4.h" typedef Sleef_float32x4_t_2 vfloat2; #endif +#endif #ifdef ENABLE_ADVSIMD +#include "renameadvsimd.h" +#if !defined(USE_INLINE_HEADER) #define CONFIG 1 #include "helperadvsimd.h" -#include "renameadvsimd.h" typedef Sleef_float64x2_t_2 vdouble2; typedef Sleef_float32x4_t_2 vfloat2; #endif +#endif #ifdef ENABLE_ADVSIMDNOFMA +#include "renameadvsimdnofma.h" +#if !defined(USE_INLINE_HEADER) #define CONFIG 2 #include "helperadvsimd.h" -#include "renameadvsimdnofma.h" typedef Sleef_float64x2_t_2 vdouble2; typedef Sleef_float32x4_t_2 vfloat2; #endif +#endif #ifdef ENABLE_DSP128 #define CONFIG 2 @@ -150,22 +210,24 @@ typedef Sleef___m128_2 vfloat2; #endif #ifdef ENABLE_SVE +#include "renamesve.h" +#if !defined(USE_INLINE_HEADER) #define CONFIG 1 #include "helpersve.h" -#ifdef DORENAME -#include "renamesve.h" typedef Sleef_svfloat64_t_2 vdouble2; typedef Sleef_svfloat32_t_2 vfloat2; #endif #endif #ifdef ENABLE_SVENOFMA +#include "renamesvenofma.h" +#if !defined(USE_INLINE_HEADER) #define CONFIG 2 #include "helpersve.h" -#include "renamesvenofma.h" typedef Sleef_svfloat64_t_2 vdouble2; typedef Sleef_svfloat32_t_2 vfloat2; #endif +#endif #ifdef ENABLE_DSP256 #define CONFIG 1 @@ -176,68 +238,74 @@ typedef Sleef___m256_2 vfloat2; #endif #ifdef ENABLE_VSX +#include "renamevsx.h" +#if !defined(USE_INLINE_HEADER) #define CONFIG 1 #include "helperpower_128.h" -#include "renamevsx.h" typedef Sleef_vector_double_2 vdouble2; typedef Sleef_vector_float_2 vfloat2; #endif +#endif #ifdef ENABLE_VSXNOFMA +#include "renamevsxnofma.h" +#if !defined(USE_INLINE_HEADER) #define CONFIG 2 #include "helperpower_128.h" -#include "renamevsxnofma.h" typedef Sleef_vector_double_2 vdouble2; typedef Sleef_vector_float_2 vfloat2; #endif +#endif #ifdef ENABLE_PUREC_SCALAR +#include "renamepurec_scalar.h" +#if !defined(USE_INLINE_HEADER) #define CONFIG 1 #include 
"helperpurec_scalar.h" -#include "renamepurec_scalar.h" typedef Sleef_double_2 vdouble2; typedef Sleef_float_2 vfloat2; #endif +#endif #ifdef ENABLE_PURECFMA_SCALAR +#include "renamepurecfma_scalar.h" +#if !defined(USE_INLINE_HEADER) #define CONFIG 2 #include "helperpurec_scalar.h" -#include "renamepurecfma_scalar.h" typedef Sleef_double_2 vdouble2; typedef Sleef_float_2 vfloat2; #endif +#endif // #ifdef ENABLE_DP -int check_featureDP() { - if (vavailability_i(1) == 0) return 0; +int check_featureDP(double d) { double s[VECTLENDP]; int i; for(i=0;i ${CMAKE_CURRENT_BINARY_DIR}/sleef${SIMD}.h.tmp1 + COMMAND ${SED_COMMAND} -n -e "/^\\/\\/@#.*$/p" ${CMAKE_CURRENT_BINARY_DIR}/sleef${SIMD}.h.tmp1 > ${CMAKE_CURRENT_BINARY_DIR}/sleef${SIMD}.h.tmp2 + COMMAND ${SED_COMMAND} -e "s/^\\/\\/@#/#/g" ${CMAKE_CURRENT_BINARY_DIR}/sleef${SIMD}.h.tmp2 > ${CMAKE_CURRENT_BINARY_DIR}/include/macroonly${SIMD}.h + COMMAND "${CMAKE_C_COMPILER}" ${FLAG_PREPROCESS} ${FLAG_PRESERVE_COMMENTS} ${FLAG_INCLUDE}${PROJECT_SOURCE_DIR}/src/common ${FLAG_INCLUDE}${PROJECT_SOURCE_DIR}/src/arch ${FLAG_INCLUDE}${CMAKE_CURRENT_BINARY_DIR}/include/ ${FLAG_DEFINE}SLEEF_GENHEADER ${FLAG_DEFINE}ENABLE_${SIMD} ${FLAG_DEFINE}DORENAME ${CMAKE_CURRENT_SOURCE_DIR}/sleefsimdsp.c >> ${CMAKE_CURRENT_BINARY_DIR}/sleef${SIMD}.h.tmp1 + COMMAND ${SED_COMMAND} -e "s/^#.*//g" ${CMAKE_CURRENT_BINARY_DIR}/sleef${SIMD}.h.tmp1 > ${CMAKE_CURRENT_BINARY_DIR}/sleef${SIMD}.h.c + COMMAND "${CMAKE_C_COMPILER}" ${FLAG_PREPROCESS} ${FLAG_INCLUDE}${PROJECT_SOURCE_DIR}/src/common ${FLAG_INCLUDE}${PROJECT_SOURCE_DIR}/src/arch ${FLAG_INCLUDE}${CMAKE_CURRENT_BINARY_DIR}/include/ ${FLAG_DEFINE}SLEEF_GENHEADER ${FLAG_DEFINE}ENABLE_${SIMD} ${FLAG_DEFINE}DORENAME ${CMAKE_CURRENT_BINARY_DIR}/sleef${SIMD}.h.c > ${CMAKE_CURRENT_BINARY_DIR}/sleef${SIMD}.h.tmp3 + COMMAND ${SED_COMMAND} -e "s/%VERSION%/${SLEEF_VERSION_MAJOR}.${SLEEF_VERSION_MINOR}.${SLEEF_VERSION_PATCHLEVEL}/g" ${CMAKE_CURRENT_SOURCE_DIR}/sleefinline_header.h.org > 
${PROJECT_BINARY_DIR}/include/sleefinline_${SIMDLC}.h.tmp + COMMAND ${SED_COMMAND} -e "s/^#.*//g" ${CMAKE_CURRENT_BINARY_DIR}/sleef${SIMD}.h.tmp3 >> ${PROJECT_BINARY_DIR}/include/sleefinline_${SIMDLC}.h.tmp + COMMAND ${CMAKE_COMMAND} -E "rename" ${PROJECT_BINARY_DIR}/include/sleefinline_${SIMDLC}.h.tmp ${INLINE_HEADER_FILE} + + MAIN_DEPENDENCY ${CMAKE_CURRENT_SOURCE_DIR}/sleefsimddp.c ${CMAKE_CURRENT_SOURCE_DIR}/sleefsimdsp.c ${HEADER_${SIMD}} + DEPENDS ${HEADER_${SIMD}} + VERBATIM + ) + + list(APPEND INLINE_HEADER_FILES_GENERATED ${INLINE_HEADER_FILE}) + endif() + endforeach() + + add_custom_target(${TARGET_INLINE_HEADERS} ALL + DEPENDS + ${INLINE_HEADER_FILES_GENERATED} + ) + install(FILES ${INLINE_HEADER_FILES_GENERATED} DESTINATION include) + endif(SED_COMMAND) + + add_library(${TARGET_LIBINLINE} STATIC rempitab.c) + install(TARGETS ${TARGET_LIBINLINE} DESTINATION lib) +endif(BUILD_INLINE_HEADERS) + # On some systems we need to explicitly link libsleef against libm to # use some of the math functions used in the scalar code (for example # sqrt). 
diff --git a/src/libm/rempitab.c b/src/libm/rempitab.c index 6d8d98d1..f9e96697 100644 --- a/src/libm/rempitab.c +++ b/src/libm/rempitab.c @@ -5,7 +5,13 @@ #include "misc.h" -NOEXPORT ALIGNED(64) const double rempitabdp[] = { +#if !defined(SLEEF_GENHEADER) +#define FUNCATR NOEXPORT ALIGNED(64) +#else +#define FUNCATR EXPORT ALIGNED(64) +#endif + +FUNCATR const double rempitabdp[] = { 0.15915494309189531785, 1.7916237278037667488e-17, 2.5454160968749269937e-33, 2.1132476107887107169e-49, 0.03415494309189533173, 4.0384494702232122736e-18, 1.0046721413651383112e-33, 2.1132476107887107169e-49, 0.03415494309189533173, 4.0384494702232122736e-18, 1.0046721413651383112e-33, 2.1132476107887107169e-49, @@ -977,7 +983,7 @@ NOEXPORT ALIGNED(64) const double rempitabdp[] = { 2.8687869620228451614e-274, -1.9537812801257956865e-290, 1.0380272777574237546e-306, 6.4228533959362050743e-323, }; -NOEXPORT ALIGNED(64) const float rempitabsp[] = { +FUNCATR const float rempitabsp[] = { 0.1591549367, 6.420638243e-09, 7.342738699e-17, 1.518506657e-24, 0.03415494412, -1.029942243e-09, -3.759491547e-17, 1.518506657e-24, 0.03415494412, -1.029942243e-09, -3.759491547e-17, 1.518506657e-24, diff --git a/src/libm/sleefinline_header.h.org b/src/libm/sleefinline_header.h.org new file mode 100644 index 00000000..eef8d3e2 --- /dev/null +++ b/src/libm/sleefinline_header.h.org @@ -0,0 +1,6 @@ +// Copyright Naoki Shibata and contributors 2010 - 2020. +// Distributed under the Boost Software License, Version 1.0. 
+// (See http://www.boost.org/LICENSE_1_0.txt) + +// This file is generated by SLEEF %VERSION% + diff --git a/src/libm/sleeflibm_header.h.org b/src/libm/sleeflibm_header.h.org index 0d6e38cb..7a0005b7 100644 --- a/src/libm/sleeflibm_header.h.org +++ b/src/libm/sleeflibm_header.h.org @@ -54,6 +54,10 @@ #include #endif +#if defined(__VSX__) +#include +#endif + // #ifndef SLEEF_FP_ILOGB0 diff --git a/src/libm/sleefsimddp.c b/src/libm/sleefsimddp.c index 0ec0da8d..0b241e0d 100644 --- a/src/libm/sleefsimddp.c +++ b/src/libm/sleefsimddp.c @@ -5,10 +5,12 @@ // Always use -ffp-contract=off option to compile SLEEF. +#if !defined(SLEEF_GENHEADER) #include #include #include #include +#endif #include "misc.h" @@ -3608,7 +3610,7 @@ EXPORT CONST VECTOR_CC vdouble xerfc_u15(vdouble a) { } #endif // #if !defined(DETERMINISTIC) -#if !defined(DETERMINISTIC) && !defined(ENABLE_GNUABI) +#if !defined(DETERMINISTIC) && !defined(ENABLE_GNUABI) && !defined(SLEEF_GENHEADER) // The normal and deterministic versions of implementations are common // for the functions like sincospi_u05. Aliases are defined by // DALIAS_* macros for such functions. The defined aliases @@ -3675,9 +3677,9 @@ DALIAS_vd_vd(tgamma_u1) DALIAS_vd_vd(lgamma_u1) DALIAS_vd_vd(erf_u1) DALIAS_vd_vd(erfc_u15) -#endif // #if !defined(DETERMINISTIC) && !defined(ENABLE_GNUABI) +#endif // #if !defined(DETERMINISTIC) && !defined(ENABLE_GNUABI) && !defined(SLEEF_GENHEADER) -#ifndef ENABLE_GNUABI +#if !defined(ENABLE_GNUABI) && !defined(SLEEF_GENHEADER) EXPORT CONST int xgetInt(int name) { if (1 <= name && name <= 10) return vavailability_i(name); return 0; diff --git a/src/libm/sleefsimdsp.c b/src/libm/sleefsimdsp.c index 3e4ce1e1..1d52b931 100644 --- a/src/libm/sleefsimdsp.c +++ b/src/libm/sleefsimdsp.c @@ -5,10 +5,12 @@ // Always use -ffp-contract=off option to compile SLEEF. 
+#if !defined(SLEEF_GENHEADER) #include #include #include #include +#endif #include "misc.h" @@ -22,7 +24,11 @@ extern const float rempitabsp[]; #ifdef ENABLE_SSE2 #define CONFIG 2 +#if !defined(SLEEF_GENHEADER) #include "helpersse2.h" +#else +#include "macroonlySSE2.h" +#endif #ifdef DORENAME #ifdef ENABLE_GNUABI #include "renamesse2_gnuabi.h" @@ -34,7 +40,11 @@ extern const float rempitabsp[]; #ifdef ENABLE_SSE4 #define CONFIG 4 +#if !defined(SLEEF_GENHEADER) #include "helpersse2.h" +#else +#include "macroonlySSE4.h" +#endif #ifdef DORENAME #include "renamesse4.h" #endif @@ -42,7 +52,11 @@ extern const float rempitabsp[]; #ifdef ENABLE_AVX #define CONFIG 1 +#if !defined(SLEEF_GENHEADER) #include "helperavx.h" +#else +#include "macroonlyAVX.h" +#endif #ifdef DORENAME #ifdef ENABLE_GNUABI #include "renameavx_gnuabi.h" @@ -54,7 +68,11 @@ extern const float rempitabsp[]; #ifdef ENABLE_FMA4 #define CONFIG 4 +#if !defined(SLEEF_GENHEADER) #include "helperavx.h" +#else +#include "macroonlyFMA4.h" +#endif #ifdef DORENAME #ifdef ENABLE_GNUABI #include "renamefma4_gnuabi.h" @@ -66,7 +84,11 @@ extern const float rempitabsp[]; #ifdef ENABLE_AVX2 #define CONFIG 1 +#if !defined(SLEEF_GENHEADER) #include "helperavx2.h" +#else +#include "macroonlyAVX2.h" +#endif #ifdef DORENAME #ifdef ENABLE_GNUABI #include "renameavx2_gnuabi.h" @@ -78,7 +100,11 @@ extern const float rempitabsp[]; #ifdef ENABLE_AVX2128 #define CONFIG 1 +#if !defined(SLEEF_GENHEADER) #include "helperavx2_128.h" +#else +#include "macroonlyAVX2128.h" +#endif #ifdef DORENAME #include "renameavx2128.h" #endif @@ -86,7 +112,11 @@ extern const float rempitabsp[]; #ifdef ENABLE_AVX512F #define CONFIG 1 +#if !defined(SLEEF_GENHEADER) #include "helperavx512f.h" +#else +#include "macroonlyAVX512F.h" +#endif #ifdef DORENAME #ifdef ENABLE_GNUABI #include "renameavx512f_gnuabi.h" @@ -98,7 +128,11 @@ extern const float rempitabsp[]; #ifdef ENABLE_AVX512FNOFMA #define CONFIG 2 +#if !defined(SLEEF_GENHEADER) #include 
"helperavx512f.h" +#else +#include "macroonlyAVX512FNOFMA.h" +#endif #ifdef DORENAME #include "renameavx512fnofma.h" #endif @@ -106,7 +140,11 @@ extern const float rempitabsp[]; #ifdef ENABLE_ADVSIMD #define CONFIG 1 +#if !defined(SLEEF_GENHEADER) #include "helperadvsimd.h" +#else +#include "macroonlyADVSIMD.h" +#endif #ifdef DORENAME #ifdef ENABLE_GNUABI #include "renameadvsimd_gnuabi.h" @@ -118,7 +156,11 @@ extern const float rempitabsp[]; #ifdef ENABLE_ADVSIMDNOFMA #define CONFIG 2 +#if !defined(SLEEF_GENHEADER) #include "helperadvsimd.h" +#else +#include "macroonlyADVSIMDNOFMA.h" +#endif #ifdef DORENAME #include "renameadvsimdnofma.h" #endif @@ -126,7 +168,9 @@ extern const float rempitabsp[]; #ifdef ENABLE_NEON32 #define CONFIG 1 +#if !defined(SLEEF_GENHEADER) #include "helperneon32.h" +#endif #ifdef DORENAME #include "renameneon32.h" #endif @@ -134,7 +178,9 @@ extern const float rempitabsp[]; #ifdef ENABLE_NEON32VFPV4 #define CONFIG 4 +#if !defined(SLEEF_GENHEADER) #include "helperneon32.h" +#endif #ifdef DORENAME #include "renameneon32vfpv4.h" #endif @@ -142,7 +188,11 @@ extern const float rempitabsp[]; #ifdef ENABLE_VSX #define CONFIG 1 +#if !defined(SLEEF_GENHEADER) #include "helperpower_128.h" +#else +#include "macroonlyVSX.h" +#endif #ifdef DORENAME #include "renamevsx.h" #endif @@ -150,7 +200,11 @@ extern const float rempitabsp[]; #ifdef ENABLE_VSXNOFMA #define CONFIG 2 +#if !defined(SLEEF_GENHEADER) #include "helperpower_128.h" +#else +#include "macroonlyVSXNOFMA.h" +#endif #ifdef DORENAME #include "renamevsxnofma.h" #endif @@ -160,7 +214,9 @@ extern const float rempitabsp[]; #ifdef ENABLE_VECEXT #define CONFIG 1 +#if !defined(SLEEF_GENHEADER) #include "helpervecext.h" +#endif #ifdef DORENAME #include "renamevecext.h" #endif @@ -168,7 +224,9 @@ extern const float rempitabsp[]; #ifdef ENABLE_PUREC #define CONFIG 1 +#if !defined(SLEEF_GENHEADER) #include "helperpurec.h" +#endif #ifdef DORENAME #include "renamepurec.h" #endif @@ -176,7 +234,11 @@ extern 
const float rempitabsp[]; #ifdef ENABLE_PUREC_SCALAR #define CONFIG 1 +#if !defined(SLEEF_GENHEADER) #include "helperpurec_scalar.h" +#else +#include "macroonlyPUREC_SCALAR.h" +#endif #ifdef DORENAME #include "renamepurec_scalar.h" #endif @@ -184,7 +246,11 @@ extern const float rempitabsp[]; #ifdef ENABLE_PURECFMA_SCALAR #define CONFIG 2 +#if !defined(SLEEF_GENHEADER) #include "helperpurec_scalar.h" +#else +#include "macroonlyPURECFMA_SCALAR.h" +#endif #ifdef DORENAME #include "renamepurecfma_scalar.h" #endif @@ -194,7 +260,11 @@ extern const float rempitabsp[]; #ifdef ENABLE_SVE #define CONFIG 1 +#if !defined(SLEEF_GENHEADER) #include "helpersve.h" +#else +#include "macroonlySVE.h" +#endif #ifdef DORENAME #ifdef ENABLE_GNUABI #include "renamesve_gnuabi.h" @@ -206,7 +276,11 @@ extern const float rempitabsp[]; #ifdef ENABLE_SVENOFMA #define CONFIG 2 +#if !defined(SLEEF_GENHEADER) #include "helpersve.h" +#else +#include "macroonlySVENOFMA.h" +#endif #ifdef DORENAME #include "renamesvenofma.h" #endif /* DORENAME */ @@ -2743,7 +2817,9 @@ EXPORT CONST VECTOR_CC vfloat xfmaf(vfloat x, vfloat y, vfloat z) { } #endif // #if !defined(DETERMINISTIC) +#if !defined(SLEEF_GENHEADER) static INLINE CONST VECTOR_CC vint2 vcast_vi2_i_i(int i0, int i1) { return vcast_vi2_vm(vcast_vm_i_i(i0, i1)); } +#endif SQRTFU05_FUNCATR VECTOR_CC vfloat xsqrtf_u05(vfloat d) { vfloat q; @@ -3244,7 +3320,7 @@ EXPORT CONST VECTOR_CC vfloat xerfcf_u15(vfloat a) { } #endif // #if !defined(DETERMINISTIC) -#if !defined(DETERMINISTIC) && !defined(ENABLE_GNUABI) +#if !defined(DETERMINISTIC) && !defined(ENABLE_GNUABI) && !defined(SLEEF_GENHEADER) // See sleefsimddp.c for explanation of these macros #ifdef ENABLE_ALIAS @@ -3318,9 +3394,9 @@ DALIAS_vf_vf(lgammaf_u1) DALIAS_vf_vf(erff_u1) DALIAS_vf_vf(erfcf_u15) DALIAS_vf_vf_vf(fastpowf_u3500) -#endif // #if !defined(DETERMINISTIC) && !defined(ENABLE_GNUABI) +#endif // #if !defined(DETERMINISTIC) && !defined(ENABLE_GNUABI) && !defined(SLEEF_GENHEADER) 
-#ifndef ENABLE_GNUABI +#if !defined(ENABLE_GNUABI) && !defined(SLEEF_GENHEADER) EXPORT CONST int xgetIntf(int name) { if (1 <= name && name <= 10) return vavailability_i(name); return 0; diff --git a/travis/toolchain-ppc64el.cmake b/travis/toolchain-ppc64el.cmake index 593a8cb1..3f99c2fe 100644 --- a/travis/toolchain-ppc64el.cmake +++ b/travis/toolchain-ppc64el.cmake @@ -6,6 +6,8 @@ SET(CMAKE_FIND_ROOT_PATH /usr/powerpc64le-linux-gnu /usr/include/powerpc64le-li find_program(CMAKE_C_COMPILER ppc64el-cc) +SET(CMAKE_AR /usr/powerpc64le-linux-gnu/bin/ar) + SET(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) SET(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH) SET(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) From 6b0e416223161607173c9ef61584dbd9a40409ee Mon Sep 17 00:00:00 2001 From: shibatch Date: Fri, 21 Feb 2020 09:41:26 +0900 Subject: [PATCH 02/18] no message --- appveyor.yml | 6 +++--- travis/before_script.aarch64-gcc.sh | 2 +- travis/before_script.arm64-clang.sh | 2 +- travis/before_script.arm64-gcc.sh | 2 +- travis/before_script.armhf-gcc.sh | 2 +- travis/before_script.osx-clang.sh | 2 +- travis/before_script.osx-gcc.sh | 2 +- travis/before_script.x86_64-clang.sh | 2 +- travis/before_script.x86_64-gcc.sh | 2 +- 9 files changed, 11 insertions(+), 11 deletions(-) diff --git a/appveyor.yml b/appveyor.yml index 5c4d5fcc..d68e621f 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -12,15 +12,15 @@ install: - if "%DO_TEST%" == "TRUE" set ORGPATH="%PATH%" - if "%DO_TEST%" == "TRUE" "C:\\Cygwin64\\setup-x86_64.exe" -q -g -P libmpfr-devel,libgmp-devel,cmake - if "%DO_TEST%" == "TRUE" PATH c:\Cygwin64\bin;c:\Cygwin64\usr\bin;c:\projects\sleef\build-cygwin\bin;"%PATH%" - - if "%DO_TEST%" == "TRUE" "C:\\Cygwin64\\bin\\bash" -c 'mkdir build-mingw;cd build-mingw;CC=x86_64-w64-mingw32-gcc cmake -g\"Unix Makefiles\" .. 
-DBUILD_SHARED_LIBS=FALSE -DBUILD_QUAD=TRUE;make -j 2' + - if "%DO_TEST%" == "TRUE" "C:\\Cygwin64\\bin\\bash" -c 'mkdir build-mingw;cd build-mingw;CC=x86_64-w64-mingw32-gcc cmake -g\"Unix Makefiles\" .. -DBUILD_SHARED_LIBS=FALSE -DBUILD_QUAD=TRUE -DBUILD_INLINE_HEADERS=TRUE;make -j 2' - if "%DO_TEST%" == "TRUE" cd "c:\\projects\\sleef" - - if "%DO_TEST%" == "TRUE" "C:\\Cygwin64\\bin\\bash" -c 'mkdir build-cygwin;cd build-cygwin;cmake -g\"Unix Makefiles\" -DBUILD_QUAD=TRUE ..;make -j 2' + - if "%DO_TEST%" == "TRUE" "C:\\Cygwin64\\bin\\bash" -c 'mkdir build-cygwin;cd build-cygwin;cmake -g\"Unix Makefiles\" -DBUILD_QUAD=TRUE -DBUILD_INLINE_HEADERS=TRUE ..;make -j 2' - if "%DO_TEST%" == "TRUE" del /Q /F c:\projects\sleef\build-cygwin\bin\iut* - if "%DO_TEST%" == "TRUE" PATH "%ORGPATH%";c:\Cygwin64\bin;c:\Cygwin64\usr\bin;c:\projects\sleef\build-cygwin\bin;c:\projects\sleef\build\bin - if "%DO_TEST%" == "TRUE" cd "c:\\projects\\sleef" - mkdir build - cd build - - cmake -G"Visual Studio 15 2017 Win64" .. -DCMAKE_INSTALL_PREFIX=install -DSLEEF_SHOW_CONFIG=1 -DSLEEF_SHOW_ERROR_LOG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE %ENV_BUILD_STATIC% + - cmake -G"Visual Studio 15 2017 Win64" .. -DCMAKE_INSTALL_PREFIX=install -DSLEEF_SHOW_CONFIG=1 -DSLEEF_SHOW_ERROR_LOG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_INLINE_HEADERS=TRUE %ENV_BUILD_STATIC% build_script: - cmake --build . --target install --config Release test_script: diff --git a/travis/before_script.aarch64-gcc.sh b/travis/before_script.aarch64-gcc.sh index b5bb8ce4..0343a018 100644 --- a/travis/before_script.aarch64-gcc.sh +++ b/travis/before_script.aarch64-gcc.sh @@ -8,4 +8,4 @@ make -j 2 all cd /build mkdir build-cross cd build-cross -cmake -DCMAKE_TOOLCHAIN_FILE=../travis/toolchain-aarch64.cmake -DNATIVE_BUILD_DIR=`pwd`/../build-native -DEMULATOR=qemu-aarch64-static -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE .. 
+cmake -DCMAKE_TOOLCHAIN_FILE=../travis/toolchain-aarch64.cmake -DNATIVE_BUILD_DIR=`pwd`/../build-native -DEMULATOR=qemu-aarch64-static -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_INLINE_HEADERS=TRUE .. diff --git a/travis/before_script.arm64-clang.sh b/travis/before_script.arm64-clang.sh index 2c34abf5..0c7b2ea3 100644 --- a/travis/before_script.arm64-clang.sh +++ b/travis/before_script.arm64-clang.sh @@ -4,4 +4,4 @@ mkdir sleef.build cd sleef.build export CC=clang-8 export CXX=clang++-8 -cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE .. +cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_INLINE_HEADERS=TRUE .. diff --git a/travis/before_script.arm64-gcc.sh b/travis/before_script.arm64-gcc.sh index 5aefc0ec..9c222b2f 100644 --- a/travis/before_script.arm64-gcc.sh +++ b/travis/before_script.arm64-gcc.sh @@ -4,4 +4,4 @@ mkdir sleef.build cd sleef.build export CC=gcc-7 export CXX=g++-7 -cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE .. +cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_INLINE_HEADERS=TRUE .. diff --git a/travis/before_script.armhf-gcc.sh b/travis/before_script.armhf-gcc.sh index 81f93dec..ba18962f 100644 --- a/travis/before_script.armhf-gcc.sh +++ b/travis/before_script.armhf-gcc.sh @@ -8,4 +8,4 @@ make -j 2 all cd /build mkdir build-cross cd build-cross -cmake -DCMAKE_TOOLCHAIN_FILE=../travis/toolchain-armhf.cmake -DNATIVE_BUILD_DIR=`pwd`/../build-native -DEMULATOR=qemu-arm-static -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE .. 
+cmake -DCMAKE_TOOLCHAIN_FILE=../travis/toolchain-armhf.cmake -DNATIVE_BUILD_DIR=`pwd`/../build-native -DEMULATOR=qemu-arm-static -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_INLINE_HEADERS=TRUE .. diff --git a/travis/before_script.osx-clang.sh b/travis/before_script.osx-clang.sh index b8137740..470b20e8 100644 --- a/travis/before_script.osx-clang.sh +++ b/travis/before_script.osx-clang.sh @@ -2,4 +2,4 @@ set -ev mkdir sleef.build cd sleef.build -cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DOPENSSL_ROOT_DIR=/usr/local/opt/openssl -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE .. +cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DOPENSSL_ROOT_DIR=/usr/local/opt/openssl -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_INLINE_HEADERS=TRUE .. diff --git a/travis/before_script.osx-gcc.sh b/travis/before_script.osx-gcc.sh index 1af7d9f9..da6a1afb 100644 --- a/travis/before_script.osx-gcc.sh +++ b/travis/before_script.osx-gcc.sh @@ -3,4 +3,4 @@ set -ev mkdir sleef.build cd sleef.build export CC=gcc-6 -cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DOPENSSL_ROOT_DIR=/usr/local/opt/openssl -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE .. +cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DOPENSSL_ROOT_DIR=/usr/local/opt/openssl -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_INLINE_HEADERS=TRUE .. diff --git a/travis/before_script.x86_64-clang.sh b/travis/before_script.x86_64-clang.sh index 6e3f0604..a0429ceb 100644 --- a/travis/before_script.x86_64-clang.sh +++ b/travis/before_script.x86_64-clang.sh @@ -4,4 +4,4 @@ mkdir sleef.build cd sleef.build export CC=clang-5.0 export CXX=clang++-5.0 -cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE .. 
+cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_INLINE_HEADERS=TRUE .. diff --git a/travis/before_script.x86_64-gcc.sh b/travis/before_script.x86_64-gcc.sh index 5aefc0ec..9c222b2f 100644 --- a/travis/before_script.x86_64-gcc.sh +++ b/travis/before_script.x86_64-gcc.sh @@ -4,4 +4,4 @@ mkdir sleef.build cd sleef.build export CC=gcc-7 export CXX=g++-7 -cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE .. +cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_INLINE_HEADERS=TRUE .. From 283295ca8886ed58c259cbe6a9fc65174f1b7a9f Mon Sep 17 00:00:00 2001 From: shibatch Date: Fri, 21 Feb 2020 10:05:36 +0900 Subject: [PATCH 03/18] no message --- src/libm/CMakeLists.txt | 4 ++-- travis/before_script.armhf-gcc.sh | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/libm/CMakeLists.txt b/src/libm/CMakeLists.txt index 5ad0414c..03d94886 100644 --- a/src/libm/CMakeLists.txt +++ b/src/libm/CMakeLists.txt @@ -305,6 +305,7 @@ if(BUILD_INLINE_HEADERS) add_custom_command( OUTPUT ${INLINE_HEADER_FILE} + COMMAND echo Generating sleefinline_${SIMDLC}.h COMMAND "${CMAKE_C_COMPILER}" ${FLAG_PREPROCESS} ${FLAG_PRESERVE_COMMENTS} ${FLAG_INCLUDE}${PROJECT_SOURCE_DIR}/src/common ${FLAG_INCLUDE}${PROJECT_SOURCE_DIR}/src/arch ${FLAG_INCLUDE}${CMAKE_CURRENT_BINARY_DIR}/include/ ${FLAG_DEFINE}SLEEF_GENHEADER ${FLAG_DEFINE}ENABLE_${SIMD} ${FLAG_DEFINE}DORENAME ${CMAKE_CURRENT_SOURCE_DIR}/sleefsimddp.c > ${CMAKE_CURRENT_BINARY_DIR}/sleef${SIMD}.h.tmp1 COMMAND ${SED_COMMAND} -n -e "/^\\/\\/@#.*$/p" ${CMAKE_CURRENT_BINARY_DIR}/sleef${SIMD}.h.tmp1 > ${CMAKE_CURRENT_BINARY_DIR}/sleef${SIMD}.h.tmp2 COMMAND ${SED_COMMAND} -e "s/^\\/\\/@#/#/g" ${CMAKE_CURRENT_BINARY_DIR}/sleef${SIMD}.h.tmp2 > 
${CMAKE_CURRENT_BINARY_DIR}/include/macroonly${SIMD}.h @@ -312,8 +313,7 @@ if(BUILD_INLINE_HEADERS) COMMAND ${SED_COMMAND} -e "s/^#.*//g" ${CMAKE_CURRENT_BINARY_DIR}/sleef${SIMD}.h.tmp1 > ${CMAKE_CURRENT_BINARY_DIR}/sleef${SIMD}.h.c COMMAND "${CMAKE_C_COMPILER}" ${FLAG_PREPROCESS} ${FLAG_INCLUDE}${PROJECT_SOURCE_DIR}/src/common ${FLAG_INCLUDE}${PROJECT_SOURCE_DIR}/src/arch ${FLAG_INCLUDE}${CMAKE_CURRENT_BINARY_DIR}/include/ ${FLAG_DEFINE}SLEEF_GENHEADER ${FLAG_DEFINE}ENABLE_${SIMD} ${FLAG_DEFINE}DORENAME ${CMAKE_CURRENT_BINARY_DIR}/sleef${SIMD}.h.c > ${CMAKE_CURRENT_BINARY_DIR}/sleef${SIMD}.h.tmp3 COMMAND ${SED_COMMAND} -e "s/%VERSION%/${SLEEF_VERSION_MAJOR}.${SLEEF_VERSION_MINOR}.${SLEEF_VERSION_PATCHLEVEL}/g" ${CMAKE_CURRENT_SOURCE_DIR}/sleefinline_header.h.org > ${PROJECT_BINARY_DIR}/include/sleefinline_${SIMDLC}.h.tmp - COMMAND ${SED_COMMAND} -e "s/^#.*//g" ${CMAKE_CURRENT_BINARY_DIR}/sleef${SIMD}.h.tmp3 >> ${PROJECT_BINARY_DIR}/include/sleefinline_${SIMDLC}.h.tmp - COMMAND ${CMAKE_COMMAND} -E "rename" ${PROJECT_BINARY_DIR}/include/sleefinline_${SIMDLC}.h.tmp ${INLINE_HEADER_FILE} + COMMAND ${SED_COMMAND} -e "s/^#.*//g" ${CMAKE_CURRENT_BINARY_DIR}/sleef${SIMD}.h.tmp3 >> ${PROJECT_BINARY_DIR}/include/sleefinline_${SIMDLC}.h MAIN_DEPENDENCY ${CMAKE_CURRENT_SOURCE_DIR}/sleefsimddp.c ${CMAKE_CURRENT_SOURCE_DIR}/sleefsimdsp.c ${HEADER_${SIMD}} DEPENDS ${HEADER_${SIMD}} diff --git a/travis/before_script.armhf-gcc.sh b/travis/before_script.armhf-gcc.sh index ba18962f..81f93dec 100644 --- a/travis/before_script.armhf-gcc.sh +++ b/travis/before_script.armhf-gcc.sh @@ -8,4 +8,4 @@ make -j 2 all cd /build mkdir build-cross cd build-cross -cmake -DCMAKE_TOOLCHAIN_FILE=../travis/toolchain-armhf.cmake -DNATIVE_BUILD_DIR=`pwd`/../build-native -DEMULATOR=qemu-arm-static -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_INLINE_HEADERS=TRUE .. 
+cmake -DCMAKE_TOOLCHAIN_FILE=../travis/toolchain-armhf.cmake -DNATIVE_BUILD_DIR=`pwd`/../build-native -DEMULATOR=qemu-arm-static -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE .. From 855b9006a58f232a8be835812b555a05293f0c6c Mon Sep 17 00:00:00 2001 From: shibatch Date: Fri, 21 Feb 2020 10:23:16 +0900 Subject: [PATCH 04/18] no message --- travis/script.arm64-clang.sh | 2 +- travis/script.arm64-gcc.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/travis/script.arm64-clang.sh b/travis/script.arm64-clang.sh index 1c3df174..4a3cc981 100644 --- a/travis/script.arm64-clang.sh +++ b/travis/script.arm64-clang.sh @@ -1,7 +1,7 @@ #!/bin/bash set -ev cd sleef.build -make -j `nproc` all +make -j 4 all export OMP_WAIT_POLICY=passive export CTEST_OUTPUT_ON_FAILURE=TRUE ctest -j `nproc` diff --git a/travis/script.arm64-gcc.sh b/travis/script.arm64-gcc.sh index 1c3df174..4a3cc981 100644 --- a/travis/script.arm64-gcc.sh +++ b/travis/script.arm64-gcc.sh @@ -1,7 +1,7 @@ #!/bin/bash set -ev cd sleef.build -make -j `nproc` all +make -j 4 all export OMP_WAIT_POLICY=passive export CTEST_OUTPUT_ON_FAILURE=TRUE ctest -j `nproc` From 6c8a6d300a9e0dbee2da2179c10b5cc8c4927481 Mon Sep 17 00:00:00 2001 From: shibatch Date: Fri, 21 Feb 2020 12:25:51 +0900 Subject: [PATCH 05/18] no message --- Jenkinsfile | 28 ++++++++++++++-------------- appveyor.yml | 4 ++-- src/libm-tester/iutsimd.c | 10 ++++++++++ src/libm/CMakeLists.txt | 2 +- travis/before_script.aarch64-gcc.sh | 2 +- travis/before_script.armhf-gcc.sh | 2 +- travis/script.aarch64-gcc.sh | 4 ++-- travis/script.arm64-clang.sh | 2 +- travis/script.arm64-gcc.sh | 2 +- travis/script.armhf-gcc.sh | 4 ++-- travis/script.osx-clang.sh | 4 ++-- travis/script.osx-gcc.sh | 4 ++-- travis/script.ppc64el-clang.sh | 4 ++-- travis/script.x86_64-clang.sh | 2 +- travis/script.x86_64-gcc.sh | 2 +- 15 files changed, 43 insertions(+), 33 deletions(-) diff --git 
a/Jenkinsfile b/Jenkinsfile index 38a2000b..ae00af3c 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -15,10 +15,10 @@ pipeline { mkdir build cd build cmake -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE .. - make -j 6 all + make -j 1 all export OMP_WAIT_POLICY=passive export CTEST_OUTPUT_ON_FAILURE=TRUE - ctest -j 6 + ctest -j 3 make install ''' } @@ -35,10 +35,10 @@ pipeline { mkdir build cd build cmake -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DFORCE_AAVPCS=On -DENABLE_GNUABI=On -DBUILD_QUAD=TRUE .. - make -j 6 all + make -j 1 all export OMP_WAIT_POLICY=passive export CTEST_OUTPUT_ON_FAILURE=TRUE - ctest -j 6 + ctest -j 3 make install ''' } @@ -56,7 +56,7 @@ pipeline { mkdir build cd build cmake -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE .. - make -j 4 all + make -j 1 all export OMP_WAIT_POLICY=passive export CTEST_OUTPUT_ON_FAILURE=TRUE ctest -j 4 @@ -77,7 +77,7 @@ pipeline { mkdir build cd build cmake -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE .. - make -j 4 all + make -j 1 all export OMP_WAIT_POLICY=passive export CTEST_OUTPUT_ON_FAILURE=TRUE ctest -j 4 @@ -97,7 +97,7 @@ pipeline { mkdir build cd build cmake -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE .. - make -j 4 all + make -j 1 all export OMP_WAIT_POLICY=passive export CTEST_OUTPUT_ON_FAILURE=TRUE ctest -j 4 @@ -117,7 +117,7 @@ pipeline { mkdir build cd build cmake -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DBUILD_SHARED_LIBS=FALSE -DOPENSSL_ROOT_DIR=/usr/local/opt/openssl -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE .. 
- make -j 2 all + make -j 1 all export OMP_WAIT_POLICY=passive export CTEST_OUTPUT_ON_FAILURE=TRUE ctest -j 2 @@ -134,7 +134,7 @@ pipeline { set "ORG_PATH=%PATH%" PATH C:/Cygwin64/bin;C:/Cygwin64/usr/bin;%PROJECT_DIR%/build-cygwin/bin;%PATH% rmdir /S /Q build-cygwin - C:/Cygwin64/bin/bash -c 'mkdir build-cygwin;cd build-cygwin;cmake -g"Unix Makefiles" .. -DBUILD_QUAD=TRUE;make -j 4' + C:/Cygwin64/bin/bash -c 'mkdir build-cygwin;cd build-cygwin;cmake -g"Unix Makefiles" .. -DBUILD_QUAD=TRUE;make -j 1' del /Q /F %PROJECT_DIR%/build-cygwin/bin/iut* PATH %ORG_PATH%;C:/Cygwin64/bin;C:/Cygwin64/usr/bin;%PROJECT_DIR%/build-cygwin/bin;%PROJECT_DIR%/build/bin cd %PROJECT_DIR% @@ -157,7 +157,7 @@ pipeline { mkdir build cd build cmake -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE .. - make -j 4 all + make -j 1 all export OMP_WAIT_POLICY=passive export CTEST_OUTPUT_ON_FAILURE=TRUE ctest -j 4 @@ -175,7 +175,7 @@ pipeline { mkdir build-native cd build-native cmake -DSLEEF_SHOW_CONFIG=1 .. -DBUILD_QUAD=TRUE - make -j 4 all + make -j 1 all cd .. export PATH=$PATH:`pwd`/travis export QEMU_CPU=POWER8 @@ -184,7 +184,7 @@ pipeline { mkdir build cd build cmake -DCMAKE_TOOLCHAIN_FILE=../travis/toolchain-ppc64el.cmake -DNATIVE_BUILD_DIR=`pwd`/../build-native -DEMULATOR=qemu-ppc64le-static -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE .. - make -j 4 all + make -j 1 all export OMP_WAIT_POLICY=passive export CTEST_OUTPUT_ON_FAILURE=TRUE ctest -j 4 @@ -202,7 +202,7 @@ pipeline { mkdir build cd build cmake -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE .. - make -j 4 all + make -j 1 all export OMP_WAIT_POLICY=passive export CTEST_OUTPUT_ON_FAILURE=TRUE ctest -j 4 @@ -220,7 +220,7 @@ pipeline { mkdir build cd build cmake -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE .. 
- make -j 3 all + make -j 1 all export OMP_WAIT_POLICY=passive export CTEST_OUTPUT_ON_FAILURE=TRUE ctest -j 3 diff --git a/appveyor.yml b/appveyor.yml index d68e621f..85eeec37 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -12,9 +12,9 @@ install: - if "%DO_TEST%" == "TRUE" set ORGPATH="%PATH%" - if "%DO_TEST%" == "TRUE" "C:\\Cygwin64\\setup-x86_64.exe" -q -g -P libmpfr-devel,libgmp-devel,cmake - if "%DO_TEST%" == "TRUE" PATH c:\Cygwin64\bin;c:\Cygwin64\usr\bin;c:\projects\sleef\build-cygwin\bin;"%PATH%" - - if "%DO_TEST%" == "TRUE" "C:\\Cygwin64\\bin\\bash" -c 'mkdir build-mingw;cd build-mingw;CC=x86_64-w64-mingw32-gcc cmake -g\"Unix Makefiles\" .. -DBUILD_SHARED_LIBS=FALSE -DBUILD_QUAD=TRUE -DBUILD_INLINE_HEADERS=TRUE;make -j 2' + - if "%DO_TEST%" == "TRUE" "C:\\Cygwin64\\bin\\bash" -c 'mkdir build-mingw;cd build-mingw;CC=x86_64-w64-mingw32-gcc cmake -g\"Unix Makefiles\" .. -DBUILD_SHARED_LIBS=FALSE -DBUILD_QUAD=TRUE -DBUILD_INLINE_HEADERS=TRUE;make -j 1' - if "%DO_TEST%" == "TRUE" cd "c:\\projects\\sleef" - - if "%DO_TEST%" == "TRUE" "C:\\Cygwin64\\bin\\bash" -c 'mkdir build-cygwin;cd build-cygwin;cmake -g\"Unix Makefiles\" -DBUILD_QUAD=TRUE -DBUILD_INLINE_HEADERS=TRUE ..;make -j 2' + - if "%DO_TEST%" == "TRUE" "C:\\Cygwin64\\bin\\bash" -c 'mkdir build-cygwin;cd build-cygwin;cmake -g\"Unix Makefiles\" -DBUILD_QUAD=TRUE -DBUILD_INLINE_HEADERS=TRUE ..;make -j 1' - if "%DO_TEST%" == "TRUE" del /Q /F c:\projects\sleef\build-cygwin\bin\iut* - if "%DO_TEST%" == "TRUE" PATH "%ORGPATH%";c:\Cygwin64\bin;c:\Cygwin64\usr\bin;c:\projects\sleef\build-cygwin\bin;c:\projects\sleef\build\bin - if "%DO_TEST%" == "TRUE" cd "c:\\projects\\sleef" diff --git a/src/libm-tester/iutsimd.c b/src/libm-tester/iutsimd.c index 38845be5..05ec5013 100644 --- a/src/libm-tester/iutsimd.c +++ b/src/libm-tester/iutsimd.c @@ -39,6 +39,16 @@ #include #include +#if defined(__AVX2__) || defined(__aarch64__) || defined(__arm__) || defined(__powerpc64__) +#ifndef FP_FAST_FMA +#define FP_FAST_FMA 
+#endif +#endif + +#if defined(_MSC_VER) && !defined(__STDC__) +#define __STDC__ 1 +#endif + #if (defined(__GNUC__) || defined(__CLANG__)) && (defined(__i386__) || defined(__x86_64__)) #include #endif diff --git a/src/libm/CMakeLists.txt b/src/libm/CMakeLists.txt index 03d94886..ed15b5d5 100644 --- a/src/libm/CMakeLists.txt +++ b/src/libm/CMakeLists.txt @@ -312,7 +312,7 @@ if(BUILD_INLINE_HEADERS) COMMAND "${CMAKE_C_COMPILER}" ${FLAG_PREPROCESS} ${FLAG_PRESERVE_COMMENTS} ${FLAG_INCLUDE}${PROJECT_SOURCE_DIR}/src/common ${FLAG_INCLUDE}${PROJECT_SOURCE_DIR}/src/arch ${FLAG_INCLUDE}${CMAKE_CURRENT_BINARY_DIR}/include/ ${FLAG_DEFINE}SLEEF_GENHEADER ${FLAG_DEFINE}ENABLE_${SIMD} ${FLAG_DEFINE}DORENAME ${CMAKE_CURRENT_SOURCE_DIR}/sleefsimdsp.c >> ${CMAKE_CURRENT_BINARY_DIR}/sleef${SIMD}.h.tmp1 COMMAND ${SED_COMMAND} -e "s/^#.*//g" ${CMAKE_CURRENT_BINARY_DIR}/sleef${SIMD}.h.tmp1 > ${CMAKE_CURRENT_BINARY_DIR}/sleef${SIMD}.h.c COMMAND "${CMAKE_C_COMPILER}" ${FLAG_PREPROCESS} ${FLAG_INCLUDE}${PROJECT_SOURCE_DIR}/src/common ${FLAG_INCLUDE}${PROJECT_SOURCE_DIR}/src/arch ${FLAG_INCLUDE}${CMAKE_CURRENT_BINARY_DIR}/include/ ${FLAG_DEFINE}SLEEF_GENHEADER ${FLAG_DEFINE}ENABLE_${SIMD} ${FLAG_DEFINE}DORENAME ${CMAKE_CURRENT_BINARY_DIR}/sleef${SIMD}.h.c > ${CMAKE_CURRENT_BINARY_DIR}/sleef${SIMD}.h.tmp3 - COMMAND ${SED_COMMAND} -e "s/%VERSION%/${SLEEF_VERSION_MAJOR}.${SLEEF_VERSION_MINOR}.${SLEEF_VERSION_PATCHLEVEL}/g" ${CMAKE_CURRENT_SOURCE_DIR}/sleefinline_header.h.org > ${PROJECT_BINARY_DIR}/include/sleefinline_${SIMDLC}.h.tmp + COMMAND ${SED_COMMAND} -e "s/%VERSION%/${SLEEF_VERSION_MAJOR}.${SLEEF_VERSION_MINOR}.${SLEEF_VERSION_PATCHLEVEL}/g" ${CMAKE_CURRENT_SOURCE_DIR}/sleefinline_header.h.org > ${PROJECT_BINARY_DIR}/include/sleefinline_${SIMDLC}.h COMMAND ${SED_COMMAND} -e "s/^#.*//g" ${CMAKE_CURRENT_BINARY_DIR}/sleef${SIMD}.h.tmp3 >> ${PROJECT_BINARY_DIR}/include/sleefinline_${SIMDLC}.h MAIN_DEPENDENCY ${CMAKE_CURRENT_SOURCE_DIR}/sleefsimddp.c 
${CMAKE_CURRENT_SOURCE_DIR}/sleefsimdsp.c ${HEADER_${SIMD}} diff --git a/travis/before_script.aarch64-gcc.sh b/travis/before_script.aarch64-gcc.sh index 0343a018..9ec65ad3 100644 --- a/travis/before_script.aarch64-gcc.sh +++ b/travis/before_script.aarch64-gcc.sh @@ -4,7 +4,7 @@ cd /build mkdir build-native cd build-native cmake -DBUILD_QUAD=TRUE .. -make -j 2 all +make -j 1 all cd /build mkdir build-cross cd build-cross diff --git a/travis/before_script.armhf-gcc.sh b/travis/before_script.armhf-gcc.sh index 81f93dec..945f2931 100644 --- a/travis/before_script.armhf-gcc.sh +++ b/travis/before_script.armhf-gcc.sh @@ -4,7 +4,7 @@ cd /build mkdir build-native cd build-native cmake -DBUILD_QUAD=TRUE .. -make -j 2 all +make -j 1 all cd /build mkdir build-cross cd build-cross diff --git a/travis/script.aarch64-gcc.sh b/travis/script.aarch64-gcc.sh index b8258204..6a18da01 100644 --- a/travis/script.aarch64-gcc.sh +++ b/travis/script.aarch64-gcc.sh @@ -1,8 +1,8 @@ #!/bin/bash set -ev cd /build/build-cross -make -j 2 all +make -j 1 all export OMP_WAIT_POLICY=passive export CTEST_OUTPUT_ON_FAILURE=TRUE -ctest -j 2 +ctest -j `nproc` make install diff --git a/travis/script.arm64-clang.sh b/travis/script.arm64-clang.sh index 4a3cc981..2a1324f9 100644 --- a/travis/script.arm64-clang.sh +++ b/travis/script.arm64-clang.sh @@ -1,7 +1,7 @@ #!/bin/bash set -ev cd sleef.build -make -j 4 all +make -j 1 all export OMP_WAIT_POLICY=passive export CTEST_OUTPUT_ON_FAILURE=TRUE ctest -j `nproc` diff --git a/travis/script.arm64-gcc.sh b/travis/script.arm64-gcc.sh index 4a3cc981..2a1324f9 100644 --- a/travis/script.arm64-gcc.sh +++ b/travis/script.arm64-gcc.sh @@ -1,7 +1,7 @@ #!/bin/bash set -ev cd sleef.build -make -j 4 all +make -j 1 all export OMP_WAIT_POLICY=passive export CTEST_OUTPUT_ON_FAILURE=TRUE ctest -j `nproc` diff --git a/travis/script.armhf-gcc.sh b/travis/script.armhf-gcc.sh index b8258204..6a18da01 100644 --- a/travis/script.armhf-gcc.sh +++ b/travis/script.armhf-gcc.sh @@ -1,8 
+1,8 @@ #!/bin/bash set -ev cd /build/build-cross -make -j 2 all +make -j 1 all export OMP_WAIT_POLICY=passive export CTEST_OUTPUT_ON_FAILURE=TRUE -ctest -j 2 +ctest -j `nproc` make install diff --git a/travis/script.osx-clang.sh b/travis/script.osx-clang.sh index 37f8e658..2a1324f9 100644 --- a/travis/script.osx-clang.sh +++ b/travis/script.osx-clang.sh @@ -1,8 +1,8 @@ #!/bin/bash set -ev cd sleef.build -make -j 2 all +make -j 1 all export OMP_WAIT_POLICY=passive export CTEST_OUTPUT_ON_FAILURE=TRUE -ctest -j 2 +ctest -j `nproc` make install diff --git a/travis/script.osx-gcc.sh b/travis/script.osx-gcc.sh index 37f8e658..2a1324f9 100644 --- a/travis/script.osx-gcc.sh +++ b/travis/script.osx-gcc.sh @@ -1,8 +1,8 @@ #!/bin/bash set -ev cd sleef.build -make -j 2 all +make -j 1 all export OMP_WAIT_POLICY=passive export CTEST_OUTPUT_ON_FAILURE=TRUE -ctest -j 2 +ctest -j `nproc` make install diff --git a/travis/script.ppc64el-clang.sh b/travis/script.ppc64el-clang.sh index db5b35d9..745e6a93 100644 --- a/travis/script.ppc64el-clang.sh +++ b/travis/script.ppc64el-clang.sh @@ -2,8 +2,8 @@ set -ev export QEMU_CPU=POWER8 cd /build/build-cross -make -j 2 all +make -j 1 all export OMP_WAIT_POLICY=passive export CTEST_OUTPUT_ON_FAILURE=TRUE -ctest -j 2 +ctest -j `nproc` make install diff --git a/travis/script.x86_64-clang.sh b/travis/script.x86_64-clang.sh index 1c3df174..2a1324f9 100644 --- a/travis/script.x86_64-clang.sh +++ b/travis/script.x86_64-clang.sh @@ -1,7 +1,7 @@ #!/bin/bash set -ev cd sleef.build -make -j `nproc` all +make -j 1 all export OMP_WAIT_POLICY=passive export CTEST_OUTPUT_ON_FAILURE=TRUE ctest -j `nproc` diff --git a/travis/script.x86_64-gcc.sh b/travis/script.x86_64-gcc.sh index 1c3df174..2a1324f9 100644 --- a/travis/script.x86_64-gcc.sh +++ b/travis/script.x86_64-gcc.sh @@ -1,7 +1,7 @@ #!/bin/bash set -ev cd sleef.build -make -j `nproc` all +make -j 1 all export OMP_WAIT_POLICY=passive export CTEST_OUTPUT_ON_FAILURE=TRUE ctest -j `nproc` From 
a8c6f53e945155fd25d32502f2ef5822f02fdab5 Mon Sep 17 00:00:00 2001 From: shibatch Date: Wed, 26 Feb 2020 09:56:35 +0900 Subject: [PATCH 06/18] no message --- src/arch/helperadvsimd.h | 2 +- src/arch/helperavx.h | 2 +- src/arch/helperavx2.h | 2 +- src/arch/helperavx2_128.h | 2 +- src/arch/helperavx512f.h | 2 +- src/arch/helperpurec_scalar.h | 2 +- src/arch/helpersse2.h | 2 +- src/arch/helpersve.h | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/arch/helperadvsimd.h b/src/arch/helperadvsimd.h index 69b5394b..92c210f3 100644 --- a/src/arch/helperadvsimd.h +++ b/src/arch/helperadvsimd.h @@ -719,7 +719,7 @@ static INLINE vargquad vcast_aq_vm2(vmask2 vm2) { c.vm2 = vuninterleave_vm2_vm2(vm2); return c.aq; } -#endif +#endif // #if !defined(SLEEF_GENHEADER) static INLINE int vtestallzeros_i_vo64(vopmask g) { uint32x2_t x0 = vorr_u32(vget_low_u32(g), vget_high_u32(g)); diff --git a/src/arch/helperavx.h b/src/arch/helperavx.h index 076d4516..e3edf78e 100644 --- a/src/arch/helperavx.h +++ b/src/arch/helperavx.h @@ -664,7 +664,7 @@ static INLINE vargquad vcast_aq_vm2(vmask2 vm2) { return a; #endif } -#endif +#endif // #if !defined(SLEEF_GENHEADER) static INLINE int vtestallzeros_i_vo64(vopmask g) { return _mm_movemask_epi8(_mm_or_si128(_mm256_extractf128_si256(g, 0), _mm256_extractf128_si256(g, 1))) == 0; diff --git a/src/arch/helperavx2.h b/src/arch/helperavx2.h index a46e2660..e05bf45b 100644 --- a/src/arch/helperavx2.h +++ b/src/arch/helperavx2.h @@ -504,7 +504,7 @@ static INLINE vargquad vcast_aq_vm2(vmask2 vm2) { return a; #endif } -#endif +#endif // #if !defined(SLEEF_GENHEADER) static INLINE int vtestallzeros_i_vo64(vopmask g) { return _mm_movemask_epi8(_mm_or_si128(_mm256_extractf128_si256(g, 0), _mm256_extractf128_si256(g, 1))) == 0; diff --git a/src/arch/helperavx2_128.h b/src/arch/helperavx2_128.h index d00aa5ee..f59a8e07 100644 --- a/src/arch/helperavx2_128.h +++ b/src/arch/helperavx2_128.h @@ -458,7 +458,7 @@ static INLINE vargquad 
vcast_aq_vm2(vmask2 vm2) { return a; #endif } -#endif +#endif // #if !defined(SLEEF_GENHEADER) static INLINE int vtestallzeros_i_vo64(vopmask g) { return _mm_movemask_epi8(g) == 0; } diff --git a/src/arch/helperavx512f.h b/src/arch/helperavx512f.h index bf76aa6f..d1ff1bda 100644 --- a/src/arch/helperavx512f.h +++ b/src/arch/helperavx512f.h @@ -630,7 +630,7 @@ static INLINE vargquad vcast_aq_vm2(vmask2 vm2) { return a; #endif } -#endif +#endif // #if !defined(SLEEF_GENHEADER) #ifdef __INTEL_COMPILER static INLINE int vtestallzeros_i_vo64(vopmask g) { return _mm512_mask2int(g) == 0; } diff --git a/src/arch/helperpurec_scalar.h b/src/arch/helperpurec_scalar.h index 65089c50..a5ccd7ff 100644 --- a/src/arch/helperpurec_scalar.h +++ b/src/arch/helperpurec_scalar.h @@ -423,7 +423,7 @@ static INLINE vargquad vcast_aq_vm2(vmask2 vm2) { c.vm2 = vm2; return c.aq; } -#endif +#endif // #if !defined(SLEEF_GENHEADER) static INLINE int vtestallzeros_i_vo64(vopmask g) { return !g ? ~(uint32_t)0 : 0; } static INLINE vmask vsel_vm_vo64_vm_vm(vopmask o, vmask x, vmask y) { return o ? 
x : y; } diff --git a/src/arch/helpersse2.h b/src/arch/helpersse2.h index 4704a294..478197ee 100644 --- a/src/arch/helpersse2.h +++ b/src/arch/helpersse2.h @@ -516,7 +516,7 @@ static INLINE vargquad vcast_aq_vm2(vmask2 vm2) { return a; #endif } -#endif +#endif // #if !defined(SLEEF_GENHEADER) static INLINE int vtestallzeros_i_vo64(vopmask g) { return _mm_movemask_epi8(g) == 0; } diff --git a/src/arch/helpersve.h b/src/arch/helpersve.h index affe7910..6cfad290 100644 --- a/src/arch/helpersve.h +++ b/src/arch/helpersve.h @@ -857,7 +857,7 @@ static INLINE vargquad vcast_aq_vm2(vmask2 vm2) { svst1_s32(ptrue, (int32_t *)&(aq.s[svcntd()/2]), vm2.y); return aq; } -#endif +#endif // #if !defined(SLEEF_GENHEADER) static INLINE int vtestallzeros_i_vo64(vopmask g) { return svcntp_b64(svptrue_b64(), g) == 0; From 1f90c2aeef33bd1151f29adcbc306fadd310b881 Mon Sep 17 00:00:00 2001 From: shibatch Date: Wed, 26 Feb 2020 11:07:35 +0900 Subject: [PATCH 07/18] no message --- src/libm/CMakeLists.txt | 49 ++++++++++++++++++++++++++++++++++------- 1 file changed, 41 insertions(+), 8 deletions(-) diff --git a/src/libm/CMakeLists.txt b/src/libm/CMakeLists.txt index ed15b5d5..890ccd71 100644 --- a/src/libm/CMakeLists.txt +++ b/src/libm/CMakeLists.txt @@ -306,14 +306,47 @@ if(BUILD_INLINE_HEADERS) OUTPUT ${INLINE_HEADER_FILE} COMMAND echo Generating sleefinline_${SIMDLC}.h - COMMAND "${CMAKE_C_COMPILER}" ${FLAG_PREPROCESS} ${FLAG_PRESERVE_COMMENTS} ${FLAG_INCLUDE}${PROJECT_SOURCE_DIR}/src/common ${FLAG_INCLUDE}${PROJECT_SOURCE_DIR}/src/arch ${FLAG_INCLUDE}${CMAKE_CURRENT_BINARY_DIR}/include/ ${FLAG_DEFINE}SLEEF_GENHEADER ${FLAG_DEFINE}ENABLE_${SIMD} ${FLAG_DEFINE}DORENAME ${CMAKE_CURRENT_SOURCE_DIR}/sleefsimddp.c > ${CMAKE_CURRENT_BINARY_DIR}/sleef${SIMD}.h.tmp1 - COMMAND ${SED_COMMAND} -n -e "/^\\/\\/@#.*$/p" ${CMAKE_CURRENT_BINARY_DIR}/sleef${SIMD}.h.tmp1 > ${CMAKE_CURRENT_BINARY_DIR}/sleef${SIMD}.h.tmp2 - COMMAND ${SED_COMMAND} -e "s/^\\/\\/@#/#/g" 
${CMAKE_CURRENT_BINARY_DIR}/sleef${SIMD}.h.tmp2 > ${CMAKE_CURRENT_BINARY_DIR}/include/macroonly${SIMD}.h - COMMAND "${CMAKE_C_COMPILER}" ${FLAG_PREPROCESS} ${FLAG_PRESERVE_COMMENTS} ${FLAG_INCLUDE}${PROJECT_SOURCE_DIR}/src/common ${FLAG_INCLUDE}${PROJECT_SOURCE_DIR}/src/arch ${FLAG_INCLUDE}${CMAKE_CURRENT_BINARY_DIR}/include/ ${FLAG_DEFINE}SLEEF_GENHEADER ${FLAG_DEFINE}ENABLE_${SIMD} ${FLAG_DEFINE}DORENAME ${CMAKE_CURRENT_SOURCE_DIR}/sleefsimdsp.c >> ${CMAKE_CURRENT_BINARY_DIR}/sleef${SIMD}.h.tmp1 - COMMAND ${SED_COMMAND} -e "s/^#.*//g" ${CMAKE_CURRENT_BINARY_DIR}/sleef${SIMD}.h.tmp1 > ${CMAKE_CURRENT_BINARY_DIR}/sleef${SIMD}.h.c - COMMAND "${CMAKE_C_COMPILER}" ${FLAG_PREPROCESS} ${FLAG_INCLUDE}${PROJECT_SOURCE_DIR}/src/common ${FLAG_INCLUDE}${PROJECT_SOURCE_DIR}/src/arch ${FLAG_INCLUDE}${CMAKE_CURRENT_BINARY_DIR}/include/ ${FLAG_DEFINE}SLEEF_GENHEADER ${FLAG_DEFINE}ENABLE_${SIMD} ${FLAG_DEFINE}DORENAME ${CMAKE_CURRENT_BINARY_DIR}/sleef${SIMD}.h.c > ${CMAKE_CURRENT_BINARY_DIR}/sleef${SIMD}.h.tmp3 - COMMAND ${SED_COMMAND} -e "s/%VERSION%/${SLEEF_VERSION_MAJOR}.${SLEEF_VERSION_MINOR}.${SLEEF_VERSION_PATCHLEVEL}/g" ${CMAKE_CURRENT_SOURCE_DIR}/sleefinline_header.h.org > ${PROJECT_BINARY_DIR}/include/sleefinline_${SIMDLC}.h - COMMAND ${SED_COMMAND} -e "s/^#.*//g" ${CMAKE_CURRENT_BINARY_DIR}/sleef${SIMD}.h.tmp3 >> ${PROJECT_BINARY_DIR}/include/sleefinline_${SIMDLC}.h + + # Preprocess sleefsimddp.c with SLEEF_GENHEADER defined, comments are preserved + COMMAND "${CMAKE_C_COMPILER}" ${FLAG_PREPROCESS} ${FLAG_PRESERVE_COMMENTS} # gcc -E -C + ${FLAG_INCLUDE}${PROJECT_SOURCE_DIR}/src/common ${FLAG_INCLUDE}${PROJECT_SOURCE_DIR}/src/arch # -I/sleef/src/common -I/sleef/src/arch + ${FLAG_INCLUDE}${CMAKE_CURRENT_BINARY_DIR}/include/ # -I/build/src/libm/include + ${FLAG_DEFINE}SLEEF_GENHEADER ${FLAG_DEFINE}ENABLE_${SIMD} ${FLAG_DEFINE}DORENAME # -DSLEEF_GENHEADER -DENABLE_SSE2 -DDORENAME + ${CMAKE_CURRENT_SOURCE_DIR}/sleefsimddp.c > ${CMAKE_CURRENT_BINARY_DIR}/sleef${SIMD}.h.tmp1 # 
/sleef/libm/sleefsimddp.c > /build/libm/sleefSSE2.h.tmp1 + + # Remove all lines except those begin with "//@" + COMMAND ${SED_COMMAND} -n -e "/^\\/\\/@#.*$/p" ${CMAKE_CURRENT_BINARY_DIR}/sleef${SIMD}.h.tmp1 # sed -n -e "/^\\/\\/@#.*$/p" /build/src/libm/sleefSSE2.h.tmp1 + > ${CMAKE_CURRENT_BINARY_DIR}/sleef${SIMD}.h.tmp2 # > /build/src/libm/sleefSSE2.h.tmp2 + + # Remove "//@" + COMMAND ${SED_COMMAND} -e "s/^\\/\\/@#/#/g" ${CMAKE_CURRENT_BINARY_DIR}/sleef${SIMD}.h.tmp2 # sed -e "s/^\\/\\/@#/#/g" /build/src/libm/sleefSSE2.h.tmp2 + > ${CMAKE_CURRENT_BINARY_DIR}/include/macroonly${SIMD}.h # > /build/src/libm/include/macroonlySSE2.h + + # Preprocess sleefsimdsp.c with SLEEF_GENHEADER defined. Include macroonly*.h instead of helper*.h. + COMMAND "${CMAKE_C_COMPILER}" ${FLAG_PREPROCESS} ${FLAG_PRESERVE_COMMENTS} # gcc -E -C + ${FLAG_INCLUDE}${PROJECT_SOURCE_DIR}/src/common ${FLAG_INCLUDE}${PROJECT_SOURCE_DIR}/src/arch # -I/sleef/src/common -I/sleef/src/arch + ${FLAG_INCLUDE}${CMAKE_CURRENT_BINARY_DIR}/include/ # -I/build/src/libm/include + ${FLAG_DEFINE}SLEEF_GENHEADER ${FLAG_DEFINE}ENABLE_${SIMD} ${FLAG_DEFINE}DORENAME # -DSLEEF_GENHEADER -DENABLE_SSE2 -DDORENAME + ${CMAKE_CURRENT_SOURCE_DIR}/sleefsimdsp.c >> ${CMAKE_CURRENT_BINARY_DIR}/sleef${SIMD}.h.tmp1 # /sleef/libm/sleefsimdsp.c >> /build/libm/sleefSSE2.h.tmp1 + + # Remove lines beginning with "#" so that the resulting file can be preprocessed again. 
+ COMMAND ${SED_COMMAND} -e "s/^#.*//g" ${CMAKE_CURRENT_BINARY_DIR}/sleef${SIMD}.h.tmp1 # sed -e "s/^#.*//g" /build/src/libm/sleefSSE2.h.tmp1 + > ${CMAKE_CURRENT_BINARY_DIR}/sleef${SIMD}.h.c # > /build/src/libm/sleefSSE2.h.c + + # Preprocess the intemediate file again to remove comments + COMMAND "${CMAKE_C_COMPILER}" ${FLAG_PREPROCESS} # gcc -E + ${CMAKE_CURRENT_BINARY_DIR}/sleef${SIMD}.h.c # /build/src/libm/sleefSSE2.h.c + > ${CMAKE_CURRENT_BINARY_DIR}/sleef${SIMD}.h.tmp3 # > /build/src/libm/sleefSSE2.h.tmp3 + + # Embed version number into the header + COMMAND ${SED_COMMAND} -e # sed -e + "s/%VERSION%/${SLEEF_VERSION_MAJOR}.${SLEEF_VERSION_MINOR}.${SLEEF_VERSION_PATCHLEVEL}/g" # "s/%VERSION%/3.5.0/g" + ${CMAKE_CURRENT_SOURCE_DIR}/sleefinline_header.h.org # /sleef/libm/sleefinline_header.h.org + > ${PROJECT_BINARY_DIR}/include/sleefinline_${SIMDLC}.h # > /build/include/sleefinline_sse2.h + + # Remove lines beginning with "#" + COMMAND ${SED_COMMAND} -e "s/^#.*//g" ${CMAKE_CURRENT_BINARY_DIR}/sleef${SIMD}.h.tmp3 # sed -e "s/^#.*//g" /build/src/libm/sleefSSE2.h.tmp3 + >> ${PROJECT_BINARY_DIR}/include/sleefinline_${SIMDLC}.h # >> /build/include/sleefinline_sse2.h MAIN_DEPENDENCY ${CMAKE_CURRENT_SOURCE_DIR}/sleefsimddp.c ${CMAKE_CURRENT_SOURCE_DIR}/sleefsimdsp.c ${HEADER_${SIMD}} DEPENDS ${HEADER_${SIMD}} From 557aba9f94ede43c45086882c276c520a837fee5 Mon Sep 17 00:00:00 2001 From: shibatch Date: Mon, 6 Apr 2020 10:55:36 +0900 Subject: [PATCH 08/18] no message --- src/libm-tester/iutsimdmain.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/libm-tester/iutsimdmain.c b/src/libm-tester/iutsimdmain.c index 590c0439..3fb2ae62 100644 --- a/src/libm-tester/iutsimdmain.c +++ b/src/libm-tester/iutsimdmain.c @@ -15,14 +15,22 @@ int do_test(int argc, char **argv); int check_featureDP(double d); int check_featureSP(float d); +#ifdef _MSC_VER +#define SETJMP(x) setjmp(x) +#define LONGJMP longjmp +#else +#define SETJMP(x) sigsetjmp(x, 1) 
+#define LONGJMP siglongjmp +#endif + static void sighandler(int signum) { - longjmp(sigjmp, 1); + LONGJMP(sigjmp, 1); } int detectFeatureDP() { signal(SIGILL, sighandler); - if (setjmp(sigjmp) == 0) { + if (SETJMP(sigjmp) == 0) { int r = check_featureDP(1.0); signal(SIGILL, SIG_DFL); return r; @@ -35,7 +43,7 @@ int detectFeatureDP() { int detectFeatureSP() { signal(SIGILL, sighandler); - if (setjmp(sigjmp) == 0) { + if (SETJMP(sigjmp) == 0) { int r = check_featureSP(1.0); signal(SIGILL, SIG_DFL); return r; From 1467582f13af07a80f3c429113408a803b453eb1 Mon Sep 17 00:00:00 2001 From: shibatch Date: Mon, 6 Apr 2020 11:00:44 +0900 Subject: [PATCH 09/18] no message --- appveyor.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/appveyor.yml b/appveyor.yml index 3d4b7b80..eade1f29 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -1,6 +1,6 @@ version: 1.0.{build} build_cloud: lithium -max_jobs: 4 +max_jobs: 2 image: Visual Studio 2019 configuration: Release environment: From 5d6be85161dbed06c4147c917277c8f1af8867db Mon Sep 17 00:00:00 2001 From: shibatch Date: Mon, 6 Apr 2020 11:07:37 +0900 Subject: [PATCH 10/18] no message --- src/libm-tester/iutsimdmain.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libm-tester/iutsimdmain.c b/src/libm-tester/iutsimdmain.c index 3fb2ae62..17e53bff 100644 --- a/src/libm-tester/iutsimdmain.c +++ b/src/libm-tester/iutsimdmain.c @@ -15,7 +15,7 @@ int do_test(int argc, char **argv); int check_featureDP(double d); int check_featureSP(float d); -#ifdef _MSC_VER +#if defined(_MSC_VER) || defined(__MINGW32__) || defined(__MINGW64__) #define SETJMP(x) setjmp(x) #define LONGJMP longjmp #else From f91f2fa5edede3e5e7dfa8b031eb35eabef83838 Mon Sep 17 00:00:00 2001 From: shibatch Date: Mon, 6 Apr 2020 11:36:24 +0900 Subject: [PATCH 11/18] no message --- src/libm/sleefinline_header.h.org | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/libm/sleefinline_header.h.org b/src/libm/sleefinline_header.h.org 
index eef8d3e2..f0645764 100644 --- a/src/libm/sleefinline_header.h.org +++ b/src/libm/sleefinline_header.h.org @@ -4,3 +4,6 @@ // This file is generated by SLEEF %VERSION% +#if (defined(_MSC_VER)) +#pragma fp_contract (off) +#endif From 164d6e8aba09b294bfbc3929c43defd01e673c94 Mon Sep 17 00:00:00 2001 From: shibatch Date: Tue, 7 Apr 2020 10:08:18 +0900 Subject: [PATCH 12/18] no message --- src/arch/helperpower_128.h | 1 + travis/before_script.common.sh | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/arch/helperpower_128.h b/src/arch/helperpower_128.h index fcb55e7c..b5470b41 100644 --- a/src/arch/helperpower_128.h +++ b/src/arch/helperpower_128.h @@ -62,6 +62,7 @@ static INLINE void vprefetch_v_p(const void *ptr) { } typedef __vector unsigned int vmask; // using __bool with typedef may cause ambiguous errors #define vopmask __vector __bool int +//@#define vopmask __vector __bool int typedef __vector signed int vint; typedef __vector signed int vint2; typedef __vector float vfloat; diff --git a/travis/before_script.common.sh b/travis/before_script.common.sh index a1a7ab57..07d2954e 100644 --- a/travis/before_script.common.sh +++ b/travis/before_script.common.sh @@ -2,4 +2,4 @@ set -ev mkdir build && cd build cmake -G Ninja -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install \ - -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE .. + -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_INLINE_HEADERS=TRUE .. 
From 306862c9c6f12a5b02c80e1a60d309bb1b835b61 Mon Sep 17 00:00:00 2001 From: shibatch Date: Fri, 8 May 2020 13:58:31 +0900 Subject: [PATCH 13/18] no message --- travis/ppc64el-cc | 2 -- 1 file changed, 2 deletions(-) delete mode 100644 travis/ppc64el-cc diff --git a/travis/ppc64el-cc b/travis/ppc64el-cc deleted file mode 100644 index ffda259c..00000000 --- a/travis/ppc64el-cc +++ /dev/null @@ -1,2 +0,0 @@ -#!/bin/sh -clang-5.0 -target ppc64le-linux-gnu -mvsx -fuse-ld=/usr/powerpc64le-linux-gnu/bin/ld $* From 1dece23a4d4cc20dba963caf38b6a45660f1ec97 Mon Sep 17 00:00:00 2001 From: shibatch Date: Fri, 29 May 2020 20:11:40 +0900 Subject: [PATCH 14/18] Update Jenkinsfile --- Jenkinsfile | 192 +++++++++------------------------------------------- 1 file changed, 31 insertions(+), 161 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index ae00af3c..fda39c7e 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -4,104 +4,59 @@ pipeline { stages { stage('Preamble') { parallel { - stage('AArch64 SVE') { - agent { label 'aarch64' } + stage('Armclang') { + agent { label 'armclang' } steps { sh ''' - echo "AArch64 SVE on" `hostname` - export PATH=$PATH:/opt/bin + echo "armclang+SVE on" `hostname` export CC=armclang rm -rf build mkdir build cd build - cmake -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE .. - make -j 1 all + cmake -GNinja -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE .. 
+ ninja export OMP_WAIT_POLICY=passive export CTEST_OUTPUT_ON_FAILURE=TRUE - ctest -j 3 - make install + ctest -j `nproc` + ninja install ''' } } - stage('AArch64 SVE + Advanced SIMD with AAVPCS + LIBSLEEFGNUABI') { - agent { label 'aarch64' } + stage('Armclang AAVPCS') { + agent { label 'armclang' } steps { sh ''' - echo "AArch64 SVE on" `hostname` - export PATH=$PATH:/opt/bin + echo "armclang+SVE+AAVPCS on" `hostname` export CC=armclang rm -rf build mkdir build cd build - cmake -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DFORCE_AAVPCS=On -DENABLE_GNUABI=On -DBUILD_QUAD=TRUE .. - make -j 1 all + cmake -GNinja -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DFORCE_AAVPCS=On -DENABLE_GNUABI=On -DBUILD_QUAD=TRUE .. + ninja export OMP_WAIT_POLICY=passive export CTEST_OUTPUT_ON_FAILURE=TRUE - ctest -j 3 - make install + ctest -j `nproc` + ninja install ''' } } stage('Intel Compiler') { - agent { label 'icc' } - steps { - sh ''' - echo "Intel Compiler on" `hostname` - export PATH=$PATH:/export/opt/sde-external-8.16.0-2018-01-30-lin:/opt/intel/compilers_and_libraries/linux/bin/intel64 - export LD_LIBRARY_PATH=/opt/intel/compilers_and_libraries/linux/lib/intel64 - export CC=icc - rm -rf build - mkdir build - cd build - cmake -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE .. - make -j 1 all - export OMP_WAIT_POLICY=passive - export CTEST_OUTPUT_ON_FAILURE=TRUE - ctest -j 4 - make install - ''' - } - } - - stage('FMA4') { - agent { label 'fma4' } - steps { - sh ''' - echo "FMA4 on" `hostname` - export PATH=$PATH:/opt/local/bin:/opt/bin:/opt/sde-external-8.16.0-2018-01-30-lin - export LD_LIBRARY_PATH=/opt/local/lib:/opt/lib - export CC=gcc-8.2.0 - rm -rf build - mkdir build - cd build - cmake -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE .. 
- make -j 1 all - export OMP_WAIT_POLICY=passive - export CTEST_OUTPUT_ON_FAILURE=TRUE - ctest -j 4 - make install - ''' - } - } - - stage('GCC-4.8') { - agent { label 'x86' } + agent { label 'icc' } steps { sh ''' - echo "gcc-4 on" `hostname` - export PATH=$PATH:/opt/sde-external-8.16.0-2018-01-30-lin - export CC=gcc-4.8 + echo "Intel Compiler on" `hostname` + export CC=icc rm -rf build mkdir build cd build - cmake -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE .. - make -j 1 all + cmake -GNinja -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE .. + ninja export OMP_WAIT_POLICY=passive export CTEST_OUTPUT_ON_FAILURE=TRUE - ctest -j 4 - make install + ctest -j `nproc` + ninja install ''' } } @@ -110,103 +65,18 @@ pipeline { agent { label 'mac' } steps { sh ''' - echo "On" `hostname` + echo "macOS on" `hostname` export PATH=$PATH:/opt/local/bin:/opt/local/bin:/usr/local/bin:/usr/bin:/bin - export CC=gcc-7 - rm -rf build - mkdir build - cd build - cmake -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DBUILD_SHARED_LIBS=FALSE -DOPENSSL_ROOT_DIR=/usr/local/opt/openssl -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE .. - make -j 1 all - export OMP_WAIT_POLICY=passive - export CTEST_OUTPUT_ON_FAILURE=TRUE - ctest -j 2 - make install - ''' - } - } - - stage('Windows') { - agent { label 'win' } - steps { - bat ''' - set "PROJECT_DIR=%cd%" - set "ORG_PATH=%PATH%" - PATH C:/Cygwin64/bin;C:/Cygwin64/usr/bin;%PROJECT_DIR%/build-cygwin/bin;%PATH% - rmdir /S /Q build-cygwin - C:/Cygwin64/bin/bash -c 'mkdir build-cygwin;cd build-cygwin;cmake -g"Unix Makefiles" .. -DBUILD_QUAD=TRUE;make -j 1' - del /Q /F %PROJECT_DIR%/build-cygwin/bin/iut* - PATH %ORG_PATH%;C:/Cygwin64/bin;C:/Cygwin64/usr/bin;%PROJECT_DIR%/build-cygwin/bin;%PROJECT_DIR%/build/bin - cd %PROJECT_DIR% - rmdir /S /Q build - mkdir build - cd build - cmake -G"Visual Studio 15 2017 Win64" .. 
-DCMAKE_INSTALL_PREFIX=install -DSLEEF_SHOW_CONFIG=1 -DBUILD_SHARED_LIBS=FALSE -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE - cmake --build . --target install --config Release - ctest --output-on-failure -j 4 -C Release - ''' - } - } - - stage('i386') { - agent { label 'i386' } - steps { - sh ''' - echo "i386 on" `hostname` - rm -rf build - mkdir build - cd build - cmake -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE .. - make -j 1 all - export OMP_WAIT_POLICY=passive - export CTEST_OUTPUT_ON_FAILURE=TRUE - ctest -j 4 - make install - ''' - } - } - - stage('PowerPC VSX') { - agent { label 'x86 && xenial' } - steps { - sh ''' - echo "PowerPC VSX on" `hostname` - rm -rf build-native - mkdir build-native - cd build-native - cmake -DSLEEF_SHOW_CONFIG=1 .. -DBUILD_QUAD=TRUE - make -j 1 all - cd .. - export PATH=$PATH:`pwd`/travis - export QEMU_CPU=POWER8 - chmod +x travis/ppc64el-cc + export CC=gcc-9 rm -rf build mkdir build cd build - cmake -DCMAKE_TOOLCHAIN_FILE=../travis/toolchain-ppc64el.cmake -DNATIVE_BUILD_DIR=`pwd`/../build-native -DEMULATOR=qemu-ppc64le-static -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE .. - make -j 1 all + cmake -GNinja -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DBUILD_SHARED_LIBS=FALSE -DOPENSSL_ROOT_DIR=/usr/local/opt/openssl -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE .. + ninja export OMP_WAIT_POLICY=passive export CTEST_OUTPUT_ON_FAILURE=TRUE - ctest -j 4 - make install - ''' - } - } - - stage('AArch32') { - agent { label 'aarch32' } - steps { - sh ''' - echo "aarch32 on" `hostname` - rm -rf build - mkdir build - cd build - cmake -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE .. 
- make -j 1 all - export OMP_WAIT_POLICY=passive - export CTEST_OUTPUT_ON_FAILURE=TRUE - ctest -j 4 - make install + ctest -j `sysctl -n hw.physicalcpu` + ninja install ''' } } @@ -219,12 +89,12 @@ pipeline { rm -rf build mkdir build cd build - cmake -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE .. - make -j 1 all + cmake -GNinja -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE .. + ninja export OMP_WAIT_POLICY=passive export CTEST_OUTPUT_ON_FAILURE=TRUE - ctest -j 3 - make install + ctest -j 2 + ninja install ''' } } From 02e69747b0d0afc8887f2184b930da3db101abec Mon Sep 17 00:00:00 2001 From: shibatch Date: Mon, 1 Jun 2020 15:53:58 +0900 Subject: [PATCH 15/18] no message --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index fda39c7e..f0e046d0 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -75,7 +75,7 @@ pipeline { ninja export OMP_WAIT_POLICY=passive export CTEST_OUTPUT_ON_FAILURE=TRUE - ctest -j `sysctl -n hw.physicalcpu` + ctest -j `sysctl -n hw.logicalcpu` ninja install ''' } From 0fe6a6120304243751b9820d3a14d8f2a3798223 Mon Sep 17 00:00:00 2001 From: shibatch Date: Fri, 26 Jun 2020 13:07:27 +0900 Subject: [PATCH 16/18] no message --- src/arch/helperavx.h | 3 +++ src/arch/helperavx2.h | 3 +++ src/arch/helperavx512f.h | 3 +++ src/arch/helpersse2.h | 3 +++ 4 files changed, 12 insertions(+) diff --git a/src/arch/helperavx.h b/src/arch/helperavx.h index ebed0fe4..a0280c3f 100644 --- a/src/arch/helperavx.h +++ b/src/arch/helperavx.h @@ -627,6 +627,9 @@ static vmask2 vloadu_vm2_p(void *p) { return vm2; } +#if !defined(SLEEF_GENHEADER) +typedef Sleef_quad4 vargquad; + static INLINE vmask2 vcast_vm2_aq(vargquad aq) { return vinterleave_vm2_vm2(vloadu_vm2_p(&aq)); } diff --git a/src/arch/helperavx2.h b/src/arch/helperavx2.h index e8afb844..6587f488 100644 --- a/src/arch/helperavx2.h +++ b/src/arch/helperavx2.h @@ 
-467,6 +467,9 @@ static vmask2 vloadu_vm2_p(void *p) { return vm2; } +#if !defined(SLEEF_GENHEADER) +typedef Sleef_quad4 vargquad; + static INLINE vmask2 vcast_vm2_aq(vargquad aq) { return vinterleave_vm2_vm2(vloadu_vm2_p(&aq)); } diff --git a/src/arch/helperavx512f.h b/src/arch/helperavx512f.h index 589172cc..d9b5855e 100644 --- a/src/arch/helperavx512f.h +++ b/src/arch/helperavx512f.h @@ -579,6 +579,9 @@ static vmask2 vloadu_vm2_p(void *p) { return vm2; } +#if !defined(SLEEF_GENHEADER) +typedef Sleef_quad8 vargquad; + static INLINE vmask2 vcast_vm2_aq(vargquad aq) { return vinterleave_vm2_vm2(vloadu_vm2_p(&aq)); } diff --git a/src/arch/helpersse2.h b/src/arch/helpersse2.h index ce2679c1..880c02e1 100644 --- a/src/arch/helpersse2.h +++ b/src/arch/helpersse2.h @@ -479,6 +479,9 @@ static vmask2 vloadu_vm2_p(void *p) { return vm2; } +#if !defined(SLEEF_GENHEADER) +typedef Sleef_quad2 vargquad; + static INLINE vmask2 vcast_vm2_aq(vargquad aq) { return vinterleave_vm2_vm2(vloadu_vm2_p(&aq)); } From 99af8e89ff57610746a771d35efd3ce9fe0b200f Mon Sep 17 00:00:00 2001 From: shibatch Date: Fri, 26 Jun 2020 13:27:38 +0900 Subject: [PATCH 17/18] no message --- src/libm-tester/iutsimd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/libm-tester/iutsimd.c b/src/libm-tester/iutsimd.c index 1c32ed85..80f1b885 100644 --- a/src/libm-tester/iutsimd.c +++ b/src/libm-tester/iutsimd.c @@ -296,7 +296,7 @@ int check_featureDP(double d) { return s[0] == s[0]; } -#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA)) +#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA) || defined(USE_INLINE_HEADER)) static vdouble vd2getx_vd_vd2(vdouble2 v) { return v.x; } static vdouble vd2gety_vd_vd2(vdouble2 v) { return v.y; } #endif @@ -317,7 +317,7 @@ int check_featureSP(float d) { return s[0] == s[0]; } -#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA)) +#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA) || defined(USE_INLINE_HEADER)) static vfloat 
vf2getx_vf_vf2(vfloat2 v) { return v.x; } static vfloat vf2gety_vf_vf2(vfloat2 v) { return v.y; } #endif From 400756a8f1967ce67f9ae4f9f883345ee7c57fb6 Mon Sep 17 00:00:00 2001 From: shibatch Date: Thu, 23 Jul 2020 14:25:34 +0900 Subject: [PATCH 18/18] no message --- appveyor.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/appveyor.yml b/appveyor.yml index 9a1a6e53..917c7bde 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -31,7 +31,6 @@ build_script: - mkdir build - cd build - cmake -G"Visual Studio 16 2019" .. -DRUNNING_ON_APPVEYOR=TRUE -DCMAKE_INSTALL_PREFIX=install -DSLEEF_SHOW_CONFIG=1 -DSLEEF_SHOW_ERROR_LOG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_INLINE_HEADERS=TRUE %ENV_BUILD_STATIC% -build_script: - cmake --build . --target install --config Release - if "%DO_TEST%" == "TRUE" (ctest --output-on-failure -j 4 -C Release) - cd "%BUILDFOLDER%"