diff --git a/CMakeLists.txt b/CMakeLists.txt index 8a870859..321cca5d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -5,7 +5,7 @@ option(BUILD_SHARED_LIBS "Build shared libs" ON) option(BUILD_STATIC_TEST_BINS "Build statically linked test executables" OFF) option(ENABLE_LTO "Enable LTO on GCC or ThinLTO on clang" OFF) option(BUILD_LIBM "libsleef will be built." ON) -option(BUILD_DFT "libsleefdft will be built." ON) +option(BUILD_DFT "libsleefdft will be built." OFF) option(BUILD_QUAD "libsleefquad will be built." OFF) option(BUILD_GNUABI_LIBS "libsleefgnuabi will be built." ON) option(BUILD_TESTS "Tests will be built." ON) diff --git a/Configure.cmake b/Configure.cmake index b5edb0fa..c14156f0 100644 --- a/Configure.cmake +++ b/Configure.cmake @@ -85,6 +85,7 @@ if((CMAKE_SYSTEM_PROCESSOR MATCHES "x86") OR (CMAKE_SYSTEM_PROCESSOR MATCHES "AM set(SLEEF_ARCH_X86 ON CACHE INTERNAL "True for x86 architecture.") set(CLANG_FLAGS_ENABLE_PURECFMA_SCALAR "-mavx2;-mfma;-fno-strict-aliasing") + set(CLANG_FLAGS_ENABLE_PUREC_SCALAR "-fno-strict-aliasing") elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|arm64") set(SLEEF_ARCH_AARCH64 ON CACHE INTERNAL "True for Aarch64 architecture.") @@ -98,16 +99,19 @@ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "arm") set(COMPILER_SUPPORTS_NEON32VFPV4 1) set(CLANG_FLAGS_ENABLE_PURECFMA_SCALAR "-mfpu=vfpv4;-fno-strict-aliasing") + set(CLANG_FLAGS_ENABLE_PUREC_SCALAR "-fno-strict-aliasing") elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)64") set(SLEEF_ARCH_PPC64 ON CACHE INTERNAL "True for PPC64 architecture.") set(CLANG_FLAGS_ENABLE_PURECFMA_SCALAR "-mvsx;-fno-strict-aliasing") + set(CLANG_FLAGS_ENABLE_PUREC_SCALAR "-fno-strict-aliasing") elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "s390x") set(SLEEF_ARCH_S390X ON CACHE INTERNAL "True for IBM Z architecture.") set(CLANG_FLAGS_ENABLE_PURECFMA_SCALAR "-march=z14;-mzvector;-fno-strict-aliasing") + set(CLANG_FLAGS_ENABLE_PUREC_SCALAR "-fno-strict-aliasing") endif() set(COMPILER_SUPPORTS_PUREC_SCALAR 1) diff --git a/Jenkinsfile b/Jenkinsfile index b404a1b8..a52ad01c 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -14,7 +14,7 @@ pipeline { rm -rf build mkdir build cd build - cmake -GNinja -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_INLINE_HEADERS=TRUE -DBUILD_QUAD=TRUE -DENFORCE_SVE=TRUE -DEMULATOR=qemu-aarch64 .. + cmake -GNinja -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_INLINE_HEADERS=TRUE -DBUILD_QUAD=TRUE -DBUILD_DFT=TRUE -DENFORCE_SVE=TRUE -DEMULATOR=qemu-aarch64 .. ninja export OMP_WAIT_POLICY=passive export CTEST_OUTPUT_ON_FAILURE=TRUE @@ -34,7 +34,7 @@ pipeline { rm -rf build mkdir build cd build - cmake -GNinja -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_INLINE_HEADERS=TRUE -DFORCE_AAVPCS=On -DENABLE_GNUABI=On -DBUILD_QUAD=TRUE -DENFORCE_SVE=TRUE -DEMULATOR=qemu-aarch64 .. + cmake -GNinja -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_INLINE_HEADERS=TRUE -DFORCE_AAVPCS=On -DENABLE_GNUABI=On -DBUILD_QUAD=TRUE -DBUILD_DFT=TRUE -DENFORCE_SVE=TRUE -DEMULATOR=qemu-aarch64 .. ninja export OMP_WAIT_POLICY=passive export CTEST_OUTPUT_ON_FAILURE=TRUE @@ -53,7 +53,7 @@ pipeline { rm -rf build mkdir build cd build - cmake -GNinja -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DBUILD_SHARED_LIBS=FALSE -DENFORCE_TESTER3=TRUE -DFORCE_AAVPCS=On -DENABLE_GNUABI=On -DBUILD_QUAD=TRUE -DDISABLE_SVE=TRUE -DBUILD_INLINE_HEADERS=TRUE -DENABLE_CUDA=TRUE -DENFORCE_CUDA=TRUE .. + cmake -GNinja -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DBUILD_SHARED_LIBS=FALSE -DENFORCE_TESTER3=TRUE -DFORCE_AAVPCS=On -DENABLE_GNUABI=On -DBUILD_QUAD=TRUE -DBUILD_DFT=TRUE -DDISABLE_SVE=TRUE -DBUILD_INLINE_HEADERS=TRUE -DENABLE_CUDA=TRUE -DENFORCE_CUDA=TRUE .. ninja export OMP_WAIT_POLICY=passive export CTEST_OUTPUT_ON_FAILURE=TRUE @@ -78,7 +78,7 @@ pipeline { docker exec jenkins tar xf /tmp/builddir.tgz -C /build docker exec jenkins rm -f /tmp/builddir.tgz rm -f /tmp/builddir.tgz - docker exec jenkins bash -c "set -ev;export OMP_WAIT_POLICY=passive;cd /build;rm -rf build;mkdir build;cd build;export CC=gcc;export PATH=/opt/bin:$PATH;cmake -GNinja -DBUILD_QUAD=TRUE -DENFORCE_TESTER3=TRUE ..;ninja;ctest -j `nproc`" + docker exec jenkins bash -c "set -ev;export OMP_WAIT_POLICY=passive;cd /build;rm -rf build;mkdir build;cd build;export CC=gcc;export PATH=/opt/bin:$PATH;cmake -GNinja -DBUILD_QUAD=TRUE -DBUILD_DFT=TRUE -DENFORCE_TESTER3=TRUE ..;ninja;ctest -j `nproc`" docker stop jenkins ''' } @@ -99,7 +99,7 @@ pipeline { docker exec jenkins tar xf /tmp/builddir.tgz -C /build docker exec jenkins rm -f /tmp/builddir.tgz rm -f /tmp/builddir.tgz - docker exec jenkins bash -c "set -ev;export OMP_WAIT_POLICY=passive;cd /build;rm -rf build;mkdir build;cd build;export CC=clang;export PATH=/opt/bin:$PATH;cmake -GNinja -DBUILD_QUAD=TRUE -DENFORCE_TESTER3=TRUE ..;ninja;ctest -j `nproc`" + docker exec jenkins bash -c "set -ev;export OMP_WAIT_POLICY=passive;cd /build;rm -rf build;mkdir build;cd build;export CC=clang;export PATH=/opt/bin:$PATH;cmake -GNinja -DBUILD_QUAD=TRUE -DBUILD_DFT=TRUE -DENFORCE_TESTER3=TRUE ..;ninja;ctest -j `nproc`" docker stop jenkins ''' } @@ -114,7 +114,7 @@ pipeline { rm -rf build mkdir build cd build - cmake -GNinja -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DENFORCE_SSE2=TRUE -DENFORCE_SSE4=TRUE -DENFORCE_AVX=TRUE -DENFORCE_AVX2=TRUE -DENFORCE_AVX512F=TRUE .. + cmake -GNinja -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_DFT=TRUE -DENFORCE_SSE2=TRUE -DENFORCE_SSE4=TRUE -DENFORCE_AVX=TRUE -DENFORCE_AVX2=TRUE -DENFORCE_AVX512F=TRUE .. ninja export OMP_WAIT_POLICY=passive export CTEST_OUTPUT_ON_FAILURE=TRUE @@ -134,7 +134,7 @@ pipeline { rm -rf build mkdir build cd build - cmake -GNinja -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DOPENSSL_ROOT_DIR=/usr/local/opt/openssl -DENFORCE_TESTER3=TRUE -DBUILD_INLINE_HEADERS=TRUE -DBUILD_QUAD=TRUE -DENFORCE_SSE2=TRUE -DENFORCE_SSE4=TRUE -DENFORCE_AVX=TRUE -DENFORCE_FMA4=TRUE -DENFORCE_AVX2=TRUE -DENFORCE_AVX512F=TRUE .. + cmake -GNinja -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DOPENSSL_ROOT_DIR=/usr/local/opt/openssl -DENFORCE_TESTER3=TRUE -DBUILD_INLINE_HEADERS=TRUE -DBUILD_QUAD=TRUE -DBUILD_DFT=TRUE -DENFORCE_SSE2=TRUE -DENFORCE_SSE4=TRUE -DENFORCE_AVX=TRUE -DENFORCE_FMA4=TRUE -DENFORCE_AVX2=TRUE -DENFORCE_AVX512F=TRUE .. ninja export OMP_WAIT_POLICY=passive export CTEST_OUTPUT_ON_FAILURE=TRUE @@ -154,7 +154,7 @@ pipeline { rm -rf build mkdir build cd build - cmake -GNinja -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DBUILD_SHARED_LIBS=FALSE -DOPENSSL_ROOT_DIR=/usr/local/opt/openssl -DENFORCE_TESTER3=TRUE -DBUILD_INLINE_HEADERS=TRUE -DBUILD_QUAD=TRUE -DENFORCE_SSE2=TRUE -DENFORCE_SSE4=TRUE -DENFORCE_AVX=TRUE -DENFORCE_FMA4=TRUE -DENFORCE_AVX2=TRUE -DENFORCE_AVX512F=TRUE .. + cmake -GNinja -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DBUILD_SHARED_LIBS=FALSE -DOPENSSL_ROOT_DIR=/usr/local/opt/openssl -DENFORCE_TESTER3=TRUE -DBUILD_INLINE_HEADERS=TRUE -DBUILD_QUAD=TRUE -DBUILD_DFT=TRUE -DENFORCE_SSE2=TRUE -DENFORCE_SSE4=TRUE -DENFORCE_AVX=TRUE -DENFORCE_FMA4=TRUE -DENFORCE_AVX2=TRUE -DENFORCE_AVX512F=TRUE .. ninja export OMP_WAIT_POLICY=passive export CTEST_OUTPUT_ON_FAILURE=TRUE @@ -172,7 +172,7 @@ pipeline { rm -rf build mkdir build cd build - cmake -GNinja -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_INLINE_HEADERS=TRUE -DBUILD_QUAD=TRUE -DENFORCE_SSE2=TRUE -DENFORCE_SSE4=TRUE -DENFORCE_AVX=TRUE -DENFORCE_FMA4=TRUE -DENFORCE_AVX2=TRUE -DENFORCE_AVX512F=TRUE .. + cmake -GNinja -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_INLINE_HEADERS=TRUE -DBUILD_QUAD=TRUE -DBUILD_DFT=TRUE -DENFORCE_SSE2=TRUE -DENFORCE_SSE4=TRUE -DENFORCE_AVX=TRUE -DENFORCE_FMA4=TRUE -DENFORCE_AVX2=TRUE -DENFORCE_AVX512F=TRUE .. ninja export OMP_WAIT_POLICY=passive export CTEST_OUTPUT_ON_FAILURE=TRUE @@ -192,7 +192,7 @@ pipeline { rm -rf build mkdir build cd build - cmake -GNinja -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_INLINE_HEADERS=TRUE -DBUILD_SHARED_LIBS=FALSE -DENABLE_LTO=TRUE -DDISABLE_FMA4=TRUE -DENFORCE_SSE2=TRUE -DENFORCE_SSE4=TRUE -DENFORCE_AVX=TRUE -DENFORCE_AVX2=TRUE -DENFORCE_AVX512F=TRUE .. + cmake -GNinja -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_DFT=TRUE -DBUILD_INLINE_HEADERS=TRUE -DBUILD_SHARED_LIBS=FALSE -DENABLE_LTO=TRUE -DDISABLE_FMA4=TRUE -DENFORCE_SSE2=TRUE -DENFORCE_SSE4=TRUE -DENFORCE_AVX=TRUE -DENFORCE_AVX2=TRUE -DENFORCE_AVX512F=TRUE .. ninja export OMP_WAIT_POLICY=passive export CTEST_OUTPUT_ON_FAILURE=TRUE @@ -212,7 +212,7 @@ pipeline { rm -rf build mkdir build cd build - cmake -GNinja -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_INLINE_HEADERS=TRUE -DBUILD_SHARED_LIBS=FALSE -DENABLE_LTO=TRUE -DLLVM_AR_COMMAND=llvm-ar-10 -DDISABLE_FMA4=TRUE -DENFORCE_SSE2=TRUE -DENFORCE_SSE4=TRUE -DENFORCE_AVX=TRUE -DENFORCE_AVX2=TRUE -DENFORCE_AVX512F=TRUE .. + cmake -GNinja -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_DFT=TRUE -DBUILD_INLINE_HEADERS=TRUE -DBUILD_SHARED_LIBS=FALSE -DENABLE_LTO=TRUE -DLLVM_AR_COMMAND=llvm-ar-10 -DDISABLE_FMA4=TRUE -DENFORCE_SSE2=TRUE -DENFORCE_SSE4=TRUE -DENFORCE_AVX=TRUE -DENFORCE_AVX2=TRUE -DENFORCE_AVX512F=TRUE .. ninja export OMP_WAIT_POLICY=passive export CTEST_OUTPUT_ON_FAILURE=TRUE @@ -237,7 +237,7 @@ pipeline { docker exec jenkins tar xf /tmp/builddir.tgz -C /build docker exec jenkins rm -f /tmp/builddir.tgz rm -f /tmp/builddir.tgz - docker exec jenkins bash -c "set -ev;export OMP_WAIT_POLICY=passive;cd /build;rm -rf build;mkdir build;cd build;export CC=gcc;cmake -GNinja -DBUILD_QUAD=TRUE -DBUILD_INLINE_HEADERS=TRUE -DENFORCE_SSE2=TRUE -DENFORCE_SSE4=TRUE -DENFORCE_AVX=TRUE -DENFORCE_FMA4=TRUE -DENFORCE_AVX2=TRUE -DENFORCE_AVX512F=TRUE ..;ninja;ctest -j `nproc`" + docker exec jenkins bash -c "set -ev;export OMP_WAIT_POLICY=passive;cd /build;rm -rf build;mkdir build;cd build;export CC=gcc;cmake -GNinja -DBUILD_QUAD=TRUE -DBUILD_DFT=TRUE -DBUILD_INLINE_HEADERS=TRUE -DENFORCE_SSE2=TRUE -DENFORCE_SSE4=TRUE -DENFORCE_AVX=TRUE -DENFORCE_FMA4=TRUE -DENFORCE_AVX2=TRUE -DENFORCE_AVX512F=TRUE ..;ninja;ctest -j `nproc`" docker stop jenkins ''' } @@ -258,7 +258,7 @@ pipeline { docker exec jenkins tar xf /tmp/builddir.tgz -C /build docker exec jenkins rm -f /tmp/builddir.tgz rm -f /tmp/builddir.tgz - docker exec jenkins bash -c "set -ev;export OMP_WAIT_POLICY=passive;cd /build;rm -rf build;mkdir build;cd build;export CC=clang;cmake -GNinja -DBUILD_QUAD=TRUE -DBUILD_INLINE_HEADERS=TRUE -DENFORCE_SSE2=TRUE -DENFORCE_SSE4=TRUE -DENFORCE_AVX=TRUE -DENFORCE_FMA4=TRUE -DENFORCE_AVX2=TRUE -DENFORCE_AVX512F=TRUE ..;ninja;ctest -j `nproc`" + docker exec jenkins bash -c "set -ev;export OMP_WAIT_POLICY=passive;cd /build;rm -rf build;mkdir build;cd build;export CC=clang;cmake -GNinja -DBUILD_QUAD=TRUE -DBUILD_DFT=TRUE -DBUILD_INLINE_HEADERS=TRUE -DENFORCE_SSE2=TRUE -DENFORCE_SSE4=TRUE -DENFORCE_AVX=TRUE -DENFORCE_FMA4=TRUE -DENFORCE_AVX2=TRUE -DENFORCE_AVX512F=TRUE ..;ninja;ctest -j `nproc`" docker stop jenkins ''' } @@ -281,7 +281,7 @@ pipeline { rm -rf build mkdir build cd build - /usr/local/bin/cmake -GNinja -DBUILD_QUAD=TRUE -DBUILD_INLINE_HEADERS=TRUE -DENFORCE_SSE2=TRUE -DENFORCE_SSE4=TRUE -DENFORCE_AVX=TRUE -DENFORCE_FMA4=TRUE -DENFORCE_AVX2=TRUE .. + /usr/local/bin/cmake -GNinja -DBUILD_QUAD=TRUE -DBUILD_DFT=TRUE -DBUILD_INLINE_HEADERS=TRUE -DENFORCE_SSE2=TRUE -DENFORCE_SSE4=TRUE -DENFORCE_AVX=TRUE -DENFORCE_FMA4=TRUE -DENFORCE_AVX2=TRUE .. ninja ''' } @@ -296,9 +296,9 @@ pipeline { rm -rf build mkdir build cd build - cmake -GNinja -DBUILD_QUAD=TRUE -DBUILD_INLINE_HEADERS=TRUE -DENFORCE_SSE2=TRUE -DENFORCE_SSE4=TRUE -DENFORCE_AVX=TRUE -DENFORCE_FMA4=TRUE -DENFORCE_AVX2=TRUE -DENFORCE_AVX512F=TRUE .. + cmake -GNinja -DBUILD_QUAD=TRUE -DBUILD_DFT=TRUE -DBUILD_INLINE_HEADERS=TRUE -DENFORCE_SSE2=TRUE -DENFORCE_SSE4=TRUE -DENFORCE_AVX=TRUE -DENFORCE_FMA4=TRUE -DENFORCE_AVX2=TRUE -DENFORCE_AVX512F=TRUE .. ninja - cmake -GNinja -DBUILD_QUAD=TRUE -DBUILD_INLINE_HEADERS=TRUE -DENFORCE_SSE2=TRUE -DENFORCE_SSE4=TRUE -DENFORCE_AVX=TRUE -DENFORCE_FMA4=TRUE -DENFORCE_AVX2=TRUE -DENFORCE_AVX512F=TRUE .. + cmake -GNinja -DBUILD_QUAD=TRUE -DBUILD_DFT=TRUE -DBUILD_INLINE_HEADERS=TRUE -DENFORCE_SSE2=TRUE -DENFORCE_SSE4=TRUE -DENFORCE_AVX=TRUE -DENFORCE_FMA4=TRUE -DENFORCE_AVX2=TRUE -DENFORCE_AVX512F=TRUE .. ninja ctest -j `nproc` ''' diff --git a/appveyor.yml b/appveyor.yml index 1dc5c89b..c48bfb21 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -20,9 +20,9 @@ build_script: - if "%DO_TEST%" == "TRUE" echo PATH c:\Cygwin64\bin;c:\Cygwin64\usr\bin;%CD%\build-cygwin\bin;%PATH% > q.bat - if "%DO_TEST%" == "TRUE" powershell -Command "(gc q.bat) -replace ' ;', ';' | Out-File -encoding ASCII p.bat" - if "%DO_TEST%" == "TRUE" call p.bat - - if "%DO_TEST%" == "TRUE" "C:\\Cygwin64\\bin\\bash" -c 'mkdir build-mingw;cd build-mingw;CC=x86_64-w64-mingw32-gcc cmake -G Ninja .. -DRUNNING_ON_APPVEYOR=TRUE -DBUILD_SHARED_LIBS=FALSE -DBUILD_QUAD=TRUE -DENFORCE_SSE2=TRUE -DENFORCE_SSE4=TRUE;ninja' + - if "%DO_TEST%" == "TRUE" "C:\\Cygwin64\\bin\\bash" -c 'mkdir build-mingw;cd build-mingw;CC=x86_64-w64-mingw32-gcc cmake -G Ninja .. -DRUNNING_ON_APPVEYOR=TRUE -DBUILD_SHARED_LIBS=FALSE -DBUILD_QUAD=TRUE -DBUILD_DFT=TRUE -DENFORCE_SSE2=TRUE -DENFORCE_SSE4=TRUE;ninja' - if "%DO_TEST%" == "TRUE" cd "%BUILDFOLDER%" - - if "%DO_TEST%" == "TRUE" "C:\\Cygwin64\\bin\\bash" -c 'mkdir build-cygwin;cd build-cygwin;cmake -G Ninja -DRUNNING_ON_APPVEYOR=TRUE -DBUILD_QUAD=TRUE -DBUILD_INLINE_HEADERS=TRUE -DENFORCE_SSE2=TRUE -DENFORCE_SSE2=TRUE -DENFORCE_SSE4=TRUE ..;ninja' + - if "%DO_TEST%" == "TRUE" "C:\\Cygwin64\\bin\\bash" -c 'mkdir build-cygwin;cd build-cygwin;cmake -G Ninja -DRUNNING_ON_APPVEYOR=TRUE -DBUILD_QUAD=TRUE -DBUILD_DFT=TRUE -DBUILD_INLINE_HEADERS=TRUE -DENFORCE_SSE2=TRUE -DENFORCE_SSE2=TRUE -DENFORCE_SSE4=TRUE ..;ninja' - if "%DO_TEST%" == "TRUE" cd "%BUILDFOLDER%" - if "%DO_TEST%" == "TRUE" del /Q /F build-cygwin\bin\iut* - if "%DO_TEST%" == "TRUE" echo PATH %ORGPATH%;c:\Cygwin64\bin;c:\Cygwin64\usr\bin;%CD%\build-cygwin\bin;%CD%\build\bin > q.bat @@ -30,7 +30,7 @@ build_script: - if "%DO_TEST%" == "TRUE" call p.bat - mkdir build - cd build - - cmake -G"Visual Studio 16 2019" .. -DRUNNING_ON_APPVEYOR=TRUE -DCMAKE_INSTALL_PREFIX=install -DSLEEF_SHOW_CONFIG=1 -DSLEEF_SHOW_ERROR_LOG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_INLINE_HEADERS=TRUE -DENFORCE_SSE2=TRUE -DENFORCE_SSE4=TRUE -DENFORCE_AVX=TRUE -DENFORCE_FMA4=TRUE -DENFORCE_AVX2=TRUE -DENFORCE_AVX512F=TRUE %ENV_BUILD_STATIC% + - cmake -G"Visual Studio 16 2019" .. -DRUNNING_ON_APPVEYOR=TRUE -DCMAKE_INSTALL_PREFIX=install -DSLEEF_SHOW_CONFIG=1 -DSLEEF_SHOW_ERROR_LOG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_DFT=TRUE -DBUILD_INLINE_HEADERS=TRUE -DENFORCE_SSE2=TRUE -DENFORCE_SSE4=TRUE -DENFORCE_AVX=TRUE -DENFORCE_FMA4=TRUE -DENFORCE_AVX2=TRUE -DENFORCE_AVX512F=TRUE %ENV_BUILD_STATIC% - cmake --build . --target install --config Release - if "%DO_TEST%" == "TRUE" (ctest --output-on-failure -j 16 -C Release) - cd "%BUILDFOLDER%" @@ -39,7 +39,7 @@ build_script: - call p.bat - mkdir build-clang - cd build-clang - - cmake -G Ninja .. -DRUNNING_ON_APPVEYOR=TRUE -DCMAKE_C_COMPILER:PATH="C:\Program Files\LLVM\bin\clang.exe" -DCMAKE_INSTALL_PREFIX=install -DSLEEF_SHOW_CONFIG=1 -DSLEEF_SHOW_ERROR_LOG=1 -DENFORCE_TESTER3=TRUE -DBUILD_INLINE_HEADERS=TRUE -DBUILD_QUAD=TRUE -DENFORCE_SSE2=TRUE -DENFORCE_SSE4=TRUE -DENFORCE_AVX=TRUE -DENFORCE_FMA4=TRUE -DENFORCE_AVX2=TRUE -DENFORCE_AVX512F=TRUE %ENV_BUILD_STATIC% + - cmake -G Ninja .. -DRUNNING_ON_APPVEYOR=TRUE -DCMAKE_C_COMPILER:PATH="C:\Program Files\LLVM\bin\clang.exe" -DCMAKE_INSTALL_PREFIX=install -DSLEEF_SHOW_CONFIG=1 -DSLEEF_SHOW_ERROR_LOG=1 -DENFORCE_TESTER3=TRUE -DBUILD_INLINE_HEADERS=TRUE -DBUILD_QUAD=TRUE -DBUILD_DFT=TRUE -DENFORCE_SSE2=TRUE -DENFORCE_SSE4=TRUE -DENFORCE_AVX=TRUE -DENFORCE_FMA4=TRUE -DENFORCE_AVX2=TRUE -DENFORCE_AVX512F=TRUE %ENV_BUILD_STATIC% - ninja test_script: - if "%DO_TEST%" == "TRUE" (ctest --output-on-failure -j 16 -C Release) diff --git a/src/common/commonfuncs.h b/src/common/commonfuncs.h index 99760e3f..f8d7f879 100644 --- a/src/common/commonfuncs.h +++ b/src/common/commonfuncs.h @@ -116,7 +116,7 @@ static INLINE CONST VECTOR_CC tdi_t tdisettdi_tdi_vd3_vi(vdouble3 v, vint i) { #if defined(ENABLE_MAIN) // Functions for debugging #include -#include +#include static void printvmask(char *mes, vmask g) { uint64_t u[VECTLENDP]; @@ -163,7 +163,7 @@ static void printvint64(char *mes, vint64 vi) { uint64_t u[VECTLENDP*2]; vstoreu_v_p_vd((double *)u, vreinterpret_vd_vm(vreinterpret_vm_vi64(vi))); printf("%s ", mes); - for(int i=0;i= 8) typedef __float128 Sleef_quad; +#define SLEEF_QUAD_C(x) (x ## Q) +//#elif defined(__SIZEOF_LONG_DOUBLE__) && defined(__aarch64__) +//typedef long double Sleef_quad; +//#define SLEEF_QUAD_C(x) (x ## L) #else typedef struct { uint64_t x, y; } Sleef_quad; #endif diff --git a/src/libm/sleefsimddp.c b/src/libm/sleefsimddp.c index ed2f5950..268dadf0 100644 --- a/src/libm/sleefsimddp.c +++ b/src/libm/sleefsimddp.c @@ -285,7 +285,7 @@ EXPORT CONST VECTOR_CC vint xilogb(vdouble d) { vdouble e = vcast_vd_vi(vilogbk_vi_vd(vabs_vd_vd(d))); e = vsel_vd_vo_vd_vd(veq_vo_vd_vd(d, vcast_vd_d(0)), vcast_vd_d(SLEEF_FP_ILOGB0), e); e = vsel_vd_vo_vd_vd(visnan_vo_vd(d), vcast_vd_d(SLEEF_FP_ILOGBNAN), e); - e = vsel_vd_vo_vd_vd(visinf_vo_vd(d), vcast_vd_d(INT_MAX), e); + e = vsel_vd_vo_vd_vd(visinf_vo_vd(d), vcast_vd_d(SLEEF_INT_MAX), e); return vrint_vi_vd(e); } diff --git a/src/libm/sleefsimdsp.c b/src/libm/sleefsimdsp.c index 61497895..cda71840 100644 --- a/src/libm/sleefsimdsp.c +++ b/src/libm/sleefsimdsp.c @@ -451,7 +451,7 @@ EXPORT CONST VECTOR_CC vint2 xilogbf(vfloat d) { vint2 e = vilogbk_vi2_vf(vabs_vf_vf(d)); e = vsel_vi2_vo_vi2_vi2(veq_vo_vf_vf(d, vcast_vf_f(0.0f)), vcast_vi2_i(SLEEF_FP_ILOGB0), e); e = vsel_vi2_vo_vi2_vi2(visnan_vo_vf(d), vcast_vi2_i(SLEEF_FP_ILOGBNAN), e); - e = vsel_vi2_vo_vi2_vi2(visinf_vo_vf(d), vcast_vi2_i(INT_MAX), e); + e = vsel_vi2_vo_vi2_vi2(visinf_vo_vf(d), vcast_vi2_i(SLEEF_INT_MAX), e); return e; } diff --git a/src/quad-tester/qiutcuda.cu b/src/quad-tester/qiutcuda.cu index dc027323..ff8890e7 100644 --- a/src/quad-tester/qiutcuda.cu +++ b/src/quad-tester/qiutcuda.cu @@ -81,6 +81,12 @@ __global__ void xlog2q_u10(Sleef_quadx1 *r, Sleef_quadx1 *a0) { *r = Sleef_log2q __global__ void xlog10q_u10(Sleef_quadx1 *r, Sleef_quadx1 *a0) { *r = Sleef_log10q1_u10cuda(*a0); } __global__ void xlog1pq_u10(Sleef_quadx1 *r, Sleef_quadx1 *a0) { *r = Sleef_log1pq1_u10cuda(*a0); } +__global__ void xfabsq(Sleef_quadx1 *r, Sleef_quadx1 *a0) { *r = Sleef_fabsq1_cuda(*a0); } +__global__ void xcopysignq(Sleef_quadx1 *r, Sleef_quadx1 *a0, Sleef_quadx1 *a1) { *r = Sleef_copysignq1_cuda(*a0, *a1); } +__global__ void xfmaxq(Sleef_quadx1 *r, Sleef_quadx1 *a0, Sleef_quadx1 *a1) { *r = Sleef_fmaxq1_cuda(*a0, *a1); } +__global__ void xfminq(Sleef_quadx1 *r, Sleef_quadx1 *a0, Sleef_quadx1 *a1) { *r = Sleef_fminq1_cuda(*a0, *a1); } +__global__ void xfdimq_u05(Sleef_quadx1 *r, Sleef_quadx1 *a0, Sleef_quadx1 *a1) { *r = Sleef_fdimq1_u05cuda(*a0, *a1); } + // typedef union { @@ -287,6 +293,11 @@ int main(int argc, char **argv) { func_q_q("log10q_u10", xlog10q_u10); func_q_q("log1pq_u10", xlog1pq_u10); func_q_q("negq", xnegq); + func_q_q("fabsq", xfabsq); + func_q_q_q("copysignq", xcopysignq); + func_q_q_q("fmaxq", xfmaxq); + func_q_q_q("fminq", xfminq); + func_q_q_q("fdimq_u05", xfdimq_u05); func_q_d("cast_from_doubleq", xcast_from_doubleq); func_d_q("cast_to_doubleq", xcast_to_doubleq); diff --git a/src/quad-tester/qiutsimd.c b/src/quad-tester/qiutsimd.c index a7cdb88e..504111cb 100644 --- a/src/quad-tester/qiutsimd.c +++ b/src/quad-tester/qiutsimd.c @@ -545,12 +545,19 @@ int do_test(int argc, char **argv) { func_q_q("log10q_u10", xlog10q_u10); func_q_q("log1pq_u10", xlog1pq_u10); func_q_q("negq", xnegq); + func_q_q("fabsq", xfabsq); + func_q_q_q("copysignq", xcopysignq); + func_q_q_q("fmaxq", xfmaxq); + func_q_q_q("fminq", xfminq); + func_q_q_q("fdimq_u05", xfdimq_u05); + func_q_d("cast_from_doubleq", xcast_from_doubleq); func_d_q("cast_to_doubleq", xcast_to_doubleq); func_q_i64("cast_from_int64q", xcast_from_int64q); func_i64_q("cast_to_int64q", xcast_to_int64q); func_q_u64("cast_from_uint64q", xcast_from_uint64q); func_u64_q("cast_to_uint64q", xcast_to_uint64q); + func_i_q_q("icmpltq", xicmpltq); func_i_q_q("icmpgtq", xicmpgtq); func_i_q_q("icmpleq", xicmpleq); diff --git a/src/quad-tester/qtester.c b/src/quad-tester/qtester.c index 0c452993..70ca5587 100644 --- a/src/quad-tester/qtester.c +++ b/src/quad-tester/qtester.c @@ -260,10 +260,11 @@ Sleef_quad child_log2q_u10(Sleef_quad x) { child_q_q("log2q_u10", x); } Sleef_quad child_log10q_u10(Sleef_quad x) { child_q_q("log10q_u10", x); } Sleef_quad child_log1pq_u10(Sleef_quad x) { child_q_q("log1pq_u10", x); } -Sleef_quad child_copysignq(Sleef_quad x, Sleef_quad y) { child_q_q_q("copysignq", x, y); } Sleef_quad child_fabsq(Sleef_quad x) { child_q_q("fabsq", x); } +Sleef_quad child_copysignq(Sleef_quad x, Sleef_quad y) { child_q_q_q("copysignq", x, y); } Sleef_quad child_fmaxq(Sleef_quad x, Sleef_quad y) { child_q_q_q("fmaxq", x, y); } Sleef_quad child_fminq(Sleef_quad x, Sleef_quad y) { child_q_q_q("fminq", x, y); } +Sleef_quad child_fdimq_u05(Sleef_quad x, Sleef_quad y) { child_q_q_q("fdimq_u05", x, y); } // @@ -430,7 +431,17 @@ void do_test(int options) { int success = 1; static const char *stdCheckVals[] = { - "0.0", "-0.0", "+0.5", "-0.5", "+1.0", "-1.0", "+1.5", "-1.5", "+2.0", "-2.0", "+2.5", "-2.5", + "-0.0", "0.0", "+0.5", "-0.5", "+1.0", "-1.0", "+1.5", "-1.5", "+2.0", "-2.0", "+2.5", "-2.5", + "1.234", "-1.234", "+1.234e+100", "-1.234e+100", "+1.234e-100", "-1.234e-100", + "+1.234e+3000", "-1.234e+3000", "+1.234e-3000", "-1.234e-3000", + "3.1415926535897932384626433832795028841971693993751058209749445923078164", + "+" STR_QUAD_MIN, "-" STR_QUAD_MIN, + "+" STR_QUAD_DENORM_MIN, "-" STR_QUAD_DENORM_MIN, + "Inf", "-Inf", "NaN" + }; + + static const char *noNegZeroCheckVals[] = { + "0.0", "+0.5", "-0.5", "+1.0", "-1.0", "+1.5", "-1.5", "+2.0", "-2.0", "+2.5", "-2.5", "1.234", "-1.234", "+1.234e+100", "-1.234e+100", "+1.234e-100", "-1.234e-100", "+1.234e+3000", "-1.234e+3000", "+1.234e-3000", "-1.234e-3000", "3.1415926535897932384626433832795028841971693993751058209749445923078164", @@ -439,6 +450,16 @@ void do_test(int options) { "Inf", "-Inf", "NaN" }; + static const char *noNanCheckVals[] = { + "-0.0", "0.0", "+0.5", "-0.5", "+1.0", "-1.0", "+1.5", "-1.5", "+2.0", "-2.0", "+2.5", "-2.5", + "1.234", "-1.234", "+1.234e+100", "-1.234e+100", "+1.234e-100", "-1.234e-100", + "+1.234e+3000", "-1.234e+3000", "+1.234e-3000", "-1.234e-3000", + "3.1415926535897932384626433832795028841971693993751058209749445923078164", + "+" STR_QUAD_MIN, "-" STR_QUAD_MIN, + "+" STR_QUAD_DENORM_MIN, "-" STR_QUAD_DENORM_MIN, + "Inf", "-Inf" + }; + static const char *trigCheckVals[] = { "3.141592653589793238462643383279502884197169399375105820974944592307", "6.283185307179586476925286766559005768394338798750211641949889184615", @@ -492,8 +513,8 @@ void do_test(int options) { maxError = 0; cmpDenormOuterLoop_q(mpfr_neg, child_negq, stdCheckVals); checkAccuracyOuterLoop2_q(mpfr_neg, child_negq, stdCheckVals, 0); - checkAccuracyOuterLoop_q(mpfr_neg, child_negq, "-1e-100", "-1e+100", 5 * NTEST, errorBound, 0); - checkAccuracyOuterLoop_q(mpfr_neg, child_negq, "0", "Inf", 5 * NTEST, errorBound, 1); + checkAccuracyOuterLoop_q(mpfr_neg, child_negq, "-1e-100", "-1e+100", 5 * NTEST, 0, 0); + checkAccuracyOuterLoop_q(mpfr_neg, child_negq, "0", "Inf", 5 * NTEST, 0, 1); checkResult(success, maxError); // @@ -802,6 +823,52 @@ void do_test(int options) { checkAccuracyOuterLoop_q(mpfr_log1p, child_log1pq_u10, "0", "Inf", 3 * NTEST, 1.0, 1); checkResult(success, maxError); + // + + fprintf(stderr, "fabsq : "); + maxError = 0; + cmpDenormOuterLoop_q(mpfr_abs, child_fabsq, stdCheckVals); + checkAccuracyOuterLoop2_q(mpfr_abs, child_fabsq, stdCheckVals, 0); + checkAccuracyOuterLoop_q(mpfr_abs, child_fabsq, "-1e-100", "-1e+100", 5 * NTEST, 0, 0); + checkAccuracyOuterLoop_q(mpfr_abs, child_fabsq, "0", "Inf", 5 * NTEST, 0, 1); + checkResult(success, maxError); + + fprintf(stderr, "fmaxq : "); + maxError = 0; + cmpDenormOuterLoop_q_q(mpfr_max, child_fmaxq, noNegZeroCheckVals); + checkAccuracyOuterLoop2_q_q(mpfr_max, child_fmaxq, stdCheckVals, 0); + checkAccuracyOuterLoop_q_q(mpfr_max, child_fmaxq, "-1e-100", "-1e+100", 5 * NTEST, 0, 0); + checkAccuracyOuterLoop_q_q(mpfr_max, child_fmaxq, "0", "Inf", 5 * NTEST, 0, 1); + checkResult(success, maxError); + + fprintf(stderr, "fminq : "); + maxError = 0; + cmpDenormOuterLoop_q_q(mpfr_min, child_fminq, noNegZeroCheckVals); + checkAccuracyOuterLoop2_q_q(mpfr_min, child_fminq, stdCheckVals, 0); + checkAccuracyOuterLoop_q_q(mpfr_min, child_fminq, "-1e-100", "-1e+100", 5 * NTEST, 0, 0); + checkAccuracyOuterLoop_q_q(mpfr_min, child_fminq, "0", "Inf", 5 * NTEST, 0, 1); + checkResult(success, maxError); + + fprintf(stderr, "copysignq : "); + maxError = 0; + cmpDenormOuterLoop_q_q(mpfr_copysign, child_copysignq, noNanCheckVals); + checkAccuracyOuterLoop2_q_q(mpfr_copysign, child_copysignq, noNanCheckVals, 0); + checkAccuracyOuterLoop_q_q(mpfr_copysign, child_copysignq, "-1e-100", "-1e+100", 5 * NTEST, 0, 0); + checkAccuracyOuterLoop_q_q(mpfr_copysign, child_copysignq, "0", "Inf", 5 * NTEST, 0, 1); + checkResult(success, maxError); + +#if 0 + fprintf(stderr, "fdimq_u05 : "); + maxError = 0; + cmpDenormOuterLoop_q_q(mpfr_dim, child_fdimq_u05, stdCheckVals); + checkAccuracyOuterLoop2_q_q(mpfr_dim, child_fdimq_u05, stdCheckVals, 0.5); + checkAccuracyOuterLoop_q_q(mpfr_dim, child_fdimq_u05, "-1e-100", "-1e+100", 5 * NTEST, errorBound, 0); + checkAccuracyOuterLoop_q_q(mpfr_dim, child_fdimq_u05, "0", "Inf", 5 * NTEST, errorBound, 1); + checkResult(success, maxError); +#endif + + // + if ((options & 2) != 0) { fprintf(stderr, "strtoq : "); for(int i=0;ifloat128 conversion is used" ) void mpfr_set_f128(mpfr_t frx, Sleef_quad a, mpfr_rnd_t rnd) { diff --git a/src/quad-tester/tester2printf.c b/src/quad-tester/tester2printf.c index 9209db93..341d302f 100644 --- a/src/quad-tester/tester2printf.c +++ b/src/quad-tester/tester2printf.c @@ -18,7 +18,7 @@ #include "sleefquad.h" #include "qtesterutil.h" -void testem_rnd(__float128 val) { +void testem_rnd(Sleef_quad val) { int prec = xrand() % 25, width = xrand() % 50; char *types[] = { "Qe", "Qf", "Qg", "Qa" }; for(int i=0;i<4;i++) { @@ -125,7 +125,7 @@ void testem_rnd(__float128 val) { } } -int testem(__float128 val) { +int testem(Sleef_quad val) { int ret = 0; char *types[] = { "Qe", "Qf", "Qg", "Qa" }; for(int i=0;i<4;i++) { @@ -238,7 +238,7 @@ int main(int argc, char **argv) { strtoflt128("1", NULL); // This is for registering hook - __float128 vals[] = { + Sleef_quad vals[] = { 1.2345678912345678912345e+0Q, 1.2345678912345678912345e+1Q, 1.2345678912345678912345e-1Q, @@ -269,13 +269,13 @@ int main(int argc, char **argv) { 1e+300*1e+300, 1e+300*1e+300 - 1e+300*1e+300 }; - for(int i=0;i 1e+25) continue; testem_rnd(q); diff --git a/src/quad/qfuncproto.h b/src/quad/qfuncproto.h index f829184e..7d31dc54 100644 --- a/src/quad/qfuncproto.h +++ b/src/quad/qfuncproto.h @@ -92,6 +92,12 @@ funcSpec funcList[] = { { "log10", 10, 1, 0, 0 }, { "log1p", 10, 1, 0, 0 }, + { "fabs", -1, 0, 0, 0 }, + { "copysign", -1, 0, 1, 0 }, + { "fmax", -1, 0, 1, 0 }, + { "fmin", -1, 0, 1, 0 }, + { "fdim", 5, 2, 1, 0 }, + //{ "sincos", 10, 1, 2, 0 }, //{ "ldexp", -1, 0, 3, 0 }, //{ "ilogb", -1, 0, 4, 0 }, diff --git a/src/quad/sleefquadinline_cuda_header.h.org b/src/quad/sleefquadinline_cuda_header.h.org index d110d4a1..abd080e3 100644 --- a/src/quad/sleefquadinline_cuda_header.h.org +++ b/src/quad/sleefquadinline_cuda_header.h.org @@ -11,6 +11,10 @@ #define Sleef_quad_DEFINED #if defined(__SIZEOF_FLOAT128__) || (defined(__linux__) && defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))) || (defined(__PPC64__) && defined(__GNUC__) && !defined(__clang__) && __GNUC__ >= 8) typedef __float128 Sleef_quad; +#define SLEEF_QUAD_C(x) (x ## Q) +//#elif defined(__SIZEOF_LONG_DOUBLE__) && defined(__aarch64__) +//typedef long double Sleef_quad; +//#define SLEEF_QUAD_C(x) (x ## L) #else typedef struct { uint64_t x, y; } Sleef_quad; #endif diff --git a/src/quad/sleefquadinline_header.h.org b/src/quad/sleefquadinline_header.h.org index 5c0455c7..fd7ce903 100644 --- a/src/quad/sleefquadinline_header.h.org +++ b/src/quad/sleefquadinline_header.h.org @@ -9,7 +9,7 @@ #endif #ifndef SLEEF_INLINE -#define SLEEF_INLINE inline +#define SLEEF_INLINE static inline #endif #ifndef SLEEF_CONST @@ -24,6 +24,10 @@ #define Sleef_quad_DEFINED #if defined(__SIZEOF_FLOAT128__) || (defined(__linux__) && defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))) || (defined(__PPC64__) && defined(__GNUC__) && !defined(__clang__) && __GNUC__ >= 8) typedef __float128 Sleef_quad; +#define SLEEF_QUAD_C(x) (x ## Q) +//#elif defined(__SIZEOF_LONG_DOUBLE__) && defined(__aarch64__) +//typedef long double Sleef_quad; +//#define SLEEF_QUAD_C(x) (x ## L) #else typedef struct { uint64_t x, y; } Sleef_quad; #endif diff --git a/src/quad/sleefsimdqp.c b/src/quad/sleefsimdqp.c index 412d8a2a..241ced2f 100644 --- a/src/quad/sleefsimdqp.c +++ b/src/quad/sleefsimdqp.c @@ -807,14 +807,38 @@ static INLINE CONST VECTOR_CC vmask vcmp_vm_tdx_tdx(tdx t0, tdx t1) { return r; } +static INLINE CONST VECTOR_CC vopmask vsignbit_vo_tdx(tdx x) { + return vsignbit_vo_vd(tdxgetd3x_vd_tdx(x)); +} + +static INLINE CONST VECTOR_CC vopmask visnan_vo_tdx(tdx x) { + return visnan_vo_vd(tdxgetd3x_vd_tdx(x)); +} + +static INLINE CONST VECTOR_CC vopmask veq_vo_tdx_tdx(tdx x, tdx y) { + return veq64_vo_vm_vm(vcmp_vm_tdx_tdx(x, y), vcast_vm_i_i(0, 0)); +} + +static INLINE CONST VECTOR_CC vopmask vneq_vo_tdx_tdx(tdx x, tdx y) { + return vnot_vo64_vo64(veq64_vo_vm_vm(vcmp_vm_tdx_tdx(x, y), vcast_vm_i_i(0, 0))); +} + static INLINE CONST VECTOR_CC vopmask vgt_vo_tdx_tdx(tdx x, tdx y) { return vgt64_vo_vm_vm(vcmp_vm_tdx_tdx(x, y), vcast_vm_i_i(0, 0)); } +static INLINE CONST VECTOR_CC vopmask vlt_vo_tdx_tdx(tdx x, tdx y) { + return vgt64_vo_vm_vm(vcmp_vm_tdx_tdx(y, x), vcast_vm_i_i(0, 0)); +} + static INLINE CONST VECTOR_CC vopmask vge_vo_tdx_tdx(tdx x, tdx y) { return vgt64_vo_vm_vm(vcmp_vm_tdx_tdx(x, y), vcast_vm_i_i(-1, -1)); } +static INLINE CONST VECTOR_CC vopmask vle_vo_tdx_tdx(tdx x, tdx y) { + return vgt64_vo_vm_vm(vcmp_vm_tdx_tdx(y, x), vcast_vm_i_i(-1, -1)); +} + // TDX Cast operators static INLINE CONST VECTOR_CC tdx vcast_tdx_vd(vdouble d) { @@ -824,6 +848,10 @@ static INLINE CONST VECTOR_CC tdx vcast_tdx_vd(vdouble d) { return r; } +static INLINE CONST VECTOR_CC tdx vcast_tdx_d(double d) { + return vcast_tdx_vd(vcast_vd_d(d)); +} + static INLINE CONST VECTOR_CC tdx vcast_tdx_vd3(vdouble3 d) { vmask re = vilogbk_vm_vd(vd3getx_vd_vd3(d)); vdouble3 rd3 = vd3setxyz_vd3_vd_vd_vd(vldexp2_vd_vd_vm(vd3getx_vd_vd3(d), vneg64_vm_vm(re)), @@ -886,7 +914,7 @@ static INLINE CONST VECTOR_CC vmask vilogb_vm_tdx(tdx t) { return e; } -static INLINE CONST VECTOR_CC tdx add2_tdx_tdx_tdx(tdx dd0, tdx dd1) { +static INLINE CONST VECTOR_CC tdx add2_tdx_tdx_tdx(tdx dd0, tdx dd1) { // finite numbers only vmask ed = vsub64_vm_vm_vm(tdxgete_vm_tdx(dd1), tdxgete_vm_tdx(dd0)); vdouble t = vldexp3_vd_vd_vm(vcast_vd_d(1), ed); @@ -1380,9 +1408,9 @@ static INLINE CONST VECTOR_CC tdx expm1_tdx_tdx(tdx a) { vopmask p = vneq_vo_vd_vd(dq, vcast_vd_d(0)); - r = vsel_tdx_vo64_tdx_tdx(p, add2_tdx_tdx_tdx(r, vcast_tdx_vd(vcast_vd_d(1))), r); + r = vsel_tdx_vo64_tdx_tdx(p, add2_tdx_tdx_tdx(r, vcast_tdx_d(1)), r); r = tdxsete_tdx_tdx_vm(r, vsel_vm_vo64_vm_vm(p, vadd64_vm_vm_vm(tdxgete_vm_tdx(r), vcast_vm_vi(q)), tdxgete_vm_tdx(r))); - r = vsel_tdx_vo64_tdx_tdx(p, sub2_tdx_tdx_tdx(r, vcast_tdx_vd(vcast_vd_d(1))), r); + r = vsel_tdx_vo64_tdx_tdx(p, sub2_tdx_tdx_tdx(r, vcast_tdx_d(1)), r); p = vand_vo_vo_vo(vgt_vo_vd_vd(tdxgetd3x_vd_tdx(a), vcast_vd_d(0)), vor_vo_vo_vo(visinf_vo_vd(tdxgetd3x_vd_tdx(a)), vgt64_vo_vm_vm(tdxgete_vm_tdx(a), vcast_vm_i_i(0, 16397)))); @@ -1391,7 +1419,7 @@ static INLINE CONST VECTOR_CC tdx expm1_tdx_tdx(tdx a) { p = vandnot_vo_vo_vo(vgt_vo_vd_vd(tdxgetd3x_vd_tdx(a), vcast_vd_d(0)), vor_vo_vo_vo(visinf_vo_vd(tdxgetd3x_vd_tdx(a)), vgt64_vo_vm_vm(tdxgete_vm_tdx(a), vcast_vm_i_i(0, 16389)))); - r = vsel_tdx_vo64_tdx_tdx(vor_vo_vo_vo(o, p), vcast_tdx_vd(vcast_vd_d(-1)), r); + r = vsel_tdx_vo64_tdx_tdx(vor_vo_vo_vo(o, p), vcast_tdx_d(-1), r); r = vsel_tdx_vo64_tdx_tdx(vor_vo_vo_vo(visnan_vo_vd(tdxgetd3x_vd_tdx(a)), vlt64_vo_vm_vm(tdxgete_vm_tdx(a), vcast_vm_i_i(0, 16000))), a, r); @@ -1550,13 +1578,13 @@ static INLINE CONST VECTOR_CC tdx log10_tdx_tdx(tdx d) { } static INLINE CONST VECTOR_CC tdx log1p_tdx_tdx(tdx d) { - vmask cm1 = vcmp_vm_tdx_tdx(d, vcast_tdx_vd(vcast_vd_d(-1))); + vmask cm1 = vcmp_vm_tdx_tdx(d, vcast_tdx_d(-1)); vopmask fnan = vlt64_vo_vm_vm(cm1, vcast_vm_i_i(0, 0)); vopmask fminf = vand_vo_vo_vo(veq64_vo_vm_vm(cm1, vcast_vm_i_i(0, 0)), vneq_vo_vd_vd(tdxgetd3x_vd_tdx(d), vcast_vd_d(-SLEEF_INFINITY))); vopmask o = vlt64_vo_vm_vm(tdxgete_vm_tdx(d), vcast_vm_i_i(0, 16383 + 0x3f0)); - tdx dp1 = add2_tdx_tdx_tdx(d, vcast_tdx_vd(vcast_vd_d(1))); + tdx dp1 = add2_tdx_tdx_tdx(d, vcast_tdx_d(1)); vdouble s = vsel_vd_vo_vd_vd(o, vcast_vd_tdx(dp1), tdxgetd3x_vd_tdx(d)); vmask e = vilogb2k_vm_vd(vmul_vd_vd_vd(s, vcast_vd_d(1/0.75))); @@ -2468,6 +2496,58 @@ EXPORT CONST VECTOR_CC vargquad xnegq(vargquad aa) { return vcast_aq_vm2(a); } +EXPORT CONST VECTOR_CC vargquad xfabsq(vargquad aa) { + vmask2 a = vcast_vm2_aq(aa); + a = vm2sety_vm2_vm2_vm(a, vand_vm_vm_vm(vm2gety_vm_vm2(a), vcast_vm_i_i(0x7fffffff, 0xffffffff))); + return vcast_aq_vm2(a); +} + +EXPORT CONST VECTOR_CC vargquad xcopysignq(vargquad aa, vargquad ab) { + vmask2 a = vcast_vm2_aq(aa), b = vcast_vm2_aq(ab); + a = vm2sety_vm2_vm2_vm(a, vor_vm_vm_vm(vand_vm_vm_vm(vm2gety_vm_vm2(a), vcast_vm_i_i(0x7fffffff, 0xffffffff)), + vand_vm_vm_vm(vm2gety_vm_vm2(b), vcast_vm_i_i(0x80000000, 0)))); + return vcast_aq_vm2(a); +} + +EXPORT CONST VECTOR_CC vargquad xfmaxq(vargquad aa, vargquad ab) { + vmask2 a = vcast_vm2_aq(aa), b = vcast_vm2_aq(ab); + vopmask onana = visnanq_vo_vm2(a), onanb = visnanq_vo_vm2(b); + a = vcmpcnv_vm2_vm2(a); + b = vcmpcnv_vm2_vm2(b); + + vopmask ogt = vor_vo_vo_vo(vgt64_vo_vm_vm(vm2gety_vm_vm2(a), vm2gety_vm_vm2(b)), + vand_vo_vo_vo(veq64_vo_vm_vm(vm2gety_vm_vm2(a), vm2gety_vm_vm2(b)), vugt64_vo_vm_vm(vm2getx_vm_vm2(a), vm2getx_vm_vm2(b)))); + + vmask2 r = vsel_vm2_vo_vm2_vm2(ogt, vcast_vm2_aq(aa), vcast_vm2_aq(ab)); + r = vsel_vm2_vo_vm2_vm2(onana, vcast_vm2_aq(ab), r); + r = vsel_vm2_vo_vm2_vm2(onanb, vcast_vm2_aq(aa), r); + return vcast_aq_vm2(r); +} + +EXPORT CONST VECTOR_CC vargquad xfminq(vargquad aa, vargquad ab) { + vmask2 a = vcast_vm2_aq(aa), b = vcast_vm2_aq(ab); + vopmask onana = visnanq_vo_vm2(vcast_vm2_aq(aa)), onanb = visnanq_vo_vm2(b); + a = vcmpcnv_vm2_vm2(a); + b = vcmpcnv_vm2_vm2(b); + + vopmask olt = vor_vo_vo_vo(vgt64_vo_vm_vm(vm2gety_vm_vm2(b), vm2gety_vm_vm2(a)), + vand_vo_vo_vo(veq64_vo_vm_vm(vm2gety_vm_vm2(b), vm2gety_vm_vm2(a)), vugt64_vo_vm_vm(vm2getx_vm_vm2(b), vm2getx_vm_vm2(a)))); + + vmask2 r = vsel_vm2_vo_vm2_vm2(olt, vcast_vm2_aq(aa), vcast_vm2_aq(ab)); + r = vsel_vm2_vo_vm2_vm2(onana, vcast_vm2_aq(ab), r); + r = vsel_vm2_vo_vm2_vm2(onanb, vcast_vm2_aq(aa), r); + return vcast_aq_vm2(r); +} + +EXPORT CONST VECTOR_CC vargquad xfdimq_u05(vargquad aa, vargquad ab) { + tdx a = vcast_tdx_vf128(vcast_vm2_aq(aa)), b = vcast_tdx_vf128(vcast_vm2_aq(ab)); + tdx r = sub2_tdx_tdx_tdx(a, b); + r = vsel_tdx_vo64_tdx_tdx(vsignbit_vo_tdx(r), vcast_tdx_d(0), r); + r = vsel_tdx_vo64_tdx_tdx(visnan_vo_tdx(a), a, r); + r = vsel_tdx_vo64_tdx_tdx(visnan_vo_tdx(b), b, r); + return vcast_aq_vm2(vcast_vf128_tdx(r)); +} + // Float128 math functions EXPORT CONST VECTOR_CC vargquad xsqrtq_u05(vargquad aa) { @@ -2674,17 +2754,17 @@ static const tdx exp10tab[14] = { static CONST tdx exp10i(int n) { int neg = 0; if (n < 0) { neg = 1; n = -n; } - tdx r = vcast_tdx_vd(1); + tdx r = vcast_tdx_d(1); for(int i=0;i<14;i++) if ((n & (1 << i)) != 0) r = mul2_tdx_tdx_tdx(r, exp10tab[i]); - if (neg) r = div2_tdx_tdx_tdx(vcast_tdx_vd(1), r); + if (neg) r = div2_tdx_tdx_tdx(vcast_tdx_d(1), r); return r; } static CONST int ilog10(tdx t) { int r = 0, p = 1; - if ((int)vcmp_vm_tdx_tdx(t, vcast_tdx_vd(1)) < 0) { - t = div2_tdx_tdx_tdx(vcast_tdx_vd(1), t); + if ((int)vcmp_vm_tdx_tdx(t, vcast_tdx_d(1)) < 0) { + t = div2_tdx_tdx_tdx(vcast_tdx_d(1), t); p = -1; } for(int i=12;i>=0;i--) { @@ -2741,7 +2821,7 @@ EXPORT vargquad Sleef_strtoq(const char *str, const char **endptr) { const char *p = str; int positive = 1, bp = 0, e = 0, mf = 0; - tdx n = vcast_tdx_vd(0), d = vcast_tdx_vd(1); + tdx n = vcast_tdx_d(0), d = vcast_tdx_d(1); if (*p == '-') { positive = 0; @@ -2835,8 +2915,8 @@ EXPORT vargquad Sleef_strtoq(const char *str, const char **endptr) { while(*p != '\0') { if ('0' <= *p && *p <= '9') { - n = add2_tdx_tdx_tdx(mul2_tdx_tdx_tdx(n, vcast_tdx_vd(10)), vcast_tdx_vd(*p - '0')); - if (bp) d = mul2_tdx_tdx_tdx(d, vcast_tdx_vd(10)); + n = add2_tdx_tdx_tdx(mul2_tdx_tdx_tdx(n, vcast_tdx_d(10)), vcast_tdx_d(*p - '0')); + if (bp) d = mul2_tdx_tdx_tdx(d, vcast_tdx_d(10)); p++; mf = 1; continue; @@ -2925,7 +3005,7 @@ static int snprintquad(char *buf, size_t bufsize, vargquad argvalue, int typespe if (precision > bufsize/2 - 10) precision = bufsize/2 - 10; if (typespec == 'g' && precision > 0) precision--; - tdx rounder = mul2_tdx_tdx_tdx(vcast_tdx_vd(0.5), exp10i(-precision)); + tdx rounder = mul2_tdx_tdx_tdx(vcast_tdx_d(0.5), exp10i(-precision)); if (typespec == 'f') value = add2_tdx_tdx_tdx(value, rounder); @@ -2941,8 +3021,8 @@ static int snprintquad(char *buf, size_t bufsize, vargquad argvalue, int typespe value = add2_tdx_tdx_tdx(value, rounder); } - if ((int)vcmp_vm_tdx_tdx(value, vcast_tdx_vd(10.0)) >= 0) { - value = div2_tdx_tdx_tdx(value, vcast_tdx_vd(10)); + if ((int)vcmp_vm_tdx_tdx(value, vcast_tdx_d(10.0)) >= 0) { + value = div2_tdx_tdx_tdx(value, vcast_tdx_d(10)); exp++; } @@ -2967,10 +3047,10 @@ static int snprintquad(char *buf, size_t bufsize, vargquad argvalue, int typespe } else { for(;e2>=0;e2--) { int digit = (int)vcast_vd_tdx(value); - if ((int)vcmp_vm_tdx_tdx(value, vcast_tdx_vd(digit)) < 0) digit--; + if ((int)vcmp_vm_tdx_tdx(value, vcast_tdx_d(digit)) < 0) digit--; if (ptr - buf >= bufsize-1) { *ptr = '\0'; return -1; } *ptr++ = digit + '0'; - value = mul2_tdx_tdx_tdx(add2_tdx_tdx_tdx(value, vcast_tdx_vd(-digit)), vcast_tdx_vd(10)); + value = mul2_tdx_tdx_tdx(add2_tdx_tdx_tdx(value, vcast_tdx_d(-digit)), vcast_tdx_d(10)); } } @@ -2986,10 +3066,10 @@ static int snprintquad(char *buf, size_t bufsize, vargquad argvalue, int typespe while (precision-- > 0) { int digit = (int)vcast_vd_tdx(value); - if ((int)vcmp_vm_tdx_tdx(value, vcast_tdx_vd(digit)) < 0) digit--; + if ((int)vcmp_vm_tdx_tdx(value, vcast_tdx_d(digit)) < 0) digit--; if (ptr - buf >= bufsize-1) { *ptr = '\0'; return -1; } *ptr++ = digit + '0'; - value = mul2_tdx_tdx_tdx(add2_tdx_tdx_tdx(value, vcast_tdx_vd(-digit)), vcast_tdx_vd(10)); + value = mul2_tdx_tdx_tdx(add2_tdx_tdx_tdx(value, vcast_tdx_d(-digit)), vcast_tdx_d(10)); } if (flag_rtz && flag_dp) { @@ -3507,14 +3587,16 @@ EXPORT void Sleef_unregisterPrintfHook() { #if 0 int main(int argc, char **argv) { - Sleef_quad q0 = strtoflt128(argv[1], NULL); - Sleef_quad q1 = strtoflg128(argv[2], NULL); + Sleef_quad q0 = atof(argv[1]); - vint64 ti = xcast_to_int64q(q0); - printfvint64("t ", ti); + int lane = 0; + vargquad a0; + memset(&a0, 0, sizeof(vargquad)); + a0 = xsetq(a0, lane, q0); + + tdx t = vcast_tdx_vf128(vcast_vm2_aq(a0)); - int64_t ci = (int64_t)q0; - printf("c %ld\n", ci); + printvdouble("t.d3.x", t.d3.x); } #endif @@ -3583,6 +3665,17 @@ int main(int argc, char **argv) { printf("test : %s\n", sprintfr(fr2)); #endif +#if 1 + a2 = xsubq_u05(a0, a1); + mpfr_sub(fr2, fr0, fr1, GMP_RNDN); + + printf("\nsub\n"); + mpfr_set_f128(fr2, mpfr_get_f128(fr2, GMP_RNDN), GMP_RNDN); + printf("corr : %s\n", sprintfr(fr2)); + mpfr_set_f128(fr2, xgetq(a2, lane), GMP_RNDN); + printf("test : %s\n", sprintfr(fr2)); +#endif + #if 0 a2 = xmulq_u05(a0, a1); mpfr_mul(fr2, fr0, fr1, GMP_RNDN); @@ -3594,7 +3687,7 @@ int main(int argc, char **argv) { printf("test : %s\n", sprintfr(fr2)); #endif -#if 1 +#if 0 a2 = xdivq_u05(a0, a1); mpfr_div(fr2, fr0, fr1, GMP_RNDN); @@ -3605,7 +3698,7 @@ int main(int argc, char **argv) { printf("test : %s\n", sprintfr(fr2)); #endif -#if 1 +#if 0 a2 = xsqrtq_u05(a0); mpfr_sqrt(fr2, fr0, GMP_RNDN); @@ -3616,7 +3709,7 @@ int main(int argc, char **argv) { printf("test : %s\n", sprintfr(fr2)); #endif -#if 1 +#if 0 a2 = xsinq_u10(a0); mpfr_sin(fr2, fr0, GMP_RNDN); @@ -3626,6 +3719,61 @@ int main(int argc, char **argv) { mpfr_set_f128(fr2, xgetq(a2, lane), GMP_RNDN); printf("test : %s\n", sprintfr(fr2)); #endif + +#if 1 + a2 = xfabsq(a0); + mpfr_abs(fr2, fr0, GMP_RNDN); + + printf("\nfabs\n"); + mpfr_set_f128(fr2, mpfr_get_f128(fr2, GMP_RNDN), GMP_RNDN); + printf("corr : %s\n", sprintfr(fr2)); + mpfr_set_f128(fr2, xgetq(a2, lane), GMP_RNDN); + printf("test : %s\n", sprintfr(fr2)); +#endif + +#if 1 + a2 = xcopysignq(a0, a1); + mpfr_copysign(fr2, fr0, fr1, GMP_RNDN); + + printf("\ncopysign\n"); + mpfr_set_f128(fr2, mpfr_get_f128(fr2, GMP_RNDN), GMP_RNDN); + printf("corr : %s\n", sprintfr(fr2)); + mpfr_set_f128(fr2, xgetq(a2, lane), GMP_RNDN); + printf("test : %s\n", sprintfr(fr2)); +#endif + +#if 1 + a2 = xfmaxq(a0, a1); + mpfr_max(fr2, fr0, fr1, GMP_RNDN); + + printf("\nmax\n"); + mpfr_set_f128(fr2, mpfr_get_f128(fr2, GMP_RNDN), GMP_RNDN); + printf("corr : %s\n", sprintfr(fr2)); + mpfr_set_f128(fr2, xgetq(a2, lane), GMP_RNDN); + printf("test : %s\n", sprintfr(fr2)); +#endif + +#if 1 + a2 = xfminq(a0, a1); + mpfr_min(fr2, fr0, fr1, GMP_RNDN); + + printf("\nmin\n"); + mpfr_set_f128(fr2, mpfr_get_f128(fr2, GMP_RNDN), GMP_RNDN); + printf("corr : %s\n", sprintfr(fr2)); + mpfr_set_f128(fr2, xgetq(a2, lane), GMP_RNDN); + printf("test : %s\n", sprintfr(fr2)); +#endif + +#if 1 + a2 = xfdimq_u05(a0, a1); + mpfr_dim(fr2, fr0, fr1, GMP_RNDN); + + printf("\nfdim\n"); + mpfr_set_f128(fr2, mpfr_get_f128(fr2, GMP_RNDN), GMP_RNDN); + printf("corr : %s\n", sprintfr(fr2)); + mpfr_set_f128(fr2, xgetq(a2, lane), GMP_RNDN); + printf("test : %s\n", sprintfr(fr2)); +#endif } #endif #endif diff --git a/travis/before_script.aarch64-gcc.sh b/travis/before_script.aarch64-gcc.sh index 756ee4ba..56c4c88c 100644 --- a/travis/before_script.aarch64-gcc.sh +++ b/travis/before_script.aarch64-gcc.sh @@ -8,5 +8,5 @@ ninja all cd /build mkdir build-cross cd build-cross -cmake -G Ninja -DRUNNING_ON_TRAVIS=TRUE -DCMAKE_TOOLCHAIN_FILE=../travis/toolchain-aarch64.cmake -DNATIVE_BUILD_DIR=`pwd`/../build-native -DEMULATOR=qemu-aarch64-static -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_INLINE_HEADERS=TRUE .. +cmake -G Ninja -DRUNNING_ON_TRAVIS=TRUE -DCMAKE_TOOLCHAIN_FILE=../travis/toolchain-aarch64.cmake -DNATIVE_BUILD_DIR=`pwd`/../build-native -DEMULATOR=qemu-aarch64-static -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_DFT=TRUE -DBUILD_INLINE_HEADERS=TRUE .. diff --git a/travis/before_script.arm64-clang-lto.sh b/travis/before_script.arm64-clang-lto.sh index 7a715e81..9a5eab57 100644 --- a/travis/before_script.arm64-clang-lto.sh +++ b/travis/before_script.arm64-clang-lto.sh @@ -4,4 +4,4 @@ mkdir sleef.build cd sleef.build export CC=clang-8 export CXX=clang++-8 -cmake -G Ninja -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_SHARED_LIBS=FALSE -DENABLE_LTO=TRUE -DCMAKE_LINKER=lld-8 -DSLEEF_ENABLE_LLVM_BITCODE=TRUE .. +cmake -G Ninja -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_DFT=TRUE -DBUILD_SHARED_LIBS=FALSE -DENABLE_LTO=TRUE -DCMAKE_LINKER=lld-8 -DSLEEF_ENABLE_LLVM_BITCODE=TRUE .. diff --git a/travis/before_script.arm64-clang.sh b/travis/before_script.arm64-clang.sh index a8edca21..915b87f9 100644 --- a/travis/before_script.arm64-clang.sh +++ b/travis/before_script.arm64-clang.sh @@ -4,4 +4,4 @@ mkdir sleef.build cd sleef.build export CC=clang-8 export CXX=clang++-8 -cmake -G Ninja -DRUNNING_ON_TRAVIS=TRUE -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_INLINE_HEADERS=TRUE -DENABLE_ALTDIV=TRUE -DENABLE_ALTSQRT=TRUE .. +cmake -G Ninja -DRUNNING_ON_TRAVIS=TRUE -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_DFT=TRUE -DBUILD_INLINE_HEADERS=TRUE -DENABLE_ALTDIV=TRUE -DENABLE_ALTSQRT=TRUE .. diff --git a/travis/before_script.arm64-gcc-lto.sh b/travis/before_script.arm64-gcc-lto.sh index b15a7f4a..f4c4af1c 100644 --- a/travis/before_script.arm64-gcc-lto.sh +++ b/travis/before_script.arm64-gcc-lto.sh @@ -4,4 +4,4 @@ mkdir sleef.build cd sleef.build export CC=gcc-7 export CXX=g++-7 -cmake -G Ninja -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_SHARED_LIBS=FALSE -DENABLE_LTO=TRUE .. +cmake -G Ninja -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_DFT=TRUE -DBUILD_SHARED_LIBS=FALSE -DENABLE_LTO=TRUE .. diff --git a/travis/before_script.arm64-gcc-sve.sh b/travis/before_script.arm64-gcc-sve.sh index 7329def1..3a9009fe 100644 --- a/travis/before_script.arm64-gcc-sve.sh +++ b/travis/before_script.arm64-gcc-sve.sh @@ -5,4 +5,4 @@ cd sleef.build export PATH=/opt/local/bin:$PATH export LD_LIBRARY_PATH=/opt/local/lib:$LD_LIBRARY_PATH export CC=gcc-10 -cmake -G Ninja -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DEMULATOR=qemu-aarch64 -DENFORCE_TESTER3=TRUE -DBUILD_INLINE_HEADERS=TRUE -DBUILD_QUAD=FALSE -DENFORCE_SVE=TRUE .. +cmake -G Ninja -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DEMULATOR=qemu-aarch64 -DENFORCE_TESTER3=TRUE -DBUILD_INLINE_HEADERS=TRUE -DBUILD_QUAD=FALSE -DBUILD_DFT=TRUE -DENFORCE_SVE=TRUE .. diff --git a/travis/before_script.arm64-gcc.sh b/travis/before_script.arm64-gcc.sh index a96211f7..0b0c976a 100644 --- a/travis/before_script.arm64-gcc.sh +++ b/travis/before_script.arm64-gcc.sh @@ -3,4 +3,4 @@ set -ev mkdir sleef.build cd sleef.build export CC=gcc-8 -cmake -G Ninja -DRUNNING_ON_TRAVIS=TRUE -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_INLINE_HEADERS=TRUE .. +cmake -G Ninja -DRUNNING_ON_TRAVIS=TRUE -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_DFT=TRUE -DBUILD_INLINE_HEADERS=TRUE .. diff --git a/travis/before_script.armhf-gcc.sh b/travis/before_script.armhf-gcc.sh index 808f6016..8c4bd4fa 100644 --- a/travis/before_script.armhf-gcc.sh +++ b/travis/before_script.armhf-gcc.sh @@ -8,4 +8,4 @@ ninja all cd /build mkdir build-cross cd build-cross -cmake -G Ninja -DRUNNING_ON_TRAVIS=TRUE -DCMAKE_TOOLCHAIN_FILE=../travis/toolchain-armhf.cmake -DNATIVE_BUILD_DIR=`pwd`/../build-native -DEMULATOR=qemu-arm-static -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE .. +cmake -G Ninja -DRUNNING_ON_TRAVIS=TRUE -DCMAKE_TOOLCHAIN_FILE=../travis/toolchain-armhf.cmake -DNATIVE_BUILD_DIR=`pwd`/../build-native -DEMULATOR=qemu-arm-static -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_DFT=TRUE .. diff --git a/travis/before_script.osx-clang.sh b/travis/before_script.osx-clang.sh index 7f4f3fab..7eda5aeb 100644 --- a/travis/before_script.osx-clang.sh +++ b/travis/before_script.osx-clang.sh @@ -2,4 +2,4 @@ set -ev mkdir sleef.build cd sleef.build -cmake -G Ninja -DRUNNING_ON_TRAVIS=TRUE -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DOPENSSL_ROOT_DIR=/usr/local/opt/openssl -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_INLINE_HEADERS=TRUE -DENFORCE_SSE2=TRUE -DENFORCE_SSE4=TRUE -DENFORCE_AVX=TRUE -DENFORCE_FMA4=TRUE -DENFORCE_AVX2=TRUE -DENFORCE_AVX512F=TRUE .. +cmake -G Ninja -DRUNNING_ON_TRAVIS=TRUE -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DOPENSSL_ROOT_DIR=/usr/local/opt/openssl -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_DFT=TRUE -DBUILD_INLINE_HEADERS=TRUE -DENFORCE_SSE2=TRUE -DENFORCE_SSE4=TRUE -DENFORCE_AVX=TRUE -DENFORCE_FMA4=TRUE -DENFORCE_AVX2=TRUE -DENFORCE_AVX512F=TRUE .. diff --git a/travis/before_script.osx-gcc.sh b/travis/before_script.osx-gcc.sh index b8d9bc2b..7b959a9d 100644 --- a/travis/before_script.osx-gcc.sh +++ b/travis/before_script.osx-gcc.sh @@ -3,4 +3,4 @@ set -ev mkdir sleef.build cd sleef.build export CC=gcc-6 -cmake -G Ninja -DRUNNING_ON_TRAVIS=TRUE -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DOPENSSL_ROOT_DIR=/usr/local/opt/openssl -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_INLINE_HEADERS=TRUE -DENFORCE_SSE2=TRUE -DENFORCE_SSE4=TRUE -DENFORCE_AVX=TRUE -DENFORCE_FMA4=TRUE -DENFORCE_AVX2=TRUE -DENFORCE_AVX512F=TRUE .. +cmake -G Ninja -DRUNNING_ON_TRAVIS=TRUE -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DOPENSSL_ROOT_DIR=/usr/local/opt/openssl -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_DFT=TRUE -DBUILD_INLINE_HEADERS=TRUE -DENFORCE_SSE2=TRUE -DENFORCE_SSE4=TRUE -DENFORCE_AVX=TRUE -DENFORCE_FMA4=TRUE -DENFORCE_AVX2=TRUE -DENFORCE_AVX512F=TRUE .. diff --git a/travis/before_script.ppc64le-clang.sh b/travis/before_script.ppc64le-clang.sh index 1bf4b9e8..2d1c8d5b 100644 --- a/travis/before_script.ppc64le-clang.sh +++ b/travis/before_script.ppc64le-clang.sh @@ -3,4 +3,4 @@ set -ev mkdir build && cd build cmake -G Ninja -DRUNNING_ON_TRAVIS=TRUE -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install \ - -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_INLINE_HEADERS=TRUE -DENFORCE_VSX=TRUE -DENFORCE_VSX3=TRUE .. + -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_DFT=TRUE -DBUILD_INLINE_HEADERS=TRUE -DENFORCE_VSX=TRUE -DENFORCE_VSX3=TRUE .. diff --git a/travis/before_script.ppc64le-gcc.sh b/travis/before_script.ppc64le-gcc.sh index 1bf4b9e8..2d1c8d5b 100644 --- a/travis/before_script.ppc64le-gcc.sh +++ b/travis/before_script.ppc64le-gcc.sh @@ -3,4 +3,4 @@ set -ev mkdir build && cd build cmake -G Ninja -DRUNNING_ON_TRAVIS=TRUE -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install \ - -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_INLINE_HEADERS=TRUE -DENFORCE_VSX=TRUE -DENFORCE_VSX3=TRUE .. + -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_DFT=TRUE -DBUILD_INLINE_HEADERS=TRUE -DENFORCE_VSX=TRUE -DENFORCE_VSX3=TRUE .. diff --git a/travis/before_script.s390x-clang.sh b/travis/before_script.s390x-clang.sh index 3c6ee2e2..b0124b97 100644 --- a/travis/before_script.s390x-clang.sh +++ b/travis/before_script.s390x-clang.sh @@ -3,4 +3,4 @@ set -ev mkdir build && cd build cmake -G Ninja -DRUNNING_ON_TRAVIS=TRUE -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install \ - -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_INLINE_HEADERS=TRUE -DENFORCE_VXE=TRUE -DENFORCE_VXE2=TRUE -DCMAKE_BUILD_TYPE=Debug .. + -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_DFT=TRUE -DBUILD_INLINE_HEADERS=TRUE -DENFORCE_VXE=TRUE -DENFORCE_VXE2=TRUE -DCMAKE_BUILD_TYPE=Debug .. diff --git a/travis/before_script.s390x-gcc.sh b/travis/before_script.s390x-gcc.sh index 6407d9ac..dae6b1e6 100644 --- a/travis/before_script.s390x-gcc.sh +++ b/travis/before_script.s390x-gcc.sh @@ -3,4 +3,4 @@ set -ev mkdir build && cd build cmake -G Ninja -DRUNNING_ON_TRAVIS=TRUE -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install \ - -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_INLINE_HEADERS=TRUE -DENFORCE_VXE=TRUE .. + -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_DFT=TRUE -DBUILD_INLINE_HEADERS=TRUE -DENFORCE_VXE=TRUE .. diff --git a/travis/before_script.x86_64-clang.sh b/travis/before_script.x86_64-clang.sh index b6069f85..b7572787 100644 --- a/travis/before_script.x86_64-clang.sh +++ b/travis/before_script.x86_64-clang.sh @@ -3,4 +3,4 @@ set -ev mkdir sleef.build cd sleef.build export CC=clang-8 -cmake -G Ninja -DRUNNING_ON_TRAVIS=TRUE -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=FALSE -DBUILD_INLINE_HEADERS=TRUE -DENFORCE_SSE2=TRUE -DENFORCE_SSE4=TRUE -DENFORCE_AVX=TRUE -DENFORCE_FMA4=TRUE -DENFORCE_AVX2=TRUE -DENFORCE_AVX512F=TRUE .. +cmake -G Ninja -DRUNNING_ON_TRAVIS=TRUE -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=FALSE -DBUILD_DFT=TRUE -DBUILD_INLINE_HEADERS=TRUE -DENFORCE_SSE2=TRUE -DENFORCE_SSE4=TRUE -DENFORCE_AVX=TRUE -DENFORCE_FMA4=TRUE -DENFORCE_AVX2=TRUE -DENFORCE_AVX512F=TRUE .. diff --git a/travis/before_script.x86_64-gcc.sh b/travis/before_script.x86_64-gcc.sh index a1b8b423..8ca0429f 100644 --- a/travis/before_script.x86_64-gcc.sh +++ b/travis/before_script.x86_64-gcc.sh @@ -4,4 +4,4 @@ mkdir sleef.build cd sleef.build export CC=gcc-7 export CXX=g++-7 -cmake -G Ninja -DCMAKE_BUILD_TYPE=Release -DRUNNING_ON_TRAVIS=TRUE -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_INLINE_HEADERS=TRUE -DENFORCE_SSE2=TRUE -DENFORCE_SSE4=TRUE -DENFORCE_AVX=TRUE -DENFORCE_FMA4=TRUE -DENFORCE_AVX2=TRUE -DENFORCE_AVX512F=TRUE .. +cmake -G Ninja -DCMAKE_BUILD_TYPE=Release -DRUNNING_ON_TRAVIS=TRUE -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_DFT=TRUE -DBUILD_INLINE_HEADERS=TRUE -DENFORCE_SSE2=TRUE -DENFORCE_SSE4=TRUE -DENFORCE_AVX=TRUE -DENFORCE_FMA4=TRUE -DENFORCE_AVX2=TRUE -DENFORCE_AVX512F=TRUE ..