Skip to content

Commit

Permalink
[Quad] Add fabsq (#375)
Browse files Browse the repository at this point in the history
Co-authored-by: shibatch <[email protected]>
  • Loading branch information
shibatch and shibatch authored Dec 21, 2020
1 parent cb21c85 commit 80b994f
Show file tree
Hide file tree
Showing 36 changed files with 375 additions and 80 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ option(BUILD_SHARED_LIBS "Build shared libs" ON)
option(BUILD_STATIC_TEST_BINS "Build statically linked test executables" OFF)
option(ENABLE_LTO "Enable LTO on GCC or ThinLTO on clang" OFF)
option(BUILD_LIBM "libsleef will be built." ON)
option(BUILD_DFT "libsleefdft will be built." ON)
option(BUILD_DFT "libsleefdft will be built." OFF)
option(BUILD_QUAD "libsleefquad will be built." OFF)
option(BUILD_GNUABI_LIBS "libsleefgnuabi will be built." ON)
option(BUILD_TESTS "Tests will be built." ON)
Expand Down
4 changes: 4 additions & 0 deletions Configure.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ if((CMAKE_SYSTEM_PROCESSOR MATCHES "x86") OR (CMAKE_SYSTEM_PROCESSOR MATCHES "AM
set(SLEEF_ARCH_X86 ON CACHE INTERNAL "True for x86 architecture.")

set(CLANG_FLAGS_ENABLE_PURECFMA_SCALAR "-mavx2;-mfma;-fno-strict-aliasing")
set(CLANG_FLAGS_ENABLE_PUREC_SCALAR "-fno-strict-aliasing")

elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|arm64")
set(SLEEF_ARCH_AARCH64 ON CACHE INTERNAL "True for Aarch64 architecture.")
Expand All @@ -98,16 +99,19 @@ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "arm")
set(COMPILER_SUPPORTS_NEON32VFPV4 1)

set(CLANG_FLAGS_ENABLE_PURECFMA_SCALAR "-mfpu=vfpv4;-fno-strict-aliasing")
set(CLANG_FLAGS_ENABLE_PUREC_SCALAR "-fno-strict-aliasing")

elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)64")
set(SLEEF_ARCH_PPC64 ON CACHE INTERNAL "True for PPC64 architecture.")

set(CLANG_FLAGS_ENABLE_PURECFMA_SCALAR "-mvsx;-fno-strict-aliasing")
set(CLANG_FLAGS_ENABLE_PUREC_SCALAR "-fno-strict-aliasing")

elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "s390x")
set(SLEEF_ARCH_S390X ON CACHE INTERNAL "True for IBM Z architecture.")

set(CLANG_FLAGS_ENABLE_PURECFMA_SCALAR "-march=z14;-mzvector;-fno-strict-aliasing")
set(CLANG_FLAGS_ENABLE_PUREC_SCALAR "-fno-strict-aliasing")
endif()

set(COMPILER_SUPPORTS_PUREC_SCALAR 1)
Expand Down
32 changes: 16 additions & 16 deletions Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ pipeline {
rm -rf build
mkdir build
cd build
cmake -GNinja -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_INLINE_HEADERS=TRUE -DBUILD_QUAD=TRUE -DENFORCE_SVE=TRUE -DEMULATOR=qemu-aarch64 ..
cmake -GNinja -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_INLINE_HEADERS=TRUE -DBUILD_QUAD=TRUE -DBUILD_DFT=TRUE -DENFORCE_SVE=TRUE -DEMULATOR=qemu-aarch64 ..
ninja
export OMP_WAIT_POLICY=passive
export CTEST_OUTPUT_ON_FAILURE=TRUE
Expand All @@ -34,7 +34,7 @@ pipeline {
rm -rf build
mkdir build
cd build
cmake -GNinja -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_INLINE_HEADERS=TRUE -DFORCE_AAVPCS=On -DENABLE_GNUABI=On -DBUILD_QUAD=TRUE -DENFORCE_SVE=TRUE -DEMULATOR=qemu-aarch64 ..
cmake -GNinja -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_INLINE_HEADERS=TRUE -DFORCE_AAVPCS=On -DENABLE_GNUABI=On -DBUILD_QUAD=TRUE -DBUILD_DFT=TRUE -DENFORCE_SVE=TRUE -DEMULATOR=qemu-aarch64 ..
ninja
export OMP_WAIT_POLICY=passive
export CTEST_OUTPUT_ON_FAILURE=TRUE
Expand All @@ -53,7 +53,7 @@ pipeline {
rm -rf build
mkdir build
cd build
cmake -GNinja -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DBUILD_SHARED_LIBS=FALSE -DENFORCE_TESTER3=TRUE -DFORCE_AAVPCS=On -DENABLE_GNUABI=On -DBUILD_QUAD=TRUE -DDISABLE_SVE=TRUE -DBUILD_INLINE_HEADERS=TRUE -DENABLE_CUDA=TRUE -DENFORCE_CUDA=TRUE ..
cmake -GNinja -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DBUILD_SHARED_LIBS=FALSE -DENFORCE_TESTER3=TRUE -DFORCE_AAVPCS=On -DENABLE_GNUABI=On -DBUILD_QUAD=TRUE -DBUILD_DFT=TRUE -DDISABLE_SVE=TRUE -DBUILD_INLINE_HEADERS=TRUE -DENABLE_CUDA=TRUE -DENFORCE_CUDA=TRUE ..
ninja
export OMP_WAIT_POLICY=passive
export CTEST_OUTPUT_ON_FAILURE=TRUE
Expand All @@ -78,7 +78,7 @@ pipeline {
docker exec jenkins tar xf /tmp/builddir.tgz -C /build
docker exec jenkins rm -f /tmp/builddir.tgz
rm -f /tmp/builddir.tgz
docker exec jenkins bash -c "set -ev;export OMP_WAIT_POLICY=passive;cd /build;rm -rf build;mkdir build;cd build;export CC=gcc;export PATH=/opt/bin:$PATH;cmake -GNinja -DBUILD_QUAD=TRUE -DENFORCE_TESTER3=TRUE ..;ninja;ctest -j `nproc`"
docker exec jenkins bash -c "set -ev;export OMP_WAIT_POLICY=passive;cd /build;rm -rf build;mkdir build;cd build;export CC=gcc;export PATH=/opt/bin:$PATH;cmake -GNinja -DBUILD_QUAD=TRUE -DBUILD_DFT=TRUE -DENFORCE_TESTER3=TRUE ..;ninja;ctest -j `nproc`"
docker stop jenkins
'''
}
Expand All @@ -99,7 +99,7 @@ pipeline {
docker exec jenkins tar xf /tmp/builddir.tgz -C /build
docker exec jenkins rm -f /tmp/builddir.tgz
rm -f /tmp/builddir.tgz
docker exec jenkins bash -c "set -ev;export OMP_WAIT_POLICY=passive;cd /build;rm -rf build;mkdir build;cd build;export CC=clang;export PATH=/opt/bin:$PATH;cmake -GNinja -DBUILD_QUAD=TRUE -DENFORCE_TESTER3=TRUE ..;ninja;ctest -j `nproc`"
docker exec jenkins bash -c "set -ev;export OMP_WAIT_POLICY=passive;cd /build;rm -rf build;mkdir build;cd build;export CC=clang;export PATH=/opt/bin:$PATH;cmake -GNinja -DBUILD_QUAD=TRUE -DBUILD_DFT=TRUE -DENFORCE_TESTER3=TRUE ..;ninja;ctest -j `nproc`"
docker stop jenkins
'''
}
Expand All @@ -114,7 +114,7 @@ pipeline {
rm -rf build
mkdir build
cd build
cmake -GNinja -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DENFORCE_SSE2=TRUE -DENFORCE_SSE4=TRUE -DENFORCE_AVX=TRUE -DENFORCE_AVX2=TRUE -DENFORCE_AVX512F=TRUE ..
cmake -GNinja -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_DFT=TRUE -DENFORCE_SSE2=TRUE -DENFORCE_SSE4=TRUE -DENFORCE_AVX=TRUE -DENFORCE_AVX2=TRUE -DENFORCE_AVX512F=TRUE ..
ninja
export OMP_WAIT_POLICY=passive
export CTEST_OUTPUT_ON_FAILURE=TRUE
Expand All @@ -134,7 +134,7 @@ pipeline {
rm -rf build
mkdir build
cd build
cmake -GNinja -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DOPENSSL_ROOT_DIR=/usr/local/opt/openssl -DENFORCE_TESTER3=TRUE -DBUILD_INLINE_HEADERS=TRUE -DBUILD_QUAD=TRUE -DENFORCE_SSE2=TRUE -DENFORCE_SSE4=TRUE -DENFORCE_AVX=TRUE -DENFORCE_FMA4=TRUE -DENFORCE_AVX2=TRUE -DENFORCE_AVX512F=TRUE ..
cmake -GNinja -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DOPENSSL_ROOT_DIR=/usr/local/opt/openssl -DENFORCE_TESTER3=TRUE -DBUILD_INLINE_HEADERS=TRUE -DBUILD_QUAD=TRUE -DBUILD_DFT=TRUE -DENFORCE_SSE2=TRUE -DENFORCE_SSE4=TRUE -DENFORCE_AVX=TRUE -DENFORCE_FMA4=TRUE -DENFORCE_AVX2=TRUE -DENFORCE_AVX512F=TRUE ..
ninja
export OMP_WAIT_POLICY=passive
export CTEST_OUTPUT_ON_FAILURE=TRUE
Expand All @@ -154,7 +154,7 @@ pipeline {
rm -rf build
mkdir build
cd build
cmake -GNinja -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DBUILD_SHARED_LIBS=FALSE -DOPENSSL_ROOT_DIR=/usr/local/opt/openssl -DENFORCE_TESTER3=TRUE -DBUILD_INLINE_HEADERS=TRUE -DBUILD_QUAD=TRUE -DENFORCE_SSE2=TRUE -DENFORCE_SSE4=TRUE -DENFORCE_AVX=TRUE -DENFORCE_FMA4=TRUE -DENFORCE_AVX2=TRUE -DENFORCE_AVX512F=TRUE ..
cmake -GNinja -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DBUILD_SHARED_LIBS=FALSE -DOPENSSL_ROOT_DIR=/usr/local/opt/openssl -DENFORCE_TESTER3=TRUE -DBUILD_INLINE_HEADERS=TRUE -DBUILD_QUAD=TRUE -DBUILD_DFT=TRUE -DENFORCE_SSE2=TRUE -DENFORCE_SSE4=TRUE -DENFORCE_AVX=TRUE -DENFORCE_FMA4=TRUE -DENFORCE_AVX2=TRUE -DENFORCE_AVX512F=TRUE ..
ninja
export OMP_WAIT_POLICY=passive
export CTEST_OUTPUT_ON_FAILURE=TRUE
Expand All @@ -172,7 +172,7 @@ pipeline {
rm -rf build
mkdir build
cd build
cmake -GNinja -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_INLINE_HEADERS=TRUE -DBUILD_QUAD=TRUE -DENFORCE_SSE2=TRUE -DENFORCE_SSE4=TRUE -DENFORCE_AVX=TRUE -DENFORCE_FMA4=TRUE -DENFORCE_AVX2=TRUE -DENFORCE_AVX512F=TRUE ..
cmake -GNinja -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_INLINE_HEADERS=TRUE -DBUILD_QUAD=TRUE -DBUILD_DFT=TRUE -DENFORCE_SSE2=TRUE -DENFORCE_SSE4=TRUE -DENFORCE_AVX=TRUE -DENFORCE_FMA4=TRUE -DENFORCE_AVX2=TRUE -DENFORCE_AVX512F=TRUE ..
ninja
export OMP_WAIT_POLICY=passive
export CTEST_OUTPUT_ON_FAILURE=TRUE
Expand All @@ -192,7 +192,7 @@ pipeline {
rm -rf build
mkdir build
cd build
cmake -GNinja -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_INLINE_HEADERS=TRUE -DBUILD_SHARED_LIBS=FALSE -DENABLE_LTO=TRUE -DDISABLE_FMA4=TRUE -DENFORCE_SSE2=TRUE -DENFORCE_SSE4=TRUE -DENFORCE_AVX=TRUE -DENFORCE_AVX2=TRUE -DENFORCE_AVX512F=TRUE ..
cmake -GNinja -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_DFT=TRUE -DBUILD_INLINE_HEADERS=TRUE -DBUILD_SHARED_LIBS=FALSE -DENABLE_LTO=TRUE -DDISABLE_FMA4=TRUE -DENFORCE_SSE2=TRUE -DENFORCE_SSE4=TRUE -DENFORCE_AVX=TRUE -DENFORCE_AVX2=TRUE -DENFORCE_AVX512F=TRUE ..
ninja
export OMP_WAIT_POLICY=passive
export CTEST_OUTPUT_ON_FAILURE=TRUE
Expand All @@ -212,7 +212,7 @@ pipeline {
rm -rf build
mkdir build
cd build
cmake -GNinja -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_INLINE_HEADERS=TRUE -DBUILD_SHARED_LIBS=FALSE -DENABLE_LTO=TRUE -DLLVM_AR_COMMAND=llvm-ar-10 -DDISABLE_FMA4=TRUE -DENFORCE_SSE2=TRUE -DENFORCE_SSE4=TRUE -DENFORCE_AVX=TRUE -DENFORCE_AVX2=TRUE -DENFORCE_AVX512F=TRUE ..
cmake -GNinja -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_DFT=TRUE -DBUILD_INLINE_HEADERS=TRUE -DBUILD_SHARED_LIBS=FALSE -DENABLE_LTO=TRUE -DLLVM_AR_COMMAND=llvm-ar-10 -DDISABLE_FMA4=TRUE -DENFORCE_SSE2=TRUE -DENFORCE_SSE4=TRUE -DENFORCE_AVX=TRUE -DENFORCE_AVX2=TRUE -DENFORCE_AVX512F=TRUE ..
ninja
export OMP_WAIT_POLICY=passive
export CTEST_OUTPUT_ON_FAILURE=TRUE
Expand All @@ -237,7 +237,7 @@ pipeline {
docker exec jenkins tar xf /tmp/builddir.tgz -C /build
docker exec jenkins rm -f /tmp/builddir.tgz
rm -f /tmp/builddir.tgz
docker exec jenkins bash -c "set -ev;export OMP_WAIT_POLICY=passive;cd /build;rm -rf build;mkdir build;cd build;export CC=gcc;cmake -GNinja -DBUILD_QUAD=TRUE -DBUILD_INLINE_HEADERS=TRUE -DENFORCE_SSE2=TRUE -DENFORCE_SSE4=TRUE -DENFORCE_AVX=TRUE -DENFORCE_FMA4=TRUE -DENFORCE_AVX2=TRUE -DENFORCE_AVX512F=TRUE ..;ninja;ctest -j `nproc`"
docker exec jenkins bash -c "set -ev;export OMP_WAIT_POLICY=passive;cd /build;rm -rf build;mkdir build;cd build;export CC=gcc;cmake -GNinja -DBUILD_QUAD=TRUE -DBUILD_DFT=TRUE -DBUILD_INLINE_HEADERS=TRUE -DENFORCE_SSE2=TRUE -DENFORCE_SSE4=TRUE -DENFORCE_AVX=TRUE -DENFORCE_FMA4=TRUE -DENFORCE_AVX2=TRUE -DENFORCE_AVX512F=TRUE ..;ninja;ctest -j `nproc`"
docker stop jenkins
'''
}
Expand All @@ -258,7 +258,7 @@ pipeline {
docker exec jenkins tar xf /tmp/builddir.tgz -C /build
docker exec jenkins rm -f /tmp/builddir.tgz
rm -f /tmp/builddir.tgz
docker exec jenkins bash -c "set -ev;export OMP_WAIT_POLICY=passive;cd /build;rm -rf build;mkdir build;cd build;export CC=clang;cmake -GNinja -DBUILD_QUAD=TRUE -DBUILD_INLINE_HEADERS=TRUE -DENFORCE_SSE2=TRUE -DENFORCE_SSE4=TRUE -DENFORCE_AVX=TRUE -DENFORCE_FMA4=TRUE -DENFORCE_AVX2=TRUE -DENFORCE_AVX512F=TRUE ..;ninja;ctest -j `nproc`"
docker exec jenkins bash -c "set -ev;export OMP_WAIT_POLICY=passive;cd /build;rm -rf build;mkdir build;cd build;export CC=clang;cmake -GNinja -DBUILD_QUAD=TRUE -DBUILD_DFT=TRUE -DBUILD_INLINE_HEADERS=TRUE -DENFORCE_SSE2=TRUE -DENFORCE_SSE4=TRUE -DENFORCE_AVX=TRUE -DENFORCE_FMA4=TRUE -DENFORCE_AVX2=TRUE -DENFORCE_AVX512F=TRUE ..;ninja;ctest -j `nproc`"
docker stop jenkins
'''
}
Expand All @@ -281,7 +281,7 @@ pipeline {
rm -rf build
mkdir build
cd build
/usr/local/bin/cmake -GNinja -DBUILD_QUAD=TRUE -DBUILD_INLINE_HEADERS=TRUE -DENFORCE_SSE2=TRUE -DENFORCE_SSE4=TRUE -DENFORCE_AVX=TRUE -DENFORCE_FMA4=TRUE -DENFORCE_AVX2=TRUE ..
/usr/local/bin/cmake -GNinja -DBUILD_QUAD=TRUE -DBUILD_DFT=TRUE -DBUILD_INLINE_HEADERS=TRUE -DENFORCE_SSE2=TRUE -DENFORCE_SSE4=TRUE -DENFORCE_AVX=TRUE -DENFORCE_FMA4=TRUE -DENFORCE_AVX2=TRUE ..
ninja
'''
}
Expand All @@ -296,9 +296,9 @@ pipeline {
rm -rf build
mkdir build
cd build
cmake -GNinja -DBUILD_QUAD=TRUE -DBUILD_INLINE_HEADERS=TRUE -DENFORCE_SSE2=TRUE -DENFORCE_SSE4=TRUE -DENFORCE_AVX=TRUE -DENFORCE_FMA4=TRUE -DENFORCE_AVX2=TRUE -DENFORCE_AVX512F=TRUE ..
cmake -GNinja -DBUILD_QUAD=TRUE -DBUILD_DFT=TRUE -DBUILD_INLINE_HEADERS=TRUE -DENFORCE_SSE2=TRUE -DENFORCE_SSE4=TRUE -DENFORCE_AVX=TRUE -DENFORCE_FMA4=TRUE -DENFORCE_AVX2=TRUE -DENFORCE_AVX512F=TRUE ..
ninja
cmake -GNinja -DBUILD_QUAD=TRUE -DBUILD_INLINE_HEADERS=TRUE -DENFORCE_SSE2=TRUE -DENFORCE_SSE4=TRUE -DENFORCE_AVX=TRUE -DENFORCE_FMA4=TRUE -DENFORCE_AVX2=TRUE -DENFORCE_AVX512F=TRUE ..
cmake -GNinja -DBUILD_QUAD=TRUE -DBUILD_DFT=TRUE -DBUILD_INLINE_HEADERS=TRUE -DENFORCE_SSE2=TRUE -DENFORCE_SSE4=TRUE -DENFORCE_AVX=TRUE -DENFORCE_FMA4=TRUE -DENFORCE_AVX2=TRUE -DENFORCE_AVX512F=TRUE ..
ninja
ctest -j `nproc`
'''
Expand Down
8 changes: 4 additions & 4 deletions appveyor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,17 +20,17 @@ build_script:
- if "%DO_TEST%" == "TRUE" echo PATH c:\Cygwin64\bin;c:\Cygwin64\usr\bin;%CD%\build-cygwin\bin;%PATH% > q.bat
- if "%DO_TEST%" == "TRUE" powershell -Command "(gc q.bat) -replace ' ;', ';' | Out-File -encoding ASCII p.bat"
- if "%DO_TEST%" == "TRUE" call p.bat
- if "%DO_TEST%" == "TRUE" "C:\\Cygwin64\\bin\\bash" -c 'mkdir build-mingw;cd build-mingw;CC=x86_64-w64-mingw32-gcc cmake -G Ninja .. -DRUNNING_ON_APPVEYOR=TRUE -DBUILD_SHARED_LIBS=FALSE -DBUILD_QUAD=TRUE -DENFORCE_SSE2=TRUE -DENFORCE_SSE4=TRUE;ninja'
- if "%DO_TEST%" == "TRUE" "C:\\Cygwin64\\bin\\bash" -c 'mkdir build-mingw;cd build-mingw;CC=x86_64-w64-mingw32-gcc cmake -G Ninja .. -DRUNNING_ON_APPVEYOR=TRUE -DBUILD_SHARED_LIBS=FALSE -DBUILD_QUAD=TRUE -DBUILD_DFT=TRUE -DENFORCE_SSE2=TRUE -DENFORCE_SSE4=TRUE;ninja'
- if "%DO_TEST%" == "TRUE" cd "%BUILDFOLDER%"
- if "%DO_TEST%" == "TRUE" "C:\\Cygwin64\\bin\\bash" -c 'mkdir build-cygwin;cd build-cygwin;cmake -G Ninja -DRUNNING_ON_APPVEYOR=TRUE -DBUILD_QUAD=TRUE -DBUILD_INLINE_HEADERS=TRUE -DENFORCE_SSE2=TRUE -DENFORCE_SSE2=TRUE -DENFORCE_SSE4=TRUE ..;ninja'
- if "%DO_TEST%" == "TRUE" "C:\\Cygwin64\\bin\\bash" -c 'mkdir build-cygwin;cd build-cygwin;cmake -G Ninja -DRUNNING_ON_APPVEYOR=TRUE -DBUILD_QUAD=TRUE -DBUILD_DFT=TRUE -DBUILD_INLINE_HEADERS=TRUE -DENFORCE_SSE2=TRUE -DENFORCE_SSE2=TRUE -DENFORCE_SSE4=TRUE ..;ninja'
- if "%DO_TEST%" == "TRUE" cd "%BUILDFOLDER%"
- if "%DO_TEST%" == "TRUE" del /Q /F build-cygwin\bin\iut*
- if "%DO_TEST%" == "TRUE" echo PATH %ORGPATH%;c:\Cygwin64\bin;c:\Cygwin64\usr\bin;%CD%\build-cygwin\bin;%CD%\build\bin > q.bat
- if "%DO_TEST%" == "TRUE" powershell -Command "(gc q.bat) -replace ' ;', ';' | Out-File -encoding ASCII p.bat"
- if "%DO_TEST%" == "TRUE" call p.bat
- mkdir build
- cd build
- cmake -G"Visual Studio 16 2019" .. -DRUNNING_ON_APPVEYOR=TRUE -DCMAKE_INSTALL_PREFIX=install -DSLEEF_SHOW_CONFIG=1 -DSLEEF_SHOW_ERROR_LOG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_INLINE_HEADERS=TRUE -DENFORCE_SSE2=TRUE -DENFORCE_SSE4=TRUE -DENFORCE_AVX=TRUE -DENFORCE_FMA4=TRUE -DENFORCE_AVX2=TRUE -DENFORCE_AVX512F=TRUE %ENV_BUILD_STATIC%
- cmake -G"Visual Studio 16 2019" .. -DRUNNING_ON_APPVEYOR=TRUE -DCMAKE_INSTALL_PREFIX=install -DSLEEF_SHOW_CONFIG=1 -DSLEEF_SHOW_ERROR_LOG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_DFT=TRUE -DBUILD_INLINE_HEADERS=TRUE -DENFORCE_SSE2=TRUE -DENFORCE_SSE4=TRUE -DENFORCE_AVX=TRUE -DENFORCE_FMA4=TRUE -DENFORCE_AVX2=TRUE -DENFORCE_AVX512F=TRUE %ENV_BUILD_STATIC%
- cmake --build . --target install --config Release
- if "%DO_TEST%" == "TRUE" (ctest --output-on-failure -j 16 -C Release)
- cd "%BUILDFOLDER%"
Expand All @@ -39,7 +39,7 @@ build_script:
- call p.bat
- mkdir build-clang
- cd build-clang
- cmake -G Ninja .. -DRUNNING_ON_APPVEYOR=TRUE -DCMAKE_C_COMPILER:PATH="C:\Program Files\LLVM\bin\clang.exe" -DCMAKE_INSTALL_PREFIX=install -DSLEEF_SHOW_CONFIG=1 -DSLEEF_SHOW_ERROR_LOG=1 -DENFORCE_TESTER3=TRUE -DBUILD_INLINE_HEADERS=TRUE -DBUILD_QUAD=TRUE -DENFORCE_SSE2=TRUE -DENFORCE_SSE4=TRUE -DENFORCE_AVX=TRUE -DENFORCE_FMA4=TRUE -DENFORCE_AVX2=TRUE -DENFORCE_AVX512F=TRUE %ENV_BUILD_STATIC%
- cmake -G Ninja .. -DRUNNING_ON_APPVEYOR=TRUE -DCMAKE_C_COMPILER:PATH="C:\Program Files\LLVM\bin\clang.exe" -DCMAKE_INSTALL_PREFIX=install -DSLEEF_SHOW_CONFIG=1 -DSLEEF_SHOW_ERROR_LOG=1 -DENFORCE_TESTER3=TRUE -DBUILD_INLINE_HEADERS=TRUE -DBUILD_QUAD=TRUE -DBUILD_DFT=TRUE -DENFORCE_SSE2=TRUE -DENFORCE_SSE4=TRUE -DENFORCE_AVX=TRUE -DENFORCE_FMA4=TRUE -DENFORCE_AVX2=TRUE -DENFORCE_AVX512F=TRUE %ENV_BUILD_STATIC%
- ninja
test_script:
- if "%DO_TEST%" == "TRUE" (ctest --output-on-failure -j 16 -C Release)
Expand Down
9 changes: 7 additions & 2 deletions src/common/commonfuncs.h
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ static INLINE CONST VECTOR_CC tdi_t tdisettdi_tdi_vd3_vi(vdouble3 v, vint i) {
#if defined(ENABLE_MAIN)
// Functions for debugging
#include <stdio.h>
#include <wchar.t>
#include <wchar.h>

static void printvmask(char *mes, vmask g) {
uint64_t u[VECTLENDP];
Expand Down Expand Up @@ -163,7 +163,7 @@ static void printvint64(char *mes, vint64 vi) {
uint64_t u[VECTLENDP*2];
vstoreu_v_p_vd((double *)u, vreinterpret_vd_vm(vreinterpret_vm_vi64(vi)));
printf("%s ", mes);
for(int i=0;i<VECTLENDP;i++) printf("%016x : ", (unsigned long)u[i]);
for(int i=0;i<VECTLENDP;i++) printf("%016lx : ", (unsigned long)u[i]);
printf("\n");
}

Expand Down Expand Up @@ -359,6 +359,11 @@ static INLINE CONST vmask vilogbk_vm_vd(vdouble d) {

// vmask2 functions

// Compares two vmask2 values for equality: the veq64 comparison of the x
// components is ANDed with the veq64 comparison of the y components.
static INLINE CONST vopmask veq64_vo_vm2_vm2(vmask2 x, vmask2 y) {
  vopmask xeq = veq64_vo_vm_vm(vm2getx_vm_vm2(x), vm2getx_vm_vm2(y));
  vopmask yeq = veq64_vo_vm_vm(vm2gety_vm_vm2(x), vm2gety_vm_vm2(y));
  return vand_vo_vo_vo(xeq, yeq);
}

// Selects between two vmask2 values under mask o: vsel_vm_vo64_vm_vm is applied
// to the x components and to the y components separately, and the two results
// are repacked into a vmask2.
static INLINE CONST vmask2 vsel_vm2_vo_vm2_vm2(vopmask o, vmask2 x, vmask2 y) {
  vmask selx = vsel_vm_vo64_vm_vm(o, vm2getx_vm_vm2(x), vm2getx_vm_vm2(y));
  vmask sely = vsel_vm_vo64_vm_vm(o, vm2gety_vm_vm2(x), vm2gety_vm_vm2(y));
  return vm2setxy_vm2_vm_vm(selx, sely);
}
Expand Down
8 changes: 8 additions & 0 deletions src/common/keywords.txt
Original file line number Diff line number Diff line change
Expand Up @@ -471,8 +471,15 @@ vcast_vf128_tdx
vcast_vf128_tdx_fast
vcast_vf128_tdx_slow
vcmp_vm_tdx_tdx
vsignbit_vo_tdx
visnan_vo_tdx
veq_vo_tdx_tdx
vneq_vo_tdx_tdx
vge_vo_tdx_tdx
vle_vo_tdx_tdx
vgt_vo_tdx_tdx
vlt_vo_tdx_tdx
vcast_tdx_d
vilogb_vm_tdx
vneg_tdx_tdx
vsel_tdx_vo64_tdx_tdx
Expand Down Expand Up @@ -635,6 +642,7 @@ vreinterpret_vi64_vm
vreinterpret_vm_vi64
vreinterpret_vm_vu64
vreinterpret_vu64_vm
veq64_vo_vm2_vm2
vsel_vm2_vo_vm2_vm2
vtruncate_vm_vd
vugt64_vo_vm_vm
Expand Down
8 changes: 8 additions & 0 deletions src/common/keywords_cuda.txt
Original file line number Diff line number Diff line change
Expand Up @@ -469,8 +469,15 @@ vcast_vf128_tdx
vcast_vf128_tdx_fast
vcast_vf128_tdx_slow
vcmp_vm_tdx_tdx
vsignbit_vo_tdx
visnan_vo_tdx
veq_vo_tdx_tdx
vneq_vo_tdx_tdx
vge_vo_tdx_tdx
vle_vo_tdx_tdx
vgt_vo_tdx_tdx
vlt_vo_tdx_tdx
vcast_tdx_d
vilogb_vm_tdx
vneg_tdx_tdx
vsel_tdx_vo64_tdx_tdx
Expand Down Expand Up @@ -633,6 +640,7 @@ vreinterpret_vi64_vm
vreinterpret_vm_vi64
vreinterpret_vm_vu64
vreinterpret_vu64_vm
veq64_vo_vm2_vm2
vsel_vm2_vo_vm2_vm2
vtruncate_vm_vd
vugt64_vo_vm_vm
Expand Down
1 change: 1 addition & 0 deletions src/common/misc.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@

#define SLEEF_FLT_MIN 0x1p-126
#define SLEEF_DBL_MIN 0x1p-1022
#define SLEEF_INT_MAX 2147483647

//

Expand Down
2 changes: 1 addition & 1 deletion src/libm/sleefinline_header.h.org
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
#endif

#ifndef SLEEF_INLINE
#define SLEEF_INLINE inline
#define SLEEF_INLINE static inline
#endif

#ifndef SLEEF_CONST
Expand Down
4 changes: 4 additions & 0 deletions src/libm/sleeflibm_header.h.org.in
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,10 @@ typedef struct {
#define Sleef_quad_DEFINED
#if defined(__SIZEOF_FLOAT128__) || (defined(__linux__) && defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))) || (defined(__PPC64__) && defined(__GNUC__) && !defined(__clang__) && __GNUC__ >= 8)
typedef __float128 Sleef_quad;
#define SLEEF_QUAD_C(x) (x ## Q)
//#elif defined(__SIZEOF_LONG_DOUBLE__) && defined(__aarch64__)
//typedef long double Sleef_quad;
//#define SLEEF_QUAD_C(x) (x ## L)
#else
typedef struct { uint64_t x, y; } Sleef_quad;
#endif
Expand Down
2 changes: 1 addition & 1 deletion src/libm/sleefsimddp.c
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,7 @@ EXPORT CONST VECTOR_CC vint xilogb(vdouble d) {
vdouble e = vcast_vd_vi(vilogbk_vi_vd(vabs_vd_vd(d)));
e = vsel_vd_vo_vd_vd(veq_vo_vd_vd(d, vcast_vd_d(0)), vcast_vd_d(SLEEF_FP_ILOGB0), e);
e = vsel_vd_vo_vd_vd(visnan_vo_vd(d), vcast_vd_d(SLEEF_FP_ILOGBNAN), e);
e = vsel_vd_vo_vd_vd(visinf_vo_vd(d), vcast_vd_d(INT_MAX), e);
e = vsel_vd_vo_vd_vd(visinf_vo_vd(d), vcast_vd_d(SLEEF_INT_MAX), e);
return vrint_vi_vd(e);
}

Expand Down
Loading

0 comments on commit 80b994f

Please sign in to comment.