Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Generate inline headers #283

Merged
merged 24 commits into from
Jul 23, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ option(BUILD_DFT "libsleefdft will be built." ON)
option(BUILD_QUAD "libsleefquad will be built." OFF)
option(BUILD_GNUABI_LIBS "libsleefgnuabi will be built." ON)
option(BUILD_TESTS "Tests will be built." ON)
option(BUILD_INLINE_HEADERS "Build header for inlining whole SLEEF functions" OFF)

option(SLEEF_TEST_ALL_IUT "Perform tests on implementations with all vector extensions" OFF)
option(SLEEF_SHOW_CONFIG "Show SLEEF configuration status messages." ON)
Expand Down Expand Up @@ -83,6 +84,8 @@ set(TARGET_LIBSLEEFGNUABI "sleefgnuabi")
# Generates the sleef.h headers and all the rename headers
# Defined in src/libm/CMakeLists.txt via custom commands and a custom target
set(TARGET_HEADERS "headers")
set(TARGET_INLINE_HEADERS "inline_headers")
set(TARGET_LIBINLINE "sleefinline")
# Generates executable files for running the test suite
# Defined in src/libm-tester/CMakeLists.txt via command add_executable
set(TARGET_TESTER "tester")
Expand Down Expand Up @@ -148,6 +151,9 @@ if(SLEEF_SHOW_CONFIG)
message(STATUS "FFTW3 : " ${LIBFFTW3})
message(STATUS "OPENSSL : " ${OPENSSL_VERSION})
message(STATUS "SDE : " ${SDE_COMMAND})
# Report the sed location in the configuration summary only when
# inline-header generation (BUILD_INLINE_HEADERS) is switched on.
if (BUILD_INLINE_HEADERS)
message(STATUS "SED : " ${SED_COMMAND})
endif()
message(STATUS "RUNNING_ON_TRAVIS : " ${RUNNING_ON_TRAVIS})
message(STATUS "COMPILER_SUPPORTS_OPENMP : " ${COMPILER_SUPPORTS_OPENMP})
if(ENABLE_GNUABI)
Expand Down
20 changes: 20 additions & 0 deletions Configure.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,12 @@ if(CMAKE_C_COMPILER_ID MATCHES "(GNU|Clang)")
set(FLAGS_ENABLE_NEON32 "-mfpu=neon")
endif(CMAKE_C_COMPILER_ID MATCHES "GNU")

# Flags for generating inline headers (GCC/Clang-style spelling):
#   FLAG_PREPROCESS        - run the preprocessor only
#   FLAG_PRESERVE_COMMENTS - keep comments in the preprocessor output
#   FLAG_INCLUDE           - prefix for an include directory
#   FLAG_DEFINE            - prefix for a macro definition
set(FLAG_PREPROCESS "-E")
set(FLAG_PRESERVE_COMMENTS "-C")
set(FLAG_INCLUDE "-I")
set(FLAG_DEFINE "-D")

if (SLEEF_CLANG_ON_WINDOWS)
# The following line is required to prevent clang from displaying
# many warnings. Clang on Windows references MSVC header files,
Expand All @@ -343,6 +349,11 @@ elseif(MSVC)
set(FLAGS_ENABLE_PURECFMA_SCALAR /D__SSE2__ /D__SSE3__ /D__SSE4_1__ /D__AVX__ /D__AVX2__ /arch:AVX2)
set(FLAGS_WALL "/D_CRT_SECURE_NO_WARNINGS")
set(FLAGS_NO_ERRNO "")

# Flags for generating inline headers, in MSVC's slash-prefixed spelling:
# preprocess only, preserve comments, include-directory prefix, macro-definition prefix.
set(FLAG_PREPROCESS "/E")
set(FLAG_PRESERVE_COMMENTS "/C")
set(FLAG_INCLUDE "/I")
set(FLAG_DEFINE "/D")
elseif(CMAKE_C_COMPILER_ID MATCHES "Intel")
set(FLAGS_ENABLE_SSE2 "-msse2")
set(FLAGS_ENABLE_SSE4 "-msse4.1")
Expand All @@ -356,6 +367,11 @@ elseif(CMAKE_C_COMPILER_ID MATCHES "Intel")
set(FLAGS_FASTMATH "-fp-model fast=2 -Qoption,cpp,--extended_float_type")
set(FLAGS_WALL "-fmax-errors=3 -Wall -Wno-unused -Wno-attributes")
set(FLAGS_NO_ERRNO "")

# Flags for generating inline headers when the Intel compiler is used:
# preprocess only, preserve comments, include-directory prefix, macro-definition prefix.
set(FLAG_PREPROCESS "-E")
set(FLAG_PRESERVE_COMMENTS "-C")
set(FLAG_INCLUDE "-I")
set(FLAG_DEFINE "-D")
endif()

set(SLEEF_C_FLAGS "${FLAGS_WALL} ${FLAGS_STRICTMATH} ${FLAGS_NO_ERRNO}")
Expand Down Expand Up @@ -723,6 +739,10 @@ if (NOT SVE_VECTOR_BITS)
set(SVE_VECTOR_BITS 128)
endif()

#

# Look up the sed executable. find_program stores the result in the
# SED_COMMAND cache variable (SED_COMMAND-NOTFOUND when sed is not found).
find_program(SED_COMMAND sed)

##

if(SLEEF_SHOW_ERROR_LOG)
Expand Down
6 changes: 3 additions & 3 deletions appveyor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,17 +20,17 @@ build_script:
- if "%DO_TEST%" == "TRUE" echo PATH c:\Cygwin64\bin;c:\Cygwin64\usr\bin;%CD%\build-cygwin\bin;%PATH% > q.bat
- if "%DO_TEST%" == "TRUE" powershell -Command "(gc q.bat) -replace ' ;', ';' | Out-File -encoding ASCII p.bat"
- if "%DO_TEST%" == "TRUE" call p.bat
- if "%DO_TEST%" == "TRUE" "C:\\Cygwin64\\bin\\bash" -c 'mkdir build-mingw;cd build-mingw;CC=x86_64-w64-mingw32-gcc cmake -G Ninja .. -DRUNNING_ON_APPVEYOR=TRUE -DBUILD_SHARED_LIBS=FALSE -DBUILD_QUAD=TRUE;ninja'
- if "%DO_TEST%" == "TRUE" "C:\\Cygwin64\\bin\\bash" -c 'mkdir build-mingw;cd build-mingw;CC=x86_64-w64-mingw32-gcc cmake -G Ninja .. -DRUNNING_ON_APPVEYOR=TRUE -DBUILD_SHARED_LIBS=FALSE -DBUILD_QUAD=TRUE -DBUILD_INLINE_HEADERS=TRUE;ninja'
- if "%DO_TEST%" == "TRUE" cd "%BUILDFOLDER%"
- if "%DO_TEST%" == "TRUE" "C:\\Cygwin64\\bin\\bash" -c 'mkdir build-cygwin;cd build-cygwin;cmake -G Ninja -DRUNNING_ON_APPVEYOR=TRUE -DBUILD_QUAD=TRUE ..;ninja'
- if "%DO_TEST%" == "TRUE" "C:\\Cygwin64\\bin\\bash" -c 'mkdir build-cygwin;cd build-cygwin;cmake -G Ninja -DRUNNING_ON_APPVEYOR=TRUE -DBUILD_QUAD=TRUE -DBUILD_INLINE_HEADERS=TRUE ..;ninja'
- if "%DO_TEST%" == "TRUE" cd "%BUILDFOLDER%"
- if "%DO_TEST%" == "TRUE" del /Q /F build-cygwin\bin\iut*
- if "%DO_TEST%" == "TRUE" echo PATH %ORGPATH%;c:\Cygwin64\bin;c:\Cygwin64\usr\bin;%CD%\build-cygwin\bin;%CD%\build\bin > q.bat
- if "%DO_TEST%" == "TRUE" powershell -Command "(gc q.bat) -replace ' ;', ';' | Out-File -encoding ASCII p.bat"
- if "%DO_TEST%" == "TRUE" call p.bat
- mkdir build
- cd build
- cmake -G"Visual Studio 16 2019" .. -DRUNNING_ON_APPVEYOR=TRUE -DCMAKE_INSTALL_PREFIX=install -DSLEEF_SHOW_CONFIG=1 -DSLEEF_SHOW_ERROR_LOG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE %ENV_BUILD_STATIC%
- cmake -G"Visual Studio 16 2019" .. -DRUNNING_ON_APPVEYOR=TRUE -DCMAKE_INSTALL_PREFIX=install -DSLEEF_SHOW_CONFIG=1 -DSLEEF_SHOW_ERROR_LOG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_INLINE_HEADERS=TRUE %ENV_BUILD_STATIC%
- cmake --build . --target install --config Release
- if "%DO_TEST%" == "TRUE" (ctest --output-on-failure -j 4 -C Release)
- cd "%BUILDFOLDER%"
Expand Down
27 changes: 24 additions & 3 deletions src/arch/helperadvsimd.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,27 +9,38 @@
#error Please specify advsimd flags.
#endif

#if !defined(SLEEF_GENHEADER)
#include <arm_neon.h>
#include <stdint.h>

#include "misc.h"
#endif // #if !defined(SLEEF_GENHEADER)

#define ENABLE_DP
//@#define ENABLE_DP
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is this syntax for? I have seen it in other places, so I suspect there is a reason for using it?

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please read my first comment. Generating the header files requires multiple passes through cpp, and some of the macros are needed again in later passes.

Addition to helper files: This patch adds lines beginning with //@, which are treated specially during generation of the header files.

#define LOG2VECTLENDP 1
//@#define LOG2VECTLENDP 1
#define VECTLENDP (1 << LOG2VECTLENDP)
//@#define VECTLENDP (1 << LOG2VECTLENDP)

#define ENABLE_SP
//@#define ENABLE_SP
#define LOG2VECTLENSP 2
//@#define LOG2VECTLENSP 2
#define VECTLENSP (1 << LOG2VECTLENSP)
//@#define VECTLENSP (1 << LOG2VECTLENSP)

#if CONFIG == 1
#define ENABLE_FMA_DP
//@#define ENABLE_FMA_DP
#define ENABLE_FMA_SP
//#define SPLIT_KERNEL // Benchmark comparison is needed to determine whether this option should be enabled.
//@#define ENABLE_FMA_SP
#endif

#define FULL_FP_ROUNDING
//@#define FULL_FP_ROUNDING
#define ACCURATE_SQRT
//@#define ACCURATE_SQRT

#define ISANAME "AArch64 AdvSIMD"

Expand Down Expand Up @@ -296,14 +307,20 @@ static INLINE VECTOR_CC vint2 vxor_vi2_vi2_vi2(vint2 x, vint2 y) {

// Shifts
#define vsll_vi2_vi2_i(x, c) vshlq_n_s32(x, c)
//@#define vsll_vi2_vi2_i(x, c) vshlq_n_s32(x, c)
#define vsrl_vi2_vi2_i(x, c) \
vreinterpretq_s32_u32(vshrq_n_u32(vreinterpretq_u32_s32(x), c))
//@#define vsrl_vi2_vi2_i(x, c) vreinterpretq_s32_u32(vshrq_n_u32(vreinterpretq_u32_s32(x), c))

#define vsra_vi2_vi2_i(x, c) vshrq_n_s32(x, c)
//@#define vsra_vi2_vi2_i(x, c) vshrq_n_s32(x, c)
#define vsra_vi_vi_i(x, c) vshr_n_s32(x, c)
//@#define vsra_vi_vi_i(x, c) vshr_n_s32(x, c)
#define vsll_vi_vi_i(x, c) vshl_n_s32(x, c)
//@#define vsll_vi_vi_i(x, c) vshl_n_s32(x, c)
#define vsrl_vi_vi_i(x, c) \
vreinterpret_s32_u32(vshr_n_u32(vreinterpret_u32_s32(x), c))
//@#define vsrl_vi_vi_i(x, c) vreinterpret_s32_u32(vshr_n_u32(vreinterpret_u32_s32(x), c))

// Comparison returning masks
// Compares x and y for equality with vceqq_s32 and returns the result as a vmask.
static INLINE VECTOR_CC vmask veq_vm_vi2_vi2(vint2 x, vint2 y) { return vceqq_s32(x, y); }
Expand Down Expand Up @@ -757,8 +774,6 @@ static INLINE VECTOR_CC void vsscatter2_v_p_i_i_vf(float *ptr, int offset, int s

//

typedef Sleef_quad2 vargquad;

static INLINE vmask2 vinterleave_vm2_vm2(vmask2 v) {
return (vmask2) {
vreinterpretq_u32_u64(vtrn1q_u64(vreinterpretq_u64_u32(v.x), vreinterpretq_u64_u32(v.y))),
Expand All @@ -783,6 +798,9 @@ static vmask2 vloadu_vm2_p(void *p) {
return vm2;
}

#if !defined(SLEEF_GENHEADER)
typedef Sleef_quad2 vargquad;

// Converts a vargquad argument into a vmask2: aq is read by address through
// vloadu_vm2_p, and the loaded value is then passed through vinterleave_vm2_vm2.
static INLINE vmask2 vcast_vm2_aq(vargquad aq) {
return vinterleave_vm2_vm2(vloadu_vm2_p(&aq));
}
Expand All @@ -793,6 +811,7 @@ static INLINE vargquad vcast_aq_vm2(vmask2 vm2) {
memcpy(&aq, &vm2, VECTLENDP * 16);
return aq;
}
#endif // #if !defined(SLEEF_GENHEADER)

static INLINE int vtestallzeros_i_vo64(vopmask g) {
uint32x2_t x0 = vorr_u32(vget_low_u32(g), vget_high_u32(g));
Expand All @@ -815,7 +834,9 @@ static INLINE vopmask vgt64_vo_vm_vm(vmask x, vmask y) {
}

#define vsll64_vm_vm_i(x, c) vreinterpretq_u32_u64(vshlq_n_u64(vreinterpretq_u64_u32(x), c))
//@#define vsll64_vm_vm_i(x, c) vreinterpretq_u32_u64(vshlq_n_u64(vreinterpretq_u64_u32(x), c))
#define vsrl64_vm_vm_i(x, c) vreinterpretq_u32_u64(vshrq_n_u64(vreinterpretq_u64_u32(x), c))
//@#define vsrl64_vm_vm_i(x, c) vreinterpretq_u32_u64(vshrq_n_u64(vreinterpretq_u64_u32(x), c))

static INLINE vmask vcast_vm_vi(vint vi) {
vmask m = vreinterpretq_u32_u64(vmovl_u32(vreinterpret_u32_s32(vi)));
Expand Down
27 changes: 23 additions & 4 deletions src/arch/helperavx.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@

#if CONFIG == 1

#if !defined(__AVX__)
#if !defined(__AVX__) && !defined(SLEEF_GENHEADER)
#error Please specify -mavx.
#endif

#elif CONFIG == 4

#if !defined(__AVX__) || !defined(__FMA4__)
#if (!defined(__AVX__) || !defined(__FMA4__)) && !defined(SLEEF_GENHEADER)
#error Please specify -mavx and -mfma4.
#endif

Expand All @@ -20,16 +20,25 @@
#endif

#define ENABLE_DP
//@#define ENABLE_DP
#define LOG2VECTLENDP 2
//@#define LOG2VECTLENDP 2
#define VECTLENDP (1 << LOG2VECTLENDP)
//@#define VECTLENDP (1 << LOG2VECTLENDP)

#define ENABLE_SP
//@#define ENABLE_SP
#define LOG2VECTLENSP (LOG2VECTLENDP+1)
//@#define LOG2VECTLENSP (LOG2VECTLENDP+1)
#define VECTLENSP (1 << LOG2VECTLENSP)
//@#define VECTLENSP (1 << LOG2VECTLENSP)

#define FULL_FP_ROUNDING
//@#define FULL_FP_ROUNDING
#define ACCURATE_SQRT
//@#define ACCURATE_SQRT

#if !defined(SLEEF_GENHEADER)
#if defined(_MSC_VER)
#include <intrin.h>
#else
Expand All @@ -38,6 +47,7 @@

#include <stdint.h>
#include "misc.h"
#endif // #if !defined(SLEEF_GENHEADER)

typedef __m256i vmask;
typedef __m256i vopmask;
Expand All @@ -54,6 +64,8 @@ typedef struct {

//

#if !defined(SLEEF_GENHEADER)

#ifndef __SLEEF_H__
void Sleef_x86CpuID(int32_t out[4], uint32_t eax, uint32_t ecx);
#endif
Expand Down Expand Up @@ -95,6 +107,8 @@ static INLINE int vavailability_i(int name) {
#define DFTPRIORITY 20
#endif

#endif // #if !defined(SLEEF_GENHEADER)

// Issues a prefetch for the memory at ptr via _mm_prefetch with the _MM_HINT_T0 hint.
static INLINE void vprefetch_v_p(const void *ptr) { _mm_prefetch(ptr, _MM_HINT_T0); }

static INLINE int vtestallones_i_vo32(vopmask g) {
Expand Down Expand Up @@ -563,8 +577,6 @@ static INLINE void vsscatter2_v_p_i_i_vf(float *ptr, int offset, int step, vfloa

//

typedef Sleef_quad4 vargquad;

static INLINE vmask2 vinterleave_vm2_vm2(vmask2 v) {
return (vmask2) {
vreinterpret_vm_vd(_mm256_unpacklo_pd(vreinterpret_vd_vm(v.x), vreinterpret_vd_vm(v.y))),
Expand Down Expand Up @@ -615,6 +627,9 @@ static vmask2 vloadu_vm2_p(void *p) {
return vm2;
}

#if !defined(SLEEF_GENHEADER)
typedef Sleef_quad4 vargquad;

// Converts a vargquad argument into a vmask2: aq is read by address through
// vloadu_vm2_p, and the loaded value is then passed through vinterleave_vm2_vm2.
static INLINE vmask2 vcast_vm2_aq(vargquad aq) {
return vinterleave_vm2_vm2(vloadu_vm2_p(&aq));
}
Expand All @@ -625,6 +640,7 @@ static INLINE vargquad vcast_aq_vm2(vmask2 vm2) {
memcpy(&aq, &vm2, VECTLENDP * 16);
return aq;
}
#endif // #if !defined(SLEEF_GENHEADER)

static INLINE int vtestallzeros_i_vo64(vopmask g) {
return _mm_movemask_epi8(_mm_or_si128(_mm256_extractf128_si256(g, 0), _mm256_extractf128_si256(g, 1))) == 0;
Expand Down Expand Up @@ -656,6 +672,9 @@ static INLINE vopmask vgt64_vo_vm_vm(vmask x, vmask y) {
_mm256_insertf128_si256(_mm256_castsi128_si256(_mm_srli_epi64(_mm256_extractf128_si256(x, 0), c)), \
_mm_srli_epi64(_mm256_extractf128_si256(x, 1), c), 1)

//@#define vsll64_vm_vm_i(x, c) _mm256_insertf128_si256(_mm256_castsi128_si256(_mm_slli_epi64(_mm256_extractf128_si256(x, 0), c)), _mm_slli_epi64(_mm256_extractf128_si256(x, 1), c), 1)
//@#define vsrl64_vm_vm_i(x, c) _mm256_insertf128_si256(_mm256_castsi128_si256(_mm_srli_epi64(_mm256_extractf128_si256(x, 0), c)), _mm_srli_epi64(_mm256_extractf128_si256(x, 1), c), 1)

static INLINE vmask vcast_vm_vi(vint vi) {
vint vi0 = _mm_and_si128(_mm_shuffle_epi32(vi, (1 << 4) | (1 << 6)), _mm_set_epi32(0, -1, 0, -1));
vint vi1 = _mm_and_si128(_mm_shuffle_epi32(vi, (2 << 0) | (2 << 2) | (3 << 4) | (3 << 6)), _mm_set_epi32(0, -1, 0, -1));
Expand Down
Loading