Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make SIMD configure tests work for MacOS multiarch builds #1587

Merged
merged 2 commits into from
Mar 31, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion .cirrus.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ libdeflate_template: &LIBDEFLATE
pushd "$HOME"
git clone --depth 1 https://github.com/ebiggers/libdeflate.git
pushd libdeflate
cmake -B build -DLIBDEFLATE_BUILD_SHARED_LIB=OFF -DLIBDEFLATE_BUILD_GZIP=OFF -DCMAKE_C_FLAGS='-g -O3 -fPIC'
cmake -B build -DLIBDEFLATE_BUILD_SHARED_LIB=OFF -DLIBDEFLATE_BUILD_GZIP=OFF -DCMAKE_C_FLAGS="-g -O3 -fPIC $LIBDEFLATE_CFLAGS"
cmake --build build --verbose
popd
popd
Expand Down Expand Up @@ -186,6 +186,9 @@ macosx_task:

environment:
CC: clang
CFLAGS: "-Wall -arch arm64 -arch x86_64"
LDFLAGS: "-arch arm64 -arch x86_64"
LIBDEFLATE_CFLAGS: "-arch arm64 -arch x86_64"
LC_ALL: C
CIRRUS_CLONE_DEPTH: 1

Expand Down
16 changes: 13 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -126,10 +126,18 @@ srcdir = .
srcprefix =
HTSPREFIX =

# Flags for SIMD code
HTS_CFLAGS_AVX2 =
HTS_CFLAGS_AVX512 =
HTS_CFLAGS_SSE4 =

# Control building of SIMD code. Not used if configure has been run.
HTS_BUILD_AVX2 =
HTS_BUILD_AVX512 =
HTS_BUILD_SSSE3 =
HTS_BUILD_POPCNT =
HTS_BUILD_SSE4_1 =

include htslib_vars.mk
include htscodecs.mk

Expand Down Expand Up @@ -274,18 +282,20 @@ config.h:
echo '#endif' >> $@
echo '#define HAVE_DRAND48 1' >> $@
echo '#define HAVE_LIBCURL 1' >> $@
if [ "x$(HTS_CFLAGS_SSE4)" != "x" ] ; then \
if [ "x$(HTS_BUILD_POPCNT)" != "x" ] && \
[ "x$(HTS_BUILD_SSE4_1)" != "x" ] && \
[ "x$(HTS_BUILD_SSSE3)" != "x" ]; then \
echo '#define HAVE_POPCNT 1' >> $@ ; \
echo '#define HAVE_SSE4_1 1' >> $@ ; \
echo '#define HAVE_SSSE3 1' >> $@ ; \
echo '#if defined(HTS_ALLOW_UNALIGNED) && HTS_ALLOW_UNALIGNED == 0' >> $@ ; \
echo '#define UBSAN 1' >> $@ ; \
echo '#endif' >> $@ ; \
fi
if [ "x$(HTS_CFLAGS_AVX2)" != "x" ] ; then \
if [ "x$(HTS_BUILD_AVX2)" != "x" ] ; then \
echo '#define HAVE_AVX2 1' >> $@ ; \
fi
if [ "x$(HTS_CFLAGS_AVX512)" != "x" ] ; then \
if [ "x$(HTS_BUILD_AVX512)" != "x" ] ; then \
echo '#define HAVE_AVX512 1' >> $@ ; \
fi

Expand Down
122 changes: 71 additions & 51 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ AC_CONFIG_SRCDIR(hts.c)
AC_CONFIG_HEADERS(config.h)

m4_include([m4/hts_prog_cc_warnings.m4])
m4_include([m4/ax_check_compile_flag.m4])
m4_include([m4/hts_check_compile_flags_needed.m4])
m4_include([m4/hts_hide_dynamic_syms.m4])
m4_include([m4/pkg.m4])

Expand Down Expand Up @@ -71,16 +71,53 @@ dnl later as they can interfere with some of the tests (notably AC_SEARCH_LIBS)
HTS_PROG_CC_WERROR(hts_late_cflags)

dnl Check for various compiler flags to enable SIMD features
dnl Options for rANS32x16 sse4.1 version
AX_CHECK_COMPILE_FLAG([-mssse3 -mpopcnt -msse4.1], [
hts_cflags_sse4="-mssse3 -mpopcnt -msse4.1"
AC_SUBST([hts_cflags_sse4])
AC_DEFINE([HAVE_SSSE3],1,
[Defined to 1 if the compiler can issue SSSE3 instructions.])
AC_DEFINE([HAVE_POPCNT],1,
[Defined to 1 if the compiler can issue popcnt instructions.])
AC_DEFINE([HAVE_SSE4_1],1,
[Defined to 1 if the compiler can issue SSE4.1 instructions.])
dnl Options for rANS32x16 sse4.1 version - ssse3
hts_cflags_sse4=""
HTS_CHECK_COMPILE_FLAGS_NEEDED([ssse3], [-mssse3], [AC_LANG_PROGRAM([[
#ifdef __x86_64__
#include "x86intrin.h"
#endif
]],[[
#ifdef __x86_64__
__m128i a = _mm_set_epi32(1, 2, 3, 4), b = _mm_set_epi32(4, 3, 2, 1);
__m128i c = _mm_shuffle_epi8(a, b);
return *((char *) &c);
#endif
]])], [
hts_cflags_sse4="$flags_needed $hts_cflags_sse4"
AC_DEFINE([HAVE_SSSE3],1,[Defined to 1 if rANS source using SSSE3 can be compiled.])
])

dnl Options for rANS32x16 sse4.1 version - popcnt
HTS_CHECK_COMPILE_FLAGS_NEEDED([popcnt], [-mpopcnt], [AC_LANG_PROGRAM([[
#ifdef __x86_64__
#include "x86intrin.h"
#endif
]],[[
#ifdef __x86_64__
unsigned int i = _mm_popcnt_u32(1);
return i != 1;
#endif
]])], [
hts_cflags_sse4="$flags_needed $hts_cflags_sse4"
AC_DEFINE([HAVE_POPCNT],1,[Defined to 1 if rANS source using popcnt can be compiled.])
])

dnl Options for rANS32x16 sse4.1 version - sse4.1
HTS_CHECK_COMPILE_FLAGS_NEEDED([sse4.1], [-msse4.1], [AC_LANG_PROGRAM([[
#ifdef __x86_64__
#include "x86intrin.h"
#endif
]],[[
#ifdef __x86_64__
__m128i a = _mm_set_epi32(1, 2, 3, 4), b = _mm_set_epi32(4, 3, 2, 1);
__m128i c = _mm_max_epu32(a, b);
return *((char *) &c);
#endif
]])], [
hts_cflags_sse4="$flags_needed $hts_cflags_sse4"
AC_DEFINE([HAVE_SSE4_1],1,[Defined to 1 if rANS source using SSE4.1 can be compiled.
])
dnl Propagate HTSlib's unaligned access preference to htscodecs
AH_VERBATIM([UBSAN],[
/* Prevent unaligned access in htscodecs SSE4 rANS codec */
Expand All @@ -89,60 +126,43 @@ dnl Propagate HTSlib's unaligned access preference to htscodecs
#endif
])
AC_DEFINE([UBSAN],1,[])
], [], [], [AC_LANG_PROGRAM([[
#include "x86intrin.h"
]],[[
unsigned int i = _mm_popcnt_u32(1);
__m128i a = _mm_set_epi32(1, 2, 3, i), b = _mm_set_epi32(4, 3, 2, 1);
__m128i c = _mm_max_epu32(a, b);
b = _mm_shuffle_epi8(a, c);
return *((char *) &b);
]])])
])
AC_SUBST([hts_cflags_sse4])

dnl Options for rANS32x16 avx2 version
AX_CHECK_COMPILE_FLAG([-mavx2], [
hts_cflags_avx2="-mavx2"
AC_SUBST([hts_cflags_avx2])
AC_DEFINE([HAVE_AVX2],1,
[Defined to 1 if the compiler can issue AVX2 instructions.])
], [], [], [AC_LANG_PROGRAM([[
#include "x86intrin.h"
HTS_CHECK_COMPILE_FLAGS_NEEDED([avx2], [-mavx2], [AC_LANG_PROGRAM([[
#ifdef __x86_64__
#include "x86intrin.h"
#endif
]],[[
#ifdef __x86_64__
__m256i a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
__m256i b = _mm256_add_epi32(a, a);
long long c = _mm256_extract_epi64(b, 0);
return (int) c;
]])])
#endif
]])], [
hts_cflags_avx2="$flags_needed"
AC_SUBST([hts_cflags_avx2])
AC_DEFINE([HAVE_AVX2],1,[Defined to 1 if rANS source using AVX2 can be compiled.])
])

dnl Options for rANS32x16 avx512 version
AX_CHECK_COMPILE_FLAG([-mavx512f], [
hts_cflags_avx512="-mavx512f"
AC_SUBST([hts_cflags_avx512])
AC_DEFINE([HAVE_AVX512],1,
[Defined to 1 if the compiler can issue AVX512 instructions.])
], [], [], [AC_LANG_PROGRAM([[
HTS_CHECK_COMPILE_FLAGS_NEEDED([avx512f], [-mavx512f], [AC_LANG_PROGRAM([[
#ifdef __x86_64__
#include "x86intrin.h"
#endif
]],[[
#ifdef __x86_64__
__m512i a = _mm512_set1_epi32(1);
__m512i b = _mm512_add_epi32(a, a);
return *((char *) &b);
]])])

dnl Detect ARM Neon availability
AC_CACHE_CHECK([whether C compiler supports ARM Neon], [hts_cv_have_neon], [
AC_COMPILE_IFELSE([
AC_LANG_PROGRAM([[
#include "arm_neon.h"
]], [[
int32x4_t a = vdupq_n_s32(1);
int32x4_t b = vaddq_s32(a, a);
return *((char *) &b);
]])], [hts_cv_have_neon=yes], [hts_cv_have_neon=no])])
if test "$hts_cv_have_neon" = yes; then
hts_have_neon=yes
AC_SUBST([hts_have_neon])
fi

#endif
]])], [
hts_cflags_avx512="$flags_needed"
AC_SUBST([hts_cflags_avx512])
AC_DEFINE([HAVE_AVX512],1,[Defined to 1 if rANS source using AVX512F can be compiled.])
])

dnl Avoid chicken-and-egg problem where pkg-config supplies the
dnl PKG_PROG_PKG_CONFIG macro, but we want to use it to check
Expand Down
103 changes: 66 additions & 37 deletions hts_probe_cc.sh
Original file line number Diff line number Diff line change
Expand Up @@ -43,72 +43,101 @@ run_compiler ()
return $retval
}

# Run a test. $1 is the flag to try, $2 is the Makefile variable to set
# with the flag probe result, $3 is a Makefile variable which will be
# set to 1 if the code was built successfully. The code to test should
# be passed in via fd 0.
# First try compiling conftest.c without the flag. If that fails, try
# again with it to see if the flag is needed.
run_test ()
{
rm -f conftest conftest.err conftest.c
cat - > conftest.c
if run_compiler ; then
echo "$2 ="
echo "$3 = 1"
elif run_compiler "$1" ; then
echo "$2 = $1"
echo "$3 = 1"
else
echo "$3 ="
fi
}

echo "# Compiler probe results, generated by $0"

# Check for sse4.1 etc. support
# Check for ssse3
run_test "-mssse3" HTS_CFLAGS_SSSE3 HTS_BUILD_SSSE3 <<'EOF'
#ifdef __x86_64__
#include "x86intrin.h"
int main(int argc, char **argv) {
__m128i a = _mm_set_epi32(1, 2, 3, 4), b = _mm_set_epi32(4, 3, 2, 1);
__m128i c = _mm_shuffle_epi8(a, b);
return *((char *) &c);
}
#else
int main(int argc, char **argv) { return 0; }
#endif
EOF

rm -f conftest conftest.err conftest.c
cat - <<'EOF' > conftest.c
# Check for popcnt
run_test "-mpopcnt" HTS_CFLAGS_POPCNT HTS_BUILD_POPCNT <<'EOF'
#ifdef __x86_64__
#include "x86intrin.h"
int main(int argc, char **argv) {
unsigned int i = _mm_popcnt_u32(1);
__m128i a = _mm_set_epi32(1, 2, 3, i), b = _mm_set_epi32(4, 3, 2, 1);
return i != 1;
}
#else
int main(int argc, char **argv) { return 0; }
#endif
EOF

# Check for sse4.1 etc. support
run_test "-msse4.1" HTS_CFLAGS_SSE4_1 HTS_BUILD_SSE4_1 <<'EOF'
#ifdef __x86_64__
#include "x86intrin.h"
int main(int argc, char **argv) {
__m128i a = _mm_set_epi32(1, 2, 3, 4), b = _mm_set_epi32(4, 3, 2, 1);
__m128i c = _mm_max_epu32(a, b);
b = _mm_shuffle_epi8(a, c);
return *((char *) &b);
return *((char *) &c);
}
#else
int main(int argc, char **argv) { return 0; }
#endif
EOF
FLAGS="-mpopcnt -msse4.1 -mssse3"
if run_compiler "$FLAGS" ; then
echo "HTS_CFLAGS_SSE4 = $FLAGS"
fi

echo 'HTS_CFLAGS_SSE4 = $(HTS_CFLAGS_SSSE3) $(HTS_CFLAGS_POPCNT) $(HTS_CFLAGS_SSE4_1)'

# Check for avx2

rm -f conftest.c
cat - <<'EOF' > conftest.c
run_test -mavx2 HTS_CFLAGS_AVX2 HTS_BUILD_AVX2 <<'EOF'
#ifdef __x86_64__
#include "x86intrin.h"
int main(int argc, char **argv) {
__m256i a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
__m256i b = _mm256_add_epi32(a, a);
long long c = _mm256_extract_epi64(b, 0);
return (int) c;
}
#else
int main(int argc, char **argv) { return 0; }
#endif
EOF
FLAGS="-mavx2"
if run_compiler "$FLAGS" ; then
echo "HTS_CFLAGS_AVX2 = $FLAGS"
fi

# Check for avx512

rm -f conftest.c
cat - <<'EOF' > conftest.c
run_test -mavx512f HTS_CFLAGS_AVX512 HTS_BUILD_AVX512 <<'EOF'
#ifdef __x86_64__
#include "x86intrin.h"
int main(int argc, char **argv) {
__m512i a = _mm512_set1_epi32(1);
__m512i b = _mm512_add_epi32(a, a);
return *((char *) &b);
}
#else
int main(int argc, char **argv) { return 0; }
#endif
EOF
FLAGS="-mavx512f"
if run_compiler "$FLAGS" ; then
echo "HTS_CFLAGS_AVX512 = $FLAGS"
fi

# Check for neon

rm -f conftest.c
cat - <<'EOF' > conftest.c
#include "arm_neon.h"
int main(int argc, char **argv) {
int32x4_t a = vdupq_n_s32(1);
int32x4_t b = vaddq_s32(a, a);
return *((char *) &b);
}
EOF
if run_compiler "" ; then
echo "HTS_HAVE_NEON = yes"
fi

rm -f conftest.c
8 changes: 4 additions & 4 deletions htscodecs_bundled.mk
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,10 @@ HTSCODECS_SOURCES = $(HTSPREFIX)htscodecs/htscodecs/arith_dynamic.c \
$(HTSPREFIX)htscodecs/htscodecs/htscodecs.c \
$(HTSPREFIX)htscodecs/htscodecs/pack.c \
$(HTSPREFIX)htscodecs/htscodecs/rANS_static4x16pr.c \
$(if $(HTS_CFLAGS_AVX2),$(HTSPREFIX)htscodecs/htscodecs/rANS_static32x16pr_avx2.c) \
$(if $(HTS_CFLAGS_AVX512),$(HTSPREFIX)htscodecs/htscodecs/rANS_static32x16pr_avx512.c) \
$(if $(HTS_CFLAGS_SSE4),$(HTSPREFIX)htscodecs/htscodecs/rANS_static32x16pr_sse4.c) \
$(if $(HTS_HAVE_NEON),$(HTSPREFIX)htscodecs/htscodecs/rANS_static32x16pr_neon.c) \
$(HTSPREFIX)htscodecs/htscodecs/rANS_static32x16pr_avx2.c \
$(HTSPREFIX)htscodecs/htscodecs/rANS_static32x16pr_avx512.c \
$(HTSPREFIX)htscodecs/htscodecs/rANS_static32x16pr_sse4.c \
$(HTSPREFIX)htscodecs/htscodecs/rANS_static32x16pr_neon.c \
$(HTSPREFIX)htscodecs/htscodecs/rANS_static32x16pr.c \
$(HTSPREFIX)htscodecs/htscodecs/rANS_static.c \
$(HTSPREFIX)htscodecs/htscodecs/rle.c \
Expand Down
Loading