From 26f4d4465c9452ca698be79809d7a00d4c377d54 Mon Sep 17 00:00:00 2001 From: Rob Davies Date: Wed, 15 Mar 2023 11:45:49 +0000 Subject: [PATCH] Make SIMD tests work when building multiarch binaries MacOS multiarch binaries compile source code for each architecture and then join them together using 'lipo'. This means architecture specific code both in the actual source and configure tests need to be compilable on both architectures. Switch the configure tests and hts_probe_cc.sh so that they check if a given flag is needed to compile the test code instead of just testing to see if the flag works. By adding #ifdef __x86_64__ guards around the test code, compilation will work on non-x86_64 returning the result that no special compiler flag is needed. Similar #ifdef guards are added to the source files so that the SIMD-specific code only gets compiled for x86_64. The htscodecs submodule is updated to pull in these source file changes. The SIMD parts of built-in htscodecs are now compiled unconditionally. Tests for NEON have also been removed as they weren't really doing anything. The configure and hts_probe_cc.sh are adjusted to exactly match those used by htscodecs' configure, for ease of maintenance. --- Makefile | 16 +++- configure.ac | 122 ++++++++++++++++----------- hts_probe_cc.sh | 103 ++++++++++++++-------- htscodecs | 2 +- htscodecs_bundled.mk | 8 +- m4/ax_check_compile_flag.m4 | 53 ------------ m4/hts_check_compile_flags_needed.m4 | 63 ++++++++++++++ 7 files changed, 218 insertions(+), 149 deletions(-) delete mode 100644 m4/ax_check_compile_flag.m4 create mode 100644 m4/hts_check_compile_flags_needed.m4 diff --git a/Makefile b/Makefile index 3e95a0bef..9b7f7f2f4 100644 --- a/Makefile +++ b/Makefile @@ -126,10 +126,18 @@ srcdir = . srcprefix = HTSPREFIX = +# Flags for SIMD code HTS_CFLAGS_AVX2 = HTS_CFLAGS_AVX512 = HTS_CFLAGS_SSE4 = +# Control building of SIMD code. Not used if configure has been run. +HTS_BUILD_AVX2 = +HTS_BUILD_AVX512 = +HTS_BUILD_SSSE3 = +HTS_BUILD_POPCNT = +HTS_BUILD_SSE4_1 = + include htslib_vars.mk include htscodecs.mk @@ -274,7 +282,9 @@ config.h: echo '#endif' >> $@ echo '#define HAVE_DRAND48 1' >> $@ echo '#define HAVE_LIBCURL 1' >> $@ - if [ "x$(HTS_CFLAGS_SSE4)" != "x" ] ; then \ + if [ "x$(HTS_BUILD_POPCNT)" != "x" ] && \ + [ "x$(HTS_BUILD_SSE4_1)" != "x" ] && \ + [ "x$(HTS_BUILD_SSSE3)" != "x" ]; then \ echo '#define HAVE_POPCNT 1' >> $@ ; \ echo '#define HAVE_SSE4_1 1' >> $@ ; \ echo '#define HAVE_SSSE3 1' >> $@ ; \ @@ -282,10 +292,10 @@ config.h: echo '#define UBSAN 1' >> $@ ; \ echo '#endif' >> $@ ; \ fi - if [ "x$(HTS_CFLAGS_AVX2)" != "x" ] ; then \ + if [ "x$(HTS_BUILD_AVX2)" != "x" ] ; then \ echo '#define HAVE_AVX2 1' >> $@ ; \ fi - if [ "x$(HTS_CFLAGS_AVX512)" != "x" ] ; then \ + if [ "x$(HTS_BUILD_AVX512)" != "x" ] ; then \ echo '#define HAVE_AVX512 1' >> $@ ; \ fi diff --git a/configure.ac b/configure.ac index 98b0a44c7..ff2367c1b 100644 --- a/configure.ac +++ b/configure.ac @@ -30,7 +30,7 @@ AC_CONFIG_SRCDIR(hts.c) AC_CONFIG_HEADERS(config.h) m4_include([m4/hts_prog_cc_warnings.m4]) -m4_include([m4/ax_check_compile_flag.m4]) +m4_include([m4/hts_check_compile_flags_needed.m4]) m4_include([m4/hts_hide_dynamic_syms.m4]) m4_include([m4/pkg.m4]) @@ -71,16 +71,53 @@ dnl later as they can interfere with some of the tests (notably AC_SEARCH_LIBS) HTS_PROG_CC_WERROR(hts_late_cflags) dnl Check for various compiler flags to enable SIMD features -dnl Options for rANS32x16 sse4.1 version -AX_CHECK_COMPILE_FLAG([-mssse3 -mpopcnt -msse4.1], [ - hts_cflags_sse4="-mssse3 -mpopcnt -msse4.1" - AC_SUBST([hts_cflags_sse4]) - AC_DEFINE([HAVE_SSSE3],1, - [Defined to 1 if the compiler can issue SSSE3 instructions.]) - AC_DEFINE([HAVE_POPCNT],1, - [Defined to 1 if the compiler can issue popcnt instructions.]) - AC_DEFINE([HAVE_SSE4_1],1, - [Defined to 1 if the compiler can issue SSE4.1 instructions.]) +dnl Options for rANS32x16 sse4.1 version - ssse3 +hts_cflags_sse4="" +HTS_CHECK_COMPILE_FLAGS_NEEDED([ssse3], [-mssse3], [AC_LANG_PROGRAM([[ + #ifdef __x86_64__ + #include "x86intrin.h" + #endif + ]],[[ + #ifdef __x86_64__ + __m128i a = _mm_set_epi32(1, 2, 3, 4), b = _mm_set_epi32(4, 3, 2, 1); + __m128i c = _mm_shuffle_epi8(a, b); + return *((char *) &c); + #endif + ]])], [ + hts_cflags_sse4="$flags_needed $hts_cflags_sse4" + AC_DEFINE([HAVE_SSSE3],1,[Defined to 1 if rANS source using SSSE3 can be compiled.]) +]) + +dnl Options for rANS32x16 sse4.1 version - popcnt +HTS_CHECK_COMPILE_FLAGS_NEEDED([popcnt], [-mpopcnt], [AC_LANG_PROGRAM([[ + #ifdef __x86_64__ + #include "x86intrin.h" + #endif + ]],[[ + #ifdef __x86_64__ + unsigned int i = _mm_popcnt_u32(1); + return i != 1; + #endif + ]])], [ + hts_cflags_sse4="$flags_needed $hts_cflags_sse4" + AC_DEFINE([HAVE_POPCNT],1,[Defined to 1 if rANS source using popcnt can be compiled.]) +]) + +dnl Options for rANS32x16 sse4.1 version - sse4.1 +HTS_CHECK_COMPILE_FLAGS_NEEDED([sse4.1], [-msse4.1], [AC_LANG_PROGRAM([[ + #ifdef __x86_64__ + #include "x86intrin.h" + #endif + ]],[[ + #ifdef __x86_64__ + __m128i a = _mm_set_epi32(1, 2, 3, 4), b = _mm_set_epi32(4, 3, 2, 1); + __m128i c = _mm_max_epu32(a, b); + return *((char *) &c); + #endif + ]])], [ + hts_cflags_sse4="$flags_needed $hts_cflags_sse4" + AC_DEFINE([HAVE_SSE4_1],1,[Defined to 1 if rANS source using SSE4.1 can be compiled. +]) dnl Propagate HTSlib's unaligned access preference to htscodecs AH_VERBATIM([UBSAN],[ /* Prevent unaligned access in htscodecs SSE4 rANS codec */ @@ -89,60 +126,43 @@ dnl Propagate HTSlib's unaligned access preference to htscodecs #endif ]) AC_DEFINE([UBSAN],1,[]) - ], [], [], [AC_LANG_PROGRAM([[ - #include "x86intrin.h" - ]],[[ - unsigned int i = _mm_popcnt_u32(1); - __m128i a = _mm_set_epi32(1, 2, 3, i), b = _mm_set_epi32(4, 3, 2, 1); - __m128i c = _mm_max_epu32(a, b); - b = _mm_shuffle_epi8(a, c); - return *((char *) &b); - ]])]) +]) +AC_SUBST([hts_cflags_sse4]) dnl Options for rANS32x16 avx2 version -AX_CHECK_COMPILE_FLAG([-mavx2], [ - hts_cflags_avx2="-mavx2" - AC_SUBST([hts_cflags_avx2]) - AC_DEFINE([HAVE_AVX2],1, - [Defined to 1 if the compiler can issue AVX2 instructions.]) - ], [], [], [AC_LANG_PROGRAM([[ - #include "x86intrin.h" +HTS_CHECK_COMPILE_FLAGS_NEEDED([avx2], [-mavx2], [AC_LANG_PROGRAM([[ + #ifdef __x86_64__ + #include "x86intrin.h" + #endif ]],[[ + #ifdef __x86_64__ __m256i a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); __m256i b = _mm256_add_epi32(a, a); long long c = _mm256_extract_epi64(b, 0); return (int) c; - ]])]) + #endif + ]])], [ + hts_cflags_avx2="$flags_needed" + AC_SUBST([hts_cflags_avx2]) + AC_DEFINE([HAVE_AVX2],1,[Defined to 1 if rANS source using AVX2 can be compiled.]) +]) dnl Options for rANS32x16 avx512 version -AX_CHECK_COMPILE_FLAG([-mavx512f], [ - hts_cflags_avx512="-mavx512f" - AC_SUBST([hts_cflags_avx512]) - AC_DEFINE([HAVE_AVX512],1, - [Defined to 1 if the compiler can issue AVX512 instructions.]) - ], [], [], [AC_LANG_PROGRAM([[ +HTS_CHECK_COMPILE_FLAGS_NEEDED([avx512f], [-mavx512f], [AC_LANG_PROGRAM([[ + #ifdef __x86_64__ #include "x86intrin.h" + #endif ]],[[ + #ifdef __x86_64__ __m512i a = _mm512_set1_epi32(1); __m512i b = _mm512_add_epi32(a, a); return *((char *) &b); - ]])]) - -dnl Detect ARM Neon availability -AC_CACHE_CHECK([whether C compiler supports ARM Neon], [hts_cv_have_neon], [ - AC_COMPILE_IFELSE([ - AC_LANG_PROGRAM([[ - #include "arm_neon.h" - ]], [[ - int32x4_t a = vdupq_n_s32(1); - int32x4_t b = vaddq_s32(a, a); - return *((char *) &b); - ]])], [hts_cv_have_neon=yes], [hts_cv_have_neon=no])]) -if test "$hts_cv_have_neon" = yes; then - hts_have_neon=yes - AC_SUBST([hts_have_neon]) -fi - + #endif + ]])], [ + hts_cflags_avx512="$flags_needed" + AC_SUBST([hts_cflags_avx512]) + AC_DEFINE([HAVE_AVX512],1,[Defined to 1 if rANS source using AVX512F can be compiled.]) +]) dnl Avoid chicken-and-egg problem where pkg-config supplies the dnl PKG_PROG_PKG_CONFIG macro, but we want to use it to check diff --git a/hts_probe_cc.sh b/hts_probe_cc.sh index 37d6bae7e..5e5ddec1e 100755 --- a/hts_probe_cc.sh +++ b/hts_probe_cc.sh @@ -43,30 +43,76 @@ run_compiler () return $retval } +# Run a test. $1 is the flag to try, $2 is the Makefile variable to set +# with the flag probe result, $3 is a Makefile variable which will be +# set to 1 if the code was built successfully. The code to test should +# be passed in via fd 0. +# First try compiling conftest.c without the flag. If that fails, try +# again with it to see if the flag is needed. +run_test () +{ + rm -f conftest conftest.err conftest.c + cat - > conftest.c + if run_compiler ; then + echo "$2 =" + echo "$3 = 1" + elif run_compiler "$1" ; then + echo "$2 = $1" + echo "$3 = 1" + else + echo "$3 =" + fi +} + echo "# Compiler probe results, generated by $0" -# Check for sse4.1 etc. support +# Check for ssse3 +run_test "-mssse3" HTS_CFLAGS_SSSE3 HTS_BUILD_SSSE3 <<'EOF' +#ifdef __x86_64__ +#include "x86intrin.h" +int main(int argc, char **argv) { + __m128i a = _mm_set_epi32(1, 2, 3, 4), b = _mm_set_epi32(4, 3, 2, 1); + __m128i c = _mm_shuffle_epi8(a, b); + return *((char *) &c); +} +#else +int main(int argc, char **argv) { return 0; } +#endif +EOF -rm -f conftest conftest.err conftest.c -cat - <<'EOF' > conftest.c +# Check for popcnt +run_test "-mpopcnt" HTS_CFLAGS_POPCNT HTS_BUILD_POPCNT <<'EOF' +#ifdef __x86_64__ #include "x86intrin.h" int main(int argc, char **argv) { unsigned int i = _mm_popcnt_u32(1); - __m128i a = _mm_set_epi32(1, 2, 3, i), b = _mm_set_epi32(4, 3, 2, 1); + return i != 1; +} +#else +int main(int argc, char **argv) { return 0; } +#endif +EOF + +# Check for sse4.1 etc. support +run_test "-msse4.1" HTS_CFLAGS_SSE4_1 HTS_BUILD_SSE4_1 <<'EOF' +#ifdef __x86_64__ +#include "x86intrin.h" +int main(int argc, char **argv) { + __m128i a = _mm_set_epi32(1, 2, 3, 4), b = _mm_set_epi32(4, 3, 2, 1); __m128i c = _mm_max_epu32(a, b); - b = _mm_shuffle_epi8(a, c); - return *((char *) &b); + return *((char *) &c); } +#else +int main(int argc, char **argv) { return 0; } +#endif EOF -FLAGS="-mpopcnt -msse4.1 -mssse3" -if run_compiler "$FLAGS" ; then - echo "HTS_CFLAGS_SSE4 = $FLAGS" -fi + +echo 'HTS_CFLAGS_SSE4 = $(HTS_CFLAGS_SSSE3) $(HTS_CFLAGS_POPCNT) $(HTS_CFLAGS_SSE4_1)' # Check for avx2 -rm -f conftest.c -cat - <<'EOF' > conftest.c +run_test -mavx2 HTS_CFLAGS_AVX2 HTS_BUILD_AVX2 <<'EOF' +#ifdef __x86_64__ #include "x86intrin.h" int main(int argc, char **argv) { __m256i a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); @@ -74,41 +120,24 @@ int main(int argc, char **argv) { long long c = _mm256_extract_epi64(b, 0); return (int) c; } +#else +int main(int argc, char **argv) { return 0; } +#endif EOF -FLAGS="-mavx2" -if run_compiler "$FLAGS" ; then - echo "HTS_CFLAGS_AVX2 = $FLAGS" -fi # Check for avx512 -rm -f conftest.c -cat - <<'EOF' > conftest.c +run_test -mavx512f HTS_CFLAGS_AVX512 HTS_BUILD_AVX512 <<'EOF' +#ifdef __x86_64__ #include "x86intrin.h" int main(int argc, char **argv) { __m512i a = _mm512_set1_epi32(1); __m512i b = _mm512_add_epi32(a, a); return *((char *) &b); } +#else +int main(int argc, char **argv) { return 0; } +#endif EOF -FLAGS="-mavx512f" -if run_compiler "$FLAGS" ; then - echo "HTS_CFLAGS_AVX512 = $FLAGS" -fi - -# Check for neon - -rm -f conftest.c -cat - <<'EOF' > conftest.c -#include "arm_neon.h" -int main(int argc, char **argv) { - int32x4_t a = vdupq_n_s32(1); - int32x4_t b = vaddq_s32(a, a); - return *((char *) &b); -} -EOF -if run_compiler "" ; then - echo "HTS_HAVE_NEON = yes" -fi rm -f conftest.c diff --git a/htscodecs b/htscodecs index cd0737fff..d4aed5859 160000 --- a/htscodecs +++ b/htscodecs @@ -1 +1 @@ -Subproject commit cd0737fff5893b0842b047da5aa3209e5f65442c +Subproject commit d4aed585929e2dab9dd8e6a2b74484dfc347c0f2 diff --git a/htscodecs_bundled.mk b/htscodecs_bundled.mk index 91a9c39e9..6274350f5 100644 --- a/htscodecs_bundled.mk +++ b/htscodecs_bundled.mk @@ -28,10 +28,10 @@ HTSCODECS_SOURCES = $(HTSPREFIX)htscodecs/htscodecs/arith_dynamic.c \ $(HTSPREFIX)htscodecs/htscodecs/htscodecs.c \ $(HTSPREFIX)htscodecs/htscodecs/pack.c \ $(HTSPREFIX)htscodecs/htscodecs/rANS_static4x16pr.c \ - $(if $(HTS_CFLAGS_AVX2),$(HTSPREFIX)htscodecs/htscodecs/rANS_static32x16pr_avx2.c) \ - $(if $(HTS_CFLAGS_AVX512),$(HTSPREFIX)htscodecs/htscodecs/rANS_static32x16pr_avx512.c) \ - $(if $(HTS_CFLAGS_SSE4),$(HTSPREFIX)htscodecs/htscodecs/rANS_static32x16pr_sse4.c) \ - $(if $(HTS_HAVE_NEON),$(HTSPREFIX)htscodecs/htscodecs/rANS_static32x16pr_neon.c) \ + $(HTSPREFIX)htscodecs/htscodecs/rANS_static32x16pr_avx2.c \ + $(HTSPREFIX)htscodecs/htscodecs/rANS_static32x16pr_avx512.c \ + $(HTSPREFIX)htscodecs/htscodecs/rANS_static32x16pr_sse4.c \ + $(HTSPREFIX)htscodecs/htscodecs/rANS_static32x16pr_neon.c \ $(HTSPREFIX)htscodecs/htscodecs/rANS_static32x16pr.c \ $(HTSPREFIX)htscodecs/htscodecs/rANS_static.c \ $(HTSPREFIX)htscodecs/htscodecs/rle.c \ diff --git a/m4/ax_check_compile_flag.m4 b/m4/ax_check_compile_flag.m4 deleted file mode 100644 index 16bb46495..000000000 --- a/m4/ax_check_compile_flag.m4 +++ /dev/null @@ -1,53 +0,0 @@ -# =========================================================================== -# https://www.gnu.org/software/autoconf-archive/ax_check_compile_flag.html -# =========================================================================== -# -# SYNOPSIS -# -# AX_CHECK_COMPILE_FLAG(FLAG, [ACTION-SUCCESS], [ACTION-FAILURE], [EXTRA-FLAGS], [INPUT]) -# -# DESCRIPTION -# -# Check whether the given FLAG works with the current language's compiler -# or gives an error. (Warnings, however, are ignored) -# -# ACTION-SUCCESS/ACTION-FAILURE are shell commands to execute on -# success/failure. -# -# If EXTRA-FLAGS is defined, it is added to the current language's default -# flags (e.g. CFLAGS) when the check is done. The check is thus made with -# the flags: "CFLAGS EXTRA-FLAGS FLAG". This can for example be used to -# force the compiler to issue an error when a bad flag is given. -# -# INPUT gives an alternative input source to AC_COMPILE_IFELSE. -# -# NOTE: Implementation based on AX_CFLAGS_GCC_OPTION. Please keep this -# macro in sync with AX_CHECK_{PREPROC,LINK}_FLAG. -# -# LICENSE -# -# Copyright (c) 2008 Guido U. Draheim -# Copyright (c) 2011 Maarten Bosmans -# -# Copying and distribution of this file, with or without modification, are -# permitted in any medium without royalty provided the copyright notice -# and this notice are preserved. This file is offered as-is, without any -# warranty. - -#serial 6 - -AC_DEFUN([AX_CHECK_COMPILE_FLAG], -[AC_PREREQ(2.64)dnl for _AC_LANG_PREFIX and AS_VAR_IF -AS_VAR_PUSHDEF([CACHEVAR],[ax_cv_check_[]_AC_LANG_ABBREV[]flags_$4_$1])dnl -AC_CACHE_CHECK([whether _AC_LANG compiler accepts $1], CACHEVAR, [ - ax_check_save_flags=$[]_AC_LANG_PREFIX[]FLAGS - _AC_LANG_PREFIX[]FLAGS="$[]_AC_LANG_PREFIX[]FLAGS $4 $1" - AC_LINK_IFELSE([m4_default([$5],[AC_LANG_PROGRAM()])], - [AS_VAR_SET(CACHEVAR,[yes])], - [AS_VAR_SET(CACHEVAR,[no])]) - _AC_LANG_PREFIX[]FLAGS=$ax_check_save_flags]) -AS_VAR_IF(CACHEVAR,yes, - [m4_default([$2], :)], - [m4_default([$3], :)]) -AS_VAR_POPDEF([CACHEVAR])dnl -])dnl AX_CHECK_COMPILE_FLAGS diff --git a/m4/hts_check_compile_flags_needed.m4 b/m4/hts_check_compile_flags_needed.m4 new file mode 100644 index 000000000..fb668e86f --- /dev/null +++ b/m4/hts_check_compile_flags_needed.m4 @@ -0,0 +1,63 @@ +# hts_check_compile_flags_needed.m4 +# +# SYNOPSIS +# +# HTS_CHECK_COMPILE_FLAGS_NEEDED(FEATURE, FLAGS, [INPUT], [ACTION-SUCCESS], [ACTION-FAILURE], [EXTRA-FLAGS]) +# +# DESCRIPTION +# +# Check whether the given FLAGS are required to build and link INPUT with +# the current language's compiler. Compilation and linking are first +# tries without FLAGS. If that fails it then tries to compile and +# link again with FLAGS. +# +# FEATURE describes the feature being tested, and is used when printing +# messages and to name the cache entry (along with the tested flags). +# +# ACTION-SUCCESS/ACTION-FAILURE are shell commands to execute on +# success/failure. In ACTION-SUCCESS, $flags_needed will be set to +# either an empty string or FLAGS depending on the test results. +# +# If EXTRA-FLAGS is defined, it is added to the current language's default +# flags (e.g. CFLAGS) when the check is done. The check is thus made with +# the flags: "CFLAGS EXTRA-FLAGS FLAG". This can for example be used to +# force the compiler to issue an error when a bad flag is given. +# +# If omitted, INPUT defaults to AC_LANG_PROGRAM(), although that probably +# isn't very useful. +# +# NOTE: Implementation based on AX_CHECK_COMPILE_FLAG. +# +# LICENSE +# +# Copyright (c) 2008 Guido U. Draheim +# Copyright (c) 2011 Maarten Bosmans +# Copyright (c) 2023 Robert Davies +# +# Copying and distribution of this file, with or without modification, are +# permitted in any medium without royalty provided the copyright notice +# and this notice are preserved. This file is offered as-is, without any +# warranty. + +# AX_CHECK_COMPILE_FLAGS_NEEDED(FEATURE, FLAG, [ACTION-SUCCESS], [ACTION-FAILURE], [EXTRA-FLAGS], [INPUT]) + +AC_DEFUN([HTS_CHECK_COMPILE_FLAGS_NEEDED], +[AC_PREREQ(2.64)dnl for _AC_LANG_PREFIX and AS_VAR_IF +AS_VAR_PUSHDEF([CACHEVAR],[hts_cv_check_[]_AC_LANG_ABBREV[]flags_needed_$1_$6_$2])dnl +AC_CACHE_CHECK([_AC_LANG compiler flags needed for $1], CACHEVAR, [ + AC_LINK_IFELSE([m4_default([$3],[AC_LANG_PROGRAM()])], + [AS_VAR_SET(CACHEVAR,[none])], + [ax_check_save_flags=$[]_AC_LANG_PREFIX[]FLAGS + _AC_LANG_PREFIX[]FLAGS="$[]_AC_LANG_PREFIX[]FLAGS $6 $2" + AC_LINK_IFELSE([m4_default([$3],[AC_LANG_PROGRAM()])], + [AS_VAR_SET(CACHEVAR,[$2])], + [AS_VAR_SET(CACHEVAR,[unsupported])]) + _AC_LANG_PREFIX[]FLAGS=$ax_check_save_flags])]) +AS_VAR_IF(CACHEVAR,unsupported, [ + m4_default([$5], :) +], [ + AS_VAR_IF(CACHEVAR,none,[flags_needed=""], [flags_needed="$CACHEVAR"]) + m4_default([$4], :) +]) +AS_VAR_POPDEF([CACHEVAR])dnl +])dnl HTS_CHECK_COMPILE_FLAGS_NEEDED