From ea37db828e51810b8f33e3d754334a95b5bad696 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Mon, 26 Feb 2018 20:48:03 +0100 Subject: [PATCH 1/2] Convert .align to .p2align for OSX compatibility --- kernel/x86_64/dscal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/x86_64/dscal.c b/kernel/x86_64/dscal.c index 78ad521799..428558617a 100644 --- a/kernel/x86_64/dscal.c +++ b/kernel/x86_64/dscal.c @@ -99,7 +99,7 @@ static void dscal_kernel_inc_8(BLASLONG n, FLOAT *alpha, FLOAT *x, BLASLONG inc_ "leaq (%1,%4,4), %2 \n\t" - ".align 16 \n\t" + ".p2align 4 \n\t" "1: \n\t" "movsd (%1) , %%xmm4 \n\t" From 497f0c3d8a90a0b1b6590501e6bdc03ff8d0d61d Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Mon, 26 Feb 2018 20:58:33 +0100 Subject: [PATCH 2/2] Replace .align with .p2align in the Nehalem microkernels --- kernel/x86_64/daxpy_microk_nehalem-2.c | 2 +- kernel/x86_64/ddot_microk_nehalem-2.c | 2 +- kernel/x86_64/dgemv_n_microk_nehalem-4.c | 2 +- kernel/x86_64/dsymv_L_microk_nehalem-2.c | 2 +- kernel/x86_64/dsymv_U_microk_nehalem-2.c | 2 +- kernel/x86_64/saxpy_microk_nehalem-2.c | 2 +- kernel/x86_64/sdot_microk_nehalem-2.c | 2 +- kernel/x86_64/sgemv_n_microk_nehalem-4.c | 10 +++++----- kernel/x86_64/sgemv_t_microk_nehalem-4.c | 2 +- kernel/x86_64/ssymv_L_microk_nehalem-2.c | 2 +- kernel/x86_64/ssymv_U_microk_nehalem-2.c | 2 +- 11 files changed, 15 insertions(+), 15 deletions(-) diff --git a/kernel/x86_64/daxpy_microk_nehalem-2.c b/kernel/x86_64/daxpy_microk_nehalem-2.c index 38472c520a..943d893af3 100644 --- a/kernel/x86_64/daxpy_microk_nehalem-2.c +++ b/kernel/x86_64/daxpy_microk_nehalem-2.c @@ -39,7 +39,7 @@ static void daxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) "movsd (%4), %%xmm0 \n\t" // alpha "shufpd $0, %%xmm0, %%xmm0 \n\t" - ".align 16 \n\t" + ".p2align 4 \n\t" "1: \n\t" // "prefetcht0 192(%2,%0,8) \n\t" // "prefetcht0 192(%3,%0,8) \n\t" diff --git a/kernel/x86_64/ddot_microk_nehalem-2.c b/kernel/x86_64/ddot_microk_nehalem-2.c index 1d10fc2d75..fb5ec9bca0 100644 --- a/kernel/x86_64/ddot_microk_nehalem-2.c +++ b/kernel/x86_64/ddot_microk_nehalem-2.c @@ -41,7 +41,7 @@ static void ddot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) "xorpd %%xmm6, %%xmm6 \n\t" "xorpd %%xmm7, %%xmm7 \n\t" - ".align 16 \n\t" + ".p2align 4 \n\t" "1: \n\t" "movups (%2,%0,8), %%xmm12 \n\t" // 2 * x diff --git a/kernel/x86_64/dgemv_n_microk_nehalem-4.c b/kernel/x86_64/dgemv_n_microk_nehalem-4.c index 09be7c2bb2..641a6d898e 100644 --- a/kernel/x86_64/dgemv_n_microk_nehalem-4.c +++ b/kernel/x86_64/dgemv_n_microk_nehalem-4.c @@ -62,7 +62,7 @@ static void dgemv_kernel_4x4( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, FLOAT "subq $4 , %1 \n\t" "jz 2f \n\t" - ".align 16 \n\t" + ".p2align 4 \n\t" "1: \n\t" "xorpd %%xmm4 , %%xmm4 \n\t" diff --git a/kernel/x86_64/dsymv_L_microk_nehalem-2.c b/kernel/x86_64/dsymv_L_microk_nehalem-2.c index f7f7954b2e..38479f77af 100644 --- a/kernel/x86_64/dsymv_L_microk_nehalem-2.c +++ b/kernel/x86_64/dsymv_L_microk_nehalem-2.c @@ -47,7 +47,7 @@ static void dsymv_kernel_4x4(BLASLONG from, BLASLONG to, FLOAT **a, FLOAT *x, FL "shufpd $0, %%xmm6, %%xmm6 \n\t" "shufpd $0, %%xmm7, %%xmm7 \n\t" - ".align 16 \n\t" + ".p2align 4 \n\t" "1: \n\t" "movups (%4,%0,8), %%xmm12 \n\t" // 2 * a "movups (%2,%0,8), %%xmm8 \n\t" // 2 * x diff --git a/kernel/x86_64/dsymv_U_microk_nehalem-2.c b/kernel/x86_64/dsymv_U_microk_nehalem-2.c index 75e3d02d17..1344c75f73 100644 --- a/kernel/x86_64/dsymv_U_microk_nehalem-2.c +++ b/kernel/x86_64/dsymv_U_microk_nehalem-2.c @@ -50,7 +50,7 @@ static void dsymv_kernel_4x4(BLASLONG n, FLOAT *a0, FLOAT *a1, FLOAT *a2, FLOAT "xorq %0,%0 \n\t" - ".align 16 \n\t" + ".p2align 4 \n\t" "1: \n\t" "movups (%4,%0,8), %%xmm12 \n\t" // 2 * a "movups (%2,%0,8), %%xmm8 \n\t" // 2 * x diff --git a/kernel/x86_64/saxpy_microk_nehalem-2.c b/kernel/x86_64/saxpy_microk_nehalem-2.c index a09494935c..68f68ea3a9 100644 --- a/kernel/x86_64/saxpy_microk_nehalem-2.c +++ b/kernel/x86_64/saxpy_microk_nehalem-2.c @@ -39,7 +39,7 @@ static void saxpy_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) "movss (%4), %%xmm0 \n\t" // alpha "shufps $0, %%xmm0, %%xmm0 \n\t" - ".align 16 \n\t" + ".p2align 4 \n\t" "1: \n\t" // "prefetcht0 192(%2,%0,4) \n\t" // "prefetcht0 192(%3,%0,4) \n\t" diff --git a/kernel/x86_64/sdot_microk_nehalem-2.c b/kernel/x86_64/sdot_microk_nehalem-2.c index b5f6a1c913..1a27177f58 100644 --- a/kernel/x86_64/sdot_microk_nehalem-2.c +++ b/kernel/x86_64/sdot_microk_nehalem-2.c @@ -41,7 +41,7 @@ static void sdot_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) "xorps %%xmm6, %%xmm6 \n\t" "xorps %%xmm7, %%xmm7 \n\t" - ".align 16 \n\t" + ".p2align 4 \n\t" "1: \n\t" "movups (%2,%0,4), %%xmm12 \n\t" // 4 * x "movups (%3,%0,4), %%xmm8 \n\t" // 4 * x diff --git a/kernel/x86_64/sgemv_n_microk_nehalem-4.c b/kernel/x86_64/sgemv_n_microk_nehalem-4.c index 36dfb14ee7..11a3e943b7 100644 --- a/kernel/x86_64/sgemv_n_microk_nehalem-4.c +++ b/kernel/x86_64/sgemv_n_microk_nehalem-4.c @@ -59,18 +59,18 @@ static void sgemv_kernel_4x8( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, BLASLO "shufps $0, %%xmm6 , %%xmm6 \n\t" - ".align 16 \n\t" + ".p2align 4 \n\t" "1: \n\t" "xorps %%xmm4 , %%xmm4 \n\t" "xorps %%xmm5 , %%xmm5 \n\t" "movups (%3,%0,4), %%xmm7 \n\t" // 4 * y - ".align 2 \n\t" + ".p2align 1 \n\t" "movups (%4,%0,4), %%xmm8 \n\t" "movups (%5,%0,4), %%xmm9 \n\t" "movups (%6,%0,4), %%xmm10 \n\t" "movups (%7,%0,4), %%xmm11 \n\t" - ".align 2 \n\t" + ".p2align 1 \n\t" "mulps %%xmm12, %%xmm8 \n\t" "mulps %%xmm13, %%xmm9 \n\t" "mulps %%xmm14, %%xmm10 \n\t" @@ -84,7 +84,7 @@ static void sgemv_kernel_4x8( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, BLASLO "movups (%5,%8,4), %%xmm9 \n\t" "movups (%6,%8,4), %%xmm10 \n\t" "movups (%7,%8,4), %%xmm11 \n\t" - ".align 2 \n\t" + ".p2align 1 \n\t" "mulps %%xmm0 , %%xmm8 \n\t" "mulps %%xmm1 , %%xmm9 \n\t" "mulps %%xmm2 , %%xmm10 \n\t" @@ -154,7 +154,7 @@ static void sgemv_kernel_4x4( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, FLOAT "movss (%8), %%xmm6 \n\t" // alpha "shufps $0, %%xmm6 , %%xmm6 \n\t" - ".align 16 \n\t" + ".p2align 4 \n\t" "1: \n\t" "xorps %%xmm4 , %%xmm4 \n\t" "movups (%3,%0,4), %%xmm7 \n\t" // 4 * y diff --git a/kernel/x86_64/sgemv_t_microk_nehalem-4.c b/kernel/x86_64/sgemv_t_microk_nehalem-4.c index b3c07126c0..8955c84312 100644 --- a/kernel/x86_64/sgemv_t_microk_nehalem-4.c +++ b/kernel/x86_64/sgemv_t_microk_nehalem-4.c @@ -40,7 +40,7 @@ static void sgemv_kernel_4x4( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y) "xorps %%xmm6 , %%xmm6 \n\t" "xorps %%xmm7 , %%xmm7 \n\t" - ".align 16 \n\t" + ".p2align 4 \n\t" "1: \n\t" "movups (%2,%0,4), %%xmm12 \n\t" // 4 * x diff --git a/kernel/x86_64/ssymv_L_microk_nehalem-2.c b/kernel/x86_64/ssymv_L_microk_nehalem-2.c index fb53379464..c0fe5d6401 100644 --- a/kernel/x86_64/ssymv_L_microk_nehalem-2.c +++ b/kernel/x86_64/ssymv_L_microk_nehalem-2.c @@ -47,7 +47,7 @@ static void ssymv_kernel_4x4(BLASLONG from, BLASLONG to, FLOAT **a, FLOAT *x, F "shufps $0, %%xmm6, %%xmm6 \n\t" "shufps $0, %%xmm7, %%xmm7 \n\t" - ".align 16 \n\t" + ".p2align 4 \n\t" "1: \n\t" "movups (%2,%0,4), %%xmm8 \n\t" // 4 * x "movups (%3,%0,4), %%xmm9 \n\t" // 4 * y diff --git a/kernel/x86_64/ssymv_U_microk_nehalem-2.c b/kernel/x86_64/ssymv_U_microk_nehalem-2.c index 2fb8f4494b..b8e6ee7326 100644 --- a/kernel/x86_64/ssymv_U_microk_nehalem-2.c +++ b/kernel/x86_64/ssymv_U_microk_nehalem-2.c @@ -50,7 +50,7 @@ static void ssymv_kernel_4x4(BLASLONG n, FLOAT *a0, FLOAT *a1, FLOAT *a2, FLOAT "xorq %0,%0 \n\t" - ".align 16 \n\t" + ".p2align 4 \n\t" "1: \n\t" "movups (%2,%0,4), %%xmm8 \n\t" // 4 * x "movups (%3,%0,4), %%xmm9 \n\t" // 4 * y