Skip to content

Commit

Permalink
Fixed #7. 1) Disabled multi-threading and 2) modified the kernel code to …
Browse files Browse the repository at this point in the history
…avoid the unrolled loops in the axpy functions when incx==0 or incy==0.
  • Loading branch information
xianyi committed Feb 20, 2011
1 parent 109b86d commit 0cfd29a
Show file tree
Hide file tree
Showing 6 changed files with 72 additions and 0 deletions.
5 changes: 5 additions & 0 deletions interface/axpy.c
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,11 @@ void CNAME(blasint n, FLOAT alpha, FLOAT *x, blasint incx, FLOAT *y, blasint inc
#ifdef SMP
nthreads = num_cpu_avail(1);

// Disable multi-threading when incx == 0 or incy == 0:
// in that case the threads would not be independent of each other.
if (incx == 0 || incy == 0)
nthreads = 1;

if (nthreads == 1) {
#endif

Expand Down
5 changes: 5 additions & 0 deletions interface/zaxpy.c
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,11 @@ void CNAME(blasint n, FLOAT *ALPHA, FLOAT *x, blasint incx, FLOAT *y, blasint in
#ifdef SMP
nthreads = num_cpu_avail(1);

// Disable multi-threading when incx == 0 or incy == 0:
// in that case the threads would not be independent of each other.
if (incx == 0 || incy == 0)
nthreads = 1;

if (nthreads == 1) {
#endif

Expand Down
6 changes: 6 additions & 0 deletions kernel/x86_64/axpy_sse.S
Original file line number Diff line number Diff line change
Expand Up @@ -1463,6 +1463,12 @@
.L50:
movq M, %rax
movq Y, YY
// If incx == 0 or incy == 0, skip the unrolled loop (branch to .L56).
cmpq $0, INCX
je .L56
cmpq $0, INCY
je .L56

sarq $3, %rax
jle .L55
ALIGN_3
Expand Down
6 changes: 6 additions & 0 deletions kernel/x86_64/axpy_sse2.S
Original file line number Diff line number Diff line change
Expand Up @@ -805,6 +805,12 @@
.L40:
movq Y, YY
movq M, %rax
// If incx == 0 or incy == 0, skip the unrolled loop (branch to .L46).
cmpq $0, INCX
je .L46
cmpq $0, INCY
je .L46

sarq $3, %rax
jle .L45
ALIGN_3
Expand Down
40 changes: 40 additions & 0 deletions kernel/x86_64/zaxpy_sse.S
Original file line number Diff line number Diff line change
Expand Up @@ -2893,6 +2893,12 @@
unpcklps %xmm13, %xmm15
#endif

// If incx == 0 or incy == 0, skip the unrolled code and use the simple loop at .L200.
cmpq $0, INCX
je .L200
cmpq $0, INCY
je .L200

movq Y, YY

movq M, %rax
Expand Down Expand Up @@ -3105,8 +3111,42 @@
addps %xmm1, %xmm8

movsd %xmm8, (Y)
jmp .L999
ALIGN_3

.L200:
	// Simple one-step-per-iteration loop; reached by the branches taken
	// when INCX or INCY compares equal to zero.
	movq	M, %rax			// %rax = loop counter, starts at M
	testq	%rax, %rax
	jle	.L999			// M <= 0: nothing to process
	ALIGN_3

.L201:
	// Fetch 8 bytes from each operand, then step X by its stride.
	movsd	(X), %xmm0
	movsd	(Y), %xmm8
	addq	INCX, X

	// Split the X value: %xmm1 gets the odd 32-bit lanes duplicated,
	// %xmm0 the even lanes duplicated (both branches give the same layout).
#ifdef HAVE_SSE3
	movshdup	%xmm0, %xmm1
	movsldup	%xmm0, %xmm0
#else
	pshufd	$0xf5, %xmm0, %xmm1
	shufps	$0xa0, %xmm0, %xmm0
#endif

	// Scale by the factors held in %xmm14 / %xmm15, which are prepared
	// before this point (presumably derived from alpha -- confirm).
	mulps	%xmm14, %xmm0
	mulps	%xmm15, %xmm1

	// Accumulate both products onto the value read from Y.
	addps	%xmm0, %xmm8
	addps	%xmm1, %xmm8

	// Write the result back and step Y by its stride.
	movsd	%xmm8, (Y)
	addq	INCY, Y

	// decq must stay immediately before jg: its flags drive the branch.
	decq	%rax
	jg	.L201
	ALIGN_3

.L999:
xorq %rax, %rax

Expand Down
10 changes: 10 additions & 0 deletions kernel/x86_64/zaxpy_sse2.S
Original file line number Diff line number Diff line change
Expand Up @@ -1416,6 +1416,12 @@

movq Y, YY
movq M, %rax
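// If incx == 0 or incy == 0, skip the unrolled loop and run the loop at .L58
// with %rax = M as the iteration count.
// NOTE(review): the visible part of the .L58 loop shows no X/YY pointer update
// and no M <= 0 check before the first iteration -- confirm against the full file.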
cmpq $0, INCX
je .L58
cmpq $0, INCY
je .L58

sarq $3, %rax
jle .L55

Expand Down Expand Up @@ -1769,6 +1775,7 @@
andq $1, %rax
jle .L999

.L58:
MOVDDUP( 0 * SIZE, X, %xmm0)
MOVDDUP( 1 * SIZE, X, %xmm1)

Expand All @@ -1781,6 +1788,9 @@

movlpd %xmm8, 0 * SIZE(YY)
movhpd %xmm8, 1 * SIZE(YY)

decq %rax
jg .L58
ALIGN_3

.L999:
Expand Down

0 comments on commit 0cfd29a

Please sign in to comment.