-
-
Notifications
You must be signed in to change notification settings - Fork 5.5k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
WIP: Upgrade to OpenBLAS 0.3.13 (#39216)
* Use OpenBLAS 0.3.13 Bumping to support xcode/clang 12 which was addressed in OpenBLAS 0.3.11 * Use OpenBLAS 0.3.13+1 * Add openblas-exshift patch for src build * Update LinearAlgebra doctests for Linux * non-ambiguous ordering in eigen and eigvals test (#39767) add missing sortby's Co-authored-by: Pablo San-Jose <[email protected]> (cherry picked from commit 3129a5b)
- Loading branch information
1 parent
ebf1dc3
commit ca12dcc
Showing
11 changed files
with
302 additions
and
149 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,2 @@ | ||
OPENBLAS_BRANCH=v0.3.10 | ||
OPENBLAS_SHA1=63b03efc2af332c88b86d4fd8079d00f4b439adf | ||
OPENBLAS_BRANCH=v0.3.13 | ||
OPENBLAS_SHA1=d2b11c47774b9216660e76e2fc67e87079f26fa1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,149 @@ | ||
commit c4b5abbe43d7c22215ef36ef4f7c1413c975678c | ||
Author: Martin Kroeker <[email protected]> | ||
Date: Fri Jan 29 10:45:36 2021 +0100 | ||
|
||
fix data type | ||
|
||
commit f87842483eee9d158f44d51d4c09662c3cff7526 | ||
Author: Martin Kroeker <[email protected]> | ||
Date: Fri Jan 29 09:56:12 2021 +0100 | ||
|
||
fix calculation of non-exceptional shift (from Reference-LAPACK PR 477) | ||
|
||
commit 856bc365338f7559639f341d76ca8746d1628ee5 | ||
Author: Martin Kroeker <[email protected]> | ||
Date: Wed Jan 27 13:41:45 2021 +0100 | ||
|
||
Add exceptional shift to fix rare convergence problems | ||
|
||
--- | ||
diff --git a/lapack-netlib/SRC/chgeqz.f b/lapack-netlib/SRC/chgeqz.f | ||
index 73d35621..4725e716 100644 | ||
--- a/lapack-netlib/SRC/chgeqz.f | ||
+++ b/lapack-netlib/SRC/chgeqz.f | ||
@@ -320,12 +320,13 @@ | ||
$ C, SAFMIN, TEMP, TEMP2, TEMPR, ULP | ||
COMPLEX ABI22, AD11, AD12, AD21, AD22, CTEMP, CTEMP2, | ||
$ CTEMP3, ESHIFT, RTDISC, S, SHIFT, SIGNBC, T1, | ||
- $ U12, X | ||
+ $ U12, X, ABI12, Y | ||
* .. | ||
* .. External Functions .. | ||
+ COMPLEX CLADIV | ||
LOGICAL LSAME | ||
REAL CLANHS, SLAMCH | ||
- EXTERNAL LSAME, CLANHS, SLAMCH | ||
+ EXTERNAL CLADIV, LSAME, CLANHS, SLAMCH | ||
* .. | ||
* .. External Subroutines .. | ||
EXTERNAL CLARTG, CLASET, CROT, CSCAL, XERBLA | ||
@@ -729,22 +730,34 @@ | ||
AD22 = ( ASCALE*H( ILAST, ILAST ) ) / | ||
$ ( BSCALE*T( ILAST, ILAST ) ) | ||
ABI22 = AD22 - U12*AD21 | ||
+ ABI12 = AD12 - U12*AD11 | ||
* | ||
- T1 = HALF*( AD11+ABI22 ) | ||
- RTDISC = SQRT( T1**2+AD12*AD21-AD11*AD22 ) | ||
- TEMP = REAL( T1-ABI22 )*REAL( RTDISC ) + | ||
- $ AIMAG( T1-ABI22 )*AIMAG( RTDISC ) | ||
- IF( TEMP.LE.ZERO ) THEN | ||
- SHIFT = T1 + RTDISC | ||
- ELSE | ||
- SHIFT = T1 - RTDISC | ||
+ SHIFT = ABI22 | ||
+ CTEMP = SQRT( ABI12 )*SQRT( AD21 ) | ||
+ TEMP = ABS1( CTEMP ) | ||
+ IF( CTEMP.NE.ZERO ) THEN | ||
+ X = HALF*( AD11-SHIFT ) | ||
+ TEMP2 = ABS1( X ) | ||
+ TEMP = MAX( TEMP, ABS1( X ) ) | ||
+ Y = TEMP*SQRT( ( X / TEMP )**2+( CTEMP / TEMP )**2 ) | ||
+ IF( TEMP2.GT.ZERO ) THEN | ||
+ IF( REAL( X / TEMP2 )*REAL( Y )+ | ||
+ $ AIMAG( X / TEMP2 )*AIMAG( Y ).LT.ZERO )Y = -Y | ||
+ END IF | ||
+ SHIFT = SHIFT - CTEMP*CLADIV( CTEMP, ( X+Y ) ) | ||
END IF | ||
ELSE | ||
* | ||
* Exceptional shift. Chosen for no particularly good reason. | ||
* | ||
- ESHIFT = ESHIFT + (ASCALE*H(ILAST,ILAST-1))/ | ||
- $ (BSCALE*T(ILAST-1,ILAST-1)) | ||
+ IF( ( IITER / 20 )*20.EQ.IITER .AND. | ||
+ $ BSCALE*ABS1(T( ILAST, ILAST )).GT.SAFMIN ) THEN | ||
+ ESHIFT = ESHIFT + ( ASCALE*H( ILAST, | ||
+ $ ILAST ) )/( BSCALE*T( ILAST, ILAST ) ) | ||
+ ELSE | ||
+ ESHIFT = ESHIFT + ( ASCALE*H( ILAST, | ||
+ $ ILAST-1 ) )/( BSCALE*T( ILAST-1, ILAST-1 ) ) | ||
+ END IF | ||
SHIFT = ESHIFT | ||
END IF | ||
* | ||
diff --git a/lapack-netlib/SRC/zhgeqz.f b/lapack-netlib/SRC/zhgeqz.f | ||
index b51cba4f..b28ae47a 100644 | ||
--- a/lapack-netlib/SRC/zhgeqz.f | ||
+++ b/lapack-netlib/SRC/zhgeqz.f | ||
@@ -320,12 +320,13 @@ | ||
$ C, SAFMIN, TEMP, TEMP2, TEMPR, ULP | ||
COMPLEX*16 ABI22, AD11, AD12, AD21, AD22, CTEMP, CTEMP2, | ||
$ CTEMP3, ESHIFT, RTDISC, S, SHIFT, SIGNBC, T1, | ||
- $ U12, X | ||
+ $ U12, X, ABI12, Y | ||
* .. | ||
* .. External Functions .. | ||
+ COMPLEX*16 ZLADIV | ||
LOGICAL LSAME | ||
DOUBLE PRECISION DLAMCH, ZLANHS | ||
- EXTERNAL LSAME, DLAMCH, ZLANHS | ||
+ EXTERNAL ZLADIV, LSAME, DLAMCH, ZLANHS | ||
* .. | ||
* .. External Subroutines .. | ||
EXTERNAL XERBLA, ZLARTG, ZLASET, ZROT, ZSCAL | ||
@@ -730,22 +731,34 @@ | ||
AD22 = ( ASCALE*H( ILAST, ILAST ) ) / | ||
$ ( BSCALE*T( ILAST, ILAST ) ) | ||
ABI22 = AD22 - U12*AD21 | ||
+ ABI12 = AD12 - U12*AD11 | ||
* | ||
- T1 = HALF*( AD11+ABI22 ) | ||
- RTDISC = SQRT( T1**2+AD12*AD21-AD11*AD22 ) | ||
- TEMP = DBLE( T1-ABI22 )*DBLE( RTDISC ) + | ||
- $ DIMAG( T1-ABI22 )*DIMAG( RTDISC ) | ||
- IF( TEMP.LE.ZERO ) THEN | ||
- SHIFT = T1 + RTDISC | ||
- ELSE | ||
- SHIFT = T1 - RTDISC | ||
+ SHIFT = ABI22 | ||
+ CTEMP = SQRT( ABI12 )*SQRT( AD21 ) | ||
+ TEMP = ABS1( CTEMP ) | ||
+ IF( CTEMP.NE.ZERO ) THEN | ||
+ X = HALF*( AD11-SHIFT ) | ||
+ TEMP2 = ABS1( X ) | ||
+ TEMP = MAX( TEMP, ABS1( X ) ) | ||
+ Y = TEMP*SQRT( ( X / TEMP )**2+( CTEMP / TEMP )**2 ) | ||
+ IF( TEMP2.GT.ZERO ) THEN | ||
+ IF( DBLE( X / TEMP2 )*DBLE( Y )+ | ||
+ $ DIMAG( X / TEMP2 )*DIMAG( Y ).LT.ZERO )Y = -Y | ||
+ END IF | ||
+ SHIFT = SHIFT - CTEMP*ZLADIV( CTEMP, ( X+Y ) ) | ||
END IF | ||
ELSE | ||
* | ||
* Exceptional shift. Chosen for no particularly good reason. | ||
* | ||
- ESHIFT = ESHIFT + (ASCALE*H(ILAST,ILAST-1))/ | ||
- $ (BSCALE*T(ILAST-1,ILAST-1)) | ||
+ IF( ( IITER / 20 )*20.EQ.IITER .AND. | ||
+ $ BSCALE*ABS1(T( ILAST, ILAST )).GT.SAFMIN ) THEN | ||
+ ESHIFT = ESHIFT + ( ASCALE*H( ILAST, | ||
+ $ ILAST ) )/( BSCALE*T( ILAST, ILAST ) ) | ||
+ ELSE | ||
+ ESHIFT = ESHIFT + ( ASCALE*H( ILAST, | ||
+ $ ILAST-1 ) )/( BSCALE*T( ILAST-1, ILAST-1 ) ) | ||
+ END IF | ||
SHIFT = ESHIFT | ||
END IF | ||
* |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,33 +1,32 @@ | ||
Makefile.power | 8 ++++---- | ||
1 file changed, 4 insertions(+), 4 deletions(-) | ||
Makefile.power | 6 +++--- | ||
1 file changed, 3 insertions(+), 3 deletions(-) | ||
|
||
diff --git a/Makefile.power b/Makefile.power | ||
index 24d8aa8a..e53a243a 100644 | ||
index c7e97229..8426e816 100644 | ||
--- a/Makefile.power | ||
+++ b/Makefile.power | ||
@@ -11,20 +11,20 @@ endif | ||
|
||
ifeq ($(CORE), POWER9) | ||
ifeq ($(USE_OPENMP), 1) | ||
-COMMON_OPT += -Ofast -mcpu=power9 -mtune=power9 -mvsx -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp | ||
+COMMON_OPT += -mcpu=power9 -mtune=power9 -mvsx -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp | ||
FCOMMON_OPT += -O2 -frecursive -mcpu=power9 -mtune=power9 -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp | ||
else | ||
-COMMON_OPT += -Ofast -mcpu=power9 -mtune=power9 -mvsx -malign-power -fno-fast-math | ||
+COMMON_OPT += -mcpu=power9 -mtune=power9 -mvsx -malign-power -fno-fast-math | ||
FCOMMON_OPT += -O2 -frecursive -mcpu=power9 -mtune=power9 -malign-power -fno-fast-math | ||
@@ -10,13 +10,13 @@ USE_OPENMP = 1 | ||
endif | ||
|
||
ifeq ($(CORE), POWER10) | ||
-CCOMMON_OPT += -Ofast -mcpu=power10 -mtune=power10 -mvsx -fno-fast-math | ||
+CCOMMON_OPT += -mcpu=power10 -mtune=power10 -mvsx -fno-fast-math | ||
FCOMMON_OPT += -O2 -frecursive -mcpu=power10 -mtune=power10 -fno-fast-math | ||
endif | ||
|
||
ifeq ($(CORE), POWER9) | ||
ifneq ($(C_COMPILER), PGI) | ||
-CCOMMON_OPT += -Ofast -mvsx -fno-fast-math | ||
+CCOMMON_OPT += -mvsx -fno-fast-math | ||
ifeq ($(C_COMPILER), GCC) | ||
ifneq ($(GCCVERSIONGT4), 1) | ||
$(warning your compiler is too old to fully support POWER9, getting a newer version of gcc is recommended) | ||
@@ -49,7 +49,7 @@ endif | ||
|
||
ifeq ($(CORE), POWER8) | ||
ifeq ($(USE_OPENMP), 1) | ||
-COMMON_OPT += -Ofast -mcpu=power8 -mtune=power8 -mvsx -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp | ||
+COMMON_OPT += -mcpu=power8 -mtune=power8 -mvsx -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp | ||
FCOMMON_OPT += -O2 -frecursive -mcpu=power8 -mtune=power8 -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp | ||
ifneq ($(C_COMPILER), PGI) | ||
-CCOMMON_OPT += -Ofast -mcpu=power8 -mtune=power8 -mvsx -fno-fast-math | ||
+CCOMMON_OPT += -mcpu=power8 -mtune=power8 -mvsx -fno-fast-math | ||
else | ||
-COMMON_OPT += -Ofast -mcpu=power8 -mtune=power8 -mvsx -malign-power -fno-fast-math | ||
+COMMON_OPT += -mcpu=power8 -mtune=power8 -mvsx -malign-power -fno-fast-math | ||
FCOMMON_OPT += -O2 -frecursive -mcpu=power8 -mtune=power8 -malign-power -fno-fast-math | ||
CCOMMON_OPT += -fast -Mvect=simd -Mcache_align | ||
endif | ||
endif | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,14 +3,14 @@ From: Keno Fischer <[email protected]> | |
Date: Sat, 14 Mar 2020 12:05:19 +0100 | ||
|
||
--- | ||
driver/others/memory.c | 131 +---------------------------------------- | ||
driver/others/memory.c | 131 +------------------------------------------------ | ||
1 file changed, 2 insertions(+), 129 deletions(-) | ||
|
||
diff --git a/driver/others/memory.c b/driver/others/memory.c | ||
index 62a5a021..23f8fe65 100644 | ||
index ba2bb55b..bf6b5529 100644 | ||
--- a/driver/others/memory.c | ||
+++ b/driver/others/memory.c | ||
@@ -1510,7 +1510,7 @@ void CONSTRUCTOR gotoblas_init(void) { | ||
@@ -1534,7 +1534,7 @@ void CONSTRUCTOR gotoblas_init(void) { | ||
|
||
} | ||
|
||
|
@@ -19,7 +19,7 @@ index 62a5a021..23f8fe65 100644 | |
|
||
if (gotoblas_initialized == 0) return; | ||
|
||
@@ -1547,74 +1547,12 @@ void DESTRUCTOR gotoblas_quit(void) { | ||
@@ -1571,74 +1571,12 @@ void DESTRUCTOR gotoblas_quit(void) { | ||
#endif | ||
} | ||
|
||
|
@@ -57,8 +57,8 @@ index 62a5a021..23f8fe65 100644 | |
-*/ | ||
-static int on_process_term(void) | ||
-{ | ||
- gotoblas_quit(); | ||
- return 0; | ||
- gotoblas_quit(); | ||
- return 0; | ||
-} | ||
#ifdef _WIN64 | ||
#pragma comment(linker, "/INCLUDE:_tls_used") | ||
|
@@ -94,7 +94,7 @@ index 62a5a021..23f8fe65 100644 | |
#if (defined(C_PGI) || (!defined(C_SUN) && defined(F_INTERFACE_SUN))) && (defined(ARCH_X86) || defined(ARCH_X86_64)) | ||
/* Don't call me; this is just work around for PGI / Sun bug */ | ||
void gotoblas_dummy_for_PGI(void) { | ||
@@ -3104,7 +3042,7 @@ void CONSTRUCTOR gotoblas_init(void) { | ||
@@ -3136,7 +3074,7 @@ void CONSTRUCTOR gotoblas_init(void) { | ||
|
||
} | ||
|
||
|
@@ -103,7 +103,7 @@ index 62a5a021..23f8fe65 100644 | |
|
||
if (gotoblas_initialized == 0) return; | ||
|
||
@@ -3133,71 +3071,6 @@ void DESTRUCTOR gotoblas_quit(void) { | ||
@@ -3165,71 +3103,6 @@ void DESTRUCTOR gotoblas_quit(void) { | ||
#endif | ||
} | ||
|
||
|
@@ -138,8 +138,8 @@ index 62a5a021..23f8fe65 100644 | |
-*/ | ||
-static int on_process_term(void) | ||
-{ | ||
- gotoblas_quit(); | ||
- return 0; | ||
- gotoblas_quit(); | ||
- return 0; | ||
-} | ||
-#ifdef _WIN64 | ||
-#pragma comment(linker, "/INCLUDE:_tls_used") | ||
|
@@ -175,4 +175,3 @@ index 62a5a021..23f8fe65 100644 | |
#if (defined(C_PGI) || (!defined(C_SUN) && defined(F_INTERFACE_SUN))) && (defined(ARCH_X86) || defined(ARCH_X86_64)) | ||
/* Don't call me; this is just work around for PGI / Sun bug */ | ||
void gotoblas_dummy_for_PGI(void) { | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters