Skip to content

Commit

Permalink
Merge pull request #2907 from xianyi/develop
Browse files Browse the repository at this point in the history
Update from develop for 0.3.11
  • Loading branch information
martin-frbg authored Oct 17, 2020
2 parents 63b03ef + b8f6892 commit 51c2261
Show file tree
Hide file tree
Showing 503 changed files with 26,175 additions and 3,622 deletions.
32 changes: 27 additions & 5 deletions .github/workflows/dynamic_arch.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ jobs:
fail-fast: false
matrix:
os: [ubuntu-latest, macos-latest]
fortran: [gfortran, flang]
build: [cmake, make]
steps:
- name: Checkout repository
Expand All @@ -24,7 +25,7 @@ jobs:
# Restore any ccache cache entry, if none for
# ${{ runner.os }}-ccache-${{ github.sha }} exists
restore-keys: |
${{ runner.os }}-ccache
${{ runner.os }}-ccache-
- name: Print system information
run: |
Expand All @@ -49,8 +50,8 @@ jobs:
fi
ccache -M 300M # Limit the ccache size; Github's overall cache limit is 5GB
- name: Build
if: matrix.build == 'make'
- name: gfortran build
if: matrix.build == 'make' && matrix.fortran == 'gfortran'
run: |
if [ "$RUNNER_OS" == "Linux" ]; then
export PATH="/usr/lib/ccache:${PATH}"
Expand All @@ -63,8 +64,29 @@ jobs:
make -j$(nproc) DYNAMIC_ARCH=1 USE_OPENMP=0
- name: CMake build
if: matrix.build == 'cmake'
- name: flang build
if: matrix.build == 'make' && matrix.fortran == 'flang'
run: |
if [ "$RUNNER_OS" == "Linux" ]; then
export PATH="/usr/lib/ccache:${PATH}"
elif [ "$RUNNER_OS" == "macOS" ]; then
exit 0
else
echo "$RUNNER_OS not supported"
exit 1
fi
cd /usr/
sudo wget -nv https://github.com/flang-compiler/flang/releases/download/flang_20190329/flang-20190329-x86-70.tgz
sudo tar xf flang-20190329-x86-70.tgz
sudo rm flang-20190329-x86-70.tgz
cd -
make -j$(nproc) DYNAMIC_ARCH=1 USE_OPENMP=0 FC=flang
- name: CMake gfortran build
if: matrix.build == 'cmake' && matrix.fortran == 'gfortran'
run: |
if [ "$RUNNER_OS" == "Linux" ]; then
export PATH="/usr/lib/ccache:${PATH}"
Expand Down
72 changes: 72 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,18 @@ matrix:
- TARGET_BOX=IBMZ_LINUX
- BTYPE="BINARY=64 USE_OPENMP=1"

- <<: *test-ubuntu
os: linux
dist: focal
arch: s390x
compiler: clang
before_script:
- COMMON_FLAGS="DYNAMIC_ARCH=1 TARGET=Z13 NUM_THREADS=32"
env:
# for matrix annotation only
- TARGET_BOX=IBMZ_LINUX
- BTYPE="BINARY=64 USE_OPENMP=0 CC=clang"

- <<: *test-ubuntu
env:
- TARGET_BOX=LINUX64
Expand Down Expand Up @@ -75,6 +87,40 @@ matrix:
- TARGET_BOX=LINUX32
- BTYPE="BINARY=32"

- os: linux
arch: ppc64le
dist: bionic
compiler: gcc
before_script:
- sudo add-apt-repository 'ppa:ubuntu-toolchain-r/test' -y
- sudo apt-get update
- sudo apt-get install gcc-9 gfortran-9 -y
script:
- make QUIET_MAKE=1 BINARY=64 USE_OPENMP=1 CC=gcc-9 FC=gfortran-9
- make -C test $COMMON_FLAGS $BTYPE
- make -C ctest $COMMON_FLAGS $BTYPE
- make -C utest $COMMON_FLAGS $BTYPE
env:
# for matrix annotation only
- TARGET_BOX=PPC64LE_LINUX_P9

- os: linux
arch: ppc64le
dist: bionic
compiler: gcc
before_script:
- sudo add-apt-repository 'ppa:ubuntu-toolchain-r/test' -y
- sudo apt-get update
- sudo apt-get install gcc-9 gfortran-9 -y
script:
- make QUIET_MAKE=1 BUILD_BFLOAT16=1 BINARY=64 USE_OPENMP=1 CC=gcc-9 FC=gfortran-9
- make -C test $COMMON_FLAGS $BTYPE
- make -C ctest $COMMON_FLAGS $BTYPE
- make -C utest $COMMON_FLAGS $BTYPE
env:
# for matrix annotation only
- TARGET_BOX=PPC64LE_LINUX_P9

- os: linux
compiler: gcc
addons:
Expand Down Expand Up @@ -175,6 +221,17 @@ matrix:
env:
- BTYPE="TARGET=NEHALEM BINARY=64 INTERFACE64=1 FC=gfortran-8"

- <<: *test-macos
osx_image: xcode12
before_script:
- COMMON_FLAGS="DYNAMIC_ARCH=1 NUM_THREADS=32"
- brew update
- brew install gcc@10 # for gfortran
script:
- travis_wait 45 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
env:
- BTYPE="TARGET=NEHALEM BINARY=64 INTERFACE64=1 FC=gfortran-10"

- <<: *test-macos
osx_image: xcode10.0
env:
Expand All @@ -193,6 +250,21 @@ matrix:
- CC="/Applications/Xcode-10.1.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang"
- CFLAGS="-O2 -mno-thumb -Wno-macro-redefined -isysroot /Applications/Xcode-10.1.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS12.1.sdk -arch armv7 -miphoneos-version-min=5.1"
- BTYPE="TARGET=ARMV7 HOSTCC=clang NOFORTRAN=1"

- &test-graviton2
os: linux
arch: arm64-graviton2
dist: focal
group: edge
virt: lxd
compiler: gcc
addons:
apt:
packages:
- gfortran
script:
- travis_wait 45 make && make lapack-test

# whitelist
branches:
only:
Expand Down
42 changes: 28 additions & 14 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,7 @@ cmake_minimum_required(VERSION 2.8.5)
project(OpenBLAS C ASM)
set(OpenBLAS_MAJOR_VERSION 0)
set(OpenBLAS_MINOR_VERSION 3)
set(OpenBLAS_PATCH_VERSION 10)

set(OpenBLAS_PATCH_VERSION 11)
set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}")

# Adhere to GNU filesystem layout conventions
Expand All @@ -30,6 +29,8 @@ option(NO_AFFINITY "Disable support for CPU affinity masks to avoid binding proc
else()
set(NO_AFFINITY 1)
endif()
option(CPP_THREAD_SAFETY_TEST "Run a massively parallel DGEMM test to confirm thread safety of the library (requires OpenMP and about 1.3GB of RAM)" OFF)
option(CPP_THREAD_SAFETY_GEMV "Run a massively parallel DGEMV test to confirm thread safety of the library (requires OpenMP)" OFF)

# Add a prefix or suffix to all exported symbol names in the shared library.
# Avoids conflicts with other BLAS libraries, especially when using
Expand Down Expand Up @@ -88,13 +89,13 @@ if (NOT NO_LAPACK)
list(APPEND SUBDIRS lapack)
endif ()

if (NOT DEFINED BUILD_HALF)
set (BUILD_HALF false)
if (NOT DEFINED BUILD_BFLOAT16)
set (BUILD_BFLOAT16 false)
endif ()
# set which float types we want to build for
if (NOT DEFINED BUILD_SINGLE AND NOT DEFINED BUILD_DOUBLE AND NOT DEFINED BUILD_COMPLEX AND NOT DEFINED BUILD_COMPLEX16)
# if none are defined, build for all
# set(BUILD_HALF true)
# set(BUILD_BFLOAT16 true)
set(BUILD_SINGLE true)
set(BUILD_DOUBLE true)
set(BUILD_COMPLEX true)
Expand Down Expand Up @@ -126,9 +127,9 @@ if (BUILD_COMPLEX16)
list(APPEND FLOAT_TYPES "ZCOMPLEX") # defines COMPLEX and DOUBLE
endif ()

if (BUILD_HALF)
if (BUILD_BFLOAT16)
message(STATUS "Building Half Precision")
list(APPEND FLOAT_TYPES "HALF") # defines nothing
list(APPEND FLOAT_TYPES "BFLOAT16") # defines nothing
endif ()

if (NOT DEFINED CORE OR "${CORE}" STREQUAL "UNKNOWN")
Expand Down Expand Up @@ -234,6 +235,10 @@ if (NOT MSVC AND NOT NOFORTRAN)
if(NOT NO_CBLAS)
add_subdirectory(ctest)
endif()
add_subdirectory(lapack-netlib/TESTING)
if (CPP_THREAD_SAFETY_TEST OR CPP_THREAD_SAFETY_GEMV)
add_subdirectory(cpp_thread_test)
endif()
endif()

set_target_properties(${OpenBLAS_LIBNAME} PROPERTIES
Expand All @@ -249,7 +254,7 @@ if (BUILD_SHARED_LIBS AND BUILD_RELAPACK)
endif()
endif()

if (BUILD_SHARED_LIBS AND NOT ${SYMBOLPREFIX}${SYMBOLSUFIX} STREQUAL "")
if (BUILD_SHARED_LIBS AND NOT ${SYMBOLPREFIX}${SYMBOLSUFFIX} STREQUAL "")
if (NOT DEFINED ARCH)
set(ARCH_IN "x86_64")
else()
Expand Down Expand Up @@ -358,10 +363,21 @@ endif()

if(NOT NO_CBLAS)
message (STATUS "Generating cblas.h in ${CMAKE_INSTALL_INCLUDEDIR}")

set(CBLAS_H ${CMAKE_BINARY_DIR}/generated/cblas.h)
file(READ ${CMAKE_CURRENT_SOURCE_DIR}/cblas.h CBLAS_H_CONTENTS)
string(REPLACE "common" "openblas_config" CBLAS_H_CONTENTS_NEW "${CBLAS_H_CONTENTS}")
if (NOT ${SYMBOLPREFIX} STREQUAL "")
string(REPLACE " cblas" " ${SYMBOLPREFIX}cblas" CBLAS_H_CONTENTS "${CBLAS_H_CONTENTS_NEW}")
string(REPLACE " openblas" " ${SYMBOLPREFIX}openblas" CBLAS_H_CONTENTS_NEW "${CBLAS_H_CONTENTS}")
string (REPLACE " ${SYMBOLPREFIX}openblas_complex" " openblas_complex" CBLAS_H_CONTENTS "${CBLAS_H_CONTENTS_NEW}")
string(REPLACE " goto" " ${SYMBOLPREFIX}goto" CBLAS_H_CONTENTS_NEW "${CBLAS_H_CONTENTS}")
endif()
if (NOT ${SYMBOLSUFFIX} STREQUAL "")
string(REGEX REPLACE "(cblas[^ (]*)" "\\1${SYMBOLSUFFIX}" CBLAS_H_CONTENTS "${CBLAS_H_CONTENTS_NEW}")
string(REGEX REPLACE "(openblas[^ (]*)" "\\1${SYMBOLSUFFIX}" CBLAS_H_CONTENTS_NEW "${CBLAS_H_CONTENTS}")
string(REGEX REPLACE "(openblas_complex[^ ]*)${SYMBOLSUFFIX}" "\\1" CBLAS_H_CONTENTS "${CBLAS_H_CONTENTS_NEW}")
string(REGEX REPLACE "(goto[^ (]*)" "\\1${SYMBOLSUFFIX}" CBLAS_H_CONTENTS_NEW "${CBLAS_H_CONTENTS}")
endif()
file(WRITE ${CBLAS_H} "${CBLAS_H_CONTENTS_NEW}")
install (FILES ${CBLAS_H} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
endif()
Expand All @@ -378,11 +394,9 @@ if(NOT NO_LAPACKE)
install (FILES ${CMAKE_BINARY_DIR}/lapacke_mangling.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/openblas${SUFFIX64})
endif()

include(FindPkgConfig QUIET)
if(PKG_CONFIG_FOUND)
configure_file(${PROJECT_SOURCE_DIR}/cmake/openblas.pc.in ${PROJECT_BINARY_DIR}/openblas${SUFFIX64}.pc @ONLY)
install (FILES ${PROJECT_BINARY_DIR}/openblas${SUFFIX64}.pc DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig/)
endif()
# Install pkg-config files
configure_file(${PROJECT_SOURCE_DIR}/cmake/openblas.pc.in ${PROJECT_BINARY_DIR}/openblas${SUFFIX64}.pc @ONLY)
install (FILES ${PROJECT_BINARY_DIR}/openblas${SUFFIX64}.pc DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig/)


# GNUInstallDirs "DATADIR" wrong here; CMake search path wants "share".
Expand Down
1 change: 1 addition & 0 deletions CONTRIBUTORS.md
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,7 @@ In chronological order:
* Marius Hillenbrand <https://github.com/mhillenibm>
* [2020-05-12] Revise dynamic architecture detection for IBM z
* [2020-05-12] Add new sgemm and strmm kernel for IBM z14
* [2020-09-07] Fix builds with clang on IBM z, including dynamic architecture support

* Danfeng Zhang <https://github.com/craft-zhang>
* [2020-05-20] Improve performance of SGEMM and STRMM on Arm Cortex-A53
72 changes: 72 additions & 0 deletions Changelog.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,76 @@
OpenBLAS ChangeLog
====================================================================
Version 0.3.11
17-Oct-2020

common:
* API change:
the newly added BFLOAT16 functions were renamed to use the
letter "B" instead of "H" to avoid potential confusion with
the IEEE "half precision float" type, i.e. the 0.3.10
SHGEMM is now SBGEMM and the corresponding build option
was changed from "BUILD_HALF" to "BUILD_BFLOAT16".
* Reduced the default BLAS3_MEM_ALLOC_THRESHOLD (used as an upper
limit for placing temporary arrays on the stack) to be compatible
with a stack size of 1mb (as imposed by the JAVA runtime library)
* Added mixed-precision dot function SBDOT and utility functions
shstobf16, shdtobf16, sbf16tos and dbf16tod to convert between
single or double precision float arrays and bfloat16 arrays
* Fixed prototypes of LAPACK_?ggsvp and LAPACK_?ggsvd functions
in lapack.h
* Fixed underflow and rounding errors in LAPACK SLANV2 and DLANV2
(causing miscalculations in e.g. SHSEQR/DHSEQR, LAPACK issue #263)
* Fixed workspace calculation in LAPACK ?GELQ (LAPACK issue #415)
* Fixed several bugs in the LAPACK testsuite
* Improved performance of TRMM and TRSM for certain problem sizes
* Fixed infinite recursions and workspace miscalculations in ReLAPACK
* CMAKE builds no longer require pkg-config for creating the .pc file
* Makefile builds no longer misread NO_CBLAS=0 or NO_LAPACK=0 as
enabling these options
* Fixed detection of gfortran when invoked through an mpi wrapper
* Improve thread reinitialization performance with OpenMP xafter a fork
* Added support for building only the subset of the library required
for a particular precision by specifying BUILD_SINGLE, BUILD_DOUBLE
* Optional function name prefixes and suffixes are now correctly
reflected in the generated cblas.h
* Added CMAKE build support for the LAPACK and multithreading tests

POWER:
* Added optimized support for POWER10
* Added support for compiling for POWER8 in 32bit mode
* Added support for compilation with LLVM/clang
* Added support for compilation with NVIDIA/PGI compilers
* Fixed building on big-endian POWER8
* Fixed miscompilation of ZDOTC by gcc10
* Fixed alignment errors in the POWER8 SAXPY kernel
* Improved CPU detection on AIX
* Supported building with older compilers on POWER9

x86_64:
* Added support for Intel Cooperlake
* Added autodetection of AMD Renoir/Matisse/Zen3 cpus
* Added autodetection of Intel Comet Lake cpus
* Reimplemented ?sum, ?dot and daxpy using universal intrinsics
* Reset the fpu state before using the fpu on Windows as a workaround
for a problem introduced in Windows 10 build 19041 (a.k.a. SDK 2004)
* Fixed potentially undefined behaviour in the dot and gemv_t kernels
* Fixed a potential segmentation fault in DYNAMIC_ARCH builds
* Fixed building for ZEN with PGI/NVIDIA and AMD AOCC compilers

ARMV7:
* Fixed cpu detection on BSD-like systems

ARMV8:
* Added preliminary support for Apple Vortex cpus
* Added support for the Cavium ThunderX3T110 cpu
* Fixed cpu detection on BSD-like systems
* Fixed compilation in -std=C18 mode


IBM Z:
* Added support for compiling with the clang compiler
* Improved GEMM performance on Z14

====================================================================
Version 0.3.10
14-Jun-2020
Expand Down
9 changes: 9 additions & 0 deletions Jenkinsfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
node {
stage('Checkout') {
checkout
}

stage('Build') {
sh("make")
}
}
Loading

0 comments on commit 51c2261

Please sign in to comment.