Skip to content

Commit

Permalink
Merge pull request #2667 from xianyi/develop
Browse files Browse the repository at this point in the history
Merge develop into 0.3.0 for 0.3.10 release
  • Loading branch information
martin-frbg authored Jun 14, 2020
2 parents 33f76a6 + 95dbeff commit 63b03ef
Show file tree
Hide file tree
Showing 196 changed files with 20,992 additions and 4,085 deletions.
53 changes: 51 additions & 2 deletions .drone.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ platform:

steps:
- name: Build and Test
image: ubuntu:19.04
image: ubuntu:18.04
environment:
CC: gcc
COMMON_FLAGS: 'DYNAMIC_ARCH=1 TARGET=ARMV8 NUM_THREADS=32'
Expand All @@ -32,7 +32,7 @@ platform:

steps:
- name: Build and Test
image: ubuntu:19.04
image: ubuntu:18.04
environment:
CC: gcc
COMMON_FLAGS: 'DYNAMIC_ARCH=1 TARGET=ARMV6 NUM_THREADS=32'
Expand Down Expand Up @@ -141,3 +141,52 @@ steps:
- cmake $CMAKE_FLAGS ..
- make -j
- ctest -V

---
kind: pipeline
name: arm64_native_test

platform:
os: linux
arch: arm64

steps:
- name: Build and Test
image: ubuntu:18.04
environment:
CC: gcc
COMMON_FLAGS: 'USE_OPENMP=1'
commands:
- echo "MAKE_FLAGS:= $COMMON_FLAGS"
- apt-get update -y
- apt-get install -y make $CC gfortran perl python g++
- $CC --version
- make QUIET_MAKE=1 $COMMON_FLAGS
- make -C test $COMMON_FLAGS
- make -C ctest $COMMON_FLAGS
- make -C utest $COMMON_FLAGS
- make -C cpp_thread_test dgemm_tester
---
kind: pipeline
name: epyc_native_test

platform:
os: linux
arch: amd64

steps:
- name: Build and Test
image: ubuntu:18.04
environment:
CC: gcc
COMMON_FLAGS: 'USE_OPENMP=1'
commands:
- echo "MAKE_FLAGS:= $COMMON_FLAGS"
- apt-get update -y
- apt-get install -y make $CC gfortran perl python g++
- $CC --version
- make QUIET_MAKE=1 $COMMON_FLAGS
- make -C test $COMMON_FLAGS
- make -C ctest $COMMON_FLAGS
- make -C utest $COMMON_FLAGS
- make -C cpp_thread_test dgemm_tester
81 changes: 81 additions & 0 deletions .github/workflows/dynamic_arch.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
name: continuous build

on: [push, pull_request]

jobs:
build:
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest, macos-latest]
build: [cmake, make]
steps:
- name: Checkout repository
uses: actions/checkout@v2

- name: Compilation cache
uses: actions/cache@v2
with:
path: ~/.ccache
# We include the commit sha in the cache key, as new cache entries are
# only created if there is no existing entry for the key yet.
key: ${{ runner.os }}-ccache-${{ github.sha }}
# Restore any ccache cache entry, if none for
# ${{ runner.os }}-ccache-${{ github.sha }} exists
restore-keys: |
${{ runner.os }}-ccache
- name: Print system information
run: |
if [ "$RUNNER_OS" == "Linux" ]; then
cat /proc/cpuinfo
elif [ "$RUNNER_OS" == "macOS" ]; then
sysctl -a | grep machdep.cpu
else
echo "$RUNNER_OS not supported"
exit 1
fi
- name: Install Dependencies
run: |
if [ "$RUNNER_OS" == "Linux" ]; then
sudo apt-get install -y gfortran cmake ccache
elif [ "$RUNNER_OS" == "macOS" ]; then
brew install coreutils cmake ccache
else
echo "$RUNNER_OS not supported"
exit 1
fi
ccache -M 300M # Limit the ccache size; Github's overall cache limit is 5GB
- name: Build
if: matrix.build == 'make'
run: |
if [ "$RUNNER_OS" == "Linux" ]; then
export PATH="/usr/lib/ccache:${PATH}"
elif [ "$RUNNER_OS" == "macOS" ]; then
export PATH="$(brew --prefix)/opt/ccache/libexec:${PATH}"
else
echo "$RUNNER_OS not supported"
exit 1
fi
make -j$(nproc) DYNAMIC_ARCH=1 USE_OPENMP=0
- name: CMake build
if: matrix.build == 'cmake'
run: |
if [ "$RUNNER_OS" == "Linux" ]; then
export PATH="/usr/lib/ccache:${PATH}"
elif [ "$RUNNER_OS" == "macOS" ]; then
export PATH="$(brew --prefix)/opt/ccache/libexec:${PATH}"
else
echo "$RUNNER_OS not supported"
exit 1
fi
mkdir build
cd build
cmake -DDYNAMIC_ARCH=1 -DNOFORTRAN=0 -DBUILD_WITHOUT_LAPACK=0 -DCMAKE_VERBOSE_MAKEFILE=ON -DCMAKE_BUILD_TYPE=Release ..
make -j$(nproc)
1 change: 1 addition & 0 deletions .github/workflows/nightly-Homebrew-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ jobs:
build-OpenBLAS-with-Homebrew:
runs-on: macos-latest
env:
DEVELOPER_DIR: /Applications/Xcode_11.4.1.app/Contents/Developer
HOMEBREW_DEVELOPER: "ON"
HOMEBREW_DISPLAY_INSTALL_TIMES: "ON"
HOMEBREW_NO_ANALYTICS: "ON"
Expand Down
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ test/SBLAT2.SUMM
test/SBLAT3.SUMM
test/ZBLAT2.SUMM
test/ZBLAT3.SUMM
test/SHBLAT3.SUMM
test/cblat1
test/cblat2
test/cblat3
Expand All @@ -79,6 +80,7 @@ test/dblat3
test/sblat1
test/sblat2
test/sblat3
test/test_shgemm
test/zblat1
test/zblat2
test/zblat3
Expand Down
19 changes: 16 additions & 3 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ matrix:
before_script: &common-before
- COMMON_FLAGS="DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32"
script:
- set -e
- make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
- make -C test $COMMON_FLAGS $BTYPE
- make -C ctest $COMMON_FLAGS $BTYPE
Expand All @@ -34,6 +33,16 @@ matrix:
- TARGET_BOX=PPC64LE_LINUX
- BTYPE="BINARY=64 USE_OPENMP=1"

- <<: *test-ubuntu
os: linux
arch: s390x
before_script:
- COMMON_FLAGS="DYNAMIC_ARCH=1 TARGET=Z13 NUM_THREADS=32"
env:
# for matrix annotation only
- TARGET_BOX=IBMZ_LINUX
- BTYPE="BINARY=64 USE_OPENMP=1"

- <<: *test-ubuntu
env:
- TARGET_BOX=LINUX64
Expand Down Expand Up @@ -98,7 +107,6 @@ matrix:
- sudo sh alpine-chroot-install -p 'build-base gfortran perl linux-headers'
before_script: *common-before
script:
- set -e
# XXX: Disable some warnings for now to avoid exceeding Travis limit for log size.
- alpine make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
CFLAGS="-Wno-misleading-indentation -Wno-sign-conversion -Wno-incompatible-pointer-types"
Expand Down Expand Up @@ -141,7 +149,6 @@ matrix:
before_script:
- COMMON_ARGS="-DTARGET=NEHALEM -DNUM_THREADS=32"
script:
- set -e
- mkdir build
- CONFIG=Release
- cmake -Bbuild -H. $CMAKE_ARGS $COMMON_ARGS -DCMAKE_BUILD_TYPE=$CONFIG
Expand Down Expand Up @@ -180,6 +187,12 @@ matrix:
- CFLAGS="-O2 -Wno-macro-redefined -isysroot /Applications/Xcode-10.1.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS12.1.sdk -arch arm64 -miphoneos-version-min=10.0"
- BTYPE="TARGET=ARMV8 BINARY=64 HOSTCC=clang NOFORTRAN=1"

- <<: *test-macos
osx_image: xcode10.1
env:
- CC="/Applications/Xcode-10.1.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang"
- CFLAGS="-O2 -mno-thumb -Wno-macro-redefined -isysroot /Applications/Xcode-10.1.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS12.1.sdk -arch armv7 -miphoneos-version-min=5.1"
- BTYPE="TARGET=ARMV7 HOSTCC=clang NOFORTRAN=1"
# whitelist
branches:
only:
Expand Down
15 changes: 13 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@ cmake_minimum_required(VERSION 2.8.5)
project(OpenBLAS C ASM)
set(OpenBLAS_MAJOR_VERSION 0)
set(OpenBLAS_MINOR_VERSION 3)
set(OpenBLAS_PATCH_VERSION 9)
set(OpenBLAS_PATCH_VERSION 10)

set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}")

# Adhere to GNU filesystem layout conventions
Expand All @@ -23,6 +24,7 @@ option(BUILD_WITHOUT_CBLAS "Do not build the C interface (CBLAS) to the BLAS fun
option(DYNAMIC_ARCH "Include support for multiple CPU targets, with automatic selection at runtime (x86/x86_64, aarch64 or ppc only)" OFF)
option(DYNAMIC_OLDER "Include specific support for older x86 cpu models (Penryn,Dunnington,Atom,Nano,Opteron) with DYNAMIC_ARCH" OFF)
option(BUILD_RELAPACK "Build with ReLAPACK (recursive implementation of several LAPACK functions on top of standard LAPACK)" OFF)
option(USE_LOCKING "Use locks even in single-threaded builds to make them callable from multiple threads" OFF)
if(${CMAKE_SYSTEM_NAME} MATCHES "Linux")
option(NO_AFFINITY "Disable support for CPU affinity masks to avoid binding processes from e.g. R or numpy/scipy to a single core" ON)
else()
Expand Down Expand Up @@ -86,9 +88,13 @@ if (NOT NO_LAPACK)
list(APPEND SUBDIRS lapack)
endif ()

if (NOT DEFINED BUILD_HALF)
set (BUILD_HALF false)
endif ()
# set which float types we want to build for
if (NOT DEFINED BUILD_SINGLE AND NOT DEFINED BUILD_DOUBLE AND NOT DEFINED BUILD_COMPLEX AND NOT DEFINED BUILD_COMPLEX16)
# if none are defined, build for all
# set(BUILD_HALF true)
set(BUILD_SINGLE true)
set(BUILD_DOUBLE true)
set(BUILD_COMPLEX true)
Expand Down Expand Up @@ -120,6 +126,11 @@ if (BUILD_COMPLEX16)
list(APPEND FLOAT_TYPES "ZCOMPLEX") # defines COMPLEX and DOUBLE
endif ()

if (BUILD_HALF)
message(STATUS "Building Half Precision")
list(APPEND FLOAT_TYPES "HALF") # defines nothing
endif ()

if (NOT DEFINED CORE OR "${CORE}" STREQUAL "UNKNOWN")
message(FATAL_ERROR "Detecting CPU failed. Please set TARGET explicitly, e.g. make TARGET=your_cpu_target. Please read README for details.")
endif ()
Expand Down Expand Up @@ -234,7 +245,7 @@ if (BUILD_SHARED_LIBS AND BUILD_RELAPACK)
if (NOT MSVC)
target_link_libraries(${OpenBLAS_LIBNAME} "-Wl,-allow-multiple-definition")
else()
target_link_libraries(${OpenBLAS_LIBNAME} "/FORCE:MULTIPLE")
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} /FORCE:MULTIPLE")
endif()
endif()

Expand Down
10 changes: 10 additions & 0 deletions CONTRIBUTORS.md
Original file line number Diff line number Diff line change
Expand Up @@ -180,3 +180,13 @@ In chronological order:
* [2019-12-23] optimize AVX2 CGEMM and ZGEMM
* [2019-12-30] AVX2 CGEMM3M & ZGEMM3M kernels
* [2020-01-07] optimize AVX2 SGEMM and STRMM

* Rajalakshmi Srinivasaraghavan <https://github.com/RajalakshmiSR>
* [2020-04-15] Half-precision GEMM for bfloat16

* Marius Hillenbrand <https://github.com/mhillenibm>
* [2020-05-12] Revise dynamic architecture detection for IBM z
* [2020-05-12] Add new sgemm and strmm kernel for IBM z14

* Danfeng Zhang <https://github.com/craft-zhang>
* [2020-05-20] Improve performance of SGEMM and STRMM on Arm Cortex-A53
73 changes: 73 additions & 0 deletions Changelog.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,77 @@
OpenBLAS ChangeLog
====================================================================
Version 0.3.10
14-Jun-2020

common:
* Improved thread locking behaviour in blas_server and parallel getrf
* Imported bugfix 394 from LAPACK (spurious reference to "XERBL"
due to overlong lines)
* Imported bugfix 403 from LAPACK (compile option "recursive" required
for correctness with Intel and PGI)
* Imported bugfix 408 from LAPACK (wrong scaling in ZHEEQUB)
* Imported bugfix 411 from LAPACK (infinite loop in LARGV/LARTG/LARTGP)
* Fixed mismatches between BUFFERSIZE and GEMM_UNROLL parameters that
could lead to crashes at large matrix sizes
* Restored internal soname in dynamic libraries on FreeBSD and Dragonfly
* Added API (openblas_setaffinity) to set the thread affinity on Linux
* Added initial infrastructure for half-precision floating point
(bfloat16) support with a generic implementation of SHGEMM
* Added CMAKE build system support for building the cblas_Xgemm3m
functions
* Fixed CMAKE support for building in a path with embedded spaces
* Fixed CMAKE (non)handling of NO_EXPRECISION and MAX_STACK_ALLOC
* Fixed GCC version detection in the Makefiles
* Allowed overriding the names of AR, AS and LD in Makefile builds

POWER:
* Fixed big-endian POWER8 ELFv2 builds on FreeBSD
* Fixed GCC version checks and DYNAMIC_ARCH builds on POWER9
* Fixed CMAKE build support for POWER9
* fixed a potential race condition in the thread buffer allocation
* Worked around LAPACK test failures on PPC G4

MIPS:
* Fixed a potential race condition in the thread buffer allocation
* Added support for MIPS 24K/24KE family based on P5600 kernels

MIPS64:
* fixed a potential race condition in the thread buffer allocation
* Added TARGET=GENERIC

ARMV7:
* Fixed a race condition in the thread buffer allocation

ARMV8:
* Fixed a race condition in the thread buffer allocation
* Fixed zero initialisation in the assembly for SGEMM and DGEMM BETA
* Improved performance of the ThunderX2 DAXPY kernel
* Added an optimized SGEMM kernel for Cortex A53
* Fixed Makefile support for INTERFACE64 (8-byte integer)

x86_64:
* Fixed a syntax error in the CMAKE setup for SkylakeX
* Improved performance of STRSM on Haswell, SkylakeX and Ryzen
* Improved SGEMM performance on SGEMM for workloads with ldc a
multiple of 1024
* Improved DGEMM performance on Skylake X
* Fixed unwanted AVX512-dependency of SGEMM in DYNAMIC_ARCH
builds created on SkylakeX
* Removed data alignment requirement in the SSE2 copy kernels
that could cause spurious crashes
* Added a workaround for an optimizer bug in AppleClang 11.0.3
* Fixed LAPACK test failures due to wrong options for Intel Fortran
* Fixed compilation and LAPACK test results with recent Flang
and AMD AOCC
* Fixed DYNAMIC_ARCH builds with CMAKE on OS X
* Fixed missing exports of cblas_i?amin, cblas_i?min, cblas_i?max,
cblas_?sum, cblas_?gemm3m in the shared library on OS
* Fixed reporting of cpu name in DYNAMIC_ARCH builds (would sometimes
show the name of an older generation chip supported by the same kernels)

IBM Z:
* Improved performance of SGEMM/STRMM and DGEMM/DTRMM on Z14

====================================================================
Version 0.3.9
1-Mar-2020
Expand Down
Loading

0 comments on commit 63b03ef

Please sign in to comment.