Skip to content

Commit

Permalink
Merge pull request #520 from daineAMD/master
Browse files Browse the repository at this point in the history
Merge staging into master for ROCm 5.4
  • Loading branch information
daineAMD authored Sep 14, 2022
2 parents 587bfbe + 5f9a3b3 commit b80975b
Show file tree
Hide file tree
Showing 11 changed files with 209 additions and 26 deletions.
58 changes: 58 additions & 0 deletions .github/workflows/docs.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# Zips the repository and uploads the archive with the wlixcc deploy action.
# Runs on pushes to develop/master, on rocm-5.* tags, on published releases,
# and on manual dispatch. The archive goes to the prod location for master
# and to the staging location for develop.
name: Upload to the upload server

# Controls when the workflow will run
on:
  push:
    branches: [develop, master]
    tags:
      - rocm-5.*
  release:
    types: [published]

  # Allows you to run this workflow manually from the Actions tab
  workflow_dispatch:

# A workflow run is made up of one or more jobs that can run sequentially or in parallel
jobs:
  # This workflow contains a single job called "build"
  build:
    # The type of runner that the job will run on
    runs-on: ubuntu-latest

    # Steps represent a sequence of tasks that will be executed as part of the job
    steps:
      # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
      - uses: actions/checkout@v2

      # Step outputs are written to $GITHUB_OUTPUT; the older
      # "##[set-output ...]" workflow command is deprecated.
      - name: getting branch name
        shell: bash
        run: echo "branch=${GITHUB_REF#refs/heads/}" >> "$GITHUB_OUTPUT"
        id: branch_name
      - name: getting tag name
        shell: bash
        run: echo "tag=${GITHUB_REF_NAME}" >> "$GITHUB_OUTPUT"
        id: tag_name
      # Archive name is <repository>_<ref name>.zip; .git and .idea paths are excluded.
      - name: zipping files
        run: zip -r ${{ github.event.repository.name }}_${{ steps.tag_name.outputs.tag }}.zip . -x '*.git*' '*.idea*'
      - name: echo-step
        run: echo "${{ github.event.release.target_commitish }}"
      # NOTE(review): the action reference below looks mangled by e-mail
      # obfuscation in this copy; restore the real owner/repo@version before use.
      - name: uploading archive to prod
        if: ${{ steps.branch_name.outputs.branch == 'master' || github.event.release.target_commitish == 'master'}}
        uses: wlixcc/[email protected]
        with:
          username: ${{ secrets.USERNAME }}
          server: ${{ secrets.SERVER }}
          ssh_private_key: ${{ secrets.SSH_PRIVATE_KEY }}
          local_path: ${{ github.event.repository.name }}_${{ steps.tag_name.outputs.tag }}.zip
          remote_path: '${{ secrets.PROD_UPLOAD_URL }}'
          args: '-o ConnectTimeout=5'
      # NOTE(review): same mangled action reference as the prod step; confirm.
      - name: uploading archive to staging
        if: ${{ steps.branch_name.outputs.branch == 'develop' || github.event.release.target_commitish == 'develop' }}
        uses: wlixcc/[email protected]
        with:
          username: ${{ secrets.USERNAME }}
          server: ${{ secrets.SERVER }}
          ssh_private_key: ${{ secrets.SSH_PRIVATE_KEY }}
          local_path: ${{ github.event.repository.name }}_${{ steps.tag_name.outputs.tag }}.zip
          remote_path: '${{ secrets.STG_UPLOAD_URL }}'
          args: '-o ConnectTimeout=5'
2 changes: 1 addition & 1 deletion .jenkins/staticanalysis.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,6 @@ ci: {

properties(auxiliary.addCommonProperties([pipelineTriggers([cron('0 1 * * 2')])]))
stage(urlJobName) {
runCI([ubuntu18:['any']], urlJobName)
runCI([ubuntu20:['any']], urlJobName)
}
}
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
# Change Log for hipBLAS

## (Unreleased) hipBLAS 0.53.0
### Added
- Allow for selection of int8 datatype
- Added support for hipblasXgels and hipblasXgelsStridedBatched operations (with s,d,c,z precisions),
only supported with rocBLAS backend
- Added support for hipblasXgelsBatched operations (with s,d,c,z precisions)

## (Unreleased) hipBLAS 0.52.0
### Added
- Added --cudapath option to install.sh to allow user to specify which cuda build they would like to use.
Expand Down
22 changes: 13 additions & 9 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -158,11 +158,15 @@ if( BUILD_CLIENTS_SAMPLES OR BUILD_CLIENTS_TESTS OR BUILD_CLIENTS_BENCHMARKS )
string(TOLOWER "${CLIENTS_OS}" CLIENTS_OS)
rocm_read_os_release(CLIENTS_OS_VERSION VERSION_ID)
endif()
set(GFORTRAN_PKG "gcc-gfortran")
if(CLIENTS_OS STREQUAL "sles")
set(GFORTRAN_PKG "gcc-fortran")
elseif(CLIENTS_OS STREQUAL "centos" AND CLIENTS_OS_VERSION EQUAL 7)
set(GFORTRAN_PKG "devtoolset-7-gcc-gfortran")
message(STATUS "OS: ${CLIENTS_OS} ${CLIENTS_OS_VERSION}")
set(GFORTRAN_RPM "libgfortran4")
set(GFORTRAN_DEB "libgfortran4")
if(CLIENTS_OS STREQUAL "centos" OR CLIENTS_OS STREQUAL "rhel")
if(CLIENTS_OS_VERSION VERSION_GREATER_EQUAL "8")
set(GFORTRAN_RPM "libgfortran")
endif()
elseif(CLIENTS_OS STREQUAL "ubuntu" AND CLIENTS_OS_VERSION VERSION_GREATER_EQUAL "20.04")
set(GFORTRAN_DEB "libgfortran5")
endif()
rocm_package_setup_component(clients)
rocm_package_setup_client_component(clients-common)
Expand All @@ -171,16 +175,16 @@ if( BUILD_CLIENTS_SAMPLES OR BUILD_CLIENTS_TESTS OR BUILD_CLIENTS_BENCHMARKS )
tests
DEPENDS
COMPONENT clients-common
DEB "gfortran"
RPM "${GFORTRAN_PKG}")
DEB "${GFORTRAN_DEB}"
RPM "${GFORTRAN_RPM}")
endif()
if(BUILD_CLIENTS_BENCHMARKS)
rocm_package_setup_client_component(
benchmarks
DEPENDS
COMPONENT clients-common
DEB "gfortran"
RPM "${GFORTRAN_PKG}")
DEB "${GFORTRAN_DEB}"
RPM "${GFORTRAN_RPM}")
endif()
add_subdirectory( clients )
endif( )
Expand Down
12 changes: 6 additions & 6 deletions bump_staging_version.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,14 @@
# - run this script in master branch
# - after running this script merge master into develop

OLD_HIPBLAS_VERSION="0.52.0"
NEW_HIPBLAS_VERSION="0.53.0"
OLD_HIPBLAS_VERSION="0.53.0"
NEW_HIPBLAS_VERSION="0.54.0"

OLD_MINIMUM_ROCBLAS_VERSION="2.45.0"
NEW_MINIMUM_ROCBLAS_VERSION="2.46.0"
OLD_MINIMUM_ROCBLAS_VERSION="2.46.0"
NEW_MINIMUM_ROCBLAS_VERSION="2.47.0"

OLD_MINIMUM_ROCSOLVER_VERSION="3.19.0"
NEW_MINIMUM_ROCSOLVER_VERSION="3.20.0"
OLD_MINIMUM_ROCSOLVER_VERSION="3.20.0"
NEW_MINIMUM_ROCSOLVER_VERSION="3.21.0"

sed -i "s/${OLD_HIPBLAS_VERSION}/${NEW_HIPBLAS_VERSION}/g" CMakeLists.txt
sed -i "s/${OLD_MINIMUM_ROCBLAS_VERSION}/${NEW_MINIMUM_ROCBLAS_VERSION}/g" CMakeLists.txt
Expand Down
47 changes: 46 additions & 1 deletion clients/common/near.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,8 @@

#endif

#define NEAR_ASSERT_HALF(a, b, err) ASSERT_NEAR(float(a), float(b), err)
#define NEAR_ASSERT_HALF(a, b, err) ASSERT_NEAR(half_to_float(a), half_to_float(b), err)
#define NEAR_ASSERT_BF16(a, b, err) ASSERT_NEAR(bfloat16_to_float(a), bfloat16_to_float(b), err)

#define NEAR_ASSERT_COMPLEX(a, b, err) \
do \
Expand Down Expand Up @@ -105,6 +106,13 @@ void near_check_general(
NEAR_CHECK(M, N, 1, lda, 0, hCPU, hGPU, abs_error, NEAR_ASSERT_HALF);
}

// hipblasBfloat16 specialization of the non-batched near check.
// Forwards M, N and lda to NEAR_CHECK with the literals 1 and 0 in the
// positions where the strided-batched overload passes batch_count and strideA,
// and passes NEAR_ASSERT_BF16 as the comparison macro (it converts both
// operands with bfloat16_to_float before ASSERT_NEAR).
// abs_error is handed through unchanged as the tolerance argument.
template <>
void near_check_general(
int M, int N, int lda, hipblasBfloat16* hCPU, hipblasBfloat16* hGPU, double abs_error)
{
NEAR_CHECK(M, N, 1, lda, 0, hCPU, hGPU, abs_error, NEAR_ASSERT_BF16);
}

template <>
void near_check_general(
int M, int N, int lda, hipblasComplex* hCPU, hipblasComplex* hGPU, double abs_error)
Expand Down Expand Up @@ -160,6 +168,19 @@ void near_check_general(int M,
NEAR_CHECK(M, N, batch_count, lda, strideA, hCPU, hGPU, abs_error, NEAR_ASSERT_HALF);
}

// hipblasBfloat16 specialization of the strided-batched near check.
// hCPU and hGPU are raw pointers; batch_count, lda and strideA are forwarded
// to NEAR_CHECK unchanged, with NEAR_ASSERT_BF16 as the comparison macro
// (it converts both operands with bfloat16_to_float before ASSERT_NEAR).
// abs_error is handed through unchanged as the tolerance argument.
template <>
void near_check_general(int M,
int N,
int batch_count,
int lda,
hipblasStride strideA,
hipblasBfloat16* hCPU,
hipblasBfloat16* hGPU,
double abs_error)
{
NEAR_CHECK(M, N, batch_count, lda, strideA, hCPU, hGPU, abs_error, NEAR_ASSERT_BF16);
}

template <>
void near_check_general(int M,
int N,
Expand Down Expand Up @@ -200,6 +221,18 @@ void near_check_general(int M,
NEAR_CHECK_B(M, N, batch_count, lda, hCPU, hGPU, abs_error, NEAR_ASSERT_HALF);
}

// hipblasBfloat16 specialization of the batched near check taking arrays of
// host_vector (one entry per batch, presumably -- confirm against NEAR_CHECK_B).
// Forwards every argument to NEAR_CHECK_B with NEAR_ASSERT_BF16 as the
// comparison macro (it converts both operands with bfloat16_to_float before
// ASSERT_NEAR).
template <>
void near_check_general(int M,
int N,
int batch_count,
int lda,
host_vector<hipblasBfloat16> hCPU[],
host_vector<hipblasBfloat16> hGPU[],
double abs_error)
{
NEAR_CHECK_B(M, N, batch_count, lda, hCPU, hGPU, abs_error, NEAR_ASSERT_BF16);
}

template <>
void near_check_general(int M,
int N,
Expand Down Expand Up @@ -262,6 +295,18 @@ void near_check_general(int M,
NEAR_CHECK_B(M, N, batch_count, lda, hCPU, hGPU, abs_error, NEAR_ASSERT_HALF);
}

// hipblasBfloat16 specialization of the batched near check taking arrays of
// raw pointers (one entry per batch, presumably -- confirm against NEAR_CHECK_B).
// Forwards every argument to NEAR_CHECK_B with NEAR_ASSERT_BF16 as the
// comparison macro (it converts both operands with bfloat16_to_float before
// ASSERT_NEAR).
template <>
void near_check_general(int M,
int N,
int batch_count,
int lda,
hipblasBfloat16* hCPU[],
hipblasBfloat16* hGPU[],
double abs_error)
{
NEAR_CHECK_B(M, N, batch_count, lda, hCPU, hGPU, abs_error, NEAR_ASSERT_BF16);
}

template <>
void near_check_general(
int M, int N, int batch_count, int lda, float* hCPU[], float* hGPU[], double abs_error)
Expand Down
8 changes: 8 additions & 0 deletions clients/include/near.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,4 +79,12 @@ void near_check_general(int M,
host_vector<T> hGPU[],
double abs_error);

// Per-type base tolerance for near (approximate) comparisons; defaults to 0.0
// for every type without a specialization.
// Currently only used for half-precision comparisons in the dot_ex tests,
// which multiply it by N to obtain the absolute tolerance.
template <class T>
HIPBLAS_CLANG_STATIC constexpr double error_tolerance = 0.0;

// Approximately 2^-14 (6.1035e-5), the smallest positive normal number in
// IEEE 754 half precision (binary16).
template <>
HIPBLAS_CLANG_STATIC constexpr double error_tolerance<hipblasHalf> = 0.000061035;

#endif
29 changes: 26 additions & 3 deletions clients/include/testing_dot_batched_ex.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ hipblasStatus_t testing_dot_batched_ex_template(const Arguments& argus)
double gpu_time_used, hipblas_error_host, hipblas_error_device;

// Initial Data on CPU
hipblas_init(hy, true, true);
hipblas_init(hy, true, false);
hipblas_init_alternating_sign(hx);
CHECK_HIP_ERROR(dx.transfer_from(hx));
CHECK_HIP_ERROR(dy.transfer_from(hy));
Expand Down Expand Up @@ -159,8 +159,31 @@ hipblasStatus_t testing_dot_batched_ex_template(const Arguments& argus)

if(argus.unit_check)
{
unit_check_general<Tr>(1, batch_count, 1, h_cpu_result, h_hipblas_result_host);
unit_check_general<Tr>(1, batch_count, 1, h_cpu_result, h_hipblas_result_device);
if(std::is_same<Tr, hipblasHalf>{})
{
double tol = error_tolerance<Tr> * N;
near_check_general(1,
1,
batch_count,
1,
1,
h_cpu_result.data(),
h_hipblas_result_host.data(),
tol);
near_check_general(1,
1,
batch_count,
1,
1,
h_cpu_result.data(),
h_hipblas_result_device.data(),
tol);
}
else
{
unit_check_general<Tr>(1, batch_count, 1, h_cpu_result, h_hipblas_result_host);
unit_check_general<Tr>(1, batch_count, 1, h_cpu_result, h_hipblas_result_device);
}
}
if(argus.norm_check)
{
Expand Down
13 changes: 11 additions & 2 deletions clients/include/testing_dot_ex.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -147,8 +147,17 @@ hipblasStatus_t testing_dot_ex_template(const Arguments& argus)

if(argus.unit_check)
{
unit_check_general<Tr>(1, 1, 1, &cpu_result, &hipblas_result_host);
unit_check_general<Tr>(1, 1, 1, &cpu_result, &hipblas_result_device);
if(std::is_same<Tr, hipblasHalf>{})
{
double tol = error_tolerance<Tr> * N;
near_check_general(1, 1, 1, &cpu_result, &hipblas_result_host, tol);
near_check_general(1, 1, 1, &cpu_result, &hipblas_result_device, tol);
}
else
{
unit_check_general<Tr>(1, 1, 1, &cpu_result, &hipblas_result_host);
unit_check_general<Tr>(1, 1, 1, &cpu_result, &hipblas_result_device);
}
}
if(argus.norm_check)
{
Expand Down
27 changes: 25 additions & 2 deletions clients/include/testing_dot_strided_batched_ex.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -180,8 +180,31 @@ hipblasStatus_t testing_dot_strided_batched_ex_template(const Arguments& argus)

if(argus.unit_check)
{
unit_check_general<Tr>(1, batch_count, 1, h_cpu_result, h_hipblas_result_host);
unit_check_general<Tr>(1, batch_count, 1, h_cpu_result, h_hipblas_result_device);
if(std::is_same<Tr, hipblasHalf>{})
{
double tol = error_tolerance<Tr> * N;
near_check_general(1,
1,
batch_count,
1,
1,
h_cpu_result.data(),
h_hipblas_result_host.data(),
tol);
near_check_general(1,
1,
batch_count,
1,
1,
h_cpu_result.data(),
h_hipblas_result_device.data(),
tol);
}
else
{
unit_check_general<Tr>(1, batch_count, 1, h_cpu_result, h_hipblas_result_host);
unit_check_general<Tr>(1, batch_count, 1, h_cpu_result, h_hipblas_result_device);
}
}
if(argus.norm_check)
{
Expand Down
10 changes: 8 additions & 2 deletions scripts/performance/blas/commandrunner.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/usr/bin/env python3
"""Copyright (C) 2018-2020 Advanced Micro Devices, Inc. All rights reserved.
"""Copyright (C) 2018-2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
Expand Down Expand Up @@ -73,6 +73,7 @@
import subprocess
import sys
import time
from decimal import Decimal

import getspecs

Expand Down Expand Up @@ -113,8 +114,13 @@ def import_rocm_smi(install_path):
global smi_imported
if not smi_imported:
smi_imported = True
host_rocm_ver = Decimal('.'.join(getspecs.getrocmversion().split('.')[0:2])) # get host's rocm major.minor version
rocm_5_2_ver = Decimal('5.2')
try:
sys.path.append(os.path.join(install_path, 'bin'))
if rocm_5_2_ver.compare(host_rocm_ver) == 1:
sys.path.append(os.path.join(install_path, 'bin')) # For versions below ROCm 5.2
else:
sys.path.append(os.path.join(install_path, 'libexec/rocm_smi')) # For versions equal or above ROCm 5.2
import rocm_smi
smi = rocm_smi

Expand Down

0 comments on commit b80975b

Please sign in to comment.