Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merge staging into master for ROCm 5.4 #520

Merged
merged 10 commits into from
Sep 14, 2022
58 changes: 58 additions & 0 deletions .github/workflows/docs.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# Packages the repository into a zip archive and uploads it over SFTP:
# to the production location for master, to the staging location for develop.
name: Upload to the upload server

# Controls when the workflow will run
on:
  push:
    branches: [develop, master]
    tags:
      - rocm-5.*
  release:
    types: [published]

  # Allows you to run this workflow manually from the Actions tab
  workflow_dispatch:

# A workflow run is made up of one or more jobs that can run sequentially or in parallel
jobs:
  # This workflow contains a single job called "build"
  build:
    # The type of runner that the job will run on
    runs-on: ubuntu-latest

    # Steps represent a sequence of tasks that will be executed as part of the job
    steps:
      # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
      - uses: actions/checkout@v2

      # Step outputs are written to $GITHUB_OUTPUT; the older
      # "##[set-output ...]" workflow command is deprecated.
      # For non-branch refs (e.g. tags) the refs/heads/ prefix is absent, so the
      # full ref is emitted unchanged and neither upload condition matches on it.
      - name: getting branch name
        shell: bash
        run: echo "branch=${GITHUB_REF#refs/heads/}" >> "$GITHUB_OUTPUT"
        id: branch_name
      # GITHUB_REF_NAME is the short ref name (branch or tag) that triggered the run;
      # it is used below as the suffix of the archive file name.
      - name: getting tag name
        shell: bash
        run: echo "tag=${GITHUB_REF_NAME}" >> "$GITHUB_OUTPUT"
        id: tag_name
      # Archive the whole checkout, excluding git metadata and IDE settings.
      - name: zipping files
        run: zip -r ${{ github.event.repository.name }}_${{ steps.tag_name.outputs.tag }}.zip . -x '*.git*' '*.idea*'
      # Diagnostic only: prints the release target branch (empty for non-release events).
      - name: echo-step
        run: echo "${{ github.event.release.target_commitish }}"
      # NOTE(review): the action reference below appears garbled (e-mail obfuscation
      # in the rendered page); restore the real "<owner>/<action>@<version>" before use.
      - name: uploading archive to prod
        if: ${{ steps.branch_name.outputs.branch == 'master' || github.event.release.target_commitish == 'master'}}
        uses: wlixcc/[email protected]
        with:
          username: ${{ secrets.USERNAME }}
          server: ${{ secrets.SERVER }}
          ssh_private_key: ${{ secrets.SSH_PRIVATE_KEY }}
          local_path: ${{ github.event.repository.name }}_${{ steps.tag_name.outputs.tag }}.zip
          remote_path: '${{ secrets.PROD_UPLOAD_URL }}'
          args: '-o ConnectTimeout=5'
      # NOTE(review): same garbled action reference as the prod step — confirm and fix.
      - name: uploading archive to staging
        if: ${{ steps.branch_name.outputs.branch == 'develop' || github.event.release.target_commitish == 'develop' }}
        uses: wlixcc/[email protected]
        with:
          username: ${{ secrets.USERNAME }}
          server: ${{ secrets.SERVER }}
          ssh_private_key: ${{ secrets.SSH_PRIVATE_KEY }}
          local_path: ${{ github.event.repository.name }}_${{ steps.tag_name.outputs.tag }}.zip
          remote_path: '${{ secrets.STG_UPLOAD_URL }}'
          args: '-o ConnectTimeout=5'
2 changes: 1 addition & 1 deletion .jenkins/staticanalysis.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,6 @@ ci: {

properties(auxiliary.addCommonProperties([pipelineTriggers([cron('0 1 * * 2')])]))
stage(urlJobName) {
runCI([ubuntu18:['any']], urlJobName)
runCI([ubuntu20:['any']], urlJobName)
}
}
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
# Change Log for hipBLAS

## (Unreleased) hipBLAS 0.53.0
### Added
- Allow for selection of int8 datatype
- Added support for hipblasXgels and hipblasXgelsStridedBatched operations (with s,d,c,z precisions),
only supported with rocBLAS backend
- Added support for hipblasXgelsBatched operations (with s,d,c,z precisions)

## (Unreleased) hipBLAS 0.52.0
### Added
- Added --cudapath option to install.sh to allow user to specify which cuda build they would like to use.
Expand Down
22 changes: 13 additions & 9 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -158,11 +158,15 @@ if( BUILD_CLIENTS_SAMPLES OR BUILD_CLIENTS_TESTS OR BUILD_CLIENTS_BENCHMARKS )
string(TOLOWER "${CLIENTS_OS}" CLIENTS_OS)
rocm_read_os_release(CLIENTS_OS_VERSION VERSION_ID)
endif()
set(GFORTRAN_PKG "gcc-gfortran")
if(CLIENTS_OS STREQUAL "sles")
set(GFORTRAN_PKG "gcc-fortran")
elseif(CLIENTS_OS STREQUAL "centos" AND CLIENTS_OS_VERSION EQUAL 7)
set(GFORTRAN_PKG "devtoolset-7-gcc-gfortran")
message(STATUS "OS: ${CLIENTS_OS} ${CLIENTS_OS_VERSION}")
set(GFORTRAN_RPM "libgfortran4")
set(GFORTRAN_DEB "libgfortran4")
if(CLIENTS_OS STREQUAL "centos" OR CLIENTS_OS STREQUAL "rhel")
if(CLIENTS_OS_VERSION VERSION_GREATER_EQUAL "8")
set(GFORTRAN_RPM "libgfortran")
endif()
elseif(CLIENTS_OS STREQUAL "ubuntu" AND CLIENTS_OS_VERSION VERSION_GREATER_EQUAL "20.04")
set(GFORTRAN_DEB "libgfortran5")
endif()
rocm_package_setup_component(clients)
rocm_package_setup_client_component(clients-common)
Expand All @@ -171,16 +175,16 @@ if( BUILD_CLIENTS_SAMPLES OR BUILD_CLIENTS_TESTS OR BUILD_CLIENTS_BENCHMARKS )
tests
DEPENDS
COMPONENT clients-common
DEB "gfortran"
RPM "${GFORTRAN_PKG}")
DEB "${GFORTRAN_DEB}"
RPM "${GFORTRAN_RPM}")
endif()
if(BUILD_CLIENTS_BENCHMARKS)
rocm_package_setup_client_component(
benchmarks
DEPENDS
COMPONENT clients-common
DEB "gfortran"
RPM "${GFORTRAN_PKG}")
DEB "${GFORTRAN_DEB}"
RPM "${GFORTRAN_RPM}")
endif()
add_subdirectory( clients )
endif( )
Expand Down
12 changes: 6 additions & 6 deletions bump_staging_version.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,14 @@
# - run this script in master branch
# - after running this script merge master into develop

OLD_HIPBLAS_VERSION="0.52.0"
NEW_HIPBLAS_VERSION="0.53.0"
OLD_HIPBLAS_VERSION="0.53.0"
NEW_HIPBLAS_VERSION="0.54.0"

OLD_MINIMUM_ROCBLAS_VERSION="2.45.0"
NEW_MINIMUM_ROCBLAS_VERSION="2.46.0"
OLD_MINIMUM_ROCBLAS_VERSION="2.46.0"
NEW_MINIMUM_ROCBLAS_VERSION="2.47.0"

OLD_MINIMUM_ROCSOLVER_VERSION="3.19.0"
NEW_MINIMUM_ROCSOLVER_VERSION="3.20.0"
OLD_MINIMUM_ROCSOLVER_VERSION="3.20.0"
NEW_MINIMUM_ROCSOLVER_VERSION="3.21.0"

sed -i "s/${OLD_HIPBLAS_VERSION}/${NEW_HIPBLAS_VERSION}/g" CMakeLists.txt
sed -i "s/${OLD_MINIMUM_ROCBLAS_VERSION}/${NEW_MINIMUM_ROCBLAS_VERSION}/g" CMakeLists.txt
Expand Down
47 changes: 46 additions & 1 deletion clients/common/near.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,8 @@

#endif

#define NEAR_ASSERT_HALF(a, b, err) ASSERT_NEAR(float(a), float(b), err)
#define NEAR_ASSERT_HALF(a, b, err) ASSERT_NEAR(half_to_float(a), half_to_float(b), err)
#define NEAR_ASSERT_BF16(a, b, err) ASSERT_NEAR(bfloat16_to_float(a), bfloat16_to_float(b), err)

#define NEAR_ASSERT_COMPLEX(a, b, err) \
do \
Expand Down Expand Up @@ -105,6 +106,13 @@ void near_check_general(
NEAR_CHECK(M, N, 1, lda, 0, hCPU, hGPU, abs_error, NEAR_ASSERT_HALF);
}

// hipblasBfloat16 specialization for comparing a single M x N matrix with
// leading dimension lda. Forwards to NEAR_CHECK using NEAR_ASSERT_BF16, which
// converts both operands with bfloat16_to_float and compares them through
// ASSERT_NEAR against abs_error. The literal 1 and 0 occupy the argument
// positions that the strided-batched overloads fill with batch_count and strideA.
template <>
void near_check_general(
int M, int N, int lda, hipblasBfloat16* hCPU, hipblasBfloat16* hGPU, double abs_error)
{
NEAR_CHECK(M, N, 1, lda, 0, hCPU, hGPU, abs_error, NEAR_ASSERT_BF16);
}

template <>
void near_check_general(
int M, int N, int lda, hipblasComplex* hCPU, hipblasComplex* hGPU, double abs_error)
Expand Down Expand Up @@ -160,6 +168,19 @@ void near_check_general(int M,
NEAR_CHECK(M, N, batch_count, lda, strideA, hCPU, hGPU, abs_error, NEAR_ASSERT_HALF);
}

// hipblasBfloat16 specialization for strided-batched data: hCPU and hGPU are
// flat buffers, and batch_count, lda and strideA are passed straight through to
// NEAR_CHECK. Element comparison uses NEAR_ASSERT_BF16 (bfloat16_to_float on
// both operands, then ASSERT_NEAR with abs_error).
template <>
void near_check_general(int M,
int N,
int batch_count,
int lda,
hipblasStride strideA,
hipblasBfloat16* hCPU,
hipblasBfloat16* hGPU,
double abs_error)
{
NEAR_CHECK(M, N, batch_count, lda, strideA, hCPU, hGPU, abs_error, NEAR_ASSERT_BF16);
}

template <>
void near_check_general(int M,
int N,
Expand Down Expand Up @@ -200,6 +221,18 @@ void near_check_general(int M,
NEAR_CHECK_B(M, N, batch_count, lda, hCPU, hGPU, abs_error, NEAR_ASSERT_HALF);
}

// hipblasBfloat16 specialization for batched data held as arrays of
// host_vector<hipblasBfloat16>. Forwards to NEAR_CHECK_B (the macro used by the
// array-of-batches overloads, which takes no stride argument) with
// NEAR_ASSERT_BF16 as the per-element comparison against abs_error.
template <>
void near_check_general(int M,
int N,
int batch_count,
int lda,
host_vector<hipblasBfloat16> hCPU[],
host_vector<hipblasBfloat16> hGPU[],
double abs_error)
{
NEAR_CHECK_B(M, N, batch_count, lda, hCPU, hGPU, abs_error, NEAR_ASSERT_BF16);
}

template <>
void near_check_general(int M,
int N,
Expand Down Expand Up @@ -262,6 +295,18 @@ void near_check_general(int M,
NEAR_CHECK_B(M, N, batch_count, lda, hCPU, hGPU, abs_error, NEAR_ASSERT_HALF);
}

// hipblasBfloat16 specialization for batched data held as arrays of raw
// pointers. Forwards to NEAR_CHECK_B with NEAR_ASSERT_BF16, so each pair of
// elements is converted with bfloat16_to_float and compared via ASSERT_NEAR
// against abs_error.
template <>
void near_check_general(int M,
int N,
int batch_count,
int lda,
hipblasBfloat16* hCPU[],
hipblasBfloat16* hGPU[],
double abs_error)
{
NEAR_CHECK_B(M, N, batch_count, lda, hCPU, hGPU, abs_error, NEAR_ASSERT_BF16);
}

template <>
void near_check_general(
int M, int N, int batch_count, int lda, float* hCPU[], float* hGPU[], double abs_error)
Expand Down
8 changes: 8 additions & 0 deletions clients/include/near.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,4 +79,12 @@ void near_check_general(int M,
host_vector<T> hGPU[],
double abs_error);

// currently only used for half-precision comparisons in dot_ex tests
template <class T>
HIPBLAS_CLANG_STATIC constexpr double error_tolerance = 0.0;

// 2 ^ -14, smallest positive normal number for IEEE16
template <>
HIPBLAS_CLANG_STATIC constexpr double error_tolerance<hipblasHalf> = 0.000061035;

#endif
29 changes: 26 additions & 3 deletions clients/include/testing_dot_batched_ex.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ hipblasStatus_t testing_dot_batched_ex_template(const Arguments& argus)
double gpu_time_used, hipblas_error_host, hipblas_error_device;

// Initial Data on CPU
hipblas_init(hy, true, true);
hipblas_init(hy, true, false);
hipblas_init_alternating_sign(hx);
CHECK_HIP_ERROR(dx.transfer_from(hx));
CHECK_HIP_ERROR(dy.transfer_from(hy));
Expand Down Expand Up @@ -159,8 +159,31 @@ hipblasStatus_t testing_dot_batched_ex_template(const Arguments& argus)

if(argus.unit_check)
{
unit_check_general<Tr>(1, batch_count, 1, h_cpu_result, h_hipblas_result_host);
unit_check_general<Tr>(1, batch_count, 1, h_cpu_result, h_hipblas_result_device);
if(std::is_same<Tr, hipblasHalf>{})
{
double tol = error_tolerance<Tr> * N;
near_check_general(1,
1,
batch_count,
1,
1,
h_cpu_result.data(),
h_hipblas_result_host.data(),
tol);
near_check_general(1,
1,
batch_count,
1,
1,
h_cpu_result.data(),
h_hipblas_result_device.data(),
tol);
}
else
{
unit_check_general<Tr>(1, batch_count, 1, h_cpu_result, h_hipblas_result_host);
unit_check_general<Tr>(1, batch_count, 1, h_cpu_result, h_hipblas_result_device);
}
}
if(argus.norm_check)
{
Expand Down
13 changes: 11 additions & 2 deletions clients/include/testing_dot_ex.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -147,8 +147,17 @@ hipblasStatus_t testing_dot_ex_template(const Arguments& argus)

if(argus.unit_check)
{
unit_check_general<Tr>(1, 1, 1, &cpu_result, &hipblas_result_host);
unit_check_general<Tr>(1, 1, 1, &cpu_result, &hipblas_result_device);
if(std::is_same<Tr, hipblasHalf>{})
{
double tol = error_tolerance<Tr> * N;
near_check_general(1, 1, 1, &cpu_result, &hipblas_result_host, tol);
near_check_general(1, 1, 1, &cpu_result, &hipblas_result_device, tol);
}
else
{
unit_check_general<Tr>(1, 1, 1, &cpu_result, &hipblas_result_host);
unit_check_general<Tr>(1, 1, 1, &cpu_result, &hipblas_result_device);
}
}
if(argus.norm_check)
{
Expand Down
27 changes: 25 additions & 2 deletions clients/include/testing_dot_strided_batched_ex.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -180,8 +180,31 @@ hipblasStatus_t testing_dot_strided_batched_ex_template(const Arguments& argus)

if(argus.unit_check)
{
unit_check_general<Tr>(1, batch_count, 1, h_cpu_result, h_hipblas_result_host);
unit_check_general<Tr>(1, batch_count, 1, h_cpu_result, h_hipblas_result_device);
if(std::is_same<Tr, hipblasHalf>{})
{
double tol = error_tolerance<Tr> * N;
near_check_general(1,
1,
batch_count,
1,
1,
h_cpu_result.data(),
h_hipblas_result_host.data(),
tol);
near_check_general(1,
1,
batch_count,
1,
1,
h_cpu_result.data(),
h_hipblas_result_device.data(),
tol);
}
else
{
unit_check_general<Tr>(1, batch_count, 1, h_cpu_result, h_hipblas_result_host);
unit_check_general<Tr>(1, batch_count, 1, h_cpu_result, h_hipblas_result_device);
}
}
if(argus.norm_check)
{
Expand Down
10 changes: 8 additions & 2 deletions scripts/performance/blas/commandrunner.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/usr/bin/env python3
"""Copyright (C) 2018-2020 Advanced Micro Devices, Inc. All rights reserved.
"""Copyright (C) 2018-2022 Advanced Micro Devices, Inc. All rights reserved.

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
Expand Down Expand Up @@ -73,6 +73,7 @@
import subprocess
import sys
import time
from decimal import Decimal

import getspecs

Expand Down Expand Up @@ -113,8 +114,13 @@ def import_rocm_smi(install_path):
global smi_imported
if not smi_imported:
smi_imported = True
host_rocm_ver = Decimal('.'.join(getspecs.getrocmversion().split('.')[0:2])) # get host's rocm major.minor version
rocm_5_2_ver = Decimal('5.2')
try:
sys.path.append(os.path.join(install_path, 'bin'))
if rocm_5_2_ver.compare(host_rocm_ver) == 1:
sys.path.append(os.path.join(install_path, 'bin')) # For versions below ROCm 5.2
else:
sys.path.append(os.path.join(install_path, 'libexec/rocm_smi')) # For versions equal or above ROCm 5.2
import rocm_smi
smi = rocm_smi

Expand Down