Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ROCm] redo hipify of version controlled files #22449

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 0 additions & 4 deletions cmake/onnxruntime_rocm_hipify.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -157,10 +157,6 @@ set(provider_excluded_files
"cuda_execution_provider_info.h"
"cuda_execution_provider.cc"
"cuda_execution_provider.h"
"cuda_memory_check.cc"
"cuda_memory_check.h"
"cuda_fence.cc"
"cuda_fence.h"
"cuda_kernel.h"
"cuda_pch.cc"
"cuda_pch.h"
Expand Down
6 changes: 5 additions & 1 deletion include/onnxruntime/core/providers/rocm/rocm_resource.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,9 @@
// Resource identifiers that the ROCm execution provider exposes through the
// Ort resource-query API. Values start at rocm_resource_offset so they do not
// collide with other providers' resource enums.
enum RocmResource : int {
  hip_stream_t = rocm_resource_offset,  // HIP stream used by the EP
  miopen_handle_t,                      // MIOpen library handle
  hipblas_handle_t,                     // hipBLAS library handle
  deferred_cpu_allocator_t,
  // below are rocm ep options
  device_id_t,  // 10004
  arena_extend_strategy_t
};
70 changes: 62 additions & 8 deletions onnxruntime/core/providers/rocm/cu_inc/common.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,12 @@
#include <stdint.h>
#include <vector>
#include <mutex>
#include <limits>
#include <assert.h>
#include <math.h>

Check warning on line 10 in onnxruntime/core/providers/rocm/cu_inc/common.cuh

View workflow job for this annotation

GitHub Actions / Optional Lint C++

[cpplint] reported by reviewdog 🐶 Found C system header after C++ system header. Should be: common.h, c system, c++ system, other. [build/include_order] [4] Raw Output: onnxruntime/core/providers/rocm/cu_inc/common.cuh:10: Found C system header after C++ system header. Should be: common.h, c system, c++ system, other. [build/include_order] [4]
#include <hip/hip_runtime.h>
#include <hip/hip_fp16.h>
//#include <hip/hip_bf16.h>

Check warning on line 13 in onnxruntime/core/providers/rocm/cu_inc/common.cuh

View workflow job for this annotation

GitHub Actions / Optional Lint C++

[cpplint] reported by reviewdog 🐶 Should have a space between // and comment [whitespace/comments] [4] Raw Output: onnxruntime/core/providers/rocm/cu_inc/common.cuh:13: Should have a space between // and comment [whitespace/comments] [4]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is the bf16.h include not needed? It seems that bfloat16 is used below.

#include "core/providers/rocm/rocm_common.h"
#include "core/providers/rocm/shared_inc/rocm_call.h"

Expand Down Expand Up @@ -242,12 +245,63 @@
template <>
__device__ __inline__ half _Pow(half a, half b) { return half(powf((float)a, (float)b)); }

// True iff v__ (a BFloat16) is NaN: mask off the sign bit and check whether
// the remaining exponent+mantissa bits exceed the positive-infinity pattern.
#define ISNAN_BFLOAT16(v__) static_cast<uint16_t>(*reinterpret_cast<const uint16_t*>(&v__) & ~BFloat16::kSignMask) \
                                > BFloat16::kPositiveInfinityBits

// Note that there is no consistent canonical NaN for FP16 and BF16;
// HIP uses 0x7FFF for HIPRT_NAN_BF16, but ONNX Runtime uses 0x7FC1.
// (see BFloat16Impl::kPositiveQNaNBits).
#define NAN_BFLOAT16 BFloat16::FromBits(static_cast<uint16_t>(0x7FFFU))

Check warning on line 254 in onnxruntime/core/providers/rocm/cu_inc/common.cuh

View workflow job for this annotation

GitHub Actions / Optional Lint C++

[cpplint] reported by reviewdog 🐶 Using C-style cast. Use static_cast<uint16_t>(...) instead [readability/casting] [4] Raw Output: onnxruntime/core/providers/rocm/cu_inc/common.cuh:254: Using C-style cast. Use static_cast<uint16_t>(...) instead [readability/casting] [4]

// Generic elementwise minimum. Floating-point NaN propagation is handled by
// the explicit specializations that follow.
template <typename T>
__device__ __inline__ T _Min(T a, T b) {
  if (a < b) return a;
  return b;
}

// float minimum that propagates NaN: if either input is NaN the result is a
// quiet NaN, otherwise the smaller input.
template <>
__device__ __inline__ float _Min(float a, float b) {
  if (isnan(a) || isnan(b)) {
    return std::numeric_limits<float>::quiet_NaN();
  }
  return a < b ? a : b;
}

// double minimum that propagates NaN (see float specialization above for the
// identical contract).
template <>
__device__ __inline__ double _Min(double a, double b) {
  if (isnan(a) || isnan(b)) {
    return std::numeric_limits<double>::quiet_NaN();
  }
  return a < b ? a : b;
}

// half minimum via the HIP intrinsic __hmin_nan, which returns NaN when either
// operand is NaN — matching the NaN-propagating float/double specializations.
template <>
__device__ __inline__ half _Min(half a, half b) {
  return __hmin_nan(a, b);
}

// BFloat16 minimum with NaN propagation, implemented with the bit-pattern
// helpers ISNAN_BFLOAT16 / NAN_BFLOAT16 defined above.
template <>
__device__ __inline__ BFloat16 _Min(BFloat16 a, BFloat16 b) {
  if (ISNAN_BFLOAT16(a) || ISNAN_BFLOAT16(b)) {
    return NAN_BFLOAT16;
  }
  return a < b ? a : b;
}

// Generic elementwise maximum. Floating-point NaN propagation is handled by
// the explicit specializations that follow.
template <typename T>
__device__ __inline__ T _Max(T a, T b) {
  if (a > b) return a;
  return b;
}

// float maximum that propagates NaN: if either input is NaN the result is a
// quiet NaN, otherwise the larger input.
template <>
__device__ __inline__ float _Max(float a, float b) {
  if (isnan(a) || isnan(b)) {
    return std::numeric_limits<float>::quiet_NaN();
  }
  return a > b ? a : b;
}

// double maximum that propagates NaN (see float specialization above for the
// identical contract).
template <>
__device__ __inline__ double _Max(double a, double b) {
  if (isnan(a) || isnan(b)) {
    return std::numeric_limits<double>::quiet_NaN();
  }
  return a > b ? a : b;
}

// half maximum via the HIP intrinsic __hmax_nan, which returns NaN when either
// operand is NaN — matching the NaN-propagating float/double specializations.
template <>
__device__ __inline__ half _Max(half a, half b) {
  return __hmax_nan(a, b);
}

// BFloat16 maximum with NaN propagation, implemented with the bit-pattern
// helpers ISNAN_BFLOAT16 / NAN_BFLOAT16 defined above.
template <>
__device__ __inline__ BFloat16 _Max(BFloat16 a, BFloat16 b) {
  if (ISNAN_BFLOAT16(a) || ISNAN_BFLOAT16(b)) {
    return NAN_BFLOAT16;
  }
  return a > b ? a : b;
}

#undef ISNAN_BFLOAT16
#undef NAN_BFLOAT16

// Elementwise absolute value. Note the strict a > 0 comparison: negating a
// non-positive value preserves the original's handling of zero exactly.
template <typename T>
__device__ __inline__ T _Abs(T a) {
  if (a > (T)0) {
    return a;
  }
  return -a;
}

Expand Down Expand Up @@ -443,44 +497,44 @@
// NaN test for half without half-precision comparisons: mask off the sign bit
// and check that the magnitude bits exceed the positive-infinity pattern.
// (The diff rendering had duplicated the return statement; collapsed to one.)
template <>
struct _IsNan<half> {
  __device__ __inline__ bool operator()(half a) const {
    return static_cast<uint16_t>(*reinterpret_cast<const uint16_t*>(&a) & ~MLFloat16::kSignMask)
           > MLFloat16::kPositiveInfinityBits;
  }
};

// NaN test for BFloat16: mask off the sign bit and check that the magnitude
// bits exceed the positive-infinity pattern.
// (The diff rendering had duplicated the return statement; collapsed to one.)
template <>
struct _IsNan<BFloat16> {
  __device__ __inline__ bool operator()(BFloat16 a) const {
    return static_cast<uint16_t>(*reinterpret_cast<const uint16_t*>(&a) & ~BFloat16::kSignMask)
           > BFloat16::kPositiveInfinityBits;
  }
};

#if !defined(DISABLE_FLOAT8_TYPES)

// NaN test for Float8E4M3FN. The format has no infinity; the magnitude bits
// 0x7f mark NaN, so the check ignores the sign bit.
// (Diff rendering duplicated the "template <>" line; kept the spaced form to
// match the rest of the file.)
template <>
struct _IsNan<Float8E4M3FN> {
  __device__ __inline__ bool operator()(Float8E4M3FN a) const {
    return (*reinterpret_cast<const uint8_t*>(&a) & 0x7f) == 0x7f;
  }
};

// NaN test for Float8E4M3FNUZ: FNUZ formats encode NaN as the single bit
// pattern 0x80 (negative zero slot), so a direct byte compare suffices.
// (Diff rendering duplicated the "template <>" line; kept the spaced form to
// match the rest of the file.)
template <>
struct _IsNan<Float8E4M3FNUZ> {
  __device__ __inline__ bool operator()(Float8E4M3FNUZ a) const {
    return *reinterpret_cast<const uint8_t*>(&a) == 0x80;
  }
};

// NaN test for Float8E5M2: exponent bits all set (0x7c) with a nonzero
// mantissa (low two bits) is NaN; sign bit is ignored.
// (Diff rendering duplicated the "template <>" line; kept the spaced form to
// match the rest of the file.)
template <>
struct _IsNan<Float8E5M2> {
  __device__ __inline__ bool operator()(Float8E5M2 a) const {
    uint8_t c = *reinterpret_cast<const uint8_t*>(&a);
    return ((c & 0x7c) == 0x7c) && ((c & 0x03) != 0x00);
  }
};

template <>
template<>
struct _IsNan<Float8E5M2FNUZ> {
__device__ __inline__ bool operator()(Float8E5M2FNUZ a) const {
return *reinterpret_cast<const uint8_t*>(&a) == 0x80;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
#include "hip/hip_runtime.h"
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

Expand Down
Loading
Loading