Minor improvements cherry-pick (pytorch#12973)
Summary:
* Enable previously disabled functions for ROCm (ROCm 252)
* Fixes for topk fp16 (ROCm 270)
* HIP needs the kernel invocation to be explicitly templated in order to pass a non-const argument as a const kernel argument (ROCm 281); a sketch of the issue follows below.
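
To illustrate the third point: a minimal sketch, not taken from the patch, of why the template argument has to be spelled out at the launch site under HIP. The kernel name fill_kernel and its arguments are hypothetical stand-ins for kernels such as inverse_indices_kernel in Unique.cu; the actual fix is a hipify substitution that rewrites "inverse_indices_kernel<<<" to "inverse_indices_kernel<scalar_t><<<" (see the disabled_features.yaml hunk below).

#include <cuda_runtime.h>

// Hypothetical stand-in: the kernel parameter is declared const, but the
// caller only has a non-const pointer -- the situation the bullet describes.
template <typename scalar_t>
__global__ void fill_kernel(const scalar_t* src, scalar_t* dst, int n) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n) dst[i] = src[i];
}

void launch(float* src, float* dst, int n) {
  // Relying on template argument deduction at the launch site,
  //   fill_kernel<<<(n + 255) / 256, 256>>>(src, dst, n);
  // is accepted by nvcc but tripped up the HIP toolchain at the time.
  // Naming the template argument explicitly sidesteps the problem:
  fill_kernel<float><<<(n + 255) / 256, 256>>>(src, dst, n);
}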

For attention: bddppq ezyang

Full set of PyTorch/Caffe2 tests on ROCm here: ROCm#283
Pull Request resolved: pytorch#12973

Differential Revision: D10516072

Pulled By: bddppq

fbshipit-source-id: 833b3de1544dfa4886a34e2b5ea53d77b6f0ba9e
iotamudelta authored and facebook-github-bot committed Oct 23, 2018
1 parent 223a96a commit b8a11cf
Showing 5 changed files with 16 additions and 45 deletions.
18 changes: 3 additions & 15 deletions aten/src/ATen/native/cuda/Unique.cu
@@ -11,8 +11,6 @@
namespace at {
namespace native{

#ifndef __HIP_PLATFORM_HCC__

namespace {
template <typename scalar_t>
__global__ void inverse_indices_kernel(
@@ -157,30 +155,20 @@ template <typename scalar_t>
}
} // namespace

#endif

std::tuple<Tensor, Tensor>
_unique_cuda(const Tensor& self, const bool sorted, const bool return_inverse) {
#ifndef __HIP_PLATFORM_HCC__
return AT_DISPATCH_ALL_TYPES(self.type(), "unique", [&] {
// The current CUDA implementation of unique always sort due to the
// lack of hashtable implementation in thrust
return _unique_cuda_template<scalar_t>(self, return_inverse);
});
#else
AT_ERROR("unique_cuda: HIP not supported");
#endif
}

std::tuple<Tensor, Tensor>
_unique_dim_cuda(const Tensor& self, const int64_t dim, const bool sorted, const bool return_inverse) {
#ifndef __HIP_PLATFORM_HCC__
return AT_DISPATCH_ALL_TYPES(self.type(), "unique_dim", [&] {
return _unique_dim_cuda_template<scalar_t>(self, dim, return_inverse);
});
#else
AT_ERROR("unique_dim_cuda: HIP not supported");
#endif
return AT_DISPATCH_ALL_TYPES(self.type(), "unique_dim", [&] {
return _unique_dim_cuda_template<scalar_t>(self, dim, return_inverse);
});
}

} // namespace native
2 changes: 1 addition & 1 deletion aten/src/THC/THCNumerics.cuh
@@ -209,7 +209,7 @@ struct THCNumerics<at::Half> {
static inline __host__ __device__ at::Half round(at::Half a) { return ::round(a); }

static inline __host__ __device__ at::Half frac(at::Half a) {
#ifdef __CUDA_ARCH__
#if defined(__CUDA_ARCH__) || defined(__HIP_PLATFORM_HCC__)
return a - ::trunc(a);
#else // __CUDA_ARCH__
return a - ::floor(a);
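Side note, not part of the diff: the two frac branches are not equivalent for negative inputs, so routing HIP device compilation through the trunc-based branch keeps it consistent with the CUDA device path. A small host-only illustration:

#include <cmath>
#include <cstdio>

int main() {
  float a = -1.25f;
  // Device path (trunc-based): the fractional part keeps the sign of the input.
  std::printf("a - trunc(a) = %g\n", a - std::trunc(a));   // -0.25
  // Host path (floor-based): the fractional part is always non-negative.
  std::printf("a - floor(a) = %g\n", a - std::floor(a));   //  0.75
  return 0;
}
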
4 changes: 2 additions & 2 deletions aten/src/THC/THCTensorTopK.cuh
@@ -117,7 +117,7 @@ struct TopKTypeConfig<at::Half> {
typedef uint32_t RadixType;

static inline __device__ RadixType convert(at::Half v) {
#if CUDA_VERSION >= 8000
#if CUDA_VERSION >= 8000 || defined __HIP_PLATFORM_HCC__
RadixType x = __half_as_ushort(v);
RadixType mask = -((x >> 15)) | 0x8000;
return (x ^ mask);
@@ -128,7 +128,7 @@ struct TopKTypeConfig<at::Half> {
}

static inline __device__ at::Half deconvert(RadixType v) {
#if CUDA_VERSION >= 8000
#if CUDA_VERSION >= 8000 || defined __HIP_PLATFORM_HCC__
RadixType mask = ((v >> 15) - 1) | 0x8000;
return __ushort_as_half(v ^ mask);
#else
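For reference, a host-side sketch, not part of the patch, of the radix trick that this change also enables under HIP: flipping the sign bit of positive half values and all bits of negative ones yields unsigned integers whose order matches the floating-point order. Here uint16_t stands in for __half_as_ushort / __ushort_as_half.

#include <cstdint>
#include <cstdio>

static uint16_t convert(uint16_t x) {    // mirrors TopKTypeConfig<at::Half>::convert
  uint16_t mask = -((x >> 15)) | 0x8000;
  return x ^ mask;
}

static uint16_t deconvert(uint16_t v) {  // mirrors TopKTypeConfig<at::Half>::deconvert
  uint16_t mask = ((v >> 15) - 1) | 0x8000;
  return v ^ mask;
}

int main() {
  // 0xBC00 is -1.0 and 0x3C00 is +1.0 in IEEE fp16: after conversion the
  // negative value compares below the positive one as an unsigned integer,
  // and deconvert(convert(x)) restores the original bits.
  std::printf("convert(-1.0h) = 0x%04x\n", (unsigned)convert(0xBC00));            // 0x43ff
  std::printf("convert(+1.0h) = 0x%04x\n", (unsigned)convert(0x3C00));            // 0xbc00
  std::printf("round trip     = 0x%04x\n", (unsigned)deconvert(convert(0xBC00))); // 0xbc00
  return 0;
}
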
4 changes: 4 additions & 0 deletions aten/src/THCUNN/LookupTable.cu
@@ -7,7 +7,11 @@
#include "THCTensorSort.cuh"
#include "../THC/THCTensorMathReduce.cuh"

#ifdef __HIP_PLATFORM_HCC__
const int WARP_SIZE = 64;
#else
const int WARP_SIZE = 32;
#endif

template
<typename Dtype,
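A note on the hunk above, illustration only: AMD GCN wavefronts are 64 lanes wide, so warp-synchronous code that hard-codes 32 would silently treat each wavefront as two warps. A hypothetical kernel showing how such a constant is typically consumed:

#include <cuda_runtime.h>

#ifdef __HIP_PLATFORM_HCC__
const int WARP_SIZE = 64;   // AMD GCN wavefront width
#else
const int WARP_SIZE = 32;   // NVIDIA warp width
#endif

// Hypothetical kernel (not from LookupTable.cu): per-warp indexing is only
// meaningful when WARP_SIZE matches the hardware's real lane count.
__global__ void lane_and_warp(int* lane_out, int* warp_out) {
  const int tid = blockIdx.x * blockDim.x + threadIdx.x;
  lane_out[tid] = threadIdx.x % WARP_SIZE;  // lane within the warp/wavefront
  warp_out[tid] = threadIdx.x / WARP_SIZE;  // warp/wavefront index within the block
}
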
33 changes: 6 additions & 27 deletions tools/amd_build/disabled_features.yaml
@@ -32,31 +32,12 @@
"_OPENMP": "_OPENMP_STUB"
}
},
{
"path": "aten/src/ATen/Context.cpp",
"s_constants": {
"#ifdef USE_SSE3": "#if defined(USE_SSE3) && !defined(__HIP_DEVICE_COMPILE__)"
}
},
{
"path": "aten/src/ATen/native/Distributions.h",
"s_constants": {
"scalar_cast": "static_cast"
}
},
{
"path": "aten/src/ATen/native/cuda/Distributions.cu",
"s_constants": {
"#include <nvfunctional>": ""
}
},
{
"path": "aten/src/THC/THCNumerics.cuh",
"s_constants": {
"#ifdef __CUDA_ARCH__": "#if defined(__CUDA_ARCH__) || defined(__HIP_PLATFORM_HCC__)",
"#if CUDA_VERSION < 9000": "#if CUDA_VERSION < 9000 && !defined(__HIP_PLATFORM_HCC__)"
}
},
{
"path": "aten/src/ATen/native/cuda/RoiPooling.cu",
"s_constants": {
@@ -118,6 +99,12 @@
# ROCm Pytorch issue: https://github.com/ROCmSoftwarePlatform/pytorch/issues/31
"detail::getCUDAHooks().getNumGPUs()": "1",
}
},
{
"path": "aten/src/ATen/native/cuda/Unique.cu",
"s_constants": {
"inverse_indices_kernel<<<": "inverse_indices_kernel<scalar_t><<<",
}
}
],
"disabled_modules": [
@@ -144,8 +131,6 @@
{
"path": "aten/src/ATen/native/cuda/Distributions.cu",
"functions": [
"_s_poisson_cuda",
"poisson_cuda_kernel",
"gamma_cuda_kernel",
"gamma_grad_cuda_kernel",
]
@@ -164,12 +149,6 @@
"THNN_(LookupTable_renorm)"
]
},
{
"path": "aten/src/THCUNN/LookupTable.cu",
"functions": [
"calculate_norms_and_renorm"
]
},
{
"path": "aten/src/THC/generic/THCTensor.cu",
"functions": [
