From b8a11cffdb638bac8ad6266807796b4530c54e67 Mon Sep 17 00:00:00 2001 From: iotamudelta Date: Tue, 23 Oct 2018 14:50:03 -0700 Subject: [PATCH] Minor improvements cherry-pick (#12973) Summary: * Enable disabled functions for ROCm (ROCm 252) * fixes for topk fp16 (ROCm 270) * HIP needs kernel invocation to be explicitly templated to be able to take non-const arg as const kernel arg (ROCm 281) For attention: bddppq ezyang Full set of PyTorch/Caffe2 tests on ROCm here: https://github.com/ROCmSoftwarePlatform/pytorch/pull/283 Pull Request resolved: https://github.com/pytorch/pytorch/pull/12973 Differential Revision: D10516072 Pulled By: bddppq fbshipit-source-id: 833b3de1544dfa4886a34e2b5ea53d77b6f0ba9e --- aten/src/ATen/native/cuda/Unique.cu | 18 +++----------- aten/src/THC/THCNumerics.cuh | 2 +- aten/src/THC/THCTensorTopK.cuh | 4 ++-- aten/src/THCUNN/LookupTable.cu | 4 ++++ tools/amd_build/disabled_features.yaml | 33 +++++--------------------- 5 files changed, 16 insertions(+), 45 deletions(-) diff --git a/aten/src/ATen/native/cuda/Unique.cu b/aten/src/ATen/native/cuda/Unique.cu index a33c90c23ec5b..5b62280054c6c 100644 --- a/aten/src/ATen/native/cuda/Unique.cu +++ b/aten/src/ATen/native/cuda/Unique.cu @@ -11,8 +11,6 @@ namespace at { namespace native{ -#ifndef __HIP_PLATFORM_HCC__ - namespace { template __global__ void inverse_indices_kernel( @@ -157,30 +155,20 @@ template } } // namespace -#endif - std::tuple _unique_cuda(const Tensor& self, const bool sorted, const bool return_inverse) { -#ifndef __HIP_PLATFORM_HCC__ return AT_DISPATCH_ALL_TYPES(self.type(), "unique", [&] { // The current CUDA implementation of unique always sort due to the // lack of hashtable implementation in thrust return _unique_cuda_template(self, return_inverse); }); -#else - AT_ERROR("unique_cuda: HIP not supported"); -#endif } std::tuple _unique_dim_cuda(const Tensor& self, const int64_t dim, const bool sorted, const bool return_inverse) { - #ifndef __HIP_PLATFORM_HCC__ - return AT_DISPATCH_ALL_TYPES(self.type(), "unique_dim", [&] { - return _unique_dim_cuda_template(self, dim, return_inverse); - }); - #else - AT_ERROR("unique_dim_cuda: HIP not supported"); - #endif + return AT_DISPATCH_ALL_TYPES(self.type(), "unique_dim", [&] { + return _unique_dim_cuda_template(self, dim, return_inverse); + }); } } // namespace native diff --git a/aten/src/THC/THCNumerics.cuh b/aten/src/THC/THCNumerics.cuh index 27ec95adbaa82..59a27b1f68b80 100644 --- a/aten/src/THC/THCNumerics.cuh +++ b/aten/src/THC/THCNumerics.cuh @@ -209,7 +209,7 @@ struct THCNumerics { static inline __host__ __device__ at::Half round(at::Half a) { return ::round(a); } static inline __host__ __device__ at::Half frac(at::Half a) { - #ifdef __CUDA_ARCH__ + #if defined(__CUDA_ARCH__) || defined(__HIP_PLATFORM_HCC__) return a - ::trunc(a); #else // __CUDA_ARCH__ return a - ::floor(a); diff --git a/aten/src/THC/THCTensorTopK.cuh b/aten/src/THC/THCTensorTopK.cuh index 71d1bc98e8e28..773232dba92bf 100644 --- a/aten/src/THC/THCTensorTopK.cuh +++ b/aten/src/THC/THCTensorTopK.cuh @@ -117,7 +117,7 @@ struct TopKTypeConfig { typedef uint32_t RadixType; static inline __device__ RadixType convert(at::Half v) { -#if CUDA_VERSION >= 8000 +#if CUDA_VERSION >= 8000 || defined __HIP_PLATFORM_HCC__ RadixType x = __half_as_ushort(v); RadixType mask = -((x >> 15)) | 0x8000; return (x ^ mask); @@ -128,7 +128,7 @@ struct TopKTypeConfig { } static inline __device__ at::Half deconvert(RadixType v) { -#if CUDA_VERSION >= 8000 +#if CUDA_VERSION >= 8000 || defined __HIP_PLATFORM_HCC__ RadixType mask = ((v >> 15) - 1) | 0x8000; return __ushort_as_half(v ^ mask); #else diff --git a/aten/src/THCUNN/LookupTable.cu b/aten/src/THCUNN/LookupTable.cu index 9a6e33efb5860..ff222ab302d8c 100644 --- a/aten/src/THCUNN/LookupTable.cu +++ b/aten/src/THCUNN/LookupTable.cu @@ -7,7 +7,11 @@ #include "THCTensorSort.cuh" #include "../THC/THCTensorMathReduce.cuh" +#ifdef __HIP_PLATFORM_HCC__ +const int WARP_SIZE = 64; +#else const int WARP_SIZE = 32; +#endif template ": "" } }, - { - "path": "aten/src/THC/THCNumerics.cuh", - "s_constants": { - "#ifdef __CUDA_ARCH__": "#if defined(__CUDA_ARCH__) || defined(__HIP_PLATFORM_HCC__)", - "#if CUDA_VERSION < 9000": "#if CUDA_VERSION < 9000 && !defined(__HIP_PLATFORM_HCC__)" - } - }, { "path": "aten/src/ATen/native/cuda/RoiPooling.cu", "s_constants": { @@ -118,6 +99,12 @@ # ROCm Pytorch issue: https://github.com/ROCmSoftwarePlatform/pytorch/issues/31 "detail::getCUDAHooks().getNumGPUs()": "1", } + }, + { + "path": "aten/src/ATen/native/cuda/Unique.cu", + "s_constants": { + "inverse_indices_kernel<<<": "inverse_indices_kernel<<<", + } } ], "disabled_modules": [ @@ -144,8 +131,6 @@ { "path": "aten/src/ATen/native/cuda/Distributions.cu", "functions": [ - "_s_poisson_cuda", - "poisson_cuda_kernel", "gamma_cuda_kernel", "gamma_grad_cuda_kernel", ] @@ -164,12 +149,6 @@ "THNN_(LookupTable_renorm)" ] }, - { - "path": "aten/src/THCUNN/LookupTable.cu", - "functions": [ - "calculate_norms_and_renorm" - ] - }, { "path": "aten/src/THC/generic/THCTensor.cu", "functions": [