Skip to content

Commit

Permalink
move math_cuda_utils.h to pten/kernels/funcs (#39246)
Browse files Browse the repository at this point in the history
  • Loading branch information
Feiyu Chan authored Jan 27, 2022
1 parent 3e6950d commit 809a10b
Show file tree
Hide file tree
Showing 6 changed files with 124 additions and 95 deletions.
1 change: 0 additions & 1 deletion paddle/fluid/operators/activation_op.cu
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ limitations under the License. */
#include "paddle/fluid/operators/activation_op.h"
#include "paddle/fluid/operators/amp/fp16_type_traits.h"
#include "paddle/fluid/operators/elementwise/elementwise_op_impl.cu.h"
-#include "paddle/fluid/operators/math/math_cuda_utils.h"
#include "paddle/fluid/platform/bfloat16.h"
#include "paddle/fluid/platform/device/gpu/gpu_device_function.h"

Expand Down
17 changes: 10 additions & 7 deletions paddle/fluid/operators/interpolate_v2_op.cu
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,11 @@
#include <algorithm>
#include <string>
#include "paddle/fluid/operators/interpolate_v2_op.h"
-#include "paddle/fluid/operators/math/math_cuda_utils.h"
#include "paddle/fluid/platform/device/gpu/gpu_device_function.h"
#include "paddle/fluid/platform/device/gpu/gpu_launch_config.h"
#include "paddle/fluid/platform/device/gpu/gpu_primitives.h"
#include "paddle/fluid/platform/fast_divmod.h"
+#include "paddle/pten/kernels/funcs/math_cuda_utils.h"

namespace paddle {
namespace operators {
Expand Down Expand Up @@ -522,7 +522,7 @@ __inline__ __device__ T PartialBlockMin(T val, size_t threads_num_in_block,

if (threadIdx.x < threshold) {
shared_last_idx = (threshold >> 5) - 1;
-val = math::warpReduceMin(val, mask);
+val = pten::funcs::warpReduceMin(val, mask);
if (lane == 0) {
shared[wid] = val;
}
Expand All @@ -537,7 +537,7 @@ __inline__ __device__ T PartialBlockMin(T val, size_t threads_num_in_block,
if (threadIdx.x < threshold) {
val = (lane <= shared_last_idx) ? shared[lane]
: std::numeric_limits<T>::max();
-val = math::warpReduceMin(val, mask);
+val = pten::funcs::warpReduceMin(val, mask);
shared_last_val = val;
}
__syncthreads();
Expand Down Expand Up @@ -589,12 +589,15 @@ __global__ void KeBilinearInterpBwShareMemory(
s_data[0][threadIdx.x] = 0.f;
s_data[1][threadIdx.x] = 0.f;
int remain = nthreads - (tid & (-blockDim.x));
-int in_top_max_index = math::blockReduceMax(top_right_index, FINAL_MASK);
-int in_bot_max_index = math::blockReduceMax(bot_right_index, FINAL_MASK);
+int in_top_max_index =
+pten::funcs::blockReduceMax(top_right_index, FINAL_MASK);
+int in_bot_max_index =
+pten::funcs::blockReduceMax(bot_right_index, FINAL_MASK);

if (remain > blockDim.x) {
-in_top_min_index = math::blockReduceMin(input_index, FINAL_MASK);
-in_bot_min_index = math::blockReduceMin(bot_left_index, FINAL_MASK);
+in_top_min_index = pten::funcs::blockReduceMin(input_index, FINAL_MASK);
+in_bot_min_index =
+pten::funcs::blockReduceMin(bot_left_index, FINAL_MASK);
} else {
in_top_min_index = PartialBlockMin(input_index, remain, FINAL_MASK);
in_bot_min_index = PartialBlockMin(bot_left_index, remain, FINAL_MASK);
Expand Down
Loading

0 comments on commit 809a10b

Please sign in to comment.