【Hackathon 6th No.10】Update paddle.searchsorted -part (#64331)
* update searchsorted

* fix impl

* fix impl dtype

* fix impl dtype

* fix impl dtype

* rerun ci

* update test

* fix test

* fix test

* update fp16 test

* fix impl

* fix impl

* fix impl

* fix impl

* fix impl
NKNaN authored May 23, 2024
1 parent 592ac86 commit 5ccfdff
Showing 4 changed files with 155 additions and 13 deletions.
4 changes: 3 additions & 1 deletion paddle/phi/kernels/gpu/searchsorted_kernel.cu
@@ -25,6 +25,8 @@ PD_REGISTER_KERNEL(searchsorted,
float,
double,
int,
int64_t) {
int64_t,
phi::dtype::float16,
phi::dtype::bfloat16) {
kernel->OutputAt(0).SetDataType(phi::DataType::UNDEFINED);
}
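
With the GPU kernel now registered for phi::dtype::float16 and phi::dtype::bfloat16, a half-precision call should dispatch directly on CUDA devices. A minimal usage sketch of what this enables, assuming a CUDA build of Paddle (the data mirrors the tests added below; expected lower-bound indices follow np.searchsorted):

import paddle

# Illustrative sketch, not part of the commit: run searchsorted on float16
# GPU tensors, which the kernel registration above now permits.
paddle.set_device("gpu")  # assumes a CUDA-enabled build
seq = paddle.to_tensor([1, 3, 5, 7, 9], dtype="float16")
vals = paddle.to_tensor([[3, 6, 9], [3, 6, 9]], dtype="float16")
out = paddle.searchsorted(seq, vals)
print(out)  # expected: [[1, 3, 4], [1, 3, 4]] (int64 lower-bound indices)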
69 changes: 62 additions & 7 deletions paddle/phi/kernels/impl/searchsorted_kernel_impl.h
@@ -17,7 +17,7 @@
#include <math.h>

#include "paddle/common/ddim.h"
#include "paddle/phi/kernels/funcs/algorithm.h"
#include "paddle/phi/common/amp_type_traits.h"
#include "paddle/phi/kernels/funcs/for_range.h"

namespace phi {
@@ -59,6 +59,54 @@ class GpuAndCpuSearchSortedCompute {
static HOSTDEVICE bool IsInf(int x UNUSED) { return false; }
static HOSTDEVICE bool IsInf(int64_t x UNUSED) { return false; }

HOSTDEVICE inline size_t LowerBound(const T1* x, size_t num, const T2& val) {
// @{ Group LowerBound
// The following code is from
// https://en.cppreference.com/w/cpp/algorithm/lower_bound
using MT1 = typename phi::dtype::MPTypeTrait<T1>::Type;
using MT2 = typename phi::dtype::MPTypeTrait<T2>::Type;
MT2 val_mt = static_cast<MT2>(val);

auto* first = x;
int64_t count = static_cast<int64_t>(num);
while (count > 0) {
int64_t step = (count >> 1);
auto* it = first + step;
MT1 it_mt = static_cast<MT1>(*it);
if (it_mt < val_mt) {
first = ++it;
count -= (step + 1);
} else {
count = step;
}
}
return static_cast<size_t>(first - x);
}

HOSTDEVICE inline size_t UpperBound(const T1* x, size_t num, const T2& val) {
// @{ Group UpperBound
// The following code is from
// https://en.cppreference.com/w/cpp/algorithm/upper_bound
using MT1 = typename phi::dtype::MPTypeTrait<T1>::Type;
using MT2 = typename phi::dtype::MPTypeTrait<T2>::Type;
MT2 val_mt = static_cast<MT2>(val);

auto* first = x;
int64_t count = static_cast<int64_t>(num);
while (count > 0) {
auto step = (count >> 1);
auto* it = first + step;
MT1 it_mt = static_cast<MT1>(*it);
if (val_mt < it_mt) {
count = step;
} else {
first = ++it;
count -= (step + 1);
}
}
return static_cast<size_t>(first - x);
}

HOSTDEVICE GpuAndCpuSearchSortedCompute(const T1* sequence_data,
const T2* value_data,
bool right,
@@ -74,19 +74,21 @@
seq_size_(seq_size),
out_data_(out_data) {}
HOSTDEVICE void operator()(int64_t idx) {
using MT2 = typename phi::dtype::MPTypeTrait<T2>::Type;
const T2* value_ptr = value_data_ + idx;
const MT2 value_mt = static_cast<MT2>(*value_ptr);
const T1* sequence_ptr = is_1d_boundaries_
? sequence_data_
: sequence_data_ + idx / val_size_ * seq_size_;
if (IsInf(*value_ptr) || IsNan(*value_ptr)) {
if (IsInf(value_mt) || IsNan(value_mt)) {
out_data_[idx] = seq_size_;
} else {
if (right_) {
out_data_[idx] = static_cast<OutType>(phi::funcs::UpperBound<T1, T2>(
sequence_ptr, seq_size_, *value_ptr));
out_data_[idx] = static_cast<OutType>(
UpperBound(sequence_ptr, seq_size_, *value_ptr));
} else {
out_data_[idx] = static_cast<OutType>(phi::funcs::LowerBound<T1, T2>(
sequence_ptr, seq_size_, *value_ptr));
out_data_[idx] = static_cast<OutType>(
LowerBound(sequence_ptr, seq_size_, *value_ptr));
}
}
}
@@ -166,11 +216,16 @@ void VisitDataTypeForSearchSorted(DataType type, Visitor visitor) {
visitor.template apply<int>();
} else if (type == DataType::INT64) {
visitor.template apply<int64_t>();
} else if (type == DataType::FLOAT16) {
visitor.template apply<phi::dtype::float16>();
} else if (type == DataType::BFLOAT16) {
visitor.template apply<phi::dtype::bfloat16>();
} else {
PADDLE_THROW(errors::InvalidArgument(
"The received values data type %s can not meet input requirements. "
"Because the given values data type of searchsorted operators must be "
"float32, float64, int32 or int64. Please input appropriate "
"bfloat16, float16, float32, float64, int32 or int64. Please input "
"appropriate "
"sorted_sequence again! ",
type));
}
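The new LowerBound/UpperBound helpers in this header replace the phi::funcs versions so that comparisons run in the "master" precision selected by MPTypeTrait (float32 for float16 and bfloat16), and the NaN/Inf check is likewise done on the widened value. A rough Python mirror of that loop structure, with an explicit upcast standing in for MPTypeTrait (illustration only, not the kernel code):

import numpy as np

def lower_bound(seq, val, compute_dtype=np.float32):
    # Compare in a wider type, as MPTypeTrait maps fp16/bf16 to float32.
    val = compute_dtype(val)
    first, count = 0, len(seq)
    while count > 0:
        step = count // 2
        it = first + step
        if compute_dtype(seq[it]) < val:
            first = it + 1
            count -= step + 1
        else:
            count = step
    return first  # index of the first element not less than val

def upper_bound(seq, val, compute_dtype=np.float32):
    val = compute_dtype(val)
    first, count = 0, len(seq)
    while count > 0:
        step = count // 2
        it = first + step
        if val < compute_dtype(seq[it]):
            count = step
        else:
            first = it + 1
            count -= step + 1
    return first  # index of the first element greater than val

seq = np.array([1, 3, 5, 7, 9], dtype=np.float16)
print(lower_bound(seq, np.float16(6)))  # 3
print(upper_bound(seq, np.float16(3)))  # 2
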
8 changes: 4 additions & 4 deletions python/paddle/tensor/search.py
@@ -1125,8 +1125,8 @@ def searchsorted(
Find the index of the corresponding `sorted_sequence` in the innermost dimension based on the given `values`.
Args:
sorted_sequence (Tensor): An input N-D or 1-D tensor with type int32, int64, float32, float64. The value of the tensor monotonically increases in the innermost dimension.
values (Tensor): An input N-D tensor value with type int32, int64, float32, float64.
sorted_sequence (Tensor): An input N-D or 1-D tensor with type int32, int64, float16, float32, float64, bfloat16. The value of the tensor monotonically increases in the innermost dimension.
values (Tensor): An input N-D tensor value with type int32, int64, float16, float32, float64, bfloat16.
out_int32 (bool, optional): Data type of the output tensor which can be int32, int64. The default value is False, and it indicates that the output data type is int64.
right (bool, optional): Find the upper or lower bounds of the sorted_sequence range in the innermost dimension based on the given `values`. If the value of the sorted_sequence is nan or inf, return the size of the innermost dimension.
The default value is False and it shows the lower bounds.
@@ -1168,13 +1168,13 @@ def searchsorted(
check_variable_and_dtype(
sorted_sequence,
'SortedSequence',
['float32', 'float64', 'int32', 'int64'],
['uint16', 'float16', 'float32', 'float64', 'int32', 'int64'],
'paddle.searchsorted',
)
check_variable_and_dtype(
values,
'Values',
['float32', 'float64', 'int32', 'int64'],
['uint16', 'float16', 'float32', 'float64', 'int32', 'int64'],
'paddle.searchsorted',
)

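On the Python side, the dtype whitelist now accepts 'float16' and 'uint16' (the name Paddle's dtype checker uses for bfloat16 storage). A short usage sketch of the documented flags, assuming a CUDA device with float16 support (expected indices match np.searchsorted):

import paddle

paddle.set_device("gpu")  # the half-precision kernels in this commit are registered for GPU
seq = paddle.to_tensor([2, 4, 6, 8, 10], dtype="float16")
vals = paddle.to_tensor([3, 6, 9], dtype="float16")
left = paddle.searchsorted(seq, vals)                   # lower bounds: [1, 2, 4]
right = paddle.searchsorted(seq, vals, right=True)      # upper bounds: [1, 3, 4]
small = paddle.searchsorted(seq, vals, out_int32=True)  # same indices, int32 output
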
87 changes: 86 additions & 1 deletion test/legacy_test/test_searchsorted_op.py
@@ -15,7 +15,7 @@
import unittest

import numpy as np
from op_test import OpTest
from op_test import OpTest, convert_float_to_uint16

import paddle
from paddle.base import core
@@ -92,6 +92,91 @@ def init_test_case(self):
self.side = "right"


@unittest.skipIf(
not core.is_compiled_with_cuda()
or not core.is_float16_supported(core.CUDAPlace(0)),
"core is not compiled with CUDA and not support the float16",
)
class TestSearchSortedFP16OP(TestSearchSorted):
def setUp(self):
self.python_api = paddle.searchsorted
self.op_type = "searchsorted"
self.dtype = np.float16
self.init_test_case()

self.inputs = {
'SortedSequence': self.sorted_sequence.astype(self.dtype),
'Values': self.values.astype(self.dtype),
}
self.attrs = {"out_int32": False, "right": False}
self.attrs["right"] = True if self.side == 'right' else False
self.outputs = {
'Out': np.searchsorted(
self.sorted_sequence, self.values, side=self.side
)
}

def test_check_output(self):
place = core.CUDAPlace(0)
self.check_output_with_place(place, check_pir=True)

def init_test_case(self):
self.sorted_sequence = np.array([1, 3, 5, 7, 9])
self.values = np.array([[3, 6, 9], [3, 6, 9]])
self.side = "left"


class TestSearchSortedFP16OP_2(TestSearchSortedFP16OP):
def init_test_case(self):
self.sorted_sequence = np.array([1, 3, 5, 7, 9])
self.values = np.array([[3, 6, 9], [3, 6, 9]])
self.side = "right"


@unittest.skipIf(
not core.is_compiled_with_cuda()
or not core.is_bfloat16_supported(core.CUDAPlace(0)),
"core is not compiled with CUDA and not support the bfloat16",
)
class TestSearchSortedBF16(TestSearchSorted):
def setUp(self):
self.python_api = paddle.searchsorted
self.public_python_api = paddle.searchsorted
self.op_type = "searchsorted"
self.python_out_sig = ["Out"]
self.dtype = np.uint16
self.np_dtype = np.float32
self.init_test_case()

self.inputs = {
'SortedSequence': convert_float_to_uint16(self.sorted_sequence),
'Values': convert_float_to_uint16(self.values),
}
self.attrs = {"out_int32": False, "right": False}
self.attrs["right"] = True if self.side == 'right' else False
self.outputs = {
'Out': np.searchsorted(
self.sorted_sequence, self.values, side=self.side
)
}

def test_check_output(self):
place = core.CUDAPlace(0)
self.check_output_with_place(place, check_pir=True)

def init_test_case(self):
self.sorted_sequence = np.array([1, 3, 5, 7, 9]).astype(self.np_dtype)
self.values = np.array([[3, 6, 9], [3, 6, 9]]).astype(self.np_dtype)
self.side = "left"


class TestSearchSortedBF16_2(TestSearchSortedBF16):
def init_test_case(self):
self.sorted_sequence = np.array([1, 3, 5, 7, 9]).astype(self.np_dtype)
self.values = np.array([[3, 6, 9], [3, 6, 9]]).astype(self.np_dtype)
self.side = "right"


class TestSearchSortedAPI(unittest.TestCase):
def init_test_case(self):
self.sorted_sequence = np.array([2, 4, 6, 8, 10]).astype("float64")
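The bfloat16 test follows the usual OpTest recipe: build float32 data, convert it to uint16 storage with convert_float_to_uint16, and compare against np.searchsorted on the original float32 arrays. A standalone analogue, assuming a CUDA device with bfloat16 support (paddle.cast stands in for the test-only convert_float_to_uint16 helper; these small integers are exactly representable, so the indices should agree):

import numpy as np
import paddle

paddle.set_device("gpu")  # the bfloat16 kernel in this commit is registered for GPU
seq_np = np.array([1, 3, 5, 7, 9], dtype=np.float32)
vals_np = np.array([[3, 6, 9], [3, 6, 9]], dtype=np.float32)
ref = np.searchsorted(seq_np, vals_np, side="left")  # reference indices

seq_bf16 = paddle.cast(paddle.to_tensor(seq_np), "bfloat16")
vals_bf16 = paddle.cast(paddle.to_tensor(vals_np), "bfloat16")
out = paddle.searchsorted(seq_bf16, vals_bf16)
np.testing.assert_array_equal(out.numpy(), ref)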
