add c_reduce_sum/unstack/all_reduce_datatype for kunlun
QingshuChen committed Feb 21, 2023
1 parent 27281e1 commit 8b0b5a8
Showing 6 changed files with 275 additions and 0 deletions.
2 changes: 2 additions & 0 deletions paddle/fluid/operators/collective/c_allreduce_max_op_xpu.cc
@@ -18,4 +18,6 @@ namespace ops = paddle::operators;
namespace plat = paddle::platform;

REGISTER_OP_XPU_KERNEL(c_allreduce_max,
ops::CAllReduceOpXPUKernel<ops::kRedMax, plat::float16>,
ops::CAllReduceOpXPUKernel<ops::kRedMax, int>,
ops::CAllReduceOpXPUKernel<ops::kRedMax, float>)
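
The float16 and int entries above are the new registrations; float was already present. A minimal sketch of exercising the widened max all-reduce from Python, assuming a Kunlun (XPU) build, a two-process job started with paddle.distributed.launch, and that the collective lowers to c_allreduce_max on this device (script name and values are illustrative):

import paddle
import paddle.distributed as dist

dist.init_parallel_env()
# int32 is one of the dtypes this commit enables for the XPU max all-reduce
t = paddle.to_tensor([dist.get_rank() + 1], dtype='int32')
dist.all_reduce(t, op=dist.ReduceOp.MAX)  # every rank now holds the global max
print(t.numpy())  # [world_size] on all ranks
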
1 change: 1 addition & 0 deletions paddle/fluid/operators/collective/c_reduce_sum_op_xpu.cc
@@ -18,4 +18,5 @@ namespace ops = paddle::operators;
namespace plat = paddle::platform;

REGISTER_OP_XPU_KERNEL(c_reduce_sum,
ops::CReduceOpXPUKernel<ops::kRedSum, plat::float16>,
ops::CReduceOpXPUKernel<ops::kRedSum, float>)
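
Unlike the all-reduce, c_reduce_sum leaves the combined result on a single destination rank. A matching sketch under the same launch assumptions, using the float16 support this file adds:

import paddle
import paddle.distributed as dist

dist.init_parallel_env()
t = paddle.ones([4], dtype='float16')  # float16 is the dtype added here
dist.reduce(t, dst=0, op=dist.ReduceOp.SUM)  # only rank 0 holds the summed tensor
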
16 changes: 16 additions & 0 deletions paddle/phi/backends/xpu/xpu2_op_list.cc
@@ -79,6 +79,10 @@ XPUOpMap& get_kl2_ops() {
phi::DataType::FLOAT64,
phi::DataType::INT32,
phi::DataType::INT64})},
{"c_allreduce_max",
XPUKernelSet({phi::DataType::FLOAT16,
phi::DataType::FLOAT32,
phi::DataType::INT32})},
{"c_allreduce_sum",
XPUKernelSet({phi::DataType::FLOAT16,
phi::DataType::FLOAT32,
@@ -94,6 +98,8 @@ XPUOpMap& get_kl2_ops() {
phi::DataType::FLOAT64,
phi::DataType::INT32,
phi::DataType::INT64})},
{"c_reduce_sum",
XPUKernelSet({phi::DataType::FLOAT16, phi::DataType::FLOAT32})},
{"c_split",
XPUKernelSet({phi::DataType::FLOAT16,
phi::DataType::FLOAT32,
@@ -730,6 +736,16 @@ XPUOpMap& get_kl2_ops() {
phi::DataType::UINT8,
phi::DataType::FLOAT16,
phi::DataType::FLOAT32})},
{"unstack",
XPUKernelSet({phi::DataType::INT64,
phi::DataType::INT32,
phi::DataType::FLOAT16,
phi::DataType::FLOAT32})},
{"unstack_grad",
XPUKernelSet({phi::DataType::INT64,
phi::DataType::INT32,
phi::DataType::FLOAT16,
phi::DataType::FLOAT32})},
{"warpctc_grad", XPUKernelSet({phi::DataType::FLOAT32})},
{"warpctc", XPUKernelSet({phi::DataType::FLOAT32})},
{"where_index",
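
The kl2 op map above is ultimately what the XPU test utilities consult when deciding which dtypes to generate test classes for. A quick, hedged way to confirm the new unstack entries are visible (this assumes the same import path the unit test below uses, run from python/paddle/fluid/tests/unittests/xpu):

import sys

sys.path.append("..")
from xpu.get_test_cover_info import get_xpu_op_support_types

# expected to reflect the four dtypes registered above for unstack
print(get_xpu_op_support_types('unstack'))
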
65 changes: 65 additions & 0 deletions paddle/phi/kernels/xpu/unstack_grad_kernel.cc
@@ -0,0 +1,65 @@
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/phi/kernels/unstack_grad_kernel.h"

#include "paddle/phi/backends/xpu/enforce_xpu.h"
#include "paddle/phi/core/kernel_registry.h"

namespace phi {

template <typename T, typename Context>
void UnStackGradKernel(const Context &dev_ctx,
const std::vector<const DenseTensor *> &x,
int axis,
DenseTensor *x_grad) {
using XPUType = typename XPUTypeTrait<T>::Type;
if (axis < 0) {
axis += x[0]->dims().size() + 1;
}
dev_ctx.template Alloc<T>(x_grad);
auto &dim = x[0]->dims();
std::vector<int> xdims;
for (auto i = 0; i < dim.size(); ++i) {
xdims.push_back(dim[i]);
}
xdims.push_back(1);
std::vector<std::vector<int>> xdims_list;
int n = static_cast<int>(x.size());
for (int i = 0; i < n; i++) {
xdims_list.push_back(xdims);
}

std::vector<const XPUType *> x_list;
for (int i = 0; i < n; i++) {
x_list.push_back(reinterpret_cast<const XPUType *>(x[i]->data<T>()));
}

int r = xpu::concat<XPUType>(dev_ctx.x_context(),
x_list,
reinterpret_cast<XPUType *>(x_grad->data<T>()),
xdims_list,
axis);
PADDLE_ENFORCE_XDNN_SUCCESS(r, "concat in unstack_grad op");
}
} // namespace phi

PD_REGISTER_KERNEL(unstack_grad,
XPU,
ALL_LAYOUT,
phi::UnStackGradKernel,
float,
phi::dtype::float16,
int,
int64_t) {}
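
The gradient of unstack is simply re-stacking the upstream gradients along axis, and the kernel above gets that with a single xpu::concat: each incoming slice is viewed with a trailing unit dimension (which also makes axis == rank a legal concat axis), and concatenating those views along axis reproduces the stacked memory layout exactly. A NumPy sketch of the same identity, illustrative only:

import numpy as np

axis = 1
# upstream grads: 6 slices of shape (5, 7), as if a (5, 6, 7) tensor
# had been unstacked along axis 1
grads = [np.random.rand(5, 7).astype(np.float32) for _ in range(6)]

ref = np.stack(grads, axis=axis)  # the obvious gradient: shape (5, 6, 7)

# the kernel's route: trailing unit dim, concat along axis, then reshape
got = np.concatenate([g[..., None] for g in grads], axis=axis)
assert np.array_equal(ref, got.reshape(ref.shape))
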
60 changes: 60 additions & 0 deletions paddle/phi/kernels/xpu/unstack_kernel.cc
@@ -0,0 +1,60 @@
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/phi/kernels/unstack_kernel.h"

#include "paddle/phi/backends/xpu/enforce_xpu.h"
#include "paddle/phi/core/kernel_registry.h"

namespace phi {

template <typename T, typename Context>
void UnStackKernel(const Context &dev_ctx,
const DenseTensor &x,
int axis,
int num,
std::vector<DenseTensor *> outs) {
using XPUType = typename XPUTypeTrait<T>::Type;
auto x_dims = x.dims();

if (axis < 0) axis += x_dims.size();
auto x_shape = phi::vectorize<int>(x_dims);

std::vector<int> dx_dims_list(outs.size(), 1);
std::vector<XPUType *> dx_lists;
for (size_t j = 0; j < outs.size(); ++j) {
dev_ctx.template Alloc<T>(outs[j]);
dx_lists.push_back(reinterpret_cast<XPUType *>(outs[j]->data<T>()));
}

int r = xpu::split<XPUType>(dev_ctx.x_context(),
reinterpret_cast<const XPUType *>(x.data<T>()),
dx_lists,
x_shape,
dx_dims_list,
axis);

PADDLE_ENFORCE_XDNN_SUCCESS(r, "split in unstack op");
}

} // namespace phi

PD_REGISTER_KERNEL(unstack,
XPU,
ALL_LAYOUT,
phi::UnStackKernel,
phi::dtype::float16,
float,
int,
int64_t) {}
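
The forward kernel maps unstack onto xpu::split with num unit-width pieces along axis, each written into one pre-allocated output. A short sketch of the op's contract, checked against plain NumPy slicing (runs on CPU as written; swap in paddle.set_device('xpu') on a Kunlun build):

import numpy as np
import paddle

x_np = np.arange(24, dtype=np.float32).reshape(2, 3, 4)
outs = paddle.unstack(paddle.to_tensor(x_np), axis=1)  # three (2, 4) tensors

for i, out in enumerate(outs):
    np.testing.assert_array_equal(out.numpy(), x_np[:, i, :])
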
131 changes: 131 additions & 0 deletions python/paddle/fluid/tests/unittests/xpu/test_unstack_op_xpu.py
@@ -0,0 +1,131 @@
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import sys
import unittest

import numpy as np

sys.path.append("..")
from op_test_xpu import XPUOpTest
from xpu.get_test_cover_info import (
XPUOpTestWrapper,
create_test_class,
get_xpu_op_support_types,
)

import paddle

paddle.enable_static()


class XPUTestUnStackOp(XPUOpTestWrapper):
def __init__(self):
self.op_name = 'unstack'
self.use_dynamic_create_class = False

class TestUnStackOpBase(XPUOpTest):
def initDefaultParameters(self):
self.input_dim = (5, 6, 7)
self.axis = 0
self.dtype = 'float32'

def initParameters(self):
pass

def get_y_names(self):
y_names = []
for i in range(self.input_dim[self.axis]):
y_names.append('y{}'.format(i))
return y_names

def setUp(self):
self.initDefaultParameters()
self.initParameters()
self.op_type = 'unstack'
self.python_api = paddle.unstack
self.x = np.random.random(size=self.input_dim).astype(self.dtype)

outs = np.split(self.x, self.input_dim[self.axis], self.axis)
new_shape = list(self.input_dim)
del new_shape[self.axis]
y_names = self.get_y_names()
tmp = []
tmp_names = []
for i in range(self.input_dim[self.axis]):
tmp.append((y_names[i], np.reshape(outs[i], new_shape)))
tmp_names.append(y_names[i])

self.python_out_sig = tmp_names
self.inputs = {'X': self.x}
self.outputs = {'Y': tmp}
self.attrs = {'axis': self.axis, 'num': self.input_dim[self.axis]}

def test_check_output(self):
if paddle.is_compiled_with_xpu():
place = paddle.XPUPlace(0)
self.check_output_with_place(place)

        def test_check_grad(self):
            self.check_grad_with_place(
                paddle.XPUPlace(0), ['X'], self.get_y_names()
            )

class TestStackOp3(TestUnStackOpBase):
def initParameters(self):
self.axis = -1

class TestStackOp4(TestUnStackOpBase):
def initParameters(self):
self.axis = -3

class TestStackOp5(TestUnStackOpBase):
def initParameters(self):
self.axis = 1

class TestStackOp6(TestUnStackOpBase):
def initParameters(self):
self.axis = 2

class TestUnstackZeroInputOp(unittest.TestCase):
def unstack_zero_input_static(self):

paddle.enable_static()

array = np.array([], dtype=np.float32)
x = paddle.to_tensor(np.reshape(array, [0]), dtype='float32')
paddle.unstack(x, axis=1)

def unstack_zero_input_dynamic(self):

array = np.array([], dtype=np.float32)
x = paddle.to_tensor(np.reshape(array, [0]), dtype='float32')
paddle.unstack(x, axis=1)

def test_type_error(self):
paddle.disable_static()

self.assertRaises(ValueError, self.unstack_zero_input_dynamic)
self.assertRaises(ValueError, self.unstack_zero_input_static)

paddle.disable_static()


support_types = get_xpu_op_support_types('unstack')
for stype in support_types:
create_test_class(globals(), XPUTestUnStackOp, stype)


if __name__ == '__main__':
unittest.main()
