-
Notifications
You must be signed in to change notification settings - Fork 5.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
【PaddlePaddle Hackathon 2】8、为 Paddle 新增 nanmean API #40472
Changes from 46 commits
265e64c
50960a6
9acc480
86d44ab
e238284
2ea0682
cc70998
f3da96d
b8e03d0
bfd0fff
a2de8d0
8efba3c
111ee88
037d038
b0afcd4
777b29e
e3e78fb
ee4b992
06e356f
db0b137
c4fadc8
773f5ba
01a8719
436f243
34b8b17
028d70f
9d2ebf2
4593a3e
a8543b1
10fb8a5
ee45c5b
7a02b25
1a576ec
0e944ca
6a391e0
c76d696
48c0656
f24fed9
709d9b8
5d48902
81ce3c6
9f29a18
c66454a
b03ec25
7be3764
32179ff
b029831
b018a5d
1391212
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,137 @@ | ||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
from __future__ import print_function | ||
|
||
import unittest | ||
import numpy as np | ||
import paddle | ||
import paddle.fluid as fluid | ||
import paddle.fluid.core as core | ||
from paddle.fluid import Program, program_guard | ||
|
||
np.random.seed(10) | ||
|
||
|
||
class TestNanmeanAPI(unittest.TestCase):
    """Tests for ``paddle.nanmean`` (``paddle.tensor.math.nanmean``)."""

    def setUp(self):
        self.x_shape = [2, 3, 4, 5]
        self.x = np.random.uniform(-1, 1, self.x_shape).astype(np.float32)
        # Make one whole slice NaN so reductions along some axes contain
        # groups with no valid value at all (valid count == 0).
        self.x[0, :, :, :] = np.nan
        # Small 2-D case with mixed NaN placement, used for gradient checks.
        self.x_grad = np.array([[np.nan, np.nan, 3.],
                                [0., np.nan, 2.]]).astype(np.float32)
        self.place = paddle.CUDAPlace(0) if core.is_compiled_with_cuda() \
            else paddle.CPUPlace()

    def test_api_static(self):
        """Static-graph results must match ``numpy.nanmean`` over all elements."""
        paddle.enable_static()
        with paddle.static.program_guard(paddle.static.Program()):
            x = paddle.fluid.data('X', self.x_shape)
            # The API must be reachable from all three public paths.
            out1 = paddle.nanmean(x)
            out2 = paddle.tensor.nanmean(x)
            out3 = paddle.tensor.math.nanmean(x)
            axis = np.arange(len(self.x_shape)).tolist()
            out4 = paddle.nanmean(x, axis)
            out5 = paddle.nanmean(x, tuple(axis))
            exe = paddle.static.Executor(self.place)
            res = exe.run(feed={'X': self.x},
                          fetch_list=[out1, out2, out3, out4, out5])
        out_ref = np.nanmean(self.x)
        for out in res:
            self.assertEqual(np.allclose(out, out_ref, rtol=1e-04), True)

    def test_api_dygraph(self):
        """Dynamic-graph results must match ``numpy.nanmean`` for several axis forms."""
        paddle.disable_static(self.place)

        def test_case(x, axis=None, keepdim=False):
            x_tensor = paddle.to_tensor(x)
            out = paddle.nanmean(x_tensor, axis, keepdim)
            if isinstance(axis, list):
                axis = tuple(axis)
                if len(axis) == 0:
                    # numpy treats an empty axis tuple differently; the API
                    # reduces over all elements, so compare against axis=None.
                    axis = None

            out_ref = np.nanmean(x, axis, keepdims=keepdim)
            if np.isnan(out_ref).sum():
                # Positions whose reduction group was entirely NaN: zero them
                # out on both sides (using the reference's NaN mask) so only
                # the well-defined positions are compared.
                nan_mask = np.isnan(out_ref)
                out_ref[nan_mask] = 0
                out_np = out.numpy()
                out_np[nan_mask] = 0
                self.assertEqual(np.allclose(out_np, out_ref, rtol=1e-04), True)
            else:
                self.assertEqual(
                    np.allclose(out.numpy(), out_ref, rtol=1e-04), True)

        test_case(self.x)
        test_case(self.x, [])
        test_case(self.x, -1)
        test_case(self.x, keepdim=True)
        test_case(self.x, 2, keepdim=True)
        test_case(self.x, [0, 2])
        test_case(self.x, (0, 2))
        test_case(self.x, [0, 1, 2, 3])
        paddle.enable_static()

    def test_errors(self):
        """Integer inputs are rejected with a TypeError."""
        paddle.enable_static()
        with paddle.static.program_guard(paddle.static.Program()):
            x = paddle.fluid.data('X', [10, 12], 'int32')
            self.assertRaises(TypeError, paddle.nanmean, x)

    def test_api_dygraph_grad(self):
        """The summed gradient must equal the number of non-all-NaN outputs."""
        paddle.disable_static(self.place)

        def test_case(x, axis=None, keepdim=False):
            # An empty axis list means "reduce over everything".
            # (The original code did a no-op `axis = list(axis)` here.)
            if isinstance(axis, list) and len(axis) == 0:
                axis = None
            x_tensor = paddle.to_tensor(x, stop_gradient=False)
            y = paddle.nanmean(x_tensor, axis, keepdim)
            dx = paddle.grad(y, x_tensor)[0].numpy()
            # Each output element spreads a total gradient of 1 over the
            # non-NaN inputs it averages, so sum(dx) should equal the number
            # of output elements whose group had at least one valid input.
            sum_dx_ref = np.prod(y.shape)
            if np.isnan(y.numpy()).sum():
                sum_dx_ref -= np.isnan(y.numpy()).sum()
            cnt = paddle.sum(~paddle.isnan(x_tensor),
                             axis=axis,
                             keepdim=keepdim)
            if (cnt == 0).sum():
                # All-NaN groups yield NaN gradients; zero them so only the
                # well-defined gradient mass is compared to the reference.
                dx[np.isnan(dx)] = 0
            sum_dx = dx.sum()
            self.assertEqual(np.allclose(sum_dx, sum_dx_ref, rtol=1e-04), True)

        test_case(self.x)
        test_case(self.x, [])
        test_case(self.x, -1)
        test_case(self.x, keepdim=True)
        test_case(self.x, 2, keepdim=True)
        test_case(self.x, [0, 2])
        test_case(self.x, (0, 2))
        test_case(self.x, [0, 1, 2, 3])

        test_case(self.x_grad)
        test_case(self.x_grad, [])
        test_case(self.x_grad, -1)
        test_case(self.x_grad, keepdim=True)
        test_case(self.x_grad, 0, keepdim=True)
        test_case(self.x_grad, 1)
        test_case(self.x_grad, (0, 1))
        paddle.enable_static()
|
||
|
||
# Allow running this test module directly from the command line.
if __name__ == "__main__":
    unittest.main()
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1008,6 +1008,80 @@ def nansum(x, axis=None, dtype=None, keepdim=False, name=None): | |
return sum(tmp_tensor, axis, dtype, keepdim, name) | ||
|
||
|
||
def nanmean(x, axis=None, keepdim=False, name=None):
    r"""
    Compute the arithmetic mean along the specified axis, ignoring NaNs.

    Args:
        x (Tensor): The input Tensor with data type uint16, float16, float32, float64.
        axis (int|list|tuple, optional): The axis along which to perform nanmean
            calculations. ``axis`` should be int, list(int) or tuple(int). If
            ``axis`` is a list/tuple of dimension(s), nanmean is calculated along
            all element(s) of ``axis`` . ``axis`` or element(s) of ``axis``
            should be in range [-D, D), where D is the dimensions of ``x`` . If
            ``axis`` or element(s) of ``axis`` is less than 0, it works the
            same way as :math:`axis + D` . If ``axis`` is None, nanmean is
            calculated over all elements of ``x``. Default is None.
        keepdim (bool, optional): Whether to reserve the reduced dimension(s)
            in the output Tensor. If ``keepdim`` is True, the dimensions of
            the output Tensor is the same as ``x`` except in the reduced
            dimensions(it is of size 1 in this case). Otherwise, the shape of
            the output Tensor is squeezed in ``axis`` . Default is False.
        name (str, optional): Name for the operation (optional, default is None).
            For more information, please refer to :ref:`api_guide_Name`.

    Returns:
        Tensor, results of arithmetic mean along ``axis`` of ``x``, with the same data
        type as ``x``.

    Examples:

        .. code-block:: python
            :name: code-example1

            import paddle
            import numpy as np

            # x is a Tensor with following elements:
            #    [[nan, 0.3, 0.5, 0.9]
            #     [0.1, 0.2, -nan, 0.7]]
            # Each example is followed by the corresponding output tensor.
            x = paddle.to_tensor([[float('nan'), 0.3, 0.5, 0.9],
                                  [0.1, 0.2, float('-nan'), 0.7]])
            out1 = paddle.nanmean(x)
            # [0.45000002]
            out2 = paddle.nanmean(x, axis=0)
            # [0.1, 0.25, 0.5, 0.79999995]
            out3 = paddle.nanmean(x, axis=0, keepdim=True)
            # [[0.1, 0.25, 0.5, 0.79999995]]
            out4 = paddle.nanmean(x, axis=1)
            # [0.56666666 0.33333334]
            out5 = paddle.nanmean(x, axis=1, keepdim=True)
            # [[0.56666666]
            #  [0.33333334]]

            # y is a Tensor with shape [2, 2, 2] and elements as below:
            #    [[[1, nan], [3, 4]],
            #     [[5, 6], [-nan, 8]]]
            # Each example is followed by the corresponding output tensor.
            y = np.array([[[1, float('nan')], [3, 4]],
                          [[5, 6], [float('-nan'), 8]]])
            y = paddle.to_tensor(y)
            out6 = paddle.nanmean(y, axis=[1, 2])
            # [2.66666667, 6.33333333]
            out7 = paddle.nanmean(y, axis=[0, 1])
            # [3., 6.]
    """
    if isinstance(axis, int):
        axis = [axis]
    # Dtype/name checks mirror paddle.mean (hence the 'x/input' label).
    check_variable_and_dtype(x, 'x/input',
                             ['uint16', 'float16', 'float32', 'float64'],
                             'nanmean')
    if axis is not None:
        check_type(axis, 'axis/dim', (int, list, tuple), 'nanmean')

    # Count the valid (non-NaN) entries per reduction group. NaNs contribute
    # nothing to nansum, so dividing the NaN-ignoring sum by this count gives
    # the NaN-ignoring mean. A group that is entirely NaN has count 0 and the
    # division is 0/0 at that position.
    cnt = paddle.sum(~paddle.isnan(x), axis=axis, keepdim=keepdim)
    return paddle.divide(
        paddle.nansum(x, axis=axis, keepdim=keepdim, name=name),
        cnt.astype(x.dtype))
|
||
|
||
@templatedoc(op_type="sum") | ||
def add_n(inputs, name=None): | ||
""" | ||
|
@@ -3905,6 +3979,7 @@ def diff(x, n=1, axis=-1, prepend=None, append=None, name=None): | |
else: | ||
out = elementwise_sub(input_back, input_front, axis=axis) | ||
return out | ||
|
||
else: | ||
check_variable_and_dtype(x, 'x', ['float32', 'float64', 'bool', 'int32', 'int64'], 'diff') | ||
check_type(axis, 'axis', (int), 'diff') | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
self.x does not contain 'nan'; we should cover all the test cases listed in the RFC,
and should also include a gradient check.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for your suggestion! I will cover all the test cases in the next update.
However, I am confused about the gradient check, because I can't find an example of gradient checking in the paddle.mean test file test_mean_op.py.
I would appreciate it if you could give me an example.