【Hackathon 6th No.26】API improvement for nn.initializer.KaimingNormal and nn.initializer.KaimingUniform (usability improvement) -part #63268

Closed · wants to merge 1 commit
58 changes: 45 additions & 13 deletions python/paddle/nn/initializer/kaiming.py
@@ -41,21 +41,22 @@ class MSRAInitializer(Initializer):

.. math::

x = gain \times \sqrt{\frac{3}{fan\_in}}
x = gain \times \sqrt{\frac{3}{fan\_mode}}

In case of Normal distribution, the mean is 0 and the standard deviation
is

.. math::

\frac{gain}{\sqrt{{fan\_in}}}
\frac{gain}{\sqrt{{fan\_mode}}}

Args:
uniform (bool, optional): whether to use uniform or normal distribution. Default is True.
fan_in (float32|None, optional): fan_in (in_features) of the trainable Tensor. If None, it will be inferred automatically. If you do not want to use the in_features of the Tensor, you can set 'fan_in' to a value of your own choosing. Default is None.
seed (int32, optional): random seed. Default is 0.
negative_slope (float, optional): negative_slope (only used with leaky_relu). Default is 0.0.
nonlinearity (str, optional): the non-linear function. Default is relu.
mode (str, optional): either 'fan_in' or 'fan_out', indicating which fan is used to compute the scale. 'fan_in' preserves the magnitude of the variance of the weights in the forward pass; 'fan_out' preserves it in the backward pass. Default is 'fan_in'.

Note:
It is recommended to set fan_in to None for most cases.
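
As a reading aid (illustrative only, not part of the diff), a minimal sketch of how the new mode argument changes the scale, assuming the relu gain of sqrt(2) and a hypothetical 2-D weight of shape [5, 10], for which Paddle treats shape[0] as fan_in and shape[1] as fan_out:

import math

fan_in, fan_out = 5, 10  # from a hypothetical [5, 10] weight
gain = math.sqrt(2.0)    # calculate_gain('relu')

# KaimingUniform samples from U(-limit, limit) with limit = gain * sqrt(3 / fan)
limit_fan_in = gain * math.sqrt(3.0 / fan_in)    # mode='fan_in'  -> ~1.095
limit_fan_out = gain * math.sqrt(3.0 / fan_out)  # mode='fan_out' -> ~0.775

# KaimingNormal samples from N(0, std**2) with std = gain / sqrt(fan)
std_fan_in = gain / math.sqrt(fan_in)    # mode='fan_in'  -> ~0.632
std_fan_out = gain / math.sqrt(fan_out)  # mode='fan_out' -> ~0.447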
@@ -69,6 +70,7 @@ def __init__(
seed=0,
negative_slope=0,
nonlinearity='relu',
mode='fan_in',
):
"""Constructor for MSRAInitializer"""
assert uniform is not None
@@ -79,6 +81,7 @@ def __init__(
self._seed = seed
self._negative_slope = negative_slope
self._nonlinearity = nonlinearity
self._mode = mode

def forward(self, var, block=None):
"""Initialize the input tensor with MSRA initialization.
@@ -102,7 +105,10 @@ def forward(self, var, block=None):
f_in, f_out = self._compute_fans(var)

# If fan_in is passed, use it; otherwise choose f_in or f_out according to mode
fan_in = f_in if self._fan_in is None else self._fan_in
if self._fan_in is None:
fan = f_in if self._mode == 'fan_in' else f_out
else:
fan = self._fan_in

if self._seed == 0:
self._seed = block.program.random_seed
@@ -134,7 +140,7 @@ def forward(self, var, block=None):
if in_dygraph_mode():
if self._uniform:
gain = calculate_gain(self._nonlinearity, self._negative_slope)
limit = gain * math.sqrt(3.0 / float(fan_in))
limit = gain * math.sqrt(3.0 / float(fan))
out_var = _C_ops.uniform(
var.shape,
out_dtype,
Expand All @@ -145,7 +151,7 @@ def forward(self, var, block=None):
)
else:
gain = calculate_gain(self._nonlinearity, self._negative_slope)
std = gain / math.sqrt(float(fan_in))
std = gain / math.sqrt(float(fan))
place = _current_expected_place()
out_var = _C_ops.gaussian(
out_var.shape, 0.0, std, self._seed, out_dtype, place
@@ -162,7 +168,7 @@ def forward(self, var, block=None):
elif in_pir_mode():
if self._uniform:
gain = calculate_gain(self._nonlinearity, self._negative_slope)
limit = gain * math.sqrt(3.0 / float(fan_in))
limit = gain * math.sqrt(3.0 / float(fan))
out_var = _C_ops.uniform(
var.shape,
out_dtype,
@@ -173,7 +179,7 @@ def forward(self, var, block=None):
)
else:
gain = calculate_gain(self._nonlinearity, self._negative_slope)
std = gain / math.sqrt(float(fan_in))
std = gain / math.sqrt(float(fan))
place = _current_expected_place()
out_var = _C_ops.gaussian(
out_var.shape, 0.0, std, self._seed, out_dtype, place
@@ -189,7 +195,7 @@ def forward(self, var, block=None):
else:
if self._uniform:
gain = calculate_gain(self._nonlinearity, self._negative_slope)
limit = gain * math.sqrt(3.0 / float(fan_in))
limit = gain * math.sqrt(3.0 / float(fan))
op = block.append_op(
type="uniform_random",
inputs={},
@@ -206,7 +212,7 @@

else:
gain = calculate_gain(self._nonlinearity, self._negative_slope)
std = gain / math.sqrt(float(fan_in))
std = gain / math.sqrt(float(fan))
op = block.append_op(
type="gaussian_random",
outputs={"Out": out_var},
@@ -249,12 +255,13 @@ class KaimingNormal(MSRAInitializer):

.. math::

\frac{gain}{\sqrt{{fan\_in}}}
\frac{gain}{\sqrt{{fan\_mode}}}

Args:
fan_in (float32|None, optional): fan_in (in_features) of the trainable Tensor. If None, it will be inferred automatically. If you do not want to use the in_features of the Tensor, you can set 'fan_in' to a value of your own choosing. Default is None.
negative_slope (float, optional): negative_slope (only used with leaky_relu). Default is 0.0.
nonlinearity (str, optional): the non-linear function. Default is relu.
mode (str, optional): either 'fan_in' or 'fan_out', indicating which fan is used to compute the scale. 'fan_in' preserves the magnitude of the variance of the weights in the forward pass; 'fan_out' preserves it in the backward pass. Default is 'fan_in'.

Note:
It is recommended to set fan_in to None for most cases.
@@ -271,13 +278,25 @@ class KaimingNormal(MSRAInitializer):

"""

def __init__(self, fan_in=None, negative_slope=0.0, nonlinearity='relu'):
def __init__(
self,
fan_in=None,
negative_slope=0.0,
nonlinearity='relu',
mode='fan_in',
):
if mode not in ('fan_in', 'fan_out'):
    raise ValueError(
        "Kaiming initializer's mode only supports 'fan_in' or 'fan_out'."
    )

super().__init__(
uniform=False,
fan_in=fan_in,
seed=0,
negative_slope=negative_slope,
nonlinearity=nonlinearity,
mode=mode,
)
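
A minimal usage sketch of the new argument (illustrative only, not part of the diff; the layer sizes are arbitrary):

import paddle

# Hypothetical example: draw the Linear weight from a Kaiming normal
# distribution whose std is computed from fan_out instead of the default fan_in.
linear = paddle.nn.Linear(
    10,
    20,
    weight_attr=paddle.ParamAttr(
        initializer=paddle.nn.initializer.KaimingNormal(mode='fan_out')
    ),
)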


@@ -295,10 +314,11 @@ class KaimingUniform(MSRAInitializer):

.. math::

x = gain \times \sqrt{\frac{3}{fan\_in}}
x = gain \times \sqrt{\frac{3}{fan\_mode}}

Args:
fan_in (float32|None, optional): fan_in (in_features) of the trainable Tensor. If None, it will be inferred automatically. If you do not want to use the in_features of the Tensor, you can set 'fan_in' to a value of your own choosing. Default is None.
mode (str, optional): either 'fan_in' or 'fan_out', indicating which fan is used to compute the scale. 'fan_in' preserves the magnitude of the variance of the weights in the forward pass; 'fan_out' preserves it in the backward pass. Default is 'fan_in'.
negative_slope (float, optional): negative_slope (only used with leaky_relu). Default is 0.0.
nonlinearity (str, optional): the non-linear function. Default is relu.

@@ -317,11 +337,23 @@ class KaimingUniform(MSRAInitializer):

"""

def __init__(self, fan_in=None, negative_slope=0.0, nonlinearity='relu'):
def __init__(
self,
fan_in=None,
negative_slope=0.0,
nonlinearity='relu',
mode='fan_in',
):
if mode not in ('fan_in', 'fan_out'):
    raise ValueError(
        "Kaiming initializer's mode only supports 'fan_in' or 'fan_out'."
    )

super().__init__(
uniform=True,
fan_in=fan_in,
seed=0,
negative_slope=negative_slope,
nonlinearity=nonlinearity,
mode=mode,
)
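
And the uniform counterpart (again an illustrative sketch, not part of the diff): with mode='fan_out' the bound is computed from shape[1] of a 2-D parameter rather than shape[0].

import paddle

# Hypothetical example: create a standalone parameter initialized with
# Kaiming uniform statistics computed from fan_out (shape[1] == 10 here).
w = paddle.create_parameter(
    shape=[5, 10],
    dtype='float32',
    default_initializer=paddle.nn.initializer.KaimingUniform(mode='fan_out'),
)
print(float(w.abs().max()))  # expected to stay below sqrt(6 / 10) ~ 0.775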
118 changes: 118 additions & 0 deletions test/deprecated/legacy_test/test_initializer.py
@@ -942,6 +942,52 @@ def test_msra_initializer_bf16(self):
"""Test the MSRA initializer with bfloat16"""
block = self.test_msra_initializer_supplied_arguments("uint16")

def test_uniform_msra_initializer_fan_mode(self):
"""Test MSRA initializer with uniform distribution and
'fan_out' mode.
"""
program = framework.Program()
block = program.global_block()
for _ in range(2):
param = block.create_parameter(
dtype="float32",
shape=[5, 10],
lod_level=0,
name="param",
initializer=paddle.nn.initializer.KaimingUniform(
mode='fan_out'
),
)
self.assertEqual(len(block.ops), 1)
init_op = block.ops[0]
self.assertEqual(init_op.type, 'uniform_random')
limit = np.sqrt(6.0 / param.shape[1])
self.assertAlmostEqual(init_op.attr('min'), -limit, delta=DELTA)
self.assertAlmostEqual(init_op.attr('max'), limit, delta=DELTA)
self.assertEqual(init_op.attr('seed'), 0)

def test_normal_msra_initializer_fan_mode(self):
"""Test MSRA initializer with normal distribution and
'fan_out' mode.
"""
program = framework.Program()
block = program.global_block()
for _ in range(2):
param = block.create_parameter(
dtype="float32",
shape=[5, 10],
lod_level=0,
name="param",
initializer=paddle.nn.initializer.KaimingNormal(mode='fan_out'),
)
self.assertEqual(len(block.ops), 1)
init_op = block.ops[0]
self.assertEqual(init_op.type, 'gaussian_random')
std = np.sqrt(2.0 / param.shape[1])
self.assertAlmostEqual(init_op.attr('mean'), 0.0, delta=DELTA)
self.assertAlmostEqual(init_op.attr('std'), std, delta=DELTA)
self.assertEqual(init_op.attr('seed'), 0)
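
For context (editorial note, not part of the diff): the constants asserted above follow from the formulas in kaiming.py, since the default nonlinearity is relu (gain = sqrt(2)) and fan_out for the [5, 10] parameter is shape[1] = 10:

import numpy as np

gain = np.sqrt(2.0)  # calculate_gain('relu')
fan_out = 10         # shape[1] of the [5, 10] test parameter
print(gain * np.sqrt(3.0 / fan_out))  # uniform limit == np.sqrt(6.0 / 10)
print(gain / np.sqrt(fan_out))        # normal std   == np.sqrt(2.0 / 10)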


class TestMSRAInitializerPir(unittest.TestCase):
def setUp(self):
@@ -1171,6 +1217,68 @@ def test_msra_initializer_bf16(self):
exe.run(startup_2)
exe.run(main_2)

def test_uniform_msra_initializer_fan_mode(self):
"""Test MSRA initializer with uniform distribution and
'fan_out' mode.
"""
with paddle.pir_utils.IrGuard():
main = paddle.static.Program()
startup = paddle.static.Program()
with paddle.static.program_guard(main, startup):
param = paddle.pir.core.create_parameter(
dtype="float32",
shape=[5, 10],
name="param",
initializer=paddle.nn.initializer.KaimingUniform(
mode='fan_out'
),
)
block = startup.global_block()
checked_ops = self.get_init_ops_by_op_name(
block, self.init_uniform_op_name
)
self.assertEqual(len(checked_ops), 1)
init_op = checked_ops[0]
limit = np.sqrt(6.0 / param.shape[1])
min = self.get_operand_definition_op_attrs(
init_op, "min", "value"
)
max = self.get_operand_definition_op_attrs(
init_op, "max", "value"
)
self.assertAlmostEqual(min, -limit, delta=DELTA)
self.assertAlmostEqual(max, limit, delta=DELTA)
self.assertEqual(init_op.attrs()['seed'], 0)

def test_normal_msra_initializer_fan_mode(self):
"""Test MSRA initializer with normal distribution and
'fan_out' mode.
"""
with paddle.pir_utils.IrGuard():
main = paddle.static.Program()
startup = paddle.static.Program()
with paddle.static.program_guard(main, startup):
param = paddle.pir.core.create_parameter(
dtype="float32",
shape=[5, 10],
name="param",
initializer=paddle.nn.initializer.KaimingNormal(
mode='fan_out'
),
)
block = startup.global_block()
checked_ops = self.get_init_ops_by_op_name(
block, self.init_normal_op_name
)
self.assertEqual(len(checked_ops), 1)
init_op = checked_ops[0]
std = np.sqrt(2.0 / param.shape[1])
self.assertAlmostEqual(
init_op.attrs()['mean'], 0.0, delta=DELTA
)
self.assertAlmostEqual(init_op.attrs()['std'], std, delta=DELTA)
self.assertEqual(init_op.attrs()['seed'], 0)


class TestBilinearInitializer(unittest.TestCase):
def test_bilinear_initializer(self, dtype="float32"):
@@ -2207,6 +2315,16 @@ def test_type_error(self):
ZeroDivisionError, self.func_kaiminguniform_initializer_fan_in_zero
)

def test_input_error(self):
with self.assertRaises(ValueError):
paddle.nn.initializer.KaimingUniform(mode='in')


class TestKaimingNormal(unittest.TestCase):
def test_input_error(self):
with self.assertRaises(ValueError):
paddle.nn.initializer.KaimingNormal(mode='in')


class TestTruncatedNormalInitializerDygraph(unittest.TestCase):
def _trunc_normal_numpy(self, tensor, mean=0.0, std=1.0, a=-2.0, b=2.0):