From b6482b631e6b7ae8f7329aa99e3c725015585928 Mon Sep 17 00:00:00 2001
From: AyaseNana <13659110308@163.com>
Date: Sun, 31 Mar 2024 10:53:11 +0800
Subject: [PATCH 1/3] add param gain to xavier

---
 python/paddle/nn/initializer/xavier.py | 40 ++++++++++++++++----------
 test/legacy_test/test_initializer.py   | 35 ++++++++++++++++++++--
 2 files changed, 58 insertions(+), 17 deletions(-)

diff --git a/python/paddle/nn/initializer/xavier.py b/python/paddle/nn/initializer/xavier.py
index fd47805c22133e..0a4c414aa274c6 100644
--- a/python/paddle/nn/initializer/xavier.py
+++ b/python/paddle/nn/initializer/xavier.py
@@ -41,14 +41,14 @@ class XavierInitializer(Initializer):
 
     .. math::
 
-        x = \sqrt{\\frac{6.0}{fan\_in + fan\_out}}
+        x = gain \times \sqrt{\\frac{6.0}{fan\_in + fan\_out}}
 
     In case of Normal distribution, the mean is 0 and
     the standard deviation is
 
     .. math::
 
-        \sqrt{\\frac{2.0}{fan\_in + fan\_out}}
+        gain \times \sqrt{\\frac{2.0}{fan\_in + fan\_out}}
 
 
     Args:
@@ -57,6 +57,7 @@ class XavierInitializer(Initializer):
             inferred from the variable. Default is None.
         fan_out (float, optional): fan_out for Xavier initialization. If None, it is
             inferred from the variable. Default is None.
+        gain (float, optional): Scaling factor. Default is 1.0.
         seed (int, optional): Random seed. Default is 0.
 
     Note:
@@ -64,7 +65,9 @@ class XavierInitializer(Initializer):
 
     """
 
-    def __init__(self, uniform=True, fan_in=None, fan_out=None, seed=0):
+    def __init__(
+        self, uniform=True, fan_in=None, fan_out=None, seed=0, gain=1.0
+    ):
         assert uniform is not None
         assert seed is not None
         super().__init__()
@@ -72,6 +75,7 @@ def __init__(self, uniform=True, fan_in=None, fan_out=None, seed=0):
         self._fan_in = fan_in
         self._fan_out = fan_out
         self._seed = seed
+        self._gain = gain
 
     def forward(self, var, block=None):
         """Initialize the input tensor with Xavier initialization.
@@ -136,7 +140,7 @@ def forward(self, var, block=None):
 
         if in_dygraph_mode():
             if self._uniform:
-                limit = math.sqrt(6.0 / float(fan_in + fan_out))
+                limit = self._gain * math.sqrt(6.0 / float(fan_in + fan_out))
                 out_var = _C_ops.uniform(
                     out_var_shape,
                     out_dtype,
@@ -146,7 +150,7 @@ def forward(self, var, block=None):
                     _current_expected_place(),
                 )
             else:
-                std = math.sqrt(2.0 / float(fan_in + fan_out))
+                std = self._gain * math.sqrt(2.0 / float(fan_in + fan_out))
 
                 place = _current_expected_place()
                 out_var = _C_ops.gaussian(
@@ -173,7 +177,7 @@ def forward(self, var, block=None):
             return None
         elif in_pir_mode():
             if self._uniform:
-                limit = math.sqrt(6.0 / float(fan_in + fan_out))
+                limit = self._gain * math.sqrt(6.0 / float(fan_in + fan_out))
                 out_var = paddle._pir_ops.uniform(
                     out_var.shape,
                     out_dtype,
@@ -183,7 +187,7 @@ def forward(self, var, block=None):
                     _current_expected_place(),
                 )
             else:
-                std = math.sqrt(2.0 / float(fan_in + fan_out))
+                std = self._gain * math.sqrt(2.0 / float(fan_in + fan_out))
                 out_var = _C_ops.gaussian(
                     out_var.shape,
                     0.0,
@@ -202,7 +206,7 @@ def forward(self, var, block=None):
             return out_var
         else:
             if self._uniform:
-                limit = math.sqrt(6.0 / float(fan_in + fan_out))
+                limit = self._gain * math.sqrt(6.0 / float(fan_in + fan_out))
                 op = block.append_op(
                     type="uniform_random",
                     inputs={},
@@ -217,7 +221,7 @@ def forward(self, var, block=None):
                     stop_gradient=True,
                 )
             else:
-                std = math.sqrt(2.0 / float(fan_in + fan_out))
+                std = self._gain * math.sqrt(2.0 / float(fan_in + fan_out))
                 op = block.append_op(
                     type="gaussian_random",
                     outputs={"Out": out_var},
@@ -254,7 +258,7 @@ class XavierNormal(XavierInitializer):
 
     .. math::
 
-        \sqrt{\frac{2.0}{fan\_in + fan\_out}}.
+        gain \times \sqrt{\frac{2.0}{fan\_in + fan\_out}}.
 
 
     Args:
@@ -262,6 +266,7 @@ class XavierNormal(XavierInitializer):
             inferred from the Tensor. Default is None.
         fan_out (float, optional): fan_out for Xavier initialization, which is
             inferred from the Tensor. Default is None.
+        gain (float, optional): Scaling factor. Default is 1.0.
         name (str, optional): For details, please refer to :ref:`api_guide_Name`. Generally, no setting is required. Default: None.
 
     Returns:
@@ -299,8 +304,10 @@ class XavierNormal(XavierInitializer):
                 [[1.13615966, 0.89018601]]])
     """
 
-    def __init__(self, fan_in=None, fan_out=None, name=None):
-        super().__init__(uniform=False, fan_in=fan_in, fan_out=fan_out, seed=0)
+    def __init__(self, fan_in=None, fan_out=None, gain=1.0, name=None):
+        super().__init__(
+            uniform=False, fan_in=fan_in, fan_out=fan_out, seed=0, gain=gain
+        )
 
 
 class XavierUniform(XavierInitializer):
@@ -316,13 +323,14 @@ class XavierUniform(XavierInitializer):
 
     .. math::
 
-        x = \sqrt{\frac{6.0}{fan\_in + fan\_out}}.
+        x = gain \times \sqrt{\frac{6.0}{fan\_in + fan\_out}}.
 
     Args:
         fan_in (float, optional): fan_in for Xavier initialization, which is
             inferred from the Tensor. Default is None.
         fan_out (float, optional): fan_out for Xavier initialization, which is
             inferred from the Tensor. Default is None.
+        gain (float, optional): Scaling factor. Default is 1.0.
         name (str, optional): For details, please refer to :ref:`api_guide_Name`. Generally, no setting is required. Default: None.
 
     Returns:
@@ -359,5 +367,7 @@ class XavierUniform(XavierInitializer):
                 [[-1.02494967, 0.67544925]]])
     """
 
-    def __init__(self, fan_in=None, fan_out=None, name=None):
-        super().__init__(uniform=True, fan_in=fan_in, fan_out=fan_out, seed=0)
+    def __init__(self, fan_in=None, fan_out=None, gain=1.0, name=None):
+        super().__init__(
+            uniform=True, fan_in=fan_in, fan_out=fan_out, seed=0, gain=gain
+        )
diff --git a/test/legacy_test/test_initializer.py b/test/legacy_test/test_initializer.py
index 68645abbcdf581..28f9478ced8f5f 100644
--- a/test/legacy_test/test_initializer.py
+++ b/test/legacy_test/test_initializer.py
@@ -543,7 +543,7 @@ def test_xavier_initializer_supplied_arguments(
             lod_level=0,
             name="param",
             initializer=paddle.nn.initializer.XavierInitializer(
-                uniform=uniform, fan_in=12, fan_out=23, seed=134
+                uniform=uniform, fan_in=12, fan_out=23, seed=134, gain=0.2
             ),
         )
         num_ops = (
@@ -561,6 +561,7 @@ def test_xavier_initializer_supplied_arguments(
         else:
             self.assertEqual(init_op.type, 'gaussian_random')
         self.assertEqual(init_op.attr('seed'), 134)
+        self.assertEqual(init_op.attr('gain'), 0.2)
         return block
 
     def test_xavier_initializer_fp16(self):
@@ -741,7 +742,11 @@ def test_xavier_initializer_supplied_arguments(
                 shape=[5, 10],
                 name="param",
                 initializer=paddle.nn.initializer.XavierInitializer(
-                    uniform=uniform, fan_in=12, fan_out=23, seed=134
+                    uniform=uniform,
+                    fan_in=12,
+                    fan_out=23,
+                    seed=134,
+                    gain=0.2,
                 ),
             )
         block = startup.global_block()
@@ -766,6 +771,7 @@ def test_xavier_initializer_supplied_arguments(
             self.assertAlmostEqual(max, limit, delta=DELTA)
 
             self.assertEqual(init_op.attrs()['seed'], 134)
+            self.assertEqual(init_op.attrs()['gain'], 0.2)
 
         return main, startup
 
@@ -1553,6 +1559,31 @@ def test_xavier_initializer(self, dtype="float32"):
         paddle.enable_static()
 
 
+class TestXavierInitializerDygraph2(unittest.TestCase):
+    def test_xavier_initializer_with_gain(self, dtype="float32"):
+        """
+        In dygraph mode, we can use initializer directly to initialize a tensor.
+ """ + paddle.disable_static() + + tensor = paddle.zeros([1024, 1024, 16]) + tensor.stop_gradient = False + + xavier_ = paddle.nn.initializer.XavierNormal( + fan_in=3, fan_out=5, gain=2.5 + ) + xavier_(tensor) + + hist, _ = output_hist(tensor.numpy()) + + hist2, _ = output_hist( + np.random.normal(0, 2.5 * np.sqrt(2.0 / (3 + 5)), [1024, 1024, 16]) + ) + + np.testing.assert_allclose(hist, hist2, rtol=0, atol=0.01) + paddle.enable_static() + + class TestMSRAInitializerDygraph(unittest.TestCase): def test_msra_initializer(self, dtype="float32"): """ From fefccb0cb5b5c92883dd8e319f2b39f06de2c149 Mon Sep 17 00:00:00 2001 From: AyaseNana <13659110308@163.com> Date: Mon, 1 Apr 2024 10:55:56 +0800 Subject: [PATCH 2/3] fix test --- test/legacy_test/test_initializer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/legacy_test/test_initializer.py b/test/legacy_test/test_initializer.py index 28f9478ced8f5f..1c5f30011756e0 100644 --- a/test/legacy_test/test_initializer.py +++ b/test/legacy_test/test_initializer.py @@ -555,7 +555,7 @@ def test_xavier_initializer_supplied_arguments( init_op = block.ops[0] if uniform: self.assertEqual(init_op.type, 'uniform_random') - limit = np.sqrt(6.0 / (12 + 23)) + limit = 0.2 * np.sqrt(6.0 / (12 + 23)) self.assertAlmostEqual(init_op.attr('min'), -limit, delta=DELTA) self.assertAlmostEqual(init_op.attr('max'), limit, delta=DELTA) else: @@ -760,7 +760,7 @@ def test_xavier_initializer_supplied_arguments( self.assertEqual(len(checked_ops), 1) init_op = checked_ops[0] if uniform: - limit = np.sqrt(6.0 / (12 + 23)) + limit = 0.2 * np.sqrt(6.0 / (12 + 23)) min = self.get_operand_definition_op_attrs( init_op, "min", "value" ) From 0e0ca61087071560e24b183be94bbf2658936879 Mon Sep 17 00:00:00 2001 From: AyaseNana <13659110308@163.com> Date: Mon, 1 Apr 2024 11:58:52 +0800 Subject: [PATCH 3/3] fix test --- test/legacy_test/test_initializer.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/test/legacy_test/test_initializer.py b/test/legacy_test/test_initializer.py index 1c5f30011756e0..5910a9c4297e09 100644 --- a/test/legacy_test/test_initializer.py +++ b/test/legacy_test/test_initializer.py @@ -561,7 +561,6 @@ def test_xavier_initializer_supplied_arguments( else: self.assertEqual(init_op.type, 'gaussian_random') self.assertEqual(init_op.attr('seed'), 134) - self.assertEqual(init_op.attr('gain'), 0.2) return block def test_xavier_initializer_fp16(self): @@ -771,7 +770,6 @@ def test_xavier_initializer_supplied_arguments( self.assertAlmostEqual(max, limit, delta=DELTA) self.assertEqual(init_op.attrs()['seed'], 134) - self.assertEqual(init_op.attrs()['gain'], 0.2) return main, startup