Commit: Merge pull request #7688 from reyoung/feature/python_overload_math_operators

Add math operator patches
Showing 4 changed files with 338 additions and 0 deletions.
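In short, this change monkey-patches Python's arithmetic dunder methods (__add__, __radd__, __sub__, __rsub__, __mul__, __rmul__, __div__, __rdiv__) onto fluid's Variable, so ordinary Python expressions on Variables append the matching fill_constant and elementwise_* ops to the current block. A minimal usage sketch (the names and shapes below are illustrative, mirroring the tests in this commit):

import paddle.v2.fluid as fluid

a = fluid.layers.data(name="a", shape=[1])
b = (a + 10) * 0.5  # appends fill_constant + elementwise_add, then elementwise_mul
c = 1.0 - a         # a scalar on the left routes through the patched __rsub__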
@@ -0,0 +1,152 @@
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from ..framework import Variable, unique_name
from ..registry import OpProtoHolder

__all__ = ['monkey_patch_variable']


def monkey_patch_variable():
    def unique_tmp_name():
        return unique_name("tmp")

    def safe_get_dtype(var):
        try:
            dtype = var.dtype
        except Exception:
            raise ValueError("Cannot get data type from %s" % var.name)
        return dtype

    def create_tensor(block, value, dtype, shape):
        value = float(value)
        tmp_name = unique_tmp_name()
        var = block.create_var(name=tmp_name, shape=shape, dtype=dtype)
        block.append_op(
            type="fill_constant",
            outputs={'Out': [var]},
            attrs={'dtype': var.dtype,
                   'shape': shape,
                   'value': value})
        return var

    def create_scalar(block, value, dtype):
        return create_tensor(block, value, dtype, shape=[1])

    def create_tensor_with_batchsize(ref_var, value, dtype):
        assert isinstance(ref_var, Variable)
        value = float(value)
        tmp_name = unique_tmp_name()
        var = ref_var.block.create_var(name=tmp_name, dtype=dtype)
        ref_var.block.append_op(
            type='fill_constant_batch_size_like',
            outputs={'Out': [var]},
            inputs={'Input': [ref_var]},
            attrs={'shape': ref_var.shape,
                   'value': value})
        return var

    def astype(self, dtype):
        """
        Cast a variable to a specified data type.
        NOTE: The variable must be a Tensor.
        Args:
            self(Variable): The source variable
            dtype: The target data type
        Returns:
            Variable with new dtype
        """
        tmp_name = unique_tmp_name()
        out = self.block.create_var(name=tmp_name, dtype=dtype)
        self.block.append_op(
            type="cast",
            inputs={"X": [self]},
            outputs={"Out": [out]},
            attrs={"in_dtype": self.dtype,
                   "out_dtype": out.dtype})
        return out

    def _elemwise_method_creator_(method_name, op_type, reverse=False):
        def __impl__(self, other_var):
            lhs_dtype = safe_get_dtype(self)

            if not isinstance(other_var, Variable):
                if reverse:
                    # The scalar becomes the left-hand operand, so it must be
                    # broadcast to the full shape of `self`. A -1 in the shape
                    # is an unknown batch size, which fill_constant cannot
                    # produce; use fill_constant_batch_size_like instead.
                    has_batch_size = False
                    for elem in self.shape:
                        if elem < 0:
                            has_batch_size = True
                            break
                    if not has_batch_size:
                        other_var = create_tensor(
                            self.block,
                            other_var,
                            dtype=lhs_dtype,
                            shape=self.shape)
                    else:
                        other_var = create_tensor_with_batchsize(
                            self, other_var, lhs_dtype)
                else:
                    # add a fill_constant op for the scalar to self.block
                    other_var = create_scalar(
                        self.block, value=other_var, dtype=lhs_dtype)

            rhs_dtype = safe_get_dtype(other_var)
            if lhs_dtype != rhs_dtype:
                other_var = astype(other_var, lhs_dtype)
            if reverse:
                # swap operands for the reflected operators (__rsub__, __rdiv__)
                tmp = self
                self = other_var
                other_var = tmp

            tmp_name = unique_tmp_name()
            out = self.block.create_var(name=tmp_name, dtype=lhs_dtype)
            self.block.append_op(
                type=op_type,
                inputs={'X': [self],
                        'Y': [other_var]},
                outputs={'Out': out})
            return out

        comment = OpProtoHolder.instance().get_op_proto(op_type).comment

        __impl__.__doc__ = """
        {0}
        Args:
            self(Variable): left-hand variable
            other_var(Variable|float|int): right-hand variable
        Returns:
            Variable
        """.format(comment)
        __impl__.__name__ = method_name
        return __impl__

    # inject methods
    for method_name, op_type, reverse in (
            ("__add__", "elementwise_add", False),
            # a+b == b+a. No need to reverse explicitly
            ("__radd__", "elementwise_add", False),
            ("__sub__", "elementwise_sub", False),
            ("__rsub__", "elementwise_sub", True),
            ("__mul__", "elementwise_mul", False),
            # a*b == b*a. No need to reverse explicitly
            ("__rmul__", "elementwise_mul", False),
            ("__div__", "elementwise_div", False),
            ("__rdiv__", "elementwise_div", True)):
        setattr(Variable, method_name,
                _elemwise_method_creator_(method_name, op_type, reverse))

    Variable.astype = astype
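Nothing in this module invokes monkey_patch_variable() itself; presumably one of the two remaining changed files (not shown above) runs it once at package import time. A hedged sketch of that wiring, where the math_op_patch module name is an assumption rather than something visible in this diff:

# Hypothetical wiring in the package __init__; `math_op_patch` is an assumed
# module name for the file above, not one confirmed by this diff.
from .math_op_patch import monkey_patch_variable

monkey_patch_variable()  # after this, every Variable supports +, -, *, /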
@@ -0,0 +1,181 @@
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest
import decorators
import paddle.v2.fluid as fluid
import numpy


class TestMathOpPatches(unittest.TestCase):
    @decorators.prog_scope()
    def test_add_scalar(self):
        a = fluid.layers.data(name="a", shape=[1])
        b = a + 10
        place = fluid.CPUPlace()
        exe = fluid.Executor(place)
        a_np = numpy.random.random(size=[10, 1]).astype('float32')
        b_np = exe.run(fluid.default_main_program(),
                       feed={"a": a_np},
                       fetch_list=[b])
        self.assertTrue(numpy.allclose(a_np + 10, b_np))

    @decorators.prog_scope()
    def test_radd_scalar(self):
        a = fluid.layers.data(name="a", shape=[1])
        b = 10 + a
        place = fluid.CPUPlace()
        exe = fluid.Executor(place)
        a_np = numpy.random.random(size=[10, 1]).astype('float32')
        b_np = exe.run(fluid.default_main_program(),
                       feed={"a": a_np},
                       fetch_list=[b])
        self.assertTrue(numpy.allclose(a_np + 10, b_np))

    @decorators.prog_scope()
    def test_sub_scalar(self):
        a = fluid.layers.data(name="a", shape=[1])
        b = a - 10
        place = fluid.CPUPlace()
        exe = fluid.Executor(place)
        a_np = numpy.random.random(size=[10, 1]).astype('float32')
        b_np = exe.run(fluid.default_main_program(),
                       feed={"a": a_np},
                       fetch_list=[b])
        self.assertTrue(numpy.allclose(a_np - 10, b_np))

    @decorators.prog_scope()
    def test_rsub_scalar(self):
        a = fluid.layers.data(name="a", shape=[1])
        b = 10 - a
        place = fluid.CPUPlace()
        exe = fluid.Executor(place)
        a_np = numpy.random.random(size=[10, 1]).astype('float32')
        b_np = exe.run(fluid.default_main_program(),
                       feed={"a": a_np},
                       fetch_list=[b])
        self.assertTrue(numpy.allclose(10 - a_np, b_np))

    @decorators.prog_scope()
    def test_mul_scalar(self):
        a = fluid.layers.data(name="a", shape=[1])
        b = a * 10
        place = fluid.CPUPlace()
        exe = fluid.Executor(place)
        a_np = numpy.random.random(size=[10, 1]).astype('float32')
        b_np = exe.run(fluid.default_main_program(),
                       feed={"a": a_np},
                       fetch_list=[b])
        self.assertTrue(numpy.allclose(a_np * 10, b_np))

    @decorators.prog_scope()
    def test_rmul_scalar(self):
        a = fluid.layers.data(name="a", shape=[1])
        b = 10 * a
        place = fluid.CPUPlace()
        exe = fluid.Executor(place)
        a_np = numpy.random.random(size=[10, 1]).astype('float32')
        b_np = exe.run(fluid.default_main_program(),
                       feed={"a": a_np},
                       fetch_list=[b])
        self.assertTrue(numpy.allclose(10 * a_np, b_np))

    @decorators.prog_scope()
    def test_div_scalar(self):
        a = fluid.layers.data(name="a", shape=[1])
        b = a / 10
        place = fluid.CPUPlace()
        exe = fluid.Executor(place)
        a_np = numpy.random.random(size=[10, 1]).astype('float32')
        b_np = exe.run(fluid.default_main_program(),
                       feed={"a": a_np},
                       fetch_list=[b])
        self.assertTrue(numpy.allclose(a_np / 10, b_np))

    @decorators.prog_scope()
    def test_rdiv_scalar(self):
        a = fluid.layers.data(name="a", shape=[1])
        b = 10 / a
        place = fluid.CPUPlace()
        exe = fluid.Executor(place)
        # shift away from zero so the division is numerically safe
        a_np = numpy.random.random(size=[10, 1]).astype('float32') + 1e-2

        b_np = exe.run(fluid.default_main_program(),
                       feed={"a": a_np},
                       fetch_list=[b])
        self.assertTrue(numpy.allclose(10 / a_np, b_np))

    @decorators.prog_scope()
    def test_div_two_tensor(self):
        a = fluid.layers.data(name="a", shape=[1])
        b = fluid.layers.data(name="b", shape=[1])
        c = a / b
        place = fluid.CPUPlace()
        exe = fluid.Executor(place)
        a_np = numpy.random.random(size=[10, 1]).astype('float32')
        b_np = numpy.random.random(size=[10, 1]).astype('float32') + 1e-2
        c_np = exe.run(fluid.default_main_program(),
                       feed={"a": a_np,
                             'b': b_np},
                       fetch_list=[c])
        self.assertTrue(numpy.allclose(a_np / b_np, c_np))

    @decorators.prog_scope()
    def test_mul_two_tensor(self):
        a = fluid.layers.data(name="a", shape=[1])
        b = fluid.layers.data(name="b", shape=[1])
        c = a * b
        place = fluid.CPUPlace()
        exe = fluid.Executor(place)
        a_np = numpy.random.random(size=[10, 1]).astype('float32')
        b_np = numpy.random.random(size=[10, 1]).astype('float32')
        c_np = exe.run(fluid.default_main_program(),
                       feed={"a": a_np,
                             'b': b_np},
                       fetch_list=[c])
        self.assertTrue(numpy.allclose(a_np * b_np, c_np))

    @decorators.prog_scope()
    def test_add_two_tensor(self):
        a = fluid.layers.data(name="a", shape=[1])
        b = fluid.layers.data(name="b", shape=[1])
        c = a + b
        place = fluid.CPUPlace()
        exe = fluid.Executor(place)
        a_np = numpy.random.random(size=[10, 1]).astype('float32')
        b_np = numpy.random.random(size=[10, 1]).astype('float32')
        c_np = exe.run(fluid.default_main_program(),
                       feed={"a": a_np,
                             'b': b_np},
                       fetch_list=[c])
        self.assertTrue(numpy.allclose(a_np + b_np, c_np))

    @decorators.prog_scope()
    def test_sub_two_tensor(self):
        a = fluid.layers.data(name="a", shape=[1])
        b = fluid.layers.data(name="b", shape=[1])
        c = a - b
        place = fluid.CPUPlace()
        exe = fluid.Executor(place)
        a_np = numpy.random.random(size=[10, 1]).astype('float32')
        b_np = numpy.random.random(size=[10, 1]).astype('float32')
        c_np = exe.run(fluid.default_main_program(),
                       feed={"a": a_np,
                             'b': b_np},
                       fetch_list=[c])
        self.assertTrue(numpy.allclose(a_np - b_np, c_np))


if __name__ == '__main__':
    unittest.main()
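For what it's worth, test_rdiv_scalar above exercises the batch-size path: fluid.layers.data yields a shape whose leading dimension is the implicit -1 batch size, so the reversed operators materialize the constant with fill_constant_batch_size_like rather than fill_constant. A hedged sketch of checking that, assuming this era's Program.current_block() / Block.ops / Operator.type accessors:

import paddle.v2.fluid as fluid

a = fluid.layers.data(name="a", shape=[1])
b = 10 / a  # reversed division: the constant becomes the left-hand operand

# expected op types: fill_constant_batch_size_like, then elementwise_div
for op in fluid.default_main_program().current_block().ops:
    print(op.type)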