Merge pull request PaddlePaddle#32 from cxxly/prim
add first-order autodiff composite logic for static graph
JiabinYang authored Jan 5, 2023
2 parents a60eb1f + e7f4731 commit 096b3a6
Showing 12 changed files with 337 additions and 27 deletions.
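
For context, this change routes static-graph backward construction through the new composite (primitive-op) grad makers when prim mode is enabled. Below is a minimal usage sketch mirroring the new tests added in this commit; the FLAGS_prim_enabled flag and the tanh case come from those tests, while fetching the returned grad variable directly (rather than the internal temp name the test fetches) is an assumption, not part of the commit.

import os

# The flag must be set before paddle is imported (as done in the new tests).
os.environ['FLAGS_prim_enabled'] = 'True'

import numpy as np
import paddle

paddle.enable_static()
main, startup = paddle.static.Program(), paddle.static.Program()
with paddle.static.program_guard(main, startup):
    x = paddle.static.data('x', [10, 10], 'float32')
    x.stop_gradient = False
    y = paddle.tanh(x)
    # With prim enabled, the backward of tanh is built from primitive ops
    # instead of a single tanh_grad op.
    x_grad = paddle.static.gradients(y, x)[0]

exe = paddle.static.Executor()
exe.run(startup)
(out,) = exe.run(main,
                 feed={'x': np.random.rand(10, 10).astype('float32')},
                 fetch_list=[x_grad])
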
2 changes: 1 addition & 1 deletion paddle/fluid/framework/details/op_registry.h
@@ -268,7 +268,7 @@ struct OpInfoFiller<T, kGradCompOpDescMaker> {
[](const OpDesc& fwd_op,
const std::unordered_set<std::string>& no_grad_set,
std::unordered_map<std::string, std::string>* grad_to_var,
BlockDesc* current_block,
const BlockDesc* current_block,
const std::vector<BlockDesc*>& grad_block) {
T maker(fwd_op, no_grad_set, grad_to_var, current_block, grad_block);
return maker();
19 changes: 1 addition & 18 deletions paddle/fluid/framework/op_info.h
@@ -82,26 +82,9 @@ class OpInfo {
return creator_;
}

const GradOpMakerFN& GradOpMaker() const {
// Normally, proto_ should not be null, except some special operators, such
// as LeaklyReluDoubleGrad op.
std::string type = proto_ ? proto_->type() : "unknown";
PADDLE_ENFORCE_NOT_NULL(
grad_op_maker_,
platform::errors::NotFound(
"Operator %s's GradOpMaker has not been "
"registered.\nPlease check whether (%s) operator has "
"gradient operator.\nIf not, please set stop_gradient to be True "
"for its input and output variables using var.stop_gradient=True.",
type.c_str(),
type.c_str()));
return grad_op_maker_;
}
const GradOpMakerFN& GradOpMaker() const { return grad_op_maker_; }

const GradCompositeOpMakerFN& GradCompOpMaker() const {
// Normally, proto_ should not be null, except some special operators, such
// as LeaklyReluDoubleGrad op.
std::string type = proto_ ? proto_->type() : "unknown";
return grad_comp_op_maker_;
}

2 changes: 1 addition & 1 deletion paddle/fluid/framework/type_defs.h
@@ -101,7 +101,7 @@ using GradCompositeOpMakerFN =
const OpDesc&,
const std::unordered_set<std::string>& /*no_grad_set*/,
std::unordered_map<std::string, std::string>* /*grad_to_var*/,
BlockDesc*,
const BlockDesc*,
const std::vector<BlockDesc*>& grad_block)>;

using DygraphGradOpMakerFN =
4 changes: 2 additions & 2 deletions paddle/fluid/prim/utils/static/composite_grad_desc_maker.h
@@ -46,7 +46,7 @@ class GradCompositeOpMakerBase {
const framework::OpDesc& fwd_op,
const std::unordered_set<std::string>& no_grad_set,
std::unordered_map<std::string, std::string>* grad_to_var,
framework::BlockDesc* original_block,
const framework::BlockDesc* original_block,
const std::vector<framework::BlockDesc*>& grad_block =
std::vector<framework::BlockDesc*>())
: fwd_op_(fwd_op),
@@ -331,7 +331,7 @@ class GradCompositeOpMakerBase {
const framework::OpDesc& fwd_op_;
const std::unordered_set<std::string>& no_grad_set_;
std::unordered_map<std::string, std::string>* grad_to_var_;
framework::BlockDesc* original_block_;
const framework::BlockDesc* original_block_;
framework::ProgramDesc acting_program_;

protected:
56 changes: 51 additions & 5 deletions paddle/fluid/pybind/pybind.cc
@@ -66,6 +66,7 @@ limitations under the License. */
#include "paddle/fluid/imperative/amp_auto_cast.h"
#include "paddle/fluid/imperative/layer.h"
#include "paddle/fluid/memory/allocation/allocator_strategy.h"
#include "paddle/fluid/prim/utils/utils.h"
#ifdef PADDLE_WITH_CUDA
#include "paddle/fluid/memory/allocation/cuda_ipc_allocator.h"
#endif
@@ -1221,11 +1222,56 @@ All parameter, weight, gradient are variables in Paddle.
const std::unordered_set<std::string> &no_grad_set,
const std::vector<BlockDesc *> &grad_sub_block) {
std::unordered_map<std::string, std::string> grad_to_var;
std::vector<std::unique_ptr<OpDesc>> grad_op_descs =
framework::OpInfoMap::Instance()
.Get(op_desc.Type())
.GradOpMaker()(
op_desc, no_grad_set, &grad_to_var, grad_sub_block);

auto op_info = framework::OpInfoMap::Instance().Get(op_desc.Type());
auto grad_op_maker = op_info.GradOpMaker();
auto grad_comp_op_maker = op_info.GradCompOpMaker();

if ((grad_op_maker == nullptr) && (grad_comp_op_maker == nullptr)) {
// Normally, proto_ should not be null, except some special
// operators, such as LeaklyReluDoubleGrad op.
std::string type =
op_info.proto_ ? op_info.proto_->type() : "unknown";
PADDLE_THROW(platform::errors::NotFound(
"Neither operator %s's GradOpMaker nor GradCompOpMaker has "
"been registered.\nPlease check whether (%s) operator has "
"gradient operator.\nIf not, please set stop_gradient to be "
"True for its input and output variables using "
"var.stop_gradient=True.",
type.c_str(),
type.c_str()));
}

// In PrimEnabled mode, GradCompOpMaker takes priority over
// GradOpMaker because the first-order grad operator must be split
// into primitive operators for the compiler. In PrimDisabled mode,
// GradOpMaker takes priority over GradCompOpMaker for better
// performance.
std::vector<std::unique_ptr<OpDesc>> grad_op_descs;
if (paddle::prim::PrimCommonUtils::IsPrimEnabled()) {
if (grad_comp_op_maker != nullptr) {
grad_op_descs = grad_comp_op_maker(op_desc,
no_grad_set,
&grad_to_var,
op_desc.Block(),
grad_sub_block);
} else {
grad_op_descs = grad_op_maker(
op_desc, no_grad_set, &grad_to_var, grad_sub_block);
}
} else {
if (grad_op_maker != nullptr) {
grad_op_descs = grad_op_maker(
op_desc, no_grad_set, &grad_to_var, grad_sub_block);
} else {
grad_op_descs = grad_comp_op_maker(op_desc,
no_grad_set,
&grad_to_var,
op_desc.Block(),
grad_sub_block);
}
}

std::vector<OpDesc *> grad_op_desc_ptrs(grad_op_descs.size());
std::transform(
grad_op_descs.begin(),
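
The maker-selection rule added above can be summarized as follows. This is an illustrative sketch only (a hypothetical Python helper, not Paddle API or the actual C++ code):

def select_grad_maker(prim_enabled, grad_op_maker, grad_comp_op_maker):
    # Mirrors the branching in get_grad_op_desc above.
    if grad_op_maker is None and grad_comp_op_maker is None:
        raise LookupError(
            "Neither GradOpMaker nor GradCompOpMaker is registered.")
    if prim_enabled:
        # Prefer the composite maker: the backward is split into primitive ops.
        return grad_comp_op_maker if grad_comp_op_maker is not None else grad_op_maker
    # Prefer the hand-written grad op for performance.
    return grad_op_maker if grad_op_maker is not None else grad_comp_op_maker
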
12 changes: 12 additions & 0 deletions python/paddle/fluid/tests/unittests/prim/CMakeLists.txt
@@ -0,0 +1,12 @@
file(
GLOB TEST_OPS
RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}"
"test_*.py")
string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}")
set(GC_ENVS FLAGS_eager_delete_tensor_gb=0.0)

foreach(TEST_OP ${TEST_OPS})
py_test_modules(${TEST_OP} MODULES ${TEST_OP} ENVS ${GC_ENVS})
endforeach()

add_subdirectory(api)
13 changes: 13 additions & 0 deletions python/paddle/fluid/tests/unittests/prim/api/CMakeLists.txt
@@ -0,0 +1,13 @@
file(
GLOB TEST_OPS
RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}"
"test_*.py")
string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}")
set(GC_ENVS FLAGS_eager_delete_tensor_gb=0.0)

foreach(TEST_OP ${TEST_OPS})
py_test_modules(${TEST_OP} MODULES ${TEST_OP} ENVS ${GC_ENVS})
endforeach()

add_subdirectory(comp)
add_subdirectory(prim)
12 changes: 12 additions & 0 deletions python/paddle/fluid/tests/unittests/prim/api/comp/CMakeLists.txt
@@ -0,0 +1,12 @@
file(
GLOB TEST_OPS
RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}"
"test_*.py")
string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}")
set(GC_ENVS FLAGS_eager_delete_tensor_gb=0.0)

foreach(TEST_OP ${TEST_OPS})
py_test_modules(${TEST_OP} MODULES ${TEST_OP} ENVS ${GC_ENVS})
endforeach()

set_tests_properties(test_tanh_grad_comp PROPERTIES TIMEOUT 60)
@@ -0,0 +1,78 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


import os
import unittest

os.environ['FLAGS_prim_enabled'] = 'True'

import autograd
import autograd.numpy
import numpy as np
import parameterized as param

import paddle


@param.parameterized_class(
('primal', 'cotangent', 'dtype'),
[
(np.random.rand(10, 10), np.random.rand(10, 10), np.float32),
],
)
class TestTanhGradComp(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.primal = cls.primal.astype(cls.dtype)
cls.cotangent = cls.cotangent.astype(cls.dtype)

def setUp(self):
paddle.enable_static()

def tearDown(self):
paddle.disable_static()

def test_tanh_grad_comp(self):
def actual(primal, cotangent):
mp, sp = paddle.static.Program(), paddle.static.Program()
with paddle.static.program_guard(mp, sp):
x = paddle.static.data('primal', primal.shape, primal.dtype)
x.stop_gradient = False
v = paddle.static.data(
'cotangent', cotangent.shape, cotangent.dtype
)
y = paddle.tanh(x)
x_cotangent = paddle.static.gradients(y, x, v)
exe = paddle.static.Executor()
exe.run(sp)
return exe.run(
program=mp,
feed={'primal': primal, 'cotangent': cotangent},
fetch_list='composite_tmp_2',
)[0]

def desired(primal, cotangent):
return autograd.make_vjp(autograd.numpy.tanh)(primal)[0](cotangent)

np.testing.assert_allclose(
actual=actual(self.primal, self.cotangent),
desired=desired(self.primal, self.cotangent),
rtol=1e-6,
atol=0,
)


if __name__ == '__main__':
unittest.main()
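
For reference, the desired() oracle above is just the standard tanh VJP, d/dx tanh(x) = 1 - tanh(x)^2 applied elementwise to the cotangent; a plain NumPy equivalent (a sketch for illustration, not part of the test) is:

import numpy as np

def tanh_vjp(primal, cotangent):
    # VJP of tanh: cotangent * (1 - tanh(primal)^2), elementwise.
    y = np.tanh(primal)
    return cotangent * (1.0 - y * y)
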
10 changes: 10 additions & 0 deletions python/paddle/fluid/tests/unittests/prim/api/prim/CMakeLists.txt
@@ -0,0 +1,10 @@
file(
GLOB TEST_OPS
RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}"
"test_*.py")
string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}")
set(GC_ENVS FLAGS_eager_delete_tensor_gb=0.0)

foreach(TEST_OP ${TEST_OPS})
py_test_modules(${TEST_OP} MODULES ${TEST_OP} ENVS ${GC_ENVS})
endforeach()
@@ -0,0 +1,74 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


import os
import unittest

os.environ['FLAGS_prim_enabled'] = 'False'

import parameterized as param

import paddle
from paddle.fluid import core, framework


@param.parameterized_class(
(
'fwd_type',
'inputs',
'outputs',
'no_grad_var',
'grad_sub_block',
'desired_ops',
),
(
('tanh', {'X': ['x']}, {'Out': ['y']}, set(), tuple(), ('tanh_grad',)),
('empty', {}, {'Out': ['y']}, set(), tuple(), tuple()),
),
)
class TestGetGradOpDescPrimEnabled(unittest.TestCase):
@classmethod
def setUpClass(cls):
paddle.enable_static()
block = framework.Block(framework.Program(), 0)
block.append_op(
type=cls.fwd_type,
inputs={
n: [block.create_var(name=v, stop_gradient=False) for v in vs]
for n, vs in cls.inputs.items()
},
outputs={
n: [block.create_var(name=v, stop_gradient=False) for v in vs]
for n, vs in cls.outputs.items()
},
)
cls.fwd = block.ops[0].desc

@classmethod
def tearDownClass(cls):
paddle.disable_static()

def test_get_grad_op_desc(self):
actual = tuple(
desc.type()
for desc in core.get_grad_op_desc(
self.fwd, self.no_grad_var, self.grad_sub_block
)[0]
)
self.assertEqual(actual, self.desired_ops)


if __name__ == '__main__':
unittest.main()