[TE] reverse-mode autodiff without any optimization (#5121)

* [TE] reverse-mode autodiff without any optimization Co-authored-by: Sergei Grechanik <[email protected]> * address review comments * add comments and retrigger CI * move unittest to debug ci * move test back and add seed Co-authored-by: Sergei Grechanik <[email protected]>
apache · Mar 31, 2020 · e4a5441 · e4a5441
1 parent ff7bab8
commit e4a5441
Show file tree

Hide file tree

Showing 10 changed files with 988 additions and 1 deletion.
diff --git a/include/tvm/te/autodiff.h b/include/tvm/te/autodiff.h
@@ -0,0 +1,97 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file tvm/te/autodiff.h
+ * \brief Automatic differentiation of tensor expressions.
+ */
+
+#ifndef TVM_TE_AUTODIFF_H_
+#define TVM_TE_AUTODIFF_H_
+
+#include <tvm/runtime/object.h>
+#include <tvm/tir/expr.h>
+#include "tensor.h"
+
+namespace tvm {
+/*! \brief Tensor expression language DSL. */
+namespace te {
+
+/*!
+ * \brief Take the derivative of the expression with respect to the given variable.
+ * \param expr The expression to differentiate.
+ * \param var The variable to differentiate with respect to.
+ * \return The expression for the derivative.
+ */
+PrimExpr Derivative(const PrimExpr& expr, const Var& var);
+
+/*!
+ * \brief Get the tensor representing the Jacobian of the output with respect to the input.
+ *
+ *  Note that if \p output depends on \p input indirectly (by using some other tensor
+ *  depending on \p input), this dependency won't contribute to the resulting Jacobian.
+ *  For such cases use the function ::Gradient.
+ *
+ * \param output The tensor to differentiate.
+ * \param input The input tensor, which \p output should directly use.
+ * \return The tensor representing the Jacobian of shape `output.shape + input.shape`.
+ */
+Tensor Jacobian(const Tensor& output, const Tensor& input);
+
+/*!
+ * \brief The building block for reverse-mode AD.
+ *
+ *  Differentiate \p output with respect to \p input and multiply the result by \p head on the
+ *  left using tensor dot product. \p input must be an immediate dependency of \p output (must be
+ *  called from within the body of \p output). That is, the function will compute one summand of
+ *  the adjoint for \p input given the adjoint for \p output (which is called \p head here).
+ *
+ * \param output The tensor to differentiate.
+ * \param input The input tensor, which \p output should directly use.
+ * \param head The adjoint of \p output. Must be of shape `prefix + output.shape`.
+ * \return The tensor of shape `prefix + input.shape`
+ *         representing the partial adjoint of \p input wrt one of its consumers (output)
+ */
+Tensor VectorJacobianProduct(const Tensor &output, const Tensor &input, const Tensor &head);
+
+/*!
+ * \brief Perform reverse mode automatic differentiation.
+ *
+ *  Each item of the returned array is an adjoint for the corresponding item of
+ *  \p inputs, i.e. \p head multiplied by the Jacobian of \p output with respect to the
+ *  corresponding item of \p inputs.
+ *
+ * \param output The tensor to differentiate.
+ * \param inputs The array of input tensors. When the array is empty, will perform differentiation
+ *               wrt all tensors the output depends on.
+ * \param head The adjoint of the output, in other words, some tensor, by which the Jacobians
+ *             will be multiplied (using tensordot axes=`output.shape`).
+ *             Its shape must be of the form `prefix + output.shape`. If the null pointer is
+ *             provided (the default argument is `Tensor()`), the identity tensor of shape
+ *             `output.shape + output.shape` will be used.
+ * \return An array of adjoints corresponding to \p inputs.
+ */
+TVM_DLL Array<Tensor> Gradient(
+    const Tensor& output,
+    const Array<Tensor>& inputs,
+    const Tensor& head = Tensor());
+
+}  // namespace te
+}  // namespace tvm
+
+#endif  // TVM_TE_AUTODIFF_H_
diff --git a/python/tvm/te/__init__.py b/python/tvm/te/__init__.py
@@ -33,3 +33,4 @@
 from .operation import thread_axis, reduce_axis
 
 from .tensor import PlaceholderOp, ComputeOp, TensorComputeOp, ScanOp, ExternOp, HybridOp
+from .autodiff import gradient
diff --git a/python/tvm/te/autodiff.py b/python/tvm/te/autodiff.py
@@ -0,0 +1,67 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""Automatic differentiation of tensor expressions."""
+from . import _ffi_api
+
+
+def gradient(output, inputs, head=None):
+    """Perform reverse-mode automatic differentiation.
+
+    Parameters
+    ----------
+    output : Tensor
+        The tensor to differentiate.
+
+    inputs : List[Tensor]
+        The list of input tensors to be differentiated wrt.
+
+    head : Tensor
+        The adjoint of the output, in other words, some tensor, by which the Jacobians
+        will be multiplied. Its shape must be of the form `prefix + output.shape`.
+        If `None` is passed, the identity tensor of shape `output.shape + output.shape`
+        will be used.
+
+    Returns
+    -------
+    tensors: List[Tensor]
+        The result gradient, in the same order as the inputs
+
+    Example
+    -------
+    .. code-block:: python
+
+        x = tvm.placeholder((32, 3, 28, 28), name='x')
+        w1 = tvm.placeholder((10, 3, 3, 3), name='w1')
+        w2 = tvm.placeholder((10, 10, 3, 3), name='w2')
+        z1 = topi.nn.conv2d(x, w1, 1, 1, 1)
+        z2 = topi.nn.conv2d(z1, w2, 1, 1, 1)
+        y = topi.sum(z2)
+
+        # produce gradients
+        [dw1, dw2] = tvm.gradient(y, [w1, w2])
+
+        # produce Jacobians
+        [jw1, jw2] = tvm.gradient(z2, [w1, w2])
+
+        # produce gradients, the head adjoint for z2 is provided manually
+        [dw1, dw2] = tvm.gradient(z2, [w1, w2], topi.full_like(z2, 1.0))
+
+    """
+    if not isinstance(inputs, list):
+        inputs = [inputs]
+    return _ffi_api.Gradient(output, inputs, head)
diff --git a/src/te/autodiff/ad_util.cc b/src/te/autodiff/ad_util.cc
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file ad_util.cc
+ * \brief Utility for tensor-level auto-differentiation.
+ */
+#include <tvm/tir/expr.h>
+#include <tvm/tir/ir_pass.h>
+#include <string>
+#include "ad_util.h"
+
+namespace tvm {
+namespace te {
+
+// Produce a copy of every iter var in `vars`, together with the substitution
+// that maps each original variable to the variable of its copy.
+std::pair<Array<IterVar>, Map<Var, PrimExpr>> CloneIterVars(const Array<IterVar>& vars) {
+  Array<IterVar> cloned_vars;
+  Map<Var, PrimExpr> old_to_new;
+  for (const IterVar& original : vars) {
+    // Domain, iteration type and thread tag are carried over unchanged; only
+    // the underlying variable is replaced (copied with an empty name suffix).
+    IterVar clone = IterVarNode::make(
+        original->dom, original->var.copy_with_suffix(""),
+        original->iter_type, original->thread_tag);
+    cloned_vars.push_back(clone);
+    old_to_new.Set(original->var, clone->var);
+  }
+  return std::make_pair(std::move(cloned_vars), std::move(old_to_new));
+}
+
+// Rebuild a reduction over freshly cloned axis variables; any expression that
+// is not a ReduceNode is handed back untouched.
+PrimExpr CloneReduction(const PrimExpr& expr) {
+  const ReduceNode* reduce = expr.as<ReduceNode>();
+  if (reduce == nullptr) {
+    return expr;
+  }
+
+  // Clone the reduction axes and obtain the old-var -> new-var substitution.
+  std::pair<Array<IterVar>, Map<Var, PrimExpr>> cloned = CloneIterVars(reduce->axis);
+  const Array<IterVar>& fresh_axis = cloned.first;
+  const Map<Var, PrimExpr>& rename = cloned.second;
+
+  // Every source expression and the condition must refer to the new axes.
+  Array<PrimExpr> renamed_sources;
+  for (const PrimExpr& source : reduce->source) {
+    renamed_sources.push_back(tir::Substitute(source, rename));
+  }
+  PrimExpr renamed_condition = tir::Substitute(reduce->condition, rename);
+
+  return ReduceNode::make(reduce->combiner, renamed_sources, fresh_axis,
+                          renamed_condition, reduce->value_index);
+}
+
+}  // namespace te
+}  // namespace tvm
diff --git a/src/te/autodiff/ad_util.h b/src/te/autodiff/ad_util.h
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file ad_util.h
+ * \brief Helper utilities to implement auto-differentiation.
+ */
+#ifndef TVM_TE_AUTODIFF_AD_UTIL_H_
+#define TVM_TE_AUTODIFF_AD_UTIL_H_
+
+#include <tvm/tir/expr.h>
+#include <tvm/te/operation.h>
+#include <vector>
+#include <unordered_map>
+#include <utility>
+
+namespace tvm {
+namespace te {
+
+/*!
+ * \brief Clone iter vars and return both the new vars and the substitution from old to new.
+ *
+ * \param vars The original iter vars.
+ * \return A pair containing the array of new iter vars and the map from old vars to new ones.
+ */
+std::pair<Array<IterVar>, Map<Var, PrimExpr>> CloneIterVars(const Array<IterVar>& vars);
+
+/*!
+ * \brief Clone reduction by cloning the axis variables.
+ * \param expr A reduction expr to clone. Non-reduction expressions are left intact.
+ * \return The reduction rebuilt over the cloned axis variables, with its sources and condition
+ *         substituted accordingly; \p expr itself if it is not a reduction.
+ */
+PrimExpr CloneReduction(const PrimExpr& expr);
+
+}  // namespace te
+}  // namespace tvm
+#endif  // TVM_TE_AUTODIFF_AD_UTIL_H_
Original file line number	Diff line number	Diff line change
Expand Up		@@ -33,3 +33,4 @@
		from .operation import thread_axis, reduce_axis

		from .tensor import PlaceholderOp, ComputeOp, TensorComputeOp, ScanOp, ExternOp, HybridOp
		from .autodiff import gradient