Skip to content

Commit

Permalink
[TE] reverse-mode autodiff without any optimization (#5121)
Browse files Browse the repository at this point in the history
* [TE] reverse-mode autodiff without any optimization

Co-authored-by: Sergei Grechanik <[email protected]>

* address review comments

* add comments and retrigger CI

* move unittest to debug ci

* move test back and add seed

Co-authored-by: Sergei Grechanik <[email protected]>
  • Loading branch information
yzhliu and sgrechanik-h authored Mar 31, 2020
1 parent ff7bab8 commit e4a5441
Show file tree
Hide file tree
Showing 10 changed files with 988 additions and 1 deletion.
97 changes: 97 additions & 0 deletions include/tvm/te/autodiff.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

/*!
* \file tvm/te/autodiff.h
* \brief Automatic differentiation of tensor expressions.
*/

#ifndef TVM_TE_AUTODIFF_H_
#define TVM_TE_AUTODIFF_H_

#include <tvm/runtime/object.h>
#include <tvm/tir/expr.h>
#include "tensor.h"

namespace tvm {
/*! \brief Tensor expression language DSL. */
namespace te {

/*!
* \brief Take the derivative of the expression with respect to the given variable.
* \param expr The expression to differentiate.
* \param var The variable to differentiate with respect to.
* \return The expression for the derivative.
*/
PrimExpr Derivative(const PrimExpr& expr, const Var& var);

/*!
* \brief Get the tensor representing the Jacobian of the output with respect to the input.
*
* Note that if \p output depends on \p input indirectly (by using some other tensor
* depending on \p input), this dependency won't contribute to the resulting Jacobian.
* For such cases use the function ::Gradient.
*
* \param output The tensor to differentiate.
* \param input The input tensor, which \p output should directly use.
* \return The tensor representing the Jacobian of shape `output.shape + input.shape`.
*/
Tensor Jacobian(const Tensor& output, const Tensor& input);

/*!
* \brief The building block for reverse-mode AD.
*
* Differentiate \p output wrt \p input and multiply the result by \p head on the left using tensor
* dot product. \p input must be an immediate dependency of \p output (must be called from within
* the body of \p output). That is, the function will compute one summand of the adjoint for \p input
* given the adjoint for \p output (which is called \p head here).
*
* \param output The tensor to differentiate.
* \param input The input tensor, which \p output should directly use.
* \param head The adjoint of \p output. Must be of shape `prefix + output.shape`
* \return The tensor of shape `prefix + input.shape`
* representing the partial adjoint of \p input wrt one of its consumers (output)
*/
Tensor VectorJacobianProduct(const Tensor &output, const Tensor &input, const Tensor &head);

/*!
* \brief Perform reverse mode automatic differentiation.
*
* Each item of the `result` field of the result is an adjoint for the corresponding item of
* \p inputs, i.e. \p head multiplied by the Jacobian of \p output with respect to the
* corresponding item of \p inputs.
*
* \param output The tensor to differentiate.
* \param inputs The array of input tensors. When the array is empty, will perform differentiation
* wrt all tensors the output depends on.
* \param head The adjoint of the output, in other words, some tensor, by which the Jacobians
* will be multiplied (using tensordot axes=`output.shape`).
* Its shape must be of the form `prefix + output.shape`. If the null pointer is provided,
* the identity tensor of shape `output.shape + output.shape` will be used.
* \return An array of adjoints corresponding to \p inputs.
*/
TVM_DLL Array<Tensor> Gradient(
const Tensor& output,
const Array<Tensor>& inputs,
const Tensor& head = Tensor());

} // namespace te
} // namespace tvm

#endif // TVM_TE_AUTODIFF_H_
1 change: 1 addition & 0 deletions python/tvm/te/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,3 +33,4 @@
from .operation import thread_axis, reduce_axis

from .tensor import PlaceholderOp, ComputeOp, TensorComputeOp, ScanOp, ExternOp, HybridOp
from .autodiff import gradient
67 changes: 67 additions & 0 deletions python/tvm/te/autodiff.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

"""Automatic differentiation of tensor expressions."""
from . import _ffi_api


def gradient(output, inputs, head=None):
"""Perform reverse-mode automatic differentiation.
Parameters
----------
output : Tensor
The tensor to differentiate.
inputs : List[Tensor]
The list of input tensors to be differentiated wrt.
head : Tensor
The adjoint of the output, in other words, some tensor, by which the Jacobians
will be multiplied. Its shape must be of the form `prefix + output.shape`.
If `None` is passed, the identity tensor of shape `output.shape + output.shape`
will be used.
Returns
-------
tensors: List[Tensor]
The result gradient, in the same order as the inputs
Example
-------
.. code-block:: python
x = tvm.placeholder((32, 3, 28, 28), name='x')
w1 = tvm.placeholder((10, 3, 3, 3), name='w1')
w2 = tvm.placeholder((10, 10, 3, 3), name='w2')
z1 = topi.nn.conv2d(x, w1, 1, 1, 1)
z2 = topi.nn.conv2d(z1, w2, 1, 1, 1)
y = topi.sum(z2)
# produce gradients
[dw1, dw2] = tvm.gradient(y, [w1, w2])
# produce Jacobians
[jw1, jw2] = tvm.gradient(z2, [w1, w2])
# produce gradients, the head adjoint for z2 is provided manually
[dw1, dw2] = tvm.gradient(z2, [w1, w2], topi.full_like(z2, 1.0))
"""
if not isinstance(inputs, list):
inputs = [inputs]
return _ffi_api.Gradient(output, inputs, head)
64 changes: 64 additions & 0 deletions src/te/autodiff/ad_util.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

/*!
* \file ad_util.cc
* \brief Utility for tensor-level auto-differentiation.
*/
#include <tvm/tir/expr.h>
#include <tvm/tir/ir_pass.h>
#include <string>
#include "ad_util.h"

namespace tvm {
namespace te {

std::pair<Array<IterVar>, Map<Var, PrimExpr>> CloneIterVars(const Array<IterVar>& vars) {
Array<IterVar> new_vars;
Map<Var, PrimExpr> vmap;
for (const IterVar& iv : vars) {
IterVar new_v =
IterVarNode::make(iv->dom, iv->var.copy_with_suffix(""),
iv->iter_type, iv->thread_tag);
new_vars.push_back(new_v);
vmap.Set(iv->var, new_v->var);
}
return std::make_pair(std::move(new_vars), std::move(vmap));
}

PrimExpr CloneReduction(const PrimExpr& expr) {
if (const ReduceNode* red = expr.as<ReduceNode>()) {
Array<IterVar> new_axis;
Map<Var, PrimExpr> vmap;
std::tie(new_axis, vmap) = CloneIterVars(red->axis);

Array<PrimExpr> src_with_newaxis;
for (const auto& src : red->source) {
src_with_newaxis.push_back(tir::Substitute(src, vmap));
}

return ReduceNode::make(red->combiner, src_with_newaxis,
new_axis, tir::Substitute(red->condition, vmap), red->value_index);
} else {
return expr;
}
}

} // namespace te
} // namespace tvm
52 changes: 52 additions & 0 deletions src/te/autodiff/ad_util.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

/*!
* \file ad_util.h
* \brief Helper utilities to implement auto-differentiation.
*/
#ifndef TVM_TE_AUTODIFF_AD_UTIL_H_
#define TVM_TE_AUTODIFF_AD_UTIL_H_

#include <tvm/tir/expr.h>
#include <tvm/te/operation.h>
#include <vector>
#include <unordered_map>
#include <utility>

namespace tvm {
namespace te {

/*!
* \brief Clone iter vars and return both the new vars and the substitution from old to new.
*
* \param vars The original iter vars.
* \return A pair containing the array of new iter vars and the map from old vars to new ones.
*/
std::pair<Array<IterVar>, Map<Var, PrimExpr>> CloneIterVars(const Array<IterVar>& vars);

/*!
* \brief Clone reduction by cloning the axis variables.
* \param expr A reduction expr to clone. Non-reduction expressions are left intact.
*/
PrimExpr CloneReduction(const PrimExpr& expr);

} // namespace te
} // namespace tvm
#endif // TVM_TE_AUTODIFF_AD_UTIL_H_
Loading

0 comments on commit e4a5441

Please sign in to comment.