Skip to content
This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

Commit

Permalink
[MXNET-978] Higher Order Gradient Support for arctan, arctanh, `radians`. (#15531)
Browse files Browse the repository at this point in the history

* support arc{tan/tanh} for higher order grad

* add relevant tests

* add new abstraction for Node operations

* support radians for higher order grad

* add test for radians

* changes

* use NodeOp for arctan.
* update few comments.
* update few variable names.

* rename grad_x to x_grad

* update comments

* move node_op_util.h to src/nnvm

* address comments

* rename NodeOp to NodeOpGen.
* rename Op to op.

* fix file description
  • Loading branch information
kshitij12345 authored and apeforest committed Sep 6, 2019
1 parent d85a2d0 commit 255dff0
Show file tree
Hide file tree
Showing 3 changed files with 182 additions and 3 deletions.
76 changes: 76 additions & 0 deletions src/nnvm/node_op_util.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

/*!
* Copyright (c) 2019 by Contributors
* \file node_op_util.h
* \brief abstraction for commonly used nnvm::Node operations.
*/
#ifndef MXNET_NNVM_NODE_OP_UTIL_H_
#define MXNET_NNVM_NODE_OP_UTIL_H_
#include <mxnet/base.h>
#include <string>
#include <unordered_map>
#include "../operator/elemwise_op_common.h"

namespace mxnet {
namespace util {

class NodeOpGen {
 private:
    // Node whose attrs.name seeds the names of all generated nodes and which
    // is attached as the forward node of every node this generator creates.
    // NOTE(review): stored as a reference — the nnvm::NodePtr passed to the
    // constructor must outlive this NodeOpGen instance.
    const nnvm::NodePtr &dependent_node;

 public:
    /*!
     * \brief Abstraction for commonly used nnvm::Node operations
     *        (mul, div, square) built relative to a dependent node.
     * \param dependent_node node providing the name prefix and forward link
     *        for every generated node.
     */
    explicit NodeOpGen(const nnvm::NodePtr &dependent_node) : dependent_node{dependent_node} {}

    /*! \brief Element-wise product of two node entries. */
    nnvm::NodeEntry mul(const nnvm::NodeEntry &lhs, const nnvm::NodeEntry &rhs) const {
      return nnvm::NodeEntry{mxnet::op::MakeNode("elemwise_mul",
                                                 dependent_node->attrs.name + "_mul",
                                                 {lhs, rhs}, nullptr, &dependent_node)};
    }

    /*! \brief Product of a node entry and a scalar constant. */
    nnvm::NodeEntry mul(const nnvm::NodeEntry &x, double scalar) const {
      const std::unordered_map<std::string, std::string> scalar_dict =
          {{"scalar", std::to_string(scalar)}};
      return nnvm::NodeEntry{mxnet::op::MakeNode("_mul_scalar",
                                                 dependent_node->attrs.name + "_mul_scalar",
                                                 {x}, &scalar_dict, &dependent_node)};
    }

    /*! \brief Commutative overload: scalar * x delegates to mul(x, scalar). */
    nnvm::NodeEntry mul(double scalar, const nnvm::NodeEntry &x) const {
      return mul(x, scalar);
    }

    /*! \brief Element-wise quotient lhs / rhs. */
    nnvm::NodeEntry div(const nnvm::NodeEntry &lhs, const nnvm::NodeEntry &rhs) const {
      return nnvm::NodeEntry{mxnet::op::MakeNode("elemwise_div",
                                                 dependent_node->attrs.name + "_div",
                                                 {lhs, rhs}, nullptr, &dependent_node)};
    }

    /*! \brief Element-wise square of x. */
    nnvm::NodeEntry square(const nnvm::NodeEntry &x) const {
      return nnvm::NodeEntry{mxnet::op::MakeNode("square",
                                                 dependent_node->attrs.name + "_square",
                                                 {x}, nullptr, &dependent_node)};
    }
};

} // namespace util
} // namespace mxnet

#endif // MXNET_NNVM_NODE_OP_UTIL_H_
63 changes: 60 additions & 3 deletions src/operator/tensor/elemwise_unary_op_trig.cc
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include <mxnet/base.h>
#include "elemwise_unary_op.h"
#include "./elemwise_binary_op-inl.h"
#include "../../nnvm/node_op_util.h"

namespace mxnet {
namespace op {
Expand Down Expand Up @@ -227,7 +228,35 @@ The storage type of ``arctan`` output depends upon the input storage type:
.set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseIn{ "_backward_arctan" });

MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU_DR(_backward_arctan,
unary_bwd<mshadow_op::arctan_grad>);
unary_bwd<mshadow_op::arctan_grad>)
.set_attr<nnvm::FGradient>("FGradient",
[](const nnvm::NodePtr& n, const std::vector<nnvm::NodeEntry>& ograds) {
// Second-order gradient of arctan.
// ograds[0]: head_grad_grads (dL/dxgrad)
// inputs[0]: dL/dy
// inputs[1]: x (ElemwiseGradUseIn)
// n: dL/dy * f'(x)
// f(x) = arctan(x)
// dydx = f'(x) = 1/(1+x^2)
// f''(x) = f'(x) * f'(x) * -2 * x = (-2 * x) / (1 + x^2)^2
// return:
// 0: dL/dy_grad * dy/dx
// 1: dL/dy_grad * dL/dy * f''(x)
auto dldy = n->inputs[0];
auto x = n->inputs[1];
auto dldy_mul_dydx = nnvm::NodeEntry{n};
auto op = mxnet::util::NodeOpGen{n};

// Recover f'(x) by dividing n's output (dL/dy * f'(x)) by dL/dy.
// NOTE(review): assumes dL/dy has no zero elements — confirm upstream.
auto x_grad = op.div(dldy_mul_dydx, dldy);
// Assemble f''(x) = -2x * f'(x)^2 step by step.
auto x_grad_square = op.square(x_grad);
auto x_grad_square_mul_x = op.mul(x_grad_square, x);
auto x_grad_square_mul_2_x = op.mul(-2.0, x_grad_square_mul_x);
auto grad_grad_x = op.mul(dldy, x_grad_square_mul_2_x);

std::vector<nnvm::NodeEntry> ret;
ret.emplace_back(op.mul(ograds[0], x_grad));
ret.emplace_back(op.mul(ograds[0], grad_grad_x));
return ret;
});

// degrees
MXNET_OPERATOR_REGISTER_UNARY_WITH_RSP_CSR(degrees, cpu, mshadow_op::degrees)
Expand Down Expand Up @@ -265,7 +294,8 @@ The storage type of ``radians`` output depends upon the input storage type:
.set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseIn{ "_backward_radians" });

MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU_DR(_backward_radians,
unary_bwd<mshadow_op::radians_grad>);
unary_bwd<mshadow_op::radians_grad>)
// radians is linear in x, so its first gradient is a constant and every
// higher-order gradient is identically zero.
.set_attr<nnvm::FGradient>("FGradient", MakeZeroGradNodes);

// sinh
MXNET_OPERATOR_REGISTER_UNARY_WITH_RSP_CSR(sinh, cpu, mshadow_op::sinh)
Expand Down Expand Up @@ -391,8 +421,35 @@ The storage type of ``arctanh`` output depends upon the input storage type:
.set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseIn{ "_backward_arctanh" });

MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU_DR(_backward_arctanh,
unary_bwd<mshadow_op::arctanh_grad>);
unary_bwd<mshadow_op::arctanh_grad>)
.set_attr<nnvm::FGradient>("FGradient",
[](const nnvm::NodePtr& n, const std::vector<nnvm::NodeEntry>& ograds) {
// Second-order gradient of arctanh (mathematically defined for |x| < 1).
// ograds[0]: head_grad_grads (dL/dxgrad)
// inputs[0]: dL/dy
// inputs[1]: x (ElemwiseGradUseIn)
// n: dL/dy * dy/dx
// f(x) = arctanh(x)
// dy/dx = f'(x) = 1/(1-x^2)
// f''(x) = f'(x) * f'(x) * 2 * x = (2 * x) / (1 - x^2)^2
// return:
// 0: dL/dy_grad * dy/dx
// 1: dL/dy_grad * dL/dy * f''(x)
auto dldy = n->inputs[0];
auto x = n->inputs[1];
auto dldy_mul_dydx = nnvm::NodeEntry{n};
auto op = mxnet::util::NodeOpGen{n};

// Recover f'(x) by dividing n's output (dL/dy * f'(x)) by dL/dy.
// NOTE(review): assumes dL/dy has no zero elements — confirm upstream.
auto x_grad = op.div(dldy_mul_dydx, dldy);
// Assemble f''(x) = 2x * f'(x)^2 step by step.
auto x_grad_square = op.square(x_grad);
auto x_grad_square_mul_x = op.mul(x_grad_square, x);
auto x_grad_square_mul_2_x = op.mul(2.0, x_grad_square_mul_x);
auto grad_grad_x = op.mul(dldy, x_grad_square_mul_2_x);

std::vector<nnvm::NodeEntry> ret;
ret.emplace_back(op.mul(ograds[0], x_grad));
ret.emplace_back(op.mul(ograds[0], grad_grad_x));
return ret;
});

} // namespace op
} // namespace mxnet
46 changes: 46 additions & 0 deletions tests/python/unittest/test_higher_order_grad.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@


import math
import random
from mxnet import nd, autograd
from mxnet.test_utils import assert_almost_equal, random_arrays, rand_shape_nd
from common import with_seed
Expand Down Expand Up @@ -85,6 +86,51 @@ def grad_grad_op(x):
array, tanh, grad_grad_op, rtol=1e-6, atol=1e-6)


@with_seed()
def test_arctan():
    """Second-order gradient check for arctan: f''(x) = -2x / (1 + x^2)^2."""
    def forward(x):
        return nd.arctan(x)

    def second_derivative(x):
        denominator = (1 + x ** 2) ** 2
        return -2 * x / denominator

    for ndim in range(1, 5):
        # Domain of arctan is all real numbers, so widen the sample spread
        # by scaling the standard deviation.
        spread = random.randint(500, 10000)
        samples = random_arrays(rand_shape_nd(ndim)) * spread
        check_second_order_unary(samples, forward, second_derivative)


@with_seed()
def test_arctanh():
    """Second-order gradient check for arctanh: f''(x) = 2x / (1 - x^2)^2.

    arctanh is only defined on the open interval (-1, 1), so inputs are
    clamped strictly inside that interval; otherwise the forward value and
    the analytic second derivative are undefined/singular at |x| >= 1.
    """
    def arctanh(x):
        return nd.arctanh(x)

    def grad_grad_op(x):
        return (2 * x)/((1 - x**2)**2)

    for dim in range(1, 5):
        shape = rand_shape_nd(dim)
        array = random_arrays(shape)
        # Keep samples strictly inside the domain (-1, 1) of arctanh so the
        # op and f''(x) = 2x / (1 - x^2)^2 stay finite.
        array = array.clip(-0.95, 0.95)
        check_second_order_unary(array, arctanh, grad_grad_op)


@with_seed()
def test_radians():
    """Second-order gradient of radians must vanish: the op is linear in x."""
    def forward(x):
        return nd.radians(x)

    def second_derivative(x):
        return nd.zeros_like(x)

    for ndim in range(1, 5):
        samples = random_arrays(rand_shape_nd(ndim))
        check_second_order_unary(samples, forward, second_derivative)


@with_seed()
def test_relu():
def relu(x):
Expand Down

0 comments on commit 255dff0

Please sign in to comment.