From fc7384b9af653d4ff905e96b12e569592d320c7b Mon Sep 17 00:00:00 2001
From: Pavan Yalamanchili
Date: Sat, 22 Jul 2017 20:07:25 -0700
Subject: [PATCH 1/6] Adding Variable::dims()

---
 include/af/autograd/Variable.hpp | 2 ++
 src/autograd/Variable.cpp        | 5 +++++
 2 files changed, 7 insertions(+)

diff --git a/include/af/autograd/Variable.hpp b/include/af/autograd/Variable.hpp
index 330c37f..f63bebb 100644
--- a/include/af/autograd/Variable.hpp
+++ b/include/af/autograd/Variable.hpp
@@ -60,6 +60,8 @@ namespace af {
 
             bool isGradAvailable() const;
 
+            af::dim4 dims() const;
+
             void zeroGrad();
 
             void setCalcGrad(bool calc_grad);
diff --git a/src/autograd/Variable.cpp b/src/autograd/Variable.cpp
index 9ff55c5..d49188a 100644
--- a/src/autograd/Variable.cpp
+++ b/src/autograd/Variable.cpp
@@ -102,6 +102,11 @@ namespace af {
             return m_shared->m_grads.size() >= 1;
         }
 
+        af::dim4 Variable::dims() const
+        {
+            return m_shared->m_data.dims();
+        }
+
         void Variable::zeroGrad()
         {
             m_shared->m_grads.clear();

From 68ff4f22496431dd05044ce5485b4fff24a21f53 Mon Sep 17 00:00:00 2001
From: Pavan Yalamanchili
Date: Sat, 22 Jul 2017 20:08:00 -0700
Subject: [PATCH 2/6] Changing expandAs, reduceAs to tileAs, sumAs
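
Editor's note (not part of the commit): tileAs broadcasts a Variable up to a
reference shape and sumAs reduces one back down; each uses the other as its
gradient. A sketch, with shapes taken from this commit's own tests:

    auto x = Variable(af::randu(5), true);     // 5
    auto y = Variable(af::randu(5, 2), true);  // 5x2
    auto t = tileAs(x, y);                     // 5 -> 5x2; backward applies sumAs
    auto s = sumAs(y, x);                      // 5x2 -> 5; backward applies tileAs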
---
 examples/autograd.cpp             | 12 ++++++------
 include/af/autograd/Functions.hpp | 11 ++++++-----
 src/autograd/Functions.cpp        | 22 +++++++++++-----------
 src/nn/Modules/Activations.cpp    |  3 +--
 src/nn/Modules/Linear.cpp         |  2 +-
 5 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/examples/autograd.cpp b/examples/autograd.cpp
index 702992b..174a2fb 100644
--- a/examples/autograd.cpp
+++ b/examples/autograd.cpp
@@ -128,11 +128,11 @@ void test_tanh()
     VERIFY(dx.array() - (1 + af::tanh(x.array())) * (1 - af::tanh(x.array())));
 }
 
-void test_expand()
+void test_tile()
 {
     auto x = Variable(af::randu(5), true);
     auto y = Variable(af::randu(5, 2), true);
-    auto z = y * expandAs(x, y);
+    auto z = y * tileAs(x, y);
     auto dz = Variable(af::constant(1.0, 5, 2), false);
     z.backward(dz);
     auto dy = y.grad();
@@ -141,11 +141,11 @@ void test_expand()
     VERIFY(dx.array() - af::sum(y.array(), 1));
 }
 
-void test_reduce()
+void test_sum()
 {
     auto x = Variable(af::randu(5), true);
     auto y = Variable(af::randu(5, 2), true);
-    auto z = x * reduceAs(y, x);
+    auto z = x * sumAs(y, x);
     auto dz = Variable(af::constant(1.0, 5), false);
     z.backward(dz);
     auto dy = y.grad();
@@ -166,7 +166,7 @@ int main()
     test_exp();
     test_sigmoid();
     test_tanh();
-    test_expand();
-    test_reduce();
+    test_tile();
+    test_sum();
     return 0;
 }
diff --git a/include/af/autograd/Functions.hpp b/include/af/autograd/Functions.hpp
index e272f60..2375107 100644
--- a/include/af/autograd/Functions.hpp
+++ b/include/af/autograd/Functions.hpp
@@ -7,6 +7,7 @@
  * http://arrayfire.com/licenses/BSD-3-Clause
  ********************************************************/
 #pragma once
+#include <af/autograd/Variable.hpp>
 
 namespace af {
     namespace autograd {
@@ -50,18 +51,18 @@ namespace af {
         Variable cos(const Variable &input);
         Variable tanh(const Variable &input);
         Variable sigmoid(const Variable &input);
-        
+
         Variable max(const Variable &lhs, const Variable &rhs);
         Variable max(const Variable &lhs, const double &rhs);
         Variable max(const double &lhs, const Variable &rhs);
-        
+
         Variable min(const Variable &lhs, const Variable &rhs);
         Variable min(const Variable &lhs, const double &rhs);
         Variable min(const double &lhs, const Variable &rhs);
-        
+
         Variable transpose(const Variable &input);
-        Variable expandAs(const Variable &input, const Variable &reference);
-        Variable reduceAs(const Variable &input, const Variable &reference);
+        Variable tileAs(const Variable &input, const Variable &reference);
+        Variable sumAs(const Variable &input, const Variable &reference);
 
         Variable matmul(const Variable &lhs, const Variable &rhs);
         Variable matmulTN(const Variable &lhs, const Variable &rhs);
diff --git a/src/autograd/Functions.cpp b/src/autograd/Functions.cpp
index 2be0d86..1292dc1 100644
--- a/src/autograd/Functions.cpp
+++ b/src/autograd/Functions.cpp
@@ -54,7 +54,7 @@ namespace af {
             };
             return Variable(result, {lhs, rhs}, grad_func);
         }
-        
+
         Variable operator >(const Variable &lhs, const Variable &rhs)
         {
             auto result = lhs.array() > rhs.array();
@@ -116,7 +116,7 @@ namespace af {
             auto result = !input.array();
             return Variable(result, false);
         }
-        
+
         Variable max(const Variable &lhs, const Variable &rhs)
         {
             auto mask = lhs > rhs;
@@ -165,7 +165,7 @@ namespace af {
         INSTANTIATE_FUNCTION(min);
 
 #undef INSTANTIATE_FUNCTION
-        
+
         Variable negate(const Variable &input)
        {
            auto result = 0.0 - input.array();
@@ -241,31 +241,31 @@ namespace af {
             return Variable(result, {input}, grad_func);
         }
 
-        Variable expandAs(const Variable &input, const Variable &reference)
+        Variable tileAs(const Variable &input, const Variable &reference)
         {
             dim4 dims(1,1,1,1);
-            dim4 idims = input.array().dims();
-            dim4 rdims = reference.array().dims();
+            dim4 rdims = reference.dims();
+            dim4 idims = input.dims();
             for (int i = 0; i < 4; i++) {
                 dims[i] = rdims[i] / idims[i];
             }
             auto result = tile(input.array(), dims);
             auto grad_func = [](std::vector<Variable> &inputs, const Variable &grad_output) {
-                inputs[0].addGrad(reduceAs(grad_output, inputs[0]));
+                inputs[0].addGrad(sumAs(grad_output, inputs[0]));
             };
             return Variable(result, {input}, grad_func);
         }
 
-        Variable reduceAs(const Variable &input, const Variable &reference)
+        Variable sumAs(const Variable &input, const Variable &reference)
         {
-            dim4 idims = input.array().dims();
-            dim4 rdims = reference.array().dims();
+            dim4 rdims = reference.dims();
+            dim4 idims = input.dims();
             auto result = input.array();
             for (int i = 0; i < 4; i++) {
                 if (idims[i] != rdims[i]) result = sum(result, i);
             }
             auto grad_func = [](std::vector<Variable> &inputs, const Variable &grad_output) {
-                inputs[0].addGrad(expandAs(grad_output, inputs[0]));
+                inputs[0].addGrad(tileAs(grad_output, inputs[0]));
             };
             return Variable(result, {input}, grad_func);
         }
diff --git a/src/nn/Modules/Activations.cpp b/src/nn/Modules/Activations.cpp
index adedf26..ba0ae80 100644
--- a/src/nn/Modules/Activations.cpp
+++ b/src/nn/Modules/Activations.cpp
@@ -61,7 +61,7 @@ namespace af
         Variable PReLU::forward(const Variable &input)
         {
             auto mask = input >= 0.0;
-            return (input * mask) + (input * !mask * expandAs(m_parameters[0],input));
+            return (input * mask) + (input * !mask * tileAs(m_parameters[0], input));
         }
 
         ELU::ELU(double alpha) :
@@ -85,6 +85,5 @@ namespace af
             auto mask = input >= m_threshold;
             return input * mask;
         }
-
     }
 }
diff --git a/src/nn/Modules/Linear.cpp b/src/nn/Modules/Linear.cpp
index c289ea5..49140f0 100644
--- a/src/nn/Modules/Linear.cpp
+++ b/src/nn/Modules/Linear.cpp
@@ -51,7 +51,7 @@ namespace af
     {
         auto res = matmul(m_parameters[0], input);
         if (m_bias) {
-            res = res + expandAs(m_parameters[1], res);
+            res = res + tileAs(m_parameters[1], res);
         }
         return res;
     }

From 2a0a15b3d202d1b51dcc9d9f1b47757511888af5 Mon Sep 17 00:00:00 2001
From: Pavan Yalamanchili
Date: Sun, 23 Jul 2017 16:47:03 -0700
Subject: [PATCH 3/6] FEAT: Adding mean, adding new API for sum and tile
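
Editor's note (not part of the commit): the new overloads take explicit axes
or repeat counts instead of a reference Variable. A sketch, with the shapes
used in this commit's test_mean:

    auto y = Variable(af::randu(5, 3, 2), true);
    auto m = mean(y, {1, 2});     // 5x3x2 -> 5x1x1, averages 6 elements
    auto s = sum(y, {1});         // 5x3x2 -> 5x1x2
    auto t = tile(m, {1, 3, 2});  // 5x1x1 -> 5x3x2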
---
 examples/autograd.cpp             | 14 ++++++++++
 include/af/autograd/Functions.hpp |  7 ++++++-
 src/autograd/Functions.cpp        | 43 +++++++++++++++++++++++++++++++
 3 files changed, 63 insertions(+), 1 deletion(-)

diff --git a/examples/autograd.cpp b/examples/autograd.cpp
index 174a2fb..e9e8b5e 100644
--- a/examples/autograd.cpp
+++ b/examples/autograd.cpp
@@ -154,6 +154,19 @@ void test_sum()
     VERIFY(dx.array() - af::sum(y.array(), 1));
 }
 
+void test_mean()
+{
+    auto x = Variable(af::randu(5), true);
+    auto y = Variable(af::randu(5, 3, 2), true);
+    auto z = x * mean(y, {1,2});
+    auto dz = Variable(af::constant(1.0, 5), false);
+    z.backward(dz);
+    auto dy = y.grad();
+    auto dx = x.grad();
+    VERIFY(dy.array() - af::tile(x.array(), 1, 3, 2) / 6);
+    VERIFY(dx.array() - af::mean(af::mean(y.array(), 1), 2));
+}
+
 int main()
 {
     af::info();
@@ -168,5 +181,6 @@ int main()
     test_tanh();
     test_tile();
     test_sum();
+    test_mean();
     return 0;
 }
diff --git a/include/af/autograd/Functions.hpp b/include/af/autograd/Functions.hpp
index 2375107..182aba0 100644
--- a/include/af/autograd/Functions.hpp
+++ b/include/af/autograd/Functions.hpp
@@ -7,7 +7,8 @@
  * http://arrayfire.com/licenses/BSD-3-Clause
  ********************************************************/
 #pragma once
-#include <af/autograd/Variable.hpp>
+
+#include <af/autograd/Variable.hpp>
 
 namespace af {
     namespace autograd {
@@ -64,6 +65,10 @@ namespace af {
         Variable tileAs(const Variable &input, const Variable &reference);
         Variable sumAs(const Variable &input, const Variable &reference);
 
+        Variable tile(const Variable &input, const std::vector<int> &repeats);
+        Variable sum(const Variable &input, const std::vector<int> &axes);
+        Variable mean(const Variable &input, const std::vector<int> &axes);
+
         Variable matmul(const Variable &lhs, const Variable &rhs);
         Variable matmulTN(const Variable &lhs, const Variable &rhs);
         Variable matmulNT(const Variable &lhs, const Variable &rhs);
diff --git a/src/autograd/Functions.cpp b/src/autograd/Functions.cpp
index 1292dc1..d754689 100644
--- a/src/autograd/Functions.cpp
+++ b/src/autograd/Functions.cpp
@@ -270,6 +270,49 @@ namespace af {
             return Variable(result, {input}, grad_func);
         }
 
+        Variable tile(const Variable &input, const std::vector<int> &repeats)
+        {
+            dim4 dims(1, 1, 1, 1);
+            for (size_t i = 0; i < repeats.size(); i++) {
+                dims[i] = repeats[i];
+            }
+            auto result = tile(input.array(), dims);
+            auto grad_func = [](std::vector<Variable> &inputs, const Variable &grad_output) {
+                inputs[0].addGrad(sumAs(grad_output, inputs[0]));
+            };
+            return Variable(result, {input}, grad_func);
+        }
+
+        Variable sum(const Variable &input, const std::vector<int> &axes)
+        {
+            auto result = input.array();
+            for (size_t i = 0; i < axes.size(); i++) {
+                result = sum(result, axes[i]);
+            }
+            auto grad_func = [](std::vector<Variable> &inputs, const Variable &grad_output) {
+                inputs[0].addGrad(tileAs(grad_output, inputs[0]));
+            };
+            return Variable(result, {input}, grad_func);
+        }
+
+        Variable mean(const Variable &input, const std::vector<int> &axes)
+        {
+            auto result = input.array();
+            for (size_t i = 0; i < axes.size(); i++) {
+                result = mean(result, axes[i]);
+            }
+            auto grad_func = [](std::vector<Variable> &inputs, const Variable &grad_output) {
+                dim4 odims = grad_output.dims();
+                dim4 idims = inputs[0].dims();
+                dim_t count = 1;
+                for (int i = 0; i < 4; i++) {
+                    count *= idims[i] / odims[i];
+                }
+                inputs[0].addGrad(tileAs(grad_output, inputs[0]) / (double)count);
+            };
+            return Variable(result, {input}, grad_func);
+        }
+
         Variable matmul(const Variable &lhs, const Variable &rhs)
         {
             // lhs:Input[0] -- [M, N]

From f481fc07c8269eb352f7d8ead0b7c4a29526ab41 Mon Sep 17 00:00:00 2001
From: Pavan Yalamanchili
Date: Sun, 23 Jul 2017 18:12:15 -0700
Subject: [PATCH 4/6] Adding uniform, normal, constant initializers
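
Editor's note (not part of the commit): a quick sketch of the new
initializer API; the shapes and constants below are made up for
illustration.

    using namespace af;
    auto w = nn::uniform(3, 5, -0.1, 0.1);      // dim4(3, 5) weights, U(-0.1, 0.1)
    auto b = nn::constant(0.0, 3, 1);           // zero bias; calc_grad defaults to true
    auto g = nn::glorotNormal(af::dim4(3, 5));  // stdv = sqrt(2 / (fan_in + fan_out))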
---
 CMakeLists.txt                        |   2 +-
 include/af/nn.h                       |   2 +-
 include/af/nn/Init.hpp                |  74 +++++++++++++
 include/af/nn/Modules/Activations.hpp |  14 +--
 include/af/nn/Types.hpp               |  22 ----
 src/nn/Init.cpp                       | 151 ++++++++++++++++++++++++++
 src/nn/Modules/Activations.cpp        |   6 +-
 src/nn/Modules/Linear.cpp             |   6 +-
 src/nn/Types.cpp                      |  36 ------
 9 files changed, 240 insertions(+), 73 deletions(-)
 create mode 100644 include/af/nn/Init.hpp
 delete mode 100644 include/af/nn/Types.hpp
 create mode 100644 src/nn/Init.cpp
 delete mode 100644 src/nn/Types.cpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index c84783d..01c3b28 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -16,7 +16,7 @@ target_sources(afml
     src/nn/Modules/Container.cpp
     src/nn/Modules/Linear.cpp
     src/nn/Modules/Module.cpp
-    src/nn/Types.cpp
+    src/nn/Init.cpp
   )
 
 target_include_directories(afml
diff --git a/include/af/nn.h b/include/af/nn.h
index 88333fc..cf32fea 100644
--- a/include/af/nn.h
+++ b/include/af/nn.h
@@ -10,4 +10,4 @@
 #pragma once
 
 #include <af/nn/Modules.hpp>
-#include <af/nn/Types.hpp>
+#include <af/nn/Init.hpp>
diff --git a/include/af/nn/Init.hpp b/include/af/nn/Init.hpp
new file mode 100644
index 0000000..bddc4d5
--- /dev/null
+++ b/include/af/nn/Init.hpp
@@ -0,0 +1,74 @@
+/*******************************************************
+ * Copyright (c) 2017, ArrayFire
+ * All rights reserved.
+ *
+ * This file is distributed under 3-clause BSD license.
+ * The complete license agreement can be obtained at:
+ * http://arrayfire.com/licenses/BSD-3-Clause
+ ********************************************************/
+#pragma once
+
+#include <af/autograd/Variable.hpp>
+
+namespace af {
+    namespace nn {
+
+        autograd::Variable input(const af::array &arr);
+
+        autograd::Variable parameter(const af::array &arr);
+
+        autograd::Variable uniform(int output_size, int input_size,
+                                   double min = 0, double max = 1,
+                                   af::dtype type = f32, bool calc_grad=true);
+
+        autograd::Variable uniform(af::dim4 dims,
+                                   double min = 0, double max = 1,
+                                   af::dtype type = f32, bool calc_grad=true);
+
+        autograd::Variable normal(int output_size, int input_size,
+                                  double stdv = 1, double mean = 0,
+                                  af::dtype type = f32, bool calc_grad=true);
+
+        autograd::Variable normal(af::dim4 dims,
+                                  double stdv = 1, double mean = 0,
+                                  af::dtype type = f32, bool calc_grad=true);
+
+        autograd::Variable lecunUniform(int output_size, int input_size,
+                                        af::dtype type = f32, bool calc_grad=true);
+
+        autograd::Variable lecunUniform(af::dim4 dims,
+                                        af::dtype type = f32, bool calc_grad=true);
+
+        autograd::Variable lecunNormal(int output_size, int input_size,
+                                       af::dtype type = f32, bool calc_grad=true);
+
+        autograd::Variable lecunNormal(af::dim4 dims,
+                                       af::dtype type = f32, bool calc_grad=true);
+
+        autograd::Variable glorotUniform(int output_size, int input_size,
+                                         af::dtype type = f32, bool calc_grad=true);
+
+        autograd::Variable glorotUniform(af::dim4 dims,
+                                         af::dtype type = f32, bool calc_grad=true);
+
+        autograd::Variable glorotNormal(int output_size, int input_size,
+                                        af::dtype type = f32, bool calc_grad=true);
+
+        autograd::Variable glorotNormal(af::dim4 dims,
+                                        af::dtype type = f32, bool calc_grad=true);
+
+
+        autograd::Variable constant(double val, int output_size, int input_size,
+                                    af::dtype type = f32, bool calc_grad=true);
+
+        autograd::Variable constant(double val, af::dim4 dims,
+                                    af::dtype type = f32, bool calc_grad=true);
+
+        autograd::Variable identity(int output_size, int input_size,
+                                    af::dtype type = f32, bool calc_grad=true);
+
+        autograd::Variable identity(af::dim4 dims,
+                                    af::dtype type = f32, bool calc_grad=true);
+
+    }
+}
diff --git a/include/af/nn/Modules/Activations.hpp b/include/af/nn/Modules/Activations.hpp
index 2d00a90..dc6c288 100644
--- a/include/af/nn/Modules/Activations.hpp
+++ b/include/af/nn/Modules/Activations.hpp
@@ -35,26 +35,26 @@ namespace af
         {
         public:
             ReLU();
-            
+
             autograd::Variable forward(const autograd::Variable &input);
         };
-        
+
         class LeakyReLU : public Module
         {
         private:
             double m_slope;
         public:
             LeakyReLU(double slope = 0.0);
-            
+
             autograd::Variable forward(const autograd::Variable &input);
         };
 
         class PReLU : public Module
         {
         public:
-            PReLU(int size, double spread = 1.0);
+            PReLU(int size, double value = 1.0);
             PReLU(const autograd::Variable &w);
-            
+
             autograd::Variable forward(const autograd::Variable &input);
         };
 
@@ -74,11 +74,11 @@ namespace af
         private:
             double m_threshold;
         public:
             ThresholdReLU(double threshold = 1.0);
-            
+
             autograd::Variable forward(const autograd::Variable &input);
         };
-        
+
     }
 }
diff --git a/include/af/nn/Types.hpp b/include/af/nn/Types.hpp
deleted file mode 100644
index 6e7e101..0000000
--- a/include/af/nn/Types.hpp
+++ /dev/null
@@ -1,22 +0,0 @@
-/*******************************************************
- * Copyright (c) 2017, ArrayFire
- * All rights reserved.
- *
- * This file is distributed under 3-clause BSD license.
- * The complete license agreement can be obtained at:
- * http://arrayfire.com/licenses/BSD-3-Clause
- ********************************************************/
-#pragma once
-
-#include <af/autograd/Variable.hpp>
-
-namespace af {
-    namespace nn {
-
-        autograd::Variable input(const af::array &arr);
-
-        autograd::Variable parameter(const af::array &arr);
-
-        autograd::Variable weight(int input_size, int output_size, float spread = 0.05);
-    }
-}
diff --git a/src/nn/Init.cpp b/src/nn/Init.cpp
new file mode 100644
index 0000000..65b8088
--- /dev/null
+++ b/src/nn/Init.cpp
@@ -0,0 +1,151 @@
+/*******************************************************
+ * Copyright (c) 2017, ArrayFire
+ * All rights reserved.
+ *
+ * This file is distributed under 3-clause BSD license.
+ * The complete license agreement can be obtained at:
+ * http://arrayfire.com/licenses/BSD-3-Clause
+ ********************************************************/
+
+#include <cmath>
+
+#include <af/nn/Init.hpp>
+
+namespace af {
+    namespace nn {
+
+        using autograd::Variable;
+
+        Variable input(const af::array &arr)
+        {
+            return Variable(arr, false);
+        }
+
+        Variable parameter(const af::array &arr)
+        {
+            return Variable(arr, true);
+        }
+
+        autograd::Variable uniform(int output_size, int input_size,
+                                   double min, double max,
+                                   af::dtype type, bool calc_grad)
+        {
+            return nn::uniform(af::dim4(output_size, input_size), min, max, type, calc_grad);
+        }
+
+        autograd::Variable uniform(af::dim4 dims, double min, double max,
+                                   af::dtype type, bool calc_grad)
+        {
+            af::array result = af::randu(dims, type);
+            if (min != 0 || max != 1) {
+                result = (max - min) * result + min;
+            }
+            return Variable(result, calc_grad);
+        }
+
+        autograd::Variable normal(int output_size, int input_size,
+                                  double stdv, double mean,
+                                  af::dtype type, bool calc_grad)
+        {
+            return nn::normal(af::dim4(output_size, input_size), stdv, mean, type, calc_grad);
+        }
+
+        autograd::Variable normal(af::dim4 dims, double stdv, double mean,
+                                  af::dtype type, bool calc_grad)
+        {
+            af::array result = af::randn(dims, type);
+            if (mean != 0 || stdv != 1) {
+                result = stdv * result + mean;
+            }
+            return Variable(result, calc_grad);
+        }
+
+        autograd::Variable lecunUniform(int output_size, int input_size,
+                                        af::dtype type, bool calc_grad)
+        {
+            return nn::lecunUniform(af::dim4(output_size, input_size), type, calc_grad);
+        }
+
+        autograd::Variable lecunUniform(af::dim4 dims,
+                                        af::dtype type, bool calc_grad)
+        {
+            dim_t elements = dims.elements();
+            dim_t fan_in = elements / dims[1];
+            double stdv = ::sqrt(1.0/(double)fan_in);
+            double limit = ::sqrt(3.0) * stdv;
+            return nn::uniform(dims, -limit, limit, type, calc_grad);
+        }
+
+        autograd::Variable lecunNormal(int output_size, int input_size,
+                                       af::dtype type, bool calc_grad)
+        {
+            return nn::lecunNormal(af::dim4(output_size, input_size), type, calc_grad);
+        }
+
+        autograd::Variable lecunNormal(af::dim4 dims,
+                                       af::dtype type, bool calc_grad)
+        {
+            dim_t elements = dims.elements();
+            dim_t fan_in = elements / dims[1];
+            double stdv = ::sqrt(1.0/(double)fan_in);
+            return nn::normal(dims, stdv, 0, type, calc_grad);
+        }
+
+        autograd::Variable glorotUniform(int output_size, int input_size,
+                                         af::dtype type, bool calc_grad)
+        {
+            return nn::glorotUniform(af::dim4(output_size, input_size), type, calc_grad);
+        }
+
+        autograd::Variable glorotUniform(af::dim4 dims,
+                                         af::dtype type, bool calc_grad)
+        {
+            dim_t elements = dims.elements();
+            dim_t fan_in = elements / dims[1];
+            dim_t fan_out = elements / dims[0];
+            double stdv = ::sqrt(2.0/(double)(fan_in + fan_out));
+            double limit = ::sqrt(3.0) * stdv;
+            return nn::uniform(dims, -limit, limit, type, calc_grad);
+        }
+
+        autograd::Variable glorotNormal(int output_size, int input_size,
+                                        af::dtype type, bool calc_grad)
+        {
+            return nn::glorotNormal(af::dim4(output_size, input_size), type, calc_grad);
+        }
+
+        autograd::Variable glorotNormal(af::dim4 dims,
+                                        af::dtype type, bool calc_grad)
+        {
+            dim_t elements = dims.elements();
+            dim_t fan_in = elements / dims[1];
+            dim_t fan_out = elements / dims[0];
+            double stdv = ::sqrt(2.0/(double)(fan_in + fan_out));
+            return nn::normal(dims, stdv, 0, type, calc_grad);
+        }
+
+        autograd::Variable constant(double val, int output_size, int input_size,
+                                    af::dtype type, bool calc_grad)
+        {
+            return nn::constant(val, af::dim4(output_size, input_size), type, calc_grad);
+        }
+
+        autograd::Variable constant(double val, af::dim4 dims,
+                                    af::dtype type, bool calc_grad)
+        {
+            return Variable(af::constant(val, dims, type), calc_grad);
+        }
+
+        autograd::Variable identity(int output_size, int input_size,
+                                    af::dtype type, bool calc_grad)
+        {
+            return nn::identity(af::dim4(output_size, input_size), type, calc_grad);
+        }
+
+        autograd::Variable identity(af::dim4 dims,
+                                    af::dtype type, bool calc_grad)
+        {
+            return Variable(af::identity(dims, type), calc_grad);
+        }
+    }
+}
diff --git a/src/nn/Modules/Activations.cpp b/src/nn/Modules/Activations.cpp
index ba0ae80..2b9b4a0 100644
--- a/src/nn/Modules/Activations.cpp
+++ b/src/nn/Modules/Activations.cpp
@@ -9,7 +9,7 @@
 
 #include <af/autograd/Functions.hpp>
 #include <af/nn/Modules/Activations.hpp>
-#include <af/nn/Types.hpp>
+#include <af/nn/Init.hpp>
 
 namespace af
 {
@@ -47,9 +47,9 @@ namespace af
             return max(input, m_slope * input);
         }
 
-        PReLU::PReLU(int size, double spread)
+        PReLU::PReLU(int size, double value)
         {
-            auto w = nn::weight(size, 1, spread);
+            auto w = nn::constant(value, size, 1);
             setParams({w});
         }
 
diff --git a/src/nn/Modules/Linear.cpp b/src/nn/Modules/Linear.cpp
index 49140f0..a15d9ae 100644
--- a/src/nn/Modules/Linear.cpp
+++ b/src/nn/Modules/Linear.cpp
@@ -8,7 +8,7 @@
  ********************************************************/
 
 #include <af/autograd/Functions.hpp>
-#include <af/nn/Types.hpp>
+#include <af/nn/Init.hpp>
 #include <af/nn/Modules/Linear.hpp>
 
 namespace af
@@ -20,9 +20,9 @@ namespace af
         Linear::Linear(int input_size, int output_size, bool bias, float spread) :
             m_bias(bias)
         {
-            auto w = nn::weight(input_size, output_size, spread);
+            auto w = nn::lecunNormal(output_size, input_size);
             if (bias) {
-                auto b = nn::weight(1, output_size, spread);
+                auto b = nn::lecunNormal(output_size, 1);
                 setParams({w, b});
             } else {
                 setParams({w});
diff --git a/src/nn/Types.cpp b/src/nn/Types.cpp
deleted file mode 100644
index 698b497..0000000
--- a/src/nn/Types.cpp
+++ /dev/null
@@ -1,36 +0,0 @@
-/*******************************************************
- * Copyright (c) 2017, ArrayFire
- * All rights reserved.
- *
- * This file is distributed under 3-clause BSD license.
- * The complete license agreement can be obtained at:
- * http://arrayfire.com/licenses/BSD-3-Clause
- ********************************************************/
-
-#include <af/nn/Types.hpp>
-
-#include <af/autograd/Variable.hpp>
-
-namespace af {
-    namespace nn {
-
-        using autograd::Variable;
-
-        Variable input(const af::array &arr)
-        {
-            return Variable(arr, false);
-        }
-
-        Variable parameter(const af::array &arr)
-        {
-            return Variable(arr, true);
-        }
-
-        Variable weight(int input_size, int output_size, float spread)
-        {
-            auto w = af::randu(output_size, input_size) * spread - spread / 2;
-            w.eval();
-            return parameter(w);
-        }
-    }
-}

From ecc2bad9a24a49d83743f81dabdc2fafff980390 Mon Sep 17 00:00:00 2001
From: Pavan Yalamanchili
Date: Sun, 23 Jul 2017 23:01:26 -0700
Subject: [PATCH 5/6] Adding necessary functions for Loss modules.

- log, flat, moddims
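
Editor's note (not part of the commit): these primitives are what a loss
needs to reduce a tensor to a scalar and to route gradients back through the
reshape. A sketch with made-up shapes:

    auto x = Variable(af::randu(5, 2), true);
    auto l = mean(flat(abs(x)), {0});                 // 5x2 -> 10 -> 1
    l.backward(Variable(af::constant(1, 1), false));
    // x.grad() is 5x2 again: flat's gradient is moddims back to the input dims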
---
 include/af/autograd/Functions.hpp |  5 ++++
 src/autograd/Functions.cpp        | 39 +++++++++++++++++++++++++++++++
 2 files changed, 44 insertions(+)

diff --git a/include/af/autograd/Functions.hpp b/include/af/autograd/Functions.hpp
index 182aba0..e4f471d 100644
--- a/include/af/autograd/Functions.hpp
+++ b/include/af/autograd/Functions.hpp
@@ -8,6 +8,7 @@
  ********************************************************/
 #pragma once
 
+#include <vector>
 #include <af/autograd/Variable.hpp>
 
 namespace af {
@@ -48,6 +49,7 @@ namespace af {
         Variable reciprocal(const Variable &input);
 
         Variable exp(const Variable &input);
+        Variable log(const Variable &input);
         Variable sin(const Variable &input);
         Variable cos(const Variable &input);
         Variable tanh(const Variable &input);
@@ -73,6 +75,9 @@ namespace af {
         Variable matmul(const Variable &lhs, const Variable &rhs);
         Variable matmulTN(const Variable &lhs, const Variable &rhs);
         Variable matmulNT(const Variable &lhs, const Variable &rhs);
 
+        Variable abs(const Variable &input);
+        Variable flat(const Variable &input);
+        Variable moddims(const Variable &input, const dim4 &dims);
     }
 }
diff --git a/src/autograd/Functions.cpp b/src/autograd/Functions.cpp
index d754689..938dba6 100644
--- a/src/autograd/Functions.cpp
+++ b/src/autograd/Functions.cpp
@@ -194,6 +194,15 @@ namespace af {
             return Variable(result, {input}, grad_func);
         }
 
+        Variable log(const Variable &input)
+        {
+            auto result = log(input.array());
+            auto grad_func = [](std::vector<Variable> &inputs, const Variable &grad_output) {
+                inputs[0].addGrad(grad_output / inputs[0]);
+            };
+            return Variable(result, {input}, grad_func);
+        }
+
         Variable sin(const Variable &input)
         {
             auto result = sin(input.array());
@@ -375,5 +384,35 @@ namespace af {
             };
             return Variable(result, {lhs, rhs}, grad_func);
         }
+
+        Variable abs(const Variable &input)
+        {
+            auto result = af::abs(input.array());
+            auto grad_func = [](std::vector<Variable> &inputs, const Variable &grad_output) {
+                // af::sign returns signbit
+                // Convert it into -1, 1
+                auto sign = Variable(1 - 2 * af::sign(inputs[0].array()), false);
+                inputs[0].addGrad(sign * grad_output);
+            };
+            return Variable(result, {input}, grad_func);
+        }
+
+        Variable flat(const Variable &input)
+        {
+            auto result = af::flat(input.array());
+            auto grad_func = [](std::vector<Variable> &inputs, const Variable &grad_output) {
+                inputs[0].addGrad(moddims(grad_output, inputs[0].dims()));
+            };
+            return Variable(result, {input}, grad_func);
+        }
+
+        Variable moddims(const Variable &input, const dim4 &dims)
+        {
+            auto result = af::moddims(input.array(), dims);
+            auto grad_func = [](std::vector<Variable> &inputs, const Variable &grad_output) {
+                inputs[0].addGrad(moddims(grad_output, inputs[0].dims()));
+            };
+            return Variable(result, {input}, grad_func);
+        }
     }
 }

From 7a6456425d1492459a12d8d3ae79a93a2f531677 Mon Sep 17 00:00:00 2001
From: Pavan Yalamanchili
Date: Mon, 24 Jul 2017 00:30:58 -0700
Subject: [PATCH 6/6] Adding common loss Modules

- MeanSquaredError
- MeanAbsoluteError
- BinaryCrossEntropyLoss

Added typedefs for some alternative names.
---
 CMakeLists.txt                      |  1 +
 examples/autograd.cpp               |  1 +
 examples/perceptron.cpp             | 12 +++---
 include/af/autograd/Variable.hpp    |  1 +
 include/af/nn/Init.hpp              |  2 +
 include/af/nn/Modules.hpp           |  1 +
 include/af/nn/Modules/Container.hpp |  4 +-
 include/af/nn/Modules/Loss.hpp      | 64 +++++++++++++++++++++++++++++
 src/autograd/Variable.cpp           |  6 +++
 src/nn/Init.cpp                     |  5 +++
 src/nn/Modules/Loss.cpp             | 59 ++++++++++++++++++++++++++
 11 files changed, 147 insertions(+), 9 deletions(-)
 create mode 100644 include/af/nn/Modules/Loss.hpp
 create mode 100644 src/nn/Modules/Loss.cpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 01c3b28..f31cd8a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -15,6 +15,7 @@ target_sources(afml
     src/nn/Modules/Activations.cpp
     src/nn/Modules/Container.cpp
     src/nn/Modules/Linear.cpp
+    src/nn/Modules/Loss.cpp
     src/nn/Modules/Module.cpp
     src/nn/Init.cpp
   )
diff --git a/examples/autograd.cpp b/examples/autograd.cpp
index e9e8b5e..b70dbe1 100644
--- a/examples/autograd.cpp
+++ b/examples/autograd.cpp
@@ -8,6 +8,7 @@
  ********************************************************/
 
 #include <af/autograd.h>
+#include <af/nn.h>
 
 #define VERIFY(VAL) do {                                      \
         auto res = af::allTrue<bool>(af::abs(VAL) < 1E-5);    \
diff --git a/examples/perceptron.cpp b/examples/perceptron.cpp
index d8e7f39..0854369 100644
--- a/examples/perceptron.cpp
+++ b/examples/perceptron.cpp
@@ -39,7 +39,9 @@ int main()
     perceptron.add(nn::Linear(inputSize, outputSize));
     perceptron.add(nn::Sigmoid());
 
-    Variable result;
+    auto loss = nn::MeanSquaredError();
+
+    Variable result, l;
     for (int i = 0; i < 1000; i++) {
         for (int j = 0; j < numSamples; j++) {
             perceptron.train();
@@ -52,17 +54,15 @@ int main()
             result = perceptron.forward(nn::input(in_j));
 
             // Calculate loss
-            // TODO: Use loss function
-            af::array diff = out_j - result.array();
+            l = loss.forward(result, nn::noGrad(out_j));
 
             // Backward propagation
-            auto d_result = Variable(diff, false);
-            result.backward(d_result);
+            l.backward();
 
             // Update parameters
             // TODO: Should use optimizer
             for (auto &param : perceptron.parameters()) {
-                param.array() += lr * param.grad().array();
+                param.array() -= lr * param.grad().array();
                 param.array().eval();
             }
         }
diff --git a/include/af/autograd/Variable.hpp b/include/af/autograd/Variable.hpp
index f63bebb..db2894b 100644
--- a/include/af/autograd/Variable.hpp
+++ b/include/af/autograd/Variable.hpp
@@ -72,6 +72,7 @@ namespace af {
 
             void backward(const Variable &grad, bool retain_grad_graph = false);
 
+            void backward(bool retain_grad_graph = false);
 
         private:
             void evalGrad(bool retain_grad_graph = false);
diff --git a/include/af/nn/Init.hpp b/include/af/nn/Init.hpp
index bddc4d5..c015325 100644
--- a/include/af/nn/Init.hpp
+++ b/include/af/nn/Init.hpp
@@ -15,6 +15,8 @@ namespace af {
 
         autograd::Variable input(const af::array &arr);
 
+        autograd::Variable noGrad(const af::array &arr);
+
         autograd::Variable parameter(const af::array &arr);
 
         autograd::Variable uniform(int output_size, int input_size,
diff --git a/include/af/nn/Modules.hpp b/include/af/nn/Modules.hpp
index 310e4e9..eeb22a7 100644
--- a/include/af/nn/Modules.hpp
+++ b/include/af/nn/Modules.hpp
@@ -12,3 +12,4 @@
 #include <af/nn/Modules/Activations.hpp>
 #include <af/nn/Modules/Container.hpp>
 #include <af/nn/Modules/Linear.hpp>
+#include <af/nn/Modules/Loss.hpp>
diff --git a/include/af/nn/Modules/Container.hpp b/include/af/nn/Modules/Container.hpp
index 2ee8c0e..5ded60c 100644
--- a/include/af/nn/Modules/Container.hpp
+++ b/include/af/nn/Modules/Container.hpp
@@ -41,8 +41,6 @@ namespace af
             ModulePtr get(int id);
 
             std::vector<ModulePtr> modules();
-
-            virtual autograd::Variable forward(const autograd::Variable &input) = 0;
         };
 
         class Sequential : public Container
@@ -51,7 +49,7 @@ namespace af
 
             Sequential();
 
-            virtual autograd::Variable forward(const autograd::Variable &input);
+            autograd::Variable forward(const autograd::Variable &input);
         };
     }
 }
diff --git a/include/af/nn/Modules/Loss.hpp b/include/af/nn/Modules/Loss.hpp
new file mode 100644
index 0000000..44b743a
--- /dev/null
+++ b/include/af/nn/Modules/Loss.hpp
@@ -0,0 +1,64 @@
+/*******************************************************
+ * Copyright (c) 2017, ArrayFire
+ * All rights reserved.
+ *
+ * This file is distributed under 3-clause BSD license.
+ * The complete license agreement can be obtained at:
+ * http://arrayfire.com/licenses/BSD-3-Clause
+ ********************************************************/
+#pragma once
+
+#include <af/nn/Modules/Module.hpp>
+
+namespace af
+{
+    namespace nn
+    {
+        class Loss : public Module
+        {
+        public:
+            Loss() {}
+
+            virtual autograd::Variable forward(const autograd::Variable &inputs,
+                                               const autograd::Variable &targets) = 0;
+
+            autograd::Variable forward(const autograd::Variable &inputs);
+        };
+
+        class MeanSquaredError : public Loss
+        {
+        public:
+            MeanSquaredError() {}
+
+            autograd::Variable forward(const autograd::Variable &inputs,
+                                       const autograd::Variable &targets);
+        };
+
+        class MeanAbsoluteError : public Loss
+        {
+        public:
+            MeanAbsoluteError() {}
+
+            autograd::Variable forward(const autograd::Variable &inputs,
+                                       const autograd::Variable &targets);
+        };
+
+        class BinaryCrossEntropyLoss : public Loss
+        {
+        public:
+            BinaryCrossEntropyLoss() {}
+
+            autograd::Variable forward(const autograd::Variable &inputs,
+                                       const autograd::Variable &targets);
+
+            autograd::Variable forward(const autograd::Variable &inputs,
+                                       const autograd::Variable &targets,
+                                       const autograd::Variable &weights);
+        };
+
+        typedef MeanSquaredError MSE;
+        typedef MeanAbsoluteError MAE;
+        typedef MeanAbsoluteError L1Loss;
+        typedef BinaryCrossEntropyLoss BCELoss;
+    }
+}
diff --git a/src/autograd/Variable.cpp b/src/autograd/Variable.cpp
index d49188a..61e147b 100644
--- a/src/autograd/Variable.cpp
+++ b/src/autograd/Variable.cpp
@@ -172,6 +172,12 @@ namespace af {
             }
         }
 
+        void Variable::backward(bool retain_grad_graph)
+        {
+            auto ones = Variable(af::constant(1, this->dims()), false);
+            this->backward(ones, retain_grad_graph);
+        }
+
         Variable::DAG_t Variable::build(const Variable &var)
         {
             Cache_t cache;
diff --git a/src/nn/Init.cpp b/src/nn/Init.cpp
index 65b8088..65669f5 100644
--- a/src/nn/Init.cpp
+++ b/src/nn/Init.cpp
@@ -21,6 +21,11 @@ namespace af {
             return Variable(arr, false);
         }
 
+        Variable noGrad(const af::array &arr)
+        {
+            return Variable(arr, false);
+        }
+
         Variable parameter(const af::array &arr)
         {
             return Variable(arr, true);
diff --git a/src/nn/Modules/Loss.cpp b/src/nn/Modules/Loss.cpp
new file mode 100644
index 0000000..ab7f80a
--- /dev/null
+++ b/src/nn/Modules/Loss.cpp
@@ -0,0 +1,59 @@
+/*******************************************************
+ * Copyright (c) 2017, ArrayFire
+ * All rights reserved.
+ *
+ * This file is distributed under 3-clause BSD license.
+ * The complete license agreement can be obtained at:
+ * http://arrayfire.com/licenses/BSD-3-Clause
+ ********************************************************/
+#include <af/autograd/Functions.hpp>
+#include <af/nn/Modules/Loss.hpp>
+
+
+namespace af
+{
+    namespace nn
+    {
+        using namespace autograd;
+
+        autograd::Variable Loss::forward(const autograd::Variable &inputs)
+        {
+            throw af::exception("Loss module requires both inputs and targets");
+        }
+
+        autograd::Variable MeanSquaredError::forward(const autograd::Variable &inputs,
+                                                     const autograd::Variable &targets)
+        {
+            auto df = inputs - targets;
+            auto res = mean(flat(df * df), {0});
+            return res;
+        }
+
+        autograd::Variable MeanAbsoluteError::forward(const autograd::Variable &inputs,
+                                                      const autograd::Variable &targets)
+        {
+            auto df = inputs - targets;
+            return mean(flat(abs(df)), {0});
+        }
+
+        static autograd::Variable
+        binaryCrossEntropy(const autograd::Variable &inputs,
+                           const autograd::Variable &targets)
+        {
+            return negate(targets * log(inputs) + (1 - targets) * log(1 - inputs));
+        }
+
+        autograd::Variable BinaryCrossEntropyLoss::forward(const autograd::Variable &inputs,
+                                                           const autograd::Variable &targets)
+        {
+            return mean(flat(binaryCrossEntropy(inputs, targets)), {0});
+        }
+
+        autograd::Variable BinaryCrossEntropyLoss::forward(const autograd::Variable &inputs,
+                                                           const autograd::Variable &targets,
+                                                           const autograd::Variable &weights)
+        {
+            return mean(flat(weights * binaryCrossEntropy(inputs, targets)), {0});
+        }
+    }
+}
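
Editor's note, not part of the patches: with patch 6 applied, a training step
can be driven entirely by a loss module, as the perceptron change above does.
A minimal sketch, assuming the umbrella headers af/autograd.h and af/nn.h and
made-up shapes, data, and learning rate:

    #include <af/autograd.h>
    #include <af/nn.h>

    using namespace af;

    int main()
    {
        nn::Sequential model;
        model.add(nn::Linear(3, 1));    // weights now come from nn::lecunNormal
        model.add(nn::Sigmoid());

        auto loss = nn::MSE();          // typedef for MeanSquaredError

        auto in     = nn::input(af::randu(3, 1));          // no gradient tracked
        auto target = nn::noGrad(af::constant(1.0, 1));    // ditto

        model.train();
        auto l = loss.forward(model.forward(in), target);
        l.backward();                   // seeds the graph with ones of l.dims()

        for (auto &param : model.parameters()) {
            param.array() -= 0.1 * param.grad().array();   // plain SGD step
            param.array().eval();
        }
        return 0;
    }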