From 5f571e02888d05b9068c79409a1e34348ff7fee1 Mon Sep 17 00:00:00 2001
From: Pavan Yalamanchili
Date: Sun, 23 Jul 2017 18:12:15 -0700
Subject: [PATCH] Add uniform, normal, lecun, glorot, constant, and identity
 initializers
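
nn::weight() is replaced by a family of initializers: uniform, normal,
lecunUniform/lecunNormal, glorotUniform/glorotNormal, constant, and
identity. Each returns an autograd::Variable and takes either an
(output_size, input_size) pair or an af::dim4, plus an optional dtype
and a calc_grad flag. A minimal usage sketch (the sizes and values
below are illustrative only, not part of the patch):

    #include <af/nn.h>

    int main()
    {
        // 3x5 weights drawn from U(-0.1, 0.1), gradients tracked
        auto w = af::nn::uniform(3, 5, -0.1, 0.1, f32, true);

        // Glorot (Xavier) normal initialization for the same shape
        auto g = af::nn::glorotNormal(3, 5);

        // Deterministic initializers: a zero bias and a 3x3 identity
        auto b = af::nn::constant(0.0, 3, 1);
        auto eye = af::nn::identity(3, 3);

        af_print(w.array());
        return 0;
    }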
---
 CMakeLists.txt                        |   2 +-
 include/af/nn.h                       |   2 +-
 include/af/nn/Init.hpp                |  74 +++++++++++++
 include/af/nn/Modules/Activations.hpp |  14 +--
 include/af/nn/Types.hpp               |  22 ----
 src/nn/Init.cpp                       | 153 ++++++++++++++++++++++++++
 src/nn/Modules/Activations.cpp        |   6 +-
 src/nn/Modules/Linear.cpp             |   6 +-
 src/nn/Types.cpp                      |  36 ------
 9 files changed, 242 insertions(+), 73 deletions(-)
 create mode 100644 include/af/nn/Init.hpp
 delete mode 100644 include/af/nn/Types.hpp
 create mode 100644 src/nn/Init.cpp
 delete mode 100644 src/nn/Types.cpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index c84783d..01c3b28 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -16,7 +16,7 @@ target_sources(afml
   src/nn/Modules/Container.cpp
   src/nn/Modules/Linear.cpp
   src/nn/Modules/Module.cpp
-  src/nn/Types.cpp
+  src/nn/Init.cpp
   )
 
 target_include_directories(afml
diff --git a/include/af/nn.h b/include/af/nn.h
index 88333fc..cf32fea 100644
--- a/include/af/nn.h
+++ b/include/af/nn.h
@@ -10,4 +10,4 @@
 #pragma once
 
 #include <af/nn/Modules.hpp>
-#include <af/nn/Types.hpp>
+#include <af/nn/Init.hpp>
diff --git a/include/af/nn/Init.hpp b/include/af/nn/Init.hpp
new file mode 100644
index 0000000..bddc4d5
--- /dev/null
+++ b/include/af/nn/Init.hpp
@@ -0,0 +1,74 @@
+/*******************************************************
+ * Copyright (c) 2017, ArrayFire
+ * All rights reserved.
+ *
+ * This file is distributed under 3-clause BSD license.
+ * The complete license agreement can be obtained at:
+ * http://arrayfire.com/licenses/BSD-3-Clause
+ ********************************************************/
+#pragma once
+
+#include <af/autograd/Variable.h>
+
+namespace af {
+    namespace nn {
+
+        autograd::Variable input(const af::array &arr);
+
+        autograd::Variable parameter(const af::array &arr);
+
+        autograd::Variable uniform(int output_size, int input_size,
+                                   double min = 0, double max = 1,
+                                   af::dtype type = f32, bool calc_grad=true);
+
+        autograd::Variable uniform(af::dim4 dims,
+                                   double min = 0, double max = 1,
+                                   af::dtype type = f32, bool calc_grad=true);
+
+        autograd::Variable normal(int output_size, int input_size,
+                                  double stdv = 1, double mean = 0,
+                                  af::dtype type = f32, bool calc_grad=true);
+
+        autograd::Variable normal(af::dim4 dims,
+                                  double stdv = 1, double mean = 0,
+                                  af::dtype type = f32, bool calc_grad=true);
+
+        autograd::Variable lecunUniform(int output_size, int input_size,
+                                        af::dtype type = f32, bool calc_grad=true);
+
+        autograd::Variable lecunUniform(af::dim4 dims,
+                                        af::dtype type = f32, bool calc_grad=true);
+
+        autograd::Variable lecunNormal(int output_size, int input_size,
+                                       af::dtype type = f32, bool calc_grad=true);
+
+        autograd::Variable lecunNormal(af::dim4 dims,
+                                       af::dtype type = f32, bool calc_grad=true);
+
+        autograd::Variable glorotUniform(int output_size, int input_size,
+                                         af::dtype type = f32, bool calc_grad=true);
+
+        autograd::Variable glorotUniform(af::dim4 dims,
+                                         af::dtype type = f32, bool calc_grad=true);
+
+        autograd::Variable glorotNormal(int output_size, int input_size,
+                                        af::dtype type = f32, bool calc_grad=true);
+
+        autograd::Variable glorotNormal(af::dim4 dims,
+                                        af::dtype type = f32, bool calc_grad=true);
+
+
+        autograd::Variable constant(double val, int output_size, int input_size,
+                                    af::dtype type = f32, bool calc_grad=true);
+
+        autograd::Variable constant(double val, af::dim4 dims,
+                                    af::dtype type = f32, bool calc_grad=true);
+
+        autograd::Variable identity(int output_size, int input_size,
+                                    af::dtype type = f32, bool calc_grad=true);
+
+        autograd::Variable identity(af::dim4 dims,
+                                    af::dtype type = f32, bool calc_grad=true);
+
+    }
+}
diff --git a/include/af/nn/Modules/Activations.hpp b/include/af/nn/Modules/Activations.hpp
index 2d00a90..dc6c288 100644
--- a/include/af/nn/Modules/Activations.hpp
+++ b/include/af/nn/Modules/Activations.hpp
@@ -35,26 +35,26 @@ namespace af
         {
         public:
             ReLU();
-            
+
             autograd::Variable forward(const autograd::Variable &input);
         };
-        
+
         class LeakyReLU : public Module
         {
         private:
             double m_slope;
         public:
             LeakyReLU(double slope = 0.0);
-            
+
             autograd::Variable forward(const autograd::Variable &input);
         };
 
         class PReLU : public Module
         {
         public:
-            PReLU(int size, double spread = 1.0);
+            PReLU(int size, double value = 1.0);
             PReLU(const autograd::Variable &w);
-            
+
             autograd::Variable forward(const autograd::Variable &input);
         };
 
@@ -74,11 +74,11 @@ namespace af
         {
         private:
             double m_threshold;
         public:
             ThresholdReLU(double threshold = 1.0);
-            
+
             autograd::Variable forward(const autograd::Variable &input);
         };
-        
+
     }
 }
diff --git a/include/af/nn/Types.hpp b/include/af/nn/Types.hpp
deleted file mode 100644
index 6e7e101..0000000
--- a/include/af/nn/Types.hpp
+++ /dev/null
@@ -1,22 +0,0 @@
-/*******************************************************
- * Copyright (c) 2017, ArrayFire
- * All rights reserved.
- *
- * This file is distributed under 3-clause BSD license.
- * The complete license agreement can be obtained at:
- * http://arrayfire.com/licenses/BSD-3-Clause
- ********************************************************/
-#pragma once
-
-#include <af/autograd/Variable.h>
-
-namespace af {
-    namespace nn {
-
-        autograd::Variable input(const af::array &arr);
-
-        autograd::Variable parameter(const af::array &arr);
-
-        autograd::Variable weight(int input_size, int output_size, float spread = 0.05);
-    }
-}
diff --git a/src/nn/Init.cpp b/src/nn/Init.cpp
new file mode 100644
index 0000000..65b8088
--- /dev/null
+++ b/src/nn/Init.cpp
@@ -0,0 +1,153 @@
+/*******************************************************
+ * Copyright (c) 2017, ArrayFire
+ * All rights reserved.
+ *
+ * This file is distributed under 3-clause BSD license.
+ * The complete license agreement can be obtained at:
+ * http://arrayfire.com/licenses/BSD-3-Clause
+ ********************************************************/
+
+#include <math.h>
+
+#include <af/autograd/Variable.h>
+
+#include <af/nn/Init.hpp>
+
+namespace af {
+    namespace nn {
+
+        using autograd::Variable;
+
+        Variable input(const af::array &arr)
+        {
+            return Variable(arr, false);
+        }
+
+        Variable parameter(const af::array &arr)
+        {
+            return Variable(arr, true);
+        }
+
+        autograd::Variable uniform(int output_size, int input_size,
+                                   double min, double max,
+                                   af::dtype type, bool calc_grad)
+        {
+            return nn::uniform(af::dim4(output_size, input_size), min, max, type, calc_grad);
+        }
+
+        autograd::Variable uniform(af::dim4 dims, double min, double max,
+                                   af::dtype type, bool calc_grad)
+        {
+            af::array result = af::randu(dims, type);
+            if (min != 0 || max != 1) {
+                result = (max - min) * result + min;
+            }
+            return Variable(result, calc_grad);
+        }
+
+        autograd::Variable normal(int output_size, int input_size,
+                                  double stdv, double mean,
+                                  af::dtype type, bool calc_grad)
+        {
+            return nn::normal(af::dim4(output_size, input_size), stdv, mean, type, calc_grad);
+        }
+
+        autograd::Variable normal(af::dim4 dims, double stdv, double mean,
+                                  af::dtype type, bool calc_grad)
+        {
+            af::array result = af::randn(dims, type);
+            if (mean != 0 || stdv != 1) {
+                result = stdv * result + mean;
+            }
+            return Variable(result, calc_grad);
+        }
+
+        autograd::Variable lecunUniform(int output_size, int input_size,
+                                        af::dtype type, bool calc_grad)
+        {
+            return nn::lecunUniform(af::dim4(output_size, input_size), type, calc_grad);
+        }
+
+        autograd::Variable lecunUniform(af::dim4 dims,
+                                        af::dtype type, bool calc_grad)
+        {
+            dim_t elements = dims.elements();
+            dim_t fan_in = elements / dims[0];
+            double stdv = ::sqrt(1.0/(double)fan_in);
+            double limit = ::sqrt(3.0) * stdv;
+            return nn::uniform(dims, -limit, limit, type, calc_grad);
+        }
+
+        autograd::Variable lecunNormal(int output_size, int input_size,
+                                       af::dtype type, bool calc_grad)
+        {
+            return nn::lecunNormal(af::dim4(output_size, input_size), type, calc_grad);
+        }
+
+        autograd::Variable lecunNormal(af::dim4 dims,
+                                       af::dtype type, bool calc_grad)
+        {
+            dim_t elements = dims.elements();
+            dim_t fan_in = elements / dims[0];
+            double stdv = ::sqrt(1.0/(double)fan_in);
+            return nn::normal(dims, stdv, 0, type, calc_grad);
+        }
+
+        autograd::Variable glorotUniform(int output_size, int input_size,
+                                         af::dtype type, bool calc_grad)
+        {
+            return nn::glorotUniform(af::dim4(output_size, input_size), type, calc_grad);
+        }
+
+        autograd::Variable glorotUniform(af::dim4 dims,
+                                         af::dtype type, bool calc_grad)
+        {
+            dim_t elements = dims.elements();
+            dim_t fan_in = elements / dims[0];
+            dim_t fan_out = elements / dims[1];
+            double stdv = ::sqrt(2.0/(double)(fan_in + fan_out));
+            double limit = ::sqrt(3.0) * stdv;
+            return nn::uniform(dims, -limit, limit, type, calc_grad);
+        }
+
+        autograd::Variable glorotNormal(int output_size, int input_size,
+                                        af::dtype type, bool calc_grad)
+        {
+            return nn::glorotNormal(af::dim4(output_size, input_size), type, calc_grad);
+        }
+
+        autograd::Variable glorotNormal(af::dim4 dims,
+                                        af::dtype type, bool calc_grad)
+        {
+            dim_t elements = dims.elements();
+            dim_t fan_in = elements / dims[0];
+            dim_t fan_out = elements / dims[1];
+            double stdv = ::sqrt(2.0/(double)(fan_in + fan_out));
+            return nn::normal(dims, stdv, 0, type, calc_grad);
+        }
+
+        autograd::Variable constant(double val, int output_size, int input_size,
+                                    af::dtype type, bool calc_grad)
+        {
+            return nn::constant(val, af::dim4(output_size, input_size), type, calc_grad);
+        }
+
+        autograd::Variable constant(double val, af::dim4 dims,
+                                    af::dtype type, bool calc_grad)
+        {
+            return Variable(af::constant(val, dims, type), calc_grad);
+        }
+
+        autograd::Variable identity(int output_size, int input_size,
+                                    af::dtype type, bool calc_grad)
+        {
+            return nn::identity(af::dim4(output_size, input_size), type, calc_grad);
+        }
+
+        autograd::Variable identity(af::dim4 dims,
+                                    af::dtype type, bool calc_grad)
+        {
+            return Variable(af::identity(dims, type), calc_grad);
+        }
+    }
+}
diff --git a/src/nn/Modules/Activations.cpp b/src/nn/Modules/Activations.cpp
index ba0ae80..2b9b4a0 100644
--- a/src/nn/Modules/Activations.cpp
+++ b/src/nn/Modules/Activations.cpp
@@ -9,7 +9,7 @@
 #include <af/autograd/Functions.h>
 #include <af/nn/Modules/Activations.hpp>
-#include <af/nn/Types.hpp>
+#include <af/nn/Init.hpp>
 
 namespace af
 {
     namespace nn
@@ -47,9 +47,9 @@ namespace af
             return max(input, m_slope * input);
         }
 
-        PReLU::PReLU(int size, double spread)
+        PReLU::PReLU(int size, double value)
         {
-            auto w = nn::weight(size, 1, spread);
+            auto w = nn::constant(value, size, 1);
             setParams({w});
         }
 
diff --git a/src/nn/Modules/Linear.cpp b/src/nn/Modules/Linear.cpp
index 49140f0..a15d9ae 100644
--- a/src/nn/Modules/Linear.cpp
+++ b/src/nn/Modules/Linear.cpp
@@ -8,7 +8,7 @@
  ********************************************************/
 
 #include <af/autograd/Functions.h>
-#include <af/nn/Types.hpp>
+#include <af/nn/Init.hpp>
 #include <af/nn/Modules/Linear.hpp>
 
 namespace af
@@ -20,9 +20,9 @@ namespace af
         Linear::Linear(int input_size, int output_size, bool bias, float spread) :
             m_bias(bias)
         {
-            auto w = nn::weight(input_size, output_size, spread);
+            auto w = nn::lecunNormal(output_size, input_size);
             if (bias) {
-                auto b = nn::weight(1, output_size, spread);
+                auto b = nn::lecunNormal(output_size, 1);
                 setParams({w, b});
             } else {
                 setParams({w});
diff --git a/src/nn/Types.cpp b/src/nn/Types.cpp
deleted file mode 100644
index 698b497..0000000
--- a/src/nn/Types.cpp
+++ /dev/null
@@ -1,36 +0,0 @@
-/*******************************************************
- * Copyright (c) 2017, ArrayFire
- * All rights reserved.
- *
- * This file is distributed under 3-clause BSD license.
- * The complete license agreement can be obtained at:
- * http://arrayfire.com/licenses/BSD-3-Clause
- ********************************************************/
-
-#include <af/autograd/Variable.h>
-
-#include <af/nn/Types.hpp>
-
-namespace af {
-    namespace nn {
-
-        using autograd::Variable;
-
-        Variable input(const af::array &arr)
-        {
-            return Variable(arr, false);
-        }
-
-        Variable parameter(const af::array &arr)
-        {
-            return Variable(arr, true);
-        }
-
-        Variable weight(int input_size, int output_size, float spread)
-        {
-            auto w = af::randu(output_size, input_size) * spread - spread / 2;
-            w.eval();
-            return parameter(w);
-        }
-    }
-}
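
Note (not part of the patch): a quick standalone sanity check for the new
initializers; the sizes, expected values, and the use of af::stdev and
af::allTrue below are illustrative assumptions:

    #include <af/nn.h>
    #include <cstdio>

    int main()
    {
        // lecunNormal draws from N(0, 1/fan_in); with fan_in = 64 the
        // sample standard deviation should be close to 1/8 = 0.125.
        auto w = af::nn::lecunNormal(256, 64);
        printf("stdev: %f (expect ~0.125)\n", af::stdev<double>(w.array()));

        // constant() is deterministic and easy to verify exactly.
        auto b = af::nn::constant(0.5, 4, 1);
        printf("all 0.5: %d\n", (int)af::allTrue<bool>(b.array() == 0.5));
        return 0;
    }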