From 5f571e02888d05b9068c79409a1e34348ff7fee1 Mon Sep 17 00:00:00 2001
From: Pavan Yalamanchili
Date: Sun, 23 Jul 2017 18:12:15 -0700
Subject: [PATCH] Add uniform, normal, lecun, glorot, constant, and identity
 initializers
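
nn::weight() is replaced by a family of initializers: uniform, normal,
lecunUniform/lecunNormal, glorotUniform/glorotNormal, constant, and
identity. Each returns an autograd::Variable and takes either an
(output_size, input_size) pair or an af::dim4, plus an optional dtype
and a calc_grad flag. A minimal usage sketch (the sizes and values
below are illustrative only, not part of the patch):

    #include <af/nn.h>

    int main()
    {
        // 3x5 weights drawn from U(-0.1, 0.1), gradients tracked
        auto w = af::nn::uniform(3, 5, -0.1, 0.1, f32, true);

        // Glorot (Xavier) normal initialization for the same shape
        auto g = af::nn::glorotNormal(3, 5);

        // Deterministic initializers: a zero bias and a 3x3 identity
        auto b = af::nn::constant(0.0, 3, 1);
        auto eye = af::nn::identity(3, 3);

        af_print(w.array());
        return 0;
    }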
---
 CMakeLists.txt                        |   2 +-
 include/af/nn.h                       |   2 +-
 include/af/nn/Init.hpp                |  74 +++++++++++++
 include/af/nn/Modules/Activations.hpp |  14 +--
 include/af/nn/Types.hpp               |  22 ----
 src/nn/Init.cpp                       | 153 ++++++++++++++++++++++++++
 src/nn/Modules/Activations.cpp        |   6 +-
 src/nn/Modules/Linear.cpp             |   6 +-
 src/nn/Types.cpp                      |  36 ------
 9 files changed, 242 insertions(+), 73 deletions(-)
 create mode 100644 include/af/nn/Init.hpp
 delete mode 100644 include/af/nn/Types.hpp
 create mode 100644 src/nn/Init.cpp
 delete mode 100644 src/nn/Types.cpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index c84783d..01c3b28 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -16,7 +16,7 @@ target_sources(afml
   src/nn/Modules/Container.cpp
   src/nn/Modules/Linear.cpp
   src/nn/Modules/Module.cpp
-  src/nn/Types.cpp
+  src/nn/Init.cpp
   )
 
 target_include_directories(afml
diff --git a/include/af/nn.h b/include/af/nn.h
index 88333fc..cf32fea 100644
--- a/include/af/nn.h
+++ b/include/af/nn.h
@@ -10,4 +10,4 @@
 #pragma once
 
 #include <af/nn/Modules.hpp>
-#include <af/nn/Types.hpp>
+#include <af/nn/Init.hpp>
diff --git a/include/af/nn/Init.hpp b/include/af/nn/Init.hpp
new file mode 100644
index 0000000..bddc4d5
--- /dev/null
+++ b/include/af/nn/Init.hpp
@@ -0,0 +1,74 @@
+/*******************************************************
+ * Copyright (c) 2017, ArrayFire
+ * All rights reserved.
+ *
+ * This file is distributed under 3-clause BSD license.
+ * The complete license agreement can be obtained at:
+ * http://arrayfire.com/licenses/BSD-3-Clause
+ ********************************************************/
+#pragma once
+
+#include <af/autograd/Variable.h>
+
+namespace af {
+    namespace nn {
+
+        autograd::Variable input(const af::array &arr);
+
+        autograd::Variable parameter(const af::array &arr);
+
+        autograd::Variable uniform(int output_size, int input_size,
+                                   double min = 0, double max = 1,
+                                   af::dtype type = f32, bool calc_grad=true);
+
+        autograd::Variable uniform(af::dim4 dims,
+                                   double min = 0, double max = 1,
+                                   af::dtype type = f32, bool calc_grad=true);
+
+        autograd::Variable normal(int output_size, int input_size,
+                                  double stdv = 1, double mean = 0,
+                                  af::dtype type = f32, bool calc_grad=true);
+
+        autograd::Variable normal(af::dim4 dims,
+                                  double stdv = 1, double mean = 0,
+                                  af::dtype type = f32, bool calc_grad=true);
+
+        autograd::Variable lecunUniform(int output_size, int input_size,
+                                        af::dtype type = f32, bool calc_grad=true);
+
+        autograd::Variable lecunUniform(af::dim4 dims,
+                                        af::dtype type = f32, bool calc_grad=true);
+
+        autograd::Variable lecunNormal(int output_size, int input_size,
+                                       af::dtype type = f32, bool calc_grad=true);
+
+        autograd::Variable lecunNormal(af::dim4 dims,
+                                       af::dtype type = f32, bool calc_grad=true);
+
+        autograd::Variable glorotUniform(int output_size, int input_size,
+                                         af::dtype type = f32, bool calc_grad=true);
+
+        autograd::Variable glorotUniform(af::dim4 dims,
+                                         af::dtype type = f32, bool calc_grad=true);
+
+        autograd::Variable glorotNormal(int output_size, int input_size,
+                                        af::dtype type = f32, bool calc_grad=true);
+
+        autograd::Variable glorotNormal(af::dim4 dims,
+                                        af::dtype type = f32, bool calc_grad=true);
+
+
+        autograd::Variable constant(double val, int output_size, int input_size,
+                                    af::dtype type = f32, bool calc_grad=true);
+
+        autograd::Variable constant(double val, af::dim4 dims,
+                                    af::dtype type = f32, bool calc_grad=true);
+
+        autograd::Variable identity(int output_size, int input_size,
+                                    af::dtype type = f32, bool calc_grad=true);
+
+        autograd::Variable identity(af::dim4 dims,
+                                    af::dtype type = f32, bool calc_grad=true);
+
+    }
+}
diff --git a/include/af/nn/Modules/Activations.hpp b/include/af/nn/Modules/Activations.hpp
index 2d00a90..dc6c288 100644
--- a/include/af/nn/Modules/Activations.hpp
+++ b/include/af/nn/Modules/Activations.hpp
@@ -35,26 +35,26 @@ namespace af
         {
         public:
             ReLU();
-            
+
             autograd::Variable forward(const autograd::Variable &input);
         };
-        
+
         class LeakyReLU : public Module
         {
         private:
             double m_slope;
         public:
             LeakyReLU(double slope = 0.0);
-            
+
             autograd::Variable forward(const autograd::Variable &input);
         };
 
         class PReLU : public Module
         {
         public:
-            PReLU(int size, double spread = 1.0);
+            PReLU(int size, double value = 1.0);
             PReLU(const autograd::Variable &w);
-            
+
             autograd::Variable forward(const autograd::Variable &input);
         };
 
@@ -74,11 +74,11 @@ namespace af
         {
         private:
             double m_threshold;
         public:
             ThresholdReLU(double threshold = 1.0);
-            
+
             autograd::Variable forward(const autograd::Variable &input);
         };
-        
+
     }
 }
diff --git a/include/af/nn/Types.hpp b/include/af/nn/Types.hpp
deleted file mode 100644
index 6e7e101..0000000
--- a/include/af/nn/Types.hpp
+++ /dev/null
@@ -1,22 +0,0 @@
-/*******************************************************
- * Copyright (c) 2017, ArrayFire
- * All rights reserved.
- *
- * This file is distributed under 3-clause BSD license.
- * The complete license agreement can be obtained at:
- * http://arrayfire.com/licenses/BSD-3-Clause
- ********************************************************/
-#pragma once
-
-#include <af/autograd/Variable.h>
-
-namespace af {
-    namespace nn {
-
-        autograd::Variable input(const af::array &arr);
-
-        autograd::Variable parameter(const af::array &arr);
-
-        autograd::Variable weight(int input_size, int output_size, float spread = 0.05);
-    }
-}
diff --git a/src/nn/Init.cpp b/src/nn/Init.cpp
new file mode 100644
index 0000000..65b8088
--- /dev/null
+++ b/src/nn/Init.cpp
@@ -0,0 +1,153 @@
+/*******************************************************
+ * Copyright (c) 2017, ArrayFire
+ * All rights reserved.
+ *
+ * This file is distributed under 3-clause BSD license.
+ * The complete license agreement can be obtained at:
+ * http://arrayfire.com/licenses/BSD-3-Clause
+ ********************************************************/
+
+#include <math.h>
+
+#include <af/autograd/Variable.h>
+
+#include <af/nn/Init.hpp>
+
+namespace af {
+    namespace nn {
+
+        using autograd::Variable;
+
+        Variable input(const af::array &arr)
+        {
+            return Variable(arr, false);
+        }
+
+        Variable parameter(const af::array &arr)
+        {
+            return Variable(arr, true);
+        }
+
+        autograd::Variable uniform(int output_size, int input_size,
+                                   double min, double max,
+                                   af::dtype type, bool calc_grad)
+        {
+            return nn::uniform(af::dim4(output_size, input_size), min, max, type, calc_grad);
+        }
+
+        autograd::Variable uniform(af::dim4 dims, double min, double max,
+                                   af::dtype type, bool calc_grad)
+        {
+            af::array result = af::randu(dims, type);
+            if (min != 0 || max != 1) {
+                result = (max - min) * result + min;
+            }
+            return Variable(result, calc_grad);
+        }
+
+        autograd::Variable normal(int output_size, int input_size,
+                                  double stdv, double mean,
+                                  af::dtype type, bool calc_grad)
+        {
+            return nn::normal(af::dim4(output_size, input_size), stdv, mean, type, calc_grad);
+        }
+
+        autograd::Variable normal(af::dim4 dims, double stdv, double mean,
+                                  af::dtype type, bool calc_grad)
+        {
+            af::array result = af::randn(dims, type);
+            if (mean != 0 || stdv != 1) {
+                result = stdv * result + mean;
+            }
+            return Variable(result, calc_grad);
+        }
+
+        autograd::Variable lecunUniform(int output_size, int input_size,
+                                        af::dtype type, bool calc_grad)
+        {
+            return nn::lecunUniform(af::dim4(output_size, input_size), type, calc_grad);
+        }
+
+        autograd::Variable lecunUniform(af::dim4 dims,
+                                        af::dtype type, bool calc_grad)
+        {
+            dim_t elements = dims.elements();
+            dim_t fan_in = elements / dims[0];
+            double stdv = ::sqrt(1.0/(double)fan_in);
+            double limit = ::sqrt(3.0) * stdv;
+            return nn::uniform(dims, -limit, limit, type, calc_grad);
+        }
+
+        autograd::Variable lecunNormal(int output_size, int input_size,
+                                       af::dtype type, bool calc_grad)
+        {
+            return nn::lecunNormal(af::dim4(output_size, input_size), type, calc_grad);
+        }
+
+        autograd::Variable lecunNormal(af::dim4 dims,
+                                       af::dtype type, bool calc_grad)
+        {
+            dim_t elements = dims.elements();
+            dim_t fan_in = elements / dims[0];
+            double stdv = ::sqrt(1.0/(double)fan_in);
+            return nn::normal(dims, stdv, 0, type, calc_grad);
+        }
+
+        autograd::Variable glorotUniform(int output_size, int input_size,
+                                         af::dtype type, bool calc_grad)
+        {
+            return nn::glorotUniform(af::dim4(output_size, input_size), type, calc_grad);
+        }
+
+        autograd::Variable glorotUniform(af::dim4 dims,
+                                         af::dtype type, bool calc_grad)
+        {
+            dim_t elements = dims.elements();
+            dim_t fan_in = elements / dims[0];
+            dim_t fan_out = elements / dims[1];
+            double stdv = ::sqrt(2.0/(double)(fan_in + fan_out));
+            double limit = ::sqrt(3.0) * stdv;
+            return nn::uniform(dims, -limit, limit, type, calc_grad);
+        }
+
+        autograd::Variable glorotNormal(int output_size, int input_size,
+                                        af::dtype type, bool calc_grad)
+        {
+            return nn::glorotNormal(af::dim4(output_size, input_size), type, calc_grad);
+        }
+
+        autograd::Variable glorotNormal(af::dim4 dims,
+                                        af::dtype type, bool calc_grad)
+        {
+            dim_t elements = dims.elements();
+            dim_t fan_in = elements / dims[0];
+            dim_t fan_out = elements / dims[1];
+            double stdv = ::sqrt(2.0/(double)(fan_in + fan_out));
+            return nn::normal(dims, stdv, 0, type, calc_grad);
+        }
+
+        autograd::Variable constant(double val, int output_size, int input_size,
+                                    af::dtype type, bool calc_grad)
+        {
+            return nn::constant(val, af::dim4(output_size, input_size), type, calc_grad);
+        }
+
+        autograd::Variable constant(double val, af::dim4 dims,
+                                    af::dtype type, bool calc_grad)
+        {
+            return Variable(af::constant(val, dims, type), calc_grad);
+        }
+
+        autograd::Variable identity(int output_size, int input_size,
+                                    af::dtype type, bool calc_grad)
+        {
+            return nn::identity(af::dim4(output_size, input_size), type, calc_grad);
+        }
+
+        autograd::Variable identity(af::dim4 dims,
+                                    af::dtype type, bool calc_grad)
+        {
+            return Variable(af::identity(dims, type), calc_grad);
+        }
+    }
+}
diff --git a/src/nn/Modules/Activations.cpp b/src/nn/Modules/Activations.cpp
index ba0ae80..2b9b4a0 100644
--- a/src/nn/Modules/Activations.cpp
+++ b/src/nn/Modules/Activations.cpp
@@ -9,7 +9,7 @@
 #include <af/autograd/Functions.h>
 #include <af/nn/Modules/Activations.hpp>
-#include <af/nn/Types.hpp>
+#include <af/nn/Init.hpp>
 
 namespace af
 {
     namespace nn
@@ -47,9 +47,9 @@ namespace af
             return max(input, m_slope * input);
         }
 
-        PReLU::PReLU(int size, double spread)
+        PReLU::PReLU(int size, double value)
         {
-            auto w = nn::weight(size, 1, spread);
+            auto w = nn::constant(value, size, 1);
             setParams({w});
         }
 
diff --git a/src/nn/Modules/Linear.cpp b/src/nn/Modules/Linear.cpp
index 49140f0..a15d9ae 100644
--- a/src/nn/Modules/Linear.cpp
+++ b/src/nn/Modules/Linear.cpp
@@ -8,7 +8,7 @@
  ********************************************************/
 
 #include <af/autograd/Functions.h>
-#include <af/nn/Types.hpp>
+#include <af/nn/Init.hpp>
 #include <af/nn/Modules/Linear.hpp>
 
 namespace af
@@ -20,9 +20,9 @@ namespace af
         Linear::Linear(int input_size, int output_size, bool bias, float spread) :
             m_bias(bias)
         {
-            auto w = nn::weight(input_size, output_size, spread);
+            auto w = nn::lecunNormal(output_size, input_size);
             if (bias) {
-                auto b = nn::weight(1, output_size, spread);
+                auto b = nn::lecunNormal(output_size, 1);
                 setParams({w, b});
             } else {
                 setParams({w});
diff --git a/src/nn/Types.cpp b/src/nn/Types.cpp
deleted file mode 100644
index 698b497..0000000
--- a/src/nn/Types.cpp
+++ /dev/null
@@ -1,36 +0,0 @@
-/*******************************************************
- * Copyright (c) 2017, ArrayFire
- * All rights reserved.
- *
- * This file is distributed under 3-clause BSD license.
- * The complete license agreement can be obtained at:
- * http://arrayfire.com/licenses/BSD-3-Clause
- ********************************************************/
-
-#include <af/autograd/Variable.h>
-
-#include <af/nn/Types.hpp>
-
-namespace af {
-    namespace nn {
-
-        using autograd::Variable;
-
-        Variable input(const af::array &arr)
-        {
-            return Variable(arr, false);
-        }
-
-        Variable parameter(const af::array &arr)
-        {
-            return Variable(arr, true);
-        }
-
-        Variable weight(int input_size, int output_size, float spread)
-        {
-            auto w = af::randu(output_size, input_size) * spread - spread / 2;
-            w.eval();
-            return parameter(w);
-        }
-    }
-}
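
Note (not part of the patch): a quick standalone sanity check for the new
initializers; the sizes, expected values, and the use of af::stdev and
af::allTrue below are illustrative assumptions:

    #include <af/nn.h>
    #include <cstdio>

    int main()
    {
        // lecunNormal draws from N(0, 1/fan_in); with fan_in = 64 the
        // sample standard deviation should be close to 1/8 = 0.125.
        auto w = af::nn::lecunNormal(256, 64);
        printf("stdev: %f (expect ~0.125)\n", af::stdev<double>(w.array()));

        // constant() is deterministic and easy to verify exactly.
        auto b = af::nn::constant(0.5, 4, 1);
        printf("all 0.5: %d\n", (int)af::allTrue<bool>(b.array() == 0.5));
        return 0;
    }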