diff --git a/CMakeLists.txt b/CMakeLists.txt
index 01c3b28..f31cd8a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -15,6 +15,7 @@ target_sources(afml
   src/nn/Modules/Activations.cpp
   src/nn/Modules/Container.cpp
   src/nn/Modules/Linear.cpp
+  src/nn/Modules/Loss.cpp
   src/nn/Modules/Module.cpp
   src/nn/Init.cpp
   )
diff --git a/examples/autograd.cpp b/examples/autograd.cpp
index e9e8b5e..b70dbe1 100644
--- a/examples/autograd.cpp
+++ b/examples/autograd.cpp
@@ -8,6 +8,7 @@
  ********************************************************/

 #include <af/autograd.h>
+#include <af/nn.h>

 #define VERIFY(VAL) do {                                \
     auto res = af::allTrue<bool>(af::abs(VAL) < 1E-5);  \
diff --git a/examples/perceptron.cpp b/examples/perceptron.cpp
index d8e7f39..0854369 100644
--- a/examples/perceptron.cpp
+++ b/examples/perceptron.cpp
@@ -39,7 +39,9 @@ int main()
     perceptron.add(nn::Linear(inputSize, outputSize));
     perceptron.add(nn::Sigmoid());

-    Variable result;
+    auto loss = nn::MeanSquaredError();
+
+    Variable result, l;
     for (int i = 0; i < 1000; i++) {
         for (int j = 0; j < numSamples; j++) {
             perceptron.train();
@@ -52,17 +54,15 @@
             result = perceptron.forward(nn::input(in_j));

             // Calculate loss
-            // TODO: Use loss function
-            af::array diff = out_j - result.array();
+            l = loss.forward(result, nn::noGrad(out_j));

             // Backward propagation
-            auto d_result = Variable(diff, false);
-            result.backward(d_result);
+            l.backward();

             // Update parameters
             // TODO: Should use optimizer
             for (auto &param : perceptron.parameters()) {
-                param.array() += lr * param.grad().array();
+                param.array() -= lr * param.grad().array();
                 param.array().eval();
             }
         }
diff --git a/include/af/autograd/Variable.hpp b/include/af/autograd/Variable.hpp
index f63bebb..db2894b 100644
--- a/include/af/autograd/Variable.hpp
+++ b/include/af/autograd/Variable.hpp
@@ -72,6 +72,7 @@ namespace af {
             void backward(const Variable &grad, bool retain_grad_graph = false);
+            void backward(bool retain_grad_graph = false);

         private:

             void evalGrad(bool retain_grad_graph = false);
diff --git a/include/af/nn/Init.hpp b/include/af/nn/Init.hpp
index bddc4d5..c015325 100644
--- a/include/af/nn/Init.hpp
+++ b/include/af/nn/Init.hpp
@@ -15,6 +15,8 @@ namespace af {
         autograd::Variable input(const af::array &arr);

+        autograd::Variable noGrad(const af::array &arr);
+
         autograd::Variable parameter(const af::array &arr);

         autograd::Variable uniform(int input_size, int output_size,
diff --git a/include/af/nn/Modules.hpp b/include/af/nn/Modules.hpp
index 310e4e9..eeb22a7 100644
--- a/include/af/nn/Modules.hpp
+++ b/include/af/nn/Modules.hpp
@@ -12,3 +12,4 @@
 #include <af/nn/Modules/Container.hpp>
 #include <af/nn/Modules/Linear.hpp>
 #include <af/nn/Modules/Activations.hpp>
+#include <af/nn/Modules/Loss.hpp>
diff --git a/include/af/nn/Modules/Container.hpp b/include/af/nn/Modules/Container.hpp
index 2ee8c0e..5ded60c 100644
--- a/include/af/nn/Modules/Container.hpp
+++ b/include/af/nn/Modules/Container.hpp
@@ -41,8 +41,6 @@ namespace af
             ModulePtr get(int id);

             std::vector<ModulePtr> modules();
-
-            virtual autograd::Variable forward(const autograd::Variable &input) = 0;
         };

         class Sequential : public Container
@@ -51,7 +49,7 @@ namespace af

             Sequential();

-            virtual autograd::Variable forward(const autograd::Variable &input);
+            autograd::Variable forward(const autograd::Variable &input);
         };
     }
 }
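A note on the examples/perceptron.cpp hunk above: flipping the update from "+=" to "-=" is the substantive fix. Gradient descent steps against the gradient, w <- w - lr * dL/dw; adding the gradient climbs the loss surface instead. A self-contained illustration of the sign's effect (hypothetical snippet, not part of the patch):

    #include <cstdio>

    int main()
    {
        // Minimize L(w) = w * w starting from w = 2.
        float w = 2.0f, lr = 0.1f;
        for (int i = 0; i < 100; ++i) {
            float grad = 2.0f * w; // dL/dw
            w -= lr * grad;        // with "+=" this loop diverges
        }
        std::printf("w -> %f (approaches the minimum at 0)\n", w);
        return 0;
    }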
diff --git a/include/af/nn/Modules/Loss.hpp b/include/af/nn/Modules/Loss.hpp
new file mode 100644
index 0000000..44b743a
--- /dev/null
+++ b/include/af/nn/Modules/Loss.hpp
@@ -0,0 +1,64 @@
+/*******************************************************
+ * Copyright (c) 2017, ArrayFire
+ * All rights reserved.
+ *
+ * This file is distributed under 3-clause BSD license.
+ * The complete license agreement can be obtained at:
+ * http://arrayfire.com/licenses/BSD-3-Clause
+ ********************************************************/
+#pragma once
+
+#include <af/nn/Modules/Module.hpp>
+
+namespace af
+{
+    namespace nn
+    {
+        class Loss : public Module
+        {
+        public:
+            Loss() {}
+
+            virtual autograd::Variable forward(const autograd::Variable &inputs,
+                                               const autograd::Variable &targets) = 0;
+
+            autograd::Variable forward(const autograd::Variable &inputs);
+        };
+
+        class MeanSquaredError : public Loss
+        {
+        public:
+            MeanSquaredError() {}
+
+            autograd::Variable forward(const autograd::Variable &inputs,
+                                       const autograd::Variable &targets);
+        };
+
+        class MeanAbsoluteError : public Loss
+        {
+        public:
+            MeanAbsoluteError() {}
+
+            autograd::Variable forward(const autograd::Variable &inputs,
+                                       const autograd::Variable &targets);
+        };
+
+        class BinaryCrossEntropyLoss : public Loss
+        {
+        public:
+            BinaryCrossEntropyLoss() {}
+
+            autograd::Variable forward(const autograd::Variable &inputs,
+                                       const autograd::Variable &targets);
+
+            autograd::Variable forward(const autograd::Variable &inputs,
+                                       const autograd::Variable &targets,
+                                       const autograd::Variable &weights);
+        };
+
+        typedef MeanSquaredError MSE;
+        typedef MeanAbsoluteError MAE;
+        typedef MeanAbsoluteError L1Loss;
+        typedef BinaryCrossEntropyLoss BCELoss;
+    }
+}
diff --git a/src/autograd/Variable.cpp b/src/autograd/Variable.cpp
index d49188a..61e147b 100644
--- a/src/autograd/Variable.cpp
+++ b/src/autograd/Variable.cpp
@@ -172,6 +172,12 @@ namespace af {
         }
     }

+    void Variable::backward(bool retain_grad_graph)
+    {
+        auto ones = Variable(af::constant(1, this->dims()), false);
+        this->backward(ones, retain_grad_graph);
+    }
+
     Variable::DAG_t Variable::build(const Variable &var)
     {
         Cache_t cache;
diff --git a/src/nn/Init.cpp b/src/nn/Init.cpp
index 65b8088..65669f5 100644
--- a/src/nn/Init.cpp
+++ b/src/nn/Init.cpp
@@ -21,6 +21,11 @@ namespace af {
             return Variable(arr, false);
         }

+        Variable noGrad(const af::array &arr)
+        {
+            return Variable(arr, false);
+        }
+
         Variable parameter(const af::array &arr)
         {
             return Variable(arr, true);
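For context, a minimal end-to-end sketch of the new loss API before its implementation below. This is illustrative only and assumes the umbrella headers af/autograd.h and af/nn.h used by the examples:

    #include <af/autograd.h>
    #include <af/nn.h>

    using namespace af;

    int main()
    {
        // A trainable parameter and a gradient-free target (see Init.hpp above).
        auto w      = nn::parameter(af::randu(5, 1));
        auto target = nn::noGrad(af::constant(0.5, 5, 1));

        auto mse = nn::MeanSquaredError();
        auto l   = mse.forward(w, target);

        l.backward();               // seeds the graph with ones of l.dims()
        af_print(w.grad().array()); // dL/dw = 2 * (w - target) / 5
        return 0;
    }

The zero-argument backward() overload added in Variable.cpp above is what makes this pattern work for any scalar loss without building the seed gradient by hand.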
diff --git a/src/nn/Modules/Loss.cpp b/src/nn/Modules/Loss.cpp
new file mode 100644
index 0000000..ab7f80a
--- /dev/null
+++ b/src/nn/Modules/Loss.cpp
@@ -0,0 +1,60 @@
+/*******************************************************
+ * Copyright (c) 2017, ArrayFire
+ * All rights reserved.
+ *
+ * This file is distributed under 3-clause BSD license.
+ * The complete license agreement can be obtained at:
+ * http://arrayfire.com/licenses/BSD-3-Clause
+ ********************************************************/
+#include <af/autograd/Functions.hpp>
+#include <af/nn/Modules/Loss.hpp>
+
+namespace af
+{
+    namespace nn
+    {
+        using namespace autograd;
+
+        autograd::Variable Loss::forward(const autograd::Variable &inputs)
+        {
+            throw af::exception("Loss module requires both inputs and targets");
+        }
+
+        autograd::Variable MeanSquaredError::forward(const autograd::Variable &inputs,
+                                                     const autograd::Variable &targets)
+        {
+            auto df = inputs - targets;
+            auto res = mean(flat(df * df), {0});
+            return res;
+        }
+
+        autograd::Variable MeanAbsoluteError::forward(const autograd::Variable &inputs,
+                                                      const autograd::Variable &targets)
+        {
+            auto df = inputs - targets;
+            auto res = mean(flat(abs(df)), {0});
+            return res;
+        }
+
+        static autograd::Variable
+        binaryCrossEntropy(const autograd::Variable &inputs,
+                           const autograd::Variable &targets)
+        {
+            // Element-wise -[t * log(p) + (1 - t) * log(1 - p)]; assumes inputs in (0, 1)
+            return -1.0 * (targets * log(inputs) + (1 - targets) * log(1 - inputs));
+        }
+
+        autograd::Variable BinaryCrossEntropyLoss::forward(const autograd::Variable &inputs,
+                                                           const autograd::Variable &targets)
+        {
+            return mean(flat(binaryCrossEntropy(inputs, targets)), {0});
+        }
+
+        autograd::Variable BinaryCrossEntropyLoss::forward(const autograd::Variable &inputs,
+                                                           const autograd::Variable &targets,
+                                                           const autograd::Variable &weights)
+        {
+            return mean(flat(weights * binaryCrossEntropy(inputs, targets)), {0});
+        }
+    }
+}
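A quick numeric check of binaryCrossEntropy, which computes the element-wise loss -[t * log(p) + (1 - t) * log(1 - p)]: for p = 0.9 and t = 1 it should report -log(0.9), roughly 0.105. Another hypothetical snippet under the same header assumptions as above:

    #include <af/autograd.h>
    #include <af/nn.h>

    using namespace af;

    int main()
    {
        auto p = nn::noGrad(af::constant(0.9, 1));
        auto t = nn::noGrad(af::constant(1.0, 1));

        auto bce = nn::BCELoss();            // typedef from Loss.hpp
        af_print(bce.forward(p, t).array()); // ~0.10536
        return 0;
    }

Note that the formula is only defined for predictions strictly inside (0, 1), e.g. the output of nn::Sigmoid; a prediction of exactly 0 or 1 hits log(0).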