diff --git a/demos/CustomTypeNumDiff.cpp b/demos/CustomTypeNumDiff.cpp
new file mode 100644
index 000000000..d79ff647d
--- /dev/null
+++ b/demos/CustomTypeNumDiff.cpp
@@ -0,0 +1,149 @@
+//--------------------------------------------------------------------*- C++ -*-
+// clad - The C++ Clang-based Automatic Differentiator
+//
+// A demo, describing how users can numerically differentiate functions that
+// take user-defined types as parameters using clad.
+//
+// author: Garima Singh
+//----------------------------------------------------------------------------//
+
+// To run the demo please type:
+// path/to/clang  -Xclang -add-plugin -Xclang clad -Xclang -load -Xclang \
+// path/to/libclad.so  -I../include/ -x c++ -std=c++11 CustomTypeNumDiff.cpp
+//
+// A typical invocation would be:
+// ../../../../obj/Debug+Asserts/bin/clang  -Xclang -add-plugin -Xclang clad \
+// -Xclang -load -Xclang ../../../../obj/Debug+Asserts/lib/libclad.dylib     \
+// -I../include/ -x c++ -std=c++11 CustomTypeNumDiff.cpp
+
+// Necessary for clad to work include
+#include "clad/Differentiator/Differentiator.h"
+
+#include <iostream> // for std::* functions.
+
+// A user defined class to emulate fixed point decimal numbers.
+// It stores the significand and inverted exponent as integers.
+// for a number of the form ax10e-b
+// 'number' refers to 'a' and 'scale' refers to 'b'.
+class FixedPointFloat {
+  long int number;
+  unsigned scale;
+
+public:
+  FixedPointFloat(unsigned num, unsigned n) {
+    // If there are any trailing zeros to num and a non-zero scale value
+    // normalize the number so that we do not do unnecessary multiplications.
+    while (!num % 10 && n) {
+      num = std::floor(num / 10);
+      n--;
+    }
+    number = num;
+    scale = n;
+  }
+  // A function to get the double equivalent of the decimal number stored.
+  double getDouble(unsigned num, unsigned scale) {
+    return (double)num / std::pow(10, scale);
+  }
+  // A function to scale the value of a given number, we use this to
+  // facilitate evaluation of different operations on this class.
+  unsigned getScaled(unsigned num, unsigned scale) {
+    for (unsigned i = 0; i < scale; i++) {
+      num *= 10;
+    }
+    return num;
+  }
+  // Gets the double representation of the number represented by the
+  // calling object.
+  double getNum() { return getDouble(number, scale); }
+  // Some operator overloads for this class...
+  FixedPointFloat operator+(FixedPointFloat a) {
+    unsigned xa, yb;
+    // Here, let us understand this with an example.
+    // consider two numbers -> 0.005 and 0.00002 ~ 5x10e-3 and 2x10e-5
+    // converting them to FixedPointFloat will give us a pair like so:
+    // (5, 3) and (2, 5)
+    // To add the above numbers, we follow the following...
+    // Get the max scale value (i.e the minimum exponent value)
+    // in our case, it is 5.
+    unsigned maxScale = std::max(a.scale, scale);
+    // Scale both numbers with the remaining scale value.
+    // essentially, do the following:
+    // (5x10e-3 + 2x10e-5) * (10e5/10e5)
+    // (5x10e(-3 + 5) + 2x10e(-5+5)) * (1/10e5)
+    // => (500 + 2) * (1/10e5)
+    xa = getScaled(a.number, maxScale - a.scale); // = 500
+    yb = getScaled(number, maxScale - scale);     // = 2
+    // => (500 + 2) * (1/10e5) = 502x10e-5
+    return FixedPointFloat(xa + yb, maxScale); // = (502, 5)
+  }
+  // Similar to above, but here, we just subtract the number instead of adding
+  // it.
+  FixedPointFloat operator-(FixedPointFloat a) {
+    unsigned xa, yb;
+    unsigned maxScale = std::max(a.scale, scale);
+    xa = getScaled(a.number, maxScale - a.scale);
+    yb = getScaled(number, maxScale - scale);
+    return FixedPointFloat(xa - yb, maxScale);
+  }
+
+  FixedPointFloat operator*(FixedPointFloat a) {
+    unsigned xa, yb;
+    // This is fairly straight forward. Let us take the same example as before
+    // 0.005 and 0.00002 ~ 5x10e-3 and 2x10e-5
+    // This operation is equivalent to
+    // = 5x10e-3 x 2x10e-5
+    // = (5x2) x 10e(-5-3)
+    // = 10x10e-8 (which is eventually reformed to 1x10e-7)
+    return FixedPointFloat(a.number * number, a.scale + scale); // = (1, 7)
+  }
+};
+
+// This function is essential if we want to differentiate a function with
+// user-defined data types as arguments. This function describes the "rules"
+// of how to differentiate user-defined data types by specifying how to
+// introduce a small change (h) in the object of the user-defined data type.
+// Details on how to overload this function are provided in the docs.
+FixedPointFloat
+updateIndexParamValue(FixedPointFloat arg, std::size_t idx, std::size_t currIdx,
+                      int multiplier, numerical_diff::precision& h_val,
+                      std::size_t n = 0, std::size_t i = 0) {
+  if (idx == currIdx) {
+    // Here we just introduce an h of 0.000001 to all our FixedPointFloat
+    // numbers.
+    FixedPointFloat h(1, 5);
+    h_val = (h_val == 0) ? h.getNum() : h_val;
+    FixedPointFloat Fmultiplier(multiplier, 0);
+    return arg + Fmultiplier * h;
+  }
+  return arg;
+}
+
+// A simple multiplication function.
+// currently we can only numerically differentiate the following types:
+// - all scalar numeric types
+// - all types with basic arithmetic operators overloaded.
+// - all types that are implicitly convertible to some numeric type.
+double func(FixedPointFloat x, FixedPointFloat y) { return (x * y).getNum(); }
+
+int main() {
+  // Define some values which will reflect the derivative later.
+  double dx = 0, dy = 0;
+  // Define our inputs: 3x10e-3 and 7x10e-2 or 0.003 and 0.07.
+  FixedPointFloat x(3, 3), y(7, 2);
+  // Push the dx and dy values to a tape created by us.
+  // This is how we return the derivative with respect to all arguments.
+  // The order of being placed in this tape should be the same as the order of
+  // the arguments being passed to the function.
+  clad::tape<clad::array_ref<
+      double /*This should be the return value of the function you want to differentiate.*/>>
+      grad = {};
+  // Place the l-value reference of the variables in the tape.
+  grad.emplace_back(&dx);
+  grad.emplace_back(&dy);
+  // Finally, call the numerical diff method, keep the order of the arguments to
+  // the function in mind!
+  numerical_diff::central_difference(func, grad, /*printErrors=*/0, x, y);
+  // Finally print the results!
+  std::cout << "Result of df/dx is = " << dx << "\nResult of df/dx is = " << dy
+            << std::endl;
+}
diff --git a/include/clad/Differentiator/DerivativeBuilder.h b/include/clad/Differentiator/DerivativeBuilder.h
index 06afaf855..060efbdf3 100644
--- a/include/clad/Differentiator/DerivativeBuilder.h
+++ b/include/clad/Differentiator/DerivativeBuilder.h
@@ -97,17 +97,36 @@ namespace clad {
     clang::NamespaceDecl* m_BuiltinDerivativesNSD;
     /// A reference to the model to use for error estimation (if any).
     std::unique_ptr<FPErrorEstimationModel> m_EstModel = nullptr;
+    clang::NamespaceDecl* m_NumericalDiffNSD;
+    /// A flag to keep track of whether error diagnostics are requested by user
+    /// for numerical differentiation.
+    bool m_PrintNumericalDiffErrorDiag = false;
     DeclWithContext cloneFunction(const clang::FunctionDecl* FD,
-                                  clad::VisitorBase VB,
-                                  clang::DeclContext* DC,
+                                  clad::VisitorBase VB, clang::DeclContext* DC,
                                   clang::Sema& m_Sema,
                                   clang::ASTContext& m_Context,
                                   clang::SourceLocation& noLoc,
                                   clang::DeclarationNameInfo name,
                                   clang::QualType functionType);
+    /// Looks for a suitable overload for a given function.
+    ///
+    /// \param[in] DNI The identification information of the function overload
+    /// to be found.
+    /// \param[in] CallArgs The call args to be used to resolve to the
+    /// correct overload.
+    /// \param[in] forCustomDerv A flag to keep track of which
+    /// namespace we should look in for the overloads.
+    /// \param[in] namespaceShouldExist A flag to enforce assertion failure
+    /// if the overload function namespace was not found. If false and
+    /// the function containing namespace was not found, nullptr is returned.
+    ///
+    /// \returns The call expression if a suitable function overload was found,
+    /// null otherwise.
     clang::Expr*
     findOverloadedDefinition(clang::DeclarationNameInfo DNI,
-                             llvm::SmallVectorImpl<clang::Expr*>& CallArgs);
+                             llvm::SmallVectorImpl<clang::Expr*>& CallArgs,
+                             bool forCustomDerv = true,
+                             bool namespaceShouldExist = true);
     bool noOverloadExists(clang::Expr* UnresolvedLookup,
                           llvm::MutableArrayRef<clang::Expr*> ARargs);
     /// Shorthand to issues a warning or error.
@@ -130,6 +149,19 @@ namespace clad {
     /// an in-built one (TaylorApprox) or one provided by the user.
     void
     SetErrorEstimationModel(std::unique_ptr<FPErrorEstimationModel> estModel);
+    /// Fuction to set the error diagnostic printing value for numerical
+    /// differentiation.
+    ///
+    /// \param[in] \c value The new value to be set.
+    void setNumDiffErrDiag(bool value) {
+      m_PrintNumericalDiffErrorDiag = value;
+    }
+    /// Function to return if clad should emit error information for numerical
+    /// differentiation.
+    ///
+    /// \returns The flag  that controls printing of error information for
+    /// numerical differentiation.
+    bool shouldPrintNumDiffErrs() { return m_PrintNumericalDiffErrorDiag; }
     ///\brief Produces the derivative of a given function
     /// according to a given plan.
     ///
diff --git a/include/clad/Differentiator/Differentiator.h b/include/clad/Differentiator/Differentiator.h
index e7c4dddd2..78d58123d 100644
--- a/include/clad/Differentiator/Differentiator.h
+++ b/include/clad/Differentiator/Differentiator.h
@@ -12,6 +12,7 @@
 #include "BuiltinDerivatives.h"
 #include "CladConfig.h"
 #include "FunctionTraits.h"
+#include "NumericalDiff.h"
 #include "Tape.h"
 
 #include <assert.h>
@@ -58,6 +59,15 @@ namespace clad {
     return val;
   }
 
+  /// Add value to the end of the tape, return the same value.
+  /// A specialization for clad::array_ref types to use in reverse mode.
+  template <typename T, typename U>
+  CUDA_HOST_DEVICE clad::array_ref<T> push(tape<clad::array_ref<T>>& to,
+                                           U val) {
+    to.emplace_back(val);
+    return val;
+  }
+
   /// Remove the last value from the tape, return it.
   template <typename T>
   CUDA_HOST_DEVICE T pop(tape<T>& to) {
diff --git a/include/clad/Differentiator/NumericalDiff.h b/include/clad/Differentiator/NumericalDiff.h
new file mode 100644
index 000000000..e2169a9cc
--- /dev/null
+++ b/include/clad/Differentiator/NumericalDiff.h
@@ -0,0 +1,503 @@
+#ifndef CLAD_NO_NUM_DIFF
+#ifndef CLAD_NUMERICAL_DIFF_H
+#define CLAD_NUMERICAL_DIFF_H
+
+#include "ArrayRef.h"
+#include "FunctionTraits.h"
+#include "Tape.h"
+
+#include <cmath>
+#include <limits>
+#include <memory>
+#include <utility>
+
+extern "C" {
+#if defined(__APPLE__) || defined(_MSC_VER)
+void* malloc(size_t);
+void free(void* ptr);
+#else
+void* malloc(size_t) __THROW __attribute_malloc__ __wur;
+void free(void* ptr) __THROW;
+#endif
+}
+
+namespace numerical_diff {
+
+  /// A class to keep track of the memory we allocate to make sure it is
+  /// deallocated later.
+  class ManageBufferSpace {
+    /// A tape of shared pointers so we don't have to do memory management
+    /// ourselves.
+    clad::tape_impl<void*> data = {};
+
+  public:
+    ~ManageBufferSpace() { free_buffer(); }
+
+    /// A function to make some buffer space and construct object in place
+    /// if the given type has a trivial destructor and construction is
+    /// requested. This provided so that users can numerically differentiate
+    /// functions that take pointers to user-defined data types.
+    /// FIXME: implement type-erasure to support types with non-trivial
+    /// destructors.
+    ///
+    /// \tparam T the type of buffer to make.
+    /// \param[in] \c n The length for the buffer.
+    /// \param[in] \c constructInPlace True if an object has to be constructed
+    /// in place. Use for trivially constructable types ONLY.
+    /// \param[in] \c args Arguments to forward to the constructor.
+    ///
+    /// \returns A raw pointer to the newly created buffer.
+    template <typename T, typename... Args,
+              typename std::enable_if<std::is_trivially_destructible<T>::value,
+                                      bool>::type = true>
+    T* make_buffer_space(std::size_t n, bool constructInPlace, Args&&... args) {
+      void* ptr = malloc(n * sizeof(T));
+      if (constructInPlace) {
+        ::new (ptr) T(std::forward<Args>(args)...);
+      }
+      data.emplace_back(ptr);
+      return static_cast<T*>(ptr);
+    }
+
+    /// A function to make some buffer space.
+    ///
+    /// \tparam T the type of buffer to make.
+    /// \param[in] \c n The length for the buffer.
+    ///
+    /// \returns A raw pointer to the newly created buffer.
+    template <typename T> T* make_buffer_space(std::size_t n) {
+      void* ptr = malloc(n * sizeof(T));
+      data.emplace_back(ptr);
+      return static_cast<T*>(ptr);
+    }
+
+    /// A function to free the space previously allocated.
+    void free_buffer() {
+      while (data.size()) {
+        void* ptr = data.back();
+        data.pop_back();
+        free(ptr);
+      }
+    }
+  };
+
+  /// A buffer manager to request for buffer space
+  /// while forwarding reference/pointer args to the target function.
+  ManageBufferSpace bufferManager;
+
+  /// The precision to do the numerical differentiation calculations in.
+  using precision = double;
+
+  /// A function to make sure the step size being used is machine representable.
+  /// It is likely that if do not have a similar construct, we may end up with
+  /// catastrophic cancellations, hence resulting into a 0 derivative over a
+  /// large interval.
+  ///
+  /// \param[in] \c x Input to the target function.
+  /// \param[in] \c h The h to make representable.
+  ///
+  /// \returns A value of h that does not result in catastrohic cancellation.
+  precision make_h_representable(precision x, precision h) {
+    precision xph = x + h;
+    precision dx = xph - x;
+
+    // If x+h ~ x, we should look for the next closest value so that (x+h) - x
+    // != 0
+    if (dx == 0)
+      h = std::nextafter(x, std::numeric_limits<precision>::max()) - x;
+
+    return h;
+  }
+
+  /// A function to return the h value to use.
+  ///
+  /// \param[in] \c arg The input argument to adjust and get h for.
+  ///
+  /// \returns A calculated and adjusted h value.
+  precision get_h(precision arg) {
+    // First get a suitable h value, we do all of this in elevated precision
+    // (default double). Maximum error in h = eps^4/5
+    precision h = std::pow(11.25 * std::numeric_limits<precision>::epsilon(),
+                           (double)1.0 / 5.0);
+    h = make_h_representable(arg, h);
+    return h;
+  }
+
+  /// A function to print the errors associated with numerically differentiating
+  /// a function to the standard output stream. This can be enabled by
+  /// '-fprint-num-diff-errors'.
+  ///
+  /// \param[in] \c derivError The error associated with numerically
+  /// differentiating a function. This error is calculated by the remainder term
+  /// on expanding the five-point stencil series.
+  /// \param[in] \c evalError This error is associated with the evaluation of
+  /// the target functions.
+  /// \param[in] \c paramPos The position of the parameter to which this error
+  ///  belongs.
+  /// \param[in] \c arrPos The position of the array element
+  /// (-1 if parameter is scalar) to which the error belongs.
+  void printError(precision derivError, precision evalError, unsigned paramPos,
+                  int arrPos = -1) {
+    if (arrPos != -1)
+      printf("\nError Report for parameter at position %d and index %d:\n",
+             paramPos, arrPos);
+    else
+      printf("\nError Report for parameter at position %d:\n", paramPos);
+    printf("Error due to the five-point central difference is: %0.10f"
+           "\nError due to function evaluation is: %0.10f\n",
+           derivError, evalError);
+  }
+
+  /// A function to update scalar parameter values given a multiplier and the
+  /// argument value. Custom data types can be supported as input in the case
+  /// that this function is overloaded.
+  ///
+  /// This function should be overloaded as follows:
+  /// \code
+  /// template <typename <--your-data-type-->>
+  /// <--your-data-type-->* updateIndexParamValue(<--your-data-type--> arg,
+  ///                           std::size_t idx, std::size_t currIdx,
+  ///                          int multiplier, precision& h_val,
+  ///                          std::size_t n = 0, std::size_t i = 0) {
+  ///   if (idx == currIdx) {
+  ///     // Add your type specific code here that would essentially translate
+  ///     // to `return arg + multiplier * h` (h is some very small value)
+  ///     h_val = //Save the 'precision' equivalent value of h here only if it
+  ///     is zero, not doing so will lead to undefined behaviour.
+  ///   }
+  ///   return arg;
+  /// }
+  /// \endcode
+  ///
+  /// \param[in] \c arg The argument to update.
+  /// \param[in] \c idx The index of the current parameter in the target
+  /// function parameter pack, starting from 0. 
+  /// \param[in] \c currIdx The index value of the parameter to be updated.
+  /// \param[in] \c multiplier A multiplier to compound with h before updating
+  /// 'arg'.
+  /// \param[out] \c h_val The h value that was set for each argument.
+  /// \param[in] \c n The length of the input pointer/array. NOOP here.
+  /// \param[in] \c i The specific index to get the adjusted h for. NOOP here.
+  ///
+  /// \returns The updated argument.
+  template <typename T, typename std::enable_if<!std::is_pointer<T>::value,
+                                                bool>::type = true>
+  T updateIndexParamValue(T arg, std::size_t idx, std::size_t currIdx,
+                          int multiplier, precision& h_val, std::size_t n = 0,
+                          std::size_t i = 0) {
+    if (idx == currIdx) {
+      h_val = (h_val == 0) ? get_h(arg) : h_val;
+      return arg + h_val * multiplier;
+    }
+    return arg;
+  }
+
+  /// A function to updated the array/pointer arguments given a multiplier and
+  /// the argument value. Custom data types can be supported as input in the
+  /// case that this function is overloaded.
+  ///
+  /// This function should be overloaded as follows:
+  /// \code
+  /// template <typename <--your-data-type-->>
+  /// <--your-data-type-->* updateIndexParamValue(<--your-data-type--> arg,
+  ///                           std::size_t idx, std::size_t currIdx,
+  ///                          int multiplier, precision& h_val,
+  ///                          std::size_t n = 0, std::size_t i = 0) {
+  ///   if (idx == currIdx) {
+  ///     // Add your type specific code here that would essentially translate
+  ///     // to `return arg + multiplier * h` (h is some very small value)
+  ///     h_val = //Save the 'precision' equivalent value of h here only if it
+  ///     is 0, not doing so will lead to undefined behaviour.
+  ///   }
+  ///   return arg;
+  /// }
+  /// \endcode
+  ///
+  /// \warning User function should not delete the returned 'arg' replacement,
+  /// it will lead to double free or undefined behaviour.
+  ///
+  /// \param[in] \c arg The argument to update.
+  /// \param[in] \c idx The index of the current parameter in the target
+  /// function parameter pack, starting from 0.
+  /// \param[in] \c currIdx The index value of the parameter to be
+  /// updated.
+  /// \param[in] \c multiplier A multiplier to compound with h before
+  /// updating 'arg'.
+  /// \param[out] \c h_val The h value that was set for each
+  /// argument.
+  /// \param[in] \c n The length of the input pointer/array.
+  /// \param[in] \c i The specific index to get the adjusted h for.
+  ///
+  /// \returns The updated argument.
+  template <typename T>
+  T* updateIndexParamValue(T* arg, std::size_t idx, std::size_t currIdx,
+                           int multiplier, precision& h_val, std::size_t n = 0,
+                           std::size_t i = 0) {
+    // malloc some buffer space that we will free later.
+    // this is required to make sure that we are retuning a deep copy
+    // that is valid throughout the scope of the central_diff function.
+    // Temp is system owned.
+    T* temp = bufferManager.make_buffer_space<T>(n);
+    // deepcopy
+    for (std::size_t j = 0; j < n; j++) {
+      temp[j] = arg[j];
+    }
+    if (idx == currIdx) {
+      h_val = (h_val == 0) ? get_h(temp[i]) : h_val;
+      // update the specific array value.
+      temp[i] += h_val * multiplier;
+    }
+    return temp;
+  }
+
+  /// A helper function to calculate the numerical derivative of a target
+  /// function.
+  ///
+  /// \param[in] \c f The target function to numerically differentiate.
+  /// \param[out] \c _grad The gradient array reference to which the gradients
+  /// will be written.
+  /// \param[in] \c printErrors A flag to decide if we want to print numerical
+  /// diff errors estimates.
+  /// \param[in] \c idxSeq The index sequence associated with
+  /// the input parameter pack.
+  /// \param[in] \c args The arguments to the function to differentiate.
+  template <typename F, std::size_t... Ints,
+            typename RetType = typename clad::return_type<F>::type,
+            typename... Args>
+  void central_difference_helper(
+      F f, clad::tape_impl<clad::array_ref<RetType>>& _grad, bool printErrors,
+      clad::IndexSequence<Ints...> idxSeq, Args&&... args) {
+
+    std::size_t argLen = sizeof...(Args);
+    // loop over all the args, selecting each arg to get the derivative with
+    // respect to.
+    for (std::size_t i = 0; i < argLen; i++) {
+      precision h;
+      std::size_t argVecLen = _grad[i].size();
+      for (std::size_t j = 0; j < argVecLen; j++) {
+        // Reset h
+        h = 0;
+        // calculate f[x+h, x-h]
+        // f(..., x+h,...)
+        precision xaf = f(updateIndexParamValue(std::forward<Args>(args), Ints,
+                                                i, /*multiplier=*/1, h,
+                                                _grad[Ints].size(), j)...);
+        precision xbf = f(updateIndexParamValue(std::forward<Args>(args), Ints,
+                                                i, /*multiplier=*/-1, h,
+                                                _grad[Ints].size(), j)...);
+        precision xf1 = (xaf - xbf) / (h + h);
+
+        // calculate f[x+2h, x-2h]
+        precision xaf2 = f(updateIndexParamValue(std::forward<Args>(args), Ints,
+                                                 i,
+                                                 /*multiplier=*/2, h,
+                                                 _grad[Ints].size(), j)...);
+        precision xbf2 = f(updateIndexParamValue(std::forward<Args>(args), Ints,
+                                                 i,
+                                                 /*multiplier=*/-2, h,
+                                                 _grad[Ints].size(), j)...);
+        precision xf2 = (xaf2 - xbf2) / (2 * h + 2 * h);
+
+        if (printErrors) {
+          // calculate f(x+3h) and f(x-3h)
+          precision xaf3 = f(updateIndexParamValue(std::forward<Args>(args),
+                                                   Ints, i,
+                                                   /*multiplier=*/3, h,
+                                                   _grad[Ints].size(), j)...);
+          precision xbf3 = f(updateIndexParamValue(std::forward<Args>(args),
+                                                   Ints, i,
+                                                   /*multiplier=*/-3, h,
+                                                   _grad[Ints].size(), j)...);
+          // Error in derivative due to the five-point stencil formula
+          // E(f'(x)) = f`````(x) * h^4 / 30 + O(h^5) (Taylor Approx) and
+          // f`````(x) = (f[x+3h, x-3h] - 4f[x+2h, x-2h] + 5f[x+h, x-h])/(2 *
+          // h^5) Formula courtesy of 'Abramowitz, Milton; Stegun, Irene A.
+          // (1970), Handbook of Mathematical Functions with Formulas, Graphs,
+          // and Mathematical Tables, Dover. Ninth printing. Table 25.2.`.
+          precision error = ((xaf3 - xbf3) - 4 * (xaf2 - xbf2) +
+                             5 * (xaf - xbf)) /
+                            (60 * h);
+          // This is the error in evaluation of all the function values.
+          precision evalError = std::numeric_limits<precision>::epsilon() *
+                                (std::fabs(xaf2) + std::fabs(xbf2) +
+                                 8 * (std::fabs(xaf) + std::fabs(xbf))) /
+                                (12 * h);
+          // Finally print the error to standard ouput.
+          printError(std::fabs(error), evalError, i, argVecLen > 1 ? j : -1);
+        }
+
+        // five-point stencil formula = (4f[x+h, x-h] - f[x+2h, x-2h])/3
+        _grad[i][j] = 4.0 * xf1 / 3.0 - xf2 / 3.0;
+        bufferManager.free_buffer();
+      }
+    }
+  }
+
+  /// A function to calculate the derivative of a function using the central
+  /// difference formula. Note: we do not propogate errors resulting in the
+  /// following function, it is likely the errors are large enough to be of
+  /// significance, hence it is only wise to use these methods when it is
+  /// absolutely necessary.
+  ///
+  /// \param[in] \c f The target function to numerically differentiate.
+  /// \param[out] \c _grad The gradient array reference to which the gradients
+  /// will be written.
+  /// \param[in] \c printErrors A flag to decide if we want to print numerical
+  /// diff errors estimates.
+  /// \param[in] \c args The arguments to the function to differentiate.
+  template <typename F, std::size_t... Ints,
+            typename RetType = typename clad::return_type<F>::type,
+            typename... Args>
+  void central_difference(F f, clad::tape_impl<clad::array_ref<RetType>>& _grad,
+                          bool printErrors, Args&&... args) {
+    return central_difference_helper(f, _grad, printErrors,
+                                     clad::MakeIndexSequence<sizeof...(Args)>{},
+                                     std::forward<Args>(args)...);
+  }
+
+  /// A helper function to calculate ther derivative with respect to a
+  /// single input.
+  ///
+  /// \param[in] \c f The target function to numerically differentiate.
+  /// \param[in] \c arg The argument with respect to which differentiation is
+  /// requested.
+  /// \param[in] \c n The positional value of 'arg'.
+  /// \param[in] \c arrIdx The index value of the input pointer/array to
+  /// differentiate with respect to.
+  /// \param[in] \c arrLen The length of the pointer/array.
+  /// \param[in] \c printErrors A flag to decide if we want to print numerical
+  /// diff errors estimates.
+  /// \param[in] \c idxSeq The index sequence associated with the input
+  /// parameter pack.
+  /// \param[in] \c args The arguments to the function to differentiate.
+  ///
+  /// \returns The numerical derivative
+  template <typename F, typename T, std::size_t... Ints, typename... Args>
+  precision forward_central_difference_helper(
+      F f, T arg, std::size_t n, int arrIdx, std::size_t arrLen,
+      bool printErrors, clad::IndexSequence<Ints...> idxSeq, Args&&... args) {
+
+    precision xaf, xbf, xaf2, xbf2, xf1, xf2, dx, h = 0;
+    // calculate f[x+h, x-h]
+    xaf = f(updateIndexParamValue(std::forward<Args>(args), Ints, n,
+                                  /*multiplier=*/1, h, arrLen, arrIdx)...);
+    xbf = f(updateIndexParamValue(std::forward<Args>(args), Ints, n,
+                                  /*multiplier=*/-1, h, arrLen, arrIdx)...);
+    xf1 = (xaf - xbf) / (h + h);
+
+    // calculate f[x+2h, x-2h]
+    xaf2 = f(updateIndexParamValue(std::forward<Args>(args), Ints, n,
+                                   /*multiplier=*/2, h, arrLen, arrIdx)...);
+    xbf2 = f(updateIndexParamValue(std::forward<Args>(args), Ints, n,
+                                   /*multiplier=*/-2, h, arrLen, arrIdx)...);
+    xf2 = (xaf2 - xbf2) / (2 * h + 2 * h);
+
+    if (printErrors) {
+      // calculate f(x+3h) and f(x-3h)
+      precision xaf3 = f(
+          updateIndexParamValue(std::forward<Args>(args), Ints, n,
+                                /*multiplier=*/3, h, arrLen, arrIdx)...);
+      precision xbf3 = f(
+          updateIndexParamValue(std::forward<Args>(args), Ints, n,
+                                /*multiplier=*/-3, h, arrLen, arrIdx)...);
+      // Error in derivative due to the five-point stencil formula
+      // E(f'(x)) = f`````(x) * h^4 / 30 + O(h^5) (Taylor Approx) and
+      // f`````(x) = (f[x+3h, x-3h] - 4f[x+2h, x-2h] + 5f[x+h, x-h])/(2 * h^5)
+      // Formula courtesy of 'Abramowitz, Milton; Stegun, Irene A. (1970),
+      // Handbook of Mathematical Functions with Formulas, Graphs, and
+      // Mathematical Tables, Dover. Ninth printing. Table 25.2.`.
+      precision error = ((xaf3 - xbf3) - 4 * (xaf2 - xbf2) + 5 * (xaf - xbf)) /
+                        (60 * h);
+      // This is the error in evaluation of all the function values.
+      precision evalError = std::numeric_limits<precision>::epsilon() *
+                            (std::fabs(xaf2) + std::fabs(xbf2) +
+                             8 * (std::fabs(xaf) + std::fabs(xbf))) /
+                            (12 * h);
+      // Finally print the error to standard ouput.
+      printError(std::fabs(error), evalError, n, arrIdx);
+    }
+
+    // five-point stencil formula = (4f[x+h, x-h] - f[x+2h, x-2h])/3
+    dx = 4.0 * xf1 / 3.0 - xf2 / 3.0;
+    return dx;
+  }
+
+  /// A function to calculate the derivative of a function using the central
+  /// difference formula. Note: we do not propogate errors resulting in the
+  /// following function, it is likely the errors are large enough to be of
+  /// significance, hence it is only wise to use these methods when it is
+  /// absolutely necessary. This specific version calculates the numerical
+  /// derivative of the target function with respect to only one scalar input
+  /// parameter.
+  /// \note This function will evaluate to incorrect results if one of the
+  /// following conditions are satisfied:
+  /// 1) A reference/pointer, which is not 'arg', is being modified in the
+  /// function,
+  /// 2) A reference/pointer, which is not 'arg', has a length > arrLen.
+  /// If this is the case, you may specify the greatest length any non-scalar
+  /// input may have.
+  /// To overcome these, you may use the central_difference method instead.
+  ///
+  /// \param[in] \c f The target function to numerically differentiate.
+  /// \param[in] \c arg The argument with respect to which differentiation is
+  /// requested.
+  /// \param[in] \c n The positional value of 'arg'.
+  /// \param[in] \c printErrors A flag to decide if we want to print numerical
+  /// diff errors estimates.
+  /// \param[in] \c args The arguments to the function to differentiate.
+  ///
+  /// \returns The derivative value.
+  template <
+      typename F, typename T, typename... Args,
+      typename std::enable_if<!std::is_pointer<T>::value, bool>::type = true>
+  precision forward_central_difference(F f, T arg, std::size_t n,
+                                       bool printErrors, Args&&... args) {
+    return forward_central_difference_helper(f, arg, n, /*arrIdx=*/-1,
+                                             /*arrLen=*/0, printErrors,
+                                             clad::MakeIndexSequence<sizeof...(
+                                                 Args)>{},
+                                             std::forward<Args>(args)...);
+  }
+
+  /// A function to calculate the derivative of a function using the central
+  /// difference formula. Note: we do not propogate errors resulting in the
+  /// following function, it is likely the errors are large enough to be of
+  /// significance, hence it is only wise to use these methods when it is
+  /// absolutely necessary. This specific version calculates the numerical
+  /// derivative of the target function with respect to only one non-scalar
+  /// input parameter.
+  /// \note This function will evaluate to incorrect results if one of the
+  /// following conditions are satisfied:
+  /// 1) A reference/pointer, which is not 'arg', is being modified in the
+  /// function,
+  /// 2) A reference/pointer, which is not 'arg', has a length > arrLen.
+  /// If this is the case, you may specify the greatest length any non-scalar
+  /// input may have.
+  /// To overcome these, you may use the central_difference method instead.
+  ///
+  /// \param[in] \c f The target function to numerically differentiate.
+  /// \param[in] \c arg The argument with respect to which differentiation is
+  /// requested.
+  /// \param[in] \c n The positional value of 'arg'.
+  /// \param[in] \c arrLen The length of the pointer/array.
+  /// \param[in] \c arrIdx The specific index value to differentiate the target
+  /// function with respect to.
+  /// \param[in] \c printErrors A flag to decide if we want to print numerical
+  /// diff errors estimates.
+  /// \param[in] \c args The arguments to the function to differentiate.
+  ///
+  /// \returns The derivative value.
+  template <typename F, typename T, typename... Args>
+  precision forward_central_difference(F f, T arg, std::size_t n,
+                                       std::size_t arrLen, int arrIdx,
+                                       bool printErrors, Args&&... args) {
+    return forward_central_difference_helper(f, arg, n, arrIdx, arrLen,
+                                             printErrors,
+                                             clad::MakeIndexSequence<sizeof...(
+                                                 Args)>{},
+                                             std::forward<Args>(args)...);
+  }
+} // namespace numerical_diff
+
+#endif // CLAD_NUMERICAL_DIFF_H
+#endif // CLAD_NO_NUM_DIFF
diff --git a/include/clad/Differentiator/Tape.h b/include/clad/Differentiator/Tape.h
index beb715e75..969b2994b 100644
--- a/include/clad/Differentiator/Tape.h
+++ b/include/clad/Differentiator/Tape.h
@@ -81,6 +81,16 @@ namespace clad {
       return begin()[_size - 1];
     }
 
+    CUDA_HOST_DEVICE reference operator[](std::size_t i) {
+      assert(i < _size);
+      return begin()[i];
+    }
+
+    CUDA_HOST_DEVICE const_reference operator[](std::size_t i) const {
+      assert(i < _size);
+      return begin()[i];
+    }
+
     /// Remove the last value from the tape.
     CUDA_HOST_DEVICE void pop_back() {
       assert(_size);
diff --git a/include/clad/Differentiator/VisitorBase.h b/include/clad/Differentiator/VisitorBase.h
index d3c8897e5..fa07407b0 100644
--- a/include/clad/Differentiator/VisitorBase.h
+++ b/include/clad/Differentiator/VisitorBase.h
@@ -453,6 +453,49 @@ namespace clad {
     clang::Expr*
     BuildArrayRefSliceExpr(clang::Expr* Base,
                            llvm::MutableArrayRef<clang::Expr*> Args);
+    /// A function to get the single argument "forward_central_difference"
+    /// call expression for the given arguments.
+    ///
+    /// \param[in] targetFuncCall The function to get the derivative for.
+    /// \param[in] targetArg The argument to get the derivative with respect to.
+    /// \param[in] targetPos The relative position of 'targetArg'.
+    /// \param[in] numArgs The total number of 'args'.
+    /// \param[in] args All the arguments to the target function.
+    ///
+    /// \returns The derivative function call.
+    clang::Expr* GetSingleArgCentralDiffCall(
+        clang::Expr* targetFuncCall, clang::Expr* targetArg, 
+        unsigned targetPos, unsigned numArgs, 
+        llvm::SmallVectorImpl<clang::Expr*>& args);
+    /// A function to get the multi-argument "central_difference"
+    /// call expression for the given arguments.
+    ///
+    /// \param[in] targetFuncCall The function to get the derivative for.
+    /// \param[in] retType The return type of the target call expression.
+    /// \param[in] numArgs The total number of 'args'.
+    /// \param[in] NumericalDiffMultiArg The built statements to add to block
+    /// later.
+    /// \param[in] args All the arguments to the target function.
+    /// \param[in] outputArgs The output gradient arguments.
+    ///
+    /// \returns The derivative function call.
+    clang::Expr* GetMultiArgCentralDiffCall(
+        clang::Expr* targetFuncCall, clang::QualType retType, 
+        unsigned numArgs,
+        llvm::SmallVectorImpl<clang::Stmt*>& NumericalDiffMultiArg,
+        llvm::SmallVectorImpl<clang::Expr*>& args,
+        llvm::SmallVectorImpl<clang::Expr*>& outputArgs);
+    /// Emits diagnostic messages on differentiation (or lack thereof) for 
+    /// call expressions.
+    ///
+    /// \param[in] \c funcName The name of the underlying function of the 
+    /// call expression.
+    /// \param[in] \c srcLoc Any associated source location information. 
+    /// \param[in] \c isDerived A flag to determine if differentiation of the
+    /// call expression was successful.
+    void CallExprDiffDiagnostics(llvm::StringRef funcName,
+                                 clang::SourceLocation srcLoc,
+                                 bool isDerived);
 
   public:
     /// Rebuild a sequence of nested namespaces ending with DC.
diff --git a/lib/Differentiator/DerivativeBuilder.cpp b/lib/Differentiator/DerivativeBuilder.cpp
index 7927d2bf0..1b0d08f92 100644
--- a/lib/Differentiator/DerivativeBuilder.cpp
+++ b/lib/Differentiator/DerivativeBuilder.cpp
@@ -37,7 +37,7 @@ namespace clad {
   DerivativeBuilder::DerivativeBuilder(clang::Sema& S, plugin::CladPlugin& P)
     : m_Sema(S), m_CladPlugin(P), m_Context(S.getASTContext()),
       m_NodeCloner(new utils::StmtClone(m_Sema, m_Context)),
-      m_BuiltinDerivativesNSD(nullptr) {}
+      m_BuiltinDerivativesNSD(nullptr), m_NumericalDiffNSD(nullptr) {}
 
   DerivativeBuilder::~DerivativeBuilder() {}
 
diff --git a/lib/Differentiator/ForwardModeVisitor.cpp b/lib/Differentiator/ForwardModeVisitor.cpp
index d36e6e1bf..faebc8ba6 100644
--- a/lib/Differentiator/ForwardModeVisitor.cpp
+++ b/lib/Differentiator/ForwardModeVisitor.cpp
@@ -632,7 +632,6 @@ namespace clad {
           // Populate CandidateSet.
           m_Sema.buildOverloadedCallSet(
               S, UnresolvedLookup, ULE, ARargs, Loc, &CandidateSet, &result);
-
           OverloadCandidateSet::iterator Best;
           OverloadingResult OverloadResult = CandidateSet.BestViableFunction(
               m_Sema, UnresolvedLookup->getBeginLoc(), Best);
@@ -644,9 +643,10 @@ namespace clad {
     return false;
   }
 
-  static NamespaceDecl* LookupBuiltinDerivativesNSD(ASTContext& C, Sema& S) {
-    // Find the builtin derivatives namespace
-    DeclarationName Name = &C.Idents.get("custom_derivatives");
+  static NamespaceDecl* LookupNSD(ASTContext& C, Sema& S,
+                                  llvm::StringRef namespc, bool shouldExist) {
+    // Find the builtin derivatives/numerical diff namespace
+    DeclarationName Name = &C.Idents.get(namespc);
     LookupResult R(S,
                    Name,
                    SourceLocation(),
@@ -655,23 +655,44 @@ namespace clad {
     S.LookupQualifiedName(R,
                           C.getTranslationUnitDecl(),
                           /*allowBuiltinCreation*/ false);
-    assert(!R.empty() && "Cannot find builtin derivatives!");
+    if (!shouldExist && R.empty())
+      return nullptr;
+    assert(!R.empty() && "Cannot find the specified namespace!");
     return cast<NamespaceDecl>(R.getFoundDecl());
   }
 
   Expr* DerivativeBuilder::findOverloadedDefinition(
-      DeclarationNameInfo DNI, llvm::SmallVectorImpl<Expr*>& CallArgs) {
-    if (!m_BuiltinDerivativesNSD)
-      m_BuiltinDerivativesNSD = LookupBuiltinDerivativesNSD(m_Context, m_Sema);
-
+      DeclarationNameInfo DNI, llvm::SmallVectorImpl<Expr*>& CallArgs,
+      bool forCustomDerv /*=true*/, bool namespaceShouldExist /*=true*/) {
+    NamespaceDecl* NSD;
+    std::string namespaceID;
+    if (forCustomDerv) {
+      NSD = m_BuiltinDerivativesNSD;
+      namespaceID = "custom_derivatives";
+    } else {
+      NSD = m_NumericalDiffNSD;
+      namespaceID = "numerical_diff";
+    }
+    if (!NSD){
+      NSD = LookupNSD(m_Context, m_Sema, namespaceID, namespaceShouldExist);
+      if (!forCustomDerv && !NSD) {
+        diag(DiagnosticsEngine::Warning, noLoc,
+             "Numerical differentiation is diabled using the "
+             "-DCLAD_NO_NUM_DIFF "
+             "flag, this means that every try to numerically differentiate a "
+             "function will fail! Remove the flag to revert to default "
+             "behaviour.");
+        return nullptr;
+      }
+    }
     LookupResult R(m_Sema, DNI, Sema::LookupOrdinaryName);
     m_Sema.LookupQualifiedName(R,
-                               m_BuiltinDerivativesNSD,
+                               NSD,
                                /*allowBuiltinCreation*/ false);
     Expr* OverloadedFn = 0;
     if (!R.empty()) {
       CXXScopeSpec CSS;
-      CSS.Extend(m_Context, m_BuiltinDerivativesNSD, noLoc, noLoc);
+      CSS.Extend(m_Context, NSD, noLoc, noLoc);
       Expr* UnresolvedLookup =
           m_Sema.BuildDeclarationNameExpr(CSS, R, /*ADL*/ false).get();
 
@@ -771,28 +792,33 @@ namespace clad {
 
       FunctionDecl* derivedFD =
           plugin::ProcessDiffRequest(m_CladPlugin, request);
-      // Clad failed to derive it.
+      // If clad failed to derive it, try finding its derivative using
+      // numerical diff.
       if (!derivedFD) {
-        // Function was not derived => issue a warning.
-        diag(DiagnosticsEngine::Warning,
-             CE->getBeginLoc(),
-             "function '%0' was not differentiated because clad failed to "
-             "differentiate it and no suitable overload was found in "
-             "namespace 'custom_derivatives'",
-             {FD->getNameAsString()});
-
-        auto zero =
-            ConstantFolder::synthesizeLiteral(m_Context.IntTy, m_Context, 0);
-        return StmtDiff(call, zero);
+        // FIXME: Extend this for multiarg support
+        // Check if the function is eligible for numerical differentiation.
+        if (CE->getNumArgs() == 1) {
+          Expr* fnCallee = cast<CallExpr>(call)->getCallee();
+          callDiff = GetSingleArgCentralDiffCall(fnCallee, CallArgs[0],
+                                                 /*targetPos=*/0, /*numArgs=*/1,
+                                                 CallArgs);
+        }
+        CallExprDiffDiagnostics(FD->getNameAsString(), CE->getBeginLoc(),
+                                  callDiff);
+        if (!callDiff) {
+          auto zero =
+              ConstantFolder::synthesizeLiteral(m_Context.IntTy, m_Context, 0);
+          return StmtDiff(call, zero);
+        }
+      } else {
+        callDiff = m_Sema
+                       .ActOnCallExpr(getCurrentScope(),
+                                      BuildDeclRef(derivedFD),
+                                      noLoc,
+                                      llvm::MutableArrayRef<Expr*>(CallArgs),
+                                      noLoc)
+                       .get();
       }
-
-      callDiff = m_Sema
-                     .ActOnCallExpr(getCurrentScope(),
-                                    BuildDeclRef(derivedFD),
-                                    noLoc,
-                                    llvm::MutableArrayRef<Expr*>(CallArgs),
-                                    noLoc)
-                     .get();
     }
 
     if (Multiplier)
@@ -1345,4 +1371,4 @@ namespace clad {
   StmtDiff ForwardModeVisitor::VisitBreakStmt(const BreakStmt* stmt) {
     return StmtDiff(Clone(stmt));
   }
-} // end namespace clad
\ No newline at end of file
+} // end namespace clad
diff --git a/lib/Differentiator/ReverseModeVisitor.cpp b/lib/Differentiator/ReverseModeVisitor.cpp
index ae6157bb5..42d0ba375 100644
--- a/lib/Differentiator/ReverseModeVisitor.cpp
+++ b/lib/Differentiator/ReverseModeVisitor.cpp
@@ -1151,6 +1151,9 @@ namespace clad {
     llvm::SmallVector<Expr*, 16> CallArgDx{};
     // Stores the call arguments for the derived function
     llvm::SmallVector<Expr*, 16> DerivedCallArgs{};
+    // Stores tape decl and pushes for multiarg numerically differentiated
+    // calls.
+    llvm::SmallVector<Stmt*, 16> NumericalDiffMultiArg{};
     // If the result does not depend on the result of the call, just clone
     // the call and visit arguments (since they may contain side-effects like
     // f(x = y))
@@ -1240,6 +1243,9 @@ namespace clad {
       if (OverloadedDerivedFn)
         asGrad = false;
     }
+    // Store all the derived call output args (if any)
+    llvm::SmallVector<Expr*, 16> DerivedCallOutputArgs{};
+
     // If it has more args or f_darg0 was not found, we look for its gradient.
     if (!OverloadedDerivedFn) {
       IdentifierInfo* II =
@@ -1274,7 +1280,6 @@ namespace clad {
       } else {
         size_t idx = 0;
 
-        llvm::SmallVector<Expr*, 16> DerivedCallOutputArgs{};
         auto ArrayDiffArgType = GetCladArrayOfType(CEType.getCanonicalType());
         for (auto arg : CallArgDx) {
           ResultDecl = nullptr;
@@ -1365,23 +1370,44 @@ namespace clad {
             plugin::ProcessDiffRequest(m_CladPlugin, request);
         // Clad failed to derive it.
         if (!derivedFD) {
-          // Function was not derived => issue a warning.
-          diag(DiagnosticsEngine::Warning,
-               CE->getBeginLoc(),
-               "function '%0' was not differentiated because clad failed to "
-               "differentiate it and no suitable overload was found in "
-               "namespace 'custom_derivatives'",
-               {FD->getNameAsString()});
-          return StmtDiff(Clone(CE));
-        }
-
-        OverloadedDerivedFn =
-            m_Sema
-                .ActOnCallExpr(getCurrentScope(), BuildDeclRef(derivedFD),
-                               noLoc,
-                               llvm::MutableArrayRef<Expr*>(DerivedCallArgs),
-                               noLoc)
-                .get();
+          // Try numerically deriving it.
+          // Build a clone call expression so that we can correctly
+          // scope the function to be differentiated.
+          Expr* call = m_Sema
+                     .ActOnCallExpr(getCurrentScope(),
+                                    Clone(CE->getCallee()),
+                                    noLoc,
+                                    llvm::MutableArrayRef<Expr*>(CallArgs),
+                                    noLoc)
+                     .get();
+          Expr* fnCallee = cast<CallExpr>(call)->getCallee();
+          if (NArgs == 1) {
+            OverloadedDerivedFn = GetSingleArgCentralDiffCall(fnCallee,
+                                                              DerivedCallArgs
+                                                                  [0],
+                                                              /*targetPos=*/0,
+                                                              /*numArgs=*/1,
+                                                              DerivedCallArgs);
+            asGrad = !OverloadedDerivedFn;
+          } else {
+            OverloadedDerivedFn = GetMultiArgCentralDiffCall(
+                fnCallee, CEType.getCanonicalType(), CE->getNumArgs(),
+                NumericalDiffMultiArg, DerivedCallArgs, DerivedCallOutputArgs);
+          }
+          CallExprDiffDiagnostics(FD->getNameAsString(), CE->getBeginLoc(),
+                                  OverloadedDerivedFn);
+          if (!OverloadedDerivedFn)
+            return StmtDiff(Clone(CE));
+        } else {
+          OverloadedDerivedFn = m_Sema
+                                    .ActOnCallExpr(getCurrentScope(),
+                                                   BuildDeclRef(derivedFD),
+                                                   noLoc,
+                                                   llvm::MutableArrayRef<Expr*>(
+                                                       DerivedCallArgs),
+                                                   noLoc)
+                                    .get();
+        }    
       }
     }
 
@@ -1433,6 +1459,9 @@ namespace clad {
           // Insert the _gradX declaration statements
           it = block.insert(it, ArgDeclStmts.begin(), ArgDeclStmts.end());
           it += ArgDeclStmts.size();
+          it = block.insert(it, NumericalDiffMultiArg.begin(),
+                            NumericalDiffMultiArg.end());
+          it += NumericalDiffMultiArg.size();
           // Insert the CallExpr to the derived function
           block.insert(it, OverloadedDerivedFn);
           // If in error estimation, build the statement for the error
diff --git a/lib/Differentiator/VisitorBase.cpp b/lib/Differentiator/VisitorBase.cpp
index 6b0fccb99..8146955a9 100644
--- a/lib/Differentiator/VisitorBase.cpp
+++ b/lib/Differentiator/VisitorBase.cpp
@@ -649,4 +649,100 @@ namespace clad {
   bool VisitorBase::isArrayRefType(QualType QT) {
     return QT.getAsString().find("clad::array_ref") != std::string::npos;
   }
+
+  Expr* VisitorBase::GetSingleArgCentralDiffCall(
+      Expr* targetFuncCall, Expr* targetArg, unsigned targetPos,
+      unsigned numArgs, llvm::SmallVectorImpl<Expr*>& args) {
+    QualType argType = targetArg->getType();
+    int printErrorInf = m_Builder.shouldPrintNumDiffErrs();
+    bool isSupported = argType->isArithmeticType();
+    if (!isSupported)
+      return nullptr;
+    IdentifierInfo* II = &m_Context.Idents.get("forward_central_difference");
+    DeclarationName name(II);
+    DeclarationNameInfo DNInfo(name, noLoc);
+    // Build function args.
+    llvm::SmallVector<Expr*, 16U> NumDiffArgs;
+    NumDiffArgs.push_back(targetFuncCall);
+    NumDiffArgs.push_back(targetArg);
+    NumDiffArgs.push_back(ConstantFolder::synthesizeLiteral(m_Context.IntTy,
+                                                            m_Context,
+                                                            targetPos));
+    NumDiffArgs.push_back(ConstantFolder::synthesizeLiteral(m_Context.IntTy,
+                                                            m_Context,
+                                                            printErrorInf));
+    NumDiffArgs.insert(NumDiffArgs.end(), args.begin(), args.begin() + numArgs);
+    // Return the found overload.
+    return m_Builder.findOverloadedDefinition(DNInfo, NumDiffArgs,
+                                              /*forCustomDerv=*/false,
+                                              /*namespaceShouldExist=*/false);
+  }
+
+  Expr* VisitorBase::GetMultiArgCentralDiffCall(
+      Expr* targetFuncCall, QualType retType, unsigned numArgs,
+      llvm::SmallVectorImpl<Stmt*>& NumericalDiffMultiArg,
+      llvm::SmallVectorImpl<Expr*>& args,
+      llvm::SmallVectorImpl<Expr*>& outputArgs) {
+    int printErrorInf = m_Builder.shouldPrintNumDiffErrs();
+    IdentifierInfo* II = &m_Context.Idents.get("central_difference");
+    DeclarationName name(II);
+    DeclarationNameInfo DNInfo(name, noLoc);
+    llvm::SmallVector<Expr*, 16U> NumDiffArgs = {};
+    NumDiffArgs.push_back(targetFuncCall);
+    // build the clad::tape<clad::array_ref>> = {};
+    QualType RefType = GetCladArrayRefOfType(retType);
+    QualType TapeType = GetCladTapeOfType(RefType);
+    auto VD = BuildVarDecl(TapeType);
+    NumericalDiffMultiArg.push_back(BuildDeclStmt(VD));
+    Expr* TapeRef = BuildDeclRef(VD);
+    NumDiffArgs.push_back(TapeRef);
+    NumDiffArgs.push_back(ConstantFolder::synthesizeLiteral(m_Context.IntTy,
+                                                            m_Context,
+                                                            printErrorInf));
+
+    // Build the tape push expressions.
+    VD->setLocation(m_Function->getLocation());
+    m_Sema.AddInitializerToDecl(VD, getZeroInit(TapeType), false);
+    CXXScopeSpec CSS;
+    CSS.Extend(m_Context, GetCladNamespace(), noLoc, noLoc);
+    LookupResult& Push = GetCladTapePush();
+    auto PushDRE = m_Sema.BuildDeclarationNameExpr(CSS, Push, /*ADL*/ false)
+                       .get();
+    Expr* PushExpr;
+    for (unsigned i = 0, e = numArgs; i < e; i++) {
+      Expr* callArgs[] = {TapeRef, outputArgs[i]};
+      PushExpr = m_Sema
+                     .ActOnCallExpr(getCurrentScope(), PushDRE, noLoc, callArgs,
+                                    noLoc)
+                     .get();
+      NumericalDiffMultiArg.push_back(PushExpr);
+      NumDiffArgs.push_back(args[i]);
+    }
+
+    return m_Builder.findOverloadedDefinition(DNInfo, NumDiffArgs,
+                                              /*forCustomDerv=*/false,
+                                              /*namespaceShouldExist=*/false);
+  }
+
+  void VisitorBase::CallExprDiffDiagnostics(llvm::StringRef funcName,
+                                 SourceLocation srcLoc, bool isDerived){
+    if (!isDerived) {
+      // Function was not derived => issue a warning.
+      diag(DiagnosticsEngine::Warning,
+           srcLoc,
+           "function '%0' was not differentiated because clad failed to "
+           "differentiate it and no suitable overload was found in "
+           "namespace 'custom_derivatives', and function may not be "
+            "eligible for numerical differentiation.",
+           {funcName});
+    } else {
+      diag(DiagnosticsEngine::Warning, noLoc,
+           "Falling back to numerical differentiation for '%0' since no "
+           "suitable overload was found and clad could not derive it. "
+           "To disable this feature, compile your programs with "
+           "-DCLAD_NO_NUM_DIFF.",
+           {funcName});
+    }
+  }
+
 } // end namespace clad
diff --git a/test/FirstDerivative/CodeGenSimple.C b/test/FirstDerivative/CodeGenSimple.C
index ea5a4fb12..0759b15a2 100644
--- a/test/FirstDerivative/CodeGenSimple.C
+++ b/test/FirstDerivative/CodeGenSimple.C
@@ -7,7 +7,7 @@
 extern "C" int printf(const char* fmt, ...);
 
 int f_1(int x) {
-   printf("I am being run!\n"); //expected-warning{{attempted to differentiate unsupported statement, no changes applied}} //expected-warning{{function 'printf' was not differentiated because clad failed to differentiate it and no suitable overload was found in namespace 'custom_derivatives'}}
+   printf("I am being run!\n"); //expected-warning{{attempted to differentiate unsupported statement, no changes applied}} //expected-warning{{function 'printf' was not differentiated because clad failed to differentiate it and no suitable overload was found in namespace 'custom_derivatives', and function may not be eligible for numerical differentiation.}}
    return x * x;
 }
 // CHECK: int f_1_darg0(int x) {
diff --git a/test/FirstDerivative/FunctionCalls.C b/test/FirstDerivative/FunctionCalls.C
index b12491f85..0ad5cf2b4 100644
--- a/test/FirstDerivative/FunctionCalls.C
+++ b/test/FirstDerivative/FunctionCalls.C
@@ -1,9 +1,11 @@
-// RUN: %cladclang %s -I%S/../../include -fsyntax-only -Xclang -verify 2>&1 | FileCheck %s
+// RUN: %cladnumdiffclang %s -I%S/../../include -fsyntax-only -Xclang -verify 2>&1 | FileCheck %s
 
 //CHECK-NOT: {{.*error|warning|note:.*}}
 
 #include "clad/Differentiator/Differentiator.h"
 
+#include <math.h>
+
 int printf(const char* fmt, ...);
 int no_body(int x);
 int custom_fn(int x);
@@ -74,7 +76,7 @@ float test_3() {
 // CHECK-NOT: float test_3_darg0() {
 
 float test_4(int x) {
-  return overloaded(); // expected-warning {{function 'overloaded' was not differentiated because clad failed to differentiate it and no suitable overload was found in namespace 'custom_derivatives'}}
+  return overloaded(); // expected-warning {{function 'overloaded' was not differentiated because clad failed to differentiate it and no suitable overload was found in namespace 'custom_derivatives', and function may not be eligible for numerical differentiation.}}
 }
 
 // CHECK: float test_4_darg0(int x) {
diff --git a/test/Gradient/Gradients.C b/test/Gradient/Gradients.C
index 606bd0164..1431c84bb 100644
--- a/test/Gradient/Gradients.C
+++ b/test/Gradient/Gradients.C
@@ -1,4 +1,4 @@
-// RUN: %cladclang %s -lm -I%S/../../include -oGradients.out 2>&1 | FileCheck %s
+// RUN: %cladnumdiffclang -lm -lstdc++ %s  -I%S/../../include -oGradients.out 2>&1 | FileCheck %s
 // RUN: ./Gradients.out | FileCheck -check-prefix=CHECK-EXEC %s
 
 //CHECK-NOT: {{.*error|warning|note:.*}}
@@ -872,7 +872,7 @@ void f_const_grad(const double a, const double b, clad::array_ref<double> _d_a,
     printf("Result is = {%.2f, %.2f}\n", result[0], result[1]);                \
   }
 
-int main() { // expected-no-diagnostics
+int main() {
   double result[2];
 
   TEST(f_add1, 1, 1); // CHECK-EXEC: Result is = {1.00, 1.00}
diff --git a/test/Misc/Args.C b/test/Misc/Args.C
index ec1f2c679..58e44c751 100644
--- a/test/Misc/Args.C
+++ b/test/Misc/Args.C
@@ -6,6 +6,7 @@
 // CHECK_HELP-NEXT: -fdump-derived-fn-ast
 // CHECK_HELP-NEXT: -fgenerate-source-file
 // CHECK_HELP-NEXT: -fcustom-estimation-model
+// CHECK_HELP-NEXT: -fprint-num-diff-errors
 // CHECK_HELP-NEXT: -help
 
 // RUN: clang -fsyntax-only -fplugin=%cladlib -Xclang -plugin-arg-clad\
diff --git a/test/Misc/RunDemos.C b/test/Misc/RunDemos.C
index 41f4578ec..62873cad7 100644
--- a/test/Misc/RunDemos.C
+++ b/test/Misc/RunDemos.C
@@ -237,3 +237,13 @@
 //CHECK_GRADIENT_DESCENT-NEXT:         * _d_x += _r2;
 //CHECK_GRADIENT_DESCENT-NEXT:     }
 //CHECK_GRADIENT_DESCENT-NEXT: }
+
+//-----------------------------------------------------------------------------/
+// Demo: Custom Type Numerical Diff
+//-----------------------------------------------------------------------------/
+// RUN: %cladnumdiffclang -lm -lstdc++ %S/../../demos/CustomTypeNumDiff.cpp -I%S/../../include -oCustomTypeNumDiff.out
+// RUN: ./CustomTypeNumDiff.out | FileCheck -check-prefix CHECK_CUSTOM_NUM_DIFF_EXEC %s
+// CHECK_CUSTOM_NUM_DIFF_EXEC: Result of df/dx is = 0.07
+// CHECK_CUSTOM_NUM_DIFF_EXEC: Result of df/dx is = 0.003
+
+
diff --git a/test/NumericalDiff/GradientMultiArg.C b/test/NumericalDiff/GradientMultiArg.C
new file mode 100644
index 000000000..e58d32b89
--- /dev/null
+++ b/test/NumericalDiff/GradientMultiArg.C
@@ -0,0 +1,45 @@
+// RUN: %cladnumdiffclang -lm -lstdc++ %s -I%S/../../include -oGradientMultiArg.out 2>&1 | FileCheck -check-prefix=CHECK %s
+// RUN: ./GradientMultiArg.out | FileCheck -check-prefix=CHECK-EXEC %s
+
+//CHECK-NOT: {{.*error|warning|note:.*}}
+
+#include "clad/Differentiator/Differentiator.h"
+
+#include <cmath>
+#include <algorithm>
+
+double test_1(double x, double y){
+   return std::hypot(x, y);
+}
+// CHECK: warning: Falling back to numerical differentiation for 'hypot' since no suitable overload was found and clad could not derive it. To disable this feature, compile your programs with -DCLAD_NO_NUM_DIFF.
+// CHECK: void test_1_grad(double x, double y, clad::array_ref<double> _d_x, clad::array_ref<double> _d_y) {
+// CHECK-NEXT:     double _t0;
+// CHECK-NEXT:     double _t1;
+// CHECK-NEXT:     _t0 = x;
+// CHECK-NEXT:     _t1 = y;
+// CHECK-NEXT:     double test_1_return = std::hypot(_t0, _t1);
+// CHECK-NEXT:     goto _label0;
+// CHECK-NEXT:   _label0:
+// CHECK-NEXT:     {
+// CHECK-NEXT:         double _grad0 = 0.;
+// CHECK-NEXT:         double _grad1 = 0.;
+// CHECK-NEXT:         clad::tape<clad::array_ref<double> > _t2 = {};
+// CHECK-NEXT:         clad::push(_t2, &_grad0);
+// CHECK-NEXT:         clad::push(_t2, &_grad1);
+// CHECK-NEXT:         numerical_diff::central_difference(std::hypot, _t2, 0, _t0, _t1);
+// CHECK-NEXT:         double _r0 = 1 * _grad0;
+// CHECK-NEXT:         * _d_x += _r0;
+// CHECK-NEXT:         double _r1 = 1 * _grad1;
+// CHECK-NEXT:         * _d_y += _r1;
+// CHECK-NEXT:     }
+// CHECK-NEXT: }
+
+
+int main(){
+    auto df = clad::gradient(test_1);  
+    double dx = 0, dy = 0;
+    df.execute(3, 4, &dx, &dy);
+    printf("Result is = %f\n", dx); // CHECK-EXEC: Result is = 0.600000
+    printf("Result is = %f\n", dy); // CHECK-EXEC: Result is = 0.800000
+}
+
diff --git a/test/NumericalDiff/NumDiff.C b/test/NumericalDiff/NumDiff.C
new file mode 100644
index 000000000..65dad231c
--- /dev/null
+++ b/test/NumericalDiff/NumDiff.C
@@ -0,0 +1,37 @@
+// RUN: %cladnumdiffclang -lm -lstdc++ %s -I%S/../../include -oNumDiff.out 2>&1 | FileCheck -check-prefix=CHECK %s
+
+//CHECK-NOT: {{.*error|warning|note:.*}}
+
+#include "clad/Differentiator/Differentiator.h"
+
+double test_1(double x){
+   return tanh(x); 
+}
+//CHECK: warning: Falling back to numerical differentiation for 'tanh' since no suitable overload was found and clad could not derive it. To disable this feature, compile your programs with -DCLAD_NO_NUM_DIFF.
+//CHECK: warning: Falling back to numerical differentiation for 'log10' since no suitable overload was found and clad could not derive it. To disable this feature, compile your programs with -DCLAD_NO_NUM_DIFF.
+
+//CHECK: void test_1_grad(double x, clad::array_ref<double> _d_x) {
+//CHECK-NEXT:     double _t0;
+//CHECK-NEXT:     _t0 = x;
+//CHECK-NEXT:     double test_1_return = tanh(_t0);
+//CHECK-NEXT:     goto _label0;
+//CHECK-NEXT:   _label0:
+//CHECK-NEXT:     {
+//CHECK-NEXT:         double _r0 = 1 * numerical_diff::forward_central_difference(tanh, _t0, 0, 0, _t0);
+//CHECK-NEXT:         * _d_x += _r0;
+//CHECK-NEXT:     }
+//CHECK-NEXT: }
+
+
+double test_2(double x){
+   return std::log10(x);
+}
+//CHECK: double test_2_darg0(double x) {
+//CHECK-NEXT:     double _d_x = 1;
+//CHECK-NEXT:     return numerical_diff::forward_central_difference(std::log10, x, 0, 0, x) * _d_x;
+//CHECK-NEXT: }
+
+int main(){
+    clad::gradient(test_1);
+    clad::differentiate(test_2, 0);
+}
diff --git a/test/NumericalDiff/PrintErrorNumDiff.C b/test/NumericalDiff/PrintErrorNumDiff.C
new file mode 100644
index 000000000..cabf53fc8
--- /dev/null
+++ b/test/NumericalDiff/PrintErrorNumDiff.C
@@ -0,0 +1,39 @@
+// RUN: %cladnumdiffclang -lm -lstdc++ -Xclang -plugin-arg-clad -Xclang -fprint-num-diff-errors %s -I%S/../../include -oPrintErrorNumDiff.out 2>&1 | FileCheck -check-prefix=CHECK %s
+// -Xclang -verify 2>&1 RUN: ./PrintErrorNumDiff.out | FileCheck -check-prefix=CHECK-EXEC %s
+
+//CHECK-NOT: {{.*error|warning|note:.*}}
+
+#include "clad/Differentiator/Differentiator.h"
+
+#include <cmath>
+
+extern "C" int printf(const char* fmt, ...);
+
+double test_1(double x){
+   return tanh(x); 
+}
+
+//CHECK: warning: Falling back to numerical differentiation for 'tanh' since no suitable overload was found and clad could not derive it. To disable this feature, compile your programs with -DCLAD_NO_NUM_DIFF.
+//CHECK: void test_1_grad(double x, clad::array_ref<double> _d_x) {
+//CHECK-NEXT:     double _t0;
+//CHECK-NEXT:     _t0 = x;
+//CHECK-NEXT:     double test_1_return = tanh(_t0);
+//CHECK-NEXT:     goto _label0;
+//CHECK-NEXT:   _label0:
+//CHECK-NEXT:     {
+//CHECK-NEXT:         double _r0 = 1 * numerical_diff::forward_central_difference(tanh, _t0, 0, 1, _t0);
+//CHECK-NEXT:         * _d_x += _r0;
+//CHECK-NEXT:     }
+//CHECK-NEXT: }
+
+
+int main(){
+    auto df = clad::gradient(test_1);
+    double x = 0.05, dx = 0;
+    df.execute(x, &dx);
+    printf("Result is:%f", dx);
+    //CHECK-EXEC: Error Report for parameter at position 0:
+    //CHECK-EXEC: Error due to the five-point central difference is: 0.0000000000
+    //CHECK-EXEC: Error due to function evaluation is: 0.0000000000
+    //CHECK-EXEC: Result is:0.997504 
+}
diff --git a/test/NumericalDiff/PureCentralDiffCalls.C b/test/NumericalDiff/PureCentralDiffCalls.C
new file mode 100644
index 000000000..25aa75114
--- /dev/null
+++ b/test/NumericalDiff/PureCentralDiffCalls.C
@@ -0,0 +1,103 @@
+// RUN: %cladnumdiffclang -lm -lstdc++ %s -I%S/../../include -oPureCentralDiffCalls.out
+// -Xclang -verify 2>&1 RUN: ./PureCentralDiffCalls.out | FileCheck -check-prefix=CHECK-EXEC %s
+
+// CHECK-NOT: {{.*error|warning|note:.*}}
+
+#include "clad/Differentiator/Differentiator.h"
+
+#include <functional>
+
+extern "C" int printf(const char* fmt, ...);
+
+// Scalar args
+double func(double x, double y, double z) { return x * y + x * (y + z); }
+
+// All pointer args
+double func1(double* x, double* y) {
+  return x[0] * y[0] + x[1] * y[1] + x[2] * y[2];
+}
+
+// Mix args...
+double func2(double* x, double y, int z) {
+  double sum = 0;
+  for (int i = 0; i < z; i++) {
+    x[i] += y;
+    sum += x[i];
+  }
+  return sum;
+}
+
+struct myStruct {
+  double data;
+  bool effect;
+  myStruct(double x, bool eff) {
+    data = x;
+    effect = eff;
+  }
+};
+
+double operator+(myStruct a, double b) { return a.data + b; }
+
+myStruct operator+(myStruct a, myStruct b) {
+  myStruct out(0, false);
+  out.data = a.data + b.data;
+  out.effect = a.effect || b.effect;
+  return out;
+}
+
+myStruct
+updateIndexParamValue(myStruct arg, std::size_t idx, std::size_t currIdx,
+                      int multiplier, numerical_diff::precision& h_val,
+                      std::size_t n = 0, std::size_t i = 0) {
+  if (idx == currIdx) {
+    h_val = (h_val == 0) ? numerical_diff::get_h(arg.data) : h_val;
+    if (arg.effect)
+      return myStruct(arg.data + h_val * multiplier, arg.effect);
+  }
+  return arg;
+}
+
+double func3(myStruct a, myStruct b) { return (a + b + a).data; }
+
+int main() { // expected-no-diagnostics
+  double x = 10, y = 2, z = 10, dy = 0, dz = 0, z1 = 3;
+  double x1[3] = {1, 1, 1}, y1[3] = {2, 3, 4}, dx[3] = {0, 0, 0};
+  myStruct a(10, true), b(11, false);
+
+  // Forward mode, derivative wrt one arg
+  // df/dx
+  double func_res = numerical_diff::forward_central_difference(func, y, 1, false, x, y,
+                                                               z);
+  printf("Result is = %f\n", func_res); // CHECK-EXEC: Result is = 20.000000
+  // df/dx[0]
+  double func1_res = numerical_diff::forward_central_difference(func1, x1, 0, 3,
+                                                                0, false, x1, y1);
+  printf("Result is = %f\n", func1_res); // CHECK-EXEC: Result is = 2.000000
+
+  // Gradients, derivative wrt all args
+  clad::tape<clad::array_ref<double>> grad = {};
+  grad.emplace_back(dx, 3);
+  grad.emplace_back(&dy);
+  grad.emplace_back(&dz);
+  numerical_diff::central_difference(func2, grad, false, x1, y, z1);
+  printf("Result is = %f\n", dx[0]); // CHECK-EXEC: Result is = 1.000000
+  printf("Result is = %f\n", dx[1]); // CHECK-EXEC: Result is = 1.000000
+  printf("Result is = %f\n", dx[2]); // CHECK-EXEC: Result is = 1.000000
+  printf("Result is = %f\n", dy);    // CHECK-EXEC: Result is = 3.000000
+
+  // Functors...
+  double functor_res = numerical_diff::
+      forward_central_difference(std::multiplies<double>(), y, 1, false, x, y);
+  printf("Result is = %f\n", functor_res); // CHECK-EXEC: Result is = 10.000000
+
+  // Overloaded operators...
+  double operator_res = numerical_diff::forward_central_difference<
+      double (*)(myStruct, double)>(operator+, a, 0, false, a, y);
+  printf("Result is = %f\n", operator_res); // CHECK-EXEC: Result is = 1.000000
+
+  // functions with user defined data types...
+  double userDefined_res = numerical_diff::forward_central_difference(func3, b,
+                                                                      1, false, a, b);
+  printf("Result is = %f\n",
+         userDefined_res); // CHECK-EXEC: Result is = 0.000000
+}
diff --git a/test/NumericalDiff/UserDefinedPointers.C b/test/NumericalDiff/UserDefinedPointers.C
new file mode 100644
index 000000000..fed7d6a7a
--- /dev/null
+++ b/test/NumericalDiff/UserDefinedPointers.C
@@ -0,0 +1,65 @@
+// RUN: %cladnumdiffclang -lm -lstdc++ %s -I%S/../../include -oUserDefinedPointers.out -Xclang -verify 2>&1
+// RUN: ./UserDefinedPointers.out | FileCheck -check-prefix=CHECK-EXEC %s
+
+//CHECK-NOT: {{.*error|warning|note:.*}}
+
+#include "clad/Differentiator/Differentiator.h"
+
+struct myStruct
+{
+  double data;
+  bool effect;
+  myStruct(double x, bool eff) { 
+    data = x;
+    effect = eff;
+  }
+  ~myStruct() = default;
+};
+
+myStruct operator+(myStruct a, myStruct b){
+  myStruct out(0, false);
+  out.data = a.data + b.data;
+  out.effect = a.effect || b.effect;
+  return out;
+}
+
+myStruct updateIndexParamValue(myStruct arg,
+                               std::size_t idx, std::size_t currIdx,
+                               int multiplier, numerical_diff::precision &h_val,
+                               std::size_t n = 0, std::size_t i = 0) {
+  if (idx == currIdx) {
+    h_val = (h_val == 0) ? numerical_diff::get_h(arg.data) : h_val;
+    if (arg.effect)
+      return myStruct(arg.data + h_val * multiplier, arg.effect);
+  }
+  return arg;
+}
+
+myStruct* updateIndexParamValue(myStruct* arg,
+                               std::size_t idx, std::size_t currIdx,
+                               int multiplier, numerical_diff::precision &h_val,
+                               std::size_t n = 0, std::size_t i = 0) {
+  myStruct* temp = numerical_diff::bufferManager
+                       .make_buffer_space<myStruct>(1, true, arg->data,
+                                                    arg->effect);
+  if (idx == currIdx) {
+    h_val = (h_val == 0) ? numerical_diff::get_h(arg->data) : h_val;
+    if (arg->effect)
+      temp->data = arg->data + h_val * multiplier;
+  }
+  return temp;
+}
+
+double func3(myStruct* a, myStruct b) { 
+    a->data = a->data * b.data;
+    a->data = a->data + a->data;
+    return a->data;
+}
+
+int main(){ // expected-no-diagnostics
+  myStruct a(3, true), b(5, true);
+  double userDefined_res =
+      numerical_diff::forward_central_difference(func3, &a, 0, 1, 0, false, &a, b);
+  printf("Result is = %f\n", userDefined_res); // CHECK-EXEC: Result is = 10.000000 
+
+}
diff --git a/test/lit.cfg b/test/lit.cfg
index 968ccb8f1..d2a040108 100644
--- a/test/lit.cfg
+++ b/test/lit.cfg
@@ -263,10 +263,12 @@ flags = ' -std=c++11 -Xclang -add-plugin -Xclang clad -Xclang \
 
 config.substitutions.append( ('%cladclang_cuda', config.clang + flags) )
 
-config.substitutions.append( ('%cladclang', config.clang + ' -x c++' + flags) )
+config.substitutions.append( ('%cladclang', config.clang + ' -DCLAD_NO_NUM_DIFF -x c++ ' + flags) )
 
 config.substitutions.append( ('%cladlib', config.cladlib) )
 
+config.substitutions.append( ('%cladnumdiffclang', config.clang + ' -x c++ ' + flags) )
+
 # When running under valgrind, we mangle '-vg' onto the end of the triple so we
 # can check it with XFAIL and XTARGET.
 if lit_config.useValgrind:
diff --git a/tools/ClangPlugin.cpp b/tools/ClangPlugin.cpp
index c6a95c9ce..dbb636006 100644
--- a/tools/ClangPlugin.cpp
+++ b/tools/ClangPlugin.cpp
@@ -179,6 +179,12 @@ namespace clad {
               estimationPlugin->InstantiateCustomModel(*m_DerivativeBuilder));
         }
       }
+
+      // If enabled, set the proper fields in derivative builder.
+      if (m_DO.PrintNumDiffErrorInfo) {
+        m_DerivativeBuilder->setNumDiffErrDiag(true);
+      }
+
       FunctionDecl* DerivativeDecl = nullptr;
       Decl* DerivativeDeclContext = nullptr;
       FunctionDecl* OverloadedDerivativeDecl = nullptr;
diff --git a/tools/ClangPlugin.h b/tools/ClangPlugin.h
index 751484c08..e4b90defa 100644
--- a/tools/ClangPlugin.h
+++ b/tools/ClangPlugin.h
@@ -40,7 +40,7 @@ namespace clad {
           : DumpSourceFn(false), DumpSourceFnAST(false), DumpDerivedFn(false),
             DumpDerivedAST(false), GenerateSourceFile(false),
             ValidateClangVersion(false), CustomEstimationModel(false),
-            CustomModelName("") {}
+            PrintNumDiffErrorInfo(false), CustomModelName("") {}
 
       bool DumpSourceFn : 1;
       bool DumpSourceFnAST : 1;
@@ -49,6 +49,7 @@ namespace clad {
       bool GenerateSourceFile : 1;
       bool ValidateClangVersion : 1;
       bool CustomEstimationModel : 1;
+      bool PrintNumDiffErrorInfo : 1;
       std::string CustomModelName;
     };
 
@@ -127,6 +128,8 @@ namespace clad {
               return false;
             }
             m_DO.CustomModelName = args[i];
+          } else if (args[i] == "-fprint-num-diff-errors") {
+            m_DO.PrintNumDiffErrorInfo = true;
           } else if (args[i] == "-help") {
             // Print some help info.
             llvm::errs()
@@ -143,7 +146,10 @@ namespace clad {
                 << "-fgenerate-source-file - Produces a file containing the "
                    "derivatives.\n"
                 << "-fcustom-estimation-model - allows user to send in a "
-                   "shared object to use as the custom estimation model.\n";
+                   "shared object to use as the custom estimation model.\n"
+                << "-fprint-num-diff-errors - allows users to print the "
+                   "calculated numerical diff errors, this flag is overriden "
+                   "by -DCLAD_NO_NUM_DIFF.\n";
 
             llvm::errs() << "-help - Prints out this screen.\n\n";
           } else {