From 7b900b87f8e6b5443ca0c9062fac71f0a9713cca Mon Sep 17 00:00:00 2001 From: ToshitJain Date: Fri, 20 Dec 2024 11:57:52 +0530 Subject: [PATCH] Fix for Parenthesis issue in gradients --- include/clad/Differentiator/Differentiator.h | 1116 +++++++++--------- 1 file changed, 565 insertions(+), 551 deletions(-) diff --git a/include/clad/Differentiator/Differentiator.h b/include/clad/Differentiator/Differentiator.h index c8aaaa286..775f66c36 100644 --- a/include/clad/Differentiator/Differentiator.h +++ b/include/clad/Differentiator/Differentiator.h @@ -21,8 +21,8 @@ #include "Tape.h" #include -#include #include +#include namespace clad { @@ -41,6 +41,37 @@ inline CUDA_HOST_DEVICE unsigned int GetLength(const char* code) { return count; } +// Fixes consecutive negative signs +// (-1)*(-x) gives *_d_x += 1 * 1; instead of +// *_d_x += --1 * 1; +CUDA_HOST_DEVICE void ResolveParenthesis(char* temp, const char* code) { + int negative_sign_count = 0; + int current_block = 0; + while (*code != '\0') { + if (*code == '-') { + current_block = 1; + negative_sign_count ^= 1; + code++; + continue; + } + if (current_block && *code != ' ') { + if (negative_sign_count == 1) { + *temp++ = '-'; + if (*code == '(') + *temp++ = ' '; + } + current_block = 0; + negative_sign_count = 0; + } + if (current_block && *code == ' ') { + code++; + continue; + } + *temp++ = *code++; + } + *temp = *code; +} + #ifdef __CUDACC__ #define CUDA_ARGS bool CUDAkernel, dim3 grid, dim3 block, #define CUDA_REST_ARGS size_t shared_mem, cudaStream_t stream, @@ -59,615 +90,598 @@ CUDA_HOST_DEVICE T push(tape& to, ArgsT... val) { return to.back(); } - /// Add value to the end of the tape, return the same value. - /// A specialization for clad::array_ref types to use in reverse mode. - template - CUDA_HOST_DEVICE clad::array_ref push(tape>& to, - U val) { - to.emplace_back(val); - return val; - } +/// Add value to the end of the tape, return the same value. +/// A specialization for clad::array_ref types to use in reverse mode. +template +CUDA_HOST_DEVICE clad::array_ref push(tape>& to, U val) { + to.emplace_back(val); + return val; +} - /// Remove the last value from the tape, return it. - template - CUDA_HOST_DEVICE T pop(tape& to) { - T val = to.back(); - to.pop_back(); - return val; - } +/// Remove the last value from the tape, return it. +template CUDA_HOST_DEVICE T pop(tape& to) { + T val = to.back(); + to.pop_back(); + return val; +} - /// Access return the last value in the tape. - template CUDA_HOST_DEVICE T& back(tape& of) { - return of.back(); - } +/// Access return the last value in the tape. +template CUDA_HOST_DEVICE T& back(tape& of) { + return of.back(); +} - /// The purpose of this function is to initialize adjoints - /// (or all of its differentiable fields) with 0. - // FIXME: Add support for objects. - /// Initialize a non-array variable. - template CUDA_HOST_DEVICE void zero_init(T& x) { new (&x) T(); } - - /// Initialize a non-const sized array when the size is known and is equal to - /// N. - template CUDA_HOST_DEVICE void zero_init(T* x, std::size_t N) { - for (std::size_t i = 0; i < N; ++i) - zero_init(x[i]); - } +/// The purpose of this function is to initialize adjoints +/// (or all of its differentiable fields) with 0. +// FIXME: Add support for objects. +/// Initialize a non-array variable. +template CUDA_HOST_DEVICE void zero_init(T& x) { new (&x) T(); } + +/// Initialize a non-const sized array when the size is known and is equal to +/// N. +template CUDA_HOST_DEVICE void zero_init(T* x, std::size_t N) { + for (std::size_t i = 0; i < N; ++i) + zero_init(x[i]); +} - /// Initialize a const sized array. - // NOLINTBEGIN(cppcoreguidelines-avoid-c-arrays) - template - CUDA_HOST_DEVICE void zero_init(T (&arr)[N]) { - zero_init((T*)arr, N); - } - // NOLINTEND(cppcoreguidelines-avoid-c-arrays) - - /// Pad the args supplied with nullptr(s) or zeros to match the the num of - /// params of the function and then execute the function using the padded args - /// i.e. we are adding default arguments as we cannot do that with - /// meta programming - /// - /// For example: - /// Let's assume we have a function with the signature: - /// fn_grad(double i, double j, int k, int l); - /// and f is a pointer to fn_grad - /// and args are the supplied arguments- 1.0, 2.0 and Args has their type - /// (double, double) - /// - /// When pad_and_execute(DropArgs_t{}, f, args) - /// is run, the Rest variadic argument will have the types (int, int). - /// pad_and_execute will then make up for the remaining args by appending 0s - /// and the return statement translates to: - /// return f(1.0, 2.0, 0, 0); - // for executing non-member functions - template ::type = true> - constexpr CUDA_HOST_DEVICE return_type_t - execute_with_default_args(list, F f, list, - CUDA_ARGS CUDA_REST_ARGS Args&&... args) { +/// Initialize a const sized array. +// NOLINTBEGIN(cppcoreguidelines-avoid-c-arrays) +template +CUDA_HOST_DEVICE void zero_init(T (&arr)[N]) { + zero_init((T*)arr, N); +} +// NOLINTEND(cppcoreguidelines-avoid-c-arrays) + +/// Pad the args supplied with nullptr(s) or zeros to match the the num of +/// params of the function and then execute the function using the padded args +/// i.e. we are adding default arguments as we cannot do that with +/// meta programming +/// +/// For example: +/// Let's assume we have a function with the signature: +/// fn_grad(double i, double j, int k, int l); +/// and f is a pointer to fn_grad +/// and args are the supplied arguments- 1.0, 2.0 and Args has their type +/// (double, double) +/// +/// When pad_and_execute(DropArgs_t{}, f, args) +/// is run, the Rest variadic argument will have the types (int, int). +/// pad_and_execute will then make up for the remaining args by appending 0s +/// and the return statement translates to: +/// return f(1.0, 2.0, 0, 0); +// for executing non-member functions +template ::type = true> +constexpr CUDA_HOST_DEVICE return_type_t +execute_with_default_args(list, F f, list, + CUDA_ARGS CUDA_REST_ARGS Args&&... args) { #if defined(__CUDACC__) && !defined(__CUDA_ARCH__) - if (CUDAkernel) { - constexpr size_t totalArgs = sizeof...(args) + sizeof...(Rest); - std::array argPtrs = {(void*)(&args)..., - static_cast(nullptr)...}; - - void* null_param = nullptr; - for (size_t i = sizeof...(args); i < totalArgs; ++i) - argPtrs[i] = &null_param; - - cudaLaunchKernel((void*)f, grid, block, argPtrs.data(), shared_mem, - stream); - return return_type_t(); - } else { - return f(static_cast(args)..., static_cast(nullptr)...); - } -#else + if (CUDAkernel) { + constexpr size_t totalArgs = sizeof...(args) + sizeof...(Rest); + std::array argPtrs = {(void*)(&args)..., + static_cast(nullptr)...}; + + void* null_param = nullptr; + for (size_t i = sizeof...(args); i < totalArgs; ++i) + argPtrs[i] = &null_param; + + cudaLaunchKernel((void*)f, grid, block, argPtrs.data(), shared_mem, stream); + return return_type_t(); + } else { return f(static_cast(args)..., static_cast(nullptr)...); -#endif } +#else + return f(static_cast(args)..., static_cast(nullptr)...); +#endif +} - template ::type = true> - constexpr return_type_t - execute_with_default_args(list, F f, list, - CUDA_ARGS CUDA_REST_ARGS Args&&... args) { +template ::type = true> +constexpr return_type_t +execute_with_default_args(list, F f, list, + CUDA_ARGS CUDA_REST_ARGS Args&&... args) { #if defined(__CUDACC__) && !defined(__CUDA_ARCH__) - if (CUDAkernel) { - void* argPtrs[] = {(void*)&args...}; - cudaLaunchKernel((void*)f, grid, block, argPtrs, shared_mem, stream); - return return_type_t(); - } - return f(static_cast(args)...); + if (CUDAkernel) { + void* argPtrs[] = {(void*)&args...}; + cudaLaunchKernel((void*)f, grid, block, argPtrs, shared_mem, stream); + return return_type_t(); + } + return f(static_cast(args)...); #else - return f(static_cast(args)...); + return f(static_cast(args)...); #endif - } +} - // for executing member-functions - template ::type = true> - constexpr CUDA_HOST_DEVICE auto - execute_with_default_args(list, ReturnType C::*f, Obj&& obj, - list, - Args&&... args) -> return_type_t { - return (static_cast(obj).*f)((fArgTypes)(args)..., - static_cast(nullptr)...); - } +// for executing member-functions +template ::type = true> +constexpr CUDA_HOST_DEVICE auto +execute_with_default_args(list, ReturnType C::* f, Obj&& obj, + list, Args&&... args) + -> return_type_t { + return (static_cast(obj).*f)((fArgTypes)(args)..., + static_cast(nullptr)...); +} - template ::type = true> - constexpr auto - execute_with_default_args(list, ReturnType C::*f, Obj&& obj, - list, - Args&&... args) -> return_type_t { - return (static_cast(obj).*f)(static_cast(args)...); - } +template ::type = true> +constexpr auto execute_with_default_args(list, ReturnType C::* f, + Obj&& obj, list, + Args&&... args) + -> return_type_t { + return (static_cast(obj).*f)(static_cast(args)...); +} - // Using std::function and std::mem_fn introduces a lot of overhead, which we - // do not need. Another disadvantage is that it is difficult to distinguish a - // 'normal' use of std::{function,mem_fn} from the ones we must differentiate. - /// Explicitly passing `FunctorT` type is necessary for maintaining - /// const correctness of functor types. - /// Default value of `Functor` here is temporary, and should be removed - /// once all clad differentiation functions support differentiating functors. - template , - bool EnablePadding = false, bool ImmediateMode = false> - class CladFunction { - public: - using CladFunctionType = F; - using FunctorType = FunctorT; - - private: - CladFunctionType m_Function; - const char* m_Code; - FunctorType *m_Functor = nullptr; - bool m_CUDAkernel = false; - - public: +// Using std::function and std::mem_fn introduces a lot of overhead, which we +// do not need. Another disadvantage is that it is difficult to distinguish a +// 'normal' use of std::{function,mem_fn} from the ones we must differentiate. +/// Explicitly passing `FunctorT` type is necessary for maintaining +/// const correctness of functor types. +/// Default value of `Functor` here is temporary, and should be removed +/// once all clad differentiation functions support differentiating functors. +template , + bool EnablePadding = false, bool ImmediateMode = false> +class CladFunction { +public: + using CladFunctionType = F; + using FunctorType = FunctorT; + +private: + CladFunctionType m_Function; + const char* m_Code; + FunctorType* m_Functor = nullptr; + bool m_CUDAkernel = false; + +public: #ifdef __cpp_concepts - CUDA_HOST_DEVICE CladFunction(CladFunctionType f, const char* code, - FunctorType* functor = nullptr, - bool CUDAkernel = false) - requires(!ImmediateMode) - : m_Function(f), m_Functor(functor), m_CUDAkernel(CUDAkernel) { + CUDA_HOST_DEVICE CladFunction(CladFunctionType f, const char* code, + FunctorType* functor = nullptr, + bool CUDAkernel = false) + requires(!ImmediateMode) + : m_Function(f), m_Functor(functor), m_CUDAkernel(CUDAkernel) { #ifndef __CLAD__ - static_assert(false, "clad doesn't appear to be loaded; make sure that " - "you pass clad.so to clang."); + static_assert(false, "clad doesn't appear to be loaded; make sure that " + "you pass clad.so to clang."); #endif - size_t length = GetLength(code); - char* temp = (char*)malloc(length + 1); - m_Code = temp; - while ((*temp++ = *code++)) - ; - } + size_t length = GetLength(code); + char* temp = (char*)malloc(length + 1); + m_Code = temp; + ResolveParenthesis(temp, code); + } - constexpr CUDA_HOST_DEVICE CladFunction(CladFunctionType f, - FunctorType* functor = nullptr, - bool CUDAkernel = false) - requires(ImmediateMode) - : m_Function(f), m_Code(""), - m_Functor(functor), m_CUDAkernel(CUDAkernel) { + constexpr CUDA_HOST_DEVICE CladFunction(CladFunctionType f, + FunctorType* functor = nullptr, + bool CUDAkernel = false) + requires(ImmediateMode) + : m_Function(f), m_Code(""), + m_Functor(functor), m_CUDAkernel(CUDAkernel) { #ifndef __CLAD__ - static_assert(false, "clad doesn't appear to be loaded; make sure that " - "you pass clad.so to clang."); + static_assert(false, "clad doesn't appear to be loaded; make sure that " + "you pass clad.so to clang."); #endif - } + } #else - CUDA_HOST_DEVICE CladFunction(CladFunctionType f, const char* code, - FunctorType* functor = nullptr, - bool CUDAkernel = false) - : m_Function(f), m_Functor(functor), m_CUDAkernel(CUDAkernel) { + CUDA_HOST_DEVICE CladFunction(CladFunctionType f, const char* code, + FunctorType* functor = nullptr, + bool CUDAkernel = false) + : m_Function(f), m_Functor(functor), m_CUDAkernel(CUDAkernel) { #ifndef __CLAD__ - static_assert(false, "clad doesn't appear to be loaded; make sure that " - "you pass clad.so to clang."); + static_assert(false, "clad doesn't appear to be loaded; make sure that " + "you pass clad.so to clang."); #endif - size_t length = GetLength(code); - char* temp = (char*)malloc(length + 1); - m_Code = temp; - while ((*temp++ = *code++)) - ; - } + size_t length = GetLength(code); + char* temp = (char*)malloc(length + 1); + m_Code = temp; + ResolveParenthesis(temp, code); + } #endif - /// Constructor overload for initializing `m_Functor` when functor - /// is passed by reference. - CUDA_HOST_DEVICE CladFunction(CladFunctionType f, const char* code, - FunctorType& functor) - : CladFunction(f, code, &functor) {}; - - constexpr CUDA_HOST_DEVICE CladFunction(CladFunctionType f, - FunctorType& functor) - : CladFunction(f, &functor) {}; - - // Intentionally leak m_Code, otherwise we have to link against c++ runtime, - // i.e -lstdc++. - //~CladFunction() { /*free(m_Code);*/ } - - constexpr CladFunctionType getFunctionPtr() const { return m_Function; } - - template - typename std::enable_if::value, - return_type_t>::type constexpr CUDA_HOST_DEVICE - execute(Args&&... args) const { - if (!m_Function) - return static_cast>(return_type_t()); - if (m_CUDAkernel) { - printf("Use execute_kernel() for global CUDA kernels\n"); - return static_cast>(return_type_t()); - } - // here static_cast is used to achieve perfect forwarding + /// Constructor overload for initializing `m_Functor` when functor + /// is passed by reference. + CUDA_HOST_DEVICE CladFunction(CladFunctionType f, const char* code, + FunctorType& functor) + : CladFunction(f, code, &functor) {}; + + constexpr CUDA_HOST_DEVICE CladFunction(CladFunctionType f, + FunctorType& functor) + : CladFunction(f, &functor) {}; + + // Intentionally leak m_Code, otherwise we have to link against c++ runtime, + // i.e -lstdc++. + //~CladFunction() { /*free(m_Code);*/ } + + constexpr CladFunctionType getFunctionPtr() const { return m_Function; } + + template + typename std::enable_if::value, + return_type_t>::type constexpr CUDA_HOST_DEVICE + execute(Args&&... args) const { + if (!m_Function) + return static_cast>(return_type_t()); + if (m_CUDAkernel) { + printf("Use execute_kernel() for global CUDA kernels\n"); + return static_cast>(return_type_t()); + } + // here static_cast is used to achieve perfect forwarding #ifdef __CUDACC__ - return execute_helper(m_Function, m_CUDAkernel, dim3(0), dim3(0), - static_cast(args)...); + return execute_helper(m_Function, m_CUDAkernel, dim3(0), dim3(0), + static_cast(args)...); #else - return execute_helper(m_Function, static_cast(args)...); + return execute_helper(m_Function, static_cast(args)...); #endif - } + } #ifdef __CUDACC__ - template - typename std::enable_if::value, - return_type_t>::type - execute_kernel(dim3 grid, dim3 block, Args&&... args) CUDA_HOST_DEVICE { - if (!m_Function) { - printf("CladFunction is invalid\n"); - return static_cast>(return_type_t()); - } - if (!m_CUDAkernel) { - printf("Use execute() for non-global CUDA kernels\n"); - return static_cast>(return_type_t()); - } - - return execute_helper(m_Function, m_CUDAkernel, grid, block, - static_cast(args)...); + template + typename std::enable_if::value, + return_type_t>::type + execute_kernel(dim3 grid, dim3 block, Args&&... args) CUDA_HOST_DEVICE { + if (!m_Function) { + printf("CladFunction is invalid\n"); + return static_cast>(return_type_t()); } + if (!m_CUDAkernel) { + printf("Use execute() for non-global CUDA kernels\n"); + return static_cast>(return_type_t()); + } + + return execute_helper(m_Function, m_CUDAkernel, grid, block, + static_cast(args)...); + } #endif - /// `Execute` overload to be used when derived function type cannot be - /// deduced. One reason for this can be when user tries to differentiate - /// an object of class which do not have user-defined call operator. - /// Error handling is handled in the clad side using clang diagnostics - /// subsystem. - template - typename std::enable_if::value, - return_type_t>::type constexpr CUDA_HOST_DEVICE - execute(Args&&... args) const { - return static_cast>(0); - } + /// `Execute` overload to be used when derived function type cannot be + /// deduced. One reason for this can be when user tries to differentiate + /// an object of class which do not have user-defined call operator. + /// Error handling is handled in the clad side using clang diagnostics + /// subsystem. + template + typename std::enable_if::value, + return_type_t>::type constexpr CUDA_HOST_DEVICE + execute(Args&&... args) const { + return static_cast>(0); + } - /// Return the string representation for the generated derivative. - constexpr const char* getCode() const { - if (m_Code) - return m_Code; - return ""; - } + /// Return the string representation for the generated derivative. + constexpr const char* getCode() const { + if (m_Code) + return m_Code; + return ""; + } - void dump() const { - printf("The code is: \n%s\n", getCode()); - } + void dump() const { printf("The code is: \n%s\n", getCode()); } - /// Set object pointed by the functor as the default object for - /// executing derived member function. - void setObject(FunctorType* functor) { - m_Functor = functor; - } + /// Set object pointed by the functor as the default object for + /// executing derived member function. + void setObject(FunctorType* functor) { m_Functor = functor; } - /// Set functor object as the default object for executing derived - // member function. - void setObject(FunctorType& functor) { - m_Functor = &functor; - } + /// Set functor object as the default object for executing derived + // member function. + void setObject(FunctorType& functor) { m_Functor = &functor; } - /// Clears default object (if any) for executing derived member function. - void clearObject() { - m_Functor = nullptr; - } + /// Clears default object (if any) for executing derived member function. + void clearObject() { m_Functor = nullptr; } - private: - /// Helper function for executing non-member derived functions. - template - constexpr CUDA_HOST_DEVICE return_type_t - execute_helper(Fn f, CUDA_ARGS Args&&... args) const { - // `static_cast` is required here for perfect forwarding. +private: + /// Helper function for executing non-member derived functions. + template + constexpr CUDA_HOST_DEVICE return_type_t + execute_helper(Fn f, CUDA_ARGS Args&&... args) const { + // `static_cast` is required here for perfect forwarding. #if defined(__CUDACC__) - if constexpr (sizeof...(Args) >= 2) { - auto secondArg = - std::get<1>(std::forward_as_tuple(std::forward(args)...)); - if constexpr (std::is_same, - cudaStream_t>::value) { - return [&](auto shared_mem, cudaStream_t stream, auto&&... args_) { - return execute_with_default_args( - DropArgs_t{}, f, - TakeNFirstArgs_t{}, - CUDAkernel, grid, block, shared_mem, stream, - static_cast(args_)...); - }(static_cast(args)...); - } else { - return execute_with_default_args( - DropArgs_t{}, f, - TakeNFirstArgs_t{}, CUDAkernel, - grid, block, 0, nullptr, static_cast(args)...); - } - } else { + if constexpr (sizeof...(Args) >= 2) { + auto secondArg = + std::get<1>(std::forward_as_tuple(std::forward(args)...)); + if constexpr (std::is_same, + cudaStream_t>::value) { + return [&](auto shared_mem, cudaStream_t stream, auto&&... args_) { return execute_with_default_args( - DropArgs_t{}, f, - TakeNFirstArgs_t{}, CUDAkernel, - grid, block, 0, nullptr, static_cast(args)...); - } -#else + DropArgs_t{}, f, + TakeNFirstArgs_t{}, CUDAkernel, + grid, block, shared_mem, stream, + static_cast(args_)...); + }(static_cast(args)...); + } else { return execute_with_default_args( DropArgs_t{}, f, - TakeNFirstArgs_t{}, - static_cast(args)...); -#endif + TakeNFirstArgs_t{}, CUDAkernel, grid, + block, 0, nullptr, static_cast(args)...); } - - /// Helper functions for executing member derived functions. - /// If user have passed object explicitly, then this specialization will - /// be used and derived function will be called through the passed object. - template ::type, C>::value>::type, - class... Args> - constexpr return_type_t - execute_helper(ReturnType C::*f, Obj&& obj, Args&&... args) const { - // `static_cast` is required here for perfect forwarding. - return execute_with_default_args( - DropArgs_t{}, f, - static_cast(obj), - TakeNFirstArgs_t{}, - static_cast(args)...); - } - /// If user have not passed object explicitly, then this specialization - /// will be used and derived function will be called through the object - /// saved in `CladFunction`. - template - constexpr return_type_t - execute_helper(ReturnType C::*f, Args&&... args) const { - // `static_cast` is required here for perfect forwarding. - return execute_with_default_args( - DropArgs_t{}, f, *m_Functor, - TakeNFirstArgs_t{}, - static_cast(args)...); - } - }; - - // This is the function which will be instantiated with the concrete arguments - // After that our AD library will have all the needed information. For eg: - // which is the differentiated function, which is the argument with respect - // to. - // - // This will be useful in future when we are ready to support partial diff. - // - - /// Differentiates function using forward mode. - /// - /// Performs partial differentiation of the `fn` argument using forward mode - /// wrt parameter specified in `args`. Template parameter `BitMaskedOpts` - /// denotes the derivative order and any extra options. To differentiate `fn` - /// wrt several parameters, please see `clad::gradient`. \param[in] fn - /// function to differentiate \param[in] args independent parameter - /// information \returns `CladFunction` object to access the corresponding - /// derived function. - template , - typename = typename std::enable_if< - !clad::HasOption(GetBitmaskedOpts(BitMaskedOpts...), - opts::vector_mode) && - !clad::HasOption(GetBitmaskedOpts(BitMaskedOpts...), - opts::immediate_mode) && - !std::is_class>::value>::type> - CladFunction> __attribute__(( - annotate("D"))) - differentiate(F fn, ArgSpec args = "", - DerivedFnType derivedFn = static_cast(nullptr), - const char* code = "") { - return CladFunction>(derivedFn, - code); + } else { + return execute_with_default_args( + DropArgs_t{}, f, + TakeNFirstArgs_t{}, CUDAkernel, grid, + block, 0, nullptr, static_cast(args)...); + } +#else + return execute_with_default_args( + DropArgs_t{}, f, + TakeNFirstArgs_t{}, + static_cast(args)...); +#endif } - template , - typename = typename std::enable_if< - !clad::HasOption(GetBitmaskedOpts(BitMaskedOpts...), - opts::vector_mode) && - clad::HasOption(GetBitmaskedOpts(BitMaskedOpts...), - opts::immediate_mode) && - !std::is_class>::value>::type> - constexpr CladFunction, false, - true> __attribute__((annotate("D"))) - differentiate(F fn, ArgSpec args = "", - DerivedFnType derivedFn = static_cast(nullptr)) { - return CladFunction, false, true>( - derivedFn); + /// Helper functions for executing member derived functions. + /// If user have passed object explicitly, then this specialization will + /// be used and derived function will be called through the passed object. + template ::type, C>::value>::type, + class... Args> + constexpr return_type_t + execute_helper(ReturnType C::* f, Obj&& obj, Args&&... args) const { + // `static_cast` is required here for perfect forwarding. + return execute_with_default_args( + DropArgs_t{}, f, static_cast(obj), + TakeNFirstArgs_t{}, + static_cast(args)...); } - - /// Specialization for differentiating functors. - /// The specialization is needed because objects have to be passed - /// by reference whereas functions have to be passed by value. - template , - typename = typename std::enable_if< - !clad::HasOption(GetBitmaskedOpts(BitMaskedOpts...), - opts::vector_mode) && - std::is_class>::value>::type> - constexpr CladFunction< - DerivedFnType, ExtractFunctorTraits_t> __attribute__((annotate("D"))) - differentiate(F&& f, ArgSpec args = "", - DerivedFnType derivedFn = static_cast(nullptr), - const char* code = "") { - return CladFunction>(derivedFn, - code, f); + /// If user have not passed object explicitly, then this specialization + /// will be used and derived function will be called through the object + /// saved in `CladFunction`. + template + constexpr return_type_t + execute_helper(ReturnType C::* f, Args&&... args) const { + // `static_cast` is required here for perfect forwarding. + return execute_with_default_args( + DropArgs_t{}, f, *m_Functor, + TakeNFirstArgs_t{}, + static_cast(args)...); } +}; + +// This is the function which will be instantiated with the concrete arguments +// After that our AD library will have all the needed information. For eg: +// which is the differentiated function, which is the argument with respect +// to. +// +// This will be useful in future when we are ready to support partial diff. +// + +/// Differentiates function using forward mode. +/// +/// Performs partial differentiation of the `fn` argument using forward mode +/// wrt parameter specified in `args`. Template parameter `BitMaskedOpts` +/// denotes the derivative order and any extra options. To differentiate `fn` +/// wrt several parameters, please see `clad::gradient`. \param[in] fn +/// function to differentiate \param[in] args independent parameter +/// information \returns `CladFunction` object to access the corresponding +/// derived function. +template , + typename = typename std::enable_if< + !clad::HasOption(GetBitmaskedOpts(BitMaskedOpts...), + opts::vector_mode) && + !clad::HasOption(GetBitmaskedOpts(BitMaskedOpts...), + opts::immediate_mode) && + !std::is_class>::value>::type> +CladFunction> __attribute__(( + annotate("D"))) +differentiate(F fn, ArgSpec args = "", + DerivedFnType derivedFn = static_cast(nullptr), + const char* code = "") { + return CladFunction>(derivedFn, + code); +} - /// Generates function which computes derivative of `fn` argument w.r.t - /// all parameters using a vectorized version of forward mode. - /// - /// \param[in] fn function to differentiate - /// \param[in] args independent parameters information - /// \returns `CladFunction` object to access the corresponding derived - /// function. - template , - typename = typename std::enable_if< - clad::HasOption(GetBitmaskedOpts(BitMaskedOpts...), - opts::vector_mode) && - !std::is_class>::value>::type> - constexpr CladFunction, - true> __attribute__((annotate("D"))) - differentiate(F fn, ArgSpec args = "", - DerivedFnType derivedFn = static_cast(nullptr), - const char* code = "") { - return CladFunction, true>( - derivedFn, code); - } +template , + typename = typename std::enable_if< + !clad::HasOption(GetBitmaskedOpts(BitMaskedOpts...), + opts::vector_mode) && + clad::HasOption(GetBitmaskedOpts(BitMaskedOpts...), + opts::immediate_mode) && + !std::is_class>::value>::type> +constexpr CladFunction, false, + true> __attribute__((annotate("D"))) +differentiate(F fn, ArgSpec args = "", + DerivedFnType derivedFn = static_cast(nullptr)) { + return CladFunction, false, true>( + derivedFn); +} - /// Generates function which computes gradient of the given function wrt the - /// parameters specified in `args` using reverse mode differentiation. - /// - /// \param[in] fn function to differentiate - /// \param[in] args independent parameters information - /// \returns `CladFunction` object to access the corresponding derived - /// function. - template , - typename = typename std::enable_if< - !clad::HasOption(GetBitmaskedOpts(BitMaskedOpts...), - opts::immediate_mode) && - !std::is_class>::value>::type> - constexpr CladFunction, - true> __attribute__((annotate("G"))) CUDA_HOST_DEVICE - gradient(F f, ArgSpec args = "", - DerivedFnType derivedFn = static_cast(nullptr), - const char* code = "", bool CUDAkernel = false) { - return CladFunction, true>( - derivedFn /* will be replaced by gradient*/, code, nullptr, CUDAkernel); - } +/// Specialization for differentiating functors. +/// The specialization is needed because objects have to be passed +/// by reference whereas functions have to be passed by value. +template , + typename = typename std::enable_if< + !clad::HasOption(GetBitmaskedOpts(BitMaskedOpts...), + opts::vector_mode) && + std::is_class>::value>::type> +constexpr CladFunction> __attribute__(( + annotate("D"))) +differentiate(F&& f, ArgSpec args = "", + DerivedFnType derivedFn = static_cast(nullptr), + const char* code = "") { + return CladFunction>(derivedFn, code, + f); +} - template , - typename = typename std::enable_if< - clad::HasOption(GetBitmaskedOpts(BitMaskedOpts...), - opts::immediate_mode) && - !std::is_class>::value>::type> - constexpr CladFunction, true, - true> __attribute__((annotate("G"))) CUDA_HOST_DEVICE - gradient(F f, ArgSpec args = "", - DerivedFnType derivedFn = static_cast(nullptr), - bool CUDAkernel = false) { - return CladFunction, true, true>( - derivedFn /* will be replaced by gradient*/, nullptr, CUDAkernel); - } +/// Generates function which computes derivative of `fn` argument w.r.t +/// all parameters using a vectorized version of forward mode. +/// +/// \param[in] fn function to differentiate +/// \param[in] args independent parameters information +/// \returns `CladFunction` object to access the corresponding derived +/// function. +template , + typename = typename std::enable_if< + clad::HasOption(GetBitmaskedOpts(BitMaskedOpts...), + opts::vector_mode) && + !std::is_class>::value>::type> +constexpr CladFunction, + true> __attribute__((annotate("D"))) +differentiate(F fn, ArgSpec args = "", + DerivedFnType derivedFn = static_cast(nullptr), + const char* code = "") { + return CladFunction, true>(derivedFn, + code); +} - /// Specialization for differentiating functors. - /// The specialization is needed because objects have to be passed - /// by reference whereas functions have to be passed by value. - template , - typename = typename std::enable_if< - std::is_class>::value>::type> - constexpr CladFunction, - true> __attribute__((annotate("G"))) CUDA_HOST_DEVICE - gradient(F&& f, ArgSpec args = "", - DerivedFnType derivedFn = static_cast(nullptr), - const char* code = "") { - return CladFunction, true>( - derivedFn /* will be replaced by gradient*/, code, f); - } +/// Generates function which computes gradient of the given function wrt the +/// parameters specified in `args` using reverse mode differentiation. +/// +/// \param[in] fn function to differentiate +/// \param[in] args independent parameters information +/// \returns `CladFunction` object to access the corresponding derived +/// function. +template , + typename = typename std::enable_if< + !clad::HasOption(GetBitmaskedOpts(BitMaskedOpts...), + opts::immediate_mode) && + !std::is_class>::value>::type> +constexpr CladFunction, + true> __attribute__((annotate("G"))) CUDA_HOST_DEVICE +gradient(F f, ArgSpec args = "", + DerivedFnType derivedFn = static_cast(nullptr), + const char* code = "", bool CUDAkernel = false) { + return CladFunction, true>( + derivedFn /* will be replaced by gradient*/, code, nullptr, CUDAkernel); +} - /// Generates function which computes hessian matrix of the given function wrt - /// the parameters specified in `args`. - /// - /// \param[in] fn function to differentiate - /// \param[in] args independent parameters information - /// \returns `CladFunction` object to access the corresponding derived - /// function. - template , - typename = typename std::enable_if< - !clad::HasOption(GetBitmaskedOpts(BitMaskedOpts...), - opts::immediate_mode) && - !std::is_class>::value>::type> - constexpr CladFunction< - DerivedFnType, ExtractFunctorTraits_t> __attribute__((annotate("H"))) - hessian(F f, ArgSpec args = "", - DerivedFnType derivedFn = static_cast(nullptr), - const char* code = "") { - return CladFunction>( - derivedFn /* will be replaced by hessian*/, code); - } +template , + typename = typename std::enable_if< + clad::HasOption(GetBitmaskedOpts(BitMaskedOpts...), + opts::immediate_mode) && + !std::is_class>::value>::type> +constexpr CladFunction, true, + true> __attribute__((annotate("G"))) CUDA_HOST_DEVICE +gradient(F f, ArgSpec args = "", + DerivedFnType derivedFn = static_cast(nullptr), + bool CUDAkernel = false) { + return CladFunction, true, true>( + derivedFn /* will be replaced by gradient*/, nullptr, CUDAkernel); +} - template , - typename = typename std::enable_if< - clad::HasOption(GetBitmaskedOpts(BitMaskedOpts...), - opts::immediate_mode) && - !std::is_class>::value>::type> - constexpr CladFunction, false, - true> __attribute__((annotate("H"))) - hessian(F f, ArgSpec args = "", - DerivedFnType derivedFn = static_cast(nullptr)) { - return CladFunction, false, true>( - derivedFn /* will be replaced by hessian*/); - } +/// Specialization for differentiating functors. +/// The specialization is needed because objects have to be passed +/// by reference whereas functions have to be passed by value. +template , + typename = typename std::enable_if< + std::is_class>::value>::type> +constexpr CladFunction, + true> __attribute__((annotate("G"))) CUDA_HOST_DEVICE +gradient(F&& f, ArgSpec args = "", + DerivedFnType derivedFn = static_cast(nullptr), + const char* code = "") { + return CladFunction, true>( + derivedFn /* will be replaced by gradient*/, code, f); +} - /// Specialization for differentiating functors. - /// The specialization is needed because objects have to be passed - /// by reference whereas functions have to be passed by value. - template , - typename = typename std::enable_if< - std::is_class>::value>::type> - constexpr CladFunction< - DerivedFnType, ExtractFunctorTraits_t> __attribute__((annotate("H"))) - hessian(F&& f, ArgSpec args = "", - DerivedFnType derivedFn = static_cast(nullptr), - const char* code = "") { - return CladFunction>( - derivedFn /* will be replaced by hessian*/, code, f); - } +/// Generates function which computes hessian matrix of the given function wrt +/// the parameters specified in `args`. +/// +/// \param[in] fn function to differentiate +/// \param[in] args independent parameters information +/// \returns `CladFunction` object to access the corresponding derived +/// function. +template , + typename = typename std::enable_if< + !clad::HasOption(GetBitmaskedOpts(BitMaskedOpts...), + opts::immediate_mode) && + !std::is_class>::value>::type> +constexpr CladFunction> __attribute__(( + annotate("H"))) +hessian(F f, ArgSpec args = "", + DerivedFnType derivedFn = static_cast(nullptr), + const char* code = "") { + return CladFunction>( + derivedFn /* will be replaced by hessian*/, code); +} - /// Generates function which computes jacobian matrix of the given function - /// wrt the parameters specified in `args` using reverse mode differentiation. - /// - /// \param[in] fn function to differentiate - /// \param[in] args independent parameters information - /// \returns `CladFunction` object to access the corresponding derived - /// function. - template , - typename = typename std::enable_if< - !std::is_class>::value>::type> - constexpr CladFunction, - /*EnablePadding=*/true> __attribute__((annotate("J"))) - jacobian(F f, ArgSpec args = "", - DerivedFnType derivedFn = static_cast(nullptr), - const char* code = "") { - return CladFunction, - /*EnablePadding=*/true>( - derivedFn /* will be replaced by Jacobian*/, code); - } +template , + typename = typename std::enable_if< + clad::HasOption(GetBitmaskedOpts(BitMaskedOpts...), + opts::immediate_mode) && + !std::is_class>::value>::type> +constexpr CladFunction, false, + true> __attribute__((annotate("H"))) +hessian(F f, ArgSpec args = "", + DerivedFnType derivedFn = static_cast(nullptr)) { + return CladFunction, false, true>( + derivedFn /* will be replaced by hessian*/); +} - /// Specialization for differentiating functors. - /// The specialization is needed because objects have to be passed - /// by reference whereas functions have to be passed by value. - template , - typename = typename std::enable_if< - std::is_class>::value>::type> - constexpr CladFunction, - /*EnablePadding=*/true> __attribute__((annotate("J"))) - jacobian(F&& f, ArgSpec args = "", - DerivedFnType derivedFn = static_cast(nullptr), - const char* code = "") { - return CladFunction, - /*EnablePadding=*/true>( - derivedFn /* will be replaced by Jacobian*/, code, f); - } +/// Specialization for differentiating functors. +/// The specialization is needed because objects have to be passed +/// by reference whereas functions have to be passed by value. +template , + typename = typename std::enable_if< + std::is_class>::value>::type> +constexpr CladFunction> __attribute__(( + annotate("H"))) +hessian(F&& f, ArgSpec args = "", + DerivedFnType derivedFn = static_cast(nullptr), + const char* code = "") { + return CladFunction>( + derivedFn /* will be replaced by hessian*/, code, f); +} - template > - constexpr CladFunction __attribute__((annotate("E"))) - estimate_error(F f, ArgSpec args = "", - DerivedFnType derivedFn = static_cast(nullptr), - const char* code = "") { - return CladFunction< - DerivedFnType>(derivedFn /* will be replaced by estimation code*/, - code); - } +/// Generates function which computes jacobian matrix of the given function +/// wrt the parameters specified in `args` using reverse mode differentiation. +/// +/// \param[in] fn function to differentiate +/// \param[in] args independent parameters information +/// \returns `CladFunction` object to access the corresponding derived +/// function. +template , + typename = typename std::enable_if< + !std::is_class>::value>::type> +constexpr CladFunction, + /*EnablePadding=*/true> __attribute__((annotate("J"))) +jacobian(F f, ArgSpec args = "", + DerivedFnType derivedFn = static_cast(nullptr), + const char* code = "") { + return CladFunction, + /*EnablePadding=*/true>( + derivedFn /* will be replaced by Jacobian*/, code); +} + +/// Specialization for differentiating functors. +/// The specialization is needed because objects have to be passed +/// by reference whereas functions have to be passed by value. +template , + typename = typename std::enable_if< + std::is_class>::value>::type> +constexpr CladFunction, + /*EnablePadding=*/true> __attribute__((annotate("J"))) +jacobian(F&& f, ArgSpec args = "", + DerivedFnType derivedFn = static_cast(nullptr), + const char* code = "") { + return CladFunction, + /*EnablePadding=*/true>( + derivedFn /* will be replaced by Jacobian*/, code, f); +} + +template > +constexpr CladFunction __attribute__((annotate("E"))) +estimate_error(F f, ArgSpec args = "", + DerivedFnType derivedFn = static_cast(nullptr), + const char* code = "") { + return CladFunction( + derivedFn /* will be replaced by estimation code*/, code); +} - // Gradient Structure for Reverse Mode Enzyme - template struct EnzymeGradient { double d_arr[N]; }; - } // namespace clad +// Gradient Structure for Reverse Mode Enzyme +template struct EnzymeGradient { + double d_arr[N]; +}; +} // namespace clad #endif // CLAD_DIFFERENTIATOR // Enable clad after the header was included.