diff --git a/include/heyoka/detail/llvm_helpers.hpp b/include/heyoka/detail/llvm_helpers.hpp index 4ab3cf43b..f8227066b 100644 --- a/include/heyoka/detail/llvm_helpers.hpp +++ b/include/heyoka/detail/llvm_helpers.hpp @@ -57,7 +57,9 @@ HEYOKA_DLL_PUBLIC std::uint64_t get_alignment(llvm::Module &, llvm::Type *); HEYOKA_DLL_PUBLIC llvm::Value *load_vector_from_memory(ir_builder &, llvm::Value *, std::uint32_t); HEYOKA_DLL_PUBLIC void store_vector_to_memory(ir_builder &, llvm::Value *, llvm::Value *); -llvm::Value *gather_vector_from_memory(ir_builder &, llvm::Type *, llvm::Value *); + +HEYOKA_DLL_PUBLIC llvm::Value *gather_vector_from_memory(ir_builder &, llvm::Type *, llvm::Value *); +HEYOKA_DLL_PUBLIC void scatter_vector_to_memory(ir_builder &, llvm::Value *, llvm::Value *); HEYOKA_DLL_PUBLIC llvm::Value *vector_splat(ir_builder &, llvm::Value *, std::uint32_t); diff --git a/include/heyoka/expression.hpp b/include/heyoka/expression.hpp index 516461665..a8bbc4808 100644 --- a/include/heyoka/expression.hpp +++ b/include/heyoka/expression.hpp @@ -381,35 +381,9 @@ inline llvm::Value *taylor_diff(llvm_state &s, const expression &ex, const std:: } } -HEYOKA_DLL_PUBLIC llvm::Function *taylor_c_diff_func_dbl(llvm_state &, const expression &, std::uint32_t, std::uint32_t, - bool); - -HEYOKA_DLL_PUBLIC llvm::Function *taylor_c_diff_func_ldbl(llvm_state &, const expression &, std::uint32_t, - std::uint32_t, bool); - -#if defined(HEYOKA_HAVE_REAL128) - -HEYOKA_DLL_PUBLIC llvm::Function *taylor_c_diff_func_f128(llvm_state &, const expression &, std::uint32_t, - std::uint32_t, bool); - -#endif - template -inline llvm::Function *taylor_c_diff_func(llvm_state &s, const expression &ex, std::uint32_t n_uvars, - std::uint32_t batch_size, bool high_accuracy) -{ - if constexpr (std::is_same_v) { - return taylor_c_diff_func_dbl(s, ex, n_uvars, batch_size, high_accuracy); - } else if constexpr (std::is_same_v) { - return taylor_c_diff_func_ldbl(s, ex, n_uvars, batch_size, high_accuracy); 
-#if defined(HEYOKA_HAVE_REAL128) - } else if constexpr (std::is_same_v) { - return taylor_c_diff_func_f128(s, ex, n_uvars, batch_size, high_accuracy); -#endif - } else { - static_assert(detail::always_false_v, "Unhandled type."); - } -} +HEYOKA_DLL_PUBLIC llvm::Function *taylor_c_diff_func(llvm_state &, const expression &, std::uint32_t, std::uint32_t, + bool, std::uint32_t); HEYOKA_DLL_PUBLIC std::uint32_t get_param_size(const expression &); diff --git a/include/heyoka/func.hpp b/include/heyoka/func.hpp index fbd16dd25..d90079439 100644 --- a/include/heyoka/func.hpp +++ b/include/heyoka/func.hpp @@ -135,10 +135,13 @@ struct HEYOKA_DLL_PUBLIC func_inner_base { const std::vector &, llvm::Value *, llvm::Value *, std::uint32_t, std::uint32_t, std::uint32_t, std::uint32_t, bool) const = 0; #endif - virtual llvm::Function *taylor_c_diff_func_dbl(llvm_state &, std::uint32_t, std::uint32_t, bool) const = 0; - virtual llvm::Function *taylor_c_diff_func_ldbl(llvm_state &, std::uint32_t, std::uint32_t, bool) const = 0; + virtual llvm::Function *taylor_c_diff_func_dbl(llvm_state &, std::uint32_t, std::uint32_t, bool, + std::uint32_t) const = 0; + virtual llvm::Function *taylor_c_diff_func_ldbl(llvm_state &, std::uint32_t, std::uint32_t, bool, + std::uint32_t) const = 0; #if defined(HEYOKA_HAVE_REAL128) - virtual llvm::Function *taylor_c_diff_func_f128(llvm_state &, std::uint32_t, std::uint32_t, bool) const = 0; + virtual llvm::Function *taylor_c_diff_func_f128(llvm_state &, std::uint32_t, std::uint32_t, bool, + std::uint32_t) const = 0; #endif private: @@ -310,7 +313,7 @@ template using func_taylor_c_diff_func_dbl_t = decltype(std::declval>().taylor_c_diff_func_dbl( std::declval(), std::declval(), std::declval(), - std::declval())); + std::declval(), std::declval())); template inline constexpr bool func_has_taylor_c_diff_func_dbl_v @@ -320,7 +323,7 @@ template using func_taylor_c_diff_func_ldbl_t = decltype(std::declval>().taylor_c_diff_func_ldbl( std::declval(), 
std::declval(), std::declval(), - std::declval())); + std::declval(), std::declval())); template inline constexpr bool func_has_taylor_c_diff_func_ldbl_v @@ -332,7 +335,7 @@ template using func_taylor_c_diff_func_f128_t = decltype(std::declval>().taylor_c_diff_func_f128( std::declval(), std::declval(), std::declval(), - std::declval())); + std::declval(), std::declval())); template inline constexpr bool func_has_taylor_c_diff_func_f128_v @@ -593,20 +596,20 @@ struct HEYOKA_DLL_PUBLIC_INLINE_CLASS func_inner final : func_inner_base { } #endif llvm::Function *taylor_c_diff_func_dbl(llvm_state &s, std::uint32_t n_uvars, std::uint32_t batch_size, - bool high_accuracy) const final + bool high_accuracy, std::uint32_t vector_size) const final { if constexpr (func_has_taylor_c_diff_func_dbl_v) { - return m_value.taylor_c_diff_func_dbl(s, n_uvars, batch_size, high_accuracy); + return m_value.taylor_c_diff_func_dbl(s, n_uvars, batch_size, high_accuracy, vector_size); } else { throw not_implemented_error("double Taylor diff in compact mode is not implemented for the function '" + get_name() + "'"); } } llvm::Function *taylor_c_diff_func_ldbl(llvm_state &s, std::uint32_t n_uvars, std::uint32_t batch_size, - bool high_accuracy) const final + bool high_accuracy, std::uint32_t vector_size) const final { if constexpr (func_has_taylor_c_diff_func_ldbl_v) { - return m_value.taylor_c_diff_func_ldbl(s, n_uvars, batch_size, high_accuracy); + return m_value.taylor_c_diff_func_ldbl(s, n_uvars, batch_size, high_accuracy, vector_size); } else { throw not_implemented_error("long double Taylor diff in compact mode is not implemented for the function '" + get_name() + "'"); @@ -614,10 +617,10 @@ struct HEYOKA_DLL_PUBLIC_INLINE_CLASS func_inner final : func_inner_base { } #if defined(HEYOKA_HAVE_REAL128) llvm::Function *taylor_c_diff_func_f128(llvm_state &s, std::uint32_t n_uvars, std::uint32_t batch_size, - bool high_accuracy) const final + bool high_accuracy, std::uint32_t vector_size) const 
final { if constexpr (func_has_taylor_c_diff_func_f128_v) { - return m_value.taylor_c_diff_func_f128(s, n_uvars, batch_size, high_accuracy); + return m_value.taylor_c_diff_func_f128(s, n_uvars, batch_size, high_accuracy, vector_size); } else { throw not_implemented_error("float128 Taylor diff in compact mode is not implemented for the function '" + get_name() + "'"); @@ -784,10 +787,10 @@ class HEYOKA_DLL_PUBLIC func llvm::Value *, llvm::Value *, std::uint32_t, std::uint32_t, std::uint32_t, std::uint32_t, bool) const; #endif - llvm::Function *taylor_c_diff_func_dbl(llvm_state &, std::uint32_t, std::uint32_t, bool) const; - llvm::Function *taylor_c_diff_func_ldbl(llvm_state &, std::uint32_t, std::uint32_t, bool) const; + llvm::Function *taylor_c_diff_func_dbl(llvm_state &, std::uint32_t, std::uint32_t, bool, std::uint32_t) const; + llvm::Function *taylor_c_diff_func_ldbl(llvm_state &, std::uint32_t, std::uint32_t, bool, std::uint32_t) const; #if defined(HEYOKA_HAVE_REAL128) - llvm::Function *taylor_c_diff_func_f128(llvm_state &, std::uint32_t, std::uint32_t, bool) const; + llvm::Function *taylor_c_diff_func_f128(llvm_state &, std::uint32_t, std::uint32_t, bool, std::uint32_t) const; #endif }; diff --git a/include/heyoka/math/binary_op.hpp b/include/heyoka/math/binary_op.hpp index 022f3ee87..c58c88516 100644 --- a/include/heyoka/math/binary_op.hpp +++ b/include/heyoka/math/binary_op.hpp @@ -94,10 +94,10 @@ class HEYOKA_DLL_PUBLIC binary_op : public func_base std::uint32_t, bool) const; #endif - llvm::Function *taylor_c_diff_func_dbl(llvm_state &, std::uint32_t, std::uint32_t, bool) const; - llvm::Function *taylor_c_diff_func_ldbl(llvm_state &, std::uint32_t, std::uint32_t, bool) const; + llvm::Function *taylor_c_diff_func_dbl(llvm_state &, std::uint32_t, std::uint32_t, bool, std::uint32_t) const; + llvm::Function *taylor_c_diff_func_ldbl(llvm_state &, std::uint32_t, std::uint32_t, bool, std::uint32_t) const; #if defined(HEYOKA_HAVE_REAL128) - llvm::Function 
*taylor_c_diff_func_f128(llvm_state &, std::uint32_t, std::uint32_t, bool) const; + llvm::Function *taylor_c_diff_func_f128(llvm_state &, std::uint32_t, std::uint32_t, bool, std::uint32_t) const; #endif }; diff --git a/include/heyoka/math/pow.hpp b/include/heyoka/math/pow.hpp index fa8a0094a..5cac894e5 100644 --- a/include/heyoka/math/pow.hpp +++ b/include/heyoka/math/pow.hpp @@ -82,10 +82,10 @@ class HEYOKA_DLL_PUBLIC pow_impl : public func_base std::uint32_t, bool) const; #endif - llvm::Function *taylor_c_diff_func_dbl(llvm_state &, std::uint32_t, std::uint32_t, bool) const; - llvm::Function *taylor_c_diff_func_ldbl(llvm_state &, std::uint32_t, std::uint32_t, bool) const; + llvm::Function *taylor_c_diff_func_dbl(llvm_state &, std::uint32_t, std::uint32_t, bool, std::uint32_t) const; + llvm::Function *taylor_c_diff_func_ldbl(llvm_state &, std::uint32_t, std::uint32_t, bool, std::uint32_t) const; #if defined(HEYOKA_HAVE_REAL128) - llvm::Function *taylor_c_diff_func_f128(llvm_state &, std::uint32_t, std::uint32_t, bool) const; + llvm::Function *taylor_c_diff_func_f128(llvm_state &, std::uint32_t, std::uint32_t, bool, std::uint32_t) const; #endif }; diff --git a/include/heyoka/math/sum.hpp b/include/heyoka/math/sum.hpp index 352ec851a..df61cfdd0 100644 --- a/include/heyoka/math/sum.hpp +++ b/include/heyoka/math/sum.hpp @@ -54,10 +54,10 @@ class HEYOKA_DLL_PUBLIC sum_impl : public func_base llvm::Value *, llvm::Value *, std::uint32_t, std::uint32_t, std::uint32_t, std::uint32_t, bool) const; #endif - llvm::Function *taylor_c_diff_func_dbl(llvm_state &, std::uint32_t, std::uint32_t, bool) const; - llvm::Function *taylor_c_diff_func_ldbl(llvm_state &, std::uint32_t, std::uint32_t, bool) const; + llvm::Function *taylor_c_diff_func_dbl(llvm_state &, std::uint32_t, std::uint32_t, bool, std::uint32_t) const; + llvm::Function *taylor_c_diff_func_ldbl(llvm_state &, std::uint32_t, std::uint32_t, bool, std::uint32_t) const; #if defined(HEYOKA_HAVE_REAL128) - llvm::Function 
*taylor_c_diff_func_f128(llvm_state &, std::uint32_t, std::uint32_t, bool) const; + llvm::Function *taylor_c_diff_func_f128(llvm_state &, std::uint32_t, std::uint32_t, bool, std::uint32_t) const; #endif }; diff --git a/include/heyoka/math/sum_sq.hpp b/include/heyoka/math/sum_sq.hpp index 067d4e16f..d4a0289ca 100644 --- a/include/heyoka/math/sum_sq.hpp +++ b/include/heyoka/math/sum_sq.hpp @@ -61,10 +61,10 @@ class HEYOKA_DLL_PUBLIC sum_sq_impl : public func_base std::uint32_t, bool) const; #endif - llvm::Function *taylor_c_diff_func_dbl(llvm_state &, std::uint32_t, std::uint32_t, bool) const; - llvm::Function *taylor_c_diff_func_ldbl(llvm_state &, std::uint32_t, std::uint32_t, bool) const; + llvm::Function *taylor_c_diff_func_dbl(llvm_state &, std::uint32_t, std::uint32_t, bool, std::uint32_t) const; + llvm::Function *taylor_c_diff_func_ldbl(llvm_state &, std::uint32_t, std::uint32_t, bool, std::uint32_t) const; #if defined(HEYOKA_HAVE_REAL128) - llvm::Function *taylor_c_diff_func_f128(llvm_state &, std::uint32_t, std::uint32_t, bool) const; + llvm::Function *taylor_c_diff_func_f128(llvm_state &, std::uint32_t, std::uint32_t, bool, std::uint32_t) const; #endif }; diff --git a/include/heyoka/taylor.hpp b/include/heyoka/taylor.hpp index 1a93f3157..99f2014dc 100644 --- a/include/heyoka/taylor.hpp +++ b/include/heyoka/taylor.hpp @@ -103,10 +103,15 @@ llvm::Value *taylor_codegen_numparam(llvm_state &s, const U &n, llvm::Value *par } } +// TODO remove old overloads. 
HEYOKA_DLL_PUBLIC llvm::Value *taylor_c_diff_numparam_codegen(llvm_state &, const number &, llvm::Value *, llvm::Value *, std::uint32_t); +HEYOKA_DLL_PUBLIC llvm::Value *taylor_c_diff_numparam_codegen(llvm_state &, const number &, llvm::Value *, + llvm::Value *, std::uint32_t, std::uint32_t); HEYOKA_DLL_PUBLIC llvm::Value *taylor_c_diff_numparam_codegen(llvm_state &, const param &, llvm::Value *, llvm::Value *, std::uint32_t); +HEYOKA_DLL_PUBLIC llvm::Value *taylor_c_diff_numparam_codegen(llvm_state &, const param &, llvm::Value *, llvm::Value *, + std::uint32_t, std::uint32_t); HEYOKA_DLL_PUBLIC llvm::Value *taylor_fetch_diff(const std::vector &, std::uint32_t, std::uint32_t, std::uint32_t); @@ -172,6 +177,12 @@ taylor_c_diff_func_name_args(llvm::LLVMContext &c, const std::string &name, std: return taylor_c_diff_func_name_args_impl(c, name, val_t, n_uvars, args, n_hidden_deps); } +// TODO remove the other version, then rename? +template +HEYOKA_DLL_PUBLIC std::pair> +taylor_c_diff_vfunc_name_args(llvm::LLVMContext &, const std::string &, std::uint32_t, std::uint32_t, std::uint32_t, + const std::vector> &, std::uint32_t = 0); + // Add a function for computing the dense output // via polynomial evaluation. template diff --git a/src/detail/llvm_helpers.cpp b/src/detail/llvm_helpers.cpp index 92fca177f..9698e82da 100644 --- a/src/detail/llvm_helpers.cpp +++ b/src/detail/llvm_helpers.cpp @@ -276,12 +276,16 @@ void store_vector_to_memory(ir_builder &builder, llvm::Value *ptr, llvm::Value * } } -// Gather a vector of type vec_tp from the vector of pointers ptrs. +// Gather a vector of type vec_tp from ptrs. If vec_tp is a vector type, then ptrs +// must be a vector of pointers of the same size and the returned value is also a vector +// of that size. Otherwise, ptrs must be a single scalar pointer and the returned value is a scalar. 
llvm::Value *gather_vector_from_memory(ir_builder &builder, llvm::Type *vec_tp, llvm::Value *ptrs) { if (llvm::isa(vec_tp)) { // LCOV_EXCL_START assert(llvm::isa(ptrs->getType())); + assert(llvm::cast(vec_tp)->getNumElements() + == llvm::cast(ptrs->getType())->getNumElements()); assert(ptrs->getType()->getScalarType()->getPointerElementType() == vec_tp->getScalarType()); // LCOV_EXCL_STOP @@ -311,11 +315,48 @@ llvm::Value *gather_vector_from_memory(ir_builder &builder, llvm::Type *vec_tp, } } +// Scatter val to ptrs. If val is a vector, then ptrs must be a vector of pointers +// and a vector scatter takes place. Otherwise, ptrs must be a single scalar pointer +// and a scalar store takes place. +void scatter_vector_to_memory(ir_builder &builder, llvm::Value *val, llvm::Value *ptrs) +{ + if (llvm::isa(ptrs->getType())) { + // LCOV_EXCL_START + assert(llvm::isa(val->getType())); + assert(llvm::cast(val->getType())->getNumElements() + == llvm::cast(ptrs->getType())->getNumElements()); + assert(val->getType()->getScalarType() == ptrs->getType()->getScalarType()->getPointerElementType()); + // LCOV_EXCL_STOP + + // Fetch the alignment of the scalar type. + const auto align = get_alignment(*builder.GetInsertBlock()->getModule(), val->getType()->getScalarType()); + + builder.CreateMaskedScatter(val, ptrs, +#if LLVM_VERSION_MAJOR == 10 + boost::numeric_cast(align) +#else + llvm::Align(align) +#endif + ); + } else { + // LCOV_EXCL_START + assert(!llvm::isa(val->getType())); + assert(ptrs->getType()->getPointerElementType() == val->getType()); + // LCOV_EXCL_STOP + + // Not a vector, store val directly. + builder.CreateStore(val, ptrs); + } +} + // Create a SIMD vector of size vector_size filled with the value c. If vector_size is 1, // c will be returned. 
llvm::Value *vector_splat(ir_builder &builder, llvm::Value *c, std::uint32_t vector_size) { + // LCOV_EXCL_START assert(vector_size > 0u); + assert(!llvm::isa(c->getType())); + // LCOV_EXCL_STOP if (vector_size == 1u) { return c; @@ -326,15 +367,18 @@ llvm::Value *vector_splat(ir_builder &builder, llvm::Value *c, std::uint32_t vec llvm::Type *make_vector_type(llvm::Type *t, std::uint32_t vector_size) { + // LCOV_EXCL_START assert(t != nullptr); assert(vector_size > 0u); + assert(!llvm::isa(t)); + // LCOV_EXCL_STOP if (vector_size == 1u) { return t; } else { auto retval = llvm_vector_type::get(t, boost::numeric_cast(vector_size)); - assert(retval != nullptr); + assert(retval != nullptr); // LCOV_EXCL_LINE return retval; } @@ -1380,8 +1424,7 @@ llvm::Function *llvm_add_csc_impl(llvm_state &s, llvm::Type *scal_t, std::uint32 vector_splat(builder, builder.getInt32(batch_size), batch_size))); assert(llvm_depr_GEP_type_check(cf_ptr_v, scal_t)); // LCOV_EXCL_LINE auto last_nz_ptr = builder.CreateInBoundsGEP(scal_t, cf_ptr_v, last_nz_ptr_idx); - auto last_nz_cf = batch_size > 1u ? gather_vector_from_memory(builder, cur_cf->getType(), last_nz_ptr) - : static_cast(builder.CreateLoad(scal_t, last_nz_ptr)); + auto last_nz_cf = gather_vector_from_memory(builder, cur_cf->getType(), last_nz_ptr); // Compute the sign of the current coefficient(s). 
auto cur_sgn = llvm_sgn(s, cur_cf); diff --git a/src/expression.cpp b/src/expression.cpp index 3548f07be..efca092c6 100644 --- a/src/expression.cpp +++ b/src/expression.cpp @@ -40,6 +40,7 @@ #include #include +#include #include #include #include @@ -1336,24 +1337,18 @@ llvm::Value *taylor_diff_f128(llvm_state &s, const expression &ex, const std::ve #endif -namespace detail -{ - -namespace -{ - template -llvm::Function *taylor_c_diff_func_impl(llvm_state &s, const expression &ex, std::uint32_t n_uvars, - std::uint32_t batch_size, bool high_accuracy) +llvm::Function *taylor_c_diff_func(llvm_state &s, const expression &ex, std::uint32_t n_uvars, std::uint32_t batch_size, + bool high_accuracy, std::uint32_t vector_size) { if (auto fptr = std::get_if(&ex.value())) { if constexpr (std::is_same_v) { - return fptr->taylor_c_diff_func_dbl(s, n_uvars, batch_size, high_accuracy); + return fptr->taylor_c_diff_func_dbl(s, n_uvars, batch_size, high_accuracy, vector_size); } else if constexpr (std::is_same_v) { - return fptr->taylor_c_diff_func_ldbl(s, n_uvars, batch_size, high_accuracy); + return fptr->taylor_c_diff_func_ldbl(s, n_uvars, batch_size, high_accuracy, vector_size); #if defined(HEYOKA_HAVE_REAL128) } else if constexpr (std::is_same_v) { - return fptr->taylor_c_diff_func_f128(s, n_uvars, batch_size, high_accuracy); + return fptr->taylor_c_diff_func_f128(s, n_uvars, batch_size, high_accuracy, vector_size); #endif } else { static_assert(detail::always_false_v, "Unhandled type."); @@ -1365,29 +1360,16 @@ llvm::Function *taylor_c_diff_func_impl(llvm_state &s, const expression &ex, std } } -} // namespace - -} // namespace detail - -llvm::Function *taylor_c_diff_func_dbl(llvm_state &s, const expression &ex, std::uint32_t n_uvars, - std::uint32_t batch_size, bool high_accuracy) -{ - return detail::taylor_c_diff_func_impl(s, ex, n_uvars, batch_size, high_accuracy); -} +template HEYOKA_DLL_PUBLIC llvm::Function *taylor_c_diff_func(llvm_state &, const expression &, 
std::uint32_t, + std::uint32_t, bool, std::uint32_t); -llvm::Function *taylor_c_diff_func_ldbl(llvm_state &s, const expression &ex, std::uint32_t n_uvars, - std::uint32_t batch_size, bool high_accuracy) -{ - return detail::taylor_c_diff_func_impl(s, ex, n_uvars, batch_size, high_accuracy); -} +template HEYOKA_DLL_PUBLIC llvm::Function * +taylor_c_diff_func(llvm_state &, const expression &, std::uint32_t, std::uint32_t, bool, std::uint32_t); #if defined(HEYOKA_HAVE_REAL128) -llvm::Function *taylor_c_diff_func_f128(llvm_state &s, const expression &ex, std::uint32_t n_uvars, - std::uint32_t batch_size, bool high_accuracy) -{ - return detail::taylor_c_diff_func_impl(s, ex, n_uvars, batch_size, high_accuracy); -} +template HEYOKA_DLL_PUBLIC llvm::Function * +taylor_c_diff_func(llvm_state &, const expression &, std::uint32_t, std::uint32_t, bool, std::uint32_t); #endif diff --git a/src/func.cpp b/src/func.cpp index 4ca47a234..82e1d2d61 100644 --- a/src/func.cpp +++ b/src/func.cpp @@ -603,7 +603,7 @@ llvm::Value *func::taylor_diff_f128(llvm_state &s, const std::vectortaylor_c_diff_func_dbl(s, n_uvars, batch_size, high_accuracy); + if (vector_size == 0u || (vector_size > 1u && batch_size > 1u)) { + throw std::invalid_argument(fmt::format( + "Invalid vector_size detected in func::taylor_c_diff_func_dbl() for the function '{}'", get_name())); + } + + auto retval = ptr()->taylor_c_diff_func_dbl(s, n_uvars, batch_size, high_accuracy, vector_size); if (retval == nullptr) { throw std::invalid_argument( @@ -627,7 +632,7 @@ llvm::Function *func::taylor_c_diff_func_dbl(llvm_state &s, std::uint32_t n_uvar } llvm::Function *func::taylor_c_diff_func_ldbl(llvm_state &s, std::uint32_t n_uvars, std::uint32_t batch_size, - bool high_accuracy) const + bool high_accuracy, std::uint32_t vector_size) const { if (batch_size == 0u) { throw std::invalid_argument( @@ -640,7 +645,12 @@ llvm::Function *func::taylor_c_diff_func_ldbl(llvm_state &s, std::uint32_t n_uva get_name())); } - auto retval = 
ptr()->taylor_c_diff_func_ldbl(s, n_uvars, batch_size, high_accuracy); + if (vector_size == 0u || (vector_size > 1u && batch_size > 1u)) { + throw std::invalid_argument(fmt::format( + "Invalid vector_size detected in func::taylor_c_diff_func_ldbl() for the function '{}'", get_name())); + } + + auto retval = ptr()->taylor_c_diff_func_ldbl(s, n_uvars, batch_size, high_accuracy, vector_size); if (retval == nullptr) { throw std::invalid_argument( @@ -653,7 +663,7 @@ llvm::Function *func::taylor_c_diff_func_ldbl(llvm_state &s, std::uint32_t n_uva #if defined(HEYOKA_HAVE_REAL128) llvm::Function *func::taylor_c_diff_func_f128(llvm_state &s, std::uint32_t n_uvars, std::uint32_t batch_size, - bool high_accuracy) const + bool high_accuracy, std::uint32_t vector_size) const { if (batch_size == 0u) { throw std::invalid_argument( @@ -666,7 +676,12 @@ llvm::Function *func::taylor_c_diff_func_f128(llvm_state &s, std::uint32_t n_uva get_name())); } - auto retval = ptr()->taylor_c_diff_func_f128(s, n_uvars, batch_size, high_accuracy); + if (vector_size == 0u || (vector_size > 1u && batch_size > 1u)) { + throw std::invalid_argument(fmt::format( + "Invalid vector_size detected in func::taylor_c_diff_func_f128() for the function '{}'", get_name())); + } + + auto retval = ptr()->taylor_c_diff_func_f128(s, n_uvars, batch_size, high_accuracy, vector_size); if (retval == nullptr) { throw std::invalid_argument( diff --git a/src/math/binary_op.cpp b/src/math/binary_op.cpp index 059aab85e..b11e30c82 100644 --- a/src/math/binary_op.cpp +++ b/src/math/binary_op.cpp @@ -26,6 +26,7 @@ #include #include +#include #include #include #include @@ -619,17 +620,19 @@ namespace template llvm::Function *bo_taylor_c_diff_func_num_num(llvm_state &s, const binary_op &bo, const U &n0, const V &n1, std::uint32_t n_uvars, std::uint32_t batch_size, - const std::string &op_name) + const std::string &op_name, std::uint32_t vector_size) { + assert(vector_size == 1u || batch_size == 1u); // LCOV_EXCL_LINE + auto 
&module = s.module(); auto &builder = s.builder(); auto &context = s.context(); - // Fetch the floating-point type. - auto val_t = to_llvm_vector_type(context, batch_size); + // Fetch the return type. + auto val_t = to_llvm_vector_type(context, vector_size > 1u ? vector_size : batch_size); // Fetch the function name and arguments. - const auto na_pair = taylor_c_diff_func_name_args(context, op_name, n_uvars, batch_size, {n0, n1}); + const auto na_pair = taylor_c_diff_vfunc_name_args(context, op_name, n_uvars, batch_size, vector_size, {n0, n1}); const auto &fname = na_pair.first; const auto &fargs = na_pair.second; @@ -664,8 +667,8 @@ llvm::Function *bo_taylor_c_diff_func_num_num(llvm_state &s, const binary_op &bo s, builder.CreateICmpEQ(ord, builder.getInt32(0)), [&]() { // If the order is zero, run the codegen. - auto vnum0 = taylor_c_diff_numparam_codegen(s, n0, num0, par_ptr, batch_size); - auto vnum1 = taylor_c_diff_numparam_codegen(s, n1, num1, par_ptr, batch_size); + auto vnum0 = taylor_c_diff_numparam_codegen(s, n0, num0, par_ptr, batch_size, vector_size); + auto vnum1 = taylor_c_diff_numparam_codegen(s, n1, num1, par_ptr, batch_size, vector_size); switch (bo.op()) { case binary_op::type::add: @@ -683,7 +686,7 @@ llvm::Function *bo_taylor_c_diff_func_num_num(llvm_state &s, const binary_op &bo }, [&]() { // Otherwise, return zero. - builder.CreateStore(vector_splat(builder, codegen(s, number{0.}), batch_size), retval); + builder.CreateStore(llvm::ConstantFP::get(val_t, 0.), retval); }); // Return the result. 
@@ -712,26 +715,29 @@ llvm::Function *bo_taylor_c_diff_func_num_num(llvm_state &s, const binary_op &bo template , is_num_param>, int> = 0> llvm::Function *bo_taylor_c_diff_func_addsub_impl(llvm_state &s, const binary_op &bo, const U &num0, const V &num1, - std::uint32_t n_uvars, std::uint32_t batch_size) + std::uint32_t n_uvars, std::uint32_t batch_size, + std::uint32_t vector_size) { - return bo_taylor_c_diff_func_num_num(s, bo, num0, num1, n_uvars, batch_size, AddOrSub ? "add" : "sub"); + return bo_taylor_c_diff_func_num_num(s, bo, num0, num1, n_uvars, batch_size, AddOrSub ? "add" : "sub", + vector_size); } // Derivative of number +- var. template , int> = 0> llvm::Function *bo_taylor_c_diff_func_addsub_impl(llvm_state &s, const binary_op &, const U &n, const variable &var, - std::uint32_t n_uvars, std::uint32_t batch_size) + std::uint32_t n_uvars, std::uint32_t batch_size, + std::uint32_t vector_size) { auto &module = s.module(); auto &builder = s.builder(); auto &context = s.context(); - // Fetch the floating-point type. - auto val_t = to_llvm_vector_type(context, batch_size); + // Fetch the return type. + auto val_t = to_llvm_vector_type(context, vector_size > 1u ? vector_size : batch_size); // Fetch the function name and arguments. - const auto na_pair - = taylor_c_diff_func_name_args(context, AddOrSub ? "add" : "sub", n_uvars, batch_size, {n, var}); + const auto na_pair = taylor_c_diff_vfunc_name_args(context, AddOrSub ? "add" : "sub", n_uvars, batch_size, + vector_size, {n, var}); const auto &fname = na_pair.first; const auto &fargs = na_pair.second; @@ -767,7 +773,7 @@ llvm::Function *bo_taylor_c_diff_func_addsub_impl(llvm_state &s, const binary_op s, builder.CreateICmpEQ(order, builder.getInt32(0)), [&]() { // For order zero, run the codegen. 
- auto num_vec = taylor_c_diff_numparam_codegen(s, n, num, par_ptr, batch_size); + auto num_vec = taylor_c_diff_numparam_codegen(s, n, num, par_ptr, batch_size, vector_size); auto ret = taylor_c_load_diff(s, diff_arr, n_uvars, builder.getInt32(0), var_idx); builder.CreateStore(AddOrSub ? builder.CreateFAdd(num_vec, ret) : builder.CreateFSub(num_vec, ret), @@ -810,18 +816,19 @@ llvm::Function *bo_taylor_c_diff_func_addsub_impl(llvm_state &s, const binary_op // Derivative of var +- number. template , int> = 0> llvm::Function *bo_taylor_c_diff_func_addsub_impl(llvm_state &s, const binary_op &, const variable &var, const U &n, - std::uint32_t n_uvars, std::uint32_t batch_size) + std::uint32_t n_uvars, std::uint32_t batch_size, + std::uint32_t vector_size) { auto &module = s.module(); auto &builder = s.builder(); auto &context = s.context(); - // Fetch the floating-point type. - auto val_t = to_llvm_vector_type(context, batch_size); + // Fetch the return type. + auto val_t = to_llvm_vector_type(context, vector_size > 1u ? vector_size : batch_size); // Fetch the function name and arguments. - const auto na_pair - = taylor_c_diff_func_name_args(context, AddOrSub ? "add" : "sub", n_uvars, batch_size, {var, n}); + const auto na_pair = taylor_c_diff_vfunc_name_args(context, AddOrSub ? "add" : "sub", n_uvars, batch_size, + vector_size, {var, n}); const auto &fname = na_pair.first; const auto &fargs = na_pair.second; @@ -858,7 +865,7 @@ llvm::Function *bo_taylor_c_diff_func_addsub_impl(llvm_state &s, const binary_op [&]() { // For order zero, run the codegen. auto ret = taylor_c_load_diff(s, diff_arr, n_uvars, builder.getInt32(0), var_idx); - auto num_vec = taylor_c_diff_numparam_codegen(s, n, num, par_ptr, batch_size); + auto num_vec = taylor_c_diff_numparam_codegen(s, n, num, par_ptr, batch_size, vector_size); builder.CreateStore(AddOrSub ? 
builder.CreateFAdd(ret, num_vec) : builder.CreateFSub(ret, num_vec), retval); @@ -893,18 +900,19 @@ llvm::Function *bo_taylor_c_diff_func_addsub_impl(llvm_state &s, const binary_op // Derivative of var +- var. template llvm::Function *bo_taylor_c_diff_func_addsub_impl(llvm_state &s, const binary_op &, const variable &var0, - const variable &var1, std::uint32_t n_uvars, std::uint32_t batch_size) + const variable &var1, std::uint32_t n_uvars, std::uint32_t batch_size, + std::uint32_t vector_size) { auto &module = s.module(); auto &builder = s.builder(); auto &context = s.context(); - // Fetch the floating-point type. - auto val_t = to_llvm_vector_type(context, batch_size); + // Fetch the return type. + auto val_t = to_llvm_vector_type(context, vector_size > 1u ? vector_size : batch_size); // Fetch the function name and arguments. - const auto na_pair - = taylor_c_diff_func_name_args(context, AddOrSub ? "add" : "sub", n_uvars, batch_size, {var0, var1}); + const auto na_pair = taylor_c_diff_vfunc_name_args(context, AddOrSub ? 
"add" : "sub", n_uvars, batch_size, + vector_size, {var0, var1}); const auto &fname = na_pair.first; const auto &fargs = na_pair.second; @@ -966,7 +974,7 @@ llvm::Function *bo_taylor_c_diff_func_addsub_impl(llvm_state &s, const binary_op template , is_num_param>, int> = 0> llvm::Function *bo_taylor_c_diff_func_addsub_impl(llvm_state &, const binary_op &, const V1 &, const V2 &, - std::uint32_t, std::uint32_t) + std::uint32_t, std::uint32_t, std::uint32_t) { throw std::invalid_argument("An invalid argument type was encountered while trying to build the Taylor derivative " "of add()/sub() in compact mode"); @@ -975,22 +983,22 @@ llvm::Function *bo_taylor_c_diff_func_addsub_impl(llvm_state &, const binary_op template llvm::Function *bo_taylor_c_diff_func_add(llvm_state &s, const binary_op &bo, std::uint32_t n_uvars, - std::uint32_t batch_size) + std::uint32_t batch_size, std::uint32_t vector_size) { return std::visit( [&](const auto &v1, const auto &v2) { - return bo_taylor_c_diff_func_addsub_impl(s, bo, v1, v2, n_uvars, batch_size); + return bo_taylor_c_diff_func_addsub_impl(s, bo, v1, v2, n_uvars, batch_size, vector_size); }, bo.lhs().value(), bo.rhs().value()); } template llvm::Function *bo_taylor_c_diff_func_sub(llvm_state &s, const binary_op &bo, std::uint32_t n_uvars, - std::uint32_t batch_size) + std::uint32_t batch_size, std::uint32_t vector_size) { return std::visit( [&](const auto &v1, const auto &v2) { - return bo_taylor_c_diff_func_addsub_impl(s, bo, v1, v2, n_uvars, batch_size); + return bo_taylor_c_diff_func_addsub_impl(s, bo, v1, v2, n_uvars, batch_size, vector_size); }, bo.lhs().value(), bo.rhs().value()); } @@ -999,25 +1007,27 @@ llvm::Function *bo_taylor_c_diff_func_sub(llvm_state &s, const binary_op &bo, st template , is_num_param>, int> = 0> llvm::Function *bo_taylor_c_diff_func_mul_impl(llvm_state &s, const binary_op &bo, const U &num0, const V &num1, - std::uint32_t n_uvars, std::uint32_t batch_size) + std::uint32_t n_uvars, std::uint32_t 
batch_size, + std::uint32_t vector_size) { - return bo_taylor_c_diff_func_num_num(s, bo, num0, num1, n_uvars, batch_size, "mul"); + return bo_taylor_c_diff_func_num_num(s, bo, num0, num1, n_uvars, batch_size, "mul", vector_size); } // Derivative of var * number. template , int> = 0> llvm::Function *bo_taylor_c_diff_func_mul_impl(llvm_state &s, const binary_op &, const variable &var, const U &n, - std::uint32_t n_uvars, std::uint32_t batch_size) + std::uint32_t n_uvars, std::uint32_t batch_size, + std::uint32_t vector_size) { auto &module = s.module(); auto &builder = s.builder(); auto &context = s.context(); - // Fetch the floating-point type. - auto val_t = to_llvm_vector_type(context, batch_size); + // Fetch the return type. + auto val_t = to_llvm_vector_type(context, vector_size > 1u ? vector_size : batch_size); // Fetch the function name and arguments. - const auto na_pair = taylor_c_diff_func_name_args(context, "mul", n_uvars, batch_size, {var, n}); + const auto na_pair = taylor_c_diff_vfunc_name_args(context, "mul", n_uvars, batch_size, vector_size, {var, n}); const auto &fname = na_pair.first; const auto &fargs = na_pair.second; @@ -1050,7 +1060,8 @@ llvm::Function *bo_taylor_c_diff_func_mul_impl(llvm_state &s, const binary_op &, auto ret = taylor_c_load_diff(s, diff_arr, n_uvars, order, var_idx); // Create the return value. - builder.CreateRet(builder.CreateFMul(ret, taylor_c_diff_numparam_codegen(s, n, num, par_ptr, batch_size))); + builder.CreateRet( + builder.CreateFMul(ret, taylor_c_diff_numparam_codegen(s, n, num, par_ptr, batch_size, vector_size))); // Verify. s.verify_function(f); @@ -1074,17 +1085,18 @@ llvm::Function *bo_taylor_c_diff_func_mul_impl(llvm_state &s, const binary_op &, // Derivative of number * var. 
template , int> = 0> llvm::Function *bo_taylor_c_diff_func_mul_impl(llvm_state &s, const binary_op &, const U &n, const variable &var, - std::uint32_t n_uvars, std::uint32_t batch_size) + std::uint32_t n_uvars, std::uint32_t batch_size, + std::uint32_t vector_size) { auto &module = s.module(); auto &builder = s.builder(); auto &context = s.context(); - // Fetch the floating-point type. - auto val_t = to_llvm_vector_type(context, batch_size); + // Fetch the return type. + auto val_t = to_llvm_vector_type(context, vector_size > 1u ? vector_size : batch_size); // Fetch the function name and arguments. - const auto na_pair = taylor_c_diff_func_name_args(context, "mul", n_uvars, batch_size, {n, var}); + const auto na_pair = taylor_c_diff_vfunc_name_args(context, "mul", n_uvars, batch_size, vector_size, {n, var}); const auto &fname = na_pair.first; const auto &fargs = na_pair.second; @@ -1117,7 +1129,8 @@ llvm::Function *bo_taylor_c_diff_func_mul_impl(llvm_state &s, const binary_op &, auto ret = taylor_c_load_diff(s, diff_arr, n_uvars, order, var_idx); // Create the return value. - builder.CreateRet(builder.CreateFMul(ret, taylor_c_diff_numparam_codegen(s, n, num, par_ptr, batch_size))); + builder.CreateRet( + builder.CreateFMul(ret, taylor_c_diff_numparam_codegen(s, n, num, par_ptr, batch_size, vector_size))); // Verify. s.verify_function(f); @@ -1141,17 +1154,19 @@ llvm::Function *bo_taylor_c_diff_func_mul_impl(llvm_state &s, const binary_op &, // Derivative of var * var. template llvm::Function *bo_taylor_c_diff_func_mul_impl(llvm_state &s, const binary_op &, const variable &var0, - const variable &var1, std::uint32_t n_uvars, std::uint32_t batch_size) + const variable &var1, std::uint32_t n_uvars, std::uint32_t batch_size, + std::uint32_t vector_size) { auto &module = s.module(); auto &builder = s.builder(); auto &context = s.context(); - // Fetch the floating-point type. - auto val_t = to_llvm_vector_type(context, batch_size); + // Fetch the return type. 
+ auto val_t = to_llvm_vector_type(context, vector_size > 1u ? vector_size : batch_size); // Fetch the function name and arguments. - const auto na_pair = taylor_c_diff_func_name_args(context, "mul", n_uvars, batch_size, {var0, var1}); + const auto na_pair + = taylor_c_diff_vfunc_name_args(context, "mul", n_uvars, batch_size, vector_size, {var0, var1}); const auto &fname = na_pair.first; const auto &fargs = na_pair.second; @@ -1181,7 +1196,7 @@ llvm::Function *bo_taylor_c_diff_func_mul_impl(llvm_state &s, const binary_op &, // Create the accumulator. auto acc = builder.CreateAlloca(val_t); - builder.CreateStore(vector_splat(builder, codegen(s, number{0.}), batch_size), acc); + builder.CreateStore(llvm::ConstantFP::get(val_t, 0.), acc); // Run the loop. llvm_loop_u32(s, builder.getInt32(0), builder.CreateAdd(ord, builder.getInt32(1)), [&](llvm::Value *j) { @@ -1217,7 +1232,7 @@ llvm::Function *bo_taylor_c_diff_func_mul_impl(llvm_state &s, const binary_op &, template , is_num_param>, int> = 0> llvm::Function *bo_taylor_c_diff_func_mul_impl(llvm_state &, const binary_op &, const V1 &, const V2 &, std::uint32_t, - std::uint32_t) + std::uint32_t, std::uint32_t) { throw std::invalid_argument("An invalid argument type was encountered while trying to build the Taylor derivative " "of mul() in compact mode"); @@ -1226,11 +1241,11 @@ llvm::Function *bo_taylor_c_diff_func_mul_impl(llvm_state &, const binary_op &, template llvm::Function *bo_taylor_c_diff_func_mul(llvm_state &s, const binary_op &bo, std::uint32_t n_uvars, - std::uint32_t batch_size) + std::uint32_t batch_size, std::uint32_t vector_size) { return std::visit( [&](const auto &v1, const auto &v2) { - return bo_taylor_c_diff_func_mul_impl(s, bo, v1, v2, n_uvars, batch_size); + return bo_taylor_c_diff_func_mul_impl(s, bo, v1, v2, n_uvars, batch_size, vector_size); }, bo.lhs().value(), bo.rhs().value()); } @@ -1239,25 +1254,27 @@ llvm::Function *bo_taylor_c_diff_func_mul(llvm_state &s, const binary_op &bo, st 
template , is_num_param>, int> = 0> llvm::Function *bo_taylor_c_diff_func_div_impl(llvm_state &s, const binary_op &bo, const U &num0, const V &num1, - std::uint32_t n_uvars, std::uint32_t batch_size) + std::uint32_t n_uvars, std::uint32_t batch_size, + std::uint32_t vector_size) { - return bo_taylor_c_diff_func_num_num(s, bo, num0, num1, n_uvars, batch_size, "div"); + return bo_taylor_c_diff_func_num_num(s, bo, num0, num1, n_uvars, batch_size, "div", vector_size); } // Derivative of var / number. template , int> = 0> llvm::Function *bo_taylor_c_diff_func_div_impl(llvm_state &s, const binary_op &, const variable &var, const U &n, - std::uint32_t n_uvars, std::uint32_t batch_size) + std::uint32_t n_uvars, std::uint32_t batch_size, + std::uint32_t vector_size) { auto &module = s.module(); auto &builder = s.builder(); auto &context = s.context(); - // Fetch the floating-point type. - auto val_t = to_llvm_vector_type(context, batch_size); + // Fetch the return type. + auto val_t = to_llvm_vector_type(context, vector_size > 1u ? vector_size : batch_size); // Fetch the function name and arguments. - const auto na_pair = taylor_c_diff_func_name_args(context, "div", n_uvars, batch_size, {var, n}); + const auto na_pair = taylor_c_diff_vfunc_name_args(context, "div", n_uvars, batch_size, vector_size, {var, n}); const auto &fname = na_pair.first; const auto &fargs = na_pair.second; @@ -1290,7 +1307,8 @@ llvm::Function *bo_taylor_c_diff_func_div_impl(llvm_state &s, const binary_op &, auto ret = taylor_c_load_diff(s, diff_arr, n_uvars, order, var_idx); // Create the return value. - builder.CreateRet(builder.CreateFDiv(ret, taylor_c_diff_numparam_codegen(s, n, num, par_ptr, batch_size))); + builder.CreateRet( + builder.CreateFDiv(ret, taylor_c_diff_numparam_codegen(s, n, num, par_ptr, batch_size, vector_size))); // Verify. s.verify_function(f); @@ -1314,17 +1332,18 @@ llvm::Function *bo_taylor_c_diff_func_div_impl(llvm_state &s, const binary_op &, // Derivative of number / var. 
template , int> = 0> llvm::Function *bo_taylor_c_diff_func_div_impl(llvm_state &s, const binary_op &, const U &n, const variable &var, - std::uint32_t n_uvars, std::uint32_t batch_size) + std::uint32_t n_uvars, std::uint32_t batch_size, + std::uint32_t vector_size) { auto &module = s.module(); auto &builder = s.builder(); auto &context = s.context(); - // Fetch the floating-point type. - auto val_t = to_llvm_vector_type(context, batch_size); + // Fetch the return type. + auto val_t = to_llvm_vector_type(context, vector_size > 1u ? vector_size : batch_size); // Fetch the function name and arguments. - const auto na_pair = taylor_c_diff_func_name_args(context, "div", n_uvars, batch_size, {n, var}); + const auto na_pair = taylor_c_diff_vfunc_name_args(context, "div", n_uvars, batch_size, vector_size, {n, var}); const auto &fname = na_pair.first; const auto &fargs = na_pair.second; @@ -1367,14 +1386,14 @@ llvm::Function *bo_taylor_c_diff_func_div_impl(llvm_state &s, const binary_op &, s, builder.CreateICmpEQ(ord, builder.getInt32(0)), [&]() { // For order zero, run the codegen. - auto num_vec = taylor_c_diff_numparam_codegen(s, n, num, par_ptr, batch_size); + auto num_vec = taylor_c_diff_numparam_codegen(s, n, num, par_ptr, batch_size, vector_size); auto ret = taylor_c_load_diff(s, diff_ptr, n_uvars, builder.getInt32(0), var_idx); builder.CreateStore(builder.CreateFDiv(num_vec, ret), retval); }, [&]() { // Init the accumulator. - builder.CreateStore(vector_splat(builder, codegen(s, number{0.}), batch_size), acc); + builder.CreateStore(llvm::ConstantFP::get(val_t, 0.), acc); // Run the loop. llvm_loop_u32(s, builder.getInt32(1), builder.CreateAdd(ord, builder.getInt32(1)), [&](llvm::Value *j) { @@ -1418,17 +1437,19 @@ llvm::Function *bo_taylor_c_diff_func_div_impl(llvm_state &s, const binary_op &, // Derivative of var / var. 
template llvm::Function *bo_taylor_c_diff_func_div_impl(llvm_state &s, const binary_op &, const variable &var0, - const variable &var1, std::uint32_t n_uvars, std::uint32_t batch_size) + const variable &var1, std::uint32_t n_uvars, std::uint32_t batch_size, + std::uint32_t vector_size) { auto &module = s.module(); auto &builder = s.builder(); auto &context = s.context(); - // Fetch the floating-point type. - auto val_t = to_llvm_vector_type(context, batch_size); + // Fetch the return type. + auto val_t = to_llvm_vector_type(context, vector_size > 1u ? vector_size : batch_size); // Fetch the function name and arguments. - const auto na_pair = taylor_c_diff_func_name_args(context, "div", n_uvars, batch_size, {var0, var1}); + const auto na_pair + = taylor_c_diff_vfunc_name_args(context, "div", n_uvars, batch_size, vector_size, {var0, var1}); const auto &fname = na_pair.first; const auto &fargs = na_pair.second; @@ -1459,7 +1480,7 @@ llvm::Function *bo_taylor_c_diff_func_div_impl(llvm_state &s, const binary_op &, // Create the accumulator. auto acc = builder.CreateAlloca(val_t); - builder.CreateStore(vector_splat(builder, codegen(s, number{0.}), batch_size), acc); + builder.CreateStore(llvm::ConstantFP::get(val_t, 0.), acc); // Run the loop. 
llvm_loop_u32(s, builder.getInt32(1), builder.CreateAdd(ord, builder.getInt32(1)), [&](llvm::Value *j) { @@ -1499,7 +1520,7 @@ llvm::Function *bo_taylor_c_diff_func_div_impl(llvm_state &s, const binary_op &, template , is_num_param>, int> = 0> llvm::Function *bo_taylor_c_diff_func_div_impl(llvm_state &, const binary_op &, const V1 &, const V2 &, std::uint32_t, - std::uint32_t) + std::uint32_t, std::uint32_t) { throw std::invalid_argument("An invalid argument type was encountered while trying to build the Taylor derivative " "of div() in compact mode"); @@ -1508,51 +1529,51 @@ llvm::Function *bo_taylor_c_diff_func_div_impl(llvm_state &, const binary_op &, template llvm::Function *bo_taylor_c_diff_func_div(llvm_state &s, const binary_op &bo, std::uint32_t n_uvars, - std::uint32_t batch_size) + std::uint32_t batch_size, std::uint32_t vector_size) { return std::visit( [&](const auto &v1, const auto &v2) { - return bo_taylor_c_diff_func_div_impl(s, bo, v1, v2, n_uvars, batch_size); + return bo_taylor_c_diff_func_div_impl(s, bo, v1, v2, n_uvars, batch_size, vector_size); }, bo.lhs().value(), bo.rhs().value()); } template llvm::Function *taylor_c_diff_func_bo_impl(llvm_state &s, const binary_op &bo, std::uint32_t n_uvars, - std::uint32_t batch_size) + std::uint32_t batch_size, std::uint32_t vector_size) { switch (bo.op()) { case binary_op::type::add: - return bo_taylor_c_diff_func_add(s, bo, n_uvars, batch_size); + return bo_taylor_c_diff_func_add(s, bo, n_uvars, batch_size, vector_size); case binary_op::type::sub: - return bo_taylor_c_diff_func_sub(s, bo, n_uvars, batch_size); + return bo_taylor_c_diff_func_sub(s, bo, n_uvars, batch_size, vector_size); case binary_op::type::mul: - return bo_taylor_c_diff_func_mul(s, bo, n_uvars, batch_size); + return bo_taylor_c_diff_func_mul(s, bo, n_uvars, batch_size, vector_size); default: - return bo_taylor_c_diff_func_div(s, bo, n_uvars, batch_size); + return bo_taylor_c_diff_func_div(s, bo, n_uvars, batch_size, vector_size); } } } 
// namespace -llvm::Function *binary_op::taylor_c_diff_func_dbl(llvm_state &s, std::uint32_t n_uvars, std::uint32_t batch_size, - bool) const +llvm::Function *binary_op::taylor_c_diff_func_dbl(llvm_state &s, std::uint32_t n_uvars, std::uint32_t batch_size, bool, + std::uint32_t vector_size) const { - return taylor_c_diff_func_bo_impl(s, *this, n_uvars, batch_size); + return taylor_c_diff_func_bo_impl(s, *this, n_uvars, batch_size, vector_size); } -llvm::Function *binary_op::taylor_c_diff_func_ldbl(llvm_state &s, std::uint32_t n_uvars, std::uint32_t batch_size, - bool) const +llvm::Function *binary_op::taylor_c_diff_func_ldbl(llvm_state &s, std::uint32_t n_uvars, std::uint32_t batch_size, bool, + std::uint32_t vector_size) const { - return taylor_c_diff_func_bo_impl(s, *this, n_uvars, batch_size); + return taylor_c_diff_func_bo_impl(s, *this, n_uvars, batch_size, vector_size); } #if defined(HEYOKA_HAVE_REAL128) -llvm::Function *binary_op::taylor_c_diff_func_f128(llvm_state &s, std::uint32_t n_uvars, std::uint32_t batch_size, - bool) const +llvm::Function *binary_op::taylor_c_diff_func_f128(llvm_state &s, std::uint32_t n_uvars, std::uint32_t batch_size, bool, + std::uint32_t vector_size) const { - return taylor_c_diff_func_bo_impl(s, *this, n_uvars, batch_size); + return taylor_c_diff_func_bo_impl(s, *this, n_uvars, batch_size, vector_size); } #endif diff --git a/src/math/pow.cpp b/src/math/pow.cpp index 16defb95b..fafac0f89 100644 --- a/src/math/pow.cpp +++ b/src/math/pow.cpp @@ -26,6 +26,7 @@ #include #include +#include #include #include #include @@ -368,16 +369,16 @@ namespace template , is_num_param>, int> = 0> llvm::Function *taylor_c_diff_func_pow_impl(llvm_state &s, const pow_impl &fn, const U &n0, const V &n1, - std::uint32_t n_uvars, std::uint32_t batch_size) + std::uint32_t n_uvars, std::uint32_t batch_size, std::uint32_t vector_size) { auto &module = s.module(); auto &builder = s.builder(); auto &context = s.context(); - // Fetch the floating-point type. 
- auto val_t = to_llvm_vector_type(context, batch_size); + // Fetch the return type. + auto val_t = to_llvm_vector_type(context, vector_size > 1u ? vector_size : batch_size); - const auto na_pair = taylor_c_diff_func_name_args(context, "pow", n_uvars, batch_size, {n0, n1}); + const auto na_pair = taylor_c_diff_vfunc_name_args(context, "pow", n_uvars, batch_size, vector_size, {n0, n1}); const auto &fname = na_pair.first; const auto &fargs = na_pair.second; @@ -413,14 +414,15 @@ llvm::Function *taylor_c_diff_func_pow_impl(llvm_state &s, const pow_impl &fn, c [&]() { // If the order is zero, run the codegen. builder.CreateStore( - codegen_from_values(s, fn, - {taylor_c_diff_numparam_codegen(s, n0, num_base, par_ptr, batch_size), - taylor_c_diff_numparam_codegen(s, n1, num_exp, par_ptr, batch_size)}), + codegen_from_values( + s, fn, + {taylor_c_diff_numparam_codegen(s, n0, num_base, par_ptr, batch_size, vector_size), + taylor_c_diff_numparam_codegen(s, n1, num_exp, par_ptr, batch_size, vector_size)}), retval); }, [&]() { // Otherwise, return zero. - builder.CreateStore(vector_splat(builder, codegen(s, number{0.}), batch_size), retval); + builder.CreateStore(llvm::ConstantFP::get(val_t, 0.), retval); }); // Return the result. @@ -448,16 +450,17 @@ llvm::Function *taylor_c_diff_func_pow_impl(llvm_state &s, const pow_impl &fn, c // Derivative of pow(variable, number). template , int> = 0> llvm::Function *taylor_c_diff_func_pow_impl(llvm_state &s, const pow_impl &fn, const variable &var, const U &n, - std::uint32_t n_uvars, std::uint32_t batch_size) + std::uint32_t n_uvars, std::uint32_t batch_size, std::uint32_t vector_size) { auto &module = s.module(); auto &builder = s.builder(); auto &context = s.context(); - // Fetch the floating-point type. - auto val_t = to_llvm_vector_type(context, batch_size); + // Fetch the return type. + const auto val_t_width = vector_size > 1u ? 
vector_size : batch_size; + auto val_t = to_llvm_vector_type(context, val_t_width); - const auto na_pair = taylor_c_diff_func_name_args(context, "pow", n_uvars, batch_size, {var, n}); + const auto na_pair = taylor_c_diff_vfunc_name_args(context, "pow", n_uvars, batch_size, vector_size, {var, n}); const auto &fname = na_pair.first; const auto &fargs = na_pair.second; @@ -498,18 +501,19 @@ llvm::Function *taylor_c_diff_func_pow_impl(llvm_state &s, const pow_impl &fn, c [&]() { // For order 0, invoke the function on the order 0 of var_idx. builder.CreateStore( - codegen_from_values(s, fn, - {taylor_c_load_diff(s, diff_ptr, n_uvars, builder.getInt32(0), var_idx), - taylor_c_diff_numparam_codegen(s, n, exponent, par_ptr, batch_size)}), + codegen_from_values( + s, fn, + {taylor_c_load_diff(s, diff_ptr, n_uvars, builder.getInt32(0), var_idx), + taylor_c_diff_numparam_codegen(s, n, exponent, par_ptr, batch_size, vector_size)}), retval); }, [&]() { // Create FP vector versions of exponent and order. - auto alpha_v = taylor_c_diff_numparam_codegen(s, n, exponent, par_ptr, batch_size); - auto ord_v = vector_splat(builder, builder.CreateUIToFP(ord, to_llvm_type(context)), batch_size); + auto alpha_v = taylor_c_diff_numparam_codegen(s, n, exponent, par_ptr, batch_size, vector_size); + auto ord_v = vector_splat(builder, builder.CreateUIToFP(ord, to_llvm_type(context)), val_t_width); // Init the accumulator. - builder.CreateStore(vector_splat(builder, codegen(s, number{0.}), batch_size), acc); + builder.CreateStore(llvm::ConstantFP::get(val_t, 0.), acc); // Run the loop. llvm_loop_u32(s, builder.getInt32(0), ord, [&](llvm::Value *j) { @@ -517,12 +521,10 @@ llvm::Function *taylor_c_diff_func_pow_impl(llvm_state &s, const pow_impl &fn, c auto aj = taylor_c_load_diff(s, diff_ptr, n_uvars, j, u_idx); // Compute the factor n*alpha-j*(alpha+1). 
- auto j_v = vector_splat(builder, builder.CreateUIToFP(j, to_llvm_type(context)), batch_size); + auto j_v = vector_splat(builder, builder.CreateUIToFP(j, to_llvm_type(context)), val_t_width); auto fac = builder.CreateFSub( builder.CreateFMul(ord_v, alpha_v), - builder.CreateFMul( - j_v, - builder.CreateFAdd(alpha_v, vector_splat(builder, codegen(s, number{1.}), batch_size)))); + builder.CreateFMul(j_v, builder.CreateFAdd(alpha_v, llvm::ConstantFP::get(val_t, 1.)))); builder.CreateStore(builder.CreateFAdd(builder.CreateLoad(val_t, acc), builder.CreateFMul(fac, builder.CreateFMul(b_nj, aj))), @@ -563,7 +565,7 @@ llvm::Function *taylor_c_diff_func_pow_impl(llvm_state &s, const pow_impl &fn, c template , is_num_param>, int> = 0> llvm::Function *taylor_c_diff_func_pow_impl(llvm_state &, const pow_impl &, const U1 &, const U2 &, std::uint32_t, - std::uint32_t) + std::uint32_t, std::uint32_t) { throw std::invalid_argument("An invalid argument type was encountered while trying to build the Taylor derivative " "of a pow() in compact mode"); @@ -571,37 +573,37 @@ llvm::Function *taylor_c_diff_func_pow_impl(llvm_state &, const pow_impl &, cons template llvm::Function *taylor_c_diff_func_pow(llvm_state &s, const pow_impl &fn, std::uint32_t n_uvars, - std::uint32_t batch_size) + std::uint32_t batch_size, std::uint32_t vector_size) { assert(fn.args().size() == 2u); return std::visit( [&](const auto &v1, const auto &v2) { - return taylor_c_diff_func_pow_impl(s, fn, v1, v2, n_uvars, batch_size); + return taylor_c_diff_func_pow_impl(s, fn, v1, v2, n_uvars, batch_size, vector_size); }, fn.args()[0].value(), fn.args()[1].value()); } } // namespace -llvm::Function *pow_impl::taylor_c_diff_func_dbl(llvm_state &s, std::uint32_t n_uvars, std::uint32_t batch_size, - bool) const +llvm::Function *pow_impl::taylor_c_diff_func_dbl(llvm_state &s, std::uint32_t n_uvars, std::uint32_t batch_size, bool, + std::uint32_t vector_size) const { - return taylor_c_diff_func_pow(s, *this, n_uvars, 
batch_size); + return taylor_c_diff_func_pow(s, *this, n_uvars, batch_size, vector_size); } -llvm::Function *pow_impl::taylor_c_diff_func_ldbl(llvm_state &s, std::uint32_t n_uvars, std::uint32_t batch_size, - bool) const +llvm::Function *pow_impl::taylor_c_diff_func_ldbl(llvm_state &s, std::uint32_t n_uvars, std::uint32_t batch_size, bool, + std::uint32_t vector_size) const { - return taylor_c_diff_func_pow(s, *this, n_uvars, batch_size); + return taylor_c_diff_func_pow(s, *this, n_uvars, batch_size, vector_size); } #if defined(HEYOKA_HAVE_REAL128) -llvm::Function *pow_impl::taylor_c_diff_func_f128(llvm_state &s, std::uint32_t n_uvars, std::uint32_t batch_size, - bool) const +llvm::Function *pow_impl::taylor_c_diff_func_f128(llvm_state &s, std::uint32_t n_uvars, std::uint32_t batch_size, bool, + std::uint32_t vector_size) const { - return taylor_c_diff_func_pow(s, *this, n_uvars, batch_size); + return taylor_c_diff_func_pow(s, *this, n_uvars, batch_size, vector_size); } #endif diff --git a/src/math/sum.cpp b/src/math/sum.cpp index 979c406be..545d4a781 100644 --- a/src/math/sum.cpp +++ b/src/math/sum.cpp @@ -23,6 +23,7 @@ #include #include +#include #include #include #include @@ -186,7 +187,7 @@ namespace template llvm::Function *sum_taylor_c_diff_func_impl(llvm_state &s, const sum_impl &sf, std::uint32_t n_uvars, - std::uint32_t batch_size) + std::uint32_t batch_size, std::uint32_t vector_size) { // NOTE: this is prevented in the implementation // of the sum() function. @@ -196,8 +197,8 @@ llvm::Function *sum_taylor_c_diff_func_impl(llvm_state &s, const sum_impl &sf, s auto &builder = s.builder(); auto &context = s.context(); - // Fetch the floating-point type. - auto val_t = to_llvm_vector_type(context, batch_size); + // Fetch the return type. + auto val_t = to_llvm_vector_type(context, vector_size > 1u ? vector_size : batch_size); // Build the vector of arguments needed to determine the function name. 
std::vector> nm_args; @@ -220,7 +221,7 @@ llvm::Function *sum_taylor_c_diff_func_impl(llvm_state &s, const sum_impl &sf, s } // Fetch the function name and arguments. - const auto na_pair = taylor_c_diff_func_name_args(context, "sum", n_uvars, batch_size, nm_args); + const auto na_pair = taylor_c_diff_vfunc_name_args(context, "sum", n_uvars, batch_size, vector_size, nm_args); const auto &fname = na_pair.first; const auto &fargs = na_pair.second; @@ -270,12 +271,12 @@ llvm::Function *sum_taylor_c_diff_func_impl(llvm_state &s, const sum_impl &sf, s [&]() { // If the order is zero, run the codegen. builder.CreateStore( - taylor_c_diff_numparam_codegen(s, v, terms + i, par_ptr, batch_size), retval); + taylor_c_diff_numparam_codegen(s, v, terms + i, par_ptr, batch_size, vector_size), + retval); }, [&]() { // Otherwise, return zero. - builder.CreateStore(vector_splat(builder, codegen(s, number{0.}), batch_size), - retval); + builder.CreateStore(llvm::ConstantFP::get(val_t, 0.), retval); }); return builder.CreateLoad(val_t, retval); @@ -315,24 +316,24 @@ llvm::Function *sum_taylor_c_diff_func_impl(llvm_state &s, const sum_impl &sf, s } // namespace -llvm::Function *sum_impl::taylor_c_diff_func_dbl(llvm_state &s, std::uint32_t n_uvars, std::uint32_t batch_size, - bool) const +llvm::Function *sum_impl::taylor_c_diff_func_dbl(llvm_state &s, std::uint32_t n_uvars, std::uint32_t batch_size, bool, + std::uint32_t vector_size) const { - return sum_taylor_c_diff_func_impl(s, *this, n_uvars, batch_size); + return sum_taylor_c_diff_func_impl(s, *this, n_uvars, batch_size, vector_size); } -llvm::Function *sum_impl::taylor_c_diff_func_ldbl(llvm_state &s, std::uint32_t n_uvars, std::uint32_t batch_size, - bool) const +llvm::Function *sum_impl::taylor_c_diff_func_ldbl(llvm_state &s, std::uint32_t n_uvars, std::uint32_t batch_size, bool, + std::uint32_t vector_size) const { - return sum_taylor_c_diff_func_impl(s, *this, n_uvars, batch_size); + return sum_taylor_c_diff_func_impl(s, 
*this, n_uvars, batch_size, vector_size); } #if defined(HEYOKA_HAVE_REAL128) -llvm::Function *sum_impl::taylor_c_diff_func_f128(llvm_state &s, std::uint32_t n_uvars, std::uint32_t batch_size, - bool) const +llvm::Function *sum_impl::taylor_c_diff_func_f128(llvm_state &s, std::uint32_t n_uvars, std::uint32_t batch_size, bool, + std::uint32_t vector_size) const { - return sum_taylor_c_diff_func_impl(s, *this, n_uvars, batch_size); + return sum_taylor_c_diff_func_impl(s, *this, n_uvars, batch_size, vector_size); } #endif diff --git a/src/math/sum_sq.cpp b/src/math/sum_sq.cpp index 7f3b51d3b..725ee3ec7 100644 --- a/src/math/sum_sq.cpp +++ b/src/math/sum_sq.cpp @@ -26,6 +26,7 @@ #include #include +#include #include #include #include @@ -290,7 +291,7 @@ namespace template llvm::Function *sum_sq_taylor_c_diff_func_impl(llvm_state &s, const sum_sq_impl &sf, std::uint32_t n_uvars, - std::uint32_t batch_size) + std::uint32_t batch_size, std::uint32_t vector_size) { // NOTE: this is prevented in the implementation // of the sum() function. @@ -300,8 +301,8 @@ llvm::Function *sum_sq_taylor_c_diff_func_impl(llvm_state &s, const sum_sq_impl auto &builder = s.builder(); auto &context = s.context(); - // Fetch the floating-point type. - auto val_t = to_llvm_vector_type(context, batch_size); + // Fetch the return type. + auto val_t = to_llvm_vector_type(context, vector_size > 1u ? vector_size : batch_size); // Build the vector of arguments needed to determine the function name. std::vector> nm_args; @@ -324,7 +325,7 @@ llvm::Function *sum_sq_taylor_c_diff_func_impl(llvm_state &s, const sum_sq_impl } // Fetch the function name and arguments. 
- const auto na_pair = taylor_c_diff_func_name_args(context, "sum_sq", n_uvars, batch_size, nm_args); + const auto na_pair = taylor_c_diff_vfunc_name_args(context, "sum_sq", n_uvars, batch_size, vector_size, nm_args); const auto &fname = na_pair.first; const auto &fargs = na_pair.second; @@ -359,7 +360,7 @@ llvm::Function *sum_sq_taylor_c_diff_func_impl(llvm_state &s, const sum_sq_impl v_accs.resize(boost::numeric_cast(sf.args().size())); for (auto &acc : v_accs) { acc = builder.CreateAlloca(val_t); - builder.CreateStore(vector_splat(builder, codegen(s, number{0.}), batch_size), acc); + builder.CreateStore(llvm::ConstantFP::get(val_t, 0.), acc); } // Create the return value. @@ -465,13 +466,13 @@ llvm::Function *sum_sq_taylor_c_diff_func_impl(llvm_state &s, const sum_sq_impl s, builder.CreateICmpEQ(order, builder.getInt32(0)), [&]() { // Order 0, store the num/param. - builder.CreateStore( - taylor_c_diff_numparam_codegen(s, v, terms + k, par_ptr, batch_size), ret); + builder.CreateStore(taylor_c_diff_numparam_codegen(s, v, terms + k, par_ptr, + batch_size, vector_size), + ret); }, [&]() { // Order 2 or higher, store zero. 
- builder.CreateStore( - vector_splat(builder, codegen(s, number{0.}), batch_size), ret); + builder.CreateStore(llvm::ConstantFP::get(val_t, 0.), ret); }); auto val = builder.CreateLoad(val_t, ret); @@ -522,23 +523,23 @@ llvm::Function *sum_sq_taylor_c_diff_func_impl(llvm_state &s, const sum_sq_impl } // namespace llvm::Function *sum_sq_impl::taylor_c_diff_func_dbl(llvm_state &s, std::uint32_t n_uvars, std::uint32_t batch_size, - bool) const + bool, std::uint32_t vector_size) const { - return sum_sq_taylor_c_diff_func_impl(s, *this, n_uvars, batch_size); + return sum_sq_taylor_c_diff_func_impl(s, *this, n_uvars, batch_size, vector_size); } llvm::Function *sum_sq_impl::taylor_c_diff_func_ldbl(llvm_state &s, std::uint32_t n_uvars, std::uint32_t batch_size, - bool) const + bool, std::uint32_t vector_size) const { - return sum_sq_taylor_c_diff_func_impl(s, *this, n_uvars, batch_size); + return sum_sq_taylor_c_diff_func_impl(s, *this, n_uvars, batch_size, vector_size); } #if defined(HEYOKA_HAVE_REAL128) llvm::Function *sum_sq_impl::taylor_c_diff_func_f128(llvm_state &s, std::uint32_t n_uvars, std::uint32_t batch_size, - bool) const + bool, std::uint32_t vector_size) const { - return sum_sq_taylor_c_diff_func_impl(s, *this, n_uvars, batch_size); + return sum_sq_taylor_c_diff_func_impl(s, *this, n_uvars, batch_size, vector_size); } #endif diff --git a/src/taylor_01.cpp b/src/taylor_01.cpp index 0dfff03a7..019a0888b 100644 --- a/src/taylor_01.cpp +++ b/src/taylor_01.cpp @@ -67,6 +67,7 @@ #endif #include +#include #include #include #include @@ -198,6 +199,118 @@ taylor_c_diff_func_name_args_impl(llvm::LLVMContext &context, const std::string return std::make_pair(std::move(fname), std::move(fargs)); } +// TODO remove the other version, then rename? 
+template +std::pair> +taylor_c_diff_vfunc_name_args(llvm::LLVMContext &context, const std::string &name, std::uint32_t n_uvars, + std::uint32_t batch_size, std::uint32_t vector_size, + const std::vector> &args, + std::uint32_t n_hidden_deps) +{ + // LCOV_EXCL_START + assert(n_uvars > 0u); + assert(vector_size == 1u || batch_size == 1u); + // LCOV_EXCL_STOP + + // Fetch the scalar floating-point type corresponding to T. + auto scal_fp_t = to_llvm_type(context); + + // Fetch the floating-point type stored in diff_arr. + auto diff_val_t = make_vector_type(scal_fp_t, batch_size); + + // Init the name. + auto fname = fmt::format("heyoka.taylor_c_diff.{}.{}.", vector_size, name); + + // Init the vector of arguments: + // - diff order, + // - idx/indices of the u variable(s) whose diff is being computed, + // - diff array (pointer to diff_val_t), + // - par ptr (pointer to scalar), + // - time ptr (pointer to scalar). + std::vector fargs{llvm::Type::getInt32Ty(context), + make_vector_type(llvm::Type::getInt32Ty(context), vector_size), + llvm::PointerType::getUnqual(diff_val_t), llvm::PointerType::getUnqual(scal_fp_t), + llvm::PointerType::getUnqual(scal_fp_t)}; + + // Add the mangling and LLVM arg types for the argument types. Also, detect if + // we have variables in the arguments. + bool with_var = false; + for (decltype(args.size()) i = 0; i < args.size(); ++i) { + // Detect variable. + if (std::holds_alternative(args[i])) { + with_var = true; + } + + // Name mangling. + fname += std::visit([](const auto &v) { return taylor_c_diff_mangle(v); }, args[i]); + + // Add the arguments separator, if we are not at the + // last argument. + if (i != args.size() - 1u) { + fname += '_'; + } + + // Add the LLVM function argument type. 
+ fargs.push_back(std::visit( + [&](const auto &v) -> llvm::Type * { + using type = detail::uncvref_t; + + if constexpr (std::is_same_v) { + // For numbers, the argument is passed as a scalar + // floating-point value, or a vector of floating-point + // values in vector mode. + return make_vector_type(scal_fp_t, vector_size); + } else { + // For vars and params, the argument is an index + // in an array, or a vector of indices in vector mode. + return make_vector_type(llvm::Type::getInt32Ty(context), vector_size); + } + }, + args[i])); + } + + // Close the argument list with a ".". + // NOTE: this will result in a ".." in the name + // if the function has zero arguments. + fname += '.'; + + // If we have variables in the arguments, add mangling + // for n_uvars. This is needed because the function logic + // for accessing the derivatives depends on n_uvars. + if (with_var) { + fname += fmt::format("n_uvars_{}.", n_uvars); + } + + // Finally, add the mangling for diff_val_t. + fname += llvm_mangle_type(diff_val_t); + + // Fill in the hidden dependency arguments. These are all indices or vectors + // of indices. 
+ fargs.insert(fargs.end(), boost::numeric_cast(n_hidden_deps), + make_vector_type(llvm::Type::getInt32Ty(context), vector_size)); + + return std::make_pair(std::move(fname), std::move(fargs)); +} + +template HEYOKA_DLL_PUBLIC std::pair> +taylor_c_diff_vfunc_name_args(llvm::LLVMContext &, const std::string &, std::uint32_t, std::uint32_t, + std::uint32_t, const std::vector> &, + std::uint32_t); + +template HEYOKA_DLL_PUBLIC std::pair> +taylor_c_diff_vfunc_name_args(llvm::LLVMContext &, const std::string &, std::uint32_t, std::uint32_t, + std::uint32_t, const std::vector> &, + std::uint32_t); + +#if defined(HEYOKA_HAVE_REAL128) + +template HEYOKA_DLL_PUBLIC std::pair> +taylor_c_diff_vfunc_name_args(llvm::LLVMContext &, const std::string &, std::uint32_t, std::uint32_t, + std::uint32_t, const std::vector> &, + std::uint32_t); + +#endif + namespace { @@ -280,13 +393,33 @@ llvm::Value *taylor_c_diff_numparam_codegen(llvm_state &s, const number &, llvm: return vector_splat(s.builder(), n, batch_size); } +llvm::Value *taylor_c_diff_numparam_codegen(llvm_state &s, const number &, llvm::Value *n, llvm::Value *, + std::uint32_t batch_size, std::uint32_t vector_size) +{ + // LCOV_EXCL_START +#if !defined(NDEBUG) + assert(batch_size > 0u); + assert(vector_size > 0u); + + if (vector_size == 1u) { + assert(!llvm::isa(n->getType())); + } else { + assert(batch_size == 1u); + assert(llvm::isa(n->getType())); + assert(llvm::cast(n->getType())->getNumElements() == vector_size); + } +#endif + // LCOV_EXCL_STOP + + return vector_size == 1u ? 
vector_splat(s.builder(), n, batch_size) : n; +} + llvm::Value *taylor_c_diff_numparam_codegen(llvm_state &s, const param &, llvm::Value *p, llvm::Value *par_ptr, std::uint32_t batch_size) { // LCOV_EXCL_START assert(batch_size > 0u); assert(llvm::isa(par_ptr->getType())); - assert(!llvm::cast(par_ptr->getType())->isVectorTy()); // LCOV_EXCL_STOP auto &builder = s.builder(); @@ -299,6 +432,46 @@ llvm::Value *taylor_c_diff_numparam_codegen(llvm_state &s, const param &, llvm:: return load_vector_from_memory(builder, ptr, batch_size); } +llvm::Value *taylor_c_diff_numparam_codegen(llvm_state &s, const param &, llvm::Value *p, llvm::Value *par_ptr, + std::uint32_t batch_size, std::uint32_t vector_size) +{ + // LCOV_EXCL_START +#if !defined(NDEBUG) + assert(batch_size > 0u); + assert(vector_size > 0u); + + assert(llvm::isa(par_ptr->getType())); + assert(!llvm::isa(llvm::cast(par_ptr->getType())->getPointerElementType())); + + if (vector_size == 1u) { + assert(!llvm::isa(p->getType())); + } else { + assert(batch_size == 1u); + assert(llvm::isa(p->getType())); + assert(llvm::cast(p->getType())->getNumElements() == vector_size); + } +#endif + // LCOV_EXCL_STOP + + auto &builder = s.builder(); + + // Fetch the scalar floating-point type of the parameters. + auto *scal_fp_t = llvm::cast(par_ptr->getType())->getPointerElementType(); + + // NOTE: overflow checks are done in taylor_compute_jet(). + if (vector_size == 1u) { + auto *ptr = builder.CreateInBoundsGEP(scal_fp_t, par_ptr, builder.CreateMul(p, builder.getInt32(batch_size))); + + return load_vector_from_memory(builder, ptr, batch_size); + } else { + // Fetch the pointers into par_ptr. + auto *ptrs = builder.CreateInBoundsGEP(scal_fp_t, par_ptr, p); + + // Gather. + return gather_vector_from_memory(builder, make_vector_type(scal_fp_t, vector_size), ptrs); + } +} + // Helper to fetch the derivative of order 'order' of the u variable at index u_idx from the // derivative array 'arr'. 
The total number of u variables is n_uvars. llvm::Value *taylor_fetch_diff(const std::vector &arr, std::uint32_t u_idx, std::uint32_t order, @@ -314,38 +487,90 @@ llvm::Value *taylor_fetch_diff(const std::vector &arr, std::uint3 return arr[idx]; } -// Load the derivative of order 'order' of the u variable u_idx from the array of Taylor derivatives diff_arr. +// Load the derivative of order 'order' of the u variable(s) u_idx from the array of Taylor derivatives diff_arr. // n_uvars is the total number of u variables. llvm::Value *taylor_c_load_diff(llvm_state &s, llvm::Value *diff_arr, std::uint32_t n_uvars, llvm::Value *order, llvm::Value *u_idx) { - auto &builder = s.builder(); - // NOTE: overflow check has already been done to ensure that the // total size of diff_arr fits in a 32-bit unsigned integer. - assert(llvm_depr_GEP_type_check(diff_arr, pointee_type(diff_arr))); // LCOV_EXCL_LINE - auto *ptr - = builder.CreateInBoundsGEP(pointee_type(diff_arr), diff_arr, - builder.CreateAdd(builder.CreateMul(order, builder.getInt32(n_uvars)), u_idx)); + auto &builder = s.builder(); + + // Fetch the floating-point type in diff_arr. + auto *diff_val_t = pointee_type(diff_arr); + + if (auto *vec_idx_t = llvm::dyn_cast(u_idx->getType())) { + // Vector of indices. - return builder.CreateLoad(pointee_type(diff_arr), ptr); + // NOTE: vector mode and batch mode are mutually exclusive. + assert(!llvm::isa(diff_val_t)); // LCOV_EXCL_LINE + + // Fetch the vector floating-point type. + const auto vector_size = boost::numeric_cast(vec_idx_t->getNumElements()); + // NOTE: the expectation here is that vectors of size 1 never show up, as they are always + // turned into scalars by helpers such as make_vector_type() & co. + assert(vector_size > 1u); // LCOV_EXCL_LINE + auto *vec_fp_t = make_vector_type(diff_val_t, vector_size); + + // Compute the pointers. 
+ auto *ptrs = builder.CreateInBoundsGEP( + diff_val_t, diff_arr, + builder.CreateAdd(vector_splat(builder, builder.CreateMul(order, builder.getInt32(n_uvars)), vector_size), + u_idx)); + + // Load. + return gather_vector_from_memory(builder, vec_fp_t, ptrs); + } else { + // Single index. + auto *ptr = builder.CreateInBoundsGEP( + diff_val_t, diff_arr, builder.CreateAdd(builder.CreateMul(order, builder.getInt32(n_uvars)), u_idx)); + + return builder.CreateLoad(diff_val_t, ptr); + } } -// Store the value val as the derivative of order 'order' of the u variable u_idx +// Store the value x as the derivative of order 'order' of the u variable u_idx // into the array of Taylor derivatives diff_arr. n_uvars is the total number of u variables. void taylor_c_store_diff(llvm_state &s, llvm::Value *diff_arr, std::uint32_t n_uvars, llvm::Value *order, - llvm::Value *u_idx, llvm::Value *val) + llvm::Value *u_idx, llvm::Value *x) { - auto &builder = s.builder(); - // NOTE: overflow check has already been done to ensure that the // total size of diff_arr fits in a 32-bit unsigned integer. - assert(llvm_depr_GEP_type_check(diff_arr, pointee_type(diff_arr))); // LCOV_EXCL_LINE - auto *ptr - = builder.CreateInBoundsGEP(pointee_type(diff_arr), diff_arr, - builder.CreateAdd(builder.CreateMul(order, builder.getInt32(n_uvars)), u_idx)); + auto &builder = s.builder(); + + // Fetch the floating-point type in diff_arr. + auto *diff_val_t = pointee_type(diff_arr); - builder.CreateStore(val, ptr); + if (auto *vec_idx_t = llvm::dyn_cast(u_idx->getType())) { + // Vector mode. + + // LCOV_EXCL_START + // NOTE: vector mode and batch mode are mutually exclusive. 
+ assert(!llvm::isa(diff_val_t)); + assert(llvm::isa(x->getType())); + assert(llvm::cast(x->getType())->getNumElements() == vec_idx_t->getNumElements()); + // LCOV_EXCL_STOP + + const auto vector_size = boost::numeric_cast(vec_idx_t->getNumElements()); + // NOTE: the expectation here is that vectors of size 1 never show up, as they are always + // turned into scalars by helpers such as make_vector_type() & co. + assert(vector_size > 1u); // LCOV_EXCL_LINE + + // Compute the pointers. + auto *ptrs = builder.CreateInBoundsGEP( + diff_val_t, diff_arr, + builder.CreateAdd(vector_splat(builder, builder.CreateMul(order, builder.getInt32(n_uvars)), vector_size), + u_idx)); + + // Store. + scatter_vector_to_memory(builder, x, ptrs); + } else { + // Scalar mode. + auto *ptr = builder.CreateInBoundsGEP( + diff_val_t, diff_arr, builder.CreateAdd(builder.CreateMul(order, builder.getInt32(n_uvars)), u_idx)); + + builder.CreateStore(x, ptr); + } } namespace diff --git a/src/taylor_02.cpp b/src/taylor_02.cpp index 167e35b8e..374543603 100644 --- a/src/taylor_02.cpp +++ b/src/taylor_02.cpp @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -1052,12 +1053,16 @@ struct llvm_func_name_compare { }; // For each segment in s_dc, this function will return a dict mapping an LLVM function -// f for the computation of a Taylor derivative to a size and a vector of std::functions. For example, one entry -// in the return value will read something like: -// {f : (2, [g_0, g_1, g_2])} +// f for the computation of a Taylor derivative to: +// - an integer, +// - a vector of std::functions, +// - an expression. +// For example, one entry in the return value will read something like: +// {f : (2, [g_0, g_1, g_2], func(u_0, u_1, u_2))} // The meaning in this example is that the arity of f is 3 and it will be called with 2 different // sets of arguments. 
The g_i functions are expected to be called with input argument j in [0, 1] // to yield the value of the i-th function argument for f at the j-th invocation. +// func(u_0, u_1, u_2) is the heyoka expression from which f was generated. template auto taylor_build_function_maps(llvm_state &s, const std::vector &s_dc, std::uint32_t n_eq, std::uint32_t n_uvars, std::uint32_t batch_size, bool high_accuracy) @@ -1070,62 +1075,70 @@ auto taylor_build_function_maps(llvm_state &s, const std::vector &s // functions are invoked in taylor_compute_jet_compact_mode() is always the same. If we used directly pointer // comparisons instead, the order could vary across different executions and different platforms. The name // mangling we do when creating the function names should ensure that there are no possible name collisions. - std::vector< - std::map>>, - llvm_func_name_compare>> + using gen_vec_t = std::vector>; + std::vector, llvm_func_name_compare>> retval; - // Variable to keep track of the u variable + // Counter to keep track of the index of the u variable // on whose definition we are operating. auto cur_u_idx = n_eq; for (const auto &seg : s_dc) { - // This structure maps an LLVM function to sets of arguments - // with which the function is to be called. For instance, if function - // f(x, y, z) is to be called as f(a, b, c) and f(d, e, f), then tmp_map - // will contain {f : [[a, b, c], [d, e, f]]}. + // This structure maps an LLVM function to: + // - the sets of arguments with which the function is to be called, + // - the expression that was used for the generation of the LLVM function. + // For instance, if the LLVM function f generated from the heyoka expression + // func(x, y, z) is to be called as f(a, b, c) and f(d, e, f), then tmp_map + // will contain {f : ([[a, b, c], [d, e, f]], func(x, y, z))}. // After construction, we have verified that for each function // in the map the sets of arguments have all the same size. 
- std::unordered_map>>> tmp_map; + using v_args_t = std::vector>>; + std::unordered_map> tmp_map; for (const auto &ex : seg) { - // Get the function for the computation of the derivative. - auto func = taylor_c_diff_func(s, ex.first, n_uvars, batch_size, high_accuracy); + // Generate or get the function for the computation of the derivative. + auto func = taylor_c_diff_func(s, ex.first, n_uvars, batch_size, high_accuracy, 1); // Insert the function into tmp_map. - const auto [it, is_new_func] = tmp_map.try_emplace(func); + const auto [it, is_new_func] = tmp_map.try_emplace(func, v_args_t{}, ex.first); - assert(is_new_func || !it->second.empty()); // LCOV_EXCL_LINE + assert(is_new_func || !std::get<0>(it->second).empty()); // LCOV_EXCL_LINE // Convert the variables/constants in the current dc // element into a set of indices/constants. const auto cdiff_args = taylor_udef_to_variants(ex.first, ex.second); - if (!is_new_func && it->second.back().size() - 1u != cdiff_args.size()) { + if (!is_new_func && std::get<0>(it->second).back().size() - 1u != cdiff_args.size()) { throw std::invalid_argument( "Inconsistent arity detected in a Taylor derivative function in compact " "mode: the same function is being called with both {} and {} arguments"_format( - it->second.back().size() - 1u, cdiff_args.size())); + std::get<0>(it->second).back().size() - 1u, cdiff_args.size())); } // Add the new set of arguments. - it->second.emplace_back(); + std::get<0>(it->second).emplace_back(); // Add the idx of the u variable. - it->second.back().emplace_back(cur_u_idx); + std::get<0>(it->second).back().emplace_back(cur_u_idx); // Add the actual function arguments. 
- it->second.back().insert(it->second.back().end(), cdiff_args.begin(), cdiff_args.end()); + std::get<0>(it->second) + .back() + .insert(std::get<0>(it->second).back().end(), cdiff_args.begin(), cdiff_args.end()); ++cur_u_idx; } - // Now we build the transposition of tmp_map: from {f : [[a, b, c], [d, e, f]]} - // to {f : [[a, d], [b, e], [c, f]]}. - std::unordered_map, std::vector>>> - tmp_map_transpose; - for (const auto &[func, vv] : tmp_map) { - assert(!vv.empty()); // LCOV_EXCL_LINE + // Now we build the arguments transposition of tmp_map: from {f : ([[a, b, c], [d, e, f]], func(x, y, z))} + // to {f : ([[a, d], [b, e], [c, f]], func(x, y, z))}. + using v_args_t_t = std::vector, std::vector>>; + std::unordered_map> tmp_map_transpose; + for (auto &[func, tup] : tmp_map) { + const auto &vv = std::get<0>(tup); + + assert(!vv.empty()); // LCOV_EXCL_LINE + assert(std::holds_alternative(std::get<1>(tup).value())); // LCOV_EXCL_LINE // Add the function. - const auto [it, ins_status] = tmp_map_transpose.try_emplace(func); + const auto [it, ins_status] + = tmp_map_transpose.try_emplace(func, v_args_t_t{}, std::move(std::get<1>(tup))); assert(ins_status); // LCOV_EXCL_LINE const auto n_calls = vv.size(); @@ -1145,7 +1158,7 @@ auto taylor_build_function_maps(llvm_state &s, const std::vector &s // Turn tmp_c_vec (a vector of variants) into a variant // of vectors, and insert the result. - it->second.push_back(taylor_c_vv_transpose(tmp_c_vec)); + std::get<0>(it->second).push_back(taylor_c_vv_transpose(tmp_c_vec)); } } @@ -1153,31 +1166,34 @@ auto taylor_build_function_maps(llvm_state &s, const std::vector &s retval.emplace_back(); auto &a_map = retval.back(); - for (const auto &[func, vv] : tmp_map_transpose) { + for (auto &[func, tup] : tmp_map_transpose) { + const auto &vv = std::get<0>(tup); + assert(!vv.empty()); // LCOV_EXCL_LINE + // Compute the number of calls for this function. 
+ const auto ncalls + = std::visit([](const auto &x) { return boost::numeric_cast(x.size()); }, vv[0]); + assert(ncalls > 0u); // LCOV_EXCL_LINE + // Add the function. - const auto [it, ins_status] = a_map.try_emplace(func); + const auto [it, ins_status] = a_map.try_emplace(func, ncalls, gen_vec_t{}, std::move(std::get<1>(tup))); assert(ins_status); // LCOV_EXCL_LINE - // Set the number of calls for this function. - it->second.first - = std::visit([](const auto &x) { return boost::numeric_cast(x.size()); }, vv[0]); - assert(it->second.first > 0u); // LCOV_EXCL_LINE - // Create the g functions for each argument. for (const auto &v : vv) { - it->second.second.push_back(std::visit( - [&s](const auto &x) { - using type = detail::uncvref_t; + std::get<1>(it->second) + .push_back(std::visit( + [&s](const auto &x) { + using type = detail::uncvref_t; - if constexpr (std::is_same_v>) { - return taylor_c_make_arg_gen_vidx(s, x); - } else { - return taylor_c_make_arg_gen_vc(s, x); - } - }, - v)); + if constexpr (std::is_same_v>) { + return taylor_c_make_arg_gen_vidx(s, x); + } else { + return taylor_c_make_arg_gen_vc(s, x); + } + }, + v)); } } } @@ -1193,7 +1209,7 @@ auto taylor_build_function_maps(llvm_state &s, const std::vector &s fm_bd.emplace_back(); for (const auto &p : m) { - fm_bd.back().push_back(p.second.first); + fm_bd.back().push_back(std::get<0>(p.second)); } } @@ -1341,10 +1357,10 @@ llvm::Value *taylor_compute_jet_compact_mode(llvm_state &s, llvm::Value *order0, const auto &func = p.first; // The number of func calls. - const auto ncalls = p.second.first; + const auto ncalls = std::get<0>(p.second); // The generators for the arguments of func. - const auto &gens = p.second.second; + const auto &gens = std::get<1>(p.second); // Fetch the current insertion block. 
auto *orig_bb = builder.GetInsertBlock(); @@ -1434,35 +1450,167 @@ llvm::Value *taylor_compute_jet_compact_mode(llvm_state &s, llvm::Value *order0, // func is the LLVM function for the computation of the Taylor derivative in the block, // ncalls the number of times it must be called, gens the generators for the // function arguments and cur_order the order of the derivative. - auto block_diff = [&](const auto &func, const auto &ncalls, const auto &gens, llvm::Value *cur_order) { + // ex is the heyoka expression from which func was generated, used to create its vector counterpart. + auto block_diff = [&](const auto &func, const auto &ncalls, const auto &gens, const expression &ex, + llvm::Value *cur_order) { // LCOV_EXCL_START assert(ncalls > 0u); assert(!gens.empty()); assert(std::all_of(gens.begin(), gens.end(), [](const auto &f) { return static_cast(f); })); // LCOV_EXCL_STOP - // Loop over the number of calls. - llvm_loop_u32(s, builder.getInt32(0), builder.getInt32(ncalls), [&](llvm::Value *cur_call_idx) { - // Create the u variable index from the first generator. - auto u_idx = gens[0](cur_call_idx); + if (batch_size == 1u) { + // The batch size is 1: we can implement the vectorized codepath. + + const auto barfo_size = 4u; + + const auto nregs = ncalls / barfo_size, rem = ncalls % barfo_size; + + llvm_loop_u32(s, builder.getInt32(0), builder.getInt32(nregs), [&](llvm::Value *idx) { + // Turn the sets of arguments returned by the generators into a single set of vector arguments. + std::vector gen_vec_args, tmp; + + for (const auto &gen : gens) { + // Generate the arguments into tmp. + tmp.clear(); + + for (std::uint32_t i = 0; i < barfo_size; ++i) { + tmp.push_back(gen(builder.CreateAdd(builder.CreateMul(idx, builder.getInt32(barfo_size)), + builder.getInt32(i)))); + } + + // Transform tmp into a vector and add it + // to gen_vec_args. + // NOTE: tmp holds exactly barfo_size elements, so + // scalars_to_vector() never receives a single element here.
+ // TODO fix docs + gen_vec_args.push_back(scalars_to_vector(builder, tmp)); + } + + // Create the vector diff function. + auto *vfunc = taylor_c_diff_func(s, ex, n_uvars, 1, high_accuracy, barfo_size); + + // Initialise the arguments with which vfunc must be called. The following + // initial arguments are always present: + // - current Taylor order, + // - u indices of the variables, + // - array of derivatives, + // - pointer to the param values, + // - pointer to the time value(s). + std::vector args{cur_order, gen_vec_args[0], diff_arr, par_ptr, time_ptr}; + + // Append the other arguments. + for (decltype(gens.size()) i = 1; i < gens.size(); ++i) { + args.push_back(gen_vec_args[i]); + } + + // Calculate the derivative and store the result. + taylor_c_store_diff(s, diff_arr, n_uvars, cur_order, gen_vec_args[0], builder.CreateCall(vfunc, args)); + }); - // Initialise the vector of arguments with which func must be called. The following + if (rem != 0u) { + std::vector gen_vec_args, tmp; + + for (const auto &gen : gens) { + // Generate the arguments into tmp. + tmp.clear(); + + for (std::uint32_t i = 0; i < rem; ++i) { + tmp.push_back(gen(builder.getInt32(nregs * barfo_size + i))); + } + + // Transform tmp into a vector and add it + // to gen_vec_args. + // NOTE: if rem is 1, then scalars_to_vector() + // will just return the first element of tmp. + // TODO fix docs + gen_vec_args.push_back(scalars_to_vector(builder, tmp)); + } + + // Create the vector diff function. + auto *vfunc = taylor_c_diff_func(s, ex, n_uvars, 1, high_accuracy, rem); + + // Initialise the arguments with which vfunc must be called. The following + // initial arguments are always present: + // - current Taylor order, + // - u indices of the variables, + // - array of derivatives, + // - pointer to the param values, + // - pointer to the time value(s). + std::vector args{cur_order, gen_vec_args[0], diff_arr, par_ptr, time_ptr}; + + // Append the other arguments.
+ for (decltype(gens.size()) i = 1; i < gens.size(); ++i) { + args.push_back(gen_vec_args[i]); + } + + // Calculate the derivative and store the result. + taylor_c_store_diff(s, diff_arr, n_uvars, cur_order, gen_vec_args[0], builder.CreateCall(vfunc, args)); + } + +#if 0 + // Turn the sets of arguments returned by the generators into a single set of vector arguments. + std::vector gen_vec_args, tmp; + + for (const auto &gen : gens) { + // Generate the arguments into tmp. + tmp.clear(); + + for (std::uint32_t i = 0; i < ncalls; ++i) { + tmp.push_back(gen(builder.getInt32(i))); + } + + // Transform tmp into a vector and add it + // to gen_vec_args. + // NOTE: if ncalls is 1, then scalars_to_vector() + // will just return the first element of tmp. + gen_vec_args.push_back(scalars_to_vector(builder, tmp)); + } + + // Create the vector diff function. + auto *vfunc = taylor_c_diff_func(s, ex, n_uvars, 1, high_accuracy, ncalls); + + // Initialise the arguments with which vfunc must be called. The following // initial arguments are always present: // - current Taylor order, - // - u index of the variable, + // - u indices of the variables, // - array of derivatives, // - pointer to the param values, // - pointer to the time value(s). - std::vector args{cur_order, u_idx, diff_arr, par_ptr, time_ptr}; + std::vector args{cur_order, gen_vec_args[0], diff_arr, par_ptr, time_ptr}; - // Create the other arguments via the generators. + // Append the other arguments. for (decltype(gens.size()) i = 1; i < gens.size(); ++i) { - args.push_back(gens[i](cur_call_idx)); + args.push_back(gen_vec_args[i]); } // Calculate the derivative and store the result. - taylor_c_store_diff(s, diff_arr, n_uvars, cur_order, u_idx, builder.CreateCall(func, args)); - }); + taylor_c_store_diff(s, diff_arr, n_uvars, cur_order, gen_vec_args[0], builder.CreateCall(vfunc, args)); +#endif + } else { + // Loop over the number of calls. 
+ llvm_loop_u32(s, builder.getInt32(0), builder.getInt32(ncalls), [&](llvm::Value *cur_call_idx) { + // Create the u variable index from the first generator. + auto u_idx = gens[0](cur_call_idx); + + // Initialise the arguments with which func must be called. The following + // initial arguments are always present: + // - current Taylor order, + // - u index of the variable, + // - array of derivatives, + // - pointer to the param values, + // - pointer to the time value(s). + std::vector args{cur_order, u_idx, diff_arr, par_ptr, time_ptr}; + + // Create the other arguments via the generators. + for (decltype(gens.size()) i = 1; i < gens.size(); ++i) { + args.push_back(gens[i](cur_call_idx)); + } + + // Calculate the derivative and store the result. + taylor_c_store_diff(s, diff_arr, n_uvars, cur_order, u_idx, builder.CreateCall(func, args)); + }); + } }; // Helper to compute concurrently all the derivatives @@ -1521,7 +1669,7 @@ llvm::Value *taylor_compute_jet_compact_mode(llvm_state &s, llvm::Value *order0, // of order cur_order serially. for (const auto &map : f_maps) { for (const auto &p : map) { - block_diff(p.first, p.second.first, p.second.second, cur_order); + block_diff(p.first, std::get<0>(p.second), std::get<1>(p.second), std::get<2>(p.second), cur_order); } } } @@ -1571,7 +1719,7 @@ llvm::Value *taylor_compute_jet_compact_mode(llvm_state &s, llvm::Value *order0, // that each block in a segment processes the derivatives // of exactly ncalls u variables. for (const auto &p : f_maps[i]) { - const auto ncalls = p.second.first; + const auto ncalls = std::get<0>(p.second); cur_start_u_idx += ncalls; } } @@ -1584,9 +1732,9 @@ llvm::Value *taylor_compute_jet_compact_mode(llvm_state &s, llvm::Value *order0, // Compute the derivatives of all the blocks in the segment. 
for (const auto &p : map) { - const auto ncalls = p.second.first; + const auto ncalls = std::get<0>(p.second); - block_diff(p.first, ncalls, p.second.second, builder.getInt32(order)); + block_diff(p.first, ncalls, std::get<1>(p.second), std::get<2>(p.second), builder.getInt32(order)); // Update cur_start_u_idx taking advantage of the fact // that each block in a segment processes the derivatives diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 4b1e2e3db..70e836ee3 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -126,3 +126,4 @@ ADD_HEYOKA_TESTCASE(c_output) ADD_HEYOKA_TESTCASE(ensemble_propagate) ADD_HEYOKA_TESTCASE(parallel_mode) ADD_HEYOKA_TESTCASE(opt_checks) +ADD_HEYOKA_TESTCASE(tmp_vec_mode) diff --git a/test/tmp_vec_mode.cpp b/test/tmp_vec_mode.cpp new file mode 100644 index 000000000..c70529bb0 --- /dev/null +++ b/test/tmp_vec_mode.cpp @@ -0,0 +1,35 @@ +// Copyright 2020, 2021, 2022 Francesco Biscani (bluescarni@gmail.com), Dario Izzo (dario.izzo@gmail.com) +// +// This file is part of the heyoka library. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include +#include + +#include "catch.hpp" +#include "test_utils.hpp" + +using namespace heyoka; + +TEST_CASE("foo") +{ + auto [x, y, z, t] = make_vars("x", "y", "z", "t"); + + taylor_adaptive ta{{prime(x) = x + y, prime(y) = y + z, prime(z) = z + t, prime(t) = t + x}, + {1., 2., 3., 4.}, + kw::compact_mode = true, + kw::opt_level = 3u}; + + std::cout << ta.get_llvm_state().get_ir() << '\n'; + + for (const auto &[ex, _] : ta.get_decomposition()) { + std::cout << ex << '\n'; + } + + ta.propagate_until(5.); + + std::cout << ta << '\n'; +}