Skip to content

Commit

Permalink
Merge pull request #1752 from su2code/vectorize_when_possible
Browse files Browse the repository at this point in the history
Always use vectorization when the numerical scheme supports it
  • Loading branch information
pcarruscag authored Sep 26, 2022
2 parents d32ccec + c9af050 commit bc6ef2a
Show file tree
Hide file tree
Showing 58 changed files with 358 additions and 297 deletions.
27 changes: 13 additions & 14 deletions Common/doc/docmain.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
/*!
* \mainpage SU2 version 7.4.0 "Blackbird"
* SU2 suite is an open-source collection of C++ based software tools
* to perform PDE analysis and PDE constrained optimization problems. The toolset is designed with
* to perform PDE analysis and PDE constrained optimization. The toolset is designed with
* computational fluid dynamics and aerodynamic shape optimization in mind, but is extensible to
* include other families of governing equations such as potential flow, electrodynamics, chemically reacting
* flows, and many others. SU2 is released under an
Expand All @@ -38,54 +38,53 @@
*/

/*!
* \defgroup Config Descriptions of Configuration Options.
* \defgroup Config Description of the Configuration Options
* \brief Group of variables that can be set using the configuration file.
*/

/*!
* \defgroup ConvDiscr Discretization of the convective terms.
* \defgroup ConvDiscr Discretization of the convective terms
* \brief Group of classes which define the numerical methods for
* discretizing the convective terms of a Partial Differential Equation.
* There are methods for solving the direct, adjoint and linearized
* systems of equations.
*/

/*!
* \defgroup ViscDiscr Discretization of the viscous terms.
* \defgroup ViscDiscr Discretization of the viscous terms
* \brief Group of classes which define the numerical methods for
* discretizing the viscous terms of a Partial Differential Equation.
* There are methods for solving the direct, adjoint and linearized
* systems of equations.
*/

/*!
* \defgroup SourceDiscr Discretization of the source terms.
* \defgroup SourceDiscr Discretization of the source terms
* \brief Group of classes which define the numerical methods for
* discretizing the source terms of a Partial Differential Equation.
* There are methods for solving the direct, adjoint and linearized
* systems of equations.
*/

/*!
* \defgroup Potential_Flow_Equation Solving the potential flow equation.
* \brief Group of classes which define the system of Potential flow equation in
* three formulations: direct, adjoint, and linearized.
*/

/*!
* \defgroup Euler_Equations Solving the Euler's equations.
* \defgroup Euler_Equations Solving the Euler equations
* \brief Group of classes which define the system of Euler equations in
* three formulations: direct, adjoint, and linearized.
*/

/*!
* \defgroup Navier_Stokes_Equations Solving the Navier-Stokes' equations.
* \defgroup Navier_Stokes_Equations Solving the Navier-Stokes equations
* \brief Group of classes which define the system of Navier-Stokes equations in
* three formulations: direct, adjoint, and linearized.
*/

/*!
* \defgroup Turbulence_Model Solving the turbulence models.
* \defgroup Turbulence_Model Solving the turbulence model equations
* \brief Group of classes which define the turbulence model in
* three formulations: direct, adjoint, and linearized.
*/

/*!
* \defgroup Elasticity_Equations Solving the elasticity equations
* \brief Group of classes to solve solid deformation problems.
*/
15 changes: 15 additions & 0 deletions Common/include/basic_types/datatype_structure.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,21 @@ namespace SU2_TYPE {
FORCEINLINE void SetDerivative(su2double &, const passivedouble &) {}
#endif

/*!
* \brief Get the passive value of any variable. For most types return directly,
* specialize for su2double to call GetValue.
* \note This is a struct instead of a function because the return type of the
* su2double specialization changes.
*/
template <class T>
struct Passive {
FORCEINLINE static T Value(const T& val) {return val;}
};
template <>
struct Passive<su2double> {
FORCEINLINE static passivedouble Value(const su2double& val) {return GetValue(val);}
};

/*!
* \brief Casts the primitive value to int (uses GetValue, already implemented for each type).
* \param[in] data - The non-primitive datatype.
Expand Down
57 changes: 35 additions & 22 deletions Common/include/linear_algebra/vector_expressions.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
#include <cassert>
#include <cstdlib>
#include <cmath>
#include <cstdint>

namespace VecExpr {

Expand Down Expand Up @@ -157,21 +158,28 @@ FORCEINLINE auto FUN(decay_t<S> u, const CVecExpr<V,S>& v) \
RETURNS( EXPR<Bcast<S>,V,S>(Bcast<S>(u), v.derived()) \
) \

/*--- std::max/min have issues (maybe because they return by reference).
* For AD codi::max/min need to be used to avoid issues in debug builds. ---*/

#if defined(CODI_REVERSE_TYPE) || defined(CODI_FORWARD_TYPE)
#define max_impl math::max
#define min_impl math::min
#else
#define max_impl(a,b) a<b? Scalar(b) : Scalar(a)
#define min_impl(a,b) b<a? Scalar(b) : Scalar(a)
#endif
MAKE_BINARY_FUN(max, max_, max_impl)
MAKE_BINARY_FUN(min, min_, min_impl)
/*--- std::max/min have issues (because they return by reference).
* fmin and fmax return by value and thus are fine, but they would force
* conversions to double, to avoid that we provide integer overloads.
* We use int32/64 instead of int/long to avoid issues with Windows,
* where long is 32 bits (instead of 64 bits). ---*/

#define MAKE_FMINMAX_OVERLOADS(TYPE) \
FORCEINLINE TYPE fmax(TYPE a, TYPE b) { return a<b? b : a; } \
FORCEINLINE TYPE fmin(TYPE a, TYPE b) { return a<b? a : b; }
MAKE_FMINMAX_OVERLOADS(int32_t)
MAKE_FMINMAX_OVERLOADS(int64_t)
MAKE_FMINMAX_OVERLOADS(uint32_t)
MAKE_FMINMAX_OVERLOADS(uint64_t)
/*--- Make the float and double versions of fmin/max available in this
* namespace to avoid ambiguous overloads. ---*/
using std::fmax;
using std::fmin;
#undef MAKE_FMINMAX_OVERLOADS

MAKE_BINARY_FUN(fmax, max_, fmax)
MAKE_BINARY_FUN(fmin, min_, fmin)
MAKE_BINARY_FUN(pow, pow_, math::pow)
#undef max_impl
#undef min_impl

/*--- sts::plus and co. were tried, the code was horrendous (due to the forced
* conversion between different types) and creating functions for these ops
Expand All @@ -190,20 +198,25 @@ MAKE_BINARY_FUN(operator/, div_, div_impl)
#undef mul_impl
#undef div_impl

/*--- Relational operators need to be cast to the scalar type to allow vectorization. ---*/

#define le_impl(a,b) Scalar(a<=b)
#define ge_impl(a,b) Scalar(a>=b)
#define eq_impl(a,b) Scalar(a==b)
#define ne_impl(a,b) Scalar(a!=b)
#define lt_impl(a,b) Scalar(a<b)
#define gt_impl(a,b) Scalar(a>b)
/*--- Relational operators need to be cast to the scalar type to allow vectorization.
* TO_PASSIVE is used to convert active scalars to passive, which CoDi will then capture
* by value in its expressions, and thus dangling references are avoided. No AD info
* is lost since these operators are non-differentiable. ---*/

#define TO_PASSIVE(IMPL) SU2_TYPE::Passive<Scalar>::Value(IMPL)
#define le_impl(a,b) TO_PASSIVE(a<=b)
#define ge_impl(a,b) TO_PASSIVE(a>=b)
#define eq_impl(a,b) TO_PASSIVE(a==b)
#define ne_impl(a,b) TO_PASSIVE(a!=b)
#define lt_impl(a,b) TO_PASSIVE(a<b)
#define gt_impl(a,b) TO_PASSIVE(a>b)
MAKE_BINARY_FUN(operator<=, le_, le_impl)
MAKE_BINARY_FUN(operator>=, ge_, ge_impl)
MAKE_BINARY_FUN(operator==, eq_, eq_impl)
MAKE_BINARY_FUN(operator!=, ne_, ne_impl)
MAKE_BINARY_FUN(operator<, lt_, lt_impl)
MAKE_BINARY_FUN(operator>, gt_, gt_impl)
#undef TO_PASSIVE
#undef le_impl
#undef ge_impl
#undef eq_impl
Expand Down
4 changes: 2 additions & 2 deletions Common/include/parallelization/special_vectorization.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -126,8 +126,8 @@ MAKE_BINARY_FUN(operator==, eq_p)
MAKE_BINARY_FUN(operator!=, ne_p)
MAKE_BINARY_FUN(operator<=, le_p)
MAKE_BINARY_FUN(operator>=, ge_p)
MAKE_BINARY_FUN(max, max_p)
MAKE_BINARY_FUN(min, min_p)
MAKE_BINARY_FUN(fmax, max_p)
MAKE_BINARY_FUN(fmin, min_p)

#undef MAKE_BINARY_FUN

Expand Down
12 changes: 11 additions & 1 deletion Common/include/parallelization/vectorization.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,17 @@ template<class T>
constexpr size_t preferredLen() { return PREFERRED_SIZE / sizeof(T); }

template<>
constexpr size_t preferredLen<su2double>() { return PREFERRED_SIZE / sizeof(passivedouble); }
constexpr size_t preferredLen<su2double>() {
#ifdef CODI_REVERSE_TYPE
/*--- Use a SIMD size of 1 for reverse AD, larger sizes increase
* the pre-accumulation time with no performance benefit. ---*/
return 1;
#else
/*--- For forward AD there is a performance benefit. This covers
* forward AD and primal mode (su2double == passivedouble). ---*/
return PREFERRED_SIZE / sizeof(passivedouble);
#endif
}

/*!
* \class Array
Expand Down
2 changes: 1 addition & 1 deletion SU2_CFD/include/numerics/elasticity/CFEAElasticity.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,11 @@

/*!
* \class CFEAElasticity
* \ingroup Elasticity_Equations
* \brief Abstract class for computing the tangent matrix and the residual for structural problems.
* \note At the next level of abstraction (linear or not) a class must define the constitutive term.
* The methods we override in this class with an empty implementation are here just to better
* document the public interface of this class hierarchy.
* \ingroup FEM_Discr
* \author R.Sanchez
* \version 7.4.0 "Blackbird"
*/
Expand Down
4 changes: 2 additions & 2 deletions SU2_CFD/include/numerics/elasticity/CFEALinearElasticity.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
/*!
* \class CFEALinearElasticity
* \brief Class for computing the stiffness matrix of a linear, elastic problem.
* \ingroup FEM_Discr
* \ingroup Elasticity_Equations
* \author R.Sanchez
* \version 7.4.0 "Blackbird"
*/
Expand Down Expand Up @@ -88,7 +88,7 @@ class CFEALinearElasticity : public CFEAElasticity {
/*!
* \class CFEAMeshElasticity
* \brief Particular case of linear elasticity used for mesh deformation.
* \ingroup FEM_Discr
* \ingroup Elasticity_Equations
* \author R.Sanchez
* \version 7.4.0 "Blackbird"
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
* This class does not implement a particular model, that will be done by its children.
* \note In addition to Compute_Constitutive_Matrix, derived classes MUST further implement
* Compute_Plane_Stress_Term and Compute_Stress_Tensor.
* \ingroup FEM_Discr
* \ingroup Elasticity_Equations
* \author R.Sanchez
* \version 7.4.0 "Blackbird"
*/
Expand Down
8 changes: 4 additions & 4 deletions SU2_CFD/include/numerics/elasticity/nonlinear_models.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
/*!
* \class CFEM_NeoHookean_Comp
* \brief Class for computing the constitutive and stress tensors for a neo-Hookean material model, compressible.
* \ingroup FEM_Discr
* \ingroup Elasticity_Equations
* \author R.Sanchez
* \version 7.4.0 "Blackbird"
*/
Expand Down Expand Up @@ -81,7 +81,7 @@ class CFEM_NeoHookean_Comp final : public CFEANonlinearElasticity {
/*!
* \class CFEM_NeoHookean_Comp
* \brief Constitutive and stress tensors for a Knowles stored-energy function, nearly incompressible.
* \ingroup FEM_Discr
* \ingroup Elasticity_Equations
* \author R.Sanchez
* \version 7.4.0 "Blackbird"
*/
Expand Down Expand Up @@ -132,7 +132,7 @@ class CFEM_Knowles_NearInc final : public CFEANonlinearElasticity {
/*!
* \class CFEM_DielectricElastomer
* \brief Class for computing the constitutive and stress tensors for a dielectric elastomer.
* \ingroup FEM_Discr
* \ingroup Elasticity_Equations
* \author R.Sanchez
* \version 7.4.0 "Blackbird"
*/
Expand Down Expand Up @@ -180,7 +180,7 @@ class CFEM_DielectricElastomer final : public CFEANonlinearElasticity {
/*!
* \class CFEM_IdealDE
* \brief Class for computing the constitutive and stress tensors for a nearly-incompressible ideal DE.
* \ingroup FEM_Discr
* \ingroup Elasticity_Equations
* \author R.Sanchez
* \version 7.4.0 "Blackbird"
*/
Expand Down
5 changes: 4 additions & 1 deletion SU2_CFD/include/numerics_simd/CNumericsSIMD.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -130,9 +130,12 @@ CNumericsSIMD* createNumerics(const CConfig& config, int iMesh, const CVariable*
* numerical methods.
*/
CNumericsSIMD* CNumericsSIMD::CreateNumerics(const CConfig& config, int nDim, int iMesh, const CVariable* turbVars) {
#ifndef CODI_REVERSE_TYPE
if ((Double::Size < 4) && (SU2_MPI::GetRank() == MASTER_NODE)) {
cout << "WARNING: SU2 was not compiled for an AVX-capable architecture." << endl;
cout << "WARNING: SU2 was not compiled for an AVX-capable architecture. Performance could be better,\n"
" see https://su2code.github.io/docs_v7/Build-SU2-Linux-MacOS/#compiler-optimizations" << endl;
}
#endif
if (nDim == 2) return createNumerics<2>(config, iMesh, turbVars);
if (nDim == 3) return createNumerics<3>(config, iMesh, turbVars);

Expand Down
1 change: 1 addition & 0 deletions SU2_CFD/include/numerics_simd/CNumericsSIMD.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ using SparseMatrixType = CSysMatrix<su2mixedfloat>;

/*!
* \class CNumericsSIMD
* \ingroup ConvDiscr
* \brief Base class to define the interface.
* \note See CNumericsEmptyDecorator.
*/
Expand Down
13 changes: 9 additions & 4 deletions SU2_CFD/include/numerics_simd/flow/convection/centered.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@

/*!
* \class CCenteredBase
* \ingroup ConvDiscr
* \brief Base class for Centered schemes, derived classes implement
* the dissipation term in a const "finalizeFlux" method.
* \note See CRoeBase for the role of Base.
Expand Down Expand Up @@ -186,6 +187,7 @@ class CCenteredBase : public Base {

/*!
* \class CJSTScheme
* \ingroup ConvDiscr
* \brief Classical JST scheme with scalar dissipation.
*/
template<class Decorator>
Expand Down Expand Up @@ -243,7 +245,7 @@ class CJSTScheme : public CCenteredBase<CJSTScheme<Decorator>,Decorator> {
const auto si = gatherVariables(iPoint, solution.GetSensor());
const auto sj = gatherVariables(jPoint, solution.GetSensor());
const Double eps2 = kappa2 * 0.5*(si+sj) * sc2;
const Double eps4 = max(0.0, kappa4-eps2) * sc4;
const Double eps4 = fmax(0.0, kappa4-eps2) * sc4;

/*--- Update flux and Jacobians with dissipation terms. ---*/

Expand All @@ -265,6 +267,7 @@ class CJSTScheme : public CCenteredBase<CJSTScheme<Decorator>,Decorator> {

/*!
* \class CJSTmatScheme
* \ingroup ConvDiscr
* \brief JST scheme with matrix dissipation.
*/
template<class Decorator>
Expand Down Expand Up @@ -321,7 +324,7 @@ class CJSTmatScheme : public CCenteredBase<CJSTmatScheme<Decorator>,Decorator> {
const auto si = gatherVariables(iPoint, solution.GetSensor());
const auto sj = gatherVariables(jPoint, solution.GetSensor());
const Double eps2 = kappa2 * 0.5*(si+sj) * sc2;
const Double eps4 = max(0.0, kappa4-eps2) * sc4;
const Double eps4 = fmax(0.0, kappa4-eps2) * sc4;

const auto lapl_i = gatherVariables<nVar>(iPoint, solution.GetUndivided_Laplacian());
const auto lapl_j = gatherVariables<nVar>(jPoint, solution.GetUndivided_Laplacian());
Expand Down Expand Up @@ -357,10 +360,10 @@ class CJSTmatScheme : public CCenteredBase<CJSTmatScheme<Decorator>,Decorator> {
lambda(nDim) = projVel + avgV.speedSound()*area;
lambda(nDim+1) = projVel - avgV.speedSound()*area;

const Double maxLambda = max(lambda(nDim), -lambda(nDim+1));
const Double maxLambda = fmax(lambda(nDim), -lambda(nDim+1));

for (size_t iVar = 0; iVar < nVar; ++iVar) {
lambda(iVar) = max(abs(lambda(iVar)), entropyFix*maxLambda);
lambda(iVar) = fmax(abs(lambda(iVar)), entropyFix*maxLambda);
}

/*--- Update flux and Jacobians with scaled dissipation terms. ---*/
Expand Down Expand Up @@ -391,6 +394,7 @@ class CJSTmatScheme : public CCenteredBase<CJSTmatScheme<Decorator>,Decorator> {

/*!
* \class CJSTkeScheme
* \ingroup ConvDiscr
* \brief JST scheme without 4th order dissipation.
*/
template<class Decorator>
Expand Down Expand Up @@ -461,6 +465,7 @@ class CJSTkeScheme : public CCenteredBase<CJSTkeScheme<Decorator>,Decorator> {

/*!
* \class CLaxScheme
* \ingroup ConvDiscr
* \brief Lax–Friedrichs 1st order scheme.
*/
template<class Decorator>
Expand Down
Loading

0 comments on commit bc6ef2a

Please sign in to comment.