From cbeff6b369c284a790a5947183cdc5325307ffc5 Mon Sep 17 00:00:00 2001
From: Alexander Debus <a.debus@hzdr.de>
Date: Thu, 17 Oct 2024 21:47:19 +0200
Subject: [PATCH 1/5] Generalize twtstight for arbitrary beta0 and code
 refactoring

---
 .../background/templates/twtstight/BField.hpp |   2 +-
 .../background/templates/twtstight/BField.tpp | 193 ++++++------------
 .../background/templates/twtstight/EField.hpp |   2 +-
 .../background/templates/twtstight/EField.tpp | 193 ++++++------------
 .../templates/twtstight/twtstight.hpp         |   6 +-
 .../picongpu/benchmarks/TWEAC-FOM/cmakeFlags  |  12 +-
 .../picongpu/param/incidentField.param        |  26 ++-
 7 files changed, 165 insertions(+), 269 deletions(-)
diff --git a/include/picongpu/fields/background/templates/twtstight/BField.hpp b/include/picongpu/fields/background/templates/twtstight/BField.hpp
index c5d7307407..16944c3db0 100644
--- a/include/picongpu/fields/background/templates/twtstight/BField.hpp
+++ b/include/picongpu/fields/background/templates/twtstight/BField.hpp
@@ -38,7 +38,7 @@ namespace picongpu
             class BField
             {
             public:
-                using float_T = float_64;
+                using float_T = float_X;
 
                 /** Center of simulation volume in number of cells */
                 PMACC_ALIGN(halfSimSize, DataSpace<simDim>);
diff --git a/include/picongpu/fields/background/templates/twtstight/BField.tpp b/include/picongpu/fields/background/templates/twtstight/BField.tpp
index 7f49803a9e..375bdab163 100644
--- a/include/picongpu/fields/background/templates/twtstight/BField.tpp
+++ b/include/picongpu/fields/background/templates/twtstight/BField.tpp
@@ -205,29 +205,13 @@ namespace picongpu
                 auto const beta0 = float_T(beta_0);
                 /* If phi < 0 the formulas below are not directly applicable.
                  * Instead phi is taken positive, but the entire pulse rotated by 180 deg around the
-                 * z-axis of the coordinate system in this function.
-                 */
-                auto const phiReal = float_T(math::abs(phi));
-                float_T sinPhiReal;
-                float_T cosPhiReal;
-                pmacc::math::sincos(phiReal, sinPhiReal, cosPhiReal);
-                float_T const alphaTilt = math::atan2(float_T(1.0) - beta0 * cosPhiReal, beta0 * sinPhiReal);
-                /* Definition of the laser pulse front tilt angle for the laser field below.
-                 *
-                 * For beta0=1.0, this is equivalent to our standard definition. Question: Why is the
-                 * local "phi_T" not equal in value to the object member "phiReal" or "phi"?
-                 * Because the standard TWTS pulse is defined for beta0 = 1.0 and in the coordinate-system
-                 * of the TWTS model phi is responsible for pulse front tilt and dispersion only. Hence
-                 * the dispersion will (although physically correct) be slightly off the ideal TWTS
-                 * pulse for beta0 != 1.0. This only shows that this TWTS pulse is primarily designed for
-                 * scenarios close to beta0 = 1.
-                 */
-                float_T const phiT = float_T(2.0) * alphaTilt;
-
-                /* Angle between the laser pulse front and the y-axis. Not used, but remains in code for
-                 * documentation purposes.
-                 * float_T const eta = float_T(PI/2) - (phiReal - alphaTilt);
+                 * y-axis of the coordinate system in this function.
                  */
+                auto const phiT = float_T(math::abs(phi));
+                float_T sinPhi;
+                float_T cosPhi;
+                pmacc::math::sincos(phiT, sinPhi, cosPhi);
+                float_T const tanAlpha = (float_T(1.0) - beta0 * cosPhi) / (beta0 * sinPhi);
 
                 auto const cspeed = float_T(sim.si.getSpeedOfLight() / sim.unit.speed());
                 auto const lambda0 = float_T(wavelength_SI / sim.unit.length());
@@ -244,13 +228,9 @@ namespace picongpu
                  * (i.e. from a finite coordinate range) only. All these quantities have to be calculated
                  * in double precision.
                  */
-                float_64 sinPhiVal;
-                float_64 cosPhiVal;
-                pmacc::math::sincos(precisionCast<float_64>(phi), sinPhiVal, cosPhiVal);
-                float_64 const tanAlpha = (1.0 - beta_0 * cosPhiVal) / (beta_0 * sinPhiVal);
-                float_64 const tanFocalLine = math::tan(PI / 2.0 - phi);
-                float_64 const deltaT = wavelength_SI / sim.si.getSpeedOfLight() * (1.0 + tanAlpha / tanFocalLine);
-                float_64 const deltaY = wavelength_SI * cosPhiVal + wavelength_SI * sinPhiVal * sinPhiVal / cosPhiVal;
+                float_64 const deltaT = wavelength_SI / sim.si.getSpeedOfLight()
+                    / (1.0 - beta_0 * pmacc::math::cos(precisionCast<float_64>(phi)));
+                float_64 const deltaY = beta0 * sim.si.getSpeedOfLight() * deltaT;
                 float_64 const numberOfPeriods = math::floor(time / deltaT);
                 auto const timeMod = float_T(time - numberOfPeriods * deltaT);
                 auto const yMod = float_T(pos.y() - numberOfPeriods * deltaY);
@@ -262,15 +242,12 @@ namespace picongpu
                 /* To avoid underflows in computation, fields are set to zero
                  * before and after the respective TWTS pulse envelope.
                  */
-                if(math::abs(y - z * math::tan(phiT / float_T(2.0)) - (cspeed * t)) > (numSigmas * tauG * cspeed))
+                if(math::abs(y - z * tanAlpha - (beta0 * cspeed * t)) > (numSigmas * tauG * cspeed))
                     return float_T(0.0);
 
                 /* Calculating shortcuts for speeding up field calculation */
-                float_T sinPhi;
-                float_T cosPhi;
-                pmacc::math::sincos(phiT, sinPhi, cosPhi);
-                float_T const cosPhi2 = math::cos(phiT / float_T(2.0));
-                float_T const tanPhi2 = math::tan(phiT / float_T(2.0));
+                float_T const tanPhi = math::tan(phiT);
+                float_T const cotPhi = float_T(1.0) / tanPhi;
                 float_T const sinPhi_2 = sinPhi * sinPhi;
                 float_T const cosPhi_2 = cosPhi * cosPhi;
                 float_T const sinPolAngle = math::sin(polAngle);
@@ -281,9 +258,10 @@ namespace picongpu
                 float_T const tauG2 = tauG * tauG;
                 float_T const psi0 = float_T(2.0) / k;
                 float_T const w02 = w0 * w0;
+                float_T const beta02 = beta0 * beta0;
 
                 complex_T const nu = (y * cosPhi - z * sinPhi) / cspeed;
-                complex_T const xi = (-z * cosPhi - y * sinPhi) * tanPhi2 / cspeed;
+                complex_T const xi = (-z * cosPhi - y * sinPhi) * tanAlpha / cspeed;
                 complex_T const rhom
                     = math::sqrt(x2 + pmacc::math::cPow(-z - complex_T(0, 0.5) * k * w02, static_cast<uint32_t>(2u)));
                 complex_T const Xm = -z - complex_T(0, 0.5) * k * w02;
@@ -291,15 +269,17 @@ namespace picongpu
                 complex_T const besselJ0const = pmacc::math::bessel::j0(k * rhom * sinPhi);
                 complex_T const besselJ1const = pmacc::math::bessel::j1(k * rhom * sinPhi);
 
-                complex_T const zeroOrder = tauG * math::sqrt(omega0 * (float_T(1.0) + cosPhi))
+                complex_T const zeroOrder = (beta0 * tauG)
                     / (math::sqrt(float_T(2.0))
                        * math::exp(
-                           omega0 * pmacc::math::cPow((t - nu + xi) * cosPhi2, static_cast<uint32_t>(2u))
-                           / (+(omega0 * tauG2) / float_T(2.0) - complex_T(0, 1) * nu
-                              + ((omega0 * tauG2) / float_T(2.0) + complex_T(0, 1) * (nu - xi)) * cosPhi))
+                           beta02 * omega0 * pmacc::math::cPow(t - nu + xi, static_cast<uint32_t>(2u))
+                           / (beta02 * omega0 * tauG2 - complex_T(0, 2) * beta02 * (nu - xi) * cotPhi * cotPhi
+                              + complex_T(0, 2) * beta0 * (float_T(2.0) * nu - xi) * cotPhi / sinPhi
+                              - complex_T(0, 2) * nu / sinPhi_2))
                        * math::sqrt(
-                           +(omega0 * tauG2) / float_T(2.0) - I * nu
-                           + ((omega0 * tauG2) / float_T(2.0) + I * (nu - xi)) * cosPhi));
+                           ((beta02 * omega0 * tauG2) / float_T(2.0) - I * beta02 * (nu - xi) * cotPhi * cotPhi
+                            + I * beta0 * (float_T(2.0) * nu - xi) * cotPhi / sinPhi - I * nu / sinPhi_2)
+                           / omega0));
 
                 complex_T const result = (math::exp(I * (omega0 * t - k * y * cosPhi)) * k * zeroOrder * sinPhi
                                           * (-(besselJ1const
@@ -327,30 +307,13 @@ namespace picongpu
                 auto const beta0 = float_T(beta_0);
                 /* If phi < 0 the formulas below are not directly applicable.
                  * Instead phi is taken positive, but the entire pulse rotated by 180 deg around the
-                 * z-axis of the coordinate system in this function.
-                 */
-                auto const phiReal = float_T(math::abs(phi));
-                float_T sinPhiReal;
-                float_T cosPhiReal;
-                pmacc::math::sincos(phiReal, sinPhiReal, cosPhiReal);
-                float_T const alphaTilt = math::atan2(float_T(1.0) - beta0 * cosPhiReal, beta0 * sinPhiReal);
-
-                /* Definition of the laser pulse front tilt angle for the laser field below.
-                 *
-                 * For beta0=1.0, this is equivalent to our standard definition. Question: Why is the
-                 * local "phi_T" not equal in value to the object member "phiReal" or "phi"?
-                 * Because the standard TWTS pulse is defined for beta0 = 1.0 and in the coordinate-system
-                 * of the TWTS model phi is responsible for pulse front tilt and dispersion only. Hence
-                 * the dispersion will (although physically correct) be slightly off the ideal TWTS
-                 * pulse for beta0 != 1.0. This only shows that this TWTS pulse is primarily designed for
-                 * scenarios close to beta0 = 1.
-                 */
-                float_T const phiT = float_T(2.0) * alphaTilt;
-
-                /* Angle between the laser pulse front and the y-axis.
-                 * Not used, but remains in code for documentation purposes.
-                 * float_T const eta = float_T(float_T(PI / 2)) - (phiReal - alphaTilt);
+                 * y-axis of the coordinate system in this function.
                  */
+                auto const phiT = float_T(math::abs(phi));
+                float_T sinPhi;
+                float_T cosPhi;
+                pmacc::math::sincos(phiT, sinPhi, cosPhi);
+                float_T const tanAlpha = (float_T(1.0) - beta0 * cosPhi) / (beta0 * sinPhi);
 
                 auto const cspeed = float_T(sim.si.getSpeedOfLight() / sim.unit.speed());
                 auto const lambda0 = float_T(wavelength_SI / sim.unit.length());
@@ -367,13 +330,9 @@ namespace picongpu
                  * (i.e. from a finite coordinate range) only. All these quantities have to be calculated
                  * in double precision.
                  */
-                float_64 sinPhiVal;
-                float_64 cosPhiVal;
-                pmacc::math::sincos(precisionCast<float_64>(phi), sinPhiVal, cosPhiVal);
-                float_64 const tanAlpha = (1.0 - beta_0 * cosPhiVal) / (beta_0 * sinPhiVal);
-                float_64 const tanFocalLine = math::tan(PI / 2.0 - phi);
-                float_64 const deltaT = wavelength_SI / sim.si.getSpeedOfLight() * (1.0 + tanAlpha / tanFocalLine);
-                float_64 const deltaY = wavelength_SI * cosPhiVal + wavelength_SI * sinPhiVal * sinPhiVal / cosPhiVal;
+                float_64 const deltaT = wavelength_SI / sim.si.getSpeedOfLight()
+                    / (1.0 - beta_0 * pmacc::math::cos(precisionCast<float_64>(phi)));
+                float_64 const deltaY = beta0 * sim.si.getSpeedOfLight() * deltaT;
                 float_64 const numberOfPeriods = math::floor(time / deltaT);
                 auto const timeMod = float_T(time - numberOfPeriods * deltaT);
                 auto const yMod = float_T(pos.y() - numberOfPeriods * deltaY);
@@ -385,15 +344,12 @@ namespace picongpu
                 /* To avoid underflows in computation, fields are set to zero
                  * before and after the respective TWTS pulse envelope.
                  */
-                if(math::abs(y - z * math::tan(phiT / float_T(2.0)) - (cspeed * t)) > (numSigmas * tauG * cspeed))
+                if(math::abs(y - z * tanAlpha - (beta0 * cspeed * t)) > (numSigmas * tauG * cspeed))
                     return float_T(0.0);
 
                 /* Calculating shortcuts for speeding up field calculation */
-                float_T sinPhi;
-                float_T cosPhi;
-                pmacc::math::sincos(phiT, sinPhi, cosPhi);
-                float_T const cosPhi2 = math::cos(phiT / float_T(2.0));
-                float_T const tanPhi2 = math::tan(phiT / float_T(2.0));
+                float_T const tanPhi = math::tan(phiT);
+                float_T const cotPhi = float_T(1.0) / tanPhi;
                 float_T const sinPhi_2 = sinPhi * sinPhi;
                 float_T const sinPolAngle = math::sin(polAngle);
                 float_T const cosPolAngle = math::cos(polAngle);
@@ -403,9 +359,10 @@ namespace picongpu
                 float_T const tauG2 = tauG * tauG;
                 float_T const psi0 = float_T(2.0) / k;
                 float_T const w02 = w0 * w0;
+                float_T const beta02 = beta0 * beta0;
 
                 complex_T const nu = (y * cosPhi - z * sinPhi) / cspeed;
-                complex_T const xi = (-z * cosPhi - y * sinPhi) * tanPhi2 / cspeed;
+                complex_T const xi = (-z * cosPhi - y * sinPhi) * tanAlpha / cspeed;
                 complex_T const rhom
                     = math::sqrt(x2 + pmacc::math::cPow(-z - complex_T(0, 0.5) * k * w02, static_cast<uint32_t>(2u)));
                 complex_T const rhom2 = rhom * rhom;
@@ -415,15 +372,17 @@ namespace picongpu
                 complex_T const besselJ0const = pmacc::math::bessel::j0(k * rhom * sinPhi);
                 complex_T const besselJ1const = pmacc::math::bessel::j1(k * rhom * sinPhi);
 
-                complex_T const zeroOrder = tauG * math::sqrt(omega0 * (float_T(1.0) + cosPhi))
+                complex_T const zeroOrder = (beta0 * tauG)
                     / (math::sqrt(float_T(2.0))
                        * math::exp(
-                           omega0 * pmacc::math::cPow((t - nu + xi) * cosPhi2, static_cast<uint32_t>(2u))
-                           / (+(omega0 * tauG2) / float_T(2.0) - complex_T(0, 1) * nu
-                              + ((omega0 * tauG2) / float_T(2.0) + complex_T(0, 1) * (nu - xi)) * cosPhi))
+                           beta02 * omega0 * pmacc::math::cPow(t - nu + xi, static_cast<uint32_t>(2u))
+                           / (beta02 * omega0 * tauG2 - complex_T(0, 2) * beta02 * (nu - xi) * cotPhi * cotPhi
+                              + complex_T(0, 2) * beta0 * (float_T(2.0) * nu - xi) * cotPhi / sinPhi
+                              - complex_T(0, 2) * nu / sinPhi_2))
                        * math::sqrt(
-                           +(omega0 * tauG2) / float_T(2.0) - I * nu
-                           + ((omega0 * tauG2) / float_T(2.0) + I * (nu - xi)) * cosPhi));
+                           ((beta02 * omega0 * tauG2) / float_T(2.0) - I * beta02 * (nu - xi) * cotPhi * cotPhi
+                            + I * beta0 * (float_T(2.0) * nu - xi) * cotPhi / sinPhi - I * nu / sinPhi_2)
+                           / omega0));
 
                 complex_T const result
                     = (complex_T(0, -0.25) * math::exp(I * (omega0 * t - k * y * cosPhi)) * zeroOrder
@@ -463,29 +422,13 @@ namespace picongpu
                 auto const beta0 = float_T(beta_0);
                 /* If phi < 0 the formulas below are not directly applicable.
                  * Instead phi is taken positive, but the entire pulse rotated by 180 deg around the
-                 * z-axis of the coordinate system in this function.
-                 */
-                auto const phiReal = float_T(math::abs(phi));
-                float_T cosPhiReal;
-                float_T sinPhiReal;
-                pmacc::math::sincos(phiReal, sinPhiReal, cosPhiReal);
-                float_T const alphaTilt = math::atan2(float_T(1.0) - beta0 * cosPhiReal, beta0 * sinPhiReal);
-                /* Definition of the laser pulse front tilt angle for the laser field below.
-                 *
-                 * For beta0=1.0, this is equivalent to our standard definition. Question: Why is the
-                 * local "phi_T" not equal in value to the object member "phiReal" or "phi"?
-                 * Because the standard TWTS pulse is defined for beta0 = 1.0 and in the coordinate-system
-                 * of the TWTS model phi is responsible for pulse front tilt and dispersion only. Hence
-                 * the dispersion will (although physically correct) be slightly off the ideal TWTS
-                 * pulse for beta0 != 1.0. This only shows that this TWTS pulse is primarily designed for
-                 * scenarios close to beta0 = 1.
-                 */
-                float_T const phiT = float_T(2.0) * alphaTilt;
-
-                /* Angle between the laser pulse front and the y-axis.
-                 * Not used, but remains in code for documentation purposes.
-                 * float_T const eta = float_T(float_T(PI / 2)) - (phiReal - alphaTilt);
+                 * y-axis of the coordinate system in this function.
                  */
+                auto const phiT = float_T(math::abs(phi));
+                float_T cosPhi;
+                float_T sinPhi;
+                pmacc::math::sincos(phiT, sinPhi, cosPhi);
+                float_T const tanAlpha = (float_T(1.0) - beta0 * cosPhi) / (beta0 * sinPhi);
 
                 auto const cspeed = float_T(sim.si.getSpeedOfLight() / sim.unit.speed());
                 auto const lambda0 = float_T(wavelength_SI / sim.unit.length());
@@ -502,13 +445,9 @@ namespace picongpu
                  * (i.e. from a finite coordinate range) only. All these quantities have to be calculated
                  * in double precision.
                  */
-                float_64 sinPhiVal;
-                float_64 cosPhiVal;
-                pmacc::math::sincos(precisionCast<float_64>(phi), sinPhiVal, cosPhiVal);
-                float_64 const tanAlpha = (1.0 - beta_0 * cosPhiVal) / (beta_0 * sinPhiVal);
-                float_64 const tanFocalLine = math::tan(PI / 2.0 - phi);
-                float_64 const deltaT = wavelength_SI / sim.si.getSpeedOfLight() * (1.0 + tanAlpha / tanFocalLine);
-                float_64 const deltaY = wavelength_SI * cosPhiVal + wavelength_SI * sinPhiVal * sinPhiVal / cosPhiVal;
+                float_64 const deltaT = wavelength_SI / sim.si.getSpeedOfLight()
+                    / (1.0 - beta_0 * pmacc::math::cos(precisionCast<float_64>(phi)));
+                float_64 const deltaY = beta0 * sim.si.getSpeedOfLight() * deltaT;
                 float_64 const numberOfPeriods = math::floor(time / deltaT);
                 auto const timeMod = float_T(time - numberOfPeriods * deltaT);
                 auto const yMod = float_T(pos.y() - numberOfPeriods * deltaY);
@@ -520,15 +459,12 @@ namespace picongpu
                 /* To avoid underflows in computation, fields are set to zero
                  * before and after the respective TWTS pulse envelope.
                  */
-                if(math::abs(y - z * math::tan(phiT / float_T(2.0)) - (cspeed * t)) > (numSigmas * tauG * cspeed))
+                if(math::abs(y - z * tanAlpha - (beta0 * cspeed * t)) > (numSigmas * tauG * cspeed))
                     return float_T(0.0);
 
                 /* Calculating shortcuts for speeding up field calculation */
-                float_T sinPhi;
-                float_T cosPhi;
-                pmacc::math::sincos(phiT, sinPhi, cosPhi);
-                float_T const cosPhi2 = math::cos(phiT / float_T(2.0));
-                float_T const tanPhi2 = math::tan(phiT / float_T(2.0));
+                float_T const tanPhi = math::tan(phiT);
+                float_T const cotPhi = float_T(1.0) / tanPhi;
                 float_T const sinPhi_2 = sinPhi * sinPhi;
                 float_T const cosPhi_2 = cosPhi * cosPhi;
                 float_T const sinPolAngle = math::sin(polAngle);
@@ -540,9 +476,10 @@ namespace picongpu
                 float_T const tauG2 = tauG * tauG;
                 float_T const psi0 = float_T(2.0) / k;
                 float_T const w02 = w0 * w0;
+                float_T const beta02 = beta0 * beta0;
 
                 complex_T const nu = (y * cosPhi - z * sinPhi) / cspeed;
-                complex_T const xi = (-z * cosPhi - y * sinPhi) * tanPhi2 / cspeed;
+                complex_T const xi = (-z * cosPhi - y * sinPhi) * tanAlpha / cspeed;
                 complex_T const rhom
                     = math::sqrt(x2 + pmacc::math::cPow(-z - complex_T(0, 0.5) * k * w02, static_cast<uint32_t>(2u)));
                 complex_T const rhom2 = rhom * rhom;
@@ -551,15 +488,17 @@ namespace picongpu
                 complex_T const besselJ0const = pmacc::math::bessel::j0(k * rhom * sinPhi);
                 complex_T const besselJ1const = pmacc::math::bessel::j1(k * rhom * sinPhi);
 
-                complex_T const zeroOrder = tauG * math::sqrt(omega0 * (float_T(1.0) + cosPhi))
+                complex_T const zeroOrder = (beta0 * tauG)
                     / (math::sqrt(float_T(2.0))
                        * math::exp(
-                           omega0 * pmacc::math::cPow((t - nu + xi) * cosPhi2, static_cast<uint32_t>(2u))
-                           / (+(omega0 * tauG2) / float_T(2.0) - complex_T(0, 1) * nu
-                              + ((omega0 * tauG2) / float_T(2.0) + complex_T(0, 1) * (nu - xi)) * cosPhi))
+                           beta02 * omega0 * pmacc::math::cPow(t - nu + xi, static_cast<uint32_t>(2u))
+                           / (beta02 * omega0 * tauG2 - complex_T(0, 2) * beta02 * (nu - xi) * cotPhi * cotPhi
+                              + complex_T(0, 2) * beta0 * (float_T(2.0) * nu - xi) * cotPhi / sinPhi
+                              - complex_T(0, 2) * nu / sinPhi_2))
                        * math::sqrt(
-                           +(omega0 * tauG2) / float_T(2.0) - I * nu
-                           + ((omega0 * tauG2) / float_T(2.0) + I * (nu - xi)) * cosPhi));
+                           ((beta02 * omega0 * tauG2) / float_T(2.0) - I * beta02 * (nu - xi) * cotPhi * cotPhi
+                            + I * beta0 * (float_T(2.0) * nu - xi) * cotPhi / sinPhi - I * nu / sinPhi_2)
+                           / omega0));
 
                 complex_T const result
                     = (complex_T(0, -0.25) * math::exp(I * (omega0 * t - k * y * cosPhi)) * zeroOrder
diff --git a/include/picongpu/fields/background/templates/twtstight/EField.hpp b/include/picongpu/fields/background/templates/twtstight/EField.hpp
index b8c3bae5d7..80d6140803 100644
--- a/include/picongpu/fields/background/templates/twtstight/EField.hpp
+++ b/include/picongpu/fields/background/templates/twtstight/EField.hpp
@@ -38,7 +38,7 @@ namespace picongpu
             class EField
             {
             public:
-                using float_T = float_64;
+                using float_T = float_X;
 
                 /** Center of simulation volume in number of cells */
                 PMACC_ALIGN(halfSimSize, DataSpace<simDim>);
diff --git a/include/picongpu/fields/background/templates/twtstight/EField.tpp b/include/picongpu/fields/background/templates/twtstight/EField.tpp
index b9d1226424..d81691c9cf 100644
--- a/include/picongpu/fields/background/templates/twtstight/EField.tpp
+++ b/include/picongpu/fields/background/templates/twtstight/EField.tpp
@@ -210,29 +210,13 @@ namespace picongpu
                 auto const beta0 = float_T(beta_0);
                 /* If phi < 0 the formulas below are not directly applicable.
                  * Instead phi is taken positive, but the entire pulse rotated by 180 deg around the
-                 * z-axis of the coordinate system in this function.
-                 */
-                auto const phiReal = float_T(math::abs(phi));
-                float_T sinPhiReal;
-                float_T cosPhiReal;
-                pmacc::math::sincos(phiReal, sinPhiReal, cosPhiReal);
-                float_T const alphaTilt = math::atan2(float_T(1.0) - beta0 * cosPhiReal, beta0 * sinPhiReal);
-                /* Definition of the laser pulse front tilt angle for the laser field below.
-                 *
-                 * For beta0 = 1.0, this is equivalent to our standard definition. Question: Why is the
-                 * local "phi_T" not equal in value to the object member "phiReal" or "phi"?
-                 * Because the standard TWTS pulse is defined for beta0 = 1.0 and in the coordinate-system
-                 * of the TWTS model phi is responsible for pulse front tilt and dispersion only. Hence
-                 * the dispersion will (although physically correct) be slightly off the ideal TWTS
-                 * pulse for beta0 != 1.0. This only shows that this TWTS pulse is primarily designed for
-                 * scenarios close to beta0 = 1.
-                 */
-                float_T const phiT = float_T(2.0) * alphaTilt;
-
-                /* Angle between the laser pulse front and the y-axis. Not used, but remains in code for
-                 * documentation purposes.
-                 * float_T const eta = (PI / 2) - (phiReal - alphaTilt);
+                 * y-axis of the coordinate system in this function.
                  */
+                auto const phiT = float_T(math::abs(phi));
+                float_T sinPhi;
+                float_T cosPhi;
+                pmacc::math::sincos(phiT, sinPhi, cosPhi);
+                float_T const tanAlpha = (float_T(1.0) - beta0 * cosPhi) / (beta0 * sinPhi);
 
                 auto const cspeed = float_T(sim.si.getSpeedOfLight() / sim.unit.speed());
                 auto const lambda0 = float_T(wavelength_SI / sim.unit.length());
@@ -249,13 +233,9 @@ namespace picongpu
                  * (i.e. from a finite coordinate range) only. All these quantities have to be calculated
                  * in double precision.
                  */
-                float_64 sinPhiVal;
-                float_64 cosPhiVal;
-                pmacc::math::sincos(precisionCast<float_64>(phi), sinPhiVal, cosPhiVal);
-                float_64 const tanAlpha = (1.0 - beta_0 * cosPhiVal) / (beta_0 * sinPhiVal);
-                float_64 const tanFocalLine = math::tan(PI / 2.0 - phi);
-                float_64 const deltaT = wavelength_SI / sim.si.getSpeedOfLight() * (1.0 + tanAlpha / tanFocalLine);
-                float_64 const deltaY = wavelength_SI * cosPhiVal + wavelength_SI * sinPhiVal * sinPhiVal / cosPhiVal;
+                float_64 const deltaT = wavelength_SI / sim.si.getSpeedOfLight()
+                    / (1.0 - beta_0 * pmacc::math::cos(precisionCast<float_64>(phi)));
+                float_64 const deltaY = beta0 * sim.si.getSpeedOfLight() * deltaT;
                 float_64 const numberOfPeriods = math::floor(time / deltaT);
                 auto const timeMod = float_T(time - numberOfPeriods * deltaT);
                 auto const yMod = float_T(pos.y() - numberOfPeriods * deltaY);
@@ -267,15 +247,12 @@ namespace picongpu
                 /* To avoid underflows in computation, fields are set to zero
                  * before and after the respective TWTS pulse envelope.
                  */
-                if(math::abs(y - z * math::tan(phiT / float_T(2.0)) - (cspeed * t)) > (numSigmas * tauG * cspeed))
+                if(math::abs(y - z * tanAlpha - (beta0 * cspeed * t)) > (numSigmas * tauG * cspeed))
                     return float_T(0.0);
 
                 /* Calculating shortcuts for speeding up field calculation */
-                float_T sinPhi;
-                float_T cosPhi;
-                pmacc::math::sincos(phiT, sinPhi, cosPhi);
-                float_T const cosPhi2 = math::cos(phiT / float_T(2.0));
-                float_T const tanPhi2 = math::tan(phiT / float_T(2.0));
+                float_T const tanPhi = math::tan(phiT);
+                float_T const cotPhi = float_T(1.0) / tanPhi;
                 float_T const sinPhi_2 = sinPhi * sinPhi;
                 float_T const cosPhi_2 = cosPhi * cosPhi;
                 float_T const sinPolAngle = math::sin(polAngle);
@@ -286,9 +263,10 @@ namespace picongpu
                 float_T const tauG2 = tauG * tauG;
                 float_T const psi0 = float_T(2.0) / k;
                 float_T const w02 = w0 * w0;
+                float_T const beta02 = beta0 * beta0;
 
                 complex_T const nu = (y * cosPhi - z * sinPhi) / cspeed;
-                complex_T const xi = (-z * cosPhi - y * sinPhi) * tanPhi2 / cspeed;
+                complex_T const xi = (-z * cosPhi - y * sinPhi) * tanAlpha / cspeed;
                 complex_T const rhom
                     = math::sqrt(x2 + pmacc::math::cPow(-z - complex_T(0, 0.5) * k * w02, static_cast<uint32_t>(2u)));
                 complex_T const rhom2 = rhom * rhom;
@@ -297,15 +275,17 @@ namespace picongpu
                 complex_T const besselJ0const = pmacc::math::bessel::j0(k * rhom * sinPhi);
                 complex_T const besselJ1const = pmacc::math::bessel::j1(k * rhom * sinPhi);
 
-                complex_T const zeroOrder = tauG * math::sqrt(omega0 * (float_T(1.0) + cosPhi))
+                complex_T const zeroOrder = (beta0 * tauG)
                     / (math::sqrt(float_T(2.0))
                        * math::exp(
-                           omega0 * pmacc::math::cPow((t - nu + xi) * cosPhi2, static_cast<uint32_t>(2u))
-                           / (+(omega0 * tauG2) / float_T(2.0) - complex_T(0, 1) * nu
-                              + ((omega0 * tauG2) / float_T(2.0) + complex_T(0, 1) * (nu - xi)) * cosPhi))
+                           beta02 * omega0 * pmacc::math::cPow(t - nu + xi, static_cast<uint32_t>(2u))
+                           / (beta02 * omega0 * tauG2 - complex_T(0, 2) * beta02 * (nu - xi) * cotPhi * cotPhi
+                              + complex_T(0, 2) * beta0 * (float_T(2.0) * nu - xi) * cotPhi / sinPhi
+                              - complex_T(0, 2) * nu / sinPhi_2))
                        * math::sqrt(
-                           +(omega0 * tauG2) / float_T(2.0) - I * nu
-                           + ((omega0 * tauG2) / float_T(2.0) + I * (nu - xi)) * cosPhi));
+                           ((beta02 * omega0 * tauG2) / float_T(2.0) - I * beta02 * (nu - xi) * cotPhi * cotPhi
+                            + I * beta0 * (float_T(2.0) * nu - xi) * cotPhi / sinPhi - I * nu / sinPhi_2)
+                           / omega0));
 
                 complex_T const result
                     = (complex_T(0, 0.25) * math::exp(I * (omega0 * t - k * y * cosPhi)) * zeroOrder
@@ -344,29 +324,13 @@ namespace picongpu
                 auto const beta0 = float_T(beta_0);
                 /* If phi < 0 the formulas below are not directly applicable.
                  * Instead phi is taken positive, but the entire pulse rotated by 180 deg around the
-                 * z-axis of the coordinate system in this function.
-                 */
-                auto const phiReal = float_T(math::abs(phi));
-                float_T sinPhiReal;
-                float_T cosPhiReal;
-                pmacc::math::sincos(phiReal, sinPhiReal, cosPhiReal);
-                float_T const alphaTilt = math::atan2(float_T(1.0) - beta0 * cosPhiReal, beta0 * sinPhiReal);
-                /* Definition of the laser pulse front tilt angle for the laser field below.
-                 *
-                 * For beta0 = 1.0, this is equivalent to our standard definition. Question: Why is the
-                 * local "phi_T" not equal in value to the object member "phiReal" or "phi"?
-                 * Because the standard TWTS pulse is defined for beta0 = 1.0 and in the coordinate-system
-                 * of the TWTS model phi is responsible for pulse front tilt and dispersion only. Hence
-                 * the dispersion will (although physically correct) be slightly off the ideal TWTS
-                 * pulse for beta0 != 1.0. This only shows that this TWTS pulse is primarily designed for
-                 * scenarios close to beta0 = 1.
-                 */
-                float_T const phiT = float_T(2.0) * alphaTilt;
-
-                /* Angle between the laser pulse front and the y-axis. Not used, but remains in code for
-                 * documentation purposes.
-                 * float_T const eta = (PI / 2) - (phiReal - alphaTilt);
+                 * y-axis of the coordinate system in this function.
                  */
+                auto const phiT = float_T(math::abs(phi));
+                float_T sinPhi;
+                float_T cosPhi;
+                pmacc::math::sincos(phiT, sinPhi, cosPhi);
+                float_T const tanAlpha = (float_T(1.0) - beta0 * cosPhi) / (beta0 * sinPhi);
 
                 auto const cspeed = float_T(sim.si.getSpeedOfLight() / sim.unit.speed());
                 auto const lambda0 = float_T(wavelength_SI / sim.unit.length());
@@ -383,13 +347,9 @@ namespace picongpu
                  * (i.e. from a finite coordinate range) only. All these quantities have to be calculated
                  * in double precision.
                  */
-                float_64 sinPhiVal;
-                float_64 cosPhiVal;
-                pmacc::math::sincos(precisionCast<float_64>(phi), sinPhiVal, cosPhiVal);
-                float_64 const tanAlpha = (1.0 - beta_0 * cosPhiVal) / (beta_0 * sinPhiVal);
-                float_64 const tanFocalLine = math::tan(PI / 2.0 - phi);
-                float_64 const deltaT = wavelength_SI / sim.si.getSpeedOfLight() * (1.0 + tanAlpha / tanFocalLine);
-                float_64 const deltaY = wavelength_SI * cosPhiVal + wavelength_SI * sinPhiVal * sinPhiVal / cosPhiVal;
+                float_64 const deltaT = wavelength_SI / sim.si.getSpeedOfLight()
+                    / (1.0 - beta_0 * pmacc::math::cos(precisionCast<float_64>(phi)));
+                float_64 const deltaY = beta0 * sim.si.getSpeedOfLight() * deltaT;
                 float_64 const numberOfPeriods = math::floor(time / deltaT);
                 auto const timeMod = float_T(time - numberOfPeriods * deltaT);
                 auto const yMod = float_T(pos.y() - numberOfPeriods * deltaY);
@@ -401,15 +361,12 @@ namespace picongpu
                 /* To avoid underflows in computation, fields are set to zero
                  * before and after the respective TWTS pulse envelope.
                  */
-                if(math::abs(y - z * math::tan(phiT / float_T(2.0)) - (cspeed * t)) > (numSigmas * tauG * cspeed))
+                if(math::abs(y - z * tanAlpha - (beta0 * cspeed * t)) > (numSigmas * tauG * cspeed))
                     return float_T(0.0);
 
                 /* Calculating shortcuts for speeding up field calculation */
-                float_T sinPhi;
-                float_T cosPhi;
-                pmacc::math::sincos(phiT, sinPhi, cosPhi);
-                float_T const cosPhi2 = math::cos(phiT / float_T(2.0));
-                float_T const tanPhi2 = math::tan(phiT / float_T(2.0));
+                float_T const tanPhi = math::tan(phiT);
+                float_T const cotPhi = float_T(1.0) / tanPhi;
                 float_T const sinPhi_2 = sinPhi * sinPhi;
                 float_T const cosPhi_2 = cosPhi * cosPhi;
                 float_T const sinPolAngle = math::sin(polAngle);
@@ -420,9 +377,10 @@ namespace picongpu
                 float_T const tauG2 = tauG * tauG;
                 float_T const psi0 = float_T(2.0) / k;
                 float_T const w02 = w0 * w0;
+                float_T const beta02 = beta0 * beta0;
 
                 complex_T const nu = (y * cosPhi - z * sinPhi) / cspeed;
-                complex_T const xi = (-z * cosPhi - y * sinPhi) * tanPhi2 / cspeed;
+                complex_T const xi = (-z * cosPhi - y * sinPhi) * tanAlpha / cspeed;
                 complex_T const rhom
                     = math::sqrt(x2 + pmacc::math::cPow(-z - complex_T(0, 0.5) * k * w02, static_cast<uint32_t>(2u)));
                 complex_T const Xm = -z - complex_T(0, 0.5) * k * w02;
@@ -430,15 +388,17 @@ namespace picongpu
                 complex_T const besselJ0const = pmacc::math::bessel::j0(k * rhom * sinPhi);
                 complex_T const besselJ1const = pmacc::math::bessel::j1(k * rhom * sinPhi);
 
-                complex_T const zeroOrder = tauG * math::sqrt(omega0 * (float_T(1.0) + cosPhi))
+                complex_T const zeroOrder = (beta0 * tauG)
                     / (math::sqrt(float_T(2.0))
                        * math::exp(
-                           omega0 * pmacc::math::cPow((t - nu + xi) * cosPhi2, static_cast<uint32_t>(2u))
-                           / (+(omega0 * tauG2) / float_T(2.0) - complex_T(0, 1) * nu
-                              + ((omega0 * tauG2) / float_T(2.0) + complex_T(0, 1) * (nu - xi)) * cosPhi))
+                           beta02 * omega0 * pmacc::math::cPow(t - nu + xi, static_cast<uint32_t>(2u))
+                           / (beta02 * omega0 * tauG2 - complex_T(0, 2) * beta02 * (nu - xi) * cotPhi * cotPhi
+                              + complex_T(0, 2) * beta0 * (float_T(2.0) * nu - xi) * cotPhi / sinPhi
+                              - complex_T(0, 2) * nu / sinPhi_2))
                        * math::sqrt(
-                           +(omega0 * tauG2) / float_T(2.0) - I * nu
-                           + ((omega0 * tauG2) / float_T(2.0) + I * (nu - xi)) * cosPhi));
+                           ((beta02 * omega0 * tauG2) / float_T(2.0) - I * beta02 * (nu - xi) * cotPhi * cotPhi
+                            + I * beta0 * (float_T(2.0) * nu - xi) * cotPhi / sinPhi - I * nu / sinPhi_2)
+                           / omega0));
 
                 complex_T const result = (math::exp(I * (omega0 * t - k * y * cosPhi)) * k * zeroOrder * sinPhi
                                           * (besselJ1const
@@ -465,29 +425,13 @@ namespace picongpu
                 auto const beta0 = float_T(beta_0);
                 /* If phi < 0 the formulas below are not directly applicable.
                  * Instead phi is taken positive, but the entire pulse rotated by 180 deg around the
-                 * z-axis of the coordinate system in this function.
-                 */
-                auto const phiReal = float_T(math::abs(phi));
-                float_T sinPhiReal;
-                float_T cosPhiReal;
-                pmacc::math::sincos(phiReal, sinPhiReal, cosPhiReal);
-                float_T const alphaTilt = math::atan2(float_T(1.0) - beta0 * cosPhiReal, beta0 * sinPhiReal);
-                /* Definition of the laser pulse front tilt angle for the laser field below.
-                 *
-                 * For beta0 = 1.0, this is equivalent to our standard definition. Question: Why is the
-                 * local "phi_T" not equal in value to the object member "phiReal" or "phi"?
-                 * Because the standard TWTS pulse is defined for beta0 = 1.0 and in the coordinate-system
-                 * of the TWTS model phi is responsible for pulse front tilt and dispersion only. Hence
-                 * the dispersion will (although physically correct) be slightly off the ideal TWTS
-                 * pulse for beta0 != 1.0. This only shows that this TWTS pulse is primarily designed for
-                 * scenarios close to beta0 = 1.
-                 */
-                float_T const phiT = float_T(2.0) * alphaTilt;
-
-                /* Angle between the laser pulse front and the y-axis. Not used, but remains in code for
-                 * documentation purposes.
-                 * float_T const eta = (PI / 2) - (phiReal - alphaTilt);
+                 * y-axis of the coordinate system in this function.
                  */
+                auto const phiT = float_T(math::abs(phi));
+                float_T sinPhi;
+                float_T cosPhi;
+                pmacc::math::sincos(phiT, sinPhi, cosPhi);
+                float_T const tanAlpha = (float_T(1.0) - beta0 * cosPhi) / (beta0 * sinPhi);
 
                 auto const cspeed = float_T(sim.si.getSpeedOfLight() / sim.unit.speed());
                 auto const lambda0 = float_T(wavelength_SI / sim.unit.length());
@@ -504,17 +448,12 @@ namespace picongpu
                  * (i.e. from a finite coordinate range) only. All these quantities have to be calculated
                  * in double precision.
                  */
-                float_64 sinPhiVal;
-                float_64 cosPhiVal;
-                pmacc::math::sincos(precisionCast<float_64>(phi), sinPhiVal, cosPhiVal);
-                float_64 const tanAlpha = (1.0 - beta_0 * cosPhiVal) / (beta_0 * sinPhiVal);
-                float_64 const tanFocalLine = math::tan(PI / 2.0 - phi);
-                float_64 const deltaT = wavelength_SI / sim.si.getSpeedOfLight() * (1.0 + tanAlpha / tanFocalLine);
-                float_64 const deltaY = wavelength_SI * cosPhiVal + wavelength_SI * sinPhiVal * sinPhiVal / cosPhiVal;
+                float_64 const deltaT = wavelength_SI / sim.si.getSpeedOfLight()
+                    / (1.0 - beta_0 * pmacc::math::cos(precisionCast<float_64>(phi)));
+                float_64 const deltaY = beta0 * sim.si.getSpeedOfLight() * deltaT;
                 float_64 const numberOfPeriods = math::floor(time / deltaT);
                 auto const timeMod = float_T(time - numberOfPeriods * deltaT);
                 auto const yMod = float_T(pos.y() - numberOfPeriods * deltaY);
-
                 auto const x = float_T(phiPositive * pos.x() / sim.unit.length());
                 auto const y = float_T(yMod / sim.unit.length());
                 auto const z = float_T(phiPositive * pos.z() / sim.unit.length());
@@ -522,15 +461,12 @@ namespace picongpu
                 /* To avoid underflows in computation, fields are set to zero
                  * before and after the respective TWTS pulse envelope.
                  */
-                if(math::abs(y - z * math::tan(phiT / float_T(2.0)) - (cspeed * t)) > (numSigmas * tauG * cspeed))
+                if(math::abs(y - z * tanAlpha - (beta0 * cspeed * t)) > (numSigmas * tauG * cspeed))
                     return float_T(0.0);
 
                 /* Calculating shortcuts for speeding up field calculation */
-                float_T sinPhi;
-                float_T cosPhi;
-                pmacc::math::sincos(phiT, sinPhi, cosPhi);
-                float_T const cosPhi2 = math::cos(phiT / float_T(2.0));
-                float_T const tanPhi2 = math::tan(phiT / float_T(2.0));
+                float_T const tanPhi = math::tan(phiT);
+                float_T const cotPhi = float_T(1.0) / tanPhi;
                 float_T const sinPhi_2 = sinPhi * sinPhi;
                 float_T const sinPolAngle = math::sin(polAngle);
                 float_T const cosPolAngle = math::cos(polAngle);
@@ -541,9 +477,10 @@ namespace picongpu
                 float_T const tauG2 = tauG * tauG;
                 float_T const psi0 = float_T(2.0) / k;
                 float_T const w02 = w0 * w0;
+                float_T const beta02 = beta0 * beta0;
 
                 complex_T const nu = (y * cosPhi - z * sinPhi) / cspeed;
-                complex_T const xi = (-z * cosPhi - y * sinPhi) * tanPhi2 / cspeed;
+                complex_T const xi = (-z * cosPhi - y * sinPhi) * tanAlpha / cspeed;
                 complex_T const rhom
                     = math::sqrt(x2 + pmacc::math::cPow(-z - complex_T(0, 0.5) * k * w02, static_cast<uint32_t>(2u)));
                 complex_T const rhom2 = rhom * rhom;
@@ -553,15 +490,17 @@ namespace picongpu
                 complex_T const besselJ0const = pmacc::math::bessel::j0(k * rhom * sinPhi);
                 complex_T const besselJ1const = pmacc::math::bessel::j1(k * rhom * sinPhi);
 
-                complex_T const zeroOrder = tauG * math::sqrt(omega0 * (float_T(1.0) + cosPhi))
+                complex_T const zeroOrder = (beta0 * tauG)
                     / (math::sqrt(float_T(2.0))
                        * math::exp(
-                           omega0 * pmacc::math::cPow((t - nu + xi) * cosPhi2, static_cast<uint32_t>(2u))
-                           / (+(omega0 * tauG2) / float_T(2.0) - complex_T(0, 1) * nu
-                              + ((omega0 * tauG2) / float_T(2.0) + complex_T(0, 1) * (nu - xi)) * cosPhi))
+                           beta02 * omega0 * pmacc::math::cPow(t - nu + xi, static_cast<uint32_t>(2u))
+                           / (beta02 * omega0 * tauG2 - complex_T(0, 2) * beta02 * (nu - xi) * cotPhi * cotPhi
+                              + complex_T(0, 2) * beta0 * (float_T(2.0) * nu - xi) * cotPhi / sinPhi
+                              - complex_T(0, 2) * nu / sinPhi_2))
                        * math::sqrt(
-                           +(omega0 * tauG2) / float_T(2.0) - I * nu
-                           + ((omega0 * tauG2) / float_T(2.0) + I * (nu - xi)) * cosPhi));
+                           ((beta02 * omega0 * tauG2) / float_T(2.0) - I * beta02 * (nu - xi) * cotPhi * cotPhi
+                            + I * beta0 * (float_T(2.0) * nu - xi) * cotPhi / sinPhi - I * nu / sinPhi_2)
+                           / omega0));
 
                 complex_T const result
                     = (complex_T(0, 0.125) * math::exp(I * (omega0 * t - k * y * cosPhi)) * zeroOrder
diff --git a/include/picongpu/fields/background/templates/twtstight/twtstight.hpp b/include/picongpu/fields/background/templates/twtstight/twtstight.hpp
index d4b9d70417..1123f635ce 100644
--- a/include/picongpu/fields/background/templates/twtstight/twtstight.hpp
+++ b/include/picongpu/fields/background/templates/twtstight/twtstight.hpp
@@ -68,10 +68,10 @@ namespace picongpu
             /** To avoid underflows in computation, numsigmas controls where a zero cutoff is made.
              *  The fields thus are set to zero at a position (numSigmas * tauG * cspeed) ahead
              *  and behind the respective TWTS pulse envelope.
-             *  Developer note: In case the float_T-type is set to float_X instead of float_64,
-             *  numSigma needs to be adjusted to numSigmas = 6 to avoid numerical issues.
+             *  Developer note: In case the float_T-type is set to float_64 instead of float_X,
+             *  numSigma can be increased to numSigmas = 10 without running into numerical issues.
              */
-            constexpr uint32_t numSigmas = 10;
+            constexpr uint32_t numSigmas = 6;
         } // namespace twtstight
     } // namespace templates
 } // namespace picongpu
diff --git a/share/picongpu/benchmarks/TWEAC-FOM/cmakeFlags b/share/picongpu/benchmarks/TWEAC-FOM/cmakeFlags
index 2f1fd23a9d..7b0e320ba0 100755
--- a/share/picongpu/benchmarks/TWEAC-FOM/cmakeFlags
+++ b/share/picongpu/benchmarks/TWEAC-FOM/cmakeFlags
@@ -1,6 +1,6 @@
 #!/usr/bin/env bash
 #
-# Copyright 2013-2023 Axel Huebl, Rene Widera, Sergei Bastrakov
+# Copyright 2013-2024 Axel Huebl, Rene Widera, Sergei Bastrakov, Alexander Debus
 #
 # This file is part of PIConGPU.
 #
@@ -29,17 +29,17 @@
 #   - start with zero index
 #   - increase by 1, no gaps
 
-# use field background for laser generation (will not use incident field)
-flags[0]="-DPARAM_OVERWRITES:LIST='-DPARAM_FIELD_BACKGROUND=1'"
+# use incident field for laser generation (will not use field background and should be faster)
+# use componentwise field calculation (may be faster?)
+flags[0]="-DPARAM_OVERWRITES:LIST='-DPARAM_FIELD_BACKGROUND=0;-DPARAM_COMPONENTWISE=1'"
 
 # limit examles if we run CI tests
 if [ -z  "$PIC_CI_MAINLINE" ] ; then
   # use incident field for laser generation (will not use field background and should be faster)
   # use full field calculation
   flags[1]="-DPARAM_OVERWRITES:LIST='-DPARAM_FIELD_BACKGROUND=0;-DPARAM_COMPONENTWISE=0'"
-  # use incident field for laser generation (will not use field background and should be faster)
-  # use componentwise field calculation (may be faster?)
-  flags[2]="-DPARAM_OVERWRITES:LIST='-DPARAM_FIELD_BACKGROUND=0;-DPARAM_COMPONENTWISE=1'"
+  # use field background for laser generation (will not use incident field)
+  flags[2]="-DPARAM_OVERWRITES:LIST='-DPARAM_FIELD_BACKGROUND=1'"
 fi
 
 ################################################################################
diff --git a/share/picongpu/benchmarks/TWEAC-FOM/include/picongpu/param/incidentField.param b/share/picongpu/benchmarks/TWEAC-FOM/include/picongpu/param/incidentField.param
index f98884a4cc..4d6455ca67 100644
--- a/share/picongpu/benchmarks/TWEAC-FOM/include/picongpu/param/incidentField.param
+++ b/share/picongpu/benchmarks/TWEAC-FOM/include/picongpu/param/incidentField.param
@@ -262,11 +262,29 @@ namespace picongpu
             using ZMin = MyProfile;
             using ZMax = MyProfile;
 
-            // These values are chosen to match the background, have to be changed for higher order field solvers
+            /** Position in cells of the Huygens surface relative to start of the total domain
+             *
+             * The position is set as an offset, in cells, counted from the start of the total domain.
+             * For the max boundaries, negative position values are allowed.
+             * These negative values are treated as position at (global_domain_size[d] + POSITION[d][1]).
+             * It is also possible to specify the position explicitly as a positive number.
+             * Then it is on a user to make sure the position is correctly calculated wrt the grid size.
+             *
+             * Except moving window simulations, the position must be inside the global domain.
+             * The distance between the Huygens surface and each global domain boundary must be at least
+             * absorber_thickness + (FDTD_spatial_order / 2 - 1). However beware of setting position = direction *
+             * (absorber_thickness + const), as then changing absorber parameters will affect laser positioning.
+             * When all used profiles are None, the check for POSITION validity is skipped.
+             *
+             * For moving window simulations, POSITION for the YMax side can be located outside the initially
+             * simulated volume. In this case, parts of the generation surface outside of the currently simulated
+             * volume is are treated as if they had zero incident field and it is user's responsibility to apply a
+             * source matching such a case.
+             */
             constexpr int32_t POSITION[3][2] = {
-                {1, -1}, // x direction [negative, positive]
-                {1, -1}, // y direction [negative, positive]
-                {1, -1} // z direction [negative, positive]
+                {16, -16}, // x direction [negative, positive]
+                {16, -16}, // y direction [negative, positive]
+                {16, -16} // z direction [negative, positive]
             };
 
         } // namespace incidentField

From 8e97a169d574cdcb58a5d3c425f414684a9bd751 Mon Sep 17 00:00:00 2001
From: Alexander Debus <a.debus@hzdr.de>
Date: Tue, 29 Oct 2024 15:37:15 +0100
Subject: [PATCH 2/5] Refactoring code duplications using constructive bindings

---
 .../background/templates/twtstight/BField.hpp |  62 +++
 .../background/templates/twtstight/BField.tpp | 377 ++++++++++--------
 .../background/templates/twtstight/EField.hpp |  60 +++
 .../background/templates/twtstight/EField.tpp | 376 +++++++++--------
 4 files changed, 527 insertions(+), 348 deletions(-)

diff --git a/include/picongpu/fields/background/templates/twtstight/BField.hpp b/include/picongpu/fields/background/templates/twtstight/BField.hpp
index 16944c3db0..ae4c472eae 100644
--- a/include/picongpu/fields/background/templates/twtstight/BField.hpp
+++ b/include/picongpu/fields/background/templates/twtstight/BField.hpp
@@ -27,6 +27,8 @@
 #include <pmacc/math/Vector.hpp>
 #include <pmacc/types.hpp>
 
+#include <tuple>
+
 namespace picongpu
 {
     /* Load pre-defined background field */
@@ -159,6 +161,66 @@ namespace picongpu
                     pmacc::math::Vector<floatD_X, detail::numComponents> const& extraShifts,
                     float_X const currentStep) const;
 
+                HDINLINE
+                std::tuple<
+                    float_T,
+                    float_T,
+                    float_T,
+                    float_T,
+                    float_T,
+                    float_T,
+                    float_T,
+                    float_T,
+                    float_T,
+                    float_T,
+                    float_T>
+                defineBasicHelperVariables() const;
+
+                HDINLINE
+                std::tuple<float_T, float_T, float_T, float_T> defineMinimalCoordinates(
+                    float3_64 const& pos,
+                    float_64 const time) const;
+
+                HDINLINE
+                std::tuple<float_T, float_T, float_T, float_T, float_T> defineTrigonometryShortcuts(
+                    float_T const absPhi,
+                    float_T const sinPhi) const;
+
+                HDINLINE
+                std::tuple<
+                    alpaka::Complex<float_T>,
+                    float_T,
+                    float_T,
+                    float_T,
+                    float_T,
+                    float_T,
+                    float_T,
+                    float_T,
+                    alpaka::Complex<float_T>,
+                    alpaka::Complex<float_T>,
+                    float_T,
+                    alpaka::Complex<float_T>,
+                    alpaka::Complex<float_T>,
+                    alpaka::Complex<float_T>>
+                defineCommonHelperVariables(
+                    float_T const absPhi,
+                    float_T const sinPhi,
+                    float_T const cosPhi,
+                    float_T const beta0,
+                    float_T const tanAlpha,
+                    float_T const cspeed,
+                    float_T const lambda0,
+                    float_T const omega0,
+                    float_T const tauG,
+                    float_T const w0,
+                    float_T const k,
+                    float_T const x,
+                    float_T const y,
+                    float_T const z,
+                    float_T const t,
+                    float_T const cotPhi,
+                    float_T const sinPhi_2) const;
+
                 /** Calculate the By(r,t) field
                  *
                  * @param pos Spatial position of the target field.
diff --git a/include/picongpu/fields/background/templates/twtstight/BField.tpp b/include/picongpu/fields/background/templates/twtstight/BField.tpp
index 375bdab163..e4640d0cbb 100644
--- a/include/picongpu/fields/background/templates/twtstight/BField.tpp
+++ b/include/picongpu/fields/background/templates/twtstight/BField.tpp
@@ -33,6 +33,8 @@
 #include <pmacc/math/Vector.hpp>
 #include <pmacc/types.hpp>
 
+#include <tuple>
+
 namespace picongpu
 {
     /** Load pre-defined background field */
@@ -190,27 +192,33 @@ namespace picongpu
                     return (*this)(cellIdx, currentStep)[T_component];
             }
 
-            /** Calculate the By(r,t) field here
-             *
-             * @param pos Spatial position of the target field.
-             * @param time Absolute time (SI, including all offsets and transformations)
-             *             for calculating the field */
             HDINLINE
-            BField::float_T BField::calcTWTSBy(float3_64 const& pos, float_64 const time) const
+            std::tuple<
+                BField::float_T,
+                BField::float_T,
+                BField::float_T,
+                BField::float_T,
+                BField::float_T,
+                BField::float_T,
+                BField::float_T,
+                BField::float_T,
+                BField::float_T,
+                BField::float_T,
+                BField::float_T>
+            BField::defineBasicHelperVariables() const
             {
-                using complex_T = alpaka::Complex<float_T>;
-                using complex_64 = alpaka::Complex<float_64>;
-
-                /* Propagation speed of overlap normalized to the speed of light [Default: beta0=1.0] */
-                auto const beta0 = float_T(beta_0);
                 /* If phi < 0 the formulas below are not directly applicable.
                  * Instead phi is taken positive, but the entire pulse rotated by 180 deg around the
                  * y-axis of the coordinate system in this function.
                  */
-                auto const phiT = float_T(math::abs(phi));
+                auto const absPhi = float_T(math::abs(phi));
+
+                /* Propagation speed of overlap normalized to the speed of light [Default: beta0=1.0] */
+                auto const beta0 = float_T(beta_0);
+
                 float_T sinPhi;
                 float_T cosPhi;
-                pmacc::math::sincos(phiT, sinPhi, cosPhi);
+                pmacc::math::sincos(absPhi, sinPhi, cosPhi);
                 float_T const tanAlpha = (float_T(1.0) - beta0 * cosPhi) / (beta0 * sinPhi);
 
                 auto const cspeed = float_T(sim.si.getSpeedOfLight() / sim.unit.speed());
@@ -222,6 +230,13 @@ namespace picongpu
                 auto const w0 = float_T(w_x_SI / sim.unit.length());
                 auto const k = float_T(2.0 * PI / lambda0);
 
+                return std::make_tuple(absPhi, sinPhi, cosPhi, beta0, tanAlpha, cspeed, lambda0, omega0, tauG, w0, k);
+            }
+
+            HDINLINE
+            std::tuple<BField::float_T, BField::float_T, BField::float_T, BField::float_T> BField::
+                defineMinimalCoordinates(float3_64 const& pos, float_64 const time) const
+            {
                 /* In order to calculate in single-precision and in order to account for errors in
                  * the approximations far from the coordinate origin, we use the wavelength-periodicity and
                  * the known propagation direction for realizing the laser pulse using relative coordinates
@@ -230,7 +245,7 @@ namespace picongpu
                  */
                 float_64 const deltaT = wavelength_SI / sim.si.getSpeedOfLight()
                     / (1.0 - beta_0 * pmacc::math::cos(precisionCast<float_64>(phi)));
-                float_64 const deltaY = beta0 * sim.si.getSpeedOfLight() * deltaT;
+                float_64 const deltaY = beta_0 * sim.si.getSpeedOfLight() * deltaT;
                 float_64 const numberOfPeriods = math::floor(time / deltaT);
                 auto const timeMod = float_T(time - numberOfPeriods * deltaT);
                 auto const yMod = float_T(pos.y() - numberOfPeriods * deltaY);
@@ -239,20 +254,62 @@ namespace picongpu
                 auto const y = float_T(yMod / sim.unit.length());
                 auto const z = float_T(phiPositive * pos.z() / sim.unit.length());
                 auto const t = float_T(timeMod / sim.unit.time());
-                /* To avoid underflows in computation, fields are set to zero
-                 * before and after the respective TWTS pulse envelope.
-                 */
-                if(math::abs(y - z * tanAlpha - (beta0 * cspeed * t)) > (numSigmas * tauG * cspeed))
-                    return float_T(0.0);
 
+                return std::make_tuple(x, y, z, t);
+            }
+
+            HDINLINE
+            std::tuple<BField::float_T, BField::float_T, BField::float_T, BField::float_T, BField::float_T> BField::
+                defineTrigonometryShortcuts(BField::float_T const absPhi, BField::float_T const sinPhi) const
+            {
                 /* Calculating shortcuts for speeding up field calculation */
-                float_T const tanPhi = math::tan(phiT);
+                float_T const tanPhi = math::tan(absPhi);
                 float_T const cotPhi = float_T(1.0) / tanPhi;
                 float_T const sinPhi_2 = sinPhi * sinPhi;
-                float_T const cosPhi_2 = cosPhi * cosPhi;
                 float_T const sinPolAngle = math::sin(polAngle);
                 float_T const cosPolAngle = math::cos(polAngle);
 
+                return std::make_tuple(tanPhi, cotPhi, sinPhi_2, sinPolAngle, cosPolAngle);
+            }
+
+            HDINLINE
+            std::tuple<
+                alpaka::Complex<BField::float_T>,
+                BField::float_T,
+                BField::float_T,
+                BField::float_T,
+                BField::float_T,
+                BField::float_T,
+                BField::float_T,
+                BField::float_T,
+                alpaka::Complex<BField::float_T>,
+                alpaka::Complex<BField::float_T>,
+                BField::float_T,
+                alpaka::Complex<BField::float_T>,
+                alpaka::Complex<BField::float_T>,
+                alpaka::Complex<BField::float_T>>
+            BField::defineCommonHelperVariables(
+                BField::float_T const absPhi,
+                BField::float_T const sinPhi,
+                BField::float_T const cosPhi,
+                BField::float_T const beta0,
+                BField::float_T const tanAlpha,
+                BField::float_T const cspeed,
+                BField::float_T const lambda0,
+                BField::float_T const omega0,
+                BField::float_T const tauG,
+                BField::float_T const w0,
+                BField::float_T const k,
+                BField::float_T const x,
+                BField::float_T const y,
+                BField::float_T const z,
+                BField::float_T const t,
+                BField::float_T const cotPhi,
+                BField::float_T const sinPhi_2) const
+            {
+                using complex_T = alpaka::Complex<float_T>;
+                using complex_64 = alpaka::Complex<float_64>;
+
                 complex_T I = complex_T(0, 1);
                 float_T const x2 = x * x;
                 float_T const tauG2 = tauG * tauG;
@@ -260,27 +317,89 @@ namespace picongpu
                 float_T const w02 = w0 * w0;
                 float_T const beta02 = beta0 * beta0;
 
-                complex_T const nu = (y * cosPhi - z * sinPhi) / cspeed;
-                complex_T const xi = (-z * cosPhi - y * sinPhi) * tanAlpha / cspeed;
-                complex_T const rhom
-                    = math::sqrt(x2 + pmacc::math::cPow(-z - complex_T(0, 0.5) * k * w02, static_cast<uint32_t>(2u)));
-                complex_T const Xm = -z - complex_T(0, 0.5) * k * w02;
+                float_T const nu = (y * cosPhi - z * sinPhi) / cspeed;
+                float_T const xi = (-z * cosPhi - y * sinPhi) * tanAlpha / cspeed;
+                complex_T const Xm = -z - complex_T(0, 0.5) * (k * w02);
+                complex_T const rhom = math::sqrt(x2 + pmacc::math::cPow(Xm, static_cast<uint32_t>(2u)));
                 float_T const besselI0const = pmacc::math::bessel::i0(k * k * sinPhi * w02 / float_T(2.0));
-                complex_T const besselJ0const = pmacc::math::bessel::j0(k * rhom * sinPhi);
-                complex_T const besselJ1const = pmacc::math::bessel::j1(k * rhom * sinPhi);
+                complex_T const besselJ0const = pmacc::math::bessel::j0(k * sinPhi * rhom);
+                complex_T const besselJ1const = pmacc::math::bessel::j1(k * sinPhi * rhom);
 
                 complex_T const zeroOrder = (beta0 * tauG)
                     / (math::sqrt(float_T(2.0))
                        * math::exp(
                            beta02 * omega0 * pmacc::math::cPow(t - nu + xi, static_cast<uint32_t>(2u))
-                           / (beta02 * omega0 * tauG2 - complex_T(0, 2) * beta02 * (nu - xi) * cotPhi * cotPhi
-                              + complex_T(0, 2) * beta0 * (float_T(2.0) * nu - xi) * cotPhi / sinPhi
-                              - complex_T(0, 2) * nu / sinPhi_2))
+                           / (beta02 * omega0 * tauG2 - complex_T(0, 2) * (beta02 * (nu - xi) * cotPhi * cotPhi)
+                              + complex_T(0, 2) * (beta0 * (float_T(2.0) * nu - xi) * cotPhi / sinPhi)
+                              - complex_T(0, 2) * (nu / sinPhi_2)))
                        * math::sqrt(
-                           ((beta02 * omega0 * tauG2) / float_T(2.0) - I * beta02 * (nu - xi) * cotPhi * cotPhi
-                            + I * beta0 * (float_T(2.0) * nu - xi) * cotPhi / sinPhi - I * nu / sinPhi_2)
+                           ((beta02 * omega0 * tauG2) / float_T(2.0) - I * (beta02 * (nu - xi) * cotPhi * cotPhi)
+                            + I * (beta0 * (float_T(2.0) * nu - xi) * cotPhi / sinPhi) - I * (nu / sinPhi_2))
                            / omega0));
 
+                return std::make_tuple(
+                    I,
+                    x2,
+                    tauG2,
+                    psi0,
+                    w02,
+                    beta02,
+                    nu,
+                    xi,
+                    rhom,
+                    Xm,
+                    besselI0const,
+                    besselJ0const,
+                    besselJ1const,
+                    zeroOrder);
+            }
+
+            /** Calculate the By(r,t) field here
+             *
+             * @param pos Spatial position of the target field.
+             * @param time Absolute time (SI, including all offsets and transformations)
+             *             for calculating the field */
+            HDINLINE
+            BField::float_T BField::calcTWTSBy(float3_64 const& pos, float_64 const time) const
+            {
+                using complex_T = alpaka::Complex<float_T>;
+                using complex_64 = alpaka::Complex<float_64>;
+
+                auto const& [absPhi, sinPhi, cosPhi, beta0, tanAlpha, cspeed, lambda0, omega0, tauG, w0, k]
+                    = defineBasicHelperVariables();
+                auto const& [x, y, z, t] = defineMinimalCoordinates(pos, time);
+
+                /* To avoid underflows in computation, fields are set to zero
+                 * before and after the respective TWTS pulse envelope.
+                 */
+                if(math::abs(y - z * tanAlpha - (beta0 * cspeed * t)) > (numSigmas * tauG * cspeed))
+                    return float_T(0.0);
+
+                auto const& [tanPhi, cotPhi, sinPhi_2, sinPolAngle, cosPolAngle]
+                    = defineTrigonometryShortcuts(absPhi, sinPhi);
+                auto const& [I, x2, tauG2, psi0, w02, beta02, nu, xi, rhom, Xm, besselI0const, besselJ0const, besselJ1const, zeroOrder]
+                    = defineCommonHelperVariables(
+                        absPhi,
+                        sinPhi,
+                        cosPhi,
+                        beta0,
+                        tanAlpha,
+                        cspeed,
+                        lambda0,
+                        omega0,
+                        tauG,
+                        w0,
+                        k,
+                        x,
+                        y,
+                        z,
+                        t,
+                        cotPhi,
+                        sinPhi_2);
+
+                /* Calculating shortcuts for speeding up field calculation */
+                float_T const cosPhi_2 = cosPhi * cosPhi;
+
                 complex_T const result = (math::exp(I * (omega0 * t - k * y * cosPhi)) * k * zeroOrder * sinPhi
                                           * (-(besselJ1const
                                                * (Xm * cosPolAngle * (float_T(1.0) + cosPhi_2)
@@ -303,86 +422,41 @@ namespace picongpu
                 using complex_T = alpaka::Complex<float_T>;
                 using complex_64 = alpaka::Complex<float_64>;
 
-                /* propagation speed of overlap normalized to the speed of light [Default: beta0=1.0] */
-                auto const beta0 = float_T(beta_0);
-                /* If phi < 0 the formulas below are not directly applicable.
-                 * Instead phi is taken positive, but the entire pulse rotated by 180 deg around the
-                 * y-axis of the coordinate system in this function.
-                 */
-                auto const phiT = float_T(math::abs(phi));
-                float_T sinPhi;
-                float_T cosPhi;
-                pmacc::math::sincos(phiT, sinPhi, cosPhi);
-                float_T const tanAlpha = (float_T(1.0) - beta0 * cosPhi) / (beta0 * sinPhi);
-
-                auto const cspeed = float_T(sim.si.getSpeedOfLight() / sim.unit.speed());
-                auto const lambda0 = float_T(wavelength_SI / sim.unit.length());
-                float_T const omega0 = float_T(2.0 * PI) * cspeed / lambda0;
-                /* factor 2  in tauG arises from definition convention in laser formula */
-                auto const tauG = float_T(pulselength_SI * 2.0 / sim.unit.time());
-                /* w0 is wx here --> w0 could be replaced by wx */
-                auto const w0 = float_T(w_x_SI / sim.unit.length());
-                auto const k = float_T(2.0 * PI / lambda0);
-
-                /* In order to calculate in single-precision and in order to account for errors in
-                 * the approximations far from the coordinate origin, we use the wavelength-periodicity and
-                 * the known propagation direction for realizing the laser pulse using relative coordinates
-                 * (i.e. from a finite coordinate range) only. All these quantities have to be calculated
-                 * in double precision.
-                 */
-                float_64 const deltaT = wavelength_SI / sim.si.getSpeedOfLight()
-                    / (1.0 - beta_0 * pmacc::math::cos(precisionCast<float_64>(phi)));
-                float_64 const deltaY = beta0 * sim.si.getSpeedOfLight() * deltaT;
-                float_64 const numberOfPeriods = math::floor(time / deltaT);
-                auto const timeMod = float_T(time - numberOfPeriods * deltaT);
-                auto const yMod = float_T(pos.y() - numberOfPeriods * deltaY);
+                auto const& [absPhi, sinPhi, cosPhi, beta0, tanAlpha, cspeed, lambda0, omega0, tauG, w0, k]
+                    = defineBasicHelperVariables();
+                auto const& [x, y, z, t] = defineMinimalCoordinates(pos, time);
 
-                auto const x = float_T(phiPositive * pos.x() / sim.unit.length());
-                auto const y = float_T(yMod / sim.unit.length());
-                auto const z = float_T(phiPositive * pos.z() / sim.unit.length());
-                auto const t = float_T(timeMod / sim.unit.time());
                 /* To avoid underflows in computation, fields are set to zero
                  * before and after the respective TWTS pulse envelope.
                  */
                 if(math::abs(y - z * tanAlpha - (beta0 * cspeed * t)) > (numSigmas * tauG * cspeed))
                     return float_T(0.0);
 
-                /* Calculating shortcuts for speeding up field calculation */
-                float_T const tanPhi = math::tan(phiT);
-                float_T const cotPhi = float_T(1.0) / tanPhi;
-                float_T const sinPhi_2 = sinPhi * sinPhi;
-                float_T const sinPolAngle = math::sin(polAngle);
-                float_T const cosPolAngle = math::cos(polAngle);
-
-                complex_T I = complex_T(0, 1);
-                float_T const x2 = x * x;
-                float_T const tauG2 = tauG * tauG;
-                float_T const psi0 = float_T(2.0) / k;
-                float_T const w02 = w0 * w0;
-                float_T const beta02 = beta0 * beta0;
+                auto const& [tanPhi, cotPhi, sinPhi_2, sinPolAngle, cosPolAngle]
+                    = defineTrigonometryShortcuts(absPhi, sinPhi);
+                auto const& [I, x2, tauG2, psi0, w02, beta02, nu, xi, rhom, Xm, besselI0const, besselJ0const, besselJ1const, zeroOrder]
+                    = defineCommonHelperVariables(
+                        absPhi,
+                        sinPhi,
+                        cosPhi,
+                        beta0,
+                        tanAlpha,
+                        cspeed,
+                        lambda0,
+                        omega0,
+                        tauG,
+                        w0,
+                        k,
+                        x,
+                        y,
+                        z,
+                        t,
+                        cotPhi,
+                        sinPhi_2);
 
-                complex_T const nu = (y * cosPhi - z * sinPhi) / cspeed;
-                complex_T const xi = (-z * cosPhi - y * sinPhi) * tanAlpha / cspeed;
-                complex_T const rhom
-                    = math::sqrt(x2 + pmacc::math::cPow(-z - complex_T(0, 0.5) * k * w02, static_cast<uint32_t>(2u)));
+                /* Calculating shortcuts for speeding up field calculation */
                 complex_T const rhom2 = rhom * rhom;
-                complex_T const Xm = -z - complex_T(0, 0.5) * k * w02;
                 complex_T const Xm2 = Xm * Xm;
-                float_T const besselI0const = pmacc::math::bessel::i0(k * k * sinPhi * w02 / float_T(2.0));
-                complex_T const besselJ0const = pmacc::math::bessel::j0(k * rhom * sinPhi);
-                complex_T const besselJ1const = pmacc::math::bessel::j1(k * rhom * sinPhi);
-
-                complex_T const zeroOrder = (beta0 * tauG)
-                    / (math::sqrt(float_T(2.0))
-                       * math::exp(
-                           beta02 * omega0 * pmacc::math::cPow(t - nu + xi, static_cast<uint32_t>(2u))
-                           / (beta02 * omega0 * tauG2 - complex_T(0, 2) * beta02 * (nu - xi) * cotPhi * cotPhi
-                              + complex_T(0, 2) * beta0 * (float_T(2.0) * nu - xi) * cotPhi / sinPhi
-                              - complex_T(0, 2) * nu / sinPhi_2))
-                       * math::sqrt(
-                           ((beta02 * omega0 * tauG2) / float_T(2.0) - I * beta02 * (nu - xi) * cotPhi * cotPhi
-                            + I * beta0 * (float_T(2.0) * nu - xi) * cotPhi / sinPhi - I * nu / sinPhi_2)
-                           / omega0));
 
                 complex_T const result
                     = (complex_T(0, -0.25) * math::exp(I * (omega0 * t - k * y * cosPhi)) * zeroOrder
@@ -418,87 +492,42 @@ namespace picongpu
                 using complex_T = alpaka::Complex<float_T>;
                 using complex_64 = alpaka::Complex<float_64>;
 
-                /* Propagation speed of overlap normalized to the speed of light [Default: beta0=1.0] */
-                auto const beta0 = float_T(beta_0);
-                /* If phi < 0 the formulas below are not directly applicable.
-                 * Instead phi is taken positive, but the entire pulse rotated by 180 deg around the
-                 * y-axis of the coordinate system in this function.
-                 */
-                auto const phiT = float_T(math::abs(phi));
-                float_T cosPhi;
-                float_T sinPhi;
-                pmacc::math::sincos(phiT, sinPhi, cosPhi);
-                float_T const tanAlpha = (float_T(1.0) - beta0 * cosPhi) / (beta0 * sinPhi);
+                auto const& [absPhi, sinPhi, cosPhi, beta0, tanAlpha, cspeed, lambda0, omega0, tauG, w0, k]
+                    = defineBasicHelperVariables();
+                auto const& [x, y, z, t] = defineMinimalCoordinates(pos, time);
 
-                auto const cspeed = float_T(sim.si.getSpeedOfLight() / sim.unit.speed());
-                auto const lambda0 = float_T(wavelength_SI / sim.unit.length());
-                float_T const omega0 = float_T(2.0 * PI) * cspeed / lambda0;
-                /* factor 2  in tauG arises from definition convention in laser formula */
-                auto const tauG = float_T(pulselength_SI * 2.0 / sim.unit.time());
-                /* w0 is wx here --> w0 could be replaced by wx */
-                auto const w0 = float_T(w_x_SI / sim.unit.length());
-                auto const k = float_T(2.0 * PI / lambda0);
-
-                /* In order to calculate in single-precision and in order to account for errors in
-                 * the approximations far from the coordinate origin, we use the wavelength-periodicity and
-                 * the known propagation direction for realizing the laser pulse using relative coordinates
-                 * (i.e. from a finite coordinate range) only. All these quantities have to be calculated
-                 * in double precision.
-                 */
-                float_64 const deltaT = wavelength_SI / sim.si.getSpeedOfLight()
-                    / (1.0 - beta_0 * pmacc::math::cos(precisionCast<float_64>(phi)));
-                float_64 const deltaY = beta0 * sim.si.getSpeedOfLight() * deltaT;
-                float_64 const numberOfPeriods = math::floor(time / deltaT);
-                auto const timeMod = float_T(time - numberOfPeriods * deltaT);
-                auto const yMod = float_T(pos.y() - numberOfPeriods * deltaY);
-
-                auto const x = float_T(phiPositive * pos.x() / sim.unit.length());
-                auto const y = float_T(yMod / sim.unit.length());
-                auto const z = float_T(phiPositive * pos.z() / sim.unit.length());
-                auto const t = float_T(timeMod / sim.unit.time());
                 /* To avoid underflows in computation, fields are set to zero
                  * before and after the respective TWTS pulse envelope.
                  */
                 if(math::abs(y - z * tanAlpha - (beta0 * cspeed * t)) > (numSigmas * tauG * cspeed))
                     return float_T(0.0);
 
+                auto const& [tanPhi, cotPhi, sinPhi_2, sinPolAngle, cosPolAngle]
+                    = defineTrigonometryShortcuts(absPhi, sinPhi);
+                auto const& [I, x2, tauG2, psi0, w02, beta02, nu, xi, rhom, Xm, besselI0const, besselJ0const, besselJ1const, zeroOrder]
+                    = defineCommonHelperVariables(
+                        absPhi,
+                        sinPhi,
+                        cosPhi,
+                        beta0,
+                        tanAlpha,
+                        cspeed,
+                        lambda0,
+                        omega0,
+                        tauG,
+                        w0,
+                        k,
+                        x,
+                        y,
+                        z,
+                        t,
+                        cotPhi,
+                        sinPhi_2);
+
                 /* Calculating shortcuts for speeding up field calculation */
-                float_T const tanPhi = math::tan(phiT);
-                float_T const cotPhi = float_T(1.0) / tanPhi;
-                float_T const sinPhi_2 = sinPhi * sinPhi;
                 float_T const cosPhi_2 = cosPhi * cosPhi;
-                float_T const sinPolAngle = math::sin(polAngle);
-                float_T const cosPolAngle = math::cos(polAngle);
-                float_T const sin2Phi = math::sin(float_T(2.0) * phiT);
-
-                complex_T I = complex_T(0, 1);
-                float_T const x2 = x * x;
-                float_T const tauG2 = tauG * tauG;
-                float_T const psi0 = float_T(2.0) / k;
-                float_T const w02 = w0 * w0;
-                float_T const beta02 = beta0 * beta0;
-
-                complex_T const nu = (y * cosPhi - z * sinPhi) / cspeed;
-                complex_T const xi = (-z * cosPhi - y * sinPhi) * tanAlpha / cspeed;
-                complex_T const rhom
-                    = math::sqrt(x2 + pmacc::math::cPow(-z - complex_T(0, 0.5) * k * w02, static_cast<uint32_t>(2u)));
+                float_T const sin2Phi = math::sin(float_T(2.0) * absPhi);
                 complex_T const rhom2 = rhom * rhom;
-                complex_T const Xm = -z - complex_T(0, 0.5) * k * w02;
-                float_T const besselI0const = pmacc::math::bessel::i0(k * k * sinPhi * w02 / float_T(2.0));
-                complex_T const besselJ0const = pmacc::math::bessel::j0(k * rhom * sinPhi);
-                complex_T const besselJ1const = pmacc::math::bessel::j1(k * rhom * sinPhi);
-
-                complex_T const zeroOrder = (beta0 * tauG)
-                    / (math::sqrt(float_T(2.0))
-                       * math::exp(
-                           beta02 * omega0 * pmacc::math::cPow(t - nu + xi, static_cast<uint32_t>(2u))
-                           / (beta02 * omega0 * tauG2 - complex_T(0, 2) * beta02 * (nu - xi) * cotPhi * cotPhi
-                              + complex_T(0, 2) * beta0 * (float_T(2.0) * nu - xi) * cotPhi / sinPhi
-                              - complex_T(0, 2) * nu / sinPhi_2))
-                       * math::sqrt(
-                           ((beta02 * omega0 * tauG2) / float_T(2.0) - I * beta02 * (nu - xi) * cotPhi * cotPhi
-                            + I * beta0 * (float_T(2.0) * nu - xi) * cotPhi / sinPhi - I * nu / sinPhi_2)
-                           / omega0));
 
                 complex_T const result
                     = (complex_T(0, -0.25) * math::exp(I * (omega0 * t - k * y * cosPhi)) * zeroOrder
diff --git a/include/picongpu/fields/background/templates/twtstight/EField.hpp b/include/picongpu/fields/background/templates/twtstight/EField.hpp
index 80d6140803..acb4d16176 100644
--- a/include/picongpu/fields/background/templates/twtstight/EField.hpp
+++ b/include/picongpu/fields/background/templates/twtstight/EField.hpp
@@ -158,6 +158,66 @@ namespace picongpu
                     pmacc::math::Vector<floatD_X, detail::numComponents> const& extraShifts,
                     float_X const currentStep) const;
 
+                HDINLINE
+                std::tuple<
+                    float_T,
+                    float_T,
+                    float_T,
+                    float_T,
+                    float_T,
+                    float_T,
+                    float_T,
+                    float_T,
+                    float_T,
+                    float_T,
+                    float_T>
+                defineBasicHelperVariables() const;
+
+                HDINLINE
+                std::tuple<float_T, float_T, float_T, float_T> defineMinimalCoordinates(
+                    float3_64 const& pos,
+                    float_64 const time) const;
+
+                HDINLINE
+                std::tuple<float_T, float_T, float_T, float_T, float_T> defineTrigonometryShortcuts(
+                    float_T const absPhi,
+                    float_T const sinPhi) const;
+
+                HDINLINE
+                std::tuple<
+                    alpaka::Complex<float_T>,
+                    float_T,
+                    float_T,
+                    float_T,
+                    float_T,
+                    float_T,
+                    float_T,
+                    float_T,
+                    alpaka::Complex<float_T>,
+                    alpaka::Complex<float_T>,
+                    float_T,
+                    alpaka::Complex<float_T>,
+                    alpaka::Complex<float_T>,
+                    alpaka::Complex<float_T>>
+                defineCommonHelperVariables(
+                    float_T const absPhi,
+                    float_T const sinPhi,
+                    float_T const cosPhi,
+                    float_T const beta0,
+                    float_T const tanAlpha,
+                    float_T const cspeed,
+                    float_T const lambda0,
+                    float_T const omega0,
+                    float_T const tauG,
+                    float_T const w0,
+                    float_T const k,
+                    float_T const x,
+                    float_T const y,
+                    float_T const z,
+                    float_T const t,
+                    float_T const cotPhi,
+                    float_T const sinPhi_2) const;
+
                 /** Calculate the Ex(r,t) field
                  *
                  * @param pos Spatial position of the target field
diff --git a/include/picongpu/fields/background/templates/twtstight/EField.tpp b/include/picongpu/fields/background/templates/twtstight/EField.tpp
index d81691c9cf..ad9181edc8 100644
--- a/include/picongpu/fields/background/templates/twtstight/EField.tpp
+++ b/include/picongpu/fields/background/templates/twtstight/EField.tpp
@@ -196,26 +196,33 @@ namespace picongpu
                     return (*this)(cellIdx, currentStep)[T_component];
             }
 
-            /** Calculate the Ex(r,t) field here
-             *
-             * @param pos Spatial position of the target field.
-             * @param time Absolute time (SI, including all offsets and transformations) for calculating
-             *             the field */
-            HDINLINE EField::float_T EField::calcTWTSEx(float3_64 const& pos, float_64 const time) const
+            HDINLINE
+            std::tuple<
+                EField::float_T,
+                EField::float_T,
+                EField::float_T,
+                EField::float_T,
+                EField::float_T,
+                EField::float_T,
+                EField::float_T,
+                EField::float_T,
+                EField::float_T,
+                EField::float_T,
+                EField::float_T>
+            EField::defineBasicHelperVariables() const
             {
-                using complex_T = alpaka::Complex<float_T>;
-                using complex_64 = alpaka::Complex<float_64>;
-
-                /* Propagation speed of overlap normalized to the speed of light [Default: beta0=1.0] */
-                auto const beta0 = float_T(beta_0);
                 /* If phi < 0 the formulas below are not directly applicable.
                  * Instead phi is taken positive, but the entire pulse rotated by 180 deg around the
                  * y-axis of the coordinate system in this function.
                  */
-                auto const phiT = float_T(math::abs(phi));
+                auto const absPhi = float_T(math::abs(phi));
+
+                /* Propagation speed of overlap normalized to the speed of light [Default: beta0=1.0] */
+                auto const beta0 = float_T(beta_0);
+
                 float_T sinPhi;
                 float_T cosPhi;
-                pmacc::math::sincos(phiT, sinPhi, cosPhi);
+                pmacc::math::sincos(absPhi, sinPhi, cosPhi);
                 float_T const tanAlpha = (float_T(1.0) - beta0 * cosPhi) / (beta0 * sinPhi);
 
                 auto const cspeed = float_T(sim.si.getSpeedOfLight() / sim.unit.speed());
@@ -227,6 +234,13 @@ namespace picongpu
                 auto const w0 = float_T(w_x_SI / sim.unit.length());
                 auto const k = float_T(2.0 * PI / lambda0);
 
+                return std::make_tuple(absPhi, sinPhi, cosPhi, beta0, tanAlpha, cspeed, lambda0, omega0, tauG, w0, k);
+            }
+
+            HDINLINE
+            std::tuple<EField::float_T, EField::float_T, EField::float_T, EField::float_T> EField::
+                defineMinimalCoordinates(float3_64 const& pos, float_64 const time) const
+            {
                 /* In order to calculate in single-precision and in order to account for errors in
                  * the approximations far from the coordinate origin, we use the wavelength-periodicity and
                  * the known propagation direction for realizing the laser pulse using relative coordinates
@@ -235,7 +249,7 @@ namespace picongpu
                  */
                 float_64 const deltaT = wavelength_SI / sim.si.getSpeedOfLight()
                     / (1.0 - beta_0 * pmacc::math::cos(precisionCast<float_64>(phi)));
-                float_64 const deltaY = beta0 * sim.si.getSpeedOfLight() * deltaT;
+                float_64 const deltaY = beta_0 * sim.si.getSpeedOfLight() * deltaT;
                 float_64 const numberOfPeriods = math::floor(time / deltaT);
                 auto const timeMod = float_T(time - numberOfPeriods * deltaT);
                 auto const yMod = float_T(pos.y() - numberOfPeriods * deltaY);
@@ -244,20 +258,62 @@ namespace picongpu
                 auto const y = float_T(yMod / sim.unit.length());
                 auto const z = float_T(phiPositive * pos.z() / sim.unit.length());
                 auto const t = float_T(timeMod / sim.unit.time());
-                /* To avoid underflows in computation, fields are set to zero
-                 * before and after the respective TWTS pulse envelope.
-                 */
-                if(math::abs(y - z * tanAlpha - (beta0 * cspeed * t)) > (numSigmas * tauG * cspeed))
-                    return float_T(0.0);
 
+                return std::make_tuple(x, y, z, t);
+            }
+
+            HDINLINE
+            std::tuple<EField::float_T, EField::float_T, EField::float_T, EField::float_T, EField::float_T> EField::
+                defineTrigonometryShortcuts(EField::float_T const absPhi, EField::float_T const sinPhi) const
+            {
                 /* Calculating shortcuts for speeding up field calculation */
-                float_T const tanPhi = math::tan(phiT);
+                float_T const tanPhi = math::tan(absPhi);
                 float_T const cotPhi = float_T(1.0) / tanPhi;
                 float_T const sinPhi_2 = sinPhi * sinPhi;
-                float_T const cosPhi_2 = cosPhi * cosPhi;
                 float_T const sinPolAngle = math::sin(polAngle);
                 float_T const cosPolAngle = math::cos(polAngle);
 
+                return std::make_tuple(tanPhi, cotPhi, sinPhi_2, sinPolAngle, cosPolAngle);
+            }
+
+            HDINLINE
+            std::tuple<
+                alpaka::Complex<EField::float_T>,
+                EField::float_T,
+                EField::float_T,
+                EField::float_T,
+                EField::float_T,
+                EField::float_T,
+                EField::float_T,
+                EField::float_T,
+                alpaka::Complex<EField::float_T>,
+                alpaka::Complex<EField::float_T>,
+                EField::float_T,
+                alpaka::Complex<EField::float_T>,
+                alpaka::Complex<EField::float_T>,
+                alpaka::Complex<EField::float_T>>
+            EField::defineCommonHelperVariables(
+                EField::float_T const absPhi,
+                EField::float_T const sinPhi,
+                EField::float_T const cosPhi,
+                EField::float_T const beta0,
+                EField::float_T const tanAlpha,
+                EField::float_T const cspeed,
+                EField::float_T const lambda0,
+                EField::float_T const omega0,
+                EField::float_T const tauG,
+                EField::float_T const w0,
+                EField::float_T const k,
+                EField::float_T const x,
+                EField::float_T const y,
+                EField::float_T const z,
+                EField::float_T const t,
+                EField::float_T const cotPhi,
+                EField::float_T const sinPhi_2) const
+            {
+                using complex_T = alpaka::Complex<float_T>;
+                using complex_64 = alpaka::Complex<float_64>;
+
                 complex_T I = complex_T(0, 1);
                 float_T const x2 = x * x;
                 float_T const tauG2 = tauG * tauG;
@@ -265,28 +321,89 @@ namespace picongpu
                 float_T const w02 = w0 * w0;
                 float_T const beta02 = beta0 * beta0;
 
-                complex_T const nu = (y * cosPhi - z * sinPhi) / cspeed;
-                complex_T const xi = (-z * cosPhi - y * sinPhi) * tanAlpha / cspeed;
-                complex_T const rhom
-                    = math::sqrt(x2 + pmacc::math::cPow(-z - complex_T(0, 0.5) * k * w02, static_cast<uint32_t>(2u)));
-                complex_T const rhom2 = rhom * rhom;
-                complex_T const Xm = -z - complex_T(0, 0.5) * k * w02;
+                float_T const nu = (y * cosPhi - z * sinPhi) / cspeed;
+                float_T const xi = (-z * cosPhi - y * sinPhi) * tanAlpha / cspeed;
+                complex_T const Xm = -z - complex_T(0, 0.5) * (k * w02);
+                complex_T const rhom = math::sqrt(x2 + pmacc::math::cPow(Xm, static_cast<uint32_t>(2u)));
                 float_T const besselI0const = pmacc::math::bessel::i0(k * k * sinPhi * w02 / float_T(2.0));
-                complex_T const besselJ0const = pmacc::math::bessel::j0(k * rhom * sinPhi);
-                complex_T const besselJ1const = pmacc::math::bessel::j1(k * rhom * sinPhi);
+                complex_T const besselJ0const = pmacc::math::bessel::j0(k * sinPhi * rhom);
+                complex_T const besselJ1const = pmacc::math::bessel::j1(k * sinPhi * rhom);
 
                 complex_T const zeroOrder = (beta0 * tauG)
                     / (math::sqrt(float_T(2.0))
                        * math::exp(
                            beta02 * omega0 * pmacc::math::cPow(t - nu + xi, static_cast<uint32_t>(2u))
-                           / (beta02 * omega0 * tauG2 - complex_T(0, 2) * beta02 * (nu - xi) * cotPhi * cotPhi
-                              + complex_T(0, 2) * beta0 * (float_T(2.0) * nu - xi) * cotPhi / sinPhi
-                              - complex_T(0, 2) * nu / sinPhi_2))
+                           / (beta02 * omega0 * tauG2 - complex_T(0, 2) * (beta02 * (nu - xi) * cotPhi * cotPhi)
+                              + complex_T(0, 2) * (beta0 * (float_T(2.0) * nu - xi) * cotPhi / sinPhi)
+                              - complex_T(0, 2) * (nu / sinPhi_2)))
                        * math::sqrt(
-                           ((beta02 * omega0 * tauG2) / float_T(2.0) - I * beta02 * (nu - xi) * cotPhi * cotPhi
-                            + I * beta0 * (float_T(2.0) * nu - xi) * cotPhi / sinPhi - I * nu / sinPhi_2)
+                           ((beta02 * omega0 * tauG2) / float_T(2.0) - I * (beta02 * (nu - xi) * cotPhi * cotPhi)
+                            + I * (beta0 * (float_T(2.0) * nu - xi) * cotPhi / sinPhi) - I * (nu / sinPhi_2))
                            / omega0));
 
+                return std::make_tuple(
+                    I,
+                    x2,
+                    tauG2,
+                    psi0,
+                    w02,
+                    beta02,
+                    nu,
+                    xi,
+                    rhom,
+                    Xm,
+                    besselI0const,
+                    besselJ0const,
+                    besselJ1const,
+                    zeroOrder);
+            }
+
+            /** Calculate the Ex(r,t) field here
+             *
+             * @param pos Spatial position of the target field.
+             * @param time Absolute time (SI, including all offsets and transformations) for calculating
+             *             the field */
+            HDINLINE EField::float_T EField::calcTWTSEx(float3_64 const& pos, float_64 const time) const
+            {
+                using complex_T = alpaka::Complex<float_T>;
+                using complex_64 = alpaka::Complex<float_64>;
+
+                auto const& [absPhi, sinPhi, cosPhi, beta0, tanAlpha, cspeed, lambda0, omega0, tauG, w0, k]
+                    = defineBasicHelperVariables();
+                auto const& [x, y, z, t] = defineMinimalCoordinates(pos, time);
+
+                /* To avoid underflows in computation, fields are set to zero
+                 * before and after the respective TWTS pulse envelope.
+                 */
+                if(math::abs(y - z * tanAlpha - (beta0 * cspeed * t)) > (numSigmas * tauG * cspeed))
+                    return float_T(0.0);
+
+                auto const& [tanPhi, cotPhi, sinPhi_2, sinPolAngle, cosPolAngle]
+                    = defineTrigonometryShortcuts(absPhi, sinPhi);
+                auto const& [I, x2, tauG2, psi0, w02, beta02, nu, xi, rhom, Xm, besselI0const, besselJ0const, besselJ1const, zeroOrder]
+                    = defineCommonHelperVariables(
+                        absPhi,
+                        sinPhi,
+                        cosPhi,
+                        beta0,
+                        tanAlpha,
+                        cspeed,
+                        lambda0,
+                        omega0,
+                        tauG,
+                        w0,
+                        k,
+                        x,
+                        y,
+                        z,
+                        t,
+                        cotPhi,
+                        sinPhi_2);
+
+                /* Calculating shortcuts for speeding up field calculation */
+                float_T const cosPhi_2 = cosPhi * cosPhi;
+                complex_T const rhom2 = rhom * rhom;
+
                 complex_T const result
                     = (complex_T(0, 0.25) * math::exp(I * (omega0 * t - k * y * cosPhi)) * zeroOrder
                        * (k * rhom * besselJ0const
@@ -320,85 +437,40 @@ namespace picongpu
                 using complex_T = alpaka::Complex<float_T>;
                 using complex_64 = alpaka::Complex<float_64>;
 
-                /* Propagation speed of overlap normalized to the speed of light [Default: beta0=1.0] */
-                auto const beta0 = float_T(beta_0);
-                /* If phi < 0 the formulas below are not directly applicable.
-                 * Instead phi is taken positive, but the entire pulse rotated by 180 deg around the
-                 * y-axis of the coordinate system in this function.
-                 */
-                auto const phiT = float_T(math::abs(phi));
-                float_T sinPhi;
-                float_T cosPhi;
-                pmacc::math::sincos(phiT, sinPhi, cosPhi);
-                float_T const tanAlpha = (float_T(1.0) - beta0 * cosPhi) / (beta0 * sinPhi);
+                auto const& [absPhi, sinPhi, cosPhi, beta0, tanAlpha, cspeed, lambda0, omega0, tauG, w0, k]
+                    = defineBasicHelperVariables();
+                auto const& [x, y, z, t] = defineMinimalCoordinates(pos, time);
 
-                auto const cspeed = float_T(sim.si.getSpeedOfLight() / sim.unit.speed());
-                auto const lambda0 = float_T(wavelength_SI / sim.unit.length());
-                float_T const omega0 = float_T(2.0 * PI) * cspeed / lambda0;
-                /* factor 2  in tauG arises from definition convention in laser formula */
-                auto const tauG = float_T(pulselength_SI * 2.0 / sim.unit.time());
-                /* w0 is wx here --> w0 could be replaced by wx */
-                auto const w0 = float_T(w_x_SI / sim.unit.length());
-                auto const k = float_T(2.0 * PI / lambda0);
-
-                /* In order to calculate in single-precision and in order to account for errors in
-                 * the approximations far from the coordinate origin, we use the wavelength-periodicity and
-                 * the known propagation direction for realizing the laser pulse using relative coordinates
-                 * (i.e. from a finite coordinate range) only. All these quantities have to be calculated
-                 * in double precision.
-                 */
-                float_64 const deltaT = wavelength_SI / sim.si.getSpeedOfLight()
-                    / (1.0 - beta_0 * pmacc::math::cos(precisionCast<float_64>(phi)));
-                float_64 const deltaY = beta0 * sim.si.getSpeedOfLight() * deltaT;
-                float_64 const numberOfPeriods = math::floor(time / deltaT);
-                auto const timeMod = float_T(time - numberOfPeriods * deltaT);
-                auto const yMod = float_T(pos.y() - numberOfPeriods * deltaY);
-
-                auto const x = float_T(phiPositive * pos.x() / sim.unit.length());
-                auto const y = float_T(yMod / sim.unit.length());
-                auto const z = float_T(phiPositive * pos.z() / sim.unit.length());
-                auto const t = float_T(timeMod / sim.unit.time());
                 /* To avoid underflows in computation, fields are set to zero
                  * before and after the respective TWTS pulse envelope.
                  */
                 if(math::abs(y - z * tanAlpha - (beta0 * cspeed * t)) > (numSigmas * tauG * cspeed))
                     return float_T(0.0);
 
+                auto const& [tanPhi, cotPhi, sinPhi_2, sinPolAngle, cosPolAngle]
+                    = defineTrigonometryShortcuts(absPhi, sinPhi);
+                auto const& [I, x2, tauG2, psi0, w02, beta02, nu, xi, rhom, Xm, besselI0const, besselJ0const, besselJ1const, zeroOrder]
+                    = defineCommonHelperVariables(
+                        absPhi,
+                        sinPhi,
+                        cosPhi,
+                        beta0,
+                        tanAlpha,
+                        cspeed,
+                        lambda0,
+                        omega0,
+                        tauG,
+                        w0,
+                        k,
+                        x,
+                        y,
+                        z,
+                        t,
+                        cotPhi,
+                        sinPhi_2);
+
                 /* Calculating shortcuts for speeding up field calculation */
-                float_T const tanPhi = math::tan(phiT);
-                float_T const cotPhi = float_T(1.0) / tanPhi;
-                float_T const sinPhi_2 = sinPhi * sinPhi;
                 float_T const cosPhi_2 = cosPhi * cosPhi;
-                float_T const sinPolAngle = math::sin(polAngle);
-                float_T const cosPolAngle = math::cos(polAngle);
-
-                complex_T I = complex_T(0, 1);
-                float_T const x2 = x * x;
-                float_T const tauG2 = tauG * tauG;
-                float_T const psi0 = float_T(2.0) / k;
-                float_T const w02 = w0 * w0;
-                float_T const beta02 = beta0 * beta0;
-
-                complex_T const nu = (y * cosPhi - z * sinPhi) / cspeed;
-                complex_T const xi = (-z * cosPhi - y * sinPhi) * tanAlpha / cspeed;
-                complex_T const rhom
-                    = math::sqrt(x2 + pmacc::math::cPow(-z - complex_T(0, 0.5) * k * w02, static_cast<uint32_t>(2u)));
-                complex_T const Xm = -z - complex_T(0, 0.5) * k * w02;
-                float_T const besselI0const = pmacc::math::bessel::i0(k * k * sinPhi * w02 / float_T(2.0));
-                complex_T const besselJ0const = pmacc::math::bessel::j0(k * rhom * sinPhi);
-                complex_T const besselJ1const = pmacc::math::bessel::j1(k * rhom * sinPhi);
-
-                complex_T const zeroOrder = (beta0 * tauG)
-                    / (math::sqrt(float_T(2.0))
-                       * math::exp(
-                           beta02 * omega0 * pmacc::math::cPow(t - nu + xi, static_cast<uint32_t>(2u))
-                           / (beta02 * omega0 * tauG2 - complex_T(0, 2) * beta02 * (nu - xi) * cotPhi * cotPhi
-                              + complex_T(0, 2) * beta0 * (float_T(2.0) * nu - xi) * cotPhi / sinPhi
-                              - complex_T(0, 2) * nu / sinPhi_2))
-                       * math::sqrt(
-                           ((beta02 * omega0 * tauG2) / float_T(2.0) - I * beta02 * (nu - xi) * cotPhi * cotPhi
-                            + I * beta0 * (float_T(2.0) * nu - xi) * cotPhi / sinPhi - I * nu / sinPhi_2)
-                           / omega0));
 
                 complex_T const result = (math::exp(I * (omega0 * t - k * y * cosPhi)) * k * zeroOrder * sinPhi
                                           * (besselJ1const
@@ -421,86 +493,42 @@ namespace picongpu
                 using complex_T = alpaka::Complex<float_T>;
                 using complex_64 = alpaka::Complex<float_64>;
 
-                /* Propagation speed of overlap normalized to the speed of light [Default: beta0=1.0] */
-                auto const beta0 = float_T(beta_0);
-                /* If phi < 0 the formulas below are not directly applicable.
-                 * Instead phi is taken positive, but the entire pulse rotated by 180 deg around the
-                 * y-axis of the coordinate system in this function.
-                 */
-                auto const phiT = float_T(math::abs(phi));
-                float_T sinPhi;
-                float_T cosPhi;
-                pmacc::math::sincos(phiT, sinPhi, cosPhi);
-                float_T const tanAlpha = (float_T(1.0) - beta0 * cosPhi) / (beta0 * sinPhi);
+                auto const& [absPhi, sinPhi, cosPhi, beta0, tanAlpha, cspeed, lambda0, omega0, tauG, w0, k]
+                    = defineBasicHelperVariables();
+                auto const& [x, y, z, t] = defineMinimalCoordinates(pos, time);
 
-                auto const cspeed = float_T(sim.si.getSpeedOfLight() / sim.unit.speed());
-                auto const lambda0 = float_T(wavelength_SI / sim.unit.length());
-                float_T const omega0 = float_T(2.0 * PI) * cspeed / lambda0;
-                /* factor 2  in tauG arises from definition convention in laser formula */
-                auto const tauG = float_T(pulselength_SI * 2.0 / sim.unit.time());
-                /* w0 is wx here --> w0 could be replaced by wx */
-                auto const w0 = float_T(w_x_SI / sim.unit.length());
-                auto const k = float_T(2.0 * PI / lambda0);
-
-                /* In order to calculate in single-precision and in order to account for errors in
-                 * the approximations far from the coordinate origin, we use the wavelength-periodicity and
-                 * the known propagation direction for realizing the laser pulse using relative coordinates
-                 * (i.e. from a finite coordinate range) only. All these quantities have to be calculated
-                 * in double precision.
-                 */
-                float_64 const deltaT = wavelength_SI / sim.si.getSpeedOfLight()
-                    / (1.0 - beta_0 * pmacc::math::cos(precisionCast<float_64>(phi)));
-                float_64 const deltaY = beta0 * sim.si.getSpeedOfLight() * deltaT;
-                float_64 const numberOfPeriods = math::floor(time / deltaT);
-                auto const timeMod = float_T(time - numberOfPeriods * deltaT);
-                auto const yMod = float_T(pos.y() - numberOfPeriods * deltaY);
-                auto const x = float_T(phiPositive * pos.x() / sim.unit.length());
-                auto const y = float_T(yMod / sim.unit.length());
-                auto const z = float_T(phiPositive * pos.z() / sim.unit.length());
-                auto const t = float_T(timeMod / sim.unit.time());
                 /* To avoid underflows in computation, fields are set to zero
                  * before and after the respective TWTS pulse envelope.
                  */
                 if(math::abs(y - z * tanAlpha - (beta0 * cspeed * t)) > (numSigmas * tauG * cspeed))
                     return float_T(0.0);
 
-                /* Calculating shortcuts for speeding up field calculation */
-                float_T const tanPhi = math::tan(phiT);
-                float_T const cotPhi = float_T(1.0) / tanPhi;
-                float_T const sinPhi_2 = sinPhi * sinPhi;
-                float_T const sinPolAngle = math::sin(polAngle);
-                float_T const cosPolAngle = math::cos(polAngle);
-                float_T const sin2Phi = math::sin(float_T(2.0) * phiT);
+                auto const& [tanPhi, cotPhi, sinPhi_2, sinPolAngle, cosPolAngle]
+                    = defineTrigonometryShortcuts(absPhi, sinPhi);
+                auto const& [I, x2, tauG2, psi0, w02, beta02, nu, xi, rhom, Xm, besselI0const, besselJ0const, besselJ1const, zeroOrder]
+                    = defineCommonHelperVariables(
+                        absPhi,
+                        sinPhi,
+                        cosPhi,
+                        beta0,
+                        tanAlpha,
+                        cspeed,
+                        lambda0,
+                        omega0,
+                        tauG,
+                        w0,
+                        k,
+                        x,
+                        y,
+                        z,
+                        t,
+                        cotPhi,
+                        sinPhi_2);
 
-                complex_T I = complex_T(0, 1);
-                float_T const x2 = x * x;
-                float_T const tauG2 = tauG * tauG;
-                float_T const psi0 = float_T(2.0) / k;
-                float_T const w02 = w0 * w0;
-                float_T const beta02 = beta0 * beta0;
-
-                complex_T const nu = (y * cosPhi - z * sinPhi) / cspeed;
-                complex_T const xi = (-z * cosPhi - y * sinPhi) * tanAlpha / cspeed;
-                complex_T const rhom
-                    = math::sqrt(x2 + pmacc::math::cPow(-z - complex_T(0, 0.5) * k * w02, static_cast<uint32_t>(2u)));
+                /* Calculating shortcuts for speeding up field calculation */
+                float_T const sin2Phi = math::sin(float_T(2.0) * absPhi);
                 complex_T const rhom2 = rhom * rhom;
-                complex_T const Xm = -z - complex_T(0, 0.5) * k * w02;
                 complex_T const Xm2 = Xm * Xm;
-                float_T const besselI0const = pmacc::math::bessel::i0(k * k * sinPhi * w02 / float_T(2.0));
-                complex_T const besselJ0const = pmacc::math::bessel::j0(k * rhom * sinPhi);
-                complex_T const besselJ1const = pmacc::math::bessel::j1(k * rhom * sinPhi);
-
-                complex_T const zeroOrder = (beta0 * tauG)
-                    / (math::sqrt(float_T(2.0))
-                       * math::exp(
-                           beta02 * omega0 * pmacc::math::cPow(t - nu + xi, static_cast<uint32_t>(2u))
-                           / (beta02 * omega0 * tauG2 - complex_T(0, 2) * beta02 * (nu - xi) * cotPhi * cotPhi
-                              + complex_T(0, 2) * beta0 * (float_T(2.0) * nu - xi) * cotPhi / sinPhi
-                              - complex_T(0, 2) * nu / sinPhi_2))
-                       * math::sqrt(
-                           ((beta02 * omega0 * tauG2) / float_T(2.0) - I * beta02 * (nu - xi) * cotPhi * cotPhi
-                            + I * beta0 * (float_T(2.0) * nu - xi) * cotPhi / sinPhi - I * nu / sinPhi_2)
-                           / omega0));
 
                 complex_T const result
                     = (complex_T(0, 0.125) * math::exp(I * (omega0 * t - k * y * cosPhi)) * zeroOrder

From 9586a84bbba444a9c6d89013df904bc7adabacaf Mon Sep 17 00:00:00 2001
From: Alexander Debus <a.debus@hzdr.de>
Date: Mon, 4 Nov 2024 06:04:12 -0500
Subject: [PATCH 3/5] Move duplicate code to a new TWTSTight base class.

---
 .../background/templates/twtstight/BField.hpp | 138 +----------
 .../background/templates/twtstight/BField.tpp | 204 ---------------
 .../background/templates/twtstight/EField.hpp | 137 +---------
 .../background/templates/twtstight/EField.tpp | 207 ----------------
 .../templates/twtstight/twtstight.hpp         | 149 ++++++++++-
 .../templates/twtstight/twtstight.tpp         | 233 ++++++++++++++++++
 6 files changed, 384 insertions(+), 684 deletions(-)

diff --git a/include/picongpu/fields/background/templates/twtstight/BField.hpp b/include/picongpu/fields/background/templates/twtstight/BField.hpp
index ae4c472eae..6cc45a7664 100644
--- a/include/picongpu/fields/background/templates/twtstight/BField.hpp
+++ b/include/picongpu/fields/background/templates/twtstight/BField.hpp
@@ -37,84 +37,10 @@ namespace picongpu
         /* Traveling-wave Thomson scattering laser pulse */
         namespace twtstight
         {
-            class BField
+            class BField : public TWTSTight
             {
             public:
-                using float_T = float_X;
-
-                /** Center of simulation volume in number of cells */
-                PMACC_ALIGN(halfSimSize, DataSpace<simDim>);
-                /** y-position of TWTS coordinate origin inside the simulation coordinates [meter]
-                 *  The other origin coordinates (x and z) default to globally centered values
-                 *  with respect to the simulation volume.
-                 */
-                PMACC_ALIGN(focus_y_SI, float_64 const);
-                /** Laser wavelength [meter] */
-                PMACC_ALIGN(wavelength_SI, float_64 const);
-                /** TWTS laser pulse duration [second] */
-                PMACC_ALIGN(pulselength_SI, float_64 const);
-                /** line focus height of TWTS pulse [meter] */
-                PMACC_ALIGN(w_x_SI, float_64 const);
-                /** TWTS interaction angle
-                 *  Enclosed by the laser propagation direction and the y-axis.
-                 *  For a positive value of the interaction angle, the laser propagation direction
-                 *  points along the y-axis and against the z-axis.
-                 *  That is, for phi = 90 degree the laser propagates in the -z direction.
-                 * [rad]
-                 */
-                PMACC_ALIGN(phi, float_X const);
-                /** Takes value 1.0 for phi > 0 and -1.0 for phi < 0. */
-                PMACC_ALIGN(phiPositive, float_X);
-                /** propagation speed of TWTS laser overlap
-                    normalized to the speed of light. [Default: beta0 = 1.0] */
-                PMACC_ALIGN(beta_0, float_X const);
-                /** If auto_tdelay=FALSE, then a user defined delay is used. [second] */
-                PMACC_ALIGN(tdelay_user_SI, float_64 const);
-                /** Make time step constant accessible to device. */
-                PMACC_ALIGN(dt, float_64 const);
-                /** Make length normalization constant accessible to device. */
-                PMACC_ALIGN(unit_length, float_64 const);
-                /** TWTS laser time delay */
-                PMACC_ALIGN(tdelay, float_64);
-                /** Should the TWTS laser time delay be chosen automatically, such that
-                 * the laser gradually enters the simulation volume? [Default: TRUE]
-                 */
-                PMACC_ALIGN(auto_tdelay, bool const);
-                /** Polarization angle of TWTS laser with respect to x-axis rotated around propagation direction. */
-                PMACC_ALIGN(polAngle, float_X const);
-
-                /** Magnetic field of the TWTS laser
-                 *
-                 * @param focus_y_SI the distance to the laser focus in y-direction [m]
-                 * @param wavelength_SI central wavelength [m]
-                 * @param pulselength_SI sigma of std. gauss for intensity (E^2),
-                 *  pulselength_SI = FWHM_of_Intensity / 2.35482 [seconds (sigma)]
-                 * @param w_x beam waist: distance from the axis where the pulse electric field
-                 *  decreases to its 1/e^2-th part at the focus position of the laser [m]
-                 * @param phi interaction angle between TWTS laser propagation vector and
-                 *  the y-axis [rad, default = 90. * (PI/180.)]
-                 * @param beta_0 propagation speed of overlap normalized to
-                 *  the speed of light [c, default = 1.0]
-                 * @param tdelay_user manual time delay if auto_tdelay is false
-                 * @param auto_tdelay calculate the time delay such that the TWTS pulse is not
-                 *  inside the simulation volume at simulation start timestep = 0 [default = true]
-                 * @param polAngle determines the TWTS laser polarization angle with respect to x-axis around
-                 * propagation direction [rad, default = 0. * (PI/180.)] Normal to laser pulse front tilt plane:
-                 * polAngle = 0.0 * (PI/180.) (linear polarization parallel to x-axis) Parallel to laser pulse front
-                 * tilt plane: polAngle = 90.0 * (PI/180.) (linear polarization parallel to yz-plane)
-                 */
-                HINLINE
-                BField(
-                    float_64 const focus_y_SI,
-                    float_64 const wavelength_SI,
-                    float_64 const pulselength_SI,
-                    float_64 const w_x_SI,
-                    float_X const phi = 90. * (PI / 180.),
-                    float_X const beta_0 = 1.0,
-                    float_64 const tdelay_user_SI = 0.0,
-                    bool const auto_tdelay = true,
-                    float_X const polAngle = 0. * (PI / 180.));
-
+                using TWTSTight::TWTSTight;
 
                 /** Specify your background field B(r,t) here
                  *
@@ -161,66 +87,6 @@ namespace picongpu
                     pmacc::math::Vector<floatD_X, detail::numComponents> const& extraShifts,
                     float_X const currentStep) const;
 
-                HDINLINE
-                std::tuple<
-                    float_T,
-                    float_T,
-                    float_T,
-                    float_T,
-                    float_T,
-                    float_T,
-                    float_T,
-                    float_T,
-                    float_T,
-                    float_T,
-                    float_T>
-                defineBasicHelperVariables() const;
-
-                HDINLINE
-                std::tuple<float_T, float_T, float_T, float_T> defineMinimalCoordinates(
-                    float3_64 const& pos,
-                    float_64 const time) const;
-
-                HDINLINE
-                std::tuple<float_T, float_T, float_T, float_T, float_T> defineTrigonometryShortcuts(
-                    float_T const absPhi,
-                    float_T const sinPhi) const;
-
-                HDINLINE
-                std::tuple<
-                    alpaka::Complex<float_T>,
-                    float_T,
-                    float_T,
-                    float_T,
-                    float_T,
-                    float_T,
-                    float_T,
-                    float_T,
-                    alpaka::Complex<float_T>,
-                    alpaka::Complex<float_T>,
-                    float_T,
-                    alpaka::Complex<float_T>,
-                    alpaka::Complex<float_T>,
-                    alpaka::Complex<float_T>>
-                defineCommonHelperVariables(
-                    float_T const absPhi,
-                    float_T const sinPhi,
-                    float_T const cosPhi,
-                    float_T const beta0,
-                    float_T const tanAlpha,
-                    float_T const cspeed,
-                    float_T const lambda0,
-                    float_T const omega0,
-                    float_T const tauG,
-                    float_T const w0,
-                    float_T const k,
-                    float_T const x,
-                    float_T const y,
-                    float_T const z,
-                    float_T const t,
-                    float_T const cotPhi,
-                    float_T const sinPhi_2) const;
-
                 /** Calculate the By(r,t) field
                  *
                  * @param pos Spatial position of the target field.
diff --git a/include/picongpu/fields/background/templates/twtstight/BField.tpp b/include/picongpu/fields/background/templates/twtstight/BField.tpp
index e4640d0cbb..f21ee7dc25 100644
--- a/include/picongpu/fields/background/templates/twtstight/BField.tpp
+++ b/include/picongpu/fields/background/templates/twtstight/BField.tpp
@@ -43,48 +43,6 @@ namespace picongpu
         /** Traveling-wave Thomson scattering laser pulse */
         namespace twtstight
         {
-            HINLINE
-            BField::BField(
-                float_64 const focus_y_SI,
-                float_64 const wavelength_SI,
-                float_64 const pulselength_SI,
-                float_64 const w_x_SI,
-                float_X const phi,
-                float_X const beta_0,
-                float_64 const tdelay_user_SI,
-                bool const auto_tdelay,
-                float_X const polAngle)
-                : focus_y_SI(focus_y_SI)
-                , wavelength_SI(wavelength_SI)
-                , pulselength_SI(pulselength_SI)
-                , w_x_SI(w_x_SI)
-                , phi(phi)
-                , phiPositive(float_X(1.0))
-                , beta_0(beta_0)
-                , tdelay_user_SI(tdelay_user_SI)
-                , dt(sim.si.getDt())
-                , unit_length(sim.unit.length())
-                , auto_tdelay(auto_tdelay)
-                , polAngle(polAngle)
-
-            {
-                /* Note: Enviroment-objects cannot be instantiated on CUDA GPU device. Since this is done
-                 * on host (see fieldBackground.param), this is no problem.
-                 */
-                SubGrid<simDim> const& subGrid = Environment<simDim>::get().SubGrid();
-                halfSimSize = subGrid.getGlobalDomain().size / 2;
-                tdelay = detail::getInitialTimeDelay_SI(
-                    auto_tdelay,
-                    tdelay_user_SI,
-                    halfSimSize,
-                    pulselength_SI,
-                    focus_y_SI,
-                    phi,
-                    beta_0);
-                if(phi < float_X(0.0))
-                    phiPositive = float_X(-1.0);
-            }
-
             template<>
             HDINLINE float3_X BField::getTWTSBfield_Normalized<DIM3>(
                 pmacc::math::Vector<floatD_64, detail::numComponents> const& bFieldPositions_SI,
@@ -192,168 +150,6 @@ namespace picongpu
                     return (*this)(cellIdx, currentStep)[T_component];
             }
 
-            HDINLINE
-            std::tuple<
-                BField::float_T,
-                BField::float_T,
-                BField::float_T,
-                BField::float_T,
-                BField::float_T,
-                BField::float_T,
-                BField::float_T,
-                BField::float_T,
-                BField::float_T,
-                BField::float_T,
-                BField::float_T>
-            BField::defineBasicHelperVariables() const
-            {
-                /* If phi < 0 the formulas below are not directly applicable.
-                 * Instead phi is taken positive, but the entire pulse rotated by 180 deg around the
-                 * y-axis of the coordinate system in this function.
-                 */
-                auto const absPhi = float_T(math::abs(phi));
-
-                /* Propagation speed of overlap normalized to the speed of light [Default: beta0=1.0] */
-                auto const beta0 = float_T(beta_0);
-
-                float_T sinPhi;
-                float_T cosPhi;
-                pmacc::math::sincos(absPhi, sinPhi, cosPhi);
-                float_T const tanAlpha = (float_T(1.0) - beta0 * cosPhi) / (beta0 * sinPhi);
-
-                auto const cspeed = float_T(sim.si.getSpeedOfLight() / sim.unit.speed());
-                auto const lambda0 = float_T(wavelength_SI / sim.unit.length());
-                float_T const omega0 = float_T(2.0 * PI) * cspeed / lambda0;
-                /* factor 2  in tauG arises from definition convention in laser formula */
-                auto const tauG = float_T(pulselength_SI * 2.0 / sim.unit.time());
-                /* w0 is wx here --> w0 could be replaced by wx */
-                auto const w0 = float_T(w_x_SI / sim.unit.length());
-                auto const k = float_T(2.0 * PI / lambda0);
-
-                return std::make_tuple(absPhi, sinPhi, cosPhi, beta0, tanAlpha, cspeed, lambda0, omega0, tauG, w0, k);
-            }
-
-            HDINLINE
-            std::tuple<BField::float_T, BField::float_T, BField::float_T, BField::float_T> BField::
-                defineMinimalCoordinates(float3_64 const& pos, float_64 const time) const
-            {
-                /* In order to calculate in single-precision and in order to account for errors in
-                 * the approximations far from the coordinate origin, we use the wavelength-periodicity and
-                 * the known propagation direction for realizing the laser pulse using relative coordinates
-                 * (i.e. from a finite coordinate range) only. All these quantities have to be calculated
-                 * in double precision.
-                 */
-                float_64 const deltaT = wavelength_SI / sim.si.getSpeedOfLight()
-                    / (1.0 - beta_0 * pmacc::math::cos(precisionCast<float_64>(phi)));
-                float_64 const deltaY = beta_0 * sim.si.getSpeedOfLight() * deltaT;
-                float_64 const numberOfPeriods = math::floor(time / deltaT);
-                auto const timeMod = float_T(time - numberOfPeriods * deltaT);
-                auto const yMod = float_T(pos.y() - numberOfPeriods * deltaY);
-
-                auto const x = float_T(phiPositive * pos.x() / sim.unit.length());
-                auto const y = float_T(yMod / sim.unit.length());
-                auto const z = float_T(phiPositive * pos.z() / sim.unit.length());
-                auto const t = float_T(timeMod / sim.unit.time());
-
-                return std::make_tuple(x, y, z, t);
-            }
-
-            HDINLINE
-            std::tuple<BField::float_T, BField::float_T, BField::float_T, BField::float_T, BField::float_T> BField::
-                defineTrigonometryShortcuts(BField::float_T const absPhi, BField::float_T const sinPhi) const
-            {
-                /* Calculating shortcuts for speeding up field calculation */
-                float_T const tanPhi = math::tan(absPhi);
-                float_T const cotPhi = float_T(1.0) / tanPhi;
-                float_T const sinPhi_2 = sinPhi * sinPhi;
-                float_T const sinPolAngle = math::sin(polAngle);
-                float_T const cosPolAngle = math::cos(polAngle);
-
-                return std::make_tuple(tanPhi, cotPhi, sinPhi_2, sinPolAngle, cosPolAngle);
-            }
-
-            HDINLINE
-            std::tuple<
-                alpaka::Complex<BField::float_T>,
-                BField::float_T,
-                BField::float_T,
-                BField::float_T,
-                BField::float_T,
-                BField::float_T,
-                BField::float_T,
-                BField::float_T,
-                alpaka::Complex<BField::float_T>,
-                alpaka::Complex<BField::float_T>,
-                BField::float_T,
-                alpaka::Complex<BField::float_T>,
-                alpaka::Complex<BField::float_T>,
-                alpaka::Complex<BField::float_T>>
-            BField::defineCommonHelperVariables(
-                BField::float_T const absPhi,
-                BField::float_T const sinPhi,
-                BField::float_T const cosPhi,
-                BField::float_T const beta0,
-                BField::float_T const tanAlpha,
-                BField::float_T const cspeed,
-                BField::float_T const lambda0,
-                BField::float_T const omega0,
-                BField::float_T const tauG,
-                BField::float_T const w0,
-                BField::float_T const k,
-                BField::float_T const x,
-                BField::float_T const y,
-                BField::float_T const z,
-                BField::float_T const t,
-                BField::float_T const cotPhi,
-                BField::float_T const sinPhi_2) const
-            {
-                using complex_T = alpaka::Complex<float_T>;
-                using complex_64 = alpaka::Complex<float_64>;
-
-                complex_T I = complex_T(0, 1);
-                float_T const x2 = x * x;
-                float_T const tauG2 = tauG * tauG;
-                float_T const psi0 = float_T(2.0) / k;
-                float_T const w02 = w0 * w0;
-                float_T const beta02 = beta0 * beta0;
-
-                float_T const nu = (y * cosPhi - z * sinPhi) / cspeed;
-                float_T const xi = (-z * cosPhi - y * sinPhi) * tanAlpha / cspeed;
-                complex_T const Xm = -z - complex_T(0, 0.5) * (k * w02);
-                complex_T const rhom = math::sqrt(x2 + pmacc::math::cPow(Xm, static_cast<uint32_t>(2u)));
-                float_T const besselI0const = pmacc::math::bessel::i0(k * k * sinPhi * w02 / float_T(2.0));
-                complex_T const besselJ0const = pmacc::math::bessel::j0(k * sinPhi * rhom);
-                complex_T const besselJ1const = pmacc::math::bessel::j1(k * sinPhi * rhom);
-
-                complex_T const zeroOrder = (beta0 * tauG)
-                    / (math::sqrt(float_T(2.0))
-                       * math::exp(
-                           beta02 * omega0 * pmacc::math::cPow(t - nu + xi, static_cast<uint32_t>(2u))
-                           / (beta02 * omega0 * tauG2 - complex_T(0, 2) * (beta02 * (nu - xi) * cotPhi * cotPhi)
-                              + complex_T(0, 2) * (beta0 * (float_T(2.0) * nu - xi) * cotPhi / sinPhi)
-                              - complex_T(0, 2) * (nu / sinPhi_2)))
-                       * math::sqrt(
-                           ((beta02 * omega0 * tauG2) / float_T(2.0) - I * (beta02 * (nu - xi) * cotPhi * cotPhi)
-                            + I * (beta0 * (float_T(2.0) * nu - xi) * cotPhi / sinPhi) - I * (nu / sinPhi_2))
-                           / omega0));
-
-                return std::make_tuple(
-                    I,
-                    x2,
-                    tauG2,
-                    psi0,
-                    w02,
-                    beta02,
-                    nu,
-                    xi,
-                    rhom,
-                    Xm,
-                    besselI0const,
-                    besselJ0const,
-                    besselJ1const,
-                    zeroOrder);
-            }
-
             /** Calculate the By(r,t) field here
              *
              * @param pos Spatial position of the target field.
diff --git a/include/picongpu/fields/background/templates/twtstight/EField.hpp b/include/picongpu/fields/background/templates/twtstight/EField.hpp
index acb4d16176..839e8a6f59 100644
--- a/include/picongpu/fields/background/templates/twtstight/EField.hpp
+++ b/include/picongpu/fields/background/templates/twtstight/EField.hpp
@@ -35,83 +35,10 @@ namespace picongpu
         /* Traveling-wave Thomson scattering laser pulse */
         namespace twtstight
         {
-            class EField
+            class EField : public TWTSTight
             {
             public:
-                using float_T = float_X;
-
-                /** Center of simulation volume in number of cells */
-                PMACC_ALIGN(halfSimSize, DataSpace<simDim>);
-                /** y-position of TWTS coordinate origin inside the simulation coordinates [meter]
-                    The other origin coordinates (x and z) default to globally centered values
-                    with respect to the simulation volume. */
-                PMACC_ALIGN(focus_y_SI, float_64 const);
-                /** Laser wavelength [meter] */
-                PMACC_ALIGN(wavelength_SI, float_64 const);
-                /** TWTS laser pulse duration [second] */
-                PMACC_ALIGN(pulselength_SI, float_64 const);
-                /** line focus height of TWTS pulse [meter] */
-                PMACC_ALIGN(w_x_SI, float_64 const);
-                /** TWTS interaction angle
-                 *  Enclosed by the laser propagation direction and the y-axis.
-                 *  For a positive value of the interaction angle, the laser propagation direction
-                 *  points along the y-axis and against the z-axis.
-                 *  That is, for phi = 90 degree the laser propagates in the -z direction.
-                 * [rad]
-                 */
-                PMACC_ALIGN(phi, float_X const);
-                /** Takes value 1.0 for phi > 0 and -1.0 for phi < 0. */
-                PMACC_ALIGN(phiPositive, float_X);
-                /** propagation speed of TWTS laser overlap
-                normalized to the speed of light. [Default: beta0=1.0] */
-                PMACC_ALIGN(beta_0, float_X const);
-                /** If auto_tdelay=FALSE, then a user defined delay is used. [second] */
-                PMACC_ALIGN(tdelay_user_SI, float_64 const);
-                /** Make time step constant accessible to device. */
-                PMACC_ALIGN(dt, float_64 const);
-                /** Make length normalization constant accessible to device. */
-                PMACC_ALIGN(unit_length, float_64 const);
-                /** TWTS laser time delay */
-                PMACC_ALIGN(tdelay, float_64);
-                /** Should the TWTS laser delay be chosen automatically, such that
-                 *  the laser gradually enters the simulation volume? [Default: TRUE]
-                 */
-                PMACC_ALIGN(auto_tdelay, bool const);
-                /** Polarization of TWTS laser with respect to x-axis around propagation direction [rad, default = 0. *
-                 * (PI/180.)] */
-                PMACC_ALIGN(polAngle, float_X const);
-
-                /** Electric field of the TWTS laser
-                 *
-                 * @param focus_y_SI the distance to the laser focus in y-direction [m]
-                 * @param wavelength_SI central wavelength [m]
-                 * @param pulselength_SI sigma of std. gauss for intensity (E^2),
-                 *  pulselength_SI = FWHM_of_Intensity / 2.35482 [seconds (sigma)]
-                 * @param w_x beam waist: distance from the axis where the pulse electric field
-                 *  decreases to its 1/e^2-th part at the focus position of the laser [m]
-                 * @param phi interaction angle between TWTS laser propagation vector and
-                 *  the y-axis [rad, default = 90. * (PI/180.)]
-                 * @param beta_0 propagation speed of overlap normalized to
-                 *  the speed of light [c, default = 1.0]
-                 * @param tdelay_user manual time delay if auto_tdelay is false
-                 * @param auto_tdelay calculate the time delay such that the TWTS pulse is not
-                 *  inside the simulation volume at simulation start timestep = 0 [default = true]
-                 * @param polAngle determines the TWTS laser polarization angle with respect to x-axis around
-                 * propagation direction [rad, default = 0. * (PI/180.)] Normal to laser pulse front tilt plane:
-                 * polAngle = 0.0 * (PI/180.) (linear polarization parallel to x-axis) Parallel to laser pulse front
-                 * tilt plane: polAngle = 90.0 * (PI/180.) (linear polarization parallel to yz-plane)
-                 */
-                HINLINE
-                EField(
-                    float_64 const focus_y_SI,
-                    float_64 const wavelength_SI,
-                    float_64 const pulselength_SI,
-                    float_64 const w_x_SI,
-                    float_X const phi = 90. * (PI / 180.),
-                    float_X const beta_0 = 1.0,
-                    float_64 const tdelay_user_SI = 0.0,
-                    bool const auto_tdelay = true,
-                    float_X const polAngle = 0. * (PI / 180.));
+                using TWTSTight::TWTSTight;
 
                 /** Specify your background field E(r,t) here
                  *
@@ -158,66 +85,6 @@ namespace picongpu
                     pmacc::math::Vector<floatD_X, detail::numComponents> const& extraShifts,
                     float_X const currentStep) const;
 
-                HDINLINE
-                std::tuple<
-                    float_T,
-                    float_T,
-                    float_T,
-                    float_T,
-                    float_T,
-                    float_T,
-                    float_T,
-                    float_T,
-                    float_T,
-                    float_T,
-                    float_T>
-                defineBasicHelperVariables() const;
-
-                HDINLINE
-                std::tuple<float_T, float_T, float_T, float_T> defineMinimalCoordinates(
-                    float3_64 const& pos,
-                    float_64 const time) const;
-
-                HDINLINE
-                std::tuple<float_T, float_T, float_T, float_T, float_T> defineTrigonometryShortcuts(
-                    float_T const absPhi,
-                    float_T const sinPhi) const;
-
-                HDINLINE
-                std::tuple<
-                    alpaka::Complex<float_T>,
-                    float_T,
-                    float_T,
-                    float_T,
-                    float_T,
-                    float_T,
-                    float_T,
-                    float_T,
-                    alpaka::Complex<float_T>,
-                    alpaka::Complex<float_T>,
-                    float_T,
-                    alpaka::Complex<float_T>,
-                    alpaka::Complex<float_T>,
-                    alpaka::Complex<float_T>>
-                defineCommonHelperVariables(
-                    float_T const absPhi,
-                    float_T const sinPhi,
-                    float_T const cosPhi,
-                    float_T const beta0,
-                    float_T const tanAlpha,
-                    float_T const cspeed,
-                    float_T const lambda0,
-                    float_T const omega0,
-                    float_T const tauG,
-                    float_T const w0,
-                    float_T const k,
-                    float_T const x,
-                    float_T const y,
-                    float_T const z,
-                    float_T const t,
-                    float_T const cotPhi,
-                    float_T const sinPhi_2) const;
-
                 /** Calculate the Ex(r,t) field
                  *
                  * @param pos Spatial position of the target field
diff --git a/include/picongpu/fields/background/templates/twtstight/EField.tpp b/include/picongpu/fields/background/templates/twtstight/EField.tpp
index ad9181edc8..8117e68a18 100644
--- a/include/picongpu/fields/background/templates/twtstight/EField.tpp
+++ b/include/picongpu/fields/background/templates/twtstight/EField.tpp
@@ -41,47 +41,6 @@ namespace picongpu
         /* Traveling-wave Thomson scattering laser pulse */
         namespace twtstight
         {
-            HINLINE
-            EField::EField(
-                float_64 const focus_y_SI,
-                float_64 const wavelength_SI,
-                float_64 const pulselength_SI,
-                float_64 const w_x_SI,
-                float_X const phi,
-                float_X const beta_0,
-                float_64 const tdelay_user_SI,
-                bool const auto_tdelay,
-                float_X const polAngle)
-                : focus_y_SI(focus_y_SI)
-                , wavelength_SI(wavelength_SI)
-                , pulselength_SI(pulselength_SI)
-                , w_x_SI(w_x_SI)
-                , phi(phi)
-                , phiPositive(float_X(1.0))
-                , beta_0(beta_0)
-                , tdelay_user_SI(tdelay_user_SI)
-                , dt(sim.si.getDt())
-                , unit_length(sim.unit.length())
-                , auto_tdelay(auto_tdelay)
-                , polAngle(polAngle)
-            {
-                /* Note: Enviroment-objects cannot be instantiated on CUDA GPU device. Since this is done
-                         on host (see fieldBackground.param), this is no problem.
-                 */
-                SubGrid<simDim> const& subGrid = Environment<simDim>::get().SubGrid();
-                halfSimSize = subGrid.getGlobalDomain().size / 2;
-                tdelay = detail::getInitialTimeDelay_SI(
-                    auto_tdelay,
-                    tdelay_user_SI,
-                    halfSimSize,
-                    pulselength_SI,
-                    focus_y_SI,
-                    phi,
-                    beta_0);
-                if(phi < 0.0_X)
-                    phiPositive = float_X(-1.0);
-            }
-
             template<>
             HDINLINE float3_X EField::getTWTSEfield_Normalized<DIM3>(
                 pmacc::math::Vector<floatD_64, detail::numComponents> const& eFieldPositions_SI,
@@ -171,10 +130,6 @@ namespace picongpu
                         eFieldPositions_SI[T_component][0],
                         eFieldPositions_SI[T_component][1],
                         eFieldPositions_SI[T_component][2]};
-                    float_X sinPhi;
-                    float_X cosPhi;
-                    pmacc::math::sincos(phi, sinPhi, cosPhi);
-
                     if constexpr(T_component == 0)
                         return static_cast<float_X>(calcTWTSEx(pos, time_SI));
                     else
@@ -196,168 +151,6 @@ namespace picongpu
                     return (*this)(cellIdx, currentStep)[T_component];
             }
 
-            HDINLINE
-            std::tuple<
-                EField::float_T,
-                EField::float_T,
-                EField::float_T,
-                EField::float_T,
-                EField::float_T,
-                EField::float_T,
-                EField::float_T,
-                EField::float_T,
-                EField::float_T,
-                EField::float_T,
-                EField::float_T>
-            EField::defineBasicHelperVariables() const
-            {
-                /* If phi < 0 the formulas below are not directly applicable.
-                 * Instead phi is taken positive, but the entire pulse rotated by 180 deg around the
-                 * y-axis of the coordinate system in this function.
-                 */
-                auto const absPhi = float_T(math::abs(phi));
-
-                /* Propagation speed of overlap normalized to the speed of light [Default: beta0=1.0] */
-                auto const beta0 = float_T(beta_0);
-
-                float_T sinPhi;
-                float_T cosPhi;
-                pmacc::math::sincos(absPhi, sinPhi, cosPhi);
-                float_T const tanAlpha = (float_T(1.0) - beta0 * cosPhi) / (beta0 * sinPhi);
-
-                auto const cspeed = float_T(sim.si.getSpeedOfLight() / sim.unit.speed());
-                auto const lambda0 = float_T(wavelength_SI / sim.unit.length());
-                float_T const omega0 = float_T(2.0 * PI) * cspeed / lambda0;
-                /* factor 2  in tauG arises from definition convention in laser formula */
-                auto const tauG = float_T(pulselength_SI * 2.0 / sim.unit.time());
-                /* w0 is wx here --> w0 could be replaced by wx */
-                auto const w0 = float_T(w_x_SI / sim.unit.length());
-                auto const k = float_T(2.0 * PI / lambda0);
-
-                return std::make_tuple(absPhi, sinPhi, cosPhi, beta0, tanAlpha, cspeed, lambda0, omega0, tauG, w0, k);
-            }
-
-            HDINLINE
-            std::tuple<EField::float_T, EField::float_T, EField::float_T, EField::float_T> EField::
-                defineMinimalCoordinates(float3_64 const& pos, float_64 const time) const
-            {
-                /* In order to calculate in single-precision and in order to account for errors in
-                 * the approximations far from the coordinate origin, we use the wavelength-periodicity and
-                 * the known propagation direction for realizing the laser pulse using relative coordinates
-                 * (i.e. from a finite coordinate range) only. All these quantities have to be calculated
-                 * in double precision.
-                 */
-                float_64 const deltaT = wavelength_SI / sim.si.getSpeedOfLight()
-                    / (1.0 - beta_0 * pmacc::math::cos(precisionCast<float_64>(phi)));
-                float_64 const deltaY = beta_0 * sim.si.getSpeedOfLight() * deltaT;
-                float_64 const numberOfPeriods = math::floor(time / deltaT);
-                auto const timeMod = float_T(time - numberOfPeriods * deltaT);
-                auto const yMod = float_T(pos.y() - numberOfPeriods * deltaY);
-
-                auto const x = float_T(phiPositive * pos.x() / sim.unit.length());
-                auto const y = float_T(yMod / sim.unit.length());
-                auto const z = float_T(phiPositive * pos.z() / sim.unit.length());
-                auto const t = float_T(timeMod / sim.unit.time());
-
-                return std::make_tuple(x, y, z, t);
-            }
-
-            HDINLINE
-            std::tuple<EField::float_T, EField::float_T, EField::float_T, EField::float_T, EField::float_T> EField::
-                defineTrigonometryShortcuts(EField::float_T const absPhi, EField::float_T const sinPhi) const
-            {
-                /* Calculating shortcuts for speeding up field calculation */
-                float_T const tanPhi = math::tan(absPhi);
-                float_T const cotPhi = float_T(1.0) / tanPhi;
-                float_T const sinPhi_2 = sinPhi * sinPhi;
-                float_T const sinPolAngle = math::sin(polAngle);
-                float_T const cosPolAngle = math::cos(polAngle);
-
-                return std::make_tuple(tanPhi, cotPhi, sinPhi_2, sinPolAngle, cosPolAngle);
-            }
-
-            HDINLINE
-            std::tuple<
-                alpaka::Complex<EField::float_T>,
-                EField::float_T,
-                EField::float_T,
-                EField::float_T,
-                EField::float_T,
-                EField::float_T,
-                EField::float_T,
-                EField::float_T,
-                alpaka::Complex<EField::float_T>,
-                alpaka::Complex<EField::float_T>,
-                EField::float_T,
-                alpaka::Complex<EField::float_T>,
-                alpaka::Complex<EField::float_T>,
-                alpaka::Complex<EField::float_T>>
-            EField::defineCommonHelperVariables(
-                EField::float_T const absPhi,
-                EField::float_T const sinPhi,
-                EField::float_T const cosPhi,
-                EField::float_T const beta0,
-                EField::float_T const tanAlpha,
-                EField::float_T const cspeed,
-                EField::float_T const lambda0,
-                EField::float_T const omega0,
-                EField::float_T const tauG,
-                EField::float_T const w0,
-                EField::float_T const k,
-                EField::float_T const x,
-                EField::float_T const y,
-                EField::float_T const z,
-                EField::float_T const t,
-                EField::float_T const cotPhi,
-                EField::float_T const sinPhi_2) const
-            {
-                using complex_T = alpaka::Complex<float_T>;
-                using complex_64 = alpaka::Complex<float_64>;
-
-                complex_T I = complex_T(0, 1);
-                float_T const x2 = x * x;
-                float_T const tauG2 = tauG * tauG;
-                float_T const psi0 = float_T(2.0) / k;
-                float_T const w02 = w0 * w0;
-                float_T const beta02 = beta0 * beta0;
-
-                float_T const nu = (y * cosPhi - z * sinPhi) / cspeed;
-                float_T const xi = (-z * cosPhi - y * sinPhi) * tanAlpha / cspeed;
-                complex_T const Xm = -z - complex_T(0, 0.5) * (k * w02);
-                complex_T const rhom = math::sqrt(x2 + pmacc::math::cPow(Xm, static_cast<uint32_t>(2u)));
-                float_T const besselI0const = pmacc::math::bessel::i0(k * k * sinPhi * w02 / float_T(2.0));
-                complex_T const besselJ0const = pmacc::math::bessel::j0(k * sinPhi * rhom);
-                complex_T const besselJ1const = pmacc::math::bessel::j1(k * sinPhi * rhom);
-
-                complex_T const zeroOrder = (beta0 * tauG)
-                    / (math::sqrt(float_T(2.0))
-                       * math::exp(
-                           beta02 * omega0 * pmacc::math::cPow(t - nu + xi, static_cast<uint32_t>(2u))
-                           / (beta02 * omega0 * tauG2 - complex_T(0, 2) * (beta02 * (nu - xi) * cotPhi * cotPhi)
-                              + complex_T(0, 2) * (beta0 * (float_T(2.0) * nu - xi) * cotPhi / sinPhi)
-                              - complex_T(0, 2) * (nu / sinPhi_2)))
-                       * math::sqrt(
-                           ((beta02 * omega0 * tauG2) / float_T(2.0) - I * (beta02 * (nu - xi) * cotPhi * cotPhi)
-                            + I * (beta0 * (float_T(2.0) * nu - xi) * cotPhi / sinPhi) - I * (nu / sinPhi_2))
-                           / omega0));
-
-                return std::make_tuple(
-                    I,
-                    x2,
-                    tauG2,
-                    psi0,
-                    w02,
-                    beta02,
-                    nu,
-                    xi,
-                    rhom,
-                    Xm,
-                    besselI0const,
-                    besselJ0const,
-                    besselJ1const,
-                    zeroOrder);
-            }
-
             /** Calculate the Ex(r,t) field here
              *
              * @param pos Spatial position of the target field.
diff --git a/include/picongpu/fields/background/templates/twtstight/twtstight.hpp b/include/picongpu/fields/background/templates/twtstight/twtstight.hpp
index 1123f635ce..ebcad46b8b 100644
--- a/include/picongpu/fields/background/templates/twtstight/twtstight.hpp
+++ b/include/picongpu/fields/background/templates/twtstight/twtstight.hpp
@@ -55,9 +55,12 @@
  */
 
 #pragma once
+#include "picongpu/defines.hpp"
+// include "picongpu/fields/background/templates/twtstight/numComponents.hpp"
 
-#include "picongpu/fields/background/templates/twtstight/BField.hpp"
-#include "picongpu/fields/background/templates/twtstight/EField.hpp"
+#include <pmacc/dimensions/DataSpace.hpp>
+// include <pmacc/math/Vector.hpp>
+#include <pmacc/types.hpp>
 
 namespace picongpu
 {
@@ -72,8 +75,150 @@ namespace picongpu
              *  numSigma can be increased to numSigmas = 10 without running into numerical issues.
              */
             constexpr uint32_t numSigmas = 6;
+
+            class TWTSTight
+            {
+            public:
+                using float_T = float_X;
+
+                /** Center of simulation volume in number of cells */
+                PMACC_ALIGN(halfSimSize, DataSpace<simDim>);
+                /** y-position of TWTS coordinate origin inside the simulation coordinates [meter]
+                    The other origin coordinates (x and z) default to globally centered values
+                    with respect to the simulation volume. */
+                PMACC_ALIGN(focus_y_SI, float_64 const);
+                /** Laser wavelength [meter] */
+                PMACC_ALIGN(wavelength_SI, float_64 const);
+                /** TWTS laser pulse duration [second] */
+                PMACC_ALIGN(pulselength_SI, float_64 const);
+                /** line focus height of TWTS pulse [meter] */
+                PMACC_ALIGN(w_x_SI, float_64 const);
+                /** TWTS interaction angle
+                 *  Enclosed by the laser propagation direction and the y-axis.
+                 *  For a positive value of the interaction angle, the laser propagation direction
+                 *  points along the y-axis and against the z-axis.
+                 *  That is, for phi = 90 degree the laser propagates in the -z direction.
+                 * [rad]
+                 */
+                PMACC_ALIGN(phi, float_X const);
+                /** Takes value 1.0 for phi > 0 and -1.0 for phi < 0. */
+                PMACC_ALIGN(phiPositive, float_X);
+                /** propagation speed of TWTS laser overlap
+                normalized to the speed of light. [Default: beta0=1.0] */
+                PMACC_ALIGN(beta_0, float_X const);
+                /** If auto_tdelay=FALSE, then a user defined delay is used. [second] */
+                PMACC_ALIGN(tdelay_user_SI, float_64 const);
+                /** Make time step constant accessible to device. */
+                PMACC_ALIGN(dt, float_64 const);
+                /** Make length normalization constant accessible to device. */
+                PMACC_ALIGN(unit_length, float_64 const);
+                /** TWTS laser time delay */
+                PMACC_ALIGN(tdelay, float_64);
+                /** Should the TWTS laser delay be chosen automatically, such that
+                 *  the laser gradually enters the simulation volume? [Default: TRUE]
+                 */
+                PMACC_ALIGN(auto_tdelay, bool const);
+                /** Polarization of TWTS laser with respect to x-axis around propagation direction [rad, default = 0. *
+                 * (PI/180.)] */
+                PMACC_ALIGN(polAngle, float_X const);
+
+                /** Electric or magnetic field of the TWTS laser
+                 *
+                 * @param focus_y_SI the distance to the laser focus in y-direction [m]
+                 * @param wavelength_SI central wavelength [m]
+                 * @param pulselength_SI sigma of std. gauss for intensity (E^2),
+                 *  pulselength_SI = FWHM_of_Intensity / 2.35482 [seconds (sigma)]
+                 * @param w_x beam waist: distance from the axis where the pulse electric field
+                 *  decreases to its 1/e^2-th part at the focus position of the laser [m]
+                 * @param phi interaction angle between TWTS laser propagation vector and
+                 *  the y-axis [rad, default = 90. * (PI/180.)]
+                 * @param beta_0 propagation speed of overlap normalized to
+                 *  the speed of light [c, default = 1.0]
+                 * @param tdelay_user manual time delay if auto_tdelay is false
+                 * @param auto_tdelay calculate the time delay such that the TWTS pulse is not
+                 *  inside the simulation volume at simulation start timestep = 0 [default = true]
+                 * @param polAngle determines the TWTS laser polarization angle with respect to x-axis around
+                 * propagation direction [rad, default = 0. * (PI/180.)] Normal to laser pulse front tilt plane:
+                 * polAngle = 0.0 * (PI/180.) (linear polarization parallel to x-axis) Parallel to laser pulse front
+                 * tilt plane: polAngle = 90.0 * (PI/180.) (linear polarization parallel to yz-plane)
+                 */
+                HINLINE
+                TWTSTight(
+                    float_64 const focus_y_SI,
+                    float_64 const wavelength_SI,
+                    float_64 const pulselength_SI,
+                    float_64 const w_x_SI,
+                    float_X const phi = 90. * (PI / 180.),
+                    float_X const beta_0 = 1.0,
+                    float_64 const tdelay_user_SI = 0.0,
+                    bool const auto_tdelay = true,
+                    float_X const polAngle = 0. * (PI / 180.));
+
+                HDINLINE
+                std::tuple<
+                    float_T,
+                    float_T,
+                    float_T,
+                    float_T,
+                    float_T,
+                    float_T,
+                    float_T,
+                    float_T,
+                    float_T,
+                    float_T,
+                    float_T>
+                defineBasicHelperVariables() const;
+
+                HDINLINE
+                std::tuple<float_T, float_T, float_T, float_T> defineMinimalCoordinates(
+                    float3_64 const& pos,
+                    float_64 const& time) const;
+
+                HDINLINE
+                std::tuple<float_T, float_T, float_T, float_T, float_T> defineTrigonometryShortcuts(
+                    float_T const& absPhi,
+                    float_T const& sinPhi) const;
+
+                HDINLINE
+                std::tuple<
+                    alpaka::Complex<float_T>,
+                    float_T,
+                    float_T,
+                    float_T,
+                    float_T,
+                    float_T,
+                    float_T,
+                    float_T,
+                    alpaka::Complex<float_T>,
+                    alpaka::Complex<float_T>,
+                    float_T,
+                    alpaka::Complex<float_T>,
+                    alpaka::Complex<float_T>,
+                    alpaka::Complex<float_T>>
+                defineCommonHelperVariables(
+                    float_T const& absPhi,
+                    float_T const& sinPhi,
+                    float_T const& cosPhi,
+                    float_T const& beta0,
+                    float_T const& tanAlpha,
+                    float_T const& cspeed,
+                    float_T const& lambda0,
+                    float_T const& omega0,
+                    float_T const& tauG,
+                    float_T const& w0,
+                    float_T const& k,
+                    float_T const& x,
+                    float_T const& y,
+                    float_T const& z,
+                    float_T const& t,
+                    float_T const& cotPhi,
+                    float_T const& sinPhi_2) const;
+            };
+
         } // namespace twtstight
     } // namespace templates
 } // namespace picongpu
 
+#include "picongpu/fields/background/templates/twtstight/BField.hpp"
+#include "picongpu/fields/background/templates/twtstight/EField.hpp"
 #include "picongpu/fields/background/templates/twtstight/twtstight.tpp"
diff --git a/include/picongpu/fields/background/templates/twtstight/twtstight.tpp b/include/picongpu/fields/background/templates/twtstight/twtstight.tpp
index 9d80e7857a..4adfe86e75 100644
--- a/include/picongpu/fields/background/templates/twtstight/twtstight.tpp
+++ b/include/picongpu/fields/background/templates/twtstight/twtstight.tpp
@@ -20,5 +20,238 @@
 
 #pragma once
 
+#include "picongpu/defines.hpp"
 #include "picongpu/fields/background/templates/twtstight/BField.tpp"
 #include "picongpu/fields/background/templates/twtstight/EField.tpp"
+// include "picongpu/fields/YeeCell.hpp"
+// include "picongpu/fields/background/templates/twtstight/EField.hpp"
+#include "picongpu/fields/background/templates/twtstight/GetInitialTimeDelay_SI.tpp"
+// include "picongpu/fields/background/templates/twtstight/getFieldPositions_SI.tpp"
+#include "picongpu/fields/background/templates/twtstight/twtstight.hpp"
+
+#include <pmacc/dimensions/DataSpace.hpp>
+#include <pmacc/mappings/simulation/SubGrid.hpp>
+#include <pmacc/math/Complex.hpp>
+// include <pmacc/math/Vector.hpp>
+#include <pmacc/types.hpp>
+
+namespace picongpu
+{
+    /* Load pre-defined background field */
+    namespace templates
+    {
+        /* Traveling-wave Thomson scattering laser pulse */
+        namespace twtstight
+        {
+            HINLINE
+            TWTSTight::TWTSTight(
+                float_64 const focus_y_SI,
+                float_64 const wavelength_SI,
+                float_64 const pulselength_SI,
+                float_64 const w_x_SI,
+                float_X const phi,
+                float_X const beta_0,
+                float_64 const tdelay_user_SI,
+                bool const auto_tdelay,
+                float_X const polAngle)
+                : focus_y_SI(focus_y_SI)
+                , wavelength_SI(wavelength_SI)
+                , pulselength_SI(pulselength_SI)
+                , w_x_SI(w_x_SI)
+                , phi(phi)
+                , phiPositive(float_X(1.0))
+                , beta_0(beta_0)
+                , tdelay_user_SI(tdelay_user_SI)
+                , dt(sim.si.getDt())
+                , unit_length(sim.unit.length())
+                , auto_tdelay(auto_tdelay)
+                , polAngle(polAngle)
+            {
+                /* Note: Enviroment-objects cannot be instantiated on CUDA GPU device. Since this is done
+                         on host (see fieldBackground.param), this is no problem.
+                 */
+                SubGrid<simDim> const& subGrid = Environment<simDim>::get().SubGrid();
+                halfSimSize = subGrid.getGlobalDomain().size / 2;
+                tdelay = detail::getInitialTimeDelay_SI(
+                    auto_tdelay,
+                    tdelay_user_SI,
+                    halfSimSize,
+                    pulselength_SI,
+                    focus_y_SI,
+                    phi,
+                    beta_0);
+                if(phi < 0.0_X)
+                    phiPositive = float_X(-1.0);
+            }
+
+            HDINLINE
+            std::tuple<
+                TWTSTight::float_T,
+                TWTSTight::float_T,
+                TWTSTight::float_T,
+                TWTSTight::float_T,
+                TWTSTight::float_T,
+                TWTSTight::float_T,
+                TWTSTight::float_T,
+                TWTSTight::float_T,
+                TWTSTight::float_T,
+                TWTSTight::float_T,
+                TWTSTight::float_T>
+            TWTSTight::defineBasicHelperVariables() const
+            {
+                /* If phi < 0 the formulas below are not directly applicable.
+                 * Instead phi is taken positive, but the entire pulse rotated by 180 deg around the
+                 * y-axis of the coordinate system in this function.
+                 */
+                auto const absPhi = float_T(math::abs(phi));
+
+                /* Propagation speed of overlap normalized to the speed of light [Default: beta0=1.0] */
+                auto const beta0 = float_T(beta_0);
+
+                float_T sinPhi;
+                float_T cosPhi;
+                pmacc::math::sincos(absPhi, sinPhi, cosPhi);
+                float_T const tanAlpha = (float_T(1.0) - beta0 * cosPhi) / (beta0 * sinPhi);
+
+                auto const cspeed = float_T(sim.si.getSpeedOfLight() / sim.unit.speed());
+                auto const lambda0 = float_T(wavelength_SI / sim.unit.length());
+                float_T const omega0 = float_T(2.0 * PI) * cspeed / lambda0;
+                /* factor 2  in tauG arises from definition convention in laser formula */
+                auto const tauG = float_T(pulselength_SI * 2.0 / sim.unit.time());
+                /* w0 is wx here --> w0 could be replaced by wx */
+                auto const w0 = float_T(w_x_SI / sim.unit.length());
+                auto const k = float_T(2.0 * PI / lambda0);
+
+                return std::make_tuple(absPhi, sinPhi, cosPhi, beta0, tanAlpha, cspeed, lambda0, omega0, tauG, w0, k);
+            }
+
+            HDINLINE
+            std::tuple<TWTSTight::float_T, TWTSTight::float_T, TWTSTight::float_T, TWTSTight::float_T> TWTSTight::
+                defineMinimalCoordinates(float3_64 const& pos, float_64 const& time) const
+            {
+                /* In order to calculate in single-precision and in order to account for errors in
+                 * the approximations far from the coordinate origin, we use the wavelength-periodicity and
+                 * the known propagation direction for realizing the laser pulse using relative coordinates
+                 * (i.e. from a finite coordinate range) only. All these quantities have to be calculated
+                 * in double precision.
+                 */
+                float_64 const deltaT = wavelength_SI / sim.si.getSpeedOfLight()
+                    / (1.0 - beta_0 * pmacc::math::cos(precisionCast<float_64>(phi)));
+                float_64 const deltaY = beta_0 * sim.si.getSpeedOfLight() * deltaT;
+                float_64 const numberOfPeriods = math::floor(time / deltaT);
+                auto const timeMod = float_T(time - numberOfPeriods * deltaT);
+                auto const yMod = float_T(pos.y() - numberOfPeriods * deltaY);
+
+                auto const x = float_T(phiPositive * pos.x() / sim.unit.length());
+                auto const y = float_T(yMod / sim.unit.length());
+                auto const z = float_T(phiPositive * pos.z() / sim.unit.length());
+                auto const t = float_T(timeMod / sim.unit.time());
+
+                return std::make_tuple(x, y, z, t);
+            }
+
+            HDINLINE
+            std::tuple<
+                TWTSTight::float_T,
+                TWTSTight::float_T,
+                TWTSTight::float_T,
+                TWTSTight::float_T,
+                TWTSTight::float_T>
+            TWTSTight::defineTrigonometryShortcuts(TWTSTight::float_T const& absPhi, TWTSTight::float_T const& sinPhi)
+                const
+            {
+                /* Calculating shortcuts for speeding up field calculation */
+                float_T const tanPhi = math::tan(absPhi);
+                float_T const cotPhi = float_T(1.0) / tanPhi;
+                float_T const sinPhi_2 = sinPhi * sinPhi;
+                float_T const sinPolAngle = math::sin(polAngle);
+                float_T const cosPolAngle = math::cos(polAngle);
+
+                return std::make_tuple(tanPhi, cotPhi, sinPhi_2, sinPolAngle, cosPolAngle);
+            }
+
+            HDINLINE
+            std::tuple<
+                alpaka::Complex<TWTSTight::float_T>,
+                TWTSTight::float_T,
+                TWTSTight::float_T,
+                TWTSTight::float_T,
+                TWTSTight::float_T,
+                TWTSTight::float_T,
+                TWTSTight::float_T,
+                TWTSTight::float_T,
+                alpaka::Complex<TWTSTight::float_T>,
+                alpaka::Complex<TWTSTight::float_T>,
+                TWTSTight::float_T,
+                alpaka::Complex<TWTSTight::float_T>,
+                alpaka::Complex<TWTSTight::float_T>,
+                alpaka::Complex<TWTSTight::float_T>>
+            TWTSTight::defineCommonHelperVariables(
+                TWTSTight::float_T const& absPhi,
+                TWTSTight::float_T const& sinPhi,
+                TWTSTight::float_T const& cosPhi,
+                TWTSTight::float_T const& beta0,
+                TWTSTight::float_T const& tanAlpha,
+                TWTSTight::float_T const& cspeed,
+                TWTSTight::float_T const& lambda0,
+                TWTSTight::float_T const& omega0,
+                TWTSTight::float_T const& tauG,
+                TWTSTight::float_T const& w0,
+                TWTSTight::float_T const& k,
+                TWTSTight::float_T const& x,
+                TWTSTight::float_T const& y,
+                TWTSTight::float_T const& z,
+                TWTSTight::float_T const& t,
+                TWTSTight::float_T const& cotPhi,
+                TWTSTight::float_T const& sinPhi_2) const
+            {
+                using complex_T = alpaka::Complex<float_T>;
+                using complex_64 = alpaka::Complex<float_64>;
+
+                complex_T I = complex_T(0, 1);
+                float_T const x2 = x * x;
+                float_T const tauG2 = tauG * tauG;
+                float_T const psi0 = float_T(2.0) / k;
+                float_T const w02 = w0 * w0;
+                float_T const beta02 = beta0 * beta0;
+
+                float_T const nu = (y * cosPhi - z * sinPhi) / cspeed;
+                float_T const xi = (-z * cosPhi - y * sinPhi) * tanAlpha / cspeed;
+                complex_T const Xm = -z - complex_T(0, 0.5) * (k * w02);
+                complex_T const rhom = math::sqrt(x2 + pmacc::math::cPow(Xm, static_cast<uint32_t>(2u)));
+                float_T const besselI0const = pmacc::math::bessel::i0(k * k * sinPhi * w02 / float_T(2.0));
+                complex_T const besselJ0const = pmacc::math::bessel::j0(k * sinPhi * rhom);
+                complex_T const besselJ1const = pmacc::math::bessel::j1(k * sinPhi * rhom);
+
+                complex_T const zeroOrder = (beta0 * tauG)
+                    / (math::sqrt(float_T(2.0))
+                       * math::exp(
+                           beta02 * omega0 * pmacc::math::cPow(t - nu + xi, static_cast<uint32_t>(2u))
+                           / (beta02 * omega0 * tauG2 - complex_T(0, 2) * (beta02 * (nu - xi) * cotPhi * cotPhi)
+                              + complex_T(0, 2) * (beta0 * (float_T(2.0) * nu - xi) * cotPhi / sinPhi)
+                              - complex_T(0, 2) * (nu / sinPhi_2)))
+                       * math::sqrt(
+                           ((beta02 * omega0 * tauG2) / float_T(2.0) - I * (beta02 * (nu - xi) * cotPhi * cotPhi)
+                            + I * (beta0 * (float_T(2.0) * nu - xi) * cotPhi / sinPhi) - I * (nu / sinPhi_2))
+                           / omega0));
+
+                return std::make_tuple(
+                    I,
+                    x2,
+                    tauG2,
+                    psi0,
+                    w02,
+                    beta02,
+                    nu,
+                    xi,
+                    rhom,
+                    Xm,
+                    besselI0const,
+                    besselJ0const,
+                    besselJ1const,
+                    zeroOrder);
+            }
+
+        } /* namespace twtstight */
+    } /* namespace templates */
+} /* namespace picongpu */

From e6c8b8d61141131c0c92b51b1ba91d107797f16b Mon Sep 17 00:00:00 2001
From: Alexander Debus <a.debus@hzdr.de>
Date: Mon, 4 Nov 2024 13:41:46 -0500
Subject: [PATCH 4/5] Further move duplicate code into base class

---
 .../background/templates/twtstight/BField.hpp | 126 ------
 .../background/templates/twtstight/BField.tpp | 312 ++++-----------
 .../background/templates/twtstight/EField.hpp | 124 ------
 .../background/templates/twtstight/EField.tpp | 253 +++---------
 .../{twtstight.hpp => TWTSTight.hpp}          | 102 ++++-
 .../templates/twtstight/TWTSTight.tpp         | 366 ++++++++++++++++++
 .../templates/twtstight/twtstight.tpp         | 257 ------------
 .../picongpu/param/fieldBackground.param      |   2 +-
 .../picongpu/param/incidentField.param        |   6 +-
 .../picongpu/param/fieldBackground.param      |   2 +-
 10 files changed, 594 insertions(+), 956 deletions(-)
 delete mode 100644 include/picongpu/fields/background/templates/twtstight/BField.hpp
 delete mode 100644 include/picongpu/fields/background/templates/twtstight/EField.hpp
 rename include/picongpu/fields/background/templates/twtstight/{twtstight.hpp => TWTSTight.hpp} (65%)
 create mode 100644 include/picongpu/fields/background/templates/twtstight/TWTSTight.tpp
 delete mode 100644 include/picongpu/fields/background/templates/twtstight/twtstight.tpp

diff --git a/include/picongpu/fields/background/templates/twtstight/BField.hpp b/include/picongpu/fields/background/templates/twtstight/BField.hpp
deleted file mode 100644
index 6cc45a7664..0000000000
--- a/include/picongpu/fields/background/templates/twtstight/BField.hpp
+++ /dev/null
@@ -1,126 +0,0 @@
-/* Copyright 2014-2024 Alexander Debus, Axel Huebl, Sergei Bastrakov
- *
- * This file is part of PIConGPU.
- *
- * PIConGPU is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * PIConGPU is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with PIConGPU.
- * If not, see <http://www.gnu.org/licenses/>.
- */
-
-
-#pragma once
-
-#include "picongpu/defines.hpp"
-#include "picongpu/fields/background/templates/twtstight/numComponents.hpp"
-
-#include <pmacc/dimensions/DataSpace.hpp>
-#include <pmacc/math/Vector.hpp>
-#include <pmacc/types.hpp>
-
-#include <tuple>
-
-namespace picongpu
-{
-    /* Load pre-defined background field */
-    namespace templates
-    {
-        /* Traveling-wave Thomson scattering laser pulse */
-        namespace twtstight
-        {
-            class BField : public TWTSTight
-            {
-            public:
-                using TWTSTight::TWTSTight;
-
-                /** Specify your background field B(r,t) here
-                 *
-                 * @param cellIdx The total cell id counted from the start at t=0, note it can be fractional
-                 * @param currentStep The current time step for the field to be calculated at, note it can be
-                 * fractional
-                 * @return float3_X with field normalized to amplitude in range [-1.:1.]
-                 *
-                 * @{
-                 */
-
-                //! Integer index version, adds in-cell shifts according to the grid used; t = currentStep * dt
-                //! This interface is used by the fieldBackground approach for implementing fields.
-                HDINLINE float3_X operator()(DataSpace<simDim> const& cellIdx, uint32_t const currentStep) const;
-
-                //! Floating-point index version, uses fractional cell index as provided; t = currentStep * dt
-                //! This interface is used by the incidentField approach for implementing fields.
-                HDINLINE float3_X operator()(floatD_X const& cellIdx, float_X const currentStep) const;
-
-                /** @} */
-
-                /** Calculate the given component of B(r, t)
-                 *
-                 * Result is same as for the fractional version of operator()(cellIdx, currentStep)[T_component].
-                 * This version exists for optimizing usage in incident field where single components are needed.
-                 *
-                 * @tparam T_component field component, 0 = x, 1 = y, 2 = z
-                 *
-                 * @param cellIdx The total fractional cell id counted from the start at t=0
-                 * @param currentStep The current time step for the field to be calculated at
-                 * @return float_X with field component normalized to amplitude in range [-1.:1.]
-                 */
-                template<uint32_t T_component>
-                HDINLINE float_X getComponent(floatD_X const& cellIdx, float_X const currentStep) const;
-
-                /** Calculate B(r, t) for given position, time, and extra in-cell shifts
-                 *
-                 * @param cellIdx The total cell id counted from the start at t=0, note it is fractional
-                 * @param extraShifts The extra in-cell shifts to be added to calculate the position
-                 * @param currentStep The current time step for the field to be calculated at, note it is fractional
-                 */
-                HDINLINE float3_X getValue(
-                    floatD_X const& cellIdx,
-                    pmacc::math::Vector<floatD_X, detail::numComponents> const& extraShifts,
-                    float_X const currentStep) const;
-
-                /** Calculate the By(r,t) field
-                 *
-                 * @param pos Spatial position of the target field.
-                 * @param time Absolute time (SI, including all offsets and transformations)
-                 *  for calculating the field
-                 * @return By-field component of the TWTS field in SI units */
-                HDINLINE float_T calcTWTSBy(float3_64 const& pos, float_64 const time) const;
-
-                /** Calculate the Bz(r,t) field
-                 *
-                 * @param pos Spatial position of the target field.
-                 * @param time Absolute time (SI, including all offsets and transformations)
-                 *  for calculating the field
-                 * @return Bz-field component of the TWTS field in SI units */
-                HDINLINE float_T calcTWTSBz(float3_64 const& pos, float_64 const time) const;
-
-                /** Calculate the Bx(r,t) field
-                 *
-                 * @param pos Spatial position of the target field.
-                 * @param time Absolute time (SI, including all offsets and transformations)
-                 *  for calculating the field
-                 * @return Bx-field component of the TWTS field in SI units */
-                HDINLINE float_T calcTWTSBx(float3_64 const& pos, float_64 const time) const;
-
-                /** Calculate the B-field vector of the TWTS laser in SI units.
-                 * @tparam T_dim Specializes for the simulation dimension
-                 * @param cellIdx The total cell id counted from the start at timestep 0
-                 * @return B-field vector of the TWTS field in SI units */
-                template<unsigned T_dim>
-                HDINLINE float3_X getTWTSBfield_Normalized(
-                    pmacc::math::Vector<floatD_64, detail::numComponents> const& bFieldPositions_SI,
-                    float_64 const time) const;
-            };
-
-        } /* namespace twtstight */
-    } /* namespace templates */
-} /* namespace picongpu */
diff --git a/include/picongpu/fields/background/templates/twtstight/BField.tpp b/include/picongpu/fields/background/templates/twtstight/BField.tpp
index f21ee7dc25..1bec18d79e 100644
--- a/include/picongpu/fields/background/templates/twtstight/BField.tpp
+++ b/include/picongpu/fields/background/templates/twtstight/BField.tpp
@@ -21,16 +21,9 @@
 #pragma once
 
 #include "picongpu/defines.hpp"
-#include "picongpu/fields/YeeCell.hpp"
-#include "picongpu/fields/background/templates/twtstight/BField.hpp"
-#include "picongpu/fields/background/templates/twtstight/GetInitialTimeDelay_SI.tpp"
-#include "picongpu/fields/background/templates/twtstight/getFieldPositions_SI.tpp"
-#include "picongpu/fields/background/templates/twtstight/twtstight.hpp"
+#include "picongpu/fields/background/templates/twtstight/TWTSTight.hpp"
 
-#include <pmacc/dimensions/DataSpace.hpp>
-#include <pmacc/mappings/simulation/SubGrid.hpp>
 #include <pmacc/math/Complex.hpp>
-#include <pmacc/math/Vector.hpp>
 #include <pmacc/types.hpp>
 
 #include <tuple>
@@ -43,127 +36,22 @@ namespace picongpu
         /** Traveling-wave Thomson scattering laser pulse */
         namespace twtstight
         {
-            template<>
-            HDINLINE float3_X BField::getTWTSBfield_Normalized<DIM3>(
-                pmacc::math::Vector<floatD_64, detail::numComponents> const& bFieldPositions_SI,
-                float_64 const time) const
-            {
-                using PosVecVec = pmacc::math::Vector<float3_64, detail::numComponents>;
-                PosVecVec pos(PosVecVec::create(float3_64::create(0.0)));
-
-                for(uint32_t k = 0; k < detail::numComponents; ++k)
-                {
-                    for(uint32_t i = 0; i < simDim; ++i)
-                    {
-                        pos[k][i] = bFieldPositions_SI[k][i];
-                    }
-                }
-                /* B-field normalized to the peak amplitude. */
-                return float3_X(calcTWTSBx(pos[0], time), calcTWTSBy(pos[1], time), calcTWTSBz(pos[2], time));
-            }
-
-            template<>
-            HDINLINE float3_X BField::getTWTSBfield_Normalized<DIM2>(
-                pmacc::math::Vector<floatD_64, detail::numComponents> const& bFieldPositions_SI,
-                float_64 const time) const
-            {
-                using PosVecVec = pmacc::math::Vector<float3_64, detail::numComponents>;
-                PosVecVec pos(PosVecVec::create(float3_64::create(0.0)));
-
-                for(uint32_t k = 0; k < detail::numComponents; ++k)
-                {
-                    /* 2D (y,z) vectors are mapped on 3D (x,y,z) vectors. */
-                    for(uint32_t i = 0; i < DIM2; ++i)
-                    {
-                        pos[k][i + 1] = bFieldPositions_SI[k][i];
-                    }
-                }
-
-                /* B-field normalized to the peak amplitude. */
-                return float3_X(calcTWTSBx(pos[0], time), calcTWTSBy(pos[1], time), calcTWTSBz(pos[2], time));
-            }
-
-            HDINLINE
-            float3_X BField::operator()(DataSpace<simDim> const& cellIdx, uint32_t const currentStep) const
-            {
-                traits::FieldPosition<fields::YeeCell, FieldB> const fieldPosB;
-                return getValue(precisionCast<float_X>(cellIdx), fieldPosB(), static_cast<float_X>(currentStep));
-            }
-
-            HDINLINE
-            float3_X BField::operator()(floatD_X const& cellIdx, float_X const currentStep) const
-            {
-                pmacc::math::Vector<floatD_X, detail::numComponents> zeroShifts;
-                for(uint32_t component = 0; component < detail::numComponents; ++component)
-                    zeroShifts[component] = floatD_X::create(0.0);
-                return getValue(cellIdx, zeroShifts, currentStep);
-            }
-
-            HDINLINE
-            float3_X BField::getValue(
-                floatD_X const& cellIdx,
-                pmacc::math::Vector<floatD_X, detail::numComponents> const& extraShifts,
-                float_X const currentStep) const
-            {
-                float_64 const time_SI = float_64(currentStep) * dt - tdelay;
-
-                pmacc::math::Vector<floatD_64, detail::numComponents> const bFieldPositions_SI
-                    = detail::getFieldPositions_SI(cellIdx, halfSimSize, extraShifts, unit_length, focus_y_SI, phi);
-                /* Single TWTS-Pulse */
-                return getTWTSBfield_Normalized<simDim>(bFieldPositions_SI, time_SI);
-            }
-
-            template<uint32_t T_component>
-            HDINLINE float_X BField::getComponent(floatD_X const& cellIdx, float_X const currentStep) const
-            {
-                // The optimized way is only implemented for 3D, fall back to full field calculation in 2D
-                if constexpr(simDim == DIM3)
-                {
-                    float_64 const time_SI = float_64(currentStep) * dt - tdelay;
-                    pmacc::math::Vector<floatD_X, detail::numComponents> zeroShifts;
-                    for(uint32_t component = 0; component < detail::numComponents; ++component)
-                        zeroShifts[component] = floatD_X::create(0.0);
-                    pmacc::math::Vector<floatD_64, detail::numComponents> const bFieldPositions_SI
-                        = detail::getFieldPositions_SI(cellIdx, halfSimSize, zeroShifts, unit_length, focus_y_SI, phi);
-                    // Explicitly use a 3D vector so that this function compiles for 2D as well
-                    auto const pos = float3_64{
-                        bFieldPositions_SI[T_component][0],
-                        bFieldPositions_SI[T_component][1],
-                        bFieldPositions_SI[T_component][2]};
-                    if constexpr(T_component == 0)
-                        return static_cast<float_X>(calcTWTSBx(pos, time_SI));
-                    else
-                    {
-                        if constexpr(T_component == 1)
-                        {
-                            return static_cast<float_X>(calcTWTSBy(pos, time_SI));
-                        }
-                        if constexpr(T_component == 2)
-                        {
-                            return static_cast<float_X>(calcTWTSBz(pos, time_SI));
-                        }
-                    }
-                    // we should never be here
-                    return NAN;
-                }
-                if constexpr(simDim != DIM3)
-                    return (*this)(cellIdx, currentStep)[T_component];
-            }
+            using BField = TWTSTight<FieldB>;
 
-            /** Calculate the By(r,t) field here
+            /** Calculate the Bx(r,t) field
              *
              * @param pos Spatial position of the target field.
              * @param time Absolute time (SI, including all offsets and transformations)
              *             for calculating the field */
-            HDINLINE
-            BField::float_T BField::calcTWTSBy(float3_64 const& pos, float_64 const time) const
+            template<>
+            HDINLINE float_T BField::calcTWTSFieldX(float3_64 const& pos, float_64 const time) const
             {
                 using complex_T = alpaka::Complex<float_T>;
                 using complex_64 = alpaka::Complex<float_64>;
 
-                auto const& [absPhi, sinPhi, cosPhi, beta0, tanAlpha, cspeed, lambda0, omega0, tauG, w0, k]
+                auto const [absPhi, sinPhi, cosPhi, beta0, tanAlpha, cspeed, lambda0, omega0, tauG, w0, k]
                     = defineBasicHelperVariables();
-                auto const& [x, y, z, t] = defineMinimalCoordinates(pos, time);
+                auto const [x, y, z, t] = defineMinimalCoordinates(pos, time);
 
                 /* To avoid underflows in computation, fields are set to zero
                  * before and after the respective TWTS pulse envelope.
@@ -171,56 +59,56 @@ namespace picongpu
                 if(math::abs(y - z * tanAlpha - (beta0 * cspeed * t)) > (numSigmas * tauG * cspeed))
                     return float_T(0.0);
 
-                auto const& [tanPhi, cotPhi, sinPhi_2, sinPolAngle, cosPolAngle]
+                auto const [tanPhi, cotPhi, sinPhi_2, sinPolAngle, cosPolAngle]
                     = defineTrigonometryShortcuts(absPhi, sinPhi);
-                auto const& [I, x2, tauG2, psi0, w02, beta02, nu, xi, rhom, Xm, besselI0const, besselJ0const, besselJ1const, zeroOrder]
-                    = defineCommonHelperVariables(
-                        absPhi,
-                        sinPhi,
-                        cosPhi,
-                        beta0,
-                        tanAlpha,
-                        cspeed,
-                        lambda0,
-                        omega0,
-                        tauG,
-                        w0,
-                        k,
-                        x,
-                        y,
-                        z,
-                        t,
-                        cotPhi,
-                        sinPhi_2);
+                // clang-format off
+                auto const [I, x2, tauG2, psi0, w02, beta02, nu, xi, rhom, Xm, besselI0const, besselJ0const, besselJ1const, zeroOrder]
+                    = defineCommonHelperVariables(absPhi, sinPhi, cosPhi, beta0, tanAlpha, cspeed, lambda0, omega0, tauG, w0, k,
+                                                  x, y, z, t, cotPhi, sinPhi_2);
+                // clang-format on
 
                 /* Calculating shortcuts for speeding up field calculation */
                 float_T const cosPhi_2 = cosPhi * cosPhi;
+                float_T const sin2Phi = math::sin(float_T(2.0) * absPhi);
+                complex_T const rhom2 = rhom * rhom;
 
-                complex_T const result = (math::exp(I * (omega0 * t - k * y * cosPhi)) * k * zeroOrder * sinPhi
-                                          * (-(besselJ1const
-                                               * (Xm * cosPolAngle * (float_T(1.0) + cosPhi_2)
-                                                  + (Xm + float_T(2.0) * x * cosPhi - Xm * cosPhi_2) * sinPolAngle))
-                                             + I * rhom * besselJ0const * (cosPolAngle - sinPolAngle) * sinPhi_2)
-                                          * psi0)
-                    / (float_T(4.0) * cspeed * rhom * besselI0const);
+                complex_T const result
+                    = (complex_T(0, -0.25) * math::exp(I * (omega0 * t - k * y * cosPhi)) * zeroOrder
+                       * (k * rhom * besselJ0const
+                              * (cosPolAngle * (-rhom2 + x2 + x * cosPhi * Xm) * sinPhi_2
+                                 - sinPolAngle
+                                     * (rhom2 + rhom2 * cosPhi_2 - x2 * sinPhi_2 + x * cosPhi * sinPhi_2 * Xm))
+                          + besselJ1const
+                              * (cosPolAngle * sinPhi
+                                     * (rhom2 - float_T(2.0) * x2 + I * Xm * rhom2 * (k * sinPhi)
+                                        + x * cosPhi * (float_T(-2.0) * Xm - I * rhom2 * (k * sinPhi)))
+                                 + sinPolAngle
+                                     * ((rhom2 - float_T(2.0) * x2) * sinPhi
+                                        + I * rhom2 * (-Xm + x * cosPhi) * (k * sinPhi_2) + x * sin2Phi * Xm)))
+                       * psi0)
+                    / (cspeed * besselI0const * rhom * rhom2);
 
-                return result.real() / sim.unit.speed();
+                /* A 180deg-rotation of the field vector around the y-axis
+                 * leads to a sign flip in the x- and z- components, respectively.
+                 * This is implemented by multiplying the result by "phiPositive".
+                 */
+                return phiPositive * result.real() / sim.unit.speed();
             }
 
-            /** Calculate the Bz(r,t) field
+            /** Calculate the By(r,t) field here
              *
              * @param pos Spatial position of the target field.
              * @param time Absolute time (SI, including all offsets and transformations)
              *             for calculating the field */
-            HDINLINE
-            BField::float_T BField::calcTWTSBz(float3_64 const& pos, float_64 const time) const
+            template<>
+            HDINLINE float_T BField::calcTWTSFieldY(float3_64 const& pos, float_64 const time) const
             {
                 using complex_T = alpaka::Complex<float_T>;
                 using complex_64 = alpaka::Complex<float_64>;
 
-                auto const& [absPhi, sinPhi, cosPhi, beta0, tanAlpha, cspeed, lambda0, omega0, tauG, w0, k]
+                auto const [absPhi, sinPhi, cosPhi, beta0, tanAlpha, cspeed, lambda0, omega0, tauG, w0, k]
                     = defineBasicHelperVariables();
-                auto const& [x, y, z, t] = defineMinimalCoordinates(pos, time);
+                auto const [x, y, z, t] = defineMinimalCoordinates(pos, time);
 
                 /* To avoid underflows in computation, fields are set to zero
                  * before and after the respective TWTS pulse envelope.
@@ -228,69 +116,42 @@ namespace picongpu
                 if(math::abs(y - z * tanAlpha - (beta0 * cspeed * t)) > (numSigmas * tauG * cspeed))
                     return float_T(0.0);
 
-                auto const& [tanPhi, cotPhi, sinPhi_2, sinPolAngle, cosPolAngle]
+                auto const [tanPhi, cotPhi, sinPhi_2, sinPolAngle, cosPolAngle]
                     = defineTrigonometryShortcuts(absPhi, sinPhi);
-                auto const& [I, x2, tauG2, psi0, w02, beta02, nu, xi, rhom, Xm, besselI0const, besselJ0const, besselJ1const, zeroOrder]
-                    = defineCommonHelperVariables(
-                        absPhi,
-                        sinPhi,
-                        cosPhi,
-                        beta0,
-                        tanAlpha,
-                        cspeed,
-                        lambda0,
-                        omega0,
-                        tauG,
-                        w0,
-                        k,
-                        x,
-                        y,
-                        z,
-                        t,
-                        cotPhi,
-                        sinPhi_2);
+                // clang-format off
+                auto const [I, x2, tauG2, psi0, w02, beta02, nu, xi, rhom, Xm, besselI0const, besselJ0const, besselJ1const, zeroOrder]
+                    = defineCommonHelperVariables(absPhi, sinPhi, cosPhi, beta0, tanAlpha, cspeed, lambda0, omega0, tauG, w0, k,
+                                                  x, y, z, t, cotPhi, sinPhi_2);
+                // clang-format on
 
                 /* Calculating shortcuts for speeding up field calculation */
-                complex_T const rhom2 = rhom * rhom;
-                complex_T const Xm2 = Xm * Xm;
+                float_T const cosPhi_2 = cosPhi * cosPhi;
 
-                complex_T const result
-                    = (complex_T(0, -0.25) * math::exp(I * (omega0 * t - k * y * cosPhi)) * zeroOrder
-                       * (besselJ1const * sinPhi
-                              * (sinPolAngle
-                                     * (x * (float_T(2.0) * Xm - I * k * rhom2 * sinPhi)
-                                        + cosPhi * (rhom2 - float_T(2.0) * Xm2 - I * k * rhom2 * Xm * sinPhi))
-                                 + cosPolAngle
-                                     * (x * (float_T(2.0) * Xm + I * k * rhom2 * sinPhi)
-                                        + cosPhi * (-rhom2 + float_T(2.0) * Xm2 + I * k * rhom2 * Xm * sinPhi)))
-                          + k * rhom * besselJ0const
-                              * (Xm * (-x + Xm * cosPhi) * sinPolAngle * sinPhi_2
-                                 - cosPolAngle
-                                     * (x * Xm * sinPhi_2 + cosPhi * (float_T(-2.0) * rhom2 + Xm2 * sinPhi_2))))
-                       * psi0)
-                    / (cspeed * rhom * rhom2 * besselI0const);
+                complex_T const result = (math::exp(I * (omega0 * t - k * y * cosPhi)) * zeroOrder * (k * sinPhi)
+                                          * (-(besselJ1const
+                                               * (cosPolAngle * (float_T(1.0) + cosPhi_2) * Xm
+                                                  + (Xm + float_T(2.0) * x * cosPhi - Xm * cosPhi_2) * sinPolAngle))
+                                             + I * rhom * besselJ0const * (cosPolAngle - sinPolAngle) * sinPhi_2)
+                                          * psi0)
+                    / (float_T(4.0) * cspeed * besselI0const * rhom);
 
-                /* A 180deg-rotation of the field vector around the y-axis
-                 * leads to a sign flip in the x- and z- components, respectively.
-                 * This is implemented by multiplying the result by "phiPositive".
-                 */
-                return phiPositive * result.real() / sim.unit.speed();
+                return result.real() / sim.unit.speed();
             }
 
-            /** Calculate the Bx(r,t) field
+            /** Calculate the Bz(r,t) field
              *
              * @param pos Spatial position of the target field.
              * @param time Absolute time (SI, including all offsets and transformations)
              *             for calculating the field */
-            HDINLINE
-            BField::float_T BField::calcTWTSBx(float3_64 const& pos, float_64 const time) const
+            template<>
+            HDINLINE float_T BField::calcTWTSFieldZ(float3_64 const& pos, float_64 const time) const
             {
                 using complex_T = alpaka::Complex<float_T>;
                 using complex_64 = alpaka::Complex<float_64>;
 
-                auto const& [absPhi, sinPhi, cosPhi, beta0, tanAlpha, cspeed, lambda0, omega0, tauG, w0, k]
+                auto const [absPhi, sinPhi, cosPhi, beta0, tanAlpha, cspeed, lambda0, omega0, tauG, w0, k]
                     = defineBasicHelperVariables();
-                auto const& [x, y, z, t] = defineMinimalCoordinates(pos, time);
+                auto const [x, y, z, t] = defineMinimalCoordinates(pos, time);
 
                 /* To avoid underflows in computation, fields are set to zero
                  * before and after the respective TWTS pulse envelope.
@@ -298,48 +159,33 @@ namespace picongpu
                 if(math::abs(y - z * tanAlpha - (beta0 * cspeed * t)) > (numSigmas * tauG * cspeed))
                     return float_T(0.0);
 
-                auto const& [tanPhi, cotPhi, sinPhi_2, sinPolAngle, cosPolAngle]
+                auto const [tanPhi, cotPhi, sinPhi_2, sinPolAngle, cosPolAngle]
                     = defineTrigonometryShortcuts(absPhi, sinPhi);
-                auto const& [I, x2, tauG2, psi0, w02, beta02, nu, xi, rhom, Xm, besselI0const, besselJ0const, besselJ1const, zeroOrder]
-                    = defineCommonHelperVariables(
-                        absPhi,
-                        sinPhi,
-                        cosPhi,
-                        beta0,
-                        tanAlpha,
-                        cspeed,
-                        lambda0,
-                        omega0,
-                        tauG,
-                        w0,
-                        k,
-                        x,
-                        y,
-                        z,
-                        t,
-                        cotPhi,
-                        sinPhi_2);
+                // clang-format off
+                auto const [I, x2, tauG2, psi0, w02, beta02, nu, xi, rhom, Xm, besselI0const, besselJ0const, besselJ1const, zeroOrder]
+                    = defineCommonHelperVariables(absPhi, sinPhi, cosPhi, beta0, tanAlpha, cspeed, lambda0, omega0, tauG, w0, k,
+                                                  x, y, z, t, cotPhi, sinPhi_2);
+                // clang-format on
 
                 /* Calculating shortcuts for speeding up field calculation */
-                float_T const cosPhi_2 = cosPhi * cosPhi;
-                float_T const sin2Phi = math::sin(float_T(2.0) * absPhi);
                 complex_T const rhom2 = rhom * rhom;
+                complex_T const Xm2 = Xm * Xm;
 
                 complex_T const result
                     = (complex_T(0, -0.25) * math::exp(I * (omega0 * t - k * y * cosPhi)) * zeroOrder
-                       * (k * rhom * besselJ0const
-                              * (cosPolAngle * (-rhom2 + x2 + x * Xm * cosPhi) * sinPhi_2
-                                 - sinPolAngle
-                                     * (rhom2 + rhom2 * cosPhi_2 - x2 * sinPhi_2 + x * Xm * cosPhi * sinPhi_2))
-                          + besselJ1const
-                              * (cosPolAngle * sinPhi
-                                     * (rhom2 - float_T(2.0) * x2 + I * k * rhom2 * Xm * sinPhi
-                                        + x * cosPhi * (float_T(-2.0) * Xm - I * k * rhom2 * sinPhi))
-                                 + sinPolAngle
-                                     * ((rhom2 - float_T(2.0) * x2) * sinPhi
-                                        + I * k * rhom2 * (-Xm + x * cosPhi) * sinPhi_2 + x * Xm * sin2Phi)))
+                       * (besselJ1const * sinPhi
+                              * (sinPolAngle
+                                     * (x * (float_T(2.0) * Xm - I * (k * rhom2 * sinPhi))
+                                        + cosPhi * (rhom2 - float_T(2.0) * Xm2 - I * Xm * (k * rhom2 * sinPhi)))
+                                 + cosPolAngle
+                                     * (x * (float_T(2.0) * Xm + I * (k * rhom2 * sinPhi))
+                                        + cosPhi * (-rhom2 + float_T(2.0) * Xm2 + I * Xm * (k * rhom2 * sinPhi))))
+                          + k * rhom * besselJ0const
+                              * (Xm * (-x + Xm * cosPhi) * (sinPolAngle * sinPhi_2)
+                                 - cosPolAngle
+                                     * (x * sinPhi_2 * Xm + cosPhi * (float_T(-2.0) * rhom2 + Xm2 * sinPhi_2))))
                        * psi0)
-                    / (cspeed * rhom * rhom2 * besselI0const);
+                    / (cspeed * besselI0const * rhom * rhom2);
 
                 /* A 180deg-rotation of the field vector around the y-axis
                  * leads to a sign flip in the x- and z- components, respectively.
diff --git a/include/picongpu/fields/background/templates/twtstight/EField.hpp b/include/picongpu/fields/background/templates/twtstight/EField.hpp
deleted file mode 100644
index 839e8a6f59..0000000000
--- a/include/picongpu/fields/background/templates/twtstight/EField.hpp
+++ /dev/null
@@ -1,124 +0,0 @@
-/* Copyright 2014-2024 Alexander Debus, Axel Huebl, Sergei Bastrakov
- *
- * This file is part of PIConGPU.
- *
- * PIConGPU is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * PIConGPU is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with PIConGPU.
- * If not, see <http://www.gnu.org/licenses/>.
- */
-
-
-#pragma once
-
-#include "picongpu/defines.hpp"
-#include "picongpu/fields/background/templates/twtstight/numComponents.hpp"
-
-#include <pmacc/dimensions/DataSpace.hpp>
-#include <pmacc/math/Vector.hpp>
-#include <pmacc/types.hpp>
-
-namespace picongpu
-{
-    /* Load pre-defined background field */
-    namespace templates
-    {
-        /* Traveling-wave Thomson scattering laser pulse */
-        namespace twtstight
-        {
-            class EField : public TWTSTight
-            {
-            public:
-                using TWTSTight::TWTSTight;
-
-                /** Specify your background field E(r,t) here
-                 *
-                 * @param cellIdx The total cell id counted from the start at t=0, note it can be fractional
-                 * @param currentStep The current time step for the field to be calculated at, note it can be
-                 * fractional
-                 * @return float3_X with field normalized to amplitude in range [-1.:1.]
-                 *
-                 * @{
-                 */
-
-                //! Integer index version, adds in-cell shifts according to the grid used; t = currentStep * dt
-                //! This interface is used by the fieldBackground approach for implementing fields.
-                HDINLINE float3_X operator()(DataSpace<simDim> const& cellIdx, uint32_t const currentStep) const;
-
-                //! Floating-point index version, uses fractional cell index as provided; t = currentStep * dt
-                //! This interface is used by the incidentField approach for implementing fields.
-                HDINLINE float3_X operator()(floatD_X const& cellIdx, float_X const currentStep) const;
-
-                /** @} */
-
-                /** Calculate the given component of E(r, t)
-                 *
-                 * Result is same as for the fractional version of operator()(cellIdx, currentStep)[T_component].
-                 * This version exists for optimizing usage in incident field where single components are needed.
-                 *
-                 * @tparam T_component field component, 0 = x, 1 = y, 2 = z
-                 *
-                 * @param cellIdx The total fractional cell id counted from the start at t=0
-                 * @param currentStep The current time step for the field to be calculated at
-                 * @return float_X with field component normalized to amplitude in range [-1.:1.]
-                 */
-                template<uint32_t T_component>
-                HDINLINE float_X getComponent(floatD_X const& cellIdx, float_X const currentStep) const;
-
-                /** Calculate E(r, t) for given position, time, and extra in-cell shifts
-                 *
-                 * @param cellIdx The total cell id counted from the start at t=0, note it is fractional
-                 * @param extraShifts The extra in-cell shifts to be added to calculate the position
-                 * @param currentStep The current time step for the field to be calculated at, note it is fractional
-                 */
-                HDINLINE float3_X getValue(
-                    floatD_X const& cellIdx,
-                    pmacc::math::Vector<floatD_X, detail::numComponents> const& extraShifts,
-                    float_X const currentStep) const;
-
-                /** Calculate the Ex(r,t) field
-                 *
-                 * @param pos Spatial position of the target field
-                 * @param time Absolute time (SI, including all offsets and transformations)
-                 *  for calculating the field
-                 * @return Ex-field component of the TWTS field in SI units */
-                HDINLINE float_T calcTWTSEx(float3_64 const& pos, float_64 const time) const;
-
-                /** Calculate the Ey(r,t) field
-                 *
-                 * @param pos Spatial position of the target field
-                 * @param time Absolute time (SI, including all offsets and transformations)
-                 *  for calculating the field
-                 * @return Ey-field component of the TWTS field in SI units */
-                HDINLINE float_T calcTWTSEy(float3_64 const& pos, float_64 const time) const;
-
-                /** Calculate the Ez(r,t) field
-                 *
-                 * @param pos Spatial position of the target field.
-                 * @param time Absolute time (SI, including all offsets and transformations)
-                 *  for calculating the field
-                 * @return Ez-field component of the TWTS field in SI units */
-                HDINLINE float_T calcTWTSEz(float3_64 const& pos, float_64 const time) const;
-
-                /** Calculate the E-field vector of the TWTS laser in SI units.
-                 * @tparam T_dim Specializes for the simulation dimension
-                 * @param cellIdx The total cell id counted from the start at timestep 0
-                 * @return Efield vector of the TWTS field in SI units */
-                template<unsigned T_dim>
-                HDINLINE float3_X getTWTSEfield_Normalized(
-                    pmacc::math::Vector<floatD_64, detail::numComponents> const& eFieldPositions_SI,
-                    float_64 const time) const;
-            };
-
-        } /* namespace twtstight */
-    } /* namespace templates */
-} /* namespace picongpu */
diff --git a/include/picongpu/fields/background/templates/twtstight/EField.tpp b/include/picongpu/fields/background/templates/twtstight/EField.tpp
index 8117e68a18..97c5786a48 100644
--- a/include/picongpu/fields/background/templates/twtstight/EField.tpp
+++ b/include/picongpu/fields/background/templates/twtstight/EField.tpp
@@ -21,16 +21,9 @@
 #pragma once
 
 #include "picongpu/defines.hpp"
-#include "picongpu/fields/YeeCell.hpp"
-#include "picongpu/fields/background/templates/twtstight/EField.hpp"
-#include "picongpu/fields/background/templates/twtstight/GetInitialTimeDelay_SI.tpp"
-#include "picongpu/fields/background/templates/twtstight/getFieldPositions_SI.tpp"
-#include "picongpu/fields/background/templates/twtstight/twtstight.hpp"
+#include "picongpu/fields/background/templates/twtstight/TWTSTight.hpp"
 
-#include <pmacc/dimensions/DataSpace.hpp>
-#include <pmacc/mappings/simulation/SubGrid.hpp>
 #include <pmacc/math/Complex.hpp>
-#include <pmacc/math/Vector.hpp>
 #include <pmacc/types.hpp>
 
 namespace picongpu
@@ -41,129 +34,22 @@ namespace picongpu
         /* Traveling-wave Thomson scattering laser pulse */
         namespace twtstight
         {
-            template<>
-            HDINLINE float3_X EField::getTWTSEfield_Normalized<DIM3>(
-                pmacc::math::Vector<floatD_64, detail::numComponents> const& eFieldPositions_SI,
-                float_64 const time) const
-            {
-                using PosVecVec = pmacc::math::Vector<float3_64, detail::numComponents>;
-                PosVecVec pos(PosVecVec::create(float3_64::create(0.0)));
-
-                for(uint32_t k = 0; k < detail::numComponents; ++k)
-                {
-                    for(uint32_t i = 0; i < simDim; ++i)
-                        pos[k][i] = eFieldPositions_SI[k][i];
-                }
-
-                /* E-field normalized to the peak amplitude. */
-                return float3_X(
-                    float_X(calcTWTSEx(pos[0], time)),
-                    float_X(calcTWTSEy(pos[1], time)),
-                    float_X(calcTWTSEz(pos[2], time)));
-            }
-
-            template<>
-            HDINLINE float3_X EField::getTWTSEfield_Normalized<DIM2>(
-                pmacc::math::Vector<floatD_64, detail::numComponents> const& eFieldPositions_SI,
-                float_64 const time) const
-            {
-                using PosVecVec = pmacc::math::Vector<float3_64, detail::numComponents>;
-                PosVecVec pos(PosVecVec::create(float3_64::create(0.0)));
-
-                for(uint32_t k = 0; k < detail::numComponents; ++k)
-                {
-                    /* 2D (y,z) vectors are mapped on 3D (x,y,z) vectors. */
-                    for(uint32_t i = 0; i < DIM2; ++i)
-                    {
-                        pos[k][i + 1] = eFieldPositions_SI[k][i];
-                    }
-                }
-
-                /* E-field normalized to the peak amplitude. */
-                return float3_X(calcTWTSEx(pos[0], time), calcTWTSEy(pos[1], time), calcTWTSEz(pos[2], time));
-            }
-
-            HDINLINE float3_X EField::operator()(DataSpace<simDim> const& cellIdx, uint32_t const currentStep) const
-            {
-                traits::FieldPosition<fields::YeeCell, FieldE> const fieldPosE;
-                return getValue(precisionCast<float_X>(cellIdx), fieldPosE(), static_cast<float_X>(currentStep));
-            }
-
-            HDINLINE
-            float3_X EField::operator()(floatD_X const& cellIdx, float_X const currentStep) const
-            {
-                pmacc::math::Vector<floatD_X, detail::numComponents> zeroShifts;
-                for(uint32_t component = 0; component < detail::numComponents; ++component)
-                    zeroShifts[component] = floatD_X::create(0.0);
-                return getValue(cellIdx, zeroShifts, currentStep);
-            }
-
-            HDINLINE
-            float3_X EField::getValue(
-                floatD_X const& cellIdx,
-                pmacc::math::Vector<floatD_X, detail::numComponents> const& extraShifts,
-                float_X const currentStep) const
-            {
-                float_64 const time_SI = float_64(currentStep) * dt - tdelay;
-
-                pmacc::math::Vector<floatD_64, detail::numComponents> const eFieldPositions_SI
-                    = detail::getFieldPositions_SI(cellIdx, halfSimSize, extraShifts, unit_length, focus_y_SI, phi);
-
-                /* Single TWTS-Pulse */
-                return getTWTSEfield_Normalized<simDim>(eFieldPositions_SI, time_SI);
-            }
-
-            template<uint32_t T_component>
-            HDINLINE float_X EField::getComponent(floatD_X const& cellIdx, float_X const currentStep) const
-            {
-                // The optimized way is only implemented for 3D, fall back to full field calculation in 2d
-                if constexpr(simDim == DIM3)
-                {
-                    float_64 const time_SI = float_64(currentStep) * dt - tdelay;
-                    pmacc::math::Vector<floatD_X, detail::numComponents> zeroShifts;
-                    for(uint32_t component = 0; component < detail::numComponents; ++component)
-                        zeroShifts[component] = floatD_X::create(0.0);
-                    pmacc::math::Vector<floatD_64, detail::numComponents> const eFieldPositions_SI
-                        = detail::getFieldPositions_SI(cellIdx, halfSimSize, zeroShifts, unit_length, focus_y_SI, phi);
-                    // Explicitly use a 3D vector so that this function compiles for 2D as well
-                    auto const pos = float3_64{
-                        eFieldPositions_SI[T_component][0],
-                        eFieldPositions_SI[T_component][1],
-                        eFieldPositions_SI[T_component][2]};
-                    if constexpr(T_component == 0)
-                        return static_cast<float_X>(calcTWTSEx(pos, time_SI));
-                    else
-                    {
-                        if constexpr(T_component == 1)
-                        {
-                            return static_cast<float_X>(calcTWTSEy(pos, time_SI));
-                        }
-                        if constexpr(T_component == 2)
-                        {
-                            return static_cast<float_X>(calcTWTSEz(pos, time_SI));
-                        }
-                    }
-
-                    // we should never be here
-                    return NAN;
-                }
-                if constexpr(simDim != DIM3)
-                    return (*this)(cellIdx, currentStep)[T_component];
-            }
+            using EField = TWTSTight<FieldE>;
 
             /** Calculate the Ex(r,t) field here
              *
              * @param pos Spatial position of the target field.
              * @param time Absolute time (SI, including all offsets and transformations) for calculating
              *             the field */
-            HDINLINE EField::float_T EField::calcTWTSEx(float3_64 const& pos, float_64 const time) const
+            template<>
+            HDINLINE float_T EField::calcTWTSFieldX(float3_64 const& pos, float_64 const time) const
             {
                 using complex_T = alpaka::Complex<float_T>;
                 using complex_64 = alpaka::Complex<float_64>;
 
-                auto const& [absPhi, sinPhi, cosPhi, beta0, tanAlpha, cspeed, lambda0, omega0, tauG, w0, k]
+                auto const [absPhi, sinPhi, cosPhi, beta0, tanAlpha, cspeed, lambda0, omega0, tauG, w0, k]
                     = defineBasicHelperVariables();
-                auto const& [x, y, z, t] = defineMinimalCoordinates(pos, time);
+                auto const [x, y, z, t] = defineMinimalCoordinates(pos, time);
 
                 /* To avoid underflows in computation, fields are set to zero
                  * before and after the respective TWTS pulse envelope.
@@ -171,27 +57,13 @@ namespace picongpu
                 if(math::abs(y - z * tanAlpha - (beta0 * cspeed * t)) > (numSigmas * tauG * cspeed))
                     return float_T(0.0);
 
-                auto const& [tanPhi, cotPhi, sinPhi_2, sinPolAngle, cosPolAngle]
+                auto const [tanPhi, cotPhi, sinPhi_2, sinPolAngle, cosPolAngle]
                     = defineTrigonometryShortcuts(absPhi, sinPhi);
-                auto const& [I, x2, tauG2, psi0, w02, beta02, nu, xi, rhom, Xm, besselI0const, besselJ0const, besselJ1const, zeroOrder]
-                    = defineCommonHelperVariables(
-                        absPhi,
-                        sinPhi,
-                        cosPhi,
-                        beta0,
-                        tanAlpha,
-                        cspeed,
-                        lambda0,
-                        omega0,
-                        tauG,
-                        w0,
-                        k,
-                        x,
-                        y,
-                        z,
-                        t,
-                        cotPhi,
-                        sinPhi_2);
+                // clang-format off
+                auto const [I, x2, tauG2, psi0, w02, beta02, nu, xi, rhom, Xm, besselI0const, besselJ0const, besselJ1const, zeroOrder]
+                    = defineCommonHelperVariables(absPhi, sinPhi, cosPhi, beta0, tanAlpha, cspeed, lambda0, omega0, tauG, w0, k,
+                                                  x, y, z, t, cotPhi, sinPhi_2);
+                // clang-format on
 
                 /* Calculating shortcuts for speeding up field calculation */
                 float_T const cosPhi_2 = cosPhi * cosPhi;
@@ -200,16 +72,16 @@ namespace picongpu
                 complex_T const result
                     = (complex_T(0, 0.25) * math::exp(I * (omega0 * t - k * y * cosPhi)) * zeroOrder
                        * (k * rhom * besselJ0const
-                              * ((rhom2 - x2 + x * Xm * cosPhi) * sinPolAngle * sinPhi_2
+                              * ((rhom2 - x2 + x * Xm * cosPhi) * (sinPolAngle * sinPhi_2)
                                  + cosPolAngle
-                                     * (rhom2 + rhom2 * cosPhi_2 - x2 * sinPhi_2 - x * Xm * cosPhi * sinPhi_2))
+                                     * (rhom2 + rhom2 * cosPhi_2 - x2 * sinPhi_2 - x * cosPhi * sinPhi_2 * Xm))
                           + besselJ1const * sinPhi
                               * (+sinPolAngle
-                                     * (-rhom2 + float_T(2.0) * x2 - I * k * rhom2 * Xm * sinPhi
-                                        + x * cosPhi * (float_T(-2.0) * Xm - I * k * rhom2 * sinPhi))
+                                     * (-rhom2 + float_T(2.0) * x2 - I * rhom2 * Xm * (k * sinPhi)
+                                        + x * cosPhi * (float_T(-2.0) * Xm - I * rhom2 * (k * sinPhi)))
                                  + cosPolAngle
-                                     * (-rhom2 + float_T(2.0) * x2 + I * k * rhom2 * Xm * sinPhi
-                                        + x * cosPhi * (float_T(+2.0) * Xm + I * k * rhom2 * sinPhi))))
+                                     * (-rhom2 + float_T(2.0) * x2 + I * rhom2 * Xm * (k * sinPhi)
+                                        + x * cosPhi * (float_T(+2.0) * Xm + I * rhom2 * (k * sinPhi)))))
                        * psi0)
                     / (rhom * rhom2 * besselI0const);
 
@@ -225,14 +97,15 @@ namespace picongpu
              * @param pos Spatial position of the target field.
              * @param time Absolute time (SI, including all offsets and transformations) for calculating
              *             the field */
-            HDINLINE EField::float_T EField::calcTWTSEy(float3_64 const& pos, float_64 const time) const
+            template<>
+            HDINLINE float_T EField::calcTWTSFieldY(float3_64 const& pos, float_64 const time) const
             {
                 using complex_T = alpaka::Complex<float_T>;
                 using complex_64 = alpaka::Complex<float_64>;
 
-                auto const& [absPhi, sinPhi, cosPhi, beta0, tanAlpha, cspeed, lambda0, omega0, tauG, w0, k]
+                auto const [absPhi, sinPhi, cosPhi, beta0, tanAlpha, cspeed, lambda0, omega0, tauG, w0, k]
                     = defineBasicHelperVariables();
-                auto const& [x, y, z, t] = defineMinimalCoordinates(pos, time);
+                auto const [x, y, z, t] = defineMinimalCoordinates(pos, time);
 
                 /* To avoid underflows in computation, fields are set to zero
                  * before and after the respective TWTS pulse envelope.
@@ -240,38 +113,24 @@ namespace picongpu
                 if(math::abs(y - z * tanAlpha - (beta0 * cspeed * t)) > (numSigmas * tauG * cspeed))
                     return float_T(0.0);
 
-                auto const& [tanPhi, cotPhi, sinPhi_2, sinPolAngle, cosPolAngle]
+                auto const [tanPhi, cotPhi, sinPhi_2, sinPolAngle, cosPolAngle]
                     = defineTrigonometryShortcuts(absPhi, sinPhi);
-                auto const& [I, x2, tauG2, psi0, w02, beta02, nu, xi, rhom, Xm, besselI0const, besselJ0const, besselJ1const, zeroOrder]
-                    = defineCommonHelperVariables(
-                        absPhi,
-                        sinPhi,
-                        cosPhi,
-                        beta0,
-                        tanAlpha,
-                        cspeed,
-                        lambda0,
-                        omega0,
-                        tauG,
-                        w0,
-                        k,
-                        x,
-                        y,
-                        z,
-                        t,
-                        cotPhi,
-                        sinPhi_2);
+                // clang-format off
+                auto const [I, x2, tauG2, psi0, w02, beta02, nu, xi, rhom, Xm, besselI0const, besselJ0const, besselJ1const, zeroOrder]
+                    = defineCommonHelperVariables(absPhi, sinPhi, cosPhi, beta0, tanAlpha, cspeed, lambda0, omega0, tauG, w0, k,
+                                                  x, y, z, t, cotPhi, sinPhi_2);
+                // clang-format on
 
                 /* Calculating shortcuts for speeding up field calculation */
                 float_T const cosPhi_2 = cosPhi * cosPhi;
 
-                complex_T const result = (math::exp(I * (omega0 * t - k * y * cosPhi)) * k * zeroOrder * sinPhi
+                complex_T const result = (math::exp(I * (omega0 * t - k * y * cosPhi)) * zeroOrder * (k * sinPhi)
                                           * (besselJ1const
                                                  * (cosPolAngle * (Xm - float_T(2.0) * x * cosPhi - Xm * cosPhi_2)
-                                                    + Xm * (float_T(1.0) + cosPhi_2) * sinPolAngle)
-                                             + I * rhom * besselJ0const * (cosPolAngle - sinPolAngle) * sinPhi_2)
+                                                    + (float_T(1.0) + cosPhi_2) * sinPolAngle * Xm)
+                                             + I * rhom * besselJ0const * ((cosPolAngle - sinPolAngle) * sinPhi_2))
                                           * psi0)
-                    / (float_T(4.0) * rhom * besselI0const);
+                    / (float_T(4.0) * besselI0const * rhom);
 
                 return result.real();
             }
@@ -281,14 +140,15 @@ namespace picongpu
              * @param pos Spatial position of the target field.
              * @param time Absolute time (SI, including all offsets and transformations) for calculating
              *             the field */
-            HDINLINE EField::float_T EField::calcTWTSEz(float3_64 const& pos, float_64 const time) const
+            template<>
+            HDINLINE float_T EField::calcTWTSFieldZ(float3_64 const& pos, float_64 const time) const
             {
                 using complex_T = alpaka::Complex<float_T>;
                 using complex_64 = alpaka::Complex<float_64>;
 
-                auto const& [absPhi, sinPhi, cosPhi, beta0, tanAlpha, cspeed, lambda0, omega0, tauG, w0, k]
+                auto const [absPhi, sinPhi, cosPhi, beta0, tanAlpha, cspeed, lambda0, omega0, tauG, w0, k]
                     = defineBasicHelperVariables();
-                auto const& [x, y, z, t] = defineMinimalCoordinates(pos, time);
+                auto const [x, y, z, t] = defineMinimalCoordinates(pos, time);
 
                 /* To avoid underflows in computation, fields are set to zero
                  * before and after the respective TWTS pulse envelope.
@@ -296,27 +156,13 @@ namespace picongpu
                 if(math::abs(y - z * tanAlpha - (beta0 * cspeed * t)) > (numSigmas * tauG * cspeed))
                     return float_T(0.0);
 
-                auto const& [tanPhi, cotPhi, sinPhi_2, sinPolAngle, cosPolAngle]
+                auto const [tanPhi, cotPhi, sinPhi_2, sinPolAngle, cosPolAngle]
                     = defineTrigonometryShortcuts(absPhi, sinPhi);
-                auto const& [I, x2, tauG2, psi0, w02, beta02, nu, xi, rhom, Xm, besselI0const, besselJ0const, besselJ1const, zeroOrder]
-                    = defineCommonHelperVariables(
-                        absPhi,
-                        sinPhi,
-                        cosPhi,
-                        beta0,
-                        tanAlpha,
-                        cspeed,
-                        lambda0,
-                        omega0,
-                        tauG,
-                        w0,
-                        k,
-                        x,
-                        y,
-                        z,
-                        t,
-                        cotPhi,
-                        sinPhi_2);
+                // clang-format off
+                auto const [I, x2, tauG2, psi0, w02, beta02, nu, xi, rhom, Xm, besselI0const, besselJ0const, besselJ1const, zeroOrder]
+                    = defineCommonHelperVariables(absPhi, sinPhi, cosPhi, beta0, tanAlpha, cspeed, lambda0, omega0, tauG, w0, k,
+                                                  x, y, z, t, cotPhi, sinPhi_2);
+                // clang-format on
 
                 /* Calculating shortcuts for speeding up field calculation */
                 float_T const sin2Phi = math::sin(float_T(2.0) * absPhi);
@@ -326,19 +172,20 @@ namespace picongpu
                 complex_T const result
                     = (complex_T(0, 0.125) * math::exp(I * (omega0 * t - k * y * cosPhi)) * zeroOrder
                        * (float_T(2.0) * k * rhom * besselJ0const
-                              * (x * Xm * (cosPolAngle + sinPolAngle) * sinPhi_2
+                              * (x * (cosPolAngle + sinPolAngle) * sinPhi_2 * Xm
                                  + cosPhi
-                                     * (Xm2 * cosPolAngle * sinPhi_2
+                                     * (cosPolAngle * sinPhi_2 * Xm2
                                         + sinPolAngle * (float_T(2.0) * rhom2 - Xm2 * sinPhi_2)))
                           + besselJ1const * sinPhi
                               * (cosPolAngle
-                                     * (float_T(-4.0) * x * Xm + float_T(2.0) * (rhom2 - float_T(2.0) * Xm2) * cosPhi
-                                        + complex_T(0, 2) * k * rhom2 * (x - Xm * cosPhi) * sinPhi)
+                                     * (float_T(-4.0) * x * Xm + float_T(2.0) * cosPhi * (rhom2 - float_T(2.0) * Xm2)
+                                        + complex_T(0, 2) * rhom2 * (x - Xm * cosPhi) * (k * sinPhi))
                                  + sinPolAngle
-                                     * (float_T(-4.0) * x * Xm - float_T(2.0) * (rhom2 - float_T(2.0) * Xm2) * cosPhi
-                                        - complex_T(0, 2) * k * rhom2 * x * sinPhi + I * k * rhom2 * Xm * sin2Phi)))
+                                     * (float_T(-4.0) * x * Xm - float_T(2.0) * cosPhi * (rhom2 - float_T(2.0) * Xm2)
+                                        - complex_T(0, 2) * rhom2 * (k * x * sinPhi)
+                                        + I * rhom2 * Xm * (k * sin2Phi))))
                        * psi0)
-                    / (rhom * rhom2 * besselI0const);
+                    / (besselI0const * rhom * rhom2);
 
                 /* A 180deg-rotation of the field vector around the y-axis
                  * leads to a sign flip in the x- and z- components, respectively.
diff --git a/include/picongpu/fields/background/templates/twtstight/twtstight.hpp b/include/picongpu/fields/background/templates/twtstight/TWTSTight.hpp
similarity index 65%
rename from include/picongpu/fields/background/templates/twtstight/twtstight.hpp
rename to include/picongpu/fields/background/templates/twtstight/TWTSTight.hpp
index ebcad46b8b..5238fd1cda 100644
--- a/include/picongpu/fields/background/templates/twtstight/twtstight.hpp
+++ b/include/picongpu/fields/background/templates/twtstight/TWTSTight.hpp
@@ -56,10 +56,10 @@
 
 #pragma once
 #include "picongpu/defines.hpp"
-// include "picongpu/fields/background/templates/twtstight/numComponents.hpp"
+#include "picongpu/fields/background/templates/twtstight/numComponents.hpp"
 
 #include <pmacc/dimensions/DataSpace.hpp>
-// include <pmacc/math/Vector.hpp>
+#include <pmacc/math/Vector.hpp>
 #include <pmacc/types.hpp>
 
 namespace picongpu
@@ -68,6 +68,7 @@ namespace picongpu
     {
         namespace twtstight
         {
+            using float_T = float_X;
             /** To avoid underflows in computation, numsigmas controls where a zero cutoff is made.
              *  The fields thus are set to zero at a position (numSigmas * tauG * cspeed) ahead
              *  and behind the respective TWTS pulse envelope.
@@ -76,11 +77,15 @@ namespace picongpu
              */
             constexpr uint32_t numSigmas = 6;
 
-            class TWTSTight
+            /** Provides the E- or B-field functors of the TWTSTight laser for the
+             *  fieldBackground and incidentField approaches.
+             *  @tparam T_Field Specializes for E- or B-Field
+             *  @tparam T_dim Specializes for the simulation dimension
+             */
+            template<typename T_Field>
+            struct TWTSTight
             {
             public:
-                using float_T = float_X;
-
                 /** Center of simulation volume in number of cells */
                 PMACC_ALIGN(halfSimSize, DataSpace<simDim>);
                 /** y-position of TWTS coordinate origin inside the simulation coordinates [meter]
@@ -154,6 +159,52 @@ namespace picongpu
                     bool const auto_tdelay = true,
                     float_X const polAngle = 0. * (PI / 180.));
 
+                /** Specify your background field E(r, t) or B(r, t) here
+                 *
+                 * @param cellIdx The total cell id counted from the start at t=0, note it can be fractional
+                 * @param currentStep The current time step for the field to be calculated at, note it can be
+                 * fractional
+                 * @return float3_X with field normalized to amplitude in range [-1.:1.]
+                 *
+                 * @{
+                 */
+
+                //! Integer index version, adds in-cell shifts according to the grid used; t = currentStep * dt
+                //! This interface is used by the fieldBackground approach for implementing fields.
+                HDINLINE float3_X operator()(DataSpace<simDim> const& cellIdx, uint32_t const currentStep) const;
+
+                //! Floating-point index version, uses fractional cell index as provided; t = currentStep * dt
+                //! This interface is used by the incidentField approach for implementing fields.
+                HDINLINE float3_X operator()(floatD_X const& cellIdx, float_X const currentStep) const;
+
+                /** @} */
+
+                /** Calculate the given component of E(r, t) or B(r, t)
+                 *
+                 * Result is same as for the fractional version of operator()(cellIdx, currentStep)[T_component].
+                 * This version exists for optimizing usage in incident field where single components are needed.
+                 *
+                 * @tparam T_component field component, 0 = x, 1 = y, 2 = z
+                 *
+                 * @param cellIdx The total fractional cell id counted from the start at t=0
+                 * @param currentStep The current time step for the field to be calculated at
+                 * @return float_X with field component normalized to amplitude in range [-1.:1.]
+                 */
+                template<uint32_t T_component>
+                HDINLINE float_X getComponent(floatD_X const& cellIdx, float_X const currentStep) const;
+
+                /** Calculate E(r, t) or B(r, t) for given position, time, and extra in-cell shifts
+                 *
+                 * @param cellIdx The total cell id counted from the start at t=0, note it is fractional
+                 * @param extraShifts The extra in-cell shifts to be added to calculate the position
+                 * @param currentStep The current time step for the field to be calculated at, note it is fractional
+                 */
+                HDINLINE float3_X getValue(
+                    floatD_X const& cellIdx,
+                    pmacc::math::Vector<floatD_X, detail::numComponents> const& extraShifts,
+                    float_X const currentStep) const;
+
+                //! Helper method to define basic TWTS variables
                 HDINLINE
                 std::tuple<
                     float_T,
@@ -169,16 +220,20 @@ namespace picongpu
                     float_T>
                 defineBasicHelperVariables() const;
 
+                //! Helper method to define reduced TWTSTight coordinates
                 HDINLINE
                 std::tuple<float_T, float_T, float_T, float_T> defineMinimalCoordinates(
                     float3_64 const& pos,
                     float_64 const& time) const;
 
+                //! Helper method to define trigonometry shortcuts
                 HDINLINE
                 std::tuple<float_T, float_T, float_T, float_T, float_T> defineTrigonometryShortcuts(
                     float_T const& absPhi,
                     float_T const& sinPhi) const;
 
+                //! Helper method to define common (complex-valued) variables
+                //! for TWTSTight field calculation
                 HDINLINE
                 std::tuple<
                     alpaka::Complex<float_T>,
@@ -213,12 +268,43 @@ namespace picongpu
                     float_T const& t,
                     float_T const& cotPhi,
                     float_T const& sinPhi_2) const;
+
+                /** Calculate the Ex(r,t) or Bx(r,t) field
+                 *
+                 * @param pos Spatial position of the target field.
+                 * @param time Absolute time (SI, including all offsets and transformations)
+                 *  for calculating the field
+                 * @tparam T_Field Specializes for E- or B-Field
+                 * @return Ex- or Bx-field component of the TWTS field in SI units */
+                HDINLINE float_T calcTWTSFieldX(float3_64 const& pos, float_64 const time) const;
+
+                /** Calculate the Ey(r,t) or By(r,t) field
+                 *
+                 * @param pos Spatial position of the target field.
+                 * @param time Absolute time (SI, including all offsets and transformations)
+                 *  for calculating the field
+                 * @tparam T_Field Specializes for E- or B-Field
+                 * @return Ey- or By-field component of the TWTS field in SI units */
+                HDINLINE float_T calcTWTSFieldY(float3_64 const& pos, float_64 const time) const;
+
+                /** Calculate the Ez(r,t) or Bz(r,t) field
+                 *
+                 * @param pos Spatial position of the target field.
+                 * @param time Absolute time (SI, including all offsets and transformations)
+                 *  for calculating the field
+                 * @return Ey- or Bz-field component of the TWTS field in SI units */
+                HDINLINE float_T calcTWTSFieldZ(float3_64 const& pos, float_64 const time) const;
+
+                /** Calculate the E- or B-field vector of the TWTS laser in SI units.
+                 * @param cellIdx The total cell id counted from the start at timestep 0
+                 * @return E- or B-field vector of the TWTS field in SI units */
+                HDINLINE float3_X getTWTSField_Normalized(
+                    pmacc::math::Vector<floatD_64, detail::numComponents> const& fieldPositions_SI,
+                    float_64 const time) const;
             };
 
         } // namespace twtstight
     } // namespace templates
 } // namespace picongpu
 
-#include "picongpu/fields/background/templates/twtstight/BField.hpp"
-#include "picongpu/fields/background/templates/twtstight/EField.hpp"
-#include "picongpu/fields/background/templates/twtstight/twtstight.tpp"
+#include "picongpu/fields/background/templates/twtstight/TWTSTight.tpp"
diff --git a/include/picongpu/fields/background/templates/twtstight/TWTSTight.tpp b/include/picongpu/fields/background/templates/twtstight/TWTSTight.tpp
new file mode 100644
index 0000000000..4a55625d52
--- /dev/null
+++ b/include/picongpu/fields/background/templates/twtstight/TWTSTight.tpp
@@ -0,0 +1,366 @@
+/* Copyright 2014-2024 Alexander Debus
+ *
+ * This file is part of PIConGPU.
+ *
+ * PIConGPU is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * PIConGPU is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with PIConGPU.
+ * If not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+#pragma once
+
+#include "picongpu/defines.hpp"
+#include "picongpu/fields/YeeCell.hpp"
+#include "picongpu/fields/background/templates/twtstight/BField.tpp"
+#include "picongpu/fields/background/templates/twtstight/EField.tpp"
+#include "picongpu/fields/background/templates/twtstight/GetInitialTimeDelay_SI.tpp"
+#include "picongpu/fields/background/templates/twtstight/TWTSTight.hpp"
+#include "picongpu/fields/background/templates/twtstight/getFieldPositions_SI.tpp"
+
+#include <pmacc/dimensions/DataSpace.hpp>
+#include <pmacc/mappings/simulation/SubGrid.hpp>
+#include <pmacc/math/Complex.hpp>
+#include <pmacc/math/Vector.hpp>
+#include <pmacc/types.hpp>
+
+namespace picongpu
+{
+    /* Load pre-defined background field */
+    namespace templates
+    {
+        /* Traveling-wave Thomson scattering laser pulse */
+        namespace twtstight
+        {
+            template<typename T_Field>
+            HINLINE TWTSTight<T_Field>::TWTSTight(
+                float_64 const focus_y_SI,
+                float_64 const wavelength_SI,
+                float_64 const pulselength_SI,
+                float_64 const w_x_SI,
+                float_X const phi,
+                float_X const beta_0,
+                float_64 const tdelay_user_SI,
+                bool const auto_tdelay,
+                float_X const polAngle)
+                : focus_y_SI(focus_y_SI)
+                , wavelength_SI(wavelength_SI)
+                , pulselength_SI(pulselength_SI)
+                , w_x_SI(w_x_SI)
+                , phi(phi)
+                , phiPositive(float_X(1.0))
+                , beta_0(beta_0)
+                , tdelay_user_SI(tdelay_user_SI)
+                , dt(sim.si.getDt())
+                , unit_length(sim.unit.length())
+                , auto_tdelay(auto_tdelay)
+                , polAngle(polAngle)
+            {
+                /* Note: Enviroment-objects cannot be instantiated on CUDA GPU device. Since this is done
+                         on host (see fieldBackground.param), this is no problem.
+                 */
+                SubGrid<simDim> const& subGrid = Environment<simDim>::get().SubGrid();
+                halfSimSize = subGrid.getGlobalDomain().size / 2;
+                tdelay = detail::getInitialTimeDelay_SI(
+                    auto_tdelay,
+                    tdelay_user_SI,
+                    halfSimSize,
+                    pulselength_SI,
+                    focus_y_SI,
+                    phi,
+                    beta_0);
+                if(phi < 0.0_X)
+                    phiPositive = float_X(-1.0);
+            }
+
+            template<typename T_Field>
+            HDINLINE float3_X TWTSTight<T_Field>::getTWTSField_Normalized(
+                pmacc::math::Vector<floatD_64, detail::numComponents> const& fieldPositions_SI,
+                float_64 const time) const
+            {
+                using PosVecVec = pmacc::math::Vector<float3_64, detail::numComponents>;
+                PosVecVec pos(PosVecVec::create(float3_64::create(0.0)));
+
+                if constexpr(simDim == DIM3)
+                {
+                    for(uint32_t k = 0; k < detail::numComponents; ++k)
+                    {
+                        for(uint32_t i = 0; i < simDim; ++i)
+                        {
+                            pos[k][i] = fieldPositions_SI[k][i];
+                        }
+                    }
+                    /* E- or B-field normalized to the peak amplitude. */
+                    return static_cast<float3_X>(
+                        calcTWTSFieldX(pos[0], time),
+                        calcTWTSFieldY(pos[1], time),
+                        calcTWTSFieldZ(pos[2], time));
+                }
+                if constexpr(simDim == DIM2)
+                {
+                    for(uint32_t k = 0; k < detail::numComponents; ++k)
+                    {
+                        /* 2D (y,z) vectors are mapped on 3D (x,y,z) vectors. */
+                        for(uint32_t i = 0; i < simDim; ++i)
+                        {
+                            pos[k][i + 1] = fieldPositions_SI[k][i];
+                        }
+                    }
+                    /* E- or B-field normalized to the peak amplitude. */
+                    return static_cast<float3_X>(
+                        calcTWTSFieldX(pos[0], time),
+                        calcTWTSFieldY(pos[1], time),
+                        calcTWTSFieldZ(pos[2], time));
+                }
+                // We should never be here.
+                else
+                    return static_cast<float3_X>(NAN);
+            }
+
+            template<typename T_Field>
+            HDINLINE float3_X
+            TWTSTight<T_Field>::operator()(DataSpace<simDim> const& cellIdx, uint32_t const currentStep) const
+            {
+                traits::FieldPosition<fields::YeeCell, T_Field> const fieldPos;
+                return getValue(precisionCast<float_X>(cellIdx), fieldPos(), static_cast<float_X>(currentStep));
+            }
+
+            template<typename T_Field>
+            HDINLINE float3_X TWTSTight<T_Field>::operator()(floatD_X const& cellIdx, float_X const currentStep) const
+            {
+                pmacc::math::Vector<floatD_X, detail::numComponents> zeroShifts;
+                for(uint32_t component = 0; component < detail::numComponents; ++component)
+                    zeroShifts[component] = floatD_X::create(0.0);
+                return getValue(cellIdx, zeroShifts, currentStep);
+            }
+
+            template<typename T_Field>
+            HDINLINE float3_X TWTSTight<T_Field>::getValue(
+                floatD_X const& cellIdx,
+                pmacc::math::Vector<floatD_X, detail::numComponents> const& extraShifts,
+                float_X const currentStep) const
+            {
+                float_64 const time_SI = float_64(currentStep) * dt - tdelay;
+
+                pmacc::math::Vector<floatD_64, detail::numComponents> const fieldPositions_SI
+                    = detail::getFieldPositions_SI(cellIdx, halfSimSize, extraShifts, unit_length, focus_y_SI, phi);
+
+                /* Single TWTS-Pulse */
+                return getTWTSField_Normalized(fieldPositions_SI, time_SI);
+            }
+
+            template<typename T_Field>
+            template<uint32_t T_component>
+            HDINLINE float_X TWTSTight<T_Field>::getComponent(floatD_X const& cellIdx, float_X const currentStep) const
+            {
+                // The optimized way is only implemented for 3D, fall back to full field calculation in 2d
+                if constexpr(simDim == DIM3)
+                {
+                    float_64 const time_SI = float_64(currentStep) * dt - tdelay;
+                    pmacc::math::Vector<floatD_X, detail::numComponents> zeroShifts;
+                    for(uint32_t component = 0; component < detail::numComponents; ++component)
+                        zeroShifts[component] = floatD_X::create(0.0);
+                    pmacc::math::Vector<floatD_64, detail::numComponents> const fieldPositions_SI
+                        = detail::getFieldPositions_SI(cellIdx, halfSimSize, zeroShifts, unit_length, focus_y_SI, phi);
+                    // Explicitly use a 3D vector so that this function compiles for 2D as well
+                    auto const pos = float3_64{
+                        fieldPositions_SI[T_component][0],
+                        fieldPositions_SI[T_component][1],
+                        fieldPositions_SI[T_component][2]};
+                    if constexpr(T_component == 0)
+                        return static_cast<float_X>(calcTWTSFieldX(pos, time_SI));
+                    else
+                    {
+                        if constexpr(T_component == 1)
+                        {
+                            return static_cast<float_X>(calcTWTSFieldY(pos, time_SI));
+                        }
+                        if constexpr(T_component == 2)
+                        {
+                            return static_cast<float_X>(calcTWTSFieldZ(pos, time_SI));
+                        }
+                    }
+
+                    // we should never be here
+                    return NAN;
+                }
+                if constexpr(simDim != DIM3)
+                    return (*this)(cellIdx, currentStep)[T_component];
+            }
+
+            template<typename T_Field>
+            HDINLINE std::tuple<
+                float_T,
+                float_T,
+                float_T,
+                float_T,
+                float_T,
+                float_T,
+                float_T,
+                float_T,
+                float_T,
+                float_T,
+                float_T>
+            TWTSTight<T_Field>::defineBasicHelperVariables() const
+            {
+                /* If phi < 0 the formulas below are not directly applicable.
+                 * Instead phi is taken positive, but the entire pulse rotated by 180 deg around the
+                 * y-axis of the coordinate system in this function.
+                 */
+                auto const absPhi = float_T(math::abs(phi));
+
+                /* Propagation speed of overlap normalized to the speed of light [Default: beta0=1.0] */
+                auto const beta0 = float_T(beta_0);
+
+                float_T sinPhi;
+                float_T cosPhi;
+                math::sincos(absPhi, sinPhi, cosPhi);
+                float_T const tanAlpha = (float_T(1.0) - beta0 * cosPhi) / (beta0 * sinPhi);
+
+                auto const cspeed = float_T(sim.si.getSpeedOfLight() / sim.unit.speed());
+                auto const lambda0 = float_T(wavelength_SI / sim.unit.length());
+                float_T const omega0 = float_T(2.0 * PI) * cspeed / lambda0;
+                /* factor 2  in tauG arises from definition convention in laser formula */
+                auto const tauG = float_T(pulselength_SI * 2.0 / sim.unit.time());
+                /* w0 is wx here --> w0 could be replaced by wx */
+                auto const w0 = float_T(w_x_SI / sim.unit.length());
+                auto const k = float_T(2.0 * PI / lambda0);
+
+                return std::make_tuple(absPhi, sinPhi, cosPhi, beta0, tanAlpha, cspeed, lambda0, omega0, tauG, w0, k);
+            }
+
+            template<typename T_Field>
+            HDINLINE std::tuple<float_T, float_T, float_T, float_T> TWTSTight<T_Field>::defineMinimalCoordinates(
+                float3_64 const& pos,
+                float_64 const& time) const
+            {
+                /* In order to calculate in single-precision and in order to account for errors in
+                 * the approximations far from the coordinate origin, we use the wavelength-periodicity and
+                 * the known propagation direction for realizing the laser pulse using relative coordinates
+                 * (i.e. from a finite coordinate range) only. All these quantities have to be calculated
+                 * in double precision.
+                 */
+                float_64 const deltaT = wavelength_SI / sim.si.getSpeedOfLight()
+                    / (1.0 - beta_0 * math::cos(precisionCast<float_64>(phi)));
+                float_64 const deltaY = beta_0 * sim.si.getSpeedOfLight() * deltaT;
+                float_64 const numberOfPeriods = math::floor(time / deltaT);
+                auto const timeMod = float_T(time - numberOfPeriods * deltaT);
+                auto const yMod = float_T(pos.y() - numberOfPeriods * deltaY);
+
+                auto const x = float_T(phiPositive * pos.x() / sim.unit.length());
+                auto const y = float_T(yMod / sim.unit.length());
+                auto const z = float_T(phiPositive * pos.z() / sim.unit.length());
+                auto const t = float_T(timeMod / sim.unit.time());
+
+                return std::make_tuple(x, y, z, t);
+            }
+
+            template<typename T_Field>
+            HDINLINE std::tuple<float_T, float_T, float_T, float_T, float_T> TWTSTight<
+                T_Field>::defineTrigonometryShortcuts(float_T const& absPhi, float_T const& sinPhi) const
+            {
+                /* Calculating shortcuts for speeding up field calculation */
+                float_T const tanPhi = math::tan(absPhi);
+                float_T const cotPhi = float_T(1.0) / tanPhi;
+                float_T const sinPhi_2 = sinPhi * sinPhi;
+                float_T const sinPolAngle = math::sin(polAngle);
+                float_T const cosPolAngle = math::cos(polAngle);
+
+                return std::make_tuple(tanPhi, cotPhi, sinPhi_2, sinPolAngle, cosPolAngle);
+            }
+
+            template<typename T_Field>
+            HDINLINE std::tuple<
+                alpaka::Complex<float_T>,
+                float_T,
+                float_T,
+                float_T,
+                float_T,
+                float_T,
+                float_T,
+                float_T,
+                alpaka::Complex<float_T>,
+                alpaka::Complex<float_T>,
+                float_T,
+                alpaka::Complex<float_T>,
+                alpaka::Complex<float_T>,
+                alpaka::Complex<float_T>>
+            TWTSTight<T_Field>::defineCommonHelperVariables(
+                float_T const& absPhi,
+                float_T const& sinPhi,
+                float_T const& cosPhi,
+                float_T const& beta0,
+                float_T const& tanAlpha,
+                float_T const& cspeed,
+                float_T const& lambda0,
+                float_T const& omega0,
+                float_T const& tauG,
+                float_T const& w0,
+                float_T const& k,
+                float_T const& x,
+                float_T const& y,
+                float_T const& z,
+                float_T const& t,
+                float_T const& cotPhi,
+                float_T const& sinPhi_2) const
+            {
+                using complex_T = alpaka::Complex<float_T>;
+                using complex_64 = alpaka::Complex<float_64>;
+
+                complex_T I = complex_T(0, 1);
+                float_T const x2 = x * x;
+                float_T const tauG2 = tauG * tauG;
+                float_T const psi0 = float_T(2.0) / k;
+                float_T const w02 = w0 * w0;
+                float_T const beta02 = beta0 * beta0;
+
+                float_T const nu = (y * cosPhi - z * sinPhi) / cspeed;
+                float_T const xi = (-z * cosPhi - y * sinPhi) * tanAlpha / cspeed;
+                complex_T const Xm = -z - complex_T(0, 0.5) * (k * w02);
+                complex_T const rhom = math::sqrt(x2 + math::cPow(Xm, static_cast<uint32_t>(2u)));
+                float_T const besselI0const = math::bessel::i0(k * k * sinPhi * w02 / float_T(2.0));
+                complex_T const besselJ0const = math::bessel::j0(k * sinPhi * rhom);
+                complex_T const besselJ1const = math::bessel::j1(k * sinPhi * rhom);
+
+                complex_T const zeroOrder = (beta0 * tauG)
+                    / (math::sqrt(float_T(2.0))
+                       * math::exp(
+                           beta02 * omega0 * math::cPow(t - nu + xi, static_cast<uint32_t>(2u))
+                           / (beta02 * omega0 * tauG2 - complex_T(0, 2) * (beta02 * (nu - xi) * cotPhi * cotPhi)
+                              + complex_T(0, 2) * (beta0 * (float_T(2.0) * nu - xi) * cotPhi / sinPhi)
+                              - complex_T(0, 2) * (nu / sinPhi_2)))
+                       * math::sqrt(
+                           ((beta02 * omega0 * tauG2) / float_T(2.0) - I * (beta02 * (nu - xi) * cotPhi * cotPhi)
+                            + I * (beta0 * (float_T(2.0) * nu - xi) * cotPhi / sinPhi) - I * (nu / sinPhi_2))
+                           / omega0));
+
+                return std::make_tuple(
+                    I,
+                    x2,
+                    tauG2,
+                    psi0,
+                    w02,
+                    beta02,
+                    nu,
+                    xi,
+                    rhom,
+                    Xm,
+                    besselI0const,
+                    besselJ0const,
+                    besselJ1const,
+                    zeroOrder);
+            }
+
+        } /* namespace twtstight */
+    } /* namespace templates */
+} /* namespace picongpu */
diff --git a/include/picongpu/fields/background/templates/twtstight/twtstight.tpp b/include/picongpu/fields/background/templates/twtstight/twtstight.tpp
deleted file mode 100644
index 4adfe86e75..0000000000
--- a/include/picongpu/fields/background/templates/twtstight/twtstight.tpp
+++ /dev/null
@@ -1,257 +0,0 @@
-/* Copyright 2014-2024 Alexander Debus
- *
- * This file is part of PIConGPU.
- *
- * PIConGPU is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * PIConGPU is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with PIConGPU.
- * If not, see <http://www.gnu.org/licenses/>.
- */
-
-
-#pragma once
-
-#include "picongpu/defines.hpp"
-#include "picongpu/fields/background/templates/twtstight/BField.tpp"
-#include "picongpu/fields/background/templates/twtstight/EField.tpp"
-// include "picongpu/fields/YeeCell.hpp"
-// include "picongpu/fields/background/templates/twtstight/EField.hpp"
-#include "picongpu/fields/background/templates/twtstight/GetInitialTimeDelay_SI.tpp"
-// include "picongpu/fields/background/templates/twtstight/getFieldPositions_SI.tpp"
-#include "picongpu/fields/background/templates/twtstight/twtstight.hpp"
-
-#include <pmacc/dimensions/DataSpace.hpp>
-#include <pmacc/mappings/simulation/SubGrid.hpp>
-#include <pmacc/math/Complex.hpp>
-// include <pmacc/math/Vector.hpp>
-#include <pmacc/types.hpp>
-
-namespace picongpu
-{
-    /* Load pre-defined background field */
-    namespace templates
-    {
-        /* Traveling-wave Thomson scattering laser pulse */
-        namespace twtstight
-        {
-            HINLINE
-            TWTSTight::TWTSTight(
-                float_64 const focus_y_SI,
-                float_64 const wavelength_SI,
-                float_64 const pulselength_SI,
-                float_64 const w_x_SI,
-                float_X const phi,
-                float_X const beta_0,
-                float_64 const tdelay_user_SI,
-                bool const auto_tdelay,
-                float_X const polAngle)
-                : focus_y_SI(focus_y_SI)
-                , wavelength_SI(wavelength_SI)
-                , pulselength_SI(pulselength_SI)
-                , w_x_SI(w_x_SI)
-                , phi(phi)
-                , phiPositive(float_X(1.0))
-                , beta_0(beta_0)
-                , tdelay_user_SI(tdelay_user_SI)
-                , dt(sim.si.getDt())
-                , unit_length(sim.unit.length())
-                , auto_tdelay(auto_tdelay)
-                , polAngle(polAngle)
-            {
-                /* Note: Enviroment-objects cannot be instantiated on CUDA GPU device. Since this is done
-                         on host (see fieldBackground.param), this is no problem.
-                 */
-                SubGrid<simDim> const& subGrid = Environment<simDim>::get().SubGrid();
-                halfSimSize = subGrid.getGlobalDomain().size / 2;
-                tdelay = detail::getInitialTimeDelay_SI(
-                    auto_tdelay,
-                    tdelay_user_SI,
-                    halfSimSize,
-                    pulselength_SI,
-                    focus_y_SI,
-                    phi,
-                    beta_0);
-                if(phi < 0.0_X)
-                    phiPositive = float_X(-1.0);
-            }
-
-            HDINLINE
-            std::tuple<
-                TWTSTight::float_T,
-                TWTSTight::float_T,
-                TWTSTight::float_T,
-                TWTSTight::float_T,
-                TWTSTight::float_T,
-                TWTSTight::float_T,
-                TWTSTight::float_T,
-                TWTSTight::float_T,
-                TWTSTight::float_T,
-                TWTSTight::float_T,
-                TWTSTight::float_T>
-            TWTSTight::defineBasicHelperVariables() const
-            {
-                /* If phi < 0 the formulas below are not directly applicable.
-                 * Instead phi is taken positive, but the entire pulse rotated by 180 deg around the
-                 * y-axis of the coordinate system in this function.
-                 */
-                auto const absPhi = float_T(math::abs(phi));
-
-                /* Propagation speed of overlap normalized to the speed of light [Default: beta0=1.0] */
-                auto const beta0 = float_T(beta_0);
-
-                float_T sinPhi;
-                float_T cosPhi;
-                pmacc::math::sincos(absPhi, sinPhi, cosPhi);
-                float_T const tanAlpha = (float_T(1.0) - beta0 * cosPhi) / (beta0 * sinPhi);
-
-                auto const cspeed = float_T(sim.si.getSpeedOfLight() / sim.unit.speed());
-                auto const lambda0 = float_T(wavelength_SI / sim.unit.length());
-                float_T const omega0 = float_T(2.0 * PI) * cspeed / lambda0;
-                /* factor 2  in tauG arises from definition convention in laser formula */
-                auto const tauG = float_T(pulselength_SI * 2.0 / sim.unit.time());
-                /* w0 is wx here --> w0 could be replaced by wx */
-                auto const w0 = float_T(w_x_SI / sim.unit.length());
-                auto const k = float_T(2.0 * PI / lambda0);
-
-                return std::make_tuple(absPhi, sinPhi, cosPhi, beta0, tanAlpha, cspeed, lambda0, omega0, tauG, w0, k);
-            }
-
-            HDINLINE
-            std::tuple<TWTSTight::float_T, TWTSTight::float_T, TWTSTight::float_T, TWTSTight::float_T> TWTSTight::
-                defineMinimalCoordinates(float3_64 const& pos, float_64 const& time) const
-            {
-                /* In order to calculate in single-precision and in order to account for errors in
-                 * the approximations far from the coordinate origin, we use the wavelength-periodicity and
-                 * the known propagation direction for realizing the laser pulse using relative coordinates
-                 * (i.e. from a finite coordinate range) only. All these quantities have to be calculated
-                 * in double precision.
-                 */
-                float_64 const deltaT = wavelength_SI / sim.si.getSpeedOfLight()
-                    / (1.0 - beta_0 * pmacc::math::cos(precisionCast<float_64>(phi)));
-                float_64 const deltaY = beta_0 * sim.si.getSpeedOfLight() * deltaT;
-                float_64 const numberOfPeriods = math::floor(time / deltaT);
-                auto const timeMod = float_T(time - numberOfPeriods * deltaT);
-                auto const yMod = float_T(pos.y() - numberOfPeriods * deltaY);
-
-                auto const x = float_T(phiPositive * pos.x() / sim.unit.length());
-                auto const y = float_T(yMod / sim.unit.length());
-                auto const z = float_T(phiPositive * pos.z() / sim.unit.length());
-                auto const t = float_T(timeMod / sim.unit.time());
-
-                return std::make_tuple(x, y, z, t);
-            }
-
-            HDINLINE
-            std::tuple<
-                TWTSTight::float_T,
-                TWTSTight::float_T,
-                TWTSTight::float_T,
-                TWTSTight::float_T,
-                TWTSTight::float_T>
-            TWTSTight::defineTrigonometryShortcuts(TWTSTight::float_T const& absPhi, TWTSTight::float_T const& sinPhi)
-                const
-            {
-                /* Calculating shortcuts for speeding up field calculation */
-                float_T const tanPhi = math::tan(absPhi);
-                float_T const cotPhi = float_T(1.0) / tanPhi;
-                float_T const sinPhi_2 = sinPhi * sinPhi;
-                float_T const sinPolAngle = math::sin(polAngle);
-                float_T const cosPolAngle = math::cos(polAngle);
-
-                return std::make_tuple(tanPhi, cotPhi, sinPhi_2, sinPolAngle, cosPolAngle);
-            }
-
-            HDINLINE
-            std::tuple<
-                alpaka::Complex<TWTSTight::float_T>,
-                TWTSTight::float_T,
-                TWTSTight::float_T,
-                TWTSTight::float_T,
-                TWTSTight::float_T,
-                TWTSTight::float_T,
-                TWTSTight::float_T,
-                TWTSTight::float_T,
-                alpaka::Complex<TWTSTight::float_T>,
-                alpaka::Complex<TWTSTight::float_T>,
-                TWTSTight::float_T,
-                alpaka::Complex<TWTSTight::float_T>,
-                alpaka::Complex<TWTSTight::float_T>,
-                alpaka::Complex<TWTSTight::float_T>>
-            TWTSTight::defineCommonHelperVariables(
-                TWTSTight::float_T const& absPhi,
-                TWTSTight::float_T const& sinPhi,
-                TWTSTight::float_T const& cosPhi,
-                TWTSTight::float_T const& beta0,
-                TWTSTight::float_T const& tanAlpha,
-                TWTSTight::float_T const& cspeed,
-                TWTSTight::float_T const& lambda0,
-                TWTSTight::float_T const& omega0,
-                TWTSTight::float_T const& tauG,
-                TWTSTight::float_T const& w0,
-                TWTSTight::float_T const& k,
-                TWTSTight::float_T const& x,
-                TWTSTight::float_T const& y,
-                TWTSTight::float_T const& z,
-                TWTSTight::float_T const& t,
-                TWTSTight::float_T const& cotPhi,
-                TWTSTight::float_T const& sinPhi_2) const
-            {
-                using complex_T = alpaka::Complex<float_T>;
-                using complex_64 = alpaka::Complex<float_64>;
-
-                complex_T I = complex_T(0, 1);
-                float_T const x2 = x * x;
-                float_T const tauG2 = tauG * tauG;
-                float_T const psi0 = float_T(2.0) / k;
-                float_T const w02 = w0 * w0;
-                float_T const beta02 = beta0 * beta0;
-
-                float_T const nu = (y * cosPhi - z * sinPhi) / cspeed;
-                float_T const xi = (-z * cosPhi - y * sinPhi) * tanAlpha / cspeed;
-                complex_T const Xm = -z - complex_T(0, 0.5) * (k * w02);
-                complex_T const rhom = math::sqrt(x2 + pmacc::math::cPow(Xm, static_cast<uint32_t>(2u)));
-                float_T const besselI0const = pmacc::math::bessel::i0(k * k * sinPhi * w02 / float_T(2.0));
-                complex_T const besselJ0const = pmacc::math::bessel::j0(k * sinPhi * rhom);
-                complex_T const besselJ1const = pmacc::math::bessel::j1(k * sinPhi * rhom);
-
-                complex_T const zeroOrder = (beta0 * tauG)
-                    / (math::sqrt(float_T(2.0))
-                       * math::exp(
-                           beta02 * omega0 * pmacc::math::cPow(t - nu + xi, static_cast<uint32_t>(2u))
-                           / (beta02 * omega0 * tauG2 - complex_T(0, 2) * (beta02 * (nu - xi) * cotPhi * cotPhi)
-                              + complex_T(0, 2) * (beta0 * (float_T(2.0) * nu - xi) * cotPhi / sinPhi)
-                              - complex_T(0, 2) * (nu / sinPhi_2)))
-                       * math::sqrt(
-                           ((beta02 * omega0 * tauG2) / float_T(2.0) - I * (beta02 * (nu - xi) * cotPhi * cotPhi)
-                            + I * (beta0 * (float_T(2.0) * nu - xi) * cotPhi / sinPhi) - I * (nu / sinPhi_2))
-                           / omega0));
-
-                return std::make_tuple(
-                    I,
-                    x2,
-                    tauG2,
-                    psi0,
-                    w02,
-                    beta02,
-                    nu,
-                    xi,
-                    rhom,
-                    Xm,
-                    besselI0const,
-                    besselJ0const,
-                    besselJ1const,
-                    zeroOrder);
-            }
-
-        } /* namespace twtstight */
-    } /* namespace templates */
-} /* namespace picongpu */
diff --git a/share/picongpu/benchmarks/TWEAC-FOM/include/picongpu/param/fieldBackground.param b/share/picongpu/benchmarks/TWEAC-FOM/include/picongpu/param/fieldBackground.param
index 6eb11ed138..7aaa406f53 100644
--- a/share/picongpu/benchmarks/TWEAC-FOM/include/picongpu/param/fieldBackground.param
+++ b/share/picongpu/benchmarks/TWEAC-FOM/include/picongpu/param/fieldBackground.param
@@ -24,7 +24,7 @@
 
 #pragma once
 
-#include "picongpu/fields/background/templates/twtstight/twtstight.hpp"
+#include "picongpu/fields/background/templates/twtstight/TWTSTight.hpp"
 
 /** Load parameters of the TWTS background laser field */
 #include "TwtsBackgroundLaser.param"
diff --git a/share/picongpu/benchmarks/TWEAC-FOM/include/picongpu/param/incidentField.param b/share/picongpu/benchmarks/TWEAC-FOM/include/picongpu/param/incidentField.param
index 4d6455ca67..3a1cfeed27 100644
--- a/share/picongpu/benchmarks/TWEAC-FOM/include/picongpu/param/incidentField.param
+++ b/share/picongpu/benchmarks/TWEAC-FOM/include/picongpu/param/incidentField.param
@@ -55,8 +55,8 @@
 
 #pragma once
 
-#include "picongpu/fields/background/templates/twtstight/twtstight.hpp"
-#include "picongpu/fields/incidentField/profiles/profiles.def"
+#include "picongpu/fields/background/templates/twtstight/TWTSTight.hpp"
+#include "picongpu/fields/incidentField/profiles/profiles.def
 
 /** Load parameters of the TWTS background laser field */
 #include "TwtsBackgroundLaser.param"
@@ -278,7 +278,7 @@ namespace picongpu
              *
              * For moving window simulations, POSITION for the YMax side can be located outside the initially
              * simulated volume. In this case, parts of the generation surface outside of the currently simulated
-             * volume is are treated as if they had zero incident field and it is user's responsibility to apply a
+             * volume are treated as if they had zero incident field and it is user's responsibility to apply a
              * source matching such a case.
              */
             constexpr int32_t POSITION[3][2] = {
diff --git a/share/picongpu/examples/Bunch/include/picongpu/param/fieldBackground.param b/share/picongpu/examples/Bunch/include/picongpu/param/fieldBackground.param
index 06156131e8..71a0408491 100644
--- a/share/picongpu/examples/Bunch/include/picongpu/param/fieldBackground.param
+++ b/share/picongpu/examples/Bunch/include/picongpu/param/fieldBackground.param
@@ -21,7 +21,7 @@
 
 /** Load pre-defined templates */
 #if PARAM_TWTSTIGHT == 1
-#    include "picongpu/fields/background/templates/twtstight/twtstight.hpp"
+#    include "picongpu/fields/background/templates/twtstight/TWTSTight.hpp"
 #else
 #    include "picongpu/fields/background/templates/TWTS/TWTS.hpp"
 #endif

From 69121cc0192e340044a0e399c70d0e9c6de658be Mon Sep 17 00:00:00 2001
From: Alexander Debus <a.debus@hzdr.de>
Date: Thu, 7 Nov 2024 02:05:08 -0500
Subject: [PATCH 5/5] Fix typos and attributions

---
 .../background/templates/twtstight/TWTSTight.hpp       | 10 +++++-----
 .../background/templates/twtstight/TWTSTight.tpp       |  2 +-
 .../include/picongpu/param/incidentField.param         |  2 +-
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/include/picongpu/fields/background/templates/twtstight/TWTSTight.hpp b/include/picongpu/fields/background/templates/twtstight/TWTSTight.hpp
index 5238fd1cda..b4659e0583 100644
--- a/include/picongpu/fields/background/templates/twtstight/TWTSTight.hpp
+++ b/include/picongpu/fields/background/templates/twtstight/TWTSTight.hpp
@@ -1,4 +1,4 @@
-/* Copyright 2014-2024 Alexander Debus
+/* Copyright 2014-2024 Alexander Debus, Axel Huebl, Sergei Bastrakov
  *
  * This file is part of PIConGPU.
  *
@@ -271,7 +271,7 @@ namespace picongpu
 
                 /** Calculate the Ex(r,t) or Bx(r,t) field
                  *
-                 * @param pos Spatial position of the target field.
+                 * @param pos Spatial position of the target field
                  * @param time Absolute time (SI, including all offsets and transformations)
                  *  for calculating the field
                  * @tparam T_Field Specializes for E- or B-Field
@@ -280,7 +280,7 @@ namespace picongpu
 
                 /** Calculate the Ey(r,t) or By(r,t) field
                  *
-                 * @param pos Spatial position of the target field.
+                 * @param pos Spatial position of the target field
                  * @param time Absolute time (SI, including all offsets and transformations)
                  *  for calculating the field
                  * @tparam T_Field Specializes for E- or B-Field
@@ -289,10 +289,10 @@ namespace picongpu
 
                 /** Calculate the Ez(r,t) or Bz(r,t) field
                  *
-                 * @param pos Spatial position of the target field.
+                 * @param pos Spatial position of the target field
                  * @param time Absolute time (SI, including all offsets and transformations)
                  *  for calculating the field
-                 * @return Ey- or Bz-field component of the TWTS field in SI units */
+                 * @return Ez- or Bz-field component of the TWTS field in SI units */
                 HDINLINE float_T calcTWTSFieldZ(float3_64 const& pos, float_64 const time) const;
 
                 /** Calculate the E- or B-field vector of the TWTS laser in SI units.
diff --git a/include/picongpu/fields/background/templates/twtstight/TWTSTight.tpp b/include/picongpu/fields/background/templates/twtstight/TWTSTight.tpp
index 4a55625d52..df1d9f9076 100644
--- a/include/picongpu/fields/background/templates/twtstight/TWTSTight.tpp
+++ b/include/picongpu/fields/background/templates/twtstight/TWTSTight.tpp
@@ -1,4 +1,4 @@
-/* Copyright 2014-2024 Alexander Debus
+/* Copyright 2014-2024 Alexander Debus, Axel Huebl, Sergei Bastrakov
  *
  * This file is part of PIConGPU.
  *
diff --git a/share/picongpu/benchmarks/TWEAC-FOM/include/picongpu/param/incidentField.param b/share/picongpu/benchmarks/TWEAC-FOM/include/picongpu/param/incidentField.param
index 3a1cfeed27..fe41775694 100644
--- a/share/picongpu/benchmarks/TWEAC-FOM/include/picongpu/param/incidentField.param
+++ b/share/picongpu/benchmarks/TWEAC-FOM/include/picongpu/param/incidentField.param
@@ -56,7 +56,7 @@
 #pragma once
 
 #include "picongpu/fields/background/templates/twtstight/TWTSTight.hpp"
-#include "picongpu/fields/incidentField/profiles/profiles.def
+#include "picongpu/fields/incidentField/profiles/profiles.def"
 
 /** Load parameters of the TWTS background laser field */
 #include "TwtsBackgroundLaser.param"