diff --git a/.azure-pipelines.yml b/.azure-pipelines.yml index 1806bdb00d1..7e4edc2dc36 100644 --- a/.azure-pipelines.yml +++ b/.azure-pipelines.yml @@ -41,7 +41,7 @@ jobs: WARPX_CI_EB: 'TRUE' # default: 60; maximum: 360 - timeoutInMinutes: 180 + timeoutInMinutes: 240 steps: # set up caches: diff --git a/.github/workflows/cuda.yml b/.github/workflows/cuda.yml index e7ecf36a034..f16b405973e 100644 --- a/.github/workflows/cuda.yml +++ b/.github/workflows/cuda.yml @@ -115,7 +115,7 @@ jobs: which nvcc || echo "nvcc not in PATH!" git clone https://github.com/AMReX-Codes/amrex.git ../amrex - cd ../amrex && git checkout --detach 68244ec91d118b5d4cc21f93376eaae8b56c51eb && cd - + cd ../amrex && git checkout --detach 2230caa24c7d4bd07edb08b54e9368f9c73eae6e && cd - make COMP=gcc QED=FALSE USE_MPI=TRUE USE_GPU=TRUE USE_OMP=FALSE USE_PSATD=TRUE USE_CCACHE=TRUE -j 4 ccache -s diff --git a/Docs/source/developers/particles.rst b/Docs/source/developers/particles.rst index a0182f3845f..4b203e483c4 100644 --- a/Docs/source/developers/particles.rst +++ b/Docs/source/developers/particles.rst @@ -36,17 +36,14 @@ A typical loop over particles reads: } } -The innermost step ``[MY INNER LOOP]`` typically calls ``amrex::ParallelFor`` to perform operations on all particles in a portable way. For this reasons, the particle data needs to be converted in plain-old-data structures. The innermost loop in the code snippet above could look like: +The innermost step ``[MY INNER LOOP]`` typically calls ``amrex::ParallelFor`` to perform operations on all particles in a portable way. The innermost loop in the code snippet above could look like: .. code-block:: cpp - // Get Array-Of-Struct particle data, also called data - // (x, y, z, id, cpu) - const auto& particles = pti.GetArrayOfStructs(); // Get Struct-Of-Array particle data, also called attribs - // (ux, uy, uz, w, Exp, Ey, Ez, Bx, By, Bz) + // (x, y, z, ux, uy, uz, w) auto& attribs = pti.GetAttribs(); - auto& Exp = attribs[PIdx::Ex]; + auto& x = attribs[PIdx::x]; // [...] // Number of particles in this box const long np = pti.numParticles(); @@ -66,7 +63,6 @@ On a loop over boxes in a ``MultiFab`` (``MFIter``), it can be useful to access const int tile_id = mfi.LocalTileIndex(); // Get GPU-friendly arrays of particle data auto& ptile = GetParticles(lev)[std::make_pair(grid_id,tile_id)]; - ParticleType* pp = particle_tile.GetArrayOfStructs()().data(); // Only need attribs (i.e., SoA data) auto& soa = ptile.GetStructOfArrays(); // As an example, let's get the ux momentum diff --git a/Docs/source/usage/parameters.rst b/Docs/source/usage/parameters.rst index 608c482a383..7409f9aaebf 100644 --- a/Docs/source/usage/parameters.rst +++ b/Docs/source/usage/parameters.rst @@ -2032,8 +2032,8 @@ Particle push, charge and current deposition, field gathering If ``algo.particle_pusher`` is not specified, ``boris`` is the default. -* ``algo.particle_shape`` (`integer`; `1`, `2`, or `3`) - The order of the shape factors (splines) for the macro-particles along all spatial directions: `1` for linear, `2` for quadratic, `3` for cubic. +* ``algo.particle_shape`` (`integer`; `1`, `2`, `3`, or `4`) + The order of the shape factors (splines) for the macro-particles along all spatial directions: `1` for linear, `2` for quadratic, `3` for cubic, `4` for quartic. Low-order shape factors result in faster simulations, but may lead to more noisy results. High-order shape factors are computationally more expensive, but may increase the overall accuracy of the results. For production runs it is generally safer to use high-order shape factors, such as cubic order. @@ -2752,10 +2752,11 @@ The data collected at each boundary is written out to a subdirectory of the diag By default, all of the collected particle data is written out at the end of the simulation. Optionally, the ``.intervals`` parameter can be given to specify writing out the data more often. This can be important if a large number of particles are lost, avoiding filling up memory with the accumulated lost particle data. -In addition to their usual attributes, the saved particles have an integer attribute ``stepScraped``, which -indicates the PIC iteration at which each particle was absorbed at the boundary, -and a real attribute ``deltaTimeScraped``, which indicates the fraction of time between the time associated to the last step -and the exact time when each particle hits the boundary. ``deltaTimeScraped`` is a real between ``0`` and ``dt``. +In addition to their usual attributes, the saved particles have + an integer attribute ``stepScraped``, which indicates the PIC iteration at which each particle was absorbed at the boundary, + a real attribute ``deltaTimeScraped``, which indicates the time between the time associated to `stepScraped` + and the exact time when each particle hits the boundary. + 3 real attributes ``nx``, ``ny``, ``nz``, which represents the three components of the normal to the boundary on the point of contact of the particles (not saved if they reach non-EB boundaries) ``BoundaryScrapingDiagnostics`` can be used with ``..random_fraction``, ``..uniform_stride``, and ``..plot_filter_function``, which have the same behavior as for ``FullDiagnostics``. For ``BoundaryScrapingDiagnostics``, these filters are applied at the time the data is written to file. An implication of this is that more particles may initially be accumulated in memory than are ultimately written. ``t`` in ``plot_filter_function`` refers to the time the diagnostic is written rather than the time the particle crossed the boundary. diff --git a/Examples/Tests/langmuir/analysis_2d.py b/Examples/Tests/langmuir/analysis_2d.py index e137d50229d..b3327703b82 100755 --- a/Examples/Tests/langmuir/analysis_2d.py +++ b/Examples/Tests/langmuir/analysis_2d.py @@ -38,6 +38,9 @@ # Parse test name and check if Vay current deposition (algo.current_deposition=vay) is used vay_deposition = True if re.search( 'Vay_deposition', fn ) else False +# Parse test name and check if particle_shape = 4 is used +particle_shape_4 = True if re.search('particle_shape_4', fn) else False + # Parameters (these parameters must match the parameters in `inputs.multi.rt`) epsilon = 0.01 n = 4.e24 @@ -114,7 +117,11 @@ def get_theoretical_field( field, t ): fig.tight_layout() fig.savefig('Langmuir_multi_2d_analysis.png', dpi = 200) -tolerance_rel = 0.05 +if particle_shape_4: +# lower fidelity, due to smoothing + tolerance_rel = 0.07 +else: + tolerance_rel = 0.05 print("error_rel : " + str(error_rel)) print("tolerance_rel: " + str(tolerance_rel)) diff --git a/Examples/Tests/point_of_contact_EB/analysis.py b/Examples/Tests/point_of_contact_EB/analysis.py index 5e5f714af10..7ac354423f9 100755 --- a/Examples/Tests/point_of_contact_EB/analysis.py +++ b/Examples/Tests/point_of_contact_EB/analysis.py @@ -26,35 +26,45 @@ ts_scraping = OpenPMDTimeSeries('./diags/diag2/particles_at_eb/') it=ts_scraping.iterations -step_scraped, delta, x, y, z=ts_scraping.get_particle( ['stepScraped','deltaTimeScraped','x','y','z'], species='electron', iteration=it ) -delta_reduced=delta[0]*1e10 +step_scraped, time_scraped, x, y, z, nx, ny, nz=ts_scraping.get_particle( ['stepScraped','timeScraped','x','y','z', 'nx', 'ny', 'nz'], species='electron', iteration=it ) +time_scraped_reduced=time_scraped[0]*1e10 # Analytical results calculated x_analytic=-0.1983 y_analytic=0.02584 z_analytic=0.0000 +nx_analytic=-0.99 +ny_analytic=0.13 +nz_analytic=0.0 #result obtained by analysis of simulations step_ref=3 delta_reduced_ref=0.59 print('NUMERICAL coordinates of the point of contact:') -print('step_scraped=%d, delta=%5.4f e-10, x=%5.4f, y=%5.4f, z=%5.4f' % (step_scraped[0],delta_reduced,x[0], y[0], z[0])) +print('step_scraped=%d, time_stamp=%5.4f e-10, x=%5.4f, y=%5.4f, z=%5.4f, nx=%5.4f, ny=%5.4f, nz=%5.4f' % (step_scraped[0],time_reduced,x[0], y[0], z[0], nx[0], ny[0], nz[0])) print('\n') print('ANALYTICAL coordinates of the point of contact:') -print('step_scraped=%d, delta=%5.4f e-10, x=%5.4f, y=%5.4f, z=%5.4f' % (step_ref, delta_reduced_ref, x_analytic, y_analytic, z_analytic)) +print('step_scraped=%d, time_stamp=%5.4f e-10, x=%5.4f, y=%5.4f, z=%5.4f, nx=%5.4f, ny=%5.4f, nz=%5.4f' % (step, time_reduced, x_analytic, y_analytic, z_analytic, nx_analytic, ny_analytic, nz_analytic)) tolerance=0.001 -tolerance_t=0.01 +tolerance_t=0.003 +tolerance_n=0.01 print("tolerance = "+ str(tolerance *100) + '%') print("tolerance for the time = "+ str(tolerance_t *100) + '%') +print("tolerance for the normal components = "+ str(tolerance_n *100) + '%') diff_step=np.abs((step_scraped[0]-step_ref)/step_ref) diff_delta=np.abs((delta_reduced-delta_reduced_ref)/delta_reduced_ref) diff_x=np.abs((x[0]-x_analytic)/x_analytic) diff_y=np.abs((y[0]-y_analytic)/y_analytic) +diff_nx=np.abs((nx[0]-nx_analytic)/nx_analytic) +diff_ny=np.abs((ny[0]-ny_analytic)/ny_analytic) print("percentage error for x = %5.4f %%" %(diff_x *100)) print("percentage error for y = %5.4f %%" %(diff_y *100)) +print("percentage error for nx = %5.2f %%" %(diff_nx *100)) +print("percentage error for ny = %5.2f %%" %(diff_ny *100)) +print("nz = %5.2f " %(nz[0])) -assert (diff_x < tolerance) and (diff_y < tolerance) and (np.abs(z[0]) < 1e-8) and (diff_step < 1e-8) and (diff_delta < tolerance_t) , 'Test point_of_contact did not pass' +assert (diff_x < tolerance) and (diff_y < tolerance) and (np.abs(z[0]) < 1e-8) and (diff_step < 1e-8) and (diff_time < tolerance_t) and (diff_nx < tolerance_n) and (diff_ny < tolerance_n) and (np.abs(nz) < 1e-8) , 'Test point_of_contact did not pass' diff --git a/Python/pywarpx/particle_containers.py b/Python/pywarpx/particle_containers.py index ec584523fe4..ee15b89ca4a 100644 --- a/Python/pywarpx/particle_containers.py +++ b/Python/pywarpx/particle_containers.py @@ -774,7 +774,9 @@ def get_particle_boundary_buffer(self, species_name, boundary, comp_name, level) comp_name : str The component of the array data that will be returned. "x", "y", "z", "ux", "uy", "uz", "w" - "stepScraped","deltaTimeScraped", "nx", "ny", "nz" + + "stepScraped","timeScraped", + if boundary='eb': "nx", "ny", "nz" level : int Which AMR level to retrieve scraped particle data from. diff --git a/Regression/Checksum/benchmarks_json/Langmuir_multi_2d_psatd_Vay_deposition_particle_shape_4.json b/Regression/Checksum/benchmarks_json/Langmuir_multi_2d_psatd_Vay_deposition_particle_shape_4.json new file mode 100644 index 00000000000..10f16df934f --- /dev/null +++ b/Regression/Checksum/benchmarks_json/Langmuir_multi_2d_psatd_Vay_deposition_particle_shape_4.json @@ -0,0 +1,29 @@ +{ + "electrons": { + "particle_momentum_x": 5.696397887862144e-20, + "particle_momentum_y": 0.0, + "particle_momentum_z": 5.696397887862156e-20, + "particle_position_x": 0.6553600000001614, + "particle_position_y": 0.6553600000001614, + "particle_weight": 3200000000000000.5 + }, + "lev=0": { + "Ex": 3616987165668.129, + "Ey": 0.0, + "Ez": 3616987165667.9756, + "divE": 2.269072850514105e+18, + "jx": 1.0121499183289864e+16, + "jy": 0.0, + "jz": 1.0121499183289892e+16, + "part_per_cell": 131072.0, + "rho": 20090797.21028113 + }, + "positrons": { + "particle_momentum_x": 5.696397887862144e-20, + "particle_momentum_y": 0.0, + "particle_momentum_z": 5.696397887862156e-20, + "particle_position_x": 0.6553600000001614, + "particle_position_y": 0.6553600000001614, + "particle_weight": 3200000000000000.5 + } +} diff --git a/Regression/WarpX-GPU-tests.ini b/Regression/WarpX-GPU-tests.ini index 2760beed53b..13cbd2c22d5 100644 --- a/Regression/WarpX-GPU-tests.ini +++ b/Regression/WarpX-GPU-tests.ini @@ -60,7 +60,7 @@ emailBody = Check https://ccse.lbl.gov/pub/GpuRegressionTesting/WarpX/ for more [AMReX] dir = /home/regtester/git/amrex/ -branch = 68244ec91d118b5d4cc21f93376eaae8b56c51eb +branch = 2230caa24c7d4bd07edb08b54e9368f9c73eae6e [source] dir = /home/regtester/git/WarpX diff --git a/Regression/WarpX-tests.ini b/Regression/WarpX-tests.ini index 9fb763b683e..08c3a4bb805 100644 --- a/Regression/WarpX-tests.ini +++ b/Regression/WarpX-tests.ini @@ -59,7 +59,7 @@ emailBody = Check https://ccse.lbl.gov/pub/RegressionTesting/WarpX/ for more det [AMReX] dir = /home/regtester/AMReX_RegTesting/amrex/ -branch = 68244ec91d118b5d4cc21f93376eaae8b56c51eb +branch = 2230caa24c7d4bd07edb08b54e9368f9c73eae6e [source] dir = /home/regtester/AMReX_RegTesting/warpx @@ -1434,6 +1434,25 @@ particleTypes = electrons positrons analysisRoutine = Examples/Tests/langmuir/analysis_2d.py analysisOutputImage = langmuir_multi_2d_analysis.png +[Langmuir_multi_2d_psatd_Vay_deposition_particle_shape_4] +buildDir = . +inputFile = Examples/Tests/langmuir/inputs_2d +runtime_params = algo.maxwell_solver=psatd amr.max_grid_size=128 algo.current_deposition=vay diag1.electrons.variables=w ux uy uz diag1.positrons.variables=w ux uy uz diag1.fields_to_plot = Ex Ey Ez jx jy jz part_per_cell rho divE warpx.cfl = 0.7071067811865475 algo.particle_shape=4 +dim = 2 +addToCompileString = USE_PSATD=TRUE +cmakeSetupOpts = -DWarpX_DIMS=2 -DWarpX_PSATD=ON +restartTest = 0 +useMPI = 1 +numprocs = 1 +useOMP = 1 +numthreads = 1 +compileTest = 0 +doVis = 0 +compareParticles = 1 +particleTypes = electrons positrons +analysisRoutine = Examples/Tests/langmuir/analysis_2d.py +analysisOutputImage = langmuir_multi_2d_analysis.png + [Langmuir_multi_2d_psatd_Vay_deposition_nodal] buildDir = . inputFile = Examples/Tests/langmuir/inputs_2d diff --git a/Source/EmbeddedBoundary/DistanceToEB.H b/Source/EmbeddedBoundary/DistanceToEB.H index 8f00984eefa..b46def938db 100644 --- a/Source/EmbeddedBoundary/DistanceToEB.H +++ b/Source/EmbeddedBoundary/DistanceToEB.H @@ -34,56 +34,95 @@ void normalize (amrex::RealVect& a) noexcept a[2] *= inv_norm); } + + +// This function calculates the normal vector using the nodal and cell-centered data. +// i,j,k are the index of the nearest node to the left of the point at which we interpolate. +// W are the interpolation weight for the left and right nodes (for the 0th component and 1st component respectively) +// ic,jc,kc are the index of the nearest cell-center to the left of the point at which we interpolate. AMREX_GPU_HOST_DEVICE AMREX_INLINE amrex::RealVect interp_normal (int i, int j, int k, const amrex::Real W[AMREX_SPACEDIM][2], + int ic, int jc, int kc, const amrex::Real Wc[AMREX_SPACEDIM][2], amrex::Array4 const& phi, amrex::GpuArray const& dxi) noexcept { + #if (defined WARPX_DIM_3D) amrex::RealVect normal{0.0, 0.0, 0.0}; + for (int iic = 0; iic < 2; ++iic) { + for (int kk = 0; kk < 2; ++kk) { + for (int jj=0; jj< 2; ++jj) { + for (int ii = 0; ii < 2; ++ii) { + int icstart = ic + iic; + amrex::Real sign = (ii%2)*2. - 1.; + int wccomp = static_cast(iic%2); + int w1comp = static_cast(jj%2); + int w2comp = static_cast(kk%2); + normal[0] += sign * phi(icstart + ii, j + jj, k + kk) * dxi[0] * Wc[0][wccomp] * W[1][w1comp] * W[2][w2comp]; + } + } + } + } + for (int iic = 0; iic < 2; ++iic) { + for (int kk = 0; kk < 2; ++kk) { + for (int ii=0; ii< 2; ++ii) { + for (int jj = 0; jj < 2; ++jj) { + int jcstart = jc + iic; + amrex::Real sign = (jj%2)*2. - 1.; + int wccomp = static_cast(iic%2); + int w1comp = static_cast(ii%2); + int w2comp = static_cast(kk%2); + normal[1] += sign * phi(i + ii, jcstart + jj, k + kk) * dxi[1] * W[0][w1comp] * Wc[1][wccomp] * W[2][w2comp]; + } + } + } + } + for (int iic = 0; iic < 2; ++iic) { + for (int jj = 0; jj < 2; ++jj) { + for (int ii=0; ii< 2; ++ii) { + for (int kk = 0; kk < 2; ++kk) { + int kcstart = kc + iic; + amrex::Real sign = (kk%2)*2. - 1.; + int wccomp = static_cast(iic%2); + int w1comp = static_cast(ii%2); + int w2comp = static_cast(jj%2); + normal[2] += sign * phi(i + ii, j + jj, kcstart + kk) * dxi[2] * W[0][w1comp] * W[1][w2comp] * Wc[2][wccomp]; + } + } + } + } - normal[0] -= phi(i, j , k ) * dxi[0] * W[1][0] * W[2][0]; - normal[0] += phi(i+1, j , k ) * dxi[0] * W[1][0] * W[2][0]; - normal[0] -= phi(i, j+1, k ) * dxi[0] * W[1][1] * W[2][0]; - normal[0] += phi(i+1, j+1, k ) * dxi[0] * W[1][1] * W[2][0]; - normal[0] -= phi(i, j , k+1) * dxi[0] * W[1][0] * W[2][1]; - normal[0] += phi(i+1, j , k+1) * dxi[0] * W[1][0] * W[2][1]; - normal[0] -= phi(i , j+1, k+1) * dxi[0] * W[1][1] * W[2][1]; - normal[0] += phi(i+1, j+1, k+1) * dxi[0] * W[1][1] * W[2][1]; - - normal[1] -= phi(i, j , k ) * dxi[1] * W[0][0] * W[2][0]; - normal[1] += phi(i , j+1, k ) * dxi[1] * W[0][0] * W[2][0]; - normal[1] -= phi(i+1, j , k ) * dxi[1] * W[0][1] * W[2][0]; - normal[1] += phi(i+1, j+1, k ) * dxi[1] * W[0][1] * W[2][0]; - normal[1] -= phi(i, j , k+1) * dxi[1] * W[0][0] * W[2][1]; - normal[1] += phi(i , j+1, k+1) * dxi[1] * W[0][0] * W[2][1]; - normal[1] -= phi(i+1, j , k+1) * dxi[1] * W[0][1] * W[2][1]; - normal[1] += phi(i+1, j+1, k+1) * dxi[1] * W[0][1] * W[2][1]; - - normal[2] -= phi(i , j , k ) * dxi[2] * W[0][0] * W[1][0]; - normal[2] += phi(i , j , k+1) * dxi[2] * W[0][0] * W[1][0]; - normal[2] -= phi(i+1, j , k ) * dxi[2] * W[0][1] * W[1][0]; - normal[2] += phi(i+1, j , k+1) * dxi[2] * W[0][1] * W[1][0]; - normal[2] -= phi(i, j+1, k ) * dxi[2] * W[0][0] * W[1][1]; - normal[2] += phi(i , j+1, k+1) * dxi[2] * W[0][0] * W[1][1]; - normal[2] -= phi(i+1, j+1, k ) * dxi[2] * W[0][1] * W[1][1]; - normal[2] += phi(i+1, j+1, k+1) * dxi[2] * W[0][1] * W[1][1]; #elif defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ) amrex::RealVect normal{0.0, 0.0}; + for (int iic = 0; iic < 2; ++iic) { + for (int jj=0; jj< 2; ++jj) { + for (int ii = 0; ii < 2; ++ii) { + int icstart = ic + iic; + amrex::Real sign = (ii%2)*2. - 1.; + int wccomp = static_cast(iic%2); + int w1comp = static_cast(jj%2); + normal[0] += sign * phi(icstart + ii, j + jj, k) * dxi[0] * Wc[0][wccomp] * W[1][w1comp]; + } + } + } + for (int iic = 0; iic < 2; ++iic) { + for (int ii=0; ii< 2; ++ii) { + for (int jj = 0; jj < 2; ++jj) { + int jcstart = jc + iic; + amrex::Real sign = (jj%2)*2. - 1.; + int wccomp = static_cast(iic%2); + int w1comp = static_cast(ii%2); + normal[1] += sign * phi(i + ii, jcstart + jj, k) * dxi[1] * W[0][w1comp] * Wc[1][wccomp]; + } + } + } + amrex::ignore_unused(kc); - normal[0] -= phi(i, j , k) * dxi[0] * W[1][0]; - normal[0] += phi(i+1, j , k) * dxi[0] * W[1][0]; - normal[0] -= phi(i, j+1, k) * dxi[0] * W[1][1]; - normal[0] += phi(i+1, j+1, k) * dxi[0] * W[1][1]; - - normal[1] -= phi(i, j , k) * dxi[1] * W[0][0]; - normal[1] += phi(i , j+1, k) * dxi[1] * W[0][0]; - normal[1] -= phi(i+1, j , k) * dxi[1] * W[0][1]; - normal[1] += phi(i+1, j+1, k) * dxi[1] * W[0][1]; #else + amrex::ignore_unused(i, j, k, ic, jc, kc, W, Wc, phi, dxi); amrex::RealVect normal{0.0, 0.0}; - amrex::ignore_unused(i, j, k, W, phi, dxi); WARPX_ABORT_WITH_MESSAGE("Error: interp_distance not yet implemented in 1D"); + #endif return normal; } diff --git a/Source/EmbeddedBoundary/ParticleScraper.H b/Source/EmbeddedBoundary/ParticleScraper.H index 7531a03487b..7d6934cbb9c 100644 --- a/Source/EmbeddedBoundary/ParticleScraper.H +++ b/Source/EmbeddedBoundary/ParticleScraper.H @@ -10,6 +10,7 @@ #include "EmbeddedBoundary/DistanceToEB.H" #include "Particles/Pusher/GetAndSetPosition.H" + #include #include @@ -182,16 +183,17 @@ scrapeParticles (PC& pc, const amrex::Vector& distance_t int i, j, k; amrex::Real W[AMREX_SPACEDIM][2]; - ablastr::particles::compute_weights_nodal(xp, yp, zp, plo, dxi, i, j, k, W); - + ablastr::particles::compute_weights(xp, yp, zp, plo, dxi, i, j, k, W); amrex::Real phi_value = ablastr::particles::interp_field_nodal(i, j, k, W, phi); if (phi_value < 0.0) { - - amrex::RealVect normal = DistanceToEB::interp_normal(i, j, k, W, phi, dxi); + int ic, jc, kc; // Cell-centered indices + int nodal; + amrex::Real Wc[AMREX_SPACEDIM][2]; // Cell-centered weights + ablastr::particles::compute_weights(xp, yp, zp, plo, dxi, ic, jc, kc, Wc, nodal=0); + amrex::RealVect normal = DistanceToEB::interp_normal(i, j, k, W, ic, jc, kc, Wc, phi, dxi); DistanceToEB::normalize(normal); - amrex::RealVect pos; #if (defined WARPX_DIM_3D) pos[0] = xp; diff --git a/Source/Particles/Gather/FieldGather.H b/Source/Particles/Gather/FieldGather.H index 670d95014a0..d43c6c0741d 100644 --- a/Source/Particles/Gather/FieldGather.H +++ b/Source/Particles/Gather/FieldGather.H @@ -1689,6 +1689,11 @@ void doGatherShapeN (const amrex::ParticleReal xp, ex_arr, ey_arr, ez_arr, bx_arr, by_arr, bz_arr, ex_type, ey_type, ez_type, bx_type, by_type, bz_type, dx_arr, xyzmin_arr, lo, n_rz_azimuthal_modes); + } else if (nox == 4) { + doGatherShapeN<4,1>(xp, yp, zp, Exp, Eyp, Ezp, Bxp, Byp, Bzp, + ex_arr, ey_arr, ez_arr, bx_arr, by_arr, bz_arr, + ex_type, ey_type, ez_type, bx_type, by_type, bz_type, + dx_arr, xyzmin_arr, lo, n_rz_azimuthal_modes); } } else { if (nox == 1) { @@ -1706,6 +1711,11 @@ void doGatherShapeN (const amrex::ParticleReal xp, ex_arr, ey_arr, ez_arr, bx_arr, by_arr, bz_arr, ex_type, ey_type, ez_type, bx_type, by_type, bz_type, dx_arr, xyzmin_arr, lo, n_rz_azimuthal_modes); + } else if (nox == 4) { + doGatherShapeN<4,0>(xp, yp, zp, Exp, Eyp, Ezp, Bxp, Byp, Bzp, + ex_arr, ey_arr, ez_arr, bx_arr, by_arr, bz_arr, + ex_type, ey_type, ez_type, bx_type, by_type, bz_type, + dx_arr, xyzmin_arr, lo, n_rz_azimuthal_modes); } } } @@ -1782,6 +1792,12 @@ void doGatherShapeNImplicit ( ex_arr, ey_arr, ez_arr, bx_arr, by_arr, bz_arr, ex_type, ey_type, ez_type, bx_type, by_type, bz_type, dx_arr, xyzmin_arr, lo, n_rz_azimuthal_modes); + } else if (nox == 4) { + doGatherShapeNEsirkepovStencilImplicit<4>(xp_n, yp_n, zp_n, xp_nph, yp_nph, zp_nph, + Exp, Eyp, Ezp, Bxp, Byp, Bzp, + ex_arr, ey_arr, ez_arr, bx_arr, by_arr, bz_arr, + ex_type, ey_type, ez_type, bx_type, by_type, bz_type, + dx_arr, xyzmin_arr, lo, n_rz_azimuthal_modes); } } else if (depos_type==3) { // CurrentDepositionAlgo::Villasenor @@ -1827,6 +1843,11 @@ void doGatherShapeNImplicit ( ex_arr, ey_arr, ez_arr, bx_arr, by_arr, bz_arr, ex_type, ey_type, ez_type, bx_type, by_type, bz_type, dx_arr, xyzmin_arr, lo, n_rz_azimuthal_modes); + } else if (nox == 4) { + doGatherShapeN<4,0>(xp_nph, yp_nph, zp_nph, Exp, Eyp, Ezp, Bxp, Byp, Bzp, + ex_arr, ey_arr, ez_arr, bx_arr, by_arr, bz_arr, + ex_type, ey_type, ez_type, bx_type, by_type, bz_type, + dx_arr, xyzmin_arr, lo, n_rz_azimuthal_modes); } } } diff --git a/Source/Particles/MultiParticleContainer.H b/Source/Particles/MultiParticleContainer.H index 285b0a9777c..9946a680fcd 100644 --- a/Source/Particles/MultiParticleContainer.H +++ b/Source/Particles/MultiParticleContainer.H @@ -95,11 +95,11 @@ public: void InitMultiPhysicsModules (); - /// - /// This evolves all the particles by one PIC time step, including current deposition, the - /// field solve, and pushing the particles, for all the species in the MultiParticleContainer. - /// This is the electromagnetic version. - /// + /** + * \brief This evolves all the particles by one PIC time step, including current deposition, the + * field solve, and pushing the particles, for all the species in the MultiParticleContainer. + * This is the electromagnetic version. + */ void Evolve (int lev, const amrex::MultiFab& Ex, const amrex::MultiFab& Ey, const amrex::MultiFab& Ez, const amrex::MultiFab& Bx, const amrex::MultiFab& By, const amrex::MultiFab& Bz, @@ -111,19 +111,18 @@ public: amrex::Real t, amrex::Real dt, DtType a_dt_type=DtType::Full, bool skip_deposition=false, PushType push_type=PushType::Explicit); - /// - /// This pushes the particle positions by one half time step for all the species in the - /// MultiParticleContainer. It is used to desynchronize the particles after initialization - /// or when restarting from a checkpoint. - /// + /** + * \brief This pushes the particle positions by one time step for all the species in the + * MultiParticleContainer. + */ void PushX (amrex::Real dt); - /// - /// This pushes the particle momenta by dt for all the species in the - /// MultiParticleContainer. It is used to desynchronize the particles after initialization - /// or when restarting from a checkpoint. It is also used to synchronize particles at the - /// the end of the run. This is the electromagnetic version. - /// + /** + * This pushes the particle momenta by dt for all the species in the + * MultiParticleContainer. It is used to desynchronize the particles after initialization + * or when restarting from a checkpoint. It is also used to synchronize particles at the + * the end of the run. This is the electromagnetic version. + */ void PushP (int lev, amrex::Real dt, const amrex::MultiFab& Ex, const amrex::MultiFab& Ey, const amrex::MultiFab& Ez, const amrex::MultiFab& Bx, const amrex::MultiFab& By, const amrex::MultiFab& Bz); diff --git a/Source/Particles/ParticleBoundaryBuffer.cpp b/Source/Particles/ParticleBoundaryBuffer.cpp index 4132a1b971e..f991f211d28 100644 --- a/Source/Particles/ParticleBoundaryBuffer.cpp +++ b/Source/Particles/ParticleBoundaryBuffer.cpp @@ -45,9 +45,11 @@ struct IsOutsideDomainBoundary { } }; +#ifdef AMREX_USE_EB struct FindEmbeddedBoundaryIntersection { const int m_step_index; const int m_delta_index; + const int m_normal_index; const int m_step; const amrex::Real m_dt; amrex::Array4 m_phiarr; @@ -95,7 +97,7 @@ struct FindEmbeddedBoundaryIntersection { amrex::Real W[AMREX_SPACEDIM][2]; amrex::ParticleReal x_temp=xp, y_temp=yp, z_temp=zp; UpdatePosition(x_temp, y_temp, z_temp, ux, uy, uz, -dt_frac*dt); - ablastr::particles::compute_weights_nodal(x_temp, y_temp, z_temp, plo, dxi, i, j, k, W); + ablastr::particles::compute_weights(x_temp, y_temp, z_temp, plo, dxi, i, j, k, W); amrex::Real phi_value = ablastr::particles::interp_field_nodal(i, j, k, W, phiarr); return phi_value; } ); @@ -109,26 +111,55 @@ struct FindEmbeddedBoundaryIntersection { amrex::ParticleReal x_temp=xp, y_temp=yp, z_temp=zp; UpdatePosition(x_temp, y_temp, z_temp, ux, uy, uz, -dt_fraction*m_dt); + // record the components of the normal on the destination + int i, j, k; + amrex::Real W[AMREX_SPACEDIM][2]; + ablastr::particles::compute_weights(x_temp, y_temp, z_temp, plo, dxi, i, j, k, W); + int ic, jc, kc; // Cell-centered indices + int nodal; + amrex::Real Wc[AMREX_SPACEDIM][2]; // Cell-centered weight + ablastr::particles::compute_weights(x_temp, y_temp, z_temp, plo, dxi, ic, jc, kc, Wc, nodal=0); // nodal=0 to calculate the weights with respect to the cell-centered nodes + amrex::RealVect normal = DistanceToEB::interp_normal(i, j, k, W, ic, jc, kc, Wc, phiarr, dxi); + DistanceToEB::normalize(normal); + #if (defined WARPX_DIM_3D) dst.m_rdata[PIdx::x][dst_i] = x_temp; dst.m_rdata[PIdx::y][dst_i] = y_temp; dst.m_rdata[PIdx::z][dst_i] = z_temp; + //save normal components + dst.m_runtime_rdata[m_normal_index][dst_i] = normal[0]; + dst.m_runtime_rdata[m_normal_index+1][dst_i] = normal[1]; + dst.m_runtime_rdata[m_normal_index+2][dst_i] = normal[2]; #elif (defined WARPX_DIM_XZ) dst.m_rdata[PIdx::x][dst_i] = x_temp; dst.m_rdata[PIdx::z][dst_i] = z_temp; amrex::ignore_unused(y_temp); + //save normal components + dst.m_runtime_rdata[m_normal_index][dst_i] = normal[0]; + dst.m_runtime_rdata[m_normal_index+1][dst_i] = 0.0; + dst.m_runtime_rdata[m_normal_index+2][dst_i] = normal[1]; #elif (defined WARPX_DIM_RZ) dst.m_rdata[PIdx::x][dst_i] = std::sqrt(x_temp*x_temp + y_temp*y_temp); dst.m_rdata[PIdx::z][dst_i] = z_temp; dst.m_rdata[PIdx::theta][dst_i] = std::atan2(y_temp, x_temp); + //save normal components + amrex::Real theta=std::atan2(y_temp, x_temp); + dst.m_runtime_rdata[m_normal_index][dst_i] = normal[0]*std::cos(theta); + dst.m_runtime_rdata[m_normal_index+1][dst_i] = normal[0]*std::sin(theta); + dst.m_runtime_rdata[m_normal_index+2][dst_i] = normal[1]; #elif (defined WARPX_DIM_1D_Z) dst.m_rdata[PIdx::z][dst_i] = z_temp; amrex::ignore_unused(x_temp, y_temp); + //normal not defined + dst.m_runtime_rdata[m_normal_index][dst_i] = 0.0; + dst.m_runtime_rdata[m_normal_index+1][dst_i] = 0.0; + dst.m_runtime_rdata[m_normal_index+2][dst_i] = 0.0; #else - amrex::ignore_unused(x_temp, y_temp, z_temp); + amrex::ignore_unused(x_temp, y_temp, z_temp,normal); #endif } }; +#endif struct CopyAndTimestamp { int m_step_index; @@ -407,6 +438,10 @@ void ParticleBoundaryBuffer::gatherParticles (MultiParticleContainer& mypc, buffer[i] = pc.make_alike(); buffer[i].AddIntComp("stepScraped", false); buffer[i].AddRealComp("deltaTimeScraped", false); + buffer[i].AddRealComp("nx", false); + buffer[i].AddRealComp("ny", false); + buffer[i].AddRealComp("nz", false); + } auto& species_buffer = buffer[i]; for (int lev = 0; lev < pc.numLevels(); ++lev) @@ -463,12 +498,14 @@ void ParticleBoundaryBuffer::gatherParticles (MultiParticleContainer& mypc, const int step_scraped_index = string_to_index_intcomp.at("stepScraped"); auto string_to_index_realcomp = buffer[i].getParticleRuntimeComps(); const int delta_index = string_to_index_realcomp.at("deltaTimeScraped"); + const int normal_index = string_to_index_realcomp.at("nx"); const int step = warpx_instance.getistep(0); { WARPX_PROFILE("ParticleBoundaryBuffer::gatherParticles::filterTransformEB"); amrex::filterAndTransformParticles(ptile_buffer, ptile, predicate, - FindEmbeddedBoundaryIntersection{step_scraped_index,delta_index, step, dt, phiarr, dxi, plo}, 0, dst_index); + FindEmbeddedBoundaryIntersection{step_scraped_index,delta_index, normal_index, step, dt, phiarr, dxi, plo}, 0, dst_index); + } } } diff --git a/Source/Particles/ParticleCreation/SmartUtils.H b/Source/Particles/ParticleCreation/SmartUtils.H index f84734308fb..dbac563ca28 100644 --- a/Source/Particles/ParticleCreation/SmartUtils.H +++ b/Source/Particles/ParticleCreation/SmartUtils.H @@ -49,7 +49,7 @@ SmartCopyTag getSmartCopyTag (const NameMap& src, const NameMap& dst) noexcept; * \param num_added the number of particles to set the ids for. */ template -void setNewParticleIDs (PTile& ptile, int old_size, int num_added) +void setNewParticleIDs (PTile& ptile, amrex::Long old_size, amrex::Long num_added) { amrex::Long pid; #ifdef AMREX_USE_OMP @@ -64,8 +64,9 @@ void setNewParticleIDs (PTile& ptile, int old_size, int num_added) auto ptd = ptile.getParticleTileData(); amrex::ParallelFor(num_added, [=] AMREX_GPU_DEVICE (int ip) noexcept { - auto const new_id = ip + old_size; - ptd.m_idcpu[new_id] = amrex::SetParticleIDandCPU(pid+ip, cpuid); + auto const lip = static_cast(ip); + auto const new_id = lip + old_size; + ptd.m_idcpu[new_id] = amrex::SetParticleIDandCPU(pid+lip, cpuid); }); } diff --git a/Source/Particles/PhysicalParticleContainer.cpp b/Source/Particles/PhysicalParticleContainer.cpp index 08c784709fa..da1655b9dab 100644 --- a/Source/Particles/PhysicalParticleContainer.cpp +++ b/Source/Particles/PhysicalParticleContainer.cpp @@ -51,7 +51,6 @@ #include #include #include -#include #include #include #include @@ -1022,8 +1021,8 @@ PhysicalParticleContainer::AddPlasma (PlasmaInjector const& plasma_injector, int overlap_realbox.lo(2))}; // count the number of particles that each cell in overlap_box could add - Gpu::DeviceVector counts(overlap_box.numPts(), 0); - Gpu::DeviceVector offset(overlap_box.numPts()); + Gpu::DeviceVector counts(overlap_box.numPts(), 0); + Gpu::DeviceVector offset(overlap_box.numPts()); auto *pcounts = counts.data(); const amrex::IntVect lrrfac = rrfac; Box fine_overlap_box; // default Box is NOT ok(). @@ -1042,7 +1041,7 @@ PhysicalParticleContainer::AddPlasma (PlasmaInjector const& plasma_injector, int if (inj_pos->overlapsWith(lo, hi)) { auto index = overlap_box.index(iv); - const int r = (fine_overlap_box.ok() && fine_overlap_box.contains(iv))? + const amrex::Long r = (fine_overlap_box.ok() && fine_overlap_box.contains(iv))? (AMREX_D_TERM(lrrfac[0],*lrrfac[1],*lrrfac[2])) : (1); pcounts[index] = num_ppc*r; // update pcount by checking if cell-corners or cell-center @@ -1080,10 +1079,10 @@ PhysicalParticleContainer::AddPlasma (PlasmaInjector const& plasma_injector, int // Max number of new particles. All of them are created, // and invalid ones are then discarded - const int max_new_particles = Scan::ExclusiveSum(counts.size(), counts.data(), offset.data()); + const amrex::Long max_new_particles = Scan::ExclusiveSum(counts.size(), counts.data(), offset.data()); // Update NextID to include particles created in this function - int pid; + amrex::Long pid; #ifdef AMREX_USE_OMP #pragma omp critical (add_plasma_nextid) #endif @@ -1092,7 +1091,7 @@ PhysicalParticleContainer::AddPlasma (PlasmaInjector const& plasma_injector, int ParticleType::NextID(pid+max_new_particles); } WARPX_ALWAYS_ASSERT_WITH_MESSAGE( - static_cast(pid) + static_cast(max_new_particles) < LongParticleIds::LastParticleID, + pid + max_new_particles < LongParticleIds::LastParticleID, "ERROR: overflow on particle id numbers"); const int cpuid = ParallelDescriptor::MyProc(); @@ -1103,8 +1102,8 @@ PhysicalParticleContainer::AddPlasma (PlasmaInjector const& plasma_injector, int DefineAndReturnParticleTile(lev, grid_id, tile_id); } - auto old_size = particle_tile.size(); - auto new_size = old_size + max_new_particles; + auto const old_size = static_cast(particle_tile.size()); + auto const new_size = old_size + max_new_particles; particle_tile.resize(new_size); auto& soa = particle_tile.GetStructOfArrays(); @@ -1639,10 +1638,10 @@ PhysicalParticleContainer::AddPlasmaFlux (PlasmaInjector const& plasma_injector, // Max number of new particles. All of them are created, // and invalid ones are then discarded - const int max_new_particles = Scan::ExclusiveSum(counts.size(), counts.data(), offset.data()); + const amrex::Long max_new_particles = Scan::ExclusiveSum(counts.size(), counts.data(), offset.data()); // Update NextID to include particles created in this function - int pid; + amrex::Long pid; #ifdef AMREX_USE_OMP #pragma omp critical (add_plasma_nextid) #endif @@ -1651,15 +1650,15 @@ PhysicalParticleContainer::AddPlasmaFlux (PlasmaInjector const& plasma_injector, ParticleType::NextID(pid+max_new_particles); } WARPX_ALWAYS_ASSERT_WITH_MESSAGE( - static_cast(pid) + static_cast(max_new_particles) < LongParticleIds::LastParticleID, + pid + max_new_particles < LongParticleIds::LastParticleID, "overflow on particle id numbers"); const int cpuid = ParallelDescriptor::MyProc(); auto& particle_tile = tmp_pc.DefineAndReturnParticleTile(0, grid_id, tile_id); - auto old_size = particle_tile.size(); - auto new_size = old_size + max_new_particles; + auto const old_size = static_cast(particle_tile.size()); + auto const new_size = old_size + max_new_particles; particle_tile.resize(new_size); auto& soa = particle_tile.GetStructOfArrays(); diff --git a/Source/Particles/ShapeFactors.H b/Source/Particles/ShapeFactors.H index 73e8f7243bb..7c56da457ed 100644 --- a/Source/Particles/ShapeFactors.H +++ b/Source/Particles/ShapeFactors.H @@ -16,7 +16,7 @@ /** * Compute shape factor and return index of leftmost cell where * particle writes. - * Specializations are defined for orders 0 to 3 (using "if constexpr"). + * Specializations are defined for orders 0 to 4 (using "if constexpr"). * Shape factor functors may be evaluated with double arguments * in current deposition to ensure that current deposited by * particles that move only a small distance is still resolved. @@ -67,13 +67,11 @@ struct Compute_shape_factor else if constexpr (depos_order == 4){ const auto j = static_cast(xmid + T(0.5)); const T xint = xmid - T(j); - const T xint_p1 = xint + T(1.0); - const T xint_m1 = xint - T(1.0); - sx[0] = T(1.0)/T(384.0)*(T(1.0) - T(2.0)*xint)*(T(1.0) - T(2.0)*xint)*(T(1.0) - T(2.0)*xint)*(T(1.0) - T(2.0)*xint); - sx[1] = T(1.0)/T(96.0)*(T(55.0) + T(4.0)*xint_p1*(T(5.0) - T(2.0)*xint_p1*(T(15.0) + T(2.0)*xint_p1*(xint_p1 - T(5.0))))); - sx[2] = T(115.0)/T(192.0) + xint*xint*(xint*xint/T(4.0) - T(5.0)/T(8.0)); - sx[3] = T(1.0)/T(96.0)*(T(55.0) - T(4.0)*xint_m1*(T(5.0) + T(2.0)*xint_m1*(T(15.0) - T(2.0)*xint_m1*(-xint_m1 - T(5.0))))); - sx[4] = T(1.0)/T(384.0)*(T(1.0) + T(2.0)*xint)*(T(1.0) + T(2.0)*xint)*(T(1.0) + T(2.0)*xint)*(T(1.0) + T(2.0)*xint); + sx[0] = (T(1.0))/(T(24.0))*(T(0.5) - xint)*(T(0.5) - xint)*(T(0.5) - xint)*(T(0.5) - xint); + sx[1] = (T(1.0))/(T(24.0))*(T(4.75) - T(11.0)*xint + T(4.0)*xint*xint*(T(1.5) + xint - xint*xint)); + sx[2] = (T(1.0))/(T(24.0))*(T(14.375) + T(6.0)*xint*xint*(xint*xint - T(2.5))); + sx[3] = (T(1.0))/(T(24.0))*(T(4.75) + T(11.0)*xint + T(4.0)*xint*xint*(T(1.5) - xint - xint*xint)); + sx[4] = (T(1.0))/(T(24.0))*(T(0.5) + xint)*(T(0.5) + xint)*(T(0.5) + xint)*(T(0.5)+xint); // index of the leftmost cell where particle deposits return j-2; } @@ -90,7 +88,7 @@ struct Compute_shape_factor /** * Compute shifted shape factor and return index of leftmost cell where * particle writes, for Esirkepov algorithm. - * Specializations are defined below for orders 1, 2 and 3 (using "if constexpr"). + * Specializations are defined below for orders 1, 2, 3, and 4 (using "if constexpr"). */ template struct Compute_shifted_shape_factor @@ -129,7 +127,7 @@ struct Compute_shifted_shape_factor else if constexpr (depos_order == 3){ const auto i = static_cast(x_old); const int i_shift = i - (i_new + 1); - const T xint = x_old - i; + const T xint = x_old - T(i); sx[1+i_shift] = (T(1.0))/(T(6.0))*(T(1.0) - xint)*(T(1.0) - xint)*(T(1.0) - xint); sx[2+i_shift] = (T(2.0))/(T(3.0)) - xint*xint*(T(1.0) - xint/(T(2.0))); sx[3+i_shift] = (T(2.0))/(T(3.0)) - (T(1.0) - xint)*(T(1.0) - xint)*(T(1.0) - T(0.5)*(T(1.0) - xint)); @@ -137,6 +135,18 @@ struct Compute_shifted_shape_factor // index of the leftmost cell where particle deposits return i - 1; } + else if constexpr (depos_order == 4){ + const auto i = static_cast(x_old + T(0.5)); + const int i_shift = i - (i_new + 2); + const T xint = x_old - T(i); + sx[1+i_shift] = (T(1.0))/(T(24.0))*(T(0.5) - xint)*(T(0.5) - xint)*(T(0.5) - xint)*(T(0.5) - xint); + sx[2+i_shift] = (T(1.0))/(T(24.0))*(T(4.75) - T(11.0)*xint + T(4.0)*xint*xint*(T(1.5) + xint - xint*xint)); + sx[3+i_shift] = (T(1.0))/(T(24.0))*(T(14.375) + T(6.0)*xint*xint*(xint*xint - T(2.5))); + sx[4+i_shift] = (T(1.0))/(T(24.0))*(T(4.75) + T(11.0)*xint + T(4.0)*xint*xint*(T(1.5) - xint - xint*xint)); + sx[5+i_shift] = (T(1.0))/(T(24.0))*(T(0.5) + xint)*(T(0.5) + xint)*(T(0.5) + xint)*(T(0.5)+xint); + // index of the leftmost cell where particle deposits + return i - 2; + } else{ WARPX_ABORT_WITH_MESSAGE("Unknown particle shape selected in Compute_shifted_shape_factor"); amrex::ignore_unused(sx, x_old, i_new); @@ -209,22 +219,18 @@ struct Compute_shape_factor_pair else if constexpr (depos_order == 4){ const auto j = static_cast(xmid + T(0.5)); const T xint_old = xold - T(j); - T xint_p1 = xint_old + T(1.0); - T xint_m1 = xint_old - T(1.0); - sx_old[0] = T(1.0)/T(384.0)*(T(1.0) - T(2.0)*xint_old)*(T(1.0) - T(2.0)*xint_old)*(T(1.0) - T(2.0)*xint_old)*(T(1.0) - T(2.0)*xint_old); - sx_old[1] = T(1.0)/T(96.0)*(T(55.0) + T(4.0)*xint_p1*(T(5.0) - T(2.0)*xint_p1*(T(15.0) + T(2.0)*xint_p1*(xint_p1 - T(5.0))))); - sx_old[2] = T(115.0)/T(192.0) + xint_old*xint_old*(xint_old*xint_old/T(4.0) - T(5.0)/T(8.0)); - sx_old[3] = T(1.0)/T(96.0)*(T(55.0) - T(4.0)*xint_m1*(T(5.0) + T(2.0)*xint_m1*(T(15.0) - T(2.0)*xint_m1*(-xint_m1 - T(5.0))))); - sx_old[4] = T(1.0)/T(384.0)*(T(1.0) + T(2.0)*xint_old)*(T(1.0) + T(2.0)*xint_old)*(T(1.0) + T(2.0)*xint_old)*(T(1.0) + T(2.0)*xint_old); + sx_old[0] = (T(1.0))/(T(24.0))*(T(0.5) - xint_old)*(T(0.5) - xint_old)*(T(0.5) - xint_old)*(T(0.5) - xint_old); + sx_old[1] = (T(1.0))/(T(24.0))*(T(4.75) - T(11.0)*xint_old + T(4.0)*xint_old*xint_old*(T(1.5) + xint_old - xint_old*xint_old)); + sx_old[2] = (T(1.0))/(T(24.0))*(T(14.375) + T(6.0)*xint_old*xint_old*(xint_old*xint_old - T(2.5))); + sx_old[3] = (T(1.0))/(T(24.0))*(T(4.75) + T(11.0)*xint_old + T(4.0)*xint_old*xint_old*(T(1.5) - xint_old - xint_old*xint_old)); + sx_old[4] = (T(1.0))/(T(24.0))*(T(0.5) + xint_old)*(T(0.5) + xint_old)*(T(0.5) + xint_old)*(T(0.5)+xint_old); // const T xint_new = xnew - T(j); - xint_p1 = xint_new + T(1.0); - xint_m1 = xint_new - T(1.0); - sx_new[0] = T(1.0)/T(384.0)*(T(1.0) - T(2.0)*xint_new)*(T(1.0) - T(2.0)*xint_new)*(T(1.0) - T(2.0)*xint_new)*(T(1.0) - T(2.0)*xint_new); - sx_new[1] = T(1.0)/T(96.0)*(T(55.0) + T(4.0)*xint_p1*(T(5.0) - T(2.0)*xint_p1*(T(15.0) + T(2.0)*xint_p1*(xint_p1 - T(5.0))))); - sx_new[2] = T(115.0)/T(192.0) + xint_new*xint_new*(xint_new*xint_new/T(4.0) - T(5.0)/T(8.0)); - sx_new[3] = T(1.0)/T(96.0)*(T(55.0) - T(4.0)*xint_m1*(T(5.0) + T(2.0)*xint_m1*(T(15.0) - T(2.0)*xint_m1*(-xint_m1 - T(5.0))))); - sx_new[4] = T(1.0)/T(384.0)*(T(1.0) + T(2.0)*xint_new)*(T(1.0) + T(2.0)*xint_new)*(T(1.0) + T(2.0)*xint_new)*(T(1.0) + T(2.0)*xint_new); + sx_new[0] = (T(1.0))/(T(24.0))*(T(0.5) - xint_new)*(T(0.5) - xint_new)*(T(0.5) - xint_new)*(T(0.5) - xint_new); + sx_new[1] = (T(1.0))/(T(24.0))*(T(4.75) - T(11.0)*xint_new + T(4.0)*xint_new*xint_new*(T(1.5) + xint_new - xint_new*xint_new)); + sx_new[2] = (T(1.0))/(T(24.0))*(T(14.375) + T(6.0)*xint_new*xint_new*(xint_new*xint_new - T(2.5))); + sx_new[3] = (T(1.0))/(T(24.0))*(T(4.75) + T(11.0)*xint_new + T(4.0)*xint_new*xint_new*(T(1.5) - xint_new - xint_new*xint_new)); + sx_new[4] = (T(1.0))/(T(24.0))*(T(0.5) + xint_new)*(T(0.5) + xint_new)*(T(0.5) + xint_new)*(T(0.5)+xint_new); // // index of the leftmost cell where particle deposits return j-2; diff --git a/Source/Particles/Sorting/Partition.cpp b/Source/Particles/Sorting/Partition.cpp index 58e3450f47d..9f92739a5d5 100644 --- a/Source/Particles/Sorting/Partition.cpp +++ b/Source/Particles/Sorting/Partition.cpp @@ -10,7 +10,6 @@ #include "Utils/WarpXProfilerWrapper.H" #include "WarpX.H" -#include #include #include #include diff --git a/Source/Particles/WarpXParticleContainer.cpp b/Source/Particles/WarpXParticleContainer.cpp index 0d565c039e6..6e6c0b27c85 100644 --- a/Source/Particles/WarpXParticleContainer.cpp +++ b/Source/Particles/WarpXParticleContainer.cpp @@ -547,6 +547,13 @@ WarpXParticleContainer::DepositCurrent (WarpXParIter& pti, jx_fab, jy_fab, jz_fab, np_to_deposit, relative_time, dx, xyzmin, lo, q, WarpX::n_rz_azimuthal_modes, cost, WarpX::load_balance_costs_update_algo, bins, box, geom, max_tbox_size); + } else if (WarpX::nox == 4){ + doDepositionSharedShapeN<4>( + GetPosition, wp.dataPtr() + offset, uxp.dataPtr() + offset, + uyp.dataPtr() + offset, uzp.dataPtr() + offset, ion_lev, + jx_fab, jy_fab, jz_fab, np_to_deposit, relative_time, dx, + xyzmin, lo, q, WarpX::n_rz_azimuthal_modes, cost, + WarpX::load_balance_costs_update_algo, bins, box, geom, max_tbox_size); } WARPX_PROFILE_VAR_STOP(direct_current_dep_kernel); } @@ -576,6 +583,13 @@ WarpXParticleContainer::DepositCurrent (WarpXParIter& pti, jx_arr, jy_arr, jz_arr, np_to_deposit, dt, relative_time, dx, xyzmin, lo, q, WarpX::n_rz_azimuthal_modes, cost, WarpX::load_balance_costs_update_algo); + } else if (WarpX::nox == 4){ + doEsirkepovDepositionShapeN<4>( + GetPosition, wp.dataPtr() + offset, uxp.dataPtr() + offset, + uyp.dataPtr() + offset, uzp.dataPtr() + offset, ion_lev, + jx_arr, jy_arr, jz_arr, np_to_deposit, dt, relative_time, dx, xyzmin, lo, q, + WarpX::n_rz_azimuthal_modes, cost, + WarpX::load_balance_costs_update_algo); } } else if (push_type == PushType::Implicit) { #if (AMREX_SPACEDIM >= 2) @@ -622,6 +636,15 @@ WarpXParticleContainer::DepositCurrent (WarpXParIter& pti, jx_arr, jy_arr, jz_arr, np_to_deposit, dt, dx, xyzmin, lo, q, WarpX::n_rz_azimuthal_modes, cost, WarpX::load_balance_costs_update_algo); + } else if (WarpX::nox == 4){ + doChargeConservingDepositionShapeNImplicit<4>( + xp_n_data, yp_n_data, zp_n_data, + GetPosition, wp.dataPtr() + offset, + uxp_n.dataPtr() + offset, uyp_n.dataPtr() + offset, uzp_n.dataPtr() + offset, + uxp.dataPtr() + offset, uyp.dataPtr() + offset, uzp.dataPtr() + offset, ion_lev, + jx_arr, jy_arr, jz_arr, np_to_deposit, dt, dx, xyzmin, lo, q, + WarpX::n_rz_azimuthal_modes, cost, + WarpX::load_balance_costs_update_algo); } } } else if (WarpX::current_deposition_algo == CurrentDepositionAlgo::Villasenor) { @@ -709,6 +732,13 @@ WarpXParticleContainer::DepositCurrent (WarpXParIter& pti, jx_fab, jy_fab, jz_fab, np_to_deposit, dt, relative_time, dx, xyzmin, lo, q, WarpX::n_rz_azimuthal_modes, cost, WarpX::load_balance_costs_update_algo); + } else if (WarpX::nox == 4){ + doVayDepositionShapeN<4>( + GetPosition, wp.dataPtr() + offset, uxp.dataPtr() + offset, + uyp.dataPtr() + offset, uzp.dataPtr() + offset, ion_lev, + jx_fab, jy_fab, jz_fab, np_to_deposit, dt, relative_time, dx, xyzmin, lo, q, + WarpX::n_rz_azimuthal_modes, cost, + WarpX::load_balance_costs_update_algo); } } else { // Direct deposition if (push_type == PushType::Explicit) { @@ -733,6 +763,13 @@ WarpXParticleContainer::DepositCurrent (WarpXParIter& pti, jx_fab, jy_fab, jz_fab, np_to_deposit, relative_time, dx, xyzmin, lo, q, WarpX::n_rz_azimuthal_modes, cost, WarpX::load_balance_costs_update_algo); + } else if (WarpX::nox == 4){ + doDepositionShapeN<4>( + GetPosition, wp.dataPtr() + offset, uxp.dataPtr() + offset, + uyp.dataPtr() + offset, uzp.dataPtr() + offset, ion_lev, + jx_fab, jy_fab, jz_fab, np_to_deposit, relative_time, dx, + xyzmin, lo, q, WarpX::n_rz_azimuthal_modes, cost, + WarpX::load_balance_costs_update_algo); } } else if (push_type == PushType::Implicit) { auto& uxp_n = pti.GetAttribs(particle_comps["ux_n"]); @@ -765,6 +802,15 @@ WarpXParticleContainer::DepositCurrent (WarpXParIter& pti, jx_fab, jy_fab, jz_fab, np_to_deposit, dx, xyzmin, lo, q, WarpX::n_rz_azimuthal_modes, cost, WarpX::load_balance_costs_update_algo); + } else if (WarpX::nox == 4){ + doDepositionShapeNImplicit<4>( + GetPosition, wp.dataPtr() + offset, + uxp_n.dataPtr() + offset, uyp_n.dataPtr() + offset, uzp_n.dataPtr() + offset, + uxp.dataPtr() + offset, uyp.dataPtr() + offset, uzp.dataPtr() + offset, + ion_lev, + jx_fab, jy_fab, jz_fab, np_to_deposit, dx, + xyzmin, lo, q, WarpX::n_rz_azimuthal_modes, cost, + WarpX::load_balance_costs_update_algo); } } } @@ -1064,6 +1110,12 @@ WarpXParticleContainer::DepositCharge (WarpXParIter& pti, RealVector const& wp, WarpX::n_rz_azimuthal_modes, cost, WarpX::load_balance_costs_update_algo, bins, box, geom, max_tbox_size, WarpX::shared_tilesize); + } else if (WarpX::nox == 4){ + doChargeDepositionSharedShapeN<4>(GetPosition, wp.dataPtr()+offset, ion_lev, + rho_fab, ix_type, np_to_deposit, dx, xyzmin, lo, q, + WarpX::n_rz_azimuthal_modes, cost, + WarpX::load_balance_costs_update_algo, bins, box, geom, max_tbox_size, + WarpX::shared_tilesize); } #ifndef AMREX_USE_GPU // CPU, tiling: atomicAdd local_rho into rho diff --git a/Source/WarpX.cpp b/Source/WarpX.cpp index 5031381561f..b13d7aaf63b 100644 --- a/Source/WarpX.cpp +++ b/Source/WarpX.cpp @@ -440,6 +440,11 @@ WarpX::WarpX () costs_heuristic_cells_wt = 0.250_rt; costs_heuristic_particles_wt = 0.750_rt; break; + case 4: + // this is only a guess + costs_heuristic_cells_wt = 0.200_rt; + costs_heuristic_particles_wt = 0.800_rt; + break; } } else { // FDTD switch (WarpX::nox) @@ -456,6 +461,11 @@ WarpX::WarpX () costs_heuristic_cells_wt = 0.145_rt; costs_heuristic_particles_wt = 0.855_rt; break; + case 4: + // this is only a guess + costs_heuristic_cells_wt = 0.100_rt; + costs_heuristic_particles_wt = 0.900_rt; + break; } } #else // CPU @@ -1327,19 +1337,10 @@ WarpX::ReadParameters () int particle_shape; if (!species_names.empty() || !lasers_names.empty()) { if (utils::parser::queryWithParser(pp_algo, "particle_shape", particle_shape)){ - - if(current_deposition_algo == CurrentDepositionAlgo::Villasenor) { - WARPX_ALWAYS_ASSERT_WITH_MESSAGE( - (particle_shape >= 1) && (particle_shape <=4), - "algo.particle_shape can be only 1, 2, 3, or 4 with villasenor deposition" - ); - } - else { - WARPX_ALWAYS_ASSERT_WITH_MESSAGE( - (particle_shape >= 1) && (particle_shape <=3), - "algo.particle_shape can be only 1, 2, or 3" - ); - } + WARPX_ALWAYS_ASSERT_WITH_MESSAGE( + (particle_shape >= 1) && (particle_shape <=4), + "algo.particle_shape can be only 1, 2, 3, or 4" + ); nox = particle_shape; noy = particle_shape; @@ -1348,8 +1349,7 @@ WarpX::ReadParameters () else{ WARPX_ABORT_WITH_MESSAGE( "algo.particle_shape must be set in the input file:" - " please set algo.particle_shape to 1, 2, or 3." - " if using the villasenor deposition, can use 4 also."); + " please set algo.particle_shape to 1, 2, 3, or 4"); } if ((maxLevel() > 0) && (particle_shape > 1) && (do_pml_j_damping == 1)) diff --git a/Source/ablastr/particles/NodalFieldGather.H b/Source/ablastr/particles/NodalFieldGather.H index b0151627198..5a10c73ae20 100644 --- a/Source/ablastr/particles/NodalFieldGather.H +++ b/Source/ablastr/particles/NodalFieldGather.H @@ -18,30 +18,36 @@ namespace ablastr::particles { /** - * \brief Compute weight of each surrounding node in interpolating a nodal field - * to the given coordinates. + * \brief Compute weight of each surrounding node (or cell-centered nodes) in interpolating a nodal ((or a cell-centered node) field + * to the given coordinates. If nodal=1, then the calculations will be done with respect to the nodes (default). If nodal=0, then the calculations will be done with respect to the cell-centered nodal) * * This currently only does linear order. * * \param xp,yp,zp Particle position coordinates * \param plo Index lower bounds of domain. * \param dxi inverse 3D cell spacing - * \param i,j,k Variables to store indices of position on grid - * \param W 2D array of weights to store each neighbouring node + * \param i,j,k Variables to store indices of position on grid (nodal or cell-centered, depending of the value of `nodal`) + * \param W 2D array of weights to store each neighbouring node (or cell-centered node) + * \param nodal Int that tells if the weights are calculated in respect to the nodes (nodal=1) of the cell-centered nodes (nodal=0) */ AMREX_GPU_HOST_DEVICE AMREX_INLINE -void compute_weights_nodal (const amrex::ParticleReal xp, +void compute_weights (const amrex::ParticleReal xp, const amrex::ParticleReal yp, const amrex::ParticleReal zp, amrex::GpuArray const& plo, amrex::GpuArray const& dxi, - int& i, int& j, int& k, amrex::Real W[AMREX_SPACEDIM][2]) noexcept + int& i, int& j, int& k, amrex::Real W[AMREX_SPACEDIM][2], int nodal=1) noexcept { using namespace amrex::literals; + +#if !((nodal==0)||(nodal==1)) + ABLASTR_ABORT_WITH_MESSAGE("Error: 'nodal' has to be equal to 0 or 1"); +#endif + #if (defined WARPX_DIM_3D) - const amrex::Real x = (xp - plo[0]) * dxi[0]; - const amrex::Real y = (yp - plo[1]) * dxi[1]; - const amrex::Real z = (zp - plo[2]) * dxi[2]; + const amrex::Real x = (xp - plo[0]) * dxi[0] + static_cast(nodal-1)*0.5_rt; + const amrex::Real y = (yp - plo[1]) * dxi[1] + static_cast(nodal-1)*0.5_rt; + const amrex::Real z = (zp - plo[2]) * dxi[2] + static_cast(nodal-1)*0.5_rt; i = static_cast(amrex::Math::floor(x)); j = static_cast(amrex::Math::floor(y)); @@ -58,17 +64,17 @@ void compute_weights_nodal (const amrex::ParticleReal xp, #elif defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ) # if (defined WARPX_DIM_XZ) - const amrex::Real x = (xp - plo[0]) * dxi[0]; + const amrex::Real x = (xp - plo[0]) * dxi[0] + static_cast(nodal-1)*0.5_rt; amrex::ignore_unused(yp); i = static_cast(amrex::Math::floor(x)); W[0][1] = x - i; # elif (defined WARPX_DIM_RZ) - const amrex::Real r = (std::sqrt(xp*xp+yp*yp) - plo[0]) * dxi[0]; + const amrex::Real r = (std::sqrt(xp*xp+yp*yp) - plo[0]) * dxi[0] + static_cast(nodal-1)*0.5_rt; i = static_cast(amrex::Math::floor(r)); W[0][1] = r - i; # endif - const amrex::Real z = (zp - plo[1]) * dxi[1]; + const amrex::Real z = (zp - plo[1]) * dxi[1] + static_cast(nodal-1)*0.5_rt; j = static_cast(amrex::Math::floor(z)); W[1][1] = z - j; @@ -77,7 +83,7 @@ void compute_weights_nodal (const amrex::ParticleReal xp, k = 0; #else - amrex::ignore_unused(xp, yp, zp, plo, dxi, i, j, k, W); + amrex::ignore_unused(xp, yp, zp, plo, dxi, i, j, k, W, nodal); ABLASTR_ABORT_WITH_MESSAGE("Error: compute_weights not yet implemented in 1D"); #endif } @@ -138,7 +144,7 @@ amrex::Real doGatherScalarFieldNodal (const amrex::ParticleReal xp, // first find the weight of surrounding nodes to use during interpolation int ii, jj, kk; amrex::Real W[AMREX_SPACEDIM][2]; - compute_weights_nodal(xp, yp, zp, lo, dxi, ii, jj, kk, W); + compute_weights(xp, yp, zp, lo, dxi, ii, jj, kk, W); return interp_field_nodal(ii, jj, kk, W, scalar_field); } @@ -166,7 +172,7 @@ doGatherVectorFieldNodal (const amrex::ParticleReal xp, // first find the weight of surrounding nodes to use during interpolation int ii, jj, kk; amrex::Real W[AMREX_SPACEDIM][2]; - compute_weights_nodal(xp, yp, zp, lo, dxi, ii, jj, kk, W); + compute_weights(xp, yp, zp, lo, dxi, ii, jj, kk, W); amrex::GpuArray const field_interp = { interp_field_nodal(ii, jj, kk, W, vector_field_x), diff --git a/cmake/dependencies/AMReX.cmake b/cmake/dependencies/AMReX.cmake index 9db2b1f1aad..6c4570a4078 100644 --- a/cmake/dependencies/AMReX.cmake +++ b/cmake/dependencies/AMReX.cmake @@ -273,7 +273,7 @@ set(WarpX_amrex_src "" set(WarpX_amrex_repo "https://github.com/AMReX-Codes/amrex.git" CACHE STRING "Repository URI to pull and build AMReX from if(WarpX_amrex_internal)") -set(WarpX_amrex_branch "68244ec91d118b5d4cc21f93376eaae8b56c51eb" +set(WarpX_amrex_branch "2230caa24c7d4bd07edb08b54e9368f9c73eae6e" CACHE STRING "Repository branch for WarpX_amrex_repo if(WarpX_amrex_internal)") diff --git a/cmake/dependencies/pyAMReX.cmake b/cmake/dependencies/pyAMReX.cmake index 1079735279a..71831357e66 100644 --- a/cmake/dependencies/pyAMReX.cmake +++ b/cmake/dependencies/pyAMReX.cmake @@ -79,7 +79,7 @@ option(WarpX_pyamrex_internal "Download & build pyAMReX" ON) set(WarpX_pyamrex_repo "https://github.com/AMReX-Codes/pyamrex.git" CACHE STRING "Repository URI to pull and build pyamrex from if(WarpX_pyamrex_internal)") -set(WarpX_pyamrex_branch "5aa700de18a61f933cb435adbe2299d74d794d6b" +set(WarpX_pyamrex_branch "0cbf4b08c9045e1845595c836b99f94bb3c1ac9f" CACHE STRING "Repository branch for WarpX_pyamrex_repo if(WarpX_pyamrex_internal)") diff --git a/run_test.sh b/run_test.sh index a0ba9e056a5..2b53571163a 100755 --- a/run_test.sh +++ b/run_test.sh @@ -68,7 +68,7 @@ python3 -m pip install --upgrade -r warpx/Regression/requirements.txt # Clone AMReX and warpx-data git clone https://github.com/AMReX-Codes/amrex.git -cd amrex && git checkout --detach 68244ec91d118b5d4cc21f93376eaae8b56c51eb && cd - +cd amrex && git checkout --detach 2230caa24c7d4bd07edb08b54e9368f9c73eae6e && cd - # warpx-data contains various required data sets git clone --depth 1 https://github.com/ECP-WarpX/warpx-data.git # openPMD-example-datasets contains various required data sets