From 45fd3de2bfcbbc4d325134daa3903b9c875bd150 Mon Sep 17 00:00:00 2001 From: Daniel Peter Date: Mon, 17 Jun 2024 19:52:01 +0200 Subject: [PATCH 01/13] updates jacobian check (performance tuning) --- src/specfem3D/compute_element.F90 | 7 +- src/specfem3D/prepare_optimized_arrays.F90 | 117 ++++++++++++++++----- 2 files changed, 96 insertions(+), 28 deletions(-) diff --git a/src/specfem3D/compute_element.F90 b/src/specfem3D/compute_element.F90 index 8dd647cb1..5c10e61d8 100644 --- a/src/specfem3D/compute_element.F90 +++ b/src/specfem3D/compute_element.F90 @@ -1555,7 +1555,12 @@ subroutine compute_element_precompute_factors(tempx1,tempx2,tempx3,tempy1,tempy2 jacobian = (xixl*(etayl*gammazl-etazl*gammayl) & - xiyl*(etaxl*gammazl-etazl*gammaxl) & + xizl*(etaxl*gammayl-etayl*gammaxl)) - if (jacobian <= 0.0_CUSTOM_REAL) stop 'Error invalid jacobian in compute_element_precompute_factors()' + + ! checks Jacobian + ! note: try to avoid this if-statement as it hinders the compiler to tune and vectorize this inner loop. + ! an if-statement would slow this loop down significantly... + ! already checked before in prepare optimized arrays: + ! if (jacobian <= 0.0_CUSTOM_REAL) stop 'Error invalid jacobian in compute_element_precompute_factors()' jacobianl(INDEX_IJK) = 1.0_CUSTOM_REAL / jacobian diff --git a/src/specfem3D/prepare_optimized_arrays.F90 b/src/specfem3D/prepare_optimized_arrays.F90 index f6d8617f4..3d95d9d67 100644 --- a/src/specfem3D/prepare_optimized_arrays.F90 +++ b/src/specfem3D/prepare_optimized_arrays.F90 @@ -450,6 +450,8 @@ subroutine prepare_fused_array() integer :: i,j,k #endif double precision :: sizeval + ! for jacobian test + real(kind=CUSTOM_REAL) :: xixl,xiyl,xizl,etaxl,etayl,etazl,gammaxl,gammayl,gammazl,jacobianl ! fused array only needed for compute forces in crust/mantle (Deville routine) if (USE_DEVILLE_PRODUCTS_VAL) then @@ -479,15 +481,34 @@ subroutine prepare_fused_array() DO_LOOP_IJK - deriv_mapping_crust_mantle(1,INDEX_IJK,ispec) = xix_crust_mantle(INDEX_IJK,ispec) - deriv_mapping_crust_mantle(2,INDEX_IJK,ispec) = xiy_crust_mantle(INDEX_IJK,ispec) - deriv_mapping_crust_mantle(3,INDEX_IJK,ispec) = xiz_crust_mantle(INDEX_IJK,ispec) - deriv_mapping_crust_mantle(4,INDEX_IJK,ispec) = etax_crust_mantle(INDEX_IJK,ispec) - deriv_mapping_crust_mantle(5,INDEX_IJK,ispec) = etay_crust_mantle(INDEX_IJK,ispec) - deriv_mapping_crust_mantle(6,INDEX_IJK,ispec) = etaz_crust_mantle(INDEX_IJK,ispec) - deriv_mapping_crust_mantle(7,INDEX_IJK,ispec) = gammax_crust_mantle(INDEX_IJK,ispec) - deriv_mapping_crust_mantle(8,INDEX_IJK,ispec) = gammay_crust_mantle(INDEX_IJK,ispec) - deriv_mapping_crust_mantle(9,INDEX_IJK,ispec) = gammaz_crust_mantle(INDEX_IJK,ispec) + ! get derivatives of ux, uy and uz with respect to x, y and z + xixl = xix_crust_mantle(INDEX_IJK,ispec) + xiyl = xiy_crust_mantle(INDEX_IJK,ispec) + xizl = xiz_crust_mantle(INDEX_IJK,ispec) + etaxl = etax_crust_mantle(INDEX_IJK,ispec) + etayl = etay_crust_mantle(INDEX_IJK,ispec) + etazl = etaz_crust_mantle(INDEX_IJK,ispec) + gammaxl = gammax_crust_mantle(INDEX_IJK,ispec) + gammayl = gammay_crust_mantle(INDEX_IJK,ispec) + gammazl = gammaz_crust_mantle(INDEX_IJK,ispec) + + ! compute the Jacobian + jacobianl = (xixl*(etayl*gammazl-etazl*gammayl) & + - xiyl*(etaxl*gammazl-etazl*gammaxl) & + + xizl*(etaxl*gammayl-etayl*gammaxl)) + + ! checks Jacobian + if (jacobianl <= 0.0_CUSTOM_REAL) stop 'Error invalid Jacobian in crust/mantle element' + + deriv_mapping_crust_mantle(1,INDEX_IJK,ispec) = xixl + deriv_mapping_crust_mantle(2,INDEX_IJK,ispec) = xiyl + deriv_mapping_crust_mantle(3,INDEX_IJK,ispec) = xizl + deriv_mapping_crust_mantle(4,INDEX_IJK,ispec) = etaxl + deriv_mapping_crust_mantle(5,INDEX_IJK,ispec) = etayl + deriv_mapping_crust_mantle(6,INDEX_IJK,ispec) = etazl + deriv_mapping_crust_mantle(7,INDEX_IJK,ispec) = gammaxl + deriv_mapping_crust_mantle(8,INDEX_IJK,ispec) = gammayl + deriv_mapping_crust_mantle(9,INDEX_IJK,ispec) = gammazl ENDDO_LOOP_IJK @@ -504,15 +525,38 @@ subroutine prepare_fused_array() DO_LOOP_IJK - deriv_mapping_inner_core(1,INDEX_IJK,ispec) = xix_inner_core(INDEX_IJK,ispec) - deriv_mapping_inner_core(2,INDEX_IJK,ispec) = xiy_inner_core(INDEX_IJK,ispec) - deriv_mapping_inner_core(3,INDEX_IJK,ispec) = xiz_inner_core(INDEX_IJK,ispec) - deriv_mapping_inner_core(4,INDEX_IJK,ispec) = etax_inner_core(INDEX_IJK,ispec) - deriv_mapping_inner_core(5,INDEX_IJK,ispec) = etay_inner_core(INDEX_IJK,ispec) - deriv_mapping_inner_core(6,INDEX_IJK,ispec) = etaz_inner_core(INDEX_IJK,ispec) - deriv_mapping_inner_core(7,INDEX_IJK,ispec) = gammax_inner_core(INDEX_IJK,ispec) - deriv_mapping_inner_core(8,INDEX_IJK,ispec) = gammay_inner_core(INDEX_IJK,ispec) - deriv_mapping_inner_core(9,INDEX_IJK,ispec) = gammaz_inner_core(INDEX_IJK,ispec) + ! get derivatives of ux, uy and uz with respect to x, y and z + xixl = xix_inner_core(INDEX_IJK,ispec) + xiyl = xiy_inner_core(INDEX_IJK,ispec) + xizl = xiz_inner_core(INDEX_IJK,ispec) + etaxl = etax_inner_core(INDEX_IJK,ispec) + etayl = etay_inner_core(INDEX_IJK,ispec) + etazl = etaz_inner_core(INDEX_IJK,ispec) + gammaxl = gammax_inner_core(INDEX_IJK,ispec) + gammayl = gammay_inner_core(INDEX_IJK,ispec) + gammazl = gammaz_inner_core(INDEX_IJK,ispec) + + ! checks Jacobian + ! (for fictitious elements, the Jacobian is not valid) + if (idoubling_inner_core(ispec) /= IFLAG_IN_FICTITIOUS_CUBE) then + ! compute the Jacobian + jacobianl = (xixl*(etayl*gammazl-etazl*gammayl) & + - xiyl*(etaxl*gammazl-etazl*gammaxl) & + + xizl*(etaxl*gammayl-etayl*gammaxl)) + + ! checks Jacobian + if (jacobianl <= 0.0_CUSTOM_REAL) stop 'Error invalid Jacobian in inner core element' + endif + + deriv_mapping_inner_core(1,INDEX_IJK,ispec) = xixl + deriv_mapping_inner_core(2,INDEX_IJK,ispec) = xiyl + deriv_mapping_inner_core(3,INDEX_IJK,ispec) = xizl + deriv_mapping_inner_core(4,INDEX_IJK,ispec) = etaxl + deriv_mapping_inner_core(5,INDEX_IJK,ispec) = etayl + deriv_mapping_inner_core(6,INDEX_IJK,ispec) = etazl + deriv_mapping_inner_core(7,INDEX_IJK,ispec) = gammaxl + deriv_mapping_inner_core(8,INDEX_IJK,ispec) = gammayl + deriv_mapping_inner_core(9,INDEX_IJK,ispec) = gammazl ENDDO_LOOP_IJK @@ -529,15 +573,34 @@ subroutine prepare_fused_array() DO_LOOP_IJK - deriv_mapping_outer_core(1,INDEX_IJK,ispec) = xix_outer_core(INDEX_IJK,ispec) - deriv_mapping_outer_core(2,INDEX_IJK,ispec) = xiy_outer_core(INDEX_IJK,ispec) - deriv_mapping_outer_core(3,INDEX_IJK,ispec) = xiz_outer_core(INDEX_IJK,ispec) - deriv_mapping_outer_core(4,INDEX_IJK,ispec) = etax_outer_core(INDEX_IJK,ispec) - deriv_mapping_outer_core(5,INDEX_IJK,ispec) = etay_outer_core(INDEX_IJK,ispec) - deriv_mapping_outer_core(6,INDEX_IJK,ispec) = etaz_outer_core(INDEX_IJK,ispec) - deriv_mapping_outer_core(7,INDEX_IJK,ispec) = gammax_outer_core(INDEX_IJK,ispec) - deriv_mapping_outer_core(8,INDEX_IJK,ispec) = gammay_outer_core(INDEX_IJK,ispec) - deriv_mapping_outer_core(9,INDEX_IJK,ispec) = gammaz_outer_core(INDEX_IJK,ispec) + ! get derivatives of ux, uy and uz with respect to x, y and z + xixl = xix_outer_core(INDEX_IJK,ispec) + xiyl = xiy_outer_core(INDEX_IJK,ispec) + xizl = xiz_outer_core(INDEX_IJK,ispec) + etaxl = etax_outer_core(INDEX_IJK,ispec) + etayl = etay_outer_core(INDEX_IJK,ispec) + etazl = etaz_outer_core(INDEX_IJK,ispec) + gammaxl = gammax_outer_core(INDEX_IJK,ispec) + gammayl = gammay_outer_core(INDEX_IJK,ispec) + gammazl = gammaz_outer_core(INDEX_IJK,ispec) + + ! compute the Jacobian + jacobianl = (xixl*(etayl*gammazl-etazl*gammayl) & + - xiyl*(etaxl*gammazl-etazl*gammaxl) & + + xizl*(etaxl*gammayl-etayl*gammaxl)) + + ! checks Jacobian + if (jacobianl <= 0.0_CUSTOM_REAL) stop 'Error invalid Jacobian in outer core element' + + deriv_mapping_outer_core(1,INDEX_IJK,ispec) = xixl + deriv_mapping_outer_core(2,INDEX_IJK,ispec) = xiyl + deriv_mapping_outer_core(3,INDEX_IJK,ispec) = xizl + deriv_mapping_outer_core(4,INDEX_IJK,ispec) = etaxl + deriv_mapping_outer_core(5,INDEX_IJK,ispec) = etayl + deriv_mapping_outer_core(6,INDEX_IJK,ispec) = etazl + deriv_mapping_outer_core(7,INDEX_IJK,ispec) = gammaxl + deriv_mapping_outer_core(8,INDEX_IJK,ispec) = gammayl + deriv_mapping_outer_core(9,INDEX_IJK,ispec) = gammazl ENDDO_LOOP_IJK From 15bba52ab8722763d711cb85edd03a190389150e Mon Sep 17 00:00:00 2001 From: Daniel Peter Date: Thu, 20 Jun 2024 14:22:31 +0200 Subject: [PATCH 02/13] using modules and adding pure statements for compute_element_* routines --- src/specfem3D/SIEM_solve.F90 | 96 ---- src/specfem3D/compute_element.F90 | 437 +++++++++++++++++- src/specfem3D/compute_element_att_memory.F90 | 21 +- src/specfem3D/compute_element_strain.F90 | 93 ++-- .../compute_forces_crust_mantle_Dev.F90 | 16 +- .../compute_forces_crust_mantle_noDev.f90 | 51 +- .../compute_forces_inner_core_Dev.F90 | 38 +- .../compute_forces_inner_core_noDev.f90 | 31 +- .../compute_forces_outer_core_Dev.F90 | 4 +- src/specfem3D/compute_kernels.F90 | 6 + src/specfem3D/compute_seismograms.F90 | 3 + src/specfem3D/compute_strain_att.f90 | 6 + src/specfem3D/rules.mk | 12 + 13 files changed, 603 insertions(+), 211 deletions(-) diff --git a/src/specfem3D/SIEM_solve.F90 b/src/specfem3D/SIEM_solve.F90 index f27684ec2..5fe03f00a 100644 --- a/src/specfem3D/SIEM_solve.F90 +++ b/src/specfem3D/SIEM_solve.F90 @@ -313,99 +313,3 @@ subroutine solve_poisson_equation_backward() end subroutine solve_poisson_equation_backward -! -!------------------------------------------------------------------------------- -! - - subroutine SIEM_solve_element_add_full_gravity(ispec,nspec,nglob,gravity_rho,deriv_loc,ibool,pgrav,rho_s_H) - -! routine for crust/mantle and inner core elements to add full gravity contribution to rho_s_H array - - use constants, only: CUSTOM_REAL,NGLLX,NGLLY,NGLLZ,NDIM - - use specfem_par, only: & - hprime_xx,hprime_yy,hprime_zz,wgll_cube - - implicit none - - ! element id - integer,intent(in) :: ispec,nspec,nglob - - real(kind=CUSTOM_REAL),dimension(nglob),intent(in) :: gravity_rho - - real(kind=CUSTOM_REAL), dimension(9,NGLLX,NGLLY,NGLLZ),intent(in) :: deriv_loc - integer, dimension(NGLLX,NGLLY,NGLLZ,nspec),intent(in) :: ibool - - ! pertubation gravity - real(kind=CUSTOM_REAL), dimension(nglob),intent(in) :: pgrav - - ! gravity contribution - real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NGLLZ),intent(inout) :: rho_s_H - - ! local parameters - real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: pgrav_loc - real(kind=CUSTOM_REAL) :: xixl,xiyl,xizl,etaxl,etayl,etazl,gammaxl,gammayl,gammazl,jacobianl - real(kind=CUSTOM_REAL) :: tempx1l_phi,tempx2l_phi,tempx3l_phi - real(kind=CUSTOM_REAL) :: gradphi(NDIM) - real(kind=CUSTOM_REAL) :: rhol,factor - integer :: i,j,k,l,iglob - - ! pre-loads potential for this element - do k = 1,NGLLZ - do j = 1,NGLLY - do i = 1,NGLLX - iglob = ibool(i,j,k,ispec) - pgrav_loc(i,j,k) = pgrav(iglob) - enddo - enddo - enddo - - ! adds full gravity perturbation contribution to rho_s_H array - do k = 1,NGLLZ - do j = 1,NGLLY - do i = 1,NGLLX - xixl = deriv_loc(1,i,j,k) - xiyl = deriv_loc(2,i,j,k) - xizl = deriv_loc(3,i,j,k) - etaxl = deriv_loc(4,i,j,k) - etayl = deriv_loc(5,i,j,k) - etazl = deriv_loc(6,i,j,k) - gammaxl = deriv_loc(7,i,j,k) - gammayl = deriv_loc(8,i,j,k) - gammazl = deriv_loc(9,i,j,k) - - ! compute the Jacobian - jacobianl = 1._CUSTOM_REAL / (xixl*(etayl*gammazl-etazl*gammayl) & - - xiyl*(etaxl*gammazl-etazl*gammaxl) & - + xizl*(etaxl*gammayl-etayl*gammaxl)) - - ! for \grad\phi - tempx1l_phi = 0._CUSTOM_REAL - tempx2l_phi = 0._CUSTOM_REAL - tempx3l_phi = 0._CUSTOM_REAL - do l = 1,NGLLX - tempx1l_phi = tempx1l_phi + pgrav_loc(l,j,k)*hprime_xx(i,l) - tempx2l_phi = tempx2l_phi + pgrav_loc(i,l,k)*hprime_yy(j,l) - tempx3l_phi = tempx3l_phi + pgrav_loc(i,j,l)*hprime_zz(k,l) - enddo - - ! \grad\phi - gradphi(1) = xixl*tempx1l_phi + etaxl*tempx2l_phi + gammaxl*tempx3l_phi - gradphi(2) = xiyl*tempx1l_phi + etayl*tempx2l_phi + gammayl*tempx3l_phi - gradphi(3) = xizl*tempx1l_phi + etazl*tempx2l_phi + gammazl*tempx3l_phi - - ! get rho - iglob = ibool(i,j,k,ispec) - rhol = gravity_rho(iglob) - - factor = jacobianl * wgll_cube(i,j,k) - - ! adds full gravity contribution - rho_s_H(1,i,j,k) = rho_s_H(1,i,j,k) - factor * rhol * gradphi(1) - rho_s_H(2,i,j,k) = rho_s_H(2,i,j,k) - factor * rhol * gradphi(2) - rho_s_H(3,i,j,k) = rho_s_H(3,i,j,k) - factor * rhol * gradphi(3) - enddo - enddo - enddo - - end subroutine SIEM_solve_element_add_full_gravity diff --git a/src/specfem3D/compute_element.F90 b/src/specfem3D/compute_element.F90 index 5c10e61d8..721db7a2b 100644 --- a/src/specfem3D/compute_element.F90 +++ b/src/specfem3D/compute_element.F90 @@ -29,7 +29,20 @@ ! and macros INDEX_IJK, DO_LOOP_IJK, ENDDO_LOOP_IJK defined in config.fh #include "config.fh" +module mod_element + implicit none + + private + + public :: compute_element_iso + public :: compute_element_iso_ic + public :: compute_element_tiso + public :: compute_element_aniso + public :: compute_element_aniso_ic + public :: compute_element_add_full_gravity + +contains !-------------------------------------------------------------------------------------------- ! @@ -37,7 +50,7 @@ ! !-------------------------------------------------------------------------------------------- - subroutine compute_element_iso(ispec, & + pure subroutine compute_element_iso(ispec, & gravity_pre_store,gravity_H, & deriv, & wgll_cube, & @@ -133,7 +146,7 @@ subroutine compute_element_iso(ispec, & ! precomputes factors call compute_element_precompute_factors(tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3, & - deriv(:,:,:,:,ispec),jacobianl, & + deriv(1,1,1,1,ispec),jacobianl, & duxdxl,duydyl,duzdzl, & duxdxl_plus_duydyl,duxdxl_plus_duzdzl,duydyl_plus_duzdzl, & duxdyl_plus_duydxl,duzdxl_plus_duxdzl,duzdyl_plus_duydzl) @@ -193,7 +206,7 @@ subroutine compute_element_iso(ispec, & endif ! dot product of stress tensor with test vector, non-symmetric form - call compute_element_dot_product_stress(deriv(:,:,:,:,ispec),jacobianl, & + call compute_element_dot_product_stress(deriv(1,1,1,1,ispec),jacobianl, & sigma_xx,sigma_yy,sigma_zz, & sigma_xy,sigma_xz,sigma_yz,sigma_yx,sigma_zx,sigma_zy, & tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3) @@ -201,9 +214,311 @@ subroutine compute_element_iso(ispec, & end subroutine compute_element_iso +! left for reference: original routine... +! +! subroutine compute_element_iso(ispec, & +! gravity_pre_store,gravity_H, & +! deriv, & +! wgll_cube, & +! kappavstore,muvstore, & +! ibool, & +! R_xx,R_yy,R_xy,R_xz,R_yz, & +! epsilon_trace_over_3, & +! tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3, & +! dummyx_loc,dummyy_loc,dummyz_loc, & +! epsilondev_loc,rho_s_H) +! +!! isotropic element in crust/mantle region +! +! use constants, only: CUSTOM_REAL,NGLLX,NGLLY,NGLLZ,NDIM,N_SLS,FOUR_THIRDS,ONE_THIRD +! +! use constants_solver, only: & +! NSPEC => NSPEC_CRUST_MANTLE, & +! NGLOB => NGLOB_CRUST_MANTLE, & +! NSPEC_ATTENUATION => NSPEC_CRUST_MANTLE_ATTENUATION, & +! NSPEC_STRAIN_ONLY => NSPEC_CRUST_MANTLE_STRAIN_ONLY, & +! ATTENUATION_VAL, & +! PARTIAL_PHYS_DISPERSION_ONLY_VAL,GRAVITY_VAL +! +! use specfem_par, only: COMPUTE_AND_STORE_STRAIN +! +!#ifdef FORCE_VECTORIZATION +! use constants, only: NGLLCUBE +!#endif +! +! implicit none +! +! ! element id +! integer,intent(in) :: ispec +! +! ! arrays with mesh parameters per slice +! integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC),intent(in) :: ibool +! +! real(kind=CUSTOM_REAL), dimension(9,NGLLX,NGLLY,NGLLZ,NSPEC),intent(in) :: deriv +! +! ! array with derivatives of Lagrange polynomials and precalculated products +! real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ),intent(in) :: wgll_cube +! +! ! store anisotropic properties only where needed to save memory +! real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC),intent(in) :: kappavstore,muvstore +! +! ! attenuation +! ! memory variables for attenuation +! ! memory variables R_ij are stored at the local rather than global level +! ! to allow for optimization of cache access by compiler +! real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,N_SLS,NSPEC_ATTENUATION),intent(in) :: & +! R_xx,R_yy,R_xy,R_xz,R_yz +! +! real(kind=CUSTOM_REAL),dimension(NGLLX,NGLLY,NGLLZ,NSPEC_STRAIN_ONLY),intent(inout) :: epsilon_trace_over_3 +! +! ! gravity +! real(kind=CUSTOM_REAL),dimension(NDIM,NGLOB),intent(in) :: gravity_pre_store +! real(kind=CUSTOM_REAL),dimension(6,NGLOB),intent(in) :: gravity_H +! +! ! element info +! real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ),intent(inout) :: & +! tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3 +! +! real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ),intent(in) :: dummyx_loc,dummyy_loc,dummyz_loc +! +! real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NGLLZ),intent(inout) :: rho_s_H +! real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,5),intent(inout) :: epsilondev_loc +! +! ! local parameters +! !real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: jacobianl +! +! !real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: duxdxl, duydyl, duzdzl +! !real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: duxdxl_plus_duydyl,duxdxl_plus_duzdzl,duydyl_plus_duzdzl +! !real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: duxdyl_plus_duydxl,duzdxl_plus_duxdzl,duzdyl_plus_duydzl +! +! !real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: sigma_xx,sigma_yy,sigma_zz +! !real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: sigma_xy,sigma_xz,sigma_yz,sigma_yx,sigma_zx,sigma_zy +! +! real(kind=CUSTOM_REAL) :: jacobianl +! real(kind=CUSTOM_REAL) :: duxdxl,duxdyl,duxdzl,duydxl,duydyl,duydzl,duzdxl,duzdyl,duzdzl +! real(kind=CUSTOM_REAL) :: duxdxl_plus_duydyl,duxdxl_plus_duzdzl,duydyl_plus_duzdzl +! real(kind=CUSTOM_REAL) :: duxdyl_plus_duydxl,duzdxl_plus_duxdzl,duzdyl_plus_duydzl +! real(kind=CUSTOM_REAL) :: sigma_xx,sigma_yy,sigma_zz +! real(kind=CUSTOM_REAL) :: sigma_xy,sigma_xz,sigma_yz,sigma_yx,sigma_zx,sigma_zy +! +! real(kind=CUSTOM_REAL) :: lambdal,mul,lambdalplus2mul +! real(kind=CUSTOM_REAL) :: kappal +! +! real(kind=CUSTOM_REAL) :: xixl,xiyl,xizl,etaxl,etayl,etazl,gammaxl,gammayl,gammazl +! real(kind=CUSTOM_REAL) :: templ +! ! attenuation +! real(kind=CUSTOM_REAL) :: R_xx_val,R_yy_val +! ! gravity +! real(kind=CUSTOM_REAL) :: factor,sx_l,sy_l,sz_l,gxl,gyl,gzl +! real(kind=CUSTOM_REAL) :: Hxxl,Hyyl,Hzzl,Hxyl,Hxzl,Hyzl +! integer :: iglob +! +!#ifdef FORCE_VECTORIZATION +!! in this vectorized version we have to assume that N_SLS == 3 in order to be able to unroll and thus suppress +!! an inner loop that would otherwise prevent vectorization; this is safe in practice in all cases because N_SLS == 3 +!! in all known applications, and in the main program we check that N_SLS == 3 if FORCE_VECTORIZATION is used and we stop +! integer :: ijk +!#else +! integer :: i,j,k +!#endif +!! note: profiling shows that this routine takes about 60% of the total time, another 30% is spend in the tiso routine below.. +! +! ! isotropic element +! DO_LOOP_IJK +! +! ! precomputes factors +! ! get derivatives of ux, uy and uz with respect to x, y and z +! xixl = deriv(1,INDEX_IJK,ispec) +! xiyl = deriv(2,INDEX_IJK,ispec) +! xizl = deriv(3,INDEX_IJK,ispec) +! etaxl = deriv(4,INDEX_IJK,ispec) +! etayl = deriv(5,INDEX_IJK,ispec) +! etazl = deriv(6,INDEX_IJK,ispec) +! gammaxl = deriv(7,INDEX_IJK,ispec) +! gammayl = deriv(8,INDEX_IJK,ispec) +! gammazl = deriv(9,INDEX_IJK,ispec) +! +! ! compute the Jacobian +! jacobianl = 1.0_CUSTOM_REAL / (xixl*(etayl*gammazl-etazl*gammayl) & +! - xiyl*(etaxl*gammazl-etazl*gammaxl) & +! + xizl*(etaxl*gammayl-etayl*gammaxl)) +! +! duxdxl = xixl*tempx1(INDEX_IJK) + etaxl*tempx2(INDEX_IJK) + gammaxl*tempx3(INDEX_IJK) +! duxdyl = xiyl*tempx1(INDEX_IJK) + etayl*tempx2(INDEX_IJK) + gammayl*tempx3(INDEX_IJK) +! duxdzl = xizl*tempx1(INDEX_IJK) + etazl*tempx2(INDEX_IJK) + gammazl*tempx3(INDEX_IJK) +! +! duydxl = xixl*tempy1(INDEX_IJK) + etaxl*tempy2(INDEX_IJK) + gammaxl*tempy3(INDEX_IJK) +! duydyl = xiyl*tempy1(INDEX_IJK) + etayl*tempy2(INDEX_IJK) + gammayl*tempy3(INDEX_IJK) +! duydzl = xizl*tempy1(INDEX_IJK) + etazl*tempy2(INDEX_IJK) + gammazl*tempy3(INDEX_IJK) +! +! duzdxl = xixl*tempz1(INDEX_IJK) + etaxl*tempz2(INDEX_IJK) + gammaxl*tempz3(INDEX_IJK) +! duzdyl = xiyl*tempz1(INDEX_IJK) + etayl*tempz2(INDEX_IJK) + gammayl*tempz3(INDEX_IJK) +! duzdzl = xizl*tempz1(INDEX_IJK) + etazl*tempz2(INDEX_IJK) + gammazl*tempz3(INDEX_IJK) +! +! ! precompute some sums to save CPU time +! duxdxl_plus_duydyl = duxdxl + duydyl +! duxdxl_plus_duzdzl = duxdxl + duzdzl +! duydyl_plus_duzdzl = duydyl + duzdzl +! duxdyl_plus_duydxl = duxdyl + duydxl +! duzdxl_plus_duxdzl = duzdxl + duxdzl +! duzdyl_plus_duydzl = duzdyl + duydzl +! +! ! compute deviatoric strain +! if (COMPUTE_AND_STORE_STRAIN) then +! templ = ONE_THIRD * (duxdxl + duydyl + duzdzl) +! epsilondev_loc(INDEX_IJK,1) = duxdxl - templ +! epsilondev_loc(INDEX_IJK,2) = duydyl - templ +! epsilondev_loc(INDEX_IJK,3) = 0.5_CUSTOM_REAL * duxdyl_plus_duydxl +! epsilondev_loc(INDEX_IJK,4) = 0.5_CUSTOM_REAL * duzdxl_plus_duxdzl +! epsilondev_loc(INDEX_IJK,5) = 0.5_CUSTOM_REAL * duzdyl_plus_duydzl +! if (NSPEC_STRAIN_ONLY > 1) then +! epsilon_trace_over_3(INDEX_IJK,ispec) = templ +! endif +! endif +! +! ! +! ! compute isotropic elements +! ! +! ! layer with no transverse isotropy, use kappav and muv +! kappal = kappavstore(INDEX_IJK,ispec) +! mul = muvstore(INDEX_IJK,ispec) +! +! lambdalplus2mul = kappal + FOUR_THIRDS * mul +! lambdal = lambdalplus2mul - 2.0_CUSTOM_REAL*mul +! +! ! compute stress sigma +! sigma_xx = lambdalplus2mul*duxdxl + lambdal*duydyl_plus_duzdzl +! sigma_yy = lambdalplus2mul*duydyl + lambdal*duxdxl_plus_duzdzl +! sigma_zz = lambdalplus2mul*duzdzl + lambdal*duxdxl_plus_duydyl +! +! sigma_xy = mul*duxdyl_plus_duydxl +! sigma_xz = mul*duzdxl_plus_duxdzl +! sigma_yz = mul*duzdyl_plus_duydzl +! +! ! attenuation contribution to stress +! ! subtract memory variables if attenuation +! if (ATTENUATION_VAL .and. .not. PARTIAL_PHYS_DISPERSION_ONLY_VAL) then +!#ifdef FORCE_VECTORIZATION +! ! here we assume that N_SLS == 3 in order to be able to unroll and suppress the loop +! ! in order to vectorize the outer loop +! R_xx_val = R_xx(INDEX_IJK,1,ispec) +! R_yy_val = R_yy(INDEX_IJK,1,ispec) +! sigma_xx = sigma_xx - R_xx_val +! sigma_yy = sigma_yy - R_yy_val +! sigma_zz = sigma_zz + R_xx_val + R_yy_val +! sigma_xy = sigma_xy - R_xy(INDEX_IJK,1,ispec) +! sigma_xz = sigma_xz - R_xz(INDEX_IJK,1,ispec) +! sigma_yz = sigma_yz - R_yz(INDEX_IJK,1,ispec) +! +! R_xx_val = R_xx(INDEX_IJK,2,ispec) +! R_yy_val = R_yy(INDEX_IJK,2,ispec) +! sigma_xx = sigma_xx - R_xx_val +! sigma_yy = sigma_yy - R_yy_val +! sigma_zz = sigma_zz + R_xx_val + R_yy_val +! sigma_xy = sigma_xy - R_xy(INDEX_IJK,2,ispec) +! sigma_xz = sigma_xz - R_xz(INDEX_IJK,2,ispec) +! sigma_yz = sigma_yz - R_yz(INDEX_IJK,2,ispec) +! +! R_xx_val = R_xx(INDEX_IJK,3,ispec) +! R_yy_val = R_yy(INDEX_IJK,3,ispec) +! sigma_xx = sigma_xx - R_xx_val +! sigma_yy = sigma_yy - R_yy_val +! sigma_zz = sigma_zz + R_xx_val + R_yy_val +! sigma_xy = sigma_xy - R_xy(INDEX_IJK,3,ispec) +! sigma_xz = sigma_xz - R_xz(INDEX_IJK,3,ispec) +! sigma_yz = sigma_yz - R_yz(INDEX_IJK,3,ispec) +!#else +! ! loops over standard linear solids +! do i_SLS = 1,N_SLS +! R_xx_val = R_xx(INDEX_IJK,i_SLS,ispec) +! R_yy_val = R_yy(INDEX_IJK,i_SLS,ispec) +! sigma_xx = sigma_xx - R_xx_val +! sigma_yy = sigma_yy - R_yy_val +! sigma_zz = sigma_zz + R_xx_val + R_yy_val +! sigma_xy = sigma_xy - R_xy(INDEX_IJK,i_SLS,ispec) +! sigma_xz = sigma_xz - R_xz(INDEX_IJK,i_SLS,ispec) +! sigma_yz = sigma_yz - R_yz(INDEX_IJK,i_SLS,ispec) +! enddo +!#endif +! endif +! +! ! define symmetric components of sigma (to be general in case of gravity) +! sigma_yx = sigma_xy +! sigma_zx = sigma_xz +! sigma_zy = sigma_yz +! +! ! compute non-symmetric terms for gravity +! if (GRAVITY_VAL) then +! ! use mesh coordinates to get theta and phi +! ! x y and z contain r theta and phi +! iglob = ibool(INDEX_IJK,ispec) +! +! ! Cartesian components of the gravitational acceleration +! gxl = gravity_pre_store(1,iglob) ! minus_g*sin_theta*cos_phi * rho +! gyl = gravity_pre_store(2,iglob) ! minus_g*sin_theta*sin_phi * rho +! gzl = gravity_pre_store(3,iglob) ! minus_g*cos_theta * rho +! +! ! Cartesian components of gradient of gravitational acceleration +! ! get displacement and multiply by density to compute G tensor +! sx_l = dummyx_loc(INDEX_IJK) +! sy_l = dummyy_loc(INDEX_IJK) +! sz_l = dummyz_loc(INDEX_IJK) +! +! ! compute G tensor from s . g and add to sigma (not symmetric) +! sigma_xx = sigma_xx + sy_l * gyl + sz_l * gzl +! sigma_yy = sigma_yy + sx_l * gxl + sz_l * gzl +! sigma_zz = sigma_zz + sx_l * gxl + sy_l * gyl +! +! sigma_xy = sigma_xy - sx_l * gyl +! sigma_yx = sigma_yx - sy_l * gxl +! +! sigma_xz = sigma_xz - sx_l * gzl +! sigma_zx = sigma_zx - sz_l * gxl +! +! sigma_yz = sigma_yz - sy_l * gzl +! sigma_zy = sigma_zy - sz_l * gyl +! +! Hxxl = gravity_H(1,iglob) +! Hyyl = gravity_H(2,iglob) +! Hzzl = gravity_H(3,iglob) +! Hxyl = gravity_H(4,iglob) +! Hxzl = gravity_H(5,iglob) +! Hyzl = gravity_H(6,iglob) +! +! ! precompute vector +! factor = jacobianl * wgll_cube(INDEX_IJK) +! +! rho_s_H(1,INDEX_IJK) = factor * (sx_l * Hxxl + sy_l * Hxyl + sz_l * Hxzl) +! rho_s_H(2,INDEX_IJK) = factor * (sx_l * Hxyl + sy_l * Hyyl + sz_l * Hyzl) +! rho_s_H(3,INDEX_IJK) = factor * (sx_l * Hxzl + sy_l * Hyzl + sz_l * Hzzl) +! endif +! +! ! dot product of stress tensor with test vector, non-symmetric form +! !call compute_element_dot_product_stress(deriv(1,1,1,1,ispec),jacobianl, & +! ! sigma_xx,sigma_yy,sigma_zz, & +! ! sigma_xy,sigma_xz,sigma_yz,sigma_yx,sigma_zx,sigma_zy, & +! ! tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3) +! +! ! form dot product with test vector, non-symmetric form +! tempx1(INDEX_IJK) = jacobianl * (sigma_xx*xixl + sigma_yx*xiyl + sigma_zx*xizl) ! this goes to accel_x +! tempy1(INDEX_IJK) = jacobianl * (sigma_xy*xixl + sigma_yy*xiyl + sigma_zy*xizl) ! this goes to accel_y +! tempz1(INDEX_IJK) = jacobianl * (sigma_xz*xixl + sigma_yz*xiyl + sigma_zz*xizl) ! this goes to accel_z +! +! tempx2(INDEX_IJK) = jacobianl * (sigma_xx*etaxl + sigma_yx*etayl + sigma_zx*etazl) ! this goes to accel_x +! tempy2(INDEX_IJK) = jacobianl * (sigma_xy*etaxl + sigma_yy*etayl + sigma_zy*etazl) ! this goes to accel_y +! tempz2(INDEX_IJK) = jacobianl * (sigma_xz*etaxl + sigma_yz*etayl + sigma_zz*etazl) ! this goes to accel_z +! +! tempx3(INDEX_IJK) = jacobianl * (sigma_xx*gammaxl + sigma_yx*gammayl + sigma_zx*gammazl) ! this goes to accel_x +! tempy3(INDEX_IJK) = jacobianl * (sigma_xy*gammaxl + sigma_yy*gammayl + sigma_zy*gammazl) ! this goes to accel_y +! tempz3(INDEX_IJK) = jacobianl * (sigma_xz*gammaxl + sigma_yz*gammayl + sigma_zz*gammazl) ! this goes to accel_z +! ENDDO_LOOP_IJK +! +! end subroutine compute_element_iso + + !-------------------------------------------------------------------------------------------- - subroutine compute_element_iso_ic(ispec, & + pure subroutine compute_element_iso_ic(ispec, & gravity_pre_store,gravity_H, & deriv, & wgll_cube, & @@ -374,7 +689,7 @@ end subroutine compute_element_iso_ic ! !-------------------------------------------------------------------------------------------- - subroutine compute_element_tiso(ispec, & + pure subroutine compute_element_tiso(ispec, & gravity_pre_store,gravity_H, & deriv, & wgll_cube, & @@ -977,7 +1292,7 @@ end subroutine compute_element_tiso ! !-------------------------------------------------------------------------------------------- - subroutine compute_element_aniso(ispec, & + pure subroutine compute_element_aniso(ispec, & gravity_pre_store,gravity_H, & deriv, & wgll_cube, & @@ -1174,7 +1489,7 @@ end subroutine compute_element_aniso ! !-------------------------------------------------------------------------------------------- - subroutine compute_element_aniso_ic(ispec, & + pure subroutine compute_element_aniso_ic(ispec, & gravity_pre_store,gravity_H, & deriv, & wgll_cube, & @@ -1389,7 +1704,7 @@ end subroutine compute_element_aniso_ic ! please leave this routine in this file, to help compilers inlining this function... ! - subroutine compute_element_stress_attenuation_contrib(R_xx_loc,R_yy_loc,R_xy_loc,R_xz_loc,R_yz_loc, & + pure subroutine compute_element_stress_attenuation_contrib(R_xx_loc,R_yy_loc,R_xy_loc,R_xz_loc,R_yz_loc, & sigma_xx,sigma_yy,sigma_zz,sigma_xy,sigma_xz,sigma_yz) ! we can force inlining (Intel compiler) @@ -1492,7 +1807,7 @@ end subroutine compute_element_stress_attenuation_contrib ! please leave this routine in this file, to help compilers inlining this function... - subroutine compute_element_precompute_factors(tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3, & + pure subroutine compute_element_precompute_factors(tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3, & deriv_loc,jacobianl, & duxdxl,duydyl,duzdzl, & duxdxl_plus_duydyl,duxdxl_plus_duzdzl,duydyl_plus_duzdzl, & @@ -1605,7 +1920,7 @@ end subroutine compute_element_precompute_factors ! please leave this routine in this file, to help compilers inlining this function... - subroutine compute_element_deviatoric_strain(duxdxl,duydyl,duzdzl, & + pure subroutine compute_element_deviatoric_strain(duxdxl,duydyl,duzdzl, & duxdyl_plus_duydxl,duzdxl_plus_duxdzl,duzdyl_plus_duydzl, & ispec,NSPEC_STRAIN_ONLY, & epsilon_trace_over_3,epsilondev_loc) @@ -1693,7 +2008,7 @@ end subroutine compute_element_deviatoric_strain ! please leave this routine in this file, to help compilers inlining this function... - subroutine compute_element_dot_product_stress(deriv_loc,jacobianl, & + pure subroutine compute_element_dot_product_stress(deriv_loc,jacobianl, & sigma_xx,sigma_yy,sigma_zz, & sigma_xy,sigma_xz,sigma_yz,sigma_yx,sigma_zx,sigma_zy, & tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3) @@ -1796,7 +2111,7 @@ end subroutine compute_element_dot_product_stress ! please leave this routine in this file, to help compilers inlining this function... - subroutine compute_element_gravity(ispec,NSPEC,NGLOB,ibool,jacobianl,wgll_cube, & + pure subroutine compute_element_gravity(ispec,NSPEC,NGLOB,ibool,jacobianl,wgll_cube, & gravity_pre_store,gravity_H, & dummyx_loc,dummyy_loc,dummyz_loc, & sigma_xx,sigma_yy,sigma_zz, & @@ -2059,3 +2374,101 @@ end subroutine compute_element_gravity ! ! end subroutine compute_element_gravity +! +!------------------------------------------------------------------------------- +! + + pure subroutine compute_element_add_full_gravity(ispec,nspec,nglob,gravity_rho,deriv_loc,ibool,pgrav,rho_s_H) + +! routine for crust/mantle and inner core elements to add full gravity contribution to rho_s_H array + + use constants, only: CUSTOM_REAL,NGLLX,NGLLY,NGLLZ,NDIM + + use specfem_par, only: & + hprime_xx,hprime_yy,hprime_zz,wgll_cube + + implicit none + + ! element id + integer,intent(in) :: ispec,nspec,nglob + + real(kind=CUSTOM_REAL),dimension(nglob),intent(in) :: gravity_rho + + real(kind=CUSTOM_REAL), dimension(9,NGLLX,NGLLY,NGLLZ),intent(in) :: deriv_loc + integer, dimension(NGLLX,NGLLY,NGLLZ,nspec),intent(in) :: ibool + + ! pertubation gravity + real(kind=CUSTOM_REAL), dimension(nglob),intent(in) :: pgrav + + ! gravity contribution + real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NGLLZ),intent(inout) :: rho_s_H + + ! local parameters + real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: pgrav_loc + real(kind=CUSTOM_REAL) :: xixl,xiyl,xizl,etaxl,etayl,etazl,gammaxl,gammayl,gammazl,jacobianl + real(kind=CUSTOM_REAL) :: tempx1l_phi,tempx2l_phi,tempx3l_phi + real(kind=CUSTOM_REAL) :: gradphi(NDIM) + real(kind=CUSTOM_REAL) :: rhol,factor + integer :: i,j,k,l,iglob + + ! pre-loads potential for this element + do k = 1,NGLLZ + do j = 1,NGLLY + do i = 1,NGLLX + iglob = ibool(i,j,k,ispec) + pgrav_loc(i,j,k) = pgrav(iglob) + enddo + enddo + enddo + + ! adds full gravity perturbation contribution to rho_s_H array + do k = 1,NGLLZ + do j = 1,NGLLY + do i = 1,NGLLX + xixl = deriv_loc(1,i,j,k) + xiyl = deriv_loc(2,i,j,k) + xizl = deriv_loc(3,i,j,k) + etaxl = deriv_loc(4,i,j,k) + etayl = deriv_loc(5,i,j,k) + etazl = deriv_loc(6,i,j,k) + gammaxl = deriv_loc(7,i,j,k) + gammayl = deriv_loc(8,i,j,k) + gammazl = deriv_loc(9,i,j,k) + + ! compute the Jacobian + jacobianl = 1._CUSTOM_REAL / (xixl*(etayl*gammazl-etazl*gammayl) & + - xiyl*(etaxl*gammazl-etazl*gammaxl) & + + xizl*(etaxl*gammayl-etayl*gammaxl)) + + ! for \grad\phi + tempx1l_phi = 0._CUSTOM_REAL + tempx2l_phi = 0._CUSTOM_REAL + tempx3l_phi = 0._CUSTOM_REAL + do l = 1,NGLLX + tempx1l_phi = tempx1l_phi + pgrav_loc(l,j,k)*hprime_xx(i,l) + tempx2l_phi = tempx2l_phi + pgrav_loc(i,l,k)*hprime_yy(j,l) + tempx3l_phi = tempx3l_phi + pgrav_loc(i,j,l)*hprime_zz(k,l) + enddo + + ! \grad\phi + gradphi(1) = xixl*tempx1l_phi + etaxl*tempx2l_phi + gammaxl*tempx3l_phi + gradphi(2) = xiyl*tempx1l_phi + etayl*tempx2l_phi + gammayl*tempx3l_phi + gradphi(3) = xizl*tempx1l_phi + etazl*tempx2l_phi + gammazl*tempx3l_phi + + ! get rho + iglob = ibool(i,j,k,ispec) + rhol = gravity_rho(iglob) + + factor = jacobianl * wgll_cube(i,j,k) + + ! adds full gravity contribution + rho_s_H(1,i,j,k) = rho_s_H(1,i,j,k) - factor * rhol * gradphi(1) + rho_s_H(2,i,j,k) = rho_s_H(2,i,j,k) - factor * rhol * gradphi(2) + rho_s_H(3,i,j,k) = rho_s_H(3,i,j,k) - factor * rhol * gradphi(3) + enddo + enddo + enddo + + end subroutine compute_element_add_full_gravity + +end module mod_element diff --git a/src/specfem3D/compute_element_att_memory.F90 b/src/specfem3D/compute_element_att_memory.F90 index bc6e0d690..e54a0a2d0 100644 --- a/src/specfem3D/compute_element_att_memory.F90 +++ b/src/specfem3D/compute_element_att_memory.F90 @@ -30,6 +30,18 @@ #include "config.fh" +module mod_element_att + + implicit none + + private + + public :: compute_element_att_memory_cm + public :: compute_element_att_memory_ic + public :: compute_element_att_memory_cm_lddrk + public :: compute_element_att_memory_ic_lddrk + +contains !-------------------------------------------------------------------------------------------- ! @@ -38,7 +50,7 @@ !-------------------------------------------------------------------------------------------- - subroutine compute_element_att_memory_cm(ispec,R_xx,R_yy,R_xy,R_xz,R_yz, & + pure subroutine compute_element_att_memory_cm(ispec,R_xx,R_yy,R_xy,R_xz,R_yz, & vx,vy,vz,vnspec,factor_common, & alphaval,betaval,gammaval, & muvstore, & @@ -146,7 +158,7 @@ end subroutine compute_element_att_memory_cm !-------------------------------------------------------------------------------------------- ! - subroutine compute_element_att_memory_cm_lddrk(ispec,R_xx,R_yy,R_xy,R_xz,R_yz, & + pure subroutine compute_element_att_memory_cm_lddrk(ispec,R_xx,R_yy,R_xy,R_xz,R_yz, & R_xx_lddrk,R_yy_lddrk,R_xy_lddrk,R_xz_lddrk,R_yz_lddrk, & vx,vy,vz,vnspec,factor_common, & muvstore, & @@ -271,7 +283,7 @@ end subroutine compute_element_att_memory_cm_lddrk !-------------------------------------------------------------------------------------------- - subroutine compute_element_att_memory_ic(ispec,R_xx,R_yy,R_xy,R_xz,R_yz, & + pure subroutine compute_element_att_memory_ic(ispec,R_xx,R_yy,R_xy,R_xz,R_yz, & vx,vy,vz,vnspec,factor_common, & alphaval,betaval,gammaval, & muvstore, & @@ -380,7 +392,7 @@ end subroutine compute_element_att_memory_ic !-------------------------------------------------------------------------------------------- ! - subroutine compute_element_att_memory_ic_lddrk(ispec,R_xx,R_yy,R_xy,R_xz,R_yz, & + pure subroutine compute_element_att_memory_ic_lddrk(ispec,R_xx,R_yy,R_xy,R_xz,R_yz, & R_xx_lddrk,R_yy_lddrk,R_xy_lddrk,R_xz_lddrk,R_yz_lddrk, & vx,vy,vz,vnspec,factor_common, & muvstore, & @@ -598,3 +610,4 @@ end subroutine compute_element_att_memory_ic_lddrk ! end subroutine compute_element_att_mem_up_cm ! +end module mod_element_att diff --git a/src/specfem3D/compute_element_strain.F90 b/src/specfem3D/compute_element_strain.F90 index 06ccfb9f8..88931d6eb 100644 --- a/src/specfem3D/compute_element_strain.F90 +++ b/src/specfem3D/compute_element_strain.F90 @@ -29,9 +29,21 @@ ! and macros INDEX_IJK, DO_LOOP_IJK, ENDDO_LOOP_IJK defined in config.fh #include "config.fh" +module mod_element_strain + implicit none + + private + + public :: compute_element_strain_att_Dev + public :: compute_element_strain_att_noDev + + public :: compute_element_strain_undoatt_Dev + public :: compute_element_strain_undoatt_noDev + +contains - subroutine compute_element_strain_undoatt_Dev(ispec,nglob,nspec, & + pure subroutine compute_element_strain_undoatt_Dev(ispec,nglob,nspec, & displ,ibool, & hprime_xx,hprime_xxT, & deriv, & @@ -153,7 +165,7 @@ subroutine compute_element_strain_undoatt_Dev(ispec,nglob,nspec, & ! ! please leave the routines here to help compilers inlining the code - subroutine mxm5_3comp_singleA(A,n1,B1,B2,B3,C1,C2,C3,n3) + pure subroutine mxm5_3comp_singleA(A,n1,B1,B2,B3,C1,C2,C3,n3) ! 3 different arrays for x/y/z-components, 2-dimensional arrays (25,5)/(5,25), same B matrix for all 3 component arrays @@ -162,9 +174,9 @@ subroutine mxm5_3comp_singleA(A,n1,B1,B2,B3,C1,C2,C3,n3) implicit none integer,intent(in) :: n1,n3 - real(kind=CUSTOM_REAL),dimension(n1,5) :: A - real(kind=CUSTOM_REAL),dimension(5,n3) :: B1,B2,B3 - real(kind=CUSTOM_REAL),dimension(n1,n3) :: C1,C2,C3 + real(kind=CUSTOM_REAL),dimension(n1,5),intent(in) :: A + real(kind=CUSTOM_REAL),dimension(5,n3),intent(in) :: B1,B2,B3 + real(kind=CUSTOM_REAL),dimension(n1,n3),intent(out) :: C1,C2,C3 ! local parameters integer :: i,j @@ -197,7 +209,7 @@ end subroutine mxm5_3comp_singleA !-------------------------------------------------------------------------------------------- - subroutine mxm5_3comp_singleB(A1,A2,A3,n1,B,C1,C2,C3,n3) + pure subroutine mxm5_3comp_singleB(A1,A2,A3,n1,B,C1,C2,C3,n3) ! 3 different arrays for x/y/z-components, 2-dimensional arrays (25,5)/(5,25), same B matrix for all 3 component arrays @@ -206,9 +218,9 @@ subroutine mxm5_3comp_singleB(A1,A2,A3,n1,B,C1,C2,C3,n3) implicit none integer,intent(in) :: n1,n3 - real(kind=CUSTOM_REAL),dimension(n1,5) :: A1,A2,A3 - real(kind=CUSTOM_REAL),dimension(5,n3) :: B - real(kind=CUSTOM_REAL),dimension(n1,n3) :: C1,C2,C3 + real(kind=CUSTOM_REAL),dimension(n1,5),intent(in) :: A1,A2,A3 + real(kind=CUSTOM_REAL),dimension(5,n3),intent(in) :: B + real(kind=CUSTOM_REAL),dimension(n1,n3),intent(out) :: C1,C2,C3 ! local parameters integer :: i,j @@ -241,7 +253,7 @@ end subroutine mxm5_3comp_singleB !-------------------------------------------------------------------------------------------- - subroutine mxm5_3comp_3dmat_singleB(A1,A2,A3,n1,B,n2,C1,C2,C3,n3) + pure subroutine mxm5_3comp_3dmat_singleB(A1,A2,A3,n1,B,n2,C1,C2,C3,n3) ! 3 different arrays for x/y/z-components, 3-dimensional arrays (5,5,5), same B matrix for all 3 component arrays @@ -250,9 +262,9 @@ subroutine mxm5_3comp_3dmat_singleB(A1,A2,A3,n1,B,n2,C1,C2,C3,n3) implicit none integer,intent(in) :: n1,n2,n3 - real(kind=CUSTOM_REAL),dimension(n1,5,n3) :: A1,A2,A3 - real(kind=CUSTOM_REAL),dimension(5,n2) :: B - real(kind=CUSTOM_REAL),dimension(n1,n2,n3) :: C1,C2,C3 + real(kind=CUSTOM_REAL),dimension(n1,5,n3),intent(in) :: A1,A2,A3 + real(kind=CUSTOM_REAL),dimension(5,n2),intent(in) :: B + real(kind=CUSTOM_REAL),dimension(n1,n2,n3),intent(out) :: C1,C2,C3 ! local parameters integer :: i,j,k @@ -293,7 +305,7 @@ end subroutine compute_element_strain_undoatt_Dev !-------------------------------------------------------------------------------------------- ! - subroutine compute_element_strain_undoatt_noDev(ispec,nglob,nspec, & + pure subroutine compute_element_strain_undoatt_noDev(ispec,nglob,nspec, & displ, & hprime_xx,hprime_yy,hprime_zz, & ibool, & @@ -438,7 +450,7 @@ end subroutine compute_element_strain_undoatt_noDev !-------------------------------------------------------------------------------------------- - subroutine compute_element_strain_att_Dev(ispec,nglob,nspec, & + pure subroutine compute_element_strain_att_Dev(ispec,nglob,nspec, & displ,veloc,deltat, & ibool, & hprime_xx,hprime_xxT, & @@ -570,7 +582,7 @@ subroutine compute_element_strain_att_Dev(ispec,nglob,nspec, & ! ! please leave the routines here to help compilers inlining the code - subroutine mxm5_3comp_singleA(A,n1,B1,B2,B3,C1,C2,C3,n3) + pure subroutine mxm5_3comp_singleA(A,n1,B1,B2,B3,C1,C2,C3,n3) ! 3 different arrays for x/y/z-components, 2-dimensional arrays (25,5)/(5,25), same B matrix for all 3 component arrays @@ -579,9 +591,9 @@ subroutine mxm5_3comp_singleA(A,n1,B1,B2,B3,C1,C2,C3,n3) implicit none integer,intent(in) :: n1,n3 - real(kind=CUSTOM_REAL),dimension(n1,5) :: A - real(kind=CUSTOM_REAL),dimension(5,n3) :: B1,B2,B3 - real(kind=CUSTOM_REAL),dimension(n1,n3) :: C1,C2,C3 + real(kind=CUSTOM_REAL),dimension(n1,5),intent(in) :: A + real(kind=CUSTOM_REAL),dimension(5,n3),intent(in) :: B1,B2,B3 + real(kind=CUSTOM_REAL),dimension(n1,n3),intent(out) :: C1,C2,C3 ! local parameters integer :: i,j @@ -614,7 +626,7 @@ end subroutine mxm5_3comp_singleA !-------------------------------------------------------------------------------------------- - subroutine mxm5_3comp_singleB(A1,A2,A3,n1,B,C1,C2,C3,n3) + pure subroutine mxm5_3comp_singleB(A1,A2,A3,n1,B,C1,C2,C3,n3) ! 3 different arrays for x/y/z-components, 2-dimensional arrays (25,5)/(5,25), same B matrix for all 3 component arrays @@ -623,9 +635,9 @@ subroutine mxm5_3comp_singleB(A1,A2,A3,n1,B,C1,C2,C3,n3) implicit none integer,intent(in) :: n1,n3 - real(kind=CUSTOM_REAL),dimension(n1,5) :: A1,A2,A3 - real(kind=CUSTOM_REAL),dimension(5,n3) :: B - real(kind=CUSTOM_REAL),dimension(n1,n3) :: C1,C2,C3 + real(kind=CUSTOM_REAL),dimension(n1,5),intent(in) :: A1,A2,A3 + real(kind=CUSTOM_REAL),dimension(5,n3),intent(in) :: B + real(kind=CUSTOM_REAL),dimension(n1,n3),intent(out) :: C1,C2,C3 ! local parameters integer :: i,j @@ -658,7 +670,7 @@ end subroutine mxm5_3comp_singleB !-------------------------------------------------------------------------------------------- - subroutine mxm5_3comp_3dmat_singleB(A1,A2,A3,n1,B,n2,C1,C2,C3,n3) + pure subroutine mxm5_3comp_3dmat_singleB(A1,A2,A3,n1,B,n2,C1,C2,C3,n3) ! 3 different arrays for x/y/z-components, 3-dimensional arrays (5,5,5), same B matrix for all 3 component arrays @@ -667,9 +679,9 @@ subroutine mxm5_3comp_3dmat_singleB(A1,A2,A3,n1,B,n2,C1,C2,C3,n3) implicit none integer,intent(in) :: n1,n2,n3 - real(kind=CUSTOM_REAL),dimension(n1,5,n3) :: A1,A2,A3 - real(kind=CUSTOM_REAL),dimension(5,n2) :: B - real(kind=CUSTOM_REAL),dimension(n1,n2,n3) :: C1,C2,C3 + real(kind=CUSTOM_REAL),dimension(n1,5,n3),intent(in) :: A1,A2,A3 + real(kind=CUSTOM_REAL),dimension(5,n2),intent(in) :: B + real(kind=CUSTOM_REAL),dimension(n1,n2,n3),intent(out) :: C1,C2,C3 ! local parameters integer :: i,j,k @@ -709,7 +721,7 @@ end subroutine compute_element_strain_att_Dev !-------------------------------------------------------------------------------------------- ! - subroutine compute_element_strain_att_noDev(ispec,nglob,nspec, & + pure subroutine compute_element_strain_att_noDev(ispec,nglob,nspec, & displ,veloc,deltat, & ibool, & hprime_xx,hprime_yy,hprime_zz, & @@ -725,29 +737,29 @@ subroutine compute_element_strain_att_noDev(ispec,nglob,nspec, & implicit none - integer :: ispec,nglob,nspec - real(kind=CUSTOM_REAL) :: deltat + integer,intent(in) :: ispec,nglob,nspec + real(kind=CUSTOM_REAL),intent(in) :: deltat - real(kind=CUSTOM_REAL), dimension(NDIM,nglob) :: displ,veloc + real(kind=CUSTOM_REAL), dimension(NDIM,nglob),intent(in) :: displ,veloc - integer, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: ibool + integer, dimension(NGLLX,NGLLY,NGLLZ,nspec),intent(in) :: ibool ! array with derivatives of Lagrange polynomials and precalculated products real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLX),intent(in) :: hprime_xx real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLY),intent(in) :: hprime_yy real(kind=CUSTOM_REAL), dimension(NGLLZ,NGLLZ),intent(in) :: hprime_zz - real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,nspec) :: & + real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,nspec),intent(in) :: & xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz - real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,nspec) :: epsilondev_xx_loc_nplus1 - real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,nspec) :: epsilondev_yy_loc_nplus1 - real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,nspec) :: epsilondev_xy_loc_nplus1 - real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,nspec) :: epsilondev_xz_loc_nplus1 - real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,nspec) :: epsilondev_yz_loc_nplus1 + real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,nspec),intent(inout) :: epsilondev_xx_loc_nplus1 + real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,nspec),intent(inout) :: epsilondev_yy_loc_nplus1 + real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,nspec),intent(inout) :: epsilondev_xy_loc_nplus1 + real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,nspec),intent(inout) :: epsilondev_xz_loc_nplus1 + real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,nspec),intent(inout) :: epsilondev_yz_loc_nplus1 - integer :: NSPEC_STRAIN_ONLY - real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_STRAIN_ONLY) :: eps_trace_over_3_loc_nplus1 + integer,intent(in) :: NSPEC_STRAIN_ONLY + real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_STRAIN_ONLY),intent(inout) :: eps_trace_over_3_loc_nplus1 ! local variable integer :: i,j,k,l,iglob @@ -858,3 +870,4 @@ subroutine compute_element_strain_att_noDev(ispec,nglob,nspec, & end subroutine compute_element_strain_att_noDev +end module mod_element_strain diff --git a/src/specfem3D/compute_forces_crust_mantle_Dev.F90 b/src/specfem3D/compute_forces_crust_mantle_Dev.F90 index 83e04ecba..3da4b615d 100644 --- a/src/specfem3D/compute_forces_crust_mantle_Dev.F90 +++ b/src/specfem3D/compute_forces_crust_mantle_Dev.F90 @@ -94,6 +94,12 @@ subroutine compute_forces_crust_mantle_Dev( NSPEC_STR_OR_ATT,NGLOB,NSPEC_ATT, & use specfem_par_full_gravity, only: & gravity_rho => gravity_rho_crust_mantle + ! element compute routines + use mod_element, only: compute_element_iso,compute_element_tiso,compute_element_aniso, & + compute_element_add_full_gravity + + use mod_element_att, only: compute_element_att_memory_cm,compute_element_att_memory_cm_lddrk + implicit none integer,intent(in) :: NSPEC_STR_OR_ATT,NGLOB,NSPEC_ATT @@ -374,8 +380,8 @@ subroutine compute_forces_crust_mantle_Dev( NSPEC_STR_OR_ATT,NGLOB,NSPEC_ATT, & if (GRAVITY_VAL) then ! full gravity if (FULL_GRAVITY_VAL .and. .not. DISCARD_GCONTRIB) then - call SIEM_solve_element_add_full_gravity(ispec,NSPEC_CRUST_MANTLE,NGLOB,gravity_rho,deriv(:,:,:,:,ispec),ibool, & - pgrav_crust_mantle,rho_s_H) + call compute_element_add_full_gravity(ispec,NSPEC_CRUST_MANTLE,NGLOB,gravity_rho,deriv(1,1,1,1,ispec),ibool, & + pgrav_crust_mantle,rho_s_H) endif #ifdef FORCE_VECTORIZATION @@ -574,7 +580,7 @@ subroutine compute_forces_crust_mantle_Dev( NSPEC_STR_OR_ATT,NGLOB,NSPEC_ATT, & ! ! please leave the routines here to help compilers inlining the code - subroutine mxm5_3comp_singleA(A,n1,B1,B2,B3,C1,C2,C3,n3) + pure subroutine mxm5_3comp_singleA(A,n1,B1,B2,B3,C1,C2,C3,n3) ! we can force inlining (Intel compiler) #if defined __INTEL_COMPILER @@ -653,7 +659,7 @@ end subroutine mxm5_3comp_singleA !-------------------------------------------------------------------------------------------- - subroutine mxm5_3comp_singleB(A1,A2,A3,n1,B,C1,C2,C3,n3) + pure subroutine mxm5_3comp_singleB(A1,A2,A3,n1,B,C1,C2,C3,n3) ! we can force inlining (Intel compiler) #if defined __INTEL_COMPILER @@ -732,7 +738,7 @@ end subroutine mxm5_3comp_singleB !-------------------------------------------------------------------------------------------- - subroutine mxm5_3comp_3dmat_singleB(A1,A2,A3,n1,B,n2,C1,C2,C3,n3) + pure subroutine mxm5_3comp_3dmat_singleB(A1,A2,A3,n1,B,n2,C1,C2,C3,n3) ! we can force inlining (Intel compiler) #if defined __INTEL_COMPILER diff --git a/src/specfem3D/compute_forces_crust_mantle_noDev.f90 b/src/specfem3D/compute_forces_crust_mantle_noDev.f90 index 60764a8c6..ef9be1c8f 100644 --- a/src/specfem3D/compute_forces_crust_mantle_noDev.f90 +++ b/src/specfem3D/compute_forces_crust_mantle_noDev.f90 @@ -70,6 +70,11 @@ subroutine compute_forces_crust_mantle_noDev(NSPEC_STR_OR_ATT,NGLOB,NSPEC_ATT, & use specfem_par_full_gravity, only: & gravity_rho => gravity_rho_crust_mantle + ! element compute routines + use mod_element, only: compute_element_add_full_gravity + + use mod_element_att, only: compute_element_att_memory_cm,compute_element_att_memory_cm_lddrk + implicit none integer,intent(in) :: NSPEC_STR_OR_ATT,NGLOB,NSPEC_ATT @@ -109,42 +114,42 @@ subroutine compute_forces_crust_mantle_noDev(NSPEC_STR_OR_ATT,NGLOB,NSPEC_ATT, & ! local parameters ! for attenuation - real(kind=CUSTOM_REAL) R_xx_val,R_yy_val + real(kind=CUSTOM_REAL) :: R_xx_val,R_yy_val real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,5) :: epsilondev_loc real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: & tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3 - integer ispec,iglob - integer i,j,k,l - integer i_SLS + integer :: ispec,iglob + integer :: i,j,k,l + integer :: i_SLS ! the 21 coefficients for an anisotropic medium in reduced notation - real(kind=CUSTOM_REAL) c11,c22,c33,c44,c55,c66,c12,c13,c23,c14,c24,c34,c15,c25,c35,c45,c16,c26,c36,c46,c56 + real(kind=CUSTOM_REAL) :: c11,c22,c33,c44,c55,c66,c12,c13,c23,c14,c24,c34,c15,c25,c35,c45,c16,c26,c36,c46,c56 - real(kind=CUSTOM_REAL) xixl,xiyl,xizl,etaxl,etayl,etazl,gammaxl,gammayl,gammazl,jacobianl - real(kind=CUSTOM_REAL) duxdxl,duxdyl,duxdzl,duydxl,duydyl,duydzl,duzdxl,duzdyl,duzdzl + real(kind=CUSTOM_REAL) :: xixl,xiyl,xizl,etaxl,etayl,etazl,gammaxl,gammayl,gammazl,jacobianl + real(kind=CUSTOM_REAL) :: duxdxl,duxdyl,duxdzl,duydxl,duydyl,duydzl,duzdxl,duzdyl,duzdzl - real(kind=CUSTOM_REAL) duxdxl_plus_duydyl,duxdxl_plus_duzdzl,duydyl_plus_duzdzl - real(kind=CUSTOM_REAL) duxdyl_plus_duydxl,duzdxl_plus_duxdzl,duzdyl_plus_duydzl + real(kind=CUSTOM_REAL) :: duxdxl_plus_duydyl,duxdxl_plus_duzdzl,duydyl_plus_duzdzl + real(kind=CUSTOM_REAL) :: duxdyl_plus_duydxl,duzdxl_plus_duxdzl,duzdyl_plus_duydzl - real(kind=CUSTOM_REAL) sigma_xx,sigma_yy,sigma_zz,sigma_xy,sigma_xz,sigma_yz + real(kind=CUSTOM_REAL) :: sigma_xx,sigma_yy,sigma_zz,sigma_xy,sigma_xz,sigma_yz - real(kind=CUSTOM_REAL) hp1,hp2,hp3 - real(kind=CUSTOM_REAL) fac1,fac2,fac3 - real(kind=CUSTOM_REAL) lambdal,mul,lambdalplus2mul - real(kind=CUSTOM_REAL) kappal + real(kind=CUSTOM_REAL) :: hp1,hp2,hp3 + real(kind=CUSTOM_REAL) :: fac1,fac2,fac3 + real(kind=CUSTOM_REAL) :: lambdal,mul,lambdalplus2mul + real(kind=CUSTOM_REAL) :: kappal real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NGLLZ) :: sum_terms - real(kind=CUSTOM_REAL) tempx1l,tempx2l,tempx3l - real(kind=CUSTOM_REAL) tempy1l,tempy2l,tempy3l - real(kind=CUSTOM_REAL) tempz1l,tempz2l,tempz3l - real(kind=CUSTOM_REAL) templ + real(kind=CUSTOM_REAL) :: tempx1l,tempx2l,tempx3l + real(kind=CUSTOM_REAL) :: tempy1l,tempy2l,tempy3l + real(kind=CUSTOM_REAL) :: tempz1l,tempz2l,tempz3l + real(kind=CUSTOM_REAL) :: templ ! for gravity - real(kind=CUSTOM_REAL) sigma_yx,sigma_zx,sigma_zy - real(kind=CUSTOM_REAL) factor,sx_l,sy_l,sz_l,gxl,gyl,gzl - real(kind=CUSTOM_REAL) Hxxl,Hyyl,Hzzl,Hxyl,Hxzl,Hyzl + real(kind=CUSTOM_REAL) :: sigma_yx,sigma_zx,sigma_zy + real(kind=CUSTOM_REAL) :: factor,sx_l,sy_l,sz_l,gxl,gyl,gzl + real(kind=CUSTOM_REAL) :: Hxxl,Hyyl,Hzzl,Hxyl,Hxzl,Hyzl real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NGLLZ) :: rho_s_H ! full gravity @@ -508,8 +513,8 @@ subroutine compute_forces_crust_mantle_noDev(NSPEC_STR_OR_ATT,NGLOB,NSPEC_ATT, & ! full gravity if (FULL_GRAVITY_VAL .and. .not. DISCARD_GCONTRIB) then - call SIEM_solve_element_add_full_gravity(ispec,NSPEC_CRUST_MANTLE,NGLOB,gravity_rho,deriv_loc,ibool, & - pgrav_crust_mantle,rho_s_H) + call compute_element_add_full_gravity(ispec,NSPEC_CRUST_MANTLE,NGLOB,gravity_rho,deriv_loc,ibool, & + pgrav_crust_mantle,rho_s_H) endif do k = 1,NGLLZ diff --git a/src/specfem3D/compute_forces_inner_core_Dev.F90 b/src/specfem3D/compute_forces_inner_core_Dev.F90 index 926da54a4..ffff0b369 100644 --- a/src/specfem3D/compute_forces_inner_core_Dev.F90 +++ b/src/specfem3D/compute_forces_inner_core_Dev.F90 @@ -85,34 +85,40 @@ subroutine compute_forces_inner_core_Dev( NSPEC_STR_OR_ATT,NGLOB,NSPEC_ATT, & use specfem_par_full_gravity, only: & gravity_rho => gravity_rho_inner_core + ! element compute routines + use mod_element, only: compute_element_iso_ic,compute_element_aniso_ic, & + compute_element_add_full_gravity + + use mod_element_att, only: compute_element_att_memory_ic,compute_element_att_memory_ic_lddrk + implicit none - integer :: NSPEC_STR_OR_ATT,NGLOB,NSPEC_ATT + integer,intent(in) :: NSPEC_STR_OR_ATT,NGLOB,NSPEC_ATT ! time step - real(kind=CUSTOM_REAL) deltat + real(kind=CUSTOM_REAL),intent(in) :: deltat ! displacement and acceleration - real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB) :: displ_inner_core - real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB) :: accel_inner_core + real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB),intent(in) :: displ_inner_core + real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB),intent(inout) :: accel_inner_core ! for attenuation ! memory variables R_ij are stored at the local rather than global level ! to allow for optimization of cache access by compiler ! variable lengths for factor_common (and one_minus_sum_beta) - integer :: vnspec - real(kind=CUSTOM_REAL), dimension(ATT1_VAL,ATT2_VAL,ATT3_VAL,N_SLS,vnspec) :: factor_common - real(kind=CUSTOM_REAL), dimension(N_SLS) :: alphaval,betaval,gammaval + integer,intent(in) :: vnspec + real(kind=CUSTOM_REAL), dimension(ATT1_VAL,ATT2_VAL,ATT3_VAL,N_SLS,vnspec),intent(in) :: factor_common + real(kind=CUSTOM_REAL), dimension(N_SLS),intent(in) :: alphaval,betaval,gammaval - real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,N_SLS,NSPEC_ATT) :: R_xx,R_yy,R_xy,R_xz,R_yz + real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,N_SLS,NSPEC_ATT),intent(inout) :: R_xx,R_yy,R_xy,R_xz,R_yz - real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,N_SLS,NSPEC_ATT) :: & + real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,N_SLS,NSPEC_ATT),intent(inout) :: & R_xx_lddrk,R_yy_lddrk,R_xy_lddrk,R_xz_lddrk,R_yz_lddrk - real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_STR_OR_ATT) :: & + real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_STR_OR_ATT),intent(inout) :: & epsilondev_xx,epsilondev_yy,epsilondev_xy,epsilondev_xz,epsilondev_yz - real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_STRAIN_ONLY) :: epsilon_trace_over_3 + real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_STRAIN_ONLY),intent(inout) :: epsilon_trace_over_3 ! work array with contributions real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE),intent(out) :: sum_terms @@ -293,8 +299,8 @@ subroutine compute_forces_inner_core_Dev( NSPEC_STR_OR_ATT,NGLOB,NSPEC_ATT, & if (GRAVITY_VAL) then ! full gravity if (FULL_GRAVITY_VAL .and. .not. DISCARD_GCONTRIB) then - call SIEM_solve_element_add_full_gravity(ispec,NSPEC_INNER_CORE,NGLOB,gravity_rho,deriv(:,:,:,:,ispec),ibool, & - pgrav_inner_core,rho_s_H) + call compute_element_add_full_gravity(ispec,NSPEC_INNER_CORE,NGLOB,gravity_rho,deriv(1,1,1,1,ispec),ibool, & + pgrav_inner_core,rho_s_H) endif #ifdef FORCE_VECTORIZATION @@ -440,7 +446,7 @@ subroutine compute_forces_inner_core_Dev( NSPEC_STR_OR_ATT,NGLOB,NSPEC_ATT, & ! ! please leave the routines here to help compilers inlining the code - subroutine mxm5_3comp_singleA(A,n1,B1,B2,B3,C1,C2,C3,n3) + pure subroutine mxm5_3comp_singleA(A,n1,B1,B2,B3,C1,C2,C3,n3) ! we can force inlining (Intel compiler) #if defined __INTEL_COMPILER @@ -519,7 +525,7 @@ end subroutine mxm5_3comp_singleA !-------------------------------------------------------------------------------------------- - subroutine mxm5_3comp_singleB(A1,A2,A3,n1,B,C1,C2,C3,n3) + pure subroutine mxm5_3comp_singleB(A1,A2,A3,n1,B,C1,C2,C3,n3) ! we can force inlining (Intel compiler) #if defined __INTEL_COMPILER @@ -598,7 +604,7 @@ end subroutine mxm5_3comp_singleB !-------------------------------------------------------------------------------------------- - subroutine mxm5_3comp_3dmat_singleB(A1,A2,A3,n1,B,n2,C1,C2,C3,n3) + pure subroutine mxm5_3comp_3dmat_singleB(A1,A2,A3,n1,B,n2,C1,C2,C3,n3) ! we can force inlining (Intel compiler) #if defined __INTEL_COMPILER diff --git a/src/specfem3D/compute_forces_inner_core_noDev.f90 b/src/specfem3D/compute_forces_inner_core_noDev.f90 index 6c85cce82..bb721f81d 100644 --- a/src/specfem3D/compute_forces_inner_core_noDev.f90 +++ b/src/specfem3D/compute_forces_inner_core_noDev.f90 @@ -65,16 +65,21 @@ subroutine compute_forces_inner_core_noDev( NSPEC_STR_OR_ATT,NGLOB,NSPEC_ATT, & use specfem_par_full_gravity, only: & gravity_rho => gravity_rho_inner_core + ! element compute routines + use mod_element, only: compute_element_add_full_gravity + + use mod_element_att, only: compute_element_att_memory_ic,compute_element_att_memory_ic_lddrk + implicit none - integer :: NSPEC_STR_OR_ATT,NGLOB,NSPEC_ATT + integer,intent(in) :: NSPEC_STR_OR_ATT,NGLOB,NSPEC_ATT ! time step - real(kind=CUSTOM_REAL) deltat + real(kind=CUSTOM_REAL),intent(in) :: deltat ! displacement, velocity and acceleration - real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB) :: displ_inner_core - real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB) :: accel_inner_core + real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB),intent(in) :: displ_inner_core + real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB),intent(inout) :: accel_inner_core ! for attenuation ! memory variables R_ij are stored at the local rather than global level @@ -82,19 +87,19 @@ subroutine compute_forces_inner_core_noDev( NSPEC_STR_OR_ATT,NGLOB,NSPEC_ATT, & ! variable lengths for factor_common and one_minus_sum_beta ! variable sized array variables - integer :: vnspec - real(kind=CUSTOM_REAL), dimension(ATT1_VAL,ATT2_VAL,ATT3_VAL,N_SLS,vnspec) :: factor_common - real(kind=CUSTOM_REAL), dimension(N_SLS) :: alphaval,betaval,gammaval + integer,intent(in) :: vnspec + real(kind=CUSTOM_REAL), dimension(ATT1_VAL,ATT2_VAL,ATT3_VAL,N_SLS,vnspec),intent(in) :: factor_common + real(kind=CUSTOM_REAL), dimension(N_SLS),intent(in) :: alphaval,betaval,gammaval - real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,N_SLS,NSPEC_ATT) :: R_xx,R_yy,R_xy,R_xz,R_yz + real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,N_SLS,NSPEC_ATT),intent(inout) :: R_xx,R_yy,R_xy,R_xz,R_yz - real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,N_SLS,NSPEC_ATT) :: & + real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,N_SLS,NSPEC_ATT),intent(inout) :: & R_xx_lddrk,R_yy_lddrk,R_xy_lddrk,R_xz_lddrk,R_yz_lddrk - real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_STR_OR_ATT) :: & + real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_STR_OR_ATT),intent(inout) :: & epsilondev_xx,epsilondev_yy,epsilondev_xy,epsilondev_xz,epsilondev_yz - real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_STRAIN_ONLY) :: epsilon_trace_over_3 + real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_STRAIN_ONLY),intent(inout) :: epsilon_trace_over_3 ! inner/outer element run flag integer,intent(in) :: iphase @@ -434,8 +439,8 @@ subroutine compute_forces_inner_core_noDev( NSPEC_STR_OR_ATT,NGLOB,NSPEC_ATT, & ! full gravity if (FULL_GRAVITY_VAL .and. .not. DISCARD_GCONTRIB) then - call SIEM_solve_element_add_full_gravity(ispec,NSPEC_INNER_CORE,NGLOB,gravity_rho,deriv_loc,ibool, & - pgrav_inner_core,rho_s_H) + call compute_element_add_full_gravity(ispec,NSPEC_INNER_CORE,NGLOB,gravity_rho,deriv_loc,ibool, & + pgrav_inner_core,rho_s_H) endif do k = 1,NGLLZ diff --git a/src/specfem3D/compute_forces_outer_core_Dev.F90 b/src/specfem3D/compute_forces_outer_core_Dev.F90 index 6773a9cca..dfe5f9fab 100644 --- a/src/specfem3D/compute_forces_outer_core_Dev.F90 +++ b/src/specfem3D/compute_forces_outer_core_Dev.F90 @@ -527,7 +527,7 @@ subroutine compute_forces_outer_core_Dev(timeval,deltat,two_omega_earth, & ! ! please leave the routines here to help compilers inlining the code - subroutine mxm5_single(A,n1,B,C,n3) + pure subroutine mxm5_single(A,n1,B,C,n3) ! we can force inlining (Intel compiler) #if defined __INTEL_COMPILER @@ -700,7 +700,7 @@ end subroutine mxm5_single !-------------------------------------------------------------------------------------------- - subroutine mxm5_3dmat_single(A,n1,B,n2,C,n3) + pure subroutine mxm5_3dmat_single(A,n1,B,n2,C,n3) ! we can force inlining (Intel compiler) #if defined __INTEL_COMPILER diff --git a/src/specfem3D/compute_kernels.F90 b/src/specfem3D/compute_kernels.F90 index 2981da390..00828c508 100644 --- a/src/specfem3D/compute_kernels.F90 +++ b/src/specfem3D/compute_kernels.F90 @@ -77,6 +77,9 @@ subroutine compute_kernels_crust_mantle() use specfem_par_crustmantle + ! element compute routines + use mod_element_strain, only: compute_element_strain_undoatt_Dev,compute_element_strain_undoatt_noDev + implicit none ! local parameters @@ -705,6 +708,9 @@ subroutine compute_kernels_inner_core() use specfem_par_innercore + ! element compute routines + use mod_element_strain, only: compute_element_strain_undoatt_Dev,compute_element_strain_undoatt_noDev + implicit none ! local parameters diff --git a/src/specfem3D/compute_seismograms.F90 b/src/specfem3D/compute_seismograms.F90 index 24bf236f8..e04a96041 100644 --- a/src/specfem3D/compute_seismograms.F90 +++ b/src/specfem3D/compute_seismograms.F90 @@ -124,6 +124,9 @@ subroutine compute_seismograms_adjoint(displ_crust_mantle, & etax_crust_mantle,etay_crust_mantle,etaz_crust_mantle, & gammax_crust_mantle,gammay_crust_mantle,gammaz_crust_mantle + ! element compute routines + use mod_element_strain, only: compute_element_strain_undoatt_noDev + implicit none real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_CRUST_MANTLE),intent(in) :: & diff --git a/src/specfem3D/compute_strain_att.f90 b/src/specfem3D/compute_strain_att.f90 index ca1ce1e62..812c93162 100644 --- a/src/specfem3D/compute_strain_att.f90 +++ b/src/specfem3D/compute_strain_att.f90 @@ -37,6 +37,9 @@ subroutine compute_strain_att() use specfem_par_crustmantle use specfem_par_innercore + ! element compute routines + use mod_element_strain, only: compute_element_strain_att_Dev,compute_element_strain_att_noDev + implicit none ! local parameters integer :: ispec @@ -134,6 +137,9 @@ subroutine compute_strain_att_backward() use specfem_par_crustmantle use specfem_par_innercore + ! element compute routines + use mod_element_strain, only: compute_element_strain_att_Dev,compute_element_strain_att_noDev + implicit none ! local parameters diff --git a/src/specfem3D/rules.mk b/src/specfem3D/rules.mk index c3f9836f2..80789c585 100644 --- a/src/specfem3D/rules.mk +++ b/src/specfem3D/rules.mk @@ -144,6 +144,9 @@ specfem3D_MODULES = \ $(FC_MODDIR)/asdf_data.$(FC_MODEXT) \ $(FC_MODDIR)/constants_solver.$(FC_MODEXT) \ $(FC_MODDIR)/manager_adios.$(FC_MODEXT) \ + $(FC_MODDIR)/mod_element.$(FC_MODEXT) \ + $(FC_MODDIR)/mod_element_att.$(FC_MODEXT) \ + $(FC_MODDIR)/mod_element_strain.$(FC_MODEXT) \ $(FC_MODDIR)/specfem_par.$(FC_MODEXT) \ $(FC_MODDIR)/specfem_par_crustmantle.$(FC_MODEXT) \ $(FC_MODDIR)/specfem_par_innercore.$(FC_MODEXT) \ @@ -401,6 +404,15 @@ $O/locate_point.solverstatic.o: $O/search_kdtree.shared.o $O/make_gravity.solver.o: $O/model_prem.shared.o $O/model_Sohl.shared.o $O/model_vpremoon.shared.o +$O/compute_forces_crust_mantle_Dev.solverstatic.o: $O/compute_element.solverstatic.o $O/compute_element_att_memory.solverstatic.o +$O/compute_forces_crust_mantle_noDev.solverstatic.o: $O/compute_element.solverstatic.o $O/compute_element_att_memory.solverstatic.o +$O/compute_forces_inner_core_Dev.solverstatic.o: $O/compute_element.solverstatic.o $O/compute_element_att_memory.solverstatic.o +$O/compute_forces_inner_core_noDev.solverstatic.o: $O/compute_element.solverstatic.o $O/compute_element_att_memory.solverstatic.o + +$O/compute_kernels.solverstatic.o: $O/compute_element_strain.solverstatic.o +$O/compute_seismograms.solverstatic.o: $O/compute_element_strain.solverstatic.o +$O/compute_strain_att.solverstatic.o: $O/compute_element_strain.solverstatic.o + # Version file $O/initialize_simulation.solverstatic.o: ${SETUP}/version.fh From 005b6ede5530c4ba3e896d7f5ce57cc3ddba480a Mon Sep 17 00:00:00 2001 From: Daniel Peter Date: Thu, 20 Jun 2024 17:10:05 +0200 Subject: [PATCH 03/13] removes libXSMM test routines --- .../compute_forces_crust_mantle_Dev.F90 | 168 ------------------ 1 file changed, 168 deletions(-) diff --git a/src/specfem3D/compute_forces_crust_mantle_Dev.F90 b/src/specfem3D/compute_forces_crust_mantle_Dev.F90 index 3da4b615d..f56a6d684 100644 --- a/src/specfem3D/compute_forces_crust_mantle_Dev.F90 +++ b/src/specfem3D/compute_forces_crust_mantle_Dev.F90 @@ -245,37 +245,12 @@ subroutine compute_forces_crust_mantle_Dev( NSPEC_STR_OR_ATT,NGLOB,NSPEC_ATT, & ! for incompressible fluid flow, Cambridge University Press (2002), ! pages 386 and 389 and Figure 8.3.1 -#ifdef DANIEL_TEST_LOOP - ! loop over single x/y/z-component, to test if cache utilization is better - ! x-comp -!DIR$ FORCEINLINE - call mxm5_3comp_singleA_1(hprime_xx,m1,dummyx_loc,tempx1,m2) -!DIR$ FORCEINLINE - call mxm5_3comp_3dmat_singleB_1(dummyx_loc,m1,hprime_xxT,m1,tempx2,NGLLX) -!DIR$ FORCEINLINE - call mxm5_3comp_singleB_1(dummyx_loc,m2,hprime_xxT,tempx3,m1) - ! y-comp -!DIR$ FORCEINLINE - call mxm5_3comp_singleA_1(hprime_xx,m1,dummyy_loc,tempy1,m2) -!DIR$ FORCEINLINE - call mxm5_3comp_3dmat_singleB_1(dummyy_loc,m1,hprime_xxT,m1,tempy2,NGLLX) -!DIR$ FORCEINLINE - call mxm5_3comp_singleB_1(dummyy_loc,m2,hprime_xxT,tempy3,m1) - ! z-comp -!DIR$ FORCEINLINE - call mxm5_3comp_singleA_1(hprime_xx,m1,dummyz_loc,tempz1,m2) -!DIR$ FORCEINLINE - call mxm5_3comp_3dmat_singleB_1(dummyz_loc,m1,hprime_xxT,m1,tempz2,NGLLX) -!DIR$ FORCEINLINE - call mxm5_3comp_singleB_1(dummyz_loc,m2,hprime_xxT,tempz3,m1) -#else ! computes 1. matrix multiplication for tempx1,.. call mxm5_3comp_singleA(hprime_xx,m1,dummyx_loc,dummyy_loc,dummyz_loc,tempx1,tempy1,tempz1,m2) ! computes 2. matrix multiplication for tempx2,.. call mxm5_3comp_3dmat_singleB(dummyx_loc,dummyy_loc,dummyz_loc,m1,hprime_xxT,m1,tempx2,tempy2,tempz2,NGLLX) ! computes 3. matrix multiplication for tempx3,.. call mxm5_3comp_singleB(dummyx_loc,dummyy_loc,dummyz_loc,m2,hprime_xxT,tempx3,tempy3,tempz3,m1) -#endif ! ! compute either isotropic, transverse isotropic or anisotropic elements @@ -334,37 +309,12 @@ subroutine compute_forces_crust_mantle_Dev( NSPEC_STR_OR_ATT,NGLOB,NSPEC_ATT, & ! for incompressible fluid flow, Cambridge University Press (2002), ! pages 386 and 389 and Figure 8.3.1 -#ifdef DANIEL_TEST_LOOP - ! loop over single x/y/z-component, to test if cache utilization is better - ! x-comp -!DIR$ FORCEINLINE - call mxm5_3comp_singleA_1(hprimewgll_xxT,m1,tempx1,newtempx1,m2) -!DIR$ FORCEINLINE - call mxm5_3comp_3dmat_singleB_1(tempx2,m1,hprimewgll_xx,m1,newtempx2,NGLLX) -!DIR$ FORCEINLINE - call mxm5_3comp_singleB_1(tempx3,m2,hprimewgll_xx,newtempx3,m1) - ! y-comp -!DIR$ FORCEINLINE - call mxm5_3comp_singleA_1(hprimewgll_xxT,m1,tempy1,newtempy1,m2) -!DIR$ FORCEINLINE - call mxm5_3comp_3dmat_singleB_1(tempy2,m1,hprimewgll_xx,m1,newtempy2,NGLLX) -!DIR$ FORCEINLINE - call mxm5_3comp_singleB_1(tempy3,m2,hprimewgll_xx,newtempy3,m1) - ! z-comp -!DIR$ FORCEINLINE - call mxm5_3comp_singleA_1(hprimewgll_xxT,m1,tempz1,newtempz1,m2) -!DIR$ FORCEINLINE - call mxm5_3comp_3dmat_singleB_1(tempz2,m1,hprimewgll_xx,m1,newtempz2,NGLLX) -!DIR$ FORCEINLINE - call mxm5_3comp_singleB_1(tempz3,m2,hprimewgll_xx,newtempz3,m1) -#else ! computes 1. matrix multiplication for newtempx1,.. call mxm5_3comp_singleA(hprimewgll_xxT,m1,tempx1,tempy1,tempz1,newtempx1,newtempy1,newtempz1,m2) ! computes 2. matrix multiplication for tempx2,.. call mxm5_3comp_3dmat_singleB(tempx2,tempy2,tempz2,m1,hprimewgll_xx,m1,newtempx2,newtempy2,newtempz2,NGLLX) ! computes 3. matrix multiplication for newtempx3,.. call mxm5_3comp_singleB(tempx3,tempy3,tempz3,m2,hprimewgll_xx,newtempx3,newtempy3,newtempz3,m1) -#endif ! sums contributions DO_LOOP_IJK @@ -840,124 +790,6 @@ pure subroutine mxm5_3comp_3dmat_singleB(A1,A2,A3,n1,B,n2,C1,C2,C3,n3) end subroutine mxm5_3comp_3dmat_singleB - -!-------------------------------------------------------------------------------------------- - -#ifdef DANIEL_TEST_LOOP - -! loops over single x/y/z-component -! test if cache utilization is better - - subroutine mxm5_3comp_singleA_1(A,n1,B,C,n3) - use constants_solver, only: CUSTOM_REAL -#ifdef USE_XSMM - use my_libxsmm, only: libxsmm_smm_5_25_5 -#endif - implicit none - integer,intent(in) :: n1,n3 - real(kind=CUSTOM_REAL),dimension(n1,5),intent(in) :: A - real(kind=CUSTOM_REAL),dimension(5,n3),intent(in) :: B - real(kind=CUSTOM_REAL),dimension(n1,n3),intent(out) :: C - ! local parameters - integer :: i,j -#ifdef USE_XSMM - ! matrix-matrix multiplication C = alpha A * B + beta C - ! with A(n1,n2) 5x5-matrix, B(n2,n3) 5x25-matrix and C(n1,n3) 5x25-matrix - ! static version using MNK="5 25, 5" ALPHA=1 BETA=0 - call libxsmm_smm_5_25_5(a=A, b=B, c=C) - return -#endif - ! matrix-matrix multiplication - do j = 1,n3 -!dir$ ivdep - do i = 1,n1 - C(i,j) = A(i,1) * B(1,j) & - + A(i,2) * B(2,j) & - + A(i,3) * B(3,j) & - + A(i,4) * B(4,j) & - + A(i,5) * B(5,j) - enddo - enddo - - end subroutine mxm5_3comp_singleA_1 - - - subroutine mxm5_3comp_singleB_1(A,n1,B,C,n3) - use constants_solver, only: CUSTOM_REAL -#ifdef USE_XSMM - use my_libxsmm, only: libxsmm_smm_25_5_5 -#endif - implicit none - integer,intent(in) :: n1,n3 - real(kind=CUSTOM_REAL),dimension(n1,5),intent(in) :: A - real(kind=CUSTOM_REAL),dimension(5,n3),intent(in) :: B - real(kind=CUSTOM_REAL),dimension(n1,n3),intent(out) :: C - ! local parameters - integer :: i,j -#ifdef USE_XSMM - ! matrix-matrix multiplication C = alpha A * B + beta C - ! with A(n1,n2) 25x5-matrix, B(n2,n3) 5x5-matrix and C(n1,n3) 25x5-matrix - ! static version - call libxsmm_smm_25_5_5(a=A, b=B, c=C) - return -#endif - ! matrix-matrix multiplication - do j = 1,n3 -!dir$ ivdep - do i = 1,n1 - C(i,j) = A(i,1) * B(1,j) & - + A(i,2) * B(2,j) & - + A(i,3) * B(3,j) & - + A(i,4) * B(4,j) & - + A(i,5) * B(5,j) - enddo - enddo - end subroutine mxm5_3comp_singleB_1 - - - subroutine mxm5_3comp_3dmat_singleB_1(A,n1,B,n2,C,n3) - use constants_solver, only: CUSTOM_REAL -#if defined(XSMM_FORCE_EVEN_IF_SLOWER) || ( defined(XSMM) && defined(__MIC__) ) - use my_libxsmm, only: libxsmm_smm_5_5_5 -#endif - implicit none - integer,intent(in) :: n1,n2,n3 - real(kind=CUSTOM_REAL),dimension(n1,5,n3),intent(in) :: A - real(kind=CUSTOM_REAL),dimension(5,n2),intent(in) :: B - real(kind=CUSTOM_REAL),dimension(n1,n2,n3),intent(out) :: C - ! local parameters - integer :: i,j,k -#if defined(XSMM_FORCE_EVEN_IF_SLOWER) || ( defined(XSMM) && defined(__MIC__) ) - ! matrix-matrix multiplication C = alpha A * B + beta C - ! with A(n1,n2,n4) 5x5x5-matrix, B(n2,n3) 5x5-matrix and C(n1,n3,n4) 5x5x5-matrix - call libxsmm_smm_5_5_5(a=A(1,1,1), b=B, c=C(1,1,1)) - call libxsmm_smm_5_5_5(a=A(1,1,2), b=B, c=C(1,1,2)) - call libxsmm_smm_5_5_5(a=A(1,1,3), b=B, c=C(1,1,3)) - call libxsmm_smm_5_5_5(a=A(1,1,4), b=B, c=C(1,1,4)) - call libxsmm_smm_5_5_5(a=A(1,1,5), b=B, c=C(1,1,5)) - return -#endif - ! matrix-matrix multiplication - do k = 1,n3 - do j = 1,n2 -!dir$ ivdep - do i = 1,n1 - C(i,j,k) = A(i,1,k) * B(1,j) & - + A(i,2,k) * B(2,j) & - + A(i,3,k) * B(3,j) & - + A(i,4,k) * B(4,j) & - + A(i,5,k) * B(5,j) - enddo - enddo - enddo - end subroutine mxm5_3comp_3dmat_singleB_1 -#endif - - -!-------------------------------------------------------------------------------------------- - - - end subroutine compute_forces_crust_mantle_Dev From 7aab4ce4be1b43b1a0b9afabc1f2d63a4fd138a3 Mon Sep 17 00:00:00 2001 From: Daniel Peter Date: Fri, 21 Jun 2024 19:36:53 +0200 Subject: [PATCH 04/13] adds hand-opt routines for testing --- .../compute_forces_crust_mantle_Dev.F90 | 130 ++++++++++++++++++ 1 file changed, 130 insertions(+) diff --git a/src/specfem3D/compute_forces_crust_mantle_Dev.F90 b/src/specfem3D/compute_forces_crust_mantle_Dev.F90 index f56a6d684..cc30fc871 100644 --- a/src/specfem3D/compute_forces_crust_mantle_Dev.F90 +++ b/src/specfem3D/compute_forces_crust_mantle_Dev.F90 @@ -151,6 +151,7 @@ subroutine compute_forces_crust_mantle_Dev( NSPEC_STR_OR_ATT,NGLOB,NSPEC_ATT, & real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: dummyx_loc,dummyy_loc,dummyz_loc + ! strain real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,5) :: epsilondev_loc real(kind=CUSTOM_REAL) :: fac1,fac2,fac3 @@ -246,7 +247,12 @@ subroutine compute_forces_crust_mantle_Dev( NSPEC_STR_OR_ATT,NGLOB,NSPEC_ATT, & ! pages 386 and 389 and Figure 8.3.1 ! computes 1. matrix multiplication for tempx1,.. +#ifdef USE_HANDOPT call mxm5_3comp_singleA(hprime_xx,m1,dummyx_loc,dummyy_loc,dummyz_loc,tempx1,tempy1,tempz1,m2) + !call mxm5_3comp_singleA(hprime_xxT,m1,dummyx_loc,dummyy_loc,dummyz_loc,tempx1,tempy1,tempz1,m2) ! A transposed +#else + call mxm5_3comp_singleA(hprime_xx,m1,dummyx_loc,dummyy_loc,dummyz_loc,tempx1,tempy1,tempz1,m2) +#endif ! computes 2. matrix multiplication for tempx2,.. call mxm5_3comp_3dmat_singleB(dummyx_loc,dummyy_loc,dummyz_loc,m1,hprime_xxT,m1,tempx2,tempy2,tempz2,NGLLX) ! computes 3. matrix multiplication for tempx3,.. @@ -310,7 +316,12 @@ subroutine compute_forces_crust_mantle_Dev( NSPEC_STR_OR_ATT,NGLOB,NSPEC_ATT, & ! pages 386 and 389 and Figure 8.3.1 ! computes 1. matrix multiplication for newtempx1,.. +#ifdef USE_HANDOPT call mxm5_3comp_singleA(hprimewgll_xxT,m1,tempx1,tempy1,tempz1,newtempx1,newtempy1,newtempz1,m2) + !call mxm5_3comp_singleA(hprimewgll_xx,m1,tempx1,tempy1,tempz1,newtempx1,newtempy1,newtempz1,m2) ! A transposed +#else + call mxm5_3comp_singleA(hprimewgll_xxT,m1,tempx1,tempy1,tempz1,newtempx1,newtempy1,newtempz1,m2) +#endif ! computes 2. matrix multiplication for tempx2,.. call mxm5_3comp_3dmat_singleB(tempx2,tempy2,tempz2,m1,hprimewgll_xx,m1,newtempx2,newtempy2,newtempz2,NGLLX) ! computes 3. matrix multiplication for newtempx3,.. @@ -556,7 +567,12 @@ pure subroutine mxm5_3comp_singleA(A,n1,B1,B2,B3,C1,C2,C3,n3) implicit none integer,intent(in) :: n1,n3 +#ifdef USE_HANDOPT + real(kind=CUSTOM_REAL),dimension(n1,5),intent(in) :: A + !real(kind=CUSTOM_REAL),dimension(5,n1),intent(in) :: A ! transposed +#else real(kind=CUSTOM_REAL),dimension(n1,5),intent(in) :: A +#endif real(kind=CUSTOM_REAL),dimension(5,n3),intent(in) :: B1,B2,B3 real(kind=CUSTOM_REAL),dimension(n1,n3),intent(out) :: C1,C2,C3 @@ -579,6 +595,41 @@ pure subroutine mxm5_3comp_singleA(A,n1,B1,B2,B3,C1,C2,C3,n3) ! matrix-matrix multiplication do j = 1,n3 + +#ifdef USE_HANDOPT + +!DIR$ IVDEP +#if defined __INTEL_COMPILER +!DIR$ SIMD +#endif + do i = 1,n1 + C1(i,j) = A(i,1) * B1(1,j) + A(i,2) * B1(2,j) + A(i,3) * B1(3,j) + A(i,4) * B1(4,j) + A(i,5) * B1(5,j) + ! transposed + !C1(i,j) = A(1,i) * B1(1,j) + A(2,i) * B1(2,j) + A(3,i) * B1(3,j) + A(4,i) * B1(4,j) + A(5,i) * B1(5,j) + enddo + +!DIR$ IVDEP +#if defined __INTEL_COMPILER +!DIR$ SIMD +#endif + do i = 1,n1 + C2(i,j) = A(i,1) * B2(1,j) + A(i,2) * B2(2,j) + A(i,3) * B2(3,j) + A(i,4) * B2(4,j) + A(i,5) * B2(5,j) + ! transposed + !C2(i,j) = A(1,i) * B2(1,j) + A(2,i) * B2(2,j) + A(3,i) * B2(3,j) + A(4,i) * B2(4,j) + A(5,i) * B2(5,j) + enddo + +!DIR$ IVDEP +#if defined __INTEL_COMPILER +!DIR$ SIMD +#endif + do i = 1,n1 + C3(i,j) = A(i,1) * B3(1,j) + A(i,2) * B3(2,j) + A(i,3) * B3(3,j) + A(i,4) * B3(4,j) + A(i,5) * B3(5,j) + ! transposed + !C3(i,j) = A(1,i) * B3(1,j) + A(2,i) * B3(2,j) + A(3,i) * B3(3,j) + A(4,i) * B3(4,j) + A(5,i) * B3(5,j) + enddo + +#else + !DIR$ IVDEP #if defined __INTEL_COMPILER !DIR$ SIMD @@ -602,6 +653,9 @@ pure subroutine mxm5_3comp_singleA(A,n1,B1,B2,B3,C1,C2,C3,n3) + A(i,4) * B3(4,j) & + A(i,5) * B3(5,j) enddo + +#endif + enddo end subroutine mxm5_3comp_singleA @@ -658,6 +712,35 @@ pure subroutine mxm5_3comp_singleB(A1,A2,A3,n1,B,C1,C2,C3,n3) ! matrix-matrix multiplication do j = 1,n3 + +#ifdef USE_HANDOPT + +!DIR$ IVDEP +#if defined __INTEL_COMPILER +!DIR$ SIMD +#endif + do i = 1,n1 + C1(i,j) = A1(i,1) * B(1,j) + A1(i,2) * B(2,j) + A1(i,3) * B(3,j) + A1(i,4) * B(4,j) + A1(i,5) * B(5,j) + enddo + +!DIR$ IVDEP +#if defined __INTEL_COMPILER +!DIR$ SIMD +#endif + do i = 1,n1 + C2(i,j) = A2(i,1) * B(1,j) + A2(i,2) * B(2,j) + A2(i,3) * B(3,j) + A2(i,4) * B(4,j) + A2(i,5) * B(5,j) + enddo + +!DIR$ IVDEP +#if defined __INTEL_COMPILER +!DIR$ SIMD +#endif + do i = 1,n1 + C3(i,j) = A3(i,1) * B(1,j) + A3(i,2) * B(2,j) + A3(i,3) * B(3,j) + A3(i,4) * B(4,j) + A3(i,5) * B(5,j) + enddo + +#else + !DIR$ IVDEP #if defined __INTEL_COMPILER !DIR$ SIMD @@ -681,6 +764,9 @@ pure subroutine mxm5_3comp_singleB(A1,A2,A3,n1,B,C1,C2,C3,n3) + A3(i,4) * B(4,j) & + A3(i,5) * B(5,j) enddo + +#endif + enddo end subroutine mxm5_3comp_singleB @@ -762,6 +848,47 @@ pure subroutine mxm5_3comp_3dmat_singleB(A1,A2,A3,n1,B,n2,C1,C2,C3,n3) ! matrix-matrix multiplication do k = 1,n3 do j = 1,n2 + +#ifdef USE_HANDOPT + +!DIR$ IVDEP +#if defined __INTEL_COMPILER +!DIR$ SIMD +#endif + do i = 1,n1 + C1(i,j,k) = A1(i,1,k) * B(1,j) & + + A1(i,2,k) * B(2,j) & + + A1(i,3,k) * B(3,j) & + + A1(i,4,k) * B(4,j) & + + A1(i,5,k) * B(5,j) + enddo + +!DIR$ IVDEP +#if defined __INTEL_COMPILER +!DIR$ SIMD +#endif + do i = 1,n1 + C2(i,j,k) = A2(i,1,k) * B(1,j) & + + A2(i,2,k) * B(2,j) & + + A2(i,3,k) * B(3,j) & + + A2(i,4,k) * B(4,j) & + + A2(i,5,k) * B(5,j) + enddo + +!DIR$ IVDEP +#if defined __INTEL_COMPILER +!DIR$ SIMD +#endif + do i = 1,n1 + C3(i,j,k) = A3(i,1,k) * B(1,j) & + + A3(i,2,k) * B(2,j) & + + A3(i,3,k) * B(3,j) & + + A3(i,4,k) * B(4,j) & + + A3(i,5,k) * B(5,j) + enddo + +#else + !DIR$ IVDEP #if defined __INTEL_COMPILER !DIR$ SIMD @@ -785,6 +912,9 @@ pure subroutine mxm5_3comp_3dmat_singleB(A1,A2,A3,n1,B,n2,C1,C2,C3,n3) + A3(i,4,k) * B(4,j) & + A3(i,5,k) * B(5,j) enddo + +#endif + enddo enddo From d0116ba0e57d1bf7da0567e8919efb0e6f4510ff Mon Sep 17 00:00:00 2001 From: Daniel Peter Date: Fri, 21 Jun 2024 19:37:47 +0200 Subject: [PATCH 05/13] stores pre-computed jacobian in deriv_mapping_* arrays --- src/specfem3D/compute_element.F90 | 131 ++++++++---------- src/specfem3D/compute_element_strain.F90 | 4 +- .../compute_forces_crust_mantle_noDev.f90 | 13 +- .../compute_forces_inner_core_noDev.f90 | 13 +- .../compute_forces_outer_core_Dev.F90 | 22 ++- src/specfem3D/prepare_optimized_arrays.F90 | 12 +- 6 files changed, 92 insertions(+), 103 deletions(-) diff --git a/src/specfem3D/compute_element.F90 b/src/specfem3D/compute_element.F90 index 721db7a2b..5127fa3cb 100644 --- a/src/specfem3D/compute_element.F90 +++ b/src/specfem3D/compute_element.F90 @@ -88,7 +88,7 @@ pure subroutine compute_element_iso(ispec, & ! arrays with mesh parameters per slice integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC),intent(in) :: ibool - real(kind=CUSTOM_REAL), dimension(9,NGLLX,NGLLY,NGLLZ,NSPEC),intent(in) :: deriv + real(kind=CUSTOM_REAL), dimension(10,NGLLX,NGLLY,NGLLZ,NSPEC),intent(in) :: deriv ! array with derivatives of Lagrange polynomials and precalculated products real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ),intent(in) :: wgll_cube @@ -119,8 +119,6 @@ pure subroutine compute_element_iso(ispec, & real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,5),intent(inout) :: epsilondev_loc ! local parameters - real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: jacobianl - real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: duxdxl, duydyl, duzdzl real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: duxdxl_plus_duydyl,duxdxl_plus_duzdzl,duydyl_plus_duzdzl real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: duxdyl_plus_duydxl,duzdxl_plus_duxdzl,duzdyl_plus_duydzl @@ -146,7 +144,7 @@ pure subroutine compute_element_iso(ispec, & ! precomputes factors call compute_element_precompute_factors(tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3, & - deriv(1,1,1,1,ispec),jacobianl, & + deriv(1,1,1,1,ispec), & duxdxl,duydyl,duzdzl, & duxdxl_plus_duydyl,duxdxl_plus_duzdzl,duydyl_plus_duzdzl, & duxdyl_plus_duydxl,duzdxl_plus_duxdzl,duzdyl_plus_duydzl) @@ -197,7 +195,7 @@ pure subroutine compute_element_iso(ispec, & ! compute non-symmetric terms for gravity if (GRAVITY_VAL) then - call compute_element_gravity(ispec,NSPEC,NGLOB,ibool,jacobianl,wgll_cube, & + call compute_element_gravity(ispec,NSPEC,NGLOB,ibool,deriv(1,1,1,1,ispec),wgll_cube, & gravity_pre_store,gravity_H, & dummyx_loc,dummyy_loc,dummyz_loc, & sigma_xx,sigma_yy,sigma_zz, & @@ -206,7 +204,7 @@ pure subroutine compute_element_iso(ispec, & endif ! dot product of stress tensor with test vector, non-symmetric form - call compute_element_dot_product_stress(deriv(1,1,1,1,ispec),jacobianl, & + call compute_element_dot_product_stress(deriv(1,1,1,1,ispec), & sigma_xx,sigma_yy,sigma_zz, & sigma_xy,sigma_xz,sigma_yz,sigma_yx,sigma_zx,sigma_zy, & tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3) @@ -285,8 +283,6 @@ end subroutine compute_element_iso ! real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,5),intent(inout) :: epsilondev_loc ! ! ! local parameters -! !real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: jacobianl -! ! !real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: duxdxl, duydyl, duzdzl ! !real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: duxdxl_plus_duydyl,duxdxl_plus_duzdzl,duydyl_plus_duzdzl ! !real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: duxdyl_plus_duydxl,duzdxl_plus_duxdzl,duzdyl_plus_duydzl @@ -494,7 +490,7 @@ end subroutine compute_element_iso ! endif ! ! ! dot product of stress tensor with test vector, non-symmetric form -! !call compute_element_dot_product_stress(deriv(1,1,1,1,ispec),jacobianl, & +! !call compute_element_dot_product_stress(deriv(1,1,1,1,ispec), & ! ! sigma_xx,sigma_yy,sigma_zz, & ! ! sigma_xy,sigma_xz,sigma_yz,sigma_yx,sigma_zx,sigma_zy, & ! ! tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3) @@ -556,7 +552,7 @@ pure subroutine compute_element_iso_ic(ispec, & ! arrays with mesh parameters per slice integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC),intent(in) :: ibool - real(kind=CUSTOM_REAL), dimension(9,NGLLX,NGLLY,NGLLZ,NSPEC),intent(in) :: deriv + real(kind=CUSTOM_REAL), dimension(10,NGLLX,NGLLY,NGLLZ,NSPEC),intent(in) :: deriv ! array with derivatives of Lagrange polynomials and precalculated products real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ),intent(in) :: wgll_cube @@ -587,8 +583,6 @@ pure subroutine compute_element_iso_ic(ispec, & real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,5),intent(inout) :: epsilondev_loc ! local parameters - real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: jacobianl - real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: duxdxl, duydyl, duzdzl real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: duxdxl_plus_duydyl,duxdxl_plus_duzdzl,duydyl_plus_duzdzl real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: duxdyl_plus_duydxl,duzdxl_plus_duxdzl,duzdyl_plus_duydzl @@ -614,7 +608,7 @@ pure subroutine compute_element_iso_ic(ispec, & ! precomputes factors call compute_element_precompute_factors(tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3, & - deriv(:,:,:,:,ispec),jacobianl, & + deriv(1,1,1,1,ispec), & duxdxl,duydyl,duzdzl, & duxdxl_plus_duydyl,duxdxl_plus_duzdzl,duydyl_plus_duzdzl, & duxdyl_plus_duydxl,duzdxl_plus_duxdzl,duzdyl_plus_duydzl) @@ -666,7 +660,7 @@ pure subroutine compute_element_iso_ic(ispec, & ! compute non-symmetric terms for gravity if (GRAVITY_VAL) then - call compute_element_gravity(ispec,NSPEC,NGLOB,ibool,jacobianl,wgll_cube, & + call compute_element_gravity(ispec,NSPEC,NGLOB,ibool,deriv(1,1,1,1,ispec),wgll_cube, & gravity_pre_store,gravity_H, & dummyx_loc,dummyy_loc,dummyz_loc, & sigma_xx,sigma_yy,sigma_zz, & @@ -675,7 +669,7 @@ pure subroutine compute_element_iso_ic(ispec, & endif ! dot product of stress tensor with test vector, non-symmetric form - call compute_element_dot_product_stress(deriv(:,:,:,:,ispec),jacobianl, & + call compute_element_dot_product_stress(deriv(1,1,1,1,ispec), & sigma_xx,sigma_yy,sigma_zz, & sigma_xy,sigma_xz,sigma_yz,sigma_yx,sigma_zx,sigma_zy, & tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3) @@ -730,7 +724,7 @@ pure subroutine compute_element_tiso(ispec, & ! arrays with mesh parameters per slice integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC),intent(in) :: ibool - real(kind=CUSTOM_REAL), dimension(9,NGLLX,NGLLY,NGLLZ,NSPEC),intent(in) :: deriv + real(kind=CUSTOM_REAL), dimension(10,NGLLX,NGLLY,NGLLZ,NSPEC),intent(in) :: deriv ! array with derivatives of Lagrange polynomials and precalculated products real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ),intent(in) :: wgll_cube @@ -768,8 +762,6 @@ pure subroutine compute_element_tiso(ispec, & real(kind=CUSTOM_REAL) :: c11,c22,c33,c44,c55,c66,c12,c13,c23,c14,c24,c34,c15,c25,c35,c45,c16,c26,c36,c46,c56 ! local element arrays - real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: jacobianl - real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: duxdxl, duydyl, duzdzl real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: duxdxl_plus_duydyl,duxdxl_plus_duzdzl,duydyl_plus_duzdzl real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: duxdyl_plus_duydxl,duzdxl_plus_duxdzl,duzdyl_plus_duydzl @@ -791,7 +783,7 @@ pure subroutine compute_element_tiso(ispec, & ! precomputes factors call compute_element_precompute_factors(tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3, & - deriv(:,:,:,:,ispec),jacobianl, & + deriv(1,1,1,1,ispec), & duxdxl,duydyl,duzdzl, & duxdxl_plus_duydyl,duxdxl_plus_duzdzl,duydyl_plus_duzdzl, & duxdyl_plus_duydxl,duzdxl_plus_duxdzl,duzdyl_plus_duydzl) @@ -870,7 +862,7 @@ pure subroutine compute_element_tiso(ispec, & ! compute non-symmetric terms for gravity if (GRAVITY_VAL) then - call compute_element_gravity(ispec,NSPEC,NGLOB,ibool,jacobianl,wgll_cube, & + call compute_element_gravity(ispec,NSPEC,NGLOB,ibool,deriv(1,1,1,1,ispec),wgll_cube, & gravity_pre_store,gravity_H, & dummyx_loc,dummyy_loc,dummyz_loc, & sigma_xx,sigma_yy,sigma_zz, & @@ -879,7 +871,7 @@ pure subroutine compute_element_tiso(ispec, & endif ! dot product of stress tensor with test vector, non-symmetric form - call compute_element_dot_product_stress(deriv(:,:,:,:,ispec),jacobianl, & + call compute_element_dot_product_stress(deriv(1,1,1,1,ispec), & sigma_xx,sigma_yy,sigma_zz, & sigma_xy,sigma_xz,sigma_yz,sigma_yx,sigma_zx,sigma_zy, & tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3) @@ -988,8 +980,6 @@ end subroutine compute_element_tiso ! real(kind=CUSTOM_REAL) :: two_eta_aniso,four_eta_aniso,six_eta_aniso ! ! ! local element arrays -! real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: jacobianl -! ! real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: duxdxl, duydyl, duzdzl ! real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: duxdxl_plus_duydyl,duxdxl_plus_duzdzl,duydyl_plus_duzdzl ! real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: duxdyl_plus_duydxl,duzdxl_plus_duxdzl,duzdyl_plus_duydzl @@ -1016,7 +1006,7 @@ end subroutine compute_element_tiso ! ! ! precomputes factors ! call compute_element_precompute_factors(tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3, & -! deriv(:,:,:,:,ispec),jacobianl, & +! deriv(1,1,1,1,ispec), & ! duxdxl,duydyl,duzdzl, & ! duxdxl_plus_duydyl,duxdxl_plus_duzdzl,duydyl_plus_duzdzl, & ! duxdyl_plus_duydxl,duzdxl_plus_duxdzl,duzdyl_plus_duydzl) @@ -1268,7 +1258,7 @@ end subroutine compute_element_tiso ! ! ! compute non-symmetric terms for gravity ! if (GRAVITY_VAL) then -! call compute_element_gravity(ispec,NSPEC,NGLOB,ibool,rstore,jacobianl,wgll_cube, & +! call compute_element_gravity(ispec,NSPEC,NGLOB,ibool,rstore,deriv(:,:,:,:,ispec),wgll_cube, & ! minus_gravity_table,minus_deriv_gravity_table,density_table, & ! dummyx_loc,dummyy_loc,dummyz_loc, & ! sigma_xx,sigma_yy,sigma_zz, & @@ -1277,7 +1267,7 @@ end subroutine compute_element_tiso ! endif ! ! ! dot product of stress tensor with test vector, non-symmetric form -! call compute_element_dot_product_stress(deriv(:,:,:,:,ispec),jacobianl, & +! call compute_element_dot_product_stress(deriv(:,:,:,:,ispec), & ! sigma_xx,sigma_yy,sigma_zz, & ! sigma_xy,sigma_xz,sigma_yz,sigma_yx,sigma_zx,sigma_zy, & ! tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3) @@ -1333,7 +1323,7 @@ pure subroutine compute_element_aniso(ispec, & ! arrays with mesh parameters per slice integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC),intent(in) :: ibool - real(kind=CUSTOM_REAL), dimension(9,NGLLX,NGLLY,NGLLZ,NSPEC),intent(in) :: deriv + real(kind=CUSTOM_REAL), dimension(10,NGLLX,NGLLY,NGLLZ,NSPEC),intent(in) :: deriv ! array with derivatives of Lagrange polynomials and precalculated products real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ),intent(in) :: wgll_cube @@ -1371,8 +1361,6 @@ pure subroutine compute_element_aniso(ispec, & real(kind=CUSTOM_REAL) :: c11,c22,c33,c44,c55,c66,c12,c13,c23,c14,c24,c34,c15,c25,c35,c45,c16,c26,c36,c46,c56 ! local element arrays - real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: jacobianl - real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: duxdxl, duydyl, duzdzl real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: duxdxl_plus_duydyl,duxdxl_plus_duzdzl,duydyl_plus_duzdzl real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: duxdyl_plus_duydxl,duzdxl_plus_duxdzl,duzdyl_plus_duydzl @@ -1393,7 +1381,7 @@ pure subroutine compute_element_aniso(ispec, & ! precomputes factors call compute_element_precompute_factors(tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3, & - deriv(:,:,:,:,ispec),jacobianl, & + deriv(1,1,1,1,ispec), & duxdxl,duydyl,duzdzl, & duxdxl_plus_duydyl,duxdxl_plus_duzdzl,duydyl_plus_duzdzl, & duxdyl_plus_duydxl,duzdxl_plus_duxdzl,duzdyl_plus_duydzl) @@ -1468,7 +1456,7 @@ pure subroutine compute_element_aniso(ispec, & ! compute non-symmetric terms for gravity if (GRAVITY_VAL) then - call compute_element_gravity(ispec,NSPEC,NGLOB,ibool,jacobianl,wgll_cube, & + call compute_element_gravity(ispec,NSPEC,NGLOB,ibool,deriv(1,1,1,1,ispec),wgll_cube, & gravity_pre_store,gravity_H, & dummyx_loc,dummyy_loc,dummyz_loc, & sigma_xx,sigma_yy,sigma_zz, & @@ -1477,7 +1465,7 @@ pure subroutine compute_element_aniso(ispec, & endif ! dot product of stress tensor with test vector, non-symmetric form - call compute_element_dot_product_stress(deriv(:,:,:,:,ispec),jacobianl, & + call compute_element_dot_product_stress(deriv(1,1,1,1,ispec), & sigma_xx,sigma_yy,sigma_zz, & sigma_xy,sigma_xz,sigma_yz,sigma_yx,sigma_zx,sigma_zy, & tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3) @@ -1528,7 +1516,7 @@ pure subroutine compute_element_aniso_ic(ispec, & ! arrays with mesh parameters per slice integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC),intent(in) :: ibool - real(kind=CUSTOM_REAL), dimension(9,NGLLX,NGLLY,NGLLZ,NSPEC),intent(in) :: deriv + real(kind=CUSTOM_REAL), dimension(10,NGLLX,NGLLY,NGLLZ,NSPEC),intent(in) :: deriv ! array with derivatives of Lagrange polynomials and precalculated products real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ),intent(in) :: wgll_cube @@ -1565,8 +1553,6 @@ pure subroutine compute_element_aniso_ic(ispec, & real(kind=CUSTOM_REAL) :: c11,c12,c13,c22,c23,c33,c44,c55,c66 ! local element arrays - real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: jacobianl - real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: duxdxl, duydyl, duzdzl real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: duxdxl_plus_duydyl,duxdxl_plus_duzdzl,duydyl_plus_duzdzl real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: duxdyl_plus_duydxl,duzdxl_plus_duxdzl,duzdyl_plus_duydzl @@ -1587,7 +1573,7 @@ pure subroutine compute_element_aniso_ic(ispec, & ! precomputes factors call compute_element_precompute_factors(tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3, & - deriv(:,:,:,:,ispec),jacobianl, & + deriv(1,1,1,1,ispec), & duxdxl,duydyl,duzdzl, & duxdxl_plus_duydyl,duxdxl_plus_duzdzl,duydyl_plus_duzdzl, & duxdyl_plus_duydxl,duzdxl_plus_duxdzl,duzdyl_plus_duydzl) @@ -1676,7 +1662,7 @@ pure subroutine compute_element_aniso_ic(ispec, & ! compute non-symmetric terms for gravity if (GRAVITY_VAL) then - call compute_element_gravity(ispec,NSPEC,NGLOB,ibool,jacobianl,wgll_cube, & + call compute_element_gravity(ispec,NSPEC,NGLOB,ibool,deriv(1,1,1,1,ispec),wgll_cube, & gravity_pre_store,gravity_H, & dummyx_loc,dummyy_loc,dummyz_loc, & sigma_xx,sigma_yy,sigma_zz, & @@ -1685,7 +1671,7 @@ pure subroutine compute_element_aniso_ic(ispec, & endif ! dot product of stress tensor with test vector, non-symmetric form - call compute_element_dot_product_stress(deriv(:,:,:,:,ispec),jacobianl, & + call compute_element_dot_product_stress(deriv(1,1,1,1,ispec), & sigma_xx,sigma_yy,sigma_zz, & sigma_xy,sigma_xz,sigma_yz,sigma_yx,sigma_zx,sigma_zy, & tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3) @@ -1808,7 +1794,7 @@ end subroutine compute_element_stress_attenuation_contrib ! please leave this routine in this file, to help compilers inlining this function... pure subroutine compute_element_precompute_factors(tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3, & - deriv_loc,jacobianl, & + deriv_loc, & duxdxl,duydyl,duzdzl, & duxdxl_plus_duydyl,duxdxl_plus_duzdzl,duydyl_plus_duzdzl, & duxdyl_plus_duydxl,duzdxl_plus_duxdzl,duzdyl_plus_duydzl) @@ -1835,15 +1821,14 @@ pure subroutine compute_element_precompute_factors(tempx1,tempx2,tempx3,tempy1,t real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ),intent(in) :: tempy1,tempy2,tempy3 real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ),intent(in) :: tempz1,tempz2,tempz3 - real(kind=CUSTOM_REAL), dimension(9,NGLLX,NGLLY,NGLLZ),intent(in) :: deriv_loc + real(kind=CUSTOM_REAL), dimension(10,NGLLX,NGLLY,NGLLZ),intent(in) :: deriv_loc - real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ),intent(out) :: jacobianl real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ),intent(out) :: duxdxl,duydyl,duzdzl real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ),intent(out) :: duxdxl_plus_duydyl,duxdxl_plus_duzdzl,duydyl_plus_duzdzl, & duxdyl_plus_duydxl,duzdxl_plus_duxdzl,duzdyl_plus_duydzl ! local parameters - real(kind=CUSTOM_REAL) :: xixl,xiyl,xizl,etaxl,etayl,etazl,gammaxl,gammayl,gammazl,jacobian + real(kind=CUSTOM_REAL) :: xixl,xiyl,xizl,etaxl,etayl,etazl,gammaxl,gammayl,gammazl real(kind=CUSTOM_REAL) :: duxdyl,duxdzl,duydxl,duydzl,duzdxl,duzdyl real(kind=CUSTOM_REAL) :: x1,x2,x3,y1,y2,y3,z1,z2,z3 @@ -1866,18 +1851,19 @@ pure subroutine compute_element_precompute_factors(tempx1,tempx2,tempx3,tempy1,t gammayl = deriv_loc(8,INDEX_IJK) gammazl = deriv_loc(9,INDEX_IJK) + ! Jacobian already stored in deriv_loc(10,INDEX_IJK) ! compute the Jacobian - jacobian = (xixl*(etayl*gammazl-etazl*gammayl) & - - xiyl*(etaxl*gammazl-etazl*gammaxl) & - + xizl*(etaxl*gammayl-etayl*gammaxl)) - + !jacobian = (xixl*(etayl*gammazl-etazl*gammayl) & + ! - xiyl*(etaxl*gammazl-etazl*gammaxl) & + ! + xizl*(etaxl*gammayl-etayl*gammaxl)) + ! ! checks Jacobian ! note: try to avoid this if-statement as it hinders the compiler to tune and vectorize this inner loop. ! an if-statement would slow this loop down significantly... ! already checked before in prepare optimized arrays: ! if (jacobian <= 0.0_CUSTOM_REAL) stop 'Error invalid jacobian in compute_element_precompute_factors()' - - jacobianl(INDEX_IJK) = 1.0_CUSTOM_REAL / jacobian + ! + !jacobianl(INDEX_IJK) = 1.0_CUSTOM_REAL / jacobian x1 = tempx1(INDEX_IJK) x2 = tempx2(INDEX_IJK) @@ -2008,7 +1994,7 @@ end subroutine compute_element_deviatoric_strain ! please leave this routine in this file, to help compilers inlining this function... - pure subroutine compute_element_dot_product_stress(deriv_loc,jacobianl, & + pure subroutine compute_element_dot_product_stress(deriv_loc, & sigma_xx,sigma_yy,sigma_zz, & sigma_xy,sigma_xz,sigma_yz,sigma_yx,sigma_zx,sigma_zy, & tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3) @@ -2031,8 +2017,7 @@ pure subroutine compute_element_dot_product_stress(deriv_loc,jacobianl, & implicit none - real(kind=CUSTOM_REAL), dimension(9,NGLLX,NGLLY,NGLLZ),intent(in) :: deriv_loc - real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ),intent(in) :: jacobianl + real(kind=CUSTOM_REAL), dimension(10,NGLLX,NGLLY,NGLLZ),intent(in) :: deriv_loc real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ),intent(in) :: sigma_xx,sigma_yy,sigma_zz real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ),intent(in) :: sigma_xy,sigma_xz,sigma_yz,sigma_yx,sigma_zx,sigma_zy @@ -2042,11 +2027,8 @@ pure subroutine compute_element_dot_product_stress(deriv_loc,jacobianl, & real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ),intent(out) :: tempz1,tempz2,tempz3 ! local parameters - real(kind=CUSTOM_REAL) :: xixl,xiyl,xizl,etaxl,etayl,etazl,gammaxl,gammayl,gammazl + real(kind=CUSTOM_REAL) :: xixl,xiyl,xizl,etaxl,etayl,etazl,gammaxl,gammayl,gammazl,jacobianl real(kind=CUSTOM_REAL) :: sxx,syy,szz,sxy,sxz,syz,syx,szx,szy - - real(kind=CUSTOM_REAL) :: fac - #ifdef FORCE_VECTORIZATION integer :: ijk #else @@ -2067,7 +2049,7 @@ pure subroutine compute_element_dot_product_stress(deriv_loc,jacobianl, & gammazl = deriv_loc(9,INDEX_IJK) ! common factor - fac = jacobianl(INDEX_IJK) + jacobianl = deriv_loc(10,INDEX_IJK) sxx = sigma_xx(INDEX_IJK) syy = sigma_yy(INDEX_IJK) @@ -2082,25 +2064,25 @@ pure subroutine compute_element_dot_product_stress(deriv_loc,jacobianl, & ! form dot product with test vector, non-symmetric form ! this goes to accel_x - tempx1(INDEX_IJK) = fac * (sxx*xixl + syx*xiyl + szx*xizl) + tempx1(INDEX_IJK) = jacobianl * (sxx*xixl + syx*xiyl + szx*xizl) ! this goes to accel_y - tempy1(INDEX_IJK) = fac * (sxy*xixl + syy*xiyl + szy*xizl) + tempy1(INDEX_IJK) = jacobianl * (sxy*xixl + syy*xiyl + szy*xizl) ! this goes to accel_z - tempz1(INDEX_IJK) = fac * (sxz*xixl + syz*xiyl + szz*xizl) + tempz1(INDEX_IJK) = jacobianl * (sxz*xixl + syz*xiyl + szz*xizl) ! this goes to accel_x - tempx2(INDEX_IJK) = fac * (sxx*etaxl + syx*etayl + szx*etazl) + tempx2(INDEX_IJK) = jacobianl * (sxx*etaxl + syx*etayl + szx*etazl) ! this goes to accel_y - tempy2(INDEX_IJK) = fac * (sxy*etaxl + syy*etayl + szy*etazl) + tempy2(INDEX_IJK) = jacobianl * (sxy*etaxl + syy*etayl + szy*etazl) ! this goes to accel_z - tempz2(INDEX_IJK) = fac * (sxz*etaxl + syz*etayl + szz*etazl) + tempz2(INDEX_IJK) = jacobianl * (sxz*etaxl + syz*etayl + szz*etazl) ! this goes to accel_x - tempx3(INDEX_IJK) = fac * (sxx*gammaxl + syx*gammayl + szx*gammazl) + tempx3(INDEX_IJK) = jacobianl * (sxx*gammaxl + syx*gammayl + szx*gammazl) ! this goes to accel_y - tempy3(INDEX_IJK) = fac * (sxy*gammaxl + syy*gammayl + szy*gammazl) + tempy3(INDEX_IJK) = jacobianl * (sxy*gammaxl + syy*gammayl + szy*gammazl) ! this goes to accel_z - tempz3(INDEX_IJK) = fac * (sxz*gammaxl + syz*gammayl + szz*gammazl) + tempz3(INDEX_IJK) = jacobianl * (sxz*gammaxl + syz*gammayl + szz*gammazl) ENDDO_LOOP_IJK end subroutine compute_element_dot_product_stress @@ -2111,7 +2093,7 @@ end subroutine compute_element_dot_product_stress ! please leave this routine in this file, to help compilers inlining this function... - pure subroutine compute_element_gravity(ispec,NSPEC,NGLOB,ibool,jacobianl,wgll_cube, & + pure subroutine compute_element_gravity(ispec,NSPEC,NGLOB,ibool,deriv_loc,wgll_cube, & gravity_pre_store,gravity_H, & dummyx_loc,dummyy_loc,dummyz_loc, & sigma_xx,sigma_yy,sigma_zz, & @@ -2142,7 +2124,7 @@ pure subroutine compute_element_gravity(ispec,NSPEC,NGLOB,ibool,jacobianl,wgll_c integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC),intent(in) :: ibool ! real(kind=CUSTOM_REAL), dimension(3,NGLOB),intent(in) :: rstore - real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ),intent(in) :: jacobianl + real(kind=CUSTOM_REAL), dimension(10,NGLLX,NGLLY,NGLLZ),intent(in) :: deriv_loc real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ),intent(in) :: wgll_cube ! gravity @@ -2165,7 +2147,7 @@ pure subroutine compute_element_gravity(ispec,NSPEC,NGLOB,ibool,jacobianl,wgll_c ! double precision :: minus_g_over_radius,minus_dg_plus_g_over_radius ! double precision :: cos_theta,sin_theta,cos_phi,sin_phi ! double precision :: cos_theta_sq,sin_theta_sq,cos_phi_sq,sin_phi_sq - real(kind=CUSTOM_REAL) :: factor,sx_l,sy_l,sz_l,gxl,gyl,gzl + real(kind=CUSTOM_REAL) :: factor,sx_l,sy_l,sz_l,gxl,gyl,gzl,jacobianl real(kind=CUSTOM_REAL) :: Hxxl,Hyyl,Hzzl,Hxyl,Hxzl,Hyzl ! integer :: int_radius @@ -2219,7 +2201,8 @@ pure subroutine compute_element_gravity(ispec,NSPEC,NGLOB,ibool,jacobianl,wgll_c Hyzl = gravity_H(6,iglob) ! cos_theta*minus_dg_plus_g_over_radius*sin_phi*sin_theta * rho ! precompute vector - factor = jacobianl(INDEX_IJK) * wgll_cube(INDEX_IJK) + jacobianl = deriv_loc(10,INDEX_IJK) + factor = jacobianl * wgll_cube(INDEX_IJK) rho_s_H(1,INDEX_IJK) = factor * (sx_l * Hxxl + sy_l * Hxyl + sz_l * Hxzl) rho_s_H(2,INDEX_IJK) = factor * (sx_l * Hxyl + sy_l * Hyyl + sz_l * Hyzl) @@ -2394,7 +2377,7 @@ pure subroutine compute_element_add_full_gravity(ispec,nspec,nglob,gravity_rho,d real(kind=CUSTOM_REAL),dimension(nglob),intent(in) :: gravity_rho - real(kind=CUSTOM_REAL), dimension(9,NGLLX,NGLLY,NGLLZ),intent(in) :: deriv_loc + real(kind=CUSTOM_REAL), dimension(10,NGLLX,NGLLY,NGLLZ),intent(in) :: deriv_loc integer, dimension(NGLLX,NGLLY,NGLLZ,nspec),intent(in) :: ibool ! pertubation gravity @@ -2436,9 +2419,11 @@ pure subroutine compute_element_add_full_gravity(ispec,nspec,nglob,gravity_rho,d gammazl = deriv_loc(9,i,j,k) ! compute the Jacobian - jacobianl = 1._CUSTOM_REAL / (xixl*(etayl*gammazl-etazl*gammayl) & - - xiyl*(etaxl*gammazl-etazl*gammaxl) & - + xizl*(etaxl*gammayl-etayl*gammaxl)) + !jacobianl = 1._CUSTOM_REAL / (xixl*(etayl*gammazl-etazl*gammayl) & + ! - xiyl*(etaxl*gammazl-etazl*gammaxl) & + ! + xizl*(etaxl*gammayl-etayl*gammaxl)) + ! from store + jacobianl = deriv_loc(10,i,j,k) ! for \grad\phi tempx1l_phi = 0._CUSTOM_REAL diff --git a/src/specfem3D/compute_element_strain.F90 b/src/specfem3D/compute_element_strain.F90 index 88931d6eb..140ff074f 100644 --- a/src/specfem3D/compute_element_strain.F90 +++ b/src/specfem3D/compute_element_strain.F90 @@ -62,7 +62,7 @@ pure subroutine compute_element_strain_undoatt_Dev(ispec,nglob,nspec, & real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLX),intent(in) :: hprime_xx,hprime_xxT - real(kind=CUSTOM_REAL), dimension(9,NGLLX,NGLLY,NGLLZ,nspec),intent(in) :: deriv + real(kind=CUSTOM_REAL), dimension(10,NGLLX,NGLLY,NGLLZ,nspec),intent(in) :: deriv real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ),intent(out) :: epsilondev_loc real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ),intent(out) :: eps_trace_over_3_loc @@ -473,7 +473,7 @@ pure subroutine compute_element_strain_att_Dev(ispec,nglob,nspec, & integer, dimension(NGLLX,NGLLY,NGLLZ,nspec),intent(in) :: ibool real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLX),intent(in) :: hprime_xx,hprime_xxT - real(kind=CUSTOM_REAL), dimension(9,NGLLX,NGLLY,NGLLZ,nspec),intent(in) :: deriv + real(kind=CUSTOM_REAL), dimension(10,NGLLX,NGLLY,NGLLZ,nspec),intent(in) :: deriv real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,nspec),intent(out) :: epsilondev_xx_loc_nplus1 real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,nspec),intent(out) :: epsilondev_yy_loc_nplus1 diff --git a/src/specfem3D/compute_forces_crust_mantle_noDev.f90 b/src/specfem3D/compute_forces_crust_mantle_noDev.f90 index ef9be1c8f..f436c7f14 100644 --- a/src/specfem3D/compute_forces_crust_mantle_noDev.f90 +++ b/src/specfem3D/compute_forces_crust_mantle_noDev.f90 @@ -153,7 +153,7 @@ subroutine compute_forces_crust_mantle_noDev(NSPEC_STR_OR_ATT,NGLOB,NSPEC_ATT, & real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NGLLZ) :: rho_s_H ! full gravity - real(kind=CUSTOM_REAL), dimension(9,NGLLX,NGLLY,NGLLZ) :: deriv_loc + real(kind=CUSTOM_REAL), dimension(10,NGLLX,NGLLY,NGLLZ) :: deriv_loc ! integer :: computed_elements integer :: num_elements,ispec_p @@ -229,6 +229,11 @@ subroutine compute_forces_crust_mantle_noDev(NSPEC_STR_OR_ATT,NGLOB,NSPEC_ATT, & gammayl = gammay(i,j,k,ispec) gammazl = gammaz(i,j,k,ispec) +! compute the Jacobian + jacobianl = 1._CUSTOM_REAL / (xixl*(etayl*gammazl-etazl*gammayl) & + - xiyl*(etaxl*gammazl-etazl*gammaxl) & + + xizl*(etaxl*gammayl-etayl*gammaxl)) + ! full gravity if (FULL_GRAVITY_VAL .and. .not. DISCARD_GCONTRIB) then ! note: deriv_mapping_crust_mantle is only available when chosen to use Deville routines @@ -242,13 +247,9 @@ subroutine compute_forces_crust_mantle_noDev(NSPEC_STR_OR_ATT,NGLOB,NSPEC_ATT, & deriv_loc(7,i,j,k) = gammaxl deriv_loc(8,i,j,k) = gammayl deriv_loc(9,i,j,k) = gammazl + deriv_loc(10,i,j,k) = jacobianl endif -! compute the Jacobian - jacobianl = 1._CUSTOM_REAL / (xixl*(etayl*gammazl-etazl*gammayl) & - - xiyl*(etaxl*gammazl-etazl*gammaxl) & - + xizl*(etaxl*gammayl-etayl*gammaxl)) - duxdxl = xixl*tempx1l + etaxl*tempx2l + gammaxl*tempx3l duxdyl = xiyl*tempx1l + etayl*tempx2l + gammayl*tempx3l duxdzl = xizl*tempx1l + etazl*tempx2l + gammazl*tempx3l diff --git a/src/specfem3D/compute_forces_inner_core_noDev.f90 b/src/specfem3D/compute_forces_inner_core_noDev.f90 index bb721f81d..c31045d3b 100644 --- a/src/specfem3D/compute_forces_inner_core_noDev.f90 +++ b/src/specfem3D/compute_forces_inner_core_noDev.f90 @@ -148,7 +148,7 @@ subroutine compute_forces_inner_core_noDev( NSPEC_STR_OR_ATT,NGLOB,NSPEC_ATT, & real(kind=CUSTOM_REAL) sigma_yx,sigma_zx,sigma_zy ! full gravity - real(kind=CUSTOM_REAL), dimension(9,NGLLX,NGLLY,NGLLZ) :: deriv_loc + real(kind=CUSTOM_REAL), dimension(10,NGLLX,NGLLY,NGLLZ) :: deriv_loc ! integer :: computed_elements integer :: num_elements,ispec_p @@ -226,6 +226,11 @@ subroutine compute_forces_inner_core_noDev( NSPEC_STR_OR_ATT,NGLOB,NSPEC_ATT, & gammayl = gammay(i,j,k,ispec) gammazl = gammaz(i,j,k,ispec) + ! compute the Jacobian + jacobianl = 1._CUSTOM_REAL / (xixl*(etayl*gammazl-etazl*gammayl) & + - xiyl*(etaxl*gammazl-etazl*gammaxl) & + + xizl*(etaxl*gammayl-etayl*gammaxl)) + ! full gravity if (FULL_GRAVITY_VAL .and. .not. DISCARD_GCONTRIB) then ! note: deriv_mapping_inner_core is only available when chosen to use Deville routines @@ -239,13 +244,9 @@ subroutine compute_forces_inner_core_noDev( NSPEC_STR_OR_ATT,NGLOB,NSPEC_ATT, & deriv_loc(7,i,j,k) = gammaxl deriv_loc(8,i,j,k) = gammayl deriv_loc(9,i,j,k) = gammazl + deriv_loc(10,i,j,k) = jacobianl endif - ! compute the Jacobian - jacobianl = 1._CUSTOM_REAL / (xixl*(etayl*gammazl-etazl*gammayl) & - - xiyl*(etaxl*gammazl-etazl*gammaxl) & - + xizl*(etaxl*gammayl-etayl*gammaxl)) - duxdxl = xixl*tempx1l + etaxl*tempx2l + gammaxl*tempx3l duxdyl = xiyl*tempx1l + etayl*tempx2l + gammayl*tempx3l duxdzl = xizl*tempx1l + etazl*tempx2l + gammazl*tempx3l diff --git a/src/specfem3D/compute_forces_outer_core_Dev.F90 b/src/specfem3D/compute_forces_outer_core_Dev.F90 index dfe5f9fab..094e2d902 100644 --- a/src/specfem3D/compute_forces_outer_core_Dev.F90 +++ b/src/specfem3D/compute_forces_outer_core_Dev.F90 @@ -114,10 +114,9 @@ subroutine compute_forces_outer_core_Dev(timeval,deltat,two_omega_earth, & real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: A_rotation,B_rotation integer :: ispec,iglob - real(kind=CUSTOM_REAL) :: xixl,xiyl,xizl,etaxl,etayl,etazl,gammaxl,gammayl,gammazl + real(kind=CUSTOM_REAL) :: xixl,xiyl,xizl,etaxl,etayl,etazl,gammaxl,gammayl,gammazl,jacobianl real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: dpotentialdxl,dpotentialdyl,dpotentialdzl - real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: jacobianl ! Deville real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: chi_elem @@ -259,11 +258,6 @@ subroutine compute_forces_outer_core_Dev(timeval,deltat,two_omega_earth, & gammayl = deriv(8,INDEX_IJK,ispec) gammazl = deriv(9,INDEX_IJK,ispec) - ! compute the Jacobian - jacobianl(INDEX_IJK) = 1._CUSTOM_REAL / (xixl*(etayl*gammazl-etazl*gammayl) & - - xiyl*(etaxl*gammazl-etazl*gammaxl) & - + xizl*(etaxl*gammayl-etayl*gammaxl)) - dpotentialdxl(INDEX_IJK) = xixl*temp1(INDEX_IJK) + etaxl*temp2(INDEX_IJK) + gammaxl*temp3(INDEX_IJK) dpotentialdyl(INDEX_IJK) = xiyl*temp1(INDEX_IJK) + etayl*temp2(INDEX_IJK) + gammayl*temp3(INDEX_IJK) dpotentialdzl(INDEX_IJK) = xizl*temp1(INDEX_IJK) + etazl*temp2(INDEX_IJK) + gammazl*temp3(INDEX_IJK) @@ -388,12 +382,13 @@ subroutine compute_forces_outer_core_Dev(timeval,deltat,two_omega_earth, & gammaxl = deriv(7,INDEX_IJK,ispec) gammayl = deriv(8,INDEX_IJK,ispec) gammazl = deriv(9,INDEX_IJK,ispec) + jacobianl = deriv(10,INDEX_IJK,ispec) - temp1(INDEX_IJK) = jacobianl(INDEX_IJK) * & + temp1(INDEX_IJK) = jacobianl * & (xixl*dpotentialdxl(INDEX_IJK) + xiyl*dpotentialdyl(INDEX_IJK) + xizl*dpotentialdzl(INDEX_IJK)) - temp2(INDEX_IJK) = jacobianl(INDEX_IJK) * & + temp2(INDEX_IJK) = jacobianl * & (etaxl*dpotentialdxl(INDEX_IJK) + etayl*dpotentialdyl(INDEX_IJK) + etazl*dpotentialdzl(INDEX_IJK)) - temp3(INDEX_IJK) = jacobianl(INDEX_IJK) * & + temp3(INDEX_IJK) = jacobianl * & (gammaxl*dpotentialdxl(INDEX_IJK) + gammayl*dpotentialdyl(INDEX_IJK) + gammazl*dpotentialdzl(INDEX_IJK)) ENDDO_LOOP_IJK @@ -453,8 +448,9 @@ subroutine compute_forces_outer_core_Dev(timeval,deltat,two_omega_earth, & vec_z = gravity_pre_store(3,iglob) ! compute divergence of displacement - ! distinguish between single and double precision for reals - gravity_term = jacobianl(INDEX_IJK) * wgll_cube(INDEX_IJK) & + jacobianl = deriv(10,INDEX_IJK,ispec) + + gravity_term = jacobianl * wgll_cube(INDEX_IJK) & * (dpotentialdxl(INDEX_IJK) * vec_x & + dpotentialdyl(INDEX_IJK) * vec_y & + dpotentialdzl(INDEX_IJK) * vec_z) @@ -462,7 +458,7 @@ subroutine compute_forces_outer_core_Dev(timeval,deltat,two_omega_earth, & ! full gravity contribution if (FULL_GRAVITY_VAL .and. .not. DISCARD_GCONTRIB) then gravity_term = gravity_term & - - gravity_rho_g_over_kappa(iglob) * jacobianl(INDEX_IJK) * wgll_cube(INDEX_IJK) * pgrav_outer_core(iglob) + - gravity_rho_g_over_kappa(iglob) * jacobianl * wgll_cube(INDEX_IJK) * pgrav_outer_core(iglob) endif ! divergence of displacement field with gravity on diff --git a/src/specfem3D/prepare_optimized_arrays.F90 b/src/specfem3D/prepare_optimized_arrays.F90 index 3d95d9d67..d78b90ac1 100644 --- a/src/specfem3D/prepare_optimized_arrays.F90 +++ b/src/specfem3D/prepare_optimized_arrays.F90 @@ -472,7 +472,8 @@ subroutine prepare_fused_array() ! crust/mantle ! allocates fused array - allocate(deriv_mapping_crust_mantle(9,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE),stat=ier) + ! (using padding w/ size 16) + allocate(deriv_mapping_crust_mantle(10,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE),stat=ier) if (ier /= 0) stop 'Error allocating array deriv_mapping_crust_mantle' deriv_mapping_crust_mantle(:,:,:,:,:) = 0.0_CUSTOM_REAL @@ -509,6 +510,7 @@ subroutine prepare_fused_array() deriv_mapping_crust_mantle(7,INDEX_IJK,ispec) = gammaxl deriv_mapping_crust_mantle(8,INDEX_IJK,ispec) = gammayl deriv_mapping_crust_mantle(9,INDEX_IJK,ispec) = gammazl + deriv_mapping_crust_mantle(10,INDEX_IJK,ispec) = 1.0_CUSTOM_REAL / jacobianl ENDDO_LOOP_IJK @@ -516,7 +518,7 @@ subroutine prepare_fused_array() ! inner core ! allocates fused array - allocate(deriv_mapping_inner_core(9,NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE),stat=ier) + allocate(deriv_mapping_inner_core(10,NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE),stat=ier) if (ier /= 0) stop 'Error allocating array deriv_mapping_inner_core' deriv_mapping_inner_core(:,:,:,:,:) = 0.0_CUSTOM_REAL @@ -546,6 +548,8 @@ subroutine prepare_fused_array() ! checks Jacobian if (jacobianl <= 0.0_CUSTOM_REAL) stop 'Error invalid Jacobian in inner core element' + else + jacobianl = 1.0_CUSTOM_REAL endif deriv_mapping_inner_core(1,INDEX_IJK,ispec) = xixl @@ -557,6 +561,7 @@ subroutine prepare_fused_array() deriv_mapping_inner_core(7,INDEX_IJK,ispec) = gammaxl deriv_mapping_inner_core(8,INDEX_IJK,ispec) = gammayl deriv_mapping_inner_core(9,INDEX_IJK,ispec) = gammazl + deriv_mapping_inner_core(10,INDEX_IJK,ispec) = 1.0_CUSTOM_REAL / jacobianl ENDDO_LOOP_IJK @@ -564,7 +569,7 @@ subroutine prepare_fused_array() ! outer core ! allocates fused array - allocate(deriv_mapping_outer_core(9,NGLLX,NGLLY,NGLLZ,NSPEC_OUTER_CORE),stat=ier) + allocate(deriv_mapping_outer_core(10,NGLLX,NGLLY,NGLLZ,NSPEC_OUTER_CORE),stat=ier) if (ier /= 0) stop 'Error allocating array deriv_mapping_outer_core' deriv_mapping_outer_core(:,:,:,:,:) = 0.0_CUSTOM_REAL @@ -601,6 +606,7 @@ subroutine prepare_fused_array() deriv_mapping_outer_core(7,INDEX_IJK,ispec) = gammaxl deriv_mapping_outer_core(8,INDEX_IJK,ispec) = gammayl deriv_mapping_outer_core(9,INDEX_IJK,ispec) = gammazl + deriv_mapping_outer_core(10,INDEX_IJK,ispec) = 1.0_CUSTOM_REAL / jacobianl ENDDO_LOOP_IJK From ae8347244ed60492cd86d5ff08c364adfa4c6a2f Mon Sep 17 00:00:00 2001 From: Daniel Peter Date: Sat, 22 Jun 2024 13:50:32 +0200 Subject: [PATCH 06/13] updates source/receiver position user output --- src/meshfem3D/setup_model.f90 | 7 +++-- src/specfem3D/initialize_simulation.F90 | 4 ++- src/specfem3D/locate_receivers.f90 | 10 ++++--- src/specfem3D/locate_sources.f90 | 40 ++++++++++--------------- 4 files changed, 29 insertions(+), 32 deletions(-) diff --git a/src/meshfem3D/setup_model.f90 b/src/meshfem3D/setup_model.f90 index 9558c7ce6..6bc18d235 100644 --- a/src/meshfem3D/setup_model.f90 +++ b/src/meshfem3D/setup_model.f90 @@ -130,7 +130,7 @@ end subroutine setup_model subroutine sm_output_info() - use constants, only: IMAIN,NGLLX,NGLLY,NGLLZ,NGNOD,NGNOD2D,N_SLS + use constants, only: IMAIN,NGLLX,NGLLY,NGLLZ,NGNOD,NGNOD2D,N_SLS,ASSUME_PERFECT_SPHERE use shared_parameters, only: R_PLANET_KM use meshfem_models_par @@ -248,7 +248,10 @@ subroutine sm_output_info() write(IMAIN,*) ' no general mantle anisotropy' endif write(IMAIN,*) - + if (ASSUME_PERFECT_SPHERE) then + write(IMAIN,*) ' assuming perfect sphere' + endif + write(IMAIN,*) write(IMAIN,*) 'Reference radius of the globe used is ',R_PLANET_KM,' km' write(IMAIN,*) write(IMAIN,*) 'Central cube is at a radius of ',R_CENTRAL_CUBE/1000.d0,' km' diff --git a/src/specfem3D/initialize_simulation.F90 b/src/specfem3D/initialize_simulation.F90 index f2c81d7f2..90cd641e5 100644 --- a/src/specfem3D/initialize_simulation.F90 +++ b/src/specfem3D/initialize_simulation.F90 @@ -247,8 +247,10 @@ subroutine initialize_simulation() else write(IMAIN,*) ' no general mantle anisotropy' endif - write(IMAIN,*) + if (ASSUME_PERFECT_SPHERE) then + write(IMAIN,*) ' assuming perfect sphere' + endif write(IMAIN,*) if (REGIONAL_MESH_CUTOFF) then write(IMAIN,*) 'Regional mesh cutoff:' diff --git a/src/specfem3D/locate_receivers.f90 b/src/specfem3D/locate_receivers.f90 index a107064b9..7b5e720c1 100644 --- a/src/specfem3D/locate_receivers.f90 +++ b/src/specfem3D/locate_receivers.f90 @@ -513,16 +513,18 @@ subroutine locate_receivers(yr,jda,ho,mi,sec) write(IMAIN,*) ' original longitude: ',sngl(stlon(irec)) write(IMAIN,*) ' epicentral distance: ',sngl(epidist(irec)) write(IMAIN,*) ' closest estimate found: ',sngl(final_distance(irec)),' km away' - write(IMAIN,*) ' in slice ',islice_selected_rec(irec),' in element ',ispec_selected_rec(irec) - write(IMAIN,*) ' at xi,eta,gamma coordinates = ',xi_receiver(irec),eta_receiver(irec),gamma_receiver(irec) - write(IMAIN,*) ' at (x,y,z) = ',xyz_found(1,irec),xyz_found(2,irec),xyz_found(3,irec) + write(IMAIN,*) ' in slice ',islice_selected_rec(irec),' in element ',ispec_selected_rec(irec) + write(IMAIN,*) ' at xi,eta,gamma coordinates = ', & + sngl(xi_receiver(irec)),sngl(eta_receiver(irec)),sngl(gamma_receiver(irec)) + write(IMAIN,*) ' at (x,y,z) = ', & + sngl(xyz_found(1,irec)),sngl(xyz_found(2,irec)),sngl(xyz_found(3,irec)) ! converts geocentric coordinates x/y/z to geographic radius/latitude/longitude (in degrees) call xyz_2_rlatlon_dble(xyz_found(1,irec),xyz_found(2,irec),xyz_found(3,irec),radius,lat,lon,ELLIPTICITY_VAL) ! output same longitude range ([0,360] by default) as input range from stations file stlon(..) if (stlon(irec) < 0.d0) lon = lon - 360.d0 - write(IMAIN,*) ' at lat/lon = ',sngl(lat),sngl(lon) + write(IMAIN,*) ' at lat/lon = ',sngl(lat),sngl(lon) endif ! add warning if estimate is poor diff --git a/src/specfem3D/locate_sources.f90 b/src/specfem3D/locate_sources.f90 index b4e72542e..60fb9cc3f 100644 --- a/src/specfem3D/locate_sources.f90 +++ b/src/specfem3D/locate_sources.f90 @@ -477,39 +477,29 @@ subroutine locate_sources() write(IMAIN,*) ' source located in slice ',islice_selected_source(isource_in_this_subset) write(IMAIN,*) ' in element ',ispec_selected_source(isource_in_this_subset) write(IMAIN,*) + write(IMAIN,*) ' at xi,eta,gamma coordinates = ', & + sngl(xi_source(isource)),sngl(eta_source(isource)),sngl(gamma_source(isource)) + write(IMAIN,*) ' at (x,y,z) = ', sngl(xyz_found_subset(1,isource_in_this_subset)), & + sngl(xyz_found_subset(2,isource_in_this_subset)),sngl(xyz_found_subset(3,isource_in_this_subset)) + write(IMAIN,*) ! different output for force point sources if (USE_FORCE_POINT_SOURCE) then write(IMAIN,*) ' using force point source:' - write(IMAIN,*) ' xi coordinate of source in that element: ',xi_source(isource) - write(IMAIN,*) ' eta coordinate of source in that element: ',eta_source(isource) - write(IMAIN,*) ' gamma coordinate of source in that element: ',gamma_source(isource) - - write(IMAIN,*) - write(IMAIN,*) ' component of direction vector in East direction: ',comp_dir_vect_source_E(isource) - write(IMAIN,*) ' component of direction vector in North direction: ',comp_dir_vect_source_N(isource) - write(IMAIN,*) ' component of direction vector in Vertical direction: ',comp_dir_vect_source_Z_UP(isource) - + write(IMAIN,*) ' component of direction vector in East direction: ',sngl(comp_dir_vect_source_E(isource)) + write(IMAIN,*) ' component of direction vector in North direction: ',sngl(comp_dir_vect_source_N(isource)) + write(IMAIN,*) ' component of direction vector in Vertical direction: ',sngl(comp_dir_vect_source_Z_UP(isource)) !write(IMAIN,*) ' i index of source in that element: ',nint(xi_source(isource)) !write(IMAIN,*) ' j index of source in that element: ',nint(eta_source(isource)) !write(IMAIN,*) ' k index of source in that element: ',nint(gamma_source(isource)) !write(IMAIN,*) !write(IMAIN,*) ' component direction: ',COMPONENT_FORCE_SOURCE write(IMAIN,*) - write(IMAIN,*) ' nu1 = ',nu_source(1,:,isource),'North' - write(IMAIN,*) ' nu2 = ',nu_source(2,:,isource),'East' - write(IMAIN,*) ' nu3 = ',nu_source(3,:,isource),'Vertical' - write(IMAIN,*) - write(IMAIN,*) ' at (x,y,z) coordinates = ',xyz_found_subset(1,isource_in_this_subset), & - xyz_found_subset(2,isource_in_this_subset),xyz_found_subset(3,isource_in_this_subset) + write(IMAIN,*) ' nu1 = ',sngl(nu_source(1,:,isource)),'North' + write(IMAIN,*) ' nu2 = ',sngl(nu_source(2,:,isource)),'East' + write(IMAIN,*) ' nu3 = ',sngl(nu_source(3,:,isource)),'Vertical' else ! moment tensor - write(IMAIN,*) ' using moment tensor source:' - write(IMAIN,*) ' xi coordinate of source in that element: ',xi_source(isource) - write(IMAIN,*) ' eta coordinate of source in that element: ',eta_source(isource) - write(IMAIN,*) ' gamma coordinate of source in that element: ',gamma_source(isource) - write(IMAIN,*) - write(IMAIN,*) ' at (x,y,z) coordinates = ',xyz_found_subset(1,isource_in_this_subset), & - xyz_found_subset(2,isource_in_this_subset),xyz_found_subset(3,isource_in_this_subset) + write(IMAIN,*) ' using moment tensor source' endif write(IMAIN,*) @@ -575,9 +565,9 @@ subroutine locate_sources() write(IMAIN,*) ' using (quasi) Heaviside source time function' ! add message if source is a Heaviside if (hdur(isource) <= 5.0*DT) then - write(IMAIN,*) - write(IMAIN,*) ' Source time function is a Heaviside, convolve later' - write(IMAIN,*) + write(IMAIN,*) ' ***' + write(IMAIN,*) ' *** Source time function is a Heaviside, convolve later' + write(IMAIN,*) ' ***' endif write(IMAIN,*) write(IMAIN,*) ' half duration: ',hdur(isource),' seconds' From 821290d3d57d08b35b8ea5258747f7a57721beaa Mon Sep 17 00:00:00 2001 From: Daniel Peter Date: Mon, 24 Jun 2024 21:13:21 +0200 Subject: [PATCH 07/13] updates iglob variable names --- src/gindex3D/create_gindex.f90 | 210 ++++++++++++++-------------- src/specfem3D/SIEM_index_region.F90 | 108 +++++++------- 2 files changed, 159 insertions(+), 159 deletions(-) diff --git a/src/gindex3D/create_gindex.f90 b/src/gindex3D/create_gindex.f90 index 8ad638a29..6b1031980 100644 --- a/src/gindex3D/create_gindex.f90 +++ b/src/gindex3D/create_gindex.f90 @@ -89,9 +89,9 @@ subroutine create_gindex_for_process(i_proc) integer :: j_proc integer :: i,j,k,i_elmt,i_node,ier integer :: ispec_ic,ispec_oc,ispec_cm,ispec_trinf,ispec_inf - integer :: ibool_ic,ibool_oc,ibool_cm,ibool_trinf,ibool_inf + integer :: iglob_ic,iglob_oc,iglob_cm,iglob_trinf,iglob_inf integer :: k_ic,k_oc,k_cm,k_trinf,k_inf - integer :: ibool,inode,ignode,ispec,nnode_icb,nnode_cmb,nnode_trinfb,nnode_infb + integer :: iglob,inode,ignode,ispec,nnode_icb,nnode_cmb,nnode_trinfb,nnode_infb ! local integer,dimension(:),allocatable :: inode_ic,inode_oc,inode_cm,inode_trinf,inode_inf @@ -435,10 +435,10 @@ subroutine create_gindex_for_process(i_proc) k_ic = NGLLZ ! top face do j = 1,NGLLY do i = 1,NGLLX - ibool_oc = ibool_outer_core(i,j,k_oc,ispec_oc) - ibool_ic = ibool_inner_core(i,j,k_ic,ispec_ic) - inode_oc(ibool_oc)=inode_ic(ibool_ic) - isnode_oc(ibool_oc) = .true. + iglob_oc = ibool_outer_core(i,j,k_oc,ispec_oc) + iglob_ic = ibool_inner_core(i,j,k_ic,ispec_ic) + inode_oc(iglob_oc)=inode_ic(iglob_ic) + isnode_oc(iglob_oc) = .true. enddo enddo enddo @@ -447,11 +447,11 @@ subroutine create_gindex_for_process(i_proc) do k = 1,NGLLZ do j = 1,NGLLY do i = 1,NGLLX - ibool_oc = ibool_outer_core(i,j,k,i_elmt) - if (.not. isnode_oc(ibool_oc)) then - isnode_oc(ibool_oc) = .true. + iglob_oc = ibool_outer_core(i,j,k,i_elmt) + if (.not. isnode_oc(iglob_oc)) then + isnode_oc(iglob_oc) = .true. inode = inode+1 - inode_oc(ibool_oc)=inode + inode_oc(iglob_oc)=inode endif enddo enddo @@ -468,10 +468,10 @@ subroutine create_gindex_for_process(i_proc) k_cm = 1; k_oc = NGLLZ do j = 1,NGLLY do i = 1,NGLLX - ibool_cm = ibool_crust_mantle(i,j,k_cm,ispec_cm) - ibool_oc = ibool_outer_core(i,j,k_oc,ispec_oc) - inode_cm(ibool_cm)=inode_oc(ibool_oc) - isnode_cm(ibool_cm) = .true. + iglob_cm = ibool_crust_mantle(i,j,k_cm,ispec_cm) + iglob_oc = ibool_outer_core(i,j,k_oc,ispec_oc) + inode_cm(iglob_cm)=inode_oc(iglob_oc) + isnode_cm(iglob_cm) = .true. enddo enddo enddo @@ -480,11 +480,11 @@ subroutine create_gindex_for_process(i_proc) do k = 1,NGLLZ do j = 1,NGLLY do i = 1,NGLLX - ibool_cm = ibool_crust_mantle(i,j,k,i_elmt) - if (.not. isnode_cm(ibool_cm)) then - isnode_cm(ibool_cm) = .true. + iglob_cm = ibool_crust_mantle(i,j,k,i_elmt) + if (.not. isnode_cm(iglob_cm)) then + isnode_cm(iglob_cm) = .true. inode = inode+1 - inode_cm(ibool_cm)=inode + inode_cm(iglob_cm)=inode endif enddo enddo @@ -502,10 +502,10 @@ subroutine create_gindex_for_process(i_proc) k_trinf = 1; k_cm = NGLLZ do j = 1,NGLLY do i = 1,NGLLX - ibool_trinf = ibool_trinfinite(i,j,k_trinf,ispec_trinf) - ibool_cm = ibool_crust_mantle(i,j,k_cm,ispec_cm) - inode_trinf(ibool_trinf)=inode_cm(ibool_cm) - isnode_trinf(ibool_trinf) = .true. + iglob_trinf = ibool_trinfinite(i,j,k_trinf,ispec_trinf) + iglob_cm = ibool_crust_mantle(i,j,k_cm,ispec_cm) + inode_trinf(iglob_trinf)=inode_cm(iglob_cm) + isnode_trinf(iglob_trinf) = .true. enddo enddo enddo @@ -514,11 +514,11 @@ subroutine create_gindex_for_process(i_proc) do k = 1,NGLLZ do j = 1,NGLLY do i = 1,NGLLX - ibool_trinf = ibool_trinfinite(i,j,k,i_elmt) - if (.not. isnode_trinf(ibool_trinf)) then - isnode_trinf(ibool_trinf) = .true. + iglob_trinf = ibool_trinfinite(i,j,k,i_elmt) + if (.not. isnode_trinf(iglob_trinf)) then + isnode_trinf(iglob_trinf) = .true. inode = inode+1 - inode_trinf(ibool_trinf)=inode + inode_trinf(iglob_trinf)=inode endif enddo enddo @@ -535,10 +535,10 @@ subroutine create_gindex_for_process(i_proc) k_inf = 1; k_trinf = NGLLZ do j = 1,NGLLY do i = 1,NGLLX - ibool_inf = ibool_infinite(i,j,k_inf,ispec_inf) - ibool_trinf = ibool_trinfinite(i,j,k_trinf,ispec_trinf) - inode_inf(ibool_inf)=inode_trinf(ibool_trinf) - isnode_inf(ibool_inf) = .true. + iglob_inf = ibool_infinite(i,j,k_inf,ispec_inf) + iglob_trinf = ibool_trinfinite(i,j,k_trinf,ispec_trinf) + inode_inf(iglob_inf)=inode_trinf(iglob_trinf) + isnode_inf(iglob_inf) = .true. enddo enddo enddo @@ -547,11 +547,11 @@ subroutine create_gindex_for_process(i_proc) do k = 1,NGLLZ do j = 1,NGLLY do i = 1,NGLLX - ibool_inf = ibool_infinite(i,j,k,i_elmt) - if (.not. isnode_inf(ibool_inf)) then - isnode_inf(ibool_inf) = .true. + iglob_inf = ibool_infinite(i,j,k,i_elmt) + if (.not. isnode_inf(iglob_inf)) then + isnode_inf(iglob_inf) = .true. inode = inode+1 - inode_inf(ibool_inf)=inode + inode_inf(iglob_inf)=inode endif enddo enddo @@ -568,10 +568,10 @@ subroutine create_gindex_for_process(i_proc) k_inf = 1; k_cm = NGLLZ do j = 1,NGLLY do i = 1,NGLLX - ibool_inf = ibool_infinite(i,j,k_inf,ispec_inf) - ibool_cm = ibool_crust_mantle(i,j,k_cm,ispec_cm) - inode_inf(ibool_inf)=inode_cm(ibool_cm) - isnode_inf(ibool_inf) = .true. + iglob_inf = ibool_infinite(i,j,k_inf,ispec_inf) + iglob_cm = ibool_crust_mantle(i,j,k_cm,ispec_cm) + inode_inf(iglob_inf)=inode_cm(iglob_cm) + isnode_inf(iglob_inf) = .true. enddo enddo enddo @@ -580,11 +580,11 @@ subroutine create_gindex_for_process(i_proc) do k = 1,NGLLZ do j = 1,NGLLY do i = 1,NGLLX - ibool_inf = ibool_infinite(i,j,k,i_elmt) - if (.not. isnode_inf(ibool_inf)) then - isnode_inf(ibool_inf) = .true. + iglob_inf = ibool_infinite(i,j,k,i_elmt) + if (.not. isnode_inf(iglob_inf)) then + isnode_inf(iglob_inf) = .true. inode = inode+1 - inode_inf(ibool_inf)=inode + inode_inf(iglob_inf)=inode endif enddo enddo @@ -746,10 +746,10 @@ subroutine create_gindex_for_process(i_proc) k_ic = NGLLZ ! top face do j = 1,NGLLY do i = 1,NGLLX - ibool_oc = ibool_outer_core(i,j,k_oc,ispec_oc) - ibool_ic = ibool_inner_core(i,j,k_ic,ispec_ic) - ignode_oc(ibool_oc) = ignode_ic(ibool_ic) - isgnode_oc(ibool_oc) = .true. + iglob_oc = ibool_outer_core(i,j,k_oc,ispec_oc) + iglob_ic = ibool_inner_core(i,j,k_ic,ispec_ic) + ignode_oc(iglob_oc) = ignode_ic(iglob_ic) + isgnode_oc(iglob_oc) = .true. enddo enddo enddo @@ -758,11 +758,11 @@ subroutine create_gindex_for_process(i_proc) do k = 1,NGLLZ do j = 1,NGLLY do i = 1,NGLLX - ibool_oc = ibool_outer_core(i,j,k,i_elmt) - if (.not. isgnode_oc(ibool_oc)) then - isgnode_oc(ibool_oc) = .true. + iglob_oc = ibool_outer_core(i,j,k,i_elmt) + if (.not. isgnode_oc(iglob_oc)) then + isgnode_oc(iglob_oc) = .true. ignode = ignode+1 - ignode_oc(ibool_oc)=ignode + ignode_oc(iglob_oc)=ignode endif enddo enddo @@ -780,10 +780,10 @@ subroutine create_gindex_for_process(i_proc) k_cm = 1; k_oc = NGLLZ do j = 1,NGLLY do i = 1,NGLLX - ibool_cm = ibool_crust_mantle(i,j,k_cm,ispec_cm) - ibool_oc = ibool_outer_core(i,j,k_oc,ispec_oc) - ignode_cm(ibool_cm) = ignode_oc(ibool_oc) - isgnode_cm(ibool_cm) = .true. + iglob_cm = ibool_crust_mantle(i,j,k_cm,ispec_cm) + iglob_oc = ibool_outer_core(i,j,k_oc,ispec_oc) + ignode_cm(iglob_cm) = ignode_oc(iglob_oc) + isgnode_cm(iglob_cm) = .true. enddo enddo enddo @@ -792,11 +792,11 @@ subroutine create_gindex_for_process(i_proc) do k = 1,NGLLZ do j = 1,NGLLY do i = 1,NGLLX - ibool_cm = ibool_crust_mantle(i,j,k,i_elmt) - if (.not. isgnode_cm(ibool_cm)) then - isgnode_cm(ibool_cm) = .true. + iglob_cm = ibool_crust_mantle(i,j,k,i_elmt) + if (.not. isgnode_cm(iglob_cm)) then + isgnode_cm(iglob_cm) = .true. ignode = ignode+1 - ignode_cm(ibool_cm)=ignode + ignode_cm(iglob_cm)=ignode endif enddo enddo @@ -815,10 +815,10 @@ subroutine create_gindex_for_process(i_proc) k_trinf = 1; k_cm = NGLLZ do j = 1,NGLLY do i = 1,NGLLX - ibool_trinf = ibool_trinfinite(i,j,k_trinf,ispec_trinf) - ibool_cm = ibool_crust_mantle(i,j,k_cm,ispec_cm) - ignode_trinf(ibool_trinf) = ignode_cm(ibool_cm) - isgnode_trinf(ibool_trinf) = .true. + iglob_trinf = ibool_trinfinite(i,j,k_trinf,ispec_trinf) + iglob_cm = ibool_crust_mantle(i,j,k_cm,ispec_cm) + ignode_trinf(iglob_trinf) = ignode_cm(iglob_cm) + isgnode_trinf(iglob_trinf) = .true. enddo enddo enddo @@ -827,11 +827,11 @@ subroutine create_gindex_for_process(i_proc) do k = 1,NGLLZ do j = 1,NGLLY do i = 1,NGLLX - ibool_trinf = ibool_trinfinite(i,j,k,i_elmt) - if (.not. isgnode_trinf(ibool_trinf)) then - isgnode_trinf(ibool_trinf) = .true. + iglob_trinf = ibool_trinfinite(i,j,k,i_elmt) + if (.not. isgnode_trinf(iglob_trinf)) then + isgnode_trinf(iglob_trinf) = .true. ignode = ignode+1 - ignode_trinf(ibool_trinf) = ignode + ignode_trinf(iglob_trinf) = ignode endif enddo enddo @@ -848,10 +848,10 @@ subroutine create_gindex_for_process(i_proc) k_inf = 1; k_trinf = NGLLZ do j = 1,NGLLY do i = 1,NGLLX - ibool_inf = ibool_infinite(i,j,k_inf,ispec_inf) - ibool_trinf = ibool_trinfinite(i,j,k_trinf,ispec_trinf) - ignode_inf(ibool_inf) = ignode_trinf(ibool_trinf) - isgnode_inf(ibool_inf) = .true. + iglob_inf = ibool_infinite(i,j,k_inf,ispec_inf) + iglob_trinf = ibool_trinfinite(i,j,k_trinf,ispec_trinf) + ignode_inf(iglob_inf) = ignode_trinf(iglob_trinf) + isgnode_inf(iglob_inf) = .true. enddo enddo enddo @@ -860,11 +860,11 @@ subroutine create_gindex_for_process(i_proc) do k = 1,NGLLZ do j = 1,NGLLY do i = 1,NGLLX - ibool_inf = ibool_infinite(i,j,k,i_elmt) - if (.not. isgnode_inf(ibool_inf)) then - isgnode_inf(ibool_inf) = .true. + iglob_inf = ibool_infinite(i,j,k,i_elmt) + if (.not. isgnode_inf(iglob_inf)) then + isgnode_inf(iglob_inf) = .true. ignode = ignode+1 - ignode_inf(ibool_inf) = ignode + ignode_inf(iglob_inf) = ignode endif enddo enddo @@ -881,10 +881,10 @@ subroutine create_gindex_for_process(i_proc) k_inf = 1; k_cm = NGLLZ do j = 1,NGLLY do i = 1,NGLLX - ibool_inf = ibool_infinite(i,j,k_inf,ispec_inf) - ibool_cm = ibool_crust_mantle(i,j,k_cm,ispec_cm) - ignode_inf(ibool_inf) = ignode_cm(ibool_cm) - isgnode_inf(ibool_inf) = .true. + iglob_inf = ibool_infinite(i,j,k_inf,ispec_inf) + iglob_cm = ibool_crust_mantle(i,j,k_cm,ispec_cm) + ignode_inf(iglob_inf) = ignode_cm(iglob_cm) + isgnode_inf(iglob_inf) = .true. enddo enddo enddo @@ -893,11 +893,11 @@ subroutine create_gindex_for_process(i_proc) do k = 1,NGLLZ do j = 1,NGLLY do i = 1,NGLLX - ibool_inf = ibool_infinite(i,j,k,i_elmt) - if (.not. isgnode_inf(ibool_inf)) then - isgnode_inf(ibool_inf) = .true. + iglob_inf = ibool_infinite(i,j,k,i_elmt) + if (.not. isgnode_inf(iglob_inf)) then + isgnode_inf(iglob_inf) = .true. ignode = ignode+1 - ignode_inf(ibool_inf) = ignode + ignode_inf(iglob_inf) = ignode endif enddo enddo @@ -1044,8 +1044,8 @@ subroutine create_gindex_for_process(i_proc) do k = 1,NGLLZ do j = 1,NGLLY do i = 1,NGLLX - ibool = ibool_inner_core(i,j,k,i_elmt) - gnf(1,inode_ic(ibool)) = 1 + iglob = ibool_inner_core(i,j,k,i_elmt) + gnf(1,inode_ic(iglob)) = 1 enddo enddo enddo @@ -1071,8 +1071,8 @@ subroutine create_gindex_for_process(i_proc) do k = 1,NGLLZ-1 do j = 1,NGLLY do i = 1,NGLLX - ibool = ibool_infinite(i,j,k,i_elmt) - gnf(1,inode_inf(ibool)) = 1 + iglob = ibool_infinite(i,j,k,i_elmt) + gnf(1,inode_inf(iglob)) = 1 enddo enddo enddo @@ -1392,9 +1392,9 @@ subroutine create_gindex_for_process(i_proc) do k = 1,NGLLZ,2 do j = 1,NGLLY,2 do i = 1,NGLLX,2 - ibool = ibool_inner_core(i,j,k,i_elmt) - isnode_ic(ibool) = .true. - isnode(inode_ic(ibool)) = .true. + iglob = ibool_inner_core(i,j,k,i_elmt) + isnode_ic(iglob) = .true. + isnode(inode_ic(iglob)) = .true. enddo enddo enddo @@ -1405,9 +1405,9 @@ subroutine create_gindex_for_process(i_proc) do k = 1,NGLLZ,2 do j = 1,NGLLY,2 do i = 1,NGLLX,2 - ibool = ibool_outer_core(i,j,k,i_elmt) - isnode_oc(ibool) = .true. - isnode(inode_oc(ibool)) = .true. + iglob = ibool_outer_core(i,j,k,i_elmt) + isnode_oc(iglob) = .true. + isnode(inode_oc(iglob)) = .true. enddo enddo enddo @@ -1418,9 +1418,9 @@ subroutine create_gindex_for_process(i_proc) do k = 1,NGLLZ,2 do j = 1,NGLLY,2 do i = 1,NGLLX,2 - ibool = ibool_crust_mantle(i,j,k,i_elmt) - isnode_cm(ibool) = .true. - isnode(inode_cm(ibool)) = .true. + iglob = ibool_crust_mantle(i,j,k,i_elmt) + isnode_cm(iglob) = .true. + isnode(inode_cm(iglob)) = .true. enddo enddo enddo @@ -1432,9 +1432,9 @@ subroutine create_gindex_for_process(i_proc) do k = 1,NGLLZ,2 do j = 1,NGLLY,2 do i = 1,NGLLX,2 - ibool = ibool_trinfinite(i,j,k,i_elmt) - isnode_trinf(ibool) = .true. - isnode(inode_trinf(ibool)) = .true. + iglob = ibool_trinfinite(i,j,k,i_elmt) + isnode_trinf(iglob) = .true. + isnode(inode_trinf(iglob)) = .true. enddo enddo enddo @@ -1446,9 +1446,9 @@ subroutine create_gindex_for_process(i_proc) do k = 1,NGLLZ,2 do j = 1,NGLLY,2 do i = 1,NGLLX,2 - ibool = ibool_infinite(i,j,k,i_elmt) - isnode_inf(ibool) = .true. - isnode(inode_inf(ibool)) = .true. + iglob = ibool_infinite(i,j,k,i_elmt) + isnode_inf(iglob) = .true. + isnode(inode_inf(iglob)) = .true. enddo enddo enddo @@ -1821,8 +1821,8 @@ subroutine create_gindex_for_process(i_proc) if (idoubling_inner_core(i_elmt) == IFLAG_IN_FICTITIOUS_CUBE) cycle do i = 1,NGLLCUBE_INF - ibool = inode_elmt_ic1(i,i_elmt) - gnf1(1,inode_ic1(ibool)) = 1 + iglob = inode_elmt_ic1(i,i_elmt) + gnf1(1,inode_ic1(iglob)) = 1 enddo enddo @@ -1847,8 +1847,8 @@ subroutine create_gindex_for_process(i_proc) do j = 1,NGLLY_INF do i = 1,NGLLX_INF igll = NGLLX_INF * NGLLY_INF * (k-1) + NGLLX_INF * (j-1) + i - ibool = inode_elmt_inf1(igll,i_elmt) - gnf1(1,inode_inf1(ibool)) = 1 + iglob = inode_elmt_inf1(igll,i_elmt) + gnf1(1,inode_inf1(iglob)) = 1 enddo enddo enddo diff --git a/src/specfem3D/SIEM_index_region.F90 b/src/specfem3D/SIEM_index_region.F90 index 39b1a72c2..689cb8eb4 100755 --- a/src/specfem3D/SIEM_index_region.F90 +++ b/src/specfem3D/SIEM_index_region.F90 @@ -99,9 +99,9 @@ subroutine SIEM_get_index_region() integer :: i,j,k,ier integer :: i_elmt,i_node integer :: ispec_ic,ispec_oc,ispec_cm,ispec_trinf,ispec_inf - integer :: ibool_ic,ibool_oc,ibool_cm,ibool_trinf,ibool_inf + integer :: iglob_ic,iglob_oc,iglob_cm,iglob_trinf,iglob_inf integer :: k_ic,k_oc,k_cm,k_trinf,k_inf - integer :: ibool,inode,ispec,nnode_icb,nnode_cmb,nnode_trinfb,nnode_infb + integer :: iglob,inode,ispec,nnode_icb,nnode_cmb,nnode_trinfb,nnode_infb integer,dimension(:),allocatable :: inode_ic,inode_oc,inode_cm,inode_trinf,inode_inf integer,dimension(:),allocatable :: inode_ic1,inode_oc1,inode_cm1,inode_trinf1,inode_inf1 @@ -312,10 +312,10 @@ subroutine SIEM_get_index_region() k_ic = NGLLZ ! top face do j = 1,NGLLY do i = 1,NGLLX - ibool_oc = ibool_outer_core(i,j,k_oc,ispec_oc) - ibool_ic = ibool_inner_core(i,j,k_ic,ispec_ic) - inode_oc(ibool_oc) = inode_ic(ibool_ic) - isnode_oc(ibool_oc) = .true. + iglob_oc = ibool_outer_core(i,j,k_oc,ispec_oc) + iglob_ic = ibool_inner_core(i,j,k_ic,ispec_ic) + inode_oc(iglob_oc) = inode_ic(iglob_ic) + isnode_oc(iglob_oc) = .true. enddo enddo enddo @@ -324,11 +324,11 @@ subroutine SIEM_get_index_region() do k = 1,NGLLZ do j = 1,NGLLY do i = 1,NGLLX - ibool_oc = ibool_outer_core(i,j,k,i_elmt) - if (.not. isnode_oc(ibool_oc)) then - isnode_oc(ibool_oc) = .true. + iglob_oc = ibool_outer_core(i,j,k,i_elmt) + if (.not. isnode_oc(iglob_oc)) then + isnode_oc(iglob_oc) = .true. inode = inode+1 - inode_oc(ibool_oc) = inode + inode_oc(iglob_oc) = inode endif enddo enddo @@ -345,10 +345,10 @@ subroutine SIEM_get_index_region() k_cm = 1; k_oc = NGLLZ do j = 1,NGLLY do i = 1,NGLLX - ibool_cm = ibool_crust_mantle(i,j,k_cm,ispec_cm) - ibool_oc = ibool_outer_core(i,j,k_oc,ispec_oc) - inode_cm(ibool_cm) = inode_oc(ibool_oc) - isnode_cm(ibool_cm) = .true. + iglob_cm = ibool_crust_mantle(i,j,k_cm,ispec_cm) + iglob_oc = ibool_outer_core(i,j,k_oc,ispec_oc) + inode_cm(iglob_cm) = inode_oc(iglob_oc) + isnode_cm(iglob_cm) = .true. enddo enddo enddo @@ -357,11 +357,11 @@ subroutine SIEM_get_index_region() do k = 1,NGLLZ do j = 1,NGLLY do i = 1,NGLLX - ibool_cm = ibool_crust_mantle(i,j,k,i_elmt) - if (.not. isnode_cm(ibool_cm)) then - isnode_cm(ibool_cm) = .true. + iglob_cm = ibool_crust_mantle(i,j,k,i_elmt) + if (.not. isnode_cm(iglob_cm)) then + isnode_cm(iglob_cm) = .true. inode = inode+1 - inode_cm(ibool_cm) = inode + inode_cm(iglob_cm) = inode endif enddo enddo @@ -379,10 +379,10 @@ subroutine SIEM_get_index_region() k_trinf = 1; k_cm = NGLLZ do j = 1,NGLLY do i = 1,NGLLX - ibool_trinf = ibool_trinfinite(i,j,k_trinf,ispec_trinf) - ibool_cm = ibool_crust_mantle(i,j,k_cm,ispec_cm) - inode_trinf(ibool_trinf) = inode_cm(ibool_cm) - isnode_trinf(ibool_trinf) = .true. + iglob_trinf = ibool_trinfinite(i,j,k_trinf,ispec_trinf) + iglob_cm = ibool_crust_mantle(i,j,k_cm,ispec_cm) + inode_trinf(iglob_trinf) = inode_cm(iglob_cm) + isnode_trinf(iglob_trinf) = .true. enddo enddo enddo @@ -391,11 +391,11 @@ subroutine SIEM_get_index_region() do k = 1,NGLLZ do j = 1,NGLLY do i = 1,NGLLX - ibool_trinf = ibool_trinfinite(i,j,k,i_elmt) - if (.not. isnode_trinf(ibool_trinf)) then - isnode_trinf(ibool_trinf) = .true. + iglob_trinf = ibool_trinfinite(i,j,k,i_elmt) + if (.not. isnode_trinf(iglob_trinf)) then + isnode_trinf(iglob_trinf) = .true. inode = inode+1 - inode_trinf(ibool_trinf) = inode + inode_trinf(iglob_trinf) = inode endif enddo enddo @@ -412,10 +412,10 @@ subroutine SIEM_get_index_region() k_inf = 1; k_trinf = NGLLZ do j = 1,NGLLY do i = 1,NGLLX - ibool_inf = ibool_infinite(i,j,k_inf,ispec_inf) - ibool_trinf = ibool_trinfinite(i,j,k_trinf,ispec_trinf) - inode_inf(ibool_inf) = inode_trinf(ibool_trinf) - isnode_inf(ibool_inf) = .true. + iglob_inf = ibool_infinite(i,j,k_inf,ispec_inf) + iglob_trinf = ibool_trinfinite(i,j,k_trinf,ispec_trinf) + inode_inf(iglob_inf) = inode_trinf(iglob_trinf) + isnode_inf(iglob_inf) = .true. enddo enddo enddo @@ -424,11 +424,11 @@ subroutine SIEM_get_index_region() do k = 1,NGLLZ do j = 1,NGLLY do i = 1,NGLLX - ibool_inf = ibool_infinite(i,j,k,i_elmt) - if (.not. isnode_inf(ibool_inf)) then - isnode_inf(ibool_inf) = .true. + iglob_inf = ibool_infinite(i,j,k,i_elmt) + if (.not. isnode_inf(iglob_inf)) then + isnode_inf(iglob_inf) = .true. inode = inode+1 - inode_inf(ibool_inf) = inode + inode_inf(iglob_inf) = inode endif enddo enddo @@ -445,10 +445,10 @@ subroutine SIEM_get_index_region() k_inf = 1; k_cm = NGLLZ do j = 1,NGLLY do i = 1,NGLLX - ibool_inf = ibool_infinite(i,j,k_inf,ispec_inf) - ibool_cm = ibool_crust_mantle(i,j,k_cm,ispec_cm) - inode_inf(ibool_inf) = inode_cm(ibool_cm) - isnode_inf(ibool_inf) = .true. + iglob_inf = ibool_infinite(i,j,k_inf,ispec_inf) + iglob_cm = ibool_crust_mantle(i,j,k_cm,ispec_cm) + inode_inf(iglob_inf) = inode_cm(iglob_cm) + isnode_inf(iglob_inf) = .true. enddo enddo enddo @@ -457,11 +457,11 @@ subroutine SIEM_get_index_region() do k = 1,NGLLZ do j = 1,NGLLY do i = 1,NGLLX - ibool_inf = ibool_infinite(i,j,k,i_elmt) - if (.not. isnode_inf(ibool_inf)) then - isnode_inf(ibool_inf) = .true. + iglob_inf = ibool_infinite(i,j,k,i_elmt) + if (.not. isnode_inf(iglob_inf)) then + isnode_inf(iglob_inf) = .true. inode = inode+1 - inode_inf(ibool_inf) = inode + inode_inf(iglob_inf) = inode endif enddo enddo @@ -487,8 +487,8 @@ subroutine SIEM_get_index_region() do k = 1,NGLLZ do j = 1,NGLLY do i = 1,NGLLX - ibool = ibool_inner_core(i,j,k,i_elmt) - nf(1,inode_ic(ibool)) = 1 + iglob = ibool_inner_core(i,j,k,i_elmt) + nf(1,inode_ic(iglob)) = 1 enddo enddo enddo @@ -514,8 +514,8 @@ subroutine SIEM_get_index_region() do k = 1,NGLLZ-1 do j = 1,NGLLY do i = 1,NGLLX - ibool = ibool_infinite(i,j,k,i_elmt) - nf(1,inode_inf(ibool)) = 1 + iglob = ibool_infinite(i,j,k,i_elmt) + nf(1,inode_inf(iglob)) = 1 enddo enddo enddo @@ -526,8 +526,8 @@ subroutine SIEM_get_index_region() !do i_elmt=1,NSPEC_INFINITE ! do j=1,NGLLY ! do i=1,NGLLX - ! ibool = ibool_infinite(i,j,k,i_elmt) - ! nf(1,inode_inf(ibool)) = 0 + ! iglob = ibool_infinite(i,j,k,i_elmt) + ! nf(1,inode_inf(iglob)) = 0 ! enddo ! enddo !enddo @@ -840,8 +840,8 @@ subroutine SIEM_get_index_region() do i_elmt = 1,NSPEC_INNER_CORE if (idoubling_inner_core(i_elmt) == IFLAG_IN_FICTITIOUS_CUBE) cycle do i = 1,NGLLCUBE_INF - ibool = inode_elmt_ic1(i,i_elmt) - nf1(1,inode_ic1(ibool)) = 1 + iglob = inode_elmt_ic1(i,i_elmt) + nf1(1,inode_ic1(iglob)) = 1 enddo enddo call synchronize_all() @@ -868,12 +868,12 @@ subroutine SIEM_get_index_region() do j = 1,NGLLY_INF do i = 1,NGLLX_INF ! simple sum should also work here - !ibool = NGLLX_INF*NGLLY_INF*(k-1)+NGLLX_INF*(j-1)+i !ibool_infinite(i,j,k,i_elmt) - !nf1(1,inode_inf1(ibool)) = 1 + !iglob = NGLLX_INF*NGLLY_INF*(k-1)+NGLLX_INF*(j-1)+i !ibool_infinite(i,j,k,i_elmt) + !nf1(1,inode_inf1(iglob)) = 1 igll = NGLLX_INF*NGLLY_INF*(k-1) + NGLLX_INF*(j-1) + i - ibool = inode_elmt_inf1(igll,i_elmt) - nf1(1,inode_inf1(ibool)) = 1 + iglob = inode_elmt_inf1(igll,i_elmt) + nf1(1,inode_inf1(iglob)) = 1 enddo enddo enddo From 6b3a64ce32d381570891d64f69888d8448e889e1 Mon Sep 17 00:00:00 2001 From: Daniel Peter Date: Mon, 24 Jun 2024 21:15:50 +0200 Subject: [PATCH 08/13] updates mesh permutations for coloring (and adds possibility for inner/outer loop permutations) --- src/meshfem3D/get_perm_color.f90 | 337 -------- src/meshfem3D/setup_color_perm.f90 | 1177 +++++++++++++++++++++------- 2 files changed, 914 insertions(+), 600 deletions(-) diff --git a/src/meshfem3D/get_perm_color.f90 b/src/meshfem3D/get_perm_color.f90 index f1a1229a9..f10bcf833 100644 --- a/src/meshfem3D/get_perm_color.f90 +++ b/src/meshfem3D/get_perm_color.f90 @@ -1068,340 +1068,3 @@ subroutine get_final_perm(color,perm,first_elem_number_in_this_color, & end subroutine get_final_perm - -! -!------------------------------------------------------------------------------------------------- -! -! PERMUTATIONS -! -!------------------------------------------------------------------------------------------------- -! - -! implement permutation of elements for arrays of real (CUSTOM_REAL) type - - subroutine permute_elements_real(array_to_permute,temp_array,perm,nspec) - - use constants - - implicit none - - integer, intent(in) :: nspec - integer, intent(in), dimension(nspec) :: perm - - real(kind=CUSTOM_REAL), intent(inout), dimension(NGLLX,NGLLY,NGLLZ,nspec) :: & - array_to_permute,temp_array - - integer :: old_ispec,new_ispec - - ! copy the original array - temp_array(:,:,:,:) = array_to_permute(:,:,:,:) - - do old_ispec = 1,nspec - new_ispec = perm(old_ispec) - array_to_permute(:,:,:,new_ispec) = temp_array(:,:,:,old_ispec) - enddo - - end subroutine permute_elements_real - -! -!------------------------------------------------------------------------------------------------- -! - -! implement permutation of elements for arrays of real (CUSTOM_REAL) type - - subroutine permute_elements_real1(array_to_permute,temp_array,perm,nspec) - - use constants - - implicit none - - integer, intent(in) :: nspec - integer, intent(in), dimension(nspec) :: perm - - real(kind=CUSTOM_REAL), intent(inout), dimension(1,1,1,nspec) :: & - array_to_permute,temp_array - - integer :: old_ispec,new_ispec - - ! copy the original array - temp_array(:,:,:,:) = array_to_permute(:,:,:,:) - - do old_ispec = 1,nspec - new_ispec = perm(old_ispec) - array_to_permute(:,:,:,new_ispec) = temp_array(:,:,:,old_ispec) - enddo - - end subroutine permute_elements_real1 - -! -!------------------------------------------------------------------------------------------------- -! - -! implement permutation of elements for arrays of real (CUSTOM_REAL) type - - subroutine permute_elements_real_sls(array_to_permute,temp_array,perm,nspec) - - use constants - - implicit none - - integer, intent(in) :: nspec - integer, intent(in), dimension(nspec) :: perm - - real(kind=CUSTOM_REAL), intent(inout), dimension(NGLLX,NGLLY,NGLLZ,N_SLS,nspec) :: & - array_to_permute,temp_array - - integer :: old_ispec,new_ispec - - ! copy the original array - temp_array(:,:,:,:,:) = array_to_permute(:,:,:,:,:) - - do old_ispec = 1,nspec - new_ispec = perm(old_ispec) - array_to_permute(:,:,:,:,new_ispec) = temp_array(:,:,:,:,old_ispec) - enddo - - end subroutine permute_elements_real_sls - -! -!------------------------------------------------------------------------------------------------- -! - -! implement permutation of elements for arrays of real (CUSTOM_REAL) type - - subroutine permute_elements_real_sls1(array_to_permute,temp_array,perm,nspec) - - use constants - - implicit none - - integer, intent(in) :: nspec - integer, intent(in), dimension(nspec) :: perm - - real(kind=CUSTOM_REAL), intent(inout), dimension(1,1,1,N_SLS,nspec) :: & - array_to_permute,temp_array - - integer :: old_ispec,new_ispec - - ! copy the original array - temp_array(:,:,:,:,:) = array_to_permute(:,:,:,:,:) - - do old_ispec = 1,nspec - new_ispec = perm(old_ispec) - array_to_permute(:,:,:,:,new_ispec) = temp_array(:,:,:,:,old_ispec) - enddo - - end subroutine permute_elements_real_sls1 - - - -! -!------------------------------------------------------------------------------------------------- -! - -! implement permutation of elements for arrays of integer type - - subroutine permute_elements_integer(array_to_permute,temp_array,perm,nspec) - - use constants - - implicit none - - integer, intent(in) :: nspec - integer, intent(in), dimension(nspec) :: perm - - integer, intent(inout), dimension(NGLLX,NGLLY,NGLLZ,nspec) :: & - array_to_permute,temp_array - - integer :: old_ispec,new_ispec - - ! copy the original array - temp_array(:,:,:,:) = array_to_permute(:,:,:,:) - - do old_ispec = 1,nspec - new_ispec = perm(old_ispec) - array_to_permute(:,:,:,new_ispec) = temp_array(:,:,:,old_ispec) - enddo - - end subroutine permute_elements_integer - -! -!------------------------------------------------------------------------------------------------- -! - -! implement permutation of elements for arrays of double precision type - - subroutine permute_elements_dble(array_to_permute,temp_array,perm,nspec) - - use constants - - implicit none - - integer, intent(in) :: nspec - integer, intent(in), dimension(nspec) :: perm - - double precision, intent(inout), dimension(NGLLX,NGLLY,NGLLZ,nspec) :: & - array_to_permute,temp_array - - integer :: old_ispec,new_ispec - - ! copy the original array - temp_array(:,:,:,:) = array_to_permute(:,:,:,:) - - do old_ispec = 1,nspec - new_ispec = perm(old_ispec) - array_to_permute(:,:,:,new_ispec) = temp_array(:,:,:,old_ispec) - enddo - - end subroutine permute_elements_dble - -! -!------------------------------------------------------------------------------------------------- -! - -! implement permutation of elements for arrays of double precision type - - subroutine permute_elements_logical1D(array_to_permute,temp_array,perm,nspec) - - use constants - - implicit none - - integer, intent(in) :: nspec - integer, intent(in), dimension(nspec) :: perm - - logical, intent(inout), dimension(nspec) :: array_to_permute,temp_array - - integer :: old_ispec,new_ispec - - ! copy the original array - temp_array(:) = array_to_permute(:) - - do old_ispec = 1,nspec - new_ispec = perm(old_ispec) - array_to_permute(new_ispec) = temp_array(old_ispec) - enddo - - end subroutine permute_elements_logical1D - -! -!------------------------------------------------------------------------------------------------- -! - -! implement permutation of elements for arrays of integer type - - subroutine permute_elements_integer1D(array_to_permute,temp_array,perm,nspec) - - use constants - - implicit none - - integer, intent(in) :: nspec - integer, intent(in), dimension(nspec) :: perm - - integer, intent(inout), dimension(nspec) :: & - array_to_permute,temp_array - - integer :: old_ispec,new_ispec - - ! copy the original array - temp_array(:) = array_to_permute(:) - - do old_ispec = 1,nspec - new_ispec = perm(old_ispec) - array_to_permute(new_ispec) = temp_array(old_ispec) - enddo - - end subroutine permute_elements_integer1D - -! -!------------------------------------------------------------------------------------------------- -! - -! implement permutation of elements for arrays of real (CUSTOM_REAL) type - - subroutine permute_elements_dble1(array_to_permute,temp_array,perm,nspec) - - use constants - - implicit none - - integer, intent(in) :: nspec - integer, intent(in), dimension(nspec) :: perm - - double precision, intent(inout), dimension(1,1,1,nspec) :: & - array_to_permute,temp_array - - integer :: old_ispec,new_ispec - - ! copy the original array - temp_array(:,:,:,:) = array_to_permute(:,:,:,:) - - do old_ispec = 1,nspec - new_ispec = perm(old_ispec) - array_to_permute(:,:,:,new_ispec) = temp_array(:,:,:,old_ispec) - enddo - - end subroutine permute_elements_dble1 - -! -!------------------------------------------------------------------------------------------------- -! - -! implement permutation of elements for arrays of real (CUSTOM_REAL) type - - subroutine permute_elements_dble_sls(array_to_permute,temp_array,perm,nspec) - - use constants - - implicit none - - integer, intent(in) :: nspec - integer, intent(in), dimension(nspec) :: perm - - double precision, intent(inout), dimension(N_SLS,NGLLX,NGLLY,NGLLZ,nspec) :: & - array_to_permute,temp_array - - integer :: old_ispec,new_ispec - - ! copy the original array - temp_array(:,:,:,:,:) = array_to_permute(:,:,:,:,:) - - do old_ispec = 1,nspec - new_ispec = perm(old_ispec) - array_to_permute(:,:,:,:,new_ispec) = temp_array(:,:,:,:,old_ispec) - enddo - - end subroutine permute_elements_dble_sls - -! -!------------------------------------------------------------------------------------------------- -! - -! implement permutation of elements for arrays of real (CUSTOM_REAL) type - - subroutine permute_elements_dble_sls1(array_to_permute,temp_array,perm,nspec) - - use constants - - implicit none - - integer, intent(in) :: nspec - integer, intent(in), dimension(nspec) :: perm - - double precision, intent(inout), dimension(N_SLS,1,1,1,nspec) :: & - array_to_permute,temp_array - - integer :: old_ispec,new_ispec - - ! copy the original array - temp_array(:,:,:,:,:) = array_to_permute(:,:,:,:,:) - - do old_ispec = 1,nspec - new_ispec = perm(old_ispec) - array_to_permute(:,:,:,:,new_ispec) = temp_array(:,:,:,:,old_ispec) - enddo - - end subroutine permute_elements_dble_sls1 - - diff --git a/src/meshfem3D/setup_color_perm.f90 b/src/meshfem3D/setup_color_perm.f90 index 5a4d52923..98958afdb 100644 --- a/src/meshfem3D/setup_color_perm.f90 +++ b/src/meshfem3D/setup_color_perm.f90 @@ -53,6 +53,13 @@ subroutine setup_color_perm(iregion_code) integer :: nspec,nglob integer :: idomain + ! for testing effect of element permutations on code performance + ! note: re-ordering might affect the efficiency of cache fetches. + ! with the inner/outer loop re-ordering here, elements get ordered consecutively for each phase. + ! this seems to have only a minimal performance benefit of ~3-4 % on the CPU. + ! permutes inner/outer loop elements + logical, parameter :: USE_MESH_LOOP_PERMUTATION = .false. + ! user output if (myrank == 0) then write(IMAIN,*) ' mesh coloring: ',USE_MESH_COLORING_GPU @@ -93,7 +100,7 @@ subroutine setup_color_perm(iregion_code) if (maxval(perm) /= num_phase_ispec_crust_mantle) & call exit_MPI(myrank, 'maxval(perm) should be num_phase_ispec_crust_mantle') - ! sorts array according to permutation + ! sorts array according to color permutation call synchronize_all() if (myrank == 0) then write(IMAIN,*) ' mesh permutation:' @@ -114,6 +121,22 @@ subroutine setup_color_perm(iregion_code) if (ier /= 0 ) call exit_mpi(myrank,'Error allocating num_elem_colors_crust_mantle array') endif + ! setup inner/outer loop permutation for crust/mantle + if (USE_MESH_LOOP_PERMUTATION) then + ! user output + if (myrank == 0) then + write(IMAIN,*) ' mesh permutation for inner/outer loops: ',USE_MESH_LOOP_PERMUTATION + call flush_IMAIN() + endif + + call setup_loop_permutation(NSPEC_CRUST_MANTLE,NGLOB_CRUST_MANTLE,ibool, & + iregion_code, & + num_phase_ispec_crust_mantle,phase_ispec_inner_crust_mantle, & + nspec_outer_crust_mantle,nspec_inner_crust_mantle, & + num_interfaces_crust_mantle,max_nibool_interfaces_cm, & + nibool_interfaces_crust_mantle,ibool_interfaces_crust_mantle) + endif + case (IREGION_OUTER_CORE) ! outer core ! initializes @@ -377,19 +400,19 @@ subroutine setup_color(nspec,nglob,ibool,perm, & implicit none - integer :: nspec,nglob - integer, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: ibool + integer, intent(in) :: nspec,nglob + integer, dimension(NGLLX,NGLLY,NGLLZ,nspec), intent(in) :: ibool - integer, dimension(nspec) :: perm + integer, dimension(nspec), intent(inout) :: perm ! wrapper array for ispec is in domain: ! idomain: 1 == crust/mantle, 2 == outer core, 3 == inner core - integer :: idomain - logical, dimension(nspec) :: is_on_a_slice_edge - integer :: num_phase_ispec_d - integer, dimension(num_phase_ispec_d,2) :: phase_ispec_inner_d + integer, intent(in) :: idomain + logical, dimension(nspec), intent(in) :: is_on_a_slice_edge + integer, intent(inout) :: num_phase_ispec_d + integer, dimension(num_phase_ispec_d,2), intent(inout) :: phase_ispec_inner_d - logical :: SAVE_MESH_FILES + logical, intent(in) :: SAVE_MESH_FILES ! local parameters ! added for color permutation @@ -747,66 +770,32 @@ subroutine setup_permutation(nspec,nglob,ibool, & use constants use meshfem_models_par, only: & - TRANSVERSE_ISOTROPY,HETEROGEN_3D_MANTLE,ANISOTROPIC_3D_MANTLE, & - ANISOTROPIC_INNER_CORE,ATTENUATION, & - ATTENUATION_3D,ATTENUATION_1D_WITH_3D_STORAGE + ATTENUATION_1D_WITH_3D_STORAGE use meshfem_par, only: & - ABSORBING_CONDITIONS, & - LOCAL_PATH, & - NCHUNKS,NSPEC2D_TOP,NSPEC2D_BOTTOM, & - xstore,ystore,zstore,idoubling,xstore_glob,ystore_glob,zstore_glob - - use regions_mesh_par2, only: & - xixstore,xiystore,xizstore,etaxstore,etaystore,etazstore, & - gammaxstore,gammaystore,gammazstore, & - rhostore,kappavstore,kappahstore,muvstore,muhstore,eta_anisostore, & - c11store,c12store,c13store,c14store,c15store,c16store,c22store, & - c23store,c24store,c25store,c26store,c33store,c34store,c35store, & - c36store,c44store,c45store,c46store,c55store,c56store,c66store, & - ibelm_xmin,ibelm_xmax,ibelm_ymin,ibelm_ymax,ibelm_bottom,ibelm_top, & - rho_vp,rho_vs, & - nspec2D_xmin,nspec2D_xmax,nspec2D_ymin,nspec2D_ymax, & - ispec_is_tiso,tau_e_store,Qmu_store, & - NSPEC2D_MOHO, NSPEC2D_400, NSPEC2D_670, & - ibelm_moho_top,ibelm_moho_bot,ibelm_400_top,ibelm_400_bot, & - ibelm_670_top,ibelm_670_bot - - use MPI_crust_mantle_par, only: NSPEC_CRUST_MANTLE - use MPI_outer_core_par, only: NSPEC_OUTER_CORE - use MPI_inner_core_par, only: NSPEC_INNER_CORE - - use MPI_trinfinite_par, only: NSPEC_TRINFINITE - use MPI_infinite_par, only: NSPEC_INFINITE + LOCAL_PATH,xstore_glob,ystore_glob,zstore_glob implicit none - integer,intent(in) :: nspec,nglob - integer, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: ibool + integer, intent(in) :: nspec,nglob + integer, dimension(NGLLX,NGLLY,NGLLZ,nspec), intent(inout) :: ibool - integer,intent(in) :: idomain + integer, intent(in) :: idomain integer, dimension(nspec),intent(inout) :: perm - integer :: num_colors_outer,num_colors_inner - integer, dimension(num_colors_outer + num_colors_inner) :: num_elem_colors - integer :: num_phase_ispec_d - integer, dimension(num_phase_ispec_d,2) :: phase_ispec_inner_d + integer, intent(in) :: num_colors_outer,num_colors_inner + integer, dimension(num_colors_outer + num_colors_inner),intent(in) :: num_elem_colors + integer, intent(in) :: num_phase_ispec_d + integer, dimension(num_phase_ispec_d,2),intent(inout) :: phase_ispec_inner_d - logical :: SAVE_MESH_FILES + logical, intent(in) :: SAVE_MESH_FILES ! local parameters - ! added for sorting - double precision, dimension(:,:,:,:), allocatable :: temp_array_dble - real(kind=CUSTOM_REAL), dimension(:,:,:,:), allocatable :: temp_array_real - real(kind=CUSTOM_REAL), dimension(:,:,:,:,:), allocatable :: temp_array_real_sls - integer, dimension(:,:,:,:), allocatable :: temp_array_int - integer, dimension(:), allocatable :: temp_array_int_1D integer, dimension(:), allocatable :: temp_perm_global - logical, dimension(:), allocatable :: temp_array_logical_1D logical, dimension(:), allocatable :: mask_global integer :: icolor,icounter,ispec,ielem,ier,i - integer :: iface,old_ispec,new_ispec + integer :: new_ispec character(len=MAX_STRING_LEN) :: filename character(len=MAX_STRING_LEN) :: prname @@ -923,252 +912,280 @@ subroutine setup_permutation(nspec,nglob,ibool, & deallocate(temp_perm_global) ! permutes all required mesh arrays according to new ordering + call permute_all_mesh_element_arrays(nspec,idomain,perm,ibool) - ! permutation of ibool - allocate(temp_array_int(NGLLX,NGLLY,NGLLZ,nspec)) - call permute_elements_integer(ibool,temp_array_int,perm,nspec) - deallocate(temp_array_int) + end subroutine setup_permutation - ! element idoubling flags - allocate(temp_array_int_1D(nspec)) - call permute_elements_integer1D(idoubling,temp_array_int_1D,perm,nspec) - deallocate(temp_array_int_1D) +! +!------------------------------------------------------------------------------------------------- +! - ! element domain flags - allocate(temp_array_logical_1D(nspec)) - call permute_elements_logical1D(ispec_is_tiso,temp_array_logical_1D,perm,nspec) - deallocate(temp_array_logical_1D) + subroutine setup_loop_permutation(nspec,nglob,ibool,idomain, & + num_phase_ispec_d,phase_ispec_inner_d,nspec_outer_d,nspec_inner_d, & + num_interfaces,max_nibool_interfaces, & + nibool_interfaces,ibool_interfaces) - ! mesh arrays - ! double precision - allocate(temp_array_dble(NGLLX,NGLLY,NGLLZ,nspec)) - call permute_elements_dble(xstore,temp_array_dble,perm,nspec) - call permute_elements_dble(ystore,temp_array_dble,perm,nspec) - call permute_elements_dble(zstore,temp_array_dble,perm,nspec) - deallocate(temp_array_dble) - ! custom precision - allocate(temp_array_real(NGLLX,NGLLY,NGLLZ,nspec)) - call permute_elements_real(xixstore,temp_array_real,perm,nspec) - call permute_elements_real(xiystore,temp_array_real,perm,nspec) - call permute_elements_real(xizstore,temp_array_real,perm,nspec) - call permute_elements_real(etaxstore,temp_array_real,perm,nspec) - call permute_elements_real(etaystore,temp_array_real,perm,nspec) - call permute_elements_real(etazstore,temp_array_real,perm,nspec) - call permute_elements_real(gammaxstore,temp_array_real,perm,nspec) - call permute_elements_real(gammaystore,temp_array_real,perm,nspec) - call permute_elements_real(gammazstore,temp_array_real,perm,nspec) + ! sorts element arrays according to inner/outer loop order - ! material parameters - call permute_elements_real(rhostore,temp_array_real,perm,nspec) - call permute_elements_real(kappavstore,temp_array_real,perm,nspec) - deallocate(temp_array_real) + use constants, only: NGLLX,NGLLY,NGLLZ,IMAIN,myrank, & + IREGION_CRUST_MANTLE,IREGION_INNER_CORE,IREGION_OUTER_CORE - ! boundary surfaces - ! note: only arrays pointing to ispec will have to be permuted since value of ispec will be different - ! - ! xmin - do iface = 1,nspec2D_xmin - old_ispec = ibelm_xmin(iface) - new_ispec = perm(old_ispec) - ibelm_xmin(iface) = new_ispec - enddo - ! xmax - do iface = 1,nspec2D_xmax - old_ispec = ibelm_xmax(iface) - new_ispec = perm(old_ispec) - ibelm_xmax(iface) = new_ispec - enddo - ! ymin - do iface = 1,nspec2D_ymin - old_ispec = ibelm_ymin(iface) - new_ispec = perm(old_ispec) - ibelm_ymin(iface) = new_ispec - enddo - ! ymax - do iface = 1,nspec2D_ymax - old_ispec = ibelm_ymax(iface) - new_ispec = perm(old_ispec) - ibelm_ymax(iface) = new_ispec - enddo - ! bottom - do iface = 1,NSPEC2D_BOTTOM(idomain) - old_ispec = ibelm_bottom(iface) - new_ispec = perm(old_ispec) - ibelm_bottom(iface) = new_ispec - enddo - ! top - do iface = 1,NSPEC2D_TOP(idomain) - old_ispec = ibelm_top(iface) - new_ispec = perm(old_ispec) - ibelm_top(iface) = new_ispec - enddo + implicit none - ! attenuation arrays - if (ATTENUATION) then - if (ATTENUATION_3D .or. ATTENUATION_1D_WITH_3D_STORAGE) then - allocate(temp_array_real(NGLLX,NGLLY,NGLLZ,nspec)) - allocate(temp_array_real_sls(NGLLX,NGLLY,NGLLZ,N_SLS,nspec)) - call permute_elements_real(Qmu_store,temp_array_real,perm,nspec) - call permute_elements_real_sls(tau_e_store,temp_array_real_sls,perm,nspec) - deallocate(temp_array_real,temp_array_real_sls) - else - allocate(temp_array_real(1,1,1,nspec)) - allocate(temp_array_real_sls(1,1,1,N_SLS,nspec)) - call permute_elements_real1(Qmu_store,temp_array_real,perm,nspec) - call permute_elements_real_sls1(tau_e_store,temp_array_real_sls,perm,nspec) - deallocate(temp_array_real,temp_array_real_sls) - endif - endif + integer, intent(in) :: nspec,nglob + integer, dimension(NGLLX,NGLLY,NGLLZ,nspec), intent(inout) :: ibool - select case (idomain) - case (IREGION_CRUST_MANTLE) - ! checks number of elements - if (nspec /= NSPEC_CRUST_MANTLE ) & - call exit_MPI(myrank,'Error in permutation nspec should be NSPEC_CRUST_MANTLE') + ! wrapper array for ispec is in domain: + ! idomain: 1 == crust/mantle, 2 == outer core, 3 == inner core + integer, intent(in) :: idomain + integer, intent(in) :: num_phase_ispec_d + integer, dimension(num_phase_ispec_d,2), intent(inout) :: phase_ispec_inner_d + integer, intent(in) :: nspec_outer_d,nspec_inner_d - allocate(temp_array_real(NGLLX,NGLLY,NGLLZ,nspec)) + integer,intent(in) :: num_interfaces,max_nibool_interfaces + integer,dimension(num_interfaces),intent(in) :: nibool_interfaces + integer,dimension(max_nibool_interfaces,num_interfaces),intent(inout):: ibool_interfaces - ! note: muvstore needed for attenuation also for anisotropic 3d mantle - call permute_elements_real(muvstore,temp_array_real,perm,nspec) - if (ANISOTROPIC_3D_MANTLE) then - call permute_elements_real(c11store,temp_array_real,perm,nspec) - call permute_elements_real(c11store,temp_array_real,perm,nspec) - call permute_elements_real(c12store,temp_array_real,perm,nspec) - call permute_elements_real(c13store,temp_array_real,perm,nspec) - call permute_elements_real(c14store,temp_array_real,perm,nspec) - call permute_elements_real(c15store,temp_array_real,perm,nspec) - call permute_elements_real(c16store,temp_array_real,perm,nspec) - call permute_elements_real(c22store,temp_array_real,perm,nspec) - call permute_elements_real(c23store,temp_array_real,perm,nspec) - call permute_elements_real(c24store,temp_array_real,perm,nspec) - call permute_elements_real(c25store,temp_array_real,perm,nspec) - call permute_elements_real(c26store,temp_array_real,perm,nspec) - call permute_elements_real(c33store,temp_array_real,perm,nspec) - call permute_elements_real(c34store,temp_array_real,perm,nspec) - call permute_elements_real(c35store,temp_array_real,perm,nspec) - call permute_elements_real(c36store,temp_array_real,perm,nspec) - call permute_elements_real(c44store,temp_array_real,perm,nspec) - call permute_elements_real(c45store,temp_array_real,perm,nspec) - call permute_elements_real(c46store,temp_array_real,perm,nspec) - call permute_elements_real(c55store,temp_array_real,perm,nspec) - call permute_elements_real(c56store,temp_array_real,perm,nspec) - call permute_elements_real(c66store,temp_array_real,perm,nspec) + ! local parameters + integer :: iphase,num_elements,ier,num_reordered + ! ispec re-ordering + integer :: ispec,ispec_loop,ispec_p,old_ispec,new_ispec + integer, dimension(:), allocatable :: perm_ispec_ordered + ! iglob re-ordering + integer :: i,j,k,iglob,iglob_ordered,iglob_new,iglob_old + integer, dimension(:), allocatable :: perm_iglob_ordered + + ! permutes iglob entries + logical, parameter :: USE_GLOBAL_NODE_PERMUTATION = .true. + + ! user output + if (myrank == 0) then + select case (idomain) + case (IREGION_CRUST_MANTLE) + write(IMAIN,*) ' permuting crust/mantle element order' + case (IREGION_INNER_CORE) + write(IMAIN,*) ' permuting inner core element order' + case (IREGION_OUTER_CORE) + write(IMAIN,*) ' permuting outer core element order' + end select + call flush_IMAIN() + endif + + ! re-orders elements in ibool - aims for better cache fetches + allocate(perm_ispec_ordered(nspec),stat=ier) + if (ier /= 0) stop 'Error allocating ispec_ordered array' + perm_ispec_ordered(:) = 0 + + ! loops over outer/inner elements as in compute_forces_crust_mantle_* routine + ispec_loop = 0 + do iphase = 1,2 + if (iphase == 1) then + num_elements = nspec_outer_d else - if (TRANSVERSE_ISOTROPY) then - call permute_elements_real(kappahstore,temp_array_real,perm,nspec) - call permute_elements_real(muhstore,temp_array_real,perm,nspec) - call permute_elements_real(eta_anisostore,temp_array_real,perm,nspec) - endif + num_elements = nspec_inner_d endif - ! just to be nice and align dvpstore to the permuted mesh - if (HETEROGEN_3D_MANTLE) then - call model_heterogen_mantle_permute_dvp(temp_array_real,perm,nspec) - endif + do ispec_p = 1,num_elements + ispec = phase_ispec_inner_d(ispec_p,iphase) - if (ABSORBING_CONDITIONS .and. NCHUNKS /= 6) then - call permute_elements_real(rho_vp,temp_array_real,perm,nspec) - call permute_elements_real(rho_vs,temp_array_real,perm,nspec) - endif + ! checks range of ispec + if (ispec < 1 .or. ispec > nspec) then + print *,'Error: rank ',myrank,' has invalid ispec ',ispec,' - should be between 1 and ',nspec + print *,' domain: ',idomain,' phase: ',iphase,' elements: ',num_elements,' ispec_p: ',ispec_p + stop 'Invalid ispec in phase re-ordering' + endif - deallocate(temp_array_real) + ! adds element in newly ordered list + ispec_loop = ispec_loop + 1 - ! discontinuities boundary surface - if (SAVE_BOUNDARY_MESH) then - ! moho - do iface = 1,nspec2D_MOHO - ! top - old_ispec = ibelm_moho_top(iface) - new_ispec = perm(old_ispec) - ibelm_moho_top(iface) = new_ispec - ! bottom - old_ispec = ibelm_moho_bot(iface) - new_ispec = perm(old_ispec) - ibelm_moho_bot(iface) = new_ispec - enddo - ! 400 - do iface = 1,nspec2D_400 - ! top - old_ispec = ibelm_400_top(iface) - new_ispec = perm(old_ispec) - ibelm_400_top(iface) = new_ispec - ! bottom - old_ispec = ibelm_400_bot(iface) - new_ispec = perm(old_ispec) - ibelm_400_bot(iface) = new_ispec - enddo - ! 670 - do iface = 1,nspec2D_670 - ! top - old_ispec = ibelm_670_top(iface) - new_ispec = perm(old_ispec) - ibelm_670_top(iface) = new_ispec - ! bottom - old_ispec = ibelm_670_bot(iface) - new_ispec = perm(old_ispec) - ibelm_670_bot(iface) = new_ispec - enddo - endif + ! checks range of ispec_loop + if (ispec_loop > nspec) then + print *,'Error: rank ',myrank,' has invalid ispec_loop ',ispec_loop,' - should be <= ',nspec + stop 'Error ispec_loop exceeds element count for region' + endif - case (IREGION_OUTER_CORE) - ! checks number of elements - if (nspec /= NSPEC_OUTER_CORE ) & - call exit_MPI(myrank,'Error in permutation nspec should be NSPEC_OUTER_CORE') + ! stores ordering + perm_ispec_ordered(ispec) = ispec_loop - if (ABSORBING_CONDITIONS .and. NCHUNKS /= 6) then - allocate(temp_array_real(NGLLX,NGLLY,NGLLZ,nspec)) + ! debug + !if (myrank == 0 .and. iphase == 1 .and. ispec_p < 10) print *,'debug: ispec order ',ispec_loop,ispec + enddo + enddo - call permute_elements_real(rho_vp,temp_array_real,perm,nspec) + ! debug + !if (myrank == 0) print *,'debug: entries ispec_ordered: ',perm_ispec_ordered(1:10) - deallocate(temp_array_real) + ! check + if (ispec_loop /= nspec) then + print *,'Error: rank ',myrank,' has invalid number of total looped elements: ',ispec_loop + print *,' should be ',nspec + call exit_MPI(myrank,'Invalid number of looped elements for region') + endif + + ! re-orders phase_ispec array + num_reordered = 0 + ispec_loop = 0 + do iphase = 1,2 + if (iphase == 1) then + num_elements = nspec_outer_d + else + num_elements = nspec_inner_d endif + do ispec_p = 1,num_elements + ispec_loop = ispec_loop + 1 - case (IREGION_INNER_CORE) - ! checks number of elements - if (nspec /= NSPEC_INNER_CORE ) & - call exit_MPI(myrank,'Error in permutation nspec should be NSPEC_INNER_CORE') + old_ispec = phase_ispec_inner_d(ispec_p,iphase) + new_ispec = perm_ispec_ordered(old_ispec) - allocate(temp_array_real(NGLLX,NGLLY,NGLLZ,nspec)) + ! checks ordering + if (new_ispec /= ispec_loop) then + print *,'Error: rank ',myrank,' has invalid ispec ordering: ',new_ispec,' should be ',ispec_loop + call exit_MPI(myrank,'Invalid ispec ordering for phase_ispec_inner_d') + endif - ! note: muvstore needed for attenuation also for anisotropic inner core - call permute_elements_real(muvstore,temp_array_real,perm,nspec) - ! anisotropy in the inner core only - if (ANISOTROPIC_INNER_CORE) then - call permute_elements_real(c11store,temp_array_real,perm,nspec) - call permute_elements_real(c33store,temp_array_real,perm,nspec) - call permute_elements_real(c12store,temp_array_real,perm,nspec) - call permute_elements_real(c13store,temp_array_real,perm,nspec) - call permute_elements_real(c44store,temp_array_real,perm,nspec) + ! sets new element entry + if (old_ispec /= new_ispec) then + num_reordered = num_reordered + 1 + ! sets new ordering + phase_ispec_inner_d(ispec_p,iphase) = new_ispec + endif + enddo + enddo + + ! user output + if (myrank == 0) then + write(IMAIN,*) ' total number of re-ordered entries: ',num_reordered,' out of ',nspec_outer_d + nspec_inner_d + write(IMAIN,*) + call flush_IMAIN() + endif + + ! re-order ibool & all arrays depending on new element order + ! permutes all required mesh arrays according to new ordering + call permute_all_mesh_element_arrays(nspec,idomain,perm_ispec_ordered,ibool) + + ! free temporary array + deallocate(perm_ispec_ordered) + + ! global node re-ordering + if (USE_GLOBAL_NODE_PERMUTATION) then + ! note: re-orders iglob to have an somewhat increasing order according the new ibool array + ! global node indices are mostly retrieved by: + ! + ! do k = 1,NGLLZ + ! do j = 1,NGLLY + ! do i = 1,NGLLX + ! iglob = ibool(i,j,k,ispec) + ! .. + ! + ! we try to have iglob increasing consecutively in this loop order. + ! nevertheless, since global nodes are shared, some of the incrementing is not +1 for neighboring elements. + ! to avoid many jumps, a mesh coloring might help at least for the first color, where the next looped element + ! won't have shared global nodes. + ! there might be more sophisticated ways to re-order iglobs, like using space-filling curves. + ! here, we just try to simply re-order (without colors) - to see if this has any code performance effect. + + ! user output + if (myrank == 0) then + write(IMAIN,*) ' permuting global node entries' + call flush_IMAIN() endif - deallocate(temp_array_real) + allocate(perm_iglob_ordered(nglob),stat=ier) + if (ier /= 0) stop 'Error allocating iglob_ordered array' + perm_iglob_ordered(:) = 0 - case (IREGION_TRINFINITE) - ! checks number of elements - if (nspec /= NSPEC_TRINFINITE ) & - call exit_MPI(myrank,'Error in permutation nspec should be NSPEC_TRINFINITE') + ! orders iglob according to new ibool element ordering + iglob_ordered = 0 + do ispec = 1,nspec + do k = 1,NGLLZ + do j = 1,NGLLY + do i = 1,NGLLX + iglob = ibool(i,j,k,ispec) + ! adds iglob entry + if (perm_iglob_ordered(iglob) == 0) then + iglob_ordered = iglob_ordered + 1 + perm_iglob_ordered(iglob) = iglob_ordered + endif + enddo + enddo + enddo + enddo - ! TODO: check if/what arrays need to be permuted - stop 'Permutations for transition infinite region not fully implemented yet' + ! checks + if (iglob_ordered /= nglob) then + print *,'Error: rank ',myrank,' has invalid iglob_ordered: ',iglob_ordered,' - should be ',nglob + call exit_MPI(myrank,'Invalid iglob ordering') + endif - case (IREGION_INFINITE) - ! checks number of elements - if (nspec /= NSPEC_INFINITE ) & - call exit_MPI(myrank,'Error in permutation nspec should be NSPEC_INFINITE') + ! re-orders ibool entries + num_reordered = 0 + do ispec = 1,nspec + do k = 1,NGLLZ + do j = 1,NGLLY + do i = 1,NGLLX + iglob_old = ibool(i,j,k,ispec) + iglob_new = perm_iglob_ordered(iglob_old) + + ! checks range + if (iglob_new < 1 .or. iglob_new > nglob) then + print *,'Error: rank ',myrank,' has invalid iglob ',iglob_new,' - should be between 1 and ',nglob + print *,' ispec: ',ispec,' i/j/k: ',i,j,k,' iglob_old: ',iglob_old,' iglob_new: ',iglob_new + stop 'Invalid iglob in ibool re-ordering' + endif + + ! updates entry + if (iglob_old /= iglob_new) then + num_reordered = num_reordered + 1 + ibool(i,j,k,ispec) = iglob_new + endif + enddo + enddo + enddo + enddo - ! TODO: check if/what arrays need to be permuted - stop 'Permutations for infinite region not fully implemented yet' + ! debug + !if (myrank == 0) print *,'debug: ibool A',ibool(:,:,:,1) + !if (myrank == 0) print *,'debug: ibool B',ibool(:,:,:,2) - case default - stop 'Error idomain in setup_permutation' - end select + ! user output + if (myrank == 0) then + write(IMAIN,*) ' total number of re-ordered entries: ',num_reordered,' out of ',NGLLX*NGLLY*NGLLZ*nspec + write(IMAIN,*) + call flush_IMAIN() + endif - end subroutine setup_permutation + ! re-orders MPI interface ibool array + do i = 1,num_interfaces + ! ibool entries + do j = 1,nibool_interfaces(i) + iglob_old = ibool_interfaces(j,i) + iglob_new = perm_iglob_ordered(iglob_old) + + ! checks range + if (iglob_new < 1 .or. iglob_new > nglob) then + print *,'Error: rank ',myrank,' has invalid iglob ',iglob_new,' - should be between 1 and ',nglob + print *,' interface: ',i,' point: ',j,' iglob_old: ',iglob_old,' iglob_new: ',iglob_new + stop 'Invalid iglob in MPI interface re-ordering' + endif + + ! updates entry + if (iglob_old /= iglob_new) then + ibool_interfaces(j,i) = iglob_new + endif + enddo + enddo + + ! free array + deallocate(perm_iglob_ordered) + endif + + end subroutine setup_loop_permutation ! !------------------------------------------------------------------------------------------------- ! + ! deprecated ... ! ! subroutine setup_color_perm(iregion_code,nspec,nglob, & @@ -1395,3 +1412,637 @@ end subroutine setup_permutation ! ! ! end subroutine setup_color_perm + +!------------------------------------------------------------------------------------------------- +! +! PERMUTATIONS +! +!------------------------------------------------------------------------------------------------- + + subroutine permute_all_mesh_element_arrays(nspec,idomain,perm,ibool) + + use constants + + use meshfem_models_par, only: & + TRANSVERSE_ISOTROPY,HETEROGEN_3D_MANTLE,ANISOTROPIC_3D_MANTLE, & + ANISOTROPIC_INNER_CORE,ATTENUATION, & + ATTENUATION_3D,ATTENUATION_1D_WITH_3D_STORAGE + + use meshfem_par, only: & + ABSORBING_CONDITIONS,NCHUNKS,NSPEC2D_TOP,NSPEC2D_BOTTOM + + use meshfem_par, only: & + xstore,ystore,zstore,idoubling + + use regions_mesh_par2, only: & + xixstore,xiystore,xizstore,etaxstore,etaystore,etazstore, & + gammaxstore,gammaystore,gammazstore, & + rhostore,kappavstore,kappahstore,muvstore,muhstore,eta_anisostore, & + c11store,c12store,c13store,c14store,c15store,c16store,c22store, & + c23store,c24store,c25store,c26store,c33store,c34store,c35store, & + c36store,c44store,c45store,c46store,c55store,c56store,c66store, & + ibelm_xmin,ibelm_xmax,ibelm_ymin,ibelm_ymax,ibelm_bottom,ibelm_top, & + rho_vp,rho_vs, & + nspec2D_xmin,nspec2D_xmax,nspec2D_ymin,nspec2D_ymax, & + ispec_is_tiso,tau_e_store,Qmu_store, & + NSPEC2D_MOHO, NSPEC2D_400, NSPEC2D_670, & + ibelm_moho_top,ibelm_moho_bot,ibelm_400_top,ibelm_400_bot, & + ibelm_670_top,ibelm_670_bot + + use MPI_crust_mantle_par, only: NSPEC_CRUST_MANTLE + use MPI_outer_core_par, only: NSPEC_OUTER_CORE + use MPI_inner_core_par, only: NSPEC_INNER_CORE + use MPI_trinfinite_par, only: NSPEC_TRINFINITE + use MPI_infinite_par, only: NSPEC_INFINITE + + implicit none + + integer, intent(in) :: nspec + integer, dimension(NGLLX,NGLLY,NGLLZ,nspec),intent(inout) :: ibool + + integer, intent(in) :: idomain + integer, dimension(nspec),intent(in) :: perm + + ! local parameters + ! added for sorting + double precision, dimension(:,:,:,:), allocatable :: temp_array_dble + real(kind=CUSTOM_REAL), dimension(:,:,:,:), allocatable :: temp_array_real + real(kind=CUSTOM_REAL), dimension(:,:,:,:,:), allocatable :: temp_array_real_sls + integer, dimension(:,:,:,:), allocatable :: temp_array_int + integer, dimension(:), allocatable :: temp_array_int_1D + logical, dimension(:), allocatable :: temp_array_logical_1D + integer :: iface,old_ispec,new_ispec + + ! permutation of ibool + allocate(temp_array_int(NGLLX,NGLLY,NGLLZ,nspec)) + call permute_elements_integer(ibool,temp_array_int,perm,nspec) + deallocate(temp_array_int) + + ! element idoubling flags + allocate(temp_array_int_1D(nspec)) + call permute_elements_integer1D(idoubling,temp_array_int_1D,perm,nspec) + deallocate(temp_array_int_1D) + + ! element domain flags + allocate(temp_array_logical_1D(nspec)) + call permute_elements_logical1D(ispec_is_tiso,temp_array_logical_1D,perm,nspec) + deallocate(temp_array_logical_1D) + + ! mesh arrays + ! double precision + allocate(temp_array_dble(NGLLX,NGLLY,NGLLZ,nspec)) + call permute_elements_dble(xstore,temp_array_dble,perm,nspec) + call permute_elements_dble(ystore,temp_array_dble,perm,nspec) + call permute_elements_dble(zstore,temp_array_dble,perm,nspec) + deallocate(temp_array_dble) + ! custom precision + allocate(temp_array_real(NGLLX,NGLLY,NGLLZ,nspec)) + call permute_elements_real(xixstore,temp_array_real,perm,nspec) + call permute_elements_real(xiystore,temp_array_real,perm,nspec) + call permute_elements_real(xizstore,temp_array_real,perm,nspec) + call permute_elements_real(etaxstore,temp_array_real,perm,nspec) + call permute_elements_real(etaystore,temp_array_real,perm,nspec) + call permute_elements_real(etazstore,temp_array_real,perm,nspec) + call permute_elements_real(gammaxstore,temp_array_real,perm,nspec) + call permute_elements_real(gammaystore,temp_array_real,perm,nspec) + call permute_elements_real(gammazstore,temp_array_real,perm,nspec) + + ! material parameters + call permute_elements_real(rhostore,temp_array_real,perm,nspec) + call permute_elements_real(kappavstore,temp_array_real,perm,nspec) + deallocate(temp_array_real) + + ! boundary surfaces + ! note: only arrays pointing to ispec will have to be permuted since value of ispec will be different + ! + ! xmin + do iface = 1,nspec2D_xmin + old_ispec = ibelm_xmin(iface) + new_ispec = perm(old_ispec) + ibelm_xmin(iface) = new_ispec + enddo + ! xmax + do iface = 1,nspec2D_xmax + old_ispec = ibelm_xmax(iface) + new_ispec = perm(old_ispec) + ibelm_xmax(iface) = new_ispec + enddo + ! ymin + do iface = 1,nspec2D_ymin + old_ispec = ibelm_ymin(iface) + new_ispec = perm(old_ispec) + ibelm_ymin(iface) = new_ispec + enddo + ! ymax + do iface = 1,nspec2D_ymax + old_ispec = ibelm_ymax(iface) + new_ispec = perm(old_ispec) + ibelm_ymax(iface) = new_ispec + enddo + ! bottom + do iface = 1,NSPEC2D_BOTTOM(idomain) + old_ispec = ibelm_bottom(iface) + new_ispec = perm(old_ispec) + ibelm_bottom(iface) = new_ispec + enddo + ! top + do iface = 1,NSPEC2D_TOP(idomain) + old_ispec = ibelm_top(iface) + new_ispec = perm(old_ispec) + ibelm_top(iface) = new_ispec + enddo + + ! attenuation arrays + if (ATTENUATION) then + if (ATTENUATION_3D .or. ATTENUATION_1D_WITH_3D_STORAGE) then + allocate(temp_array_real(NGLLX,NGLLY,NGLLZ,nspec)) + allocate(temp_array_real_sls(NGLLX,NGLLY,NGLLZ,N_SLS,nspec)) + call permute_elements_real(Qmu_store,temp_array_real,perm,nspec) + call permute_elements_real_sls(tau_e_store,temp_array_real_sls,perm,nspec) + deallocate(temp_array_real,temp_array_real_sls) + else + allocate(temp_array_real(1,1,1,nspec)) + allocate(temp_array_real_sls(1,1,1,N_SLS,nspec)) + call permute_elements_real1(Qmu_store,temp_array_real,perm,nspec) + call permute_elements_real_sls1(tau_e_store,temp_array_real_sls,perm,nspec) + deallocate(temp_array_real,temp_array_real_sls) + endif + endif + + select case (idomain) + case (IREGION_CRUST_MANTLE) + ! checks number of elements + if (nspec /= NSPEC_CRUST_MANTLE ) & + call exit_MPI(myrank,'Error in permutation nspec should be NSPEC_CRUST_MANTLE') + + allocate(temp_array_real(NGLLX,NGLLY,NGLLZ,nspec)) + + ! note: muvstore needed for attenuation also for anisotropic 3d mantle + call permute_elements_real(muvstore,temp_array_real,perm,nspec) + if (ANISOTROPIC_3D_MANTLE) then + call permute_elements_real(c11store,temp_array_real,perm,nspec) + call permute_elements_real(c11store,temp_array_real,perm,nspec) + call permute_elements_real(c12store,temp_array_real,perm,nspec) + call permute_elements_real(c13store,temp_array_real,perm,nspec) + call permute_elements_real(c14store,temp_array_real,perm,nspec) + call permute_elements_real(c15store,temp_array_real,perm,nspec) + call permute_elements_real(c16store,temp_array_real,perm,nspec) + call permute_elements_real(c22store,temp_array_real,perm,nspec) + call permute_elements_real(c23store,temp_array_real,perm,nspec) + call permute_elements_real(c24store,temp_array_real,perm,nspec) + call permute_elements_real(c25store,temp_array_real,perm,nspec) + call permute_elements_real(c26store,temp_array_real,perm,nspec) + call permute_elements_real(c33store,temp_array_real,perm,nspec) + call permute_elements_real(c34store,temp_array_real,perm,nspec) + call permute_elements_real(c35store,temp_array_real,perm,nspec) + call permute_elements_real(c36store,temp_array_real,perm,nspec) + call permute_elements_real(c44store,temp_array_real,perm,nspec) + call permute_elements_real(c45store,temp_array_real,perm,nspec) + call permute_elements_real(c46store,temp_array_real,perm,nspec) + call permute_elements_real(c55store,temp_array_real,perm,nspec) + call permute_elements_real(c56store,temp_array_real,perm,nspec) + call permute_elements_real(c66store,temp_array_real,perm,nspec) + else + if (TRANSVERSE_ISOTROPY) then + call permute_elements_real(kappahstore,temp_array_real,perm,nspec) + call permute_elements_real(muhstore,temp_array_real,perm,nspec) + call permute_elements_real(eta_anisostore,temp_array_real,perm,nspec) + endif + endif + + ! just to be nice and align dvpstore to the permuted mesh + if (HETEROGEN_3D_MANTLE) then + call model_heterogen_mantle_permute_dvp(temp_array_real,perm,nspec) + endif + + if (ABSORBING_CONDITIONS .and. NCHUNKS /= 6) then + call permute_elements_real(rho_vp,temp_array_real,perm,nspec) + call permute_elements_real(rho_vs,temp_array_real,perm,nspec) + endif + + deallocate(temp_array_real) + + ! discontinuities boundary surface + if (SAVE_BOUNDARY_MESH) then + ! moho + do iface = 1,nspec2D_MOHO + ! top + old_ispec = ibelm_moho_top(iface) + new_ispec = perm(old_ispec) + ibelm_moho_top(iface) = new_ispec + ! bottom + old_ispec = ibelm_moho_bot(iface) + new_ispec = perm(old_ispec) + ibelm_moho_bot(iface) = new_ispec + enddo + ! 400 + do iface = 1,nspec2D_400 + ! top + old_ispec = ibelm_400_top(iface) + new_ispec = perm(old_ispec) + ibelm_400_top(iface) = new_ispec + ! bottom + old_ispec = ibelm_400_bot(iface) + new_ispec = perm(old_ispec) + ibelm_400_bot(iface) = new_ispec + enddo + ! 670 + do iface = 1,nspec2D_670 + ! top + old_ispec = ibelm_670_top(iface) + new_ispec = perm(old_ispec) + ibelm_670_top(iface) = new_ispec + ! bottom + old_ispec = ibelm_670_bot(iface) + new_ispec = perm(old_ispec) + ibelm_670_bot(iface) = new_ispec + enddo + endif + + case (IREGION_OUTER_CORE) + ! checks number of elements + if (nspec /= NSPEC_OUTER_CORE ) & + call exit_MPI(myrank,'Error in permutation nspec should be NSPEC_OUTER_CORE') + + if (ABSORBING_CONDITIONS .and. NCHUNKS /= 6) then + allocate(temp_array_real(NGLLX,NGLLY,NGLLZ,nspec)) + + call permute_elements_real(rho_vp,temp_array_real,perm,nspec) + + deallocate(temp_array_real) + endif + + case (IREGION_INNER_CORE) + ! checks number of elements + if (nspec /= NSPEC_INNER_CORE ) & + call exit_MPI(myrank,'Error in permutation nspec should be NSPEC_INNER_CORE') + + allocate(temp_array_real(NGLLX,NGLLY,NGLLZ,nspec)) + + ! note: muvstore needed for attenuation also for anisotropic inner core + call permute_elements_real(muvstore,temp_array_real,perm,nspec) + ! anisotropy in the inner core only + if (ANISOTROPIC_INNER_CORE) then + call permute_elements_real(c11store,temp_array_real,perm,nspec) + call permute_elements_real(c33store,temp_array_real,perm,nspec) + call permute_elements_real(c12store,temp_array_real,perm,nspec) + call permute_elements_real(c13store,temp_array_real,perm,nspec) + call permute_elements_real(c44store,temp_array_real,perm,nspec) + endif + + deallocate(temp_array_real) + + case (IREGION_TRINFINITE) + ! checks number of elements + if (nspec /= NSPEC_TRINFINITE ) & + call exit_MPI(myrank,'Error in permutation nspec should be NSPEC_TRINFINITE') + + ! TODO: check if/what arrays need to be permuted + stop 'Permutations for transition infinite region not fully implemented yet' + + case (IREGION_INFINITE) + ! checks number of elements + if (nspec /= NSPEC_INFINITE ) & + call exit_MPI(myrank,'Error in permutation nspec should be NSPEC_INFINITE') + + ! TODO: check if/what arrays need to be permuted + stop 'Permutations for infinite region not fully implemented yet' + + case default + stop 'Error idomain in setup_permutation' + end select + + end subroutine permute_all_mesh_element_arrays + +! +!------------------------------------------------------------------------------------------------- +! + +! implement permutation of elements for arrays of real (CUSTOM_REAL) type + + subroutine permute_elements_real(array_to_permute,temp_array,perm,nspec) + + use constants + + implicit none + + integer, intent(in) :: nspec + integer, intent(in), dimension(nspec) :: perm + + real(kind=CUSTOM_REAL), intent(inout), dimension(NGLLX,NGLLY,NGLLZ,nspec) :: & + array_to_permute,temp_array + + integer :: old_ispec,new_ispec + + ! copy the original array + temp_array(:,:,:,:) = array_to_permute(:,:,:,:) + + do old_ispec = 1,nspec + new_ispec = perm(old_ispec) + array_to_permute(:,:,:,new_ispec) = temp_array(:,:,:,old_ispec) + enddo + + end subroutine permute_elements_real + +! +!------------------------------------------------------------------------------------------------- +! + +! implement permutation of elements for arrays of real (CUSTOM_REAL) type + + subroutine permute_elements_real1(array_to_permute,temp_array,perm,nspec) + + use constants + + implicit none + + integer, intent(in) :: nspec + integer, intent(in), dimension(nspec) :: perm + + real(kind=CUSTOM_REAL), intent(inout), dimension(1,1,1,nspec) :: & + array_to_permute,temp_array + + integer :: old_ispec,new_ispec + + ! copy the original array + temp_array(:,:,:,:) = array_to_permute(:,:,:,:) + + do old_ispec = 1,nspec + new_ispec = perm(old_ispec) + array_to_permute(:,:,:,new_ispec) = temp_array(:,:,:,old_ispec) + enddo + + end subroutine permute_elements_real1 + +! +!------------------------------------------------------------------------------------------------- +! + +! implement permutation of elements for arrays of real (CUSTOM_REAL) type + + subroutine permute_elements_real_sls(array_to_permute,temp_array,perm,nspec) + + use constants + + implicit none + + integer, intent(in) :: nspec + integer, intent(in), dimension(nspec) :: perm + + real(kind=CUSTOM_REAL), intent(inout), dimension(NGLLX,NGLLY,NGLLZ,N_SLS,nspec) :: & + array_to_permute,temp_array + + integer :: old_ispec,new_ispec + + ! copy the original array + temp_array(:,:,:,:,:) = array_to_permute(:,:,:,:,:) + + do old_ispec = 1,nspec + new_ispec = perm(old_ispec) + array_to_permute(:,:,:,:,new_ispec) = temp_array(:,:,:,:,old_ispec) + enddo + + end subroutine permute_elements_real_sls + +! +!------------------------------------------------------------------------------------------------- +! + +! implement permutation of elements for arrays of real (CUSTOM_REAL) type + + subroutine permute_elements_real_sls1(array_to_permute,temp_array,perm,nspec) + + use constants + + implicit none + + integer, intent(in) :: nspec + integer, intent(in), dimension(nspec) :: perm + + real(kind=CUSTOM_REAL), intent(inout), dimension(1,1,1,N_SLS,nspec) :: & + array_to_permute,temp_array + + integer :: old_ispec,new_ispec + + ! copy the original array + temp_array(:,:,:,:,:) = array_to_permute(:,:,:,:,:) + + do old_ispec = 1,nspec + new_ispec = perm(old_ispec) + array_to_permute(:,:,:,:,new_ispec) = temp_array(:,:,:,:,old_ispec) + enddo + + end subroutine permute_elements_real_sls1 + + + +! +!------------------------------------------------------------------------------------------------- +! + +! implement permutation of elements for arrays of integer type + + subroutine permute_elements_integer(array_to_permute,temp_array,perm,nspec) + + use constants + + implicit none + + integer, intent(in) :: nspec + integer, intent(in), dimension(nspec) :: perm + + integer, intent(inout), dimension(NGLLX,NGLLY,NGLLZ,nspec) :: & + array_to_permute,temp_array + + integer :: old_ispec,new_ispec + + ! copy the original array + temp_array(:,:,:,:) = array_to_permute(:,:,:,:) + + do old_ispec = 1,nspec + new_ispec = perm(old_ispec) + array_to_permute(:,:,:,new_ispec) = temp_array(:,:,:,old_ispec) + enddo + + end subroutine permute_elements_integer + +! +!------------------------------------------------------------------------------------------------- +! + +! implement permutation of elements for arrays of double precision type + + subroutine permute_elements_dble(array_to_permute,temp_array,perm,nspec) + + use constants + + implicit none + + integer, intent(in) :: nspec + integer, intent(in), dimension(nspec) :: perm + + double precision, intent(inout), dimension(NGLLX,NGLLY,NGLLZ,nspec) :: & + array_to_permute,temp_array + + integer :: old_ispec,new_ispec + + ! copy the original array + temp_array(:,:,:,:) = array_to_permute(:,:,:,:) + + do old_ispec = 1,nspec + new_ispec = perm(old_ispec) + array_to_permute(:,:,:,new_ispec) = temp_array(:,:,:,old_ispec) + enddo + + end subroutine permute_elements_dble + +! +!------------------------------------------------------------------------------------------------- +! + +! implement permutation of elements for arrays of double precision type + + subroutine permute_elements_logical1D(array_to_permute,temp_array,perm,nspec) + + use constants + + implicit none + + integer, intent(in) :: nspec + integer, intent(in), dimension(nspec) :: perm + + logical, intent(inout), dimension(nspec) :: array_to_permute,temp_array + + integer :: old_ispec,new_ispec + + ! copy the original array + temp_array(:) = array_to_permute(:) + + do old_ispec = 1,nspec + new_ispec = perm(old_ispec) + array_to_permute(new_ispec) = temp_array(old_ispec) + enddo + + end subroutine permute_elements_logical1D + +! +!------------------------------------------------------------------------------------------------- +! + +! implement permutation of elements for arrays of integer type + + subroutine permute_elements_integer1D(array_to_permute,temp_array,perm,nspec) + + use constants + + implicit none + + integer, intent(in) :: nspec + integer, intent(in), dimension(nspec) :: perm + + integer, intent(inout), dimension(nspec) :: & + array_to_permute,temp_array + + integer :: old_ispec,new_ispec + + ! copy the original array + temp_array(:) = array_to_permute(:) + + do old_ispec = 1,nspec + new_ispec = perm(old_ispec) + array_to_permute(new_ispec) = temp_array(old_ispec) + enddo + + end subroutine permute_elements_integer1D + +! +!------------------------------------------------------------------------------------------------- +! + +! implement permutation of elements for arrays of real (CUSTOM_REAL) type + + subroutine permute_elements_dble1(array_to_permute,temp_array,perm,nspec) + + use constants + + implicit none + + integer, intent(in) :: nspec + integer, intent(in), dimension(nspec) :: perm + + double precision, intent(inout), dimension(1,1,1,nspec) :: & + array_to_permute,temp_array + + integer :: old_ispec,new_ispec + + ! copy the original array + temp_array(:,:,:,:) = array_to_permute(:,:,:,:) + + do old_ispec = 1,nspec + new_ispec = perm(old_ispec) + array_to_permute(:,:,:,new_ispec) = temp_array(:,:,:,old_ispec) + enddo + + end subroutine permute_elements_dble1 + +! +!------------------------------------------------------------------------------------------------- +! + +! implement permutation of elements for arrays of real (CUSTOM_REAL) type + + subroutine permute_elements_dble_sls(array_to_permute,temp_array,perm,nspec) + + use constants + + implicit none + + integer, intent(in) :: nspec + integer, intent(in), dimension(nspec) :: perm + + double precision, intent(inout), dimension(N_SLS,NGLLX,NGLLY,NGLLZ,nspec) :: & + array_to_permute,temp_array + + integer :: old_ispec,new_ispec + + ! copy the original array + temp_array(:,:,:,:,:) = array_to_permute(:,:,:,:,:) + + do old_ispec = 1,nspec + new_ispec = perm(old_ispec) + array_to_permute(:,:,:,:,new_ispec) = temp_array(:,:,:,:,old_ispec) + enddo + + end subroutine permute_elements_dble_sls + +! +!------------------------------------------------------------------------------------------------- +! + +! implement permutation of elements for arrays of real (CUSTOM_REAL) type + + subroutine permute_elements_dble_sls1(array_to_permute,temp_array,perm,nspec) + + use constants + + implicit none + + integer, intent(in) :: nspec + integer, intent(in), dimension(nspec) :: perm + + double precision, intent(inout), dimension(N_SLS,1,1,1,nspec) :: & + array_to_permute,temp_array + + integer :: old_ispec,new_ispec + + ! copy the original array + temp_array(:,:,:,:,:) = array_to_permute(:,:,:,:,:) + + do old_ispec = 1,nspec + new_ispec = perm(old_ispec) + array_to_permute(:,:,:,:,new_ispec) = temp_array(:,:,:,:,old_ispec) + enddo + + end subroutine permute_elements_dble_sls1 + + From 651dee8906ab83d80f5bd6e0a2ecc358a1d832bf Mon Sep 17 00:00:00 2001 From: Daniel Peter Date: Tue, 25 Jun 2024 14:34:18 +0200 Subject: [PATCH 09/13] adds parameter to remove ficitious elements from inner/outer loop --- src/meshfem3D/setup_inner_outer.f90 | 113 +++++++++++++++++++++------- 1 file changed, 87 insertions(+), 26 deletions(-) diff --git a/src/meshfem3D/setup_inner_outer.f90 b/src/meshfem3D/setup_inner_outer.f90 index 736f5dc36..7639cd6ae 100644 --- a/src/meshfem3D/setup_inner_outer.f90 +++ b/src/meshfem3D/setup_inner_outer.f90 @@ -32,9 +32,11 @@ subroutine setup_inner_outer(iregion_code) myrank,OUTPUT_FILES,IMAIN,MAX_STRING_LEN, & IREGION_CRUST_MANTLE,IREGION_OUTER_CORE,IREGION_INNER_CORE, & IREGION_TRINFINITE,IREGION_INFINITE, & - NPROCTOT + NPROCTOT, & + IFLAG_IN_FICTITIOUS_CUBE - use meshfem_par, only: ibool,is_on_a_slice_edge,xstore_glob,ystore_glob,zstore_glob + use meshfem_par, only: ibool,is_on_a_slice_edge,xstore_glob,ystore_glob,zstore_glob, & + idoubling use MPI_crust_mantle_par use MPI_outer_core_par @@ -54,10 +56,21 @@ subroutine setup_inner_outer(iregion_code) character(len=MAX_STRING_LEN) :: filename logical,parameter :: DEBUG = .false. + ! explicitly exclude ficitious inner core elements from phase_ispec_* array + logical,parameter :: DO_EXCLUDE_FICTITIOUS_ELEMENTS = .false. + ! stores inner / outer elements ! ! note: arrays is_on_a_slice_edge_.. have flags set for elements which need to ! communicate with other MPI processes + + ! user output + if (myrank == 0) then + write(IMAIN,*) + write(IMAIN,*) 'for overlapping of communications with calculations:' + call flush_IMAIN() + endif + select case (iregion_code) case (IREGION_CRUST_MANTLE) ! crust_mantle @@ -90,12 +103,9 @@ subroutine setup_inner_outer(iregion_code) ! user output if (myrank == 0) then - write(IMAIN,*) - write(IMAIN,*) 'for overlapping of communications with calculations:' - write(IMAIN,*) percentage_edge = 100. * nspec_outer_crust_mantle / real(NSPEC_CRUST_MANTLE) - write(IMAIN,*) 'percentage of edge elements in crust/mantle ',percentage_edge,'%' - write(IMAIN,*) 'percentage of volume elements in crust/mantle ',100. - percentage_edge,'%' + write(IMAIN,*) ' percentage of edge elements in crust/mantle ',percentage_edge,'%' + write(IMAIN,*) ' percentage of volume elements in crust/mantle ',100. - percentage_edge,'%' write(IMAIN,*) call flush_IMAIN() endif @@ -149,8 +159,8 @@ subroutine setup_inner_outer(iregion_code) ! user output if (myrank == 0) then percentage_edge = 100.* nspec_outer_outer_core / real(NSPEC_OUTER_CORE) - write(IMAIN,*) 'percentage of edge elements in outer core ',percentage_edge,'%' - write(IMAIN,*) 'percentage of volume elements in outer core ',100. - percentage_edge,'%' + write(IMAIN,*) ' percentage of edge elements in outer core ',percentage_edge,'%' + write(IMAIN,*) ' percentage of volume elements in outer core ',100. - percentage_edge,'%' write(IMAIN,*) endif @@ -171,6 +181,36 @@ subroutine setup_inner_outer(iregion_code) endif nspec_inner_inner_core = NSPEC_INNER_CORE - nspec_outer_inner_core + ! note: for fictitious elements in the inner core, is_on_a_slice_edge(ispec) is set to .false. + ! in routine create_regions_elements(). + ! thus, counting the number of elements on a slice edge will only count "active" inner core elements. + ! and there is no need to explicitly exclude fictitious elements from this nspec_outer_inner_core count again. + ! + ! however, at the moment fictitious elements are still included in the nspec_inner_inner_core count. + ! we could further exclude those such that when we loop over phase_ispec_inner_inner_core(*,*) elements + ! only "active" elements get considered. + ! this would lead to a total count (nspec_inner_inner_core + nspec_outer_inner_core) < NSPEC_INNER_CORE + ! + ! excludes fictitious elements from count + if (DO_EXCLUDE_FICTITIOUS_ELEMENTS) then + ! user output + if (myrank == 0) then + write(IMAIN,*) ' excluding fictitious elements from inner/outer elements' + write(IMAIN,*) + call flush_IMAIN() + endif + + do ispec = 1,NSPEC_INNER_CORE + if (is_on_a_slice_edge(ispec) .and. NPROCTOT > 1) then + ! subtract fictitious element + if (idoubling(ispec) == IFLAG_IN_FICTITIOUS_CUBE) nspec_outer_inner_core = nspec_outer_inner_core - 1 + else + ! subtract fictitious element + if (idoubling(ispec) == IFLAG_IN_FICTITIOUS_CUBE) nspec_inner_inner_core = nspec_inner_inner_core - 1 + endif + enddo + endif + num_phase_ispec_inner_core = max(nspec_inner_inner_core,nspec_outer_inner_core) allocate(phase_ispec_inner_inner_core(num_phase_ispec_inner_core,2),stat=ier) @@ -179,23 +219,44 @@ subroutine setup_inner_outer(iregion_code) phase_ispec_inner_inner_core(:,:) = 0 iinner = 0 iouter = 0 - do ispec = 1,NSPEC_INNER_CORE - if (is_on_a_slice_edge(ispec) .and. NPROCTOT > 1) then - ! outer element - iouter = iouter + 1 - phase_ispec_inner_inner_core(iouter,1) = ispec - else - ! inner element - iinner = iinner + 1 - phase_ispec_inner_inner_core(iinner,2) = ispec - endif - enddo + + if (DO_EXCLUDE_FICTITIOUS_ELEMENTS) then + ! counts only "active" inner core elements and excludes ficititous elements + do ispec = 1,NSPEC_INNER_CORE + if (is_on_a_slice_edge(ispec) .and. NPROCTOT > 1) then + ! outer element + if (idoubling(ispec) /= IFLAG_IN_FICTITIOUS_CUBE) then + iouter = iouter + 1 + phase_ispec_inner_inner_core(iouter,1) = ispec + endif + else + ! inner element + if (idoubling(ispec) /= IFLAG_IN_FICTITIOUS_CUBE) then + iinner = iinner + 1 + phase_ispec_inner_inner_core(iinner,2) = ispec + endif + endif + enddo + else + ! default + do ispec = 1,NSPEC_INNER_CORE + if (is_on_a_slice_edge(ispec) .and. NPROCTOT > 1) then + ! outer element + iouter = iouter + 1 + phase_ispec_inner_inner_core(iouter,1) = ispec + else + ! inner element + iinner = iinner + 1 + phase_ispec_inner_inner_core(iinner,2) = ispec + endif + enddo + endif ! user output if (myrank == 0) then percentage_edge = 100. * nspec_outer_inner_core / real(NSPEC_INNER_CORE) - write(IMAIN,*) 'percentage of edge elements in inner core ',percentage_edge,'%' - write(IMAIN,*) 'percentage of volume elements in inner core ',100. - percentage_edge,'%' + write(IMAIN,*) ' percentage of edge elements in inner core ',percentage_edge,'%' + write(IMAIN,*) ' percentage of volume elements in inner core ',100. - percentage_edge,'%' write(IMAIN,*) call flush_IMAIN() endif @@ -240,8 +301,8 @@ subroutine setup_inner_outer(iregion_code) ! user output if (myrank == 0) then percentage_edge = 100. * nspec_outer_trinfinite / real(NSPEC_TRINFINITE) - write(IMAIN,*) 'percentage of edge elements in transition infinite region ',percentage_edge,'%' - write(IMAIN,*) 'percentage of volume elements in transition infinite region ',100. - percentage_edge,'%' + write(IMAIN,*) ' percentage of edge elements in transition infinite region ',percentage_edge,'%' + write(IMAIN,*) ' percentage of volume elements in transition infinite region ',100. - percentage_edge,'%' write(IMAIN,*) call flush_IMAIN() endif @@ -286,8 +347,8 @@ subroutine setup_inner_outer(iregion_code) ! user output if (myrank == 0) then percentage_edge = 100. * nspec_outer_infinite / real(NSPEC_INFINITE) - write(IMAIN,*) 'percentage of edge elements in infinite region ',percentage_edge,'%' - write(IMAIN,*) 'percentage of volume elements in infinite region ',100. - percentage_edge,'%' + write(IMAIN,*) ' percentage of edge elements in infinite region ',percentage_edge,'%' + write(IMAIN,*) ' percentage of volume elements in infinite region ',100. - percentage_edge,'%' write(IMAIN,*) call flush_IMAIN() endif From 601967539e16be16511ac7b1935eee603b7e28fc Mon Sep 17 00:00:00 2001 From: Daniel Peter Date: Tue, 25 Jun 2024 14:35:01 +0200 Subject: [PATCH 10/13] updates mesh permutation routines --- src/meshfem3D/create_regions_mesh.F90 | 14 +- src/meshfem3D/save_arrays_solver.f90 | 3 +- src/meshfem3D/setup_color_perm.f90 | 766 +++++++++++++------------- 3 files changed, 405 insertions(+), 378 deletions(-) diff --git a/src/meshfem3D/create_regions_mesh.F90 b/src/meshfem3D/create_regions_mesh.F90 index 8c1bad88a..39388220e 100644 --- a/src/meshfem3D/create_regions_mesh.F90 +++ b/src/meshfem3D/create_regions_mesh.F90 @@ -152,7 +152,7 @@ subroutine create_regions_mesh(npointot, & call synchronize_all() if (myrank == 0) then write(IMAIN,*) - write(IMAIN,*) ' ...allocating arrays ' + write(IMAIN,*) ' ...allocating arrays' call flush_IMAIN() endif call crm_allocate_arrays(ipass, & @@ -164,7 +164,7 @@ subroutine create_regions_mesh(npointot, & call synchronize_all() if (myrank == 0) then write(IMAIN,*) - write(IMAIN,*) ' ...setting up layers ' + write(IMAIN,*) ' ...setting up layers' call flush_IMAIN() endif call crm_setup_layers(ipass,NEX_PER_PROC_ETA) @@ -173,7 +173,7 @@ subroutine create_regions_mesh(npointot, & call synchronize_all() if (myrank == 0) then write(IMAIN,*) - write(IMAIN,*) ' ...creating mesh elements ' + write(IMAIN,*) ' ...creating mesh elements' call flush_IMAIN() endif call create_regions_elements(ipass, & @@ -248,7 +248,7 @@ subroutine create_regions_mesh(npointot, & call synchronize_all() if (myrank == 0) then write(IMAIN,*) - write(IMAIN,*) ' ...fills global mesh points ' + write(IMAIN,*) ' ...fills global mesh points' call flush_IMAIN() endif call crm_fill_global_meshes() @@ -329,7 +329,7 @@ subroutine create_regions_mesh(npointot, & call synchronize_all() if (myrank == 0) then write(IMAIN,*) - write(IMAIN,*) ' ...element inner/outer separation ' + write(IMAIN,*) ' ...element inner/outer separation' call flush_IMAIN() endif call setup_inner_outer(iregion_code) @@ -338,10 +338,10 @@ subroutine create_regions_mesh(npointot, & call synchronize_all() if (myrank == 0) then write(IMAIN,*) - write(IMAIN,*) ' ...element mesh coloring ' + write(IMAIN,*) ' ...element mesh permutation' call flush_IMAIN() endif - call setup_color_perm(iregion_code) + call setup_mesh_permutation(iregion_code) !uncomment: adds model smoothing for point profile models ! if (THREE_D_MODEL == THREE_D_MODEL_PPM) then diff --git a/src/meshfem3D/save_arrays_solver.f90 b/src/meshfem3D/save_arrays_solver.f90 index c91368e26..6298d88d7 100644 --- a/src/meshfem3D/save_arrays_solver.f90 +++ b/src/meshfem3D/save_arrays_solver.f90 @@ -493,8 +493,7 @@ subroutine save_MPI_arrays(iregion_code,LOCAL_PATH, & ! mesh coloring integer,intent(in) :: num_colors_outer,num_colors_inner - integer, dimension(num_colors_outer + num_colors_inner),intent(in) :: & - num_elem_colors + integer, dimension(num_colors_outer + num_colors_inner),intent(in) :: num_elem_colors ! local parameters character(len=MAX_STRING_LEN) :: prname diff --git a/src/meshfem3D/setup_color_perm.f90 b/src/meshfem3D/setup_color_perm.f90 index 98958afdb..56615e110 100644 --- a/src/meshfem3D/setup_color_perm.f90 +++ b/src/meshfem3D/setup_color_perm.f90 @@ -25,21 +25,18 @@ ! !===================================================================== - subroutine setup_color_perm(iregion_code) + subroutine setup_mesh_permutation(iregion_code) - use constants, only: myrank - - use meshfem_par, only: & - IMAIN,USE_MESH_COLORING_GPU,SAVE_MESH_FILES, & + use constants, only: myrank, & + IMAIN,USE_MESH_COLORING_GPU, & IREGION_CRUST_MANTLE,IREGION_OUTER_CORE,IREGION_INNER_CORE, & IREGION_TRINFINITE,IREGION_INFINITE - use meshfem_par, only: ibool,is_on_a_slice_edge + use meshfem_par, only: ibool use MPI_crust_mantle_par use MPI_outer_core_par use MPI_inner_core_par - use MPI_trinfinite_par use MPI_infinite_par @@ -48,11 +45,11 @@ subroutine setup_color_perm(iregion_code) integer,intent(in) :: iregion_code ! local parameters - integer, dimension(:), allocatable :: perm integer :: ier - integer :: nspec,nglob - integer :: idomain + ! additional mesh permutations + ! (can be used for testing mesh permutations also for CPU-only simulations without the GPU coloring) + ! ! for testing effect of element permutations on code performance ! note: re-ordering might affect the efficiency of cache fetches. ! with the inner/outer loop re-ordering here, elements get ordered consecutively for each phase. @@ -60,9 +57,22 @@ subroutine setup_color_perm(iregion_code) ! permutes inner/outer loop elements logical, parameter :: USE_MESH_LOOP_PERMUTATION = .false. + ! for testing effect of global node permutations on code performance + ! note: re-ordering the iglob entries in ibool together with the above re-ordering of the elements + ! can affect the cache efficiency. + ! however, together with the above element re-ordering, this iglob re-ordering seems to have + ! only a minor impact of an additional ~1-1.5 % on the CPU. + ! permutes iglob entries + logical, parameter :: USE_MESH_GLOBAL_NODE_PERMUTATION = .false. + ! user output if (myrank == 0) then - write(IMAIN,*) ' mesh coloring: ',USE_MESH_COLORING_GPU + if (USE_MESH_COLORING_GPU) & + write(IMAIN,*) ' mesh permutation for GPU coloring : ',USE_MESH_COLORING_GPU + if (USE_MESH_LOOP_PERMUTATION) & + write(IMAIN,*) ' mesh permutation for inner/outer loops: ',USE_MESH_LOOP_PERMUTATION + if (USE_MESH_GLOBAL_NODE_PERMUTATION) & + write(IMAIN,*) ' mesh permutation for global nodes : ',USE_MESH_GLOBAL_NODE_PERMUTATION call flush_IMAIN() endif @@ -73,48 +83,12 @@ subroutine setup_color_perm(iregion_code) num_colors_outer_crust_mantle = 0 num_colors_inner_crust_mantle = 0 - ! mesh coloring + ! mesh coloring and permutation if (USE_MESH_COLORING_GPU) then - - ! user output - if (myrank == 0) write(IMAIN,*) ' coloring crust mantle... ' - - ! crust/mantle region - nspec = NSPEC_CRUST_MANTLE - nglob = NGLOB_CRUST_MANTLE - idomain = IREGION_CRUST_MANTLE - - ! creates coloring of elements - allocate(perm(nspec),stat=ier) - if (ier /= 0 ) call exit_mpi(myrank,'Error allocating temporary perm crust mantle array') - perm(:) = 0 - - call setup_color(nspec,nglob,ibool,perm, & - idomain,is_on_a_slice_edge, & - num_phase_ispec_crust_mantle,phase_ispec_inner_crust_mantle, & - SAVE_MESH_FILES) - - ! checks - if (minval(perm) /= 1) & - call exit_MPI(myrank, 'minval(perm) should be 1') - if (maxval(perm) /= num_phase_ispec_crust_mantle) & - call exit_MPI(myrank, 'maxval(perm) should be num_phase_ispec_crust_mantle') - - ! sorts array according to color permutation - call synchronize_all() - if (myrank == 0) then - write(IMAIN,*) ' mesh permutation:' - call flush_IMAIN() - endif - - call setup_permutation(nspec,nglob,ibool, & - idomain,perm, & - num_colors_outer_crust_mantle,num_colors_inner_crust_mantle, & - num_elem_colors_crust_mantle, & - num_phase_ispec_crust_mantle,phase_ispec_inner_crust_mantle, & - SAVE_MESH_FILES) - - deallocate(perm) + call setup_color_permutation(NSPEC_CRUST_MANTLE,NGLOB_CRUST_MANTLE,iregion_code, & + num_phase_ispec_crust_mantle,phase_ispec_inner_crust_mantle, & + num_colors_outer_crust_mantle,num_colors_inner_crust_mantle, & + num_elem_colors_crust_mantle) else ! dummy array allocate(num_elem_colors_crust_mantle(num_colors_outer_crust_mantle+num_colors_inner_crust_mantle),stat=ier) @@ -123,18 +97,18 @@ subroutine setup_color_perm(iregion_code) ! setup inner/outer loop permutation for crust/mantle if (USE_MESH_LOOP_PERMUTATION) then - ! user output - if (myrank == 0) then - write(IMAIN,*) ' mesh permutation for inner/outer loops: ',USE_MESH_LOOP_PERMUTATION - call flush_IMAIN() - endif - - call setup_loop_permutation(NSPEC_CRUST_MANTLE,NGLOB_CRUST_MANTLE,ibool, & - iregion_code, & + ! permutes element + call setup_loop_permutation(NSPEC_CRUST_MANTLE,ibool,iregion_code, & num_phase_ispec_crust_mantle,phase_ispec_inner_crust_mantle, & - nspec_outer_crust_mantle,nspec_inner_crust_mantle, & - num_interfaces_crust_mantle,max_nibool_interfaces_cm, & - nibool_interfaces_crust_mantle,ibool_interfaces_crust_mantle) + nspec_outer_crust_mantle,nspec_inner_crust_mantle) + endif + + ! setup global nodes (iglob) permutation for crust/mantle + if (USE_MESH_GLOBAL_NODE_PERMUTATION) then + ! permutes global nodes + call setup_global_node_permutation(NSPEC_CRUST_MANTLE,NGLOB_CRUST_MANTLE,ibool,iregion_code, & + num_interfaces_crust_mantle,max_nibool_interfaces_cm, & + nibool_interfaces_crust_mantle,ibool_interfaces_crust_mantle) endif case (IREGION_OUTER_CORE) @@ -143,159 +117,80 @@ subroutine setup_color_perm(iregion_code) num_colors_outer_outer_core = 0 num_colors_inner_outer_core = 0 - ! mesh coloring + ! mesh coloring and permutation if (USE_MESH_COLORING_GPU) then - - ! user output - if (myrank == 0) write(IMAIN,*) ' coloring outer core... ' - - ! outer core region - nspec = NSPEC_OUTER_CORE - nglob = NGLOB_OUTER_CORE - idomain = IREGION_OUTER_CORE - - ! creates coloring of elements - allocate(perm(nspec),stat=ier) - if (ier /= 0 ) call exit_mpi(myrank,'Error allocating temporary perm outer_core array') - perm(:) = 0 - - call setup_color(nspec,nglob,ibool,perm, & - idomain,is_on_a_slice_edge, & - num_phase_ispec_outer_core,phase_ispec_inner_outer_core, & - SAVE_MESH_FILES) - - ! checks - if (minval(perm) /= 1) & - call exit_MPI(myrank, 'minval(perm) should be 1') - if (maxval(perm) /= num_phase_ispec_outer_core) & - call exit_MPI(myrank, 'maxval(perm) should be num_phase_ispec_outer_core') - - ! sorts array according to permutation - call synchronize_all() - if (myrank == 0) then - write(IMAIN,*) ' mesh permutation:' - call flush_IMAIN() - endif - - call setup_permutation(nspec,nglob,ibool, & - idomain,perm, & - num_colors_outer_outer_core,num_colors_inner_outer_core, & - num_elem_colors_outer_core, & - num_phase_ispec_outer_core,phase_ispec_inner_outer_core, & - SAVE_MESH_FILES) - - deallocate(perm) + call setup_color_permutation(NSPEC_OUTER_CORE,NGLOB_OUTER_CORE,iregion_code, & + num_phase_ispec_outer_core,phase_ispec_inner_outer_core, & + num_colors_outer_outer_core,num_colors_inner_outer_core, & + num_elem_colors_outer_core) else ! dummy array allocate(num_elem_colors_outer_core(num_colors_outer_outer_core+num_colors_inner_outer_core),stat=ier) if (ier /= 0 ) call exit_mpi(myrank,'Error allocating num_elem_colors_outer_core array') endif + ! setup inner/outer loop permutation for outer core + if (USE_MESH_LOOP_PERMUTATION) then + ! permutes element + call setup_loop_permutation(NSPEC_OUTER_CORE,ibool,iregion_code, & + num_phase_ispec_outer_core,phase_ispec_inner_outer_core, & + nspec_outer_outer_core,nspec_inner_outer_core) + endif + + ! setup global nodes (iglob) permutation for outer core + if (USE_MESH_GLOBAL_NODE_PERMUTATION) then + ! permutes global nodes + call setup_global_node_permutation(NSPEC_OUTER_CORE,NGLOB_OUTER_CORE,ibool,iregion_code, & + num_interfaces_outer_core,max_nibool_interfaces_oc, & + nibool_interfaces_outer_core,ibool_interfaces_outer_core) + endif + case (IREGION_INNER_CORE) ! inner core ! initializes num_colors_outer_inner_core = 0 num_colors_inner_inner_core = 0 - ! mesh coloring + ! mesh coloring and permutation if (USE_MESH_COLORING_GPU) then - - ! user output - if (myrank == 0) write(IMAIN,*) ' coloring inner core... ' - - ! inner core region - nspec = NSPEC_INNER_CORE - nglob = NGLOB_INNER_CORE - idomain = IREGION_INNER_CORE - - ! creates coloring of elements - allocate(perm(nspec),stat=ier) - if (ier /= 0 ) call exit_mpi(myrank,'Error allocating temporary perm inner_core array') - perm(:) = 0 - - call setup_color(nspec,nglob,ibool,perm, & - idomain,is_on_a_slice_edge, & - num_phase_ispec_inner_core,phase_ispec_inner_inner_core, & - SAVE_MESH_FILES) - - ! checks - ! inner core contains fictitious elements not counted for - if (minval(perm) < 0) & - call exit_MPI(myrank, 'minval(perm) should be at least 0') - if (maxval(perm) > num_phase_ispec_inner_core) then - print *,'Error perm inner core:',minval(perm),maxval(perm),num_phase_ispec_inner_core - call exit_MPI(myrank, 'maxval(perm) should be num_phase_ispec_inner_core') - endif - - ! sorts array according to permutation - call synchronize_all() - if (myrank == 0) then - write(IMAIN,*) ' mesh permutation:' - call flush_IMAIN() - endif - - call setup_permutation(nspec,nglob,ibool, & - idomain,perm, & - num_colors_outer_inner_core,num_colors_inner_inner_core, & - num_elem_colors_inner_core, & - num_phase_ispec_inner_core,phase_ispec_inner_inner_core, & - SAVE_MESH_FILES) - - deallocate(perm) + call setup_color_permutation(NSPEC_INNER_CORE,NGLOB_INNER_CORE,iregion_code, & + num_phase_ispec_inner_core,phase_ispec_inner_inner_core, & + num_colors_outer_inner_core,num_colors_inner_inner_core, & + num_elem_colors_inner_core) else ! dummy array allocate(num_elem_colors_inner_core(num_colors_outer_inner_core+num_colors_inner_inner_core),stat=ier) if (ier /= 0 ) call exit_mpi(myrank,'Error allocating num_elem_colors_inner_core array') endif + ! setup inner/outer loop permutation for inner core + if (USE_MESH_LOOP_PERMUTATION) then + ! permutes element + call setup_loop_permutation(NSPEC_INNER_CORE,ibool,iregion_code, & + num_phase_ispec_inner_core,phase_ispec_inner_inner_core, & + nspec_outer_inner_core,nspec_inner_inner_core) + endif + + ! setup global nodes (iglob) permutation for inner core + if (USE_MESH_GLOBAL_NODE_PERMUTATION) then + ! permutes global nodes + call setup_global_node_permutation(NSPEC_INNER_CORE,NGLOB_INNER_CORE,ibool,iregion_code, & + num_interfaces_inner_core,max_nibool_interfaces_ic, & + nibool_interfaces_inner_core,ibool_interfaces_inner_core) + endif + case (IREGION_TRINFINITE) ! transition infinite ! initializes num_colors_outer_trinfinite = 0 num_colors_inner_trinfinite = 0 - ! mesh coloring + ! mesh coloring and permutation if (USE_MESH_COLORING_GPU) then - - ! user output - if (myrank == 0) write(IMAIN,*) ' coloring transition infinite region... ' - - ! crust/mantle region - nspec = NSPEC_TRINFINITE - nglob = NGLOB_TRINFINITE - idomain = IREGION_TRINFINITE - - ! creates coloring of elements - allocate(perm(nspec),stat=ier) - if (ier /= 0 ) call exit_mpi(myrank,'Error allocating temporary perm transition infinite array') - perm(:) = 0 - - call setup_color(nspec,nglob,ibool,perm, & - idomain,is_on_a_slice_edge, & - num_phase_ispec_trinfinite,phase_ispec_inner_trinfinite, & - SAVE_MESH_FILES) - - ! checks - if (minval(perm) /= 1) & - call exit_MPI(myrank, 'minval(perm) should be 1') - if (maxval(perm) /= num_phase_ispec_trinfinite) & - call exit_MPI(myrank, 'maxval(perm) should be num_phase_ispec_trinfinite') - - ! sorts array according to permutation - call synchronize_all() - if (myrank == 0) then - write(IMAIN,*) ' mesh permutation:' - call flush_IMAIN() - endif - - call setup_permutation(nspec,nglob,ibool, & - idomain,perm, & - num_colors_outer_trinfinite,num_colors_inner_trinfinite, & - num_elem_colors_trinfinite, & - num_phase_ispec_trinfinite,phase_ispec_inner_trinfinite, & - SAVE_MESH_FILES) - - deallocate(perm) + call setup_color_permutation(NSPEC_TRINFINITE,NGLOB_TRINFINITE,iregion_code, & + num_phase_ispec_trinfinite,phase_ispec_inner_trinfinite, & + num_colors_outer_trinfinite,num_colors_inner_trinfinite, & + num_elem_colors_trinfinite) else ! dummy array allocate(num_elem_colors_trinfinite(num_colors_outer_trinfinite+num_colors_inner_trinfinite),stat=ier) @@ -308,48 +203,12 @@ subroutine setup_color_perm(iregion_code) num_colors_outer_infinite = 0 num_colors_inner_infinite = 0 - ! mesh coloring + ! mesh coloring and permutation if (USE_MESH_COLORING_GPU) then - - ! user output - if (myrank == 0) write(IMAIN,*) ' coloring infinite region... ' - - ! crust/mantle region - nspec = NSPEC_INFINITE - nglob = NGLOB_INFINITE - idomain = IREGION_INFINITE - - ! creates coloring of elements - allocate(perm(nspec),stat=ier) - if (ier /= 0 ) call exit_mpi(myrank,'Error allocating temporary perm infinite array') - perm(:) = 0 - - call setup_color(nspec,nglob,ibool,perm, & - idomain,is_on_a_slice_edge, & - num_phase_ispec_infinite,phase_ispec_inner_infinite, & - SAVE_MESH_FILES) - - ! checks - if (minval(perm) /= 1) & - call exit_MPI(myrank, 'minval(perm) should be 1') - if (maxval(perm) /= num_phase_ispec_infinite) & - call exit_MPI(myrank, 'maxval(perm) should be num_phase_ispec_infinite') - - ! sorts array according to permutation - call synchronize_all() - if (myrank == 0) then - write(IMAIN,*) ' mesh permutation:' - call flush_IMAIN() - endif - - call setup_permutation(nspec,nglob,ibool, & - idomain,perm, & - num_colors_outer_infinite,num_colors_inner_infinite, & - num_elem_colors_infinite, & - num_phase_ispec_infinite,phase_ispec_inner_infinite, & - SAVE_MESH_FILES) - - deallocate(perm) + call setup_color_permutation(NSPEC_INFINITE,NGLOB_INFINITE,iregion_code, & + num_phase_ispec_infinite,phase_ispec_inner_infinite, & + num_colors_outer_infinite,num_colors_inner_infinite, & + num_elem_colors_infinite) else ! dummy array allocate(num_elem_colors_infinite(num_colors_outer_infinite+num_colors_inner_infinite),stat=ier) @@ -358,7 +217,96 @@ subroutine setup_color_perm(iregion_code) end select - end subroutine setup_color_perm + ! synchronizes processes + call synchronize_all() + + end subroutine setup_mesh_permutation + +! +!------------------------------------------------------------------------------------------------- +! + + subroutine setup_color_permutation(nspec,nglob,idomain, & + num_phase_ispec_d,phase_ispec_inner_d, & + num_colors_outer,num_colors_inner, & + num_elem_colors) + + use constants, only: myrank,IMAIN,USE_MESH_COLORING_GPU, & + IREGION_CRUST_MANTLE,IREGION_OUTER_CORE,IREGION_INNER_CORE, & + IREGION_TRINFINITE,IREGION_INFINITE + + + use meshfem_par, only: ibool,is_on_a_slice_edge + + implicit none + + integer, intent(in) :: nspec,nglob + + ! wrapper array for ispec is in domain: + ! idomain: 1 == crust/mantle, 2 == outer core, 3 == inner core + integer, intent(in) :: idomain + integer, intent(in) :: num_phase_ispec_d + integer, dimension(num_phase_ispec_d,2), intent(inout) :: phase_ispec_inner_d + + integer, intent(in) :: num_colors_outer,num_colors_inner + integer, dimension(num_colors_outer + num_colors_inner),intent(in) :: num_elem_colors + + ! local parameters + integer :: ier + integer, dimension(:), allocatable :: perm + + ! user output + if (myrank == 0) then + write(IMAIN,*) + select case (idomain) + case (IREGION_CRUST_MANTLE) + write(IMAIN,*) ' coloring crust mantle... ' + case (IREGION_OUTER_CORE) + write(IMAIN,*) ' coloring outer core... ' + case (IREGION_INNER_CORE) + write(IMAIN,*) ' coloring inner core... ' + case (IREGION_TRINFINITE) + write(IMAIN,*) ' coloring transition infinite region... ' + case (IREGION_INFINITE) + write(IMAIN,*) ' coloring infinite region... ' + case default + call exit_mpi(myrank,'Invalid region for mesh coloring') + end select + call flush_IMAIN() + endif + + ! creates coloring of elements + allocate(perm(nspec),stat=ier) + if (ier /= 0 ) call exit_mpi(myrank,'Error allocating temporary perm array') + perm(:) = 0 + + call setup_color(nspec,nglob,ibool,perm, & + idomain,is_on_a_slice_edge, & + num_phase_ispec_d,phase_ispec_inner_d) + + ! checks + if (minval(perm) /= 1) & + call exit_MPI(myrank, 'minval(perm) should be 1') + if (maxval(perm) /= num_phase_ispec_d) & + call exit_MPI(myrank, 'maxval(perm) should be num_phase_ispec_d') + + ! sorts array according to color permutation + call synchronize_all() + if (myrank == 0) then + write(IMAIN,*) ' mesh permutation:' + call flush_IMAIN() + endif + + call setup_permutation(nspec,nglob,ibool, & + idomain,perm, & + num_colors_outer,num_colors_inner, & + num_elem_colors, & + num_phase_ispec_d,phase_ispec_inner_d) + + ! free temporary array + deallocate(perm) + + end subroutine setup_color_permutation ! !------------------------------------------------------------------------------------------------- @@ -366,20 +314,20 @@ end subroutine setup_color_perm subroutine setup_color(nspec,nglob,ibool,perm, & idomain,is_on_a_slice_edge, & - num_phase_ispec_d,phase_ispec_inner_d, & - SAVE_MESH_FILES) + num_phase_ispec_d,phase_ispec_inner_d) ! sets up mesh coloring - use constants, only: myrank - - use meshfem_par, only: & - LOCAL_PATH,MAX_NUMBER_OF_COLORS,IMAIN,NGLLX,NGLLY,NGLLZ, & + use constants, only: myrank, & + MAX_NUMBER_OF_COLORS,IMAIN,NGLLX,NGLLY,NGLLZ, & MAX_STRING_LEN,IOUT, & IFLAG_IN_FICTITIOUS_CUBE, & IREGION_CRUST_MANTLE,IREGION_OUTER_CORE,IREGION_INNER_CORE, & IREGION_TRINFINITE,IREGION_INFINITE + use meshfem_par, only: & + LOCAL_PATH,SAVE_MESH_FILES + use meshfem_par, only: & idoubling,xstore_glob,ystore_glob,zstore_glob @@ -409,11 +357,9 @@ subroutine setup_color(nspec,nglob,ibool,perm, & ! idomain: 1 == crust/mantle, 2 == outer core, 3 == inner core integer, intent(in) :: idomain logical, dimension(nspec), intent(in) :: is_on_a_slice_edge - integer, intent(inout) :: num_phase_ispec_d + integer, intent(in) :: num_phase_ispec_d integer, dimension(num_phase_ispec_d,2), intent(inout) :: phase_ispec_inner_d - logical, intent(in) :: SAVE_MESH_FILES - ! local parameters ! added for color permutation integer :: nb_colors_outer_elements,nb_colors_inner_elements @@ -764,8 +710,7 @@ subroutine setup_permutation(nspec,nglob,ibool, & idomain,perm, & num_colors_outer,num_colors_inner, & num_elem_colors, & - num_phase_ispec_d,phase_ispec_inner_d, & - SAVE_MESH_FILES) + num_phase_ispec_d,phase_ispec_inner_d) use constants @@ -773,7 +718,8 @@ subroutine setup_permutation(nspec,nglob,ibool, & ATTENUATION_1D_WITH_3D_STORAGE use meshfem_par, only: & - LOCAL_PATH,xstore_glob,ystore_glob,zstore_glob + LOCAL_PATH,SAVE_MESH_FILES, & + xstore_glob,ystore_glob,zstore_glob implicit none @@ -788,8 +734,6 @@ subroutine setup_permutation(nspec,nglob,ibool, & integer, intent(in) :: num_phase_ispec_d integer, dimension(num_phase_ispec_d,2),intent(inout) :: phase_ispec_inner_d - logical, intent(in) :: SAVE_MESH_FILES - ! local parameters integer, dimension(:), allocatable :: temp_perm_global logical, dimension(:), allocatable :: mask_global @@ -920,19 +864,21 @@ end subroutine setup_permutation !------------------------------------------------------------------------------------------------- ! - subroutine setup_loop_permutation(nspec,nglob,ibool,idomain, & - num_phase_ispec_d,phase_ispec_inner_d,nspec_outer_d,nspec_inner_d, & - num_interfaces,max_nibool_interfaces, & - nibool_interfaces,ibool_interfaces) + subroutine setup_loop_permutation(nspec,ibool,idomain, & + num_phase_ispec_d,phase_ispec_inner_d, & + nspec_outer_d,nspec_inner_d) ! sorts element arrays according to inner/outer loop order use constants, only: NGLLX,NGLLY,NGLLZ,IMAIN,myrank, & - IREGION_CRUST_MANTLE,IREGION_INNER_CORE,IREGION_OUTER_CORE + IREGION_CRUST_MANTLE,IREGION_INNER_CORE,IREGION_OUTER_CORE,IREGION_TRINFINITE,IREGION_INFINITE, & + IFLAG_IN_FICTITIOUS_CUBE + + use meshfem_par, only: idoubling implicit none - integer, intent(in) :: nspec,nglob + integer, intent(in) :: nspec integer, dimension(NGLLX,NGLLY,NGLLZ,nspec), intent(inout) :: ibool ! wrapper array for ispec is in domain: @@ -942,31 +888,26 @@ subroutine setup_loop_permutation(nspec,nglob,ibool,idomain, & integer, dimension(num_phase_ispec_d,2), intent(inout) :: phase_ispec_inner_d integer, intent(in) :: nspec_outer_d,nspec_inner_d - integer,intent(in) :: num_interfaces,max_nibool_interfaces - integer,dimension(num_interfaces),intent(in) :: nibool_interfaces - integer,dimension(max_nibool_interfaces,num_interfaces),intent(inout):: ibool_interfaces - ! local parameters integer :: iphase,num_elements,ier,num_reordered ! ispec re-ordering integer :: ispec,ispec_loop,ispec_p,old_ispec,new_ispec integer, dimension(:), allocatable :: perm_ispec_ordered - ! iglob re-ordering - integer :: i,j,k,iglob,iglob_ordered,iglob_new,iglob_old - integer, dimension(:), allocatable :: perm_iglob_ordered - - ! permutes iglob entries - logical, parameter :: USE_GLOBAL_NODE_PERMUTATION = .true. ! user output if (myrank == 0) then + write(IMAIN,*) select case (idomain) case (IREGION_CRUST_MANTLE) - write(IMAIN,*) ' permuting crust/mantle element order' - case (IREGION_INNER_CORE) - write(IMAIN,*) ' permuting inner core element order' + write(IMAIN,*) ' permuting element order for crust/mantle' case (IREGION_OUTER_CORE) - write(IMAIN,*) ' permuting outer core element order' + write(IMAIN,*) ' permuting element order for outer core' + case (IREGION_INNER_CORE) + write(IMAIN,*) ' permuting element order for inner core' + case (IREGION_TRINFINITE) + write(IMAIN,*) ' permuting element order for transition infinite region' + case (IREGION_INFINITE) + write(IMAIN,*) ' permuting element order for infinite region' end select call flush_IMAIN() endif @@ -995,6 +936,12 @@ subroutine setup_loop_permutation(nspec,nglob,ibool,idomain, & stop 'Invalid ispec in phase re-ordering' endif + ! inner core excludes fictitious elements + if (idomain == IREGION_INNER_CORE) then + ! exclude fictitious elements in central cube + if (idoubling(ispec) == IFLAG_IN_FICTITIOUS_CUBE) cycle + endif + ! adds element in newly ordered list ispec_loop = ispec_loop + 1 @@ -1012,6 +959,36 @@ subroutine setup_loop_permutation(nspec,nglob,ibool,idomain, & enddo enddo + ! inner core fictitious elements + if (idomain == IREGION_INNER_CORE) then + ! moves fictitious elements to end of array + do iphase = 1,2 + if (iphase == 1) then + num_elements = nspec_outer_d + else + num_elements = nspec_inner_d + endif + do ispec_p = 1,num_elements + ispec = phase_ispec_inner_d(ispec_p,iphase) + ! move fictitious element in central cube to end of list + if (idoubling(ispec) == IFLAG_IN_FICTITIOUS_CUBE) then + ispec_loop = ispec_loop + 1 + perm_ispec_ordered(ispec) = ispec_loop + endif + enddo + enddo + + ! in case phase_ispec_inner_inner_core only contains non-fictitious elements, + ! we still missed some elements. let's move those missed (fictitious) elements to end of array. + do ispec = 1,nspec + if (perm_ispec_ordered(ispec) == 0) then + ! move fictitious element in central cube to end of list + ispec_loop = ispec_loop + 1 + perm_ispec_ordered(ispec) = ispec_loop + endif + enddo + endif + ! debug !if (myrank == 0) print *,'debug: entries ispec_ordered: ',perm_ispec_ordered(1:10) @@ -1032,23 +1009,22 @@ subroutine setup_loop_permutation(nspec,nglob,ibool,idomain, & num_elements = nspec_inner_d endif do ispec_p = 1,num_elements - ispec_loop = ispec_loop + 1 - old_ispec = phase_ispec_inner_d(ispec_p,iphase) new_ispec = perm_ispec_ordered(old_ispec) - ! checks ordering - if (new_ispec /= ispec_loop) then - print *,'Error: rank ',myrank,' has invalid ispec ordering: ',new_ispec,' should be ',ispec_loop - call exit_MPI(myrank,'Invalid ispec ordering for phase_ispec_inner_d') - endif - ! sets new element entry if (old_ispec /= new_ispec) then num_reordered = num_reordered + 1 ! sets new ordering phase_ispec_inner_d(ispec_p,iphase) = new_ispec endif + + ! checks ordering + ispec_loop = ispec_loop + 1 + if (new_ispec /= ispec_loop .and. idomain /= IREGION_INNER_CORE) then + print *,'Error: rank ',myrank,' has invalid ispec ordering: ',new_ispec,' should be ',ispec_loop + call exit_MPI(myrank,'Invalid ispec ordering for phase_ispec_inner_d') + endif enddo enddo @@ -1066,120 +1042,172 @@ subroutine setup_loop_permutation(nspec,nglob,ibool,idomain, & ! free temporary array deallocate(perm_ispec_ordered) - ! global node re-ordering - if (USE_GLOBAL_NODE_PERMUTATION) then - ! note: re-orders iglob to have an somewhat increasing order according the new ibool array - ! global node indices are mostly retrieved by: - ! - ! do k = 1,NGLLZ - ! do j = 1,NGLLY - ! do i = 1,NGLLX - ! iglob = ibool(i,j,k,ispec) - ! .. - ! - ! we try to have iglob increasing consecutively in this loop order. - ! nevertheless, since global nodes are shared, some of the incrementing is not +1 for neighboring elements. - ! to avoid many jumps, a mesh coloring might help at least for the first color, where the next looped element - ! won't have shared global nodes. - ! there might be more sophisticated ways to re-order iglobs, like using space-filling curves. - ! here, we just try to simply re-order (without colors) - to see if this has any code performance effect. + end subroutine setup_loop_permutation - ! user output - if (myrank == 0) then - write(IMAIN,*) ' permuting global node entries' - call flush_IMAIN() - endif +! +!------------------------------------------------------------------------------------------------- +! - allocate(perm_iglob_ordered(nglob),stat=ier) - if (ier /= 0) stop 'Error allocating iglob_ordered array' - perm_iglob_ordered(:) = 0 + subroutine setup_global_node_permutation(nspec,nglob,ibool,idomain, & + num_interfaces,max_nibool_interfaces, & + nibool_interfaces,ibool_interfaces) - ! orders iglob according to new ibool element ordering - iglob_ordered = 0 - do ispec = 1,nspec - do k = 1,NGLLZ - do j = 1,NGLLY - do i = 1,NGLLX - iglob = ibool(i,j,k,ispec) - ! adds iglob entry - if (perm_iglob_ordered(iglob) == 0) then - iglob_ordered = iglob_ordered + 1 - perm_iglob_ordered(iglob) = iglob_ordered - endif - enddo + ! sorts ibool & ibool_interfaces arrays according to new iglob order + + use constants, only: NGLLX,NGLLY,NGLLZ,IMAIN,myrank, & + IREGION_CRUST_MANTLE,IREGION_INNER_CORE,IREGION_OUTER_CORE,IREGION_TRINFINITE,IREGION_INFINITE + + implicit none + + integer, intent(in) :: nspec,nglob + integer, dimension(NGLLX,NGLLY,NGLLZ,nspec), intent(inout) :: ibool + + ! wrapper array for ispec is in domain: + ! idomain: 1 == crust/mantle, 2 == outer core, 3 == inner core + integer, intent(in) :: idomain + + integer,intent(in) :: num_interfaces,max_nibool_interfaces + integer,dimension(num_interfaces),intent(in) :: nibool_interfaces + integer,dimension(max_nibool_interfaces,num_interfaces),intent(inout):: ibool_interfaces + + ! local parameters + integer :: num_reordered,ier + ! iglob re-ordering + integer :: ispec,i,j,k,iglob,iglob_ordered,iglob_new,iglob_old + integer, dimension(:), allocatable :: perm_iglob_ordered + + ! global node re-ordering + ! note: re-orders iglob to have an somewhat increasing order according the new ibool array + ! global node indices are mostly retrieved by: + ! + ! do k = 1,NGLLZ + ! do j = 1,NGLLY + ! do i = 1,NGLLX + ! iglob = ibool(i,j,k,ispec) + ! .. + ! + ! we try to have iglob increasing consecutively in this loop order. + ! nevertheless, since global nodes are shared, some of the incrementing is not +1 for neighboring elements. + ! to avoid many jumps, a mesh coloring might help at least for the first color, where the next looped element + ! won't have shared global nodes. + ! + ! however, using mesh coloring and this iglob re-ordering to avoid too many jumps on shared nodes leads + ! to a slower performance. it seems that the cache memory is better utilized if neighboring elements + ! have as many shared nodes as possible... + ! + ! there might be other ideas and more sophisticated ways to re-order iglobs, like using space-filling curves. + ! here, we just try to simply re-order (without colors) - to see if this has any code performance effect. + + ! user output + if (myrank == 0) then + write(IMAIN,*) + select case (idomain) + case (IREGION_CRUST_MANTLE) + write(IMAIN,*) ' permuting global node entries for crust/mantle' + case (IREGION_INNER_CORE) + write(IMAIN,*) ' permuting global node entries for inner core' + case (IREGION_OUTER_CORE) + write(IMAIN,*) ' permuting global node entries for outer core' + case (IREGION_TRINFINITE) + write(IMAIN,*) ' permuting global node entries for transition infinite region' + case (IREGION_INFINITE) + write(IMAIN,*) ' permuting global node entries for infinite region' + end select + call flush_IMAIN() + endif + + allocate(perm_iglob_ordered(nglob),stat=ier) + if (ier /= 0) stop 'Error allocating iglob_ordered array' + perm_iglob_ordered(:) = 0 + + ! orders iglob according to new ibool element ordering + iglob_ordered = 0 + do ispec = 1,nspec + ! element indexing + do k = 1,NGLLZ + do j = 1,NGLLY + do i = 1,NGLLX + iglob = ibool(i,j,k,ispec) + ! adds iglob entry + if (perm_iglob_ordered(iglob) == 0) then + iglob_ordered = iglob_ordered + 1 + perm_iglob_ordered(iglob) = iglob_ordered + endif enddo enddo enddo + enddo - ! checks - if (iglob_ordered /= nglob) then - print *,'Error: rank ',myrank,' has invalid iglob_ordered: ',iglob_ordered,' - should be ',nglob - call exit_MPI(myrank,'Invalid iglob ordering') - endif + ! checks + if (iglob_ordered /= nglob) then + print *,'Error: rank ',myrank,' has invalid iglob_ordered: ',iglob_ordered,' - should be ',nglob + call exit_MPI(myrank,'Invalid iglob ordering') + endif - ! re-orders ibool entries - num_reordered = 0 - do ispec = 1,nspec - do k = 1,NGLLZ - do j = 1,NGLLY - do i = 1,NGLLX - iglob_old = ibool(i,j,k,ispec) - iglob_new = perm_iglob_ordered(iglob_old) - - ! checks range - if (iglob_new < 1 .or. iglob_new > nglob) then - print *,'Error: rank ',myrank,' has invalid iglob ',iglob_new,' - should be between 1 and ',nglob - print *,' ispec: ',ispec,' i/j/k: ',i,j,k,' iglob_old: ',iglob_old,' iglob_new: ',iglob_new - stop 'Invalid iglob in ibool re-ordering' - endif - - ! updates entry - if (iglob_old /= iglob_new) then - num_reordered = num_reordered + 1 - ibool(i,j,k,ispec) = iglob_new - endif - enddo + ! re-orders ibool entries + num_reordered = 0 + do ispec = 1,nspec + ! permutes element indexing + do k = 1,NGLLZ + do j = 1,NGLLY + do i = 1,NGLLX + iglob_old = ibool(i,j,k,ispec) + iglob_new = perm_iglob_ordered(iglob_old) + + ! checks range + if (iglob_new < 1 .or. iglob_new > nglob) then + print *,'Error: rank ',myrank,' has invalid iglob ',iglob_new,' - should be between 1 and ',nglob + print *,' ispec: ',ispec,' i/j/k: ',i,j,k,' iglob_old: ',iglob_old,' iglob_new: ',iglob_new + stop 'Invalid iglob in ibool re-ordering' + endif + + ! updates entry + if (iglob_old /= iglob_new) then + num_reordered = num_reordered + 1 + ibool(i,j,k,ispec) = iglob_new + endif enddo enddo enddo + enddo - ! debug - !if (myrank == 0) print *,'debug: ibool A',ibool(:,:,:,1) - !if (myrank == 0) print *,'debug: ibool B',ibool(:,:,:,2) + ! debug + !if (myrank == 0) print *,'debug: ibool A',ibool(:,:,:,1) + !if (myrank == 0) print *,'debug: ibool B',ibool(:,:,:,2) - ! user output - if (myrank == 0) then - write(IMAIN,*) ' total number of re-ordered entries: ',num_reordered,' out of ',NGLLX*NGLLY*NGLLZ*nspec - write(IMAIN,*) - call flush_IMAIN() - endif + ! user output + if (myrank == 0) then + write(IMAIN,*) ' total number of re-ordered entries: ',num_reordered,' out of ',NGLLX*NGLLY*NGLLZ*nspec + write(IMAIN,*) + call flush_IMAIN() + endif - ! re-orders MPI interface ibool array - do i = 1,num_interfaces - ! ibool entries - do j = 1,nibool_interfaces(i) - iglob_old = ibool_interfaces(j,i) - iglob_new = perm_iglob_ordered(iglob_old) - - ! checks range - if (iglob_new < 1 .or. iglob_new > nglob) then - print *,'Error: rank ',myrank,' has invalid iglob ',iglob_new,' - should be between 1 and ',nglob - print *,' interface: ',i,' point: ',j,' iglob_old: ',iglob_old,' iglob_new: ',iglob_new - stop 'Invalid iglob in MPI interface re-ordering' - endif + ! re-orders MPI interface ibool array + do i = 1,num_interfaces + ! ibool entries + do j = 1,nibool_interfaces(i) + iglob_old = ibool_interfaces(j,i) + iglob_new = perm_iglob_ordered(iglob_old) + + ! checks range + if (iglob_new < 1 .or. iglob_new > nglob) then + print *,'Error: rank ',myrank,' has invalid iglob ',iglob_new,' - should be between 1 and ',nglob + print *,' interface: ',i,' point: ',j,' iglob_old: ',iglob_old,' iglob_new: ',iglob_new + stop 'Invalid iglob in MPI interface re-ordering' + endif - ! updates entry - if (iglob_old /= iglob_new) then - ibool_interfaces(j,i) = iglob_new - endif - enddo + ! updates entry + if (iglob_old /= iglob_new) then + ibool_interfaces(j,i) = iglob_new + endif enddo + enddo - ! free array - deallocate(perm_iglob_ordered) - endif + ! free array + deallocate(perm_iglob_ordered) + + end subroutine setup_global_node_permutation - end subroutine setup_loop_permutation ! !------------------------------------------------------------------------------------------------- From 50fe3a21d8042b9305a6a2182e574eef92ed50ba Mon Sep 17 00:00:00 2001 From: Daniel Peter Date: Thu, 19 Sep 2024 12:41:25 +0200 Subject: [PATCH 11/13] updates github actions --- .github/workflows/CI.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 3fac196da..b73443e9f 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -147,7 +147,7 @@ jobs: strategy: matrix: - os: [ubuntu-latest,ubuntu-20.04] + os: [ubuntu-latest,ubuntu-22.04] steps: - uses: actions/checkout@v4 @@ -164,8 +164,8 @@ jobs: linuxCheck-Intel: - name: Test Intel on ubuntu-20.04 - runs-on: ubuntu-20.04 + name: Test Intel on ubuntu-22.04 + runs-on: ubuntu-22.04 needs: changesCheck steps: @@ -173,7 +173,7 @@ jobs: - name: Cache Intel oneapi packages id: cache-intel-oneapi - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: /opt/intel/oneapi key: install-${{ runner.os }}-all From 04456d64207b6dbe4a2a5983a29614746be4d614 Mon Sep 17 00:00:00 2001 From: Daniel Peter Date: Thu, 19 Sep 2024 12:44:12 +0200 Subject: [PATCH 12/13] updates parameter calculations --- src/meshfem3D/calc_jacobian.f90 | 4 +- src/meshfem3D/create_regular_elements.f90 | 2 +- src/specfem3D/check_stability.f90 | 67 ++++++++++++++--------- src/specfem3D/compute_add_sources.f90 | 2 +- 4 files changed, 44 insertions(+), 31 deletions(-) diff --git a/src/meshfem3D/calc_jacobian.f90 b/src/meshfem3D/calc_jacobian.f90 index 51f21fa5d..d27a2e18e 100644 --- a/src/meshfem3D/calc_jacobian.f90 +++ b/src/meshfem3D/calc_jacobian.f90 @@ -131,7 +131,6 @@ subroutine recalc_jacobian_gll3D(xstore,ystore,zstore,xigll,yigll,zigll, & do j1 = 1,NGLLY do i1 = 1,NGLLX - hlagrange = hxir(i1) * hetar(j1) * hgammar(k1) hlagrange_xi = hpxir(i1) * hetar(j1) * hgammar(k1) hlagrange_eta = hxir(i1) * hpetar(j1) * hgammar(k1) hlagrange_gamma = hxir(i1) * hetar(j1) * hpgammar(k1) @@ -154,6 +153,8 @@ subroutine recalc_jacobian_gll3D(xstore,ystore,zstore,xigll,yigll,zigll, & ! DEBUG ! test the Lagrange polynomial and its derivative + hlagrange = hxir(i1) * hetar(j1) * hgammar(k1) + xmesh = xmesh + x * hlagrange ymesh = ymesh + y * hlagrange zmesh = zmesh + z * hlagrange @@ -248,7 +249,6 @@ subroutine recalc_jacobian_gll3D(xstore,ystore,zstore,xigll,yigll,zigll, & do j1 = 1,NGLLY do i1 = 1,NGLLX - hlagrange = hxir(i1) * hetar(j1) * hgammar(k1) hlagrange_xi = hpxir(i1) * hetar(j1) * hgammar(k1) hlagrange_eta = hxir(i1) * hpetar(j1) * hgammar(k1) hlagrange_gamma = hxir(i1) * hetar(j1) * hpgammar(k1) diff --git a/src/meshfem3D/create_regular_elements.f90 b/src/meshfem3D/create_regular_elements.f90 index d84918ec5..8a563fb53 100644 --- a/src/meshfem3D/create_regular_elements.f90 +++ b/src/meshfem3D/create_regular_elements.f90 @@ -215,7 +215,7 @@ subroutine create_regular_elements(ilayer,ichunk,ispec_count,ipass, & ! crustal elements are stretched to be thinner in the upper crust than in lower crust in the 3D case ! max ratio between size of upper crust elements and ! lower crust elements is given by the param MAX_RATIO_STRETCHING - ! to avoid stretching, set MAX_RATIO_STRETCHING = 1.0d in constants.h + ! to avoid stretching, set EARTH_MAX_RATIO_CRUST_STRETCHING = 1.0d in constants.h call compute_coord_main_mesh(offset_x,offset_y,offset_z,xelm,yelm,zelm, & ANGULAR_WIDTH_XI_RAD,ANGULAR_WIDTH_ETA_RAD,iproc_xi,iproc_eta, & NPROC_XI,NPROC_ETA,NEX_PER_PROC_XI,NEX_PER_PROC_ETA, & diff --git a/src/specfem3D/check_stability.f90 b/src/specfem3D/check_stability.f90 index 1c8ef1eda..1b9d0f9e8 100644 --- a/src/specfem3D/check_stability.f90 +++ b/src/specfem3D/check_stability.f90 @@ -96,12 +96,15 @@ subroutine check_stability() ! compute maximum of norm of displacement in each slice if (.not. GPU_MODE) then ! on CPU - norm_cm = maxval(sqrt(displ_crust_mantle(1,:)**2 + displ_crust_mantle(2,:)**2 + displ_crust_mantle(3,:)**2)) + norm_cm = maxval( (displ_crust_mantle(1,:)**2 + displ_crust_mantle(2,:)**2 + displ_crust_mantle(3,:)**2) ) + norm_cm = sqrt(norm_cm) if (NSPEC_INNER_CORE > 0) then - norm_ic = maxval(sqrt(displ_inner_core(1,:)**2 + displ_inner_core(2,:)**2 + displ_inner_core(3,:)**2)) + norm_ic = maxval( (displ_inner_core(1,:)**2 + displ_inner_core(2,:)**2 + displ_inner_core(3,:)**2) ) + norm_ic = sqrt(norm_ic) else norm_ic = 0._CUSTOM_REAL endif + Usolidnorm = max(norm_cm,norm_ic) if (NSPEC_OUTER_CORE > 0) then @@ -150,7 +153,7 @@ subroutine check_stability() ! negative values can occur with some compilers when the unstable value is greater ! than the greatest possible floating-point number of the machine ! this trick checks for NaN (Not a Number), which is not even equal to itself - if (Usolidnorm > STABILITY_THRESHOLD .or. Usolidnorm < 0 .or. Usolidnorm /= Usolidnorm) then + if (Usolidnorm > STABILITY_THRESHOLD .or. Usolidnorm < 0._CUSTOM_REAL .or. Usolidnorm /= Usolidnorm) then print *,'Error: simulation became unstable in solid, process',myrank if (GPU_MODE) then print *,' norm solid = ',Usolidnorm @@ -160,7 +163,7 @@ subroutine check_stability() print *,'Please check time step setting in get_timestep_and_layers.f90, exiting...' call exit_MPI(myrank,'forward simulation became unstable in solid and blew up') endif - if (Ufluidnorm > STABILITY_THRESHOLD .or. Ufluidnorm < 0 .or. Ufluidnorm /= Ufluidnorm) then + if (Ufluidnorm > STABILITY_THRESHOLD .or. Ufluidnorm < 0._CUSTOM_REAL .or. Ufluidnorm /= Ufluidnorm) then print *,'Error: simulation became unstable in fluid, process',myrank print *,' norm fluid = ',Ufluidnorm print *,'Please check time step setting in get_timestep_and_layers.f90, exiting...' @@ -174,9 +177,11 @@ subroutine check_stability() if (SIMULATION_TYPE == 3) then if (.not. GPU_MODE) then ! on CPU - norm_cm = maxval(sqrt(b_displ_crust_mantle(1,:)**2 + b_displ_crust_mantle(2,:)**2 + b_displ_crust_mantle(3,:)**2)) + norm_cm = maxval( (b_displ_crust_mantle(1,:)**2 + b_displ_crust_mantle(2,:)**2 + b_displ_crust_mantle(3,:)**2) ) + norm_cm = sqrt(norm_cm) if (NSPEC_INNER_CORE > 0) then - norm_ic = maxval(sqrt(b_displ_inner_core(1,:)**2 + b_displ_inner_core(2,:)**2 + b_displ_inner_core(3,:)**2)) + norm_ic = maxval( (b_displ_inner_core(1,:)**2 + b_displ_inner_core(2,:)**2 + b_displ_inner_core(3,:)**2) ) + norm_ic = sqrt(norm_ic) else norm_ic = 0._CUSTOM_REAL endif @@ -193,9 +198,9 @@ subroutine check_stability() endif ! this trick checks for NaN (Not a Number), which is not even equal to itself - if (b_Usolidnorm > STABILITY_THRESHOLD .or. b_Usolidnorm < 0 .or. b_Usolidnorm /= b_Usolidnorm) & + if (b_Usolidnorm > STABILITY_THRESHOLD .or. b_Usolidnorm < 0._CUSTOM_REAL .or. b_Usolidnorm /= b_Usolidnorm) & call exit_MPI(myrank,'backward simulation became unstable and blew up in the solid') - if (b_Ufluidnorm > STABILITY_THRESHOLD .or. b_Ufluidnorm < 0 .or. b_Ufluidnorm /= b_Ufluidnorm) & + if (b_Ufluidnorm > STABILITY_THRESHOLD .or. b_Ufluidnorm < 0._CUSTOM_REAL .or. b_Ufluidnorm /= b_Ufluidnorm) & call exit_MPI(myrank,'backward simulation became unstable and blew up in the fluid') ! compute the maximum of the maxima for all the slices using an MPI reduction @@ -286,9 +291,9 @@ subroutine check_stability() write(IMAIN,*) 'Max of strain, epsilondev_crust_mantle =',Strain2_norm_all endif - write(IMAIN,*) 'Elapsed time in seconds = ',tCPU + write(IMAIN,*) 'Elapsed time in seconds = ',sngl(tCPU) write(IMAIN,"(' Elapsed time in hh:mm:ss = ',i6,' h ',i2.2,' m ',i2.2,' s')") ihours,iminutes,iseconds - write(IMAIN,*) 'Mean elapsed time per time step in seconds = ',tCPU/dble(it) + write(IMAIN,*) 'Mean elapsed time per time step in seconds = ',sngl(tCPU/dble(it)) ! do not check before MIN_TIME_STEPS_FOR_SLOW_NODES time steps ! because the time step estimate (which is an average) may then be unreliable @@ -308,11 +313,11 @@ subroutine check_stability() write(IMAIN,*) 'Time steps remaining = ',NSTEP - it endif - write(IMAIN,*) 'Estimated remaining time in seconds = ',t_remain + write(IMAIN,*) 'Estimated remaining time in seconds = ',sngl(t_remain) write(IMAIN,"(' Estimated remaining time in hh:mm:ss = ',i6,' h ',i2.2,' m ',i2.2,' s')") & ihours_remain,iminutes_remain,iseconds_remain - write(IMAIN,*) 'Estimated total run time in seconds = ',t_total + write(IMAIN,*) 'Estimated total run time in seconds = ',sngl(t_total) write(IMAIN,"(' Estimated total run time in hh:mm:ss = ',i6,' h ',i2.2,' m ',i2.2,' s')") & ihours_total,iminutes_total,iseconds_total write(IMAIN,*) 'We have done ',sngl(100.d0*dble(it)/dble(NSTEP)),'% of that' @@ -408,7 +413,8 @@ subroutine check_stability() ! negative values can occur with some compilers when the unstable value is greater ! than the greatest possible floating-point number of the machine ! this trick checks for NaN (Not a Number), which is not even equal to itself - if (Usolidnorm_all > STABILITY_THRESHOLD .or. Usolidnorm_all < 0 .or. Usolidnorm_all /= Usolidnorm_all) then + if (Usolidnorm_all > STABILITY_THRESHOLD .or. Usolidnorm_all < 0._CUSTOM_REAL & + .or. Usolidnorm_all /= Usolidnorm_all) then print *,'Error: simulation became unstable' if (GPU_MODE) then print *,' all processes total norm solid = ',Usolidnorm_all @@ -419,7 +425,8 @@ subroutine check_stability() print *,'Please check time step setting in get_timestep_and_layers.f90, exiting...' call exit_MPI(myrank,'forward simulation became unstable and blew up in the solid') endif - if (Ufluidnorm_all > STABILITY_THRESHOLD .or. Ufluidnorm_all < 0 .or. Ufluidnorm_all /= Ufluidnorm_all) then + if (Ufluidnorm_all > STABILITY_THRESHOLD .or. Ufluidnorm_all < 0._CUSTOM_REAL & + .or. Ufluidnorm_all /= Ufluidnorm_all) then print *,'Error: simulation became unstable' print *,' all processes total norm fluid = ',Ufluidnorm_all print *,'Please check time step setting in get_timestep_and_layers.f90, exiting...' @@ -428,9 +435,11 @@ subroutine check_stability() if (SIMULATION_TYPE == 3 .and. .not. UNDO_ATTENUATION) then ! this trick checks for NaN (Not a Number), which is not even equal to itself - if (b_Usolidnorm_all > STABILITY_THRESHOLD .or. b_Usolidnorm_all < 0 .or. b_Usolidnorm_all /= b_Usolidnorm_all) & + if (b_Usolidnorm_all > STABILITY_THRESHOLD .or. b_Usolidnorm_all < 0._CUSTOM_REAL & + .or. b_Usolidnorm_all /= b_Usolidnorm_all) & call exit_MPI(myrank,'backward simulation became unstable and blew up in the solid') - if (b_Ufluidnorm_all > STABILITY_THRESHOLD .or. b_Ufluidnorm_all < 0 .or. b_Ufluidnorm_all /= b_Ufluidnorm_all) & + if (b_Ufluidnorm_all > STABILITY_THRESHOLD .or. b_Ufluidnorm_all < 0._CUSTOM_REAL & + .or. b_Ufluidnorm_all /= b_Ufluidnorm_all) & call exit_MPI(myrank,'backward simulation became unstable and blew up in the fluid') endif @@ -481,9 +490,11 @@ subroutine check_stability_backward() ! compute maximum of norm of displacement in each slice if (.not. GPU_MODE) then ! on CPU - norm_cm = maxval(sqrt(b_displ_crust_mantle(1,:)**2 + b_displ_crust_mantle(2,:)**2 + b_displ_crust_mantle(3,:)**2)) + norm_cm = maxval( (b_displ_crust_mantle(1,:)**2 + b_displ_crust_mantle(2,:)**2 + b_displ_crust_mantle(3,:)**2) ) + norm_cm = sqrt(norm_cm) if (NSPEC_INNER_CORE > 0) then - norm_ic = maxval(sqrt(b_displ_inner_core(1,:)**2 + b_displ_inner_core(2,:)**2 + b_displ_inner_core(3,:)**2)) + norm_ic = maxval( (b_displ_inner_core(1,:)**2 + b_displ_inner_core(2,:)**2 + b_displ_inner_core(3,:)**2) ) + norm_ic = sqrt(norm_ic) else norm_ic = 0._CUSTOM_REAL endif @@ -499,9 +510,9 @@ subroutine check_stability_backward() endif ! this trick checks for NaN (Not a Number), which is not even equal to itself - if (b_Usolidnorm > STABILITY_THRESHOLD .or. b_Usolidnorm < 0 .or. b_Usolidnorm /= b_Usolidnorm) & + if (b_Usolidnorm > STABILITY_THRESHOLD .or. b_Usolidnorm < 0._CUSTOM_REAL .or. b_Usolidnorm /= b_Usolidnorm) & call exit_MPI(myrank,'backward simulation became unstable and blew up in the solid') - if (b_Ufluidnorm > STABILITY_THRESHOLD .or. b_Ufluidnorm < 0 .or. b_Ufluidnorm /= b_Ufluidnorm) & + if (b_Ufluidnorm > STABILITY_THRESHOLD .or. b_Ufluidnorm < 0._CUSTOM_REAL .or. b_Ufluidnorm /= b_Ufluidnorm) & call exit_MPI(myrank,'backward simulation became unstable and blew up in the fluid') ! compute the maximum of the maxima for all the slices using an MPI reduction @@ -559,9 +570,11 @@ subroutine check_stability_backward() ! negative values can occur with some compilers when the unstable value is greater ! than the greatest possible floating-point number of the machine ! this trick checks for NaN (Not a Number), which is not even equal to itself - if (b_Usolidnorm_all > STABILITY_THRESHOLD .or. b_Usolidnorm_all < 0 .or. b_Usolidnorm_all /= b_Usolidnorm_all) & + if (b_Usolidnorm_all > STABILITY_THRESHOLD .or. b_Usolidnorm_all < 0._CUSTOM_REAL & + .or. b_Usolidnorm_all /= b_Usolidnorm_all) & call exit_MPI(myrank,'backward simulation became unstable and blew up in the solid') - if (b_Ufluidnorm_all > STABILITY_THRESHOLD .or. b_Ufluidnorm_all < 0 .or. b_Ufluidnorm_all /= b_Ufluidnorm_all) & + if (b_Ufluidnorm_all > STABILITY_THRESHOLD .or. b_Ufluidnorm_all < 0._CUSTOM_REAL & + .or. b_Ufluidnorm_all /= b_Ufluidnorm_all) & call exit_MPI(myrank,'backward simulation became unstable and blew up in the fluid') endif @@ -652,9 +665,9 @@ subroutine write_timestamp_file(Usolidnorm_all,Ufluidnorm_all,b_Usolidnorm_all,b write(IOUT,*) endif - write(IOUT,*) 'Elapsed time in seconds = ',tCPU + write(IOUT,*) 'Elapsed time in seconds = ',sngl(tCPU) write(IOUT,"(' Elapsed time in hh:mm:ss = ',i6,' h ',i2.2,' m ',i2.2,' s')") ihours,iminutes,iseconds - write(IOUT,*) 'Mean elapsed time per time step in seconds = ',tCPU/dble(it) + write(IOUT,*) 'Mean elapsed time per time step in seconds = ',sngl(tCPU/dble(it)) write(IOUT,*) if (NUMBER_OF_RUNS > 1 .and. NUMBER_OF_THIS_RUN < NUMBER_OF_RUNS) then @@ -668,12 +681,12 @@ subroutine write_timestamp_file(Usolidnorm_all,Ufluidnorm_all,b_Usolidnorm_all,b write(IOUT,*) 'Time steps remaining = ',NSTEP - it endif - write(IOUT,*) 'Estimated remaining time in seconds = ',t_remain + write(IOUT,*) 'Estimated remaining time in seconds = ',sngl(t_remain) write(IOUT,"(' Estimated remaining time in hh:mm:ss = ',i6,' h ',i2.2,' m ',i2.2,' s')") & ihours_remain,iminutes_remain,iseconds_remain write(IOUT,*) - write(IOUT,*) 'Estimated total run time in seconds = ',t_total + write(IOUT,*) 'Estimated total run time in seconds = ',sngl(t_total) write(IOUT,"(' Estimated total run time in hh:mm:ss = ',i6,' h ',i2.2,' m ',i2.2,' s')") & ihours_total,iminutes_total,iseconds_total write(IOUT,*) 'We have done ',sngl(100.d0*dble(it)/dble(NSTEP)),'% of that' @@ -761,7 +774,7 @@ subroutine print_elapsed_time() iminutes = (int_tCPU - 3600*ihours) / 60 iseconds = int_tCPU - 3600*ihours - 60*iminutes write(IMAIN,*) 'Time-Loop Complete. Timing info:' - write(IMAIN,*) 'Total elapsed time in seconds = ',tCPU + write(IMAIN,*) 'Total elapsed time in seconds = ',sngl(tCPU) write(IMAIN,"(' Total elapsed time in hh:mm:ss = ',i6,' h ',i2.2,' m ',i2.2,' s')") ihours,iminutes,iseconds write(IMAIN,*) call flush_IMAIN() diff --git a/src/specfem3D/compute_add_sources.f90 b/src/specfem3D/compute_add_sources.f90 index c71349ebf..9096e3838 100644 --- a/src/specfem3D/compute_add_sources.f90 +++ b/src/specfem3D/compute_add_sources.f90 @@ -537,7 +537,7 @@ double precision function get_stf_viscoelastic(time_source_dble,isource,it_index ! moment-tensor ! Heaviside source time function if (USE_MONOCHROMATIC_CMT_SOURCE) then - f0 = 1.d0 / hdur(isource) ! using half duration as a FREQUENCY just to avoid changing CMTSOLUTION file format + f0 = 1.d0 / hdur(isource) ! using half duration as a PERIOD just to avoid changing CMTSOLUTION file format stf = comp_source_time_function_mono(time_source_dble,f0) else stf = comp_source_time_function(time_source_dble,hdur_Gaussian(isource),it_index) From 72430ecc3d360c3ae8991d22cfbc47b5e4159ddd Mon Sep 17 00:00:00 2001 From: Daniel Peter Date: Thu, 19 Sep 2024 12:45:52 +0200 Subject: [PATCH 13/13] adds vtk write routine --- src/shared/write_VTK_file.f90 | 98 +++++++++++++++++++++++++++++++++++ 1 file changed, 98 insertions(+) diff --git a/src/shared/write_VTK_file.f90 b/src/shared/write_VTK_file.f90 index 83fa4651c..498dd95f0 100644 --- a/src/shared/write_VTK_file.f90 +++ b/src/shared/write_VTK_file.f90 @@ -1474,10 +1474,108 @@ end subroutine write_VTK_movie_data_elemental ! close(IOUT_VTK) ! ! end subroutine write_VTK_movie_data_binary + ! +!------------------------------------------------------------------------------------------------- ! + + subroutine write_VTK_wavefield(nspec,nglob,xstore,ystore,zstore,ibool, & + field,prname_file) + + use constants, only: CUSTOM_REAL,MAX_STRING_LEN,IOUT_VTK,NDIM,NGLLX,NGLLY,NGLLZ + + implicit none + + integer,intent(in) :: nspec,nglob + + ! global coordinates + integer, dimension(NGLLX,NGLLY,NGLLZ,nspec),intent(in) :: ibool + real(kind=CUSTOM_REAL), dimension(nglob),intent(in) :: xstore,ystore,zstore + + ! GLL data values array + real(kind=CUSTOM_REAL), dimension(NDIM,nglob),intent(in) :: field + + ! file name + character(len=MAX_STRING_LEN),intent(in) :: prname_file + + ! local parameters + integer :: ispec,i,ier + + open(IOUT_VTK,file=trim(prname_file)//'.vtk',status='unknown',iostat=ier) + if (ier /= 0 ) then + print *, 'Error opening VTK output file: ',trim(prname_file) + stop 'Error opening VTK output file' + endif + write(IOUT_VTK,'(a)') '# vtk DataFile Version 3.1' + write(IOUT_VTK,'(a)') 'material model VTK file' + write(IOUT_VTK,'(a)') 'ASCII' + write(IOUT_VTK,'(a)') 'DATASET UNSTRUCTURED_GRID' + write(IOUT_VTK, '(a,i12,a)') 'POINTS ', nglob, ' float' + do i = 1,nglob + write(IOUT_VTK,'(3e18.6)') real(xstore(i),kind=4),real(ystore(i),kind=4),real(zstore(i),kind=4) + enddo + write(IOUT_VTK,*) "" + + ! note: indices for vtk start at 0 + write(IOUT_VTK,'(a,i12,i12)') "CELLS ",nspec,nspec*9 + do ispec = 1,nspec + write(IOUT_VTK,'(9i12)') 8, & + ibool(1,1,1,ispec)-1,ibool(NGLLX,1,1,ispec)-1, & + ibool(NGLLX,NGLLY,1,ispec)-1,ibool(1,NGLLY,1,ispec)-1, & + ibool(1,1,NGLLZ,ispec)-1,ibool(NGLLX,1,NGLLZ,ispec)-1, & + ibool(NGLLX,NGLLY,NGLLZ,ispec)-1,ibool(1,NGLLY,NGLLZ,ispec)-1 + enddo + write(IOUT_VTK,*) "" + + ! type: hexahedrons + write(IOUT_VTK,'(a,i12)') "CELL_TYPES ",nspec + write(IOUT_VTK,'(6i12)') (12,ispec=1,nspec) + write(IOUT_VTK,*) "" + + write(IOUT_VTK,'(a,i12)') "POINT_DATA ",nglob + + ! single wavefield components + if (.true.) then + write(IOUT_VTK,'(a)') "SCALARS field_x float" + write(IOUT_VTK,'(a)') "LOOKUP_TABLE default" + do i = 1,nglob + write(IOUT_VTK,*) real(field(1,i),kind=4) + enddo + write(IOUT_VTK,*) "" + + write(IOUT_VTK,'(a)') "SCALARS field_y float" + write(IOUT_VTK,'(a)') "LOOKUP_TABLE default" + do i = 1,nglob + write(IOUT_VTK,*) real(field(2,i),kind=4) + enddo + write(IOUT_VTK,*) "" + + write(IOUT_VTK,'(a)') "SCALARS field_z float" + write(IOUT_VTK,'(a)') "LOOKUP_TABLE default" + do i = 1,nglob + write(IOUT_VTK,*) real(field(3,i),kind=4) + enddo + write(IOUT_VTK,*) "" + endif + + ! vector wavefield + if (.true.) then + write(IOUT_VTK,'(a)') "VECTORS field_vector float" + do i = 1,nglob + write(IOUT_VTK,*) real(field(1,i),kind=4),real(field(2,i),kind=4),real(field(3,i),kind=4) + enddo + write(IOUT_VTK,*) "" + endif + + close(IOUT_VTK) + + end subroutine write_VTK_wavefield + !------------------------------------------------------------------------------------------------- ! +! VTU binary formats +! +!------------------------------------------------------------------------------------------------- subroutine write_VTU_movie_data(ne,np,total_dat_xyz,total_dat_con,total_dat,mesh_file,var_name)