-
Notifications
You must be signed in to change notification settings - Fork 132
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
seabed stress - remove if statements #673
Changes from 1 commit
5c036cc
4ab3476
0122dd3
34b074c
dbbad0d
22893ca
c833f90
7376807
e312c5b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -98,7 +98,7 @@ subroutine evp (dt) | |
use ice_dyn_evp_1d, only: ice_dyn_evp_1d_copyin, ice_dyn_evp_1d_kernel, & | ||
ice_dyn_evp_1d_copyout | ||
use ice_dyn_shared, only: evp_algorithm, stack_velocity_field, unstack_velocity_field | ||
|
||
use ice_dyn_shared, only: deformations | ||
real (kind=dbl_kind), intent(in) :: & | ||
dt ! time step | ||
|
||
|
@@ -393,81 +393,147 @@ subroutine evp (dt) | |
divu,rdg_conv,rdg_shear,shear,taubx,tauby ) | ||
|
||
else ! evp_algorithm == standard_2d (Standard CICE) | ||
if (ndte > 1) then | ||
do ksub = 1,ndte-1 ! subcycling | ||
|
||
do ksub = 1,ndte ! subcycling | ||
|
||
!----------------------------------------------------------------- | ||
! stress tensor equation, total surface stress | ||
!----------------------------------------------------------------- | ||
!----------------------------------------------------------------- | ||
! stress tensor equation, total surface stress | ||
!----------------------------------------------------------------- | ||
|
||
!$TCXOMP PARALLEL DO PRIVATE(iblk,strtmp) | ||
do iblk = 1, nblocks | ||
!$TCXOMP PARALLEL DO PRIVATE(iblk,strtmp) | ||
do iblk = 1, nblocks | ||
|
||
! if (trim(yield_curve) == 'ellipse') then | ||
call stress (nx_block, ny_block, & | ||
ksub, icellt(iblk), & | ||
icellt(iblk), & | ||
indxti (:,iblk), indxtj (:,iblk), & | ||
uvel (:,:,iblk), vvel (:,:,iblk), & | ||
dxt (:,:,iblk), dyt (:,:,iblk), & | ||
dxhy (:,:,iblk), dyhx (:,:,iblk), & | ||
cxp (:,:,iblk), cyp (:,:,iblk), & | ||
cxm (:,:,iblk), cym (:,:,iblk), & | ||
tarear (:,:,iblk), tinyarea (:,:,iblk), & | ||
tinyarea (:,:,iblk), & | ||
strength (:,:,iblk), & | ||
stressp_1 (:,:,iblk), stressp_2 (:,:,iblk), & | ||
stressp_3 (:,:,iblk), stressp_4 (:,:,iblk), & | ||
stressm_1 (:,:,iblk), stressm_2 (:,:,iblk), & | ||
stressm_3 (:,:,iblk), stressm_4 (:,:,iblk), & | ||
stress12_1(:,:,iblk), stress12_2(:,:,iblk), & | ||
stress12_3(:,:,iblk), stress12_4(:,:,iblk), & | ||
shear (:,:,iblk), divu (:,:,iblk), & | ||
rdg_conv (:,:,iblk), rdg_shear (:,:,iblk), & | ||
strtmp (:,:,:) ) | ||
|
||
!----------------------------------------------------------------- | ||
! momentum equation | ||
!----------------------------------------------------------------- | ||
|
||
call stepu (nx_block, ny_block, & | ||
icellu (iblk), Cdn_ocn (:,:,iblk), & | ||
indxui (:,iblk), indxuj (:,iblk), & | ||
aiu (:,:,iblk), strtmp (:,:,:), & | ||
uocn (:,:,iblk), vocn (:,:,iblk), & | ||
waterx (:,:,iblk), watery (:,:,iblk), & | ||
forcex (:,:,iblk), forcey (:,:,iblk), & | ||
umassdti (:,:,iblk), fm (:,:,iblk), & | ||
uarear (:,:,iblk), & | ||
strintx (:,:,iblk), strinty (:,:,iblk), & | ||
taubx (:,:,iblk), tauby (:,:,iblk), & | ||
uvel_init(:,:,iblk), vvel_init(:,:,iblk),& | ||
uvel (:,:,iblk), vvel (:,:,iblk), & | ||
Tbu (:,:,iblk)) | ||
|
||
enddo | ||
!$TCXOMP END PARALLEL DO | ||
|
||
call stack_velocity_field(uvel, vvel, fld2) | ||
call ice_timer_start(timer_bound) | ||
if (maskhalo_dyn) then | ||
call ice_HaloUpdate (fld2, halo_info_mask, & | ||
field_loc_NEcorner, field_type_vector) | ||
else | ||
call ice_HaloUpdate (fld2, halo_info, & | ||
field_loc_NEcorner, field_type_vector) | ||
endif | ||
call ice_timer_stop(timer_bound) | ||
call unstack_velocity_field(fld2, uvel, vvel) | ||
|
||
enddo ! subcycling | ||
endif ! only subcycle when ndte larger than 1 | ||
!$TCXOMP PARALLEL DO PRIVATE(iblk,strtmp) | ||
do iblk = 1, nblocks | ||
|
||
! if (trim(yield_curve) == 'ellipse') then | ||
call stress (nx_block, ny_block, & | ||
icellt(iblk), & | ||
indxti (:,iblk), indxtj (:,iblk), & | ||
uvel (:,:,iblk), vvel (:,:,iblk), & | ||
dxt (:,:,iblk), dyt (:,:,iblk), & | ||
dxhy (:,:,iblk), dyhx (:,:,iblk), & | ||
cxp (:,:,iblk), cyp (:,:,iblk), & | ||
cxm (:,:,iblk), cym (:,:,iblk), & | ||
tinyarea (:,:,iblk), & | ||
strength (:,:,iblk), & | ||
stressp_1 (:,:,iblk), stressp_2 (:,:,iblk), & | ||
stressp_3 (:,:,iblk), stressp_4 (:,:,iblk), & | ||
stressm_1 (:,:,iblk), stressm_2 (:,:,iblk), & | ||
stressm_3 (:,:,iblk), stressm_4 (:,:,iblk), & | ||
stress12_1(:,:,iblk), stress12_2(:,:,iblk), & | ||
stress12_3(:,:,iblk), stress12_4(:,:,iblk), & | ||
strtmp (:,:,:) ) | ||
! endif ! yield_curve | ||
!----------------------------------------------------------------- | ||
! on last subcycle, save quantities for mechanical redistribution | ||
!----------------------------------------------------------------- | ||
call deformations (nx_block , ny_block , & | ||
icellt(iblk) , & | ||
indxti(:,iblk) , indxtj(:,iblk) , & | ||
uvel(:,:,iblk) , vvel(:,:,iblk) , & | ||
dxt(:,:,iblk) , dyt(:,:,iblk) , & | ||
cxp(:,:,iblk) , cyp(:,:,iblk) , & | ||
cxm(:,:,iblk) , cym(:,:,iblk) , & | ||
tarear(:,:,iblk) , & | ||
shear(:,:,iblk) , divu(:,:,iblk) , & | ||
rdg_conv(:,:,iblk) , rdg_shear(:,:,iblk) ) | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think it's way cleaner to compute the deformations here, good idea. However, can we keep the |
||
|
||
!----------------------------------------------------------------- | ||
! momentum equation | ||
!----------------------------------------------------------------- | ||
|
||
call stepu (nx_block, ny_block, & | ||
icellu (iblk), Cdn_ocn (:,:,iblk), & | ||
indxui (:,iblk), indxuj (:,iblk), & | ||
ksub, & | ||
aiu (:,:,iblk), strtmp (:,:,:), & | ||
uocn (:,:,iblk), vocn (:,:,iblk), & | ||
waterx (:,:,iblk), watery (:,:,iblk), & | ||
forcex (:,:,iblk), forcey (:,:,iblk), & | ||
umassdti (:,:,iblk), fm (:,:,iblk), & | ||
uarear (:,:,iblk), & | ||
strintx (:,:,iblk), strinty (:,:,iblk), & | ||
taubx (:,:,iblk), tauby (:,:,iblk), & | ||
uvel_init(:,:,iblk), vvel_init(:,:,iblk),& | ||
uvel (:,:,iblk), vvel (:,:,iblk), & | ||
Tbu (:,:,iblk)) | ||
call stepu (nx_block, ny_block, & | ||
icellu (iblk), Cdn_ocn (:,:,iblk), & | ||
indxui (:,iblk), indxuj (:,iblk), & | ||
aiu (:,:,iblk), strtmp (:,:,:), & | ||
uocn (:,:,iblk), vocn (:,:,iblk), & | ||
waterx (:,:,iblk), watery (:,:,iblk), & | ||
forcex (:,:,iblk), forcey (:,:,iblk), & | ||
umassdti (:,:,iblk), fm (:,:,iblk), & | ||
uarear (:,:,iblk), & | ||
strintx (:,:,iblk), strinty (:,:,iblk), & | ||
taubx (:,:,iblk), tauby (:,:,iblk), & | ||
uvel_init(:,:,iblk), vvel_init(:,:,iblk),& | ||
uvel (:,:,iblk), vvel (:,:,iblk), & | ||
Tbu (:,:,iblk)) | ||
|
||
enddo | ||
!$TCXOMP END PARALLEL DO | ||
|
||
call stack_velocity_field(uvel, vvel, fld2) | ||
call ice_timer_start(timer_bound) | ||
if (maskhalo_dyn) then | ||
call ice_HaloUpdate (fld2, halo_info_mask, & | ||
field_loc_NEcorner, field_type_vector) | ||
else | ||
call ice_HaloUpdate (fld2, halo_info, & | ||
field_loc_NEcorner, field_type_vector) | ||
endif | ||
call ice_timer_stop(timer_bound) | ||
call unstack_velocity_field(fld2, uvel, vvel) | ||
|
||
enddo | ||
!$TCXOMP END PARALLEL DO | ||
|
||
call stack_velocity_field(uvel, vvel, fld2) | ||
call ice_timer_start(timer_bound) | ||
if (maskhalo_dyn) then | ||
call ice_HaloUpdate (fld2, halo_info_mask, & | ||
field_loc_NEcorner, field_type_vector) | ||
else | ||
call ice_HaloUpdate (fld2, halo_info, & | ||
field_loc_NEcorner, field_type_vector) | ||
endif | ||
call ice_timer_stop(timer_bound) | ||
call unstack_velocity_field(fld2, uvel, vvel) | ||
|
||
enddo ! subcycling | ||
endif ! evp_algorithm | ||
|
||
call ice_timer_stop(timer_evp_2d) | ||
|
||
deallocate(fld2) | ||
if (maskhalo_dyn) call ice_HaloDestroy(halo_info_mask) | ||
|
||
! Force symmetry across the tripole seam | ||
if (trim(grid_type) == 'tripole') then | ||
if (maskhalo_dyn) then | ||
|
@@ -574,30 +640,27 @@ end subroutine evp | |
! author: Elizabeth C. Hunke, LANL | ||
|
||
subroutine stress (nx_block, ny_block, & | ||
ksub, icellt, & | ||
icellt, & | ||
indxti, indxtj, & | ||
uvel, vvel, & | ||
dxt, dyt, & | ||
dxhy, dyhx, & | ||
cxp, cyp, & | ||
cxm, cym, & | ||
tarear, tinyarea, & | ||
tinyarea, & | ||
strength, & | ||
stressp_1, stressp_2, & | ||
stressp_3, stressp_4, & | ||
stressm_1, stressm_2, & | ||
stressm_3, stressm_4, & | ||
stress12_1, stress12_2, & | ||
stress12_3, stress12_4, & | ||
shear, divu, & | ||
rdg_conv, rdg_shear, & | ||
str ) | ||
|
||
use ice_dyn_shared, only: strain_rates, deformations, viscous_coeffs_and_rep_pressure | ||
|
||
integer (kind=int_kind), intent(in) :: & | ||
nx_block, ny_block, & ! block dimensions | ||
ksub , & ! subcycling step | ||
icellt ! no. of cells where icetmask = 1 | ||
|
||
integer (kind=int_kind), dimension (nx_block*ny_block), intent(in) :: & | ||
|
@@ -616,20 +679,13 @@ subroutine stress (nx_block, ny_block, & | |
cxp , & ! 1.5*HTN - 0.5*HTS | ||
cym , & ! 0.5*HTE - 1.5*HTW | ||
cxm , & ! 0.5*HTN - 1.5*HTS | ||
tarear , & ! 1/tarea | ||
tinyarea ! puny*tarea | ||
|
||
real (kind=dbl_kind), dimension (nx_block,ny_block), intent(inout) :: & | ||
stressp_1, stressp_2, stressp_3, stressp_4 , & ! sigma11+sigma22 | ||
stressm_1, stressm_2, stressm_3, stressm_4 , & ! sigma11-sigma22 | ||
stress12_1,stress12_2,stress12_3,stress12_4 ! sigma12 | ||
|
||
real (kind=dbl_kind), dimension (nx_block,ny_block), intent(inout) :: & | ||
shear , & ! strain rate II component (1/s) | ||
divu , & ! strain rate I component, velocity divergence (1/s) | ||
rdg_conv , & ! convergence term for ridging (1/s) | ||
rdg_shear ! shear term for ridging (1/s) | ||
|
||
real (kind=dbl_kind), dimension(nx_block,ny_block,8), intent(out) :: & | ||
str ! stress combinations | ||
|
||
|
@@ -657,16 +713,15 @@ subroutine stress (nx_block, ny_block, & | |
str12ew, str12we, str12ns, str12sn , & | ||
strp_tmp, strm_tmp, tmp | ||
|
||
logical :: capping ! of the viscous coef | ||
real(kind=dbl_kind),parameter :: capping = c1 ! of the viscous coef | ||
|
||
character(len=*), parameter :: subname = '(stress)' | ||
|
||
!----------------------------------------------------------------- | ||
! Initialize | ||
!----------------------------------------------------------------- | ||
|
||
str(:,:,:) = c0 | ||
capping = .true. ! could be later included in ice_in | ||
|
||
do ij = 1, icellt | ||
i = indxti(ij) | ||
|
@@ -872,23 +927,6 @@ subroutine stress (nx_block, ny_block, & | |
|
||
enddo ! ij | ||
|
||
!----------------------------------------------------------------- | ||
! on last subcycle, save quantities for mechanical redistribution | ||
!----------------------------------------------------------------- | ||
if (ksub == ndte) then | ||
call deformations (nx_block , ny_block , & | ||
icellt , & | ||
indxti , indxtj , & | ||
uvel , vvel , & | ||
dxt , dyt , & | ||
cxp , cyp , & | ||
cxm , cym , & | ||
tarear , & | ||
shear , divu , & | ||
rdg_conv , rdg_shear ) | ||
|
||
endif | ||
|
||
end subroutine stress | ||
|
||
!======================================================================= | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -627,7 +627,6 @@ end subroutine dyn_prep2 | |
subroutine stepu (nx_block, ny_block, & | ||
icellu, Cw, & | ||
indxui, indxuj, & | ||
ksub, & | ||
aiu, str, & | ||
uocn, vocn, & | ||
waterx, watery, & | ||
|
@@ -642,8 +641,7 @@ subroutine stepu (nx_block, ny_block, & | |
|
||
integer (kind=int_kind), intent(in) :: & | ||
nx_block, ny_block, & ! block dimensions | ||
icellu, & ! total count when iceumask is true | ||
ksub ! subcycling iteration | ||
icellu ! total count when iceumask is true | ||
|
||
integer (kind=int_kind), dimension (nx_block*ny_block), intent(in) :: & | ||
indxui , & ! compressed index in i-direction | ||
|
@@ -1388,7 +1386,7 @@ subroutine viscous_coeffs_and_rep_pressure (strength, tinyarea, & | |
real (kind=dbl_kind), intent(in):: & | ||
Deltane, Deltanw, Deltasw, Deltase ! Delta at each corner | ||
|
||
logical , intent(in):: capping | ||
real(kind=dbl_kind) , intent(in):: capping | ||
|
||
real (kind=dbl_kind), intent(out):: & | ||
zetax2ne, zetax2nw, zetax2sw, zetax2se, & ! zetax2 at each corner | ||
|
@@ -1401,42 +1399,33 @@ subroutine viscous_coeffs_and_rep_pressure (strength, tinyarea, & | |
|
||
! NOTE: for comp. efficiency 2 x zeta and 2 x eta are used in the code | ||
|
||
! if (trim(yield_curve) == 'ellipse') then | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would prefer we keep this commented There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I reinserted it |
||
|
||
if (capping) then | ||
tmpcalcne = strength/max(Deltane,tinyarea) | ||
tmpcalcnw = strength/max(Deltanw,tinyarea) | ||
tmpcalcsw = strength/max(Deltasw,tinyarea) | ||
tmpcalcse = strength/max(Deltase,tinyarea) | ||
else | ||
tmpcalcne = strength/(Deltane + tinyarea) | ||
tmpcalcnw = strength/(Deltanw + tinyarea) | ||
tmpcalcsw = strength/(Deltasw + tinyarea) | ||
tmpcalcse = strength/(Deltase + tinyarea) | ||
endif | ||
|
||
zetax2ne = (c1+Ktens)*tmpcalcne ! northeast | ||
rep_prsne = (c1-Ktens)*tmpcalcne*Deltane | ||
etax2ne = epp2i*zetax2ne | ||
tmpcalcne = capping *(strength/max(Deltane, tinyarea))+ & | ||
(c1-capping)* strength/ (Deltane+ tinyarea) | ||
tmpcalcnw = capping *(strength/max(Deltanw, tinyarea))+ & | ||
(c1-capping)* strength/ (Deltanw+ tinyarea) | ||
tmpcalcsw = capping *(strength/max(Deltasw, tinyarea))+ & | ||
(c1-capping)* strength/ (Deltasw+ tinyarea) | ||
tmpcalcse = capping *(strength/max(Deltase, tinyarea))+ & | ||
(c1-capping)* strength/ (Deltase+ tinyarea) | ||
|
||
zetax2ne = (c1+Ktens)*tmpcalcne ! northeast | ||
rep_prsne = (c1-Ktens)*tmpcalcne*Deltane | ||
etax2ne = epp2i*zetax2ne | ||
|
||
zetax2nw = (c1+Ktens)*tmpcalcnw ! northwest | ||
rep_prsnw = (c1-Ktens)*tmpcalcnw*Deltanw | ||
etax2nw = epp2i*zetax2nw | ||
zetax2nw = (c1+Ktens)*tmpcalcnw ! northwest | ||
rep_prsnw = (c1-Ktens)*tmpcalcnw*Deltanw | ||
etax2nw = epp2i*zetax2nw | ||
|
||
zetax2sw = (c1+Ktens)*tmpcalcsw ! southwest | ||
rep_prssw = (c1-Ktens)*tmpcalcsw*Deltasw | ||
etax2sw = epp2i*zetax2sw | ||
zetax2sw = (c1+Ktens)*tmpcalcsw ! southwest | ||
rep_prssw = (c1-Ktens)*tmpcalcsw*Deltasw | ||
etax2sw = epp2i*zetax2sw | ||
|
||
zetax2se = (c1+Ktens)*tmpcalcse ! southeast | ||
rep_prsse = (c1-Ktens)*tmpcalcse*Deltase | ||
etax2se = epp2i*zetax2se | ||
|
||
! else | ||
|
||
! endif | ||
zetax2se = (c1+Ktens)*tmpcalcse ! southeast | ||
rep_prsse = (c1-Ktens)*tmpcalcse*Deltase | ||
etax2se = epp2i*zetax2se | ||
|
||
end subroutine viscous_coeffs_and_rep_pressure | ||
|
||
!======================================================================= | ||
|
||
! Load velocity components into array for boundary updates | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't understand why this change is necessary.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If ndte=1 then some compilers will create warnings and maybe even stop due to a do loop that runs from 1 to 0. The same should be included in the 1d .Now it aborts if ndte < 1
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It can't be true that the code will stop? Compiler warnings maybe, but setting loop values so a loop won't be executed is done all the time and should be allowed in general. I'm not opposed to the if check, it's not going to affect performance, but I think it's cleaner not being there. Where is the check for ndte<1. It's not in this part of the code, so the if check is needed even less.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It seems a strong motivator is to get rid of if checks in loops. I assume this is for performance. My question continues to be whether we have measured a performance benefit with these changes. If not, then I prefer that we not replicate the calls for the last subcycle step in order to add the deformations call outside an if statement. My suspicion is that the if statement in the subcycling loop is not going to affect performance. Getting rid of if statements inside an ij loop, ok maybe, but not at this high a level in the computations. In my opinion, it's easier to maintain if the last iteration is not separated/duplicated. Maybe there are other reasons for separating that last subcycle. But if not, then I still prefer we not do it.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I agree in principle with getting rid of if checks in loops. They can make a big difference in performance, but it depends on the loop size and things like how long the pipeline is for alternative commands, which probably varies a good bit across hardware/compiler configurations. Maybe it's gotten better with newer generations. The other considerations here are not having duplicative code (which I also agree with) and making close-but-not-quite-duplicated code for various dynamics options as similar as possible. Since the dynamics is typically the most expensive part of the sea ice simulation, and centers seem to be moving toward higher numbers of subcycles for better convergence, then optimizing these loops seems wise. It's hard to judge, though, if the existing timing tests are inconclusive. Question: in the original code, the deformations routine is called at the end, rather than during the loop. Is the argument for moving it up into the loop (or interrupting the loop) that the computed deformations need to be consistent with the velocities just prior to the last iteration? Or is it not better that the deformations be computed from the final velocities? (Maybe I'm not understanding something here?)