Skip to content

Commit

Permalink
Merge pull request #823 from danielpeter/devel
Browse files Browse the repository at this point in the history
updates seismogram storage and source time function usage for GPU kernels
  • Loading branch information
danielpeter authored Jun 15, 2023
2 parents 525daa9 + cb30771 commit a35d4ce
Show file tree
Hide file tree
Showing 59 changed files with 1,878 additions and 1,159 deletions.
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ after_success:
- |
if [ "$TESTCOV" == "1" ]; then
gcov --version
echo `pwd`
echo
ls -al obj/
fi
Expand Down
5 changes: 3 additions & 2 deletions .travis/run_install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,7 @@ echo "export OMPI_MCA_btl_vader_single_copy_mechanism=none" >> $HOME/.tmprc
echo "export OMPI_MCA_btl=^openib" >> $HOME/.tmprc

echo ""
echo "exports:"
export
# commented this out due to travis reporting security issues in these outputs
#echo "exports:"
#export
echo ""
8 changes: 5 additions & 3 deletions .travis/run_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,9 @@ esac

# info
#echo $TRAVIS_BUILD_DIR
echo $WORKDIR
# commented this out due to travis reporting security issues in these outputs
#echo $WORKDIR

echo `date`
echo
echo "**********************************************************"
Expand Down Expand Up @@ -137,7 +139,7 @@ else

# simulation done
echo
echo "simulation done: `pwd`"
echo "simulation done: $dir"
echo `date`
echo

Expand All @@ -152,7 +154,7 @@ if [[ $? -ne 0 ]]; then exit 1; fi

# simulation done
echo
echo "test done: `pwd`"
echo "test done: $dir"
echo `date`
echo

Expand Down
10 changes: 5 additions & 5 deletions Makefile.in
Original file line number Diff line number Diff line change
Expand Up @@ -234,11 +234,11 @@ GENCODE_AMD_MI250 = --amdgpu-target=gfx90a
@COND_HIP_TRUE@@COND_HIP_PLATFORM_NVIDIA_TRUE@HIP_CFLAG_ENDING = -x cu # CUDA compilation or src/gpu/*.c files

# specific targets
@COND_HIP_TRUE@@COND_HIP_MI8_TRUE@GENCODE_HIP = $(GENCODE_AMD_MI8) # --with-hip=MI8 ..
@COND_HIP_TRUE@@COND_HIP_MI25_TRUE@GENCODE_HIP = $(GENCODE_AMD_MI25) # --with-hip=MI25 ..
@COND_HIP_TRUE@@COND_HIP_MI50_TRUE@GENCODE_HIP = $(GENCODE_AMD_MI50) # --with-hip=MI50 ..
@COND_HIP_TRUE@@COND_HIP_MI100_TRUE@GENCODE_HIP = $(GENCODE_AMD_MI100) # --with-hip=MI100 ..
@COND_HIP_TRUE@@COND_HIP_MI250_TRUE@GENCODE_HIP = $(GENCODE_AMD_MI250) # --with-hip=MI250 ..
@COND_HIP_TRUE@@COND_HIP_MI8_TRUE@GENCODE_HIP = $(GENCODE_AMD_MI8) $(FC_DEFINE)GPU_DEVICE_MI8 # --with-hip=MI8 ..
@COND_HIP_TRUE@@COND_HIP_MI25_TRUE@GENCODE_HIP = $(GENCODE_AMD_MI25) $(FC_DEFINE)GPU_DEVICE_MI25 # --with-hip=MI25 ..
@COND_HIP_TRUE@@COND_HIP_MI50_TRUE@GENCODE_HIP = $(GENCODE_AMD_MI50) $(FC_DEFINE)GPU_DEVICE_MI50 # --with-hip=MI50 ..
@COND_HIP_TRUE@@COND_HIP_MI100_TRUE@GENCODE_HIP = $(GENCODE_AMD_MI100) $(FC_DEFINE)GPU_DEVICE_MI100 # --with-hip=MI100 ..
@COND_HIP_TRUE@@COND_HIP_MI250_TRUE@GENCODE_HIP = $(GENCODE_AMD_MI250) $(FC_DEFINE)GPU_DEVICE_MI250 # --with-hip=MI250 ..

@COND_HIP_TRUE@@COND_HIP_CUDA5_TRUE@GENCODE_HIP = $(GENCODE_35) # --with-hip=cuda5 ..
@COND_HIP_TRUE@@COND_HIP_CUDA6_TRUE@GENCODE_HIP = $(GENCODE_37) # --with-hip=cuda6 ..
Expand Down
96 changes: 79 additions & 17 deletions src/gpu/boast/compute_add_sources_kernel.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,20 @@ def BOAST::compute_add_sources_kernel( ref = true, n_dim = 3, n_gllx = 5 )
push_env( :array_start => 0 )
kernel = CKernel::new
function_name = "compute_add_sources_kernel"
accel = Real("accel", :dir => :inout,:dim => [ Dim() ] )
ibool = Int("ibool", :dir => :in, :dim => [ Dim() ] )
sourcearrays = Real("sourcearrays", :dir => :in, :dim => [ Dim() ] )
stf_pre_compute = Real("stf_pre_compute", :size => 8, :dir => :in, :dim => [ Dim() ] )
myrank = Int("myrank", :dir => :in)
islice_selected_source = Int("islice_selected_source", :dir => :in, :dim => [ Dim() ] )
ispec_selected_source = Int("ispec_selected_source", :dir => :in, :dim => [ Dim() ] )
nsources = Int("NSOURCES", :dir => :in)
accel = Real("accel", :dir => :inout,:dim => [ Dim() ] )
ibool = Int("ibool", :dir => :in, :dim => [ Dim() ] )
sourcearrays_local = Real("sourcearrays_local", :dir => :in, :dim => [ Dim() ] )
stf_local = Real("stf_local", :dir => :in, :dim => [ Dim() ] )
ispec_selected_source_local = Int("ispec_selected_source_local", :dir => :in, :dim => [ Dim() ] )
nsources_local = Int("nsources_local", :dir => :in)
nstep = Int("NSTEP", :dir => :in)
it = Int("it", :dir => :in)
istage = Int("istage", :dir => :in)

ndim = Int("NDIM", :const => n_dim)
ngllx = Int("NGLLX", :const => n_gllx)
p = Procedure(function_name, [accel,ibool,sourcearrays,stf_pre_compute,myrank,islice_selected_source,ispec_selected_source,nsources])

p = Procedure(function_name, [accel,ibool,sourcearrays_local,stf_local,ispec_selected_source_local,nsources_local,nstep,it,istage])
if (get_lang == CUDA and ref) then
get_output.print File::read("references/#{function_name}.cu")
elsif(get_lang == CUDA or get_lang == CL or get_lang == HIP) then
Expand All @@ -39,14 +41,12 @@ def BOAST::compute_add_sources_kernel( ref = true, n_dim = 3, n_gllx = 5 )
print k === get_local_id(2)
print isource === get_group_id(0) + get_num_groups(0)*get_group_id(1)

print If(isource < nsources) {
print If(myrank == islice_selected_source[isource]) {
print ispec === ispec_selected_source[isource] - 1
print stf === stf_pre_compute[isource]
print iglob === ibool[INDEX4(ngllx,ngllx,ngllx,i,j,k,ispec)] - 1
(0..2).each { |indx|
print atomicAdd(accel+iglob*3+indx, sourcearrays[INDEX5(ndim,ngllx,ngllx,ngllx,indx,i,j,k,isource)]*stf)
}
print If(isource < nsources_local) {
print ispec === ispec_selected_source_local[isource] - 1
print stf === stf_local[INDEX3(nsources_local,nstep,isource,it,istage)]
print iglob === ibool[INDEX4(ngllx,ngllx,ngllx,i,j,k,ispec)] - 1
(0..2).each { |indx|
print atomicAdd(accel+iglob*3+indx, sourcearrays_local[INDEX5(ndim,ngllx,ngllx,ngllx,indx,i,j,k,isource)]*stf)
}
}
close p
Expand All @@ -57,4 +57,66 @@ def BOAST::compute_add_sources_kernel( ref = true, n_dim = 3, n_gllx = 5 )
kernel.procedure = p
return kernel
end

# obsolete kernel, left here for reference...
#
# def BOAST::compute_add_sources_kernel( ref = true, n_dim = 3, n_gllx = 5 )
# push_env( :array_start => 0 )
# kernel = CKernel::new
# function_name = "compute_add_sources_kernel"
# accel = Real("accel", :dir => :inout,:dim => [ Dim() ] )
# ibool = Int("ibool", :dir => :in, :dim => [ Dim() ] )
# sourcearrays = Real("sourcearrays", :dir => :in, :dim => [ Dim() ] )
# stf_pre_compute = Real("stf_pre_compute", :size => 8, :dir => :in, :dim => [ Dim() ] )
# myrank = Int("myrank", :dir => :in)
# islice_selected_source = Int("islice_selected_source", :dir => :in, :dim => [ Dim() ] )
# ispec_selected_source = Int("ispec_selected_source", :dir => :in, :dim => [ Dim() ] )
# nsources = Int("NSOURCES", :dir => :in)
#
# ndim = Int("NDIM", :const => n_dim)
# ngllx = Int("NGLLX", :const => n_gllx)
# p = Procedure(function_name, [accel,ibool,sourcearrays,stf_pre_compute,myrank,islice_selected_source,ispec_selected_source,nsources])
# if (get_lang == CUDA and ref) then
# get_output.print File::read("references/#{function_name}.cu")
# elsif(get_lang == CUDA or get_lang == CL or get_lang == HIP) then
# make_specfem3d_header( :ndim => n_dim, :ngllx => n_gllx, :double => true )
# open p
# ispec = Int( "ispec")
# iglob = Int( "iglob")
# stf = Real("stf")
# isource = Int( "isource")
# i = Int( "i")
# j = Int( "j")
# k = Int( "k")
# decl ispec
# decl iglob
# decl stf
# decl isource
# decl i
# decl j
# decl k
# print i === get_local_id(0)
# print j === get_local_id(1)
# print k === get_local_id(2)
# print isource === get_group_id(0) + get_num_groups(0)*get_group_id(1)
#
# print If(isource < nsources) {
# print If(myrank == islice_selected_source[isource]) {
# print ispec === ispec_selected_source[isource] - 1
# print stf === stf_pre_compute[isource]
# print iglob === ibool[INDEX4(ngllx,ngllx,ngllx,i,j,k,ispec)] - 1
# (0..2).each { |indx|
# print atomicAdd(accel+iglob*3+indx, sourcearrays[INDEX5(ndim,ngllx,ngllx,ngllx,indx,i,j,k,isource)]*stf)
# }
# }
# }
# close p
# else
# raise "Unsupported language!"
# end
# pop_env(:array_start)
# kernel.procedure = p
# return kernel
# end

end
17 changes: 12 additions & 5 deletions src/gpu/boast/compute_seismograms_kernel.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
module BOAST
def BOAST::compute_seismograms_kernel(n_gllx = 5, n_gll2 = 25, n_gll3 = 125, n_gll3_padded = 128)
def BOAST::compute_seismograms_kernel(ref = false, n_dim = 3, n_gllx = 5, n_gll2 = 25, n_gll3 = 125, n_gll3_padded = 128)
push_env( :array_start => 0 )
kernel = CKernel::new
v = []
Expand All @@ -15,15 +15,17 @@ def BOAST::compute_seismograms_kernel(n_gllx = 5, n_gll2 = 25, n_gll3 = 125, n_g
v.push ispec_selected_rec = Int("ispec_selected_rec", :dir => :in, :dim => [ Dim() ])
v.push number_receiver_global = Int("number_receiver_global", :dir => :in, :dim => [ Dim() ])
v.push scale_displ = Real("scale_displ", :dir => :in)
v.push seismo_current = Int("seismo_current", :dir => :in)

ndim = Int("NDIM", :const => n_dim)
ngllx = Int("NGLLX", :const => n_gllx)
ngll2 = Int("NGLL2", :const => n_gll2)
ngll3 = Int("NGLL3", :const => n_gll3)
ngll3_padded = Int("NGLL3_PADDED", :const => n_gll3_padded)

p = Procedure(function_name, v )
if(get_lang == CL or get_lang == CUDA or get_lang == HIP) then
make_specfem3d_header( :ngllx => n_gllx, :ngll2 => n_gll2, :ngll3 => n_gll3, :ngll3_padded => n_gll3_padded )
make_specfem3d_header( :ndim => n_dim, :ngllx => n_gllx, :ngll2 => n_gll2, :ngll3 => n_gll3, :ngll3_padded => n_gll3_padded )
open p
ispec = Int("ispec")
iglob = Int("iglob")
Expand All @@ -36,6 +38,7 @@ def BOAST::compute_seismograms_kernel(n_gllx = 5, n_gll2 = 25, n_gll3 = 125, n_g
k = Int("k")
l = Int("l")
s = Int("s")
idx = Int("idx")
decl ispec
decl iglob
decl irec_local
Expand All @@ -47,6 +50,7 @@ def BOAST::compute_seismograms_kernel(n_gllx = 5, n_gll2 = 25, n_gll3 = 125, n_g
decl k
decl l
decl s
decl idx

decl sh_dxd = Real("sh_dxd", :local => true, :dim => [Dim(ngll3_padded)] )
decl sh_dyd = Real("sh_dyd", :local => true, :dim => [Dim(ngll3_padded)] )
Expand Down Expand Up @@ -96,18 +100,21 @@ def BOAST::compute_seismograms_kernel(n_gllx = 5, n_gll2 = 25, n_gll3 = 125, n_g
}
comment()

print idx === INDEX3(ndim,nrec_local,0,irec_local,seismo_current)
comment()

print If (tx == 0) {
print seismograms[irec_local*3 + 0] === scale_displ * (
print seismograms[idx + 0] === scale_displ * (
nu[(irec_local*3)*3 + 0]*sh_dxd[0] + nu[(irec_local*3 + 1)*3 + 0]*sh_dyd[0] + nu[(irec_local*3 + 2)*3 + 0]*sh_dzd[0]
)
}
print If (tx == 1) {
print seismograms[irec_local*3 + 1] === scale_displ * (
print seismograms[idx + 1] === scale_displ * (
nu[(irec_local*3)*3 + 1]*sh_dxd[0] + nu[(irec_local*3 + 1)*3 + 1]*sh_dyd[0] + nu[(irec_local*3 + 2)*3 + 1]*sh_dzd[0]
)
}
print If (tx == 2) {
print seismograms[irec_local*3 + 2] === scale_displ * (
print seismograms[idx + 2] === scale_displ * (
nu[(irec_local*3)*3 + 2]*sh_dxd[0] + nu[(irec_local*3 + 1)*3 + 2]*sh_dyd[0] + nu[(irec_local*3 + 2)*3 + 2]*sh_dzd[0]
)
}
Expand Down
2 changes: 1 addition & 1 deletion src/gpu/boast/inner_core_impl_kernel_forward.rb
Original file line number Diff line number Diff line change
Expand Up @@ -1032,7 +1032,7 @@ def BOAST::impl_kernel(type, forward, ref = true, elem_per_thread = 1, mesh_colo
comment()

# compilation pragma info
if (type == :crust_mantle and get_lang == CUDA and forward) then
if (type == :crust_mantle and (get_lang == CUDA or get_lang == HIP) and forward) then
get_output.puts "#ifdef #{manually_unrolled_loops}"
get_output.puts "#pragma message (\"\\n\\nCompiling with: #{manually_unrolled_loops} enabled\\n\")"
get_output.puts "#endif // #{manually_unrolled_loops}"
Expand Down
Loading

0 comments on commit a35d4ce

Please sign in to comment.