Skip to content

Commit

Permalink
Merge branch 'worleyph/machines/Titan_pgi_updates' into master (PR #1767)
Browse files Browse the repository at this point in the history

Early on, it was determined that the PGI compiler required the
CPU target to be specified as istanbul (instead of the actual
processor, interlagos) in order for ACME (or CESM) results to be
reproducible with respect to changes in process or thread counts.
Here this global specification of istanbul as the CPU target is
removed, and the target is applied only to the files that require it.
Based on experimentation, only CAM and MPASLI required
the modified CPU target, and we eventually identified a small
number of files within CAM for which it is required. We have not
yet examined these files to determine why they require it.
We also have not yet looked at individual MPASLI files, and
instead continue to apply the istanbul CPU target to builds
of the entire GLC component.

Other changes include removing the dependence on the copy of
pgi/17.5.0 installed in Dave Norton's directories, moving instead
to the now-official OLCF installation. This merge also fixes a typo in the
module switch command for the PGI version used with PGIACC, and
changes the 'pin' flag to 'pinned' for PGIACC, as 'pin' is no
longer legal.

[Non-BFB] (on Titan when using PGI compiler)

Fixes #1620
Fixes #1610

(Note: #1610 might need to be reopened, but this nominally addresses the primary issues.)
  • Loading branch information
minxu74 authored Oct 27, 2017
2 parents 1178adc + 1681986 commit dab8731
Show file tree
Hide file tree
Showing 4 changed files with 39 additions and 24 deletions.
30 changes: 24 additions & 6 deletions config/acme/machines/Depends.titan.pgi
Original file line number Diff line number Diff line change
@@ -1,11 +1,29 @@
# Lowered optimization to speed up compilation time: dyn_comp.F90 is
# compiled with $(FFLAGS_NOOPT) in place of $(FFLAGS).
dyn_comp.o: dyn_comp.F90
$(FC) -c $(INCLDIR) $(INCS) $(FFLAGS_NOOPT) $(FREEFLAGS) $<

#uwshcu.o: uwshcu.F90
# $(FC) -c $(INCLDIR) $(INCS) $(FFLAGS_NOOPT) $(FREEFLAGS) $<
# The following files need -target-cpu=istanbul for reproducibility with
# respect to changes in process or thread counts. The flag is appended to
# each compile line individually instead of being set globally for the
# whole build. Per the accompanying commit message, why these particular
# files need it has not yet been determined.
interpolate_data.o: interpolate_data.F90
$(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) $(FREEFLAGS) -target-cpu=istanbul $<

#mo_drydep.o: mo_drydep.F90
# $(FC) -c $(INCLDIR) $(INCS) $(FFLAGS_NOOPT) $(FREEFLAGS) $<
tracer_data.o: tracer_data.F90
$(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) $(FREEFLAGS) -target-cpu=istanbul $<

aero_model.o: aero_model.F90
$(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) $(FREEFLAGS) -target-cpu=istanbul $<

modal_aero_deposition.o: modal_aero_deposition.F90
$(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) $(FREEFLAGS) -target-cpu=istanbul $<

dust_sediment_mod.o: dust_sediment_mod.F90
$(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) $(FREEFLAGS) -target-cpu=istanbul $<

mo_gas_phase_chemdr.o: mo_gas_phase_chemdr.F90
$(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) $(FREEFLAGS) -target-cpu=istanbul $<

modal_aer_opt.o: modal_aer_opt.F90
$(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) $(FREEFLAGS) -target-cpu=istanbul $<

zm_conv.o: zm_conv.F90
$(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) $(FREEFLAGS) -target-cpu=istanbul $<

#micro_mg1_5.o: micro_mg1_5.F90
# $(FC) -c $(INCLDIR) $(INCS) $(FFLAGS_NOOPT) $(FREEFLAGS) $<
28 changes: 14 additions & 14 deletions config/acme/machines/Depends.titan.pgiacc
Original file line number Diff line number Diff line change
Expand Up @@ -7,46 +7,46 @@ microp_aero.o: microp_aero.F90


# OpenACC builds: every rule below compiles its source with
# -DUSE_OPENACC=1 -acc and a -ta=tesla,... target list, in addition to
# $(FFLAGS) and $(FREEFLAGS).
# NOTE(review): this listing is a rendered diff, so each rule shows two
# recipe lines: one with 'pin' (described in the commit message as no
# longer legal for PGIACC) and one with 'pinned'. Presumably only the
# 'pinned' line remains in the actual file; confirm against the repository.
bndry_mod.o: bndry_mod.F90
$(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) -DUSE_OPENACC=1 -acc -ta=tesla,pin,cc35,cuda7.5,ptxinfo -Minfo=accel $(FREEFLAGS) $<
$(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) -DUSE_OPENACC=1 -acc -ta=tesla,pinned,cc35,cuda7.5,ptxinfo -Minfo=accel $(FREEFLAGS) $<

derivative_mod.o: derivative_mod.F90
$(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) -DUSE_OPENACC=1 -acc -ta=tesla,pin,cc35,cuda7.5,ptxinfo -Minfo=accel $(FREEFLAGS) $<
$(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) -DUSE_OPENACC=1 -acc -ta=tesla,pinned,cc35,cuda7.5,ptxinfo -Minfo=accel $(FREEFLAGS) $<

edge_mod.o: edge_mod.F90
$(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) -DUSE_OPENACC=1 -acc -ta=tesla,pin,cc35,cuda7.5,ptxinfo -Minfo=accel $(FREEFLAGS) $<
$(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) -DUSE_OPENACC=1 -acc -ta=tesla,pinned,cc35,cuda7.5,ptxinfo -Minfo=accel $(FREEFLAGS) $<

element_mod.o: element_mod.F90
$(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) -DUSE_OPENACC=1 -acc -ta=tesla,pin,cc35,cuda7.5,ptxinfo -Minfo=accel $(FREEFLAGS) $<
$(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) -DUSE_OPENACC=1 -acc -ta=tesla,pinned,cc35,cuda7.5,ptxinfo -Minfo=accel $(FREEFLAGS) $<

openacc_utils_mod.o: openacc_utils_mod.F90
$(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) -DUSE_OPENACC=1 -acc -ta=tesla,pin,cc35,cuda7.5,ptxinfo -Minfo=accel $(FREEFLAGS) $<
$(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) -DUSE_OPENACC=1 -acc -ta=tesla,pinned,cc35,cuda7.5,ptxinfo -Minfo=accel $(FREEFLAGS) $<

prim_advance_mod.o: prim_advance_mod.F90
$(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) -DUSE_OPENACC=1 -acc -ta=tesla,pin,cc35,cuda7.5,ptxinfo -Minfo=accel $(FREEFLAGS) $<
$(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) -DUSE_OPENACC=1 -acc -ta=tesla,pinned,cc35,cuda7.5,ptxinfo -Minfo=accel $(FREEFLAGS) $<

prim_advection_mod.o: prim_advection_mod.F90
$(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) -DUSE_OPENACC=1 -acc -ta=tesla,pin,cc35,cuda7.5,ptxinfo -Minfo=accel $(FREEFLAGS) $<
$(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) -DUSE_OPENACC=1 -acc -ta=tesla,pinned,cc35,cuda7.5,ptxinfo -Minfo=accel $(FREEFLAGS) $<

prim_si_mod.o: prim_si_mod.F90
$(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) -DUSE_OPENACC=1 -acc -ta=tesla,pin,cc35,cuda7.5,ptxinfo -Minfo=accel $(FREEFLAGS) $<
$(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) -DUSE_OPENACC=1 -acc -ta=tesla,pinned,cc35,cuda7.5,ptxinfo -Minfo=accel $(FREEFLAGS) $<

solver_init_mod.o: solver_init_mod.F90
$(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) -DUSE_OPENACC=1 -acc -ta=tesla,pin,cc35,cuda7.5,ptxinfo -Minfo=accel $(FREEFLAGS) $<
$(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) -DUSE_OPENACC=1 -acc -ta=tesla,pinned,cc35,cuda7.5,ptxinfo -Minfo=accel $(FREEFLAGS) $<

vertremap_mod.o: vertremap_mod.F90
$(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) -DUSE_OPENACC=1 -acc -ta=tesla,pin,cc35,cuda7.5,ptxinfo -Minfo=accel $(FREEFLAGS) $<
$(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) -DUSE_OPENACC=1 -acc -ta=tesla,pinned,cc35,cuda7.5,ptxinfo -Minfo=accel $(FREEFLAGS) $<

viscosity_mod.o: viscosity_mod.F90
$(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) -DUSE_OPENACC=1 -acc -ta=tesla,pin,cc35,cuda7.5,ptxinfo -Minfo=accel $(FREEFLAGS) $<
$(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) -DUSE_OPENACC=1 -acc -ta=tesla,pinned,cc35,cuda7.5,ptxinfo -Minfo=accel $(FREEFLAGS) $<

prim_driver_mod.o: prim_driver_mod.F90
$(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) -DUSE_OPENACC=1 -acc -ta=tesla,pin,cc35,cuda7.5,ptxinfo -Minfo=accel $(FREEFLAGS) $<
$(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) -DUSE_OPENACC=1 -acc -ta=tesla,pinned,cc35,cuda7.5,ptxinfo -Minfo=accel $(FREEFLAGS) $<

physics_mod.o: physics_mod.F90
$(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) -DUSE_OPENACC=1 -acc -ta=tesla,pin,cc35,cuda7.5,ptxinfo -Minfo=accel $(FREEFLAGS) $<
$(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) -DUSE_OPENACC=1 -acc -ta=tesla,pinned,cc35,cuda7.5,ptxinfo -Minfo=accel $(FREEFLAGS) $<

physconst.o: physconst.F90
$(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) -DUSE_OPENACC=1 -acc -ta=tesla,pin,cc35,cuda7.5,ptxinfo -Minfo=accel $(FREEFLAGS) $<
$(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) -DUSE_OPENACC=1 -acc -ta=tesla,pinned,cc35,cuda7.5,ptxinfo -Minfo=accel $(FREEFLAGS) $<


#uwshcu.o: uwshcu.F90
Expand Down
1 change: 1 addition & 0 deletions config/acme/machines/config_compilers.xml
Original file line number Diff line number Diff line change
Expand Up @@ -958,6 +958,7 @@ for mct, etc.
<compiler COMPILER="pgi" MACH="titan">
<ADD_CFLAGS DEBUG="FALSE"> -O2 </ADD_CFLAGS>
<ADD_FFLAGS DEBUG="FALSE"> -O2 </ADD_FFLAGS>
<!-- The istanbul CPU target is still applied to builds of the entire GLC component; individual MPASLI files have not yet been examined to narrow this down. -->
<ADD_FFLAGS MODEL="glc"> -target-cpu=istanbul </ADD_FFLAGS>
<NETCDF_PATH>$(NETCDFROOT)</NETCDF_PATH>
<PNETCDF_PATH>$(PNETCDFROOT)</PNETCDF_PATH>
<CONFIG_ARGS> --host=Linux </CONFIG_ARGS>
Expand Down
4 changes: 0 additions & 4 deletions config/acme/machines/config_machines.xml
Original file line number Diff line number Diff line change
Expand Up @@ -2222,10 +2222,6 @@
<env name="CRAY_CPU_TARGET">istanbul</env>
<env name="CRAY_CUDA_MPS">1</env>
</environment_variables>
<environment_variables compiler="pgi">
<!-- NOTE(wjs, 2015-03-12) The following line is needed for bit-for-bit reproducibility -->
<env name="CRAY_CPU_TARGET">istanbul</env>
</environment_variables>
<environment_variables compiler="intel">
<env name="CRAYPE_LINK_TYPE">dynamic</env>
</environment_variables>
Expand Down

0 comments on commit dab8731

Please sign in to comment.