Skip to content

Commit

Permalink
Merge ndk/machinefiles/depends-intel-fpmodelfast into master (PR #1761)
Browse files Browse the repository at this point in the history
For Intel builds, there are 7 HOMME Fortran files we build with higher optimization flags.
Add -fp-model fast to those 7 builds, as this avoids the slow pow function on KNL and may
generally improve performance for those files.
Note that the base flags for Intel add -fp-model source; I have verified that the compiler favors "fast" when given BOTH -fp-model source and -fp-model fast.
Passes acme_developer
Also added a set of dependency rules (Depends.intel18) for Intel v18.

Fixes #1698

[non-BFB]   roundoff level changes
  • Loading branch information
mt5555 committed Oct 22, 2017
2 parents a9771a5 + b3d0c93 commit dad2d1b
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 1 deletion.
2 changes: 1 addition & 1 deletion config/acme/machines/Depends.intel
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ kissvec.o

ifeq ($(DEBUG),FALSE)
$(PERFOBJS): %.o: %.F90
$(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) $(FREEFLAGS) -O3 -no-prec-div $<
$(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) $(FREEFLAGS) -O3 -fp-model fast -no-prec-div $<
$(REDUCED_OPT_OBJS): %.o: %.F90
$(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) $(FREEFLAGS) -O1 $<
$(REDUCED_PRECISION_OBJS): %.o: %.F90
Expand Down
36 changes: 36 additions & 0 deletions config/acme/machines/Depends.intel18
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#
# Objects that get the dedicated high-optimization recipe (the
# $(PERFOBJS) static pattern rule) when DEBUG is FALSE.
# Listed by base name; the .o suffix is attached by addsuffix.
PERFOBJS = $(addsuffix .o, \
  prim_advection_mod_base \
  vertremap_mod_base \
  edge_mod_base \
  derivative_mod_base \
  bndry_mod_base \
  prim_advance_mod \
  uwshcu)

# Objects compiled with reduced math-library precision in non-debug builds.
# shr_wv_sat_mod needs no better than ~0.1% precision and gains enormously
# from lower precision in the vector functions.
REDUCED_PRECISION_OBJS = shr_wv_sat_mod.o

# Random-number objects built from .F90 sources by the
# $(SHR_RANDNUM_FORT_OBJS) static pattern rule when DEBUG is FALSE.
# Listed by base name; the .o suffix is attached by addsuffix.
SHR_RANDNUM_FORT_OBJS = $(addsuffix .o, \
  kissvec_mod \
  mersennetwister_mod \
  dSFMT_interface \
  shr_RandNum_mod)

# Random-number objects built from .c sources with $(CC) by the
# $(SHR_RANDNUM_C_OBJS) static pattern rule when DEBUG is FALSE.
# Listed by base name; the .o suffix is attached by addsuffix.
SHR_RANDNUM_C_OBJS = $(addsuffix .o, \
  dSFMT \
  dSFMT_utils \
  kissvec)

# Per-object compile-flag overrides, active only when DEBUG is FALSE.
# Each rule is a static pattern rule: every object in the named list is
# compiled from the source file with the same stem, and $< is that source.
# Recipe lines MUST begin with a hard TAB, otherwise make stops with
# "missing separator".
#
# NOTE(review): the extra flags follow $(FFLAGS)/$(CFLAGS) on the command
# line. If the base flags also carry -fp-model source, the compiler is
# reported to favor "fast" when both are given -- re-confirm on new
# compiler versions.
ifeq ($(DEBUG),FALSE)

# Performance-critical Fortran objects: -O3 with the fast floating-point
# model and relaxed division precision.
$(PERFOBJS): %.o: %.F90
	$(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) $(FREEFLAGS) -O3 -fp-model fast -no-prec-div $<

# Objects that tolerate low-precision math-library results.
$(REDUCED_PRECISION_OBJS): %.o: %.F90
	$(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) $(FREEFLAGS) -fimf-precision=low -fp-model fast $<

# Fortran random-number objects: as PERFOBJS, plus relaxed sqrt precision
# and -qoverride-limits.
$(SHR_RANDNUM_FORT_OBJS): %.o: %.F90
	$(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) $(FREEFLAGS) -O3 -fp-model fast -no-prec-div -no-prec-sqrt -qoverride-limits $<

# C random-number objects, compiled with $(CC) and $(CFLAGS).
$(SHR_RANDNUM_C_OBJS): %.o: %.c
	$(CC) -c $(INCLDIR) $(INCS) $(CFLAGS) -O3 -fp-model fast $<

endif

0 comments on commit dad2d1b

Please sign in to comment.