Adds stand alone test_MOM_EOS and time_MOM_EOS (#516)
- Added a simple single-threaded program to invoke EOS_unit_tests.F90
- Added a not-as-simple program to time the cost of calculate_density()
  and calculate_spec_vol() for both scalar and array APIs
  - Placed in new directory config_src/drivers/timing_tests/
- Renamed MOM_unit_test_driver.F90 to test_MOM_file_parser.F90
- Updated .testing/Makefile
  - Added list of programs in config_src/drivers/unit_tests
    - These are added to BUILDS if DO_UNIT_TESTS is not blank.
      (DO_UNIT_TESTS was an existing macro but it might be unneeded)
    - These programs are compiled with code coverage
  - Added list of programs in config_src/drivers/timing_tests
    - These programs are compiled with optimization and no coverage
  - Fixed rule for building UNIT_EXECS (which did not rebuild properly
    because the central Makefile was trying to model the dependencies
    even though those dependencies are in build/unit/Makefile.dep)
  - Added convenient targets build.unit, run.unit, build.timing, run.timing
- Timing tests currently time a loop of 1000 calls (so that the finite
  resolution of the CPU timer does not dominate the measurement) and repeat
  this for 400 samples to collect statistics on the timings. On gaea c5 this
  takes about 10 seconds. (A sketch of this measurement pattern follows this
  list.)
  - The results are written to stdout in JSON.
- Added placeholder build and run of timing_tests to GH workflow.
  - Enabled for [push,pull_request]
  - We probably will not be able to use timings from GH, but I still want to
    exercise the code so we know the timing programs aren't broken by a
    commit.
- Also added driver for string_functions_unit_tests
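
For orientation, here is a minimal Python sketch of the measurement pattern
described above (the actual drivers are Fortran programs under
config_src/drivers/timing_tests; the function label and workload below are
hypothetical stand-ins):

import json
import math
import time

N_CALLS = 1000    # inner loop: keeps the CPU timer's finite resolution small
N_SAMPLES = 400   # outer loop: repeated samples to collect statistics

def time_function(fn):
    """Return per-call timing statistics (in seconds), ready for JSON output."""
    samples = []
    for _ in range(N_SAMPLES):
        start = time.process_time()  # CPU timer
        for _ in range(N_CALLS):
            fn()
        samples.append((time.process_time() - start) / N_CALLS)
    mean = sum(samples) / N_SAMPLES
    std = (sum((s - mean)**2 for s in samples) / N_SAMPLES)**0.5
    return {'min': min(samples), 'mean': mean, 'max': max(samples),
            'std': std, 'n_samples': N_SAMPLES}

# Entries are keyed by a "module & function" label, the format read
# by tools/disp_timing.py
results = {'(EOS) calculate_density()': time_function(lambda: math.sqrt(2.0))}
print(json.dumps(results))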
adcroft authored Nov 10, 2023
1 parent 0f2a69d commit b15a9d4
Showing 13 changed files with 564 additions and 38 deletions.
12 changes: 9 additions & 3 deletions .github/workflows/coverage.yml
@@ -19,12 +19,18 @@ jobs:
 
       - uses: ./.github/actions/testing-setup
 
-      - name: Compile unit testing
-        run: make -j build/unit/MOM_unit_tests
+      - name: Compile file parser unit tests
+        run: make -j build/unit/test_MOM_file_parser
 
-      - name: Run unit tests
+      - name: Run file parser unit tests
         run: make run.cov.unit
 
+      - name: Compile unit testing
+        run: make -j build.unit
+
+      - name: Run (single processor) unit tests
+        run: make run.unit
+
       - name: Report unit test coverage to CI (PR)
         if: github.event_name == 'pull_request'
         run: make report.cov.unit REQUIRE_COVERAGE_UPLOAD=true
17 changes: 16 additions & 1 deletion .github/workflows/perfmon.yml
@@ -1,6 +1,6 @@
 name: Performance Monitor
 
-on: [pull_request]
+on: [push, pull_request]
 
 jobs:
   build-test-perfmon:
@@ -20,19 +20,34 @@ jobs:
       - uses: ./.github/actions/testing-setup
 
       - name: Compile optimized models
+        if: ${{ github.event_name == 'pull_request' }}
         run: >-
           make -j build.prof
           MOM_TARGET_SLUG=$GITHUB_REPOSITORY
           MOM_TARGET_LOCAL_BRANCH=$GITHUB_BASE_REF
           DO_REGRESSION_TESTS=true
 
       - name: Generate profile data
+        if: ${{ github.event_name == 'pull_request' }}
         run: >-
           pip install f90nml &&
           make profile
           DO_REGRESSION_TESTS=true
 
       - name: Generate perf data
+        if: ${{ github.event_name == 'pull_request' }}
         run: |
           sudo sysctl -w kernel.perf_event_paranoid=2
           make perf DO_REGRESSION_TESTS=true
+
+      - name: Compile timing tests
+        run: |
+          make -j build.timing
+
+      - name: Run timing tests
+        run: |
+          make -j run.timing
+
+      - name: Display timing results
+        run: |
+          make -j show.timing
71 changes: 55 additions & 16 deletions .testing/Makefile
Expand Up @@ -116,6 +116,9 @@ DO_PROFILE ?=
# Enable code coverage runs
DO_COVERAGE ?=

# Enable code coverage runs
DO_UNIT_TESTS ?=

# Report failure if coverage report is not uploaded
REQUIRE_COVERAGE_UPLOAD ?=

@@ -151,10 +154,16 @@ ifeq ($(DO_PROFILE), true)
   BUILDS += opt/MOM6 opt_target/MOM6
 endif
 
-# Unit testing
-UNIT_EXECS ?= MOM_unit_tests
+# Coverage
 ifeq ($(DO_COVERAGE), true)
-  BUILDS += cov/MOM6 $(foreach e, $(UNIT_EXECS), unit/$(e))
+  BUILDS += cov/MOM6
+endif
+
+# Unit testing (or coverage)
+UNIT_EXECS ?= $(basename $(notdir $(wildcard ../config_src/drivers/unit_tests/*.F90) ) )
+TIMING_EXECS ?= $(basename $(notdir $(wildcard ../config_src/drivers/timing_tests/*.F90) ) )
+ifneq (X$(DO_COVERAGE)$(DO_UNIT_TESTS)X, XX)
+  BUILDS += $(foreach e, $(UNIT_EXECS), unit/$(e))
 endif
 
 ifeq ($(DO_PROFILE), false)
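
The wildcard discovery above means a new driver dropped into
config_src/drivers/unit_tests or config_src/drivers/timing_tests is picked up
automatically. Roughly, the chain of Make functions is equivalent to this
Python sketch (for illustration only, not part of the commit):

from glob import glob
from os.path import basename, splitext

# $(basename $(notdir $(wildcard ...))): strip the directory, then the
# extension, e.g. ../config_src/drivers/unit_tests/test_MOM_EOS.F90 -> test_MOM_EOS
unit_execs = [splitext(basename(f))[0]
              for f in glob('../config_src/drivers/unit_tests/*.F90')]
timing_execs = [splitext(basename(f))[0]
                for f in glob('../config_src/drivers/timing_tests/*.F90')]
print(unit_execs, timing_execs)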
@@ -258,13 +267,15 @@ build/coupled/Makefile: MOM_ENV += $(SYMMETRIC_FCFLAGS) $(MOM_LDFLAGS)
 build/nuopc/Makefile: MOM_ENV += $(SYMMETRIC_FCFLAGS) $(MOM_LDFLAGS)
 build/cov/Makefile: MOM_ENV += $(COV_FCFLAGS) $(COV_LDFLAGS)
 build/unit/Makefile: MOM_ENV += $(COV_FCFLAGS) $(COV_LDFLAGS)
+build/timing/Makefile: MOM_ENV += $(OPT_FCFLAGS) $(MOM_LDFLAGS)
 
 # Configure script flags
 MOM_ACFLAGS := --with-framework=$(FRAMEWORK)
 build/openmp/Makefile: MOM_ACFLAGS += --enable-openmp
 build/coupled/Makefile: MOM_ACFLAGS += --with-driver=FMS_cap
 build/nuopc/Makefile: MOM_ACFLAGS += --with-driver=nuopc_cap
 build/unit/Makefile: MOM_ACFLAGS += --with-driver=unit_tests
+build/timing/Makefile: MOM_ACFLAGS += --with-driver=timing_tests
 
 # Fetch regression target source code
 build/target/Makefile: | $(TARGET_CODEBASE)
@@ -276,10 +287,15 @@ build/target_codebase/configure: $(TARGET_SOURCE)
 
 
 # Build executables
-$(foreach e,$(UNIT_EXECS),build/unit/$(e)): build/unit/Makefile $(MOM_SOURCE)
-	cd $(@D) && $(TIME) $(MAKE) -j
-build/%/MOM6: build/%/Makefile $(MOM_SOURCE)
-	cd $(@D) && $(TIME) $(MAKE) -j
+build/unit/test_%: build/unit/Makefile FORCE
+	cd $(@D) && $(TIME) $(MAKE) $(@F) -j
+build/unit/Makefile: $(foreach e,$(UNIT_EXECS),../config_src/drivers/unit_tests/$(e).F90)
+build/timing/time_%: build/timing/Makefile FORCE
+	cd $(@D) && $(TIME) $(MAKE) $(@F) -j
+build/timing/Makefile: $(foreach e,$(TIMING_EXECS),../config_src/drivers/timing_tests/$(e).F90)
+build/%/MOM6: build/%/Makefile FORCE
+	cd $(@D) && $(TIME) $(MAKE) $(@F) -j
+FORCE: ;
 
 
 # Use autoconf to construct the Makefile for each target
@@ -655,28 +671,47 @@ test.summary:
 .PHONY: run.cov.unit
 run.cov.unit: build/unit/MOM_file_parser_tests.F90.gcov
 
-$(WORKSPACE)/work/unit/std.out: build/unit/MOM_unit_tests
+.PHONY: build.unit
+build.unit: $(foreach f, $(UNIT_EXECS), build/unit/$(f))
+.PHONY: run.unit
+run.unit: $(foreach f, $(UNIT_EXECS), work/unit/$(f).out)
+.PHONY: build.timing
+build.timing: $(foreach f, $(TIMING_EXECS), build/timing/$(f))
+.PHONY: run.timing
+run.timing: $(foreach f, $(TIMING_EXECS), work/timing/$(f).out)
+.PHONY: show.timing
+show.timing: $(foreach f, $(TIMING_EXECS), work/timing/$(f).show)
+$(WORKSPACE)/work/timing/%.show:
+	./tools/disp_timing.py $(@:.show=.out)
+
+# General rule to run a unit test executable
+# Pattern is to run build/unit/executable and direct output to executable.out
+$(WORKSPACE)/work/unit/%.out: build/unit/%
+	@mkdir -p $(@D)
+	cd $(@D) ; $(TIME) $(MPIRUN) -n 1 $(abspath $<) 2> >(tee $*.err) > $*.out
+
+$(WORKSPACE)/work/unit/test_MOM_file_parser.out: build/unit/test_MOM_file_parser
 	if [ $(REPORT_COVERAGE) ]; then \
 		find build/unit -name *.gcda -exec rm -f '{}' \; ; \
 	fi
+	rm -rf $(@D)
 	mkdir -p $(@D)
 	cd $(@D) \
-		&& $(TIME) $(MPIRUN) -n 1 $(abspath $<) 2> std.err > std.out \
-		&& rm -f input.nml logfile.0000*.out *_input MOM_parameter_doc.* \
+		&& $(TIME) $(MPIRUN) -n 1 $(abspath $<) 2> test_MOM_file_parser.err > test_MOM_file_parser.out \
 		|| !( \
-			cat std.out | tail -n 100 ; \
-			cat std.err | tail -n 100 ; \
+			cat test_MOM_file_parser.out | tail -n 100 ; \
+			cat test_MOM_file_parser.err | tail -n 100 ; \
 		)
 	cd $(@D) \
-		&& $(TIME) $(MPIRUN) -n 2 $(abspath $<) 2> p2.std.err > p2.std.out \
+		&& $(TIME) $(MPIRUN) -n 2 $(abspath $<) 2> p2.test_MOM_file_parser.err > p2.test_MOM_file_parser.out \
 		|| !( \
-			cat p2.std.out | tail -n 100 ; \
-			cat p2.std.err | tail -n 100 ; \
+			cat p2.test_MOM_file_parser.out | tail -n 100 ; \
+			cat p2.test_MOM_file_parser.err | tail -n 100 ; \
 		)
 
 # NOTE: .gcov actually depends on .gcda, but .gcda is produced with std.out
 # TODO: Replace $(WORKSPACE)/work/unit/std.out with *.gcda?
-build/unit/MOM_file_parser_tests.F90.gcov: $(WORKSPACE)/work/unit/std.out
+build/unit/MOM_file_parser_tests.F90.gcov: $(WORKSPACE)/work/unit/test_MOM_file_parser.out
 	cd $(@D) \
 		&& gcov -b *.gcda > gcov.unit.out
 	find $(@D) -name "*.gcov" -exec sed -i -r 's/^( *[0-9]*)\*:/ \1:/g' {} \;
@@ -693,6 +728,10 @@ report.cov.unit: build/unit/MOM_file_parser_tests.F90.gcov codecov
 		if [ "$(REQUIRE_COVERAGE_UPLOAD)" = true ] ; then false ; fi ; \
 	}
 
+$(WORKSPACE)/work/timing/%.out: build/timing/% FORCE
+	@mkdir -p $(@D)
+	@echo Running $< in $(@D)
+	@cd $(@D) ; $(TIME) $(MPIRUN) -n 1 $(abspath $<) 2> $*.err > $*.out
 
 #---
 # Profiling based on FMS clocks
11 changes: 11 additions & 0 deletions .testing/README.rst
@@ -22,6 +22,17 @@ Usage
 ``make clean``
   Delete the MOM6 test executables and dependency builds (FMS).
 
+``make -j build.unit``
+  Build the unit test programs in config_src/drivers/unit_tests
+
+``make -j run.unit``
+  Run the unit test programs from config_src/drivers/unit_tests in $(WORKSPACE)/work/unit
+
+``make -j build.timing``
+  Build the timing test programs in config_src/drivers/timing_tests
+
+``make -j run.timing``
+  Run the timing test programs from config_src/drivers/timing_tests in $(WORKSPACE)/work/timing
 
 Configuration
 =============
133 changes: 133 additions & 0 deletions .testing/tools/disp_timing.py
@@ -0,0 +1,133 @@
#!/usr/bin/env python3

from __future__ import print_function

import argparse
import json
import math

scale = 1e6  # micro-seconds (should make this dynamic)


def display_timing_file(file, show_all):
    """Parse a JSON file of timing results and pretty-print the results"""

    with open(file) as json_file:
        timing_dict = json.load(json_file)

    print("(Times measured in %5.0e seconds)" % (1./scale))
    print("  Min time   Module & function")
    for sub in timing_dict.keys():
        tmin = timing_dict[sub]['min'] * scale
        print("%10.4e  %s" % (tmin, sub))

        if show_all:
            tmean = timing_dict[sub]['mean'] * scale
            tmax = timing_dict[sub]['max'] * scale
            tstd = timing_dict[sub]['std'] * scale
            nsamp = timing_dict[sub]['n_samples']
            tsstd = tstd / math.sqrt(nsamp)
            print("           (" +
                  "mean = %10.4e " % (tmean) +
                  "±%7.1e, " % (tsstd) +
                  "max = %10.4e, " % (tmax) +
                  "std = %8.2e, " % (tstd) +
                  "# = %d)" % (nsamp))


def compare_timing_files(file, ref, show_all, significance_threshold):
    """Read and compare two JSON files of timing results"""

    with open(file) as json_file:
        timing_dict = json.load(json_file)

    with open(ref) as json_file:
        ref_dict = json.load(json_file)

    print("(Times measured in %5.0e seconds)" % (1./scale))
    print(" Delta (%)   Module & function")
    for sub in {**ref_dict, **timing_dict}.keys():
        T1 = ref_dict.get(sub)
        T2 = timing_dict.get(sub)
        if T1 is not None:
            # stats for reference (old)
            tmin1 = T1['min'] * scale
            tmean1 = T1['mean'] * scale
        if T2 is not None:
            # stats for current (new)
            tmin2 = T2['min'] * scale
            tmean2 = T2['mean'] * scale
        if (T1 is not None) and (T2 is not None):
            # change in actual minimum as percentage of old
            dt = (tmin2 - tmin1) * 100 / tmin1
            if dt < -significance_threshold:
                color = '\033[92m'
            elif dt > significance_threshold:
                color = '\033[91m'
            else:
                color = ''
            print("%s%+10.4f%%\033[0m  %s" % (color, dt, sub))
        else:
            if T2 is None:
                print("   removed   %s" % (sub))
            else:
                print("     added   %s" % (sub))

        if show_all:
            if T2 is None:
                print("        --")
            else:
                tmax2 = T2['max'] * scale
                tstd2 = T2['std'] * scale
                n2 = T2['n_samples']
                tsstd2 = tstd2 / math.sqrt(n2)
                print("  %10.4e (" % (tmin2) +
                      "mean = %10.4e " % (tmean2) +
                      "±%7.1e, " % (tsstd2) +
                      "max=%10.4e, " % (tmax2) +
                      "std=%8.2e, " % (tstd2) +
                      "# = %d)" % (n2))
            if T1 is None:
                print("        --")
            else:
                tmax1 = T1['max'] * scale
                tstd1 = T1['std'] * scale
                n1 = T1['n_samples']
                tsstd1 = tstd1 / math.sqrt(n1)
                print("  %10.4e (" % (tmin1) +
                      "mean = %10.4e " % (tmean1) +
                      "±%7.1e, " % (tsstd1) +
                      "max=%10.4e, " % (tmax1) +
                      "std=%8.2e, " % (tstd1) +
                      "# = %d)" % (n1))


# Parse arguments
parser = argparse.ArgumentParser(
    description="Beautify timing output from MOM6 timing tests."
)
parser.add_argument(
    'file',
    help="File to process."
)
parser.add_argument(
    '-a', '--all',
    action='store_true',
    help="Display all metrics rather than just the minimum time."
)
parser.add_argument(
    '-t', '--threshold',
    default=6.0, type=float,
    help="Significance threshold to flag (percentage)."
)
parser.add_argument(
    '-r', '--reference',
    help="Reference file to compare against."
)
args = parser.parse_args()

# Do the thing
if args.reference is None:
    display_timing_file(args.file, args.all)
else:
    compare_timing_files(args.file, args.reference, args.all, args.threshold)
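
A quick way to exercise the tool is to feed it a hand-made timing file; the
entry label and numbers below are hypothetical, but the JSON fields (min,
mean, max, std, n_samples) are the ones the script reads. A sketch, run from
the .testing directory:

import json
import subprocess

sample = {'(EOS) calculate_density()':
          {'min': 1.02e-7, 'mean': 1.10e-7, 'max': 1.50e-7,
           'std': 8.0e-9, 'n_samples': 400}}
with open('time_MOM_EOS.out', 'w') as f:
    json.dump(sample, f)

# Pretty-print one file, then compare it against itself (delta is +0.0000%)
subprocess.run(['./tools/disp_timing.py', '-a', 'time_MOM_EOS.out'], check=True)
subprocess.run(['./tools/disp_timing.py', '-r', 'time_MOM_EOS.out',
                'time_MOM_EOS.out'], check=True)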