Skip to content

Commit

Permalink
flush subnormals on x86 (#1709)
Browse files Browse the repository at this point in the history
* flush subnormals on x86

* less aggressive error threshold in single precision

* increase single-precision tolerance

* update tols in test_array_metadata

* update for assertVectorsClose rename

* lower tolerance in single precision
  • Loading branch information
stevengj authored Jul 29, 2021
1 parent ebaa9f7 commit ef18156
Show file tree
Hide file tree
Showing 8 changed files with 57 additions and 13 deletions.
2 changes: 1 addition & 1 deletion configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -514,7 +514,7 @@ fi

##############################################################################
# Miscellaneous function and header checks
AC_CHECK_HEADERS([sys/time.h])
AC_CHECK_HEADERS([sys/time.h immintrin.h])
AC_CHECK_FUNCS([BSDgettimeofday gettimeofday cblas_ddot cblas_daxpy jn])

##############################################################################
Expand Down
1 change: 1 addition & 0 deletions python/simulation.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@

verbosity = Verbosity(mp.cvar, 'meep', 1)

mp.set_zero_subnormals(True)

# Send output from Meep, ctlgeom, and MPB to Python's stdout
mp.set_meep_printf_callback(mp.py_master_printf_wrap)
Expand Down
10 changes: 4 additions & 6 deletions python/tests/test_array_metadata.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import meep as mp
import unittest
import numpy as np
from utils import ApproxComparisonTestCase

class TestArrayMetadata(unittest.TestCase):
class TestArrayMetadata(ApproxComparisonTestCase):

def test_array_metadata(self):
resolution = 25
Expand Down Expand Up @@ -80,11 +81,8 @@ def vec_func(r):
vec_func_sum = np.sum(W*(xm**2 + 2*ym**2))
pulse_modal_volume = np.sum(W*EpsE2)/np.max(EpsE2) * vec_func_sum

if ((mp.count_processors() % 2 == 0) and mp.is_single_precision()):
ref_val = 0.94
else:
ref_val = 1.00
self.assertAlmostEqual(cw_modal_volume/pulse_modal_volume, ref_val, places=2)
tol = 5e-2 if mp.is_single_precision() else 1e-2
self.assertClose(cw_modal_volume/pulse_modal_volume, 1.0, epsilon=tol)

if __name__ == '__main__':
unittest.main()
8 changes: 4 additions & 4 deletions python/tests/test_pw_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
import cmath
import math
import unittest
from utils import ApproxComparisonTestCase


class TestPwSource(unittest.TestCase):
class TestPwSource(ApproxComparisonTestCase):

def setUp(self):
s = 11
Expand Down Expand Up @@ -69,8 +69,8 @@ def test_pw_source(self):
pt1 = self.sim.get_field_point(mp.Ez, v1)
pt2 = self.sim.get_field_point(mp.Ez, v2)

places = 3 if mp.is_single_precision() else 7
self.assertAlmostEqual(pt1 / pt2, 27.557668029008262, places=places)
tol = 1e-4 if mp.is_single_precision() else 1e-9
self.assertClose(pt1 / pt2, 27.557668029008262, epsilon=tol)

self.assertAlmostEqual(cmath.exp(1j * self.k.dot(v1 - v2)), 0.7654030066070924 - 0.6435512702783076j)

Expand Down
2 changes: 1 addition & 1 deletion python/tests/test_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ def my_src_func(t):
sim.run(mp.after_sources(h), until_after_sources=200)
fp = sim.get_field_point(mp.Ez, mp.Vector3(1))

self.assertAlmostEqual(fp, -0.021997617628500023 + 0j)
self.assertAlmostEqual(fp, -0.021997617628500023 + 0j, 5 if mp.is_single_precision() else 7)


def amp_fun(p):
Expand Down
3 changes: 3 additions & 0 deletions src/meep.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2332,6 +2332,9 @@ class binary_partition {
std::unique_ptr<binary_partition> right;
};

// control whether CPU flushes subnormal values; see mympi.cpp
void set_zero_subnormals(bool iszero);

} /* namespace meep */

#endif /* MEEP_H */
42 changes: 42 additions & 0 deletions src/mympi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,12 @@
#include <mpi.h>
#endif

#ifdef _OPENMP
#include "omp.h"
#else
#define omp_get_num_threads() (1)
#endif

#ifdef IGNORE_SIGFPE
#include <signal.h>
#endif
Expand Down Expand Up @@ -59,6 +65,10 @@ extern "C" int feenableexcept(int EXCEPTS);
#endif
#endif

#if HAVE_IMMINTRIN_H
#include <immintrin.h>
#endif

#define UNUSED(x) (void)x // silence compiler warnings

#define MPI_REALNUM (sizeof(realnum) == sizeof(double) ? MPI_DOUBLE : MPI_FLOAT)
Expand Down Expand Up @@ -127,6 +137,37 @@ std::unique_ptr<comms_manager> create_comms_manager() {

int verbosity = 1; // defined in meep.h

/* Set CPU to flush subnormal values to zero (if iszero == true). This slightly
reduces the range of floating-point numbers, but can greatly increase the speed
in cases where subnormal values might arise (e.g. deep in the tails of
exponentially decaying sources).
See also meep#1708.
code based on github.com/JuliaLang/julia/blob/master/src/processor_x86.cpp#L1087-L1104,
which is free software under the GPL-compatible "MIT license" */
static void _set_zero_subnormals(bool iszero)
{
#if HAVE_IMMINTRIN_H
unsigned int flags = 0x00008040; // assume a non-ancient processor with SSE2, supporting both FTZ and DAZ flags
unsigned int state = _mm_getcsr();
if (iszero)
state |= flags;
else
state &= ~flags;
_mm_setcsr(state);
#endif
}
void set_zero_subnormals(bool iszero)
{
int n = omp_get_num_threads();
#ifdef _OPENMP
#pragma omp parallel for schedule(static,1)
#endif
for (int i = 0; i < n; ++i)
_set_zero_subnormals(iszero); // This has to be done in every thread for OpenMP.
}

initialize::initialize(int &argc, char **&argv) {
#ifdef HAVE_MPI
MPI_Init(&argc, &argv);
Expand All @@ -144,6 +185,7 @@ initialize::initialize(int &argc, char **&argv) {
#ifdef IGNORE_SIGFPE
signal(SIGFPE, SIG_IGN);
#endif
set_zero_subnormals(true);
t_start = wall_time();
}

Expand Down
2 changes: 1 addition & 1 deletion tests/harmonics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ int main(int argc, char **argv) {

double a2, a3, a2_2, a3_2;

double thresh = sizeof(realnum) == sizeof(float) ? 1e-4 : 1e-5;
double thresh = sizeof(realnum) == sizeof(float) ? 1e-3 : 1e-5;
harmonics(freq, 0.27e-4, 1e-4, 1.0, a2, a3);
if (different(a2, 9.80330e-07, thresh, "2nd harmonic mismatches known val")) return 1;
if (sizeof(realnum) == sizeof(float)) {
Expand Down

0 comments on commit ef18156

Please sign in to comment.