diff --git a/doc/docs/Python_User_Interface.md b/doc/docs/Python_User_Interface.md
index 581dd3382..811be2601 100644
--- a/doc/docs/Python_User_Interface.md
+++ b/doc/docs/Python_User_Interface.md
@@ -3602,7 +3602,9 @@ current simulation time.
 + `cmplx`: `boolean`; if `True`, return complex-valued data otherwise return
   real-valued data (default).
 
-+ `arr`: optional field to pass a pre-allocated NumPy array of the correct size,
++ `arr`: optional parameter to pass a pre-allocated NumPy array of the correct size and
+  type (either `numpy.float32` or `numpy.float64` depending on the [floating-point precision
+  of the fields and materials](Build_From_Source.md#floating-point-precision-of-the-fields-and-materials-arrays))
   which will be overwritten with the field/material data instead of allocating a
   new array.  Normally, this will be the array returned from a previous call to
   `get_array` for a similar slice, allowing one to re-use `arr` (e.g., when
@@ -3655,7 +3657,8 @@ def get_dft_array(self, dft_obj, component, num_freq):
 
 <div class="method_docstring" markdown="1">
 
-Returns the Fourier-transformed fields as a NumPy array.
+Returns the Fourier-transformed fields as a NumPy array. The type is either `numpy.complex64`
+or `numpy.complex128` depending on the [floating-point precision of the fields](Build_From_Source.md#floating-point-precision-of-the-fields-and-materials-arrays).
 
 **Parameters:**
 
diff --git a/python/adjoint/optimization_problem.py b/python/adjoint/optimization_problem.py
index 2fb45d5ca..bce235748 100644
--- a/python/adjoint/optimization_problem.py
+++ b/python/adjoint/optimization_problem.py
@@ -248,7 +248,7 @@ def adjoint_run(self):
 
             # Store adjoint fields for each design set of design variables
             self.D_a.append(utils.gather_design_region_fields(self.sim,self.design_region_monitors,self.frequencies))
-        
+
         # reset the m number
         if utils._check_if_cylindrical(self.sim):
             self.sim.m = -self.sim.m
diff --git a/python/adjoint/utils.py b/python/adjoint/utils.py
index 04b041fe7..38ffbd8f9 100644
--- a/python/adjoint/utils.py
+++ b/python/adjoint/utils.py
@@ -37,14 +37,14 @@ def __init__(
 
     def update_design_parameters(self, design_parameters):
         self.design_parameters.update_weights(design_parameters)
-    
+
     def update_beta(self,beta):
         self.design_parameters.beta=beta
 
     def get_gradient(self, sim, fields_a, fields_f, frequencies, finite_difference_step):
         num_freqs = onp.array(frequencies).size
         shapes = []
-        '''We have the option to linear scale the gradients up front
+        '''We have the option to linearly scale the gradients up front
         using the scalegrad parameter (leftover from MPB API). Not
         currently needed for any existing feature (but available for
         future use)'''
@@ -54,16 +54,18 @@ def get_gradient(self, sim, fields_a, fields_f, frequencies, finite_difference_s
             returns a singleton element for the forward and adjoint fields.
             This only occurs when we are in 2D and only working with a particular
             polarization (as the other fields are never stored). For example, the
-            2D in-plane polarization consists of a single scalar Ez field 
+            2D in-plane polarization consists of a single scalar Ez field
             (technically, meep doesn't store anything for these cases, but get_dft_array
             still returns 0).
-            
-            Our get_gradient algorithm, however, requires we pass an array of 
+
+            Our get_gradient algorithm, however, requires we pass an array of
             zeros with the proper shape as the design_region.'''
             spatial_shape = sim.get_array_slice_dimensions(component, vol=self.volume)[0]
             if (fields_a[component_idx][0,...].size == 1):
-                fields_a[component_idx] = onp.zeros(onp.insert(spatial_shape,0,num_freqs))
-                fields_f[component_idx] = onp.zeros(onp.insert(spatial_shape,0,num_freqs))
+                fields_a[component_idx] = onp.zeros(onp.insert(spatial_shape,0,num_freqs),
+                                                    dtype=onp.float32 if mp.is_single_precision() else onp.float64)
+                fields_f[component_idx] = onp.zeros(onp.insert(spatial_shape,0,num_freqs),
+                                                    dtype=onp.float32 if mp.is_single_precision() else onp.float64)
             if _check_if_cylindrical(sim):
                 '''For some reason, get_dft_array returns the field
                 components in a different order than the convention used
@@ -78,15 +80,16 @@ def get_gradient(self, sim, fields_a, fields_f, frequencies, finite_difference_s
         shapes = onp.asarray(shapes).flatten(order='C')
         fields_a = onp.concatenate(fields_a)
         fields_f = onp.concatenate(fields_f)
-        
+
         grad = onp.zeros((num_freqs, self.num_design_params))  # preallocate
         geom_list = sim.geometry
         f = sim.fields
         vol = sim._fit_volume_to_simulation(self.volume)
-        
+
         # compute the gradient
         mp._get_gradient(grad,scalegrad,fields_a,fields_f,
-        sim.gv,vol.swigobj,onp.array(frequencies),sim.geps,shapes,finite_difference_step)
+                         sim.gv,vol.swigobj,onp.array(frequencies),
+                         sim.geps,shapes,finite_difference_step)
         return onp.squeeze(grad).T
 
 def _check_if_cylindrical(sim):
@@ -203,7 +206,7 @@ def gather_design_region_fields(
       fairly awkward to inspect directly.  Their primary use case is supporting
       gradient calculations.
     """
-    fwd_fields = []
+    design_region_fields = []
     for monitor in design_region_monitors:
         fields_by_component = []
         for component in _compute_components(simulation):
@@ -212,8 +215,8 @@ def gather_design_region_fields(
                 fields = simulation.get_dft_array(monitor, component, freq_idx)
                 fields_by_freq.append(_make_at_least_nd(fields))
             fields_by_component.append(onp.stack(fields_by_freq))
-        fwd_fields.append(fields_by_component)
-    return fwd_fields
+        design_region_fields.append(fields_by_component)
+    return design_region_fields
 
 
 def validate_and_update_design(
@@ -258,7 +261,7 @@ def create_adjoint_sources(
         monitors: Iterable[ObjectiveQuantity],
         monitor_values_grad: onp.ndarray) -> List[mp.Source]:
     monitor_values_grad = onp.asarray(monitor_values_grad,
-                                      dtype=onp.complex128)
+                                      dtype=onp.complex64 if mp.is_single_precision() else onp.complex128)
     if not onp.any(monitor_values_grad):
         raise RuntimeError(
             'The gradient of all monitor values is zero, which '
@@ -273,7 +276,7 @@ def create_adjoint_sources(
     for monitor_idx, monitor in enumerate(monitors):
         # `dj` for each monitor will have a shape of (num frequencies,)
         dj = onp.asarray(monitor_values_grad[monitor_idx],
-                         dtype=onp.complex128)
+                         dtype=onp.complex64 if mp.is_single_precision() else onp.complex128)
         if onp.any(dj):
             adjoint_sources += monitor.place_adjoint_source(dj)
     assert adjoint_sources
diff --git a/python/adjoint/wrapper.py b/python/adjoint/wrapper.py
index be793573f..3fad98c10 100644
--- a/python/adjoint/wrapper.py
+++ b/python/adjoint/wrapper.py
@@ -229,7 +229,9 @@ def _simulate_fwd(design_variables):
         def _simulate_rev(res, monitor_values_grad):
             """Runs adjoint simulation, returning VJP of design wrt monitor values."""
             fwd_fields = jax.tree_map(
-                lambda x: onp.asarray(x, dtype=onp.complex128), res[0])
+                lambda x: onp.asarray(x,
+                                      dtype=onp.complex64 if mp.is_single_precision() else onp.complex128),
+                res[0])
             design_variable_shapes = res[1]
             adj_fields = self._run_adjoint_simulation(monitor_values_grad)
             vjps = self._calculate_vjps(fwd_fields, adj_fields,
diff --git a/python/meep.i b/python/meep.i
index f1dc8012b..bb7f1c80c 100644
--- a/python/meep.i
+++ b/python/meep.i
@@ -150,7 +150,7 @@ static std::complex<double> py_amp_func_wrap(const meep::vec &v) {
     return ret;
 }
 
-static std::complex<double> py_field_func_wrap(const std::complex<double> *fields,
+static std::complex<double> py_field_func_wrap(const std::complex<meep::realnum> *fields,
                                                const meep::vec &loc,
                                                void *data_) {
     SWIG_PYTHON_THREAD_SCOPED_BLOCK;
@@ -441,25 +441,25 @@ PyObject *_get_dft_array(meep::fields *f, dft_type dft, meep::component c, int n
     // Return value: New reference
     int rank;
     size_t dims[3];
-    std::complex<double> *dft_arr = f->get_dft_array(dft, c, num_freq, &rank, dims);
+    std::complex<meep::realnum> *dft_arr = f->get_dft_array(dft, c, num_freq, &rank, dims);
 
-    if (dft_arr==NULL){ // this can happen e.g. if component c vanishes by symmetry
-     std::complex<double> d[1] = {std::complex<double>(0,0)};
-     return PyArray_SimpleNewFromData(0, 0, NPY_CDOUBLE, d);
+    if (dft_arr == NULL) { // this can happen e.g. if component c vanishes by symmetry
+      std::complex<double> d[1] = {std::complex<double>(0,0)};
+      return PyArray_SimpleNewFromData(0, 0, sizeof(meep::realnum) == sizeof(float) ? NPY_CFLOAT : NPY_CDOUBLE, d);
     }
 
     if (rank == 0) // singleton results
-     return PyArray_SimpleNewFromData(0, 0, NPY_CDOUBLE, dft_arr);
+      return PyArray_SimpleNewFromData(0, 0, sizeof(meep::realnum) == sizeof(float) ? NPY_CFLOAT : NPY_CDOUBLE, dft_arr);
 
     size_t length = 1;
     npy_intp *arr_dims = new npy_intp[rank];
     for (int i = 0; i < rank; ++i) {
-        arr_dims[i] = dims[i];       // implicit size_t -> int cast, presumed safe for individual array dimensions
-        length *= dims[i];
+         arr_dims[i] = dims[i];       // implicit size_t -> int cast, presumed safe for individual array dimensions
+         length *= dims[i];
     }
 
-    PyObject *py_arr = PyArray_SimpleNew(rank, arr_dims, NPY_CDOUBLE);
-    memcpy(PyArray_DATA((PyArrayObject*)py_arr), dft_arr, sizeof(std::complex<double>) * length);
+    PyObject *py_arr = PyArray_SimpleNew(rank, arr_dims, sizeof(meep::realnum) == sizeof(float) ? NPY_CFLOAT : NPY_CDOUBLE);
+    memcpy(PyArray_DATA((PyArrayObject*)py_arr), dft_arr, sizeof(std::complex<meep::realnum>) * length);
     delete[] dft_arr;
     if (arr_dims) delete[] arr_dims;
 
@@ -844,8 +844,8 @@ meep::volume_list *make_volume_list(const meep::volume &v, int c,
 
 %inline %{
 void _get_gradient(PyObject *grad, double scalegrad, PyObject *fields_a, PyObject *fields_f, 
-    meep::grid_volume *grid_volume, meep::volume *where, PyObject *frequencies, 
-    meep_geom::geom_epsilon *geps, PyObject *fields_shapes, double fd_step) {
+                   meep::grid_volume *grid_volume, meep::volume *where, PyObject *frequencies,
+                   meep_geom::geom_epsilon *geps, PyObject *fields_shapes, double fd_step) {
     // clean the gradient array
     PyArrayObject *pao_grad = (PyArrayObject *)grad;
     if (!PyArray_Check(pao_grad)) meep::abort("grad parameter must be numpy array.");
@@ -859,14 +859,14 @@ void _get_gradient(PyObject *grad, double scalegrad, PyObject *fields_a, PyObjec
     if (!PyArray_Check(pao_fields_a)) meep::abort("adjoint fields parameter must be numpy array.");
     if (!PyArray_ISCARRAY(pao_fields_a)) meep::abort("Numpy adjoint fields array must be C-style contiguous.");
     if (PyArray_NDIM(pao_fields_a) !=1) {meep::abort("Numpy adjoint fields array must have 1 dimension.");}
-    std::complex<double> *fields_a_c = (std::complex<double> *)PyArray_DATA(pao_fields_a);
+    std::complex<meep::realnum> *fields_a_c = (std::complex<meep::realnum> *)PyArray_DATA(pao_fields_a);
 
     // clean the forward fields array
     PyArrayObject *pao_fields_f = (PyArrayObject *)fields_f;
     if (!PyArray_Check(pao_fields_f)) meep::abort("forward fields parameter must be numpy array.");
     if (!PyArray_ISCARRAY(pao_fields_f)) meep::abort("Numpy forward fields array must be C-style contiguous.");
     if (PyArray_NDIM(pao_fields_f) !=1) {meep::abort("Numpy forward fields array must have 1 dimension.");}
-    std::complex<double> *fields_f_c = (std::complex<double> *)PyArray_DATA(pao_fields_f);
+    std::complex<meep::realnum> *fields_f_c = (std::complex<meep::realnum> *)PyArray_DATA(pao_fields_f);
 
     // clean shapes array
     PyArrayObject *pao_fields_shapes = (PyArrayObject *)fields_shapes;
@@ -1025,20 +1025,20 @@ void _get_gradient(PyObject *grad, double scalegrad, PyObject *fields_a, PyObjec
     $1 = (size_t *)array_data($input);
 }
 
-%typecheck(SWIG_TYPECHECK_POINTER, fragment="NumPy_Fragments") double* slice {
+%typecheck(SWIG_TYPECHECK_POINTER, fragment="NumPy_Fragments") meep::realnum* slice {
     $1 = is_array($input);
 }
 
-%typemap(in, fragment="NumPy_Macros") double* slice {
-    $1 = (double *)array_data($input);
+%typemap(in, fragment="NumPy_Macros") meep::realnum* slice {
+    $1 = (meep::realnum *)array_data($input);
 }
 
-%typecheck(SWIG_TYPECHECK_POINTER, fragment="NumPy_Fragments") std::complex<double>* slice {
+%typecheck(SWIG_TYPECHECK_POINTER, fragment="NumPy_Fragments") std::complex<meep::realnum>* slice {
     $1 = is_array($input);
 }
 
-%typemap(in) std::complex<double>* slice {
-    $1 = (std::complex<double> *)array_data($input);
+%typemap(in) std::complex<meep::realnum>* slice {
+    $1 = (std::complex<meep::realnum> *)array_data($input);
 }
 
 %typecheck(SWIG_TYPECHECK_POINTER) meep::component {
diff --git a/python/simulation.py b/python/simulation.py
index 5fbf66155..b99dccf3b 100644
--- a/python/simulation.py
+++ b/python/simulation.py
@@ -3337,7 +3337,9 @@ def get_array(self, component=None, vol=None, center=None, size=None, cmplx=None
         + `cmplx`: `boolean`; if `True`, return complex-valued data otherwise return
           real-valued data (default).
 
-        + `arr`: optional field to pass a pre-allocated NumPy array of the correct size,
+        + `arr`: optional parameter to pass a pre-allocated NumPy array of the correct size and
+          type (either `numpy.float32` or `numpy.float64` depending on the [floating-point precision
+          of the fields and materials](Build_From_Source.md#floating-point-precision-of-the-fields-and-materials-arrays))
           which will be overwritten with the field/material data instead of allocating a
           new array.  Normally, this will be the array returned from a previous call to
           `get_array` for a similar slice, allowing one to re-use `arr` (e.g., when
@@ -3407,7 +3409,10 @@ def get_array(self, component=None, vol=None, center=None, size=None, cmplx=None
             arr = np.require(arr, requirements=['C', 'W'])
 
         else:
-            arr = np.zeros(dims, dtype=np.complex128 if cmplx else np.float64)
+            if mp.is_single_precision():
+                arr = np.zeros(dims, dtype=np.complex64 if cmplx else np.float32)
+            else:
+                arr = np.zeros(dims, dtype=np.complex128 if cmplx else np.float64)
 
         if np.iscomplexobj(arr):
             self.fields.get_complex_array_slice(v, component, arr, frequency, snap)
@@ -3418,7 +3423,8 @@ def get_array(self, component=None, vol=None, center=None, size=None, cmplx=None
 
     def get_dft_array(self, dft_obj, component, num_freq):
         """
-        Returns the Fourier-transformed fields as a NumPy array.
+        Returns the Fourier-transformed fields as a NumPy array. The type is either `numpy.complex64`
+        or `numpy.complex128` depending on the [floating-point precision of the fields](Build_From_Source.md#floating-point-precision-of-the-fields-and-materials-arrays).
 
         **Parameters:**
 
@@ -3464,7 +3470,7 @@ def get_source(self, component, vol=None, center=None, size=None):
         dim_sizes = np.zeros(3, dtype=np.uintp)
         mp._get_array_slice_dimensions(self.fields, v, dim_sizes, True, False)
         dims = [s for s in dim_sizes if s != 0]
-        arr = np.zeros(dims, dtype=np.complex128)
+        arr = np.zeros(dims, dtype=np.complex64 if mp.is_single_precision() else np.complex128)
         self.fields.get_source_slice(v, component, arr)
         return arr
 
diff --git a/python/tests/test_adjoint_jax.py b/python/tests/test_adjoint_jax.py
index 768d7ac82..17994a8fa 100644
--- a/python/tests/test_adjoint_jax.py
+++ b/python/tests/test_adjoint_jax.py
@@ -175,7 +175,8 @@ def test_design_region_monitor_helpers(self):
         for value in design_region_fields[0]:
             self.assertIsInstance(value, onp.ndarray)
             self.assertEqual(value.ndim, 4)  # dims: freq, x, y, pad
-            self.assertEqual(value.dtype, onp.complex128)
+            self.assertEqual(value.dtype,
+                             onp.complex64 if mp.is_single_precision() else onp.complex128)
 
 
 class WrapperTest(ApproxComparisonTestCase):
diff --git a/python/tests/test_adjoint_solver.py b/python/tests/test_adjoint_solver.py
index dae6e80f9..1f4efa360 100644
--- a/python/tests/test_adjoint_solver.py
+++ b/python/tests/test_adjoint_solver.py
@@ -15,7 +15,8 @@
 resolution = 30
 
 silicon = mp.Medium(epsilon=12)
-sapphire = mp.Medium(epsilon_diag=(10.225,10.225,9.95),epsilon_offdiag=(-0.825,-0.55*np.sqrt(3/2),0.55*np.sqrt(3/2)))
+sapphire = mp.Medium(epsilon_diag=(10.225,10.225,9.95),
+                     epsilon_offdiag=(-0.825,-0.55*np.sqrt(3/2),0.55*np.sqrt(3/2)))
 
 sxy = 5.0
 cell_size = mp.Vector3(sxy,sxy,0)
@@ -469,7 +470,7 @@ def test_complex_fields(self):
 
             ## compare objective results
             print("Ez2 -- adjoint solver: {}, traditional simulation: {}".format(adjsol_obj,Ez2_unperturbed))
-            self.assertClose(adjsol_obj,Ez2_unperturbed,epsilon=1e-8)
+            self.assertClose(adjsol_obj,Ez2_unperturbed,epsilon=1e-6)
 
             ## compute perturbed |Ez|^2
             Ez2_perturbed = forward_simulation_complex_fields(p+dp, frequencies)
@@ -533,7 +534,7 @@ def test_offdiagonal(self):
             adj_scale = (dp[None,:]@adjsol_grad).flatten()
             fd_grad = S12_perturbed-S12_unperturbed
             print("Directional derivative -- adjoint solver: {}, FD: {}".format(adj_scale,fd_grad))
-            tol = 0.04
+            tol = 0.05 if mp.is_single_precision() else 0.04
             self.assertClose(adj_scale,fd_grad,epsilon=tol)
 if __name__ == '__main__':
     unittest.main()
diff --git a/python/tests/test_cavity_arrayslice.py b/python/tests/test_cavity_arrayslice.py
index bbedbebe6..42f758b6f 100644
--- a/python/tests/test_cavity_arrayslice.py
+++ b/python/tests/test_cavity_arrayslice.py
@@ -70,7 +70,7 @@ def test_2d_slice(self):
 
     def test_1d_slice_user_array(self):
         self.sim.run(until_after_sources=0)
-        arr = np.zeros(126, dtype=np.float64)
+        arr = np.zeros(126, dtype=np.float32 if mp.is_single_precision() else np.float64)
         vol = mp.Volume(center=self.center_1d, size=self.size_1d)
         self.sim.get_array(mp.Hz, vol, arr=arr)
         tol = 1e-5 if mp.is_single_precision() else 1e-8
@@ -78,7 +78,7 @@ def test_1d_slice_user_array(self):
 
     def test_2d_slice_user_array(self):
         self.sim.run(until_after_sources=0)
-        arr = np.zeros((126, 38), dtype=np.float64)
+        arr = np.zeros((126, 38), dtype=np.float32 if mp.is_single_precision() else np.float64)
         vol = mp.Volume(center=self.center_2d, size=self.size_2d)
         self.sim.get_array(mp.Hz, vol, arr=arr)
         tol = 1e-5 if mp.is_single_precision() else 1e-8
@@ -106,14 +106,14 @@ def test_1d_complex_slice(self):
         self.sim.run(until_after_sources=0)
         vol = mp.Volume(center=self.center_1d, size=self.size_1d)
         hl_slice1d = self.sim.get_array(mp.Hz, vol, cmplx=True)
-        self.assertTrue(hl_slice1d.dtype == np.complex128)
+        self.assertTrue(hl_slice1d.dtype == np.complex64 if mp.is_single_precision() else np.complex128)
         self.assertTrue(hl_slice1d.shape[0] == 126)
 
     def test_2d_complex_slice(self):
         self.sim.run(until_after_sources=0)
         vol = mp.Volume(center=self.center_2d, size=self.size_2d)
         hl_slice2d = self.sim.get_array(mp.Hz, vol, cmplx=True)
-        self.assertTrue(hl_slice2d.dtype == np.complex128)
+        self.assertTrue(hl_slice2d.dtype == np.complex64 if mp.is_single_precision() else np.complex128)
         self.assertTrue(hl_slice2d.shape[0] == 126 and hl_slice2d.shape[1] == 38)
 
 
diff --git a/scheme/meep.i b/scheme/meep.i
index b88885e5d..9c8929ba8 100644
--- a/scheme/meep.i
+++ b/scheme/meep.i
@@ -33,9 +33,9 @@ static inline std::complex<double> my_complex_func2(double t, void *f) {
 }
 
 typedef struct { SCM func; int nf; } my_field_func_data;
-static inline std::complex<double> my_field_func(const std::complex<double> *fields,
-					    const meep::vec &loc,
-					    void *data_) {
+static inline std::complex<double> my_field_func(const std::complex<meep::realnum> *fields,
+                                                 const meep::vec &loc,
+                                                 void *data_) {
   my_field_func_data *data = (my_field_func_data *) data_;
   int num_items = data->nf;
   cnumber *items = new cnumber[num_items];
diff --git a/src/array_slice.cpp b/src/array_slice.cpp
index b9905c16f..cb7d7aff3 100644
--- a/src/array_slice.cpp
+++ b/src/array_slice.cpp
@@ -43,17 +43,17 @@ constexpr size_t ARRAY_TO_ALL_BUFSIZE = 1 << 16; // Use (64k * 8 bytes) of buffe
 /* repeatedly call sum_to_all to consolidate all entries of    */
 /* an array on all cores.                                      */
 /* array: in/out ptr to the data                               */
-/* array_size: data size in multiples of sizeof(double)        */
+/* array_size: data size in multiples of sizeof(realnum)       */
 /***************************************************************/
-double *array_to_all(double *array, size_t array_size) {
+realnum *array_to_all(realnum *array, size_t array_size) {
 #ifdef HAVE_MPI
-  double *buffer = new double[ARRAY_TO_ALL_BUFSIZE];
+  realnum *buffer = new realnum[ARRAY_TO_ALL_BUFSIZE];
   ptrdiff_t offset = 0;
   size_t remaining = array_size;
   while (remaining != 0) {
     size_t xfer_size = (remaining > ARRAY_TO_ALL_BUFSIZE ? ARRAY_TO_ALL_BUFSIZE : remaining);
     sum_to_all(array + offset, buffer, xfer_size);
-    memcpy(array + offset, buffer, xfer_size * sizeof(double));
+    memcpy(array + offset, buffer, xfer_size * sizeof(realnum));
     remaining -= xfer_size;
     offset += xfer_size;
   }
@@ -64,14 +64,22 @@ double *array_to_all(double *array, size_t array_size) {
   return array;
 }
 
-complex<double> *array_to_all(complex<double> *array, size_t array_size) {
-  return (complex<double> *)array_to_all((double *)array, 2 * array_size);
+complex<realnum> *array_to_all(complex<realnum> *array, size_t array_size) {
+  return (complex<realnum> *)array_to_all((realnum *)array, 2 * array_size);
 }
 
 } // namespace
 
 /***************************************************************************/
 
+std::complex<double> cdouble(std::complex<float> z) {
+  return std::complex<double>(real(z), imag(z));
+}
+
+std::complex<double> cdouble(std::complex<double> z) {
+  return z;
+}
+
 typedef struct {
 
   // information related to the volume covered by the
@@ -101,7 +109,7 @@ typedef struct {
   // temporary internal storage buffers
   component *cS;
   complex<double> *ph;
-  complex<double> *fields;
+  complex<realnum> *fields;
   ptrdiff_t *offsets;
 
   double frequency;
@@ -119,16 +127,16 @@ typedef struct {
 } array_slice_data;
 
 /* passthrough field function equivalent to component_fun in h5fields.cpp */
-static complex<double> default_field_func(const complex<double> *fields, const vec &loc, void *data_) {
+static complex<double> default_field_func(const complex<realnum> *fields, const vec &loc, void *data_) {
   (void)loc;   // unused
   (void)data_; // unused
-  return fields[0];
+  return cdouble(fields[0]);
 }
 
-static double default_field_rfunc(const complex<double> *fields, const vec &loc, void *data_) {
+static double default_field_rfunc(const complex<realnum> *fields, const vec &loc, void *data_) {
   (void)loc;   // unused
   (void)data_; // unused
-  return real(fields[0]);
+  return real(cdouble(fields[0]));
 }
 
 /***************************************************************/
@@ -174,7 +182,7 @@ static void get_array_slice_dimensions_chunkloop(fields_chunk *fc, int ichnk, co
 typedef struct {
   component source_component;
   ivec slice_imin, slice_imax;
-  complex<double> *slice;
+  complex<realnum> *slice;
 } source_slice_data;
 
 bool in_range(int imin, int i, int imax) { return (imin <= i && i <= imax); }
@@ -310,18 +318,19 @@ static void get_array_slice_chunkloop(fields_chunk *fc, int ichnk, component cgr
   // Otherwise proceed to compute the function of field components to be   //
   // tabulated on the slice, exactly as in fields::integrate.              //
   //-----------------------------------------------------------------------//
-  double *slice = 0;
-  complex<double> *zslice = 0;
+  realnum *slice = 0;
+  complex<realnum> *zslice = 0;
   bool complex_data = (data->rfun == 0);
   if (complex_data)
-    zslice = (complex<double> *)data->vslice;
+    zslice = (complex<realnum> *)data->vslice;
   else
-    slice = (double *)data->vslice;
+    slice = (realnum *)data->vslice;
 
   ptrdiff_t *off = data->offsets;
   component *cS = data->cS;
   double frequency = data->frequency;
-  complex<double> *fields = data->fields, *ph = data->ph;
+  complex<realnum> *fields = data->fields;
+  complex<double> *ph = data->ph;
   const component *iecs = data->inveps_cs;
   const direction *ieds = data->inveps_ds;
   ptrdiff_t ieos[6];
@@ -515,8 +524,8 @@ bool increment(size_t *n, size_t *nMax, int rank) {
 }
 
 // data_size = 1,2 for real,complex-valued array
-double *collapse_array(double *array, int *rank, size_t dims[3], direction dirs[3], volume where,
-                       int data_size = 1) {
+realnum *collapse_array(realnum *array, int *rank, size_t dims[3], direction dirs[3], volume where,
+                        int data_size = 1) {
 
   /*--------------------------------------------------------------*/
   /*- detect empty dimensions and compute rank and strides for    */
@@ -560,9 +569,9 @@ double *collapse_array(double *array, int *rank, size_t dims[3], direction dirs[
   /*--------------------------------------------------------------*/
   size_t reduced_grid_size = reduced_dims[0] * (reduced_rank == 2 ? reduced_dims[1] : 1);
   size_t reduced_array_size = data_size * reduced_grid_size;
-  double *reduced_array = new double[reduced_array_size];
+  realnum *reduced_array = new realnum[reduced_array_size];
   if (!reduced_array) meep::abort("%s:%i: out of memory (%zu)", __FILE__, __LINE__, reduced_array_size);
-  memset(reduced_array, 0, reduced_array_size * sizeof(double));
+  memset(reduced_array, 0, reduced_array_size * sizeof(realnum));
 
   size_t n[3] = {0, 0, 0};
   do {
@@ -581,9 +590,9 @@ double *collapse_array(double *array, int *rank, size_t dims[3], direction dirs[
   return reduced_array;
 }
 
-complex<double> *collapse_array(complex<double> *array, int *rank, size_t dims[3], direction dirs[3],
-                                volume where) {
-  return (complex<double> *)collapse_array((double *)array, rank, dims, dirs, where, 2);
+complex<realnum> *collapse_array(complex<realnum> *array, int *rank, size_t dims[3], direction dirs[3],
+                                 volume where) {
+  return (complex<realnum> *)collapse_array((realnum *)array, rank, dims, dirs, where, 2);
 }
 
 /**********************************************************************/
@@ -607,7 +616,7 @@ void *fields::do_get_array_slice(const volume &where, std::vector<component> com
   int elem_size = complex_data ? 2 : 1;
   void *vslice_uncollapsed;
 
-  vslice_uncollapsed = memset(new double[slice_size * elem_size], 0, slice_size * elem_size * sizeof(double));
+  vslice_uncollapsed = memset(new realnum[slice_size * elem_size], 0, slice_size * elem_size * sizeof(realnum));
 
   data.vslice = vslice_uncollapsed;
   data.snap = snap;
@@ -619,7 +628,7 @@ void *fields::do_get_array_slice(const volume &where, std::vector<component> com
   int num_components = components.size();
   data.cS = new component[num_components];
   data.ph = new complex<double>[num_components];
-  data.fields = new complex<double>[num_components];
+  data.fields = new complex<realnum>[num_components];
   data.offsets = new ptrdiff_t[2 * num_components];
   memset(data.offsets, 0, 2 * num_components * sizeof(ptrdiff_t));
   data.empty_dim[0] = data.empty_dim[1] = data.empty_dim[2] = data.empty_dim[3] =
@@ -659,19 +668,19 @@ void *fields::do_get_array_slice(const volume &where, std::vector<component> com
   loop_in_chunks(get_array_slice_chunkloop, (void *)&data, where, Centered, true, snap);
 
   if (!snap) {
-    double *slice = collapse_array((double *)vslice_uncollapsed, &rank, dims, dirs, where, elem_size);
+    realnum *slice = collapse_array((realnum *)vslice_uncollapsed, &rank, dims, dirs, where, elem_size);
     rank = get_array_slice_dimensions(where, dims, dirs, true, false, 0, &data);
     slice_size = data.slice_size;
-    vslice_uncollapsed = (double*) slice;
+    vslice_uncollapsed = (realnum*) slice;
   }
   if (vslice) {
-    memcpy(vslice, vslice_uncollapsed, slice_size * elem_size * sizeof(double));
-    delete[] (double*) vslice_uncollapsed;
+    memcpy(vslice, vslice_uncollapsed, slice_size * elem_size * sizeof(realnum));
+    delete[] (realnum*) vslice_uncollapsed;
   }
   else
     vslice = vslice_uncollapsed;
 
-  array_to_all((double *)vslice, elem_size * slice_size);
+  array_to_all((realnum *)vslice, elem_size * slice_size);
 
   delete[] data.offsets;
   delete[] data.fields;
@@ -685,48 +694,48 @@ void *fields::do_get_array_slice(const volume &where, std::vector<component> com
 /***************************************************************/
 /* entry points to get_array_slice                             */
 /***************************************************************/
-double *fields::get_array_slice(const volume &where, std::vector<component> components,
-                                field_rfunction rfun, void *fun_data, double *slice,
-                                double frequency, bool snap) {
-  return (double *)do_get_array_slice(where, components, 0, rfun, fun_data, (void *)slice,
-                                      frequency, snap);
+realnum *fields::get_array_slice(const volume &where, std::vector<component> components,
+                                 field_rfunction rfun, void *fun_data, realnum *slice,
+                                 double frequency, bool snap) {
+  return (realnum *)do_get_array_slice(where, components, 0, rfun, fun_data, (void *)slice,
+                                       frequency, snap);
 }
 
-complex<double> *fields::get_complex_array_slice(const volume &where, std::vector<component> components,
-                                                 field_function fun, void *fun_data, complex<double> *slice,
-                                                 double frequency, bool snap) {
-  return (complex<double> *)do_get_array_slice(where, components, fun, 0, fun_data, (void *)slice,
-                                               frequency, snap);
+complex<realnum> *fields::get_complex_array_slice(const volume &where, std::vector<component> components,
+                                                  field_function fun, void *fun_data, complex<realnum> *slice,
+                                                  double frequency, bool snap) {
+  return (complex<realnum> *)do_get_array_slice(where, components, fun, 0, fun_data, (void *)slice,
+                                                frequency, snap);
 }
 
-double *fields::get_array_slice(const volume &where, component c, double *slice, double frequency,
-                                bool snap) {
+realnum *fields::get_array_slice(const volume &where, component c, realnum *slice, double frequency,
+                                 bool snap) {
   std::vector<component> components(1);
   components[0] = c;
-  return (double *)do_get_array_slice(where, components, 0, default_field_rfunc, 0, (void *)slice,
-                                      frequency, snap);
+  return (realnum *)do_get_array_slice(where, components, 0, default_field_rfunc, 0, (void *)slice,
+                                       frequency, snap);
 }
 
-double *fields::get_array_slice(const volume &where, derived_component c, double *slice,
-                                double frequency, bool snap) {
+realnum *fields::get_array_slice(const volume &where, derived_component c, realnum *slice,
+                                 double frequency, bool snap) {
   int nfields;
   component carray[12];
   field_rfunction rfun = derived_component_func(c, gv, nfields, carray);
   std::vector<component> cs(carray, carray + nfields);
-  return (double *)do_get_array_slice(where, cs, 0, rfun, &nfields, (void *)slice,
-                                      frequency, snap);
+  return (realnum *)do_get_array_slice(where, cs, 0, rfun, &nfields, (void *)slice,
+                                       frequency, snap);
 }
 
-complex<double> *fields::get_complex_array_slice(const volume &where, component c, complex<double> *slice,
-                                                 double frequency, bool snap) {
+complex<realnum> *fields::get_complex_array_slice(const volume &where, component c, complex<realnum> *slice,
+                                                  double frequency, bool snap) {
   std::vector<component> components(1);
   components[0] = c;
-  return (complex<double> *)do_get_array_slice(where, components, default_field_func, 0, 0, (void *)slice,
-                                       frequency, snap);
+  return (complex<realnum> *)do_get_array_slice(where, components, default_field_func, 0, 0, (void *)slice,
+                                                frequency, snap);
 }
 
-complex<double> *fields::get_source_slice(const volume &where, component source_slice_component,
-                                          complex<double> *slice) {
+complex<realnum> *fields::get_source_slice(const volume &where, component source_slice_component,
+                                           complex<realnum> *slice) {
   size_t dims[3];
   direction dirs[3];
   vec min_max_loc[2];
@@ -737,18 +746,18 @@ complex<double> *fields::get_source_slice(const volume &where, component source_
   data.source_component = source_slice_component;
   data.slice_imin = gv.round_vec(min_max_loc[0]);
   data.slice_imax = gv.round_vec(min_max_loc[1]);
-  data.slice = new complex<double>[slice_size];
+  data.slice = new complex<realnum>[slice_size];
   if (!data.slice) meep::abort("%s:%i: out of memory (%zu)", __FILE__, __LINE__, slice_size);
 
   loop_in_chunks(get_source_slice_chunkloop, (void *)&data, where, Centered, true, false);
 
-  complex<double> *slice_collapsed = collapse_array(data.slice, &rank, dims, dirs, where);
+  complex<realnum> *slice_collapsed = collapse_array(data.slice, &rank, dims, dirs, where);
   rank = get_array_slice_dimensions(where, dims, dirs, true, false);
   slice_size = dims[0] * (rank >= 2 ? dims[1] : 1) * (rank == 3 ? dims[2] : 1);
 
   if (slice) {
-    memcpy(slice, slice_collapsed, 2 * slice_size * sizeof(double));
-    delete[] (complex<double>*) slice_collapsed;
+    memcpy(slice, slice_collapsed, 2 * slice_size * sizeof(realnum));
+    delete[] (complex<realnum>*) slice_collapsed;
   }
   else
     slice = slice_collapsed;
@@ -769,7 +778,7 @@ std::vector<double> fields::get_array_metadata(const volume &where) {
   vec min_max_loc[2]; // extremal points in subgrid
   get_array_slice_dimensions(where, dims, dirs, true, false, min_max_loc);
 
-  double *weights = get_array_slice(where, NO_COMPONENT);
+  realnum *weights = get_array_slice(where, NO_COMPONENT);
 
   /* get length and endpoints of x,y,z tics arrays */
   size_t nxyz[3] = {1, 1, 1};
diff --git a/src/dft.cpp b/src/dft.cpp
index cb6cd13ad..f97bbe730 100644
--- a/src/dft.cpp
+++ b/src/dft.cpp
@@ -860,8 +860,8 @@ dft_fields fields::add_dft_fields(component *components, int num_components, con
 /* chunk-level processing for fields::process_dft_component.   */
 /***************************************************************/
 complex<double> dft_chunk::process_dft_component(int rank, direction *ds, ivec min_corner, ivec max_corner,
-                                                 int num_freq, h5file *file, double *buffer, int reim,
-                                                 complex<double> *field_array, void *mode1_data, void *mode2_data,
+                                                 int num_freq, h5file *file, realnum *buffer, int reim,
+                                                 complex<realnum> *field_array, void *mode1_data, void *mode2_data,
                                                  int ic_conjugate, bool retain_interp_weights,
                                                  fields *parent) {
 
@@ -1026,7 +1026,7 @@ complex<double> dft_chunk::process_dft_component(int rank, direction *ds, ivec m
 /* are processed.                                              */
 /***************************************************************/
 complex<double> fields::process_dft_component(dft_chunk **chunklists, int num_chunklists, int num_freq,
-                                              component c, const char *HDF5FileName, complex<double> **pfield_array,
+                                              component c, const char *HDF5FileName, complex<realnum> **pfield_array,
                                               int *array_rank, size_t *array_dims, direction *array_dirs,
                                               void *mode1_data, void *mode2_data, component c_conjugate,
                                               bool *first_component, bool retain_interp_weights) {
@@ -1099,15 +1099,15 @@ complex<double> fields::process_dft_component(dft_chunk **chunklists, int num_ch
   /* buffer for process-local contributions to HDF5 output files,*/
   /* like h5_output_data::buf in h5fields.cpp                    */
   /***************************************************************/
-  double *buffer = 0;
-  complex<double> *field_array = 0;
+  realnum *buffer = 0;
+  complex<realnum> *field_array = 0;
   int reim_max = 0;
   if (HDF5FileName) {
-    buffer = new double[bufsz];
+    buffer = new realnum[bufsz];
     reim_max = 1;
   }
   else if (pfield_array)
-    *pfield_array = field_array = (array_size ? new complex<double>[array_size] : 0);
+    *pfield_array = field_array = (array_size ? new complex<realnum>[array_size] : 0);
 
   complex<double> overlap = 0.0;
   for (int reim = 0; reim <= reim_max; reim++) {
@@ -1118,7 +1118,7 @@ complex<double> fields::process_dft_component(dft_chunk **chunklists, int num_ch
       char dataname[100];
       snprintf(dataname, 100, "%s_%i.%c", component_name(c), num_freq, reim ? 'i' : 'r');
       file->create_or_extend_data(dataname, rank, dims, false /* append_data */,
-                                  false /* single_precision */);
+                                  sizeof(realnum) == sizeof(float) /* single_precision */);
     }
 
     for (int ncl = 0; ncl < num_chunklists; ncl++)
@@ -1139,7 +1139,7 @@ complex<double> fields::process_dft_component(dft_chunk **chunklists, int num_ch
 /* on all cores                                                */
 /***************************************************************/
 #define BUFSIZE 1 << 20  // use 1M element (16 MB) buffer
-      complex<double> *buf = new complex<double>[BUFSIZE];
+      complex<realnum> *buf = new complex<realnum>[BUFSIZE];
       ptrdiff_t offset = 0;
       size_t remaining = array_size;
       while (remaining != 0) {
@@ -1147,7 +1147,7 @@ complex<double> fields::process_dft_component(dft_chunk **chunklists, int num_ch
         am_now_working_on(MpiAllTime);
         sum_to_all(field_array + offset, buf, size);
         finished_working();
-        memcpy(field_array + offset, buf, size * sizeof(complex<double>));
+        memcpy(field_array + offset, buf, size * sizeof(complex<realnum>));
         remaining -= size;
         offset += size;
       }
@@ -1169,46 +1169,46 @@ complex<double> fields::process_dft_component(dft_chunk **chunklists, int num_ch
 /***************************************************************/
 /* routines for fetching arrays of dft fields                  */
 /***************************************************************/
-complex<double> *collapse_array(complex<double> *array, int *rank, size_t dims[3], direction dirs[3], volume where);
+complex<realnum> *collapse_array(complex<realnum> *array, int *rank, size_t dims[3], direction dirs[3], volume where);
 
-complex<double> *fields::get_dft_array(dft_flux flux, component c, int num_freq, int *rank,
-                                       size_t dims[3]) {
+complex<realnum> *fields::get_dft_array(dft_flux flux, component c, int num_freq, int *rank,
+                                        size_t dims[3]) {
   dft_chunk *chunklists[2];
   chunklists[0] = flux.E;
   chunklists[1] = flux.H;
-  complex<double> *array;
+  complex<realnum> *array;
   direction dirs[3];
   process_dft_component(chunklists, 2, num_freq, c, 0, &array, rank, dims, dirs);
   return collapse_array(array, rank, dims, dirs, flux.where);
 }
 
-complex<double> *fields::get_dft_array(dft_force force, component c, int num_freq, int *rank,
-                                       size_t dims[3]) {
+complex<realnum> *fields::get_dft_array(dft_force force, component c, int num_freq, int *rank,
+                                        size_t dims[3]) {
   dft_chunk *chunklists[3];
   chunklists[0] = force.offdiag1;
   chunklists[1] = force.offdiag2;
   chunklists[2] = force.diag;
-  complex<double> *array;
+  complex<realnum> *array;
   direction dirs[3];
   process_dft_component(chunklists, 3, num_freq, c, 0, &array, rank, dims, dirs);
   return collapse_array(array, rank, dims, dirs, force.where);
 }
 
-complex<double> *fields::get_dft_array(dft_near2far n2f, component c, int num_freq, int *rank,
-                                       size_t dims[3]) {
+complex<realnum> *fields::get_dft_array(dft_near2far n2f, component c, int num_freq, int *rank,
+                                        size_t dims[3]) {
   dft_chunk *chunklists[1];
   chunklists[0] = n2f.F;
-  complex<double> *array;
+  complex<realnum> *array;
   direction dirs[3];
   process_dft_component(chunklists, 1, num_freq, c, 0, &array, rank, dims, dirs);
   return collapse_array(array, rank, dims, dirs, n2f.where);
 }
 
-complex<double> *fields::get_dft_array(dft_fields fdft, component c, int num_freq, int *rank,
-                                       size_t dims[3]) {
+complex<realnum> *fields::get_dft_array(dft_fields fdft, component c, int num_freq, int *rank,
+                                        size_t dims[3]) {
   dft_chunk *chunklists[1];
   chunklists[0] = fdft.chunks;
-  complex<double> *array;
+  complex<realnum> *array;
   direction dirs[3];
   process_dft_component(chunklists, 1, num_freq, c, 0, &array, rank, dims, dirs);
   return collapse_array(array, rank, dims, dirs, fdft.where);
@@ -1255,7 +1255,7 @@ void fields::output_dft_components(dft_chunk **chunklists, int num_chunklists, v
                               0, Ex, &first_component);
       }
       else {
-        complex<double> *array = 0;
+        complex<realnum> *array = 0;
         int rank;
         size_t dims[3];
         direction dirs[3];
diff --git a/src/energy_and_flux.cpp b/src/energy_and_flux.cpp
index 4514e8553..889785d09 100644
--- a/src/energy_and_flux.cpp
+++ b/src/energy_and_flux.cpp
@@ -58,10 +58,10 @@ double fields::field_energy_in_box(const volume &where) {
   return electric_energy_in_box(where) + cur_step_magnetic_energy;
 }
 
-static complex<double> dot_integrand(const complex<double> *fields, const vec &loc, void *data_) {
+static complex<double> dot_integrand(const complex<realnum> *fields, const vec &loc, void *data_) {
   (void)loc;
   (void)data_; // unused;
-  return real(conj(fields[0]) * fields[1]);
+  return real(conj(cdouble(fields[0])) * cdouble(fields[1]));
 }
 
 double fields::field_energy_in_box(component c, const volume &where) {
@@ -249,12 +249,13 @@ flux_vol *fields::add_flux_plane(const vec &p1, const vec &p2) {
    is more expensive and requires us to know the boundary orientation, and
    does not seem worth the trouble at this point. */
 
-static complex<double> dot3_max_integrand(const complex<double> *fields, const vec &loc,
+static complex<double> dot3_max_integrand(const complex<realnum> *fields, const vec &loc,
                                           void *data_) {
   (void)loc;
   (void)data_; // unused;
-  return (real(conj(fields[0]) * fields[3]) + real(conj(fields[1]) * fields[4]) +
-          real(conj(fields[2]) * fields[5]));
+  return (real(conj(cdouble(fields[0])) * cdouble(fields[3])) +
+          real(conj(cdouble(fields[1])) * cdouble(fields[4])) +
+          real(conj(cdouble(fields[2])) * cdouble(fields[5])));
 }
 
 double fields::electric_energy_max_in_box(const volume &where) {
@@ -294,10 +295,10 @@ double fields::modal_volume_in_box(const volume &where) {
 
 typedef double (*fx_func)(const vec &);
 
-static complex<double> dot_fx_integrand(const complex<double> *fields, const vec &loc,
+static complex<double> dot_fx_integrand(const complex<realnum> *fields, const vec &loc,
                                         void *data_) {
   fx_func fx = (fx_func)data_;
-  return (real(conj(fields[0]) * fields[1]) * fx(loc));
+  return (real(conj(cdouble(fields[0])) * cdouble(fields[1])) * fx(loc));
 }
 
 /* computes integral of f(x) * |E|^2 / integral epsilon*|E|^2 */
diff --git a/src/h5fields.cpp b/src/h5fields.cpp
index 672b273d1..d7840f89d 100644
--- a/src/h5fields.cpp
+++ b/src/h5fields.cpp
@@ -48,7 +48,7 @@ typedef struct {
   const component *components;
   component *cS;
   complex<double> *ph;
-  complex<double> *fields;
+  complex<realnum> *fields;
   ptrdiff_t *offsets;
   double frequency;
   int ninveps;
@@ -143,7 +143,8 @@ static void h5_output_chunkloop(fields_chunk *fc, int ichnk, component cgrid, iv
 
   ptrdiff_t *off = data->offsets;
   component *cS = data->cS;
-  complex<double> *fields = data->fields, *ph = data->ph;
+  complex<realnum> *fields = data->fields;
+  complex<double> *ph = data->ph;
   double frequency = data->frequency;
   const component *iecs = data->inveps_cs;
   const direction *ieds = data->inveps_ds;
@@ -267,7 +268,7 @@ void fields::output_hdf5(h5file *file, const char *dataname, int num_fields,
   data.components = components;
   data.cS = new component[num_fields];
   data.ph = new complex<double>[num_fields];
-  data.fields = new complex<double>[num_fields];
+  data.fields = new complex<realnum>[num_fields];
   data.fun = fun;
   data.fun_data_ = fun_data_;
 
@@ -351,7 +352,7 @@ typedef struct {
   void *fun_data_;
 } rintegrand_data;
 
-static complex<double> rintegrand_fun(const complex<double> *fields, const vec &loc, void *data_) {
+static complex<double> rintegrand_fun(const complex<realnum> *fields, const vec &loc, void *data_) {
   rintegrand_data *data = (rintegrand_data *)data_;
   return data->fun(fields, loc, data->fun_data_);
 }
@@ -374,10 +375,10 @@ void fields::output_hdf5(const char *dataname, int num_fields, const component *
 
 /***************************************************************************/
 
-static complex<double> component_fun(const complex<double> *fields, const vec &loc, void *data_) {
+static complex<double> component_fun(const complex<realnum> *fields, const vec &loc, void *data_) {
   (void)loc;   // unused
   (void)data_; // unused
-  return fields[0];
+  return cdouble(fields[0]);
 }
 
 void fields::output_hdf5(component c, const volume &where, h5file *file, bool append_data,
diff --git a/src/integrate.cpp b/src/integrate.cpp
index 7445df910..c94a63e09 100644
--- a/src/integrate.cpp
+++ b/src/integrate.cpp
@@ -29,7 +29,7 @@ struct integrate_data {
   const component *components;
   component *cS;
   complex<double> *ph;
-  complex<double> *fvals;
+  complex<realnum> *fvals;
   ptrdiff_t *offsets;
   int ninveps;
   component inveps_cs[3];
@@ -37,7 +37,7 @@ struct integrate_data {
   int ninvmu;
   component invmu_cs[3];
   direction invmu_ds[3];
-  complex<long double> sum;
+  complex<double> sum;
   double maxabs;
   field_function integrand;
   void *integrand_data_;
@@ -51,7 +51,8 @@ static void integrate_chunkloop(fields_chunk *fc, int ichunk, component cgrid, i
   integrate_data *data = (integrate_data *)data_;
   ptrdiff_t *off = data->offsets;
   component *cS = data->cS;
-  complex<double> *fvals = data->fvals, *ph = data->ph;
+  complex<realnum> *fvals = data->fvals;
+  complex<double> *ph = data->ph;
   complex<long double> sum = 0.0;
   double maxabs = 0;
   const component *iecs = data->inveps_cs;
@@ -146,7 +147,7 @@ complex<double> fields::integrate(int num_fvals, const component *components,
   data.components = components;
   data.cS = new component[num_fvals];
   data.ph = new complex<double>[num_fvals];
-  data.fvals = new complex<double>[num_fvals];
+  data.fvals = new complex<realnum>[num_fvals];
   data.sum = 0;
   data.maxabs = 0;
   data.integrand = integrand;
@@ -196,14 +197,15 @@ complex<double> fields::integrate(int num_fvals, const component *components,
   if (maxabs) *maxabs = max_to_all(data.maxabs);
   data.sum = sum_to_all(data.sum);
 
-  return complex<double>(real(data.sum), imag(data.sum));
+  return cdouble(data.sum);
 }
 
 typedef struct {
   field_rfunction integrand;
   void *integrand_data;
 } rfun_wrap_data;
-static complex<double> rfun_wrap(const complex<double> *fvals, const vec &loc, void *data_) {
+
+static complex<double> rfun_wrap(const complex<realnum> *fvals, const vec &loc, void *data_) {
   rfun_wrap_data *data = (rfun_wrap_data *)data_;
   return data->integrand(fvals, loc, data->integrand_data);
 }
@@ -231,11 +233,11 @@ double fields::max_abs(int num_fvals, const component *components, field_rfuncti
   return max_abs(num_fvals, components, rfun_wrap, &data, where);
 }
 
-static complex<double> return_the_field(const complex<double> *fields, const vec &loc,
-                                        void *integrand_data_) {
+static complex<double> return_the_field(const complex<realnum> *fields, const vec &loc,
+                                         void *integrand_data_) {
   (void)integrand_data_;
   (void)loc; // unused
-  return fields[0];
+  return cdouble(fields[0]);
 }
 
 double fields::max_abs(int c, const volume &where) {
diff --git a/src/integrate2.cpp b/src/integrate2.cpp
index 48e255d74..f8671987c 100644
--- a/src/integrate2.cpp
+++ b/src/integrate2.cpp
@@ -35,7 +35,7 @@ struct integrate_data {
   const component *components2;
   component *cS;
   complex<double> *ph;
-  complex<double> *fvals;
+  complex<realnum> *fvals;
   ptrdiff_t *offsets;
   int ninveps;
   component inveps_cs[3];
@@ -43,7 +43,7 @@ struct integrate_data {
   int ninvmu;
   component invmu_cs[3];
   direction invmu_ds[3];
-  complex<long double> sum;
+  complex<double> sum;
   double maxabs;
   field_function integrand;
   void *integrand_data_;
@@ -57,7 +57,8 @@ static void integrate_chunkloop(fields_chunk *fc, int ichunk, component cgrid, i
   integrate_data *data = (integrate_data *)data_;
   ptrdiff_t *off = data->offsets;
   component *cS = data->cS;
-  complex<double> *fvals = data->fvals, *ph = data->ph;
+  complex<realnum> *fvals = data->fvals;
+  complex<double> *ph = data->ph;
   complex<long double> sum = 0.0;
   double maxabs = 0;
   const component *iecs = data->inveps_cs;
@@ -223,7 +224,7 @@ complex<double> fields::integrate2(const fields &fields2, int num_fvals1,
   data.components2 = components2;
   data.cS = new component[num_fvals1 + num_fvals2];
   data.ph = new complex<double>[num_fvals1 + num_fvals2];
-  data.fvals = new complex<double>[num_fvals1 + num_fvals2];
+  data.fvals = new complex<realnum>[num_fvals1 + num_fvals2];
   data.sum = 0;
   data.maxabs = 0;
   data.integrand = integrand;
@@ -285,14 +286,15 @@ complex<double> fields::integrate2(const fields &fields2, int num_fvals1,
   if (maxabs) *maxabs = max_to_all(data.maxabs);
   data.sum = sum_to_all(data.sum);
 
-  return complex<double>(real(data.sum), imag(data.sum));
+  return cdouble(data.sum);
 }
 
 typedef struct {
   field_rfunction integrand;
   void *integrand_data;
 } rfun_wrap_data;
-static complex<double> rfun_wrap(const complex<double> *fields, const vec &loc, void *data_) {
+
+static complex<double> rfun_wrap(const complex<realnum> *fields, const vec &loc, void *data_) {
   rfun_wrap_data *data = (rfun_wrap_data *)data_;
   return data->integrand(fields, loc, data->integrand_data);
 }
diff --git a/src/meep.hpp b/src/meep.hpp
index 488c517b9..6c15378a2 100644
--- a/src/meep.hpp
+++ b/src/meep.hpp
@@ -60,6 +60,10 @@ const double nan = NAN;
 const double nan = -7.0415659787563146e103; // ideally, a value never encountered in practice
 #endif
 
+// Defined in array_slice.cpp
+std::complex<double> cdouble(std::complex<float> z);
+std::complex<double> cdouble(std::complex<double> z);
+
 class h5file;
 
 // Defined in monitor.cpp
@@ -1126,8 +1130,8 @@ class dft_chunk {
   // fields::process_dft_component
   std::complex<double> process_dft_component(int rank, direction *ds, ivec min_corner,
                                              ivec max_corner, int num_freq, h5file *file,
-                                             double *buffer, int reim,
-                                             std::complex<double> *field_array, void *mode1_data,
+                                             realnum *buffer, int reim,
+                                             std::complex<realnum> *field_array, void *mode1_data,
                                              void *mode2_data, int ic_conjugate,
                                              bool retain_interp_weights, fields *parent);
 
@@ -1627,9 +1631,9 @@ typedef void (*field_chunkloop)(fields_chunk *fc, int ichunk, component cgrid, i
                                 vec s0, vec s1, vec e0, vec e1, double dV0, double dV1, ivec shift,
                                 std::complex<double> shift_phase, const symmetry &S, int sn,
                                 void *chunkloop_data);
-typedef std::complex<double> (*field_function)(const std::complex<double> *fields, const vec &loc,
+typedef std::complex<double> (*field_function)(const std::complex<realnum> *fields, const vec &loc,
                                                void *integrand_data_);
-typedef double (*field_rfunction)(const std::complex<double> *fields, const vec &loc,
+typedef double (*field_rfunction)(const std::complex<realnum> *fields, const vec &loc,
                                   void *integrand_data_);
 
 field_rfunction derived_component_func(derived_component c, const grid_volume &gv, int &nfields,
@@ -1812,33 +1816,33 @@ class fields {
   // of the correct size.
   // otherwise, a new buffer is allocated and returned; it
   // must eventually be caller-deallocated via delete[].
-  double *get_array_slice(const volume &where, std::vector<component> components,
-                          field_rfunction rfun, void *fun_data, double *slice = 0,
-                          double frequency = 0,
-                          bool snap = false);
-
-  std::complex<double> *get_complex_array_slice(const volume &where,
-                                                std::vector<component> components,
-                                                field_function fun, void *fun_data,
-                                                std::complex<double> *slice = 0,
-                                                double frequency = 0,
-                                                bool snap = false);
+  realnum *get_array_slice(const volume &where, std::vector<component> components,
+                           field_rfunction rfun, void *fun_data, realnum *slice = 0,
+                           double frequency = 0,
+                           bool snap = false);
+
+  std::complex<realnum> *get_complex_array_slice(const volume &where,
+                                                 std::vector<component> components,
+                                                 field_function fun, void *fun_data,
+                                                 std::complex<realnum> *slice = 0,
+                                                 double frequency = 0,
+                                                 bool snap = false);
 
   // alternative entry points for when you have no field
   // function, i.e. you want just a single component or
   // derived component.)
-  double *get_array_slice(const volume &where, component c, double *slice = 0,
-                          double frequency = 0, bool snap = false);
-  double *get_array_slice(const volume &where, derived_component c, double *slice = 0,
-                          double frequency = 0, bool snap = false);
-  std::complex<double> *get_complex_array_slice(const volume &where, component c,
-                                                std::complex<double> *slice = 0,
-                                                double frequency = 0,
-                                                bool snap = false);
+  realnum *get_array_slice(const volume &where, component c, realnum *slice = 0,
+                           double frequency = 0, bool snap = false);
+  realnum *get_array_slice(const volume &where, derived_component c, realnum *slice = 0,
+                           double frequency = 0, bool snap = false);
+  std::complex<realnum> *get_complex_array_slice(const volume &where, component c,
+                                                 std::complex<realnum> *slice = 0,
+                                                 double frequency = 0,
+                                                 bool snap = false);
 
   // like get_array_slice, but for *sources* instead of fields
-  std::complex<double> *get_source_slice(const volume &where, component source_slice_component,
-                                         std::complex<double> *slice = 0);
+  std::complex<realnum> *get_source_slice(const volume &where, component source_slice_component,
+                                          std::complex<realnum> *slice = 0);
 
   // master routine for all above entry points
   void *do_get_array_slice(const volume &where, std::vector<component> components,
@@ -2079,7 +2083,7 @@ class fields {
   /********************************************************/
   std::complex<double> process_dft_component(dft_chunk **chunklists, int num_chunklists,
                                              int num_freq, component c, const char *HDF5FileName,
-                                             std::complex<double> **field_array = 0, int *rank = 0,
+                                             std::complex<realnum> **field_array = 0, int *rank = 0,
                                              size_t *dims = 0, direction *dirs = 0,
                                              void *mode1_data = 0, void *mode2_data = 0,
                                              component c_conjugate = Ex, bool *first_component = 0,
@@ -2095,14 +2099,14 @@ class fields {
   void output_dft(dft_fields fdft, const char *HDF5FileName);
 
   // get array of DFT field values
-  std::complex<double> *get_dft_array(dft_flux flux, component c, int num_freq, int *rank,
-                                      size_t dims[3]);
-  std::complex<double> *get_dft_array(dft_fields fdft, component c, int num_freq, int *rank,
-                                      size_t dims[3]);
-  std::complex<double> *get_dft_array(dft_force force, component c, int num_freq, int *rank,
-                                      size_t dims[3]);
-  std::complex<double> *get_dft_array(dft_near2far n2f, component c, int num_freq, int *rank,
-                                      size_t dims[3]);
+  std::complex<realnum> *get_dft_array(dft_flux flux, component c, int num_freq, int *rank,
+                                       size_t dims[3]);
+  std::complex<realnum> *get_dft_array(dft_fields fdft, component c, int num_freq, int *rank,
+                                       size_t dims[3]);
+  std::complex<realnum> *get_dft_array(dft_force force, component c, int num_freq, int *rank,
+                                       size_t dims[3]);
+  std::complex<realnum> *get_dft_array(dft_near2far n2f, component c, int num_freq, int *rank,
+                                       size_t dims[3]);
 
   // overlap integrals between eigenmode fields and DFT flux fields
   void get_overlap(void *mode1_data, void *mode2_data, dft_flux flux, int num_freq,
diff --git a/src/meep/mympi.hpp b/src/meep/mympi.hpp
index ac71bd1ed..1f29200ea 100644
--- a/src/meep/mympi.hpp
+++ b/src/meep/mympi.hpp
@@ -76,12 +76,14 @@ int min_to_all(int);
 float sum_to_master(float);   // Only returns the correct value to proc 0.
 double sum_to_master(double); // Only returns the correct value to proc 0.
 double sum_to_all(double);
+void sum_to_all(const float *in, float *out, int size);
 void sum_to_all(const double *in, double *out, int size);
 void sum_to_master(const float *in, float *out, int size);
 void sum_to_master(const double *in, double *out, int size);
 void sum_to_all(const float *in, double *out, int size);
 void sum_to_all(const std::complex<float> *in, std::complex<double> *out, int size);
 void sum_to_all(const std::complex<double> *in, std::complex<double> *out, int size);
+void sum_to_all(const std::complex<float> *in, std::complex<float> *out, int size);
 void sum_to_master(const std::complex<float> *in, std::complex<float> *out, int size);
 void sum_to_master(const std::complex<double> *in, std::complex<double> *out, int size);
 long double sum_to_all(long double);
diff --git a/src/meepgeom.cpp b/src/meepgeom.cpp
index a5fdfb56f..496012681 100644
--- a/src/meepgeom.cpp
+++ b/src/meepgeom.cpp
@@ -2678,10 +2678,10 @@ std::complex<double> get_material_gradient(
   else
     meep::abort("Invalid adjoint field component");
 
-  if (md->trivial){
+  if (md->trivial) {
     const double sd = 1.0; // FIXME: make user-changable?
     meep::volume v(r);
-    LOOP_OVER_DIRECTIONS(dim, d){
+    LOOP_OVER_DIRECTIONS(dim, d) {
       v.set_direction_min(d, r.in_direction(d) - 0.5*gv.inva*sd);
       v.set_direction_max(d, r.in_direction(d) + 0.5*gv.inva*sd);
     }
@@ -2695,8 +2695,7 @@ std::complex<double> get_material_gradient(
 
     for (int i=0;i<3;i++) dA_du[i] = (row_1[i] - row_2[i])/(2*du);
     return dA_du[dir_idx] * fields_f;
-  
-  }else{
+  } else {
     double orig = u[idx];
     std::complex<double> row_1[3], row_2[3], dA_du[3];
     u[idx] -= du;
@@ -2707,7 +2706,7 @@ std::complex<double> get_material_gradient(
 
     for (int i=0;i<3;i++) dA_du[i] = (row_1[i] - row_2[i])/(2*du);
     return dA_du[dir_idx] * fields_f  * cond_cmp(forward_c,r,freq,geps);
-  }  
+  }
 }
 
 /* A brute force approach to calculating Aᵤ using finite differences.
@@ -2715,17 +2714,15 @@ Past versions of the code only calculated dA/dε using a finite
 difference, and then multiplied by the analytic vJp (dε/du).
 With the addition of subpixel smoothing, however, the vJp became
 much more complicated and it is easier to calculate the entire gradient
-using finite differences (at the cost of slightly less accurate gradients 
-due to roundoff).
- */
+using finite differences (at the cost of slightly less accurate gradients
+due to floating-point roundoff errors). */
 void add_interpolate_weights(double rx, double ry, double rz,
                              double *data, int nx, int ny, int nz, int stride,
                              double scaleby, double *udata, int ukind, double uval,
-                             meep::vec r, geom_epsilon *geps, 
+                             meep::vec r, geom_epsilon *geps,
                              meep::component adjoint_c, meep::component forward_c,
-                                      std::complex<double> fwd, std::complex<double> adj,
-                                      double freq, meep::grid_volume &gv, double du
-                             ) {
+                             std::complex<double> fwd, std::complex<double> adj,
+                             double freq, meep::grid_volume &gv, double du) {
   int x1, y1, z1, x2, y2, z2;
   double dx, dy, dz, u;
 
@@ -2773,8 +2770,7 @@ in row-major order (the order used by HDF5): */
 void material_grids_addgradient_point(double *v, vector3 p, double scalegrad, geom_epsilon *geps,
                                       meep::component adjoint_c, meep::component forward_c,
                                       std::complex<double> fwd, std::complex<double> adj,
-                                      double freq, meep::grid_volume &gv, double tol                                      
-                                      ) {
+                                      double freq, meep::grid_volume &gv, double tol) {
   geom_box_tree tp;
   int oi, ois;
   material_data *mg, *mg_sum;
@@ -2827,12 +2823,10 @@ void material_grids_addgradient_point(double *v, vector3 p, double scalegrad, ge
     uval = tanh_projection(matgrid_val(p, tp, oi, mg), mg->beta, mg->eta);
     do {
       vector3 pb = to_geom_box_coords(p, &tp->objects[oi]);
-      add_interpolate_weights(
-          pb.x, pb.y, pb.z, vcur, sz.x, sz.y, sz.z, 1,
-          scalegrad,ucur, kind, uval,
-          vector3_to_vec(p),geps,adjoint_c,forward_c,
-          fwd,adj,freq,gv,tol
-          );
+      add_interpolate_weights(pb.x, pb.y, pb.z, vcur, sz.x, sz.y, sz.z, 1,
+                              scalegrad, ucur, kind, uval,
+                              vector3_to_vec(p), geps, adjoint_c, forward_c,
+                              fwd, adj, freq, gv, tol);
       if (kind == material_data::U_DEFAULT) break;
       tp = geom_tree_search_next(p, tp, &oi);
     } while (tp && is_material_grid((material_data *)tp->objects[oi].o->material));
@@ -2846,8 +2840,8 @@ void material_grids_addgradient_point(double *v, vector3 p, double scalegrad, ge
     uval = tanh_projection(material_grid_val(p, mg), mg->beta, mg->eta);
     add_interpolate_weights(p.x, p.y, p.z, vcur, sz.x, sz.y, sz.z, 1,
                             scalegrad, ucur, kind, uval,
-                            vector3_to_vec(p),geps,adjoint_c,forward_c,
-                            fwd,adj,freq,gv,tol);
+                            vector3_to_vec(p), geps, adjoint_c, forward_c,
+                            fwd, adj, freq, gv, tol);
   }
 }
 
@@ -2855,15 +2849,15 @@ void material_grids_addgradient_point(double *v, vector3 p, double scalegrad, ge
 /*              Some gradient helper routines             */
 /**********************************************************/
 
-#define LOOP_OVER_DIRECTIONS_BACKWARDS(dim, d)                                                               \
-  for (meep::direction d = (meep::direction)(meep::stop_at_direction(dim)-1),                                          \
-                       loop_stop_directi = meep::start_at_direction(dim);                           \
+#define LOOP_OVER_DIRECTIONS_BACKWARDS(dim, d)                                     \
+  for (meep::direction d = (meep::direction)(meep::stop_at_direction(dim)-1),      \
+                       loop_stop_directi = meep::start_at_direction(dim);          \
        d >= loop_stop_directi; d = (meep::direction)(d - 1))
 
 void set_strides(meep::ndim dim, ptrdiff_t *the_stride,const meep::ivec c1,const meep::ivec c2) {
   FOR_DIRECTIONS(d) { the_stride[d] = 1;}
   meep::ivec n_s = (c2 - c1)/2 + 1;
-  LOOP_OVER_DIRECTIONS_BACKWARDS(dim,d) { 
+  LOOP_OVER_DIRECTIONS_BACKWARDS(dim,d) {
     ptrdiff_t current_stride = 1;
     LOOP_OVER_DIRECTIONS_BACKWARDS(dim,d_i) {
       if (d_i==d){
@@ -2872,23 +2866,23 @@ void set_strides(meep::ndim dim, ptrdiff_t *the_stride,const meep::ivec c1,const
       }
       current_stride *= n_s.in_direction(d_i);
     }
-  } 
+  }
 }
 
 ptrdiff_t get_idx_from_ivec(meep::ndim dim, meep::ivec c1, ptrdiff_t *the_stride, meep::ivec v){
   ptrdiff_t idx = 0;
   meep::ivec diff = ((v-c1) / 2);
-  LOOP_OVER_DIRECTIONS(dim,d){
+  LOOP_OVER_DIRECTIONS(dim,d) {
     idx += diff.in_direction(d)*the_stride[d];
   }
   return idx;
 }
 
-void material_grids_addgradient(double *v, size_t ng, std::complex<double> *fields_a,
-                                std::complex<double> *fields_f, size_t fields_shapes[12], double *frequencies,
-                                double scalegrad, meep::grid_volume &gv,
+void material_grids_addgradient(double *v, size_t ng, std::complex<meep::realnum> *fields_a,
+                                std::complex<meep::realnum> *fields_f, size_t fields_shapes[12],
+                                double *frequencies, double scalegrad, meep::grid_volume &gv,
                                 meep::volume &where, geom_epsilon *geps, double du) {
-  
+
 // only loop over field components relevant to our simulation (in the proper order)
 #define FOR_MY_COMPONENTS(c1) FOR_ELECTRIC_COMPONENTS(c1) if (!coordinate_mismatch(gv.dim, component_direction(c1)))
 
@@ -2915,10 +2909,10 @@ void material_grids_addgradient(double *v, size_t ng, std::complex<double> *fiel
     }
   }
   size_t c_start[3];
-  c_start[0] = 0; 
-  c_start[1] = nf*stride_row[0]; 
+  c_start[0] = 0;
+  c_start[1] = nf*stride_row[0];
   c_start[2] = nf*(stride_row[0]+stride_row[1]);
-  
+
 // fields dimensions are (components, nfreqs, x, y, z)
 #define GET_FIELDS(fields,comp,freq,idx) fields[c_start[comp] + freq*stride_row[comp] + idx]
 
@@ -2926,22 +2920,20 @@ void material_grids_addgradient(double *v, size_t ng, std::complex<double> *fiel
   // loop over frequency
   for (size_t f_i = 0; f_i < nf; ++f_i) {
     int ci_adjoint = 0;
-    FOR_MY_COMPONENTS(adjoint_c) { 
+    FOR_MY_COMPONENTS(adjoint_c) {
       LOOP_OVER_IVECS(gv, is[ci_adjoint], ie[ci_adjoint], idx) {
         size_t idx_fields = IVEC_LOOP_COUNTER;
         meep::ivec ip = gv.iloc(adjoint_c,idx);
         meep::vec p   = gv.loc(adjoint_c,idx);
-        std::complex<double> adj = GET_FIELDS(fields_a,ci_adjoint,f_i,idx_fields);
-        
+        std::complex<double> adj = GET_FIELDS(fields_a, ci_adjoint, f_i, idx_fields);
         material_type md;
         geps->get_material_pt(md, p);
         if (!md->trivial) adj *= cond_cmp(adjoint_c,p,frequencies[f_i], geps);
-        
         double cyl_scale;
         int ci_forward = 0;
         FOR_MY_COMPONENTS(forward_c) {
-          /* we need to calculate the bounds of 
-          the forward fields (in space) so that we 
+          /* we need to calculate the bounds of
+          the forward fields (in space) so that we
           can properly index into the fields array
           later */
           meep::ivec isf = is[ci_forward];
@@ -2959,20 +2951,20 @@ void material_grids_addgradient(double *v, size_t ng, std::complex<double> *fiel
           /********* compute -λᵀAᵤx *************/
 
           /* trivial case, no interpolation/restriction needed        */
-          if (forward_c == adjoint_c){
-            std::complex<double> fwd = GET_FIELDS(fields_f,ci_forward,f_i,idx_fields);
+          if (forward_c == adjoint_c) {
+            std::complex<double> fwd = GET_FIELDS(fields_f, ci_forward, f_i, idx_fields);
             cyl_scale = (gv.dim == meep::Dcyl) ? 2*p.r() : 1; // the pi is already factored in near2far.cpp
             material_grids_addgradient_point(
-              v+ng*f_i, vec_to_vector3(p), scalegrad*cyl_scale, geps,
-              adjoint_c, forward_c, fwd, adj, frequencies[f_i], gv, du);
+                v+ng*f_i, vec_to_vector3(p), scalegrad*cyl_scale, geps,
+                adjoint_c, forward_c, fwd, adj, frequencies[f_i], gv, du);
           /* more complicated case requires interpolation/restriction */
-          }else{
+          } else {
             /* we need to restrict the adjoint fields to
             the two nodes of interest (which requires a factor
-            of 0.5 to scale), and interpolate the forward fields 
-            to the same two nodes (which requires another factor of 0.5). 
+            of 0.5 to scale), and interpolate the forward fields
+            to the same two nodes (which requires another factor of 0.5).
             Then we perform our inner product at these nodes.
-            */            
+            */
             std::complex<double> fwd_avg, fwd1, fwd2, prod;
             ptrdiff_t fwd1_idx, fwd2_idx;
 
@@ -2985,34 +2977,34 @@ void material_grids_addgradient(double *v, size_t ng, std::complex<double> *fiel
             meep::ivec fwd_pa  = (fwd_p + unit_a*2);
             meep::ivec fwd_pf  = (fwd_p - unit_f*2);
             meep::ivec fwd_paf = (fwd_p + unit_a*2 - unit_f*2);
-            
+
             //identify the two eps points
             meep::ivec ieps1 = (fwd_p + fwd_pf) / 2;
             meep::ivec ieps2 = (fwd_pa + fwd_paf) / 2;
 
             //operate on the first eps node
             fwd1_idx = get_idx_from_ivec(gv.dim, start_ivec, the_stride, fwd_p);
-            fwd1 = 0.5 * GET_FIELDS(fields_f,ci_forward,f_i,fwd1_idx);
+            fwd1 = 0.5 * meep::cdouble(GET_FIELDS(fields_f, ci_forward, f_i, fwd1_idx));
             fwd2_idx = get_idx_from_ivec(gv.dim, start_ivec, the_stride, fwd_pf);
-            fwd2 = 0.5 * GET_FIELDS(fields_f,ci_forward,f_i,fwd2_idx);
+            fwd2 = 0.5 * meep::cdouble(GET_FIELDS(fields_f, ci_forward, f_i, fwd2_idx));
             fwd_avg = fwd1 + fwd2;
             meep::vec eps1 = gv[ieps1];
             cyl_scale = (gv.dim == meep::Dcyl) ? eps1.r() : 1;
             material_grids_addgradient_point(
-              v+ng*f_i, vec_to_vector3(eps1), scalegrad*cyl_scale, geps,
-              adjoint_c, forward_c, fwd_avg, 0.5*adj, frequencies[f_i], gv, du);
-            
+                v+ng*f_i, vec_to_vector3(eps1), scalegrad*cyl_scale, geps,
+                adjoint_c, forward_c, fwd_avg, 0.5*adj, frequencies[f_i], gv, du);
+
             //operate on the second eps node
             fwd1_idx = get_idx_from_ivec(gv.dim, start_ivec, the_stride, fwd_pa);
-            fwd1 = 0.5 * GET_FIELDS(fields_f,ci_forward,f_i,fwd1_idx);
+            fwd1 = 0.5 * meep::cdouble(GET_FIELDS(fields_f, ci_forward, f_i, fwd1_idx));
             fwd2_idx = get_idx_from_ivec(gv.dim, start_ivec, the_stride, fwd_paf);
-            fwd2 = 0.5 * GET_FIELDS(fields_f,ci_forward,f_i,fwd2_idx);
+            fwd2 = 0.5 * meep::cdouble(GET_FIELDS(fields_f, ci_forward, f_i, fwd2_idx));
             fwd_avg = fwd1 + fwd2;
             meep::vec eps2 = gv[ieps2];
             cyl_scale = (gv.dim == meep::Dcyl) ? eps2.r() : 1;
             material_grids_addgradient_point(
-              v+ng*f_i, vec_to_vector3(eps2), scalegrad*cyl_scale, geps,
-              adjoint_c, forward_c, fwd_avg, 0.5*adj, frequencies[f_i], gv, du);
+                v+ng*f_i, vec_to_vector3(eps2), scalegrad*cyl_scale, geps,
+                adjoint_c, forward_c, fwd_avg, 0.5*adj, frequencies[f_i], gv, du);
           }
           ci_forward++;
         }
diff --git a/src/meepgeom.hpp b/src/meepgeom.hpp
index f66c33e3b..bb2f0d132 100644
--- a/src/meepgeom.hpp
+++ b/src/meepgeom.hpp
@@ -296,10 +296,10 @@ meep::vec material_grid_grad(vector3 p, material_data *md, const geometric_objec
 double matgrid_val(vector3 p, geom_box_tree tp, int oi, material_data *md);
 double material_grid_val(vector3 p, material_data *md);
 geom_box_tree calculate_tree(const meep::volume &v, geometric_object_list g);
-void material_grids_addgradient(double *v, size_t ng, std::complex<double> *fields_a,
-                                std::complex<double> *fields_f, size_t fields_shapes[12],
-                                double *frequencies, double scalegrad,
-                                meep::grid_volume &gv, meep::volume &where, geom_epsilon *geps,double du=1e-6);
+void material_grids_addgradient(double *v, size_t ng, std::complex<meep::realnum> *fields_a,
+                                std::complex<meep::realnum> *fields_f, size_t fields_shapes[12],
+                                double *frequencies, double scalegrad, meep::grid_volume &gv,
+                                meep::volume &where, geom_epsilon *geps, double du = 1e-6);
 
 /***************************************************************/
 /* routines in GDSIIgeom.cc ************************************/
diff --git a/src/mympi.cpp b/src/mympi.cpp
index 8ab321dbf..331def6e6 100644
--- a/src/mympi.cpp
+++ b/src/mympi.cpp
@@ -441,6 +441,14 @@ double sum_to_all(double in) {
   return out;
 }
 
+void sum_to_all(const float *in, float *out, int size) {
+#ifdef HAVE_MPI
+  MPI_Allreduce((void *)in, out, size, MPI_FLOAT, MPI_SUM, mycomm);
+#else
+  memcpy(out, in, sizeof(float) * size);
+#endif
+}
+
 void sum_to_all(const double *in, double *out, int size) {
 #ifdef HAVE_MPI
   MPI_Allreduce((void *)in, out, size, MPI_DOUBLE, MPI_SUM, mycomm);
@@ -481,6 +489,10 @@ void sum_to_all(const complex<float> *in, complex<double> *out, int size) {
   sum_to_all((const float *)in, (double *)out, 2 * size);
 }
 
+void sum_to_all(const complex<float> *in, complex<float> *out, int size) {
+  sum_to_all((const float *)in, (float *)out, 2 * size);
+}
+
 void sum_to_master(const complex<float> *in, complex<float> *out, int size) {
   sum_to_master((const float *)in, (float *)out, 2 * size);
 }
diff --git a/src/vec.cpp b/src/vec.cpp
index 69e809e02..e4bf55a92 100644
--- a/src/vec.cpp
+++ b/src/vec.cpp
@@ -1479,18 +1479,19 @@ volume_list *symmetry::reduce(const volume_list *gl) const {
 
 /***************************************************************************/
 
-static double poynting_fun(const complex<double> *fields, const vec &loc, void *data_) {
+static double poynting_fun(const complex<realnum> *fields, const vec &loc, void *data_) {
   (void)loc;   // unused
   (void)data_; // unused
-  return (real(conj(fields[0]) * fields[1]) - real(conj(fields[2]) * fields[3]));
+  return (real(conj(cdouble(fields[0])) * cdouble(fields[1])) -
+          real(conj(cdouble(fields[2])) * cdouble(fields[3])));
 }
 
-static double energy_fun(const complex<double> *fields, const vec &loc, void *data_) {
+static double energy_fun(const complex<realnum> *fields, const vec &loc, void *data_) {
   (void)loc; // unused
   int nfields = *((int *)data_) / 2;
   double sum = 0;
   for (int k = 0; k < nfields; ++k)
-    sum += real(conj(fields[2 * k]) * fields[2 * k + 1]);
+    sum += real(conj(cdouble(fields[2 * k])) * cdouble(fields[2 * k + 1]));
   return sum * 0.5;
 }
 
diff --git a/tests/array-slice-ll.cpp b/tests/array-slice-ll.cpp
index af754384b..ea4e9aded 100644
--- a/tests/array-slice-ll.cpp
+++ b/tests/array-slice-ll.cpp
@@ -230,7 +230,7 @@ int main(int argc, char *argv[]) {
     //
     rank = f.get_array_slice_dimensions(v1d, dims1D, dirs1D, true, false);
     if (rank != 1 || dims1D[0] != NX) meep::abort("incorrect dimensions for 1D slice");
-    std::unique_ptr<std::complex<double> []> slice1d(f.get_complex_array_slice(v1d, Hz, 0, 0, true));
+    std::unique_ptr<std::complex<realnum> []> slice1d(f.get_complex_array_slice(v1d, Hz, 0, 0, true));
     std::vector<std::complex<realnum>> slice1d_realnum;
     for (int i = 0; i < NX; ++i)
       slice1d_realnum.emplace_back(slice1d[i]);
@@ -239,7 +239,7 @@ int main(int argc, char *argv[]) {
 
     rank = f.get_array_slice_dimensions(v2d, dims2D, dirs2D, true, false);
     if (rank != 2 || dims2D[0] != NX || dims2D[1] != NY) meep::abort("incorrect dimensions for 2D slice");
-    std::unique_ptr<double []> slice2d(f.get_array_slice(v2d, Sy, 0, 0, true));
+    std::unique_ptr<realnum []> slice2d(f.get_array_slice(v2d, Sy, 0, 0, true));
     std::unique_ptr<realnum[]> slice2d_realnum(new realnum[NX * NY]);
     for (int i = 0; i < NX * NY; ++i)
       slice2d_realnum[i] = static_cast<realnum>(slice2d[i]);
diff --git a/tests/dft-fields.cpp b/tests/dft-fields.cpp
index 241866703..6e1e44746 100644
--- a/tests/dft-fields.cpp
+++ b/tests/dft-fields.cpp
@@ -15,8 +15,6 @@
 
 using namespace meep;
 
-typedef std::complex<double> cdouble;
-
 vector3 v3(double x, double y = 0.0, double z = 0.0) {
   vector3 v;
   v.x = x;
@@ -35,7 +33,7 @@ double dummy_eps(const vec &) { return 1.0; }
 /***************************************************************/
 /***************************************************************/
 /***************************************************************/
-void Run(bool Pulse, double resolution, cdouble **field_array = 0, int *array_rank = 0,
+void Run(bool Pulse, double resolution, std::complex<meep::realnum> **field_array = 0, int *array_rank = 0,
          size_t *array_dims = 0) {
   /***************************************************************/
   /* initialize geometry                                         */
@@ -104,7 +102,7 @@ void Run(bool Pulse, double resolution, cdouble **field_array = 0, int *array_ra
 /***************************************************************/
 /* return L2 norm of error normalized by average of L2 norms   */
 /***************************************************************/
-double compare_array_to_dataset(cdouble *field_array, int array_rank, size_t *array_dims,
+double compare_array_to_dataset(std::complex<meep::realnum> *field_array, int array_rank, size_t *array_dims,
                                 const char *file, const char *name) {
   int file_rank;
   size_t file_dims[3];
@@ -121,8 +119,8 @@ double compare_array_to_dataset(cdouble *field_array, int array_rank, size_t *ar
 
   double NormArray = 0.0, NormFile = 0.0, NormDelta = 0.0;
   for (size_t n = 0; n < file_dims[0] * file_dims[1]; n++) {
-    cdouble zArray = field_array[n];
-    cdouble zFile = cdouble(rdata[n], idata[n]);
+    std::complex<double> zArray = field_array[n];
+    std::complex<double> zFile = std::complex<double>(rdata[n], idata[n]);
     NormArray += norm(zArray);
     NormFile += norm(zFile);
     NormDelta += norm(zArray - zFile);
@@ -181,12 +179,12 @@ double compare_complex_hdf5_datasets(const char *file1, const char *name1, const
   double max_abs1 = 0.0, max_abs2 = 0.0;
   double max_arg1 = 0.0, max_arg2 = 0.0;
   for (size_t n = 0; n < length; n++) {
-    cdouble z1 = cdouble(rdata1[n], idata1[n]);
+    std::complex<double> z1 = std::complex<double>(rdata1[n], idata1[n]);
     if (abs(z1) > max_abs1) {
       max_abs1 = abs(z1);
       max_arg1 = arg(z1);
     }
-    cdouble z2 = cdouble(rdata2[n], idata2[n]);
+    std::complex<double> z2 = std::complex<double>(rdata2[n], idata2[n]);
     if (abs(z2) > max_abs2) {
       max_abs2 = abs(z2);
       max_arg2 = arg(z2);
@@ -196,11 +194,11 @@ double compare_complex_hdf5_datasets(const char *file1, const char *name1, const
 
   // second pass to get L2 norm of difference between normalized data sets
   double norm1 = 0.0, norm2 = 0.0, normdiff = 0.0;
-  cdouble phase1 = exp(-cdouble(0, 1) * max_arg1);
-  cdouble phase2 = exp(-cdouble(0, 1) * max_arg2);
+  std::complex<double> phase1 = exp(-std::complex<double>(0, 1) * max_arg1);
+  std::complex<double> phase2 = exp(-std::complex<double>(0, 1) * max_arg2);
   for (size_t n = 0; n < length; n++) {
-    cdouble z1 = phase1 * cdouble(rdata1[n], idata1[n]) / max_abs1;
-    cdouble z2 = phase2 * cdouble(rdata2[n], idata2[n]) / max_abs2;
+    std::complex<double> z1 = phase1 * std::complex<double>(rdata1[n], idata1[n]) / max_abs1;
+    std::complex<double> z2 = phase2 * std::complex<double>(rdata2[n], idata2[n]) / max_abs2;
     norm1 += norm(z1);
     norm2 += norm(z2);
     normdiff += norm(z1 - z2);
@@ -235,7 +233,7 @@ int main(int argc, char *argv[]) {
       meep::abort("unknown argument %s", argv[narg]);
   }
 
-  cdouble *field_array = 0;
+  std::complex<meep::realnum> *field_array = 0;
   int array_rank;
   size_t array_dims[3];
   Run(true, resolution, &field_array, &array_rank, array_dims);
diff --git a/tests/integrate.cpp b/tests/integrate.cpp
index 8cada6a87..2bcf84549 100644
--- a/tests/integrate.cpp
+++ b/tests/integrate.cpp
@@ -41,7 +41,7 @@ typedef struct {
 } linear_integrand_data;
 
 /* integrand for integrating c + ax*x + ay*y + az*z. */
-static complex<double> linear_integrand(const complex<double> *fields, const vec &loc,
+static complex<double> linear_integrand(const complex<realnum> *fields, const vec &loc,
                                         void *data_) {
   linear_integrand_data *data = (linear_integrand_data *)data_;
 
@@ -190,7 +190,7 @@ void check_integral(fields &f, linear_integrand_data &d, const volume &v, compon
 
   double sum = real(f.integrate(0, 0, linear_integrand, (void *)&d, v));
   if (fabs(sum - correct_integral(v, d)) > 1e-9 * fabs(sum))
-    meep::abort("FAILED: %0.16g instead of %0.16g\n", (double)sum, correct_integral(v, d));
+    meep::abort("FAILED: %0.16g instead of %0.16g\n", sum, correct_integral(v, d));
   master_printf("...PASSED.\n");
 }
 
diff --git a/tests/pw-source-ll.cpp b/tests/pw-source-ll.cpp
index dd61c8ae7..56b98007a 100644
--- a/tests/pw-source-ll.cpp
+++ b/tests/pw-source-ll.cpp
@@ -22,8 +22,6 @@
 
 using namespace meep;
 
-typedef std::complex<double> cdouble;
-
 /***************************************************************/
 /*
 ; pw-amp is a function that returns the amplitude exp(ik(x+x0)) at a
@@ -40,12 +38,12 @@ typedef struct pw_amp_data {
   vec x0;
 } pw_amp_data;
 
-cdouble pw_amp(vec x, void *UserData) {
+std::complex<double> pw_amp(vec x, void *UserData) {
   pw_amp_data *data = (pw_amp_data *)UserData;
   vec k = data->k;
   vec x0 = data->x0;
 
-  const cdouble II(0.0, 1.0);
+  const std::complex<double> II(0.0, 1.0);
   return exp(II * (k & (x + x0)));
 }
 
@@ -56,10 +54,10 @@ cdouble pw_amp(vec x, void *UserData) {
 /* amplitude functions and global variables.                   */
 /***************************************************************/
 pw_amp_data pw_amp_data_left;
-cdouble pw_amp_left(const vec &x) { return pw_amp(x, (void *)&pw_amp_data_left); }
+std::complex<double> pw_amp_left(const vec &x) { return pw_amp(x, (void *)&pw_amp_data_left); }
 
 pw_amp_data pw_amp_data_bottom;
-cdouble pw_amp_bottom(const vec &x) { return pw_amp(x, (void *)&pw_amp_data_bottom); }
+std::complex<double> pw_amp_bottom(const vec &x) { return pw_amp(x, (void *)&pw_amp_data_bottom); }
 
 /***************************************************************/
 /* dummy material function needed to pass to structure( )      */
diff --git a/tests/ring-ll.cpp b/tests/ring-ll.cpp
index 321251cba..e28e6d07b 100644
--- a/tests/ring-ll.cpp
+++ b/tests/ring-ll.cpp
@@ -21,8 +21,6 @@
 
 using namespace meep;
 
-typedef std::complex<double> cdouble;
-
 vector3 v3(double x, double y = 0.0, double z = 0.0) {
   vector3 v;
   v.x = x;
@@ -109,7 +107,7 @@ int main(int argc, char *argv[]) {
 
   double T = 300.0;
   double stop_time = f.round_time() + T;
-  std::vector<cdouble> fieldData;
+  std::vector<std::complex<double>> fieldData;
   vec eval_pt(r + 0.1, 0.0);
   while (f.round_time() < stop_time) {
     f.step();
@@ -117,7 +115,7 @@ int main(int argc, char *argv[]) {
   };
 
 #define MAXBANDS 100
-  cdouble amp[MAXBANDS];
+  std::complex<double> amp[MAXBANDS];
   double freq_re[MAXBANDS];
   double freq_im[MAXBANDS];
   double err[MAXBANDS];
@@ -137,8 +135,8 @@ int main(int argc, char *argv[]) {
   int ref_bands = 3;
   double ref_freq_re[3] = {1.1807e-01, 1.4716e-01, 1.7525e-01};
   double ref_freq_im[3] = {-7.6133e-04, -2.1156e-04, -5.2215e-05};
-  cdouble ref_amp[3] = {cdouble(-8.28e-04, -1.34e-03), cdouble(1.23e-03, -1.25e-02),
-                        cdouble(2.83e-03, -6.52e-04)};
+  std::complex<double> ref_amp[3] = {std::complex<double>(-8.28e-04, -1.34e-03), std::complex<double>(1.23e-03, -1.25e-02),
+                        std::complex<double>(2.83e-03, -6.52e-04)};
   if (bands != 3) meep::abort("harminv found only %i/%i bands\n", bands, ref_bands);
   for (int nb = 0; nb < bands; nb++)
     if (fabs(freq_re[nb] - ref_freq_re[nb]) > 1.0e-2 * fabs(ref_freq_re[nb]) ||