Re-write pearsonr to use Resolve #5638

Merged · 18 commits · Jan 18, 2024
Changes from 11 commits
38 changes: 38 additions & 0 deletions benchmarks/benchmarks/stats.py
@@ -0,0 +1,38 @@
# Copyright Iris contributors
#
# This file is part of Iris and is released under the BSD license.
# See LICENSE in the root of the repository for full licensing details.
"""Stats benchmark tests."""

import iris
from iris.analysis.stats import pearsonr
import iris.tests


class PearsonR:
def setup(self):
cube_temp = iris.load_cube(
iris.tests.get_data_path(
("NetCDF", "global", "xyt", "SMALL_total_column_co2.nc")
)
)

# Make data non-lazy.
cube_temp.data

self.cube_a = cube_temp[:6]
self.cube_b = cube_temp[20:26]
self.cube_b.replace_coord(self.cube_a.coord("time"))
for name in ["latitude", "longitude"]:
self.cube_b.coord(name).guess_bounds()
self.weights = iris.analysis.cartography.area_weights(self.cube_b)

def time_real(self):
pearsonr(self.cube_a, self.cube_b, weights=self.weights)

def time_lazy(self):
for cube in self.cube_a, self.cube_b:
cube.data = cube.lazy_data()

result = pearsonr(self.cube_a, self.cube_b, weights=self.weights)
result.data
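
For orientation, a minimal sketch (file paths hypothetical) of the laziness contract these two benchmarks separate: `time_real` times the eager NumPy path, while `time_lazy` also covers building the Dask graph plus a single compute.

import iris
from iris.analysis.stats import pearsonr

cube_a = iris.load_cube("sample_a.nc")  # hypothetical input file
cube_b = iris.load_cube("sample_b.nc")  # hypothetical input file

result = pearsonr(cube_a, cube_b)
print(result.has_lazy_data())  # True while inputs are lazy: nothing computed yet
_ = result.data                # realising the data triggers the single compute
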
5 changes: 3 additions & 2 deletions docs/src/whatsnew/latest.rst
@@ -100,8 +100,9 @@ This document explains the changes made to Iris for this release
    lazy data from file. This will also speed up coordinate comparison.
    (:pull:`5610`)
 
-#. `@rcomer`_ modified :func:`~iris.analysis.stats.pearsonr` so it preserves
-   lazy data in all cases and also runs a little faster. (:pull:`5638`)
+#. `@rcomer`_ and `@trexfeathers`_ (reviewer) modified
+   :func:`~iris.analysis.stats.pearsonr` so it preserves lazy data in all cases
+   and also runs a little faster. (:pull:`5638`)
 
 🔥 Deprecations
 ===============
72 changes: 34 additions & 38 deletions lib/iris/analysis/stats.py
@@ -30,7 +30,8 @@ def pearsonr(
     cube_a, cube_b : cubes
         Cubes between which the correlation will be calculated. The cubes
         should either be the same shape and have the same dimension coordinates
-        or one cube should be broadcastable to the other.
+        or one cube should be broadcastable to the other. Broadcasting rules
+        are the same as those for cube arithmetic (see :ref:`cube maths`).
     corr_coords : str or list of str
         The cube coordinate name(s) over which to calculate correlations. If no
         names are provided then correlation will be calculated over all common
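
A hedged illustration of the broadcasting rule added above, using small synthetic cubes (all names and values hypothetical): the 2-D cube is broadcast along the 3-D cube's time dimension, exactly as in cube arithmetic.

import numpy as np
import iris.coords
import iris.cube
from iris.analysis.stats import pearsonr

lat = iris.coords.DimCoord([10.0, 20.0, 30.0], standard_name="latitude", units="degrees")
lon = iris.coords.DimCoord([0.0, 90.0, 180.0, 270.0], standard_name="longitude", units="degrees")
time = iris.coords.DimCoord([0.0, 1.0], standard_name="time", units="days since 2000-01-01")

cube_3d = iris.cube.Cube(
    np.random.rand(2, 3, 4),
    dim_coords_and_dims=[(time, 0), (lat, 1), (lon, 2)],
)
cube_2d = iris.cube.Cube(
    np.random.rand(3, 4),
    dim_coords_and_dims=[(lat.copy(), 0), (lon.copy(), 1)],
)

# One spatial pattern correlation per time step.
r = pearsonr(cube_3d, cube_2d, corr_coords=["latitude", "longitude"])
print(r.shape)  # (2,)
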
@@ -84,88 +85,83 @@ def pearsonr(

     # Get the broadcast, auto-transposed safe versions of the cube operands.
     resolver = Resolve(cube_1, cube_2)
-    cube_1 = resolver.lhs_cube_resolved
-    cube_2 = resolver.rhs_cube_resolved
+    lhs_cube_resolved = resolver.lhs_cube_resolved
+    rhs_cube_resolved = resolver.rhs_cube_resolved
 
-    if cube_1.has_lazy_data() or cube_2.has_lazy_data():
+    if lhs_cube_resolved.has_lazy_data() or rhs_cube_resolved.has_lazy_data():
         al = da
-        array_1 = cube_1.lazy_data()
-        array_2 = cube_2.lazy_data()
+        array_lhs = lhs_cube_resolved.lazy_data()
+        array_rhs = rhs_cube_resolved.lazy_data()
     else:
         al = np
-        array_1 = cube_1.data
-        array_2 = cube_2.data
+        array_lhs = lhs_cube_resolved.data
+        array_rhs = rhs_cube_resolved.data
 
     # If no coords passed then set to all common dimcoords of cubes.
     if corr_coords is None:
-        dim_coords_1 = {coord.name() for coord in cube_1.dim_coords}
-        dim_coords_2 = {coord.name() for coord in cube_2.dim_coords}
+        dim_coords_1 = {coord.name() for coord in lhs_cube_resolved.dim_coords}
+        dim_coords_2 = {coord.name() for coord in rhs_cube_resolved.dim_coords}
         corr_coords = list(dim_coords_1.intersection(dim_coords_2))
 
     # Interpret coords as array dimensions.
     corr_dims = set()
     if isinstance(corr_coords, str):
         corr_coords = [corr_coords]
     for coord in corr_coords:
-        corr_dims.update(cube_1.coord_dims(coord))
+        corr_dims.update(lhs_cube_resolved.coord_dims(coord))
 
     corr_dims = tuple(corr_dims)
 
     # Match up data masks if required.
     if common_mask:
-        mask_1 = al.ma.getmaskarray(array_1)
+        mask_lhs = al.ma.getmaskarray(array_lhs)
         if al is np:
-            # Reduce all invariant dimensions of mask_1 to length 1. This avoids
-            # unnecessary broadcasting of array_2.
+            # Reduce all invariant dimensions of mask_lhs to length 1. This avoids
+            # unnecessary broadcasting of array_rhs.
             index = tuple(
                 slice(0, 1)
-                if np.array_equal(mask_1.any(axis=dim), mask_1.all(axis=dim))
+                if np.array_equal(mask_lhs.any(axis=dim), mask_lhs.all(axis=dim))
                 else slice(None)
-                for dim in range(mask_1.ndim)
+                for dim in range(mask_lhs.ndim)
             )
-            mask_1 = mask_1[index]
+            mask_lhs = mask_lhs[index]
 
-        array_2 = _mask_array(array_2, mask_1)
-        array_1 = _mask_array(array_1, al.ma.getmaskarray(array_2))
+        array_rhs = _mask_array(array_rhs, mask_lhs)
+        array_lhs = _mask_array(array_lhs, al.ma.getmaskarray(array_rhs))
 
     # Broadcast weights to shape of arrays if necessary.
     if weights is None:
-        weights_1 = weights_2 = None
+        weights_lhs = weights_rhs = None
     else:
         if weights.shape != smaller_shape:
             msg = f"weights array should have dimensions {smaller_shape}"
             raise ValueError(msg)
 
-        if resolver.reorder_src_dims is not None:
-            # Apply same transposition as was done to cube_2 within Resolve.
-            weights = weights.transpose(resolver.reorder_src_dims)
-
-        # Reshape to add in any length-1 dimensions needed for broadcasting.
-        weights = weights.reshape(cube_2.shape)
-
-        weights_2 = np.broadcast_to(weights, array_2.shape)
-        weights_1 = np.broadcast_to(weights, array_1.shape)
+        wt_resolver = Resolve(cube_1, cube_2.copy(weights))
+        weights = wt_resolver.rhs_cube_resolved.data
+        weights_rhs = np.broadcast_to(weights, array_rhs.shape)
+        weights_lhs = np.broadcast_to(weights, array_lhs.shape)
 
     # Calculate correlations.
-    s1 = array_1 - al.ma.average(
-        array_1, axis=corr_dims, weights=weights_1, keepdims=True
+    s_lhs = array_lhs - al.ma.average(
+        array_lhs, axis=corr_dims, weights=weights_lhs, keepdims=True
     )
-    s2 = array_2 - al.ma.average(
-        array_2, axis=corr_dims, weights=weights_2, keepdims=True
+    s_rhs = array_rhs - al.ma.average(
+        array_rhs, axis=corr_dims, weights=weights_rhs, keepdims=True
     )
 
-    s_prod = resolver.cube(s1 * s2)
+    s_prod = resolver.cube(s_lhs * s_rhs)
 
     # Use cube collapsed method as it takes care of coordinate collapsing and missing
     # data tolerance.
     covar = s_prod.collapsed(
-        corr_coords, iris.analysis.SUM, weights=weights_1, mdtol=mdtol
+        corr_coords, iris.analysis.SUM, weights=weights_lhs, mdtol=mdtol
     )
 
-    var_1 = iris.analysis._sum(s1**2, axis=corr_dims, weights=weights_1)
-    var_2 = iris.analysis._sum(s2**2, axis=corr_dims, weights=weights_2)
+    var_lhs = iris.analysis._sum(s_lhs**2, axis=corr_dims, weights=weights_lhs)
+    var_rhs = iris.analysis._sum(s_rhs**2, axis=corr_dims, weights=weights_rhs)
 
-    denom = np.sqrt(var_1 * var_2)
+    denom = np.sqrt(var_lhs * var_rhs)
 
     corr_cube = covar / denom
     corr_cube.rename("Pearson's r")
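
For reference, the Resolve pattern the rewrite leans on, mirroring the calls visible above (reusing the synthetic `cube_3d` and `cube_2d` sketched earlier):

from iris.common.resolve import Resolve

# Broadcast and auto-transpose two compatible cubes onto a common shape.
resolver = Resolve(cube_3d, cube_2d)
lhs = resolver.lhs_cube_resolved  # cube_3d as broadcast by Resolve
rhs = resolver.rhs_cube_resolved  # cube_2d as broadcast by Resolve

# Compute on the aligned payloads (lazy or real), then let the resolver
# rebuild a cube carrying the resolved metadata.
product = resolver.cube(lhs.core_data() * rhs.core_data())

The same machinery now handles the weights: resolving `cube_2.copy(weights)` against `cube_1` transposes and reshapes the weights array to match, which is what makes the `reorder_src_dims` bookkeeping removed below unnecessary.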
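And a small NumPy illustration of the mask-reduction step in the `common_mask` branch (array contents hypothetical):

import numpy as np

mask = np.zeros((4, 3, 5), dtype=bool)
mask[:, 1, :] = True  # the mask varies along axis 1 only

# Collapse every axis along which the mask is invariant to length 1, so it
# broadcasts against the other operand instead of forcing a full-size copy.
index = tuple(
    slice(0, 1)
    if np.array_equal(mask.any(axis=dim), mask.all(axis=dim))
    else slice(None)
    for dim in range(mask.ndim)
)
print(mask[index].shape)  # (1, 3, 1) -- equivalent to mask under broadcasting
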
2 changes: 0 additions & 2 deletions lib/iris/common/resolve.py
@@ -304,7 +304,6 @@ def __init__(self, lhs=None, rhs=None):
         #: operands, where the direction of the mapping is governed by
         #: :attr:`~iris.common.resolve.Resolve.map_rhs_to_lhs`.
         self.mapping = None  # set in _metadata_mapping
-        self.reorder_src_dims = None  # set in _as_compatible_cubes
 
         #: Cache containing a list of dim, aux and scalar coordinates prepared
         #: and ready for creating and attaching to the resultant resolved
@@ -440,7 +439,6 @@ def _as_compatible_cubes(self):

         # Determine whether a transpose of the src cube is necessary.
         if order != sorted(order):
-            self.reorder_src_dims = order
             new_src_data = new_src_data.transpose(order)
             logger.debug(
                 f"transpose src {self._src_cube_position} cube with order {order}"