From 7db0e62afd3fa0779ffa294214017d2c6f0c98d6 Mon Sep 17 00:00:00 2001
From: Ruth Comer <ruth.comer@metoffice.gov.uk>
Date: Mon, 11 Jul 2022 14:32:53 +0100
Subject: [PATCH 01/10] Re-write pearsonr to use Resolve

---
 docs/src/whatsnew/latest.rst                  |   2 +
 lib/iris/analysis/stats.py                    | 150 +++++++++---------
 lib/iris/common/resolve.py                    |   2 +
 .../unit/analysis/stats/test_pearsonr.py      |  37 ++---
 4 files changed, 99 insertions(+), 92 deletions(-)

diff --git a/docs/src/whatsnew/latest.rst b/docs/src/whatsnew/latest.rst
index d7e9a40648d..7bc7bd3a53b 100644
--- a/docs/src/whatsnew/latest.rst
+++ b/docs/src/whatsnew/latest.rst
@@ -100,6 +100,8 @@ This document explains the changes made to Iris for this release
    lazy data from file. This will also speed up coordinate comparison.
    (:pull:`5610`)
 
+#. `@rcomer`_ modified :func:`~iris.analysis.stats.pearsonr` so it preserves
+   lazy data in all cases and also runs a little faster.  (:pull:`5638`)
 
 🔥 Deprecations
 ===============
diff --git a/lib/iris/analysis/stats.py b/lib/iris/analysis/stats.py
index 33eb1713369..12d460cb360 100644
--- a/lib/iris/analysis/stats.py
+++ b/lib/iris/analysis/stats.py
@@ -4,11 +4,12 @@
 # See LICENSE in the root of the repository for full licensing details.
 """Statistical operations between cubes."""
 
+import dask.array as da
 import numpy as np
-import numpy.ma as ma
 
 import iris
-from iris.util import broadcast_to_shape
+from iris.common import Resolve
+from iris.util import _mask_array
 
 
 def pearsonr(
@@ -63,13 +64,13 @@ def pearsonr(
 
     Notes
     -----
+    If either of the input cubes has lazy data, the result will have lazy data.
+
     Reference:
         https://en.wikipedia.org/wiki/Pearson_correlation_coefficient
 
-    This operation is non-lazy.
-
     """
-    # Assign larger cube to cube_1
+    # Assign larger cube to cube_1 for simplicity.
     if cube_b.ndim > cube_a.ndim:
         cube_1 = cube_b
         cube_2 = cube_a
@@ -79,90 +80,91 @@ def pearsonr(
 
     smaller_shape = cube_2.shape
 
-    dim_coords_1 = [coord.name() for coord in cube_1.dim_coords]
-    dim_coords_2 = [coord.name() for coord in cube_2.dim_coords]
-    common_dim_coords = list(set(dim_coords_1) & set(dim_coords_2))
+    # Get the broadcast, auto-transposed safe versions of the cube operands.
+    resolver = Resolve(cube_1, cube_2)
+    cube_1 = resolver.lhs_cube_resolved
+    cube_2 = resolver.rhs_cube_resolved
+
+    if cube_1.has_lazy_data() or cube_2.has_lazy_data():
+        al = da
+        array_1 = cube_1.lazy_data()
+        array_2 = cube_2.lazy_data()
+    else:
+        al = np
+        array_1 = cube_1.data
+        array_2 = cube_2.data
+
     # If no coords passed then set to all common dimcoords of cubes.
     if corr_coords is None:
-        corr_coords = common_dim_coords
-
-    def _ones_like(cube):
-        # Return a copy of cube with the same mask, but all data values set to 1.
-        # The operation is non-lazy.
-        # For safety we also discard any cell-measures and ancillary-variables, to
-        # avoid cube arithmetic possibly objecting to them, or inadvertently retaining
-        # them in the result where they might be inappropriate.
-        ones_cube = cube.copy()
-        ones_cube.data = np.ones_like(cube.data)
-        ones_cube.rename("unknown")
-        ones_cube.units = 1
-        for cm in ones_cube.cell_measures():
-            ones_cube.remove_cell_measure(cm)
-        for av in ones_cube.ancillary_variables():
-            ones_cube.remove_ancillary_variable(av)
-        return ones_cube
+        dim_coords_1 = {coord.name() for coord in cube_1.dim_coords}
+        dim_coords_2 = {coord.name() for coord in cube_2.dim_coords}
+        corr_coords = list(dim_coords_1.intersection(dim_coords_2))
+
+    # Interpret coords as array dimensions.
+    corr_dims = set()
+    for coord in corr_coords:
+        corr_dims.update(cube_1.coord_dims(coord))
+
+    corr_dims = tuple(corr_dims)
 
     # Match up data masks if required.
     if common_mask:
-        # Create a cube of 1's with a common mask.
-        if ma.is_masked(cube_2.data):
-            mask_cube = _ones_like(cube_2)
-        else:
-            mask_cube = 1.0
-        if ma.is_masked(cube_1.data):
-            # Take a slice to avoid unnecessary broadcasting of cube_2.
-            slice_coords = [
-                dim_coords_1[i]
-                for i in range(cube_1.ndim)
-                if dim_coords_1[i] not in common_dim_coords
-                and np.array_equal(
-                    cube_1.data.mask.any(axis=i), cube_1.data.mask.all(axis=i)
-                )
-            ]
-            cube_1_slice = next(cube_1.slices_over(slice_coords))
-            mask_cube = _ones_like(cube_1_slice) * mask_cube
-        # Apply common mask to data.
-        if isinstance(mask_cube, iris.cube.Cube):
-            cube_1 = cube_1 * mask_cube
-            cube_2 = mask_cube * cube_2
-            dim_coords_2 = [coord.name() for coord in cube_2.dim_coords]
-
-    # Broadcast weights to shape of cubes if necessary.
-    if weights is None or cube_1.shape == smaller_shape:
-        weights_1 = weights
-        weights_2 = weights
+        mask_1 = al.ma.getmaskarray(array_1)
+        if al is np:
+            # Reduce all invariant dimensions of mask_1 to length 1.  This avoids
+            # unnecessary broadcasting of array_2.
+            index = tuple(
+                slice(0, 1)
+                if np.array_equal(mask_1.any(axis=dim), mask_1.all(axis=dim))
+                else slice(None)
+                for dim in range(mask_1.ndim)
+            )
+            mask_1 = mask_1[index]
+
+        array_2 = _mask_array(array_2, mask_1)
+        array_1 = _mask_array(array_1, al.ma.getmaskarray(array_2))
+
+    # Broadcast weights to shape of arrays if necessary.
+    if weights is None:
+        weights_1 = weights_2 = None
     else:
         if weights.shape != smaller_shape:
-            raise ValueError(
-                "weights array should have dimensions {}".format(smaller_shape)
-            )
+            msg = f"weights array should have dimensions {smaller_shape}"
+            raise ValueError(msg)
+
+        if resolver.reorder_src_dims is not None:
+            # Apply same transposition as was done to cube_2 within Resolve.
+            weights = weights.transpose(resolver.reorder_src_dims)
+
+        # Reshape to add in any length-1 dimensions needed for broadcasting.
+        weights = weights.reshape(cube_2.shape)
 
-        dims_1_common = [
-            i for i in range(cube_1.ndim) if dim_coords_1[i] in common_dim_coords
-        ]
-        weights_1 = broadcast_to_shape(weights, cube_1.shape, dims_1_common)
-        if cube_2.shape != smaller_shape:
-            dims_2_common = [
-                i for i in range(cube_2.ndim) if dim_coords_2[i] in common_dim_coords
-            ]
-            weights_2 = broadcast_to_shape(weights, cube_2.shape, dims_2_common)
-        else:
-            weights_2 = weights
+        weights_2 = np.broadcast_to(weights, array_2.shape)
+        weights_1 = np.broadcast_to(weights, array_1.shape)
 
     # Calculate correlations.
-    s1 = cube_1 - cube_1.collapsed(corr_coords, iris.analysis.MEAN, weights=weights_1)
-    s2 = cube_2 - cube_2.collapsed(corr_coords, iris.analysis.MEAN, weights=weights_2)
+    s1 = array_1 - al.ma.average(
+        array_1, axis=corr_dims, weights=weights_1, keepdims=True
+    )
+    s2 = array_2 - al.ma.average(
+        array_2, axis=corr_dims, weights=weights_2, keepdims=True
+    )
 
-    covar = (s1 * s2).collapsed(
+    s_prod = resolver.cube(s1 * s2)
+
+    # Use cube collapsed method as it takes care of coordinate collapsing and missing
+    # data tolerance.
+    covar = s_prod.collapsed(
         corr_coords, iris.analysis.SUM, weights=weights_1, mdtol=mdtol
     )
-    var_1 = (s1**2).collapsed(corr_coords, iris.analysis.SUM, weights=weights_1)
-    var_2 = (s2**2).collapsed(corr_coords, iris.analysis.SUM, weights=weights_2)
 
-    denom = iris.analysis.maths.apply_ufunc(
-        np.sqrt, var_1 * var_2, new_unit=covar.units
-    )
+    var_1 = iris.analysis._sum(s1**2, axis=corr_dims, weights=weights_1)
+    var_2 = iris.analysis._sum(s2**2, axis=corr_dims, weights=weights_2)
+
+    denom = np.sqrt(var_1 * var_2)
+
     corr_cube = covar / denom
     corr_cube.rename("Pearson's r")
+    corr_cube.units = 1
 
     return corr_cube
diff --git a/lib/iris/common/resolve.py b/lib/iris/common/resolve.py
index b35397ee587..09555ef9727 100644
--- a/lib/iris/common/resolve.py
+++ b/lib/iris/common/resolve.py
@@ -304,6 +304,7 @@ def __init__(self, lhs=None, rhs=None):
         #: operands, where the direction of the mapping is governed by
         #: :attr:`~iris.common.resolve.Resolve.map_rhs_to_lhs`.
         self.mapping = None  # set in _metadata_mapping
+        self.reorder_src_dims = None  # set in _as_compatible_cubes
 
         #: Cache containing a list of dim, aux and scalar coordinates prepared
         #: and ready for creating and attaching to the resultant resolved
@@ -439,6 +440,7 @@ def _as_compatible_cubes(self):
 
         # Determine whether a transpose of the src cube is necessary.
         if order != sorted(order):
+            self.reorder_src_dims = order
             new_src_data = new_src_data.transpose(order)
             logger.debug(
                 f"transpose src {self._src_cube_position} cube with order {order}"
diff --git a/lib/iris/tests/unit/analysis/stats/test_pearsonr.py b/lib/iris/tests/unit/analysis/stats/test_pearsonr.py
index 50387e14182..9eb6e44bee3 100644
--- a/lib/iris/tests/unit/analysis/stats/test_pearsonr.py
+++ b/lib/iris/tests/unit/analysis/stats/test_pearsonr.py
@@ -12,12 +12,13 @@
 import numpy.ma as ma
 
 import iris
+import iris._lazy_data
 import iris.analysis.stats as stats
 from iris.exceptions import CoordinateNotFoundError
 
 
 @tests.skip_data
-class Test(tests.IrisTest):
+class TestLazy(tests.IrisTest):
     def setUp(self):
         # 3D cubes:
         cube_temp = iris.load_cube(
@@ -126,10 +127,7 @@ def test_non_existent_coord(self):
 
     def test_mdtol(self):
         cube_small = self.cube_a[:, 0, 0]
-        cube_small_masked = cube_small.copy()
-        cube_small_masked.data = ma.array(
-            cube_small.data, mask=np.array([0, 0, 0, 1, 1, 1], dtype=bool)
-        )
+        cube_small_masked = iris.util.mask_cube(cube_small, [0, 0, 0, 1, 1, 1])
         r1 = stats.pearsonr(cube_small, cube_small_masked)
         r2 = stats.pearsonr(cube_small, cube_small_masked, mdtol=0.49)
         self.assertArrayAlmostEqual(r1.data, np.array([0.74586593]))
@@ -137,25 +135,21 @@ def test_mdtol(self):
 
     def test_common_mask_simple(self):
         cube_small = self.cube_a[:, 0, 0]
-        cube_small_masked = cube_small.copy()
-        cube_small_masked.data = ma.array(
-            cube_small.data, mask=np.array([0, 0, 0, 1, 1, 1], dtype=bool)
-        )
+        cube_small_masked = iris.util.mask_cube(cube_small, [0, 0, 0, 1, 1, 1])
         r = stats.pearsonr(cube_small, cube_small_masked, common_mask=True)
         self.assertArrayAlmostEqual(r.data, np.array([1.0]))
 
     def test_common_mask_broadcast(self):
-        cube_small = self.cube_a[:, 0, 0]
+        cube_small = iris.util.mask_cube(self.cube_a[:, 0, 0], [0, 0, 0, 0, 0, 1])
+        mask_2d = np.zeros((6, 2), dtype=bool)
+        # 2d mask varies on unshared coord:
+        mask_2d[0, 1] = 1
+
         cube_small_2d = self.cube_a[:, 0:2, 0]
-        cube_small.data = ma.array(
-            cube_small.data, mask=np.array([0, 0, 0, 0, 0, 1], dtype=bool)
-        )
-        cube_small_2d.data = ma.array(
-            np.tile(cube_small.data[:, np.newaxis], 2),
-            mask=np.zeros((6, 2), dtype=bool),
+        cube_small_2d.data = iris.util._mask_array(
+            cube_small.core_data().reshape(6, 1), mask_2d
         )
-        # 2d mask varies on unshared coord:
-        cube_small_2d.data.mask[0, 1] = 1
+
         r = stats.pearsonr(
             cube_small,
             cube_small_2d,
@@ -169,5 +163,12 @@ def test_common_mask_broadcast(self):
         self.assertArrayAlmostEqual(r.data, np.array([1.0, 1.0]))
 
 
+class TestReal(TestLazy):
+    def setUp(self):
+        super().setUp()
+        for cube in [self.cube_a, self.cube_b]:
+            cube.data
+
+
 if __name__ == "__main__":
     tests.main()

From 844ec33ea4069dedcd23805486ba041401736b12 Mon Sep 17 00:00:00 2001
From: Ruth Comer <ruth.comer@metoffice.gov.uk>
Date: Fri, 15 Dec 2023 10:44:16 +0000
Subject: [PATCH 02/10] add test for weight transposing case

---
 .../unit/analysis/stats/test_pearsonr.py      | 20 +++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/lib/iris/tests/unit/analysis/stats/test_pearsonr.py b/lib/iris/tests/unit/analysis/stats/test_pearsonr.py
index 9eb6e44bee3..e709b36e895 100644
--- a/lib/iris/tests/unit/analysis/stats/test_pearsonr.py
+++ b/lib/iris/tests/unit/analysis/stats/test_pearsonr.py
@@ -112,6 +112,26 @@ def test_broadcast_cubes_weighted(self):
         ]
         self.assertArrayAlmostEqual(r.data, np.array(r_by_slice))
 
+    def test_broadcast_transpose_cubes_weighted(self):
+        # Reference is calculated with no transposition.
+        r_ref = stats.pearsonr(
+            self.cube_a,
+            self.cube_b[0, :, :],
+            ["latitude", "longitude"],
+            weights=self.weights[0, :, :],
+        )
+
+        self.cube_a.transpose()
+        r_test = stats.pearsonr(
+            self.cube_a,
+            self.cube_b[0, :, :],
+            ["latitude", "longitude"],
+            weights=self.weights[0, :, :],
+        )
+
+        # Should get the same result, but transposed.
+        self.assertArrayAlmostEqual(r_test.data, r_ref.data.T)
+
     def test_weight_error(self):
         with self.assertRaises(ValueError):
             stats.pearsonr(

From 0bca68a2a04ebba3f09eba45762b484e5a917525 Mon Sep 17 00:00:00 2001
From: Ruth Comer <ruth.comer@metoffice.gov.uk>
Date: Wed, 20 Dec 2023 13:19:24 +0000
Subject: [PATCH 03/10] Fix single coordinate case

---
 lib/iris/analysis/stats.py                          | 2 ++
 lib/iris/tests/unit/analysis/stats/test_pearsonr.py | 4 ++++
 2 files changed, 6 insertions(+)

diff --git a/lib/iris/analysis/stats.py b/lib/iris/analysis/stats.py
index 12d460cb360..7db21bac7a3 100644
--- a/lib/iris/analysis/stats.py
+++ b/lib/iris/analysis/stats.py
@@ -102,6 +102,8 @@ def pearsonr(
 
     # Interpret coords as array dimensions.
     corr_dims = set()
+    if isinstance(corr_coords, str):
+        corr_coords = [corr_coords]
     for coord in corr_coords:
         corr_dims.update(cube_1.coord_dims(coord))
 
diff --git a/lib/iris/tests/unit/analysis/stats/test_pearsonr.py b/lib/iris/tests/unit/analysis/stats/test_pearsonr.py
index e709b36e895..0d66635fc1b 100644
--- a/lib/iris/tests/unit/analysis/stats/test_pearsonr.py
+++ b/lib/iris/tests/unit/analysis/stats/test_pearsonr.py
@@ -60,6 +60,10 @@ def test_compatible_cubes(self):
             ],
         )
 
+    def test_single_coord(self):
+        # Smoke test that single coord can be passed as single string.
+        stats.pearsonr(self.cube_a, self.cube_b, "latitude")
+
     def test_broadcast_cubes(self):
         r1 = stats.pearsonr(
             self.cube_a, self.cube_b[0, :, :], ["latitude", "longitude"]

From 3bf0f251006282d47b0cff34b8325069fffd1d30 Mon Sep 17 00:00:00 2001
From: Ruth Comer <ruth.comer@metoffice.gov.uk>
Date: Wed, 20 Dec 2023 13:56:12 +0000
Subject: [PATCH 04/10] use lenient coord handling

---
 lib/iris/analysis/stats.py                          | 4 +++-
 lib/iris/tests/unit/analysis/stats/test_pearsonr.py | 4 ++++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/lib/iris/analysis/stats.py b/lib/iris/analysis/stats.py
index 7db21bac7a3..96729b47f38 100644
--- a/lib/iris/analysis/stats.py
+++ b/lib/iris/analysis/stats.py
@@ -8,10 +8,12 @@
 import numpy as np
 
 import iris
-from iris.common import Resolve
+from iris.common import SERVICES, Resolve
+from iris.common.lenient import _lenient_client
 from iris.util import _mask_array
 
 
+@_lenient_client(services=SERVICES)
 def pearsonr(
     cube_a,
     cube_b,
diff --git a/lib/iris/tests/unit/analysis/stats/test_pearsonr.py b/lib/iris/tests/unit/analysis/stats/test_pearsonr.py
index 0d66635fc1b..908eafe4771 100644
--- a/lib/iris/tests/unit/analysis/stats/test_pearsonr.py
+++ b/lib/iris/tests/unit/analysis/stats/test_pearsonr.py
@@ -193,6 +193,10 @@ def setUp(self):
         for cube in [self.cube_a, self.cube_b]:
             cube.data
 
+    def test_lenient_handling(self):
+        self.cube_a.coord("time").var_name = "wibble"
+        stats.pearsonr(self.cube_a, self.cube_b)
+
 
 if __name__ == "__main__":
     tests.main()

From 09e83f73af796f4fe43c341355db0bfe1dfbfbd2 Mon Sep 17 00:00:00 2001
From: Ruth Comer <ruth.comer@metoffice.gov.uk>
Date: Wed, 20 Dec 2023 14:09:02 +0000
Subject: [PATCH 05/10] Move coordinate handling tests to separate class

These do not need real and lazy versions
---
 .../unit/analysis/stats/test_pearsonr.py      | 25 +++++++++++--------
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/lib/iris/tests/unit/analysis/stats/test_pearsonr.py b/lib/iris/tests/unit/analysis/stats/test_pearsonr.py
index 908eafe4771..6a0ffadebc4 100644
--- a/lib/iris/tests/unit/analysis/stats/test_pearsonr.py
+++ b/lib/iris/tests/unit/analysis/stats/test_pearsonr.py
@@ -17,8 +17,7 @@
 from iris.exceptions import CoordinateNotFoundError
 
 
-@tests.skip_data
-class TestLazy(tests.IrisTest):
+class Mixin:
     def setUp(self):
         # 3D cubes:
         cube_temp = iris.load_cube(
@@ -34,6 +33,9 @@ def setUp(self):
         cube_temp.coord("longitude").guess_bounds()
         self.weights = iris.analysis.cartography.area_weights(cube_temp)
 
+
+@tests.skip_data
+class TestLazy(Mixin, tests.IrisTest):
     def test_perfect_corr(self):
         r = stats.pearsonr(self.cube_a, self.cube_a, ["latitude", "longitude"])
         self.assertArrayEqual(r.data, np.array([1.0] * 6))
@@ -42,10 +44,6 @@ def test_perfect_corr_all_dims(self):
         r = stats.pearsonr(self.cube_a, self.cube_a)
         self.assertArrayEqual(r.data, np.array([1.0]))
 
-    def test_incompatible_cubes(self):
-        with self.assertRaises(ValueError):
-            stats.pearsonr(self.cube_a[:, 0, :], self.cube_b[0, :, :], "longitude")
-
     def test_compatible_cubes(self):
         r = stats.pearsonr(self.cube_a, self.cube_b, ["latitude", "longitude"])
         self.assertArrayAlmostEqual(
@@ -60,10 +58,6 @@ def test_compatible_cubes(self):
             ],
         )
 
-    def test_single_coord(self):
-        # Smoke test that single coord can be passed as single string.
-        stats.pearsonr(self.cube_a, self.cube_b, "latitude")
-
     def test_broadcast_cubes(self):
         r1 = stats.pearsonr(
             self.cube_a, self.cube_b[0, :, :], ["latitude", "longitude"]
@@ -193,10 +187,21 @@ def setUp(self):
         for cube in [self.cube_a, self.cube_b]:
             cube.data
 
+
+class TestCoordHandling(Mixin, tests.IrisTest):
     def test_lenient_handling(self):
+        # Smoke test that mismatched var_name does not prevent operation.
         self.cube_a.coord("time").var_name = "wibble"
         stats.pearsonr(self.cube_a, self.cube_b)
 
+    def test_incompatible_cubes(self):
+        with self.assertRaises(ValueError):
+            stats.pearsonr(self.cube_a[:, 0, :], self.cube_b[0, :, :], "longitude")
+
+    def test_single_coord(self):
+        # Smoke test that single coord can be passed as single string.
+        stats.pearsonr(self.cube_a, self.cube_b, "latitude")
+
 
 if __name__ == "__main__":
     tests.main()

From 23730352cdf023414352fec0f433927373ccd05a Mon Sep 17 00:00:00 2001
From: Ruth Comer <10599679+rcomer@users.noreply.github.com>
Date: Fri, 22 Dec 2023 10:34:07 +0000
Subject: [PATCH 06/10] Make some syntax highlighters happier

Co-authored-by: Martin Yeo <40734014+trexfeathers@users.noreply.github.com>
---
 lib/iris/tests/unit/analysis/stats/test_pearsonr.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/iris/tests/unit/analysis/stats/test_pearsonr.py b/lib/iris/tests/unit/analysis/stats/test_pearsonr.py
index 6a0ffadebc4..12de98debce 100644
--- a/lib/iris/tests/unit/analysis/stats/test_pearsonr.py
+++ b/lib/iris/tests/unit/analysis/stats/test_pearsonr.py
@@ -185,7 +185,7 @@ class TestReal(TestLazy):
     def setUp(self):
         super().setUp()
         for cube in [self.cube_a, self.cube_b]:
-            cube.data
+            _ = cube.data
 
 
 class TestCoordHandling(Mixin, tests.IrisTest):

From b3b9ddeeaffb7372481815599c6a1469b7694307 Mon Sep 17 00:00:00 2001
From: Ruth Comer <ruth.comer@metoffice.gov.uk>
Date: Fri, 22 Dec 2023 16:03:36 +0000
Subject: [PATCH 07/10] Initial review actions

---
 .../tests/unit/analysis/stats/test_pearsonr.py    | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/lib/iris/tests/unit/analysis/stats/test_pearsonr.py b/lib/iris/tests/unit/analysis/stats/test_pearsonr.py
index 12de98debce..f824d6930f4 100644
--- a/lib/iris/tests/unit/analysis/stats/test_pearsonr.py
+++ b/lib/iris/tests/unit/analysis/stats/test_pearsonr.py
@@ -139,10 +139,6 @@ def test_weight_error(self):
                 weights=self.weights,
             )
 
-    def test_non_existent_coord(self):
-        with self.assertRaises(CoordinateNotFoundError):
-            stats.pearsonr(self.cube_a, self.cube_b, "bad_coord")
-
     def test_mdtol(self):
         cube_small = self.cube_a[:, 0, 0]
         cube_small_masked = iris.util.mask_cube(cube_small, [0, 0, 0, 1, 1, 1])
@@ -163,10 +159,13 @@ def test_common_mask_broadcast(self):
         # 2d mask varies on unshared coord:
         mask_2d[0, 1] = 1
 
+        # Make a (6, 2) cube.
         cube_small_2d = self.cube_a[:, 0:2, 0]
-        cube_small_2d.data = iris.util._mask_array(
-            cube_small.core_data().reshape(6, 1), mask_2d
+        # Duplicate data along unshared coord's dimension.
+        new_data = iris.util.broadcast_to_shape(
+            cube_small.core_data(), (6, 2), dim_map=[0]
         )
+        cube_small_2d.data = iris.util._mask_array(new_data, mask_2d)
 
         r = stats.pearsonr(
             cube_small,
@@ -202,6 +201,10 @@ def test_single_coord(self):
         # Smoke test that single coord can be passed as single string.
         stats.pearsonr(self.cube_a, self.cube_b, "latitude")
 
+    def test_non_existent_coord(self):
+        with self.assertRaises(CoordinateNotFoundError):
+            stats.pearsonr(self.cube_a, self.cube_b, "bad_coord")
+
 
 if __name__ == "__main__":
     tests.main()

From 49a9d7b788ab0a4d5291b80831f1314eadd4c467 Mon Sep 17 00:00:00 2001
From: Ruth Comer <10599679+rcomer@users.noreply.github.com>
Date: Fri, 12 Jan 2024 10:23:29 +0000
Subject: [PATCH 08/10] Clearer comment

Co-authored-by: Martin Yeo <40734014+trexfeathers@users.noreply.github.com>
---
 lib/iris/analysis/stats.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/lib/iris/analysis/stats.py b/lib/iris/analysis/stats.py
index 96729b47f38..e165cc27113 100644
--- a/lib/iris/analysis/stats.py
+++ b/lib/iris/analysis/stats.py
@@ -140,7 +140,8 @@ def pearsonr(
             # Apply same transposition as was done to cube_2 within Resolve.
             weights = weights.transpose(resolver.reorder_src_dims)
 
-        # Reshape to add in any length-1 dimensions needed for broadcasting.
+        # Reshape to add in any length-1 dimensions that Resolve() has added
+        #  for broadcasting.
         weights = weights.reshape(cube_2.shape)
 
         weights_2 = np.broadcast_to(weights, array_2.shape)

From f1c970afd915412e3fbfdeaeb1a800f33fb89b7e Mon Sep 17 00:00:00 2001
From: Ruth Comer <ruth.comer@metoffice.gov.uk>
Date: Fri, 12 Jan 2024 12:42:12 +0000
Subject: [PATCH 09/10] assign reviewer blame

---
 docs/src/whatsnew/latest.rst | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/docs/src/whatsnew/latest.rst b/docs/src/whatsnew/latest.rst
index 7bc7bd3a53b..36bb5bba1d7 100644
--- a/docs/src/whatsnew/latest.rst
+++ b/docs/src/whatsnew/latest.rst
@@ -100,8 +100,9 @@ This document explains the changes made to Iris for this release
    lazy data from file. This will also speed up coordinate comparison.
    (:pull:`5610`)
 
-#. `@rcomer`_ modified :func:`~iris.analysis.stats.pearsonr` so it preserves
-   lazy data in all cases and also runs a little faster.  (:pull:`5638`)
+#. `@rcomer`_ and `@trexfeathers`_ (reviewer) modified 
+   :func:`~iris.analysis.stats.pearsonr` so it preserves lazy data in all cases
+   and also runs a little faster.  (:pull:`5638`)
 
 🔥 Deprecations
 ===============

From 045ce020ca85803bf89d1930c4422dd963c27465 Mon Sep 17 00:00:00 2001
From: Ruth Comer <ruth.comer@metoffice.gov.uk>
Date: Fri, 12 Jan 2024 17:00:56 +0000
Subject: [PATCH 10/10] first attempt at benchmarks

---
 benchmarks/benchmarks/stats.py | 38 ++++++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)
 create mode 100644 benchmarks/benchmarks/stats.py

diff --git a/benchmarks/benchmarks/stats.py b/benchmarks/benchmarks/stats.py
new file mode 100644
index 00000000000..0530431900a
--- /dev/null
+++ b/benchmarks/benchmarks/stats.py
@@ -0,0 +1,38 @@
+# Copyright Iris contributors
+#
+# This file is part of Iris and is released under the BSD license.
+# See LICENSE in the root of the repository for full licensing details.
+"""Stats benchmark tests."""
+
+import iris
+from iris.analysis.stats import pearsonr
+import iris.tests
+
+
+class PearsonR:
+    def setup(self):
+        cube_temp = iris.load_cube(
+            iris.tests.get_data_path(
+                ("NetCDF", "global", "xyt", "SMALL_total_column_co2.nc")
+            )
+        )
+
+        # Make data non-lazy.
+        cube_temp.data
+
+        self.cube_a = cube_temp[:6]
+        self.cube_b = cube_temp[20:26]
+        self.cube_b.replace_coord(self.cube_a.coord("time"))
+        for name in ["latitude", "longitude"]:
+            self.cube_b.coord(name).guess_bounds()
+        self.weights = iris.analysis.cartography.area_weights(self.cube_b)
+
+    def time_real(self):
+        pearsonr(self.cube_a, self.cube_b, weights=self.weights)
+
+    def time_lazy(self):
+        for cube in self.cube_a, self.cube_b:
+            cube.data = cube.lazy_data()
+
+        result = pearsonr(self.cube_a, self.cube_b, weights=self.weights)
+        result.data